xref: /titanic_44/usr/src/uts/i86pc/ml/interrupt.s (revision 70ab954a5d6c4d36858fd6e7e3dd4498d06d2c40)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
28/*	  All Rights Reserved					*/
29
30/*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
31/*	  All Rights Reserved					*/
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35#include <sys/asm_linkage.h>
36#include <sys/asm_misc.h>
37#include <sys/regset.h>
38#include <sys/psw.h>
39#include <sys/x86_archext.h>
40
41#if defined(__lint)
42
43#include <sys/types.h>
44#include <sys/thread.h>
45#include <sys/systm.h>
46
47#else   /* __lint */
48
49#include <sys/segments.h>
50#include <sys/pcb.h>
51#include <sys/trap.h>
52#include <sys/ftrace.h>
53#include <sys/traptrace.h>
54#include <sys/clock.h>
55#include <sys/panic.h>
56#include "assym.h"
57
/*
 * Format string for the fast-trace record emitted by intr_thread().
 * Its address is handed to ftrace_3_notick() together with three values
 * (regs pointer, interrupt vector, pil), matching the three conversions.
 */
58_ftrace_intr_thread_fmt:
59	.string	"intr_thread(): regs=0x%lx, int=0x%x, pil=0x%x"
60
61#endif	/* lint */
62
63#if defined(__i386)
64
65#if defined(__lint)
66
/*
 * Empty C stub, compiled only when __lint is defined.  The real
 * patch_tsc is the assembly routine in the non-lint branch.
 */
67void
68patch_tsc(void)
69{}
70
71#else	/* __lint */
72
73/*
74 * To cope with processors that do not implement the rdtsc instruction,
75 * we patch the kernel to use rdtsc if that feature is detected on the CPU.
76 * On an unpatched kernel, all locations requiring rdtsc are nop's.
77 *
78 * This function patches the nop's to rdtsc.
 *
 * No arguments, no return value.  %cx is used as scratch: it is loaded
 * with the 16 bits stored at _rdtsc_insn (an rdtsc instruction assembled
 * right after the ret below and used purely as a template), and that
 * 16-bit value is then stored over each _tsc_patchN label in turn.
 * Every patch site visible in this file is a "nop; nop" pair, which is
 * what a 16-bit store replaces.
 *
 * NOTE(review): only some of the _tsc_patchN labels are defined in the
 * portion of the file reviewed here; the rest are presumably defined
 * further on.  The list below must be kept in sync with the patch sites.
79 */
80	ENTRY_NP(patch_tsc)
81	movw	_rdtsc_insn, %cx	/* %cx = template instruction bytes */
82	movw	%cx, _tsc_patch1
83	movw	%cx, _tsc_patch2
84	movw	%cx, _tsc_patch3
85	movw	%cx, _tsc_patch4
86	movw	%cx, _tsc_patch5
87	movw	%cx, _tsc_patch6
88	movw	%cx, _tsc_patch7
89	movw	%cx, _tsc_patch8
90	movw	%cx, _tsc_patch9
91	movw	%cx, _tsc_patch10
92	movw	%cx, _tsc_patch11
93	movw	%cx, _tsc_patch12
94	movw	%cx, _tsc_patch13
95	movw	%cx, _tsc_patch14
96	movw	%cx, _tsc_patch15
97	movw	%cx, _tsc_patch16
98	movw	%cx, _tsc_patch17
99	ret
	/* Never executed: sits after the ret and is only read as data above. */
100_rdtsc_insn:
101	rdtsc
102	SET_SIZE(patch_tsc)
103
104#endif	/* __lint */
105
106#endif	/* __i386 */
107
108
109#if defined(__lint)
110
/*
 * Empty C stub, compiled only when __lint is defined.  The real
 * _interrupt entry point is defined in assembly in the non-lint branch.
 */
111void
112_interrupt(void)
113{}
114
115#else	/* __lint */
116
117#if defined(__amd64)
118
119	/*
120	 * Common register usage:
121	 *
122	 * %rbx		cpu pointer
123	 * %r12		trap trace pointer -and- stash of
124	 *		vec across intr_thread dispatch.
125	 * %r13d	ipl of isr
126	 * %r14d	old ipl (ipl level we entered on)
127	 * %r15		interrupted thread stack pointer
	 *
	 * %rbp is set to the stack pointer right after INTR_PUSH and is the
	 * value handed to the C helpers as "&regs".
	 *
	 * Control leaves this routine through one of:
	 *   dosoftint   - trap number is T_SOFTINT, or soft interrupts are
	 *                 pending after a high-level handler has run
	 *   intr_thread - the new ipl is at or below LOCK_LEVEL
	 *   _sys_rtt    - spurious interrupt, or nothing further to do
128	 */
129	ENTRY_NP2(cmnint, _interrupt)
130
131	INTR_PUSH
132
133	/*
134	 * At the end of TRACE_PTR %r12 points to the current TRAPTRACE entry
135	 */
136	TRACE_PTR(%r12, %rax, %eax, %rdx, $TT_INTERRUPT)
137						/* Uses labels 8 and 9 */
138	TRACE_REGS(%r12, %rsp, %rax, %rbx)	/* Uses label 9 */
139	TRACE_STAMP(%r12)		/* Clobbers %eax, %edx, uses 9 */
140
141	DISABLE_INTR_FLAGS		/* (and set kernel flag values) */
142
143	movq	%rsp, %rbp
144
145	TRACE_STACK(%r12)
146
147	LOADCPU(%rbx)				/* &cpu */
148	leaq	REGOFF_TRAPNO(%rbp), %rsi	/* &vector */
149	movl	CPU_PRI(%rbx), %r14d		/* old ipl */
150	movl	CPU_SOFTINFO(%rbx), %edx
151
	/*
	 * Seed the trace record with placeholder ipl/vector values (255)
	 * plus the current pri and base spl.  %edi is used as the scratch
	 * register here so that %edx (softinfo, wanted by dosoftint) and
	 * %rsi (&vector) stay intact.
	 */
152#ifdef TRAPTRACE
153	movl	$255, TTR_IPL(%r12)
154	movl	%r14d, %edi
155	movb	%dil, TTR_PRI(%r12)
156	movl	CPU_BASE_SPL(%rbx), %edi
157	movb	%dil, TTR_SPL(%r12)
158	movb	$255, TTR_VECTOR(%r12)
159#endif
160
161	/*
162	 * Check to see if the trap number is T_SOFTINT; if it is,
163	 * jump straight to dosoftint now.
164	 */
165	cmpq	$T_SOFTINT, (%rsi)
166	je	dosoftint
167
168	/*
169	 * Raise the interrupt priority level, returns newpil.
170	 * (The vector address is in %rsi so setlvl can update it.)
171	 */
172	movl	%r14d, %edi			/* old ipl */
173						/* &vector */
174	call	*setlvl(%rip)
175
176#ifdef TRAPTRACE
177	movb	%al, TTR_IPL(%r12)
178#endif
179	/*
180	 * check for spurious interrupt
181	 */
182	cmpl	$-1, %eax
183	je	_sys_rtt
184
185#ifdef TRAPTRACE
186	movl	%r14d, %edx
187	movb	%dl, TTR_PRI(%r12)
188	movl	CPU_BASE_SPL(%rbx), %edx
189	movb	%dl, TTR_SPL(%r12)
190#endif
191	movl	%eax, CPU_PRI(%rbx)		/* update ipl */
192
193#ifdef TRAPTRACE
194	movl	REGOFF_TRAPNO(%rbp), %edx
195	movb	%dl, TTR_VECTOR(%r12)
196#endif
197	movl	%eax, %r13d			/* ipl of isr */
198
199	/*
200	 * At this point we can take one of two paths.
201	 * If the new level is at or below lock level, we will
202	 * run this interrupt in a separate thread.
203	 */
204	cmpl	$LOCK_LEVEL, %eax
205	jbe	intr_thread
206
	/* High-level interrupt: hilevel_intr_prolog(cpu, ipl, oldipl, regs) */
207	movq	%rbx, %rdi		/* &cpu */
208	movl	%r13d, %esi		/* ipl */
209	movl	%r14d, %edx		/* old ipl */
210	movq	%rbp, %rcx		/* &regs */
211	call	hilevel_intr_prolog
212	orl	%eax, %eax		/* zero if need to switch stack */
213	jnz	1f
214
215	/*
216	 * Save the thread stack and get on the cpu's interrupt stack
	 * (%r15 is only written on this path, i.e. when the prolog
	 * returned zero.)
217	 */
218	movq	%rsp, %r15
219	movq	CPU_INTR_STACK(%rbx), %rsp
2201:
221
222	sti
223
224	/*
225	 * Walk the list of handlers for this vector, calling
226	 * them as we go until no more interrupts are claimed.
227	 */
228	movl	REGOFF_TRAPNO(%rbp), %edi
229	call	av_dispatch_autovect
230
231	cli
232
233	movq	%rbx, %rdi			/* &cpu */
234	movl	%r13d, %esi			/* ipl */
235	movl	%r14d, %edx			/* oldipl */
236	movl	REGOFF_TRAPNO(%rbp), %ecx	/* vec */
237	call	hilevel_intr_epilog
238	orl	%eax, %eax		/* zero if need to switch stack */
239	jnz	2f
240	movq	%r15, %rsp		/* back to the stack saved in %r15 */
2412:	/*
242	 * Check for, and execute, softints before we iret.
243	 *
244	 * (dosoftint expects oldipl in %r14d (which is where it is)
245	 * the cpu pointer in %rbx (which is where it is) and the
246	 * softinfo in %edx (which is where we'll put it right now))
247	 */
248	movl	CPU_SOFTINFO(%rbx), %edx
249	orl	%edx, %edx
250	jz	_sys_rtt
251	jmp	dosoftint
252	/*NOTREACHED*/
253
254	SET_SIZE(cmnint)
255	SET_SIZE(_interrupt)
256
257/*
258 * Handle an interrupt in a new thread
259 *
260 * As we branch here, interrupts are still masked,
261 * %rbx still contains the cpu pointer,
262 * %r14d contains the old ipl that we came in on, and
263 * %eax contains the new ipl that we got from the setlvl routine
 *
 * %rbp and %rsp both address the saved register frame (&regs) on entry.
 * Inside this routine:
 *   %r12d  holds the vector across the handler dispatch
 *   %r15   holds the stack pointer we arrived with (== &regs) while we
 *          run on the interrupt thread's stack
 *
 * Execution falls through into dosoftint when soft interrupts are pending
 * after the handler has run; otherwise it leaves via _sys_rtt.
264 */
265
266	ENTRY_NP(intr_thread)
267
268	movq	%rbx, %rdi	/* &cpu */
269	movq	%rbp, %rsi	/* &regs = stack pointer for _sys_rtt */
270	movl	REGOFF_TRAPNO(%rbp), %r12d	/* stash the vec */
271	movl	%eax, %edx	/* new pil from setlvl */
272	call	intr_thread_prolog
273	movq	%rsp, %r15
274	movq	%rax, %rsp	/* t_stk from interrupt thread */
275	movq	%rsp, %rbp
276
277	sti
278
279	testl	$FTRACE_ENABLED, CPU_FTRACE_STATE(%rbx)
280	jz	1f
281	/*
282	 * ftracing support. do we need this on x86?
	 *
	 * %rbp was repointed at the interrupt thread's stack just above,
	 * so it no longer addresses the saved registers.  %r15 holds the
	 * stack pointer we arrived with, which is the &regs value that was
	 * handed to intr_thread_prolog(), so trace that instead.
283	 */
284	leaq	_ftrace_intr_thread_fmt(%rip), %rdi
285	movq	%r15, %rsi			/* &regs */
286	movl	%r12d, %edx			/* vec */
287	movq	CPU_THREAD(%rbx), %r11		/* (the interrupt thread) */
288	movzbl	T_PIL(%r11), %ecx		/* newipl */
289	call	ftrace_3_notick
2901:
291	movl	%r12d, %edi			/* vec */
292	call	av_dispatch_autovect
293
294	cli
295
296	movq	%rbx, %rdi			/* &cpu */
297	movl	%r12d, %esi			/* vec */
298	movl	%r14d, %edx			/* oldpil */
299	call	intr_thread_epilog
300	/*
301	 * If we return from here (we might not if the interrupted thread
302	 * has exited or blocked, in which case we'll have quietly swtch()ed
303	 * away) then we need to switch back to our old %rsp
304	 */
305	movq	%r15, %rsp
306	movq	%rsp, %rbp
307	/*
308	 * Check for, and execute, softints before we iret.
309	 *
310	 * (dosoftint expects oldpil in %r14d, the cpu pointer in %rbx and
311	 * the mcpu_softinfo.st_pending field in %edx.)
312	 */
313	movl	CPU_SOFTINFO(%rbx), %edx
314	orl	%edx, %edx
315	jz	_sys_rtt
316	/*FALLTHROUGH*/
317
318/*
319 * Process soft interrupts.
320 * Interrupts are masked, and we have a minimal frame on the stack.
321 * %edx should contain the mcpu_softinfo.st_pending field
 *
 * Also expects the cpu pointer in %rbx, the old ipl in %r14d and &regs
 * in %rbp.  Loops back to itself for as long as soft interrupts remain
 * pending and dosoftint_prolog() keeps returning a stack to run on;
 * leaves via _sys_rtt.
322 */
323
324	ALTENTRY(dosoftint)
325
326	movq	%rbx, %rdi	/* &cpu */
327	movq	%rbp, %rsi	/* &regs = stack pointer for _sys_rtt */
328				/* cpu->cpu_m.mcpu_softinfo.st_pending */
329	movl	%r14d, %ecx	/* oldipl */
330	call	dosoftint_prolog
331	/*
332	 * dosoftint_prolog() usually returns a stack pointer for the
333	 * interrupt thread that we must switch to.  However, if the
334	 * returned stack pointer is NULL, then the software interrupt was
335	 * too low in priority to run now; we'll catch it another time.
336	 */
337	orq	%rax, %rax
338	jz	_sys_rtt
339	movq	%rsp, %r15
340	movq	%rax, %rsp	/* t_stk from interrupt thread */
341	movq	%rsp, %rbp
342
343	sti
344
345	/*
346	 * Enabling interrupts (above) could raise the current ipl
347	 * and base spl.  But, we continue processing the current soft
348	 * interrupt and we will check the base spl next time around
349	 * so that blocked interrupt threads get a chance to run.
350	 */
351	movq	CPU_THREAD(%rbx), %r11	/* now an interrupt thread */
352	movzbl	T_PIL(%r11), %edi
353	call	av_dispatch_softvect
354
355	cli
356
357	movq	%rbx, %rdi		/* &cpu */
358	movl	%r14d, %esi		/* oldpil */
359	call	dosoftint_epilog
360	movq	%r15, %rsp		/* back on old stack pointer */
361	movq	%rsp, %rbp
362	movl	CPU_SOFTINFO(%rbx), %edx
363	orl	%edx, %edx
364	jz	_sys_rtt
365	jmp	dosoftint
366
367	SET_SIZE(dosoftint)
368	SET_SIZE(intr_thread)
369
370#elif defined(__i386)
371
372/*
373 * One day, this should just invoke the C routines that know how to
374 * do all the interrupt bookkeeping.  In the meantime, try
375 * and make the assembler a little more comprehensible.
376 */
377
/*
 * Increment the 64-bit counter stored as two 32-bit words at
 * offset(basereg): add 1 to the low word, then fold the carry into the
 * high word at offset + 4.
 */
378#define	INC64(basereg, offset)			\
379	addl	$1, offset(basereg);		\
380	adcl	$0, offset + 4(basereg)
381
/*
 * Zero both 32-bit halves of the 64-bit value at offset(basereg).
 */
382#define	TSC_CLR(basereg, offset)		\
383	movl	$0, offset(basereg);		\
384	movl	$0, offset + 4(basereg)
385
386/*
387 * The following macros assume the time value is in %edx:%eax
388 * e.g. from a rdtsc instruction.
 *
 *	TSC_STORE	store %edx:%eax to the 64-bit slot at offset(reg)
 *	TSC_LOAD	load the 64-bit slot at offset(reg) into %edx:%eax
 *	TSC_ADD_TO	add %edx:%eax into the 64-bit slot (add, then adc)
 *	TSC_SUB_FROM	subtract the 64-bit slot from %edx:%eax (sub, then
 *			sbb), leaving the difference in %edx:%eax
389 */
390#define	TSC_STORE(reg, offset)		\
391	movl	%eax, offset(reg);	\
392	movl	%edx, offset + 4(reg)
393
394#define	TSC_LOAD(reg, offset)	\
395	movl	offset(reg), %eax;	\
396	movl	offset + 4(reg), %edx
397
398#define	TSC_ADD_TO(reg, offset)		\
399	addl	%eax, offset(reg);	\
400	adcl	%edx, offset + 4(reg)
401
402#define	TSC_SUB_FROM(reg, offset)	\
403	subl	offset(reg), %eax;	\
404	sbbl	offset + 4(reg), %edx	/* interval in edx:eax */
405
406/*
407 * basereg   - pointer to cpu struct
408 * pilreg    - pil or converted pil (pil - (LOCK_LEVEL + 1))
409 *
410 * Returns (base + pil * 8) in pilreg
 *
 * pilreg is overwritten with the result; basereg is left unchanged.
411 */
412#define	PILBASE(basereg, pilreg)	\
413	lea	(basereg, pilreg, 8), pilreg
414
415/*
416 * Returns (base + (pil - (LOCK_LEVEL + 1)) * 8) in pilreg
 *
 * pilreg must hold the unconverted pil on entry; the subtraction is done
 * here before PILBASE is applied.
417 */
418#define	HIGHPILBASE(basereg, pilreg)		\
419	subl	$LOCK_LEVEL + 1, pilreg;	\
420	PILBASE(basereg, pilreg)
421
422/*
423 * Returns (base + pil * 16) in pilreg
 *
 * Implemented as a shift left by 4 followed by an add of basereg, so
 * pilreg is overwritten.
424 */
425#define	PILBASE_INTRSTAT(basereg, pilreg)	\
426	shl	$4, pilreg;			\
427	addl	basereg, pilreg;
428
429/*
430 * Returns (cpu + cpu_mstate * 8) in tgt
 *
 * CPU_MSTATE(cpureg) is read as a 16-bit value and zero-extended before
 * being scaled.
431 */
432#define	INTRACCTBASE(cpureg, tgtreg)		\
433	movzwl	CPU_MSTATE(cpureg), tgtreg;	\
434	lea	(cpureg, tgtreg, 8), tgtreg
435
436/*
437 * cpu_stats.sys.intr[PIL]++
 *
 * pilreg    - register holding the pil (not modified)
 * tmpreg    - scratch register, overwritten
 * tmpreg_32 - name under which the pil is copied into the scratch register
 *             (the callers visible in this file pass the same register
 *             for tmpreg and tmpreg_32)
 * basereg   - pointer to cpu struct
 *
 * The 64-bit counter incremented lives at
 *	basereg + pil * 8 + CPU_STATS_SYS_INTR - 8
 * i.e. the slot for a given pil sits (pil - 1) entries past
 * CPU_STATS_SYS_INTR.
438 */
439#define	INC_CPU_STATS_INTR(pilreg, tmpreg, tmpreg_32, basereg)	\
440	movl	pilreg, tmpreg_32;				\
441	PILBASE(basereg, tmpreg);				\
442	INC64(tmpreg, _CONST(CPU_STATS_SYS_INTR - 8))
443
444/*
445 * Unlink thread from CPU's list
 *
 * Takes the thread at the head of CPU_INTR_THREAD(cpureg) into ithread
 * and makes that thread's T_LINK successor the new head.  tmpreg is
 * scratch.  No check is made for an empty list.
446 */
447#define	UNLINK_INTR_THREAD(cpureg, ithread, tmpreg)	\
448	mov	CPU_INTR_THREAD(cpureg), ithread;	\
449	mov	T_LINK(ithread), tmpreg;		\
450	mov	tmpreg, CPU_INTR_THREAD(cpureg)
451
452/*
453 * Link a thread into CPU's list
 *
 * Pushes ithread onto the head of CPU_INTR_THREAD(cpureg): the old head
 * becomes ithread's T_LINK.  tmpreg is scratch.
454 */
455#define	LINK_INTR_THREAD(cpureg, ithread, tmpreg)	\
456	mov	CPU_INTR_THREAD(cpureg), tmpreg;	\
457	mov	tmpreg, T_LINK(ithread);		\
458	mov	ithread, CPU_INTR_THREAD(cpureg)
459
/*
 * Assertion helpers.  With DEBUG defined they expand to a check followed
 * by a call to panic(); without it (see the #else branch at the bottom)
 * they expand to nothing.
 */
460#if defined(DEBUG)
461
462/*
463 * Do not call panic, if panic is already in progress.
 *
 * If panic_quiesce is non-zero, branch to label instead of panicking.
 * Otherwise push the address of msg and call panic.  The pushed argument
 * is not popped afterwards.
 * NOTE(review): presumably panic() does not return here - confirm.
464 */
465#define	__PANIC(msg, label)		\
466	cmpl	$0, panic_quiesce;		\
467	jne	label;				\
468	pushl	$msg;				\
469	call	panic
470
/*
 * Branch to label if the 64-bit value at offset(basereg) is non-zero,
 * testing the low word and then the high word.
 */
471#define	__CMP64_JNE(basereg, offset, label)	\
472	cmpl	$0, offset(basereg);		\
473	jne	label;				\
474	cmpl	$0, offset + 4(basereg);	\
475	jne	label
476
477/*
478 * ASSERT(!(CPU->cpu_intr_actv & (1 << PIL)))
 *
 * Tests bit pilreg of CPU_INTR_ACTV(basereg) with btl; panics with msg
 * if the bit is set.  Uses numeric local label 4.
479 */
480#define	ASSERT_NOT_CPU_INTR_ACTV(pilreg, basereg, msg)	\
481	btl	pilreg, CPU_INTR_ACTV(basereg);		\
482	jnc	4f;					\
483	__PANIC(msg, 4f);				\
4844:
485
486/*
487 * ASSERT(CPU->cpu_intr_actv & (1 << PIL))
 *
 * Tests bit pilreg of CPU_INTR_ACTV(basereg) with btl; panics with msg
 * if the bit is clear.  Uses numeric local label 5.
488 */
489#define	ASSERT_CPU_INTR_ACTV(pilreg, basereg, msg)	\
490	btl	pilreg, CPU_INTR_ACTV(basereg);		\
491	jc	5f;					\
492	__PANIC(msg, 5f);				\
4935:
494
495/*
496 * ASSERT(CPU->cpu_pil_high_start != 0)
 *
 * basereg is the base against which CPU_PIL_HIGH_START is applied (the
 * visible caller passes the result of HIGHPILBASE).  Uses numeric local
 * label 6.
497 */
498#define	ASSERT_CPU_PIL_HIGH_START_NZ(basereg)			\
499	__CMP64_JNE(basereg, CPU_PIL_HIGH_START, 6f);		\
500	__PANIC(_interrupt_timestamp_zero, 6f);		\
5016:
502
503/*
504 * ASSERT(t->t_intr_start != 0)
 *
 * basereg is the thread pointer.  Uses numeric local label 7.
505 */
506#define	ASSERT_T_INTR_START_NZ(basereg)				\
507	__CMP64_JNE(basereg, T_INTR_START, 7f);			\
508	__PANIC(_intr_thread_t_intr_start_zero, 7f);	\
5097:
510
/* Panic messages referenced by the assertion macros above. */
511_interrupt_actv_bit_set:
512	.string	"_interrupt(): cpu_intr_actv bit already set for PIL"
513_interrupt_actv_bit_not_set:
514	.string	"_interrupt(): cpu_intr_actv bit not set for PIL"
515_interrupt_timestamp_zero:
516	.string "_interrupt(): timestamp zero upon handler return"
517_intr_thread_actv_bit_not_set:
518	.string	"intr_thread():	cpu_intr_actv bit not set for PIL"
519_intr_thread_t_intr_start_zero:
520	.string	"intr_thread():	t_intr_start zero upon handler return"
521_dosoftint_actv_bit_set:
522	.string	"dosoftint(): cpu_intr_actv bit already set for PIL"
523_dosoftint_actv_bit_not_set:
524	.string	"dosoftint(): cpu_intr_actv bit not set for PIL"
525
	/* Counter incremented (under DEBUG) at intr_thread_exit. */
526	DGDEF(intr_thread_cnt)
527
528#else
529#define	ASSERT_NOT_CPU_INTR_ACTV(pilreg, basereg, msg)
530#define	ASSERT_CPU_INTR_ACTV(pilreg, basereg, msg)
531#define	ASSERT_CPU_PIL_HIGH_START_NZ(basereg)
532#define	ASSERT_T_INTR_START_NZ(basereg)
533#endif
534
/*
 * cmnint / _interrupt (i386): common interrupt entry point.
 *
 * Register usage established below:
 *	%ebp	pointer to the saved registers (stack pointer after
 *		INTR_PUSH)
 *	%ebx	cpu pointer; reused as the handler-chain bookkeeping
 *		register (%bh/%bl) while handlers run, then reloaded with
 *		LOADCPU at .intr_ret1
 *	%edi	old ipl (the level we entered on)
 *	%esi	trap trace pointer at first, general scratch afterwards
 *
 * Control leaves through one of: dosoftint, intr_thread (new ipl at or
 * below LOCK_LEVEL) or _sys_rtt.
 *
 * While the handlers run on the high-level path the stack holds the
 * interrupt vector and, beneath it, the saved thread stack pointer if
 * this invocation was the one that switched to the interrupt stack.
 */
535	ENTRY_NP2(cmnint, _interrupt)
536
537	INTR_PUSH
538
539	/*
540	 * At the end of TRACE_PTR %esi points to the current TRAPTRACE entry
541	 */
542	TRACE_PTR(%esi, %eax, %eax, %edx, $TT_INTERRUPT)
543						/* Uses labels 8 and 9 */
544	TRACE_REGS(%esi, %esp, %eax, %ebx)	/* Uses label 9 */
545	TRACE_STAMP(%esi)		/* Clobbers %eax, %edx, uses 9 */
546
547	movl	%esp, %ebp
548	DISABLE_INTR_FLAGS
549	LOADCPU(%ebx)		/* get pointer to CPU struct. Avoid gs refs */
550	leal    REGOFF_TRAPNO(%ebp), %ecx	/* get address of vector */
551	movl	CPU_PRI(%ebx), %edi		/* get ipl */
552	movl	CPU_SOFTINFO(%ebx), %edx
553
554	/
555	/ Check to see if the trap number is T_SOFTINT; if it is, we'll
556	/ jump straight to dosoftint now.
557	/
558	cmpl	$T_SOFTINT, (%ecx)
559	je	dosoftint
560
561	/ raise interrupt priority level
562	/ oldipl is in %edi, vectorp is in %ecx
563	/ newipl is returned in %eax
564	pushl	%ecx
565	pushl	%edi
566	call    *setlvl
567	popl	%edi			/* save oldpil in %edi */
568	popl	%ecx
569
570#ifdef TRAPTRACE
571	movb	%al, TTR_IPL(%esi)
572#endif
573
574	/ check for spurious interrupt
575	cmp	$-1, %eax
576	je	_sys_rtt
577
578#ifdef TRAPTRACE
579	movl	CPU_PRI(%ebx), %edx
580	movb	%dl, TTR_PRI(%esi)
581	movl	CPU_BASE_SPL(%ebx), %edx
582	movb	%dl, TTR_SPL(%esi)
583#endif
584
585	movl	%eax, CPU_PRI(%ebx) /* update ipl */
586	movl	REGOFF_TRAPNO(%ebp), %ecx /* reload the interrupt vector */
587
588#ifdef TRAPTRACE
589	movb	%cl, TTR_VECTOR(%esi)
590#endif
591
592	/ At this point we can take one of two paths.  If the new priority
593	/ level is less than or equal to LOCK LEVEL then we jump to code that
594	/ will run this interrupt as a separate thread.  Otherwise the
595	/ interrupt is NOT run as a separate thread.
596
597	/ %edi - old priority level
598	/ %ebp - pointer to REGS
599	/ %ecx - translated vector
600	/ %eax - ipl of isr
601	/ %ebx - cpu pointer
602
603	cmpl 	$LOCK_LEVEL, %eax	/* compare to highest thread level */
604	jbe	intr_thread		/* process as a separate thread */
605
	/*
	 * For a CBE_HIGH_PIL interrupt, record the interrupted PIL and PC
	 * in the cpu structure.  Exactly one of the user/kernel PC fields
	 * gets the PC; the other is zeroed.
	 */
606	cmpl	$CBE_HIGH_PIL, %eax	/* Is this a CY_HIGH_LEVEL interrupt? */
607	jne	2f
608
609	movl	REGOFF_PC(%ebp), %esi
610	movl	%edi, CPU_PROFILE_PIL(%ebx)	/* record interrupted PIL */
611	testw	$CPL_MASK, REGOFF_CS(%ebp)	/* trap from supervisor mode? */
612	jz	1f
613	movl	%esi, CPU_PROFILE_UPC(%ebx)	/* record user PC */
614	movl	$0, CPU_PROFILE_PC(%ebx)	/* zero kernel PC */
615	jmp	2f
616
6171:
618	movl	%esi, CPU_PROFILE_PC(%ebx)	/* record kernel PC */
619	movl	$0, CPU_PROFILE_UPC(%ebx)	/* zero user PC */
620
6212:
	/*
	 * Park vec and newpil on the stack: %eax, %ecx and %edx are all
	 * needed by the timestamp accounting that follows.
	 */
622	pushl	%ecx				/* vec */
623	pushl	%eax				/* newpil */
624
625	/
626	/ See if we are interrupting another high-level interrupt.
627	/
628	movl	CPU_INTR_ACTV(%ebx), %eax
629	andl	$CPU_INTR_ACTV_HIGH_LEVEL_MASK, %eax
630	jz	0f
631	/
632	/ We have interrupted another high-level interrupt.
633	/ Load starting timestamp, compute interval, update cumulative counter.
634	/
635	bsrl	%eax, %ecx		/* find PIL of interrupted handler */
636	movl	%ecx, %esi		/* save PIL for later */
637	HIGHPILBASE(%ebx, %ecx)
638_tsc_patch1:
639	nop; nop			/* patched to rdtsc if available */
640	TSC_SUB_FROM(%ecx, CPU_PIL_HIGH_START)
641
642	PILBASE_INTRSTAT(%ebx, %esi)
643	TSC_ADD_TO(%esi, CPU_INTRSTAT)
644	INTRACCTBASE(%ebx, %ecx)
645	TSC_ADD_TO(%ecx, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
646	/
647	/ Another high-level interrupt is active below this one, so
648	/ there is no need to check for an interrupt thread. That will be
649	/ done by the lowest priority high-level interrupt active.
650	/
651	jmp	1f
6520:
653	/
654	/ See if we are interrupting a low-level interrupt thread.
655	/
656	movl	CPU_THREAD(%ebx), %esi
657	testw	$T_INTR_THREAD, T_FLAGS(%esi)
658	jz	1f
659	/
660	/ We have interrupted an interrupt thread. Account for its time slice
661	/ only if its time stamp is non-zero.
662	/
663	cmpl	$0, T_INTR_START+4(%esi)
664	jne	0f
665	cmpl	$0, T_INTR_START(%esi)
666	je	1f
6670:
668	movzbl	T_PIL(%esi), %ecx /* %ecx has PIL of interrupted handler */
669	PILBASE_INTRSTAT(%ebx, %ecx)
670_tsc_patch2:
671	nop; nop			/* patched to rdtsc if available */
672	TSC_SUB_FROM(%esi, T_INTR_START)
673	TSC_CLR(%esi, T_INTR_START)
674	TSC_ADD_TO(%ecx, CPU_INTRSTAT)
675	INTRACCTBASE(%ebx, %ecx)
676	TSC_ADD_TO(%ecx, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
6771:
678	/ Store starting timestamp in CPU structure for this PIL.
	/* pop followed by push: reads newpil while leaving it on the stack */
679	popl	%ecx			/* restore new PIL */
680	pushl	%ecx
681	HIGHPILBASE(%ebx, %ecx)
682_tsc_patch3:
683	nop; nop			/* patched to rdtsc if available */
684	TSC_STORE(%ecx, CPU_PIL_HIGH_START)
685
686	popl	%eax			/* restore new pil */
687	popl	%ecx			/* vec */
688	/
689	/ Set bit for this PIL in CPU's interrupt active bitmask.
690	/
691
692	ASSERT_NOT_CPU_INTR_ACTV(%eax, %ebx, _interrupt_actv_bit_set)
693
694	/ Save old CPU_INTR_ACTV
695	movl	CPU_INTR_ACTV(%ebx), %esi
696
697	cmpl	$15, %eax
698	jne	0f
699	/ PIL-15 interrupt. Increment nest-count in upper 16 bits of intr_actv
700	incw	CPU_INTR_ACTV_REF(%ebx)	/* increment ref count */
7010:
702	btsl	%eax, CPU_INTR_ACTV(%ebx)
703	/
704	/ Handle high-level nested interrupt on separate interrupt stack
705	/
	/* %esi still holds CPU_INTR_ACTV as it was before the btsl above */
706	testl	$CPU_INTR_ACTV_HIGH_LEVEL_MASK, %esi
707	jnz	onstack			/* already on interrupt stack */
708	movl	%esp, %eax
709	movl	CPU_INTR_STACK(%ebx), %esp	/* get on interrupt stack */
710	pushl	%eax			/* save the thread stack pointer */
711onstack:
712	movl	$autovect, %esi		/* get autovect structure before */
713					/* sti to save on AGI later */
714	sti				/* enable interrupts */
715	pushl	%ecx			/* save interrupt vector */
716	/
717	/ Get handler address
718	/
719pre_loop1:
720	movl	AVH_LINK(%esi, %ecx, 8), %esi
721	xorl	%ebx, %ebx	/* bh is no. of intpts in chain */
722				/* bl is DDI_INTR_CLAIMED status of chain */
723	testl	%esi, %esi		/* if pointer is null */
724	jz	.intr_ret		/* then skip */
725loop1:
726	incb	%bh
727	movl	AV_VECTOR(%esi), %edx	/* get the interrupt routine */
728	testl	%edx, %edx		/* if func is null */
729	jz	.intr_ret		/* then skip */
	/*
	 * Five words are pushed for the probe calls:
	 * dip, vector, arg1, arg2 and a fifth slot initialised to 0.
	 */
730	pushl	$0
731	pushl	AV_INTARG2(%esi)
732	pushl	AV_INTARG1(%esi)
733	pushl	AV_VECTOR(%esi)
734	pushl	AV_DIP(%esi)
735	call	__dtrace_probe_interrupt__start
736	pushl	AV_INTARG2(%esi)	/* get 2nd arg to interrupt routine */
737	pushl	AV_INTARG1(%esi)	/* get first arg to interrupt routine */
738	call	*%edx			/* call interrupt routine with arg */
739	addl	$8, %esp
	/* store the handler's return value into the fifth (formerly 0) slot */
740	movl	%eax, 16(%esp)
741	call	__dtrace_probe_interrupt__complete
742	addl	$20, %esp
743	orb	%al, %bl		/* see if anyone claims intpt. */
744	movl	AV_LINK(%esi), %esi	/* get next routine on list */
745	testl	%esi, %esi		/* if pointer is non-null */
746	jnz	loop1			/* then continue */
747
748.intr_ret:
749	cmpb	$1, %bh		/* if only 1 intpt in chain, it is OK */
750	je	.intr_ret1
751	orb	%bl, %bl	/* If no one claims intpt, then it is OK */
752	jz	.intr_ret1
753	movl	(%esp), %ecx		/* else restore intr vector */
754	movl	$autovect, %esi		/* get autovect structure */
755	jmp	pre_loop1		/* and try again. */
756
757.intr_ret1:
758	LOADCPU(%ebx)			/* get pointer to cpu struct */
759
760	cli
761	movl	CPU_PRI(%ebx), %esi
762
763	/ cpu_stats.sys.intr[PIL]++
764	INC_CPU_STATS_INTR(%esi, %eax, %eax, %ebx)
765
766	/
767	/ Clear bit for this PIL in CPU's interrupt active bitmask.
768	/
769
770	ASSERT_CPU_INTR_ACTV(%esi, %ebx, _interrupt_actv_bit_not_set)
771
772	cmpl	$15, %esi
773	jne	0f
774	/ Only clear bit if reference count is now zero.
775	decw	CPU_INTR_ACTV_REF(%ebx)
776	jnz	1f
7770:
778	btrl	%esi, CPU_INTR_ACTV(%ebx)
7791:
780	/
781	/ Take timestamp, compute interval, update cumulative counter.
782	/ esi = PIL
783_tsc_patch4:
784	nop; nop			/* patched to rdtsc if available */
785	movl	%esi, %ecx		/* save for later */
786	HIGHPILBASE(%ebx, %esi)
787
788	ASSERT_CPU_PIL_HIGH_START_NZ(%esi)
789
790	TSC_SUB_FROM(%esi, CPU_PIL_HIGH_START)
791
792	PILBASE_INTRSTAT(%ebx, %ecx)
793	TSC_ADD_TO(%ecx, CPU_INTRSTAT)
794	INTRACCTBASE(%ebx, %esi)
795	TSC_ADD_TO(%esi, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
796	/
797	/ Check for lower-PIL nested high-level interrupt beneath current one
798	/ If so, place a starting timestamp in its pil_high_start entry.
799	/
800	movl	CPU_INTR_ACTV(%ebx), %eax
801	movl	%eax, %esi
802	andl	$CPU_INTR_ACTV_HIGH_LEVEL_MASK, %eax
803	jz	0f
804	bsrl	%eax, %ecx		/* find PIL of nested interrupt */
805	HIGHPILBASE(%ebx, %ecx)
806_tsc_patch5:
807	nop; nop			/* patched to rdtsc if available */
808	TSC_STORE(%ecx, CPU_PIL_HIGH_START)
809	/
810	/ Another high-level interrupt is active below this one, so
811	/ there is no need to check for an interrupt thread. That will be
812	/ done by the lowest priority high-level interrupt active.
813	/
814	jmp	1f
8150:
816	/ Check to see if there is a low-level interrupt active. If so,
817	/ place a starting timestamp in the thread structure.
818	movl	CPU_THREAD(%ebx), %esi
819	testw	$T_INTR_THREAD, T_FLAGS(%esi)
820	jz	1f
821_tsc_patch6:
822	nop; nop			/* patched to rdtsc if available */
823	TSC_STORE(%esi, T_INTR_START)
8241:
825	movl	%edi, CPU_PRI(%ebx)
826				/* interrupt vector already on stack */
827	pushl	%edi			/* old ipl */
828	call	*setlvlx
	/* drops both the old ipl pushed above and the saved vector */
829	addl	$8, %esp		/* eax contains the current ipl */
830
831	movl	CPU_INTR_ACTV(%ebx), %esi /* reset stack pointer if no more */
832	shrl	$LOCK_LEVEL + 1, %esi	/* HI PRI intrs. */
833	jnz	.intr_ret2
834	popl	%esp			/* restore the thread stack pointer */
835.intr_ret2:
836	movl	CPU_SOFTINFO(%ebx), %edx /* any pending software interrupts */
837	orl	%edx, %edx
838	jz	_sys_rtt
839	jmp	dosoftint	/* check for softints before we return. */
840	SET_SIZE(cmnint)
841	SET_SIZE(_interrupt)
842
843#endif	/* __i386 */
844
845/*
846 * Declare a uintptr_t which has the size of _interrupt to enable stack
847 * traceback code to know when a regs structure is on the stack.
 *
 * The stored value is (. - _interrupt): the distance from the _interrupt
 * label to the location of this word, which is emitted after the
 * .align directive below.
848 */
849	.globl	_interrupt_size
850	.align	CLONGSIZE
851_interrupt_size:
852	.NWORD	. - _interrupt
853	.type	_interrupt_size, @object
854
855#endif	/* __lint */
856
857#if defined(__i386)
858
859/*
860 * Handle an interrupt in a new thread.
861 *	Entry:  traps disabled.
862 *		%edi - old priority level
863 *		%ebp - pointer to REGS
864 *		%ecx - translated vector
865 *		%eax - ipl of isr.
866 *		%ebx - pointer to CPU struct
867 *	Uses:
868 */
869
870#if !defined(__lint)
871
872	ENTRY_NP(intr_thread)
873	/
874	/ Set bit for this PIL in CPU's interrupt active bitmask.
875	/
876
877	ASSERT_NOT_CPU_INTR_ACTV(%eax, %ebx, _interrupt_actv_bit_set)
878
879	btsl	%eax, CPU_INTR_ACTV(%ebx)
880
881	/ Get set to run interrupt thread.
882	/ There should always be an interrupt thread since we allocate one
883	/ for each level on the CPU.
884	/
885	/ Note that the code in kcpc_overflow_intr -relies- on the ordering
886	/ of events here - in particular that t->t_lwp of the interrupt
887	/ thread is set to the pinned thread *before* curthread is changed
888	/
889	movl	CPU_THREAD(%ebx), %edx		/* cur thread in edx */
890
891	/
892	/ Are we interrupting an interrupt thread? If so, account for it.
893	/
894	testw	$T_INTR_THREAD, T_FLAGS(%edx)
895	jz	0f
896	/
897	/ We have interrupted an interrupt thread. Account for its time slice
898	/ only if its time stamp is non-zero. t_intr_start may be zero due to
899	/ cpu_intr_swtch_enter.
900	/
901	cmpl	$0, T_INTR_START+4(%edx)
902	jne	1f
903	cmpl	$0, T_INTR_START(%edx)
904	je	0f
9051:
906	pushl	%ecx
907	pushl	%eax
908	movl	%edx, %esi
909_tsc_patch7:
910	nop; nop			/* patched to rdtsc if available */
911	TSC_SUB_FROM(%esi, T_INTR_START)
912	TSC_CLR(%esi, T_INTR_START)
913	movzbl	T_PIL(%esi), %ecx
914	PILBASE_INTRSTAT(%ebx, %ecx)
915	TSC_ADD_TO(%ecx, CPU_INTRSTAT)
916	INTRACCTBASE(%ebx, %ecx)
917	TSC_ADD_TO(%ecx, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
918	movl	%esi, %edx
919	popl	%eax
920	popl	%ecx
9210:
922	movl	%esp, T_SP(%edx)	/* mark stack in curthread for resume */
923	pushl	%edi			/* get a temporary register */
924	UNLINK_INTR_THREAD(%ebx, %esi, %edi)
925
926	movl	T_LWP(%edx), %edi
927	movl	%edx, T_INTR(%esi)		/* push old thread */
928	movl	%edi, T_LWP(%esi)
929	/
930	/ Threads on the interrupt thread free list could have state already
931	/ set to TS_ONPROC, but it helps in debugging if they're TS_FREE
932	/
933	movl	$ONPROC_THREAD, T_STATE(%esi)
934	/
935	/ chain the interrupted thread onto list from the interrupt thread.
936	/ Set the new interrupt thread as the current one.
937	/
938	popl	%edi			/* Don't need a temp reg anymore */
939	movl	T_STACK(%esi), %esp		/* interrupt stack pointer */
940	movl	%esp, %ebp
941	movl	%esi, CPU_THREAD(%ebx)		/* set new thread */
942	pushl	%eax				/* save the ipl */
943	/
944	/ Initialize thread priority level from intr_pri
945	/
946	movb	%al, T_PIL(%esi)	/* store pil */
947	movzwl	intr_pri, %ebx		/* XXX Can cause probs if new class */
948					/* is loaded on some other cpu. */
949	addl	%ebx, %eax		/* convert level to dispatch priority */
950	movw	%ax, T_PRI(%esi)
951
952	/
953	/ Take timestamp and store it in the thread structure.
954	/
955	movl	%eax, %ebx		/* save priority over rdtsc */
956_tsc_patch8:
957	nop; nop			/* patched to rdtsc if available */
958	TSC_STORE(%esi, T_INTR_START)
959	movl	%ebx, %eax		/* restore priority */
960
961	/ The following 3 instructions need not be in cli.
962	/ Putting them here only to avoid the AGI penalty on Pentiums.
963
964	pushl	%ecx			/* save interrupt vector. */
965	pushl	%esi			/* save interrupt thread */
966	movl	$autovect, %esi		/* get autovect structure */
967	sti				/* enable interrupts */
968
969	/ Fast event tracing.
970	LOADCPU(%ebx)
971	movl	CPU_FTRACE_STATE(%ebx), %ebx
972	testl	$FTRACE_ENABLED, %ebx
973	jz	1f
974
975	movl	8(%esp), %ebx
976	pushl	%ebx			/* ipl */
977	pushl	%ecx			/* int vector */
978	movl	T_SP(%edx), %ebx
979	pushl	%ebx			/* &regs */
980	pushl	$_ftrace_intr_thread_fmt
981	call	ftrace_3_notick
982	addl	$8, %esp
983	popl	%ecx			/* restore int vector */
984	addl	$4, %esp
9851:
986pre_loop2:
987	movl	AVH_LINK(%esi, %ecx, 8), %esi
988	xorl	%ebx, %ebx	/* bh is cno. of intpts in chain */
989				/* bl is DDI_INTR_CLAIMED status of * chain */
990	testl	%esi, %esi	/* if pointer is null */
991	jz	loop_done2	/* we're done */
992loop2:
993	movl	AV_VECTOR(%esi), %edx	/* get the interrupt routine */
994	testl	%edx, %edx		/* if pointer is null */
995	jz	loop_done2		/* we're done */
996	incb	%bh
997	pushl	$0
998	pushl	AV_INTARG2(%esi)
999	pushl	AV_INTARG1(%esi)
1000	pushl	AV_VECTOR(%esi)
1001	pushl	AV_DIP(%esi)
1002	call	__dtrace_probe_interrupt__start
1003	pushl	AV_INTARG2(%esi)	/* get 2nd arg to interrupt routine */
1004	pushl	AV_INTARG1(%esi)	/* get first arg to interrupt routine */
1005	call	*%edx			/* call interrupt routine with arg */
1006	addl	$8, %esp
1007	movl	%eax, 16(%esp)
1008	call	__dtrace_probe_interrupt__complete
1009	addl	$20, %esp
1010	orb	%al, %bl		/* see if anyone claims intpt. */
1011	movl	AV_TICKSP(%esi), %ecx
1012	testl	%ecx, %ecx
1013	jz	no_time
1014	call	intr_get_time
1015	movl	AV_TICKSP(%esi), %ecx
1016	TSC_ADD_TO(%ecx, 0)
1017no_time:
1018	movl	AV_LINK(%esi), %esi	/* get next routine on list */
1019	testl	%esi, %esi		/* if pointer is non-null */
1020	jnz	loop2			/* continue */
1021loop_done2:
1022	cmpb	$1, %bh		/* if only 1 intpt in chain, it is OK */
1023	je	.loop_done2_1
1024	orb	%bl, %bl	/* If no one claims intpt, then it is OK */
1025	jz	.loop_done2_1
1026	movl	$autovect, %esi		/* else get autovect structure */
1027	movl	4(%esp), %ecx		/* restore intr vector */
1028	jmp	pre_loop2		/* and try again. */
1029.loop_done2_1:
1030	popl	%esi			/* restore intr thread pointer */
1031
1032	LOADCPU(%ebx)
1033
1034	cli		/* protect interrupt thread pool and intr_actv */
1035	movzbl	T_PIL(%esi), %eax
1036
1037	/ Save value in regs
1038	pushl	%eax			/* current pil */
1039	pushl	%edx			/* (huh?) */
1040	pushl	%edi			/* old pil */
1041
1042	/ cpu_stats.sys.intr[PIL]++
1043	INC_CPU_STATS_INTR(%eax, %edx, %edx, %ebx)
1044
1045	/
1046	/ Take timestamp, compute interval, and update cumulative counter.
1047	/ esi = thread pointer, ebx = cpu pointer, eax = PIL
1048	/
1049	movl	%eax, %edi
1050
1051	ASSERT_T_INTR_START_NZ(%esi)
1052
1053_tsc_patch9:
1054	nop; nop			/* patched to rdtsc if available */
1055	TSC_SUB_FROM(%esi, T_INTR_START)
1056	PILBASE_INTRSTAT(%ebx, %edi)
1057	TSC_ADD_TO(%edi, CPU_INTRSTAT)
1058	INTRACCTBASE(%ebx, %edi)
1059	TSC_ADD_TO(%edi, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
1060	popl	%edi
1061	popl	%edx
1062	popl	%eax
1063
1064	/
1065	/ Clear bit for this PIL in CPU's interrupt active bitmask.
1066	/
1067
1068	ASSERT_CPU_INTR_ACTV(%eax, %ebx, _intr_thread_actv_bit_not_set)
1069
1070	btrl	%eax, CPU_INTR_ACTV(%ebx)
1071
1072	/ if there is still an interrupted thread underneath this one
1073	/ then the interrupt was never blocked and the return is fairly
1074	/ simple.  Otherwise jump to intr_thread_exit
1075	cmpl	$0, T_INTR(%esi)
1076	je	intr_thread_exit
1077
1078	/
1079	/ link the thread back onto the interrupt thread pool
1080	LINK_INTR_THREAD(%ebx, %esi, %edx)
1081
1082	movl	CPU_BASE_SPL(%ebx), %eax	/* used below. */
1083	/ set the thread state to free so kmdb doesn't see it
1084	movl	$FREE_THREAD, T_STATE(%esi)
1085
1086	cmpl	%eax, %edi		/* if (oldipl >= basespl) */
1087	jae	intr_restore_ipl	/* then use oldipl */
1088	movl	%eax, %edi		/* else use basespl */
1089intr_restore_ipl:
1090	movl	%edi, CPU_PRI(%ebx)
1091					/* intr vector already on stack */
1092	pushl	%edi			/* old ipl */
1093	call	*setlvlx		/* eax contains the current ipl */
1094	/
1095	/ Switch back to the interrupted thread
1096	movl	T_INTR(%esi), %ecx
1097
1098	/ Place starting timestamp in interrupted thread's thread structure.
1099_tsc_patch10:
1100	nop; nop			/* patched to rdtsc if available */
1101	TSC_STORE(%ecx, T_INTR_START)
1102
1103	movl	T_SP(%ecx), %esp	/* restore stack pointer */
1104	movl	%esp, %ebp
1105	movl	%ecx, CPU_THREAD(%ebx)
1106
1107	movl	CPU_SOFTINFO(%ebx), %edx /* any pending software interrupts */
1108	orl	%edx, %edx
1109	jz	_sys_rtt
1110	jmp	dosoftint	/* check for softints before we return. */
1111
1112	/
1113	/ An interrupt returned on what was once (and still might be)
1114	/ an interrupt thread stack, but the interrupted process is no longer
1115	/ there.  This means the interrupt must have blocked.
1116	/
1117	/ There is no longer a thread under this one, so put this thread back
1118	/ on the CPU's free list and resume the idle thread which will dispatch
1119	/ the next thread to run.
1120	/
1121	/ All interrupts are disabled here
1122	/
1123
1124intr_thread_exit:
1125#ifdef DEBUG
1126	incl	intr_thread_cnt
1127#endif
1128	INC64(%ebx, CPU_STATS_SYS_INTRBLK)	/* cpu_stats.sys.intrblk++ */
1129	/
1130	/ Put thread back on the interrupt thread list.
1131	/ As a reminder, the regs at this point are
1132	/	esi	interrupt thread
1133	/	edi	old ipl
1134	/	ebx	ptr to CPU struct
1135
1136	/ Set CPU's base SPL level based on active interrupts bitmask
1137	call	set_base_spl
1138
1139	movl	CPU_BASE_SPL(%ebx), %edi
1140	movl	%edi, CPU_PRI(%ebx)
1141					/* interrupt vector already on stack */
1142	pushl	%edi
1143	call	*setlvlx
1144	addl	$8, %esp		/* XXX - don't need to pop since */
1145					/* we are ready to switch */
1146	call	splhigh			/* block all intrs below lock level */
1147	/
1148	/ Set the thread state to free so kmdb doesn't see it
1149	/
1150	movl	$FREE_THREAD, T_STATE(%esi)
1151	/
1152	/ Put thread on either the interrupt pool or the free pool and
1153	/ call swtch() to resume another thread.
1154	/
1155	LINK_INTR_THREAD(%ebx, %esi, %edx)
1156	call 	swtch
1157	/ swtch() shouldn't return
1158
1159	SET_SIZE(intr_thread)
1160
1161#endif	/* __lint */
1162#endif	/* __i386 */
1163
1164/*
1165 * Set the CPU's base SPL level, based on which interrupt levels are active.
1166 *	Called at spl7 or above.
1167 */
1168
1169#if defined(__lint)
1170
1171void
1172set_base_spl(void)
1173{}
1174
1175#else	/* __lint */
1176
/*
 * set_base_spl()
 *
 * Takes no arguments.  Reads the %gs-relative CPU_INTR_ACTV bitmask and
 * stores the index of its highest set bit, looking only at bits 0-15,
 * into %gs:CPU_BASE_SPL.  A mask of zero stores zero.  The stored value
 * is also left in %eax on return.
 *
 * Only %eax and the condition flags are modified.
 *
 * The search is split by byte: when any of bits 8-15 is set the mask is
 * shifted left by 16 before the bsrl, otherwise by 24, and the shift
 * count is subtracted from the resulting bit index afterwards.
 *
 * NOTE(review): bits 16 and above are shifted out before the scan.  If
 * such bits could ever be the only ones set, bsrl would be given a zero
 * operand, for which its destination is architecturally undefined --
 * confirm that this state cannot occur here.
 */
1177	ENTRY_NP(set_base_spl)
1178	movl	%gs:CPU_INTR_ACTV, %eax	/* load active interrupts mask */
1179	testl	%eax, %eax		/* is it zero? */
1180	jz	setbase			/* yes: base SPL becomes 0 */
1181	testl	$0xff00, %eax		/* any of bits 8-15 set? */
1182	jnz	ah_set
1183	shl	$24, %eax		/* shift 'em over so we can find */
1184					/* the 1st bit faster */
1185	bsrl	%eax, %eax
1186	subl	$24, %eax		/* undo the shift: bit index 0-7 */
1187setbase:
1188	movl	%eax, %gs:CPU_BASE_SPL	/* store base priority */
1189	ret
1190ah_set:
1191	shl	$16, %eax		/* bits 0-15 now occupy bits 16-31 */
1192	bsrl	%eax, %eax
1193	subl	$16, %eax		/* undo the shift: bit index 8-15 */
1194	jmp	setbase
1195	SET_SIZE(set_base_spl)
1196
1197#endif	/* __lint */
1198
1199#if defined(__i386)
1200
1201/*
1202 * int
1203 * intr_passivate(from, to)
1204 *      thread_id_t     from;           interrupt thread
1205 *      thread_id_t     to;             interrupted thread
1206 *
1207 *	intr_passivate(itp, t) makes the interrupted thread "t" runnable.
1208 *
1209 *	Since t->t_sp has already been saved, t->t_pc is all that needs
1210 *	to be set in this function.
1211 *
1212 *	Returns interrupt level of the thread.
1213 */
1214
1215#if defined(__lint)
1216
1217/* ARGSUSED */
1218int
1219intr_passivate(kthread_id_t from, kthread_id_t to)
1220{ return (0); }
1221
1222#else	/* __lint */
1223
/*
 * Stack arguments: 4(%esp) = from (interrupt thread),
 *		    8(%esp) = to   (interrupted thread).
 * Return value is in %eax, which is the only register written.
 *
 * The saved PC of the interrupted thread is set to _sys_rtt.  The value
 * returned is the 32-bit word immediately below the address held in the
 * interrupt thread's T_STACK field; dosoftint() pushes the ipl there as
 * the first thing on the interrupt thread's stack.
 */
1224	ENTRY(intr_passivate)
1225	movl	8(%esp), %eax		/* interrupted thread  */
1226	movl	$_sys_rtt, T_PC(%eax)	/* set T_PC for interrupted thread */
1227
1228	movl	4(%esp), %eax		/* interrupt thread */
1229	movl	T_STACK(%eax), %eax	/* get the pointer to the start */
1230					/* of the interrupt thread stack */
1231	movl	-4(%eax), %eax		/* interrupt level was the first */
1232					/* thing pushed onto the stack */
1233	ret
1234	SET_SIZE(intr_passivate)
1235
1236#endif	/* __lint */
1237#endif	/* __i386 */
1238
1239#if defined(__lint)
1240
/* Lint-only stub; the real fakesoftint is the assembly routine in the #else branch. */
1241void
1242fakesoftint(void)
1243{}
1244
1245#else	/* __lint */
1246
1247	/
1248	/ If we're here, we're being called from splx() to fake a soft
1249	/ interrupt (note that interrupts are still disabled from splx()).
1250	/ We execute this code when a soft interrupt is posted at
1251	/ level higher than the CPU's current spl; when spl is lowered in
1252	/ splx(), it will see the softint and jump here.  We'll do exactly
1253	/ what a trap would do:  push our flags, %cs, %eip, error code
1254	/ and trap number (T_SOFTINT).  The cmnint() code will see T_SOFTINT
1255	/ and branch to the dosoftint() code.
1256	/
1257#if defined(__amd64)
1258
1259	/*
1260	 * In 64-bit mode, iretq -always- pops all five regs
1261	 * Imitate the 16-byte auto-align of the stack, and the
1262	 * zero-ed out %ss value.
	 *
	 * The frame built below, from the highest address down, is:
	 * %ss, %rsp, rflags, %cs, %rip, error code, trap number.
	 * %r11 is used as scratch: first for the pre-alignment %rsp and
	 * then for the address of fakesoftint_return.  Control then
	 * transfers to cmnint with a jmp, so nothing here returns directly.
	 *
	 * NOTE(review): the word pushed for %ss is $KDS_SEL, not a literal
	 * zero as the sentence above suggests -- confirm the wording.
1263	 */
1264	ENTRY_NP(fakesoftint)
1265	movq	%rsp, %r11	/* remember the caller's %rsp */
1266	andq	$-16, %rsp	/* round %rsp down to a 16-byte boundary */
1267	pushq	$KDS_SEL	/* %ss */
1268	pushq	%r11		/* %rsp */
1269	pushf			/* rflags */
1270	pushq	$KCS_SEL	/* %cs */
1271	leaq	fakesoftint_return(%rip), %r11
1272	pushq	%r11		/* %rip */
1273	pushq	$0		/* err */
1274	pushq	$T_SOFTINT	/* trap */
1275	jmp	cmnint
1276	SET_SIZE(fakesoftint)
1277
1278#elif defined(__i386)
1279
	/*
	 * 32-bit variant: build the same kind of frame with only flags,
	 * %cs and a return %eip (no %ss/%esp words are pushed), followed
	 * by the error code and trap number, then jump to cmnint.
	 */
1280	ENTRY_NP(fakesoftint)
1281	pushf				/* eflags */
1282	push	%cs			/* %cs */
1283	push	$fakesoftint_return	/* %eip to resume at */
1284	push	$0			/* err */
1285	push	$T_SOFTINT		/* trap */
1286	jmp	cmnint
1287	SET_SIZE(fakesoftint)
1288
1289#endif	/* __i386 */
1290
	/*
	 * _fakesoftint_size: global data object whose value is the distance
	 * from the fakesoftint label to this object's own address.  Because
	 * the expression is evaluated after the .align, any alignment padding
	 * is included in the value.
	 * NOTE(review): .NWORD is presumably a macro for a native-word-sized
	 * data directive -- confirm against its definition.
	 */
1291	.align	CPTRSIZE
1292	.globl	_fakesoftint_size
1293	.type	_fakesoftint_size, @object
1294_fakesoftint_size:
1295	.NWORD	. - fakesoftint
1296	SET_SIZE(_fakesoftint_size)
1297
1298/*
1299 * dosoftint(old_pil in %edi, softinfo in %edx, CPU pointer in %ebx)
1300 * Process software interrupts
1301 * Interrupts are disabled here.
 *
 * Within this file the routine is entered with a jmp, not a call, and
 * every path out of it is a jump to _sys_rtt, a jump back to dosoftint,
 * or a call to swtch().
 *
 * Outline:
 *   1. Pick the highest pending soft level from %edx.  If it is not above
 *	both the old PIL (%edi) and the CPU's base SPL, go to _sys_rtt.
 *   2. Clear that level's bit in CPU_SOFTINFO with a locked btrl.  If the
 *	bit was already clear, re-read the pending mask (dosoftint_again).
 *   3. Raise the priority, take an interrupt thread (%esi), pin the
 *	current thread underneath it (T_INTR), switch to the interrupt
 *	thread's stack, enable interrupts and call av_dispatch_softvect
 *	with the level as its single stack argument.
 *   4. On return, disable interrupts and update the statistics.  If the
 *	pinned thread is still there, switch back to it, restore the
 *	priority to the larger of old PIL and base SPL, and look for more
 *	pending soft interrupts.  Otherwise (softintr_thread_exit) put the
 *	interrupt thread back and call swtch().
 *
 * %edi and %ebx are relied on to survive the calls made here.
1302 */
1303#if defined(__i386)
1304
1305	ENTRY_NP(dosoftint)
1306
1307	bsrl	%edx, %edx		/* find highest pending interrupt */
1308	cmpl 	%edx, %edi		/* if curipl >= pri soft pending intr */
1309	jae	_sys_rtt		/* skip */
1310
1311	movl	%gs:CPU_BASE_SPL, %eax	/* check for blocked intr threads */
1312	cmpl	%edx, %eax		/* if basespl >= pri soft pending */
1313	jae	_sys_rtt		/* skip */
1314
1315	lock				/* MP protect */
1316	btrl	%edx, CPU_SOFTINFO(%ebx) /* clear the selected interrupt bit */
1317	jnc	dosoftint_again		/* bit was already clear: re-check */
1318
1319	movl	%edx, CPU_PRI(%ebx) /* set IPL to softint level */
1320	pushl	%edx
1321	call	*setspl			/* mask levels up to the softint level */
1322	popl	%eax			/* priority we are at in %eax */
1323
1324	/ Get set to run interrupt thread.
1325	/ There should always be an interrupt thread since we allocate one
1326	/ for each level on the CPU.
1327	UNLINK_INTR_THREAD(%ebx, %esi, %edx)
	/* from here on %esi is used as the interrupt thread to run */
1328
1329	/
1330	/ Note that the code in kcpc_overflow_intr -relies- on the ordering
1331	/ of events here - in particular that t->t_lwp of the interrupt
1332	/ thread is set to the pinned thread *before* curthread is changed
1333	/
1334	movl	CPU_THREAD(%ebx), %ecx	/* %ecx = thread being interrupted */
1335
1336	/ If we are interrupting an interrupt thread, account for it.
1337	testw	$T_INTR_THREAD, T_FLAGS(%ecx)
1338	jz	0f
1339	/
1340	/ We have interrupted an interrupt thread. Account for its time slice
1341	/ only if its time stamp is non-zero. t_intr_start may be zero due to
1342	/ cpu_intr_swtch_enter.
1343	/
1344	cmpl	$0, T_INTR_START+4(%ecx)	/* high word of 64-bit stamp */
1345	jne	1f
1346	cmpl	$0, T_INTR_START(%ecx)		/* low word of 64-bit stamp */
1347	je	0f
13481:
	/*
	 * %eax (the new level) is saved around the timestamp sequence and
	 * restored by the popl below.
	 *
	 * NOTE(review): %ebp is loaded from %eax, i.e. the level of the soft
	 * interrupt about to run, not from T_PIL of the interrupted thread
	 * in %ecx.  The elapsed time is therefore added to whichever
	 * intrstat slot PILBASE_INTRSTAT derives from that value -- confirm
	 * this is the intended slot.
	 */
1349	pushl	%eax
1350	movl	%eax, %ebp
1351_tsc_patch11:
1352	nop; nop			/* patched to rdtsc if available */
1353	PILBASE_INTRSTAT(%ebx, %ebp)
1354	TSC_SUB_FROM(%ecx, T_INTR_START)
1355	TSC_ADD_TO(%ebp, CPU_INTRSTAT)
1356	INTRACCTBASE(%ebx, %ebp)
1357	TSC_ADD_TO(%ebp, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
1358	popl	%eax
13590:
1360	movl	T_LWP(%ecx), %ebp	/* copy t_lwp of the pinned thread */
1361	movl	%ebp, T_LWP(%esi)	/* into the interrupt thread */
1362	/
1363	/ Threads on the interrupt thread free list could have state already
1364	/ set to TS_ONPROC, but it helps in debugging if they're TS_FREE
1365	/ Could eliminate the next two instructions with a little work.
1366	/
1367	movl	$ONPROC_THREAD, T_STATE(%esi)
1368	/
1369	/ Push interrupted thread onto list from new thread.
1370	/ Set the new thread as the current one.
1371	/ Set interrupted thread's T_SP because if it is the idle thread,
1372	/ Resume() may use that stack between threads.
1373	/
1374	movl	%esp, T_SP(%ecx)		/* mark stack for resume */
1375	movl	%ecx, T_INTR(%esi)		/* push old thread */
1376	movl	%esi, CPU_THREAD(%ebx)		/* set new thread */
1377	movl	T_STACK(%esi), %esp		/* interrupt stack pointer */
1378	movl	%esp, %ebp
1379
1380	pushl	%eax			/* push ipl as first element in stack */
1381					/* see intr_passivate() */
1382	/
1383	/ Set bit for this PIL in CPU's interrupt active bitmask.
1384	/
1385
1386	ASSERT_NOT_CPU_INTR_ACTV(%eax, %ebx, _dosoftint_actv_bit_set)
1387
1388	btsl	%eax, CPU_INTR_ACTV(%ebx)
1389
1390	/
1391	/ Initialize thread priority level from intr_pri
1392	/
1393	movb	%al, T_PIL(%esi)	/* store pil */
1394	movzwl	intr_pri, %ecx
1395	addl	%eax, %ecx		/* convert level to dispatch priority */
1396	movw	%cx, T_PRI(%esi)
1397
1398	/
1399	/ Store starting timestamp in thread structure.
1400	/ esi = thread, ebx = cpu pointer, eax = PIL
1401	/
1402	movl	%eax, %ecx		/* save PIL from rdtsc clobber */
1403_tsc_patch12:
1404	nop; nop			/* patched to rdtsc if available */
1405	TSC_STORE(%esi, T_INTR_START)
1406
1407	sti				/* enable interrupts */
1408
1409	/
1410	/ Enabling interrupts (above) could raise the current
1411	/ IPL and base SPL. But, we continue processing the current soft
1412	/ interrupt and we will check the base SPL next time in the loop
1413	/ so that blocked interrupt thread would get a chance to run.
1414	/
1415
1416	/
1417	/ dispatch soft interrupts
1418	/
1419	pushl	%ecx			/* argument: the PIL saved above */
1420	call	av_dispatch_softvect
1421	addl	$4, %esp		/* discard the argument */
1422
1423	cli				/* protect interrupt thread pool */
1424					/* and softinfo & sysinfo */
1425	movl	CPU_THREAD(%ebx), %esi	/* restore thread pointer */
1426	movzbl	T_PIL(%esi), %ecx
1427
1428	/ cpu_stats.sys.intr[PIL]++
1429	INC_CPU_STATS_INTR(%ecx, %edx, %edx, %ebx)
1430
1431	/
1432	/ Clear bit for this PIL in CPU's interrupt active bitmask.
1433	/
1434
1435	ASSERT_CPU_INTR_ACTV(%ecx, %ebx, _dosoftint_actv_bit_not_set)
1436
1437	btrl	%ecx, CPU_INTR_ACTV(%ebx)
1438
1439	/
1440	/ Take timestamp, compute interval, update cumulative counter.
1441	/ esi = thread, ebx = cpu, ecx = PIL
1442	/
1443	PILBASE_INTRSTAT(%ebx, %ecx)
1444_tsc_patch13:
1445	nop; nop		/* patched to rdtsc if available */
1446	TSC_SUB_FROM(%esi, T_INTR_START)
1447	TSC_ADD_TO(%ecx, CPU_INTRSTAT)
1448	INTRACCTBASE(%ebx, %ecx)
1449	TSC_ADD_TO(%ecx, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
1450
1451	/ if there is still an interrupt thread underneath this one
1452	/ then the interrupt was never blocked and the return is fairly
1453	/ simple.  Otherwise jump to softintr_thread_exit.
1454	/ softintr_thread_exit expect esi to be curthread & ebx to be ipl.
	/*
	 * NOTE(review): at this point %ebx is still used as the CPU pointer
	 * (see CPU_INTR_ACTV(%ebx) above), and softintr_thread_exit reloads
	 * it with LOADCPU before using it; the "ebx to be ipl" wording above
	 * does not match the code -- confirm and correct.
	 */
1455	cmpl	$0, T_INTR(%esi)
1456	je	softintr_thread_exit
1457
1458	/
1459	/ link the thread back onto the interrupt thread pool
1460	LINK_INTR_THREAD(%ebx, %esi, %edx)
1461
1462	/ set the thread state to free so kmdb doesn't see it
1463	movl	$FREE_THREAD, T_STATE(%esi)
1464	/
1465	/ Switch back to the interrupted thread
1466	movl	T_INTR(%esi), %ecx
1467	movl	%ecx, CPU_THREAD(%ebx)
1468	movl	T_SP(%ecx), %esp	/* restore stack pointer */
1469	movl	%esp, %ebp
1470
1471	/ If we are returning to an interrupt thread, store a starting
1472	/ timestamp in the thread structure.
1473	testw	$T_INTR_THREAD, T_FLAGS(%ecx)
1474	jz	0f
1475_tsc_patch14:
1476	nop; nop			/* patched to rdtsc if available */
1477	TSC_STORE(%ecx, T_INTR_START)
14780:
1479	movl	CPU_BASE_SPL(%ebx), %eax
1480	cmpl	%eax, %edi		/* if (oldipl >= basespl) */
1481	jae	softintr_restore_ipl	/* then use oldipl */
1482	movl	%eax, %edi		/* else use basespl */
1483softintr_restore_ipl:
1484	movl	%edi, CPU_PRI(%ebx) /* set IPL to old level */
1485	pushl	%edi
1486	call	*setspl
1487	popl	%eax
1488dosoftint_again:
1489	movl	CPU_SOFTINFO(%ebx), %edx /* any pending software interrupts */
1490	orl	%edx, %edx
1491	jz	_sys_rtt
1492	jmp	dosoftint		/* process more software interrupts */
1493
1494softintr_thread_exit:
1495	/
1496	/ Put thread back on the interrupt thread list.
1497	/ As a reminder, the regs at this point are
1498	/	%esi	interrupt thread
1499
1500	/
1501	/ This was an interrupt thread, so set CPU's base SPL level
1502	/ set_base_spl only uses %eax.
1503	/
1504	call	set_base_spl		/* interrupt vector already on stack */
1505	/
1506	/ Set the thread state to free so kmdb doesn't see it
1507	/
1508	movl	$FREE_THREAD, T_STATE(%esi)
1509	/
1510	/ Put thread on either the interrupt pool or the free pool and
1511	/ call swtch() to resume another thread.
1512	/
1513	LOADCPU(%ebx)
1514	LINK_INTR_THREAD(%ebx, %esi, %edx)
1515	call	splhigh			/* block all intrs below lock lvl */
1516	call	swtch
1517	/ swtch() shouldn't return
1518	SET_SIZE(dosoftint)
1519
1520#endif	/* __i386 */
1521#endif	/* __lint */
1522
1523#if defined(lint)
1524
1525/*
1526 * intr_get_time() is a resource for interrupt handlers to determine how
1527 * much time has been spent handling the current interrupt. Such a function
1528 * is needed because higher level interrupts can arrive during the
1529 * processing of an interrupt, thus making direct comparisons of raw rdtsc
1530 * values by the handler inaccurate. intr_get_time() only returns time
1531 * spent in the current interrupt handler.
1532 *
1533 * The caller must be calling from an interrupt handler running at a pil
1534 * below or at lock level. Timings are not provided for high-level
1535 * interrupts.
1536 *
1537 * The first time intr_get_time() is called while handling an interrupt,
1538 * it returns the time since the interrupt handler was invoked. Subsequent
1539 * calls will return the time since the prior call to intr_get_time(). Time
1540 * is returned as ticks. Use tsc_scalehrtime() to convert ticks to nsec.
1541 *
1542 * Theory Of Intrstat[][]:
1543 *
1544 * uint64_t intrstat[pil][0..1] is an array indexed by pil level, with two
1545 * uint64_ts per pil.
1546 *
1547 * intrstat[pil][0] is a cumulative count of the number of ticks spent
1548 * handling all interrupts at the specified pil on this CPU. It is
1549 * exported via kstats to the user.
1550 *
1551 * intrstat[pil][1] is always a count of ticks less than or equal to the
1552 * value in [0]. The difference between [1] and [0] is the value returned
1553 * by a call to intr_get_time(). At the start of interrupt processing,
1554 * [0] and [1] will be equal (or nearly so). As the interrupt consumes
1555 * time, [0] will increase, but [1] will remain the same. A call to
1556 * intr_get_time() will return the difference, then update [1] to be the
1557 * same as [0]. Future calls will return the time since the last call.
1558 * Finally, when the interrupt completes, [1] is updated to the same as [0].
1559 *
1560 * Implementation:
1561 *
1562 * intr_get_time() works much like a higher level interrupt arriving. It
1563 * "checkpoints" the timing information by incrementing intrstat[pil][0]
1564 * to include elapsed running time, and by setting t_intr_start to rdtsc.
1565 * It then sets the return value to intrstat[pil][0] - intrstat[pil][1],
1566 * and updates intrstat[pil][1] to be the same as the new value of
1567 * intrstat[pil][0].
1568 *
1569 * In the normal handling of interrupts, after an interrupt handler returns
1570 * and the code in intr_thread() updates intrstat[pil][0], it then sets
1571 * intrstat[pil][1] to the new value of intrstat[pil][0]. When [0] == [1],
1572 * the timings are reset, i.e. intr_get_time() will return [0] - [1] which
1573 * is 0.
1574 *
1575 * Whenever interrupts arrive on a CPU which is handling a lower pil
1576 * interrupt, they update the lower pil's [0] to show time spent in the
1577 * handler that they've interrupted. This results in a growing discrepancy
1578 * between [0] and [1], which is returned the next time intr_get_time() is
1579 * called. Time spent in the higher-pil interrupt will not be returned in
1580 * the next intr_get_time() call from the original interrupt, because
1581 * the higher-pil interrupt's time is accumulated in intrstat[higherpil][].
1582 */
1583
/* Lint-only stub; the real intr_get_time is the assembly in the #else branch. */
1584/*ARGSUSED*/
1585uint64_t
1586intr_get_time(void)
1587{ return 0; }
1588#else	/* lint */
1589
1590
1591#if defined(__amd64)
	/*
	 * intr_get_time() -- amd64 implementation.
	 *
	 * Disables interrupts, calls intr_thread_get_time with the value
	 * LOADCPU places in %rdi, and returns whatever that call leaves in
	 * the return register.  Interrupts are re-enabled unconditionally
	 * with sti before returning, whatever their state was on entry.
	 *
	 * NOTE(review): LOADCPU(%rdi) presumably loads the current CPU
	 * pointer as the first argument -- confirm against the macro.
	 * NOTE(review): nothing is pushed before the call, so %rsp is not
	 * 16-byte aligned at the call site if it followed the usual
	 * convention on entry -- confirm the callee does not depend on it.
	 */
1592	ENTRY_NP(intr_get_time)
1593	cli				/* make this easy -- block intrs */
1594	LOADCPU(%rdi)
1595	call	intr_thread_get_time
1596	sti
1597	ret
1598	SET_SIZE(intr_get_time)
1599
1600#elif defined(__i386)
1601
1602#ifdef DEBUG
1603
1604
/*
 * Panic messages used by the DEBUG-only assertion macros below.
 *
 * NOTE(review): the first message contains "%pil".  If __PANIC hands the
 * string to a printf-style panic(), the "%p" would be parsed as a
 * conversion -- confirm, and escape it as "%%pil" if so.
 */
1605_intr_get_time_high_pil:
1606	.string	"intr_get_time(): %pil > LOCK_LEVEL"
1607_intr_get_time_not_intr:
1608	.string	"intr_get_time(): not called from an interrupt thread"
1609_intr_get_time_no_start_time:
1610	.string	"intr_get_time(): t_intr_start == 0"
1611
1612/*
1613 * ASSERT(%pil <= LOCK_LEVEL)
 *
 * cpureg: register holding the CPU pointer.  Panics when any bit of
 * CPU_INTR_ACTV_HIGH_LEVEL_MASK is set in CPU_INTR_ACTV(cpureg).
 * Modifies the condition flags.
1614 */
1615#define	ASSERT_PIL_BELOW_LOCK_LEVEL(cpureg)				\
1616	testl	$CPU_INTR_ACTV_HIGH_LEVEL_MASK, CPU_INTR_ACTV(cpureg);	\
1617	jz	0f;							\
1618	__PANIC(_intr_get_time_high_pil, 0f);				\
16190:
1620
1621/*
1622 * ASSERT((t_flags & T_INTR_THREAD) != 0 && t_pil > 0)
 *
 * thrreg: register holding the thread pointer.  Panics when the
 * T_INTR_THREAD flag is clear or the T_PIL byte is zero.
 * Modifies the condition flags.
1623 */
1624#define	ASSERT_NO_PIL_0_INTRS(thrreg)			\
1625	testw	$T_INTR_THREAD, T_FLAGS(thrreg);	\
1626	jz	1f;					\
1627	cmpb	$0, T_PIL(thrreg);			\
1628	jne	0f;					\
16291:							\
1630	__PANIC(_intr_get_time_not_intr, 0f);		\
16310:
1632
1633/*
1634 * ASSERT(t_intr_start != 0)
 *
 * thrreg: register holding the thread pointer.  T_INTR_START is checked
 * as two 32-bit halves; panics only when both halves are zero.
 * Modifies the condition flags.
1635 */
1636#define	ASSERT_INTR_START_NOT_0(thrreg)			\
1637	cmpl	$0, T_INTR_START(thrreg);		\
1638	jnz	0f;					\
1639	cmpl	$0, T_INTR_START+4(thrreg);		\
1640	jnz	0f;					\
1641	__PANIC(_intr_get_time_no_start_time, 0f);	\
16420:
1643
1644#endif /* DEBUG */
1645
	/*
	 * intr_get_time() -- i386 implementation.
	 *
	 * Takes no arguments.  Returns a 64-bit tick count in %edx:%eax:
	 * the difference between intrstat[pil][0] (after adding the time
	 * elapsed since t_intr_start) and intrstat[pil][1], for the PIL of
	 * the current thread.
	 *
	 * Side effects, as described by the per-line comments below:
	 *   - t_intr_start of the current thread becomes the new timestamp
	 *   - the cpu_intracct slot for the current mstate and
	 *     intrstat[pil][0] both grow by the elapsed time
	 *   - intrstat[pil][1] is brought up to intrstat[pil][0]
	 *
	 * %esi and %ebx are saved and restored; %eax, %ecx and %edx are
	 * modified.  Interrupts are disabled on entry and re-enabled
	 * unconditionally with sti before returning.
	 */
1646	ENTRY_NP(intr_get_time)
1647
1648	cli				/* make this easy -- block intrs */
1649	pushl	%esi			/* and free up some registers */
1650	pushl	%ebx
1651
1652	LOADCPU(%esi)
1653	movl	CPU_THREAD(%esi), %ecx	/* %ecx = current thread */
1654
1655#ifdef DEBUG
1656	ASSERT_PIL_BELOW_LOCK_LEVEL(%esi)
1657	ASSERT_NO_PIL_0_INTRS(%ecx)
1658	ASSERT_INTR_START_NOT_0(%ecx)
1659#endif /* DEBUG */
1660
1661_tsc_patch17:
1662	nop; nop			/* patched to rdtsc if available */
1663	TSC_SUB_FROM(%ecx, T_INTR_START)	/* get elapsed time */
1664	TSC_ADD_TO(%ecx, T_INTR_START)		/* T_INTR_START = rdtsc */
1665
1666	INTRACCTBASE(%esi, %ebx)			/* %ebx = CPU + cpu_mstate*8 */
1667	TSC_ADD_TO(%ebx, CPU_INTRACCT);		/* intracct[ms] += elapsed */
1668	movzbl	T_PIL(%ecx), %ecx			/* %ecx = pil */
1669	PILBASE_INTRSTAT(%esi, %ecx)		/* %ecx = CPU + pil*16 */
1670	TSC_ADD_TO(%ecx, CPU_INTRSTAT)		/* intrstat[0] += elapsed */
1671	TSC_LOAD(%ecx, CPU_INTRSTAT)		/* get new intrstat[0] */
1672	TSC_SUB_FROM(%ecx, CPU_INTRSTAT+8)	/* diff with intrstat[1] */
1673	TSC_ADD_TO(%ecx, CPU_INTRSTAT+8)	/* intrstat[1] = intrstat[0] */
1674
1675	/* %edx/%eax contain difference between old and new intrstat[1] */
1676
1677	popl	%ebx
1678	popl	%esi
1679	sti
1680	ret
1681	SET_SIZE(intr_get_time)
1682#endif	/* __i386 */
1683
1684#endif  /* lint */
1685