xref: /titanic_51/usr/src/uts/i86pc/ml/interrupt.s (revision f3861e1a2ceec23a5b699c24d814b7775a9e0b52)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
28/*	  All Rights Reserved					*/
29
30/*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
31/*	  All Rights Reserved					*/
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35#include <sys/asm_linkage.h>
36#include <sys/asm_misc.h>
37#include <sys/regset.h>
38#include <sys/psw.h>
39#include <sys/x86_archext.h>
40
41#if defined(__lint)
42
43#include <sys/types.h>
44#include <sys/thread.h>
45#include <sys/systm.h>
46
47#else   /* __lint */
48
49#include <sys/segments.h>
50#include <sys/pcb.h>
51#include <sys/trap.h>
52#include <sys/ftrace.h>
53#include <sys/traptrace.h>
54#include <sys/clock.h>
55#include <sys/panic.h>
56#include "assym.h"
57
58_ftrace_intr_thread_fmt:	/* format string handed to ftrace_3_notick() by intr_thread; three values: regs, vector, pil */
59	.string	"intr_thread(): regs=0x%lx, int=0x%x, pil=0x%x"
60
61#endif	/* lint */
62
63#if defined(__i386)
64
65#if defined(__lint)
66
67void
68patch_tsc(void)	/* empty stand-in compiled only under __lint; the real routine is assembly */
69{}
70
71#else	/* __lint */
72
73/*
74 * To cope with processors that do not implement the rdtsc instruction,
75 * we patch the kernel to use rdtsc if that feature is detected on the CPU.
76 * On an unpatched kernel, all locations requiring rdtsc are nop's.
77 *
78 * This function patches the nop's to rdtsc.
 *
 * Mechanics: the 16-bit word assembled at _rdtsc_insn (the rdtsc opcode)
 * is loaded into %cx and then stored over each of the seventeen patch
 * sites labelled _tsc_patch1 through _tsc_patch17.  The sites visible in
 * this file are two-byte "nop; nop" placeholders.
 *
 * Takes no arguments and returns nothing; %cx is the only register
 * written.  _rdtsc_insn is placed after the ret, so within this routine
 * it is only ever read as data, never fallen into.
 *
 * When a new _tsc_patchN site is added, a matching store must be added
 * here, otherwise that site stays as nop's.
79 */
80	ENTRY_NP(patch_tsc)
81	movw	_rdtsc_insn, %cx	/* %cx = the two opcode bytes of rdtsc */
82	movw	%cx, _tsc_patch1
83	movw	%cx, _tsc_patch2
84	movw	%cx, _tsc_patch3
85	movw	%cx, _tsc_patch4
86	movw	%cx, _tsc_patch5
87	movw	%cx, _tsc_patch6
88	movw	%cx, _tsc_patch7
89	movw	%cx, _tsc_patch8
90	movw	%cx, _tsc_patch9
91	movw	%cx, _tsc_patch10
92	movw	%cx, _tsc_patch11
93	movw	%cx, _tsc_patch12
94	movw	%cx, _tsc_patch13
95	movw	%cx, _tsc_patch14
96	movw	%cx, _tsc_patch15
97	movw	%cx, _tsc_patch16
98	movw	%cx, _tsc_patch17
99	ret
100_rdtsc_insn:
101	rdtsc				/* template instruction; source of the patch bytes */
102	SET_SIZE(patch_tsc)
103
104#endif	/* __lint */
105
106#endif	/* __i386 */
107
108
109#if defined(__lint)
110
111void
112_interrupt(void)	/* empty stand-in compiled only under __lint; the real entry point is assembly */
113{}
114
115#else	/* __lint */
116
117#if defined(__amd64)
118
119	/*
120	 * Common register usage:
121	 *
122	 * %rbx		cpu pointer
123	 * %r12		trap trace pointer -and- stash of
124	 *		vec across intr_thread dispatch.
125	 * %r13d	ipl of isr
126	 * %r14d	old ipl (ipl level we entered on)
127	 * %r15		interrupted thread stack pointer
	 *
	 * cmnint (alias _interrupt) is the common amd64 interrupt entry:
	 *
	 *  1. After INTR_PUSH, %rbp is set to %rsp and is used from then
	 *     on as the pointer to the saved regs (REGOFF_*(%rbp)).
	 *  2. If the frame's trap number is T_SOFTINT, jump to dosoftint.
	 *  3. Otherwise call *setlvl with (old ipl, &vector).  A return
	 *     of -1 is treated as a spurious interrupt: exit via _sys_rtt.
	 *  4. If the new ipl is <= LOCK_LEVEL (unsigned compare), branch
	 *     to intr_thread.  Otherwise the interrupt is handled here:
	 *     hilevel_intr_prolog(), an optional switch to CPU_INTR_STACK,
	 *     av_dispatch_autovect(vec) with interrupts enabled, then
	 *     hilevel_intr_epilog() and an optional switch back to the
	 *     stack pointer saved in %r15.
	 *  5. Leave through dosoftint if CPU_SOFTINFO is nonzero,
	 *     otherwise through _sys_rtt.
	 *
	 * %rbx, %r12-%r15 and %rbp are relied on to survive every call
	 * made below.
128	 */
129	ENTRY_NP2(cmnint, _interrupt)
130
131	INTR_PUSH
132
133	/*
134	 * At the end of TRACE_PTR %r12 points to the current TRAPTRACE entry
135	 */
136	TRACE_PTR(%r12, %rax, %eax, %rdx, $TT_INTERRUPT)
137						/* Uses labels 8 and 9 */
138	TRACE_REGS(%r12, %rsp, %rax, %rbx)	/* Uses label 9 */
139	TRACE_STAMP(%r12)		/* Clobbers %eax, %edx, uses 9 */
140
141	DISABLE_INTR_FLAGS		/* (and set kernel flag values) */
142
143	movq	%rsp, %rbp
144
145	TRACE_STACK(%r12)
146
147	LOADCPU(%rbx)				/* &cpu */
148	leaq	REGOFF_TRAPNO(%rbp), %rsi	/* &vector */
149	movl	CPU_PRI(%rbx), %r14d		/* old ipl */
150	movl	CPU_SOFTINFO(%rbx), %edx	/* pending softints; dosoftint wants them in %edx */
151
152#ifdef TRAPTRACE
	/*
	 * Pre-fill the trace record: ipl and vector are recorded as 255
	 * until setlvl has reported the real values further down.
	 */
153	movl	$255, TTR_IPL(%r12)
154	movl	%r14d, %edi
155	movb	%dil, TTR_PRI(%r12)
156	movl	CPU_BASE_SPL(%rbx), %edi
157	movb	%dil, TTR_SPL(%r12)
158	movb	$255, TTR_VECTOR(%r12)
159#endif
160
161	/*
162	 * Check to see if the trap number is T_SOFTINT; if it is,
163	 * jump straight to dosoftint now.
164	 */
165	cmpq	$T_SOFTINT, (%rsi)
166	je	dosoftint
167
168	/*
169	 * Raise the interrupt priority level, returns newpil.
170	 * (The vector address is in %rsi so setlvl can update it.)
171	 */
172	movl	%r14d, %edi			/* old ipl */
173						/* &vector */
174	call	*setlvl(%rip)			/* %eax = new ipl, or -1 if spurious */
175
176#ifdef TRAPTRACE
177	movb	%al, TTR_IPL(%r12)
178#endif
179	/*
180	 * check for spurious interrupt
181	 */
182	cmpl	$-1, %eax
183	je	_sys_rtt
184
185#ifdef TRAPTRACE
186	movl	%r14d, %edx
187	movb	%dl, TTR_PRI(%r12)
188	movl	CPU_BASE_SPL(%rbx), %edx
189	movb	%dl, TTR_SPL(%r12)
190#endif
191	movl	%eax, CPU_PRI(%rbx)		/* update ipl */
192
193#ifdef TRAPTRACE
194	movl	REGOFF_TRAPNO(%rbp), %edx
195	movb	%dl, TTR_VECTOR(%r12)
196#endif
197	movl	%eax, %r13d			/* ipl of isr */
198
199	/*
200	 * At this point we can take one of two paths.
201	 * If the new level is at or below lock level, we will
202	 * run this interrupt in a separate thread.
203	 */
204	cmpl	$LOCK_LEVEL, %eax
205	jbe	intr_thread
206
207	movq	%rbx, %rdi		/* &cpu */
208	movl	%r13d, %esi		/* ipl */
209	movl	%r14d, %edx		/* old ipl */
210	movq	%rbp, %rcx		/* &regs */
211	call	hilevel_intr_prolog
212	orl	%eax, %eax		/* zero if need to switch stack */
213	jnz	1f			/* nonzero: stay on the current stack */
214
215	/*
216	 * Save the thread stack and get on the cpu's interrupt stack
217	 */
218	movq	%rsp, %r15
219	movq	CPU_INTR_STACK(%rbx), %rsp
2201:
221
222	sti
223
224	/*
225	 * Walk the list of handlers for this vector, calling
226	 * them as we go until no more interrupts are claimed.
227	 */
228	movl	REGOFF_TRAPNO(%rbp), %edi
229	call	av_dispatch_autovect
230
231	cli
232
233	movq	%rbx, %rdi			/* &cpu */
234	movl	%r13d, %esi			/* ipl */
235	movl	%r14d, %edx			/* oldipl */
236	movl	REGOFF_TRAPNO(%rbp), %ecx	/* vec */
237	call	hilevel_intr_epilog
238	orl	%eax, %eax		/* zero if need to switch stack */
239	jnz	2f			/* nonzero: %r15 is not consulted */
240	movq	%r15, %rsp
2412:	/*
242	 * Check for, and execute, softints before we iret.
243	 *
244	 * (dosoftint expects oldipl in %r14d (which is where it is)
245	 * the cpu pointer in %rbx (which is where it is) and the
246	 * softinfo in %edx (which is where we'll put it right now))
247	 */
248	movl	CPU_SOFTINFO(%rbx), %edx
249	orl	%edx, %edx
250	jz	_sys_rtt
251	jmp	dosoftint
252	/*NOTREACHED*/
253
254	SET_SIZE(cmnint)
255	SET_SIZE(_interrupt)
256
257/*
258 * Handle an interrupt in a new thread
259 *
260 * As we branch here, interrupts are still masked,
261 * %rbx still contains the cpu pointer,
262 * %r14d contains the old ipl that we came in on, and
263 * %eax contains the new ipl that we got from the setlvl routine
 *
 * %rbp points at the saved regs on entry, and %rsp has the same value
 * (cmnint copies %rsp to %rbp and leaves nothing pushed before
 * branching here).  That pre-switch %rsp is parked in %r15 while we
 * run on the interrupt thread's stack, so between the stack switch and
 * the switch back %r15 is the pointer to the saved regs and %rbp is
 * the interrupt thread's stack.  The vector is kept in %r12d.
 *
 * Sequence: intr_thread_prolog() returns the stack to run on, the
 * handlers are run by av_dispatch_autovect(vec) with interrupts
 * enabled, intr_thread_epilog() undoes the bookkeeping, and we then
 * fall through into dosoftint if CPU_SOFTINFO is nonzero.
264 */
265
266	ENTRY_NP(intr_thread)
267
268	movq	%rbx, %rdi	/* &cpu */
269	movq	%rbp, %rsi	/* &regs = stack pointer for _sys_rtt */
270	movl	REGOFF_TRAPNO(%rbp), %r12d	/* stash the vec */
271	movl	%eax, %edx	/* new pil from setlvl() */
272	call	intr_thread_prolog
273	movq	%rsp, %r15	/* remember the old stack (== &regs) */
274	movq	%rax, %rsp	/* t_stk from interrupt thread */
275	movq	%rsp, %rbp
276
277	sti
278
279	testl	$FTRACE_ENABLED, CPU_FTRACE_STATE(%rbx)
280	jz	1f
281	/*
282	 * ftracing support. do we need this on x86?
	 *
	 * %rbp no longer points at the saved regs here (it was just
	 * set to the interrupt thread's stack), so the regs pointer
	 * for the trace record is taken from %r15 instead.
283	 */
284	leaq	_ftrace_intr_thread_fmt(%rip), %rdi
285	movq	%r15, %rsi			/* &regs (pre-switch %rsp) */
286	movl	%r12d, %edx			/* vec */
287	movq	CPU_THREAD(%rbx), %r11		/* (the interrupt thread) */
288	movzbl	T_PIL(%r11), %ecx		/* newipl */
289	call	ftrace_3_notick
2901:
291	movl	%r12d, %edi			/* vec */
292	call	av_dispatch_autovect
293
294	cli
295
296	movq	%rbx, %rdi			/* &cpu */
297	movl	%r12d, %esi			/* vec */
298	movl	%r14d, %edx			/* oldpil */
299	call	intr_thread_epilog
300	/*
301	 * If we return from here (we might not if the interrupted thread
302	 * has exited or blocked, in which case we'll have quietly swtch()ed
303	 * away) then we need to switch back to our old %rsp
304	 */
305	movq	%r15, %rsp
306	movq	%rsp, %rbp
307	/*
308	 * Check for, and execute, softints before we iret.
309	 *
310	 * (dosoftint expects oldpil in %r14d, the cpu pointer in %rbx and
311	 * the mcpu_softinfo.st_pending field in %edx.)
312	 */
313	movl	CPU_SOFTINFO(%rbx), %edx
314	orl	%edx, %edx
315	jz	_sys_rtt
316	/*FALLTHROUGH*/
317
318/*
319 * Process soft interrupts.
320 * Interrupts are masked, and we have a minimal frame on the stack.
321 * %edx should contain the mcpu_softinfo.st_pending field
 *
 * Also expects the cpu pointer in %rbx, the old ipl in %r14d and the
 * saved regs pointer in %rbp.  Loops back to itself for as long as
 * CPU_SOFTINFO is nonzero after a soft interrupt has been run, and
 * exits through _sys_rtt.
322 */
323
324	ALTENTRY(dosoftint)
325
326	movq	%rbx, %rdi	/* &cpu */
327	movq	%rbp, %rsi	/* &regs = stack pointer for _sys_rtt */
328				/* cpu->cpu_m.mcpu_softinfo.st_pending */
329	movl	%r14d, %ecx	/* oldipl */
330	call	dosoftint_prolog
331	/*
332	 * dosoftint_prolog() usually returns a stack pointer for the
333	 * interrupt thread that we must switch to.  However, if the
334	 * returned stack pointer is NULL, then the software interrupt was
335	 * too low in priority to run now; we'll catch it another time.
336	 */
337	orq	%rax, %rax
338	jz	_sys_rtt
339	movq	%rsp, %r15	/* remember the old stack */
340	movq	%rax, %rsp	/* t_stk from interrupt thread */
341	movq	%rsp, %rbp
342
343	sti
344
345	/*
346	 * Enabling interrupts (above) could raise the current ipl
347	 * and base spl.  But, we continue processing the current soft
348	 * interrupt and we will check the base spl next time around
349	 * so that blocked interrupt threads get a chance to run.
350	 */
351	movq	CPU_THREAD(%rbx), %r11	/* now an interrupt thread */
352	movzbl	T_PIL(%r11), %edi	/* pil of the soft interrupt to run */
353	call	av_dispatch_softvect
354
355	cli
356
357	movq	%rbx, %rdi		/* &cpu */
358	movl	%r14d, %esi		/* oldpil */
359	call	dosoftint_epilog
360	movq	%r15, %rsp		/* back on old stack pointer */
361	movq	%rsp, %rbp
362	movl	CPU_SOFTINFO(%rbx), %edx
363	orl	%edx, %edx
364	jz	_sys_rtt
365	jmp	dosoftint		/* more pending: go round again */
366
367	SET_SIZE(dosoftint)
368	SET_SIZE(intr_thread)
369
370#elif defined(__i386)
371
372/*
373 * One day, this should just invoke the C routines that know how to
374 * do all the interrupt bookkeeping.  In the meantime, try
375 * and make the assembler a little more comprehensible.
 *
 * The macros below are the building blocks used by the i386 interrupt
 * paths.  64-bit counters and timestamps are handled as two 32-bit
 * words: the low word at offset(reg), the high word at offset + 4(reg).
376 */
377
/*
 * 64-bit increment of the counter at offset(basereg).  addl produces
 * the carry that the following adcl folds into the high word, so the
 * two instructions must stay adjacent.
 */
378#define	INC64(basereg, offset)			\
379	addl	$1, offset(basereg);		\
380	adcl	$0, offset + 4(basereg)
381
/*
 * Zero the 64-bit value at offset(basereg).
 */
382#define	TSC_CLR(basereg, offset)		\
383	movl	$0, offset(basereg);		\
384	movl	$0, offset + 4(basereg)
385
386/*
387 * The following macros assume the time value is in %edx:%eax
388 * e.g. from a rdtsc instruction.
 *
 * TSC_STORE	store %edx:%eax to the 64-bit slot at offset(reg)
 * TSC_LOAD	load %edx:%eax from the 64-bit slot at offset(reg)
 * TSC_ADD_TO	add %edx:%eax into the 64-bit slot (addl then adcl)
 * TSC_SUB_FROM	subtract the 64-bit slot from %edx:%eax (subl then sbbl),
 *		leaving the result in %edx:%eax; the slot is not modified
389 */
390#define	TSC_STORE(reg, offset)		\
391	movl	%eax, offset(reg);	\
392	movl	%edx, offset + 4(reg)
393
394#define	TSC_LOAD(reg, offset)	\
395	movl	offset(reg), %eax;	\
396	movl	offset + 4(reg), %edx
397
398#define	TSC_ADD_TO(reg, offset)		\
399	addl	%eax, offset(reg);	\
400	adcl	%edx, offset + 4(reg)
401
402#define	TSC_SUB_FROM(reg, offset)	\
403	subl	offset(reg), %eax;	\
404	sbbl	offset + 4(reg), %edx	/* interval in edx:eax */
405
406/*
407 * basereg   - pointer to cpu struct
408 * pilreg    - pil or converted pil (pil - (LOCK_LEVEL + 1))
409 *
410 * Returns (base + pil * 8) in pilreg
 *
 * pilreg is overwritten; the original pil value is lost, so callers
 * that still need it copy it elsewhere first.
411 */
412#define	PILBASE(basereg, pilreg)	\
413	lea	(basereg, pilreg, 8), pilreg
414
415/*
416 * Returns (base + (pil - (LOCK_LEVEL + 1)) * 8) in pilreg
 *
 * The subtraction is done in place on pilreg before PILBASE is applied.
417 */
418#define	HIGHPILBASE(basereg, pilreg)		\
419	subl	$LOCK_LEVEL + 1, pilreg;	\
420	PILBASE(basereg, pilreg)
421
422/*
423 * Returns (base + pil * 16) in pilreg
 *
 * pilreg is shifted and added to in place.  Note that, unlike the other
 * macros here, the expansion already ends in a semicolon.
424 */
425#define	PILBASE_INTRSTAT(basereg, pilreg)	\
426	shl	$4, pilreg;			\
427	addl	basereg, pilreg;
428
429/*
430 * Returns (cpu + cpu_mstate * 8) in tgt
 *
 * cpu_mstate is read as a zero-extended 16-bit value (movzwl).
431 */
432#define	INTRACCTBASE(cpureg, tgtreg)		\
433	movzwl	CPU_MSTATE(cpureg), tgtreg;	\
434	lea	(cpureg, tgtreg, 8), tgtreg
435
436/*
437 * cpu_stats.sys.intr[PIL]++
 *
 * The pil is copied to tmpreg_32, turned into (base + pil * 8) in
 * tmpreg, and the 64-bit counter at CPU_STATS_SYS_INTR - 8 from there
 * is incremented; i.e. pil n addresses the 8-byte slot n - 1 counted
 * from CPU_STATS_SYS_INTR.  pilreg itself is left untouched.
 * tmpreg and tmpreg_32 name the same scratch register (both users in
 * this file pass the same register for the two).
438 */
439#define	INC_CPU_STATS_INTR(pilreg, tmpreg, tmpreg_32, basereg)	\
440	movl	pilreg, tmpreg_32;				\
441	PILBASE(basereg, tmpreg);				\
442	INC64(tmpreg, _CONST(CPU_STATS_SYS_INTR - 8))
443
444/*
445 * Unlink thread from CPU's list
 *
 * ithread receives the head of the CPU_INTR_THREAD list and the head is
 * advanced to ithread's T_LINK.  tmpreg is scratch.  There is no check
 * for an empty list.
446 */
447#define	UNLINK_INTR_THREAD(cpureg, ithread, tmpreg)	\
448	mov	CPU_INTR_THREAD(cpureg), ithread;	\
449	mov	T_LINK(ithread), tmpreg;		\
450	mov	tmpreg, CPU_INTR_THREAD(cpureg)
451
452/*
453 * Link a thread into CPU's list
 *
 * ithread is pushed on the front: its T_LINK is set to the current
 * head of CPU_INTR_THREAD and it becomes the new head.  tmpreg is
 * scratch.
454 */
455#define	LINK_INTR_THREAD(cpureg, ithread, tmpreg)	\
456	mov	CPU_INTR_THREAD(cpureg), tmpreg;	\
457	mov	tmpreg, T_LINK(ithread);		\
458	mov	ithread, CPU_INTR_THREAD(cpureg)
459
460#if defined(DEBUG)
/*
 * DEBUG-only consistency checks for the i386 interrupt paths.  Each
 * ASSERT_* macro expands to a test, a conditional jump over the panic
 * call, and a numeric local label (4, 5, 6 or 7) marking the
 * continuation point, so those label numbers must not be reused in a
 * way that would capture a jump across an expansion.  In non-DEBUG
 * builds the macros expand to nothing (see the #else branch).
 */
461
462/*
463 * Do not call panic, if panic is already in progress.
 *
 * If panic_quiesce is nonzero, jump to label; otherwise push the
 * address of msg and call panic.  Nothing is popped after the call.
464 */
465#define	__PANIC(msg, label)		\
466	cmpl	$0, panic_quiesce;		\
467	jne	label;				\
468	pushl	$msg;				\
469	call	panic
470
/*
 * Jump to label if the 64-bit value at offset(basereg) is nonzero,
 * i.e. if either of its 32-bit halves is nonzero.
 */
471#define	__CMP64_JNE(basereg, offset, label)	\
472	cmpl	$0, offset(basereg);		\
473	jne	label;				\
474	cmpl	$0, offset + 4(basereg);	\
475	jne	label
476
477/*
478 * ASSERT(!(CPU->cpu_intr_actv & (1 << PIL)))
479 */
480#define	ASSERT_NOT_CPU_INTR_ACTV(pilreg, basereg, msg)	\
481	btl	pilreg, CPU_INTR_ACTV(basereg);		\
482	jnc	4f;					\
483	__PANIC(msg, 4f);				\
4844:
485
486/*
487 * ASSERT(CPU->cpu_intr_actv & (1 << PIL))
488 */
489#define	ASSERT_CPU_INTR_ACTV(pilreg, basereg, msg)	\
490	btl	pilreg, CPU_INTR_ACTV(basereg);		\
491	jc	5f;					\
492	__PANIC(msg, 5f);				\
4935:
494
495/*
496 * ASSERT(CPU->cpu_pil_high_start != 0)
 *
 * basereg is used with the CPU_PIL_HIGH_START offset directly, so the
 * caller passes whatever base it wants that offset applied to.
497 */
498#define	ASSERT_CPU_PIL_HIGH_START_NZ(basereg)			\
499	__CMP64_JNE(basereg, CPU_PIL_HIGH_START, 6f);		\
500	__PANIC(_interrupt_timestamp_zero, 6f);		\
5016:
502
503/*
504 * ASSERT(t->t_intr_start != 0)
 *
 * basereg is the thread pointer.
505 */
506#define	ASSERT_T_INTR_START_NZ(basereg)				\
507	__CMP64_JNE(basereg, T_INTR_START, 7f);			\
508	__PANIC(_intr_thread_t_intr_start_zero, 7f);	\
5097:
510
/*
 * Panic messages used by the assertions above.
 */
511_interrupt_actv_bit_set:
512	.string	"_interrupt(): cpu_intr_actv bit already set for PIL"
513_interrupt_actv_bit_not_set:
514	.string	"_interrupt(): cpu_intr_actv bit not set for PIL"
515_interrupt_timestamp_zero:
516	.string "_interrupt(): timestamp zero upon handler return"
517_intr_thread_actv_bit_not_set:
518	.string	"intr_thread():	cpu_intr_actv bit not set for PIL"
519_intr_thread_t_intr_start_zero:
520	.string	"intr_thread():	t_intr_start zero upon handler return"
521_dosoftint_actv_bit_set:
522	.string	"dosoftint(): cpu_intr_actv bit already set for PIL"
523_dosoftint_actv_bit_not_set:
524	.string	"dosoftint(): cpu_intr_actv bit not set for PIL"
525
/*
 * 4-byte global counter, initially zero; incremented at
 * intr_thread_exit in DEBUG builds.
 */
526	DGDEF(intr_thread_cnt)
527	.4byte	0
528
529#else	/* !DEBUG: the assertions expand to nothing */
530#define	ASSERT_NOT_CPU_INTR_ACTV(pilreg, basereg, msg)
531#define	ASSERT_CPU_INTR_ACTV(pilreg, basereg, msg)
532#define	ASSERT_CPU_PIL_HIGH_START_NZ(basereg)
533#define	ASSERT_T_INTR_START_NZ(basereg)
534#endif	/* DEBUG */
535
536	ENTRY_NP2(cmnint, _interrupt)
	/*
	 * Common interrupt entry point (i386).
	 *
	 * Register usage once the frame has been built:
	 *	%ebp	pointer to the saved regs
	 *	%ebx	cpu pointer (borrowed as the %bh/%bl counters in
	 *		the handler loop and reloaded with LOADCPU after)
	 *	%edi	old ipl (the level we entered on)
	 *	%eax	new ipl as returned through setlvl
	 *	%ecx	interrupt vector
	 *	%esi	trap trace pointer at first, scratch afterwards
	 *
	 * A T_SOFTINT trap number goes straight to dosoftint, a new ipl
	 * of -1 (spurious) exits through _sys_rtt, and a new ipl at or
	 * below LOCK_LEVEL is handed to intr_thread.  Anything higher is
	 * a high-level interrupt and is handled in line below: time is
	 * charged to whatever was interrupted, the PIL's bit is set in
	 * cpu_intr_actv, the handlers run on the CPU interrupt stack
	 * with interrupts enabled, and everything is undone again
	 * before leaving via dosoftint or _sys_rtt.
	 */
537
538	INTR_PUSH
539
540	/*
541	 * At the end of TRACE_PTR %esi points to the current TRAPTRACE entry
542	 */
543	TRACE_PTR(%esi, %eax, %eax, %edx, $TT_INTERRUPT)
544						/* Uses labels 8 and 9 */
545	TRACE_REGS(%esi, %esp, %eax, %ebx)	/* Uses label 9 */
546	TRACE_STAMP(%esi)		/* Clobbers %eax, %edx, uses 9 */
547
548	movl	%esp, %ebp
549	DISABLE_INTR_FLAGS
550	LOADCPU(%ebx)		/* get pointer to CPU struct. Avoid gs refs */
551	leal    REGOFF_TRAPNO(%ebp), %ecx	/* get address of vector */
552	movl	CPU_PRI(%ebx), %edi		/* get ipl */
553	movl	CPU_SOFTINFO(%ebx), %edx
554
555	/
556	/ Check to see if the trap number is T_SOFTINT; if it is, we'll
557	/ jump straight to dosoftint now.
558	/
559	cmpl	$T_SOFTINT, (%ecx)
560	je	dosoftint
561
562	/ raise interrupt priority level
563	/ oldipl is in %edi, vectorp is in %ecx
564	/ newipl is returned in %eax
565	pushl	%ecx
566	pushl	%edi
567	call    *setlvl
568	popl	%edi			/* save oldpil in %edi */
569	popl	%ecx
570
571#ifdef TRAPTRACE
572	movb	%al, TTR_IPL(%esi)
573#endif
574
575	/ check for spurious interrupt
576	cmp	$-1, %eax
577	je	_sys_rtt
578
579#ifdef TRAPTRACE
580	movl	CPU_PRI(%ebx), %edx
581	movb	%dl, TTR_PRI(%esi)
582	movl	CPU_BASE_SPL(%ebx), %edx
583	movb	%dl, TTR_SPL(%esi)
584#endif
585
586	movl	%eax, CPU_PRI(%ebx) /* update ipl */
587	movl	REGOFF_TRAPNO(%ebp), %ecx /* reload the interrupt vector */
588
589#ifdef TRAPTRACE
590	movb	%cl, TTR_VECTOR(%esi)
591#endif
592
593	/ At this point we can take one of two paths.  If the new priority
594	/ level is less than or equal to LOCK LEVEL then we jump to code that
595	/ will run this interrupt as a separate thread.  Otherwise the
596	/ interrupt is NOT run as a separate thread.
597
598	/ %edi - old priority level
599	/ %ebp - pointer to REGS
600	/ %ecx - translated vector
601	/ %eax - ipl of isr
602	/ %ebx - cpu pointer
603
604	cmpl 	$LOCK_LEVEL, %eax	/* compare to highest thread level */
605	jbe	intr_thread		/* process as a separate thread */
606
607	cmpl	$CBE_HIGH_PIL, %eax	/* Is this a CY_HIGH_LEVEL interrupt? */
608	jne	2f
609
	/*
	 * CBE_HIGH_PIL only: record the interrupted PIL and PC in the
	 * cpu's profile fields.  Exactly one of the user/kernel PC
	 * fields receives the PC; the other is zeroed.
	 */
610	movl	REGOFF_PC(%ebp), %esi
611	movl	%edi, CPU_PROFILE_PIL(%ebx)	/* record interrupted PIL */
612	testw	$CPL_MASK, REGOFF_CS(%ebp)	/* trap from supervisor mode? */
613	jz	1f
614	movl	%esi, CPU_PROFILE_UPC(%ebx)	/* record user PC */
615	movl	$0, CPU_PROFILE_PC(%ebx)	/* zero kernel PC */
616	jmp	2f
617
6181:
619	movl	%esi, CPU_PROFILE_PC(%ebx)	/* record kernel PC */
620	movl	$0, CPU_PROFILE_UPC(%ebx)	/* zero user PC */
621
6222:
	/*
	 * %eax and %ecx are needed as scratch (and %edx:%eax for the
	 * timestamp) by the accounting below, so park vec and newpil
	 * on the stack until it is done.
	 */
623	pushl	%ecx				/* vec */
624	pushl	%eax				/* newpil */
625
626	/
627	/ See if we are interrupting another high-level interrupt.
628	/
629	movl	CPU_INTR_ACTV(%ebx), %eax
630	andl	$CPU_INTR_ACTV_HIGH_LEVEL_MASK, %eax
631	jz	0f
632	/
633	/ We have interrupted another high-level interrupt.
634	/ Load starting timestamp, compute interval, update cumulative counter.
635	/
636	bsrl	%eax, %ecx		/* find PIL of interrupted handler */
637	movl	%ecx, %esi		/* save PIL for later */
638	HIGHPILBASE(%ebx, %ecx)
639_tsc_patch1:
640	nop; nop			/* patched to rdtsc if available */
641	TSC_SUB_FROM(%ecx, CPU_PIL_HIGH_START)
642
643	PILBASE_INTRSTAT(%ebx, %esi)
644	TSC_ADD_TO(%esi, CPU_INTRSTAT)
645	INTRACCTBASE(%ebx, %ecx)
646	TSC_ADD_TO(%ecx, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
647	/
648	/ Another high-level interrupt is active below this one, so
649	/ there is no need to check for an interrupt thread. That will be
650	/ done by the lowest priority high-level interrupt active.
651	/
652	jmp	1f
6530:
654	/
655	/ See if we are interrupting a low-level interrupt thread.
656	/
657	movl	CPU_THREAD(%ebx), %esi
658	testw	$T_INTR_THREAD, T_FLAGS(%esi)
659	jz	1f
660	/
661	/ We have interrupted an interrupt thread. Account for its time slice
662	/ only if its time stamp is non-zero.
663	/
664	cmpl	$0, T_INTR_START+4(%esi)
665	jne	0f
666	cmpl	$0, T_INTR_START(%esi)
667	je	1f
6680:
669	movzbl	T_PIL(%esi), %ecx /* %ecx has PIL of interrupted handler */
670	PILBASE_INTRSTAT(%ebx, %ecx)
671_tsc_patch2:
672	nop; nop			/* patched to rdtsc if available */
673	TSC_SUB_FROM(%esi, T_INTR_START)
674	TSC_CLR(%esi, T_INTR_START)
675	TSC_ADD_TO(%ecx, CPU_INTRSTAT)
676	INTRACCTBASE(%ebx, %ecx)
677	TSC_ADD_TO(%ecx, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
6781:
679	/ Store starting timestamp in CPU structure for this PIL.
680	popl	%ecx			/* restore new PIL */
681	pushl	%ecx			/* ... but keep it on the stack too */
682	HIGHPILBASE(%ebx, %ecx)
683_tsc_patch3:
684	nop; nop			/* patched to rdtsc if available */
685	TSC_STORE(%ecx, CPU_PIL_HIGH_START)
686
687	popl	%eax			/* restore new pil */
688	popl	%ecx			/* vec */
689	/
690	/ Set bit for this PIL in CPU's interrupt active bitmask.
691	/
692
693	ASSERT_NOT_CPU_INTR_ACTV(%eax, %ebx, _interrupt_actv_bit_set)
694
695	/ Save old CPU_INTR_ACTV
696	movl	CPU_INTR_ACTV(%ebx), %esi
697
698	cmpl	$15, %eax
699	jne	0f
700	/ PIL-15 interrupt. Increment nest-count in upper 16 bits of intr_actv
701	incw	CPU_INTR_ACTV_REF(%ebx)	/* increment ref count */
7020:
703	btsl	%eax, CPU_INTR_ACTV(%ebx)
704	/
705	/ Handle high-level nested interrupt on separate interrupt stack
706	/
707	testl	$CPU_INTR_ACTV_HIGH_LEVEL_MASK, %esi
708	jnz	onstack			/* already on interrupt stack */
709	movl	%esp, %eax
710	movl	CPU_INTR_STACK(%ebx), %esp	/* get on interrupt stack */
711	pushl	%eax			/* save the thread stack pointer */
712onstack:
713	movl	$autovect, %esi		/* get autovect structure before */
714					/* sti to save on AGI later */
715	sti				/* enable interrupts */
716	pushl	%ecx			/* save interrupt vector */
717	/
718	/ Get handler address
719	/
	/*
	 * Handler walk.  The whole chain for the vector is called; if
	 * the chain ran more than one handler and at least one of them
	 * claimed the interrupt, the chain is walked again, until a
	 * pass goes by with nothing claimed.  %bh counts the handlers
	 * actually called in the current pass and %bl ORs together
	 * their return values, so %ebx is not the cpu pointer until it
	 * is reloaded at .intr_ret1.
	 */
720pre_loop1:
721	movl	AVH_LINK(%esi, %ecx, 8), %esi
722	xorl	%ebx, %ebx	/* bh is no. of intpts in chain */
723				/* bl is DDI_INTR_CLAIMED status of chain */
724	testl	%esi, %esi		/* if pointer is null */
725	jz	.intr_ret		/* then skip */
726loop1:
727	movl	AV_VECTOR(%esi), %edx	/* get the interrupt routine */
728	testl	%edx, %edx		/* if func is null */
729	jz	.intr_ret		/* then skip */
730	incb	%bh			/* count only handlers we really call */
	/*
	 * Five words for the dtrace probes: dip, vector, arg1, arg2
	 * and a result slot (pushed as 0, filled in with the handler's
	 * return value before the "complete" probe).
	 * NOTE(review): %edx (handler address) and %eax (handler return
	 * value) are used across the probe calls, so this relies on
	 * those calls preserving them - confirm.
	 */
731	pushl	$0
732	pushl	AV_INTARG2(%esi)
733	pushl	AV_INTARG1(%esi)
734	pushl	AV_VECTOR(%esi)
735	pushl	AV_DIP(%esi)
736	call	__dtrace_probe_interrupt__start
737	pushl	AV_INTARG2(%esi)	/* get 2nd arg to interrupt routine */
738	pushl	AV_INTARG1(%esi)	/* get first arg to interrupt routine */
739	call	*%edx			/* call interrupt routine with arg */
740	addl	$8, %esp
741	movl	%eax, 16(%esp)		/* handler result into the fifth probe word */
742	call	__dtrace_probe_interrupt__complete
743	addl	$20, %esp
744	orb	%al, %bl		/* see if anyone claims intpt. */
745	movl	AV_LINK(%esi), %esi	/* get next routine on list */
746	testl	%esi, %esi		/* if pointer is non-null */
747	jnz	loop1			/* then continue */
748
749.intr_ret:
750	cmpb	$1, %bh		/* if only 1 intpt in chain, it is OK */
751	je	.intr_ret1
752	orb	%bl, %bl	/* If no one claims intpt, then it is OK */
753	jz	.intr_ret1
754	movl	(%esp), %ecx		/* else restore intr vector */
755	movl	$autovect, %esi		/* get autovect structure */
756	jmp	pre_loop1		/* and try again. */
757
758.intr_ret1:
759	LOADCPU(%ebx)			/* get pointer to cpu struct */
760
761	cli
762	movl	CPU_PRI(%ebx), %esi
763
764	/ cpu_stats.sys.intr[PIL]++
765	INC_CPU_STATS_INTR(%esi, %eax, %eax, %ebx)
766
767	/
768	/ Clear bit for this PIL in CPU's interrupt active bitmask.
769	/
770
771	ASSERT_CPU_INTR_ACTV(%esi, %ebx, _interrupt_actv_bit_not_set)
772
773	cmpl	$15, %esi
774	jne	0f
775	/ Only clear bit if reference count is now zero.
776	decw	CPU_INTR_ACTV_REF(%ebx)
777	jnz	1f
7780:
779	btrl	%esi, CPU_INTR_ACTV(%ebx)
7801:
781	/
782	/ Take timestamp, compute interval, update cumulative counter.
783	/ esi = PIL
784_tsc_patch4:
785	nop; nop			/* patched to rdtsc if available */
786	movl	%esi, %ecx		/* save for later */
787	HIGHPILBASE(%ebx, %esi)
788
789	ASSERT_CPU_PIL_HIGH_START_NZ(%esi)
790
791	TSC_SUB_FROM(%esi, CPU_PIL_HIGH_START)
792
793	PILBASE_INTRSTAT(%ebx, %ecx)
794	TSC_ADD_TO(%ecx, CPU_INTRSTAT)
795	INTRACCTBASE(%ebx, %esi)
796	TSC_ADD_TO(%esi, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
797	/
798	/ Check for lower-PIL nested high-level interrupt beneath current one
799	/ If so, place a starting timestamp in its pil_high_start entry.
800	/
801	movl	CPU_INTR_ACTV(%ebx), %eax
802	movl	%eax, %esi
803	andl	$CPU_INTR_ACTV_HIGH_LEVEL_MASK, %eax
804	jz	0f
805	bsrl	%eax, %ecx		/* find PIL of nested interrupt */
806	HIGHPILBASE(%ebx, %ecx)
807_tsc_patch5:
808	nop; nop			/* patched to rdtsc if available */
809	TSC_STORE(%ecx, CPU_PIL_HIGH_START)
810	/
811	/ Another high-level interrupt is active below this one, so
812	/ there is no need to check for an interrupt thread. That will be
813	/ done by the lowest priority high-level interrupt active.
814	/
815	jmp	1f
8160:
817	/ Check to see if there is a low-level interrupt active. If so,
818	/ place a starting timestamp in the thread structure.
819	movl	CPU_THREAD(%ebx), %esi
820	testw	$T_INTR_THREAD, T_FLAGS(%esi)
821	jz	1f
822_tsc_patch6:
823	nop; nop			/* patched to rdtsc if available */
824	TSC_STORE(%esi, T_INTR_START)
8251:
826	movl	%edi, CPU_PRI(%ebx)
827				/* interrupt vector already on stack */
828	pushl	%edi			/* old ipl */
829	call	*setlvlx
830	addl	$8, %esp		/* eax contains the current ipl */
831
	/*
	 * If no high-level interrupt remains active, the word now on
	 * top of the interrupt stack is the thread stack pointer that
	 * was pushed just before onstack: pop it straight into %esp.
	 */
832	movl	CPU_INTR_ACTV(%ebx), %esi /* reset stack pointer if no more */
833	shrl	$LOCK_LEVEL + 1, %esi	/* HI PRI intrs. */
834	jnz	.intr_ret2
835	popl	%esp			/* restore the thread stack pointer */
836.intr_ret2:
837	movl	CPU_SOFTINFO(%ebx), %edx /* any pending software interrupts */
838	orl	%edx, %edx
839	jz	_sys_rtt
840	jmp	dosoftint	/* check for softints before we return. */
841	SET_SIZE(cmnint)
842	SET_SIZE(_interrupt)
843
844#endif	/* __i386 */
845
846/*
847 * Declare a uintptr_t which has the size of _interrupt to enable stack
848 * traceback code to know when a regs structure is on the stack.
 *
 * The value is computed as the address of _interrupt_size itself minus
 * the address of _interrupt, so it measures everything assembled
 * between the two, including any padding added by the .align.  In the
 * __amd64 build that span also contains intr_thread and dosoftint,
 * which are placed between _interrupt and this point.  Code inserted
 * between _interrupt and here changes the reported size.
849 */
850	.globl	_interrupt_size
851	.align	CLONGSIZE
852_interrupt_size:
853	.NWORD	. - _interrupt
854	.type	_interrupt_size, @object
855
856#endif	/* __lint */
857
858#if defined(__i386)
859
860/*
861 * Handle an interrupt in a new thread.
862 *	Entry:  traps disabled.
863 *		%edi - old priority level
864 *		%ebp - pointer to REGS
865 *		%ecx - translated vector
866 *		%eax - ipl of isr.
867 *		%ebx - pointer to CPU struct
868 *	Uses:
869 */
870
871#if !defined(__lint)
872
873	ENTRY_NP(intr_thread)
874	/
875	/ Set bit for this PIL in CPU's interrupt active bitmask.
876	/
877
878	ASSERT_NOT_CPU_INTR_ACTV(%eax, %ebx, _interrupt_actv_bit_set)
879
880	btsl	%eax, CPU_INTR_ACTV(%ebx)
881
882	/ Get set to run interrupt thread.
883	/ There should always be an interrupt thread since we allocate one
884	/ for each level on the CPU.
885	/
886	/ Note that the code in kcpc_overflow_intr -relies- on the ordering
887	/ of events here - in particular that t->t_lwp of the interrupt
888	/ thread is set to the pinned thread *before* curthread is changed
889	/
890	movl	CPU_THREAD(%ebx), %edx		/* cur thread in edx */
891
892	/
893	/ Are we interrupting an interrupt thread? If so, account for it.
894	/
895	testw	$T_INTR_THREAD, T_FLAGS(%edx)
896	jz	0f
897	/
898	/ We have interrupted an interrupt thread. Account for its time slice
899	/ only if its time stamp is non-zero. t_intr_start may be zero due to
900	/ cpu_intr_swtch_enter.
901	/
902	cmpl	$0, T_INTR_START+4(%edx)
903	jne	1f
904	cmpl	$0, T_INTR_START(%edx)
905	je	0f
9061:
907	pushl	%ecx
908	pushl	%eax
909	movl	%edx, %esi
910_tsc_patch7:
911	nop; nop			/* patched to rdtsc if available */
912	TSC_SUB_FROM(%esi, T_INTR_START)
913	TSC_CLR(%esi, T_INTR_START)
914	movzbl	T_PIL(%esi), %ecx
915	PILBASE_INTRSTAT(%ebx, %ecx)
916	TSC_ADD_TO(%ecx, CPU_INTRSTAT)
917	INTRACCTBASE(%ebx, %ecx)
918	TSC_ADD_TO(%ecx, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
919	movl	%esi, %edx
920	popl	%eax
921	popl	%ecx
9220:
923	movl	%esp, T_SP(%edx)	/* mark stack in curthread for resume */
924	pushl	%edi			/* get a temporary register */
925	UNLINK_INTR_THREAD(%ebx, %esi, %edi)
926
927	movl	T_LWP(%edx), %edi
928	movl	%edx, T_INTR(%esi)		/* push old thread */
929	movl	%edi, T_LWP(%esi)
930	/
931	/ Threads on the interrupt thread free list could have state already
932	/ set to TS_ONPROC, but it helps in debugging if they're TS_FREE
933	/
934	movl	$ONPROC_THREAD, T_STATE(%esi)
935	/
936	/ chain the interrupted thread onto list from the interrupt thread.
937	/ Set the new interrupt thread as the current one.
938	/
939	popl	%edi			/* Don't need a temp reg anymore */
940	movl	T_STACK(%esi), %esp		/* interrupt stack pointer */
941	movl	%esp, %ebp
942	movl	%esi, CPU_THREAD(%ebx)		/* set new thread */
943	pushl	%eax				/* save the ipl */
944	/
945	/ Initialize thread priority level from intr_pri
946	/
947	movb	%al, T_PIL(%esi)	/* store pil */
948	movzwl	intr_pri, %ebx		/* XXX Can cause probs if new class */
949					/* is loaded on some other cpu. */
950	addl	%ebx, %eax		/* convert level to dispatch priority */
951	movw	%ax, T_PRI(%esi)
952
953	/
954	/ Take timestamp and store it in the thread structure.
955	/
956	movl	%eax, %ebx		/* save priority over rdtsc */
957_tsc_patch8:
958	nop; nop			/* patched to rdtsc if available */
959	TSC_STORE(%esi, T_INTR_START)
960	movl	%ebx, %eax		/* restore priority */
961
962	/ The following 3 instructions need not be in cli.
963	/ Putting them here only to avoid the AGI penalty on Pentiums.
964
965	pushl	%ecx			/* save interrupt vector. */
966	pushl	%esi			/* save interrupt thread */
967	movl	$autovect, %esi		/* get autovect structure */
968	sti				/* enable interrupts */
969
970	/ Fast event tracing.
971	LOADCPU(%ebx)
972	movl	CPU_FTRACE_STATE(%ebx), %ebx
973	testl	$FTRACE_ENABLED, %ebx
974	jz	1f
975
976	movl	8(%esp), %ebx
977	pushl	%ebx			/* ipl */
978	pushl	%ecx			/* int vector */
979	movl	T_SP(%edx), %ebx
980	pushl	%ebx			/* &regs */
981	pushl	$_ftrace_intr_thread_fmt
982	call	ftrace_3_notick
983	addl	$8, %esp
984	popl	%ecx			/* restore int vector */
985	addl	$4, %esp
9861:
987pre_loop2:
988	movl	AVH_LINK(%esi, %ecx, 8), %esi
989	xorl	%ebx, %ebx	/* bh is cno. of intpts in chain */
990				/* bl is DDI_INTR_CLAIMED status of * chain */
991	testl	%esi, %esi	/* if pointer is null */
992	jz	loop_done2	/* we're done */
993loop2:
994	movl	AV_VECTOR(%esi), %edx	/* get the interrupt routine */
995	testl	%edx, %edx		/* if pointer is null */
996	jz	loop_done2		/* we're done */
997	incb	%bh
998	pushl	$0
999	pushl	AV_INTARG2(%esi)
1000	pushl	AV_INTARG1(%esi)
1001	pushl	AV_VECTOR(%esi)
1002	pushl	AV_DIP(%esi)
1003	call	__dtrace_probe_interrupt__start
1004	pushl	AV_INTARG2(%esi)	/* get 2nd arg to interrupt routine */
1005	pushl	AV_INTARG1(%esi)	/* get first arg to interrupt routine */
1006	call	*%edx			/* call interrupt routine with arg */
1007	addl	$8, %esp
1008	movl	%eax, 16(%esp)
1009	call	__dtrace_probe_interrupt__complete
1010	addl	$20, %esp
1011	orb	%al, %bl		/* see if anyone claims intpt. */
1012	movl	AV_TICKSP(%esi), %ecx
1013	testl	%ecx, %ecx
1014	jz	no_time
1015	call	intr_get_time
1016	movl	AV_TICKSP(%esi), %ecx
1017	TSC_ADD_TO(%ecx, 0)
1018no_time:
1019	movl	AV_LINK(%esi), %esi	/* get next routine on list */
1020	testl	%esi, %esi		/* if pointer is non-null */
1021	jnz	loop2			/* continue */
1022loop_done2:
1023	cmpb	$1, %bh		/* if only 1 intpt in chain, it is OK */
1024	je	.loop_done2_1
1025	orb	%bl, %bl	/* If no one claims intpt, then it is OK */
1026	jz	.loop_done2_1
1027	movl	$autovect, %esi		/* else get autovect structure */
1028	movl	4(%esp), %ecx		/* restore intr vector */
1029	jmp	pre_loop2		/* and try again. */
1030.loop_done2_1:
1031	popl	%esi			/* restore intr thread pointer */
1032
1033	LOADCPU(%ebx)
1034
1035	cli		/* protect interrupt thread pool and intr_actv */
1036	movzbl	T_PIL(%esi), %eax
1037
1038	/ Save value in regs
1039	pushl	%eax			/* current pil */
1040	pushl	%edx			/* (huh?) */
1041	pushl	%edi			/* old pil */
1042
1043	/ cpu_stats.sys.intr[PIL]++
1044	INC_CPU_STATS_INTR(%eax, %edx, %edx, %ebx)
1045
1046	/
1047	/ Take timestamp, compute interval, and update cumulative counter.
1048	/ esi = thread pointer, ebx = cpu pointer, eax = PIL
1049	/
1050	movl	%eax, %edi
1051
1052	ASSERT_T_INTR_START_NZ(%esi)
1053
1054_tsc_patch9:
1055	nop; nop			/* patched to rdtsc if available */
1056	TSC_SUB_FROM(%esi, T_INTR_START)
1057	PILBASE_INTRSTAT(%ebx, %edi)
1058	TSC_ADD_TO(%edi, CPU_INTRSTAT)
1059	INTRACCTBASE(%ebx, %edi)
1060	TSC_ADD_TO(%edi, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
1061	popl	%edi
1062	popl	%edx
1063	popl	%eax
1064
1065	/
1066	/ Clear bit for this PIL in CPU's interrupt active bitmask.
1067	/
1068
1069	ASSERT_CPU_INTR_ACTV(%eax, %ebx, _intr_thread_actv_bit_not_set)
1070
1071	btrl	%eax, CPU_INTR_ACTV(%ebx)
1072
1073	/ if there is still an interrupted thread underneath this one
1074	/ then the interrupt was never blocked and the return is fairly
1075	/ simple.  Otherwise jump to intr_thread_exit
1076	cmpl	$0, T_INTR(%esi)
1077	je	intr_thread_exit
1078
1079	/
1080	/ link the thread back onto the interrupt thread pool
1081	LINK_INTR_THREAD(%ebx, %esi, %edx)
1082
1083	movl	CPU_BASE_SPL(%ebx), %eax	/* used below. */
1084	/ set the thread state to free so kmdb doesn't see it
1085	movl	$FREE_THREAD, T_STATE(%esi)
1086
1087	cmpl	%eax, %edi		/* if (oldipl >= basespl) */
1088	jae	intr_restore_ipl	/* then use oldipl */
1089	movl	%eax, %edi		/* else use basespl */
1090intr_restore_ipl:
1091	movl	%edi, CPU_PRI(%ebx)
1092					/* intr vector already on stack */
1093	pushl	%edi			/* old ipl */
1094	call	*setlvlx		/* eax contains the current ipl */
1095	/
1096	/ Switch back to the interrupted thread
1097	movl	T_INTR(%esi), %ecx
1098
1099	/ Place starting timestamp in interrupted thread's thread structure.
1100_tsc_patch10:
1101	nop; nop			/* patched to rdtsc if available */
1102	TSC_STORE(%ecx, T_INTR_START)
1103
1104	movl	T_SP(%ecx), %esp	/* restore stack pointer */
1105	movl	%esp, %ebp
1106	movl	%ecx, CPU_THREAD(%ebx)
1107
1108	movl	CPU_SOFTINFO(%ebx), %edx /* any pending software interrupts */
1109	orl	%edx, %edx
1110	jz	_sys_rtt
1111	jmp	dosoftint	/* check for softints before we return. */
1112
1113	/
1114	/ An interrupt returned on what was once (and still might be)
1115	/ an interrupt thread stack, but the interrupted process is no longer
1116	/ there.  This means the interrupt must have blocked.
1117	/
1118	/ There is no longer a thread under this one, so put this thread back
1119	/ on the CPU's free list and resume the idle thread which will dispatch
1120	/ the next thread to run.
1121	/
1122	/ All interrupts are disabled here
1123	/
1124
1125intr_thread_exit:
1126#ifdef DEBUG
1127	incl	intr_thread_cnt
1128#endif
1129	INC64(%ebx, CPU_STATS_SYS_INTRBLK)	/* cpu_stats.sys.intrblk++ */
1130	/
1131	/ Put thread back on the interrupt thread list.
1132	/ As a reminder, the regs at this point are
1133	/	esi	interrupt thread
1134	/	edi	old ipl
1135	/	ebx	ptr to CPU struct
1136
1137	/ Set CPU's base SPL level based on active interrupts bitmask
1138	call	set_base_spl
1139
1140	movl	CPU_BASE_SPL(%ebx), %edi
1141	movl	%edi, CPU_PRI(%ebx)
1142					/* interrupt vector already on stack */
1143	pushl	%edi
1144	call	*setlvlx
1145	addl	$8, %esp		/* XXX - don't need to pop since */
1146					/* we are ready to switch */
1147	call	splhigh			/* block all intrs below lock level */
1148	/
1149	/ Set the thread state to free so kmdb doesn't see it
1150	/
1151	movl	$FREE_THREAD, T_STATE(%esi)
1152	/
1153	/ Put thread on either the interrupt pool or the free pool and
1154	/ call swtch() to resume another thread.
1155	/
1156	LINK_INTR_THREAD(%ebx, %esi, %edx)
1157	call 	swtch
1158	/ swtch() shouldn't return
1159
1160	SET_SIZE(intr_thread)
1161
1162#endif	/* __lint */
1163#endif	/* __i386 */
1164
/*
 * void set_base_spl(void)
 *
 * Set the CPU's base SPL level, based on which interrupt levels are active.
 *	Called at spl7 or above.
 *
 * The active-interrupt bitmask is read from %gs:CPU_INTR_ACTV and the bit
 * index of the highest active level is stored in %gs:CPU_BASE_SPL.  An
 * empty mask yields a base SPL of 0.  Only bits 0-15 of the mask take part
 * in the search; the code assumes a non-zero mask has at least one of those
 * bits set (bsrl on a zero operand leaves its destination undefined).
 *
 * Registers: only %eax is used; the condition flags are modified.
 */

#if defined(__lint)

void
set_base_spl(void)
{}

#else	/* __lint */

	ENTRY_NP(set_base_spl)
	movl	%gs:CPU_INTR_ACTV, %eax	/* load active interrupts mask */
	testl	%eax, %eax		/* is it zero? */
	jz	setbase			/* yes: base SPL = 0 (%eax is 0) */
	testl	$0xff00, %eax		/* any level in 8..15 active? */
	jnz	ah_set
	/ Only levels 0..7 can be active here.
	shl	$24, %eax		/* shift 'em over so we can find */
					/* the 1st bit faster */
	bsrl	%eax, %eax		/* index of highest set bit (24..31) */
	subl	$24, %eax		/* undo the shift: level 0..7 */
setbase:
	movl	%eax, %gs:CPU_BASE_SPL	/* store base priority */
	ret
ah_set:
	/ At least one of levels 8..15 is active.
	shl	$16, %eax		/* drop bits 16..31 of the mask */
	bsrl	%eax, %eax		/* index of highest set bit (24..31) */
	subl	$16, %eax		/* undo the shift: level 8..15 */
	jmp	setbase
	SET_SIZE(set_base_spl)

#endif	/* __lint */
1199
#if defined(__i386)

/*
 * int
 * intr_passivate(from, to)
 *      thread_id_t     from;           interrupt thread
 *      thread_id_t     to;             interrupted thread
 *
 *	intr_passivate(from, to) makes the interrupted thread "to" runnable.
 *
 *	Since to->t_sp has already been saved, to->t_pc is all that needs
 *	to be set in this function; it is pointed at _sys_rtt.
 *
 *	Returns the interrupt level of the thread, i.e. the word stored
 *	just below the top (T_STACK) of the interrupt thread's stack.
 *
 *	Arguments are taken from the stack (4(%esp) = from, 8(%esp) = to).
 *	Only %eax is used.
 */

#if defined(__lint)

/* ARGSUSED */
int
intr_passivate(kthread_id_t from, kthread_id_t to)
{ return (0); }

#else	/* __lint */

	ENTRY(intr_passivate)
	movl	8(%esp), %eax		/* interrupted thread ("to") */
	movl	$_sys_rtt, T_PC(%eax)	/* set T_PC for interrupted thread */

	movl	4(%esp), %eax		/* interrupt thread ("from") */
	movl	T_STACK(%eax), %eax	/* get the pointer to the start */
					/* of the interrupt thread stack */
	movl	-4(%eax), %eax		/* interrupt level was the first */
					/* thing pushed onto the stack */
	ret				/* level is returned in %eax */
	SET_SIZE(intr_passivate)

#endif	/* __lint */
#endif	/* __i386 */
1239
#if defined(__lint)

void
fakesoftint(void)
{}

#else	/* __lint */

	/
	/ If we're here, we're being called from splx() to fake a soft
	/ interrupt (note that interrupts are still disabled from splx()).
	/ We execute this code when a soft interrupt is posted at
	/ level higher than the CPU's current spl; when spl is lowered in
	/ splx(), it will see the softint and jump here.  We'll do exactly
	/ what a trap would do:  push our flags, %cs, %eip, error code
	/ and trap number (T_SOFTINT).  The cmnint() code will see T_SOFTINT
	/ and branch to the dosoftint() code.
	/
	/ The pushed return address is fakesoftint_return, so that is where
	/ the eventual interrupt return resumes.
	/
#if defined(__amd64)

	/*
	 * In 64-bit mode, iretq -always- pops all five regs
	 * (%rip, %cs, rflags, %rsp, %ss), so all five are pushed here.
	 * Imitate the 16-byte auto-align of the stack, and fill the
	 * %ss slot (the code below pushes KDS_SEL, not zero).
	 *
	 * %r11 is used as scratch: first for the pre-alignment %rsp,
	 * then for the address of fakesoftint_return.
	 */
	ENTRY_NP(fakesoftint)
	movq	%rsp, %r11	/* remember %rsp before aligning it */
	andq	$-16, %rsp	/* round stack down to 16 bytes */
	pushq	$KDS_SEL	/* %ss */
	pushq	%r11		/* %rsp */
	pushf			/* rflags */
	pushq	$KCS_SEL	/* %cs */
	leaq	fakesoftint_return(%rip), %r11
	pushq	%r11		/* %rip */
	pushq	$0		/* err */
	pushq	$T_SOFTINT	/* trap */
	jmp	cmnint
	SET_SIZE(fakesoftint)

#elif defined(__i386)

	ENTRY_NP(fakesoftint)
	pushf				/* eflags */
	push	%cs			/* %cs */
	push	$fakesoftint_return	/* %eip */
	push	$0			/* err */
	push	$T_SOFTINT		/* trap */
	jmp	cmnint
	SET_SIZE(fakesoftint)

#endif	/* __i386 */

	/
	/ _fakesoftint_size: data object holding the distance in bytes from
	/ the fakesoftint entry point to this location.  Because the .align
	/ precedes the label, any alignment padding is part of that distance.
	/
	.align	CPTRSIZE
	.globl	_fakesoftint_size
	.type	_fakesoftint_size, @object
_fakesoftint_size:
	.NWORD	. - fakesoftint
	SET_SIZE(_fakesoftint_size)
1298
/*
 * dosoftint(old_pil in %edi, softinfo in %edx, CPU pointer in %ebx)
 * Process software interrupts.
 * Interrupts are disabled here.
 *
 * This routine is entered with jmp and has no ret: every path ends by
 * jumping to _sys_rtt, by looping back to dosoftint, or by calling
 * swtch() (which is not expected to return).
 *
 * On entry:
 *	%edi	old (interrupted) priority level
 *	%edx	pending soft interrupt bitmask; must be non-zero, since it
 *		is fed straight to bsrl
 *	%ebx	pointer to this CPU's cpu structure
 *
 * For the highest pending level that is above both the old level and the
 * CPU's base SPL, the routine claims the pending bit, raises the IPL,
 * takes an interrupt thread off the CPU's pool, pins the current thread
 * underneath it, runs av_dispatch_softvect(level) with interrupts enabled,
 * then undoes all of that and checks for further pending soft interrupts.
 */
#if defined(__i386)

	ENTRY_NP(dosoftint)

	bsrl	%edx, %edx		/* find highest pending interrupt */
	cmpl	%edx, %edi		/* if curipl >= pri soft pending intr */
	jae	_sys_rtt		/* skip */

	movl	%gs:CPU_BASE_SPL, %eax	/* check for blocked intr threads */
	cmpl	%edx, %eax		/* if basespl >= pri soft pending */
	jae	_sys_rtt		/* skip */

	lock				/* MP protect */
	btrl	%edx, CPU_SOFTINFO(%ebx) /* clear the selected interrupt bit */
	jnc	dosoftint_again		/* bit was already clear: rescan */

	movl	%edx, CPU_PRI(%ebx)	/* set IPL to softint level */
	pushl	%edx
	call	*setspl			/* mask levels up to the softint level */
	popl	%eax			/* priority we are at in %eax */

	/ Get set to run interrupt thread.
	/ There should always be an interrupt thread since we allocate one
	/ for each level on the CPU.
	/ After this, esi = the interrupt thread that will run the handler.
	UNLINK_INTR_THREAD(%ebx, %esi, %edx)

	/
	/ Note that the code in kcpc_overflow_intr -relies- on the ordering
	/ of events here - in particular that t->t_lwp of the interrupt
	/ thread is set to the pinned thread *before* curthread is changed
	/
	movl	CPU_THREAD(%ebx), %ecx	/* ecx = thread being interrupted */

	/ If we are interrupting an interrupt thread, account for it.
	testw	$T_INTR_THREAD, T_FLAGS(%ecx)
	jz	0f
	/
	/ We have interrupted an interrupt thread. Account for its time slice
	/ only if its time stamp is non-zero. t_intr_start may be zero due to
	/ cpu_intr_swtch_enter.
	/
	cmpl	$0, T_INTR_START+4(%ecx)	/* high word non-zero? */
	jne	1f
	cmpl	$0, T_INTR_START(%ecx)		/* low word non-zero? */
	je	0f
1:
	/
	/ eax (the softint level) is saved around the timestamp sequence and
	/ a copy is kept in ebp to index the per-PIL statistics.
	/ NOTE(review): the interval measured belongs to the interrupted
	/ thread in ecx, yet it is charged to the slot indexed by the new
	/ softint level rather than by T_PIL(ecx) - confirm this is intended.
	/
	pushl	%eax
	movl	%eax, %ebp
_tsc_patch11:
	nop; nop			/* patched to rdtsc if available */
	PILBASE_INTRSTAT(%ebx, %ebp)
	TSC_SUB_FROM(%ecx, T_INTR_START)
	TSC_ADD_TO(%ebp, CPU_INTRSTAT)
	INTRACCTBASE(%ebx, %ebp)
	TSC_ADD_TO(%ebp, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */
	popl	%eax
0:
	movl	T_LWP(%ecx), %ebp	/* interrupt thread inherits the */
	movl	%ebp, T_LWP(%esi)	/* pinned thread's t_lwp */
	/
	/ Threads on the interrupt thread free list could have state already
	/ set to TS_ONPROC, but it helps in debugging if they're TS_FREE
	/ Could eliminate the following store with a little work.
	/
	movl	$ONPROC_THREAD, T_STATE(%esi)
	/
	/ Push interrupted thread onto list from new thread.
	/ Set the new thread as the current one.
	/ Set interrupted thread's T_SP because if it is the idle thread,
	/ Resume() may use that stack between threads.
	/
	movl	%esp, T_SP(%ecx)		/* mark stack for resume */
	movl	%ecx, T_INTR(%esi)		/* push old thread */
	movl	%esi, CPU_THREAD(%ebx)		/* set new thread */
	movl	T_STACK(%esi), %esp		/* interrupt stack pointer */
	movl	%esp, %ebp

	pushl	%eax			/* push ipl as first element in stack */
					/* see intr_passivate() */
	/
	/ Set bit for this PIL in CPU's interrupt active bitmask.
	/

	ASSERT_NOT_CPU_INTR_ACTV(%eax, %ebx, _dosoftint_actv_bit_set)

	btsl	%eax, CPU_INTR_ACTV(%ebx)

	/
	/ Initialize thread priority level from intr_pri
	/
	movb	%al, T_PIL(%esi)	/* store pil */
	movzwl	intr_pri, %ecx
	addl	%eax, %ecx		/* convert level to dispatch priority */
	movw	%cx, T_PRI(%esi)

	/
	/ Store starting timestamp in thread structure.
	/ esi = thread, ebx = cpu pointer, eax = PIL
	/
	movl	%eax, %ecx		/* save PIL from rdtsc clobber */
_tsc_patch12:
	nop; nop			/* patched to rdtsc if available */
	TSC_STORE(%esi, T_INTR_START)

	sti				/* enable interrupts */

	/
	/ Enabling interrupts (above) could raise the current
	/ IPL and base SPL. But, we continue processing the current soft
	/ interrupt and we will check the base SPL next time in the loop
	/ so that blocked interrupt thread would get a chance to run.
	/

	/
	/ dispatch soft interrupts
	/
	pushl	%ecx			/* argument: the softint PIL */
	call	av_dispatch_softvect
	addl	$4, %esp

	cli				/* protect interrupt thread pool */
					/* and softinfo & sysinfo */
	movl	CPU_THREAD(%ebx), %esi	/* restore thread pointer */
	movzbl	T_PIL(%esi), %ecx

	/ cpu_stats.sys.intr[PIL]++
	INC_CPU_STATS_INTR(%ecx, %edx, %edx, %ebx)

	/
	/ Clear bit for this PIL in CPU's interrupt active bitmask.
	/

	ASSERT_CPU_INTR_ACTV(%ecx, %ebx, _dosoftint_actv_bit_not_set)

	btrl	%ecx, CPU_INTR_ACTV(%ebx)

	/
	/ Take timestamp, compute interval, update cumulative counter.
	/ esi = thread, ebx = cpu, ecx = PIL
	/
	PILBASE_INTRSTAT(%ebx, %ecx)
_tsc_patch13:
	nop; nop		/* patched to rdtsc if available */
	TSC_SUB_FROM(%esi, T_INTR_START)
	TSC_ADD_TO(%ecx, CPU_INTRSTAT)
	INTRACCTBASE(%ebx, %ecx)
	TSC_ADD_TO(%ecx, CPU_INTRACCT)	/* cpu_intracct[cpu_mstate] += tsc */

	/ if there is still an interrupt thread underneath this one
	/ then the interrupt was never blocked and the return is fairly
	/ simple.  Otherwise jump to softintr_thread_exit.
	/ softintr_thread_exit expects esi to be curthread; it reloads ebx
	/ with the CPU pointer itself.
	cmpl	$0, T_INTR(%esi)
	je	softintr_thread_exit

	/
	/ link the thread back onto the interrupt thread pool
	LINK_INTR_THREAD(%ebx, %esi, %edx)

	/ set the thread state to free so kmdb doesn't see it
	movl	$FREE_THREAD, T_STATE(%esi)
	/
	/ Switch back to the interrupted thread
	movl	T_INTR(%esi), %ecx
	movl	%ecx, CPU_THREAD(%ebx)
	movl	T_SP(%ecx), %esp	/* restore stack pointer */
	movl	%esp, %ebp

	/ If we are returning to an interrupt thread, store a starting
	/ timestamp in the thread structure.
	testw	$T_INTR_THREAD, T_FLAGS(%ecx)
	jz	0f
_tsc_patch14:
	nop; nop			/* patched to rdtsc if available */
	TSC_STORE(%ecx, T_INTR_START)
0:
	movl	CPU_BASE_SPL(%ebx), %eax
	cmpl	%eax, %edi		/* if (oldipl >= basespl) */
	jae	softintr_restore_ipl	/* then use oldipl */
	movl	%eax, %edi		/* else use basespl */
softintr_restore_ipl:
	movl	%edi, CPU_PRI(%ebx)	/* set IPL to old level */
	pushl	%edi
	call	*setspl
	popl	%eax
dosoftint_again:
	movl	CPU_SOFTINFO(%ebx), %edx /* any pending software interrupts */
	orl	%edx, %edx
	jz	_sys_rtt
	jmp	dosoftint		/* process more software interrupts */

softintr_thread_exit:
	/
	/ Reached when T_INTR of the interrupt thread is NULL, i.e. there is
	/ no longer a pinned thread underneath it to return to.
	/
	/ Put thread back on the interrupt thread list.
	/ As a reminder, the regs at this point are
	/	%esi	interrupt thread

	/
	/ This was an interrupt thread, so set CPU's base SPL level
	/ set_base_spl only uses %eax.
	/
	call	set_base_spl		/* interrupt vector already on stack */
	/
	/ Set the thread state to free so kmdb doesn't see it
	/
	movl	$FREE_THREAD, T_STATE(%esi)
	/
	/ Put thread on either the interrupt pool or the free pool and
	/ call swtch() to resume another thread.
	/
	LOADCPU(%ebx)
	LINK_INTR_THREAD(%ebx, %esi, %edx)
	call	splhigh			/* block all intrs below lock lvl */
	call	swtch
	/ swtch() shouldn't return
	SET_SIZE(dosoftint)

#endif	/* __i386 */
1522#endif	/* __lint */
1523
1524#if defined(lint)
1525
1526/*
1527 * intr_get_time() is a resource for interrupt handlers to determine how
1528 * much time has been spent handling the current interrupt. Such a function
1529 * is needed because higher level interrupts can arrive during the
1530 * processing of an interrupt, thus making direct comparisons of %tick by
1531 * the handler inaccurate. intr_get_time() only returns time spent in the
1532 * current interrupt handler.
1533 *
1534 * The caller must be calling from an interrupt handler running at a pil
1535 * below or at lock level. Timings are not provided for high-level
1536 * interrupts.
1537 *
1538 * The first time intr_get_time() is called while handling an interrupt,
1539 * it returns the time since the interrupt handler was invoked. Subsequent
1540 * calls will return the time since the prior call to intr_get_time(). Time
1541 * is returned as ticks. Use tsc_scalehrtime() to convert ticks to nsec.
1542 *
1543 * Theory Of Intrstat[][]:
1544 *
1545 * uint64_t intrstat[pil][0..1] is an array indexed by pil level, with two
1546 * uint64_ts per pil.
1547 *
1548 * intrstat[pil][0] is a cumulative count of the number of ticks spent
1549 * handling all interrupts at the specified pil on this CPU. It is
1550 * exported via kstats to the user.
1551 *
1552 * intrstat[pil][1] is always a count of ticks less than or equal to the
1553 * value in [0]. The difference between [1] and [0] is the value returned
1554 * by a call to intr_get_time(). At the start of interrupt processing,
1555 * [0] and [1] will be equal (or nearly so). As the interrupt consumes
1556 * time, [0] will increase, but [1] will remain the same. A call to
1557 * intr_get_time() will return the difference, then update [1] to be the
1558 * same as [0]. Future calls will return the time since the last call.
1559 * Finally, when the interrupt completes, [1] is updated to the same as [0].
1560 *
1561 * Implementation:
1562 *
1563 * intr_get_time() works much like a higher level interrupt arriving. It
1564 * "checkpoints" the timing information by incrementing intrstat[pil][0]
1565 * to include elapsed running time, and by setting t_intr_start to rdtsc.
1566 * It then sets the return value to intrstat[pil][0] - intrstat[pil][1],
1567 * and updates intrstat[pil][1] to be the same as the new value of
1568 * intrstat[pil][0].
1569 *
1570 * In the normal handling of interrupts, after an interrupt handler returns
1571 * and the code in intr_thread() updates intrstat[pil][0], it then sets
1572 * intrstat[pil][1] to the new value of intrstat[pil][0]. When [0] == [1],
1573 * the timings are reset, i.e. intr_get_time() will return [0] - [1] which
1574 * is 0.
1575 *
1576 * Whenever interrupts arrive on a CPU which is handling a lower pil
1577 * interrupt, they update the lower pil's [0] to show time spent in the
1578 * handler that they've interrupted. This results in a growing discrepancy
1579 * between [0] and [1], which is returned the next time intr_get_time() is
1580 * called. Time spent in the higher-pil interrupt will not be returned in
1581 * the next intr_get_time() call from the original interrupt, because
1582 * the higher-pil interrupt's time is accumulated in intrstat[higherpil][].
1583 */
1584
1585/*ARGSUSED*/
1586uint64_t
1587intr_get_time(void)
1588{ return 0; }
1589#else	/* lint */
1590
1591
1592#if defined(__amd64)
1593	ENTRY_NP(intr_get_time)
1594	cli				/* make this easy -- block intrs */
1595	LOADCPU(%rdi)
1596	call	intr_thread_get_time
1597	sti
1598	ret
1599	SET_SIZE(intr_get_time)
1600
1601#elif defined(__i386)
1602
1603#ifdef DEBUG
1604
1605
/*
 * Panic messages for the DEBUG-only assertions used by intr_get_time().
 * They are handed to __PANIC as the message, so a literal percent sign
 * has to be written as "%%" -- a bare "%pil" would be read as a "%p"
 * conversion with no matching argument.
 */
_intr_get_time_high_pil:
	.string	"intr_get_time(): %%pil > LOCK_LEVEL"
_intr_get_time_not_intr:
	.string	"intr_get_time(): not called from an interrupt thread"
_intr_get_time_no_start_time:
	.string	"intr_get_time(): t_intr_start == 0"

/*
 * ASSERT(%pil <= LOCK_LEVEL)
 *
 * cpureg holds the CPU pointer.  Falls through to local label 0 when none
 * of the CPU_INTR_ACTV_HIGH_LEVEL_MASK bits are set in the CPU's active
 * interrupt mask; otherwise invokes __PANIC.  Modifies the flags.
 */
#define	ASSERT_PIL_BELOW_LOCK_LEVEL(cpureg)				\
	testl	$CPU_INTR_ACTV_HIGH_LEVEL_MASK, CPU_INTR_ACTV(cpureg);	\
	jz	0f;							\
	__PANIC(_intr_get_time_high_pil, 0f);				\
0:

/*
 * ASSERT((t_flags & T_INTR_THREAD) != 0 && t_pil > 0)
 *
 * thrreg holds the thread pointer.  Invokes __PANIC (at local label 1)
 * when the thread is not an interrupt thread or its t_pil is zero.
 * Modifies the flags.
 */
#define	ASSERT_NO_PIL_0_INTRS(thrreg)			\
	testw	$T_INTR_THREAD, T_FLAGS(thrreg);	\
	jz	1f;					\
	cmpb	$0, T_PIL(thrreg);			\
	jne	0f;					\
1:							\
	__PANIC(_intr_get_time_not_intr, 0f);		\
0:

/*
 * ASSERT(t_intr_start != 0)
 *
 * thrreg holds the thread pointer.  Both 32-bit halves of the 64-bit
 * t_intr_start are checked; __PANIC is invoked only when both are zero.
 * Modifies the flags.
 */
#define	ASSERT_INTR_START_NOT_0(thrreg)			\
	cmpl	$0, T_INTR_START(thrreg);		\
	jnz	0f;					\
	cmpl	$0, T_INTR_START+4(thrreg);		\
	jnz	0f;					\
	__PANIC(_intr_get_time_no_start_time, 0f);	\
0:
1644
1645#endif /* DEBUG */
1646
1647	ENTRY_NP(intr_get_time)
1648
1649	cli				/* make this easy -- block intrs */
1650	pushl	%esi			/* and free up some registers */
1651	pushl	%ebx
1652
1653	LOADCPU(%esi)
1654	movl	CPU_THREAD(%esi), %ecx
1655
1656#ifdef DEBUG
1657	ASSERT_PIL_BELOW_LOCK_LEVEL(%esi)
1658	ASSERT_NO_PIL_0_INTRS(%ecx)
1659	ASSERT_INTR_START_NOT_0(%ecx)
1660#endif /* DEBUG */
1661
1662_tsc_patch17:
1663	nop; nop			/* patched to rdtsc if available */
1664	TSC_SUB_FROM(%ecx, T_INTR_START)	/* get elapsed time */
1665	TSC_ADD_TO(%ecx, T_INTR_START)		/* T_INTR_START = rdtsc */
1666
1667	INTRACCTBASE(%esi, %ebx)			/* %ebx = CPU + cpu_mstate*8 */
1668	TSC_ADD_TO(%ebx, CPU_INTRACCT);		/* intracct[ms] += elapsed */
1669	movzbl	T_PIL(%ecx), %ecx			/* %ecx = pil */
1670	PILBASE_INTRSTAT(%esi, %ecx)		/* %ecx = CPU + pil*16 */
1671	TSC_ADD_TO(%ecx, CPU_INTRSTAT)		/* intrstat[0] += elapsed */
1672	TSC_LOAD(%ecx, CPU_INTRSTAT)		/* get new intrstat[0] */
1673	TSC_SUB_FROM(%ecx, CPU_INTRSTAT+8)	/* diff with intrstat[1] */
1674	TSC_ADD_TO(%ecx, CPU_INTRSTAT+8)	/* intrstat[1] = intrstat[0] */
1675
1676	/* %edx/%eax contain difference between old and new intrstat[1] */
1677
1678	popl	%ebx
1679	popl	%esi
1680	sti
1681	ret
1682	SET_SIZE(intr_get_time)
1683#endif	/* __i386 */
1684
1685#endif  /* lint */
1686