xref: /titanic_51/usr/src/uts/sun4u/ml/trap_table.s (revision f2a3c691e1fab4dee486fd83642311ec59dc3732)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#if !defined(lint)
29#include "assym.h"
30#endif /* !lint */
31#include <sys/asm_linkage.h>
32#include <sys/privregs.h>
33#include <sys/sun4asi.h>
34#include <sys/cheetahregs.h>
35#include <sys/machtrap.h>
36#include <sys/machthread.h>
37#include <sys/pcb.h>
38#include <sys/pte.h>
39#include <sys/mmu.h>
40#include <sys/machpcb.h>
41#include <sys/async.h>
42#include <sys/intreg.h>
43#include <sys/scb.h>
44#include <sys/psr_compat.h>
45#include <sys/syscall.h>
46#include <sys/machparam.h>
47#include <sys/traptrace.h>
48#include <vm/hat_sfmmu.h>
49#include <sys/archsystm.h>
50#include <sys/utrap.h>
51#include <sys/clock.h>
52#include <sys/intr.h>
53#include <sys/fpu/fpu_simulator.h>
54#include <vm/seg_spt.h>
55
56/*
57 * WARNING: If you add a fast trap handler which can be invoked by a
58 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
59 * instead of "done" instruction to return back to the user mode. See
60 * comments for the "fast_trap_done" entry point for more information.
61 *
62 * An alternate FAST_TRAP_DONE_CHK_INTR macro should be used for the
63 * cases where you always want to process any pending interrupts before
64 * returning back to the user mode.
65 */
/*
 * FAST_TRAP_DONE expands to a single annulled branch-always to
 * fast_trap_done (defined outside this section).  Because of the ",a"
 * the instruction following the expansion is not executed as a delay
 * slot.
 */
66#define	FAST_TRAP_DONE		\
67	ba,a	fast_trap_done
68
/*
 * FAST_TRAP_DONE_CHK_INTR expands to a single annulled branch-always
 * to fast_trap_done_chk_intr (defined outside this section); no delay
 * slot instruction is executed.
 */
69#define	FAST_TRAP_DONE_CHK_INTR	\
70	ba,a	fast_trap_done_chk_intr
71
72/*
73 * SPARC V9 Trap Table
74 *
75 * Most of the trap handlers are made from common building
76 * blocks, and some are instantiated multiple times within
77 * the trap table. So, I build a bunch of macros, then
78 * populate the table using only the macros.
79 *
80 * Many macros branch to sys_trap.  Its calling convention is:
81 *	%g1		kernel trap handler
82 *	%g2, %g3	args for above
83 *	%g4		desired %pil
84 */
85
86#ifdef	TRAPTRACE
87
88/*
89 * Tracing macro. Adds two instructions if TRAPTRACE is defined.
90 */
/*
 * TT_TRACE(label): branch to the trace routine `label'; the delay slot
 * copies the address of the rd instruction itself into %g7 (clobbered).
 * Presumably the trace routine uses %g7 to return to the trap table
 * entry -- confirm against the routine.  TT_TRACE_INS is the number of
 * instruction slots the expansion occupies.
 */
91#define	TT_TRACE(label)		\
92	ba	label		;\
93	rd	%pc, %g7
94#define	TT_TRACE_INS	2
95
/*
 * TT_TRACE_L(label): same scheme, but the address is passed in %l4 and
 * %l4 is cleared afterwards, so the expansion occupies three slots.
 * The window handlers subtract TT_TRACE_L_INS from their SKIP() padding.
 */
96#define	TT_TRACE_L(label)	\
97	ba	label		;\
98	rd	%pc, %l4	;\
99	clr	%l4
100#define	TT_TRACE_L_INS	3
101
102#else
103
/*
 * Without TRAPTRACE both macros expand to nothing and occupy no slots.
 */
104#define	TT_TRACE(label)
105#define	TT_TRACE_INS	0
106
107#define	TT_TRACE_L(label)
108#define	TT_TRACE_L_INS	0
109
110#endif
111
112/*
113 * This macro is used to update per cpu mmu stats in perf critical
114 * paths. It is only enabled in debug kernels or if SFMMU_STAT_GATHER
115 * is defined.
116 */
117#if defined(DEBUG) || defined(SFMMU_STAT_GATHER)
/*
 * HAT_PERCPU_DBSTAT(stat): three instructions.  Loads `stat' into %g1
 * and branches to stat_mmu (defined outside this section) with the
 * address of the delay-slot rd instruction in %g7.  Clobbers %g1, %g7.
 * In other builds the macro expands to nothing.
 */
118#define	HAT_PERCPU_DBSTAT(stat)			\
119	mov	stat, %g1			;\
120	ba	stat_mmu			;\
121	rd	%pc, %g7
122#else
123#define	HAT_PERCPU_DBSTAT(stat)
124#endif /* DEBUG || SFMMU_STAT_GATHER */
125
126/*
127 * This first set are funneled to trap() with %tt as the type.
128 * Trap will then either panic or send the user a signal.
129 */
130/*
131 * NOT is used for traps that just shouldn't happen.
132 * It comes in both single and quadruple flavors.
133 */
134#if !defined(lint)
135	.global	trap
136#endif /* !lint */
/*
 * NOT: one trap table entry that hands the trap to trap() via sys_trap.
 *	%g1 = address of trap (the kernel handler)
 *	%g3 = current %tt (trap type)
 *	%g4 = -1, set in the delay slot of the branch to sys_trap
 * %g2 is not set here.  The meaning sys_trap attaches to %g4 == -1 is
 * not visible in this section.  `.align 32' pads the entry to the next
 * 32-byte boundary, i.e. eight 4-byte instruction slots.
 */
137#define	NOT			\
138	TT_TRACE(trace_gen)	;\
139	set	trap, %g1	;\
140	rdpr	%tt, %g3	;\
141	ba,pt	%xcc, sys_trap	;\
142	sub	%g0, 1, %g4	;\
143	.align	32
/* NOT4: four consecutive NOT entries. */
144#define	NOT4	NOT; NOT; NOT; NOT
145/*
146 * RED is for traps that use the red mode handler.
147 * We should never see these either.
148 */
/* RED is a plain alias for NOT; it generates the same entry. */
149#define	RED	NOT
150/*
151 * BAD is used for trap vectors we don't have a kernel
152 * handler for.
153 * It also comes in single and quadruple versions.
154 */
/* BAD and BAD4 are plain aliases for NOT and NOT4 respectively. */
155#define	BAD	NOT
156#define	BAD4	NOT4
157
/*
 * DONE: a trap table entry consisting only of the `done' instruction
 * (return from trap), padded to the next 32-byte boundary.
 */
158#define	DONE			\
159	done;			\
160	.align	32
161
162/*
163 * TRAP vectors to the trap() function.
164 * Its main use is for user errors.
165 */
166#if !defined(lint)
167	.global	trap
168#endif /* !lint */
/*
 * TRAP(arg): same shape as NOT, except that %g3 receives the constant
 * `arg' instead of the hardware %tt value.
 *	%g1 = address of trap
 *	%g3 = arg
 *	%g4 = -1, set in the delay slot of the branch to sys_trap
 * Padded to the next 32-byte boundary.
 */
169#define	TRAP(arg)		\
170	TT_TRACE(trace_gen)	;\
171	set	trap, %g1	;\
172	mov	arg, %g3	;\
173	ba,pt	%xcc, sys_trap	;\
174	sub	%g0, 1, %g4	;\
175	.align	32
176
177/*
178 * SYSCALL is used for system calls on both ILP32 and LP64 kernels
179 * depending on the "which" parameter (should be syscall_trap,
180 * syscall_trap32, or nosys for unused system call traps).
181 */
/*
 * SYSCALL(which): %g1 = address of `which', %g4 = -1 (delay slot),
 * then branch to sys_trap.  %g2 and %g3 are left untouched by this
 * macro.  Padded to the next 32-byte boundary.
 */
182#define	SYSCALL(which)			\
183	TT_TRACE(trace_gen)		;\
184	set	(which), %g1		;\
185	ba,pt	%xcc, sys_trap		;\
186	sub	%g0, 1, %g4		;\
187	.align	32
188
/*
 * FLUSHW(): executes save / flushw / restore and then leaves through
 * FAST_TRAP_DONE.  Padded to the next 32-byte boundary.
 *
 * NOTE(review): %g1 (trap), %g3 (T_FLUSHW) and %g4 (-1) are loaded in
 * the sys_trap argument layout, yet this macro never branches to
 * sys_trap itself.  Presumably they are consumed if the save/flushw
 * takes a nested window trap that ends up in sys_trap -- confirm.
 */
189#define	FLUSHW()			\
190	set	trap, %g1		;\
191	mov	T_FLUSHW, %g3		;\
192	sub	%g0, 1, %g4		;\
193	save				;\
194	flushw				;\
195	restore				;\
196	FAST_TRAP_DONE			;\
197	.align	32
198
199/*
200 * GOTO just jumps to a label.
201 * It's used for things that can be fixed without going thru sys_trap.
202 */
/*
 * GOTO(label): declares `label' global and emits an annulled
 * branch-always to it, so no delay slot instruction is executed.
 * `.empty' follows the branch (assembler directive; it is understood to
 * suppress the delay-slot warning), and the entry is padded to the next
 * 32-byte boundary.  No registers are modified.
 */
203#define	GOTO(label)		\
204	.global	label		;\
205	ba,a	label		;\
206	.empty			;\
207	.align	32
208
209/*
210 * GOTO_TT just jumps to a label.
211 * Correctable ECC error traps at level 0 and 1 will use this macro.
212 * It's used for things that can be fixed without going thru sys_trap.
213 */
/*
 * GOTO_TT(label, ttlabel): like GOTO, but first runs TT_TRACE(ttlabel)
 * (which clobbers %g7 when TRAPTRACE is defined) before the annulled
 * branch to `label'.  Padded to the next 32-byte boundary.
 */
214#define	GOTO_TT(label, ttlabel)		\
215	.global	label		;\
216	TT_TRACE(ttlabel)	;\
217	ba,a	label		;\
218	.empty			;\
219	.align	32
220
221/*
222 * Privileged traps
223 * Takes breakpoint if privileged, calls trap() if not.
224 */
/*
 * PRIV(label): reads %tstate and tests TSTATE_PRIV.
 *   - bit set:   branch to `label'.
 *   - bit clear: %g1 = address of trap, %g4 = -1 (delay slot), branch
 *                to sys_trap.
 * The `rdpr %tt, %g3' sits in the delay slot of the non-annulled bnz,
 * so %g3 holds the trap type on both paths.  %g1 holds %tstate when
 * `label' is reached.  Padded to the next 32-byte boundary.
 */
225#define	PRIV(label)			\
226	rdpr	%tstate, %g1		;\
227	btst	TSTATE_PRIV, %g1	;\
228	bnz	label			;\
229	rdpr	%tt, %g3		;\
230	set	trap, %g1		;\
231	ba,pt	%xcc, sys_trap		;\
232	sub	%g0, 1, %g4		;\
233	.align	32
234
235
236/*
237 * DTrace traps.
238 */
/*
 * DTRACE_FASTTRAP: loads the pointer-sized variable
 * dtrace_fasttrap_probe_ptr into %g4 and sets %g1 to the address of
 * dtrace_fasttrap_probe.
 *   - pointer non-zero: branch to user_trap.
 *   - pointer zero:     fall through to FAST_TRAP_DONE.
 * brnz tests %g4 before its (non-annulled) delay slot overwrites %g4
 * with -1, so %g4 == -1 on both paths.  Clobbers %g1, %g4.  Padded to
 * the next 32-byte boundary.
 */
239#define	DTRACE_FASTTRAP			\
240	.global dtrace_fasttrap_probe				;\
241	.global dtrace_fasttrap_probe_ptr			;\
242	sethi	%hi(dtrace_fasttrap_probe_ptr), %g4		;\
243	ldn	[%g4 + %lo(dtrace_fasttrap_probe_ptr)], %g4	;\
244	set	dtrace_fasttrap_probe, %g1			;\
245	brnz,pn	%g4, user_trap					;\
246	sub	%g0, 1, %g4					;\
247	FAST_TRAP_DONE						;\
248	.align	32
249
/*
 * DTRACE_PID: %g1 = address of dtrace_pid_probe, %g4 = -1 (delay
 * slot), then unconditional branch to user_trap.  Padded to the next
 * 32-byte boundary.
 */
250#define	DTRACE_PID			\
251	.global dtrace_pid_probe				;\
252	set	dtrace_pid_probe, %g1				;\
253	ba,pt	%xcc, user_trap					;\
254	sub	%g0, 1, %g4					;\
255	.align	32
256
/*
 * DTRACE_RETURN: %g1 = address of dtrace_return_probe, %g4 = -1 (delay
 * slot), then unconditional branch to user_trap.  Padded to the next
 * 32-byte boundary.
 */
257#define	DTRACE_RETURN			\
258	.global dtrace_return_probe				;\
259	set	dtrace_return_probe, %g1			;\
260	ba,pt	%xcc, user_trap					;\
261	sub	%g0, 1, %g4					;\
262	.align	32
263
264/*
265 * REGISTER WINDOW MANAGEMENT MACROS
266 */
267
268/*
269 * various convenient units of padding
270 */
/* SKIP(n) reserves n instruction slots (4 bytes each) via .skip. */
271#define	SKIP(n)	.skip 4*(n)
272
273/*
274 * CLEAN_WINDOW is the simple handler for cleaning a register window.
275 */
/*
 * CLEAN_WINDOW: increments %cleanwin (using %l0 as scratch), zeroes
 * %l0-%l7 and %o0-%o7 of the current window, and re-executes the
 * trapping instruction with retry.  Padded to the next 128-byte
 * boundary (32 instruction slots).  TT_TRACE_L runs first; the %l4 it
 * uses is among the registers cleared afterwards.
 */
276#define	CLEAN_WINDOW						\
277	TT_TRACE_L(trace_win)					;\
278	rdpr %cleanwin, %l0; inc %l0; wrpr %l0, %cleanwin	;\
279	clr %l0; clr %l1; clr %l2; clr %l3			;\
280	clr %l4; clr %l5; clr %l6; clr %l7			;\
281	clr %o0; clr %o1; clr %o2; clr %o3			;\
282	clr %o4; clr %o5; clr %o6; clr %o7			;\
283	retry; .align 128
284
285#if !defined(lint)
286
287/*
288 * If we get an unresolved tlb miss while in a window handler, the fault
289 * handler will resume execution at the last instruction of the window
290 * handler, instead of delivering the fault to the kernel.  Spill handlers
291 * use this to spill windows into the wbuf.
292 *
293 * The mixed handler works by checking %sp, and branching to the correct
294 * handler.  This is done by branching back to label 1: for 32b frames,
295 * or label 2: for 64b frames; which implies the handler order is: 32b,
296 * 64b, mixed.  The 1: and 2: labels are offset into the routines to
297 * allow the branches' delay slots to contain useful instructions.
298 */
299
300/*
301 * SPILL_32bit spills a 32-bit-wide kernel register window.  It
302 * assumes that the kernel context and the nucleus context are the
303 * same.  The stack pointer is required to be eight-byte aligned even
304 * though this code only needs it to be four-byte aligned.
305 */
/*
 * Layout of the 32-slot handler generated by SPILL_32bit(tail):
 *   - srl clears the upper 32 bits of %sp;
 *   - label 1: marks the first store, so a branch that performs the
 *     srl in its own delay slot (see SPILL_mixed) can enter there;
 *   - %l0-%l7 and %i0-%i7 are stored as 32-bit words at %sp+0 .. +60;
 *   - TT_TRACE_L comes after the stores because it clobbers %l4;
 *   - saved/retry complete the spill: 19 instructions in total, plus
 *     TT_TRACE_L_INS when tracing is compiled in;
 *   - SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_32bit_<tail> (name pasted together with an empty comment).
 */
306#define	SPILL_32bit(tail)					\
307	srl	%sp, 0, %sp					;\
3081:	st	%l0, [%sp + 0]					;\
309	st	%l1, [%sp + 4]					;\
310	st	%l2, [%sp + 8]					;\
311	st	%l3, [%sp + 12]					;\
312	st	%l4, [%sp + 16]					;\
313	st	%l5, [%sp + 20]					;\
314	st	%l6, [%sp + 24]					;\
315	st	%l7, [%sp + 28]					;\
316	st	%i0, [%sp + 32]					;\
317	st	%i1, [%sp + 36]					;\
318	st	%i2, [%sp + 40]					;\
319	st	%i3, [%sp + 44]					;\
320	st	%i4, [%sp + 48]					;\
321	st	%i5, [%sp + 52]					;\
322	st	%i6, [%sp + 56]					;\
323	st	%i7, [%sp + 60]					;\
324	TT_TRACE_L(trace_win)					;\
325	saved							;\
326	retry							;\
327	SKIP(31-19-TT_TRACE_L_INS)				;\
328	ba,a,pt	%xcc, fault_32bit_/**/tail			;\
329	.empty
330
331/*
332 * SPILL_32bit_asi spills a 32-bit-wide register window into a 32-bit
333 * wide address space via the designated asi.  It is used to spill
334 * non-kernel windows.  The stack pointer is required to be eight-byte
335 * aligned even though this code only needs it to be four-byte
336 * aligned.
337 */
/*
 * Layout of the 32-slot handler generated by SPILL_32bit_asi:
 *   - srl clears the upper 32 bits of %sp; label 1: marks the first
 *     store;
 *   - stores use register+register addressing with an explicit ASI:
 *     %g1/%g2/%g3 hold the offsets 4/8/12 and %g4 is a moving base
 *     (%sp+16, +32, +48), so all sixteen words land at %sp+0 .. +60;
 *   - clobbers %g1-%g4;
 *   - 25 instructions including saved/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_32bit_<tail>.
 */
338#define	SPILL_32bit_asi(asi_num, tail)				\
339	srl	%sp, 0, %sp					;\
3401:	sta	%l0, [%sp + %g0]asi_num				;\
341	mov	4, %g1						;\
342	sta	%l1, [%sp + %g1]asi_num				;\
343	mov	8, %g2						;\
344	sta	%l2, [%sp + %g2]asi_num				;\
345	mov	12, %g3						;\
346	sta	%l3, [%sp + %g3]asi_num				;\
347	add	%sp, 16, %g4					;\
348	sta	%l4, [%g4 + %g0]asi_num				;\
349	sta	%l5, [%g4 + %g1]asi_num				;\
350	sta	%l6, [%g4 + %g2]asi_num				;\
351	sta	%l7, [%g4 + %g3]asi_num				;\
352	add	%g4, 16, %g4					;\
353	sta	%i0, [%g4 + %g0]asi_num				;\
354	sta	%i1, [%g4 + %g1]asi_num				;\
355	sta	%i2, [%g4 + %g2]asi_num				;\
356	sta	%i3, [%g4 + %g3]asi_num				;\
357	add	%g4, 16, %g4					;\
358	sta	%i4, [%g4 + %g0]asi_num				;\
359	sta	%i5, [%g4 + %g1]asi_num				;\
360	sta	%i6, [%g4 + %g2]asi_num				;\
361	sta	%i7, [%g4 + %g3]asi_num				;\
362	TT_TRACE_L(trace_win)					;\
363	saved							;\
364	retry							;\
365	SKIP(31-25-TT_TRACE_L_INS)				;\
366	ba,a,pt %xcc, fault_32bit_/**/tail			;\
367	.empty
368
369/*
370 * SPILL_32bit_tt1 spills a 32-bit-wide register window into a 32-bit
371 * wide address space via the designated asi.  It is used to spill
372 * windows at tl>1 where performance isn't the primary concern and
373 * where we don't want to use unnecessary registers.  The stack
374 * pointer is required to be eight-byte aligned even though this code
375 * only needs it to be four-byte aligned.
376 */
/*
 * Layout of the 32-slot handler generated by SPILL_32bit_tt1:
 *   - %asi is loaded with asi_num, so the stores can use the
 *     register+immediate form and no global registers are touched;
 *   - label 1: is on the srl that clears the upper 32 bits of %sp;
 *   - sixteen 32-bit stores to %sp+0 .. +60 through %asi;
 *   - 20 instructions including saved/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_32bit_<tail>.
 */
377#define	SPILL_32bit_tt1(asi_num, tail)				\
378	mov	asi_num, %asi					;\
3791:	srl	%sp, 0, %sp					;\
380	sta	%l0, [%sp + 0]%asi				;\
381	sta	%l1, [%sp + 4]%asi				;\
382	sta	%l2, [%sp + 8]%asi				;\
383	sta	%l3, [%sp + 12]%asi				;\
384	sta	%l4, [%sp + 16]%asi				;\
385	sta	%l5, [%sp + 20]%asi				;\
386	sta	%l6, [%sp + 24]%asi				;\
387	sta	%l7, [%sp + 28]%asi				;\
388	sta	%i0, [%sp + 32]%asi				;\
389	sta	%i1, [%sp + 36]%asi				;\
390	sta	%i2, [%sp + 40]%asi				;\
391	sta	%i3, [%sp + 44]%asi				;\
392	sta	%i4, [%sp + 48]%asi				;\
393	sta	%i5, [%sp + 52]%asi				;\
394	sta	%i6, [%sp + 56]%asi				;\
395	sta	%i7, [%sp + 60]%asi				;\
396	TT_TRACE_L(trace_win)					;\
397	saved							;\
398	retry							;\
399	SKIP(31-20-TT_TRACE_L_INS)				;\
400	ba,a,pt	%xcc, fault_32bit_/**/tail			;\
401	.empty
402
403
404/*
405 * FILL_32bit fills a 32-bit-wide kernel register window.  It assumes
406 * that the kernel context and the nucleus context are the same.  The
407 * stack pointer is required to be eight-byte aligned even though this
408 * code only needs it to be four-byte aligned.
409 */
/*
 * Layout of the 32-slot handler generated by FILL_32bit(tail):
 *   - srl clears the upper 32 bits of %sp; label 1: follows it so a
 *     branch carrying the srl in its delay slot (see FILL_mixed) can
 *     enter there;
 *   - TT_TRACE_L runs before the loads: the %l4 it clobbers is
 *     overwritten by the fill;
 *   - %l0-%l7 and %i0-%i7 are loaded as 32-bit words from %sp+0 .. +60;
 *   - 19 instructions including restored/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_32bit_<tail>.
 */
410#define	FILL_32bit(tail)					\
411	srl	%sp, 0, %sp					;\
4121:	TT_TRACE_L(trace_win)					;\
413	ld	[%sp + 0], %l0					;\
414	ld	[%sp + 4], %l1					;\
415	ld	[%sp + 8], %l2					;\
416	ld	[%sp + 12], %l3					;\
417	ld	[%sp + 16], %l4					;\
418	ld	[%sp + 20], %l5					;\
419	ld	[%sp + 24], %l6					;\
420	ld	[%sp + 28], %l7					;\
421	ld	[%sp + 32], %i0					;\
422	ld	[%sp + 36], %i1					;\
423	ld	[%sp + 40], %i2					;\
424	ld	[%sp + 44], %i3					;\
425	ld	[%sp + 48], %i4					;\
426	ld	[%sp + 52], %i5					;\
427	ld	[%sp + 56], %i6					;\
428	ld	[%sp + 60], %i7					;\
429	restored						;\
430	retry							;\
431	SKIP(31-19-TT_TRACE_L_INS)				;\
432	ba,a,pt	%xcc, fault_32bit_/**/tail			;\
433	.empty
434
435/*
436 * FILL_32bit_asi fills a 32-bit-wide register window from a 32-bit
437 * wide address space via the designated asi.  It is used to fill
438 * non-kernel windows.  The stack pointer is required to be eight-byte
439 * aligned even though this code only needs it to be four-byte
440 * aligned.
441 */
/*
 * Layout of the 32-slot handler generated by FILL_32bit_asi:
 *   - srl clears the upper 32 bits of %sp; label 1: follows it;
 *   - TT_TRACE_L runs before the loads (its %l4 is overwritten);
 *   - loads use register+register addressing with an explicit ASI:
 *     %g1/%g2/%g3 hold the offsets 4/8/12 and %g4 is a moving base
 *     (%sp+16, +32, +48), covering %sp+0 .. +60;
 *   - clobbers %g1-%g4;
 *   - 25 instructions including restored/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_32bit_<tail>.
 */
442#define	FILL_32bit_asi(asi_num, tail)				\
443	srl	%sp, 0, %sp					;\
4441:	TT_TRACE_L(trace_win)					;\
445	mov	4, %g1						;\
446	lda	[%sp + %g0]asi_num, %l0				;\
447	mov	8, %g2						;\
448	lda	[%sp + %g1]asi_num, %l1				;\
449	mov	12, %g3						;\
450	lda	[%sp + %g2]asi_num, %l2				;\
451	lda	[%sp + %g3]asi_num, %l3				;\
452	add	%sp, 16, %g4					;\
453	lda	[%g4 + %g0]asi_num, %l4				;\
454	lda	[%g4 + %g1]asi_num, %l5				;\
455	lda	[%g4 + %g2]asi_num, %l6				;\
456	lda	[%g4 + %g3]asi_num, %l7				;\
457	add	%g4, 16, %g4					;\
458	lda	[%g4 + %g0]asi_num, %i0				;\
459	lda	[%g4 + %g1]asi_num, %i1				;\
460	lda	[%g4 + %g2]asi_num, %i2				;\
461	lda	[%g4 + %g3]asi_num, %i3				;\
462	add	%g4, 16, %g4					;\
463	lda	[%g4 + %g0]asi_num, %i4				;\
464	lda	[%g4 + %g1]asi_num, %i5				;\
465	lda	[%g4 + %g2]asi_num, %i6				;\
466	lda	[%g4 + %g3]asi_num, %i7				;\
467	restored						;\
468	retry							;\
469	SKIP(31-25-TT_TRACE_L_INS)				;\
470	ba,a,pt %xcc, fault_32bit_/**/tail			;\
471	.empty
472
473/*
474 * FILL_32bit_tt1 fills a 32-bit-wide register window from a 32-bit
475 * wide address space via the designated asi.  It is used to fill
476 * windows at tl>1 where performance isn't the primary concern and
477 * where we don't want to use unnecessary registers.  The stack
478 * pointer is required to be eight-byte aligned even though this code
479 * only needs it to be four-byte aligned.
480 */
/*
 * Layout of the 32-slot handler generated by FILL_32bit_tt1:
 *   - %asi is loaded with asi_num, so the loads can use the
 *     register+immediate form and no global registers are touched;
 *   - label 1: is on the srl that clears the upper 32 bits of %sp;
 *   - TT_TRACE_L runs before the loads (its %l4 is overwritten);
 *   - sixteen 32-bit loads from %sp+0 .. +60 through %asi;
 *   - 20 instructions including restored/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_32bit_<tail>.
 */
481#define	FILL_32bit_tt1(asi_num, tail)				\
482	mov	asi_num, %asi					;\
4831:	srl	%sp, 0, %sp					;\
484	TT_TRACE_L(trace_win)					;\
485	lda	[%sp + 0]%asi, %l0				;\
486	lda	[%sp + 4]%asi, %l1				;\
487	lda	[%sp + 8]%asi, %l2				;\
488	lda	[%sp + 12]%asi, %l3				;\
489	lda	[%sp + 16]%asi, %l4				;\
490	lda	[%sp + 20]%asi, %l5				;\
491	lda	[%sp + 24]%asi, %l6				;\
492	lda	[%sp + 28]%asi, %l7				;\
493	lda	[%sp + 32]%asi, %i0				;\
494	lda	[%sp + 36]%asi, %i1				;\
495	lda	[%sp + 40]%asi, %i2				;\
496	lda	[%sp + 44]%asi, %i3				;\
497	lda	[%sp + 48]%asi, %i4				;\
498	lda	[%sp + 52]%asi, %i5				;\
499	lda	[%sp + 56]%asi, %i6				;\
500	lda	[%sp + 60]%asi, %i7				;\
501	restored						;\
502	retry							;\
503	SKIP(31-20-TT_TRACE_L_INS)				;\
504	ba,a,pt	%xcc, fault_32bit_/**/tail			;\
505	.empty
506
507
508/*
509 * SPILL_64bit spills a 64-bit-wide kernel register window.  It
510 * assumes that the kernel context and the nucleus context are the
511 * same.  The stack pointer is required to be eight-byte aligned.
512 */
/*
 * Layout of the 32-slot handler generated by SPILL_64bit(tail):
 *   - label 2: is on the first store (entry point for SPILL_mixed);
 *   - %l0-%l7 and %i0-%i7 are stored as 64-bit values at
 *     %sp + V9BIAS64 + 0 .. + 120;
 *   - TT_TRACE_L comes after the stores because it clobbers %l4;
 *   - 18 instructions including saved/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_64bit_<tail>.
 */
513#define	SPILL_64bit(tail)					\
5142:	stx	%l0, [%sp + V9BIAS64 + 0]			;\
515	stx	%l1, [%sp + V9BIAS64 + 8]			;\
516	stx	%l2, [%sp + V9BIAS64 + 16]			;\
517	stx	%l3, [%sp + V9BIAS64 + 24]			;\
518	stx	%l4, [%sp + V9BIAS64 + 32]			;\
519	stx	%l5, [%sp + V9BIAS64 + 40]			;\
520	stx	%l6, [%sp + V9BIAS64 + 48]			;\
521	stx	%l7, [%sp + V9BIAS64 + 56]			;\
522	stx	%i0, [%sp + V9BIAS64 + 64]			;\
523	stx	%i1, [%sp + V9BIAS64 + 72]			;\
524	stx	%i2, [%sp + V9BIAS64 + 80]			;\
525	stx	%i3, [%sp + V9BIAS64 + 88]			;\
526	stx	%i4, [%sp + V9BIAS64 + 96]			;\
527	stx	%i5, [%sp + V9BIAS64 + 104]			;\
528	stx	%i6, [%sp + V9BIAS64 + 112]			;\
529	stx	%i7, [%sp + V9BIAS64 + 120]			;\
530	TT_TRACE_L(trace_win)					;\
531	saved							;\
532	retry							;\
533	SKIP(31-18-TT_TRACE_L_INS)				;\
534	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
535	.empty
536
537/*
538 * SPILL_64bit_asi spills a 64-bit-wide register window into a 64-bit
539 * wide address space via the designated asi.  It is used to spill
540 * non-kernel windows.  The stack pointer is required to be eight-byte
541 * aligned.
542 */
/*
 * Layout of the 32-slot handler generated by SPILL_64bit_asi:
 *   - %g1-%g4 hold V9BIAS64 + 0/8/16/24 and %g5 is a moving base
 *     (%sp+32, +64, +96), so the sixteen 64-bit stores cover
 *     %sp + V9BIAS64 + 0 .. + 120 through the explicit ASI;
 *   - label 2: follows the first mov, so code branching to 2: must
 *     already have %g1 = V9BIAS64 (e.g. set in its delay slot);
 *   - clobbers %g1-%g5;
 *   - 25 instructions including saved/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_64bit_<tail>.
 */
543#define	SPILL_64bit_asi(asi_num, tail)				\
544	mov	0 + V9BIAS64, %g1				;\
5452:	stxa	%l0, [%sp + %g1]asi_num				;\
546	mov	8 + V9BIAS64, %g2				;\
547	stxa	%l1, [%sp + %g2]asi_num				;\
548	mov	16 + V9BIAS64, %g3				;\
549	stxa	%l2, [%sp + %g3]asi_num				;\
550	mov	24 + V9BIAS64, %g4				;\
551	stxa	%l3, [%sp + %g4]asi_num				;\
552	add	%sp, 32, %g5					;\
553	stxa	%l4, [%g5 + %g1]asi_num				;\
554	stxa	%l5, [%g5 + %g2]asi_num				;\
555	stxa	%l6, [%g5 + %g3]asi_num				;\
556	stxa	%l7, [%g5 + %g4]asi_num				;\
557	add	%g5, 32, %g5					;\
558	stxa	%i0, [%g5 + %g1]asi_num				;\
559	stxa	%i1, [%g5 + %g2]asi_num				;\
560	stxa	%i2, [%g5 + %g3]asi_num				;\
561	stxa	%i3, [%g5 + %g4]asi_num				;\
562	add	%g5, 32, %g5					;\
563	stxa	%i4, [%g5 + %g1]asi_num				;\
564	stxa	%i5, [%g5 + %g2]asi_num				;\
565	stxa	%i6, [%g5 + %g3]asi_num				;\
566	stxa	%i7, [%g5 + %g4]asi_num				;\
567	TT_TRACE_L(trace_win)					;\
568	saved							;\
569	retry							;\
570	SKIP(31-25-TT_TRACE_L_INS)				;\
571	ba,a,pt %xcc, fault_64bit_/**/tail			;\
572	.empty
573
574/*
575 * SPILL_64bit_tt1 spills a 64-bit-wide register window into a 64-bit
576 * wide address space via the designated asi.  It is used to spill
577 * windows at tl>1 where performance isn't the primary concern and
578 * where we don't want to use unnecessary registers.  The stack
579 * pointer is required to be eight-byte aligned.
580 */
/*
 * Layout of the 32-slot handler generated by SPILL_64bit_tt1:
 *   - %asi is loaded with asi_num, so the stores can use the
 *     register+immediate form and no global registers are touched;
 *   - label 2: is on the first store;
 *   - sixteen 64-bit stores to %sp + V9BIAS64 + 0 .. + 120 via %asi;
 *   - 19 instructions including saved/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_64bit_<tail>.
 */
581#define	SPILL_64bit_tt1(asi_num, tail)				\
582	mov	asi_num, %asi					;\
5832:	stxa	%l0, [%sp + V9BIAS64 + 0]%asi			;\
584	stxa	%l1, [%sp + V9BIAS64 + 8]%asi			;\
585	stxa	%l2, [%sp + V9BIAS64 + 16]%asi			;\
586	stxa	%l3, [%sp + V9BIAS64 + 24]%asi			;\
587	stxa	%l4, [%sp + V9BIAS64 + 32]%asi			;\
588	stxa	%l5, [%sp + V9BIAS64 + 40]%asi			;\
589	stxa	%l6, [%sp + V9BIAS64 + 48]%asi			;\
590	stxa	%l7, [%sp + V9BIAS64 + 56]%asi			;\
591	stxa	%i0, [%sp + V9BIAS64 + 64]%asi			;\
592	stxa	%i1, [%sp + V9BIAS64 + 72]%asi			;\
593	stxa	%i2, [%sp + V9BIAS64 + 80]%asi			;\
594	stxa	%i3, [%sp + V9BIAS64 + 88]%asi			;\
595	stxa	%i4, [%sp + V9BIAS64 + 96]%asi			;\
596	stxa	%i5, [%sp + V9BIAS64 + 104]%asi			;\
597	stxa	%i6, [%sp + V9BIAS64 + 112]%asi			;\
598	stxa	%i7, [%sp + V9BIAS64 + 120]%asi			;\
599	TT_TRACE_L(trace_win)					;\
600	saved							;\
601	retry							;\
602	SKIP(31-19-TT_TRACE_L_INS)				;\
603	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
604	.empty
605
606
607/*
608 * FILL_64bit fills a 64-bit-wide kernel register window.  It assumes
609 * that the kernel context and the nucleus context are the same.  The
610 * stack pointer is required to be eight-byte aligned.
611 */
/*
 * Layout of the 32-slot handler generated by FILL_64bit(tail):
 *   - label 2: is the very first slot (entry point for FILL_mixed);
 *   - TT_TRACE_L runs before the loads: the %l4 it clobbers is
 *     overwritten by the fill;
 *   - %l0-%l7 and %i0-%i7 are loaded as 64-bit values from
 *     %sp + V9BIAS64 + 0 .. + 120;
 *   - 18 instructions including restored/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_64bit_<tail>.
 */
612#define	FILL_64bit(tail)					\
6132:	TT_TRACE_L(trace_win)					;\
614	ldx	[%sp + V9BIAS64 + 0], %l0			;\
615	ldx	[%sp + V9BIAS64 + 8], %l1			;\
616	ldx	[%sp + V9BIAS64 + 16], %l2			;\
617	ldx	[%sp + V9BIAS64 + 24], %l3			;\
618	ldx	[%sp + V9BIAS64 + 32], %l4			;\
619	ldx	[%sp + V9BIAS64 + 40], %l5			;\
620	ldx	[%sp + V9BIAS64 + 48], %l6			;\
621	ldx	[%sp + V9BIAS64 + 56], %l7			;\
622	ldx	[%sp + V9BIAS64 + 64], %i0			;\
623	ldx	[%sp + V9BIAS64 + 72], %i1			;\
624	ldx	[%sp + V9BIAS64 + 80], %i2			;\
625	ldx	[%sp + V9BIAS64 + 88], %i3			;\
626	ldx	[%sp + V9BIAS64 + 96], %i4			;\
627	ldx	[%sp + V9BIAS64 + 104], %i5			;\
628	ldx	[%sp + V9BIAS64 + 112], %i6			;\
629	ldx	[%sp + V9BIAS64 + 120], %i7			;\
630	restored						;\
631	retry							;\
632	SKIP(31-18-TT_TRACE_L_INS)				;\
633	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
634	.empty
635
636/*
637 * FILL_64bit_asi fills a 64-bit-wide register window from a 64-bit
638 * wide address space via the designated asi.  It is used to fill
639 * non-kernel windows.  The stack pointer is required to be eight-byte
640 * aligned.
641 */
/*
 * Layout of the 32-slot handler generated by FILL_64bit_asi:
 *   - %g1-%g4 hold V9BIAS64 + 0/8/16/24 and %g5 is a moving base
 *     (%sp+32, +64, +96), so the sixteen 64-bit loads cover
 *     %sp + V9BIAS64 + 0 .. + 120 through the explicit ASI;
 *   - label 2: follows the first mov, so code branching to 2: must
 *     already have %g1 = V9BIAS64 (e.g. set in its delay slot);
 *   - TT_TRACE_L runs before the loads (its %l4 is overwritten);
 *   - clobbers %g1-%g5;
 *   - 25 instructions including restored/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_64bit_<tail>.
 */
642#define	FILL_64bit_asi(asi_num, tail)				\
643	mov	V9BIAS64 + 0, %g1				;\
6442:	TT_TRACE_L(trace_win)					;\
645	ldxa	[%sp + %g1]asi_num, %l0				;\
646	mov	V9BIAS64 + 8, %g2				;\
647	ldxa	[%sp + %g2]asi_num, %l1				;\
648	mov	V9BIAS64 + 16, %g3				;\
649	ldxa	[%sp + %g3]asi_num, %l2				;\
650	mov	V9BIAS64 + 24, %g4				;\
651	ldxa	[%sp + %g4]asi_num, %l3				;\
652	add	%sp, 32, %g5					;\
653	ldxa	[%g5 + %g1]asi_num, %l4				;\
654	ldxa	[%g5 + %g2]asi_num, %l5				;\
655	ldxa	[%g5 + %g3]asi_num, %l6				;\
656	ldxa	[%g5 + %g4]asi_num, %l7				;\
657	add	%g5, 32, %g5					;\
658	ldxa	[%g5 + %g1]asi_num, %i0				;\
659	ldxa	[%g5 + %g2]asi_num, %i1				;\
660	ldxa	[%g5 + %g3]asi_num, %i2				;\
661	ldxa	[%g5 + %g4]asi_num, %i3				;\
662	add	%g5, 32, %g5					;\
663	ldxa	[%g5 + %g1]asi_num, %i4				;\
664	ldxa	[%g5 + %g2]asi_num, %i5				;\
665	ldxa	[%g5 + %g3]asi_num, %i6				;\
666	ldxa	[%g5 + %g4]asi_num, %i7				;\
667	restored						;\
668	retry							;\
669	SKIP(31-25-TT_TRACE_L_INS)				;\
670	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
671	.empty
672
673/*
674 * FILL_64bit_tt1 fills a 64-bit-wide register window from a 64-bit
675 * wide address space via the designated asi.  It is used to fill
676 * windows at tl>1 where performance isn't the primary concern and
677 * where we don't want to use unnecessary registers.  The stack
678 * pointer is required to be eight-byte aligned.
679 */
/*
 * Layout of the 32-slot handler generated by FILL_64bit_tt1:
 *   - %asi is loaded with asi_num, so the loads can use the
 *     register+immediate form and no global registers are touched;
 *   - unlike the other 64-bit handlers, no 2: label is defined here;
 *   - TT_TRACE_L runs before the loads (its %l4 is overwritten);
 *   - sixteen 64-bit loads from %sp + V9BIAS64 + 0 .. + 120 via %asi;
 *   - 19 instructions including restored/retry, plus TT_TRACE_L_INS;
 *     SKIP pads up to the last slot, which holds an annulled branch to
 *     fault_64bit_<tail>.
 */
680#define	FILL_64bit_tt1(asi_num, tail)				\
681	mov	asi_num, %asi					;\
682	TT_TRACE_L(trace_win)					;\
683	ldxa	[%sp + V9BIAS64 + 0]%asi, %l0			;\
684	ldxa	[%sp + V9BIAS64 + 8]%asi, %l1			;\
685	ldxa	[%sp + V9BIAS64 + 16]%asi, %l2			;\
686	ldxa	[%sp + V9BIAS64 + 24]%asi, %l3			;\
687	ldxa	[%sp + V9BIAS64 + 32]%asi, %l4			;\
688	ldxa	[%sp + V9BIAS64 + 40]%asi, %l5			;\
689	ldxa	[%sp + V9BIAS64 + 48]%asi, %l6			;\
690	ldxa	[%sp + V9BIAS64 + 56]%asi, %l7			;\
691	ldxa	[%sp + V9BIAS64 + 64]%asi, %i0			;\
692	ldxa	[%sp + V9BIAS64 + 72]%asi, %i1			;\
693	ldxa	[%sp + V9BIAS64 + 80]%asi, %i2			;\
694	ldxa	[%sp + V9BIAS64 + 88]%asi, %i3			;\
695	ldxa	[%sp + V9BIAS64 + 96]%asi, %i4			;\
696	ldxa	[%sp + V9BIAS64 + 104]%asi, %i5			;\
697	ldxa	[%sp + V9BIAS64 + 112]%asi, %i6			;\
698	ldxa	[%sp + V9BIAS64 + 120]%asi, %i7			;\
699	restored						;\
700	retry							;\
701	SKIP(31-19-TT_TRACE_L_INS)				;\
702	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
703	.empty
704
705#endif /* !lint */
706
707/*
708 * SPILL_mixed spills either size window, depending on
709 * whether %sp is even or odd, to a 32-bit address space.
710 * This may only be used in conjunction with SPILL_32bit/
711 * SPILL_64bit. New versions of SPILL_mixed_{tt1,asi} would be
712 * needed for use with SPILL_{32,64}bit_{tt1,asi}.  Particular
713 * attention should be paid to the instructions that belong
714 * in the delay slots of the branches depending on the type
715 * of spill handler being branched to.
716 * Clear upper 32 bits of %sp if it is even (the 32-bit frame case).
717 * We won't need to clear them in 64 bit kernel.
718 */
/*
 * SPILL_mixed dispatches on bit 0 of %sp:
 *   - bit clear: branch back to the nearest preceding 1: label; the
 *     srl in the delay slot runs only on this (taken) path because the
 *     branch is annulling;
 *   - bit set:   branch back to the nearest preceding 2: label, with a
 *     nop in the delay slot.
 * Padded to the next 128-byte boundary (32 instruction slots).
 */
719#define	SPILL_mixed						\
720	btst	1, %sp						;\
721	bz,a,pt	%xcc, 1b					;\
722	srl	%sp, 0, %sp					;\
723	ba,pt	%xcc, 2b					;\
724	nop							;\
725	.align	128
726
727/*
728 * FILL_mixed fills either size window, depending on
729 * whether %sp is even or odd, from a 32-bit address space.
730 * This may only be used in conjunction with FILL_32bit/
731 * FILL_64bit. New versions of FILL_mixed_{tt1,asi} would be
732 * needed for use with FILL_{32,64}bit_{tt1,asi}. Particular
733 * attention should be paid to the instructions that belong
734 * in the delay slots of the branches depending on the type
735 * of fill handler being branched to.
736 * Clear upper 32 bits of %sp if it is even (the 32-bit frame case).
737 * We won't need to clear them in 64 bit kernel.
738 */
/*
 * FILL_mixed dispatches on bit 0 of %sp:
 *   - bit clear: branch back to the nearest preceding 1: label; the
 *     srl in the delay slot runs only on this (taken) path because the
 *     branch is annulling;
 *   - bit set:   branch back to the nearest preceding 2: label, with a
 *     nop in the delay slot.
 * Padded to the next 128-byte boundary (32 instruction slots).
 */
739#define	FILL_mixed						\
740	btst	1, %sp						;\
741	bz,a,pt	%xcc, 1b					;\
742	srl	%sp, 0, %sp					;\
743	ba,pt	%xcc, 2b					;\
744	nop							;\
745	.align	128
746
747
748/*
749 * SPILL_32clean/SPILL_64clean spill 32-bit and 64-bit register windows,
750 * respectively, into the address space via the designated asi.  The
751 * unbiased stack pointer is required to be eight-byte aligned (even for
752 * the 32-bit case even though this code does not require such strict
753 * alignment).
754 *
755 * With SPARC v9 the spill trap takes precedence over the cleanwin trap
756 * so when cansave == 0, canrestore == 6, and cleanwin == 6 the next save
757 * will cause cwp + 2 to be spilled but will not clean cwp + 1.  That
758 * window may contain kernel data so in user_rtt we set wstate to call
759 * these spill handlers on the first user spill trap.  These handlers then
760 * spill the appropriate window but also back up a window and clean the
761 * window that didn't get a cleanwin trap.
762 */
/*
 * Layout of the 32-slot handler generated by SPILL_32clean:
 *   - the store sequence is the same as in SPILL_32bit_asi (offsets in
 *     %g1-%g3, moving base in %g4, sixteen words at %sp+0 .. +60), but
 *     no 1: label is defined;
 *   - instead of saved/retry it branches to .spill_clean (defined
 *     outside this section) with %g7 = WSTATE_USER32 set in the delay
 *     slot;
 *   - clobbers %g1-%g4 and %g7;
 *   - 25 instructions, plus TT_TRACE_L_INS; SKIP pads up to the last
 *     slot, which holds an annulled branch to fault_32bit_<tail>.
 */
763#define	SPILL_32clean(asi_num, tail)				\
764	srl	%sp, 0, %sp					;\
765	sta	%l0, [%sp + %g0]asi_num				;\
766	mov	4, %g1						;\
767	sta	%l1, [%sp + %g1]asi_num				;\
768	mov	8, %g2						;\
769	sta	%l2, [%sp + %g2]asi_num				;\
770	mov	12, %g3						;\
771	sta	%l3, [%sp + %g3]asi_num				;\
772	add	%sp, 16, %g4					;\
773	sta	%l4, [%g4 + %g0]asi_num				;\
774	sta	%l5, [%g4 + %g1]asi_num				;\
775	sta	%l6, [%g4 + %g2]asi_num				;\
776	sta	%l7, [%g4 + %g3]asi_num				;\
777	add	%g4, 16, %g4					;\
778	sta	%i0, [%g4 + %g0]asi_num				;\
779	sta	%i1, [%g4 + %g1]asi_num				;\
780	sta	%i2, [%g4 + %g2]asi_num				;\
781	sta	%i3, [%g4 + %g3]asi_num				;\
782	add	%g4, 16, %g4					;\
783	sta	%i4, [%g4 + %g0]asi_num				;\
784	sta	%i5, [%g4 + %g1]asi_num				;\
785	sta	%i6, [%g4 + %g2]asi_num				;\
786	sta	%i7, [%g4 + %g3]asi_num				;\
787	TT_TRACE_L(trace_win)					;\
788	b	.spill_clean					;\
789	  mov	WSTATE_USER32, %g7				;\
790	SKIP(31-25-TT_TRACE_L_INS)				;\
791	ba,a,pt	%xcc, fault_32bit_/**/tail			;\
792	.empty
793
/*
 * Layout of the 32-slot handler generated by SPILL_64clean:
 *   - the store sequence is the same as in SPILL_64bit_asi (offsets
 *     V9BIAS64 + 0/8/16/24 in %g1-%g4, moving base in %g5), but no 2:
 *     label is defined;
 *   - instead of saved/retry it branches to .spill_clean (defined
 *     outside this section) with %g7 = WSTATE_USER64 set in the delay
 *     slot;
 *   - clobbers %g1-%g5 and %g7;
 *   - 25 instructions, plus TT_TRACE_L_INS; SKIP pads up to the last
 *     slot, which holds an annulled branch to fault_64bit_<tail>.
 */
794#define	SPILL_64clean(asi_num, tail)				\
795	mov	0 + V9BIAS64, %g1				;\
796	stxa	%l0, [%sp + %g1]asi_num				;\
797	mov	8 + V9BIAS64, %g2				;\
798	stxa	%l1, [%sp + %g2]asi_num				;\
799	mov	16 + V9BIAS64, %g3				;\
800	stxa	%l2, [%sp + %g3]asi_num				;\
801	mov	24 + V9BIAS64, %g4				;\
802	stxa	%l3, [%sp + %g4]asi_num				;\
803	add	%sp, 32, %g5					;\
804	stxa	%l4, [%g5 + %g1]asi_num				;\
805	stxa	%l5, [%g5 + %g2]asi_num				;\
806	stxa	%l6, [%g5 + %g3]asi_num				;\
807	stxa	%l7, [%g5 + %g4]asi_num				;\
808	add	%g5, 32, %g5					;\
809	stxa	%i0, [%g5 + %g1]asi_num				;\
810	stxa	%i1, [%g5 + %g2]asi_num				;\
811	stxa	%i2, [%g5 + %g3]asi_num				;\
812	stxa	%i3, [%g5 + %g4]asi_num				;\
813	add	%g5, 32, %g5					;\
814	stxa	%i4, [%g5 + %g1]asi_num				;\
815	stxa	%i5, [%g5 + %g2]asi_num				;\
816	stxa	%i6, [%g5 + %g3]asi_num				;\
817	stxa	%i7, [%g5 + %g4]asi_num				;\
818	TT_TRACE_L(trace_win)					;\
819	b	.spill_clean					;\
820	  mov	WSTATE_USER64, %g7				;\
821	SKIP(31-25-TT_TRACE_L_INS)				;\
822	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
823	.empty
824
825
826/*
827 * Floating point disabled.
828 */
/*
 * FP_DISABLED_TRAP: optional trace, then branch to .fp_disabled
 * (defined outside this section) with a nop in the delay slot.  Sets
 * no argument registers.  Padded to the next 32-byte boundary.
 */
829#define	FP_DISABLED_TRAP		\
830	TT_TRACE(trace_gen)		;\
831	ba,pt	%xcc,.fp_disabled	;\
832	nop				;\
833	.align	32
834
835/*
836 * Floating point exceptions.
837 */
/*
 * FP_IEEE_TRAP: optional trace, then branch to .fp_ieee_exception
 * (defined outside this section) with a nop in the delay slot.
 * Padded to the next 32-byte boundary.
 */
838#define	FP_IEEE_TRAP			\
839	TT_TRACE(trace_gen)		;\
840	ba,pt	%xcc,.fp_ieee_exception	;\
841	nop				;\
842	.align	32
843
/*
 * FP_TRAP: optional trace, then branch to .fp_exception (defined
 * outside this section) with a nop in the delay slot.  Padded to the
 * next 32-byte boundary.
 */
844#define	FP_TRAP				\
845	TT_TRACE(trace_gen)		;\
846	ba,pt	%xcc,.fp_exception	;\
847	nop				;\
848	.align	32
849
850#if !defined(lint)
851/*
852 * asynchronous traps at level 0 and level 1
853 *
854 * The first instruction must be a membar for UltraSPARC-III
855 * to stop RED state entry if the store queue has many
856 * pending bad stores (PRM, Chapter 11).
857 */
/*
 * ASYNC_TRAP(ttype, ttlabel): membar #Sync is deliberately the first
 * instruction of the entry (see the comment above), followed by the
 * optional trace to `ttlabel' and a branch to async_err with
 * %g5 = ttype set in the delay slot.  Padded to the next 32-byte
 * boundary.
 */
858#define ASYNC_TRAP(ttype, ttlabel)\
859	membar	#Sync		;\
860	TT_TRACE(ttlabel)	;\
861	ba	async_err	;\
862	mov	ttype, %g5	;\
863	.align	32
864
865/*
866 * Defaults to BAD entry, but establishes label to be used for
867 * architecture-specific overwrite of trap table entry.
868 */
/*
 * LABELED_BAD(table_name): emits a global label `table_name' at the
 * start of the entry and then a normal BAD entry.
 */
869#define	LABELED_BAD(table_name)		\
870	.global	table_name		;\
871table_name:				;\
872	BAD
873
874#endif /* !lint */
875
876/*
877 * illegal instruction trap
878 */
/*
 * ILLTRAP_INSTR: membar #Sync, optional trace, then
 *	%g2 = P_UTRAP4
 *	%g3 = T_UNIMP_INSTR
 * and an indirect jump to .check_v9utrap through %g4 (sethi/jmp pair,
 * nop in the delay slot).  Clobbers %g4.  With TRAPTRACE enabled this
 * is exactly eight instructions, i.e. the entry is full.
 */
879#define	ILLTRAP_INSTR			  \
880	membar	#Sync			  ;\
881	TT_TRACE(trace_gen)		  ;\
882	or	%g0, P_UTRAP4, %g2	  ;\
883	or	%g0, T_UNIMP_INSTR, %g3   ;\
884	sethi	%hi(.check_v9utrap), %g4  ;\
885	jmp	%g4 + %lo(.check_v9utrap) ;\
886	nop				  ;\
887	.align	32
888
889/*
890 * tag overflow trap
891 */
/*
 * TAG_OVERFLOW: optional trace, then
 *	%g2 = P_UTRAP10
 *	%g3 = T_TAG_OVERFLOW
 * and an indirect jump to .check_v9utrap through %g4 (nop in the delay
 * slot).  Clobbers %g4.  Padded to the next 32-byte boundary.
 */
892#define	TAG_OVERFLOW			  \
893	TT_TRACE(trace_gen)		  ;\
894	or	%g0, P_UTRAP10, %g2	  ;\
895	or	%g0, T_TAG_OVERFLOW, %g3  ;\
896	sethi	%hi(.check_v9utrap), %g4  ;\
897	jmp	%g4 + %lo(.check_v9utrap) ;\
898	nop				  ;\
899	.align	32
900
901/*
902 * divide by zero trap
903 */
/*
 * DIV_BY_ZERO: optional trace, then
 *	%g2 = P_UTRAP11
 *	%g3 = T_IDIV0
 * and an indirect jump to .check_v9utrap through %g4 (nop in the delay
 * slot).  Clobbers %g4.  Padded to the next 32-byte boundary.
 */
904#define	DIV_BY_ZERO			  \
905	TT_TRACE(trace_gen)		  ;\
906	or	%g0, P_UTRAP11, %g2	  ;\
907	or	%g0, T_IDIV0, %g3	  ;\
908	sethi	%hi(.check_v9utrap), %g4  ;\
909	jmp	%g4 + %lo(.check_v9utrap) ;\
910	nop				  ;\
911	.align	32
912
913/*
914 * trap instruction for V9 user trap handlers
915 */
/*
 * TRAP_INSTR: optional trace, %g3 = T_SOFTWARE_TRAP, then an indirect
 * jump to .check_v9utrap through %g4 (nop in the delay slot).  Unlike
 * the other .check_v9utrap users above, %g2 is not set here.
 * Clobbers %g4.  Padded to the next 32-byte boundary.
 */
916#define	TRAP_INSTR			  \
917	TT_TRACE(trace_gen)		  ;\
918	or	%g0, T_SOFTWARE_TRAP, %g3 ;\
919	sethi	%hi(.check_v9utrap), %g4  ;\
920	jmp	%g4 + %lo(.check_v9utrap) ;\
921	nop				  ;\
922	.align	32
/* TRP4: four consecutive TRAP_INSTR entries. */
923#define	TRP4	TRAP_INSTR; TRAP_INSTR; TRAP_INSTR; TRAP_INSTR
924
925/*
926 * LEVEL_INTERRUPT is for level N interrupts.
927 * VECTOR_INTERRUPT is for the vector trap.
928 */
/*
 * LEVEL_INTERRUPT(level): defines the global label tt_pil<level> (name
 * pasted together with an empty comment) on the entry, then branches
 * to pil_interrupt with %g4 = level set in the delay slot.  Padded to
 * the next 32-byte boundary.
 */
929#define	LEVEL_INTERRUPT(level)		\
930	.global	tt_pil/**/level		;\
931tt_pil/**/level:			;\
932	ba,pt	%xcc, pil_interrupt	;\
933	mov	level, %g4		;\
934	.align	32
935
/*
 * LEVEL14_INTERRUPT: branches to pil14_interrupt with %g4 = PIL_14 set
 * in the delay slot.  No tt_pil label is defined for this entry.
 * Padded to the next 32-byte boundary.
 */
936#define	LEVEL14_INTERRUPT			\
937	ba	pil14_interrupt			;\
938	mov	PIL_14, %g4			;\
939	.align	32
940
/*
 * VECTOR_INTERRUPT: loads the register at address 0 of
 * ASI_INTR_RECEIVE_STATUS into %g1 and tests IRSR_BUSY.
 *   - bit set:   branch to vec_interrupt (nop in the delay slot);
 *   - bit clear: annulled branch to vec_intr_spurious.
 * Clobbers %g1.  Padded to the next 32-byte boundary.
 */
941#define	VECTOR_INTERRUPT				\
942	ldxa	[%g0]ASI_INTR_RECEIVE_STATUS, %g1	;\
943	btst	IRSR_BUSY, %g1				;\
944	bnz,pt	%xcc, vec_interrupt			;\
945	nop						;\
946	ba,a,pt	%xcc, vec_intr_spurious			;\
947	.empty						;\
948	.align	32
949
950/*
951 * MMU Trap Handlers.
952 */
/*
 * SWITCH_GLOBALS: two instructions.  wrpr writes (rs1 XOR immediate),
 * so this toggles both PSTATE_MG and PSTATE_AG in %pstate: with exactly
 * one of them set on entry, the other one is selected on exit.  The
 * %g5 used as scratch belongs to the global set active before the
 * switch.
 */
953#define	SWITCH_GLOBALS	/* mmu->alt, alt->mmu */			\
954	rdpr	%pstate, %g5						;\
955	wrpr	%g5, PSTATE_MG | PSTATE_AG, %pstate
956
/*
 * IMMU_EXCEPTION: membar #Sync, switch global sets, then in the new
 * globals:
 *	%asi = ASI_IMMU
 *	%g2  = %tpc
 *	%g3  = register MMU_SFSR read through %asi
 *	%g1  = T_INSTR_EXCEPTION (delay slot)
 * and branch to .mmu_exception_end.  Eight instructions, so the entry
 * is full.
 */
957#define	IMMU_EXCEPTION							\
958	membar	#Sync							;\
959	SWITCH_GLOBALS							;\
960	wr	%g0, ASI_IMMU, %asi					;\
961	rdpr	%tpc, %g2						;\
962	ldxa	[MMU_SFSR]%asi, %g3					;\
963	ba,pt	%xcc, .mmu_exception_end				;\
964	mov	T_INSTR_EXCEPTION, %g1					;\
965	.align	32
966
/*
 * DMMU_EXCEPTION: switch global sets, then in the new globals:
 *	%asi = ASI_DMMU
 *	%g2  = register MMU_TAG_ACCESS read through %asi
 *	%g3  = register MMU_SFSR read through %asi
 *	%g1  = T_DATA_EXCEPTION (delay slot)
 * and branch to .mmu_exception_end.  Padded to the next 32-byte
 * boundary.
 */
967#define	DMMU_EXCEPTION							\
968	SWITCH_GLOBALS							;\
969	wr	%g0, ASI_DMMU, %asi					;\
970	ldxa	[MMU_TAG_ACCESS]%asi, %g2				;\
971	ldxa	[MMU_SFSR]%asi, %g3					;\
972	ba,pt	%xcc, .mmu_exception_end				;\
973	mov	T_DATA_EXCEPTION, %g1					;\
974	.align	32
975
/*
 * DMMU_EXC_AG_PRIV: no global-set switch is performed here (the "AG"
 * in the name presumably refers to running on the alternate globals --
 * confirm).
 *	%asi = ASI_DMMU
 *	%g2  = register MMU_SFAR read through %asi
 *	%g3  = register MMU_SFSR read through %asi (delay slot)
 * then branch to .mmu_priv_exception.  Padded to the next 32-byte
 * boundary.
 */
976#define	DMMU_EXC_AG_PRIV						\
977	wr	%g0, ASI_DMMU, %asi					;\
978	ldxa	[MMU_SFAR]%asi, %g2					;\
979	ba,pt	%xcc, .mmu_priv_exception				;\
980	ldxa	[MMU_SFSR]%asi, %g3					;\
981	.align	32
982
/*
 * DMMU_EXC_AG_NOT_ALIGNED: same register setup as DMMU_EXC_AG_PRIV
 *	%asi = ASI_DMMU
 *	%g2  = register MMU_SFAR read through %asi
 *	%g3  = register MMU_SFSR read through %asi (delay slot)
 * but the branch target is .mmu_exception_not_aligned.  Padded to the
 * next 32-byte boundary.
 */
983#define	DMMU_EXC_AG_NOT_ALIGNED						\
984	wr	%g0, ASI_DMMU, %asi					;\
985	ldxa	[MMU_SFAR]%asi, %g2					;\
986	ba,pt	%xcc, .mmu_exception_not_aligned			;\
987	ldxa	[MMU_SFSR]%asi, %g3					;\
988	.align	32
989
990/*
991 * SPARC V9 IMPL. DEP. #109(1) and (2) and #110(1) and (2)
992 */
/*
 * DMMU_EXC_LDDF_NOT_ALIGNED: tests bit 0 of %sp.
 *   - bit set:   branch to .lddf_exception_not_aligned;
 *   - bit clear: %g2 = MMU_SFAR, %g3 = MMU_SFSR (delay slot), branch
 *                to .mmu_exception_not_aligned.
 * The write of ASI_DMMU to %asi sits in the delay slot of the
 * non-annulled bnz, so %asi is set on both paths.  Padded to the next
 * 32-byte boundary.
 */
993#define	DMMU_EXC_LDDF_NOT_ALIGNED					\
994	btst	1, %sp							;\
995	bnz,pt	%xcc, .lddf_exception_not_aligned			;\
996	wr	%g0, ASI_DMMU, %asi					;\
997	ldxa	[MMU_SFAR]%asi, %g2					;\
998	ba,pt	%xcc, .mmu_exception_not_aligned			;\
999	ldxa	[MMU_SFSR]%asi, %g3					;\
1000	.align	32
1001
/*
 * DMMU_EXC_STDF_NOT_ALIGNED: tests bit 0 of %sp.
 *   - bit set:   branch to .stdf_exception_not_aligned;
 *   - bit clear: %g2 = MMU_SFAR, %g3 = MMU_SFSR (delay slot), branch
 *                to .mmu_exception_not_aligned.
 * The write of ASI_DMMU to %asi sits in the delay slot of the
 * non-annulled bnz, so %asi is set on both paths.  Padded to the next
 * 32-byte boundary.
 */
1002#define	DMMU_EXC_STDF_NOT_ALIGNED					\
1003	btst	1, %sp							;\
1004	bnz,pt	%xcc, .stdf_exception_not_aligned			;\
1005	wr	%g0, ASI_DMMU, %asi					;\
1006	ldxa	[MMU_SFAR]%asi, %g2					;\
1007	ba,pt	%xcc, .mmu_exception_not_aligned			;\
1008	ldxa	[MMU_SFSR]%asi, %g3					;\
1009	.align	32
1010
1011/*
1012 * Flush the TLB using either the primary, secondary, or nucleus flush
1013 * operation based on whether the ctx from the tag access register matches
1014 * the primary or secondary context (flush the nucleus if neither matches).
1015 *
1016 * Requires a membar #Sync before next ld/st.
1017 * exits with:
1018 * g2 = tag access register
1019 * g3 = ctx number
1020 */
1021#if TAGACC_CTX_MASK != CTXREG_CTX_MASK /* one mask (%g4) is applied to both */
1022#error "TAGACC_CTX_MASK != CTXREG_CTX_MASK"
1023#endif
1024#define	DTLB_DEMAP_ENTRY						\
1025	mov	MMU_TAG_ACCESS, %g1					;\
1026	mov	MMU_PCONTEXT, %g5					;\
1027	ldxa	[%g1]ASI_DMMU, %g2		/* g2 = tag access */	;\
1028	sethi	%hi(TAGACC_CTX_MASK), %g4				;\
1029	or	%g4, %lo(TAGACC_CTX_MASK), %g4	/* g4 = ctx mask */	;\
1030	and	%g2, %g4, %g3			/* g3 = ctx */		;\
1031	ldxa	[%g5]ASI_DMMU, %g6		/* g6 = primary ctx */	;\
1032	and	%g6, %g4, %g6			/* &= CTXREG_CTX_MASK */ ;\
1033	cmp	%g3, %g6						;\
1034	be,pt	%xcc, 1f			/* matches primary */	;\
1035	andn	%g2, %g4, %g1			/* ctx = primary */	;\
1036	mov	MMU_SCONTEXT, %g5					;\
1037	ldxa	[%g5]ASI_DMMU, %g6		/* g6 = secondary ctx */ ;\
1038	and	%g6, %g4, %g6			/* &= CTXREG_CTX_MASK */ ;\
1039	cmp	%g3, %g6						;\
1040	be,a,pt	%xcc, 1f			/* matches secondary */	;\
1041	  or	%g1, DEMAP_SECOND, %g1		/* annulled if not taken */ ;\
1042	or	%g1, DEMAP_NUCLEUS, %g1		/* neither ctx matched */ ;\
10431:	stxa	%g0, [%g1]ASI_DTLB_DEMAP	/* MMU_DEMAP_PAGE */	;\
1044	membar	#Sync
1045
1046#if defined(cscope)
1047/*
1048 * Define labels to direct cscope quickly to labels that
1049 * are generated by macro expansion of DTLB_MISS().
1050 */
1051	.global	tt0_dtlbmiss
1052tt0_dtlbmiss:			/* name produced by DTLB_MISS(tt0) */
1053	.global	tt1_dtlbmiss
1054tt1_dtlbmiss:			/* name produced by DTLB_MISS(tt1) */
1055	nop
1056#endif /* cscope */
1057
1058/*
1059 * Needs to be exactly 32 instructions
1060 *
1061 * UTLB NOTE: If we don't hit on the 8k pointer then we branch
1062 * to a special 4M tsb handler. It would be nice if that handler
1063 * could live in this file but currently it seems better to allow
1064 * it to fall thru to sfmmu_tsb_miss.
1065 */
1066#define	DTLB_MISS(table_name)						;\
1067	.global	table_name/**/_dtlbmiss					;\
1068table_name/**/_dtlbmiss:						;\
1069	HAT_PERCPU_DBSTAT(TSBMISS_DTLBMISS) /* 3 instr ifdef DEBUG */	;\
1070	mov	MMU_TAG_ACCESS, %g6		/* select tag acc */	;\
1071	ldxa	[%g0]ASI_DMMU_TSB_8K, %g1	/* g1 = tsbe ptr */	;\
1072	ldxa	[%g6]ASI_DMMU, %g2		/* g2 = tag access */	;\
1073	sllx	%g2, TAGACC_CTX_LSHIFT, %g3				;\
1074	srlx	%g3, TAGACC_CTX_LSHIFT, %g3	/* g3 = ctx */		;\
1075	cmp	%g3, INVALID_CONTEXT					;\
1076	ble,pn	%xcc, sfmmu_kdtlb_miss	/* ctx <= INVALID_CONTEXT */	;\
1077	  srlx	%g2, TAG_VALO_SHIFT, %g7	/* g7 = tsb tag */	;\
1078	brlz,pn %g1, sfmmu_udtlb_slowpath /* tsbe ptr is negative */	;\
1079	  nop								;\
1080	ldda	[%g1]ASI_NQUAD_LD, %g4	/* g4 = tag, %g5 data */	;\
1081	cmp	%g4, %g7						;\
1082	bne,pn	%xcc, sfmmu_tsb_miss_tt		/* no 4M TSB, miss */	;\
1083	  mov	%g0, %g3		/* clear 4M tsbe ptr */		;\
1084	TT_TRACE(trace_tsbhit)		/* 2 instr ifdef TRAPTRACE */	;\
1085	stxa	%g5, [%g0]ASI_DTLB_IN	/* trapstat expects TTE */	;\
1086	retry				/* in %g5 */			;\
1087	unimp	0	/* pad: 16 fixed + 11 unimp + 3 + 2 = 32 */	;\
1088	unimp	0							;\
1089	unimp	0							;\
1090	unimp	0							;\
1091	unimp	0							;\
1092	unimp	0							;\
1093	unimp	0							;\
1094	unimp	0							;\
1095	unimp	0							;\
1096	unimp	0							;\
1097	unimp	0							;\
1098	.align 128	/* 32 instructions * 4 bytes */
1099
1100#if defined(cscope)
1101/*
1102 * Define labels to direct cscope quickly to labels that
1103 * are generated by macro expansion of ITLB_MISS().
1104 */
1105	.global	tt0_itlbmiss
1106tt0_itlbmiss:			/* name produced by ITLB_MISS(tt0) */
1107	.global	tt1_itlbmiss
1108tt1_itlbmiss:			/* name produced by ITLB_MISS(tt1) */
1109	nop
1110#endif /* cscope */
1111
1112/*
1113 * Instruction miss handler.
1114 * ldda instructions will have their ASI patched
1115 * by sfmmu_patch_ktsb at runtime.
1116 * MUST be EXACTLY 32 instructions or we'll break.
1117 */
1118#define	ITLB_MISS(table_name)						 \
1119	.global	table_name/**/_itlbmiss					;\
1120table_name/**/_itlbmiss:						;\
1121	HAT_PERCPU_DBSTAT(TSBMISS_ITLBMISS) /* 3 instr ifdef DEBUG */	;\
1122	mov	MMU_TAG_ACCESS, %g6		/* select tag acc */	;\
1123	ldxa	[%g0]ASI_IMMU_TSB_8K, %g1	/* g1 = tsbe ptr */	;\
1124	ldxa	[%g6]ASI_IMMU, %g2		/* g2 = tag access */	;\
1125	sllx	%g2, TAGACC_CTX_LSHIFT, %g3				;\
1126	srlx	%g3, TAGACC_CTX_LSHIFT, %g3	/* g3 = ctx */		;\
1127	cmp	%g3, INVALID_CONTEXT					;\
1128	ble,pn	%xcc, sfmmu_kitlb_miss	/* ctx <= INVALID_CONTEXT */	;\
1129	  srlx	%g2, TAG_VALO_SHIFT, %g7	/* g7 = tsb tag */	;\
1130	brlz,pn	%g1, sfmmu_uitlb_slowpath	/* if >1 TSB branch */	;\
1131	  nop								;\
1132	ldda	[%g1]ASI_NQUAD_LD, %g4	/* g4 = tag, g5 = data */	;\
1133	cmp	%g4, %g7						;\
1134	bne,pn	%xcc, sfmmu_tsb_miss_tt	/* br if 8k ptr miss */		;\
1135	  mov	%g0, %g3		/* no 4M TSB */			;\
1136	andcc	%g5, TTE_EXECPRM_INT, %g0 /* check execute bit */	;\
1137	bz,pn	%icc, exec_fault	/* execute bit clear */		;\
1138	  nop								;\
1139	TT_TRACE(trace_tsbhit)		/* 2 instr ifdef TRAPTRACE */	;\
1140	stxa	%g5, [%g0]ASI_ITLB_IN	/* trapstat expects %g5 */	;\
1141	retry								;\
1142	unimp	0	/* pad: 19 fixed + 8 unimp + 3 + 2 = 32 */	;\
1143	unimp	0							;\
1144	unimp	0							;\
1145	unimp	0							;\
1146	unimp	0							;\
1147	unimp	0							;\
1148	unimp	0							;\
1149	unimp	0							;\
1150	.align 128	/* 32 instructions * 4 bytes */
1151
1152
1153/*
1154 * This macro is the first level handler for fast protection faults.
1155 * It first demaps the tlb entry which generated the fault and then
1156 * attempts to set the modify bit on the hash.  It needs to be
1157 * exactly 32 instructions.
1158 */
1159#define	DTLB_PROT							 \
1160	DTLB_DEMAP_ENTRY		/* 20 instructions */		;\
1161	/*								;\
1162	 * At this point:						;\
1163	 *   g1 = ????							;\
1164	 *   g2 = tag access register					;\
1165	 *   g3 = ctx number						;\
1166	 *   g4 = ????							;\
1167	 */								;\
1168	TT_TRACE(trace_dataprot)	/* 2 instr ifdef TRAPTRACE */	;\
1169					/* clobbers g1 and g6 */	;\
1170	ldxa	[%g0]ASI_DMMU_TSB_8K, %g1	/* g1 = tsbe ptr */	;\
1171	brnz,pt %g3, sfmmu_uprot_trap		/* user trap */		;\
1172	  nop								;\
1173	ba,a,pt	%xcc, sfmmu_kprot_trap		/* kernel trap */	;\
1174	unimp	0	/* pad: 20 + 4 fixed + 6 unimp + 2 = 32 */	;\
1175	unimp	0							;\
1176	unimp	0							;\
1177	unimp	0							;\
1178	unimp	0							;\
1179	unimp	0							;\
1180	.align 128	/* 32 instructions * 4 bytes */
1181
1182#define	DMMU_EXCEPTION_TL1	/* trap_table1: data access exception */ ;\
1183	SWITCH_GLOBALS			/* mmu->alt globals */		;\
1184	ba,a,pt	%xcc, mmu_trap_tl1	/* annulled: nop below is skipped */ ;\
1185	  nop								;\
1186	.align 32
1187
1188#define	MISALIGN_ADDR_TL1	/* trap_table1: mem addr not aligned */	;\
1189	ba,a,pt	%xcc, mmu_trap_tl1	/* annulled: nop below is skipped */ ;\
1190	  nop								;\
1191	.align 32
1192
1193/*
1194 * Trace a tsb hit
1195 * g1 = tsbe pointer (in/clobbered)
1196 * g2 = tag access register (in)
1197 * g3 - g4 = scratch (clobbered)
1198 * g5 = tsbe data (in)
1199 * g6 = scratch (clobbered)
1200 * g7 = pc we jumped here from (in)
1201 * ttextra = value to OR in to trap type (%tt) (in)
1202 */
1203#ifdef TRAPTRACE
1204#define TRACE_TSBHIT(ttextra)						 \
1205	membar	#Sync							;\
1206	sethi	%hi(FLUSH_ADDR), %g6					;\
1207	flush	%g6							;\
1208	TRACE_PTR(%g3, %g6)		/* stores below use %g3 as base */ ;\
1209	GET_TRACE_TICK(%g6)						;\
1210	stxa	%g6, [%g3 + TRAP_ENT_TICK]%asi				;\
1211	stxa	%g2, [%g3 + TRAP_ENT_SP]%asi	/* tag access */	;\
1212	stxa	%g5, [%g3 + TRAP_ENT_F1]%asi	/* tsb data */		;\
1213	rdpr	%tnpc, %g6						;\
1214	stxa	%g6, [%g3 + TRAP_ENT_F2]%asi	/* %tnpc */		;\
1215	stxa	%g1, [%g3 + TRAP_ENT_F3]%asi	/* tsb pointer */	;\
1216	stxa	%g0, [%g3 + TRAP_ENT_F4]%asi	/* zero */		;\
1217	rdpr	%tpc, %g6						;\
1218	stxa	%g6, [%g3 + TRAP_ENT_TPC]%asi				;\
1219	rdpr	%tl, %g6						;\
1220	stha	%g6, [%g3 + TRAP_ENT_TL]%asi				;\
1221	rdpr	%tt, %g6						;\
1222	or	%g6, (ttextra), %g6		/* g6 = %tt | ttextra */ ;\
1223	stha	%g6, [%g3 + TRAP_ENT_TT]%asi				;\
1224	ldxa	[%g0]ASI_IMMU, %g1		/* tag target */	;\
1225	ldxa	[%g0]ASI_DMMU, %g4		/* g4 = DMMU value */	;\
1226	cmp	%g6, FAST_IMMU_MISS_TT	/* NOTE(review): ttextra already OR-ed in */ ;\
1227	movne	%icc, %g4, %g1		/* not equal: log DMMU value */	;\
1228	stxa	%g1, [%g3 + TRAP_ENT_TSTATE]%asi /* tsb tag */		;\
1229	stxa	%g0, [%g3 + TRAP_ENT_TR]%asi	/* zero */		;\
1230	TRACE_NEXT(%g3, %g4, %g6)
1231#else
1232#define TRACE_TSBHIT(ttextra)	/* expands to nothing without TRAPTRACE */
1233#endif
1234
1235#if defined(lint)
1236
1236struct scb	trap_table;	/* lint-only declaration; real table is asm */
1237struct scb	scb;		/* trap_table/scb are the same object */
1239
1240#else /* lint */
1241
1242/*
1243 * =======================================================================
1244 *		SPARC V9 TRAP TABLE
1245 *
1246 * The trap table is divided into two halves: the first half is used when
1247 * taking traps when TL=0; the second half is used when taking traps from
1248 * TL>0. Note that handlers in the second half of the table might not be able
1249 * to make the same assumptions as handlers in the first half of the table.
1250 *
1251 * Worst case trap nesting so far:
1252 *
1253 *	at TL=0 client issues software trap requesting service
1254 *	at TL=1 nucleus wants a register window
1255 *	at TL=2 register window clean/spill/fill takes a TLB miss
1256 *	at TL=3 processing TLB miss
1257 *	at TL=4 handle asynchronous error
1258 *
1259 * Note that a trap from TL=4 to TL=5 places Spitfire in "RED mode".
1260 *
1261 * =======================================================================
1262 */
1263	.section ".text"
1264	.align	4
1265	.global trap_table, scb, trap_table0, trap_table1, etrap_table
1266	.type	trap_table, #object
1267	.type	scb, #object
1268trap_table:
1269scb:
1270trap_table0:			/* first half: traps taken when TL=0 */
1271	/* hardware traps */
1272	NOT;				/* 000	reserved */
1273	RED;				/* 001	power on reset */
1274	RED;				/* 002	watchdog reset */
1275	RED;				/* 003	externally initiated reset */
1276	RED;				/* 004	software initiated reset */
1277	RED;				/* 005	red mode exception */
1278	NOT; NOT;			/* 006 - 007 reserved */
1279	IMMU_EXCEPTION;			/* 008	instruction access exception */
1280	NOT;				/* 009	instruction access MMU miss */
1281	ASYNC_TRAP(T_INSTR_ERROR, trace_gen);
1282					/* 00A	instruction access error */
1283	NOT; NOT4;			/* 00B - 00F reserved */
1284	ILLTRAP_INSTR;			/* 010	illegal instruction */
1285	TRAP(T_PRIV_INSTR);		/* 011	privileged opcode */
1286	NOT;				/* 012	unimplemented LDD */
1287	NOT;				/* 013	unimplemented STD */
1288	NOT4; NOT4; NOT4;		/* 014 - 01F reserved */
1289	FP_DISABLED_TRAP;		/* 020	fp disabled */
1290	FP_IEEE_TRAP;			/* 021	fp exception ieee 754 */
1291	FP_TRAP;			/* 022	fp exception other */
1292	TAG_OVERFLOW;			/* 023	tag overflow */
1293	CLEAN_WINDOW;			/* 024 - 027 clean window */
1294	DIV_BY_ZERO;			/* 028	division by zero */
1295	NOT;				/* 029	internal processor error */
1296	NOT; NOT; NOT4;			/* 02A - 02F reserved */
1297	DMMU_EXCEPTION;			/* 030	data access exception */
1298	NOT;				/* 031	data access MMU miss */
1299	ASYNC_TRAP(T_DATA_ERROR, trace_gen);
1300					/* 032	data access error */
1301	NOT;				/* 033	data access protection */
1302	DMMU_EXC_AG_NOT_ALIGNED;	/* 034	mem address not aligned */
1303	DMMU_EXC_LDDF_NOT_ALIGNED;	/* 035	LDDF mem address not aligned */
1304	DMMU_EXC_STDF_NOT_ALIGNED;	/* 036	STDF mem address not aligned */
1305	DMMU_EXC_AG_PRIV;		/* 037	privileged action */
1306	NOT;				/* 038	LDQF mem address not aligned */
1307	NOT;				/* 039	STQF mem address not aligned */
1308	NOT; NOT; NOT4;			/* 03A - 03F reserved */
1309	NOT;				/* 040	async data error */
1310	LEVEL_INTERRUPT(1);		/* 041	interrupt level 1 */
1311	LEVEL_INTERRUPT(2);		/* 042	interrupt level 2 */
1312	LEVEL_INTERRUPT(3);		/* 043	interrupt level 3 */
1313	LEVEL_INTERRUPT(4);		/* 044	interrupt level 4 */
1314	LEVEL_INTERRUPT(5);		/* 045	interrupt level 5 */
1315	LEVEL_INTERRUPT(6);		/* 046	interrupt level 6 */
1316	LEVEL_INTERRUPT(7);		/* 047	interrupt level 7 */
1317	LEVEL_INTERRUPT(8);		/* 048	interrupt level 8 */
1318	LEVEL_INTERRUPT(9);		/* 049	interrupt level 9 */
1319	LEVEL_INTERRUPT(10);		/* 04A	interrupt level 10 */
1320	LEVEL_INTERRUPT(11);		/* 04B	interrupt level 11 */
1321	LEVEL_INTERRUPT(12);		/* 04C	interrupt level 12 */
1322	LEVEL_INTERRUPT(13);		/* 04D	interrupt level 13 */
1323	LEVEL14_INTERRUPT;		/* 04E	interrupt level 14 */
1324	LEVEL_INTERRUPT(15);		/* 04F	interrupt level 15 */
1325	NOT4; NOT4; NOT4; NOT4;		/* 050 - 05F reserved */
1326	VECTOR_INTERRUPT;		/* 060	interrupt vector */
1327	GOTO(kmdb_trap);		/* 061	PA watchpoint */
1328	GOTO(kmdb_trap);		/* 062	VA watchpoint */
1329	GOTO_TT(ce_err, trace_gen);	/* 063	corrected ECC error */
1330	ITLB_MISS(tt0);			/* 064 - 067 instruction access MMU miss */
1331	DTLB_MISS(tt0);			/* 068 - 06B data access MMU miss */
1332	DTLB_PROT;			/* 06C - 06F data access protection */
1333	LABELED_BAD(tt0_fecc);		/* 070  fast ecache ECC error */
1334	LABELED_BAD(tt0_dperr);		/* 071  Cheetah+ dcache parity error */
1335	LABELED_BAD(tt0_iperr);		/* 072  Cheetah+ icache parity error */
1336	NOT;				/* 073  reserved */
1337	NOT4; NOT4; NOT4;		/* 074 - 07F reserved */
1338	NOT4;				/* 080	spill 0 normal */
1339	SPILL_32bit_asi(ASI_AIUP,sn0);	/* 084	spill 1 normal */
1340	SPILL_64bit_asi(ASI_AIUP,sn0);	/* 088	spill 2 normal */
1341	SPILL_32clean(ASI_AIUP,sn0);	/* 08C	spill 3 normal */
1342	SPILL_64clean(ASI_AIUP,sn0);	/* 090	spill 4 normal */
1343	SPILL_32bit(not);		/* 094	spill 5 normal */
1344	SPILL_64bit(not);		/* 098	spill 6 normal */
1345	SPILL_mixed;			/* 09C	spill 7 normal */
1346	NOT4;				/* 0A0	spill 0 other */
1347	SPILL_32bit_asi(ASI_AIUS,so0);	/* 0A4	spill 1 other */
1348	SPILL_64bit_asi(ASI_AIUS,so0);	/* 0A8	spill 2 other */
1349	SPILL_32bit_asi(ASI_AIUS,so0);	/* 0AC	spill 3 other */
1350	SPILL_64bit_asi(ASI_AIUS,so0);	/* 0B0	spill 4 other */
1351	NOT4;				/* 0B4	spill 5 other */
1352	NOT4;				/* 0B8	spill 6 other */
1353	NOT4;				/* 0BC	spill 7 other */
1354	NOT4;				/* 0C0	fill 0 normal */
1355	FILL_32bit_asi(ASI_AIUP,fn0);	/* 0C4	fill 1 normal */
1356	FILL_64bit_asi(ASI_AIUP,fn0);	/* 0C8	fill 2 normal */
1357	FILL_32bit_asi(ASI_AIUP,fn0);	/* 0CC	fill 3 normal */
1358	FILL_64bit_asi(ASI_AIUP,fn0);	/* 0D0	fill 4 normal */
1359	FILL_32bit(not);		/* 0D4	fill 5 normal */
1360	FILL_64bit(not);		/* 0D8	fill 6 normal */
1361	FILL_mixed;			/* 0DC	fill 7 normal */
1362	NOT4;				/* 0E0	fill 0 other */
1363	NOT4;				/* 0E4	fill 1 other */
1364	NOT4;				/* 0E8	fill 2 other */
1365	NOT4;				/* 0EC	fill 3 other */
1366	NOT4;				/* 0F0	fill 4 other */
1367	NOT4;				/* 0F4	fill 5 other */
1368	NOT4;				/* 0F8	fill 6 other */
1369	NOT4;				/* 0FC	fill 7 other */
1370	/* user traps */
1371	GOTO(syscall_trap_4x);		/* 100	old system call */
1372	TRAP(T_BREAKPOINT);		/* 101	user breakpoint */
1373	TRAP(T_DIV0);			/* 102	user divide by zero */
1374	FLUSHW();			/* 103	flush windows */
1375	GOTO(.clean_windows);		/* 104	clean windows */
1376	BAD;				/* 105	range check ?? */
1377	GOTO(.fix_alignment);		/* 106	do unaligned references */
1378	BAD;				/* 107	unused */
1379	SYSCALL(syscall_trap32);	/* 108	ILP32 system call on LP64 */
1380	GOTO(set_trap0_addr);		/* 109	set trap0 address */
1381	BAD; BAD; BAD4;			/* 10A - 10F unused */
1382	TRP4; TRP4; TRP4; TRP4;		/* 110 - 11F V9 user trap handlers */
1383	GOTO(.getcc);			/* 120	get condition codes */
1384	GOTO(.setcc);			/* 121	set condition codes */
1385	GOTO(.getpsr);			/* 122	get psr */
1386	GOTO(.setpsr);			/* 123	set psr (some fields) */
1387	GOTO(get_timestamp);		/* 124	get timestamp */
1388	GOTO(get_virtime);		/* 125	get lwp virtual time */
1389	PRIV(self_xcall);		/* 126	self xcall */
1390	GOTO(get_hrestime);		/* 127	get hrestime */
1391	BAD;				/* 128	ST_SETV9STACK */
1392	GOTO(.getlgrp);			/* 129  get lgrpid */
1393	BAD; BAD; BAD4;			/* 12A - 12F unused */
1394	BAD4; BAD4; 			/* 130 - 137 unused */
1395	DTRACE_PID;			/* 138  dtrace pid tracing provider */
1396	DTRACE_FASTTRAP;		/* 139  dtrace fasttrap provider */
1397	DTRACE_RETURN;			/* 13A	dtrace pid return probe */
1398	BAD; BAD4;			/* 13B - 13F unused */
1399	SYSCALL(syscall_trap)		/* 140  LP64 system call */
1400	SYSCALL(nosys);			/* 141  unused system call trap */
1401#ifdef DEBUG_USER_TRAPTRACECTL	/* 142/143 control traptrace in these builds */
1402	GOTO(.traptrace_freeze);	/* 142  freeze traptrace */
1403	GOTO(.traptrace_unfreeze);	/* 143  unfreeze traptrace */
1404#else
1405	SYSCALL(nosys);			/* 142  unused system call trap */
1406	SYSCALL(nosys);			/* 143  unused system call trap */
1407#endif
1408	BAD4; BAD4; BAD4;		/* 144 - 14F unused */
1409	BAD4; BAD4; BAD4; BAD4;		/* 150 - 15F unused */
1410	BAD4; BAD4; BAD4; BAD4;		/* 160 - 16F unused */
1411	BAD;				/* 170 - unused */
1412	BAD;				/* 171 - unused */
1413	BAD; BAD;			/* 172 - 173 unused */
1414	BAD4; BAD4;			/* 174 - 17B unused */
1415#ifdef	PTL1_PANIC_DEBUG	/* 17C enters ptl1_panic with g1 = PTL1_BAD_DEBUG */
1416	mov PTL1_BAD_DEBUG, %g1; GOTO(ptl1_panic);
1417					/* 17C	test ptl1_panic */
1418#else
1419	BAD;				/* 17C  unused */
1420#endif	/* PTL1_PANIC_DEBUG */
1421	PRIV(kmdb_trap);		/* 17D	kmdb enter (L1-A) */
1422	PRIV(kmdb_trap);		/* 17E	kmdb breakpoint */
1423	PRIV(kctx_obp_bpt);		/* 17F	obp breakpoint */
1424	/* reserved */
1425	NOT4; NOT4; NOT4; NOT4;		/* 180 - 18F reserved */
1426	NOT4; NOT4; NOT4; NOT4;		/* 190 - 19F reserved */
1427	NOT4; NOT4; NOT4; NOT4;		/* 1A0 - 1AF reserved */
1428	NOT4; NOT4; NOT4; NOT4;		/* 1B0 - 1BF reserved */
1429	NOT4; NOT4; NOT4; NOT4;		/* 1C0 - 1CF reserved */
1430	NOT4; NOT4; NOT4; NOT4;		/* 1D0 - 1DF reserved */
1431	NOT4; NOT4; NOT4; NOT4;		/* 1E0 - 1EF reserved */
1432	NOT4; NOT4; NOT4; NOT4;		/* 1F0 - 1FF reserved */
1433trap_table1:			/* second half: traps taken when TL>0 */
1434	NOT4; NOT4; NOT; NOT;		/* 000 - 009 unused */
1435	ASYNC_TRAP(T_INSTR_ERROR + T_TL1, trace_gen);
1436					/* 00A	instruction access error */
1437	NOT; NOT4;			/* 00B - 00F unused */
1438	NOT4; NOT4; NOT4; NOT4;		/* 010 - 01F unused */
1439	NOT4;				/* 020 - 023 unused */
1440	CLEAN_WINDOW;			/* 024 - 027 clean window */
1441	NOT4; NOT4;			/* 028 - 02F unused */
1442	DMMU_EXCEPTION_TL1;		/* 030 	data access exception */
1443	NOT;				/* 031 unused */
1444	ASYNC_TRAP(T_DATA_ERROR + T_TL1, trace_gen);
1445					/* 032	data access error */
1446	NOT;				/* 033	unused */
1447	MISALIGN_ADDR_TL1;		/* 034	mem address not aligned */
1448	NOT; NOT; NOT; NOT4; NOT4	/* 035 - 03F unused */
1449	NOT4; NOT4; NOT4; NOT4;		/* 040 - 04F unused */
1450	NOT4; NOT4; NOT4; NOT4;		/* 050 - 05F unused */
1451	NOT;				/* 060	unused */
1452	GOTO(kmdb_trap_tl1);		/* 061	PA watchpoint */
1453	GOTO(kmdb_trap_tl1);		/* 062	VA watchpoint */
1454	GOTO_TT(ce_err_tl1, trace_gen);	/* 063	corrected ECC error */
1455	ITLB_MISS(tt1);			/* 064 - 067 instruction access MMU miss */
1456	DTLB_MISS(tt1);			/* 068 - 06B data access MMU miss */
1457	DTLB_PROT;			/* 06C - 06F data access protection */
1458	LABELED_BAD(tt1_fecc);		/* 070  fast ecache ECC error */
1459	LABELED_BAD(tt1_dperr);		/* 071  Cheetah+ dcache parity error */
1460	LABELED_BAD(tt1_iperr);		/* 072  Cheetah+ icache parity error */
1461	NOT;				/* 073  reserved */
1462	NOT4; NOT4; NOT4;		/* 074 - 07F reserved */
1463	NOT4;				/* 080	spill 0 normal */
1464	SPILL_32bit_tt1(ASI_AIUP,sn1);	/* 084	spill 1 normal */
1465	SPILL_64bit_tt1(ASI_AIUP,sn1);	/* 088	spill 2 normal */
1466	SPILL_32bit_tt1(ASI_AIUP,sn1);	/* 08C	spill 3 normal */
1467	SPILL_64bit_tt1(ASI_AIUP,sn1);	/* 090	spill 4 normal */
1468	SPILL_32bit(not);		/* 094	spill 5 normal */
1469	SPILL_64bit(not);		/* 098	spill 6 normal */
1470	SPILL_mixed;			/* 09C	spill 7 normal */
1471	NOT4;				/* 0A0	spill 0 other */
1472	SPILL_32bit_tt1(ASI_AIUS,so1);	/* 0A4	spill 1 other */
1473	SPILL_64bit_tt1(ASI_AIUS,so1);	/* 0A8	spill 2 other */
1474	SPILL_32bit_tt1(ASI_AIUS,so1);	/* 0AC	spill 3 other */
1475	SPILL_64bit_tt1(ASI_AIUS,so1);	/* 0B0  spill 4 other */
1476	NOT4;				/* 0B4  spill 5 other */
1477	NOT4;				/* 0B8  spill 6 other */
1478	NOT4;				/* 0BC  spill 7 other */
1479	NOT4;				/* 0C0	fill 0 normal */
1480	FILL_32bit_tt1(ASI_AIUP,fn1);	/* 0C4	fill 1 normal */
1481	FILL_64bit_tt1(ASI_AIUP,fn1);	/* 0C8	fill 2 normal */
1482	FILL_32bit_tt1(ASI_AIUP,fn1);	/* 0CC	fill 3 normal */
1483	FILL_64bit_tt1(ASI_AIUP,fn1);	/* 0D0	fill 4 normal */
1484	FILL_32bit(not);		/* 0D4	fill 5 normal */
1485	FILL_64bit(not);		/* 0D8	fill 6 normal */
1486	FILL_mixed;			/* 0DC	fill 7 normal */
1487	NOT4; NOT4; NOT4; NOT4;		/* 0E0 - 0EF unused */
1488	NOT4; NOT4; NOT4; NOT4;		/* 0F0 - 0FF unused */
1489	LABELED_BAD(tt1_swtrap0);	/* 100  fast ecache ECC error (cont) */
1490	LABELED_BAD(tt1_swtrap1);	/* 101  Ch+ D$ parity error (cont) */
1491	LABELED_BAD(tt1_swtrap2);	/* 102  Ch+ I$ parity error (cont) */
1492	NOT;				/* 103  reserved */
1493/*
1494 * We only reserve the above four special case soft traps for code running
1495 * at TL>0, so we can truncate the trap table here.
1496 */
1497etrap_table:			/* end of the (truncated) trap table */
1498	.size	trap_table, (.-trap_table)
1499	.size	scb, (.-scb)
1500
1501/*
1502 * We get to exec_fault when an instruction miss finds a tte that
1503 * has no execute bit set.  We go to tl0 to handle it.
1504 *
1505 * g1 = tsbe pointer (in/clobbered)
1506 * g2 = tag access register (in)
1507 * g3 - g4 = scratch (clobbered)
1508 * g5 = tsbe data (in)
1509 * g6 = scratch (clobbered)
1510 */
1511	ALTENTRY(exec_fault)
1512	TRACE_TSBHIT(0x200)			/* logged trap type gets 0x200 OR-ed in */
1513	SWITCH_GLOBALS				/* mmu->alt globals */
1514	mov	MMU_TAG_ACCESS, %g4
1515	ldxa	[%g4]ASI_IMMU, %g2			! arg1 = addr
1516	mov	T_INSTR_MMU_MISS, %g3			! arg2 = traptype
1517	set	trap, %g1			/* g1 = address of trap */
1518	ba,pt	%xcc, sys_trap
1519	  mov	-1, %g4				/* delay: g4 = -1 */
1520
/*
 * Misaligned data access.  The DMMU_EXC_*_NOT_ALIGNED table macros branch
 * here with %g2 = MMU_SFAR and %g3 = MMU_SFSR.  A privileged trap, or a
 * process with no p_utraps or no P_UTRAP15 entry, continues at
 * .mmu_exception_end with %g1 = T_ALIGNMENT.  Otherwise the user handler
 * in %g5 is entered through .setup_utrap (low bit of %sp clear, with
 * %g7 = %g2) or .setup_v9utrap (low bit of %sp set).
 */
1521.mmu_exception_not_aligned:
1522	rdpr	%tstate, %g1
1523	btst	TSTATE_PRIV, %g1
1524	bnz,pn	%icc, 2f			/* privileged: skip utrap lookup */
1525	nop
1526	CPU_ADDR(%g1, %g4)				! load CPU struct addr
1527	ldn	[%g1 + CPU_THREAD], %g1			! load thread pointer
1528	ldn	[%g1 + T_PROCP], %g1			! load proc pointer
1529	ldn	[%g1 + P_UTRAPS], %g5			! are there utraps?
1530	brz,pt	%g5, 2f				/* no p_utraps */
1531	nop
1532	ldn	[%g5 + P_UTRAP15], %g5			! unaligned utrap?
1533	brz,pn	%g5, 2f				/* no handler installed */
1534	nop
1535	btst	1, %sp
1536	bz,pt	%xcc, 1f				! 32 bit user program
1537	nop
1538	ba,pt	%xcc, .setup_v9utrap			! 64 bit user program
1539	nop
15401:
1541	ba,pt	%xcc, .setup_utrap
1542	or	%g2, %g0, %g7			/* delay: g7 = g2 (misaligned addr) */
15432:
1544	ba,pt	%xcc, .mmu_exception_end
1545	mov	T_ALIGNMENT, %g1		/* delay: g1 = trap type */
1546
/*
 * Privileged action trap.  DMMU_EXC_AG_PRIV branches here with
 * %g2 = MMU_SFAR and %g3 = MMU_SFSR.  A non-privileged trap whose process
 * has a P_UTRAP16 entry enters that handler through .setup_v9utrap;
 * every other case sets %g1 = T_PRIV_INSTR and falls through into
 * .mmu_exception_end.
 */
1547.mmu_priv_exception:
1548	rdpr	%tstate, %g1
1549	btst	TSTATE_PRIV, %g1
1550	bnz,pn	%icc, 1f			/* privileged: skip utrap lookup */
1551	nop
1552	CPU_ADDR(%g1, %g4)				! load CPU struct addr
1553	ldn	[%g1 + CPU_THREAD], %g1			! load thread pointer
1554	ldn	[%g1 + T_PROCP], %g1			! load proc pointer
1555	ldn	[%g1 + P_UTRAPS], %g5			! are there utraps?
1556	brz,pt	%g5, 1f				/* no p_utraps */
1557	nop
1558	ldn	[%g5 + P_UTRAP16], %g5
1559	brnz,pt	%g5, .setup_v9utrap		/* handler installed */
1560	nop
15611:
1562	mov	T_PRIV_INSTR, %g1		/* falls through to .mmu_exception_end */
1563
/*
 * Common tail for the MMU exception entries.
 * On entry: %g1 = trap type, %g2 = fault address or %tpc, %g3 = MMU_SFSR.
 * If CPU_DTRACE_NOFAULT is set in this CPU's cpu_core dtrace flags, the
 * flags are stored back with CPU_DTRACE_BADADDR added and the trap ends
 * with "done".  Otherwise control continues at .mmu_exception_tlb_chk.
 */
1564.mmu_exception_end:
1565	CPU_INDEX(%g4, %g5)
1566	set	cpu_core, %g5
1567	sllx	%g4, CPU_CORE_SHIFT, %g4
1568	add	%g4, %g5, %g4			/* g4 = cpu_core + (g4 << CPU_CORE_SHIFT) */
1569	lduh	[%g4 + CPUC_DTRACE_FLAGS], %g5
1570	andcc	%g5, CPU_DTRACE_NOFAULT, %g0
1571	bz	%xcc, .mmu_exception_tlb_chk	/* NOFAULT clear */
1572	or	%g5, CPU_DTRACE_BADADDR, %g5	/* delay: only stored if NOFAULT set */
1573	stuh	%g5, [%g4 + CPUC_DTRACE_FLAGS]
1574	done
1575
/*
 * On entry: %g1 = trap type, %g3 = MMU_SFSR.
 * When GET_CPU_IMPL yields PANTHER_IMPL and the SFSR bit at
 * PN_SFSR_PARITY_SHIFT is set, jump to the weak symbol itlb_parity_trap
 * (for T_INSTR_EXCEPTION) or dtlb_parity_trap (for T_DATA_EXCEPTION).
 * In every other case label 2 packs %g3 = (SFSR << 32) | trap type and
 * goes to sys_trap with %g1 = trap and %g4 = -1.
 */
1576.mmu_exception_tlb_chk:
1577	GET_CPU_IMPL(%g5)			! check SFSR.FT to see if this
1578	cmp	%g5, PANTHER_IMPL		! is a TLB parity error. But
1579	bne	2f				! we only do this check while
1580	mov	1, %g4				! running on Panther CPUs
1581	sllx	%g4, PN_SFSR_PARITY_SHIFT, %g4	! since US-I/II use the same
1582	andcc	%g3, %g4, %g0			! bit for something else which
1583	bz	2f				! will be handled later.
1584	nop
1585.mmu_exception_is_tlb_parity:
1586	.weak itlb_parity_trap
1587	.weak dtlb_parity_trap
1588	set	itlb_parity_trap, %g4
1589	cmp	%g1, T_INSTR_EXCEPTION		! branch to the itlb or
1590	be	3f				! dtlb parity handler
1591	nop					! if this trap is due
1592	set	dtlb_parity_trap, %g4
1593	cmp	%g1, T_DATA_EXCEPTION		! to a IMMU exception
1594	be	3f				! or DMMU exception.
1595	nop
15962:
1597	sllx	%g3, 32, %g3
1598	or	%g3, %g1, %g3			/* g3 = (SFSR << 32) | trap type */
1599	set	trap, %g1
1600	ba,pt	%xcc, sys_trap
1601	sub	%g0, 1, %g4			/* delay: g4 = -1 */
16023:
1603	jmp	%g4				! off to the appropriate
1604	nop					! TLB parity handler
1605
/*
 * fp disabled handling (presumably reached from the FP_DISABLED_TRAP
 * table entry, whose definition is not in this part of the file).
 * A privileged trap goes to ptl1_panic with %g1 = PTL1_BAD_FPTRAP;
 * under SF_ERRATA_30 it goes to label 2 instead, as does a NULL thread
 * pointer.  For a user trap with a P_UTRAP7 entry the handler in %g5 is
 * entered through .setup_utrap (low bit of %sp clear, %g7 = 0) or
 * .setup_v9utrap (low bit set).  Label 2 runs fp_disabled through
 * sys_trap with %g4 = -1.
 */
1606.fp_disabled:
1607	CPU_ADDR(%g1, %g4)				! load CPU struct addr
1608	ldn	[%g1 + CPU_THREAD], %g1			! load thread pointer
1609#ifdef SF_ERRATA_30 /* call causes fp-disabled */
1610	brz,a,pn %g1, 2f
1611	  nop
1612#endif
1613	rdpr	%tstate, %g4
1614	btst	TSTATE_PRIV, %g4
1615#ifdef SF_ERRATA_30 /* call causes fp-disabled */
1616	bnz,pn %icc, 2f
1617	  nop
1618#else
1619	bnz,a,pn %icc, ptl1_panic
1620	  mov	PTL1_BAD_FPTRAP, %g1
1621#endif
1622	ldn	[%g1 + T_PROCP], %g1			! load proc pointer
1623	ldn	[%g1 + P_UTRAPS], %g5			! are there utraps?
1624	brz,a,pt %g5, 2f
1625	  nop
1626	ldn	[%g5 + P_UTRAP7], %g5			! fp_disabled utrap?
1627	brz,a,pn %g5, 2f
1628	  nop
1629	btst	1, %sp
1630	bz,a,pt	%xcc, 1f				! 32 bit user program
1631	  nop
1632	ba,a,pt	%xcc, .setup_v9utrap			! 64 bit user program
1633	  nop
16341:
1635	ba,pt	%xcc, .setup_utrap
1636	  or	%g0, %g0, %g7			/* delay: g7 = 0 */
16372:
1638	set	fp_disabled, %g1
1639	ba,pt	%xcc, sys_trap
1640	  sub	%g0, 1, %g4			/* delay: g4 = -1 */
1641
/*
 * fp exception ieee 754 handling.  A privileged trap goes to ptl1_panic
 * with %g1 = PTL1_BAD_FPTRAP.  Otherwise %fsr is copied into %g2 by way
 * of CPU_TMP1; a process with a P_UTRAP8 entry enters that handler
 * through .setup_v9utrap, and every other case runs _fp_ieee_exception
 * through sys_trap with %g4 = -1.
 */
1642.fp_ieee_exception:
1643	rdpr	%tstate, %g1
1644	btst	TSTATE_PRIV, %g1
1645	bnz,a,pn %icc, ptl1_panic
1646	  mov	PTL1_BAD_FPTRAP, %g1
1647	CPU_ADDR(%g1, %g4)				! load CPU struct addr
1648	stx	%fsr, [%g1 + CPU_TMP1]
1649	ldx	[%g1 + CPU_TMP1], %g2		/* g2 = %fsr, via memory */
1650	ldn	[%g1 + CPU_THREAD], %g1			! load thread pointer
1651	ldn	[%g1 + T_PROCP], %g1			! load proc pointer
1652	ldn	[%g1 + P_UTRAPS], %g5			! are there utraps?
1653	brz,a,pt %g5, 1f
1654	  nop
1655	ldn	[%g5 + P_UTRAP8], %g5
1656	brnz,a,pt %g5, .setup_v9utrap
1657	  nop
16581:
1659	set	_fp_ieee_exception, %g1
1660	ba,pt	%xcc, sys_trap
1661	  sub	%g0, 1, %g4			/* delay: g4 = -1 */
1662
1663/*
1664 * Register Inputs:
1665 *	%g5		user trap handler
1666 *	%g7		misaligned addr - for alignment traps only
1667 */
1668.setup_utrap:
1669	set	trap, %g1			! setup in case we go
1670	mov	T_FLUSH_PCB, %g3		! through sys_trap on
1671	sub	%g0, 1, %g4			! the save instruction below
1672
1673	/*
1674	 * If the DTrace pid provider is single stepping a copied-out
1675	 * instruction, t->t_dtrace_step will be set. In that case we need
1676	 * to abort the single-stepping (since execution of the instruction
1677	 * was interrupted) and use the value of t->t_dtrace_npc as the %npc.
1678	 */
1679	save	%sp, -SA(MINFRAME32), %sp	! window for trap handler
1680	CPU_ADDR(%g1, %g4)			! load CPU struct addr
1681	ldn	[%g1 + CPU_THREAD], %g1		! load thread pointer
1682	ldub	[%g1 + T_DTRACE_STEP], %g2	! load t->t_dtrace_step
1683	rdpr	%tnpc, %l2			! arg1 == tnpc
1684	brz,pt	%g2, 1f
1685	rdpr	%tpc, %l1			! arg0 == tpc
1686
1687	ldub	[%g1 + T_DTRACE_AST], %g2	! load t->t_dtrace_ast
1688	ldn	[%g1 + T_DTRACE_NPC], %l2	! arg1 = t->t_dtrace_npc (step)
1689	brz,pt	%g2, 1f
1690	st	%g0, [%g1 + T_DTRACE_FT]	! zero all pid provider flags
1691	stub	%g2, [%g1 + T_ASTFLAG]		! aston(t) if t->t_dtrace_ast
16921:
1693	mov	%g7, %l3			! arg2 == misaligned address
1694
1695	rdpr	%tstate, %g1			! cwp for trap handler
1696	rdpr	%cwp, %g4
1697	bclr	TSTATE_CWP_MASK, %g1
1698	wrpr	%g1, %g4, %tstate		/* TSTATE CWP field = current %cwp */
1699	wrpr	%g0, %g5, %tnpc			! trap handler address
1700	FAST_TRAP_DONE
1701	/* NOTREACHED */
1702
/*
 * Decide whether a trap goes to a V9 user trap handler or to trap().
 * On entry %g3 holds the trap type.  %g2 is used at label 2 as the byte
 * offset into p_utraps; it is computed here only for T_SOFTWARE_TRAP
 * and is otherwise expected from the caller (not visible in this part of
 * the file -- TODO confirm).  Privileged traps, a NULL p_utraps, a NULL
 * handler slot, or a T_UNIMP_INSTR whose instruction has bits set under
 * mask 0xc1c00000 all end at label 3: sys_trap with %g1 = trap, %g4 = -1.
 */
1703.check_v9utrap:
1704	rdpr	%tstate, %g1
1705	btst	TSTATE_PRIV, %g1
1706	bnz,a,pn %icc, 3f
1707	  nop
1708	CPU_ADDR(%g4, %g1)				! load CPU struct addr
1709	ldn	[%g4 + CPU_THREAD], %g5			! load thread pointer
1710	ldn	[%g5 + T_PROCP], %g5			! load proc pointer
1711	ldn	[%g5 + P_UTRAPS], %g5			! are there utraps?
1712
1713	cmp	%g3, T_SOFTWARE_TRAP
1714	bne,a,pt %icc, 1f
1715	  nop
1716
1717	brz,pt %g5, 3f			! if p_utraps == NULL goto trap()
1718	  rdpr	%tt, %g3		! delay - get actual hw trap type
1719
1720	sub	%g3, 254, %g1		! UT_TRAP_INSTRUCTION_16 = p_utraps[18]
1721	ba,pt	%icc, 2f
1722	  smul	%g1, CPTRSIZE, %g2	/* delay: g2 = index * CPTRSIZE */
17231:
1724	brz,a,pt %g5, 3f		! if p_utraps == NULL goto trap()
1725	  nop
1726
1727	cmp	%g3, T_UNIMP_INSTR
1728	bne,a,pt %icc, 2f
1729	  nop
1730
1731	mov	1, %g1
1732	st	%g1, [%g4 + CPU_TL1_HDLR] ! set CPU_TL1_HDLR
1733	rdpr	%tpc, %g1		! ld trapping instruction using
1734	lduwa	[%g1]ASI_AIUP, %g1	! "AS IF USER" ASI which could fault
1735	st	%g0, [%g4 + CPU_TL1_HDLR] ! clr CPU_TL1_HDLR
1736
1737	sethi	%hi(0xc1c00000), %g4	! setup mask for illtrap instruction
1738	andcc	%g1, %g4, %g4		! and instruction with mask
1739	bnz,a,pt %icc, 3f		! if %g4 == zero, %g1 is an ILLTRAP
1740	  nop				! fall thru to setup
17412:
1742	ldn	[%g5 + %g2], %g5	/* g5 = handler slot at p_utraps + g2 */
1743	brnz,a,pt %g5, .setup_v9utrap
1744	  nop
17453:
1746	set	trap, %g1
1747	ba,pt	%xcc, sys_trap
1748	  sub	%g0, 1, %g4		/* delay: g4 = -1 */
1749	/* NOTREACHED */
1750
1751/*
1752 * Register Inputs:
1753 *	%g5		user trap handler
1754 */
1755.setup_v9utrap:
1756	set	trap, %g1			! setup in case we go
1757	mov	T_FLUSH_PCB, %g3		! through sys_trap on
1758	sub	%g0, 1, %g4			! the save instruction below
1759
1760	/*
1761	 * If the DTrace pid provider is single stepping a copied-out
1762	 * instruction, t->t_dtrace_step will be set. In that case we need
1763	 * to abort the single-stepping (since execution of the instruction
1764	 * was interrupted) and use the value of t->t_dtrace_npc as the %npc.
1765	 */
1766	save	%sp, -SA(MINFRAME64), %sp	! window for trap handler
1767	CPU_ADDR(%g1, %g4)			! load CPU struct addr
1768	ldn	[%g1 + CPU_THREAD], %g1		! load thread pointer
1769	ldub	[%g1 + T_DTRACE_STEP], %g2	! load t->t_dtrace_step
1770	rdpr	%tnpc, %l7			! arg1 == tnpc
1771	brz,pt	%g2, 1f
1772	rdpr	%tpc, %l6			! arg0 == tpc
1773
1774	ldub	[%g1 + T_DTRACE_AST], %g2	! load t->t_dtrace_ast
1775	ldn	[%g1 + T_DTRACE_NPC], %l7	! arg1 == t->t_dtrace_npc (step)
1776	brz,pt	%g2, 1f
1777	st	%g0, [%g1 + T_DTRACE_FT]	! zero all pid provider flags
1778	stub	%g2, [%g1 + T_ASTFLAG]		! aston(t) if t->t_dtrace_ast
17791:
1780	rdpr	%tstate, %g2			! cwp for trap handler
1781	rdpr	%cwp, %g4
1782	bclr	TSTATE_CWP_MASK, %g2
1783	wrpr	%g2, %g4, %tstate		/* TSTATE CWP field = current %cwp */
1784
1785	ldn	[%g1 + T_PROCP], %g4		! load proc pointer
1786	ldn	[%g4 + P_AS], %g4		! load as pointer
1787	ldn	[%g4 + A_USERLIMIT], %g4	! load as userlimit
1788	cmp	%l7, %g4			! check for single-step set
1789	bne,pt	%xcc, 4f
1790	  nop
1791	ldn	[%g1 + T_LWP], %g1		! load klwp pointer
1792	ld	[%g1 + PCB_STEP], %g4		! load single-step flag
1793	cmp	%g4, STEP_ACTIVE		! step flags set in pcb?
1794	bne,pt	%icc, 4f
1795	  nop
1796	stn	%g5, [%g1 + PCB_TRACEPC]	! save trap handler addr in pcb
1797	mov	%l7, %g4			! on entry to precise user trap
1798	add	%l6, 4, %l7			! handler, %l6 == pc, %l7 == npc
1799						! at time of trap
1800	wrpr	%g0, %g4, %tnpc			! generate FLTBOUNDS,
1801						! %g4 == userlimit
1802	FAST_TRAP_DONE
1803	/* NOTREACHED */
18044:
1805	wrpr	%g0, %g5, %tnpc			! trap handler address
1806	FAST_TRAP_DONE_CHK_INTR
1807	/* NOTREACHED */
1808
1809.fp_exception:
1810	CPU_ADDR(%g1, %g4)
1811	stx	%fsr, [%g1 + CPU_TMP1]
1812	ldx	[%g1 + CPU_TMP1], %g2
1813
1814	/*
1815	 * Cheetah takes unfinished_FPop trap for certain range of operands
1816	 * to the "fitos" instruction. Instead of going through the slow
1817	 * software emulation path, we try to simulate the "fitos" instruction
1818	 * via "fitod" and "fdtos" provided the following conditions are met:
1819	 *
1820	 *	fpu_exists is set (if DEBUG)
1821	 *	not in privileged mode
1822	 *	ftt is unfinished_FPop
1823	 *	NXM IEEE trap is not enabled
1824	 *	instruction at %tpc is "fitos"
1825	 *
1826	 *  Usage:
1827	 *	%g1	per cpu address
1828	 *	%g2	%fsr
1829	 *	%g6	user instruction
1830	 *
1831	 * Note that we can take a memory access related trap while trying
1832	 * to fetch the user instruction. Therefore, we set CPU_TL1_HDLR
1833	 * flag to catch those traps and let the SFMMU code deal with page
1834	 * fault and data access exception.
1835	 */
1836#if defined(DEBUG) || defined(NEED_FPU_EXISTS)
1837	sethi	%hi(fpu_exists), %g7
1838	ld	[%g7 + %lo(fpu_exists)], %g7
1839	brz,pn %g7, .fp_exception_cont
1840	  nop
1841#endif
1842	rdpr	%tstate, %g7			! branch if in privileged mode
1843	btst	TSTATE_PRIV, %g7
1844	bnz,pn	%xcc, .fp_exception_cont
1845	srl	%g2, FSR_FTT_SHIFT, %g7		! extract ftt from %fsr
1846	and	%g7, (FSR_FTT>>FSR_FTT_SHIFT), %g7
1847	cmp	%g7, FTT_UNFIN
1848	set	FSR_TEM_NX, %g5
1849	bne,pn	%xcc, .fp_exception_cont	! branch if NOT unfinished_FPop
1850	  andcc	%g2, %g5, %g0
1851	bne,pn	%xcc, .fp_exception_cont	! branch if FSR_TEM_NX enabled
1852	  rdpr	%tpc, %g5			! get faulting PC
1853
1854	or	%g0, 1, %g7
1855	st	%g7, [%g1 + CPU_TL1_HDLR]	! set tl1_hdlr flag
1856	lda	[%g5]ASI_USER, %g6		! get user's instruction
1857	st	%g0, [%g1 + CPU_TL1_HDLR]	! clear tl1_hdlr flag
1858
1859	set	FITOS_INSTR_MASK, %g7
1860	and	%g6, %g7, %g7
1861	set	FITOS_INSTR, %g5
1862	cmp	%g7, %g5
1863	bne,pn	%xcc, .fp_exception_cont	! branch if not FITOS_INSTR
1864	 nop
1865
1866	/*
1867	 * This is unfinished FPops trap for "fitos" instruction. We
1868	 * need to simulate "fitos" via "fitod" and "fdtos" instruction
1869	 * sequence.
1870	 *
1871	 * We need a temporary FP register to do the conversion. Since
1872	 * both source and destination operands for the "fitos" instruction
1873	 * have to be within %f0-%f31, we use an FP register from the upper
1874	 * half to guarantee that it won't collide with the source or the
1875	 * dest operand. However, we do have to save and restore its value.
1876	 *
1877	 * We use %d62 as a temporary FP register for the conversion and
1878	 * branch to appropriate instruction within the conversion tables
1879	 * based upon the rs2 and rd values.
1880	 */
1881
1882	std	%d62, [%g1 + CPU_TMP1]		! save original value
1883
1884	srl	%g6, FITOS_RS2_SHIFT, %g7
1885	and	%g7, FITOS_REG_MASK, %g7
1886	set	_fitos_fitod_table, %g4
1887	sllx	%g7, 2, %g7
1888	jmp	%g4 + %g7
1889	  ba,pt	%xcc, _fitos_fitod_done
1890	.empty
1891
1892_fitos_fitod_table:
1893	  fitod	%f0, %d62
1894	  fitod	%f1, %d62
1895	  fitod	%f2, %d62
1896	  fitod	%f3, %d62
1897	  fitod	%f4, %d62
1898	  fitod	%f5, %d62
1899	  fitod	%f6, %d62
1900	  fitod	%f7, %d62
1901	  fitod	%f8, %d62
1902	  fitod	%f9, %d62
1903	  fitod	%f10, %d62
1904	  fitod	%f11, %d62
1905	  fitod	%f12, %d62
1906	  fitod	%f13, %d62
1907	  fitod	%f14, %d62
1908	  fitod	%f15, %d62
1909	  fitod	%f16, %d62
1910	  fitod	%f17, %d62
1911	  fitod	%f18, %d62
1912	  fitod	%f19, %d62
1913	  fitod	%f20, %d62
1914	  fitod	%f21, %d62
1915	  fitod	%f22, %d62
1916	  fitod	%f23, %d62
1917	  fitod	%f24, %d62
1918	  fitod	%f25, %d62
1919	  fitod	%f26, %d62
1920	  fitod	%f27, %d62
1921	  fitod	%f28, %d62
1922	  fitod	%f29, %d62
1923	  fitod	%f30, %d62
1924	  fitod	%f31, %d62
1925_fitos_fitod_done:
1926
1927	/*
1928	 * Now convert data back into single precision
1929	 */
1930	srl	%g6, FITOS_RD_SHIFT, %g7
1931	and	%g7, FITOS_REG_MASK, %g7
1932	set	_fitos_fdtos_table, %g4
1933	sllx	%g7, 2, %g7
1934	jmp	%g4 + %g7
1935	  ba,pt	%xcc, _fitos_fdtos_done
1936	.empty
1937
1938_fitos_fdtos_table:
1939	  fdtos	%d62, %f0
1940	  fdtos	%d62, %f1
1941	  fdtos	%d62, %f2
1942	  fdtos	%d62, %f3
1943	  fdtos	%d62, %f4
1944	  fdtos	%d62, %f5
1945	  fdtos	%d62, %f6
1946	  fdtos	%d62, %f7
1947	  fdtos	%d62, %f8
1948	  fdtos	%d62, %f9
1949	  fdtos	%d62, %f10
1950	  fdtos	%d62, %f11
1951	  fdtos	%d62, %f12
1952	  fdtos	%d62, %f13
1953	  fdtos	%d62, %f14
1954	  fdtos	%d62, %f15
1955	  fdtos	%d62, %f16
1956	  fdtos	%d62, %f17
1957	  fdtos	%d62, %f18
1958	  fdtos	%d62, %f19
1959	  fdtos	%d62, %f20
1960	  fdtos	%d62, %f21
1961	  fdtos	%d62, %f22
1962	  fdtos	%d62, %f23
1963	  fdtos	%d62, %f24
1964	  fdtos	%d62, %f25
1965	  fdtos	%d62, %f26
1966	  fdtos	%d62, %f27
1967	  fdtos	%d62, %f28
1968	  fdtos	%d62, %f29
1969	  fdtos	%d62, %f30
1970	  fdtos	%d62, %f31
1971_fitos_fdtos_done:
1972
1973	ldd	[%g1 + CPU_TMP1], %d62		! restore %d62
1974
1975#if DEBUG
1976	/*
1977	 * Update FPop_unfinished trap kstat
1978	 */
1979	set	fpustat+FPUSTAT_UNFIN_KSTAT, %g7
1980	ldx	[%g7], %g5
19811:
1982	add	%g5, 1, %g6
1983
1984	casxa	[%g7] ASI_N, %g5, %g6
1985	cmp	%g5, %g6
1986	bne,a,pn %xcc, 1b
1987	  or	%g0, %g6, %g5
1988
1989	/*
1990	 * Update fpu_sim_fitos kstat
1991	 */
1992	set	fpuinfo+FPUINFO_FITOS_KSTAT, %g7
1993	ldx	[%g7], %g5
19941:
1995	add	%g5, 1, %g6
1996
1997	casxa	[%g7] ASI_N, %g5, %g6
1998	cmp	%g5, %g6
1999	bne,a,pn %xcc, 1b
2000	  or	%g0, %g6, %g5
2001#endif /* DEBUG */
2002
2003	FAST_TRAP_DONE
2004
2005.fp_exception_cont:
2006	/*
2007	 * Let _fp_exception deal with simulating FPop instruction.
2008	 * Note that we need to pass %fsr in %g2 (already read above).
2009	 */
2010
	/*
	 * Branch to sys_trap with the handler address in %g1 and -1 in %g4.
	 * The sub sits in the delay slot of the branch, so it executes
	 * before control reaches sys_trap.
	 * NOTE(review): the meaning sys_trap gives to %g4 = -1 is defined
	 * outside this chunk - confirm there.
	 */
2011	set	_fp_exception, %g1
2012	ba,pt	%xcc, sys_trap
2013	sub	%g0, 1, %g4
2014
2015.clean_windows:
	/*
	 * Flush the register windows with flushw (bracketed by a
	 * save/restore pair), then set %cleanwin to zero.
	 * %g1, %g3 and %g4 are loaded with trap, T_FLUSH_PCB and -1 before
	 * that sequence. NOTE(review): presumably they are arguments for a
	 * trap raised out of the flush - confirm against sys_trap.
	 */
2016	set	trap, %g1
2017	mov	T_FLUSH_PCB, %g3
2018	sub	%g0, 1, %g4
2019	save
2020	flushw
2021	restore
2022	wrpr	%g0, %g0, %cleanwin	! no clean windows
2023
	/*
	 * Load the pointer at CPU_MPCB. When it is non-NULL, set %wstate
	 * to the 32-bit value at MPCB_WSTATE plus WSTATE_CLEAN_OFFSET.
	 * A NULL pointer skips the update and goes straight to label 1.
	 */
2024	CPU_ADDR(%g4, %g5)
2025	ldn	[%g4 + CPU_MPCB], %g4
2026	brz,a,pn %g4, 1f
2027	  nop
2028	ld	[%g4 + MPCB_WSTATE], %g5
2029	add	%g5, WSTATE_CLEAN_OFFSET, %g5
2030	wrpr	%g0, %g5, %wstate
20311:	FAST_TRAP_DONE
2032
2033/*
2034 * .spill_clean: clean the previous window, restore the wstate, and
2035 * "done".
2036 *
2037 * Entry: %g7 contains new wstate
2038 */
2039.spill_clean:
	/*
	 * Step %cwp back by one window. deccc sets the condition codes,
	 * and movneg substitutes nwin_minus_one when the decrement went
	 * below zero, so window 0 wraps to the highest window.
	 */
2040	sethi	%hi(nwin_minus_one), %g5
2041	ld	[%g5 + %lo(nwin_minus_one)], %g5 ! %g5 = nwin - 1
2042	rdpr	%cwp, %g6			! %g6 = %cwp
2043	deccc	%g6				! %g6--
2044	movneg	%xcc, %g5, %g6			! if (%g6<0) %g6 = nwin-1
2045	wrpr	%g6, %cwp
	/*
	 * NOTE(review): TT_TRACE_L is defined outside this chunk. The
	 * freeze and unfreeze handlers in this file preserve %l0, %l1,
	 * %l2 and %l4 around it, which suggests it may use those locals.
	 */
2046	TT_TRACE_L(trace_win)
	/* zero all eight locals of the window selected above */
2047	clr	%l0
2048	clr	%l1
2049	clr	%l2
2050	clr	%l3
2051	clr	%l4
2052	clr	%l5
2053	clr	%l6
2054	clr	%l7
	/* install the wstate passed in %g7, then resume the trapped code */
2055	wrpr	%g0, %g7, %wstate
2056	saved
2057	retry			! restores correct %cwp
2058
2059.fix_alignment:
	/*
	 * Store the byte value 1 at offset P_FIXALIGNMENT of the structure
	 * reached from the cpu through CPU_THREAD and then T_PROCP, and
	 * return with FAST_TRAP_DONE. Only %g1 and %g2 are written.
	 * NOTE(review): by the offset names the target is the current
	 * thread's proc - confirm against the assym definitions.
	 */
2060	CPU_ADDR(%g1, %g2)		! load CPU struct addr to %g1 using %g2
2061	ldn	[%g1 + CPU_THREAD], %g1	! load thread pointer
2062	ldn	[%g1 + T_PROCP], %g1
2063	mov	1, %g2
2064	stb	%g2, [%g1 + P_FIXALIGNMENT]
2065	FAST_TRAP_DONE
2066
/*
 * STDF_REG(REG, ADDR, TMP)
 *
 * Store the double FP register selected by REG to [ADDR + CPU_TMP1].
 *
 * REG is shifted left by 3 because each table entry is two
 * instructions (8 bytes): a branch to done1 with the std in its delay
 * slot. The entries run %f0, %f32, %f2, %f34, ... %f30, %f62, which
 * is the order produced by the 5-bit rd field that the caller in this
 * file extracts from the faulting instruction.
 *
 * REG is destroyed and TMP receives the address of start1. The labels
 * mark1, start1 and done1 are ordinary symbols, so a second expansion
 * in the same file would define them twice.
 */
2067#define	STDF_REG(REG, ADDR, TMP)		\
2068	sll	REG, 3, REG			;\
2069mark1:	set	start1, TMP			;\
2070	jmp	REG + TMP			;\
2071	  nop					;\
2072start1:	ba,pt	%xcc, done1			;\
2073	  std	%f0, [ADDR + CPU_TMP1]		;\
2074	ba,pt	%xcc, done1			;\
2075	  std	%f32, [ADDR + CPU_TMP1]		;\
2076	ba,pt	%xcc, done1			;\
2077	  std	%f2, [ADDR + CPU_TMP1]		;\
2078	ba,pt	%xcc, done1			;\
2079	  std	%f34, [ADDR + CPU_TMP1]		;\
2080	ba,pt	%xcc, done1			;\
2081	  std	%f4, [ADDR + CPU_TMP1]		;\
2082	ba,pt	%xcc, done1			;\
2083	  std	%f36, [ADDR + CPU_TMP1]		;\
2084	ba,pt	%xcc, done1			;\
2085	  std	%f6, [ADDR + CPU_TMP1]		;\
2086	ba,pt	%xcc, done1			;\
2087	  std	%f38, [ADDR + CPU_TMP1]		;\
2088	ba,pt	%xcc, done1			;\
2089	  std	%f8, [ADDR + CPU_TMP1]		;\
2090	ba,pt	%xcc, done1			;\
2091	  std	%f40, [ADDR + CPU_TMP1]		;\
2092	ba,pt	%xcc, done1			;\
2093	  std	%f10, [ADDR + CPU_TMP1]		;\
2094	ba,pt	%xcc, done1			;\
2095	  std	%f42, [ADDR + CPU_TMP1]		;\
2096	ba,pt	%xcc, done1			;\
2097	  std	%f12, [ADDR + CPU_TMP1]		;\
2098	ba,pt	%xcc, done1			;\
2099	  std	%f44, [ADDR + CPU_TMP1]		;\
2100	ba,pt	%xcc, done1			;\
2101	  std	%f14, [ADDR + CPU_TMP1]		;\
2102	ba,pt	%xcc, done1			;\
2103	  std	%f46, [ADDR + CPU_TMP1]		;\
2104	ba,pt	%xcc, done1			;\
2105	  std	%f16, [ADDR + CPU_TMP1]		;\
2106	ba,pt	%xcc, done1			;\
2107	  std	%f48, [ADDR + CPU_TMP1]		;\
2108	ba,pt	%xcc, done1			;\
2109	  std	%f18, [ADDR + CPU_TMP1]		;\
2110	ba,pt	%xcc, done1			;\
2111	  std	%f50, [ADDR + CPU_TMP1]		;\
2112	ba,pt	%xcc, done1			;\
2113	  std	%f20, [ADDR + CPU_TMP1]		;\
2114	ba,pt	%xcc, done1			;\
2115	  std	%f52, [ADDR + CPU_TMP1]		;\
2116	ba,pt	%xcc, done1			;\
2117	  std	%f22, [ADDR + CPU_TMP1]		;\
2118	ba,pt	%xcc, done1			;\
2119	  std	%f54, [ADDR + CPU_TMP1]		;\
2120	ba,pt	%xcc, done1			;\
2121	  std	%f24, [ADDR + CPU_TMP1]		;\
2122	ba,pt	%xcc, done1			;\
2123	  std	%f56, [ADDR + CPU_TMP1]		;\
2124	ba,pt	%xcc, done1			;\
2125	  std	%f26, [ADDR + CPU_TMP1]		;\
2126	ba,pt	%xcc, done1			;\
2127	  std	%f58, [ADDR + CPU_TMP1]		;\
2128	ba,pt	%xcc, done1			;\
2129	  std	%f28, [ADDR + CPU_TMP1]		;\
2130	ba,pt	%xcc, done1			;\
2131	  std	%f60, [ADDR + CPU_TMP1]		;\
2132	ba,pt	%xcc, done1			;\
2133	  std	%f30, [ADDR + CPU_TMP1]		;\
2134	ba,pt	%xcc, done1			;\
2135	  std	%f62, [ADDR + CPU_TMP1]		;\
2136done1:
2137
/*
 * LDDF_REG(REG, ADDR, TMP)
 *
 * Load the double FP register selected by REG from [ADDR + CPU_TMP1].
 *
 * REG is shifted left by 3 because each table entry is two
 * instructions (8 bytes): a branch to done2 with the ldd in its delay
 * slot. The entries run %f0, %f32, %f2, %f34, ... %f30, %f62, which
 * is the order produced by the 5-bit rd field that the caller in this
 * file extracts from the faulting instruction.
 *
 * REG is destroyed and TMP receives the address of start2. The labels
 * mark2, start2 and done2 are ordinary symbols, so a second expansion
 * in the same file would define them twice.
 */
2138#define	LDDF_REG(REG, ADDR, TMP)		\
2139	sll	REG, 3, REG			;\
2140mark2:	set	start2, TMP			;\
2141	jmp	REG + TMP			;\
2142	  nop					;\
2143start2:	ba,pt	%xcc, done2			;\
2144	  ldd	[ADDR + CPU_TMP1], %f0		;\
2145	ba,pt	%xcc, done2			;\
2146	  ldd	[ADDR + CPU_TMP1], %f32		;\
2147	ba,pt	%xcc, done2			;\
2148	  ldd	[ADDR + CPU_TMP1], %f2		;\
2149	ba,pt	%xcc, done2			;\
2150	  ldd	[ADDR + CPU_TMP1], %f34		;\
2151	ba,pt	%xcc, done2			;\
2152	  ldd	[ADDR + CPU_TMP1], %f4		;\
2153	ba,pt	%xcc, done2			;\
2154	  ldd	[ADDR + CPU_TMP1], %f36		;\
2155	ba,pt	%xcc, done2			;\
2156	  ldd	[ADDR + CPU_TMP1], %f6		;\
2157	ba,pt	%xcc, done2			;\
2158	  ldd	[ADDR + CPU_TMP1], %f38		;\
2159	ba,pt	%xcc, done2			;\
2160	  ldd	[ADDR + CPU_TMP1], %f8		;\
2161	ba,pt	%xcc, done2			;\
2162	  ldd	[ADDR + CPU_TMP1], %f40		;\
2163	ba,pt	%xcc, done2			;\
2164	  ldd	[ADDR + CPU_TMP1], %f10		;\
2165	ba,pt	%xcc, done2			;\
2166	  ldd	[ADDR + CPU_TMP1], %f42		;\
2167	ba,pt	%xcc, done2			;\
2168	  ldd	[ADDR + CPU_TMP1], %f12		;\
2169	ba,pt	%xcc, done2			;\
2170	  ldd	[ADDR + CPU_TMP1], %f44		;\
2171	ba,pt	%xcc, done2			;\
2172	  ldd	[ADDR + CPU_TMP1], %f14		;\
2173	ba,pt	%xcc, done2			;\
2174	  ldd	[ADDR + CPU_TMP1], %f46		;\
2175	ba,pt	%xcc, done2			;\
2176	  ldd	[ADDR + CPU_TMP1], %f16		;\
2177	ba,pt	%xcc, done2			;\
2178	  ldd	[ADDR + CPU_TMP1], %f48		;\
2179	ba,pt	%xcc, done2			;\
2180	  ldd	[ADDR + CPU_TMP1], %f18		;\
2181	ba,pt	%xcc, done2			;\
2182	  ldd	[ADDR + CPU_TMP1], %f50		;\
2183	ba,pt	%xcc, done2			;\
2184	  ldd	[ADDR + CPU_TMP1], %f20		;\
2185	ba,pt	%xcc, done2			;\
2186	  ldd	[ADDR + CPU_TMP1], %f52		;\
2187	ba,pt	%xcc, done2			;\
2188	  ldd	[ADDR + CPU_TMP1], %f22		;\
2189	ba,pt	%xcc, done2			;\
2190	  ldd	[ADDR + CPU_TMP1], %f54		;\
2191	ba,pt	%xcc, done2			;\
2192	  ldd	[ADDR + CPU_TMP1], %f24		;\
2193	ba,pt	%xcc, done2			;\
2194	  ldd	[ADDR + CPU_TMP1], %f56		;\
2195	ba,pt	%xcc, done2			;\
2196	  ldd	[ADDR + CPU_TMP1], %f26		;\
2197	ba,pt	%xcc, done2			;\
2198	  ldd	[ADDR + CPU_TMP1], %f58		;\
2199	ba,pt	%xcc, done2			;\
2200	  ldd	[ADDR + CPU_TMP1], %f28		;\
2201	ba,pt	%xcc, done2			;\
2202	  ldd	[ADDR + CPU_TMP1], %f60		;\
2203	ba,pt	%xcc, done2			;\
2204	  ldd	[ADDR + CPU_TMP1], %f30		;\
2205	ba,pt	%xcc, done2			;\
2206	  ldd	[ADDR + CPU_TMP1], %f62		;\
2207done2:
2208
2209.lddf_exception_not_aligned:
	/*
	 * Fast path for a user lddf/ldda to a misaligned address: fetch
	 * the data as two 32-bit user loads, stage it in cpu_tmp1 and load
	 * it into the destination register with LDDF_REG.
	 *
	 * Handled here: a plain lddf, or an ldda whose ASI is ASI_P,
	 * ASI_PNF, ASI_S or ASI_SNF. Any other ASI, or fpu_exists == 0
	 * when that check is compiled in, goes to fpu_trap through
	 * sys_trap with T_USER | T_LDDF_ALIGN in %g3 and the misaligned
	 * vaddr in %g2.
	 *
	 * tl1_hdlr is set before user memory is touched and is cleared on
	 * every exit taken after that point.
	 */
2210	/*
2211	 * Cheetah overwrites SFAR on a DTLB miss, hence read it now.
2212	 */
2213	ldxa	[MMU_SFAR]%asi, %g5	! misaligned vaddr in %g5
2214
2215#if defined(DEBUG) || defined(NEED_FPU_EXISTS)
2216	sethi	%hi(fpu_exists), %g2		! check fpu_exists
2217	ld	[%g2 + %lo(fpu_exists)], %g2
2218	brz,a,pn %g2, 4f
2219	  nop
2220#endif
2221	CPU_ADDR(%g1, %g4)
2222	or	%g0, 1, %g4
2223	st	%g4, [%g1 + CPU_TL1_HDLR] ! set tl1_hdlr flag
2224
2225	rdpr	%tpc, %g2
2226	lda	[%g2]ASI_AIUP, %g6	! get the user's lddf instruction
2227	srl	%g6, 23, %g1		! using ldda or not?
2228	and	%g1, 1, %g1
2229	brz,a,pt %g1, 2f		! check for ldda instruction
2230	  nop
2231	srl	%g6, 13, %g1		! check immflag
2232	and	%g1, 1, %g1
2233	rdpr	%tstate, %g2		! %tstate in %g2
	/*
	 * With the i bit set the ASI comes from the trapped %asi, which
	 * %tstate holds in bits 31:24. srl works on the low 32 bits and
	 * zero-fills, so a shift by 24 leaves exactly that 8-bit field.
	 * (A shift by 31 kept only bit 31, which can never equal any of
	 * the ASI values compared below, so the %asi form always took the
	 * slow path.) The annulled delay slot runs only when the branch
	 * is taken.
	 */
2234	brnz,a,pn %g1, 1f
2235	  srl	%g2, 24, %g1		! get asi from %tstate
2236	srl	%g6, 5, %g1		! get asi from instruction
2237	and	%g1, 0xFF, %g1		! imm_asi field
22381:
2239	cmp	%g1, ASI_P		! primary address space
2240	be,a,pt %icc, 2f
2241	  nop
2242	cmp	%g1, ASI_PNF		! primary no fault address space
2243	be,a,pt %icc, 2f
2244	  nop
2245	cmp	%g1, ASI_S		! secondary address space
2246	be,a,pt %icc, 2f
2247	  nop
2248	cmp	%g1, ASI_SNF		! secondary no fault address space
2249	bne,a,pn %icc, 3f
2250	  nop
22512:
2252	lduwa	[%g5]ASI_USER, %g7	! get first half of misaligned data
2253	add	%g5, 4, %g5		! increment misaligned data address
2254	lduwa	[%g5]ASI_USER, %g5	! get second half of misaligned data
2255
2256	sllx	%g7, 32, %g7
2257	or	%g5, %g7, %g5		! combine data
2258	CPU_ADDR(%g7, %g1)		! save data on a per-cpu basis
2259	stx	%g5, [%g7 + CPU_TMP1]	! save in cpu_tmp1
2260
2261	srl	%g6, 25, %g3		! %g6 has the instruction
2262	and	%g3, 0x1F, %g3		! %g3 has rd
2263	LDDF_REG(%g3, %g7, %g4)
2264
2265	CPU_ADDR(%g1, %g4)
2266	st	%g0, [%g1 + CPU_TL1_HDLR] ! clear tl1_hdlr flag
2267	FAST_TRAP_DONE
22683:
2269	CPU_ADDR(%g1, %g4)
2270	st	%g0, [%g1 + CPU_TL1_HDLR] ! clear tl1_hdlr flag
22714:
2272	set	T_USER, %g3		! trap type in %g3
2273	or	%g3, T_LDDF_ALIGN, %g3
2274	mov	%g5, %g2		! misaligned vaddr in %g2
2275	set	fpu_trap, %g1		! goto C for the little and
2276	ba,pt	%xcc, sys_trap		! no fault little asi's
2277	  sub	%g0, 1, %g4
2278
2279.stdf_exception_not_aligned:
	/*
	 * Fast path for a user stdf/stda to a misaligned address: stage
	 * the source register in cpu_tmp1 with STDF_REG, then write it out
	 * as two 32-bit user stores.
	 *
	 * Handled here: a plain stdf, or an stda whose ASI is ASI_P or
	 * ASI_S. Any other ASI, or fpu_exists == 0 when that check is
	 * compiled in, goes to fpu_trap through sys_trap with
	 * T_USER | T_STDF_ALIGN in %g3 and the misaligned vaddr in %g2.
	 *
	 * tl1_hdlr is set before user memory is touched and is cleared on
	 * every exit taken after that point.
	 */
2280	/*
2281	 * Cheetah overwrites SFAR on a DTLB miss, hence read it now.
2282	 */
2283	ldxa	[MMU_SFAR]%asi, %g5	! misaligned vaddr in %g5
2284
2285#if defined(DEBUG) || defined(NEED_FPU_EXISTS)
2286	sethi	%hi(fpu_exists), %g7		! check fpu_exists
2287	ld	[%g7 + %lo(fpu_exists)], %g3
2288	brz,a,pn %g3, 4f
2289	  nop
2290#endif
2291	CPU_ADDR(%g1, %g4)
2292	or	%g0, 1, %g4
2293	st	%g4, [%g1 + CPU_TL1_HDLR] ! set tl1_hdlr flag
2294
2295	rdpr	%tpc, %g2
2296	lda	[%g2]ASI_AIUP, %g6	! get the user's stdf instruction
2297
2298	srl	%g6, 23, %g1		! using stda or not?
2299	and	%g1, 1, %g1
2300	brz,a,pt %g1, 2f		! check for stda instruction
2301	  nop
2302	srl	%g6, 13, %g1		! check immflag
2303	and	%g1, 1, %g1
2304	rdpr	%tstate, %g2		! %tstate in %g2
	/*
	 * With the i bit set the ASI comes from the trapped %asi, which
	 * %tstate holds in bits 31:24. srl works on the low 32 bits and
	 * zero-fills, so a shift by 24 leaves exactly that 8-bit field.
	 * (A shift by 31 kept only bit 31, which can never equal ASI_P or
	 * ASI_S, so the %asi form always took the slow path.) The annulled
	 * delay slot runs only when the branch is taken.
	 */
2305	brnz,a,pn %g1, 1f
2306	  srl	%g2, 24, %g1		! get asi from %tstate
2307	srl	%g6, 5, %g1		! get asi from instruction
2308	and	%g1, 0xFF, %g1		! imm_asi field
23091:
2310	cmp	%g1, ASI_P		! primary address space
2311	be,a,pt %icc, 2f
2312	  nop
2313	cmp	%g1, ASI_S		! secondary address space
2314	bne,a,pn %icc, 3f
2315	  nop
23162:
2317	srl	%g6, 25, %g6
2318	and	%g6, 0x1F, %g6		! %g6 has rd
2319	CPU_ADDR(%g7, %g1)
2320	STDF_REG(%g6, %g7, %g4)		! STDF_REG(REG, ADDR, TMP)
2321
	/* split the staged 64-bit value into high and low words */
2322	ldx	[%g7 + CPU_TMP1], %g6
2323	srlx	%g6, 32, %g7
2324	stuwa	%g7, [%g5]ASI_USER	! first half
2325	add	%g5, 4, %g5		! increment misaligned data address
2326	stuwa	%g6, [%g5]ASI_USER	! second half
2327
2328	CPU_ADDR(%g1, %g4)
2329	st	%g0, [%g1 + CPU_TL1_HDLR] ! clear tl1_hdlr flag
2330	FAST_TRAP_DONE
23313:
2332	CPU_ADDR(%g1, %g4)
2333	st	%g0, [%g1 + CPU_TL1_HDLR] ! clear tl1_hdlr flag
23344:
2335	set	T_USER, %g3		! trap type in %g3
2336	or	%g3, T_STDF_ALIGN, %g3
2337	mov	%g5, %g2		! misaligned vaddr in %g2
2338	set	fpu_trap, %g1		! goto C for the little and
2339	ba,pt	%xcc, sys_trap		! nofault little asi's
2340	  sub	%g0, 1, %g4
2341
2342#ifdef DEBUG_USER_TRAPTRACECTL
2343
2344.traptrace_freeze:
	/*
	 * Emit one trace record, then store 1 in trap_freeze.
	 * %l0, %l1, %l2 and %l4 are parked in %g1-%g4 across TT_TRACE_L
	 * and copied back afterwards. NOTE(review): presumably those are
	 * the locals TT_TRACE_L clobbers - the macro is defined elsewhere.
	 */
2345	mov	%l0, %g1 ; mov	%l1, %g2 ; mov	%l2, %g3 ; mov	%l4, %g4
2346	TT_TRACE_L(trace_win)
2347	mov	%g4, %l4 ; mov	%g3, %l2 ; mov	%g2, %l1 ; mov	%g1, %l0
2348	set	trap_freeze, %g1
2349	mov	1, %g2
2350	st	%g2, [%g1]
2351	FAST_TRAP_DONE
2352
2353.traptrace_unfreeze:
	/*
	 * Store 0 in trap_freeze first, then emit one trace record.
	 * %l0, %l1, %l2 and %l4 are parked in %g1-%g4 across TT_TRACE_L
	 * and copied back afterwards, mirroring the freeze handler.
	 */
2354	set	trap_freeze, %g1
2355	st	%g0, [%g1]
2356	mov	%l0, %g1 ; mov	%l1, %g2 ; mov	%l2, %g3 ; mov	%l4, %g4
2357	TT_TRACE_L(trace_win)
2358	mov	%g4, %l4 ; mov	%g3, %l2 ; mov	%g2, %l1 ; mov	%g1, %l0
2359	FAST_TRAP_DONE
2360
2361#endif /* DEBUG_USER_TRAPTRACECTL */
2362
2363.getcc:
	/*
	 * Return the icc field of the trapped context, right justified,
	 * in %g1 of the normal global set.
	 *
	 * %o0 and %o1 are used as scratch because they stay visible on
	 * both sides of the global-set switch. Their original values are
	 * saved in cpu_tmp1 and cpu_tmp2 and restored before returning.
	 */
2364	CPU_ADDR(%g1, %g2)
2365	stx	%o0, [%g1 + CPU_TMP1]		! save %o0
2366	stx	%o1, [%g1 + CPU_TMP2]		! save %o1
2367	rdpr	%tstate, %g3			! get tstate
2368	srlx	%g3, PSR_TSTATE_CC_SHIFT, %o0	! shift ccr to V8 psr
2369	set	PSR_ICC, %g2
2370	and	%o0, %g2, %o0			! mask out the rest
2371	srl	%o0, PSR_ICC_SHIFT, %o0		! right justify
	/*
	 * wrpr writes the exclusive-or of its two source operands, so the
	 * first write flips PSTATE_AG and the second puts back the value
	 * read into %o1.
	 */
2372	rdpr	%pstate, %o1
2373	wrpr	%o1, PSTATE_AG, %pstate		! get into normal globals
2374	mov	%o0, %g1			! move ccr to normal %g1
2375	wrpr	%g0, %o1, %pstate		! back into alternate globals
	/* the alternate %g1 still holds the cpu address loaded on entry */
2376	ldx	[%g1 + CPU_TMP1], %o0		! restore %o0
2377	ldx	[%g1 + CPU_TMP2], %o1		! restore %o1
2378	FAST_TRAP_DONE
2379
2380.setcc:
	/*
	 * Take a right-justified icc value from %g1 of the normal global
	 * set and install it as the icc field of %tstate.
	 *
	 * %o0 and %o1 are used as scratch because they stay visible on
	 * both sides of the global-set switch. Their original values are
	 * saved in cpu_tmp1 and cpu_tmp2 and restored before returning.
	 */
2381	CPU_ADDR(%g1, %g2)
2382	stx	%o0, [%g1 + CPU_TMP1]		! save %o0
2383	stx	%o1, [%g1 + CPU_TMP2]		! save %o1
	/*
	 * wrpr writes the exclusive-or of its two source operands, so the
	 * first write flips PSTATE_AG and the second puts back the value
	 * read into %o0.
	 */
2384	rdpr	%pstate, %o0
2385	wrpr	%o0, PSTATE_AG, %pstate		! get into normal globals
2386	mov	%g1, %o1
2387	wrpr	%g0, %o0, %pstate		! back to alternates
	/* rebuild the V8 psr.icc layout, then move it up to its tstate slot */
2388	sll	%o1, PSR_ICC_SHIFT, %g2
2389	set	PSR_ICC, %g3
2390	and	%g2, %g3, %g2			! mask out rest
2391	sllx	%g2, PSR_TSTATE_CC_SHIFT, %g2
2392	rdpr	%tstate, %g3			! get tstate
	/*
	 * The srl by 0 zeroes bits 63:32 of the old tstate, so everything
	 * held above bit 31 is dropped, not just the old icc.
	 */
2393	srl	%g3, 0, %g3			! clear upper word
2394	or	%g3, %g2, %g3			! or in new bits
2395	wrpr	%g3, %tstate
2396	ldx	[%g1 + CPU_TMP1], %o0		! restore %o0
2397	ldx	[%g1 + CPU_TMP2], %o1		! restore %o1
2398	FAST_TRAP_DONE
2399
2400/*
2401 * getpsr(void)
2402 * Note that the xcc part of the ccr is not provided.
2403 * The V8 code shows why the V9 trap is not faster:
2404 * #define GETPSR_TRAP() \
2405 *      mov %psr, %i0; jmp %l2; rett %l2+4; nop;
2406 */
2407
2408	.type	.getpsr, #function
2409.getpsr:
	/*
	 * Build a V8-style psr in %o0 from three pieces: the icc bits
	 * taken from %tstate, the fp enable bit taken from %fprs, and the
	 * constant V9_PSR_IMPLVER. %o0 is the return value and is not
	 * preserved. %g1 and %g2 are scratch.
	 */
2410	rdpr	%tstate, %g1			! get tstate
2411	srlx	%g1, PSR_TSTATE_CC_SHIFT, %o0	! shift ccr to V8 psr
2412	set	PSR_ICC, %g2
2413	and	%o0, %g2, %o0			! mask out the rest
2414
2415	rd	%fprs, %g1			! get fprs
2416	and	%g1, FPRS_FEF, %g2		! mask out dirty upper/lower
2417	sllx	%g2, PSR_FPRS_FEF_SHIFT, %g2	! shift fef to V8 psr.ef
2418	or	%o0, %g2, %o0			! or result into psr.ef
2419
2420	set	V9_PSR_IMPLVER, %g2		! SI assigned impl/ver: 0xef
2421	or	%o0, %g2, %o0			! or psr.impl/ver
2422	FAST_TRAP_DONE
2423	SET_SIZE(.getpsr)
2424
2425/*
2426 * setpsr(newpsr)
2427 * Note that there is no support for ccr.xcc in the V9 code.
2428 */
2429
2430	.type	.setpsr, #function
2431.setpsr:
	/*
	 * Apply the V8-style psr passed in %o0. Only two fields are
	 * honoured: psr.icc is copied into the icc field of %tstate, and
	 * psr.ef is copied into %fprs and into the FPU_FPRS and FPU_EN
	 * fields reached through CPU_THREAD, T_LWP and LWP_FPU.
	 */
2432	rdpr	%tstate, %g1			! get tstate
2433!	setx	TSTATE_V8_UBITS, %g2
2434	or 	%g0, CCR_ICC, %g3
2435	sllx	%g3, TSTATE_CCR_SHIFT, %g2
2436
2437	andn	%g1, %g2, %g1			! zero current user bits
2438	set	PSR_ICC, %g2
2439	and	%g2, %o0, %g2			! clear all but psr.icc bits
2440	sllx	%g2, PSR_TSTATE_CC_SHIFT, %g3	! shift to tstate.ccr.icc
	/*
	 * wrpr writes the exclusive-or of its two source operands. The
	 * icc field of %g1 was zeroed above, so this merges in %g3.
	 */
2441	wrpr	%g1, %g3, %tstate		! write tstate
2442
2443	set	PSR_EF, %g2
2444	and	%g2, %o0, %g2			! clear all but fp enable bit
2445	srlx	%g2, PSR_FPRS_FEF_SHIFT, %g4	! shift ef to V9 fprs.fef
2446	wr	%g0, %g4, %fprs			! write fprs
2447
	/* mirror the new enable state into the saved fpu state */
2448	CPU_ADDR(%g1, %g2)			! load CPU struct addr to %g1
2449	ldn	[%g1 + CPU_THREAD], %g2		! load thread pointer
2450	ldn	[%g2 + T_LWP], %g3		! load klwp pointer
2451	ldn	[%g3 + LWP_FPU], %g2		! get lwp_fpu pointer
2452	stuw	%g4, [%g2 + FPU_FPRS]		! write fef value to fpu_fprs
2453	srlx	%g4, 2, %g4			! shift fef value to bit 0
2454	stub	%g4, [%g2 + FPU_EN]		! write fef value to fpu_en
2455	FAST_TRAP_DONE
2456	SET_SIZE(.setpsr)
2457
2458/*
2459 * getlgrp
2460 * get home lgrpid on which the calling thread is currently executing.
2461 */
2462	.type	.getlgrp, #function
2463.getlgrp:
	/*
	 * Two results are returned: %o0 receives the 32-bit cpu_id and
	 * %o1 receives lpl_lgrpid sign-extended to 64 bits. %g1 and %g2
	 * are scratch.
	 */
2464	CPU_ADDR(%g1, %g2)		! load CPU struct addr to %g1 using %g2
2465	ld	[%g1 + CPU_ID], %o0	! load cpu_id
2466	ldn	[%g1 + CPU_THREAD], %g2	! load thread pointer
2467	ldn	[%g2 + T_LPL], %g2	! load lpl pointer
2468	ld	[%g2 + LPL_LGRPID], %g1	! load lpl_lgrpid
	/* sra by 0 sign-extends the 32-bit value loaded above */
2469	sra	%g1, 0, %o1
2470	FAST_TRAP_DONE
2471	SET_SIZE(.getlgrp)
2472
2473/*
2474 * Entry for old 4.x trap (trap 0).
2475 */
2476	ENTRY_NP(syscall_trap_4x)
	/*
	 * Two outcomes, selected by lwp_pcb.pcb_trap0addr:
	 *
	 *   non-zero: resume user code at that address. %tnpc is replaced
	 *	by it and the original %tnpc is handed to the user in the
	 *	normal %g6.
	 *   zero: treat the trap as a system call. An OSYS_mmap number in
	 *	the normal %g1 is rewritten to SYS_mmap, then control goes
	 *	to SYSCALL(syscall_trap32).
	 *
	 * %l0 and %l1 are borrowed as scratch and restored from cpu_tmp1
	 * and cpu_tmp2 on both paths.
	 */
2477	CPU_ADDR(%g1, %g2)		! load CPU struct addr to %g1 using %g2
2478	ldn	[%g1 + CPU_THREAD], %g2	! load thread pointer
2479	ldn	[%g2 + T_LWP], %g2	! load klwp pointer
2480	ld	[%g2 + PCB_TRAP0], %g2	! lwp->lwp_pcb.pcb_trap0addr
	/*
	 * The branch is not annulled, so the store of %l0 in its delay
	 * slot executes on both the taken and the fall-through path.
	 */
2481	brz,pn	%g2, 1f			! has it been set?
2482	st	%l0, [%g1 + CPU_TMP1]	! delay - save some locals
2483	st	%l1, [%g1 + CPU_TMP2]
2484	rdpr	%tnpc, %l1		! save old tnpc
2485	wrpr	%g0, %g2, %tnpc		! setup tnpc
2486
	/* wrpr xors its sources: flip PSTATE_AG, then write %l0 back */
2487	rdpr	%pstate, %l0
2488	wrpr	%l0, PSTATE_AG, %pstate	! switch to normal globals
2489	mov	%l1, %g6		! pass tnpc to user code in %g6
2490	wrpr	%l0, %g0, %pstate	! switch back to alternate globals
2491
2492	! Note that %g1 still contains CPU struct addr
2493	ld	[%g1 + CPU_TMP2], %l1	! restore locals
2494	ld	[%g1 + CPU_TMP1], %l0
2495	FAST_TRAP_DONE_CHK_INTR
24961:
	/*
	 * Keep the cpu address in %l0, since %g1 names a different
	 * register once the normal globals are selected.
	 */
2497	mov	%g1, %l0
2498	st	%l1, [%g1 + CPU_TMP2]
2499	rdpr	%pstate, %l1
2500	wrpr	%l1, PSTATE_AG, %pstate
2501	!
2502	! check for old syscall mmap which is the only different one which
2503	! must be the same.  Others are handled in the compatibility library.
2504	!
	/* movz replaces %g1 with SYS_mmap only when the compare was equal */
2505	cmp	%g1, OSYS_mmap	! compare to old 4.x mmap
2506	movz	%icc, SYS_mmap, %g1
2507	wrpr	%g0, %l1, %pstate
2508	ld	[%l0 + CPU_TMP2], %l1	! restore locals
2509	ld	[%l0 + CPU_TMP1], %l0
2510	SYSCALL(syscall_trap32)
2511	SET_SIZE(syscall_trap_4x)
2512
2513/*
2514 * Handler for software trap 9.
2515 * Set trap0 emulation address for old 4.x system call trap.
2516 * XXX - this should be a system call.
2517 */
2518	ENTRY_NP(set_trap0_addr)
	/*
	 * Record the address passed in the normal %g1, with its two low
	 * bits cleared, as lwp_pcb.pcb_trap0addr. The value is stored with
	 * a 32-bit st. %l0 and %l1 are borrowed as scratch and restored
	 * from cpu_tmp1 and cpu_tmp2 before returning.
	 */
2519	CPU_ADDR(%g1, %g2)		! load CPU struct addr to %g1 using %g2
2520	ldn	[%g1 + CPU_THREAD], %g2	! load thread pointer
2521	ldn	[%g2 + T_LWP], %g2	! load klwp pointer
2522	st	%l0, [%g1 + CPU_TMP1]	! save some locals
2523	st	%l1, [%g1 + CPU_TMP2]
	/*
	 * wrpr xors its sources: flip PSTATE_AG to reach the normal
	 * globals, copy their %g1 into %l1, then write the saved pstate
	 * back.
	 */
2524	rdpr	%pstate, %l0
2525	wrpr	%l0, PSTATE_AG, %pstate
2526	mov	%g1, %l1
2527	wrpr	%g0, %l0, %pstate
2528	andn	%l1, 3, %l1		! force alignment
2529	st	%l1, [%g2 + PCB_TRAP0]	! lwp->lwp_pcb.pcb_trap0addr
2530	ld	[%g1 + CPU_TMP1], %l0	! restore locals
2531	ld	[%g1 + CPU_TMP2], %l1
2532	FAST_TRAP_DONE
2533	SET_SIZE(set_trap0_addr)
2534
2535/*
2536 * mmu_trap_tl1
2537 * trap handler for unexpected mmu traps.
2538 * simply checks if the trap was a user lddf/stdf alignment trap, in which
2539 * case we go to fpu_trap or a user trap from the window handler, in which
2540 * case we go save the state on the pcb.  Otherwise, we go to ptl1_panic.
2541 */
2542	.type	mmu_trap_tl1, #function
2543mmu_trap_tl1:
2544#ifdef	TRAPTRACE
	/*
	 * Log one trace entry: tick, tl, tt, tstate, sp, tpc, the SFAR in
	 * F1 and the current tl1_hdlr flag in F2. F3 and F4 are filled
	 * with the marker 0xdeadbeef.
	 */
2545	TRACE_PTR(%g5, %g6)
2546	GET_TRACE_TICK(%g6)
2547	stxa	%g6, [%g5 + TRAP_ENT_TICK]%asi
2548	rdpr	%tl, %g6
2549	stha	%g6, [%g5 + TRAP_ENT_TL]%asi
2550	rdpr	%tt, %g6
2551	stha	%g6, [%g5 + TRAP_ENT_TT]%asi
2552	rdpr	%tstate, %g6
2553	stxa	%g6, [%g5 + TRAP_ENT_TSTATE]%asi
2554	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2555	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2556	rdpr	%tpc, %g6
2557	stna	%g6, [%g5 + TRAP_ENT_TPC]%asi
2558	set	MMU_SFAR, %g6
2559	ldxa	[%g6]ASI_DMMU, %g6
2560	stxa	%g6, [%g5 + TRAP_ENT_F1]%asi
2561	CPU_PADDR(%g7, %g6);
2562	add	%g7, CPU_TL1_HDLR, %g7
2563	lda	[%g7]ASI_MEM, %g6
2564	stxa	%g6, [%g5 + TRAP_ENT_F2]%asi
2565	set	0xdeadbeef, %g6
2566	stna	%g6, [%g5 + TRAP_ENT_F3]%asi
2567	stna	%g6, [%g5 + TRAP_ENT_F4]%asi
2568	TRACE_NEXT(%g5, %g6, %g7)
2569#endif /* TRAPTRACE */
2570
	/*
	 * Panther only: a T_DATA_EXCEPTION whose SFSR has the bit at
	 * PN_SFSR_PARITY_SHIFT set is diverted to the TLB parity path.
	 * Every other case continues at mmu_trap_tl1_4.
	 */
2571	GET_CPU_IMPL(%g5)
2572	cmp	%g5, PANTHER_IMPL
2573	bne	mmu_trap_tl1_4
2574	  nop
2575	rdpr	%tt, %g5
2576	cmp	%g5, T_DATA_EXCEPTION
2577	bne	mmu_trap_tl1_4
2578	  nop
2579	wr	%g0, ASI_DMMU, %asi
2580	ldxa	[MMU_SFSR]%asi, %g5
2581	mov	1, %g6
2582	sllx	%g6, PN_SFSR_PARITY_SHIFT, %g6
2583	andcc	%g5, %g6, %g0
	/*
	 * This branch is not annulled and has no nop after it, so the
	 * ldxa of MMU_TAG_ACCESS below is its delay slot and also runs
	 * when the branch is taken.
	 */
2584	bz	mmu_trap_tl1_4
2585
2586	/*
2587	 * We are running on a Panther and have hit a DTLB parity error.
2588	 */
2589	ldxa	[MMU_TAG_ACCESS]%asi, %g2
2590	mov	%g5, %g3
2591	ba,pt	%xcc, .mmu_exception_is_tlb_parity
2592	mov	T_DATA_EXCEPTION, %g1
2593
2594mmu_trap_tl1_4:
	/*
	 * Read the tl1_hdlr flag through its physical address. Zero means
	 * no TL=1 handler asked for these traps, so go to label 1 and
	 * check whether a window handler was interrupted.
	 */
2595	CPU_PADDR(%g7, %g6);
2596	add     %g7, CPU_TL1_HDLR, %g7		! %g7 = &cpu_m.tl1_hdlr (PA)
2597	/*
2598	 * AM is cleared on trap, so addresses are 64 bit
2599	 */
2600	lda     [%g7]ASI_MEM, %g6
2601	brz,a,pt %g6, 1f
2602	  nop
2603	/*
2604	 * We are going to update cpu_m.tl1_hdlr using physical address.
2605	 * Flush the D$ line, so that stale data won't be accessed later.
2606	 */
2607	CPU_ADDR(%g6, %g5)
2608	add     %g6, CPU_TL1_HDLR, %g6		! %g6 = &cpu_m.tl1_hdlr (VA)
	/*
	 * Implementations numbered below CHEETAH_IMPL take the path at
	 * label 3, which writes ASI_DC_TAG at the virtual address masked
	 * with dcache_line_mask. All others write ASI_DC_INVAL with the
	 * physical address. The sethi in the delay slot runs either way.
	 */
2609	GET_CPU_IMPL(%g5)
2610	cmp	%g5, CHEETAH_IMPL
2611	bl,pn	%icc, 3f
2612	sethi	%hi(dcache_line_mask), %g5
2613	stxa	%g0, [%g7]ASI_DC_INVAL
2614	membar	#Sync
2615	ba,pt	%xcc, 2f
2616	nop
26173:
2618	ld	[%g5 + %lo(dcache_line_mask)], %g5
2619	and	%g6, %g5, %g5
2620	stxa	%g0, [%g5]ASI_DC_TAG
2621	membar	#Sync
26222:
	/* clear tl1_hdlr through its physical address and hand off */
2623	sta     %g0, [%g7]ASI_MEM
2624	SWITCH_GLOBALS				! back to mmu globals
2625	ba,a,pt	%xcc, sfmmu_mmu_trap		! handle page faults
26261:
	/*
	 * Fetch %tt of the trap level below by lowering %tl for one read
	 * and then restoring it. %g5 keeps the current level's %tt.
	 * Anything other than a window trap type there is fatal.
	 */
2627	rdpr	%tt, %g5
2628	rdpr	%tl, %g7
2629	sub	%g7, 1, %g6
2630	wrpr	%g6, %tl
2631	rdpr	%tt, %g6
2632	wrpr	%g7, %tl
2633	and	%g6, WTRAP_TTMASK, %g6
2634	cmp	%g6, WTRAP_TYPE
2635	bne,a,pn %xcc, ptl1_panic
2636	mov	PTL1_BAD_MMUTRAP, %g1
2637	rdpr	%tpc, %g7
2638	/* tpc should be in the trap table */
2639	set	trap_table, %g6
2640	cmp	%g7, %g6
2641	blt,a,pn %xcc, ptl1_panic
2642	  mov	PTL1_BAD_MMUTRAP, %g1
2643	set	etrap_table, %g6
2644	cmp	%g7, %g6
2645	bge,a,pn %xcc, ptl1_panic
2646	  mov	PTL1_BAD_MMUTRAP, %g1
	/*
	 * Leave the fault address in %g6: SFAR when this trap is
	 * T_ALIGNMENT, the tag access register otherwise. Then resume at
	 * offset WTRAP_FAULTOFF within the interrupted window handler,
	 * by rounding %tpc down with WTRAP_ALIGN and writing the result
	 * to %tnpc before done.
	 */
2647	cmp	%g5, T_ALIGNMENT
2648	move	%icc, MMU_SFAR, %g6
2649	movne	%icc, MMU_TAG_ACCESS, %g6
2650	ldxa	[%g6]ASI_DMMU, %g6
2651	andn	%g7, WTRAP_ALIGN, %g7	/* 128 byte aligned */
2652	add	%g7, WTRAP_FAULTOFF, %g7
2653	wrpr	%g0, %g7, %tnpc
2654	done
2655	SET_SIZE(mmu_trap_tl1)
2656
2657/*
2658 * Several traps use kmdb_trap and kmdb_trap_tl1 as their handlers.  These
2659 * traps are valid only when kmdb is loaded.  When the debugger is active,
2660 * the code below is rewritten to transfer control to the appropriate
2661 * debugger entry points.
2662 */
2663	.global	kmdb_trap
2664	.align	8
2665kmdb_trap:
	/*
	 * As assembled, the annulled branch always goes to trap_table0
	 * and the jmp and nop after it are never executed. The comment
	 * above says this code is rewritten when the debugger is active.
	 */
2666	ba,a	trap_table0
2667	jmp	%g1 + 0
2668	nop
2669
2670	.global	kmdb_trap_tl1
2671	.align	8
2672kmdb_trap_tl1:
	/*
	 * Same three-instruction layout as kmdb_trap: an annulled branch
	 * to trap_table0 followed by a jmp and nop that are not reached
	 * as assembled.
	 */
2673	ba,a	trap_table0
2674	jmp	%g1 + 0
2675	nop
2676
2677/*
2678 * This entry is copied from OBP's trap table during boot.
2679 */
2680	.global	obp_bpt
2681	.align	8
2682obp_bpt:
	/*
	 * Build-time filler only. NOTE(review): NOT is a macro defined
	 * outside this chunk, so its expansion and size are not visible
	 * here. kctx_obp_bpt jumps to this label.
	 */
2683	NOT
2684
2685/*
2686 * if kernel, set PCONTEXT to 0 for debuggers
2687 * if user, clear nucleus page sizes
2688 */
2689	.global kctx_obp_bpt
2690kctx_obp_bpt:
	/*
	 * Adjust the primary context register when needed, then jump to
	 * obp_bpt. %g1 to %g4 are clobbered. The numeric label 1 below
	 * has no reference inside this routine.
	 */
2691	set	obp_bpt, %g2
26921:
2693	mov	MMU_PCONTEXT, %g1
2694	ldxa	[%g1]ASI_DMMU, %g1
	/*
	 * %g3 = the register bits at and above CTXREG_NEXT_SHIFT. The
	 * sllx in the delay slot is not annulled, so it moves them back
	 * to their original position on both paths.
	 */
2695	srlx	%g1, CTXREG_NEXT_SHIFT, %g3
2696	brz,pt	%g3, 3f			! nucleus pgsz is 0, no problem
2697	  sllx	%g3, CTXREG_NEXT_SHIFT, %g3
2698	set	CTXREG_CTX_MASK, %g4	! check Pcontext
2699	btst	%g4, %g1
2700	bz,a,pt	%xcc, 2f
2701	  clr	%g3			! kernel:  PCONTEXT=0
	/* xor with the isolated high bits clears exactly those bits */
2702	xor	%g3, %g1, %g3		! user:	clr N_pgsz0/1 bits
27032:
	/* demap both TLBs, then install the new context value from %g3 */
2704	set	DEMAP_ALL_TYPE, %g1
2705	stxa	%g0, [%g1]ASI_DTLB_DEMAP
2706	stxa	%g0, [%g1]ASI_ITLB_DEMAP
2707	mov	MMU_PCONTEXT, %g1
2708	stxa	%g3, [%g1]ASI_DMMU
2709        membar  #Sync
2710	sethi	%hi(FLUSH_ADDR), %g1
2711	flush	%g1			! flush required by immu
27123:
2713	jmp	%g2
2714	  nop
2715
2716
2717#ifdef	TRAPTRACE
2718/*
2719 * TRAPTRACE support.
2720 * labels here are branched to with "rd %pc, %g7" in the delay slot.
2721 * Return is done by "jmp %g7 + 4".
2722 */
2723
2724trace_gen:
	/*
	 * Log a generic trace entry holding tick, tl, tt, tstate, sp and
	 * tpc. Writes %g3, %g4, %g5 and %g6. %g7 must hold the address of
	 * the caller's delay-slot instruction, so that the jmp to %g7 + 4
	 * resumes at the instruction following it.
	 */
2725	TRACE_PTR(%g3, %g6)
2726	GET_TRACE_TICK(%g6)
2727	stxa	%g6, [%g3 + TRAP_ENT_TICK]%asi
2728	rdpr	%tl, %g6
2729	stha	%g6, [%g3 + TRAP_ENT_TL]%asi
2730	rdpr	%tt, %g6
2731	stha	%g6, [%g3 + TRAP_ENT_TT]%asi
2732	rdpr	%tstate, %g6
2733	stxa	%g6, [%g3 + TRAP_ENT_TSTATE]%asi
2734	stna	%sp, [%g3 + TRAP_ENT_SP]%asi
2735	rdpr	%tpc, %g6
2736	stna	%g6, [%g3 + TRAP_ENT_TPC]%asi
2737	TRACE_NEXT(%g3, %g4, %g5)
2738	jmp	%g7 + 4
2739	nop
2740
2741trace_win:
	/*
	 * Window-trap variant of the tracer. It works in %l0-%l2 in place
	 * of globals and returns through %l4 + 4, so the caller must have
	 * left its return pc in %l4. %l0-%l2 are zeroed on the way out,
	 * the last of them in the delay slot of the return jmp.
	 */
2742	TRACE_WIN_INFO(0, %l0, %l1, %l2)
2743	! Keep the locals as clean as possible, caller cleans %l4
2744	clr	%l2
2745	clr	%l1
2746	jmp	%l4 + 4
2747	  clr	%l0
2748
2749/*
2750 * Trace a tsb hit
2751 * g1 = tsbe pointer (in/clobbered)
2752 * g2 = tag access register (in)
2753 * g3 - g4 = scratch (clobbered)
2754 * g5 = tsbe data (in)
2755 * g6 = scratch (clobbered)
2756 * g7 = pc we jumped here from (in)
2757 */
2758
2759	! Do not disturb %g5, it will be used after the trace
2760	ALTENTRY(trace_tsbhit)
	/*
	 * All of the recording is done by the TRACE_TSBHIT macro, which
	 * is defined outside this chunk. Return is to the instruction
	 * after the caller's delay slot, whose address arrives in %g7.
	 */
2761	TRACE_TSBHIT(0)
2762	jmp	%g7 + 4
2763	nop
2764
2765/*
2766 * Trace a TSB miss
2767 *
2768 * g1 = tsb8k pointer (in)
2769 * g2 = tag access register (in)
2770 * g3 = tsb4m pointer (in)
2771 * g4 = tsbe tag (in/clobbered)
2772 * g5 - g6 = scratch (clobbered)
2773 * g7 = pc we jumped here from (in)
2774 */
2775	.global	trace_tsbmiss
2776trace_tsbmiss:
	/*
	 * Log a TSB miss. The entry fields are reused as follows:
	 *   SP = tag access, F1 = tsb tag, F2 = tnpc, F3 = tsb8k pointer,
	 *   F4 = the tsb8k pointer shifted right by 32,
	 *   TT = tt with TT_MMU_MISS or-ed in, TSTATE = tag target,
	 *   TR = tsb4m pointer.
	 */
2777	membar	#Sync
2778	sethi	%hi(FLUSH_ADDR), %g6
2779	flush	%g6
2780	TRACE_PTR(%g5, %g6)
2781	GET_TRACE_TICK(%g6)
2782	stxa	%g6, [%g5 + TRAP_ENT_TICK]%asi
2783	stxa	%g2, [%g5 + TRAP_ENT_SP]%asi		! tag access
2784	stxa	%g4, [%g5 + TRAP_ENT_F1]%asi		! tsb tag
2785	rdpr	%tnpc, %g6
2786	stxa	%g6, [%g5 + TRAP_ENT_F2]%asi
2787	stna	%g1, [%g5 + TRAP_ENT_F3]%asi		! tsb8k pointer
	/* F4 receives the upper 32 bits of the tsb8k pointer in %g1 */
2788	srlx	%g1, 32, %g6
2789	stna	%g6, [%g5 + TRAP_ENT_F4]%asi		! huh?
2790	rdpr	%tpc, %g6
2791	stna	%g6, [%g5 + TRAP_ENT_TPC]%asi
2792	rdpr	%tl, %g6
2793	stha	%g6, [%g5 + TRAP_ENT_TL]%asi
2794	rdpr	%tt, %g6
2795	or	%g6, TT_MMU_MISS, %g4
2796	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
	/*
	 * Read the tag target from the IMMU when tt is FAST_IMMU_MISS_TT
	 * and from the DMMU otherwise. The annulled delay slot executes
	 * only when the branch is taken.
	 */
2797	cmp	%g6, FAST_IMMU_MISS_TT
2798	be,a	%icc, 1f
2799	  ldxa	[%g0]ASI_IMMU, %g6
2800	ldxa	[%g0]ASI_DMMU, %g6
28011:	stxa	%g6, [%g5 + TRAP_ENT_TSTATE]%asi	! tag target
2802	stxa	%g3, [%g5 + TRAP_ENT_TR]%asi		! tsb4m pointer
2803	TRACE_NEXT(%g5, %g4, %g6)
2804	jmp	%g7 + 4
2805	nop
2806
2807/*
2808 * g2 = tag access register (in)
2809 * g3 = ctx number (in)
2810 */
2811trace_dataprot:
	/*
	 * Log an entry holding tick, tpc, tstate, tl and tt, with the tag
	 * access register from %g2 in the SP field. TR and F1 to F4 are
	 * zeroed. Writes %g1, %g4, %g5 and %g6, and returns to %g7 + 4.
	 * The ctx number arriving in %g3 is not stored by this code.
	 */
2812	membar	#Sync
2813	sethi	%hi(FLUSH_ADDR), %g6
2814	flush	%g6
2815	TRACE_PTR(%g1, %g6)
2816	GET_TRACE_TICK(%g6)
2817	stxa	%g6, [%g1 + TRAP_ENT_TICK]%asi
2818	rdpr	%tpc, %g6
2819	stna	%g6, [%g1 + TRAP_ENT_TPC]%asi
2820	rdpr	%tstate, %g6
2821	stxa	%g6, [%g1 + TRAP_ENT_TSTATE]%asi
2822	stxa	%g2, [%g1 + TRAP_ENT_SP]%asi		! tag access reg
2823	stxa	%g0, [%g1 + TRAP_ENT_TR]%asi
2824	stxa	%g0, [%g1 + TRAP_ENT_F1]%asi
2825	stxa	%g0, [%g1 + TRAP_ENT_F2]%asi
2826	stxa	%g0, [%g1 + TRAP_ENT_F3]%asi
2827	stxa	%g0, [%g1 + TRAP_ENT_F4]%asi
2828	rdpr	%tl, %g6
2829	stha	%g6, [%g1 + TRAP_ENT_TL]%asi
2830	rdpr	%tt, %g6
2831	stha	%g6, [%g1 + TRAP_ENT_TT]%asi
2832	TRACE_NEXT(%g1, %g4, %g5)
2833	jmp	%g7 + 4
2834	nop
2835
2836#endif /* TRAPTRACE */
2837
2838/*
2839 * expects offset into tsbmiss area in %g1 and return pc in %g7
2840 */
2841stat_mmu:
	/*
	 * Add one to the 32-bit counter at offset %g1 within this cpu's
	 * slot of tsbmiss_area. The slot is found by shifting the cpu
	 * index left by TSBMISS_SHIFT. The update is a plain load, add
	 * and store. %g5 and %g6 are clobbered.
	 */
2842	CPU_INDEX(%g5, %g6)
2843	sethi	%hi(tsbmiss_area), %g6
2844	sllx	%g5, TSBMISS_SHIFT, %g5
2845	or	%g6, %lo(tsbmiss_area), %g6
2846	add	%g6, %g5, %g6		/* g6 = tsbmiss area */
2847	ld	[%g6 + %g1], %g5
2848	add	%g5, 1, %g5
	/* the store completes in the delay slot of the return jmp */
2849	jmp	%g7 + 4
2850	st	%g5, [%g6 + %g1]
2851
2852
2853/*
2854 * fast_trap_done, fast_trap_done_chk_intr:
2855 *
2856 * Due to the design of UltraSPARC pipeline, pending interrupts are not
2857 * taken immediately after a RETRY or DONE instruction which causes IE to
2858 * go from 0 to 1. Instead, the instruction at %tpc or %tnpc is allowed
2859 * to execute first before taking any interrupts. If that instruction
2860 * results in other traps, and if the corresponding trap handler runs
2861 * entirely at TL=1 with interrupts disabled, then pending interrupts
2862 * won't be taken until after yet another instruction following the %tpc
2863 * or %tnpc.
2864 *
2865 * A malicious user program can use this feature to block out interrupts
2866 * for extended durations, which can result in send_mondo_timeout kernel
2867 * panic.
2868 *
2869 * This problem is addressed by servicing any pending interrupts via
2870 * sys_trap before returning back to the user mode from a fast trap
2871 * handler. The "done" instruction within a fast trap handler, which
2872 * runs entirely at TL=1 with interrupts disabled, is replaced with the
2873 * FAST_TRAP_DONE macro, which branches control to this fast_trap_done
2874 * entry point.
2875 *
2876 * We check for any pending interrupts here and force a sys_trap to
2877 * service those interrupts, if any. To minimize overhead, pending
2878 * interrupts are checked if the %tpc happens to be at 16K boundary,
2879 * which allows a malicious program to execute at most 4K consecutive
2880 * instructions before we service any pending interrupts. If a worst
2881 * case fast trap handler takes about 2 usec, then interrupts will be
2882 * blocked for at most 8 msec, less than a clock tick.
2883 *
2884 * For the cases where we don't know if the %tpc will cross a 16K
2885 * boundary, we can't use the above optimization and always process
2886 * any pending interrupts via fast_trap_done_chk_intr entry point.
2887 *
2888 * Entry Conditions:
2889 * 	%pstate		am:0 priv:1 ie:0
2890 * 			globals are AG (not normal globals)
2891 */
2892
2893	.global	fast_trap_done, fast_trap_done_chk_intr
2894fast_trap_done:
	/*
	 * Cheap filter: only when the low 14 bits of %tpc are all zero is
	 * the interrupt state examined. andncc with 0xffffc000 leaves
	 * those 14 bits in the low word, and the branch tests icc, so
	 * bits 63:32 of %tpc do not take part. The ldxa in the annulled
	 * delay slot runs only when the branch is taken.
	 */
2895	rdpr	%tpc, %g5
2896	sethi	%hi(0xffffc000), %g6	! 1's complement of 0x3fff
2897	andncc	%g5, %g6, %g0		! check lower 14 bits of %tpc
2898	bz,a,pn	%icc, 1f		! branch if zero (lower 32 bits only)
2899	  ldxa	[%g0]ASI_INTR_RECEIVE_STATUS, %g5
2900	done
2901
2902fast_trap_done_chk_intr:
	/* unconditional variant: always examine the interrupt state */
2903	ldxa	[%g0]ASI_INTR_RECEIVE_STATUS, %g5
2904
	/*
	 * An interrupt counts as pending when IRSR_BUSY is set in the
	 * receive status or any bit is set in SOFTINT.
	 */
29051:	rd	SOFTINT, %g6
2906	and	%g5, IRSR_BUSY, %g5
2907	orcc	%g5, %g6, %g0
2908	bnz,pn	%xcc, 2f		! branch if any pending intr
2909	nop
2910	done
2911
29122:
2913	/*
2914	 * We get here if there are any pending interrupts.
2915	 * Adjust %tpc/%tnpc as we'll be resuming via "retry"
2916	 * instruction.
2917	 */
	/* tpc = old tnpc and tnpc = old tnpc + 4 */
2918	rdpr	%tnpc, %g5
2919	wrpr	%g0, %g5, %tpc
2920	add	%g5, 4, %g5
2921	wrpr	%g0, %g5, %tnpc
2922
2923	/*
2924	 * Force a dummy sys_trap call so that interrupts can be serviced.
2925	 */
2926	set	fast_trap_dummy_call, %g1
2927	ba,pt	%xcc, sys_trap
2928	  mov	-1, %g4
2929
2930fast_trap_dummy_call:
	/* empty handler passed to sys_trap above: it returns at once */
2931	retl
2932	nop
2933
2934#endif	/* lint */
2935
2936