xref: /titanic_50/usr/src/uts/sun4u/ml/trap_table.s (revision 6bb0858833ad931216a3c40682436131bec0007f)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#if !defined(lint)
29#include "assym.h"
30#endif /* !lint */
31#include <sys/asm_linkage.h>
32#include <sys/privregs.h>
33#include <sys/sun4asi.h>
34#include <sys/spitregs.h>
35#include <sys/cheetahregs.h>
36#include <sys/machtrap.h>
37#include <sys/machthread.h>
38#include <sys/machbrand.h>
39#include <sys/pcb.h>
40#include <sys/pte.h>
41#include <sys/mmu.h>
42#include <sys/machpcb.h>
43#include <sys/async.h>
44#include <sys/intreg.h>
45#include <sys/scb.h>
46#include <sys/psr_compat.h>
47#include <sys/syscall.h>
48#include <sys/machparam.h>
49#include <sys/traptrace.h>
50#include <vm/hat_sfmmu.h>
51#include <sys/archsystm.h>
52#include <sys/utrap.h>
53#include <sys/clock.h>
54#include <sys/intr.h>
55#include <sys/fpu/fpu_simulator.h>
56#include <vm/seg_spt.h>
57
58/*
59 * WARNING: If you add a fast trap handler which can be invoked by a
60 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
61 * instead of "done" instruction to return back to the user mode. See
62 * comments for the "fast_trap_done" entry point for more information.
63 *
64 * An alternate FAST_TRAP_DONE_CHK_INTR macro should be used for the
65 * cases where you always want to process any pending interrupts before
66 * returning back to the user mode.
67 */
/*
 * FAST_TRAP_DONE: exit a fast trap handler by branching to the common
 * fast_trap_done path.  The branch is annulled (ba,a), so no delay-slot
 * instruction is executed and the macro is exactly one instruction long.
 */
68#define	FAST_TRAP_DONE		\
69	ba,a	fast_trap_done
70
/*
 * FAST_TRAP_DONE_CHK_INTR: same shape as FAST_TRAP_DONE, but the
 * annulled branch targets fast_trap_done_chk_intr instead.
 */
71#define	FAST_TRAP_DONE_CHK_INTR	\
72	ba,a	fast_trap_done_chk_intr
73
74/*
75 * SPARC V9 Trap Table
76 *
77 * Most of the trap handlers are made from common building
78 * blocks, and some are instantiated multiple times within
79 * the trap table. So, I build a bunch of macros, then
80 * populate the table using only the macros.
81 *
82 * Many macros branch to sys_trap.  Its calling convention is:
83 *	%g1		kernel trap handler
84 *	%g2, %g3	args for above
85 *	%g4		desired %pil
86 */
87
88#ifdef	TRAPTRACE
89
90/*
91 * Tracing macro. Adds two instructions if TRAPTRACE is defined.
92 */
/*
 * TT_TRACE(label) branches to the trace routine at label; the delay
 * slot stores the address of the "rd" instruction itself in %g7, so
 * %g7 is overwritten.  Presumably the routine uses %g7 to get back to
 * the trap table entry -- confirm against the trace_* routines.
 * TT_TRACE_INS is the number of instructions TT_TRACE expands to.
 */
93#define	TT_TRACE(label)		\
94	ba	label		;\
95	rd	%pc, %g7
96#define	TT_TRACE_INS	2
97
/*
 * TT_TRACE_L is the three-instruction variant used by the register
 * window handlers in this file: the %pc is passed in %l4 rather than
 * in a global, and the instruction after the delay slot clears %l4.
 * TT_TRACE_L_INS is used in the SKIP() padding arithmetic of the
 * spill/fill macros.
 */
98#define	TT_TRACE_L(label)	\
99	ba	label		;\
100	rd	%pc, %l4	;\
101	clr	%l4
102#define	TT_TRACE_L_INS	3
103
104#else
105
/*
 * TRAPTRACE not defined: both macros expand to nothing and their
 * instruction counts are zero.
 */
106#define	TT_TRACE(label)
107#define	TT_TRACE_INS	0
108
109#define	TT_TRACE_L(label)
110#define	TT_TRACE_L_INS	0
111
112#endif
113
114/*
115 * This macro is used to update per cpu mmu stats in perf critical
116 * paths. It is only enabled in debug kernels or if SFMMU_STAT_GATHER
117 * is defined.
118 */
/*
 * HAT_PERCPU_DBSTAT(stat): when enabled, loads stat into %g1 and
 * branches to stat_mmu; the delay slot leaves the address of the "rd"
 * instruction in %g7.  Both %g1 and %g7 are therefore overwritten.
 * When neither DEBUG nor SFMMU_STAT_GATHER is defined the macro
 * expands to nothing.
 */
119#if defined(DEBUG) || defined(SFMMU_STAT_GATHER)
120#define	HAT_PERCPU_DBSTAT(stat)			\
121	mov	stat, %g1			;\
122	ba	stat_mmu			;\
123	rd	%pc, %g7
124#else
125#define	HAT_PERCPU_DBSTAT(stat)
126#endif /* DEBUG || SFMMU_STAT_GATHER */
127
128/*
129 * This first set is funneled to trap() with %tt as the type.
130 * Trap will then either panic or send the user a signal.
131 */
132/*
133 * NOT is used for traps that just shouldn't happen.
134 * It comes in both single and quadruple flavors.
135 */
136#if !defined(lint)
137	.global	trap
138#endif /* !lint */
/*
 * NOT register setup for sys_trap:
 *	%g1 = trap	(kernel trap handler)
 *	%g3 = %tt	(trap type, read with rdpr)
 *	%g4 = -1	(loaded by the "sub" in the branch delay slot)
 * An optional TT_TRACE(trace_gen) comes first, and .align 32 pads the
 * expansion to the next 32-byte boundary.  NOT4 is four NOT entries
 * back to back.
 */
139#define	NOT			\
140	TT_TRACE(trace_gen)	;\
141	set	trap, %g1	;\
142	rdpr	%tt, %g3	;\
143	ba,pt	%xcc, sys_trap	;\
144	sub	%g0, 1, %g4	;\
145	.align	32
146#define	NOT4	NOT; NOT; NOT; NOT
147/*
148 * RED is for traps that use the red mode handler.
149 * We should never see these either, so RED is simply an alias for NOT.
150 */
151#define	RED	NOT
152/*
153 * BAD is used for trap vectors we don't have a kernel
154 * handler for; it is an alias for NOT.
155 * It also comes in single (BAD) and quadruple (BAD4, i.e. NOT4) versions.
156 */
157#define	BAD	NOT
158#define	BAD4	NOT4
159
/*
 * DONE: a trap table entry consisting of a single "done" instruction,
 * padded by .align to the next 32-byte boundary.
 */
160#define	DONE			\
161	done;			\
162	.align	32
163
164/*
165 * TRAP vectors to the trap() function.
166 * Its main use is for user errors.
167 */
168#if !defined(lint)
169	.global	trap
170#endif /* !lint */
/*
 * TRAP(arg) register setup for sys_trap:
 *	%g1 = trap	(kernel trap handler)
 *	%g3 = arg	(caller-supplied value instead of %tt)
 *	%g4 = -1	(loaded in the branch delay slot)
 * Preceded by the optional TT_TRACE(trace_gen) and padded to a 32-byte
 * boundary.
 */
171#define	TRAP(arg)		\
172	TT_TRACE(trace_gen)	;\
173	set	trap, %g1	;\
174	mov	arg, %g3	;\
175	ba,pt	%xcc, sys_trap	;\
176	sub	%g0, 1, %g4	;\
177	.align	32
178
179/*
180 * SYSCALL is used for unsupported syscall interfaces (with 'which'
181 * set to 'nosys') and legacy support of old SunOS 4.x syscalls (with
182 * 'which' set to 'syscall_trap32').
183 *
184 * The SYSCALL_TRAP* macros are used for syscall entry points.
185 * SYSCALL_TRAP is used to support LP64 syscalls and SYSCALL_TRAP32
186 * is used to support ILP32.  Each macro can only be used once
187 * since they each define a symbol.  The symbols are used as hot patch
188 * points by the brand infrastructure to dynamically enable and disable
189 * brand syscall interposition.  See the comments around BRAND_CALLBACK
190 * and brand_plat_interposition_enable() for more information.
191 */
/*
 * SYSCALL_NOTT(which): the common tail of the syscall entries, without
 * a TT_TRACE hook (presumably what the NOTT suffix stands for).
 *	%g1 = which	(handler passed to sys_trap)
 *	%g4 = -1	(loaded in the branch delay slot)
 * %g2 and %g3 are not set here.  Padded to a 32-byte boundary.
 */
192#define	SYSCALL_NOTT(which)		\
193	set	(which), %g1		;\
194	ba,pt	%xcc, sys_trap		;\
195	sub	%g0, 1, %g4		;\
196	.align	32
197
/*
 * SYSCALL(which): optional TT_TRACE(trace_gen) followed by
 * SYSCALL_NOTT(which).  Defines no symbol, so it may be used repeatedly.
 */
198#define	SYSCALL(which)			\
199	TT_TRACE(trace_gen)		;\
200	SYSCALL_NOTT(which)
201
/*
 * SYSCALL_TRAP32: ILP32 syscall entry.  The symbol
 * syscall_trap32_patch_point is defined by ALTENTRY after the optional
 * trace instructions, i.e. directly in front of the SYSCALL_NOTT
 * sequence that loads syscall_trap32 into %g1.  Because it defines a
 * symbol, the macro can be instantiated only once.
 */
202#define	SYSCALL_TRAP32				\
203	TT_TRACE(trace_gen)			;\
204	ALTENTRY(syscall_trap32_patch_point)	\
205	SYSCALL_NOTT(syscall_trap32)
206
/*
 * SYSCALL_TRAP: LP64 syscall entry.  The symbol
 * syscall_trap_patch_point is defined by ALTENTRY after the optional
 * trace instructions, i.e. directly in front of the SYSCALL_NOTT
 * sequence that loads syscall_trap into %g1.  Because it defines a
 * symbol, the macro can be instantiated only once.
 */
207#define	SYSCALL_TRAP				\
208	TT_TRACE(trace_gen)			;\
209	ALTENTRY(syscall_trap_patch_point)	\
210	SYSCALL_NOTT(syscall_trap)
211
/*
 * FLUSHW(): handles the flush-windows trap inline.  It executes
 * save / flushw / restore and then leaves through FAST_TRAP_DONE.
 * NOTE(review): %g1, %g3 and %g4 are loaded as they would be for a
 * trap(T_FLUSHW) call through sys_trap, but nothing in this macro
 * branches to sys_trap or otherwise consumes them -- confirm whether
 * any code on the fast_trap_done path relies on these values.
 */
212#define	FLUSHW()			\
213	set	trap, %g1		;\
214	mov	T_FLUSHW, %g3		;\
215	sub	%g0, 1, %g4		;\
216	save				;\
217	flushw				;\
218	restore				;\
219	FAST_TRAP_DONE			;\
220	.align	32
221
222/*
223 * GOTO just jumps to a label.
224 * It's used for things that can be fixed without going thru sys_trap.
225 */
/*
 * GOTO(label): declares label global and jumps to it with an annulled
 * branch, so no delay-slot instruction runs.  The .empty directive
 * presumably tells the assembler that the slot after the branch is
 * intentionally unused -- see the assembler manual.  Padded to a
 * 32-byte boundary.
 */
226#define	GOTO(label)		\
227	.global	label		;\
228	ba,a	label		;\
229	.empty			;\
230	.align	32
231
232/*
233 * GOTO_TT jumps to a label like GOTO, after an optional TT_TRACE(ttlabel).
234 * Correctable ECC error traps at level 0 and 1 use this macro.
235 * It's used for things that can be fixed without going thru sys_trap.
236 */
/*
 * GOTO_TT(label, ttlabel): same as GOTO(label), except that the
 * optional trace hook TT_TRACE(ttlabel) is emitted ahead of the
 * annulled branch.
 */
237#define	GOTO_TT(label, ttlabel)		\
238	.global	label		;\
239	TT_TRACE(ttlabel)	;\
240	ba,a	label		;\
241	.empty			;\
242	.align	32
243
244/*
245 * Privileged traps
246 * Takes breakpoint if privileged, calls trap() if not.
247 */
/*
 * PRIV(label): reads %tstate into %g1 and tests TSTATE_PRIV.
 *   - bit set:   branch to label
 *   - bit clear: fall through to sys_trap with %g1 = trap, %g4 = -1
 * The "rdpr %tt, %g3" sits in the (non-annulled) delay slot of the bnz,
 * so %g3 holds the trap type on both paths.
 */
248#define	PRIV(label)			\
249	rdpr	%tstate, %g1		;\
250	btst	TSTATE_PRIV, %g1	;\
251	bnz	label			;\
252	rdpr	%tt, %g3		;\
253	set	trap, %g1		;\
254	ba,pt	%xcc, sys_trap		;\
255	sub	%g0, 1, %g4		;\
256	.align	32
257
258
259/*
260 * DTrace traps.
261 */
/*
 * DTRACE_PID: branches to user_trap (not sys_trap) with
 *	%g1 = dtrace_pid_probe
 *	%g4 = -1	(loaded in the branch delay slot)
 * Padded to a 32-byte boundary.
 */
262#define	DTRACE_PID			\
263	.global dtrace_pid_probe				;\
264	set	dtrace_pid_probe, %g1				;\
265	ba,pt	%xcc, user_trap					;\
266	sub	%g0, 1, %g4					;\
267	.align	32
268
/*
 * DTRACE_RETURN: branches to user_trap (not sys_trap) with
 *	%g1 = dtrace_return_probe
 *	%g4 = -1	(loaded in the branch delay slot)
 * Padded to a 32-byte boundary.
 */
269#define	DTRACE_RETURN			\
270	.global dtrace_return_probe				;\
271	set	dtrace_return_probe, %g1			;\
272	ba,pt	%xcc, user_trap					;\
273	sub	%g0, 1, %g4					;\
274	.align	32
275
276/*
277 * REGISTER WINDOW MANAGEMENT MACROS
278 */
279
280/*
281 * various convenient units of padding
282 */
/* SKIP(n) reserves 4*n bytes, i.e. room for n four-byte instructions. */
283#define	SKIP(n)	.skip 4*(n)
284
285/*
286 * CLEAN_WINDOW is the simple handler for cleaning a register window.
287 */
/*
 * Sequence: optional TT_TRACE_L(trace_win), increment %cleanwin (using
 * %l0 as scratch before it is cleared), zero all eight locals and all
 * eight outs, then retry the trapped instruction.  Padded to a
 * 128-byte boundary.
 */
288#define	CLEAN_WINDOW						\
289	TT_TRACE_L(trace_win)					;\
290	rdpr %cleanwin, %l0; inc %l0; wrpr %l0, %cleanwin	;\
291	clr %l0; clr %l1; clr %l2; clr %l3			;\
292	clr %l4; clr %l5; clr %l6; clr %l7			;\
293	clr %o0; clr %o1; clr %o2; clr %o3			;\
294	clr %o4; clr %o5; clr %o6; clr %o7			;\
295	retry; .align 128
296
297#if !defined(lint)
298
299/*
300 * If we get an unresolved tlb miss while in a window handler, the fault
301 * handler will resume execution at the last instruction of the window
302 * handler, instead of delivering the fault to the kernel.  Spill handlers
303 * use this to spill windows into the wbuf.
304 *
305 * The mixed handler works by checking %sp, and branching to the correct
306 * handler.  This is done by branching back to label 1: for 32b frames,
307 * or label 2: for 64b frames; which implies the handler order is: 32b,
308 * 64b, mixed.  The 1: and 2: labels are offset into the routines to
309 * allow the branches' delay slots to contain useful instructions.
310 */
311
312/*
313 * SPILL_32bit spills a 32-bit-wide kernel register window.  It
314 * assumes that the kernel context and the nucleus context are the
315 * same.  The stack pointer is required to be eight-byte aligned even
316 * though this code only needs it to be four-byte aligned.
317 */
/*
 * Layout: 1 (srl) + 16 (st) + 2 (saved, retry) = 19 instructions, plus
 * TT_TRACE_L_INS for the optional trace hook; SKIP() then pads so that
 * the trailing "ba,a" to the fault_32bit_ routine selected by the
 * argument occupies the 32nd and last slot of the handler.
 * The srl zero-extends the low 32 bits of %sp.  Label 1: sits after
 * the srl so that a branch back to 1: can carry its own srl in the
 * delay slot.  Locals and ins are stored as 32-bit words at
 * %sp + 0 .. %sp + 60.
 */
318#define	SPILL_32bit(tail)					\
319	srl	%sp, 0, %sp					;\
3201:	st	%l0, [%sp + 0]					;\
321	st	%l1, [%sp + 4]					;\
322	st	%l2, [%sp + 8]					;\
323	st	%l3, [%sp + 12]					;\
324	st	%l4, [%sp + 16]					;\
325	st	%l5, [%sp + 20]					;\
326	st	%l6, [%sp + 24]					;\
327	st	%l7, [%sp + 28]					;\
328	st	%i0, [%sp + 32]					;\
329	st	%i1, [%sp + 36]					;\
330	st	%i2, [%sp + 40]					;\
331	st	%i3, [%sp + 44]					;\
332	st	%i4, [%sp + 48]					;\
333	st	%i5, [%sp + 52]					;\
334	st	%i6, [%sp + 56]					;\
335	st	%i7, [%sp + 60]					;\
336	TT_TRACE_L(trace_win)					;\
337	saved							;\
338	retry							;\
339	SKIP(31-19-TT_TRACE_L_INS)				;\
340	ba,a,pt	%xcc, fault_32bit_/**/tail			;\
341	.empty
342
343/*
344 * SPILL_32bit_asi spills a 32-bit-wide register window into a 32-bit
345 * wide address space via the designated asi.  It is used to spill
346 * non-kernel windows.  The stack pointer is required to be eight-byte
347 * aligned even though this code only needs it to be four-byte
348 * aligned.
349 */
/*
 * Register use: %g1, %g2, %g3 hold the constant offsets 4, 8 and 12,
 * and %g4 is a base pointer that advances through the frame in 16-byte
 * steps (%sp + 16, + 32, + 48); all four globals are overwritten.
 * Layout: 1 (srl) + 16 (sta) + 3 (mov) + 3 (add) + 2 (saved, retry)
 * = 25 instructions, plus TT_TRACE_L_INS; SKIP() pads so the final
 * "ba,a" lands in the 32nd slot.
 */
350#define	SPILL_32bit_asi(asi_num, tail)				\
351	srl	%sp, 0, %sp					;\
3521:	sta	%l0, [%sp + %g0]asi_num				;\
353	mov	4, %g1						;\
354	sta	%l1, [%sp + %g1]asi_num				;\
355	mov	8, %g2						;\
356	sta	%l2, [%sp + %g2]asi_num				;\
357	mov	12, %g3						;\
358	sta	%l3, [%sp + %g3]asi_num				;\
359	add	%sp, 16, %g4					;\
360	sta	%l4, [%g4 + %g0]asi_num				;\
361	sta	%l5, [%g4 + %g1]asi_num				;\
362	sta	%l6, [%g4 + %g2]asi_num				;\
363	sta	%l7, [%g4 + %g3]asi_num				;\
364	add	%g4, 16, %g4					;\
365	sta	%i0, [%g4 + %g0]asi_num				;\
366	sta	%i1, [%g4 + %g1]asi_num				;\
367	sta	%i2, [%g4 + %g2]asi_num				;\
368	sta	%i3, [%g4 + %g3]asi_num				;\
369	add	%g4, 16, %g4					;\
370	sta	%i4, [%g4 + %g0]asi_num				;\
371	sta	%i5, [%g4 + %g1]asi_num				;\
372	sta	%i6, [%g4 + %g2]asi_num				;\
373	sta	%i7, [%g4 + %g3]asi_num				;\
374	TT_TRACE_L(trace_win)					;\
375	saved							;\
376	retry							;\
377	SKIP(31-25-TT_TRACE_L_INS)				;\
378	ba,a,pt %xcc, fault_32bit_/**/tail			;\
379	.empty
380
381/*
382 * SPILL_32bit_tt1 spills a 32-bit-wide register window into a 32-bit
383 * wide address space via the designated asi.  It is used to spill
384 * windows at tl>1 where performance isn't the primary concern and
385 * where we don't want to use unnecessary registers.  The stack
386 * pointer is required to be eight-byte aligned even though this code
387 * only needs it to be four-byte aligned.
388 */
/*
 * The ASI is loaded into the %asi register once, so the stores can use
 * immediate offsets and no global registers are needed.  In this
 * variant label 1: is on the srl itself.
 * Layout: 1 (mov to %asi) + 1 (srl) + 16 (sta) + 2 (saved, retry) = 20
 * instructions, plus TT_TRACE_L_INS; SKIP() pads so the final "ba,a"
 * lands in the 32nd slot.
 */
389#define	SPILL_32bit_tt1(asi_num, tail)				\
390	mov	asi_num, %asi					;\
3911:	srl	%sp, 0, %sp					;\
392	sta	%l0, [%sp + 0]%asi				;\
393	sta	%l1, [%sp + 4]%asi				;\
394	sta	%l2, [%sp + 8]%asi				;\
395	sta	%l3, [%sp + 12]%asi				;\
396	sta	%l4, [%sp + 16]%asi				;\
397	sta	%l5, [%sp + 20]%asi				;\
398	sta	%l6, [%sp + 24]%asi				;\
399	sta	%l7, [%sp + 28]%asi				;\
400	sta	%i0, [%sp + 32]%asi				;\
401	sta	%i1, [%sp + 36]%asi				;\
402	sta	%i2, [%sp + 40]%asi				;\
403	sta	%i3, [%sp + 44]%asi				;\
404	sta	%i4, [%sp + 48]%asi				;\
405	sta	%i5, [%sp + 52]%asi				;\
406	sta	%i6, [%sp + 56]%asi				;\
407	sta	%i7, [%sp + 60]%asi				;\
408	TT_TRACE_L(trace_win)					;\
409	saved							;\
410	retry							;\
411	SKIP(31-20-TT_TRACE_L_INS)				;\
412	ba,a,pt	%xcc, fault_32bit_/**/tail			;\
413	.empty
414
415
416/*
417 * FILL_32bit fills a 32-bit-wide kernel register window.  It assumes
418 * that the kernel context and the nucleus context are the same.  The
419 * stack pointer is required to be eight-byte aligned even though this
420 * code only needs it to be four-byte aligned.
421 */
/*
 * Mirror image of SPILL_32bit: sixteen 32-bit loads from %sp + 0 ..
 * %sp + 60 into the locals and ins, followed by restored and retry.
 * The optional trace hook is emitted first, before the loads reload
 * %l4 (which TT_TRACE_L uses as scratch).
 * Layout: 1 (srl) + 16 (ld) + 2 (restored, retry) = 19 instructions,
 * plus TT_TRACE_L_INS; SKIP() pads so the final "ba,a" lands in the
 * 32nd slot.
 */
422#define	FILL_32bit(tail)					\
423	srl	%sp, 0, %sp					;\
4241:	TT_TRACE_L(trace_win)					;\
425	ld	[%sp + 0], %l0					;\
426	ld	[%sp + 4], %l1					;\
427	ld	[%sp + 8], %l2					;\
428	ld	[%sp + 12], %l3					;\
429	ld	[%sp + 16], %l4					;\
430	ld	[%sp + 20], %l5					;\
431	ld	[%sp + 24], %l6					;\
432	ld	[%sp + 28], %l7					;\
433	ld	[%sp + 32], %i0					;\
434	ld	[%sp + 36], %i1					;\
435	ld	[%sp + 40], %i2					;\
436	ld	[%sp + 44], %i3					;\
437	ld	[%sp + 48], %i4					;\
438	ld	[%sp + 52], %i5					;\
439	ld	[%sp + 56], %i6					;\
440	ld	[%sp + 60], %i7					;\
441	restored						;\
442	retry							;\
443	SKIP(31-19-TT_TRACE_L_INS)				;\
444	ba,a,pt	%xcc, fault_32bit_/**/tail			;\
445	.empty
446
447/*
448 * FILL_32bit_asi fills a 32-bit-wide register window from a 32-bit
449 * wide address space via the designated asi.  It is used to fill
450 * non-kernel windows.  The stack pointer is required to be eight-byte
451 * aligned even though this code only needs it to be four-byte
452 * aligned.
453 */
/*
 * Register use: %g1, %g2, %g3 hold the constant offsets 4, 8 and 12,
 * and %g4 is a base pointer that advances through the frame in 16-byte
 * steps; all four globals are overwritten.
 * Layout: 1 (srl) + 16 (lda) + 3 (mov) + 3 (add) + 2 (restored, retry)
 * = 25 instructions, plus TT_TRACE_L_INS; SKIP() pads so the final
 * "ba,a" lands in the 32nd slot.
 */
454#define	FILL_32bit_asi(asi_num, tail)				\
455	srl	%sp, 0, %sp					;\
4561:	TT_TRACE_L(trace_win)					;\
457	mov	4, %g1						;\
458	lda	[%sp + %g0]asi_num, %l0				;\
459	mov	8, %g2						;\
460	lda	[%sp + %g1]asi_num, %l1				;\
461	mov	12, %g3						;\
462	lda	[%sp + %g2]asi_num, %l2				;\
463	lda	[%sp + %g3]asi_num, %l3				;\
464	add	%sp, 16, %g4					;\
465	lda	[%g4 + %g0]asi_num, %l4				;\
466	lda	[%g4 + %g1]asi_num, %l5				;\
467	lda	[%g4 + %g2]asi_num, %l6				;\
468	lda	[%g4 + %g3]asi_num, %l7				;\
469	add	%g4, 16, %g4					;\
470	lda	[%g4 + %g0]asi_num, %i0				;\
471	lda	[%g4 + %g1]asi_num, %i1				;\
472	lda	[%g4 + %g2]asi_num, %i2				;\
473	lda	[%g4 + %g3]asi_num, %i3				;\
474	add	%g4, 16, %g4					;\
475	lda	[%g4 + %g0]asi_num, %i4				;\
476	lda	[%g4 + %g1]asi_num, %i5				;\
477	lda	[%g4 + %g2]asi_num, %i6				;\
478	lda	[%g4 + %g3]asi_num, %i7				;\
479	restored						;\
480	retry							;\
481	SKIP(31-25-TT_TRACE_L_INS)				;\
482	ba,a,pt %xcc, fault_32bit_/**/tail			;\
483	.empty
484
485/*
486 * FILL_32bit_tt1 fills a 32-bit-wide register window from a 32-bit
487 * wide address space via the designated asi.  It is used to fill
488 * windows at tl>1 where performance isn't the primary concern and
489 * where we don't want to use unnecessary registers.  The stack
490 * pointer is required to be eight-byte aligned even though this code
491 * only needs it to be four-byte aligned.
492 */
/*
 * The ASI is loaded into the %asi register once, so the loads can use
 * immediate offsets and no global registers are needed.  In this
 * variant label 1: is on the srl itself.
 * Layout: 1 (mov to %asi) + 1 (srl) + 16 (lda) + 2 (restored, retry)
 * = 20 instructions, plus TT_TRACE_L_INS; SKIP() pads so the final
 * "ba,a" lands in the 32nd slot.
 */
493#define	FILL_32bit_tt1(asi_num, tail)				\
494	mov	asi_num, %asi					;\
4951:	srl	%sp, 0, %sp					;\
496	TT_TRACE_L(trace_win)					;\
497	lda	[%sp + 0]%asi, %l0				;\
498	lda	[%sp + 4]%asi, %l1				;\
499	lda	[%sp + 8]%asi, %l2				;\
500	lda	[%sp + 12]%asi, %l3				;\
501	lda	[%sp + 16]%asi, %l4				;\
502	lda	[%sp + 20]%asi, %l5				;\
503	lda	[%sp + 24]%asi, %l6				;\
504	lda	[%sp + 28]%asi, %l7				;\
505	lda	[%sp + 32]%asi, %i0				;\
506	lda	[%sp + 36]%asi, %i1				;\
507	lda	[%sp + 40]%asi, %i2				;\
508	lda	[%sp + 44]%asi, %i3				;\
509	lda	[%sp + 48]%asi, %i4				;\
510	lda	[%sp + 52]%asi, %i5				;\
511	lda	[%sp + 56]%asi, %i6				;\
512	lda	[%sp + 60]%asi, %i7				;\
513	restored						;\
514	retry							;\
515	SKIP(31-20-TT_TRACE_L_INS)				;\
516	ba,a,pt	%xcc, fault_32bit_/**/tail			;\
517	.empty
518
519
520/*
521 * SPILL_64bit spills a 64-bit-wide kernel register window.  It
522 * assumes that the kernel context and the nucleus context are the
523 * same.  The stack pointer is required to be eight-byte aligned.
524 */
525#define	SPILL_64bit(tail)					\
5262:	stx	%l0, [%sp + V9BIAS64 + 0]			;\
527	stx	%l1, [%sp + V9BIAS64 + 8]			;\
528	stx	%l2, [%sp + V9BIAS64 + 16]			;\
529	stx	%l3, [%sp + V9BIAS64 + 24]			;\
530	stx	%l4, [%sp + V9BIAS64 + 32]			;\
531	stx	%l5, [%sp + V9BIAS64 + 40]			;\
532	stx	%l6, [%sp + V9BIAS64 + 48]			;\
533	stx	%l7, [%sp + V9BIAS64 + 56]			;\
534	stx	%i0, [%sp + V9BIAS64 + 64]			;\
535	stx	%i1, [%sp + V9BIAS64 + 72]			;\
536	stx	%i2, [%sp + V9BIAS64 + 80]			;\
537	stx	%i3, [%sp + V9BIAS64 + 88]			;\
538	stx	%i4, [%sp + V9BIAS64 + 96]			;\
539	stx	%i5, [%sp + V9BIAS64 + 104]			;\
540	stx	%i6, [%sp + V9BIAS64 + 112]			;\
541	stx	%i7, [%sp + V9BIAS64 + 120]			;\
542	TT_TRACE_L(trace_win)					;\
543	saved							;\
544	retry							;\
545	SKIP(31-18-TT_TRACE_L_INS)				;\
546	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
547	.empty
548
549/*
550 * SPILL_64bit_asi spills a 64-bit-wide register window into a 64-bit
551 * wide address space via the designated asi.  It is used to spill
552 * non-kernel windows.  The stack pointer is required to be eight-byte
553 * aligned.
554 */
/*
 * Register use: %g1 .. %g4 hold the biased offsets V9BIAS64 + 0, 8, 16
 * and 24, and %g5 is a base pointer that advances through the frame in
 * 32-byte steps (%sp + 32, + 64, + 96); all five globals are
 * overwritten.
 * Layout: 4 (mov) + 16 (stxa) + 3 (add) + 2 (saved, retry) = 25
 * instructions, plus TT_TRACE_L_INS; SKIP() pads so the final "ba,a"
 * lands in the 32nd slot.
 */
555#define	SPILL_64bit_asi(asi_num, tail)				\
556	mov	0 + V9BIAS64, %g1				;\
5572:	stxa	%l0, [%sp + %g1]asi_num				;\
558	mov	8 + V9BIAS64, %g2				;\
559	stxa	%l1, [%sp + %g2]asi_num				;\
560	mov	16 + V9BIAS64, %g3				;\
561	stxa	%l2, [%sp + %g3]asi_num				;\
562	mov	24 + V9BIAS64, %g4				;\
563	stxa	%l3, [%sp + %g4]asi_num				;\
564	add	%sp, 32, %g5					;\
565	stxa	%l4, [%g5 + %g1]asi_num				;\
566	stxa	%l5, [%g5 + %g2]asi_num				;\
567	stxa	%l6, [%g5 + %g3]asi_num				;\
568	stxa	%l7, [%g5 + %g4]asi_num				;\
569	add	%g5, 32, %g5					;\
570	stxa	%i0, [%g5 + %g1]asi_num				;\
571	stxa	%i1, [%g5 + %g2]asi_num				;\
572	stxa	%i2, [%g5 + %g3]asi_num				;\
573	stxa	%i3, [%g5 + %g4]asi_num				;\
574	add	%g5, 32, %g5					;\
575	stxa	%i4, [%g5 + %g1]asi_num				;\
576	stxa	%i5, [%g5 + %g2]asi_num				;\
577	stxa	%i6, [%g5 + %g3]asi_num				;\
578	stxa	%i7, [%g5 + %g4]asi_num				;\
579	TT_TRACE_L(trace_win)					;\
580	saved							;\
581	retry							;\
582	SKIP(31-25-TT_TRACE_L_INS)				;\
583	ba,a,pt %xcc, fault_64bit_/**/tail			;\
584	.empty
585
586/*
587 * SPILL_64bit_tt1 spills a 64-bit-wide register window into a 64-bit
588 * wide address space via the designated asi.  It is used to spill
589 * windows at tl>1 where performance isn't the primary concern and
590 * where we don't want to use unnecessary registers.  The stack
591 * pointer is required to be eight-byte aligned.
592 */
/*
 * The ASI is loaded into the %asi register once, so the stores can use
 * immediate (biased) offsets and no global registers are needed.
 * Layout: 1 (mov to %asi) + 16 (stxa) + 2 (saved, retry) = 19
 * instructions, plus TT_TRACE_L_INS; SKIP() pads so the final "ba,a"
 * lands in the 32nd slot.
 */
593#define	SPILL_64bit_tt1(asi_num, tail)				\
594	mov	asi_num, %asi					;\
5952:	stxa	%l0, [%sp + V9BIAS64 + 0]%asi			;\
596	stxa	%l1, [%sp + V9BIAS64 + 8]%asi			;\
597	stxa	%l2, [%sp + V9BIAS64 + 16]%asi			;\
598	stxa	%l3, [%sp + V9BIAS64 + 24]%asi			;\
599	stxa	%l4, [%sp + V9BIAS64 + 32]%asi			;\
600	stxa	%l5, [%sp + V9BIAS64 + 40]%asi			;\
601	stxa	%l6, [%sp + V9BIAS64 + 48]%asi			;\
602	stxa	%l7, [%sp + V9BIAS64 + 56]%asi			;\
603	stxa	%i0, [%sp + V9BIAS64 + 64]%asi			;\
604	stxa	%i1, [%sp + V9BIAS64 + 72]%asi			;\
605	stxa	%i2, [%sp + V9BIAS64 + 80]%asi			;\
606	stxa	%i3, [%sp + V9BIAS64 + 88]%asi			;\
607	stxa	%i4, [%sp + V9BIAS64 + 96]%asi			;\
608	stxa	%i5, [%sp + V9BIAS64 + 104]%asi			;\
609	stxa	%i6, [%sp + V9BIAS64 + 112]%asi			;\
610	stxa	%i7, [%sp + V9BIAS64 + 120]%asi			;\
611	TT_TRACE_L(trace_win)					;\
612	saved							;\
613	retry							;\
614	SKIP(31-19-TT_TRACE_L_INS)				;\
615	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
616	.empty
617
618
619/*
620 * FILL_64bit fills a 64-bit-wide kernel register window.  It assumes
621 * that the kernel context and the nucleus context are the same.  The
622 * stack pointer is required to be eight-byte aligned.
623 */
/*
 * Mirror image of SPILL_64bit: sixteen 64-bit loads from
 * %sp + V9BIAS64 + 0 .. + 120 into the locals and ins, followed by
 * restored and retry.  Label 2: is on the first instruction and is the
 * target of the "2b" branch in FILL_mixed.  The optional trace hook is
 * emitted first, before the loads reload %l4.
 * Layout: 16 (ldx) + 2 (restored, retry) = 18 instructions, plus
 * TT_TRACE_L_INS; SKIP() pads so the final "ba,a" lands in the 32nd
 * slot.
 */
624#define	FILL_64bit(tail)					\
6252:	TT_TRACE_L(trace_win)					;\
626	ldx	[%sp + V9BIAS64 + 0], %l0			;\
627	ldx	[%sp + V9BIAS64 + 8], %l1			;\
628	ldx	[%sp + V9BIAS64 + 16], %l2			;\
629	ldx	[%sp + V9BIAS64 + 24], %l3			;\
630	ldx	[%sp + V9BIAS64 + 32], %l4			;\
631	ldx	[%sp + V9BIAS64 + 40], %l5			;\
632	ldx	[%sp + V9BIAS64 + 48], %l6			;\
633	ldx	[%sp + V9BIAS64 + 56], %l7			;\
634	ldx	[%sp + V9BIAS64 + 64], %i0			;\
635	ldx	[%sp + V9BIAS64 + 72], %i1			;\
636	ldx	[%sp + V9BIAS64 + 80], %i2			;\
637	ldx	[%sp + V9BIAS64 + 88], %i3			;\
638	ldx	[%sp + V9BIAS64 + 96], %i4			;\
639	ldx	[%sp + V9BIAS64 + 104], %i5			;\
640	ldx	[%sp + V9BIAS64 + 112], %i6			;\
641	ldx	[%sp + V9BIAS64 + 120], %i7			;\
642	restored						;\
643	retry							;\
644	SKIP(31-18-TT_TRACE_L_INS)				;\
645	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
646	.empty
647
648/*
649 * FILL_64bit_asi fills a 64-bit-wide register window from a 64-bit
650 * wide address space via the designated asi.  It is used to fill
651 * non-kernel windows.  The stack pointer is required to be eight-byte
652 * aligned.
653 */
/*
 * Register use: %g1 .. %g4 hold the biased offsets V9BIAS64 + 0, 8, 16
 * and 24, and %g5 is a base pointer that advances through the frame in
 * 32-byte steps; all five globals are overwritten.
 * Layout: 4 (mov) + 16 (ldxa) + 3 (add) + 2 (restored, retry) = 25
 * instructions, plus TT_TRACE_L_INS; SKIP() pads so the final "ba,a"
 * lands in the 32nd slot.
 */
654#define	FILL_64bit_asi(asi_num, tail)				\
655	mov	V9BIAS64 + 0, %g1				;\
6562:	TT_TRACE_L(trace_win)					;\
657	ldxa	[%sp + %g1]asi_num, %l0				;\
658	mov	V9BIAS64 + 8, %g2				;\
659	ldxa	[%sp + %g2]asi_num, %l1				;\
660	mov	V9BIAS64 + 16, %g3				;\
661	ldxa	[%sp + %g3]asi_num, %l2				;\
662	mov	V9BIAS64 + 24, %g4				;\
663	ldxa	[%sp + %g4]asi_num, %l3				;\
664	add	%sp, 32, %g5					;\
665	ldxa	[%g5 + %g1]asi_num, %l4				;\
666	ldxa	[%g5 + %g2]asi_num, %l5				;\
667	ldxa	[%g5 + %g3]asi_num, %l6				;\
668	ldxa	[%g5 + %g4]asi_num, %l7				;\
669	add	%g5, 32, %g5					;\
670	ldxa	[%g5 + %g1]asi_num, %i0				;\
671	ldxa	[%g5 + %g2]asi_num, %i1				;\
672	ldxa	[%g5 + %g3]asi_num, %i2				;\
673	ldxa	[%g5 + %g4]asi_num, %i3				;\
674	add	%g5, 32, %g5					;\
675	ldxa	[%g5 + %g1]asi_num, %i4				;\
676	ldxa	[%g5 + %g2]asi_num, %i5				;\
677	ldxa	[%g5 + %g3]asi_num, %i6				;\
678	ldxa	[%g5 + %g4]asi_num, %i7				;\
679	restored						;\
680	retry							;\
681	SKIP(31-25-TT_TRACE_L_INS)				;\
682	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
683	.empty
684
685/*
686 * FILL_64bit_tt1 fills a 64-bit-wide register window from a 64-bit
687 * wide address space via the designated asi.  It is used to fill
688 * windows at tl>1 where performance isn't the primary concern and
689 * where we don't want to use unnecessary registers.  The stack
690 * pointer is required to be eight-byte aligned.
691 */
/*
 * The ASI is loaded into the %asi register once, so the loads can use
 * immediate (biased) offsets and no global registers are needed.
 * Unlike the other 64-bit handlers, this macro defines no numeric
 * 2: label.
 * Layout: 1 (mov to %asi) + 16 (ldxa) + 2 (restored, retry) = 19
 * instructions, plus TT_TRACE_L_INS; SKIP() pads so the final "ba,a"
 * lands in the 32nd slot.
 */
692#define	FILL_64bit_tt1(asi_num, tail)				\
693	mov	asi_num, %asi					;\
694	TT_TRACE_L(trace_win)					;\
695	ldxa	[%sp + V9BIAS64 + 0]%asi, %l0			;\
696	ldxa	[%sp + V9BIAS64 + 8]%asi, %l1			;\
697	ldxa	[%sp + V9BIAS64 + 16]%asi, %l2			;\
698	ldxa	[%sp + V9BIAS64 + 24]%asi, %l3			;\
699	ldxa	[%sp + V9BIAS64 + 32]%asi, %l4			;\
700	ldxa	[%sp + V9BIAS64 + 40]%asi, %l5			;\
701	ldxa	[%sp + V9BIAS64 + 48]%asi, %l6			;\
702	ldxa	[%sp + V9BIAS64 + 56]%asi, %l7			;\
703	ldxa	[%sp + V9BIAS64 + 64]%asi, %i0			;\
704	ldxa	[%sp + V9BIAS64 + 72]%asi, %i1			;\
705	ldxa	[%sp + V9BIAS64 + 80]%asi, %i2			;\
706	ldxa	[%sp + V9BIAS64 + 88]%asi, %i3			;\
707	ldxa	[%sp + V9BIAS64 + 96]%asi, %i4			;\
708	ldxa	[%sp + V9BIAS64 + 104]%asi, %i5			;\
709	ldxa	[%sp + V9BIAS64 + 112]%asi, %i6			;\
710	ldxa	[%sp + V9BIAS64 + 120]%asi, %i7			;\
711	restored						;\
712	retry							;\
713	SKIP(31-19-TT_TRACE_L_INS)				;\
714	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
715	.empty
716
717#endif /* !lint */
718
719/*
720 * SPILL_mixed spills either size window, depending on
721 * whether %sp is even or odd, to a 32-bit address space.
722 * This may only be used in conjunction with SPILL_32bit/
723 * SPILL_64bit. New versions of SPILL_mixed_{tt1,asi} would be
724 * needed for use with SPILL_{32,64}bit_{tt1,asi}.  Particular
725 * attention should be paid to the instructions that belong
726 * in the delay slots of the branches depending on the type
727 * of spill handler being branched to.
728 * Clear upper 32 bits of %sp if it is even (the 32-bit frame case).
729 * We won't need to clear them in 64 bit kernel.
730 */
/*
 * Dispatch on bit 0 of %sp:
 *   - clear (even %sp): branch back to the nearest preceding 1: label;
 *     the annulled delay slot, executed only when this branch is
 *     taken, performs the srl that the 1: entry point skips.
 *   - set (odd %sp): branch back to the nearest preceding 2: label
 *     with a nop in the delay slot.
 * Padded to a 128-byte boundary.
 */
731#define	SPILL_mixed						\
732	btst	1, %sp						;\
733	bz,a,pt	%xcc, 1b					;\
734	srl	%sp, 0, %sp					;\
735	ba,pt	%xcc, 2b					;\
736	nop							;\
737	.align	128
738
739/*
740 * FILL_mixed(ASI) fills either size window, depending on
741 * whether %sp is even or odd, from a 32-bit address space.
742 * This may only be used in conjunction with FILL_32bit/
743 * FILL_64bit. New versions of FILL_mixed_{tt1,asi} would be
744 * needed for use with FILL_{32,64}bit_{tt1,asi}. Particular
745 * attention should be paid to the instructions that belong
746 * in the delay slots of the branches depending on the type
747 * of fill handler being branched to.
748 * Clear upper 32 bits of %sp if it is even (the 32-bit frame case).
749 * We won't need to clear them in 64 bit kernel.
750 */
/*
 * Dispatch on bit 0 of %sp:
 *   - clear (even %sp): branch back to the nearest preceding 1: label;
 *     the annulled delay slot, executed only when this branch is
 *     taken, performs the srl that the 1: entry point skips.
 *   - set (odd %sp): branch back to the nearest preceding 2: label
 *     with a nop in the delay slot.
 * Padded to a 128-byte boundary.
 */
751#define	FILL_mixed						\
752	btst	1, %sp						;\
753	bz,a,pt	%xcc, 1b					;\
754	srl	%sp, 0, %sp					;\
755	ba,pt	%xcc, 2b					;\
756	nop							;\
757	.align	128
758
759
760/*
761 * SPILL_32clean/SPILL_64clean spill 32-bit and 64-bit register windows,
762 * respectively, into the address space via the designated asi.  The
763 * unbiased stack pointer is required to be eight-byte aligned (even for
764 * the 32-bit case even though this code does not require such strict
765 * alignment).
766 *
767 * With SPARC v9 the spill trap takes precedence over the cleanwin trap
768 * so when cansave == 0, canrestore == 6, and cleanwin == 6 the next save
769 * will cause cwp + 2 to be spilled but will not clean cwp + 1.  That
770 * window may contain kernel data so in user_rtt we set wstate to call
771 * these spill handlers on the first user spill trap.  These handlers then
772 * spill the appropriate window but also back up a window and clean the
773 * window that didn't get a cleanwin trap.
774 */
/*
 * SPILL_32clean stores the window exactly like a 32-bit ASI spill
 * (%g1 .. %g3 = offsets 4, 8, 12; %g4 = moving base pointer), but
 * instead of finishing with saved/retry it branches to .spill_clean
 * with %g7 = WSTATE_USER32 loaded in the delay slot.  No numeric
 * label is defined.
 * Layout: 1 (srl) + 16 (sta) + 3 (mov) + 3 (add) + 2 (b, mov) = 25
 * instructions, plus TT_TRACE_L_INS; SKIP() pads so the final "ba,a"
 * lands in the 32nd slot.
 */
775#define	SPILL_32clean(asi_num, tail)				\
776	srl	%sp, 0, %sp					;\
777	sta	%l0, [%sp + %g0]asi_num				;\
778	mov	4, %g1						;\
779	sta	%l1, [%sp + %g1]asi_num				;\
780	mov	8, %g2						;\
781	sta	%l2, [%sp + %g2]asi_num				;\
782	mov	12, %g3						;\
783	sta	%l3, [%sp + %g3]asi_num				;\
784	add	%sp, 16, %g4					;\
785	sta	%l4, [%g4 + %g0]asi_num				;\
786	sta	%l5, [%g4 + %g1]asi_num				;\
787	sta	%l6, [%g4 + %g2]asi_num				;\
788	sta	%l7, [%g4 + %g3]asi_num				;\
789	add	%g4, 16, %g4					;\
790	sta	%i0, [%g4 + %g0]asi_num				;\
791	sta	%i1, [%g4 + %g1]asi_num				;\
792	sta	%i2, [%g4 + %g2]asi_num				;\
793	sta	%i3, [%g4 + %g3]asi_num				;\
794	add	%g4, 16, %g4					;\
795	sta	%i4, [%g4 + %g0]asi_num				;\
796	sta	%i5, [%g4 + %g1]asi_num				;\
797	sta	%i6, [%g4 + %g2]asi_num				;\
798	sta	%i7, [%g4 + %g3]asi_num				;\
799	TT_TRACE_L(trace_win)					;\
800	b	.spill_clean					;\
801	  mov	WSTATE_USER32, %g7				;\
802	SKIP(31-25-TT_TRACE_L_INS)				;\
803	ba,a,pt	%xcc, fault_32bit_/**/tail			;\
804	.empty
805
/*
 * SPILL_64clean stores the window exactly like a 64-bit ASI spill
 * (%g1 .. %g4 = biased offsets; %g5 = moving base pointer), but
 * instead of finishing with saved/retry it branches to .spill_clean
 * with %g7 = WSTATE_USER64 loaded in the delay slot.  No numeric
 * label is defined.
 * Layout: 4 (mov) + 16 (stxa) + 3 (add) + 2 (b, mov) = 25
 * instructions, plus TT_TRACE_L_INS; SKIP() pads so the final "ba,a"
 * lands in the 32nd slot.
 */
806#define	SPILL_64clean(asi_num, tail)				\
807	mov	0 + V9BIAS64, %g1				;\
808	stxa	%l0, [%sp + %g1]asi_num				;\
809	mov	8 + V9BIAS64, %g2				;\
810	stxa	%l1, [%sp + %g2]asi_num				;\
811	mov	16 + V9BIAS64, %g3				;\
812	stxa	%l2, [%sp + %g3]asi_num				;\
813	mov	24 + V9BIAS64, %g4				;\
814	stxa	%l3, [%sp + %g4]asi_num				;\
815	add	%sp, 32, %g5					;\
816	stxa	%l4, [%g5 + %g1]asi_num				;\
817	stxa	%l5, [%g5 + %g2]asi_num				;\
818	stxa	%l6, [%g5 + %g3]asi_num				;\
819	stxa	%l7, [%g5 + %g4]asi_num				;\
820	add	%g5, 32, %g5					;\
821	stxa	%i0, [%g5 + %g1]asi_num				;\
822	stxa	%i1, [%g5 + %g2]asi_num				;\
823	stxa	%i2, [%g5 + %g3]asi_num				;\
824	stxa	%i3, [%g5 + %g4]asi_num				;\
825	add	%g5, 32, %g5					;\
826	stxa	%i4, [%g5 + %g1]asi_num				;\
827	stxa	%i5, [%g5 + %g2]asi_num				;\
828	stxa	%i6, [%g5 + %g3]asi_num				;\
829	stxa	%i7, [%g5 + %g4]asi_num				;\
830	TT_TRACE_L(trace_win)					;\
831	b	.spill_clean					;\
832	  mov	WSTATE_USER64, %g7				;\
833	SKIP(31-25-TT_TRACE_L_INS)				;\
834	ba,a,pt	%xcc, fault_64bit_/**/tail			;\
835	.empty
836
837
838/*
839 * Floating point disabled.
840 */
/*
 * FP_DISABLED_TRAP: optional TT_TRACE(trace_gen), then a branch to
 * .fp_disabled with a nop in the delay slot.  No registers are set up
 * here.  Padded to a 32-byte boundary.
 */
841#define	FP_DISABLED_TRAP		\
842	TT_TRACE(trace_gen)		;\
843	ba,pt	%xcc,.fp_disabled	;\
844	nop				;\
845	.align	32
846
847/*
848 * Floating point exceptions.
849 */
/*
 * FP_IEEE_TRAP: optional TT_TRACE(trace_gen), then a branch to
 * .fp_ieee_exception with a nop in the delay slot.  Padded to a
 * 32-byte boundary.
 */
850#define	FP_IEEE_TRAP			\
851	TT_TRACE(trace_gen)		;\
852	ba,pt	%xcc,.fp_ieee_exception	;\
853	nop				;\
854	.align	32
855
/*
 * FP_TRAP: optional TT_TRACE(trace_gen), then a branch to
 * .fp_exception with a nop in the delay slot.  Padded to a 32-byte
 * boundary.
 */
856#define	FP_TRAP				\
857	TT_TRACE(trace_gen)		;\
858	ba,pt	%xcc,.fp_exception	;\
859	nop				;\
860	.align	32
861
862#if !defined(lint)
863/*
864 * asynchronous traps at level 0 and level 1
865 *
866 * The first instruction must be a membar for UltraSPARC-III
867 * to stop RED state entry if the store queue has many
868 * pending bad stores (PRM, Chapter 11).
869 */
/*
 * ASYNC_TRAP(ttype, ttlabel, table_name):
 *   - defines the global label table_name at the start of the entry;
 *   - issues membar #Sync as the very first instruction;
 *   - runs the optional trace hook TT_TRACE(ttlabel);
 *   - branches to async_err with %g5 = ttype loaded in the delay slot.
 * Padded to a 32-byte boundary.
 */
870#define ASYNC_TRAP(ttype, ttlabel, table_name)\
871	.global	table_name	;\
872table_name:			;\
873	membar	#Sync		;\
874	TT_TRACE(ttlabel)	;\
875	ba	async_err	;\
876	mov	ttype, %g5	;\
877	.align	32
878
879/*
880 * Defaults to BAD entry, but establishes label to be used for
881 * architecture-specific overwrite of trap table entry.
882 */
/*
 * LABELED_BAD(table_name): emits the global label table_name
 * immediately followed by a BAD entry, so the label addresses the
 * first instruction of that trap table slot.
 */
883#define	LABELED_BAD(table_name)		\
884	.global	table_name		;\
885table_name:				;\
886	BAD
887
888#endif /* !lint */
889
890/*
891 * illegal instruction trap
892 */
/*
 * ILLTRAP_INSTR: membar #Sync, optional trace hook, then
 *	%g2 = P_UTRAP4
 *	%g3 = T_UNIMP_INSTR
 * and a sethi/jmp pair (with %g4 as the address scratch register) to
 * .check_v9utrap; the jmp delay slot holds a nop.  Padded to a 32-byte
 * boundary.
 */
893#define	ILLTRAP_INSTR			  \
894	membar	#Sync			  ;\
895	TT_TRACE(trace_gen)		  ;\
896	or	%g0, P_UTRAP4, %g2	  ;\
897	or	%g0, T_UNIMP_INSTR, %g3   ;\
898	sethi	%hi(.check_v9utrap), %g4  ;\
899	jmp	%g4 + %lo(.check_v9utrap) ;\
900	nop				  ;\
901	.align	32
902
903/*
904 * tag overflow trap
905 */
/*
 * TAG_OVERFLOW: optional trace hook, then
 *	%g2 = P_UTRAP10
 *	%g3 = T_TAG_OVERFLOW
 * and a sethi/jmp pair (with %g4 as the address scratch register) to
 * .check_v9utrap; the jmp delay slot holds a nop.  Padded to a 32-byte
 * boundary.
 */
906#define	TAG_OVERFLOW			  \
907	TT_TRACE(trace_gen)		  ;\
908	or	%g0, P_UTRAP10, %g2	  ;\
909	or	%g0, T_TAG_OVERFLOW, %g3  ;\
910	sethi	%hi(.check_v9utrap), %g4  ;\
911	jmp	%g4 + %lo(.check_v9utrap) ;\
912	nop				  ;\
913	.align	32
914
915/*
916 * divide by zero trap
917 */
/*
 * DIV_BY_ZERO: optional trace hook, then
 *	%g2 = P_UTRAP11
 *	%g3 = T_IDIV0
 * and a sethi/jmp pair (with %g4 as the address scratch register) to
 * .check_v9utrap; the jmp delay slot holds a nop.  Padded to a 32-byte
 * boundary.
 */
918#define	DIV_BY_ZERO			  \
919	TT_TRACE(trace_gen)		  ;\
920	or	%g0, P_UTRAP11, %g2	  ;\
921	or	%g0, T_IDIV0, %g3	  ;\
922	sethi	%hi(.check_v9utrap), %g4  ;\
923	jmp	%g4 + %lo(.check_v9utrap) ;\
924	nop				  ;\
925	.align	32
926
927/*
928 * trap instruction for V9 user trap handlers
929 */
/*
 * TRAP_INSTR: optional trace hook, then %g3 = T_SOFTWARE_TRAP and a
 * sethi/jmp pair (with %g4 as the address scratch register) to
 * .check_v9utrap; the jmp delay slot holds a nop.  Unlike the other
 * .check_v9utrap entries, %g2 is not loaded here.  Padded to a 32-byte
 * boundary.  TRP4 is four TRAP_INSTR entries back to back.
 */
930#define	TRAP_INSTR			  \
931	TT_TRACE(trace_gen)		  ;\
932	or	%g0, T_SOFTWARE_TRAP, %g3 ;\
933	sethi	%hi(.check_v9utrap), %g4  ;\
934	jmp	%g4 + %lo(.check_v9utrap) ;\
935	nop				  ;\
936	.align	32
937#define	TRP4	TRAP_INSTR; TRAP_INSTR; TRAP_INSTR; TRAP_INSTR
938
939/*
940 * LEVEL_INTERRUPT is for level N interrupts.
941 * VECTOR_INTERRUPT is for the vector trap.
942 */
/*
 * LEVEL_INTERRUPT(level): defines a global label whose name is tt_pil
 * with the level argument pasted on, then branches to pil_interrupt
 * with %g4 = level loaded in the delay slot.  Padded to a 32-byte
 * boundary.
 */
943#define	LEVEL_INTERRUPT(level)		\
944	.global	tt_pil/**/level		;\
945tt_pil/**/level:			;\
946	ba,pt	%xcc, pil_interrupt	;\
947	mov	level, %g4		;\
948	.align	32
949
/*
 * LEVEL14_INTERRUPT: dedicated entry for level 14.  Branches to
 * pil14_interrupt (rather than pil_interrupt) with %g4 = PIL_14 loaded
 * in the delay slot, and defines no label.  Padded to a 32-byte
 * boundary.
 */
950#define	LEVEL14_INTERRUPT			\
951	ba	pil14_interrupt			;\
952	mov	PIL_14, %g4			;\
953	.align	32
954
/*
 * VECTOR_INTERRUPT: loads the interrupt receive status register (via
 * ASI_INTR_RECEIVE_STATUS) into %g1 and tests IRSR_BUSY.
 *   - bit set:   branch to vec_interrupt (nop in the delay slot)
 *   - bit clear: annulled branch to vec_intr_spurious
 * Padded to a 32-byte boundary.
 */
955#define	VECTOR_INTERRUPT				\
956	ldxa	[%g0]ASI_INTR_RECEIVE_STATUS, %g1	;\
957	btst	IRSR_BUSY, %g1				;\
958	bnz,pt	%xcc, vec_interrupt			;\
959	nop						;\
960	ba,a,pt	%xcc, vec_intr_spurious			;\
961	.empty						;\
962	.align	32
963
964/*
965 * MMU Trap Handlers.
966 */
/*
 * SWITCH_GLOBALS reads %pstate into %g5 and writes it back through
 * wrpr with PSTATE_MG | PSTATE_AG as the second operand.  On SPARC V9
 * wrpr stores the XOR of its two source operands, so both bits are
 * toggled.  %g5 of the global set that is current on entry is
 * overwritten.
 */
967#define	SWITCH_GLOBALS	/* mmu->alt, alt->mmu */			\
968	rdpr	%pstate, %g5						;\
969	wrpr	%g5, PSTATE_MG | PSTATE_AG, %pstate
970
/*
 * IMMU_EXCEPTION: membar #Sync, SWITCH_GLOBALS, %asi = ASI_IMMU, then
 *	%g2 = %tpc
 *	%g3 = I-MMU SFSR (read at MMU_SFSR through %asi)
 *	%g1 = T_INSTR_EXCEPTION (loaded in the branch delay slot)
 * and a branch to .mmu_exception_end.  Padded to a 32-byte boundary.
 */
971#define	IMMU_EXCEPTION							\
972	membar	#Sync							;\
973	SWITCH_GLOBALS							;\
974	wr	%g0, ASI_IMMU, %asi					;\
975	rdpr	%tpc, %g2						;\
976	ldxa	[MMU_SFSR]%asi, %g3					;\
977	ba,pt	%xcc, .mmu_exception_end				;\
978	mov	T_INSTR_EXCEPTION, %g1					;\
979	.align	32
980
/*
 * DMMU_EXCEPTION: SWITCH_GLOBALS, %asi = ASI_DMMU, then
 *	%g2 = D-MMU tag access register (MMU_TAG_ACCESS)
 *	%g3 = D-MMU SFSR (MMU_SFSR)
 *	%g1 = T_DATA_EXCEPTION (loaded in the branch delay slot)
 * and a branch to .mmu_exception_end.  Padded to a 32-byte boundary.
 */
981#define	DMMU_EXCEPTION							\
982	SWITCH_GLOBALS							;\
983	wr	%g0, ASI_DMMU, %asi					;\
984	ldxa	[MMU_TAG_ACCESS]%asi, %g2				;\
985	ldxa	[MMU_SFSR]%asi, %g3					;\
986	ba,pt	%xcc, .mmu_exception_end				;\
987	mov	T_DATA_EXCEPTION, %g1					;\
988	.align	32
989
/*
 * DMMU_EXC_AG_PRIV: %asi = ASI_DMMU, then
 *	%g2 = D-MMU SFAR (MMU_SFAR)
 *	%g3 = D-MMU SFSR (MMU_SFSR, loaded in the branch delay slot)
 * and a branch to .mmu_priv_exception.  No SWITCH_GLOBALS is done
 * here; presumably the AG in the name means the handler runs on the
 * alternate globals -- confirm against .mmu_priv_exception.
 */
990#define	DMMU_EXC_AG_PRIV						\
991	wr	%g0, ASI_DMMU, %asi					;\
992	ldxa	[MMU_SFAR]%asi, %g2					;\
993	ba,pt	%xcc, .mmu_priv_exception				;\
994	ldxa	[MMU_SFSR]%asi, %g3					;\
995	.align	32
996
/*
 * DMMU_EXC_AG_NOT_ALIGNED: %asi = ASI_DMMU, then
 *	%g2 = D-MMU SFAR (MMU_SFAR)
 *	%g3 = D-MMU SFSR (MMU_SFSR, loaded in the branch delay slot)
 * and a branch to .mmu_exception_not_aligned.  No SWITCH_GLOBALS is
 * done here.
 */
997#define	DMMU_EXC_AG_NOT_ALIGNED						\
998	wr	%g0, ASI_DMMU, %asi					;\
999	ldxa	[MMU_SFAR]%asi, %g2					;\
1000	ba,pt	%xcc, .mmu_exception_not_aligned			;\
1001	ldxa	[MMU_SFSR]%asi, %g3					;\
1002	.align	32
1003
1004/*
1005 * SPARC V9 IMPL. DEP. #109(1) and (2) and #110(1) and (2)
1006 */
/*
 * DMMU_EXC_LDDF_NOT_ALIGNED: dispatch on bit 0 of %sp.
 *   - set (odd %sp): branch to .lddf_exception_not_aligned
 *   - clear:         %g2 = SFAR, %g3 = SFSR (delay slot), branch to
 *                    .mmu_exception_not_aligned
 * The write of ASI_DMMU to %asi sits in the non-annulled delay slot of
 * the first branch, so %asi is set on both paths.
 */
1007#define	DMMU_EXC_LDDF_NOT_ALIGNED					\
1008	btst	1, %sp							;\
1009	bnz,pt	%xcc, .lddf_exception_not_aligned			;\
1010	wr	%g0, ASI_DMMU, %asi					;\
1011	ldxa	[MMU_SFAR]%asi, %g2					;\
1012	ba,pt	%xcc, .mmu_exception_not_aligned			;\
1013	ldxa	[MMU_SFSR]%asi, %g3					;\
1014	.align	32
1015
/*
 * DMMU_EXC_STDF_NOT_ALIGNED is the STDF counterpart of
 * DMMU_EXC_LDDF_NOT_ALIGNED: if bit 0 of %sp is set it branches to
 * .stdf_exception_not_aligned, otherwise it loads the DMMU SFAR into
 * %g2 and SFSR into %g3 and goes to .mmu_exception_not_aligned.
 * %asi is set to ASI_DMMU on both paths (delay slot of the first branch).
 */
1016#define	DMMU_EXC_STDF_NOT_ALIGNED					\
1017	btst	1, %sp							;\
1018	bnz,pt	%xcc, .stdf_exception_not_aligned			;\
1019	wr	%g0, ASI_DMMU, %asi					;\
1020	ldxa	[MMU_SFAR]%asi, %g2					;\
1021	ba,pt	%xcc, .mmu_exception_not_aligned			;\
1022	ldxa	[MMU_SFSR]%asi, %g3					;\
1023	.align	32
1024
1025/*
1026 * Flush the TLB using either the primary, secondary, or nucleus flush
1027 * operation based on whether the ctx from the tag access register matches
1028 * the primary or secondary context (flush the nucleus if neither matches).
1029 *
1030 * Requires a membar #Sync before next ld/st.
1031 * exits with:
1032 * g2 = tag access register
1033 * g3 = ctx number
 *
 * Also clobbers g1 (demap address), g4 (context mask), g5 and g6.
 * The same mask register (g4) is applied to both the tag access value
 * and the context registers, which is why the build-time check below
 * insists that TAGACC_CTX_MASK equals CTXREG_CTX_MASK.
 * The expansion is 20 instructions long (DTLB_PROT relies on this).
1034 */
1035#if TAGACC_CTX_MASK != CTXREG_CTX_MASK
1036#error "TAGACC_CTX_MASK != CTXREG_CTX_MASK"
1037#endif
1038#define	DTLB_DEMAP_ENTRY						\
1039	mov	MMU_TAG_ACCESS, %g1					;\
1040	mov	MMU_PCONTEXT, %g5					;\
1041	ldxa	[%g1]ASI_DMMU, %g2					;\
1042	sethi	%hi(TAGACC_CTX_MASK), %g4				;\
1043	or	%g4, %lo(TAGACC_CTX_MASK), %g4				;\
1044	and	%g2, %g4, %g3			/* g3 = ctx */		;\
1045	ldxa	[%g5]ASI_DMMU, %g6		/* g6 = primary ctx */	;\
1046	and	%g6, %g4, %g6			/* &= CTXREG_CTX_MASK */ ;\
1047	cmp	%g3, %g6						;\
1048	be,pt	%xcc, 1f						;\
1049	andn	%g2, %g4, %g1			/* ctx = primary */	;\
1050	mov	MMU_SCONTEXT, %g5					;\
1051	ldxa	[%g5]ASI_DMMU, %g6		/* g6 = secondary ctx */ ;\
1052	and	%g6, %g4, %g6			/* &= CTXREG_CTX_MASK */ ;\
1053	cmp	%g3, %g6						;\
1054	be,a,pt	%xcc, 1f						;\
1055	  or	%g1, DEMAP_SECOND, %g1					;\
1056	or	%g1, DEMAP_NUCLEUS, %g1					;\
10571:	stxa	%g0, [%g1]ASI_DTLB_DEMAP	/* MMU_DEMAP_PAGE */	;\
1058	membar	#Sync
1059
1060#if defined(cscope)
1061/*
1062 * Define labels to direct cscope quickly to labels that
1063 * are generated by macro expansion of DTLB_MISS().
 *
 * This section is only seen when cscope is defined; the real labels
 * come from DTLB_MISS(tt0) and DTLB_MISS(tt1) in the trap table.
1064 */
1065	.global	tt0_dtlbmiss
1066tt0_dtlbmiss:
1067	.global	tt1_dtlbmiss
1068tt1_dtlbmiss:
1069	nop
1070#endif
1071
1072/*
1073 * Needs to be exactly 32 instructions
1074 *
1075 * UTLB NOTE: If we don't hit on the 8k pointer then we branch
1076 * to a special 4M tsb handler. It would be nice if that handler
1077 * could live in this file but currently it seems better to allow
1078 * it to fall thru to sfmmu_tsb_miss.
 *
 * DTLB_MISS(table_name) exports the label <table_name>_dtlbmiss.
 * Flow common to both variants:
 *   - g1 = 8K TSB entry pointer, g2 = tag access, g3 = ctx, g7 = tsb tag
 *   - a ctx that compares <= INVALID_CONTEXT goes to sfmmu_kdtlb_miss
 *   - the variant-specific test below diverts to sfmmu_udtlb_slowpath
 *   - otherwise the TSB entry is loaded into g4 (tag) / g5 (data); a tag
 *     mismatch goes to sfmmu_tsb_miss_tt with g3 cleared, a match loads
 *     g5 into the DTLB and retries the trapping instruction.
 * The UTSB_PHYS variant tests the value read from SCRATCHPAD_UTSBREG and
 * loads the TSB entry with ASI_QUAD_LDD_PHYS; the other variant tests the
 * sign of the TSB pointer in g1 and uses ASI_NQUAD_LD.
 *
 * The unimp padding (9 resp. 11 instructions) brings each variant to 32
 * instructions when HAT_PERCPU_DBSTAT and TT_TRACE expand to the 3 and 2
 * instructions noted inline; .align 128 then pads to the 128-byte slot.
1079 */
1080#ifdef UTSB_PHYS
1081#define	DTLB_MISS(table_name)						;\
1082	.global	table_name/**/_dtlbmiss					;\
1083table_name/**/_dtlbmiss:						;\
1084	HAT_PERCPU_DBSTAT(TSBMISS_DTLBMISS) /* 3 instr ifdef DEBUG */	;\
1085	mov	MMU_TAG_ACCESS, %g6		/* select tag acc */	;\
1086	ldxa	[%g0]ASI_DMMU_TSB_8K, %g1	/* g1 = tsbe ptr */	;\
1087	ldxa	[%g6]ASI_DMMU, %g2		/* g2 = tag access */	;\
1088	sllx	%g2, TAGACC_CTX_LSHIFT, %g3				;\
1089	srlx	%g3, TAGACC_CTX_LSHIFT, %g3	/* g3 = ctx */		;\
1090	cmp	%g3, INVALID_CONTEXT					;\
1091	ble,pn	%xcc, sfmmu_kdtlb_miss					;\
1092	  srlx	%g2, TAG_VALO_SHIFT, %g7	/* g7 = tsb tag */	;\
1093	mov	SCRATCHPAD_UTSBREG, %g3				;\
1094	ldxa	[%g3]ASI_SCRATCHPAD, %g3	/* g3 = 2nd tsb reg */	;\
1095	brgez,pn %g3, sfmmu_udtlb_slowpath	/* branch if 2 TSBs */	;\
1096	  nop								;\
1097	ldda	[%g1]ASI_QUAD_LDD_PHYS, %g4	/* g4 = tag, %g5 data */;\
1098	cmp	%g4, %g7						;\
1099	bne,pn	%xcc, sfmmu_tsb_miss_tt		/* no 4M TSB, miss */	;\
1100	  mov	%g0, %g3			/* clear 4M tsbe ptr */	;\
1101	TT_TRACE(trace_tsbhit)		/* 2 instr ifdef TRAPTRACE */	;\
1102	stxa	%g5, [%g0]ASI_DTLB_IN	/* trapstat expects TTE */	;\
1103	retry				/* in %g5 */			;\
1104	unimp	0							;\
1105	unimp	0							;\
1106	unimp	0							;\
1107	unimp	0							;\
1108	unimp	0							;\
1109	unimp	0							;\
1110	unimp	0							;\
1111	unimp	0							;\
1112	unimp	0							;\
1113	.align 128
1114#else /* UTSB_PHYS */
1115#define	DTLB_MISS(table_name)						;\
1116	.global	table_name/**/_dtlbmiss					;\
1117table_name/**/_dtlbmiss:						;\
1118	HAT_PERCPU_DBSTAT(TSBMISS_DTLBMISS) /* 3 instr ifdef DEBUG */	;\
1119	mov	MMU_TAG_ACCESS, %g6		/* select tag acc */	;\
1120	ldxa	[%g0]ASI_DMMU_TSB_8K, %g1	/* g1 = tsbe ptr */	;\
1121	ldxa	[%g6]ASI_DMMU, %g2		/* g2 = tag access */	;\
1122	sllx	%g2, TAGACC_CTX_LSHIFT, %g3				;\
1123	srlx	%g3, TAGACC_CTX_LSHIFT, %g3	/* g3 = ctx */		;\
1124	cmp	%g3, INVALID_CONTEXT					;\
1125	ble,pn	%xcc, sfmmu_kdtlb_miss					;\
1126	  srlx	%g2, TAG_VALO_SHIFT, %g7	/* g7 = tsb tag */	;\
1127	brlz,pn %g1, sfmmu_udtlb_slowpath				;\
1128	  nop								;\
1129	ldda	[%g1]ASI_NQUAD_LD, %g4	/* g4 = tag, %g5 data */	;\
1130	cmp	%g4, %g7						;\
1131	bne,pn	%xcc, sfmmu_tsb_miss_tt		/* no 4M TSB, miss */	;\
1132	  mov	%g0, %g3		/* clear 4M tsbe ptr */		;\
1133	TT_TRACE(trace_tsbhit)		/* 2 instr ifdef TRAPTRACE */	;\
1134	stxa	%g5, [%g0]ASI_DTLB_IN	/* trapstat expects TTE */	;\
1135	retry				/* in %g5 */			;\
1136	unimp	0							;\
1137	unimp	0							;\
1138	unimp	0							;\
1139	unimp	0							;\
1140	unimp	0							;\
1141	unimp	0							;\
1142	unimp	0							;\
1143	unimp	0							;\
1144	unimp	0							;\
1145	unimp	0							;\
1146	unimp	0							;\
1147	.align 128
1148#endif /* UTSB_PHYS */
1149
1150#if defined(cscope)
1151/*
1152 * Define labels to direct cscope quickly to labels that
1153 * are generated by macro expansion of ITLB_MISS().
 *
 * This section is only seen when cscope is defined; the real labels
 * come from ITLB_MISS(tt0) and ITLB_MISS(tt1) in the trap table.
1154 */
1155	.global	tt0_itlbmiss
1156tt0_itlbmiss:
1157	.global	tt1_itlbmiss
1158tt1_itlbmiss:
1159	nop
1160#endif
1161
1162/*
1163 * Instruction miss handler.
1164 * ldda instructions will have their ASI patched
1165 * by sfmmu_patch_ktsb at runtime.
1166 * MUST be EXACTLY 32 instructions or we'll break.
 *
 * ITLB_MISS(table_name) exports the label <table_name>_itlbmiss.
 * Flow common to both variants:
 *   - g1 = 8K TSB entry pointer, g2 = tag access, g3 = ctx, g7 = tsb tag
 *   - a ctx that compares <= INVALID_CONTEXT goes to sfmmu_kitlb_miss
 *   - the variant-specific test below diverts to sfmmu_uitlb_slowpath
 *   - otherwise the TSB entry is loaded into g4 (tag) / g5 (data); a tag
 *     mismatch goes to sfmmu_tsb_miss_tt with g3 cleared
 *   - a matching entry without TTE_EXECPRM_INT set goes to exec_fault
 *   - otherwise g5 is loaded into the ITLB and the instruction is retried.
 * The UTSB_PHYS variant tests the value read from SCRATCHPAD_UTSBREG and
 * loads the TSB entry with ASI_QUAD_LDD_PHYS; the other variant tests the
 * sign of the TSB pointer in g1 and uses ASI_NQUAD_LD.
 *
 * The unimp padding (6 resp. 8 instructions) brings each variant to 32
 * instructions when HAT_PERCPU_DBSTAT and TT_TRACE expand to the 3 and 2
 * instructions noted inline; .align 128 then pads to the 128-byte slot.
1167 */
1168#ifdef UTSB_PHYS
1169#define	ITLB_MISS(table_name)						 \
1170	.global	table_name/**/_itlbmiss					;\
1171table_name/**/_itlbmiss:						;\
1172	HAT_PERCPU_DBSTAT(TSBMISS_ITLBMISS) /* 3 instr ifdef DEBUG */	;\
1173	mov	MMU_TAG_ACCESS, %g6		/* select tag acc */	;\
1174	ldxa	[%g0]ASI_IMMU_TSB_8K, %g1	/* g1 = tsbe ptr */	;\
1175	ldxa	[%g6]ASI_IMMU, %g2		/* g2 = tag access */	;\
1176	sllx	%g2, TAGACC_CTX_LSHIFT, %g3				;\
1177	srlx	%g3, TAGACC_CTX_LSHIFT, %g3	/* g3 = ctx */		;\
1178	cmp	%g3, INVALID_CONTEXT					;\
1179	ble,pn	%xcc, sfmmu_kitlb_miss					;\
1180	  srlx	%g2, TAG_VALO_SHIFT, %g7	/* g7 = tsb tag */	;\
1181	mov	SCRATCHPAD_UTSBREG, %g3				;\
1182	ldxa	[%g3]ASI_SCRATCHPAD, %g3	/* g3 = 2nd tsb reg */	;\
1183	brgez,pn %g3, sfmmu_uitlb_slowpath	/* branch if 2 TSBs */	;\
1184	  nop								;\
1185	ldda	[%g1]ASI_QUAD_LDD_PHYS, %g4 /* g4 = tag, g5 = data */	;\
1186	cmp	%g4, %g7						;\
1187	bne,pn	%xcc, sfmmu_tsb_miss_tt	/* br if 8k ptr miss */		;\
1188	  mov	%g0, %g3		/* no 4M TSB */			;\
1189	andcc	%g5, TTE_EXECPRM_INT, %g0 /* check execute bit */	;\
1190	bz,pn	%icc, exec_fault					;\
1191	  nop								;\
1192	TT_TRACE(trace_tsbhit)		/* 2 instr ifdef TRAPTRACE */	;\
1193	stxa	%g5, [%g0]ASI_ITLB_IN	/* trapstat expects %g5 */	;\
1194	retry								;\
1195	unimp	0							;\
1196	unimp	0							;\
1197	unimp	0							;\
1198	unimp	0							;\
1199	unimp	0							;\
1200	unimp	0							;\
1201	.align 128
1202#else /* UTSB_PHYS */
1203#define	ITLB_MISS(table_name)						 \
1204	.global	table_name/**/_itlbmiss					;\
1205table_name/**/_itlbmiss:						;\
1206	HAT_PERCPU_DBSTAT(TSBMISS_ITLBMISS) /* 3 instr ifdef DEBUG */	;\
1207	mov	MMU_TAG_ACCESS, %g6		/* select tag acc */	;\
1208	ldxa	[%g0]ASI_IMMU_TSB_8K, %g1	/* g1 = tsbe ptr */	;\
1209	ldxa	[%g6]ASI_IMMU, %g2		/* g2 = tag access */	;\
1210	sllx	%g2, TAGACC_CTX_LSHIFT, %g3				;\
1211	srlx	%g3, TAGACC_CTX_LSHIFT, %g3	/* g3 = ctx */		;\
1212	cmp	%g3, INVALID_CONTEXT					;\
1213	ble,pn	%xcc, sfmmu_kitlb_miss					;\
1214	  srlx	%g2, TAG_VALO_SHIFT, %g7	/* g7 = tsb tag */	;\
1215	brlz,pn	%g1, sfmmu_uitlb_slowpath	/* if >1 TSB branch */	;\
1216	  nop								;\
1217	ldda	[%g1]ASI_NQUAD_LD, %g4	/* g4 = tag, g5 = data */	;\
1218	cmp	%g4, %g7						;\
1219	bne,pn	%xcc, sfmmu_tsb_miss_tt	/* br if 8k ptr miss */		;\
1220	  mov	%g0, %g3		/* no 4M TSB */			;\
1221	andcc	%g5, TTE_EXECPRM_INT, %g0 /* check execute bit */	;\
1222	bz,pn	%icc, exec_fault					;\
1223	  nop								;\
1224	TT_TRACE(trace_tsbhit)		/* 2 instr ifdef TRAPTRACE */	;\
1225	stxa	%g5, [%g0]ASI_ITLB_IN	/* trapstat expects %g5 */	;\
1226	retry								;\
1227	unimp	0							;\
1228	unimp	0							;\
1229	unimp	0							;\
1230	unimp	0							;\
1231	unimp	0							;\
1232	unimp	0							;\
1233	unimp	0							;\
1234	unimp	0							;\
1235	.align 128
1236#endif /* UTSB_PHYS */
1237
1238
1239/*
1240 * This macro is the first level handler for fast protection faults.
1241 * It first demaps the tlb entry which generated the fault and then
1242 * attempts to set the modify bit on the hash.  It needs to be
1243 * exactly 32 instructions.
 *
 * After the demap, g1 is reloaded with the 8K TSB entry pointer and the
 * handler dispatches on the ctx number left in g3 by DTLB_DEMAP_ENTRY:
 * non-zero goes to sfmmu_uprot_trap, zero goes to sfmmu_kprot_trap.
 * Instruction budget: 20 (demap) + 2 (TT_TRACE) + 4 + 6 unimp = 32.
1244 */
1245#define	DTLB_PROT							 \
1246	DTLB_DEMAP_ENTRY		/* 20 instructions */		;\
1247	/*								;\
1248	 * At this point:						;\
1249	 *   g1 = ????							;\
1250	 *   g2 = tag access register					;\
1251	 *   g3 = ctx number						;\
1252	 *   g4 = ????							;\
1253	 */								;\
1254	TT_TRACE(trace_dataprot)	/* 2 instr ifdef TRAPTRACE */	;\
1255					/* clobbers g1 and g6 */	;\
1256	ldxa	[%g0]ASI_DMMU_TSB_8K, %g1	/* g1 = tsbe ptr */	;\
1257	brnz,pt %g3, sfmmu_uprot_trap		/* user trap */		;\
1258	  nop								;\
1259	ba,a,pt	%xcc, sfmmu_kprot_trap		/* kernel trap */	;\
1260	unimp	0							;\
1261	unimp	0							;\
1262	unimp	0							;\
1263	unimp	0							;\
1264	unimp	0							;\
1265	unimp	0							;\
1266	.align 128
1267
/*
 * DMMU_EXCEPTION_TL1: TL>0 data access exception entry.  Switches
 * globals and branches to mmu_trap_tl1.  The branch is annulled (ba,a),
 * so the nop that follows it is never executed as a delay slot; it is
 * only filler ahead of the .align 32.
 */
1268#define	DMMU_EXCEPTION_TL1						;\
1269	SWITCH_GLOBALS							;\
1270	ba,a,pt	%xcc, mmu_trap_tl1					;\
1271	  nop								;\
1272	.align 32
1273
/*
 * MISALIGN_ADDR_TL1: TL>0 mem address not aligned entry.  Branches
 * straight to mmu_trap_tl1 without switching globals.  The branch is
 * annulled (ba,a), so the following nop is never executed.
 */
1274#define	MISALIGN_ADDR_TL1						;\
1275	ba,a,pt	%xcc, mmu_trap_tl1					;\
1276	  nop								;\
1277	.align 32
1278
1279/*
1280 * Trace a tsb hit
1281 * g1 = tsbe pointer (in/clobbered)
1282 * g2 = tag access register (in)
1283 * g3 - g4 = scratch (clobbered)
1284 * g5 = tsbe data (in)
1285 * g6 = scratch (clobbered)
1286 * g7 = pc we jumped here from (in)
1287 * ttextra = value to OR in to trap type (%tt) (in)
 *
 * Expands to nothing unless TRAPTRACE is defined.
 *
 * Fields written to the trace record addressed by g3:
 *   TICK = trace tick, SP = tag access, F1 = tsb data, F2 = %tnpc,
 *   F3 = tsb pointer, F4 = 0, TPC = %tpc, TL = %tl,
 *   TT = %tt | ttextra, TSTATE = MMU tag target, TR = 0.
 * The tag target is read from both ASI_IMMU and ASI_DMMU; the IMMU value
 * is kept only when the recorded trap type equals FAST_IMMU_MISS_TT.
 *
 * NOTE(review): that comparison is made on %g6 after ttextra has been
 * ORed in, so a non-zero ttextra (exec_fault passes 0x200) may prevent a
 * match and select the DMMU tag target even for an ITLB miss - confirm
 * whether this is intended.
1288 */
1289#ifdef TRAPTRACE
1290#define TRACE_TSBHIT(ttextra)						 \
1291	membar	#Sync							;\
1292	sethi	%hi(FLUSH_ADDR), %g6					;\
1293	flush	%g6							;\
1294	TRACE_PTR(%g3, %g6)						;\
1295	GET_TRACE_TICK(%g6)						;\
1296	stxa	%g6, [%g3 + TRAP_ENT_TICK]%asi				;\
1297	stxa	%g2, [%g3 + TRAP_ENT_SP]%asi	/* tag access */	;\
1298	stxa	%g5, [%g3 + TRAP_ENT_F1]%asi	/* tsb data */		;\
1299	rdpr	%tnpc, %g6						;\
1300	stxa	%g6, [%g3 + TRAP_ENT_F2]%asi				;\
1301	stxa	%g1, [%g3 + TRAP_ENT_F3]%asi	/* tsb pointer */	;\
1302	stxa	%g0, [%g3 + TRAP_ENT_F4]%asi				;\
1303	rdpr	%tpc, %g6						;\
1304	stxa	%g6, [%g3 + TRAP_ENT_TPC]%asi				;\
1305	rdpr	%tl, %g6						;\
1306	stha	%g6, [%g3 + TRAP_ENT_TL]%asi				;\
1307	rdpr	%tt, %g6						;\
1308	or	%g6, (ttextra), %g6					;\
1309	stha	%g6, [%g3 + TRAP_ENT_TT]%asi				;\
1310	ldxa	[%g0]ASI_IMMU, %g1		/* tag target */	;\
1311	ldxa	[%g0]ASI_DMMU, %g4					;\
1312	cmp	%g6, FAST_IMMU_MISS_TT					;\
1313	movne	%icc, %g4, %g1						;\
1314	stxa	%g1, [%g3 + TRAP_ENT_TSTATE]%asi /* tsb tag */		;\
1315	stxa	%g0, [%g3 + TRAP_ENT_TR]%asi				;\
1316	TRACE_NEXT(%g3, %g4, %g6)
1317#else
1318#define TRACE_TSBHIT(ttextra)
1319#endif
1320
1321#if defined(lint)
1322
/*
 * Lint-only C declarations; when lint is not defined the same two
 * symbols are defined by the assembly trap table instead.
 */
1323struct scb	trap_table;
1324struct scb	scb;		/* trap_table/scb are the same object */
1325
1326#else /* lint */
1327
1328/*
1329 * =======================================================================
1330 *		SPARC V9 TRAP TABLE
1331 *
1332 * The trap table is divided into two halves: the first half is used when
1333 * taking traps when TL=0; the second half is used when taking traps from
1334 * TL>0. Note that handlers in the second half of the table might not be able
1335 * to make the same assumptions as handlers in the first half of the table.
1336 *
1337 * Worst case trap nesting so far:
1338 *
1339 *	at TL=0 client issues software trap requesting service
1340 *	at TL=1 nucleus wants a register window
1341 *	at TL=2 register window clean/spill/fill takes a TLB miss
1342 *	at TL=3 processing TLB miss
1343 *	at TL=4 handle asynchronous error
1344 *
1345 * Note that a trap from TL=4 to TL=5 places Spitfire in "RED mode".
1346 *
 * Layout notes:
 *   - trap_table, scb and trap_table0 all label the same address.
 *   - The hex number in each per-entry comment is the trap type of that
 *     slot.  Entries whose numbers advance by four (CLEAN_WINDOW,
 *     ITLB_MISS, DTLB_MISS, DTLB_PROT and the spill/fill handlers)
 *     occupy four consecutive slots.
 *   - trap_table0 covers trap types 0x000 - 0x1FF for TL=0.
 *
1347 * =======================================================================
1348 */
1349	.section ".text"
1350	.align	4
1351	.global trap_table, scb, trap_table0, trap_table1, etrap_table
1352	.type	trap_table, #object
1353	.type	scb, #object
1354trap_table:
1355scb:
1356trap_table0:
1357	/* hardware traps */
1358	NOT;				/* 000	reserved */
1359	RED;				/* 001	power on reset */
1360	RED;				/* 002	watchdog reset */
1361	RED;				/* 003	externally initiated reset */
1362	RED;				/* 004	software initiated reset */
1363	RED;				/* 005	red mode exception */
1364	NOT; NOT;			/* 006 - 007 reserved */
1365	IMMU_EXCEPTION;			/* 008	instruction access exception */
1366	NOT;				/* 009	instruction access MMU miss */
1367	ASYNC_TRAP(T_INSTR_ERROR, trace_gen, tt0_iae);
1368					/* 00A	instruction access error */
1369	NOT; NOT4;			/* 00B - 00F reserved */
1370	ILLTRAP_INSTR;			/* 010	illegal instruction */
1371	TRAP(T_PRIV_INSTR);		/* 011	privileged opcode */
1372	NOT;				/* 012	unimplemented LDD */
1373	NOT;				/* 013	unimplemented STD */
1374	NOT4; NOT4; NOT4;		/* 014 - 01F reserved */
1375	FP_DISABLED_TRAP;		/* 020	fp disabled */
1376	FP_IEEE_TRAP;			/* 021	fp exception ieee 754 */
1377	FP_TRAP;			/* 022	fp exception other */
1378	TAG_OVERFLOW;			/* 023	tag overflow */
1379	CLEAN_WINDOW;			/* 024 - 027 clean window */
1380	DIV_BY_ZERO;			/* 028	division by zero */
1381	NOT;				/* 029	internal processor error */
1382	NOT; NOT; NOT4;			/* 02A - 02F reserved */
1383	DMMU_EXCEPTION;			/* 030	data access exception */
1384	NOT;				/* 031	data access MMU miss */
1385	ASYNC_TRAP(T_DATA_ERROR, trace_gen, tt0_dae);
1386					/* 032	data access error */
1387	NOT;				/* 033	data access protection */
1388	DMMU_EXC_AG_NOT_ALIGNED;	/* 034	mem address not aligned */
1389	DMMU_EXC_LDDF_NOT_ALIGNED;	/* 035	LDDF mem address not aligned */
1390	DMMU_EXC_STDF_NOT_ALIGNED;	/* 036	STDF mem address not aligned */
1391	DMMU_EXC_AG_PRIV;		/* 037	privileged action */
1392	NOT;				/* 038	LDQF mem address not aligned */
1393	NOT;				/* 039	STQF mem address not aligned */
1394	NOT; NOT; NOT4;			/* 03A - 03F reserved */
1395	LABELED_BAD(tt0_asdat);		/* 040	async data error */
1396	LEVEL_INTERRUPT(1);		/* 041	interrupt level 1 */
1397	LEVEL_INTERRUPT(2);		/* 042	interrupt level 2 */
1398	LEVEL_INTERRUPT(3);		/* 043	interrupt level 3 */
1399	LEVEL_INTERRUPT(4);		/* 044	interrupt level 4 */
1400	LEVEL_INTERRUPT(5);		/* 045	interrupt level 5 */
1401	LEVEL_INTERRUPT(6);		/* 046	interrupt level 6 */
1402	LEVEL_INTERRUPT(7);		/* 047	interrupt level 7 */
1403	LEVEL_INTERRUPT(8);		/* 048	interrupt level 8 */
1404	LEVEL_INTERRUPT(9);		/* 049	interrupt level 9 */
1405	LEVEL_INTERRUPT(10);		/* 04A	interrupt level 10 */
1406	LEVEL_INTERRUPT(11);		/* 04B	interrupt level 11 */
1407	LEVEL_INTERRUPT(12);		/* 04C	interrupt level 12 */
1408	LEVEL_INTERRUPT(13);		/* 04D	interrupt level 13 */
1409	LEVEL14_INTERRUPT;		/* 04E	interrupt level 14 */
1410	LEVEL_INTERRUPT(15);		/* 04F	interrupt level 15 */
1411	NOT4; NOT4; NOT4; NOT4;		/* 050 - 05F reserved */
1412	VECTOR_INTERRUPT;		/* 060	interrupt vector */
1413	GOTO(kmdb_trap);		/* 061	PA watchpoint */
1414	GOTO(kmdb_trap);		/* 062	VA watchpoint */
1415	GOTO_TT(ce_err, trace_gen);	/* 063	corrected ECC error */
1416	ITLB_MISS(tt0);			/* 064	instruction access MMU miss */
1417	DTLB_MISS(tt0);			/* 068	data access MMU miss */
1418	DTLB_PROT;			/* 06C	data access protection */
1419	LABELED_BAD(tt0_fecc);		/* 070  fast ecache ECC error */
1420	LABELED_BAD(tt0_dperr);		/* 071  Cheetah+ dcache parity error */
1421	LABELED_BAD(tt0_iperr);		/* 072  Cheetah+ icache parity error */
1422	NOT;				/* 073  reserved */
1423	NOT4; NOT4; NOT4;		/* 074 - 07F reserved */
1424	NOT4;				/* 080	spill 0 normal */
1425	SPILL_32bit_asi(ASI_AIUP,sn0);	/* 084	spill 1 normal */
1426	SPILL_64bit_asi(ASI_AIUP,sn0);	/* 088	spill 2 normal */
1427	SPILL_32clean(ASI_AIUP,sn0);	/* 08C	spill 3 normal */
1428	SPILL_64clean(ASI_AIUP,sn0);	/* 090	spill 4 normal */
1429	SPILL_32bit(not);		/* 094	spill 5 normal */
1430	SPILL_64bit(not);		/* 098	spill 6 normal */
1431	SPILL_mixed;			/* 09C	spill 7 normal */
1432	NOT4;				/* 0A0	spill 0 other */
1433	SPILL_32bit_asi(ASI_AIUS,so0);	/* 0A4	spill 1 other */
1434	SPILL_64bit_asi(ASI_AIUS,so0);	/* 0A8	spill 2 other */
1435	SPILL_32bit_asi(ASI_AIUS,so0);	/* 0AC	spill 3 other */
1436	SPILL_64bit_asi(ASI_AIUS,so0);	/* 0B0	spill 4 other */
1437	NOT4;				/* 0B4	spill 5 other */
1438	NOT4;				/* 0B8	spill 6 other */
1439	NOT4;				/* 0BC	spill 7 other */
1440	NOT4;				/* 0C0	fill 0 normal */
1441	FILL_32bit_asi(ASI_AIUP,fn0);	/* 0C4	fill 1 normal */
1442	FILL_64bit_asi(ASI_AIUP,fn0);	/* 0C8	fill 2 normal */
1443	FILL_32bit_asi(ASI_AIUP,fn0);	/* 0CC	fill 3 normal */
1444	FILL_64bit_asi(ASI_AIUP,fn0);	/* 0D0	fill 4 normal */
1445	FILL_32bit(not);		/* 0D4	fill 5 normal */
1446	FILL_64bit(not);		/* 0D8	fill 6 normal */
1447	FILL_mixed;			/* 0DC	fill 7 normal */
1448	NOT4;				/* 0E0	fill 0 other */
1449	NOT4;				/* 0E4	fill 1 other */
1450	NOT4;				/* 0E8	fill 2 other */
1451	NOT4;				/* 0EC	fill 3 other */
1452	NOT4;				/* 0F0	fill 4 other */
1453	NOT4;				/* 0F4	fill 5 other */
1454	NOT4;				/* 0F8	fill 6 other */
1455	NOT4;				/* 0FC	fill 7 other */
1456	/* user traps */
1457	GOTO(syscall_trap_4x);		/* 100	old system call */
1458	TRAP(T_BREAKPOINT);		/* 101	user breakpoint */
1459	TRAP(T_DIV0);			/* 102	user divide by zero */
1460	FLUSHW();			/* 103	flush windows */
1461	GOTO(.clean_windows);		/* 104	clean windows */
1462	BAD;				/* 105	range check ?? */
1463	GOTO(.fix_alignment);		/* 106	do unaligned references */
1464	BAD;				/* 107	unused */
1465	SYSCALL_TRAP32;			/* 108	ILP32 system call on LP64 */
1466	GOTO(set_trap0_addr);		/* 109	set trap0 address */
1467	BAD; BAD; BAD4;			/* 10A - 10F unused */
1468	TRP4; TRP4; TRP4; TRP4;		/* 110 - 11F V9 user trap handlers */
1469	GOTO(.getcc);			/* 120	get condition codes */
1470	GOTO(.setcc);			/* 121	set condition codes */
1471	GOTO(.getpsr);			/* 122	get psr */
1472	GOTO(.setpsr);			/* 123	set psr (some fields) */
1473	GOTO(get_timestamp);		/* 124	get timestamp */
1474	GOTO(get_virtime);		/* 125	get lwp virtual time */
1475	PRIV(self_xcall);		/* 126	self xcall */
1476	GOTO(get_hrestime);		/* 127	get hrestime */
1477	BAD;				/* 128	ST_SETV9STACK */
1478	GOTO(.getlgrp);			/* 129  get lgrpid */
1479	BAD; BAD; BAD4;			/* 12A - 12F unused */
1480	BAD4; BAD4; 			/* 130 - 137 unused */
1481	DTRACE_PID;			/* 138  dtrace pid tracing provider */
1482	BAD;				/* 139  unused */
1483	DTRACE_RETURN;			/* 13A	dtrace pid return probe */
1484	BAD; BAD4;			/* 13B - 13F unused */
1485	SYSCALL_TRAP;			/* 140  LP64 system call */
1486	SYSCALL(nosys);			/* 141  unused system call trap */
1487#ifdef DEBUG_USER_TRAPTRACECTL
1488	GOTO(.traptrace_freeze);	/* 142  freeze traptrace */
1489	GOTO(.traptrace_unfreeze);	/* 143  unfreeze traptrace */
1490#else
1491	SYSCALL(nosys);			/* 142  unused system call trap */
1492	SYSCALL(nosys);			/* 143  unused system call trap */
1493#endif
1494	BAD4; BAD4; BAD4;		/* 144 - 14F unused */
1495	BAD4; BAD4; BAD4; BAD4;		/* 150 - 15F unused */
1496	BAD4; BAD4; BAD4; BAD4;		/* 160 - 16F unused */
1497	BAD;				/* 170 - unused */
1498	BAD;				/* 171 - unused */
1499	BAD; BAD;			/* 172 - 173 unused */
1500	BAD4; BAD4;			/* 174 - 17B unused */
1501#ifdef	PTL1_PANIC_DEBUG
1502	mov PTL1_BAD_DEBUG, %g1; GOTO(ptl1_panic);
1503					/* 17C	test ptl1_panic */
1504#else
1505	BAD;				/* 17C  unused */
1506#endif	/* PTL1_PANIC_DEBUG */
1507	PRIV(kmdb_trap);		/* 17D	kmdb enter (L1-A) */
1508	PRIV(kmdb_trap);		/* 17E	kmdb breakpoint */
1509	PRIV(kctx_obp_bpt);		/* 17F	obp breakpoint */
1510	/* reserved */
1511	NOT4; NOT4; NOT4; NOT4;		/* 180 - 18F reserved */
1512	NOT4; NOT4; NOT4; NOT4;		/* 190 - 19F reserved */
1513	NOT4; NOT4; NOT4; NOT4;		/* 1A0 - 1AF reserved */
1514	NOT4; NOT4; NOT4; NOT4;		/* 1B0 - 1BF reserved */
1515	NOT4; NOT4; NOT4; NOT4;		/* 1C0 - 1CF reserved */
1516	NOT4; NOT4; NOT4; NOT4;		/* 1D0 - 1DF reserved */
1517	NOT4; NOT4; NOT4; NOT4;		/* 1E0 - 1EF reserved */
1518	NOT4; NOT4; NOT4; NOT4;		/* 1F0 - 1FF reserved */
/*
 * trap_table1: the half of the trap table used for traps taken from
 * TL>0.  The per-entry comments give the trap type of each slot; as in
 * the first half, entries whose numbers advance by four occupy four
 * slots.  The table is cut short after slot 0x103: etrap_table marks the
 * end, and the .size directives set the size of trap_table and scb to
 * the distance from their start to that point.
 */
1519trap_table1:
1520	NOT4; NOT4; NOT; NOT;		/* 000 - 009 unused */
1521	ASYNC_TRAP(T_INSTR_ERROR + T_TL1, trace_gen, tt1_iae);
1522					/* 00A	instruction access error */
1523	NOT; NOT4;			/* 00B - 00F unused */
1524	NOT4; NOT4; NOT4; NOT4;		/* 010 - 01F unused */
1525	NOT4;				/* 020 - 023 unused */
1526	CLEAN_WINDOW;			/* 024 - 027 clean window */
1527	NOT4; NOT4;			/* 028 - 02F unused */
1528	DMMU_EXCEPTION_TL1;		/* 030 	data access exception */
1529	NOT;				/* 031 unused */
1530	ASYNC_TRAP(T_DATA_ERROR + T_TL1, trace_gen, tt1_dae);
1531					/* 032	data access error */
1532	NOT;				/* 033	unused */
1533	MISALIGN_ADDR_TL1;		/* 034	mem address not aligned */
1534	NOT; NOT; NOT; NOT4; NOT4	/* 035 - 03F unused */
1535	LABELED_BAD(tt1_asdat);		/* 040	async data error */
1536	NOT; NOT; NOT;			/* 041 - 043 unused */
1537	NOT4; NOT4; NOT4;		/* 044 - 04F unused */
1538	NOT4; NOT4; NOT4; NOT4;		/* 050 - 05F unused */
1539	NOT;				/* 060	unused */
1540	GOTO(kmdb_trap_tl1);		/* 061	PA watchpoint */
1541	GOTO(kmdb_trap_tl1);		/* 062	VA watchpoint */
1542	GOTO_TT(ce_err_tl1, trace_gen);	/* 063	corrected ECC error */
1543	ITLB_MISS(tt1);			/* 064	instruction access MMU miss */
1544	DTLB_MISS(tt1);			/* 068	data access MMU miss */
1545	DTLB_PROT;			/* 06C	data access protection */
1546	LABELED_BAD(tt1_fecc);		/* 070  fast ecache ECC error */
1547	LABELED_BAD(tt1_dperr);		/* 071  Cheetah+ dcache parity error */
1548	LABELED_BAD(tt1_iperr);		/* 072  Cheetah+ icache parity error */
1549	NOT;				/* 073  reserved */
1550	NOT4; NOT4; NOT4;		/* 074 - 07F reserved */
1551	NOT4;				/* 080	spill 0 normal */
1552	SPILL_32bit_tt1(ASI_AIUP,sn1);	/* 084	spill 1 normal */
1553	SPILL_64bit_tt1(ASI_AIUP,sn1);	/* 088	spill 2 normal */
1554	SPILL_32bit_tt1(ASI_AIUP,sn1);	/* 08C	spill 3 normal */
1555	SPILL_64bit_tt1(ASI_AIUP,sn1);	/* 090	spill 4 normal */
1556	SPILL_32bit(not);		/* 094	spill 5 normal */
1557	SPILL_64bit(not);		/* 098	spill 6 normal */
1558	SPILL_mixed;			/* 09C	spill 7 normal */
1559	NOT4;				/* 0A0	spill 0 other */
1560	SPILL_32bit_tt1(ASI_AIUS,so1);	/* 0A4	spill 1 other */
1561	SPILL_64bit_tt1(ASI_AIUS,so1);	/* 0A8	spill 2 other */
1562	SPILL_32bit_tt1(ASI_AIUS,so1);	/* 0AC	spill 3 other */
1563	SPILL_64bit_tt1(ASI_AIUS,so1);	/* 0B0  spill 4 other */
1564	NOT4;				/* 0B4  spill 5 other */
1565	NOT4;				/* 0B8  spill 6 other */
1566	NOT4;				/* 0BC  spill 7 other */
1567	NOT4;				/* 0C0	fill 0 normal */
1568	FILL_32bit_tt1(ASI_AIUP,fn1);	/* 0C4	fill 1 normal */
1569	FILL_64bit_tt1(ASI_AIUP,fn1);	/* 0C8	fill 2 normal */
1570	FILL_32bit_tt1(ASI_AIUP,fn1);	/* 0CC	fill 3 normal */
1571	FILL_64bit_tt1(ASI_AIUP,fn1);	/* 0D0	fill 4 normal */
1572	FILL_32bit(not);		/* 0D4	fill 5 normal */
1573	FILL_64bit(not);		/* 0D8	fill 6 normal */
1574	FILL_mixed;			/* 0DC	fill 7 normal */
1575	NOT4; NOT4; NOT4; NOT4;		/* 0E0 - 0EF unused */
1576	NOT4; NOT4; NOT4; NOT4;		/* 0F0 - 0FF unused */
1577	LABELED_BAD(tt1_swtrap0);	/* 100  fast ecache ECC error (cont) */
1578	LABELED_BAD(tt1_swtrap1);	/* 101  Ch+ D$ parity error (cont) */
1579	LABELED_BAD(tt1_swtrap2);	/* 102  Ch+ I$ parity error (cont) */
1580	NOT;				/* 103  reserved */
1581/*
1582 * We only reserve the above four special case soft traps for code running
1583 * at TL>0, so we can truncate the trap table here.
1584 */
1585etrap_table:
1586	.size	trap_table, (.-trap_table)
1587	.size	scb, (.-scb)
1588
1589/*
1590 * We get to exec_fault in the case of an instruction miss and tte
1591 * has no execute bit set.  We go to tl0 to handle it.
1592 *
1593 * g1 = tsbe pointer (in/clobbered)
1594 * g2 = tag access register (in)
1595 * g3 - g4 = scratch (clobbered)
1596 * g5 = tsbe data (in)
1597 * g6 = scratch (clobbered)
 *
 * The hit is traced first (TRACE_TSBHIT with 0x200 ORed into the trap
 * type; a no-op unless TRAPTRACE is defined).  After SWITCH_GLOBALS the
 * handler sets up, in the newly selected globals:
 *   g1 = address of trap, g2 = IMMU tag access, g3 = T_INSTR_MMU_MISS,
 *   g4 = -1 (set in the delay slot)
 * and branches to sys_trap.
1598 */
1599	ALTENTRY(exec_fault)
1600	TRACE_TSBHIT(0x200)
1601	SWITCH_GLOBALS
1602	mov	MMU_TAG_ACCESS, %g4
1603	ldxa	[%g4]ASI_IMMU, %g2			! arg1 = addr
1604	mov	T_INSTR_MMU_MISS, %g3			! arg2 = traptype
1605	set	trap, %g1
1606	ba,pt	%xcc, sys_trap
1607	  mov	-1, %g4
1608
/*
 * .mmu_exception_not_aligned
 *
 * Entered from the *_NOT_ALIGNED trap-table macros with g2 = DMMU SFAR
 * and g3 = DMMU SFSR.
 *
 * Goes to .mmu_exception_end with g1 = T_ALIGNMENT (label 2) when any of
 * these hold: the trap came from privileged mode (TSTATE_PRIV set), the
 * process has no utraps table, or the P_UTRAP15 slot is empty.
 * Otherwise the user handler loaded into g5 is dispatched:
 *   bit 0 of %sp set   -> .setup_v9utrap (64 bit user program)
 *   bit 0 of %sp clear -> .setup_utrap with g7 = g2 (set in delay slot)
 */
1609.mmu_exception_not_aligned:
1610	rdpr	%tstate, %g1
1611	btst	TSTATE_PRIV, %g1
1612	bnz,pn	%icc, 2f
1613	nop
1614	CPU_ADDR(%g1, %g4)				! load CPU struct addr
1615	ldn	[%g1 + CPU_THREAD], %g1			! load thread pointer
1616	ldn	[%g1 + T_PROCP], %g1			! load proc pointer
1617	ldn	[%g1 + P_UTRAPS], %g5			! are there utraps?
1618	brz,pt	%g5, 2f
1619	nop
1620	ldn	[%g5 + P_UTRAP15], %g5			! unaligned utrap?
1621	brz,pn	%g5, 2f
1622	nop
1623	btst	1, %sp
1624	bz,pt	%xcc, 1f				! 32 bit user program
1625	nop
1626	ba,pt	%xcc, .setup_v9utrap			! 64 bit user program
1627	nop
16281:
1629	ba,pt	%xcc, .setup_utrap
1630	or	%g2, %g0, %g7
16312:
1632	ba,pt	%xcc, .mmu_exception_end
1633	mov	T_ALIGNMENT, %g1
1634
/*
 * .mmu_priv_exception
 *
 * Entered from DMMU_EXC_AG_PRIV with g2 = DMMU SFAR and g3 = DMMU SFSR.
 *
 * If the trap came from user mode and the process has a handler in the
 * P_UTRAP16 slot of its utraps table, that handler (in g5) is dispatched
 * through .setup_v9utrap.  In every other case g1 is set to T_PRIV_INSTR
 * and execution FALLS THROUGH into .mmu_exception_end, which must
 * therefore stay immediately after this block.
 */
1635.mmu_priv_exception:
1636	rdpr	%tstate, %g1
1637	btst	TSTATE_PRIV, %g1
1638	bnz,pn	%icc, 1f
1639	nop
1640	CPU_ADDR(%g1, %g4)				! load CPU struct addr
1641	ldn	[%g1 + CPU_THREAD], %g1			! load thread pointer
1642	ldn	[%g1 + T_PROCP], %g1			! load proc pointer
1643	ldn	[%g1 + P_UTRAPS], %g5			! are there utraps?
1644	brz,pt	%g5, 1f
1645	nop
1646	ldn	[%g5 + P_UTRAP16], %g5
1647	brnz,pt	%g5, .setup_v9utrap
1648	nop
16491:
1650	mov	T_PRIV_INSTR, %g1
1651
/*
 * .mmu_exception_end
 *
 * Common tail of the MMU exception handlers.  On entry g1 holds the trap
 * type and g3 the SFSR value loaded by the caller.
 *
 * Computes the address of this CPU's cpu_core entry
 * (cpu_core + (cpu index << CPU_CORE_SHIFT)) and reads its DTrace flags.
 * If CPU_DTRACE_NOFAULT is set, CPU_DTRACE_BADADDR is ORed into the
 * flags, they are stored back, and the trap is dismissed with done.
 * Otherwise control passes to .mmu_exception_tlb_chk; the OR in the
 * branch delay slot is executed on that path too, but its result in g5
 * is not stored.
 */
1652.mmu_exception_end:
1653	CPU_INDEX(%g4, %g5)
1654	set	cpu_core, %g5
1655	sllx	%g4, CPU_CORE_SHIFT, %g4
1656	add	%g4, %g5, %g4
1657	lduh	[%g4 + CPUC_DTRACE_FLAGS], %g5
1658	andcc	%g5, CPU_DTRACE_NOFAULT, %g0
1659	bz	%xcc, .mmu_exception_tlb_chk
1660	or	%g5, CPU_DTRACE_BADADDR, %g5
1661	stuh	%g5, [%g4 + CPUC_DTRACE_FLAGS]
1662	done
1663
/*
 * .mmu_exception_tlb_chk / .mmu_exception_is_tlb_parity
 *
 * On entry g1 = trap type and g3 = SFSR.
 *
 * If the CPU implementation is PANTHER_IMPL and the SFSR bit
 * (1 << PN_SFSR_PARITY_SHIFT) is set, the trap is treated as a TLB
 * parity error: T_INSTR_EXCEPTION jumps to itlb_parity_trap and
 * T_DATA_EXCEPTION jumps to dtlb_parity_trap (both declared weak here),
 * via label 3.  Any other trap type falls through to label 2.
 *
 * Label 2 is the normal exit: g3 is rewritten as (SFSR << 32) | trap
 * type, g1 is set to the address of trap, g4 to -1 (in the delay slot),
 * and control goes to sys_trap.
 */
1664.mmu_exception_tlb_chk:
1665	GET_CPU_IMPL(%g5)			! check SFSR.FT to see if this
1666	cmp	%g5, PANTHER_IMPL		! is a TLB parity error. But
1667	bne	2f				! we only do this check while
1668	mov	1, %g4				! running on Panther CPUs
1669	sllx	%g4, PN_SFSR_PARITY_SHIFT, %g4	! since US-I/II use the same
1670	andcc	%g3, %g4, %g0			! bit for something else which
1671	bz	2f				! will be handled later.
1672	nop
1673.mmu_exception_is_tlb_parity:
1674	.weak itlb_parity_trap
1675	.weak dtlb_parity_trap
1676	set	itlb_parity_trap, %g4
1677	cmp	%g1, T_INSTR_EXCEPTION		! branch to the itlb or
1678	be	3f				! dtlb parity handler
1679	nop					! if this trap is due
1680	set	dtlb_parity_trap, %g4
1681	cmp	%g1, T_DATA_EXCEPTION		! to a IMMU exception
1682	be	3f				! or DMMU exception.
1683	nop
16842:
1685	sllx	%g3, 32, %g3
1686	or	%g3, %g1, %g3
1687	set	trap, %g1
1688	ba,pt	%xcc, sys_trap
1689	sub	%g0, 1, %g4
16903:
1691	jmp	%g4				! off to the appropriate
1692	nop					! TLB parity handler
1693
/*
 * .fp_disabled
 *
 * Decides whether an fp_disabled trap goes to a user trap handler or to
 * the kernel's fp_disabled routine.
 *
 * Privileged-mode traps: with SF_ERRATA_30 defined they (and the case of
 * a NULL thread pointer) go to the kernel path at label 2; without it a
 * privileged trap calls ptl1_panic with PTL1_BAD_FPTRAP.
 *
 * User-mode traps: if the process has a utraps table with a non-NULL
 * P_UTRAP7 slot, the handler in g5 is dispatched - through
 * .setup_v9utrap when bit 0 of %sp is set (64 bit program), otherwise
 * through .setup_utrap with g7 = 0.  If there is no handler, label 2
 * sends fp_disabled to sys_trap with g4 = -1.
 *
 * Branches marked ",a" annul their delay slot when not taken (and always
 * for the unconditional ba,a), so the nops after them are filler.
 */
1694.fp_disabled:
1695	CPU_ADDR(%g1, %g4)				! load CPU struct addr
1696	ldn	[%g1 + CPU_THREAD], %g1			! load thread pointer
1697#ifdef SF_ERRATA_30 /* call causes fp-disabled */
1698	brz,a,pn %g1, 2f
1699	  nop
1700#endif
1701	rdpr	%tstate, %g4
1702	btst	TSTATE_PRIV, %g4
1703#ifdef SF_ERRATA_30 /* call causes fp-disabled */
1704	bnz,pn %icc, 2f
1705	  nop
1706#else
1707	bnz,a,pn %icc, ptl1_panic
1708	  mov	PTL1_BAD_FPTRAP, %g1
1709#endif
1710	ldn	[%g1 + T_PROCP], %g1			! load proc pointer
1711	ldn	[%g1 + P_UTRAPS], %g5			! are there utraps?
1712	brz,a,pt %g5, 2f
1713	  nop
1714	ldn	[%g5 + P_UTRAP7], %g5			! fp_disabled utrap?
1715	brz,a,pn %g5, 2f
1716	  nop
1717	btst	1, %sp
1718	bz,a,pt	%xcc, 1f				! 32 bit user program
1719	  nop
1720	ba,a,pt	%xcc, .setup_v9utrap			! 64 bit user program
1721	  nop
17221:
1723	ba,pt	%xcc, .setup_utrap
1724	  or	%g0, %g0, %g7
17252:
1726	set	fp_disabled, %g1
1727	ba,pt	%xcc, sys_trap
1728	  sub	%g0, 1, %g4
1729
/*
 * .fp_ieee_exception
 *
 * A trap from privileged mode calls ptl1_panic with PTL1_BAD_FPTRAP.
 * Otherwise %fsr is copied into g2 by storing it to the per-CPU
 * CPU_TMP1 slot and loading it back.  If the process has a utraps table
 * with a non-NULL P_UTRAP8 slot, that handler (in g5) is dispatched
 * through .setup_v9utrap; if not, _fp_ieee_exception is passed to
 * sys_trap with g4 = -1.
 */
1730.fp_ieee_exception:
1731	rdpr	%tstate, %g1
1732	btst	TSTATE_PRIV, %g1
1733	bnz,a,pn %icc, ptl1_panic
1734	  mov	PTL1_BAD_FPTRAP, %g1
1735	CPU_ADDR(%g1, %g4)				! load CPU struct addr
1736	stx	%fsr, [%g1 + CPU_TMP1]
1737	ldx	[%g1 + CPU_TMP1], %g2
1738	ldn	[%g1 + CPU_THREAD], %g1			! load thread pointer
1739	ldn	[%g1 + T_PROCP], %g1			! load proc pointer
1740	ldn	[%g1 + P_UTRAPS], %g5			! are there utraps?
1741	brz,a,pt %g5, 1f
1742	  nop
1743	ldn	[%g5 + P_UTRAP8], %g5
1744	brnz,a,pt %g5, .setup_v9utrap
1745	  nop
17461:
1747	set	_fp_ieee_exception, %g1
1748	ba,pt	%xcc, sys_trap
1749	  sub	%g0, 1, %g4
1750
1751/*
1752 * Register Inputs:
1753 *	%g5		user trap handler
1754 *	%g7		misaligned addr - for alignment traps only
 *
 * Hands control to a user trap handler using a MINFRAME32-sized window.
 * Values left for the handler in the new window:
 *	%l1		%tpc at the time of the trap
 *	%l2		%tnpc, or t->t_dtrace_npc if t_dtrace_step is set
 *	%l3		copy of %g7
 * The CWP field of %tstate is replaced with the current %cwp, %tnpc is
 * set to the handler address, and the trap is completed with
 * FAST_TRAP_DONE.
1755 */
1756.setup_utrap:
1757	set	trap, %g1			! setup in case we go
1758	mov	T_FLUSH_PCB, %g3		! through sys_trap on
1759	sub	%g0, 1, %g4			! the save instruction below
1760
1761	/*
1762	 * If the DTrace pid provider is single stepping a copied-out
1763	 * instruction, t->t_dtrace_step will be set. In that case we need
1764	 * to abort the single-stepping (since execution of the instruction
1765	 * was interrupted) and use the value of t->t_dtrace_npc as the %npc.
1766	 */
1767	save	%sp, -SA(MINFRAME32), %sp	! window for trap handler
1768	CPU_ADDR(%g1, %g4)			! load CPU struct addr
1769	ldn	[%g1 + CPU_THREAD], %g1		! load thread pointer
1770	ldub	[%g1 + T_DTRACE_STEP], %g2	! load t->t_dtrace_step
1771	rdpr	%tnpc, %l2			! arg1 == tnpc
1772	brz,pt	%g2, 1f
1773	rdpr	%tpc, %l1			! arg0 == tpc
1774
1775	ldub	[%g1 + T_DTRACE_AST], %g2	! load t->t_dtrace_ast
1776	ldn	[%g1 + T_DTRACE_NPC], %l2	! arg1 = t->t_dtrace_npc (step)
1777	brz,pt	%g2, 1f
1778	st	%g0, [%g1 + T_DTRACE_FT]	! zero all pid provider flags
1779	stub	%g2, [%g1 + T_ASTFLAG]		! aston(t) if t->t_dtrace_ast
17801:
1781	mov	%g7, %l3			! arg2 == misaligned address
1782
1783	rdpr	%tstate, %g1			! cwp for trap handler
1784	rdpr	%cwp, %g4
1785	bclr	TSTATE_CWP_MASK, %g1
1786	wrpr	%g1, %g4, %tstate
1787	wrpr	%g0, %g5, %tnpc			! trap handler address
1788	FAST_TRAP_DONE
1789	/* NOTREACHED */
1790
/*
 * .check_v9utrap
 *
 * Decides whether a trap is delivered to a V9 user trap handler or to
 * trap() via sys_trap (label 3).  On entry g3 holds the trap type that
 * is compared against T_SOFTWARE_TRAP and T_UNIMP_INSTR.
 *
 * - Traps from privileged mode always go to label 3.
 * - T_SOFTWARE_TRAP: the hardware %tt is read into g3 and the utraps
 *   offset is computed as (%tt - 254) * CPTRSIZE in g2.
 * - T_UNIMP_INSTR: the trapping instruction is fetched from user space
 *   with ASI_AIUP, bracketed by setting and clearing CPU_TL1_HDLR since
 *   the load could fault.  Only an instruction for which
 *   (insn & 0xc1c00000) == 0, i.e. an ILLTRAP, continues to label 2.
 * - Any other trap type goes straight to label 2 using g2 as it was on
 *   entry (presumably the utraps offset supplied by the caller - confirm
 *   against the callers, which are not in this part of the file).
 * - A NULL p_utraps pointer always ends at label 3.
 *
 * Label 2 loads the handler at p_utraps + g2 into g5; if it is non-NULL
 * it is dispatched through .setup_v9utrap, otherwise label 3 is taken.
 */
1791.check_v9utrap:
1792	rdpr	%tstate, %g1
1793	btst	TSTATE_PRIV, %g1
1794	bnz,a,pn %icc, 3f
1795	  nop
1796	CPU_ADDR(%g4, %g1)				! load CPU struct addr
1797	ldn	[%g4 + CPU_THREAD], %g5			! load thread pointer
1798	ldn	[%g5 + T_PROCP], %g5			! load proc pointer
1799	ldn	[%g5 + P_UTRAPS], %g5			! are there utraps?
1800
1801	cmp	%g3, T_SOFTWARE_TRAP
1802	bne,a,pt %icc, 1f
1803	  nop
1804
1805	brz,pt %g5, 3f			! if p_utraps == NULL goto trap()
1806	  rdpr	%tt, %g3		! delay - get actual hw trap type
1807
1808	sub	%g3, 254, %g1		! UT_TRAP_INSTRUCTION_16 = p_utraps[18]
1809	ba,pt	%icc, 2f
1810	  smul	%g1, CPTRSIZE, %g2
18111:
1812	brz,a,pt %g5, 3f		! if p_utraps == NULL goto trap()
1813	  nop
1814
1815	cmp	%g3, T_UNIMP_INSTR
1816	bne,a,pt %icc, 2f
1817	  nop
1818
1819	mov	1, %g1
1820	st	%g1, [%g4 + CPU_TL1_HDLR] ! set CPU_TL1_HDLR
1821	rdpr	%tpc, %g1		! ld trapping instruction using
1822	lduwa	[%g1]ASI_AIUP, %g1	! "AS IF USER" ASI which could fault
1823	st	%g0, [%g4 + CPU_TL1_HDLR] ! clr CPU_TL1_HDLR
1824
1825	sethi	%hi(0xc1c00000), %g4	! setup mask for illtrap instruction
1826	andcc	%g1, %g4, %g4		! and instruction with mask
1827	bnz,a,pt %icc, 3f		! if %g4 == zero, %g1 is an ILLTRAP
1828	  nop				! fall thru to setup
18292:
1830	ldn	[%g5 + %g2], %g5
1831	brnz,a,pt %g5, .setup_v9utrap
1832	  nop
18333:
1834	set	trap, %g1
1835	ba,pt	%xcc, sys_trap
1836	  sub	%g0, 1, %g4
1837	/* NOTREACHED */
1838
1839/*
1840 * Register Inputs:
1841 *	%g5		user trap handler
 *
 * Hands control to a V9 user trap handler using a MINFRAME64-sized
 * window.  Values left for the handler in the new window:
 *	%l6		%tpc at the time of the trap
 *	%l7		%tnpc, or t->t_dtrace_npc if t_dtrace_step is set
 * The CWP field of %tstate is replaced with the current %cwp.
 *
 * Normal exit (label 4): %tnpc is set to the handler address and the
 * trap is completed with FAST_TRAP_DONE_CHK_INTR.
 *
 * Single-step exit: taken when %l7 equals the address space's
 * a_userlimit and the lwp's PCB_STEP flag is STEP_ACTIVE.  The handler
 * address is saved in PCB_TRACEPC, %l7 is rewritten to %l6 + 4, and
 * %tnpc is set to the userlimit value before FAST_TRAP_DONE.
1842 */
1843.setup_v9utrap:
1844	set	trap, %g1			! setup in case we go
1845	mov	T_FLUSH_PCB, %g3		! through sys_trap on
1846	sub	%g0, 1, %g4			! the save instruction below
1847
1848	/*
1849	 * If the DTrace pid provider is single stepping a copied-out
1850	 * instruction, t->t_dtrace_step will be set. In that case we need
1851	 * to abort the single-stepping (since execution of the instruction
1852	 * was interrupted) and use the value of t->t_dtrace_npc as the %npc.
1853	 */
1854	save	%sp, -SA(MINFRAME64), %sp	! window for trap handler
1855	CPU_ADDR(%g1, %g4)			! load CPU struct addr
1856	ldn	[%g1 + CPU_THREAD], %g1		! load thread pointer
1857	ldub	[%g1 + T_DTRACE_STEP], %g2	! load t->t_dtrace_step
1858	rdpr	%tnpc, %l7			! arg1 == tnpc
1859	brz,pt	%g2, 1f
1860	rdpr	%tpc, %l6			! arg0 == tpc
1861
1862	ldub	[%g1 + T_DTRACE_AST], %g2	! load t->t_dtrace_ast
1863	ldn	[%g1 + T_DTRACE_NPC], %l7	! arg1 == t->t_dtrace_npc (step)
1864	brz,pt	%g2, 1f
1865	st	%g0, [%g1 + T_DTRACE_FT]	! zero all pid provider flags
1866	stub	%g2, [%g1 + T_ASTFLAG]		! aston(t) if t->t_dtrace_ast
18671:
1868	rdpr	%tstate, %g2			! cwp for trap handler
1869	rdpr	%cwp, %g4
1870	bclr	TSTATE_CWP_MASK, %g2
1871	wrpr	%g2, %g4, %tstate
1872
1873	ldn	[%g1 + T_PROCP], %g4		! load proc pointer
1874	ldn	[%g4 + P_AS], %g4		! load as pointer
1875	ldn	[%g4 + A_USERLIMIT], %g4	! load as userlimit
1876	cmp	%l7, %g4			! check for single-step set
1877	bne,pt	%xcc, 4f
1878	  nop
1879	ldn	[%g1 + T_LWP], %g1		! load klwp pointer
1880	ld	[%g1 + PCB_STEP], %g4		! load single-step flag
1881	cmp	%g4, STEP_ACTIVE		! step flags set in pcb?
1882	bne,pt	%icc, 4f
1883	  nop
1884	stn	%g5, [%g1 + PCB_TRACEPC]	! save trap handler addr in pcb
1885	mov	%l7, %g4			! on entry to precise user trap
1886	add	%l6, 4, %l7			! handler, %l6 == pc, %l7 == npc
1887						! at time of trap
1888	wrpr	%g0, %g4, %tnpc			! generate FLTBOUNDS,
1889						! %g4 == userlimit
1890	FAST_TRAP_DONE
1891	/* NOTREACHED */
18924:
1893	wrpr	%g0, %g5, %tnpc			! trap handler address
1894	FAST_TRAP_DONE_CHK_INTR
1895	/* NOTREACHED */
1896
/*
 * .fp_exception: fast path for FP exception traps.
 *
 * %fsr is read into %g2 by bouncing it through cpu_tmp1.  If the trap
 * came from user mode, ftt is unfinished_FPop, FSR_TEM_NX is clear and
 * the instruction at %tpc matches FITOS_INSTR, the "fitos" is emulated
 * in place with a fitod/fdtos pair through %d62 and the handler leaves
 * via FAST_TRAP_DONE.  Every other case goes to .fp_exception_cont,
 * which hands the trap to _fp_exception through sys_trap with %fsr
 * still in %g2.
 */
1897.fp_exception:
1898	CPU_ADDR(%g1, %g4)
1899	stx	%fsr, [%g1 + CPU_TMP1]
1900	ldx	[%g1 + CPU_TMP1], %g2
1901
1902	/*
1903	 * Cheetah takes unfinished_FPop trap for certain range of operands
1904	 * to the "fitos" instruction. Instead of going through the slow
1905	 * software emulation path, we try to simulate the "fitos" instruction
1906	 * via "fitod" and "fdtos" provided the following conditions are met:
1907	 *
1908	 *	fpu_exists is set (if DEBUG)
1909	 *	not in privileged mode
1910	 *	ftt is unfinished_FPop
1911	 *	NXM IEEE trap is not enabled
1912	 *	instruction at %tpc is "fitos"
1913	 *
1914	 *  Usage:
1915	 *	%g1	per cpu address
1916	 *	%g2	%fsr
1917	 *	%g6	user instruction
1918	 *
1919	 * Note that we can take a memory access related trap while trying
1920	 * to fetch the user instruction. Therefore, we set CPU_TL1_HDLR
1921	 * flag to catch those traps and let the SFMMU code deal with page
1922	 * fault and data access exception.
1923	 */
1924#if defined(DEBUG) || defined(NEED_FPU_EXISTS)
1925	sethi	%hi(fpu_exists), %g7
1926	ld	[%g7 + %lo(fpu_exists)], %g7
1927	brz,pn %g7, .fp_exception_cont
1928	  nop
1929#endif
1930	rdpr	%tstate, %g7			! branch if in privileged mode
1931	btst	TSTATE_PRIV, %g7
1932	bnz,pn	%xcc, .fp_exception_cont
1933	srl	%g2, FSR_FTT_SHIFT, %g7		! extract ftt from %fsr
1934	and	%g7, (FSR_FTT>>FSR_FTT_SHIFT), %g7
1935	cmp	%g7, FTT_UNFIN
1936	set	FSR_TEM_NX, %g5
1937	bne,pn	%xcc, .fp_exception_cont	! branch if NOT unfinished_FPop
1938	  andcc	%g2, %g5, %g0
1939	bne,pn	%xcc, .fp_exception_cont	! branch if FSR_TEM_NX enabled
1940	  rdpr	%tpc, %g5			! get faulting PC
1941
1942	or	%g0, 1, %g7
1943	st	%g7, [%g1 + CPU_TL1_HDLR]	! set tl1_hdlr flag
1944	lda	[%g5]ASI_USER, %g6		! get user's instruction
1945	st	%g0, [%g1 + CPU_TL1_HDLR]	! clear tl1_hdlr flag
1946
1947	set	FITOS_INSTR_MASK, %g7
1948	and	%g6, %g7, %g7
1949	set	FITOS_INSTR, %g5
1950	cmp	%g7, %g5
1951	bne,pn	%xcc, .fp_exception_cont	! branch if not FITOS_INSTR
1952	 nop
1953
1954	/*
1955	 * This is unfinished FPops trap for "fitos" instruction. We
1956	 * need to simulate "fitos" via "fitod" and "fdtos" instruction
1957	 * sequence.
1958	 *
1959	 * We need a temporary FP register to do the conversion. Since
1960	 * both source and destination operands for the "fitos" instruction
1961	 * have to be within %f0-%f31, we use an FP register from the upper
1962	 * half to guarantee that it won't collide with the source or the
1963	 * dest operand. However, we do have to save and restore its value.
1964	 *
1965	 * We use %d62 as a temporary FP register for the conversion and
1966	 * branch to appropriate instruction within the conversion tables
1967	 * based upon the rs2 and rd values.
1968	 */
1969
1970	std	%d62, [%g1 + CPU_TMP1]		! save original value
1971
	/*
	 * Index the fitod table by rs2 (4 bytes per entry).  The jmp
	 * carries a branch in its delay slot, so exactly one table
	 * entry executes before control arrives at _fitos_fitod_done.
	 */
1972	srl	%g6, FITOS_RS2_SHIFT, %g7
1973	and	%g7, FITOS_REG_MASK, %g7
1974	set	_fitos_fitod_table, %g4
1975	sllx	%g7, 2, %g7
1976	jmp	%g4 + %g7
1977	  ba,pt	%xcc, _fitos_fitod_done
1978	.empty
1979
1980_fitos_fitod_table:
1981	  fitod	%f0, %d62
1982	  fitod	%f1, %d62
1983	  fitod	%f2, %d62
1984	  fitod	%f3, %d62
1985	  fitod	%f4, %d62
1986	  fitod	%f5, %d62
1987	  fitod	%f6, %d62
1988	  fitod	%f7, %d62
1989	  fitod	%f8, %d62
1990	  fitod	%f9, %d62
1991	  fitod	%f10, %d62
1992	  fitod	%f11, %d62
1993	  fitod	%f12, %d62
1994	  fitod	%f13, %d62
1995	  fitod	%f14, %d62
1996	  fitod	%f15, %d62
1997	  fitod	%f16, %d62
1998	  fitod	%f17, %d62
1999	  fitod	%f18, %d62
2000	  fitod	%f19, %d62
2001	  fitod	%f20, %d62
2002	  fitod	%f21, %d62
2003	  fitod	%f22, %d62
2004	  fitod	%f23, %d62
2005	  fitod	%f24, %d62
2006	  fitod	%f25, %d62
2007	  fitod	%f26, %d62
2008	  fitod	%f27, %d62
2009	  fitod	%f28, %d62
2010	  fitod	%f29, %d62
2011	  fitod	%f30, %d62
2012	  fitod	%f31, %d62
2013_fitos_fitod_done:
2014
2015	/*
2016	 * Now convert data back into single precision
2017	 */
	/* Same dispatch scheme as above, this time indexed by rd. */
2018	srl	%g6, FITOS_RD_SHIFT, %g7
2019	and	%g7, FITOS_REG_MASK, %g7
2020	set	_fitos_fdtos_table, %g4
2021	sllx	%g7, 2, %g7
2022	jmp	%g4 + %g7
2023	  ba,pt	%xcc, _fitos_fdtos_done
2024	.empty
2025
2026_fitos_fdtos_table:
2027	  fdtos	%d62, %f0
2028	  fdtos	%d62, %f1
2029	  fdtos	%d62, %f2
2030	  fdtos	%d62, %f3
2031	  fdtos	%d62, %f4
2032	  fdtos	%d62, %f5
2033	  fdtos	%d62, %f6
2034	  fdtos	%d62, %f7
2035	  fdtos	%d62, %f8
2036	  fdtos	%d62, %f9
2037	  fdtos	%d62, %f10
2038	  fdtos	%d62, %f11
2039	  fdtos	%d62, %f12
2040	  fdtos	%d62, %f13
2041	  fdtos	%d62, %f14
2042	  fdtos	%d62, %f15
2043	  fdtos	%d62, %f16
2044	  fdtos	%d62, %f17
2045	  fdtos	%d62, %f18
2046	  fdtos	%d62, %f19
2047	  fdtos	%d62, %f20
2048	  fdtos	%d62, %f21
2049	  fdtos	%d62, %f22
2050	  fdtos	%d62, %f23
2051	  fdtos	%d62, %f24
2052	  fdtos	%d62, %f25
2053	  fdtos	%d62, %f26
2054	  fdtos	%d62, %f27
2055	  fdtos	%d62, %f28
2056	  fdtos	%d62, %f29
2057	  fdtos	%d62, %f30
2058	  fdtos	%d62, %f31
2059_fitos_fdtos_done:
2060
2061	ldd	[%g1 + CPU_TMP1], %d62		! restore %d62
2062
2063#if DEBUG
2064	/*
2065	 * Update FPop_unfinished trap kstat
2066	 */
	/*
	 * Compare-and-swap retry loop: casxa leaves the value it found
	 * in memory in %g6; if that differs from the expected %g5, the
	 * annulled delay slot reloads %g5 from it and the add is retried.
	 */
2067	set	fpustat+FPUSTAT_UNFIN_KSTAT, %g7
2068	ldx	[%g7], %g5
20691:
2070	add	%g5, 1, %g6
2071
2072	casxa	[%g7] ASI_N, %g5, %g6
2073	cmp	%g5, %g6
2074	bne,a,pn %xcc, 1b
2075	  or	%g0, %g6, %g5
2076
2077	/*
2078	 * Update fpu_sim_fitos kstat
2079	 */
2080	set	fpuinfo+FPUINFO_FITOS_KSTAT, %g7
2081	ldx	[%g7], %g5
20821:
2083	add	%g5, 1, %g6
2084
2085	casxa	[%g7] ASI_N, %g5, %g6
2086	cmp	%g5, %g6
2087	bne,a,pn %xcc, 1b
2088	  or	%g0, %g6, %g5
2089#endif /* DEBUG */
2090
2091	FAST_TRAP_DONE
2092
2093.fp_exception_cont:
2094	/*
2095	 * Let _fp_exception deal with simulating FPop instruction.
2096	 * Note that we need to pass %fsr in %g2 (already read above).
2097	 */
2098
2099	set	_fp_exception, %g1
2100	ba,pt	%xcc, sys_trap
2101	sub	%g0, 1, %g4
2102
/*
 * .clean_windows: flush the register windows, set %cleanwin to zero and,
 * when the CPU has an mpcb, set %wstate to
 * mpcb_wstate + WSTATE_CLEAN_OFFSET.  Returns via FAST_TRAP_DONE.
 *
 * %g1/%g3/%g4 are preloaded with trap, T_FLUSH_PCB and -1; this is the
 * same setup .setup_v9utrap performs "in case we go through sys_trap"
 * on its save instruction.
 */
2103.clean_windows:
2104	set	trap, %g1
2105	mov	T_FLUSH_PCB, %g3
2106	sub	%g0, 1, %g4
2107	save
2108	flushw
2109	restore
2110	wrpr	%g0, %g0, %cleanwin	! no clean windows
2111
2112	CPU_ADDR(%g4, %g5)
2113	ldn	[%g4 + CPU_MPCB], %g4
	/* No mpcb: leave %wstate untouched. */
2114	brz,a,pn %g4, 1f
2115	  nop
2116	ld	[%g4 + MPCB_WSTATE], %g5
2117	add	%g5, WSTATE_CLEAN_OFFSET, %g5
2118	wrpr	%g0, %g5, %wstate
21191:	FAST_TRAP_DONE
2120
2121/*
2122 * .spill_clean: clean the previous window, restore the wstate, and
2123 * "done".
2124 *
2125 * Entry: %g7 contains new wstate
 *
 * %cwp is stepped back by one (wrapping to nwin_minus_one when it would
 * go negative), %l0-%l7 of that window are zeroed, %g7 is written to
 * %wstate, and the handler finishes with "saved" followed by "retry"
 * (not "done"), so the trapped instruction is re-executed.
 * Uses %g5 and %g6 as scratch.
2126 */
2127.spill_clean:
2128	sethi	%hi(nwin_minus_one), %g5
2129	ld	[%g5 + %lo(nwin_minus_one)], %g5 ! %g5 = nwin - 1
2130	rdpr	%cwp, %g6			! %g6 = %cwp
2131	deccc	%g6				! %g6--
2132	movneg	%xcc, %g5, %g6			! if (%g6<0) %g6 = nwin-1
2133	wrpr	%g6, %cwp
2134	TT_TRACE_L(trace_win)
2135	clr	%l0
2136	clr	%l1
2137	clr	%l2
2138	clr	%l3
2139	clr	%l4
2140	clr	%l5
2141	clr	%l6
2142	clr	%l7
2143	wrpr	%g0, %g7, %wstate
2144	saved
2145	retry			! restores correct %cwp
2146
/*
 * .fix_alignment: set p_fixalignment (a byte) to 1 in the proc of the
 * thread currently running on this CPU, then return via FAST_TRAP_DONE.
 * Clobbers %g1 and %g2.
 */
2147.fix_alignment:
2148	CPU_ADDR(%g1, %g2)		! load CPU struct addr to %g1 using %g2
2149	ldn	[%g1 + CPU_THREAD], %g1	! load thread pointer
2150	ldn	[%g1 + T_PROCP], %g1
2151	mov	1, %g2
2152	stb	%g2, [%g1 + P_FIXALIGNMENT]
2153	FAST_TRAP_DONE
2154
/*
 * STDF_REG(REG, ADDR, TMP): store one double-precision FP register,
 * chosen at run time, to [ADDR + CPU_TMP1].
 *
 *	REG	selector in the range 0-31 (the caller passes the 5-bit rd
 *		field of the trapping instruction); clobbered: it is
 *		scaled by 8 to index the table of two-instruction entries
 *	ADDR	base register for the CPU_TMP1 slot
 *	TMP	scratch, loaded with the table base (start1)
 *
 * Table entries alternate between the lower and the upper register bank
 * (%f0, %f32, %f2, %f34, ...), so bit 0 of the selector picks the upper
 * bank.  Each entry is a branch to done1 with the std in its delay slot.
 *
 * mark1/start1/done1 are ordinary labels, so this macro can only be
 * expanded once per assembly unit.
 */
2155#define	STDF_REG(REG, ADDR, TMP)		\
2156	sll	REG, 3, REG			;\
2157mark1:	set	start1, TMP			;\
2158	jmp	REG + TMP			;\
2159	  nop					;\
2160start1:	ba,pt	%xcc, done1			;\
2161	  std	%f0, [ADDR + CPU_TMP1]		;\
2162	ba,pt	%xcc, done1			;\
2163	  std	%f32, [ADDR + CPU_TMP1]		;\
2164	ba,pt	%xcc, done1			;\
2165	  std	%f2, [ADDR + CPU_TMP1]		;\
2166	ba,pt	%xcc, done1			;\
2167	  std	%f34, [ADDR + CPU_TMP1]		;\
2168	ba,pt	%xcc, done1			;\
2169	  std	%f4, [ADDR + CPU_TMP1]		;\
2170	ba,pt	%xcc, done1			;\
2171	  std	%f36, [ADDR + CPU_TMP1]		;\
2172	ba,pt	%xcc, done1			;\
2173	  std	%f6, [ADDR + CPU_TMP1]		;\
2174	ba,pt	%xcc, done1			;\
2175	  std	%f38, [ADDR + CPU_TMP1]		;\
2176	ba,pt	%xcc, done1			;\
2177	  std	%f8, [ADDR + CPU_TMP1]		;\
2178	ba,pt	%xcc, done1			;\
2179	  std	%f40, [ADDR + CPU_TMP1]		;\
2180	ba,pt	%xcc, done1			;\
2181	  std	%f10, [ADDR + CPU_TMP1]		;\
2182	ba,pt	%xcc, done1			;\
2183	  std	%f42, [ADDR + CPU_TMP1]		;\
2184	ba,pt	%xcc, done1			;\
2185	  std	%f12, [ADDR + CPU_TMP1]		;\
2186	ba,pt	%xcc, done1			;\
2187	  std	%f44, [ADDR + CPU_TMP1]		;\
2188	ba,pt	%xcc, done1			;\
2189	  std	%f14, [ADDR + CPU_TMP1]		;\
2190	ba,pt	%xcc, done1			;\
2191	  std	%f46, [ADDR + CPU_TMP1]		;\
2192	ba,pt	%xcc, done1			;\
2193	  std	%f16, [ADDR + CPU_TMP1]		;\
2194	ba,pt	%xcc, done1			;\
2195	  std	%f48, [ADDR + CPU_TMP1]		;\
2196	ba,pt	%xcc, done1			;\
2197	  std	%f18, [ADDR + CPU_TMP1]		;\
2198	ba,pt	%xcc, done1			;\
2199	  std	%f50, [ADDR + CPU_TMP1]		;\
2200	ba,pt	%xcc, done1			;\
2201	  std	%f20, [ADDR + CPU_TMP1]		;\
2202	ba,pt	%xcc, done1			;\
2203	  std	%f52, [ADDR + CPU_TMP1]		;\
2204	ba,pt	%xcc, done1			;\
2205	  std	%f22, [ADDR + CPU_TMP1]		;\
2206	ba,pt	%xcc, done1			;\
2207	  std	%f54, [ADDR + CPU_TMP1]		;\
2208	ba,pt	%xcc, done1			;\
2209	  std	%f24, [ADDR + CPU_TMP1]		;\
2210	ba,pt	%xcc, done1			;\
2211	  std	%f56, [ADDR + CPU_TMP1]		;\
2212	ba,pt	%xcc, done1			;\
2213	  std	%f26, [ADDR + CPU_TMP1]		;\
2214	ba,pt	%xcc, done1			;\
2215	  std	%f58, [ADDR + CPU_TMP1]		;\
2216	ba,pt	%xcc, done1			;\
2217	  std	%f28, [ADDR + CPU_TMP1]		;\
2218	ba,pt	%xcc, done1			;\
2219	  std	%f60, [ADDR + CPU_TMP1]		;\
2220	ba,pt	%xcc, done1			;\
2221	  std	%f30, [ADDR + CPU_TMP1]		;\
2222	ba,pt	%xcc, done1			;\
2223	  std	%f62, [ADDR + CPU_TMP1]		;\
2224done1:
2225
/*
 * LDDF_REG(REG, ADDR, TMP): load one double-precision FP register,
 * chosen at run time, from [ADDR + CPU_TMP1].
 *
 *	REG	selector in the range 0-31 (the caller passes the 5-bit rd
 *		field of the trapping instruction); clobbered: it is
 *		scaled by 8 to index the table of two-instruction entries
 *	ADDR	base register for the CPU_TMP1 slot
 *	TMP	scratch, loaded with the table base (start2)
 *
 * Table entries alternate between the lower and the upper register bank
 * (%f0, %f32, %f2, %f34, ...), so bit 0 of the selector picks the upper
 * bank.  Each entry is a branch to done2 with the ldd in its delay slot.
 *
 * mark2/start2/done2 are ordinary labels, so this macro can only be
 * expanded once per assembly unit.
 */
2226#define	LDDF_REG(REG, ADDR, TMP)		\
2227	sll	REG, 3, REG			;\
2228mark2:	set	start2, TMP			;\
2229	jmp	REG + TMP			;\
2230	  nop					;\
2231start2:	ba,pt	%xcc, done2			;\
2232	  ldd	[ADDR + CPU_TMP1], %f0		;\
2233	ba,pt	%xcc, done2			;\
2234	  ldd	[ADDR + CPU_TMP1], %f32		;\
2235	ba,pt	%xcc, done2			;\
2236	  ldd	[ADDR + CPU_TMP1], %f2		;\
2237	ba,pt	%xcc, done2			;\
2238	  ldd	[ADDR + CPU_TMP1], %f34		;\
2239	ba,pt	%xcc, done2			;\
2240	  ldd	[ADDR + CPU_TMP1], %f4		;\
2241	ba,pt	%xcc, done2			;\
2242	  ldd	[ADDR + CPU_TMP1], %f36		;\
2243	ba,pt	%xcc, done2			;\
2244	  ldd	[ADDR + CPU_TMP1], %f6		;\
2245	ba,pt	%xcc, done2			;\
2246	  ldd	[ADDR + CPU_TMP1], %f38		;\
2247	ba,pt	%xcc, done2			;\
2248	  ldd	[ADDR + CPU_TMP1], %f8		;\
2249	ba,pt	%xcc, done2			;\
2250	  ldd	[ADDR + CPU_TMP1], %f40		;\
2251	ba,pt	%xcc, done2			;\
2252	  ldd	[ADDR + CPU_TMP1], %f10		;\
2253	ba,pt	%xcc, done2			;\
2254	  ldd	[ADDR + CPU_TMP1], %f42		;\
2255	ba,pt	%xcc, done2			;\
2256	  ldd	[ADDR + CPU_TMP1], %f12		;\
2257	ba,pt	%xcc, done2			;\
2258	  ldd	[ADDR + CPU_TMP1], %f44		;\
2259	ba,pt	%xcc, done2			;\
2260	  ldd	[ADDR + CPU_TMP1], %f14		;\
2261	ba,pt	%xcc, done2			;\
2262	  ldd	[ADDR + CPU_TMP1], %f46		;\
2263	ba,pt	%xcc, done2			;\
2264	  ldd	[ADDR + CPU_TMP1], %f16		;\
2265	ba,pt	%xcc, done2			;\
2266	  ldd	[ADDR + CPU_TMP1], %f48		;\
2267	ba,pt	%xcc, done2			;\
2268	  ldd	[ADDR + CPU_TMP1], %f18		;\
2269	ba,pt	%xcc, done2			;\
2270	  ldd	[ADDR + CPU_TMP1], %f50		;\
2271	ba,pt	%xcc, done2			;\
2272	  ldd	[ADDR + CPU_TMP1], %f20		;\
2273	ba,pt	%xcc, done2			;\
2274	  ldd	[ADDR + CPU_TMP1], %f52		;\
2275	ba,pt	%xcc, done2			;\
2276	  ldd	[ADDR + CPU_TMP1], %f22		;\
2277	ba,pt	%xcc, done2			;\
2278	  ldd	[ADDR + CPU_TMP1], %f54		;\
2279	ba,pt	%xcc, done2			;\
2280	  ldd	[ADDR + CPU_TMP1], %f24		;\
2281	ba,pt	%xcc, done2			;\
2282	  ldd	[ADDR + CPU_TMP1], %f56		;\
2283	ba,pt	%xcc, done2			;\
2284	  ldd	[ADDR + CPU_TMP1], %f26		;\
2285	ba,pt	%xcc, done2			;\
2286	  ldd	[ADDR + CPU_TMP1], %f58		;\
2287	ba,pt	%xcc, done2			;\
2288	  ldd	[ADDR + CPU_TMP1], %f28		;\
2289	ba,pt	%xcc, done2			;\
2290	  ldd	[ADDR + CPU_TMP1], %f60		;\
2291	ba,pt	%xcc, done2			;\
2292	  ldd	[ADDR + CPU_TMP1], %f30		;\
2293	ba,pt	%xcc, done2			;\
2294	  ldd	[ADDR + CPU_TMP1], %f62		;\
2295done2:
2296
/*
 * .lddf_exception_not_aligned: fast-path emulation of a user lddf/ldda
 * whose address is only word aligned.
 *
 * The faulting address is taken from SFAR.  The trapping instruction is
 * fetched from %tpc; for the alternate-space form the ASI is taken from
 * the instruction's imm_asi field (i == 0) or from TSTATE.ASI (i == 1).
 * For a plain lddf, or an ldda through ASI_P, ASI_PNF, ASI_S or ASI_SNF,
 * the two words are loaded from user space, combined in cpu_tmp1 and
 * loaded into the destination FP register with LDDF_REG.  Any other ASI
 * (and the !fpu_exists case) is passed to fpu_trap through sys_trap with
 * %g3 = T_USER | T_LDDF_ALIGN and %g2 = misaligned vaddr.
 *
 * CPU_TL1_HDLR is set while user memory is touched and cleared again on
 * every exit path that set it.
 */
2297.lddf_exception_not_aligned:
2298	/*
2299	 * Cheetah overwrites SFAR on a DTLB miss, hence read it now.
2300	 */
2301	ldxa	[MMU_SFAR]%asi, %g5	! misaligned vaddr in %g5
2302
2303#if defined(DEBUG) || defined(NEED_FPU_EXISTS)
2304	sethi	%hi(fpu_exists), %g2		! check fpu_exists
2305	ld	[%g2 + %lo(fpu_exists)], %g2
2306	brz,a,pn %g2, 4f
2307	  nop
2308#endif
2309	CPU_ADDR(%g1, %g4)
2310	or	%g0, 1, %g4
2311	st	%g4, [%g1 + CPU_TL1_HDLR] ! set tl1_hdlr flag
2312
2313	rdpr	%tpc, %g2
2314	lda	[%g2]ASI_AIUP, %g6	! get the user's lddf instruction
2315	srl	%g6, 23, %g1		! using ldda or not?
2316	and	%g1, 1, %g1
2317	brz,a,pt %g1, 2f		! check for ldda instruction
2318	  nop
2319	srl	%g6, 13, %g1		! check immflag
2320	and	%g1, 1, %g1
2321	rdpr	%tstate, %g2		! %tstate in %g2
	/*
	 * TSTATE.ASI occupies bits 31:24.  srl works on the low 32 bits,
	 * so shifting right by 24 leaves exactly the 8-bit ASI.  (A shift
	 * of 31 would leave only its top bit, which can never match any
	 * of the ASI values tested below.)
	 */
2322	brnz,a,pn %g1, 1f
2323	  srl	%g2, 24, %g1		! get asi from %tstate
2324	srl	%g6, 5, %g1		! get asi from instruction
2325	and	%g1, 0xFF, %g1		! imm_asi field
23261:
2327	cmp	%g1, ASI_P		! primary address space
2328	be,a,pt %icc, 2f
2329	  nop
2330	cmp	%g1, ASI_PNF		! primary no fault address space
2331	be,a,pt %icc, 2f
2332	  nop
2333	cmp	%g1, ASI_S		! secondary address space
2334	be,a,pt %icc, 2f
2335	  nop
2336	cmp	%g1, ASI_SNF		! secondary no fault address space
2337	bne,a,pn %icc, 3f
2338	  nop
23392:
2340	lduwa	[%g5]ASI_USER, %g7	! get first half of misaligned data
2341	add	%g5, 4, %g5		! increment misaligned data address
2342	lduwa	[%g5]ASI_USER, %g5	! get second half of misaligned data
2343
2344	sllx	%g7, 32, %g7
2345	or	%g5, %g7, %g5		! combine data
2346	CPU_ADDR(%g7, %g1)		! save data on a per-cpu basis
2347	stx	%g5, [%g7 + CPU_TMP1]	! save in cpu_tmp1
2348
2349	srl	%g6, 25, %g3		! %g6 has the instruction
2350	and	%g3, 0x1F, %g3		! %g3 has rd
2351	LDDF_REG(%g3, %g7, %g4)
2352
2353	CPU_ADDR(%g1, %g4)
2354	st	%g0, [%g1 + CPU_TL1_HDLR] ! clear tl1_hdlr flag
2355	FAST_TRAP_DONE
23563:
2357	CPU_ADDR(%g1, %g4)
2358	st	%g0, [%g1 + CPU_TL1_HDLR] ! clear tl1_hdlr flag
23594:
2360	set	T_USER, %g3		! trap type in %g3
2361	or	%g3, T_LDDF_ALIGN, %g3
2362	mov	%g5, %g2		! misaligned vaddr in %g2
2363	set	fpu_trap, %g1		! goto C for the little and
2364	ba,pt	%xcc, sys_trap		! no fault little asi's
2365	  sub	%g0, 1, %g4
2366
/*
 * .stdf_exception_not_aligned: fast-path emulation of a user stdf/stda
 * whose address is only word aligned.
 *
 * The faulting address is taken from SFAR.  The trapping instruction is
 * fetched from %tpc; for the alternate-space form the ASI is taken from
 * the instruction's imm_asi field (i == 0) or from TSTATE.ASI (i == 1).
 * For a plain stdf, or an stda through ASI_P or ASI_S, the source FP
 * register is dumped to cpu_tmp1 with STDF_REG and written to user space
 * as two words.  Any other ASI (and the !fpu_exists case) is passed to
 * fpu_trap through sys_trap with %g3 = T_USER | T_STDF_ALIGN and
 * %g2 = misaligned vaddr.
 *
 * CPU_TL1_HDLR is set while user memory is touched and cleared again on
 * every exit path that set it.
 */
2367.stdf_exception_not_aligned:
2368	/*
2369	 * Cheetah overwrites SFAR on a DTLB miss, hence read it now.
2370	 */
2371	ldxa	[MMU_SFAR]%asi, %g5	! misaligned vaddr in %g5
2372
2373#if defined(DEBUG) || defined(NEED_FPU_EXISTS)
2374	sethi	%hi(fpu_exists), %g7		! check fpu_exists
2375	ld	[%g7 + %lo(fpu_exists)], %g3
2376	brz,a,pn %g3, 4f
2377	  nop
2378#endif
2379	CPU_ADDR(%g1, %g4)
2380	or	%g0, 1, %g4
2381	st	%g4, [%g1 + CPU_TL1_HDLR] ! set tl1_hdlr flag
2382
2383	rdpr	%tpc, %g2
2384	lda	[%g2]ASI_AIUP, %g6	! get the user's stdf instruction
2385
2386	srl	%g6, 23, %g1		! using stda or not?
2387	and	%g1, 1, %g1
2388	brz,a,pt %g1, 2f		! check for stda instruction
2389	  nop
2390	srl	%g6, 13, %g1		! check immflag
2391	and	%g1, 1, %g1
2392	rdpr	%tstate, %g2		! %tstate in %g2
	/*
	 * TSTATE.ASI occupies bits 31:24.  srl works on the low 32 bits,
	 * so shifting right by 24 leaves exactly the 8-bit ASI.  (A shift
	 * of 31 would leave only its top bit, which can never match
	 * ASI_P or ASI_S.)
	 */
2393	brnz,a,pn %g1, 1f
2394	  srl	%g2, 24, %g1		! get asi from %tstate
2395	srl	%g6, 5, %g1		! get asi from instruction
2396	and	%g1, 0xFF, %g1		! imm_asi field
23971:
2398	cmp	%g1, ASI_P		! primary address space
2399	be,a,pt %icc, 2f
2400	  nop
2401	cmp	%g1, ASI_S		! secondary address space
2402	bne,a,pn %icc, 3f
2403	  nop
24042:
2405	srl	%g6, 25, %g6
2406	and	%g6, 0x1F, %g6		! %g6 has rd
2407	CPU_ADDR(%g7, %g1)
2408	STDF_REG(%g6, %g7, %g4)		! STDF_REG(REG, ADDR, TMP)
2409
2410	ldx	[%g7 + CPU_TMP1], %g6
2411	srlx	%g6, 32, %g7
2412	stuwa	%g7, [%g5]ASI_USER	! first half
2413	add	%g5, 4, %g5		! increment misaligned data address
2414	stuwa	%g6, [%g5]ASI_USER	! second half
2415
2416	CPU_ADDR(%g1, %g4)
2417	st	%g0, [%g1 + CPU_TL1_HDLR] ! clear tl1_hdlr flag
2418	FAST_TRAP_DONE
24193:
2420	CPU_ADDR(%g1, %g4)
2421	st	%g0, [%g1 + CPU_TL1_HDLR] ! clear tl1_hdlr flag
24224:
2423	set	T_USER, %g3		! trap type in %g3
2424	or	%g3, T_STDF_ALIGN, %g3
2425	mov	%g5, %g2		! misaligned vaddr in %g2
2426	set	fpu_trap, %g1		! goto C for the little and
2427	ba,pt	%xcc, sys_trap		! nofault little asi's
2428	  sub	%g0, 1, %g4
2429
2430#ifdef DEBUG_USER_TRAPTRACECTL
2431
/*
 * .traptrace_freeze: emit a trace_win record, then store 1 to
 * trap_freeze.  %l0, %l1, %l2 and %l4 are parked in %g1-%g4 around the
 * TT_TRACE_L call and restored afterwards.
 */
2432.traptrace_freeze:
2433	mov	%l0, %g1 ; mov	%l1, %g2 ; mov	%l2, %g3 ; mov	%l4, %g4
2434	TT_TRACE_L(trace_win)
2435	mov	%g4, %l4 ; mov	%g3, %l2 ; mov	%g2, %l1 ; mov	%g1, %l0
2436	set	trap_freeze, %g1
2437	mov	1, %g2
2438	st	%g2, [%g1]
2439	FAST_TRAP_DONE
2440
/*
 * .traptrace_unfreeze: store 0 to trap_freeze, then emit a trace_win
 * record.  %l0, %l1, %l2 and %l4 are parked in %g1-%g4 around the
 * TT_TRACE_L call and restored afterwards.
 */
2441.traptrace_unfreeze:
2442	set	trap_freeze, %g1
2443	st	%g0, [%g1]
2444	mov	%l0, %g1 ; mov	%l1, %g2 ; mov	%l2, %g3 ; mov	%l4, %g4
2445	TT_TRACE_L(trace_win)
2446	mov	%g4, %l4 ; mov	%g3, %l2 ; mov	%g2, %l1 ; mov	%g1, %l0
2447	FAST_TRAP_DONE
2448
2449#endif /* DEBUG_USER_TRAPTRACECTL */
2450
/*
 * .getcc: hand the caller its integer condition codes.
 *
 * The ccr held in %tstate is shifted into V8 psr position, masked with
 * PSR_ICC, right justified and written to %g1 of the normal globals.
 * %o0 and %o1 are used as scratch and preserved through cpu_tmp1 and
 * cpu_tmp2; the alternate-global %g1 keeps the CPU struct address across
 * the global switch, which is why it can be used for the restores.
 */
2451.getcc:
2452	CPU_ADDR(%g1, %g2)
2453	stx	%o0, [%g1 + CPU_TMP1]		! save %o0
2454	stx	%o1, [%g1 + CPU_TMP2]		! save %o1
2455	rdpr	%tstate, %g3			! get tstate
2456	srlx	%g3, PSR_TSTATE_CC_SHIFT, %o0	! shift ccr to V8 psr
2457	set	PSR_ICC, %g2
2458	and	%o0, %g2, %o0			! mask out the rest
2459	srl	%o0, PSR_ICC_SHIFT, %o0		! right justify
2460	rdpr	%pstate, %o1
2461	wrpr	%o1, PSTATE_AG, %pstate		! get into normal globals
2462	mov	%o0, %g1			! move ccr to normal %g1
2463	wrpr	%g0, %o1, %pstate		! back into alternate globals
2464	ldx	[%g1 + CPU_TMP1], %o0		! restore %o0
2465	ldx	[%g1 + CPU_TMP2], %o1		! restore %o1
2466	FAST_TRAP_DONE
2467
/*
 * .setcc: install new integer condition codes from the caller.
 *
 * The value is read from %g1 of the normal globals, shifted to psr.icc
 * position, masked with PSR_ICC and shifted up into the ccr field of
 * %tstate.  The upper word of the old %tstate is cleared before the new
 * bits are or'ed in.  %o0 and %o1 are used as scratch and preserved
 * through cpu_tmp1 and cpu_tmp2.
 */
2468.setcc:
2469	CPU_ADDR(%g1, %g2)
2470	stx	%o0, [%g1 + CPU_TMP1]		! save %o0
2471	stx	%o1, [%g1 + CPU_TMP2]		! save %o1
2472	rdpr	%pstate, %o0
2473	wrpr	%o0, PSTATE_AG, %pstate		! get into normal globals
2474	mov	%g1, %o1
2475	wrpr	%g0, %o0, %pstate		! back to alternates
2476	sll	%o1, PSR_ICC_SHIFT, %g2
2477	set	PSR_ICC, %g3
2478	and	%g2, %g3, %g2			! mask out rest
2479	sllx	%g2, PSR_TSTATE_CC_SHIFT, %g2
2480	rdpr	%tstate, %g3			! get tstate
2481	srl	%g3, 0, %g3			! clear upper word
2482	or	%g3, %g2, %g3			! or in new bits
2483	wrpr	%g3, %tstate
2484	ldx	[%g1 + CPU_TMP1], %o0		! restore %o0
2485	ldx	[%g1 + CPU_TMP2], %o1		! restore %o1
2486	FAST_TRAP_DONE
2487
2488/*
2489 * getpsr(void)
2490 * Note that the xcc part of the ccr is not provided.
2491 * The V8 code shows why the V9 trap is not faster:
2492 * #define GETPSR_TRAP() \
2493 *      mov %psr, %i0; jmp %l2; rett %l2+4; nop;
 *
 * Returns in %o0 a V8-style psr assembled from three pieces: the icc
 * bits taken from %tstate, the ef bit taken from %fprs.fef, and the
 * fixed V9_PSR_IMPLVER value.  Clobbers %g1 and %g2.
2494 */
2495
2496	.type	.getpsr, #function
2497.getpsr:
2498	rdpr	%tstate, %g1			! get tstate
2499	srlx	%g1, PSR_TSTATE_CC_SHIFT, %o0	! shift ccr to V8 psr
2500	set	PSR_ICC, %g2
2501	and	%o0, %g2, %o0			! mask out the rest
2502
2503	rd	%fprs, %g1			! get fprs
2504	and	%g1, FPRS_FEF, %g2		! mask out dirty upper/lower
2505	sllx	%g2, PSR_FPRS_FEF_SHIFT, %g2	! shift fef to V8 psr.ef
2506	or	%o0, %g2, %o0			! or result into psr.ef
2507
2508	set	V9_PSR_IMPLVER, %g2		! SI assigned impl/ver: 0xef
2509	or	%o0, %g2, %o0			! or psr.impl/ver
2510	FAST_TRAP_DONE
2511	SET_SIZE(.getpsr)
2512
2513/*
2514 * setpsr(newpsr)
2515 * Note that there is no support for ccr.xcc in the V9 code.
 *
 * Input: %o0 = new V8-style psr.  Only two fields are honoured:
 *	psr.icc	replaces the icc bits of the ccr saved in %tstate
 *	psr.ef	becomes %fprs.fef (the write sets the other %fprs bits to
 *		zero) and is mirrored into the lwp's fpu_fprs and fpu_en
 * Clobbers %g1-%g4.
2516 */
2517
2518	.type	.setpsr, #function
2519.setpsr:
2520	rdpr	%tstate, %g1			! get tstate
2521!	setx	TSTATE_V8_UBITS, %g2
2522	or 	%g0, CCR_ICC, %g3
2523	sllx	%g3, TSTATE_CCR_SHIFT, %g2
2524
2525	andn	%g1, %g2, %g1			! zero current user bits
2526	set	PSR_ICC, %g2
2527	and	%g2, %o0, %g2			! clear all but psr.icc bits
2528	sllx	%g2, PSR_TSTATE_CC_SHIFT, %g3	! shift to tstate.ccr.icc
2529	wrpr	%g1, %g3, %tstate		! write tstate
2530
2531	set	PSR_EF, %g2
2532	and	%g2, %o0, %g2			! clear all but fp enable bit
2533	srlx	%g2, PSR_FPRS_FEF_SHIFT, %g4	! shift ef to V9 fprs.fef
2534	wr	%g0, %g4, %fprs			! write fprs
2535
2536	CPU_ADDR(%g1, %g2)			! load CPU struct addr to %g1
2537	ldn	[%g1 + CPU_THREAD], %g2		! load thread pointer
2538	ldn	[%g2 + T_LWP], %g3		! load klwp pointer
2539	ldn	[%g3 + LWP_FPU], %g2		! get lwp_fpu pointer
2540	stuw	%g4, [%g2 + FPU_FPRS]		! write fef value to fpu_fprs
2541	srlx	%g4, 2, %g4			! shift fef value to bit 0
2542	stub	%g4, [%g2 + FPU_EN]		! write fef value to fpu_en
2543	FAST_TRAP_DONE
2544	SET_SIZE(.setpsr)
2545
2546/*
2547 * getlgrp
2548 * get home lgrpid on which the calling thread is currently executing.
 *
 * Returns:
 *	%o0	cpu_id of the current CPU (32-bit load)
 *	%o1	lpl_lgrpid of the current thread's lpl, sign-extended
 *		from 32 bits
 * Clobbers %g1 and %g2.
2549 */
2550	.type	.getlgrp, #function
2551.getlgrp:
2552	CPU_ADDR(%g1, %g2)		! load CPU struct addr to %g1 using %g2
2553	ld	[%g1 + CPU_ID], %o0	! load cpu_id
2554	ldn	[%g1 + CPU_THREAD], %g2	! load thread pointer
2555	ldn	[%g2 + T_LPL], %g2	! load lpl pointer
2556	ld	[%g2 + LPL_LGRPID], %g1	! load lpl_lgrpid
2557	sra	%g1, 0, %o1
2558	FAST_TRAP_DONE
2559	SET_SIZE(.getlgrp)
2560
2561/*
2562 * Entry for old 4.x trap (trap 0).
 *
 * Two cases, selected by lwp->lwp_pcb.pcb_trap0addr:
 *
 *   non-zero	%tnpc is redirected to that address and the original
 *		%tnpc is passed to user code in %g6 of the normal
 *		globals; returns via FAST_TRAP_DONE_CHK_INTR.
 *
 *   zero	the system call number in the normal-globals %g1 is
 *		examined: OSYS_mmap is rewritten to SYS_mmap, anything
 *		else is left alone, and the trap continues through
 *		SYSCALL(syscall_trap32).
 *
 * %l0 and %l1 are used as scratch in both cases and preserved through
 * cpu_tmp1 and cpu_tmp2 (32-bit stores/loads).
2563 */
2564	ENTRY_NP(syscall_trap_4x)
2565	CPU_ADDR(%g1, %g2)		! load CPU struct addr to %g1 using %g2
2566	ldn	[%g1 + CPU_THREAD], %g2	! load thread pointer
2567	ldn	[%g2 + T_LWP], %g2	! load klwp pointer
2568	ld	[%g2 + PCB_TRAP0], %g2	! lwp->lwp_pcb.pcb_trap0addr
2569	brz,pn	%g2, 1f			! has it been set?
2570	st	%l0, [%g1 + CPU_TMP1]	! delay - save some locals
2571	st	%l1, [%g1 + CPU_TMP2]
2572	rdpr	%tnpc, %l1		! save old tnpc
2573	wrpr	%g0, %g2, %tnpc		! setup tnpc
2574
2575	rdpr	%pstate, %l0
2576	wrpr	%l0, PSTATE_AG, %pstate	! switch to normal globals
2577	mov	%l1, %g6		! pass tnpc to user code in %g6
2578	wrpr	%l0, %g0, %pstate	! switch back to alternate globals
2579
2580	! Note that %g1 still contains CPU struct addr
2581	ld	[%g1 + CPU_TMP2], %l1	! restore locals
2582	ld	[%g1 + CPU_TMP1], %l0
2583	FAST_TRAP_DONE_CHK_INTR
25841:
	/*
	 * %l0 was already saved in the delay slot of the brz above; keep
	 * the CPU struct address in it so the locals can be restored
	 * after %g1 is switched to the normal globals.
	 */
2585	mov	%g1, %l0
2586	st	%l1, [%g1 + CPU_TMP2]
2587	rdpr	%pstate, %l1
2588	wrpr	%l1, PSTATE_AG, %pstate
2589	!
2590	! check for old syscall mmap which is the only different one which
2591	! must be the same.  Others are handled in the compatibility library.
2592	!
2593	cmp	%g1, OSYS_mmap	! compare to old 4.x mmap
2594	movz	%icc, SYS_mmap, %g1
2595	wrpr	%g0, %l1, %pstate
2596	ld	[%l0 + CPU_TMP2], %l1	! restore locals
2597	ld	[%l0 + CPU_TMP1], %l0
2598	SYSCALL(syscall_trap32)
2599	SET_SIZE(syscall_trap_4x)
2600
2601/*
2602 * Handler for software trap 9.
2603 * Set trap0 emulation address for old 4.x system call trap.
2604 * XXX - this should be a system call.
 *
 * Input: the new address in %g1 of the normal globals.  Its low two
 * bits are cleared and the result is stored (as a 32-bit value) in
 * lwp->lwp_pcb.pcb_trap0addr.  %l0 and %l1 are used as scratch and
 * preserved through cpu_tmp1 and cpu_tmp2.
2605 */
2606	ENTRY_NP(set_trap0_addr)
2607	CPU_ADDR(%g1, %g2)		! load CPU struct addr to %g1 using %g2
2608	ldn	[%g1 + CPU_THREAD], %g2	! load thread pointer
2609	ldn	[%g2 + T_LWP], %g2	! load klwp pointer
2610	st	%l0, [%g1 + CPU_TMP1]	! save some locals
2611	st	%l1, [%g1 + CPU_TMP2]
2612	rdpr	%pstate, %l0
2613	wrpr	%l0, PSTATE_AG, %pstate
2614	mov	%g1, %l1
2615	wrpr	%g0, %l0, %pstate
2616	andn	%l1, 3, %l1		! force alignment
2617	st	%l1, [%g2 + PCB_TRAP0]	! lwp->lwp_pcb.pcb_trap0addr
2618	ld	[%g1 + CPU_TMP1], %l0	! restore locals
2619	ld	[%g1 + CPU_TMP2], %l1
2620	FAST_TRAP_DONE
2621	SET_SIZE(set_trap0_addr)
2622
2623/*
2624 * mmu_trap_tl1
2625 * trap handler for unexpected mmu traps.
2626 * simply checks if the trap was a user lddf/stdf alignment trap, in which
2627 * case we go to fpu_trap or a user trap from the window handler, in which
2628 * case we go save the state on the pcb.  Otherwise, we go to ptl1_panic.
 *
 * Order of the checks made below:
 *   1. On a Panther CPU, a T_DATA_EXCEPTION with the SFSR parity bit set
 *	is a DTLB parity error and goes to .mmu_exception_is_tlb_parity
 *	with %g1 = T_DATA_EXCEPTION, %g2 = tag access, %g3 = SFSR.
 *   2. If cpu_m.tl1_hdlr is set, the trap was expected: the flag is
 *	cleared and the fault is handed to sfmmu_mmu_trap.
 *   3. Otherwise the trap at TL-1 must be a window trap whose %tpc lies
 *	inside [trap_table, etrap_table); the fault address is left in
 *	%g6 and execution resumes at the window handler's fault entry
 *	(WTRAP_FAULTOFF).  Anything else is ptl1_panic(PTL1_BAD_MMUTRAP).
2629 */
2630	.type	mmu_trap_tl1, #function
2631mmu_trap_tl1:
2632#ifdef	TRAPTRACE
2633	TRACE_PTR(%g5, %g6)
2634	GET_TRACE_TICK(%g6)
2635	stxa	%g6, [%g5 + TRAP_ENT_TICK]%asi
2636	rdpr	%tl, %g6
2637	stha	%g6, [%g5 + TRAP_ENT_TL]%asi
2638	rdpr	%tt, %g6
2639	stha	%g6, [%g5 + TRAP_ENT_TT]%asi
2640	rdpr	%tstate, %g6
2641	stxa	%g6, [%g5 + TRAP_ENT_TSTATE]%asi
2642	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2643	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2644	rdpr	%tpc, %g6
2645	stna	%g6, [%g5 + TRAP_ENT_TPC]%asi
2646	set	MMU_SFAR, %g6
2647	ldxa	[%g6]ASI_DMMU, %g6
2648	stxa	%g6, [%g5 + TRAP_ENT_F1]%asi
2649	CPU_PADDR(%g7, %g6);
2650	add	%g7, CPU_TL1_HDLR, %g7
2651	lda	[%g7]ASI_MEM, %g6
2652	stxa	%g6, [%g5 + TRAP_ENT_F2]%asi
2653	set	0xdeadbeef, %g6
2654	stna	%g6, [%g5 + TRAP_ENT_F3]%asi
2655	stna	%g6, [%g5 + TRAP_ENT_F4]%asi
2656	TRACE_NEXT(%g5, %g6, %g7)
2657#endif /* TRAPTRACE */
2658
2659	GET_CPU_IMPL(%g5)
2660	cmp	%g5, PANTHER_IMPL
2661	bne	mmu_trap_tl1_4
2662	  nop
2663	rdpr	%tt, %g5
2664	cmp	%g5, T_DATA_EXCEPTION
2665	bne	mmu_trap_tl1_4
2666	  nop
2667	wr	%g0, ASI_DMMU, %asi
2668	ldxa	[MMU_SFSR]%asi, %g5
2669	mov	1, %g6
2670	sllx	%g6, PN_SFSR_PARITY_SHIFT, %g6
2671	andcc	%g5, %g6, %g0
	/*
	 * The ldxa of MMU_TAG_ACCESS below is the (non-annulled) delay
	 * slot of this bz, so it executes on both paths.
	 */
2672	bz	mmu_trap_tl1_4
2673
2674	/*
2675	 * We are running on a Panther and have hit a DTLB parity error.
2676	 */
2677	ldxa	[MMU_TAG_ACCESS]%asi, %g2
2678	mov	%g5, %g3
2679	ba,pt	%xcc, .mmu_exception_is_tlb_parity
2680	mov	T_DATA_EXCEPTION, %g1
2681
2682mmu_trap_tl1_4:
2683	CPU_PADDR(%g7, %g6);
2684	add     %g7, CPU_TL1_HDLR, %g7		! %g7 = &cpu_m.tl1_hdlr (PA)
2685	/*
2686	 * AM is cleared on trap, so addresses are 64 bit
2687	 */
2688	lda     [%g7]ASI_MEM, %g6
2689	brz,a,pt %g6, 1f
2690	  nop
2691	/*
2692	 * We are going to update cpu_m.tl1_hdlr using physical address.
2693	 * Flush the D$ line, so that stale data won't be accessed later.
2694	 */
2695	CPU_ADDR(%g6, %g5)
2696	add     %g6, CPU_TL1_HDLR, %g6		! %g6 = &cpu_m.tl1_hdlr (VA)
	/*
	 * Three-way split on the CPU implementation:
	 *	impl >= CHEETAH_IMPL	invalidate by PA via ASI_DC_INVAL
	 *	impl >= SPITFIRE_IMPL	clear the D$ tag selected by
	 *				VA & dcache_line_mask
	 *	otherwise		no D$ flush
	 * The second compare is issued from the delay slot of the first
	 * branch and tested at label 3.
	 */
2697	GET_CPU_IMPL(%g5)
2698	cmp	%g5, CHEETAH_IMPL
2699	bl,pt	%icc, 3f
2700	 cmp	%g5, SPITFIRE_IMPL
2701	stxa	%g0, [%g7]ASI_DC_INVAL
2702	membar	#Sync
2703	ba,pt	%xcc, 2f
2704	 nop
27053:
2706	bl,pt	%icc, 2f
2707	 sethi	%hi(dcache_line_mask), %g5
2708	ld	[%g5 + %lo(dcache_line_mask)], %g5
2709	and	%g6, %g5, %g5
2710	stxa	%g0, [%g5]ASI_DC_TAG
2711	membar	#Sync
27122:
2713	sta     %g0, [%g7]ASI_MEM
2714	SWITCH_GLOBALS				! back to mmu globals
2715	ba,a,pt	%xcc, sfmmu_mmu_trap		! handle page faults
27161:
	/*
	 * %g5 = trap type at the current TL, %g6 = trap type at TL-1
	 * (read by temporarily lowering %tl), %g7 = current TL.
	 */
2717	rdpr	%tt, %g5
2718	rdpr	%tl, %g7
2719	sub	%g7, 1, %g6
2720	wrpr	%g6, %tl
2721	rdpr	%tt, %g6
2722	wrpr	%g7, %tl
2723	and	%g6, WTRAP_TTMASK, %g6
2724	cmp	%g6, WTRAP_TYPE
2725	bne,a,pn %xcc, ptl1_panic
2726	mov	PTL1_BAD_MMUTRAP, %g1
2727	rdpr	%tpc, %g7
2728	/* tpc should be in the trap table */
2729	set	trap_table, %g6
2730	cmp	%g7, %g6
2731	blt,a,pn %xcc, ptl1_panic
2732	  mov	PTL1_BAD_MMUTRAP, %g1
2733	set	etrap_table, %g6
2734	cmp	%g7, %g6
2735	bge,a,pn %xcc, ptl1_panic
2736	  mov	PTL1_BAD_MMUTRAP, %g1
	/*
	 * Fault address into %g6: SFAR for an alignment trap, the tag
	 * access register for everything else.
	 */
2737	cmp	%g5, T_ALIGNMENT
2738	move	%icc, MMU_SFAR, %g6
2739	movne	%icc, MMU_TAG_ACCESS, %g6
2740	ldxa	[%g6]ASI_DMMU, %g6
2741	andn	%g7, WTRAP_ALIGN, %g7	/* 128 byte aligned */
2742	add	%g7, WTRAP_FAULTOFF, %g7
2743	wrpr	%g0, %g7, %tnpc
2744	done
2745	SET_SIZE(mmu_trap_tl1)
2746
2747/*
2748 * Several traps use kmdb_trap and kmdb_trap_tl1 as their handlers.  These
2749 * traps are valid only when kmdb is loaded.  When the debugger is active,
2750 * the code below is rewritten to transfer control to the appropriate
2751 * debugger entry points.
2752 */
	/*
	 * kmdb_trap: as assembled, the annulling "ba,a" always transfers
	 * to trap_table0, so the jmp/nop after it are never reached.
	 * NOTE(review): presumably the run-time rewrite described above
	 * replaces the first instruction so that "jmp %g1 + 0" becomes
	 * live -- confirm against the kmdb activation code.
	 */
2753	.global	kmdb_trap
2754	.align	8
2755kmdb_trap:
2756	ba,a	trap_table0
2757	jmp	%g1 + 0
2758	nop
2759
	/*
	 * kmdb_trap_tl1: same three-instruction stub as kmdb_trap.  As
	 * assembled, the annulling "ba,a" always transfers to
	 * trap_table0, so the jmp/nop after it are never reached.
	 * NOTE(review): presumably rewritten at run time when the
	 * debugger is activated -- confirm against the kmdb activation
	 * code.
	 */
2760	.global	kmdb_trap_tl1
2761	.align	8
2762kmdb_trap_tl1:
2763	ba,a	trap_table0
2764	jmp	%g1 + 0
2765	nop
2766
2767/*
2768 * This entry is copied from OBP's trap table during boot.
 *
 * Until then the slot holds whatever the NOT macro (defined outside this
 * part of the file) expands to.  kctx_obp_bpt below ends by jumping to
 * this label.
2769 */
2770	.global	obp_bpt
2771	.align	8
2772obp_bpt:
2773	NOT
2774
2775/*
2776 * if kernel, set PCONTEXT to 0 for debuggers
2777 * if user, clear nucleus page sizes
 *
 * Flow:
 *   - Read the primary context register.  If the bits at and above
 *     CTXREG_NEXT_SHIFT (the nucleus page-size field) are already zero,
 *     nothing needs fixing: jump straight to obp_bpt.
 *   - Otherwise compute the new register value in %g3: 0 when the
 *     context-number bits (CTXREG_CTX_MASK) are zero, else the old value
 *     with the nucleus page-size bits cleared.
 *   - Demap all D- and I-TLB entries, write the new PCONTEXT, then
 *     membar #Sync and flush, and jump to obp_bpt.
 * Clobbers %g1-%g4.
2778 */
2779	.global kctx_obp_bpt
2780kctx_obp_bpt:
2781	set	obp_bpt, %g2
27821:
2783	mov	MMU_PCONTEXT, %g1
2784	ldxa	[%g1]ASI_DMMU, %g1
2785	srlx	%g1, CTXREG_NEXT_SHIFT, %g3
2786	brz,pt	%g3, 3f			! nucleus pgsz is 0, no problem
2787	  sllx	%g3, CTXREG_NEXT_SHIFT, %g3
2788	set	CTXREG_CTX_MASK, %g4	! check Pcontext
2789	btst	%g4, %g1
2790	bz,a,pt	%xcc, 2f
2791	  clr	%g3			! kernel:  PCONTEXT=0
2792	xor	%g3, %g1, %g3		! user:	clr N_pgsz0/1 bits
27932:
2794	set	DEMAP_ALL_TYPE, %g1
2795	stxa	%g0, [%g1]ASI_DTLB_DEMAP
2796	stxa	%g0, [%g1]ASI_ITLB_DEMAP
2797	mov	MMU_PCONTEXT, %g1
2798	stxa	%g3, [%g1]ASI_DMMU
2799        membar  #Sync
2800	sethi	%hi(FLUSH_ADDR), %g1
2801	flush	%g1			! flush required by immu
28023:
2803	jmp	%g2
2804	  nop
2805
2806
2807#ifdef	TRAPTRACE
2808/*
2809 * TRAPTRACE support.
2810 * labels here are branched to with "rd %pc, %g7" in the delay slot.
2811 * Return is done by "jmp %g7 + 4".
2812 */
2813
/*
 * trace_gen: record a generic trap-trace entry holding the tick, %tl,
 * %tt, %tstate, %sp and %tpc of the current trap.
 *
 * In:	%g7 = pc of the calling sequence; returns to %g7 + 4
 * Clobbers %g3-%g6.
 */
2814trace_gen:
2815	TRACE_PTR(%g3, %g6)
2816	GET_TRACE_TICK(%g6)
2817	stxa	%g6, [%g3 + TRAP_ENT_TICK]%asi
2818	rdpr	%tl, %g6
2819	stha	%g6, [%g3 + TRAP_ENT_TL]%asi
2820	rdpr	%tt, %g6
2821	stha	%g6, [%g3 + TRAP_ENT_TT]%asi
2822	rdpr	%tstate, %g6
2823	stxa	%g6, [%g3 + TRAP_ENT_TSTATE]%asi
2824	stna	%sp, [%g3 + TRAP_ENT_SP]%asi
2825	rdpr	%tpc, %g6
2826	stna	%g6, [%g3 + TRAP_ENT_TPC]%asi
2827	TRACE_NEXT(%g3, %g4, %g5)
2828	jmp	%g7 + 4
2829	nop
2830
/*
 * trace_win: record a window-trap trace entry using the locals as
 * scratch (TRACE_WIN_INFO is given %l0-%l2).
 *
 * In:	%l4 = pc of the calling sequence; returns to %l4 + 4
 * %l0-%l2 are zeroed before returning (%l0 in the delay slot of the
 * jmp); %l4 is left for the caller to clean.
 */
2831trace_win:
2832	TRACE_WIN_INFO(0, %l0, %l1, %l2)
2833	! Keep the locals as clean as possible, caller cleans %l4
2834	clr	%l2
2835	clr	%l1
2836	jmp	%l4 + 4
2837	  clr	%l0
2838
2839/*
2840 * Trace a tsb hit
2841 * g1 = tsbe pointer (in/clobbered)
2842 * g2 = tag access register (in)
2843 * g3 - g4 = scratch (clobbered)
2844 * g5 = tsbe data (in)
2845 * g6 = scratch (clobbered)
2846 * g7 = pc we jumped here from (in)
 *
 * The record itself is written by the TRACE_TSBHIT macro; control then
 * returns to %g7 + 4.
2847 */
2848
2849	! Do not disturb %g5, it will be used after the trace
2850	ALTENTRY(trace_tsbhit)
2851	TRACE_TSBHIT(0)
2852	jmp	%g7 + 4
2853	nop
2854
2855/*
2856 * Trace a TSB miss
2857 *
2858 * g1 = tsb8k pointer (in)
2859 * g2 = tag access register (in)
2860 * g3 = tsb4m pointer (in)
2861 * g4 = tsbe tag (in/clobbered)
2862 * g5 - g6 = scratch (clobbered)
2863 * g7 = pc we jumped here from (in)
 *
 * Trace entry layout written below:
 *	TICK	trace tick		SP	tag access (%g2)
 *	F1	tsbe tag (%g4)		F2	%tnpc
 *	F3	tsb8k pointer (%g1)	F4	%g1 >> 32
 *	TPC	%tpc			TL	%tl
 *	TT	%tt | TT_MMU_MISS	TR	tsb4m pointer (%g3)
 *	TSTATE	tag target, read from the I-MMU when %tt is
 *		FAST_IMMU_MISS_TT and from the D-MMU otherwise
 * Returns to %g7 + 4.
2864 */
2865	.global	trace_tsbmiss
2866trace_tsbmiss:
2867	membar	#Sync
2868	sethi	%hi(FLUSH_ADDR), %g6
2869	flush	%g6
2870	TRACE_PTR(%g5, %g6)
2871	GET_TRACE_TICK(%g6)
2872	stxa	%g6, [%g5 + TRAP_ENT_TICK]%asi
2873	stxa	%g2, [%g5 + TRAP_ENT_SP]%asi		! tag access
2874	stxa	%g4, [%g5 + TRAP_ENT_F1]%asi		! tsb tag
2875	rdpr	%tnpc, %g6
2876	stxa	%g6, [%g5 + TRAP_ENT_F2]%asi
2877	stna	%g1, [%g5 + TRAP_ENT_F3]%asi		! tsb8k pointer
2878	srlx	%g1, 32, %g6
2879	stna	%g6, [%g5 + TRAP_ENT_F4]%asi		! huh?
2880	rdpr	%tpc, %g6
2881	stna	%g6, [%g5 + TRAP_ENT_TPC]%asi
2882	rdpr	%tl, %g6
2883	stha	%g6, [%g5 + TRAP_ENT_TL]%asi
2884	rdpr	%tt, %g6
2885	or	%g6, TT_MMU_MISS, %g4
2886	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
2887	cmp	%g6, FAST_IMMU_MISS_TT
2888	be,a	%icc, 1f
2889	  ldxa	[%g0]ASI_IMMU, %g6
2890	ldxa	[%g0]ASI_DMMU, %g6
28911:	stxa	%g6, [%g5 + TRAP_ENT_TSTATE]%asi	! tag target
2892	stxa	%g3, [%g5 + TRAP_ENT_TR]%asi		! tsb4m pointer
2893	TRACE_NEXT(%g5, %g4, %g6)
2894	jmp	%g7 + 4
2895	nop
2896
2897/*
 * trace_dataprot: record a trap-trace entry with the tick, %tpc,
 * %tstate, %tl and %tt of the current trap and the tag access register
 * in the SP slot; the TR and F1-F4 slots are zeroed.
 *
2898 * g2 = tag access register (in)
2899 * g3 = ctx number (in)
 * g1, g4 - g6 = scratch (clobbered)
 * g7 = pc we jumped here from (in); returns to %g7 + 4
2900 */
2901trace_dataprot:
2902	membar	#Sync
2903	sethi	%hi(FLUSH_ADDR), %g6
2904	flush	%g6
2905	TRACE_PTR(%g1, %g6)
2906	GET_TRACE_TICK(%g6)
2907	stxa	%g6, [%g1 + TRAP_ENT_TICK]%asi
2908	rdpr	%tpc, %g6
2909	stna	%g6, [%g1 + TRAP_ENT_TPC]%asi
2910	rdpr	%tstate, %g6
2911	stxa	%g6, [%g1 + TRAP_ENT_TSTATE]%asi
2912	stxa	%g2, [%g1 + TRAP_ENT_SP]%asi		! tag access reg
2913	stxa	%g0, [%g1 + TRAP_ENT_TR]%asi
2914	stxa	%g0, [%g1 + TRAP_ENT_F1]%asi
2915	stxa	%g0, [%g1 + TRAP_ENT_F2]%asi
2916	stxa	%g0, [%g1 + TRAP_ENT_F3]%asi
2917	stxa	%g0, [%g1 + TRAP_ENT_F4]%asi
2918	rdpr	%tl, %g6
2919	stha	%g6, [%g1 + TRAP_ENT_TL]%asi
2920	rdpr	%tt, %g6
2921	stha	%g6, [%g1 + TRAP_ENT_TT]%asi
2922	TRACE_NEXT(%g1, %g4, %g5)
2923	jmp	%g7 + 4
2924	nop
2925
2926#endif /* TRAPTRACE */
2927
2928/*
2929 * expects offset into tsbmiss area in %g1 and return pc in %g7
 *
 * Increments the 32-bit counter at that offset in this CPU's slice of
 * tsbmiss_area (tsbmiss_area + (cpu index << TSBMISS_SHIFT)) with a
 * plain load/add/store, then returns to %g7 + 4; the store sits in the
 * delay slot of the return jmp.  Clobbers %g5 and %g6.
2930 */
2931stat_mmu:
2932	CPU_INDEX(%g5, %g6)
2933	sethi	%hi(tsbmiss_area), %g6
2934	sllx	%g5, TSBMISS_SHIFT, %g5
2935	or	%g6, %lo(tsbmiss_area), %g6
2936	add	%g6, %g5, %g6		/* g6 = tsbmiss area */
2937	ld	[%g6 + %g1], %g5
2938	add	%g5, 1, %g5
2939	jmp	%g7 + 4
2940	st	%g5, [%g6 + %g1]
2941
2942
2943/*
2944 * fast_trap_done, fast_trap_done_chk_intr:
2945 *
2946 * Due to the design of UltraSPARC pipeline, pending interrupts are not
2947 * taken immediately after a RETRY or DONE instruction which causes IE to
2948 * go from 0 to 1. Instead, the instruction at %tpc or %tnpc is allowed
2949 * to execute first before taking any interrupts. If that instruction
2950 * results in other traps, and if the corresponding trap handler runs
2951 * entirely at TL=1 with interrupts disabled, then pending interrupts
2952 * won't be taken until after yet another instruction following the %tpc
2953 * or %tnpc.
2954 *
2955 * A malicious user program can use this feature to block out interrupts
2956 * for extended durations, which can result in send_mondo_timeout kernel
2957 * panic.
2958 *
2959 * This problem is addressed by servicing any pending interrupts via
2960 * sys_trap before returning back to the user mode from a fast trap
2961 * handler. The "done" instruction within a fast trap handler, which
2962 * runs entirely at TL=1 with interrupts disabled, is replaced with the
2963 * FAST_TRAP_DONE macro, which branches control to this fast_trap_done
2964 * entry point.
2965 *
2966 * We check for any pending interrupts here and force a sys_trap to
2967 * service those interrupts, if any. To minimize overhead, pending
2968 * interrupts are checked only if the %tpc happens to be at a 16K boundary,
2969 * which allows a malicious program to execute at most 4K consecutive
2970 * instructions before we service any pending interrupts. If a worst
2971 * case fast trap handler takes about 2 usec, then interrupts will be
2972 * blocked for at most 8 msec, less than a clock tick.
2973 *
2974 * For the cases where we don't know if the %tpc will cross a 16K
2975 * boundary, we can't use the above optimization and always process
2976 * any pending interrupts via the fast_trap_done_chk_intr entry point.
2977 *
2978 * Entry Conditions:
2979 * 	%pstate		am:0 priv:1 ie:0
2980 * 			globals are AG (not normal globals)
2981 */
2982
/* fast_trap_done: finish with "done" unless the low 14 bits of %tpc */
/* are all zero, in which case join the pending-interrupt check at */
/* local label 1.  Uses %g5 and %g6 as scratch. */
2983	.global	fast_trap_done, fast_trap_done_chk_intr
2984fast_trap_done:
2985	rdpr	%tpc, %g5
2986	sethi	%hi(0xffffc000), %g6	! 1's complement of 0x3fff
2987	andncc	%g5, %g6, %g0		! check lower 14 bits of %tpc
/* The branch is annulling, so the load of the interrupt receive */
/* status into %g5 happens only when the branch to 1 is taken. */
2988	bz,a,pn	%icc, 1f		! branch if zero (lower 32 bits only)
2989	  ldxa	[%g0]ASI_INTR_RECEIVE_STATUS, %g5
2990	done
2991
/* fast_trap_done_chk_intr, also entered under the name */
/* fast_trap_done_check_interrupts: perform the pending-interrupt */
/* check unconditionally. */
2992	ALTENTRY(fast_trap_done_check_interrupts)
2993fast_trap_done_chk_intr:
2994	ldxa	[%g0]ASI_INTR_RECEIVE_STATUS, %g5
2995
/* Common check: work is pending when the IRSR_BUSY bit of the */
/* receive status is set or SOFTINT reads non-zero. */
29961:	rd	SOFTINT, %g6
2997	and	%g5, IRSR_BUSY, %g5
2998	orcc	%g5, %g6, %g0
2999	bnz,pn	%xcc, 2f		! branch if any pending intr
3000	nop
/* Nothing pending: leave the trap handler. */
3001	done
3002
30032:
3004	/*
3005	 * We get here if there are any pending interrupts.
3006	 * Adjust %tpc/%tnpc as we'll be resuming via "retry"
3007	 * instruction.
3008	 */
/* New %tpc = old %tnpc; new %tnpc = old %tnpc + 4. */
3009	rdpr	%tnpc, %g5
3010	wrpr	%g0, %g5, %tpc
3011	add	%g5, 4, %g5
3012	wrpr	%g0, %g5, %tnpc
3013
3014	/*
3015	 * Force a dummy sys_trap call so that interrupts can be serviced.
3016	 */
/* sys_trap is entered with %g1 = address of fast_trap_dummy_call and */
/* %g4 = -1 (set in the delay slot).  Presumably these are its */
/* handler and PIL arguments -- confirm against sys_trap. */
3017	set	fast_trap_dummy_call, %g1
3018	ba,pt	%xcc, sys_trap
3019	  mov	-1, %g4
3020
/* The dummy handler does no work; it returns immediately. */
3021fast_trap_dummy_call:
3022	retl
3023	nop
3024
3025/*
3026 * Currently the brand syscall interposition code is not enabled by
3027 * default.  Instead, when a branded zone is first booted the brand
3028 * infrastructure will patch the trap table so that the syscall
3029 * entry points are redirected to syscall_wrapper32 and syscall_wrapper
3030 * for ILP32 and LP64 syscalls respectively.  This is done in
3031 * brand_plat_interposition_enable().  Note that the syscall wrappers
3032 * below do not collect any trap trace data since the syscall hot patch
3033 * points are reached after trap trace data has already been collected.
3034 */
/* BRAND_CALLBACK(callback_id): follow CPU -> thread -> proc -> brand. */
/* If the brand pointer is non-NULL and the machops entry at byte */
/* offset (callback_id << 3) is non-NULL, jump to that entry; */
/* otherwise continue at local label 1, which ends the macro. */
/* Clobbers %g1, %g2 and %g3. */
/* The "mov %pc, %g1" sits in the delay slot of the second brz, so */
/* %g1 is written even when that branch is taken. */
/* The "+ 16" steps over the four instructions mov/add/jmp/nop, so */
/* the return address handed to the callback in %g1 is label 1. */
3035#define	BRAND_CALLBACK(callback_id)					    \
3036	CPU_ADDR(%g2, %g1)		/* load CPU struct addr to %g2	*/ ;\
3037	ldn	[%g2 + CPU_THREAD], %g3	/* load thread pointer		*/ ;\
3038	ldn	[%g3 + T_PROCP], %g3	/* get proc pointer		*/ ;\
3039	ldn	[%g3 + P_BRAND], %g3	/* get brand pointer		*/ ;\
3040	brz	%g3, 1f			/* No brand?  No callback. 	*/ ;\
3041	nop 								   ;\
3042	ldn	[%g3 + B_MACHOPS], %g3	/* get machops list		*/ ;\
3043	ldn	[%g3 + (callback_id << 3)], %g3 			   ;\
3044	brz	%g3, 1f							   ;\
3045	/*								    \
3046	 * This isn't pretty.  We want a low-latency way for the callback   \
3047	 * routine to decline to do anything.  We just pass in an address   \
3048	 * the routine can directly jmp back to, pretending that nothing    \
3049	 * has happened.						    \
3050	 * 								    \
3051	 * %g1: return address (where the brand handler jumps back to)	    \
3052	 * %g2: address of CPU structure				    \
3053	 * %g3: address of brand handler (where we will jump to)	    \
3054	 */								    \
3055	mov	%pc, %g1						   ;\
3056	add	%g1, 16, %g1						   ;\
3057	jmp	%g3							   ;\
3058	nop								   ;\
30591:
3060
/* syscall_wrapper32: brand-aware entry for ILP32 syscalls. */
/* BRAND_CALLBACK offers the trap to the brand's BRAND_CB_SYSCALL32 */
/* handler.  With no brand, no handler, or a handler that jumps back */
/* to the address in %g1, execution continues with */
/* SYSCALL_NOTT(syscall_trap32). */
3061	ENTRY_NP(syscall_wrapper32)
3062	BRAND_CALLBACK(BRAND_CB_SYSCALL32)
3063	SYSCALL_NOTT(syscall_trap32)
3064	SET_SIZE(syscall_wrapper32)
3065
/* syscall_wrapper: brand-aware entry for LP64 syscalls. */
/* BRAND_CALLBACK offers the trap to the brand's BRAND_CB_SYSCALL */
/* handler.  With no brand, no handler, or a handler that jumps back */
/* to the address in %g1, execution continues with */
/* SYSCALL_NOTT(syscall_trap). */
3066	ENTRY_NP(syscall_wrapper)
3067	BRAND_CALLBACK(BRAND_CB_SYSCALL)
3068	SYSCALL_NOTT(syscall_trap)
3069	SET_SIZE(syscall_wrapper)
3070
3071#endif	/* lint */
3072