xref: /illumos-gate/usr/src/uts/sparc/v9/ml/sparcv9_subr.S (revision f37b3cbb6f67aaea5eec1c335bdc7bf432867d64)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * General assembly language routines.
28 * It is the intent of this file to contain routines that are
29 * independent of the specific kernel architecture, and those that are
30 * common across kernel architectures.
31 * As architectures diverge, and implementations of specific
32 * architecture-dependent routines change, the routines should be moved
33 * from this file into the respective ../`arch -k`/subr.s file.
34 * Or, if you want to be really nice, move them to a file whose
35 * name has something to do with the routine you are moving.
36 */
37
38#include <sys/asm_linkage.h>
39#include <sys/privregs.h>
40#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
41#include <sys/machthread.h>
42#include <sys/clock.h>
43#include <sys/psr_compat.h>
44#include <sys/isa_defs.h>
45#include <sys/dditypes.h>
46#include <sys/panic.h>
47#include <sys/machlock.h>
48#include <sys/ontrap.h>
49
50#include "assym.h"
51
52	.seg	".text"
53	.align	4
54
55/*
56 * Macro to raise processor priority level.
57 * Avoid dropping processor priority if already at high level.
58 * Also avoid going below CPU->cpu_base_spl, which could've just been set by
59 * a higher-level interrupt thread that just blocked.
60 *
61 * level can be %o0 (not other regs used here) or a constant.
 *
 * The expansion is a complete leaf-routine body: it ends with retl and
 * returns the PIL that was read on entry in %o0.  %o1 and %o2 are used as
 * scratch.  If the current PIL is already >= level, %pil is not written.
 * Otherwise %pil is held at PIL_MAX while CPU_BASE_SPL is read, and is then
 * set to the larger of level and CPU_BASE_SPL.
62 */
63#define	RAISE(level) \
64	rdpr	%pil, %o1;		/* get current PIL */		\
65	cmp	%o1, level;		/* is PIL high enough? */	\
66	bge	1f;			/* yes, return */		\
67	nop;								\
68	wrpr	%g0, PIL_MAX, %pil;	/* freeze CPU_BASE_SPL */	\
69	ldn	[THREAD_REG + T_CPU], %o2;				\
70	ld	[%o2 + CPU_BASE_SPL], %o2;				\
71	cmp	%o2, level;		/* compare new to base */	\
72	movl	%xcc, level, %o2;	/* use new if base lower */	\
73	wrpr	%g0, %o2, %pil;						\
741:									\
75	retl;								\
76	mov	%o1, %o0		/* return old PIL */
77
78/*
79 * Macro to raise processor priority level to level >= DISP_LEVEL.
80 * Doesn't require comparison to CPU->cpu_base_spl.
81 *
82 * level can be %o0 (not other regs used here) or a constant.
 *
 * The expansion is a complete leaf-routine body ending in retl.  %pil is
 * written only when the current PIL is below level; the PIL read on entry
 * is returned in %o0.  %o1 is used as scratch.
83 */
84#define	RAISE_HIGH(level) \
85	rdpr	%pil, %o1;		/* get current PIL */		\
86	cmp	%o1, level;		/* is PIL high enough? */	\
87	bge	1f;			/* yes, return */		\
88	nop;								\
89	wrpr	%g0, level, %pil;	/* use chosen value */		\
901:									\
91	retl;								\
92	mov	%o1, %o0		/* return old PIL */
93
94/*
95 * Macro to set the priority to a specified level.
96 * Avoid dropping the priority below CPU->cpu_base_spl.
97 *
98 * level can be %o0 (not other regs used here) or a constant.  It is
99 * compared against CPU_BASE_SPL and written to %pil as a plain PIL value.
 *
 * Unlike RAISE, this may lower the PIL.  %pil is held at PIL_MAX while
 * CPU_BASE_SPL is read, then set to the larger of level and CPU_BASE_SPL.
 * The expansion ends in retl and returns the PIL read on entry in %o0;
 * %o1 and %o2 are used as scratch.
100 */
101#define SETPRI(level) \
102	rdpr	%pil, %o1;		/* get current PIL */		\
103	wrpr	%g0, PIL_MAX, %pil;	/* freeze CPU_BASE_SPL */	\
104	ldn	[THREAD_REG + T_CPU], %o2;				\
105	ld	[%o2 + CPU_BASE_SPL], %o2;				\
106	cmp	%o2, level;		/* compare new to base */	\
107	movl	%xcc, level, %o2;	/* use new if base lower */	\
108	wrpr	%g0, %o2, %pil;						\
109	retl;								\
110	mov	%o1, %o0		/* return old PIL */
111
112/*
113 * Macro to set the priority to a specified level at or above LOCK_LEVEL.
114 * Doesn't require comparison to CPU->cpu_base_spl.
115 *
116 * level can be %o0 (not other regs used here) or a constant; it is
117 * written to %pil unchanged.
 *
 * The expansion ends in retl and returns the PIL read on entry in %o0;
 * %o1 is used as scratch.
118 */
119#define	SETPRI_HIGH(level) \
120	rdpr	%pil, %o1;		/* get current PIL */		\
121	wrpr	%g0, level, %pil;					\
122	retl;								\
123	mov	%o1, %o0		/* return old PIL */
124
125	/*
126	 * Berkeley 4.3 introduced symbolically named interrupt levels
127	 * as a way to deal with priority in a machine independent fashion.
128	 * Numbered priorities are machine specific, and should be
129	 * discouraged where possible.
130	 *
131	 * Note, for the machine specific priorities there are
132	 * examples listed for devices that use a particular priority.
133	 * It should not be construed that all devices of that
134	 * type should be at that priority.  It is currently where
135	 * the current devices fit into the priority scheme based
136	 * upon time criticalness.
137	 *
138	 * The underlying assumption of these assignments is that
139	 * SPARC9 IPL 10 is the highest level from which a device
140	 * routine can call wakeup.  Devices that interrupt from higher
141	 * levels are restricted in what they can do.  If they need
142	 * kernel services they should schedule a routine at a lower
143	 * level (via software interrupt) to do the required
144	 * processing.
145	 *
146	 * Examples of this higher usage:
147	 *	Level	Usage
148	 *	15	Asynchronous memory exceptions
149	 *	14	Profiling clock (and PROM uart polling clock)
150	 *	13	Audio device
151	 *	12	Serial ports
152	 *	11	Floppy controller
153	 *
154	 * The serial ports request lower level processing on level 6.
155	 * Audio and floppy request lower level processing on level 4.
156	 *
157	 * Also, almost all splN routines (where N is a number or a
158	 * mnemonic) will do a RAISE(), on the assumption that they are
159	 * never used to lower our priority.
160	 * The exceptions are:
161	 *	spl8()		Because you can't be above 15 to begin with!
162	 *	splzs()		Because this is used at boot time to lower our
163	 *			priority, to allow the PROM to poll the uart.
164	 *	spl0()		Used to lower priority to 0.
165	 */
166
167	/* locks out all interrupts, including memory errors */
	/*
	 * spl8: unconditionally writes 15 to %pil (SETPRI_HIGH) and returns
	 * the previous PIL in %o0.
	 */
168	ENTRY(spl8)
169	SETPRI_HIGH(15)
170	SET_SIZE(spl8)
171
172	/* just below the level that profiling runs */
	/*
	 * spl7: raises %pil to 13 if it is currently lower (RAISE_HIGH);
	 * never lowers it.  Returns the previous PIL in %o0.
	 */
173	ENTRY(spl7)
174	RAISE_HIGH(13)
175	SET_SIZE(spl7)
176
177	/* sun specific - highest priority onboard serial i/o zs ports */
	/*
	 * splzs: unconditionally writes 12 to %pil (SETPRI_HIGH), so it can
	 * lower as well as raise the PIL.  Returns the previous PIL in %o0.
	 */
178	ENTRY(splzs)
179	SETPRI_HIGH(12)	/* Can't be a RAISE, as it's used to lower us */
180	SET_SIZE(splzs)
181
182	/*
183	 * should lock out clocks and all interrupts,
184	 * as you can see, there are exceptions
	 *
	 * splhi, splhigh, spl6 and i_ddi_splhigh are four names for the same
	 * code: raise %pil to DISP_LEVEL if it is currently lower
	 * (RAISE_HIGH), and return the previous PIL in %o0.
185	 */
186	ENTRY(splhi)
187	ALTENTRY(splhigh)
188	ALTENTRY(spl6)
189	ALTENTRY(i_ddi_splhigh)
190	RAISE_HIGH(DISP_LEVEL)
191	SET_SIZE(i_ddi_splhigh)
192	SET_SIZE(spl6)
193	SET_SIZE(splhigh)
194	SET_SIZE(splhi)
195
196	/* allow all interrupts */
	/*
	 * spl0: SETPRI(0) - sets %pil to 0, or to CPU_BASE_SPL if that is
	 * higher.  Returns the previous PIL in %o0.
	 */
197	ENTRY(spl0)
198	SETPRI(0)
199	SET_SIZE(spl0)
200
201/*
202 * splx - set PIL back to that indicated by the old %pil passed as an argument,
203 * or to the CPU's base priority, whichever is higher.
 *
 * %o0 holds the PIL to install on entry and the PIL that was in effect
 * before the call on return.  i_ddi_splx is an alternate name for the
 * same code.
204 */
205
206	ENTRY(splx)
207	ALTENTRY(i_ddi_splx)
208	SETPRI(%o0)		/* set PIL */
209	SET_SIZE(i_ddi_splx)
210	SET_SIZE(splx)
211
212/*
213 * splr()
214 *
215 * splr is like splx but will only raise the priority and never drop it
216 * Be careful not to set priority lower than CPU->cpu_base_spl,
217 * even though it seems we're raising the priority, it could be set higher
218 * at any time by an interrupt routine, so we must block interrupts and
219 * look at CPU->cpu_base_spl.
 *
 * %o0 holds the requested PIL on entry and the previous PIL on return.
220 */
221
222	ENTRY(splr)
223	RAISE(%o0)
224	SET_SIZE(splr)
225
226/*
227 * on_fault()
228 * Catch lofault faults. Like setjmp except it returns one
229 * if code following causes uncorrectable fault. Turned off
230 * by calling no_fault().
 *
 * %o0 is the jump buffer handed on to setjmp.  It is also remembered in
 * the thread's t_onfault so that catch_fault can longjmp to it.  The
 * initial return (through setjmp) is 0; the return after a fault (through
 * longjmp) is 1.
231 */
232
233	ENTRY(on_fault)
234	membar	#Sync			! sync error barrier (see copy.s)
235	stn	%o0, [THREAD_REG + T_ONFAULT]	! remember jump buffer
236	set	catch_fault, %o1
237	b	setjmp			! let setjmp do the rest
238	stn	%o1, [THREAD_REG + T_LOFAULT]	! put catch_fault in t_lofault
239
	! catch_fault is the address installed in t_lofault above.  It fetches
	! the saved jump buffer, disarms both t_onfault and t_lofault, and
	! longjmps back to the on_fault() call site.
240catch_fault:
241	save	%sp, -SA(WINDOWSIZE), %sp ! goto next window so that we can rtn
242	ldn	[THREAD_REG + T_ONFAULT], %o0
243	membar	#Sync				! sync error barrier
244	stn	%g0, [THREAD_REG + T_ONFAULT]	! turn off onfault
245	b	longjmp			! let longjmp do the rest
246	stn	%g0, [THREAD_REG + T_LOFAULT]	! turn off lofault
247	SET_SIZE(on_fault)
248
249/*
250 * no_fault()
251 * turn off fault catching.
 *
 * Clears both t_onfault and t_lofault of the current thread after an
 * error barrier.  Takes no arguments; %o0 is not modified.
252 */
253
254	ENTRY(no_fault)
255	membar	#Sync				! sync error barrier
256	stn	%g0, [THREAD_REG + T_ONFAULT]	! turn off onfault
257	retl
258	stn	%g0, [THREAD_REG + T_LOFAULT]	! turn off lofault
259	SET_SIZE(no_fault)
260
261/*
262 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  On sparcv9,
263 * the trap code will complete trap processing but reset the return %pc to
264 * ot_trampoline, which will by default be set to the address of this code.
265 * We longjmp(&curthread->t_ontrap->ot_jmpbuf) to return back to on_trap().
266 */
267
268	ENTRY(on_trap_trampoline)
269	ldn	[THREAD_REG + T_ONTRAP], %o0	! %o0 = curthread->t_ontrap
270	b	longjmp				! tail-branch; does not return here
271	add	%o0, OT_JMPBUF, %o0		! delay - %o0 = &ot_jmpbuf
272	SET_SIZE(on_trap_trampoline)
273
274/*
275 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
276 * more information about the on_trap() mechanism.  If the on_trap_data is the
277 * same as the topmost stack element, we just modify that element.
278 * On UltraSPARC, we need to issue a membar #Sync before modifying t_ontrap.
279 * The issue barrier is defined to force all deferred errors to complete before
280 * we go any further.  We want these errors to be processed before we modify
281 * our current error protection.
 *
 * Entry: %o0 = otp (the on_trap_data to arm), %o1 = prot.
 * The routine finishes by branching to setjmp on otp's ot_jmpbuf, so the
 * initial return value is setjmp's 0; a later longjmp through
 * on_trap_trampoline makes the same call site return 1.
282 */
283
284	ENTRY(on_trap)
285	membar	#Sync				! force error barrier
286	sth	%o1, [%o0 + OT_PROT]		! ot_prot = prot
287	sth	%g0, [%o0 + OT_TRAP]		! ot_trap = 0
288	set	on_trap_trampoline, %o2		! %o2 = &on_trap_trampoline
289	stn	%o2, [%o0 + OT_TRAMPOLINE]	! ot_trampoline = %o2
290	stn	%g0, [%o0 + OT_HANDLE]		! ot_handle = NULL
291	ldn	[THREAD_REG + T_ONTRAP], %o2	! %o2 = curthread->t_ontrap
292	cmp	%o0, %o2			! if (otp == %o2)
293	be	0f				!    don't modify t_ontrap
294	stn	%g0, [%o0 + OT_PAD1]		! delay - ot_pad1 = NULL
295
296	stn	%o2, [%o0 + OT_PREV]		! ot_prev = t_ontrap
297	membar	#Sync				! force error barrier
298	stn	%o0, [THREAD_REG + T_ONTRAP]	! t_ontrap = otp
299
3000:	b	setjmp				! let setjmp do the rest
301	add	%o0, OT_JMPBUF, %o0		! %o0 = &ot_jmpbuf
302	SET_SIZE(on_trap)
303
304/*
305 * Setjmp and longjmp implement non-local gotos using state vectors
306 * type label_t.
 *
 * setjmp: %o0 points at the label_t.  Only the return address (%o7) and
 * the stack pointer (%sp) are recorded, at offsets L_PC and L_SP.
 * Returns 0.
307 */
308
309	ENTRY(setjmp)
310	stn	%o7, [%o0 + L_PC]	! save return address
311	stn	%sp, [%o0 + L_SP]	! save stack ptr
312	retl
313	clr	%o0			! return 0
314	SET_SIZE(setjmp)
315
316
	!
	! longjmp: %o0 points at a label_t previously filled in by setjmp.
	! Control resumes at the saved return address with the saved stack
	! pointer, and the setjmp call site sees a return value of 1.
	!
317	ENTRY(longjmp)
318	!
319        ! The following save is required so that an extra register
320        ! window is flushed.  Flushw flushes nwindows-2
321        ! register windows.  If setjmp and longjmp are called from
322        ! within the same window, that window will not get pushed
323        ! out onto the stack without the extra save below.  Tail call
324        ! optimization can lead to callers of longjmp executing
325        ! from a window that could be the same as the setjmp,
326        ! thus the need for the following save.
327        !
328	save    %sp, -SA(MINFRAME), %sp
329	flushw				! flush all but this window
330	ldn	[%i0 + L_PC], %i7	! restore return addr
331	ldn	[%i0 + L_SP], %fp	! restore sp for dest on foreign stack
332	ret				! return 1
333	restore	%g0, 1, %o0		! takes underflow, switches stacks
334	SET_SIZE(longjmp)
335
336/*
337 * movtuc(length, from, to, table)
338 *
339 * VAX movtuc instruction (sort of).
 *
 * Entry: %o0 = length, %o1 = from, %o2 = to, %o3 = table.
 * Each source byte is replaced by table[byte] and stored in the
 * destination.  Translation stops after length bytes, or at the first
 * byte whose table entry is zero.  Because the store sits in a
 * non-annulled delay slot, that zero entry is itself written to the
 * destination before returning.  Returns the number of bytes translated
 * before the stop (the index in %o4); 0 if length <= 0.
 * %o4 and %g1 are used as scratch.
340 */
341
342	ENTRY(movtuc)
343	tst     %o0
344	ble,pn	%ncc, 2f		! check length
345	clr     %o4			! delay - index = 0
346
347	ldub    [%o1 + %o4], %g1        ! get next byte in string
3480:
349	ldub    [%o3 + %g1], %g1        ! get corresponding table entry
350	tst     %g1                     ! escape char?
351	bnz     1f
352	stb     %g1, [%o2 + %o4]        ! delay slot, store it
353
354	retl                            ! return (bytes moved)
355	mov     %o4, %o0
3561:
357	inc     %o4                     ! increment index
358	cmp     %o4, %o0                ! index < length ?
359	bl,a,pt	%ncc, 0b
360	ldub    [%o1 + %o4], %g1        ! delay slot, get next byte in string
3612:
362	retl                            ! return (bytes moved)
363	mov     %o4, %o0
364	SET_SIZE(movtuc)
365
/*
 * scanc(length, string, table, mask)
 *
 * VAX scanc instruction.
 *
 * Entry: %o0 = length, %o1 = string, %o2 = table, %o3 = mask.
 * Scans string for the first byte b such that (table[b] & mask) != 0.
 * Returns (length - index of that byte), or 0 if no byte in
 * string[0 .. length-1] matches.  If length <= 0 nothing is read and
 * length is returned unchanged.  %o4 and %g1 are used as scratch.
 *
 * The bound is checked before every load, so string[length] is never
 * touched.  (An earlier, software-pipelined form of this loop issued the
 * load of string[index] ahead of the index < length test and therefore
 * read one byte past the end of the buffer whenever no byte matched.)
 */

	ENTRY(scanc)
	tst	%o0
	ble,pn	%ncc, 1f		! check length
	clr	%o4			! delay - index = 0
0:
	ldub	[%o1 + %o4], %g1	! get next byte in string
	ldub	[%o2 + %g1], %g1	! get corresponding table entry
	btst	%o3, %g1		! apply the mask
	bnz,pn	%icc, 1f		! match - stop at this index
	nop
	inc	%o4			! increment index
	cmp	%o4, %o0		! index < length ?
	bl,pt	%ncc, 0b		! yes - look at the next byte
	nop
1:
	retl				! return(length - index)
	sub	%o0, %o4, %o0
	SET_SIZE(scanc)
388
389/*
390 * if a() calls b() calls caller(),
391 * caller() returns return address in a().
 *
 * caller() is a leaf that does no save, so it still runs in b()'s
 * register window and simply returns b()'s %i7.
392 */
393
394	ENTRY(caller)
395	retl
396	mov	%i7, %o0		! delay - return %i7
397	SET_SIZE(caller)
398
399/*
400 * if a() calls callee(), callee() returns the
401 * return address in a();
 *
 * The value returned is callee()'s own %o7.
402 */
403
404	ENTRY(callee)
405	retl
406	mov	%o7, %o0		! delay - return %o7
407	SET_SIZE(callee)
408
409/*
410 * return the current frame pointer
 *
 * getfp() is a leaf that does no save, so the %fp it returns is the one
 * visible in the calling function's register window.
411 */
412
413	ENTRY(getfp)
414	retl
415	mov	%fp, %o0		! delay - return %fp
416	SET_SIZE(getfp)
417
418/*
419 * Get vector base register
 *
 * NOTE(review): this reads %tbr, whereas set_tba()/get_tba() elsewhere in
 * this file use rdpr/wrpr on %tba.  Confirm that the assembler accepts
 * %tbr for this target and that this routine is still wanted.
420 */
421
422	ENTRY(gettbr)
423	retl
424	mov     %tbr, %o0
425	SET_SIZE(gettbr)
426
427/*
428 * Get processor state register, V9 faked to look like V8.
429 * Note: does not provide ccr.xcc and provides FPRS.FEF instead of
430 * PSTATE.PEF, because PSTATE.PEF is always on in order to allow the
431 * libc_psr memcpy routines to run without hitting the fp_disabled trap.
 *
 * The result in %o0 is the OR of three pieces: %ccr shifted left by
 * PSR_ICC_SHIFT, the FPRS_FEF bit of %fprs shifted left by
 * PSR_FPRS_FEF_SHIFT, and the constant V9_PSR_IMPLVER.  %o1 is scratch.
432 */
433
434	ENTRY(getpsr)
435	rd	%ccr, %o1			! get ccr
436        sll	%o1, PSR_ICC_SHIFT, %o0		! move icc to V8 psr.icc
437	rd	%fprs, %o1			! get fprs
438	and	%o1, FPRS_FEF, %o1		! mask out dirty upper/lower
439	sllx	%o1, PSR_FPRS_FEF_SHIFT, %o1	! shift fef to V8 psr.ef
440        or	%o0, %o1, %o0			! or into psr.ef
441        set	V9_PSR_IMPLVER, %o1		! SI assigned impl/ver: 0xef
442        retl
443        or	%o0, %o1, %o0			! or into psr.impl/ver
444	SET_SIZE(getpsr)
445
446/*
447 * Get current processor interrupt level
 *
 * Returns the contents of %pil in %o0.
448 */
449
450	ENTRY(getpil)
451	retl
452	rdpr	%pil, %o0		! delay - return %pil
453	SET_SIZE(getpil)
454
	/*
	 * setpil: write %o0 to %pil unconditionally.  Unlike the spl*
	 * routines there is no comparison against CPU_BASE_SPL and the old
	 * PIL is not returned.
	 */
455	ENTRY(setpil)
456	retl
457	wrpr	%g0, %o0, %pil		! delay - %pil = %o0
458	SET_SIZE(setpil)
459
460
461/*
462 * _insque(entryp, predp)
463 *
464 * Insert entryp after predp in a doubly linked list.
 *
 * Entry: %o0 = entryp, %o1 = predp.  Each element starts with a forward
 * pointer at offset 0 followed by a back pointer at offset CPTRSIZE.
 * predp->forw is dereferenced without a NULL check.  %g1 is scratch.
465 */
466
467	ENTRY(_insque)
468	ldn	[%o1], %g1		! predp->forw
469	stn	%o1, [%o0 + CPTRSIZE]	! entryp->back = predp
470	stn	%g1, [%o0]		! entryp->forw = predp->forw
471	stn	%o0, [%o1]		! predp->forw = entryp
472	retl
473	stn	%o0, [%g1 + CPTRSIZE]	! predp->forw->back = entryp
474	SET_SIZE(_insque)
475
476/*
477 * _remque(entryp)
478 *
479 * Remove entryp from a doubly linked list
 *
 * Entry: %o0 = entryp.  Both neighbours are dereferenced without a NULL
 * check, and entryp's own forw/back pointers are left unchanged.
 * %g1 and %g2 are scratch.
480 */
481
482	ENTRY(_remque)
483	ldn	[%o0], %g1		! entryp->forw
484	ldn	[%o0 + CPTRSIZE], %g2	! entryp->back
485	stn	%g1, [%g2]		! entryp->back->forw = entryp->forw
486	retl
487	stn	%g2, [%g1 + CPTRSIZE]	! entryp->forw->back = entryp->back
488	SET_SIZE(_remque)
489
490
491/*
492 * strlen(str)
493 *
494 * Returns the number of non-NULL bytes in string argument.
495 *
496 * XXX -  why is this here, rather than the traditional file?
497 *	  why does it have local labels which don't start with a `.'?
 *
 * Register use: %o0 = running length (return value), %o1 = current source
 * pointer, %o2 = word just loaded, %o3/%o4/%o5 = scratch and constants.
 * The source is first advanced byte/halfword-wise to a 4-byte boundary;
 * after that it is examined a 32-bit word at a time.
498 */
499
500	ENTRY(strlen)
501	mov	%o0, %o1
502	andcc	%o1, 3, %o3		! is src word aligned
503	bz	$nowalgnd
504	clr	%o0			! length of non-zero bytes
505	cmp	%o3, 2			! is src half-word aligned
506	be	$s2algn
507	cmp	%o3, 3			! src is byte aligned
508	ldub	[%o1], %o3		! move 1 or 3 bytes to align it
509	inc	1, %o1			! in either case, safe to do a byte
510	be	$s3algn			! taken if (src & 3) == 3
511	tst	%o3			! delay - was that byte zero?
512$s1algn:
513	bnz,a	$s2algn			! non-zero: count it, do the halfword
514	inc	1, %o0
515	b,a	$done
516
517$s2algn:
518	lduh	[%o1], %o3		! know src is half-word aligned
519	inc	2, %o1
520	srl	%o3, 8, %o4
521	tst	%o4			! is the first byte zero
522	bnz,a	1f
523	inc	%o0
524	b,a	$done
5251:	andcc	%o3, 0xff, %o3		! is the second byte zero
526	bnz,a	$nowalgnd
527	inc	%o0
528	b,a	$done
529$s3algn:
530	bnz,a	$nowalgnd		! non-zero: count it, src now aligned
531	inc	1, %o0
532	b,a	$done
533
534$nowalgnd:
535	! use trick to check if any read bytes of a word are zero
536	! the following two constants will generate "byte carries"
537	! and check if any bit in a byte is set, if all characters
538	! are 7bits (unsigned) this always works, otherwise
539	! there is a special case that rarely happens, see below
540
541	set	0x7efefeff, %o3
542	set	0x81010100, %o4
543
5443:	ld	[%o1], %o2		! main loop
545	inc	4, %o1
546	add	%o2, %o3, %o5		! generate byte-carries
547	xor	%o5, %o2, %o5		! see if original bits set
548	and	%o5, %o4, %o5
549	cmp	%o5, %o4		! if ==,  no zero bytes
550	be,a	3b
551	inc	4, %o0
552
553	! check for the zero byte and increment the count appropriately
554	! some information (the carry bit) is lost if bit 31
555	! was set (very rare), if this is the rare condition,
556	! return to the main loop again
557
558	sethi	%hi(0xff000000), %o5	! mask used to test for terminator
559	andcc	%o2, %o5, %g0		! check if first byte was zero
560	bnz	1f
561	srl	%o5, 8, %o5
562$done:
563	retl				! terminator found; %o0 = length
564	nop
5651:	andcc	%o2, %o5, %g0		! check if second byte was zero
566	bnz	1f
567	srl	%o5, 8, %o5
568$done1:
569	retl
570	inc	%o0
5711:	andcc 	%o2, %o5, %g0		! check if third byte was zero
572	bnz	1f
573	andcc	%o2, 0xff, %g0		! check if last byte is zero
574$done2:
575	retl
576	inc	2, %o0
5771:	bnz,a	3b			! no zero byte after all: keep going
578	inc	4, %o0			! count of bytes
579$done3:
580	retl
581	inc	3, %o0
582	SET_SIZE(strlen)
583
584/*
585 * Provide a C callable interface to the membar instruction.
 *
 * Every wrapper below is a leaf that takes no arguments and executes one
 * membar, with the mask named in the routine's name, in the delay slot of
 * its retl.
586 */
587
588	ENTRY(membar_ldld)
589	retl
590	membar	#LoadLoad
591	SET_SIZE(membar_ldld)
592
593	ENTRY(membar_stld)
594	retl
595	membar	#StoreLoad
596	SET_SIZE(membar_stld)
597
598	ENTRY(membar_ldst)
599	retl
600	membar	#LoadStore
601	SET_SIZE(membar_ldst)
602
603	ENTRY(membar_stst)
604	retl
605	membar	#StoreStore
606	SET_SIZE(membar_stst)
607
	/*
	 * Two-constraint membar wrappers.  Each combination is exported
	 * under both orderings of its name (ENTRY plus ALTENTRY) and issues
	 * a single membar with the two mask bits ORed together.
	 */
608	ENTRY(membar_ldld_stld)
609	ALTENTRY(membar_stld_ldld)
610	retl
611	membar	#LoadLoad|#StoreLoad
612	SET_SIZE(membar_stld_ldld)
613	SET_SIZE(membar_ldld_stld)
614
615	ENTRY(membar_ldld_ldst)
616	ALTENTRY(membar_ldst_ldld)
617	retl
618	membar	#LoadLoad|#LoadStore
619	SET_SIZE(membar_ldst_ldld)
620	SET_SIZE(membar_ldld_ldst)
621
622	ENTRY(membar_ldld_stst)
623	ALTENTRY(membar_stst_ldld)
624	retl
625	membar	#LoadLoad|#StoreStore
626	SET_SIZE(membar_stst_ldld)
627	SET_SIZE(membar_ldld_stst)
628
629	ENTRY(membar_stld_ldst)
630	ALTENTRY(membar_ldst_stld)
631	retl
632	membar	#StoreLoad|#LoadStore
633	SET_SIZE(membar_ldst_stld)
634	SET_SIZE(membar_stld_ldst)
635
636	ENTRY(membar_stld_stst)
637	ALTENTRY(membar_stst_stld)
638	retl
639	membar	#StoreLoad|#StoreStore
640	SET_SIZE(membar_stst_stld)
641	SET_SIZE(membar_stld_stst)
642
643	ENTRY(membar_ldst_stst)
644	ALTENTRY(membar_stst_ldst)
645	retl
646	membar	#LoadStore|#StoreStore
647	SET_SIZE(membar_stst_ldst)
648	SET_SIZE(membar_ldst_stst)
649
	/*
	 * Wrappers for the remaining membar masks used in this file:
	 * #Lookaside, #MemIssue and #Sync.  Each is a leaf that issues the
	 * membar in the delay slot of retl.
	 */
650	ENTRY(membar_lookaside)
651	retl
652	membar	#Lookaside
653	SET_SIZE(membar_lookaside)
654
655	ENTRY(membar_memissue)
656	retl
657	membar	#MemIssue
658	SET_SIZE(membar_memissue)
659
660	ENTRY(membar_sync)
661	retl
662	membar	#Sync
663	SET_SIZE(membar_sync)
664
665
666/*
667 * Since all of the fuword() variants are so similar, we have a macro to spit
668 * them out.
 *
 * FUWORD(NAME, LOAD, STORE, COPYOP) emits a routine NAME with
 *	%o0 = address to read through ASI_USER
 *	%o1 = kernel address that receives the value
 * LOAD and STORE are the width-specific instructions for the two sides.
 *
 * The routine saves the current t_lofault in %o3 and points t_lofault at
 * local label 1 for the duration of the user load, with membar #Sync on
 * either side.  On success it restores t_lofault, stores the value through
 * %o1 and returns 0.  If control arrives at label 1 instead, t_lofault is
 * restored and, when the thread's t_copyops is non-NULL, the routine jumps
 * to the COPYOP member of that table with %o0/%o1 still holding the
 * original arguments; otherwise it returns -1.
 * %o2, %o3, %o5 and %g1 are used as scratch.
669 */
670
671#define	FUWORD(NAME, LOAD, STORE, COPYOP)	\
672	ENTRY(NAME);				\
673	sethi	%hi(1f), %o5;			\
674	ldn	[THREAD_REG + T_LOFAULT], %o3;	\
675	or	%o5, %lo(1f), %o5;		\
676	membar	#Sync;				\
677	stn	%o5, [THREAD_REG + T_LOFAULT];	\
678	LOAD	[%o0]ASI_USER, %o2;		\
679	membar	#Sync;				\
680	stn	%o3, [THREAD_REG + T_LOFAULT];	\
681	mov	0, %o0;				\
682	retl;					\
683	STORE	%o2, [%o1];			\
6841:						\
685	membar	#Sync;				\
686	stn	%o3, [THREAD_REG + T_LOFAULT];	\
687	ldn	[THREAD_REG + T_COPYOPS], %o2;	\
688	brz	%o2, 2f;			\
689	nop;					\
690	ldn	[%o2 + COPYOP], %g1;		\
691	jmp	%g1;				\
692	nop;					\
6932:						\
694	retl;					\
695	mov	-1, %o0;			\
696	SET_SIZE(NAME)
697
	/* One instantiation per access width: 64, 32, 16 and 8 bits. */
698	FUWORD(fuword64, ldxa, stx, CP_FUWORD64)
699	FUWORD(fuword32, lda, st, CP_FUWORD32)
700	FUWORD(fuword16, lduha, sth, CP_FUWORD16)
701	FUWORD(fuword8, lduba, stb, CP_FUWORD8)
702
703
704/*
705 * Since all of the suword() variants are so similar, we have a macro to spit
706 * them out.
 *
 * SUWORD(NAME, STORE, COPYOP) emits a routine NAME with
 *	%o0 = address to write through ASI_USER
 *	%o1 = value to store
 * STORE is the width-specific alternate-space store instruction.
 *
 * The routine saves the current t_lofault in %o3 and points t_lofault at
 * local label 1 for the duration of the user store, with membar #Sync on
 * either side.  On success it restores t_lofault and returns 0.  If
 * control arrives at label 1 instead, t_lofault is restored and, when the
 * thread's t_copyops is non-NULL, the routine jumps to the COPYOP member
 * of that table with %o0/%o1 still holding the original arguments;
 * otherwise it returns -1.
 * %o2, %o3, %o5 and %g1 are used as scratch.
707 */
708
709#define	SUWORD(NAME, STORE, COPYOP)		\
710	ENTRY(NAME)				\
711	sethi	%hi(1f), %o5;			\
712	ldn	[THREAD_REG + T_LOFAULT], %o3;	\
713	or	%o5, %lo(1f), %o5;		\
714	membar	#Sync;				\
715	stn	%o5, [THREAD_REG + T_LOFAULT];	\
716	STORE	%o1, [%o0]ASI_USER;		\
717	membar	#Sync;				\
718	stn	%o3, [THREAD_REG + T_LOFAULT];	\
719	retl;					\
720	clr	%o0;				\
7211:						\
722	membar	#Sync;				\
723	stn	%o3, [THREAD_REG + T_LOFAULT];	\
724	ldn	[THREAD_REG + T_COPYOPS], %o2;	\
725	brz	%o2, 2f;			\
726	nop;					\
727	ldn	[%o2 + COPYOP], %g1;		\
728	jmp	%g1;				\
729	nop;					\
7302:						\
731	retl;					\
732	mov	-1, %o0;			\
733	SET_SIZE(NAME)
734
	/* One instantiation per access width: 64, 32, 16 and 8 bits. */
735	SUWORD(suword64, stxa, CP_SUWORD64)
736	SUWORD(suword32, sta, CP_SUWORD32)
737	SUWORD(suword16, stha, CP_SUWORD16)
738	SUWORD(suword8, stba, CP_SUWORD8)
739
	/*
	 * fuwordN_noerr(addr, valuep): load N bits from addr (%o0) through
	 * ASI_USER and store them at the kernel address in %o1.
	 * Unlike the FUWORD() routines these do not install a t_lofault
	 * handler of their own and do not set a status return; %o0 is left
	 * holding the loaded value.
	 */
740	ENTRY(fuword8_noerr)
741	lduba	[%o0]ASI_USER, %o0
742	retl
743	stb	%o0, [%o1]
744	SET_SIZE(fuword8_noerr)
745
746	ENTRY(fuword16_noerr)
747	lduha	[%o0]ASI_USER, %o0
748	retl
749	sth	%o0, [%o1]
750	SET_SIZE(fuword16_noerr)
751
752	ENTRY(fuword32_noerr)
753	lda	[%o0]ASI_USER, %o0
754	retl
755	st	%o0, [%o1]
756	SET_SIZE(fuword32_noerr)
757
758	ENTRY(fuword64_noerr)
759	ldxa	[%o0]ASI_USER, %o0
760	retl
761	stx	%o0, [%o1]
762	SET_SIZE(fuword64_noerr)
763
	/*
	 * suwordN_noerr(addr, value): store the low N bits of value (%o1) at
	 * addr (%o0) through ASI_USER, in the delay slot of retl.
	 * Unlike the SUWORD() routines these do not install a t_lofault
	 * handler of their own and do not set a status return.
	 */
764	ENTRY(suword8_noerr)
765	retl
766	stba	%o1, [%o0]ASI_USER
767	SET_SIZE(suword8_noerr)
768
769	ENTRY(suword16_noerr)
770	retl
771	stha	%o1, [%o0]ASI_USER
772	SET_SIZE(suword16_noerr)
773
774	ENTRY(suword32_noerr)
775	retl
776	sta	%o1, [%o0]ASI_USER
777	SET_SIZE(suword32_noerr)
778
779	ENTRY(suword64_noerr)
780	retl
781	stxa	%o1, [%o0]ASI_USER
782	SET_SIZE(suword64_noerr)
783
	/*
	 * Weak aliases.  subyte* map onto the 8-bit store routines.  The
	 * "long word" names fulword*/sulword* map onto the 64-bit routines
	 * when _LP64 is defined and onto the 32-bit routines otherwise.
	 */
783	.weak	subyte
784	subyte=suword8
785	.weak	subyte_noerr
786	subyte_noerr=suword8_noerr
787#ifdef _LP64
788	.weak	fulword
789	fulword=fuword64
790	.weak	fulword_noerr
791	fulword_noerr=fuword64_noerr
792	.weak	sulword
793	sulword=suword64
794	.weak	sulword_noerr
795	sulword_noerr=suword64_noerr
796#else
797	.weak	fulword
798	fulword=fuword32
799	.weak	fulword_noerr
800	fulword_noerr=fuword32_noerr
801	.weak	sulword
802	sulword=suword32
803	.weak	sulword_noerr
804	sulword_noerr=suword32_noerr
805#endif	/* _LP64 */
807
808/*
809 * We define rdtick here, but not for sun4v. On sun4v systems, the %tick
810 * and %stick should not be read directly without considering the tick
811 * and stick offset kernel variables introduced to support sun4v OS
812 * suspension.
813 */
814#if !defined (sun4v)
815
816	ENTRY(rdtick)
817	retl
818	rd	%tick, %o0
819        SET_SIZE(rdtick)
820
821#endif /* !sun4v */
822
823/*
824 * Set tba to given address, no side effects.
825 */
826
827	ENTRY(set_tba)
828	mov	%o0, %o1
829	rdpr	%tba, %o0
830	wrpr	%o1, %tba
831	retl
832	nop
833	SET_SIZE(set_tba)
834
	/* get_tba: return the current contents of %tba in %o0. */
834	ENTRY(get_tba)
835	retl
836	rdpr	%tba, %o0
837	SET_SIZE(get_tba)
839
	/* setpstate: write %o0 to %pstate, replacing the whole register. */
839	ENTRY_NP(setpstate)
840	retl
841	wrpr	%g0, %o0, %pstate
842	SET_SIZE(setpstate)
844
	/* getpstate: return the current contents of %pstate in %o0. */
844	ENTRY_NP(getpstate)
845	retl
846	rdpr	%pstate, %o0
847	SET_SIZE(getpstate)
849
	/*
	 * dtrace_interrupt_disable: clear PSTATE_IE in %pstate and return
	 * the previous %pstate value in %o0, suitable for handing back to
	 * dtrace_interrupt_enable().  %o1 is scratch.
	 */
849	ENTRY_NP(dtrace_interrupt_disable)
850	rdpr	%pstate, %o0		! return value = old %pstate
851	andn	%o0, PSTATE_IE, %o1	! %o1 = old %pstate with IE cleared
852	retl
853	wrpr	%g0, %o1, %pstate
854	SET_SIZE(dtrace_interrupt_disable)
856
	/*
	 * dtrace_interrupt_enable: write %o0 to %pstate.  The whole register
	 * is replaced, so interrupts are only re-enabled if PSTATE_IE is set
	 * in the value passed in.
	 */
856	ENTRY_NP(dtrace_interrupt_enable)
857	retl
858	wrpr	%g0, %o0, %pstate
859	SET_SIZE(dtrace_interrupt_enable)
861
/*
 * DTRACE_MEMBAR_RETURN is the control transfer used by the dtrace_membar_*
 * routines below, which place their membar in its delay slot.  When
 * SF_ERRATA_51 is defined it is a branch to dtrace_membar_return, a
 * 32-byte-aligned stub containing only retl/nop; otherwise it is a plain
 * retl.
 */
861#ifdef SF_ERRATA_51
862	.align 32
863	ENTRY(dtrace_membar_return)
864	retl
865	nop
866	SET_SIZE(dtrace_membar_return)
867#define	DTRACE_MEMBAR_RETURN	ba,pt %icc, dtrace_membar_return
868#else
869#define	DTRACE_MEMBAR_RETURN	retl
870#endif
872
	/*
	 * dtrace_membar_producer: issue membar #StoreStore in the delay slot
	 * of DTRACE_MEMBAR_RETURN.
	 */
872	ENTRY(dtrace_membar_producer)
873	DTRACE_MEMBAR_RETURN
874	membar	#StoreStore
875	SET_SIZE(dtrace_membar_producer)
877
	/*
	 * dtrace_membar_consumer: issue membar #LoadLoad in the delay slot
	 * of DTRACE_MEMBAR_RETURN.
	 */
877	ENTRY(dtrace_membar_consumer)
878	DTRACE_MEMBAR_RETURN
879	membar	#LoadLoad
880	SET_SIZE(dtrace_membar_consumer)
882
	/*
	 * dtrace_flush_windows: execute flushw in the delay slot of retl.
	 * dtrace_vpanic calls this where vpanic calls flush_windows.
	 */
882	ENTRY_NP(dtrace_flush_windows)
883	retl
884	flushw
885	SET_SIZE(dtrace_flush_windows)
887
888	/*
889	 * %g1	pcstack
890	 * %g2	iteration count
891	 * %g3	final %fp
892	 * %g4	final %i7
893	 * %g5	saved %cwp (so we can get back to the original window)
894	 *
895	 * %o0	pcstack / return value (iteration count)
896	 * %o1	limit / saved %cansave
897	 * %o2	lastfp
898	 * %o3	lastpc
899	 * %o4	saved %canrestore
900	 * %o5	saved %pstate (to restore interrupts)
901	 *
902	 * Note:  The frame pointer returned via lastfp is safe to use as
903	 *	long as getpcstack_top() returns either (0) or a value less
904	 *	than (limit).
905	 */
906	ENTRY_NP(getpcstack_top)
907
908	rdpr	%pstate, %o5
909	andn	%o5, PSTATE_IE, %g1
910	wrpr	%g0, %g1, %pstate	! disable interrupts
911
912	mov	%o0, %g1		! we need the pcstack pointer while
913					! we're visiting other windows
914
915	rdpr	%canrestore, %g2	! number of available windows
916	sub	%g2, 1, %g2		! account for skipped frame
917	cmp	%g2, %o1		! compare with limit
918	movg	%icc, %o1, %g2		! %g2 = min(%canrestore-1, limit)
919
920	brlez,a,pn %g2, 3f		! Use slow path if count <= 0 --
921	clr	%o0			! return zero.
922
923	mov	%g2, %o0		! set up return value
924
925	rdpr	%cwp, %g5		! remember the register window state
926	rdpr	%cansave, %o1		! 'restore' changes, so we can undo
927	rdpr	%canrestore, %o4	! its effects when we finish.
928
929	restore				! skip caller's frame
9301:
931	st	%i7, [%g1]		! stash return address in pcstack
932	restore				! go to the next frame
933	subcc	%g2, 1, %g2		! decrement the count
934	bnz,pt	%icc, 1b		! loop until count reaches 0
935	add	%g1, 4, %g1		! increment pcstack
936
937	mov	%i6, %g3		! copy the final %fp and return PC
938	mov	%i7, %g4		! aside so we can return them to our
939					! caller
940
941	wrpr	%g0, %g5, %cwp		! jump back to the original window
942	wrpr	%g0, %o1, %cansave	! and restore the original register
943	wrpr	%g0, %o4, %canrestore	! window state.
9442:
945	stn	%g3, [%o2]		! store the frame pointer and pc
946	st	%g4, [%o3]		! so our caller can continue the trace
947
948	retl				! return to caller
949	wrpr	%g0, %o5, %pstate	! restore interrupts
950
9513:
952	flushw				! flush register windows, then
953	ldn	[%fp + STACK_BIAS + 14*CLONGSIZE], %g3	! load initial fp
954	ba	2b
955	ldn	[%fp + STACK_BIAS + 15*CLONGSIZE], %g4	! and pc
956	SET_SIZE(getpcstack_top)
957
	/* setwstate: write %o0 to %wstate. */
957	ENTRY_NP(setwstate)
958	retl
959	wrpr	%g0, %o0, %wstate
960	SET_SIZE(setwstate)
962
963
	/* getwstate: return the current contents of %wstate in %o0. */
963	ENTRY_NP(getwstate)
964	retl
965	rdpr	%wstate, %o0
966	SET_SIZE(getwstate)
968
969
970/*
971 * int panic_trigger(int *tp)
972 *
973 * A panic trigger is a word which is updated atomically and can only be set
974 * once.  We atomically store 0xFF into the high byte and load the old value.
975 * If the byte was 0xFF, the trigger has already been activated and we fail.
976 * If the previous value was 0 or not 0xFF, we succeed.  This allows a
977 * partially corrupt trigger to still trigger correctly.  DTrace has its own
978 * version of this function to allow it to panic correctly from probe context.
979 */
980
981	ENTRY_NP(panic_trigger)
982	ldstub	[%o0], %o0		! store 0xFF, load byte into %o0
983	cmp	%o0, 0xFF		! compare %o0 to 0xFF
984	set	1, %o1			! %o1 = 1
985	be,a	0f			! if (%o0 == 0xFF) goto 0f (else annul)
986	set	0, %o1			! delay - %o1 = 0
9870:	retl
988	mov	%o1, %o0		! return (%o1);
989	SET_SIZE(panic_trigger)
990
	/*
	 * int dtrace_panic_trigger(int *tp)
	 *
	 * Same instruction sequence as panic_trigger() under a separate
	 * symbol: atomically set the byte at tp to 0xFF and return 1 if it
	 * was not already 0xFF, 0 otherwise.  Called from dtrace_vpanic().
	 */
990	ENTRY_NP(dtrace_panic_trigger)
991	ldstub	[%o0], %o0		! store 0xFF, load byte into %o0
992	cmp	%o0, 0xFF		! compare %o0 to 0xFF
993	set	1, %o1			! %o1 = 1
994	be,a	0f			! if (%o0 == 0xFF) goto 0f (else annul)
995	set	0, %o1			! delay - %o1 = 0
9960:	retl
997	mov	%o1, %o0		! return (%o1);
998	SET_SIZE(dtrace_panic_trigger)
1000
1001/*
1002 * void vpanic(const char *format, va_list alist)
1003 *
1004 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
1005 * into the panic code implemented in panicsys().  vpanic() is responsible
1006 * for passing through the format string and arguments, and constructing a
1007 * regs structure on the stack into which it saves the current register
1008 * values.  If we are not dying due to a fatal trap, these registers will
1009 * then be preserved in panicbuf as the current processor state.  Before
1010 * invoking panicsys(), vpanic() activates the first panic trigger (see
1011 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
1012 * DTrace takes a slightly different panic path if it must panic from probe
1013 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
1014 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
1015 * branches back into vpanic().
 *
 * Local register use between the save and the call to panicsys():
 *	%l0	synthesized tstate value
 *	%l1	reported %pc, %l2 reported %npc
 *	%l3	%y
 *	%l4	result of the panic trigger, passed on as on_panic_stack
 * vpanic_common is the point at which dtrace_vpanic() joins this code; it
 * expects %l0-%l3 to be set up as above and %o0 to hold the trigger result.
1016 */
1017
1018	ENTRY_NP(vpanic)
1019
1020	save	%sp, -SA(MINFRAME + REGSIZE), %sp	! save and allocate regs
1021
1022	!
1023	! The v9 struct regs has a 64-bit r_tstate field, which we use here
1024	! to store the %ccr, %asi, %pstate, and %cwp as they would appear
1025	! in %tstate if a trap occurred.  We leave it up to the debugger to
1026	! realize what happened and extract the register values.
1027	!
1028	rd	%ccr, %l0				! %l0 = %ccr
1029	sllx	%l0, TSTATE_CCR_SHIFT, %l0		! %l0 <<= CCR_SHIFT
1030	rd	%asi, %l1				! %l1 = %asi
1031	sllx	%l1, TSTATE_ASI_SHIFT, %l1		! %l1 <<= ASI_SHIFT
1032	or	%l0, %l1, %l0				! %l0 |= %l1
1033	rdpr	%pstate, %l1				! %l1 = %pstate
1034	sllx	%l1, TSTATE_PSTATE_SHIFT, %l1		! %l1 <<= PSTATE_SHIFT
1035	or	%l0, %l1, %l0				! %l0 |= %l1
1036	rdpr	%cwp, %l1				! %l1 = %cwp
1037	sllx	%l1, TSTATE_CWP_SHIFT, %l1		! %l1 <<= CWP_SHIFT
1038	or	%l0, %l1, %l0				! %l0 |= %l1
1039
1040	set	vpanic, %l1				! %l1 = %pc (vpanic)
1041	add	%l1, 4, %l2				! %l2 = %npc (vpanic+4)
1042	rd	%y, %l3					! %l3 = %y
1043	!
1044	! Flush register windows before panic_trigger() in order to avoid a
1045	! problem that a dump hangs if flush_windows() causes another panic.
1046	!
1047	call	flush_windows
1048	nop
1049
1050	sethi	%hi(panic_quiesce), %o0
1051	call	panic_trigger
1052	or	%o0, %lo(panic_quiesce), %o0		! if (!panic_trigger(
1053
1054vpanic_common:
1055	tst	%o0					!     &panic_quiesce))
1056	be	0f					!   goto 0f;
1057	mov	%o0, %l4				!   delay - %l4 = %o0
1058
1059	!
1060	! If panic_trigger() was successful, we are the first to initiate a
1061	! panic: switch to the panic_stack.
1062	!
1063	set	panic_stack, %o0			! %o0 = panic_stack
1064	set	PANICSTKSIZE, %o1			! %o1 = size of stack
1065	add	%o0, %o1, %o0				! %o0 = top of stack
1066
1067	sub	%o0, SA(MINFRAME + REGSIZE) + STACK_BIAS, %sp
1068
1069	!
1070	! Now that we've got everything set up, store each register to its
1071	! designated location in the regs structure allocated on the stack.
1072	! The register set we store is the equivalent of the registers at
1073	! the time the %pc was pointing to vpanic, thus the %i's now contain
1074	! what the %o's contained prior to the save instruction.
1075	!
10760:	stx	%l0, [%sp + STACK_BIAS + SA(MINFRAME) + TSTATE_OFF]
1077	stx	%g1, [%sp + STACK_BIAS + SA(MINFRAME) + G1_OFF]
1078	stx	%g2, [%sp + STACK_BIAS + SA(MINFRAME) + G2_OFF]
1079	stx	%g3, [%sp + STACK_BIAS + SA(MINFRAME) + G3_OFF]
1080	stx	%g4, [%sp + STACK_BIAS + SA(MINFRAME) + G4_OFF]
1081	stx	%g5, [%sp + STACK_BIAS + SA(MINFRAME) + G5_OFF]
1082	stx	%g6, [%sp + STACK_BIAS + SA(MINFRAME) + G6_OFF]
1083	stx	%g7, [%sp + STACK_BIAS + SA(MINFRAME) + G7_OFF]
1084	stx	%i0, [%sp + STACK_BIAS + SA(MINFRAME) + O0_OFF]
1085	stx	%i1, [%sp + STACK_BIAS + SA(MINFRAME) + O1_OFF]
1086	stx	%i2, [%sp + STACK_BIAS + SA(MINFRAME) + O2_OFF]
1087	stx	%i3, [%sp + STACK_BIAS + SA(MINFRAME) + O3_OFF]
1088	stx	%i4, [%sp + STACK_BIAS + SA(MINFRAME) + O4_OFF]
1089	stx	%i5, [%sp + STACK_BIAS + SA(MINFRAME) + O5_OFF]
1090	stx	%i6, [%sp + STACK_BIAS + SA(MINFRAME) + O6_OFF]
1091	stx	%i7, [%sp + STACK_BIAS + SA(MINFRAME) + O7_OFF]
1092	stn	%l1, [%sp + STACK_BIAS + SA(MINFRAME) + PC_OFF]
1093	stn	%l2, [%sp + STACK_BIAS + SA(MINFRAME) + NPC_OFF]
1094	st	%l3, [%sp + STACK_BIAS + SA(MINFRAME) + Y_OFF]
1095
1096	mov	%l4, %o3				! %o3 = on_panic_stack
1097	add	%sp, STACK_BIAS + SA(MINFRAME), %o2	! %o2 = &regs
1098	mov	%i1, %o1				! %o1 = alist
1099	call	panicsys				! panicsys();
1100	mov	%i0, %o0				! %o0 = format
1101	ret
1102	restore
1103
1104	SET_SIZE(vpanic)
1105
	!
	! dtrace_vpanic(format, alist)
	!
	! Entry sequence used in place of vpanic() on the DTrace panic path.
	! It builds the same %l0-%l3 state as vpanic(), but records
	! dtrace_vpanic as the %pc and calls dtrace_flush_windows() and
	! dtrace_panic_trigger() instead of flush_windows() and
	! panic_trigger().  It then joins vpanic() at vpanic_common with the
	! trigger result in %o0.
	!
1106	ENTRY_NP(dtrace_vpanic)
1107
1108	save	%sp, -SA(MINFRAME + REGSIZE), %sp	! save and allocate regs
1109
1110	!
1111	! The v9 struct regs has a 64-bit r_tstate field, which we use here
1112	! to store the %ccr, %asi, %pstate, and %cwp as they would appear
1113	! in %tstate if a trap occurred.  We leave it up to the debugger to
1114	! realize what happened and extract the register values.
1115	!
1116	rd	%ccr, %l0				! %l0 = %ccr
1117	sllx	%l0, TSTATE_CCR_SHIFT, %l0		! %l0 <<= CCR_SHIFT
1118	rd	%asi, %l1				! %l1 = %asi
1119	sllx	%l1, TSTATE_ASI_SHIFT, %l1		! %l1 <<= ASI_SHIFT
1120	or	%l0, %l1, %l0				! %l0 |= %l1
1121	rdpr	%pstate, %l1				! %l1 = %pstate
1122	sllx	%l1, TSTATE_PSTATE_SHIFT, %l1		! %l1 <<= PSTATE_SHIFT
1123	or	%l0, %l1, %l0				! %l0 |= %l1
1124	rdpr	%cwp, %l1				! %l1 = %cwp
1125	sllx	%l1, TSTATE_CWP_SHIFT, %l1		! %l1 <<= CWP_SHIFT
1126	or	%l0, %l1, %l0				! %l0 |= %l1
1127
1128	set	dtrace_vpanic, %l1			! %l1 = %pc (dtrace_vpanic)
1129	add	%l1, 4, %l2				! %l2 = %npc (dtrace_vpanic+4)
1130	rd	%y, %l3					! %l3 = %y
1131	!
1132	! Flush register windows before dtrace_panic_trigger() in order to
1133	! avoid a problem that a dump hangs if flushing causes another panic.
1134	!
1135	call	dtrace_flush_windows
1136	nop
1137
1138	sethi	%hi(panic_quiesce), %o0
1139	call	dtrace_panic_trigger
1140	or	%o0, %lo(panic_quiesce), %o0		! delay - %o0 = &panic_quiesce
1141
1142	ba,a	vpanic_common				! continue in vpanic()
1143	SET_SIZE(dtrace_vpanic)
1144
	/*
	 * get_subcc_ccr(a, b): clear %ccr, execute "subcc a, b" (%o0 - %o1,
	 * result discarded) and return the resulting %ccr in %o0.
	 */
1145	ENTRY(get_subcc_ccr)
1146	wr	%g0, %ccr	! clear condition codes
1147	subcc	%o0, %o1, %g0
1148	retl
1149	rd	%ccr, %o0	! return condition codes
1150	SET_SIZE(get_subcc_ccr)
1151
	/*
	 * ftrace_interrupt_disable: clear PSTATE_IE in %pstate and return
	 * the previous %pstate value in %o0, suitable for handing back to
	 * ftrace_interrupt_enable().  %o1 is scratch.
	 */
1152	ENTRY_NP(ftrace_interrupt_disable)
1153	rdpr	%pstate, %o0		! return value = old %pstate
1154	andn	%o0, PSTATE_IE, %o1	! %o1 = old %pstate with IE cleared
1155	retl
1156	wrpr	%g0, %o1, %pstate
1157	SET_SIZE(ftrace_interrupt_disable)
1158
	/*
	 * ftrace_interrupt_enable: write %o0 to %pstate.  The whole register
	 * is replaced, so interrupts are only re-enabled if PSTATE_IE is set
	 * in the value passed in.
	 */
1159	ENTRY_NP(ftrace_interrupt_enable)
1160	retl
1161	wrpr	%g0, %o0, %pstate
1162	SET_SIZE(ftrace_interrupt_enable)
1163
1164