xref: /titanic_50/usr/src/uts/sun4u/cpu/common_asm.s (revision de3d2ce46fc25c7b67ccbae4afe5f15e5357568f)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#if !defined(lint)
27#include "assym.h"
28#endif	/* !lint */
29
30/*
31 * General assembly language routines.
32 * It is the intent of this file to contain routines that are
33 * specific to cpu architecture.
34 */
35
36/*
37 * WARNING: If you add a fast trap handler which can be invoked by a
38 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
39 * instead of "done" instruction to return back to the user mode. See
40 * comments for the "fast_trap_done" entry point for more information.
 *
 * FAST_TRAP_DONE expands to a single annulled branch-always (ba,a) to
 * fast_trap_done, so no delay-slot instruction has to follow it.
41 */
42#define	FAST_TRAP_DONE	\
43	ba,a	fast_trap_done
44
45/*
46 * Override GET_NATIVE_TIME for the cpu module code.  This is not
47 * guaranteed to be exactly one instruction, be careful of using
48 * the macro in delay slots.
49 *
50 * Do not use any instruction that modifies condition codes as the
51 * caller may depend on these to remain unchanged across the macro.
52 */
53#if defined(CHEETAH) || defined(OLYMPUS_C)
54
/*
 * CHEETAH / OLYMPUS_C variants: the time source and its compare register
 * are read and written directly as STICK and STICK_COMPARE.  The scratch
 * and label arguments exist only for interface parity with the other
 * variants below and are not used here.
 *
 *   GET_NATIVE_TIME	out = STICK
 *   DELTA_NATIVE_TIME	reg = STICK + delta; reg is written back to STICK
 *   RD_TICKCMPR	out = STICK_COMPARE
 *   WR_TICKCMPR	STICK_COMPARE = in
 */
55#define	GET_NATIVE_TIME(out, scr1, scr2) \
56	rd	STICK, out
57#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
58	rd	STICK, reg;		\
59	add	reg, delta, reg;	\
60	wr	reg, STICK
61#define	RD_TICKCMPR(out, scr)		\
62	rd	STICK_COMPARE, out
63#define	WR_TICKCMPR(in, scr1, scr2, label) \
64	wr	in, STICK_COMPARE
65
66#elif defined(HUMMINGBIRD)
67#include <sys/spitregs.h>
68
69/*
70 * the current hummingbird version of %stick and %stick_cmp
71 * were both implemented as (2) 32-bit locations in ASI_IO space;
72 * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
73 *
74 * 64-bit opcodes are required, but move only 32-bits:
75 *
76 * ldxa [phys]ASI_IO, %dst 	reads  the low 32-bits from phys into %dst
77 * stxa %src, [phys]ASI_IO 	writes the low 32-bits from %src into phys
78 *
79 * reg equivalent		[phys]ASI_IO
80 * ------------------		---------------
81 * %stick_cmp  low-32		0x1FE.0000.F060
82 * %stick_cmp high-32		0x1FE.0000.F068
83 * %stick      low-32		0x1FE.0000.F070
84 * %stick     high-32		0x1FE.0000.F078
85 */
86#define	HSTC_LOW	0x60			/* stick_cmp low  32-bits */
87#define	HSTC_HIGH	0x68			/* stick_cmp high 32-bits */
88#define	HST_LOW		0x70			/* stick low  32-bits */
89#define	HST_HIGH	0x78			/* stick high 32-bits */
90#define	HST_DIFF	0x08			/* low<-->high diff */
91
92/*
93 * Any change in the number of instructions in SETL41()
94 * will affect SETL41_OFF
 *
 * SETL41(reg, byte) builds the ASI_IO address 0x1FE.0000.F0xx in reg,
 * where xx is "byte" (one of the HSTC_ and HST_ offsets defined above).
 * It takes four instructions: sethi, or, sllx, or.
95 */
96#define	SETL41(reg, byte) \
97	sethi	%hi(0x1FE00000), reg;		/* 0000.0000.1FE0.0000 */ \
98	or	reg, 0xF, reg;			/* 0000.0000.1FE0.000F */ \
99	sllx	reg, 12, reg;			/* 0000.01FE.0000.F000 */ \
100	or	reg, byte, reg;			/* 0000.01FE.0000.F0xx */
101
102/*
103 * SETL41_OFF is used to calculate the relative PC value when a
104 * branch instruction needs to go over SETL41() macro
 *
 * Its value is the size in bytes of the SETL41() expansion
 * (4 instructions of 4 bytes each).
105 */
106#define SETL41_OFF  16
107
108/*
109 * reading stick requires 2 loads, and there could be an intervening
110 * low-to-high 32-bit rollover resulting in a return value that is
111 * off by about (2 ^ 32); this rare case is prevented by re-reading
112 * the low-32 bits after the high-32 and verifying the "after" value
113 * is >= the "before" value; if not, increment the high-32 value.
114 *
115 * this method is limited to 1 rollover, and based on the fixed
116 * stick-frequency (5555555), requires the loads to complete within
117 * 773 seconds; incrementing the high-32 value will not overflow for
118 * about 52644 years.
119 *
120 * writing stick requires 2 stores; if the old/new low-32 value is
121 * near 0xffffffff, there could be another rollover (also rare).
122 * to prevent this, we first write a 0 to the low-32, then write
123 * new values to the high-32 then the low-32.
124 *
125 * When we detect a carry in the lower %stick register, we need to
126 * read HST_HIGH again. However, at the point where we detect this,
127 * we need to rebuild the register address HST_HIGH. This involves more
128 * than one instruction, so a branch is unavoidable. However, most of
129 * the time, there is no carry. So we take the penalty of a branch
130 * instruction only when there is a carry (less frequent).
131 *
132 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
133 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
134 * addr already points to HST_LOW.
135 *
136 * NOTE: this method requires disabling interrupts before using
137 * DELTA_NATIVE_TIME.
138 */
/*
 * GET_NATIVE_TIME(out, scr, tmp)  [HUMMINGBIRD]
 *
 *   out - receives the 64-bit stick value (high-32 << 32 | low-32)
 *   scr - clobbered: holds the ASI_IO address, then the second low-32 read
 *   tmp - clobbered: first low-32 read, then (second read - first read)
 *
 * Sequence: read low-32, read high-32, read low-32 again.  If the second
 * low-32 read is smaller than the first, the brlz branches back
 * SETL41_OFF+24 bytes from itself, which is the first instruction of
 * SETL41(), and the whole read is redone.  The sllx in the delay slot
 * also executes on that path; "out" is reloaded before it is used again.
 *
 * Only sub/inc/dec and a register-conditional branch are used, so the
 * condition codes are left unchanged.
 */
139#define	GET_NATIVE_TIME(out, scr, tmp)	\
140	SETL41(scr, HST_LOW);		\
141	ldxa	[scr]ASI_IO, tmp;	\
142	inc	HST_DIFF, scr;		\
143	ldxa	[scr]ASI_IO, out;	\
144	dec	HST_DIFF, scr;		\
145	ldxa	[scr]ASI_IO, scr;	\
146	sub	scr, tmp, tmp;		\
147	brlz,pn tmp, .-(SETL41_OFF+24); \
148	sllx	out, 32, out;		\
149	or	out, scr, out
/*
 * DELTA_NATIVE_TIME(delta, addr, high, low, tmp)  [HUMMINGBIRD]
 *
 *   delta - amount added to the current stick value
 *   addr, high, low, tmp - scratch registers, all clobbered
 *
 * Read phase: same low/high/low read as GET_NATIVE_TIME.  On a detected
 * rollover the brlz branches back 24 bytes from itself, to the first
 * ldxa after SETL41(); addr already points at HST_LOW again at that
 * point because of the preceding dec.
 *
 * Write phase: the sum is split into low (srl by 0 keeps the low 32 bits)
 * and high (srlx by 32).  Zero is stored to the low word first, then the
 * new high word, then the new low word.
 */
150#define	DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
151	SETL41(addr, HST_LOW);		\
152	ldxa	[addr]ASI_IO, tmp;	\
153	inc	HST_DIFF, addr;		\
154	ldxa	[addr]ASI_IO, high;	\
155	dec	HST_DIFF, addr;		\
156	ldxa	[addr]ASI_IO, low;	\
157	sub	low, tmp, tmp;		\
158	brlz,pn tmp, .-24;		\
159	sllx	high, 32, high;		\
160	or	high, low, high;	\
161	add	high, delta, high;	\
162	srl	high, 0, low;		\
163	srlx	high, 32, high;		\
164	stxa	%g0, [addr]ASI_IO;	\
165	inc	HST_DIFF, addr;		\
166	stxa	high, [addr]ASI_IO;	\
167	dec	HST_DIFF, addr;		\
168	stxa	low, [addr]ASI_IO
/*
 * RD_TICKCMPR(out, scr)  [HUMMINGBIRD]
 *   Reads the low-32 word of stick_cmp into out and the high-32 word
 *   into scr, then combines them: out = (high << 32) | low.
 *   scr is clobbered.
 *
 * WR_TICKCMPR(in, scra, scrd, label)  [HUMMINGBIRD]
 *   Stores in >> 32 to the high-32 word first, then stores "in" to the
 *   low-32 word.  scra (address) and scrd (high word) are clobbered;
 *   "label" is not used by this variant.
 */
169#define RD_TICKCMPR(out, scr)		\
170	SETL41(scr, HSTC_LOW);		\
171	ldxa	[scr]ASI_IO, out;	\
172	inc	HST_DIFF, scr;		\
173	ldxa	[scr]ASI_IO, scr;	\
174	sllx	scr, 32, scr;		\
175	or	scr, out, out
176#define WR_TICKCMPR(in, scra, scrd, label) \
177	SETL41(scra, HSTC_HIGH);	\
178	srlx	in, 32, scrd;		\
179	stxa	scrd, [scra]ASI_IO;	\
180	dec	HST_DIFF, scra;		\
181	stxa	in, [scra]ASI_IO
182
183#else	/* !CHEETAH && !HUMMINGBIRD */
184
/*
 * Default variants (neither CHEETAH/OLYMPUS_C nor HUMMINGBIRD): the time
 * source is the privileged %tick register and the compare register is
 * TICK_COMPARE.  Scratch arguments are unused.
 *
 *   GET_NATIVE_TIME	out = %tick
 *   DELTA_NATIVE_TIME	reg = %tick + delta; reg is written back to %tick
 *   RD_TICKCMPR	out = TICK_COMPARE
 *   WR_TICKCMPR	TICK_COMPARE = in (two forms, see BB_ERRATA_1 below)
 */
185#define	GET_NATIVE_TIME(out, scr1, scr2) \
186	rdpr	%tick, out
187#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
188	rdpr	%tick, reg;		\
189	add	reg, delta, reg;	\
190	wrpr	reg, %tick
191#define	RD_TICKCMPR(out, scr)		\
192	rd	TICK_COMPARE, out
193#ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
194/*
195 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
196 * The failure occurs only when the following instruction decodes to wr or
197 * wrpr.  The workaround is to immediately follow writes to TICK_COMPARE
198 * with a read, thus stalling the pipe and keeping following instructions
199 * from causing data corruption.  Aligning to a quadword will ensure these
200 * two instructions are not split due to i$ misses.
 *
 * The "label" argument makes the local label .bb_errata_1.<label> unique
 * per expansion; callers in this file pass __LINE__.  The ba,a jumps over
 * the padding inserted by .align to reach the wr/rd pair.
201 */
202#define WR_TICKCMPR(cmpr,scr1,scr2,label)	\
203	ba,a	.bb_errata_1.label		;\
204	.align	64				;\
205.bb_errata_1.label:				;\
206	wr	cmpr, TICK_COMPARE		;\
207	rd	TICK_COMPARE, %g0
208#else	/* BB_ERRATA_1 */
209#define	WR_TICKCMPR(in,scr1,scr2,label)		\
210	wr	in, TICK_COMPARE
211#endif	/* BB_ERRATA_1 */
212
213#endif	/* !CHEETAH && !HUMMINGBIRD */
214
215#include <sys/clock.h>
216
217#if defined(lint)
218#include <sys/types.h>
219#include <sys/scb.h>
220#include <sys/systm.h>
221#include <sys/regset.h>
222#include <sys/sunddi.h>
223#include <sys/lockstat.h>
224#endif	/* lint */
225
226
227#include <sys/asm_linkage.h>
228#include <sys/privregs.h>
229#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
230#include <sys/machthread.h>
231#include <sys/clock.h>
232#include <sys/intreg.h>
233#include <sys/psr_compat.h>
234#include <sys/isa_defs.h>
235#include <sys/dditypes.h>
236#include <sys/intr.h>
237
238#if !defined(lint)
239#include "assym.h"
240#endif	/* !lint */
241
242#if defined(lint)
243
244uint_t
245get_impl(void)
246{ return (0); }
247
248#else	/* lint */
249
/*
 * get_impl: return in %o0 whatever GET_CPU_IMPL produces.
 * GET_CPU_IMPL is defined outside this file; presumably it yields the
 * CPU implementation number -- confirm against its definition.
 * Leaf routine (retl with a nop in the delay slot).
 */
250	ENTRY(get_impl)
251	GET_CPU_IMPL(%o0)
252	retl
253	nop
254	SET_SIZE(get_impl)
255
256#endif	/* lint */
257
258#if defined(lint)
259/*
260 * Softint generated when counter field of tick reg matches value field
261 * of tick_cmpr reg
 *
 * tickcmpr_set(clock_cycles) writes clock_cycles to the compare register
 * and keeps rewriting it until the written value is later than the
 * current native time.
262 */
263/*ARGSUSED*/
264void
265tickcmpr_set(uint64_t clock_cycles)
266{}
267
268#else	/* lint */
269
/*
 * Register usage:
 *	%o0 = clock_cycles on entry; afterwards the current native time
 *	      with its top bit cleared (sllx/srlx by 1)
 *	%o2 = value written to the compare register
 *	%o3 = step size; starts at 8 and is doubled on every pass (the
 *	      sllx sits in a non-annulled delay slot), so the first retry
 *	      uses a step of 16
 *	%o4, %o5 = scratch for WR_TICKCMPR / GET_NATIVE_TIME
 *
 * Loop: write %o2, read the time, and return if %o2 > time (signed
 * 64-bit compare).  Otherwise set %o2 = time + step and try again.
 */
270	ENTRY_NP(tickcmpr_set)
271	! get 64-bit clock_cycles interval
272	mov	%o0, %o2
273	mov	8, %o3			! A reasonable initial step size
2741:
275	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR
276
277	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
278	sllx	%o0, 1, %o0		!   value we wrote was in the future.
279	srlx	%o0, 1, %o0
280
281	cmp	%o2, %o0		! If the value we wrote was in the
282	bg,pt	%xcc, 2f		!   future, then blow out of here.
283	sllx	%o3, 1, %o3		! If not, then double our step size,
284	ba,pt	%xcc, 1b		!   and take another lap.
285	add	%o0, %o3, %o2		!
2862:
287	retl
288	nop
289	SET_SIZE(tickcmpr_set)
290
291#endif	/* lint */
292
293#if defined(lint)
294
295void
296tickcmpr_disable(void)
297{}
298
299#else	/* lint */
300
/*
 * tickcmpr_disable: write (1 << TICKINT_DIS_SHFT) to the compare
 * register via WR_TICKCMPR.  Clobbers %g1 and %o0; %o4 and %o5 are
 * passed to the macro as scratch.
 */
301	ENTRY_NP(tickcmpr_disable)
302	mov	1, %g1
303	sllx	%g1, TICKINT_DIS_SHFT, %o0
304	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
305	retl
306	nop
307	SET_SIZE(tickcmpr_disable)
308
309#endif	/* lint */
310
311#if defined(lint)
312
313/*
314 * tick_write_delta() increments %tick by the specified delta.  This should
315 * only be called after a CPR event to assure that gethrtime() continues to
316 * increase monotonically.  Obviously, writing %tick needs to be done very
317 * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
318 * this reason, we make sure we're i-cache hot before actually writing to
319 * %tick.
320 */
321/*ARGSUSED*/
322void
323tick_write_delta(uint64_t delta)
324{}
325
326#else	/* lint */
327
328#ifdef DEBUG
329	.seg	".text"
330tick_write_panic:
331	.asciz	"tick_write_delta: interrupts already disabled on entry"
332#endif	/* DEBUG */
333
/*
 * Register usage:
 *	%o0 = delta on entry, copied to %o2
 *	%g1 = %pstate as read on entry; restored in the retl delay slot
 *	%o3, %o4, %o5, %g2 = scratch for DELTA_NATIVE_TIME
 *
 * Interrupts are disabled by toggling PSTATE_IE (wrpr writes
 * %g1 xor PSTATE_IE), which is why the DEBUG check insists that the
 * bit is set on entry.  In the DEBUG panic path the sethi result in
 * %o1 becomes %i1 after the save, hence the "or %i1" in the delay slot.
 *
 * The branch to the aligned label forces the read/add/write sequence
 * to start on a 16-byte boundary.
 */
334	ENTRY_NP(tick_write_delta)
335	rdpr	%pstate, %g1
336#ifdef DEBUG
337	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
338	bnz	0f			! aren't already disabled.
339	sethi	%hi(tick_write_panic), %o1
340        save    %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
341	call	panic
342	or	%i1, %lo(tick_write_panic), %o0
343#endif	/* DEBUG */
3440:	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
345	mov	%o0, %o2
346	ba	0f			! Branch to cache line-aligned instr.
347	nop
348	.align	16
3490:	nop				! The next 3 instructions are now hot.
350	DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2)	! read/inc/write %tick
351
352	retl				! Return
353	wrpr	%g0, %g1, %pstate	!     delay: Re-enable interrupts
	SET_SIZE(tick_write_delta)
354#endif	/* lint */
355
356#if defined(lint)
357/*
358 *  return 1 if disabled
359 */
360
361int
362tickcmpr_disabled(void)
363{ return (0); }
364
365#else	/* lint */
366
/*
 * tickcmpr_disabled: read the compare register into %g1 (with %o0 as the
 * macro's scratch) and return it shifted right by TICKINT_DIS_SHFT.
 * The shift is done in the retl delay slot.
 */
367	ENTRY_NP(tickcmpr_disabled)
368	RD_TICKCMPR(%g1, %o0)
369	retl
370	srlx	%g1, TICKINT_DIS_SHFT, %o0
371	SET_SIZE(tickcmpr_disabled)
372
373#endif	/* lint */
374
375/*
376 * Get current tick
 *
 * Returns the raw GET_NATIVE_TIME value in %o0; %o2 and %o3 are scratch.
 * Unlike gettick_counter() below, no bits are masked off.
377 */
378#if defined(lint)
379
380u_longlong_t
381gettick(void)
382{ return (0); }
383
384#else	/* lint */
385
386	ENTRY(gettick)
387	GET_NATIVE_TIME(%o0, %o2, %o3)
388	retl
389	nop
390	SET_SIZE(gettick)
391
392#endif	/* lint */
393
394
395/*
396 * Return the counter portion of the tick register.
 *
 * This always reads %tick directly (it does not go through
 * GET_NATIVE_TIME), then clears bit 63 by shifting left and right by
 * one; the right shift runs in the retl delay slot.
397 */
398
399#if defined(lint)
400
401uint64_t
402gettick_counter(void)
403{ return(0); }
404
405#else	/* lint */
406
407	ENTRY_NP(gettick_counter)
408	rdpr	%tick, %o0
409	sllx	%o0, 1, %o0
410	retl
411	srlx	%o0, 1, %o0		! shake off npt bit
412	SET_SIZE(gettick_counter)
413#endif	/* lint */
414
415/*
416 * Provide a C callable interface to the trap that reads the hi-res timer.
417 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
418 */
419
420#if defined(lint)
421
/*
 * Lint-only C stubs.  They give lint a prototype and a trivial body for
 * each assembly entry point in the non-lint branch of this #if; none of
 * them is compiled into the kernel.
 */
422hrtime_t
423gethrtime(void)
424{
425	return ((hrtime_t)0);
426}
427
428hrtime_t
429gethrtime_unscaled(void)
430{
431	return ((hrtime_t)0);
432}
433
434hrtime_t
435gethrtime_max(void)
436{
437	return ((hrtime_t)0);
438}
439
440void
441scalehrtime(hrtime_t *hrt)
442{
443	*hrt = 0;
444}
445
446void
447gethrestime(timespec_t *tp)
448{
449	tp->tv_sec = 0;
450	tp->tv_nsec = 0;
451}
452
453time_t
454gethrestime_sec(void)
455{
456	return (0);
457}
458
459void
460gethrestime_lasttick(timespec_t *tp)
461{
462	tp->tv_sec = 0;
463	tp->tv_nsec = 0;
464}
465
466/*ARGSUSED*/
467void
468hres_tick(void)
469{
470}
471
472void
473panic_hres_tick(void)
474{
475}
476
477#else	/* lint */
478
/*
 * gethrtime: run GET_HRTIME (defined outside this file) with %g1 as the
 * result register and %o0-%o5, %g2 as its working registers, then return
 * %g1 in %o0.
 */
479	ENTRY_NP(gethrtime)
480	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
481							! %g1 = hrtime
482	retl
483	mov	%g1, %o0
484	SET_SIZE(gethrtime)
485
/*
 * gethrtime_unscaled: return the raw native time (no conversion to
 * nanoseconds) in %o0.  %g1 holds the value; %o2/%o3 are scratch.
 */
486	ENTRY_NP(gethrtime_unscaled)
487	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
488	retl
489	mov	%g1, %o0
490	SET_SIZE(gethrtime_unscaled)
491
/*
 * gethrtime_waitfree / dtrace_gethrtime (two names for one body):
 * read the native time and convert it with NATIVE_TIME_TO_NSEC, without
 * going through GET_HRTIME.  Result is returned in %o0; %g1, %o2 and
 * %o3 are clobbered.
 */
492	ENTRY_NP(gethrtime_waitfree)
493	ALTENTRY(dtrace_gethrtime)
494	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
495	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
496	retl
497	mov	%g1, %o0
498	SET_SIZE(dtrace_gethrtime)
499	SET_SIZE(gethrtime_waitfree)
500
/*
 * gethrtime_max: convert NATIVE_TIME_MAX to nanoseconds and return it in
 * %o0.  If the converted value is negative when viewed as a signed 64-bit
 * number, it is replaced by (-1 >> 1), the largest positive 64-bit value.
 * The brlz is annulled, so the srlx in its delay slot only executes when
 * the branch is taken.
 */
501	ENTRY(gethrtime_max)
502	NATIVE_TIME_MAX(%g1)
503	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
504
505	! hrtime_t's are signed, max hrtime_t must be positive
506	mov	-1, %o2
507	brlz,a	%g1, 1f
508	srlx	%o2, 1, %g1
5091:
510	retl
511	mov	%g1, %o0
512	SET_SIZE(gethrtime_max)
513
/*
 * scalehrtime(hrtime_t *hrt): load the 64-bit value at [%o0], convert it
 * in place with NATIVE_TIME_TO_NSEC and store it back to [%o0] (the store
 * is in the retl delay slot).  %o1-%o3 are clobbered.
 */
514	ENTRY(scalehrtime)
515	ldx	[%o0], %o1
516	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
517	retl
518	stx	%o1, [%o0]
519	SET_SIZE(scalehrtime)
520
521/*
522 * Fast trap to return a timestamp, uses trap window, leaves traps
523 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
524 *
525 * This is the handler for the ST_GETHRTIME trap.
 *
 * %g1 receives the GET_HRTIME result; it is then split into
 * %o0 = upper 32 bits and %o1 = lower 32 bits.  %g2-%g5 and %o0-%o2 are
 * handed to GET_HRTIME as working registers.  Returns via FAST_TRAP_DONE
 * rather than "done".
526 */
527
528	ENTRY_NP(get_timestamp)
529	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
530	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
531	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
532	FAST_TRAP_DONE
533	SET_SIZE(get_timestamp)
534
535/*
536 * Macro to convert GET_HRESTIME() bits into a timestamp.
537 *
538 * We use two separate macros so that the platform-dependent GET_HRESTIME()
539 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 *
 * Arguments (all registers):
 *	hrestsec  - in/out: seconds; incremented for each NANOSEC carried
 *	hrestnsec - in/out: nanoseconds; nslt and the adjustment are added
 *	adj       - in: pending adjustment; clobbered
 *	nslt      - in: nanoseconds since last tick; clobbered (shifted
 *	            right by ADJ_SHIFT)
 *	nano      - in: the value compared against at label 3 (NANOSEC)
 *
 * The adjustment applied is limited in magnitude to nslt >> ADJ_SHIFT and
 * keeps the sign of adj.  The tail loops, subtracting nano and adding one
 * second, until hrestnsec < nano.
 *
 * The macro modifies the integer condition codes and defines the local
 * labels 2, 3 and 4; expansion ends at label 4.
540 */
541#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
542	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
543	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */	\
544	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
545	srlx	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
546	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
547	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
548	ba	3f;			/* go convert to sec/nsec */	\
549	add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
5502:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
551	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
552	add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
553	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
5543:	cmp	hrestnsec, nano;	/* more than a billion? */	\
555	bl,pt	%xcc, 4f;		/* if not, we're done */	\
556	nop;				/* delay: do nothing :( */	\
557	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
558	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
559	ba,a	3b;			/* check >= billion again */	\
5604:
561
/*
 * gethrestime(timespec_t *tp): %o0 = tp.
 * GET_HRESTIME (defined outside this file) fills %o1-%o5, which are then
 * passed to CONV_HRESTIME as sec, nsec, adj, nslt and nano.  The resulting
 * seconds (%o1) are stored at [tp] and nanoseconds (%o2) at
 * [tp + CLONGSIZE], the latter in the retl delay slot.
 * %g1-%g4 are handed to GET_HRESTIME as scratch.
 */
562	ENTRY_NP(gethrestime)
563	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
564	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
565	stn	%o1, [%o0]
566	retl
567	stn	%o2, [%o0 + CLONGSIZE]
568	SET_SIZE(gethrestime)
569
570/*
571 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
572 * seconds.
 *
 * The seconds value is built directly in %o0, so nothing is stored to
 * memory; the nanosecond result left in %o2 is discarded.
573 */
574	ENTRY_NP(gethrestime_sec)
575	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
576	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
577	retl					! %o0 current hrestime seconds
578	nop
579	SET_SIZE(gethrestime_sec)
580
581/*
582 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
583 * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
584 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
585 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
586 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
587 * it explicitly.)
 *
 * Register usage:
 *	%o0 = timespec_t *tp (destination)
 *	%o1 = %hi(hres_lock), used as the base for all the loads
 *	%o2 = first lock value with bit 0 cleared
 *	%o3 = lock value re-read after the data loads
 *	%g1, %g2 = hrestime seconds, nanoseconds
 *
 * The loop retries whenever the re-read lock value differs from the
 * masked first value.  The store of seconds sits in the non-annulled
 * delay slot of the retry branch, so it is performed on every pass; the
 * final pass leaves the consistent values in *tp.
588 */
589	ENTRY_NP(gethrestime_lasttick)
590	sethi	%hi(hres_lock), %o1
5910:
592	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
593	membar	#LoadLoad			! Load of lock must complete
594	andn	%o2, 1, %o2			! Mask off lowest bit
595	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
596	add	%o1, %lo(hrestime), %o4
597	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
598	membar	#LoadLoad			! All loads must complete
599	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
600	cmp	%o3, %o2			! If lock is locked or has
601	bne	0b				!   changed, retry.
602	stn	%g1, [%o0]			! Delay: store seconds
603	retl
604	stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
605	SET_SIZE(gethrestime_lasttick)
606
607/*
608 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
609 *
610 * This is the handler for the ST_GETHRESTIME trap.
 *
 * %o0 = seconds and %o1 = nanoseconds after CONV_HRESTIME.  %g1-%g3 carry
 * adj, nslt and nano between the two macros; %g4, %g5, %o2 and %o3 are
 * handed to GET_HRESTIME as scratch.  Returns via FAST_TRAP_DONE.
611 */
612
613	ENTRY_NP(get_hrestime)
614	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
615	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
616	FAST_TRAP_DONE
617	SET_SIZE(get_hrestime)
618
619/*
620 * Fast trap to return lwp virtual time, uses trap window, leaves traps
621 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
622 * of nanoseconds consumed.
623 *
624 * This is the handler for the ST_GETHRVTIME trap.
625 *
626 * Register usage:
627 *	%o0, %o1 = return lwp virtual time (hi32, lo32)
628 *	%g2 = CPU pointer, then thread pointer (also GET_NATIVE_TIME scratch)
629 *	%g3 = CPU_ADDR scratch, then lwp pointer
630 *	%g1 = scratch
631 *	%g5 = native time, then the accumulated result
 *
 * The value returned is
 *	(now - lwp ms_state_start + lwp ms_acct[LMS_USER])
 * computed in native ticks and then converted by NATIVE_TIME_TO_NSEC,
 * which is given %g1 and %o0 as scratch.
632 */
633	ENTRY_NP(get_virtime)
634	GET_NATIVE_TIME(%g5, %g1, %g2)	! %g5 = native time in ticks
635	CPU_ADDR(%g2, %g3)			! CPU struct ptr to %g2
636	ldn	[%g2 + CPU_THREAD], %g2		! thread pointer to %g2
637	ldn	[%g2 + T_LWP], %g3		! lwp pointer to %g3
638
639	/*
640	 * Subtract start time of current microstate from time
641	 * of day to get increment for lwp virtual time.
642	 */
643	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
644	sub	%g5, %g1, %g5
645
646	/*
647	 * Add current value of ms_acct[LMS_USER]
648	 */
649	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
650	add	%g5, %g1, %g5
651	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)
652
653	srl	%g5, 0, %o1			! %o1 = lo32(%g5)
654	srlx	%g5, 32, %o0			! %o0 = hi32(%g5)
655
656	FAST_TRAP_DONE
657	SET_SIZE(get_virtime)
658
659
660
661	.seg	".text"
662hrtime_base_panic:
663	.asciz	"hrtime_base stepping back"
664
665
/*
 * hres_tick: update hres_last_tick, hrtime_base, timedelta, hrestime_adj
 * and hrestime while holding hres_lock.
 *
 * Outline:
 *   1. Acquire the lock with ldstub on the byte at
 *      hres_lock + HRES_LOCK_OFFSET.  While it is held by someone else,
 *      spin with plain ldub reads and only retry the ldstub once the
 *      byte reads as zero.  nsec_scale is loaded into %l5 in the delay
 *      slot of the successful branch.
 *   2. Sample the native time, store it to hres_last_tick, and scale it
 *      to nanoseconds (NATIVE_TIME_TO_NSEC_SCALE with NSEC_SHIFT).
 *      %i1 = new nanosecond value - previous hrtime_base.
 *   3. If the stored hrtime_base is greater than the new value, go to
 *      label 9: release the lock and panic with hrtime_base_panic.
 *      Otherwise store the new value as hrtime_base.
 *   4. Compute adj in %l5: zero when hrestime_adj is zero; otherwise
 *      hrestime_adj limited in magnitude to (%i1 >> ADJ_SHIFT), keeping
 *      its sign.
 *   5. timedelta -= adj, and hrestime_adj is set to the new timedelta.
 *   6. hrestime.tv_nsec += adj + %i1.  If the result is not below
 *      NANOSEC, carry one second into tv_sec and store 1 to one_sec.
 *   7. membar #StoreStore, then release the lock by incrementing the
 *      32-bit hres_lock word.
 *
 * All variables are addressed off %l4 = %hi(hrestime); this relies on
 * them sharing the same high 22 address bits.
 */
666	ENTRY_NP(hres_tick)
667	save	%sp, -SA(MINFRAME), %sp	! get a new window
668
669	sethi	%hi(hrestime), %l4
670	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
6717:	tst	%l5
672	bz,pt	%xcc, 8f			! if we got it, drive on
673	ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
674	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
6759:	tst	%l5
676	bz,a,pn	%xcc, 7b
677	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
678	ba,pt	%xcc, 9b
679	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
6808:
681	membar	#StoreLoad|#StoreStore
682
683	!
684	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
685	!
686	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
687	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
688	stx	%l0, [%l4 + %lo(hres_last_tick)]! prev = current
689	! convert native time to nsecs
690	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)
691
692	sub	%l0, %g1, %i1			! get accurate nsec delta
693
694	ldx	[%l4 + %lo(hrtime_base)], %l1
695	cmp	%l1, %l0
696	bg,pn	%xcc, 9f
697	nop
698
699	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base
700
701	!
702	! apply adjustment, if any
703	!
704	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
705	brz	%l0, 2f
706						! hrestime_adj == 0 ?
707						! yes, skip adjustments
708	clr	%l5				! delay: set adj to zero
709	tst	%l0				! is hrestime_adj >= 0 ?
710	bge,pt	%xcc, 1f			! yes, go handle positive case
711	srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj
712
713	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
714	bl,pt	%xcc, 2f			! yes, use current adj
715	neg	%l5				! delay: %l5 = -adj
716	ba,pt	%xcc, 2f
717	mov	%l0, %l5			! no, so set adj = hrestime_adj
7181:
719	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
720	bl,a,pt	%xcc, 2f			! yes, set adj = hrestime_adj
721	mov	%l0, %l5			! delay: adj = hrestime_adj
7222:
723	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
724	sub	%l0, %l5, %l0			! timedelta -= adj
725
726	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
727	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta
728
729	or	%l4, %lo(hrestime), %l2
730	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
731	ldn	[%l2 + CLONGSIZE], %i3
732	add	%i3, %l5, %i3			! hrestime.nsec += adj
733	add	%i3, %i1, %i3			! hrestime.nsec += nslt
734
735	set	NANOSEC, %l5			! %l5 = NANOSEC
736	cmp	%i3, %l5
737	bl,pt	%xcc, 5f			! if hrestime.tv_nsec < NANOSEC
738	sethi	%hi(one_sec), %i1		! delay
739	add	%i2, 0x1, %i2			! hrestime.tv_sec++
740	sub	%i3, %l5, %i3			! hrestime.tv_nsec - NANOSEC
741	mov	0x1, %l5
742	st	%l5, [%i1 + %lo(one_sec)]
7435:
744	stn	%i2, [%l2]
745	stn	%i3, [%l2 + CLONGSIZE]		! store the new hrestime
746
747	membar	#StoreStore
748
749	ld	[%l4 + %lo(hres_lock)], %i1
750	inc	%i1				! release lock
751	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock
752
753	ret
754	restore
755
7569:
757	!
758	! release hres_lock
759	!
760	ld	[%l4 + %lo(hres_lock)], %i1
761	inc	%i1
762	st	%i1, [%l4 + %lo(hres_lock)]
763
764	sethi	%hi(hrtime_base_panic), %o0
765	call	panic
766	or	%o0, %lo(hrtime_base_panic), %o0
767
768	SET_SIZE(hres_tick)
769
770#endif	/* lint */
771
772#if !defined(lint) && !defined(__lint)
773
774	.seg	".text"
775kstat_q_panic_msg:
776	.asciz	"kstat_q_exit: qlen == 0"
777
/*
 * kstat_q_panic: branch target used by the kstat "exit" routines below
 * when the old queue length is zero.  Takes a new register window and
 * calls panic() with kstat_q_panic_msg; it does not return.
 */
778	ENTRY(kstat_q_panic)
779	save	%sp, -SA(MINFRAME), %sp
780	sethi	%hi(kstat_q_panic_msg), %o0
781	call	panic
782	or	%o0, %lo(kstat_q_panic_msg), %o0
783	/*NOTREACHED*/
784	SET_SIZE(kstat_q_panic)
785
786#define	BRZPN	brz,pn
787#define	BRZPT	brz,pt
788
/*
 * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
 *
 *	QOP     - add or sub: how the queue length changes by 1
 *	QBR     - BRZPN or BRZPT: branch taken when the OLD length is zero
 *	QZERO   - target of that branch
 *	QRETURN - text emitted just before the final store: "retl" (the
 *	          store becomes its delay slot), "1:" (a label, falling
 *	          through into a following expansion), or "1:retl"
 *	QTYPE   - field-name prefix (KSTAT_IO_W or KSTAT_IO_R); the field
 *	          offsets are formed by pasting CNT, LASTUPDATE, TIME and
 *	          LENTIME onto it with the empty-comment idiom
 *
 * Inputs: %o0 = base address the offsets are applied to, %g1 = current
 * time.  Clobbers %o1-%o5.
 *
 * The new length is always stored (delay slot of QBR).  When the old
 * length was non-zero, time += delta and lentime += old length * delta,
 * where delta = %g1 - lastupdate.  lastupdate is then set to %g1.
 */
789#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
790	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
791	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
792	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
793	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
794	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
795	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
796	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
797	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
798	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
799	add	%o4, %o2, %o4;			/* %o4 = new time */	\
800	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
801	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
802	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
803QRETURN;								\
804	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
805
/*
 * kstat queue entry points.  Each one samples the native time into %g1
 * (with %g2/%g3 as scratch) and expands KSTAT_Q_UPDATE once or twice on
 * the structure addressed by %o0.
 *
 *   *_enter: length + 1; if the old length was zero the time/lentime
 *            accumulation is skipped (branch to the local label 1).
 *   *_exit:  length - 1; an old length of zero branches to
 *            kstat_q_panic.
 *   kstat_waitq_to_runq / kstat_runq_back_to_waitq: an "exit" on one
 *            queue followed by an "enter" on the other, sharing a single
 *            time sample.  The first expansion ends in the label "1:" so
 *            that it falls through into the second.
 *
 * KSTAT_IO_W selects the wait-queue fields, KSTAT_IO_R the run-queue
 * fields.
 */
806	.align 16
807	ENTRY(kstat_waitq_enter)
808	GET_NATIVE_TIME(%g1, %g2, %g3)
809	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
810	SET_SIZE(kstat_waitq_enter)
811
812	.align 16
813	ENTRY(kstat_waitq_exit)
814	GET_NATIVE_TIME(%g1, %g2, %g3)
815	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
816	SET_SIZE(kstat_waitq_exit)
817
818	.align 16
819	ENTRY(kstat_runq_enter)
820	GET_NATIVE_TIME(%g1, %g2, %g3)
821	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
822	SET_SIZE(kstat_runq_enter)
823
824	.align 16
825	ENTRY(kstat_runq_exit)
826	GET_NATIVE_TIME(%g1, %g2, %g3)
827	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
828	SET_SIZE(kstat_runq_exit)
829
830	.align 16
831	ENTRY(kstat_waitq_to_runq)
832	GET_NATIVE_TIME(%g1, %g2, %g3)
833	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
834	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
835	SET_SIZE(kstat_waitq_to_runq)
836
837	.align 16
838	ENTRY(kstat_runq_back_to_waitq)
839	GET_NATIVE_TIME(%g1, %g2, %g3)
840	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
841	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
842	SET_SIZE(kstat_runq_back_to_waitq)
843
844#endif	/* !(lint || __lint) */
845
846#ifdef lint
847
848int64_t timedelta;
849hrtime_t hres_last_tick;
850volatile timestruc_t hrestime;
851int64_t hrestime_adj;
852volatile int hres_lock;
853uint_t nsec_scale;
854hrtime_t hrtime_base;
855int traptrace_use_stick;
856
857#else	/* lint */
858	/*
859	 *  -- WARNING --
860	 *
861	 * The following variables MUST be together on a 128-byte boundary.
862	 * In addition to the primary performance motivation (having them all
863	 * on the same cache line(s)), code here and in the GET*TIME() macros
864	 * assumes that they all have the same high 22 address bits (so
865	 * there's only one sethi).
866	 */
867	.seg	".data"
868	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
869	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
870	.global	nsec_shift, adj_shift
871

	/*
	 * Align to 128 bytes, as the warning above requires.  A 64-byte
	 * alignment only guarantees that the first 64 bytes of the block
	 * fall inside one sethi (1K) region; with 8-byte .nword entries
	 * the block is 68 bytes long, so its last word could otherwise
	 * end up with different high 22 address bits.
	 */
	.align	128
874timedelta:
875	.word	0, 0		/* int64_t */
876hres_last_tick:
877	.word	0, 0		/* hrtime_t */
878hrestime:
879	.nword	0, 0		/* 2 longs */
880hrestime_adj:
881	.word	0, 0		/* int64_t */
882hres_lock:
883	.word	0
884nsec_scale:
885	.word	0
886hrtime_base:
887	.word	0, 0
888traptrace_use_stick:
889	.word	0
890nsec_shift:
891	.word	NSEC_SHIFT
892adj_shift:
893	.word	ADJ_SHIFT
894
895#endif	/* lint */
896
897
898/*
899 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
900 * usec_delay(int n)		[compatibility - should go one day]
901 * Delay by spinning.
902 *
903 * delay for n microseconds.  numbers <= 0 delay 1 usec
904 *
905 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
906 * and variable clock rate for power management requires that we
907 * use %stick to implement this routine.
908 *
909 * For OPL platforms that support the "sleep" instruction, we
910 * conditionally (ifdef'ed) insert a "sleep" instruction in
911 * the loop. Note that theoretically we should have moved (duplicated)
912 * the code down to spitfire/us3/opl specific asm files - but this
913 * is a lot of code duplication just to add one "sleep" instruction.
914 * We chose less code duplication for this.
915 */
916
917#if defined(lint)
918
919/*ARGSUSED*/
920void
921drv_usecwait(clock_t n)
922{}
923
924/*ARGSUSED*/
925void
926usec_delay(int n)
927{}
928
929#else	/* lint */
930
/*
 * drv_usecwait / usec_delay (two names for one body).
 *
 * Register usage:
 *	%o0 = n; replaced by 1 when n <= 0 (the mov sits in the annulled
 *	      delay slot of brlez, so it only runs when the branch is taken)
 *	%o1 = deadline = start time + n * sticks_per_usec + 1
 *	%o2 = most recent native-time sample
 *	%o3, %o4 = scratch for GET_NATIVE_TIME
 *
 * Each pass compares the deadline with the PREVIOUS sample, then takes a
 * new sample, and loops while deadline >= previous sample (unsigned).
 * GET_NATIVE_TIME is placed between the cmp and the branch, which relies
 * on the macro leaving the condition codes unchanged.
 */
931	ENTRY(drv_usecwait)
932	ALTENTRY(usec_delay)
933	brlez,a,pn %o0, 0f
934	mov	1, %o0
9350:
936	sethi	%hi(sticks_per_usec), %o1
937	lduw	[%o1 + %lo(sticks_per_usec)], %o1
938	mulx	%o1, %o0, %o1		! Scale usec to ticks
939	inc	%o1			! We don't start on a tick edge
940	GET_NATIVE_TIME(%o2, %o3, %o4)
941	add	%o1, %o2, %o1
942
9431:
944#ifdef	_OPL
945	.word 0x81b01060		! insert "sleep" instruction
946#endif /* _OPL */			! use byte code for now
947	cmp	%o1, %o2
948	GET_NATIVE_TIME(%o2, %o3, %o4)
949	bgeu,pt	%xcc, 1b
950	nop
951	retl
952	nop
953	SET_SIZE(usec_delay)
954	SET_SIZE(drv_usecwait)
955#endif	/* lint */
956
957#if defined(lint)
958
959/* ARGSUSED */
960void
961pil14_interrupt(int level)
962{}
963
964#else	/* lint */
965
966/*
967 * Level-14 interrupt prologue.
 *
 * Records into the CPU structure (address in %g1, %g2 is scratch for
 * CPU_ADDR):
 *	CPU_PROFILE_PIL = %pil at the time of the interrupt
 *	CPU_PROFILE_PC  = %tpc if TSTATE_PRIV was set in %tstate, else 0
 *	CPU_PROFILE_UPC = %tpc if TSTATE_PRIV was clear, else 0
 * and then branches to pil_interrupt_common.  The second store of each
 * pair sits in the delay slot of that branch.  Clobbers %g5 and %g6.
968 */
969	ENTRY_NP(pil14_interrupt)
970	CPU_ADDR(%g1, %g2)
971	rdpr	%pil, %g6			! %g6 = interrupted PIL
972	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
973	rdpr	%tstate, %g6
974	rdpr	%tpc, %g5
975	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
976	bnz,a,pt %xcc, 1f
977	stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
978	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
979	ba	pil_interrupt_common		! must be large-disp branch
980	stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
9811:	ba	pil_interrupt_common		! must be large-disp branch
982	stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
983	SET_SIZE(pil14_interrupt)
984
/*
 * tick_rtt
 *
 * Register usage:
 *	%o5 = compare-register value read on entry, later the value being
 *	      (re)programmed
 *	%g5 = %pstate as read before interrupts are disabled; written back
 *	      on every path that reaches label 2 after that point
 *	%o0 = mask of TICK_INT_MASK | STICK_INT_MASK, then current time
 *	%o4 = SOFTINT contents, then the retry step size
 *	%g1, %g2 = scratch
 *
 * Flow:
 *   - If the compare register has any bit at or above TICKINT_DIS_SHFT
 *     set, go straight to label 2.
 *   - Otherwise clear PSTATE_IE and call intr_enqueue_req(PIL_14,
 *     cbe_level14_inum).
 *   - If neither interrupt bit is set in SOFTINT, restore %pstate and go
 *     to label 2.  Otherwise clear those bits and check whether %o5 is
 *     still later than the current time (top bit of the time cleared).
 *   - If it is not, rewrite the compare register with time + step,
 *     starting with a step of 8 and doubling it after each failed
 *     attempt, until the programmed value is in the future.
 *   - Label 2 branches to current_thread_complete.
 */
985	ENTRY_NP(tick_rtt)
986	!
987	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
988	! disabled.  If TICK_COMPARE is enabled, we know that we need to
989	! reenqueue the interrupt request structure.  We'll then check TICKINT
990	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
991	! interrupt.  In this case, TICK_COMPARE may have been rewritten
992	! recently; we'll compare %o5 to the current time to verify that it's
993	! in the future.
994	!
995	! Note that %o5 is live until after 1f.
996	! XXX - there is a subroutine call while %o5 is live!
997	!
998	RD_TICKCMPR(%o5, %g1)
999	srlx	%o5, TICKINT_DIS_SHFT, %g1
1000	brnz,pt	%g1, 2f
1001	nop
1002
1003	rdpr 	%pstate, %g5
1004	andn	%g5, PSTATE_IE, %g1
1005	wrpr	%g0, %g1, %pstate		! Disable vec interrupts
1006
1007	sethi	%hi(cbe_level14_inum), %o1
1008	ldx	[%o1 + %lo(cbe_level14_inum)], %o1
1009	call	intr_enqueue_req ! preserves %o5 and %g5
1010	mov	PIL_14, %o0
1011
1012	! Check SOFTINT for TICKINT/STICKINT
1013	rd	SOFTINT, %o4
1014	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
1015	andcc	%o4, %o0, %g0
1016	bz,a,pn	%icc, 2f
1017	wrpr	%g0, %g5, %pstate		! Enable vec interrupts
1018
1019	! clear TICKINT/STICKINT
1020	wr	%o0, CLEAR_SOFTINT
1021
1022	!
1023	! Now that we've cleared TICKINT, we can reread %tick and confirm
1024	! that the value we programmed is still in the future.  If it isn't,
1025	! we need to reprogram TICK_COMPARE to fire as soon as possible.
1026	!
1027	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
1028	sllx	%o0, 1, %o0			! Clear the DIS bit
1029	srlx	%o0, 1, %o0
1030	cmp	%o5, %o0			! In the future?
1031	bg,a,pt	%xcc, 2f			! Yes, drive on.
1032	wrpr	%g0, %g5, %pstate		!   delay: enable vec intr
1033
1034	!
1035	! If we're here, then we have programmed TICK_COMPARE with a %tick
1036	! which is in the past; we'll now load an initial step size, and loop
1037	! until we've managed to program TICK_COMPARE to fire in the future.
1038	!
1039	mov	8, %o4				! 8 = arbitrary inital step
10401:	add	%o0, %o4, %o5			! Add the step
1041	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
1042	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
1043	sllx	%o0, 1, %o0			! Clear the DIS bit
1044	srlx	%o0, 1, %o0
1045	cmp	%o5, %o0			! In the future?
1046	bg,a,pt	%xcc, 2f			! Yes, drive on.
1047	wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
1048	ba	1b				! No, try again.
1049	sllx	%o4, 1, %o4			!    delay: double step size
1050
10512:	ba	current_thread_complete
1052	nop
1053	SET_SIZE(tick_rtt)
1054
1055#endif	/* lint */
1056
1057#if defined(lint)
1058
1059/* ARGSUSED */
1060void
1061pil15_interrupt(int level)
1062{}
1063
1064#else  /* lint */
1065
1066/*
1067 * Level-15 interrupt prologue.
 *
 * Same shape as pil14_interrupt, minus the PIL recording: stores %tpc
 * into CPU_CPCPROFILE_PC when TSTATE_PRIV is set in %tstate, or into
 * CPU_CPCPROFILE_UPC when it is clear, zeroes the other field in the
 * branch delay slot, and branches to pil15_epilogue.
 * %g1 = CPU structure address; %g2, %g5 and %g6 are clobbered.
1068 */
1069       ENTRY_NP(pil15_interrupt)
1070       CPU_ADDR(%g1, %g2)
1071       rdpr    %tstate, %g6
1072       rdpr    %tpc, %g5
1073       btst    TSTATE_PRIV, %g6                ! trap from supervisor mode?
1074       bnz,a,pt %xcc, 1f
1075       stn     %g5, [%g1 + CPU_CPCPROFILE_PC]  ! if so, record kernel PC
1076       stn     %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
1077       ba      pil15_epilogue                  ! must be large-disp branch
1078       stn     %g0, [%g1 + CPU_CPCPROFILE_PC]  ! zero kernel PC
10791:     ba      pil15_epilogue                  ! must be large-disp branch
1080       stn     %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
1081       SET_SIZE(pil15_interrupt)
1082
1083#endif /* lint */
1084
1085#if defined(lint) || defined(__lint)
1086
1087/* ARGSUSED */
1088uint64_t
1089find_cpufrequency(volatile uchar_t *clock_ptr)
1090{
1091	return (0);
1092}
1093
1094#else	/* lint */
1095
1096#ifdef DEBUG
1097	.seg	".text"
1098find_cpufreq_panic:
1099	.asciz	"find_cpufrequency: interrupts already disabled on entry"
1100#endif	/* DEBUG */
1101
/*
 * find_cpufrequency(clock_ptr): return the number of native-time ticks
 * that elapse between two consecutive changes of the byte at *clock_ptr
 * (per the inline comments, a seconds register).
 *
 * Register usage:
 *	%o0 = clock_ptr on entry, return value on exit
 *	%g1 = %pstate on entry; PSTATE_IE is toggled off for the
 *	      measurement and %g1 is written back before returning
 *	%o1/%o2 = previous/current byte value in the first wait loop
 *	%o2/%o4 = previous/current byte value in the second wait loop
 *	%o3 = time sample at the first change, %o5 = at the second change
 *	%g4, %g5 = scratch for GET_NATIVE_TIME
 *
 * If the byte reads zero right after the first change, the measurement
 * restarts at label 3.  If it reads zero after the second change, the
 * delay slot first restores %pstate and the code restarts at label 0,
 * which disables interrupts again.
 *
 * Note: the delay-slot comment on "mov %o2, %o4" mentions %o2, but the
 * copy lands in %o4, which is the baseline for the second loop.
 */
1102	ENTRY_NP(find_cpufrequency)
1103	rdpr	%pstate, %g1
1104
1105#ifdef DEBUG
1106	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
1107	bnz	0f			! are currently enabled
1108	sethi	%hi(find_cpufreq_panic), %o1
1109	call	panic
1110	or	%o1, %lo(find_cpufreq_panic), %o0
1111#endif	/* DEBUG */
1112
11130:
1114	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
11153:
1116	ldub	[%o0], %o1		! Read the number of seconds
1117	mov	%o1, %o2		! remember initial value in %o2
11181:
1119	GET_NATIVE_TIME(%o3, %g4, %g5)
1120	cmp	%o1, %o2		! did the seconds register roll over?
1121	be,pt	%icc, 1b		! branch back if unchanged
1122	ldub	[%o0], %o2		!   delay: load the new seconds val
1123
1124	brz,pn	%o2, 3b			! if the minutes just rolled over,
1125					! the last second could have been
1126					! inaccurate; try again.
1127	mov	%o2, %o4		!   delay: store init. val. in %o2
11282:
1129	GET_NATIVE_TIME(%o5, %g4, %g5)
1130	cmp	%o2, %o4		! did the seconds register roll over?
1131	be,pt	%icc, 2b		! branch back if unchanged
1132	ldub	[%o0], %o4		!   delay: load the new seconds val
1133
1134	brz,pn	%o4, 0b			! if the minutes just rolled over,
1135					! the last second could have been
1136					! inaccurate; try again.
1137	wrpr	%g0, %g1, %pstate	!   delay: re-enable interrupts
1138
1139	retl
1140	sub	%o5, %o3, %o0		! return the difference in ticks
1141	SET_SIZE(find_cpufrequency)
1142
1143#endif	/* lint */
1144
1145#if defined(lint)
1146/*
1147 * Prefetch a page_t for write or read; this assumes a linear
1148 * scan of sequential page_t's.
 *
 * These are the empty lint stubs; the assembly implementations are
 * selected per CPU type in the non-lint branch that follows.
1149 */
1150/*ARGSUSED*/
1151void
1152prefetch_page_w(void *pp)
1153{}
1154
1155/*ARGSUSED*/
1156void
1157prefetch_page_r(void *pp)
1158{}
1159#else	/* lint */
1160
1161#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1162	defined(SERRANO)
1163	!
1164	! On US-III, the prefetch instruction queue is 8 entries deep.
1165	! Also, prefetches for write put data in the E$, which has
1166	! lines of 512 bytes for an 8MB cache. Each E$ line is further
1167	! subblocked into 64 byte chunks.
1168	!
1169	! Since prefetch can only bring in 64 bytes at a time (See Sparc
1170	! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
1171	! then 2 prefetches are required in order to bring an entire
1172	! page into the E$.
1173	!
1174	! Since the prefetch queue is 8 entries deep, we currently can
1175	! only have 4 prefetches for page_t's outstanding. Thus, we
1176	! prefetch n+4 ahead of where we are now:
1177	!
1178	!      4 * sizeof(page_t)     -> 512
1179	!      4 * sizeof(page_t) +64 -> 576
1180	!
1181	! Example
1182	! =======
1183	! contiguous page array in memory...
1184	!
1185	! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
1186	! ^         ^         ^         ^         ^    ^
1187	! pp                                      |    pp+4*sizeof(page)+64
1188	!                                         |
1189	!                                         pp+4*sizeof(page)
1190	!
1191	!  Prefetch
1192	!   Queue
1193	! +-------+<--- In this iteration, we're working with pp (AAA1),
1194	! |Preftch|     but we enqueue prefetch for addr = XXX1
1195	! | XXX1  |
1196	! +-------+<--- this queue slot will be a prefetch instruction for
1197	! |Preftch|     addr = pp + 4*sizeof(page_t) + 64 (or second
1198	! | XXX2  |     half of page XXX)
1199	! +-------+
1200	! |Preftch|<-+- The next time around this function, we'll be
1201	! | YYY1  |  |  working with pp = BBB1, but will be enqueueing
1202	! +-------+  |  prefetches for both halves of page YYY,
1203	! |Preftch|  |  while both halves of page XXX are in transit,
1204	! | YYY2  |<-+  making their way into the E$.
1205	! +-------+
1206	! |Preftch|
1207	! | ZZZ1  |
1208	! +-------+
1209	! .       .
1210	! :       :
1211	!
1212	!  E$
1213	! +============================================...
1214	! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
1215	! +============================================...
1216	! |      |      |      |      |      |      |
1217	! +============================================...
1218	! .
1219	! :
1220	!
1221	! So we should expect the first four page accesses to stall
1222	! while we warm up the cache, after which most of the pages
1223	! will have their pp ready in the E$.
1224	!
1225	! Also note that if sizeof(page_t) grows beyond 128, then
1226	! we'll need an additional prefetch to get an entire page
1227	! into the E$, thus reducing the number of outstanding page
1228	! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
1229	! etc.
1230	!
1231	! Cheetah+
1232	! ========
1233	! On Cheetah+ we use "#n_writes" prefetches as these avoid an
1234	! unnecessary RTS->RTO bus transaction state change, and
1235	! just issue an RTO transaction. (See pp.77 of Cheetah+ Delta
1236	! PRM). On Cheetah, #n_writes prefetches are reflected with
1237	! RTS->RTO state transition regardless.
1238	!
1239#define STRIDE1 512
1240#define STRIDE2 576
1241
1242#if	STRIDE1 != (PAGE_SIZE * 4)
1243#error	"STRIDE1 != (PAGE_SIZE * 4)"
1244#endif	/* STRIDE1 != (PAGE_SIZE * 4) */
1245
/*
 * prefetch_page_w (CHEETAH, CHEETAH_PLUS, JALAPENO, SERRANO build):
 * issue two #n_writes prefetches, at %o0 + STRIDE1 and %o0 + STRIDE2.
 * The second prefetch sits in the delay slot of retl.
 */
1246        ENTRY(prefetch_page_w)
1247        prefetch        [%o0+STRIDE1], #n_writes
1248        retl
1249        prefetch        [%o0+STRIDE2], #n_writes
1250        SET_SIZE(prefetch_page_w)
1251
1252	!
1253	! Note: on CHEETAH, to prefetch for read we actually use #one_write.
1254	! This fetches to E$ (general use) rather than P$ (floating point use).
	!
	! prefetch_page_r issues two such prefetches, at %o0 + STRIDE1 and
	! %o0 + STRIDE2; the second sits in the delay slot of retl.
1255	!
1256        ENTRY(prefetch_page_r)
1257        prefetch        [%o0+STRIDE1], #one_write
1258        retl
1259        prefetch        [%o0+STRIDE2], #one_write
1260        SET_SIZE(prefetch_page_r)
1261
1262#elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1263
1264	!
1265	! UltraSparcII can have up to 3 prefetches outstanding.
1266	! A page_t is 128 bytes (2 prefetches of 64 bytes each)
1267	! So prefetch for pp + 1, which is
1268	!
1269	!       pp + sizeof(page_t)
1270	! and
1271	!       pp + sizeof(page_t) + 64
1272	!
1273#define STRIDE1	128
1274#define STRIDE2	192
1275
1276#if	STRIDE1 != PAGE_SIZE
1277#error	"STRIDE1 != PAGE_SIZE"
1278#endif	/* STRIDE1 != PAGE_SIZE */
1279
/*
 * prefetch_page_w (SPITFIRE, HUMMINGBIRD build): issue two #n_writes
 * prefetches, at %o0 + STRIDE1 and %o0 + STRIDE2.  The second prefetch
 * sits in the delay slot of retl.
 */
1280        ENTRY(prefetch_page_w)
1281        prefetch        [%o0+STRIDE1], #n_writes
1282        retl
1283        prefetch        [%o0+STRIDE2], #n_writes
1284        SET_SIZE(prefetch_page_w)
1285
/*
 * prefetch_page_r (SPITFIRE, HUMMINGBIRD build): issue two #n_reads
 * prefetches, at %o0 + STRIDE1 and %o0 + STRIDE2.  The second prefetch
 * sits in the delay slot of retl.
 */
1286        ENTRY(prefetch_page_r)
1287        prefetch        [%o0+STRIDE1], #n_reads
1288        retl
1289        prefetch        [%o0+STRIDE2], #n_reads
1290        SET_SIZE(prefetch_page_r)
1291
1292#elif defined(OLYMPUS_C)
1293	!
1294	! Prefetch strides for Olympus-C
1295	!
1296
1297#define STRIDE1	0x440
1298#define STRIDE2	0x640
1299
/*
 * prefetch_page_w (OLYMPUS_C build): issue two #n_writes prefetches,
 * at %o0 + STRIDE1 and %o0 + STRIDE2.  The second prefetch sits in the
 * delay slot of retl.
 * NOTE(review): unlike the other CPU groups, the Olympus-C strides have
 * no stated derivation and no PAGE_SIZE consistency check -- confirm
 * the values against the processor documentation.
 */
1300	ENTRY(prefetch_page_w)
1301        prefetch        [%o0+STRIDE1], #n_writes
1302	retl
1303        prefetch        [%o0+STRIDE2], #n_writes
1304	SET_SIZE(prefetch_page_w)
1305
/*
 * prefetch_page_r (OLYMPUS_C build): issue two prefetches, at
 * %o0 + STRIDE1 and %o0 + STRIDE2.  The second prefetch sits in the
 * delay slot of retl.
 * NOTE(review): the read variant uses #n_writes, making it identical
 * to prefetch_page_w on this CPU -- confirm this is intentional.
 */
1306	ENTRY(prefetch_page_r)
1307        prefetch        [%o0+STRIDE1], #n_writes
1308	retl
1309        prefetch        [%o0+STRIDE2], #n_writes
1310	SET_SIZE(prefetch_page_r)
1311#else	/* OLYMPUS_C */
1312
1313#error "You need to fix this for your new cpu type."
1314
1315#endif	/* OLYMPUS_C */
1316
1317#endif	/* lint */
1318
1319#if defined(lint)
1320/*
1321 * Prefetch struct smap for write.
1322 */
1323/*ARGSUSED*/
1324void
1325prefetch_smap_w(void *smp)
1326{}
1327#else	/* lint */
1328
1329#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1330	defined(SERRANO)
1331
1332#define	PREFETCH_Q_LEN 8
1333
1334#elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1335
1336#define	PREFETCH_Q_LEN 3
1337
1338#elif defined(OLYMPUS_C)
1339	!
1340	! Use length of one for now.
1341	!
1342#define	PREFETCH_Q_LEN	1
1343
1344#else 	/* OLYMPUS_C */
1345
1346#error You need to fix this for your new cpu type.
1347
1348#endif	/* OLYMPUS_C */
1349
1350#include <vm/kpm.h>
1351
1352#ifdef	SEGKPM_SUPPORT
1353
	!
	! With SEGKPM_SUPPORT, SMAP_SIZE is 72.  The stride is the number
	! of whole SMAP_SIZE objects that fit in PREFETCH_Q_LEN blocks of
	! 64 bytes, multiplied by 64.
	! NOTE(review): for PREFETCH_Q_LEN == 1 this works out to 0
	! (64 / 72 truncates to 0), so the prefetch would target [%o0]
	! itself -- confirm this is intended.
	!
1354#define	SMAP_SIZE 72
1355#define SMAP_STRIDE (((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
1356
1357#else	/* SEGKPM_SUPPORT */
1358
1359	!
1360	! The hardware will prefetch the 64 byte cache aligned block
1361	! that contains the address specified in the prefetch instruction.
1362	! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
1363	! per pass will suffice as long as we prefetch far enough ahead to
1364	! make sure we don't stall for the cases where the smap object
1365	! spans multiple hardware prefetch blocks.  Let's prefetch as far
1366	! ahead as the hardware will allow.
1367	!
1368	! The smap array is processed with decreasing address pointers.
1369	!
1370#define	SMAP_SIZE 48
1371#define	SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
1372
1373#endif	/* SEGKPM_SUPPORT */
1374
/*
 * prefetch_smap_w(smp): issue a single #n_writes prefetch for the
 * address SMAP_STRIDE bytes below %o0 (smp).  The prefetch sits in the
 * delay slot of retl.
 */
1375	ENTRY(prefetch_smap_w)
1376	retl
1377	prefetch	[%o0-SMAP_STRIDE], #n_writes
1378	SET_SIZE(prefetch_smap_w)
1379
1380#endif	/* lint */
1381
1382#if defined(lint) || defined(__lint)
1383
1384/* ARGSUSED */
1385uint64_t
1386getidsr(void)
1387{ return 0; }
1388
1389#else	/* lint */
1390
/*
 * getidsr(): return in %o0 the extended word loaded (ldxa) from
 * address %g0 in the ASI_INTR_DISPATCH_STATUS address space.  The load
 * sits in the delay slot of retl.
 */
1391	ENTRY_NP(getidsr)
1392	retl
1393	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %o0
1394	SET_SIZE(getidsr)
1395
1396#endif	/* lint */
1397