xref: /titanic_52/usr/src/uts/sun4u/cpu/common_asm.s (revision 9e3700df0ac0106c9153db91edd9fe719345aa3c)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#if !defined(lint)
29#include "assym.h"
30#endif	/* !lint */
31
32/*
33 * General assembly language routines.
34 * It is the intent of this file to contain routines that are
35 * specific to cpu architecture.
36 */
37
38/*
39 * WARNING: If you add a fast trap handler that can be invoked by a
40 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
41 * instead of the "done" instruction to return to user mode.  See the
42 * comments for the "fast_trap_done" entry point for more information.
 *
 * FAST_TRAP_DONE expands to a single annulled branch (ba,a) to
 * fast_trap_done.
43 */
44#define	FAST_TRAP_DONE	\
45	ba,a	fast_trap_done
46
47/*
48 * Override GET_NATIVE_TIME for the cpu module code.  This is not
49 * guaranteed to be exactly one instruction, be careful of using
50 * the macro in delay slots.
51 *
52 * Do not use any instruction that modifies condition codes as the
53 * caller may depend on these to remain unchanged across the macro.
54 */
55#if defined(CHEETAH) || defined(OLYMPUS_C)
56
/*
 * CHEETAH / OLYMPUS_C variants: the native time source is the STICK
 * register and the compare register is STICK_COMPARE, both accessed
 * directly with rd/wr.  Each macro expands to plain rd/add/wr
 * instructions; the scratch-register and label arguments are accepted
 * for interface compatibility with the other variants and are not used.
 *
 *	GET_NATIVE_TIME(out, ...)	out = STICK
 *	DELTA_NATIVE_TIME(delta, reg, ...) STICK += delta (reg is clobbered)
 *	RD_TICKCMPR(out, ...)		out = STICK_COMPARE
 *	WR_TICKCMPR(in, ...)		STICK_COMPARE = in
 */
57#define	GET_NATIVE_TIME(out, scr1, scr2) \
58	rd	STICK, out
59#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
60	rd	STICK, reg;		\
61	add	reg, delta, reg;	\
62	wr	reg, STICK
63#define	RD_TICKCMPR(out, scr)		\
64	rd	STICK_COMPARE, out
65#define	WR_TICKCMPR(in, scr1, scr2, label) \
66	wr	in, STICK_COMPARE
67
68#elif defined(HUMMINGBIRD)
69#include <sys/spitregs.h>
70
71/*
72 * The current hummingbird versions of %stick and %stick_cmp
73 * are each implemented as two 32-bit locations in ASI_IO space;
74 * the hardware should support atomic r/w; meanwhile: ugly alert! ...
75 *
76 * 64-bit opcodes are required, but move only 32-bits:
77 *
78 * ldxa [phys]ASI_IO, %dst 	reads  the low 32-bits from phys into %dst
79 * stxa %src, [phys]ASI_IO 	writes the low 32-bits from %src into phys
80 *
81 * reg equivalent		[phys]ASI_IO
82 * ------------------		---------------
83 * %stick_cmp  low-32		0x1FE.0000.F060
84 * %stick_cmp high-32		0x1FE.0000.F068
85 * %stick      low-32		0x1FE.0000.F070
86 * %stick     high-32		0x1FE.0000.F078
 *
 * The constants below are the low address byte of each location, i.e.
 * the "byte" argument handed to SETL41().  HST_DIFF is the distance
 * between the low and high halves of either register.
87 */
88#define	HSTC_LOW	0x60			/* stick_cmp low  32-bits */
89#define	HSTC_HIGH	0x68			/* stick_cmp high 32-bits */
90#define	HST_LOW		0x70			/* stick low  32-bits */
91#define	HST_HIGH	0x78			/* stick high 32-bits */
92#define	HST_DIFF	0x08			/* low<-->high diff */
93
94/*
 * SETL41(reg, byte) builds the address 0x1FE.0000.F0xx in reg, where
 * xx is "byte" (one of the HST_ or HSTC_ offsets).  It is exactly four
 * instructions; the running value of reg is shown beside each one.
 *
95 * Any change in the number of instructions in SETL41()
96 * will affect SETL41_OFF
97 */
98#define	SETL41(reg, byte) \
99	sethi	%hi(0x1FE00000), reg;		/* 0000.0000.1FE0.0000 */ \
100	or	reg, 0xF, reg;			/* 0000.0000.1FE0.000F */ \
101	sllx	reg, 12, reg;			/* 0000.01FE.0000.F000 */ \
102	or	reg, byte, reg;			/* 0000.01FE.0000.F0xx */
103
104/*
105 * SETL41_OFF is used to calculate the relative PC value when a
106 * branch instruction needs to go back over the SETL41() macro.  It is
 * the size of SETL41() in bytes (4 instructions * 4 bytes).
107 */
108#define SETL41_OFF  16
109
110/*
111 * reading stick requires 2 loads, and there could be an intervening
112 * low-to-high 32-bit rollover resulting in a return value that is
113 * off by about (2 ^ 32); this rare case is prevented by re-reading
114 * the low-32 bits after the high-32 and verifying the "after" value
115 * is >= the "before" value; if not, the macros below branch back and
 * repeat the read sequence.
116 *
117 * this method is limited to 1 rollover, and based on the fixed
118 * stick-frequency (5555555), requires the loads to complete within
119 * 773 seconds; incrementing the high-32 value will not overflow for
120 * about 52644 years.
121 *
122 * writing stick requires 2 stores; if the old/new low-32 value is
123 * near 0xffffffff, there could be another rollover (also rare).
124 * to prevent this, we first write a 0 to the low-32, then write
125 * new values to the high-32 then the low-32.
126 *
127 * When we detect a carry in the lower %stick register, we need to
128 * read HST_HIGH again.  However, at the point where we detect this,
129 * we need to rebuild the register address HST_HIGH.  This involves more
130 * than one instruction, so a branch is unavoidable.  However, most of
131 * the time, there is no carry.  So we take the penalty of a branch
132 * instruction only when there is a carry (less frequent).
133 *
134 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
135 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
136 * addr already points to HST_LOW.
137 *
138 * NOTE: this method requires disabling interrupts before using
139 * DELTA_NATIVE_TIME.
 *
 * GET_NATIVE_TIME(out, scr, tmp): out = 64-bit stick value; scr and tmp
 * are clobbered (scr ends up holding the second low-32 read, which is
 * why the address must be rebuilt on retry).  The branch offset
 * .-(SETL41_OFF+24) steps back over the six instructions between
 * SETL41() and the brlz (24 bytes) plus SETL41() itself.  The sllx in
 * the delay slot also executes on the retry path; out is reloaded
 * there, so this is harmless.  No condition codes are modified.
140 */
141#define	GET_NATIVE_TIME(out, scr, tmp)	\
142	SETL41(scr, HST_LOW);		\
143	ldxa	[scr]ASI_IO, tmp;	\
144	inc	HST_DIFF, scr;		\
145	ldxa	[scr]ASI_IO, out;	\
146	dec	HST_DIFF, scr;		\
147	ldxa	[scr]ASI_IO, scr;	\
148	sub	scr, tmp, tmp;		\
149	brlz,pn tmp, .-(SETL41_OFF+24); \
150	sllx	out, 32, out;		\
151	or	out, scr, out
/*
 * DELTA_NATIVE_TIME(delta, addr, high, low, tmp): add delta to the
 * hummingbird stick.
 *
 * The value is read as low, high, low; if the second low read is
 * smaller than the first, the brlz retries from the first load (.-24 is
 * six instructions back; addr still points at HST_LOW).  The halves are
 * combined, delta is added, and the result is split again.  The write
 * stores zero to the low half first, then the new high half, then the
 * new low half.  addr, high, low and tmp are all clobbered.
 */
152#define	DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
153	SETL41(addr, HST_LOW);		\
154	ldxa	[addr]ASI_IO, tmp;	\
155	inc	HST_DIFF, addr;		\
156	ldxa	[addr]ASI_IO, high;	\
157	dec	HST_DIFF, addr;		\
158	ldxa	[addr]ASI_IO, low;	\
159	sub	low, tmp, tmp;		\
160	brlz,pn tmp, .-24;		\
161	sllx	high, 32, high;		\
162	or	high, low, high;	\
163	add	high, delta, high;	\
164	srl	high, 0, low;		\
165	srlx	high, 32, high;		\
166	stxa	%g0, [addr]ASI_IO;	\
167	inc	HST_DIFF, addr;		\
168	stxa	high, [addr]ASI_IO;	\
169	dec	HST_DIFF, addr;		\
170	stxa	low, [addr]ASI_IO
/*
 * RD_TICKCMPR(out, scr): out = 64-bit stick_cmp value, assembled from
 * the low half (HSTC_LOW) and the high half (HSTC_LOW + HST_DIFF).
 * scr is clobbered.  Unlike GET_NATIVE_TIME() there is no re-read or
 * retry here.
 */
171#define RD_TICKCMPR(out, scr)		\
172	SETL41(scr, HSTC_LOW);		\
173	ldxa	[scr]ASI_IO, out;	\
174	inc	HST_DIFF, scr;		\
175	ldxa	[scr]ASI_IO, scr;	\
176	sllx	scr, 32, scr;		\
177	or	scr, out, out
/*
 * WR_TICKCMPR(in, scra, scrd, label): write the 64-bit value in "in" to
 * stick_cmp.  The high half is stored first (in >> 32 to HSTC_HIGH),
 * then "in" itself is stored to the low location.  scra and scrd are
 * clobbered; "in" is preserved; label is unused in this variant.
 */
178#define WR_TICKCMPR(in, scra, scrd, label) \
179	SETL41(scra, HSTC_HIGH);	\
180	srlx	in, 32, scrd;		\
181	stxa	scrd, [scra]ASI_IO;	\
182	dec	HST_DIFF, scra;		\
183	stxa	in, [scra]ASI_IO
184
185#else	/* !CHEETAH && !HUMMINGBIRD */
186
/*
 * Default variants (no CHEETAH, OLYMPUS_C or HUMMINGBIRD): the native
 * time source is %tick, accessed with rdpr/wrpr, and the compare
 * register is TICK_COMPARE.  The scratch arguments are unused.
 */
187#define	GET_NATIVE_TIME(out, scr1, scr2) \
188	rdpr	%tick, out
189#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
190	rdpr	%tick, reg;		\
191	add	reg, delta, reg;	\
192	wrpr	reg, %tick
193#define	RD_TICKCMPR(out, scr)		\
194	rd	TICK_COMPARE, out
195#ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
196/*
197 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
198 * The failure occurs only when the following instruction decodes to wr or
199 * wrpr.  The workaround is to immediately follow writes to TICK_COMPARE
200 * with a read, thus stalling the pipe and keeping following instructions
201 * from causing data corruption.  Aligning to a quadword will ensure these
202 * two instructions are not split due to i$ misses.
 *
 * NOTE(review): the text above says "quadword" but the directive used
 * below is ".align 64" -- confirm which is intended.
 *
 * The macro branches over the alignment padding to a local label built
 * from the "label" argument, so each expansion in the file must be
 * given a distinct label; the callers in this file pass __LINE__.
 * scr1 and scr2 are unused here.
203 */
204#define WR_TICKCMPR(cmpr,scr1,scr2,label)	\
205	ba,a	.bb_errata_1.label		;\
206	.align	64				;\
207.bb_errata_1.label:				;\
208	wr	cmpr, TICK_COMPARE		;\
209	rd	TICK_COMPARE, %g0
210#else	/* BB_ERRATA_1 */
/* No errata workaround: a single write; scratch/label arguments unused. */
211#define	WR_TICKCMPR(in,scr1,scr2,label)		\
212	wr	in, TICK_COMPARE
213#endif	/* BB_ERRATA_1 */
214
215#endif	/* !CHEETAH && !HUMMINGBIRD */
216
217#include <sys/clock.h>
218
219#if defined(lint)
220#include <sys/types.h>
221#include <sys/scb.h>
222#include <sys/systm.h>
223#include <sys/regset.h>
224#include <sys/sunddi.h>
225#include <sys/lockstat.h>
226#endif	/* lint */
227
228
229#include <sys/asm_linkage.h>
230#include <sys/privregs.h>
231#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
232#include <sys/machthread.h>
233#include <sys/clock.h>
234#include <sys/intreg.h>
235#include <sys/psr_compat.h>
236#include <sys/isa_defs.h>
237#include <sys/dditypes.h>
238#include <sys/intr.h>
239
240#if !defined(lint)
241#include "assym.h"
242#endif	/* !lint */
243
244#if defined(lint)
245
/*
 * get_impl() returns whatever the GET_CPU_IMPL macro produces
 * (presumably the CPU implementation number -- the macro is defined
 * outside this file).  This is the lint stub.
 */
246uint_t
247get_impl(void)
248{ return (0); }
249
250#else	/* lint */
251
/* Leaf routine: GET_CPU_IMPL writes its result directly into %o0. */
252	ENTRY(get_impl)
253	GET_CPU_IMPL(%o0)
254	retl
255	nop
256	SET_SIZE(get_impl)
257
258#endif	/* lint */
259
260#if defined(lint)
261/*
262 * Softint generated when counter field of tick reg matches value field
263 * of tick_cmpr reg
 *
 * tickcmpr_set(clock_cycles) programs the compare register with
 * clock_cycles and then re-reads the native time to make sure the
 * programmed value is still in the future.  If it is not, the target is
 * moved to (current time + step) and rewritten, with the step doubling
 * on every attempt, until a value in the future has been written.
264 */
265/*ARGSUSED*/
266void
267tickcmpr_set(uint64_t clock_cycles)
268{}
269
270#else	/* lint */
271
/*
 * Register use:
 *	%o2 = value written to the compare register
 *	%o3 = step size; starts at 8 and is doubled in the (non-annulled)
 *	      delay slot of the bg below, so the first retry adds 16
 *	%o0 = current native time with bit 63 cleared (sllx/srlx by 1)
 *	%o4, %o5 = scratch for WR_TICKCMPR / GET_NATIVE_TIME
 */
272	ENTRY_NP(tickcmpr_set)
273	! get 64-bit clock_cycles interval
274	mov	%o0, %o2
275	mov	8, %o3			! A reasonable initial step size
2761:
277	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR
278
279	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
280	sllx	%o0, 1, %o0		!   value we wrote was in the future.
281	srlx	%o0, 1, %o0
282
283	cmp	%o2, %o0		! If the value we wrote was in the
284	bg,pt	%xcc, 2f		!   future, then blow out of here.
285	sllx	%o3, 1, %o3		! If not, then double our step size,
286	ba,pt	%xcc, 1b		!   and take another lap.
287	add	%o0, %o3, %o2		!
2882:
289	retl
290	nop
291	SET_SIZE(tickcmpr_set)
292
293#endif	/* lint */
294
295#if defined(lint)
296
/*
 * tickcmpr_disable() writes a value with only bit TICKINT_DIS_SHFT set
 * to the tick compare register (via WR_TICKCMPR).
 */
297void
298tickcmpr_disable(void)
299{}
300
301#else	/* lint */
302
/* %o0 = 1 << TICKINT_DIS_SHFT; %g1, %o4 and %o5 are scratch. */
303	ENTRY_NP(tickcmpr_disable)
304	mov	1, %g1
305	sllx	%g1, TICKINT_DIS_SHFT, %o0
306	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
307	retl
308	nop
309	SET_SIZE(tickcmpr_disable)
310
311#endif	/* lint */
312
312#if defined(lint)
313
314/*
315 * tick_write_delta() increments %tick by the specified delta.  This should
316 * only be called after a CPR event to assure that gethrtime() continues to
317 * increase monotonically.  Obviously, writing %tick needs to be done very
318 * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
319 * this reason, we make sure we're i-cache hot before actually writing to
320 * %tick.
 *
 * Interrupts are expected to be enabled on entry (DEBUG kernels panic
 * if they are not).  They are disabled around the read/add/write
 * sequence and the caller's %pstate is restored on return.
321 */
322/*ARGSUSED*/
323void
324tick_write_delta(uint64_t delta)
325{}
326
327#else	/* lint */
328
329#ifdef DEBUG
330	.seg	".text"
331tick_write_panic:
332	.asciz	"tick_write_delta: interrupts already disabled on entry"
333#endif	/* DEBUG */
334
/*
 * Register use:
 *	%g1 = %pstate on entry (restored in the retl delay slot)
 *	%o2 = delta
 *	%o3, %o4, %o5, %g2 = scratch for DELTA_NATIVE_TIME
 *
 * In the DEBUG check, the sethi in the bnz delay slot targets %o1; after
 * the save that register is visible as %i1, which is what the "or" in
 * the call delay slot uses to finish building the panic string address.
 */
335	ENTRY_NP(tick_write_delta)
336	rdpr	%pstate, %g1
337#ifdef DEBUG
338	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
339	bnz	0f			! aren't already disabled.
340	sethi	%hi(tick_write_panic), %o1
341        save    %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
342	call	panic
343	or	%i1, %lo(tick_write_panic), %o0
344#endif	/* DEBUG */
3450:	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
346	mov	%o0, %o2
347	ba	0f			! Branch to cache line-aligned instr.
348	nop
349	.align	16
3500:	nop				! The next 3 instructions are now hot.
351	DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2)	! read/inc/write %tick
352
353	retl				! Return
354	wrpr	%g0, %g1, %pstate	!     delay: Re-enable interrupts
	SET_SIZE(tick_write_delta)
355#endif	/* lint */
357
358#if defined(lint)
359/*
360 *  return 1 if disabled
 *
 *  The result is the tick compare register shifted right by
 *  TICKINT_DIS_SHFT.
361 */
362
363int
364tickcmpr_disabled(void)
365{ return (0); }
366
367#else	/* lint */
368
/*
 * %g1 = compare register value (%o0 is scratch for RD_TICKCMPR); the
 * shift into %o0 is done in the retl delay slot.
 */
369	ENTRY_NP(tickcmpr_disabled)
370	RD_TICKCMPR(%g1, %o0)
371	retl
372	srlx	%g1, TICKINT_DIS_SHFT, %o0
373	SET_SIZE(tickcmpr_disabled)
374
375#endif	/* lint */
376
377/*
378 * Get current tick
 *
 * Returns the raw GET_NATIVE_TIME() value in %o0 (%stick or %tick,
 * depending on how this file is built); no bits are masked off.
 * %o2 and %o3 are scratch.
379 */
380#if defined(lint)
381
382u_longlong_t
383gettick(void)
384{ return (0); }
385
386#else	/* lint */
387
388	ENTRY(gettick)
389	GET_NATIVE_TIME(%o0, %o2, %o3)
390	retl
391	nop
392	SET_SIZE(gettick)
393
394#endif	/* lint */
395
396
397/*
398 * Return the counter portion of the tick register.
 *
 * This always reads %tick directly (it does not use GET_NATIVE_TIME)
 * and clears bit 63 by shifting left and then right by one.
399 */
400
401#if defined(lint)
402
403uint64_t
404gettick_counter(void)
405{ return(0); }
406
407#else	/* lint */
408
409	ENTRY_NP(gettick_counter)
410	rdpr	%tick, %o0
411	sllx	%o0, 1, %o0
412	retl
413	srlx	%o0, 1, %o0		! shake off npt bit
414	SET_SIZE(gettick_counter)
415#endif	/* lint */
416
417/*
418 * Provide a C callable interface to the trap that reads the hi-res timer.
419 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
420 */
421
422#if defined(lint)
423
/*
 * Lint-only C stubs for the routines implemented in assembly in the
 * non-lint branch of this conditional.  They exist only to give lint
 * the prototypes; each returns zero or zeroes its output argument.
 */
424hrtime_t
425gethrtime(void)
426{
427	return ((hrtime_t)0);
428}
429
430hrtime_t
431gethrtime_unscaled(void)
432{
433	return ((hrtime_t)0);
434}
435
436hrtime_t
437gethrtime_max(void)
438{
439	return ((hrtime_t)0);
440}
441
/* The real routine converts *hrt in place from native time to nsec. */
442void
443scalehrtime(hrtime_t *hrt)
444{
445	*hrt = 0;
446}
447
448void
449gethrestime(timespec_t *tp)
450{
451	tp->tv_sec = 0;
452	tp->tv_nsec = 0;
453}
454
455time_t
456gethrestime_sec(void)
457{
458	return (0);
459}
460
461void
462gethrestime_lasttick(timespec_t *tp)
463{
464	tp->tv_sec = 0;
465	tp->tv_nsec = 0;
466}
467
468/*ARGSUSED*/
469void
470hres_tick(void)
471{
472}
473
474void
475panic_hres_tick(void)
476{
477}
478
479#else	/* lint */
480
/*
 * gethrtime(): GET_HRTIME (defined outside this file) leaves the
 * hrtime in %g1 and uses the remaining registers as scratch; the
 * result is copied to %o0 in the retl delay slot.
 */
481	ENTRY_NP(gethrtime)
482	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
483							! %g1 = hrtime
484	retl
485	mov	%g1, %o0
486	SET_SIZE(gethrtime)
487
/*
 * gethrtime_unscaled(): return the raw native time (no conversion to
 * nanoseconds) in %o0.  %o2 and %o3 are scratch.
 */
488	ENTRY_NP(gethrtime_unscaled)
489	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
490	retl
491	mov	%g1, %o0
492	SET_SIZE(gethrtime_unscaled)
493
/*
 * gethrtime_waitfree() / dtrace_gethrtime(): read the native time and
 * convert it with NATIVE_TIME_TO_NSEC (defined outside this file).
 * Unlike gethrtime(), this path does not go through GET_HRTIME.
 * Both names label the same code; %o2 and %o3 are scratch.
 */
494	ENTRY_NP(gethrtime_waitfree)
495	ALTENTRY(dtrace_gethrtime)
496	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
497	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
498	retl
499	mov	%g1, %o0
500	SET_SIZE(dtrace_gethrtime)
501	SET_SIZE(gethrtime_waitfree)
502
/*
 * gethrtime_max(): convert NATIVE_TIME_MAX to nanoseconds.  If the
 * converted value is negative, return the largest positive 64-bit
 * value instead (-1 shifted right logically by one).  The branch is
 * annulled, so the srlx replaces %g1 only when %g1 is negative; the
 * branch target is simply the next instruction.
 */
503	ENTRY(gethrtime_max)
504	NATIVE_TIME_MAX(%g1)
505	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
506
507	! hrtime_t's are signed, max hrtime_t must be positive
508	mov	-1, %o2
509	brlz,a	%g1, 1f
510	srlx	%o2, 1, %g1
5111:
512	retl
513	mov	%g1, %o0
514	SET_SIZE(gethrtime_max)
515
/*
 * scalehrtime(hrtime_t *hrt): load *hrt, convert it with
 * NATIVE_TIME_TO_NSEC, and store it back through the same pointer
 * (the store sits in the retl delay slot).  %o2 and %o3 are scratch.
 */
516	ENTRY(scalehrtime)
517	ldx	[%o0], %o1
518	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
519	retl
520	stx	%o1, [%o0]
521	SET_SIZE(scalehrtime)
522
523/*
524 * Fast trap to return a timestamp, uses trap window, leaves traps
525 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
526 *
527 * This is the handler for the ST_GETHRTIME trap.
 *
 * The 64-bit hrtime is computed in %g1 and split: %o0 receives the
 * high 32 bits and %o1 the low 32 bits.  %g2-%g5 and %o2 are scratch.
 * The handler exits through FAST_TRAP_DONE rather than a return.
528 */
529
530	ENTRY_NP(get_timestamp)
531	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
532	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
533	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
534	FAST_TRAP_DONE
535	SET_SIZE(get_timestamp)
536
537/*
538 * Macro to convert GET_HRESTIME() bits into a timestamp.
539 *
540 * We use two separate macros so that the platform-dependent GET_HRESTIME()
541 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 *
 * All arguments are registers:
 *	hrestsec  (in/out) hrestime seconds
 *	hrestnsec (in/out) hrestime nanoseconds
 *	adj	  (in, clobbered) pending hrestime_adj
 *	nslt	  (in, clobbered) nanoseconds since the last tick
 *	nano	  (in) the value to compare tv_nsec against (NANOSEC)
 *
 * hrestnsec always has nslt added.  If adj is non-zero, an adjustment
 * is also applied whose magnitude is limited to nslt >> ADJ_SHIFT: for
 * positive adj it is min(adj, nslt >> ADJ_SHIFT); for negative adj it
 * is adj itself when adj + (nslt >> ADJ_SHIFT) >= 0, otherwise
 * -(nslt >> ADJ_SHIFT).  Finally a single carry from nanoseconds into
 * seconds is performed if hrestnsec >= nano.
 *
 * The macro modifies the condition codes and defines the numeric local
 * labels 2, 3 and 4.
542 */
543#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
544	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
545	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */	\
546	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
547	srl	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
548	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
549	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
550	ba	3f;			/* go convert to sec/nsec */	\
551	add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
5522:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
553	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
554	add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
555	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
5563:	cmp	hrestnsec, nano;	/* more than a billion? */	\
557	bl,pt	%xcc, 4f;		/* if not, we're done */	\
558	nop;				/* delay: do nothing :( */	\
559	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
560	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
5614:
562
/*
 * gethrestime(timespec_t *tp): %o0 = tp.  GET_HRESTIME fills
 * %o1 = seconds, %o2 = nanoseconds, %o3 = adj, %o4 = nslt, %o5 = nano
 * (as consumed by CONV_HRESTIME), with %g1-%g4 as scratch.  The
 * converted seconds and nanoseconds are stored to tp[0] and
 * tp + CLONGSIZE; the second store is in the retl delay slot.
 */
563	ENTRY_NP(gethrestime)
564	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
565	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
566	stn	%o1, [%o0]
567	retl
568	stn	%o2, [%o0 + CLONGSIZE]
569	SET_SIZE(gethrestime)
570
571/*
572 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
573 * seconds.
574 */
575	ENTRY_NP(gethrestime_sec)
576	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
577	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
578	retl					! %o0 current hrestime seconds
579	nop
580	SET_SIZE(gethrestime_sec)
581
582/*
583 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
584 * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
585 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
586 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
587 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
588 * it explicitly.)
589 */
590	ENTRY_NP(gethrestime_lasttick)
591	sethi	%hi(hres_lock), %o1
5920:
593	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
594	membar	#LoadLoad			! Load of lock must complete
595	andn	%o2, 1, %o2			! Mask off lowest bit
596	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
597	add	%o1, %lo(hrestime), %o4
598	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
599	membar	#LoadLoad			! All loads must complete
600	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
601	cmp	%o3, %o2			! If lock is locked or has
602	bne	0b				!   changed, retry.
603	stn	%g1, [%o0]			! Delay: store seconds
604	retl
605	stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
606	SET_SIZE(gethrestime_lasttick)
607
608/*
609 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
610 *
611 * This is the handler for the ST_GETHRESTIME trap.
612 */
613
614	ENTRY_NP(get_hrestime)
615	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
616	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
617	FAST_TRAP_DONE
618	SET_SIZE(get_hrestime)
619
620/*
621 * Fast trap to return lwp virtual time, uses trap window, leaves traps
622 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
623 * of nanoseconds consumed.
624 *
625 * This is the handler for the ST_GETHRVTIME trap.
626 *
627 * Register usage:
628 *	%o0, %o1 = return lwp virtual time
629 * 	%o2 = CPU/thread
630 * 	%o3 = lwp
631 * 	%g1 = scratch
632 * 	%g5 = scratch
633 */
634	ENTRY_NP(get_virtime)
635	GET_NATIVE_TIME(%g5, %g1, %g2)	! %g5 = native time in ticks
636	CPU_ADDR(%g2, %g3)			! CPU struct ptr to %g2
637	ldn	[%g2 + CPU_THREAD], %g2		! thread pointer to %g2
638	ldn	[%g2 + T_LWP], %g3		! lwp pointer to %g3
639
640	/*
641	 * Subtract start time of current microstate from time
642	 * of day to get increment for lwp virtual time.
643	 */
644	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
645	sub	%g5, %g1, %g5
646
647	/*
648	 * Add current value of ms_acct[LMS_USER]
649	 */
650	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
651	add	%g5, %g1, %g5
652	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)
653
654	srl	%g5, 0, %o1			! %o1 = lo32(%g5)
655	srlx	%g5, 32, %o0			! %o0 = hi32(%g5)
656
657	FAST_TRAP_DONE
658	SET_SIZE(get_virtime)
659
660
661
661	.seg	".text"
662hrtime_base_panic:
663	.asciz	"hrtime_base stepping back"
664
665
/*
 * hres_tick(): update hrtime_base and hrestime.
 *
 *  1. Acquire the lock byte at hres_lock + HRES_LOCK_OFFSET with ldstub,
 *     spinning with plain loads (ldub) while it is held.
 *  2. Read the native time, record it in hres_last_tick, and scale it
 *     to nanoseconds using nsec_scale.
 *  3. %i1 = scaled time - old hrtime_base (nanoseconds since the last
 *     update).  If hrtime_base is greater than the new value, release
 *     the lock and panic; otherwise store the new hrtime_base.
 *  4. If hrestime_adj is non-zero, compute adj: its magnitude is
 *     limited to %i1 >> ADJ_SHIFT and its sign follows hrestime_adj.
 *     timedelta -= adj, and hrestime_adj is set to the new timedelta.
 *  5. hrestime.tv_nsec += adj + %i1; if that reaches NANOSEC, carry one
 *     second into tv_sec and set one_sec to 1.
 *  6. Release the lock by incrementing the hres_lock word.
 *
 * All of the variables are addressed from a single sethi of
 * %hi(hrestime) held in %l4.  Note that the numeric label 9 is defined
 * twice: the first is the spin loop, the second is the panic path
 * reached by the forward branch "9f".
 */
666	ENTRY_NP(hres_tick)
667	save	%sp, -SA(MINFRAME), %sp	! get a new window
668
669	sethi	%hi(hrestime), %l4
670	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
6717:	tst	%l5
672	bz,pt	%xcc, 8f			! if we got it, drive on
673	ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
674	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
6759:	tst	%l5
676	bz,a,pn	%xcc, 7b
677	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
678	ba,pt	%xcc, 9b
679	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
6808:
681	membar	#StoreLoad|#StoreStore
682
683	!
684	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
685	!
686	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
687	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
688	stx	%l0, [%l4 + %lo(hres_last_tick)]! prev = current
689	! convert native time to nsecs
690	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)
691
692	sub	%l0, %g1, %i1			! get accurate nsec delta
693
694	ldx	[%l4 + %lo(hrtime_base)], %l1
695	cmp	%l1, %l0
696	bg,pn	%xcc, 9f
697	nop
698
699	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base
700
701	!
702	! apply adjustment, if any
703	!
704	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
705	brz	%l0, 2f
706						! hrestime_adj == 0 ?
707						! yes, skip adjustments
708	clr	%l5				! delay: set adj to zero
709	tst	%l0				! is hrestime_adj >= 0 ?
710	bge,pt	%xcc, 1f			! yes, go handle positive case
711	srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj
712
713	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
714	bl,pt	%xcc, 2f			! yes, use current adj
715	neg	%l5				! delay: %l5 = -adj
716	ba,pt	%xcc, 2f
717	mov	%l0, %l5			! no, so set adj = hrestime_adj
7181:
719	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
720	bl,a,pt	%xcc, 2f			! yes, set adj = hrestime_adj
721	mov	%l0, %l5			! delay: adj = hrestime_adj
7222:
723	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
724	sub	%l0, %l5, %l0			! timedelta -= adj
725
726	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
727	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta
728
729	or	%l4, %lo(hrestime), %l2
730	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
731	ldn	[%l2 + CLONGSIZE], %i3
732	add	%i3, %l5, %i3			! hrestime.nsec += adj
733	add	%i3, %i1, %i3			! hrestime.nsec += nslt
734
735	set	NANOSEC, %l5			! %l5 = NANOSEC
736	cmp	%i3, %l5
737	bl,pt	%xcc, 5f			! if hrestime.tv_nsec < NANOSEC
738	sethi	%hi(one_sec), %i1		! delay
739	add	%i2, 0x1, %i2			! hrestime.tv_sec++
740	sub	%i3, %l5, %i3			! hrestime.tv_nsec - NANOSEC
741	mov	0x1, %l5
742	st	%l5, [%i1 + %lo(one_sec)]
7435:
744	stn	%i2, [%l2]
745	stn	%i3, [%l2 + CLONGSIZE]		! store the new hrestime
746
747	membar	#StoreStore
748
749	ld	[%l4 + %lo(hres_lock)], %i1
750	inc	%i1				! release lock
751	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock
752
753	ret
754	restore
755
7569:
757	!
758	! release hres_lock
759	!
760	ld	[%l4 + %lo(hres_lock)], %i1
761	inc	%i1
762	st	%i1, [%l4 + %lo(hres_lock)]
763
764	sethi	%hi(hrtime_base_panic), %o0
765	call	panic
766	or	%o0, %lo(hrtime_base_panic), %o0
767
768	SET_SIZE(hres_tick)
770
771#endif	/* lint */
772
773#if !defined(lint) && !defined(__lint)
774
774	.seg	".text"
775kstat_q_panic_msg:
776	.asciz	"kstat_q_exit: qlen == 0"
777
/*
 * kstat_q_panic: branch target used by the kstat queue "exit" routines
 * below when the queue length is already zero.  It takes a new register
 * window and calls panic() with kstat_q_panic_msg.
 */
778	ENTRY(kstat_q_panic)
779	save	%sp, -SA(MINFRAME), %sp
780	sethi	%hi(kstat_q_panic_msg), %o0
781	call	panic
782	or	%o0, %lo(kstat_q_panic_msg), %o0
783	/*NOTREACHED*/
784	SET_SIZE(kstat_q_panic)
786
/*
 * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
 *
 * Updates one queue in the structure pointed to by %o0; %g1 must hold
 * the current time.  QTYPE is an offset-name prefix that is pasted
 * (with the empty-comment idiom) onto CNT, LASTUPDATE, TIME and LENTIME.
 *
 *	QOP	add or sub: new qlen = old qlen QOP 1
 *	QBR	branch-on-register-zero instruction (BRZPN or BRZPT) that
 *		tests the OLD qlen; the store of the new qlen is in its
 *		delay slot, so it happens whether or not the branch is
 *		taken
 *	QZERO	where to go when the old qlen was zero
 *	QRETURN	text placed before the final store of lastupdate: an
 *		optional label and/or "retl", which puts that store in
 *		the retl delay slot
 *
 * When the old qlen is non-zero:
 *	delta    = now - lastupdate
 *	time    += delta
 *	lentime += old qlen * delta
 * and in every case that reaches the end, lastupdate = now.
 * %o1-%o5 are clobbered.
 */
786#define	BRZPN	brz,pn
787#define	BRZPT	brz,pt
788
789#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
790	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
791	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
792	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
793	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
794	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
795	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
796	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
797	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
798	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
799	add	%o4, %o2, %o4;			/* %o4 = new time */	\
800	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
801	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
802	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
803QRETURN;								\
804	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
806
/*
 * kstat queue entry points.  Each takes the structure pointer in %o0,
 * samples the native time into %g1 (%g2, %g3 scratch) and expands
 * KSTAT_Q_UPDATE once or twice:
 *
 *   "enter" (add, BRZPT, 1f): if the queue was empty, only the count
 *	and lastupdate are written.
 *   "exit" (sub, BRZPN, kstat_q_panic): if the queue was already empty,
 *	control goes to kstat_q_panic.
 *
 * KSTAT_IO_W selects the wait queue fields and KSTAT_IO_R the run queue
 * fields.  The two "move" routines do an exit on one queue followed by
 * an enter on the other, using the same timestamp; the first expansion
 * passes "1:" as QRETURN so that it falls through into the second.
 */
806	.align 16
807	ENTRY(kstat_waitq_enter)
808	GET_NATIVE_TIME(%g1, %g2, %g3)
809	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
810	SET_SIZE(kstat_waitq_enter)
811
812	.align 16
813	ENTRY(kstat_waitq_exit)
814	GET_NATIVE_TIME(%g1, %g2, %g3)
815	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
816	SET_SIZE(kstat_waitq_exit)
817
818	.align 16
819	ENTRY(kstat_runq_enter)
820	GET_NATIVE_TIME(%g1, %g2, %g3)
821	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
822	SET_SIZE(kstat_runq_enter)
823
824	.align 16
825	ENTRY(kstat_runq_exit)
826	GET_NATIVE_TIME(%g1, %g2, %g3)
827	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
828	SET_SIZE(kstat_runq_exit)
829
830	.align 16
831	ENTRY(kstat_waitq_to_runq)
832	GET_NATIVE_TIME(%g1, %g2, %g3)
833	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
834	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
835	SET_SIZE(kstat_waitq_to_runq)
836
837	.align 16
838	ENTRY(kstat_runq_back_to_waitq)
839	GET_NATIVE_TIME(%g1, %g2, %g3)
840	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
841	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
842	SET_SIZE(kstat_runq_back_to_waitq)
844
845#endif	/* !(lint || __lint) */
846
847#ifdef lint
848
/* Lint-only C declarations of the variables defined in assembly below. */
849int64_t timedelta;
850hrtime_t hres_last_tick;
851timestruc_t hrestime;
852int64_t hrestime_adj;
853int hres_lock;
854uint_t nsec_scale;
855hrtime_t hrtime_base;
856int traptrace_use_stick;
857
858#else	/* lint */
859	/*
860	 *  -- WARNING --
861	 *
862	 * The following variables MUST be together on a 128-byte boundary.
863	 * In addition to the primary performance motivation (having them all
864	 * on the same cache line(s)), code here and in the GET*TIME() macros
865	 * assumes that they all have the same high 22 address bits (so
866	 * there's only one sethi).
867	 */
868	.seg	".data"
869	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
870	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
871	.global	nsec_shift, adj_shift
872
873	/*
	 * XXX - above comment claims 128-bytes is necessary, but the
	 * directive below only aligns to 64 bytes.
	 * NOTE(review): if .nword emits 8 bytes here, the block is 68
	 * bytes long and adj_shift lands at offset 64, i.e. outside the
	 * first 64-byte unit -- confirm whether anything addresses it
	 * through the shared sethi before changing the alignment.
	 */
874	.align	64
875timedelta:
876	.word	0, 0		/* int64_t */
877hres_last_tick:
878	.word	0, 0		/* hrtime_t */
879hrestime:
880	.nword	0, 0		/* 2 longs */
881hrestime_adj:
882	.word	0, 0		/* int64_t */
883hres_lock:
884	.word	0
885nsec_scale:
886	.word	0
887hrtime_base:
888	.word	0, 0
889traptrace_use_stick:
890	.word	0
891nsec_shift:
892	.word	NSEC_SHIFT
893adj_shift:
894	.word	ADJ_SHIFT
895
896#endif	/* lint */
897
898
899/*
900 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
901 * usec_delay(int n)		[compatibility - should go one day]
902 * Delay by spinning.
903 *
904 * delay for n microseconds.  numbers <= 0 delay 1 usec
905 *
906 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
907 * and variable clock rate for power management requires that we
908 * use %stick to implement this routine.
909 *
910 * For OPL platforms that support the "sleep" instruction, we
911 * conditionally (ifdef'ed) insert a "sleep" instruction in
912 * the loop. Note that theoretically we should have moved (duplicated)
913 * the code down to spitfire/us3/opl specific asm files - but this
914 * is a lot of code duplication just to add one "sleep" instruction.
915 * We chose less code duplication for this.
 *
 * Implementation notes:
 *   - the end time is now + n * sticks_per_usec + 1;
 *   - in the loop, the cmp is issued BEFORE the next GET_NATIVE_TIME(),
 *     so each comparison uses the previous time sample and the
 *     condition codes must survive the macro (which is why
 *     GET_NATIVE_TIME() must not modify them);
 *   - both entry points label the same code.
916 */
917
918#if defined(lint)
919
920/*ARGSUSED*/
921void
922drv_usecwait(clock_t n)
923{}
924
925/*ARGSUSED*/
926void
927usec_delay(int n)
928{}
929
930#else	/* lint */
931
932	ENTRY(drv_usecwait)
933	ALTENTRY(usec_delay)
934	brlez,a,pn %o0, 0f
935	mov	1, %o0
9360:
937	sethi	%hi(sticks_per_usec), %o1
938	lduw	[%o1 + %lo(sticks_per_usec)], %o1
939	mulx	%o1, %o0, %o1		! Scale usec to ticks
940	inc	%o1			! We don't start on a tick edge
941	GET_NATIVE_TIME(%o2, %o3, %o4)
942	add	%o1, %o2, %o1
943
9441:
945#ifdef	_OPL
946	.word 0x81b01060		! insert "sleep" instruction
947#endif /* _OPL */			! use byte code for now
948	cmp	%o1, %o2
949	GET_NATIVE_TIME(%o2, %o3, %o4)
950	bgeu,pt	%xcc, 1b
951	nop
952	retl
953	nop
954	SET_SIZE(usec_delay)
955	SET_SIZE(drv_usecwait)
956#endif	/* lint */
957
958#if defined(lint)
959
960/* ARGSUSED */
961void
962pil14_interrupt(int level)
963{}
964
965#else	/* lint */
966
967/*
968 * Level-14 interrupt prologue.
 *
 * Records profiling state in the CPU structure and then branches to
 * pil_interrupt_common:
 *   - CPU_PROFILE_PIL = the %pil value read on entry;
 *   - if the trapped context was privileged (TSTATE_PRIV set in
 *     %tstate): CPU_PROFILE_PC = %tpc and CPU_PROFILE_UPC = 0;
 *   - otherwise: CPU_PROFILE_UPC = %tpc and CPU_PROFILE_PC = 0.
 * The first store of %tpc is in an annulled delay slot, so it executes
 * only on the privileged path.  %g1, %g2, %g5 and %g6 are used.
969 */
970	ENTRY_NP(pil14_interrupt)
971	CPU_ADDR(%g1, %g2)
972	rdpr	%pil, %g6			! %g6 = interrupted PIL
973	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
974	rdpr	%tstate, %g6
975	rdpr	%tpc, %g5
976	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
977	bnz,a,pt %xcc, 1f
978	stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
979	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
980	ba	pil_interrupt_common		! must be large-disp branch
981	stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
9821:	ba	pil_interrupt_common		! must be large-disp branch
983	stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
984	SET_SIZE(pil14_interrupt)
985
/*
 * tick_rtt: if the tick compare register is enabled, re-enqueue the
 * level-14 interrupt request (cbe_level14_inum) and, when a TICKINT or
 * STICKINT softint is pending, clear it and make sure the programmed
 * compare value is still in the future -- reprogramming it with a
 * doubling step if it is not.  Always finishes by branching to
 * current_thread_complete.
 *
 * %g5 holds the %pstate read before interrupts are disabled; it is
 * written back to %pstate on each path that reaches label 2 after the
 * disable.
 */
986	ENTRY_NP(tick_rtt)
987	!
988	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
989	! disabled.  If TICK_COMPARE is enabled, we know that we need to
990	! reenqueue the interrupt request structure.  We'll then check TICKINT
991	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
992	! interrupt.  In this case, TICK_COMPARE may have been rewritten
993	! recently; we'll compare %o5 to the current time to verify that it's
994	! in the future.
995	!
996	! Note that %o5 is live until after 1f.
997	! XXX - there is a subroutine call while %o5 is live!
998	!
999	RD_TICKCMPR(%o5, %g1)
1000	srlx	%o5, TICKINT_DIS_SHFT, %g1
1001	brnz,pt	%g1, 2f
1002	nop
1003
1004	rdpr 	%pstate, %g5
1005	andn	%g5, PSTATE_IE, %g1
1006	wrpr	%g0, %g1, %pstate		! Disable vec interrupts
1007
1008	sethi	%hi(cbe_level14_inum), %o1
1009	ld	[%o1 + %lo(cbe_level14_inum)], %o1
1010	call	intr_enqueue_req ! preserves %o5 and %g5
1011	mov	PIL_14, %o0
1012
1013	! Check SOFTINT for TICKINT/STICKINT
1014	rd	SOFTINT, %o4
1015	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
1016	andcc	%o4, %o0, %g0
1017	bz,a,pn	%icc, 2f
1018	wrpr	%g0, %g5, %pstate		! Enable vec interrupts
1019
1020	! clear TICKINT/STICKINT
1021	wr	%o0, CLEAR_SOFTINT
1022
1023	!
1024	! Now that we've cleared TICKINT, we can reread %tick and confirm
1025	! that the value we programmed is still in the future.  If it isn't,
1026	! we need to reprogram TICK_COMPARE to fire as soon as possible.
1027	!
1028	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
1029	sllx	%o0, 1, %o0			! Clear the DIS bit
1030	srlx	%o0, 1, %o0
1031	cmp	%o5, %o0			! In the future?
1032	bg,a,pt	%xcc, 2f			! Yes, drive on.
1033	wrpr	%g0, %g5, %pstate		!   delay: enable vec intr
1034
1035	!
1036	! If we're here, then we have programmed TICK_COMPARE with a %tick
1037	! which is in the past; we'll now load an initial step size, and loop
1038	! until we've managed to program TICK_COMPARE to fire in the future.
1039	!
1040	mov	8, %o4				! 8 = arbitrary initial step
10411:	add	%o0, %o4, %o5			! Add the step
1042	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
1043	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
1044	sllx	%o0, 1, %o0			! Clear the DIS bit
1045	srlx	%o0, 1, %o0
1046	cmp	%o5, %o0			! In the future?
1047	bg,a,pt	%xcc, 2f			! Yes, drive on.
1048	wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
1049	ba	1b				! No, try again.
1050	sllx	%o4, 1, %o4			!    delay: double step size
1051
10522:	ba	current_thread_complete
1053	nop
1054	SET_SIZE(tick_rtt)
1055
1056#endif	/* lint */
1057
1058#if defined(lint) || defined(__lint)
1059
/*
 * find_cpufrequency(clock_ptr) returns the number of native-time ticks
 * that elapse between two consecutive changes of the byte at
 * *clock_ptr (a seconds register, per the comments in the assembly).
 * Interrupts are disabled while measuring.
 */
1060/* ARGSUSED */
1061uint64_t
1062find_cpufrequency(volatile uchar_t *clock_ptr)
1063{
1064	return (0);
1065}
1066
1067#else	/* lint */
1068
1069#ifdef DEBUG
1070	.seg	".text"
1071find_cpufreq_panic:
1072	.asciz	"find_cpufrequency: interrupts already disabled on entry"
1073#endif	/* DEBUG */
1074
/*
 * Register use:
 *	%o0 = clock_ptr		%g1 = %pstate on entry
 *	%o1, %o2 = seconds value before / after the first change
 *	%o3 = native time sampled around the first change
 *	%o4 = seconds value after the second change
 *	%o5 = native time sampled around the second change
 *	%g4, %g5 = scratch for GET_NATIVE_TIME
 *
 * The loads in the two be,pt delay slots are not annulled, so the byte
 * is re-read on every pass of each polling loop.  If the new seconds
 * value is zero after the first change, the measurement restarts at 3
 * with interrupts still disabled; if it is zero after the second
 * change, interrupts are re-enabled in the delay slot and the
 * measurement restarts at 0, which disables them again.
 */
1075	ENTRY_NP(find_cpufrequency)
1076	rdpr	%pstate, %g1
1077
1078#ifdef DEBUG
1079	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
1080	bnz	0f			! are currently enabled
1081	sethi	%hi(find_cpufreq_panic), %o1
1082	call	panic
1083	or	%o1, %lo(find_cpufreq_panic), %o0
1084#endif	/* DEBUG */
1085
10860:
1087	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
10883:
1089	ldub	[%o0], %o1		! Read the number of seconds
1090	mov	%o1, %o2		! remember initial value in %o2
10911:
1092	GET_NATIVE_TIME(%o3, %g4, %g5)
1093	cmp	%o1, %o2		! did the seconds register roll over?
1094	be,pt	%icc, 1b		! branch back if unchanged
1095	ldub	[%o0], %o2		!   delay: load the new seconds val
1096
1097	brz,pn	%o2, 3b			! if the minutes just rolled over,
1098					! the last second could have been
1099					! inaccurate; try again.
1100	mov	%o2, %o4		!   delay: remember init. val. in %o4
11012:
1102	GET_NATIVE_TIME(%o5, %g4, %g5)
1103	cmp	%o2, %o4		! did the seconds register roll over?
1104	be,pt	%icc, 2b		! branch back if unchanged
1105	ldub	[%o0], %o4		!   delay: load the new seconds val
1106
1107	brz,pn	%o4, 0b			! if the minutes just rolled over,
1108					! the last second could have been
1109					! inaccurate; try again.
1110	wrpr	%g0, %g1, %pstate	!   delay: re-enable interrupts
1111
1112	retl
1113	sub	%o5, %o3, %o0		! return the difference in ticks
1114	SET_SIZE(find_cpufrequency)
1115
1116#endif	/* lint */
1117
1118#if defined(lint)
1119/*
1120 * Prefetch a page_t for write or read; both routines assume a linear
1121 * scan of sequential page_t's.
1122 */
/*ARGSUSED*/
void
prefetch_page_w(void *pp)
{
	/* Lint stub only; the prefetch is done by the assembly version. */
}
1127
/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
	/* Lint stub only; the prefetch is done by the assembly version. */
}
1132#else	/* lint */
1133
1134#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1135	defined(SERRANO)
1136	!
1137	! On US-III, the prefetch instruction queue is 8 entries deep.
1138	! Also, prefetches for write put data in the E$, which has
1139	! lines of 512 bytes for an 8MB cache. Each E$ line is further
1140	! subblocked into 64 byte chunks.
1141	!
1142	! Since prefetch can only bring in 64 bytes at a time (See Sparc
1143	! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
1144	! then 2 prefetches are required in order to bring an entire
1145	! page into the E$.
1146	!
1147	! Since the prefetch queue is 8 entries deep, we currently can
1148	! only have 4 prefetches for page_t's outstanding. Thus, we
1149	! prefetch n+4 ahead of where we are now:
1150	!
1151	!      4 * sizeof(page_t)     -> 512
1152	!      4 * sizeof(page_t) +64 -> 576
1153	!
1154	! Example
1155	! =======
1156	! contiguous page array in memory...
1157	!
1158	! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
1159	! ^         ^         ^         ^         ^    ^
1160	! pp                                      |    pp+4*sizeof(page)+64
1161	!                                         |
1162	!                                         pp+4*sizeof(page)
1163	!
1164	!  Prefetch
1165	!   Queue
1166	! +-------+<--- In this iteration, we're working with pp (AAA1),
1167	! |Preftch|     but we enqueue prefetch for addr = XXX1
1168	! | XXX1  |
1169	! +-------+<--- this queue slot will be a prefetch instruction
1170	! |Preftch|     for addr = pp + 4*sizeof(page_t) + 64 (or second
1171	! | XXX2  |     half of page XXX)
1172	! +-------+
1173	! |Preftch|<-+- The next time around this function, we'll be
1174	! | YYY1  |  |  working with pp = BBB1, but will be enqueueing
1175	! +-------+  |  prefetches for both halves of page YYY,
1176	! |Preftch|  |  while both halves of page XXX are in transit,
1177	! | YYY2  |<-+  making their way into the E$.
1178	! +-------+
1179	! |Preftch|
1180	! | ZZZ1  |
1181	! +-------+
1182	! .       .
1183	! :       :
1184	!
1185	!  E$
1186	! +============================================...
1187	! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
1188	! +============================================...
1189	! |      |      |      |      |      |      |
1190	! +============================================...
1191	! .
1192	! :
1193	!
1194	! So we should expect the first four page accesses to stall
1195	! while we warm up the cache, after which most of the pages
1196	! will have their pp ready in the E$.
1197	!
1198	! Also note that if sizeof(page_t) grows beyond 128, then
1199	! we'll need an additional prefetch to get an entire page
1200	! into the E$, thus reducing the number of outstanding page
1201	! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
1202	! etc.
1203	!
1204	! Cheetah+
1205	! ========
1206	! On Cheetah+ we use "#n_writes" prefetches, as these avoid an
1207	! unnecessary RTS->RTO bus transaction state change and
1208	! just issue an RTO transaction. (See pp.77 of Cheetah+ Delta
1209	! PRM). On Cheetah, #n_writes prefetches still incur the
1210	! RTS->RTO state transition regardless.
1211	!
/*
 * prefetch_page_w(pp) / prefetch_page_r(pp) for CHEETAH, CHEETAH_PLUS,
 * JALAPENO and SERRANO.
 *
 * Each routine issues two prefetches relative to pp (%o0), at offsets
 * STRIDE1 (512) and STRIDE2 (576, i.e. STRIDE1 + 64), and returns; the
 * second prefetch occupies the retl delay slot.  The write variant uses
 * #n_writes and the read variant uses #one_write.  %o0 is not modified.
 *
 * The #if below breaks the build if STRIDE1 ever stops matching
 * PAGE_SIZE * 4.
 * NOTE(review): PAGE_SIZE here evidently stands for sizeof (page_t) (see
 * the "4 * sizeof(page_t) -> 512" note in the comment above), not the MMU
 * page size -- confirm where it is defined.
 */
1212#define STRIDE1 512
1213#define STRIDE2 576
1214
1215#if	STRIDE1 != (PAGE_SIZE * 4)
1216#error	"STRIDE1 != (PAGE_SIZE * 4)"
1217#endif	/* STRIDE1 != (PAGE_SIZE * 4) */
1218
1219        ENTRY(prefetch_page_w)
1220        prefetch        [%o0+STRIDE1], #n_writes
1221        retl
1222        prefetch        [%o0+STRIDE2], #n_writes
1223        SET_SIZE(prefetch_page_w)
1224
1225	!
1226	! Note on CHEETAH to prefetch for read, we really use #one_write.
1227	! This fetches to E$ (general use) rather than P$ (floating point use).
1228	!
1229        ENTRY(prefetch_page_r)
1230        prefetch        [%o0+STRIDE1], #one_write
1231        retl
1232        prefetch        [%o0+STRIDE2], #one_write
1233        SET_SIZE(prefetch_page_r)
1234
1235#elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1236
1237	!
1238	! UltraSparcII can have up to 3 prefetches outstanding.
1239	! A page_t is 128 bytes (2 prefetches of 64 bytes each)
1240	! So prefetch for pp + 1, which is
1241	!
1242	!       pp + sizeof(page_t)
1243	! and
1244	!       pp + sizeof(page_t) + 64
1245	!
/*
 * prefetch_page_w(pp) / prefetch_page_r(pp) for SPITFIRE and HUMMINGBIRD.
 *
 * Each routine issues two prefetches relative to pp (%o0), at offsets
 * STRIDE1 (128) and STRIDE2 (192, i.e. STRIDE1 + 64), and returns; the
 * second prefetch occupies the retl delay slot.  The write variant uses
 * #n_writes and the read variant uses #n_reads.  %o0 is not modified.
 *
 * The #if below breaks the build if STRIDE1 ever stops matching
 * PAGE_SIZE.
 * NOTE(review): PAGE_SIZE here evidently stands for sizeof (page_t) (the
 * comment above says a page_t is 128 bytes), not the MMU page size --
 * confirm where it is defined.
 */
1246#define STRIDE1	128
1247#define STRIDE2	192
1248
1249#if	STRIDE1 != PAGE_SIZE
1250#error	"STRIDE1 != PAGE_SIZE"
1251#endif	/* STRIDE1 != PAGE_SIZE */
1252
1253        ENTRY(prefetch_page_w)
1254        prefetch        [%o0+STRIDE1], #n_writes
1255        retl
1256        prefetch        [%o0+STRIDE2], #n_writes
1257        SET_SIZE(prefetch_page_w)
1258
1259        ENTRY(prefetch_page_r)
1260        prefetch        [%o0+STRIDE1], #n_reads
1261        retl
1262        prefetch        [%o0+STRIDE2], #n_reads
1263        SET_SIZE(prefetch_page_r)
1264
1265#elif defined(OLYMPUS_C)
1266	!
1267	!	Prefetch strides for Olympus-C
1268	!
1269
/*
 * prefetch_page_w(pp) / prefetch_page_r(pp) for OLYMPUS_C.
 *
 * Each routine issues two prefetches relative to pp (%o0), at offsets
 * STRIDE1 (512) and STRIDE2 (640, i.e. STRIDE1 + 128), and returns; the
 * second prefetch occupies the retl delay slot.  %o0 is not modified.
 *
 * Unlike the other CPU variants in this file, there is no build-time
 * check tying STRIDE1 to PAGE_SIZE here, and the two prefetch targets
 * are 128 bytes apart rather than 64.
 *
 * NOTE(review): prefetch_page_r uses #n_writes, the same function code as
 * prefetch_page_w, whereas the other variants use a different code for
 * the read routine -- confirm this is intentional for Olympus-C.
 */
1270#define STRIDE1	512
1271#define STRIDE2	640
1272
1273	ENTRY(prefetch_page_w)
1274        prefetch        [%o0+STRIDE1], #n_writes
1275	retl
1276        prefetch        [%o0+STRIDE2], #n_writes
1277	SET_SIZE(prefetch_page_w)
1278
1279	ENTRY(prefetch_page_r)
1280        prefetch        [%o0+STRIDE1], #n_writes
1281	retl
1282        prefetch        [%o0+STRIDE2], #n_writes
1283	SET_SIZE(prefetch_page_r)
1284#else	/* OLYMPUS_C */
1285
1286#error "You need to fix this for your new cpu type."
1287
1288#endif	/* OLYMPUS_C */
1289
1290#endif	/* lint */
1291
1292#if defined(lint)
/*
 * Lint-only stub: prefetch a struct smap ahead of a write.  The working
 * routine is the assembly version in the non-lint half of this
 * conditional.
 */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
	/* intentionally empty */
}
1300#else	/* lint */
1301
/*
 * PREFETCH_Q_LEN: per-CPU-type constant that feeds the SMAP_STRIDE
 * computation used by prefetch_smap_w.
 *
 *	CHEETAH, CHEETAH_PLUS, JALAPENO, SERRANO	8
 *	SPITFIRE, HUMMINGBIRD				3
 *	OLYMPUS_C					1 (marked TBD)
 *
 * Building for any other CPU type stops at the #error so that a value
 * has to be chosen deliberately.
 */
1302#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1303	defined(SERRANO)
1304
1305#define	PREFETCH_Q_LEN 8
1306
1307#elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1308
1309#define	PREFETCH_Q_LEN 3
1310
1311#elif defined(OLYMPUS_C)
1312	!
1313	! (TBD) Use length of one for now.
1314#define	PREFETCH_Q_LEN	1
1315
1316#else 	/* OLYMPUS_C */
1317
1318#error You need to fix this for your new cpu type.
1319
1320#endif	/* OLYMPUS_C */
1321
1322#include <vm/kpm.h>
1323
/*
 * prefetch_smap_w(smp)
 *
 * Issues a single #n_writes prefetch for the address smp - SMAP_STRIDE
 * (smp in %o0) from the delay slot of retl.  %o0 is not modified.
 *
 * SMAP_STRIDE is a build-time constant derived from PREFETCH_Q_LEN and
 * the hard-coded SMAP_SIZE:
 *
 *   SEGKPM_SUPPORT:	((PREFETCH_Q_LEN * 64) / 72) * 64, using integer
 *			division: 448 for a queue length of 8, 128 for 3,
 *			and 0 for 1 (the prefetch then targets smp itself).
 *   otherwise:		PREFETCH_Q_LEN * 48: 384, 144 and 48 respectively.
 *
 * NOTE(review): SMAP_SIZE (72 / 48) is presumably sizeof (struct smap)
 * with and without segkpm support; nothing here enforces that -- confirm
 * against the structure definition whenever it changes.
 */
1324#ifdef	SEGKPM_SUPPORT
1325
1326#define	SMAP_SIZE 72
1327#define SMAP_STRIDE (((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
1328
1329#else	/* SEGKPM_SUPPORT */
1330
1331	!
1332	! The hardware will prefetch the 64 byte cache aligned block
1333	! that contains the address specified in the prefetch instruction.
1334	! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
1335	! per pass will suffice as long as we prefetch far enough ahead to
1336	! make sure we don't stall for the cases where the smap object
1337	! spans multiple hardware prefetch blocks.  Let's prefetch as far
1338	! ahead as the hardware will allow.
1339	!
1340	! The smap array is processed with decreasing address pointers.
1341	!
1342#define	SMAP_SIZE 48
1343#define	SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
1344
1345#endif	/* SEGKPM_SUPPORT */
1346
1347	ENTRY(prefetch_smap_w)
1348	retl
1349	prefetch	[%o0-SMAP_STRIDE], #n_writes
1350	SET_SIZE(prefetch_smap_w)
1351
1352#endif	/* lint */
1353
1354#if defined(lint) || defined(__lint)
1355
/*
 * Lint-only stand-in for getidsr(); always yields zero.  The working
 * routine is the assembly version in the non-lint half of this
 * conditional.
 */
/* ARGSUSED */
uint64_t
getidsr(void)
{
	return (0);
}
1360
1361#else	/* lint */
1362
/*
 * uint64_t getidsr(void)
 *
 * Returns in %o0 the 64-bit value that ldxa loads from address 0 (%g0)
 * in the ASI_INTR_DISPATCH_STATUS address space.  The load sits in the
 * delay slot of retl, so it executes as the routine returns.  No other
 * register is modified.
 */
1363	ENTRY_NP(getidsr)
1364	retl
1365	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %o0
1366	SET_SIZE(getidsr)
1367
1368#endif	/* lint */
1369