xref: /titanic_41/usr/src/uts/sun4u/cpu/common_asm.s (revision c138f478d2bc94e73ab8f6a084e323bec25e62f5)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#if !defined(lint)
30#include "assym.h"
31#endif	/* !lint */
32
33/*
34 * General assembly language routines.
35 * It is the intent of this file to contain routines that are
36 * specific to cpu architecture.
37 */
38
39/*
40 * WARNING: If you add a fast trap handler which can be invoked by a
41 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
42 * instead of "done" instruction to return back to the user mode. See
43 * comments for the "fast_trap_done" entry point for more information.
44 */
/*
 * FAST_TRAP_DONE expands to a single annulled branch-always ("ba,a") to
 * the fast_trap_done label, which is defined outside this part of the
 * file.  Because the annul bit is set on an unconditional branch, the
 * instruction following the macro is not executed as a delay slot, so
 * the macro can be used as the last statement of a handler.
 */
45#define	FAST_TRAP_DONE	\
46	ba,a	fast_trap_done
47
48/*
49 * Override GET_NATIVE_TIME for the cpu module code.  This is not
50 * guaranteed to be exactly one instruction, be careful of using
51 * the macro in delay slots.
52 *
53 * Do not use any instruction that modifies condition codes as the
54 * caller may depend on these to remain unchanged across the macro.
55 */
56#if defined(CHEETAH)
57
/*
 * CHEETAH variant: the time source and its compare register are read and
 * written directly with rd/wr on STICK and STICK_COMPARE.  Every macro is
 * a straight-line sequence with no branches and no condition-code
 * updates.  The scratch/label arguments are accepted only so that all
 * three variants of these macros share one calling convention; they are
 * not referenced here.
 *
 *   GET_NATIVE_TIME(out, ...)        out = STICK
 *   DELTA_NATIVE_TIME(delta, reg, ...) STICK += delta (reg is clobbered)
 *   RD_TICKCMPR(out, ...)            out = STICK_COMPARE
 *   WR_TICKCMPR(in, ...)             STICK_COMPARE = in
 */
58#define	GET_NATIVE_TIME(out, scr1, scr2) \
59	rd	STICK, out
60#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
61	rd	STICK, reg;		\
62	add	reg, delta, reg;	\
63	wr	reg, STICK
64#define	RD_TICKCMPR(out, scr)		\
65	rd	STICK_COMPARE, out
66#define	WR_TICKCMPR(in, scr1, scr2, label) \
67	wr	in, STICK_COMPARE
68
69#elif defined(HUMMINGBIRD)
70#include <sys/spitregs.h>
71
72/*
73 * the current hummingbird version of %stick and %stick_cmp
74 * were both implemented as (2) 32-bit locations in ASI_IO space;
75 * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
76 *
77 * 64-bit opcodes are required, but move only 32-bits:
78 *
79 * ldxa [phys]ASI_IO, %dst 	reads  the low 32-bits from phys into %dst
80 * stxa %src, [phys]ASI_IO 	writes the low 32-bits from %src into phys
81 *
82 * reg equivalent		[phys]ASI_IO
83 * ------------------		---------------
84 * %stick_cmp  low-32		0x1FE.0000.F060
85 * %stick_cmp high-32		0x1FE.0000.F068
86 * %stick      low-32		0x1FE.0000.F070
87 * %stick     high-32		0x1FE.0000.F078
88 */
/*
 * Low-order byte offsets of the four 32-bit locations listed in the table
 * above.  They are OR-ed into the base address built by SETL41().
 * HST_DIFF is the distance between the low and high half of either
 * register, so code can step between halves with inc/dec HST_DIFF.
 */
89#define	HSTC_LOW	0x60			/* stick_cmp low  32-bits */
90#define	HSTC_HIGH	0x68			/* stick_cmp high 32-bits */
91#define	HST_LOW		0x70			/* stick low  32-bits */
92#define	HST_HIGH	0x78			/* stick high 32-bits */
93#define	HST_DIFF	0x08			/* low<-->high diff */
94
95/*
96 * Any change in the number of instructions in SETL41()
97 * will affect SETL41_OFF
98 */
/*
 * SETL41(reg, byte) builds the 41-bit physical address 0x1FE.0000.F0xx in
 * reg, where xx is `byte' (one of the HST*/HSTC* offsets).  The per-line
 * comments show the value of reg after each step.  The expansion is
 * exactly four instructions and touches no condition codes.
 */
99#define	SETL41(reg, byte) \
100	sethi	%hi(0x1FE00000), reg;		/* 0000.0000.1FE0.0000 */ \
101	or	reg, 0xF, reg;			/* 0000.0000.1FE0.000F */ \
102	sllx	reg, 12, reg;			/* 0000.01FE.0000.F000 */ \
103	or	reg, byte, reg;			/* 0000.01FE.0000.F0xx */
104
105/*
106 * SETL41_OFF is used to calculate the relative PC value when a
107 * branch instruction needs to go over SETL41() macro
108 */
/* Size in bytes of one SETL41() expansion: 4 instructions * 4 bytes. */
109#define SETL41_OFF  16
110
111/*
112 * reading stick requires 2 loads, and there could be an intervening
113 * low-to-high 32-bit rollover resulting in a return value that is
114 * off by about (2 ^ 32); this rare case is prevented by re-reading
115 * the low-32 bits after the high-32 and verifying the "after" value
116 * is >= the "before" value; if not, increment the high-32 value.
117 *
118 * this method is limited to 1 rollover, and based on the fixed
119 * stick-frequency (5555555), requires the loads to complete within
120 * 773 seconds; incrementing the high-32 value will not overflow for
121 * about 52644 years.
122 *
123 * writing stick requires 2 stores; if the old/new low-32 value is
124 * near 0xffffffff, there could be another rollover (also rare).
125 * to prevent this, we first write a 0 to the low-32, then write
126 * new values to the high-32 then the low-32.
127 *
128 * When we detect a carry in the lower %stick register, we need to
129 * read HST_HIGH again. However at the point where we detect this,
130 * we need to rebuild the register address HST_HIGH.  This involves more
131 * than one instruction and a branch is unavoidable. However, most of
132 * the time, there is no carry. So we take the penalty of a branch
133 * instruction only when there is carry (less frequent).
134 *
135 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
136 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
137 * addr already points to HST_LOW.
138 *
139 * NOTE: this method requires disabling interrupts before using
140 * DELTA_NATIVE_TIME.
141 */
/*
 * Implementation notes for the two macros below:
 *
 * - Both read low ("before"), high, then low again ("after") and compute
 *   after - before.  A negative difference means the low word wrapped
 *   between the two reads, and the brlz branches backwards to redo the
 *   reads.  NOTE(review): the first paragraph above speaks of incrementing
 *   the high-32 value, but the code as written retries the read instead.
 *
 * - The backward branch targets are raw PC-relative byte offsets, so they
 *   depend on the exact number of instructions preceding the brlz:
 *     GET_NATIVE_TIME:   .-(SETL41_OFF+24) = 10 instructions back, i.e.
 *                        the first instruction of SETL41(); needed because
 *                        scr has been overwritten with the low word.
 *     DELTA_NATIVE_TIME: .-24 = 6 instructions back, i.e. the first ldxa;
 *                        addr still holds the HST_LOW address at that point.
 *   Adding or removing an instruction in either sequence requires
 *   adjusting these offsets.
 *
 * - The brlz is not annulled, so the sllx in its delay slot also executes
 *   when the branch is taken; the shifted register is reloaded on retry.
 *
 * - Only sub/add/inc/dec/shift forms that do not set condition codes are
 *   used, as required by the GET_NATIVE_TIME contract stated earlier.
 */
142#define	GET_NATIVE_TIME(out, scr, tmp)	\
143	SETL41(scr, HST_LOW);		\
144	ldxa	[scr]ASI_IO, tmp;	\
145	inc	HST_DIFF, scr;		\
146	ldxa	[scr]ASI_IO, out;	\
147	dec	HST_DIFF, scr;		\
148	ldxa	[scr]ASI_IO, scr;	\
149	sub	scr, tmp, tmp;		\
150	brlz,pn tmp, .-(SETL41_OFF+24); \
151	sllx	out, 32, out;		\
152	or	out, scr, out
/*
 * DELTA_NATIVE_TIME: read the 64-bit value as above, add delta, split the
 * sum into low/high 32-bit halves, then store 0 to the low word, the new
 * high word, and finally the new low word (the ordering described in the
 * comment above).  addr, high, low and tmp are all clobbered.
 */
153#define	DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
154	SETL41(addr, HST_LOW);		\
155	ldxa	[addr]ASI_IO, tmp;	\
156	inc	HST_DIFF, addr;		\
157	ldxa	[addr]ASI_IO, high;	\
158	dec	HST_DIFF, addr;		\
159	ldxa	[addr]ASI_IO, low;	\
160	sub	low, tmp, tmp;		\
161	brlz,pn tmp, .-24;		\
162	sllx	high, 32, high;		\
163	or	high, low, high;	\
164	add	high, delta, high;	\
165	srl	high, 0, low;		\
166	srlx	high, 32, high;		\
167	stxa	%g0, [addr]ASI_IO;	\
168	inc	HST_DIFF, addr;		\
169	stxa	high, [addr]ASI_IO;	\
170	dec	HST_DIFF, addr;		\
171	stxa	low, [addr]ASI_IO
/*
 * RD_TICKCMPR: out = (high word << 32) | low word of stick_cmp.
 * scr is clobbered.  Unlike GET_NATIVE_TIME there is no rollover check.
 */
172#define RD_TICKCMPR(out, scr)		\
173	SETL41(scr, HSTC_LOW);		\
174	ldxa	[scr]ASI_IO, out;	\
175	inc	HST_DIFF, scr;		\
176	ldxa	[scr]ASI_IO, scr;	\
177	sllx	scr, 32, scr;		\
178	or	scr, out, out
/*
 * WR_TICKCMPR: store the upper 32 bits of `in' to the high location, then
 * `in' itself to the low location (per the table above, only the low 32
 * bits of the source are written by each stxa).  scra and scrd are
 * clobbered; `label' is unused in this variant.
 */
179#define WR_TICKCMPR(in, scra, scrd, label) \
180	SETL41(scra, HSTC_HIGH);	\
181	srlx	in, 32, scrd;		\
182	stxa	scrd, [scra]ASI_IO;	\
183	dec	HST_DIFF, scra;		\
184	stxa	in, [scra]ASI_IO
185
186#else	/* !CHEETAH && !HUMMINGBIRD */
187
/*
 * Default variant (neither CHEETAH nor HUMMINGBIRD): the time source is
 * the privileged %tick register, accessed with rdpr/wrpr, and the compare
 * register is TICK_COMPARE, accessed with rd/wr.  The scratch arguments
 * are unused; `label' is used only by the BB_ERRATA_1 form of WR_TICKCMPR.
 */
188#define	GET_NATIVE_TIME(out, scr1, scr2) \
189	rdpr	%tick, out
190#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
191	rdpr	%tick, reg;		\
192	add	reg, delta, reg;	\
193	wrpr	reg, %tick
194#define	RD_TICKCMPR(out, scr)		\
195	rd	TICK_COMPARE, out
196#ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
197/*
198 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
199 * The failure occurs only when the following instruction decodes to wr or
200 * wrpr.  The workaround is to immediately follow writes to TICK_COMPARE
201 * with a read, thus stalling the pipe and keeping following instructions
202 * from causing data corruption.  Aligning to a quadword will ensure these
203 * two instructions are not split due to i$ misses.
204 */
/*
 * The annulled branch jumps over the padding emitted by .align to the
 * wr/rd pair.  `label' is pasted into the local symbol name, so each
 * expansion must be given a distinct value (callers in this file pass
 * __LINE__).  Note that the directive actually used is ".align 64".
 */
205#define WR_TICKCMPR(cmpr,scr1,scr2,label)	\
206	ba,a	.bb_errata_1.label		;\
207	.align	64				;\
208.bb_errata_1.label:				;\
209	wr	cmpr, TICK_COMPARE		;\
210	rd	TICK_COMPARE, %g0
211#else	/* BB_ERRATA_1 */
212#define	WR_TICKCMPR(in,scr1,scr2,label)		\
213	wr	in, TICK_COMPARE
214#endif	/* BB_ERRATA_1 */
214#endif	/* BB_ERRATA_1 */
215
216#endif	/* !CHEETAH && !HUMMINGBIRD */
217
218#include <sys/clock.h>
219
220#if defined(lint)
221#include <sys/types.h>
222#include <sys/scb.h>
223#include <sys/systm.h>
224#include <sys/regset.h>
225#include <sys/sunddi.h>
226#include <sys/lockstat.h>
227#endif	/* lint */
228
229
230#include <sys/asm_linkage.h>
231#include <sys/privregs.h>
232#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
233#include <sys/machthread.h>
234#include <sys/clock.h>
235#include <sys/intreg.h>
236#include <sys/psr_compat.h>
237#include <sys/isa_defs.h>
238#include <sys/dditypes.h>
239#include <sys/intr.h>
240
241#if !defined(lint)
242#include "assym.h"
243#endif	/* !lint */
244
/*
 * uint_t get_impl(void)
 *
 * Leaf routine that returns, in %o0, whatever GET_CPU_IMPL() places there.
 * GET_CPU_IMPL is defined outside this file; presumably it extracts the
 * CPU implementation number -- confirm against its definition.
 * The lint version is a placeholder that returns 0.
 */
245#if defined(lint)
246
247uint_t
248get_impl(void)
249{ return (0); }
250
251#else	/* lint */
252
253	ENTRY(get_impl)
254	GET_CPU_IMPL(%o0)
255	retl
256	nop
257	SET_SIZE(get_impl)
258
259#endif	/* lint */
260
261#if defined(lint)
262/*
263 * Softint generated when counter field of tick reg matches value field
264 * of tick_cmpr reg
265 */
266/*ARGSUSED*/
267void
268tickcmpr_set(uint64_t clock_cycles)
269{}
270
271#else	/* lint */
272
/*
 * void tickcmpr_set(uint64_t clock_cycles)
 *
 * Program the tick-compare register with clock_cycles (%o0) and make sure
 * the programmed value is still in the future once the write has landed.
 *
 * Loop: write the candidate (%o2), read the current native time, clear
 * its top bit with the sllx/srlx pair, and compare.  If the candidate is
 * greater than "now" we are done.  Otherwise the step (%o3, initially 8)
 * is doubled and the next candidate is now + step.  The step doubling
 * sits in the delay slot of the "bg" and therefore executes on both
 * paths; that is harmless on exit.
 *
 * Clobbers %o0, %o2-%o5.
 */
273	ENTRY_NP(tickcmpr_set)
274	! get 64-bit clock_cycles interval
275	mov	%o0, %o2
276	mov	8, %o3			! A reasonable initial step size
2771:
278	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR
279
280	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
281	sllx	%o0, 1, %o0		!   value we wrote was in the future.
282	srlx	%o0, 1, %o0
283
284	cmp	%o2, %o0		! If the value we wrote was in the
285	bg,pt	%xcc, 2f		!   future, then blow out of here.
286	sllx	%o3, 1, %o3		! If not, then double our step size,
287	ba,pt	%xcc, 1b		!   and take another lap.
288	add	%o0, %o3, %o2		!
2892:
290	retl
291	nop
292	SET_SIZE(tickcmpr_set)
293
294#endif	/* lint */
295
296#if defined(lint)
297
298void
299tickcmpr_disable(void)
300{}
301
302#else	/* lint */
303
/*
 * void tickcmpr_disable(void)
 *
 * Writes the value (1 << TICKINT_DIS_SHFT) to the tick-compare register,
 * i.e. a value with only the disable bit set (tickcmpr_disabled() below
 * tests the same bit).  Clobbers %g1, %o0, %o4, %o5.
 */
304	ENTRY_NP(tickcmpr_disable)
305	mov	1, %g1
306	sllx	%g1, TICKINT_DIS_SHFT, %o0
307	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
308	retl
309	nop
310	SET_SIZE(tickcmpr_disable)
311
312#endif	/* lint */
313
314#if defined(lint)
315
316/*
317 * tick_write_delta() increments %tick by the specified delta.  This should
318 * only be called after a CPR event to assure that gethrtime() continues to
319 * increase monotonically.  Obviously, writing %tick needs to be done very
320 * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
321 * this reason, we make sure we're i-cache hot before actually writing to
322 * %tick.
323 */
324/*ARGSUSED*/
325void
326tick_write_delta(uint64_t delta)
327{}
328
329#else	/* lint */
330
331#ifdef DEBUG
332	.seg	".text"
333tick_write_panic:
334	.asciz	"tick_write_delta: interrupts already disabled on entry"
335#endif	/* DEBUG */
336
/*
 * void tick_write_delta(uint64_t delta)
 *
 * Adds delta (%o0) to the native time source via DELTA_NATIVE_TIME with
 * interrupts disabled, then restores the caller's %pstate.
 *
 * - %g1 holds the entry %pstate for the whole routine.  Interrupts are
 *   disabled by XOR-ing PSTATE_IE into it (wrpr rs1, imm writes
 *   rs1 ^ imm), which is only correct if IE was set on entry; the DEBUG
 *   build verifies that and panics otherwise.
 * - In the DEBUG path the sethi sits in the delay slot of the bnz and so
 *   always executes; after the save, the caller's %o1 is visible as %i1,
 *   which is why the "or" in the call's delay slot reads %i1.
 * - The "ba 0f" / ".align 16" pair starts the read-modify-write sequence
 *   on an aligned boundary so that it is fetched together.
 * - Interrupts are re-enabled in the delay slot of the retl.
 *
 * Clobbers %g1, %g2, %o2-%o5.
 */
337	ENTRY_NP(tick_write_delta)
338	rdpr	%pstate, %g1
339#ifdef DEBUG
340	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
341	bnz	0f			! aren't already disabled.
342	sethi	%hi(tick_write_panic), %o1
343        save    %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
344	call	panic
345	or	%i1, %lo(tick_write_panic), %o0
346#endif	/* DEBUG */
3470:	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
348	mov	%o0, %o2
349	ba	0f			! Branch to cache line-aligned instr.
350	nop
351	.align	16
3520:	nop				! The next 3 instructions are now hot.
353	DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2)	! read/inc/write %tick
354
355	retl				! Return
356	wrpr	%g0, %g1, %pstate	!     delay: Re-enable interrupts
	/*
	 * Record the symbol size, as every other entry point in this file
	 * does; without it the function has no size in the symbol table.
	 */
	SET_SIZE(tick_write_delta)
357#endif	/* lint */
358
359#if defined(lint)
360/*
361 *  return 1 if disabled
362 */
363
364int
365tickcmpr_disabled(void)
366{ return (0); }
367
368#else	/* lint */
369
/*
 * int tickcmpr_disabled(void)
 *
 * Reads the tick-compare register into %g1 and returns it shifted right
 * by TICKINT_DIS_SHFT, so the result is non-zero exactly when the bit(s)
 * at or above that position are set.  The shift executes in the delay
 * slot of the retl.  %o0 doubles as the scratch register for RD_TICKCMPR
 * before receiving the result.
 */
370	ENTRY_NP(tickcmpr_disabled)
371	RD_TICKCMPR(%g1, %o0)
372	retl
373	srlx	%g1, TICKINT_DIS_SHFT, %o0
374	SET_SIZE(tickcmpr_disabled)
375
376#endif	/* lint */
377
378/*
379 * Get current tick
380 */
/*
 * u_longlong_t gettick(void)
 *
 * Returns the raw value produced by GET_NATIVE_TIME in %o0; no bits are
 * masked off (compare gettick_counter()).  Which hardware register that
 * is depends on the CHEETAH/HUMMINGBIRD build selection at the top of the
 * file.  %o2 and %o3 are passed as scratch registers.
 */
380#if defined(lint)
381
382u_longlong_t
383gettick(void)
384{ return (0); }
385
386#else	/* lint */
387
388	ENTRY(gettick)
389	GET_NATIVE_TIME(%o0, %o2, %o3)
390	retl
391	nop
392	SET_SIZE(gettick)
393
394#endif	/* lint */
396
397
398/*
399 * Return the counter portion of the tick register.
400 */
/*
 * uint64_t gettick_counter(void)
 *
 * Always reads the privileged %tick register directly (it does not go
 * through GET_NATIVE_TIME), then clears bit 63 by shifting left and right
 * by one; the right shift runs in the delay slot of the retl.
 */
401
402#if defined(lint)
403
404uint64_t
405gettick_counter(void)
406{ return(0); }
407
408#else	/* lint */
409
410	ENTRY_NP(gettick_counter)
411	rdpr	%tick, %o0
412	sllx	%o0, 1, %o0
413	retl
414	srlx	%o0, 1, %o0		! shake off npt bit
415	SET_SIZE(gettick_counter)
416#endif	/* lint */
417
418/*
419 * Provide a C callable interface to the trap that reads the hi-res timer.
420 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
421 */
422
423#if defined(lint)
424
/*
 * Lint-only placeholders for the time routines implemented in assembly
 * in the non-lint half of this conditional.  They exist to give lint a C
 * definition with the right signature; each returns zero or zero-fills
 * its output argument and has no other effect.
 */
425hrtime_t
426gethrtime(void)
427{
428	return ((hrtime_t)0);
429}
430
431hrtime_t
432gethrtime_unscaled(void)
433{
434	return ((hrtime_t)0);
435}
436
437hrtime_t
438gethrtime_max(void)
439{
440	return ((hrtime_t)0);
441}
442
443void
444scalehrtime(hrtime_t *hrt)
445{
446	*hrt = 0;
447}
448
449void
450gethrestime(timespec_t *tp)
451{
452	tp->tv_sec = 0;
453	tp->tv_nsec = 0;
454}
455
456time_t
457gethrestime_sec(void)
458{
459	return (0);
460}
461
462void
463gethrestime_lasttick(timespec_t *tp)
464{
465	tp->tv_sec = 0;
466	tp->tv_nsec = 0;
467}
468
469/*ARGSUSED*/
470void
471hres_tick(void)
472{
473}
474
475void
476panic_hres_tick(void)
477{
478}
479
480#else	/* lint */
481
	/*
	 * hrtime_t gethrtime(void)
	 *
	 * Leaf routine: GET_HRTIME (defined outside this file) leaves the
	 * result in %g1 and is handed %o0-%o5 and %g2 as working registers;
	 * the result is copied to %o0 in the delay slot of the retl.
	 */
482	ENTRY_NP(gethrtime)
483	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
484							! %g1 = hrtime
485	retl
486	mov	%g1, %o0
487	SET_SIZE(gethrtime)
488
	/*
	 * hrtime_t gethrtime_unscaled(void)
	 *
	 * Returns the raw native time (no conversion to nanoseconds) in
	 * %o0.  Clobbers %g1, %o2, %o3.
	 */
489	ENTRY_NP(gethrtime_unscaled)
490	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
491	retl
492	mov	%g1, %o0
493	SET_SIZE(gethrtime_unscaled)
494
	/*
	 * gethrtime_waitfree() / dtrace_gethrtime()
	 *
	 * Two names for one body: read the native time and convert it with
	 * NATIVE_TIME_TO_NSEC (defined outside this file).  Unlike
	 * gethrtime() this path does not use GET_HRTIME.  Result in %o0;
	 * clobbers %g1, %o2, %o3.
	 */
495	ENTRY_NP(gethrtime_waitfree)
496	ALTENTRY(dtrace_gethrtime)
497	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
498	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
499	retl
500	mov	%g1, %o0
501	SET_SIZE(dtrace_gethrtime)
502	SET_SIZE(gethrtime_waitfree)
503
	/*
	 * hrtime_t gethrtime_max(void)
	 *
	 * Converts NATIVE_TIME_MAX to nanoseconds.  If the converted value
	 * is negative when viewed as a signed 64-bit number, it is replaced
	 * by (-1 >> 1), the largest positive 64-bit value.  The srlx is in
	 * the delay slot of an annulled branch-on-register, so it executes
	 * only when the branch is taken (value negative).
	 */
504	ENTRY(gethrtime_max)
505	NATIVE_TIME_MAX(%g1)
506	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
507
508	! hrtime_t's are signed, max hrtime_t must be positive
509	mov	-1, %o2
510	brlz,a	%g1, 1f
511	srlx	%o2, 1, %g1
5121:
513	retl
514	mov	%g1, %o0
515	SET_SIZE(gethrtime_max)
516
	/*
	 * void scalehrtime(hrtime_t *hrt)
	 *
	 * In-place conversion: loads the 64-bit value at *hrt (%o0), runs
	 * it through NATIVE_TIME_TO_NSEC, and stores it back in the delay
	 * slot of the retl.  Clobbers %o1-%o3.
	 */
517	ENTRY(scalehrtime)
518	ldx	[%o0], %o1
519	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
520	retl
521	stx	%o1, [%o0]
522	SET_SIZE(scalehrtime)
523
524/*
525 * Fast trap to return a timestamp, uses trap window, leaves traps
526 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
527 *
528 * This is the handler for the ST_GETHRTIME trap.
529 */
/*
 * The 64-bit result computed into %g1 is split for the caller:
 * %o0 receives the upper 32 bits and %o1 the lower 32 bits.  The handler
 * exits through FAST_TRAP_DONE rather than a plain "done" (see the
 * warning at the top of the file).  %g2-%g5 and %o2 are handed to
 * GET_HRTIME as working registers.
 */
530
531	ENTRY_NP(get_timestamp)
532	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
533	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
534	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
535	FAST_TRAP_DONE
536	SET_SIZE(get_timestamp)
537
538/*
539 * Macro to convert GET_HRESTIME() bits into a timestamp.
540 *
541 * We use two separate macros so that the platform-dependent GET_HRESTIME()
542 * can be as small as possible; CONV_HRESTIME() implements the generic part.
543 */
/*
 * Arguments (all registers):
 *   hrestsec, hrestnsec  in/out: seconds and nanoseconds
 *   adj                  pending adjustment (signed); clobbered
 *   nslt                 nanoseconds to add to hrestnsec; clobbered
 *                        (shifted right by ADJ_SHIFT)
 *   nano                 the value to compare/subtract for the carry into
 *                        seconds (see the "more than a billion?" step)
 *
 * Steps: hrestnsec += nslt always (the add is in a non-annulled delay
 * slot).  If adj is non-zero, the adjustment actually applied is limited
 * in magnitude to nslt >> ADJ_SHIFT, with the sign of adj.  Finally, if
 * hrestnsec >= nano, one second is carried.
 *
 * The macro uses the numeric local labels 2, 3 and 4 and ends with a bare
 * "4:" label, so code following the expansion is the fall-through target.
 * It modifies the condition codes.
 */
544#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
545	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
546	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */	\
547	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
548	srl	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
549	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
550	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
551	ba	3f;			/* go convert to sec/nsec */	\
552	add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
5532:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
554	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
555	add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
556	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
5573:	cmp	hrestnsec, nano;	/* more than a billion? */	\
558	bl,pt	%xcc, 4f;		/* if not, we're done */	\
559	nop;				/* delay: do nothing :( */	\
560	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
561	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
5624:
563
	/*
	 * void gethrestime(timespec_t *tp)
	 *
	 * Obtains seconds (%o1) and nanoseconds (%o2) via GET_HRESTIME and
	 * CONV_HRESTIME, then stores them with native-word stores at
	 * [tp] and [tp + CLONGSIZE]; the second store is in the delay slot
	 * of the retl.  %o3-%o5 and %g1-%g4 are used as working registers.
	 */
564	ENTRY_NP(gethrestime)
565	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
566	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
567	stn	%o1, [%o0]
568	retl
569	stn	%o2, [%o0 + CLONGSIZE]
570	SET_SIZE(gethrestime)
571
572/*
573 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
574 * seconds.
575 */
/*
 * Same GET_HRESTIME/CONV_HRESTIME sequence as gethrestime(), but with the
 * seconds computed directly in %o0 so it can be returned as-is; the
 * nanoseconds value in %o2 is computed (it is needed for the carry into
 * seconds) and then discarded.
 */
576	ENTRY_NP(gethrestime_sec)
577	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
578	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
579	retl					! %o0 current hrestime seconds
580	nop
581	SET_SIZE(gethrestime_sec)
582
583/*
584 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
585 * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
586 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
587 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
588 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
589 * it explicitly.)
590 */
/*
 * void gethrestime_lasttick(timespec_t *tp)
 *
 * Reader side of the hres_lock protocol as implemented here: sample
 * hres_lock with its low bit cleared, read hrestime seconds and
 * nanoseconds, then re-read hres_lock.  If the second sample differs from
 * the masked first sample (the low bit was set, or the value changed in
 * between), retry.
 *
 * The store of seconds is in the non-annulled delay slot of the retry
 * branch, so *tp may be written with a value that is then overwritten on
 * a later iteration; the final contents come from a consistent snapshot.
 * Clobbers %o1-%o4, %g1, %g2.
 */
591	ENTRY_NP(gethrestime_lasttick)
592	sethi	%hi(hres_lock), %o1
5930:
594	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
595	membar	#LoadLoad			! Load of lock must complete
596	andn	%o2, 1, %o2			! Mask off lowest bit
597	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
598	add	%o1, %lo(hrestime), %o4
599	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
600	membar	#LoadLoad			! All loads must complete
601	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
602	cmp	%o3, %o2			! If lock is locked or has
603	bne	0b				!   changed, retry.
604	stn	%g1, [%o0]			! Delay: store seconds
605	retl
606	stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
607	SET_SIZE(gethrestime_lasttick)
608
609/*
610 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
611 *
612 * This is the handler for the ST_GETHRESTIME trap.
613 */
/*
 * Seconds are produced in %o0 and nanoseconds in %o1.  %g1-%g5, %o2 and
 * %o3 are used as working registers, and the handler leaves through
 * FAST_TRAP_DONE instead of returning with retl.
 */
614
615	ENTRY_NP(get_hrestime)
616	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
617	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
618	FAST_TRAP_DONE
619	SET_SIZE(get_hrestime)
620
621/*
622 * Fast trap to return lwp virtual time, uses trap window, leaves traps
623 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
624 * of nanoseconds consumed.
625 *
626 * This is the handler for the ST_GETHRVTIME trap.
627 *
628 * Register usage:
629 *	%o0, %o1 = return lwp virtual time (hi32 in %o0, lo32 in %o1)
630 * 	%g2 = CPU pointer, then thread pointer
631 * 	%g3 = lwp pointer (also passed as second argument to CPU_ADDR)
632 * 	%g1 = scratch
633 * 	%g5 = native time, then the accumulated result
634 */
/*
 * Computation, all in native time units until the final conversion:
 *   result = now - lwp->ms_state_start + lwp->ms_acct[LMS_USER]
 * using the LWP_STATE_START and LWP_ACCT_USER offsets.  The sum is then
 * converted with NATIVE_TIME_TO_NSEC, which is also given %g1 and %o0 to
 * work with; %o0 is overwritten with the high half afterwards.
 */
635	ENTRY_NP(get_virtime)
636	GET_NATIVE_TIME(%g5, %g1, %g2)	! %g5 = native time in ticks
637	CPU_ADDR(%g2, %g3)			! CPU struct ptr to %g2
638	ldn	[%g2 + CPU_THREAD], %g2		! thread pointer to %g2
639	ldn	[%g2 + T_LWP], %g3		! lwp pointer to %g3
640
641	/*
642	 * Subtract start time of current microstate from time
643	 * of day to get increment for lwp virtual time.
644	 */
645	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
646	sub	%g5, %g1, %g5
647
648	/*
649	 * Add current value of ms_acct[LMS_USER]
650	 */
651	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
652	add	%g5, %g1, %g5
653	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)
654
655	srl	%g5, 0, %o1			! %o1 = lo32(%g5)
656	srlx	%g5, 32, %o0			! %o0 = hi32(%g5)
657
658	FAST_TRAP_DONE
659	SET_SIZE(get_virtime)
660
661
662
663	.seg	".text"
664hrtime_base_panic:
665	.asciz	"hrtime_base stepping back"
666
667
/*
 * void hres_tick(void)
 *
 * Writer side of the hres_lock protocol; updates the shared time
 * variables defined at the end of this file.  Outline:
 *
 *   1. Acquire: ldstub on the byte at hres_lock + HRES_LOCK_OFFSET.  If
 *      the byte was already non-zero, spin with plain ldub until it reads
 *      zero and then retry the ldstub.  All variables are addressed as
 *      %l4 + %lo(sym) with %l4 = %hi(hrestime), which relies on them
 *      sharing the same upper address bits.
 *   2. Read the native time, save it in hres_last_tick, scale it to
 *      nanoseconds, and compute the delta from the previous hrtime_base
 *      (kept in %i1, "nslt").
 *   3. If the stored hrtime_base is greater than the new value, go to the
 *      panic path (the second "9:" label near the end).
 *   4. Store the new hrtime_base; derive the adjustment for this tick
 *      (magnitude limited to nslt >> ADJ_SHIFT, sign of hrestime_adj);
 *      subtract it from timedelta and store the result to both timedelta
 *      and hrestime_adj.
 *   5. hrestime.nsec += adj + nslt; on reaching NANOSEC carry one second
 *      and set one_sec to 1.
 *   6. Release: membar #StoreStore, then increment the hres_lock word.
 *
 * The numeric label 9 is used twice: "9b" in the spin loop refers to the
 * ldub/tst loop above it, while "9f" after the hrtime_base comparison
 * refers to the panic path below.
 */
668	ENTRY_NP(hres_tick)
669	save	%sp, -SA(MINFRAME), %sp	! get a new window
670
671	sethi	%hi(hrestime), %l4
672	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
6737:	tst	%l5
674	bz,pt	%xcc, 8f			! if we got it, drive on
675	ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
676	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
6779:	tst	%l5
678	bz,a,pn	%xcc, 7b
679	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
680	ba,pt	%xcc, 9b
681	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
6828:
683	membar	#StoreLoad|#StoreStore
684
685	!
686	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
687	!
688	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
689	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
690	stx	%l0, [%l4 + %lo(hres_last_tick)]! prev = current
691	! convert native time to nsecs
692	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)
693
694	sub	%l0, %g1, %i1			! get accurate nsec delta
695
696	ldx	[%l4 + %lo(hrtime_base)], %l1
697	cmp	%l1, %l0
698	bg,pn	%xcc, 9f
699	nop
700
701	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base
702
703	!
704	! apply adjustment, if any
705	!
706	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
707	brz	%l0, 2f
708						! hrestime_adj == 0 ?
709						! yes, skip adjustments
710	clr	%l5				! delay: set adj to zero
711	tst	%l0				! is hrestime_adj >= 0 ?
712	bge,pt	%xcc, 1f			! yes, go handle positive case
713	srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj
714
715	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
716	bl,pt	%xcc, 2f			! yes, use current adj
717	neg	%l5				! delay: %l5 = -adj
718	ba,pt	%xcc, 2f
719	mov	%l0, %l5			! no, so set adj = hrestime_adj
7201:
721	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
722	bl,a,pt	%xcc, 2f			! yes, set adj = hrestime_adj
723	mov	%l0, %l5			! delay: adj = hrestime_adj
7242:
725	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
726	sub	%l0, %l5, %l0			! timedelta -= adj
727
728	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
729	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta
730
731	or	%l4, %lo(hrestime), %l2
732	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
733	ldn	[%l2 + CLONGSIZE], %i3
734	add	%i3, %l5, %i3			! hrestime.nsec += adj
735	add	%i3, %i1, %i3			! hrestime.nsec += nslt
736
737	set	NANOSEC, %l5			! %l5 = NANOSEC
738	cmp	%i3, %l5
739	bl,pt	%xcc, 5f			! if hrestime.tv_nsec < NANOSEC
740	sethi	%hi(one_sec), %i1		! delay
741	add	%i2, 0x1, %i2			! hrestime.tv_sec++
742	sub	%i3, %l5, %i3			! hrestime.tv_nsec - NANOSEC
743	mov	0x1, %l5
744	st	%l5, [%i1 + %lo(one_sec)]
7455:
746	stn	%i2, [%l2]
747	stn	%i3, [%l2 + CLONGSIZE]		! store the new hrestime
748
749	membar	#StoreStore
750
751	ld	[%l4 + %lo(hres_lock)], %i1
752	inc	%i1				! release lock
753	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock
754
755	ret
756	restore
757
7589:
759	!
760	! release hres_lock
761	!
762	ld	[%l4 + %lo(hres_lock)], %i1
763	inc	%i1
764	st	%i1, [%l4 + %lo(hres_lock)]
765
766	sethi	%hi(hrtime_base_panic), %o0
767	call	panic
768	or	%o0, %lo(hrtime_base_panic), %o0
769
770	SET_SIZE(hres_tick)
771
772#endif	/* lint */
773
774#if !defined(lint) && !defined(__lint)
775
776	.seg	".text"
777kstat_q_panic_msg:
778	.asciz	"kstat_q_exit: qlen == 0"
779
/*
 * kstat_q_panic
 *
 * Branch target used by the "exit" forms of KSTAT_Q_UPDATE when the old
 * queue length is zero.  It opens a new register window and calls
 * panic() with kstat_q_panic_msg; the low part of the message address is
 * formed in the delay slot of the call.  There is no return sequence
 * after the call.
 */
780	ENTRY(kstat_q_panic)
781	save	%sp, -SA(MINFRAME), %sp
782	sethi	%hi(kstat_q_panic_msg), %o0
783	call	panic
784	or	%o0, %lo(kstat_q_panic_msg), %o0
785	/*NOTREACHED*/
786	SET_SIZE(kstat_q_panic)
787
/*
 * Branch-on-zero mnemonics with the two prediction hints, wrapped in
 * macros so that an opcode containing a comma can be passed as a single
 * argument to KSTAT_Q_UPDATE.
 */
788#define	BRZPN	brz,pn
789#define	BRZPT	brz,pt
790
/*
 * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
 *
 * Updates one queue's statistics in the structure pointed to by %o0,
 * using the current time in %g1 (set up by the caller).
 *
 *   QOP      "add" or "sub": how the queue length changes (by 1)
 *   QBR      branch-on-zero opcode (BRZPT / BRZPN) testing the OLD length
 *   QZERO    where to go when the old length is 0
 *   QRETURN  text placed before the final store: a label, "retl", both,
 *            or nothing but a label
 *   QTYPE    field-name prefix; it is joined to CNT, LASTUPDATE, TIME and
 *            LENTIME with an empty comment, which relies on the
 *            preprocessor dropping the comment without inserting a space
 *
 * The new length is stored in the delay slot of QBR, i.e. on both paths.
 * When the old length is non-zero:
 *     delta    = now - lastupdate
 *     time    += delta
 *     lentime += old length * delta
 * In every case that reaches the end, lastupdate = now.  When QRETURN
 * ends in "retl", that last store is the delay slot of the return.
 *
 * Clobbers %o1-%o5.
 */
791#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
792	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
793	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
794	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
795	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
796	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
797	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
798	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
799	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
800	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
801	add	%o4, %o2, %o4;			/* %o4 = new time */	\
802	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
803	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
804	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
805QRETURN;								\
806	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
807
/*
 * kstat queue entry points.  Each takes the statistics structure pointer
 * in %o0, samples the native time into %g1, and expands KSTAT_Q_UPDATE
 * once or twice with the KSTAT_IO_W (wait queue) or KSTAT_IO_R (run
 * queue) field prefix:
 *
 *   *_enter  add 1; an old length of 0 simply skips the time accounting
 *            (branch to the local "1:" placed on the retl).
 *   *_exit   subtract 1; an old length of 0 branches to kstat_q_panic.
 *   kstat_waitq_to_runq / kstat_runq_back_to_waitq
 *            an exit on one queue followed by an enter on the other,
 *            sharing one time sample.  The first expansion passes a bare
 *            "1:" as QRETURN so that it falls through into the second;
 *            the "1f" in the second expansion resolves to the "1:" on its
 *            own retl.
 *
 * All of them return through the retl supplied as part of QRETURN, with
 * the lastupdate store in its delay slot.
 */
808	.align 16
809	ENTRY(kstat_waitq_enter)
810	GET_NATIVE_TIME(%g1, %g2, %g3)
811	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
812	SET_SIZE(kstat_waitq_enter)
813
814	.align 16
815	ENTRY(kstat_waitq_exit)
816	GET_NATIVE_TIME(%g1, %g2, %g3)
817	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
818	SET_SIZE(kstat_waitq_exit)
819
820	.align 16
821	ENTRY(kstat_runq_enter)
822	GET_NATIVE_TIME(%g1, %g2, %g3)
823	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
824	SET_SIZE(kstat_runq_enter)
825
826	.align 16
827	ENTRY(kstat_runq_exit)
828	GET_NATIVE_TIME(%g1, %g2, %g3)
829	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
830	SET_SIZE(kstat_runq_exit)
831
832	.align 16
833	ENTRY(kstat_waitq_to_runq)
834	GET_NATIVE_TIME(%g1, %g2, %g3)
835	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
836	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
837	SET_SIZE(kstat_waitq_to_runq)
838
839	.align 16
840	ENTRY(kstat_runq_back_to_waitq)
841	GET_NATIVE_TIME(%g1, %g2, %g3)
842	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
843	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
844	SET_SIZE(kstat_runq_back_to_waitq)
845
846#endif	/* !(lint || __lint) */
847
/*
 * Shared time-keeping variables.  The lint half gives C declarations; the
 * non-lint half lays the same objects out contiguously in .data so that
 * the assembly above can reach all of them from one sethi.  Note that
 * nsec_shift and adj_shift are defined and exported in the assembly half
 * but have no declaration in the lint half.
 */
848#ifdef lint
849
850int64_t timedelta;
851hrtime_t hres_last_tick;
852timestruc_t hrestime;
853int64_t hrestime_adj;
854int hres_lock;
855uint_t nsec_scale;
856hrtime_t hrtime_base;
857int traptrace_use_stick;
858
859#else	/* lint */
860	/*
861	 *  -- WARNING --
862	 *
863	 * The following variables MUST be together on a 128-byte boundary.
864	 * In addition to the primary performance motivation (having them all
865	 * on the same cache line(s)), code here and in the GET*TIME() macros
866	 * assumes that they all have the same high 22 address bits (so
867	 * there's only one sethi).
868	 */
869	.seg	".data"
870	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
871	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
872	.global	nsec_shift, adj_shift
873
	/*
	 * NOTE(review): the directive below aligns to 64 bytes, not the 128
	 * stated in the warning above.  Counting the definitions that
	 * follow, and assuming .nword emits 8-byte words in this build
	 * (confirm), the byte offsets from the aligned start are:
	 *   timedelta 0, hres_last_tick 8, hrestime 16, hrestime_adj 32,
	 *   hres_lock 40, nsec_scale 44, hrtime_base 48,
	 *   traptrace_use_stick 56, nsec_shift 60, adj_shift 64
	 * for a total of 68 bytes.  Under that assumption everything up to
	 * and including nsec_shift fits in the first 64 bytes, while
	 * adj_shift starts the next 64-byte unit.  Do not reorder or resize
	 * these without re-checking the single-sethi assumption in every
	 * user.
	 */
874	/* XXX - above comment claims 128-bytes is necessary */
875	.align	64
876timedelta:
877	.word	0, 0		/* int64_t */
878hres_last_tick:
879	.word	0, 0		/* hrtime_t */
880hrestime:
881	.nword	0, 0		/* 2 longs */
882hrestime_adj:
883	.word	0, 0		/* int64_t */
884hres_lock:
885	.word	0
886nsec_scale:
887	.word	0
888hrtime_base:
889	.word	0, 0
890traptrace_use_stick:
891	.word	0
892nsec_shift:
893	.word	NSEC_SHIFT
894adj_shift:
895	.word	ADJ_SHIFT
896
897#endif	/* lint */
898
899
900/*
901 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
902 * usec_delay(int n)		[compatibility - should go one day]
903 * Delay by spinning.
904 *
905 * delay for n microseconds.  numbers <= 0 delay 1 usec
906 *
907 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
908 * and variable clock rate for power management requires that we
909 * use %stick to implement this routine.
910 */
911
912#if defined(lint)
913
914/*ARGSUSED*/
915void
916drv_usecwait(clock_t n)
917{}
918
919/*ARGSUSED*/
920void
921usec_delay(int n)
922{}
923
924#else	/* lint */
925
926	ENTRY(drv_usecwait)
927	ALTENTRY(usec_delay)
928	brlez,a,pn %o0, 0f
929	mov	1, %o0
9300:
931	sethi	%hi(sticks_per_usec), %o1
932	lduw	[%o1 + %lo(sticks_per_usec)], %o1
933	mulx	%o1, %o0, %o1		! Scale usec to ticks
934	inc	%o1			! We don't start on a tick edge
935	GET_NATIVE_TIME(%o2, %o3, %o4)
936	add	%o1, %o2, %o1
937
9381:	cmp	%o1, %o2
939	GET_NATIVE_TIME(%o2, %o3, %o4)
940	bgeu,pt	%xcc, 1b
941	nop
942	retl
943	nop
944	SET_SIZE(usec_delay)
945	SET_SIZE(drv_usecwait)
946#endif	/* lint */
947
948#if defined(lint)
949
950/* ARGSUSED */
951void
952pil14_interrupt(int level)
953{}
954
955#else	/* lint */
956
957/*
958 * Level-14 interrupt prologue.
959 */
/*
 * Records profiling information in the current CPU structure and then
 * continues at pil_interrupt_common (defined elsewhere):
 *
 *   CPU_PROFILE_PIL  <- interrupted PIL
 *   if the trapped state was privileged (TSTATE_PRIV set in %tstate):
 *       CPU_PROFILE_PC  <- %tpc,  CPU_PROFILE_UPC <- 0
 *   else:
 *       CPU_PROFILE_UPC <- %tpc,  CPU_PROFILE_PC  <- 0
 *
 * The "record kernel PC" store is in the delay slot of an annulled
 * conditional branch, so it executes only when the branch is taken.  The
 * zeroing stores are in the delay slots of the two (non-annulled)
 * branches to pil_interrupt_common.  Uses %g1, %g2, %g5, %g6.
 */
960	ENTRY_NP(pil14_interrupt)
961	CPU_ADDR(%g1, %g2)
962	rdpr	%pil, %g6			! %g6 = interrupted PIL
963	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
964	rdpr	%tstate, %g6
965	rdpr	%tpc, %g5
966	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
967	bnz,a,pt %xcc, 1f
968	stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
969	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
970	ba	pil_interrupt_common		! must be large-disp branch
971	stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
9721:	ba	pil_interrupt_common		! must be large-disp branch
973	stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
974	SET_SIZE(pil14_interrupt)
975
	/*
	 * tick_rtt
	 *
	 * Summary of the flow implemented below (the block comments inside
	 * the routine give the rationale):
	 *
	 *   1. Read the compare register into %o5.  If its disable bit is
	 *      set, go straight to current_thread_complete.
	 *   2. With PSTATE_IE cleared (old %pstate kept in %g5), call
	 *      intr_enqueue_req(PIL_14, cbe_level14_inum).
	 *   3. If neither TICK_INT_MASK nor STICK_INT_MASK is set in
	 *      SOFTINT, restore %pstate and finish.
	 *   4. Otherwise clear those SOFTINT bits and check that %o5 is
	 *      still greater than the current time; if so restore %pstate
	 *      and finish.
	 *   5. Otherwise repeatedly program now + step, starting with a
	 *      step of 8 and doubling it, until the programmed value is in
	 *      the future.
	 *
	 * Every exit to label 2 restores %pstate in the annulled delay slot
	 * of the branch that takes it, except the early exit in step 1,
	 * where %pstate has not been modified.  The routine ends with a
	 * branch to current_thread_complete rather than a return.
	 */
976	ENTRY_NP(tick_rtt)
977	!
978	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
979	! disabled.  If TICK_COMPARE is enabled, we know that we need to
980	! reenqueue the interrupt request structure.  We'll then check TICKINT
981	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
982	! interrupt.  In this case, TICK_COMPARE may have been rewritten
983	! recently; we'll compare %o5 to the current time to verify that it's
984	! in the future.
985	!
986	! Note that %o5 is live until after 1f.
987	! XXX - there is a subroutine call while %o5 is live!
988	!
989	RD_TICKCMPR(%o5, %g1)
990	srlx	%o5, TICKINT_DIS_SHFT, %g1
991	brnz,pt	%g1, 2f
992	nop
993
994	rdpr 	%pstate, %g5
995	andn	%g5, PSTATE_IE, %g1
996	wrpr	%g0, %g1, %pstate		! Disable vec interrupts
997
998	sethi	%hi(cbe_level14_inum), %o1
999	ld	[%o1 + %lo(cbe_level14_inum)], %o1
1000	call	intr_enqueue_req ! preserves %o5 and %g5
1001	mov	PIL_14, %o0
1002
1003	! Check SOFTINT for TICKINT/STICKINT
1004	rd	SOFTINT, %o4
1005	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
1006	andcc	%o4, %o0, %g0
1007	bz,a,pn	%icc, 2f
1008	wrpr	%g0, %g5, %pstate		! Enable vec interrupts
1009
1010	! clear TICKINT/STICKINT
1011	wr	%o0, CLEAR_SOFTINT
1012
1013	!
1014	! Now that we've cleared TICKINT, we can reread %tick and confirm
1015	! that the value we programmed is still in the future.  If it isn't,
1016	! we need to reprogram TICK_COMPARE to fire as soon as possible.
1017	!
1018	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
1019	sllx	%o0, 1, %o0			! Clear the DIS bit
1020	srlx	%o0, 1, %o0
1021	cmp	%o5, %o0			! In the future?
1022	bg,a,pt	%xcc, 2f			! Yes, drive on.
1023	wrpr	%g0, %g5, %pstate		!   delay: enable vec intr
1024
1025	!
1026	! If we're here, then we have programmed TICK_COMPARE with a %tick
1027	! which is in the past; we'll now load an initial step size, and loop
1028	! until we've managed to program TICK_COMPARE to fire in the future.
1029	!
1030	mov	8, %o4				! 8 = arbitrary inital step
10311:	add	%o0, %o4, %o5			! Add the step
1032	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
1033	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
1034	sllx	%o0, 1, %o0			! Clear the DIS bit
1035	srlx	%o0, 1, %o0
1036	cmp	%o5, %o0			! In the future?
1037	bg,a,pt	%xcc, 2f			! Yes, drive on.
1038	wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
1039	ba	1b				! No, try again.
1040	sllx	%o4, 1, %o4			!    delay: double step size
1041
10422:	ba	current_thread_complete
1043	nop
1044	SET_SIZE(tick_rtt)
1045
1046#endif	/* lint */
1047
1047#if defined(lint) || defined(__lint)
1048
1049/* ARGSUSED */
1050uint64_t
1051find_cpufrequency(volatile uchar_t *clock_ptr)
1052{
1053	return (0);
1054}
1055
1056#else	/* lint */
1057
1058#ifdef DEBUG
1059	.seg	".text"
1060find_cpufreq_panic:
1061	.asciz	"find_cpufrequency: interrupts already disabled on entry"
1062#endif	/* DEBUG */
1063
/*
 * uint64_t find_cpufrequency(volatile uchar_t *clock_ptr)
 *
 * Measures how many native-time ticks elapse between two consecutive
 * changes of the byte at *clock_ptr (described by the comments below as a
 * seconds register), with interrupts disabled during the measurement.
 *
 *   loop 1:  spin until the byte changes; %o3 = time sample at that edge
 *   loop 2:  spin until the byte changes again; %o5 = time sample
 *   return   %o5 - %o3
 *
 * In both loops the byte is re-read in the NON-annulled delay slot of the
 * "be", so the compare on each pass uses the value loaded on the previous
 * pass.  If the byte reads 0 after either change the measurement is
 * restarted: from label 3 after the first change, and from label 0 after
 * the second (interrupts are re-enabled in that branch's delay slot and
 * disabled again at label 0).
 *
 * As in tick_write_delta, interrupts are disabled by XOR-ing PSTATE_IE
 * into the saved %pstate, which requires IE to be set on entry; the DEBUG
 * build checks this and panics otherwise.
 *
 * NOTE(review): the inline comment on the "mov %o2, %o4" below says the
 * initial value is stored in %o2, but the destination register is %o4.
 *
 * Clobbers %g1, %g4, %g5, %o1-%o5.
 */
1064	ENTRY_NP(find_cpufrequency)
1065	rdpr	%pstate, %g1
1066
1067#ifdef DEBUG
1068	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
1069	bnz	0f			! are currently enabled
1070	sethi	%hi(find_cpufreq_panic), %o1
1071	call	panic
1072	or	%o1, %lo(find_cpufreq_panic), %o0
1073#endif	/* DEBUG */
1074
10750:
1076	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
10773:
1078	ldub	[%o0], %o1		! Read the number of seconds
1079	mov	%o1, %o2		! remember initial value in %o2
10801:
1081	GET_NATIVE_TIME(%o3, %g4, %g5)
1082	cmp	%o1, %o2		! did the seconds register roll over?
1083	be,pt	%icc, 1b		! branch back if unchanged
1084	ldub	[%o0], %o2		!   delay: load the new seconds val
1085
1086	brz,pn	%o2, 3b			! if the minutes just rolled over,
1087					! the last second could have been
1088					! inaccurate; try again.
1089	mov	%o2, %o4		!   delay: store init. val. in %o2
10902:
1091	GET_NATIVE_TIME(%o5, %g4, %g5)
1092	cmp	%o2, %o4		! did the seconds register roll over?
1093	be,pt	%icc, 2b		! branch back if unchanged
1094	ldub	[%o0], %o4		!   delay: load the new seconds val
1095
1096	brz,pn	%o4, 0b			! if the minutes just rolled over,
1097					! the last second could have been
1098					! inaccurate; try again.
1099	wrpr	%g0, %g1, %pstate	!   delay: re-enable interrupts
1100
1101	retl
1102	sub	%o5, %o3, %o0		! return the difference in ticks
1103	SET_SIZE(find_cpufrequency)
1104
1105#endif	/* lint */
1107
1108#if defined(lint)
1109/*
1110 * Prefetch a page_t for write or read.  This assumes a linear
1111 * scan of sequential page_t's.  The lint versions are empty stubs.
1112 */
1113/*ARGSUSED*/
1114void
1115prefetch_page_w(void *pp)
1116{}
1117
/* Lint-only stub for the read prefetch; the body is empty. */
1118/*ARGSUSED*/
1119void
1120prefetch_page_r(void *pp)
1121{}
1122#else	/* lint */
1123
1124#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1125	defined(SERRANO)
1126	!
1127	! On US-III, the prefetch instruction queue is 8 entries deep.
1128	! Also, prefetches for write put data in the E$, which has
1129	! lines of 512 bytes for an 8MB cache. Each E$ line is further
1130	! subblocked into 64 byte chunks.
1131	!
1132	! Since prefetch can only bring in 64 bytes at a time (see SPARC
1133	! V9 Architecture Manual, p.204) and a page_t is 128 bytes,
1134	! 2 prefetches are required in order to bring an entire
1135	! page_t into the E$.
1136	!
1137	! Since the prefetch queue is 8 entries deep, we currently can
1138	! only have 4 prefetches for page_t's outstanding. Thus, we
1139	! prefetch n+4 ahead of where we are now:
1140	!
1141	!      4 * sizeof(page_t)     -> 512
1142	!      4 * sizeof(page_t) +64 -> 576
1143	!
1144	! Example
1145	! =======
1146	! contiguous page array in memory...
1147	!
1148	! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
1149	! ^         ^         ^         ^         ^    ^
1150	! pp                                      |    pp+4*sizeof(page)+64
1151	!                                         |
1152	!                                         pp+4*sizeof(page)
1153	!
1154	!  Prefetch
1155	!   Queue
1156	! +-------+<--- In this iteration, we're working with pp (AAA1),
1157	! |Preftch|     but we enqueue prefetch for addr = XXX1
1158	! | XXX1  |
1159	! +-------+<--- this queue slot will be a prefetch instruction for
1160	! |Preftch|     addr = pp + 4*sizeof(page_t) + 64 (or second
1161	! | XXX2  |     half of page XXX)
1162	! +-------+
1163	! |Preftch|<-+- The next time around this function, we'll be
1164	! | YYY1  |  |  working with pp = BBB1, but will be enqueueing
1165	! +-------+  |  prefetches for both halves of page YYY,
1166	! |Preftch|  |  while both halves of page XXX are in transit,
1167	! | YYY2  |<-+  making their way into the E$.
1168	! +-------+
1169	! |Preftch|
1170	! | ZZZ1  |
1171	! +-------+
1172	! .       .
1173	! :       :
1174	!
1175	!  E$
1176	! +============================================...
1177	! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
1178	! +============================================...
1179	! |      |      |      |      |      |      |
1180	! +============================================...
1181	! .
1182	! :
1183	!
1184	! So we should expect the first four page accesses to stall
1185	! while we warm up the cache, after which most of the pages
1186	! will have their pp ready in the E$.
1187	!
1188	! Also note that if sizeof(page_t) grows beyond 128, then
1189	! we'll need an additional prefetch to get an entire page
1190	! into the E$, thus reducing the number of outstanding page
1191	! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
1192	! etc.
1193	!
1194	! Cheetah+
1195	! ========
1196	! On Cheetah+ we use "#n_writes" prefetches, as these avoid an
1197	! unnecessary RTS->RTO bus transaction state change and
1198	! just issue an RTO transaction. (See p.77 of the Cheetah+ Delta
1199	! PRM.) On Cheetah, #n_writes prefetches are reflected with an
1200	! RTS->RTO state transition regardless.
1201	!
/*
 * Byte offsets from pp of the two 64-byte halves of the page_t four
 * entries ahead (4 * 128 = 512, and 512 + 64 = 576).
 */
1202#define STRIDE1 512
1203#define STRIDE2 576
1204
/*
 * Build-time check that STRIDE1 is still four times PAGE_SIZE.
 * NOTE(review): PAGE_SIZE is presumably the generated size of a
 * page_t here -- confirm.
 */
1205#if	STRIDE1 != (PAGE_SIZE * 4)
1206#error	"STRIDE1 != (PAGE_SIZE * 4)"
1207#endif	/* STRIDE1 != (PAGE_SIZE * 4) */
1208
/*
 * prefetch_page_w(pp): issue #n_writes prefetches for pp + STRIDE1 and
 * pp + STRIDE2.  The second prefetch sits in the delay slot of retl.
 */
1209        ENTRY(prefetch_page_w)
1210        prefetch        [%o0+STRIDE1], #n_writes
1211        retl
1212        prefetch        [%o0+STRIDE2], #n_writes
1213        SET_SIZE(prefetch_page_w)
1214
1215	!
1216	! Note: on CHEETAH, prefetch for read really uses #one_write, since
1217	! that fetches to E$ (general use) rather than P$ (floating point use).
1218	!
/*
 * prefetch_page_r(pp): issue #one_write prefetches for pp + STRIDE1 and
 * pp + STRIDE2.  The second prefetch sits in the delay slot of retl.
 */
1219        ENTRY(prefetch_page_r)
1220        prefetch        [%o0+STRIDE1], #one_write
1221        retl
1222        prefetch        [%o0+STRIDE2], #one_write
1223        SET_SIZE(prefetch_page_r)
1224
1225#elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1226
1227	!
1228	! UltraSparcII can have up to 3 prefetches outstanding.
1229	! A page_t is 128 bytes (2 prefetches of 64 bytes each),
1230	! so prefetch for pp + 1, which is
1231	!
1232	!       pp + sizeof(page_t)
1233	! and
1234	!       pp + sizeof(page_t) + 64
1235	!
/*
 * Byte offsets from pp of the two 64-byte halves of the next page_t
 * (128, and 128 + 64 = 192).
 */
1236#define STRIDE1	128
1237#define STRIDE2	192
1238
/*
 * Build-time check that STRIDE1 still equals PAGE_SIZE.
 * NOTE(review): PAGE_SIZE is presumably the generated size of a
 * page_t here -- confirm.
 */
1239#if	STRIDE1 != PAGE_SIZE
1240#error	"STRIDE1 != PAGE_SIZE"
1241#endif	/* STRIDE1 != PAGE_SIZE */
1242
/*
 * prefetch_page_w(pp): issue #n_writes prefetches for pp + STRIDE1 and
 * pp + STRIDE2.  The second prefetch sits in the delay slot of retl.
 */
1243        ENTRY(prefetch_page_w)
1244        prefetch        [%o0+STRIDE1], #n_writes
1245        retl
1246        prefetch        [%o0+STRIDE2], #n_writes
1247        SET_SIZE(prefetch_page_w)
1248
/*
 * prefetch_page_r(pp): issue #n_reads prefetches for pp + STRIDE1 and
 * pp + STRIDE2.  The second prefetch sits in the delay slot of retl.
 */
1249        ENTRY(prefetch_page_r)
1250        prefetch        [%o0+STRIDE1], #n_reads
1251        retl
1252        prefetch        [%o0+STRIDE2], #n_reads
1253        SET_SIZE(prefetch_page_r)
1254#else	/* SPITFIRE || HUMMINGBIRD */
1255
1256#error "You need to fix this for your new cpu type."
1257
1258#endif	/* SPITFIRE || HUMMINGBIRD */
1259
1260#endif	/* lint */
1261
1262#if defined(lint)
1263/*
1264 * Prefetch struct smap for write.  The lint version is an empty stub.
1265 */
1266/*ARGSUSED*/
1267void
1268prefetch_smap_w(void *smp)
1269{}
1270#else	/* lint */
1271
/*
 * PREFETCH_Q_LEN: the per-cpu-type prefetch depth used below to size
 * SMAP_STRIDE -- 8 for the Cheetah family, 3 for Spitfire and
 * Hummingbird.  Any other cpu type is a build error.
 */
1272#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1273	defined(SERRANO)
1274
1275#define	PREFETCH_Q_LEN 8
1276
1277#elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1278
1279#define	PREFETCH_Q_LEN 3
1280
1281#else	/* SPITFIRE || HUMMINGBIRD */
1282
1283#error You need to fix this for your new cpu type.
1284
1285#endif	/* SPITFIRE || HUMMINGBIRD */
1286
1287#include <vm/kpm.h>
1288
1289#ifdef	SEGKPM_SUPPORT
1290
/*
 * With SEGKPM_SUPPORT, SMAP_SIZE is 72.  NOTE(review): presumably the
 * size of struct smap in this configuration -- confirm.
 * SMAP_STRIDE is the number of whole SMAP_SIZE objects that fit in
 * PREFETCH_Q_LEN 64-byte blocks, multiplied by 64 bytes.
 */
1291#define	SMAP_SIZE 72
1292#define SMAP_STRIDE (((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
1293
1294#else	/* SEGKPM_SUPPORT */
1295
1296	!
1297	! The hardware will prefetch the 64 byte cache aligned block
1298	! that contains the address specified in the prefetch instruction.
1299	! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
1300	! per pass will suffice as long as we prefetch far enough ahead to
1301	! make sure we don't stall for the cases where the smap object
1302	! spans multiple hardware prefetch blocks.  Let's prefetch as far
1303	! ahead as the hardware will allow.
1304	!
1305	! The smap array is processed with decreasing address pointers.
1306	!
/* Here the stride is PREFETCH_Q_LEN whole smap structs, in bytes. */
1307#define	SMAP_SIZE 48
1308#define	SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
1309
1310#endif	/* SEGKPM_SUPPORT */
1311
/*
 * prefetch_smap_w(smp): issue one #n_writes prefetch for the address
 * SMAP_STRIDE bytes below smp, then return.  The offset is negative,
 * so the prefetch targets a lower address than smp.
 */
1312	ENTRY(prefetch_smap_w)
1313	retl
1314	prefetch	[%o0-SMAP_STRIDE], #n_writes	!   delay: the prefetch
1315	SET_SIZE(prefetch_smap_w)
1316
1317#endif	/* lint */
1318
1319#if defined(lint) || defined(__lint)
1320
/*
 * Lint-only C stub for getidsr(); it just returns 0.  The real routine
 * is the assembly version in the non-lint branch.
 */
1321/* ARGSUSED */
1322uint64_t
1323getidsr(void)
1324{ return 0; }
1325
1326#else	/* lint */
1327
/*
 * uint64_t getidsr(void)
 *
 * Returns in %o0 the 64-bit value loaded from address 0 of the
 * ASI_INTR_DISPATCH_STATUS address space.  The load sits in the delay
 * slot of retl.
 */
1328	ENTRY_NP(getidsr)
1329	retl
1330	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %o0	!   delay: %o0 = status
1331	SET_SIZE(getidsr)
1332
1333#endif	/* lint */
1334