xref: /titanic_50/usr/src/uts/sun4v/cpu/common_asm.s (revision 60c807700988885656502665e0cf8afd4b4346f7)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#if !defined(lint)
29#include "assym.h"
30#endif
31
32/*
33 * General assembly language routines.
34 * It is the intent of this file to contain routines that are
35 * specific to cpu architecture.
36 */
37
/*
 * FAST_TRAP_DONE: exit path for fast trap handlers.
 *
 * WARNING: a fast trap handler that can be invoked by a non-privileged
 * user may have to leave through this macro rather than through a bare
 * "done" instruction when returning to user mode.  The reasons are
 * documented with the "fast_trap_done" entry point, which is where this
 * macro branches (the ",a" annuls the branch's delay slot).
 */
#define	FAST_TRAP_DONE	ba,a	fast_trap_done
46
/*
 * Time-source overrides for the cpu module code.  All three macros go
 * straight to the STICK / STICK_COMPARE registers; the scratch-register
 * and label parameters are accepted but not used by these definitions.
 *
 * GET_NATIVE_TIME is not guaranteed to be exactly one instruction, so be
 * careful about placing it in a delay slot.  It must not contain any
 * instruction that modifies the condition codes: callers may depend on
 * them being unchanged across the macro.
 */
#define	GET_NATIVE_TIME(out, scr1, scr2)	rd	STICK, out

#define	RD_TICKCMPR(out, scr)			rd	STICK_COMPARE, out

#define	WR_TICKCMPR(in, scr1, scr2, label)	wr	in, STICK_COMPARE
64
65
66#include <sys/clock.h>
67
68#if defined(lint)
69#include <sys/types.h>
70#include <sys/scb.h>
71#include <sys/systm.h>
72#include <sys/regset.h>
73#include <sys/sunddi.h>
74#include <sys/lockstat.h>
75#endif	/* lint */
76
77
78#include <sys/asm_linkage.h>
79#include <sys/privregs.h>
80#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
81#include <sys/machthread.h>
82#include <sys/clock.h>
83#include <sys/intreg.h>
84#include <sys/psr_compat.h>
85#include <sys/isa_defs.h>
86#include <sys/dditypes.h>
87#include <sys/intr.h>
88#include <sys/hypervisor_api.h>
89
90#if !defined(lint)
91#include "assym.h"
92#endif
93
/* Byte step between successive "flush" instructions in flush_instr_mem. */
#define	ICACHE_FLUSHSZ	0x20
95
#if defined(lint)
/*
 * tickcmpr_set(clock_cycles): program the tick-compare register.
 * A softint is generated when the counter field of the tick register
 * matches the value field of the tick_cmpr register.
 */
/*ARGSUSED*/
void
tickcmpr_set(uint64_t clock_cycles)
{
}

#else   /* lint */

/*
 * In:    %o0 = 64-bit clock_cycles value to program
 * Uses:  %o0, %o2, %o3
 *
 * The value is written, then the current time is read back with bit 63
 * shifted out.  If the value written is not greater than "now", the step
 * is doubled and (now + step) is written instead; this repeats until the
 * programmed value is in the future.
 */
	ENTRY_NP(tickcmpr_set)
	mov	%o0, %o2			! %o2 = value to program
	mov	8, %o3				! %o3 = step (doubled before use)
1:
	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! program the compare register

	GET_NATIVE_TIME(%o0, %o4, %o5)		! %o0 = current time
	sllx	%o0, 1, %o0			! drop bit 63 ...
	srlx	%o0, 1, %o0			! ... keeping the low 63 bits

	subcc	%o2, %o0, %g0			! programmed value vs. now
	bg,pt	%xcc, 2f			! strictly in the future: done
	  sllx	%o3, 1, %o3			! delay: step *= 2 (always runs)
	ba,pt	%xcc, 1b			! otherwise go around again
	  add	%o0, %o3, %o2			! delay: %o2 = now + step
2:
	retl
	  nop
	SET_SIZE(tickcmpr_set)

#endif  /* lint */
130
#if defined(lint)

/* lint stub for the assembly routine below */
void
tickcmpr_disable(void)
{
}

#else

/*
 * tickcmpr_disable: write a value with only bit TICKINT_DIS_SHFT set
 * into the tick-compare register.
 *
 * Uses: %g1, %o0
 */
	ENTRY_NP(tickcmpr_disable)
	mov	1, %g1
	sllx	%g1, TICKINT_DIS_SHFT, %o0	! %o0 = 1 << TICKINT_DIS_SHFT
	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! store it in TICK_CMPR
	retl
	  nop
	SET_SIZE(tickcmpr_disable)

#endif
148
#if defined(lint)

/*
 * tick_write_delta() increments %tick by the specified delta.  This should
 * only be called after a CPR event to assure that gethrtime() continues to
 * increase monotonically.  Obviously, writing %tick needs to be done very
 * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
 * this reason, we make sure we're i-cache hot before actually writing to
 * %tick.
 *
 * NOTE: No provision for this on sun4v right now; the implementation
 * below does nothing but call panic().
 */

/*ARGSUSED*/
void
tick_write_delta(uint64_t delta)
{}

#else	/* lint */

	.seg	".text"
tick_write_delta_panic:
	.asciz	"tick_write_delta: not supported"

/*
 * In:  %o0 = delta (ignored)
 *
 * The high part of the message address is formed in %o1 before the save,
 * so it is visible as %i1 in the new window, where the low part is OR-ed
 * in to build panic()'s first argument.
 */
	ENTRY_NP(tick_write_delta)
	sethi	%hi(tick_write_delta_panic), %o1
	save	%sp, -SA(MINFRAME), %sp	! get a new window to preserve caller
	call	panic
	  or	%i1, %lo(tick_write_delta_panic), %o0	! delay: %o0 = message
	/*NOTREACHED*/
	retl
	  nop
	SET_SIZE(tick_write_delta)	! was missing: give the symbol a size
#endif
182
#if defined(lint)
/*
 * tickcmpr_disabled: return 1 if the tick-compare interrupt is disabled
 */

int
tickcmpr_disabled(void)
{
	return (0);
}

#else   /* lint */

/*
 * Out:   %o0 = tick-compare register value shifted right by
 *              TICKINT_DIS_SHFT
 * Uses:  %g1
 */
	ENTRY_NP(tickcmpr_disabled)
	RD_TICKCMPR(%g1, %o0)			! %g1 = TICK_CMPR
	retl
	  srlx	%g1, TICKINT_DIS_SHFT, %o0	! delay: isolate the top bit(s)
	SET_SIZE(tickcmpr_disabled)

#endif  /* lint */
201
/*
 * gettick: return the current native time (GET_NATIVE_TIME) in %o0
 */
#if defined(lint)

u_longlong_t
gettick(void)
{
	return (0);
}

#else   /* lint */

	ENTRY(gettick)
	GET_NATIVE_TIME(%o0, %o2, %o3)		! %o0 = native time
	retl
	  nop
	SET_SIZE(gettick)

#endif  /* lint */
220
221
/*
 * gettick_counter: return the counter portion of the %tick register,
 * i.e. the register value with bit 63 (npt) cleared.
 */

#if defined(lint)

uint64_t
gettick_counter(void)
{
	return (0);
}

#else	/* lint */

	ENTRY_NP(gettick_counter)
	rdpr	%tick, %o0
	sllx	%o0, 1, %o0			! push the npt bit out the top
	retl
	  srlx	%o0, 1, %o0			! delay: shift back, npt now 0
	SET_SIZE(gettick_counter)
#endif	/* lint */
241
242/*
243 * Provide a C callable interface to the trap that reads the hi-res timer.
244 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
245 */
246
247#if defined(lint)
248
249hrtime_t
250gethrtime(void)
251{
252	return ((hrtime_t)0);
253}
254
255hrtime_t
256gethrtime_unscaled(void)
257{
258	return ((hrtime_t)0);
259}
260
261hrtime_t
262gethrtime_max(void)
263{
264	return ((hrtime_t)0);
265}
266
267void
268scalehrtime(hrtime_t *hrt)
269{
270	*hrt = 0;
271}
272
273void
274gethrestime(timespec_t *tp)
275{
276	tp->tv_sec = 0;
277	tp->tv_nsec = 0;
278}
279
280time_t
281gethrestime_sec(void)
282{
283	return (0);
284}
285
286void
287gethrestime_lasttick(timespec_t *tp)
288{
289	tp->tv_sec = 0;
290	tp->tv_nsec = 0;
291}
292
293/*ARGSUSED*/
294void
295hres_tick(void)
296{
297}
298
299void
300panic_hres_tick(void)
301{
302}
303
304#else	/* lint */
305
/*
 * gethrtime: return the value produced by GET_HRTIME in %o0
 * Uses: %g1, %g2, %o0-%o5 (all handed to GET_HRTIME)
 */
	ENTRY_NP(gethrtime)
	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
	retl					! %g1 = hrtime at this point
	  mov	%g1, %o0			! delay: return it
	SET_SIZE(gethrtime)
312
/*
 * gethrtime_unscaled: return the raw native time (no nsec conversion)
 * Uses: %g1
 */
	ENTRY_NP(gethrtime_unscaled)
	GET_NATIVE_TIME(%g1, %o2, %o3)		! %g1 = native time
	retl
	  mov	%g1, %o0			! delay: return it
	SET_SIZE(gethrtime_unscaled)
318
/*
 * gethrtime_waitfree / dtrace_gethrtime: read the native time and convert
 * it with NATIVE_TIME_TO_NSEC; the result is returned in %o0.
 * Uses: %g1, %o2, %o3
 */
	ENTRY_NP(gethrtime_waitfree)
	ALTENTRY(dtrace_gethrtime)
	GET_NATIVE_TIME(%g1, %o2, %o3)		! %g1 = native time
	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)	! %g1 = converted time
	retl
	  mov	%g1, %o0			! delay: return it
	SET_SIZE(dtrace_gethrtime)
	SET_SIZE(gethrtime_waitfree)
327
/*
 * gethrtime_max: convert NATIVE_TIME_MAX to nanoseconds and return it.
 * hrtime_t is signed and the maximum must be positive, so a converted
 * value that comes out negative is replaced by (all ones >> 1).
 * Uses: %g1, %o0, %o1, %o2
 */
	ENTRY(gethrtime_max)
	NATIVE_TIME_MAX(%g1)
	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

	mov	-1, %o2				! %o2 = all ones
	brlz,a	%g1, 1f				! result negative?
	  srlx	%o2, 1, %g1			! delay (taken only): clamp
1:
	retl
	  mov	%g1, %o0			! delay: return value
	SET_SIZE(gethrtime_max)
340
/*
 * scalehrtime(hrtime_t *hrt): convert *hrt in place with
 * NATIVE_TIME_TO_NSEC.
 *
 * In:    %o0 = pointer to the 64-bit value
 * Uses:  %o1, %o2, %o3
 */
	ENTRY(scalehrtime)
	ldx	[%o0], %o1			! %o1 = *hrt
	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
	retl
	  stx	%o1, [%o0]			! delay: *hrt = converted value
	SET_SIZE(scalehrtime)
347
/*
 * get_timestamp: handler for the ST_GETHRTIME fast trap.
 *
 * Runs in the trap window and leaves traps disabled.  The 64-bit
 * nanosecond timestamp is handed back split across two registers:
 * %o0 = upper 32 bits, %o1 = lower 32 bits.
 */

	ENTRY_NP(get_timestamp)
	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
	srlx	%g1, 32, %o0				! %o0 = hrtime >> 32
	srl	%g1, 0, %o1				! %o1 = low word of hrtime
	FAST_TRAP_DONE
	SET_SIZE(get_timestamp)
361
/*
 * CONV_HRESTIME: turn the values fetched by GET_HRESTIME() into a
 * timestamp.
 *
 * The work is split across two macros so that the platform-dependent
 * GET_HRESTIME() can stay as small as possible; this one is the generic
 * half.
 *
 *	hrestsec, hrestnsec	hrestime seconds / nanoseconds (updated)
 *	adj			hrestime_adj (may be overwritten)
 *	nslt			nsec since last tick (overwritten)
 *	nano			register holding the nsec-per-sec limit
 *
 * Defines local labels 2, 3 and 4 and modifies the condition codes.
 */
#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
	brz,pt	adj, 3f;		/* adj == 0: skip adjusting */	\
	  add	hrestnsec, nslt, hrestnsec; /* delay: nsec += nslt */	\
	brlz,pn	adj, 2f;		/* negative adj handled at 2 */	\
	  srl	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= ADJ_SHIFT */	\
	subcc	adj, nslt, %g0;		/* compare adj with scaled nslt */ \
	movg	%xcc, nslt, adj;	/* adj = min(adj, scaled nslt) */ \
	ba	3f;			/* on to the overflow check */	\
	  add	hrestnsec, adj, hrestnsec; /* delay: nsec += adj */	\
2:	addcc	adj, nslt, %g0;		/* adj + scaled nslt */		\
	bge,a,pt %xcc, 3f;		/* sum >= 0: adj is the smaller */ \
	  add	hrestnsec, adj, hrestnsec; /* taken: nsec += adj */	\
	sub	hrestnsec, nslt, hrestnsec; /* else: nsec -= scaled nslt */ \
3:	cmp	hrestnsec, nano;	/* nsec reached the limit? */	\
	bl,pt	%xcc, 4f;		/* no: nothing to carry */	\
	  nop;				/* delay: unused */		\
	add	hrestsec, 1, hrestsec;	/* carry: sec++ */		\
	sub	hrestnsec, nano, hrestnsec; /* carry: nsec -= nano */	\
4:
387
/*
 * gethrestime(timespec_t *tp): store the current hrestime at *tp.
 *
 * In:    %o0 = tp
 * Uses:  %o1 (seconds), %o2 (nanoseconds), %o3-%o5, %g1-%g4
 */
	ENTRY_NP(gethrestime)
	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
	stn	%o1, [%o0]			! first long  = seconds
	retl
	  stn	%o2, [%o0 + CLONGSIZE]		! delay: second long = nsec
	SET_SIZE(gethrestime)
395
/*
 * gethrestime_sec: same computation as gethrestime(), but only the
 * seconds value is produced, and it is returned in %o0.
 *
 * Uses: %o0, %o2-%o5, %g1-%g4
 */
	ENTRY_NP(gethrestime_sec)
	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
	retl					! %o0 = hrestime seconds
	  nop
	SET_SIZE(gethrestime_sec)
406
/*
 * gethrestime_lasttick(timespec_t *tp): store hrestime as of the last
 * tick at *tp.  No conversion is needed, which makes this simpler than
 * gethrestime() and gethrestime_sec().
 *
 * The locking algorithm is the one used by GET_HRESTIME and GET_HRTIME
 * (described in detail in clock.h): sample hres_lock, read the data,
 * sample hres_lock again, and start over if the lock was held or its
 * value changed in between.  Unlike those macros, the load ordering is
 * requested with explicit membar #LoadLoad instructions instead of being
 * derived from load dependencies.
 *
 * In:    %o0 = tp
 * Uses:  %o1-%o4, %g1, %g2
 */
	ENTRY_NP(gethrestime_lasttick)
	sethi	%hi(hres_lock), %o1		! shared high bits of the vars
0:
	lduw	[%o1 + %lo(hres_lock)], %o2	! first lock sample
	membar	#LoadLoad			! order it before the data
	andn	%o2, 1, %o2			! expected value: low bit clear
	ldn	[%o1 + %lo(hrestime)], %g1	! %g1 = seconds
	add	%o1, %lo(hrestime), %o4		! %o4 = &hrestime
	ldn	[%o4 + CLONGSIZE], %g2		! %g2 = nanoseconds
	membar	#LoadLoad			! data before second sample
	lduw	[%o1 + %lo(hres_lock)], %o3	! second lock sample
	subcc	%o3, %o2, %g0			! held, or changed meanwhile?
	bne	0b				!   then read everything again
	  stn	%g1, [%o0]			! delay: tp[0] = seconds
	retl
	  stn	%g2, [%o0 + CLONGSIZE]		! delay: tp[1] = nanoseconds
	SET_SIZE(gethrestime_lasttick)
432
/*
 * get_hrestime: handler for the ST_GETHRESTIME fast trap, used for
 * gettimeofday().  The timestruc_t is returned in registers:
 * %o0 = seconds, %o1 = nanoseconds.
 */

	ENTRY_NP(get_hrestime)
	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
	FAST_TRAP_DONE
	SET_SIZE(get_hrestime)
444
/*
 * get_virtime: handler for the ST_GETHRVTIME fast trap.
 *
 * Runs in the trap window and leaves traps disabled.  Returns, in
 * %o0:%o1, a 64-bit count of the nanoseconds consumed by the lwp
 * (lwp virtual time).
 *
 * Register usage:
 *	%o0, %o1 = returned value (upper, lower 32 bits); %o0 is also
 *		   lent to NATIVE_TIME_TO_NSEC as scratch
 *	%g2 = CPU pointer, then thread pointer
 *	%g3 = lwp pointer
 *	%g1 = scratch
 *	%g5 = time value being built up
 */
	ENTRY_NP(get_virtime)
	GET_NATIVE_TIME(%g5, %g1, %g2)		! %g5 = native time (ticks)
	CPU_ADDR(%g2, %g3)			! %g2 = CPU struct
	ldn	[%g2 + CPU_THREAD], %g2		! %g2 = current thread
	ldn	[%g2 + T_LWP], %g3		! %g3 = its lwp

	/*
	 * Time spent so far in the current microstate:
	 * now - ms_state_start.
	 */
	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
	sub	%g5, %g1, %g5

	/*
	 * Plus what has already been accounted to ms_acct[LMS_USER].
	 */
	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
	add	%g5, %g1, %g5
	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)	! ticks -> nanoseconds

	srl	%g5, 0, %o1			! %o1 = low 32 bits
	srlx	%g5, 32, %o0			! %o0 = high 32 bits

	FAST_TRAP_DONE
	SET_SIZE(get_virtime)
484
485
486
/*
 * hres_tick: update hres_last_tick, hrtime_base, timedelta, hrestime_adj
 * and hrestime while holding hres_lock.
 *
 * Register usage once the lock is held:
 *	%l4     = %hi(hrestime); every variable is addressed off it
 *	%l5     = nsec_scale, later the adjustment, later NANOSEC
 *	%g1     = hrtime_base as read on entry
 *	%l0     = native time, then the same time in nsec, then scratch
 *	%i1     = nsec elapsed relative to the old hrtime_base ("nslt")
 *	%l2     = &hrestime
 *	%i2:%i3 = hrestime seconds : nanoseconds
 *
 * If hrtime_base turns out to be greater than the newly computed time,
 * the lock is released and panic() is called with the message below.
 */
	.seg	".text"
hrtime_base_panic:
	.asciz	"hrtime_base stepping back"


	ENTRY_NP(hres_tick)
	save	%sp, -SA(MINFRAME), %sp		! fresh register window

	sethi	%hi(hrestime), %l4
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! grab lock byte
7:	tst	%l5
	bz,pt	%xcc, 8f			! old value 0: lock is ours
	  ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = nsec_scale
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! peek at it
9:	tst	%l5
	bz,a,pn	%xcc, 7b			! looks free: retry the ldstub
	  ldstub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
	ba,pt	%xcc, 9b			! still held: keep peeking
	  ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
	membar	#StoreLoad|#StoreStore

	!
	! Record the new hres_last_tick and work out the elapsed nsec.
	! %l5 = nsec_scale here.
	!
	ldx	[%l4 + %lo(hrtime_base)], %g1	! %g1 = old hrtime_base
	GET_NATIVE_TIME(%l0, %l3, %l6)		! %l0 = native time now
	stx	%l0, [%l4 + %lo(hres_last_tick)]! hres_last_tick = now
	! native time -> nsec
	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

	sub	%l0, %g1, %i1			! %i1 = nsec delta (nslt)

	ldx	[%l4 + %lo(hrtime_base)], %l1
	subcc	%l1, %l0, %g0			! hrtime_base > new time?
	bg,pn	%xcc, 9f			!   yes: release lock, panic
	  nop

	stx	%l0, [%l4 + %lo(hrtime_base)]	! hrtime_base = new time

	!
	! Work out the adjustment to apply this tick (left in %l5).
	!
	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
	brz	%l0, 2f				! zero: nothing to adjust
	  clr	%l5				! delay: adj = 0
	tst	%l0				! sign of hrestime_adj
	bge,pt	%xcc, 1f			! non-negative: positive case
	  srl	%i1, ADJ_SHIFT, %l5		! delay: adj = nslt >> ADJ_SHIFT

	addcc	%l0, %l5, %g0			! hrestime_adj + adj < 0 ?
	bl,pt	%xcc, 2f			! yes: use -adj
	  neg	%l5				! delay: adj = -adj
	ba,pt	%xcc, 2f
	  mov	%l0, %l5			! delay: no, adj = hrestime_adj
1:
	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
	bl,a,pt	%xcc, 2f
	  mov	%l0, %l5			! taken only: adj = hrestime_adj
2:
	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
	sub	%l0, %l5, %l0			! timedelta -= adj

	stx	%l0, [%l4 + %lo(timedelta)]	! write timedelta back
	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta

	or	%l4, %lo(hrestime), %l2		! %l2 = &hrestime
	ldn	[%l2], %i2			! %i2 = hrestime seconds
	ldn	[%l2 + CLONGSIZE], %i3		! %i3 = hrestime nanoseconds
	add	%i3, %l5, %i3			! nsec += adj
	add	%i3, %i1, %i3			! nsec += nslt

	set	NANOSEC, %l5			! %l5 = NANOSEC
	subcc	%i3, %l5, %g0
	bl,pt	%xcc, 5f			! nsec < NANOSEC: no carry
	  sethi	%hi(one_sec), %i1		! delay: start &one_sec
	add	%i2, 0x1, %i2			! carry: sec++
	sub	%i3, %l5, %i3			! carry: nsec -= NANOSEC
	mov	0x1, %l5
	st	%l5, [%i1 + %lo(one_sec)]	! one_sec = 1
5:
	stn	%i2, [%l2]
	stn	%i3, [%l2 + CLONGSIZE]		! hrestime = new sec:nsec

	membar	#StoreStore

	ld	[%l4 + %lo(hres_lock)], %i1
	add	%i1, 1, %i1			! release: bump the lock word
	st	%i1, [%l4 + %lo(hres_lock)]

	ret
	  restore

9:
	!
	! hrtime_base went backwards: drop hres_lock, then panic.
	!
	ld	[%l4 + %lo(hres_lock)], %i1
	add	%i1, 1, %i1
	st	%i1, [%l4 + %lo(hres_lock)]

	sethi	%hi(hrtime_base_panic), %o0
	call	panic
	  or	%o0, %lo(hrtime_base_panic), %o0

	SET_SIZE(hres_tick)
595
596#endif	/* lint */
597
598#if !defined(lint) && !defined(__lint)
599
/*
 * kstat_q_panic: branch target used by the kstat queue "exit" style
 * routines when the old queue length is zero.  Calls panic() with the
 * message below and is not expected to return.
 */
	.seg	".text"
kstat_q_panic_msg:
	.asciz	"kstat_q_exit: qlen == 0"

	ENTRY(kstat_q_panic)
	save	%sp, -SA(MINFRAME), %sp		! new window for the call
	sethi	%hi(kstat_q_panic_msg), %o0
	call	panic
	  or	%o0, %lo(kstat_q_panic_msg), %o0	! delay: %o0 = message
	/*NOTREACHED*/
	SET_SIZE(kstat_q_panic)
611
/* Branch flavours handed to KSTAT_Q_UPDATE as its QBR argument. */
#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

/*
 * KSTAT_Q_UPDATE: account for one queue transition.
 *
 *	QOP	add or sub, applied to the queue length
 *	QBR	branch taken when the OLD queue length is zero
 *	QZERO	target of that branch
 *	QRETURN	text emitted just before the final store (a label and/or
 *		"retl", which puts that store in the delay slot)
 *	QTYPE	prefix of the offset names (QTYPE/**/CNT, ...TIME,
 *		...LENTIME, ...LASTUPDATE)
 *
 * Expects %o0 = base pointer the offsets apply to and %g1 = current time.
 * Uses %o1-%o5.  When the old length is non-zero, the time since
 * LASTUPDATE is added to TIME and (old length * that delta) to LENTIME.
 * LASTUPDATE is always set to %g1 unless QZERO leads elsewhere.
 */
#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = qlen before */	\
	QOP	%o1, 1, %o2;			/* %o2 = qlen after */	\
	QBR	%o1, QZERO;			/* old qlen == 0? */	\
	  st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: store qlen */	\
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3; /* %o3 = lastupdate */ \
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = time so far */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = lentime so far */ \
	sub	%g1, %o3, %o2;			/* %o2 = now - lastupdate */ \
	mulx	%o1, %o2, %o3;			/* %o3 = old qlen * delta */ \
	add	%o4, %o2, %o4;			/* time += delta */	\
	add	%o5, %o3, %o5;			/* lentime += product */ \
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* store time */	\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* store lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
631
/*
 * kstat_waitq_enter: wait queue length + 1 (KSTAT_IO_W fields).
 * In: %o0 = base pointer for the KSTAT_IO_W offsets.  Uses %g1, %o1-%o5.
 * An old length of zero skips the time accounting.
 */
	.align 16
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)		! %g1 = now
	KSTAT_Q_UPDATE(add, BRZPT, 9f, 9:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)
637
/*
 * kstat_waitq_exit: wait queue length - 1 (KSTAT_IO_W fields).
 * In: %o0 = base pointer for the KSTAT_IO_W offsets.  Uses %g1, %o1-%o5.
 * An old length of zero branches to kstat_q_panic.
 */
	.align 16
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)		! %g1 = now
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_exit)
643
/*
 * kstat_runq_enter: run queue length + 1 (KSTAT_IO_R fields).
 * In: %o0 = base pointer for the KSTAT_IO_R offsets.  Uses %g1, %o1-%o5.
 * An old length of zero skips the time accounting.
 */
	.align 16
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)		! %g1 = now
	KSTAT_Q_UPDATE(add, BRZPT, 9f, 9:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)
649
/*
 * kstat_runq_exit: run queue length - 1 (KSTAT_IO_R fields).
 * In: %o0 = base pointer for the KSTAT_IO_R offsets.  Uses %g1, %o1-%o5.
 * An old length of zero branches to kstat_q_panic.
 */
	.align 16
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)		! %g1 = now
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_exit)
655
/*
 * kstat_waitq_to_runq: wait queue length - 1, then run queue length + 1,
 * both stamped with the same time sample.
 * In: %o0 = base pointer for the KSTAT_IO_* offsets.  Uses %g1, %o1-%o5.
 * A wait queue length of zero branches to kstat_q_panic.
 */
	.align 16
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1, %g2, %g3)		! %g1 = now
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 8:, KSTAT_IO_W)
	KSTAT_Q_UPDATE(add, BRZPT, 9f, 9:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)
662
/*
 * kstat_runq_back_to_waitq: run queue length - 1, then wait queue
 * length + 1, both stamped with the same time sample.
 * In: %o0 = base pointer for the KSTAT_IO_* offsets.  Uses %g1, %o1-%o5.
 * A run queue length of zero branches to kstat_q_panic.
 */
	.align 16
	ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1, %g2, %g3)		! %g1 = now
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 8:, KSTAT_IO_R)
	KSTAT_Q_UPDATE(add, BRZPT, 9f, 9:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)
669
670#endif /* lint */
671
#ifdef lint

/* C declarations of the time-keeping variables, for lint only. */
int64_t timedelta;
hrtime_t hres_last_tick;
timestruc_t hrestime;
int64_t hrestime_adj;
int hres_lock;
uint_t nsec_scale;
hrtime_t hrtime_base;
int traptrace_use_stick;

#else
	/*
	 *  -- WARNING --
	 *
	 * The following variables MUST be together on a 128-byte boundary.
	 * Besides the primary performance motivation (keeping them on the
	 * same cache line(s)), the code in this file and the GET*TIME()
	 * macros assume that all of them share the same high 22 address
	 * bits, so that a single sethi serves for every one of them.
	 * Do not reorder, resize or separate them without checking every
	 * user.
	 */
	.seg	".data"
	.global	timedelta
	.global	hres_last_tick
	.global	hrestime
	.global	hrestime_adj
	.global	hres_lock
	.global	nsec_scale
	.global	hrtime_base
	.global	traptrace_use_stick
	.global	nsec_shift
	.global	adj_shift

	/* XXX - above comment claims 128-bytes is necessary */
	.align	64
timedelta:
	.word	0, 0		/* int64_t */
hres_last_tick:
	.word	0, 0		/* hrtime_t */
hrestime:
	.nword	0, 0		/* 2 longs: seconds, nanoseconds */
hrestime_adj:
	.word	0, 0		/* int64_t */
hres_lock:
	.word	0		/* int */
nsec_scale:
	.word	0		/* uint_t */
hrtime_base:
	.word	0, 0		/* hrtime_t */
traptrace_use_stick:
	.word	0		/* int */
nsec_shift:
	.word	NSEC_SHIFT
adj_shift:
	.word	ADJ_SHIFT

#endif
722
723
/*
 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
 * usec_delay(int n)		[compatibility - should go one day]
 *
 * Spin for n microseconds.  Any n <= 0 is treated as 1 usec.
 *
 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 * and variable clock rate for power management requires that we
 * use %stick to implement this routine.
 */

#if defined(lint)

/*ARGSUSED*/
void
drv_usecwait(clock_t n)
{
}

/*ARGSUSED*/
void
usec_delay(int n)
{
}

#else	/* lint */

/*
 * In:    %o0 = n (microseconds)
 * Uses:  %o0, %o1 (deadline), %o2 (time sample)
 *
 * The loop compares the deadline against the PREVIOUS time sample and
 * takes a new sample between the compare and the branch; this relies on
 * GET_NATIVE_TIME leaving the condition codes alone.
 */
	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
	brlez,a,pn %o0, 0f			! n <= 0 ?
	  mov	1, %o0				! delay (taken only): n = 1
0:
	sethi	%hi(sticks_per_usec), %o1
	lduw	[%o1 + %lo(sticks_per_usec)], %o1
	mulx	%o1, %o0, %o1			! %o1 = n * sticks_per_usec
	add	%o1, 1, %o1			! +1: we don't start on an edge
	GET_NATIVE_TIME(%o2, %o3, %o4)		! %o2 = now
	add	%o1, %o2, %o1			! %o1 = deadline

1:	subcc	%o1, %o2, %g0			! deadline vs. last sample
	GET_NATIVE_TIME(%o2, %o3, %o4)		! take the next sample
	bgeu,pt	%xcc, 1b			! deadline >= sample: spin on
	  nop
	retl
	  nop
	SET_SIZE(usec_delay)
	SET_SIZE(drv_usecwait)
#endif	/* lint */
771
#if defined(lint)

/* ARGSUSED */
void
pil14_interrupt(int level)
{
}

#else

/*
 * pil14_interrupt: level-14 interrupt prologue.
 *
 * Records the interrupted PIL and the trap PC in the CPU structure and
 * continues at pil_interrupt_common.  The PC goes to CPU_PROFILE_PC when
 * TSTATE_PRIV is set in %tstate and to CPU_PROFILE_UPC otherwise; the
 * other of the two fields is zeroed.
 *
 * Uses: %g1 (CPU pointer), %g2, %g5, %g6
 */
	ENTRY_NP(pil14_interrupt)
	CPU_ADDR(%g1, %g2)			! %g1 = CPU struct
	rdpr	%pil, %g6			! %g6 = PIL that was interrupted
	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! remember it
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5			! %g5 = trap PC
	btst	TSTATE_PRIV, %g6		! did we trap from privileged mode?
	bnz,a,pt %xcc, 1f
	  stn	%g5, [%g1 + CPU_PROFILE_PC]	! taken only: kernel PC = %tpc
	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! user mode: user PC = %tpc
	ba	pil_interrupt_common		! must be large-disp branch
	  stn	%g0, [%g1 + CPU_PROFILE_PC]	! delay: kernel PC = 0
1:	ba	pil_interrupt_common		! must be large-disp branch
	  stn	%g0, [%g1 + CPU_PROFILE_UPC]	! delay: user PC = 0
	SET_SIZE(pil14_interrupt)
799
/*
 * tick_rtt
 *
 * Load TICK_COMPARE into %o5; if bit 63 is set, TICK_COMPARE is disabled
 * and there is nothing to do.  If it is enabled, the interrupt request
 * structure has to be re-enqueued.  After that TICKINT/STICKINT is
 * checked in SOFTINT; if it is set, we were in a TICK_COMPARE interrupt.
 * In that case TICK_COMPARE may have been rewritten recently, so %o5 is
 * compared against the current time to verify that it is in the future,
 * and reprogrammed with a doubling step until it is.
 *
 * Every path ends at current_thread_complete.
 *
 * Note that %o5 is live until after 1f.
 * XXX - there is a subroutine call while %o5 is live!
 *
 * Uses: %o0, %o1, %o4, %o5, %g1, %g5 (saved %pstate)
 */
	ENTRY_NP(tick_rtt)
	RD_TICKCMPR(%o5, %g1)			! %o5 = TICK_COMPARE
	srlx	%o5, TICKINT_DIS_SHFT, %g1	! %g1 = disable bit
	brnz,pt	%g1, 2f				! disabled: straight out
	  nop

	rdpr 	%pstate, %g5			! %g5 = %pstate to restore
	andn	%g5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate		! Disable vec interrupts

	sethi	%hi(cbe_level14_inum), %o1
	ld	[%o1 + %lo(cbe_level14_inum)], %o1
	call	intr_enqueue_req ! preserves %o5 and %g5
	  mov	PIL_14, %o0

	! Is TICKINT/STICKINT pending in SOFTINT?
	rd	SOFTINT, %o4
	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
	andcc	%o4, %o0, %g0
	bz,a,pn	%icc, 2f			! neither set: done
	  wrpr	%g0, %g5, %pstate		! taken only: enable vec intr

	! clear TICKINT/STICKINT
	wr	%o0, CLEAR_SOFTINT

	!
	! With TICKINT cleared, reread the time and confirm that the value
	! programmed is still in the future.  If it is not, TICK_COMPARE
	! has to be reprogrammed to fire as soon as possible.
	!
	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
	sllx	%o0, 1, %o0			! Clear the DIS bit
	srlx	%o0, 1, %o0
	subcc	%o5, %o0, %g0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	  wrpr	%g0, %g5, %pstate		!   taken only: enable vec intr

	!
	! TICK_COMPARE holds a time in the past.  Start from an initial
	! step and loop, doubling it, until a value has been programmed
	! that is in the future.
	!
	mov	8, %o4				! 8 = arbitrary initial step
1:	add	%o0, %o4, %o5			! %o5 = now + step
	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
	sllx	%o0, 1, %o0			! Clear the DIS bit
	srlx	%o0, 1, %o0
	subcc	%o5, %o0, %g0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	  wrpr	%g0, %g5, %pstate		!    taken only: enable vec intr
	ba	1b				! No, try again.
	  sllx	%o4, 1, %o4			!    delay: double step size

2:	ba	current_thread_complete
	  nop
	SET_SIZE(tick_rtt)
869
870#endif /* lint */
871
#if defined(lint)
/*
 * prefetch_page_w / prefetch_page_r: prefetch a page_t for write or for
 * read; a linear scan of sequential page_t's is assumed.
 */
/*ARGSUSED*/
void
prefetch_page_w(void *pp)
{
}

/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
}
#else	/* lint */

/*
 * Both routines currently return immediately without touching %o0.
 * XXXQ These should be inline templates, not functions
 */
	ENTRY(prefetch_page_w)
	retl
	  nop
	SET_SIZE(prefetch_page_w)

	ENTRY(prefetch_page_r)
	retl
	  nop
	SET_SIZE(prefetch_page_r)

#endif	/* lint */
900
#if defined(lint)
/*
 * prefetch_smap_w: prefetch a struct smap for write.
 */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
}
#else	/* lint */

/*
 * Currently returns immediately without touching %o0.
 * XXXQ These should be inline templates, not functions
 */
	ENTRY(prefetch_smap_w)
	retl
	  nop
	SET_SIZE(prefetch_smap_w)

#endif	/* lint */
918
919/*
920 * Generic sun4v MMU and Cache operations.
921 */
922
923#if defined(lint)
924
925/*ARGSUSED*/
926void
927vtag_flushpage(caddr_t vaddr, uint_t ctxnum)
928{}
929
930/*ARGSUSED*/
931void
932vtag_flushctx(uint_t ctxnum)
933{}
934
935/*ARGSUSED*/
936void
937vtag_flushall(void)
938{}
939
940/*ARGSUSED*/
941void
942vtag_unmap_perm_tl1(uint64_t vaddr, uint64_t ctxnum)
943{}
944
945/*ARGSUSED*/
946void
947vtag_flushpage_tl1(uint64_t vaddr, uint64_t ctxnum)
948{}
949
950/*ARGSUSED*/
951void
952vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t ctx_pgcnt)
953{}
954
955/*ARGSUSED*/
956void
957vtag_flushctx_tl1(uint64_t ctxnum, uint64_t dummy)
958{}
959
960/*ARGSUSED*/
961void
962vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
963{}
964
965/*ARGSUSED*/
966void
967vac_flushpage(pfn_t pfnum, int vcolor)
968{}
969
970/*ARGSUSED*/
971void
972vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
973{}
974
975/*ARGSUSED*/
976void
977flush_instr_mem(caddr_t vaddr, size_t len)
978{}
979
980#else	/* lint */
981
/*
 * vtag_flushpage: flush a page from the tlb
 *
 * In:  %o0 = vaddr
 *      %o1 = ctxnum
 *
 * Issues the MMU_UNMAP_ADDR trap for both the ITLB and the DTLB.  A
 * non-zero %o0 afterwards is treated as a failure: control goes to
 * panic_bad_hcall with %o1 = MMU_UNMAP_ADDR.
 */
	ENTRY_NP(vtag_flushpage)
	mov	MAP_ITLB | MAP_DTLB, %o2	! %o2 = which TLBs
	ta	MMU_UNMAP_ADDR
	brz,pt	%o0, 9f				! %o0 == 0: success
	  nop
	ba	panic_bad_hcall
	  mov	MMU_UNMAP_ADDR, %o1		! delay: name the failed call
9:
	retl
	  nop
	SET_SIZE(vtag_flushpage)
999
/*
 * vtag_flushctx: flush a context from the tlb
 *
 * In:  %o0 = ctxnum
 *
 * Issues FAST_TRAP with function MMU_DEMAP_CTX for both TLBs, passing
 * zero for the first two arguments (no cpu list yet).  A non-zero %o0
 * afterwards goes to panic_bad_hcall with %o1 = MMU_DEMAP_CTX.
 */
	ENTRY_NP(vtag_flushctx)
	mov	%o0, %o2			! arg2 = ctxnum
	clr	%o0				! XXXQ no cpu list yet
	clr	%o1				! XXXQ no cpu list yet
	mov	MAP_ITLB | MAP_DTLB, %o3	! arg3 = which TLBs
	mov	MMU_DEMAP_CTX, %o5		! function number
	ta	FAST_TRAP
	brz,pt	%o0, 9f				! %o0 == 0: success
	  nop
	ba	panic_bad_hcall
	  mov	MMU_DEMAP_CTX, %o1		! delay: name the failed call
9:
	retl
	  nop
	SET_SIZE(vtag_flushctx)
1020
/*
 * vtag_flushall: issue FAST_TRAP with function MMU_DEMAP_ALL for both
 * TLBs, passing zero for the first two arguments (no cpu list yet).
 * A non-zero %o0 afterwards goes to panic_bad_hcall with
 * %o1 = MMU_DEMAP_ALL.
 */
	ENTRY_NP(vtag_flushall)
	clr	%o0				! XXX no cpu list yet
	clr	%o1				! XXX no cpu list yet
	mov	MAP_ITLB | MAP_DTLB, %o2	! arg2 = which TLBs
	mov	MMU_DEMAP_ALL, %o5		! function number
	ta	FAST_TRAP
	brz,pt	%o0, 9f				! %o0 == 0: success
	  nop
	ba	panic_bad_hcall
	  mov	MMU_DEMAP_ALL, %o1		! delay: name the failed call
9:
	retl
	  nop
	SET_SIZE(vtag_flushall)
1035
/*
 * vtag_unmap_perm_tl1: x-trap to unmap a perm map entry
 *
 * In:  %g1 = vaddr
 *      %g2 = ctxnum
 *
 * %o0, %o1, %o2 and %o5 are parked in %g3-%g6 around the call and put
 * back before the retry.  On failure ptl1_panic is entered with
 * %g1 = PTL1_BAD_HCALL, replaced by the ENOMAP- or EINVAL-specific code
 * when %o0 equals H_ENOMAP or H_EINVAL.
 */
	ENTRY_NP(vtag_unmap_perm_tl1)
	mov	%o0, %g3			! park the %o registers we use
	mov	%o1, %g4
	mov	%o2, %g5
	mov	%o5, %g6
	mov	%g1, %o0			! arg0 = vaddr
	mov	%g2, %o1			! arg1 = ctxnum
	mov	MAP_ITLB | MAP_DTLB, %o2	! arg2 = which TLBs
	mov	UNMAP_PERM_ADDR, %o5		! function number
	ta	FAST_TRAP
	brz,pt	%o0, 1f				! %o0 == 0: success
	  nop

	mov	PTL1_BAD_HCALL, %g1		! default panic reason

	cmp	%o0, H_ENOMAP
	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP, %g1

	cmp	%o0, H_EINVAL
	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_EINVAL, %g1

	ba,a	ptl1_panic
1:
	mov	%g6, %o5			! put the %o registers back
	mov	%g5, %o2
	mov	%g4, %o1
	mov	%g3, %o0
	retry
	SET_SIZE(vtag_unmap_perm_tl1)
1070
/*
 * vtag_flushpage_tl1: x-trap to flush a page from tlb and tsb
 *
 * In:  %g1 = vaddr, zero-extended on 32-bit kernel
 *      %g2 = ctxnum
 *
 * assumes TSBE_TAG = 0
 *
 * %o0-%o2 are parked in %g3-%g5 around the MMU_UNMAP_ADDR trap and put
 * back before the retry.  A non-zero %o0 from the trap goes to
 * ptl1_panic with %g1 = PTL1_BAD_HCALL.
 */
	ENTRY_NP(vtag_flushpage_tl1)
	srln	%g1, MMU_PAGESHIFT, %g1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = page-aligned vaddr */
	mov	%o0, %g3			! park %o0-%o2
	mov	%o1, %g4
	mov	%o2, %g5
	mov	%g1, %o0			! arg0 = vaddr
	mov	%g2, %o1			! arg1 = ctx
	mov	MAP_ITLB | MAP_DTLB, %o2	! arg2 = which TLBs
	ta	MMU_UNMAP_ADDR
	brz,pt	%o0, 9f				! %o0 == 0: success
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1		! delay: panic reason
9:
	mov	%g5, %o2			! put %o0-%o2 back
	mov	%g4, %o1
	mov	%g3, %o0
	membar #Sync
	retry
	SET_SIZE(vtag_flushpage_tl1)
1100
/*
 * vtag_flush_pgcnt_tl1: x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
 *
 * In:  %g1 = vaddr, zero-extended on 32-bit kernel
 *      %g2 = <zero32|ctx16|pgcnt16>
 *
 * NOTE: this handler relies on the fact that no interrupts or traps can
 *	occur during the loop issuing the TLB_DEMAP operations.  It is
 *	assumed that interrupts are disabled and this code is fetching
 *	from the kernel locked text address.
 *
 * assumes TSBE_TAG = 0
 *
 * %o0-%o2 are parked in %g3-%g5 and put back before the retry.  The page
 * count is decremented after each unmap, so the loop body runs at least
 * once.  A non-zero %o0 from the trap goes to ptl1_panic with
 * %g1 = PTL1_BAD_HCALL.
 */
	ENTRY_NP(vtag_flush_pgcnt_tl1)
	srln	%g1, MMU_PAGESHIFT, %g1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = page-aligned vaddr */
	mov	%o0, %g3			! park %o0-%o2
	mov	%o1, %g4
	mov	%o2, %g5

	set	0xffff, %g6
	and	%g6, %g2, %g7			/* g7 = pgcnt (low 16 bits) */
	srln	%g2, 16, %g2			/* g2 = ctxnum */

	set	MMU_PAGESIZE, %g6		/* g6 = pgsize */
1:
	mov	%g1, %o0			! arg0 = vaddr
	mov	%g2, %o1			! arg1 = ctx
	mov	MAP_ITLB | MAP_DTLB, %o2	! arg2 = which TLBs
	ta	MMU_UNMAP_ADDR
	brz,pt	%o0, 2f				! %o0 == 0: success
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1		! delay: panic reason
2:
	deccc	%g7				/* pgcnt-- */
	bnz,pt	%icc,1b				! more pages to do
	  add	%g1, %g6, %g1			/* delay: vaddr += pgsize */

	mov	%g5, %o2			! put %o0-%o2 back
	mov	%g4, %o1
	mov	%g3, %o0
	membar #Sync
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)
1147
/*
 * vtag_flushctx_tl1: x-trap to flush a context from tlb
 *
 * In:  %g1 = ctxnum
 *
 * %o0-%o3 and %o5 are parked in %g3-%g7 around the MMU_DEMAP_CTX call
 * and put back before the retry.  A non-zero %o0 from the call goes to
 * ptl1_panic with %g1 = PTL1_BAD_HCALL.
 */
	ENTRY_NP(vtag_flushctx_tl1)
	mov	%o0, %g3			! park the %o registers we use
	mov	%o1, %g4
	mov	%o2, %g5
	mov	%o3, %g6
	mov	%o5, %g7
	mov	%g1, %o2			! arg2 = ctxnum
	clr	%o0				! XXXQ no cpu list yet
	clr	%o1				! XXXQ no cpu list yet
	mov	MAP_ITLB | MAP_DTLB, %o3	! arg3 = which TLBs
	mov	MMU_DEMAP_CTX, %o5		! function number
	ta	FAST_TRAP
	brz,pt	%o0, 9f				! %o0 == 0: success
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1		! delay: panic reason
9:
	mov	%g7, %o5			! put the %o registers back
	mov	%g6, %o3
	mov	%g5, %o2
	mov	%g4, %o1
	mov	%g3, %o0
	membar #Sync
	retry
	SET_SIZE(vtag_flushctx_tl1)
1178
/*
 * vtag_flushall_tl1: x-trap version of vtag_flushall.
 * (Not implemented on US1/US2.)
 *
 * %o0-%o3 and %o5 are parked in %g3-%g7 around the MMU_DEMAP_ALL call
 * and put back before the retry.  A non-zero %o0 from the call goes to
 * ptl1_panic with %g1 = PTL1_BAD_HCALL.
 */
	ENTRY_NP(vtag_flushall_tl1)
	mov	%o0, %g3			! park the %o registers
	mov	%o1, %g4
	mov	%o2, %g5
	mov	%o3, %g6	! XXXQ not used?
	mov	%o5, %g7
	clr	%o0				! XXX no cpu list yet
	clr	%o1				! XXX no cpu list yet
	mov	MAP_ITLB | MAP_DTLB, %o2	! arg2 = which TLBs
	mov	MMU_DEMAP_ALL, %o5		! function number
	ta	FAST_TRAP
	brz,pt	%o0, 9f				! %o0 == 0: success
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1		! delay: panic reason
9:
	mov	%g7, %o5			! put the %o registers back
	mov	%g6, %o3	! XXXQ not used?
	mov	%g5, %o2
	mov	%g4, %o1
	mov	%g3, %o0
	retry
	SET_SIZE(vtag_flushall_tl1)
1203
/*
 * vac_flushpage(pfnum, color)
 *	Flush 1 8k page of the D-$ with physical page = pfnum
 *
 *	In:  %o0 = pfnum, %o1 = color
 *
 *	Algorithm (as described for the spitfire dcache):
 *		The spitfire dcache is a 16k direct mapped virtual indexed,
 *		physically tagged cache.  Given the pfnum we read all cache
 *		lines for the corresponding page in the cache (determined by
 *		the color).  Each cache line is compared with
 *		the tag created from the pfnum. If the tags match we flush
 *		the line.
 *
 *	The body below does none of that: it returns immediately (XXXQ).
 */
	ENTRY(vac_flushpage)
	! XXXQ
	retl
	  nop
	SET_SIZE(vac_flushpage)
1225
/*
 * vac_flushpage_tl1: x-trap to flush a page from the d$
 *
 * In:  %g1 = pfnum, %g2 = color
 *
 * Currently nothing is flushed; the handler just executes retry (XXXQ).
 */
	ENTRY_NP(vac_flushpage_tl1)
	! XXXQ
	retry
	SET_SIZE(vac_flushpage_tl1)
1235
/*
 * vac_flushcolor
 *
 * In:  %o0 = vcolor
 *
 * Currently returns immediately without doing anything (XXXQ).
 */
	ENTRY(vac_flushcolor)
	! XXXQ
	retl
	  nop
	SET_SIZE(vac_flushcolor)
1244
/*
 * vac_flushcolor_tl1
 *
 * In:  %g1 = vcolor
 *
 * Currently nothing is flushed; the handler just executes retry (XXXQ).
 */
	ENTRY(vac_flushcolor_tl1)
	! XXXQ
	retry
	SET_SIZE(vac_flushcolor_tl1)
1252
/*
 * flush_instr_mem:
 *	Flush a portion of the I-$ starting at vaddr
 *
 *	In:  %o0 = vaddr
 *	     %o1 = bytes to be flushed
 *
 *	One "flush" is issued every ICACHE_FLUSHSZ bytes.  The count is
 *	tested after the first flush, so at least one flush is always
 *	issued.
 */

	ENTRY(flush_instr_mem)
	membar	#StoreStore				! make earlier stores
							! globally visible first
1:
	flush	%o0
	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes -= ICACHE_FLUSHSZ
	bgu,pt	%ncc, 1b				! bytes left: go again
	  add	%o0, ICACHE_FLUSHSZ, %o0		! delay: vaddr += step

	retl
	  nop
	SET_SIZE(flush_instr_mem)
1272
1273#endif /* !lint */
1274
/*
 * fp_zero() - clear all fp data registers and the fsr
 *
 * Both the FSR and %f0 are loaded from fp_zero_zero, an 8-byte aligned
 * 64-bit zero; %f0 is then copied into every other double register up
 * to %f62.
 *
 * Uses: %o0, %fsr, %f0-%f62
 */

#if defined(lint) || defined(__lint)

void
fp_zero(void)
{}

#else	/* lint */

.global	fp_zero_zero
.align 8
fp_zero_zero:
	.xword	0

	ENTRY_NP(fp_zero)
	sethi	%hi(fp_zero_zero), %o0
	! The FSR is loaded with ld (32 bits) or ldx (64 bits); "ldd" is a
	! register-pair/double load and has no %fsr form.  Use the 64-bit
	! load so that the whole register is cleared.
	ldx	[%o0 + %lo(fp_zero_zero)], %fsr
	ldd	[%o0 + %lo(fp_zero_zero)], %f0	! %f0 = 0.0
	fmovd	%f0, %f2
	fmovd	%f0, %f4
	fmovd	%f0, %f6
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14
	fmovd	%f0, %f16
	fmovd	%f0, %f18
	fmovd	%f0, %f20
	fmovd	%f0, %f22
	fmovd	%f0, %f24
	fmovd	%f0, %f26
	fmovd	%f0, %f28
	fmovd	%f0, %f30
	fmovd	%f0, %f32
	fmovd	%f0, %f34
	fmovd	%f0, %f36
	fmovd	%f0, %f38
	fmovd	%f0, %f40
	fmovd	%f0, %f42
	fmovd	%f0, %f44
	fmovd	%f0, %f46
	fmovd	%f0, %f48
	fmovd	%f0, %f50
	fmovd	%f0, %f52
	fmovd	%f0, %f54
	fmovd	%f0, %f56
	fmovd	%f0, %f58
	fmovd	%f0, %f60
	retl
	  fmovd	%f0, %f62			! delay: last register
	SET_SIZE(fp_zero)

#endif	/* lint */
1331