xref: /titanic_50/usr/src/uts/sun4v/cpu/common_asm.s (revision 080575042aba2197b425ebfd52061dea061a9aa1)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#if !defined(lint)
29#include "assym.h"
30#endif
31
32/*
33 * General assembly language routines.
34 * It is the intent of this file to contain routines that are
35 * specific to cpu architecture.
36 */
37
/*
 * WARNING: If you add a fast trap handler which can be invoked by a
 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
 * instead of the "done" instruction to return to user mode. See the
 * comments for the "fast_trap_done" entry point for more information.
 *
 * FAST_TRAP_DONE expands to a single annulled branch to fast_trap_done.
 */
#define	FAST_TRAP_DONE	\
	ba,a	fast_trap_done
46
/*
 * Override GET_NATIVE_TIME for the cpu module code.  This is not
 * guaranteed to be exactly one instruction, be careful of using
 * the macro in delay slots.
 *
 * Do not use any instruction that modifies condition codes as the
 * caller may depend on these to remain unchanged across the macro.
 *
 * Arguments:
 *	out		register that receives the value read from STICK
 *	scr1, scr2	scratch registers; neither variant below uses them
 */

#ifdef	N2_ERRATUM_49
/*
 * Niagara2 does not continuously compare STICK_CMPR and STICK, but it does
 * so periodically (at least once every 128 cycles).  For this reason,
 * Niagara2 implementations > 1.0 will always return bits 6:0 of reads of
 * STICK as 0x7f.  This ensures that if software writes a value to
 * STICK_CMPR that is greater than the value subsequently read from STICK
 * that a match will occur in the future.
 *
 * For Niagara2 1.0, we ensure bits 6:0 return 0x7f here.
 */
#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rd	STICK, out	;\
	or	out, 0x7f, out

#else	/* N2_ERRATUM_49 */
#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rd	STICK, out
#endif	/* N2_ERRATUM_49 */

/*
 * RD_TICKCMPR: read STICK_COMPARE into 'out'.  'scr' is unused.
 */
#define	RD_TICKCMPR(out, scr)		\
	rd	STICK_COMPARE, out

/*
 * WR_TICKCMPR: write 'in' to STICK_COMPARE.  'scr1', 'scr2' and 'label'
 * are unused by this implementation.
 */
#define	WR_TICKCMPR(in,scr1,scr2,label)		\
	wr	in, STICK_COMPARE
81
82
83#include <sys/clock.h>
84
85#if defined(lint)
86#include <sys/types.h>
87#include <sys/scb.h>
88#include <sys/systm.h>
89#include <sys/regset.h>
90#include <sys/sunddi.h>
91#include <sys/lockstat.h>
92#endif	/* lint */
93
94
95#include <sys/asm_linkage.h>
96#include <sys/privregs.h>
97#include <vm/hat_sfmmu.h>
98#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
99#include <sys/machthread.h>
100#include <sys/clock.h>
101#include <sys/intreg.h>
102#include <sys/psr_compat.h>
103#include <sys/isa_defs.h>
104#include <sys/dditypes.h>
105#include <sys/intr.h>
106#include <sys/hypervisor_api.h>
107
108#if !defined(lint)
109#include "assym.h"
110#endif
111
/* Byte stride between successive flush instructions in flush_instr_mem(). */
#define	ICACHE_FLUSHSZ	0x20
113
#if defined(lint)
/*
 * Softint generated when counter field of tick reg matches value field
 * of tick_cmpr reg
 */
/*ARGSUSED*/
void
tickcmpr_set(uint64_t clock_cycles)
{}

#else   /* lint */

	/*
	 * tickcmpr_set(clock_cycles)
	 *
	 * Program the compare register (via WR_TICKCMPR) with the value in
	 * %o0, then re-read the current time to check that the programmed
	 * value is still in the future.  If it is not, retry with
	 * (current time + step), doubling the step on each lap, until a
	 * value in the future has been written.
	 *
	 * In:    %o0 = clock_cycles (value to program)
	 * Uses:  %o0, %o2, %o3 (and %o4/%o5 as macro scratch), condition codes
	 */
	ENTRY_NP(tickcmpr_set)
	! get 64-bit clock_cycles interval
	mov	%o0, %o2
	mov	8, %o3			! A reasonable initial step size
1:
	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR

	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
	sllx	%o0, 1, %o0		!   value we wrote was in the future.
	srlx	%o0, 1, %o0		! (shift pair clears bit 63)

	cmp	%o2, %o0		! If the value we wrote was in the
	bg,pt	%xcc, 2f		!   future, then blow out of here.
	  sllx	%o3, 1, %o3		! If not, then double our step size,
	ba,pt	%xcc, 1b		!   and take another lap.
	  add	%o0, %o3, %o2		! delay: next value = now + step
2:
	retl
	  nop
	SET_SIZE(tickcmpr_set)

#endif  /* lint */
148
#if defined(lint)

void
tickcmpr_disable(void)
{}

#else

	/*
	 * tickcmpr_disable()
	 *
	 * Write (1 << TICKINT_DIS_SHFT) to the compare register, i.e. a
	 * value with only the interrupt-disable bit set.
	 *
	 * Uses: %g1, %o0
	 */
	ENTRY_NP(tickcmpr_disable)
	mov	1, %g1
	sllx	%g1, TICKINT_DIS_SHFT, %o0	! %o0 = disable bit only
	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
	retl
	  nop
	SET_SIZE(tickcmpr_disable)

#endif
166
#if defined(lint)

/*
 * tick_write_delta() increments %tick by the specified delta.  This should
 * only be called after a CPR event to assure that gethrtime() continues to
 * increase monotonically.  Obviously, writing %tick needs to be done very
 * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
 * this reason, we make sure we're i-cache hot before actually writing to
 * %tick.
 *
 * NOTE: No provision for this on sun4v right now.
 */

/*ARGSUSED*/
void
tick_write_delta(uint64_t delta)
{}

#else	/* lint */

	.seg	".text"
tick_write_delta_panic:
	.asciz	"tick_write_delta: not supported"

	/*
	 * tick_write_delta(delta)
	 *
	 * Not supported here: unconditionally calls panic() with the
	 * message above.  The high bits of the message address are formed
	 * in %o1 before the save, so they are visible as %i1 in the new
	 * window when the low bits are merged in the delay slot of the call.
	 */
	ENTRY_NP(tick_write_delta)
	sethi	%hi(tick_write_delta_panic), %o1	! %o1 -> %i1 after save
	save	%sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
	call	panic
	  or	%i1, %lo(tick_write_delta_panic), %o0	! delay: %o0 = msg
	/*NOTREACHED*/
	retl
	  nop
	SET_SIZE(tick_write_delta)
#endif
200
#if defined(lint)
/*
 *  return 1 if disabled
 */

int
tickcmpr_disabled(void)
{ return (0); }

#else   /* lint */

	/*
	 * tickcmpr_disabled()
	 *
	 * Read the compare register and return it shifted right by
	 * TICKINT_DIS_SHFT, so the result is non-zero exactly when the
	 * disable bit (or any bit above it) is set.
	 *
	 * Out:  %o0 = compare register >> TICKINT_DIS_SHFT
	 * Uses: %g1
	 */
	ENTRY_NP(tickcmpr_disabled)
	RD_TICKCMPR(%g1, %o0)
	retl
	  srlx	%g1, TICKINT_DIS_SHFT, %o0	! delay: isolate disable bit
	SET_SIZE(tickcmpr_disabled)

#endif  /* lint */
219
/*
 * Get current tick
 *
 * Returns the raw value produced by GET_NATIVE_TIME (a read of STICK) in
 * %o0; unlike gettick_counter(), no bits are masked off here.
 */
#if defined(lint)

u_longlong_t
gettick(void)
{ return (0); }

#else   /* lint */

	ENTRY(gettick)
	GET_NATIVE_TIME(%o0, %o2, %o3)		! %o0 = native time
	retl
	  nop
	SET_SIZE(gettick)

#endif  /* lint */
238
239
/*
 * Return the counter portion of the tick register.
 *
 * Reads %tick directly (not STICK) and clears bit 63 with a shift-left /
 * shift-right pair before returning the value in %o0.
 */

#if defined(lint)

uint64_t
gettick_counter(void)
{ return(0); }

#else	/* lint */

	ENTRY_NP(gettick_counter)
	rdpr	%tick, %o0
	sllx	%o0, 1, %o0		! push bit 63 out the top ...
	retl
	  srlx	%o0, 1, %o0		! shake off npt bit
	SET_SIZE(gettick_counter)
#endif	/* lint */
259
/*
 * Provide a C callable interface to the code that reads the hi-res timer.
 * gethrtime() returns the 64-bit nanosecond timestamp in %o0.
 */
264
265#if defined(lint)
266
/*
 * Lint-only prototypes/stubs for the time routines implemented in
 * assembly in the non-lint branch below.  The bodies are placeholders and
 * do not reflect the real behavior.
 */
hrtime_t
gethrtime(void)
{
	return ((hrtime_t)0);
}

hrtime_t
gethrtime_unscaled(void)
{
	return ((hrtime_t)0);
}

hrtime_t
gethrtime_max(void)
{
	return ((hrtime_t)0);
}

/* Converts *hrt in place (native time to nanoseconds in the asm version). */
void
scalehrtime(hrtime_t *hrt)
{
	*hrt = 0;
}

void
gethrestime(timespec_t *tp)
{
	tp->tv_sec = 0;
	tp->tv_nsec = 0;
}

time_t
gethrestime_sec(void)
{
	return (0);
}

void
gethrestime_lasttick(timespec_t *tp)
{
	tp->tv_sec = 0;
	tp->tv_nsec = 0;
}

/*ARGSUSED*/
void
hres_tick(void)
{
}

void
panic_hres_tick(void)
{
}
321
322#else	/* lint */
323
	/*
	 * gethrtime(): compute the current hrtime with the GET_HRTIME macro
	 * (result in %g1; %o0-%o5 and %g2 are handed to it as scratch) and
	 * return it in %o0.
	 */
	ENTRY_NP(gethrtime)
	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
							! %g1 = hrtime
	retl
	  mov	%g1, %o0			! delay: return value
	SET_SIZE(gethrtime)
330
	/*
	 * gethrtime_unscaled(): return the native time (GET_NATIVE_TIME)
	 * in %o0 without converting it to nanoseconds.
	 */
	ENTRY_NP(gethrtime_unscaled)
	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
	retl
	  mov	%g1, %o0			! delay: return value
	SET_SIZE(gethrtime_unscaled)
336
	/*
	 * gethrtime_waitfree() / dtrace_gethrtime(): read the native time
	 * and convert it to nanoseconds with NATIVE_TIME_TO_NSEC.  Unlike
	 * gethrtime(), this path does not use GET_HRTIME.  Result in %o0.
	 */
	ENTRY_NP(gethrtime_waitfree)
	ALTENTRY(dtrace_gethrtime)
	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
	retl
	  mov	%g1, %o0			! delay: return value
	SET_SIZE(dtrace_gethrtime)
	SET_SIZE(gethrtime_waitfree)
345
	/*
	 * gethrtime_max(): convert NATIVE_TIME_MAX to nanoseconds and
	 * return it in %o0.  If the converted value is negative as a signed
	 * 64-bit number, return the largest positive 64-bit value
	 * (all ones shifted right by one) instead.
	 */
	ENTRY(gethrtime_max)
	NATIVE_TIME_MAX(%g1)
	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

	! hrtime_t's are signed, max hrtime_t must be positive
	mov	-1, %o2
	brlz,a	%g1, 1f				! annulled unless %g1 < 0
	  srlx	%o2, 1, %g1			! delay: %g1 = 0x7fff...ffff
1:
	retl
	  mov	%g1, %o0
	SET_SIZE(gethrtime_max)
358
	/*
	 * scalehrtime(hrtime_t *hrt): load the 64-bit value at [%o0],
	 * convert it with NATIVE_TIME_TO_NSEC and store it back in place.
	 */
	ENTRY(scalehrtime)
	ldx	[%o0], %o1			! %o1 = *hrt
	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
	retl
	  stx	%o1, [%o0]			! delay: *hrt = converted value
	SET_SIZE(scalehrtime)
365
/*
 * Fast trap to return a timestamp, uses trap window, leaves traps
 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1
 * (%o0 = upper 32 bits, %o1 = lower 32 bits).
 *
 * This is the handler for the ST_GETHRTIME trap.
 */

	ENTRY_NP(get_timestamp)
	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
	FAST_TRAP_DONE
	SET_SIZE(get_timestamp)
379
/*
 * Macro to convert GET_HRESTIME() bits into a timestamp.
 *
 * We use two separate macros so that the platform-dependent GET_HRESTIME()
 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 *
 * Arguments (all registers, all modified except nano):
 *	hrestsec	in/out: seconds
 *	hrestnsec	in/out: nanoseconds
 *	adj		pending adjustment (hrestime_adj)
 *	nslt		nanoseconds since last tick; shifted by ADJ_SHIFT
 *	nano		nanoseconds per second, used for normalization
 *
 * The adjustment actually applied is limited in magnitude to
 * nslt >> ADJ_SHIFT.  The macro defines local labels 2, 3 and 4 and
 * modifies the condition codes.
 */
#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */	\
	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
	  srlx	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
	ba	3f;			/* go convert to sec/nsec */	\
	  add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
2:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
	  add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
3:	cmp	hrestnsec, nano;	/* more than a billion? */	\
	bl,pt	%xcc, 4f;		/* if not, we're done */	\
	  nop;				/* delay: do nothing :( */	\
	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */	\
	ba,a	3b;			/* check >= billion again */	\
4:
406
	/*
	 * gethrestime(timespec_t *tp): compute the current hrestime with
	 * GET_HRESTIME/CONV_HRESTIME and store seconds at [%o0] and
	 * nanoseconds at [%o0 + CLONGSIZE].
	 */
	ENTRY_NP(gethrestime)
	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
	stn	%o1, [%o0]			! tp->tv_sec
	retl
	  stn	%o2, [%o0 + CLONGSIZE]		! delay: tp->tv_nsec
	SET_SIZE(gethrestime)
414
/*
 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
 * seconds.  The same GET_HRESTIME/CONV_HRESTIME sequence is run, with the
 * seconds accumulated directly in %o0; the nanosecond part is discarded.
 */
	ENTRY_NP(gethrestime_sec)
	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
	retl					! %o0 current hrestime seconds
	  nop
	SET_SIZE(gethrestime_sec)
425
/*
 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
 * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 * it explicitly.)
 *
 * In:   %o0 = pointer to the timespec to fill in
 * Uses: %o1-%o4, %g1, %g2, condition codes
 *
 * Note that the seconds store sits in the delay slot of the retry branch,
 * so it is performed on every lap; a retry simply overwrites it.
 */
	ENTRY_NP(gethrestime_lasttick)
	sethi	%hi(hres_lock), %o1
0:
	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
	membar	#LoadLoad			! Load of lock must complete
	andn	%o2, 1, %o2			! Mask off lowest bit
	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
	add	%o1, %lo(hrestime), %o4
	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
	membar	#LoadLoad			! All loads must complete
	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
	cmp	%o3, %o2			! If lock is locked or has
	bne	0b				!   changed, retry.
	  stn	%g1, [%o0]			! Delay: store seconds
	retl
	  stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
	SET_SIZE(gethrestime_lasttick)
451
/*
 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1
 * (%o0 = seconds, %o1 = nanoseconds, as produced by CONV_HRESTIME).
 *
 * This is the handler for the ST_GETHRESTIME trap.
 */

	ENTRY_NP(get_hrestime)
	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
	FAST_TRAP_DONE
	SET_SIZE(get_hrestime)
463
/*
 * Fast trap to return lwp virtual time, uses trap window, leaves traps
 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
 * of nanoseconds consumed.
 *
 * This is the handler for the ST_GETHRVTIME trap.
 *
 * Register usage:
 *	%o0, %o1 = return lwp virtual time (%o0 = hi32, %o1 = lo32)
 * 	%g2 = CPU pointer, then thread pointer
 * 	%g3 = lwp
 * 	%g1 = scratch
 * 	%g5 = native time, then accumulated result
 *	(%o0 is also used as scratch by NATIVE_TIME_TO_NSEC before being
 *	loaded with the return value)
 */
	ENTRY_NP(get_virtime)
	GET_NATIVE_TIME(%g5, %g1, %g2)	! %g5 = native time in ticks
	CPU_ADDR(%g2, %g3)			! CPU struct ptr to %g2
	ldn	[%g2 + CPU_THREAD], %g2		! thread pointer to %g2
	ldn	[%g2 + T_LWP], %g3		! lwp pointer to %g3

	/*
	 * Subtract start time of current microstate from time
	 * of day to get increment for lwp virtual time.
	 */
	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
	sub	%g5, %g1, %g5

	/*
	 * Add current value of ms_acct[LMS_USER]
	 */
	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
	add	%g5, %g1, %g5
	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)

	srl	%g5, 0, %o1			! %o1 = lo32(%g5)
	srlx	%g5, 32, %o0			! %o0 = hi32(%g5)

	FAST_TRAP_DONE
	SET_SIZE(get_virtime)
503
504
505
	.seg	".text"
hrtime_base_panic:
	.asciz	"hrtime_base stepping back"


	/*
	 * hres_tick()
	 *
	 * Per-tick update of the high-resolution time variables.  In order:
	 *
	 *  1. Acquire the byte lock at hres_lock + HRES_LOCK_OFFSET with
	 *     ldstub, spinning on plain loads while it is held.
	 *  2. Read the native time, record it in hres_last_tick, and scale
	 *     it to nanoseconds using nsec_scale / NSEC_SHIFT.
	 *  3. Compute the nanosecond delta against hrtime_base; panic if
	 *     hrtime_base is greater than the new value; otherwise store
	 *     the new value as hrtime_base.
	 *  4. Derive this tick's adjustment from hrestime_adj, limited in
	 *     magnitude to (delta >> ADJ_SHIFT); subtract it from timedelta
	 *     and store the result to both timedelta and hrestime_adj.
	 *  5. Add the adjustment and the delta to hrestime.tv_nsec, carrying
	 *     at most one second into tv_sec (and setting one_sec when a
	 *     carry occurs).
	 *  6. Release the lock by incrementing the hres_lock word.
	 *
	 * Register usage after the save:
	 *	%l4 = %hi(hrestime), base for all the time variables
	 *	%l5 = lock byte, then nsec_scale, then adj, then NANOSEC
	 *	%l0 = native time, then scaled time, then hrestime_adj/timedelta
	 *	%g1 = hrtime_base on entry to the update
	 *	%i1 = nanosecond delta since hrtime_base
	 *	%i2:%i3 = hrestime sec:nsec
	 *
	 * Local label 9 is defined twice; "9b" in the spin loop and "9f" in
	 * the stepping-back check refer to different locations.
	 */
	ENTRY_NP(hres_tick)
	save	%sp, -SA(MINFRAME), %sp	! get a new window

	sethi	%hi(hrestime), %l4
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
7:	tst	%l5
	bz,pt	%xcc, 8f			! if we got it, drive on
	  ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
9:	tst	%l5
	bz,a,pn	%xcc, 7b
	  ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
	ba,pt	%xcc, 9b
	  ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
	membar	#StoreLoad|#StoreStore

	!
	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
	!
	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
	stx	%l0, [%l4 + %lo(hres_last_tick)]! prev = current
	! convert native time to nsecs
	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

	sub	%l0, %g1, %i1			! get accurate nsec delta

	ldx	[%l4 + %lo(hrtime_base)], %l1
	cmp	%l1, %l0			! hrtime_base > new value?
	bg,pn	%xcc, 9f			! yes, time stepped back: panic
	  nop

	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base

	!
	! apply adjustment, if any
	!
	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
	brz	%l0, 2f
						! hrestime_adj == 0 ?
						! yes, skip adjustments
	  clr	%l5				! delay: set adj to zero
	tst	%l0				! is hrestime_adj >= 0 ?
	bge,pt	%xcc, 1f			! yes, go handle positive case
	  srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj

	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
	bl,pt	%xcc, 2f			! yes, use current adj
	  neg	%l5				! delay: %l5 = -adj
	ba,pt	%xcc, 2f
	  mov	%l0, %l5			! no, so set adj = hrestime_adj
1:
	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
	bl,a,pt	%xcc, 2f			! yes, set adj = hrestime_adj
	  mov	%l0, %l5			! delay: adj = hrestime_adj
2:
	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
	sub	%l0, %l5, %l0			! timedelta -= adj

	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta

	or	%l4, %lo(hrestime), %l2
	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
	ldn	[%l2 + CLONGSIZE], %i3
	add	%i3, %l5, %i3			! hrestime.nsec += adj
	add	%i3, %i1, %i3			! hrestime.nsec += nslt

	set	NANOSEC, %l5			! %l5 = NANOSEC
	cmp	%i3, %l5
	bl,pt	%xcc, 5f			! if hrestime.tv_nsec < NANOSEC
	  sethi	%hi(one_sec), %i1		! delay
	add	%i2, 0x1, %i2			! hrestime.tv_sec++
	sub	%i3, %l5, %i3			! hrestime.tv_nsec - NANOSEC
	mov	0x1, %l5
	st	%l5, [%i1 + %lo(one_sec)]	! one_sec = 1
5:
	stn	%i2, [%l2]
	stn	%i3, [%l2 + CLONGSIZE]		! store the new hrestime

	membar	#StoreStore

	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1				! release lock
	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock

	ret
	restore

9:
	!
	! release hres_lock
	!
	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1
	st	%i1, [%l4 + %lo(hres_lock)]

	sethi	%hi(hrtime_base_panic), %o0
	call	panic
	  or	%o0, %lo(hrtime_base_panic), %o0

	SET_SIZE(hres_tick)
614
615#endif	/* lint */
616
617#if !defined(lint) && !defined(__lint)
618
	.seg	".text"
kstat_q_panic_msg:
	.asciz	"kstat_q_exit: qlen == 0"

	/*
	 * kstat_q_panic(): branch target used by the kstat queue-exit
	 * routines when the old queue length is zero.  Gets a new register
	 * window and calls panic() with the message above.
	 */
	ENTRY(kstat_q_panic)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(kstat_q_panic_msg), %o0
	call	panic
	  or	%o0, %lo(kstat_q_panic_msg), %o0
	/*NOTREACHED*/
	SET_SIZE(kstat_q_panic)
630
/*
 * Branch-on-zero mnemonics passed to KSTAT_Q_UPDATE as its QBR argument.
 */
#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

/*
 * KSTAT_Q_UPDATE: update one queue's statistics in the structure at %o0.
 * Expects the current time in %g1; uses %o1-%o5 as scratch.
 *
 *	QOP	add or sub; applied to the old queue length with 1
 *	QBR	branch taken when the OLD queue length is zero
 *	QZERO	target of that branch
 *	QRETURN	text placed immediately before the final store; callers
 *		pass a local label, optionally followed by retl, in which
 *		case the final store becomes the delay slot of the retl
 *	QTYPE	field-name prefix, pasted onto CNT, LASTUPDATE, TIME and
 *		LENTIME
 *
 * The new queue length is stored in the delay slot of QBR, so it is
 * written whether or not the branch is taken.  When the old length is
 * non-zero, TIME is advanced by (now - LASTUPDATE) and LENTIME by
 * (old length * that delta).  LASTUPDATE is then set to now.
 */
#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
650
	/*
	 * kstat_waitq_enter(%o0 = stats structure): increment the wait
	 * queue length.  If the queue was empty, only LASTUPDATE is
	 * refreshed; otherwise the time accumulators are updated too.
	 */
	.align 16
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)
656
	/*
	 * kstat_waitq_exit(%o0 = stats structure): decrement the wait
	 * queue length and update the accumulators.  Branches to
	 * kstat_q_panic if the queue length was already zero.
	 */
	.align 16
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_exit)
662
	/*
	 * kstat_runq_enter(%o0 = stats structure): increment the run
	 * queue length.  If the queue was empty, only LASTUPDATE is
	 * refreshed; otherwise the time accumulators are updated too.
	 */
	.align 16
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)
668
	/*
	 * kstat_runq_exit(%o0 = stats structure): decrement the run
	 * queue length and update the accumulators.  Branches to
	 * kstat_q_panic if the queue length was already zero.
	 */
	.align 16
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_exit)
674
	/*
	 * kstat_waitq_to_runq(%o0 = stats structure): wait-queue exit
	 * followed by run-queue enter, both using the single timestamp read
	 * into %g1.  The first expansion passes a bare label as QRETURN so
	 * that control falls through into the second, which returns.
	 */
	.align 16
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)
681
	/*
	 * kstat_runq_back_to_waitq(%o0 = stats structure): run-queue exit
	 * followed by wait-queue enter, both using the single timestamp
	 * read into %g1.  The first expansion passes a bare label as
	 * QRETURN so that control falls through into the second.
	 */
	.align 16
	ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)
688
689#endif /* lint */
690
#ifdef lint

/* Lint-visible declarations of the variables defined in assembly below. */
int64_t timedelta;
hrtime_t hres_last_tick;
timestruc_t hrestime;
int64_t hrestime_adj;
int hres_lock;
uint_t nsec_scale;
hrtime_t hrtime_base;
int traptrace_use_stick;

#else
	/*
	 *  -- WARNING --
	 *
	 * The following variables MUST be together on a 128-byte boundary.
	 * In addition to the primary performance motivation (having them all
	 * on the same cache line(s)), code here and in the GET*TIME() macros
	 * assumes that they all have the same high 22 address bits (so
	 * there's only one sethi).
	 */
	.seg	".data"
	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
	.global	nsec_shift, adj_shift

	/* XXX - above comment claims 128-bytes is necessary */
	/* NOTE(review): the alignment actually requested below is 64. */
	.align	64
timedelta:
	.word	0, 0		/* int64_t */
hres_last_tick:
	.word	0, 0		/* hrtime_t */
hrestime:
	.nword	0, 0		/* 2 longs */
hrestime_adj:
	.word	0, 0		/* int64_t */
hres_lock:
	.word	0
nsec_scale:
	.word	0
hrtime_base:
	.word	0, 0		/* hrtime_t */
traptrace_use_stick:
	.word	0
nsec_shift:
	.word	NSEC_SHIFT	/* exported copy of the NSEC_SHIFT constant */
adj_shift:
	.word	ADJ_SHIFT	/* exported copy of the ADJ_SHIFT constant */

#endif
741
742
/*
 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
 * usec_delay(int n)		[compatibility - should go one day]
 * Delay by spinning.
 *
 * delay for n microseconds.  numbers <= 0 delay 1 usec
 *
 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 * and variable clock rate for power management requires that we
 * use %stick to implement this routine.
 *
 * The target time is (now + n * sticks_per_usec + 1); the routine then
 * polls the native time until it has passed the target.
 */

#if defined(lint)

/*ARGSUSED*/
void
drv_usecwait(clock_t n)
{}

/*ARGSUSED*/
void
usec_delay(int n)
{}

#else	/* lint */

	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
	brlez,a,pn %o0, 0f		! n <= 0 ?
	  mov	1, %o0			! delay (annulled if n > 0): n = 1
0:
	sethi	%hi(sticks_per_usec), %o1
	lduw	[%o1 + %lo(sticks_per_usec)], %o1
	mulx	%o1, %o0, %o1		! Scale usec to ticks
	inc	%o1			! We don't start on a tick edge
	GET_NATIVE_TIME(%o2, %o3, %o4)
	add	%o1, %o2, %o1		! %o1 = target time

	! The compare uses the previous sample; GET_NATIVE_TIME must leave
	! the condition codes untouched for the branch below to be valid.
1:	cmp	%o1, %o2
	GET_NATIVE_TIME(%o2, %o3, %o4)
	bgeu,pt	%xcc, 1b		! target >= sample: keep spinning
	  nop
	retl
	  nop
	SET_SIZE(usec_delay)
	SET_SIZE(drv_usecwait)
#endif	/* lint */
790
#if defined(lint)

/* ARGSUSED */
void
pil14_interrupt(int level)
{}

#else

/*
 * Level-14 interrupt prologue.
 *
 * Records the interrupted PIL and PC in the per-CPU profiling fields and
 * then branches to pil_interrupt_common.  Exactly one of CPU_PROFILE_PC /
 * CPU_PROFILE_UPC receives the trap PC, depending on whether TSTATE_PRIV
 * was set in %tstate; the other is zeroed.
 *
 * Uses: %g1 (CPU pointer), %g2 (scratch for CPU_ADDR), %g5, %g6
 */
	ENTRY_NP(pil14_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%pil, %g6			! %g6 = interrupted PIL
	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
	bnz,a,pt %xcc, 1f
	  stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
	ba	pil_interrupt_common		! must be large-disp branch
	  stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
1:	ba	pil_interrupt_common		! must be large-disp branch
	  stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
	SET_SIZE(pil14_interrupt)
818
	ENTRY_NP(tick_rtt)
	!
	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
	! disabled.  If TICK_COMPARE is enabled, we know that we need to
	! reenqueue the interrupt request structure.  We'll then check TICKINT
	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
	! interrupt.  In this case, TICK_COMPARE may have been rewritten
	! recently; we'll compare %o5 to the current time to verify that it's
	! in the future.
	!
	! Note that %o5 is live until after 1f.
	! XXX - there is a subroutine call while %o5 is live!
	!
	! Every path ends at 2f, which branches to current_thread_complete.
	! %g5 holds the saved %pstate while interrupts are disabled and is
	! written back on each exit taken after the disable.
	!
	RD_TICKCMPR(%o5, %g1)
	srlx	%o5, TICKINT_DIS_SHFT, %g1
	brnz,pt	%g1, 2f				! disabled: nothing to do
	  nop

	rdpr 	%pstate, %g5
	andn	%g5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate		! Disable vec interrupts

	sethi	%hi(cbe_level14_inum), %o1
	ldx	[%o1 + %lo(cbe_level14_inum)], %o1
	call	intr_enqueue_req ! preserves %o5 and %g5
	  mov	PIL_14, %o0

	! Check SOFTINT for TICKINT/STICKINT
	rd	SOFTINT, %o4
	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
	andcc	%o4, %o0, %g0
	bz,a,pn	%icc, 2f
	  wrpr	%g0, %g5, %pstate		! Enable vec interrupts

	! clear TICKINT/STICKINT
	wr	%o0, CLEAR_SOFTINT

	!
	! Now that we've cleared TICKINT, we can reread %tick and confirm
	! that the value we programmed is still in the future.  If it isn't,
	! we need to reprogram TICK_COMPARE to fire as soon as possible.
	!
	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
	sllx	%o0, 1, %o0			! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	  wrpr	%g0, %g5, %pstate		!   delay: enable vec intr

	!
	! If we're here, then we have programmed TICK_COMPARE with a %tick
	! which is in the past; we'll now load an initial step size, and loop
	! until we've managed to program TICK_COMPARE to fire in the future.
	!
	mov	8, %o4				! 8 = arbitrary initial step
1:	add	%o0, %o4, %o5			! Add the step
	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
	sllx	%o0, 1, %o0			! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	  wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
	ba	1b				! No, try again.
	  sllx	%o4, 1, %o4			!    delay: double step size

2:	ba	current_thread_complete
	  nop
	SET_SIZE(tick_rtt)
888
889#endif /* lint */
890
#if defined(lint)
/*
 * Prefetch a page_t for write or read, this assumes a linear
 * scan of sequential page_t's.
 */
/*ARGSUSED*/
void
prefetch_page_w(void *pp)
{}

/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{}
#else	/* lint */

/* XXXQ These should be inline templates, not functions */
/*
 * Both routines are currently no-ops in this file: they return
 * immediately without issuing any prefetch.
 */
        ENTRY(prefetch_page_w)
        retl
	  nop
        SET_SIZE(prefetch_page_w)

        ENTRY(prefetch_page_r)
        retl
	  nop
        SET_SIZE(prefetch_page_r)

#endif	/* lint */
919
#if defined(lint)
/*
 * Prefetch struct smap for write.
 */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{}
#else	/* lint */

/* XXXQ These should be inline templates, not functions */
/* Currently a no-op in this file: returns without issuing a prefetch. */
	ENTRY(prefetch_smap_w)
	retl
	  nop
	SET_SIZE(prefetch_smap_w)

#endif	/* lint */
937
938/*
939 * Generic sun4v MMU and Cache operations.
940 */
941
942#if defined(lint)
943
/*
 * Lint-only stubs for the MMU/cache routines implemented in assembly in
 * the non-lint branch.  The *_tl1 variants are cross-trap handlers whose
 * two arguments arrive in %g1 and %g2.  No assembly implementation of
 * vac_flushpage() or vac_flushpage_tl1() appears in this part of the file.
 */
/*ARGSUSED*/
void
vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
{}

/*ARGSUSED*/
void
vtag_flushall(void)
{}

/*ARGSUSED*/
void
vtag_unmap_perm_tl1(uint64_t vaddr, uint64_t ctxnum)
{}

/*ARGSUSED*/
void
vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
{}

/*ARGSUSED*/
void
vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
{}

/*ARGSUSED*/
void
vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
{}

/*ARGSUSED*/
void
vac_flushpage(pfn_t pfnum, int vcolor)
{}

/*ARGSUSED*/
void
vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
{}

/*ARGSUSED*/
void
flush_instr_mem(caddr_t vaddr, size_t len)
{}
988
989#else	/* lint */
990
	ENTRY_NP(vtag_flushpage)
	/*
	 * flush page from the tlb
	 *
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 *
	 * Issues the MMU_UNMAP_ADDR trap with (vaddr, cnum,
	 * MAP_ITLB | MAP_DTLB).  A non-zero status in %o0 is treated as
	 * fatal: control goes to panic_bad_hcall with %o0 = status and
	 * %o1 = MMU_UNMAP_ADDR.
	 */
	SFMMU_CPU_CNUM(%o1, %g1, %g2)   /* %g1 = sfmmu cnum on this CPU */

	mov	%g1, %o1
	mov	MAP_ITLB | MAP_DTLB, %o2
	ta	MMU_UNMAP_ADDR
	brz,pt	%o0, 1f
	  nop
	ba	panic_bad_hcall
	  mov	MMU_UNMAP_ADDR, %o1
1:
 	retl
	  nop
	SET_SIZE(vtag_flushpage)
1011
	/*
	 * vtag_flushall(): issue the MMU_DEMAP_ALL request through FAST_TRAP
	 * with %o0 = %o1 = 0 (no cpu list) and MAP_ITLB | MAP_DTLB in %o2.
	 * A non-zero status in %o0 is treated as fatal: control goes to
	 * panic_bad_hcall with %o1 = MMU_DEMAP_ALL.
	 */
	ENTRY_NP(vtag_flushall)
	mov	%g0, %o0	! XXX no cpu list yet
	mov	%g0, %o1	! XXX no cpu list yet
	mov	MAP_ITLB | MAP_DTLB, %o2
	mov	MMU_DEMAP_ALL, %o5
	ta	FAST_TRAP
	brz,pt	%o0, 1f
	  nop
	ba	panic_bad_hcall
	  mov	MMU_DEMAP_ALL, %o1
1:
	retl
	  nop
	SET_SIZE(vtag_flushall)
1026
1027	ENTRY_NP(vtag_unmap_perm_tl1)
1028	/*
1029	 * x-trap to unmap perm map entry
1030	 * %g1 = vaddr
1031	 * %g2 = ctxnum (KCONTEXT only)
1032	 */
1033	mov	%o0, %g3
1034	mov	%o1, %g4
1035	mov	%o2, %g5
1036	mov	%o5, %g6
1037	mov	%g1, %o0
1038	mov	%g2, %o1
1039	mov	MAP_ITLB | MAP_DTLB, %o2
1040	mov	UNMAP_PERM_ADDR, %o5
1041	ta	FAST_TRAP
1042	brz,pt	%o0, 1f
1043	nop
1044
1045	mov	PTL1_BAD_HCALL, %g1
1046
1047	cmp	%o0, H_ENOMAP
1048	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP, %g1
1049
1050	cmp	%o0, H_EINVAL
1051	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_EINVAL, %g1
1052
1053	ba,a	ptl1_panic
10541:
1055	mov	%g6, %o5
1056	mov	%g5, %o2
1057	mov	%g4, %o1
1058	mov	%g3, %o0
1059	retry
1060	SET_SIZE(vtag_unmap_perm_tl1)
1061
1062	ENTRY_NP(vtag_flushpage_tl1)
1063	/*
1064	 * x-trap to flush page from tlb and tsb
1065	 *
1066	 * %g1 = vaddr, zero-extended on 32-bit kernel
1067	 * %g2 = sfmmup
1068	 *
1069	 * assumes TSBE_TAG = 0
1070	 */
1071	srln	%g1, MMU_PAGESHIFT, %g1
1072	slln	%g1, MMU_PAGESHIFT, %g1			/* g1 = vaddr */
1073	mov	%o0, %g3
1074	mov	%o1, %g4
1075	mov	%o2, %g5
1076	mov	%g1, %o0			/* vaddr */
1077
1078	SFMMU_CPU_CNUM(%g2, %o1, %g6)   /* %o1 = sfmmu cnum on this CPU */
1079
1080	mov	MAP_ITLB | MAP_DTLB, %o2
1081	ta	MMU_UNMAP_ADDR
1082	brz,pt	%o0, 1f
1083	nop
1084	  ba	ptl1_panic
1085	mov	PTL1_BAD_HCALL, %g1
10861:
1087	mov	%g5, %o2
1088	mov	%g4, %o1
1089	mov	%g3, %o0
1090	membar #Sync
1091	retry
1092	SET_SIZE(vtag_flushpage_tl1)
1093
	ENTRY_NP(vtag_flush_pgcnt_tl1)
	/*
	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
	 *
	 * NOTE: this handler relies on the fact that no
	 *	interrupts or traps can occur during the loop
	 *	issuing the TLB_DEMAP operations. It is assumed
	 *	that interrupts are disabled and this code is
	 *	fetching from the kernel locked text address.
	 *
	 * assumes TSBE_TAG = 0
	 *
	 * Register usage inside the loop:
	 *	%g1 = current vaddr, %g2 = cnum, %g6 = MMU_PAGESIZE,
	 *	%g7 = pages remaining, %g3-%g5 = saved %o0-%o2
	 */
	srln	%g1, MMU_PAGESHIFT, %g1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
	mov	%o0, %g3
	mov	%o1, %g4
	mov	%o2, %g5

	and	%g2, SFMMU_PGCNT_MASK, %g7	/* g7 = pgcnt - 1 */
	add	%g7, 1, %g7			/* g7 = pgcnt */

        andn    %g2, SFMMU_PGCNT_MASK, %o0      /* %o0 = sfmmup */

	SFMMU_CPU_CNUM(%o0, %g2, %g6)    /* %g2 = sfmmu cnum on this CPU */

	set	MMU_PAGESIZE, %g6		/* g6 = pgsize */

1:
	mov	%g1, %o0			/* vaddr */
	mov	%g2, %o1			/* cnum */
	mov	MAP_ITLB | MAP_DTLB, %o2
	ta	MMU_UNMAP_ADDR
	brz,pt	%o0, 2f
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1
2:
	deccc	%g7				/* decr pgcnt */
	bnz,pt	%icc,1b
	  add	%g1, %g6, %g1			/* go to nextpage */

	mov	%g5, %o2
	mov	%g4, %o1
	mov	%g3, %o0
	membar #Sync
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)
1144
	! Not implemented on US1/US2
	!
	! x-trap version of vtag_flushall: parks %o0-%o3 and %o5 in %g3-%g7,
	! issues the MMU_DEMAP_ALL request through FAST_TRAP, and restores
	! the registers before the retry.  A non-zero status goes to
	! ptl1_panic with %g1 = PTL1_BAD_HCALL.
	ENTRY_NP(vtag_flushall_tl1)
	mov	%o0, %g3
	mov	%o1, %g4
	mov	%o2, %g5
	mov	%o3, %g6	! XXXQ not used?
	mov	%o5, %g7
	mov	%g0, %o0	! XXX no cpu list yet
	mov	%g0, %o1	! XXX no cpu list yet
	mov	MAP_ITLB | MAP_DTLB, %o2
	mov	MMU_DEMAP_ALL, %o5
	ta	FAST_TRAP
	brz,pt	%o0, 1f
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1
1:
	mov	%g7, %o5
	mov	%g6, %o3	! XXXQ not used?
	mov	%g5, %o2
	mov	%g4, %o1
	mov	%g3, %o0
	retry
	SET_SIZE(vtag_flushall_tl1)
1169
/*
 * flush_instr_mem:
 *	Flush a portion of the I-$ starting at vaddr
 * 	%o0 vaddr
 *	%o1 bytes to be flushed
 *
 * Issues one flush per ICACHE_FLUSHSZ bytes.  The loop body runs before
 * the count is tested, so at least one flush is issued even when the
 * byte count is zero.  Both %o0 and %o1 are modified.
 */

	ENTRY(flush_instr_mem)
	membar	#StoreStore				! Ensure the stores
							! are globally visible
1:
	flush	%o0
	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes = bytes-0x20
	bgu,pt	%ncc, 1b				! loop while bytes remain
	  add	%o0, ICACHE_FLUSHSZ, %o0		! vaddr = vaddr+0x20

	retl
	  nop
	SET_SIZE(flush_instr_mem)
1189
1190#endif /* !lint */
1191
/*
 * fp_zero() - clear all fp data registers and the fsr
 *
 * A 64-bit zero (fp_zero_zero) is loaded into %fsr and into %f0, and %f0
 * is then copied to every other even-numbered double register up to %f62.
 * %o0 is used to address fp_zero_zero.
 */

#if defined(lint) || defined(__lint)

void
fp_zero(void)
{}

#else	/* lint */

.global	fp_zero_zero
.align 8
fp_zero_zero:
	.xword	0

	ENTRY_NP(fp_zero)
	sethi	%hi(fp_zero_zero), %o0
	ldx	[%o0 + %lo(fp_zero_zero)], %fsr	! 64-bit FSR load (LDXFSR)
	ldd	[%o0 + %lo(fp_zero_zero)], %f0
	fmovd	%f0, %f2
	fmovd	%f0, %f4
	fmovd	%f0, %f6
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14
	fmovd	%f0, %f16
	fmovd	%f0, %f18
	fmovd	%f0, %f20
	fmovd	%f0, %f22
	fmovd	%f0, %f24
	fmovd	%f0, %f26
	fmovd	%f0, %f28
	fmovd	%f0, %f30
	fmovd	%f0, %f32
	fmovd	%f0, %f34
	fmovd	%f0, %f36
	fmovd	%f0, %f38
	fmovd	%f0, %f40
	fmovd	%f0, %f42
	fmovd	%f0, %f44
	fmovd	%f0, %f46
	fmovd	%f0, %f48
	fmovd	%f0, %f50
	fmovd	%f0, %f52
	fmovd	%f0, %f54
	fmovd	%f0, %f56
	fmovd	%f0, %f58
	fmovd	%f0, %f60
	retl
	  fmovd	%f0, %f62			! delay: last register
	SET_SIZE(fp_zero)

#endif	/* lint */
1248