xref: /titanic_51/usr/src/uts/sun4v/cpu/common_asm.s (revision 6f25ad7ffa9acba13c9da0cb230544442ab650ce)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#if !defined(lint)
27#include "assym.h"
28#endif
29
30/*
31 * General assembly language routines.
32 * It is the intent of this file to contain routines that are
33 * specific to cpu architecture.
34 */
35
36/*
37 * WARNING: If you add a fast trap handler which can be invoked by a
38 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
39 * instead of "done" instruction to return back to the user mode. See
40 * comments for the "fast_trap_done" entry point for more information.
41 */
42#define	FAST_TRAP_DONE	\
43	ba,a	fast_trap_done
44
45#include <sys/machclock.h>
46#include <sys/clock.h>
47
48#if defined(lint)
49#include <sys/types.h>
50#include <sys/scb.h>
51#include <sys/systm.h>
52#include <sys/regset.h>
53#include <sys/sunddi.h>
54#include <sys/lockstat.h>
55#endif	/* lint */
56
57
58#include <sys/asm_linkage.h>
59#include <sys/privregs.h>
60#include <vm/hat_sfmmu.h>
61#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
62#include <sys/machthread.h>
63#include <sys/clock.h>
64#include <sys/intreg.h>
65#include <sys/psr_compat.h>
66#include <sys/isa_defs.h>
67#include <sys/dditypes.h>
68#include <sys/intr.h>
69#include <sys/hypervisor_api.h>
70
71#if !defined(lint)
72#include "assym.h"
73#endif
74
75#define	ICACHE_FLUSHSZ	0x20
76
#if defined(lint)
/*
 * A softint is generated when the counter field of the tick register
 * matches the value field of the tick_cmpr register; this routine
 * programs that value field.
 */
/*ARGSUSED*/
void
tickcmpr_set(uint64_t clock_cycles)
{
}

#else   /* lint */

	/*
	 * void tickcmpr_set(uint64_t clock_cycles)
	 *
	 * In:	 %o0 = value to program into TICK_CMPR
	 * Uses: %o2 = value currently being written
	 *	 %o3 = step size (doubled before every retry)
	 *	 %o4, %o5 = scratch handed to the macros
	 *
	 * After each write the native time is read back.  If the value
	 * just written is not greater than the current time (signed
	 * 64-bit compare), "current time + step" is written instead and
	 * the check is repeated.
	 */
	ENTRY_NP(tickcmpr_set)
	mov	8, %o3			! starting step size
	mov	%o0, %o2		! %o2 = requested compare value
1:
	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write %o2 to TICK_CMPR

	GET_NATIVE_TIME(%o0,%o4,%o5,__LINE__)	! %o0 = current native time

	cmp	%o2, %o0		! written value vs. now
	bg,pt	%xcc, 2f		! still in the future: finished
	  sllx	%o3, 1, %o3		! delay (always executed): step *= 2
	ba,pt	%xcc, 1b		! otherwise go around again with
	  add	%o0, %o3, %o2		! delay: %o2 = now + step
2:
	retl
	  nop
	SET_SIZE(tickcmpr_set)

#endif  /* lint */
111
#if defined(lint)

void
tickcmpr_disable(void)
{
}

#else

	/*
	 * void tickcmpr_disable(void)
	 *
	 * Writes a value with only bit TICKINT_DIS_SHFT set to TICK_CMPR.
	 * Uses %g1 and %o0 for the value and %o4/%o5 as macro scratch.
	 */
	ENTRY_NP(tickcmpr_disable)
	mov	1, %g1
	sllx	%g1, TICKINT_DIS_SHFT, %o0	! %o0 = 1 << TICKINT_DIS_SHFT
	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Store it in TICK_CMPR
	retl
	  nop
	SET_SIZE(tickcmpr_disable)

#endif
129
#if defined(lint)

/*
 * tick_write_delta() is intended to increment %stick by the specified delta,
 * but %stick is only writeable in hyperprivileged mode and at present there
 * is no provision for this. tick_write_delta is called by the cyclic
 * subsystem if a negative %stick delta is observed after cyclic processing
 * is resumed after an event such as an OS suspend/resume. On sun4v, the
 * suspend/resume routines should adjust the %stick offset preventing the
 * cyclic subsystem from detecting a negative delta. If a negative delta is
 * detected, panic the system. The negative delta could be caused by improper
 * %stick synchronization after a suspend/resume.
 */

/*ARGSUSED*/
void
tick_write_delta(uint64_t delta)
{}

#else	/* lint */

	.seg	".text"
tick_write_delta_panic:
	.asciz	"tick_write_delta: not supported, delta: 0x%lx"

	/*
	 * void tick_write_delta(uint64_t delta)
	 *
	 * In:	%o0 = delta
	 *
	 * Always panics, passing the format string above and delta.
	 * The high part of the string address is built in %o1 before the
	 * save, so it is visible as %i1 in the new window; the caller's
	 * %o0 (delta) is visible as %i0.
	 */
	ENTRY_NP(tick_write_delta)
	sethi	%hi(tick_write_delta_panic), %o1
	save	%sp, -SA(MINFRAME), %sp	! get a new window to preserve caller
	mov	%i0, %o1		! second panic argument = delta
	call	panic
	  or	%i1, %lo(tick_write_delta_panic), %o0	! delay: format string
	/*NOTREACHED*/
	retl
	  nop
	SET_SIZE(tick_write_delta)	/* was missing: give the symbol a size */
#endif
165
#if defined(lint)
/*
 * Returns 1 if the tick compare interrupt is disabled.
 */

int
tickcmpr_disabled(void)
{
	return (0);
}

#else   /* lint */

	/*
	 * int tickcmpr_disabled(void)
	 *
	 * Out:	%o0 = TICK_CMPR shifted right by TICKINT_DIS_SHFT
	 * Uses: %g1 for the register value, %o0/%o1 as macro scratch.
	 */
	ENTRY_NP(tickcmpr_disabled)
	RD_TICKCMPR(%g1,%o0,%o1,__LINE__)	! %g1 = TICK_CMPR
	retl
	  srlx	%g1, TICKINT_DIS_SHFT, %o0	! delay: isolate disable bit
	SET_SIZE(tickcmpr_disabled)

#endif  /* lint */
184
/*
 * gettick: return the current native time.
 */
#if defined(lint)

u_longlong_t
gettick(void)
{
	return (0);
}

#else   /* lint */

	/*
	 * Out:	 %o0 = native time
	 * Uses: %o2, %o3 as macro scratch
	 */
	ENTRY(gettick)
	GET_NATIVE_TIME(%o0,%o2,%o3,__LINE__)	! result lands directly in %o0
	retl
	  nop
	SET_SIZE(gettick)

#endif  /* lint */
203
/*
 * rdtick: read the current tick.  For trapstat use only.
 */
#if defined (lint)

hrtime_t
rdtick()
{
	return (0);
}

#else
	/*
	 * Out:	%o0 = value produced by the physical tick read.
	 *
	 * NOTE(review): the read sits in the retl delay slot, so the macro
	 * presumably expands to exactly one instruction - confirm against
	 * its definition before changing it.
	 */
	ENTRY(rdtick)
	retl
	RD_TICK_PHYSICAL(%o0)			/* delay slot */
	SET_SIZE(rdtick)
#endif /* lint */
219
220
/*
 * gettick_counter() returns the counter portion of the tick register.
 * gettick_npt() and getstick_npt() return bit 63 of the physical tick
 * and stick reads respectively.
 */

#if defined(lint)

uint64_t
gettick_counter(void)
{
	return (0);
}

uint64_t
gettick_npt(void)
{
	return (0);
}

uint64_t
getstick_npt(void)
{
	return (0);
}

#else	/* lint */

	/* %o0 = tick counter; %o1 and %o2 are macro scratch */
	ENTRY_NP(gettick_counter)
	RD_TICK(%o0,%o1,%o2,__LINE__)
	retl
	  nop
	SET_SIZE(gettick_counter)

	/* %o0 = bit 63 of the physical tick read */
	ENTRY_NP(gettick_npt)
	RD_TICK_PHYSICAL(%o0)
	retl
	  srlx	%o0, 63, %o0		! delay: keep only the top bit
	SET_SIZE(gettick_npt)

	/* %o0 = bit 63 of the physical stick read */
	ENTRY_NP(getstick_npt)
	RD_STICK_PHYSICAL(%o0)
	retl
	  srlx	%o0, 63, %o0		! delay: keep only the top bit
	SET_SIZE(getstick_npt)
#endif	/* lint */
259
260/*
261 * Provide a C callable interface to the trap that reads the hi-res timer.
262 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
263 */
264
265#if defined(lint)
266
267hrtime_t
268gethrtime(void)
269{
270	return ((hrtime_t)0);
271}
272
273hrtime_t
274gethrtime_unscaled(void)
275{
276	return ((hrtime_t)0);
277}
278
279hrtime_t
280gethrtime_max(void)
281{
282	return ((hrtime_t)0);
283}
284
285void
286scalehrtime(hrtime_t *hrt)
287{
288	*hrt = 0;
289}
290
291void
292gethrestime(timespec_t *tp)
293{
294	tp->tv_sec = 0;
295	tp->tv_nsec = 0;
296}
297
298time_t
299gethrestime_sec(void)
300{
301	return (0);
302}
303
304void
305gethrestime_lasttick(timespec_t *tp)
306{
307	tp->tv_sec = 0;
308	tp->tv_nsec = 0;
309}
310
311/*ARGSUSED*/
312void
313hres_tick(void)
314{
315}
316
317void
318panic_hres_tick(void)
319{
320}
321
322#else	/* lint */
323
/*
 * hrtime_t gethrtime(void)
 *
 * Out:	 %o0 = hrtime
 * Uses: %g1 for the result; %o0-%o5 and %g2 are handed to the macro as
 *	 scratch.
 */
	ENTRY_NP(gethrtime)
	GET_HRTIME(%g1,%o0,%o1,%o2,%o3,%o4,%o5,%g2,__LINE__)
	retl					! %g1 = hrtime at this point
	  mov	%g1, %o0			! delay: return it
	SET_SIZE(gethrtime)
330
/*
 * hrtime_t gethrtime_unscaled(void)
 *
 * Out:	 %o0 = native time (no conversion to nanoseconds is applied)
 * Uses: %g1 for the result; %o2 and %o3 are macro scratch.
 */
	ENTRY_NP(gethrtime_unscaled)
	GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__)	! %g1 = native time
	retl
	  mov	%g1, %o0			! delay: return it
	SET_SIZE(gethrtime_unscaled)
336
/*
 * hrtime_t gethrtime_waitfree(void)
 * hrtime_t dtrace_gethrtime(void)	(alternate entry, same code)
 *
 * Reads the native time and converts it to nanoseconds.
 *
 * Out:	 %o0 = converted time
 * Uses: %g1 for the value; %o2 and %o3 are macro scratch.
 */
	ENTRY_NP(gethrtime_waitfree)
	ALTENTRY(dtrace_gethrtime)
	GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__)	! %g1 = native time
	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)	/* %g1 converted in place */
	retl
	  mov	%g1, %o0			! delay: return it
	SET_SIZE(dtrace_gethrtime)
	SET_SIZE(gethrtime_waitfree)
345
/*
 * hrtime_t gethrtime_max(void)
 *
 * Converts the maximum native time to nanoseconds.  hrtime_t is signed
 * and the maximum must be positive, so a negative conversion result is
 * replaced by all-ones shifted right by one (the largest positive
 * 64-bit value).
 *
 * Out:	 %o0 = maximum hrtime
 * Uses: %g1 for the value; %o0-%o2 as scratch.
 */
	ENTRY(gethrtime_max)
	NATIVE_TIME_MAX(%g1)
	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

	mov	-1, %o2				! all ones
	brlz,a	%g1, 1f				! negative result?
	  srlx	%o2, 1, %g1			! annulled delay: only then, clamp
1:
	retl
	  mov	%g1, %o0			! delay: return value
	SET_SIZE(gethrtime_max)
358
/*
 * void scalehrtime(hrtime_t *hrt)
 *
 * Converts the native-time value stored at *hrt to nanoseconds in place.
 *
 * In:	 %o0 = hrt
 * Uses: %o1 for the value; %o2 and %o3 are macro scratch.
 */
	ENTRY(scalehrtime)
	ldx	[%o0], %o1			! %o1 = *hrt
	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
	retl
	  stx	%o1, [%o0]			! delay: *hrt = converted value
	SET_SIZE(scalehrtime)
365
/*
 * get_timestamp: handler for the ST_GETHRTIME fast trap.
 *
 * Uses the trap window and leaves traps disabled.  The 64-bit nanosecond
 * timestamp is returned split across two registers:
 *	%o0 = upper 32 bits
 *	%o1 = lower 32 bits
 *
 * Uses: %g1 for the timestamp; %g2-%g5 and %o0-%o2 are macro scratch.
 */

	ENTRY_NP(get_timestamp)
	GET_HRTIME(%g1,%g2,%g3,%g4,%g5,%o0,%o1,%o2,__LINE__)
	srlx	%g1, 32, %o0				! %o0 = hi32 of hrtime
	srl	%g1, 0, %o1				! %o1 = lo32 of hrtime
	FAST_TRAP_DONE
	SET_SIZE(get_timestamp)
380
/*
 * CONV_HRESTIME turns the raw values produced by GET_HRESTIME() into a
 * timestamp.
 *
 * The work is split over two macros so that the platform-dependent
 * GET_HRESTIME() can stay as small as possible; CONV_HRESTIME() is the
 * generic half.
 *
 * Arguments (all registers; hrestsec and hrestnsec are updated in place,
 * adj and nslt are clobbered):
 *	hrestsec	seconds
 *	hrestnsec	nanoseconds
 *	adj		pending adjustment
 *	nslt		nanoseconds since the last tick
 *	nano		register holding the nanoseconds-per-second limit
 *
 * Defines numeric labels 2, 3 and 4.
 */
#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
	brz,pt	adj, 3f;		/* nothing pending: skip ahead */ \
	add	hrestnsec, nslt, hrestnsec; /* delay: nsec += nslt */	\
	brlz,pn	adj, 2f;		/* negative adjustment? */	\
	  srlx	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= ADJ_SHIFT */	\
	subcc	adj, nslt, %g0;		/* compare adj with scaled nslt */ \
	movg	%xcc, nslt, adj;	/* adj = min(adj, scaled nslt) */ \
	ba	3f;			/* on to the sec/nsec fixup */	\
	  add	hrestnsec, adj, hrestnsec; /* delay: nsec += adj */	\
2:	addcc	adj, nslt, %g0;		/* adj + scaled nslt */		\
	bge,a,pt %xcc, 3f;		/* adj the smaller magnitude? */ \
	  add	hrestnsec, adj, hrestnsec; /* yes: nsec += adj */	\
	sub	hrestnsec, nslt, hrestnsec; /* no: nsec -= scaled nslt */ \
3:	cmp	hrestnsec, nano;	/* reached a full second? */	\
	bl,pt	%xcc, 4f;		/* no: finished */		\
	  nop;				/* delay: nothing to do */	\
	add	hrestsec, 1, hrestsec;	/* carry into seconds */	\
	sub	hrestnsec, nano, hrestnsec; /* nsec -= one second */	\
	ba,a	3b;			/* and test again */		\
4:
407
/*
 * void gethrestime(timespec_t *tp)
 *
 * In:	 %o0 = tp
 * Uses: %o1 = seconds, %o2 = nanoseconds; %o3-%o5 and %g1-%g4 are
 *	 passed to the macros.
 *
 * Stores the converted seconds at [tp] and nanoseconds at
 * [tp + CLONGSIZE].
 */
	ENTRY_NP(gethrestime)
	GET_HRESTIME(%o1,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__)
	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
	stn	%o1, [%o0]			! tp seconds
	retl
	  stn	%o2, [%o0 + CLONGSIZE]		! delay: tp nanoseconds
	SET_SIZE(gethrestime)
415
/*
 * time_t gethrestime_sec(void)
 *
 * Same computation as gethrestime(), but only the seconds value is
 * returned, in %o0.  %o2-%o5 and %g1-%g4 are passed to the macros.
 */
	ENTRY_NP(gethrestime_sec)
	GET_HRESTIME(%o0,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__)
	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
	retl					! %o0 = current seconds
	  nop
	SET_SIZE(gethrestime_sec)
426
/*
 * void gethrestime_lasttick(timespec_t *tp)
 *
 * Returns hrestime as it stood on the last tick.  No conversion is
 * needed, which makes this simpler than gethrestime() and
 * gethrestime_sec().  The locking algorithm is the one used by
 * GET_HRESTIME and GET_HRTIME (described in clock.h); unlike those
 * macros, the membar #LoadLoad is written out explicitly here rather
 * than being implied by load dependencies.
 *
 * In:	 %o0 = tp
 * Uses: %o1 = high address bits shared by hres_lock and hrestime
 *	 %o2 = lock value before the reads, low bit cleared
 *	 %o3 = lock value after the reads
 *	 %o4 = address of hrestime
 *	 %g1 = seconds, %g2 = nanoseconds
 *
 * The reads are repeated until the two lock values match.
 */
	ENTRY_NP(gethrestime_lasttick)
	sethi	%hi(hres_lock), %o1
0:
	lduw	[%o1 + %lo(hres_lock)], %o2	! first look at the lock
	membar	#LoadLoad			! lock load completes first
	andn	%o2, 1, %o2			! drop the lowest bit
	add	%o1, %lo(hrestime), %o4		! %o4 = address of hrestime
	ldn	[%o1 + %lo(hrestime)], %g1	! seconds
	ldn	[%o4 + CLONGSIZE], %g2		! nanoseconds
	membar	#LoadLoad			! data loads complete first
	lduw	[%o1 + %lo(hres_lock)], %o3	! second look at the lock
	cmp	%o3, %o2			! held, or changed meanwhile?
	bne	0b				!   then start over
	  stn	%g1, [%o0]			! delay: store seconds
	retl
	  stn	%g2, [%o0 + CLONGSIZE]		! delay: store nanoseconds
	SET_SIZE(gethrestime_lasttick)
452
/*
 * get_hrestime: handler for the ST_GETHRESTIME fast trap (gettimeofday).
 *
 * Out:	 %o0 = seconds, %o1 = nanoseconds (a timestruc_t)
 * Uses: %g1-%g5, %o2 and %o3 are passed to the macros.
 */

	ENTRY_NP(get_hrestime)
	GET_HRESTIME(%o0,%o1,%g1,%g2,%g3,%g4,%g5,%o2,%o3,__LINE__)
	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
	FAST_TRAP_DONE
	SET_SIZE(get_hrestime)
464
/*
 * get_virtime: handler for the ST_GETHRVTIME fast trap.
 *
 * Returns the lwp virtual time, i.e. the number of nanoseconds consumed,
 * as a 64-bit number in %o0:%o1.  Uses the trap window and leaves traps
 * disabled.
 *
 * Register usage:
 *	%o0, %o1 = returned lwp virtual time (hi32, lo32)
 *	%g5 = running time value
 *	%g2 = CPU pointer, then thread pointer
 *	%g3 = lwp pointer (also scratch while finding the CPU)
 *	%g1 = scratch
 *	%o0 is additionally used as conversion scratch before it receives
 *	the result.
 */
	ENTRY_NP(get_virtime)
	GET_NATIVE_TIME(%g5,%g1,%g2,__LINE__)	! %g5 = native time in ticks
	CPU_ADDR(%g2, %g3)			! %g2 = CPU struct pointer
	ldn	[%g2 + CPU_THREAD], %g2		! %g2 = current thread
	ldn	[%g2 + T_LWP], %g3		! %g3 = its lwp

	/*
	 * The increment for lwp virtual time is the current time minus
	 * the start time of the current microstate.
	 */
	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
	sub	%g5, %g1, %g5

	/*
	 * Add what has already been accumulated in ms_acct[LMS_USER].
	 */
	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
	add	%g5, %g1, %g5
	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)

	srl	%g5, 0, %o1			! %o1 = lo32 of result
	srlx	%g5, 32, %o0			! %o0 = hi32 of result

	FAST_TRAP_DONE
	SET_SIZE(get_virtime)
504
505
506
507	.seg	".text"
hrtime_base_panic:
	.asciz	"hrtime_base stepping back"

	/*
	 * void hres_tick(void)
	 *
	 * Takes hres_lock, then updates hres_last_tick, hrtime_base,
	 * timedelta, hrestime_adj and hrestime, sets one_sec when the
	 * nanoseconds roll over into a new second, and finally releases
	 * the lock by incrementing the lock word.  If the new time is
	 * behind hrtime_base the lock is released and the system panics.
	 *
	 * Register usage:
	 *	%l4 = high address bits shared by all the time variables
	 *	%l5 = lock byte while acquiring, then nsec_scale, then adj,
	 *	      then NANOSEC
	 *	%l0 = native time, then the same in nsec, then hrestime_adj,
	 *	      then timedelta
	 *	%g1, %l1 = hrtime_base as read before the update
	 *	%i1 = nsec since the previous hrtime_base
	 *	%l2 = address of hrestime
	 *	%i2:%i3 = hrestime sec:nsec
	 *
	 * Note that the numeric label 9 is defined twice: once inside the
	 * lock spin loop (reached with 9b) and once for the panic path
	 * (reached with 9f).
	 */
	ENTRY_NP(hres_tick)
	save	%sp, -SA(MINFRAME), %sp	! new register window

	sethi	%hi(hrestime), %l4
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! attempt the lock
7:	tst	%l5
	bz,pt	%xcc, 8f			! byte was clear: lock is ours
	  ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
9:	tst	%l5				! spin with plain loads ...
	bz,a,pn	%xcc, 7b			! ... until it reads clear,
	  ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5 ! then retry
	ba,pt	%xcc, 9b
	  ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
	membar	#StoreLoad|#StoreStore

	!
	! Record the new hres_last_tick.  %l5 holds nsec_scale here.
	!
	ldx	[%l4 + %lo(hrtime_base)], %g1	! %g1 = old hrtime_base
	GET_NATIVE_TIME(%l0,%l3,%l6,__LINE__)	! %l0 = native time now
	stx	%l0, [%l4 + %lo(hres_last_tick)]! hres_last_tick = now
	! scale the native time to nanoseconds
	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

	sub	%l0, %g1, %i1			! %i1 = nsec since old base

	ldx	[%l4 + %lo(hrtime_base)], %l1
	cmp	%l1, %l0			! old base later than now?
	bg,pn	%xcc, 9f			!   yes: panic path
	  nop

	stx	%l0, [%l4 + %lo(hrtime_base)]	! hrtime_base = now

	!
	! Work out how much of any pending adjustment to apply.
	!
	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
	brz	%l0, 2f				! nothing pending: skip
	  clr	%l5				! delay: adj = 0
	tst	%l0				! hrestime_adj >= 0 ?
	bge,pt	%xcc, 1f			!   yes: positive case
	  srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj

	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
	bl,pt	%xcc, 2f			!   yes: apply -adj
	  neg	%l5				! delay: %l5 = -adj
	ba,pt	%xcc, 2f
	  mov	%l0, %l5			!   no: adj = hrestime_adj
1:
	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
	bl,a,pt	%xcc, 2f
	  mov	%l0, %l5			! annulled delay: adj = hrestime_adj
2:
	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
	sub	%l0, %l5, %l0			! timedelta -= adj

	stx	%l0, [%l4 + %lo(timedelta)]	! write back timedelta
	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta

	or	%l4, %lo(hrestime), %l2		! %l2 = address of hrestime
	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
	ldn	[%l2 + CLONGSIZE], %i3
	add	%i3, %l5, %i3			! nsec += adj
	add	%i3, %i1, %i3			! nsec += nsec since last tick

	set	NANOSEC, %l5			! %l5 = NANOSEC
	cmp	%i3, %l5
	bl,pt	%xcc, 5f			! nsec < NANOSEC: no carry
	  sethi	%hi(one_sec), %i1		! delay
	add	%i2, 0x1, %i2			! sec++
	sub	%i3, %l5, %i3			! nsec -= NANOSEC
	mov	0x1, %l5
	st	%l5, [%i1 + %lo(one_sec)]	! one_sec = 1
5:
	stn	%i2, [%l2]
	stn	%i3, [%l2 + CLONGSIZE]		! hrestime updated

	membar	#StoreStore

	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1				! bump the lock word ...
	st	%i1, [%l4 + %lo(hres_lock)]	! ... which releases the lock

	ret
	restore

9:
	!
	! hrtime_base would step back: release hres_lock, then panic
	!
	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1
	st	%i1, [%l4 + %lo(hres_lock)]

	sethi	%hi(hrtime_base_panic), %o0
	call	panic
	  or	%o0, %lo(hrtime_base_panic), %o0

	SET_SIZE(hres_tick)
615
616#endif	/* lint */
617
618#if !defined(lint) && !defined(__lint)
619
620	.seg	".text"
kstat_q_panic_msg:
	.asciz	"kstat_q_exit: qlen == 0"

	/*
	 * kstat_q_panic: branch target used by the kstat queue "exit"
	 * routines when the old queue length is zero.  Gets a fresh
	 * window and panics with the message above.
	 */
	ENTRY(kstat_q_panic)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(kstat_q_panic_msg), %o0
	call	panic
	  or	%o0, %lo(kstat_q_panic_msg), %o0	! delay: message address
	/*NOTREACHED*/
	SET_SIZE(kstat_q_panic)
631
/* Branch-if-zero with either prediction, for use as the QBR argument. */
#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

/*
 * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
 *
 * Updates one queue's statistics.  On entry %o0 holds the base address
 * that the QTYPE offsets are relative to and %g1 holds the current time.
 *
 *	QOP	add or sub, applied to the queue length
 *	QBR	branch taken when the OLD length is zero
 *	QZERO	target of that branch
 *	QRETURN	text emitted immediately before the final store (a label,
 *		a return instruction, or both); when it is a return, the
 *		final store sits in its delay slot
 *	QTYPE	prefix of the CNT, LASTUPDATE, TIME and LENTIME offsets
 *
 * The new length is stored in the delay slot of QBR, so it is written
 * whether or not the branch is taken.  When the old length is nonzero,
 * TIME grows by the time elapsed since LASTUPDATE and LENTIME grows by
 * old length * elapsed.  LASTUPDATE is set to %g1 last.
 *
 * Clobbers %o1-%o5.
 */
#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old length */	\
	QOP	%o1, 1, %o2;			/* %o2 = new length */	\
	QBR	%o1, QZERO;			/* old length zero? */	\
	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: store length */ \
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = elapsed */	\
	mulx	%o1, %o2, %o3;			/* %o3 = len * elapsed */ \
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* store time */	\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* store lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
651
/*
 * kstat wait/run queue accounting entry points.  Each one reads the
 * native time into %g1 (with %g2 and %g3 as macro scratch) and then
 * expands KSTAT_Q_UPDATE once or twice; %o0 is passed through to the
 * macro unchanged.
 *
 * "enter" routines add one to the queue length and skip the time
 * accounting when the old length was zero.  "exit" routines subtract
 * one and branch to kstat_q_panic when the old length was zero.  The
 * two transfer routines do an exit on one queue followed by an enter
 * on the other.
 */
	.align 16
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)

	.align 16
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_exit)

	.align 16
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)

	.align 16
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_exit)

	/* wait queue exit, then run queue enter */
	.align 16
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)

	/* run queue exit, then wait queue enter */
	.align 16
	ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)
689
690#endif /* lint */
691
#ifdef lint

/* C declarations of the time variables, for lint only. */
int64_t timedelta;
hrtime_t hres_last_tick;
volatile timestruc_t hrestime;
int64_t hrestime_adj;
volatile int hres_lock;
uint_t nsec_scale;
hrtime_t hrtime_base;
int traptrace_use_stick;

#else
	/*
	 *  -- WARNING --
	 *
	 * The variables below MUST stay together, and the original note
	 * here calls for a 128-byte boundary.  Besides the primary
	 * performance motivation (keeping them on the same cache
	 * line(s)), code in this file and in the GET*TIME() macros
	 * assumes they all share the same high 22 address bits, so that
	 * a single sethi serves for all of them.
	 */
	.seg	".data"
	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
	.global	nsec_shift, adj_shift, native_tick_offset, native_stick_offset

	/* XXX - the note above asks for 128 bytes, the code aligns to 64 */
	.align	64
timedelta:
	.word	0, 0		/* int64_t */
hres_last_tick:
	.word	0, 0		/* hrtime_t */
hrestime:
	.nword	0, 0		/* two longs: sec, nsec */
hrestime_adj:
	.word	0, 0		/* int64_t */
hres_lock:
	.word	0		/* int */
nsec_scale:
	.word	0		/* uint_t */
hrtime_base:
	.word	0, 0		/* hrtime_t */
traptrace_use_stick:
	.word	0		/* int */
nsec_shift:
	.word	NSEC_SHIFT
adj_shift:
	.word	ADJ_SHIFT
	.align	8
native_tick_offset:
	.word	0, 0		/* 64-bit value */
	.align	8
native_stick_offset:
	.word	0, 0		/* 64-bit value */

#endif
748
749
/*
 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
 * usec_delay(int n)		[compatibility - should go one day]
 *
 * Busy-wait for n microseconds.  Values of n <= 0 are treated as 1.
 *
 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 * and variable clock rate for power management requires that we
 * use %stick to implement this routine.
 */

#if defined(lint)

/*ARGSUSED*/
void
drv_usecwait(clock_t n)
{
}

/*ARGSUSED*/
void
usec_delay(int n)
{
}

#else	/* lint */

	/*
	 * In:	 %o0 = n
	 * Uses: %o1 = deadline (start time + n * sticks_per_usec + 1)
	 *	 %o2 = most recent native time
	 *	 %o3, %o4 = macro scratch
	 *
	 * The compare at label 1 is placed before the time read and the
	 * branch after it, so each pass tests the deadline against the
	 * time obtained on the previous pass.
	 * NOTE(review): this relies on the time macro leaving the
	 * condition codes alone - confirm against its definition.
	 */
	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
	brlez,a,pn %o0, 0f			! n <= 0 ?
	  mov	1, %o0				! annulled delay: then n = 1
0:
	sethi	%hi(sticks_per_usec), %o1
	lduw	[%o1 + %lo(sticks_per_usec)], %o1
	mulx	%o1, %o0, %o1		! microseconds -> ticks
	inc	%o1			! we do not start on a tick edge
	GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__)
	add	%o1, %o2, %o1		! %o1 = deadline

1:	cmp	%o1, %o2		! deadline vs. last time read
	GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__)
	bgeu,pt	%xcc, 1b		! deadline >= time (unsigned): spin
	  nop
	retl
	  nop
	SET_SIZE(usec_delay)
	SET_SIZE(drv_usecwait)
#endif	/* lint */
797
#if defined(lint)

/* ARGSUSED */
void
pil14_interrupt(int level)
{
}

#else

/*
 * pil14_interrupt: level-14 interrupt prologue.
 *
 * Records the interrupted PIL in the CPU structure, then records the
 * trap PC as either the kernel profile PC or the user profile PC
 * (depending on TSTATE_PRIV in %tstate) and zeroes the other one.
 * Control then passes to pil_interrupt_common.
 *
 * Uses: %g1 = CPU pointer, %g2 = scratch, %g5 = %tpc,
 *	 %g6 = %pil, then %tstate
 */
	ENTRY_NP(pil14_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%pil, %g6			! %g6 = interrupted PIL
	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! save it in the CPU struct
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! did we trap from the kernel?
	bnz,a,pt %xcc, 1f
	  stn	%g5, [%g1 + CPU_PROFILE_PC]	! kernel: record kernel PC
	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! user: record user PC
	ba	pil_interrupt_common		! must be large-disp branch
	  stn	%g0, [%g1 + CPU_PROFILE_PC]	! delay: zero kernel PC
1:	ba	pil_interrupt_common		! must be large-disp branch
	  stn	%g0, [%g1 + CPU_PROFILE_UPC]	! delay: zero user PC
	SET_SIZE(pil14_interrupt)
825
/*
 * tick_rtt
 *
 * Uses: %o5 = TICK_COMPARE value (live across the call below)
 *	 %g5 = saved %pstate
 *	 %o0 = SOFTINT mask, then the current tick
 *	 %o4 = SOFTINT contents, then the retry step
 *	 %g1, %g2 = scratch
 *
 * Always finishes by branching to current_thread_complete.
 */
	ENTRY_NP(tick_rtt)
	!
	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
	! disabled.  If TICK_COMPARE is enabled, we know that we need to
	! reenqueue the interrupt request structure.  We then check TICKINT
	! in SOFTINT; if it is set, then we know that we were in a
	! TICK_COMPARE interrupt.  In this case, TICK_COMPARE may have been
	! rewritten recently; %o5 is compared to the current time to verify
	! that it is in the future.
	!
	! Note that %o5 is live until after 1f.
	! XXX - there is a subroutine call while %o5 is live!
	!
	RD_TICKCMPR(%o5,%g1,%g2,__LINE__)
	srlx	%o5, TICKINT_DIS_SHFT, %g1
	brnz,pt	%g1, 2f				! disabled: nothing to do
	  nop

	rdpr 	%pstate, %g5			! %g5 = original pstate
	andn	%g5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate		! Disable vec interrupts

	sethi	%hi(cbe_level14_inum), %o1
	ldx	[%o1 + %lo(cbe_level14_inum)], %o1
	call	intr_enqueue_req ! preserves %o5 and %g5
	  mov	PIL_14, %o0

	! Is TICKINT or STICKINT pending in SOFTINT?
	rd	SOFTINT, %o4
	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
	andcc	%o4, %o0, %g0
	bz,a,pn	%icc, 2f			! neither: finished
	  wrpr	%g0, %g5, %pstate		! Enable vec interrupts

	! clear TICKINT/STICKINT
	wr	%o0, CLEAR_SOFTINT

	!
	! With TICKINT cleared, reread %tick and confirm that the value
	! that was programmed is still in the future.  If it is not,
	! TICK_COMPARE has to be reprogrammed to fire as soon as possible.
	!
	GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__)	! %o0 = tick
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	  wrpr	%g0, %g5, %pstate		!   delay: enable vec intr

	!
	! Getting here means TICK_COMPARE holds a %tick value that is in
	! the past; load an initial step size and loop until TICK_COMPARE
	! has been programmed to fire in the future.
	!
	mov	8, %o4				! 8 = arbitrary initial step
1:	add	%o0, %o4, %o5			! Add the step
	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__)	! %o0 = tick
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	  wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
	ba	1b				! No, try again.
	  sllx	%o4, 1, %o4			!    delay: double step size

2:	ba	current_thread_complete
	  nop
	SET_SIZE(tick_rtt)
891
892#endif /* lint */
893
#if defined(lint)

/* ARGSUSED */
void
pil15_interrupt(int level)
{
}

#else   /* lint */

/*
 * pil15_interrupt: level-15 interrupt prologue.
 *
 * Records the trap PC as either the kernel or the user cpc-profile PC
 * (depending on TSTATE_PRIV in %tstate), zeroes the other one, and
 * continues at pil15_epilogue.
 *
 * Uses: %g1 = CPU pointer, %g2 = scratch, %g5 = %tpc, %g6 = %tstate
 */
	ENTRY_NP(pil15_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! did we trap from the kernel?
	bnz,a,pt %xcc, 1f
	  stn	%g5, [%g1 + CPU_CPCPROFILE_PC]	! kernel: record kernel PC
	stn	%g5, [%g1 + CPU_CPCPROFILE_UPC]	! user: record user PC
	ba	pil15_epilogue			! must be large-disp branch
	  stn	%g0, [%g1 + CPU_CPCPROFILE_PC]	! delay: zero kernel PC
1:	ba	pil15_epilogue			! must be large-disp branch
	  stn	%g0, [%g1 + CPU_CPCPROFILE_UPC]	! delay: zero user PC
	SET_SIZE(pil15_interrupt)

#endif  /* lint */
921
#if defined(lint)
/*
 * Prefetch a page_t for write or for read.  The interface assumes a
 * linear scan of sequential page_t's.
 */
/*ARGSUSED*/
void
prefetch_page_w(void *pp)
{
}

/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
}
#else	/* lint */

/*
 * Both routines return immediately without touching their argument.
 * XXXQ These should be inline templates, not functions
 */
	ENTRY(prefetch_page_w)
	retl
	  nop
	SET_SIZE(prefetch_page_w)

	ENTRY(prefetch_page_r)
	retl
	  nop
	SET_SIZE(prefetch_page_r)

#endif	/* lint */
950
#if defined(lint)
/*
 * Prefetch a struct smap for write.
 */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
}
#else	/* lint */

/*
 * Returns immediately without touching its argument.
 * XXXQ These should be inline templates, not functions
 */
	ENTRY(prefetch_smap_w)
	retl
	  nop
	SET_SIZE(prefetch_smap_w)

#endif	/* lint */
968
969/*
970 * Generic sun4v MMU and Cache operations.
971 */
972
973#if defined(lint)
974
975/*ARGSUSED*/
976void
977vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
978{}
979
980/*ARGSUSED*/
981void
982vtag_flushall(void)
983{}
984
985/*ARGSUSED*/
986void
987vtag_unmap_perm_tl1(uint64_t vaddr, uint64_t ctxnum)
988{}
989
990/*ARGSUSED*/
991void
992vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
993{}
994
995/*ARGSUSED*/
996void
997vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
998{}
999
1000/*ARGSUSED*/
1001void
1002vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
1003{}
1004
1005/*ARGSUSED*/
1006void
1007vac_flushpage(pfn_t pfnum, int vcolor)
1008{}
1009
1010/*ARGSUSED*/
1011void
1012vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
1013{}
1014
1015/*ARGSUSED*/
1016void
1017flush_instr_mem(caddr_t vaddr, size_t len)
1018{}
1019
1020#else	/* lint */
1021
/*
 * void vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
 *
 * Flush one page from the TLB.
 *
 * In:	 %o0 = vaddr
 *	 %o1 = sfmmup
 * Uses: %g1 = context number of sfmmup on this CPU, %g2 = scratch,
 *	 %o2 = MAP_ITLB | MAP_DTLB
 *
 * After the MMU_UNMAP_ADDR trap, a nonzero %o0 is treated as failure:
 * control goes to panic_bad_hcall with MMU_UNMAP_ADDR in %o1.
 */
	ENTRY_NP(vtag_flushpage)
	SFMMU_CPU_CNUM(%o1, %g1, %g2)   /* %g1 = sfmmu cnum on this CPU */

	mov	%g1, %o1			! second argument = cnum
	mov	MAP_ITLB | MAP_DTLB, %o2	! both TLBs
	ta	MMU_UNMAP_ADDR
	brz,pt	%o0, 1f				! zero status: success
	  nop
	ba	panic_bad_hcall
	  mov	MMU_UNMAP_ADDR, %o1		! delay: identify the call
1:
	retl
	  nop
	SET_SIZE(vtag_flushpage)
1042
/*
 * void vtag_flushall(void)
 *
 * Issues the MMU_DEMAP_ALL fast trap for both TLBs with %o0 and %o1
 * zero (no cpu list yet).  A nonzero %o0 afterwards sends control to
 * panic_bad_hcall with MMU_DEMAP_ALL in %o1.
 */
	ENTRY_NP(vtag_flushall)
	mov	%g0, %o0	! XXX no cpu list yet
	mov	%g0, %o1	! XXX no cpu list yet
	mov	MAP_ITLB | MAP_DTLB, %o2
	mov	MMU_DEMAP_ALL, %o5		! function number
	ta	FAST_TRAP
	brz,pt	%o0, 1f				! zero status: success
	  nop
	ba	panic_bad_hcall
	  mov	MMU_DEMAP_ALL, %o1		! delay: identify the call
1:
	retl
	  nop
	SET_SIZE(vtag_flushall)
1057
/*
 * vtag_unmap_perm_tl1: x-trap handler to unmap a permanent mapping.
 *
 * In:	 %g1 = vaddr
 *	 %g2 = ctxnum (KCONTEXT only)
 * Uses: %g3-%g6 hold the interrupted %o0, %o1, %o2 and %o5 while the
 *	 trap is made, and are used to put them back afterwards.
 *
 * A nonzero %o0 after the trap leads to ptl1_panic with %g1 set to
 * PTL1_BAD_HCALL, or to the more specific UNMAP_PERM code when the
 * status is H_ENOMAP or H_EINVAL.
 */
	ENTRY_NP(vtag_unmap_perm_tl1)
	mov	%o5, %g6			! preserve the out registers
	mov	%o2, %g5			! that are about to be used
	mov	%o1, %g4
	mov	%o0, %g3
	mov	%g1, %o0			! vaddr
	mov	%g2, %o1			! ctxnum
	mov	MAP_ITLB | MAP_DTLB, %o2
	mov	UNMAP_PERM_ADDR, %o5		! function number
	ta	FAST_TRAP
	brz,pt	%o0, 1f				! zero status: success
	  nop

	mov	PTL1_BAD_HCALL, %g1		! default panic code

	cmp	%o0, H_ENOMAP
	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP, %g1

	cmp	%o0, H_EINVAL
	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_EINVAL, %g1

	ba,a	ptl1_panic
1:
	mov	%g3, %o0			! put the out registers back
	mov	%g4, %o1
	mov	%g5, %o2
	mov	%g6, %o5
	retry
	SET_SIZE(vtag_unmap_perm_tl1)
1092
/*
 * vtag_flushpage_tl1: x-trap handler to flush a page from tlb and tsb.
 *
 * In:	 %g1 = vaddr, zero-extended on 32-bit kernel
 *	 %g2 = sfmmup
 * Uses: %g3-%g5 hold the interrupted %o0-%o2 during the trap,
 *	 %g6 = scratch
 *
 * assumes TSBE_TAG = 0
 *
 * A nonzero %o0 after the MMU_UNMAP_ADDR trap leads to ptl1_panic with
 * PTL1_BAD_HCALL in %g1.
 */
	ENTRY_NP(vtag_flushpage_tl1)
	srln	%g1, MMU_PAGESHIFT, %g1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = page-aligned vaddr */
	mov	%o0, %g3			/* preserve %o0-%o2 */
	mov	%o1, %g4
	mov	%o2, %g5
	mov	%g1, %o0			/* vaddr */

	SFMMU_CPU_CNUM(%g2, %o1, %g6)   /* %o1 = sfmmu cnum on this CPU */

	mov	MAP_ITLB | MAP_DTLB, %o2
	ta	MMU_UNMAP_ADDR
	brz,pt	%o0, 1f				/* zero status: success */
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1		/* delay: panic code */
1:
	mov	%g5, %o2			/* put %o0-%o2 back */
	mov	%g4, %o1
	mov	%g3, %o0
	membar #Sync
	retry
	SET_SIZE(vtag_flushpage_tl1)
1124
/*
 * vtag_flush_pgcnt_tl1: x-trap handler to flush pgcnt MMU_PAGESIZE
 * pages from the tlb.
 *
 * In:	 %g1 = vaddr, zero-extended on 32-bit kernel
 *	 %g2 = <sfmmup58|pgcnt6>; (pgcnt - 1) is passed in via the
 *	       pgcnt6 bits.
 * Uses: %g3-%g5 hold the interrupted %o0-%o2 during the loop
 *	 %g7 = pages left to flush
 *	 %g6 = scratch, then the page size
 *	 %g2 = context number once it has been looked up
 *
 * NOTE: this handler relies on the fact that no
 *	interrupts or traps can occur during the loop
 *	issuing the TLB_DEMAP operations. It is assumed
 *	that interrupts are disabled and this code is
 *	fetching from the kernel locked text address.
 *
 * assumes TSBE_TAG = 0
 *
 * A nonzero %o0 after any MMU_UNMAP_ADDR trap leads to ptl1_panic with
 * PTL1_BAD_HCALL in %g1.
 */
	ENTRY_NP(vtag_flush_pgcnt_tl1)
	srln	%g1, MMU_PAGESHIFT, %g1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = page-aligned vaddr */
	mov	%o0, %g3			/* preserve %o0-%o2 */
	mov	%o1, %g4
	mov	%o2, %g5

	and	%g2, SFMMU_PGCNT_MASK, %g7	/* g7 = pgcnt - 1 */
	add	%g7, 1, %g7			/* g7 = pgcnt */

	andn	%g2, SFMMU_PGCNT_MASK, %o0	/* %o0 = sfmmup */

	SFMMU_CPU_CNUM(%o0, %g2, %g6)    /* %g2 = sfmmu cnum on this CPU */

	set	MMU_PAGESIZE, %g6		/* g6 = pgsize */

1:
	mov	%g1, %o0			/* vaddr */
	mov	%g2, %o1			/* cnum */
	mov	MAP_ITLB | MAP_DTLB, %o2
	ta	MMU_UNMAP_ADDR
	brz,pt	%o0, 2f				/* zero status: success */
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1		/* delay: panic code */
2:
	deccc	%g7				/* one page fewer to go */
	bnz,pt	%icc,1b
	  add	%g1, %g6, %g1			/* delay: advance to next page */

	mov	%g5, %o2			/* put %o0-%o2 back */
	mov	%g4, %o1
	mov	%g3, %o0
	membar #Sync
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)
1175
/*
 * vtag_flushall_tl1: x-trap handler that issues the MMU_DEMAP_ALL fast
 * trap for both TLBs.  (Original note: not implemented on US1/US2.)
 *
 * Uses: %g3-%g7 hold the interrupted %o0, %o1, %o2, %o3 and %o5 while
 *	 the trap is made.
 *
 * A nonzero %o0 after the trap leads to ptl1_panic with PTL1_BAD_HCALL
 * in %g1.
 */
	ENTRY_NP(vtag_flushall_tl1)
	mov	%o0, %g3			! preserve the out registers
	mov	%o1, %g4
	mov	%o2, %g5
	mov	%o3, %g6	! XXXQ not used?
	mov	%o5, %g7
	mov	%g0, %o0	! XXX no cpu list yet
	mov	%g0, %o1	! XXX no cpu list yet
	mov	MAP_ITLB | MAP_DTLB, %o2
	mov	MMU_DEMAP_ALL, %o5		! function number
	ta	FAST_TRAP
	brz,pt	%o0, 1f				! zero status: success
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1		! delay: panic code
1:
	mov	%g7, %o5			! put the out registers back
	mov	%g6, %o3	! XXXQ not used?
	mov	%g5, %o2
	mov	%g4, %o1
	mov	%g3, %o0
	retry
	SET_SIZE(vtag_flushall_tl1)
1200
/*
 * flush_instr_mem:
 *	Flush a portion of the I-$ starting at vaddr
 * 	%o0 vaddr
 *	%o1 bytes to be flushed
 *
 * The range is walked in ICACHE_FLUSHSZ steps.  vaddr is first rounded
 * down to an ICACHE_FLUSHSZ boundary and the byte count is extended by
 * the amount rounded off, so that a range which starts in the middle of
 * one ICACHE_FLUSHSZ unit and ends in a later one has its final unit
 * flushed as well.  (Stepping from the unaligned address could stop one
 * unit short.)  At least one flush is always issued, even for a count
 * of zero.
 *
 * Uses: %o2 = offset of vaddr within its ICACHE_FLUSHSZ unit
 */

	ENTRY(flush_instr_mem)
	membar	#StoreStore				! Ensure the stores
							! are globally visible
	and	%o0, (ICACHE_FLUSHSZ - 1), %o2		! offset within unit
	andn	%o0, (ICACHE_FLUSHSZ - 1), %o0		! round vaddr down
	add	%o1, %o2, %o1				! bytes += offset
1:
	flush	%o0
	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes = bytes-0x20
	bgu,pt	%ncc, 1b				! more left: continue
	  add	%o0, ICACHE_FLUSHSZ, %o0		! vaddr = vaddr+0x20

	retl
	  nop
	SET_SIZE(flush_instr_mem)
1220
1221#endif /* !lint */
1222
#if !defined(CUSTOM_FPZERO)

/*
 * fp_zero() - clear all fp data registers and the fsr
 */

#if defined(lint) || defined(__lint)

void
fp_zero(void)
{
}

#else	/* lint */

/* A 64-bit zero in memory, the source for both loads in fp_zero. */
.global	fp_zero_zero
.align 8
fp_zero_zero:
	.xword	0

	/*
	 * void fp_zero(void)
	 *
	 * Loads the zero doubleword into %fsr and into %f0, then copies
	 * %f0 into every other even-numbered register up to %f62.
	 * Uses %o0 for the address of fp_zero_zero.
	 */
	ENTRY_NP(fp_zero)
	sethi	%hi(fp_zero_zero), %o0
	ldx	[%o0 + %lo(fp_zero_zero)], %fsr		! fsr = 0
	ldd	[%o0 + %lo(fp_zero_zero)], %f0		! %f0 = 0
	fmovd	%f0, %f2
	fmovd	%f0, %f4
	fmovd	%f0, %f6
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14
	fmovd	%f0, %f16
	fmovd	%f0, %f18
	fmovd	%f0, %f20
	fmovd	%f0, %f22
	fmovd	%f0, %f24
	fmovd	%f0, %f26
	fmovd	%f0, %f28
	fmovd	%f0, %f30
	fmovd	%f0, %f32
	fmovd	%f0, %f34
	fmovd	%f0, %f36
	fmovd	%f0, %f38
	fmovd	%f0, %f40
	fmovd	%f0, %f42
	fmovd	%f0, %f44
	fmovd	%f0, %f46
	fmovd	%f0, %f48
	fmovd	%f0, %f50
	fmovd	%f0, %f52
	fmovd	%f0, %f54
	fmovd	%f0, %f56
	fmovd	%f0, %f58
	fmovd	%f0, %f60
	retl
	  fmovd	%f0, %f62			! delay: last register
	SET_SIZE(fp_zero)

#endif	/* lint */
#endif  /* CUSTOM_FPZERO */
1282