xref: /titanic_41/usr/src/uts/sun4v/cpu/common_asm.s (revision b69a86957bad202e0d4cd49e4ceb7fea54a83270)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#if !defined(lint)
27#include "assym.h"
28#endif
29
30/*
31 * General assembly language routines.
32 * It is the intent of this file to contain routines that are
33 * specific to cpu architecture.
34 */
35
36/*
37 * WARNING: If you add a fast trap handler which can be invoked by a
38 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
39 * instead of "done" instruction to return back to the user mode. See
40 * comments for the "fast_trap_done" entry point for more information.
41 */
42#define	FAST_TRAP_DONE	\
43	ba,a	fast_trap_done
44
45#include <sys/machclock.h>
46#include <sys/clock.h>
47
48#if defined(lint)
49#include <sys/types.h>
50#include <sys/scb.h>
51#include <sys/systm.h>
52#include <sys/regset.h>
53#include <sys/sunddi.h>
54#include <sys/lockstat.h>
55#endif	/* lint */
56
57
58#include <sys/asm_linkage.h>
59#include <sys/privregs.h>
60#include <vm/hat_sfmmu.h>
61#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
62#include <sys/machthread.h>
63#include <sys/clock.h>
64#include <sys/intreg.h>
65#include <sys/psr_compat.h>
66#include <sys/isa_defs.h>
67#include <sys/dditypes.h>
68#include <sys/intr.h>
69#include <sys/hypervisor_api.h>
70
71#if !defined(lint)
72#include "assym.h"
73#endif
74
75#define	ICACHE_FLUSHSZ	0x20
76
77#if defined(lint)
78/*
79 * Softint generated when counter field of tick reg matches value field
80 * of tick_cmpr reg
81 */
82/*ARGSUSED*/
83void
84tickcmpr_set(uint64_t clock_cycles)
85{}
86
87#else   /* lint */
88
89	ENTRY_NP(tickcmpr_set)
90	! get 64-bit clock_cycles interval
91	mov	%o0, %o2
92	mov	8, %o3			! A reasonable initial step size
931:
94	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR
95
96	GET_NATIVE_TIME(%o0,%o4,%o5,__LINE__)	! Read %tick to confirm the
97						! value we wrote was in the
98						! future.
99
100	cmp	%o2, %o0		! If the value we wrote was in the
101	bg,pt	%xcc, 2f		!   future, then blow out of here.
102	  sllx	%o3, 1, %o3		! If not, then double our step size,
103	ba,pt	%xcc, 1b		!   and take another lap.
104	  add	%o0, %o3, %o2		!
1052:
106	retl
107	  nop
108	SET_SIZE(tickcmpr_set)
109
110#endif  /* lint */
111
112#if defined(lint)
113
114void
115tickcmpr_disable(void)
116{}
117
118#else
119
	ENTRY_NP(tickcmpr_disable)
	/*
	 * void tickcmpr_disable(void)
	 *
	 * Writes a TICK_CMPR value whose only set bit is bit
	 * TICKINT_DIS_SHFT (presumably the interrupt-disable bit, going by
	 * the constant's name).  Scratch: %g1, %o0, %o4, %o5.
	 */
	mov	1, %g1
	sllx	%g1, TICKINT_DIS_SHFT, %o0	! %o0 = 1 << TICKINT_DIS_SHFT
	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
	retl
	  nop
	SET_SIZE(tickcmpr_disable)
127
128#endif
129
130#if defined(lint)
131
/*
 * tick_write_delta() increments %tick by the specified delta.  This should
 * only be called after a CPR event to assure that gethrtime() continues to
 * increase monotonically.  Obviously, writing %tick needs to be done very
 * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
 * this reason, we make sure we're i-cache hot before actually writing to
 * %tick.
 *
 * NOTE: No provision for this on sun4v right now.
 */
142
143/*ARGSUSED*/
144void
145tick_write_delta(uint64_t delta)
146{}
147
148#else	/* lint */
149
150	.seg	".text"
151tick_write_delta_panic:
152	.asciz	"tick_write_delta: not supported"
153
154	ENTRY_NP(tick_write_delta)
155	sethi	%hi(tick_write_delta_panic), %o1
156        save    %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
157	call	panic
158	  or	%i1, %lo(tick_write_delta_panic), %o0
159	/*NOTREACHED*/
160	retl
161	  nop
162#endif
163
164#if defined(lint)
165/*
166 *  return 1 if disabled
167 */
168
169int
170tickcmpr_disabled(void)
171{ return (0); }
172
173#else   /* lint */
174
	ENTRY_NP(tickcmpr_disabled)
	/*
	 * int tickcmpr_disabled(void)
	 *
	 * Reads TICK_CMPR into %g1 and returns it shifted right by
	 * TICKINT_DIS_SHFT, i.e. the disable bit and anything above it.
	 * NOTE(review): the result is exactly 0/1 only if TICKINT_DIS_SHFT
	 * selects the top bit -- confirm against its definition.
	 */
	RD_TICKCMPR(%g1,%o0,%o1,__LINE__)
	retl
	  srlx	%g1, TICKINT_DIS_SHFT, %o0	! delay: return value
	SET_SIZE(tickcmpr_disabled)
180
181#endif  /* lint */
182
183/*
184 * Get current tick
185 */
186#if defined(lint)
187
188u_longlong_t
189gettick(void)
190{ return (0); }
191
192#else   /* lint */
193
	ENTRY(gettick)
	/*
	 * u_longlong_t gettick(void)
	 *
	 * Returns in %o0 whatever GET_NATIVE_TIME yields; %o2 and %o3 are
	 * handed to the macro as scratch.
	 */
	GET_NATIVE_TIME(%o0,%o2,%o3,__LINE__)
	retl
	  nop
	SET_SIZE(gettick)
199
200#endif  /* lint */
201
202/*
203 * Get current tick. For trapstat use only.
204 */
205#if defined (lint)
206
207hrtime_t
208rdtick()
209{ return (0); }
210
211#else
212	ENTRY(rdtick)
213	retl
214	RD_TICK_PHYSICAL(%o0)
215	SET_SIZE(rdtick)
216#endif /* lint */
217
218
219/*
220 * Return the counter portion of the tick register.
221 */
222
223#if defined(lint)
224
225uint64_t
226gettick_counter(void)
227{ return(0); }
228
229uint64_t
230gettick_npt(void)
231{ return(0); }
232
233uint64_t
234getstick_npt(void)
235{ return(0); }
236
237#else	/* lint */
238
239	ENTRY_NP(gettick_counter)
240	RD_TICK(%o0,%o1,%o2,__LINE__)
241	retl
242	nop
243	SET_SIZE(gettick_counter)
244
245	ENTRY_NP(gettick_npt)
246	RD_TICK_PHYSICAL(%o0)
247	retl
248	srlx	%o0, 63, %o0
249	SET_SIZE(gettick_npt)
250
251	ENTRY_NP(getstick_npt)
252	RD_STICK_PHYSICAL(%o0)
253	retl
254	srlx	%o0, 63, %o0
255	SET_SIZE(getstick_npt)
256#endif	/* lint */
257
/*
 * Provide a C callable interface to the trap that reads the hi-res timer.
 * gethrtime() returns the 64-bit nanosecond timestamp in %o0.
 */
262
263#if defined(lint)
264
265hrtime_t
266gethrtime(void)
267{
268	return ((hrtime_t)0);
269}
270
271hrtime_t
272gethrtime_unscaled(void)
273{
274	return ((hrtime_t)0);
275}
276
277hrtime_t
278gethrtime_max(void)
279{
280	return ((hrtime_t)0);
281}
282
283void
284scalehrtime(hrtime_t *hrt)
285{
286	*hrt = 0;
287}
288
289void
290gethrestime(timespec_t *tp)
291{
292	tp->tv_sec = 0;
293	tp->tv_nsec = 0;
294}
295
296time_t
297gethrestime_sec(void)
298{
299	return (0);
300}
301
302void
303gethrestime_lasttick(timespec_t *tp)
304{
305	tp->tv_sec = 0;
306	tp->tv_nsec = 0;
307}
308
309/*ARGSUSED*/
310void
311hres_tick(void)
312{
313}
314
315void
316panic_hres_tick(void)
317{
318}
319
320#else	/* lint */
321
	ENTRY_NP(gethrtime)
	/*
	 * hrtime_t gethrtime(void)
	 *
	 * GET_HRTIME leaves the result in %g1 (its first argument); the
	 * remaining registers are passed to the macro as scratch.  The
	 * value is returned in %o0.
	 */
	GET_HRTIME(%g1,%o0,%o1,%o2,%o3,%o4,%o5,%g2,__LINE__)
							! %g1 = hrtime
	retl
	  mov	%g1, %o0			! delay: return value
	SET_SIZE(gethrtime)
328
	ENTRY_NP(gethrtime_unscaled)
	/*
	 * hrtime_t gethrtime_unscaled(void)
	 *
	 * Returns the native time as read, without conversion to
	 * nanoseconds (compare gethrtime_waitfree, which applies
	 * NATIVE_TIME_TO_NSEC).
	 */
	GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__)	! %g1 = native time
	retl
	  mov	%g1, %o0			! delay: return value
	SET_SIZE(gethrtime_unscaled)
334
	ENTRY_NP(gethrtime_waitfree)
	ALTENTRY(dtrace_gethrtime)
	/*
	 * gethrtime_waitfree() / dtrace_gethrtime()
	 *
	 * Two names for one body: read the native time and convert it with
	 * NATIVE_TIME_TO_NSEC.  Unlike gethrtime, this path does not go
	 * through GET_HRTIME.  Result in %o0.
	 */
	GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__)	! %g1 = native time
	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
	retl
	  mov	%g1, %o0			! delay: return value
	SET_SIZE(dtrace_gethrtime)
	SET_SIZE(gethrtime_waitfree)
343
	ENTRY(gethrtime_max)
	/*
	 * hrtime_t gethrtime_max(void)
	 *
	 * Converts NATIVE_TIME_MAX with NATIVE_TIME_TO_NSEC.  If the result
	 * is negative when viewed as a signed 64-bit value, it is replaced
	 * by 0x7fffffffffffffff.
	 */
	NATIVE_TIME_MAX(%g1)
	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

	! hrtime_t's are signed, max hrtime_t must be positive
	mov	-1, %o2
	brlz,a	%g1, 1f			! annulled: the delay slot runs only
	  srlx	%o2, 1, %g1		!   when %g1 < 0; %g1 = (~0) >> 1
1:
	retl
	  mov	%g1, %o0		! delay: return value
	SET_SIZE(gethrtime_max)
356
	ENTRY(scalehrtime)
	/*
	 * void scalehrtime(hrtime_t *hrt)
	 *
	 * Converts the 64-bit value at *hrt (%o0) in place using
	 * NATIVE_TIME_TO_NSEC.  Scratch: %o1-%o3.
	 */
	ldx	[%o0], %o1		! %o1 = *hrt
	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
	retl
	  stx	%o1, [%o0]		! delay: *hrt = converted value
	SET_SIZE(scalehrtime)
363
/*
 * Fast trap to return a timestamp, uses trap window, leaves traps
 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1
 * (%o0 = upper 32 bits, %o1 = lower 32 bits).
 *
 * This is the handler for the ST_GETHRTIME trap.
 *
 * %o2 is handed to GET_HRTIME as a scratch register, so it may be
 * modified as well.  The handler exits through FAST_TRAP_DONE rather
 * than a bare "done".
 */

	ENTRY_NP(get_timestamp)
	GET_HRTIME(%g1,%g2,%g3,%g4,%g5,%o0,%o1,%o2,__LINE__)
	! %g1 = hrtime
	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
	FAST_TRAP_DONE
	SET_SIZE(get_timestamp)
378
/*
 * Macro to convert GET_HRESTIME() bits into a timestamp.
 *
 * We use two separate macros so that the platform-dependent GET_HRESTIME()
 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 *
 * Arguments (all registers, all modified except nano):
 *	hrestsec	in/out: seconds
 *	hrestnsec	in/out: nanoseconds
 *	adj		in: pending adjustment (hrestime_adj); clobbered
 *	nslt		in: nanoseconds since the last tick; clobbered
 *	nano		in: the value nanoseconds are normalised against
 *
 * The adjustment applied is adj clamped to +/- (nslt >> ADJ_SHIFT).  The
 * inline "nslt/16" comments assume ADJ_SHIFT == 4 -- TODO confirm.
 * Afterwards hrestnsec is reduced below nano, incrementing hrestsec once
 * per subtraction.
 *
 * Uses numeric local labels 2, 3 and 4 and clobbers the condition codes.
 */
#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */	\
	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
	  srlx	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
	ba	3f;			/* go convert to sec/nsec */	\
	  add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
2:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
	  add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
3:	cmp	hrestnsec, nano;	/* more than a billion? */	\
	bl,pt	%xcc, 4f;		/* if not, we're done */	\
	  nop;				/* delay: do nothing :( */	\
	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */	\
	ba,a	3b;			/* check >= billion again */	\
4:
405
	ENTRY_NP(gethrestime)
	/*
	 * void gethrestime(timespec_t *tp)
	 *
	 * %o0 = tp.  GET_HRESTIME fills %o1-%o5 (in the order CONV_HRESTIME
	 * consumes them: sec, nsec, adj, nslt, nano), using %g1-%g4 as
	 * scratch.  The converted seconds go to [tp] and the nanoseconds to
	 * [tp + CLONGSIZE].
	 */
	GET_HRESTIME(%o1,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__)
	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
	stn	%o1, [%o0]			! tp->tv_sec
	retl
	  stn	%o2, [%o0 + CLONGSIZE]		! delay: tp->tv_nsec
	SET_SIZE(gethrestime)
413
/*
 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
 * seconds.
 *
 * The seconds are produced directly in %o0; the nanoseconds in %o2 are
 * still normalised by CONV_HRESTIME (which may carry into the seconds)
 * and then discarded.
 */
	ENTRY_NP(gethrestime_sec)
	GET_HRESTIME(%o0,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__)
	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
	retl					! %o0 current hrestime seconds
	  nop
	SET_SIZE(gethrestime_sec)
424
/*
 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
 * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 * it explicitly.)
 *
 * %o0 = timespec_t *tp.  The lock word is sampled before and after reading
 * hrestime; the first sample has its low bit cleared, so the comparison
 * fails (and the read is retried) if that bit was set or if the word
 * changed in between.  The store of the seconds sits in the non-annulled
 * delay slot of the retry branch, so it may execute on a pass that is then
 * retried; the successful pass overwrites it.
 */
	ENTRY_NP(gethrestime_lasttick)
	sethi	%hi(hres_lock), %o1
0:
	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
	membar	#LoadLoad			! Load of lock must complete
	andn	%o2, 1, %o2			! Mask off lowest bit
	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
	add	%o1, %lo(hrestime), %o4
	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
	membar	#LoadLoad			! All loads must complete
	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
	cmp	%o3, %o2			! If lock is locked or has
	bne	0b				!   changed, retry.
	  stn	%g1, [%o0]			! Delay: store seconds
	retl
	  stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
	SET_SIZE(gethrestime_lasttick)
450
/*
 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1
 * (%o0 = seconds, %o1 = nanoseconds, per the CONV_HRESTIME argument order).
 *
 * This is the handler for the ST_GETHRESTIME trap.
 *
 * %g1-%g3 carry adj, nslt and nano; %g4, %g5, %o2 and %o3 are passed to
 * GET_HRESTIME as scratch, so %o2/%o3 may be modified as well.
 */

	ENTRY_NP(get_hrestime)
	GET_HRESTIME(%o0,%o1,%g1,%g2,%g3,%g4,%g5,%o2,%o3,__LINE__)
	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
	FAST_TRAP_DONE
	SET_SIZE(get_hrestime)
462
/*
 * Fast trap to return lwp virtual time, uses trap window, leaves traps
 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
 * of nanoseconds consumed.
 *
 * This is the handler for the ST_GETHRVTIME trap.
 *
 * Register usage:
 *	%o0, %o1 = return lwp virtual time (%o0 = hi32, %o1 = lo32);
 *		   %o0 is also scratch for NATIVE_TIME_TO_NSEC
 * 	%g2 = CPU, then thread (also scratch for GET_NATIVE_TIME)
 * 	%g3 = lwp (also scratch for CPU_ADDR)
 * 	%g1 = scratch
 * 	%g5 = running result
 */
	ENTRY_NP(get_virtime)
	GET_NATIVE_TIME(%g5,%g1,%g2,__LINE__)	! %g5 = native time in ticks
	CPU_ADDR(%g2, %g3)			! CPU struct ptr to %g2
	ldn	[%g2 + CPU_THREAD], %g2		! thread pointer to %g2
	ldn	[%g2 + T_LWP], %g3		! lwp pointer to %g3

	/*
	 * Subtract start time of current microstate from time
	 * of day to get increment for lwp virtual time.
	 */
	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
	sub	%g5, %g1, %g5

	/*
	 * Add current value of ms_acct[LMS_USER]
	 */
	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
	add	%g5, %g1, %g5
	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)

	srl	%g5, 0, %o1			! %o1 = lo32(%g5)
	srlx	%g5, 32, %o0			! %o0 = hi32(%g5)

	FAST_TRAP_DONE
	SET_SIZE(get_virtime)
502
503
504
	.seg	".text"
hrtime_base_panic:
	.asciz	"hrtime_base stepping back"


	/*
	 * void hres_tick(void)
	 *
	 * Under hres_lock:
	 *   - record the current native time in hres_last_tick;
	 *   - scale it to nanoseconds and store it in hrtime_base, panicking
	 *     if that would move hrtime_base backwards;
	 *   - compute adj = hrestime_adj clamped to +/- (delta >> ADJ_SHIFT),
	 *     where delta is the nanoseconds elapsed since the old
	 *     hrtime_base;
	 *   - timedelta -= adj, hrestime_adj = timedelta;
	 *   - hrestime.tv_nsec += adj + delta, carrying at most one second
	 *     into tv_sec and setting one_sec to 1 when it does.
	 *
	 * Locking: the lock is taken with ldstub on the byte at
	 * hres_lock + HRES_LOCK_OFFSET and released by incrementing the
	 * whole hres_lock word.  NOTE(review): this presumably relies on
	 * that byte being the low-order byte of the word, so the increment
	 * clears it and bumps the upper bits -- confirm against clock.h.
	 *
	 * All variables are addressed off a single sethi of hrestime (%l4).
	 * Numeric label 9 is defined twice; "9f" at the hrtime_base check
	 * refers to the panic path at the end.
	 */
	ENTRY_NP(hres_tick)
	save	%sp, -SA(MINFRAME), %sp	! get a new window

	sethi	%hi(hrestime), %l4
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
7:	tst	%l5
	bz,pt	%xcc, 8f			! if we got it, drive on
	  ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
9:	tst	%l5				! spin with plain loads until
	bz,a,pn	%xcc, 7b			!   the byte reads zero, then
	  ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
						!   retry the atomic ldstub
	ba,pt	%xcc, 9b
	  ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
	membar	#StoreLoad|#StoreStore

	!
	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
	!
	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
	GET_NATIVE_TIME(%l0,%l3,%l6,__LINE__)	! current native time
	stx	%l0, [%l4 + %lo(hres_last_tick)]! prev = current
	! convert native time to nsecs
	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

	sub	%l0, %g1, %i1			! get accurate nsec delta

	ldx	[%l4 + %lo(hrtime_base)], %l1
	cmp	%l1, %l0			! old hrtime_base > new value?
	bg,pn	%xcc, 9f			!   yes: release lock and panic
	  nop

	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base

	!
	! apply adjustment, if any
	!
	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
	brz	%l0, 2f
						! hrestime_adj == 0 ?
						! yes, skip adjustments
	  clr	%l5				! delay: set adj to zero
	tst	%l0				! is hrestime_adj >= 0 ?
	bge,pt	%xcc, 1f			! yes, go handle positive case
	  srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj

	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
	bl,pt	%xcc, 2f			! yes, use current adj
	  neg	%l5				! delay: %l5 = -adj
	ba,pt	%xcc, 2f
	  mov	%l0, %l5			! no, so set adj = hrestime_adj
1:
	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
	bl,a,pt	%xcc, 2f			! yes, set adj = hrestime_adj
	  mov	%l0, %l5			! delay: adj = hrestime_adj
2:
	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
	sub	%l0, %l5, %l0			! timedelta -= adj

	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta

	or	%l4, %lo(hrestime), %l2
	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
	ldn	[%l2 + CLONGSIZE], %i3
	add	%i3, %l5, %i3			! hrestime.nsec += adj
	add	%i3, %i1, %i3			! hrestime.nsec += nslt

	set	NANOSEC, %l5			! %l5 = NANOSEC
	cmp	%i3, %l5
	bl,pt	%xcc, 5f			! if hrestime.tv_nsec < NANOSEC
	  sethi	%hi(one_sec), %i1		! delay
	add	%i2, 0x1, %i2			! hrestime.tv_sec++
	sub	%i3, %l5, %i3			! hrestime.tv_nsec - NANOSEC
	mov	0x1, %l5
	st	%l5, [%i1 + %lo(one_sec)]	! one_sec = 1
5:
	stn	%i2, [%l2]
	stn	%i3, [%l2 + CLONGSIZE]		! store the new hrestime

	membar	#StoreStore

	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1				! release lock
	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock

	ret
	restore

9:
	!
	! release hres_lock
	!
	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1
	st	%i1, [%l4 + %lo(hres_lock)]

	sethi	%hi(hrtime_base_panic), %o0
	call	panic
	  or	%o0, %lo(hrtime_base_panic), %o0

	SET_SIZE(hres_tick)
613
614#endif	/* lint */
615
616#if !defined(lint) && !defined(__lint)
617
	.seg	".text"
kstat_q_panic_msg:
	.asciz	"kstat_q_exit: qlen == 0"

	/*
	 * Branch target used by the kstat queue-exit routines in this file
	 * when the old queue length is zero.  Opens a new register window
	 * and calls panic() with the message above.
	 */
	ENTRY(kstat_q_panic)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(kstat_q_panic_msg), %o0
	call	panic
	  or	%o0, %lo(kstat_q_panic_msg), %o0	! delay: %o0 = message
	/*NOTREACHED*/
	SET_SIZE(kstat_q_panic)
629
#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

/*
 * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
 *
 * Common body of the kstat queue routines.  On entry %o0 is the base
 * address the QTYPE offsets are applied to and %g1 is the current time.
 * Clobbers %o1-%o5.
 *
 *	QOP	add or sub: how the new queue length is derived from the old
 *	QBR	branch-on-zero opcode (BRZPT/BRZPN) applied to the OLD length
 *	QZERO	where to go when the old length is zero
 *	QRETURN	text emitted just before the final store: "retl" (making
 *		that store the delay slot), a label, or both
 *	QTYPE	offset-name prefix; CNT, LASTUPDATE, TIME and LENTIME are
 *		pasted onto it
 *
 * The new length is stored in the delay slot of QBR, so it is written on
 * both paths.  When the old length is non-zero, the elapsed time since
 * LASTUPDATE is added to TIME, and (old length * elapsed) to LENTIME.
 * LASTUPDATE is set to %g1 by the final store.
 */
#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
649
	.align 16
	/*
	 * kstat_waitq_enter: %o0 = base for the KSTAT_IO_W offsets.
	 * Increments the wait-queue length; if the old length was zero the
	 * time accumulation is skipped.  Returns via the retl emitted by
	 * the macro.
	 */
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)
655
	.align 16
	/*
	 * kstat_waitq_exit: %o0 = base for the KSTAT_IO_W offsets.
	 * Decrements the wait-queue length and accumulates time; branches
	 * to kstat_q_panic if the old length was zero.
	 */
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_exit)
661
	.align 16
	/*
	 * kstat_runq_enter: %o0 = base for the KSTAT_IO_R offsets.
	 * Increments the run-queue length; if the old length was zero the
	 * time accumulation is skipped.
	 */
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)
667
	.align 16
	/*
	 * kstat_runq_exit: %o0 = base for the KSTAT_IO_R offsets.
	 * Decrements the run-queue length and accumulates time; branches
	 * to kstat_q_panic if the old length was zero.
	 */
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_exit)
673
	.align 16
	/*
	 * kstat_waitq_to_runq: %o0 = base for the KSTAT_IO_W/KSTAT_IO_R
	 * offsets.  Performs a wait-queue exit followed by a run-queue
	 * enter using a single time sample (%g1).  The first expansion
	 * passes only a label as QRETURN, so it falls through into the
	 * second; the second expansion's "1f" binds to its own "1:".
	 */
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)
680
	.align 16
	/*
	 * kstat_runq_back_to_waitq: %o0 = base for the
	 * KSTAT_IO_R/KSTAT_IO_W offsets.  Performs a run-queue exit
	 * followed by a wait-queue enter using a single time sample (%g1).
	 */
	ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)
687
688#endif /* lint */
689
690#ifdef lint
691
692int64_t timedelta;
693hrtime_t hres_last_tick;
694volatile timestruc_t hrestime;
695int64_t hrestime_adj;
696volatile int hres_lock;
697uint_t nsec_scale;
698hrtime_t hrtime_base;
699int traptrace_use_stick;
700
701#else
	/*
	 *  -- WARNING --
	 *
	 * The following variables MUST be together on a 128-byte boundary.
	 * In addition to the primary performance motivation (having them all
	 * on the same cache line(s)), code here and in the GET*TIME() macros
	 * assumes that they all have the same high 22 address bits (so
	 * there's only one sethi).
	 *
	 * Examples of that assumption in this file: hres_tick forms
	 * %hi(hrestime) once and reaches hres_lock, nsec_scale, hrtime_base,
	 * hres_last_tick, hrestime_adj and timedelta through %lo() offsets
	 * from it; gethrestime_lasttick does the same from %hi(hres_lock).
	 */
	.seg	".data"
	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
	.global	nsec_shift, adj_shift, native_tick_offset, native_stick_offset

	/* XXX - above comment claims 128-bytes is necessary */
	.align	64
timedelta:
	.word	0, 0		/* int64_t */
hres_last_tick:
	.word	0, 0		/* hrtime_t */
hrestime:
	.nword	0, 0		/* 2 longs */
				/* seconds, then nanoseconds at +CLONGSIZE */
hrestime_adj:
	.word	0, 0		/* int64_t */
hres_lock:
	.word	0		/* int; incremented to release the lock */
nsec_scale:
	.word	0		/* uint_t; loaded by hres_tick as the scale */
hrtime_base:
	.word	0, 0		/* hrtime_t; nanoseconds, set by hres_tick */
traptrace_use_stick:
	.word	0		/* int */
nsec_shift:
	.word	NSEC_SHIFT
adj_shift:
	.word	ADJ_SHIFT
	.align	8
native_tick_offset:
	.word	0, 0		/* 64-bit, 8-byte aligned */
	.align	8
native_stick_offset:
	.word	0, 0		/* 64-bit, 8-byte aligned */
744
745#endif
746
747
748/*
749 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
750 * usec_delay(int n)		[compatibility - should go one day]
751 * Delay by spinning.
752 *
753 * delay for n microseconds.  numbers <= 0 delay 1 usec
754 *
755 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
756 * and variable clock rate for power management requires that we
757 * use %stick to implement this routine.
758 */
759
760#if defined(lint)
761
762/*ARGSUSED*/
763void
764drv_usecwait(clock_t n)
765{}
766
767/*ARGSUSED*/
768void
769usec_delay(int n)
770{}
771
772#else	/* lint */
773
	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
	/*
	 * %o0 = n (microseconds); values <= 0 are treated as 1.
	 *
	 * Computes deadline = start + n * sticks_per_usec + 1 and spins
	 * re-reading the native time until it passes the deadline.
	 * Scratch: %o1-%o4.
	 *
	 * In the loop the cmp is issued BEFORE the time is re-read, so each
	 * pass tests the previous sample.  NOTE(review): this relies on
	 * GET_NATIVE_TIME leaving the condition codes untouched -- confirm
	 * against the macro's definition.
	 */
	brlez,a,pn %o0, 0f		! annulled: delay slot runs only
	  mov	1, %o0			!   when n <= 0
0:
	sethi	%hi(sticks_per_usec), %o1
	lduw	[%o1 + %lo(sticks_per_usec)], %o1
	mulx	%o1, %o0, %o1		! Scale usec to ticks
	inc	%o1			! We don't start on a tick edge
	GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__)
	add	%o1, %o2, %o1		! %o1 = deadline

1:	cmp	%o1, %o2		! deadline vs. last sample
	GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__)
	bgeu,pt	%xcc, 1b		! unsigned: deadline >= sample, spin
	  nop
	retl
	  nop
	SET_SIZE(usec_delay)
	SET_SIZE(drv_usecwait)
794#endif	/* lint */
795
796#if defined(lint)
797
798/* ARGSUSED */
799void
800pil14_interrupt(int level)
801{}
802
803#else
804
/*
 * Level-14 interrupt prologue.
 *
 * Records, in the current CPU structure, the PIL read from %pil and the
 * trapped PC: if TSTATE_PRIV is set in %tstate the PC is stored in
 * CPU_PROFILE_PC and CPU_PROFILE_UPC is zeroed, otherwise the reverse.
 * Control then continues at pil_interrupt_common.  Uses %g1, %g2, %g5, %g6.
 */
	ENTRY_NP(pil14_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%pil, %g6			! %g6 = interrupted PIL
	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
	bnz,a,pt %xcc, 1f			! annulled: the next store runs
	  stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
	ba	pil_interrupt_common		! must be large-disp branch
	  stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
1:	ba	pil_interrupt_common		! must be large-disp branch
	  stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
	SET_SIZE(pil14_interrupt)
823
	ENTRY_NP(tick_rtt)
	!
	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
	! disabled.  If TICK_COMPARE is enabled, we know that we need to
	! reenqueue the interrupt request structure.  We'll then check TICKINT
	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
	! interrupt.  In this case, TICK_COMPARE may have been rewritten
	! recently; we'll compare %o5 to the current time to verify that it's
	! in the future.
	!
	! Note that %o5 is live until after 1f.
	! XXX - there is a subroutine call while %o5 is live!
	!
	! %g5 holds the original %pstate from the point interrupts are
	! disabled until they are re-enabled; every path that reaches 2f
	! after the disable restores it in the delay slot of its branch.
	! All paths finish at current_thread_complete.
	!
	RD_TICKCMPR(%o5,%g1,%g2,__LINE__)
	srlx	%o5, TICKINT_DIS_SHFT, %g1
	brnz,pt	%g1, 2f				! disabled: nothing to do
	  nop

	rdpr 	%pstate, %g5
	andn	%g5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate		! Disable vec interrupts

	sethi	%hi(cbe_level14_inum), %o1
	ldx	[%o1 + %lo(cbe_level14_inum)], %o1
	call	intr_enqueue_req ! preserves %o5 and %g5
	  mov	PIL_14, %o0

	! Check SOFTINT for TICKINT/STICKINT
	rd	SOFTINT, %o4
	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
	andcc	%o4, %o0, %g0
	bz,a,pn	%icc, 2f			! neither bit set: done
	  wrpr	%g0, %g5, %pstate		! Enable vec interrupts

	! clear TICKINT/STICKINT
	wr	%o0, CLEAR_SOFTINT

	!
	! Now that we've cleared TICKINT, we can reread %tick and confirm
	! that the value we programmed is still in the future.  If it isn't,
	! we need to reprogram TICK_COMPARE to fire as soon as possible.
	!
	GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__)	! %o0 = tick
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	  wrpr	%g0, %g5, %pstate		!   delay: enable vec intr

	!
	! If we're here, then we have programmed TICK_COMPARE with a %tick
	! which is in the past; we'll now load an initial step size, and loop
	! until we've managed to program TICK_COMPARE to fire in the future.
	!
	mov	8, %o4				! 8 = arbitrary initial step
1:	add	%o0, %o4, %o5			! Add the step
	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__)	! %o0 = tick
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	  wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
	ba	1b				! No, try again.
	  sllx	%o4, 1, %o4			!    delay: double step size

2:	ba	current_thread_complete
	  nop
	SET_SIZE(tick_rtt)
889
890#endif /* lint */
891
892#if defined(lint)
893
894/* ARGSUSED */
895void
896pil15_interrupt(int level)
897{}
898
899#else   /* lint */
900
/*
 * Level-15 interrupt prologue.
 *
 * Stores the trapped PC in the current CPU structure: into
 * CPU_CPCPROFILE_PC when TSTATE_PRIV is set in %tstate (zeroing
 * CPU_CPCPROFILE_UPC), otherwise into CPU_CPCPROFILE_UPC (zeroing
 * CPU_CPCPROFILE_PC).  Control then continues at pil15_epilogue.
 * Uses %g1, %g2, %g5, %g6.
 */
	ENTRY_NP(pil15_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%tpc, %g5			! %g5 = trapped PC
	rdpr	%tstate, %g6			! %g6 = trap state
	btst	TSTATE_PRIV, %g6		! were we in privileged mode?
	bnz,a,pt %xcc, 1f			! annulled delay slot:
	  stn	%g5, [%g1 + CPU_CPCPROFILE_PC]	!   privileged: kernel PC = tpc
	stn	%g5, [%g1 + CPU_CPCPROFILE_UPC]	! unprivileged: user PC = tpc
	ba	pil15_epilogue			! must be large-disp branch
	  stn	%g0, [%g1 + CPU_CPCPROFILE_PC]	! delay: kernel PC = 0
1:	ba	pil15_epilogue			! must be large-disp branch
	  stn	%g0, [%g1 + CPU_CPCPROFILE_UPC]	! delay: user PC = 0
	SET_SIZE(pil15_interrupt)
917
918#endif  /* lint */
919
920#if defined(lint)
921/*
922 * Prefetch a page_t for write or read, this assumes a linear
923 * scan of sequential page_t's.
924 */
925/*ARGSUSED*/
926void
927prefetch_page_w(void *pp)
928{}
929
930/*ARGSUSED*/
931void
932prefetch_page_r(void *pp)
933{}
934#else	/* lint */
935
/* XXXQ These should be inline templates, not functions */
/* prefetch_page_w(void *pp): no-op in this file; returns immediately. */
        ENTRY(prefetch_page_w)
        retl
	  nop
        SET_SIZE(prefetch_page_w)
941
        ENTRY(prefetch_page_r)
	/* prefetch_page_r(void *pp): no-op in this file; returns immediately. */
        retl
	  nop
        SET_SIZE(prefetch_page_r)
946
947#endif	/* lint */
948
949#if defined(lint)
950/*
951 * Prefetch struct smap for write.
952 */
953/*ARGSUSED*/
954void
955prefetch_smap_w(void *smp)
956{}
957#else	/* lint */
958
/* XXXQ These should be inline templates, not functions */
/* prefetch_smap_w(void *smp): no-op in this file; returns immediately. */
	ENTRY(prefetch_smap_w)
	retl
	  nop
	SET_SIZE(prefetch_smap_w)
964
965#endif	/* lint */
966
967/*
968 * Generic sun4v MMU and Cache operations.
969 */
970
971#if defined(lint)
972
973/*ARGSUSED*/
974void
975vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
976{}
977
978/*ARGSUSED*/
979void
980vtag_flushall(void)
981{}
982
983/*ARGSUSED*/
984void
985vtag_unmap_perm_tl1(uint64_t vaddr, uint64_t ctxnum)
986{}
987
988/*ARGSUSED*/
989void
990vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
991{}
992
993/*ARGSUSED*/
994void
995vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
996{}
997
998/*ARGSUSED*/
999void
1000vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
1001{}
1002
1003/*ARGSUSED*/
1004void
1005vac_flushpage(pfn_t pfnum, int vcolor)
1006{}
1007
1008/*ARGSUSED*/
1009void
1010vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
1011{}
1012
1013/*ARGSUSED*/
1014void
1015flush_instr_mem(caddr_t vaddr, size_t len)
1016{}
1017
1018#else	/* lint */
1019
	ENTRY_NP(vtag_flushpage)
	/*
	 * flush page from the tlb
	 *
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 *
	 * The sfmmup is replaced by the value SFMMU_CPU_CNUM derives from
	 * it, then the MMU_UNMAP_ADDR trap is issued for both the ITLB and
	 * DTLB.  A non-zero status in %o0 goes to panic_bad_hcall with
	 * %o1 = MMU_UNMAP_ADDR.  Scratch: %g1, %g2, %o2.
	 */
	SFMMU_CPU_CNUM(%o1, %g1, %g2)   /* %g1 = sfmmu cnum on this CPU */

	mov	%g1, %o1
	mov	MAP_ITLB | MAP_DTLB, %o2
	ta	MMU_UNMAP_ADDR
	brz,pt	%o0, 1f			! %o0 == 0: success
	  nop
	ba	panic_bad_hcall
	  mov	MMU_UNMAP_ADDR, %o1	! delay: identify the failed call
1:
 	retl
	  nop
	SET_SIZE(vtag_flushpage)
1040
	ENTRY_NP(vtag_flushall)
	/*
	 * void vtag_flushall(void)
	 *
	 * Issues the MMU_DEMAP_ALL fast trap (function number in %o5) with
	 * %o0 = %o1 = 0 and %o2 = MAP_ITLB | MAP_DTLB.  A non-zero status
	 * in %o0 goes to panic_bad_hcall with %o1 = MMU_DEMAP_ALL.
	 */
	mov	%g0, %o0	! XXX no cpu list yet
	mov	%g0, %o1	! XXX no cpu list yet
	mov	MAP_ITLB | MAP_DTLB, %o2
	mov	MMU_DEMAP_ALL, %o5
	ta	FAST_TRAP
	brz,pt	%o0, 1f		! %o0 == 0: success
	  nop
	ba	panic_bad_hcall
	  mov	MMU_DEMAP_ALL, %o1	! delay: identify the failed call
1:
	retl
	  nop
	SET_SIZE(vtag_flushall)
1055
	ENTRY_NP(vtag_unmap_perm_tl1)
	/*
	 * x-trap to unmap perm map entry
	 * %g1 = vaddr
	 * %g2 = ctxnum (KCONTEXT only)
	 *
	 * %o0, %o1, %o2 and %o5 belong to the interrupted context, so they
	 * are parked in %g3-%g6 around the UNMAP_PERM_ADDR fast trap and
	 * restored before the retry.  On a non-zero status, %g1 is set to
	 * a ptl1_panic reason: PTL1_BAD_HCALL by default, refined for
	 * H_ENOMAP and H_EINVAL.
	 */
	mov	%o0, %g3
	mov	%o1, %g4
	mov	%o2, %g5
	mov	%o5, %g6
	mov	%g1, %o0
	mov	%g2, %o1
	mov	MAP_ITLB | MAP_DTLB, %o2
	mov	UNMAP_PERM_ADDR, %o5
	ta	FAST_TRAP
	brz,pt	%o0, 1f			! %o0 == 0: success
	nop

	mov	PTL1_BAD_HCALL, %g1	! default panic reason

	cmp	%o0, H_ENOMAP
	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP, %g1

	cmp	%o0, H_EINVAL
	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_EINVAL, %g1

	ba,a	ptl1_panic
1:
	mov	%g6, %o5
	mov	%g5, %o2
	mov	%g4, %o1
	mov	%g3, %o0
	retry
	SET_SIZE(vtag_unmap_perm_tl1)
1090
1091	ENTRY_NP(vtag_flushpage_tl1)
1092	/*
1093	 * x-trap to flush page from tlb and tsb
1094	 *
1095	 * %g1 = vaddr, zero-extended on 32-bit kernel
1096	 * %g2 = sfmmup
1097	 *
1098	 * assumes TSBE_TAG = 0
1099	 */
1100	srln	%g1, MMU_PAGESHIFT, %g1
1101	slln	%g1, MMU_PAGESHIFT, %g1			/* g1 = vaddr */
1102	mov	%o0, %g3
1103	mov	%o1, %g4
1104	mov	%o2, %g5
1105	mov	%g1, %o0			/* vaddr */
1106
1107	SFMMU_CPU_CNUM(%g2, %o1, %g6)   /* %o1 = sfmmu cnum on this CPU */
1108
1109	mov	MAP_ITLB | MAP_DTLB, %o2
1110	ta	MMU_UNMAP_ADDR
1111	brz,pt	%o0, 1f
1112	nop
1113	  ba	ptl1_panic
1114	mov	PTL1_BAD_HCALL, %g1
11151:
1116	mov	%g5, %o2
1117	mov	%g4, %o1
1118	mov	%g3, %o0
1119	membar #Sync
1120	retry
1121	SET_SIZE(vtag_flushpage_tl1)
1122
	ENTRY_NP(vtag_flush_pgcnt_tl1)
	/*
	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
	 *
	 * NOTE: this handler relies on the fact that no
	 *	interrupts or traps can occur during the loop
	 *	issuing the TLB_DEMAP operations. It is assumed
	 *	that interrupts are disabled and this code is
	 *	fetching from the kernel locked text address.
	 *
	 * assumes TSBE_TAG = 0
	 *
	 * The interrupted context's %o0-%o2 are parked in %g3-%g5 and put
	 * back before the retry.  Inside the loop: %g1 = current vaddr,
	 * %g2 = cnum, %g6 = MMU_PAGESIZE, %g7 = pages remaining.
	 */
	srln	%g1, MMU_PAGESHIFT, %g1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
	mov	%o0, %g3
	mov	%o1, %g4
	mov	%o2, %g5

	and	%g2, SFMMU_PGCNT_MASK, %g7	/* g7 = pgcnt - 1 */
	add	%g7, 1, %g7			/* g7 = pgcnt */

        andn    %g2, SFMMU_PGCNT_MASK, %o0      /* %o0 = sfmmup */

	SFMMU_CPU_CNUM(%o0, %g2, %g6)    /* %g2 = sfmmu cnum on this CPU */

	set	MMU_PAGESIZE, %g6		/* g6 = pgsize */

1:
	mov	%g1, %o0			/* vaddr */
	mov	%g2, %o1			/* cnum */
	mov	MAP_ITLB | MAP_DTLB, %o2
	ta	MMU_UNMAP_ADDR
	brz,pt	%o0, 2f				/* %o0 == 0: success */
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1
2:
	deccc	%g7				/* decr pgcnt */
	bnz,pt	%icc,1b
	  add	%g1, %g6, %g1			/* go to nextpage */

	mov	%g5, %o2
	mov	%g4, %o1
	mov	%g3, %o0
	membar #Sync
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)
1173
	! Not implemented on US1/US2
	!
	! x-trap version of vtag_flushall: issues the MMU_DEMAP_ALL fast trap
	! with %o0 = %o1 = 0 and %o2 = MAP_ITLB | MAP_DTLB.  The interrupted
	! context's %o0-%o3 and %o5 are parked in %g3-%g7 and restored before
	! the retry.  A non-zero status goes to ptl1_panic with
	! %g1 = PTL1_BAD_HCALL.
	!
	ENTRY_NP(vtag_flushall_tl1)
	mov	%o0, %g3
	mov	%o1, %g4
	mov	%o2, %g5
	mov	%o3, %g6	! XXXQ not used?
	mov	%o5, %g7
	mov	%g0, %o0	! XXX no cpu list yet
	mov	%g0, %o1	! XXX no cpu list yet
	mov	MAP_ITLB | MAP_DTLB, %o2
	mov	MMU_DEMAP_ALL, %o5
	ta	FAST_TRAP
	brz,pt	%o0, 1f		! %o0 == 0: success
	  nop
	ba	ptl1_panic
	  mov	PTL1_BAD_HCALL, %g1
1:
	mov	%g7, %o5
	mov	%g6, %o3	! XXXQ not used?
	mov	%g5, %o2
	mov	%g4, %o1
	mov	%g3, %o0
	retry
	SET_SIZE(vtag_flushall_tl1)
1198
/*
 * flush_instr_mem:
 *	Flush a portion of the I-$ starting at vaddr
 * 	%o0 vaddr
 *	%o1 bytes to be flushed
 *
 * Issues one "flush" per ICACHE_FLUSHSZ bytes.  The loop is bottom-tested,
 * so at least one flush is issued even when the byte count is zero; the
 * count is compared unsigned (bgu).
 */

	ENTRY(flush_instr_mem)
	membar	#StoreStore				! Ensure the stores
							! are globally visible
1:
	flush	%o0
	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes = bytes-0x20
	bgu,pt	%ncc, 1b				! more than one chunk left
	  add	%o0, ICACHE_FLUSHSZ, %o0		! vaddr = vaddr+0x20

	retl
	  nop
	SET_SIZE(flush_instr_mem)
1218
1219#endif /* !lint */
1220
1221#if !defined(CUSTOM_FPZERO)
1222
1223/*
1224 * fp_zero() - clear all fp data registers and the fsr
1225 */
1226
1227#if defined(lint) || defined(__lint)
1228
1229void
1230fp_zero(void)
1231{}
1232
1233#else	/* lint */
1234
.global	fp_zero_zero
.align 8
fp_zero_zero:
	.xword	0		/* 8 bytes of zero, loaded by fp_zero below */

	/*
	 * void fp_zero(void)
	 *
	 * Loads %fsr and %f0 (as a double) from the zero constant above,
	 * then copies %f0 into every other even-numbered double register
	 * up to %f62; the last copy sits in the delay slot of retl.
	 * Scratch: %o0.
	 */
	ENTRY_NP(fp_zero)
	sethi	%hi(fp_zero_zero), %o0
	ldx	[%o0 + %lo(fp_zero_zero)], %fsr
	ldd	[%o0 + %lo(fp_zero_zero)], %f0
	fmovd	%f0, %f2
	fmovd	%f0, %f4
	fmovd	%f0, %f6
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14
	fmovd	%f0, %f16
	fmovd	%f0, %f18
	fmovd	%f0, %f20
	fmovd	%f0, %f22
	fmovd	%f0, %f24
	fmovd	%f0, %f26
	fmovd	%f0, %f28
	fmovd	%f0, %f30
	fmovd	%f0, %f32
	fmovd	%f0, %f34
	fmovd	%f0, %f36
	fmovd	%f0, %f38
	fmovd	%f0, %f40
	fmovd	%f0, %f42
	fmovd	%f0, %f44
	fmovd	%f0, %f46
	fmovd	%f0, %f48
	fmovd	%f0, %f50
	fmovd	%f0, %f52
	fmovd	%f0, %f54
	fmovd	%f0, %f56
	fmovd	%f0, %f58
	fmovd	%f0, %f60
	retl
	fmovd	%f0, %f62
	SET_SIZE(fp_zero)
1277
1278#endif	/* lint */
1279#endif  /* CUSTOM_FPZERO */
1280