xref: /titanic_50/usr/src/uts/sun4v/cpu/common_asm.s (revision 355b4669e025ff377602b6fc7caaf30dbc218371)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#if !defined(lint)
29#include "assym.h"
30#endif
31
32/*
33 * General assembly language routines.
34 * It is the intent of this file to contain routines that are
35 * specific to cpu architecture.
36 */
37
38/*
39 * WARNING: If you add a fast trap handler which can be invoked by a
40 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
41 * instead of "done" instruction to return back to the user mode. See
42 * comments for the "fast_trap_done" entry point for more information.
43 */
/*
 * FAST_TRAP_DONE: leave a fast trap handler by branching to the
 * fast_trap_done entry point instead of issuing "done" directly.
 */
44#define	FAST_TRAP_DONE	\
45	ba,a	fast_trap_done
46
47/*
48 * Override GET_NATIVE_TIME for the cpu module code.  This is not
49 * guaranteed to be exactly one instruction, be careful of using
50 * the macro in delay slots.
51 *
52 * Do not use any instruction that modifies condition codes as the
53 * caller may depend on these to remain unchanged across the macro.
 *
 * As defined here the macro is a single read of STICK into "out"; the
 * scr1/scr2 scratch arguments are accepted but not referenced.
54 */
55
56#define	GET_NATIVE_TIME(out, scr1, scr2) \
57	rd	STICK, out
58
/*
 * RD_TICKCMPR reads STICK_COMPARE into "out".  The "scr" argument is not
 * referenced by this definition.
 */
59#define	RD_TICKCMPR(out, scr)		\
60	rd	STICK_COMPARE, out
61
/*
 * WR_TICKCMPR writes "in" to STICK_COMPARE.  The scratch and label
 * arguments are not referenced by this definition.
 */
62#define	WR_TICKCMPR(in,scr1,scr2,label)		\
63	wr	in, STICK_COMPARE
64
65
66#include <sys/clock.h>
67
68#if defined(lint)
69#include <sys/types.h>
70#include <sys/scb.h>
71#include <sys/systm.h>
72#include <sys/regset.h>
73#include <sys/sunddi.h>
74#include <sys/lockstat.h>
75#endif	/* lint */
76
77
78#include <sys/asm_linkage.h>
79#include <sys/privregs.h>
80#include <vm/hat_sfmmu.h>
81#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
82#include <sys/machthread.h>
83#include <sys/clock.h>
84#include <sys/intreg.h>
85#include <sys/psr_compat.h>
86#include <sys/isa_defs.h>
87#include <sys/dditypes.h>
88#include <sys/intr.h>
89#include <sys/hypervisor_api.h>
90
91#if !defined(lint)
92#include "assym.h"
93#endif
94
95#define	ICACHE_FLUSHSZ	0x20
96
97#if defined(lint)
98/*
99 * Softint generated when counter field of tick reg matches value field
100 * of tick_cmpr reg
101 */
102/*ARGSUSED*/
103void
104tickcmpr_set(uint64_t clock_cycles)
105{}
106
107#else   /* lint */
108
/*
 * tickcmpr_set(clock_cycles)
 *
 * Write the value passed in %o0 to the compare register (WR_TICKCMPR),
 * then re-read the current time with bit 63 shifted out.  If the value
 * just written is not strictly greater than the current time, rewrite
 * the compare register with "current time + step" and check again; the
 * step is doubled on every lap.  Leaf routine, no register window.
 */
109	ENTRY_NP(tickcmpr_set)
110	! get 64-bit clock_cycles interval
111	mov	%o0, %o2
112	mov	8, %o3			! A reasonable initial step size
1131:
114	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR
115
116	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
117	sllx	%o0, 1, %o0		!   value we wrote was in the future.
118	srlx	%o0, 1, %o0
119
120	cmp	%o2, %o0		! If the value we wrote was in the
121	bg,pt	%xcc, 2f		!   future, then blow out of here.
122	  sllx	%o3, 1, %o3		! If not, then double our step size,
					!   (delay slot is not annulled, so the
					!   step is doubled before its first use)
123	ba,pt	%xcc, 1b		!   and take another lap.
124	  add	%o0, %o3, %o2		!   delay: new target = now + step
1252:
126	retl
127	  nop
128	SET_SIZE(tickcmpr_set)
129
130#endif  /* lint */
131
132#if defined(lint)
133
134void
135tickcmpr_disable(void)
136{}
137
138#else
139
/*
 * tickcmpr_disable()
 *
 * Write a value with only bit TICKINT_DIS_SHFT set to the compare
 * register.  Leaf routine; uses %g1 and %o0.
 */
140	ENTRY_NP(tickcmpr_disable)
141	mov	1, %g1
142	sllx	%g1, TICKINT_DIS_SHFT, %o0	! %o0 = 1 << TICKINT_DIS_SHFT
143	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
144	retl
145	  nop
146	SET_SIZE(tickcmpr_disable)
147
148#endif
149
150#if defined(lint)
151
152/*
153 * tick_write_delta() increments %tick by the specified delta.  This should
154 * only be called after a CPR event to assure that gethrtime() continues to
155 * increase monotonically.  Obviously, writing %tick needs to be done very
156 * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
157 * this reason, we make sure we're i-cache hot before actually writing to
158 * %tick.
159 *
160 * NOTE: No provision for this on sun4v right now.
161 */
162
163/*ARGSUSED*/
164void
165tick_write_delta(uint64_t delta)
166{}
167
168#else	/* lint */
169
170	.seg	".text"
171tick_write_delta_panic:
172	.asciz	"tick_write_delta: not supported"
173
/*
 * tick_write_delta(delta)
 *
 * Not supported on sun4v: the routine unconditionally panics with the
 * message above.  The high bits of the message address are formed in the
 * caller's %o1 before the save, so they are visible as %i1 in the new
 * window when the delay slot completes the address into %o0.
 */
174	ENTRY_NP(tick_write_delta)
175	sethi	%hi(tick_write_delta_panic), %o1
176        save    %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
177	call	panic
178	  or	%i1, %lo(tick_write_delta_panic), %o0
179	/*NOTREACHED*/
180	retl
181	  nop
	SET_SIZE(tick_write_delta)
182#endif
183
184#if defined(lint)
185/*
186 *  return 1 if disabled
187 */
188
189int
190tickcmpr_disabled(void)
191{ return (0); }
192
193#else   /* lint */
194
/*
 * tickcmpr_disabled()
 *
 * Read the compare register and return it shifted right (logically) by
 * TICKINT_DIS_SHFT.  NOTE(review): the "return 1" contract above holds
 * only if TICKINT_DIS_SHFT selects the top bit (63) -- confirm against
 * the header that defines it.
 */
195	ENTRY_NP(tickcmpr_disabled)
196	RD_TICKCMPR(%g1, %o0)
197	retl
198	  srlx	%g1, TICKINT_DIS_SHFT, %o0	! delay: result = reg >> shift
199	SET_SIZE(tickcmpr_disabled)
200
201#endif  /* lint */
202
203/*
204 * Get current tick
205 */
206#if defined(lint)
207
208u_longlong_t
209gettick(void)
210{ return (0); }
211
212#else   /* lint */
213
/*
 * gettick(): return the raw GET_NATIVE_TIME() value in %o0.  No bits are
 * masked off here (compare gettick_counter()).
 */
214	ENTRY(gettick)
215	GET_NATIVE_TIME(%o0, %o2, %o3)
216	retl
217	  nop
218	SET_SIZE(gettick)
219
220#endif  /* lint */
221
222
223/*
224 * Return the counter portion of the tick register.
225 */
226
227#if defined(lint)
228
229uint64_t
230gettick_counter(void)
231{ return(0); }
232
233#else	/* lint */
234
/*
 * gettick_counter(): read the privileged %tick register directly (not
 * GET_NATIVE_TIME) and return it in %o0 with bit 63 cleared.
 */
235	ENTRY_NP(gettick_counter)
236	rdpr	%tick, %o0
237	sllx	%o0, 1, %o0		! shift bit 63 out ...
238	retl
239	  srlx	%o0, 1, %o0		! shake off npt bit
240	SET_SIZE(gettick_counter)
241#endif	/* lint */
242
243/*
244 * Provide a C callable interface to the trap that reads the hi-res timer.
245 * Returns the 64-bit nanosecond timestamp in %o0.
246 */
247
248#if defined(lint)
249
/*
 * Lint-only C stand-ins for the assembly routines that follow.  They are
 * compiled only when "lint" is defined; the bodies are placeholders that
 * return zero or zero their output argument.
 */
250hrtime_t
251gethrtime(void)
252{
253	return ((hrtime_t)0);
254}
255
256hrtime_t
257gethrtime_unscaled(void)
258{
259	return ((hrtime_t)0);
260}
261
262hrtime_t
263gethrtime_max(void)
264{
265	return ((hrtime_t)0);
266}
267
268void
269scalehrtime(hrtime_t *hrt)
270{
271	*hrt = 0;
272}
273
274void
275gethrestime(timespec_t *tp)
276{
277	tp->tv_sec = 0;
278	tp->tv_nsec = 0;
279}
280
281time_t
282gethrestime_sec(void)
283{
284	return (0);
285}
286
287void
288gethrestime_lasttick(timespec_t *tp)
289{
290	tp->tv_sec = 0;
291	tp->tv_nsec = 0;
292}
293
294/*ARGSUSED*/
295void
296hres_tick(void)
297{
298}
299
300void
301panic_hres_tick(void)
302{
303}
304
305#else	/* lint */
306
/*
 * gethrtime(): evaluate GET_HRTIME() into %g1 (using %o0-%o5 and %g2 as
 * scratch) and return the result in %o0.  Leaf routine.
 */
307	ENTRY_NP(gethrtime)
308	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
309							! %g1 = hrtime
310	retl
311	  mov	%g1, %o0			! delay: return value
312	SET_SIZE(gethrtime)
313
/*
 * gethrtime_unscaled(): return the native time value without converting
 * it to nanoseconds (see scalehrtime() for the conversion).
 */
314	ENTRY_NP(gethrtime_unscaled)
315	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
316	retl
317	  mov	%g1, %o0			! delay: return value
318	SET_SIZE(gethrtime_unscaled)
319
/*
 * gethrtime_waitfree() / dtrace_gethrtime(): two names for one body.
 * Reads the native time and converts it with NATIVE_TIME_TO_NSEC; unlike
 * gethrtime() it does not go through GET_HRTIME().
 */
320	ENTRY_NP(gethrtime_waitfree)
321	ALTENTRY(dtrace_gethrtime)
322	GET_NATIVE_TIME(%g1, %o2, %o3)			! %g1 = native time
323	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)		! %g1 = nanoseconds
324	retl
325	  mov	%g1, %o0			! delay: return value
326	SET_SIZE(dtrace_gethrtime)
327	SET_SIZE(gethrtime_waitfree)
328
/*
 * gethrtime_max(): convert NATIVE_TIME_MAX to nanoseconds and return it.
 * If the converted value is negative when viewed as a signed 64-bit
 * number, return 0x7fffffffffffffff instead.
 */
329	ENTRY(gethrtime_max)
330	NATIVE_TIME_MAX(%g1)
331	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
332
333	! hrtime_t's are signed, max hrtime_t must be positive
334	mov	-1, %o2
335	brlz,a	%g1, 1f
336	  srlx	%o2, 1, %g1		! annulled unless %g1 < 0: %g1 = max
3371:
338	retl
339	  mov	%g1, %o0
340	SET_SIZE(gethrtime_max)
341
/*
 * scalehrtime(hrtime_t *hrt): load the 64-bit value at [%o0], convert it
 * in place with NATIVE_TIME_TO_NSEC, and store it back to [%o0].
 */
342	ENTRY(scalehrtime)
343	ldx	[%o0], %o1
344	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
345	retl
346	  stx	%o1, [%o0]		! delay: write converted value back
347	SET_SIZE(scalehrtime)
348
349/*
350 * Fast trap to return a timestamp, uses trap window, leaves traps
351 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
352 *
353 * This is the handler for the ST_GETHRTIME trap.
354 */
355
/*
 * get_timestamp: compute GET_HRTIME() into %g1, split it into high and
 * low 32-bit halves in %o0 and %o1, and return via FAST_TRAP_DONE.
 * %g2-%g5 and %o0-%o2 are handed to GET_HRTIME as scratch.
 */
356	ENTRY_NP(get_timestamp)
357	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
358	srlx	%g1, 32, %o0				! %o0 = hi32(%g1)
359	srl	%g1, 0, %o1				! %o1 = lo32(%g1)
360	FAST_TRAP_DONE
361	SET_SIZE(get_timestamp)
362
363/*
364 * Macro to convert GET_HRESTIME() bits into a timestamp.
365 *
366 * We use two separate macros so that the platform-dependent GET_HRESTIME()
367 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 *
 * Arguments (all registers, all modified except nano):
 *	hrestsec	seconds; incremented if nanoseconds overflow
 *	hrestnsec	nanoseconds; receives nslt plus the applied adjustment
 *	adj		pending adjustment; may be replaced by the clamp value
 *	nslt		nanoseconds since last tick; shifted right by
 *			ADJ_SHIFT whenever adj is non-zero
 *	nano		overflow threshold compared against hrestnsec
 *
 * Steps:
 *	hrestnsec += nslt (delay slot of the first branch, always executed).
 *	adj == 0: nothing more to apply.
 *	adj > 0:  add min(adj, nslt >> ADJ_SHIFT).
 *	adj < 0:  add adj if adj + (nslt >> ADJ_SHIFT) >= 0, otherwise
 *		  subtract nslt >> ADJ_SHIFT.
 *	Finally, if hrestnsec >= nano, bump hrestsec and subtract nano.
 *
 * The macro modifies the condition codes and defines the numeric local
 * labels 2, 3 and 4.
368 */
369#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
370	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
371	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */	\
372	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
373	  srl	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
374	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
375	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
376	ba	3f;			/* go convert to sec/nsec */	\
377	  add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
3782:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
379	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
380	  add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
381	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
3823:	cmp	hrestnsec, nano;	/* more than a billion? */	\
383	bl,pt	%xcc, 4f;		/* if not, we're done */	\
384	  nop;				/* delay: do nothing :( */	\
385	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
386	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */	\
3874:
388
/*
 * gethrestime(timespec_t *tp): fetch the time with GET_HRESTIME, apply
 * CONV_HRESTIME, and store seconds at [%o0] and nanoseconds at
 * [%o0 + CLONGSIZE].  %o1 = seconds, %o2 = nanoseconds; %o3-%o5 and
 * %g1-%g4 are handed to the macros.
 */
389	ENTRY_NP(gethrestime)
390	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
391	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
392	stn	%o1, [%o0]			! tp->tv_sec
393	retl
394	  stn	%o2, [%o0 + CLONGSIZE]		! delay: tp->tv_nsec
395	SET_SIZE(gethrestime)
396
397/*
398 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
399 * seconds.
400 */
	! Same sequence as gethrestime(), but the seconds value is built
	! directly in %o0 and returned; the nanoseconds in %o2 are discarded.
401	ENTRY_NP(gethrestime_sec)
402	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
403	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
404	retl					! %o0 current hrestime seconds
405	  nop
406	SET_SIZE(gethrestime_sec)
407
408/*
409 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
410 * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
411 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
412 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
413 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
414 * it explicitly.)
415 */
	!
	! gethrestime_lasttick(timespec_t *tp)
	!
	! Snapshot hrestime into *tp (%o0).  The lock word is read before and
	! after the two loads; the first reading has its low bit cleared, so
	! the comparison fails (and the loop retries) if that bit was set or
	! if the word changed in between.  The seconds store sits in the
	! branch delay slot, so it is issued on every lap and rewritten on a
	! retry.
	!
416	ENTRY_NP(gethrestime_lasttick)
417	sethi	%hi(hres_lock), %o1
4180:
419	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
420	membar	#LoadLoad			! Load of lock must complete
421	andn	%o2, 1, %o2			! Mask off lowest bit
422	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
423	add	%o1, %lo(hrestime), %o4
424	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
425	membar	#LoadLoad			! All loads must complete
426	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
427	cmp	%o3, %o2			! If lock is locked or has
428	bne	0b				!   changed, retry.
429	  stn	%g1, [%o0]			! Delay: store seconds
430	retl
431	  stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
432	SET_SIZE(gethrestime_lasttick)
433
434/*
435 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
436 *
437 * This is the handler for the ST_GETHRESTIME trap.
438 */
439
	! Same GET_HRESTIME/CONV_HRESTIME sequence as gethrestime(), but the
	! result is left in %o0 (seconds) and %o1 (nanoseconds) and the
	! handler returns with FAST_TRAP_DONE.  %g1-%g5, %o2 and %o3 are
	! handed to the macros.
440	ENTRY_NP(get_hrestime)
441	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
442	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
443	FAST_TRAP_DONE
444	SET_SIZE(get_hrestime)
445
446/*
447 * Fast trap to return lwp virtual time, uses trap window, leaves traps
448 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
449 * of nanoseconds consumed.
450 *
451 * This is the handler for the ST_GETHRVTIME trap.
452 *
453 * Register usage:
454 *	%o0, %o1 = return lwp virtual time (%o0 is also conversion scratch)
455 * 	%g2 = CPU, then thread
456 * 	%g3 = scratch for CPU_ADDR, then lwp
457 * 	%g1 = scratch
458 * 	%g5 = native time, then the accumulated result
459 */
460	ENTRY_NP(get_virtime)
461	GET_NATIVE_TIME(%g5, %g1, %g2)	! %g5 = native time in ticks
462	CPU_ADDR(%g2, %g3)			! CPU struct ptr to %g2
463	ldn	[%g2 + CPU_THREAD], %g2		! thread pointer to %g2
464	ldn	[%g2 + T_LWP], %g3		! lwp pointer to %g3
465
466	/*
467	 * Subtract start time of current microstate from time
468	 * of day to get increment for lwp virtual time.
469	 */
470	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
471	sub	%g5, %g1, %g5
472
473	/*
474	 * Add current value of ms_acct[LMS_USER]
475	 */
476	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
477	add	%g5, %g1, %g5
478	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)	! convert the sum to nanoseconds
479
480	srl	%g5, 0, %o1			! %o1 = lo32(%g5)
481	srlx	%g5, 32, %o0			! %o0 = hi32(%g5)
482
483	FAST_TRAP_DONE
484	SET_SIZE(get_virtime)
485
486
487
488	.seg	".text"
489hrtime_base_panic:
490	.asciz	"hrtime_base stepping back"
491
492
	!
	! hres_tick()
	!
	! Update the timekeeping variables under hres_lock:
	!   1. Spin until the byte at hres_lock + HRES_LOCK_OFFSET is acquired
	!      with ldstub (plain loads are used while waiting).
	!   2. Record the current native time in hres_last_tick, convert it
	!      to nanoseconds using nsec_scale, and panic if hrtime_base is
	!      greater than the new value; otherwise store it in hrtime_base.
	!   3. Compute adj: zero if hrestime_adj is zero, otherwise
	!      hrestime_adj clamped to +/- (elapsed nsec >> ADJ_SHIFT).
	!   4. timedelta -= adj; hrestime_adj = timedelta.
	!   5. hrestime.tv_nsec += adj + elapsed nsec, carrying into tv_sec
	!      (and setting one_sec to 1) when it reaches NANOSEC.
	!   6. Increment the hres_lock word and return.
	!      NOTE(review): presumably the increment clears the byte set by
	!      ldstub and advances the remaining bits as a change counter --
	!      confirm against the locking description in clock.h.
	!
	! All variables are addressed off a single sethi of hrestime in %l4.
	!
493	ENTRY_NP(hres_tick)
494	save	%sp, -SA(MINFRAME), %sp	! get a new window
495
496	sethi	%hi(hrestime), %l4
497	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
4987:	tst	%l5
499	bz,pt	%xcc, 8f			! if we got it, drive on
500	  ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
501	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
5029:	tst	%l5				! spin with loads until free
503	bz,a,pn	%xcc, 7b
504	  ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
505	ba,pt	%xcc, 9b
506	  ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
5078:
508	membar	#StoreLoad|#StoreStore
509
510	!
511	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
512	!
513	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
514	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
515	stx	%l0, [%l4 + %lo(hres_last_tick)]! prev = current
516	! convert native time to nsecs
517	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)
518
519	sub	%l0, %g1, %i1			! get accurate nsec delta
520
521	ldx	[%l4 + %lo(hrtime_base)], %l1
522	cmp	%l1, %l0			! old hrtime_base > new value?
523	bg,pn	%xcc, 9f			! yes: release lock and panic
524	  nop
525
526	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base
527
528	!
529	! apply adjustment, if any
530	!
531	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
532	brz	%l0, 2f
533						! hrestime_adj == 0 ?
534						! yes, skip adjustments
535	  clr	%l5				! delay: set adj to zero
536	tst	%l0				! is hrestime_adj >= 0 ?
537	bge,pt	%xcc, 1f			! yes, go handle positive case
538	  srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj
539
540	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
541	bl,pt	%xcc, 2f			! yes, use current adj
542	  neg	%l5				! delay: %l5 = -adj
543	ba,pt	%xcc, 2f
544	  mov	%l0, %l5			! no, so set adj = hrestime_adj
5451:
546	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
547	bl,a,pt	%xcc, 2f			! yes, set adj = hrestime_adj
548	  mov	%l0, %l5			! delay: adj = hrestime_adj
5492:
550	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
551	sub	%l0, %l5, %l0			! timedelta -= adj
552
553	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
554	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta
555
556	or	%l4, %lo(hrestime), %l2
557	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
558	ldn	[%l2 + CLONGSIZE], %i3
559	add	%i3, %l5, %i3			! hrestime.nsec += adj
560	add	%i3, %i1, %i3			! hrestime.nsec += nslt
561
562	set	NANOSEC, %l5			! %l5 = NANOSEC
563	cmp	%i3, %l5
564	bl,pt	%xcc, 5f			! if hrestime.tv_nsec < NANOSEC
565	  sethi	%hi(one_sec), %i1		! delay
566	add	%i2, 0x1, %i2			! hrestime.tv_sec++
567	sub	%i3, %l5, %i3			! hrestime.tv_nsec - NANOSEC
568	mov	0x1, %l5
569	st	%l5, [%i1 + %lo(one_sec)]	! one_sec = 1
5705:
571	stn	%i2, [%l2]
572	stn	%i3, [%l2 + CLONGSIZE]		! store the new hrestime
573
574	membar	#StoreStore
575
576	ld	[%l4 + %lo(hres_lock)], %i1
577	inc	%i1				! release lock
578	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock
579
580	ret
581	restore
582
5839:
584	!
585	! release hres_lock
586	!
587	ld	[%l4 + %lo(hres_lock)], %i1
588	inc	%i1
589	st	%i1, [%l4 + %lo(hres_lock)]
590
591	sethi	%hi(hrtime_base_panic), %o0
592	call	panic
593	  or	%o0, %lo(hrtime_base_panic), %o0
594
595	SET_SIZE(hres_tick)
596
597#endif	/* lint */
598
599#if !defined(lint) && !defined(__lint)
600
601	.seg	".text"
602kstat_q_panic_msg:
603	.asciz	"kstat_q_exit: qlen == 0"
604
	! kstat_q_panic: branch target used by the kstat queue-exit routines
	! in this file when the old queue length is zero.  Takes a new
	! register window and calls panic() with the message above.
605	ENTRY(kstat_q_panic)
606	save	%sp, -SA(MINFRAME), %sp
607	sethi	%hi(kstat_q_panic_msg), %o0
608	call	panic
609	  or	%o0, %lo(kstat_q_panic_msg), %o0
610	/*NOTREACHED*/
611	SET_SIZE(kstat_q_panic)
612
/* Branch-on-zero spellings passed to KSTAT_Q_UPDATE as its QBR argument. */
613#define	BRZPN	brz,pn
614#define	BRZPT	brz,pt
615
/*
 * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
 *
 * Update one queue's statistics in the structure addressed by %o0, using
 * the timestamp the caller has placed in %g1.
 *
 *	QOP	"add" or "sub": how the new queue length is derived
 *	QBR	branch taken to QZERO when the OLD length is zero
 *	QZERO	target of that branch
 *	QRETURN	text emitted before the final store; the callers pass
 *		"retl", "1:retl" or "1:", so the store is either a delay
 *		slot or plain fall-through code
 *	QTYPE	field-name prefix; pasted onto CNT, LASTUPDATE, TIME and
 *		LENTIME with the traditional-cpp empty-comment idiom
 *
 * The new length is stored in the delay slot of QBR, so it is written
 * whether or not the branch is taken.  When the old length is non-zero
 * the elapsed time since LASTUPDATE is added to TIME, and old length
 * times elapsed time is added to LENTIME.  Uses %o1-%o5.
 */
616#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
617	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
618	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
619	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
620	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
621	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
622	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
623	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
624	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
625	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
626	add	%o4, %o2, %o4;			/* %o4 = new time */	\
627	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
628	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
629	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
630QRETURN;								\
631	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
632
	!
	! kstat queue accounting entry points.  Each takes the statistics
	! structure in %o0, reads the native time into %g1 and expands
	! KSTAT_Q_UPDATE once (enter/exit) or twice (queue-to-queue moves).
	! "enter" variants add one to the length and skip the time
	! accumulation when the old length was zero; "exit" variants subtract
	! one and branch to kstat_q_panic when the old length was zero.
	!
633	.align 16
634	ENTRY(kstat_waitq_enter)
635	GET_NATIVE_TIME(%g1, %g2, %g3)
636	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
637	SET_SIZE(kstat_waitq_enter)
638
639	.align 16
640	ENTRY(kstat_waitq_exit)
641	GET_NATIVE_TIME(%g1, %g2, %g3)
642	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
643	SET_SIZE(kstat_waitq_exit)
644
645	.align 16
646	ENTRY(kstat_runq_enter)
647	GET_NATIVE_TIME(%g1, %g2, %g3)
648	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
649	SET_SIZE(kstat_runq_enter)
650
651	.align 16
652	ENTRY(kstat_runq_exit)
653	GET_NATIVE_TIME(%g1, %g2, %g3)
654	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
655	SET_SIZE(kstat_runq_exit)
656
	! Leave the wait queue, then enter the run queue, with one timestamp.
657	.align 16
658	ENTRY(kstat_waitq_to_runq)
659	GET_NATIVE_TIME(%g1, %g2, %g3)
660	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
661	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
662	SET_SIZE(kstat_waitq_to_runq)
663
	! Leave the run queue, then enter the wait queue, with one timestamp.
664	.align 16
665	ENTRY(kstat_runq_back_to_waitq)
666	GET_NATIVE_TIME(%g1, %g2, %g3)
667	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
668	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
669	SET_SIZE(kstat_runq_back_to_waitq)
670
671#endif /* lint */
672
673#ifdef lint
674
/* C declarations of the timekeeping variables, seen only by lint. */
675int64_t timedelta;
676hrtime_t hres_last_tick;
677timestruc_t hrestime;
678int64_t hrestime_adj;
679int hres_lock;
680uint_t nsec_scale;
681hrtime_t hrtime_base;
682int traptrace_use_stick;
683
684#else
685	/*
686	 *  -- WARNING --
687	 *
688	 * The following variables MUST be together on a 128-byte boundary.
689	 * In addition to the primary performance motivation (having them all
690	 * on the same cache line(s)), code here and in the GET*TIME() macros
691	 * assumes that they all have the same high 22 address bits (so
692	 * there's only one sethi).
	 *
	 * nsec_shift and adj_shift are initialized from the NSEC_SHIFT and
	 * ADJ_SHIFT assembler constants and exported alongside the rest;
	 * they have no lint declaration above.
693	 */
694	.seg	".data"
695	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
696	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
697	.global	nsec_shift, adj_shift
698
699	/* XXX - above comment claims 128-bytes is necessary */
700	.align	64
701timedelta:
702	.word	0, 0		/* int64_t */
703hres_last_tick:
704	.word	0, 0		/* hrtime_t */
705hrestime:
706	.nword	0, 0		/* 2 longs */
707hrestime_adj:
708	.word	0, 0		/* int64_t */
709hres_lock:
710	.word	0		/* int */
711nsec_scale:
712	.word	0		/* uint_t */
713hrtime_base:
714	.word	0, 0		/* hrtime_t */
715traptrace_use_stick:
716	.word	0		/* int */
717nsec_shift:
718	.word	NSEC_SHIFT
719adj_shift:
720	.word	ADJ_SHIFT
721
722#endif
723
724
725/*
726 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
727 * usec_delay(int n)		[compatibility - should go one day]
728 * Delay by spinning.
729 *
730 * delay for n microseconds.  numbers <= 0 delay 1 usec
731 *
732 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
733 * and variable clock rate for power management requires that we
734 * use %stick to implement this routine.
735 */
736
737#if defined(lint)
738
739/*ARGSUSED*/
740void
741drv_usecwait(clock_t n)
742{}
743
744/*ARGSUSED*/
745void
746usec_delay(int n)
747{}
748
749#else	/* lint */
750
	!
	! drv_usecwait(n) / usec_delay(n): two names for one body.
	!
	! A count <= 0 is replaced by 1.  The count is multiplied by
	! sticks_per_usec, one extra tick is added, and the result is added
	! to the current native time to form a deadline in %o1.  The loop
	! then re-reads the time until the deadline is (unsigned) below the
	! previous reading.  The time read sits between the compare and the
	! branch, so each test uses the value read on the previous lap.
	!
751	ENTRY(drv_usecwait)
752	ALTENTRY(usec_delay)
753	brlez,a,pn %o0, 0f
754	  mov	1, %o0			! annulled unless n <= 0
7550:
756	sethi	%hi(sticks_per_usec), %o1
757	lduw	[%o1 + %lo(sticks_per_usec)], %o1
758	mulx	%o1, %o0, %o1		! Scale usec to ticks
759	inc	%o1			! We don't start on a tick edge
760	GET_NATIVE_TIME(%o2, %o3, %o4)
761	add	%o1, %o2, %o1		! %o1 = deadline
762
7631:	cmp	%o1, %o2
764	GET_NATIVE_TIME(%o2, %o3, %o4)
765	bgeu,pt	%xcc, 1b
766	  nop
767	retl
768	  nop
769	SET_SIZE(usec_delay)
770	SET_SIZE(drv_usecwait)
771#endif	/* lint */
772
773#if defined(lint)
774
/* Lint-only stand-in for the assembly pil14_interrupt prologue. */
775/* ARGSUSED */
776void
777pil14_interrupt(int level)
778{}
779
780#else
781
782/*
783 * Level-14 interrupt prologue.
 *
 * Records the interrupted PIL in the CPU structure, then records the
 * trapped PC: if TSTATE_PRIV is set in %tstate the PC goes into
 * CPU_PROFILE_PC and CPU_PROFILE_UPC is zeroed; otherwise the PC goes
 * into CPU_PROFILE_UPC and CPU_PROFILE_PC is zeroed.  Both paths then
 * branch to pil_interrupt_common.  Uses %g1, %g2, %g5 and %g6.
784 */
785	ENTRY_NP(pil14_interrupt)
786	CPU_ADDR(%g1, %g2)
787	rdpr	%pil, %g6			! %g6 = interrupted PIL
788	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
789	rdpr	%tstate, %g6
790	rdpr	%tpc, %g5
791	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
792	bnz,a,pt %xcc, 1f
793	  stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
794	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
795	ba	pil_interrupt_common		! must be large-disp branch
796	  stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
7971:	ba	pil_interrupt_common		! must be large-disp branch
798	  stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
799	SET_SIZE(pil14_interrupt)
800
801	ENTRY_NP(tick_rtt)
802	!
803	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
804	! disabled.  If TICK_COMPARE is enabled, we know that we need to
805	! reenqueue the interrupt request structure.  We'll then check TICKINT
806	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
807	! interrupt.  In this case, TICK_COMPARE may have been rewritten
808	! recently; we'll compare %o5 to the current time to verify that it's
809	! in the future.
810	!
811	! Note that %o5 is live until after 1f.
812	! XXX - there is a subroutine call while %o5 is live!
813	!
	! %g5 holds the saved %pstate from the point interrupts are disabled
	! until it is written back; every path ends at 2f, which branches to
	! current_thread_complete.
	!
814	RD_TICKCMPR(%o5, %g1)
815	srlx	%o5, TICKINT_DIS_SHFT, %g1
816	brnz,pt	%g1, 2f				! disabled: nothing to do
817	  nop
818
819	rdpr 	%pstate, %g5
820	andn	%g5, PSTATE_IE, %g1
821	wrpr	%g0, %g1, %pstate		! Disable vec interrupts
822
823	sethi	%hi(cbe_level14_inum), %o1
824	ld	[%o1 + %lo(cbe_level14_inum)], %o1
825	call	intr_enqueue_req ! preserves %o5 and %g5
826	  mov	PIL_14, %o0
827
828	! Check SOFTINT for TICKINT/STICKINT
829	rd	SOFTINT, %o4
830	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
831	andcc	%o4, %o0, %g0
832	bz,a,pn	%icc, 2f
833	  wrpr	%g0, %g5, %pstate		! Enable vec interrupts
834
835	! clear TICKINT/STICKINT
836	wr	%o0, CLEAR_SOFTINT
837
838	!
839	! Now that we've cleared TICKINT, we can reread %tick and confirm
840	! that the value we programmed is still in the future.  If it isn't,
841	! we need to reprogram TICK_COMPARE to fire as soon as possible.
842	!
843	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
844	sllx	%o0, 1, %o0			! Clear the DIS bit
845	srlx	%o0, 1, %o0
846	cmp	%o5, %o0			! In the future?
847	bg,a,pt	%xcc, 2f			! Yes, drive on.
848	  wrpr	%g0, %g5, %pstate		!   delay: enable vec intr
849
850	!
851	! If we're here, then we have programmed TICK_COMPARE with a %tick
852	! which is in the past; we'll now load an initial step size, and loop
853	! until we've managed to program TICK_COMPARE to fire in the future.
854	!
855	mov	8, %o4				! 8 = arbitrary initial step
8561:	add	%o0, %o4, %o5			! Add the step
857	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
858	GET_NATIVE_TIME(%o0, %g1, %g2)		! %o0 = tick
859	sllx	%o0, 1, %o0			! Clear the DIS bit
860	srlx	%o0, 1, %o0
861	cmp	%o5, %o0			! In the future?
862	bg,a,pt	%xcc, 2f			! Yes, drive on.
863	  wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
864	ba	1b				! No, try again.
865	  sllx	%o4, 1, %o4			!    delay: double step size
866
8672:	ba	current_thread_complete
868	  nop
869	SET_SIZE(tick_rtt)
870
871#endif /* lint */
872
873#if defined(lint)
874/*
875 * Prefetch a page_t for write or read, this assumes a linear
876 * scan of sequential page_t's.
877 */
878/*ARGSUSED*/
879void
880prefetch_page_w(void *pp)
881{}
882
883/*ARGSUSED*/
884void
885prefetch_page_r(void *pp)
886{}
887#else	/* lint */
888
889/* XXXQ These should be inline templates, not functions */
/* In this file both routines return immediately; no prefetch is issued. */
890        ENTRY(prefetch_page_w)
891        retl
892	  nop
893        SET_SIZE(prefetch_page_w)
894
895        ENTRY(prefetch_page_r)
896        retl
897	  nop
898        SET_SIZE(prefetch_page_r)
899
900#endif	/* lint */
901
902#if defined(lint)
903/*
904 * Prefetch struct smap for write.
905 */
906/*ARGSUSED*/
907void
908prefetch_smap_w(void *smp)
909{}
910#else	/* lint */
911
912/* XXXQ These should be inline templates, not functions */
/* In this file the routine returns immediately; no prefetch is issued. */
913	ENTRY(prefetch_smap_w)
914	retl
915	  nop
916	SET_SIZE(prefetch_smap_w)
917
918#endif	/* lint */
919
920/*
921 * Generic sun4v MMU and Cache operations.
922 */
923
924#if defined(lint)
925
/*
 * Lint-only C stand-ins for the assembly MMU and cache routines that
 * follow.  All bodies are empty.
 */
926/*ARGSUSED*/
927void
928vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
929{}
930
931/*ARGSUSED*/
932void
933vtag_flushall(void)
934{}
935
936/*ARGSUSED*/
937void
938vtag_unmap_perm_tl1(uint64_t vaddr, uint64_t ctxnum)
939{}
940
941/*ARGSUSED*/
942void
943vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
944{}
945
946/*ARGSUSED*/
947void
948vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
949{}
950
951/*ARGSUSED*/
952void
953vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
954{}
955
956/*ARGSUSED*/
957void
958vac_flushpage(pfn_t pfnum, int vcolor)
959{}
960
961/*ARGSUSED*/
962void
963vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
964{}
965
966/*ARGSUSED*/
967void
968flush_instr_mem(caddr_t vaddr, size_t len)
969{}
970
971#else	/* lint */
972
973	ENTRY_NP(vtag_flushpage)
974	/*
975	 * flush page from the tlb
976	 *
977	 * %o0 = vaddr
978	 * %o1 = sfmmup
	 *
	 * The context number for this CPU is looked up from sfmmup and the
	 * MMU_UNMAP_ADDR trap is issued with arguments (vaddr, cnum,
	 * MAP_ITLB | MAP_DTLB).  A non-zero value left in %o0 by the trap
	 * is treated as failure and sent to panic_bad_hcall with
	 * MMU_UNMAP_ADDR in %o1.
979	 */
980	SFMMU_CPU_CNUM(%o1, %g1, %g2)   /* %g1 = sfmmu cnum on this CPU */
981
982	mov	%g1, %o1
983	mov	MAP_ITLB | MAP_DTLB, %o2
984	ta	MMU_UNMAP_ADDR
985	brz,pt	%o0, 1f				! zero status: success
986	  nop
987	ba	panic_bad_hcall
988	  mov	MMU_UNMAP_ADDR, %o1
9891:
990 	retl
991	  nop
992	SET_SIZE(vtag_flushpage)
993
	! vtag_flushall(): issue the MMU_DEMAP_ALL request through FAST_TRAP
	! with both leading arguments zero and MAP_ITLB | MAP_DTLB as flags.
	! A non-zero value left in %o0 is sent to panic_bad_hcall with
	! MMU_DEMAP_ALL in %o1.
994	ENTRY_NP(vtag_flushall)
995	mov	%g0, %o0	! XXX no cpu list yet
996	mov	%g0, %o1	! XXX no cpu list yet
997	mov	MAP_ITLB | MAP_DTLB, %o2
998	mov	MMU_DEMAP_ALL, %o5
999	ta	FAST_TRAP
1000	brz,pt	%o0, 1f
1001	  nop
1002	ba	panic_bad_hcall
1003	  mov	MMU_DEMAP_ALL, %o1
10041:
1005	retl
1006	  nop
1007	SET_SIZE(vtag_flushall)
1008
1009	ENTRY_NP(vtag_unmap_perm_tl1)
1010	/*
1011	 * x-trap to unmap perm map entry
1012	 * %g1 = vaddr
1013	 * %g2 = ctxnum (KCONTEXT only)
	 *
	 * The interrupted context's %o0, %o1, %o2 and %o5 are parked in
	 * %g3-%g6 around the UNMAP_PERM_ADDR request and restored before
	 * "retry".  On a non-zero status, %g1 is loaded with a ptl1 panic
	 * code (a specific one for H_ENOMAP or H_EINVAL, PTL1_BAD_HCALL
	 * otherwise) and control goes to ptl1_panic.
1014	 */
1015	mov	%o0, %g3
1016	mov	%o1, %g4
1017	mov	%o2, %g5
1018	mov	%o5, %g6
1019	mov	%g1, %o0
1020	mov	%g2, %o1
1021	mov	MAP_ITLB | MAP_DTLB, %o2
1022	mov	UNMAP_PERM_ADDR, %o5
1023	ta	FAST_TRAP
1024	brz,pt	%o0, 1f
1025	nop					! delay slot of the brz above
1026
1027	mov	PTL1_BAD_HCALL, %g1		! default panic code
1028
1029	cmp	%o0, H_ENOMAP
1030	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP, %g1
1031
1032	cmp	%o0, H_EINVAL
1033	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_EINVAL, %g1
1034
1035	ba,a	ptl1_panic
10361:
1037	mov	%g6, %o5
1038	mov	%g5, %o2
1039	mov	%g4, %o1
1040	mov	%g3, %o0
1041	retry
1042	SET_SIZE(vtag_unmap_perm_tl1)
1043
1044	ENTRY_NP(vtag_flushpage_tl1)
1045	/*
1046	 * x-trap to flush page from tlb and tsb
1047	 *
1048	 * %g1 = vaddr, zero-extended on 32-bit kernel
1049	 * %g2 = sfmmup
1050	 *
1051	 * assumes TSBE_TAG = 0
	 *
	 * The interrupted context's %o0-%o2 are parked in %g3-%g5 around
	 * the MMU_UNMAP_ADDR trap and restored before "retry".  A non-zero
	 * status goes to ptl1_panic with PTL1_BAD_HCALL in %g1.
1052	 */
1053	srln	%g1, MMU_PAGESHIFT, %g1
1054	slln	%g1, MMU_PAGESHIFT, %g1			/* g1 = vaddr */
1055	mov	%o0, %g3
1056	mov	%o1, %g4
1057	mov	%o2, %g5
1058	mov	%g1, %o0			/* vaddr */
1059
1060	SFMMU_CPU_CNUM(%g2, %o1, %g6)   /* %o1 = sfmmu cnum on this CPU */
1061
1062	mov	MAP_ITLB | MAP_DTLB, %o2
1063	ta	MMU_UNMAP_ADDR
1064	brz,pt	%o0, 1f
1065	nop					! delay slot of the brz above
1066	  ba	ptl1_panic			! not a delay slot, despite the indent
1067	mov	PTL1_BAD_HCALL, %g1		! delay slot of the ba above
10681:
1069	mov	%g5, %o2
1070	mov	%g4, %o1
1071	mov	%g3, %o0
1072	membar #Sync
1073	retry
1074	SET_SIZE(vtag_flushpage_tl1)
1075
1076	ENTRY_NP(vtag_flush_pgcnt_tl1)
1077	/*
1078	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
1079	 *
1080	 * %g1 = vaddr, zero-extended on 32-bit kernel
1081	 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
1082	 *
1083	 * NOTE: this handler relies on the fact that no
1084	 *	interrupts or traps can occur during the loop
1085	 *	issuing the TLB_DEMAP operations. It is assumed
1086	 *	that interrupts are disabled and this code is
1087	 *	fetching from the kernel locked text address.
1088	 *
1089	 * assumes TSBE_TAG = 0
	 *
	 * Inside the loop: %g1 = current vaddr, %g2 = context number,
	 * %g6 = MMU_PAGESIZE, %g7 = pages remaining.  The interrupted
	 * context's %o0-%o2 are parked in %g3-%g5 and restored before
	 * "retry".  A non-zero status from any unmap goes to ptl1_panic.
1090	 */
1091	srln	%g1, MMU_PAGESHIFT, %g1
1092	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
1093	mov	%o0, %g3
1094	mov	%o1, %g4
1095	mov	%o2, %g5
1096
1097	and	%g2, SFMMU_PGCNT_MASK, %g7	/* g7 = pgcnt - 1 */
1098	add	%g7, 1, %g7			/* g7 = pgcnt */
1099
1100        andn    %g2, SFMMU_PGCNT_MASK, %o0      /* %o0 = sfmmup */
1101
1102	SFMMU_CPU_CNUM(%o0, %g2, %g6)    /* %g2 = sfmmu cnum on this CPU */
1103
1104	set	MMU_PAGESIZE, %g6		/* g6 = pgsize */
1105
11061:
1107	mov	%g1, %o0			/* vaddr */
1108	mov	%g2, %o1			/* cnum */
1109	mov	MAP_ITLB | MAP_DTLB, %o2
1110	ta	MMU_UNMAP_ADDR
1111	brz,pt	%o0, 2f
1112	  nop
1113	ba	ptl1_panic
1114	  mov	PTL1_BAD_HCALL, %g1
11152:
1116	deccc	%g7				/* decr pgcnt */
1117	bnz,pt	%icc,1b
1118	  add	%g1, %g6, %g1			/* go to nextpage */
1119
1120	mov	%g5, %o2
1121	mov	%g4, %o1
1122	mov	%g3, %o0
1123	membar #Sync
1124	retry
1125	SET_SIZE(vtag_flush_pgcnt_tl1)
1126
1127	! Not implemented on US1/US2
	!
	! x-trap form of vtag_flushall: park the interrupted context's %o0,
	! %o1, %o2, %o3 and %o5 in %g3-%g7, issue the MMU_DEMAP_ALL request
	! through FAST_TRAP, restore the registers and "retry".  A non-zero
	! status goes to ptl1_panic with PTL1_BAD_HCALL in %g1.
	!
1128	ENTRY_NP(vtag_flushall_tl1)
1129	mov	%o0, %g3
1130	mov	%o1, %g4
1131	mov	%o2, %g5
1132	mov	%o3, %g6	! XXXQ not used?
1133	mov	%o5, %g7
1134	mov	%g0, %o0	! XXX no cpu list yet
1135	mov	%g0, %o1	! XXX no cpu list yet
1136	mov	MAP_ITLB | MAP_DTLB, %o2
1137	mov	MMU_DEMAP_ALL, %o5
1138	ta	FAST_TRAP
1139	brz,pt	%o0, 1f
1140	  nop
1141	ba	ptl1_panic
1142	  mov	PTL1_BAD_HCALL, %g1
11431:
1144	mov	%g7, %o5
1145	mov	%g6, %o3	! XXXQ not used?
1146	mov	%g5, %o2
1147	mov	%g4, %o1
1148	mov	%g3, %o0
1149	retry
1150	SET_SIZE(vtag_flushall_tl1)
1151
1152/*
1153 * vac_flushpage(pfnum, color)
1154 *	Flush 1 8k page of the D-$ with physical page = pfnum
1155 *	Algorithm:
1156 *		The spitfire dcache is a 16k direct mapped virtual indexed,
1157 *		physically tagged cache.  Given the pfnum we read all cache
1158 *		lines for the corresponding page in the cache (determined by
1159 *		the color).  Each cache line is compared with
1160 *		the tag created from the pfnum. If the tags match we flush
1161 *		the line.
1162 */
	!
	! The four vac_* entry points below perform no cache operation in
	! this file: the C-callable ones return immediately and the x-trap
	! (_tl1) ones execute "retry" immediately.
	!
1163	ENTRY(vac_flushpage)
1164	/*
1165	 * flush page from the d$
1166	 *
1167	 * %o0 = pfnum, %o1 = color
1168	 */
1169	! XXXQ
1170	retl
1171	  nop
1172	SET_SIZE(vac_flushpage)
1173
1174	ENTRY_NP(vac_flushpage_tl1)
1175	/*
1176	 * x-trap to flush page from the d$
1177	 *
1178	 * %g1 = pfnum, %g2 = color
1179	 */
1180	! XXXQ
1181	retry
1182	SET_SIZE(vac_flushpage_tl1)
1183
1184	ENTRY(vac_flushcolor)
1185	/*
1186	 * %o0 = vcolor
1187	 */
1188	! XXXQ
1189	retl
1190	  nop
1191	SET_SIZE(vac_flushcolor)
1192
1193	ENTRY(vac_flushcolor_tl1)
1194	/*
1195	 * %g1 = vcolor
1196	 */
1197	! XXXQ
1198	retry
1199	SET_SIZE(vac_flushcolor_tl1)
1200
1201/*
1202 * flush_instr_mem:
1203 *	Flush a portion of the I-$ starting at vaddr
1204 * 	%o0 vaddr
1205 *	%o1 bytes to be flushed
1206 */
1207
	!
	! flush_instr_mem(vaddr, len)
	!
	! Issue "flush" at vaddr and then at every ICACHE_FLUSHSZ bytes while
	! the remaining byte count stays above zero (unsigned).  The first
	! flush is issued before the count is tested, so at least one flush
	! is always executed.
	!
1208	ENTRY(flush_instr_mem)
1209	membar	#StoreStore				! Ensure the stores
1210							! are globally visible
12111:
1212	flush	%o0
1213	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes = bytes-0x20
1214	bgu,pt	%ncc, 1b
1215	  add	%o0, ICACHE_FLUSHSZ, %o0		! vaddr = vaddr+0x20
1216
1217	retl
1218	  nop
1219	SET_SIZE(flush_instr_mem)
1220
1221#endif /* !lint */
1222
1223/*
1224 * fp_zero() - clear all fp data registers and the fsr
1225 */
1226
1227#if defined(lint) || defined(__lint)
1228
1229void
1230fp_zero(void)
1231{}
1232
1233#else	/* lint */
1234
/* 8-byte aligned 64-bit zero used as the load source by fp_zero(). */
1235.global	fp_zero_zero
1236.align 8
1237fp_zero_zero:
1238	.xword	0
1239
	!
	! fp_zero(): load zero into the FSR and into %f0, then copy %f0 into
	! every other even-numbered double register up to %f62 (the last
	! copy runs in the delay slot of retl).
	!
	! The FSR is loaded with "ldx", the documented SPARC V9 mnemonic for
	! the 64-bit FSR load; "ldd" is kept for the floating-point data
	! register.
	!
1240	ENTRY_NP(fp_zero)
1241	sethi	%hi(fp_zero_zero), %o0
1242	ldx	[%o0 + %lo(fp_zero_zero)], %fsr
1243	ldd	[%o0 + %lo(fp_zero_zero)], %f0
1244	fmovd	%f0, %f2
1245	fmovd	%f0, %f4
1246	fmovd	%f0, %f6
1247	fmovd	%f0, %f8
1248	fmovd	%f0, %f10
1249	fmovd	%f0, %f12
1250	fmovd	%f0, %f14
1251	fmovd	%f0, %f16
1252	fmovd	%f0, %f18
1253	fmovd	%f0, %f20
1254	fmovd	%f0, %f22
1255	fmovd	%f0, %f24
1256	fmovd	%f0, %f26
1257	fmovd	%f0, %f28
1258	fmovd	%f0, %f30
1259	fmovd	%f0, %f32
1260	fmovd	%f0, %f34
1261	fmovd	%f0, %f36
1262	fmovd	%f0, %f38
1263	fmovd	%f0, %f40
1264	fmovd	%f0, %f42
1265	fmovd	%f0, %f44
1266	fmovd	%f0, %f46
1267	fmovd	%f0, %f48
1268	fmovd	%f0, %f50
1269	fmovd	%f0, %f52
1270	fmovd	%f0, %f54
1271	fmovd	%f0, %f56
1272	fmovd	%f0, %f58
1273	fmovd	%f0, %f60
1274	retl
1275	fmovd	%f0, %f62
1276	SET_SIZE(fp_zero)
1277
1278#endif	/* lint */
1279