xref: /illumos-gate/usr/src/uts/sun4v/cpu/common_asm.S (revision 5d9d9091f564c198a760790b0bfa72c44e17912b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25#include "assym.h"
26
27/*
28 * General assembly language routines.
29 * It is the intent of this file to contain routines that are
30 * specific to cpu architecture.
31 */
32
/*
 * WARNING: If you add a fast trap handler which can be invoked by a
 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
 * instead of the "done" instruction to return to user mode. See the
 * comments for the "fast_trap_done" entry point for more information.
 *
 * FAST_TRAP_DONE is a single annulled branch-always to fast_trap_done,
 * so the instruction following the macro is never executed as a delay
 * slot.
 */
#define	FAST_TRAP_DONE	\
	ba,a	fast_trap_done
41
42#include <sys/machclock.h>
43#include <sys/clock.h>
44
45
46#include <sys/asm_linkage.h>
47#include <sys/privregs.h>
48#include <vm/hat_sfmmu.h>
49#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
50#include <sys/machthread.h>
51#include <sys/clock.h>
52#include <sys/intreg.h>
53#include <sys/psr_compat.h>
54#include <sys/isa_defs.h>
55#include <sys/dditypes.h>
56#include <sys/intr.h>
57#include <sys/hypervisor_api.h>
58
59#include "assym.h"
60
61#define	ICACHE_FLUSHSZ	0x20
62
/*
 * tickcmpr_set(%o0 = value for TICK_CMPR)
 *
 * A softint is generated when the counter field of the tick register
 * matches the value field of the tick_cmpr register.  Program TICK_CMPR
 * with the caller's value, then re-read the native time.  If the value
 * just written is not strictly later than the time read back, write
 * (time read back + step) instead and check again.  The step is doubled
 * on every lap, so the first retry uses 16.
 */
	ENTRY_NP(tickcmpr_set)
	mov	8, %o3			! step size; doubled before each use
	mov	%o0, %o2		! %o2 = value to program
1:
	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0,%o4,%o5,__LINE__)	! %o0 = native time now

	sllx	%o3, 1, %o3		! step *= 2 (does not touch %xcc)
	subcc	%o2, %o0, %g0		! programmed value vs. now
	ble,a,pn %xcc, 1b		! not in the future: take another lap
	  add	%o0, %o3, %o2		!   with now + step (annulled on exit)
	retl
	  nop
	SET_SIZE(tickcmpr_set)
87
/*
 * tickcmpr_disable: write TICK_CMPR with only bit TICKINT_DIS_SHFT set.
 * tickcmpr_disabled() tests the same bit to report the disabled state.
 */
	ENTRY_NP(tickcmpr_disable)
	mov	1, %g1
	sllx	%g1, TICKINT_DIS_SHFT, %o0	! %o0 = 1 << TICKINT_DIS_SHFT
	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
	retl
	  nop
	SET_SIZE(tickcmpr_disable)
95
96	.seg	".text"
97tick_write_delta_panic:
98	.asciz	"tick_write_delta: not supported, delta: 0x%lx"
99
100/*
101 * tick_write_delta() is intended to increment %stick by the specified delta,
102 * but %stick is only writeable in hyperprivileged mode and at present there
103 * is no provision for this. tick_write_delta is called by the cylic subsystem
104 * if a negative %stick delta is observed after cyclic processing is resumed
105 * after an event such as an OS suspend/resume. On sun4v, the suspend/resume
106 * routines should adjust the %stick offset preventing the cyclic subsystem
107 * from detecting a negative delta. If a negative delta is detected, panic the
108 * system. The negative delta could be caused by improper %stick
109 * synchronization after a suspend/resume.
110 */
111	ENTRY_NP(tick_write_delta)
112	sethi	%hi(tick_write_delta_panic), %o1
113        save    %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
114	mov	%i0, %o1
115	call	panic
116	  or	%i1, %lo(tick_write_delta_panic), %o0
117	/*NOTREACHED*/
118	retl
119	  nop
120
/*
 * tickcmpr_disabled: read TICK_CMPR and return it shifted right by
 * TICKINT_DIS_SHFT in %o0, i.e. non-zero when the disable bit is set.
 */
	ENTRY_NP(tickcmpr_disabled)
	RD_TICKCMPR(%g1,%o0,%o1,__LINE__)	! %g1 = TICK_CMPR
	retl
	  srlx	%g1, TICKINT_DIS_SHFT, %o0	! delay: isolate disable bit
	SET_SIZE(tickcmpr_disabled)
126
/*
 * Get current tick
 *
 * gettick() and its alternate entry randtick() share one body: the value
 * produced by GET_NATIVE_TIME is returned in %o0.  %o2 and %o3 are handed
 * to the macro (presumably as scratch registers).
 */

	ENTRY(gettick)
	ALTENTRY(randtick)
	GET_NATIVE_TIME(%o0,%o2,%o3,__LINE__)	! %o0 = native time
	retl
	  nop
	SET_SIZE(randtick)
	SET_SIZE(gettick)
138
/*
 * Get current tick. For trapstat use only.
 *
 * The read is placed in the retl delay slot, so RD_TICK_PHYSICAL has to
 * expand to exactly one instruction.
 * NOTE(review): confirm that against the macro's definition.
 */
	ENTRY(rdtick)
	retl
	RD_TICK_PHYSICAL(%o0)
	SET_SIZE(rdtick)
146
147
/*
 * Return the counter portion of the tick register.
 *
 * The value is produced by RD_TICK into %o0; %o1 and %o2 are handed to
 * the macro (presumably as scratch registers).
 */

	ENTRY_NP(gettick_counter)
	RD_TICK(%o0,%o1,%o2,__LINE__)
	retl
	nop
	SET_SIZE(gettick_counter)
157
/*
 * gettick_npt: return bit 63 of the value read by RD_TICK_PHYSICAL
 * (0 or 1) in %o0.
 */
	ENTRY_NP(gettick_npt)
	RD_TICK_PHYSICAL(%o0)
	retl
	srlx	%o0, 63, %o0		! delay: keep only the top bit
	SET_SIZE(gettick_npt)
163
/*
 * getstick_npt: return bit 63 of the value read by RD_STICK_PHYSICAL
 * (0 or 1) in %o0.
 */
	ENTRY_NP(getstick_npt)
	RD_STICK_PHYSICAL(%o0)
	retl
	srlx	%o0, 63, %o0		! delay: keep only the top bit
	SET_SIZE(getstick_npt)
169
/*
 * Provide a C callable interface to the trap that reads the hi-res timer.
 * Returns the 64-bit nanosecond timestamp in %o0.
 *
 * GET_HRTIME leaves the result in %g1; the remaining registers given to
 * the macro (%o0-%o5, %g2) are presumably scratch.  The only explicit
 * write to a return register here is the copy of %g1 into %o0.
 */

	ENTRY_NP(gethrtime)
	GET_HRTIME(%g1,%o0,%o1,%o2,%o3,%o4,%o5,%g2,__LINE__)
							! %g1 = hrtime
	retl
	  mov	%g1, %o0
	SET_SIZE(gethrtime)
181
/*
 * gethrtime_unscaled: return the native time, without conversion to
 * nanoseconds, in %o0.
 */
	ENTRY_NP(gethrtime_unscaled)
	GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__)	! %g1 = native time
	retl
	  mov	%g1, %o0
	SET_SIZE(gethrtime_unscaled)
187
/*
 * gethrtime_waitfree / dtrace_gethrtime: read the native time and convert
 * it with NATIVE_TIME_TO_NSEC; the result is returned in %o0.  Unlike
 * gethrtime(), this path does not go through GET_HRTIME.
 */
	ENTRY_NP(gethrtime_waitfree)
	ALTENTRY(dtrace_gethrtime)
	GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__)	! %g1 = native time
	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)	! %g1 = nanoseconds
	retl
	  mov	%g1, %o0
	SET_SIZE(dtrace_gethrtime)
	SET_SIZE(gethrtime_waitfree)
196
/*
 * gethrtime_max: convert NATIVE_TIME_MAX to nanoseconds and return it in
 * %o0.  hrtime_t is signed, so if the converted value has its sign bit
 * set it is replaced by the largest positive 64-bit value.
 */
	ENTRY(gethrtime_max)
	NATIVE_TIME_MAX(%g1)
	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

	mov	-1, %o2			! %o2 = all ones
	brgez	%g1, 1f			! already non-negative: keep it
	  nop
	srlx	%o2, 1, %g1		! negative: clamp to 0x7fff...ffff
1:
	retl
	  mov	%g1, %o0
	SET_SIZE(gethrtime_max)
209
/*
 * scalehrtime(%o0 = pointer to a 64-bit time value)
 *
 * Load the value, run it through NATIVE_TIME_TO_NSEC and store the
 * result back in place.
 */
	ENTRY(scalehrtime)
	ldx	[%o0], %o1			! %o1 = *%o0
	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
	retl
	  stx	%o1, [%o0]			! delay: write back scaled value
	SET_SIZE(scalehrtime)
216
/*
 * Handler for the ST_GETHRTIME fast trap.
 *
 * Runs in the trap window with traps left disabled and hands back a
 * 64-bit nanosecond timestamp split across two registers:
 *	%o0 = upper 32 bits, %o1 = lower 32 bits.
 */

	ENTRY_NP(get_timestamp)
	GET_HRTIME(%g1,%g2,%g3,%g4,%g5,%o0,%o1,%o2,__LINE__)	! %g1 = hrtime
	srl	%g1, 0, %o1				! low word:  bits 31..0
	srlx	%g1, 32, %o0				! high word: bits 63..32
	FAST_TRAP_DONE
	SET_SIZE(get_timestamp)
231
/*
 * Macro to convert GET_HRESTIME() bits into a timestamp.
 *
 * We use two separate macros so that the platform-dependent GET_HRESTIME()
 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 *
 * Arguments (all registers, all modified except nano):
 *	hrestsec, hrestnsec	seconds / nanoseconds, updated in place
 *	adj			outstanding adjustment (hrestime_adj)
 *	nslt			nanoseconds to add to hrestnsec
 *	nano			nanoseconds per second
 *
 * nslt is always added to hrestnsec (the add sits in the non-annulled
 * delay slot of the first branch).  If adj is non-zero, an additional
 * correction is applied whose magnitude is limited to nslt >> ADJ_SHIFT:
 * min(adj, limit) when adj is positive, max(adj, -limit) when negative.
 * Finally whole seconds are carried out of hrestnsec into hrestsec until
 * hrestnsec < nano.
 *
 * The macro defines local labels 2, 3 and 4.
 */
#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */	\
	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
	  srlx	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
	ba	3f;			/* go convert to sec/nsec */	\
	  add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
2:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
	  add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
3:	cmp	hrestnsec, nano;	/* more than a billion? */	\
	bl,pt	%xcc, 4f;		/* if not, we're done */	\
	  nop;				/* delay: do nothing :( */	\
	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */	\
	ba,a	3b;			/* check >= billion again */	\
4:
258
/*
 * gethrestime(%o0 = pointer to a {seconds, nanoseconds} pair of longs)
 *
 * Fetch the raw hrestime state with GET_HRESTIME, normalize it with
 * CONV_HRESTIME and store seconds at [%o0] and nanoseconds at
 * [%o0 + CLONGSIZE].
 */
	ENTRY_NP(gethrestime)
	GET_HRESTIME(%o1,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__)
	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
	stn	%o1, [%o0]			! seconds
	retl
	  stn	%o2, [%o0 + CLONGSIZE]		! delay: nanoseconds
	SET_SIZE(gethrestime)
266
/*
 * Similar to gethrestime(), but gethrestime_sec() returns only the current
 * hrestime seconds, in %o0.  The nanosecond part is still computed by
 * CONV_HRESTIME (in %o2) so that any carry into the seconds is applied,
 * and is then discarded.
 */
	ENTRY_NP(gethrestime_sec)
	GET_HRESTIME(%o0,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__)
	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
	retl					! %o0 current hrestime seconds
	  nop
	SET_SIZE(gethrestime_sec)
277
/*
 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
 * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 * it explicitly.)
 *
 * In:	%o0 = pointer to a {seconds, nanoseconds} pair of longs.
 *
 * The reader never takes hres_lock.  It samples the lock word, clears bit 0
 * of the sample, reads hrestime, and re-reads the lock word; the read is
 * accepted only if the second sample equals the masked first one.  The
 * seconds store sits in the (non-annulled) delay slot of the retry branch,
 * so [%o0] may be written on failed attempts and is rewritten on the
 * next pass.
 */
	ENTRY_NP(gethrestime_lasttick)
	sethi	%hi(hres_lock), %o1
0:
	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
	membar	#LoadLoad			! Load of lock must complete
	andn	%o2, 1, %o2			! Mask off lowest bit
	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
	add	%o1, %lo(hrestime), %o4		! %o4 = &hrestime
	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
	membar	#LoadLoad			! All loads must complete
	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
	cmp	%o3, %o2			! If lock is locked or has
	bne	0b				!   changed, retry.
	  stn	%g1, [%o0]			! Delay: store seconds
	retl
	  stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
	SET_SIZE(gethrestime_lasttick)
303
/*
 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRESTIME trap.
 *
 * %o0 holds the seconds and %o1 the nanoseconds after CONV_HRESTIME has
 * normalized them; the globals %g1-%g5 and %o2/%o3 are working registers
 * for the two macros.
 */

	ENTRY_NP(get_hrestime)
	GET_HRESTIME(%o0,%o1,%g1,%g2,%g3,%g4,%g5,%o2,%o3,__LINE__)
	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
	FAST_TRAP_DONE
	SET_SIZE(get_hrestime)
315
/*
 * Fast trap to return lwp virtual time, uses trap window, leaves traps
 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
 * of nanoseconds consumed.
 *
 * This is the handler for the ST_GETHRVTIME trap.
 *
 * Register usage:
 *	%o0, %o1 = return lwp virtual time (%o0 = high 32 bits,
 *		   %o1 = low 32 bits); %o0 is also passed to
 *		   NATIVE_TIME_TO_NSEC before it receives the result
 *	%g2 = CPU pointer, then thread pointer
 *	%g3 = lwp pointer
 *	%g1 = scratch
 *	%g5 = native time, then the accumulated result
 */
	ENTRY_NP(get_virtime)
	GET_NATIVE_TIME(%g5,%g1,%g2,__LINE__)	! %g5 = native time in ticks
	CPU_ADDR(%g2, %g3)			! CPU struct ptr to %g2
	ldn	[%g2 + CPU_THREAD], %g2		! thread pointer to %g2
	ldn	[%g2 + T_LWP], %g3		! lwp pointer to %g3

	/*
	 * Subtract start time of current microstate from time
	 * of day to get increment for lwp virtual time.
	 */
	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
	sub	%g5, %g1, %g5

	/*
	 * Add current value of ms_acct[LMS_USER]
	 */
	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
	add	%g5, %g1, %g5
	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)	! ticks -> nanoseconds

	srl	%g5, 0, %o1			! %o1 = lo32(%g5)
	srlx	%g5, 32, %o0			! %o0 = hi32(%g5)

	FAST_TRAP_DONE
	SET_SIZE(get_virtime)
355
356
357
	.seg	".text"
hrtime_base_panic:
	.asciz	"hrtime_base stepping back"


/*
 * hres_tick: update the high-resolution time state.
 *
 * With hres_lock held this routine
 *   - records the current native time in hres_last_tick;
 *   - scales that time to nanoseconds and makes it the new hrtime_base,
 *     panicking if the old hrtime_base is greater than the new value;
 *   - derives this tick's adjustment from hrestime_adj, limited in
 *     magnitude to (nsec delta >> ADJ_SHIFT), subtracts it from timedelta
 *     and stores the resulting timedelta into hrestime_adj as well;
 *   - adds the nsec delta and the adjustment to hrestime, carrying one
 *     second into tv_sec (and setting one_sec) if tv_nsec reaches NANOSEC.
 *
 * The lock byte is taken with ldstub at hres_lock + HRES_LOCK_OFFSET and
 * released by incrementing the whole 32-bit hres_lock word.
 * NOTE(review): this presumably relies on the lock byte being the low-order
 * byte of the word, so that the increment clears it and carries into the
 * upper bits -- confirm against HRES_LOCK_OFFSET.
 *
 * All hres_* variables are addressed off a single sethi (%l4); see the
 * WARNING at their definition in the data section.
 */
	ENTRY_NP(hres_tick)
	save	%sp, -SA(MINFRAME), %sp	! get a new window

	sethi	%hi(hrestime), %l4
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
7:	tst	%l5
	bz,pt	%xcc, 8f			! if we got it, drive on
	  ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
9:	tst	%l5				! spin with plain loads until
	bz,a,pn	%xcc, 7b			!   the byte reads zero, then
	  ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
	ba,pt	%xcc, 9b			!   retry the ldstub at 7
	  ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
	membar	#StoreLoad|#StoreStore

	!
	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
	!
	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
	GET_NATIVE_TIME(%l0,%l3,%l6,__LINE__)	! current native time
	stx	%l0, [%l4 + %lo(hres_last_tick)]! prev = current
	! convert native time to nsecs
	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

	sub	%l0, %g1, %i1			! get accurate nsec delta

	ldx	[%l4 + %lo(hrtime_base)], %l1
	cmp	%l1, %l0			! old hrtime_base > new value?
	bg,pn	%xcc, 9f			! yes: time went backwards, panic
	  nop

	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base

	!
	! apply adjustment, if any
	!
	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
	brz	%l0, 2f
						! hrestime_adj == 0 ?
						! yes, skip adjustments
	  clr	%l5				! delay: set adj to zero
	tst	%l0				! is hrestime_adj >= 0 ?
	bge,pt	%xcc, 1f			! yes, go handle positive case
	  srl	%i1, ADJ_SHIFT, %l5		! delay: %l5 = adj

	addcc	%l0, %l5, %g0			! hrestime_adj < -adj ?
	bl,pt	%xcc, 2f			! yes, use current adj
	  neg	%l5				! delay: %l5 = -adj
	ba,pt	%xcc, 2f
	  mov	%l0, %l5			! no, so set adj = hrestime_adj
1:
	subcc	%l0, %l5, %g0			! hrestime_adj < adj ?
	bl,a,pt	%xcc, 2f			! yes, set adj = hrestime_adj
	  mov	%l0, %l5			! delay: adj = hrestime_adj
2:
	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
	sub	%l0, %l5, %l0			! timedelta -= adj

	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta

	or	%l4, %lo(hrestime), %l2		! %l2 = &hrestime
	ldn	[%l2], %i2			! %i2:%i3 = hrestime sec:nsec
	ldn	[%l2 + CLONGSIZE], %i3
	add	%i3, %l5, %i3			! hrestime.nsec += adj
	add	%i3, %i1, %i3			! hrestime.nsec += nslt

	set	NANOSEC, %l5			! %l5 = NANOSEC
	cmp	%i3, %l5
	bl,pt	%xcc, 5f			! if hrestime.tv_nsec < NANOSEC
	  sethi	%hi(one_sec), %i1		! delay
	add	%i2, 0x1, %i2			! hrestime.tv_sec++
	sub	%i3, %l5, %i3			! hrestime.tv_nsec - NANOSEC
	mov	0x1, %l5
	st	%l5, [%i1 + %lo(one_sec)]	! one_sec = 1
5:
	stn	%i2, [%l2]
	stn	%i3, [%l2 + CLONGSIZE]		! store the new hrestime

	membar	#StoreStore

	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1				! release lock
	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock

	ret
	restore

9:
	!
	! release hres_lock
	!
	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1
	st	%i1, [%l4 + %lo(hres_lock)]

	sethi	%hi(hrtime_base_panic), %o0
	call	panic
	  or	%o0, %lo(hrtime_base_panic), %o0

	SET_SIZE(hres_tick)
466
467	.seg	".text"
468kstat_q_panic_msg:
469	.asciz	"kstat_q_exit: qlen == 0"
470
471	ENTRY(kstat_q_panic)
472	save	%sp, -SA(MINFRAME), %sp
473	sethi	%hi(kstat_q_panic_msg), %o0
474	call	panic
475	  or	%o0, %lo(kstat_q_panic_msg), %o0
476	/*NOTREACHED*/
477	SET_SIZE(kstat_q_panic)
478
/* Register-compare branches handed to KSTAT_Q_UPDATE as its QBR argument. */
#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

/*
 * KSTAT_Q_UPDATE: account for one queue-length change.
 *
 * Expects %o0 = base address the QTYPE##... offsets apply to and
 * %g1 = current time (the callers load it with GET_NATIVE_TIME).
 * Uses %o1-%o5 as working registers.
 *
 *	QOP	"add" or "sub": how the queue length changes (by 1)
 *	QBR	branch used to test the OLD queue length for zero
 *	QZERO	target of QBR when the old length is zero; the new length
 *		is still stored, since that store is QBR's delay slot
 *	QRETURN	text placed before the final store: "retl" or "1:retl"
 *		puts the store in the return's delay slot, a bare "1:"
 *		just falls through to whatever follows the macro
 *	QTYPE	prefix of the CNT/LASTUPDATE/TIME/LENTIME offset names
 *
 * When the old length is non-zero, TIME grows by (now - LASTUPDATE) and
 * LENTIME by old length * (now - LASTUPDATE).  LASTUPDATE is always set
 * to now.
 */
#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE##CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
	st	%o2, [%o0 + QTYPE##CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE##LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE##TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE##LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE##TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE##LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE##LASTUPDATE]; /* lastupdate = now */
498
#if !defined(DEBUG)
/*
 * same as KSTAT_Q_UPDATE but without:
 * QBR     %o1, QZERO;
 * to be used only with non-debug build. mimics ASSERT() behaviour.
 *
 * With no zero test, the time accounting is performed unconditionally,
 * including when the old queue length is zero.  Register expectations
 * are the same: %o0 = base address, %g1 = current time, %o1-%o5 are
 * working registers.
 */
#define	KSTAT_Q_UPDATE_ND(QOP, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE##CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	st	%o2, [%o0 + QTYPE##CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE##LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE##TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE##LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE##TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE##LENTIME];	/* save lentime */	\
QRETURN;								\
	stx	%g1, [%o0 + QTYPE##LASTUPDATE]; /* lastupdate = now */
#endif
521
/*
 * kstat_waitq_enter(%o0): increment the KSTAT_IO_W queue length.  If the
 * old length was zero the time accounting is skipped (branch to 1:).
 * The "1:retl" argument makes the macro's final store the delay slot of
 * the return.
 */
	.align 16
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)	! %g1 = now
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)
527
/*
 * kstat_waitq_exit(%o0): decrement the KSTAT_IO_W queue length and update
 * its time accounting.  DEBUG builds branch to kstat_q_panic when the old
 * length is zero; non-DEBUG builds perform no such check.
 */
	.align 16
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)	! %g1 = now
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
#else
	KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_W)
#endif
	SET_SIZE(kstat_waitq_exit)
537
/*
 * kstat_runq_enter(%o0): increment the KSTAT_IO_R queue length.  If the
 * old length was zero the time accounting is skipped (branch to 1:).
 */
	.align 16
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)	! %g1 = now
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)
543
/*
 * kstat_runq_exit(%o0): decrement the KSTAT_IO_R queue length and update
 * its time accounting.  DEBUG builds branch to kstat_q_panic when the old
 * length is zero; non-DEBUG builds perform no such check.
 */
	.align 16
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)	! %g1 = now
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
#else
	KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_R)
#endif
	SET_SIZE(kstat_runq_exit)
553
/*
 * kstat_waitq_to_runq(%o0): leave the KSTAT_IO_W queue and enter the
 * KSTAT_IO_R queue using a single time sample (%g1).  The first macro is
 * given a bare "1:" as QRETURN so that it falls through into the second,
 * which performs the return.
 */
	.align 16
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)	! %g1 = now
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
#else
	KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
#endif
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)
564
/*
 * kstat_runq_back_to_waitq(%o0): leave the KSTAT_IO_R queue and re-enter
 * the KSTAT_IO_W queue using a single time sample (%g1).  The first macro
 * is given a bare "1:" as QRETURN so that it falls through into the
 * second, which performs the return.
 */
	.align 16
	ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)	! %g1 = now
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
#else
	KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
#endif
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)
575
	/*
	 *  -- WARNING --
	 *
	 * The following variables MUST be together on a 128-byte boundary.
	 * In addition to the primary performance motivation (having them all
	 * on the same cache line(s)), code here and in the GET*TIME() macros
	 * assumes that they all have the same high 22 address bits (so
	 * there's only one sethi).
	 *
	 * For example, hres_tick and gethrestime_lasttick in this file issue
	 * one sethi and then reach the other variables with %lo() offsets
	 * from that same register.
	 */
	.seg	".data"
	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
	.global	nsec_shift, adj_shift, native_tick_offset, native_stick_offset

	/* XXX - above comment claims 128-bytes is necessary */
	/* NOTE(review): the alignment actually requested below is 64 bytes. */
	.align	64
timedelta:			/* reduced by each tick's adj in hres_tick */
	.word	0, 0		/* int64_t */
hres_last_tick:			/* native time stored by hres_tick */
	.word	0, 0		/* hrtime_t */
hrestime:			/* seconds, then nanoseconds */
	.nword	0, 0		/* 2 longs */
hrestime_adj:			/* set to timedelta by hres_tick */
	.word	0, 0		/* int64_t */
hres_lock:			/* lock word for the variables above */
	.word	0
nsec_scale:			/* scaling factor loaded by hres_tick */
	.word	0
hrtime_base:			/* nsec time of the last hres_tick */
	.word	0, 0
traptrace_use_stick:
	.word	0
nsec_shift:
	.word	NSEC_SHIFT
adj_shift:
	.word	ADJ_SHIFT
	.align	8
native_tick_offset:
	.word	0, 0
	.align	8
native_stick_offset:
	.word	0, 0
618
619
/*
 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
 * usec_delay(int n)		[compatibility - should go one day]
 * Delay by spinning.
 *
 * delay for n microseconds.  numbers <= 0 delay 1 usec
 *
 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 * and variable clock rate for power management requires that we
 * use %stick to implement this routine.
 *
 * The deadline is (native time at entry) + n * sticks_per_usec + 1.
 * In the loop the compare is issued BEFORE the time is re-read, and the
 * branch that follows the re-read consumes the condition codes of that
 * compare, so each decision is based on the previous sample.
 * NOTE(review): this relies on GET_NATIVE_TIME leaving %xcc untouched --
 * confirm against the macro's definition.
 */

	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
	brlez,a,pn %o0, 0f		! n <= 0 ?
	  mov	1, %o0			!   then (annulled otherwise) n = 1
0:
	sethi	%hi(sticks_per_usec), %o1
	lduw	[%o1 + %lo(sticks_per_usec)], %o1
	mulx	%o1, %o0, %o1		! Scale usec to ticks
	inc	%o1			! We don't start on a tick edge
	GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__)
	add	%o1, %o2, %o1		! %o1 = deadline

1:	cmp	%o1, %o2		! deadline vs. last time sample
	GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__)
	bgeu,pt	%xcc, 1b		! deadline >= sample (unsigned): spin
	  nop
	retl
	  nop
	SET_SIZE(usec_delay)
	SET_SIZE(drv_usecwait)
652
/*
 * Level-14 interrupt prologue.
 *
 * Saves profiling state in the CPU structure and then continues in
 * pil_interrupt_common:
 *	CPU_PROFILE_PIL	= PIL that was interrupted
 *	CPU_PROFILE_PC	= trap PC if the trap came from privileged mode,
 *			  otherwise 0
 *	CPU_PROFILE_UPC	= trap PC if the trap came from user mode,
 *			  otherwise 0
 */
	ENTRY_NP(pil14_interrupt)
	CPU_ADDR(%g1, %g2)			! %g1 = CPU structure
	rdpr	%tpc, %g5			! %g5 = trap PC
	rdpr	%pil, %g6			! %g6 = interrupted PIL
	stn	%g6, [%g1 + CPU_PROFILE_PIL]
	rdpr	%tstate, %g6
	andcc	%g6, TSTATE_PRIV, %g0		! privileged at trap time?
	bnz,a,pt %xcc, 1f
	  stn	%g5, [%g1 + CPU_PROFILE_PC]	! yes (annulled if no): kernel PC

	! Trap came from user mode.
	stn	%g5, [%g1 + CPU_PROFILE_UPC]
	ba	pil_interrupt_common		! must be large-disp branch
	  stn	%g0, [%g1 + CPU_PROFILE_PC]	! delay: no kernel PC

	! Trap came from privileged mode.
1:	ba	pil_interrupt_common		! must be large-disp branch
	  stn	%g0, [%g1 + CPU_PROFILE_UPC]	! delay: no user PC
	SET_SIZE(pil14_interrupt)
671
/*
 * tick_rtt: runs before control passes to current_thread_complete.
 * If TICK_COMPARE is enabled, the level-14 interrupt request
 * (cbe_level14_inum) is re-enqueued; if a TICKINT/STICKINT softint is
 * pending, it is cleared and TICK_COMPARE is reprogrammed, if necessary,
 * until it holds a time in the future.
 */
	ENTRY_NP(tick_rtt)
	!
	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
	! disabled.  If TICK_COMPARE is enabled, we know that we need to
	! reenqueue the interrupt request structure.  We'll then check TICKINT
	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
	! interrupt.  In this case, TICK_COMPARE may have been rewritten
	! recently; we'll compare %o5 to the current time to verify that it's
	! in the future.
	!
	! Note that %o5 is live until after 1f.
	! XXX - there is a subroutine call while %o5 is live!
	!
	RD_TICKCMPR(%o5,%g1,%g2,__LINE__)
	srlx	%o5, TICKINT_DIS_SHFT, %g1
	brnz,pt	%g1, 2f				! disabled: nothing to do
	  nop

	rdpr 	%pstate, %g5			! %g5 = saved %pstate
	andn	%g5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate		! Disable vec interrupts

	sethi	%hi(cbe_level14_inum), %o1
	ldx	[%o1 + %lo(cbe_level14_inum)], %o1
	call	intr_enqueue_req ! preserves %o5 and %g5
	  mov	PIL_14, %o0

	! Check SOFTINT for TICKINT/STICKINT
	rd	SOFTINT, %o4
	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
	andcc	%o4, %o0, %g0
	bz,a,pn	%icc, 2f			! neither is pending: done
	  wrpr	%g0, %g5, %pstate		! Enable vec interrupts

	! clear TICKINT/STICKINT
	wr	%o0, CLEAR_SOFTINT

	!
	! Now that we've cleared TICKINT, we can reread %tick and confirm
	! that the value we programmed is still in the future.  If it isn't,
	! we need to reprogram TICK_COMPARE to fire as soon as possible.
	!
	GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__)	! %o0 = tick
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	  wrpr	%g0, %g5, %pstate		!   delay: enable vec intr

	!
	! If we're here, then we have programmed TICK_COMPARE with a %tick
	! which is in the past; we'll now load an initial step size, and loop
	! until we've managed to program TICK_COMPARE to fire in the future.
	!
	mov	8, %o4				! 8 = arbitrary initial step
1:	add	%o0, %o4, %o5			! Add the step
	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__)	! %o0 = tick
	cmp	%o5, %o0			! In the future?
	bg,a,pt	%xcc, 2f			! Yes, drive on.
	  wrpr	%g0, %g5, %pstate		!    delay: enable vec intr
	ba	1b				! No, try again.
	  sllx	%o4, 1, %o4			!    delay: double step size

2:	ba	current_thread_complete
	  nop
	SET_SIZE(tick_rtt)
737
/*
 * Level-15 interrupt prologue.
 *
 * Saves the trap PC in the CPU structure and then continues in
 * pil15_epilogue:
 *	CPU_CPCPROFILE_PC  = trap PC if the trap came from privileged mode,
 *			     otherwise 0
 *	CPU_CPCPROFILE_UPC = trap PC if the trap came from user mode,
 *			     otherwise 0
 */
	ENTRY_NP(pil15_interrupt)
	CPU_ADDR(%g1, %g2)			! %g1 = CPU structure
	rdpr	%tpc, %g5			! %g5 = trap PC
	rdpr	%tstate, %g6
	andcc	%g6, TSTATE_PRIV, %g0		! privileged at trap time?
	bnz,a,pt %xcc, 1f
	  stn	%g5, [%g1 + CPU_CPCPROFILE_PC]	! yes (annulled if no): kernel PC

	! Trap came from user mode.
	stn	%g5, [%g1 + CPU_CPCPROFILE_UPC]
	ba	pil15_epilogue			! must be large-disp branch
	  stn	%g0, [%g1 + CPU_CPCPROFILE_PC]	! delay: no kernel PC

	! Trap came from privileged mode.
1:	ba	pil15_epilogue			! must be large-disp branch
	  stn	%g0, [%g1 + CPU_CPCPROFILE_UPC]	! delay: no user PC
	SET_SIZE(pil15_interrupt)
754
/*
 * Prefetch a page_t for write or read, this assumes a linear
 * scan of sequential page_t's.
 *
 * In this file both prefetch_page_w() and prefetch_page_r() are empty:
 * they return immediately without issuing any prefetch.
 */
/* XXXQ These should be inline templates, not functions */
        ENTRY(prefetch_page_w)
        retl
	  nop
        SET_SIZE(prefetch_page_w)
764
	/* prefetch_page_r: empty here; returns without prefetching. */
        ENTRY(prefetch_page_r)
        retl
	  nop
        SET_SIZE(prefetch_page_r)
769
/*
 * Prefetch struct smap for write.
 *
 * In this file the routine is empty: it returns immediately without
 * issuing any prefetch.
 */
/* XXXQ These should be inline templates, not functions */
	ENTRY(prefetch_smap_w)
	retl
	  nop
	SET_SIZE(prefetch_smap_w)
778
779/*
780 * Generic sun4v MMU and Cache operations.
781 */
782
783	ENTRY_NP(vtag_flushpage)
784	/*
785	 * flush page from the tlb
786	 *
787	 * %o0 = vaddr
788	 * %o1 = sfmmup
789	 */
790	SFMMU_CPU_CNUM(%o1, %g1, %g2)   /* %g1 = sfmmu cnum on this CPU */
791
792	mov	%g1, %o1
793	mov	MAP_ITLB | MAP_DTLB, %o2
794	ta	MMU_UNMAP_ADDR
795	brz,pt	%o0, 1f
796	  nop
797	ba	panic_bad_hcall
798	  mov	MMU_UNMAP_ADDR, %o1
7991:
800 	retl
801	  nop
802	SET_SIZE(vtag_flushpage)
803
/*
 * vtag_flushall: demap everything from both the I-TLB and the D-TLB via
 * the MMU_DEMAP_ALL fast trap.  The first two arguments are passed as
 * zero (no cpu list yet).  A non-zero %o0 after the trap is handed to
 * panic_bad_hcall with %o1 = MMU_DEMAP_ALL.
 */
	ENTRY_NP(vtag_flushall)
	mov	MMU_DEMAP_ALL, %o5		! function number
	mov	MAP_ITLB | MAP_DTLB, %o2	! flags: both TLBs
	clr	%o1		! XXX no cpu list yet
	clr	%o0		! XXX no cpu list yet
	ta	FAST_TRAP
	brnz,pn	%o0, 1f				! non-zero status: failure
	  nop
	retl
	  nop
1:
	ba	panic_bad_hcall
	  mov	MMU_DEMAP_ALL, %o1
	SET_SIZE(vtag_flushall)
818
819	ENTRY_NP(vtag_unmap_perm_tl1)
820	/*
821	 * x-trap to unmap perm map entry
822	 * %g1 = vaddr
823	 * %g2 = ctxnum (KCONTEXT only)
824	 */
825	mov	%o0, %g3
826	mov	%o1, %g4
827	mov	%o2, %g5
828	mov	%o5, %g6
829	mov	%g1, %o0
830	mov	%g2, %o1
831	mov	MAP_ITLB | MAP_DTLB, %o2
832	mov	UNMAP_PERM_ADDR, %o5
833	ta	FAST_TRAP
834	brz,pt	%o0, 1f
835	nop
836
837	mov	PTL1_BAD_HCALL, %g1
838
839	cmp	%o0, H_ENOMAP
840	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP, %g1
841
842	cmp	%o0, H_EINVAL
843	move	%xcc, PTL1_BAD_HCALL_UNMAP_PERM_EINVAL, %g1
844
845	ba,a	ptl1_panic
8461:
847	mov	%g6, %o5
848	mov	%g5, %o2
849	mov	%g4, %o1
850	mov	%g3, %o0
851	retry
852	SET_SIZE(vtag_unmap_perm_tl1)
853
854	ENTRY_NP(vtag_flushpage_tl1)
855	/*
856	 * x-trap to flush page from tlb and tsb
857	 *
858	 * %g1 = vaddr, zero-extended on 32-bit kernel
859	 * %g2 = sfmmup
860	 *
861	 * assumes TSBE_TAG = 0
862	 */
863	srln	%g1, MMU_PAGESHIFT, %g1
864	slln	%g1, MMU_PAGESHIFT, %g1			/* g1 = vaddr */
865	mov	%o0, %g3
866	mov	%o1, %g4
867	mov	%o2, %g5
868	mov	%g1, %o0			/* vaddr */
869
870	SFMMU_CPU_CNUM(%g2, %o1, %g6)   /* %o1 = sfmmu cnum on this CPU */
871
872	mov	MAP_ITLB | MAP_DTLB, %o2
873	ta	MMU_UNMAP_ADDR
874	brz,pt	%o0, 1f
875	nop
876	  ba	ptl1_panic
877	mov	PTL1_BAD_HCALL, %g1
8781:
879	mov	%g5, %o2
880	mov	%g4, %o1
881	mov	%g3, %o0
882	membar #Sync
883	retry
884	SET_SIZE(vtag_flushpage_tl1)
885
886	ENTRY_NP(vtag_flush_pgcnt_tl1)
887	/*
888	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
889	 *
890	 * %g1 = vaddr, zero-extended on 32-bit kernel
891	 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is pass'ed in via pgcnt6 bits.
892	 *
893	 * NOTE: this handler relies on the fact that no
894	 *	interrupts or traps can occur during the loop
895	 *	issuing the TLB_DEMAP operations. It is assumed
896	 *	that interrupts are disabled and this code is
897	 *	fetching from the kernel locked text address.
898	 *
899	 * assumes TSBE_TAG = 0
900	 */
901	srln	%g1, MMU_PAGESHIFT, %g1
902	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
903	mov	%o0, %g3
904	mov	%o1, %g4
905	mov	%o2, %g5
906
907	and	%g2, SFMMU_PGCNT_MASK, %g7	/* g7 = pgcnt - 1 */
908	add	%g7, 1, %g7			/* g7 = pgcnt */
909
910        andn    %g2, SFMMU_PGCNT_MASK, %o0      /* %o0 = sfmmup */
911
912	SFMMU_CPU_CNUM(%o0, %g2, %g6)    /* %g2 = sfmmu cnum on this CPU */
913
914	set	MMU_PAGESIZE, %g6		/* g6 = pgsize */
915
9161:
917	mov	%g1, %o0			/* vaddr */
918	mov	%g2, %o1			/* cnum */
919	mov	MAP_ITLB | MAP_DTLB, %o2
920	ta	MMU_UNMAP_ADDR
921	brz,pt	%o0, 2f
922	  nop
923	ba	ptl1_panic
924	  mov	PTL1_BAD_HCALL, %g1
9252:
926	deccc	%g7				/* decr pgcnt */
927	bnz,pt	%icc,1b
928	  add	%g1, %g6, %g1			/* go to nextpage */
929
930	mov	%g5, %o2
931	mov	%g4, %o1
932	mov	%g3, %o0
933	membar #Sync
934	retry
935	SET_SIZE(vtag_flush_pgcnt_tl1)
936
937	! Not implemented on US1/US2
938	ENTRY_NP(vtag_flushall_tl1)
939	mov	%o0, %g3
940	mov	%o1, %g4
941	mov	%o2, %g5
942	mov	%o3, %g6	! XXXQ not used?
943	mov	%o5, %g7
944	mov	%g0, %o0	! XXX no cpu list yet
945	mov	%g0, %o1	! XXX no cpu list yet
946	mov	MAP_ITLB | MAP_DTLB, %o2
947	mov	MMU_DEMAP_ALL, %o5
948	ta	FAST_TRAP
949	brz,pt	%o0, 1f
950	  nop
951	ba	ptl1_panic
952	  mov	PTL1_BAD_HCALL, %g1
9531:
954	mov	%g7, %o5
955	mov	%g6, %o3	! XXXQ not used?
956	mov	%g5, %o2
957	mov	%g4, %o1
958	mov	%g3, %o0
959	retry
960	SET_SIZE(vtag_flushall_tl1)
961
/*
 * flush_instr_mem:
 *	Flush a portion of the I-$ starting at vaddr
 * 	%o0 vaddr
 *	%o1 bytes to be flushed
 *
 * One flush is issued per ICACHE_FLUSHSZ (0x20) bytes.  The loop is
 * bottom-tested, so at least one flush is issued even when %o1 is 0.
 * The continue test is an unsigned "greater than" on the result of the
 * subtraction.
 */

	ENTRY(flush_instr_mem)
	membar	#StoreStore				! Ensure the stores
							! are globally visible
1:
	flush	%o0
	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes = bytes-0x20
	bgu,pt	%ncc, 1b
	  add	%o0, ICACHE_FLUSHSZ, %o0		! vaddr = vaddr+0x20

	retl
	  nop
	SET_SIZE(flush_instr_mem)
981
#if !defined(CUSTOM_FPZERO)

/*
 * fp_zero() - clear all fp data registers and the fsr
 *
 * fp_zero_zero is an 8-byte-aligned 64-bit zero.  It is loaded once into
 * %fsr and once into %f0; %f0 is then copied to every other
 * double-precision register %f2 through %f62.  The last copy sits in the
 * retl delay slot.  Only %o0 is used as an integer scratch register.
 */

.global	fp_zero_zero
.align 8
fp_zero_zero:
	.xword	0

	ENTRY_NP(fp_zero)
	sethi	%hi(fp_zero_zero), %o0
	ldx	[%o0 + %lo(fp_zero_zero)], %fsr	! %fsr = 0
	ldd	[%o0 + %lo(fp_zero_zero)], %f0	! %f0 = 0
	fmovd	%f0, %f2
	fmovd	%f0, %f4
	fmovd	%f0, %f6
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14
	fmovd	%f0, %f16
	fmovd	%f0, %f18
	fmovd	%f0, %f20
	fmovd	%f0, %f22
	fmovd	%f0, %f24
	fmovd	%f0, %f26
	fmovd	%f0, %f28
	fmovd	%f0, %f30
	fmovd	%f0, %f32
	fmovd	%f0, %f34
	fmovd	%f0, %f36
	fmovd	%f0, %f38
	fmovd	%f0, %f40
	fmovd	%f0, %f42
	fmovd	%f0, %f44
	fmovd	%f0, %f46
	fmovd	%f0, %f48
	fmovd	%f0, %f50
	fmovd	%f0, %f52
	fmovd	%f0, %f54
	fmovd	%f0, %f56
	fmovd	%f0, %f58
	fmovd	%f0, %f60
	retl
	fmovd	%f0, %f62			! delay: last register
	SET_SIZE(fp_zero)

#endif  /* CUSTOM_FPZERO */
1032