xref: /titanic_52/usr/src/uts/sun4u/cpu/spitfire_asm.s (revision fd9cb95cbb2f626355a60efb9d02c5f0a33c10e6)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#if !defined(lint)
30#include "assym.h"
31#endif	/* lint */
32
33#include <sys/asm_linkage.h>
34#include <sys/mmu.h>
35#include <vm/hat_sfmmu.h>
36#include <sys/machparam.h>
37#include <sys/machcpuvar.h>
38#include <sys/machthread.h>
39#include <sys/privregs.h>
40#include <sys/asm_linkage.h>
41#include <sys/machasi.h>
42#include <sys/trap.h>
43#include <sys/spitregs.h>
44#include <sys/xc_impl.h>
45#include <sys/intreg.h>
46#include <sys/async.h>
47
48#ifdef TRAPTRACE
49#include <sys/traptrace.h>
50#endif /* TRAPTRACE */
51
52#ifndef	lint
53
54/* BEGIN CSTYLED */
55#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
56	ldxa	[%g0]ASI_LSU, tmp1					;\
57	btst	LSU_DC, tmp1		/* is dcache enabled? */	;\
58	bz,pn	%icc, 1f						;\
59	sethi	%hi(dcache_linesize), tmp1				;\
60	ld	[tmp1 + %lo(dcache_linesize)], tmp1			;\
61	sethi	%hi(dflush_type), tmp2					;\
62	ld	[tmp2 + %lo(dflush_type)], tmp2				;\
63	cmp	tmp2, FLUSHPAGE_TYPE					;\
64	be,pt	%icc, 2f						;\
65	sllx	arg1, SF_DC_VBIT_SHIFT, arg1	/* tag to compare */	;\
66	sethi	%hi(dcache_size), tmp3					;\
67	ld	[tmp3 + %lo(dcache_size)], tmp3				;\
68	cmp	tmp2, FLUSHMATCH_TYPE					;\
69	be,pt	%icc, 3f						;\
70	nop								;\
71	/*								\
72	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
73	 * tmp3 = cache size						\
74	 * tmp1 = cache line size					\
75	 */								\
76	sub	tmp3, tmp1, tmp2					;\
774:									\
78	stxa	%g0, [tmp2]ASI_DC_TAG					;\
79	membar	#Sync							;\
80	cmp	%g0, tmp2						;\
81	bne,pt	%icc, 4b						;\
82	sub	tmp2, tmp1, tmp2					;\
83	ba,pt	%icc, 1f						;\
84	nop								;\
85	/*								\
86	 * flushtype = FLUSHPAGE_TYPE					\
87	 * arg1 = tag to compare against				\
88	 * arg2 = virtual color						\
89	 * tmp1 = cache line size					\
90	 * tmp2 = tag from cache					\
91	 * tmp3 = counter						\
92	 */								\
932:									\
94	set	MMU_PAGESIZE, tmp3					;\
95	sllx	arg2, MMU_PAGESHIFT, arg2  /* color to dcache page */	;\
96	sub	tmp3, tmp1, tmp3					;\
974:									\
98	ldxa	[arg2 + tmp3]ASI_DC_TAG, tmp2	/* read tag */		;\
99	btst	SF_DC_VBIT_MASK, tmp2					;\
100	bz,pn	%icc, 5f	  /* branch if no valid sub-blocks */	;\
101	andn	tmp2, SF_DC_VBIT_MASK, tmp2	/* clear out v bits */	;\
102	cmp	tmp2, arg1						;\
103	bne,pn	%icc, 5f			/* br if tag miss */	;\
104	nop								;\
105	stxa	%g0, [arg2 + tmp3]ASI_DC_TAG				;\
106	membar	#Sync							;\
1075:									\
108	cmp	%g0, tmp3						;\
109	bnz,pt	%icc, 4b		/* branch if not done */	;\
110	sub	tmp3, tmp1, tmp3					;\
111	ba,pt	%icc, 1f						;\
112	nop								;\
113	/*								\
114	 * flushtype = FLUSHMATCH_TYPE					\
115	 * arg1 = tag to compare against				\
116	 * tmp1 = cache line size					\
117	 * tmp3 = cache size						\
118	 * arg2 = counter						\
119	 * tmp2 = cache tag						\
120	 */								\
1213:									\
122	sub	tmp3, tmp1, arg2					;\
1234:									\
124	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
125	btst	SF_DC_VBIT_MASK, tmp2					;\
126	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
127	andn	tmp2, SF_DC_VBIT_MASK, tmp2	/* clear out v bits */	;\
128	cmp	tmp2, arg1						;\
129	bne,pn	%icc, 5f		/* branch if tag miss */	;\
130	nop								;\
131	stxa	%g0, [arg2]ASI_DC_TAG					;\
132	membar	#Sync							;\
1335:									\
134	cmp	%g0, arg2						;\
135	bne,pt	%icc, 4b		/* branch if not done */	;\
136	sub	arg2, tmp1, arg2					;\
1371:
138
139/*
140 * macro that flushes the entire dcache color
141 */
142#define	DCACHE_FLUSHCOLOR(arg, tmp1, tmp2)				\
143	ldxa	[%g0]ASI_LSU, tmp1;					\
144	btst	LSU_DC, tmp1;		/* is dcache enabled? */	\
145	bz,pn	%icc, 1f;						\
146	sethi	%hi(dcache_linesize), tmp1;				\
147	ld	[tmp1 + %lo(dcache_linesize)], tmp1;			\
148	set	MMU_PAGESIZE, tmp2;					\
149	/*								\
150	 * arg = virtual color						\
151	 * tmp2 = page size						\
152	 * tmp1 = cache line size					\
153	 */								\
154	sllx	arg, MMU_PAGESHIFT, arg; /* color to dcache page */	\
155	sub	tmp2, tmp1, tmp2;					\
1562:									\
157	stxa	%g0, [arg + tmp2]ASI_DC_TAG;				\
158	membar	#Sync;							\
159	cmp	%g0, tmp2;						\
160	bne,pt	%icc, 2b;						\
161	sub	tmp2, tmp1, tmp2;					\
1621:
163
164/*
165 * macro that flushes the entire dcache
166 */
167#define	DCACHE_FLUSHALL(size, linesize, tmp)				\
168	ldxa	[%g0]ASI_LSU, tmp;					\
169	btst	LSU_DC, tmp;		/* is dcache enabled? */	\
170	bz,pn	%icc, 1f;						\
171									\
172	sub	size, linesize, tmp;					\
1732:									\
174	stxa	%g0, [tmp]ASI_DC_TAG;					\
175	membar	#Sync;							\
176	cmp	%g0, tmp;						\
177	bne,pt	%icc, 2b;						\
178	sub	tmp, linesize, tmp;					\
1791:
180
181/*
182 * macro that flushes the entire icache
183 */
184#define	ICACHE_FLUSHALL(size, linesize, tmp)				\
185	ldxa	[%g0]ASI_LSU, tmp;					\
186	btst	LSU_IC, tmp;						\
187	bz,pn	%icc, 1f;						\
188									\
189	sub	size, linesize, tmp;					\
1902:									\
191	stxa	%g0, [tmp]ASI_IC_TAG;					\
192	membar	#Sync;							\
193	cmp	%g0, tmp;						\
194	bne,pt	%icc, 2b;						\
195	sub	tmp, linesize, tmp;					\
1961:
197
198/*
199 * Macro for getting to offset from 'cpu_private' ptr. The 'cpu_private'
200 * ptr is in the machcpu structure.
201 * r_or_s:	Register or symbol off offset from 'cpu_private' ptr.
202 * scr1:	Scratch, ptr is returned in this register.
203 * scr2:	Scratch
204 */
205#define GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr2, label)		\
206	CPU_ADDR(scr1, scr2);						\
207	ldn	[scr1 + CPU_PRIVATE], scr1; 				\
208	cmp	scr1, 0; 						\
209	be	label;							\
210	 nop; 								\
211	add	scr1, r_or_s, scr1;  					\
212
213#ifdef HUMMINGBIRD
214/*
215 * UltraSPARC-IIe processor supports both 4-way set associative and
216 * direct map E$. For performance reasons, we flush E$ by placing it
217 * in direct map mode for data load/store and restore the state after
218 * we are done flushing it. Keep interrupts off while flushing in this
219 * manner.
220 *
221 * We flush the entire ecache by starting at one end and loading each
222 * successive ecache line for the 2*ecache-size range. We have to repeat
223 * the flush operation to guarantee that the entire ecache has been
224 * flushed.
225 *
226 * For flushing a specific physical address, we start at the aliased
227 * address and load at set-size stride, wrapping around at 2*ecache-size
228 * boundary and skipping the physical address being flushed. It takes
229 * 10 loads to guarantee that the physical address has been flushed.
230 */
231
232#define	HB_ECACHE_FLUSH_CNT	2
233#define	HB_PHYS_FLUSH_CNT	10	/* #loads to flush specific paddr */
234#endif /* HUMMINGBIRD */
235
236/* END CSTYLED */
237
238#endif	/* !lint */
239
240/*
241 * Spitfire MMU and Cache operations.
242 */
243
244#if defined(lint)
245
246/*ARGSUSED*/
247void
248vtag_flushpage(caddr_t vaddr, uint_t ctxnum)
249{}
250
251/*ARGSUSED*/
252void
253vtag_flushctx(uint_t ctxnum)
254{}
255
256/*ARGSUSED*/
257void
258vtag_flushall(void)
259{}
260
261/*ARGSUSED*/
262void
263vtag_flushpage_tl1(uint64_t vaddr, uint64_t ctxnum)
264{}
265
266/*ARGSUSED*/
267void
268vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t ctx_pgcnt)
269{}
270
271/*ARGSUSED*/
272void
273vtag_flushctx_tl1(uint64_t ctxnum, uint64_t dummy)
274{}
275
276/*ARGSUSED*/
277void
278vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
279{}
280
281/*ARGSUSED*/
282void
283vac_flushpage(pfn_t pfnum, int vcolor)
284{}
285
286/*ARGSUSED*/
287void
288vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
289{}
290
291/*ARGSUSED*/
292void
293init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
294{}
295
296/*ARGSUSED*/
297void
298init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
299{}
300
301/*ARGSUSED*/
302void
303flush_instr_mem(caddr_t vaddr, size_t len)
304{}
305
306/*ARGSUSED*/
307void
308flush_ecache(uint64_t physaddr, size_t size, size_t linesize)
309{}
310
311/*ARGSUSED*/
312void
313get_ecache_dtag(uint32_t ecache_idx, uint64_t *ecache_data,
314		uint64_t *ecache_tag, uint64_t *oafsr, uint64_t *acc_afsr)
315{}
316
317/* ARGSUSED */
318uint64_t
319get_ecache_tag(uint32_t id, uint64_t *nafsr, uint64_t *acc_afsr)
320{
321	return ((uint64_t)0);
322}
323
324/* ARGSUSED */
325uint64_t
326check_ecache_line(uint32_t id, uint64_t *acc_afsr)
327{
328	return ((uint64_t)0);
329}
330
331/*ARGSUSED*/
332void
333kdi_flush_idcache(int dcache_size, int dcache_lsize,
334    int icache_size, int icache_lsize)
335{}
336
337#else	/* lint */
338
339	ENTRY_NP(vtag_flushpage)
340	/*
341	 * flush page from the tlb
342	 *
343	 * %o0 = vaddr
344	 * %o1 = ctxnum
345	 */
346	rdpr	%pstate, %o5
347#ifdef DEBUG
348	andcc	%o5, PSTATE_IE, %g0		/* if interrupts already */
349	bnz,a,pt %icc, 3f			/* disabled, panic */
350	nop
351	save	%sp, -SA(MINFRAME), %sp
352	sethi	%hi(sfmmu_panic1), %o0
353	call	panic
354	  or	%o0, %lo(sfmmu_panic1), %o0
355	ret
356	restore
3573:
358#endif /* DEBUG */
359	/*
360	 * disable ints
361	 */
362	andn	%o5, PSTATE_IE, %o4
363	wrpr	%o4, 0, %pstate
364
365	/*
366	 * Then, blow out the tlb
367	 * Interrupts are disabled to prevent the secondary ctx register
368	 * from changing underneath us.
369	 */
370	brnz,pt	%o1, 1f			/* KCONTEXT? */
371	sethi	%hi(FLUSH_ADDR), %o3
372	/*
373	 * For KCONTEXT demaps use primary. type = page implicitly
374	 */
375	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
376	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
377	b	5f
378	  flush	%o3
3791:
380	/*
381	 * User demap.  We need to set the secondary context properly.
382	 * %o0 = vaddr
383	 * %o1 = ctxnum
384	 * %o3 = FLUSH_ADDR
385	 */
386	set	MMU_SCONTEXT, %o4
387	ldxa	[%o4]ASI_DMMU, %o2		/* rd old ctxnum */
388	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %o0, %o0
389	cmp	%o2, %o1
390	be,a,pt	%icc, 4f
391	  nop
392	stxa	%o1, [%o4]ASI_DMMU		/* wr new ctxum */
3934:
394	stxa	%g0, [%o0]ASI_DTLB_DEMAP
395	stxa	%g0, [%o0]ASI_ITLB_DEMAP
396	flush	%o3
397	be,a,pt	%icc, 5f
398	  nop
399	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
400	flush	%o3
4015:
402	retl
403	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
404	SET_SIZE(vtag_flushpage)
405
406	ENTRY_NP(vtag_flushctx)
407	/*
408	 * flush context from the tlb
409	 *
410	 * %o0 = ctxnum
411	 * We disable interrupts to prevent the secondary ctx register changing
412	 * underneath us.
413	 */
414	sethi	%hi(FLUSH_ADDR), %o3
415	set	DEMAP_CTX_TYPE | DEMAP_SECOND, %g1
416	rdpr	%pstate, %o2
417
418#ifdef DEBUG
419	andcc	%o2, PSTATE_IE, %g0		/* if interrupts already */
420	bnz,a,pt %icc, 1f			/* disabled, panic	 */
421	  nop
422	sethi	%hi(sfmmu_panic1), %o0
423	call	panic
424	  or	%o0, %lo(sfmmu_panic1), %o0
4251:
426#endif /* DEBUG */
427
428	wrpr	%o2, PSTATE_IE, %pstate		/* disable interrupts */
429	set	MMU_SCONTEXT, %o4
430	ldxa	[%o4]ASI_DMMU, %o5		/* rd old ctxnum */
431	cmp	%o5, %o0
432	be,a,pt	%icc, 4f
433	  nop
434	stxa	%o0, [%o4]ASI_DMMU		/* wr new ctxum */
4354:
436	stxa	%g0, [%g1]ASI_DTLB_DEMAP
437	stxa	%g0, [%g1]ASI_ITLB_DEMAP
438	flush	%o3
439	be,a,pt	%icc, 5f
440	  nop
441	stxa	%o5, [%o4]ASI_DMMU		/* restore old ctxnum */
442	flush	%o3
4435:
444	retl
445	  wrpr	%g0, %o2, %pstate		/* enable interrupts */
446	SET_SIZE(vtag_flushctx)
447
448	.seg	".text"
449.flushallmsg:
450	.asciz	"sfmmu_asm: unimplemented flush operation"
451
452	ENTRY_NP(vtag_flushall)
453	sethi	%hi(.flushallmsg), %o0
454	call	panic
455	  or	%o0, %lo(.flushallmsg), %o0
456	SET_SIZE(vtag_flushall)
457
458	ENTRY_NP(vtag_flushpage_tl1)
459	/*
460	 * x-trap to flush page from tlb and tsb
461	 *
462	 * %g1 = vaddr, zero-extended on 32-bit kernel
463	 * %g2 = ctxnum
464	 *
465	 * assumes TSBE_TAG = 0
466	 */
467	srln	%g1, MMU_PAGESHIFT, %g1
468	slln	%g1, MMU_PAGESHIFT, %g1			/* g1 = vaddr */
469	/* We need to set the secondary context properly. */
470	set	MMU_SCONTEXT, %g4
471	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
472	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
473	stxa	%g2, [%g4]ASI_DMMU		/* wr new ctxum */
474	stxa	%g0, [%g1]ASI_DTLB_DEMAP
475	stxa	%g0, [%g1]ASI_ITLB_DEMAP
476	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
477	membar #Sync
478	retry
479	SET_SIZE(vtag_flushpage_tl1)
480
481	ENTRY_NP(vtag_flush_pgcnt_tl1)
482	/*
483	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
484	 *
485	 * %g1 = vaddr, zero-extended on 32-bit kernel
486	 * %g2 = <zero32|ctx16|pgcnt16>
487	 *
488	 * NOTE: this handler relies on the fact that no
489	 *	interrupts or traps can occur during the loop
490	 *	issuing the TLB_DEMAP operations. It is assumed
491	 *	that interrupts are disabled and this code is
492	 *	fetching from the kernel locked text address.
493	 *
494	 * assumes TSBE_TAG = 0
495	 */
496	srln	%g1, MMU_PAGESHIFT, %g1
497	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
498	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
499	set	0xffff, %g4
500	and	%g4, %g2, %g3			/* g3 = pgcnt */
501	srln	%g2, 16, %g2			/* g2 = ctxnum */
502	/* We need to set the secondary context properly. */
503	set	MMU_SCONTEXT, %g4
504	ldxa	[%g4]ASI_DMMU, %g5		/* read old ctxnum */
505	stxa	%g2, [%g4]ASI_DMMU		/* write new ctxum */
506
507	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
5081:
509	stxa	%g0, [%g1]ASI_DTLB_DEMAP
510	stxa	%g0, [%g1]ASI_ITLB_DEMAP
511	deccc	%g3				/* decr pgcnt */
512	bnz,pt	%icc,1b
513	add	%g1, %g2, %g1			/* go to nextpage */
514
515	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
516	membar #Sync
517	retry
518	SET_SIZE(vtag_flush_pgcnt_tl1)
519
520	ENTRY_NP(vtag_flushctx_tl1)
521	/*
522	 * x-trap to flush context from tlb
523	 *
524	 * %g1 = ctxnum
525	 */
526	set	DEMAP_CTX_TYPE | DEMAP_SECOND, %g4
527	set	MMU_SCONTEXT, %g3
528	ldxa	[%g3]ASI_DMMU, %g5		/* rd old ctxnum */
529	stxa	%g1, [%g3]ASI_DMMU		/* wr new ctxum */
530	stxa	%g0, [%g4]ASI_DTLB_DEMAP
531	stxa	%g0, [%g4]ASI_ITLB_DEMAP
532	stxa	%g5, [%g3]ASI_DMMU		/* restore old ctxnum */
533	membar #Sync
534	retry
535	SET_SIZE(vtag_flushctx_tl1)
536
537	! Not implemented on US1/US2
538	ENTRY_NP(vtag_flushall_tl1)
539	retry
540	SET_SIZE(vtag_flushall_tl1)
541
542/*
543 * vac_flushpage(pfnum, color)
544 *	Flush 1 8k page of the D-$ with physical page = pfnum
545 *	Algorithm:
546 *		The spitfire dcache is a 16k direct mapped virtual indexed,
547 *		physically tagged cache.  Given the pfnum we read all cache
548 *		lines for the corresponding page in the cache (determined by
549 *		the color).  Each cache line is compared with
550 *		the tag created from the pfnum. If the tags match we flush
551 *		the line.
552 */
553	.seg	".data"
554	.align	8
555	.global	dflush_type
556dflush_type:
557	.word	FLUSHPAGE_TYPE
558	.seg	".text"
559
560	ENTRY(vac_flushpage)
561	/*
562	 * flush page from the d$
563	 *
564	 * %o0 = pfnum, %o1 = color
565	 */
566	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
567	retl
568	nop
569	SET_SIZE(vac_flushpage)
570
571	ENTRY_NP(vac_flushpage_tl1)
572	/*
573	 * x-trap to flush page from the d$
574	 *
575	 * %g1 = pfnum, %g2 = color
576	 */
577	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
578	retry
579	SET_SIZE(vac_flushpage_tl1)
580
581	ENTRY(vac_flushcolor)
582	/*
583	 * %o0 = vcolor
584	 */
585	DCACHE_FLUSHCOLOR(%o0, %o1, %o2)
586	retl
587	  nop
588	SET_SIZE(vac_flushcolor)
589
590	ENTRY(vac_flushcolor_tl1)
591	/*
592	 * %g1 = vcolor
593	 */
594	DCACHE_FLUSHCOLOR(%g1, %g2, %g3)
595	retry
596	SET_SIZE(vac_flushcolor_tl1)
597
598
599	.global _dispatch_status_busy
600_dispatch_status_busy:
601	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
602	.align	4
603
604/*
605 * Determine whether or not the IDSR is busy.
606 * Entry: no arguments
607 * Returns: 1 if busy, 0 otherwise
608 */
609	ENTRY(idsr_busy)
610	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
611	clr	%o0
612	btst	IDSR_BUSY, %g1
613	bz,a,pt	%xcc, 1f
614	mov	1, %o0
6151:
616	retl
617	nop
618	SET_SIZE(idsr_busy)
619
620/*
621 * Setup interrupt dispatch data registers
622 * Entry:
623 *	%o0 - function or inumber to call
624 *	%o1, %o2 - arguments (2 uint64_t's)
625 */
626	.seg "text"
627
628	ENTRY(init_mondo)
629#ifdef DEBUG
630	!
631	! IDSR should not be busy at the moment
632	!
633	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
634	btst	IDSR_BUSY, %g1
635	bz,pt	%xcc, 1f
636	nop
637
638	sethi	%hi(_dispatch_status_busy), %o0
639	call	panic
640	or	%o0, %lo(_dispatch_status_busy), %o0
641#endif /* DEBUG */
642
643	ALTENTRY(init_mondo_nocheck)
644	!
645	! interrupt vector dispach data reg 0
646	!
6471:
648	mov	IDDR_0, %g1
649	mov	IDDR_1, %g2
650	mov	IDDR_2, %g3
651	stxa	%o0, [%g1]ASI_INTR_DISPATCH
652
653	!
654	! interrupt vector dispach data reg 1
655	!
656	stxa	%o1, [%g2]ASI_INTR_DISPATCH
657
658	!
659	! interrupt vector dispach data reg 2
660	!
661	stxa	%o2, [%g3]ASI_INTR_DISPATCH
662
663	retl
664	membar	#Sync			! allowed to be in the delay slot
665	SET_SIZE(init_mondo)
666
667/*
668 * Ship mondo to upaid
669 */
670	ENTRY_NP(shipit)
671	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = upa id
672	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
673	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
674#if defined(SF_ERRATA_54)
675	membar	#Sync				! store must occur before load
676	mov	0x20, %g3			! UDBH Control Register Read
677	ldxa	[%g3]ASI_SDB_INTR_R, %g0
678#endif
679	retl
680	membar	#Sync
681	SET_SIZE(shipit)
682
683
684/*
685 * flush_instr_mem:
686 *	Flush a portion of the I-$ starting at vaddr
687 * 	%o0 vaddr
688 *	%o1 bytes to be flushed
689 */
690
691	ENTRY(flush_instr_mem)
692	membar	#StoreStore				! Ensure the stores
693							! are globally visible
6941:
695	flush	%o0
696	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes = bytes-0x20
697	bgu,pt	%ncc, 1b
698	add	%o0, ICACHE_FLUSHSZ, %o0		! vaddr = vaddr+0x20
699
700	retl
701	nop
702	SET_SIZE(flush_instr_mem)
703
704/*
705 * flush_ecache:
706 * Flush the entire e$ using displacement flush by reading through a
707 * physically contiguous area. We use mmu bypass asi (ASI_MEM) while
708 * reading this physical address range so that data doesn't go to d$.
709 * incoming arguments:
710 *	%o0 - 64 bit physical address
711 *	%o1 - size of address range to read
712 *	%o2 - ecache linesize
713 */
714	ENTRY(flush_ecache)
715#ifndef HUMMINGBIRD
716	b	2f
717	  nop
7181:
719	ldxa	[%o0 + %o1]ASI_MEM, %g0	! start reading from physaddr + size
7202:
721	subcc	%o1, %o2, %o1
722	bcc,a,pt %ncc, 1b
723	  nop
724
725#else /* HUMMINGBIRD */
726	/*
727	 * UltraSPARC-IIe processor supports both 4-way set associative
728	 * and direct map E$. For performance reasons, we flush E$ by
729	 * placing it in direct map mode for data load/store and restore
730	 * the state after we are done flushing it. It takes 2 iterations
731	 * to guarantee that the entire ecache has been flushed.
732	 *
733	 * Keep the interrupts disabled while flushing E$ in this manner.
734	 */
735	rdpr	%pstate, %g4		! current pstate (restored later)
736	andn	%g4, PSTATE_IE, %g5
737	wrpr	%g0, %g5, %pstate	! disable interrupts
738
739	! Place E$ in direct map mode for data access
740	or	%g0, 1, %g5
741	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
742	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
743	or	%g1, %g5, %g5
744	membar	#Sync
745	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
746	membar	#Sync
747
748	! flush entire ecache HB_ECACHE_FLUSH_CNT times
749	mov	HB_ECACHE_FLUSH_CNT-1, %g5
7502:
751	sub	%o1, %o2, %g3		! start from last entry
7521:
753	ldxa	[%o0 + %g3]ASI_MEM, %g0	! start reading from physaddr + size
754	subcc	%g3, %o2, %g3
755	bgeu,a,pt %ncc, 1b
756	  nop
757	brgz,a,pt %g5, 2b
758	  dec	%g5
759
760	membar	#Sync
761	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config reg
762	membar	#Sync
763	wrpr	%g0, %g4, %pstate	! restore earlier pstate
764#endif /* HUMMINGBIRD */
765
766	retl
767	nop
768	SET_SIZE(flush_ecache)
769
770/*
771 * void kdi_flush_idcache(int dcache_size, int dcache_linesize,
772 *			int icache_size, int icache_linesize)
773 */
774	ENTRY(kdi_flush_idcache)
775	DCACHE_FLUSHALL(%o0, %o1, %g1)
776	ICACHE_FLUSHALL(%o2, %o3, %g1)
777	membar	#Sync
778	retl
779	nop
780	SET_SIZE(kdi_flush_idcache)
781
782
783/*
784 * void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
785 * 			uint64_t *oafsr, uint64_t *acc_afsr)
786 *
787 * Get ecache data and tag.  The ecache_idx argument is assumed to be aligned
788 * on a 64-byte boundary.  The corresponding AFSR value is also read for each
789 * 8 byte ecache data obtained. The ecache data is assumed to be a pointer
790 * to an array of 16 uint64_t's (e$data & afsr value).  The action to read the
791 * data and tag should be atomic to make sense.  We will be executing at PIL15
792 * and will disable IE, so nothing can occur between the two reads.  We also
793 * assume that the execution of this code does not interfere with what we are
794 * reading - not really possible, but we'll live with it for now.
795 * We also pass the old AFSR value before clearing it, and caller will take
796 * appropriate actions if the important bits are non-zero.
797 *
798 * If the caller wishes to track the AFSR in cases where the CP bit is
799 * set, an address should be passed in for acc_afsr.  Otherwise, this
800 * argument may be null.
801 *
802 * Register Usage:
803 * i0: In: 32-bit e$ index
804 * i1: In: addr of e$ data
805 * i2: In: addr of e$ tag
806 * i3: In: addr of old afsr
807 * i4: In: addr of accumulated afsr - may be null
808 */
809	ENTRY(get_ecache_dtag)
810	save	%sp, -SA(MINFRAME), %sp
811	or	%g0, 1, %l4
812	sllx	%l4, 39, %l4	! set bit 39 for e$ data access
813	or	%i0, %l4, %g6	! %g6 = e$ addr for data read
814	sllx	%l4, 1, %l4	! set bit 40 for e$ tag access
815	or	%i0, %l4, %l4	! %l4 = e$ addr for tag read
816
817	rdpr    %pstate, %i5
818	andn    %i5, PSTATE_IE | PSTATE_AM, %i0
819	wrpr    %i0, %g0, %pstate       ! clear IE, AM bits
820
821	ldxa    [%g0]ASI_ESTATE_ERR, %g1
822	stxa    %g0, [%g0]ASI_ESTATE_ERR        ! disable errors
823	membar  #Sync
824
825	ldxa	[%g0]ASI_AFSR, %i0      ! grab the old-afsr before tag read
826	stx     %i0, [%i3]		! write back the old-afsr
827
828	ldxa    [%l4]ASI_EC_R, %g0      ! read tag into E$ tag reg
829	ldxa    [%g0]ASI_EC_DIAG, %i0   ! read tag from E$ tag reg
830	stx     %i0, [%i2]              ! write back tag result
831
832	clr	%i2			! loop count
833
834	brz	%i4, 1f			! acc_afsr == NULL?
835	  ldxa	[%g0]ASI_AFSR, %i0      ! grab the old-afsr before clearing
836	srlx	%i0, P_AFSR_CP_SHIFT, %l0
837	btst	1, %l0
838	bz	1f
839	  nop
840	ldx	[%i4], %g4
841	or	%g4, %i0, %g4		! aggregate AFSR in cpu private
842	stx	%g4, [%i4]
8431:
844	stxa    %i0, [%g0]ASI_AFSR	! clear AFSR
845	membar  #Sync
846	ldxa    [%g6]ASI_EC_R, %i0      ! read the 8byte E$data
847	stx     %i0, [%i1]              ! save the E$data
848	add     %g6, 8, %g6
849	add     %i1, 8, %i1
850	ldxa    [%g0]ASI_AFSR, %i0      ! read AFSR for this 16byte read
851	srlx	%i0, P_AFSR_CP_SHIFT, %l0
852	btst	1, %l0
853	bz	2f
854	  stx     %i0, [%i1]		! save the AFSR
855
856	brz	%i4, 2f			! acc_afsr == NULL?
857	  nop
858	ldx	[%i4], %g4
859	or	%g4, %i0, %g4		! aggregate AFSR in cpu private
860	stx	%g4, [%i4]
8612:
862	add     %i2, 8, %i2
863	cmp     %i2, 64
864	bl,a    1b
865	  add     %i1, 8, %i1
866	stxa    %i0, [%g0]ASI_AFSR              ! clear AFSR
867	membar  #Sync
868	stxa    %g1, [%g0]ASI_ESTATE_ERR        ! restore error enable
869	membar  #Sync
870	wrpr    %g0, %i5, %pstate
871	ret
872	  restore
873	SET_SIZE(get_ecache_dtag)
874#endif /* lint */
875
876#if defined(lint)
877/*
878 * The ce_err function handles trap type 0x63 (corrected_ECC_error) at tl=0.
879 * Steps: 1. GET AFSR  2. Get AFAR <40:4> 3. Get datapath error status
880 *	  4. Clear datapath error bit(s) 5. Clear AFSR error bit
881 *	  6. package data in %g2 and %g3 7. call cpu_ce_error vis sys_trap
882 * %g2: [ 52:43 UDB lower | 42:33 UDB upper | 32:0 afsr ] - arg #3/arg #1
883 * %g3: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
884 */
885void
886ce_err(void)
887{}
888
889void
890ce_err_tl1(void)
891{}
892
893
894/*
895 * The async_err function handles trap types 0x0A (instruction_access_error)
896 * and 0x32 (data_access_error) at TL = 0 and TL > 0.  When we branch here,
897 * %g5 will have the trap type (with 0x200 set if we're at TL > 0).
898 *
899 * Steps: 1. Get AFSR 2. Get AFAR <40:4> 3. If not UE error skip UDP registers.
900 *	  4. Else get and clear datapath error bit(s) 4. Clear AFSR error bits
901 *	  6. package data in %g2 and %g3 7. disable all cpu errors, because
902 *	  trap is likely to be fatal 8. call cpu_async_error vis sys_trap
903 *
904 * %g3: [ 63:53 tt | 52:43 UDB_L | 42:33 UDB_U | 32:0 afsr ] - arg #3/arg #1
905 * %g2: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
906 */
907void
908async_err(void)
909{}
910
911/*
912 * The clr_datapath function clears any error bits set in the UDB regs.
913 */
914void
915clr_datapath(void)
916{}
917
918/*
919 * The get_udb_errors() function gets the current value of the
920 * Datapath Error Registers.
921 */
922/*ARGSUSED*/
923void
924get_udb_errors(uint64_t *udbh, uint64_t *udbl)
925{
926	*udbh = 0;
927	*udbl = 0;
928}
929
930#else 	/* lint */
931
932	ENTRY_NP(ce_err)
933	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3
934
935	!
936	! Check for a UE... From Kevin.Normoyle:
937	! We try to switch to the trap for the UE, but since that's
938	! a hardware pipeline, we might get to the CE trap before we
939	! can switch. The UDB and AFSR registers will have both the
940	! UE and CE bits set but the UDB syndrome and the AFAR will be
941	! for the UE.
942	!
943	or	%g0, 1, %g1		! put 1 in g1
944	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
945	andcc	%g1, %g3, %g0		! check for UE in afsr
946	bnz	async_err		! handle the UE, not the CE
947	  or	%g0, 0x63, %g5		! pass along the CE ttype
948	!
949	! Disable further CE traps to avoid recursion (stack overflow)
950	! and staying above XCALL_PIL for extended periods.
951	!
952	ldxa	[%g0]ASI_ESTATE_ERR, %g2
953	andn	%g2, 0x1, %g2		! clear bit 0 - CEEN
954	stxa	%g2, [%g0]ASI_ESTATE_ERR
955	membar	#Sync			! required
956	!
957	! handle the CE
958	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2
959
960	set	P_DER_H, %g4		! put P_DER_H in g4
961	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
962	or	%g0, 1, %g6		! put 1 in g6
963	sllx	%g6, 8, %g6		! shift g6 to <8> sdb CE
964	andcc	%g5, %g6, %g1		! check for CE in upper half
965	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
966	or	%g3, %g5, %g3		! or with afsr bits
967	bz,a	1f			! no error, goto 1f
968	  nop
969	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
970	membar	#Sync			! membar sync required
9711:
972	set	P_DER_L, %g4		! put P_DER_L in g4
973	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g6
974	andcc	%g5, %g6, %g1		! check for CE in lower half
975	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
976	or	%g3, %g5, %g3		! or with afsr bits
977	bz,a	2f			! no error, goto 2f
978	  nop
979	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
980	membar	#Sync			! membar sync required
9812:
982	or	%g0, 1, %g4		! put 1 in g4
983	sllx	%g4, 20, %g4		! shift left to <20> afsr CE
984	stxa	%g4, [%g0]ASI_AFSR	! use g4 to clear afsr CE error
985	membar	#Sync			! membar sync required
986
987	set	cpu_ce_error, %g1	! put *cpu_ce_error() in g1
988	rdpr	%pil, %g6		! read pil into %g6
989	subcc	%g6, PIL_15, %g0
990	  movneg	%icc, PIL_14, %g4 ! run at pil 14 unless already at 15
991	sethi	%hi(sys_trap), %g5
992	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
993	  movge	%icc, PIL_15, %g4	! already at pil 15
994	SET_SIZE(ce_err)
995
996	ENTRY_NP(ce_err_tl1)
997#ifndef	TRAPTRACE
998	ldxa	[%g0]ASI_AFSR, %g7
999	stxa	%g7, [%g0]ASI_AFSR
1000	membar	#Sync
1001	retry
1002#else
1003	set	ce_trap_tl1, %g1
1004	sethi	%hi(dis_err_panic1), %g4
1005	jmp	%g4 + %lo(dis_err_panic1)
1006	nop
1007#endif
1008	SET_SIZE(ce_err_tl1)
1009
1010#ifdef	TRAPTRACE
1011.celevel1msg:
1012	.asciz	"Softerror with trap tracing at tl1: AFAR 0x%08x.%08x AFSR 0x%08x.%08x";
1013
1014	ENTRY_NP(ce_trap_tl1)
1015	! upper 32 bits of AFSR already in o3
1016	mov	%o4, %o0		! save AFAR upper 32 bits
1017	mov	%o2, %o4		! lower 32 bits of AFSR
1018	mov	%o1, %o2		! lower 32 bits of AFAR
1019	mov	%o0, %o1		! upper 32 bits of AFAR
1020	set	.celevel1msg, %o0
1021	call	panic
1022	nop
1023	SET_SIZE(ce_trap_tl1)
1024#endif
1025
1026	!
1027	! async_err is the assembly glue code to get us from the actual trap
1028	! into the CPU module's C error handler.  Note that we also branch
1029	! here from ce_err() above.
1030	!
1031	ENTRY_NP(async_err)
1032	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable ecc and other cpu errors
1033	membar	#Sync			! membar sync required
1034
1035	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3
1036	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2
1037
1038	sllx	%g5, 53, %g5		! move ttype to <63:53>
1039	or	%g3, %g5, %g3		! or to afsr in g3
1040
1041	or	%g0, 1, %g1		! put 1 in g1
1042	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
1043	andcc	%g1, %g3, %g0		! check for UE in afsr
1044	bz,a,pn %icc, 2f		! if !UE skip sdb read/clear
1045	  nop
1046
1047	set	P_DER_H, %g4		! put P_DER_H in g4
1048	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into 56
1049	or	%g0, 1, %g6		! put 1 in g6
1050	sllx	%g6, 9, %g6		! shift g6 to <9> sdb UE
1051	andcc	%g5, %g6, %g1		! check for UE in upper half
1052	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
1053	or	%g3, %g5, %g3		! or with afsr bits
1054	bz,a	1f			! no error, goto 1f
1055	  nop
1056	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
1057	membar	#Sync			! membar sync required
10581:
1059	set	P_DER_L, %g4		! put P_DER_L in g4
1060	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
1061	andcc	%g5, %g6, %g1		! check for UE in lower half
1062	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
1063	or	%g3, %g5, %g3		! or with afsr bits
1064	bz,a	2f			! no error, goto 2f
1065	  nop
1066	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
1067	membar	#Sync			! membar sync required
10682:
1069	stxa	%g3, [%g0]ASI_AFSR	! clear all the sticky bits
1070	membar	#Sync			! membar sync required
1071
1072	set	cpu_async_error, %g1	! put cpu_async_error in g1
1073	sethi	%hi(sys_trap), %g5
1074	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
1075	  or	%g0, PIL_15, %g4	! run at pil 15
1076	SET_SIZE(async_err)
1077
1078	ENTRY_NP(dis_err_panic1)
1079	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable all error traps
1080	membar	#Sync
1081	! save destination routine is in g1
1082	ldxa	[%g0]ASI_AFAR, %g2	! read afar
1083	ldxa	[%g0]ASI_AFSR, %g3	! read afsr
1084	set	P_DER_H, %g4		! put P_DER_H in g4
1085	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
1086	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
1087	or	%g3, %g5, %g3		! or with afsr bits
1088	set	P_DER_L, %g4		! put P_DER_L in g4
1089	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
1090	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
1091	or	%g3, %g5, %g3		! or with afsr bits
1092	sethi	%hi(sys_trap), %g5
1093	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
1094	  sub	%g0, 1, %g4
1095	SET_SIZE(dis_err_panic1)
1096
1097	ENTRY(clr_datapath)
1098	set	P_DER_H, %o4			! put P_DER_H in o4
1099	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! read sdb upper half into o3
1100	or	%g0, 0x3, %o2			! put 0x3 in o2
1101	sllx	%o2, 8, %o2			! shift o2 to <9:8> sdb
1102	andcc	%o5, %o2, %o1			! check for UE,CE in upper half
1103	bz,a	1f				! no error, goto 1f
1104	  nop
1105	stxa	%o1, [%o4]ASI_SDB_INTR_W	! clear sdb reg UE,CE error bits
1106	membar	#Sync				! membar sync required
11071:
1108	set	P_DER_L, %o4			! put P_DER_L in o4
1109	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! read sdb lower half into o5
1110	andcc	%o5, %o2, %o1			! check for UE,CE in lower half
1111	bz,a	2f				! no error, goto 2f
1112	  nop
1113	stxa	%o1, [%o4]ASI_SDB_INTR_W	! clear sdb reg UE,CE error bits
1114	membar	#Sync
11152:
1116	retl
1117	  nop
1118	SET_SIZE(clr_datapath)
1119
1120	ENTRY(get_udb_errors)
1121	set	P_DER_H, %o3
1122	ldxa	[%o3]ASI_SDB_INTR_R, %o2
1123	stx	%o2, [%o0]
1124	set	P_DER_L, %o3
1125	ldxa	[%o3]ASI_SDB_INTR_R, %o2
1126	retl
1127	  stx	%o2, [%o1]
1128	SET_SIZE(get_udb_errors)
1129
1130#endif /* lint */
1131
1132#if defined(lint)
1133/*
1134 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
1135 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
1136 * should only be used in places where you have no choice but to look at the
1137 * tlb itself.
1138 *
1139 * Note: These two routines are required by the Estar "cpr" loadable module.
1140 */
1141/*ARGSUSED*/
1142void
1143itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
1144{}
1145
1146/*ARGSUSED*/
1147void
1148dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
1149{}
1150#else 	/* lint */
1151/*
1152 * NB - In Spitfire cpus, when reading a tte from the hardware, we
1153 * need to clear [42-41] because the general definitions in pte.h
1154 * define the PA to be [42-13] whereas Spitfire really uses [40-13].
1155 * When cloning these routines for other cpus the "andn" below is not
1156 * necessary.
1157 */
1158	ENTRY_NP(itlb_rd_entry)
1159	sllx	%o0, 3, %o0
1160#if defined(SF_ERRATA_32)
1161	sethi	%hi(FLUSH_ADDR), %g2
1162	set	MMU_PCONTEXT, %g1
1163	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
1164	flush	%g2
1165#endif
1166	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
1167	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
1168	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
1169	andn	%g1, %g2, %g1				! for details
1170	stx	%g1, [%o1]
1171	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
1172	set	TAGREAD_CTX_MASK, %o4
1173	andn	%g2, %o4, %o5
1174	retl
1175	  stx	%o5, [%o2]
1176	SET_SIZE(itlb_rd_entry)
1177
1178	ENTRY_NP(dtlb_rd_entry)
1179	sllx	%o0, 3, %o0
1180#if defined(SF_ERRATA_32)
1181	sethi	%hi(FLUSH_ADDR), %g2
1182	set	MMU_PCONTEXT, %g1
1183	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
1184	flush	%g2
1185#endif
1186	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
1187	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
1188	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
1189	andn	%g1, %g2, %g1				! itlb_rd_entry
1190	stx	%g1, [%o1]
1191	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
1192	set	TAGREAD_CTX_MASK, %o4
1193	andn	%g2, %o4, %o5
1194	retl
1195	  stx	%o5, [%o2]
1196	SET_SIZE(dtlb_rd_entry)
1197#endif /* lint */
1198
1199#if defined(lint)
1200
1201/*
1202 * routines to get and set the LSU register
1203 */
1204uint64_t
1205get_lsu(void)
1206{
1207	return ((uint64_t)0);
1208}
1209
1210/*ARGSUSED*/
1211void
1212set_lsu(uint64_t lsu)
1213{}
1214
1215#else /* lint */
1216
1217	ENTRY(set_lsu)
1218	stxa	%o0, [%g0]ASI_LSU		! store to LSU
1219	retl
1220	membar	#Sync
1221	SET_SIZE(set_lsu)
1222
1223	ENTRY(get_lsu)
1224	retl
1225	ldxa	[%g0]ASI_LSU, %o0		! load LSU
1226	SET_SIZE(get_lsu)
1227
1228#endif /* lint */
1229
1230#ifndef lint
1231	/*
1232	 * Clear the NPT (non-privileged trap) bit in the %tick
1233	 * registers. In an effort to make the change in the
1234	 * tick counter as consistent as possible, we disable
1235	 * all interrupts while we're changing the registers. We also
1236	 * ensure that the read and write instructions are in the same
1237	 * line in the instruction cache.
1238	 */
1239	ENTRY_NP(cpu_clearticknpt)
1240	rdpr	%pstate, %g1		/* save processor state */
1241	andn	%g1, PSTATE_IE, %g3	/* turn off */
1242	wrpr	%g0, %g3, %pstate	/*   interrupts */
1243	rdpr	%tick, %g2		/* get tick register */
1244	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
1245	mov	1, %g3			/* create mask */
1246	sllx	%g3, 63, %g3		/*   for NPT bit */
1247	ba,a,pt	%xcc, 2f
1248	.align	64			/* Align to I$ boundary */
12492:
1250	rdpr	%tick, %g2		/* get tick register */
1251	wrpr	%g3, %g2, %tick		/* write tick register, */
1252					/*   clearing NPT bit   */
1253#if defined(BB_ERRATA_1)
1254	rdpr	%tick, %g0		/* read (s)tick (BB_ERRATA_1) */
1255#endif
12561:
1257	jmp	%g4 + 4
1258	wrpr	%g0, %g1, %pstate	/* restore processor state */
1259	SET_SIZE(cpu_clearticknpt)
1260
1261	/*
1262	 * get_ecache_tag()
1263	 * Register Usage:
1264	 * %o0: In: 32-bit E$ index
1265	 *      Out: 64-bit E$ tag value
1266	 * %o1: In: 64-bit AFSR value after clearing sticky bits
1267	 * %o2: In: address of cpu private afsr storage
1268	 */
1269	ENTRY(get_ecache_tag)
1270	or	%g0, 1, %o4
1271	sllx	%o4, 40, %o4			! set bit 40 for e$ tag access
1272	or	%o0, %o4, %o4			! %o4 = e$ addr for tag read
1273	rdpr	%pstate, %o5
1274	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
1275	wrpr	%o0, %g0, %pstate		! clear IE, AM bits
1276
1277	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1278	stxa	%g0, [%g0]ASI_ESTATE_ERR	! Turn off Error enable
1279	membar	#Sync
1280
1281	ldxa	[%g0]ASI_AFSR, %o0
1282	srlx	%o0, P_AFSR_CP_SHIFT, %o3
1283	btst	1, %o3
1284	bz	1f
1285	  nop
1286	ldx	[%o2], %g4
1287	or	%g4, %o0, %g4			! aggregate AFSR in cpu private
1288	stx	%g4, [%o2]
12891:
1290	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1291	membar  #Sync
1292
1293	ldxa	[%o4]ASI_EC_R, %g0
1294	ldxa	[%g0]ASI_EC_DIAG, %o0		! read tag from e$ tag reg
1295
1296	ldxa	[%g0]ASI_AFSR, %o3
1297	srlx	%o3, P_AFSR_CP_SHIFT, %o4
1298	btst	1, %o4
1299	bz	2f
1300	  stx	%o3, [%o1]			! AFSR after sticky clear
1301	ldx	[%o2], %g4
1302	or	%g4, %o3, %g4			! aggregate AFSR in cpu private
1303	stx	%g4, [%o2]
13042:
1305	membar	#Sync
1306
1307	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
1308	membar	#Sync
1309	retl
1310	wrpr	%g0, %o5, %pstate
1311	SET_SIZE(get_ecache_tag)
1312
1313	/*
1314	 * check_ecache_line()
1315	 * Register Usage:
1316	 * %o0: In: 32-bit E$ index
1317	 *      Out: 64-bit accumulated AFSR
1318	 * %o1: In: address of cpu private afsr storage
1319	 */
1320	ENTRY(check_ecache_line)
1321	or	%g0, 1, %o4
1322	sllx	%o4, 39, %o4			! set bit 39 for e$ data access
1323	or	%o0, %o4, %o4		 	! %o4 = e$ addr for data read
1324
1325	rdpr	%pstate, %o5
1326	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
1327	wrpr	%o0, %g0, %pstate		! clear IE, AM bits
1328
1329	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1330	stxa	%g0, [%g0]ASI_ESTATE_ERR 	! Turn off Error enable
1331	membar	#Sync
1332
1333	ldxa 	[%g0]ASI_AFSR, %o0
1334	srlx	%o0, P_AFSR_CP_SHIFT, %o2
1335	btst	1, %o2
1336	bz	1f
1337	  clr	%o2				! loop count
1338	ldx	[%o1], %o3
1339	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
1340	stx	%o3, [%o1]
13411:
1342	stxa    %o0, [%g0]ASI_AFSR              ! clear AFSR
1343	membar	#Sync
1344
13452:
1346	ldxa	[%o4]ASI_EC_R, %g0		! Read the E$ data 8bytes each
1347	add	%o2, 1, %o2
1348	cmp	%o2, 8
1349	bl,a 	2b
1350	  add	%o4, 8, %o4
1351
1352	membar	#Sync
1353	ldxa	[%g0]ASI_AFSR, %o0		! read accumulated AFSR
1354	srlx	%o0, P_AFSR_CP_SHIFT, %o2
1355	btst	1, %o2
1356	bz	3f
1357	  nop
1358	ldx	[%o1], %o3
1359	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
1360	stx	%o3, [%o1]
13613:
1362	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1363	membar	#Sync
1364	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
1365	membar	#Sync
1366	retl
1367	wrpr	%g0, %o5, %pstate
1368	SET_SIZE(check_ecache_line)
1369#endif /* lint */
1370
1371#if defined(lint)
1372uint64_t
1373read_and_clear_afsr()
1374{
1375	return ((uint64_t)0);
1376}
1377#else	/* lint */
1378	ENTRY(read_and_clear_afsr)
1379	ldxa	[%g0]ASI_AFSR, %o0
1380	retl
1381	  stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1382	SET_SIZE(read_and_clear_afsr)
1383#endif	/* lint */
1384
1385#if defined(lint)
1386/* ARGSUSED */
1387void
1388scrubphys(uint64_t paddr, int ecache_size)
1389{
1390}
1391
1392#else	/* lint */
1393
1394/*
1395 * scrubphys - Pass in the aligned physical memory address that you want
1396 * to scrub, along with the ecache size.
1397 *
1398 *	1) Displacement flush the E$ line corresponding to %addr.
1399 *	   The first ldxa guarantees that the %addr is no longer in
1400 *	   M, O, or E (goes to I or S (if instruction fetch also happens).
1401 *	2) "Write" the data using a CAS %addr,%g0,%g0.
1402 *	   The casxa guarantees a transition from I to M or S to M.
1403 *	3) Displacement flush the E$ line corresponding to %addr.
1404 *	   The second ldxa pushes the M line out of the ecache, into the
1405 *	   writeback buffers, on the way to memory.
1406 *	4) The "membar #Sync" pushes the cache line out of the writeback
1407 *	   buffers onto the bus, on the way to dram finally.
1408 *
1409 * This is a modified version of the algorithm suggested by Gary Lauterbach.
1410 * In theory the CAS %addr,%g0,%g0 is supposed to mark the addr's cache line
1411 * as modified, but then we found out that for spitfire, if it misses in the
1412 * E$ it will probably install as an M, but if it hits in the E$, then it
1413 * will stay E, if the store doesn't happen. So the first displacement flush
1414 * should ensure that the CAS will miss in the E$.  Arrgh.
1415 */
1416
1417	ENTRY(scrubphys)
1418	or	%o1, %g0, %o2	! put ecache size in %o2
1419#ifndef HUMMINGBIRD
1420	xor	%o0, %o2, %o1	! calculate alias address
1421	add	%o2, %o2, %o3	! 2 * ecachesize in case
1422				! addr == ecache_flushaddr
1423	sub	%o3, 1, %o3	! -1 == mask
1424	and	%o1, %o3, %o1	! and with xor'd address
1425	set	ecache_flushaddr, %o3
1426	ldx	[%o3], %o3
1427
1428	rdpr	%pstate, %o4
1429	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1430	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1431
1432	ldxa	[%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1433	casxa	[%o0]ASI_MEM, %g0, %g0
1434	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1435
1436#else /* HUMMINGBIRD */
1437	/*
1438	 * UltraSPARC-IIe processor supports both 4-way set associative
1439	 * and direct map E$. We need to reconfigure E$ to direct map
1440	 * mode for data load/store before displacement flush. Also, we
1441	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1442	 * has been flushed. Keep the interrupts disabled while flushing
1443	 * E$ in this manner.
1444	 *
1445	 * For flushing a specific physical address, we start at the
1446	 * aliased address and load at set-size stride, wrapping around
1447	 * at 2*ecache-size boundary and skipping fault physical address.
1448	 * It takes 10 loads to guarantee that the physical address has
1449	 * been flushed.
1450	 *
1451	 * Usage:
1452	 *	%o0	physaddr
1453	 *	%o5	physaddr - ecache_flushaddr
1454	 *	%g1	UPA config (restored later)
1455	 *	%g2	E$ set size
1456	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1457	 *	%g4	#loads to flush phys address
1458	 *	%g5	temp
1459	 */
1460
1461	sethi	%hi(ecache_associativity), %g5
1462	ld	[%g5 + %lo(ecache_associativity)], %g5
1463	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1464	xor	%o0, %o2, %o1	! calculate alias address
1465	add	%o2, %o2, %g3	! 2 * ecachesize in case
1466				! addr == ecache_flushaddr
1467	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1468	and	%o1, %g3, %o1	! and with xor'd address
1469	sethi	%hi(ecache_flushaddr), %o3
1470	ldx	[%o3 + %lo(ecache_flushaddr)], %o3
1471
1472	rdpr	%pstate, %o4
1473	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1474	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1475
1476	! Place E$ in direct map mode for data access
1477	or	%g0, 1, %g5
1478	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1479	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
1480	or	%g1, %g5, %g5
1481	membar	#Sync
1482	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1483	membar	#Sync
1484
1485	! Displace cache line from each set of E$ starting at the
1486	! aliased address. at set-size stride, wrapping at 2*ecache_size
1487	! and skipping load from physaddr. We need 10 loads to flush the
1488	! physaddr from E$.
1489	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
1490	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
1491	or	%o1, %g0, %g5		! starting aliased offset
14922:
1493	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
14941:
1495	add	%g5, %g2, %g5		! calculate offset in next set
1496	and	%g5, %g3, %g5		! force offset within aliased range
1497	cmp	%g5, %o5		! skip loads from physaddr
1498	be,pn %ncc, 1b
1499	  nop
1500	brgz,pt	%g4, 2b
1501	  dec	%g4
1502
1503	casxa	[%o0]ASI_MEM, %g0, %g0
1504
1505	! Flush %o0 from ecahe again.
1506	! Need single displacement flush at offset %o1 this time as
1507	! the E$ is already in direct map mode.
1508	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1509
1510	membar	#Sync
1511	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1512	membar	#Sync
1513#endif /* HUMMINGBIRD */
1514	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value
1515
1516	retl
1517	membar	#Sync			! move the data out of the load buffer
1518	SET_SIZE(scrubphys)
1519
1520#endif	/* lint */
1521
1522#if defined(lint)
1523
1524/*
1525 * clearphys - Pass in the aligned physical memory address that you want
1526 * to push out, as a 64 byte block of zeros, from the ecache zero-filled.
1527 * Since this routine does not bypass the ecache, it is possible that
1528 * it could generate a UE error while trying to clear the a bad line.
1529 * This routine clears and restores the error enable flag.
1530 * TBD - Hummingbird may need similar protection
1531 */
1532/* ARGSUSED */
1533void
1534clearphys(uint64_t paddr, int ecache_size, int ecache_linesize)
1535{
1536}
1537
1538#else	/* lint */
1539
1540	ENTRY(clearphys)
1541	or	%o2, %g0, %o3	! ecache linesize
1542	or	%o1, %g0, %o2	! ecache size
1543#ifndef HUMMINGBIRD
1544	or	%o3, %g0, %o4	! save ecache linesize
1545	xor	%o0, %o2, %o1	! calculate alias address
1546	add	%o2, %o2, %o3	! 2 * ecachesize
1547	sub	%o3, 1, %o3	! -1 == mask
1548	and	%o1, %o3, %o1	! and with xor'd address
1549	set	ecache_flushaddr, %o3
1550	ldx	[%o3], %o3
1551	or	%o4, %g0, %o2	! saved ecache linesize
1552
1553	rdpr	%pstate, %o4
1554	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1555	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1556
1557	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1558	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
1559	membar	#Sync
1560
1561	! need to put zeros in the cache line before displacing it
1562
1563	sub	%o2, 8, %o2	! get offset of last double word in ecache line
15641:
1565	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
1566	sub	%o2, 8, %o2
1567	brgez,a,pt %o2, 1b
1568	nop
1569	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1570	casxa	[%o0]ASI_MEM, %g0, %g0
1571	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1572
1573	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
1574	membar	#Sync
1575
1576#else /* HUMMINGBIRD... */
1577	/*
1578	 * UltraSPARC-IIe processor supports both 4-way set associative
1579	 * and direct map E$. We need to reconfigure E$ to direct map
1580	 * mode for data load/store before displacement flush. Also, we
1581	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1582	 * has been flushed. Keep the interrupts disabled while flushing
1583	 * E$ in this manner.
1584	 *
1585	 * For flushing a specific physical address, we start at the
1586	 * aliased address and load at set-size stride, wrapping around
1587	 * at 2*ecache-size boundary and skipping fault physical address.
1588	 * It takes 10 loads to guarantee that the physical address has
1589	 * been flushed.
1590	 *
1591	 * Usage:
1592	 *	%o0	physaddr
1593	 *	%o5	physaddr - ecache_flushaddr
1594	 *	%g1	UPA config (restored later)
1595	 *	%g2	E$ set size
1596	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1597	 *	%g4	#loads to flush phys address
1598	 *	%g5	temp
1599	 */
1600
1601	or	%o3, %g0, %o4	! save ecache linesize
1602	sethi	%hi(ecache_associativity), %g5
1603	ld	[%g5 + %lo(ecache_associativity)], %g5
1604	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1605
1606	xor	%o0, %o2, %o1	! calculate alias address
1607	add	%o2, %o2, %g3	! 2 * ecachesize
1608	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1609	and	%o1, %g3, %o1	! and with xor'd address
1610	sethi	%hi(ecache_flushaddr), %o3
1611	ldx	[%o3 +%lo(ecache_flushaddr)], %o3
1612	or	%o4, %g0, %o2	! saved ecache linesize
1613
1614	rdpr	%pstate, %o4
1615	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1616	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1617
1618	! Place E$ in direct map mode for data access
1619	or	%g0, 1, %g5
1620	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1621	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
1622	or	%g1, %g5, %g5
1623	membar	#Sync
1624	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1625	membar	#Sync
1626
1627	! need to put zeros in the cache line before displacing it
1628
1629	sub	%o2, 8, %o2	! get offset of last double word in ecache line
16301:
1631	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
1632	sub	%o2, 8, %o2
1633	brgez,a,pt %o2, 1b
1634	nop
1635
1636	! Displace cache line from each set of E$ starting at the
1637	! aliased address. at set-size stride, wrapping at 2*ecache_size
1638	! and skipping load from physaddr. We need 10 loads to flush the
1639	! physaddr from E$.
1640	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
1641	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
1642	or	%o1, %g0, %g5		! starting offset
16432:
1644	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
16453:
1646	add	%g5, %g2, %g5		! calculate offset in next set
1647	and	%g5, %g3, %g5		! force offset within aliased range
1648	cmp	%g5, %o5		! skip loads from physaddr
1649	be,pn %ncc, 3b
1650	  nop
1651	brgz,pt	%g4, 2b
1652	  dec	%g4
1653
1654	casxa	[%o0]ASI_MEM, %g0, %g0
1655
1656	! Flush %o0 from ecahe again.
1657	! Need single displacement flush at offset %o1 this time as
1658	! the E$ is already in direct map mode.
1659	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1660
1661	membar	#Sync
1662	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1663	membar	#Sync
1664#endif /* HUMMINGBIRD... */
1665
1666	retl
1667	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value
1668	SET_SIZE(clearphys)
1669
1670#endif	/* lint */
1671
1672#if defined(lint)
1673/* ARGSUSED */
1674void
1675flushecacheline(uint64_t paddr, int ecache_size)
1676{
1677}
1678
1679#else	/* lint */
1680/*
1681 * flushecacheline - This is a simpler version of scrubphys
1682 * which simply does a displacement flush of the line in
1683 * question. This routine is mainly used in handling async
1684 * errors where we want to get rid of a bad line in ecache.
1685 * Note that if the line is modified and it has suffered
1686 * data corruption - we are guarantee that the hw will write
1687 * a UE back to mark the page poisoned.
1688 */
1689        ENTRY(flushecacheline)
1690        or      %o1, %g0, %o2   ! put ecache size in %o2
1691#ifndef HUMMINGBIRD
1692        xor     %o0, %o2, %o1   ! calculate alias address
1693        add     %o2, %o2, %o3   ! 2 * ecachesize in case
1694                                ! addr == ecache_flushaddr
1695        sub     %o3, 1, %o3     ! -1 == mask
1696        and     %o1, %o3, %o1   ! and with xor'd address
1697        set     ecache_flushaddr, %o3
1698        ldx     [%o3], %o3
1699
1700        rdpr    %pstate, %o4
1701        andn    %o4, PSTATE_IE | PSTATE_AM, %o5
1702        wrpr    %o5, %g0, %pstate       ! clear IE, AM bits
1703
1704	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1705	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
1706	membar	#Sync
1707
1708        ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1709	membar	#Sync
1710	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
1711        membar  #Sync
1712#else /* HUMMINGBIRD */
1713	/*
1714	 * UltraSPARC-IIe processor supports both 4-way set associative
1715	 * and direct map E$. We need to reconfigure E$ to direct map
1716	 * mode for data load/store before displacement flush. Also, we
1717	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1718	 * has been flushed. Keep the interrupts disabled while flushing
1719	 * E$ in this manner.
1720	 *
1721	 * For flushing a specific physical address, we start at the
1722	 * aliased address and load at set-size stride, wrapping around
1723	 * at 2*ecache-size boundary and skipping fault physical address.
1724	 * It takes 10 loads to guarantee that the physical address has
1725	 * been flushed.
1726	 *
1727	 * Usage:
1728	 *	%o0	physaddr
1729	 *	%o5	physaddr - ecache_flushaddr
1730	 *	%g1	error enable register
1731	 *	%g2	E$ set size
1732	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1733	 *	%g4	UPA config (restored later)
1734	 *	%g5	temp
1735	 */
1736
1737	sethi	%hi(ecache_associativity), %g5
1738	ld	[%g5 + %lo(ecache_associativity)], %g5
1739	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1740	xor	%o0, %o2, %o1	! calculate alias address
1741	add	%o2, %o2, %g3	! 2 * ecachesize in case
1742				! addr == ecache_flushaddr
1743	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1744	and	%o1, %g3, %o1	! and with xor'd address
1745	sethi	%hi(ecache_flushaddr), %o3
1746	ldx	[%o3 + %lo(ecache_flushaddr)], %o3
1747
1748	rdpr	%pstate, %o4
1749	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1750	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1751
1752	! Place E$ in direct map mode for data access
1753	or	%g0, 1, %g5
1754	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1755	ldxa	[%g0]ASI_UPA_CONFIG, %g4 ! current UPA config (restored later)
1756	or	%g4, %g5, %g5
1757	membar	#Sync
1758	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1759	membar	#Sync
1760
1761	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1762	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
1763	membar	#Sync
1764
1765	! Displace cache line from each set of E$ starting at the
1766	! aliased address. at set-size stride, wrapping at 2*ecache_size
1767	! and skipping load from physaddr. We need 10 loads to flush the
1768	! physaddr from E$.
1769	mov	HB_PHYS_FLUSH_CNT-1, %g5 ! #loads to flush physaddr
1770	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
17712:
1772	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
17733:
1774	add	%o1, %g2, %o1		! calculate offset in next set
1775	and	%o1, %g3, %o1		! force offset within aliased range
1776	cmp	%o1, %o5		! skip loads from physaddr
1777	be,pn %ncc, 3b
1778	  nop
1779	brgz,pt	%g5, 2b
1780	  dec	%g5
1781
1782	membar	#Sync
1783	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
1784        membar  #Sync
1785
1786	stxa	%g4, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1787	membar	#Sync
1788#endif /* HUMMINGBIRD */
1789        retl
1790        wrpr    %g0, %o4, %pstate
1791        SET_SIZE(flushecacheline)
1792
1793#endif	/* lint */
1794
1795#if defined(lint)
1796/* ARGSUSED */
1797void
1798ecache_scrubreq_tl1(uint64_t inum, uint64_t dummy)
1799{
1800}
1801
1802#else	/* lint */
1803/*
1804 * ecache_scrubreq_tl1 is the crosstrap handler called at ecache_calls_a_sec Hz
1805 * from the clock CPU.  It atomically increments the outstanding request
1806 * counter and, if there was not already an outstanding request,
1807 * branches to setsoftint_tl1 to enqueue an intr_req for the given inum.
1808 */
1809
1810	! Register usage:
1811	!
1812	! Arguments:
1813	! %g1 - inum
1814	!
1815	! Internal:
1816	! %g2, %g3, %g5 - scratch
1817	! %g4 - ptr. to spitfire_scrub_misc ec_scrub_outstanding.
1818	! %g6 - setsoftint_tl1 address
1819
1820	ENTRY_NP(ecache_scrubreq_tl1)
1821	set	SFPR_SCRUB_MISC + EC_SCRUB_OUTSTANDING, %g2
1822	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
1823	ld	[%g4], %g2		! cpu's ec_scrub_outstanding.
1824	set	setsoftint_tl1, %g6
1825	!
1826	! no need to use atomic instructions for the following
1827	! increment - we're at tl1
1828	!
1829	add	%g2, 0x1, %g3
1830	brnz,pn	%g2, 1f			! no need to enqueue more intr_req
1831	  st	%g3, [%g4]		! delay - store incremented counter
1832	jmp	%g6			! setsoftint_tl1(%g1) - queue intr_req
1833	  nop
1834	! not reached
18351:
1836	retry
1837	SET_SIZE(ecache_scrubreq_tl1)
1838
1839#endif	/* lint */
1840
1841#if defined(lint)
1842/*ARGSUSED*/
1843void
1844write_ec_tag_parity(uint32_t id)
1845{}
1846#else /* lint */
1847
1848	/*
1849         * write_ec_tag_parity(), which zero's the ecache tag,
1850         * marks the state as invalid and writes good parity to the tag.
1851         * Input %o1= 32 bit E$ index
1852         */
1853        ENTRY(write_ec_tag_parity)
1854        or      %g0, 1, %o4
1855        sllx    %o4, 39, %o4                    ! set bit 40 for e$ tag access
1856        or      %o0, %o4, %o4                 ! %o4 = ecache addr for tag write
1857
1858        rdpr    %pstate, %o5
1859        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
1860        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits
1861
1862        ldxa    [%g0]ASI_ESTATE_ERR, %g1
1863        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
1864        membar  #Sync
1865
1866        ba      1f
1867         nop
1868	/*
1869         * Align on the ecache boundary in order to force
1870         * ciritical code section onto the same ecache line.
1871         */
1872         .align 64
1873
18741:
1875        set     S_EC_PARITY, %o3         	! clear tag, state invalid
1876        sllx    %o3, S_ECPAR_SHIFT, %o3   	! and with good tag parity
1877        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
1878        stxa    %g0, [%o4]ASI_EC_W
1879        membar  #Sync
1880
1881        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
1882        membar  #Sync
1883        retl
1884        wrpr    %g0, %o5, %pstate
1885        SET_SIZE(write_ec_tag_parity)
1886
1887#endif /* lint */
1888
1889#if defined(lint)
1890/*ARGSUSED*/
1891void
1892write_hb_ec_tag_parity(uint32_t id)
1893{}
1894#else /* lint */
1895
1896	/*
1897         * write_hb_ec_tag_parity(), which zero's the ecache tag,
1898         * marks the state as invalid and writes good parity to the tag.
1899         * Input %o1= 32 bit E$ index
1900         */
1901        ENTRY(write_hb_ec_tag_parity)
1902        or      %g0, 1, %o4
1903        sllx    %o4, 39, %o4                    ! set bit 40 for e$ tag access
1904        or      %o0, %o4, %o4               ! %o4 = ecache addr for tag write
1905
1906        rdpr    %pstate, %o5
1907        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
1908        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits
1909
1910        ldxa    [%g0]ASI_ESTATE_ERR, %g1
1911        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
1912        membar  #Sync
1913
1914        ba      1f
1915         nop
1916	/*
1917         * Align on the ecache boundary in order to force
1918         * ciritical code section onto the same ecache line.
1919         */
1920         .align 64
19211:
1922#ifdef HUMMINGBIRD
1923        set     HB_EC_PARITY, %o3         	! clear tag, state invalid
1924        sllx    %o3, HB_ECPAR_SHIFT, %o3   	! and with good tag parity
1925#else /* !HUMMINGBIRD */
1926        set     SB_EC_PARITY, %o3         	! clear tag, state invalid
1927        sllx    %o3, SB_ECPAR_SHIFT, %o3   	! and with good tag parity
1928#endif /* !HUMMINGBIRD */
1929
1930        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
1931        stxa    %g0, [%o4]ASI_EC_W
1932        membar  #Sync
1933
1934        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
1935        membar  #Sync
1936        retl
1937        wrpr    %g0, %o5, %pstate
1938        SET_SIZE(write_hb_ec_tag_parity)
1939
1940#endif /* lint */
1941
1942#define	VIS_BLOCKSIZE		64
1943
1944#if defined(lint)
1945
1946/*ARGSUSED*/
1947int
1948dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
1949{ return (0); }
1950
1951#else
1952
1953	ENTRY(dtrace_blksuword32)
1954	save	%sp, -SA(MINFRAME + 4), %sp
1955
1956	rdpr	%pstate, %l1
1957	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
1958	wrpr	%g0, %l2, %pstate		! protect our FPU diddling
1959
1960	rd	%fprs, %l0
1961	andcc	%l0, FPRS_FEF, %g0
1962	bz,a,pt	%xcc, 1f			! if the fpu is disabled
1963	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu
1964
1965	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
19661:
1967	set	0f, %l5
1968        /*
1969         * We're about to write a block full or either total garbage
1970         * (not kernel data, don't worry) or user floating-point data
1971         * (so it only _looks_ like garbage).
1972         */
1973	ld	[%i1], %f0			! modify the block
1974	membar	#Sync
1975	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
1976	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
1977	membar	#Sync
1978	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
1979
1980	bz,a,pt	%xcc, 1f
1981	wr	%g0, %l0, %fprs			! restore %fprs
1982
1983	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
19841:
1985
1986	wrpr	%g0, %l1, %pstate		! restore interrupts
1987
1988	ret
1989	restore	%g0, %g0, %o0
1990
19910:
1992	membar	#Sync
1993	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
1994
1995	bz,a,pt	%xcc, 1f
1996	wr	%g0, %l0, %fprs			! restore %fprs
1997
1998	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
19991:
2000
2001	wrpr	%g0, %l1, %pstate		! restore interrupts
2002
2003	/*
2004	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
2005	 * which deals with watchpoints. Otherwise, just return -1.
2006	 */
2007	brnz,pt	%i2, 1f
2008	nop
2009	ret
2010	restore	%g0, -1, %o0
20111:
2012	call	dtrace_blksuword32_err
2013	restore
2014
2015	SET_SIZE(dtrace_blksuword32)
2016
2017#endif /* lint */
2018