xref: /illumos-gate/usr/src/uts/sun4u/cpu/spitfire_asm.S (revision 784279176e68a516c9e391eb98dda7bd543fa6dd)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include "assym.h"
27
28#include <sys/asm_linkage.h>
29#include <sys/mmu.h>
30#include <vm/hat_sfmmu.h>
31#include <sys/machparam.h>
32#include <sys/machcpuvar.h>
33#include <sys/machthread.h>
34#include <sys/privregs.h>
35#include <sys/asm_linkage.h>
36#include <sys/machasi.h>
37#include <sys/trap.h>
38#include <sys/spitregs.h>
39#include <sys/xc_impl.h>
40#include <sys/intreg.h>
41#include <sys/async.h>
42
43#ifdef TRAPTRACE
44#include <sys/traptrace.h>
45#endif /* TRAPTRACE */
46
47/* BEGIN CSTYLED */
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Invalidate D$ lines by storing zero to their tags through ASI_DC_TAG.
 * The macro does nothing (branches to 1:) when LSU_DC is clear in the
 * register read through ASI_LSU.  The strategy is chosen at run time
 * from the global dflush_type:
 *   FLUSHPAGE_TYPE  - walk the MMU_PAGESIZE bytes of lines selected by
 *		       the virtual color and zero the tags that match
 *   FLUSHMATCH_TYPE - walk all dcache_size bytes of lines and zero the
 *		       tags that match
 *   anything else   - zero every tag in the cache
 * Every walk starts at the highest line offset (size - linesize) and
 * steps down by dcache_linesize until offset 0 has been processed.
 *
 * arg1 = value shifted left by SF_DC_VBIT_SHIFT to form the tag to
 *	  compare against; callers in this file pass the pfnum (clobbered;
 *	  the shift sits in a non-annulled delay slot, so it always runs)
 * arg2 = virtual color (clobbered)
 * tmp1, tmp2, tmp3 = scratch registers (clobbered)
 *
 * Uses numeric local labels 1 through 5 and overwrites the integer
 * condition codes.
 */
48#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
49	ldxa	[%g0]ASI_LSU, tmp1					;\
50	btst	LSU_DC, tmp1		/* is dcache enabled? */	;\
51	bz,pn	%icc, 1f						;\
52	sethi	%hi(dcache_linesize), tmp1				;\
53	ld	[tmp1 + %lo(dcache_linesize)], tmp1			;\
54	sethi	%hi(dflush_type), tmp2					;\
55	ld	[tmp2 + %lo(dflush_type)], tmp2				;\
56	cmp	tmp2, FLUSHPAGE_TYPE					;\
57	be,pt	%icc, 2f						;\
58	sllx	arg1, SF_DC_VBIT_SHIFT, arg1	/* tag to compare */	;\
59	sethi	%hi(dcache_size), tmp3					;\
60	ld	[tmp3 + %lo(dcache_size)], tmp3				;\
61	cmp	tmp2, FLUSHMATCH_TYPE					;\
62	be,pt	%icc, 3f						;\
63	nop								;\
64	/*								\
65	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
66	 * tmp3 = cache size						\
67	 * tmp1 = cache line size					\
68	 */								\
69	sub	tmp3, tmp1, tmp2					;\
704:									\
71	stxa	%g0, [tmp2]ASI_DC_TAG					;\
72	membar	#Sync							;\
73	cmp	%g0, tmp2						;\
74	bne,pt	%icc, 4b						;\
75	sub	tmp2, tmp1, tmp2					;\
76	ba,pt	%icc, 1f						;\
77	nop								;\
78	/*								\
79	 * flushtype = FLUSHPAGE_TYPE					\
80	 * arg1 = tag to compare against				\
81	 * arg2 = virtual color						\
82	 * tmp1 = cache line size					\
83	 * tmp2 = tag from cache					\
84	 * tmp3 = counter						\
85	 */								\
862:									\
87	set	MMU_PAGESIZE, tmp3					;\
88	sllx	arg2, MMU_PAGESHIFT, arg2  /* color to dcache page */	;\
89	sub	tmp3, tmp1, tmp3					;\
904:									\
91	ldxa	[arg2 + tmp3]ASI_DC_TAG, tmp2	/* read tag */		;\
92	btst	SF_DC_VBIT_MASK, tmp2					;\
93	bz,pn	%icc, 5f	  /* branch if no valid sub-blocks */	;\
94	andn	tmp2, SF_DC_VBIT_MASK, tmp2	/* clear out v bits */	;\
95	cmp	tmp2, arg1						;\
96	bne,pn	%icc, 5f			/* br if tag miss */	;\
97	nop								;\
98	stxa	%g0, [arg2 + tmp3]ASI_DC_TAG				;\
99	membar	#Sync							;\
1005:									\
101	cmp	%g0, tmp3						;\
102	bnz,pt	%icc, 4b		/* branch if not done */	;\
103	sub	tmp3, tmp1, tmp3					;\
104	ba,pt	%icc, 1f						;\
105	nop								;\
106	/*								\
107	 * flushtype = FLUSHMATCH_TYPE					\
108	 * arg1 = tag to compare against				\
109	 * tmp1 = cache line size					\
110	 * tmp3 = cache size						\
111	 * arg2 = counter						\
112	 * tmp2 = cache tag						\
113	 */								\
1143:									\
115	sub	tmp3, tmp1, arg2					;\
1164:									\
117	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
118	btst	SF_DC_VBIT_MASK, tmp2					;\
119	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
120	andn	tmp2, SF_DC_VBIT_MASK, tmp2	/* clear out v bits */	;\
121	cmp	tmp2, arg1						;\
122	bne,pn	%icc, 5f		/* branch if tag miss */	;\
123	nop								;\
124	stxa	%g0, [arg2]ASI_DC_TAG					;\
125	membar	#Sync							;\
1265:									\
127	cmp	%g0, arg2						;\
128	bne,pt	%icc, 4b		/* branch if not done */	;\
129	sub	arg2, tmp1, arg2					;\
1301:
131
132/*
133 * macro that flushes the entire dcache color
 *
 * Stores zero to every D$ tag (ASI_DC_TAG) in the MMU_PAGESIZE-byte
 * region that starts at (arg << MMU_PAGESHIFT), walking from the last
 * line offset (MMU_PAGESIZE - dcache_linesize) down to offset 0.  No tag
 * comparison is done.  Skipped entirely when LSU_DC is clear in the
 * register read through ASI_LSU.
 *
 * arg  = virtual color (clobbered: becomes the color's base offset)
 * tmp1 = scratch, holds dcache_linesize
 * tmp2 = scratch, holds the running line offset
 *
 * Uses numeric local labels 1 and 2; overwrites the integer condition
 * codes.
134 */
135#define	DCACHE_FLUSHCOLOR(arg, tmp1, tmp2)				\
136	ldxa	[%g0]ASI_LSU, tmp1;					\
137	btst	LSU_DC, tmp1;		/* is dcache enabled? */	\
138	bz,pn	%icc, 1f;						\
139	sethi	%hi(dcache_linesize), tmp1;				\
140	ld	[tmp1 + %lo(dcache_linesize)], tmp1;			\
141	set	MMU_PAGESIZE, tmp2;					\
142	/*								\
143	 * arg = virtual color						\
144	 * tmp2 = page size						\
145	 * tmp1 = cache line size					\
146	 */								\
147	sllx	arg, MMU_PAGESHIFT, arg; /* color to dcache page */	\
148	sub	tmp2, tmp1, tmp2;					\
1492:									\
150	stxa	%g0, [arg + tmp2]ASI_DC_TAG;				\
151	membar	#Sync;							\
152	cmp	%g0, tmp2;						\
153	bne,pt	%icc, 2b;						\
154	sub	tmp2, tmp1, tmp2;					\
1551:
156
157/*
158 * macro that flushes the entire dcache
 *
 * Stores zero to every D$ tag (ASI_DC_TAG), from offset
 * (size - linesize) down to offset 0 in steps of linesize.  Skipped when
 * LSU_DC is clear in the register read through ASI_LSU.
 *
 * size     = cache size in bytes (register, not modified)
 * linesize = cache line size in bytes (register, not modified)
 * tmp      = scratch; always overwritten, because the initial "sub" sits
 *	      in the delay slot of the non-annulled "bz" and therefore
 *	      executes even when the cache is disabled
 *
 * Uses numeric local labels 1 and 2; overwrites the integer condition
 * codes.
159 */
160#define	DCACHE_FLUSHALL(size, linesize, tmp)				\
161	ldxa	[%g0]ASI_LSU, tmp;					\
162	btst	LSU_DC, tmp;		/* is dcache enabled? */	\
163	bz,pn	%icc, 1f;						\
164									\
165	sub	size, linesize, tmp;					\
1662:									\
167	stxa	%g0, [tmp]ASI_DC_TAG;					\
168	membar	#Sync;							\
169	cmp	%g0, tmp;						\
170	bne,pt	%icc, 2b;						\
171	sub	tmp, linesize, tmp;					\
1721:
173
174/*
175 * macro that flushes the entire icache
 *
 * Stores zero to every I$ tag (ASI_IC_TAG), from offset
 * (size - linesize) down to offset 0 in steps of linesize.  Skipped when
 * LSU_IC is clear in the register read through ASI_LSU.
 *
 * size     = cache size in bytes (register, not modified)
 * linesize = cache line size in bytes (register, not modified)
 * tmp      = scratch; always overwritten, because the initial "sub" sits
 *	      in the delay slot of the non-annulled "bz"
 *
 * Uses numeric local labels 1 and 2; overwrites the integer condition
 * codes.
176 */
177#define	ICACHE_FLUSHALL(size, linesize, tmp)				\
178	ldxa	[%g0]ASI_LSU, tmp;					\
179	btst	LSU_IC, tmp;		/* is icache enabled? */	\
180	bz,pn	%icc, 1f;						\
181									\
182	sub	size, linesize, tmp;					\
1832:									\
184	stxa	%g0, [tmp]ASI_IC_TAG;					\
185	membar	#Sync;							\
186	cmp	%g0, tmp;						\
187	bne,pt	%icc, 2b;						\
188	sub	tmp, linesize, tmp;					\
1891:
190
/*
 * SF_WORKAROUND(tmp1, tmp2)
 *
 * When SF_ERRATA_32 is defined: store zero to the MMU_PCONTEXT register
 * through ASI_DMMU and then issue a flush on FLUSH_ADDR.  Both scratch
 * registers are clobbered.  Without SF_ERRATA_32 the macro expands to
 * nothing.  It is invoked by the *_FLUSH_UNLOCKED_UCTXS macros right
 * before each TLB data-access read.
 */
191#ifdef SF_ERRATA_32
192#define SF_WORKAROUND(tmp1, tmp2)                               \
193        sethi   %hi(FLUSH_ADDR), tmp2                           ;\
194        set     MMU_PCONTEXT, tmp1                              ;\
195        stxa    %g0, [tmp1]ASI_DMMU                             ;\
196        flush   tmp2                                            ;
197#else
198#define SF_WORKAROUND(tmp1, tmp2)
199#endif /* SF_ERRATA_32 */
200
201/*
202 * arg1 = vaddr
203 * arg2 = ctxnum
204 *      - disable interrupts and clear address mask
205 *        to access 64 bit physaddr
206 *      - Blow out the TLB, flush user page.
207 *        . use secondary context.
208 */
209#define VTAG_FLUSHUPAGE(lbl, arg1, arg2, tmp1, tmp2, tmp3, tmp4) \
210        rdpr    %pstate, tmp1                                   ;\
211        andn    tmp1, PSTATE_IE, tmp2				;\
212        wrpr    tmp2, 0, %pstate                                ;\
213        sethi   %hi(FLUSH_ADDR), tmp2                           ;\
214        set     MMU_SCONTEXT, tmp3                              ;\
215        ldxa    [tmp3]ASI_DMMU, tmp4                            ;\
216        or      DEMAP_SECOND | DEMAP_PAGE_TYPE, arg1, arg1      ;\
217        cmp     tmp4, arg2                                      ;\
218        be,a,pt %icc, lbl##4                                  ;\
219          nop                                                   ;\
220        stxa    arg2, [tmp3]ASI_DMMU                            ;\
221lbl##4:                                                       ;\
222        stxa    %g0, [arg1]ASI_DTLB_DEMAP                       ;\
223        stxa    %g0, [arg1]ASI_ITLB_DEMAP                       ;\
224        flush   tmp2                                            ;\
225        be,a,pt %icc, lbl##5                                  ;\
226          nop                                                   ;\
227        stxa    tmp4, [tmp3]ASI_DMMU                            ;\
228        flush   tmp2                                            ;\
229lbl##5:                                                       ;\
230        wrpr    %g0, tmp1, %pstate
231
232
233/*
234 * macro that flushes all the user entries in dtlb
 *
 * lbl  = unique label prefix; lbl0 and lbl1 are defined here and VD<lbl>
 *        is passed on to VTAG_FLUSHUPAGE
235 * arg1 = dtlb entries
 *        More precisely: the index of the highest entry (entries - 1).
 *        The loop handles entries arg1, arg1 - 1, ... 0 and leaves arg1
 *        at -1, because the decrement sits in the non-annulled delay slot
 *        of the closing brgz.
 * tmp1-tmp6 = scratch registers (clobbered)
 *
 * For each entry the TTE data is read through ASI_DTLB_ACCESS at offset
 * (entry << 3) and the entry is skipped when
 *      - bit 6 of the TTE is set (presumably the lock bit, going by the
 *        macro name - confirm against the TTE layout), or
 *      - bit 63 of the TTE is clear (presumably the valid bit - confirm), or
 *      - the context field of the tag read equals KCONTEXT.
 * Every other entry is demapped with VTAG_FLUSHUPAGE.
236 *	- Before first compare:
237 *              tmp4 = tte
238 *              tmp5 = vaddr
239 *              tmp6 = cntxnum
240 */
241#define DTLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, tmp2, tmp3, \
242                                tmp4, tmp5, tmp6) \
243lbl##0:                                                       ;\
244        sllx    arg1, 3, tmp3                                   ;\
245        SF_WORKAROUND(tmp1, tmp2)                               ;\
246        ldxa    [tmp3]ASI_DTLB_ACCESS, tmp4                     ;\
247        srlx    tmp4, 6, tmp4                                   ;\
248        andcc   tmp4, 1, %g0                                    ;\
249        bnz,pn  %xcc, lbl##1                                  ;\
250        srlx    tmp4, 57, tmp4                                  ;\
251        andcc   tmp4, 1, %g0                                    ;\
252        beq,pn  %xcc, lbl##1                                  ;\
253          nop                                                   ;\
254        set     TAGREAD_CTX_MASK, tmp1                          ;\
255        ldxa    [tmp3]ASI_DTLB_TAGREAD, tmp2                    ;\
256        and     tmp2, tmp1, tmp6                                ;\
257        andn    tmp2, tmp1, tmp5                                ;\
258	set	KCONTEXT, tmp4					;\
259	cmp	tmp6, tmp4					;\
260	be	lbl##1					;\
261	  nop							;\
262        VTAG_FLUSHUPAGE(VD##lbl, tmp5, tmp6, tmp1, tmp2, tmp3, tmp4) ;\
263lbl##1:                                                       ;\
264        brgz,pt arg1, lbl##0                                  ;\
265          sub     arg1, 1, arg1
266
267
268/*
269 * macro that flushes all the user entries in itlb
 *
 * lbl  = unique label prefix; lbl0 and lbl1 are defined here and VI<lbl>
 *        is passed on to VTAG_FLUSHUPAGE
270 * arg1 = itlb entries
 *        More precisely: the index of the highest entry (entries - 1).
 *        The loop handles entries arg1, arg1 - 1, ... 0 and leaves arg1
 *        at -1, because the decrement sits in the non-annulled delay slot
 *        of the closing brgz.
 * tmp1-tmp6 = scratch registers (clobbered)
 *
 * For each entry the TTE data is read through ASI_ITLB_ACCESS at offset
 * (entry << 3) and the entry is skipped when
 *      - bit 6 of the TTE is set (presumably the lock bit, going by the
 *        macro name - confirm against the TTE layout), or
 *      - bit 63 of the TTE is clear (presumably the valid bit - confirm), or
 *      - the context field of the tag read equals KCONTEXT.
 * Every other entry is demapped with VTAG_FLUSHUPAGE.
271 *      - Before first compare:
272 *              tmp4 = tte
273 *              tmp5 = vaddr
274 *              tmp6 = cntxnum
275 */
276#define ITLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, tmp2, tmp3, \
277                                tmp4, tmp5, tmp6) \
278lbl##0:                                                       ;\
279        sllx    arg1, 3, tmp3                                   ;\
280        SF_WORKAROUND(tmp1, tmp2)                               ;\
281        ldxa    [tmp3]ASI_ITLB_ACCESS, tmp4                     ;\
282        srlx    tmp4, 6, tmp4                                   ;\
283        andcc   tmp4, 1, %g0                                    ;\
284        bnz,pn  %xcc, lbl##1                                  ;\
285        srlx    tmp4, 57, tmp4                                  ;\
286        andcc   tmp4, 1, %g0                                    ;\
287        beq,pn  %xcc, lbl##1                                  ;\
288          nop                                                   ;\
289        set     TAGREAD_CTX_MASK, tmp1                          ;\
290        ldxa    [tmp3]ASI_ITLB_TAGREAD, tmp2                    ;\
291        and     tmp2, tmp1, tmp6                                ;\
292        andn    tmp2, tmp1, tmp5                                ;\
293	set	KCONTEXT, tmp4					;\
294	cmp	tmp6, tmp4					;\
295	be	lbl##1						;\
296	  nop							;\
297        VTAG_FLUSHUPAGE(VI##lbl, tmp5, tmp6, tmp1, tmp2, tmp3, tmp4) ;\
298lbl##1:                                                       ;\
299        brgz,pt arg1, lbl##0                                  ;\
300        sub     arg1, 1, arg1
301
302
303
304/*
305 * Macro for getting to offset from 'cpu_private' ptr. The 'cpu_private'
306 * ptr is in the machcpu structure.
307 * r_or_s:	Register or symbol for the offset from 'cpu_private' ptr.
308 * scr1:	Scratch, ptr is returned in this register.
309 * scr2:	Scratch
 * label:	Branch target taken when the loaded 'cpu_private' ptr is
 *		NULL; scr1 is 0 on that path and the offset is not added.
 *
 * The compare overwrites the integer condition codes.  Note that the
 * final line of the definition ends in a continuation backslash, so the
 * line following the macro must stay blank.
310 */
311#define GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr2, label)		\
312	CPU_ADDR(scr1, scr2);						\
313	ldn	[scr1 + CPU_PRIVATE], scr1; 				\
314	cmp	scr1, 0; 						\
315	be	label;							\
316	 nop; 								\
317	add	scr1, r_or_s, scr1;  					\
318
319#ifdef HUMMINGBIRD
320/*
321 * UltraSPARC-IIe processor supports both 4-way set associative and
322 * direct map E$. For performance reasons, we flush E$ by placing it
323 * in direct map mode for data load/store and restore the state after
324 * we are done flushing it. Keep interrupts off while flushing in this
325 * manner.
326 *
327 * We flush the entire ecache by starting at one end and loading each
328 * successive ecache line for the 2*ecache-size range. We have to repeat
329 * the flush operation to guarantee that the entire ecache has been
330 * flushed.
331 *
332 * For flushing a specific physical address, we start at the aliased
333 * address and load at set-size stride, wrapping around at 2*ecache-size
334 * boundary and skipping the physical address being flushed. It takes
335 * 10 loads to guarantee that the physical address has been flushed.
336 */
337
338#define	HB_ECACHE_FLUSH_CNT	2	/* #passes over the flush range */
339#define	HB_PHYS_FLUSH_CNT	10	/* #loads to flush specific paddr */
340#endif /* HUMMINGBIRD */
341
342/* END CSTYLED */
343
344/*
345 * Spitfire MMU and Cache operations.
346 */
347
347	ENTRY_NP(vtag_flushpage)
348	/*
349	 * flush page from the tlb
350	 *
351	 * %o0 = vaddr
352	 * %o1 = sfmmup
	 *
	 * Demaps the page from both the D-TLB and the I-TLB.  When sfmmup
	 * equals ksfmmup the demap is issued on vaddr as passed in;
	 * otherwise the secondary context register is pointed at the
	 * sfmmu's context number on this CPU for the duration of the demap
	 * and put back afterwards.  Runs with PSTATE_IE cleared and
	 * restores the caller's %pstate on return.
	 *
	 * Scratch: %o2-%o5, %g1, %g2 and the integer condition codes.
353	 */
354	rdpr	%pstate, %o5
355#ifdef DEBUG
356	PANIC_IF_INTR_DISABLED_PSTR(%o5, sfdi_label1, %g1)
357#endif /* DEBUG */
358	/*
359	 * disable ints
360	 */
361	andn	%o5, PSTATE_IE, %o4
362	wrpr	%o4, 0, %pstate
363
364	/*
365	 * Then, blow out the tlb
366	 * Interrupts are disabled to prevent the secondary ctx register
367	 * from changing underneath us.
368	 */
369	sethi   %hi(ksfmmup), %o3
370        ldx     [%o3 + %lo(ksfmmup)], %o3
371        cmp     %o3, %o1
372        bne,pt   %xcc, 1f			! if not kernel as, go to 1
373	  sethi	%hi(FLUSH_ADDR), %o3		! delay slot: runs on both paths
374	/*
375	 * For KCONTEXT demaps use primary. type = page implicitly
376	 */
377	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
378	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
379	flush	%o3
380	b	5f
381	  nop
3821:
383	/*
384	 * User demap.  We need to set the secondary context properly.
385	 * %o0 = vaddr
386	 * %o1 = sfmmup
387	 * %o3 = FLUSH_ADDR
388	 */
389	SFMMU_CPU_CNUM(%o1, %g1, %g2)	/* %g1 = sfmmu cnum on this CPU */
390
391	set	MMU_SCONTEXT, %o4
392	ldxa	[%o4]ASI_DMMU, %o2		/* rd old ctxnum */
393	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %o0, %o0
394	cmp	%o2, %g1
395	be,pt	%icc, 4f			/* already the right ctx */
396	  nop
397	stxa	%g1, [%o4]ASI_DMMU		/* wr new ctxnum */
3984:
399	stxa	%g0, [%o0]ASI_DTLB_DEMAP
400	stxa	%g0, [%o0]ASI_ITLB_DEMAP
401	flush	%o3
	/*
	 * The condition codes still come from the cmp above (nothing in
	 * between modifies them): equal means the secondary context
	 * register was never changed, so there is nothing to restore.
	 */
402	be,pt	%icc, 5f
403	  nop
404	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
405	flush	%o3
4065:
407	retl
408	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
409	SET_SIZE(vtag_flushpage)
411
411        .seg    ".text"
	! Message handed to panic() by vtag_flushall below.
412.flushallmsg:
413        .asciz  "sfmmu_asm: unimplemented flush operation"
414
	!
	! vtag_flushall: not implemented in this CPU module.  Calling it
	! panics with .flushallmsg; the address of the message is completed
	! in the delay slot of the call.
	!
415        ENTRY_NP(vtag_flushall)
416        sethi   %hi(.flushallmsg), %o0
417        call    panic
418          or    %o0, %lo(.flushallmsg), %o0
419        SET_SIZE(vtag_flushall)
421
421	ENTRY_NP(vtag_flushall_uctxs)
422	/*
423	 * flush entire DTLB/ITLB.
	 *
	 * Only the entries accepted by the *_FLUSH_UNLOCKED_UCTXS macros
	 * are demapped (see those macros for the skip conditions).  The
	 * TLB sizes are read from this CPU's element of the cpunodes
	 * array; the macros are handed the highest entry index
	 * (size - 1).
	 *
	 * No arguments.  Scratch: %g1-%g4, %o2-%o5, condition codes.
424	 */
425	CPU_INDEX(%g1, %g2)
426	mulx	%g1, CPU_NODE_SIZE, %g1
427	set	cpunodes, %g2
428	add	%g1, %g2, %g1			! %g1 = &cpunodes[cpu index]
429	lduh	[%g1 + ITLB_SIZE], %g2		! %g2 = # entries in ITLB
430	lduh	[%g1 + DTLB_SIZE], %g1		! %g1 = # entries in DTLB
431	sub	%g2, 1, %g2			! %g2 = # entries in ITLB - 1
432	sub	%g1, 1, %g1			! %g1 = # entries in DTLB - 1
433
434        !
435        ! Flush itlb's
436        !
437        ITLB_FLUSH_UNLOCKED_UCTXS(I, %g2, %g3, %g4, %o2, %o3, %o4, %o5)
438
439	!
440        ! Flush dtlb's
441        !
442        DTLB_FLUSH_UNLOCKED_UCTXS(D, %g1, %g3, %g4, %o2, %o3, %o4, %o5)
443
444	membar  #Sync
445	retl
446	  nop
447
448	SET_SIZE(vtag_flushall_uctxs)
450
450	ENTRY_NP(vtag_flushpage_tl1)
451	/*
452	 * x-trap to flush page from tlb and tsb
453	 *
454	 * %g1 = vaddr, zero-extended on 32-bit kernel
455	 * %g2 = sfmmup
456	 *
457	 * assumes TSBE_TAG = 0
	 *
	 * The vaddr is first rounded down to an MMU_PAGESIZE boundary.
	 * Unlike vtag_flushpage, the secondary context register is always
	 * written with the sfmmu's context number and always restored.
	 * Exits with "retry".  Scratch: %g3, %g4, %g5.
458	 */
459	srln	%g1, MMU_PAGESHIFT, %g1
460	slln	%g1, MMU_PAGESHIFT, %g1			/* g1 = vaddr */
461
462	SFMMU_CPU_CNUM(%g2, %g3, %g4)   /* %g3 = sfmmu cnum on this CPU */
463
464	/* We need to set the secondary context properly. */
465	set	MMU_SCONTEXT, %g4
466	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
467	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
468	stxa	%g3, [%g4]ASI_DMMU		/* wr new ctxnum */
469	stxa	%g0, [%g1]ASI_DTLB_DEMAP
470	stxa	%g0, [%g1]ASI_ITLB_DEMAP
471	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
472	membar #Sync
473	retry
474	SET_SIZE(vtag_flushpage_tl1)
476
476	ENTRY_NP(vtag_flush_pgcnt_tl1)
477	/*
478	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
479	 *
480	 * %g1 = vaddr, zero-extended on 32-bit kernel
481	 * %g2 = <sfmmup58 | pgcnt6>
	 *	 The bits selected by SFMMU_PGCNT_MASK carry (pgcnt - 1);
	 *	 the remaining bits are the sfmmup.
482	 *
483	 * NOTE: this handler relies on the fact that no
484	 *	interrupts or traps can occur during the loop
485	 *	issuing the TLB_DEMAP operations. It is assumed
486	 *	that interrupts are disabled and this code is
487	 *	fetching from the kernel locked text address.
488	 *
489	 * assumes TSBE_TAG = 0
	 *
	 * The secondary context register is switched once, before the
	 * loop, and restored once after it.  Exits with "retry".
	 * Scratch: %g3-%g6 (%g1 and %g2 are consumed).
490	 */
491	srln	%g1, MMU_PAGESHIFT, %g1
492	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
493	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
494
495	set	SFMMU_PGCNT_MASK, %g4
496	and	%g4, %g2, %g3			/* g3 = pgcnt - 1 */
497	add	%g3, 1, %g3			/* g3 = pgcnt */
498
499	andn	%g2, SFMMU_PGCNT_MASK, %g2	/* g2 = sfmmup */
500
501	SFMMU_CPU_CNUM(%g2, %g5, %g6)   ! %g5 = sfmmu cnum on this CPU
502
503	/* We need to set the secondary context properly. */
504	set	MMU_SCONTEXT, %g4
505	ldxa	[%g4]ASI_DMMU, %g6		/* read old ctxnum */
506	stxa	%g5, [%g4]ASI_DMMU		/* write new ctxnum */
507
508	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
509	sethi	 %hi(FLUSH_ADDR), %g5		/* g5 is reused: FLUSH_ADDR */
5101:
511	stxa	%g0, [%g1]ASI_DTLB_DEMAP
512	stxa	%g0, [%g1]ASI_ITLB_DEMAP
513	flush	%g5
514	deccc	%g3				/* decr pgcnt */
515	bnz,pt	%icc,1b
516	  add	%g1, %g2, %g1			/* go to nextpage */
517
518	stxa	%g6, [%g4]ASI_DMMU		/* restore old ctxnum */
519	membar #Sync
520	retry
521	SET_SIZE(vtag_flush_pgcnt_tl1)
523
523	! Not implemented on US1/US2
	! The handler is a no-op: it returns immediately with "retry".
524	ENTRY_NP(vtag_flushall_tl1)
525	retry
526	SET_SIZE(vtag_flushall_tl1)
528
529/*
530 * vac_flushpage(pfnum, color)
531 *	Flush 1 8k page of the D-$ with physical page = pfnum
532 *	Algorithm:
533 *		The spitfire dcache is a 16k direct mapped virtual indexed,
534 *		physically tagged cache.  Given the pfnum we read all cache
535 *		lines for the corresponding page in the cache (determined by
536 *		the color).  Each cache line is compared with
537 *		the tag created from the pfnum. If the tags match we flush
538 *		the line.
539 */
	/*
	 * dflush_type: global 32-bit word read by DCACHE_FLUSHPAGE to pick
	 * its flush strategy (FLUSHPAGE_TYPE, FLUSHMATCH_TYPE, or anything
	 * else for a full flush).  Initialized to FLUSHPAGE_TYPE and kept
	 * in .data.
	 */
539	.seg	".data"
540	.align	8
541	.global	dflush_type
542dflush_type:
543	.word	FLUSHPAGE_TYPE
544	.seg	".text"
546
546	ENTRY(vac_flushpage)
547	/*
548	 * flush page from the d$
549	 *
550	 * %o0 = pfnum, %o1 = color
	 *
	 * Thin wrapper around DCACHE_FLUSHPAGE; %o0-%o4 and the integer
	 * condition codes are clobbered.  No return value is set up.
551	 */
552	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
553	retl
554	nop
555	SET_SIZE(vac_flushpage)
557
557	ENTRY_NP(vac_flushpage_tl1)
558	/*
559	 * x-trap to flush page from the d$
560	 *
561	 * %g1 = pfnum, %g2 = color
	 *
	 * Same operation as vac_flushpage, using global registers
	 * %g1-%g5 and returning with "retry".
562	 */
563	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
564	retry
565	SET_SIZE(vac_flushpage_tl1)
567
567	ENTRY(vac_flushcolor)
568	/*
	 * flush every d$ line of one virtual color
	 *
569	 * %o0 = vcolor
	 *
	 * Thin wrapper around DCACHE_FLUSHCOLOR; %o0-%o2 and the integer
	 * condition codes are clobbered.
570	 */
571	DCACHE_FLUSHCOLOR(%o0, %o1, %o2)
572	retl
573	  nop
574	SET_SIZE(vac_flushcolor)
576
576	ENTRY(vac_flushcolor_tl1)
577	/*
	 * flush every d$ line of one virtual color; exits with "retry"
	 *
578	 * %g1 = vcolor
	 *
	 * Uses %g1-%g3 via DCACHE_FLUSHCOLOR.
	 * NOTE(review): declared with ENTRY rather than ENTRY_NP, unlike the
	 * other *_tl1 handlers in this file - confirm that is intended.
579	 */
580	DCACHE_FLUSHCOLOR(%g1, %g2, %g3)
581	retry
582	SET_SIZE(vac_flushcolor_tl1)
584
585
	! Panic message used by the DEBUG check at the top of init_mondo.
	! The trailing .align restores 4-byte alignment after the string.
585	.global _dispatch_status_busy
586_dispatch_status_busy:
587	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
588	.align	4
590
591/*
592 * Determine whether or not the IDSR is busy.
593 * Entry: no arguments
594 * Returns: 1 if busy, 0 otherwise
595 */
596	ENTRY(idsr_busy)
597	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
598	clr	%o0
599	btst	IDSR_BUSY, %g1
600	bz,a,pt	%xcc, 1f
601	mov	1, %o0
6021:
603	retl
604	nop
605	SET_SIZE(idsr_busy)
606
607/*
608 * Setup interrupt dispatch data registers
609 * Entry:
610 *	%o0 - function or inumber to call
611 *	%o1, %o2 - arguments (2 uint64_t's)
612 */
613	.seg "text"
614
615	ENTRY(init_mondo)
616#ifdef DEBUG
617	!
618	! IDSR should not be busy at the moment
619	!
620	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
621	btst	IDSR_BUSY, %g1
622	bz,pt	%xcc, 1f
623	nop
624
625	sethi	%hi(_dispatch_status_busy), %o0
626	call	panic
627	or	%o0, %lo(_dispatch_status_busy), %o0
628#endif /* DEBUG */
629
630	ALTENTRY(init_mondo_nocheck)
631	!
632	! interrupt vector dispach data reg 0
633	!
6341:
635	mov	IDDR_0, %g1
636	mov	IDDR_1, %g2
637	mov	IDDR_2, %g3
638	stxa	%o0, [%g1]ASI_INTR_DISPATCH
639
640	!
641	! interrupt vector dispach data reg 1
642	!
643	stxa	%o1, [%g2]ASI_INTR_DISPATCH
644
645	!
646	! interrupt vector dispach data reg 2
647	!
648	stxa	%o2, [%g3]ASI_INTR_DISPATCH
649
650	retl
651	membar	#Sync			! allowed to be in the delay slot
652	SET_SIZE(init_mondo)
653
654/*
655 * Ship mondo to upaid
656 */
657	ENTRY_NP(shipit)
658	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = upa id
659	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
660	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
661#if defined(SF_ERRATA_54)
662	membar	#Sync				! store must occur before load
663	mov	0x20, %g3			! UDBH Control Register Read
664	ldxa	[%g3]ASI_SDB_INTR_R, %g0
665#endif
666	retl
667	membar	#Sync
668	SET_SIZE(shipit)
669
670
671/*
672 * flush_instr_mem:
673 *	Flush a portion of the I-$ starting at vaddr
674 * 	%o0 vaddr
675 *	%o1 bytes to be flushed
676 */
677
678	ENTRY(flush_instr_mem)
679	membar	#StoreStore				! Ensure the stores
680							! are globally visible
6811:
682	flush	%o0
683	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes = bytes-0x20
684	bgu,pt	%ncc, 1b
685	add	%o0, ICACHE_FLUSHSZ, %o0		! vaddr = vaddr+0x20
686
687	retl
688	nop
689	SET_SIZE(flush_instr_mem)
690
691/*
692 * flush_ecache:
693 * Flush the entire e$ using displacement flush by reading through a
694 * physically contiguous area. We use mmu bypass asi (ASI_MEM) while
695 * reading this physical address range so that data doesn't go to d$.
696 * incoming arguments:
697 *	%o0 - 64 bit physical address
698 *	%o1 - size of address range to read
699 *	%o2 - ecache linesize
700 */
701	ENTRY(flush_ecache)
702#ifndef HUMMINGBIRD
703	b	2f
704	  nop
7051:
706	ldxa	[%o0 + %o1]ASI_MEM, %g0	! start reading from physaddr + size
7072:
708	subcc	%o1, %o2, %o1
709	bcc,a,pt %ncc, 1b
710	  nop
711
712#else /* HUMMINGBIRD */
713	/*
714	 * UltraSPARC-IIe processor supports both 4-way set associative
715	 * and direct map E$. For performance reasons, we flush E$ by
716	 * placing it in direct map mode for data load/store and restore
717	 * the state after we are done flushing it. It takes 2 iterations
718	 * to guarantee that the entire ecache has been flushed.
719	 *
720	 * Keep the interrupts disabled while flushing E$ in this manner.
721	 */
722	rdpr	%pstate, %g4		! current pstate (restored later)
723	andn	%g4, PSTATE_IE, %g5
724	wrpr	%g0, %g5, %pstate	! disable interrupts
725
726	! Place E$ in direct map mode for data access
727	or	%g0, 1, %g5
728	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
729	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
730	or	%g1, %g5, %g5
731	membar	#Sync
732	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
733	membar	#Sync
734
735	! flush entire ecache HB_ECACHE_FLUSH_CNT times
736	mov	HB_ECACHE_FLUSH_CNT-1, %g5
7372:
738	sub	%o1, %o2, %g3		! start from last entry
7391:
740	ldxa	[%o0 + %g3]ASI_MEM, %g0	! start reading from physaddr + size
741	subcc	%g3, %o2, %g3
742	bgeu,a,pt %ncc, 1b
743	  nop
744	brgz,a,pt %g5, 2b
745	  dec	%g5
746
747	membar	#Sync
748	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config reg
749	membar	#Sync
750	wrpr	%g0, %g4, %pstate	! restore earlier pstate
751#endif /* HUMMINGBIRD */
752
753	retl
754	nop
755	SET_SIZE(flush_ecache)
756
757/*
758 * void kdi_flush_idcache(int dcache_size, int dcache_linesize,
759 *			int icache_size, int icache_linesize)
760 */
761	ENTRY(kdi_flush_idcache)
762	DCACHE_FLUSHALL(%o0, %o1, %g1)
763	ICACHE_FLUSHALL(%o2, %o3, %g1)
764	membar	#Sync
765	retl
766	nop
767	SET_SIZE(kdi_flush_idcache)
768
769
770/*
771 * void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
772 * 			uint64_t *oafsr, uint64_t *acc_afsr)
773 *
774 * Get ecache data and tag.  The ecache_idx argument is assumed to be aligned
775 * on a 64-byte boundary.  The corresponding AFSR value is also read for each
776 * 8 byte ecache data obtained. The ecache data is assumed to be a pointer
777 * to an array of 16 uint64_t's (e$data & afsr value).  The action to read the
778 * data and tag should be atomic to make sense.  We will be executing at PIL15
779 * and will disable IE, so nothing can occur between the two reads.  We also
780 * assume that the execution of this code does not interfere with what we are
781 * reading - not really possible, but we'll live with it for now.
782 * We also pass the old AFSR value before clearing it, and caller will take
783 * appropriate actions if the important bits are non-zero.
784 *
785 * If the caller wishes to track the AFSR in cases where the CP bit is
786 * set, an address should be passed in for acc_afsr.  Otherwise, this
787 * argument may be null.
788 *
789 * Register Usage:
790 * i0: In: 32-bit e$ index
791 * i1: In: addr of e$ data
792 * i2: In: addr of e$ tag
793 * i3: In: addr of old afsr
794 * i4: In: addr of accumulated afsr - may be null
795 */
795	ENTRY(get_ecache_dtag)
	!
	! Summary of the sequence below:
	!   1. clear PSTATE_IE and PSTATE_AM, save and zero ASI_ESTATE_ERR
	!   2. store the current AFSR to *oafsr (%i3)
	!   3. read the E$ tag and store it to *tag (%i2)
	!   4. eight times: clear the AFSR, read 8 bytes of E$ data, then
	!      store the data followed by the AFSR seen after that read
	!      (16 bytes per iteration) through %i1
	!   5. clear the AFSR, restore ASI_ESTATE_ERR and %pstate
	! Whenever an AFSR value has the bit at P_AFSR_CP_SHIFT set and
	! acc_afsr (%i4) is non-NULL, the value is or'ed into *acc_afsr.
	!
	! Global scratch: %g1 (saved ESTATE_ERR), %g4, %g6 (E$ data address).
	!
796	save	%sp, -SA(MINFRAME), %sp
797	or	%g0, 1, %l4
798	sllx	%l4, 39, %l4	! set bit 39 for e$ data access
799	or	%i0, %l4, %g6	! %g6 = e$ addr for data read
800	sllx	%l4, 1, %l4	! set bit 40 for e$ tag access
801	or	%i0, %l4, %l4	! %l4 = e$ addr for tag read
802
	! %i0 (the index) has been folded into %g6/%l4 and is scratch from here
803	rdpr    %pstate, %i5
804	andn    %i5, PSTATE_IE | PSTATE_AM, %i0
805	wrpr    %i0, %g0, %pstate       ! clear IE, AM bits
806
807	ldxa    [%g0]ASI_ESTATE_ERR, %g1	! save error enables for restore
808	stxa    %g0, [%g0]ASI_ESTATE_ERR        ! disable errors
809	membar  #Sync
810
811	ldxa	[%g0]ASI_AFSR, %i0      ! grab the old-afsr before tag read
812	stx     %i0, [%i3]		! write back the old-afsr
813
814	ldxa    [%l4]ASI_EC_R, %g0      ! read tag into E$ tag reg
815	ldxa    [%g0]ASI_EC_DIAG, %i0   ! read tag from E$ tag reg
816	stx     %i0, [%i2]              ! write back tag result
817
818	clr	%i2			! loop count (%i2 is reused; tag is stored)
819
820	brz	%i4, 1f			! acc_afsr == NULL?
821	  ldxa	[%g0]ASI_AFSR, %i0      ! grab the old-afsr before clearing
822	srlx	%i0, P_AFSR_CP_SHIFT, %l0
823	btst	1, %l0
824	bz	1f			! CP not set: nothing to accumulate
825	  nop
826	ldx	[%i4], %g4
827	or	%g4, %i0, %g4		! aggregate AFSR in cpu private
828	stx	%g4, [%i4]
8291:
830	stxa    %i0, [%g0]ASI_AFSR	! clear AFSR
831	membar  #Sync
832	ldxa    [%g6]ASI_EC_R, %i0      ! read the 8byte E$data
833	stx     %i0, [%i1]              ! save the E$data
834	add     %g6, 8, %g6
835	add     %i1, 8, %i1
836	ldxa    [%g0]ASI_AFSR, %i0      ! read AFSR for this 8byte read
837	srlx	%i0, P_AFSR_CP_SHIFT, %l0
838	btst	1, %l0
839	bz	2f
840	  stx     %i0, [%i1]		! save the AFSR (delay slot: always runs)
841
842	brz	%i4, 2f			! acc_afsr == NULL?
843	  nop
844	ldx	[%i4], %g4
845	or	%g4, %i0, %g4		! aggregate AFSR in cpu private
846	stx	%g4, [%i4]
8472:
848	add     %i2, 8, %i2
849	cmp     %i2, 64
850	bl,a    1b
851	  add     %i1, 8, %i1		! annulled: step past the AFSR slot only when looping
852	stxa    %i0, [%g0]ASI_AFSR              ! clear AFSR
853	membar  #Sync
854	stxa    %g1, [%g0]ASI_ESTATE_ERR        ! restore error enable
855	membar  #Sync
856	wrpr    %g0, %i5, %pstate
857	ret
858	  restore
859	SET_SIZE(get_ecache_dtag)
861
862/*
863 * The ce_err function handles trap type 0x63 (corrected_ECC_error) at tl=0.
864 * Steps: 1. GET AFSR  2. Get AFAR <40:4> 3. Get datapath error status
865 *	  4. Clear datapath error bit(s) 5. Clear AFSR error bit
866 *	  6. package data in %g2 and %g3 7. call cpu_ce_error vis sys_trap
867 * %g2: [ 52:43 UDB lower | 42:33 UDB upper | 32:0 afsr ] - arg #3/arg #1
868 * %g3: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
869 */
870	ENTRY_NP(ce_err)
871	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3
872
873	!
874	! Check for a UE... From Kevin.Normoyle:
875	! We try to switch to the trap for the UE, but since that's
876	! a hardware pipeline, we might get to the CE trap before we
877	! can switch. The UDB and AFSR registers will have both the
878	! UE and CE bits set but the UDB syndrome and the AFAR will be
879	! for the UE.
880	!
881	or	%g0, 1, %g1		! put 1 in g1
882	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
883	andcc	%g1, %g3, %g0		! check for UE in afsr
884	bnz	async_err		! handle the UE, not the CE
885	  or	%g0, 0x63, %g5		! pass along the CE ttype
886	!
887	! Disable further CE traps to avoid recursion (stack overflow)
888	! and staying above XCALL_PIL for extended periods.
889	!
890	ldxa	[%g0]ASI_ESTATE_ERR, %g2
891	andn	%g2, 0x1, %g2		! clear bit 0 - CEEN
892	stxa	%g2, [%g0]ASI_ESTATE_ERR
893	membar	#Sync			! required
894	!
895	! handle the CE
896	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2
897
898	set	P_DER_H, %g4		! put P_DER_H in g4
899	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
900	or	%g0, 1, %g6		! put 1 in g6
901	sllx	%g6, 8, %g6		! shift g6 to <8> sdb CE
902	andcc	%g5, %g6, %g1		! check for CE in upper half
903	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
904	or	%g3, %g5, %g3		! or with afsr bits
905	bz,a	1f			! no error, goto 1f
906	  nop
907	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
908	membar	#Sync			! membar sync required
9091:
910	set	P_DER_L, %g4		! put P_DER_L in g4
911	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g6
912	andcc	%g5, %g6, %g1		! check for CE in lower half
913	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
914	or	%g3, %g5, %g3		! or with afsr bits
915	bz,a	2f			! no error, goto 2f
916	  nop
917	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
918	membar	#Sync			! membar sync required
9192:
920	or	%g0, 1, %g4		! put 1 in g4
921	sllx	%g4, 20, %g4		! shift left to <20> afsr CE
922	stxa	%g4, [%g0]ASI_AFSR	! use g4 to clear afsr CE error
923	membar	#Sync			! membar sync required
924
925	set	cpu_ce_error, %g1	! put *cpu_ce_error() in g1
926	rdpr	%pil, %g6		! read pil into %g6
927	subcc	%g6, PIL_15, %g0
928	  movneg	%icc, PIL_14, %g4 ! run at pil 14 unless already at 15
929	sethi	%hi(sys_trap), %g5
930	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
931	  movge	%icc, PIL_15, %g4	! already at pil 15
932	SET_SIZE(ce_err)
933
	!
	! ce_err_tl1: corrected-ECC-error handler variant (name indicates
	! use at TL > 0).
	! Without TRAPTRACE: write the AFSR value just read back to the AFSR
	! and retry the trapped instruction.
	! With TRAPTRACE: jump to dis_err_panic1 with %g1 = ce_trap_tl1.
	!
933	ENTRY_NP(ce_err_tl1)
934#ifndef	TRAPTRACE
935	ldxa	[%g0]ASI_AFSR, %g7
936	stxa	%g7, [%g0]ASI_AFSR
937	membar	#Sync
938	retry
939#else
940	set	ce_trap_tl1, %g1
941	sethi	%hi(dis_err_panic1), %g4
942	jmp	%g4 + %lo(dis_err_panic1)
943	nop
944#endif
945	SET_SIZE(ce_err_tl1)
947
947#ifdef	TRAPTRACE
	! Format string for the panic issued by ce_trap_tl1; it takes four
	! 32-bit values: AFAR upper, AFAR lower, AFSR upper, AFSR lower.
948.celevel1msg:
949	.asciz	"Softerror with trap tracing at tl1: AFAR 0x%08x.%08x AFSR 0x%08x.%08x";
950
	!
	! ce_trap_tl1: shuffle the register halves into panic()'s argument
	! order and panic with .celevel1msg.  The moves below read:
	!	%o1 = lower 32 bits of AFAR	%o2 = lower 32 bits of AFSR
	!	%o3 = upper 32 bits of AFSR	%o4 = upper 32 bits of AFAR
	! On the call: %o0 = format, %o1/%o2 = AFAR upper/lower,
	! %o3/%o4 = AFSR upper/lower.
	!
951	ENTRY_NP(ce_trap_tl1)
952	! upper 32 bits of AFSR already in o3
953	mov	%o4, %o0		! save AFAR upper 32 bits
954	mov	%o2, %o4		! lower 32 bits of AFSR
955	mov	%o1, %o2		! lower 32 bits of AFAR
956	mov	%o0, %o1		! upper 32 bits of AFAR
957	set	.celevel1msg, %o0
958	call	panic
959	nop
960	SET_SIZE(ce_trap_tl1)
961#endif
963
964/*
965 * The async_err function handles trap types 0x0A (instruction_access_error)
966 * and 0x32 (data_access_error) at TL = 0 and TL > 0.  When we branch here,
967 * %g5 will have the trap type (with 0x200 set if we're at TL > 0).
968 *
969 * Steps: 1. Get AFSR 2. Get AFAR <40:4> 3. If not UE error skip UDP registers.
970 *	  4. Else get and clear datapath error bit(s) 4. Clear AFSR error bits
971 *	  6. package data in %g2 and %g3 7. disable all cpu errors, because
972 *	  trap is likely to be fatal 8. call cpu_async_error vis sys_trap
973 *
974 * %g3: [ 63:53 tt | 52:43 UDB_L | 42:33 UDB_U | 32:0 afsr ] - arg #3/arg #1
975 * %g2: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
976 *
977 * async_err is the assembly glue code to get us from the actual trap
978 * into the CPU module's C error handler.  Note that we also branch
979 * here from ce_err() above.
980 */
981	ENTRY_NP(async_err)
982	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable ecc and other cpu errors
983	membar	#Sync			! membar sync required
984
985	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3
986	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2
987
988	sllx	%g5, 53, %g5		! move ttype to <63:53>
989	or	%g3, %g5, %g3		! or to afsr in g3
990
991	or	%g0, 1, %g1		! put 1 in g1
992	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
993	andcc	%g1, %g3, %g0		! check for UE in afsr
994	bz,a,pn %icc, 2f		! if !UE skip sdb read/clear
995	  nop
996
997	set	P_DER_H, %g4		! put P_DER_H in g4
998	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into 56
999	or	%g0, 1, %g6		! put 1 in g6
1000	sllx	%g6, 9, %g6		! shift g6 to <9> sdb UE
1001	andcc	%g5, %g6, %g1		! check for UE in upper half
1002	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
1003	or	%g3, %g5, %g3		! or with afsr bits
1004	bz,a	1f			! no error, goto 1f
1005	  nop
1006	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
1007	membar	#Sync			! membar sync required
10081:
1009	set	P_DER_L, %g4		! put P_DER_L in g4
1010	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
1011	andcc	%g5, %g6, %g1		! check for UE in lower half
1012	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
1013	or	%g3, %g5, %g3		! or with afsr bits
1014	bz,a	2f			! no error, goto 2f
1015	  nop
1016	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
1017	membar	#Sync			! membar sync required
10182:
1019	stxa	%g3, [%g0]ASI_AFSR	! clear all the sticky bits
1020	membar	#Sync			! membar sync required
1021
1022	RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
1023async_err_resetskip:
1024
1025	set	cpu_async_error, %g1	! put cpu_async_error in g1
1026	sethi	%hi(sys_trap), %g5
1027	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
1028	  or	%g0, PIL_15, %g4	! run at pil 15
1029	SET_SIZE(async_err)
1030
1031	ENTRY_NP(dis_err_panic1)
1032	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable all error traps
1033	membar	#Sync
1034	! save destination routine is in g1
1035	ldxa	[%g0]ASI_AFAR, %g2	! read afar
1036	ldxa	[%g0]ASI_AFSR, %g3	! read afsr
1037	set	P_DER_H, %g4		! put P_DER_H in g4
1038	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
1039	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
1040	or	%g3, %g5, %g3		! or with afsr bits
1041	set	P_DER_L, %g4		! put P_DER_L in g4
1042	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
1043	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
1044	or	%g3, %g5, %g3		! or with afsr bits
1045
1046	RESET_USER_RTT_REGS(%g4, %g5, dis_err_panic1_resetskip)
1047dis_err_panic1_resetskip:
1048
1049	sethi	%hi(sys_trap), %g5
1050	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
1051	  sub	%g0, 1, %g4
1052	SET_SIZE(dis_err_panic1)
1053
/*
 * clr_datapath - clear the UE and CE error bits (bits <9:8>) in both
 * halves of the UDB (sdb) datapath error registers.
 *
 * Takes no arguments and returns nothing.  Each half is read, and the
 * error bits found set are written back to clear them; a half with
 * neither bit set is not written.
 */
	ENTRY(clr_datapath)
	set	P_DER_H, %o4			! upper half first
	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! %o5 = upper sdb contents
	mov	0x3, %o2
	sllx	%o2, 8, %o2			! %o2 = UE|CE mask, bits <9:8>
	andcc	%o5, %o2, %o1			! %o1 = error bits that are set
	bz,a	1f				! none set: nothing to clear
	  nop
	stxa	%o1, [%o4]ASI_SDB_INTR_W	! write the set bits back to clear them
	membar	#Sync				! required after the store above
1:
	set	P_DER_L, %o4			! then the lower half
	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! %o5 = lower sdb contents
	andcc	%o5, %o2, %o1			! %o1 = error bits that are set
	bz,a	2f				! none set: nothing to clear
	  nop
	stxa	%o1, [%o4]ASI_SDB_INTR_W	! write the set bits back to clear them
	membar	#Sync
2:
	retl
	  nop
	SET_SIZE(clr_datapath)
1079
/*
 * get_udb_errors - read the current contents of both Datapath Error
 * Registers without modifying them.
 *
 * In:	%o0 = address of a 64-bit location that receives the upper
 *	      (P_DER_H) register
 *	%o1 = address of a 64-bit location that receives the lower
 *	      (P_DER_L) register
 */
	ENTRY(get_udb_errors)
	set	P_DER_H, %o3
	ldxa	[%o3]ASI_SDB_INTR_R, %o2	! upper half
	stx	%o2, [%o0]			! store through the first argument
	set	P_DER_L, %o3
	ldxa	[%o3]ASI_SDB_INTR_R, %o2	! lower half
	retl
	  stx	%o2, [%o1]			! delay slot: store through the second
	SET_SIZE(get_udb_errors)
1093
1094/*
1095 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
1096 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
1097 * should only be used in places where you have no choice but to look at the
1098 * tlb itself.
1099 *
1100 * Note: These two routines are required by the Estar "cpr" loadable module.
1101 */
1102/*
1103 * NB - In Spitfire cpus, when reading a tte from the hardware, we
1104 * need to clear [42-41] because the general definitions in pte.h
1105 * define the PA to be [42-13] whereas Spitfire really uses [40-13].
1106 * When cloning these routines for other cpus the "andn" below is not
1107 * necessary.
1108 */
1109	ENTRY_NP(itlb_rd_entry)
1110	sllx	%o0, 3, %o0
1111#if defined(SF_ERRATA_32)
1112	sethi	%hi(FLUSH_ADDR), %g2
1113	set	MMU_PCONTEXT, %g1
1114	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
1115	flush	%g2
1116#endif
1117	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
1118	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
1119	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
1120	andn	%g1, %g2, %g1				! for details
1121	stx	%g1, [%o1]
1122	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
1123	set	TAGREAD_CTX_MASK, %o4
1124	andn	%g2, %o4, %o5
1125	retl
1126	  stx	%o5, [%o2]
1127	SET_SIZE(itlb_rd_entry)
1128
1129	ENTRY_NP(dtlb_rd_entry)
1130	sllx	%o0, 3, %o0
1131#if defined(SF_ERRATA_32)
1132	sethi	%hi(FLUSH_ADDR), %g2
1133	set	MMU_PCONTEXT, %g1
1134	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
1135	flush	%g2
1136#endif
1137	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
1138	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
1139	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
1140	andn	%g1, %g2, %g1				! itlb_rd_entry
1141	stx	%g1, [%o1]
1142	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
1143	set	TAGREAD_CTX_MASK, %o4
1144	andn	%g2, %o4, %o5
1145	retl
1146	  stx	%o5, [%o2]
1147	SET_SIZE(dtlb_rd_entry)
1148
1149	ENTRY(set_lsu)
1150	stxa	%o0, [%g0]ASI_LSU		! store to LSU
1151	retl
1152	membar	#Sync
1153	SET_SIZE(set_lsu)
1154
1155	ENTRY(get_lsu)
1156	retl
1157	ldxa	[%g0]ASI_LSU, %o0		! load LSU
1158	SET_SIZE(get_lsu)
1159
1160	/*
1161	 * Clear the NPT (non-privileged trap) bit in the %tick
1162	 * registers. In an effort to make the change in the
1163	 * tick counter as consistent as possible, we disable
1164	 * all interrupts while we're changing the registers. We also
1165	 * ensure that the read and write instructions are in the same
1166	 * line in the instruction cache.
1167	 */
1168	ENTRY_NP(cpu_clearticknpt)
1169	rdpr	%pstate, %g1		/* save processor state */
1170	andn	%g1, PSTATE_IE, %g3	/* turn off */
1171	wrpr	%g0, %g3, %pstate	/*   interrupts */
1172	rdpr	%tick, %g2		/* get tick register */
1173	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
1174	mov	1, %g3			/* create mask */
1175	sllx	%g3, 63, %g3		/*   for NPT bit */
1176	ba,a,pt	%xcc, 2f
1177	.align	64			/* Align to I$ boundary */
11782:
1179	rdpr	%tick, %g2		/* get tick register */
1180	wrpr	%g3, %g2, %tick		/* write tick register, */
1181					/*   clearing NPT bit   */
1182#if defined(BB_ERRATA_1)
1183	rdpr	%tick, %g0		/* read (s)tick (BB_ERRATA_1) */
1184#endif
11851:
1186	jmp	%g4 + 4
1187	wrpr	%g0, %g1, %pstate	/* restore processor state */
1188	SET_SIZE(cpu_clearticknpt)
1189
1190	/*
1191	 * get_ecache_tag()
1192	 * Register Usage:
1193	 * %o0: In: 32-bit E$ index
1194	 *      Out: 64-bit E$ tag value
1195	 * %o1: In: 64-bit AFSR value after clearing sticky bits
1196	 * %o2: In: address of cpu private afsr storage
1197	 */
1198	ENTRY(get_ecache_tag)
1199	or	%g0, 1, %o4
1200	sllx	%o4, 40, %o4			! set bit 40 for e$ tag access
1201	or	%o0, %o4, %o4			! %o4 = e$ addr for tag read
1202	rdpr	%pstate, %o5
1203	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
1204	wrpr	%o0, %g0, %pstate		! clear IE, AM bits
1205
1206	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1207	stxa	%g0, [%g0]ASI_ESTATE_ERR	! Turn off Error enable
1208	membar	#Sync
1209
1210	ldxa	[%g0]ASI_AFSR, %o0
1211	srlx	%o0, P_AFSR_CP_SHIFT, %o3
1212	btst	1, %o3
1213	bz	1f
1214	  nop
1215	ldx	[%o2], %g4
1216	or	%g4, %o0, %g4			! aggregate AFSR in cpu private
1217	stx	%g4, [%o2]
12181:
1219	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1220	membar  #Sync
1221
1222	ldxa	[%o4]ASI_EC_R, %g0
1223	ldxa	[%g0]ASI_EC_DIAG, %o0		! read tag from e$ tag reg
1224
1225	ldxa	[%g0]ASI_AFSR, %o3
1226	srlx	%o3, P_AFSR_CP_SHIFT, %o4
1227	btst	1, %o4
1228	bz	2f
1229	  stx	%o3, [%o1]			! AFSR after sticky clear
1230	ldx	[%o2], %g4
1231	or	%g4, %o3, %g4			! aggregate AFSR in cpu private
1232	stx	%g4, [%o2]
12332:
1234	membar	#Sync
1235
1236	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
1237	membar	#Sync
1238	retl
1239	wrpr	%g0, %o5, %pstate
1240	SET_SIZE(get_ecache_tag)
1241
1242	/*
1243	 * check_ecache_line()
1244	 * Register Usage:
1245	 * %o0: In: 32-bit E$ index
1246	 *      Out: 64-bit accumulated AFSR
1247	 * %o1: In: address of cpu private afsr storage
1248	 */
1249	ENTRY(check_ecache_line)
1250	or	%g0, 1, %o4
1251	sllx	%o4, 39, %o4			! set bit 39 for e$ data access
1252	or	%o0, %o4, %o4		 	! %o4 = e$ addr for data read
1253
1254	rdpr	%pstate, %o5
1255	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
1256	wrpr	%o0, %g0, %pstate		! clear IE, AM bits
1257
1258	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1259	stxa	%g0, [%g0]ASI_ESTATE_ERR 	! Turn off Error enable
1260	membar	#Sync
1261
1262	ldxa 	[%g0]ASI_AFSR, %o0
1263	srlx	%o0, P_AFSR_CP_SHIFT, %o2
1264	btst	1, %o2
1265	bz	1f
1266	  clr	%o2				! loop count
1267	ldx	[%o1], %o3
1268	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
1269	stx	%o3, [%o1]
12701:
1271	stxa    %o0, [%g0]ASI_AFSR              ! clear AFSR
1272	membar	#Sync
1273
12742:
1275	ldxa	[%o4]ASI_EC_R, %g0		! Read the E$ data 8bytes each
1276	add	%o2, 1, %o2
1277	cmp	%o2, 8
1278	bl,a 	2b
1279	  add	%o4, 8, %o4
1280
1281	membar	#Sync
1282	ldxa	[%g0]ASI_AFSR, %o0		! read accumulated AFSR
1283	srlx	%o0, P_AFSR_CP_SHIFT, %o2
1284	btst	1, %o2
1285	bz	3f
1286	  nop
1287	ldx	[%o1], %o3
1288	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
1289	stx	%o3, [%o1]
12903:
1291	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1292	membar	#Sync
1293	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
1294	membar	#Sync
1295	retl
1296	wrpr	%g0, %o5, %pstate
1297	SET_SIZE(check_ecache_line)
1298
1299	ENTRY(read_and_clear_afsr)
1300	ldxa	[%g0]ASI_AFSR, %o0
1301	retl
1302	  stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1303	SET_SIZE(read_and_clear_afsr)
1304
1305/*
1306 * scrubphys - Pass in the aligned physical memory address that you want
1307 * to scrub, along with the ecache size.
1308 *
1309 *	1) Displacement flush the E$ line corresponding to %addr.
1310 *	   The first ldxa guarantees that the %addr is no longer in
 *	   M, O, or E (goes to I, or to S if an instruction fetch also happens).
1312 *	2) "Write" the data using a CAS %addr,%g0,%g0.
1313 *	   The casxa guarantees a transition from I to M or S to M.
1314 *	3) Displacement flush the E$ line corresponding to %addr.
1315 *	   The second ldxa pushes the M line out of the ecache, into the
1316 *	   writeback buffers, on the way to memory.
1317 *	4) The "membar #Sync" pushes the cache line out of the writeback
1318 *	   buffers onto the bus, on the way to dram finally.
1319 *
1320 * This is a modified version of the algorithm suggested by Gary Lauterbach.
1321 * In theory the CAS %addr,%g0,%g0 is supposed to mark the addr's cache line
1322 * as modified, but then we found out that for spitfire, if it misses in the
1323 * E$ it will probably install as an M, but if it hits in the E$, then it
1324 * will stay E, if the store doesn't happen. So the first displacement flush
1325 * should ensure that the CAS will miss in the E$.  Arrgh.
1326 */
1327
1328	ENTRY(scrubphys)
1329	or	%o1, %g0, %o2	! put ecache size in %o2
1330#ifndef HUMMINGBIRD
1331	xor	%o0, %o2, %o1	! calculate alias address
1332	add	%o2, %o2, %o3	! 2 * ecachesize in case
1333				! addr == ecache_flushaddr
1334	sub	%o3, 1, %o3	! -1 == mask
1335	and	%o1, %o3, %o1	! and with xor'd address
1336	set	ecache_flushaddr, %o3
1337	ldx	[%o3], %o3
1338
1339	rdpr	%pstate, %o4
1340	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1341	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1342
1343	ldxa	[%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1344	casxa	[%o0]ASI_MEM, %g0, %g0
1345	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1346
1347#else /* HUMMINGBIRD */
1348	/*
1349	 * UltraSPARC-IIe processor supports both 4-way set associative
1350	 * and direct map E$. We need to reconfigure E$ to direct map
1351	 * mode for data load/store before displacement flush. Also, we
1352	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1353	 * has been flushed. Keep the interrupts disabled while flushing
1354	 * E$ in this manner.
1355	 *
1356	 * For flushing a specific physical address, we start at the
1357	 * aliased address and load at set-size stride, wrapping around
1358	 * at 2*ecache-size boundary and skipping fault physical address.
1359	 * It takes 10 loads to guarantee that the physical address has
1360	 * been flushed.
1361	 *
1362	 * Usage:
1363	 *	%o0	physaddr
1364	 *	%o5	physaddr - ecache_flushaddr
1365	 *	%g1	UPA config (restored later)
1366	 *	%g2	E$ set size
1367	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1368	 *	%g4	#loads to flush phys address
1369	 *	%g5	temp
1370	 */
1371
1372	sethi	%hi(ecache_associativity), %g5
1373	ld	[%g5 + %lo(ecache_associativity)], %g5
1374	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1375	xor	%o0, %o2, %o1	! calculate alias address
1376	add	%o2, %o2, %g3	! 2 * ecachesize in case
1377				! addr == ecache_flushaddr
1378	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1379	and	%o1, %g3, %o1	! and with xor'd address
1380	sethi	%hi(ecache_flushaddr), %o3
1381	ldx	[%o3 + %lo(ecache_flushaddr)], %o3
1382
1383	rdpr	%pstate, %o4
1384	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1385	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1386
1387	! Place E$ in direct map mode for data access
1388	or	%g0, 1, %g5
1389	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1390	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
1391	or	%g1, %g5, %g5
1392	membar	#Sync
1393	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1394	membar	#Sync
1395
1396	! Displace cache line from each set of E$ starting at the
1397	! aliased address. at set-size stride, wrapping at 2*ecache_size
1398	! and skipping load from physaddr. We need 10 loads to flush the
1399	! physaddr from E$.
1400	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
1401	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
1402	or	%o1, %g0, %g5		! starting aliased offset
14032:
1404	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
14051:
1406	add	%g5, %g2, %g5		! calculate offset in next set
1407	and	%g5, %g3, %g5		! force offset within aliased range
1408	cmp	%g5, %o5		! skip loads from physaddr
1409	be,pn %ncc, 1b
1410	  nop
1411	brgz,pt	%g4, 2b
1412	  dec	%g4
1413
1414	casxa	[%o0]ASI_MEM, %g0, %g0
1415
1416	! Flush %o0 from ecahe again.
1417	! Need single displacement flush at offset %o1 this time as
1418	! the E$ is already in direct map mode.
1419	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1420
1421	membar	#Sync
1422	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1423	membar	#Sync
1424#endif /* HUMMINGBIRD */
1425	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value
1426
1427	retl
1428	membar	#Sync			! move the data out of the load buffer
1429	SET_SIZE(scrubphys)
1430
1431/*
1432 * clearphys - Pass in the aligned physical memory address that you want
1433 * to push out, as a 64 byte block of zeros, from the ecache zero-filled.
1434 * Since this routine does not bypass the ecache, it is possible that
 * it could generate a UE error while trying to clear a bad line.
1436 * This routine clears and restores the error enable flag.
1437 * TBD - Hummingbird may need similar protection
1438 */
1439	ENTRY(clearphys)
1440	or	%o2, %g0, %o3	! ecache linesize
1441	or	%o1, %g0, %o2	! ecache size
1442#ifndef HUMMINGBIRD
1443	or	%o3, %g0, %o4	! save ecache linesize
1444	xor	%o0, %o2, %o1	! calculate alias address
1445	add	%o2, %o2, %o3	! 2 * ecachesize
1446	sub	%o3, 1, %o3	! -1 == mask
1447	and	%o1, %o3, %o1	! and with xor'd address
1448	set	ecache_flushaddr, %o3
1449	ldx	[%o3], %o3
1450	or	%o4, %g0, %o2	! saved ecache linesize
1451
1452	rdpr	%pstate, %o4
1453	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1454	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1455
1456	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1457	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
1458	membar	#Sync
1459
1460	! need to put zeros in the cache line before displacing it
1461
1462	sub	%o2, 8, %o2	! get offset of last double word in ecache line
14631:
1464	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
1465	sub	%o2, 8, %o2
1466	brgez,a,pt %o2, 1b
1467	nop
1468	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1469	casxa	[%o0]ASI_MEM, %g0, %g0
1470	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1471
1472	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
1473	membar	#Sync
1474
1475#else /* HUMMINGBIRD... */
1476	/*
1477	 * UltraSPARC-IIe processor supports both 4-way set associative
1478	 * and direct map E$. We need to reconfigure E$ to direct map
1479	 * mode for data load/store before displacement flush. Also, we
1480	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1481	 * has been flushed. Keep the interrupts disabled while flushing
1482	 * E$ in this manner.
1483	 *
1484	 * For flushing a specific physical address, we start at the
1485	 * aliased address and load at set-size stride, wrapping around
1486	 * at 2*ecache-size boundary and skipping fault physical address.
1487	 * It takes 10 loads to guarantee that the physical address has
1488	 * been flushed.
1489	 *
1490	 * Usage:
1491	 *	%o0	physaddr
1492	 *	%o5	physaddr - ecache_flushaddr
1493	 *	%g1	UPA config (restored later)
1494	 *	%g2	E$ set size
1495	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1496	 *	%g4	#loads to flush phys address
1497	 *	%g5	temp
1498	 */
1499
1500	or	%o3, %g0, %o4	! save ecache linesize
1501	sethi	%hi(ecache_associativity), %g5
1502	ld	[%g5 + %lo(ecache_associativity)], %g5
1503	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1504
1505	xor	%o0, %o2, %o1	! calculate alias address
1506	add	%o2, %o2, %g3	! 2 * ecachesize
1507	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1508	and	%o1, %g3, %o1	! and with xor'd address
1509	sethi	%hi(ecache_flushaddr), %o3
1510	ldx	[%o3 +%lo(ecache_flushaddr)], %o3
1511	or	%o4, %g0, %o2	! saved ecache linesize
1512
1513	rdpr	%pstate, %o4
1514	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1515	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1516
1517	! Place E$ in direct map mode for data access
1518	or	%g0, 1, %g5
1519	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1520	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
1521	or	%g1, %g5, %g5
1522	membar	#Sync
1523	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1524	membar	#Sync
1525
1526	! need to put zeros in the cache line before displacing it
1527
1528	sub	%o2, 8, %o2	! get offset of last double word in ecache line
15291:
1530	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
1531	sub	%o2, 8, %o2
1532	brgez,a,pt %o2, 1b
1533	nop
1534
1535	! Displace cache line from each set of E$ starting at the
1536	! aliased address. at set-size stride, wrapping at 2*ecache_size
1537	! and skipping load from physaddr. We need 10 loads to flush the
1538	! physaddr from E$.
1539	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
1540	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
1541	or	%o1, %g0, %g5		! starting offset
15422:
1543	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
15443:
1545	add	%g5, %g2, %g5		! calculate offset in next set
1546	and	%g5, %g3, %g5		! force offset within aliased range
1547	cmp	%g5, %o5		! skip loads from physaddr
1548	be,pn %ncc, 3b
1549	  nop
1550	brgz,pt	%g4, 2b
1551	  dec	%g4
1552
1553	casxa	[%o0]ASI_MEM, %g0, %g0
1554
1555	! Flush %o0 from ecahe again.
1556	! Need single displacement flush at offset %o1 this time as
1557	! the E$ is already in direct map mode.
1558	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1559
1560	membar	#Sync
1561	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1562	membar	#Sync
1563#endif /* HUMMINGBIRD... */
1564
1565	retl
1566	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value
1567	SET_SIZE(clearphys)
1568
/*
 * flushecacheline - This is a simpler version of scrubphys
 * which simply does a displacement flush of the line in
 * question. This routine is mainly used in handling async
 * errors where we want to get rid of a bad line in ecache.
 * Note that if the line is modified and it has suffered
 * data corruption - we are guaranteed that the hw will write
 * a UE back to mark the page poisoned.
 *
 * In:	%o0 = physical address of the line
 *	%o1 = ecache size
 * Error traps are disabled around the displacement loads and the
 * previous error enable value is restored afterwards.
 */
	ENTRY(flushecacheline)
	mov	%o1, %o2	! %o2 = ecache size
#ifndef HUMMINGBIRD
	xor	%o0, %o2, %o1	! alias offset = physaddr ^ ecache size ...
	add	%o2, %o2, %o3	! 2 * ecachesize, in case
				! addr == ecache_flushaddr
	dec	%o3		! ... - 1 gives the mask
	and	%o1, %o3, %o1	! ... limited to the flush range
	set	ecache_flushaddr, %o3
	ldx	[%o3], %o3	! %o3 = base of the flush region

	rdpr	%pstate, %o4	! %o4 = pstate to restore on exit
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! IE and AM off

	ldxa	[%g0]ASI_ESTATE_ERR, %g1	! %g1 = error enables to restore
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
	membar	#Sync

	ldxa	[%o1 + %o3]ASI_MEM, %g0	! displace the line
	membar	#Sync
	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
	membar	#Sync
#else /* HUMMINGBIRD */
	/*
	 * UltraSPARC-IIe processor supports both 4-way set associative
	 * and direct map E$. We need to reconfigure E$ to direct map
	 * mode for data load/store before displacement flush. Also, we
	 * need to flush all 4 sets of the E$ to ensure that the physaddr
	 * has been flushed. Keep the interrupts disabled while flushing
	 * E$ in this manner.
	 *
	 * For flushing a specific physical address, we start at the
	 * aliased address and load at set-size stride, wrapping around
	 * at 2*ecache-size boundary and skipping fault physical address.
	 * It takes 10 loads to guarantee that the physical address has
	 * been flushed.
	 *
	 * Usage:
	 *	%o0	physaddr
	 *	%o5	physaddr - ecache_flushaddr
	 *	%g1	error enable register
	 *	%g2	E$ set size
	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
	 *	%g4	UPA config (restored later)
	 *	%g5	temp
	 */

	sethi	%hi(ecache_associativity), %g5
	ld	[%g5 + %lo(ecache_associativity)], %g5
	udivx	%o2, %g5, %g2	! %g2 = set size (ecache_size / associativity)
	xor	%o0, %o2, %o1	! alias offset = physaddr ^ ecache size ...
	add	%o2, %o2, %g3	! 2 * ecachesize, in case
				! addr == ecache_flushaddr
	dec	%g3		! %g3 = 2 * ecachesize - 1 (mask)
	and	%o1, %g3, %o1	! ... limited to the flush range
	sethi	%hi(ecache_flushaddr), %o3
	ldx	[%o3 + %lo(ecache_flushaddr)], %o3	! %o3 = flush region base

	rdpr	%pstate, %o4	! %o4 = pstate to restore on exit
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! IE and AM off

	! Place E$ in direct map mode for data access
	mov	1, %g5
	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
	ldxa	[%g0]ASI_UPA_CONFIG, %g4 ! %g4 = UPA config to restore later
	or	%g4, %g5, %g5
	membar	#Sync
	stxa	%g5, [%g0]ASI_UPA_CONFIG ! direct map on for data access
	membar	#Sync

	ldxa	[%g0]ASI_ESTATE_ERR, %g1	! %g1 = error enables to restore
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
	membar	#Sync

	! Displace the cache line from each set of the E$: start at the
	! aliased offset, step by the set size, wrap at 2*ecache_size
	! and never load from physaddr itself.
	mov	HB_PHYS_FLUSH_CNT-1, %g5 ! loads remaining after the first
	sub	%o0, %o3, %o5		! %o5 = physaddr - ecache_flushaddr
2:
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! displacement load
3:
	add	%o1, %g2, %o1		! same line in the next set
	and	%o1, %g3, %o1		! wrap inside the flush range
	cmp	%o1, %o5		! would this load hit physaddr?
	be,pn %ncc, 3b			! yes: step past it
	  nop
	brgz,pt	%g5, 2b			! more loads to do
	  dec	%g5			! delay slot, always executed

	membar	#Sync
	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
	membar	#Sync

	stxa	%g4, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
	membar	#Sync
#endif /* HUMMINGBIRD */
	retl
	  wrpr	%g0, %o4, %pstate	! delay slot: back to the saved pstate
	SET_SIZE(flushecacheline)
1681
/*
 * ecache_scrubreq_tl1 is the crosstrap handler called at ecache_calls_a_sec Hz
 * from the clock CPU.  It increments this cpu's outstanding scrub request
 * counter and, if there was not already an outstanding request,
 * branches to setsoftint_tl1 to enqueue an intr_vec for the given inum.
 *
 * Register usage:
 *
 * Arguments:
 *	%g1 - inum
 *
 * Internal:
 *	%g2, %g3, %g5 - scratch
 *	%g4 - ptr. to spitfire_scrub_misc ec_scrub_outstanding.
 *	%g6 - setsoftint_tl1 address
 *
 * NOTE(review): the fourth argument of GET_CPU_PRIVATE_PTR is the label
 * 1f; presumably the macro branches there (straight to the retry) when
 * the cpu has no private area.  Confirm against the macro definition.
 */
	ENTRY_NP(ecache_scrubreq_tl1)
	set	SFPR_SCRUB_MISC + EC_SCRUB_OUTSTANDING, %g2
	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
	ld	[%g4], %g2		! %g2 = requests already outstanding
	set	setsoftint_tl1, %g6
	!
	! A plain load/add/store is enough for the increment because
	! this code runs at tl1.
	!
	add	%g2, 0x1, %g3		! %g3 = old count + 1
	brnz,pn	%g2, 1f			! a request was already pending: just count
	  st	%g3, [%g4]		! delay slot, always executed: store new count
	jmp	%g6			! setsoftint_tl1(%g1) - queue intr_vec
	  nop
	! not reached
1:
	retry
	SET_SIZE(ecache_scrubreq_tl1)
1717
1718	/*
1719         * write_ec_tag_parity(), which zero's the ecache tag,
1720         * marks the state as invalid and writes good parity to the tag.
1721         * Input %o1= 32 bit E$ index
1722         */
1723        ENTRY(write_ec_tag_parity)
1724        or      %g0, 1, %o4
1725        sllx    %o4, 39, %o4                    ! set bit 40 for e$ tag access
1726        or      %o0, %o4, %o4                 ! %o4 = ecache addr for tag write
1727
1728        rdpr    %pstate, %o5
1729        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
1730        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits
1731
1732        ldxa    [%g0]ASI_ESTATE_ERR, %g1
1733        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
1734        membar  #Sync
1735
1736        ba      1f
1737         nop
1738	/*
1739         * Align on the ecache boundary in order to force
1740         * ciritical code section onto the same ecache line.
1741         */
1742         .align 64
1743
17441:
1745        set     S_EC_PARITY, %o3         	! clear tag, state invalid
1746        sllx    %o3, S_ECPAR_SHIFT, %o3   	! and with good tag parity
1747        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
1748        stxa    %g0, [%o4]ASI_EC_W
1749        membar  #Sync
1750
1751        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
1752        membar  #Sync
1753        retl
1754        wrpr    %g0, %o5, %pstate
1755        SET_SIZE(write_ec_tag_parity)
1756
1757	/*
1758         * write_hb_ec_tag_parity(), which zero's the ecache tag,
1759         * marks the state as invalid and writes good parity to the tag.
1760         * Input %o1= 32 bit E$ index
1761         */
1762        ENTRY(write_hb_ec_tag_parity)
1763        or      %g0, 1, %o4
1764        sllx    %o4, 39, %o4                    ! set bit 40 for e$ tag access
1765        or      %o0, %o4, %o4               ! %o4 = ecache addr for tag write
1766
1767        rdpr    %pstate, %o5
1768        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
1769        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits
1770
1771        ldxa    [%g0]ASI_ESTATE_ERR, %g1
1772        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
1773        membar  #Sync
1774
1775        ba      1f
1776         nop
1777	/*
1778         * Align on the ecache boundary in order to force
1779         * ciritical code section onto the same ecache line.
1780         */
1781         .align 64
17821:
1783#ifdef HUMMINGBIRD
1784        set     HB_EC_PARITY, %o3         	! clear tag, state invalid
1785        sllx    %o3, HB_ECPAR_SHIFT, %o3   	! and with good tag parity
1786#else /* !HUMMINGBIRD */
1787        set     SB_EC_PARITY, %o3         	! clear tag, state invalid
1788        sllx    %o3, SB_ECPAR_SHIFT, %o3   	! and with good tag parity
1789#endif /* !HUMMINGBIRD */
1790
1791        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
1792        stxa    %g0, [%o4]ASI_EC_W
1793        membar  #Sync
1794
1795        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
1796        membar  #Sync
1797        retl
1798        wrpr    %g0, %o5, %pstate
1799        SET_SIZE(write_hb_ec_tag_parity)
1800
1801#define	VIS_BLOCKSIZE		64
1802
1803	ENTRY(dtrace_blksuword32)
1804	save	%sp, -SA(MINFRAME + 4), %sp
1805
1806	rdpr	%pstate, %l1
1807	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
1808	wrpr	%g0, %l2, %pstate		! protect our FPU diddling
1809
1810	rd	%fprs, %l0
1811	andcc	%l0, FPRS_FEF, %g0
1812	bz,a,pt	%xcc, 1f			! if the fpu is disabled
1813	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu
1814
1815	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
18161:
1817	set	0f, %l5
1818        /*
1819         * We're about to write a block full or either total garbage
1820         * (not kernel data, don't worry) or user floating-point data
1821         * (so it only _looks_ like garbage).
1822         */
1823	ld	[%i1], %f0			! modify the block
1824	membar	#Sync
1825	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
1826	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
1827	membar	#Sync
1828	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
1829
1830	bz,a,pt	%xcc, 1f
1831	wr	%g0, %l0, %fprs			! restore %fprs
1832
1833	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
18341:
1835
1836	wrpr	%g0, %l1, %pstate		! restore interrupts
1837
1838	ret
1839	restore	%g0, %g0, %o0
1840
18410:
1842	membar	#Sync
1843	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
1844
1845	bz,a,pt	%xcc, 1f
1846	wr	%g0, %l0, %fprs			! restore %fprs
1847
1848	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
18491:
1850
1851	wrpr	%g0, %l1, %pstate		! restore interrupts
1852
1853	/*
1854	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
1855	 * which deals with watchpoints. Otherwise, just return -1.
1856	 */
1857	brnz,pt	%i2, 1f
1858	nop
1859	ret
1860	restore	%g0, -1, %o0
18611:
1862	call	dtrace_blksuword32_err
1863	restore
1864
1865	SET_SIZE(dtrace_blksuword32)
1866