xref: /titanic_50/usr/src/uts/sun4u/cpu/spitfire_asm.s (revision d9e728a2c2e62adeef072d782e4c8a7b34e7e8e8)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#if !defined(lint)
29#include "assym.h"
30#endif	/* lint */
31
32#include <sys/asm_linkage.h>
33#include <sys/mmu.h>
34#include <vm/hat_sfmmu.h>
35#include <sys/machparam.h>
36#include <sys/machcpuvar.h>
37#include <sys/machthread.h>
38#include <sys/privregs.h>
39#include <sys/asm_linkage.h>
40#include <sys/machasi.h>
41#include <sys/trap.h>
42#include <sys/spitregs.h>
43#include <sys/xc_impl.h>
44#include <sys/intreg.h>
45#include <sys/async.h>
46
47#ifdef TRAPTRACE
48#include <sys/traptrace.h>
49#endif /* TRAPTRACE */
50
51#ifndef	lint
52
53/* BEGIN CSTYLED */
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Invalidate D$ tags, choosing the strategy from the run-time value of
 * dflush_type.  Does nothing (other than clobbering tmp1) when LSU_DC
 * is clear in the LSU control register.
 *
 *	arg1	in: pfnum; shifted by SF_DC_VBIT_SHIFT to form the tag
 *		to compare against (clobbered)
 *	arg2	in: virtual color (clobbered)
 *	tmp1-3	scratch
 *
 * Defines and uses the numeric local labels 1 through 5.
 */
#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_LSU, tmp1;					\
	btst	LSU_DC, tmp1;		/* D$ off? nothing to do */	\
	bz,pn	%icc, 1f;						\
	sethi	%hi(dcache_linesize), tmp1;	/* delay slot */	\
	ld	[tmp1 + %lo(dcache_linesize)], tmp1;			\
	sethi	%hi(dflush_type), tmp2;					\
	ld	[tmp2 + %lo(dflush_type)], tmp2;			\
	cmp	tmp2, FLUSHPAGE_TYPE;					\
	bz,pt	%icc, 2f;						\
	sllx	arg1, SF_DC_VBIT_SHIFT, arg1;	/* delay: build tag */	\
	sethi	%hi(dcache_size), tmp3;					\
	ld	[tmp3 + %lo(dcache_size)], tmp3;			\
	cmp	tmp2, FLUSHMATCH_TYPE;					\
	bz,pt	%icc, 3f;						\
	nop;								\
	/*								\
	 * Neither FLUSHPAGE_TYPE nor FLUSHMATCH_TYPE: clear every	\
	 * tag, walking from (size - linesize) down to offset 0.	\
	 *	tmp1 = line size, tmp3 = cache size, tmp2 = offset	\
	 */								\
	sub	tmp3, tmp1, tmp2;					\
4:									\
	stxa	%g0, [tmp2]ASI_DC_TAG;					\
	membar	#Sync;							\
	cmp	%g0, tmp2;						\
	bnz,pt	%icc, 4b;						\
	sub	tmp2, tmp1, tmp2;	/* delay: previous line */	\
	ba,pt	%icc, 1f;						\
	nop;								\
	/*								\
	 * FLUSHPAGE_TYPE: look only at the lines of the D$ page	\
	 * selected by the color, clearing those whose tag matches.	\
	 *	arg1 = tag to match, arg2 = color << MMU_PAGESHIFT	\
	 *	tmp1 = line size, tmp2 = tag read, tmp3 = offset	\
	 */								\
2:									\
	set	MMU_PAGESIZE, tmp3;					\
	sllx	arg2, MMU_PAGESHIFT, arg2;				\
	sub	tmp3, tmp1, tmp3;					\
4:									\
	ldxa	[arg2 + tmp3]ASI_DC_TAG, tmp2;				\
	btst	SF_DC_VBIT_MASK, tmp2;					\
	bz,pn	%icc, 5f;		/* no valid sub-blocks */	\
	andn	tmp2, SF_DC_VBIT_MASK, tmp2;	/* delay: drop v bits */ \
	cmp	tmp2, arg1;						\
	bnz,pn	%icc, 5f;		/* tag differs, skip line */	\
	nop;								\
	stxa	%g0, [arg2 + tmp3]ASI_DC_TAG;				\
	membar	#Sync;							\
5:									\
	cmp	%g0, tmp3;						\
	bnz,pt	%icc, 4b;		/* more lines in the page */	\
	sub	tmp3, tmp1, tmp3;	/* delay: previous line */	\
	ba,pt	%icc, 1f;						\
	nop;								\
	/*								\
	 * FLUSHMATCH_TYPE: scan the whole cache, clearing each line	\
	 * whose tag matches.						\
	 *	arg1 = tag to match, arg2 = offset (color is dropped)	\
	 *	tmp1 = line size, tmp2 = tag read, tmp3 = cache size	\
	 */								\
3:									\
	sub	tmp3, tmp1, arg2;					\
4:									\
	ldxa	[arg2]ASI_DC_TAG, tmp2;					\
	btst	SF_DC_VBIT_MASK, tmp2;					\
	bz,pn	%icc, 5f;		/* no valid sub-blocks */	\
	andn	tmp2, SF_DC_VBIT_MASK, tmp2;	/* delay: drop v bits */ \
	cmp	tmp2, arg1;						\
	bnz,pn	%icc, 5f;		/* tag differs, skip line */	\
	nop;								\
	stxa	%g0, [arg2]ASI_DC_TAG;					\
	membar	#Sync;							\
5:									\
	cmp	%g0, arg2;						\
	bnz,pt	%icc, 4b;		/* more lines to scan */	\
	sub	arg2, tmp1, arg2;	/* delay: previous line */	\
1:
137
/*
 * DCACHE_FLUSHCOLOR(arg, tmp1, tmp2)
 *
 * Clear every D$ tag belonging to one virtual color, i.e. one
 * MMU_PAGESIZE worth of cache lines.  Skipped when LSU_DC is clear.
 *
 *	arg	in: virtual color; becomes color << MMU_PAGESHIFT
 *	tmp1	scratch: cache line size
 *	tmp2	scratch: offset within the page, counts down to 0
 *
 * Defines and uses the numeric local labels 1 and 2.
 */
#define	DCACHE_FLUSHCOLOR(arg, tmp1, tmp2)				\
	ldxa	[%g0]ASI_LSU, tmp1					;\
	btst	LSU_DC, tmp1		/* D$ off? nothing to do */	;\
	bz,pn	%icc, 1f						;\
	sethi	%hi(dcache_linesize), tmp1	/* delay slot */	;\
	ld	[tmp1 + %lo(dcache_linesize)], tmp1			;\
	set	MMU_PAGESIZE, tmp2					;\
	sllx	arg, MMU_PAGESHIFT, arg	/* color -> D$ page base */	;\
	sub	tmp2, tmp1, tmp2	/* start at the last line */	;\
2:									\
	stxa	%g0, [arg + tmp2]ASI_DC_TAG				;\
	membar	#Sync							;\
	cmp	%g0, tmp2						;\
	bnz,pt	%icc, 2b		/* until offset 0 is done */	;\
	sub	tmp2, tmp1, tmp2	/* delay: previous line */	;\
1:
162
/*
 * DCACHE_FLUSHALL(size, linesize, tmp)
 *
 * Clear every D$ tag, from offset (size - linesize) down to 0.
 * No tags are written when LSU_DC is clear, though tmp is still
 * clobbered by the branch delay slot.
 *
 *	size, linesize	in: cache geometry (preserved)
 *	tmp		scratch: current offset
 *
 * Defines and uses the numeric local labels 1 and 2.
 */
#define	DCACHE_FLUSHALL(size, linesize, tmp)				\
	ldxa	[%g0]ASI_LSU, tmp					;\
	btst	LSU_DC, tmp		/* D$ off? nothing to do */	;\
	bz,pn	%icc, 1f						;\
	sub	size, linesize, tmp	/* delay: last line offset */	;\
2:									\
	stxa	%g0, [tmp]ASI_DC_TAG					;\
	membar	#Sync							;\
	cmp	%g0, tmp						;\
	bnz,pt	%icc, 2b		/* until offset 0 is done */	;\
	sub	tmp, linesize, tmp	/* delay: previous line */	;\
1:
179
/*
 * ICACHE_FLUSHALL(size, linesize, tmp)
 *
 * Clear every I$ tag, from offset (size - linesize) down to 0.
 * No tags are written when LSU_IC is clear, though tmp is still
 * clobbered by the branch delay slot.
 *
 *	size, linesize	in: cache geometry (preserved)
 *	tmp		scratch: current offset
 *
 * Defines and uses the numeric local labels 1 and 2.
 */
#define	ICACHE_FLUSHALL(size, linesize, tmp)				\
	ldxa	[%g0]ASI_LSU, tmp					;\
	btst	LSU_IC, tmp		/* I$ off? nothing to do */	;\
	bz,pn	%icc, 1f						;\
	sub	size, linesize, tmp	/* delay: last line offset */	;\
2:									\
	stxa	%g0, [tmp]ASI_IC_TAG					;\
	membar	#Sync							;\
	cmp	%g0, tmp						;\
	bnz,pt	%icc, 2b		/* until offset 0 is done */	;\
	sub	tmp, linesize, tmp	/* delay: previous line */	;\
1:
196
/*
 * SF_WORKAROUND(tmp1, tmp2)
 *
 * When SF_ERRATA_32 is defined: store zero to the MMU_PCONTEXT register
 * through ASI_DMMU and follow it with a flush of FLUSH_ADDR.  Expands
 * to nothing otherwise.  Both arguments are scratch registers.
 */
#if defined(SF_ERRATA_32)
#define	SF_WORKAROUND(tmp1, tmp2)					\
	sethi	%hi(FLUSH_ADDR), tmp2;					\
	set	MMU_PCONTEXT, tmp1;					\
	stxa	%g0, [tmp1]ASI_DMMU;					\
	flush	tmp2;
#else	/* !SF_ERRATA_32 */
#define	SF_WORKAROUND(tmp1, tmp2)
#endif	/* SF_ERRATA_32 */
206
/*
 * VTAG_FLUSHUPAGE(lbl, arg1, arg2, tmp1, tmp2, tmp3, tmp4)
 *
 * Demap one user page from both TLBs through the secondary context.
 *
 *	lbl	prefix used to build the two local labels lbl4 and lbl5
 *	arg1	in: vaddr; gets DEMAP_SECOND | DEMAP_PAGE_TYPE or-ed in
 *	arg2	in: context number of the page
 *	tmp1	saved %pstate, written back at the end
 *	tmp2	scratch, then FLUSH_ADDR
 *	tmp3	MMU_SCONTEXT register address
 *	tmp4	secondary context found on entry
 *
 * Interrupts are disabled for the duration.  The secondary context
 * register is rewritten (and restored afterwards) only when it does not
 * already hold arg2; the second branch reuses the condition codes set
 * by the single cmp.
 */
#define	VTAG_FLUSHUPAGE(lbl, arg1, arg2, tmp1, tmp2, tmp3, tmp4)	\
	rdpr	%pstate, tmp1;						\
	andn	tmp1, PSTATE_IE, tmp2;					\
	wrpr	tmp2, 0, %pstate;	/* interrupts off */		\
	sethi	%hi(FLUSH_ADDR), tmp2;					\
	set	MMU_SCONTEXT, tmp3;					\
	ldxa	[tmp3]ASI_DMMU, tmp4;	/* current secondary ctx */	\
	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, arg1, arg1;		\
	cmp	tmp4, arg2;						\
	be,a,pt	%icc, lbl/**/4;		/* ctx already in place */	\
	  nop;								\
	stxa	arg2, [tmp3]ASI_DMMU;	/* install target ctx */	\
lbl/**/4:;								\
	stxa	%g0, [arg1]ASI_DTLB_DEMAP;				\
	stxa	%g0, [arg1]ASI_ITLB_DEMAP;				\
	flush	tmp2;							\
	be,a,pt	%icc, lbl/**/5;		/* nothing to restore */	\
	  nop;								\
	stxa	tmp4, [tmp3]ASI_DMMU;	/* put old ctx back */		\
	flush	tmp2;							\
lbl/**/5:;								\
	wrpr	%g0, tmp1, %pstate
237
238
/*
 * DTLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, ..., tmp6)
 *
 * Walk the D-TLB from entry arg1 down to entry 0 and demap every entry
 * that passes all three filters below.
 *
 *	lbl	prefix used to build the local labels lbl0 and lbl1
 *	arg1	in: index of the highest entry; counts down (clobbered)
 *	tmp3	entry index * 8, the diagnostic access offset
 *	tmp4	TTE data read from the entry
 *	tmp5	vaddr part of the tag read
 *	tmp6	context part of the tag read
 *
 * An entry is skipped when TTE bit 6 is set, when TTE bit 63 is clear,
 * or when its context equals KCONTEXT.  (Going by the macro name, bit 6
 * is presumably the lock bit and bit 63 the valid bit; confirm against
 * the TTE layout.)
 */
#define	DTLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, tmp2, tmp3,		\
				tmp4, tmp5, tmp6)			\
lbl/**/0:;								\
	sllx	arg1, 3, tmp3;						\
	SF_WORKAROUND(tmp1, tmp2);					\
	ldxa	[tmp3]ASI_DTLB_ACCESS, tmp4;				\
	srlx	tmp4, 6, tmp4;						\
	andcc	tmp4, 1, %g0;		/* TTE bit 6 set? skip */	\
	bnz,pn	%xcc, lbl/**/1;						\
	srlx	tmp4, 57, tmp4;		/* delay: now at bit 63 */	\
	andcc	tmp4, 1, %g0;		/* TTE bit 63 clear? skip */	\
	beq,pn	%xcc, lbl/**/1;						\
	  nop;								\
	set	TAGREAD_CTX_MASK, tmp1;					\
	ldxa	[tmp3]ASI_DTLB_TAGREAD, tmp2;				\
	and	tmp2, tmp1, tmp6;	/* tmp6 = context */		\
	andn	tmp2, tmp1, tmp5;	/* tmp5 = vaddr */		\
	set	KCONTEXT, tmp4;						\
	cmp	tmp6, tmp4;		/* kernel context? skip */	\
	be	lbl/**/1;						\
	  nop;								\
	VTAG_FLUSHUPAGE(VD/**/lbl, tmp5, tmp6, tmp1, tmp2, tmp3, tmp4);	\
lbl/**/1:;								\
	brgz,pt	arg1, lbl/**/0;		/* loop while index > 0 */	\
	  sub	arg1, 1, arg1
272
273
/*
 * ITLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, ..., tmp6)
 *
 * Walk the I-TLB from entry arg1 down to entry 0 and demap every entry
 * that passes all three filters below.
 *
 *	lbl	prefix used to build the local labels lbl0 and lbl1
 *	arg1	in: index of the highest entry; counts down (clobbered)
 *	tmp3	entry index * 8, the diagnostic access offset
 *	tmp4	TTE data read from the entry
 *	tmp5	vaddr part of the tag read
 *	tmp6	context part of the tag read
 *
 * An entry is skipped when TTE bit 6 is set, when TTE bit 63 is clear,
 * or when its context equals KCONTEXT.  (Going by the macro name, bit 6
 * is presumably the lock bit and bit 63 the valid bit; confirm against
 * the TTE layout.)
 */
#define	ITLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, tmp2, tmp3,		\
				tmp4, tmp5, tmp6)			\
lbl/**/0:;								\
	sllx	arg1, 3, tmp3;						\
	SF_WORKAROUND(tmp1, tmp2);					\
	ldxa	[tmp3]ASI_ITLB_ACCESS, tmp4;				\
	srlx	tmp4, 6, tmp4;						\
	andcc	tmp4, 1, %g0;		/* TTE bit 6 set? skip */	\
	bnz,pn	%xcc, lbl/**/1;						\
	srlx	tmp4, 57, tmp4;		/* delay: now at bit 63 */	\
	andcc	tmp4, 1, %g0;		/* TTE bit 63 clear? skip */	\
	beq,pn	%xcc, lbl/**/1;						\
	  nop;								\
	set	TAGREAD_CTX_MASK, tmp1;					\
	ldxa	[tmp3]ASI_ITLB_TAGREAD, tmp2;				\
	and	tmp2, tmp1, tmp6;	/* tmp6 = context */		\
	andn	tmp2, tmp1, tmp5;	/* tmp5 = vaddr */		\
	set	KCONTEXT, tmp4;						\
	cmp	tmp6, tmp4;		/* kernel context? skip */	\
	be	lbl/**/1;						\
	  nop;								\
	VTAG_FLUSHUPAGE(VI/**/lbl, tmp5, tmp6, tmp1, tmp2, tmp3, tmp4);	\
lbl/**/1:;								\
	brgz,pt	arg1, lbl/**/0;		/* loop while index > 0 */	\
	  sub	arg1, 1, arg1
307
308
309
/*
 * GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr2, label)
 *
 * Load this CPU's 'cpu_private' pointer (kept in the machcpu structure)
 * and add an offset to it.
 *
 *	r_or_s	register or symbol giving the offset from 'cpu_private'
 *	scr1	scratch; holds the resulting pointer on fall-through
 *	scr2	scratch
 *	label	branch target taken when 'cpu_private' is NULL; scr1 is
 *		zero in that case
 */
#define	GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr2, label)			\
	CPU_ADDR(scr1, scr2)						;\
	ldn	[scr1 + CPU_PRIVATE], scr1				;\
	cmp	scr1, 0			/* no private area? bail */	;\
	be	label							;\
	 nop								;\
	add	scr1, r_or_s, scr1					;
324
#if defined(HUMMINGBIRD)
/*
 * The UltraSPARC-IIe E$ can operate either 4-way set associative or
 * direct mapped.  For speed, flushing is done with data load/store
 * access switched to direct-map mode; the previous mode is restored
 * once the flush is finished, and interrupts stay off in between.
 *
 * Whole-cache flush: start at one end and load each successive E$ line
 * across a range of 2 * ecache-size.  The pass has to be repeated to
 * guarantee that everything has been displaced.
 *
 * Single physical address flush: start at the aliased address and load
 * at set-size stride, wrapping at the 2 * ecache-size boundary and
 * skipping the address being flushed.  Ten loads are needed to
 * guarantee that the address has been displaced.
 */

#define	HB_ECACHE_FLUSH_CNT	2	/* passes for a whole-E$ flush */
#define	HB_PHYS_FLUSH_CNT	10	/* #loads to flush specific paddr */
#endif	/* HUMMINGBIRD */
347
348/* END CSTYLED */
349
350#endif	/* !lint */
351
352/*
353 * Spitfire MMU and Cache operations.
354 */
355
356#if defined(lint)
357
358/*ARGSUSED*/
359void
360vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
361{}
362
363/*ARGSUSED*/
364void
365vtag_flushall(void)
366{}
367
368/*ARGSUSED*/
369void
370vtag_flushall_uctxs(void)
371{}
372
373/*ARGSUSED*/
374void
375vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
376{}
377
378/*ARGSUSED*/
379void
380vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
381{}
382
383/*ARGSUSED*/
384void
385vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
386{}
387
388/*ARGSUSED*/
389void
390vac_flushpage(pfn_t pfnum, int vcolor)
391{}
392
393/*ARGSUSED*/
394void
395vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
396{}
397
398/*ARGSUSED*/
399void
400init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
401{}
402
403/*ARGSUSED*/
404void
405init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
406{}
407
408/*ARGSUSED*/
409void
410flush_instr_mem(caddr_t vaddr, size_t len)
411{}
412
413/*ARGSUSED*/
414void
415flush_ecache(uint64_t physaddr, size_t size, size_t linesize)
416{}
417
418/*ARGSUSED*/
419void
420get_ecache_dtag(uint32_t ecache_idx, uint64_t *ecache_data,
421		uint64_t *ecache_tag, uint64_t *oafsr, uint64_t *acc_afsr)
422{}
423
424/* ARGSUSED */
425uint64_t
426get_ecache_tag(uint32_t id, uint64_t *nafsr, uint64_t *acc_afsr)
427{
428	return ((uint64_t)0);
429}
430
431/* ARGSUSED */
432uint64_t
433check_ecache_line(uint32_t id, uint64_t *acc_afsr)
434{
435	return ((uint64_t)0);
436}
437
438/*ARGSUSED*/
439void
440kdi_flush_idcache(int dcache_size, int dcache_lsize,
441    int icache_size, int icache_lsize)
442{}
443
444#else	/* lint */
445
446	ENTRY_NP(vtag_flushpage)
447	/*
448	 * flush page from the tlb
449	 *
450	 * %o0 = vaddr
451	 * %o1 = sfmmup
452	 */
453	rdpr	%pstate, %o5
454#ifdef DEBUG
455	PANIC_IF_INTR_DISABLED_PSTR(%o5, sfdi_label1, %g1)
456#endif /* DEBUG */
457	/*
458	 * disable ints
459	 */
460	andn	%o5, PSTATE_IE, %o4
461	wrpr	%o4, 0, %pstate
462
463	/*
464	 * Then, blow out the tlb
465	 * Interrupts are disabled to prevent the secondary ctx register
466	 * from changing underneath us.
467	 */
468	sethi   %hi(ksfmmup), %o3
469        ldx     [%o3 + %lo(ksfmmup)], %o3
470        cmp     %o3, %o1
471        bne,pt   %xcc, 1f			! if not kernel as, go to 1
472	  sethi	%hi(FLUSH_ADDR), %o3
473	/*
474	 * For KCONTEXT demaps use primary. type = page implicitly
475	 */
476	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
477	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
478	flush	%o3
479	b	5f
480	  nop
4811:
482	/*
483	 * User demap.  We need to set the secondary context properly.
484	 * %o0 = vaddr
485	 * %o1 = sfmmup
486	 * %o3 = FLUSH_ADDR
487	 */
488	SFMMU_CPU_CNUM(%o1, %g1, %g2)	/* %g1 = sfmmu cnum on this CPU */
489
490	set	MMU_SCONTEXT, %o4
491	ldxa	[%o4]ASI_DMMU, %o2		/* rd old ctxnum */
492	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %o0, %o0
493	cmp	%o2, %g1
494	be,pt	%icc, 4f
495	  nop
496	stxa	%g1, [%o4]ASI_DMMU		/* wr new ctxum */
4974:
498	stxa	%g0, [%o0]ASI_DTLB_DEMAP
499	stxa	%g0, [%o0]ASI_ITLB_DEMAP
500	flush	%o3
501	be,pt	%icc, 5f
502	  nop
503	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
504	flush	%o3
5055:
506	retl
507	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
508	SET_SIZE(vtag_flushpage)
509
510        .seg    ".text"
511.flushallmsg:
512        .asciz  "sfmmu_asm: unimplemented flush operation"
513
514        ENTRY_NP(vtag_flushall)
515        sethi   %hi(.flushallmsg), %o0
516        call    panic
517          or    %o0, %lo(.flushallmsg), %o0
518        SET_SIZE(vtag_flushall)
519
520	ENTRY_NP(vtag_flushall_uctxs)
521	/*
522	 * flush entire DTLB/ITLB.
523	 */
524	CPU_INDEX(%g1, %g2)
525	mulx	%g1, CPU_NODE_SIZE, %g1
526	set	cpunodes, %g2
527	add	%g1, %g2, %g1
528	lduh	[%g1 + ITLB_SIZE], %g2		! %g2 = # entries in ITLB
529	lduh	[%g1 + DTLB_SIZE], %g1		! %g1 = # entries in DTLB
530	sub	%g2, 1, %g2			! %g2 = # entries in ITLB - 1
531	sub	%g1, 1, %g1			! %g1 = # entries in DTLB - 1
532
533        !
534        ! Flush itlb's
535        !
536        ITLB_FLUSH_UNLOCKED_UCTXS(I, %g2, %g3, %g4, %o2, %o3, %o4, %o5)
537
538	!
539        ! Flush dtlb's
540        !
541        DTLB_FLUSH_UNLOCKED_UCTXS(D, %g1, %g3, %g4, %o2, %o3, %o4, %o5)
542
543	membar  #Sync
544	retl
545	  nop
546
547	SET_SIZE(vtag_flushall_uctxs)
548
549	ENTRY_NP(vtag_flushpage_tl1)
550	/*
551	 * x-trap to flush page from tlb and tsb
552	 *
553	 * %g1 = vaddr, zero-extended on 32-bit kernel
554	 * %g2 = sfmmup
555	 *
556	 * assumes TSBE_TAG = 0
557	 */
558	srln	%g1, MMU_PAGESHIFT, %g1
559	slln	%g1, MMU_PAGESHIFT, %g1			/* g1 = vaddr */
560
561	SFMMU_CPU_CNUM(%g2, %g3, %g4)   /* %g3 = sfmmu cnum on this CPU */
562
563	/* We need to set the secondary context properly. */
564	set	MMU_SCONTEXT, %g4
565	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
566	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
567	stxa	%g3, [%g4]ASI_DMMU		/* wr new ctxum */
568	stxa	%g0, [%g1]ASI_DTLB_DEMAP
569	stxa	%g0, [%g1]ASI_ITLB_DEMAP
570	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
571	membar #Sync
572	retry
573	SET_SIZE(vtag_flushpage_tl1)
574
575	ENTRY_NP(vtag_flush_pgcnt_tl1)
576	/*
577	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
578	 *
579	 * %g1 = vaddr, zero-extended on 32-bit kernel
580	 * %g2 = <sfmmup58 | pgcnt6>
581	 *
582	 * NOTE: this handler relies on the fact that no
583	 *	interrupts or traps can occur during the loop
584	 *	issuing the TLB_DEMAP operations. It is assumed
585	 *	that interrupts are disabled and this code is
586	 *	fetching from the kernel locked text address.
587	 *
588	 * assumes TSBE_TAG = 0
589	 */
590	srln	%g1, MMU_PAGESHIFT, %g1
591	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
592	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
593
594	set	SFMMU_PGCNT_MASK, %g4
595	and	%g4, %g2, %g3			/* g3 = pgcnt - 1 */
596	add	%g3, 1, %g3			/* g3 = pgcnt */
597
598	andn	%g2, SFMMU_PGCNT_MASK, %g2	/* g2 = sfmmup */
599
600	SFMMU_CPU_CNUM(%g2, %g5, %g6)   ! %g5 = sfmmu cnum on this CPU
601
602	/* We need to set the secondary context properly. */
603	set	MMU_SCONTEXT, %g4
604	ldxa	[%g4]ASI_DMMU, %g6		/* read old ctxnum */
605	stxa	%g5, [%g4]ASI_DMMU		/* write new ctxum */
606
607	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
608	sethi	 %hi(FLUSH_ADDR), %g5
6091:
610	stxa	%g0, [%g1]ASI_DTLB_DEMAP
611	stxa	%g0, [%g1]ASI_ITLB_DEMAP
612	flush	%g5
613	deccc	%g3				/* decr pgcnt */
614	bnz,pt	%icc,1b
615	  add	%g1, %g2, %g1			/* go to nextpage */
616
617	stxa	%g6, [%g4]ASI_DMMU		/* restore old ctxnum */
618	membar #Sync
619	retry
620	SET_SIZE(vtag_flush_pgcnt_tl1)
621
622	! Not implemented on US1/US2
623	ENTRY_NP(vtag_flushall_tl1)
624	retry
625	SET_SIZE(vtag_flushall_tl1)
626
/*
 * vac_flushpage(pfnum, color)
 *	Flush one 8k page of the D-$ whose physical page is pfnum.
 *
 *	The spitfire dcache is 16k, direct mapped, virtually indexed and
 *	physically tagged.  With the default dflush_type, every cache
 *	line of the page selected by the color is read and its tag is
 *	compared with the tag built from pfnum; matching lines are
 *	flushed.  The other dflush_type settings are handled inside
 *	DCACHE_FLUSHPAGE.
 */
	.seg	".data"
	.align	8
	.global	dflush_type
dflush_type:
	.word	FLUSHPAGE_TYPE			! strategy selector, see macro
	.seg	".text"

	/*
	 * %o0 = pfnum, %o1 = color
	 * %o2, %o3, %o4 are used as scratch.
	 */
	ENTRY(vac_flushpage)
	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
	retl
	  nop
	SET_SIZE(vac_flushpage)
655
656	ENTRY_NP(vac_flushpage_tl1)
657	/*
658	 * x-trap to flush page from the d$
659	 *
660	 * %g1 = pfnum, %g2 = color
661	 */
662	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
663	retry
664	SET_SIZE(vac_flushpage_tl1)
665
666	ENTRY(vac_flushcolor)
667	/*
668	 * %o0 = vcolor
669	 */
670	DCACHE_FLUSHCOLOR(%o0, %o1, %o2)
671	retl
672	  nop
673	SET_SIZE(vac_flushcolor)
674
675	ENTRY(vac_flushcolor_tl1)
676	/*
677	 * %g1 = vcolor
678	 */
679	DCACHE_FLUSHCOLOR(%g1, %g2, %g3)
680	retry
681	SET_SIZE(vac_flushcolor_tl1)
682
683
684	.global _dispatch_status_busy
685_dispatch_status_busy:
686	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
687	.align	4
688
/*
 * Determine whether or not the IDSR is busy.
 * Entry: no arguments
 * Returns: 1 if busy, 0 otherwise
 *
 * Clobbers %g1.
 */
	ENTRY(idsr_busy)
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	clr	%o0				! default answer: not busy
	btst	IDSR_BUSY, %g1
	!
	! With the annul bit set, the delay slot runs only when the
	! branch is taken.  The branch must therefore be taken when the
	! busy bit is SET so that the mov below yields 1 for busy.
	! Branching on zero here would invert the return value.
	!
	bnz,a,pn %xcc, 1f
	  mov	1, %o0				! busy bit set: return 1
1:
	retl
	  nop
	SET_SIZE(idsr_busy)
704
/*
 * init_mondo / init_mondo_nocheck
 *
 * Load the three interrupt vector dispatch data registers.
 * Entry:
 *	%o0 - function or inumber to call
 *	%o1, %o2 - arguments (2 uint64_t's)
 *
 * On DEBUG kernels init_mondo first panics if the interrupt dispatch
 * status register reports busy; init_mondo_nocheck is an alternate
 * entry point that starts after that check.
 */
	.seg "text"

	ENTRY(init_mondo)
#ifdef DEBUG
	/* the IDSR is expected to be idle at this point */
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	btst	IDSR_BUSY, %g1
	bz,pt	%xcc, 1f
	  nop

	sethi	%hi(_dispatch_status_busy), %o0
	call	panic
	  or	%o0, %lo(_dispatch_status_busy), %o0
#endif /* DEBUG */

	ALTENTRY(init_mondo_nocheck)
1:
	mov	IDDR_0, %g1
	mov	IDDR_1, %g2
	mov	IDDR_2, %g3
	stxa	%o0, [%g1]ASI_INTR_DISPATCH	! dispatch data reg 0 = func
	stxa	%o1, [%g2]ASI_INTR_DISPATCH	! dispatch data reg 1 = arg1
	stxa	%o2, [%g3]ASI_INTR_DISPATCH	! dispatch data reg 2 = arg2

	retl
	  membar	#Sync			! allowed to be in the delay slot
	SET_SIZE(init_mondo)
751
/*
 * shipit: send the previously loaded mondo to a UPA id.
 *	%o0 - target upa id
 *
 * Clobbers %g1 (and %g3 when SF_ERRATA_54 is defined).
 */
	ENTRY_NP(shipit)
	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = upa id
	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! trigger the dispatch
#if defined(SF_ERRATA_54)
	membar	#Sync				! store must occur before load
	mov	0x20, %g3			! UDBH Control Register Read
	ldxa	[%g3]ASI_SDB_INTR_R, %g0
#endif	/* SF_ERRATA_54 */
	retl
	  membar	#Sync
	SET_SIZE(shipit)
767
768
/*
 * flush_instr_mem:
 *	Flush a portion of the I-$ starting at vaddr, one
 *	ICACHE_FLUSHSZ chunk per flush instruction.
 *	%o0 vaddr
 *	%o1 bytes to be flushed
 *
 *	At least one flush is always issued, even when %o1 is 0.
 */

	ENTRY(flush_instr_mem)
	membar	#StoreStore			! make earlier stores globally
						! visible before flushing
1:
	flush	%o0
	subcc	%o1, ICACHE_FLUSHSZ, %o1	! one chunk fewer to go
	bgu,pt	%ncc, 1b			! loop while bytes remain
	  add	%o0, ICACHE_FLUSHSZ, %o0	! (delay) step to next chunk

	retl
	  nop
	SET_SIZE(flush_instr_mem)
788
/*
 * flush_ecache:
 * Displacement-flush the whole e$ by reading through a physically
 * contiguous area.  The reads use the mmu bypass asi (ASI_MEM) so that
 * the data does not go to the d$.
 * incoming arguments:
 *	%o0 - 64 bit physical address
 *	%o1 - size of address range to read
 *	%o2 - ecache linesize
 */
	ENTRY(flush_ecache)
#ifdef HUMMINGBIRD
	/*
	 * The UltraSPARC-IIe E$ can be 4-way set associative or direct
	 * mapped.  For speed the flush is done with data load/store
	 * access in direct-map mode, restoring the previous mode when
	 * done.  Two passes are needed to guarantee that the whole
	 * ecache has been flushed.
	 *
	 * Interrupts stay disabled while the E$ is in this mode.
	 */
	rdpr	%pstate, %g4		! %g4 = pstate to restore
	andn	%g4, PSTATE_IE, %g5
	wrpr	%g0, %g5, %pstate	! interrupts off

	! switch data access to direct map mode
	mov	1, %g5
	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! %g1 = UPA config to restore
	or	%g1, %g5, %g5
	membar	#Sync
	stxa	%g5, [%g0]ASI_UPA_CONFIG ! direct map for data access on
	membar	#Sync

	! sweep the range HB_ECACHE_FLUSH_CNT times
	mov	HB_ECACHE_FLUSH_CNT-1, %g5
2:
	sub	%o1, %o2, %g3		! %g3 = offset of the last line
1:
	ldxa	[%o0 + %g3]ASI_MEM, %g0	! displace one line
	subcc	%g3, %o2, %g3
	bgeu,a,pt %ncc, 1b		! until the offset borrows
	  nop
	brgz,a,pt %g5, 2b		! another pass still owed?
	  dec	%g5

	membar	#Sync
	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config reg
	membar	#Sync
	wrpr	%g0, %g4, %pstate	! restore earlier pstate

#else /* !HUMMINGBIRD */
	!
	! Read one line per iteration, from (physaddr + size - linesize)
	! down to physaddr.  The loop is entered at the decrement.
	!
	ba	2f
	  nop
1:
	ldxa	[%o0 + %o1]ASI_MEM, %g0	! displace one line
2:
	subcc	%o1, %o2, %o1
	bgeu,a,pt %ncc, 1b		! until the offset borrows
	  nop
#endif /* HUMMINGBIRD */

	retl
	  nop
	SET_SIZE(flush_ecache)
854
/*
 * void kdi_flush_idcache(int dcache_size, int dcache_linesize,
 *			int icache_size, int icache_linesize)
 *
 * Clear all D$ tags and then all I$ tags; each half is skipped when
 * the corresponding cache is disabled in the LSU control register.
 *	%o0/%o1 - D$ size / line size
 *	%o2/%o3 - I$ size / line size
 * Clobbers %g1.
 */
	ENTRY(kdi_flush_idcache)
	DCACHE_FLUSHALL(%o0, %o1, %g1)
	ICACHE_FLUSHALL(%o2, %o3, %g1)
	membar	#Sync
	retl
	  nop
	SET_SIZE(kdi_flush_idcache)
866
867
/*
 * void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
 * 			uint64_t *oafsr, uint64_t *acc_afsr)
 *
 * Read the tag and the 64 bytes of data of one ecache line.
 * ecache_idx is assumed to be aligned on a 64-byte boundary.  The AFSR
 * is sampled after every 8-byte data read, so the data buffer is
 * assumed to be an array of 16 uint64_t's laid out as
 * (e$data, afsr) pairs.
 *
 * The tag and data reads should be atomic to be meaningful.  We will
 * be executing at PIL15 and IE is cleared here, so nothing can occur
 * between the reads.  It is also assumed that running this code does
 * not disturb what is being read - not really possible, but we live
 * with it for now.
 *
 * The AFSR value found before anything is cleared is handed back
 * through oafsr; the caller takes appropriate action if the important
 * bits are non-zero.
 *
 * A caller that wants AFSR values with the CP bit set accumulated
 * passes an address in acc_afsr; otherwise the argument may be null.
 *
 * Register Usage:
 * i0: In: 32-bit e$ index
 * i1: In: addr of e$ data
 * i2: In: addr of e$ tag
 * i3: In: addr of old afsr
 * i4: In: addr of accumulated afsr - may be null
 */
	ENTRY(get_ecache_dtag)
	save	%sp, -SA(MINFRAME), %sp
	mov	1, %l4
	sllx	%l4, 39, %l4		! bit 39 selects e$ data access
	or	%i0, %l4, %g6		! %g6 = diag addr for data reads
	sllx	%l4, 1, %l4		! bit 40 selects e$ tag access
	or	%i0, %l4, %l4		! %l4 = diag addr for the tag read

	rdpr	%pstate, %i5		! %i5 = pstate to restore
	andn	%i5, PSTATE_IE | PSTATE_AM, %i0
	wrpr	%i0, %g0, %pstate	! IE and AM off

	ldxa	[%g0]ASI_ESTATE_ERR, %g1 ! %g1 = error enables to restore
	stxa	%g0, [%g0]ASI_ESTATE_ERR ! errors disabled
	membar	#Sync

	ldxa	[%g0]ASI_AFSR, %i0	! AFSR as found, before the tag read
	stx	%i0, [%i3]		! *oafsr = that value

	ldxa	[%l4]ASI_EC_R, %g0	! latch the tag into the E$ tag reg
	ldxa	[%g0]ASI_EC_DIAG, %i0	! fetch it from the E$ tag reg
	stx	%i0, [%i2]		! *tag = result

	clr	%i2			! %i2 is now the byte counter

	brz	%i4, 1f			! no accumulator supplied
	  ldxa	[%g0]ASI_AFSR, %i0	! (delay) AFSR before it is cleared
	srlx	%i0, P_AFSR_CP_SHIFT, %l0
	btst	1, %l0
	bz	1f			! CP clear: nothing to accumulate
	  nop
	ldx	[%i4], %g4
	or	%g4, %i0, %g4		! fold this AFSR into *acc_afsr
	stx	%g4, [%i4]
1:
	stxa	%i0, [%g0]ASI_AFSR	! clear AFSR
	membar	#Sync
	ldxa	[%g6]ASI_EC_R, %i0	! next 8 bytes of E$ data
	stx	%i0, [%i1]		! store the data word
	add	%g6, 8, %g6
	add	%i1, 8, %i1
	ldxa	[%g0]ASI_AFSR, %i0	! AFSR following that data read
	srlx	%i0, P_AFSR_CP_SHIFT, %l0
	btst	1, %l0
	bz	2f			! CP clear: skip accumulation
	  stx	%i0, [%i1]		! (delay) store AFSR after the data

	brz	%i4, 2f			! no accumulator supplied
	  nop
	ldx	[%i4], %g4
	or	%g4, %i0, %g4		! fold this AFSR into *acc_afsr
	stx	%g4, [%i4]
2:
	add	%i2, 8, %i2
	cmp	%i2, 64
	bl,a	1b			! until all 64 bytes are read
	  add	%i1, 8, %i1		! (annulled on exit) next pair
	stxa	%i0, [%g0]ASI_AFSR	! clear AFSR
	membar	#Sync
	stxa	%g1, [%g0]ASI_ESTATE_ERR ! restore error enable
	membar	#Sync
	wrpr	%g0, %i5, %pstate
	ret
	  restore
	SET_SIZE(get_ecache_dtag)
959#endif /* lint */
960
961#if defined(lint)
962/*
963 * The ce_err function handles trap type 0x63 (corrected_ECC_error) at tl=0.
964 * Steps: 1. GET AFSR  2. Get AFAR <40:4> 3. Get datapath error status
965 *	  4. Clear datapath error bit(s) 5. Clear AFSR error bit
966 *	  6. package data in %g2 and %g3 7. call cpu_ce_error vis sys_trap
967 * %g2: [ 52:43 UDB lower | 42:33 UDB upper | 32:0 afsr ] - arg #3/arg #1
968 * %g3: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
969 */
970void
971ce_err(void)
972{}
973
974void
975ce_err_tl1(void)
976{}
977
978
979/*
980 * The async_err function handles trap types 0x0A (instruction_access_error)
981 * and 0x32 (data_access_error) at TL = 0 and TL > 0.  When we branch here,
982 * %g5 will have the trap type (with 0x200 set if we're at TL > 0).
983 *
984 * Steps: 1. Get AFSR 2. Get AFAR <40:4> 3. If not UE error skip UDP registers.
985 *	  4. Else get and clear datapath error bit(s) 4. Clear AFSR error bits
986 *	  6. package data in %g2 and %g3 7. disable all cpu errors, because
987 *	  trap is likely to be fatal 8. call cpu_async_error vis sys_trap
988 *
989 * %g3: [ 63:53 tt | 52:43 UDB_L | 42:33 UDB_U | 32:0 afsr ] - arg #3/arg #1
990 * %g2: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
991 */
992void
993async_err(void)
994{}
995
996/*
997 * The clr_datapath function clears any error bits set in the UDB regs.
998 */
999void
1000clr_datapath(void)
1001{}
1002
1003/*
1004 * The get_udb_errors() function gets the current value of the
1005 * Datapath Error Registers.
1006 */
1007/*ARGSUSED*/
1008void
1009get_udb_errors(uint64_t *udbh, uint64_t *udbl)
1010{
1011	*udbh = 0;
1012	*udbl = 0;
1013}
1014
1015#else 	/* lint */
1016
/*
 * ce_err - correctable (CE) ECC error trap glue.
 *
 * Flow:
 *   1. Read the AFSR into %g3.  If the UE bit <21> is also set, hand the
 *      trap to async_err with %g5 = 0x63 (the CE trap type).
 *   2. Clear CEEN (bit 0) in the error enable register so that no
 *      further CE traps are taken.
 *   3. Read the AFAR into %g2.
 *   4. Read both UDB halves (P_DER_H, then P_DER_L), OR them into %g3 at
 *      <42:33> and <52:43>, and for each half that has the CE bit <8>
 *      set, write that bit back to clear it.
 *   5. Write the CE bit <20> to the AFSR to clear it.
 *   6. Jump to sys_trap.
 *
 * Registers on the jump to sys_trap:
 *   %g1  handler address (cpu_ce_error)
 *   %g2  AFAR
 *   %g3  AFSR merged with the two UDB values
 *   %g4  PIL_14, or PIL_15 when %pil is already 15
 *
 * NOTE: the P_DER_L load below targets %g5, not g6 as its trailing
 * comment says; %g6 still holds the CE mask built for the upper half.
 * Likewise the "shift upper bits to <52:43>" comment is applied to the
 * lower-half value.
 */
1017	ENTRY_NP(ce_err)
1018	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3
1019
1020	!
1021	! Check for a UE... From Kevin.Normoyle:
1022	! We try to switch to the trap for the UE, but since that's
1023	! a hardware pipeline, we might get to the CE trap before we
1024	! can switch. The UDB and AFSR registers will have both the
1025	! UE and CE bits set but the UDB syndrome and the AFAR will be
1026	! for the UE.
1027	!
1028	or	%g0, 1, %g1		! put 1 in g1
1029	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
1030	andcc	%g1, %g3, %g0		! check for UE in afsr
1031	bnz	async_err		! handle the UE, not the CE
1032	  or	%g0, 0x63, %g5		! pass along the CE ttype
1033	!
1034	! Disable further CE traps to avoid recursion (stack overflow)
1035	! and staying above XCALL_PIL for extended periods.
1036	!
1037	ldxa	[%g0]ASI_ESTATE_ERR, %g2
1038	andn	%g2, 0x1, %g2		! clear bit 0 - CEEN
1039	stxa	%g2, [%g0]ASI_ESTATE_ERR
1040	membar	#Sync			! required
1041	!
1042	! handle the CE
1043	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2
1044
1045	set	P_DER_H, %g4		! put P_DER_H in g4
1046	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
1047	or	%g0, 1, %g6		! put 1 in g6
1048	sllx	%g6, 8, %g6		! shift g6 to <8> sdb CE
1049	andcc	%g5, %g6, %g1		! check for CE in upper half
1050	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
1051	or	%g3, %g5, %g3		! or with afsr bits
1052	bz,a	1f			! no error, goto 1f
1053	  nop
1054	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
1055	membar	#Sync			! membar sync required
10561:
1057	set	P_DER_L, %g4		! put P_DER_L in g4
1058	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g6
1059	andcc	%g5, %g6, %g1		! check for CE in lower half
1060	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
1061	or	%g3, %g5, %g3		! or with afsr bits
1062	bz,a	2f			! no error, goto 2f
1063	  nop
1064	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
1065	membar	#Sync			! membar sync required
10662:
1067	or	%g0, 1, %g4		! put 1 in g4
1068	sllx	%g4, 20, %g4		! shift left to <20> afsr CE
1069	stxa	%g4, [%g0]ASI_AFSR	! use g4 to clear afsr CE error
1070	membar	#Sync			! membar sync required
1071
1072	set	cpu_ce_error, %g1	! put *cpu_ce_error() in g1
1073	rdpr	%pil, %g6		! read pil into %g6
1074	subcc	%g6, PIL_15, %g0
1075	  movneg	%icc, PIL_14, %g4 ! run at pil 14 unless already at 15
1076	sethi	%hi(sys_trap), %g5
1077	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
1078	  movge	%icc, PIL_15, %g4	! already at pil 15
1079	SET_SIZE(ce_err)
1080
/*
 * ce_err_tl1 - CE trap entry with two build variants.
 *
 * Without TRAPTRACE: read the AFSR into %g7, write the same value back
 * (the read/write-back pattern this file labels "clear AFSR" in
 * read_and_clear_afsr), and retry the trapped instruction.
 *
 * With TRAPTRACE: load the address of ce_trap_tl1 into %g1 and jump to
 * dis_err_panic1, which passes %g1 on to sys_trap as the handler.
 */
1081	ENTRY_NP(ce_err_tl1)
1082#ifndef	TRAPTRACE
1083	ldxa	[%g0]ASI_AFSR, %g7
1084	stxa	%g7, [%g0]ASI_AFSR
1085	membar	#Sync
1086	retry
1087#else
1088	set	ce_trap_tl1, %g1
1089	sethi	%hi(dis_err_panic1), %g4
1090	jmp	%g4 + %lo(dis_err_panic1)
1091	nop
1092#endif
1093	SET_SIZE(ce_err_tl1)
1094
1095#ifdef	TRAPTRACE
/*
 * ce_trap_tl1 - TRAPTRACE-only handler that panics on a CE taken at TL1.
 * ce_err_tl1 passes this routine's address in %g1 to dis_err_panic1.
 *
 * Going by the comments below, on entry %o1/%o4 hold the lower/upper 32
 * bits of the AFAR and %o2/%o3 the lower/upper 32 bits of the AFSR.  The
 * moves reorder them to match the .celevel1msg format (AFAR upper, AFAR
 * lower, AFSR upper, AFSR lower) in %o1-%o4, with the format string in
 * %o0, before calling panic().
 */
1096.celevel1msg:
1097	.asciz	"Softerror with trap tracing at tl1: AFAR 0x%08x.%08x AFSR 0x%08x.%08x";
1098
1099	ENTRY_NP(ce_trap_tl1)
1100	! upper 32 bits of AFSR already in o3
1101	mov	%o4, %o0		! save AFAR upper 32 bits
1102	mov	%o2, %o4		! lower 32 bits of AFSR
1103	mov	%o1, %o2		! lower 32 bits of AFAR
1104	mov	%o0, %o1		! upper 32 bits of AFAR
1105	set	.celevel1msg, %o0
1106	call	panic
1107	nop
1108	SET_SIZE(ce_trap_tl1)
1109#endif
1110
/*
 * async_err register usage:
 *   In:   %g5  trap type (ce_err passes 0x63)
 *   Out (to sys_trap):
 *         %g1  cpu_async_error
 *         %g2  AFAR
 *         %g3  AFSR | UDB upper << 33 | UDB lower << 43 | tt << 53
 *         %g4  PIL_15
 *
 * All error reporting is disabled first by writing zero to the error
 * enable register.  The UDB halves are read and merged into %g3 only
 * when the AFSR UE bit <21> is set; each half that has its own UE bit
 * <9> set gets that bit written back to clear it.  With no AFSR UE the
 * code skips straight to label 2.  The merged %g3 is then written to
 * the AFSR to clear the sticky bits.
 *
 * NOTE: the "into 56" comment on the P_DER_H load means %g5.
 */
1111	!
1112	! async_err is the assembly glue code to get us from the actual trap
1113	! into the CPU module's C error handler.  Note that we also branch
1114	! here from ce_err() above.
1115	!
1116	ENTRY_NP(async_err)
1117	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable ecc and other cpu errors
1118	membar	#Sync			! membar sync required
1119
1120	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3
1121	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2
1122
1123	sllx	%g5, 53, %g5		! move ttype to <63:53>
1124	or	%g3, %g5, %g3		! or to afsr in g3
1125
1126	or	%g0, 1, %g1		! put 1 in g1
1127	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
1128	andcc	%g1, %g3, %g0		! check for UE in afsr
1129	bz,a,pn %icc, 2f		! if !UE skip sdb read/clear
1130	  nop
1131
1132	set	P_DER_H, %g4		! put P_DER_H in g4
1133	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into 56
1134	or	%g0, 1, %g6		! put 1 in g6
1135	sllx	%g6, 9, %g6		! shift g6 to <9> sdb UE
1136	andcc	%g5, %g6, %g1		! check for UE in upper half
1137	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
1138	or	%g3, %g5, %g3		! or with afsr bits
1139	bz,a	1f			! no error, goto 1f
1140	  nop
1141	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
1142	membar	#Sync			! membar sync required
11431:
1144	set	P_DER_L, %g4		! put P_DER_L in g4
1145	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
1146	andcc	%g5, %g6, %g1		! check for UE in lower half
1147	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
1148	or	%g3, %g5, %g3		! or with afsr bits
1149	bz,a	2f			! no error, goto 2f
1150	  nop
1151	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
1152	membar	#Sync			! membar sync required
11532:
1154	stxa	%g3, [%g0]ASI_AFSR	! clear all the sticky bits
1155	membar	#Sync			! membar sync required
1156
1157	RESET_USER_RTT_REGS(%g4, %g5, 3f)
11583:
1159
1160	set	cpu_async_error, %g1	! put cpu_async_error in g1
1161	sethi	%hi(sys_trap), %g5
1162	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
1163	  or	%g0, PIL_15, %g4	! run at pil 15
1164	SET_SIZE(async_err)
1165
/*
 * dis_err_panic1 - disable all error traps, gather the error registers
 * and enter sys_trap with the handler supplied by the caller.
 *
 *   In:   %g1  destination routine (left untouched by the visible code)
 *   Out (to sys_trap):
 *         %g2  AFAR
 *         %g3  AFSR | UDB upper << 33 | UDB lower << 43
 *         %g4  -1
 *
 * Unlike async_err, no AFSR or UDB writes are visible here: the error
 * registers are only read.
 */
1166	ENTRY_NP(dis_err_panic1)
1167	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable all error traps
1168	membar	#Sync
1169	! save destination routine is in g1
1170	ldxa	[%g0]ASI_AFAR, %g2	! read afar
1171	ldxa	[%g0]ASI_AFSR, %g3	! read afsr
1172	set	P_DER_H, %g4		! put P_DER_H in g4
1173	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
1174	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
1175	or	%g3, %g5, %g3		! or with afsr bits
1176	set	P_DER_L, %g4		! put P_DER_L in g4
1177	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
1178	sllx	%g5, 43, %g5		! shift upper bits to <52:43>
1179	or	%g3, %g5, %g3		! or with afsr bits
1180
1181	RESET_USER_RTT_REGS(%g4, %g5, 1f)
11821:
1183
1184	sethi	%hi(sys_trap), %g5
1185	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
1186	  sub	%g0, 1, %g4
1187	SET_SIZE(dis_err_panic1)
1188
/*
 * clr_datapath - clear pending UE/CE bits in both UDB halves.
 *
 * For P_DER_H and then P_DER_L: read the register, mask bits <9:8>
 * (0x3 << 8), and if either bit is set write the set bits back,
 * followed by a membar #Sync.  Takes no arguments and returns nothing;
 * uses %o1, %o2, %o4 and %o5 as scratch.
 *
 * NOTE: the P_DER_H load targets %o5, not o3 as its comment says.
 */
1189	ENTRY(clr_datapath)
1190	set	P_DER_H, %o4			! put P_DER_H in o4
1191	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! read sdb upper half into o3
1192	or	%g0, 0x3, %o2			! put 0x3 in o2
1193	sllx	%o2, 8, %o2			! shift o2 to <9:8> sdb
1194	andcc	%o5, %o2, %o1			! check for UE,CE in upper half
1195	bz,a	1f				! no error, goto 1f
1196	  nop
1197	stxa	%o1, [%o4]ASI_SDB_INTR_W	! clear sdb reg UE,CE error bits
1198	membar	#Sync				! membar sync required
11991:
1200	set	P_DER_L, %o4			! put P_DER_L in o4
1201	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! read sdb lower half into o5
1202	andcc	%o5, %o2, %o1			! check for UE,CE in lower half
1203	bz,a	2f				! no error, goto 2f
1204	  nop
1205	stxa	%o1, [%o4]ASI_SDB_INTR_W	! clear sdb reg UE,CE error bits
1206	membar	#Sync
12072:
1208	retl
1209	  nop
1210	SET_SIZE(clr_datapath)
1211
/*
 * get_udb_errors - read both UDB error registers without clearing them.
 *
 *   %o0  address that receives the 64-bit P_DER_H value
 *   %o1  address that receives the 64-bit P_DER_L value
 *
 * %o2 and %o3 are scratch.  The second store sits in the retl delay
 * slot.
 */
1212	ENTRY(get_udb_errors)
1213	set	P_DER_H, %o3
1214	ldxa	[%o3]ASI_SDB_INTR_R, %o2
1215	stx	%o2, [%o0]
1216	set	P_DER_L, %o3
1217	ldxa	[%o3]ASI_SDB_INTR_R, %o2
1218	retl
1219	  stx	%o2, [%o1]
1220	SET_SIZE(get_udb_errors)
1221
1222#endif /* lint */
1223
1224#if defined(lint)
1225/*
1226 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
1227 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
1228 * should only be used in places where you have no choice but to look at the
1229 * tlb itself.
1230 *
1231 * Note: These two routines are required by the Estar "cpr" loadable module.
 *
 * entry selects the tlb entry, *tte receives the entry's data and
 * *va_tag receives the tag-read value with the TAGREAD_CTX_MASK bits
 * cleared.  This is the lint-only empty stub for the assembly routine.
1232 */
1233/*ARGSUSED*/
1234void
1235itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
1236{}
1237
/*
 * Lint-only empty stub for dtlb_rd_entry(); same contract as
 * itlb_rd_entry() but for the data tlb.
 */
1238/*ARGSUSED*/
1239void
1240dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
1241{}
1242#else 	/* lint */
1243/*
1244 * NB - In Spitfire cpus, when reading a tte from the hardware, we
1245 * need to clear [42-41] because the general definitions in pte.h
1246 * define the PA to be [42-13] whereas Spitfire really uses [40-13].
1247 * When cloning these routines for other cpus the "andn" below is not
1248 * necessary.
 *
 * itlb_rd_entry register usage:
 *   %o0  tlb entry number; shifted left by 3 to form the access offset
 *   %o1  address that receives the entry data (PFN high bits cleared)
 *   %o2  address that receives the tag-read value with the
 *        TAGREAD_CTX_MASK bits cleared
 *   %g1, %g2, %o4, %o5  scratch
 *
 * When SF_ERRATA_32 is defined, zero is first written to MMU_PCONTEXT
 * through ASI_DMMU and a flush is issued before the tlb is read.
1249 */
1250	ENTRY_NP(itlb_rd_entry)
1251	sllx	%o0, 3, %o0
1252#if defined(SF_ERRATA_32)
1253	sethi	%hi(FLUSH_ADDR), %g2
1254	set	MMU_PCONTEXT, %g1
1255	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
1256	flush	%g2
1257#endif
1258	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
1259	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
1260	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
1261	andn	%g1, %g2, %g1				! for details
1262	stx	%g1, [%o1]
1263	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
1264	set	TAGREAD_CTX_MASK, %o4
1265	andn	%g2, %o4, %o5
1266	retl
1267	  stx	%o5, [%o2]
1268	SET_SIZE(itlb_rd_entry)
1269
/*
 * dtlb_rd_entry - data-tlb counterpart of itlb_rd_entry.
 *
 *   %o0  tlb entry number; shifted left by 3 to form the access offset
 *   %o1  address that receives the entry data, with the Spitfire-only
 *        PFN high bits cleared
 *   %o2  address that receives the tag-read value with the
 *        TAGREAD_CTX_MASK bits cleared
 *   %g1, %g2, %o4, %o5  scratch
 *
 * The instruction sequence is the same as itlb_rd_entry except that it
 * uses ASI_DTLB_ACCESS and ASI_DTLB_TAGREAD.
 */
1270	ENTRY_NP(dtlb_rd_entry)
1271	sllx	%o0, 3, %o0
1272#if defined(SF_ERRATA_32)
1273	sethi	%hi(FLUSH_ADDR), %g2
1274	set	MMU_PCONTEXT, %g1
1275	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
1276	flush	%g2
1277#endif
1278	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
1279	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
1280	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
1281	andn	%g1, %g2, %g1				! itlb_rd_entry
1282	stx	%g1, [%o1]
1283	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
1284	set	TAGREAD_CTX_MASK, %o4
1285	andn	%g2, %o4, %o5
1286	retl
1287	  stx	%o5, [%o2]
1288	SET_SIZE(dtlb_rd_entry)
1289#endif /* lint */
1290
1291#if defined(lint)
1292
1293/*
1294 * routines to get and set the LSU register
 *
 * get_lsu() returns the current LSU register value; this lint-only
 * stub returns 0 instead of reading the hardware.
1295 */
1296uint64_t
1297get_lsu(void)
1298{
1299	return ((uint64_t)0);
1300}
1301
/*
 * Lint-only empty stub for set_lsu(); the assembly routine stores lsu
 * into the LSU register.
 */
1302/*ARGSUSED*/
1303void
1304set_lsu(uint64_t lsu)
1305{}
1306
1307#else /* lint */
1308
/*
 * set_lsu - write %o0 to the LSU register through ASI_LSU.
 * The membar #Sync runs in the retl delay slot.
 */
1309	ENTRY(set_lsu)
1310	stxa	%o0, [%g0]ASI_LSU		! store to LSU
1311	retl
1312	membar	#Sync
1313	SET_SIZE(set_lsu)
1314
/*
 * get_lsu - return the LSU register value in %o0.
 * The load through ASI_LSU runs in the retl delay slot.
 */
1315	ENTRY(get_lsu)
1316	retl
1317	ldxa	[%g0]ASI_LSU, %o0		! load LSU
1318	SET_SIZE(get_lsu)
1319
1320#endif /* lint */
1321
1322#ifndef lint
1323	/*
1324	 * Clear the NPT (non-privileged trap) bit in the %tick
1325	 * registers. In an effort to make the change in the
1326	 * tick counter as consistent as possible, we disable
1327	 * all interrupts while we're changing the registers. We also
1328	 * ensure that the read and write instructions are in the same
1329	 * line in the instruction cache.
	 *
	 * Register usage:
	 *   %g1  saved %pstate, restored in the delay slot of the return
	 *   %g2  %tick value
	 *   %g3  interrupts-off %pstate first, then the NPT mask (1 << 63)
	 *   %g4  return linkage: the routine leaves with "jmp %g4 + 4"
	 *        rather than retl, so %g4 must already hold the call-site
	 *        address on entry (presumably set by an assembly caller --
	 *        confirm against the callers)
	 *
	 * If bit 63 of %tick is already clear the write is skipped.
	 * Otherwise %tick is re-read after the 64-byte alignment and
	 * written back with the mask applied, which clears the bit that
	 * is known to be set.
1330	 */
1331	ENTRY_NP(cpu_clearticknpt)
1332	rdpr	%pstate, %g1		/* save processor state */
1333	andn	%g1, PSTATE_IE, %g3	/* turn off */
1334	wrpr	%g0, %g3, %pstate	/*   interrupts */
1335	rdpr	%tick, %g2		/* get tick register */
1336	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
1337	mov	1, %g3			/* create mask */
1338	sllx	%g3, 63, %g3		/*   for NPT bit */
1339	ba,a,pt	%xcc, 2f
1340	.align	64			/* Align to I$ boundary */
13412:
1342	rdpr	%tick, %g2		/* get tick register */
1343	wrpr	%g3, %g2, %tick		/* write tick register, */
1344					/*   clearing NPT bit   */
1345#if defined(BB_ERRATA_1)
1346	rdpr	%tick, %g0		/* read (s)tick (BB_ERRATA_1) */
1347#endif
13481:
1349	jmp	%g4 + 4
1350	wrpr	%g0, %g1, %pstate	/* restore processor state */
1351	SET_SIZE(cpu_clearticknpt)
1352
1353	/*
1354	 * get_ecache_tag()
1355	 * Register Usage:
1356	 * %o0: In: 32-bit E$ index
1357	 *      Out: 64-bit E$ tag value
1358	 * %o1: In: address that receives the 64-bit AFSR read after the
	 *          sticky bits were cleared and the tag was accessed
1359	 * %o2: In: address of cpu private afsr storage
	 *
	 * Interrupts and address masking (PSTATE_IE, PSTATE_AM) are
	 * turned off and error reporting is disabled for the duration;
	 * the saved error enable value (%g1) and %pstate (%o5) are
	 * restored before returning.
	 *
	 * The AFSR is read and written back to clear it both before the
	 * tag access and -- read only -- after it.  Whenever a value read
	 * has the bit at P_AFSR_CP_SHIFT set, that value is OR-ed into
	 * the storage at %o2.
	 *
	 * The tag is fetched by a load through ASI_EC_R at (index |
	 * 1 << 40), whose result is discarded, followed by a read of
	 * ASI_EC_DIAG.
1360	 */
1361	ENTRY(get_ecache_tag)
1362	or	%g0, 1, %o4
1363	sllx	%o4, 40, %o4			! set bit 40 for e$ tag access
1364	or	%o0, %o4, %o4			! %o4 = e$ addr for tag read
1365	rdpr	%pstate, %o5
1366	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
1367	wrpr	%o0, %g0, %pstate		! clear IE, AM bits
1368
1369	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1370	stxa	%g0, [%g0]ASI_ESTATE_ERR	! Turn off Error enable
1371	membar	#Sync
1372
1373	ldxa	[%g0]ASI_AFSR, %o0
1374	srlx	%o0, P_AFSR_CP_SHIFT, %o3
1375	btst	1, %o3
1376	bz	1f
1377	  nop
1378	ldx	[%o2], %g4
1379	or	%g4, %o0, %g4			! aggregate AFSR in cpu private
1380	stx	%g4, [%o2]
13811:
1382	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1383	membar  #Sync
1384
1385	ldxa	[%o4]ASI_EC_R, %g0
1386	ldxa	[%g0]ASI_EC_DIAG, %o0		! read tag from e$ tag reg
1387
1388	ldxa	[%g0]ASI_AFSR, %o3
1389	srlx	%o3, P_AFSR_CP_SHIFT, %o4
1390	btst	1, %o4
1391	bz	2f
1392	  stx	%o3, [%o1]			! AFSR after sticky clear
1393	ldx	[%o2], %g4
1394	or	%g4, %o3, %g4			! aggregate AFSR in cpu private
1395	stx	%g4, [%o2]
13962:
1397	membar	#Sync
1398
1399	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
1400	membar	#Sync
1401	retl
1402	wrpr	%g0, %o5, %pstate
1403	SET_SIZE(get_ecache_tag)
1404
1405	/*
1406	 * check_ecache_line()
1407	 * Register Usage:
1408	 * %o0: In: 32-bit E$ index
1409	 *      Out: 64-bit accumulated AFSR
1410	 * %o1: In: address of cpu private afsr storage
	 *
	 * With interrupts, address masking and error reporting turned
	 * off, the AFSR is cleared and eight consecutive 8-byte loads
	 * are issued through ASI_EC_R starting at (index | 1 << 39); the
	 * loaded data is discarded.  The AFSR is then read into %o0 as
	 * the return value and written back to clear it.
	 *
	 * Both before and after the loads, an AFSR value that has the
	 * bit at P_AFSR_CP_SHIFT set is OR-ed into the storage at %o1.
	 * %o2 doubles as the CP test scratch and the loop counter; %g1
	 * and %o5 hold the saved error enable value and %pstate, which
	 * are restored before returning.
1411	 */
1412	ENTRY(check_ecache_line)
1413	or	%g0, 1, %o4
1414	sllx	%o4, 39, %o4			! set bit 39 for e$ data access
1415	or	%o0, %o4, %o4		 	! %o4 = e$ addr for data read
1416
1417	rdpr	%pstate, %o5
1418	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
1419	wrpr	%o0, %g0, %pstate		! clear IE, AM bits
1420
1421	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1422	stxa	%g0, [%g0]ASI_ESTATE_ERR 	! Turn off Error enable
1423	membar	#Sync
1424
1425	ldxa 	[%g0]ASI_AFSR, %o0
1426	srlx	%o0, P_AFSR_CP_SHIFT, %o2
1427	btst	1, %o2
1428	bz	1f
1429	  clr	%o2				! loop count
1430	ldx	[%o1], %o3
1431	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
1432	stx	%o3, [%o1]
14331:
1434	stxa    %o0, [%g0]ASI_AFSR              ! clear AFSR
1435	membar	#Sync
1436
14372:
1438	ldxa	[%o4]ASI_EC_R, %g0		! Read the E$ data 8bytes each
1439	add	%o2, 1, %o2
1440	cmp	%o2, 8
1441	bl,a 	2b
1442	  add	%o4, 8, %o4
1443
1444	membar	#Sync
1445	ldxa	[%g0]ASI_AFSR, %o0		! read accumulated AFSR
1446	srlx	%o0, P_AFSR_CP_SHIFT, %o2
1447	btst	1, %o2
1448	bz	3f
1449	  nop
1450	ldx	[%o1], %o3
1451	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
1452	stx	%o3, [%o1]
14533:
1454	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1455	membar	#Sync
1456	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
1457	membar	#Sync
1458	retl
1459	wrpr	%g0, %o5, %pstate
1460	SET_SIZE(check_ecache_line)
1461#endif /* lint */
1462
1463#if defined(lint)
/*
 * Lint-only stub for read_and_clear_afsr(); returns 0 in place of the
 * AFSR value the assembly routine reads.
 */
1464uint64_t
1465read_and_clear_afsr()
1466{
1467	return ((uint64_t)0);
1468}
1469#else	/* lint */
/*
 * read_and_clear_afsr - return the AFSR in %o0 and write the same value
 * back to the AFSR to clear it.  The write-back runs in the retl delay
 * slot; no membar follows it in this routine.
 */
1470	ENTRY(read_and_clear_afsr)
1471	ldxa	[%g0]ASI_AFSR, %o0
1472	retl
1473	  stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1474	SET_SIZE(read_and_clear_afsr)
1475#endif	/* lint */
1476
1477#if defined(lint)
/*
 * Lint-only empty stub for scrubphys(); the assembly routine is
 * documented in the non-lint branch.
 */
1478/* ARGSUSED */
1479void
1480scrubphys(uint64_t paddr, int ecache_size)
1481{
1482}
1483
1484#else	/* lint */
1485
1486/*
1487 * scrubphys - Pass in the aligned physical memory address that you want
1488 * to scrub, along with the ecache size.
1489 *
1490 *	1) Displacement flush the E$ line corresponding to %addr.
1491 *	   The first ldxa guarantees that the %addr is no longer in
1492 *	   M, O, or E (goes to I or S (if instruction fetch also happens)).
1493 *	2) "Write" the data using a CAS %addr,%g0,%g0.
1494 *	   The casxa guarantees a transition from I to M or S to M.
1495 *	3) Displacement flush the E$ line corresponding to %addr.
1496 *	   The second ldxa pushes the M line out of the ecache, into the
1497 *	   writeback buffers, on the way to memory.
1498 *	4) The "membar #Sync" pushes the cache line out of the writeback
1499 *	   buffers onto the bus, on the way to dram finally.
1500 *
1501 * This is a modified version of the algorithm suggested by Gary Lauterbach.
1502 * In theory the CAS %addr,%g0,%g0 is supposed to mark the addr's cache line
1503 * as modified, but then we found out that for spitfire, if it misses in the
1504 * E$ it will probably install as an M, but if it hits in the E$, then it
1505 * will stay E, if the store doesn't happen. So the first displacement flush
1506 * should ensure that the CAS will miss in the E$.  Arrgh.
 *
 * Arguments: %o0 = paddr, %o1 = ecache size (copied to %o2).
 * The displacement address is ecache_flushaddr plus the alias offset
 * ((paddr ^ ecache_size) & (2 * ecache_size - 1)), kept in %o1 and %o3.
 * Interrupts and address masking (PSTATE_IE, PSTATE_AM) are turned off
 * around the flush; the saved %pstate in %o4 is restored before the
 * return, and the final membar #Sync runs in the retl delay slot.
1507 */
1508
1509	ENTRY(scrubphys)
1510	or	%o1, %g0, %o2	! put ecache size in %o2
1511#ifndef HUMMINGBIRD
1512	xor	%o0, %o2, %o1	! calculate alias address
1513	add	%o2, %o2, %o3	! 2 * ecachesize in case
1514				! addr == ecache_flushaddr
1515	sub	%o3, 1, %o3	! -1 == mask
1516	and	%o1, %o3, %o1	! and with xor'd address
1517	set	ecache_flushaddr, %o3
1518	ldx	[%o3], %o3
1519
1520	rdpr	%pstate, %o4
1521	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1522	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1523
1524	ldxa	[%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1525	casxa	[%o0]ASI_MEM, %g0, %g0
1526	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1527
1528#else /* HUMMINGBIRD */
1529	/*
1530	 * UltraSPARC-IIe processor supports both 4-way set associative
1531	 * and direct map E$. We need to reconfigure E$ to direct map
1532	 * mode for data load/store before displacement flush. Also, we
1533	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1534	 * has been flushed. Keep the interrupts disabled while flushing
1535	 * E$ in this manner.
1536	 *
1537	 * For flushing a specific physical address, we start at the
1538	 * aliased address and load at set-size stride, wrapping around
1539	 * at 2*ecache-size boundary and skipping fault physical address.
1540	 * It takes 10 loads to guarantee that the physical address has
1541	 * been flushed.
1542	 *
1543	 * Usage:
1544	 *	%o0	physaddr
1545	 *	%o5	physaddr - ecache_flushaddr
1546	 *	%g1	UPA config (restored later)
1547	 *	%g2	E$ set size
1548	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1549	 *	%g4	#loads to flush phys address
1550	 *	%g5	temp
1551	 */
1552
1553	sethi	%hi(ecache_associativity), %g5
1554	ld	[%g5 + %lo(ecache_associativity)], %g5
1555	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1556	xor	%o0, %o2, %o1	! calculate alias address
1557	add	%o2, %o2, %g3	! 2 * ecachesize in case
1558				! addr == ecache_flushaddr
1559	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1560	and	%o1, %g3, %o1	! and with xor'd address
1561	sethi	%hi(ecache_flushaddr), %o3
1562	ldx	[%o3 + %lo(ecache_flushaddr)], %o3
1563
1564	rdpr	%pstate, %o4
1565	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1566	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1567
1568	! Place E$ in direct map mode for data access
1569	or	%g0, 1, %g5
1570	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1571	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
1572	or	%g1, %g5, %g5
1573	membar	#Sync
1574	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1575	membar	#Sync
1576
1577	! Displace cache line from each set of E$ starting at the
1578	! aliased address. at set-size stride, wrapping at 2*ecache_size
1579	! and skipping load from physaddr. We need 10 loads to flush the
1580	! physaddr from E$.
1581	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
1582	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
1583	or	%o1, %g0, %g5		! starting aliased offset
15842:
1585	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
15861:
1587	add	%g5, %g2, %g5		! calculate offset in next set
1588	and	%g5, %g3, %g5		! force offset within aliased range
1589	cmp	%g5, %o5		! skip loads from physaddr
1590	be,pn %ncc, 1b
1591	  nop
1592	brgz,pt	%g4, 2b
1593	  dec	%g4
1594
1595	casxa	[%o0]ASI_MEM, %g0, %g0
1596
1597	! Flush %o0 from ecahe again.
1598	! Need single displacement flush at offset %o1 this time as
1599	! the E$ is already in direct map mode.
1600	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1601
1602	membar	#Sync
1603	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1604	membar	#Sync
1605#endif /* HUMMINGBIRD */
1606	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value
1607
1608	retl
1609	membar	#Sync			! move the data out of the load buffer
1610	SET_SIZE(scrubphys)
1611
1612#endif	/* lint */
1613
1614#if defined(lint)
1615
1616/*
1617 * clearphys - Pass in the aligned physical memory address that you want
1618 * to push out, as a 64 byte block of zeros, from the ecache zero-filled.
1619 * Since this routine does not bypass the ecache, it is possible that
1620 * it could generate a UE error while trying to clear a bad line.
1621 * This routine clears and restores the error enable flag.
1622 * TBD - Hummingbird may need similar protection
 *
 * paddr is the line to clear, ecache_size the E$ size and
 * ecache_linesize the number of bytes to zero.  This is the lint-only
 * empty stub; the assembly routine lives in the non-lint branch.
1623 */
1624/* ARGSUSED */
1625void
1626clearphys(uint64_t paddr, int ecache_size, int ecache_linesize)
1627{
1628}
1629
1630#else	/* lint */
1631
/*
 * clearphys - zero the E$ line at paddr and displacement-flush it.
 *
 *   %o0  paddr
 *   %o1  ecache size
 *   %o2  ecache line size
 *
 * The line is zeroed with 8-byte stores through ASI_MEM, walking from
 * the last doubleword of the line down to offset 0, and is then pushed
 * out with the same load / casxa / load sequence that scrubphys uses.
 * In the non-HUMMINGBIRD build error reporting is disabled around the
 * stores and the flush and restored afterwards; the HUMMINGBIRD build
 * does not touch the error enable register.  Interrupts and address
 * masking are off for the duration, and the saved %pstate (%o4) is
 * restored in the retl delay slot.
 */
1632	ENTRY(clearphys)
1633	or	%o2, %g0, %o3	! ecache linesize
1634	or	%o1, %g0, %o2	! ecache size
1635#ifndef HUMMINGBIRD
1636	or	%o3, %g0, %o4	! save ecache linesize
1637	xor	%o0, %o2, %o1	! calculate alias address
1638	add	%o2, %o2, %o3	! 2 * ecachesize
1639	sub	%o3, 1, %o3	! -1 == mask
1640	and	%o1, %o3, %o1	! and with xor'd address
1641	set	ecache_flushaddr, %o3
1642	ldx	[%o3], %o3
1643	or	%o4, %g0, %o2	! saved ecache linesize
1644
1645	rdpr	%pstate, %o4
1646	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1647	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1648
1649	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1650	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
1651	membar	#Sync
1652
1653	! need to put zeros in the cache line before displacing it
1654
1655	sub	%o2, 8, %o2	! get offset of last double word in ecache line
16561:
1657	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
1658	sub	%o2, 8, %o2
1659	brgez,a,pt %o2, 1b
1660	nop
1661	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1662	casxa	[%o0]ASI_MEM, %g0, %g0
1663	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1664
1665	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
1666	membar	#Sync
1667
1668#else /* HUMMINGBIRD... */
1669	/*
1670	 * UltraSPARC-IIe processor supports both 4-way set associative
1671	 * and direct map E$. We need to reconfigure E$ to direct map
1672	 * mode for data load/store before displacement flush. Also, we
1673	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1674	 * has been flushed. Keep the interrupts disabled while flushing
1675	 * E$ in this manner.
1676	 *
1677	 * For flushing a specific physical address, we start at the
1678	 * aliased address and load at set-size stride, wrapping around
1679	 * at 2*ecache-size boundary and skipping fault physical address.
1680	 * It takes 10 loads to guarantee that the physical address has
1681	 * been flushed.
1682	 *
1683	 * Usage:
1684	 *	%o0	physaddr
1685	 *	%o5	physaddr - ecache_flushaddr
1686	 *	%g1	UPA config (restored later)
1687	 *	%g2	E$ set size
1688	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1689	 *	%g4	#loads to flush phys address
1690	 *	%g5	temp
1691	 */
1692
1693	or	%o3, %g0, %o4	! save ecache linesize
1694	sethi	%hi(ecache_associativity), %g5
1695	ld	[%g5 + %lo(ecache_associativity)], %g5
1696	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1697
1698	xor	%o0, %o2, %o1	! calculate alias address
1699	add	%o2, %o2, %g3	! 2 * ecachesize
1700	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1701	and	%o1, %g3, %o1	! and with xor'd address
1702	sethi	%hi(ecache_flushaddr), %o3
1703	ldx	[%o3 +%lo(ecache_flushaddr)], %o3
1704	or	%o4, %g0, %o2	! saved ecache linesize
1705
1706	rdpr	%pstate, %o4
1707	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1708	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1709
1710	! Place E$ in direct map mode for data access
1711	or	%g0, 1, %g5
1712	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1713	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
1714	or	%g1, %g5, %g5
1715	membar	#Sync
1716	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1717	membar	#Sync
1718
1719	! need to put zeros in the cache line before displacing it
1720
1721	sub	%o2, 8, %o2	! get offset of last double word in ecache line
17221:
1723	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
1724	sub	%o2, 8, %o2
1725	brgez,a,pt %o2, 1b
1726	nop
1727
1728	! Displace cache line from each set of E$ starting at the
1729	! aliased address. at set-size stride, wrapping at 2*ecache_size
1730	! and skipping load from physaddr. We need 10 loads to flush the
1731	! physaddr from E$.
1732	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
1733	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
1734	or	%o1, %g0, %g5		! starting offset
17352:
1736	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
17373:
1738	add	%g5, %g2, %g5		! calculate offset in next set
1739	and	%g5, %g3, %g5		! force offset within aliased range
1740	cmp	%g5, %o5		! skip loads from physaddr
1741	be,pn %ncc, 3b
1742	  nop
1743	brgz,pt	%g4, 2b
1744	  dec	%g4
1745
1746	casxa	[%o0]ASI_MEM, %g0, %g0
1747
1748	! Flush %o0 from ecahe again.
1749	! Need single displacement flush at offset %o1 this time as
1750	! the E$ is already in direct map mode.
1751	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1752
1753	membar	#Sync
1754	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1755	membar	#Sync
1756#endif /* HUMMINGBIRD... */
1757
1758	retl
1759	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value
1760	SET_SIZE(clearphys)
1761
1762#endif	/* lint */
1763
1764#if defined(lint)
/*
 * Lint-only empty stub for flushecacheline(); the assembly routine is
 * documented in the non-lint branch.
 */
1765/* ARGSUSED */
1766void
1767flushecacheline(uint64_t paddr, int ecache_size)
1768{
1769}
1770
1771#else	/* lint */
1772/*
1773 * flushecacheline - This is a simpler version of scrubphys
1774 * which simply does a displacement flush of the line in
1775 * question. This routine is mainly used in handling async
1776 * errors where we want to get rid of a bad line in ecache.
1777 * Note that if the line is modified and it has suffered
1778 * data corruption - we are guaranteed that the hw will write
1779 * a UE back to mark the page poisoned.
 *
 * Arguments: %o0 = paddr, %o1 = ecache size (copied to %o2).
 * Interrupts, address masking and error reporting are turned off
 * around the displacement load(s); the saved error enable value (%g1)
 * is restored before the return and the saved %pstate (%o4) in the
 * retl delay slot.  No casxa is issued here, unlike scrubphys.
1780 */
1781        ENTRY(flushecacheline)
1782        or      %o1, %g0, %o2   ! put ecache size in %o2
1783#ifndef HUMMINGBIRD
1784        xor     %o0, %o2, %o1   ! calculate alias address
1785        add     %o2, %o2, %o3   ! 2 * ecachesize in case
1786                                ! addr == ecache_flushaddr
1787        sub     %o3, 1, %o3     ! -1 == mask
1788        and     %o1, %o3, %o1   ! and with xor'd address
1789        set     ecache_flushaddr, %o3
1790        ldx     [%o3], %o3
1791
1792        rdpr    %pstate, %o4
1793        andn    %o4, PSTATE_IE | PSTATE_AM, %o5
1794        wrpr    %o5, %g0, %pstate       ! clear IE, AM bits
1795
1796	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1797	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
1798	membar	#Sync
1799
1800        ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1801	membar	#Sync
1802	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
1803        membar  #Sync
1804#else /* HUMMINGBIRD */
1805	/*
1806	 * UltraSPARC-IIe processor supports both 4-way set associative
1807	 * and direct map E$. We need to reconfigure E$ to direct map
1808	 * mode for data load/store before displacement flush. Also, we
1809	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1810	 * has been flushed. Keep the interrupts disabled while flushing
1811	 * E$ in this manner.
1812	 *
1813	 * For flushing a specific physical address, we start at the
1814	 * aliased address and load at set-size stride, wrapping around
1815	 * at 2*ecache-size boundary and skipping fault physical address.
1816	 * It takes 10 loads to guarantee that the physical address has
1817	 * been flushed.
1818	 *
1819	 * Usage:
1820	 *	%o0	physaddr
1821	 *	%o5	physaddr - ecache_flushaddr
1822	 *	%g1	error enable register
1823	 *	%g2	E$ set size
1824	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1825	 *	%g4	UPA config (restored later)
1826	 *	%g5	temp
1827	 */
1828
1829	sethi	%hi(ecache_associativity), %g5
1830	ld	[%g5 + %lo(ecache_associativity)], %g5
1831	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1832	xor	%o0, %o2, %o1	! calculate alias address
1833	add	%o2, %o2, %g3	! 2 * ecachesize in case
1834				! addr == ecache_flushaddr
1835	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1836	and	%o1, %g3, %o1	! and with xor'd address
1837	sethi	%hi(ecache_flushaddr), %o3
1838	ldx	[%o3 + %lo(ecache_flushaddr)], %o3
1839
1840	rdpr	%pstate, %o4
1841	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1842	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1843
1844	! Place E$ in direct map mode for data access
1845	or	%g0, 1, %g5
1846	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1847	ldxa	[%g0]ASI_UPA_CONFIG, %g4 ! current UPA config (restored later)
1848	or	%g4, %g5, %g5
1849	membar	#Sync
1850	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1851	membar	#Sync
1852
1853	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1854	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
1855	membar	#Sync
1856
1857	! Displace cache line from each set of E$ starting at the
1858	! aliased address. at set-size stride, wrapping at 2*ecache_size
1859	! and skipping load from physaddr. We need 10 loads to flush the
1860	! physaddr from E$.
1861	mov	HB_PHYS_FLUSH_CNT-1, %g5 ! #loads to flush physaddr
1862	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
18632:
1864	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
18653:
1866	add	%o1, %g2, %o1		! calculate offset in next set
1867	and	%o1, %g3, %o1		! force offset within aliased range
1868	cmp	%o1, %o5		! skip loads from physaddr
1869	be,pn %ncc, 3b
1870	  nop
1871	brgz,pt	%g5, 2b
1872	  dec	%g5
1873
1874	membar	#Sync
1875	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
1876        membar  #Sync
1877
1878	stxa	%g4, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1879	membar	#Sync
1880#endif /* HUMMINGBIRD */
1881        retl
1882        wrpr    %g0, %o4, %pstate
1883        SET_SIZE(flushecacheline)
1884
1885#endif	/* lint */
1886
1887#if defined(lint)
/*
 * Lint-only empty stub for ecache_scrubreq_tl1(); the assembly routine
 * is documented in the non-lint branch.
 */
1888/* ARGSUSED */
1889void
1890ecache_scrubreq_tl1(uint64_t inum, uint64_t dummy)
1891{
1892}
1893
1894#else	/* lint */
1895/*
1896 * ecache_scrubreq_tl1 is the crosstrap handler called at ecache_calls_a_sec Hz
1897 * from the clock CPU.  It atomically increments the outstanding request
1898 * counter and, if there was not already an outstanding request,
1899 * branches to setsoftint_tl1 to enqueue an intr_vec for the given inum.
 *
 * The counter is accessed with 32-bit ld/st.  The incremented value is
 * always stored (in the branch delay slot); the jump to setsoftint_tl1
 * is taken only when the value loaded was zero, otherwise the routine
 * ends with retry.  The 1f label is also handed to
 * GET_CPU_PRIVATE_PTR, presumably as its target when the CPU has no
 * private area -- confirm against the macro definition.
1900 */
1901
1902	! Register usage:
1903	!
1904	! Arguments:
1905	! %g1 - inum
1906	!
1907	! Internal:
1908	! %g2, %g3, %g5 - scratch
1909	! %g4 - ptr. to spitfire_scrub_misc ec_scrub_outstanding.
1910	! %g6 - setsoftint_tl1 address
1911
1912	ENTRY_NP(ecache_scrubreq_tl1)
1913	set	SFPR_SCRUB_MISC + EC_SCRUB_OUTSTANDING, %g2
1914	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
1915	ld	[%g4], %g2		! cpu's ec_scrub_outstanding.
1916	set	setsoftint_tl1, %g6
1917	!
1918	! no need to use atomic instructions for the following
1919	! increment - we're at tl1
1920	!
1921	add	%g2, 0x1, %g3
1922	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
1923	  st	%g3, [%g4]		! delay - store incremented counter
1924	jmp	%g6			! setsoftint_tl1(%g1) - queue intr_vec
1925	  nop
1926	! not reached
19271:
1928	retry
1929	SET_SIZE(ecache_scrubreq_tl1)
1930
1931#endif	/* lint */
1932
1933#if defined(lint)
/*
 * Lint-only empty stub for write_ec_tag_parity(); the assembly routine
 * is documented in the non-lint branch.
 */
1934/*ARGSUSED*/
1935void
1936write_ec_tag_parity(uint32_t id)
1937{}
1938#else /* lint */
1939
1940	/*
1941         * write_ec_tag_parity(), which zero's the ecache tag,
1942         * marks the state as invalid and writes good parity to the tag.
1943         * Input %o0= 32 bit E$ index
         *
         * %o1 is overwritten as scratch for the %pstate update.
         * Interrupts, address masking and error reporting are turned
         * off for the duration and restored before returning (%g1 holds
         * the saved error enable value, %o5 the saved %pstate).
         *
         * The value S_EC_PARITY << S_ECPAR_SHIFT is written to
         * ASI_EC_DIAG, followed by a store of %g0 through ASI_EC_W at
         * (index | 1 << 39).
         *
         * NOTE(review): the shift below is by 39 although its comment
         * says "set bit 40"; get_ecache_tag shifts by 40 for tag access
         * and check_ecache_line uses 39 for data access.  Confirm which
         * E$ diagnostic address bit is intended here.
1944         */
1945        ENTRY(write_ec_tag_parity)
1946        or      %g0, 1, %o4
1947        sllx    %o4, 39, %o4                    ! set bit 40 for e$ tag access
1948        or      %o0, %o4, %o4                 ! %o4 = ecache addr for tag write
1949
1950        rdpr    %pstate, %o5
1951        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
1952        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits
1953
1954        ldxa    [%g0]ASI_ESTATE_ERR, %g1
1955        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
1956        membar  #Sync
1957
1958        ba      1f
1959         nop
1960	/*
1961         * Align on the ecache boundary in order to force
1962         * critical code section onto the same ecache line.
1963         */
1964         .align 64
1965
19661:
1967        set     S_EC_PARITY, %o3         	! clear tag, state invalid
1968        sllx    %o3, S_ECPAR_SHIFT, %o3   	! and with good tag parity
1969        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
1970        stxa    %g0, [%o4]ASI_EC_W
1971        membar  #Sync
1972
1973        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
1974        membar  #Sync
1975        retl
1976        wrpr    %g0, %o5, %pstate
1977        SET_SIZE(write_ec_tag_parity)
1978
1979#endif /* lint */
1980
1981#if defined(lint)
/*
 * Lint-only empty stub for write_hb_ec_tag_parity(); the assembly
 * routine is documented in the non-lint branch.
 */
1982/*ARGSUSED*/
1983void
1984write_hb_ec_tag_parity(uint32_t id)
1985{}
1986#else /* lint */
1987
1988	/*
1989         * write_hb_ec_tag_parity(), which zero's the ecache tag,
1990         * marks the state as invalid and writes good parity to the tag.
1991         * Input %o0= 32 bit E$ index
         *
         * Same sequence as write_ec_tag_parity, except that the value
         * written to ASI_EC_DIAG is HB_EC_PARITY << HB_ECPAR_SHIFT when
         * HUMMINGBIRD is defined and SB_EC_PARITY << SB_ECPAR_SHIFT
         * otherwise.  %o1 is overwritten as scratch; %g1 and %o5 hold
         * the saved error enable value and %pstate, both restored
         * before returning.
         *
         * NOTE(review): the shift below is by 39 although its comment
         * says "set bit 40"; get_ecache_tag shifts by 40 for tag access
         * and check_ecache_line uses 39 for data access.  Confirm which
         * E$ diagnostic address bit is intended here.
1992         */
1993        ENTRY(write_hb_ec_tag_parity)
1994        or      %g0, 1, %o4
1995        sllx    %o4, 39, %o4                    ! set bit 40 for e$ tag access
1996        or      %o0, %o4, %o4               ! %o4 = ecache addr for tag write
1997
1998        rdpr    %pstate, %o5
1999        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
2000        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits
2001
2002        ldxa    [%g0]ASI_ESTATE_ERR, %g1
2003        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
2004        membar  #Sync
2005
2006        ba      1f
2007         nop
2008	/*
2009         * Align on the ecache boundary in order to force
2010         * critical code section onto the same ecache line.
2011         */
2012         .align 64
20131:
2014#ifdef HUMMINGBIRD
2015        set     HB_EC_PARITY, %o3         	! clear tag, state invalid
2016        sllx    %o3, HB_ECPAR_SHIFT, %o3   	! and with good tag parity
2017#else /* !HUMMINGBIRD */
2018        set     SB_EC_PARITY, %o3         	! clear tag, state invalid
2019        sllx    %o3, SB_ECPAR_SHIFT, %o3   	! and with good tag parity
2020#endif /* !HUMMINGBIRD */
2021
2022        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
2023        stxa    %g0, [%o4]ASI_EC_W
2024        membar  #Sync
2025
2026        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
2027        membar  #Sync
2028        retl
2029        wrpr    %g0, %o5, %pstate
2030        SET_SIZE(write_hb_ec_tag_parity)
2031
2032#endif /* lint */
2033
/* NOTE(review): presumably the size in bytes of a VIS block transfer -- confirm against its users. */
2034#define	VIS_BLOCKSIZE		64
2035
2036#if defined(lint)
2037
2038/*ARGSUSED*/
2039int
2040dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
2041{ return (0); }
2042
2043#else
2044
	/*
	 * dtrace_blksuword32(addr, data, tryagain)
	 *
	 * Loads the 32-bit word at data into %f0 and stores to addr with a
	 * block-commit store (ASI_BLK_COMMIT_S, source %d0), with a lofault
	 * handler installed around the store.
	 *
	 * After the save: %i0 = addr, %i1 = data, %i2 = tryagain. None of
	 * them is modified, so the tail call on the fault path passes the
	 * caller's original arguments to dtrace_blksuword32_err().
	 *
	 * Returns 0 if the store completes. If the lofault handler is
	 * entered, returns -1 when tryagain is zero; otherwise tail-calls
	 * dtrace_blksuword32_err(), whose return value goes to our caller.
	 *
	 * The frame is MINFRAME + 4 bytes; the extra 4 bytes at
	 * [%fp + STACK_BIAS - 4] hold the caller's %f0 when the FPU was
	 * already enabled on entry. Interrupts (PSTATE_IE) are disabled for
	 * the duration and the entry %pstate (%l1) is restored on all paths.
	 *
	 * The condition codes set by the andcc on FPRS_FEF are tested three
	 * times (on entry, on the success path and on the fault path);
	 * no instruction in between here modifies them.
	 */
2045	ENTRY(dtrace_blksuword32)
2046	save	%sp, -SA(MINFRAME + 4), %sp
2047
2048	rdpr	%pstate, %l1
2049	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
2050	wrpr	%g0, %l2, %pstate		! protect our FPU diddling
2051
	/*
	 * Annulled branch: if FPRS_FEF is clear the delay slot enables the
	 * FPU and the save of %f0 is skipped; if it is set the delay slot
	 * is annulled and %f0 is saved to the stack slot.
	 */
2052	rd	%fprs, %l0
2053	andcc	%l0, FPRS_FEF, %g0
2054	bz,a,pt	%xcc, 1f			! if the fpu is disabled
2055	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu
2056
2057	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
20581:
2059	set	0f, %l5
2060        /*
2061         * We're about to write a block full of either total garbage
2062         * (not kernel data, don't worry) or user floating-point data
2063         * (so it only _looks_ like garbage).
2064         */
2065	ld	[%i1], %f0			! modify the block
2066	membar	#Sync
2067	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
2068	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
2069	membar	#Sync
2070	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2071
	/*
	 * Success path. Reuses the condition codes from the andcc above:
	 * if the FPU was disabled on entry, put back the original %fprs
	 * (%l0) and skip the reload; otherwise reload the saved %f0.
	 */
2072	bz,a,pt	%xcc, 1f
2073	wr	%g0, %l0, %fprs			! restore %fprs
2074
2075	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
20761:
2077
2078	wrpr	%g0, %l1, %pstate		! restore interrupts
2079
2080	ret
2081	restore	%g0, %g0, %o0
2082
	/*
	 * Fault path: label 0 is the address installed in T_LOFAULT above.
	 * Performs the same %fprs / %f0 cleanup as the success path.
	 * NOTE(review): the bz below assumes the condition codes from the
	 * andcc are still intact when control arrives here -- confirm
	 * against the trap/lofault return path.
	 */
20830:
2084	membar	#Sync
2085	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2086
2087	bz,a,pt	%xcc, 1f
2088	wr	%g0, %l0, %fprs			! restore %fprs
2089
2090	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
20911:
2092
2093	wrpr	%g0, %l1, %pstate		! restore interrupts
2094
2095	/*
2096	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
2097	 * which deals with watchpoints. Otherwise, just return -1.
2098	 */
2099	brnz,pt	%i2, 1f
2100	nop
2101	ret
2102	restore	%g0, -1, %o0
21031:
2104	call	dtrace_blksuword32_err
2105	restore
2106
2107	SET_SIZE(dtrace_blksuword32)
2108
2109#endif /* lint */
2110