xref: /titanic_50/usr/src/uts/sun4u/cpu/us3_common_asm.s (revision 0b1b4412cfd6c4ac5467dbe6f4088dcec4f55fe8)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Assembly code support for Cheetah/Cheetah+ modules
26 */
27
28#if !defined(lint)
29#include "assym.h"
30#endif	/* !lint */
31
32#include <sys/asm_linkage.h>
33#include <sys/mmu.h>
34#include <vm/hat_sfmmu.h>
35#include <sys/machparam.h>
36#include <sys/machcpuvar.h>
37#include <sys/machthread.h>
38#include <sys/machtrap.h>
39#include <sys/privregs.h>
40#include <sys/trap.h>
41#include <sys/cheetahregs.h>
42#include <sys/us3_module.h>
43#include <sys/xc_impl.h>
44#include <sys/intreg.h>
45#include <sys/async.h>
46#include <sys/clock.h>
47#include <sys/cheetahasm.h>
48#include <sys/cmpregs.h>
49
50#ifdef TRAPTRACE
51#include <sys/traptrace.h>
52#endif /* TRAPTRACE */
53
54#if !defined(lint)
55
56/* BEGIN CSTYLED */
57
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Invalidate D$ lines on behalf of one page.  The strategy is chosen at
 * run time from the global dflush_type; if the DCU_DC bit of the DCU
 * register is clear the macro branches straight to its exit label.
 *
 *	arg1 = pfn (clobbered)
 *	arg2 = virtual color (reused as the loop counter by FLUSHMATCH_TYPE)
 *	tmp1, tmp2, tmp3 = scratch registers (clobbered)
 *
 * Defines the numeric local labels 1-7 and changes the condition codes.
 */
#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_DCU, tmp1					;\
	btst	DCU_DC, tmp1		/* D$ enable bit set? */	;\
	bz,pn	%icc, 1f		/* no, skip everything */	;\
	ASM_LD(tmp1, dcache_linesize)					;\
	ASM_LD(tmp2, dflush_type)					;\
	cmp	tmp2, FLUSHPAGE_TYPE					;\
	be,pt	%icc, 2f						;\
	nop								;\
	sllx	arg1, CHEETAH_DC_VBIT_SHIFT, arg1 /* tag to match */	;\
	ASM_LD(tmp3, dcache_size)					;\
	cmp	tmp2, FLUSHMATCH_TYPE					;\
	be,pt	%icc, 3f						;\
	nop								;\
	/*								\
	 * FLUSHALL_TYPE: clear every tag in the cache.			\
	 *	tmp1 = cache line size					\
	 *	tmp2 = current offset, walks down to 0			\
	 *	tmp3 = cache size					\
	 */								\
	sub	tmp3, tmp1, tmp2					;\
4:									\
	stxa	%g0, [tmp2]ASI_DC_TAG					;\
	membar	#Sync							;\
	cmp	%g0, tmp2						;\
	bne,pt	%icc, 4b		/* until offset 0 is done */	;\
	sub	tmp2, tmp1, tmp2					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * FLUSHPAGE_TYPE: invalidate by physical address.		\
	 *	arg1 = pfn, converted below to a physical address	\
	 *	tmp1 = cache line size					\
	 *	tmp3 = offset within the page, walks down to 0		\
	 */								\
2:									\
	set	MMU_PAGESIZE, tmp3					;\
	sllx	arg1, MMU_PAGESHIFT, arg1  /* pfn to 43 bit PA */	;\
	sub	tmp3, tmp1, tmp3					;\
6:									\
	stxa	%g0, [arg1 + tmp3]ASI_DC_INVAL				;\
	membar	#Sync							;\
	cmp	%g0, tmp3						;\
	bnz,pt	%icc, 6b		/* until offset 0 is done */	;\
	sub	tmp3, tmp1, tmp3					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * FLUSHMATCH_TYPE: clear only the lines whose tag matches.	\
	 *	arg1 = tag to compare against				\
	 *	arg2 = current offset, walks down to 0			\
	 *	tmp1 = cache line size					\
	 *	tmp2 = tag read back from the cache			\
	 *	tmp3 = cache size					\
	 */								\
3:									\
	sub	tmp3, tmp1, arg2					;\
7:									\
	ldxa	[arg2]ASI_DC_TAG, tmp2		/* fetch the tag */	;\
	btst	CHEETAH_DC_VBIT_MASK, tmp2				;\
	bz,pn	%icc, 5f		/* no valid sub-blocks */	;\
	andn	tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* strip v bits */	;\
	cmp	tmp2, arg1						;\
	bne,pn	%icc, 5f		/* different tag, skip */	;\
	nop								;\
	stxa	%g0, [arg2]ASI_DC_TAG					;\
	membar	#Sync							;\
5:									\
	cmp	%g0, arg2						;\
	bne,pt	%icc, 7b		/* until offset 0 is done */	;\
	sub	arg2, tmp1, arg2					;\
1:
132
/*
 * DCACHE_FLUSHCOLOR(arg, way, tmp1, tmp2, tmp3)
 *
 * macro that flushes the entire dcache color within one way
 * dcache size = 64K, one way 16K
 *
 *	arg  = virtual color.  The register is only read, never written,
 *	       so the macro may be expanded once per way against the same
 *	       register (as vac_flushcolor and vac_flushcolor_tl1 do).
 *	way  = way number
 *	tmp1, tmp2, tmp3 = scratch registers (clobbered)
 *
 * Does nothing when the DCU_DC bit of the DCU register is clear.
 * Defines the numeric local labels 1 and 2 and changes the condition
 * codes.
 */
#define DCACHE_FLUSHCOLOR(arg, way, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_DCU, tmp1;					\
	btst	DCU_DC, tmp1;		/* is dcache enabled? */	\
	bz,pn	%icc, 1f;						\
	ASM_LD(tmp1, dcache_linesize);					\
	/*								\
	 * tmp1 = cache line size					\
	 * tmp2 = offset within the page, walks down to 0		\
	 * tmp3 = (color << MMU_PAGESHIFT) | (way << 14)		\
	 *								\
	 * The index is built in tmp3 rather than in arg: shifting	\
	 * arg in place would shift the color a second time on the	\
	 * next expansion and select the wrong lines for a non-zero	\
	 * color.							\
	 */								\
	mov	way, tmp2;						\
	sllx	tmp2, 14, tmp2;		 /* One way 16K */		\
	sllx	arg, MMU_PAGESHIFT, tmp3; /* color to dcache page */	\
	or	tmp3, tmp2, tmp3;					\
	set	MMU_PAGESIZE, tmp2;					\
	sub	tmp2, tmp1, tmp2;	/* offset of the last line */	\
2:									\
	stxa	%g0, [tmp3 + tmp2]ASI_DC_TAG;				\
	membar	#Sync;							\
	cmp	%g0, tmp2;						\
	bne,pt	%icc, 2b;						\
	  sub	tmp2, tmp1, tmp2;					\
1:
160
161/* END CSTYLED */
162
163#endif	/* !lint */
164
165/*
166 * Cheetah MMU and Cache operations.
167 */
168
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
{}

#else	/* lint */

/*
 * vtag_flushpage(vaddr, sfmmup)
 *	Demap one page from both the D-TLB and the I-TLB.
 *
 *	%o0 = vaddr
 *	%o1 = sfmmup
 *
 * PSTATE.IE is cleared for the duration of the routine; the %pstate
 * value seen on entry is written back in the delay slot of the retl.
 */
	ENTRY_NP(vtag_flushpage)
	rdpr	%pstate, %o5			! %o5 = %pstate at entry
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
#endif /* DEBUG */
	andn	%o5, PSTATE_IE, %o4
	wrpr	%g0, %o4, %pstate		! interrupts off from here on

	/*
	 * With interrupts off the primary context register cannot
	 * change underneath us while the demap is in progress.
	 */
	sethi	%hi(ksfmmup), %o3
	ldx	[%o3 + %lo(ksfmmup)], %o3
	cmp	%o3, %o1
	bne,pt	%xcc, 1f			! sfmmup != ksfmmup: user demap
	  sethi	%hi(FLUSH_ADDR), %o3		! (delay) %o3 = FLUSH_ADDR

	/*
	 * Kernel demap: use primary, type = page implicitly, so vaddr is
	 * used as the demap address unchanged.
	 */
	stxa	%g0, [%o0]ASI_DTLB_DEMAP	! dmmu flush for KCONTEXT
	stxa	%g0, [%o0]ASI_ITLB_DEMAP	! immu flush for KCONTEXT
	flush	%o3
	retl
	  wrpr	%g0, %o5, %pstate		! restore entry %pstate

1:
	/*
	 * User demap.  The primary context must be set up for the
	 * demap, because the secondary context cannot be used for
	 * the Cheetah IMMU.
	 *
	 *	%o0 = vaddr
	 *	%o1 = sfmmup
	 *	%o3 = FLUSH_ADDR
	 */
	SFMMU_CPU_CNUM(%o1, %g1, %g2)		! %g1 = sfmmu cnum on this CPU

	ldub	[%o1 + SFMMU_CEXT], %o4		! %o4 = sfmmup->sfmmu_cext
	sll	%o4, CTXREG_EXT_SHIFT, %o4
	or	%g1, %o4, %g1			! %g1 = primary pgsz | cnum

	wrpr	%g0, 1, %tl
	set	MMU_PCONTEXT, %o4
	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
	ldxa	[%o4]ASI_DMMU, %o2		! %o2 = context register to restore
	srlx	%o2, CTXREG_NEXT_SHIFT, %o1	! keep only the nucleus pgsz
	sllx	%o1, CTXREG_NEXT_SHIFT, %o1	! %o1 = nucleus pgsz
	or	%g1, %o1, %g1			! %g1 = nucleus pgsz | primary pgsz | cnum
	stxa	%g1, [%o4]ASI_DMMU		! install the new context

	stxa	%g0, [%o0]ASI_DTLB_DEMAP
	stxa	%g0, [%o0]ASI_ITLB_DEMAP
	stxa	%o2, [%o4]ASI_DMMU		! put the old context back
	flush	%o3
	wrpr	%g0, 0, %tl

	retl
	  wrpr	%g0, %o5, %pstate		! restore entry %pstate
	SET_SIZE(vtag_flushpage)

#endif	/* lint */
247
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
void
vtag_flushall(void)
{}

#else	/* lint */

/*
 * vtag_flushall() / demap_all()
 *	Issue a demap-all to the D-TLB and the I-TLB, followed by a flush.
 *	Takes no arguments; uses %g1 and %o3 as scratch.
 */
	ENTRY_NP2(vtag_flushall, demap_all)
	set	DEMAP_ALL_TYPE, %g1
	sethi	%hi(FLUSH_ADDR), %o3
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%o3
	retl
	  nop
	SET_SIZE(demap_all)
	SET_SIZE(vtag_flushall)

#endif	/* lint */
271
272
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
{}

#else	/* lint */

/*
 * vtag_flushpage_tl1
 *	x-trap to flush page from tlb and tsb
 *
 *	%g1 = vaddr, zero-extended on 32-bit kernel
 *	%g2 = sfmmup
 *
 * assumes TSBE_TAG = 0
 *
 * Leaves through retry on both paths.
 */
	ENTRY_NP(vtag_flushpage_tl1)
	srln	%g1, MMU_PAGESHIFT, %g1		! drop the in-page offset bits

	sethi	%hi(ksfmmup), %g3
	ldx	[%g3 + %lo(ksfmmup)], %g3
	cmp	%g3, %g2
	bne,pt	%xcc, 1f			! sfmmup != ksfmmup: user demap
	  slln	%g1, MMU_PAGESHIFT, %g1		! (delay) %g1 = page-aligned vaddr

	/* Kernel address space: demap in the nucleus context. */
	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	retry

1:
	/* User address space: demap through the primary context. */
	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1

	SFMMU_CPU_CNUM(%g2, %g6, %g3)		! %g6 = sfmmu cnum on this CPU

	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
	sll	%g4, CTXREG_EXT_SHIFT, %g4
	or	%g6, %g4, %g6			! %g6 = pgsz | cnum

	set	MMU_PCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g5		! %g5 = context register to restore
	srlx	%g5, CTXREG_NEXT_SHIFT, %g2	! keep only the nucleus pgsz
	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	! %g2 = nucleus pgsz
	or	%g6, %g2, %g6			! %g6 = nucleus pgsz | primary pgsz | cnum
	stxa	%g6, [%g4]ASI_DMMU		! install the new context
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	stxa	%g5, [%g4]ASI_DMMU		! put the old context back
	retry
	SET_SIZE(vtag_flushpage_tl1)

#endif	/* lint */
327
328
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
{}

#else	/* lint */

/*
 * vtag_flush_pgcnt_tl1
 *	x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
 *
 *	%g1 = vaddr, zero-extended on 32-bit kernel
 *	%g2 = <sfmmup58|pgcnt6>; the pgcnt6 bits carry (pgcnt - 1)
 *
 * NOTE: this handler relies on the fact that no interrupts or traps
 *	can occur during the loop issuing the TLB_DEMAP operations.
 *	It is assumed that interrupts are disabled and this code is
 *	fetching from the kernel locked text address.
 *
 * assumes TSBE_TAG = 0
 */
	ENTRY_NP(vtag_flush_pgcnt_tl1)
	set	SFMMU_PGCNT_MASK, %g4
	and	%g4, %g2, %g3			! %g3 = pgcnt - 1
	add	%g3, 1, %g3			! %g3 = pgcnt

	andn	%g2, SFMMU_PGCNT_MASK, %g2	! %g2 = sfmmup
	srln	%g1, MMU_PAGESHIFT, %g1		! drop the in-page offset bits

	sethi	%hi(ksfmmup), %g4
	ldx	[%g4 + %lo(ksfmmup)], %g4
	cmp	%g4, %g2
	bne,pn	%xcc, 1f			! sfmmup != ksfmmup: user demap
	  slln	%g1, MMU_PAGESHIFT, %g1		! (delay) %g1 = page-aligned vaddr

	/* Kernel address space: demap in the nucleus context. */
	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
	set	MMU_PAGESIZE, %g2		! %g2 = stride between pages
	sethi	%hi(FLUSH_ADDR), %g5
4:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5				! flush required by immu

	deccc	%g3				! one page fewer to go
	bnz,pt	%icc,4b
	  add	%g1, %g2, %g1			! (delay) step to the next page
	retry

1:
	/*
	 * User address space: demap through the primary context.
	 *
	 *	%g2 = sfmmup
	 *	%g3 = pgcnt
	 */
	SFMMU_CPU_CNUM(%g2, %g5, %g6)		! %g5 = sfmmu cnum on this CPU

	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1

	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
	sll	%g4, CTXREG_EXT_SHIFT, %g4
	or	%g5, %g4, %g5

	set	MMU_PCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g6		! %g6 = context register to restore
	srlx	%g6, CTXREG_NEXT_SHIFT, %g2	! keep only the nucleus pgsz
	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	! %g2 = nucleus pgsz
	or	%g5, %g2, %g5			! %g5 = nucleus pgsz | primary pgsz | cnum
	stxa	%g5, [%g4]ASI_DMMU		! install the new context

	set	MMU_PAGESIZE, %g2		! %g2 = stride between pages
	sethi	%hi(FLUSH_ADDR), %g5
3:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5				! flush required by immu

	deccc	%g3				! one page fewer to go
	bnz,pt	%icc,3b
	  add	%g1, %g2, %g1			! (delay) step to the next page

	stxa	%g6, [%g4]ASI_DMMU		! put the old context back
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)

#endif	/* lint */
417
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/*ARGSUSED*/
void
vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
{}

#else	/* lint */

/*
 * vtag_flushall_tl1
 *	x-trap to flush tlb: demap-all on the D-TLB and the I-TLB.
 *	Neither argument register is read; %g4 is used as scratch.
 */
	ENTRY_NP(vtag_flushall_tl1)
	set	DEMAP_ALL_TYPE, %g4
	stxa	%g0, [%g4]ASI_DTLB_DEMAP
	stxa	%g0, [%g4]ASI_ITLB_DEMAP
	retry
	SET_SIZE(vtag_flushall_tl1)

#endif	/* lint */
438
439
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
vac_flushpage(pfn_t pfnum, int vcolor)
{}

#else	/* lint */

/*
 * vac_flushpage(pfnum, color)
 *	Flush 1 8k page of the D-$ with physical page = pfnum
 *	Algorithm:
 *		The cheetah dcache is a 64k pseudo 4 way associative cache.
 *		It is virtual indexed, physically tagged cache.
 *
 * dflush_type is the run-time selector read by DCACHE_FLUSHPAGE; it is
 * initialized to FLUSHPAGE_TYPE.
 */
	.seg	".data"
	.align	8
	.global	dflush_type
dflush_type:
	.word	FLUSHPAGE_TYPE

	ENTRY(vac_flushpage)
	/*
	 * %o0 = pfnum, %o1 = color; %o2-%o4 are handed to the macro as
	 * scratch registers.
	 */
	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
	retl
	  nop
	SET_SIZE(vac_flushpage)

#endif	/* lint */
474
475
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
{}

#else	/* lint */

/*
 * vac_flushpage_tl1
 *	x-trap to flush page from the d$
 *
 *	%g1 = pfnum, %g2 = color; %g3-%g5 are handed to the macro as
 *	scratch registers.  Leaves through retry.
 */
	ENTRY_NP(vac_flushpage_tl1)
	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
	retry
	SET_SIZE(vac_flushpage_tl1)

#endif	/* lint */
496
497
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
vac_flushcolor(int vcolor, pfn_t pfnum)
{}

#else	/* lint */

/*
 * vac_flushcolor(vcolor, pfnum)
 *	Expand DCACHE_FLUSHCOLOR once for each of the ways 0 through 3.
 *
 *	%o0 = vcolor; %o1-%o3 are handed to the macro as scratch.
 *	The pfnum argument (%o1 on entry) is never read.
 */
	ENTRY(vac_flushcolor)
	DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3)	/* way 0 */
	DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3)	/* way 1 */
	DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3)	/* way 2 */
	DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3)	/* way 3 */
	retl
	  nop
	SET_SIZE(vac_flushcolor)

#endif	/* lint */
520
521
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
vac_flushcolor_tl1(uint64_t vcolor, uint64_t pfnum)
{}

#else	/* lint */

/*
 * vac_flushcolor_tl1
 *	Expand DCACHE_FLUSHCOLOR once for each of the ways 0 through 3,
 *	then leave through retry.
 *
 *	%g1 = vcolor; %g2-%g4 are handed to the macro as scratch.
 *	The pfnum argument (%g2 on entry) is never read.
 */
	ENTRY(vac_flushcolor_tl1)
	DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4)	/* way 0 */
	DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4)	/* way 1 */
	DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4)	/* way 2 */
	DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4)	/* way 3 */
	retry
	SET_SIZE(vac_flushcolor_tl1)

#endif	/* lint */
543
#if defined(lint)

int
idsr_busy(void)
{
	return (0);
}

#else	/* lint */

/*
 * Determine whether or not the IDSR is busy.
 * Entry: no arguments
 * Returns: 1 if busy, 0 otherwise
 *
 * %o0 is preset to 0.  The annulled branch executes its delay slot only
 * when the branch is taken, so the branch must be taken when IDSR_BUSY
 * is set (bnz) for "mov 1, %o0" to run in the busy case.  Branching on
 * bz instead would return 1 exactly when the register is NOT busy.
 */
	ENTRY(idsr_busy)
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	clr	%o0				! default answer: not busy
	btst	IDSR_BUSY, %g1
	bnz,a,pn %xcc, 1f			! busy bit set?
	  mov	1, %o0				! (annulled unless taken) busy
1:
	retl
	  nop
	SET_SIZE(idsr_busy)

#endif	/* lint */
571
#if defined(lint)

/* lint-only stubs; the implementation is the assembly below */
/* ARGSUSED */
void
init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{}

/* ARGSUSED */
void
init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{}

#else	/* lint */

	/* panic message used by the DEBUG check in init_mondo */
	.global _dispatch_status_busy
_dispatch_status_busy:
	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
	.align	4

/*
 * init_mondo / init_mondo_nocheck
 *	Load the three interrupt vector dispatch data registers.
 *
 * Entry:
 *	%o0 - function or inumber to call
 *	%o1, %o2 - arguments (2 uint64_t's)
 *
 * On DEBUG kernels init_mondo first panics if IDSR_BUSY is set in the
 * dispatch status register; init_mondo_nocheck enters below that check.
 * %g1-%g3 are used as scratch.
 */
	.seg "text"

	ENTRY(init_mondo)
#ifdef DEBUG
	/* IDSR should not be busy at the moment */
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	btst	IDSR_BUSY, %g1
	bz,pt	%xcc, 1f
	  nop
	sethi	%hi(_dispatch_status_busy), %o0
	call	panic
	  or	%o0, %lo(_dispatch_status_busy), %o0
#endif /* DEBUG */

	ALTENTRY(init_mondo_nocheck)
1:
	mov	IDDR_0, %g1
	stxa	%o0, [%g1]ASI_INTR_DISPATCH	! dispatch data reg 0
	mov	IDDR_1, %g2
	stxa	%o1, [%g2]ASI_INTR_DISPATCH	! dispatch data reg 1
	mov	IDDR_2, %g3
	stxa	%o2, [%g3]ASI_INTR_DISPATCH	! dispatch data reg 2

	membar	#Sync
	retl
	  nop
	SET_SIZE(init_mondo_nocheck)
	SET_SIZE(init_mondo)

#endif	/* lint */
640
641
#if !(defined(JALAPENO) || defined(SERRANO))

#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
shipit(int upaid, int bn)
{ return; }

#else	/* lint */

/*
 * shipit(upaid, bn)
 *	Ship mondo to aid using busy/nack pair bn
 *
 *	%o0 = agent id, %o1 = busy/nack pair; %g1 and %g2 are scratch.
 */
	ENTRY_NP(shipit)
	sll	%o1, IDCR_BN_SHIFT, %g2		! IDCR<28:24> = b/n pair
	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = agent id
	or	%g1, %g2, %g1
	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
	membar	#Sync
	retl
	  nop
	SET_SIZE(shipit)

#endif	/* lint */

#endif	/* !(JALAPENO || SERRANO) */
670
671
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
flush_instr_mem(caddr_t vaddr, size_t len)
{}

#else	/* lint */

/*
 * flush_instr_mem(vaddr, len)
 *	%o0 = vaddr
 *	%o1 = bytes to be flushed (not examined)
 *
 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache
 * with the stores from all processors, so a FLUSH instruction is only
 * needed to ensure the pipeline is consistent.  A single flush at the
 * end of a sequence of stores that updates the instruction stream is
 * therefore sufficient, and that is all this routine issues.
 */

	ENTRY(flush_instr_mem)
	flush	%o0			! the address itself is irrelevant
	retl
	  nop
	SET_SIZE(flush_instr_mem)

#endif	/* lint */
700
701
#if defined(CPU_IMP_ECACHE_ASSOC)

#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
uint64_t
get_ecache_ctrl(void)
{ return (0); }

#else	/* lint */

/*
 * get_ecache_ctrl()
 *	Return the E$ control register in %o0: ASI_EC_CFG_TIMING when
 *	GET_CPU_IMPL reports JAGUAR_IMPL, ASI_EC_CTRL otherwise.
 *
 * Putting an ASI access in the delay slot may cause it to be accessed,
 * even when annulled, so every delay slot here holds a nop.
 */
	ENTRY(get_ecache_ctrl)
	GET_CPU_IMPL(%o0)
	cmp	%o0, JAGUAR_IMPL
	be	1f
	  nop
	ldxa	[%g0]ASI_EC_CTRL, %o0		! read Ch/Ch+ E$ control reg
	retl
	  nop
1:
	ldxa	[%g0]ASI_EC_CFG_TIMING, %o0	! read Jaguar shared E$ ctrl reg
	retl
	  nop
	SET_SIZE(get_ecache_ctrl)

#endif	/* lint */

#endif	/* CPU_IMP_ECACHE_ASSOC */
735
736
#if !(defined(JALAPENO) || defined(SERRANO))

/*
 * flush_ecache(physaddr, ecache_size, ecache_linesize)
 *	%o0 - 64 bit physical address
 *	%o1 - ecache size
 *	%o2 - ecache linesize
 *
 * %g3-%g5 and %o4 are passed to the flush macros as scratch.
 */
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/*ARGSUSED*/
void
flush_ecache(uint64_t physaddr, size_t ecache_size, size_t ecache_linesize)
{}

#else /* !lint */

	ENTRY(flush_ecache)

	/*
	 * Certain CPU implementations need their L2 cache flushed
	 * before the ecache is flushed.
	 */
	PN_L2_FLUSHALL(%g3, %g4, %g5)

	/*
	 * Displacement-flush the entire Ecache.
	 */
	ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)

	retl
	  nop
	SET_SIZE(flush_ecache)

#endif /* lint */

#endif	/* !(JALAPENO || SERRANO) */
774
775
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
void
flush_dcache(void)
{}

#else	/* lint */

/*
 * flush_dcache()
 *	Run CH_DCACHE_FLUSHALL with the globals dcache_size (%o0) and
 *	dcache_linesize (%o1); %o2 is passed as scratch.
 */
	ENTRY(flush_dcache)
	ASM_LD(%o1, dcache_linesize)
	ASM_LD(%o0, dcache_size)
	CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
	retl
	  nop
	SET_SIZE(flush_dcache)

#endif	/* lint */
793
794
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
void
flush_icache(void)
{}

#else	/* lint */

/*
 * flush_icache()
 *	Run CH_ICACHE_FLUSHALL with %o0 = I$ size and %o1 = I$ line size.
 *
 * The sizes come from the CHPR_ICACHE_* fields of the pointer produced
 * by GET_CPU_PRIVATE_PTR.  flush_icache_1 is the label handed to that
 * macro; the code there loads the globals icache_size and
 * icache_linesize instead.
 * NOTE(review): presumably the macro branches to flush_icache_1 when no
 * private pointer is available - confirm against its definition.
 */
	ENTRY(flush_icache)
	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
	ba,pt	%icc, 2f
	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0	! (delay) overwrites the pointer last
flush_icache_1:
	ASM_LD(%o0, icache_size)
	ASM_LD(%o1, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
	retl
	  nop
	SET_SIZE(flush_icache)

#endif	/* lint */
818
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/*ARGSUSED*/
void
kdi_flush_idcache(int dcache_size, int dcache_lsize, int icache_size,
    int icache_lsize)
{
}

#else	/* lint */

/*
 * kdi_flush_idcache(dcache_size, dcache_lsize, icache_size, icache_lsize)
 *	Flush the whole D$ and then the whole I$ using the sizes supplied
 *	by the caller.
 *
 *	%o0 = dcache_size, %o1 = dcache_lsize
 *	%o2 = icache_size, %o3 = icache_lsize
 *	%g1, %g2 = scratch for the flush macros
 */
	ENTRY(kdi_flush_idcache)
	CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
	CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
	membar	#Sync
	retl
	  nop
	SET_SIZE(kdi_flush_idcache)

#endif	/* lint */
839
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
void
flush_pcache(void)
{}

#else	/* lint */

/*
 * flush_pcache()
 *	Run PCACHE_FLUSHALL with %o0-%o2 as its register arguments.
 */
	ENTRY(flush_pcache)
	PCACHE_FLUSHALL(%o0, %o1, %o2)
	retl
	  nop
	SET_SIZE(flush_pcache)

#endif	/* lint */
855
856
857#if defined(CPU_IMP_L1_CACHE_PARITY)
858
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
get_dcache_dtag(uint32_t dcache_idx, uint64_t *data)
{}

#else	/* lint */

/*
 * get_dcache_dtag(dcache_idx, data)
 *	Get dcache data and tag.  The Dcache data is a pointer to a
 *	ch_dc_data_t structure (see cheetahregs.h).
 *	The Dcache *should* be turned off when this code is executed.
 *
 *	%o0 = dcache_idx
 *	%o1 = data
 *
 * Runs with PSTATE.IE and PSTATE.AM cleared; the entry %pstate is
 * written back in the delay slot of the retl.
 */
	.align	128
	ENTRY(get_dcache_dtag)
	rdpr	%pstate, %o5			! %o5 = %pstate at entry
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate
	b	1f
	  stx	%o0, [%o1 + CH_DC_IDX]		! (delay) record the index

	.align	128
1:
	ldxa	[%o0]ASI_DC_TAG, %o2
	stx	%o2, [%o1 + CH_DC_TAG]
	membar	#Sync
	ldxa	[%o0]ASI_DC_UTAG, %o2
	membar	#Sync
	stx	%o2, [%o1 + CH_DC_UTAG]
	ldxa	[%o0]ASI_DC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_DC_SNTAG]
	add	%o1, CH_DC_DATA, %o1		! %o1 = start of the data area
	clr	%o3				! %o3 = byte offset, steps by 8
2:
	membar	#Sync				! required before ASI_DC_DATA
	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
	membar	#Sync				! required after ASI_DC_DATA
	stx	%o2, [%o1 + %o3]
	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
	blt	2b
	  add	%o3, 8, %o3

	/*
	 * Unlike other CPUs in the family, D$ data parity bits for Panther
	 * do not reside in the microtag. Instead, we have to read them
	 * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
	 * of just having 8 parity bits to protect all 32 bytes of data
	 * per line, we now have 32 bits of parity.
	 */
	GET_CPU_IMPL(%o3)
	cmp	%o3, PANTHER_IMPL
	bne	4f				! not Panther: done
	  clr	%o3				! (delay) restart the offset at 0

	/*
	 * move our pointer to the next field where we store parity bits
	 * and add the offset of the last parity byte since we will be
	 * storing all 4 parity bytes within one 64 bit field like this:
	 *
	 * +------+------------+------------+------------+------------+
	 * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
	 * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
	 * +------+------------+------------+------------+------------+
	 *  63:32     31:24        23:16         15:8          7:0
	 */
	add	%o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1

	/* add the DC_data_parity bit into our working index */
	mov	1, %o2
	sll	%o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
	or	%o0, %o2, %o0
3:
	membar	#Sync				! required before ASI_DC_DATA
	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
	membar	#Sync				! required after ASI_DC_DATA
	stb	%o2, [%o1]			! one parity byte per read
	dec	%o1				! next byte goes one lower
	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
	blt	3b
	  add	%o3, 8, %o3
4:
	retl
	  wrpr	%g0, %o5, %pstate		! restore entry %pstate
	SET_SIZE(get_dcache_dtag)

#endif	/* lint */
946
947
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
get_icache_dtag(uint32_t ecache_idx, uint64_t *data)
{}

#else	/* lint */

/*
 * get_icache_dtag(idx, data)
 *	Get icache data and tag.  The data argument is a pointer to a
 *	ch_ic_data_t structure (see cheetahregs.h).
 *	The Icache *Must* be turned off when this function is called,
 *	because diagnostic accesses to the Icache interfere with cache
 *	consistency.
 *
 *	%o0 = index
 *	%o1 = data
 *
 * Runs with PSTATE.IE and PSTATE.AM cleared; the entry %pstate is
 * written back in the delay slot of the retl.
 */
	.align	128
	ENTRY(get_icache_dtag)
	rdpr	%pstate, %o5			! %o5 = %pstate at entry
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate

	stx	%o0, [%o1 + CH_IC_IDX]		! record the index

	/*
	 * Four reads of ASI_IC_TAG; the index is stepped through the
	 * CH_ICTAG_* offsets between reads.
	 */
	ldxa	[%o0]ASI_IC_TAG, %o2
	stx	%o2, [%o1 + CH_IC_PATAG]
	add	%o0, CH_ICTAG_UTAG, %o0
	ldxa	[%o0]ASI_IC_TAG, %o2
	add	%o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
	stx	%o2, [%o1 + CH_IC_UTAG]
	ldxa	[%o0]ASI_IC_TAG, %o2
	add	%o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
	stx	%o2, [%o1 + CH_IC_UPPER]
	ldxa	[%o0]ASI_IC_TAG, %o2
	andn	%o0, CH_ICTAG_TMASK, %o0	! strip the tag-select bits again
	stx	%o2, [%o1 + CH_IC_LOWER]
	ldxa	[%o0]ASI_IC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_IC_SNTAG]
	add	%o1, CH_IC_DATA, %o1		! %o1 = start of the data area
	clr	%o3				! %o3 = byte offset, steps by 8
2:
	ldxa	[%o0 + %o3]ASI_IC_DATA, %o2
	stx	%o2, [%o1 + %o3]
	cmp	%o3, PN_IC_DATA_REG_SIZE - 8
	blt	2b
	  add	%o3, 8, %o3

	retl
	  wrpr	%g0, %o5, %pstate		! restore entry %pstate
	SET_SIZE(get_icache_dtag)

#endif	/* lint */
999
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
get_pcache_dtag(uint32_t pcache_idx, uint64_t *data)
{}

#else	/* lint */

/*
 * get_pcache_dtag(pcache_idx, data)
 *	Get pcache data and tags.
 *
 * inputs:
 *   pcache_idx	- fully constructed VA for accessing P$ diagnostic
 *		  registers. Contains PC_way and PC_addr shifted into
 *		  the correct bit positions. See the PRM for more details.
 *		  (%o0)
 *   data	- pointer to a ch_pc_data_t structure (see cheetahregs.h)
 *		  (%o1)
 *
 * Runs with PSTATE.IE and PSTATE.AM cleared; the entry %pstate is
 * written back in the delay slot of the retl.
 */
	.align	128
	ENTRY(get_pcache_dtag)
	rdpr	%pstate, %o5			! %o5 = %pstate at entry
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate

	stx	%o0, [%o1 + CH_PC_IDX]		! record the index
	ldxa	[%o0]ASI_PC_STATUS_DATA, %o2
	stx	%o2, [%o1 + CH_PC_STATUS]
	ldxa	[%o0]ASI_PC_TAG, %o2
	stx	%o2, [%o1 + CH_PC_TAG]
	ldxa	[%o0]ASI_PC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_PC_SNTAG]
	add	%o1, CH_PC_DATA, %o1		! %o1 = start of the data area
	clr	%o3				! %o3 = byte offset, steps by 8
2:
	ldxa	[%o0 + %o3]ASI_PC_DATA, %o2
	stx	%o2, [%o1 + %o3]
	cmp	%o3, CH_PC_DATA_REG_SIZE - 8
	blt	2b
	  add	%o3, 8, %o3

	retl
	  wrpr	%g0, %o5, %pstate		! restore entry %pstate
	SET_SIZE(get_pcache_dtag)

#endif	/* lint */
1045
1046#endif	/* CPU_IMP_L1_CACHE_PARITY */
1047
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
/* ARGSUSED */
void
set_dcu(uint64_t dcu)
{}

#else	/* lint */

/*
 * set_dcu(dcu)
 *	Write %o0 to the DCU register.
 *
 * Used to re-enable the i$, d$, w$, and p$ according to bootup cache
 * state: turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
 *   %o0 - 64 bit constant
 */
	ENTRY(set_dcu)
	stxa	%o0, [%g0]ASI_DCU	! Store to DCU
	flush	%g0			! required after changing the IC bit
	retl
	  nop
	SET_SIZE(set_dcu)

#endif	/* lint */
1070
1071
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
uint64_t
get_dcu(void)
{
	return ((uint64_t)0);
}

#else	/* lint */

/*
 * get_dcu()
 *	Return the DCU control register in %o0.
 */
	ENTRY(get_dcu)
	ldxa	[%g0]ASI_DCU, %o0		! %o0 = DCU control register
	retl
	  nop
	SET_SIZE(get_dcu)

#endif	/* lint */
1092
1093/*
1094 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
1095 *
1096 * This handler is used to check for softints generated by error trap
1097 * handlers to report errors.  On Cheetah, this mechanism is used by the
1098 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
1099 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
1100 * NB: Must be 8 instructions or less to fit in trap table and code must
1101 *     be relocatable.
1102 */
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
void
ch_pil15_interrupt_instr(void)
{}

#else	/* lint */

	/*
	 * Trap table stub: hand control to ch_pil15_interrupt through
	 * ASM_JMP, with %g1 as the macro's scratch register.
	 */
	ENTRY_NP(ch_pil15_interrupt_instr)
	ASM_JMP(%g1, ch_pil15_interrupt)
	SET_SIZE(ch_pil15_interrupt_instr)

#endif
1116
1117
#if defined(lint)

/* lint-only stub; the implementation is the assembly below */
void
ch_pil15_interrupt(void)
{}

#else	/* lint */

	ENTRY_NP(ch_pil15_interrupt)

	/*
	 * pil_interrupt is hacked to assume that every level 15 interrupt
	 * is generated by the CPU to indicate a performance counter
	 * overflow, so this gets ugly.  Before calling pil_interrupt the
	 * Error at TL>0 pending status is inspected.  If it is non-zero,
	 * an error has occurred and it is handled here.  Otherwise control
	 * is transferred to pil_interrupt.  Note that if an error is
	 * detected pil_interrupt will not be called, and overflow
	 * interrupts may be lost, causing erroneous performance
	 * measurements.  However, error-recovery will have a detrimental
	 * effect on performance anyway.
	 */
	CPU_INDEX(%g1, %g4)
	set	ch_err_tl1_pending, %g4
	ldub	[%g1 + %g4], %g2		! this CPU's pending byte
	brz	%g2, 1f				! nothing pending
	  nop

	/*
	 * A TL>0 error is pending: clear the pending status first.
	 */
	stb	%g0, [%g1 + %g4]

	/*
	 * Then clear the level 15 softint.
	 */
	mov	1, %g5
	sll	%g5, PIL_15, %g5
	wr	%g5, CLEAR_SOFTINT

	/*
	 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
	 * to process the Fast ECC/Cache Parity at TL>0 error.  Clear
	 * panic flag (%g2).
	 */
	set	cpu_tl1_error, %g1
	clr	%g2
	ba	sys_trap
	  mov	PIL_15, %g4

1:
	/*
	 * The logout is invalid.
	 *
	 * Fall back to the default interrupt handler.
	 */
	sethi	%hi(pil_interrupt), %g1
	jmp	%g1 + %lo(pil_interrupt)
	  mov	PIL_15, %g4

	SET_SIZE(ch_pil15_interrupt)
#endif
1180
1181
1182/*
1183 * Error Handling
1184 *
1185 * Cheetah provides error checking for all memory access paths between
1186 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
1187 * information is logged in the AFSR, (also AFSR_EXT for Panther) and
1188 * AFAR and one of the following traps is generated (provided that it
1189 * is enabled in External Cache Error Enable Register) to handle that
1190 * error:
1191 * 1. trap 0x70: Precise trap
1192 *    tt0_fecc for errors at trap level(TL)>=0
1193 * 2. trap 0x0A and 0x32: Deferred trap
1194 *    async_err for errors at TL>=0
1195 * 3. trap 0x63: Disrupting trap
1196 *    ce_err for errors at TL=0
1197 *    (Note that trap 0x63 cannot happen at trap level > 0)
1198 *
1199 * Trap level one handlers panic the system except for the fast ecc
1200 * error handler which tries to recover from certain errors.
1201 */
1202
1203/*
1204 * FAST ECC TRAP STRATEGY:
1205 *
1206 * Software must handle single and multi bit errors which occur due to data
1207 * or instruction cache reads from the external cache. A single or multi bit
 * error occurring in one of these situations results in a precise trap.
1209 *
1210 * The basic flow of this trap handler is as follows:
1211 *
1212 * 1) Record the state and then turn off the Dcache and Icache.  The Dcache
1213 *    is disabled because bad data could have been installed.  The Icache is
1214 *    turned off because we want to capture the Icache line related to the
1215 *    AFAR.
1216 * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing.
1217 * 3) Park sibling core if caches are shared (to avoid race condition while
1218 *    accessing shared resources such as L3 data staging register during
 *    CPU logout).
1220 * 4) Read the AFAR and AFSR.
1221 * 5) If CPU logout structure is not being used, then:
1222 *    6) Clear all errors from the AFSR.
1223 *    7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
1224 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
1225 *       state.
1226 *    9) Unpark sibling core if we parked it earlier.
1227 *    10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
1228 *        running at PIL 15.
1229 * 6) Otherwise, if CPU logout structure is being used:
 *    7) Increment the "logout busy count".
1231 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
1232 *       state.
1233 *    9) Unpark sibling core if we parked it earlier.
1234 *    10) Issue a retry since the other CPU error logging code will end up
1235 *       finding this error bit and logging information about it later.
1236 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
1237 *    yet initialized such that we can't even check the logout struct, then
1238 *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
1239 *    call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
1240 *    to determine information such as TL, TT, CEEN and NCEEN settings, etc
1241 *    in the high level trap handler since we don't have access to detailed
1242 *    logout information in cases where the cpu_private struct is not yet
1243 *    initialized.
1244 *
1245 * We flush the E$ and D$ here on TL=1 code to prevent getting nested
1246 * Fast ECC traps in the TL=0 code.  If we get a Fast ECC event here in
1247 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
1248 * since it uses different code/data from this handler, has a better
1249 * chance of fixing things up than simply recursing through this code
1250 * again (this would probably cause an eventual kernel stack overflow).
1251 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
1252 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
1253 * the Fast ECC at TL>0 handler and eventually Red Mode.
1254 *
1255 * Note that for Cheetah (and only Cheetah), we use alias addresses for
1256 * flushing rather than ASI accesses (which don't exist on Cheetah).
1257 * Should we encounter a Fast ECC error within this handler on Cheetah,
1258 * there's a good chance it's within the ecache_flushaddr buffer (since
1259 * it's the largest piece of memory we touch in the handler and it is
1260 * usually kernel text/data).  For that reason the Fast ECC at TL>0
1261 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
1262 */
1263
1264/*
1265 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
1266 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
1267 * architecture-specific files.
1268 * NB: Must be 8 instructions or less to fit in trap table and code must
1269 *     be relocatable.
1270 */
1271
1272#if defined(lint)
1273
/* Lint-only stub; the real trap-table code is the assembly in the #else branch. */
1274void
1275fecc_err_instr(void)
1276{}
1277
1278#else	/* lint */
1279
	/*
	 * fecc_err_instr: trap-table entry for the Fast ECC trap at TL=0.
	 * Saves the current DCU value in %g1, turns off the Dcache and
	 * Icache, and transfers to fast_ecc_err.  %g1 is left holding the
	 * original DCU value; fast_ecc_err writes it back to ASI_DCU
	 * before it leaves.
	 */
1280	ENTRY_NP(fecc_err_instr)
1281	membar	#Sync			! Cheetah requires membar #Sync
1282
1283	/*
1284	 * Save current DCU state.  Turn off the Dcache and Icache.
1285	 */
1286	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1287	andn	%g1, DCU_DC + DCU_IC, %g4	! %g4 = DCU with DC and IC cleared
1288	stxa	%g4, [%g0]ASI_DCU
1289	flush	%g0	/* flush required after changing the IC bit */
1290
	/*
	 * %g4 is dead after the stxa above, so it is handed to ASM_JMP
	 * (presumably as the scratch register for the jump target --
	 * confirm against the macro definition).
	 */
1291	ASM_JMP(%g4, fast_ecc_err)
1292	SET_SIZE(fecc_err_instr)
1293
1294#endif	/* lint */
1295
1296
1297#if !(defined(JALAPENO) || defined(SERRANO))
1298
1299#if defined(lint)
1300
/* Lint-only stub; the real handler is the assembly in the #else branch. */
1301void
1302fast_ecc_err(void)
1303{}
1304
1305#else	/* lint */
1306
1307	.section ".text"
1308	.align	64
	/*
	 * fast_ecc_err: body of the Fast ECC handler at TL=0, entered from
	 * fecc_err_instr with the pre-trap DCU value in %g1 (the Dcache and
	 * Icache have already been turned off there).
	 *
	 * Registers carried across this routine:
	 *   %g1 = DCU value written back to ASI_DCU before leaving
	 *   %g2 = DO_CPU_LOGOUT output (clo_flags when cpu_private is
	 *         not initialized)
	 *   %g3 = DO_CPU_LOGOUT "failed?" value, tested at the end
	 * %g4-%g7 are used as scratch.
	 *
	 * Exits: retry (logout struct busy), ptl1_panic (nesting count not
	 * below CLO_NESTING_MAX), or sys_trap to cpu_fast_ecc_error.
	 */
1309	ENTRY_NP(fast_ecc_err)
1310
1311	/*
1312	 * Turn off CEEN and NCEEN.
	 * The original ESTATE_ERR value stays in %g3; its CEEN/NCEEN bits
	 * are recorded in the logout flags below.
1313	 */
1314	ldxa	[%g0]ASI_ESTATE_ERR, %g3
1315	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1316	stxa	%g4, [%g0]ASI_ESTATE_ERR
1317	membar	#Sync			! membar sync required
1318
1319	/*
1320	 * Check to see whether we need to park our sibling core
1321	 * before recording diagnostic information from caches
1322	 * which may be shared by both cores.
1323	 * We use %g1 to store information about whether or not
1324	 * we had to park the core (%g1 holds our DCUCR value and
1325	 * we only use bits from that register which are "reserved"
1326	 * to keep track of core parking) so that we know whether
1327	 * or not to unpark later. %g5 and %g4 are scratch registers.
1328	 */
1329	PARK_SIBLING_CORE(%g1, %g5, %g4)
1330
1331	/*
1332	 * Do the CPU log out capture.
1333	 *   %g3 = "failed?" return value.
1334	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1335	 *         into this macro via %g4. Output only valid if cpu_private
1336	 *         struct has not been initialized.
1337	 *   CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
1338	 *   %g4 = Trap information stored in the cpu logout flags field
1339	 *   %g5 = scr1
1340	 *   %g6 = scr2
1341	 *   %g3 = scr3
1342	 *   %g4 = scr4
	 *
	 * Note that %g3, %g4 and %g6 each appear twice in the argument
	 * list below (as an input/output and again as a scratch register).
1343	 */
1344	 /* store the CEEN and NCEEN values, TL=0 */
1345	and	%g3, EN_REG_CEEN + EN_REG_NCEEN, %g4	! from the pre-disable ESTATE_ERR value
1346	set	CHPR_FECCTL0_LOGOUT, %g6
1347	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1348
	/*
	 * From here to the final checks, neither %g2 nor %g3 is named as
	 * an argument to any macro, so the logout results are carried
	 * through the cache flushes in those two registers.
	 */

1349	/*
1350	 * Flush the Ecache (and L2 cache for Panther) to get the error out
1351	 * of the Ecache.  If the UCC or UCU is on a dirty line, then the
1352	 * following flush will turn that into a WDC or WDU, respectively.
1353	 */
1354	PN_L2_FLUSHALL(%g4, %g5, %g6)
1355
	/*
	 * Locate this CPU's entry in cpunodes[] (index * CPU_NODE_SIZE) and
	 * load its Ecache line size into %g5 and Ecache size into %g4.
	 */
1356	CPU_INDEX(%g4, %g5)
1357	mulx	%g4, CPU_NODE_SIZE, %g4
1358	set	cpunodes, %g5
1359	add	%g4, %g5, %g4
1360	ld	[%g4 + ECACHE_LINESIZE], %g5
1361	ld	[%g4 + ECACHE_SIZE], %g4
1362
1363	ASM_LDX(%g6, ecache_flushaddr)
1364	ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)
1365
1366	/*
1367	 * Flush the Dcache.  Since bad data could have been installed in
1368	 * the Dcache we must flush it before re-enabling it.
1369	 */
1370	ASM_LD(%g5, dcache_size)
1371	ASM_LD(%g6, dcache_linesize)
1372	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1373
1374	/*
1375	 * Flush the Icache.  Since we turned off the Icache to capture the
1376	 * Icache line it is now stale or corrupted and we must flush it
1377	 * before re-enabling it.
	 *
	 * The Icache size and line size are taken from the cpu_private
	 * area when GET_CPU_PRIVATE_PTR yields a pointer; otherwise control
	 * goes to fast_ecc_err_5 and the global icache_size and
	 * icache_linesize values are used instead.
1378	 */
1379	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
1380	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1381	ba,pt	%icc, 6f
1382	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1383fast_ecc_err_5:
1384	ASM_LD(%g5, icache_size)
1385	ASM_LD(%g6, icache_linesize)
13866:
1387	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1388
1389	/*
1390	 * check to see whether we parked our sibling core at the start
1391	 * of this handler. If so, we need to unpark it here.
1392	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1393	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1394	 */
1395	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1396
1397	/*
1398	 * Restore the Dcache and Icache to the previous state.
1399	 */
1400	stxa	%g1, [%g0]ASI_DCU
1401	flush	%g0	/* flush required after changing the IC bit */
1402
1403	/*
1404	 * Make sure our CPU logout operation was successful.
1405	 */
1406	cmp	%g3, %g0
1407	be	8f			! %g3 == 0: take the sys_trap path
1408	  nop
1409
1410	/*
1411	 * If the logout structure had been busy, how many times have
1412	 * we tried to use it and failed (nesting count)? If we have
1413	 * already recursed a substantial number of times, then we can
1414	 * assume things are not going to get better by themselves and
1415	 * so it would be best to panic.
1416	 */
1417	cmp	%g3, CLO_NESTING_MAX
1418	blt	7f			! still below the limit: just retry
1419	  nop
1420
	/* The reason code is loaded into %g1 in the delay slot of the call. */
1421        call ptl1_panic
1422          mov   PTL1_BAD_ECC, %g1
1423
14247:
1425	/*
1426	 * Otherwise, if the logout structure was busy but we have not
1427	 * nested more times than our maximum value, then we simply
1428	 * issue a retry. Our TL=0 trap handler code will check and
1429	 * clear the AFSR after it is done logging what is currently
1430	 * in the logout struct and handle this event at that time.
1431	 */
1432	retry
14338:
1434	/*
1435	 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
1436	 * already at PIL 15.
	 * The movl in the branch delay slot replaces %g4 with PIL_14 only
	 * when the current %pil is below 14; otherwise %g4 keeps the
	 * current %pil value.
1437	 */
1438	set	cpu_fast_ecc_error, %g1
1439	rdpr	%pil, %g4
1440	cmp	%g4, PIL_14
1441	ba	sys_trap
1442	  movl	%icc, PIL_14, %g4
1443
1444	SET_SIZE(fast_ecc_err)
1445
1446#endif	/* lint */
1447
1448#endif	/* !(JALAPENO || SERRANO) */
1449
1450
1451/*
1452 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1453 *
1454 * The basic flow of this trap handler is as follows:
1455 *
1456 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1457 *    software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1458 *    will use to save %g1 and %g2.
1459 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1460 *    we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1461 *    handler (using the just saved %g1).
1462 * 3) Turn off the Dcache if it was on and save the state of the Dcache
1463 *    (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1464 *    NB: we don't turn off the Icache because bad data is not installed nor
1465 *        will we be doing any diagnostic accesses.
1466 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1467 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1468 *    %tpc, %tnpc, %tstate values previously saved).
1469 * 6) set %tl to %tl - 1.
1470 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
1471 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
1472 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear.  For
1473 *    Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
1474 *    Save the values in ch_err_tl1_data.  For Panther, read the shadow
1475 *    AFSR_EXT and save the value in ch_err_tl1_data.
1476 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
1477 *    being queued.  We'll report them via the AFSR/AFAR capture in step 13.
1478 * 11) Flush the Ecache.
1479 *    NB: the Ecache is flushed assuming the largest possible size with
1480 *        the smallest possible line size since access to the cpu_nodes may
1481 *        cause an unrecoverable DTLB miss.
1482 * 12) Reenable CEEN/NCEEN with the value saved from step 10.
1483 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
1484 *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
1485 *    Save the read AFSR/AFAR values in ch_err_tl1_data.  For Panther,
1486 *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1487 * 14) Flush and re-enable the Dcache if it was on at step 3.
1488 * 15) Do TRAPTRACE if enabled.
1489 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1490 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1491 * 18) Cause a softint 15.  The pil15_interrupt handler will inspect the
1492 *    event pending flag and call cpu_tl1_error via systrap if set.
1493 * 19) Restore the registers from step 5 and issue retry.
1494 */
1495
1496/*
1497 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1498 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1499 * architecture-specific files.  This generates a "Software Trap 0" at TL>0,
1500 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1501 * NB: Must be 8 instructions or less to fit in trap table and code must
1502 *     be relocatable.
1503 */
1504
1505#if defined(lint)
1506
/* Lint-only stub; the real trap-table code is the assembly in the #else branch. */
1507void
1508fecc_err_tl1_instr(void)
1509{}
1510
1511#else	/* lint */
1512
	/*
	 * fecc_err_tl1_instr: trap-table entry for the Fast ECC trap at
	 * TL>0.  The whole body is the CH_ERR_TL1_TRAPENTRY macro invoked
	 * with SWTRAP_0; per the comment above this block, that raises
	 * software trap 0, which is handled by fecc_err_tl1_cont_instr.
	 */
1513	ENTRY_NP(fecc_err_tl1_instr)
1514	CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
1515	SET_SIZE(fecc_err_tl1_instr)
1516
1517#endif	/* lint */
1518
1519/*
1520 * Software trap 0 at TL>0.
1521 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1522 * the various architecture-specific files.  This is used as a continuation
1523 * of the fast ecc handling where we've bought an extra TL level, so we can
1524 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1525 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1526 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1527 * order two bits from %g1 and %g2 respectively).
1528 * NB: Must be 8 instructions or less to fit in trap table and code must
1529 *     be relocatable.
1530 */
1531#if defined(lint)
1532
/* Lint-only stub; the real trap-table code is the assembly in the #else branch. */
1533void
1534fecc_err_tl1_cont_instr(void)
1535{}
1536
1537#else	/* lint */
1538
	/*
	 * fecc_err_tl1_cont_instr: trap-table entry for software trap 0 at
	 * TL>0.  The whole body is the CH_ERR_TL1_SWTRAPENTRY macro, which
	 * is given fast_ecc_tl1_err as the handler to continue in.
	 */
1539	ENTRY_NP(fecc_err_tl1_cont_instr)
1540	CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
1541	SET_SIZE(fecc_err_tl1_cont_instr)
1542
1543#endif	/* lint */
1544
1545
1546#if defined(lint)
1547
/* Lint-only stub; the real handler is the assembly in the #else branch. */
1548void
1549ce_err(void)
1550{}
1551
1552#else	/* lint */
1553
1554/*
1555 * The ce_err function handles disrupting trap type 0x63 at TL=0.
1556 *
1557 * AFSR errors bits which cause this trap are:
1558 *	CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1559 *
1560 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1561 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1562 *
1563 * CEEN Bit of Cheetah External Cache Error Enable Register enables
1564 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1565 *
1566 * Cheetah+ also handles (No additional processing required):
1567 *    DUE, DTO, DBERR	(NCEEN controlled)
1568 *    THCE		(CEEN and ET_ECC_en controlled)
1569 *    TUE		(ET_ECC_en controlled)
1570 *
1571 * Panther further adds:
1572 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1573 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1574 *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1575 *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1576 *    THCE			(CEEN and L2_tag_ECC_en controlled)
1577 *    L3_THCE			(CEEN and ET_ECC_en controlled)
1578 *
1579 * Steps:
1580 *	1. Disable hardware corrected disrupting errors only (CEEN)
1581 *	2. Park sibling core if caches are shared (to avoid race
1582 *	   condition while accessing shared resources such as L3
1583 *	   data staging register during CPU logout).
1584 *	3. If the CPU logout structure is not currently being used:
1585 *		4. Clear AFSR error bits
1586 *		5. Capture Ecache, Dcache and Icache lines associated
1587 *		   with AFAR.
1588 *		6. Unpark sibling core if we parked it earlier.
1589 *		7. call cpu_disrupting_error via sys_trap at PIL 14
1590 *		   unless we're already running at PIL 15.
1591 *	4. Otherwise, if the CPU logout structure is busy:
1592 *		5. Increment "logout busy count" and place into %g3
1593 *		6. Unpark sibling core if we parked it earlier.
1594 *		7. Issue a retry since the other CPU error logging
1595 *		   code will end up finding this error bit and logging
1596 *		   information about it later.
1597 *	5. Alternatively (to 3 and 4 above), if the cpu_private struct is
1598 *         not yet initialized such that we can't even check the logout
1599 *         struct, then we place the clo_flags data into %g2
1600 *         (sys_trap->have_win arg #1) and call cpu_disrupting_error via
1601 *         systrap. The clo_flags parameter is used to determine information
1602 *         such as TL, TT, CEEN settings, etc in the high level trap
1603 *         handler since we don't have access to detailed logout information
1604 *         in cases where the cpu_private struct is not yet initialized.
1605 *
1606 * %g3: [ logout busy count ] - arg #2
1607 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
 *
 * Within the routine, %g1 holds the saved DCU value from the point the
 * Icache is turned off until it is written back to ASI_DCU; %g4-%g7 are
 * scratch.
1608 */
1609
1610	.align	128
1611	ENTRY_NP(ce_err)
1612	membar	#Sync			! Cheetah requires membar #Sync
1613
1614	/*
1615	 * Disable trap on hardware corrected errors (CEEN) while at TL=0
1616	 * to prevent recursion.
	 * Only the CEEN bit is cleared here.  %g1 is a temporary for this
	 * step and is reloaded with the DCU value immediately below.
1617	 */
1618	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1619	bclr	EN_REG_CEEN, %g1
1620	stxa	%g1, [%g0]ASI_ESTATE_ERR
1621	membar	#Sync			! membar sync required
1622
1623	/*
1624	 * Save current DCU state.  Turn off Icache to allow capture of
1625	 * Icache data by DO_CPU_LOGOUT.
	 * Only DCU_IC is cleared; the Dcache enable bit is not changed.
1626	 */
1627	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1628	andn	%g1, DCU_IC, %g4
1629	stxa	%g4, [%g0]ASI_DCU
1630	flush	%g0	/* flush required after changing the IC bit */
1631
1632	/*
1633	 * Check to see whether we need to park our sibling core
1634	 * before recording diagnostic information from caches
1635	 * which may be shared by both cores.
1636	 * We use %g1 to store information about whether or not
1637	 * we had to park the core (%g1 holds our DCUCR value and
1638	 * we only use bits from that register which are "reserved"
1639	 * to keep track of core parking) so that we know whether
1640	 * or not to unpark later. %g5 and %g4 are scratch registers.
1641	 */
1642	PARK_SIBLING_CORE(%g1, %g5, %g4)
1643
1644	/*
1645	 * Do the CPU log out capture.
1646	 *   %g3 = "failed?" return value.
1647	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1648	 *         into this macro via %g4. Output only valid if cpu_private
1649	 *         struct has not been initialized.
1650	 *   CHPR_CECC_LOGOUT = cpu logout structure offset input
1651	 *   %g4 = Trap information stored in the cpu logout flags field
1652	 *   %g5 = scr1
1653	 *   %g6 = scr2
1654	 *   %g3 = scr3
1655	 *   %g4 = scr4
	 *
	 * Note that %g3, %g4 and %g6 each appear twice in the argument
	 * list below (as an input/output and again as a scratch register).
1656	 */
1657	clr	%g4			! TL=0 bit in afsr
1658	set	CHPR_CECC_LOGOUT, %g6
1659	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1660
	/*
	 * From here to the final checks, neither %g2 nor %g3 is named as
	 * an argument to any macro, so the logout results are carried
	 * through the Icache flush in those two registers.
	 */

1661	/*
1662	 * Flush the Icache.  Since we turned off the Icache to capture the
1663	 * Icache line it is now stale or corrupted and we must flush it
1664	 * before re-enabling it.
	 *
	 * The Icache size and line size are taken from the cpu_private
	 * area when GET_CPU_PRIVATE_PTR yields a pointer; otherwise control
	 * goes to ce_err_1 and the global icache_size and icache_linesize
	 * values are used instead.
1665	 */
1666	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
1667	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1668	ba,pt	%icc, 2f
1669	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1670ce_err_1:
1671	ASM_LD(%g5, icache_size)
1672	ASM_LD(%g6, icache_linesize)
16732:
1674	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1675
1676	/*
1677	 * check to see whether we parked our sibling core at the start
1678	 * of this handler. If so, we need to unpark it here.
1679	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1680	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1681	 */
1682	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1683
1684	/*
1685	 * Restore Icache to previous state.
1686	 */
1687	stxa	%g1, [%g0]ASI_DCU
1688	flush	%g0	/* flush required after changing the IC bit */
1689
1690	/*
1691	 * Make sure our CPU logout operation was successful.
1692	 */
1693	cmp	%g3, %g0
1694	be	4f			! %g3 == 0: take the sys_trap path
1695	  nop
1696
1697	/*
1698	 * If the logout structure had been busy, how many times have
1699	 * we tried to use it and failed (nesting count)? If we have
1700	 * already recursed a substantial number of times, then we can
1701	 * assume things are not going to get better by themselves and
1702	 * so it would be best to panic.
1703	 */
1704	cmp	%g3, CLO_NESTING_MAX
1705	blt	3f			! still below the limit: just retry
1706	  nop
1707
	/* The reason code is loaded into %g1 in the delay slot of the call. */
1708        call ptl1_panic
1709          mov   PTL1_BAD_ECC, %g1
1710
17113:
1712	/*
1713	 * Otherwise, if the logout structure was busy but we have not
1714	 * nested more times than our maximum value, then we simply
1715	 * issue a retry. Our TL=0 trap handler code will check and
1716	 * clear the AFSR after it is done logging what is currently
1717	 * in the logout struct and handle this event at that time.
1718	 */
1719	retry
17204:
1721	/*
1722	 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
1723	 * already at PIL 15.
	 * The movl in the branch delay slot replaces %g4 with PIL_14 only
	 * when the current %pil is below 14; otherwise %g4 keeps the
	 * current %pil value.
1724	 */
1725	set	cpu_disrupting_error, %g1
1726	rdpr	%pil, %g4
1727	cmp	%g4, PIL_14
1728	ba	sys_trap
1729	  movl	%icc, PIL_14, %g4
1730	SET_SIZE(ce_err)
1731
1732#endif	/* lint */
1733
1734
1735#if defined(lint)
1736
1737/*
1738 * This trap cannot happen at TL>0 which means this routine will never
1739 * actually be called and so we treat this like a BAD TRAP panic.
1740 */
1741void
1742ce_err_tl1(void)
1743{}
1744
1745#else	/* lint */
1746
1747	.align	64
	/*
	 * ce_err_tl1: calls ptl1_panic with the reason code PTL1_BAD_TRAP,
	 * which is loaded into %g1 in the delay slot of the call.  No code
	 * follows the call, so ptl1_panic is presumably not expected to
	 * return.
	 */
1748	ENTRY_NP(ce_err_tl1)
1749
1750        call ptl1_panic
1751          mov   PTL1_BAD_TRAP, %g1
1752
1753	SET_SIZE(ce_err_tl1)
1754
1755#endif	/* lint */
1756
1757
1758#if defined(lint)
1759
/* Lint-only stub; the real handler is the assembly in the #else branch. */
1760void
1761async_err(void)
1762{}
1763
1764#else	/* lint */
1765
1766/*
1767 * The async_err function handles deferred trap types 0xA
1768 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1769 *
1770 * AFSR errors bits which cause this trap are:
1771 *	UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1772 * On some platforms, EMU may cause cheetah to pull the error pin
1773 * never giving Solaris a chance to take a trap.
1774 *
1775 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1776 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1777 *
1778 * Steps:
1779 *	1. Disable CEEN and NCEEN errors to prevent recursive errors.
1780 *	2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1781 *         I$ line in DO_CPU_LOGOUT.
1782 *	3. Park sibling core if caches are shared (to avoid race
1783 *	   condition while accessing shared resources such as L3
1784 *	   data staging register during CPU logout).
1785 *	4. If the CPU logout structure is not currently being used:
1786 *		5. Clear AFSR error bits
1787 *		6. Capture Ecache, Dcache and Icache lines associated
1788 *		   with AFAR.
1789 *		7. Unpark sibling core if we parked it earlier.
1790 *		8. call cpu_deferred_error via sys_trap.
1791 *	5. Otherwise, if the CPU logout structure is busy:
1792 *		6. Increment "logout busy count"
1793 *		7. Unpark sibling core if we parked it earlier.
1794 *		8. Issue a retry since the other CPU error logging
1795 *		   code will end up finding this error bit and logging
1796 *		   information about it later.
1797 *      6. Alternatively (to 4 and 5 above), if the cpu_private struct is
1798 *         not yet initialized such that we can't even check the logout
1799 *         struct, then we place the clo_flags data into %g2
1800 *         (sys_trap->have_win arg #1) and call cpu_deferred_error via
1801 *         systrap. The clo_flags parameter is used to determine information
1802 *         such as TL, TT, CEEN settings, etc in the high level trap handler
1803 *         since we don't have access to detailed logout information in cases
1804 *         where the cpu_private struct is not yet initialized.
1805 *
1806 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1807 * %g3: [ logout busy count ] - arg #2
 *
 * On entry %g5 is read as the trap type, with the T_TL1 bit indicating
 * that the trap was taken at TL>0 (it is set up before this routine is
 * entered; the code doing so is not part of this routine).  %g5 is
 * therefore kept intact until the clo_flags value has been built.
1808 */
1809
1810	ENTRY_NP(async_err)
1811	membar	#Sync			! Cheetah requires membar #Sync
1812
1813	/*
1814	 * Disable CEEN and NCEEN.
	 * The original ESTATE_ERR value stays in %g3; its CEEN bit is
	 * folded into the logout flags below.
1815	 */
1816	ldxa	[%g0]ASI_ESTATE_ERR, %g3
1817	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1818	stxa	%g4, [%g0]ASI_ESTATE_ERR
1819	membar	#Sync			! membar sync required
1820
1821	/*
1822	 * Save current DCU state.
1823	 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
1824	 * Do this regardless of whether this is a Data Access Error or
1825	 * Instruction Access Error Trap.
1826	 * Disable Dcache for both Data Access Error and Instruction Access
1827	 * Error per Cheetah PRM P.5 Note 6.
1828	 */
1829	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1830	andn	%g1, DCU_IC + DCU_DC, %g4
1831	stxa	%g4, [%g0]ASI_DCU
1832	flush	%g0	/* flush required after changing the IC bit */
1833
1834	/*
1835	 * Check to see whether we need to park our sibling core
1836	 * before recording diagnostic information from caches
1837	 * which may be shared by both cores.
1838	 * We use %g1 to store information about whether or not
1839	 * we had to park the core (%g1 holds our DCUCR value and
1840	 * we only use bits from that register which are "reserved"
1841	 * to keep track of core parking) so that we know whether
1842	 * or not to unpark later. %g6 and %g4 are scratch registers.
	 * (%g6 rather than %g5 is used as scratch here because %g5 still
	 * holds the trap type that is read below.)
1843	 */
1844	PARK_SIBLING_CORE(%g1, %g6, %g4)
1845
1846	/*
1847	 * Do the CPU logout capture.
1848	 *
1849	 *   %g3 = "failed?" return value.
1850	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1851	 *         into this macro via %g4. Output only valid if cpu_private
1852	 *         struct has not been initialized.
1853	 *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
1854	 *   %g4 = Trap information stored in the cpu logout flags field
1855	 *   %g5 = scr1
1856	 *   %g6 = scr2
1857	 *   %g3 = scr3
1858	 *   %g4 = scr4
	 *
	 * The instructions before the macro build the flags value in %g4:
	 *   (T_TL1 set in %g5 ? 1 : 0) << CLO_FLAGS_TL_SHIFT
	 *   | ((%g5 << CLO_FLAGS_TT_SHIFT) & CLO_FLAGS_TT_MASK)
	 *   | (original ESTATE_ERR & EN_REG_CEEN)
	 * %g2 is used as a temporary for the TT mask, and %g3 is reduced
	 * to just its CEEN bit in the process.
1859	 */
1860	andcc	%g5, T_TL1, %g0
1861	clr	%g6
1862	movnz	%xcc, 1, %g6			! set %g6 if T_TL1 set
1863	sllx	%g6, CLO_FLAGS_TL_SHIFT, %g6
1864	sllx	%g5, CLO_FLAGS_TT_SHIFT, %g4
1865	set	CLO_FLAGS_TT_MASK, %g2
1866	and	%g4, %g2, %g4			! ttype
1867	or	%g6, %g4, %g4			! TT and TL
1868	and	%g3, EN_REG_CEEN, %g3		! CEEN value
1869	or	%g3, %g4, %g4			! TT and TL and CEEN
1870	set	CHPR_ASYNC_LOGOUT, %g6
1871	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1872
1873	/*
1874	 * If the logout struct was busy, we may need to pass the
1875	 * TT, TL, and CEEN information to the TL=0 handler via
1876	 * systrap parameter so save it off here.
	 * When %g3 is non-zero, %g4 is shifted into the upper 32 bits and
	 * or'ed into %g3.  The later checks on %g3 use plain be/blt, which
	 * test the 32-bit (%icc) condition codes, so they still see only
	 * the low 32 bits of %g3.
1877	 */
1878	cmp	%g3, %g0
1879	be	1f
1880	  nop
1881	sllx	%g4, 32, %g4
1882	or	%g4, %g3, %g3
18831:
1884	/*
1885	 * Flush the Icache.  Since we turned off the Icache to capture the
1886	 * Icache line it is now stale or corrupted and we must flush it
1887	 * before re-enabling it.
	 *
	 * The Icache size and line size are taken from the cpu_private
	 * area when GET_CPU_PRIVATE_PTR yields a pointer; otherwise control
	 * goes to async_err_1 and the global icache_size and
	 * icache_linesize values are used instead.
1888	 */
1889	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
1890	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1891	ba,pt	%icc, 2f
1892	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1893async_err_1:
1894	ASM_LD(%g5, icache_size)
1895	ASM_LD(%g6, icache_linesize)
18962:
1897	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1898
1899	/*
1900	 * XXX - Don't we need to flush the Dcache before turning it back
1901	 *       on to avoid stale or corrupt data? Was this broken?
1902	 */
1903	/*
1904	 * Flush the Dcache before turning it back on since it may now
1905	 * contain stale or corrupt data.
1906	 */
1907	ASM_LD(%g5, dcache_size)
1908	ASM_LD(%g6, dcache_linesize)
1909	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1910
1911	/*
1912	 * check to see whether we parked our sibling core at the start
1913	 * of this handler. If so, we need to unpark it here.
1914	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1915	 * whether or not we need to unpark. %g5 and %g7 are scratch registers.
1916	 */
1917	UNPARK_SIBLING_CORE(%g1, %g5, %g7)
1918
1919	/*
1920	 * Restore Icache and Dcache to previous state.
1921	 */
1922	stxa	%g1, [%g0]ASI_DCU
1923	flush	%g0	/* flush required after changing the IC bit */
1924
1925	/*
1926	 * Make sure our CPU logout operation was successful.
1927	 */
1928	cmp	%g3, %g0
1929	be	4f			! low 32 bits of %g3 zero: take the sys_trap path
1930	  nop
1931
1932	/*
1933	 * If the logout structure had been busy, how many times have
1934	 * we tried to use it and failed (nesting count)? If we have
1935	 * already recursed a substantial number of times, then we can
1936	 * assume things are not going to get better by themselves and
1937	 * so it would be best to panic.
1938	 */
1939	cmp	%g3, CLO_NESTING_MAX
1940	blt	3f			! still below the limit: just retry
1941	  nop
1942
	/* The reason code is loaded into %g1 in the delay slot of the call. */
1943        call ptl1_panic
1944          mov   PTL1_BAD_ECC, %g1
1945
19463:
1947	/*
1948	 * Otherwise, if the logout structure was busy but we have not
1949	 * nested more times than our maximum value, then we simply
1950	 * issue a retry. Our TL=0 trap handler code will check and
1951	 * clear the AFSR after it is done logging what is currently
1952	 * in the logout struct and handle this event at that time.
1953	 */
1954	retry
19554:
	/*
	 * Hand off to cpu_deferred_error via sys_trap, always at PIL 15.
	 * RESET_USER_RTT_REGS is given %g4 and %g5 plus the label that
	 * immediately follows it, so execution continues at
	 * async_err_resetskip whether or not the macro branches to that
	 * label (what the macro resets is defined elsewhere).
	 */
1956	RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
1957async_err_resetskip:
1958	set	cpu_deferred_error, %g1
1959	ba	sys_trap
1960	  mov	PIL_15, %g4		! run at pil 15
1961	SET_SIZE(async_err)
1962
1963#endif	/* lint */
1964
1965#if defined(CPU_IMP_L1_CACHE_PARITY)
1966
1967/*
1968 * D$ parity error trap (trap 71) at TL=0.
1969 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
1970 * the various architecture-specific files.  This merely sets up the
1971 * arguments for cpu_parity_error and calls it via sys_trap.
1972 * NB: Must be 8 instructions or less to fit in trap table and code must
1973 *     be relocatable.
1974 */
1975#if defined(lint)
1976
/* Lint-only stub; the real trap-table code is the assembly in the #else branch. */
1977void
1978dcache_parity_instr(void)
1979{}
1980
1981#else	/* lint */
	/*
	 * dcache_parity_instr: loads the registers handed to sys_trap and
	 * jumps there:
	 *   %g1 = address of cpu_parity_error
	 *   %g2 = CH_ERR_DPE
	 *   %g3 = %tpc of the trapping instruction
	 *   %g4 = PIL_15 (set in the delay slot of the jmp)
	 * sys_trap is reached through sethi/jmp with %g7 as scratch rather
	 * than a PC-relative branch, presumably so the sequence still works
	 * when copied into the trap table (see the relocatable requirement
	 * in the comment above).
	 */
1982	ENTRY_NP(dcache_parity_instr)
1983	membar	#Sync			! Cheetah+ requires membar #Sync
1984	set	cpu_parity_error, %g1
1985	or	%g0, CH_ERR_DPE, %g2
1986	rdpr	%tpc, %g3
1987	sethi	%hi(sys_trap), %g7
1988	jmp	%g7 + %lo(sys_trap)
1989	  mov	PIL_15, %g4		! run at pil 15
1990	SET_SIZE(dcache_parity_instr)
1991
1992#endif	/* lint */
1993
1994
1995/*
1996 * D$ parity error trap (trap 71) at TL>0.
1997 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
1998 * the various architecture-specific files.  This generates a "Software
1999 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
2000 * continue the handling there.
2001 * NB: Must be 8 instructions or less to fit in trap table and code must
2002 *     be relocatable.
2003 */
2004#if defined(lint)
2005
/* Lint-only stub; the real trap-table code is the assembly in the #else branch. */
2006void
2007dcache_parity_tl1_instr(void)
2008{}
2009
2010#else	/* lint */
	/*
	 * dcache_parity_tl1_instr: trap-table entry for the D$ parity trap
	 * at TL>0.  The whole body is the CH_ERR_TL1_TRAPENTRY macro invoked
	 * with SWTRAP_1; per the comment above this block, that raises
	 * software trap 1, which is handled by dcache_parity_tl1_cont_instr.
	 */
2011	ENTRY_NP(dcache_parity_tl1_instr)
2012	CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
2013	SET_SIZE(dcache_parity_tl1_instr)
2014
2015#endif	/* lint */
2016
2017
2018/*
2019 * Software trap 1 at TL>0.
2020 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
2021 * of the various architecture-specific files.  This is used as a continuation
2022 * of the dcache parity handling where we've bought an extra TL level, so we
2023 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2024 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2025 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
2026 * order two bits from %g1 and %g2 respectively).
2027 * NB: Must be 8 instructions or less to fit in trap table and code must
2028 *     be relocatable.
2029 */
2030#if defined(lint)
2031
/* Lint-only stub; the real trap-table code is the assembly in the #else branch. */
2032void
2033dcache_parity_tl1_cont_instr(void)
2034{}
2035
2036#else	/* lint */
	/*
	 * dcache_parity_tl1_cont_instr: trap-table entry for software trap 1
	 * at TL>0.  The whole body is the CH_ERR_TL1_SWTRAPENTRY macro, which
	 * is given dcache_parity_tl1_err as the handler to continue in.
	 */
2037	ENTRY_NP(dcache_parity_tl1_cont_instr)
2038	CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
2039	SET_SIZE(dcache_parity_tl1_cont_instr)
2040
2041#endif	/* lint */
2042
2043/*
2044 * D$ parity error at TL>0 handler
2045 * We get here via trap 71 at TL>0->Software trap 1 at TL>0.  We enter
2046 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2047 */
2048#if defined(lint)
2049
/* Lint-only stub; the real handler is the assembly in the #else branch. */
2050void
2051dcache_parity_tl1_err(void)
2052{}
2053
2054#else	/* lint */
2055
	/*
	 * dcache_parity_tl1_err: D$ parity error handler body at TL>0.
	 * Sequence: CH_ERR_TL1_ENTER(CH_ERR_DPE), an optional trap trace
	 * record (TRAPTRACE builds only), clear the DCU_PE_SHIFT bit in the
	 * DCU register, then CH_ERR_TL1_EXIT.
	 * %g1 and %g2 are left alone between the two macros; %g3-%g7 are
	 * used as scratch.
	 */
2056	ENTRY_NP(dcache_parity_tl1_err)
2057
2058	/*
2059	 * This macro saves all the %g registers in the ch_err_tl1_data
2060	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2061	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
2062	 * the ch_err_tl1_data structure and %g2 will have the original
2063	 * flags in the ch_err_tl1_data structure.  All %g registers
2064	 * except for %g1 and %g2 will be available.
2065	 */
2066	CH_ERR_TL1_ENTER(CH_ERR_DPE);
2067
2068#ifdef TRAPTRACE
2069	/*
2070	 * Get current trap trace entry physical pointer.
	 * %g6 = trap_trace_ctl + (CPU_INDEX result << TRAPTR_SIZE_SHIFT).
	 * Tracing is skipped when that entry's TRAPTR_LIMIT field is zero;
	 * otherwise %g5 = TRAPTR_PBASE + TRAPTR_OFFSET.
2071	 */
2072	CPU_INDEX(%g6, %g5)
2073	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
2074	set	trap_trace_ctl, %g5
2075	add	%g6, %g5, %g6
2076	ld	[%g6 + TRAPTR_LIMIT], %g5
2077	tst	%g5
2078	be	%icc, dpe_tl1_skip_tt
2079	  nop
2080	ldx	[%g6 + TRAPTR_PBASE], %g5
2081	ld	[%g6 + TRAPTR_OFFSET], %g4
2082	add	%g5, %g4, %g5
2083
2084	/*
2085	 * Create trap trace entry.
	 * %asi is saved in %g7, switched to TRAPTR_ASI for the stores,
	 * and restored afterwards.  The TR and F1-F4 fields are written
	 * as zero.
2086	 */
2087	rd	%asi, %g7
2088	wr	%g0, TRAPTR_ASI, %asi
2089	rd	STICK, %g4
2090	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
2091	rdpr	%tl, %g4
2092	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
2093	rdpr	%tt, %g4
2094	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
2095	rdpr	%tpc, %g4
2096	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
2097	rdpr	%tstate, %g4
2098	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
2099	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2100	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2101	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
2102	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
2103	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
2104	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
2105	wr	%g0, %g7, %asi
2106
2107	/*
2108	 * Advance trap trace pointer.
	 * The old offset is stored in TRAPTR_LAST_OFFSET.  The new offset
	 * is old + TRAP_ENT_SIZE, reset to 0 when that value is greater
	 * than or equal to (limit - TRAP_ENT_SIZE).
2109	 */
2110	ld	[%g6 + TRAPTR_OFFSET], %g5
2111	ld	[%g6 + TRAPTR_LIMIT], %g4
2112	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
2113	add	%g5, TRAP_ENT_SIZE, %g5
2114	sub	%g4, TRAP_ENT_SIZE, %g4
2115	cmp	%g5, %g4
2116	movge	%icc, 0, %g5
2117	st	%g5, [%g6 + TRAPTR_OFFSET]
2118dpe_tl1_skip_tt:
2119#endif	/* TRAPTRACE */
2120
2121	/*
2122	 * I$ and D$ are automatically turned off by HW when the CPU hits
2123	 * a dcache or icache parity error so we will just leave those two
2124	 * off for now to avoid repeating this trap.
2125	 * For Panther, however, since we trap on P$ data parity errors
2126	 * and HW does not automatically disable P$, we need to disable it
2127	 * here so that we don't encounter any recursive traps when we
2128	 * issue the retry.
	 * The bit at position DCU_PE_SHIFT is cleared unconditionally;
	 * this code makes no check of the CPU implementation.
2129	 */
2130	ldxa	[%g0]ASI_DCU, %g3
2131	mov	1, %g4
2132	sllx	%g4, DCU_PE_SHIFT, %g4
2133	andn	%g3, %g4, %g3
2134	stxa	%g3, [%g0]ASI_DCU
2135	membar	#Sync
2136
2137	/*
2138	 * We fall into this macro if we've successfully logged the error in
2139	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2140	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
2141	 * Restores the %g registers and issues retry.
2142	 */
2143	CH_ERR_TL1_EXIT;
2144	SET_SIZE(dcache_parity_tl1_err)
2145
2146#endif	/* lint */
2147
2148/*
2149 * I$ parity error trap (trap 72) at TL=0.
2150 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
2151 * the various architecture-specific files.  This merely sets up the
2152 * arguments for cpu_parity_error and calls it via sys_trap.
2153 * NB: Must be 8 instructions or less to fit in trap table and code must
2154 *     be relocatable.
2155 */
2156#if defined(lint)
2157
2158void
2159icache_parity_instr(void)
2160{}
2161
2162#else	/* lint */
2163
	/*
	 * Hands the I$ parity error to sys_trap with:
	 *	%g1 = address of cpu_parity_error
	 *	%g2 = CH_ERR_IPE
	 *	%g3 = %tpc as read at the time of this trap
	 *	%g4 = PIL_15 (loaded in the jmp delay slot)
	 * %g7 is scratch, used only to form the address of sys_trap.
	 */
2164	ENTRY_NP(icache_parity_instr)
2165	membar	#Sync			! Cheetah+ requires membar #Sync
2166	set	cpu_parity_error, %g1
2167	or	%g0, CH_ERR_IPE, %g2
2168	rdpr	%tpc, %g3
2169	sethi	%hi(sys_trap), %g7
2170	jmp	%g7 + %lo(sys_trap)
2171	  mov	PIL_15, %g4		! run at pil 15
2172	SET_SIZE(icache_parity_instr)
2173
2174#endif	/* lint */
2175
2176/*
2177 * I$ parity error trap (trap 72) at TL>0.
2178 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
2179 * the various architecture-specific files.  This generates a "Software
2180 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
2181 * continue the handling there.
2182 * NB: Must be 8 instructions or less to fit in trap table and code must
2183 *     be relocatable.
2184 */
2185#if defined(lint)
2186
2187void
2188icache_parity_tl1_instr(void)
2189{}
2190
2191#else	/* lint */
	/*
	 * The entire handler is the CH_ERR_TL1_TRAPENTRY macro, invoked with
	 * SWTRAP_2 as its argument.  The macro is defined outside this part
	 * of the file; presumably it raises that software trap so that the
	 * handling continues in icache_parity_tl1_cont_instr -- confirm
	 * against the macro definition.
	 */
2192	ENTRY_NP(icache_parity_tl1_instr)
2193	CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
2194	SET_SIZE(icache_parity_tl1_instr)
2195
2196#endif	/* lint */
2197
2198/*
2199 * Software trap 2 at TL>0.
2200 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
2201 * of the various architecture-specific files.  This is used as a continuation
2202 * of the icache parity handling where we've bought an extra TL level, so we
2203 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2204 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2205 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
2206 * order two bits from %g1 and %g2 respectively).
2207 * NB: Must be 8 instructions or less to fit in trap table and code must
2208 *     be relocatable.
2209 */
2210#if defined(lint)
2211
2212void
2213icache_parity_tl1_cont_instr(void)
2214{}
2215
2216#else	/* lint */
	/*
	 * The entire handler is the CH_ERR_TL1_SWTRAPENTRY macro, which is
	 * handed icache_parity_tl1_err as the routine to continue in.  The
	 * macro is defined outside this part of the file; presumably it is
	 * what stashes %g1 and %g2 in %tpc/%tnpc/%tstate before transferring
	 * control -- confirm against the macro definition.
	 */
2217	ENTRY_NP(icache_parity_tl1_cont_instr)
2218	CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
2219	SET_SIZE(icache_parity_tl1_cont_instr)
2220
2221#endif	/* lint */
2222
2223
2224/*
2225 * I$ parity error at TL>0 handler
2226 * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
2227 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2228 */
2229#if defined(lint)
2230
2231void
2232icache_parity_tl1_err(void)
2233{}
2234
2235#else	/* lint */
2236
	/*
	 * Overall flow: CH_ERR_TL1_ENTER records the error, an optional
	 * trap trace entry is written when TRAPTRACE is defined, and
	 * CH_ERR_TL1_EXIT finishes up.  Within the TRAPTRACE section %g4-%g7
	 * are used as scratch; %g1 and %g2 are not written there.
	 */
2237	ENTRY_NP(icache_parity_tl1_err)
2238
2239	/*
2240	 * This macro saves all the %g registers in the ch_err_tl1_data
2241	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2242	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
2243	 * the ch_err_tl1_data structure and %g2 will have the original
2244	 * flags in the ch_err_tl1_data structure.  All %g registers
2245	 * except for %g1 and %g2 will be available.
2246	 */
2247	CH_ERR_TL1_ENTER(CH_ERR_IPE);
2248
2249#ifdef TRAPTRACE
2250	/*
2251	 * Get current trap trace entry physical pointer.
	 *
	 * %g6 ends up as trap_trace_ctl + (cpu index << TRAPTR_SIZE_SHIFT).
	 * If the TRAPTR_LIMIT field there is zero the whole trace step is
	 * skipped; otherwise %g5 = TRAPTR_PBASE + TRAPTR_OFFSET.
2252	 */
2253	CPU_INDEX(%g6, %g5)
2254	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
2255	set	trap_trace_ctl, %g5
2256	add	%g6, %g5, %g6
2257	ld	[%g6 + TRAPTR_LIMIT], %g5
2258	tst	%g5
2259	be	%icc, ipe_tl1_skip_tt
2260	  nop
2261	ldx	[%g6 + TRAPTR_PBASE], %g5
2262	ld	[%g6 + TRAPTR_OFFSET], %g4
2263	add	%g5, %g4, %g5
2264
2265	/*
2266	 * Create trap trace entry.
	 *
	 * The current %asi is parked in %g7 and put back once the entry has
	 * been written through TRAPTR_ASI.  STICK, %tl, %tt, %tpc, %tstate
	 * and %sp are recorded; the TR and F1-F4 fields are zeroed.
2267	 */
2268	rd	%asi, %g7
2269	wr	%g0, TRAPTR_ASI, %asi
2270	rd	STICK, %g4
2271	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
2272	rdpr	%tl, %g4
2273	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
2274	rdpr	%tt, %g4
2275	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
2276	rdpr	%tpc, %g4
2277	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
2278	rdpr	%tstate, %g4
2279	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
2280	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2281	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2282	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
2283	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
2284	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
2285	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
2286	wr	%g0, %g7, %asi
2287
2288	/*
2289	 * Advance trap trace pointer.
	 *
	 * The old offset is remembered in TRAPTR_LAST_OFFSET.  The new
	 * offset wraps to 0 once it reaches TRAPTR_LIMIT - TRAP_ENT_SIZE.
2290	 */
2291	ld	[%g6 + TRAPTR_OFFSET], %g5
2292	ld	[%g6 + TRAPTR_LIMIT], %g4
2293	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
2294	add	%g5, TRAP_ENT_SIZE, %g5
2295	sub	%g4, TRAP_ENT_SIZE, %g4
2296	cmp	%g5, %g4
2297	movge	%icc, 0, %g5
2298	st	%g5, [%g6 + TRAPTR_OFFSET]
2299ipe_tl1_skip_tt:
2300#endif	/* TRAPTRACE */
2301
2302	/*
2303	 * We fall into this macro if we've successfully logged the error in
2304	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2305	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
2306	 * Restores the %g registers and issues retry.
2307	 */
2308	CH_ERR_TL1_EXIT;
2309
2310	SET_SIZE(icache_parity_tl1_err)
2311
2312#endif	/* lint */
2313
2314#endif	/* CPU_IMP_L1_CACHE_PARITY */
2315
2316
2317/*
2318 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
2319 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
2320 * should only be used in places where you have no choice but to look at the
2321 * tlb itself.
2322 *
2323 * Note: These two routines are required by the Estar "cpr" loadable module.
2324 */
2325
2326#if defined(lint)
2327
2328/* ARGSUSED */
2329void
2330itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2331{}
2332
2333#else	/* lint */
2334
	/*
	 * In:	%o0 = ITLB entry index, %o1 = tte, %o2 = va_tag
	 *
	 * The index is multiplied by 8 (sllx by 3) to form the address used
	 * for the ASI_ITLB_ACCESS and ASI_ITLB_TAGREAD loads.  The 64-bit
	 * data word is stored to *tte; the tag word is stored to *va_tag
	 * with the bits in TAGREAD_CTX_MASK cleared (store is in the retl
	 * delay slot).  %g1, %g2, %o4 and %o5 are scratch.
	 */
2335	ENTRY_NP(itlb_rd_entry)
2336	sllx	%o0, 3, %o0
2337	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
2338	stx	%g1, [%o1]
2339	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
2340	set	TAGREAD_CTX_MASK, %o4
2341	andn	%g2, %o4, %o5
2342	retl
2343	  stx	%o5, [%o2]
2344	SET_SIZE(itlb_rd_entry)
2345
2346#endif	/* lint */
2347
2348
2349#if defined(lint)
2350
2351/* ARGSUSED */
2352void
2353dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2354{}
2355
2356#else	/* lint */
2357
	/*
	 * In:	%o0 = DTLB entry index, %o1 = tte, %o2 = va_tag
	 *
	 * Same sequence as the ITLB reader, but through ASI_DTLB_ACCESS and
	 * ASI_DTLB_TAGREAD: the index is scaled by 8, the data word goes to
	 * *tte, and the tag word with the TAGREAD_CTX_MASK bits cleared goes
	 * to *va_tag.  %g1, %g2, %o4 and %o5 are scratch.
	 */
2358	ENTRY_NP(dtlb_rd_entry)
2359	sllx	%o0, 3, %o0
2360	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
2361	stx	%g1, [%o1]
2362	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
2363	set	TAGREAD_CTX_MASK, %o4
2364	andn	%g2, %o4, %o5
2365	retl
2366	  stx	%o5, [%o2]
2367	SET_SIZE(dtlb_rd_entry)
2368#endif	/* lint */
2369
2370
2371#if !(defined(JALAPENO) || defined(SERRANO))
2372
2373#if defined(lint)
2374
2375uint64_t
2376get_safari_config(void)
2377{ return (0); }
2378
2379#else	/* lint */
2380
	/*
	 * Returns in %o0 the 64-bit value read from address 0 of
	 * ASI_SAFARI_CONFIG.  Only built when neither JALAPENO nor SERRANO
	 * is defined.
	 */
2381	ENTRY(get_safari_config)
2382	ldxa	[%g0]ASI_SAFARI_CONFIG, %o0
2383	retl
2384	nop
2385	SET_SIZE(get_safari_config)
2386
2387#endif	/* lint */
2388
2389
2390#if defined(lint)
2391
2392/* ARGSUSED */
2393void
2394set_safari_config(uint64_t safari_config)
2395{}
2396
2397#else	/* lint */
2398
	/*
	 * In:	%o0 = safari_config
	 *
	 * Writes the value to address 0 of ASI_SAFARI_CONFIG and issues a
	 * membar #Sync before returning.  Only built when neither JALAPENO
	 * nor SERRANO is defined.
	 */
2399	ENTRY(set_safari_config)
2400	stxa	%o0, [%g0]ASI_SAFARI_CONFIG
2401	membar	#Sync
2402	retl
2403	nop
2404	SET_SIZE(set_safari_config)
2405
2406#endif	/* lint */
2407
2408#endif	/* !(JALAPENO || SERRANO) */
2409
2410
2411#if defined(lint)
2412
/*
 * Lint stub for cpu_clearticknpt().  The stub name must match the symbol
 * defined by ENTRY_NP() in the assembly half of this conditional so that
 * lint sees a definition for the routine.
 */
2413void
2414cpu_clearticknpt(void)
2415{}
2416
2417#else	/* lint */
2418	/*
2419	 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
2420	 * registers. In an effort to make the change in the
2421	 * tick/stick counter as consistent as possible, we disable
2422	 * all interrupts while we're changing the registers. We also
2423	 * ensure that the read and write instructions are in the same
2424	 * line in the instruction cache.
	 *
	 * Register usage:
	 *	%g1 = %pstate on entry (restored in the final delay slot)
	 *	%g2 = current %tick / STICK value
	 *	%g3 = %pstate with IE clear, then the bit-63 (NPT) mask
	 *	%g4 = return linkage; never written here
	 *
	 * NOTE: this routine does not return with retl.  It jumps to
	 * %g4 + 4, so the caller has to supply the return linkage in %g4.
	 *
	 * Both wrpr and wr XOR their two source operands, so writing
	 * mask ^ value clears bit 63; each write is reached only when the
	 * earlier read showed the bit set (brgez skips it otherwise).
2425	 */
2426	ENTRY_NP(cpu_clearticknpt)
2427	rdpr	%pstate, %g1		/* save processor state */
2428	andn	%g1, PSTATE_IE, %g3	/* turn off */
2429	wrpr	%g0, %g3, %pstate	/*   interrupts */
2430	rdpr	%tick, %g2		/* get tick register */
2431	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
2432	mov	1, %g3			/* create mask */
2433	sllx	%g3, 63, %g3		/*   for NPT bit */
2434	ba,a,pt	%xcc, 2f
2435	.align	8			/* Ensure rd/wr in same i$ line */
24362:
2437	rdpr	%tick, %g2		/* get tick register */
2438	wrpr	%g3, %g2, %tick		/* write tick register, */
2439					/*   clearing NPT bit   */
24401:
2441	rd	STICK, %g2		/* get stick register */
2442	brgez,pn %g2, 3f		/* if NPT bit off, we're done */
2443	mov	1, %g3			/* create mask */
2444	sllx	%g3, 63, %g3		/*   for NPT bit */
2445	ba,a,pt	%xcc, 4f
2446	.align	8			/* Ensure rd/wr in same i$ line */
24474:
2448	rd	STICK, %g2		/* get stick register */
2449	wr	%g3, %g2, STICK		/* write stick register, */
2450					/*   clearing NPT bit   */
24513:
2452	jmp	%g4 + 4
2453	wrpr	%g0, %g1, %pstate	/* restore processor state */
2454
2455	SET_SIZE(cpu_clearticknpt)
2456
2457#endif	/* lint */
2458
2459
2460#if defined(CPU_IMP_L1_CACHE_PARITY)
2461
2462#if defined(lint)
2463/*
2464 * correct_dcache_parity(size_t size, size_t linesize)
2465 *
2466 * Correct D$ data parity by zeroing the data and initializing microtag
2467 * for all indexes and all ways of the D$.
2468 *
2469 */
2470/* ARGSUSED */
2471void
2472correct_dcache_parity(size_t size, size_t linesize)
2473{}
2474
2475#else	/* lint */
2476
2477	ENTRY(correct_dcache_parity)
2478	/*
2479	 * Register Usage:
2480	 *
2481	 * %o0 = input D$ size
2482	 * %o1 = input D$ line size
2483	 * %o2 = scratch
2484	 * %o3 = scratch
2485	 * %o4 = scratch
	 *
	 * Loop structure: the outer loop (label 1) walks line addresses in
	 * %o0 from size - linesize down to 0 in steps of linesize.  The
	 * inner loop (label 2) walks 8-byte offsets in %o2 from
	 * linesize - 8 down to 0.  After setup %o3 holds the Panther
	 * DC_data_parity bit (or 0) and %o4 holds %o0 with that bit or'ed in.
2486	 */
2487
2488	sub	%o0, %o1, %o0			! init cache line address
2489
2490	/*
2491	 * For Panther CPUs, we also need to clear the data parity bits
2492	 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
	 *
	 * The clr sits in a non-annulled delay slot, so it runs on both
	 * paths; on Panther the following mov/sll then overwrite %o3.
2493	 */
2494	GET_CPU_IMPL(%o3)
2495	cmp	%o3, PANTHER_IMPL
2496	bne	1f
2497	  clr	%o3				! zero for non-Panther
2498	mov	1, %o3
2499	sll	%o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3
2500
25011:
2502	/*
2503	 * Set utag = way since it must be unique within an index.
2504	 */
2505	srl	%o0, 14, %o2			! get cache way (DC_way)
2506	membar	#Sync				! required before ASI_DC_UTAG
2507	stxa	%o2, [%o0]ASI_DC_UTAG		! set D$ utag = cache way
2508	membar	#Sync				! required after ASI_DC_UTAG
2509
2510	/*
2511	 * Zero line of D$ data (and data parity bits for Panther)
2512	 */
2513	sub	%o1, 8, %o2
2514	or	%o0, %o3, %o4			! same address + DC_data_parity
25152:
2516	membar	#Sync				! required before ASI_DC_DATA
2517	stxa	%g0, [%o0 + %o2]ASI_DC_DATA	! zero 8 bytes of D$ data
2518	membar	#Sync				! required after ASI_DC_DATA
2519	/*
2520	 * We also clear the parity bits if this is a panther. For non-Panther
2521	 * CPUs, we simply end up clearing the $data register twice.
2522	 */
2523	stxa	%g0, [%o4 + %o2]ASI_DC_DATA
2524	membar	#Sync
2525
2526	subcc	%o2, 8, %o2
2527	bge	2b
2528	nop
2529
2530	subcc	%o0, %o1, %o0
2531	bge	1b
2532	nop
2533
2534	retl
2535	  nop
2536	SET_SIZE(correct_dcache_parity)
2537
2538#endif	/* lint */
2539
2540#endif	/* CPU_IMP_L1_CACHE_PARITY */
2541
2542
2543#if defined(lint)
2544/*
2545 *  Get timestamp (stick).
2546 */
2547/* ARGSUSED */
2548void
2549stick_timestamp(int64_t *ts)
2550{
2551}
2552
2553#else	/* lint */
2554
	/*
	 * In:	%o0 = ts
	 *
	 * Reads STICK into %g1, shifts it left and then logically right by
	 * one so that bit 63 is dropped, and stores the 64-bit result to
	 * *ts from the retl delay slot.
	 */
2555	ENTRY_NP(stick_timestamp)
2556	rd	STICK, %g1	! read stick reg
2557	sllx	%g1, 1, %g1
2558	srlx	%g1, 1, %g1	! clear npt bit
2559
2560	retl
2561	stx     %g1, [%o0]	! store the timestamp
2562	SET_SIZE(stick_timestamp)
2563
2564#endif	/* lint */
2565
2566
2567#if defined(lint)
2568/*
2569 * Set STICK adjusted by skew.
2570 */
2571/* ARGSUSED */
2572void
2573stick_adj(int64_t skew)
2574{
2575}
2576
2577#else	/* lint */
2578
	/*
	 * In:	%o0 = skew (added to the current STICK value)
	 *
	 * %g1 = %pstate on entry, %g3 = %pstate with IE clear, %g4 = STICK
	 * as read, %o1 = adjusted value written back.  Interrupts are off
	 * from the delay slot of the first branch until %pstate is restored
	 * in the retl delay slot.  The read/add/write sequence starts at a
	 * 16-byte aligned address, reached through the ba.
	 */
2579	ENTRY_NP(stick_adj)
2580	rdpr	%pstate, %g1		! save processor state
2581	andn	%g1, PSTATE_IE, %g3
2582	ba	1f			! cache align stick adj
2583	wrpr	%g0, %g3, %pstate	! turn off interrupts
2584
2585	.align	16
25861:	nop
2587
2588	rd	STICK, %g4		! read stick reg
2589	add	%g4, %o0, %o1		! adjust stick with skew
2590	wr	%o1, %g0, STICK		! write stick reg
2591
2592	retl
2593	wrpr	%g1, %pstate		! restore processor state
2594	SET_SIZE(stick_adj)
2595
2596#endif	/* lint */
2597
2598#if defined(lint)
2599/*
2600 * Debugger-specific stick retrieval
2601 */
2602/*ARGSUSED*/
2603int
2604kdi_get_stick(uint64_t *stickp)
2605{
2606	return (0);
2607}
2608
2609#else	/* lint */
2610
	/*
	 * In:	%o0 = stickp
	 * Out:	%o0 = 0 (set in the retl delay slot)
	 *
	 * Stores the STICK register to *stickp exactly as read; bit 63 is
	 * not masked off here.
	 */
2611	ENTRY_NP(kdi_get_stick)
2612	rd	STICK, %g1
2613	stx	%g1, [%o0]
2614	retl
2615	mov	%g0, %o0
2616	SET_SIZE(kdi_get_stick)
2617
2618#endif	/* lint */
2619
2620#if defined(lint)
2621/*
2622 * Invalidate the specified line from the D$.
2623 *
2624 * Register usage:
2625 *	%o0 - index for the invalidation, specifies DC_way and DC_addr
2626 *
2627 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
2628 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
2629 *
2630 * The format of the stored 64-bit value is:
2631 *
2632 *	+----------+--------+----------+
2633 *	| Reserved | DC_tag | DC_valid |
2634 *	+----------+--------+----------+
2635 *       63      31 30     1	      0
2636 *
2637 * DC_tag is the 30-bit physical tag of the associated line.
2638 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
2639 *
2640 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
2641 *
2642 *	+----------+--------+----------+----------+
2643 *	| Reserved | DC_way | DC_addr  | Reserved |
2644 *	+----------+--------+----------+----------+
2645 *       63      16 15    14 13       5 4        0
2646 *
2647 * DC_way is a 2-bit index that selects one of the 4 ways.
2648 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
2649 *
2650 * Setting the DC_valid bit to zero for the specified DC_way and
2651 * DC_addr index into the D$ results in an invalidation of a D$ line.
2652 */
2653/*ARGSUSED*/
2654void
2655dcache_inval_line(int index)
2656{
2657}
2658#else	/* lint */
	/*
	 * In:	%o0 = index
	 *
	 * The index is shifted left by 5 so that it starts at bit 5 of the
	 * ASI_DC_TAG address, then a zero tag/valid word is stored there and
	 * followed by membar #Sync.  %o0 is overwritten with the shifted
	 * value.
	 */
2659	ENTRY(dcache_inval_line)
2660	sll	%o0, 5, %o0		! shift index into DC_way and DC_addr
2661	stxa	%g0, [%o0]ASI_DC_TAG	! zero the DC_valid and DC_tag bits
2662	membar	#Sync
2663	retl
2664	nop
2665	SET_SIZE(dcache_inval_line)
2666#endif	/* lint */
2667
2668#if defined(lint)
2669/*
2670 * Invalidate the entire I$
2671 *
2672 * Register usage:
2673 *	%o0 - specifies IC_way, IC_addr, IC_tag
2674 *	%o1 - scratch
2675 *	%o2 - used to save and restore DCU value
2676 *	%o3 - scratch
2677 *	%o5 - used to save and restore PSTATE
2678 *
2679 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
2680 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
2681 * block out snoops and invalidates to the I$, causing I$ consistency
2682 * to be broken. Before turning on the I$, all I$ lines must be invalidated.
2683 *
2684 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
2685 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
2686 * info below describes store (write) use of ASI_IC_TAG. Note that read
2687 * use of ASI_IC_TAG behaves differently.
2688 *
2689 * The format of the stored 64-bit value is:
2690 *
2691 *	+----------+--------+---------------+-----------+
2692 *	| Reserved | Valid  | IC_vpred<7:0> | Undefined |
2693 *	+----------+--------+---------------+-----------+
2694 *       63      55    54    53           46 45        0
2695 *
2696 * Valid is the 1-bit valid field for both the physical and snoop tags.
2697 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
2698 *	the 32-byte boundary aligned address specified by IC_addr.
2699 *
2700 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
2701 *
2702 *	+----------+--------+---------+--------+---------+
2703 *	| Reserved | IC_way | IC_addr | IC_tag |Reserved |
2704 *	+----------+--------+---------+--------+---------+
2705 *       63      16 15    14 13      5 4      3 2       0
2706 *
2707 * IC_way is a 2-bit index that selects one of the 4 ways.
2708 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
2709 * IC_addr[5] is a "don't care" for a store.
2710 * IC_tag set to 2 specifies that the stored value is to be interpreted
2711 *	as containing Valid and IC_vpred as described above.
2712 *
2713 * Setting the Valid bit to zero for the specified IC_way and
2714 * IC_addr index into the I$ results in an invalidation of an I$ line.
2715 */
2716/*ARGSUSED*/
2717void
2718icache_inval_all(void)
2719{
2720}
2721#else	/* lint */
	/*
	 * Runs with interrupts disabled: %o5 keeps the entry %pstate and is
	 * written back in the retl delay slot.
	 *
	 * The I$ size (%o0) and line size (%o1) handed to
	 * CH_ICACHE_FLUSHALL come from the CHPR_ICACHE_SIZE and
	 * CHPR_ICACHE_LINESIZE fields of the cpu private area.  The label
	 * icache_inval_all_1 is passed to GET_CPU_PRIVATE_PTR, presumably
	 * as the target taken when no private area is available; on that
	 * path the globals icache_size and icache_linesize are used instead.
	 */
2722	ENTRY(icache_inval_all)
2723	rdpr	%pstate, %o5
2724	andn	%o5, PSTATE_IE, %o3
2725	wrpr	%g0, %o3, %pstate	! clear IE bit
2726
2727	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
2728	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
2729	ba,pt	%icc, 2f
2730	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0
2731icache_inval_all_1:
2732	ASM_LD(%o0, icache_size)
2733	ASM_LD(%o1, icache_linesize)
27342:
2735	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
2736
2737	retl
2738	wrpr	%g0, %o5, %pstate	! restore earlier pstate
2739	SET_SIZE(icache_inval_all)
2740#endif	/* lint */
2741
2742
2743#if defined(lint)
2744/* ARGSUSED */
2745void
2746cache_scrubreq_tl1(uint64_t inum, uint64_t index)
2747{
2748}
2749
2750#else	/* lint */
2751/*
2752 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
2753 * crosstrap.  It atomically increments the outstanding request counter and,
2754 * if there was not already an outstanding request, branches to setsoftint_tl1
2755 * to enqueue an intr_vec for the given inum.
 *
 * Flow: the offset of chsm_outstanding[index] within the cpu private area
 * is formed as index * CHSM_OUTSTANDING_INCR + CHPR_SCRUB_MISC +
 * CHSM_OUTSTANDING.  The old count is loaded and old + 1 is stored from the
 * branch delay slot, which executes on both paths.  A non-zero old count
 * goes to the retry at label 1; a zero old count jumps to setsoftint_tl1.
 * Label 1 is also the label handed to GET_CPU_PRIVATE_PTR, presumably as
 * the target taken when there is no cpu private area.
2756 */
2757
2758	! Register usage:
2759	!
2760	! Arguments:
2761	! %g1 - inum
2762	! %g2 - index into chsm_outstanding array
2763	!
2764	! Internal:
2765	! %g2, %g3, %g5 - scratch
2766	! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2767	! %g6 - setsoftint_tl1 address
2768
2769	ENTRY_NP(cache_scrubreq_tl1)
2770	mulx	%g2, CHSM_OUTSTANDING_INCR, %g2
2771	set	CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
2772	add	%g2, %g3, %g2
2773	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
2774	ld	[%g4], %g2		! cpu's chsm_outstanding[index]
2775	!
2776	! no need to use atomic instructions for the following
2777	! increment - we're at tl1
2778	!
2779	add	%g2, 0x1, %g3
2780	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
2781	  st	%g3, [%g4]		! delay - store incremented counter
2782	ASM_JMP(%g6, setsoftint_tl1)
2783	! not reached
27841:
2785	retry
2786	SET_SIZE(cache_scrubreq_tl1)
2787
2788#endif	/* lint */
2789
2790
2791#if defined(lint)
2792
2793/* ARGSUSED */
2794void
2795get_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
2796{}
2797
2798#else	/* lint */
2799
2800/*
2801 * Get the error state for the processor.
2802 * Note that this must not be used at TL>0
 *
 * In:	%o0 = cpu_error_regs (structure to fill in; %o0 is not modified)
 * %o1, %o2 and %o3 are scratch.
 *
 * With CHEETAH_PLUS the shadow AFSR/AFAR are read from the hardware, and
 * the AFSR_EXT and shadow AFSR_EXT fields are read only when the CPU is a
 * Panther; on other CPUs those two fields are stored as zero.  Without
 * CHEETAH_PLUS all four of those fields are stored as zero.  The AFAR2
 * field is written only when SERRANO is defined.  The primary AFSR and
 * AFAR are read last.
2803 */
2804	ENTRY(get_cpu_error_state)
2805#if defined(CHEETAH_PLUS)
2806	set	ASI_SHADOW_REG_VA, %o2
2807	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr reg
2808	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2809	ldxa	[%o2]ASI_AFAR, %o1		! shadow afar reg
2810	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2811	GET_CPU_IMPL(%o3)	! Only panther has AFSR_EXT registers
2812	cmp	%o3, PANTHER_IMPL
	/*
	 * Annulled branch: the zeroing store in the delay slot executes
	 * only when the branch is taken, i.e. on non-Panther CPUs.
	 */
2813	bne,a	1f
2814	  stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]	! zero for non-PN
2815	set	ASI_AFSR_EXT_VA, %o2
2816	ldxa	[%o2]ASI_AFSR, %o1		! afsr_ext reg
2817	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2818	set	ASI_SHADOW_AFSR_EXT_VA, %o2
2819	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr_ext reg
2820	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2821	b	2f
2822	  nop
28231:
2824	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
28252:
2826#else	/* CHEETAH_PLUS */
2827	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2828	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2829	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2830	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2831#endif	/* CHEETAH_PLUS */
2832#if defined(SERRANO)
2833	/*
2834	 * Serrano has an afar2 which captures the address on FRC/FRU errors.
2835	 * We save this in the afar2 of the register save area.
2836	 */
2837	set	ASI_MCU_AFAR2_VA, %o2
2838	ldxa	[%o2]ASI_MCU_CTRL, %o1
2839	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR2]
2840#endif	/* SERRANO */
2841	ldxa	[%g0]ASI_AFSR, %o1		! primary afsr reg
2842	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR]
2843	ldxa	[%g0]ASI_AFAR, %o1		! primary afar reg
2844	retl
2845	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR]
2846	SET_SIZE(get_cpu_error_state)
2847#endif	/* lint */
2848
2849#if defined(lint)
2850
2851/*
2852 * Check a page of memory for errors.
2853 *
2854 * Load each 64 byte block from physical memory.
2855 * Check AFSR after each load to see if an error
2856 * was caused. If so, log/scrub that error.
2857 *
2858 * Used to determine if a page contains
2859 * CEs when CEEN is disabled.
2860 */
2861/*ARGSUSED*/
2862void
2863cpu_check_block(caddr_t va, uint_t psz)
2864{}
2865
2866#else	/* lint */
2867
	/*
	 * In:	%i0 = va (address of the first 64-byte block), %i1 = psz
	 * Locals: %l2 = %fprs on entry, %l3 = AFSR value just read,
	 *	%l4 = number of 64-byte blocks left (psz >> 6)
	 *
	 * The ch_cpu_errors_t scratch buffer lives in the new stack frame
	 * at %fp + STACK_BIAS - CH_CPU_ERROR_SIZE.  The block load writes
	 * the FP registers starting at %d0, so the FPU is enabled for the
	 * duration and %fprs is put back on exit; the FP register contents
	 * themselves are not saved here.
	 *
	 * Any non-zero AFSR after a block load is treated as an error and
	 * reported through cpu_ce_detected with CE_CEEN_TIMEOUT.
	 *
	 * NOTE(review): %l4 is decremented before it is tested, so a psz
	 * below 64 (count 0) would wrap instead of stopping after one
	 * block; callers presumably always pass at least 64 -- confirm.
	 */
2868	ENTRY(cpu_check_block)
2869	!
2870	! get a new window with room for the error regs
2871	!
2872	save	%sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
2873	srl	%i1, 6, %l4		! clear top bits of psz
2874					! and divide by 64
2875	rd	%fprs, %l2		! store FP
2876	wr	%g0, FPRS_FEF, %fprs	! enable FP
28771:
2878	ldda	[%i0]ASI_BLK_P, %d0	! load a block
2879	membar	#Sync
2880	ldxa    [%g0]ASI_AFSR, %l3	! read afsr reg
2881	brz,a,pt %l3, 2f		! check for error
2882	nop
2883
2884	!
2885	! if error, read the error regs and log it
2886	!
	/*
	 * get_cpu_error_state does not write %o0, so the buffer pointer set
	 * in this delay slot is still in %o0 for the cpu_ce_detected call.
	 */
2887	call	get_cpu_error_state
2888	add	%fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0
2889
2890	!
2891	! cpu_ce_detected(ch_cpu_errors_t *, flag)
2892	!
2893	call	cpu_ce_detected		! log the error
2894	mov	CE_CEEN_TIMEOUT, %o1
28952:
2896	dec	%l4			! next 64-byte block
2897	brnz,a,pt  %l4, 1b
2898	add	%i0, 64, %i0		! increment block addr
2899
2900	wr	%l2, %g0, %fprs		! restore FP
2901	ret
2902	restore
2903
2904	SET_SIZE(cpu_check_block)
2905
2906#endif	/* lint */
2907
2908#if defined(lint)
2909
2910/*
2911 * Perform a cpu logout called from C.  This is used where we did not trap
2912 * for the error but still want to gather "what we can".  Caller must make
2913 * sure cpu private area exists and that the indicated logout area is free
2914 * for use, and that we are unable to migrate cpus.
2915 */
2916/*ARGSUSED*/
2917void
2918cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
2919{ }
2920
2921#else
	/*
	 * In:	%o0 = afar, %o1 = clop
	 *
	 * %o1 is advanced by CH_CLO_DATA + CH_CHD_EC_DATA before being
	 * handed to GET_ECACHE_DTAGS together with the afar.  %asi is saved
	 * in %g1, set to ASI_P around that macro, and then restored.
	 * %o2-%o5 are scratch for the macros used here.
	 *
	 * NOTE(review): the entry %pstate is not kept; on exit PSTATE_IE is
	 * simply or'ed into the current %pstate, so interrupts end up
	 * enabled whatever their state was on entry.
	 */
2922	ENTRY(cpu_delayed_logout)
2923	rdpr	%pstate, %o2
2924	andn	%o2, PSTATE_IE, %o2
2925	wrpr	%g0, %o2, %pstate		! disable interrupts
2926	PARK_SIBLING_CORE(%o2, %o3, %o4)	! %o2 has DCU value
2927	add	%o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
2928	rd	%asi, %g1
2929	wr	%g0, ASI_P, %asi
2930	GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
2931	wr	%g1, %asi
2932	UNPARK_SIBLING_CORE(%o2, %o3, %o4)	! can use %o2 again
2933	rdpr	%pstate, %o2
2934	or	%o2, PSTATE_IE, %o2
2935	wrpr	%g0, %o2, %pstate
2936	retl
2937	  nop
2938	SET_SIZE(cpu_delayed_logout)
2939
2940#endif	/* lint */
2941
2942#if defined(lint)
2943
2944/*ARGSUSED*/
2945int
2946dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
2947{ return (0); }
2948
2949#else
2950
	/*
	 * In:	%i0 = addr (target of the block store, written through
	 *	ASI_BLK_COMMIT_S), %i1 = data, %i2 = tryagain
	 * Out:	%o0 = 0 when the store completes.  If the lofault handler at
	 *	label 0 is entered instead, the result is -1 when tryagain is
	 *	zero, otherwise whatever dtrace_blksuword32_err returns (it
	 *	is tail-called).
	 * Locals: %l0 = %fprs on entry, %l1 = %pstate on entry,
	 *	%l2 = %pstate with IE clear, %l5 = address of label 0
	 *
	 * %f0 is saved in the 4 bytes just below %fp + STACK_BIAS, but only
	 * when the FPU was already enabled on entry.
	 */
2951	ENTRY(dtrace_blksuword32)
2952	save	%sp, -SA(MINFRAME + 4), %sp
2953
2954	rdpr	%pstate, %l1
2955	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
2956	wrpr	%g0, %l2, %pstate		! protect our FPU diddling
2957
2958	rd	%fprs, %l0
2959	andcc	%l0, FPRS_FEF, %g0
	/*
	 * Annulled branch: with FEF clear the delay slot enables the FPU
	 * and the %f0 save is skipped; with FEF set the delay slot is
	 * annulled and %f0 is saved.
	 */
2960	bz,a,pt	%xcc, 1f			! if the fpu is disabled
2961	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu
2962
2963	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
29641:
2965	set	0f, %l5
2966        /*
2967         * We're about to write a block full of either total garbage
2968         * (not kernel data, don't worry) or user floating-point data
2969         * (so it only _looks_ like garbage).
2970         */
2971	ld	[%i1], %f0			! modify the block
2972	membar	#Sync
2973	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
2974	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
2975	membar	#Sync
2976	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2977
	/*
	 * The condition codes tested here are still those from the andcc on
	 * FPRS_FEF above; none of the loads, stores or membars in between
	 * modify them.
	 */
2978	bz,a,pt	%xcc, 1f
2979	wr	%g0, %l0, %fprs			! restore %fprs
2980
2981	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
29821:
2983
2984	wrpr	%g0, %l1, %pstate		! restore interrupts
2985
2986	ret
2987	restore	%g0, %g0, %o0
2988
	/*
	 * Lofault target (its address was stored in T_LOFAULT above).
	 * NOTE(review): the bz below again tests the condition codes set by
	 * the andcc near the top, so this path relies on them surviving the
	 * fault -- confirm against the lofault return path.
	 */
29890:
2990	membar	#Sync
2991	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2992
2993	bz,a,pt	%xcc, 1f
2994	wr	%g0, %l0, %fprs			! restore %fprs
2995
2996	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
29971:
2998
2999	wrpr	%g0, %l1, %pstate		! restore interrupts
3000
3001	/*
3002	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
3003	 * which deals with watchpoints. Otherwise, just return -1.
3004	 */
3005	brnz,pt	%i2, 1f
3006	nop
3007	ret
3008	restore	%g0, -1, %o0
30091:
3010	call	dtrace_blksuword32_err
3011	restore
3012
3013	SET_SIZE(dtrace_blksuword32)
3014
3015#endif /* lint */
3016
3017#ifdef	CHEETAHPLUS_ERRATUM_25
3018
3019#if	defined(lint)
3020/*
3021 * Claim a chunk of physical address space.
3022 */
3023/*ARGSUSED*/
3024void
3025claimlines(uint64_t pa, size_t sz, int stride)
3026{}
3027#else	/* lint */
	/*
	 * In:	%o0 = pa, %o1 = sz, %o2 = stride
	 *
	 * Each pass subtracts stride from sz and, as long as that did not
	 * underflow (unsigned), issues a casxa through ASI_MEM at pa + sz
	 * from the annulled delay slot of the loop branch.  The addresses
	 * touched therefore run from pa + sz - stride down towards pa.
	 * The add between the subcc and the branch leaves the condition
	 * codes alone.  Both casxa data operands are %g0, so the memory
	 * contents are left as they were.  %o3 is scratch.
	 */
3028	ENTRY(claimlines)
30291:
3030	subcc	%o1, %o2, %o1
3031	add	%o0, %o1, %o3
3032	bgeu,a,pt	%xcc, 1b
3033	casxa	[%o3]ASI_MEM, %g0, %g0
3034	membar  #Sync
3035	retl
3036	nop
3037	SET_SIZE(claimlines)
3038#endif	/* lint */
3039
3040#if	defined(lint)
3041/*
3042 * CPU feature initialization,
3043 * turn BPE off,
3044 * get device id.
3045 */
3046/*ARGSUSED*/
3047void
3048cpu_feature_init(void)
3049{}
3050#else	/* lint */
	/*
	 * Takes no arguments.  Runs in its own register window, so the %o
	 * registers used below are local scratch.
	 *
	 * Steps:
	 *  - if the 32-bit variable cheetah_bpe_off is non-zero, clear
	 *    ASR_DISPATCH_CONTROL_BPE in ASR_DISPATCH_CONTROL;
	 *  - store the 64-bit value read from ASI_DEVICE_SERIAL_ID at
	 *    cpunodes + DEVICE_ID + (cpu index * CPU_NODE_SIZE);
	 *  - when CHEETAHPLUS_ERRATUM_34 is defined, call the ITLB and DTLB
	 *    erratum 34 fixup routines.
	 */
3051	ENTRY(cpu_feature_init)
3052	save	%sp, -SA(MINFRAME), %sp
3053	sethi	%hi(cheetah_bpe_off), %o0
3054	ld	[%o0 + %lo(cheetah_bpe_off)], %o0
3055	brz	%o0, 1f
3056	nop
3057	rd	ASR_DISPATCH_CONTROL, %o0
3058	andn	%o0, ASR_DISPATCH_CONTROL_BPE, %o0
3059	wr	%o0, 0, ASR_DISPATCH_CONTROL
30601:
3061	!
3062	! get the device_id and store the device_id
3063	! in the appropriate cpunodes structure
3064	! given the cpus index
3065	!
3066	CPU_INDEX(%o0, %o1)
3067	mulx %o0, CPU_NODE_SIZE, %o0
3068	set  cpunodes + DEVICE_ID, %o1
3069	ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2
3070	stx  %o2, [%o0 + %o1]
3071#ifdef	CHEETAHPLUS_ERRATUM_34
3072	!
3073	! apply Cheetah+ erratum 34 workaround
3074	!
3075	call itlb_erratum34_fixup
3076	  nop
3077	call dtlb_erratum34_fixup
3078	  nop
3079#endif	/* CHEETAHPLUS_ERRATUM_34 */
3080	ret
3081	  restore
3082	SET_SIZE(cpu_feature_init)
3083#endif	/* lint */
3084
3085#if	defined(lint)
3086/*
3087 * Copy a tsb entry atomically, from src to dest.
3088 * src must be 128 bit aligned.
3089 */
3090/*ARGSUSED*/
3091void
3092copy_tsb_entry(uintptr_t src, uintptr_t dest)
3093{}
3094#else	/* lint */
	/*
	 * In:	%o0 = src, %o1 = dest
	 *
	 * The entry is read with a single ASI_NQUAD_LD load into the
	 * %o2/%o3 register pair and written out with two separate 64-bit
	 * stores, to dest and dest + 8.
	 */
3095	ENTRY(copy_tsb_entry)
3096	ldda	[%o0]ASI_NQUAD_LD, %o2		! %o2 = tag, %o3 = data
3097	stx	%o2, [%o1]
3098	stx	%o3, [%o1 + 8 ]
3099	retl
3100	nop
3101	SET_SIZE(copy_tsb_entry)
3102#endif	/* lint */
3103
3104#endif	/* CHEETAHPLUS_ERRATUM_25 */
3105
3106#ifdef	CHEETAHPLUS_ERRATUM_34
3107
3108#if	defined(lint)
3109
3110/*ARGSUSED*/
3111void
3112itlb_erratum34_fixup(void)
3113{}
3114
3115#else	/* lint */
3116
3117	!
3118	! In Cheetah+ erratum 34, under certain conditions an ITLB locked
3119	! index 0 TTE will erroneously be displaced when a new TTE is
3120	! loaded via ASI_ITLB_IN.  In order to avoid cheetah+ erratum 34,
3121	! locked index 0 TTEs must be relocated.
3122	!
3123	! NOTE: Care must be taken to avoid an ITLB miss in this routine.
3124	!
	/*
	 * Register usage:
	 *	%o1 = entry 0 data (TTE), %o2 = entry 0 tag
	 *	%o3 = %pstate on entry, restored in each retl delay slot
	 *	%o4 = FLUSH_ADDR for the flush instructions, and the TTE of
	 *	      the candidate entry during the search
	 *	%g3 = candidate entry index * 8, %g4 = MMU_TAG_ACCESS
	 *
	 * wrpr XORs its two source operands, so the first wrpr below clears
	 * PSTATE_IE only if it was set on entry; the DEBUG-only macro is
	 * presumably what enforces that.
	 *
	 * An entry counts as valid when its TTE is negative as a signed
	 * 64-bit value (the bge tests for >= 0).  The andcc in each bge
	 * delay slot always executes and sets the codes for the following
	 * bnz.  The search loop has no upper bound on %g3; it ends at the
	 * first invalid or unlocked entry.
	 */
3125	ENTRY_NP(itlb_erratum34_fixup)
3126	rdpr	%pstate, %o3
3127#ifdef DEBUG
3128	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
3129#endif /* DEBUG */
3130	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
3131	ldxa	[%g0]ASI_ITLB_ACCESS, %o1	! %o1 = entry 0 data
3132	ldxa	[%g0]ASI_ITLB_TAGREAD, %o2	! %o2 = entry 0 tag
3133
3134	cmp	%o1, %g0			! Is this entry valid?
3135	bge	%xcc, 1f
3136	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
3137	bnz	%icc, 2f
3138	  nop
31391:
3140	retl					! Nope, outta here...
3141	  wrpr	%g0, %o3, %pstate		! Enable interrupts
31422:
3143	sethi	%hi(FLUSH_ADDR), %o4
3144	stxa	%g0, [%o2]ASI_ITLB_DEMAP	! Flush this mapping
3145	flush	%o4				! Flush required for I-MMU
3146	!
3147	! Start search from index 1 up.  This is because the kernel force
3148	! loads its text page at index 15 in sfmmu_kernel_remap() and we
3149	! don't want our relocated entry evicted later.
3150	!
3151	! NOTE: We assume that we'll be successful in finding an unlocked
3152	! or invalid entry.  If that isn't the case there are bound to
3153	! bigger problems.
3154	!
3155	set	(1 << 3), %g3
31563:
3157	ldxa	[%g3]ASI_ITLB_ACCESS, %o4	! Load TTE from t16
3158	!
3159	! If this entry isn't valid, we'll choose to displace it (regardless
3160	! of the lock bit).
3161	!
3162	cmp	%o4, %g0			! TTE is > 0 iff not valid
3163	bge	%xcc, 4f			! If invalid, go displace
3164	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
3165	bnz,a	%icc, 3b			! If locked, look at next
3166	  add	%g3, (1 << 3), %g3		!  entry
31674:
3168	!
3169	! We found an unlocked or invalid entry; we'll explicitly load
3170	! the former index 0 entry here.
3171	!
3172	sethi	%hi(FLUSH_ADDR), %o4
3173	set	MMU_TAG_ACCESS, %g4
3174	stxa	%o2, [%g4]ASI_IMMU
3175	stxa	%o1, [%g3]ASI_ITLB_ACCESS
3176	flush	%o4				! Flush required for I-MMU
3177	retl
3178	  wrpr	%g0, %o3, %pstate		! Enable interrupts
3179	SET_SIZE(itlb_erratum34_fixup)
3180
3181#endif	/* lint */
3182
3183#if	defined(lint)
3184
3185/*ARGSUSED*/
3186void
3187dtlb_erratum34_fixup(void)
3188{}
3189
3190#else	/* lint */
3191
3192	!
3193	! In Cheetah+ erratum 34, under certain conditions a DTLB locked
3194	! index 0 TTE will erroneously be displaced when a new TTE is
3195	! loaded.  In order to avoid cheetah+ erratum 34, locked index 0
3196	! TTEs must be relocated.
3197	!
	/*
	 * Register usage:
	 *	%o1 = entry 0 data (TTE), %o2 = entry 0 tag
	 *	%o3 = %pstate on entry, restored in each retl delay slot
	 *	%o4 = TTE of the candidate entry during the search
	 *	%g3 = candidate entry index * 8, %g4 = MMU_TAG_ACCESS
	 *
	 * wrpr XORs its two source operands, so the first wrpr below clears
	 * PSTATE_IE only if it was set on entry; the DEBUG-only macro is
	 * presumably what enforces that.
	 *
	 * An entry counts as valid when its TTE is negative as a signed
	 * 64-bit value (the bge tests for >= 0).  The andcc in each bge
	 * delay slot always executes and sets the codes for the following
	 * bnz.  The search loop has no upper bound on %g3; it ends at the
	 * first invalid or unlocked entry.  The D-MMU stores are followed
	 * by membar #Sync where the ITLB variant uses flush.
	 */
3198	ENTRY_NP(dtlb_erratum34_fixup)
3199	rdpr	%pstate, %o3
3200#ifdef DEBUG
3201	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
3202#endif /* DEBUG */
3203	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
3204	ldxa	[%g0]ASI_DTLB_ACCESS, %o1	! %o1 = entry 0 data
3205	ldxa	[%g0]ASI_DTLB_TAGREAD, %o2	! %o2 = entry 0 tag
3206
3207	cmp	%o1, %g0			! Is this entry valid?
3208	bge	%xcc, 1f
3209	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
3210	bnz	%icc, 2f
3211	  nop
32121:
3213	retl					! Nope, outta here...
3214	  wrpr	%g0, %o3, %pstate		! Enable interrupts
32152:
3216	stxa	%g0, [%o2]ASI_DTLB_DEMAP	! Flush this mapping
3217	membar	#Sync
3218	!
3219	! Start search from index 1 up.
3220	!
3221	! NOTE: We assume that we'll be successful in finding an unlocked
3222	! or invalid entry.  If that isn't the case there are bound to
3223	! bigger problems.
3224	!
3225	set	(1 << 3), %g3
32263:
3227	ldxa	[%g3]ASI_DTLB_ACCESS, %o4	! Load TTE from t16
3228	!
3229	! If this entry isn't valid, we'll choose to displace it (regardless
3230	! of the lock bit).
3231	!
3232	cmp	%o4, %g0			! TTE is > 0 iff not valid
3233	bge	%xcc, 4f			! If invalid, go displace
3234	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
3235	bnz,a	%icc, 3b			! If locked, look at next
3236	  add	%g3, (1 << 3), %g3		!  entry
32374:
3238	!
3239	! We found an unlocked or invalid entry; we'll explicitly load
3240	! the former index 0 entry here.
3241	!
3242	set	MMU_TAG_ACCESS, %g4
3243	stxa	%o2, [%g4]ASI_DMMU
3244	stxa	%o1, [%g3]ASI_DTLB_ACCESS
3245	membar	#Sync
3246	retl
3247	  wrpr	%g0, %o3, %pstate		! Enable interrupts
3248	SET_SIZE(dtlb_erratum34_fixup)
3249
3250#endif	/* lint */
3251
3252#endif	/* CHEETAHPLUS_ERRATUM_34 */
3253
3254