xref: /titanic_51/usr/src/uts/sun4u/cpu/us3_common_asm.s (revision 694c35faa87b858ecdadfe4fc592615f4eefbb07)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Assembly code support for Cheetah/Cheetah+ modules
26 */
27
28#if !defined(lint)
29#include "assym.h"
30#endif	/* !lint */
31
32#include <sys/asm_linkage.h>
33#include <sys/mmu.h>
34#include <vm/hat_sfmmu.h>
35#include <sys/machparam.h>
36#include <sys/machcpuvar.h>
37#include <sys/machthread.h>
38#include <sys/machtrap.h>
39#include <sys/privregs.h>
40#include <sys/trap.h>
41#include <sys/cheetahregs.h>
42#include <sys/us3_module.h>
43#include <sys/xc_impl.h>
44#include <sys/intreg.h>
45#include <sys/async.h>
46#include <sys/clock.h>
47#include <sys/cheetahasm.h>
48#include <sys/cmpregs.h>
49
50#ifdef TRAPTRACE
51#include <sys/traptrace.h>
52#endif /* TRAPTRACE */
53
54#if !defined(lint)
55
56/* BEGIN CSTYLED */
57
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Invalidate D$ lines for one page, using the method selected by the
 * dflush_type variable.  The whole body is skipped (branch to 1f) when
 * DCU_DC is clear in the DCU register.
 *
 * In:
 *    arg1 = pfn (overwritten: shifted into a PA for FLUSHPAGE_TYPE, or
 *	     into a tag to compare for the other two types)
 *    arg2 = virtual color; the color value is never read by the macro,
 *	     and the register is overwritten as the loop counter on the
 *	     FLUSHMATCH_TYPE path
 *    tmp1, tmp2, tmp3 = scratch registers
 *
 * Every loop starts at (size - dcache_linesize) and steps down by
 * dcache_linesize.  The subtract sits in the (non-annulled) branch delay
 * slot, so the pass for offset 0 is the last one executed.
 *
 * Uses numeric local labels 1-5 and changes the condition codes.  The
 * "5:" label in the FLUSHPAGE_TYPE section is not the target of any
 * branch in this macro.
 */
58#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
59	ldxa	[%g0]ASI_DCU, tmp1					;\
60	btst	DCU_DC, tmp1		/* is dcache enabled? */	;\
61	bz,pn	%icc, 1f						;\
62	ASM_LD(tmp1, dcache_linesize)					;\
63	ASM_LD(tmp2, dflush_type)					;\
64	cmp	tmp2, FLUSHPAGE_TYPE					;\
65	be,pt	%icc, 2f						;\
66	nop								;\
67	sllx	arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */	;\
68	ASM_LD(tmp3, dcache_size)					;\
69	cmp	tmp2, FLUSHMATCH_TYPE					;\
70	be,pt	%icc, 3f						;\
71	nop								;\
72	/*								\
73	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
74	 * tmp3 = cache size						\
75	 * tmp1 = cache line size					\
76	 */								\
77	sub	tmp3, tmp1, tmp2					;\
784:									\
79	stxa	%g0, [tmp2]ASI_DC_TAG					;\
80	membar	#Sync							;\
81	cmp	%g0, tmp2						;\
82	bne,pt	%icc, 4b						;\
83	sub	tmp2, tmp1, tmp2					;\
84	ba,pt	%icc, 1f						;\
85	nop								;\
86	/*								\
87	 * flushtype = FLUSHPAGE_TYPE					\
88	 * arg1 = pfn							\
89	 * arg2 = virtual color						\
90	 * tmp1 = cache line size					\
91	 * tmp2 = tag from cache					\
92	 * tmp3 = counter						\
93	 */								\
942:									\
95	set	MMU_PAGESIZE, tmp3					;\
96        sllx    arg1, MMU_PAGESHIFT, arg1  /* pfn to 43 bit PA	   */   ;\
97	sub	tmp3, tmp1, tmp3					;\
984:									\
99	stxa	%g0, [arg1 + tmp3]ASI_DC_INVAL				;\
100	membar	#Sync							;\
1015:									\
102	cmp	%g0, tmp3						;\
103	bnz,pt	%icc, 4b		/* branch if not done */	;\
104	sub	tmp3, tmp1, tmp3					;\
105	ba,pt	%icc, 1f						;\
106	nop								;\
107	/*								\
108	 * flushtype = FLUSHMATCH_TYPE					\
109	 * arg1 = tag to compare against				\
110	 * tmp1 = cache line size					\
111	 * tmp3 = cache size						\
112	 * arg2 = counter						\
113	 * tmp2 = cache tag						\
114	 */								\
1153:									\
116	sub	tmp3, tmp1, arg2					;\
1174:									\
118	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
119	btst	CHEETAH_DC_VBIT_MASK, tmp2				;\
120	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
121	andn	tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */	;\
122	cmp	tmp2, arg1						;\
123	bne,pn	%icc, 5f		/* branch if tag miss */	;\
124	nop								;\
125	stxa	%g0, [arg2]ASI_DC_TAG					;\
126	membar	#Sync							;\
1275:									\
128	cmp	%g0, arg2						;\
129	bne,pt	%icc, 4b		/* branch if not done */	;\
130	sub	arg2, tmp1, arg2					;\
1311:
132
133/*
134 * macro that flushes the entire dcache color
135 * dcache size = 64K, one way 16K
136 *
137 * In:
138 *    arg = virtual color register (not clobbered)
139 *    way = way#, can either be a constant or a register (not clobbered)
140 *    tmp1, tmp2, tmp3 = scratch registers
141 *
 * The whole body is skipped (branch to 1f) when DCU_DC is clear in the
 * DCU register.  Otherwise a zero is stored through ASI_DC_TAG for every
 * line of one page within the selected way: the base address is
 * (arg << MMU_PAGESHIFT) | (way << 14), and the offset runs from
 * (MMU_PAGESIZE - dcache_linesize) down to 0 in dcache_linesize steps.
 * The decrement is in the branch delay slot, so offset 0 is the last
 * line written.
 *
 * Uses numeric local labels 1 and 2 and changes the condition codes.
142 */
143#define DCACHE_FLUSHCOLOR(arg, way, tmp1, tmp2, tmp3)			\
144	ldxa	[%g0]ASI_DCU, tmp1;					\
145	btst	DCU_DC, tmp1;		/* is dcache enabled? */	\
146	bz,pn	%icc, 1f;						\
147	ASM_LD(tmp1, dcache_linesize)					\
148	/*								\
149	 * arg = virtual color						\
150	 * tmp1 = cache line size					\
151	 */								\
152	sllx	arg, MMU_PAGESHIFT, tmp2; /* color to dcache page */	\
153	mov	way, tmp3;						\
154	sllx	tmp3, 14, tmp3;		  /* One way 16K */		\
155	or	tmp2, tmp3, tmp3;					\
156	set	MMU_PAGESIZE, tmp2;					\
157	/*								\
158	 * tmp2 = page size						\
159	 * tmp3 =  cached page in dcache				\
160	 */								\
161	sub	tmp2, tmp1, tmp2;					\
1622:									\
163	stxa	%g0, [tmp3 + tmp2]ASI_DC_TAG;				\
164	membar	#Sync;							\
165	cmp	%g0, tmp2;						\
166	bne,pt	%icc, 2b;						\
167	sub	tmp2, tmp1, tmp2;					\
1681:
169
170/* END CSTYLED */
171
172#endif	/* !lint */
173
174/*
175 * Cheetah MMU and Cache operations.
176 */
177
178#if defined(lint)
179
180/* ARGSUSED */
181void
182vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
183{}
184
185#else	/* lint */
186
187	ENTRY_NP(vtag_flushpage)
188	/*
189	 * flush page from the tlb
190	 *
191	 * %o0 = vaddr
192	 * %o1 = sfmmup
	 *
	 * The caller's %pstate is kept in %o5 and written back in the
	 * delay slot of whichever retl is taken.  %o0-%o4, %g1 and %g2
	 * are overwritten on the user path (%g2 only as the third
	 * argument of SFMMU_CPU_CNUM, presumably scratch -- confirm
	 * against the macro).
193	 */
194	rdpr	%pstate, %o5
195#ifdef DEBUG
196	PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
197#endif /* DEBUG */
198	/*
199	 * disable ints
200	 */
201	andn	%o5, PSTATE_IE, %o4
202	wrpr	%o4, 0, %pstate
203
204	/*
205	 * Then, blow out the tlb
206	 * Interrupts are disabled to prevent the primary ctx register
207	 * from changing underneath us.
208	 */
209	sethi   %hi(ksfmmup), %o3
210        ldx     [%o3 + %lo(ksfmmup)], %o3
211        cmp     %o3, %o1
212        bne,pt   %xcc, 1f			! if not kernel as, go to 1
213	  sethi	%hi(FLUSH_ADDR), %o3
214	/*
215	 * For Kernel demaps use primary. type = page implicitly
216	 */
217	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
218	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
219	flush	%o3
220	retl
221	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
2221:
223	/*
224	 * User demap.  We need to set the primary context properly.
225	 * Secondary context cannot be used for Cheetah IMMU.
226	 * %o0 = vaddr
227	 * %o1 = sfmmup
228	 * %o3 = FLUSH_ADDR
229	 */
230	SFMMU_CPU_CNUM(%o1, %g1, %g2)		! %g1 = sfmmu cnum on this CPU
231
232	ldub	[%o1 + SFMMU_CEXT], %o4		! %o4 = sfmmup->sfmmu_cext
233	sll	%o4, CTXREG_EXT_SHIFT, %o4
234	or	%g1, %o4, %g1			! %g1 = primary pgsz | cnum
235
	/*
	 * %tl is raised to 1 for the span in which MMU_PCONTEXT holds the
	 * target context, and set back to 0 once the old value has been
	 * restored.
	 */
236	wrpr	%g0, 1, %tl
237	set	MMU_PCONTEXT, %o4
238	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
239	ldxa	[%o4]ASI_DMMU, %o2		! %o2 = save old ctxnum
240	srlx	%o2, CTXREG_NEXT_SHIFT, %o1	! need to preserve nucleus pgsz
241	sllx	%o1, CTXREG_NEXT_SHIFT, %o1	! %o1 = nucleus pgsz
242	or	%g1, %o1, %g1			! %g1 = nucleus pgsz | primary pgsz | cnum
243	stxa	%g1, [%o4]ASI_DMMU		! wr new ctxum
244
245	stxa	%g0, [%o0]ASI_DTLB_DEMAP
246	stxa	%g0, [%o0]ASI_ITLB_DEMAP
247	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
248	flush	%o3
249	wrpr	%g0, 0, %tl
250
251	retl
252	wrpr	%g0, %o5, %pstate		/* enable interrupts */
253	SET_SIZE(vtag_flushpage)
254
255#endif	/* lint */
256
257#if defined(lint)
258
259void
260vtag_flushall(void)
261{}
262
263#else	/* lint */
264
265	ENTRY_NP2(vtag_flushall, demap_all)
266	/*
267	 * flush the tlb
	 *
	 * Entered under two names (vtag_flushall and demap_all); takes no
	 * arguments.  Issues a DEMAP_ALL_TYPE demap to the D-TLB and then
	 * the I-TLB, followed by a flush.  Overwrites %o3 and %g1.
268	 */
269	sethi	%hi(FLUSH_ADDR), %o3
270	set	DEMAP_ALL_TYPE, %g1
271	stxa	%g0, [%g1]ASI_DTLB_DEMAP
272	stxa	%g0, [%g1]ASI_ITLB_DEMAP
273	flush	%o3
274	retl
275	nop
276	SET_SIZE(demap_all)
277	SET_SIZE(vtag_flushall)
278
279#endif	/* lint */
280
281
282#if defined(lint)
283
284/* ARGSUSED */
285void
286vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
287{}
288
289#else	/* lint */
290
291	ENTRY_NP(vtag_flushpage_tl1)
292	/*
293	 * x-trap to flush page from tlb and tsb
294	 *
295	 * %g1 = vaddr, zero-extended on 32-bit kernel
296	 * %g2 = sfmmup
297	 *
298	 * assumes TSBE_TAG = 0
	 *
	 * Both paths end with retry.  The srln/slln pair below clears the
	 * low MMU_PAGESHIFT bits of %g1 so the demap type bits can be
	 * or'ed in.  %g1-%g6 are overwritten on the user path.
299	 */
300	srln	%g1, MMU_PAGESHIFT, %g1
301
302	sethi   %hi(ksfmmup), %g3
303        ldx     [%g3 + %lo(ksfmmup)], %g3
304        cmp     %g3, %g2
305        bne,pt	%xcc, 1f                        ! if not kernel as, go to 1
306	  slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
307
308	/* We need to demap in the kernel context */
309	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
310	stxa	%g0, [%g1]ASI_DTLB_DEMAP
311	stxa	%g0, [%g1]ASI_ITLB_DEMAP
312	retry
3131:
314	/* We need to demap in a user context */
315	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
316
317	SFMMU_CPU_CNUM(%g2, %g6, %g3)	! %g6 = sfmmu cnum on this CPU
318
319	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
320	sll	%g4, CTXREG_EXT_SHIFT, %g4
321	or	%g6, %g4, %g6			! %g6 = pgsz | cnum
322
	/*
	 * Swap the target context into MMU_PCONTEXT (keeping the nucleus
	 * page size bits of the old value), demap, then put the old
	 * value back.
	 */
323	set	MMU_PCONTEXT, %g4
324	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
325	srlx	%g5, CTXREG_NEXT_SHIFT, %g2	/* %g2 = nucleus pgsz */
326	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	/* preserve nucleus pgsz */
327	or	%g6, %g2, %g6			/* %g6 = nucleus pgsz | primary pgsz | cnum */
328	stxa	%g6, [%g4]ASI_DMMU		/* wr new ctxum */
329	stxa	%g0, [%g1]ASI_DTLB_DEMAP
330	stxa	%g0, [%g1]ASI_ITLB_DEMAP
331	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
332	retry
333	SET_SIZE(vtag_flushpage_tl1)
334
335#endif	/* lint */
336
337
338#if defined(lint)
339
340/* ARGSUSED */
341void
342vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
343{}
344
345#else	/* lint */
346
347	ENTRY_NP(vtag_flush_pgcnt_tl1)
348	/*
349	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
350	 *
351	 * %g1 = vaddr, zero-extended on 32-bit kernel
352	 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
353	 *
354	 * NOTE: this handler relies on the fact that no
355	 *	interrupts or traps can occur during the loop
356	 *	issuing the TLB_DEMAP operations. It is assumed
357	 *	that interrupts are disabled and this code is
358	 *	fetching from the kernel locked text address.
359	 *
360	 * assumes TSBE_TAG = 0
	 *
	 * Both paths end with retry.  Each loop iteration demaps one page
	 * from the D-TLB and I-TLB, flushes, and advances %g1 by
	 * MMU_PAGESIZE in the branch delay slot.
361	 */
362	set	SFMMU_PGCNT_MASK, %g4
363	and	%g4, %g2, %g3			/* g3 = pgcnt - 1 */
364	add	%g3, 1, %g3			/* g3 = pgcnt */
365
366	andn	%g2, SFMMU_PGCNT_MASK, %g2	/* g2 = sfmmup */
367	srln	%g1, MMU_PAGESHIFT, %g1
368
369	sethi   %hi(ksfmmup), %g4
370        ldx     [%g4 + %lo(ksfmmup)], %g4
371        cmp     %g4, %g2
372        bne,pn   %xcc, 1f			/* if not kernel as, go to 1 */
373	  slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
374
375	/* We need to demap in the kernel context */
376	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
377	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
378	sethi   %hi(FLUSH_ADDR), %g5
3794:
380	stxa	%g0, [%g1]ASI_DTLB_DEMAP
381	stxa	%g0, [%g1]ASI_ITLB_DEMAP
382	flush	%g5				! flush required by immu
383
384	deccc	%g3				/* decr pgcnt */
385	bnz,pt	%icc,4b
386	  add	%g1, %g2, %g1			/* next page */
387	retry
3881:
389	/*
390	 * We need to demap in a user context
391	 *
392	 * g2 = sfmmup
393	 * g3 = pgcnt
394	 */
395	SFMMU_CPU_CNUM(%g2, %g5, %g6)		! %g5 = sfmmu cnum on this CPU
396
397	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
398
399	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
400	sll	%g4, CTXREG_EXT_SHIFT, %g4
401	or	%g5, %g4, %g5
402
	/*
	 * %g6 holds the old MMU_PCONTEXT value for the whole loop and is
	 * written back after the last page has been demapped.
	 */
403	set	MMU_PCONTEXT, %g4
404	ldxa	[%g4]ASI_DMMU, %g6		/* rd old ctxnum */
405	srlx	%g6, CTXREG_NEXT_SHIFT, %g2	/* %g2 = nucleus pgsz */
406	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	/* preserve nucleus pgsz */
407	or	%g5, %g2, %g5			/* %g5 = nucleus pgsz | primary pgsz | cnum */
408	stxa	%g5, [%g4]ASI_DMMU		/* wr new ctxum */
409
410	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
411	sethi   %hi(FLUSH_ADDR), %g5
4123:
413	stxa	%g0, [%g1]ASI_DTLB_DEMAP
414	stxa	%g0, [%g1]ASI_ITLB_DEMAP
415	flush	%g5				! flush required by immu
416
417	deccc	%g3				/* decr pgcnt */
418	bnz,pt	%icc,3b
419	  add	%g1, %g2, %g1			/* next page */
420
421	stxa	%g6, [%g4]ASI_DMMU		/* restore old ctxnum */
422	retry
423	SET_SIZE(vtag_flush_pgcnt_tl1)
424
425#endif	/* lint */
426
427#if defined(lint)
428
429/*ARGSUSED*/
430void
431vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
432{}
433
434#else	/* lint */
435
436	ENTRY_NP(vtag_flushall_tl1)
437	/*
438	 * x-trap to flush tlb
	 *
	 * No register arguments are read.  Issues a DEMAP_ALL_TYPE demap
	 * to the D-TLB and then the I-TLB and ends with retry.
	 * Overwrites %g4.
439	 */
440	set	DEMAP_ALL_TYPE, %g4
441	stxa	%g0, [%g4]ASI_DTLB_DEMAP
442	stxa	%g0, [%g4]ASI_ITLB_DEMAP
443	retry
444	SET_SIZE(vtag_flushall_tl1)
445
446#endif	/* lint */
447
448
449#if defined(lint)
450
451/* ARGSUSED */
452void
453vac_flushpage(pfn_t pfnum, int vcolor)
454{}
455
456#else	/* lint */
457
458/*
459 * vac_flushpage(pfnum, color)
460 *	Flush 1 8k page of the D-$ with physical page = pfnum
461 *	Algorithm:
462 *		The cheetah dcache is a 64k pseudo 4 way associative cache.
463 *		It is virtual indexed, physically tagged cache.
 *
 *	dflush_type (a global word in .data, initialized to FLUSHPAGE_TYPE)
 *	selects which of the DCACHE_FLUSHPAGE strategies is used.
464 */
465	.seg	".data"
466	.align	8
467	.global	dflush_type
468dflush_type:
469	.word	FLUSHPAGE_TYPE
470
471	ENTRY(vac_flushpage)
472	/*
473	 * flush page from the d$
474	 *
475	 * %o0 = pfnum, %o1 = color
	 *
	 * %o2-%o4 are the macro's scratch registers; %o0 is overwritten
	 * by the macro as well.
476	 */
477	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
478	retl
479	  nop
480	SET_SIZE(vac_flushpage)
481
482#endif	/* lint */
483
484
485#if defined(lint)
486
487/* ARGSUSED */
488void
489vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
490{}
491
492#else	/* lint */
493
494	ENTRY_NP(vac_flushpage_tl1)
495	/*
496	 * x-trap to flush page from the d$
497	 *
498	 * %g1 = pfnum, %g2 = color
	 *
	 * Same DCACHE_FLUSHPAGE expansion as vac_flushpage, but using the
	 * global registers (%g3-%g5 as scratch) and ending with retry.
499	 */
500	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
501	retry
502	SET_SIZE(vac_flushpage_tl1)
503
504#endif	/* lint */
505
506
507#if defined(lint)
508
509/* ARGSUSED */
510void
511vac_flushcolor(int vcolor, pfn_t pfnum)
512{}
513
514#else	/* lint */
515
516	ENTRY(vac_flushcolor)
517	/*
518	 * %o0 = vcolor
	 *
	 * Flushes the given color in each of ways 0-3 by expanding
	 * DCACHE_FLUSHCOLOR once per way.  %o1-%o3 are scratch; no other
	 * argument register is read here.
519	 */
520	DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3)
521	DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3)
522	DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3)
523	DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3)
524	retl
525	  nop
526	SET_SIZE(vac_flushcolor)
527
528#endif	/* lint */
529
530
531#if defined(lint)
532
533/* ARGSUSED */
534void
535vac_flushcolor_tl1(uint64_t vcolor, uint64_t pfnum)
536{}
537
538#else	/* lint */
539
540	ENTRY(vac_flushcolor_tl1)
541	/*
542	 * %g1 = vcolor
	 *
	 * Same as vac_flushcolor but using the global registers (%g2-%g4
	 * as scratch) and ending with retry: flushes the given color in
	 * each of ways 0-3.
543	 */
544	DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4)
545	DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4)
546	DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4)
547	DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4)
548	retry
549	SET_SIZE(vac_flushcolor_tl1)
550
551#endif	/* lint */
552
553#if defined(lint)
554
555int
556idsr_busy(void)
557{
558	return (0);
559}
560
561#else	/* lint */
562
563/*
564 * Determine whether or not the IDSR is busy.
565 * Entry: no arguments
566 * Returns: 1 if busy, 0 otherwise
 *
 * %o0 is preset to 0.  The "mov 1, %o0" sits in the delay slot of an
 * annulling conditional branch, so it executes only when the branch is
 * taken.  The branch must therefore be taken when IDSR_BUSY is set
 * (bnz), not when it is clear; with bz the routine returned 1 for
 * "not busy" and 0 for "busy".  Overwrites %g1.
567 */
568	ENTRY(idsr_busy)
569	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
570	clr	%o0
571	btst	IDSR_BUSY, %g1
572	bnz,a,pn	%xcc, 1f		! busy: taken, delay slot runs
573	mov	1, %o0
5741:
575	retl
576	nop
577	SET_SIZE(idsr_busy)
578
579#endif	/* lint */
580
581#if defined(lint)
582
583/* ARGSUSED */
584void
585init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
586{}
587
588/* ARGSUSED */
589void
590init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
591{}
592
593#else	/* lint */
594
/* Panic message used by the DEBUG check at the top of init_mondo. */
595	.global _dispatch_status_busy
596_dispatch_status_busy:
597	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
598	.align	4
599
600/*
601 * Setup interrupt dispatch data registers
602 * Entry:
603 *	%o0 - function or inumber to call
604 *	%o1, %o2 - arguments (2 uint64_t's)
 *
 * init_mondo and init_mondo_nocheck share one body.  In a DEBUG build
 * init_mondo first reads the dispatch status register and calls panic
 * if IDSR_BUSY is set; init_mondo_nocheck enters after that check.
 * Overwrites %g1-%g3.
605 */
606	.seg "text"
607
608	ENTRY(init_mondo)
609#ifdef DEBUG
610	!
611	! IDSR should not be busy at the moment
612	!
613	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
614	btst	IDSR_BUSY, %g1
615	bz,pt	%xcc, 1f
616	nop
617	sethi	%hi(_dispatch_status_busy), %o0
618	call	panic
619	or	%o0, %lo(_dispatch_status_busy), %o0
620#endif /* DEBUG */
621
622	ALTENTRY(init_mondo_nocheck)
623	!
624	! interrupt vector dispatch data reg 0
625	!
6261:
627	mov	IDDR_0, %g1
628	mov	IDDR_1, %g2
629	mov	IDDR_2, %g3
630	stxa	%o0, [%g1]ASI_INTR_DISPATCH
631
632	!
633	! interrupt vector dispatch data reg 1
634	!
635	stxa	%o1, [%g2]ASI_INTR_DISPATCH
636
637	!
638	! interrupt vector dispatch data reg 2
639	!
640	stxa	%o2, [%g3]ASI_INTR_DISPATCH
641
642	membar	#Sync
643	retl
644	nop
645	SET_SIZE(init_mondo_nocheck)
646	SET_SIZE(init_mondo)
647
648#endif	/* lint */
649
650
651#if !(defined(JALAPENO) || defined(SERRANO))
652
653#if defined(lint)
654
655/* ARGSUSED */
656void
657shipit(int upaid, int bn)
658{ return; }
659
660#else	/* lint */
661
662/*
663 * Ship mondo to aid using busy/nack pair bn
 * Entry:
 *	%o0 - agent id, shifted into place by IDCR_PID_SHIFT
 *	%o1 - busy/nack pair number, shifted by IDCR_BN_SHIFT
 * The two fields are combined with IDCR_OFFSET to form the address of a
 * zero store through ASI_INTR_DISPATCH, followed by membar #Sync.
 * Overwrites %g1 and %g2.
664 */
665	ENTRY_NP(shipit)
666	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = agent id
667	sll	%o1, IDCR_BN_SHIFT, %g2		! IDCR<28:24> = b/n pair
668	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
669	or	%g1, %g2, %g1
670	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
671	membar	#Sync
672	retl
673	nop
674	SET_SIZE(shipit)
675
676#endif	/* lint */
677
678#endif	/* !(JALAPENO || SERRANO) */
679
680
681#if defined(lint)
682
683/* ARGSUSED */
684void
685flush_instr_mem(caddr_t vaddr, size_t len)
686{}
687
688#else	/* lint */
689
690/*
691 * flush_instr_mem:
692 *	Flush 1 page of the I-$ starting at vaddr
693 * 	%o0 vaddr
694 *	%o1 bytes to be flushed
695 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
696 * the stores from all processors so that a FLUSH instruction is only needed
697 * to ensure pipeline is consistent. This means a single flush is sufficient at
698 * the end of a sequence of stores that updates the instruction stream to
699 * ensure correct operation.
 *
 * Consequently the body is a single flush instruction; %o1 (the length)
 * is not read.
700 */
701
702	ENTRY(flush_instr_mem)
703	flush	%o0			! address irrelevant
704	retl
705	nop
706	SET_SIZE(flush_instr_mem)
707
708#endif	/* lint */
709
710
711#if defined(CPU_IMP_ECACHE_ASSOC)
712
713#if defined(lint)
714
715/* ARGSUSED */
716uint64_t
717get_ecache_ctrl(void)
718{ return (0); }
719
720#else	/* lint */
721
	/*
	 * Return an E$ control register value in %o0.  When the CPU
	 * implementation is JAGUAR_IMPL the value is read through
	 * ASI_EC_CFG_TIMING, otherwise through ASI_EC_CTRL.  No arguments.
	 */
722	ENTRY(get_ecache_ctrl)
723	GET_CPU_IMPL(%o0)
724	cmp	%o0, JAGUAR_IMPL
725	!
726	! Putting an ASI access in the delay slot may
727	! cause it to be accessed, even when annulled.
728	!
729	bne	1f
730	  nop
731	ldxa	[%g0]ASI_EC_CFG_TIMING, %o0	! read Jaguar shared E$ ctrl reg
732	b	2f
733	  nop
7341:
735	ldxa	[%g0]ASI_EC_CTRL, %o0		! read Ch/Ch+ E$ control reg
7362:
737	retl
738	  nop
739	SET_SIZE(get_ecache_ctrl)
740
741#endif	/* lint */
742
743#endif	/* CPU_IMP_ECACHE_ASSOC */
744
745
746#if !(defined(JALAPENO) || defined(SERRANO))
747
748/*
749 * flush_ecache:
750 *	%o0 - 64 bit physical address
751 *	%o1 - ecache size
752 *	%o2 - ecache linesize
753 */
754#if defined(lint)
755
756/*ARGSUSED*/
757void
758flush_ecache(uint64_t physaddr, size_t ecache_size, size_t ecache_linesize)
759{}
760
761#else /* !lint */
762
	/*
	 * Flush the entire E$.
	 *   %o0 = physical address, %o1 = ecache size,
	 *   %o2 = ecache linesize
	 * %g3-%g5 are handed to PN_L2_FLUSHALL and %o4 to ECACHE_FLUSHALL
	 * (presumably as scratch registers -- confirm against the macro
	 * definitions).
	 */
763	ENTRY(flush_ecache)
764
765	/*
766	 * For certain CPU implementations, we have to flush the L2 cache
767	 * before flushing the ecache.
768	 */
769	PN_L2_FLUSHALL(%g3, %g4, %g5)
770
771	/*
772	 * Flush the entire Ecache using displacement flush.
773	 */
774	ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)
775
776	retl
777	nop
778	SET_SIZE(flush_ecache)
779
780#endif /* lint */
781
782#endif	/* !(JALAPENO || SERRANO) */
783
784
785#if defined(lint)
786
787void
788flush_dcache(void)
789{}
790
791#else	/* lint */
792
	/*
	 * Flush the whole D$.  No arguments: the global dcache_size and
	 * dcache_linesize values are loaded into %o0 and %o1 and passed
	 * to CH_DCACHE_FLUSHALL, with %o2 as its third argument
	 * (presumably scratch).
	 */
793	ENTRY(flush_dcache)
794	ASM_LD(%o0, dcache_size)
795	ASM_LD(%o1, dcache_linesize)
796	CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
797	retl
798	nop
799	SET_SIZE(flush_dcache)
800
801#endif	/* lint */
802
803
804#if defined(lint)
805
806void
807flush_icache(void)
808{}
809
810#else	/* lint */
811
	/*
	 * Flush the whole I$.  No arguments.
	 *
	 * The size and linesize come from the CHPR_ICACHE_SIZE and
	 * CHPR_ICACHE_LINESIZE fields at %o0 when GET_CPU_PRIVATE_PTR
	 * falls through.  At flush_icache_1 (presumably reached when no
	 * private pointer is available -- confirm against the macro) the
	 * global icache_size and icache_linesize are used instead.
	 */
812	ENTRY(flush_icache)
813	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
814	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
815	ba,pt	%icc, 2f
816	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0
817flush_icache_1:
818	ASM_LD(%o0, icache_size)
819	ASM_LD(%o1, icache_linesize)
8202:
	/* %o0 = icache size, %o1 = icache linesize on both paths */
821	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
822	retl
823	nop
824	SET_SIZE(flush_icache)
825
826#endif	/* lint */
827
828#if defined(lint)
829
830/*ARGSUSED*/
831void
832kdi_flush_idcache(int dcache_size, int dcache_lsize, int icache_size,
833    int icache_lsize)
834{
835}
836
837#else	/* lint */
838
	/*
	 * Flush the D$ and then the I$ using caller-supplied geometry.
	 *   %o0 = dcache size, %o1 = dcache linesize,
	 *   %o2 = icache size, %o3 = icache linesize
	 * %g1 and %g2 are passed to the flush macros as their remaining
	 * arguments (presumably scratch).  A membar #Sync follows both
	 * flushes.
	 */
839	ENTRY(kdi_flush_idcache)
840	CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
841	CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
842	membar	#Sync
843	retl
844	nop
845	SET_SIZE(kdi_flush_idcache)
846
847#endif	/* lint */
848
849#if defined(lint)
850
851void
852flush_pcache(void)
853{}
854
855#else	/* lint */
856
	/*
	 * Flush the P$.  No arguments; %o0-%o2 are passed to
	 * PCACHE_FLUSHALL (presumably as scratch registers).
	 */
857	ENTRY(flush_pcache)
858	PCACHE_FLUSHALL(%o0, %o1, %o2)
859	retl
860	nop
861	SET_SIZE(flush_pcache)
862
863#endif	/* lint */
864
865
866#if defined(CPU_IMP_L1_CACHE_PARITY)
867
868#if defined(lint)
869
870/* ARGSUSED */
871void
872get_dcache_dtag(uint32_t dcache_idx, uint64_t *data)
873{}
874
875#else	/* lint */
876
877/*
878 * Get dcache data and tag.  The Dcache data is a pointer to a ch_dc_data_t
879 * structure (see cheetahregs.h):
880 * The Dcache *should* be turned off when this code is executed.
 *
 * Entry:
 *	%o0 - dcache index (diagnostic access address)
 *	%o1 - pointer to the output structure
 *
 * PSTATE_IE and PSTATE_AM are cleared for the duration of the routine;
 * the caller's %pstate (kept in %o5) is restored in the delay slot of
 * retl.  Overwrites %o0-%o3.
881 */
882	.align	128
883	ENTRY(get_dcache_dtag)
884	rdpr	%pstate, %o5
885	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
886	wrpr	%g0, %o3, %pstate
887	b	1f
888	  stx	%o0, [%o1 + CH_DC_IDX]
889
	/* The diagnostic access sequence starts on a 128-byte boundary. */
890	.align	128
8911:
892	ldxa	[%o0]ASI_DC_TAG, %o2
893	stx	%o2, [%o1 + CH_DC_TAG]
894	membar	#Sync
895	ldxa	[%o0]ASI_DC_UTAG, %o2
896	membar	#Sync
897	stx	%o2, [%o1 + CH_DC_UTAG]
898	ldxa	[%o0]ASI_DC_SNP_TAG, %o2
899	stx	%o2, [%o1 + CH_DC_SNTAG]
900	add	%o1, CH_DC_DATA, %o1
901	clr	%o3
	/*
	 * Copy CH_DC_DATA_REG_SIZE bytes of line data, 8 bytes per pass;
	 * %o3 is the byte offset and is advanced in the delay slot.
	 */
9022:
903	membar	#Sync				! required before ASI_DC_DATA
904	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
905	membar	#Sync				! required after ASI_DC_DATA
906	stx	%o2, [%o1 + %o3]
907	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
908	blt	2b
909	  add	%o3, 8, %o3
910
911	/*
912	 * Unlike other CPUs in the family, D$ data parity bits for Panther
913	 * do not reside in the microtag. Instead, we have to read them
914	 * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
915	 * of just having 8 parity bits to protect all 32 bytes of data
916	 * per line, we now have 32 bits of parity.
917	 */
918	GET_CPU_IMPL(%o3)
919	cmp	%o3, PANTHER_IMPL
920	bne	4f
921	  clr	%o3
922
923	/*
924	 * move our pointer to the next field where we store parity bits
925	 * and add the offset of the last parity byte since we will be
926	 * storing all 4 parity bytes within one 64 bit field like this:
927	 *
928	 * +------+------------+------------+------------+------------+
929	 * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
930	 * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
931	 * +------+------------+------------+------------+------------+
932	 *  63:32     31:24        23:16         15:8          7:0
933	 */
934	add	%o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1
935
936	/* add the DC_data_parity bit into our working index */
937	mov	1, %o2
938	sll	%o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
939	or	%o0, %o2, %o0
	/*
	 * One parity byte is stored per 8-byte data offset, working
	 * backwards from the last byte of the parity field.
	 */
9403:
941	membar	#Sync				! required before ASI_DC_DATA
942	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
943	membar	#Sync				! required after ASI_DC_DATA
944	stb	%o2, [%o1]
945	dec	%o1
946	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
947	blt	3b
948	  add	%o3, 8, %o3
9494:
950	retl
951	  wrpr	%g0, %o5, %pstate
952	SET_SIZE(get_dcache_dtag)
953
954#endif	/* lint */
955
956
957#if defined(lint)
958
959/* ARGSUSED */
960void
961get_icache_dtag(uint32_t ecache_idx, uint64_t *data)
962{}
963
964#else	/* lint */
965
966/*
967 * Get icache data and tag.  The data argument is a pointer to a ch_ic_data_t
968 * structure (see cheetahregs.h):
969 * The Icache *Must* be turned off when this function is called.
970 * This is because diagnostic accesses to the Icache interfere with cache
971 * consistency.
 *
 * Entry:
 *	%o0 - icache index (diagnostic access address)
 *	%o1 - pointer to the output structure
 *
 * PSTATE_IE and PSTATE_AM are cleared for the duration of the routine;
 * the caller's %pstate (kept in %o5) is restored in the delay slot of
 * retl.  Overwrites %o0-%o3.
972 */
973	.align	128
974	ENTRY(get_icache_dtag)
975	rdpr	%pstate, %o5
976	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
977	wrpr	%g0, %o3, %pstate
978
	/*
	 * Four reads through ASI_IC_TAG: first at the index as passed in,
	 * then with CH_ICTAG_UTAG, CH_ICTAG_UPPER and CH_ICTAG_LOWER
	 * selected in turn.  Each add that retargets %o0 is interleaved
	 * between a load and the store of its result.
	 */
979	stx	%o0, [%o1 + CH_IC_IDX]
980	ldxa	[%o0]ASI_IC_TAG, %o2
981	stx	%o2, [%o1 + CH_IC_PATAG]
982	add	%o0, CH_ICTAG_UTAG, %o0
983	ldxa	[%o0]ASI_IC_TAG, %o2
984	add	%o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
985	stx	%o2, [%o1 + CH_IC_UTAG]
986	ldxa	[%o0]ASI_IC_TAG, %o2
987	add	%o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
988	stx	%o2, [%o1 + CH_IC_UPPER]
989	ldxa	[%o0]ASI_IC_TAG, %o2
990	andn	%o0, CH_ICTAG_TMASK, %o0
991	stx	%o2, [%o1 + CH_IC_LOWER]
992	ldxa	[%o0]ASI_IC_SNP_TAG, %o2
993	stx	%o2, [%o1 + CH_IC_SNTAG]
994	add	%o1, CH_IC_DATA, %o1
995	clr	%o3
	/*
	 * Copy PN_IC_DATA_REG_SIZE bytes of line data, 8 bytes per pass;
	 * %o3 is the byte offset and is advanced in the delay slot.
	 */
9962:
997	ldxa	[%o0 + %o3]ASI_IC_DATA, %o2
998	stx	%o2, [%o1 + %o3]
999	cmp	%o3, PN_IC_DATA_REG_SIZE - 8
1000	blt	2b
1001	  add	%o3, 8, %o3
1002
1003	retl
1004	  wrpr	%g0, %o5, %pstate
1005	SET_SIZE(get_icache_dtag)
1006
1007#endif	/* lint */
1008
1009#if defined(lint)
1010
1011/* ARGSUSED */
1012void
1013get_pcache_dtag(uint32_t pcache_idx, uint64_t *data)
1014{}
1015
1016#else	/* lint */
1017
1018/*
1019 * Get pcache data and tags.
1020 * inputs:
1021 *   pcache_idx	- fully constructed VA for accessing P$ diagnostic
1022 *		  registers. Contains PC_way and PC_addr shifted into
1023 *		  the correct bit positions. See the PRM for more details.
1024 *   data	- pointer to a ch_pc_data_t
1025 * structure (see cheetahregs.h):
 *
 * pcache_idx arrives in %o0 and data in %o1.  PSTATE_IE and PSTATE_AM
 * are cleared for the duration of the routine; the caller's %pstate
 * (kept in %o5) is restored in the delay slot of retl.
 * Overwrites %o1-%o3.
1026 */
1027	.align	128
1028	ENTRY(get_pcache_dtag)
1029	rdpr	%pstate, %o5
1030	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
1031	wrpr	%g0, %o3, %pstate
1032
1033	stx	%o0, [%o1 + CH_PC_IDX]
1034	ldxa	[%o0]ASI_PC_STATUS_DATA, %o2
1035	stx	%o2, [%o1 + CH_PC_STATUS]
1036	ldxa	[%o0]ASI_PC_TAG, %o2
1037	stx	%o2, [%o1 + CH_PC_TAG]
1038	ldxa	[%o0]ASI_PC_SNP_TAG, %o2
1039	stx	%o2, [%o1 + CH_PC_SNTAG]
1040	add	%o1, CH_PC_DATA, %o1
1041	clr	%o3
	/*
	 * Copy CH_PC_DATA_REG_SIZE bytes of line data, 8 bytes per pass;
	 * %o3 is the byte offset and is advanced in the delay slot.
	 */
10422:
1043	ldxa	[%o0 + %o3]ASI_PC_DATA, %o2
1044	stx	%o2, [%o1 + %o3]
1045	cmp	%o3, CH_PC_DATA_REG_SIZE - 8
1046	blt	2b
1047	  add	%o3, 8, %o3
1048
1049	retl
1050	  wrpr	%g0, %o5, %pstate
1051	SET_SIZE(get_pcache_dtag)
1052
1053#endif	/* lint */
1054
1055#endif	/* CPU_IMP_L1_CACHE_PARITY */
1056
1057#if defined(lint)
1058
1059/* ARGSUSED */
1060void
1061set_dcu(uint64_t dcu)
1062{}
1063
1064#else	/* lint */
1065
1066/*
1067 * re-enable the i$, d$, w$, and p$ according to bootup cache state.
1068 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
1069 *   %o0 - 64 bit constant
 *
 * The routine itself stores %o0 to the DCU register unmodified, so the
 * caller chooses which bits end up set, then issues a flush.
1070 */
1071	ENTRY(set_dcu)
1072	stxa	%o0, [%g0]ASI_DCU	! Store to DCU
1073	flush	%g0	/* flush required after changing the IC bit */
1074	retl
1075	nop
1076	SET_SIZE(set_dcu)
1077
1078#endif	/* lint */
1079
1080
1081#if defined(lint)
1082
1083uint64_t
1084get_dcu(void)
1085{
1086	return ((uint64_t)0);
1087}
1088
1089#else	/* lint */
1090
1091/*
1092 * Return DCU register.
 * Entry: no arguments
 * Returns: %o0 = value read through ASI_DCU
1093 */
1094	ENTRY(get_dcu)
1095	ldxa	[%g0]ASI_DCU, %o0		/* DCU control register */
1096	retl
1097	nop
1098	SET_SIZE(get_dcu)
1099
1100#endif	/* lint */
1101
1102/*
1103 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
1104 *
1105 * This handler is used to check for softints generated by error trap
1106 * handlers to report errors.  On Cheetah, this mechanism is used by the
1107 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
1108 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
1109 * NB: Must be 8 instructions or less to fit in trap table and code must
1110 *     be relocatable.
1111 */
1112#if defined(lint)
1113
1114void
1115ch_pil15_interrupt_instr(void)
1116{}
1117
1118#else	/* lint */
1119
	/*
	 * Trap table stub: the entire body is ASM_JMP to ch_pil15_interrupt
	 * with %g1 as the macro's register argument (presumably the
	 * scratch register used to form the target address -- confirm
	 * against the macro definition).
	 */
1120	ENTRY_NP(ch_pil15_interrupt_instr)
1121	ASM_JMP(%g1, ch_pil15_interrupt)
1122	SET_SIZE(ch_pil15_interrupt_instr)
1123
1124#endif
1125
1126
1127#if defined(lint)
1128
1129void
1130ch_pil15_interrupt(void)
1131{}
1132
1133#else	/* lint */
1134
1135	ENTRY_NP(ch_pil15_interrupt)
1136
1137	/*
1138	 * Since pil_interrupt is hacked to assume that every level 15
1139	 * interrupt is generated by the CPU to indicate a performance
1140	 * counter overflow this gets ugly.  Before calling pil_interrupt
1141	 * the Error at TL>0 pending status is inspected.  If it is
1142	 * non-zero, then an error has occurred and it is handled.
1143	 * Otherwise control is transferred to pil_interrupt.  Note that if
1144	 * an error is detected pil_interrupt will not be called and
1145	 * overflow interrupts may be lost causing erroneous performance
1146	 * measurements.  However, error-recovery will have a detrimental
1147	 * effect on performance anyway.
	 *
	 * The pending status is the byte of ch_err_tl1_pending indexed by
	 * the value CPU_INDEX leaves in %g1.
1148	 */
1149	CPU_INDEX(%g1, %g4)
1150	set	ch_err_tl1_pending, %g4
1151	ldub	[%g1 + %g4], %g2
1152	brz	%g2, 1f
1153	  nop
1154
1155	/*
1156	 * We have a pending TL>0 error, clear the TL>0 pending status.
1157	 */
1158	stb	%g0, [%g1 + %g4]
1159
1160	/*
1161	 * Clear the softint.
1162	 */
1163	mov	1, %g5
1164	sll	%g5, PIL_15, %g5
1165	wr	%g5, CLEAR_SOFTINT
1166
1167	/*
1168	 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
1169	 * to process the Fast ECC/Cache Parity at TL>0 error.  Clear
1170	 * panic flag (%g2).
1171	 */
1172	set	cpu_tl1_error, %g1
1173	clr	%g2
1174	ba	sys_trap
1175	  mov	PIL_15, %g4
1176
11771:
1178	/*
1179	 * The logout is invalid.
1180	 *
1181	 * Call the default interrupt handler.
	 * %g4 = PIL_15 is set in the delay slot of the jump.
1182	 */
1183	sethi	%hi(pil_interrupt), %g1
1184	jmp	%g1 + %lo(pil_interrupt)
1185	  mov	PIL_15, %g4
1186
1187	SET_SIZE(ch_pil15_interrupt)
1188#endif
1189
1190
1191/*
1192 * Error Handling
1193 *
1194 * Cheetah provides error checking for all memory access paths between
1195 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
1196 * information is logged in the AFSR, (also AFSR_EXT for Panther) and
1197 * AFAR and one of the following traps is generated (provided that it
1198 * is enabled in External Cache Error Enable Register) to handle that
1199 * error:
1200 * 1. trap 0x70: Precise trap
1201 *    tt0_fecc for errors at trap level(TL)>=0
1202 * 2. trap 0x0A and 0x32: Deferred trap
1203 *    async_err for errors at TL>=0
1204 * 3. trap 0x63: Disrupting trap
1205 *    ce_err for errors at TL=0
1206 *    (Note that trap 0x63 cannot happen at trap level > 0)
1207 *
1208 * Trap level one handlers panic the system except for the fast ecc
1209 * error handler which tries to recover from certain errors.
1210 */
1211
1212/*
1213 * FAST ECC TRAP STRATEGY:
1214 *
1215 * Software must handle single and multi bit errors which occur due to data
1216 * or instruction cache reads from the external cache. A single or multi bit
1217 * error occurring in one of these situations results in a precise trap.
1218 *
1219 * The basic flow of this trap handler is as follows:
1220 *
1221 * 1) Record the state and then turn off the Dcache and Icache.  The Dcache
1222 *    is disabled because bad data could have been installed.  The Icache is
1223 *    turned off because we want to capture the Icache line related to the
1224 *    AFAR.
1225 * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing.
1226 * 3) Park sibling core if caches are shared (to avoid race condition while
1227 *    accessing shared resources such as L3 data staging register during
1228 *    CPU logout).
1229 * 4) Read the AFAR and AFSR.
1230 * 5) If CPU logout structure is not being used, then:
1231 *    6) Clear all errors from the AFSR.
1232 *    7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
1233 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
1234 *       state.
1235 *    9) Unpark sibling core if we parked it earlier.
1236 *    10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
1237 *        running at PIL 15.
1238 * 6) Otherwise, if CPU logout structure is being used:
1239 *    7) Increment the "logout busy count".
1240 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
1241 *       state.
1242 *    9) Unpark sibling core if we parked it earlier.
1243 *    10) Issue a retry since the other CPU error logging code will end up
1244 *       finding this error bit and logging information about it later.
1245 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
1246 *    yet initialized such that we can't even check the logout struct, then
1247 *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
1248 *    call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
1249 *    to determine information such as TL, TT, CEEN and NCEEN settings, etc
1250 *    in the high level trap handler since we don't have access to detailed
1251 *    logout information in cases where the cpu_private struct is not yet
1252 *    initialized.
1253 *
1254 * We flush the E$ and D$ here on TL=1 code to prevent getting nested
1255 * Fast ECC traps in the TL=0 code.  If we get a Fast ECC event here in
1256 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
1257 * since it uses different code/data from this handler, has a better
1258 * chance of fixing things up than simply recursing through this code
1259 * again (this would probably cause an eventual kernel stack overflow).
1260 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
1261 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
1262 * the Fast ECC at TL>0 handler and eventually Red Mode.
1263 *
1264 * Note that for Cheetah (and only Cheetah), we use alias addresses for
1265 * flushing rather than ASI accesses (which don't exist on Cheetah).
1266 * Should we encounter a Fast ECC error within this handler on Cheetah,
1267 * there's a good chance it's within the ecache_flushaddr buffer (since
1268 * it's the largest piece of memory we touch in the handler and it is
1269 * usually kernel text/data).  For that reason the Fast ECC at TL>0
1270 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
1271 */
1272
1273/*
1274 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
1275 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
1276 * architecture-specific files.
1277 * NB: Must be 8 instructions or less to fit in trap table and code must
1278 *     be relocatable.
1279 */
1280
1281#if defined(lint)
1282
1283void
1284fecc_err_instr(void)
1285{}
1286
1287#else	/* lint */
1288
1289	ENTRY_NP(fecc_err_instr)
1290	membar	#Sync			! Cheetah requires membar #Sync
1291
1292	/*
1293	 * Save current DCU state.  Turn off the Dcache and Icache.
	 *
	 * %g1 keeps the unmodified DCU value for the rest of the handler;
	 * fast_ecc_err (the variant visible in this file) writes it back to
	 * ASI_DCU once the caches have been flushed.  %g4 receives the copy
	 * with the DCU_DC and DCU_IC bits cleared and is what is stored to
	 * the register here.
1294	 */
1295	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1296	andn	%g1, DCU_DC + DCU_IC, %g4
1297	stxa	%g4, [%g0]ASI_DCU
1298	flush	%g0	/* flush required after changing the IC bit */
1299
	/*
	 * Continue in fast_ecc_err, since this trap table entry is limited
	 * to 8 instructions.  %g4 is handed to ASM_JMP as its register
	 * argument (presumably scratch for building the target address --
	 * confirm against the macro definition); the masked DCU value it
	 * held has already been stored and is not needed any more.
	 */
1300	ASM_JMP(%g4, fast_ecc_err)
1301	SET_SIZE(fecc_err_instr)
1302
1303#endif	/* lint */
1304
1305
1306#if !(defined(JALAPENO) || defined(SERRANO))
1307
1308#if defined(lint)
1309
1310void
1311fast_ecc_err(void)
1312{}
1313
1314#else	/* lint */
1315
	/*
	 * fast_ecc_err: continuation of the TL=0 Fast ECC trap handler,
	 * reached from fecc_err_instr.
	 *
	 * On entry %g1 holds the DCU value saved by fecc_err_instr (the
	 * Dcache and Icache have already been turned off).  %g1 is kept
	 * live until it is written back to ASI_DCU near the end of this
	 * routine.  The routine leaves through one of three exits:
	 * ptl1_panic, retry, or sys_trap (to run cpu_fast_ecc_error).
	 */
1316	.section ".text"
1317	.align	64
1318	ENTRY_NP(fast_ecc_err)
1319
1320	/*
1321	 * Turn off CEEN and NCEEN.
	 * %g3 keeps the original error enable register value; its CEEN and
	 * NCEEN bits are extracted into %g4 further down and passed to
	 * DO_CPU_LOGOUT as the trap information.
1322	 */
1323	ldxa	[%g0]ASI_ESTATE_ERR, %g3
1324	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1325	stxa	%g4, [%g0]ASI_ESTATE_ERR
1326	membar	#Sync			! membar sync required
1327
1328	/*
1329	 * Check to see whether we need to park our sibling core
1330	 * before recording diagnostic information from caches
1331	 * which may be shared by both cores.
1332	 * We use %g1 to store information about whether or not
1333	 * we had to park the core (%g1 holds our DCUCR value and
1334	 * we only use bits from that register which are "reserved"
1335	 * to keep track of core parking) so that we know whether
1336	 * or not to unpark later. %g5 and %g4 are scratch registers.
1337	 */
1338	PARK_SIBLING_CORE(%g1, %g5, %g4)
1339
1340	/*
1341	 * Do the CPU log out capture.
1342	 *   %g3 = "failed?" return value.
1343	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1344	 *         into this macro via %g4. Output only valid if cpu_private
1345	 *         struct has not been initialized.
1346	 *   CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
1347	 *   %g4 = Trap information stored in the cpu logout flags field
1348	 *   %g5 = scr1
1349	 *   %g6 = scr2
1350	 *   %g3 = scr3
1351	 *   %g4 = scr4
	 *
	 * Note that %g6 (offset input), %g3 and %g4 each appear twice in
	 * the macro argument list: once as an input/output and once as a
	 * scratch register.
1352	 */
1353	 /* store the CEEN and NCEEN values, TL=0 */
1354	and	%g3, EN_REG_CEEN + EN_REG_NCEEN, %g4
1355	set	CHPR_FECCTL0_LOGOUT, %g6
1356	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1357
1358	/*
1359	 * Flush the Ecache (and L2 cache for Panther) to get the error out
1360	 * of the Ecache.  If the UCC or UCU is on a dirty line, then the
1361	 * following flush will turn that into a WDC or WDU, respectively.
1362	 */
1363	PN_L2_FLUSHALL(%g4, %g5, %g6)
1364
	/*
	 * Index into cpunodes[] (entries of CPU_NODE_SIZE bytes) using the
	 * value CPU_INDEX leaves in %g4, then load that entry's E$ line
	 * size into %g5 and E$ size into %g4.  The size is loaded last
	 * because the load overwrites the entry pointer in %g4.
	 */
1365	CPU_INDEX(%g4, %g5)
1366	mulx	%g4, CPU_NODE_SIZE, %g4
1367	set	cpunodes, %g5
1368	add	%g4, %g5, %g4
1369	ld	[%g4 + ECACHE_LINESIZE], %g5
1370	ld	[%g4 + ECACHE_SIZE], %g4
1371
1372	ASM_LDX(%g6, ecache_flushaddr)
1373	ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)
1374
1375	/*
1376	 * Flush the Dcache.  Since bad data could have been installed in
1377	 * the Dcache we must flush it before re-enabling it.
1378	 */
1379	ASM_LD(%g5, dcache_size)
1380	ASM_LD(%g6, dcache_linesize)
1381	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1382
1383	/*
1384	 * Flush the Icache.  Since we turned off the Icache to capture the
1385	 * Icache line it is now stale or corrupted and we must flush it
1386	 * before re-enabling it.
	 *
	 * The I$ size (%g5) and line size (%g6) come from the cpu_private
	 * area when GET_CPU_PRIVATE_PTR yields a pointer in %g5; otherwise
	 * (presumably the macro branches to fast_ecc_err_5 -- confirm
	 * against its definition) the global icache_size and
	 * icache_linesize variables are used.  The size is loaded in the
	 * branch delay slot because it overwrites the pointer in %g5.
1387	 */
1388	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
1389	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1390	ba,pt	%icc, 6f
1391	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1392fast_ecc_err_5:
1393	ASM_LD(%g5, icache_size)
1394	ASM_LD(%g6, icache_linesize)
13956:
1396	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1397
1398	/*
1399	 * check to see whether we parked our sibling core at the start
1400	 * of this handler. If so, we need to unpark it here.
1401	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1402	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1403	 */
1404	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1405
1406	/*
1407	 * Restore the Dcache and Icache to the previous state.
1408	 */
1409	stxa	%g1, [%g0]ASI_DCU
1410	flush	%g0	/* flush required after changing the IC bit */
1411
1412	/*
1413	 * Make sure our CPU logout operation was successful.
	 * %g3 == 0 means the logout was captured (or could not be checked),
	 * so go straight to the sys_trap call at 8:.
1414	 */
1415	cmp	%g3, %g0
1416	be	8f
1417	  nop
1418
1419	/*
1420	 * If the logout structure had been busy, how many times have
1421	 * we tried to use it and failed (nesting count)? If we have
1422	 * already recursed a substantial number of times, then we can
1423	 * assume things are not going to get better by themselves and
1424	 * so it would be best to panic.
1425	 */
1426	cmp	%g3, CLO_NESTING_MAX
1427	blt	7f
1428	  nop
1429
	/* Nesting limit reached: %g1 = panic reason, set in the delay slot. */
1430        call ptl1_panic
1431          mov   PTL1_BAD_ECC, %g1
1432
14337:
1434	/*
1435	 * Otherwise, if the logout structure was busy but we have not
1436	 * nested more times than our maximum value, then we simply
1437	 * issue a retry. Our TL=0 trap handler code will check and
1438	 * clear the AFSR after it is done logging what is currently
1439	 * in the logout struct and handle this event at that time.
1440	 */
1441	retry
14428:
1443	/*
1444	 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
1445	 * already at PIL 15.
	 *
	 * %g1 = handler address, %g4 = PIL to run it at.  The movl in the
	 * branch delay slot replaces the current %pil in %g4 with PIL_14
	 * only when the current %pil compares (signed) below PIL_14, so
	 * the handler runs at the higher of the two levels.
1446	 */
1447	set	cpu_fast_ecc_error, %g1
1448	rdpr	%pil, %g4
1449	cmp	%g4, PIL_14
1450	ba	sys_trap
1451	  movl	%icc, PIL_14, %g4
1452
1453	SET_SIZE(fast_ecc_err)
1454
1455#endif	/* lint */
1456
1457#endif	/* !(JALAPENO || SERRANO) */
1458
1459
1460/*
1461 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1462 *
1463 * The basic flow of this trap handler is as follows:
1464 *
1465 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1466 *    software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1467 *    will use to save %g1 and %g2.
1468 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1469 *    we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1470 *    handler (using the just saved %g1).
1471 * 3) Turn off the Dcache if it was on and save the state of the Dcache
1472 *    (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1473 *    NB: we don't turn off the Icache because bad data is not installed nor
1474 *        will we be doing any diagnostic accesses.
1475 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1476 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1477 *    %tpc, %tnpc, %tstate values previously saved).
1478 * 6) set %tl to %tl - 1.
1479 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
1480 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
1481 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear.  For
1482 *    Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
1483 *    Save the values in ch_err_tl1_data.  For Panther, read the shadow
1484 *    AFSR_EXT and save the value in ch_err_tl1_data.
1485 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
1486 *    being queued.  We'll report them via the AFSR/AFAR capture in step 13.
1487 * 11) Flush the Ecache.
1488 *    NB: the Ecache is flushed assuming the largest possible size with
1489 *        the smallest possible line size since access to the cpu_nodes may
1490 *        cause an unrecoverable DTLB miss.
1491 * 12) Reenable CEEN/NCEEN with the value saved from step 10.
1492 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
1493 *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
1494 *    Save the read AFSR/AFAR values in ch_err_tl1_data.  For Panther,
1495 *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1496 * 14) Flush and re-enable the Dcache if it was on at step 3.
1497 * 15) Do TRAPTRACE if enabled.
1498 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1499 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1500 * 18) Cause a softint 15.  The pil15_interrupt handler will inspect the
1501 *    event pending flag and call cpu_tl1_error via systrap if set.
1502 * 19) Restore the registers from step 5 and issue retry.
1503 */
1504
1505/*
1506 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1507 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1508 * architecture-specific files.  This generates a "Software Trap 0" at TL>0,
1509 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1510 * NB: Must be 8 instructions or less to fit in trap table and code must
1511 *     be relocatable.
1512 */
1513
1514#if defined(lint)
1515
1516void
1517fecc_err_tl1_instr(void)
1518{}
1519
1520#else	/* lint */
1521
1522	ENTRY_NP(fecc_err_tl1_instr)
	/*
	 * The whole trap table entry is generated by CH_ERR_TL1_TRAPENTRY
	 * (defined outside this part of the file).  SWTRAP_0 names the
	 * software trap to raise; its TL>0 handler, fecc_err_tl1_cont_instr,
	 * continues the Fast ECC processing.
	 */
1523	CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
1524	SET_SIZE(fecc_err_tl1_instr)
1525
1526#endif	/* lint */
1527
1528/*
1529 * Software trap 0 at TL>0.
1530 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1531 * the various architecture-specific files.  This is used as a continuation
1532 * of the fast ecc handling where we've bought an extra TL level, so we can
1533 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1534 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1535 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1536 * order two bits from %g1 and %g2 respectively).
1537 * NB: Must be 8 instructions or less to fit in trap table and code must
1538 *     be relocatable.
1539 */
1540#if defined(lint)
1541
1542void
1543fecc_err_tl1_cont_instr(void)
1544{}
1545
1546#else	/* lint */
1547
1548	ENTRY_NP(fecc_err_tl1_cont_instr)
	/*
	 * The whole trap table entry is generated by CH_ERR_TL1_SWTRAPENTRY
	 * (defined outside this part of the file), which is given the
	 * handler to continue in: fast_ecc_tl1_err.
	 */
1549	CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
1550	SET_SIZE(fecc_err_tl1_cont_instr)
1551
1552#endif	/* lint */
1553
1554
1555#if defined(lint)
1556
1557void
1558ce_err(void)
1559{}
1560
1561#else	/* lint */
1562
1563/*
1564 * The ce_err function handles disrupting trap type 0x63 at TL=0.
1565 *
1566 * AFSR errors bits which cause this trap are:
1567 *	CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1568 *
1569 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1570 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1571 *
1572 * CEEN Bit of Cheetah External Cache Error Enable Register enables
1573 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1574 *
1575 * Cheetah+ also handles (No additional processing required):
1576 *    DUE, DTO, DBERR	(NCEEN controlled)
1577 *    THCE		(CEEN and ET_ECC_en controlled)
1578 *    TUE		(ET_ECC_en controlled)
1579 *
1580 * Panther further adds:
1581 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1582 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1583 *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1584 *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1585 *    THCE			(CEEN and L2_tag_ECC_en controlled)
1586 *    L3_THCE			(CEEN and ET_ECC_en controlled)
1587 *
1588 * Steps:
1589 *	1. Disable hardware corrected disrupting errors only (CEEN)
1590 *	2. Park sibling core if caches are shared (to avoid race
1591 *	   condition while accessing shared resources such as L3
1592 *	   data staging register during CPU logout).
1593 *	3. If the CPU logout structure is not currently being used:
1594 *		4. Clear AFSR error bits
1595 *		5. Capture Ecache, Dcache and Icache lines associated
1596 *		   with AFAR.
1597 *		6. Unpark sibling core if we parked it earlier.
1598 *		7. call cpu_disrupting_error via sys_trap at PIL 14
1599 *		   unless we're already running at PIL 15.
1600 *	4. Otherwise, if the CPU logout structure is busy:
1601 *		5. Increment "logout busy count" and place into %g3
1602 *		6. Unpark sibling core if we parked it earlier.
1603 *		7. Issue a retry since the other CPU error logging
1604 *		   code will end up finding this error bit and logging
1605 *		   information about it later.
1606 *	5. Alternatively (to 3 and 4 above), if the cpu_private struct is
1607 *         not yet initialized such that we can't even check the logout
1608 *         struct, then we place the clo_flags data into %g2
1609 *         (sys_trap->have_win arg #1) and call cpu_disrupting_error via
1610 *         systrap. The clo_flags parameter is used to determine information
1611 *         such as TL, TT, CEEN settings, etc in the high level trap
1612 *         handler since we don't have access to detailed logout information
1613 *         in cases where the cpu_private struct is not yet initialized.
1614 *
1615 * %g3: [ logout busy count ] - arg #2
1616 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1617 */
1618
1619	.align	128
1620	ENTRY_NP(ce_err)
1621	membar	#Sync			! Cheetah requires membar #Sync
1622
1623	/*
1624	 * Disable trap on hardware corrected errors (CEEN) while at TL=0
1625	 * to prevent recursion.
	 * Only CEEN is cleared here; NCEEN is left as it was.  %g1 is just
	 * a temporary for this read-modify-write and is reloaded with the
	 * DCU value immediately below.
1626	 */
1627	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1628	bclr	EN_REG_CEEN, %g1
1629	stxa	%g1, [%g0]ASI_ESTATE_ERR
1630	membar	#Sync			! membar sync required
1631
1632	/*
1633	 * Save current DCU state.  Turn off Icache to allow capture of
1634	 * Icache data by DO_CPU_LOGOUT.
	 * Unlike the Fast ECC handler, the Dcache enable bit is not touched
	 * here.  %g1 keeps the original DCU value until it is written back
	 * after the Icache flush.
1635	 */
1636	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1637	andn	%g1, DCU_IC, %g4
1638	stxa	%g4, [%g0]ASI_DCU
1639	flush	%g0	/* flush required after changing the IC bit */
1640
1641	/*
1642	 * Check to see whether we need to park our sibling core
1643	 * before recording diagnostic information from caches
1644	 * which may be shared by both cores.
1645	 * We use %g1 to store information about whether or not
1646	 * we had to park the core (%g1 holds our DCUCR value and
1647	 * we only use bits from that register which are "reserved"
1648	 * to keep track of core parking) so that we know whether
1649	 * or not to unpark later. %g5 and %g4 are scratch registers.
1650	 */
1651	PARK_SIBLING_CORE(%g1, %g5, %g4)
1652
1653	/*
1654	 * Do the CPU log out capture.
1655	 *   %g3 = "failed?" return value.
1656	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1657	 *         into this macro via %g4. Output only valid if cpu_private
1658	 *         struct has not been initialized.
1659	 *   CHPR_CECC_LOGOUT = cpu logout structure offset input
1660	 *   %g4 = Trap information stored in the cpu logout flags field
1661	 *   %g5 = scr1
1662	 *   %g6 = scr2
1663	 *   %g3 = scr3
1664	 *   %g4 = scr4
	 *
	 * The trap information passed in %g4 is zero for this handler.
1665	 */
1666	clr	%g4			! TL=0 bit in afsr
1667	set	CHPR_CECC_LOGOUT, %g6
1668	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1669
1670	/*
1671	 * Flush the Icache.  Since we turned off the Icache to capture the
1672	 * Icache line it is now stale or corrupted and we must flush it
1673	 * before re-enabling it.
	 *
	 * The I$ size (%g5) and line size (%g6) come from the cpu_private
	 * area when GET_CPU_PRIVATE_PTR yields a pointer in %g5; otherwise
	 * (presumably the macro branches to ce_err_1 -- confirm against its
	 * definition) the global icache_size and icache_linesize variables
	 * are used.  The size is loaded in the branch delay slot because it
	 * overwrites the pointer in %g5.
1674	 */
1675	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
1676	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1677	ba,pt	%icc, 2f
1678	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1679ce_err_1:
1680	ASM_LD(%g5, icache_size)
1681	ASM_LD(%g6, icache_linesize)
16822:
1683	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1684
1685	/*
1686	 * check to see whether we parked our sibling core at the start
1687	 * of this handler. If so, we need to unpark it here.
1688	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1689	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1690	 */
1691	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1692
1693	/*
1694	 * Restore Icache to previous state.
1695	 */
1696	stxa	%g1, [%g0]ASI_DCU
1697	flush	%g0	/* flush required after changing the IC bit */
1698
1699	/*
1700	 * Make sure our CPU logout operation was successful.
	 * %g3 == 0 means the logout was captured (or could not be checked),
	 * so go straight to the sys_trap call at 4:.
1701	 */
1702	cmp	%g3, %g0
1703	be	4f
1704	  nop
1705
1706	/*
1707	 * If the logout structure had been busy, how many times have
1708	 * we tried to use it and failed (nesting count)? If we have
1709	 * already recursed a substantial number of times, then we can
1710	 * assume things are not going to get better by themselves and
1711	 * so it would be best to panic.
1712	 */
1713	cmp	%g3, CLO_NESTING_MAX
1714	blt	3f
1715	  nop
1716
	/* Nesting limit reached: %g1 = panic reason, set in the delay slot. */
1717        call ptl1_panic
1718          mov   PTL1_BAD_ECC, %g1
1719
17203:
1721	/*
1722	 * Otherwise, if the logout structure was busy but we have not
1723	 * nested more times than our maximum value, then we simply
1724	 * issue a retry. Our TL=0 trap handler code will check and
1725	 * clear the AFSR after it is done logging what is currently
1726	 * in the logout struct and handle this event at that time.
1727	 */
1728	retry
17294:
1730	/*
1731	 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
1732	 * already at PIL 15.
	 *
	 * %g1 = handler address, %g4 = PIL to run it at.  The movl in the
	 * branch delay slot replaces the current %pil in %g4 with PIL_14
	 * only when the current %pil compares (signed) below PIL_14, so
	 * the handler runs at the higher of the two levels.
1733	 */
1734	set	cpu_disrupting_error, %g1
1735	rdpr	%pil, %g4
1736	cmp	%g4, PIL_14
1737	ba	sys_trap
1738	  movl	%icc, PIL_14, %g4
1739	SET_SIZE(ce_err)
1740
1741#endif	/* lint */
1742
1743
1744#if defined(lint)
1745
1746/*
1747 * This trap cannot happen at TL>0 which means this routine will never
1748 * actually be called and so we treat this like a BAD TRAP panic.
1749 */
1750void
1751ce_err_tl1(void)
1752{}
1753
1754#else	/* lint */
1755
1756	.align	64
1757	ENTRY_NP(ce_err_tl1)
1758
	/*
	 * The routine does nothing but panic: ptl1_panic is called with the
	 * reason code PTL1_BAD_TRAP placed in %g1 by the delay-slot
	 * instruction.  No code follows the call.
	 */
1759        call ptl1_panic
1760          mov   PTL1_BAD_TRAP, %g1
1761
1762	SET_SIZE(ce_err_tl1)
1763
1764#endif	/* lint */
1765
1766
1767#if defined(lint)
1768
1769void
1770async_err(void)
1771{}
1772
1773#else	/* lint */
1774
1775/*
1776 * The async_err function handles deferred trap types 0xA
1777 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1778 *
1779 * AFSR errors bits which cause this trap are:
1780 *	UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1781 * On some platforms, EMU may cause cheetah to pull the error pin
1782 * never giving Solaris a chance to take a trap.
1783 *
1784 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1785 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1786 *
1787 * Steps:
1788 *	1. Disable CEEN and NCEEN errors to prevent recursive errors.
1789 *	2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1790 *         I$ line in DO_CPU_LOGOUT.
1791 *	3. Park sibling core if caches are shared (to avoid race
1792 *	   condition while accessing shared resources such as L3
1793 *	   data staging register during CPU logout).
1794 *	4. If the CPU logout structure is not currently being used:
1795 *		5. Clear AFSR error bits
1796 *		6. Capture Ecache, Dcache and Icache lines associated
1797 *		   with AFAR.
1798 *		7. Unpark sibling core if we parked it earlier.
1799 *		8. call cpu_deferred_error via sys_trap.
1800 *	5. Otherwise, if the CPU logout structure is busy:
1801 *		6. Increment "logout busy count"
1802 *		7. Unpark sibling core if we parked it earlier.
1803 *		8. Issue a retry since the other CPU error logging
1804 *		   code will end up finding this error bit and logging
1805 *		   information about it later.
1806 *      6. Alternatively (to 4 and 5 above), if the cpu_private struct is
1807 *         not yet initialized such that we can't even check the logout
1808 *         struct, then we place the clo_flags data into %g2
1809 *         (sys_trap->have_win arg #1) and call cpu_deferred_error via
1810 *         systrap. The clo_flags parameter is used to determine information
1811 *         such as TL, TT, CEEN settings, etc in the high level trap handler
1812 *         since we don't have access to detailed logout information in cases
1813 *         where the cpu_private struct is not yet initialized.
1814 *
1815 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1816 * %g3: [ logout busy count ] - arg #2
 *
 * NOTE(review): %g5 is read (for T_TL1 and the trap type) before this
 * routine writes it, so it is an input that must be set up by whatever
 * code jumps here -- presumably the trap table entry; confirm there.
1817 */
1818
1819	ENTRY_NP(async_err)
1820	membar	#Sync			! Cheetah requires membar #Sync
1821
1822	/*
1823	 * Disable CEEN and NCEEN.
	 * %g3 keeps the original error enable register value so that its
	 * CEEN bit can be folded into the logout flags below.
1824	 */
1825	ldxa	[%g0]ASI_ESTATE_ERR, %g3
1826	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1827	stxa	%g4, [%g0]ASI_ESTATE_ERR
1828	membar	#Sync			! membar sync required
1829
1830	/*
1831	 * Save current DCU state.
1832	 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
1833	 * Do this regardless of whether this is a Data Access Error or
1834	 * Instruction Access Error Trap.
1835	 * Disable Dcache for both Data Access Error and Instruction Access
1836	 * Error per Cheetah PRM P.5 Note 6.
1837	 */
1838	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1839	andn	%g1, DCU_IC + DCU_DC, %g4
1840	stxa	%g4, [%g0]ASI_DCU
1841	flush	%g0	/* flush required after changing the IC bit */
1842
1843	/*
1844	 * Check to see whether we need to park our sibling core
1845	 * before recording diagnostic information from caches
1846	 * which may be shared by both cores.
1847	 * We use %g1 to store information about whether or not
1848	 * we had to park the core (%g1 holds our DCUCR value and
1849	 * we only use bits from that register which are "reserved"
1850	 * to keep track of core parking) so that we know whether
1851	 * or not to unpark later. %g6 and %g4 are scratch registers.
1852	 */
1853	PARK_SIBLING_CORE(%g1, %g6, %g4)
1854
1855	/*
1856	 * Do the CPU logout capture.
1857	 *
1858	 *   %g3 = "failed?" return value.
1859	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1860	 *         into this macro via %g4. Output only valid if cpu_private
1861	 *         struct has not been initialized.
1862	 *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
1863	 *   %g4 = Trap information stored in the cpu logout flags field
1864	 *   %g5 = scr1
1865	 *   %g6 = scr2
1866	 *   %g3 = scr3
1867	 *   %g4 = scr4
	 *
	 * The instructions below build the %g4 trap information from three
	 * pieces: a TL bit (1 if T_TL1 is set in %g5) shifted by
	 * CLO_FLAGS_TL_SHIFT, the trap type from %g5 shifted by
	 * CLO_FLAGS_TT_SHIFT and masked with CLO_FLAGS_TT_MASK, and the
	 * CEEN bit of the saved error enable register.  %g2 is used only
	 * as a temporary for the mask, and %g5 is free to be scratch once
	 * it has been consumed here.
1868	 */
1869	andcc	%g5, T_TL1, %g0
1870	clr	%g6
1871	movnz	%xcc, 1, %g6			! set %g6 if T_TL1 set
1872	sllx	%g6, CLO_FLAGS_TL_SHIFT, %g6
1873	sllx	%g5, CLO_FLAGS_TT_SHIFT, %g4
1874	set	CLO_FLAGS_TT_MASK, %g2
1875	and	%g4, %g2, %g4			! ttype
1876	or	%g6, %g4, %g4			! TT and TL
1877	and	%g3, EN_REG_CEEN, %g3		! CEEN value
1878	or	%g3, %g4, %g4			! TT and TL and CEEN
1879	set	CHPR_ASYNC_LOGOUT, %g6
1880	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1881
1882	/*
1883	 * If the logout struct was busy, we may need to pass the
1884	 * TT, TL, and CEEN information to the TL=0 handler via
1885	 * systrap parameter so save it off here.
	 *
	 * When %g3 is non-zero, the value in %g4 is shifted into the upper
	 * 32 bits and merged into %g3, leaving the original %g3 value in
	 * the lower 32 bits.
1886	 */
1887	cmp	%g3, %g0
1888	be	1f
1889	  nop
1890	sllx	%g4, 32, %g4
1891	or	%g4, %g3, %g3
18921:
1893	/*
1894	 * Flush the Icache.  Since we turned off the Icache to capture the
1895	 * Icache line it is now stale or corrupted and we must flush it
1896	 * before re-enabling it.
	 *
	 * The I$ size (%g5) and line size (%g6) come from the cpu_private
	 * area when GET_CPU_PRIVATE_PTR yields a pointer in %g5; otherwise
	 * (presumably the macro branches to async_err_1 -- confirm against
	 * its definition) the global icache_size and icache_linesize
	 * variables are used.  The size is loaded in the branch delay slot
	 * because it overwrites the pointer in %g5.
1897	 */
1898	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
1899	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1900	ba,pt	%icc, 2f
1901	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1902async_err_1:
1903	ASM_LD(%g5, icache_size)
1904	ASM_LD(%g6, icache_linesize)
19052:
1906	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1907
1908	/*
1909	 * XXX - Don't we need to flush the Dcache before turning it back
1910	 *       on to avoid stale or corrupt data? Was this broken?
	 *
	 * NOTE: the Dcache flush that this question asks for is performed
	 * immediately below, before the DCU is restored.
1911	 */
1912	/*
1913	 * Flush the Dcache before turning it back on since it may now
1914	 * contain stale or corrupt data.
1915	 */
1916	ASM_LD(%g5, dcache_size)
1917	ASM_LD(%g6, dcache_linesize)
1918	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1919
1920	/*
1921	 * check to see whether we parked our sibling core at the start
1922	 * of this handler. If so, we need to unpark it here.
1923	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1924	 * whether or not we need to unpark. %g5 and %g7 are scratch registers.
1925	 */
1926	UNPARK_SIBLING_CORE(%g1, %g5, %g7)
1927
1928	/*
1929	 * Restore Icache and Dcache to previous state.
1930	 */
1931	stxa	%g1, [%g0]ASI_DCU
1932	flush	%g0	/* flush required after changing the IC bit */
1933
1934	/*
1935	 * Make sure our CPU logout operation was successful.
	 * %g3 == 0 means the logout was captured (or could not be checked),
	 * so go straight to the sys_trap call at 4:.
1936	 */
1937	cmp	%g3, %g0
1938	be	4f
1939	  nop
1940
1941	/*
1942	 * If the logout structure had been busy, how many times have
1943	 * we tried to use it and failed (nesting count)? If we have
1944	 * already recursed a substantial number of times, then we can
1945	 * assume things are not going to get better by themselves and
1946	 * so it would be best to panic.
	 *
	 * %g3 may carry the packed flags in its upper 32 bits at this
	 * point.  blt without a condition-code operand tests the 32-bit
	 * condition codes, so only the lower 32 bits of %g3 take part in
	 * this comparison.
1947	 */
1948	cmp	%g3, CLO_NESTING_MAX
1949	blt	3f
1950	  nop
1951
	/* Nesting limit reached: %g1 = panic reason, set in the delay slot. */
1952        call ptl1_panic
1953          mov   PTL1_BAD_ECC, %g1
1954
19553:
1956	/*
1957	 * Otherwise, if the logout structure was busy but we have not
1958	 * nested more times than our maximum value, then we simply
1959	 * issue a retry. Our TL=0 trap handler code will check and
1960	 * clear the AFSR after it is done logging what is currently
1961	 * in the logout struct and handle this event at that time.
1962	 */
1963	retry
19644:
	/*
	 * Logout succeeded: hand off to cpu_deferred_error through sys_trap.
	 * %g1 = handler address; %g4 = PIL_15, set in the branch delay slot
	 * (unlike ce_err and fast_ecc_err there is no PIL comparison here).
	 * RESET_USER_RTT_REGS is defined elsewhere; it is given %g4 and %g5
	 * plus the label that immediately follows it.
	 */
1965	RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
1966async_err_resetskip:
1967	set	cpu_deferred_error, %g1
1968	ba	sys_trap
1969	  mov	PIL_15, %g4		! run at pil 15
1970	SET_SIZE(async_err)
1971
1972#endif	/* lint */
1973
1974#if defined(CPU_IMP_L1_CACHE_PARITY)
1975
1976/*
1977 * D$ parity error trap (trap 71) at TL=0.
1978 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
1979 * the various architecture-specific files.  This merely sets up the
1980 * arguments for cpu_parity_error and calls it via sys_trap.
1981 * NB: Must be 8 instructions or less to fit in trap table and code must
1982 *     be relocatable.
1983 */
1984#if defined(lint)
1985
1986void
1987dcache_parity_instr(void)
1988{}
1989
1990#else	/* lint */
	/*
	 * Registers set up before jumping to sys_trap:
	 *   %g1 = address of cpu_parity_error (the handler to run)
	 *   %g2 = CH_ERR_DPE (marks this as a D$ parity error)
	 *   %g3 = %tpc of the trapping instruction
	 *   %g4 = PIL_15, written in the jmp delay slot
	 * %g7 is used only to form the address of sys_trap.
	 */
1991	ENTRY_NP(dcache_parity_instr)
1992	membar	#Sync			! Cheetah+ requires membar #Sync
1993	set	cpu_parity_error, %g1
1994	or	%g0, CH_ERR_DPE, %g2
1995	rdpr	%tpc, %g3
1996	sethi	%hi(sys_trap), %g7
1997	jmp	%g7 + %lo(sys_trap)
1998	  mov	PIL_15, %g4		! run at pil 15
1999	SET_SIZE(dcache_parity_instr)
2000
2001#endif	/* lint */
2002
2003
2004/*
2005 * D$ parity error trap (trap 71) at TL>0.
2006 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
2007 * the various architecture-specific files.  This generates a "Software
2008 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
2009 * continue the handling there.
2010 * NB: Must be 8 instructions or less to fit in trap table and code must
2011 *     be relocatable.
2012 */
2013#if defined(lint)
2014
2015void
2016dcache_parity_tl1_instr(void)
2017{}
2018
2019#else	/* lint */
2020	ENTRY_NP(dcache_parity_tl1_instr)
	/*
	 * The whole trap table entry is generated by CH_ERR_TL1_TRAPENTRY
	 * (defined outside this part of the file).  SWTRAP_1 names the
	 * software trap to raise; its TL>0 handler,
	 * dcache_parity_tl1_cont_instr, continues the processing.
	 */
2021	CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
2022	SET_SIZE(dcache_parity_tl1_instr)
2023
2024#endif	/* lint */
2025
2026
2027/*
2028 * Software trap 1 at TL>0.
2029 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
2030 * of the various architecture-specific files.  This is used as a continuation
2031 * of the dcache parity handling where we've bought an extra TL level, so we
2032 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2033 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2034 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
2035 * order two bits from %g1 and %g2 respectively).
2036 * NB: Must be 8 instructions or less to fit in trap table and code must
2037 *     be relocatable.
2038 */
2039#if defined(lint)
2040
2041void
2042dcache_parity_tl1_cont_instr(void)
2043{}
2044
2045#else	/* lint */
2046	ENTRY_NP(dcache_parity_tl1_cont_instr)
	/*
	 * The whole trap table entry is generated by CH_ERR_TL1_SWTRAPENTRY
	 * (defined outside this part of the file), which is given the
	 * handler to continue in: dcache_parity_tl1_err.
	 */
2047	CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
2048	SET_SIZE(dcache_parity_tl1_cont_instr)
2049
2050#endif	/* lint */
2051
2052/*
2053 * D$ parity error at TL>0 handler
2054 * We get here via trap 71 at TL>0->Software trap 1 at TL>0.  We enter
2055 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2056 */
2057#if defined(lint)
2058
2059void
2060dcache_parity_tl1_err(void)
2061{}
2062
2063#else	/* lint */
2064
2065	ENTRY_NP(dcache_parity_tl1_err)
2066
2067	/*
2068	 * This macro saves all the %g registers in the ch_err_tl1_data
2069	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2070	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
2071	 * the ch_err_tl1_data structure and %g2 will have the original
2072	 * flags in the ch_err_tl1_data structure.  All %g registers
2073	 * except for %g1 and %g2 will be available.
	 *
	 * Accordingly, the code between here and CH_ERR_TL1_EXIT uses only
	 * %g3-%g7 as working registers.
2074	 */
2075	CH_ERR_TL1_ENTER(CH_ERR_DPE);
2076
2077#ifdef TRAPTRACE
2078	/*
2079	 * Get current trap trace entry physical pointer.
	 *
	 * %g6 is made to point at this CPU's entry in trap_trace_ctl (the
	 * index from CPU_INDEX scaled by TRAPTR_SIZE_SHIFT).  If that
	 * entry's TRAPTR_LIMIT is zero, tracing is skipped altogether.
	 * Otherwise %g5 = TRAPTR_PBASE + TRAPTR_OFFSET, the address of the
	 * entry to fill in.
2080	 */
2081	CPU_INDEX(%g6, %g5)
2082	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
2083	set	trap_trace_ctl, %g5
2084	add	%g6, %g5, %g6
2085	ld	[%g6 + TRAPTR_LIMIT], %g5
2086	tst	%g5
2087	be	%icc, dpe_tl1_skip_tt
2088	  nop
2089	ldx	[%g6 + TRAPTR_PBASE], %g5
2090	ld	[%g6 + TRAPTR_OFFSET], %g4
2091	add	%g5, %g4, %g5
2092
2093	/*
2094	 * Create trap trace entry.
	 *
	 * The current %asi is saved in %g7, switched to TRAPTR_ASI for the
	 * stores into the entry, and restored afterwards.  The entry
	 * records STICK, %tl, %tt, %tpc, %tstate and %sp; the TR and F1-F4
	 * fields are written as zero.
2095	 */
2096	rd	%asi, %g7
2097	wr	%g0, TRAPTR_ASI, %asi
2098	rd	STICK, %g4
2099	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
2100	rdpr	%tl, %g4
2101	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
2102	rdpr	%tt, %g4
2103	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
2104	rdpr	%tpc, %g4
2105	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
2106	rdpr	%tstate, %g4
2107	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
2108	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2109	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2110	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
2111	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
2112	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
2113	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
2114	wr	%g0, %g7, %asi
2115
2116	/*
2117	 * Advance trap trace pointer.
	 *
	 * The offset just used is remembered in TRAPTR_LAST_OFFSET.  The
	 * new offset is the old one plus TRAP_ENT_SIZE, wrapped back to 0
	 * when it would reach or pass (TRAPTR_LIMIT - TRAP_ENT_SIZE).
2118	 */
2119	ld	[%g6 + TRAPTR_OFFSET], %g5
2120	ld	[%g6 + TRAPTR_LIMIT], %g4
2121	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
2122	add	%g5, TRAP_ENT_SIZE, %g5
2123	sub	%g4, TRAP_ENT_SIZE, %g4
2124	cmp	%g5, %g4
2125	movge	%icc, 0, %g5
2126	st	%g5, [%g6 + TRAPTR_OFFSET]
2127dpe_tl1_skip_tt:
2128#endif	/* TRAPTRACE */
2129
2130	/*
2131	 * I$ and D$ are automatically turned off by HW when the CPU hits
2132	 * a dcache or icache parity error so we will just leave those two
2133	 * off for now to avoid repeating this trap.
2134	 * For Panther, however, since we trap on P$ data parity errors
2135	 * and HW does not automatically disable P$, we need to disable it
2136	 * here so that we don't encounter any recursive traps when we
2137	 * issue the retry.
	 *
	 * The single bit at position DCU_PE_SHIFT is cleared in the DCU
	 * value; all other DCU bits are written back unchanged.
2138	 */
2139	ldxa	[%g0]ASI_DCU, %g3
2140	mov	1, %g4
2141	sllx	%g4, DCU_PE_SHIFT, %g4
2142	andn	%g3, %g4, %g3
2143	stxa	%g3, [%g0]ASI_DCU
2144	membar	#Sync
2145
2146	/*
2147	 * We fall into this macro if we've successfully logged the error in
2148	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2149	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
2150	 * Restores the %g registers and issues retry.
2151	 */
2152	CH_ERR_TL1_EXIT;
2153	SET_SIZE(dcache_parity_tl1_err)
2154
2155#endif	/* lint */
2156
2157/*
2158 * I$ parity error trap (trap 72) at TL=0.
2159 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
2160 * the various architecture-specific files.  This merely sets up the
2161 * arguments for cpu_parity_error and calls it via sys_trap.
2162 * NB: Must be 8 instructions or less to fit in trap table and code must
2163 *     be relocatable.
2164 */
2165#if defined(lint)
2166
2167void
2168icache_parity_instr(void)
2169{}
2170
2171#else	/* lint */
2172
	/*
	 * Trap-table stub: hand the TL=0 I$ parity error to sys_trap.
	 * Register setup performed here before the jump:
	 *	%g1 = address of cpu_parity_error
	 *	%g2 = CH_ERR_IPE
	 *	%g3 = %tpc
	 *	%g4 = PIL_15 (loaded in the jmp delay slot)
	 * %g7 is scratch, used to build the address of sys_trap.
	 * NOTE(review): "set" is a synthetic instruction; if it expands to
	 * two instructions here the stub already fills all 8 trap-table
	 * slots -- confirm before adding any instruction.
	 */
2173	ENTRY_NP(icache_parity_instr)
2174	membar	#Sync			! Cheetah+ requires membar #Sync
2175	set	cpu_parity_error, %g1
2176	or	%g0, CH_ERR_IPE, %g2
2177	rdpr	%tpc, %g3
2178	sethi	%hi(sys_trap), %g7
2179	jmp	%g7 + %lo(sys_trap)
2180	  mov	PIL_15, %g4		! run at pil 15
2181	SET_SIZE(icache_parity_instr)
2182
2183#endif	/* lint */
2184
2185/*
2186 * I$ parity error trap (trap 72) at TL>0.
2187 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
2188 * the various architecture-specific files.  This generates a "Software
2189 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
2190 * continue the handling there.
2191 * NB: Must be 8 instructions or less to fit in trap table and code must
2192 *     be relocatable.
2193 */
2194#if defined(lint)
2195
2196void
2197icache_parity_tl1_instr(void)
2198{}
2199
2200#else	/* lint */
	/*
	 * The whole stub is the CH_ERR_TL1_TRAPENTRY macro, instantiated
	 * with SWTRAP_2.  The macro is defined outside this file section;
	 * NOTE(review): presumably it saves %g1/%g2 and raises the given
	 * software trap, as the block comment above describes -- confirm
	 * against its definition.
	 */
2201	ENTRY_NP(icache_parity_tl1_instr)
2202	CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
2203	SET_SIZE(icache_parity_tl1_instr)
2204
2205#endif	/* lint */
2206
2207/*
2208 * Software trap 2 at TL>0.
2209 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
2210 * of the various architecture-specific files.  This is used as a continuation
2211 * of the icache parity handling where we've bought an extra TL level, so we
2212 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2213 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2214 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
2215 * order two bits from %g1 and %g2 respectively).
2216 * NB: Must be 8 instructions or less to fit in trap table and code must
2217 *     be relocatable.
2218 */
2219#if defined(lint)
2220
2221void
2222icache_parity_tl1_cont_instr(void)
2223{}
2224
2225#else	/* lint */
	/*
	 * The whole stub is the CH_ERR_TL1_SWTRAPENTRY macro, which is given
	 * icache_parity_tl1_err as the routine to continue in.  The macro is
	 * defined outside this file section; NOTE(review): presumably it
	 * stashes %g1/%g2 in %tpc/%tnpc/%tstate before transferring control,
	 * as the block comment above describes -- confirm.
	 */
2226	ENTRY_NP(icache_parity_tl1_cont_instr)
2227	CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
2228	SET_SIZE(icache_parity_tl1_cont_instr)
2229
2230#endif	/* lint */
2231
2232
2233/*
2234 * I$ parity error at TL>0 handler
2235 * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
2236 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2237 */
2238#if defined(lint)
2239
2240void
2241icache_parity_tl1_err(void)
2242{}
2243
2244#else	/* lint */
2245
	/*
	 * Flow of this handler:
	 *   1. CH_ERR_TL1_ENTER(CH_ERR_IPE) records the error.
	 *   2. If built with TRAPTRACE, append one entry to this CPU's trap
	 *      trace buffer (skipped when the buffer limit is zero).
	 *   3. CH_ERR_TL1_EXIT finishes the handler.
	 * %g1 and %g2 are left untouched between ENTER and EXIT; the
	 * TRAPTRACE code uses only %g4-%g7 as scratch.
	 */
2246	ENTRY_NP(icache_parity_tl1_err)
2247
2248	/*
2249	 * This macro saves all the %g registers in the ch_err_tl1_data
2250	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2251	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
2252	 * the ch_err_tl1_data structure and %g2 will have the original
2253	 * flags in the ch_err_tl1_data structure.  All %g registers
2254	 * except for %g1 and %g2 will be available.
2255	 */
2256	CH_ERR_TL1_ENTER(CH_ERR_IPE);
2257
2258#ifdef TRAPTRACE
2259	/*
2260	 * Get current trap trace entry physical pointer.
	 *
	 * %g6 = trap_trace_ctl + (value from CPU_INDEX << TRAPTR_SIZE_SHIFT),
	 *	 i.e. the control record used for this CPU.
	 * If that record's TRAPTR_LIMIT is zero, tracing is skipped.
	 * %g5 = TRAPTR_PBASE + TRAPTR_OFFSET, the entry to fill in.
2261	 */
2262	CPU_INDEX(%g6, %g5)
2263	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
2264	set	trap_trace_ctl, %g5
2265	add	%g6, %g5, %g6
2266	ld	[%g6 + TRAPTR_LIMIT], %g5
2267	tst	%g5
2268	be	%icc, ipe_tl1_skip_tt
2269	  nop
2270	ldx	[%g6 + TRAPTR_PBASE], %g5
2271	ld	[%g6 + TRAPTR_OFFSET], %g4
2272	add	%g5, %g4, %g5
2273
2274	/*
2275	 * Create trap trace entry.
	 *
	 * %asi is saved in %g7 and switched to TRAPTR_ASI for the stores,
	 * then restored.  Recorded fields: STICK, %tl, %tt, %tpc, %tstate
	 * and %sp; the TR and F1-F4 fields are written as zero.
2276	 */
2277	rd	%asi, %g7
2278	wr	%g0, TRAPTR_ASI, %asi
2279	rd	STICK, %g4
2280	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
2281	rdpr	%tl, %g4
2282	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
2283	rdpr	%tt, %g4
2284	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
2285	rdpr	%tpc, %g4
2286	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
2287	rdpr	%tstate, %g4
2288	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
2289	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2290	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2291	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
2292	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
2293	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
2294	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
2295	wr	%g0, %g7, %asi
2296
2297	/*
2298	 * Advance trap trace pointer.
	 *
	 * The old offset is remembered in TRAPTR_LAST_OFFSET.  The new
	 * offset is old + TRAP_ENT_SIZE, wrapping to 0 once it reaches
	 * (TRAPTR_LIMIT - TRAP_ENT_SIZE) or beyond (signed 32-bit compare).
2299	 */
2300	ld	[%g6 + TRAPTR_OFFSET], %g5
2301	ld	[%g6 + TRAPTR_LIMIT], %g4
2302	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
2303	add	%g5, TRAP_ENT_SIZE, %g5
2304	sub	%g4, TRAP_ENT_SIZE, %g4
2305	cmp	%g5, %g4
2306	movge	%icc, 0, %g5
2307	st	%g5, [%g6 + TRAPTR_OFFSET]
2308ipe_tl1_skip_tt:
2309#endif	/* TRAPTRACE */
2310
2311	/*
2312	 * We fall into this macro if we've successfully logged the error in
2313	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2314	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
2315	 * Restores the %g registers and issues retry.
2316	 */
2317	CH_ERR_TL1_EXIT;
2318
2319	SET_SIZE(icache_parity_tl1_err)
2320
2321#endif	/* lint */
2322
2323#endif	/* CPU_IMP_L1_CACHE_PARITY */
2324
2325
2326/*
2327 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
2328 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
2329 * should only be used in places where you have no choice but to look at the
2330 * tlb itself.
2331 *
2332 * Note: These two routines are required by the Estar "cpr" loadable module.
2333 */
2334
2335#if defined(lint)
2336
2337/* ARGSUSED */
2338void
2339itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2340{}
2341
2342#else	/* lint */
2343
	/*
	 * itlb_rd_entry(entry, tte, va_tag)
	 *
	 * In:	%o0 = ITLB entry number
	 *	%o1 = where to store the 64-bit TTE data
	 *	%o2 = where to store the 64-bit tag
	 * Scratch: %g1, %g2, %o4, %o5 (and %o0, which is overwritten).
	 *
	 * The entry number is multiplied by 8 to form the diagnostic
	 * access address.  The bits covered by TAGREAD_CTX_MASK are cleared
	 * from the tag before it is stored, so *va_tag does not carry them.
	 */
2344	ENTRY_NP(itlb_rd_entry)
2345	sllx	%o0, 3, %o0			! entry number * 8
2346	ldxa	[%o0]ASI_ITLB_ACCESS, %g1	! TTE data of the entry
2347	stx	%g1, [%o1]			! *tte = data
2348	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2	! tag of the entry
2349	set	TAGREAD_CTX_MASK, %o4
2350	andn	%g2, %o4, %o5			! drop the TAGREAD_CTX_MASK bits
2351	retl
2352	  stx	%o5, [%o2]			! delay - *va_tag = masked tag
2353	SET_SIZE(itlb_rd_entry)
2354
2355#endif	/* lint */
2356
2357
2358#if defined(lint)
2359
2360/* ARGSUSED */
2361void
2362dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2363{}
2364
2365#else	/* lint */
2366
	/*
	 * dtlb_rd_entry(entry, tte, va_tag)
	 *
	 * Same sequence as itlb_rd_entry, using the DTLB diagnostic ASIs.
	 *
	 * In:	%o0 = DTLB entry number
	 *	%o1 = where to store the 64-bit TTE data
	 *	%o2 = where to store the 64-bit tag
	 * Scratch: %g1, %g2, %o4, %o5 (and %o0, which is overwritten).
	 *
	 * The bits covered by TAGREAD_CTX_MASK are cleared from the tag
	 * before it is stored.
	 */
2367	ENTRY_NP(dtlb_rd_entry)
2368	sllx	%o0, 3, %o0			! entry number * 8
2369	ldxa	[%o0]ASI_DTLB_ACCESS, %g1	! TTE data of the entry
2370	stx	%g1, [%o1]			! *tte = data
2371	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2	! tag of the entry
2372	set	TAGREAD_CTX_MASK, %o4
2373	andn	%g2, %o4, %o5			! drop the TAGREAD_CTX_MASK bits
2374	retl
2375	  stx	%o5, [%o2]			! delay - *va_tag = masked tag
2376	SET_SIZE(dtlb_rd_entry)
2377#endif	/* lint */
2378
2379
2380#if !(defined(JALAPENO) || defined(SERRANO))
2381
2382#if defined(lint)
2383
2384uint64_t
2385get_safari_config(void)
2386{ return (0); }
2387
2388#else	/* lint */
2389
	/*
	 * uint64_t get_safari_config(void)
	 *
	 * Returns in %o0 the 64-bit value read from address 0 of
	 * ASI_SAFARI_CONFIG.  No other register is modified.
	 * Not built for JALAPENO or SERRANO (see the enclosing #if).
	 */
2390	ENTRY(get_safari_config)
2391	ldxa	[%g0]ASI_SAFARI_CONFIG, %o0
2392	retl
2393	nop
2394	SET_SIZE(get_safari_config)
2395
2396#endif	/* lint */
2397
2398
2399#if defined(lint)
2400
2401/* ARGSUSED */
2402void
2403set_safari_config(uint64_t safari_config)
2404{}
2405
2406#else	/* lint */
2407
	/*
	 * void set_safari_config(uint64_t safari_config)
	 *
	 * Writes %o0 to address 0 of ASI_SAFARI_CONFIG and issues a
	 * membar #Sync before returning.
	 * Not built for JALAPENO or SERRANO (see the enclosing #if).
	 */
2408	ENTRY(set_safari_config)
2409	stxa	%o0, [%g0]ASI_SAFARI_CONFIG
2410	membar	#Sync
2411	retl
2412	nop
2413	SET_SIZE(set_safari_config)
2414
2415#endif	/* lint */
2416
2417#endif	/* !(JALAPENO || SERRANO) */
2418
2419
2420#if defined(lint)
2421
/*
 * Lint stub.  The name must match the assembly entry point below
 * (cpu_clearticknpt); it was previously misspelled cpu_cleartickpnt.
 */
2422void
2423cpu_clearticknpt(void)
2424{}
2425
2426#else	/* lint */
2427	/*
2428	 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
2429	 * registers. In an effort to make the change in the
2430	 * tick/stick counter as consistent as possible, we disable
2431	 * all interrupts while we're changing the registers. We also
2432	 * ensure that the read and write instructions are in the same
2433	 * line in the instruction cache.
	 *
	 * Register usage:
	 *	%g1 - saved %pstate, restored in the final delay slot
	 *	%g2 - value read from %tick / STICK
	 *	%g3 - %pstate with IE cleared, then the NPT mask (1 << 63)
	 *	%g4 - return linkage: this routine returns with
	 *	      "jmp %g4 + 4", not through %o7.
	 *	      NOTE(review): callers are assumed to enter with their
	 *	      own call address in %g4 -- confirm at the call sites.
	 *
	 * Each counter is first tested with brgez: a non-negative value
	 * means bit 63 (NPT) is already clear and the write is skipped.
	 * Otherwise the counter is re-read and written back XORed with the
	 * mask (the wrpr/wr forms used here write rs1 xor rs2), which
	 * clears the bit that was just seen set.
2434	 */
2435	ENTRY_NP(cpu_clearticknpt)
2436	rdpr	%pstate, %g1		/* save processor state */
2437	andn	%g1, PSTATE_IE, %g3	/* turn off */
2438	wrpr	%g0, %g3, %pstate	/*   interrupts */
2439	rdpr	%tick, %g2		/* get tick register */
2440	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
2441	mov	1, %g3			/* create mask */
2442	sllx	%g3, 63, %g3		/*   for NPT bit */
2443	ba,a,pt	%xcc, 2f
2444	.align	8			/* Ensure rd/wr in same i$ line */
24452:
2446	rdpr	%tick, %g2		/* get tick register */
2447	wrpr	%g3, %g2, %tick		/* write tick register, */
2448					/*   clearing NPT bit   */
24491:
2450	rd	STICK, %g2		/* get stick register */
2451	brgez,pn %g2, 3f		/* if NPT bit off, we're done */
2452	mov	1, %g3			/* create mask */
2453	sllx	%g3, 63, %g3		/*   for NPT bit */
2454	ba,a,pt	%xcc, 4f
2455	.align	8			/* Ensure rd/wr in same i$ line */
24564:
2457	rd	STICK, %g2		/* get stick register */
2458	wr	%g3, %g2, STICK		/* write stick register, */
2459					/*   clearing NPT bit   */
24603:
2461	jmp	%g4 + 4			/* return via %g4 linkage */
2462	wrpr	%g0, %g1, %pstate	/* restore processor state */
2463
2464	SET_SIZE(cpu_clearticknpt)
2465
2466#endif	/* lint */
2467
2468
2469#if defined(CPU_IMP_L1_CACHE_PARITY)
2470
2471#if defined(lint)
2472/*
2473 * correct_dcache_parity(size_t size, size_t linesize)
2474 *
2475 * Correct D$ data parity by zeroing the data and initializing microtag
2476 * for all indexes and all ways of the D$.
2477 *
2478 */
2479/* ARGSUSED */
2480void
2481correct_dcache_parity(size_t size, size_t linesize)
2482{}
2483
2484#else	/* lint */
2485
2486	ENTRY(correct_dcache_parity)
2487	/*
2488	 * Register Usage:
2489	 *
2490	 * %o0 = input D$ size
2491	 * %o1 = input D$ line size
2492	 * %o2 = scratch
2493	 * %o3 = scratch
2494	 * %o4 = scratch
	 *
	 * Loop structure:
	 *   outer (label 1): %o0 walks line addresses from
	 *	(size - linesize) down to 0 in steps of linesize;
	 *   inner (label 2): %o2 walks 8-byte offsets within the line
	 *	from (linesize - 8) down to 0.
	 * %o3 is computed once, before label 1, and stays constant:
	 * (1 << PN_DC_DATA_PARITY_BIT_SHIFT) on Panther, 0 otherwise.
2495	 */
2496
2497	sub	%o0, %o1, %o0			! init cache line address
2498
2499	/*
2500	 * For Panther CPUs, we also need to clear the data parity bits
2501	 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
2502	 */
2503	GET_CPU_IMPL(%o3)
2504	cmp	%o3, PANTHER_IMPL
2505	bne	1f
2506	  clr	%o3				! zero for non-Panther
2507	mov	1, %o3
2508	sll	%o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3
2509
25101:
2511	/*
2512	 * Set utag = way since it must be unique within an index.
2513	 */
2514	srl	%o0, 14, %o2			! get cache way (DC_way)
2515	membar	#Sync				! required before ASI_DC_UTAG
2516	stxa	%o2, [%o0]ASI_DC_UTAG		! set D$ utag = cache way
2517	membar	#Sync				! required after ASI_DC_UTAG
2518
2519	/*
2520	 * Zero line of D$ data (and data parity bits for Panther)
2521	 */
2522	sub	%o1, 8, %o2			! offset of last 8 bytes in line
2523	or	%o0, %o3, %o4			! same address + DC_data_parity
25242:
2525	membar	#Sync				! required before ASI_DC_DATA
2526	stxa	%g0, [%o0 + %o2]ASI_DC_DATA	! zero 8 bytes of D$ data
2527	membar	#Sync				! required after ASI_DC_DATA
2528	/*
2529	 * We also clear the parity bits if this is a panther. For non-Panther
2530	 * CPUs, we simply end up clearing the $data register twice.
2531	 */
2532	stxa	%g0, [%o4 + %o2]ASI_DC_DATA
2533	membar	#Sync
2534
2535	subcc	%o2, 8, %o2			! previous 8-byte offset
2536	bge	2b				! loop while offset >= 0
2537	nop
2538
2539	subcc	%o0, %o1, %o0			! previous cache line
2540	bge	1b				! loop while line address >= 0
2541	nop
2542
2543	retl
2544	  nop
2545	SET_SIZE(correct_dcache_parity)
2546
2547#endif	/* lint */
2548
2549#endif	/* CPU_IMP_L1_CACHE_PARITY */
2550
2551
2552#if defined(lint)
2553/*
2554 *  Get timestamp (stick).
2555 */
2556/* ARGSUSED */
2557void
2558stick_timestamp(int64_t *ts)
2559{
2560}
2561
2562#else	/* lint */
2563
	/*
	 * void stick_timestamp(int64_t *ts)
	 *
	 * Stores the current STICK value, with bit 63 forced to zero, to
	 * the 64-bit location addressed by %o0.  %g1 is used as scratch.
	 * Bit 63 is removed by shifting left then logically right by one.
	 */
2564	ENTRY_NP(stick_timestamp)
2565	rd	STICK, %g1	! read stick reg
2566	sllx	%g1, 1, %g1
2567	srlx	%g1, 1, %g1	! clear npt bit
2568
2569	retl
2570	stx     %g1, [%o0]	! store the timestamp
2571	SET_SIZE(stick_timestamp)
2572
2573#endif	/* lint */
2574
2575
2576#if defined(lint)
2577/*
2578 * Set STICK adjusted by skew.
2579 */
2580/* ARGSUSED */
2581void
2582stick_adj(int64_t skew)
2583{
2584}
2585
2586#else	/* lint */
2587
	/*
	 * void stick_adj(int64_t skew)
	 *
	 * Adds %o0 (skew) to STICK with interrupts disabled.
	 *
	 * Register usage:
	 *	%g1 - saved %pstate, restored in the retl delay slot
	 *	%g3 - %pstate with PSTATE_IE cleared
	 *	%g4 - STICK value read
	 *	%o1 - STICK + skew, written back to STICK
	 *
	 * The read/add/write sequence is placed right after a 16-byte
	 * alignment boundary (branch to label 1) so that it is cache
	 * aligned, keeping the window between read and write short.
	 */
2588	ENTRY_NP(stick_adj)
2589	rdpr	%pstate, %g1		! save processor state
2590	andn	%g1, PSTATE_IE, %g3
2591	ba	1f			! cache align stick adj
2592	wrpr	%g0, %g3, %pstate	! turn off interrupts
2593
2594	.align	16
25951:	nop
2596
2597	rd	STICK, %g4		! read stick reg
2598	add	%g4, %o0, %o1		! adjust stick with skew
2599	wr	%o1, %g0, STICK		! write stick reg
2600
2601	retl
2602	wrpr	%g1, %pstate		! restore processor state
2603	SET_SIZE(stick_adj)
2604
2605#endif	/* lint */
2606
2607#if defined(lint)
2608/*
2609 * Debugger-specific stick retrieval
2610 */
2611/*ARGSUSED*/
2612int
2613kdi_get_stick(uint64_t *stickp)
2614{
2615	return (0);
2616}
2617
2618#else	/* lint */
2619
	/*
	 * int kdi_get_stick(uint64_t *stickp)
	 *
	 * Stores the raw STICK value (bit 63 is not masked here, unlike
	 * stick_timestamp) to *stickp and always returns 0.
	 * %g1 is used as scratch.
	 */
2620	ENTRY_NP(kdi_get_stick)
2621	rd	STICK, %g1
2622	stx	%g1, [%o0]		! *stickp = STICK
2623	retl
2624	mov	%g0, %o0		! delay - return 0
2625	SET_SIZE(kdi_get_stick)
2626
2627#endif	/* lint */
2628
2629#if defined(lint)
2630/*
2631 * Invalidate the specified line from the D$.
2632 *
2633 * Register usage:
2634 *	%o0 - index for the invalidation, specifies DC_way and DC_addr
2635 *
2636 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
2637 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
2638 *
2639 * The format of the stored 64-bit value is:
2640 *
2641 *	+----------+--------+----------+
2642 *	| Reserved | DC_tag | DC_valid |
2643 *	+----------+--------+----------+
2644 *       63      31 30     1	      0
2645 *
2646 * DC_tag is the 30-bit physical tag of the associated line.
2647 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
2648 *
2649 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
2650 *
2651 *	+----------+--------+----------+----------+
2652 *	| Reserved | DC_way | DC_addr  | Reserved |
2653 *	+----------+--------+----------+----------+
2654 *       63      16 15    14 13       5 4        0
2655 *
2656 * DC_way is a 2-bit index that selects one of the 4 ways.
2657 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
2658 *
2659 * Setting the DC_valid bit to zero for the specified DC_way and
2660 * DC_addr index into the D$ results in an invalidation of a D$ line.
2661 */
2662/*ARGSUSED*/
2663void
2664dcache_inval_line(int index)
2665{
2666}
2667#else	/* lint */
	/*
	 * void dcache_inval_line(int index)
	 *
	 * %o0 holds the index; it is shifted left by 5 (32-bit shift) to
	 * form the ASI_DC_TAG address, and a zero tag/valid word is stored
	 * there, followed by membar #Sync.  %o0 is overwritten.
	 */
2668	ENTRY(dcache_inval_line)
2669	sll	%o0, 5, %o0		! shift index into DC_way and DC_addr
2670	stxa	%g0, [%o0]ASI_DC_TAG	! zero the DC_valid and DC_tag bits
2671	membar	#Sync
2672	retl
2673	nop
2674	SET_SIZE(dcache_inval_line)
2675#endif	/* lint */
2676
2677#if defined(lint)
2678/*
2679 * Invalidate the entire I$
2680 *
2681 * Register usage:
2682 *	%o0 - specifies IC_way, IC_addr, IC_tag
2683 *	%o1 - scratch
2684 *	%o2 - used to save and restore DCU value
2685 *	%o3 - scratch
2686 *	%o5 - used to save and restore PSTATE
2687 *
2688 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
2689 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
2690 * block out snoops and invalidates to the I$, causing I$ consistency
2691 * to be broken. Before turning on the I$, all I$ lines must be invalidated.
2692 *
2693 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
2694 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
2695 * info below describes store (write) use of ASI_IC_TAG. Note that read
2696 * use of ASI_IC_TAG behaves differently.
2697 *
2698 * The format of the stored 64-bit value is:
2699 *
2700 *	+----------+--------+---------------+-----------+
2701 *	| Reserved | Valid  | IC_vpred<7:0> | Undefined |
2702 *	+----------+--------+---------------+-----------+
2703 *       63      55    54    53           46 45        0
2704 *
2705 * Valid is the 1-bit valid field for both the physical and snoop tags.
2706 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
2707 *	the 32-byte boundary aligned address specified by IC_addr.
2708 *
2709 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
2710 *
2711 *	+----------+--------+---------+--------+---------+
2712 *	| Reserved | IC_way | IC_addr | IC_tag |Reserved |
2713 *	+----------+--------+---------+--------+---------+
2714 *       63      16 15    14 13      5 4      3 2       0
2715 *
2716 * IC_way is a 2-bit index that selects one of the 4 ways.
2717 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
2718 * IC_addr[5] is a "don't care" for a store.
2719 * IC_tag set to 2 specifies that the stored value is to be interpreted
2720 *	as containing Valid and IC_vpred as described above.
2721 *
2722 * Setting the Valid bit to zero for the specified IC_way and
2723 * IC_addr index into the I$ results in an invalidation of an I$ line.
2724 */
2725/*ARGSUSED*/
2726void
2727icache_inval_all(void)
2728{
2729}
2730#else	/* lint */
	/*
	 * void icache_inval_all(void)
	 *
	 * Runs CH_ICACHE_FLUSHALL with interrupts disabled; the caller's
	 * %pstate is kept in %o5 and restored in the retl delay slot.
	 *
	 * The I$ size (%o0) and line size (%o1) come from one of two
	 * places:
	 *   - the CHPR_ICACHE_SIZE / CHPR_ICACHE_LINESIZE fields at the
	 *     pointer GET_CPU_PRIVATE_PTR leaves in %o0, or
	 *   - the globals icache_size / icache_linesize, when the macro
	 *     transfers to icache_inval_all_1.
	 * NOTE(review): the macro is defined elsewhere; presumably it
	 * branches to the label when the CPU private area is absent --
	 * confirm against its definition.
	 */
2731	ENTRY(icache_inval_all)
2732	rdpr	%pstate, %o5
2733	andn	%o5, PSTATE_IE, %o3
2734	wrpr	%g0, %o3, %pstate	! clear IE bit
2735
2736	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
2737	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
2738	ba,pt	%icc, 2f
2739	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0	! delay - %o0 reused for size
2740icache_inval_all_1:
2741	ASM_LD(%o0, icache_size)
2742	ASM_LD(%o1, icache_linesize)
27432:
2744	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
2745
2746	retl
2747	wrpr	%g0, %o5, %pstate	! restore earlier pstate
2748	SET_SIZE(icache_inval_all)
2749#endif	/* lint */
2750
2751
2752#if defined(lint)
2753/* ARGSUSED */
2754void
2755cache_scrubreq_tl1(uint64_t inum, uint64_t index)
2756{
2757}
2758
2759#else	/* lint */
2760/*
2761 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
2762 * crosstrap.  It atomically increments the outstanding request counter and,
2763 * if there was not already an outstanding request, branches to setsoftint_tl1
2764 * to enqueue an intr_vec for the given inum.
 *
 * The increment is a plain load/add/store; as noted in the body, it relies
 * on running at TL1 rather than on an atomic instruction.  The handler ends
 * with "retry" when the counter was already non-zero, and also at label 1
 * if GET_CPU_PRIVATE_PTR transfers there.
 * NOTE(review): GET_CPU_PRIVATE_PTR is defined elsewhere; presumably it
 * leaves (private pointer + %g2) in %g4 and branches to the label when no
 * private area exists -- confirm.
2765 */
2766
2767	! Register usage:
2768	!
2769	! Arguments:
2770	! %g1 - inum
2771	! %g2 - index into chsm_outstanding array
2772	!
2773	! Internal:
2774	! %g2, %g3, %g5 - scratch
2775	! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2776	! %g6 - setsoftint_tl1 address
2777
2778	ENTRY_NP(cache_scrubreq_tl1)
2779	mulx	%g2, CHSM_OUTSTANDING_INCR, %g2	! byte offset of element
2780	set	CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
2781	add	%g2, %g3, %g2			! offset within private area
2782	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
2783	ld	[%g4], %g2		! cpu's chsm_outstanding[index]
2784	!
2785	! no need to use atomic instructions for the following
2786	! increment - we're at tl1
2787	!
2788	add	%g2, 0x1, %g3
2789	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
2790	  st	%g3, [%g4]		! delay - store incremented counter
2791	ASM_JMP(%g6, setsoftint_tl1)
2792	! not reached
27931:
2794	retry
2795	SET_SIZE(cache_scrubreq_tl1)
2796
2797#endif	/* lint */
2798
2799
2800#if defined(lint)
2801
2802/* ARGSUSED */
2803void
2804get_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
2805{}
2806
2807#else	/* lint */
2808
2809/*
2810 * Get the error state for the processor.
2811 * Note that this must not be used at TL>0
 *
 * In:	%o0 = ch_cpu_errors_t buffer to fill in (left unmodified, so a
 *	caller may reuse it after the call)
 * Scratch: %o1, %o2, %o3
 *
 * Fields written, by build variant:
 *   CHEETAH_PLUS:  SHADOW_AFSR and SHADOW_AFAR are read from the shadow
 *		    registers; AFSR_EXT and SHADOW_AFSR_EXT are read on
 *		    Panther and stored as zero on other implementations.
 *   otherwise:     all four of those fields are stored as zero.
 *   SERRANO:	    AFAR2 is additionally read via ASI_MCU_CTRL.
 *   always:	    primary AFSR and AFAR, read last.
2812 */
2813	ENTRY(get_cpu_error_state)
2814#if defined(CHEETAH_PLUS)
2815	set	ASI_SHADOW_REG_VA, %o2
2816	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr reg
2817	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2818	ldxa	[%o2]ASI_AFAR, %o1		! shadow afar reg
2819	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2820	GET_CPU_IMPL(%o3)	! Only panther has AFSR_EXT registers
2821	cmp	%o3, PANTHER_IMPL
2822	bne,a	1f		! annulled: the delay store runs only if taken
2823	  stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]	! zero for non-PN
2824	set	ASI_AFSR_EXT_VA, %o2
2825	ldxa	[%o2]ASI_AFSR, %o1		! afsr_ext reg
2826	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2827	set	ASI_SHADOW_AFSR_EXT_VA, %o2
2828	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr_ext reg
2829	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2830	b	2f
2831	  nop
28321:
2833	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
28342:
2835#else	/* CHEETAH_PLUS */
2836	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2837	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2838	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2839	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2840#endif	/* CHEETAH_PLUS */
2841#if defined(SERRANO)
2842	/*
2843	 * Serrano has an afar2 which captures the address on FRC/FRU errors.
2844	 * We save this in the afar2 of the register save area.
2845	 */
2846	set	ASI_MCU_AFAR2_VA, %o2
2847	ldxa	[%o2]ASI_MCU_CTRL, %o1
2848	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR2]
2849#endif	/* SERRANO */
2850	ldxa	[%g0]ASI_AFSR, %o1		! primary afsr reg
2851	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR]
2852	ldxa	[%g0]ASI_AFAR, %o1		! primary afar reg
2853	retl
2854	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR]
2855	SET_SIZE(get_cpu_error_state)
2856#endif	/* lint */
2857
2858#if defined(lint)
2859
2860/*
2861 * Check a page of memory for errors.
2862 *
2863 * Load each 64 byte block from physical memory.
2864 * Check AFSR after each load to see if an error
2865 * was caused. If so, log/scrub that error.
2866 *
2867 * Used to determine if a page contains
2868 * CEs when CEEN is disabled.
2869 */
2870/*ARGSUSED*/
2871void
2872cpu_check_block(caddr_t va, uint_t psz)
2873{}
2874
2875#else	/* lint */
2876
	/*
	 * void cpu_check_block(caddr_t va, uint_t psz)
	 *
	 * Register usage (after the save):
	 *	%i0 - address of the current 64-byte block
	 *	%i1 - psz
	 *	%l2 - caller's %fprs, restored before returning
	 *	%l3 - AFSR value read after each block load
	 *	%l4 - number of 64-byte blocks left (psz >> 6)
	 *
	 * The stack frame reserves CH_CPU_ERROR_SIZE bytes just below %fp
	 * as the ch_cpu_errors_t buffer.  get_cpu_error_state leaves %o0
	 * unmodified, so the same buffer pointer is still in %o0 for the
	 * following call to cpu_ce_detected.
	 *
	 * The block load targets %d0 onward, so the floating-point
	 * registers it covers are overwritten; only %fprs is saved and
	 * restored here.
	 *
	 * NOTE(review): the loop tests the count only after decrementing,
	 * so psz is assumed to be at least 64.  With psz < 64 the count
	 * starts at 0 and the first dec makes it non-zero -- confirm that
	 * callers never pass such a size.
	 */
2877	ENTRY(cpu_check_block)
2878	!
2879	! get a new window with room for the error regs
2880	!
2881	save	%sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
2882	srl	%i1, 6, %l4		! clear top bits of psz
2883					! and divide by 64
2884	rd	%fprs, %l2		! store FP
2885	wr	%g0, FPRS_FEF, %fprs	! enable FP
28861:
2887	ldda	[%i0]ASI_BLK_P, %d0	! load a block
2888	membar	#Sync
2889	ldxa    [%g0]ASI_AFSR, %l3	! read afsr reg
2890	brz,a,pt %l3, 2f		! check for error
2891	nop
2892
2893	!
2894	! if error, read the error regs and log it
2895	!
2896	call	get_cpu_error_state
2897	add	%fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0
2898
2899	!
2900	! cpu_ce_detected(ch_cpu_errors_t *, flag)
2901	!
2902	call	cpu_ce_detected		! log the error
2903	mov	CE_CEEN_TIMEOUT, %o1
29042:
2905	dec	%l4			! next 64-byte block
2906	brnz,a,pt  %l4, 1b
2907	add	%i0, 64, %i0		! increment block addr
2908
2909	wr	%l2, %g0, %fprs		! restore FP
2910	ret
2911	restore
2912
2913	SET_SIZE(cpu_check_block)
2914
2915#endif	/* lint */
2916
2917#if defined(lint)
2918
2919/*
2920 * Perform a cpu logout called from C.  This is used where we did not trap
2921 * for the error but still want to gather "what we can".  Caller must make
2922 * sure cpu private area exists and that the indicated logout area is free
2923 * for use, and that we are unable to migrate cpus.
2924 */
2925/*ARGSUSED*/
2926void
2927cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
2928{ }
2929
2930#else
	/*
	 * void cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
	 *
	 * In:	%o0 = afar
	 *	%o1 = clop; advanced here by CH_CLO_DATA + CH_CHD_EC_DATA
	 *	      before being handed to GET_ECACHE_DTAGS
	 * Scratch: %o2-%o5, %g1 (holds the caller's %asi, restored after
	 *	    GET_ECACHE_DTAGS)
	 *
	 * Interrupts are disabled for the duration, and the work is
	 * bracketed by PARK_SIBLING_CORE / UNPARK_SIBLING_CORE.
	 *
	 * NOTE(review): on exit PSTATE_IE is set unconditionally rather
	 * than restoring the %pstate seen on entry, so this routine
	 * returns with interrupts enabled even if the caller had them
	 * disabled -- confirm that callers expect this.
	 */
2931	ENTRY(cpu_delayed_logout)
2932	rdpr	%pstate, %o2
2933	andn	%o2, PSTATE_IE, %o2
2934	wrpr	%g0, %o2, %pstate		! disable interrupts
2935	PARK_SIBLING_CORE(%o2, %o3, %o4)	! %o2 has DCU value
2936	add	%o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
2937	rd	%asi, %g1
2938	wr	%g0, ASI_P, %asi
2939	GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
2940	wr	%g1, %asi
2941	UNPARK_SIBLING_CORE(%o2, %o3, %o4)	! can use %o2 again
2942	rdpr	%pstate, %o2
2943	or	%o2, PSTATE_IE, %o2
2944	wrpr	%g0, %o2, %pstate		! enable interrupts
2945	retl
2946	  nop
2947	SET_SIZE(cpu_delayed_logout)
2948
2949#endif	/* lint */
2950
2951#if defined(lint)
2952
2953/*ARGSUSED*/
2954int
2955dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
2956{ return (0); }
2957
2958#else
2959
	/*
	 * int dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
	 *
	 * Loads the 32-bit word at *data into %f0 and then block-stores
	 * %d0 onward to addr through ASI_BLK_COMMIT_S, with a lofault
	 * handler (label 0) installed around the store.
	 *
	 * Returns 0 on success.  If the lofault handler is entered, returns
	 * -1 when tryagain is zero, otherwise tail-calls
	 * dtrace_blksuword32_err.
	 *
	 * Register usage (after the save):
	 *	%i0 - addr, %i1 - data, %i2 - tryagain
	 *	%l0 - %fprs on entry
	 *	%l1 - %pstate on entry, restored on every exit path
	 *	%l2 - %pstate with PSTATE_IE cleared
	 *	%l5 - address of the lofault handler (label 0)
	 *
	 * FPU handling: if FPRS_FEF was clear on entry the FPU is enabled
	 * for the duration and %fprs is put back afterwards; if it was set,
	 * %f0 is saved in the 4 bytes just below %fp and reloaded
	 * afterwards.  Both "bz,a" restore branches reuse the condition
	 * codes produced by the andcc on FPRS_FEF; no instruction in
	 * between on the straight-line path modifies them.
	 * NOTE(review): the lofault path (label 0) also tests those
	 * condition codes, so it presumably relies on them surviving the
	 * fault -- confirm.
	 */
2960	ENTRY(dtrace_blksuword32)
2961	save	%sp, -SA(MINFRAME + 4), %sp
2962
2963	rdpr	%pstate, %l1
2964	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
2965	wrpr	%g0, %l2, %pstate		! protect our FPU diddling
2966
2967	rd	%fprs, %l0
2968	andcc	%l0, FPRS_FEF, %g0
2969	bz,a,pt	%xcc, 1f			! if the fpu is disabled
2970	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu
2971
2972	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
29731:
2974	set	0f, %l5
2975        /*
2976         * We're about to write a block full of either total garbage
2977         * (not kernel data, don't worry) or user floating-point data
2978         * (so it only _looks_ like garbage).
2979         */
2980	ld	[%i1], %f0			! modify the block
2981	membar	#Sync
2982	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
2983	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
2984	membar	#Sync
2985	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2986
2987	bz,a,pt	%xcc, 1f
2988	wr	%g0, %l0, %fprs			! restore %fprs
2989
2990	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
29911:
2992
2993	wrpr	%g0, %l1, %pstate		! restore interrupts
2994
2995	ret
2996	restore	%g0, %g0, %o0			! return 0
2997
29980:
2999	membar	#Sync
3000	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
3001
3002	bz,a,pt	%xcc, 1f
3003	wr	%g0, %l0, %fprs			! restore %fprs
3004
3005	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
30061:
3007
3008	wrpr	%g0, %l1, %pstate		! restore interrupts
3009
3010	/*
3011	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
3012	 * which deals with watchpoints. Otherwise, just return -1.
3013	 */
3014	brnz,pt	%i2, 1f
3015	nop
3016	ret
3017	restore	%g0, -1, %o0
30181:
3019	call	dtrace_blksuword32_err
3020	restore
3021
3022	SET_SIZE(dtrace_blksuword32)
3023
3024#endif /* lint */
3025
3026#ifdef	CHEETAHPLUS_ERRATUM_25
3027
3028#if	defined(lint)
3029/*
3030 * Claim a chunk of physical address space.
3031 */
3032/*ARGSUSED*/
3033void
3034claimlines(uint64_t pa, size_t sz, int stride)
3035{}
3036#else	/* lint */
	/*
	 * void claimlines(uint64_t pa, size_t sz, int stride)
	 *
	 * In:	%o0 = pa, %o1 = sz (used as the running offset),
	 *	%o2 = stride
	 * Scratch: %o3 = pa + current offset
	 *
	 * Each pass subtracts stride from the offset.  While that
	 * subtraction does not borrow (bgeu), the annulled delay slot
	 * issues a casxa through ASI_MEM at pa + offset, so addresses are
	 * visited from (pa + sz - stride) downward.  The casxa compares
	 * against %g0 and swaps in %g0, so it cannot change the contents
	 * of memory.  When the subtraction borrows, the branch is not
	 * taken, the casxa is annulled and the routine returns after a
	 * membar #Sync.
	 */
3037	ENTRY(claimlines)
30381:
3039	subcc	%o1, %o2, %o1
3040	add	%o0, %o1, %o3
3041	bgeu,a,pt	%xcc, 1b
3042	casxa	[%o3]ASI_MEM, %g0, %g0
3043	membar  #Sync
3044	retl
3045	nop
3046	SET_SIZE(claimlines)
3047#endif	/* lint */
3048
3049#if	defined(lint)
3050/*
3051 * CPU feature initialization,
3052 * turn BPE off,
3053 * get device id.
3054 */
3055/*ARGSUSED*/
3056void
3057cpu_feature_init(void)
3058{}
3059#else	/* lint */
	/*
	 * void cpu_feature_init(void)
	 *
	 * Steps:
	 *   1. If the global cheetah_bpe_off is non-zero, clear
	 *	ASR_DISPATCH_CONTROL_BPE in ASR_DISPATCH_CONTROL.
	 *   2. Read ASI_DEVICE_SERIAL_ID and store it at
	 *	cpunodes + DEVICE_ID + (value from CPU_INDEX * CPU_NODE_SIZE).
	 *   3. With CHEETAHPLUS_ERRATUM_34, call itlb_erratum34_fixup and
	 *	dtlb_erratum34_fixup.
	 * A register window is allocated; %o0-%o2 of the new window are
	 * used as scratch.
	 */
3060	ENTRY(cpu_feature_init)
3061	save	%sp, -SA(MINFRAME), %sp
3062	sethi	%hi(cheetah_bpe_off), %o0
3063	ld	[%o0 + %lo(cheetah_bpe_off)], %o0
3064	brz	%o0, 1f
3065	nop
3066	rd	ASR_DISPATCH_CONTROL, %o0
3067	andn	%o0, ASR_DISPATCH_CONTROL_BPE, %o0
3068	wr	%o0, 0, ASR_DISPATCH_CONTROL
30691:
3070	!
3071	! get the device_id and store the device_id
3072	! in the appropriate cpunodes structure
3073	! given the cpus index
3074	!
3075	CPU_INDEX(%o0, %o1)
3076	mulx %o0, CPU_NODE_SIZE, %o0
3077	set  cpunodes + DEVICE_ID, %o1
3078	ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2
3079	stx  %o2, [%o0 + %o1]
3080#ifdef	CHEETAHPLUS_ERRATUM_34
3081	!
3082	! apply Cheetah+ erratum 34 workaround
3083	!
3084	call itlb_erratum34_fixup
3085	  nop
3086	call dtlb_erratum34_fixup
3087	  nop
3088#endif	/* CHEETAHPLUS_ERRATUM_34 */
3089	ret
3090	  restore
3091	SET_SIZE(cpu_feature_init)
3092#endif	/* lint */
3093
3094#if	defined(lint)
3095/*
3096 * Copy a tsb entry atomically, from src to dest.
3097 * src must be 128 bit aligned.
3098 */
3099/*ARGSUSED*/
3100void
3101copy_tsb_entry(uintptr_t src, uintptr_t dest)
3102{}
3103#else	/* lint */
	/*
	 * void copy_tsb_entry(uintptr_t src, uintptr_t dest)
	 *
	 * In:	%o0 = src, %o1 = dest
	 * Scratch: %o2 (tag), %o3 (data)
	 *
	 * The source is read with a single ldda through ASI_NQUAD_LD into
	 * the %o2/%o3 pair; the destination is then written with two
	 * separate 8-byte stores (tag at dest, data at dest + 8), so only
	 * the read side is a single access.
	 */
3104	ENTRY(copy_tsb_entry)
3105	ldda	[%o0]ASI_NQUAD_LD, %o2		! %o2 = tag, %o3 = data
3106	stx	%o2, [%o1]
3107	stx	%o3, [%o1 + 8 ]
3108	retl
3109	nop
3110	SET_SIZE(copy_tsb_entry)
3111#endif	/* lint */
3112
3113#endif	/* CHEETAHPLUS_ERRATUM_25 */
3114
3115#ifdef	CHEETAHPLUS_ERRATUM_34
3116
3117#if	defined(lint)
3118
3119/*ARGSUSED*/
3120void
3121itlb_erratum34_fixup(void)
3122{}
3123
3124#else	/* lint */
3125
3126	!
3127	! In Cheetah+ erratum 34, under certain conditions an ITLB locked
3128	! index 0 TTE will erroneously be displaced when a new TTE is
3129	! loaded via ASI_ITLB_IN.  In order to avoid cheetah+ erratum 34,
3130	! locked index 0 TTEs must be relocated.
3131	!
3132	! NOTE: Care must be taken to avoid an ITLB miss in this routine.
3133	!
	! Register usage:
	!	%o1 - TTE data of ITLB entry 0
	!	%o2 - tag of ITLB entry 0
	!	%o3 - %pstate on entry, restored on both exit paths
	!	%o4 - FLUSH_ADDR for the flush instruction / TTE being probed
	!	%g3 - access address of the candidate entry (index << 3)
	!	%g4 - MMU_TAG_ACCESS
	!
	! A TTE is treated as valid when it is negative as a signed 64-bit
	! value.  Entry 0 is relocated only when it is both valid and has
	! TTE_LCK_INT set; otherwise the routine returns immediately.
	!
	! NOTE(review): interrupts are disabled by XORing PSTATE_IE into
	! %pstate, which only clears the bit if it was set on entry.  The
	! DEBUG check above that write presumably enforces this - confirm.
	!
3134	ENTRY_NP(itlb_erratum34_fixup)
3135	rdpr	%pstate, %o3
3136#ifdef DEBUG
3137	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
3138#endif /* DEBUG */
3139	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
3140	ldxa	[%g0]ASI_ITLB_ACCESS, %o1	! %o1 = entry 0 data
3141	ldxa	[%g0]ASI_ITLB_TAGREAD, %o2	! %o2 = entry 0 tag
3142
3143	cmp	%o1, %g0			! Is this entry valid?
3144	bge	%xcc, 1f
3145	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
3146	bnz	%icc, 2f
3147	  nop
31481:
3149	retl					! Nope, outta here...
3150	  wrpr	%g0, %o3, %pstate		! Enable interrupts
31512:
3152	sethi	%hi(FLUSH_ADDR), %o4
3153	stxa	%g0, [%o2]ASI_ITLB_DEMAP	! Flush this mapping
3154	flush	%o4				! Flush required for I-MMU
3155	!
3156	! Start search from index 1 up.  This is because the kernel force
3157	! loads its text page at index 15 in sfmmu_kernel_remap() and we
3158	! don't want our relocated entry evicted later.
3159	!
3160	! NOTE: We assume that we'll be successful in finding an unlocked
3161	! or invalid entry.  If that isn't the case there are bound to be
3162	! bigger problems.
3163	!
3164	set	(1 << 3), %g3
31653:
3166	ldxa	[%g3]ASI_ITLB_ACCESS, %o4	! Load TTE from t16
3167	!
3168	! If this entry isn't valid, we'll choose to displace it (regardless
3169	! of the lock bit).
3170	!
3171	cmp	%o4, %g0			! TTE is >= 0 iff not valid
3172	bge	%xcc, 4f			! If invalid, go displace
3173	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
3174	bnz,a	%icc, 3b			! If locked, look at next
3175	  add	%g3, (1 << 3), %g3		!  entry
31764:
3177	!
3178	! We found an unlocked or invalid entry; we'll explicitly load
3179	! the former index 0 entry here.
3180	!
3181	sethi	%hi(FLUSH_ADDR), %o4
3182	set	MMU_TAG_ACCESS, %g4
3183	stxa	%o2, [%g4]ASI_IMMU
3184	stxa	%o1, [%g3]ASI_ITLB_ACCESS
3185	flush	%o4				! Flush required for I-MMU
3186	retl
3187	  wrpr	%g0, %o3, %pstate		! Enable interrupts
3188	SET_SIZE(itlb_erratum34_fixup)
3189
3190#endif	/* lint */
3191
3192#if	defined(lint)
3193
3194/*ARGSUSED*/
3195void
3196dtlb_erratum34_fixup(void)
3197{}
3198
3199#else	/* lint */
3200
3201	!
3202	! In Cheetah+ erratum 34, under certain conditions a DTLB locked
3203	! index 0 TTE will erroneously be displaced when a new TTE is
3204	! loaded.  In order to avoid cheetah+ erratum 34, locked index 0
3205	! TTEs must be relocated.
3206	!
	! Register usage:
	!	%o1 - TTE data of DTLB entry 0
	!	%o2 - tag of DTLB entry 0
	!	%o3 - %pstate on entry, restored on both exit paths
	!	%o4 - TTE being probed
	!	%g3 - access address of the candidate entry (index << 3)
	!	%g4 - MMU_TAG_ACCESS
	!
	! Same algorithm as itlb_erratum34_fixup, using the D-MMU ASIs and
	! membar #Sync in place of the flush instruction.
	!
	! NOTE(review): interrupts are disabled by XORing PSTATE_IE into
	! %pstate, which only clears the bit if it was set on entry.  The
	! DEBUG check above that write presumably enforces this - confirm.
	!
3207	ENTRY_NP(dtlb_erratum34_fixup)
3208	rdpr	%pstate, %o3
3209#ifdef DEBUG
3210	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
3211#endif /* DEBUG */
3212	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
3213	ldxa	[%g0]ASI_DTLB_ACCESS, %o1	! %o1 = entry 0 data
3214	ldxa	[%g0]ASI_DTLB_TAGREAD, %o2	! %o2 = entry 0 tag
3215
3216	cmp	%o1, %g0			! Is this entry valid?
3217	bge	%xcc, 1f
3218	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
3219	bnz	%icc, 2f
3220	  nop
32211:
3222	retl					! Nope, outta here...
3223	  wrpr	%g0, %o3, %pstate		! Enable interrupts
32242:
3225	stxa	%g0, [%o2]ASI_DTLB_DEMAP	! Flush this mapping
3226	membar	#Sync
3227	!
3228	! Start search from index 1 up.
3229	!
3230	! NOTE: We assume that we'll be successful in finding an unlocked
3231	! or invalid entry.  If that isn't the case there are bound to be
3232	! bigger problems.
3233	!
3234	set	(1 << 3), %g3
32353:
3236	ldxa	[%g3]ASI_DTLB_ACCESS, %o4	! Load TTE from t16
3237	!
3238	! If this entry isn't valid, we'll choose to displace it (regardless
3239	! of the lock bit).
3240	!
3241	cmp	%o4, %g0			! TTE is >= 0 iff not valid
3242	bge	%xcc, 4f			! If invalid, go displace
3243	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
3244	bnz,a	%icc, 3b			! If locked, look at next
3245	  add	%g3, (1 << 3), %g3		!  entry
32464:
3247	!
3248	! We found an unlocked or invalid entry; we'll explicitly load
3249	! the former index 0 entry here.
3250	!
3251	set	MMU_TAG_ACCESS, %g4
3252	stxa	%o2, [%g4]ASI_DMMU
3253	stxa	%o1, [%g3]ASI_DTLB_ACCESS
3254	membar	#Sync
3255	retl
3256	  wrpr	%g0, %o3, %pstate		! Enable interrupts
3257	SET_SIZE(dtlb_erratum34_fixup)
3258
3259#endif	/* lint */
3260
3261#endif	/* CHEETAHPLUS_ERRATUM_34 */
3262
3263