xref: /titanic_52/usr/src/uts/sun4u/cpu/us3_common_asm.s (revision 1fac5a6088d9f8a16d0a302d57227a80031f002d)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 *
26 * Assembly code support for Cheetah/Cheetah+ modules
27 */
28
29#pragma ident	"%Z%%M%	%I%	%E% SMI"
30
31#if !defined(lint)
32#include "assym.h"
33#endif	/* !lint */
34
35#include <sys/asm_linkage.h>
36#include <sys/mmu.h>
37#include <vm/hat_sfmmu.h>
38#include <sys/machparam.h>
39#include <sys/machcpuvar.h>
40#include <sys/machthread.h>
41#include <sys/machtrap.h>
42#include <sys/privregs.h>
43#include <sys/trap.h>
44#include <sys/cheetahregs.h>
45#include <sys/us3_module.h>
46#include <sys/xc_impl.h>
47#include <sys/intreg.h>
48#include <sys/async.h>
49#include <sys/clock.h>
50#include <sys/cheetahasm.h>
51#include <sys/cmpregs.h>
52
53#ifdef TRAPTRACE
54#include <sys/traptrace.h>
55#endif /* TRAPTRACE */
56
57#if !defined(lint)
58
59/* BEGIN CSTYLED */
60
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Invalidate D$ lines according to the global dflush_type.  Nothing is
 * done when the DCU_DC bit read from ASI_DCU is clear.
 *
 *   FLUSHPAGE_TYPE  - arg1 (pfn) is shifted left by MMU_PAGESHIFT and a
 *		       store to ASI_DC_INVAL is issued for every
 *		       dcache_linesize step of that MMU_PAGESIZE page.
 *   FLUSHMATCH_TYPE - every tag is read through ASI_DC_TAG; a line is
 *		       zeroed when it has a valid bit set and its tag (valid
 *		       bits masked off) equals arg1 << CHEETAH_DC_VBIT_SHIFT.
 *   anything else   - every tag in the D$ is zeroed (FLUSHALL_TYPE).
 *
 * All loops count down from (size - linesize) to 0 inclusive.
 * arg1, arg2 (FLUSHMATCH_TYPE only, as loop counter), tmp1-tmp3 and the
 * condition codes may be modified.  Numeric local labels 1-5 are used and
 * the macro exits by falling out at label 1.
 */
61#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
62	ldxa	[%g0]ASI_DCU, tmp1					;\
63	btst	DCU_DC, tmp1		/* is dcache enabled? */	;\
64	bz,pn	%icc, 1f						;\
65	ASM_LD(tmp1, dcache_linesize)					;\
66	ASM_LD(tmp2, dflush_type)					;\
67	cmp	tmp2, FLUSHPAGE_TYPE					;\
68	be,pt	%icc, 2f						;\
69	nop								;\
70	sllx	arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */	;\
71	ASM_LD(tmp3, dcache_size)					;\
72	cmp	tmp2, FLUSHMATCH_TYPE					;\
73	be,pt	%icc, 3f						;\
74	nop								;\
75	/*								\
76	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
77	 * tmp3 = cache size						\
78	 * tmp1 = cache line size					\
79	 */								\
80	sub	tmp3, tmp1, tmp2					;\
814:									\
82	stxa	%g0, [tmp2]ASI_DC_TAG					;\
83	membar	#Sync							;\
84	cmp	%g0, tmp2						;\
85	bne,pt	%icc, 4b						;\
86	sub	tmp2, tmp1, tmp2					;\
87	ba,pt	%icc, 1f						;\
88	nop								;\
89	/*								\
90	 * flushtype = FLUSHPAGE_TYPE					\
91	 * arg1 = pfn							\
92	 * arg2 = virtual color						\
93	 * tmp1 = cache line size					\
94	 * tmp2 = tag from cache					\
95	 * tmp3 = counter						\
96	 */								\
972:									\
98	set	MMU_PAGESIZE, tmp3					;\
99        sllx    arg1, MMU_PAGESHIFT, arg1  /* pfn to 43 bit PA	   */   ;\
100	sub	tmp3, tmp1, tmp3					;\
1014:									\
102	stxa	%g0, [arg1 + tmp3]ASI_DC_INVAL				;\
103	membar	#Sync							;\
1045:									\
105	cmp	%g0, tmp3						;\
106	bnz,pt	%icc, 4b		/* branch if not done */	;\
107	sub	tmp3, tmp1, tmp3					;\
108	ba,pt	%icc, 1f						;\
109	nop								;\
110	/*								\
111	 * flushtype = FLUSHMATCH_TYPE					\
112	 * arg1 = tag to compare against				\
113	 * tmp1 = cache line size					\
114	 * tmp3 = cache size						\
115	 * arg2 = counter						\
116	 * tmp2 = cache tag						\
117	 */								\
1183:									\
119	sub	tmp3, tmp1, arg2					;\
1204:									\
121	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
122	btst	CHEETAH_DC_VBIT_MASK, tmp2				;\
123	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
124	andn	tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */	;\
125	cmp	tmp2, arg1						;\
126	bne,pn	%icc, 5f		/* branch if tag miss */	;\
127	nop								;\
128	stxa	%g0, [arg2]ASI_DC_TAG					;\
129	membar	#Sync							;\
1305:									\
131	cmp	%g0, arg2						;\
132	bne,pt	%icc, 4b		/* branch if not done */	;\
133	sub	arg2, tmp1, arg2					;\
1341:
135
136
137/* END CSTYLED */
138
139#endif	/* !lint */
140
141/*
142 * Cheetah MMU and Cache operations.
143 */
144
145#if defined(lint)
146
/* Lint-only C stub; the real implementation is the assembly below. */
147/* ARGSUSED */
148void
149vtag_flushpage(caddr_t vaddr, u_int ctxnum)
150{}
151
152#else	/* lint */
153
/*
 * vtag_flushpage(vaddr, ctxnum)
 *
 * Demap one page from both the D-TLB and the I-TLB.  A zero ctxnum is
 * demapped directly.  For a non-zero ctxnum the value is temporarily
 * written to MMU_PCONTEXT, a primary-context page demap is issued, and the
 * previous register contents are written back.  PSTATE.IE is cleared while
 * this runs; the caller's %pstate (kept in %o5) is restored in the retl
 * delay slot.  May modify %o0-%o5.
 */
154	ENTRY_NP(vtag_flushpage)
155	/*
156	 * flush page from the tlb
157	 *
158	 * %o0 = vaddr
159	 * %o1 = ctxnum
160	 */
161	rdpr	%pstate, %o5
162#ifdef DEBUG
163	andcc	%o5, PSTATE_IE, %g0		/* if interrupts already */
164	bnz,a,pt %icc, 3f			/* disabled, panic	 */
165	  nop
166	save	%sp, -SA(MINFRAME), %sp
167	sethi	%hi(sfmmu_panic1), %o0
168	call	panic
169	  or	%o0, %lo(sfmmu_panic1), %o0
170	ret
171	restore
1723:
173#endif /* DEBUG */
174	/*
175	 * disable ints
176	 */
177	andn	%o5, PSTATE_IE, %o4
178	wrpr	%o4, 0, %pstate
179
180	/*
181	 * Then, blow out the tlb
182	 * Interrupts are disabled to prevent the primary ctx register
183	 * from changing underneath us.
184	 */
185	brnz,pt	%o1, 1f			/* KCONTEXT */
186	sethi	%hi(FLUSH_ADDR), %o3
187	/*
188	 * For KCONTEXT demaps use primary. type = page implicitly
189	 */
190	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
191	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
192	flush	%o3
193	b	5f
194	nop
1951:
196	/*
197	 * User demap.  We need to set the primary context properly.
198	 * Secondary context cannot be used for Cheetah IMMU.
199	 * %o0 = vaddr
200	 * %o1 = ctxnum
201	 * %o3 = FLUSH_ADDR
202	 */
	/*
	 * When ctx_pgsz_array is non-NULL, load the byte it holds for this
	 * ctxnum, shift it by CTXREG_EXT_SHIFT and OR it into the value
	 * that will be written to the context register.
	 */
203	sethi	%hi(ctx_pgsz_array), %o4
204	ldn     [%o4 + %lo(ctx_pgsz_array)], %o4
205	brz	%o4, 2f
206	nop
207	ldub	[%o4 + %o1], %o4
208	sll	%o4, CTXREG_EXT_SHIFT, %o4
209	or	%o1, %o4, %o1
2102:
	/*
	 * NOTE(review): %tl is raised to 1 for the duration of the context
	 * swap and dropped back to 0 afterwards; the reason is not stated
	 * in this file - confirm before changing.
	 */
211	wrpr	%g0, 1, %tl
212	set	MMU_PCONTEXT, %o4
213	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
214	ldxa	[%o4]ASI_DMMU, %o2		/* rd old ctxnum */
215	stxa	%o1, [%o4]ASI_DMMU		/* wr new ctxnum */
2164:
217	stxa	%g0, [%o0]ASI_DTLB_DEMAP
218	stxa	%g0, [%o0]ASI_ITLB_DEMAP
219	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
220	flush	%o3
221	wrpr	%g0, 0, %tl
2225:
223	retl
224	wrpr	%g0, %o5, %pstate		/* enable interrupts */
225	SET_SIZE(vtag_flushpage)
226
227#endif	/* lint */
228
229
#if defined(lint)

/* Lint-only C stub; the real implementation is the assembly below. */
/* ARGSUSED */
void
vtag_flushctx(u_int ctxnum)
{}

#else	/* lint */

/*
 * vtag_flushctx(ctxnum)
 *
 * Demap every D-TLB and I-TLB entry belonging to ctxnum.  The context is
 * temporarily written to MMU_PCONTEXT, a primary-context demap is issued,
 * and the previous register contents are written back.  PSTATE.IE is
 * cleared while this runs; the caller's %pstate (kept in %o2) is restored
 * in the retl delay slot.  Modifies %o0, %o2-%o5 and %g1.
 */
	ENTRY_NP(vtag_flushctx)
	/*
	 * flush context from the tlb
	 *
	 * %o0 = ctxnum
	 * We disable interrupts to prevent the primary ctx register changing
	 * underneath us.
	 */
	sethi	%hi(FLUSH_ADDR), %o3
	rdpr	%pstate, %o2

#ifdef DEBUG
	andcc	%o2, PSTATE_IE, %g0		/* if interrupts already */
	bnz,a,pt %icc, 1f			/* disabled, panic	 */
	  nop
	sethi	%hi(sfmmu_panic1), %o0
	call	panic
	  or	%o0, %lo(sfmmu_panic1), %o0
1:
#endif /* DEBUG */

	/*
	 * When ctx_pgsz_array is non-NULL, load the byte it holds for this
	 * ctxnum, shift it by CTXREG_EXT_SHIFT and OR it into the value
	 * that will be written to the context register.
	 */
	sethi	%hi(ctx_pgsz_array), %o4
	ldn     [%o4 + %lo(ctx_pgsz_array)], %o4
	brz	%o4, 2f
	nop
	ldub	[%o4 + %o0], %o4
	sll	%o4, CTXREG_EXT_SHIFT, %o4
	or	%o0, %o4, %o0
2:
	/*
	 * Clear PSTATE.IE explicitly.  wrpr XORs its two source operands,
	 * so "wrpr %o2, PSTATE_IE, %pstate" would toggle the bit and turn
	 * interrupts ON if a caller ever arrived with them already off
	 * (only DEBUG kernels catch that case above).  %o4 is dead here and
	 * is reloaded immediately below.
	 */
	andn	%o2, PSTATE_IE, %o4
	wrpr	%o4, 0, %pstate			/* disable interrupts */
	set	MMU_PCONTEXT, %o4
	set	DEMAP_CTX_TYPE | DEMAP_PRIMARY, %g1
	wrpr	%g0, 1, %tl
	ldxa	[%o4]ASI_DMMU, %o5		/* rd old ctxnum */
	stxa	%o0, [%o4]ASI_DMMU		/* wr new ctxnum */
4:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	stxa	%o5, [%o4]ASI_DMMU		/* restore old ctxnum */
	flush	%o3
	wrpr	%g0, 0, %tl
5:
	retl
	wrpr	%g0, %o2, %pstate		/* restore caller's pstate */
	SET_SIZE(vtag_flushctx)

#endif	/* lint */
286
287
288#if defined(lint)
289
/* Lint-only C stub; the real implementation is the assembly below. */
290void
291vtag_flushall(void)
292{}
293
294#else	/* lint */
295
/*
 * vtag_flushall() / demap_all()
 *
 * Two names for the same entry point.  Issues a DEMAP_ALL_TYPE demap to
 * both the D-TLB and the I-TLB, followed by a flush.  Modifies %o3, %g1.
 */
296	ENTRY_NP2(vtag_flushall, demap_all)
297	/*
298	 * flush the tlb
299	 */
300	sethi	%hi(FLUSH_ADDR), %o3
301	set	DEMAP_ALL_TYPE, %g1
302	stxa	%g0, [%g1]ASI_DTLB_DEMAP
303	stxa	%g0, [%g1]ASI_ITLB_DEMAP
304	flush	%o3
305	retl
306	nop
307	SET_SIZE(demap_all)
308	SET_SIZE(vtag_flushall)
309
310#endif	/* lint */
311
312
313#if defined(lint)
314
/* Lint-only C stub; the real implementation is the assembly below. */
315/* ARGSUSED */
316void
317vtag_flushpage_tl1(uint64_t vaddr, uint64_t ctxnum)
318{}
319
320#else	/* lint */
321
/*
 * vtag_flushpage_tl1: cross-trap variant of the single-page TLB demap.
 * Arguments arrive in %g1/%g2 and the handler leaves with "retry" rather
 * than returning to a caller.  Uses %g1, %g2, %g4 and %g5.
 */
322	ENTRY_NP(vtag_flushpage_tl1)
323	/*
324	 * x-trap to flush page from tlb and tsb
325	 *
326	 * %g1 = vaddr, zero-extended on 32-bit kernel
327	 * %g2 = ctxnum
328	 *
329	 * assumes TSBE_TAG = 0
330	 */
	/* The shift pair clears the low MMU_PAGESHIFT bits of vaddr. */
331	srln	%g1, MMU_PAGESHIFT, %g1
332	brnz,pt	%g2, 1f				/* KCONTEXT */
333	  slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
334
335	/* We need to demap in the kernel context */
336	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
337	stxa	%g0, [%g1]ASI_DTLB_DEMAP
338	stxa	%g0, [%g1]ASI_ITLB_DEMAP
339	retry
3401:
341	/* We need to demap in a user context */
342	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
	/*
	 * When ctx_pgsz_array is non-NULL, OR the byte it holds for this
	 * ctxnum (shifted by CTXREG_EXT_SHIFT) into the context value.
	 */
343	sethi	%hi(ctx_pgsz_array), %g4
344	ldn     [%g4 + %lo(ctx_pgsz_array)], %g4
345	brz	%g4, 2f
346	nop
347	ldub	[%g4 + %g2], %g4
348	sll	%g4, CTXREG_EXT_SHIFT, %g4
349	or	%g2, %g4, %g2
3502:
351	set	MMU_PCONTEXT, %g4
352	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
353	stxa	%g2, [%g4]ASI_DMMU		/* wr new ctxnum */
354	stxa	%g0, [%g1]ASI_DTLB_DEMAP
355	stxa	%g0, [%g1]ASI_ITLB_DEMAP
356	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
357	retry
358	SET_SIZE(vtag_flushpage_tl1)
359
360#endif	/* lint */
361
362
363#if defined(lint)
364
/* Lint-only C stub; the real implementation is the assembly below. */
365/* ARGSUSED */
366void
367vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t ctx_pgcnt)
368{}
369
370#else	/* lint */
371
/*
 * vtag_flush_pgcnt_tl1: cross-trap handler that demaps a run of
 * consecutive MMU_PAGESIZE pages starting at vaddr.  Leaves with "retry".
 * Uses %g1-%g5.
 *
 * The page count is decremented before it is tested, so each loop demaps
 * at least one page and a count of zero is not treated as "nothing to do".
 */
372	ENTRY_NP(vtag_flush_pgcnt_tl1)
373	/*
374	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
375	 *
376	 * %g1 = vaddr, zero-extended on 32-bit kernel
377	 * %g2 = <zero32|ctx16|pgcnt16>
378	 *
379	 * NOTE: this handler relies on the fact that no
380	 *	interrupts or traps can occur during the loop
381	 *	issuing the TLB_DEMAP operations. It is assumed
382	 *	that interrupts are disabled and this code is
383	 *	fetching from the kernel locked text address.
384	 *
385	 * assumes TSBE_TAG = 0
386	 */
387	set	0xffff, %g4
388	and	%g4, %g2, %g3			/* g3 = pgcnt */
389	srln	%g2, 16, %g2			/* g2 = ctxnum */
390	srln	%g1, MMU_PAGESHIFT, %g1
391	brnz,pt	%g2, 1f				/* KCONTEXT? */
392	  slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
393
394	/* We need to demap in the kernel context */
395	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
396	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
3974:
398	stxa	%g0, [%g1]ASI_DTLB_DEMAP
399	stxa	%g0, [%g1]ASI_ITLB_DEMAP
400	deccc	%g3				/* decr pgcnt */
401	bnz,pt	%icc,4b
402	  add	%g1, %g2, %g1			/* next page */
403	retry
4041:
405	/* We need to demap in a user context */
406	sethi	%hi(ctx_pgsz_array), %g4
407	ldn     [%g4 + %lo(ctx_pgsz_array)], %g4
	/* The demap-type bits are merged into %g1 in the delay slot. */
408	brz	%g4, 2f
409	  or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
410	ldub	[%g4 + %g2], %g4
411	sll	%g4, CTXREG_EXT_SHIFT, %g4
412	or	%g2, %g4, %g2
4132:
414	set	MMU_PCONTEXT, %g4
415	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
416	stxa	%g2, [%g4]ASI_DMMU		/* wr new ctxnum */
417
418	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
4193:
420	stxa	%g0, [%g1]ASI_DTLB_DEMAP
421	stxa	%g0, [%g1]ASI_ITLB_DEMAP
422	deccc	%g3				/* decr pgcnt */
423	bnz,pt	%icc,3b
424	  add	%g1, %g2, %g1			/* next page */
425
426	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
427	retry
428	SET_SIZE(vtag_flush_pgcnt_tl1)
429
430#endif	/* lint */
431
432
433#if defined(lint)
434
/* Lint-only C stub; the real implementation is the assembly below. */
435/* ARGSUSED */
436void
437vtag_flushctx_tl1(uint64_t ctxnum, uint64_t dummy)
438{}
439
440#else	/* lint */
441
/*
 * vtag_flushctx_tl1: cross-trap handler that demaps every TLB entry of
 * one context.  The context is temporarily written to MMU_PCONTEXT and
 * the old register value is written back before "retry".
 * Uses %g1, %g3, %g4 and %g5; the second argument is not read.
 */
442	ENTRY_NP(vtag_flushctx_tl1)
443	/*
444	 * x-trap to flush context from tlb
445	 *
446	 * %g1 = ctxnum
447	 */
448	sethi	%hi(ctx_pgsz_array), %g4
449	ldn     [%g4 + %lo(ctx_pgsz_array)], %g4
450	brz	%g4, 2f
451	nop
452	ldub	[%g4 + %g1], %g4
453	sll	%g4, CTXREG_EXT_SHIFT, %g4
454	or	%g1, %g4, %g1
4552:
456	set	DEMAP_CTX_TYPE | DEMAP_PRIMARY, %g4
457	set	MMU_PCONTEXT, %g3
458	ldxa	[%g3]ASI_DMMU, %g5		/* rd old ctxnum */
459	stxa	%g1, [%g3]ASI_DMMU		/* wr new ctxnum */
460	stxa	%g0, [%g4]ASI_DTLB_DEMAP
461	stxa	%g0, [%g4]ASI_ITLB_DEMAP
462	stxa	%g5, [%g3]ASI_DMMU		/* restore old ctxnum */
463	retry
464	SET_SIZE(vtag_flushctx_tl1)
465
466#endif	/* lint */
467
468
469#if defined(lint)
470
/* Lint-only C stub; the real implementation is the assembly below. */
471/*ARGSUSED*/
472void
473vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
474{}
475
476#else	/* lint */
477
/*
 * vtag_flushall_tl1: cross-trap handler issuing a DEMAP_ALL_TYPE demap to
 * both TLBs, then "retry".  Neither argument is read; only %g4 is written.
 */
478	ENTRY_NP(vtag_flushall_tl1)
479	/*
480	 * x-trap to flush tlb
481	 */
482	set	DEMAP_ALL_TYPE, %g4
483	stxa	%g0, [%g4]ASI_DTLB_DEMAP
484	stxa	%g0, [%g4]ASI_ITLB_DEMAP
485	retry
486	SET_SIZE(vtag_flushall_tl1)
487
488#endif	/* lint */
489
490
491#if defined(lint)
492
/* Lint-only C stub; the real implementation is the assembly below. */
493/* ARGSUSED */
494void
495vac_flushpage(pfn_t pfnum, int vcolor)
496{}
497
498#else	/* lint */
499
500/*
501 * vac_flushpage(pfnum, color)
502 *	Flush 1 8k page of the D-$ with physical page = pfnum
503 *	Algorithm:
504 *		The cheetah dcache is a 64k pseudo 4 way associative cache.
505 *		It is virtual indexed, physically tagged cache.
506 */
/*
 * dflush_type is a global word in the data segment, initialised to
 * FLUSHPAGE_TYPE; DCACHE_FLUSHPAGE reads it to pick the flush strategy.
 */
507	.seg	".data"
508	.align	8
509	.global	dflush_type
510dflush_type:
511	.word	FLUSHPAGE_TYPE
512
513	ENTRY(vac_flushpage)
514	/*
515	 * flush page from the d$
516	 *
517	 * %o0 = pfnum, %o1 = color
518	 */
	/* %o2-%o4 are the macro's scratch registers. */
519	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
520	retl
521	  nop
522	SET_SIZE(vac_flushpage)
523
524#endif	/* lint */
525
526
527#if defined(lint)
528
/* Lint-only C stub; the real implementation is the assembly below. */
529/* ARGSUSED */
530void
531vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
532{}
533
534#else	/* lint */
535
/*
 * vac_flushpage_tl1: cross-trap form of vac_flushpage.  Arguments arrive
 * in %g1/%g2, %g3-%g5 are the macro's scratch registers, and the handler
 * leaves with "retry".
 */
536	ENTRY_NP(vac_flushpage_tl1)
537	/*
538	 * x-trap to flush page from the d$
539	 *
540	 * %g1 = pfnum, %g2 = color
541	 */
542	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
543	retry
544	SET_SIZE(vac_flushpage_tl1)
545
546#endif	/* lint */
547
548
549#if defined(lint)
550
/* Lint-only C stub; the real implementation is the assembly below. */
551/* ARGSUSED */
552void
553vac_flushcolor(int vcolor, pfn_t pfnum)
554{}
555
556#else	/* lint */
557	/*
558	 * In UltraSPARC III flushcolor is same as flushpage.
559	 * This is because we have an ASI to flush dcache using physical
560	 * address.
561	 * Flushing dcache using physical address is faster because we
562	 * don't have to deal with associativity of dcache.
563	 * The arguments to vac_flushpage() and vac_flushcolor() are same but
564	 * the order is reversed. this is because we maintain compatibility
565	 * with spitfire, in which vac_flushcolor has only one argument, namely
566	 * vcolor.
567	 */
568
569	ENTRY(vac_flushcolor)
570	/*
571	 * %o0 = vcolor, %o1 = pfnum
572	 */
	/* Same macro as vac_flushpage, with the two argument registers swapped. */
573	DCACHE_FLUSHPAGE(%o1, %o0, %o2, %o3, %o4)
574	retl
575	  nop
576	SET_SIZE(vac_flushcolor)
577
578#endif	/* lint */
579
580
581#if defined(lint)
582
/* Lint-only C stub; the real implementation is the assembly below. */
583/* ARGSUSED */
584void
585vac_flushcolor_tl1(uint64_t vcolor, uint64_t pfnum)
586{}
587
588#else	/* lint */
589
/*
 * vac_flushcolor_tl1: cross-trap form of vac_flushcolor.  The pfnum in
 * %g2 is passed as the macro's first argument; %g3-%g5 are scratch.
 * Leaves with "retry".
 */
590	ENTRY(vac_flushcolor_tl1)
591	/*
592	 * %g1 = vcolor
593	 * %g2 = pfnum
594	 */
595	DCACHE_FLUSHPAGE(%g2, %g1, %g3, %g4, %g5)
596	retry
597	SET_SIZE(vac_flushcolor_tl1)
598
599#endif	/* lint */
600
#if defined(lint)

/* Lint-only C stub; the real implementation is the assembly below. */
int
idsr_busy(void)
{
	return (0);
}

#else	/* lint */

/*
 * Determine whether or not the IDSR is busy.
 * Entry: no arguments
 * Returns: 1 if busy, 0 otherwise
 *
 * Reads the interrupt dispatch status register and tests IDSR_BUSY.
 * The branch is taken (and its annulled delay slot therefore executed)
 * only when the bit is set, so %o0 becomes 1 exactly when the register
 * reports busy.  The previous "bz,a" form executed the delay slot when
 * the bit was clear, returning the inverse of the documented result.
 * Modifies %o0 and %g1.
 */
	ENTRY(idsr_busy)
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	clr	%o0				! default: not busy
	btst	IDSR_BUSY, %g1			! Z set when busy bit is clear
	bnz,a,pn %xcc, 1f			! busy bit set ...
	  mov	1, %o0				! ... return 1 (annulled if clear)
1:
	retl
	  nop
	SET_SIZE(idsr_busy)

#endif	/* lint */
628
629#if defined(lint)
630
/* Lint-only C stubs; the real implementations are the assembly below. */
631/* ARGSUSED */
632void
633init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
634{}
635
636/* ARGSUSED */
637void
638init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
639{}
640
641#else	/* lint */
642
/* Panic message used by the DEBUG check in init_mondo. */
643	.global _dispatch_status_busy
644_dispatch_status_busy:
645	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
646	.align	4
647
648/*
649 * Setup interrupt dispatch data registers
650 * Entry:
651 *	%o0 - function or inumber to call
652 *	%o1, %o2 - arguments (2 uint64_t's)
653 *
 * init_mondo_nocheck is an alternate entry that skips the DEBUG-only
 * IDSR_BUSY check.  Both store %o0, %o1 and %o2 to IDDR_0, IDDR_1 and
 * IDDR_2 respectively and finish with a membar #Sync.
 * Modifies %g1-%g3.
654 */
655	.seg "text"
656
657	ENTRY(init_mondo)
658#ifdef DEBUG
659	!
660	! IDSR should not be busy at the moment
661	!
662	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
663	btst	IDSR_BUSY, %g1
664	bz,pt	%xcc, 1f
665	nop
666	sethi	%hi(_dispatch_status_busy), %o0
667	call	panic
668	or	%o0, %lo(_dispatch_status_busy), %o0
669#endif /* DEBUG */
670
671	ALTENTRY(init_mondo_nocheck)
672	!
673	! interrupt vector dispatch data reg 0
674	!
	! Label 1 is the target of the DEBUG "not busy" branch above.
6751:
676	mov	IDDR_0, %g1
677	mov	IDDR_1, %g2
678	mov	IDDR_2, %g3
679	stxa	%o0, [%g1]ASI_INTR_DISPATCH
680
681	!
682	! interrupt vector dispatch data reg 1
683	!
684	stxa	%o1, [%g2]ASI_INTR_DISPATCH
685
686	!
687	! interrupt vector dispatch data reg 2
688	!
689	stxa	%o2, [%g3]ASI_INTR_DISPATCH
690
691	membar	#Sync
692	retl
693	nop
694	SET_SIZE(init_mondo_nocheck)
695	SET_SIZE(init_mondo)
696
697#endif	/* lint */
697
698
699#if !(defined(JALAPENO) || defined(SERRANO))
700
#if defined(lint)

/* Lint-only C stub; the real implementation is the assembly below. */
/* ARGSUSED */
void
shipit(int upaid, int bn)
{ return; }

#else	/* lint */

/*
 * shipit(upaid, bn)
 *
 * Dispatch the previously loaded mondo to agent "upaid" using busy/nack
 * pair "bn".  The dispatch address is built from three fields:
 *	(upaid << IDCR_PID_SHIFT) | (bn << IDCR_BN_SHIFT) | IDCR_OFFSET
 * and a store of %g0 to that address in ASI_INTR_DISPATCH triggers the
 * send.  Modifies %g1 and %g2.
 */
	ENTRY_NP(shipit)
	sll	%o1, IDCR_BN_SHIFT, %g2		! %g2 = b/n pair, IDCR<28:24>
	sll	%o0, IDCR_PID_SHIFT, %g1	! %g1 = agent id, IDCR<18:14>
	or	%g1, %g2, %g1			! combine agent id and b/n pair
	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
	membar	#Sync
	retl
	  nop
	SET_SIZE(shipit)

#endif	/* lint */
725
726#endif	/* !(JALAPENO || SERRANO) */
727
728
729#if defined(lint)
730
/* Lint-only C stub; the real implementation is the assembly below. */
731/* ARGSUSED */
732void
733flush_instr_mem(caddr_t vaddr, size_t len)
734{}
735
736#else	/* lint */
737
738/*
739 * flush_instr_mem:
740 *	Flush 1 page of the I-$ starting at vaddr
741 * 	%o0 vaddr
742 *	%o1 bytes to be flushed
743 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
744 * the stores from all processors so that a FLUSH instruction is only needed
745 * to ensure pipeline is consistent. This means a single flush is sufficient at
746 * the end of a sequence of stores that updates the instruction stream to
747 * ensure correct operation.
 *
 * Consequently this implementation issues exactly one flush and never
 * reads the length in %o1.
748 */
749
750	ENTRY(flush_instr_mem)
751	flush	%o0			! address irrelevant
752	retl
753	nop
754	SET_SIZE(flush_instr_mem)
755
756#endif	/* lint */
757
758
759#if defined(CPU_IMP_ECACHE_ASSOC)
760
761#if defined(lint)
762
/* Lint-only C stub; the real implementation is the assembly below. */
763/* ARGSUSED */
764uint64_t
765get_ecache_ctrl(void)
766{ return (0); }
767
768#else	/* lint */
769
/*
 * get_ecache_ctrl()
 *
 * Return the E$ control register in %o0: ASI_EC_CFG_TIMING when the CPU
 * implementation is JAGUAR_IMPL, ASI_EC_CTRL otherwise.
 */
770	ENTRY(get_ecache_ctrl)
771	GET_CPU_IMPL(%o0)
772	cmp	%o0, JAGUAR_IMPL
773	!
774	! Putting an ASI access in the delay slot may
775	! cause it to be accessed, even when annulled.
776	!
777	bne	1f
778	  nop
779	ldxa	[%g0]ASI_EC_CFG_TIMING, %o0	! read Jaguar shared E$ ctrl reg
780	b	2f
781	  nop
7821:
783	ldxa	[%g0]ASI_EC_CTRL, %o0		! read Ch/Ch+ E$ control reg
7842:
785	retl
786	  nop
787	SET_SIZE(get_ecache_ctrl)
788
789#endif	/* lint */
790
791#endif	/* CPU_IMP_ECACHE_ASSOC */
792
793
794#if !(defined(JALAPENO) || defined(SERRANO))
795
796/*
797 * flush_ecache:
798 *	%o0 - 64 bit physical address
799 *	%o1 - ecache size
800 *	%o2 - ecache linesize
 *
 * Runs PN_L2_FLUSHALL and then ECACHE_FLUSHALL; both macros are defined
 * outside this file.  %g3-%g5 and %o4 are handed to them as scratch.
801 */
802#if defined(lint)
803
/* Lint-only C stub; the real implementation is the assembly below. */
804/*ARGSUSED*/
805void
806flush_ecache(uint64_t physaddr, size_t ecache_size, size_t ecache_linesize)
807{}
808
809#else /* !lint */
810
811	ENTRY(flush_ecache)
812
813	/*
814	 * For certain CPU implementations, we have to flush the L2 cache
815	 * before flushing the ecache.
816	 */
817	PN_L2_FLUSHALL(%g3, %g4, %g5)
818
819	/*
820	 * Flush the entire Ecache using displacement flush.
821	 */
	/* Note the macro argument order: size, linesize, physaddr, scratch. */
822	ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)
823
824	retl
825	nop
826	SET_SIZE(flush_ecache)
827
828#endif /* lint */
829
830#endif	/* !(JALAPENO || SERRANO) */
831
832
833#if defined(lint)
834
/* Lint-only C stub; the real implementation is the assembly below. */
835void
836flush_dcache(void)
837{}
838
839#else	/* lint */
840
/*
 * flush_dcache()
 *
 * Load the globals dcache_size and dcache_linesize and hand them to
 * CH_DCACHE_FLUSHALL (defined outside this file).  Modifies %o0-%o2.
 */
841	ENTRY(flush_dcache)
842	ASM_LD(%o0, dcache_size)
843	ASM_LD(%o1, dcache_linesize)
844	CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
845	retl
846	nop
847	SET_SIZE(flush_dcache)
848
849#endif	/* lint */
850
851
852#if defined(lint)
853
/* Lint-only C stub; the real implementation is the assembly below. */
854void
855flush_icache(void)
856{}
857
858#else	/* lint */
859
/*
 * flush_icache()
 *
 * Flush the whole I$ with CH_ICACHE_FLUSHALL.  The size and line size come
 * from the CPU private area (CHPR_ICACHE_SIZE / CHPR_ICACHE_LINESIZE) or,
 * at flush_icache_1, from the globals icache_size / icache_linesize.
 * NOTE(review): GET_CPU_PRIVATE_PTR is defined elsewhere; presumably it
 * branches to flush_icache_1 when no private pointer is available - confirm.
 */
860	ENTRY(flush_icache)
861	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
862	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
863	ba,pt	%icc, 2f
864	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0
865flush_icache_1:
866	ASM_LD(%o0, icache_size)
867	ASM_LD(%o1, icache_linesize)
8682:
869	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
870	retl
871	nop
872	SET_SIZE(flush_icache)
873
874#endif	/* lint */
875
876#if defined(lint)
877
/* Lint-only C stub; the real implementation is the assembly below. */
878/*ARGSUSED*/
879void
880kdi_flush_idcache(int dcache_size, int dcache_lsize, int icache_size,
881    int icache_lsize)
882{
883}
884
885#else	/* lint */
886
/*
 * kdi_flush_idcache(dcache_size, dcache_lsize, icache_size, icache_lsize)
 *
 * Flush the D$ and then the I$ using the caller-supplied geometry in
 * %o0-%o3 (no globals are read here), followed by a membar #Sync.
 * %g1 and %g2 are passed to the flush macros as scratch.
 */
887	ENTRY(kdi_flush_idcache)
888	CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
889	CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
890	membar	#Sync
891	retl
892	nop
893	SET_SIZE(kdi_flush_idcache)
894
895#endif	/* lint */
896
897#if defined(lint)
898
/* Lint-only C stub; the real implementation is the assembly below. */
899void
900flush_pcache(void)
901{}
902
903#else	/* lint */
904
/*
 * flush_pcache()
 *
 * Thin wrapper around the PCACHE_FLUSHALL macro (defined outside this
 * file), giving it %o0-%o2 as working registers.
 */
905	ENTRY(flush_pcache)
906	PCACHE_FLUSHALL(%o0, %o1, %o2)
907	retl
908	nop
909	SET_SIZE(flush_pcache)
910
911#endif	/* lint */
912
913
914#if defined(CPU_IMP_L1_CACHE_PARITY)
915
916#if defined(lint)
917
/* Lint-only C stub; the real implementation is the assembly below. */
918/* ARGSUSED */
919void
920get_dcache_dtag(uint32_t dcache_idx, uint64_t *data)
921{}
922
923#else	/* lint */
924
925/*
926 * Get dcache data and tag.  The Dcache data is a pointer to a ch_dc_data_t
927 * structure (see cheetahregs.h):
928 * The Dcache *should* be turned off when this code is executed.
 *
 *	%o0 - D$ index
 *	%o1 - pointer to the output structure
 *
 * The index, tag, utag and snoop tag are stored at CH_DC_IDX, CH_DC_TAG,
 * CH_DC_UTAG and CH_DC_SNTAG, followed by CH_DC_DATA_REG_SIZE bytes of
 * data read 8 bytes at a time.  On PANTHER_IMPL the parity bytes are also
 * captured.  PSTATE_IE and PSTATE_AM are cleared while running and the
 * saved %pstate (%o5) is restored in the retl delay slot.
 * Modifies %o0-%o3 and %o5.
929 */
930	.align	128
931	ENTRY(get_dcache_dtag)
932	rdpr	%pstate, %o5
933	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
934	wrpr	%g0, %o3, %pstate
935	b	1f
936	  stx	%o0, [%o1 + CH_DC_IDX]
937
	/* The diagnostic access sequence below starts on a 128-byte boundary. */
938	.align	128
9391:
940	ldxa	[%o0]ASI_DC_TAG, %o2
941	stx	%o2, [%o1 + CH_DC_TAG]
942	membar	#Sync
943	ldxa	[%o0]ASI_DC_UTAG, %o2
944	membar	#Sync
945	stx	%o2, [%o1 + CH_DC_UTAG]
946	ldxa	[%o0]ASI_DC_SNP_TAG, %o2
947	stx	%o2, [%o1 + CH_DC_SNTAG]
948	add	%o1, CH_DC_DATA, %o1
949	clr	%o3
	/* %o3 = byte offset; it is advanced in the branch delay slot. */
9502:
951	membar	#Sync				! required before ASI_DC_DATA
952	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
953	membar	#Sync				! required after ASI_DC_DATA
954	stx	%o2, [%o1 + %o3]
955	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
956	blt	2b
957	  add	%o3, 8, %o3
958
959	/*
960	 * Unlike other CPUs in the family, D$ data parity bits for Panther
961	 * do not reside in the microtag. Instead, we have to read them
962	 * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
963	 * of just having 8 parity bits to protect all 32 bytes of data
964	 * per line, we now have 32 bits of parity.
965	 */
966	GET_CPU_IMPL(%o3)
967	cmp	%o3, PANTHER_IMPL
968	bne	4f
969	  clr	%o3
970
971	/*
972	 * move our pointer to the next field where we store parity bits
973	 * and add the offset of the last parity byte since we will be
974	 * storing all 4 parity bytes within one 64 bit field like this:
975	 *
976	 * +------+------------+------------+------------+------------+
977	 * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
978	 * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
979	 * +------+------------+------------+------------+------------+
980	 *  63:32     31:24        23:16         15:8          7:0
981	 */
982	add	%o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1
983
984	/* add the DC_data_parity bit into our working index */
985	mov	1, %o2
986	sll	%o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
987	or	%o0, %o2, %o0
	/* One parity byte is stored per pass, at descending addresses. */
9883:
989	membar	#Sync				! required before ASI_DC_DATA
990	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
991	membar	#Sync				! required after ASI_DC_DATA
992	stb	%o2, [%o1]
993	dec	%o1
994	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
995	blt	3b
996	  add	%o3, 8, %o3
9974:
998	retl
999	  wrpr	%g0, %o5, %pstate
1000	SET_SIZE(get_dcache_dtag)
1001
1002#endif	/* lint */
1003
1004
1005#if defined(lint)
1006
/* Lint-only C stub; the real implementation is the assembly below. */
1007/* ARGSUSED */
1008void
1009get_icache_dtag(uint32_t ecache_idx, uint64_t *data)
1010{}
1011
1012#else	/* lint */
1013
1014/*
1015 * Get icache data and tag.  The data argument is a pointer to a ch_ic_data_t
1016 * structure (see cheetahregs.h):
1017 * The Icache *Must* be turned off when this function is called.
1018 * This is because diagnostic accesses to the Icache interfere with cache
1019 * consistency.
 *
 *	%o0 - I$ index
 *	%o1 - pointer to the output structure
 *
 * Four ASI_IC_TAG reads are made, at the base index and then at the
 * CH_ICTAG_UTAG, CH_ICTAG_UPPER and CH_ICTAG_LOWER offsets, stored to
 * CH_IC_PATAG, CH_IC_UTAG, CH_IC_UPPER and CH_IC_LOWER.  The snoop tag and
 * PN_IC_DATA_REG_SIZE bytes of data follow.  PSTATE_IE and PSTATE_AM are
 * cleared while running; the saved %pstate (%o5) is restored in the retl
 * delay slot.  Modifies %o0-%o3 and %o5.
1020 */
1021	.align	128
1022	ENTRY(get_icache_dtag)
1023	rdpr	%pstate, %o5
1024	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
1025	wrpr	%g0, %o3, %pstate
1026
1027	stx	%o0, [%o1 + CH_IC_IDX]
1028	ldxa	[%o0]ASI_IC_TAG, %o2
1029	stx	%o2, [%o1 + CH_IC_PATAG]
1030	add	%o0, CH_ICTAG_UTAG, %o0
1031	ldxa	[%o0]ASI_IC_TAG, %o2
1032	add	%o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
1033	stx	%o2, [%o1 + CH_IC_UTAG]
1034	ldxa	[%o0]ASI_IC_TAG, %o2
1035	add	%o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
1036	stx	%o2, [%o1 + CH_IC_UPPER]
1037	ldxa	[%o0]ASI_IC_TAG, %o2
	/* Strip the tag-select bits again before the snoop tag and data reads. */
1038	andn	%o0, CH_ICTAG_TMASK, %o0
1039	stx	%o2, [%o1 + CH_IC_LOWER]
1040	ldxa	[%o0]ASI_IC_SNP_TAG, %o2
1041	stx	%o2, [%o1 + CH_IC_SNTAG]
1042	add	%o1, CH_IC_DATA, %o1
1043	clr	%o3
	/* %o3 = byte offset; it is advanced in the branch delay slot. */
10442:
1045	ldxa	[%o0 + %o3]ASI_IC_DATA, %o2
1046	stx	%o2, [%o1 + %o3]
1047	cmp	%o3, PN_IC_DATA_REG_SIZE - 8
1048	blt	2b
1049	  add	%o3, 8, %o3
1050
1051	retl
1052	  wrpr	%g0, %o5, %pstate
1053	SET_SIZE(get_icache_dtag)
1054
1055#endif	/* lint */
1056
1057#if defined(lint)
1058
/* Lint-only C stub; the real implementation is the assembly below. */
1059/* ARGSUSED */
1060void
1061get_pcache_dtag(uint32_t pcache_idx, uint64_t *data)
1062{}
1063
1064#else	/* lint */
1065
1066/*
1067 * Get pcache data and tags.
1068 * inputs:
1069 *   pcache_idx	- fully constructed VA for accessing P$ diagnostic
1070 *		  registers. Contains PC_way and PC_addr shifted into
1071 *		  the correct bit positions. See the PRM for more details.
1072 *   data	- pointer to a ch_pc_data_t
1073 * structure (see cheetahregs.h):
 *
 * Stores the index, status, tag and snoop tag, then CH_PC_DATA_REG_SIZE
 * bytes of data read 8 bytes at a time.  PSTATE_IE and PSTATE_AM are
 * cleared while running; the saved %pstate (%o5) is restored in the retl
 * delay slot.  Modifies %o1-%o3 and %o5.
1074 */
1075	.align	128
1076	ENTRY(get_pcache_dtag)
1077	rdpr	%pstate, %o5
1078	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
1079	wrpr	%g0, %o3, %pstate
1080
1081	stx	%o0, [%o1 + CH_PC_IDX]
1082	ldxa	[%o0]ASI_PC_STATUS_DATA, %o2
1083	stx	%o2, [%o1 + CH_PC_STATUS]
1084	ldxa	[%o0]ASI_PC_TAG, %o2
1085	stx	%o2, [%o1 + CH_PC_TAG]
1086	ldxa	[%o0]ASI_PC_SNP_TAG, %o2
1087	stx	%o2, [%o1 + CH_PC_SNTAG]
1088	add	%o1, CH_PC_DATA, %o1
1089	clr	%o3
	/* %o3 = byte offset; it is advanced in the branch delay slot. */
10902:
1091	ldxa	[%o0 + %o3]ASI_PC_DATA, %o2
1092	stx	%o2, [%o1 + %o3]
1093	cmp	%o3, CH_PC_DATA_REG_SIZE - 8
1094	blt	2b
1095	  add	%o3, 8, %o3
1096
1097	retl
1098	  wrpr	%g0, %o5, %pstate
1099	SET_SIZE(get_pcache_dtag)
1100
1101#endif	/* lint */
1102
1103#endif	/* CPU_IMP_L1_CACHE_PARITY */
1104
1105#if defined(lint)
1106
/* Lint-only C stub; the real implementation is the assembly below. */
1107/* ARGSUSED */
1108void
1109set_dcu(uint64_t dcu)
1110{}
1111
1112#else	/* lint */
1113
1114/*
1115 * re-enable the i$, d$, w$, and p$ according to bootup cache state.
1116 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
1117 *   %o0 - 64 bit constant
 *
 * The value in %o0 is written to ASI_DCU unmodified; this routine does
 * no masking or merging of its own.
1118 */
1119	ENTRY(set_dcu)
1120	stxa	%o0, [%g0]ASI_DCU	! Store to DCU
1121	flush	%g0	/* flush required after changing the IC bit */
1122	retl
1123	nop
1124	SET_SIZE(set_dcu)
1125
1126#endif	/* lint */
1127
1128
1129#if defined(lint)
1130
/* Lint-only C stub; the real implementation is the assembly below. */
1131uint64_t
1132get_dcu(void)
1133{
1134	return ((uint64_t)0);
1135}
1136
1137#else	/* lint */
1138
1139/*
1140 * Return DCU register.
 * The raw 64-bit value read from ASI_DCU is returned in %o0.
1141 */
1142	ENTRY(get_dcu)
1143	ldxa	[%g0]ASI_DCU, %o0		/* DCU control register */
1144	retl
1145	nop
1146	SET_SIZE(get_dcu)
1147
1148#endif	/* lint */
1149
1150/*
1151 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
1152 *
1153 * This handler is used to check for softints generated by error trap
1154 * handlers to report errors.  On Cheetah, this mechanism is used by the
1155 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
1156 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
1157 * NB: Must be 8 instructions or less to fit in trap table and code must
1158 *     be relocatable.
 *
 * The entry consists solely of ASM_JMP to ch_pil15_interrupt, using %g1
 * as the macro's register argument.
1159 */
1160#if defined(lint)
1161
/* Lint-only C stub; the real implementation is the assembly below. */
1162void
1163ch_pil15_interrupt_instr(void)
1164{}
1165
1166#else	/* lint */
1167
1168	ENTRY_NP(ch_pil15_interrupt_instr)
1169	ASM_JMP(%g1, ch_pil15_interrupt)
1170	SET_SIZE(ch_pil15_interrupt_instr)
1171
1172#endif
1173
1174
1175#if defined(lint)
1176
/* Lint-only C stub; the real implementation is the assembly below. */
1177void
1178ch_pil15_interrupt(void)
1179{}
1180
1181#else	/* lint */
1182
/*
 * ch_pil15_interrupt: level 15 interrupt dispatcher.
 *
 * If this CPU's byte in ch_err_tl1_pending is non-zero, the byte is
 * cleared, the PIL 15 softint is cleared, and control passes to sys_trap
 * with %g1 = cpu_tl1_error, %g2 = 0 and %g4 = PIL_15.  Otherwise control
 * jumps to pil_interrupt with %g4 = PIL_15.
 */
1183	ENTRY_NP(ch_pil15_interrupt)
1184
1185	/*
1186	 * Since pil_interrupt is hacked to assume that every level 15
1187	 * interrupt is generated by the CPU to indicate a performance
1188	 * counter overflow this gets ugly.  Before calling pil_interrupt
1189	 * the Error at TL>0 pending status is inspected.  If it is
1190	 * non-zero, then an error has occurred and it is handled.
1191	 * Otherwise control is transferred to pil_interrupt.  Note that if
1192	 * an error is detected pil_interrupt will not be called and
1193	 * overflow interrupts may be lost causing erroneous performance
1194	 * measurements.  However, error-recovery will have a detrimental
1195	 * effect on performance anyway.
1196	 */
	/* ch_err_tl1_pending is indexed with the value CPU_INDEX leaves in %g1. */
1197	CPU_INDEX(%g1, %g4)
1198	set	ch_err_tl1_pending, %g4
1199	ldub	[%g1 + %g4], %g2
1200	brz	%g2, 1f
1201	  nop
1202
1203	/*
1204	 * We have a pending TL>0 error, clear the TL>0 pending status.
1205	 */
1206	stb	%g0, [%g1 + %g4]
1207
1208	/*
1209	 * Clear the softint.
1210	 */
1211	mov	1, %g5
1212	sll	%g5, PIL_15, %g5
1213	wr	%g5, CLEAR_SOFTINT
1214
1215	/*
1216	 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
1217	 * to process the Fast ECC/Cache Parity at TL>0 error.  Clear
1218	 * panic flag (%g2).
1219	 */
1220	set	cpu_tl1_error, %g1
1221	clr	%g2
1222	ba	sys_trap
1223	  mov	PIL_15, %g4
1224
12251:
1226	/*
1227	 * The logout is invalid.
1228	 *
1229	 * Call the default interrupt handler.
1230	 */
1231	sethi	%hi(pil_interrupt), %g1
1232	jmp	%g1 + %lo(pil_interrupt)
1233	  mov	PIL_15, %g4
1234
1235	SET_SIZE(ch_pil15_interrupt)
1236#endif
1237
1238
1239/*
1240 * Error Handling
1241 *
1242 * Cheetah provides error checking for all memory access paths between
1243 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
1244 * information is logged in the AFSR, (also AFSR_EXT for Panther) and
1245 * AFAR and one of the following traps is generated (provided that it
1246 * is enabled in External Cache Error Enable Register) to handle that
1247 * error:
1248 * 1. trap 0x70: Precise trap
1249 *    tt0_fecc for errors at trap level(TL)>=0
1250 * 2. trap 0x0A and 0x32: Deferred trap
1251 *    async_err for errors at TL>=0
1252 * 3. trap 0x63: Disrupting trap
1253 *    ce_err for errors at TL=0
1254 *    (Note that trap 0x63 cannot happen at trap level > 0)
1255 *
1256 * Trap level one handlers panic the system except for the fast ecc
1257 * error handler which tries to recover from certain errors.
1258 */
1259
1260/*
1261 * FAST ECC TRAP STRATEGY:
1262 *
1263 * Software must handle single and multi bit errors which occur due to data
1264 * or instruction cache reads from the external cache. A single or multi bit
1265 * error occurring in one of these situations results in a precise trap.
1266 *
1267 * The basic flow of this trap handler is as follows:
1268 *
1269 * 1) Record the state and then turn off the Dcache and Icache.  The Dcache
1270 *    is disabled because bad data could have been installed.  The Icache is
1271 *    turned off because we want to capture the Icache line related to the
1272 *    AFAR.
1273 * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing.
1274 * 3) Park sibling core if caches are shared (to avoid race condition while
1275 *    accessing shared resources such as L3 data staging register during
1276 *    CPU logout).
1277 * 4) Read the AFAR and AFSR.
1278 * 5) If CPU logout structure is not being used, then:
1279 *    6) Clear all errors from the AFSR.
1280 *    7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
1281 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
1282 *       state.
1283 *    9) Unpark sibling core if we parked it earlier.
1284 *    10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
1285 *        running at PIL 15.
1286 * 6) Otherwise, if CPU logout structure is being used:
1287 *    7) Increment the "logout busy count".
1288 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
1289 *       state.
1290 *    9) Unpark sibling core if we parked it earlier.
1291 *    10) Issue a retry since the other CPU error logging code will end up
1292 *       finding this error bit and logging information about it later.
1293 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
1294 *    yet initialized such that we can't even check the logout struct, then
1295 *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
1296 *    call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
1297 *    to determine information such as TL, TT, CEEN and NCEEN settings, etc
1298 *    in the high level trap handler since we don't have access to detailed
1299 *    logout information in cases where the cpu_private struct is not yet
1300 *    initialized.
1301 *
1302 * We flush the E$ and D$ here on TL=1 code to prevent getting nested
1303 * Fast ECC traps in the TL=0 code.  If we get a Fast ECC event here in
1304 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
1305 * since it uses different code/data from this handler, has a better
1306 * chance of fixing things up than simply recursing through this code
1307 * again (this would probably cause an eventual kernel stack overflow).
1308 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
1309 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
1310 * the Fast ECC at TL>0 handler and eventually enter Red Mode.
1311 *
1312 * Note that for Cheetah (and only Cheetah), we use alias addresses for
1313 * flushing rather than ASI accesses (which don't exist on Cheetah).
1314 * Should we encounter a Fast ECC error within this handler on Cheetah,
1315 * there's a good chance it's within the ecache_flushaddr buffer (since
1316 * it's the largest piece of memory we touch in the handler and it is
1317 * usually kernel text/data).  For that reason the Fast ECC at TL>0
1318 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
1319 */
1320
1321/*
1322 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
1323 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
1324 * architecture-specific files.
1325 * NB: Must be 8 instructions or less to fit in trap table and code must
1326 *     be relocatable.
1327 */
1328
1329#if defined(lint)
1330
/* Empty C stand-in, compiled only when lint is defined. */
1331void
1332fecc_err_instr(void)
1333{}
1334
1335#else	/* lint */
1336
	/*
	 * Trap-table stub for the TL=0 fast ECC trap.  It saves the DCU
	 * register in %g1, disables the Dcache and Icache, and transfers
	 * control to fast_ecc_err.
	 *
	 * Register contract visible in this file:
	 *   %g1 - saved (pre-trap) DCU value; fast_ecc_err writes it back to
	 *         ASI_DCU when it restores the caches, so it must stay live.
	 *   %g4 - scratch (new DCU value, then the ASM_JMP register argument).
	 */
1337	ENTRY_NP(fecc_err_instr)
1338	membar	#Sync			! Cheetah requires membar #Sync
1339
1340	/*
1341	 * Save current DCU state.  Turn off the Dcache and Icache.
1342	 */
1343	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1344	andn	%g1, DCU_DC + DCU_IC, %g4	/* %g4 = DCU with DC and IC bits cleared */
1345	stxa	%g4, [%g0]ASI_DCU
1346	flush	%g0	/* flush required after changing the IC bit */
1347
	/*
	 * NOTE(review): ASM_JMP is defined elsewhere; presumably it builds
	 * the address of fast_ecc_err in %g4 and jumps there.  Its expansion
	 * plus the five instructions above must fit the 8-instruction trap
	 * table slot -- confirm if this stub is ever extended.
	 */
1348	ASM_JMP(%g4, fast_ecc_err)
1349	SET_SIZE(fecc_err_instr)
1350
1351#endif	/* lint */
1352
1353
1354#if !(defined(JALAPENO) || defined(SERRANO))
1355
1356#if defined(lint)
1357
/* Empty C stand-in, compiled only when lint is defined. */
1358void
1359fast_ecc_err(void)
1360{}
1361
1362#else	/* lint */
1363
	/*
	 * fast_ecc_err: body of the TL=0 fast ECC handler, reached from the
	 * fecc_err_instr trap-table stub.  Only built when neither JALAPENO
	 * nor SERRANO is defined.
	 *
	 * Register usage as visible in this routine:
	 *   %g1 - on entry, the DCU value saved by fecc_err_instr; also
	 *         carries the "sibling parked" marker between
	 *         PARK_SIBLING_CORE and UNPARK_SIBLING_CORE.  Written back
	 *         to ASI_DCU near the end, then reused for the sys_trap
	 *         handler address or the ptl1_panic reason code.
	 *   %g2 - AFAR / clo_flags output of DO_CPU_LOGOUT (sys_trap arg #1).
	 *   %g3 - first the original ESTATE_ERR value, then the "failed?"
	 *         result of DO_CPU_LOGOUT (0 = logged, non-zero = busy count).
	 *   %g4-%g7 - scratch.
	 *
	 * After DO_CPU_LOGOUT, every macro below is handed only %g4-%g7 (and
	 * %g1 for the unpark), which is what lets %g2/%g3 survive to the exit
	 * paths -- as far as the argument lists show; the macro bodies are
	 * defined elsewhere.
	 *
	 * Exits: sys_trap(cpu_fast_ecc_error) on a successful logout, retry
	 * when the logout area was busy, ptl1_panic(PTL1_BAD_ECC) when the
	 * busy count has reached CLO_NESTING_MAX.
	 */
1364	.section ".text"
1365	.align	64
1366	ENTRY_NP(fast_ecc_err)
1367
1368	/*
1369	 * Turn off CEEN and NCEEN.
1370	 */
1371	ldxa	[%g0]ASI_ESTATE_ERR, %g3	/* %g3 = original error enables */
1372	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1373	stxa	%g4, [%g0]ASI_ESTATE_ERR
1374	membar	#Sync			! membar sync required
1375
1376	/*
1377	 * Check to see whether we need to park our sibling core
1378	 * before recording diagnostic information from caches
1379	 * which may be shared by both cores.
1380	 * We use %g1 to store information about whether or not
1381	 * we had to park the core (%g1 holds our DCUCR value and
1382	 * we only use bits from that register which are "reserved"
1383	 * to keep track of core parking) so that we know whether
1384	 * or not to unpark later. %g5 and %g4 are scratch registers.
1385	 */
1386	PARK_SIBLING_CORE(%g1, %g5, %g4)
1387
1388	/*
1389	 * Do the CPU log out capture.
1390	 *   %g3 = "failed?" return value.
1391	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1392	 *         into this macro via %g4. Output only valid if cpu_private
1393	 *         struct has not been initialized.
1394	 *   CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
1395	 *   %g4 = Trap information stored in the cpu logout flags field
1396	 *   %g5 = scr1
1397	 *   %g6 = scr2
1398	 *   %g3 = scr3
1399	 *   %g4 = scr4
1400	 */
1401	 /* store the CEEN and NCEEN values, TL=0 */
1402	and	%g3, EN_REG_CEEN + EN_REG_NCEEN, %g4	/* from the pre-disable ESTATE_ERR value */
1403	set	CHPR_FECCTL0_LOGOUT, %g6
1404	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1405
1406	/*
1407	 * Flush the Ecache (and L2 cache for Panther) to get the error out
1408	 * of the Ecache.  If the UCC or UCU is on a dirty line, then the
1409	 * following flush will turn that into a WDC or WDU, respectively.
1410	 */
1411	PN_L2_FLUSHALL(%g4, %g5, %g6)
1412
	/*
	 * Index into cpunodes[] (entries of CPU_NODE_SIZE bytes) for this
	 * CPU and fetch its Ecache geometry: %g5 = line size, %g4 = size.
	 * The size load comes last because it overwrites the entry pointer
	 * held in %g4.
	 */
1413	CPU_INDEX(%g4, %g5)
1414	mulx	%g4, CPU_NODE_SIZE, %g4
1415	set	cpunodes, %g5
1416	add	%g4, %g5, %g4
1417	ld	[%g4 + ECACHE_LINESIZE], %g5
1418	ld	[%g4 + ECACHE_SIZE], %g4
1419
1420	ASM_LDX(%g6, ecache_flushaddr)
1421	ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)
1422
1423	/*
1424	 * Flush the Dcache.  Since bad data could have been installed in
1425	 * the Dcache we must flush it before re-enabling it.
1426	 */
1427	ASM_LD(%g5, dcache_size)
1428	ASM_LD(%g6, dcache_linesize)
1429	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1430
1431	/*
1432	 * Flush the Icache.  Since we turned off the Icache to capture the
1433	 * Icache line it is now stale or corrupted and we must flush it
1434	 * before re-enabling it.
	 *
	 * The Icache geometry comes from the per-CPU private area when
	 * GET_CPU_PRIVATE_PTR yields a pointer in %g5; otherwise control
	 * goes to fast_ecc_err_5 and the global icache_size and
	 * icache_linesize are used (presumably the "private pointer not
	 * set up" case -- the macro is defined elsewhere).
1435	 */
1436	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
1437	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1438	ba,pt	%icc, 6f
1439	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5	/* delay slot: overwrites the base pointer, so it goes last */
1440fast_ecc_err_5:
1441	ASM_LD(%g5, icache_size)
1442	ASM_LD(%g6, icache_linesize)
14436:
1444	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1445
1446	/*
1447	 * check to see whether we parked our sibling core at the start
1448	 * of this handler. If so, we need to unpark it here.
1449	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1450	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1451	 */
1452	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1453
1454	/*
1455	 * Restore the Dcache and Icache to the previous state.
1456	 */
1457	stxa	%g1, [%g0]ASI_DCU
1458	flush	%g0	/* flush required after changing the IC bit */
1459
1460	/*
1461	 * Make sure our CPU logout operation was successful.
	 * %g3 == 0 means the logout data was captured; go deliver it.
1462	 */
1463	cmp	%g3, %g0
1464	be	8f
1465	  nop
1466
1467	/*
1468	 * If the logout structure had been busy, how many times have
1469	 * we tried to use it and failed (nesting count)? If we have
1470	 * already recursed a substantial number of times, then we can
1471	 * assume things are not going to get better by themselves and
1472	 * so it would be best to panic.
1473	 */
1474	cmp	%g3, CLO_NESTING_MAX
1475	blt	7f
1476	  nop
1477
	/* Busy count >= CLO_NESTING_MAX: panic; the reason code goes in %g1. */
1478        call ptl1_panic
1479          mov   PTL1_BAD_ECC, %g1
1480
14817:
1482	/*
1483	 * Otherwise, if the logout structure was busy but we have not
1484	 * nested more times than our maximum value, then we simply
1485	 * issue a retry. Our TL=0 trap handler code will check and
1486	 * clear the AFSR after it is done logging what is currently
1487	 * in the logout struct and handle this event at that time.
1488	 */
1489	retry
14908:
1491	/*
1492	 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
1493	 * already at PIL 15.
	 * %g1 = handler, %g4 = PIL to run at.  The conditional move in the
	 * delay slot raises %g4 to PIL_14 only when the current %pil is
	 * below PIL_14; otherwise %g4 keeps the current %pil.
1494	 */
1495	set	cpu_fast_ecc_error, %g1
1496	rdpr	%pil, %g4
1497	cmp	%g4, PIL_14
1498	ba	sys_trap
1499	  movl	%icc, PIL_14, %g4
1500
1501	SET_SIZE(fast_ecc_err)
1502
1503#endif	/* lint */
1504
1505#endif	/* !(JALAPENO || SERRANO) */
1506
1507
1508/*
1509 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1510 *
1511 * The basic flow of this trap handler is as follows:
1512 *
1513 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1514 *    software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1515 *    will use to save %g1 and %g2.
1516 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1517 *    we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1518 *    handler (using the just saved %g1).
1519 * 3) Turn off the Dcache if it was on and save the state of the Dcache
1520 *    (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1521 *    NB: we don't turn off the Icache because bad data is not installed nor
1522 *        will we be doing any diagnostic accesses.
1523 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1524 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1525 *    %tpc, %tnpc, %tstate values previously saved).
1526 * 6) set %tl to %tl - 1.
1527 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
1528 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
1529 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear.  For
1530 *    Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
1531 *    Save the values in ch_err_tl1_data.  For Panther, read the shadow
1532 *    AFSR_EXT and save the value in ch_err_tl1_data.
1533 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
1534 *    being queued.  We'll report them via the AFSR/AFAR capture in step 13.
1535 * 11) Flush the Ecache.
1536 *    NB: the Ecache is flushed assuming the largest possible size with
1537 *        the smallest possible line size since access to the cpu_nodes may
1538 *        cause an unrecoverable DTLB miss.
1539 * 12) Reenable CEEN/NCEEN with the value saved from step 10.
1540 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
1541 *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
1542 *    Save the read AFSR/AFAR values in ch_err_tl1_data.  For Panther,
1543 *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1544 * 14) Flush and re-enable the Dcache if it was on at step 3.
1545 * 15) Do TRAPTRACE if enabled.
1546 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1547 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1548 * 18) Cause a softint 15.  The pil15_interrupt handler will inspect the
1549 *    event pending flag and call cpu_tl1_error via systrap if set.
1550 * 19) Restore the registers from step 5 and issue retry.
1551 */
1552
1553/*
1554 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1555 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1556 * architecture-specific files.  This generates a "Software Trap 0" at TL>0,
1557 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1558 * NB: Must be 8 instructions or less to fit in trap table and code must
1559 *     be relocatable.
1560 */
1561
1562#if defined(lint)
1563
/* Empty C stand-in, compiled only when lint is defined. */
1564void
1565fecc_err_tl1_instr(void)
1566{}
1567
1568#else	/* lint */
1569
	/*
	 * Trap-table stub for the fast ECC trap at TL>0.  The whole body is
	 * the CH_ERR_TL1_TRAPENTRY macro (defined elsewhere), invoked with
	 * SWTRAP_0 so that handling continues in the software trap 0 entry,
	 * fecc_err_tl1_cont_instr.
	 */
1570	ENTRY_NP(fecc_err_tl1_instr)
1571	CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
1572	SET_SIZE(fecc_err_tl1_instr)
1573
1574#endif	/* lint */
1575
1576/*
1577 * Software trap 0 at TL>0.
1578 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1579 * the various architecture-specific files.  This is used as a continuation
1580 * of the fast ecc handling where we've bought an extra TL level, so we can
1581 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1582 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1583 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1584 * order two bits from %g1 and %g2 respectively).
1585 * NB: Must be 8 instructions or less to fit in trap table and code must
1586 *     be relocatable.
1587 */
1588#if defined(lint)
1589
/* Empty C stand-in, compiled only when lint is defined. */
1590void
1591fecc_err_tl1_cont_instr(void)
1592{}
1593
1594#else	/* lint */
1595
	/*
	 * Software trap 0 at TL>0 continuation stub.  The whole body is the
	 * CH_ERR_TL1_SWTRAPENTRY macro (defined elsewhere), which is given
	 * fast_ecc_tl1_err as the handler to continue in.
	 */
1596	ENTRY_NP(fecc_err_tl1_cont_instr)
1597	CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
1598	SET_SIZE(fecc_err_tl1_cont_instr)
1599
1600#endif	/* lint */
1601
1602
1603#if defined(lint)
1604
/* Empty C stand-in, compiled only when lint is defined. */
1605void
1606ce_err(void)
1607{}
1608
1609#else	/* lint */
1610
1611/*
1612 * The ce_err function handles disrupting trap type 0x63 at TL=0.
1613 *
1614 * AFSR errors bits which cause this trap are:
1615 *	CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1616 *
1617 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1618 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1619 *
1620 * CEEN Bit of Cheetah External Cache Error Enable Register enables
1621 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1622 *
1623 * Cheetah+ also handles (No additional processing required):
1624 *    DUE, DTO, DBERR	(NCEEN controlled)
1625 *    THCE		(CEEN and ET_ECC_en controlled)
1626 *    TUE		(ET_ECC_en controlled)
1627 *
1628 * Panther further adds:
1629 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1630 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1631 *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1632 *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1633 *    THCE			(CEEN and L2_tag_ECC_en controlled)
1634 *    L3_THCE			(CEEN and ET_ECC_en controlled)
1635 *
1636 * Steps:
1637 *	1. Disable hardware corrected disrupting errors only (CEEN)
1638 *	2. Park sibling core if caches are shared (to avoid race
1639 *	   condition while accessing shared resources such as L3
1640 *	   data staging register during CPU logout).
1641 *	3. If the CPU logout structure is not currently being used:
1642 *		4. Clear AFSR error bits
1643 *		5. Capture Ecache, Dcache and Icache lines associated
1644 *		   with AFAR.
1645 *		6. Unpark sibling core if we parked it earlier.
1646 *		7. call cpu_disrupting_error via sys_trap at PIL 14
1647 *		   unless we're already running at PIL 15.
1648 *	4. Otherwise, if the CPU logout structure is busy:
1649 *		5. Increment "logout busy count" and place into %g3
1650 *		6. Unpark sibling core if we parked it earlier.
1651 *		7. Issue a retry since the other CPU error logging
1652 *		   code will end up finding this error bit and logging
1653 *		   information about it later.
1654 *	5. Alternatively (to 3 and 4 above), if the cpu_private struct is
1655 *         not yet initialized such that we can't even check the logout
1656 *         struct, then we place the clo_flags data into %g2
1657 *         (sys_trap->have_win arg #1) and call cpu_disrupting_error via
1658 *         systrap. The clo_flags parameter is used to determine information
1659 *         such as TL, TT, CEEN settings, etc in the high level trap
1660 *         handler since we don't have access to detailed logout information
1661 *         in cases where the cpu_private struct is not yet initialized.
1662 *
1663 * %g3: [ logout busy count ] - arg #2
1664 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
 *
 * Register usage as visible in the code below:
 *   %g1 - briefly the ESTATE_ERR value (not kept once written back),
 *         then the saved DCU value plus the "sibling parked" marker;
 *         finally the sys_trap handler address or ptl1_panic reason.
 *   %g4-%g7 - scratch.
 * Unlike fast_ecc_err, only the Icache is turned off and flushed here;
 * the Dcache enable bit is left as it was.
1665 */
1666
1667	.align	128
1668	ENTRY_NP(ce_err)
1669	membar	#Sync			! Cheetah requires membar #Sync
1670
1671	/*
1672	 * Disable trap on hardware corrected errors (CEEN) while at TL=0
1673	 * to prevent recursion.
1674	 */
1675	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1676	bclr	EN_REG_CEEN, %g1
1677	stxa	%g1, [%g0]ASI_ESTATE_ERR
1678	membar	#Sync			! membar sync required
1679
1680	/*
1681	 * Save current DCU state.  Turn off Icache to allow capture of
1682	 * Icache data by DO_CPU_LOGOUT.
1683	 */
1684	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1685	andn	%g1, DCU_IC, %g4
1686	stxa	%g4, [%g0]ASI_DCU
1687	flush	%g0	/* flush required after changing the IC bit */
1688
1689	/*
1690	 * Check to see whether we need to park our sibling core
1691	 * before recording diagnostic information from caches
1692	 * which may be shared by both cores.
1693	 * We use %g1 to store information about whether or not
1694	 * we had to park the core (%g1 holds our DCUCR value and
1695	 * we only use bits from that register which are "reserved"
1696	 * to keep track of core parking) so that we know whether
1697	 * or not to unpark later. %g5 and %g4 are scratch registers.
1698	 */
1699	PARK_SIBLING_CORE(%g1, %g5, %g4)
1700
1701	/*
1702	 * Do the CPU log out capture.
1703	 *   %g3 = "failed?" return value.
1704	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1705	 *         into this macro via %g4. Output only valid if cpu_private
1706	 *         struct has not been initialized.
1707	 *   CHPR_CECC_LOGOUT = cpu logout structure offset input
1708	 *   %g4 = Trap information stored in the cpu logout flags field
1709	 *   %g5 = scr1
1710	 *   %g6 = scr2
1711	 *   %g3 = scr3
1712	 *   %g4 = scr4
1713	 */
1714	clr	%g4			! TL=0 bit in afsr
1715	set	CHPR_CECC_LOGOUT, %g6
1716	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1717
1718	/*
1719	 * Flush the Icache.  Since we turned off the Icache to capture the
1720	 * Icache line it is now stale or corrupted and we must flush it
1721	 * before re-enabling it.
	 *
	 * Geometry comes from the per-CPU private area when
	 * GET_CPU_PRIVATE_PTR yields a pointer in %g5; otherwise control
	 * goes to ce_err_1 and the global icache_size and icache_linesize
	 * are used.
1722	 */
1723	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
1724	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1725	ba,pt	%icc, 2f
1726	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5	/* delay slot: overwrites the base pointer, so it goes last */
1727ce_err_1:
1728	ASM_LD(%g5, icache_size)
1729	ASM_LD(%g6, icache_linesize)
17302:
1731	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1732
1733	/*
1734	 * check to see whether we parked our sibling core at the start
1735	 * of this handler. If so, we need to unpark it here.
1736	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1737	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1738	 */
1739	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1740
1741	/*
1742	 * Restore Icache to previous state.
1743	 */
1744	stxa	%g1, [%g0]ASI_DCU
1745	flush	%g0	/* flush required after changing the IC bit */
1746
1747	/*
1748	 * Make sure our CPU logout operation was successful.
	 * %g3 == 0 means the logout data was captured; go deliver it.
1749	 */
1750	cmp	%g3, %g0
1751	be	4f
1752	  nop
1753
1754	/*
1755	 * If the logout structure had been busy, how many times have
1756	 * we tried to use it and failed (nesting count)? If we have
1757	 * already recursed a substantial number of times, then we can
1758	 * assume things are not going to get better by themselves and
1759	 * so it would be best to panic.
1760	 */
1761	cmp	%g3, CLO_NESTING_MAX
1762	blt	3f
1763	  nop
1764
	/* Busy count >= CLO_NESTING_MAX: panic; the reason code goes in %g1. */
1765        call ptl1_panic
1766          mov   PTL1_BAD_ECC, %g1
1767
17683:
1769	/*
1770	 * Otherwise, if the logout structure was busy but we have not
1771	 * nested more times than our maximum value, then we simply
1772	 * issue a retry. Our TL=0 trap handler code will check and
1773	 * clear the AFSR after it is done logging what is currently
1774	 * in the logout struct and handle this event at that time.
1775	 */
1776	retry
17774:
1778	/*
1779	 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
1780	 * already at PIL 15.
	 * %g1 = handler, %g4 = PIL to run at.  The conditional move in the
	 * delay slot raises %g4 to PIL_14 only when the current %pil is
	 * below PIL_14; otherwise %g4 keeps the current %pil.
1781	 */
1782	set	cpu_disrupting_error, %g1
1783	rdpr	%pil, %g4
1784	cmp	%g4, PIL_14
1785	ba	sys_trap
1786	  movl	%icc, PIL_14, %g4
1787	SET_SIZE(ce_err)
1788
1789#endif	/* lint */
1790
1791
1792#if defined(lint)
1793
1794/*
1795 * This trap cannot happen at TL>0 which means this routine will never
1796 * actually be called and so we treat this like a BAD TRAP panic.
1797 */
1798void
1799ce_err_tl1(void)
1800{}
1801
1802#else	/* lint */
1803
	/*
	 * Handler slot for trap 0x63 at TL>0.  It unconditionally calls
	 * ptl1_panic; the reason code PTL1_BAD_TRAP is loaded into %g1 in
	 * the delay slot of the call.
	 */
1804	.align	64
1805	ENTRY_NP(ce_err_tl1)
1806
1807        call ptl1_panic
1808          mov   PTL1_BAD_TRAP, %g1
1809
1810	SET_SIZE(ce_err_tl1)
1811
1812#endif	/* lint */
1813
1814
1815#if defined(lint)
1816
/* Empty C stand-in, compiled only when lint is defined. */
1817void
1818async_err(void)
1819{}
1820
1821#else	/* lint */
1822
1823/*
1824 * The async_err function handles deferred trap types 0xA
1825 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1826 *
1827 * AFSR errors bits which cause this trap are:
1828 *	UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1829 * On some platforms, EMU may cause cheetah to pull the error pin
1830 * never giving Solaris a chance to take a trap.
1831 *
1832 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1833 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1834 *
1835 * Steps:
1836 *	1. Disable CEEN and NCEEN errors to prevent recursive errors.
1837 *	2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1838 *         I$ line in DO_CPU_LOGOUT.
1839 *	3. Park sibling core if caches are shared (to avoid race
1840 *	   condition while accessing shared resources such as L3
1841 *	   data staging register during CPU logout).
1842 *	4. If the CPU logout structure is not currently being used:
1843 *		5. Clear AFSR error bits
1844 *		6. Capture Ecache, Dcache and Icache lines associated
1845 *		   with AFAR.
1846 *		7. Unpark sibling core if we parked it earlier.
1847 *		8. call cpu_deferred_error via sys_trap.
1848 *	5. Otherwise, if the CPU logout structure is busy:
1849 *		6. Increment "logout busy count"
1850 *		7. Unpark sibling core if we parked it earlier.
1851 *		8. Issue a retry since the other CPU error logging
1852 *		   code will end up finding this error bit and logging
1853 *		   information about it later.
1854 *      6. Alternatively (to 4 and 5 above), if the cpu_private struct is
1855 *         not yet initialized such that we can't even check the logout
1856 *         struct, then we place the clo_flags data into %g2
1857 *         (sys_trap->have_win arg #1) and call cpu_deferred_error via
1858 *         systrap. The clo_flags parameter is used to determine information
1859 *         such as TL, TT, CEEN settings, etc in the high level trap handler
1860 *         since we don't have access to detailed logout information in cases
1861 *         where the cpu_private struct is not yet initialized.
1862 *
1863 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1864 * %g3: [ logout busy count ] - arg #2
 *
 * Register usage as visible in the code below:
 *   %g1 - saved DCU value plus the "sibling parked" marker; finally the
 *         sys_trap handler address or ptl1_panic reason code.
 *   %g3 - original ESTATE_ERR value, then the DO_CPU_LOGOUT result.
 *   %g5 - read before this routine writes it: it is consumed as the
 *         trap type when the logout flags are built, so whoever enters
 *         here must have loaded it (the caller is not visible in this
 *         part of the file).  This is why PARK_SIBLING_CORE is given
 *         %g6/%g4 as scratch here rather than %g5.
 *   %g4, %g6, %g7 - scratch.
1865 */
1866
1867	ENTRY_NP(async_err)
1868	membar	#Sync			! Cheetah requires membar #Sync
1869
1870	/*
1871	 * Disable CEEN and NCEEN.
1872	 */
1873	ldxa	[%g0]ASI_ESTATE_ERR, %g3	/* %g3 = original error enables */
1874	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1875	stxa	%g4, [%g0]ASI_ESTATE_ERR
1876	membar	#Sync			! membar sync required
1877
1878	/*
1879	 * Save current DCU state.
1880	 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
1881	 * Do this regardless of whether this is a Data Access Error or
1882	 * Instruction Access Error Trap.
1883	 * Disable Dcache for both Data Access Error and Instruction Access
1884	 * Error per Cheetah PRM P.5 Note 6.
1885	 */
1886	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1887	andn	%g1, DCU_IC + DCU_DC, %g4
1888	stxa	%g4, [%g0]ASI_DCU
1889	flush	%g0	/* flush required after changing the IC bit */
1890
1891	/*
1892	 * Check to see whether we need to park our sibling core
1893	 * before recording diagnostic information from caches
1894	 * which may be shared by both cores.
1895	 * We use %g1 to store information about whether or not
1896	 * we had to park the core (%g1 holds our DCUCR value and
1897	 * we only use bits from that register which are "reserved"
1898	 * to keep track of core parking) so that we know whether
1899	 * or not to unpark later. %g6 and %g4 are scratch registers.
1900	 */
1901	PARK_SIBLING_CORE(%g1, %g6, %g4)
1902
1903	/*
1904	 * Do the CPU logout capture.
1905	 *
1906	 *   %g3 = "failed?" return value.
1907	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1908	 *         into this macro via %g4. Output only valid if cpu_private
1909	 *         struct has not been initialized.
1910	 *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
1911	 *   %g4 = Trap information stored in the cpu logout flags field
1912	 *   %g5 = scr1
1913	 *   %g6 = scr2
1914	 *   %g3 = scr3
1915	 *   %g4 = scr4
	 *
	 * The flags word handed over in %g4 is assembled here as:
	 *   ((T_TL1 bit of %g5 set ? 1 : 0) << CLO_FLAGS_TL_SHIFT)
	 *   | ((%g5 << CLO_FLAGS_TT_SHIFT) & CLO_FLAGS_TT_MASK)
	 *   | (original ESTATE_ERR & EN_REG_CEEN)
1916	 */
1917	andcc	%g5, T_TL1, %g0
1918	clr	%g6
1919	movnz	%xcc, 1, %g6			! set %g6 if T_TL1 set
1920	sllx	%g6, CLO_FLAGS_TL_SHIFT, %g6
1921	sllx	%g5, CLO_FLAGS_TT_SHIFT, %g4
1922	set	CLO_FLAGS_TT_MASK, %g2
1923	and	%g4, %g2, %g4			! ttype
1924	or	%g6, %g4, %g4			! TT and TL
1925	and	%g3, EN_REG_CEEN, %g3		! CEEN value
1926	or	%g3, %g4, %g4			! TT and TL and CEEN
1927	set	CHPR_ASYNC_LOGOUT, %g6
1928	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1929
1930	/*
1931	 * If the logout struct was busy, we may need to pass the
1932	 * TT, TL, and CEEN information to the TL=0 handler via
1933	 * systrap parameter so save it off here.
	 *
	 * When %g3 is non-zero it becomes (%g4 << 32) | %g3.  The later
	 * "cmp %g3, CLO_NESTING_MAX / blt" tests %icc, i.e. the low 32 bits,
	 * so the value packed into the upper half does not disturb the
	 * nesting check.
	 * NOTE(review): on the non-zero path the code below only retries or
	 * panics; where the packed upper half is consumed is not visible
	 * here.  It also relies on %g4 still holding the flags after
	 * DO_CPU_LOGOUT, which lists %g4 as scr4 -- confirm in the macro.
1934	 */
1935	cmp	%g3, %g0
1936	be	1f
1937	  nop
1938	sllx	%g4, 32, %g4
1939	or	%g4, %g3, %g3
19401:
1941	/*
1942	 * Flush the Icache.  Since we turned off the Icache to capture the
1943	 * Icache line it is now stale or corrupted and we must flush it
1944	 * before re-enabling it.
	 *
	 * Geometry comes from the per-CPU private area when
	 * GET_CPU_PRIVATE_PTR yields a pointer in %g5; otherwise control
	 * goes to async_err_1 and the global icache_size and
	 * icache_linesize are used.
1945	 */
1946	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
1947	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1948	ba,pt	%icc, 2f
1949	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5	/* delay slot: overwrites the base pointer, so it goes last */
1950async_err_1:
1951	ASM_LD(%g5, icache_size)
1952	ASM_LD(%g6, icache_linesize)
19532:
1954	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1955
1956	/*
1957	 * XXX - Don't we need to flush the Dcache before turning it back
1958	 *       on to avoid stale or corrupt data? Was this broken?
	 *
	 * NOTE(review): the question above appears to be answered by the
	 * Dcache flush that immediately follows; confirm and retire the XXX.
1959	 */
1960	/*
1961	 * Flush the Dcache before turning it back on since it may now
1962	 * contain stale or corrupt data.
1963	 */
1964	ASM_LD(%g5, dcache_size)
1965	ASM_LD(%g6, dcache_linesize)
1966	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1967
1968	/*
1969	 * check to see whether we parked our sibling core at the start
1970	 * of this handler. If so, we need to unpark it here.
1971	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1972	 * whether or not we need to unpark. %g5 and %g7 are scratch registers.
1973	 */
1974	UNPARK_SIBLING_CORE(%g1, %g5, %g7)
1975
1976	/*
1977	 * Restore Icache and Dcache to previous state.
1978	 */
1979	stxa	%g1, [%g0]ASI_DCU
1980	flush	%g0	/* flush required after changing the IC bit */
1981
1982	/*
1983	 * Make sure our CPU logout operation was successful.
	 * %g3 == 0 means the logout data was captured; go deliver it.
1984	 */
1985	cmp	%g3, %g0
1986	be	4f
1987	  nop
1988
1989	/*
1990	 * If the logout structure had been busy, how many times have
1991	 * we tried to use it and failed (nesting count)? If we have
1992	 * already recursed a substantial number of times, then we can
1993	 * assume things are not going to get better by themselves and
1994	 * so it would be best to panic.
1995	 */
1996	cmp	%g3, CLO_NESTING_MAX
1997	blt	3f
1998	  nop
1999
	/* Busy count >= CLO_NESTING_MAX: panic; the reason code goes in %g1. */
2000        call ptl1_panic
2001          mov   PTL1_BAD_ECC, %g1
2002
20033:
2004	/*
2005	 * Otherwise, if the logout structure was busy but we have not
2006	 * nested more times than our maximum value, then we simply
2007	 * issue a retry. Our TL=0 trap handler code will check and
2008	 * clear the AFSR after it is done logging what is currently
2009	 * in the logout struct and handle this event at that time.
2010	 */
2011	retry
20124:
	/*
	 * Logout succeeded: hand off to cpu_deferred_error via sys_trap,
	 * always at PIL 15 (no conditional PIL selection as in ce_err).
	 * NOTE(review): RESET_USER_RTT_REGS is defined elsewhere; it is
	 * given %g4/%g5 and the label 5f, which is the very next
	 * instruction, so both of its outcomes continue at 5.
	 */
2013	RESET_USER_RTT_REGS(%g4, %g5, 5f)
20145:
2015	set	cpu_deferred_error, %g1
2016	ba	sys_trap
2017	  mov	PIL_15, %g4		! run at pil 15
2018	SET_SIZE(async_err)
2019
2020#endif	/* lint */
2021
2022#if defined(CPU_IMP_L1_CACHE_PARITY)
2023
2024/*
2025 * D$ parity error trap (trap 71) at TL=0.
2026 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
2027 * the various architecture-specific files.  This merely sets up the
2028 * arguments for cpu_parity_error and calls it via sys_trap.
2029 * NB: Must be 8 instructions or less to fit in trap table and code must
2030 *     be relocatable.
2031 */
2032#if defined(lint)
2033
/* Empty C stand-in, compiled only when lint is defined. */
2034void
2035dcache_parity_instr(void)
2036{}
2037
2038#else	/* lint */
	/*
	 * Trap-table stub for the D$ parity trap at TL=0.  Loads the
	 * sys_trap arguments and jumps:
	 *   %g1 = cpu_parity_error (handler)
	 *   %g2 = CH_ERR_DPE (marks this as a D$ parity event)
	 *   %g3 = %tpc of the trapping instruction
	 *   %g4 = PIL_15 (set in the delay slot of the jmp)
	 *   %g7 = scratch used to form the address of sys_trap
	 * NOTE(review): "set" is a synthetic instruction that can expand to
	 * two instructions, so this stub may already fill the 8-instruction
	 * slot -- check before adding anything.
	 */
2039	ENTRY_NP(dcache_parity_instr)
2040	membar	#Sync			! Cheetah+ requires membar #Sync
2041	set	cpu_parity_error, %g1
2042	or	%g0, CH_ERR_DPE, %g2
2043	rdpr	%tpc, %g3
2044	sethi	%hi(sys_trap), %g7
2045	jmp	%g7 + %lo(sys_trap)
2046	  mov	PIL_15, %g4		! run at pil 15
2047	SET_SIZE(dcache_parity_instr)
2048
2049#endif	/* lint */
2050
2051
2052/*
2053 * D$ parity error trap (trap 71) at TL>0.
2054 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
2055 * the various architecture-specific files.  This generates a "Software
2056 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
2057 * continue the handling there.
2058 * NB: Must be 8 instructions or less to fit in trap table and code must
2059 *     be relocatable.
2060 */
2061#if defined(lint)
2062
/* Empty C stand-in, compiled only when lint is defined. */
2063void
2064dcache_parity_tl1_instr(void)
2065{}
2066
2067#else	/* lint */
	/*
	 * Trap-table stub for the D$ parity trap at TL>0.  The whole body is
	 * the CH_ERR_TL1_TRAPENTRY macro (defined elsewhere), invoked with
	 * SWTRAP_1 so that handling continues in the software trap 1 entry,
	 * dcache_parity_tl1_cont_instr.
	 */
2068	ENTRY_NP(dcache_parity_tl1_instr)
2069	CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
2070	SET_SIZE(dcache_parity_tl1_instr)
2071
2072#endif	/* lint */
2073
2074
2075/*
2076 * Software trap 1 at TL>0.
2077 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
2078 * of the various architecture-specific files.  This is used as a continuation
2079 * of the dcache parity handling where we've bought an extra TL level, so we
2080 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2081 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2082 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
2083 * order two bits from %g1 and %g2 respectively).
2084 * NB: Must be 8 instructions or less to fit in trap table and code must
2085 *     be relocatable.
2086 */
2087#if defined(lint)
2088
/* Empty C stand-in, compiled only when lint is defined. */
2089void
2090dcache_parity_tl1_cont_instr(void)
2091{}
2092
2093#else	/* lint */
	/*
	 * Software trap 1 at TL>0 continuation stub.  The whole body is the
	 * CH_ERR_TL1_SWTRAPENTRY macro (defined elsewhere), which is given
	 * dcache_parity_tl1_err as the handler to continue in.
	 */
2094	ENTRY_NP(dcache_parity_tl1_cont_instr)
2095	CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
2096	SET_SIZE(dcache_parity_tl1_cont_instr)
2097
2098#endif	/* lint */
2099
2100/*
2101 * D$ parity error at TL>0 handler
2102 * We get here via trap 71 at TL>0->Software trap 1 at TL>0.  We enter
2103 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2104 */
2105#if defined(lint)
2106
/* Empty C stand-in, compiled only when lint is defined. */
2107void
2108dcache_parity_tl1_err(void)
2109{}
2110
2111#else	/* lint */
2112
	/*
	 * D$ parity error handler body for TL>0.  In order it:
	 *   1. runs CH_ERR_TL1_ENTER(CH_ERR_DPE) to save the globals and
	 *      record the event,
	 *   2. under TRAPTRACE, appends one entry to this CPU's trap trace
	 *      buffer,
	 *   3. clears the (1 << DCU_PE_SHIFT) bit in the DCU register,
	 *   4. leaves through CH_ERR_TL1_EXIT.
	 * %g1 and %g2 are owned by the ENTER/EXIT macros; the code in
	 * between uses only %g3-%g7.
	 */
2113	ENTRY_NP(dcache_parity_tl1_err)
2114
2115	/*
2116	 * This macro saves all the %g registers in the ch_err_tl1_data
2117	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2118	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
2119	 * the ch_err_tl1_data structure and %g2 will have the original
2120	 * flags in the ch_err_tl1_data structure.  All %g registers
2121	 * except for %g1 and %g2 will be available.
2122	 */
2123	CH_ERR_TL1_ENTER(CH_ERR_DPE);
2124
2125#ifdef TRAPTRACE
2126	/*
2127	 * Get current trap trace entry physical pointer.
	 * %g6 = this CPU's control record inside trap_trace_ctl (CPU index
	 * shifted left by TRAPTR_SIZE_SHIFT).  A TRAPTR_LIMIT of zero skips
	 * tracing entirely.  Otherwise
	 * %g5 = TRAPTR_PBASE + TRAPTR_OFFSET, the entry to fill in.
2128	 */
2129	CPU_INDEX(%g6, %g5)
2130	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
2131	set	trap_trace_ctl, %g5
2132	add	%g6, %g5, %g6
2133	ld	[%g6 + TRAPTR_LIMIT], %g5
2134	tst	%g5
2135	be	%icc, dpe_tl1_skip_tt
2136	  nop
2137	ldx	[%g6 + TRAPTR_PBASE], %g5
2138	ld	[%g6 + TRAPTR_OFFSET], %g4
2139	add	%g5, %g4, %g5
2140
2141	/*
2142	 * Create trap trace entry.
	 * The current %asi is parked in %g7 and restored afterwards; the
	 * stores go through TRAPTR_ASI.  Recorded: STICK, %tl, %tt, %tpc,
	 * %tstate and %sp; the TR and F1-F4 fields are zeroed.
2143	 */
2144	rd	%asi, %g7
2145	wr	%g0, TRAPTR_ASI, %asi
2146	rd	STICK, %g4
2147	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
2148	rdpr	%tl, %g4
2149	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
2150	rdpr	%tt, %g4
2151	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
2152	rdpr	%tpc, %g4
2153	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
2154	rdpr	%tstate, %g4
2155	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
2156	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2157	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2158	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
2159	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
2160	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
2161	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
2162	wr	%g0, %g7, %asi
2163
2164	/*
2165	 * Advance trap trace pointer.
	 * The old offset is remembered in TRAPTR_LAST_OFFSET.  The new
	 * offset is old + TRAP_ENT_SIZE, wrapped to 0 once it reaches
	 * (TRAPTR_LIMIT - TRAP_ENT_SIZE) or beyond.
2166	 */
2167	ld	[%g6 + TRAPTR_OFFSET], %g5
2168	ld	[%g6 + TRAPTR_LIMIT], %g4
2169	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
2170	add	%g5, TRAP_ENT_SIZE, %g5
2171	sub	%g4, TRAP_ENT_SIZE, %g4
2172	cmp	%g5, %g4
2173	movge	%icc, 0, %g5
2174	st	%g5, [%g6 + TRAPTR_OFFSET]
2175dpe_tl1_skip_tt:
2176#endif	/* TRAPTRACE */
2177
2178	/*
2179	 * I$ and D$ are automatically turned off by HW when the CPU hits
2180	 * a dcache or icache parity error so we will just leave those two
2181	 * off for now to avoid repeating this trap.
2182	 * For Panther, however, since we trap on P$ data parity errors
2183	 * and HW does not automatically disable P$, we need to disable it
2184	 * here so that we don't encounter any recursive traps when we
2185	 * issue the retry.
	 *
	 * Read-modify-write of the DCU register: only the bit at
	 * DCU_PE_SHIFT is cleared, every other bit is written back as read.
2186	 */
2187	ldxa	[%g0]ASI_DCU, %g3
2188	mov	1, %g4
2189	sllx	%g4, DCU_PE_SHIFT, %g4
2190	andn	%g3, %g4, %g3
2191	stxa	%g3, [%g0]ASI_DCU
2192	membar	#Sync
2193
2194	/*
2195	 * We fall into this macro if we've successfully logged the error in
2196	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2197	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
2198	 * Restores the %g registers and issues retry.
2199	 */
2200	CH_ERR_TL1_EXIT;
2201	SET_SIZE(dcache_parity_tl1_err)
2202
2203#endif	/* lint */
2204
2205/*
2206 * I$ parity error trap (trap 72) at TL=0.
2207 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
2208 * the various architecture-specific files.  This merely sets up the
2209 * arguments for cpu_parity_error and calls it via sys_trap.
2210 * NB: Must be 8 instructions or less to fit in trap table and code must
2211 *     be relocatable.
2212 */
2213#if defined(lint)
2214
2215void
2216icache_parity_instr(void)
2217{}
2218
2219#else	/* lint */
2220
	/*
	 * Replacement trap-table entry for the I$ parity error at TL=0.
	 * Register set-up performed before jumping to sys_trap:
	 *	%g1 = address of cpu_parity_error
	 *	%g2 = CH_ERR_IPE
	 *	%g3 = %tpc of the trapping instruction
	 *	%g4 = PIL_15 (written in the delay slot of the jmp)
	 * %g7 is used as scratch to form the address of sys_trap.
	 * The target is reached with an absolute sethi/jmp pair rather than
	 * a PC-relative branch, which keeps the sequence relocatable.
	 * NOTE(review): "set" normally expands to two instructions, which
	 * would put this entry exactly at the eight-instruction limit --
	 * do not add instructions here.
	 */
	ENTRY_NP(icache_parity_instr)
	membar	#Sync			! Cheetah+ requires membar #Sync
	set	cpu_parity_error, %g1
	or	%g0, CH_ERR_IPE, %g2
	rdpr	%tpc, %g3
	sethi	%hi(sys_trap), %g7
	jmp	%g7 + %lo(sys_trap)
	  mov	PIL_15, %g4		! run at pil 15
	SET_SIZE(icache_parity_instr)
2230
2231#endif	/* lint */
2232
2233/*
2234 * I$ parity error trap (trap 72) at TL>0.
2235 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
2236 * the various architecture-specific files.  This generates a "Software
2237 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
2238 * continue the handling there.
2239 * NB: Must be 8 instructions or less to fit in trap table and code must
2240 *     be relocatable.
2241 */
2242#if defined(lint)
2243
2244void
2245icache_parity_tl1_instr(void)
2246{}
2247
2248#else	/* lint */
	/*
	 * Replacement trap-table entry for the I$ parity error at TL>0.
	 * The whole body is the CH_ERR_TL1_TRAPENTRY macro invoked with
	 * SWTRAP_2; per the description above, that raises software trap 2
	 * so handling continues in icache_parity_tl1_cont_instr.
	 */
	ENTRY_NP(icache_parity_tl1_instr)
	CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
	SET_SIZE(icache_parity_tl1_instr)
2252
2253#endif	/* lint */
2254
2255/*
2256 * Software trap 2 at TL>0.
2257 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
2258 * of the various architecture-specific files.  This is used as a continuation
2259 * of the icache parity handling where we've bought an extra TL level, so we
2260 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2261 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2262 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
2263 * order two bits from %g1 and %g2 respectively).
2264 * NB: Must be 8 instructions or less to fit in trap table and code must
2265 *     be relocatable.
2266 */
2267#if defined(lint)
2268
2269void
2270icache_parity_tl1_cont_instr(void)
2271{}
2272
2273#else	/* lint */
	/*
	 * Replacement trap-table entry for software trap 2 at TL>0.
	 * The whole body is the CH_ERR_TL1_SWTRAPENTRY macro, which is
	 * given icache_parity_tl1_err as the routine that continues the
	 * I$ parity handling.
	 */
	ENTRY_NP(icache_parity_tl1_cont_instr)
	CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
	SET_SIZE(icache_parity_tl1_cont_instr)
2277
2278#endif	/* lint */
2279
2280
2281/*
2282 * I$ parity error at TL>0 handler
2283 * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
2284 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2285 */
2286#if defined(lint)
2287
2288void
2289icache_parity_tl1_err(void)
2290{}
2291
2292#else	/* lint */
2293
	/*
	 * Body of the TL>0 I$ parity error handler.  The sequence is:
	 *   1. CH_ERR_TL1_ENTER records the error (flag CH_ERR_IPE).
	 *   2. If TRAPTRACE is compiled in and this CPU's trace buffer has
	 *      a non-zero limit, one trap trace record is written and the
	 *      buffer offset is advanced.
	 *   3. CH_ERR_TL1_EXIT restores the globals and retries.
	 * %g4-%g7 are used as scratch in the TRAPTRACE section; %g1 and %g2
	 * are left alone because the enter/exit macros depend on them.
	 */
	ENTRY_NP(icache_parity_tl1_err)

	/*
	 * This macro saves all the %g registers in the ch_err_tl1_data
	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
	 * the ch_err_tl1_data structure and %g2 will have the original
	 * flags in the ch_err_tl1_data structure.  All %g registers
	 * except for %g1 and %g2 will be available.
	 */
	CH_ERR_TL1_ENTER(CH_ERR_IPE);

#ifdef TRAPTRACE
	/*
	 * Get current trap trace entry physical pointer.
	 */
	CPU_INDEX(%g6, %g5)
	sll	%g6, TRAPTR_SIZE_SHIFT, %g6	! scale cpu index to a ctl offset
	set	trap_trace_ctl, %g5
	add	%g6, %g5, %g6			! %g6 = this cpu's trace control
	ld	[%g6 + TRAPTR_LIMIT], %g5
	tst	%g5				! limit of zero: skip tracing
	be	%icc, ipe_tl1_skip_tt
	  nop
	ldx	[%g6 + TRAPTR_PBASE], %g5
	ld	[%g6 + TRAPTR_OFFSET], %g4
	add	%g5, %g4, %g5			! %g5 = base + current offset

	/*
	 * Create trap trace entry.
	 * %asi is saved in %g7 and switched to TRAPTR_ASI for the stores
	 * below, then put back once the record is complete.
	 */
	rd	%asi, %g7
	wr	%g0, TRAPTR_ASI, %asi
	rd	STICK, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
	rdpr	%tl, %g4
	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
	rdpr	%tt, %g4
	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
	rdpr	%tpc, %g4
	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
	rdpr	%tstate, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
	stna	%g0, [%g5 + TRAP_ENT_TR]%asi	! remaining fields are zeroed
	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
	wr	%g0, %g7, %asi			! restore caller's %asi

	/*
	 * Advance trap trace pointer.
	 * The old offset is remembered in TRAPTR_LAST_OFFSET; the new
	 * offset wraps to 0 once (offset + TRAP_ENT_SIZE) reaches
	 * (limit - TRAP_ENT_SIZE).
	 */
	ld	[%g6 + TRAPTR_OFFSET], %g5
	ld	[%g6 + TRAPTR_LIMIT], %g4
	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
	add	%g5, TRAP_ENT_SIZE, %g5
	sub	%g4, TRAP_ENT_SIZE, %g4
	cmp	%g5, %g4
	movge	%icc, 0, %g5
	st	%g5, [%g6 + TRAPTR_OFFSET]
ipe_tl1_skip_tt:
#endif	/* TRAPTRACE */

	/*
	 * We fall into this macro if we've successfully logged the error in
	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
	 * Restores the %g registers and issues retry.
	 */
	CH_ERR_TL1_EXIT;

	SET_SIZE(icache_parity_tl1_err)
2368
2369#endif	/* lint */
2370
2371#endif	/* CPU_IMP_L1_CACHE_PARITY */
2372
2373
2374/*
2375 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
2376 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
2377 * should only be used in places where you have no choice but to look at the
2378 * tlb itself.
2379 *
2380 * Note: These two routines are required by the Estar "cpr" loadable module.
2381 */
2382
2383#if defined(lint)
2384
2385/* ARGSUSED */
2386void
2387itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2388{}
2389
2390#else	/* lint */
2391
2392	ENTRY_NP(itlb_rd_entry)
2393	sllx	%o0, 3, %o0
2394	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
2395	stx	%g1, [%o1]
2396	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
2397	set	TAGREAD_CTX_MASK, %o4
2398	andn	%g2, %o4, %o5
2399	retl
2400	  stx	%o5, [%o2]
2401	SET_SIZE(itlb_rd_entry)
2402
2403#endif	/* lint */
2404
2405
2406#if defined(lint)
2407
2408/* ARGSUSED */
2409void
2410dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2411{}
2412
2413#else	/* lint */
2414
2415	ENTRY_NP(dtlb_rd_entry)
2416	sllx	%o0, 3, %o0
2417	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
2418	stx	%g1, [%o1]
2419	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
2420	set	TAGREAD_CTX_MASK, %o4
2421	andn	%g2, %o4, %o5
2422	retl
2423	  stx	%o5, [%o2]
2424	SET_SIZE(dtlb_rd_entry)
2425#endif	/* lint */
2426
2427
2428#if !(defined(JALAPENO) || defined(SERRANO))
2429
2430#if defined(lint)
2431
2432uint64_t
2433get_safari_config(void)
2434{ return (0); }
2435
2436#else	/* lint */
2437
	/*
	 * uint64_t get_safari_config(void)
	 * Returns in %o0 the 64-bit value loaded from ASI_SAFARI_CONFIG
	 * at address 0.  No other register is modified.
	 */
	ENTRY(get_safari_config)
	ldxa	[%g0]ASI_SAFARI_CONFIG, %o0
	retl
	nop
	SET_SIZE(get_safari_config)
2443
2444#endif	/* lint */
2445
2446
2447#if defined(lint)
2448
2449/* ARGSUSED */
2450void
2451set_safari_config(uint64_t safari_config)
2452{}
2453
2454#else	/* lint */
2455
	/*
	 * void set_safari_config(uint64_t safari_config)
	 * Stores %o0 to ASI_SAFARI_CONFIG at address 0 and issues a
	 * membar #Sync after the store, before returning.
	 */
	ENTRY(set_safari_config)
	stxa	%o0, [%g0]ASI_SAFARI_CONFIG
	membar	#Sync
	retl
	nop
	SET_SIZE(set_safari_config)
2462
2463#endif	/* lint */
2464
2465#endif	/* !(JALAPENO || SERRANO) */
2466
2467
#if defined(lint)

/*
 * Lint stub.  The name must match the assembly entry point below
 * (cpu_clearticknpt) so that lint sees a definition for the symbol.
 */
void
cpu_clearticknpt(void)
{}

#else	/* lint */
	/*
	 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
	 * registers. In an effort to make the change in the
	 * tick/stick counter as consistent as possible, we disable
	 * all interrupts while we're changing the registers. We also
	 * ensure that the read and write instructions are in the same
	 * line in the instruction cache.
	 *
	 * Register usage:
	 *	%g1 - %pstate on entry, written back on exit
	 *	%g2 - current %tick / STICK value
	 *	%g3 - scratch, then the NPT mask (1 << 63)
	 *	%g4 - return linkage: the routine leaves via "jmp %g4 + 4"
	 *	      and not via retl, so %o7 is never consulted.
	 *	      NOTE(review): presumably the caller loads its own
	 *	      address into %g4 before branching here -- confirm
	 *	      against the call sites.
	 *
	 * The NPT bit is bit 63, so "brgez" (value >= 0 as a signed
	 * number) means the bit is already clear.  Each write XORs the
	 * mask into a freshly read counter value, which clears bit 63.
	 */
	ENTRY_NP(cpu_clearticknpt)
	rdpr	%pstate, %g1		/* save processor state */
	andn	%g1, PSTATE_IE, %g3	/* turn off */
	wrpr	%g0, %g3, %pstate	/*   interrupts */
	rdpr	%tick, %g2		/* get tick register */
	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
	mov	1, %g3			/* create mask */
	sllx	%g3, 63, %g3		/*   for NPT bit */
	ba,a,pt	%xcc, 2f
	.align	8			/* Ensure rd/wr in same i$ line */
2:
	rdpr	%tick, %g2		/* get tick register */
	wrpr	%g3, %g2, %tick		/* write tick register, */
					/*   clearing NPT bit   */
1:
	rd	STICK, %g2		/* get stick register */
	brgez,pn %g2, 3f		/* if NPT bit off, we're done */
	mov	1, %g3			/* create mask */
	sllx	%g3, 63, %g3		/*   for NPT bit */
	ba,a,pt	%xcc, 4f
	.align	8			/* Ensure rd/wr in same i$ line */
4:
	rd	STICK, %g2		/* get stick register */
	wr	%g3, %g2, STICK		/* write stick register, */
					/*   clearing NPT bit   */
3:
	jmp	%g4 + 4
	wrpr	%g0, %g1, %pstate	/* restore processor state */

	SET_SIZE(cpu_clearticknpt)

#endif	/* lint */
2515
2516
2517#if defined(CPU_IMP_L1_CACHE_PARITY)
2518
2519#if defined(lint)
2520/*
2521 * correct_dcache_parity(size_t size, size_t linesize)
2522 *
2523 * Correct D$ data parity by zeroing the data and initializing microtag
2524 * for all indexes and all ways of the D$.
2525 *
2526 */
2527/* ARGSUSED */
2528void
2529correct_dcache_parity(size_t size, size_t linesize)
2530{}
2531
2532#else	/* lint */
2533
	/*
	 * correct_dcache_parity(size, linesize)
	 *
	 * Walks the D$ diagnostic address space from (size - linesize)
	 * down to 0 in steps of linesize.  For every line it writes the
	 * utag and then zeroes the line's data eight bytes at a time,
	 * from offset (linesize - 8) down to 0.  Both loop branches are
	 * plain "bge", i.e. they test the 32-bit condition codes set by
	 * the preceding subcc.
	 */
	ENTRY(correct_dcache_parity)
	/*
	 * Register Usage:
	 *
	 * %o0 = input D$ size
	 * %o1 = input D$ line size
	 * %o2 = scratch
	 * %o3 = scratch
	 * %o4 = scratch
	 */

	sub	%o0, %o1, %o0			! init cache line address

	/*
	 * For Panther CPUs, we also need to clear the data parity bits
	 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
	 * After this sequence %o3 is either 0 (non-Panther) or
	 * 1 << PN_DC_DATA_PARITY_BIT_SHIFT (Panther).
	 */
	GET_CPU_IMPL(%o3)
	cmp	%o3, PANTHER_IMPL
	bne	1f
	  clr	%o3				! zero for non-Panther
	mov	1, %o3
	sll	%o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3

1:
	/*
	 * Set utag = way since it must be unique within an index.
	 * (Top of the per-line loop; %o0 is the current line address.)
	 */
	srl	%o0, 14, %o2			! get cache way (DC_way)
	membar	#Sync				! required before ASI_DC_UTAG
	stxa	%o2, [%o0]ASI_DC_UTAG		! set D$ utag = cache way
	membar	#Sync				! required after ASI_DC_UTAG

	/*
	 * Zero line of D$ data (and data parity bits for Panther)
	 * %o2 = byte offset within the line, starting at the last
	 * 8-byte word; %o4 = line address with the parity-select bit
	 * merged in.
	 */
	sub	%o1, 8, %o2
	or	%o0, %o3, %o4			! same address + DC_data_parity
2:
	membar	#Sync				! required before ASI_DC_DATA
	stxa	%g0, [%o0 + %o2]ASI_DC_DATA	! zero 8 bytes of D$ data
	membar	#Sync				! required after ASI_DC_DATA
	/*
	 * We also clear the parity bits if this is a panther. For non-Panther
	 * CPUs, we simply end up clearing the $data register twice.
	 */
	stxa	%g0, [%o4 + %o2]ASI_DC_DATA
	membar	#Sync

	subcc	%o2, 8, %o2			! previous 8-byte word
	bge	2b				! until offset goes negative
	nop

	subcc	%o0, %o1, %o0			! previous cache line
	bge	1b				! until address goes negative
	nop

	retl
	  nop
	SET_SIZE(correct_dcache_parity)
2594
2595#endif	/* lint */
2596
2597#endif	/* CPU_IMP_L1_CACHE_PARITY */
2598
2599
2600#if defined(lint)
2601/*
2602 *  Get timestamp (stick).
2603 */
2604/* ARGSUSED */
2605void
2606stick_timestamp(int64_t *ts)
2607{
2608}
2609
2610#else	/* lint */
2611
	/*
	 * void stick_timestamp(int64_t *ts)
	 * Reads STICK, clears bit 63 (the NPT bit) by shifting left and
	 * then logically right by one, and stores the 64-bit result at
	 * the address in %o0.  %g1 is the only scratch register.
	 */
	ENTRY_NP(stick_timestamp)
	rd	STICK, %g1	! read stick reg
	sllx	%g1, 1, %g1
	srlx	%g1, 1, %g1	! clear npt bit

	retl
	stx     %g1, [%o0]	! store the timestamp
	SET_SIZE(stick_timestamp)
2620
2621#endif	/* lint */
2622
2623
2624#if defined(lint)
2625/*
2626 * Set STICK adjusted by skew.
2627 */
2628/* ARGSUSED */
2629void
2630stick_adj(int64_t skew)
2631{
2632}
2633
2634#else	/* lint */
2635
	/*
	 * void stick_adj(int64_t skew)
	 * Adds the skew in %o0 to the current STICK value and writes the
	 * sum back to STICK, with interrupts disabled for the duration.
	 *
	 *	%g1 = %pstate on entry (restored in the retl delay slot)
	 *	%g3 = %pstate with PSTATE_IE cleared
	 *	%g4 = STICK value read
	 *	%o1 = adjusted value written back
	 *
	 * The branch over the ".align 16" places the read/add/write
	 * sequence at a 16-byte boundary; interrupts are turned off in
	 * the delay slot of that branch.
	 */
	ENTRY_NP(stick_adj)
	rdpr	%pstate, %g1		! save processor state
	andn	%g1, PSTATE_IE, %g3
	ba	1f			! cache align stick adj
	wrpr	%g0, %g3, %pstate	! turn off interrupts

	.align	16
1:	nop

	rd	STICK, %g4		! read stick reg
	add	%g4, %o0, %o1		! adjust stick with skew
	wr	%o1, %g0, STICK		! write stick reg

	retl
	wrpr	%g1, %pstate		! restore processor state
	SET_SIZE(stick_adj)
2652
2653#endif	/* lint */
2654
2655#if defined(lint)
2656/*
2657 * Debugger-specific stick retrieval
2658 */
2659/*ARGSUSED*/
2660int
2661kdi_get_stick(uint64_t *stickp)
2662{
2663	return (0);
2664}
2665
2666#else	/* lint */
2667
2668	ENTRY_NP(kdi_get_stick)
2669	rd	STICK, %g1
2670	stx	%g1, [%o0]
2671	retl
2672	mov	%g0, %o0
2673	SET_SIZE(kdi_get_stick)
2674
2675#endif	/* lint */
2676
2677#if defined(lint)
2678/*
2679 * Invalidate the specified line from the D$.
2680 *
2681 * Register usage:
2682 *	%o0 - index for the invalidation, specifies DC_way and DC_addr
2683 *
2684 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
2685 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
2686 *
2687 * The format of the stored 64-bit value is:
2688 *
2689 *	+----------+--------+----------+
2690 *	| Reserved | DC_tag | DC_valid |
2691 *	+----------+--------+----------+
2692 *       63      31 30     1	      0
2693 *
2694 * DC_tag is the 30-bit physical tag of the associated line.
2695 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
2696 *
2697 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
2698 *
2699 *	+----------+--------+----------+----------+
2700 *	| Reserved | DC_way | DC_addr  | Reserved |
2701 *	+----------+--------+----------+----------+
2702 *       63      16 15    14 13       5 4        0
2703 *
2704 * DC_way is a 2-bit index that selects one of the 4 ways.
2705 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
2706 *
2707 * Setting the DC_valid bit to zero for the specified DC_way and
2708 * DC_addr index into the D$ results in an invalidation of a D$ line.
2709 */
2710/*ARGSUSED*/
2711void
2712dcache_inval_line(int index)
2713{
2714}
2715#else	/* lint */
	/*
	 * void dcache_inval_line(int index)
	 * %o0 = line index; it is shifted left by 5 to form the
	 * DC_way/DC_addr address, and a zero tag word is stored there
	 * through ASI_DC_TAG, followed by membar #Sync.
	 */
	ENTRY(dcache_inval_line)
	sll	%o0, 5, %o0		! shift index into DC_way and DC_addr
	stxa	%g0, [%o0]ASI_DC_TAG	! zero the DC_valid and DC_tag bits
	membar	#Sync
	retl
	nop
	SET_SIZE(dcache_inval_line)
2723#endif	/* lint */
2724
2725#if defined(lint)
2726/*
2727 * Invalidate the entire I$
2728 *
2729 * Register usage:
2730 *	%o0 - specifies IC_way, IC_addr, IC_tag
2731 *	%o1 - scratch
2732 *	%o2 - used to save and restore DCU value
2733 *	%o3 - scratch
2734 *	%o5 - used to save and restore PSTATE
2735 *
2736 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
2737 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
2738 * block out snoops and invalidates to the I$, causing I$ consistency
2739 * to be broken. Before turning on the I$, all I$ lines must be invalidated.
2740 *
2741 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
2742 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
2743 * info below describes store (write) use of ASI_IC_TAG. Note that read
2744 * use of ASI_IC_TAG behaves differently.
2745 *
2746 * The format of the stored 64-bit value is:
2747 *
2748 *	+----------+--------+---------------+-----------+
2749 *	| Reserved | Valid  | IC_vpred<7:0> | Undefined |
2750 *	+----------+--------+---------------+-----------+
2751 *       63      55    54    53           46 45        0
2752 *
2753 * Valid is the 1-bit valid field for both the physical and snoop tags.
2754 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
2755 *	the 32-byte boundary aligned address specified by IC_addr.
2756 *
2757 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
2758 *
2759 *	+----------+--------+---------+--------+---------+
2760 *	| Reserved | IC_way | IC_addr | IC_tag |Reserved |
2761 *	+----------+--------+---------+--------+---------+
2762 *       63      16 15    14 13      5 4      3 2       0
2763 *
2764 * IC_way is a 2-bit index that selects one of the 4 ways.
2765 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
2766 * IC_addr[5] is a "don't care" for a store.
2767 * IC_tag set to 2 specifies that the stored value is to be interpreted
2768 *	as containing Valid and IC_vpred as described above.
2769 *
2770 * Setting the Valid bit to zero for the specified IC_way and
2771 * IC_addr index into the I$ results in an invalidation of an I$ line.
2772 */
2773/*ARGSUSED*/
2774void
2775icache_inval_all(void)
2776{
2777}
2778#else	/* lint */
	/*
	 * void icache_inval_all(void)
	 * Runs CH_ICACHE_FLUSHALL over the whole I$ with interrupts
	 * disabled.  The cache size and line size come from the CPU
	 * private area when GET_CPU_PRIVATE_PTR yields a pointer, and
	 * from the globals icache_size / icache_linesize otherwise.
	 * NOTE(review): presumably GET_CPU_PRIVATE_PTR branches to its
	 * label argument when the private pointer is NULL -- confirm
	 * against the macro definition.
	 *
	 *	%o5 = %pstate on entry (restored in the retl delay slot)
	 *	%o0 = I$ size, %o1 = I$ line size at label 2
	 */
	ENTRY(icache_inval_all)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE, %o3
	wrpr	%g0, %o3, %pstate	! clear IE bit

	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
	ba,pt	%icc, 2f
	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0	! delay: %o0 base is reused last
icache_inval_all_1:
	ASM_LD(%o0, icache_size)
	ASM_LD(%o1, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)

	retl
	wrpr	%g0, %o5, %pstate	! restore earlier pstate
	SET_SIZE(icache_inval_all)
2797#endif	/* lint */
2798
2799
2800#if defined(lint)
2801/* ARGSUSED */
2802void
2803cache_scrubreq_tl1(uint64_t inum, uint64_t index)
2804{
2805}
2806
2807#else	/* lint */
2808/*
2809 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
2810 * crosstrap.  It atomically increments the outstanding request counter and,
2811 * if there was not already an outstanding request, branches to setsoftint_tl1
2812 * to enqueue an intr_req for the given inum.
2813 */
2814
2815	! Register usage:
2816	!
2817	! Arguments:
2818	! %g1 - inum
2819	! %g2 - index into chsm_outstanding array
2820	!
2821	! Internal:
2822	! %g2, %g3, %g5 - scratch
2823	! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2824	! %g6 - setsoftint_tl1 address
2825
	/*
	 * Flow:
	 *   - %g2 is turned into the byte offset of
	 *     chsm_outstanding[index] within the CPU private area.
	 *   - GET_CPU_PRIVATE_PTR yields the counter's address in %g4 and
	 *     is given label 1 (plain retry) as its bail-out target.
	 *   - The counter is always incremented (the store sits in the
	 *     delay slot of the brnz, which is not annulled).
	 *   - Only when the old value was zero do we go on to
	 *     setsoftint_tl1; otherwise we retry immediately.
	 */
	ENTRY_NP(cache_scrubreq_tl1)
	mulx	%g2, CHSM_OUTSTANDING_INCR, %g2
	set	CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
	add	%g2, %g3, %g2
	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
	ld	[%g4], %g2		! cpu's chsm_outstanding[index]
	!
	! no need to use atomic instructions for the following
	! increment - we're at tl1
	!
	add	%g2, 0x1, %g3
	brnz,pn	%g2, 1f			! no need to enqueue more intr_req
	  st	%g3, [%g4]		! delay - store incremented counter
	ASM_JMP(%g6, setsoftint_tl1)
	! not reached
1:
	retry
	SET_SIZE(cache_scrubreq_tl1)
2844
2845#endif	/* lint */
2846
2847
2848#if defined(lint)
2849
2850/* ARGSUSED */
2851void
2852get_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
2853{}
2854
2855#else	/* lint */
2856
2857/*
2858 * Get the error state for the processor.
2859 * Note that this must not be used at TL>0
2860 */
	/*
	 * void get_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
	 *
	 *	%o0 = structure to fill in (not modified)
	 *	%o1, %o2, %o3 = scratch
	 *
	 * Every field written here is either read from the corresponding
	 * error register or stored as zero when the build or the CPU
	 * implementation does not have that register.  The primary
	 * AFSR/AFAR pair is read last.
	 */
	ENTRY(get_cpu_error_state)
#if defined(CHEETAH_PLUS)
	set	ASI_SHADOW_REG_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
	ldxa	[%o2]ASI_AFAR, %o1		! shadow afar reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
	GET_CPU_IMPL(%o3)	! Only panther has AFSR_EXT registers
	cmp	%o3, PANTHER_IMPL
	bne,a	1f				! annulled: slot runs only if taken
	  stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]	! zero for non-PN
	set	ASI_AFSR_EXT_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1		! afsr_ext reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
	set	ASI_SHADOW_AFSR_EXT_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr_ext reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
	b	2f
	  nop
1:
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
2:
#else	/* CHEETAH_PLUS */
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
#endif	/* CHEETAH_PLUS */
#if defined(SERRANO)
	/*
	 * Serrano has an afar2 which captures the address on FRC/FRU errors.
	 * We save this in the afar2 of the register save area.
	 */
	set	ASI_MCU_AFAR2_VA, %o2
	ldxa	[%o2]ASI_MCU_CTRL, %o1
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR2]
#endif	/* SERRANO */
	ldxa	[%g0]ASI_AFSR, %o1		! primary afsr reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR]
	ldxa	[%g0]ASI_AFAR, %o1		! primary afar reg
	retl
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR]
	SET_SIZE(get_cpu_error_state)
2904#endif	/* lint */
2905
2906#if defined(lint)
2907
2908/*
2909 * Check a page of memory for errors.
2910 *
2911 * Load each 64 byte block from physical memory.
2912 * Check AFSR after each load to see if an error
2913 * was caused. If so, log/scrub that error.
2914 *
2915 * Used to determine if a page contains
2916 * CEs when CEEN is disabled.
2917 */
2918/*ARGSUSED*/
2919void
2920cpu_check_block(caddr_t va, uint_t psz)
2921{}
2922
2923#else	/* lint */
2924
2925	ENTRY(cpu_check_block)
2926	!
2927	! get a new window with room for the error regs
2928	!
2929	save	%sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
2930	srl	%i1, 6, %l4		! clear top bits of psz
2931					! and divide by 64
2932	rd	%fprs, %l2		! store FP
2933	wr	%g0, FPRS_FEF, %fprs	! enable FP
29341:
2935	ldda	[%i0]ASI_BLK_P, %d0	! load a block
2936	membar	#Sync
2937	ldxa    [%g0]ASI_AFSR, %l3	! read afsr reg
2938	brz,a,pt %l3, 2f		! check for error
2939	nop
2940
2941	!
2942	! if error, read the error regs and log it
2943	!
2944	call	get_cpu_error_state
2945	add	%fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0
2946
2947	!
2948	! cpu_ce_detected(ch_cpu_errors_t *, flag)
2949	!
2950	call	cpu_ce_detected		! log the error
2951	mov	CE_CEEN_TIMEOUT, %o1
29522:
2953	dec	%l4			! next 64-byte block
2954	brnz,a,pt  %l4, 1b
2955	add	%i0, 64, %i0		! increment block addr
2956
2957	wr	%l2, %g0, %fprs		! restore FP
2958	ret
2959	restore
2960
2961	SET_SIZE(cpu_check_block)
2962
2963#endif	/* lint */
2964
2965#if defined(lint)
2966
2967/*
2968 * Perform a cpu logout called from C.  This is used where we did not trap
2969 * for the error but still want to gather "what we can".  Caller must make
2970 * sure cpu private area exists and that the indicated logout area is free
2971 * for use, and that we are unable to migrate cpus.
2972 */
2973/*ARGSUSED*/
2974void
2975cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
2976{ }
2977
2978#else
	/*
	 * void cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
	 *
	 *	%o0 = afar, handed to GET_ECACHE_DTAGS
	 *	%o1 = clop on entry; advanced to the E$ data area
	 *	      (clop + CH_CLO_DATA + CH_CHD_EC_DATA) before the capture
	 *	%o2 = scratch / value kept between the PARK and UNPARK macros
	 *	%o3-%o5 = scratch for the macros
	 *	%g1 = %asi on entry, restored after the capture
	 *
	 * Interrupts are disabled for the whole capture and %asi is
	 * switched to ASI_P while GET_ECACHE_DTAGS runs.
	 *
	 * NOTE(review): the exit path ORs PSTATE_IE into %pstate rather
	 * than restoring the value seen on entry, so this routine always
	 * returns with interrupts enabled -- confirm that no caller
	 * enters with interrupts already disabled.
	 */
	ENTRY(cpu_delayed_logout)
	rdpr	%pstate, %o2
	andn	%o2, PSTATE_IE, %o2
	wrpr	%g0, %o2, %pstate		! disable interrupts
	PARK_SIBLING_CORE(%o2, %o3, %o4)	! %o2 has DCU value
	add	%o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
	rd	%asi, %g1
	wr	%g0, ASI_P, %asi
	GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
	wr	%g1, %asi
	UNPARK_SIBLING_CORE(%o2, %o3, %o4)	! can use %o2 again
	rdpr	%pstate, %o2
	or	%o2, PSTATE_IE, %o2
	wrpr	%g0, %o2, %pstate		! enable interrupts
	retl
	  nop
	SET_SIZE(cpu_delayed_logout)
2996
2997#endif	/* lint */
2998
2999#if defined(lint)
3000
3001/*ARGSUSED*/
3002int
3003dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
3004{ return (0); }
3005
3006#else
3007
	/*
	 * int dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
	 *
	 *	%i0 = addr, target of the block-commit store
	 *	%i1 = data, the 32-bit word loaded into %f0 beforehand
	 *	%i2 = tryagain
	 *	%l0 = %fprs on entry
	 *	%l1 = %pstate on entry
	 *	%l5 = address of the lofault handler (label 0)
	 *
	 * Returns 0 when the store completes.  If the lofault handler is
	 * entered, returns -1 when tryagain is zero and otherwise
	 * tail-calls dtrace_blksuword32_err().
	 *
	 * The "andcc %l0, FPRS_FEF" near the top sets the condition codes
	 * that the later "bz,a,pt" restore branches test: no instruction
	 * between them on the straight-line path modifies the codes.
	 * NOTE(review): the fault path at label 0 tests the same codes,
	 * which presumably survive the trap -- confirm.
	 */
	ENTRY(dtrace_blksuword32)
	save	%sp, -SA(MINFRAME + 4), %sp

	rdpr	%pstate, %l1
	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
	wrpr	%g0, %l2, %pstate		! protect our FPU diddling

	rd	%fprs, %l0
	andcc	%l0, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f			! if the fpu is disabled
	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu

	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
1:
	set	0f, %l5
        /*
         * We're about to write a block full of either total garbage
         * (not kernel data, don't worry) or user floating-point data
         * (so it only _looks_ like garbage).
         */
	ld	[%i1], %f0			! modify the block
	membar	#Sync
	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f			! fpu was disabled on entry:
	wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	ret
	restore	%g0, %g0, %o0			! return 0

0:
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f			! fpu was disabled on entry:
	wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	/*
	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
	 * which deals with watchpoints. Otherwise, just return -1.
	 */
	brnz,pt	%i2, 1f
	nop
	ret
	restore	%g0, -1, %o0
1:
	call	dtrace_blksuword32_err
	restore

	SET_SIZE(dtrace_blksuword32)
3071
3072#endif /* lint */
3073
3074#ifdef	CHEETAHPLUS_ERRATUM_25
3075
3076#if	defined(lint)
3077/*
3078 * Claim a chunk of physical address space.
3079 */
3080/*ARGSUSED*/
3081void
3082claimlines(uint64_t pa, size_t sz, int stride)
3083{}
3084#else	/* lint */
	/*
	 * void claimlines(uint64_t pa, size_t sz, int stride)
	 *
	 *	%o0 = pa, base physical address
	 *	%o1 = sz, reduced by stride on every pass
	 *	%o2 = stride
	 *	%o3 = pa + remaining offset, the address touched
	 *
	 * Each pass subtracts stride from the remaining size; while that
	 * subtraction does not borrow, a casxa through ASI_MEM is issued
	 * at (pa + new offset).  The casxa sits in an annulled delay
	 * slot, so it is skipped on the final, not-taken pass.  Offsets
	 * are therefore visited from (sz - stride) down to 0.
	 */
	ENTRY(claimlines)
1:
	subcc	%o1, %o2, %o1
	add	%o0, %o1, %o3
	bgeu,a,pt	%xcc, 1b
	casxa	[%o3]ASI_MEM, %g0, %g0
	membar  #Sync
	retl
	nop
	SET_SIZE(claimlines)
3095#endif	/* lint */
3096
3097#if	defined(lint)
3098/*
3099 * CPU feature initialization,
3100 * turn BPE off,
3101 * get device id.
3102 */
3103/*ARGSUSED*/
3104void
3105cpu_feature_init(void)
3106{}
3107#else	/* lint */
	/*
	 * void cpu_feature_init(void)
	 *
	 * 1. If the variable cheetah_bpe_off is non-zero, clear
	 *    ASR_DISPATCH_CONTROL_BPE in the dispatch control register.
	 * 2. Read ASI_DEVICE_SERIAL_ID and store it in the DEVICE_ID
	 *    field of this CPU's cpunodes entry
	 *    (cpunodes + index * CPU_NODE_SIZE + DEVICE_ID).
	 * 3. When CHEETAHPLUS_ERRATUM_34 is defined, call
	 *    itlb_erratum34_fixup.
	 *
	 * Runs in its own register window, so the %o registers used
	 * below are private to this routine.
	 */
	ENTRY(cpu_feature_init)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(cheetah_bpe_off), %o0
	ld	[%o0 + %lo(cheetah_bpe_off)], %o0
	brz	%o0, 1f				! flag clear: leave BPE alone
	nop
	rd	ASR_DISPATCH_CONTROL, %o0
	andn	%o0, ASR_DISPATCH_CONTROL_BPE, %o0
	wr	%o0, 0, ASR_DISPATCH_CONTROL
1:
	!
	! get the device_id and store the device_id
	! in the appropriate cpunodes structure
	! given the cpus index
	!
	CPU_INDEX(%o0, %o1)
	mulx %o0, CPU_NODE_SIZE, %o0
	set  cpunodes + DEVICE_ID, %o1
	ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2
	stx  %o2, [%o0 + %o1]
#ifdef	CHEETAHPLUS_ERRATUM_34
	!
	! apply Cheetah+ erratum 34 workaround
	!
	call itlb_erratum34_fixup
	  nop
#endif	/* CHEETAHPLUS_ERRATUM_34 */
	ret
	  restore
	SET_SIZE(cpu_feature_init)
3138#endif	/* lint */
3139
3140#if	defined(lint)
3141/*
3142 * Copy a tsb entry atomically, from src to dest.
3143 * src must be 128 bit aligned.
3144 */
3145/*ARGSUSED*/
3146void
3147copy_tsb_entry(uintptr_t src, uintptr_t dest)
3148{}
3149#else	/* lint */
3150	ENTRY(copy_tsb_entry)
3151	ldda	[%o0]ASI_NQUAD_LD, %o2		! %o2 = tag, %o3 = data
3152	stx	%o2, [%o1]
3153	stx	%o3, [%o1 + 8 ]
3154	retl
3155	nop
3156	SET_SIZE(copy_tsb_entry)
3157#endif	/* lint */
3158
3159#endif	/* CHEETAHPLUS_ERRATUM_25 */
3160
3161#ifdef	CHEETAHPLUS_ERRATUM_34
3162
3163#if	defined(lint)
3164
3165/*ARGSUSED*/
3166void
3167itlb_erratum34_fixup(void)
3168{}
3169
3170#else	/* lint */
3171
3172	!
3173	! In Cheetah+ erratum 34, under certain conditions an ITLB locked
3174	! index 0 TTE will erroneously be displaced when a new TTE is
3175	! loaded via ASI_ITLB_IN.  In order to avoid cheetah+ erratum 34,
3176	! locked index 0 TTEs must be relocated.
3177	!
3178	! NOTE: Care must be taken to avoid an ITLB miss in this routine.
3179	!
	!
	! Register usage:
	!	%o1 - ITLB entry 0 data (TTE)
	!	%o2 - ITLB entry 0 tag
	!	%o3 - %pstate on entry, restored on both return paths
	!	%o4 - scratch: flush address / candidate TTE
	!	%g3 - ASI address of the entry being examined (index << 3)
	!	%g4 - MMU_TAG_ACCESS
	!
	! Interrupts are turned off with "wrpr %o3, PSTATE_IE", which XORs
	! the bit into the saved %pstate; that only clears the bit when it
	! was set on entry, which is what the DEBUG check below enforces.
	!
	! A TTE is treated as valid when it is negative as a signed 64-bit
	! value, which is what the "bge %xcc" tests below check for.
	!
	ENTRY_NP(itlb_erratum34_fixup)
	rdpr	%pstate, %o3
#ifdef DEBUG
	andcc	%o3, PSTATE_IE, %g0		! If interrupts already
	bnz,pt %icc, 0f				!   disabled, panic
	  nop
	sethi	%hi(sfmmu_panic1), %o0
	call	panic
	 or	%o0, %lo(sfmmu_panic1), %o0
0:
#endif /* DEBUG */
	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
	ldxa	[%g0]ASI_ITLB_ACCESS, %o1	! %o1 = entry 0 data
	ldxa	[%g0]ASI_ITLB_TAGREAD, %o2	! %o2 = entry 0 tag

	cmp	%o1, %g0			! Is this entry valid?
	bge	%xcc, 1f
	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
	bnz	%icc, 2f
	  nop
1:
	retl					! Nope, outta here...
	  wrpr	%g0, %o3, %pstate		! Enable interrupts
2:
	sethi	%hi(FLUSH_ADDR), %o4
	stxa	%g0, [%o2]ASI_ITLB_DEMAP	! Flush this mapping
	flush	%o4				! Flush required for I-MMU
	!
	! Start search from index 1 up.  This is because the kernel force
	! loads its text page at index 15 in sfmmu_kernel_remap() and we
	! don't want our relocated entry evicted later.
	!
	! NOTE: We assume that we'll be successful in finding an unlocked
	! or invalid entry.  If that isn't the case there are bound to be
	! bigger problems.  The loop below has no upper bound on the index.
	!
	set	(1 << 3), %g3
3:
	ldxa	[%g3]ASI_ITLB_ACCESS, %o4	! Load TTE from t16
	!
	! If this entry isn't valid, we'll choose to displace it (regardless
	! of the lock bit).
	!
	cmp	%o4, %g0			! TTE is >= 0 iff not valid
	bge	%xcc, 4f			! If invalid, go displace
	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
	bnz,a	%icc, 3b			! If locked, look at next
	  add	%g3, (1 << 3), %g3		!  entry
4:
	!
	! We found an unlocked or invalid entry; we'll explicitly load
	! the former index 0 entry here.
	!
	sethi	%hi(FLUSH_ADDR), %o4
	set	MMU_TAG_ACCESS, %g4
	stxa	%o2, [%g4]ASI_IMMU		! tag of the old entry 0
	stxa	%o1, [%g3]ASI_ITLB_ACCESS	! data into the chosen slot
	flush	%o4				! Flush required for I-MMU
	retl
	  wrpr	%g0, %o3, %pstate		! Enable interrupts
	SET_SIZE(itlb_erratum34_fixup)
3241
3242#endif	/* lint */
3243
3244#endif	/* CHEETAHPLUS_ERRATUM_34 */
3245
3246