xref: /illumos-gate/usr/src/uts/sun4u/cpu/us3_common_asm.S (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Copyright 2020 Joyent, Inc.
26 *
27 * Assembly code support for Cheetah/Cheetah+ modules
28 */
29
30#include "assym.h"
31
32#include <sys/asm_linkage.h>
33#include <sys/mmu.h>
34#include <vm/hat_sfmmu.h>
35#include <sys/machparam.h>
36#include <sys/machcpuvar.h>
37#include <sys/machthread.h>
38#include <sys/machtrap.h>
39#include <sys/privregs.h>
40#include <sys/trap.h>
41#include <sys/cheetahregs.h>
42#include <sys/us3_module.h>
43#include <sys/xc_impl.h>
44#include <sys/intreg.h>
45#include <sys/async.h>
46#include <sys/clock.h>
47#include <sys/cheetahasm.h>
48#include <sys/cmpregs.h>
49
50#ifdef TRAPTRACE
51#include <sys/traptrace.h>
52#endif /* TRAPTRACE */
53
54/* BEGIN CSTYLED */
55
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Invalidate D$ lines belonging to one page, using the strategy selected
 * by the global dflush_type.  The macro is a no-op when DCU_DC is clear
 * in the DCU register.
 *
 * In:
 *    arg1 = pfn (clobbered)
 *    arg2 = virtual color (overwritten as the loop counter on the
 *	     FLUSHMATCH_TYPE path)
 *    tmp1, tmp2, tmp3 = scratch registers
 *
 * Strategies:
 *    FLUSHPAGE_TYPE  - store to ASI_DC_INVAL at every dcache_linesize
 *			step of the MMU_PAGESIZE range starting at
 *			(pfn << MMU_PAGESHIFT).
 *    FLUSHMATCH_TYPE - read every D$ tag through ASI_DC_TAG and zero the
 *			ones that have a valid bit set and whose tag equals
 *			(pfn << CHEETAH_DC_VBIT_SHIFT).
 *    anything else   - zero every D$ tag through ASI_DC_TAG
 *			(FLUSHALL_TYPE).
 *
 * All loops count down from (size - linesize) to 0; the decrement sits
 * in the delay slot of the loop branch.
 *
 * Uses local numeric labels 1 through 5.  Label 1 is the common exit and
 * is the last thing the macro emits.
 */
56#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
57	ldxa	[%g0]ASI_DCU, tmp1					;\
58	btst	DCU_DC, tmp1		/* is dcache enabled? */	;\
59	bz,pn	%icc, 1f						;\
60	ASM_LD(tmp1, dcache_linesize)					;\
61	ASM_LD(tmp2, dflush_type)					;\
62	cmp	tmp2, FLUSHPAGE_TYPE					;\
63	be,pt	%icc, 2f						;\
64	nop								;\
65	sllx	arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */	;\
66	ASM_LD(tmp3, dcache_size)					;\
67	cmp	tmp2, FLUSHMATCH_TYPE					;\
68	be,pt	%icc, 3f						;\
69	nop								;\
70	/*								\
71	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
72	 * tmp3 = cache size						\
73	 * tmp1 = cache line size					\
74	 */								\
75	sub	tmp3, tmp1, tmp2					;\
764:									\
77	stxa	%g0, [tmp2]ASI_DC_TAG					;\
78	membar	#Sync							;\
79	cmp	%g0, tmp2						;\
80	bne,pt	%icc, 4b						;\
81	sub	tmp2, tmp1, tmp2					;\
82	ba,pt	%icc, 1f						;\
83	nop								;\
84	/*								\
85	 * flushtype = FLUSHPAGE_TYPE					\
86	 * arg1 = pfn							\
87	 * arg2 = virtual color						\
88	 * tmp1 = cache line size					\
89	 * tmp2 = tag from cache					\
90	 * tmp3 = counter						\
91	 */								\
922:									\
93	set	MMU_PAGESIZE, tmp3					;\
94        sllx    arg1, MMU_PAGESHIFT, arg1  /* pfn to 43 bit PA	   */   ;\
95	sub	tmp3, tmp1, tmp3					;\
964:									\
97	stxa	%g0, [arg1 + tmp3]ASI_DC_INVAL				;\
98	membar	#Sync							;\
995:									\
100	cmp	%g0, tmp3						;\
101	bnz,pt	%icc, 4b		/* branch if not done */	;\
102	sub	tmp3, tmp1, tmp3					;\
103	ba,pt	%icc, 1f						;\
104	nop								;\
105	/*								\
106	 * flushtype = FLUSHMATCH_TYPE					\
107	 * arg1 = tag to compare against				\
108	 * tmp1 = cache line size					\
109	 * tmp3 = cache size						\
110	 * arg2 = counter						\
111	 * tmp2 = cache tag						\
112	 */								\
1133:									\
114	sub	tmp3, tmp1, arg2					;\
1154:									\
116	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
117	btst	CHEETAH_DC_VBIT_MASK, tmp2				;\
118	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
119	andn	tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */	;\
120	cmp	tmp2, arg1						;\
121	bne,pn	%icc, 5f		/* branch if tag miss */	;\
122	nop								;\
123	stxa	%g0, [arg2]ASI_DC_TAG					;\
124	membar	#Sync							;\
1255:									\
126	cmp	%g0, arg2						;\
127	bne,pt	%icc, 4b		/* branch if not done */	;\
128	sub	arg2, tmp1, arg2					;\
1291:
130
131/*
132 * macro that flushes the entire dcache color
133 * dcache size = 64K, one way 16K
134 *
135 * In:
136 *    arg = virtual color register (not clobbered)
137 *    way = way#, can either be a constant or a register (not clobbered)
138 *    tmp1, tmp2, tmp3 = scratch registers
139 *
 * The macro is a no-op when DCU_DC is clear in the DCU register.
 * Otherwise it stores zero to ASI_DC_TAG at every dcache_linesize step
 * of the MMU_PAGESIZE window based at
 * (arg << MMU_PAGESHIFT) | (way << 14), walking from the last line of
 * the window down to offset 0.  The decrement of the offset sits in the
 * delay slot of the loop branch.
 *
 * Uses local numeric labels 1 (exit) and 2 (loop head), so successive
 * expansions each resolve to their own nearest labels.
140 */
141#define DCACHE_FLUSHCOLOR(arg, way, tmp1, tmp2, tmp3)			\
142	ldxa	[%g0]ASI_DCU, tmp1;					\
143	btst	DCU_DC, tmp1;		/* is dcache enabled? */	\
144	bz,pn	%icc, 1f;						\
145	ASM_LD(tmp1, dcache_linesize)					\
146	/*								\
147	 * arg = virtual color						\
148	 * tmp1 = cache line size					\
149	 */								\
150	sllx	arg, MMU_PAGESHIFT, tmp2; /* color to dcache page */	\
151	mov	way, tmp3;						\
152	sllx	tmp3, 14, tmp3;		  /* One way 16K */		\
153	or	tmp2, tmp3, tmp3;					\
154	set	MMU_PAGESIZE, tmp2;					\
155	/*								\
156	 * tmp2 = page size						\
157	 * tmp3 =  cached page in dcache				\
158	 */								\
159	sub	tmp2, tmp1, tmp2;					\
1602:									\
161	stxa	%g0, [tmp3 + tmp2]ASI_DC_TAG;				\
162	membar	#Sync;							\
163	cmp	%g0, tmp2;						\
164	bne,pt	%icc, 2b;						\
165	sub	tmp2, tmp1, tmp2;					\
1661:
167
168/* END CSTYLED */
169
170/*
171 * Cheetah MMU and Cache operations.
172 */
173
174	ENTRY_NP(vtag_flushpage)
175	/*
176	 * flush page from the tlb
177	 *
178	 * %o0 = vaddr
179	 * %o1 = sfmmup
	 *
	 * Demaps the page at vaddr from both the D-TLB and the I-TLB.
	 * If sfmmup equals ksfmmup the demap is issued directly on vaddr.
	 * Otherwise the primary context register is temporarily rewritten
	 * with the context number (and page-size extension) of sfmmup,
	 * the demap is issued, and the old value is written back.
	 *
	 * Interrupts are disabled on entry by clearing PSTATE_IE; the
	 * %pstate value saved in %o5 is written back in the delay slot of
	 * each retl.  %o2-%o4, %g1 and %g2 are used as scratch.
180	 */
181	rdpr	%pstate, %o5
182#ifdef DEBUG
183	PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
184#endif /* DEBUG */
185	/*
186	 * disable ints
187	 */
188	andn	%o5, PSTATE_IE, %o4
189	wrpr	%o4, 0, %pstate
190
191	/*
192	 * Then, blow out the tlb
193	 * Interrupts are disabled to prevent the primary ctx register
194	 * from changing underneath us.
195	 */
196	sethi   %hi(ksfmmup), %o3
197        ldx     [%o3 + %lo(ksfmmup)], %o3
198        cmp     %o3, %o1
199        bne,pt   %xcc, 1f			! if not kernel as, go to 1
200	  sethi	%hi(FLUSH_ADDR), %o3
201	/*
202	 * For Kernel demaps use primary. type = page implicitly
203	 */
204	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
205	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
206	flush	%o3
207	retl
208	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
2091:
210	/*
211	 * User demap.  We need to set the primary context properly.
212	 * Secondary context cannot be used for Cheetah IMMU.
213	 * %o0 = vaddr
214	 * %o1 = sfmmup
215	 * %o3 = FLUSH_ADDR
	 *
	 * The context register is rewritten with %tl raised to 1; %tl is
	 * set back to 0 once the old context has been restored.
216	 */
217	SFMMU_CPU_CNUM(%o1, %g1, %g2)		! %g1 = sfmmu cnum on this CPU
218
219	ldub	[%o1 + SFMMU_CEXT], %o4		! %o4 = sfmmup->sfmmu_cext
220	sll	%o4, CTXREG_EXT_SHIFT, %o4
221	or	%g1, %o4, %g1			! %g1 = primary pgsz | cnum
222
223	wrpr	%g0, 1, %tl
224	set	MMU_PCONTEXT, %o4
225	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
226	ldxa	[%o4]ASI_DMMU, %o2		! %o2 = save old ctxnum
227	srlx	%o2, CTXREG_NEXT_SHIFT, %o1	! need to preserve nucleus pgsz
228	sllx	%o1, CTXREG_NEXT_SHIFT, %o1	! %o1 = nucleus pgsz
229	or	%g1, %o1, %g1			! %g1 = nucleus pgsz | primary pgsz | cnum
230	stxa	%g1, [%o4]ASI_DMMU		! wr new ctxum
231
232	stxa	%g0, [%o0]ASI_DTLB_DEMAP
233	stxa	%g0, [%o0]ASI_ITLB_DEMAP
234	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
235	flush	%o3
236	wrpr	%g0, 0, %tl
237
238	retl
239	wrpr	%g0, %o5, %pstate		/* enable interrupts */
240	SET_SIZE(vtag_flushpage)
241
242	ENTRY_NP2(vtag_flushall, demap_all)
243	/*
244	 * flush the tlb
	 *
	 * Entered under two names, vtag_flushall and demap_all.  Takes no
	 * arguments.  Issues a DEMAP_ALL_TYPE demap to both the D-TLB and
	 * the I-TLB and follows it with a flush on FLUSH_ADDR.
	 * Uses %o3 and %g1 as scratch.
245	 */
246	sethi	%hi(FLUSH_ADDR), %o3
247	set	DEMAP_ALL_TYPE, %g1
248	stxa	%g0, [%g1]ASI_DTLB_DEMAP
249	stxa	%g0, [%g1]ASI_ITLB_DEMAP
250	flush	%o3
251	retl
252	nop
253	SET_SIZE(demap_all)
254	SET_SIZE(vtag_flushall)
255
256
257	ENTRY_NP(vtag_flushpage_tl1)
258	/*
259	 * x-trap to flush page from tlb and tsb
260	 *
261	 * %g1 = vaddr, zero-extended on 32-bit kernel
262	 * %g2 = sfmmup
263	 *
264	 * assumes TSBE_TAG = 0
	 *
	 * The srln/slln pair below shifts %g1 right and then left by
	 * MMU_PAGESHIFT (presumably plain logical shifts, which would
	 * clear the page-offset bits -- confirm against the macro
	 * definitions).  The slln sits in the delay slot of a
	 * non-annulled branch, so it runs on both the kernel and the
	 * user path.
	 *
	 * Scratch: %g3-%g6; %g2 is overwritten on the user path.
	 * Both paths finish with retry.
265	 */
266	srln	%g1, MMU_PAGESHIFT, %g1
267
268	sethi   %hi(ksfmmup), %g3
269        ldx     [%g3 + %lo(ksfmmup)], %g3
270        cmp     %g3, %g2
271        bne,pt	%xcc, 1f                        ! if not kernel as, go to 1
272	  slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
273
274	/* We need to demap in the kernel context */
275	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
276	stxa	%g0, [%g1]ASI_DTLB_DEMAP
277	stxa	%g0, [%g1]ASI_ITLB_DEMAP
278	retry
2791:
280	/* We need to demap in a user context */
281	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
282
283	SFMMU_CPU_CNUM(%g2, %g6, %g3)	! %g6 = sfmmu cnum on this CPU
284
285	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
286	sll	%g4, CTXREG_EXT_SHIFT, %g4
287	or	%g6, %g4, %g6			! %g6 = pgsz | cnum
288
289	set	MMU_PCONTEXT, %g4
290	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
291	srlx	%g5, CTXREG_NEXT_SHIFT, %g2	/* %g2 = nucleus pgsz */
292	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	/* preserve nucleus pgsz */
293	or	%g6, %g2, %g6			/* %g6 = nucleus pgsz | primary pgsz | cnum */
294	stxa	%g6, [%g4]ASI_DMMU		/* wr new ctxnum */
295	stxa	%g0, [%g1]ASI_DTLB_DEMAP
296	stxa	%g0, [%g1]ASI_ITLB_DEMAP
297	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
298	retry
299	SET_SIZE(vtag_flushpage_tl1)
300
301
302	ENTRY_NP(vtag_flush_pgcnt_tl1)
303	/*
304	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
305	 *
306	 * %g1 = vaddr, zero-extended on 32-bit kernel
307	 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
308	 *
309	 * NOTE: this handler relies on the fact that no
310	 *	interrupts or traps can occur during the loop
311	 *	issuing the TLB_DEMAP operations. It is assumed
312	 *	that interrupts are disabled and this code is
313	 *	fetching from the kernel locked text address.
314	 *
315	 * assumes TSBE_TAG = 0
	 *
	 * Both demap loops count %g3 down with deccc and advance %g1 by
	 * MMU_PAGESIZE in the delay slot of the loop branch; because
	 * that branch is not annulled, %g1 is advanced once more on the
	 * final, not-taken pass.
	 *
	 * Scratch: %g3-%g6; %g2 is reused once sfmmup is no longer
	 * needed.  Both paths finish with retry.
316	 */
317	set	SFMMU_PGCNT_MASK, %g4
318	and	%g4, %g2, %g3			/* g3 = pgcnt - 1 */
319	add	%g3, 1, %g3			/* g3 = pgcnt */
320
321	andn	%g2, SFMMU_PGCNT_MASK, %g2	/* g2 = sfmmup */
322	srln	%g1, MMU_PAGESHIFT, %g1
323
324	sethi   %hi(ksfmmup), %g4
325        ldx     [%g4 + %lo(ksfmmup)], %g4
326        cmp     %g4, %g2
327        bne,pn   %xcc, 1f			/* if not kernel as, go to 1 */
328	  slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
329
330	/* We need to demap in the kernel context */
331	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
332	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
333	sethi   %hi(FLUSH_ADDR), %g5
3344:
335	stxa	%g0, [%g1]ASI_DTLB_DEMAP
336	stxa	%g0, [%g1]ASI_ITLB_DEMAP
337	flush	%g5				! flush required by immu
338
339	deccc	%g3				/* decr pgcnt */
340	bnz,pt	%icc,4b
341	  add	%g1, %g2, %g1			/* next page */
342	retry
3431:
344	/*
345	 * We need to demap in a user context
346	 *
347	 * g2 = sfmmup
348	 * g3 = pgcnt
349	 */
350	SFMMU_CPU_CNUM(%g2, %g5, %g6)		! %g5 = sfmmu cnum on this CPU
351
352	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
353
354	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
355	sll	%g4, CTXREG_EXT_SHIFT, %g4
356	or	%g5, %g4, %g5
357
358	set	MMU_PCONTEXT, %g4
359	ldxa	[%g4]ASI_DMMU, %g6		/* rd old ctxnum */
360	srlx	%g6, CTXREG_NEXT_SHIFT, %g2	/* %g2 = nucleus pgsz */
361	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	/* preserve nucleus pgsz */
362	or	%g5, %g2, %g5			/* %g5 = nucleus pgsz | primary pgsz | cnum */
363	stxa	%g5, [%g4]ASI_DMMU		/* wr new ctxnum */
364
365	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
366	sethi   %hi(FLUSH_ADDR), %g5
3673:
368	stxa	%g0, [%g1]ASI_DTLB_DEMAP
369	stxa	%g0, [%g1]ASI_ITLB_DEMAP
370	flush	%g5				! flush required by immu
371
372	deccc	%g3				/* decr pgcnt */
373	bnz,pt	%icc,3b
374	  add	%g1, %g2, %g1			/* next page */
375
376	stxa	%g6, [%g4]ASI_DMMU		/* restore old ctxnum */
377	retry
378	SET_SIZE(vtag_flush_pgcnt_tl1)
379
380	ENTRY_NP(vtag_flushall_tl1)
381	/*
382	 * x-trap to flush tlb
	 *
	 * Takes no arguments.  Issues a DEMAP_ALL_TYPE demap to both the
	 * D-TLB and the I-TLB, using %g4 as scratch, and finishes with
	 * retry.  Unlike vtag_flushall, no flush instruction is issued
	 * here.
383	 */
384	set	DEMAP_ALL_TYPE, %g4
385	stxa	%g0, [%g4]ASI_DTLB_DEMAP
386	stxa	%g0, [%g4]ASI_ITLB_DEMAP
387	retry
388	SET_SIZE(vtag_flushall_tl1)
389
390
391/*
392 * vac_flushpage(pfnum, color)
393 *	Flush 1 8k page of the D-$ with physical page = pfnum
394 *	Algorithm:
395 *		The cheetah dcache is a 64k pseudo 4-way associative cache.
396 *		It is virtual indexed, physically tagged cache.
397 */
/*
 * dflush_type: 32-bit selector read by DCACHE_FLUSHPAGE to choose its
 * flush strategy.  Initialized to FLUSHPAGE_TYPE; the macro also
 * recognizes FLUSHMATCH_TYPE and treats any other value as a request to
 * flush the whole D$.
 */
398	.seg	".data"
399	.align	8
400	.global	dflush_type
401dflush_type:
402	.word	FLUSHPAGE_TYPE
403
404	ENTRY(vac_flushpage)
405	/*
406	 * flush page from the d$
407	 *
408	 * %o0 = pfnum, %o1 = color
	 *
	 * Thin wrapper around DCACHE_FLUSHPAGE with %o2-%o4 as scratch.
	 * The macro overwrites %o0 and, depending on dflush_type, %o1.
	 * No value is returned.
409	 */
410	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
411	retl
412	  nop
413	SET_SIZE(vac_flushpage)
414
415
416	ENTRY_NP(vac_flushpage_tl1)
417	/*
418	 * x-trap to flush page from the d$
419	 *
420	 * %g1 = pfnum, %g2 = color
	 *
	 * Same operation as vac_flushpage, but works on the global
	 * registers (%g3-%g5 as scratch) and finishes with retry.
421	 */
422	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
423	retry
424	SET_SIZE(vac_flushpage_tl1)
425
426
427	ENTRY(vac_flushcolor)
428	/*
429	 * %o0 = vcolor
	 *
	 * Flush the given virtual color from the D$ by expanding
	 * DCACHE_FLUSHCOLOR once for each of ways 0 through 3.
	 * %o0 is not modified by the macro; %o1-%o3 are scratch.
	 * Each expansion re-checks DCU_DC on its own.
430	 */
431	DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3)
432	DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3)
433	DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3)
434	DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3)
435	retl
436	  nop
437	SET_SIZE(vac_flushcolor)
438
439
440	ENTRY_NP(vac_flushcolor_tl1)
441	/*
	 * x-trap to flush a virtual color from the d$
	 *
442	 * %g1 = vcolor
	 *
	 * Expands DCACHE_FLUSHCOLOR once for each of ways 0 through 3,
	 * using %g2-%g4 as scratch, and finishes with retry.
	 *
	 * This routine runs as a trap-level handler (it works on the
	 * global registers and ends in retry), so it is declared with
	 * ENTRY_NP like the other *_tl1 handlers in this file rather than
	 * with ENTRY.
443	 */
444	DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4)
445	DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4)
446	DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4)
447	DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4)
448	retry
449	SET_SIZE(vac_flushcolor_tl1)
450
451/*
452 * Determine whether or not the IDSR is busy.
453 * Entry: no arguments
454 * Returns: 1 if busy, 0 otherwise
 *
 * Reads ASI_INTR_DISPATCH_STATUS and tests it against IDSR_BUSY.
 * Clobbers %g1.
455 */
456	ENTRY(idsr_busy)
457	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
458	clr	%o0				/* default: not busy */
459	btst	IDSR_BUSY, %g1
	/*
	 * Annulled conditional branch: the delay slot executes only when
	 * the branch is taken.  Branch on non-zero so that %o0 becomes 1
	 * exactly when a busy bit is set; with bz the result was inverted
	 * (1 when idle, 0 when busy).
	 */
460	bnz,a,pn	%xcc, 1f
461	  mov	1, %o0				/* busy */
4621:
463	retl
464	nop
465	SET_SIZE(idsr_busy)
466
/*
 * Panic message used by the DEBUG-only check at the top of init_mondo.
 * The .align 4 after the string restores word alignment for whatever
 * follows it.
 */
467	.global _dispatch_status_busy
468_dispatch_status_busy:
469	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
470	.align	4
471
472/*
473 * Setup interrupt dispatch data registers
474 * Entry:
475 *	%o0 - function or inumber to call
476 *	%o1, %o2 - arguments (2 uint64_t's)
477 */
478	.seg "text"
479
480	ENTRY(init_mondo)
	/*
	 * %o0, %o1 and %o2 are stored to the interrupt dispatch data
	 * registers IDDR_0, IDDR_1 and IDDR_2 respectively, followed by a
	 * membar #Sync.  %g1-%g3 are used as scratch.
	 *
	 * init_mondo_nocheck is an alternate entry point that skips the
	 * DEBUG-only busy check below.
	 */
481#ifdef DEBUG
482	!
483	! IDSR should not be busy at the moment
484	!
485	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
486	btst	IDSR_BUSY, %g1
487	bz,pt	%xcc, 1f
488	nop
489	sethi	%hi(_dispatch_status_busy), %o0
490	call	panic
491	or	%o0, %lo(_dispatch_status_busy), %o0
492#endif /* DEBUG */
493
494	ALTENTRY(init_mondo_nocheck)
495	!
496	! interrupt vector dispatch data reg 0
497	!
4981:
499	mov	IDDR_0, %g1
500	mov	IDDR_1, %g2
501	mov	IDDR_2, %g3
502	stxa	%o0, [%g1]ASI_INTR_DISPATCH
503
504	!
505	! interrupt vector dispatch data reg 1
506	!
507	stxa	%o1, [%g2]ASI_INTR_DISPATCH
508
509	!
510	! interrupt vector dispatch data reg 2
511	!
512	stxa	%o2, [%g3]ASI_INTR_DISPATCH
513
514	membar	#Sync
515	retl
516	nop
517	SET_SIZE(init_mondo_nocheck)
518	SET_SIZE(init_mondo)
519
520
521#if !(defined(JALAPENO) || defined(SERRANO))
522
523/*
524 * Ship mondo to aid using busy/nack pair bn
525 */
526	ENTRY_NP(shipit)
	/*
	 * %o0 = agent id of the target (aid)
	 * %o1 = busy/nack pair number (bn)
	 *
	 * Builds the dispatch address from the agent id, the b/n pair and
	 * IDCR_OFFSET, stores to it through ASI_INTR_DISPATCH, and issues
	 * a membar #Sync.  %g1 and %g2 are used as scratch.
	 */
527	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = agent id
528	sll	%o1, IDCR_BN_SHIFT, %g2		! IDCR<28:24> = b/n pair
529	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
530	or	%g1, %g2, %g1
531	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
532	membar	#Sync
533	retl
534	nop
535	SET_SIZE(shipit)
536
537#endif	/* !(JALAPENO || SERRANO) */
538
539
540/*
541 * flush_instr_mem:
542 *	Flush 1 page of the I-$ starting at vaddr
543 * 	%o0 vaddr
544 *	%o1 bytes to be flushed
545 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
546 * the stores from all processors so that a FLUSH instruction is only needed
547 * to ensure pipeline is consistent. This means a single flush is sufficient at
548 * the end of a sequence of stores that updates the instruction stream to
549 * ensure correct operation.
550 */
551
552	ENTRY(flush_instr_mem)
	/*
	 * A single flush is issued; the byte count in %o1 is not used.
	 */
553	flush	%o0			! address irrelevant
554	retl
555	nop
556	SET_SIZE(flush_instr_mem)
557
558
559#if defined(CPU_IMP_ECACHE_ASSOC)
560
561	ENTRY(get_ecache_ctrl)
	/*
	 * Return the E$ control register contents in %o0.
	 * If the value produced by GET_CPU_IMPL equals JAGUAR_IMPL the
	 * register is read through ASI_EC_CFG_TIMING, otherwise through
	 * ASI_EC_CTRL.  Takes no arguments.
	 */
562	GET_CPU_IMPL(%o0)
563	cmp	%o0, JAGUAR_IMPL
564	!
565	! Putting an ASI access in the delay slot may
566	! cause it to be accessed, even when annulled.
567	!
568	bne	1f
569	  nop
570	ldxa	[%g0]ASI_EC_CFG_TIMING, %o0	! read Jaguar shared E$ ctrl reg
571	b	2f
572	  nop
5731:
574	ldxa	[%g0]ASI_EC_CTRL, %o0		! read Ch/Ch+ E$ control reg
5752:
576	retl
577	  nop
578	SET_SIZE(get_ecache_ctrl)
579
580#endif	/* CPU_IMP_ECACHE_ASSOC */
581
582
583#if !(defined(JALAPENO) || defined(SERRANO))
584
585/*
586 * flush_ecache:
587 *	%o0 - 64 bit physical address
588 *	%o1 - ecache size
589 *	%o2 - ecache linesize
590 */
591
592	ENTRY(flush_ecache)
	/*
	 * Arguments are handed to ECACHE_FLUSHALL as
	 * (size = %o1, linesize = %o2, flush address = %o0), with %o4 as
	 * scratch.  %g3-%g5 are scratch for PN_L2_FLUSHALL.
	 */
593
594	/*
595	 * For certain CPU implementations, we have to flush the L2 cache
596	 * before flushing the ecache.
597	 */
598	PN_L2_FLUSHALL(%g3, %g4, %g5)
599
600	/*
601	 * Flush the entire Ecache using displacement flush.
602	 */
603	ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)
604
605	retl
606	nop
607	SET_SIZE(flush_ecache)
608
609#endif	/* !(JALAPENO || SERRANO) */
610
611
612	ENTRY(flush_dcache)
	/*
	 * Flush the whole D$.  Takes no arguments: the size and line size
	 * are loaded from the globals dcache_size and dcache_linesize and
	 * passed to CH_DCACHE_FLUSHALL.  Clobbers %o0-%o2.
	 */
613	ASM_LD(%o0, dcache_size)
614	ASM_LD(%o1, dcache_linesize)
615	CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
616	retl
617	nop
618	SET_SIZE(flush_dcache)
619
620
621	ENTRY(flush_icache)
	/*
	 * Flush the whole I$.  Takes no arguments.
	 *
	 * The I$ size and line size come from the per-CPU private area
	 * (CHPR_ICACHE_SIZE / CHPR_ICACHE_LINESIZE) on the fall-through
	 * path.  GET_CPU_PRIVATE_PTR is given the label flush_icache_1,
	 * presumably as its branch target when no private area is
	 * available (confirm against the macro); that path uses the
	 * globals icache_size and icache_linesize instead.
	 *
	 * Clobbers %o0, %o1, %o2 and %o4.
	 */
622	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
623	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
624	ba,pt	%icc, 2f
625	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0
626flush_icache_1:
627	ASM_LD(%o0, icache_size)
628	ASM_LD(%o1, icache_linesize)
6292:
630	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
631	retl
632	nop
633	SET_SIZE(flush_icache)
634
635	ENTRY(kdi_flush_idcache)
	/*
	 * Flush both the D$ and the I$ using caller-supplied geometry.
	 * By analogy with flush_dcache and flush_icache in this file:
	 *   %o0 = D$ size, %o1 = D$ line size
	 *   %o2 = I$ size, %o3 = I$ line size
	 * %g1 and %g2 are scratch.  A membar #Sync follows the flushes.
	 */
636	CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
637	CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
638	membar	#Sync
639	retl
640	nop
641	SET_SIZE(kdi_flush_idcache)
642
643	ENTRY(flush_pcache)
	/*
	 * Flush the P$ by expanding PCACHE_FLUSHALL, which is handed
	 * %o0-%o2.  NOTE(review): no values are loaded into these
	 * registers here, so they are presumably all scratch for the
	 * macro -- confirm against its definition.
	 */
644	PCACHE_FLUSHALL(%o0, %o1, %o2)
645	retl
646	nop
647	SET_SIZE(flush_pcache)
648
649
650#if defined(CPU_IMP_L1_CACHE_PARITY)
651
652/*
653 * Get dcache data and tag.  The Dcache data is a pointer to a ch_dc_data_t
654 * structure (see cheetahregs.h):
655 * The Dcache *should* be turned off when this code is executed.
656 */
657	.align	128
658	ENTRY(get_dcache_dtag)
	/*
	 * %o0 = D$ diagnostic index
	 * %o1 = pointer to the ch_dc_data_t to fill in
	 *
	 * Runs with PSTATE_IE and PSTATE_AM cleared; the original %pstate
	 * (saved in %o5) is written back in the delay slot of the final
	 * retl.  %o2 and %o3 are scratch, and %o0/%o1 are modified.
	 *
	 * The index is recorded in the delay slot of the branch below,
	 * which jumps over alignment padding to the 128-byte aligned
	 * body.
	 */
659	rdpr	%pstate, %o5
660	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
661	wrpr	%g0, %o3, %pstate
662	b	1f
663	  stx	%o0, [%o1 + CH_DC_IDX]
664
665	.align	128
6661:
667	ldxa	[%o0]ASI_DC_TAG, %o2
668	stx	%o2, [%o1 + CH_DC_TAG]
669	membar	#Sync
670	ldxa	[%o0]ASI_DC_UTAG, %o2
671	membar	#Sync
672	stx	%o2, [%o1 + CH_DC_UTAG]
673	ldxa	[%o0]ASI_DC_SNP_TAG, %o2
674	stx	%o2, [%o1 + CH_DC_SNTAG]
675	add	%o1, CH_DC_DATA, %o1
676	clr	%o3
	/*
	 * Copy CH_DC_DATA_REG_SIZE bytes of line data, 8 bytes per pass.
	 * %o3 is the byte offset; it is advanced in the delay slot.
	 */
6772:
678	membar	#Sync				! required before ASI_DC_DATA
679	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
680	membar	#Sync				! required after ASI_DC_DATA
681	stx	%o2, [%o1 + %o3]
682	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
683	blt	2b
684	  add	%o3, 8, %o3
685
686	/*
687	 * Unlike other CPUs in the family, D$ data parity bits for Panther
688	 * do not reside in the microtag. Instead, we have to read them
689	 * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
690	 * of just having 8 parity bits to protect all 32 bytes of data
691	 * per line, we now have 32 bits of parity.
692	 */
693	GET_CPU_IMPL(%o3)
694	cmp	%o3, PANTHER_IMPL
695	bne	4f
696	  clr	%o3
697
698	/*
699	 * move our pointer to the next field where we store parity bits
700	 * and add the offset of the last parity byte since we will be
701	 * storing all 4 parity bytes within one 64 bit field like this:
702	 *
703	 * +------+------------+------------+------------+------------+
704	 * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
705	 * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
706	 * +------+------------+------------+------------+------------+
707	 *  63:32     31:24        23:16         15:8          7:0
708	 */
709	add	%o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1
710
711	/* add the DC_data_parity bit into our working index */
712	mov	1, %o2
713	sll	%o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
714	or	%o0, %o2, %o0
	/*
	 * One parity byte is stored per 8-byte step, starting at the
	 * highest address (+7) and moving the store pointer down by one
	 * each pass.
	 */
7153:
716	membar	#Sync				! required before ASI_DC_DATA
717	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
718	membar	#Sync				! required after ASI_DC_DATA
719	stb	%o2, [%o1]
720	dec	%o1
721	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
722	blt	3b
723	  add	%o3, 8, %o3
7244:
725	retl
726	  wrpr	%g0, %o5, %pstate
727	SET_SIZE(get_dcache_dtag)
728
729
730/*
731 * Get icache data and tag.  The data argument is a pointer to a ch_ic_data_t
732 * structure (see cheetahregs.h):
733 * The Icache *Must* be turned off when this function is called.
734 * This is because diagnostic accesses to the Icache interfere with cache
735 * consistency.
736 */
737	.align	128
738	ENTRY(get_icache_dtag)
	/*
	 * %o0 = I$ diagnostic index
	 * %o1 = pointer to the ch_ic_data_t to fill in
	 *
	 * Runs with PSTATE_IE and PSTATE_AM cleared; the original %pstate
	 * (saved in %o5) is written back in the delay slot of retl.
	 * %o2 and %o3 are scratch, and %o0/%o1 are modified.
	 *
	 * Four tag reads are made through ASI_IC_TAG.  The first uses the
	 * index as passed in; before each later read the index is adjusted
	 * by the difference between consecutive CH_ICTAG_* selectors
	 * (UTAG, UPPER, LOWER).  CH_ICTAG_TMASK is then cleared from the
	 * index before the snoop tag and data reads.
	 */
739	rdpr	%pstate, %o5
740	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
741	wrpr	%g0, %o3, %pstate
742
743	stx	%o0, [%o1 + CH_IC_IDX]
744	ldxa	[%o0]ASI_IC_TAG, %o2
745	stx	%o2, [%o1 + CH_IC_PATAG]
746	add	%o0, CH_ICTAG_UTAG, %o0
747	ldxa	[%o0]ASI_IC_TAG, %o2
748	add	%o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
749	stx	%o2, [%o1 + CH_IC_UTAG]
750	ldxa	[%o0]ASI_IC_TAG, %o2
751	add	%o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
752	stx	%o2, [%o1 + CH_IC_UPPER]
753	ldxa	[%o0]ASI_IC_TAG, %o2
754	andn	%o0, CH_ICTAG_TMASK, %o0
755	stx	%o2, [%o1 + CH_IC_LOWER]
756	ldxa	[%o0]ASI_IC_SNP_TAG, %o2
757	stx	%o2, [%o1 + CH_IC_SNTAG]
758	add	%o1, CH_IC_DATA, %o1
759	clr	%o3
	/*
	 * Copy PN_IC_DATA_REG_SIZE bytes of line data, 8 bytes per pass.
	 * %o3 is the byte offset; it is advanced in the delay slot.
	 */
7602:
761	ldxa	[%o0 + %o3]ASI_IC_DATA, %o2
762	stx	%o2, [%o1 + %o3]
763	cmp	%o3, PN_IC_DATA_REG_SIZE - 8
764	blt	2b
765	  add	%o3, 8, %o3
766
767	retl
768	  wrpr	%g0, %o5, %pstate
769	SET_SIZE(get_icache_dtag)
770
771/*
772 * Get pcache data and tags.
773 * inputs:
774 *   pcache_idx	- fully constructed VA for accessing P$ diagnostic
775 *		  registers. Contains PC_way and PC_addr shifted into
776 *		  the correct bit positions. See the PRM for more details.
777 *   data	- pointer to a ch_pc_data_t
778 * structure (see cheetahregs.h):
779 */
780	.align	128
781	ENTRY(get_pcache_dtag)
	/*
	 * %o0 = pcache_idx
	 * %o1 = pointer to the ch_pc_data_t to fill in
	 *
	 * Runs with PSTATE_IE and PSTATE_AM cleared; the original %pstate
	 * (saved in %o5) is written back in the delay slot of retl.
	 * %o2 and %o3 are scratch, and %o1 is advanced to the data field.
	 */
782	rdpr	%pstate, %o5
783	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
784	wrpr	%g0, %o3, %pstate
785
786	stx	%o0, [%o1 + CH_PC_IDX]
787	ldxa	[%o0]ASI_PC_STATUS_DATA, %o2
788	stx	%o2, [%o1 + CH_PC_STATUS]
789	ldxa	[%o0]ASI_PC_TAG, %o2
790	stx	%o2, [%o1 + CH_PC_TAG]
791	ldxa	[%o0]ASI_PC_SNP_TAG, %o2
792	stx	%o2, [%o1 + CH_PC_SNTAG]
793	add	%o1, CH_PC_DATA, %o1
794	clr	%o3
	/*
	 * Copy CH_PC_DATA_REG_SIZE bytes of line data, 8 bytes per pass.
	 * %o3 is the byte offset; it is advanced in the delay slot.
	 */
7952:
796	ldxa	[%o0 + %o3]ASI_PC_DATA, %o2
797	stx	%o2, [%o1 + %o3]
798	cmp	%o3, CH_PC_DATA_REG_SIZE - 8
799	blt	2b
800	  add	%o3, 8, %o3
801
802	retl
803	  wrpr	%g0, %o5, %pstate
804	SET_SIZE(get_pcache_dtag)
805
806#endif	/* CPU_IMP_L1_CACHE_PARITY */
807
808/*
809 * re-enable the i$, d$, w$, and p$ according to bootup cache state.
810 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
811 *   %o0 - 64 bit constant
812 */
813	ENTRY(set_dcu)
	/*
	 * Write %o0 to the DCU register through ASI_DCU, then issue the
	 * flush needed after a change to the IC bit.  No value is
	 * returned.
	 */
814	stxa	%o0, [%g0]ASI_DCU	! Store to DCU
815	flush	%g0	/* flush required after changing the IC bit */
816	retl
817	nop
818	SET_SIZE(set_dcu)
819
820
821/*
822 * Return DCU register.
823 */
824	ENTRY(get_dcu)
	/*
	 * Takes no arguments.  The DCU register contents are read through
	 * ASI_DCU and returned in %o0.
	 */
825	ldxa	[%g0]ASI_DCU, %o0		/* DCU control register */
826	retl
827	nop
828	SET_SIZE(get_dcu)
829
830/*
831 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
832 *
833 * This handler is used to check for softints generated by error trap
834 * handlers to report errors.  On Cheetah, this mechanism is used by the
835 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
836 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
837 * NB: Must be 8 instructions or less to fit in trap table and code must
838 *     be relocatable.
839 */
840
841	ENTRY_NP(ch_pil15_interrupt_instr)
	/*
	 * Trap table stub: transfer control to ch_pil15_interrupt.
	 * %g1 is the register handed to ASM_JMP (presumably to hold the
	 * target address -- confirm against the macro definition).
	 */
842	ASM_JMP(%g1, ch_pil15_interrupt)
843	SET_SIZE(ch_pil15_interrupt_instr)
844
845
846	ENTRY_NP(ch_pil15_interrupt)
847
848	/*
849	 * Since pil_interrupt is hacked to assume that every level 15
850	 * interrupt is generated by the CPU to indicate a performance
851	 * counter overflow this gets ugly.  Before calling pil_interrupt
852	 * the Error at TL>0 pending status is inspected.  If it is
853	 * non-zero, then an error has occurred and it is handled.
854	 * Otherwise control is transferred to pil_interrupt.  Note that if
855	 * an error is detected pil_interrupt will not be called and
856	 * overflow interrupts may be lost causing erroneous performance
857	 * measurements.  However, error-recovery will have a detrimental
858	 * effect on performance anyway.
	 *
	 * The pending status is one byte per CPU: the byte at
	 * ch_err_tl1_pending + %g1 is tested, where %g1 is the value
	 * produced by CPU_INDEX.
859	 */
860	CPU_INDEX(%g1, %g4)
861	set	ch_err_tl1_pending, %g4
862	ldub	[%g1 + %g4], %g2
863	brz	%g2, 1f
864	  nop
865
866	/*
867	 * We have a pending TL>0 error, clear the TL>0 pending status.
868	 */
869	stb	%g0, [%g1 + %g4]
870
871	/*
872	 * Clear the softint.
873	 */
874	mov	1, %g5
875	sll	%g5, PIL_15, %g5
876	wr	%g5, CLEAR_SOFTINT
877
878	/*
879	 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
880	 * to process the Fast ECC/Cache Parity at TL>0 error.  Clear
881	 * panic flag (%g2).
	 *
	 * %g1 = handler address, %g2 = 0, %g4 = PIL_15 (set in the
	 * delay slot of the branch to sys_trap).
882	 */
883	set	cpu_tl1_error, %g1
884	clr	%g2
885	ba	sys_trap
886	  mov	PIL_15, %g4
887
8881:
889	/*
890	 * The logout is invalid.
891	 *
892	 * Call the default interrupt handler.
	 *
	 * Reached when the pending byte read above was zero.  %g4 is set
	 * to PIL_15 in the delay slot of the jump to pil_interrupt.
893	 */
894	sethi	%hi(pil_interrupt), %g1
895	jmp	%g1 + %lo(pil_interrupt)
896	  mov	PIL_15, %g4
897
898	SET_SIZE(ch_pil15_interrupt)
899
900
901/*
902 * Error Handling
903 *
904 * Cheetah provides error checking for all memory access paths between
905 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
906 * information is logged in the AFSR, (also AFSR_EXT for Panther) and
907 * AFAR and one of the following traps is generated (provided that it
908 * is enabled in External Cache Error Enable Register) to handle that
909 * error:
910 * 1. trap 0x70: Precise trap
911 *    tt0_fecc for errors at trap level(TL)>=0
912 * 2. trap 0x0A and 0x32: Deferred trap
913 *    async_err for errors at TL>=0
914 * 3. trap 0x63: Disrupting trap
915 *    ce_err for errors at TL=0
916 *    (Note that trap 0x63 cannot happen at trap level > 0)
917 *
918 * Trap level one handlers panic the system except for the fast ecc
919 * error handler which tries to recover from certain errors.
920 */
921
922/*
923 * FAST ECC TRAP STRATEGY:
924 *
925 * Software must handle single and multi bit errors which occur due to data
926 * or instruction cache reads from the external cache. A single or multi bit
927 * error occurring in one of these situations results in a precise trap.
928 *
929 * The basic flow of this trap handler is as follows:
930 *
931 * 1) Record the state and then turn off the Dcache and Icache.  The Dcache
932 *    is disabled because bad data could have been installed.  The Icache is
933 *    turned off because we want to capture the Icache line related to the
934 *    AFAR.
935 * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing.
936 * 3) Park sibling core if caches are shared (to avoid race condition while
937 *    accessing shared resources such as L3 data staging register during
938 *    CPU logout.
939 * 4) Read the AFAR and AFSR.
940 * 5) If CPU logout structure is not being used, then:
941 *    6) Clear all errors from the AFSR.
942 *    7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
943 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
944 *       state.
945 *    9) Unpark sibling core if we parked it earlier.
946 *    10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
947 *        running at PIL 15.
948 * 6) Otherwise, if CPU logout structure is being used:
949 *    7) Increment the "logout busy count".
950 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
951 *       state.
952 *    9) Unpark sibling core if we parked it earlier.
953 *    10) Issue a retry since the other CPU error logging code will end up
954 *       finding this error bit and logging information about it later.
955 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
956 *    yet initialized such that we can't even check the logout struct, then
957 *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
958 *    call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
959 *    to determine information such as TL, TT, CEEN and NCEEN settings, etc
960 *    in the high level trap handler since we don't have access to detailed
961 *    logout information in cases where the cpu_private struct is not yet
962 *    initialized.
963 *
964 * We flush the E$ and D$ here on TL=1 code to prevent getting nested
965 * Fast ECC traps in the TL=0 code.  If we get a Fast ECC event here in
966 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
967 * since it uses different code/data from this handler, has a better
968 * chance of fixing things up than simply recursing through this code
969 * again (this would probably cause an eventual kernel stack overflow).
970 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
971 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
972 * the Fast ECC at TL>0 handler and eventually Red Mode.
973 *
974 * Note that for Cheetah (and only Cheetah), we use alias addresses for
975 * flushing rather than ASI accesses (which don't exist on Cheetah).
976 * Should we encounter a Fast ECC error within this handler on Cheetah,
977 * there's a good chance it's within the ecache_flushaddr buffer (since
978 * it's the largest piece of memory we touch in the handler and it is
979 * usually kernel text/data).  For that reason the Fast ECC at TL>0
980 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
981 */
982
983/*
984 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
985 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
986 * architecture-specific files.
987 * NB: Must be 8 instructions or less to fit in trap table and code must
988 *     be relocatable.
989 */
990
991	ENTRY_NP(fecc_err_instr)
	/*
	 * Trap table stub for the Fast ECC trap.  Saves the current DCU
	 * register in %g1, clears DCU_DC and DCU_IC, and then transfers
	 * control to fast_ecc_err, which later restores the DCU register
	 * from %g1.  %g4 is scratch.
	 */
992	membar	#Sync			! Cheetah requires membar #Sync
993
994	/*
995	 * Save current DCU state.  Turn off the Dcache and Icache.
996	 */
997	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
998	andn	%g1, DCU_DC + DCU_IC, %g4
999	stxa	%g4, [%g0]ASI_DCU
1000	flush	%g0	/* flush required after changing the IC bit */
1001
1002	ASM_JMP(%g4, fast_ecc_err)
1003	SET_SIZE(fecc_err_instr)
1004
1005
1006#if !(defined(JALAPENO) || defined(SERRANO))
1007
1008	.section ".text"
1009	.align	64
1010	ENTRY_NP(fast_ecc_err)
1011
	/*
	 * Overview of the code below: disable CEEN/NCEEN, park the sibling
	 * core if needed, capture the CPU logout data, flush the E$ (plus
	 * the L2 on Panther), the D$ and the I$, unpark the sibling, write
	 * %g1 back to the DCU register, and finally panic, retry, or hand
	 * off to cpu_fast_ecc_error via sys_trap depending on the logout
	 * result left in %g3.
	 *
	 * NOTE(review): %g1 is consumed here without being loaded first;
	 * presumably the code that jumps to this routine saved the DCU
	 * register in it -- confirm against the trap table entry.
	 */
1012	/*
1013	 * Turn off CEEN and NCEEN.
1014	 */
1015	ldxa	[%g0]ASI_ESTATE_ERR, %g3	! %g3 keeps the value as found
1016	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1017	stxa	%g4, [%g0]ASI_ESTATE_ERR
1018	membar	#Sync			! membar sync required
1019
1020	/*
1021	 * Check to see whether we need to park our sibling core
1022	 * before recording diagnostic information from caches
1023	 * which may be shared by both cores.
1024	 * We use %g1 to store information about whether or not
1025	 * we had to park the core (%g1 holds our DCUCR value and
1026	 * we only use bits from that register which are "reserved"
1027	 * to keep track of core parking) so that we know whether
1028	 * or not to unpark later. %g5 and %g4 are scratch registers.
1029	 */
1030	PARK_SIBLING_CORE(%g1, %g5, %g4)
1031
1032	/*
1033	 * Do the CPU log out capture.
1034	 *   %g3 = "failed?" return value.
1035	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1036	 *         into this macro via %g4. Output only valid if cpu_private
1037	 *         struct has not been initialized.
1038	 *   CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
1039	 *   %g4 = Trap information stored in the cpu logout flags field
1040	 *   %g5 = scr1
1041	 *   %g6 = scr2
1042	 *   %g3 = scr3
1043	 *   %g4 = scr4
	 * Note that %g3, %g4 and %g6 each fill two argument slots in the
	 * invocation below.
1044	 */
1045	 /* store the CEEN and NCEEN values, TL=0 */
1046	and	%g3, EN_REG_CEEN + EN_REG_NCEEN, %g4
1047	set	CHPR_FECCTL0_LOGOUT, %g6
1048	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1049
1050	/*
1051	 * Flush the Ecache (and L2 cache for Panther) to get the error out
1052	 * of the Ecache.  If the UCC or UCU is on a dirty line, then the
1053	 * following flush will turn that into a WDC or WDU, respectively.
1054	 */
1055	PN_L2_FLUSHALL(%g4, %g5, %g6)
1056
	/*
	 * %g4 = &cpunodes[this cpu] (index * CPU_NODE_SIZE + base); the
	 * E$ size and line size for the flush are then read from it.
	 */
1057	CPU_INDEX(%g4, %g5)
1058	mulx	%g4, CPU_NODE_SIZE, %g4
1059	set	cpunodes, %g5
1060	add	%g4, %g5, %g4
1061	ld	[%g4 + ECACHE_LINESIZE], %g5
1062	ld	[%g4 + ECACHE_SIZE], %g4
1063
1064	ASM_LDX(%g6, ecache_flushaddr)
1065	ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)
1066
1067	/*
1068	 * Flush the Dcache.  Since bad data could have been installed in
1069	 * the Dcache we must flush it before re-enabling it.
1070	 */
1071	ASM_LD(%g5, dcache_size)
1072	ASM_LD(%g6, dcache_linesize)
1073	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1074
1075	/*
1076	 * Flush the Icache.  Since we turned off the Icache to capture the
1077	 * Icache line it is now stale or corrupted and we must flush it
1078	 * before re-enabling it.
	 * The I$ geometry is read from the cpu private area when
	 * GET_CPU_PRIVATE_PTR yields one; otherwise control goes to
	 * fast_ecc_err_5 and the global icache_size/icache_linesize
	 * values are used instead.
1079	 */
1080	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
1081	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1082	ba,pt	%icc, 6f
1083	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1084fast_ecc_err_5:
1085	ASM_LD(%g5, icache_size)
1086	ASM_LD(%g6, icache_linesize)
10876:
1088	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1089
1090	/*
1091	 * check to see whether we parked our sibling core at the start
1092	 * of this handler. If so, we need to unpark it here.
1093	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1094	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1095	 */
1096	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1097
1098	/*
1099	 * Restore the Dcache and Icache to the previous state.
1100	 */
1101	stxa	%g1, [%g0]ASI_DCU
1102	flush	%g0	/* flush required after changing the IC bit */
1103
1104	/*
1105	 * Make sure our CPU logout operation was successful.
	 * %g3 == 0 takes the branch to 8f, where the TL=0 handler is
	 * posted through sys_trap.
1106	 */
1107	cmp	%g3, %g0
1108	be	8f
1109	  nop
1110
1111	/*
1112	 * If the logout structure had been busy, how many times have
1113	 * we tried to use it and failed (nesting count)? If we have
1114	 * already recursed a substantial number of times, then we can
1115	 * assume things are not going to get better by themselves and
1116	 * so it would be best to panic.
1117	 */
1118	cmp	%g3, CLO_NESTING_MAX
1119	blt	7f
1120	  nop
1121
1122        call ptl1_panic
1123          mov   PTL1_BAD_ECC, %g1
1124
11257:
1126	/*
1127	 * Otherwise, if the logout structure was busy but we have not
1128	 * nested more times than our maximum value, then we simply
1129	 * issue a retry. Our TL=0 trap handler code will check and
1130	 * clear the AFSR after it is done logging what is currently
1131	 * in the logout struct and handle this event at that time.
1132	 */
1133	retry
11348:
1135	/*
1136	 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
1137	 * already at PIL 15.
	 * The movl in the branch delay slot replaces %g4 with PIL_14 only
	 * when the current %pil compares lower, so %g4 ends up holding
	 * max(%pil, PIL_14).
1138	 */
1139	set	cpu_fast_ecc_error, %g1
1140	rdpr	%pil, %g4
1141	cmp	%g4, PIL_14
1142	ba	sys_trap
1143	  movl	%icc, PIL_14, %g4
1144
1145	SET_SIZE(fast_ecc_err)
1146
1147#endif	/* !(JALAPENO || SERRANO) */
1148
1149
1150/*
1151 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1152 *
1153 * The basic flow of this trap handler is as follows:
1154 *
1155 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1156 *    software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1157 *    will use to save %g1 and %g2.
1158 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1159 *    we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1160 *    handler (using the just saved %g1).
1161 * 3) Turn off the Dcache if it was on and save the state of the Dcache
1162 *    (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1163 *    NB: we don't turn off the Icache because bad data is not installed nor
1164 *        will we be doing any diagnostic accesses.
1165 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1166 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1167 *    %tpc, %tnpc, %tstate values previously saved).
1168 * 6) set %tl to %tl - 1.
1169 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
1170 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
1171 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear.  For
1172 *    Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
1173 *    Save the values in ch_err_tl1_data.  For Panther, read the shadow
1174 *    AFSR_EXT and save the value in ch_err_tl1_data.
1175 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
1176 *    being queued.  We'll report them via the AFSR/AFAR capture in step 13.
1177 * 11) Flush the Ecache.
1178 *    NB: the Ecache is flushed assuming the largest possible size with
1179 *        the smallest possible line size since access to the cpu_nodes may
1180 *        cause an unrecoverable DTLB miss.
1181 * 12) Reenable CEEN/NCEEN with the value saved from step 10.
1182 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
1183 *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
1184 *    Save the read AFSR/AFAR values in ch_err_tl1_data.  For Panther,
1185 *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1186 * 14) Flush and re-enable the Dcache if it was on at step 3.
1187 * 15) Do TRAPTRACE if enabled.
1188 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1189 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1190 * 18) Cause a softint 15.  The pil15_interrupt handler will inspect the
1191 *    event pending flag and call cpu_tl1_error via systrap if set.
1192 * 19) Restore the registers from step 5 and issue retry.
1193 */
1194
1195/*
1196 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1197 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1198 * architecture-specific files.  This generates a "Software Trap 0" at TL>0,
1199 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1200 * NB: Must be 8 instructions or less to fit in trap table and code must
1201 *     be relocatable.
1202 */
1203
1204	ENTRY_NP(fecc_err_tl1_instr)
	/* The entire body is this one macro expansion, given SWTRAP_0. */
1205	CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
1206	SET_SIZE(fecc_err_tl1_instr)
1207
1208/*
1209 * Software trap 0 at TL>0.
1210 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1211 * the various architecture-specific files.  This is used as a continuation
1212 * of the fast ecc handling where we've bought an extra TL level, so we can
1213 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1214 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1215 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1216 * order two bits from %g1 and %g2 respectively).
1217 * NB: Must be 8 instructions or less to fit in trap table and code must
1218 *     be relocatable.
1219 */
1220
1221	ENTRY_NP(fecc_err_tl1_cont_instr)
	/*
	 * The entire body is this one macro expansion; fast_ecc_tl1_err is
	 * passed as its only argument.
	 */
1222	CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
1223	SET_SIZE(fecc_err_tl1_cont_instr)
1224
1225
1226/*
1227 * The ce_err function handles disrupting trap type 0x63 at TL=0.
1228 *
1229 * AFSR errors bits which cause this trap are:
1230 *	CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1231 *
1232 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1233 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1234 *
1235 * CEEN Bit of Cheetah External Cache Error Enable Register enables
1236 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1237 *
1238 * Cheetah+ also handles (No additional processing required):
1239 *    DUE, DTO, DBERR	(NCEEN controlled)
1240 *    THCE		(CEEN and ET_ECC_en controlled)
1241 *    TUE		(ET_ECC_en controlled)
1242 *
1243 * Panther further adds:
1244 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1245 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1246 *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1247 *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1248 *    THCE			(CEEN and L2_tag_ECC_en controlled)
1249 *    L3_THCE			(CEEN and ET_ECC_en controlled)
1250 *
1251 * Steps:
1252 *	1. Disable hardware corrected disrupting errors only (CEEN)
1253 *	2. Park sibling core if caches are shared (to avoid race
1254 *	   condition while accessing shared resources such as L3
1255 *	   data staging register during CPU logout).
1256 *	3. If the CPU logout structure is not currently being used:
1257 *		4. Clear AFSR error bits
1258 *		5. Capture Ecache, Dcache and Icache lines associated
1259 *		   with AFAR.
1260 *		6. Unpark sibling core if we parked it earlier.
1261 *		7. call cpu_disrupting_error via sys_trap at PIL 14
1262 *		   unless we're already running at PIL 15.
1263 *	4. Otherwise, if the CPU logout structure is busy:
1264 *		5. Increment "logout busy count" and place into %g3
1265 *		6. Unpark sibling core if we parked it earlier.
1266 *		7. Issue a retry since the other CPU error logging
1267 *		   code will end up finding this error bit and logging
1268 *		   information about it later.
1269 *	5. Alternatively (to 3 and 4 above), if the cpu_private struct is
1270 *         not yet initialized such that we can't even check the logout
1271 *         struct, then we place the clo_flags data into %g2
1272 *         (sys_trap->have_win arg #1) and call cpu_disrupting_error via
1273 *         systrap. The clo_flags parameter is used to determine information
1274 *         such as TL, TT, CEEN settings, etc in the high level trap
1275 *         handler since we don't have access to detailed logout information
1276 *         in cases where the cpu_private struct is not yet initialized.
1277 *
1278 * %g3: [ logout busy count ] - arg #2
1279 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1280 */
1281
1282	.align	128
1283	ENTRY_NP(ce_err)
1284	membar	#Sync			! Cheetah requires membar #Sync
1285
1286	/*
1287	 * Disable trap on hardware corrected errors (CEEN) while at TL=0
1288	 * to prevent recursion.
	 * %g1 is only a scratch register for this read-modify-write; it
	 * is reloaded with the DCU value right afterwards.
1289	 */
1290	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1291	bclr	EN_REG_CEEN, %g1
1292	stxa	%g1, [%g0]ASI_ESTATE_ERR
1293	membar	#Sync			! membar sync required
1294
1295	/*
1296	 * Save current DCU state.  Turn off Icache to allow capture of
1297	 * Icache data by DO_CPU_LOGOUT.
1298	 */
1299	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1300	andn	%g1, DCU_IC, %g4
1301	stxa	%g4, [%g0]ASI_DCU
1302	flush	%g0	/* flush required after changing the IC bit */
1303
1304	/*
1305	 * Check to see whether we need to park our sibling core
1306	 * before recording diagnostic information from caches
1307	 * which may be shared by both cores.
1308	 * We use %g1 to store information about whether or not
1309	 * we had to park the core (%g1 holds our DCUCR value and
1310	 * we only use bits from that register which are "reserved"
1311	 * to keep track of core parking) so that we know whether
1312	 * or not to unpark later. %g5 and %g4 are scratch registers.
1313	 */
1314	PARK_SIBLING_CORE(%g1, %g5, %g4)
1315
1316	/*
1317	 * Do the CPU log out capture.
1318	 *   %g3 = "failed?" return value.
1319	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1320	 *         into this macro via %g4. Output only valid if cpu_private
1321	 *         struct has not been initialized.
1322	 *   CHPR_CECC_LOGOUT = cpu logout structure offset input
1323	 *   %g4 = Trap information stored in the cpu logout flags field
1324	 *   %g5 = scr1
1325	 *   %g6 = scr2
1326	 *   %g3 = scr3
1327	 *   %g4 = scr4
	 * Note that %g3, %g4 and %g6 each fill two argument slots in the
	 * invocation below.
1328	 */
1329	clr	%g4			! logout flags input = 0 (TL=0)
1330	set	CHPR_CECC_LOGOUT, %g6
1331	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1332
1333	/*
1334	 * Flush the Icache.  Since we turned off the Icache to capture the
1335	 * Icache line it is now stale or corrupted and we must flush it
1336	 * before re-enabling it.
	 * The I$ geometry is read from the cpu private area when
	 * GET_CPU_PRIVATE_PTR yields one; otherwise control goes to
	 * ce_err_1 and the global icache_size/icache_linesize values are
	 * used instead.
1337	 */
1338	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
1339	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1340	ba,pt	%icc, 2f
1341	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1342ce_err_1:
1343	ASM_LD(%g5, icache_size)
1344	ASM_LD(%g6, icache_linesize)
13452:
1346	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1347
1348	/*
1349	 * check to see whether we parked our sibling core at the start
1350	 * of this handler. If so, we need to unpark it here.
1351	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1352	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1353	 */
1354	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1355
1356	/*
1357	 * Restore Icache to previous state.
1358	 */
1359	stxa	%g1, [%g0]ASI_DCU
1360	flush	%g0	/* flush required after changing the IC bit */
1361
1362	/*
1363	 * Make sure our CPU logout operation was successful.
	 * %g3 == 0 takes the branch to 4f, where the TL=0 handler is
	 * posted through sys_trap.
1364	 */
1365	cmp	%g3, %g0
1366	be	4f
1367	  nop
1368
1369	/*
1370	 * If the logout structure had been busy, how many times have
1371	 * we tried to use it and failed (nesting count)? If we have
1372	 * already recursed a substantial number of times, then we can
1373	 * assume things are not going to get better by themselves and
1374	 * so it would be best to panic.
1375	 */
1376	cmp	%g3, CLO_NESTING_MAX
1377	blt	3f
1378	  nop
1379
1380        call ptl1_panic
1381          mov   PTL1_BAD_ECC, %g1
1382
13833:
1384	/*
1385	 * Otherwise, if the logout structure was busy but we have not
1386	 * nested more times than our maximum value, then we simply
1387	 * issue a retry. Our TL=0 trap handler code will check and
1388	 * clear the AFSR after it is done logging what is currently
1389	 * in the logout struct and handle this event at that time.
1390	 */
1391	retry
13924:
1393	/*
1394	 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
1395	 * already at PIL 15.
	 * The movl in the branch delay slot replaces %g4 with PIL_14 only
	 * when the current %pil compares lower, so %g4 ends up holding
	 * max(%pil, PIL_14).
1396	 */
1397	set	cpu_disrupting_error, %g1
1398	rdpr	%pil, %g4
1399	cmp	%g4, PIL_14
1400	ba	sys_trap
1401	  movl	%icc, PIL_14, %g4
1402	SET_SIZE(ce_err)
1403
1404
1405/*
1406 * This trap cannot happen at TL>0 which means this routine will never
1407 * actually be called and so we treat this like a BAD TRAP panic.
1408 */
1409	.align	64
1410	ENTRY_NP(ce_err_tl1)
1411
	/* Unconditional panic; the delay slot loads PTL1_BAD_TRAP into %g1. */
1412        call ptl1_panic
1413          mov   PTL1_BAD_TRAP, %g1
1414
1415	SET_SIZE(ce_err_tl1)
1416
1417
1418/*
1419 * The async_err function handles deferred trap types 0xA
1420 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1421 *
1422 * AFSR errors bits which cause this trap are:
1423 *	UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1424 * On some platforms, EMU may cause Cheetah to pull the error pin,
1425 * never giving Solaris a chance to take a trap.
1426 *
1427 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1428 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1429 *
1430 * Steps:
1431 *	1. Disable CEEN and NCEEN errors to prevent recursive errors.
1432 *	2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1433 *         I$ line in DO_CPU_LOGOUT.
1434 *	3. Park sibling core if caches are shared (to avoid race
1435 *	   condition while accessing shared resources such as L3
1436 *	   data staging register during CPU logout).
1437 *	4. If the CPU logout structure is not currently being used:
1438 *		5. Clear AFSR error bits
1439 *		6. Capture Ecache, Dcache and Icache lines associated
1440 *		   with AFAR.
1441 *		7. Unpark sibling core if we parked it earlier.
1442 *		8. call cpu_deferred_error via sys_trap.
1443 *	5. Otherwise, if the CPU logout structure is busy:
1444 *		6. Increment "logout busy count"
1445 *		7. Unpark sibling core if we parked it earlier.
1446 *		8. Issue a retry since the other CPU error logging
1447 *		   code will end up finding this error bit and logging
1448 *		   information about it later.
1449 *      6. Alternatively (to 4 and 5 above), if the cpu_private struct is
1450 *         not yet initialized such that we can't even check the logout
1451 *         struct, then we place the clo_flags data into %g2
1452 *         (sys_trap->have_win arg #1) and call cpu_deferred_error via
1453 *         systrap. The clo_flags parameter is used to determine information
1454 *         such as TL, TT, CEEN settings, etc in the high level trap handler
1455 *         since we don't have access to detailed logout information in cases
1456 *         where the cpu_private struct is not yet initialized.
1457 *
1458 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1459 * %g3: [ logout busy count ] - arg #2
1460 */
1461
1462	ENTRY_NP(async_err)
1463	membar	#Sync			! Cheetah requires membar #Sync
1464
	/*
	 * NOTE(review): %g5 is read further down as the trap type (with
	 * T_TL1 optionally set) without being loaded in this routine;
	 * presumably the trap table entry that branches here sets it up --
	 * confirm against the callers.  It must stay intact until the
	 * clo_flags value has been built.
	 */
1465	/*
1466	 * Disable CEEN and NCEEN.
1467	 */
1468	ldxa	[%g0]ASI_ESTATE_ERR, %g3	! %g3 keeps the value as found
1469	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1470	stxa	%g4, [%g0]ASI_ESTATE_ERR
1471	membar	#Sync			! membar sync required
1472
1473	/*
1474	 * Save current DCU state.
1475	 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
1476	 * Do this regardless of whether this is a Data Access Error or
1477	 * Instruction Access Error Trap.
1478	 * Disable Dcache for both Data Access Error and Instruction Access
1479	 * Error per Cheetah PRM P.5 Note 6.
1480	 */
1481	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1482	andn	%g1, DCU_IC + DCU_DC, %g4
1483	stxa	%g4, [%g0]ASI_DCU
1484	flush	%g0	/* flush required after changing the IC bit */
1485
1486	/*
1487	 * Check to see whether we need to park our sibling core
1488	 * before recording diagnostic information from caches
1489	 * which may be shared by both cores.
1490	 * We use %g1 to store information about whether or not
1491	 * we had to park the core (%g1 holds our DCUCR value and
1492	 * we only use bits from that register which are "reserved"
1493	 * to keep track of core parking) so that we know whether
1494	 * or not to unpark later. %g6 and %g4 are scratch registers.
	 * (%g5 is not used as scratch here because it is still read
	 * below when the flags are assembled.)
1495	 */
1496	PARK_SIBLING_CORE(%g1, %g6, %g4)
1497
1498	/*
1499	 * Do the CPU logout capture.
1500	 *
1501	 *   %g3 = "failed?" return value.
1502	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1503	 *         into this macro via %g4. Output only valid if cpu_private
1504	 *         struct has not been initialized.
1505	 *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
1506	 *   %g4 = Trap information stored in the cpu logout flags field
1507	 *   %g5 = scr1
1508	 *   %g6 = scr2
1509	 *   %g3 = scr3
1510	 *   %g4 = scr4
	 *
	 * The instructions ahead of the macro build %g4 as
	 *   (TL1 flag << CLO_FLAGS_TL_SHIFT) |
	 *   ((%g5 << CLO_FLAGS_TT_SHIFT) & CLO_FLAGS_TT_MASK) |
	 *   (saved enable register & EN_REG_CEEN)
	 * using %g2 and %g6 as temporaries.
1511	 */
1512	andcc	%g5, T_TL1, %g0
1513	clr	%g6
1514	movnz	%xcc, 1, %g6			! set %g6 if T_TL1 set
1515	sllx	%g6, CLO_FLAGS_TL_SHIFT, %g6
1516	sllx	%g5, CLO_FLAGS_TT_SHIFT, %g4
1517	set	CLO_FLAGS_TT_MASK, %g2
1518	and	%g4, %g2, %g4			! ttype
1519	or	%g6, %g4, %g4			! TT and TL
1520	and	%g3, EN_REG_CEEN, %g3		! CEEN value
1521	or	%g3, %g4, %g4			! TT and TL and CEEN
1522	set	CHPR_ASYNC_LOGOUT, %g6
1523	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1524
1525	/*
1526	 * If the logout struct was busy, we may need to pass the
1527	 * TT, TL, and CEEN information to the TL=0 handler via
1528	 * systrap parameter so save it off here.
	 * When %g3 is non-zero it becomes (%g4 << 32) | %g3, i.e. the
	 * flags go in the upper 32 bits above the original %g3 value.
1529	 */
1530	cmp	%g3, %g0
1531	be	1f
1532	  nop
1533	sllx	%g4, 32, %g4
1534	or	%g4, %g3, %g3
15351:
1536	/*
1537	 * Flush the Icache.  Since we turned off the Icache to capture the
1538	 * Icache line it is now stale or corrupted and we must flush it
1539	 * before re-enabling it.
	 * The I$ geometry is read from the cpu private area when
	 * GET_CPU_PRIVATE_PTR yields one; otherwise control goes to
	 * async_err_1 and the global icache_size/icache_linesize values
	 * are used instead.
1540	 */
1541	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
1542	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1543	ba,pt	%icc, 2f
1544	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1545async_err_1:
1546	ASM_LD(%g5, icache_size)
1547	ASM_LD(%g6, icache_linesize)
15482:
1549	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1550
1551	/*
1552	 * (This answers the old "XXX" question of whether the Dcache must
1553	 *  be flushed before being turned back on: yes, and it is, below.)
1554	 */
1555	/*
1556	 * Flush the Dcache before turning it back on since it may now
1557	 * contain stale or corrupt data.
1558	 */
1559	ASM_LD(%g5, dcache_size)
1560	ASM_LD(%g6, dcache_linesize)
1561	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1562
1563	/*
1564	 * check to see whether we parked our sibling core at the start
1565	 * of this handler. If so, we need to unpark it here.
1566	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1567	 * whether or not we need to unpark. %g5 and %g7 are scratch registers.
1568	 */
1569	UNPARK_SIBLING_CORE(%g1, %g5, %g7)
1570
1571	/*
1572	 * Restore Icache and Dcache to previous state.
1573	 */
1574	stxa	%g1, [%g0]ASI_DCU
1575	flush	%g0	/* flush required after changing the IC bit */
1576
1577	/*
1578	 * Make sure our CPU logout operation was successful.
	 * The be/blt below name no condition-code register and therefore
	 * test %icc, so only the low 32 bits of %g3 take part in these
	 * comparisons; the flags merged into the upper half earlier do
	 * not influence them.
1579	 */
1580	cmp	%g3, %g0
1581	be	4f
1582	  nop
1583
1584	/*
1585	 * If the logout structure had been busy, how many times have
1586	 * we tried to use it and failed (nesting count)? If we have
1587	 * already recursed a substantial number of times, then we can
1588	 * assume things are not going to get better by themselves and
1589	 * so it would be best to panic.
1590	 */
1591	cmp	%g3, CLO_NESTING_MAX
1592	blt	3f
1593	  nop
1594
1595        call ptl1_panic
1596          mov   PTL1_BAD_ECC, %g1
1597
15983:
1599	/*
1600	 * Otherwise, if the logout structure was busy but we have not
1601	 * nested more times than our maximum value, then we simply
1602	 * issue a retry. Our TL=0 trap handler code will check and
1603	 * clear the AFSR after it is done logging what is currently
1604	 * in the logout struct and handle this event at that time.
1605	 */
1606	retry
16074:
	/*
	 * Logout succeeded: hand off to cpu_deferred_error through
	 * sys_trap, always at PIL 15.
	 */
1608	RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
1609async_err_resetskip:
1610	set	cpu_deferred_error, %g1
1611	ba	sys_trap
1612	  mov	PIL_15, %g4		! run at pil 15
1613	SET_SIZE(async_err)
1614
1615#if defined(CPU_IMP_L1_CACHE_PARITY)
1616
1617/*
1618 * D$ parity error trap (trap 71) at TL=0.
1619 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
1620 * the various architecture-specific files.  This merely sets up the
1621 * arguments for cpu_parity_error and calls it via sys_trap.
1622 * NB: Must be 8 instructions or less to fit in trap table and code must
1623 *     be relocatable.
1624 */
1625	ENTRY_NP(dcache_parity_instr)
	/*
	 * Registers loaded before the jump to sys_trap:
	 *   %g1 = address of cpu_parity_error
	 *   %g2 = CH_ERR_DPE
	 *   %g3 = %tpc of the trapping instruction
	 *   %g4 = PIL_15 (set in the jmp delay slot)
	 * %g7 is used as scratch to form the sys_trap address.
	 */
1626	membar	#Sync			! Cheetah+ requires membar #Sync
1627	set	cpu_parity_error, %g1
1628	or	%g0, CH_ERR_DPE, %g2
1629	rdpr	%tpc, %g3
1630	sethi	%hi(sys_trap), %g7
1631	jmp	%g7 + %lo(sys_trap)
1632	  mov	PIL_15, %g4		! run at pil 15
1633	SET_SIZE(dcache_parity_instr)
1634
1635
1636/*
1637 * D$ parity error trap (trap 71) at TL>0.
1638 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
1639 * the various architecture-specific files.  This generates a "Software
1640 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
1641 * continue the handling there.
1642 * NB: Must be 8 instructions or less to fit in trap table and code must
1643 *     be relocatable.
1644 */
1645	ENTRY_NP(dcache_parity_tl1_instr)
	/* The entire body is this one macro expansion, given SWTRAP_1. */
1646	CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
1647	SET_SIZE(dcache_parity_tl1_instr)
1648
1649
1650/*
1651 * Software trap 1 at TL>0.
1652 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
1653 * of the various architecture-specific files.  This is used as a continuation
1654 * of the dcache parity handling where we've bought an extra TL level, so we
1655 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1656 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1657 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1658 * order two bits from %g1 and %g2 respectively).
1659 * NB: Must be 8 instructions or less to fit in trap table and code must
1660 *     be relocatable.
1661 */
1662	ENTRY_NP(dcache_parity_tl1_cont_instr)
	/*
	 * The entire body is this one macro expansion; dcache_parity_tl1_err
	 * is passed as its only argument.
	 */
1663	CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
1664	SET_SIZE(dcache_parity_tl1_cont_instr)
1665
1666/*
1667 * D$ parity error at TL>0 handler
1668 * We get here via trap 71 at TL>0->Software trap 1 at TL>0.  We enter
1669 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
1670 */
1671
1672	ENTRY_NP(dcache_parity_tl1_err)
1673
1674	/*
1675	 * This macro saves all the %g registers in the ch_err_tl1_data
1676	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
1677	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
1678	 * the ch_err_tl1_data structure and %g2 will have the original
1679	 * flags in the ch_err_tl1_data structure.  All %g registers
1680	 * except for %g1 and %g2 will be available.
1681	 */
1682	CH_ERR_TL1_ENTER(CH_ERR_DPE);
1683
1684#ifdef TRAPTRACE
1685	/*
1686	 * Get current trap trace entry physical pointer.
	 * %g6 ends up as trap_trace_ctl + (cpu index << TRAPTR_SIZE_SHIFT).
	 * Tracing is skipped entirely when the TRAPTR_LIMIT field is zero.
	 * Otherwise %g5 = TRAPTR_PBASE + TRAPTR_OFFSET.
1687	 */
1688	CPU_INDEX(%g6, %g5)
1689	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
1690	set	trap_trace_ctl, %g5
1691	add	%g6, %g5, %g6
1692	ld	[%g6 + TRAPTR_LIMIT], %g5
1693	tst	%g5
1694	be	%icc, dpe_tl1_skip_tt
1695	  nop
1696	ldx	[%g6 + TRAPTR_PBASE], %g5
1697	ld	[%g6 + TRAPTR_OFFSET], %g4
1698	add	%g5, %g4, %g5
1699
1700	/*
1701	 * Create trap trace entry.
	 * %asi is saved in %g7, switched to TRAPTR_ASI for the stores,
	 * and put back afterwards.  The TR and F1-F4 fields are zeroed.
1702	 */
1703	rd	%asi, %g7
1704	wr	%g0, TRAPTR_ASI, %asi
1705	rd	STICK, %g4
1706	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
1707	rdpr	%tl, %g4
1708	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
1709	rdpr	%tt, %g4
1710	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
1711	rdpr	%tpc, %g4
1712	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
1713	rdpr	%tstate, %g4
1714	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
1715	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
1716	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
1717	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
1718	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
1719	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
1720	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
1721	wr	%g0, %g7, %asi
1722
1723	/*
1724	 * Advance trap trace pointer.
	 * The old offset is recorded in TRAPTR_LAST_OFFSET.  The new
	 * offset is old + TRAP_ENT_SIZE, reset to 0 when that value is
	 * >= TRAPTR_LIMIT - TRAP_ENT_SIZE.
1725	 */
1726	ld	[%g6 + TRAPTR_OFFSET], %g5
1727	ld	[%g6 + TRAPTR_LIMIT], %g4
1728	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
1729	add	%g5, TRAP_ENT_SIZE, %g5
1730	sub	%g4, TRAP_ENT_SIZE, %g4
1731	cmp	%g5, %g4
1732	movge	%icc, 0, %g5
1733	st	%g5, [%g6 + TRAPTR_OFFSET]
1734dpe_tl1_skip_tt:
1735#endif	/* TRAPTRACE */
1736
1737	/*
1738	 * I$ and D$ are automatically turned off by HW when the CPU hits
1739	 * a dcache or icache parity error so we will just leave those two
1740	 * off for now to avoid repeating this trap.
1741	 * For Panther, however, since we trap on P$ data parity errors
1742	 * and HW does not automatically disable P$, we need to disable it
1743	 * here so that we don't encounter any recursive traps when we
1744	 * issue the retry.
	 * The code clears the single DCU bit at position DCU_PE_SHIFT and
	 * writes the register back, using %g3 and %g4 as scratch.
1745	 */
1746	ldxa	[%g0]ASI_DCU, %g3
1747	mov	1, %g4
1748	sllx	%g4, DCU_PE_SHIFT, %g4
1749	andn	%g3, %g4, %g3
1750	stxa	%g3, [%g0]ASI_DCU
1751	membar	#Sync
1752
1753	/*
1754	 * We fall into this macro if we've successfully logged the error in
1755	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
1756	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
1757	 * Restores the %g registers and issues retry.
1758	 */
1759	CH_ERR_TL1_EXIT;
1760	SET_SIZE(dcache_parity_tl1_err)
1761
1762/*
1763 * I$ parity error trap (trap 72) at TL=0.
1764 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
1765 * the various architecture-specific files.  This merely sets up the
1766 * arguments for cpu_parity_error and calls it via sys_trap.
1767 * NB: Must be 8 instructions or less to fit in trap table and code must
1768 *     be relocatable.
1769 */
1770
1771	ENTRY_NP(icache_parity_instr)
	/*
	 * Registers loaded before the jump to sys_trap:
	 *   %g1 = address of cpu_parity_error
	 *   %g2 = CH_ERR_IPE
	 *   %g3 = %tpc of the trapping instruction
	 *   %g4 = PIL_15 (set in the jmp delay slot)
	 * %g7 is used as scratch to form the sys_trap address.
	 */
1772	membar	#Sync			! Cheetah+ requires membar #Sync
1773	set	cpu_parity_error, %g1
1774	or	%g0, CH_ERR_IPE, %g2
1775	rdpr	%tpc, %g3
1776	sethi	%hi(sys_trap), %g7
1777	jmp	%g7 + %lo(sys_trap)
1778	  mov	PIL_15, %g4		! run at pil 15
1779	SET_SIZE(icache_parity_instr)
1780
1781/*
1782 * I$ parity error trap (trap 72) at TL>0.
1783 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
1784 * the various architecture-specific files.  This generates a "Software
1785 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
1786 * continue the handling there.
1787 * NB: Must be 8 instructions or less to fit in trap table and code must
1788 *     be relocatable.
1789 */
1790	ENTRY_NP(icache_parity_tl1_instr)
	/* The entire body is this one macro expansion, given SWTRAP_2. */
1791	CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
1792	SET_SIZE(icache_parity_tl1_instr)
1793
1794/*
1795 * Software trap 2 at TL>0.
1796 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
1797 * of the various architecture-specific files.  This is used as a continuation
1798 * of the icache parity handling where we've bought an extra TL level, so we
1799 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1800 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1801 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1802 * order two bits from %g1 and %g2 respectively).
1803 * NB: Must be 8 instructions or less to fit in trap table and code must
1804 *     be relocatable.
1805 */
1806	ENTRY_NP(icache_parity_tl1_cont_instr)
	/*
	 * The entire body is this one macro expansion; icache_parity_tl1_err
	 * is passed as its only argument.
	 */
1807	CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
1808	SET_SIZE(icache_parity_tl1_cont_instr)
1809
1810
1811/*
1812 * I$ parity error at TL>0 handler
1813 * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
1814 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
1815 */
1816
1817	ENTRY_NP(icache_parity_tl1_err)
1818
1819	/*
1820	 * This macro saves all the %g registers in the ch_err_tl1_data
1821	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
1822	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
1823	 * the ch_err_tl1_data structure and %g2 will have the original
1824	 * flags in the ch_err_tl1_data structure.  All %g registers
1825	 * except for %g1 and %g2 will be available.
1826	 */
1827	CH_ERR_TL1_ENTER(CH_ERR_IPE);
1828
1829#ifdef TRAPTRACE
1830	/*
1831	 * Get current trap trace entry physical pointer.
	 * %g6 ends up as trap_trace_ctl + (cpu index << TRAPTR_SIZE_SHIFT).
	 * Tracing is skipped entirely when the TRAPTR_LIMIT field is zero.
	 * Otherwise %g5 = TRAPTR_PBASE + TRAPTR_OFFSET.
1832	 */
1833	CPU_INDEX(%g6, %g5)
1834	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
1835	set	trap_trace_ctl, %g5
1836	add	%g6, %g5, %g6
1837	ld	[%g6 + TRAPTR_LIMIT], %g5
1838	tst	%g5
1839	be	%icc, ipe_tl1_skip_tt
1840	  nop
1841	ldx	[%g6 + TRAPTR_PBASE], %g5
1842	ld	[%g6 + TRAPTR_OFFSET], %g4
1843	add	%g5, %g4, %g5
1844
1845	/*
1846	 * Create trap trace entry.
	 * %asi is saved in %g7, switched to TRAPTR_ASI for the stores,
	 * and put back afterwards.  The TR and F1-F4 fields are zeroed.
1847	 */
1848	rd	%asi, %g7
1849	wr	%g0, TRAPTR_ASI, %asi
1850	rd	STICK, %g4
1851	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
1852	rdpr	%tl, %g4
1853	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
1854	rdpr	%tt, %g4
1855	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
1856	rdpr	%tpc, %g4
1857	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
1858	rdpr	%tstate, %g4
1859	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
1860	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
1861	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
1862	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
1863	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
1864	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
1865	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
1866	wr	%g0, %g7, %asi
1867
1868	/*
1869	 * Advance trap trace pointer.
	 * The old offset is recorded in TRAPTR_LAST_OFFSET.  The new
	 * offset is old + TRAP_ENT_SIZE, reset to 0 when that value is
	 * >= TRAPTR_LIMIT - TRAP_ENT_SIZE.
1870	 */
1871	ld	[%g6 + TRAPTR_OFFSET], %g5
1872	ld	[%g6 + TRAPTR_LIMIT], %g4
1873	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
1874	add	%g5, TRAP_ENT_SIZE, %g5
1875	sub	%g4, TRAP_ENT_SIZE, %g4
1876	cmp	%g5, %g4
1877	movge	%icc, 0, %g5
1878	st	%g5, [%g6 + TRAPTR_OFFSET]
1879ipe_tl1_skip_tt:
1880#endif	/* TRAPTRACE */
1881
1882	/*
1883	 * We fall into this macro if we've successfully logged the error in
1884	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
1885	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
1886	 * Restores the %g registers and issues retry.
1887	 */
1888	CH_ERR_TL1_EXIT;
1889
1890	SET_SIZE(icache_parity_tl1_err)
1891
1892#endif	/* CPU_IMP_L1_CACHE_PARITY */
1893
1894
1895/*
1896 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
1897 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
1898 * should only be used in places where you have no choice but to look at the
1899 * tlb itself.
1900 *
1901 * Note: These two routines are required by the Estar "cpr" loadable module.
1902 */
1903
1904	ENTRY_NP(itlb_rd_entry)
	/*
	 * Register use, as read from the code below:
	 *   %o0 = entry selector; shifted left by 3 to form the ASI address
	 *   %o1 = address that receives the 8-byte ASI_ITLB_ACCESS value
	 *   %o2 = address that receives the 8-byte ASI_ITLB_TAGREAD value
	 *         with the TAGREAD_CTX_MASK bits cleared
	 * Clobbers %g1, %g2, %o0, %o4 and %o5.
	 * NOTE(review): because the context bits are masked off before the
	 * store, no context number is handed back to the caller here.
	 */
1905	sllx	%o0, 3, %o0
1906	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
1907	stx	%g1, [%o1]
1908	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
1909	set	TAGREAD_CTX_MASK, %o4
1910	andn	%g2, %o4, %o5
1911	retl
1912	  stx	%o5, [%o2]
1913	SET_SIZE(itlb_rd_entry)
1914
1915
1916	ENTRY_NP(dtlb_rd_entry)
	/*
	 * Register use, as read from the code below:
	 *   %o0 = entry selector; shifted left by 3 to form the ASI address
	 *   %o1 = address that receives the 8-byte ASI_DTLB_ACCESS value
	 *   %o2 = address that receives the 8-byte ASI_DTLB_TAGREAD value
	 *         with the TAGREAD_CTX_MASK bits cleared
	 * Clobbers %g1, %g2, %o0, %o4 and %o5.
	 * NOTE(review): because the context bits are masked off before the
	 * store, no context number is handed back to the caller here.
	 */
1917	sllx	%o0, 3, %o0
1918	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
1919	stx	%g1, [%o1]
1920	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
1921	set	TAGREAD_CTX_MASK, %o4
1922	andn	%g2, %o4, %o5
1923	retl
1924	  stx	%o5, [%o2]
1925	SET_SIZE(dtlb_rd_entry)
1926
1927
1928#if !(defined(JALAPENO) || defined(SERRANO))
1929
1930	ENTRY(get_safari_config)
	/* Returns in %o0 the 64-bit value read through ASI_SAFARI_CONFIG. */
1931	ldxa	[%g0]ASI_SAFARI_CONFIG, %o0
1932	retl
1933	nop				! delay slot
1934	SET_SIZE(get_safari_config)
1935
1936
1937	ENTRY(set_safari_config)
	/*
	 * Write the 64-bit value in %o0 to ASI_SAFARI_CONFIG and issue a
	 * membar #Sync before returning.
	 */
1938	stxa	%o0, [%g0]ASI_SAFARI_CONFIG	/* store the new value */
1939	membar	#Sync				/* sync after the ASI store */
1940	retl
1941	nop					/* delay slot */
1942	SET_SIZE(set_safari_config)
1943
1944#endif	/* !(JALAPENO || SERRANO) */
1945
1946
1947	/*
1948	 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
1949	 * registers. In an effort to make the change in the
1950	 * tick/stick counter as consistent as possible, we disable
1951	 * all interrupts while we're changing the registers. We also
1952	 * ensure that the read and write instructions are in the same
1953	 * line in the instruction cache.
1954	 */
1955	ENTRY_NP(cpu_clearticknpt)
	/*
	 * Register usage:
	 *	%g1 = %pstate as found on entry, written back on exit
	 *	%g2 = value read from %tick / STICK
	 *	%g3 = scratch, then the NPT mask (1 << 63)
	 *	%g4 = not written here; the routine leaves with
	 *	      "jmp %g4 + 4" instead of retl, so the caller has to
	 *	      provide it.
	 *	      NOTE(review): presumably callers load %g4 with the
	 *	      address of their own jump to this routine -- confirm
	 *	      against the call sites.
	 *
	 * wrpr/wr write the exclusive-or of their two source operands, so
	 * writing (mask ^ value) clears NPT when it is known to be set.
	 */
1956	rdpr	%pstate, %g1		/* save processor state */
1957	andn	%g1, PSTATE_IE, %g3	/* turn off */
1958	wrpr	%g0, %g3, %pstate	/*   interrupts */
1959	rdpr	%tick, %g2		/* get tick register */
1960	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
1961	mov	1, %g3			/* delay - create mask */
1962	sllx	%g3, 63, %g3		/*   for NPT bit */
1963	ba,a,pt	%xcc, 2f		/* hop over any alignment padding */
1964	.align	8			/* Ensure rd/wr in same i$ line */
19652:
1966	rdpr	%tick, %g2		/* get tick register */
1967	wrpr	%g3, %g2, %tick		/* write tick register, */
1968					/*   clearing NPT bit   */
19691:
1970	rd	STICK, %g2		/* get stick register */
1971	brgez,pn %g2, 3f		/* if NPT bit off, we're done */
1972	mov	1, %g3			/* delay - create mask */
1973	sllx	%g3, 63, %g3		/*   for NPT bit */
1974	ba,a,pt	%xcc, 4f		/* hop over any alignment padding */
1975	.align	8			/* Ensure rd/wr in same i$ line */
19764:
1977	rd	STICK, %g2		/* get stick register */
1978	wr	%g3, %g2, STICK		/* write stick register, */
1979					/*   clearing NPT bit   */
19803:
1981	jmp	%g4 + 4			/* leave via caller-supplied %g4 */
1982	wrpr	%g0, %g1, %pstate	/* restore processor state */
1983
1984	SET_SIZE(cpu_clearticknpt)
1985
1986
1987#if defined(CPU_IMP_L1_CACHE_PARITY)
1988
1989/*
1990 * correct_dcache_parity(size_t size, size_t linesize)
1991 *
1992 * Correct D$ data parity by zeroing the data and initializing microtag
1993 * for all indexes and all ways of the D$.
1994 *
1995 */
1996	ENTRY(correct_dcache_parity)
1997	/*
1998	 * Register Usage:
1999	 *
2000	 * %o0 = input D$ size; then the current line address, walked from
	 *	 (size - linesize) down to 0 in linesize steps
2001	 * %o1 = input D$ line size
2002	 * %o2 = scratch (utag value, then byte offset within the line)
2003	 * %o3 = scratch (DC_data_parity address bit; 0 on non-Panther)
2004	 * %o4 = scratch (line address with the DC_data_parity bit or'ed in)
2005	 */
2006
2007	sub	%o0, %o1, %o0			! init cache line address
2008
2009	/*
2010	 * For Panther CPUs, we also need to clear the data parity bits
2011	 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
2012	 */
2013	GET_CPU_IMPL(%o3)
2014	cmp	%o3, PANTHER_IMPL
2015	bne	1f				/* delay slot runs either way */
2016	  clr	%o3				! zero for non-Panther
2017	mov	1, %o3				/* Panther only: build the */
2018	sll	%o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3	/* parity select bit */
2019
20201:
2021	/*
2022	 * Set utag = way since it must be unique within an index.
2023	 */
2024	srl	%o0, 14, %o2			! get cache way (DC_way)
2025	membar	#Sync				! required before ASI_DC_UTAG
2026	stxa	%o2, [%o0]ASI_DC_UTAG		! set D$ utag = cache way
2027	membar	#Sync				! required after ASI_DC_UTAG
2028
2029	/*
2030	 * Zero line of D$ data (and data parity bits for Panther)
2031	 */
2032	sub	%o1, 8, %o2			/* offset of the line's last 8 bytes */
2033	or	%o0, %o3, %o4			! same address + DC_data_parity
20342:
2035	membar	#Sync				! required before ASI_DC_DATA
2036	stxa	%g0, [%o0 + %o2]ASI_DC_DATA	! zero 8 bytes of D$ data
2037	membar	#Sync				! required after ASI_DC_DATA
2038	/*
2039	 * We also clear the parity bits if this is a panther. For non-Panther
2040	 * CPUs, we simply end up clearing the $data register twice.
2041	 */
2042	stxa	%g0, [%o4 + %o2]ASI_DC_DATA
2043	membar	#Sync
2044
2045	subcc	%o2, 8, %o2			/* previous 8 bytes of the line */
2046	bge	2b				/* until the offset goes negative */
2047	nop
2048
2049	subcc	%o0, %o1, %o0			/* previous cache line */
2050	bge	1b				/* until the address goes negative */
2051	nop
2052
2053	retl
2054	  nop
2055	SET_SIZE(correct_dcache_parity)
2056
2057#endif	/* CPU_IMP_L1_CACHE_PARITY */
2058
2059
2060	ENTRY_NP(stick_timestamp)
	/*
	 * Store the current STICK value, with bit 63 (the NPT bit) cleared,
	 * into the 8 bytes addressed by %o0.  Modifies %g1.
	 */
2061	rd	STICK, %g1	! read stick reg
2062	sllx	%g1, 1, %g1	/* shift bit 63 out ... */
2063	srlx	%g1, 1, %g1	! clear npt bit
2064
2065	retl
2066	stx     %g1, [%o0]	! store the timestamp
2067	SET_SIZE(stick_timestamp)
2068
2069
2070	ENTRY_NP(stick_adj)
	/*
	 * Add the skew passed in %o0 to the STICK register.  Interrupts are
	 * disabled around the read-modify-write and the caller's %pstate is
	 * put back on return.  Modifies %g1, %g3, %g4 and %o1.
	 */
2071	rdpr	%pstate, %g1		! save processor state
2072	andn	%g1, PSTATE_IE, %g3
2073	ba	1f			! cache align stick adj
2074	wrpr	%g0, %g3, %pstate	! turn off interrupts
2075
2076	.align	16			/* rd/add/wr below start 16-byte aligned */
20771:	nop
2078
2079	rd	STICK, %g4		! read stick reg
2080	add	%g4, %o0, %o1		! adjust stick with skew
2081	wr	%o1, %g0, STICK		! write stick reg
2082
2083	retl
2084	wrpr	%g1, %pstate		! restore processor state
2085	SET_SIZE(stick_adj)
2086
2087	ENTRY_NP(kdi_get_stick)
	/*
	 * Store the raw STICK value (NPT bit not masked) into the 8 bytes
	 * addressed by %o0 and return 0 in %o0.  Modifies %g1.
	 */
2088	rd	STICK, %g1		/* %g1 = stick register */
2089	stx	%g1, [%o0]		/* hand it back via the pointer */
2090	retl
2091	mov	%g0, %o0		/* delay - return value 0 */
2092	SET_SIZE(kdi_get_stick)
2093
2094/*
2095 * Invalidate the specified line from the D$.
2096 *
2097 * Register usage:
2098 *	%o0 - index for the invalidation, specifies DC_way and DC_addr
2099 *
2100 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
2101 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
2102 *
2103 * The format of the stored 64-bit value is:
2104 *
2105 *	+----------+--------+----------+
2106 *	| Reserved | DC_tag | DC_valid |
2107 *	+----------+--------+----------+
2108 *       63      31 30     1	      0
2109 *
2110 * DC_tag is the 30-bit physical tag of the associated line.
2111 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
2112 *
2113 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
2114 *
2115 *	+----------+--------+----------+----------+
2116 *	| Reserved | DC_way | DC_addr  | Reserved |
2117 *	+----------+--------+----------+----------+
2118 *       63      16 15    14 13       5 4        0
2119 *
2120 * DC_way is a 2-bit index that selects one of the 4 ways.
2121 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
2122 *
2123 * Setting the DC_valid bit to zero for the specified DC_way and
2124 * DC_addr index into the D$ results in an invalidation of a D$ line.
2125 */
2126	ENTRY(dcache_inval_line)
	/*
	 * The shift by 5 places the caller's index at bit 5 and up, which
	 * is where the DC_addr/DC_way fields described above begin.
	 * %o0 is modified.
	 */
2127	sll	%o0, 5, %o0		! shift index into DC_way and DC_addr
2128	stxa	%g0, [%o0]ASI_DC_TAG	! zero the DC_valid and DC_tag bits
2129	membar	#Sync			/* sync after the ASI store */
2130	retl
2131	nop				/* delay slot */
2132	SET_SIZE(dcache_inval_line)
2133
2134/*
2135 * Invalidate the entire I$
2136 *
2137 * Register usage:
2138 *	%o0 - specifies IC_way, IC_addr, IC_tag
2139 *	%o1 - scratch
2140 *	%o2 - used to save and restore DCU value
2141 *	%o3 - scratch
2142 *	%o5 - used to save and restore PSTATE
2143 *
2144 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
2145 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
2146 * block out snoops and invalidates to the I$, causing I$ consistency
2147 * to be broken. Before turning on the I$, all I$ lines must be invalidated.
2148 *
2149 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
2150 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
2151 * info below describes store (write) use of ASI_IC_TAG. Note that read
2152 * use of ASI_IC_TAG behaves differently.
2153 *
2154 * The format of the stored 64-bit value is:
2155 *
2156 *	+----------+--------+---------------+-----------+
2157 *	| Reserved | Valid  | IC_vpred<7:0> | Undefined |
2158 *	+----------+--------+---------------+-----------+
2159 *       63      55    54    53           46 45        0
2160 *
2161 * Valid is the 1-bit valid field for both the physical and snoop tags.
2162 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
2163 *	the 32-byte boundary aligned address specified by IC_addr.
2164 *
2165 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
2166 *
2167 *	+----------+--------+---------+--------+---------+
2168 *	| Reserved | IC_way | IC_addr | IC_tag |Reserved |
2169 *	+----------+--------+---------+--------+---------+
2170 *       63      16 15    14 13      5 4      3 2       0
2171 *
2172 * IC_way is a 2-bit index that selects one of the 4 ways.
2173 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
2174 * IC_addr[5] is a "don't care" for a store.
2175 * IC_tag set to 2 specifies that the stored value is to be interpreted
2176 *	as containing Valid and IC_vpred as described above.
2177 *
2178 * Setting the Valid bit to zero for the specified IC_way and
2179 * IC_addr index into the I$ results in an invalidation of an I$ line.
2180 */
2181	ENTRY(icache_inval_all)
	/*
	 * Runs with PSTATE_IE cleared; the %pstate found on entry is kept
	 * in %o5 and written back in the delay slot of the return.
	 *
	 * The I$ size and line size handed to CH_ICACHE_FLUSHALL come from
	 * the CHPR_ICACHE_* fields of the cpu private area on the
	 * fall-through path, or from the icache_size/icache_linesize
	 * variables on the icache_inval_all_1 path.
	 * NOTE(review): presumably GET_CPU_PRIVATE_PTR branches to that
	 * label when the cpu has no private area -- confirm in the macro.
	 */
2182	rdpr	%pstate, %o5		/* remember caller's pstate */
2183	andn	%o5, PSTATE_IE, %o3
2184	wrpr	%g0, %o3, %pstate	! clear IE bit
2185
2186	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
2187	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1	/* %o1 = line size */
2188	ba,pt	%icc, 2f
2189	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0	/* delay - %o0 = I$ size */
2190icache_inval_all_1:
2191	ASM_LD(%o0, icache_size)		/* %o0 = I$ size */
2192	ASM_LD(%o1, icache_linesize)		/* %o1 = line size */
21932:
2194	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
2195
2196	retl
2197	wrpr	%g0, %o5, %pstate	! restore earlier pstate
2198	SET_SIZE(icache_inval_all)
2199
2200
2201/*
2202 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
2203 * crosstrap.  It atomically increments the outstanding request counter and,
2204 * if there was not already an outstanding request, branches to setsoftint_tl1
2205 * to enqueue an intr_vec for the given inum.
2206 */
2207
2208	! Register usage:
2209	!
2210	! Arguments:
2211	! %g1 - inum
2212	! %g2 - index into chsm_outstanding array
2213	!
2214	! Internal:
2215	! %g2, %g3, %g5 - scratch
2216	! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2217	! %g6 - setsoftint_tl1 address
2218
2219	ENTRY_NP(cache_scrubreq_tl1)
	/*
	 * %g1 (inum) is not touched here; it is still live when control
	 * is transferred to setsoftint_tl1.
	 */
2220	mulx	%g2, CHSM_OUTSTANDING_INCR, %g2	/* index -> byte offset */
2221	set	CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
2222	add	%g2, %g3, %g2		/* offset of chsm_outstanding[index] */
	/*
	 * NOTE(review): presumably leaves %g4 = cpu private area + %g2 and
	 * branches to 1f when the cpu has no private area -- confirm in
	 * the macro definition.
	 */
2223	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
2224	ld	[%g4], %g2		! cpu's chsm_outstanding[index]
2225	!
2226	! no need to use atomic instructions for the following
2227	! increment - we're at tl1
2228	!
2229	add	%g2, 0x1, %g3
2230	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
2231	  st	%g3, [%g4]		! delay - store incremented counter
2232	ASM_JMP(%g6, setsoftint_tl1)
2233	! not reached
22341:
2235	retry				/* resume the interrupted code */
2236	SET_SIZE(cache_scrubreq_tl1)
2237
2238
2239/*
2240 * Get the error state for the processor.
2241 * Note that this must not be used at TL>0
2242 */
2243	ENTRY(get_cpu_error_state)
	/*
	 * In:	%o0 = address of the save area; each register value is
	 *	      stored at a CH_CPU_ERRORS_* offset from it.
	 *
	 * %o0 is left unchanged.  %o1 and %o2 (plus %o3 when CHEETAH_PLUS
	 * is defined) are used as scratch.  Fields that have no backing
	 * register in this build or on this implementation are zeroed.
	 * The primary AFSR/AFAR are read last.
	 */
2244#if defined(CHEETAH_PLUS)
2245	set	ASI_SHADOW_REG_VA, %o2
2246	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr reg
2247	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2248	ldxa	[%o2]ASI_AFAR, %o1		! shadow afar reg
2249	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2250	GET_CPU_IMPL(%o3)	! Only panther has AFSR_EXT registers
2251	cmp	%o3, PANTHER_IMPL
2252	bne,a	1f			/* annulled: slot runs only if taken */
2253	  stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]	! zero for non-PN
2254	set	ASI_AFSR_EXT_VA, %o2
2255	ldxa	[%o2]ASI_AFSR, %o1		! afsr_ext reg
2256	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2257	set	ASI_SHADOW_AFSR_EXT_VA, %o2
2258	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr_ext reg
2259	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2260	b	2f			/* Panther done: skip the zeroing */
2261	  nop
22621:
2263	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
22642:
2265#else	/* CHEETAH_PLUS */
2266	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2267	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2268	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2269	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2270#endif	/* CHEETAH_PLUS */
2271#if defined(SERRANO)
2272	/*
2273	 * Serrano has an afar2 which captures the address on FRC/FRU errors.
2274	 * We save this in the afar2 of the register save area.
2275	 */
2276	set	ASI_MCU_AFAR2_VA, %o2
2277	ldxa	[%o2]ASI_MCU_CTRL, %o1
2278	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR2]
2279#endif	/* SERRANO */
2280	ldxa	[%g0]ASI_AFSR, %o1		! primary afsr reg
2281	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR]
2282	ldxa	[%g0]ASI_AFAR, %o1		! primary afar reg
2283	retl
2284	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR]	/* delay - save primary afar */
2285	SET_SIZE(get_cpu_error_state)
2286
2287/*
2288 * Check a page of memory for errors.
2289 *
2290 * Load each 64 byte block from physical memory.
2291 * Check AFSR after each load to see if an error
2292 * was caused. If so, log/scrub that error.
2293 *
2294 * Used to determine if a page contains
2295 * CEs when CEEN is disabled.
2296 */
2297	ENTRY(cpu_check_block)
	!
	! Register usage (after the save):
	!	%i0 = address of the current 64-byte block (read with
	!	      ASI_BLK_P), advanced by 64 per iteration
	!	%i1 = size in bytes; only the low 32 bits are used and any
	!	      remainder below 64 bytes is ignored
	!	%l2 = caller's %fprs, restored before returning
	!	%l3 = AFSR value sampled after each block load
	!	%l4 = number of 64-byte blocks still to check
	!
	! The ch_cpu_errors_t handed to get_cpu_error_state and
	! cpu_ce_detected lives in the extra CH_CPU_ERROR_SIZE bytes of
	! this routine's own stack frame.
	!
2298	!
2299	! get a new window with room for the error regs
2300	!
2301	save	%sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
2302	srl	%i1, 6, %l4		! clear top bits of psz
2303					! and divide by 64
	!
	! With fewer than 64 bytes there is no whole block to check.
	! Leave now: the loop below decrements before it tests, so
	! entering it with a count of zero would wrap the counter and
	! keep loading blocks far beyond the caller's buffer.
	!
	brz,pn	%l4, 3f			! no whole block - nothing to do
	nop
2304	rd	%fprs, %l2		! store FP
2305	wr	%g0, FPRS_FEF, %fprs	! enable FP
23061:
2307	ldda	[%i0]ASI_BLK_P, %d0	! load a block
2308	membar	#Sync
2309	ldxa    [%g0]ASI_AFSR, %l3	! read afsr reg
2310	brz,a,pt %l3, 2f		! check for error
2311	nop
2312
2313	!
2314	! if error, read the error regs and log it
2315	!
2316	call	get_cpu_error_state
2317	add	%fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0
2318
2319	!
2320	! cpu_ce_detected(ch_cpu_errors_t *, flag)
2321	! (%o0 still points at the save area: get_cpu_error_state
2322	! does not modify it)
	!
2323	call	cpu_ce_detected		! log the error
2324	mov	CE_CEEN_TIMEOUT, %o1
23252:
2326	dec	%l4			! next 64-byte block
2327	brnz,a,pt  %l4, 1b
2328	add	%i0, 64, %i0		! increment block addr
2329
2330	wr	%l2, %g0, %fprs		! restore FP
3:
2331	ret
2332	restore
2333
2334	SET_SIZE(cpu_check_block)
2334
2335/*
2336 * Perform a cpu logout called from C.  This is used where we did not trap
2337 * for the error but still want to gather "what we can".  Caller must make
2338 * sure cpu private area exists and that the indicated logout area is free
2339 * for use, and that we are unable to migrate cpus.
2340 */
2341	ENTRY(cpu_delayed_logout)
	/*
	 * In:	%o0 = first argument of GET_ECACHE_DTAGS, passed through
	 *	      unchanged.
	 *	      NOTE(review): presumably the address whose cache
	 *	      state is to be captured -- confirm in the macro.
	 *	%o1 = base of the logout area; CH_CLO_DATA + CH_CHD_EC_DATA
	 *	      is added to it before it is handed to the macro.
	 *
	 * %o2-%o5 and %g1 are used as scratch; %asi is saved in %g1 and
	 * restored.  Interrupts are disabled for the duration.  On exit
	 * PSTATE_IE is set unconditionally rather than being restored from
	 * the value found on entry.
	 */
2342	rdpr	%pstate, %o2
2343	andn	%o2, PSTATE_IE, %o2
2344	wrpr	%g0, %o2, %pstate		! disable interrupts
2345	PARK_SIBLING_CORE(%o2, %o3, %o4)	! %o2 has DCU value
2346	add	%o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
2347	rd	%asi, %g1			/* save caller's %asi */
2348	wr	%g0, ASI_P, %asi		/* macro below runs with ASI_P */
2349	GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
2350	wr	%g1, %asi			/* put caller's %asi back */
2351	UNPARK_SIBLING_CORE(%o2, %o3, %o4)	! can use %o2 again
2352	rdpr	%pstate, %o2
2353	or	%o2, PSTATE_IE, %o2
2354	wrpr	%g0, %o2, %pstate		/* interrupts back on */
2355	retl
2356	  nop
2357	SET_SIZE(cpu_delayed_logout)
2358
2359	ENTRY(dtrace_blksuword32)
	/*
	 * In (after the save):
	 *	%i0 = address of the block written with ASI_BLK_COMMIT_S
	 *	%i1 = address of the 32-bit word loaded into %f0 first
	 *	%i2 = "tryagain" flag, only examined on the fault path
	 *
	 * Out:	%o0 = 0 when the block store completed; on a fault, -1 when
	 *	tryagain is zero, otherwise whatever the tail-called
	 *	dtrace_blksuword32_err() returns.
	 *
	 *	%l0 = %fprs on entry	%l1 = %pstate on entry
	 *	%l2 = scratch		%l5 = address of the lofault label 0:
	 *
	 * The condition codes set by the andcc on FPRS_FEF are tested
	 * again after the store (and in the fault path), so nothing in
	 * between may alter them.
	 */
2360	save	%sp, -SA(MINFRAME + 4), %sp
2361
2362	rdpr	%pstate, %l1
2363	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
2364	wrpr	%g0, %l2, %pstate		! protect our FPU diddling
2365
2366	rd	%fprs, %l0
2367	andcc	%l0, FPRS_FEF, %g0
2368	bz,a,pt	%xcc, 1f			! if the fpu is disabled
2369	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu
2370
	/* fpu was already enabled: %f0 is live and must be preserved */
2371	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
23721:
2373	set	0f, %l5
2374        /*
2375         * We're about to write a block full of either total garbage
2376         * (not kernel data, don't worry) or user floating-point data
2377         * (so it only _looks_ like garbage).
2378         */
2379	ld	[%i1], %f0			! modify the block
2380	membar	#Sync
2381	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
2382	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
2383	membar	#Sync
2384	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2385
	/* same test as above: Z set means the fpu was disabled on entry */
2386	bz,a,pt	%xcc, 1f
2387	wr	%g0, %l0, %fprs			! restore %fprs
2388
2389	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
23901:
2391
2392	wrpr	%g0, %l1, %pstate		! restore interrupts
2393
2394	ret
2395	restore	%g0, %g0, %o0			/* return 0 */
2396
	/* lofault target: reached when the block store faults */
23970:
2398	membar	#Sync
2399	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2400
2401	bz,a,pt	%xcc, 1f
2402	wr	%g0, %l0, %fprs			! restore %fprs
2403
2404	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
24051:
2406
2407	wrpr	%g0, %l1, %pstate		! restore interrupts
2408
2409	/*
2410	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
2411	 * which deals with watchpoints. Otherwise, just return -1.
2412	 */
2413	brnz,pt	%i2, 1f
2414	nop
2415	ret
2416	restore	%g0, -1, %o0
24171:
2418	call	dtrace_blksuword32_err
2419	restore
2420
2421	SET_SIZE(dtrace_blksuword32)
2422
2423#ifdef	CHEETAHPLUS_ERRATUM_25
2424
2425	/* Claim a chunk of physical address space. */
2426	ENTRY(claimlines)
	/*
	 * In:	%o0 = base address (used with ASI_MEM)
	 *	%o1 = length of the region, in bytes
	 *	%o2 = stride, in bytes
	 *
	 * Walks the region from the top down: each pass lowers %o1 by the
	 * stride and, as long as that subtraction did not borrow, issues a
	 * casxa at base + %o1.  Both casxa data operands are %g0, so the
	 * memory contents are the same afterwards whether or not the
	 * compare matches.  Modifies %o1 and %o3.
	 */
24271:
2428	subcc	%o1, %o2, %o1			/* remaining -= stride */
2429	add	%o0, %o1, %o3			/* %o3 = address to touch */
2430	bgeu,a,pt	%xcc, 1b		/* loop while no borrow */
2431	casxa	[%o3]ASI_MEM, %g0, %g0		/* annulled slot: taken path only */
2432	membar  #Sync
2433	retl
2434	nop
2435	SET_SIZE(claimlines)
2436
2437	/*
2438	 * CPU feature initialization,
2439	 * turn BPE off,
2440	 * get device id.
2441	 */
2442	ENTRY(cpu_feature_init)
	/*
	 * Takes no register arguments.  Runs in its own register window,
	 * so the %o registers used below as scratch belong to this routine.
	 */
2443	save	%sp, -SA(MINFRAME), %sp
2444	sethi	%hi(cheetah_bpe_off), %o0
2445	ld	[%o0 + %lo(cheetah_bpe_off)], %o0	/* %o0 = cheetah_bpe_off */
2446	brz	%o0, 1f				/* zero: leave BPE as it is */
2447	nop
2448	rd	ASR_DISPATCH_CONTROL, %o0
2449	andn	%o0, ASR_DISPATCH_CONTROL_BPE, %o0	/* clear the BPE bit */
2450	wr	%o0, 0, ASR_DISPATCH_CONTROL
24511:
2452	!
2453	! get the device_id and store the device_id
2454	! in the appropriate cpunodes structure
2455	! given the cpus index
2456	!
2457	CPU_INDEX(%o0, %o1)
2458	mulx %o0, CPU_NODE_SIZE, %o0		/* byte offset of cpunodes[index] */
2459	set  cpunodes + DEVICE_ID, %o1		/* address of cpunodes[0]'s field */
2460	ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2	/* %o2 = device id */
2461	stx  %o2, [%o0 + %o1]
2462#ifdef	CHEETAHPLUS_ERRATUM_34
2463	!
2464	! apply Cheetah+ erratum 34 workaround
2465	!
2466	call itlb_erratum34_fixup
2467	  nop
2468	call dtlb_erratum34_fixup
2469	  nop
2470#endif	/* CHEETAHPLUS_ERRATUM_34 */
2471	ret
2472	  restore
2473	SET_SIZE(cpu_feature_init)
2474
2475/*
2476 * Copy a tsb entry atomically, from src to dest.
2477 * src must be 128 bit aligned.
2478 */
2479	ENTRY(copy_tsb_entry)
	/*
	 * In:	%o0 = src, %o1 = dest.
	 *
	 * Only the load is a single 128-bit access; the two halves are
	 * written to dest with separate 64-bit stores.  Modifies %o2, %o3.
	 */
2480	ldda	[%o0]ASI_NQUAD_LD, %o2		! %o2 = tag, %o3 = data
2481	stx	%o2, [%o1]			/* dest tag */
2482	stx	%o3, [%o1 + 8 ]			/* dest data */
2483	retl
2484	nop
2485	SET_SIZE(copy_tsb_entry)
2486
2487#endif	/* CHEETAHPLUS_ERRATUM_25 */
2488
2489#ifdef	CHEETAHPLUS_ERRATUM_34
2490
2491	!
2492	! In Cheetah+ erratum 34, under certain conditions an ITLB locked
2493	! index 0 TTE will erroneously be displaced when a new TTE is
2494	! loaded via ASI_ITLB_IN.  In order to avoid cheetah+ erratum 34,
2495	! locked index 0 TTEs must be relocated.
2496	!
2497	! NOTE: Care must be taken to avoid an ITLB miss in this routine.
2498	!
2499	ENTRY_NP(itlb_erratum34_fixup)
	/*
	 * Register usage:
	 *	%o1 = data of ITLB entry 0	%o2 = tag of ITLB entry 0
	 *	%o3 = %pstate on entry, written back on both exit paths
	 *	%o4 = scratch (FLUSH_ADDR / candidate TTE)
	 *	%g3 = candidate entry number << 3
	 *	%g4 = MMU_TAG_ACCESS
	 *	%g1 = handed to the DEBUG-only check macro
	 *
	 * The validity tests are signed 64-bit compares against zero, i.e.
	 * they look at bit 63 of the TTE data: a value >= 0 is treated as
	 * not valid.  The lock bit test uses %icc, which is sufficient
	 * only because the andcc result is examined in its low 32 bits.
	 */
2500	rdpr	%pstate, %o3
2501#ifdef DEBUG
2502	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
2503#endif /* DEBUG */
	/*
	 * wrpr writes (%o3 xor PSTATE_IE), so this clears IE only when IE
	 * is set on entry, which is what the DEBUG check above insists on.
	 */
2504	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
2505	ldxa	[%g0]ASI_ITLB_ACCESS, %o1	! %o1 = entry 0 data
2506	ldxa	[%g0]ASI_ITLB_TAGREAD, %o2	! %o2 = entry 0 tag
2507
2508	cmp	%o1, %g0			! Is this entry valid?
2509	bge	%xcc, 1f
2510	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
2511	bnz	%icc, 2f
2512	  nop
25131:
2514	retl					! Nope, outta here...
2515	  wrpr	%g0, %o3, %pstate		! Enable interrupts
25162:
2517	sethi	%hi(FLUSH_ADDR), %o4
2518	stxa	%g0, [%o2]ASI_ITLB_DEMAP	! Flush this mapping
2519	flush	%o4				! Flush required for I-MMU
2520	!
2521	! Start search from index 1 up.  This is because the kernel force
2522	! loads its text page at index 15 in sfmmu_kernel_remap() and we
2523	! don't want our relocated entry evicted later.
2524	!
2525	! NOTE: We assume that we'll be successful in finding an unlocked
2526	! or invalid entry.  If that isn't the case there are bound to
2527	! bigger problems.
2528	!
2529	set	(1 << 3), %g3
25303:
2531	ldxa	[%g3]ASI_ITLB_ACCESS, %o4	! Load TTE from t16
2532	!
2533	! If this entry isn't valid, we'll choose to displace it (regardless
2534	! of the lock bit).
2535	!
2536	cmp	%o4, %g0			! TTE is > 0 iff not valid
2537	bge	%xcc, 4f			! If invalid, go displace
2538	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
2539	bnz,a	%icc, 3b			! If locked, look at next
2540	  add	%g3, (1 << 3), %g3		!  entry
25414:
2542	!
2543	! We found an unlocked or invalid entry; we'll explicitly load
2544	! the former index 0 entry here.
2545	!
2546	sethi	%hi(FLUSH_ADDR), %o4
2547	set	MMU_TAG_ACCESS, %g4
2548	stxa	%o2, [%g4]ASI_IMMU		/* tag access = old entry 0 tag */
2549	stxa	%o1, [%g3]ASI_ITLB_ACCESS	/* old entry 0 data -> chosen slot */
2550	flush	%o4				! Flush required for I-MMU
2551	retl
2552	  wrpr	%g0, %o3, %pstate		! Enable interrupts
2553	SET_SIZE(itlb_erratum34_fixup)
2554
2555	!
2556	! In Cheetah+ erratum 34, under certain conditions a DTLB locked
2557	! index 0 TTE will erroneously be displaced when a new TTE is
2558	! loaded.  In order to avoid cheetah+ erratum 34, locked index 0
2559	! TTEs must be relocated.
2560	!
2561	ENTRY_NP(dtlb_erratum34_fixup)
	/*
	 * Register usage:
	 *	%o1 = data of DTLB entry 0	%o2 = tag of DTLB entry 0
	 *	%o3 = %pstate on entry, written back on both exit paths
	 *	%o4 = candidate TTE
	 *	%g3 = candidate entry number << 3
	 *	%g4 = MMU_TAG_ACCESS
	 *	%g1 = handed to the DEBUG-only check macro
	 *
	 * The validity tests are signed 64-bit compares against zero, i.e.
	 * they look at bit 63 of the TTE data: a value >= 0 is treated as
	 * not valid.  This is the D-MMU twin of the ITLB fixup routine;
	 * it uses membar #Sync where that one uses flush.
	 */
2562	rdpr	%pstate, %o3
2563#ifdef DEBUG
2564	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
2565#endif /* DEBUG */
	/*
	 * wrpr writes (%o3 xor PSTATE_IE), so this clears IE only when IE
	 * is set on entry, which is what the DEBUG check above insists on.
	 */
2566	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
2567	ldxa	[%g0]ASI_DTLB_ACCESS, %o1	! %o1 = entry 0 data
2568	ldxa	[%g0]ASI_DTLB_TAGREAD, %o2	! %o2 = entry 0 tag
2569
2570	cmp	%o1, %g0			! Is this entry valid?
2571	bge	%xcc, 1f
2572	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
2573	bnz	%icc, 2f
2574	  nop
25751:
2576	retl					! Nope, outta here...
2577	  wrpr	%g0, %o3, %pstate		! Enable interrupts
25782:
2579	stxa	%g0, [%o2]ASI_DTLB_DEMAP	! Flush this mapping
2580	membar	#Sync
2581	!
2582	! Start search from index 1 up.
2583	!
2584	! NOTE: We assume that we'll be successful in finding an unlocked
2585	! or invalid entry.  If that isn't the case there are bound to
2586	! bigger problems.
2587	!
2588	set	(1 << 3), %g3
25893:
2590	ldxa	[%g3]ASI_DTLB_ACCESS, %o4	! Load TTE from t16
2591	!
2592	! If this entry isn't valid, we'll choose to displace it (regardless
2593	! of the lock bit).
2594	!
2595	cmp	%o4, %g0			! TTE is > 0 iff not valid
2596	bge	%xcc, 4f			! If invalid, go displace
2597	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
2598	bnz,a	%icc, 3b			! If locked, look at next
2599	  add	%g3, (1 << 3), %g3		!  entry
26004:
2601	!
2602	! We found an unlocked or invalid entry; we'll explicitly load
2603	! the former index 0 entry here.
2604	!
2605	set	MMU_TAG_ACCESS, %g4
2606	stxa	%o2, [%g4]ASI_DMMU		/* tag access = old entry 0 tag */
2607	stxa	%o1, [%g3]ASI_DTLB_ACCESS	/* old entry 0 data -> chosen slot */
2608	membar	#Sync
2609	retl
2610	  wrpr	%g0, %o3, %pstate		! Enable interrupts
2611	SET_SIZE(dtlb_erratum34_fixup)
2612
2613#endif	/* CHEETAHPLUS_ERRATUM_34 */
2614
2615