xref: /titanic_44/usr/src/uts/sun4u/cpu/us3_common_asm.s (revision 20e6d5c536ad5b300e7fafb6a92e13040f492977)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Assembly code support for Cheetah/Cheetah+ modules
26 */
27
28#pragma ident	"%Z%%M%	%I%	%E% SMI"
29
30#if !defined(lint)
31#include "assym.h"
32#endif	/* !lint */
33
34#include <sys/asm_linkage.h>
35#include <sys/mmu.h>
36#include <vm/hat_sfmmu.h>
37#include <sys/machparam.h>
38#include <sys/machcpuvar.h>
39#include <sys/machthread.h>
40#include <sys/machtrap.h>
41#include <sys/privregs.h>
42#include <sys/trap.h>
43#include <sys/cheetahregs.h>
44#include <sys/us3_module.h>
45#include <sys/xc_impl.h>
46#include <sys/intreg.h>
47#include <sys/async.h>
48#include <sys/clock.h>
49#include <sys/cheetahasm.h>
50#include <sys/cmpregs.h>
51
52#ifdef TRAPTRACE
53#include <sys/traptrace.h>
54#endif /* TRAPTRACE */
55
56#if !defined(lint)
57
58/* BEGIN CSTYLED */
59
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Invalidate D$ lines using the algorithm selected by dflush_type.
 * Nothing is flushed when DCU_DC is clear in the DCU register.
 *
 *	arg1		pfn on entry; overwritten
 *	arg2		virtual color on entry; reused as the loop counter
 *			by the FLUSHMATCH_TYPE path
 *	tmp1 - tmp3	scratch registers
 *
 * The macro defines numeric local labels 1 through 5 and ends at label 1.
 */
#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_DCU, tmp1					;\
	btst	DCU_DC, tmp1		/* D$ switched on? */		;\
	bz,pn	%icc, 1f		/* no, nothing to do */		;\
	ASM_LD(tmp1, dcache_linesize)					;\
	ASM_LD(tmp2, dflush_type)					;\
	cmp	tmp2, FLUSHPAGE_TYPE					;\
	be,pt	%icc, 2f						;\
	nop								;\
	sllx	arg1, CHEETAH_DC_VBIT_SHIFT, arg1 /* compare tag */	;\
	ASM_LD(tmp3, dcache_size)					;\
	cmp	tmp2, FLUSHMATCH_TYPE					;\
	be,pt	%icc, 3f						;\
	nop								;\
	/*								\
	 * FLUSHALL_TYPE: clear every tag in the cache, walking	\
	 * from the last line down to offset 0.				\
	 *	tmp1 = cache line size					\
	 *	tmp3 = cache size					\
	 *	tmp2 = current offset					\
	 */								\
	sub	tmp3, tmp1, tmp2					;\
4:									\
	stxa	%g0, [tmp2]ASI_DC_TAG					;\
	membar	#Sync							;\
	cmp	%g0, tmp2						;\
	bnz,pt	%icc, 4b		/* offset 0 not yet done */	;\
	sub	tmp2, tmp1, tmp2					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * FLUSHPAGE_TYPE: invalidate by physical address, one line	\
	 * at a time, from the end of the page down to its start.	\
	 *	arg1 = pfn, converted to the page's physical address	\
	 *	arg2 = virtual color					\
	 *	tmp1 = cache line size					\
	 *	tmp3 = current offset within the page			\
	 */								\
2:									\
	sllx	arg1, MMU_PAGESHIFT, arg1 /* pfn to 43 bit PA */	;\
	set	MMU_PAGESIZE, tmp3					;\
	sub	tmp3, tmp1, tmp3					;\
4:									\
	stxa	%g0, [arg1 + tmp3]ASI_DC_INVAL				;\
	membar	#Sync							;\
5:									\
	cmp	%g0, tmp3						;\
	bne,pt	%icc, 4b		/* more lines in the page */	;\
	sub	tmp3, tmp1, tmp3					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * FLUSHMATCH_TYPE: read every tag and clear only those	\
	 * with a valid bit set whose tag equals arg1.			\
	 *	arg1 = tag to compare against				\
	 *	arg2 = current offset					\
	 *	tmp1 = cache line size					\
	 *	tmp2 = tag read from the cache				\
	 *	tmp3 = cache size					\
	 */								\
3:									\
	sub	tmp3, tmp1, arg2					;\
4:									\
	ldxa	[arg2]ASI_DC_TAG, tmp2		/* fetch the tag */	;\
	btst	CHEETAH_DC_VBIT_MASK, tmp2				;\
	bz,pn	%icc, 5f		/* no valid sub-blocks */	;\
	andn	tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* strip v bits */	;\
	cmp	tmp2, arg1						;\
	bne,pn	%icc, 5f		/* tag does not match */	;\
	nop								;\
	stxa	%g0, [arg2]ASI_DC_TAG					;\
	membar	#Sync							;\
5:									\
	cmp	%g0, arg2						;\
	bnz,pt	%icc, 4b		/* offset 0 not yet done */	;\
	sub	arg2, tmp1, arg2					;\
1:
134
135
136/* END CSTYLED */
137
138#endif	/* !lint */
139
140/*
141 * Cheetah MMU and Cache operations.
142 */
143
#if defined(lint)

/* ARGSUSED */
void
vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
{
}

#else	/* lint */

/*
 * vtag_flushpage(vaddr, sfmmup)
 *	Demap the translation for one page from the D-TLB and the I-TLB.
 *
 *	%o0 = vaddr
 *	%o1 = sfmmup
 *
 * Interrupts are disabled for the duration of the demap so that the
 * primary context register cannot change underneath us; the caller's
 * pstate is restored in the delay slot of the return.
 */
	ENTRY_NP(vtag_flushpage)
	rdpr	%pstate, %o5			! %o5 = pstate to restore on exit
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
#endif /* DEBUG */
	andn	%o5, PSTATE_IE, %o4
	wrpr	%o4, 0, %pstate			! interrupts off

	sethi	%hi(ksfmmup), %o3
	ldx	[%o3 + %lo(ksfmmup)], %o3	! %o3 = ksfmmup
	cmp	%o3, %o1
	bne,pt	%xcc, 1f			! not the kernel as: go to 1
	  sethi	%hi(FLUSH_ADDR), %o3		! %o3 = FLUSH_ADDR on both paths

	/*
	 * Kernel demap: use primary; the demap type is page implicitly.
	 */
	stxa	%g0, [%o0]ASI_DTLB_DEMAP	! dmmu flush for KCONTEXT
	stxa	%g0, [%o0]ASI_ITLB_DEMAP	! immu flush for KCONTEXT
	flush	%o3
	retl
	  wrpr	%g0, %o5, %pstate		! re-enable interrupts
1:
	/*
	 * User demap.  The primary context register has to be loaded
	 * with this hat's context, because the secondary context cannot
	 * be used for the Cheetah IMMU.
	 *
	 *	%o0 = vaddr
	 *	%o1 = sfmmup
	 *	%o3 = FLUSH_ADDR
	 */
	SFMMU_CPU_CNUM(%o1, %g1, %g2)		! %g1 = sfmmu cnum on this CPU

	ldub	[%o1 + SFMMU_CEXT], %o4		! %o4 = sfmmup->sfmmu_cext
	sll	%o4, CTXREG_EXT_SHIFT, %o4
	or	%g1, %o4, %g1			! %g1 = primary pgsz | cnum

	wrpr	%g0, 1, %tl
	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
	set	MMU_PCONTEXT, %o4
	ldxa	[%o4]ASI_DMMU, %o2		! %o2 = old context register
	srlx	%o2, CTXREG_NEXT_SHIFT, %o1	! isolate the nucleus pgsz ...
	sllx	%o1, CTXREG_NEXT_SHIFT, %o1	! ... %o1 = nucleus pgsz
	or	%g1, %o1, %g1		! %g1 = nucleus pgsz | primary pgsz | cnum
	stxa	%g1, [%o4]ASI_DMMU		! install the new context

	stxa	%g0, [%o0]ASI_DTLB_DEMAP
	stxa	%g0, [%o0]ASI_ITLB_DEMAP
	stxa	%o2, [%o4]ASI_DMMU		! put the old context back
	flush	%o3
	wrpr	%g0, 0, %tl

	retl
	  wrpr	%g0, %o5, %pstate		! re-enable interrupts
	SET_SIZE(vtag_flushpage)

#endif	/* lint */
222
#if defined(lint)

void
vtag_flushall(void)
{
}

#else	/* lint */

/*
 * vtag_flushall() / demap_all()
 *	Issue a demap-all operation to the D-TLB and the I-TLB.
 */
	ENTRY_NP2(vtag_flushall, demap_all)
	set	DEMAP_ALL_TYPE, %g1		! %g1 = demap-all address
	sethi	%hi(FLUSH_ADDR), %o3
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%o3
	retl
	  nop
	SET_SIZE(demap_all)
	SET_SIZE(vtag_flushall)

#endif	/* lint */
246
247
#if defined(lint)

/* ARGSUSED */
void
vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
{
}

#else	/* lint */

/*
 * vtag_flushpage_tl1
 *	x-trap handler that demaps one page from the D-TLB and I-TLB.
 *
 *	%g1 = vaddr, zero-extended on 32-bit kernel
 *	%g2 = sfmmup
 *
 * Assumes TSBE_TAG = 0.
 */
	ENTRY_NP(vtag_flushpage_tl1)
	srln	%g1, MMU_PAGESHIFT, %g1		! strip the page offset ...

	sethi	%hi(ksfmmup), %g3
	ldx	[%g3 + %lo(ksfmmup)], %g3	! %g3 = ksfmmup
	cmp	%g3, %g2
	bne,pt	%xcc, 1f			! not the kernel as: go to 1
	  slln	%g1, MMU_PAGESHIFT, %g1		! ... %g1 = page-aligned vaddr

	/*
	 * Kernel as: demap in the nucleus context.
	 */
	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	retry
1:
	/*
	 * User as: demap through the primary context, which is switched
	 * to this hat's context for the duration of the two demaps.
	 */
	SFMMU_CPU_CNUM(%g2, %g6, %g3)		! %g6 = sfmmu cnum on this CPU

	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
	sll	%g4, CTXREG_EXT_SHIFT, %g4
	or	%g6, %g4, %g6			! %g6 = pgsz | cnum

	set	MMU_PCONTEXT, %g4
	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
	ldxa	[%g4]ASI_DMMU, %g5		! %g5 = old context register
	srlx	%g5, CTXREG_NEXT_SHIFT, %g2	! isolate the nucleus pgsz ...
	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	! ... %g2 = nucleus pgsz
	or	%g6, %g2, %g6		! %g6 = nucleus pgsz | primary pgsz | cnum
	stxa	%g6, [%g4]ASI_DMMU		! install the new context
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	stxa	%g5, [%g4]ASI_DMMU		! put the old context back
	retry
	SET_SIZE(vtag_flushpage_tl1)

#endif	/* lint */
302
303
#if defined(lint)

/* ARGSUSED */
void
vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
{
}

#else	/* lint */

/*
 * vtag_flush_pgcnt_tl1
 *	x-trap handler that demaps pgcnt consecutive MMU_PAGESIZE pages
 *	from the D-TLB and I-TLB.
 *
 *	%g1 = vaddr, zero-extended on 32-bit kernel
 *	%g2 = <sfmmup58|pgcnt6>; the pgcnt6 bits carry (pgcnt - 1)
 *
 * NOTE: this handler relies on the fact that no interrupts or traps
 *	can occur during the loop issuing the TLB_DEMAP operations.
 *	It is assumed that interrupts are disabled and this code is
 *	fetching from the kernel locked text address.
 *
 * Assumes TSBE_TAG = 0.
 */
	ENTRY_NP(vtag_flush_pgcnt_tl1)
	set	SFMMU_PGCNT_MASK, %g4
	and	%g4, %g2, %g3			! %g3 = pgcnt - 1
	add	%g3, 1, %g3			! %g3 = pgcnt

	andn	%g2, SFMMU_PGCNT_MASK, %g2	! %g2 = sfmmup
	srln	%g1, MMU_PAGESHIFT, %g1		! strip the page offset ...

	sethi	%hi(ksfmmup), %g4
	ldx	[%g4 + %lo(ksfmmup)], %g4	! %g4 = ksfmmup
	cmp	%g4, %g2
	bne,pn	%xcc, 1f			! not the kernel as: go to 1
	  slln	%g1, MMU_PAGESHIFT, %g1		! ... %g1 = page-aligned vaddr

	/*
	 * Kernel as: demap each page in the nucleus context.
	 */
	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
	set	MMU_PAGESIZE, %g2		! %g2 = page size
	sethi	%hi(FLUSH_ADDR), %g5
4:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5				! flush required by immu

	subcc	%g3, 1, %g3			! one page fewer to go
	bne,pt	%icc, 4b
	  add	%g1, %g2, %g1			! step to the next page
	retry
1:
	/*
	 * User as: demap each page through the primary context.
	 *
	 *	%g2 = sfmmup
	 *	%g3 = pgcnt
	 */
	SFMMU_CPU_CNUM(%g2, %g5, %g6)		! %g5 = sfmmu cnum on this CPU

	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1

	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
	sll	%g4, CTXREG_EXT_SHIFT, %g4
	or	%g5, %g4, %g5			! %g5 = pgsz | cnum

	set	MMU_PCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g6		! %g6 = old context register
	srlx	%g6, CTXREG_NEXT_SHIFT, %g2	! isolate the nucleus pgsz ...
	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	! ... %g2 = nucleus pgsz
	or	%g5, %g2, %g5		! %g5 = nucleus pgsz | primary pgsz | cnum
	stxa	%g5, [%g4]ASI_DMMU		! install the new context

	set	MMU_PAGESIZE, %g2		! %g2 = page size
	sethi	%hi(FLUSH_ADDR), %g5
3:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5				! flush required by immu

	subcc	%g3, 1, %g3			! one page fewer to go
	bne,pt	%icc, 3b
	  add	%g1, %g2, %g1			! step to the next page

	stxa	%g6, [%g4]ASI_DMMU		! put the old context back
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)

#endif	/* lint */
392
#if defined(lint)

/*ARGSUSED*/
void
vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
{
}

#else	/* lint */

/*
 * vtag_flushall_tl1
 *	x-trap handler that issues a demap-all to the D-TLB and I-TLB.
 *	Neither argument register is read.
 */
	ENTRY_NP(vtag_flushall_tl1)
	set	DEMAP_ALL_TYPE, %g4		! %g4 = demap-all address
	stxa	%g0, [%g4]ASI_DTLB_DEMAP
	stxa	%g0, [%g4]ASI_ITLB_DEMAP
	retry
	SET_SIZE(vtag_flushall_tl1)

#endif	/* lint */
413
414
#if defined(lint)

/* ARGSUSED */
void
vac_flushpage(pfn_t pfnum, int vcolor)
{
}

#else	/* lint */

/*
 * vac_flushpage(pfnum, color)
 *	Flush one 8k page of the D-$ whose physical page is pfnum.
 *
 *	The cheetah dcache is a 64k pseudo 4 way associative cache.
 *	It is a virtually indexed, physically tagged cache.
 *
 * dflush_type is the flush algorithm selector read by DCACHE_FLUSHPAGE;
 * it is initialized to FLUSHPAGE_TYPE.
 */
	.seg	".data"
	.align	8
	.global	dflush_type
dflush_type:
	.word	FLUSHPAGE_TYPE

	ENTRY(vac_flushpage)
	/*
	 * %o0 = pfnum, %o1 = color; %o2 - %o4 are scratch
	 */
	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
	retl
	  nop
	SET_SIZE(vac_flushpage)

#endif	/* lint */
449
450
#if defined(lint)

/* ARGSUSED */
void
vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
{
}

#else	/* lint */

/*
 * vac_flushpage_tl1
 *	x-trap handler that flushes a page from the d$.
 *
 *	%g1 = pfnum
 *	%g2 = color
 *	%g3 - %g5 are scratch
 */
	ENTRY_NP(vac_flushpage_tl1)
	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
	retry
	SET_SIZE(vac_flushpage_tl1)

#endif	/* lint */
471
472
#if defined(lint)

/* ARGSUSED */
void
vac_flushcolor(int vcolor, pfn_t pfnum)
{
}

#else	/* lint */

/*
 * vac_flushcolor(vcolor, pfnum)
 *
 * On UltraSPARC III flushing a color is the same operation as flushing
 * a page, because there is an ASI that flushes the dcache by physical
 * address.  Flushing by physical address is faster since the
 * associativity of the dcache does not have to be dealt with.
 *
 * vac_flushpage() and vac_flushcolor() take the same arguments in
 * reversed order.  This keeps compatibility with spitfire, where
 * vac_flushcolor has only one argument, namely vcolor.
 *
 *	%o0 = vcolor
 *	%o1 = pfnum
 *	%o2 - %o4 are scratch
 */
	ENTRY(vac_flushcolor)
	DCACHE_FLUSHPAGE(%o1, %o0, %o2, %o3, %o4)
	retl
	  nop
	SET_SIZE(vac_flushcolor)

#endif	/* lint */
503
504
#if defined(lint)

/* ARGSUSED */
void
vac_flushcolor_tl1(uint64_t vcolor, uint64_t pfnum)
{
}

#else	/* lint */

/*
 * vac_flushcolor_tl1
 *	Trap-level counterpart of vac_flushcolor; returns with retry.
 *
 *	%g1 = vcolor
 *	%g2 = pfnum
 *	%g3 - %g5 are scratch
 */
	ENTRY(vac_flushcolor_tl1)
	DCACHE_FLUSHPAGE(%g2, %g1, %g3, %g4, %g5)
	retry
	SET_SIZE(vac_flushcolor_tl1)

#endif	/* lint */
524
#if defined(lint)

int
idsr_busy(void)
{
	return (0);
}

#else	/* lint */

/*
 * Determine whether or not the IDSR is busy.
 * Entry: no arguments
 * Returns: 1 if busy, 0 otherwise
 *
 * The branch below is annulled, so its delay slot (mov 1, %o0) runs only
 * when the branch is taken.  The branch therefore has to be taken when
 * IDSR_BUSY is set.  Branching on zero instead would return 1 for an
 * idle IDSR and 0 for a busy one, the opposite of the contract above.
 */
	ENTRY(idsr_busy)
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	clr	%o0				! default: not busy
	btst	IDSR_BUSY, %g1
	bnz,a,pn %xcc, 1f			! busy bit set?
	mov	1, %o0				! executed only when busy
1:
	retl
	nop
	SET_SIZE(idsr_busy)

#endif	/* lint */
552
#if defined(lint)

/* ARGSUSED */
void
init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
}

/* ARGSUSED */
void
init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
{
}

#else	/* lint */

	.global _dispatch_status_busy
_dispatch_status_busy:
	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
	.align	4

/*
 * init_mondo / init_mondo_nocheck
 *	Load the three interrupt dispatch data registers.
 *
 * Entry:
 *	%o0 - function or inumber to call
 *	%o1, %o2 - arguments (2 uint64_t's)
 *
 * On DEBUG kernels init_mondo panics if IDSR_BUSY is set on entry;
 * init_mondo_nocheck enters below that check.
 */
	.seg "text"

	ENTRY(init_mondo)
#ifdef DEBUG
	/*
	 * The IDSR should not be busy at the moment.
	 */
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	btst	IDSR_BUSY, %g1
	bz,pt	%xcc, 1f
	nop
	sethi	%hi(_dispatch_status_busy), %o0
	call	panic
	or	%o0, %lo(_dispatch_status_busy), %o0
#endif /* DEBUG */

	ALTENTRY(init_mondo_nocheck)
1:
	mov	IDDR_0, %g1
	stxa	%o0, [%g1]ASI_INTR_DISPATCH	! dispatch data reg 0

	mov	IDDR_1, %g2
	stxa	%o1, [%g2]ASI_INTR_DISPATCH	! dispatch data reg 1

	mov	IDDR_2, %g3
	stxa	%o2, [%g3]ASI_INTR_DISPATCH	! dispatch data reg 2

	membar	#Sync
	retl
	  nop
	SET_SIZE(init_mondo_nocheck)
	SET_SIZE(init_mondo)

#endif	/* lint */
621
622
623#if !(defined(JALAPENO) || defined(SERRANO))
624
#if defined(lint)

/* ARGSUSED */
void
shipit(int upaid, int bn)
{
	return;
}

#else	/* lint */

/*
 * shipit(upaid, bn)
 *	Ship mondo to aid using busy/nack pair bn.
 *
 *	%o0 = agent id
 *	%o1 = busy/nack pair number
 */
	ENTRY_NP(shipit)
	sll	%o1, IDCR_BN_SHIFT, %g2		! IDCR<28:24> = b/n pair
	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = agent id
	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
	or	%g1, %g2, %g1			! %g1 = complete dispatch address
	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
	membar	#Sync
	retl
	  nop
	SET_SIZE(shipit)

#endif	/* lint */
649
650#endif	/* !(JALAPENO || SERRANO) */
651
652
#if defined(lint)

/* ARGSUSED */
void
flush_instr_mem(caddr_t vaddr, size_t len)
{
}

#else	/* lint */

/*
 * flush_instr_mem(vaddr, len)
 *	%o0 = vaddr
 *	%o1 = bytes to be flushed (not read here)
 *
 * UltraSPARC-III keeps the on-chip Instruction Cache consistent with
 * the stores from all processors, so a FLUSH instruction is only needed
 * to make the pipeline consistent.  A single flush at the end of a
 * sequence of stores that updates the instruction stream is therefore
 * sufficient for correct operation.
 */
	ENTRY(flush_instr_mem)
	flush	%o0				! the address is irrelevant
	retl
	  nop
	SET_SIZE(flush_instr_mem)

#endif	/* lint */
681
682
683#if defined(CPU_IMP_ECACHE_ASSOC)
684
#if defined(lint)

/* ARGSUSED */
uint64_t
get_ecache_ctrl(void)
{
	return (0);
}

#else	/* lint */

/*
 * get_ecache_ctrl()
 *	Return the E$ control register in %o0: ASI_EC_CFG_TIMING when
 *	the CPU implementation is JAGUAR_IMPL, ASI_EC_CTRL otherwise.
 *
 * Both branch delay slots hold a nop on purpose: putting an ASI access
 * in the delay slot may cause it to be accessed, even when annulled.
 */
	ENTRY(get_ecache_ctrl)
	GET_CPU_IMPL(%o0)
	cmp	%o0, JAGUAR_IMPL
	bne	1f
	  nop
	ldxa	[%g0]ASI_EC_CFG_TIMING, %o0	! Jaguar shared E$ ctrl reg
	ba	2f
	  nop
1:
	ldxa	[%g0]ASI_EC_CTRL, %o0		! Ch/Ch+ E$ control reg
2:
	retl
	  nop
	SET_SIZE(get_ecache_ctrl)

#endif	/* lint */
714
715#endif	/* CPU_IMP_ECACHE_ASSOC */
716
717
718#if !(defined(JALAPENO) || defined(SERRANO))
719
720/*
721 * flush_ecache:
722 *	%o0 - 64 bit physical address
723 *	%o1 - ecache size
724 *	%o2 - ecache linesize
725 */
#if defined(lint)

/*ARGSUSED*/
void
flush_ecache(uint64_t physaddr, size_t ecache_size, size_t ecache_linesize)
{
}

#else /* !lint */

	ENTRY(flush_ecache)

	/*
	 * Certain CPU implementations need the L2 cache flushed before
	 * the ecache is flushed.  %g3 - %g5 are handed over as scratch.
	 */
	PN_L2_FLUSHALL(%g3, %g4, %g5)

	/*
	 * Displacement-flush the entire Ecache.
	 *	%o1 = ecache size, %o2 = ecache linesize
	 *	%o0 and %o4 are handed over as the remaining macro arguments
	 */
	ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)

	retl
	  nop
	SET_SIZE(flush_ecache)

#endif /* lint */
753
754#endif	/* !(JALAPENO || SERRANO) */
755
756
#if defined(lint)

void
flush_dcache(void)
{
}

#else	/* lint */

/*
 * flush_dcache()
 *	Flush the whole D$, using the global dcache_size and
 *	dcache_linesize values.
 */
	ENTRY(flush_dcache)
	ASM_LD(%o1, dcache_linesize)		! %o1 = dcache_linesize
	ASM_LD(%o0, dcache_size)		! %o0 = dcache_size
	CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
	retl
	  nop
	SET_SIZE(flush_dcache)

#endif	/* lint */
774
775
#if defined(lint)

void
flush_icache(void)
{
}

#else	/* lint */

/*
 * flush_icache()
 *	Flush the whole I$.  The cache size and line size are read from
 *	the CPU private area; GET_CPU_PRIVATE_PTR is given flush_icache_1
 *	as its alternate label, where the global icache_size and
 *	icache_linesize are loaded instead.
 */
	ENTRY(flush_icache)
	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1	! %o1 = line size
	ba,pt	%icc, 2f
	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0		! %o0 = cache size
flush_icache_1:
	ASM_LD(%o0, icache_size)
	ASM_LD(%o1, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
	retl
	  nop
	SET_SIZE(flush_icache)

#endif	/* lint */
799
#if defined(lint)

/*ARGSUSED*/
void
kdi_flush_idcache(int dcache_size, int dcache_lsize, int icache_size,
    int icache_lsize)
{
}

#else	/* lint */

/*
 * kdi_flush_idcache(dcache_size, dcache_lsize, icache_size, icache_lsize)
 *	Flush the whole D$ and then the whole I$, with the cache
 *	geometry supplied by the caller.
 *
 *	%o0 = dcache_size,  %o1 = dcache_lsize
 *	%o2 = icache_size,  %o3 = icache_lsize
 *	%g1 and %g2 are scratch
 */
	ENTRY(kdi_flush_idcache)
	CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
	CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
	membar	#Sync
	retl
	  nop
	SET_SIZE(kdi_flush_idcache)

#endif	/* lint */
820
#if defined(lint)

void
flush_pcache(void)
{
}

#else	/* lint */

/*
 * flush_pcache()
 *	Flush the whole P$ through PCACHE_FLUSHALL; %o0 - %o2 are
 *	handed to the macro.
 */
	ENTRY(flush_pcache)
	PCACHE_FLUSHALL(%o0, %o1, %o2)
	retl
	  nop
	SET_SIZE(flush_pcache)

#endif	/* lint */
836
837
838#if defined(CPU_IMP_L1_CACHE_PARITY)
839
#if defined(lint)

/* ARGSUSED */
void
get_dcache_dtag(uint32_t dcache_idx, uint64_t *data)
{
}

#else	/* lint */

/*
 * get_dcache_dtag(dcache_idx, data)
 *	Capture the D$ tag, microtag, snoop tag and data found at
 *	dcache_idx into *data, which points to a ch_dc_data_t structure
 *	(see cheetahregs.h).
 *
 *	%o0 = dcache_idx
 *	%o1 = data
 *
 * The Dcache *should* be turned off when this code is executed.
 * PSTATE_IE and PSTATE_AM are cleared while the routine runs and the
 * caller's pstate is restored on return.
 */
	.align	128
	ENTRY(get_dcache_dtag)
	rdpr	%pstate, %o5			! %o5 = pstate to restore on exit
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate
	ba	1f
	  stx	%o0, [%o1 + CH_DC_IDX]		! record the index

	.align	128
1:
	ldxa	[%o0]ASI_DC_TAG, %o2
	stx	%o2, [%o1 + CH_DC_TAG]
	membar	#Sync
	ldxa	[%o0]ASI_DC_UTAG, %o2
	membar	#Sync
	stx	%o2, [%o1 + CH_DC_UTAG]
	ldxa	[%o0]ASI_DC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_DC_SNTAG]
	add	%o1, CH_DC_DATA, %o1		! %o1 = data area of the struct
	mov	%g0, %o3			! %o3 = byte offset, from 0
2:
	membar	#Sync				! required before ASI_DC_DATA
	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
	membar	#Sync				! required after ASI_DC_DATA
	stx	%o2, [%o1 + %o3]
	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
	blt	2b
	  add	%o3, 8, %o3			! next 8 bytes

	/*
	 * Unlike other CPUs in the family, D$ data parity bits for Panther
	 * do not reside in the microtag.  They have to be read using the
	 * DC_data_parity bit of ASI_DCACHE_DATA.  Also, instead of just
	 * 8 parity bits protecting all 32 bytes of data per line, there
	 * are now 32 bits of parity.
	 */
	GET_CPU_IMPL(%o3)
	cmp	%o3, PANTHER_IMPL
	bne	4f				! not Panther: done
	  mov	%g0, %o3			! %o3 = byte offset, from 0

	/*
	 * Advance the pointer to the field that holds the parity bits,
	 * plus the offset of the last parity byte, because all 4 parity
	 * bytes are stored within one 64 bit field like this:
	 *
	 * +------+------------+------------+------------+------------+
	 * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
	 * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
	 * +------+------------+------------+------------+------------+
	 *  63:32     31:24        23:16         15:8          7:0
	 */
	add	%o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1

	/* merge the DC_data_parity bit into the working index */
	mov	1, %o2
	sll	%o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
	or	%o0, %o2, %o0
3:
	membar	#Sync				! required before ASI_DC_DATA
	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
	membar	#Sync				! required after ASI_DC_DATA
	stb	%o2, [%o1]
	sub	%o1, 1, %o1			! step back one parity byte
	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
	blt	3b
	  add	%o3, 8, %o3			! next 8 bytes
4:
	retl
	  wrpr	%g0, %o5, %pstate		! restore the caller's pstate
	SET_SIZE(get_dcache_dtag)

#endif	/* lint */
927
928
#if defined(lint)

/* ARGSUSED */
void
get_icache_dtag(uint32_t ecache_idx, uint64_t *data)
{
}

#else	/* lint */

/*
 * get_icache_dtag(idx, data)
 *	Capture the I$ tags, snoop tag and data found at idx into *data,
 *	which points to a ch_ic_data_t structure (see cheetahregs.h).
 *
 *	%o0 = index
 *	%o1 = data
 *
 * The Icache *Must* be turned off when this function is called,
 * because diagnostic accesses to the Icache interfere with cache
 * consistency.
 * PSTATE_IE and PSTATE_AM are cleared while the routine runs and the
 * caller's pstate is restored on return.
 */
	.align	128
	ENTRY(get_icache_dtag)
	rdpr	%pstate, %o5			! %o5 = pstate to restore on exit
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate

	stx	%o0, [%o1 + CH_IC_IDX]		! record the index
	ldxa	[%o0]ASI_IC_TAG, %o2
	stx	%o2, [%o1 + CH_IC_PATAG]
	add	%o0, CH_ICTAG_UTAG, %o0		! select the UTAG field
	ldxa	[%o0]ASI_IC_TAG, %o2
	add	%o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0	! UTAG -> UPPER
	stx	%o2, [%o1 + CH_IC_UTAG]
	ldxa	[%o0]ASI_IC_TAG, %o2
	add	%o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0	! UPPER -> LOWER
	stx	%o2, [%o1 + CH_IC_UPPER]
	ldxa	[%o0]ASI_IC_TAG, %o2
	andn	%o0, CH_ICTAG_TMASK, %o0	! clear the tag-select bits
	stx	%o2, [%o1 + CH_IC_LOWER]
	ldxa	[%o0]ASI_IC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_IC_SNTAG]
	add	%o1, CH_IC_DATA, %o1		! %o1 = data area of the struct
	mov	%g0, %o3			! %o3 = byte offset, from 0
2:
	ldxa	[%o0 + %o3]ASI_IC_DATA, %o2
	stx	%o2, [%o1 + %o3]
	cmp	%o3, PN_IC_DATA_REG_SIZE - 8
	blt	2b
	  add	%o3, 8, %o3			! next 8 bytes

	retl
	  wrpr	%g0, %o5, %pstate		! restore the caller's pstate
	SET_SIZE(get_icache_dtag)

#endif	/* lint */
980
#if defined(lint)

/* ARGSUSED */
void
get_pcache_dtag(uint32_t pcache_idx, uint64_t *data)
{
}

#else	/* lint */

/*
 * get_pcache_dtag(pcache_idx, data)
 *	Capture the P$ status, tag, snoop tag and data into *data.
 *
 * inputs:
 *   pcache_idx	- fully constructed VA for accessing P$ diagnostic
 *		  registers.  Contains PC_way and PC_addr shifted into
 *		  the correct bit positions.  See the PRM for more details.
 *   data	- pointer to a ch_pc_data_t structure (see cheetahregs.h)
 *
 * PSTATE_IE and PSTATE_AM are cleared while the routine runs and the
 * caller's pstate is restored on return.
 */
	.align	128
	ENTRY(get_pcache_dtag)
	rdpr	%pstate, %o5			! %o5 = pstate to restore on exit
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate

	stx	%o0, [%o1 + CH_PC_IDX]		! record the index
	ldxa	[%o0]ASI_PC_STATUS_DATA, %o2
	stx	%o2, [%o1 + CH_PC_STATUS]
	ldxa	[%o0]ASI_PC_TAG, %o2
	stx	%o2, [%o1 + CH_PC_TAG]
	ldxa	[%o0]ASI_PC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_PC_SNTAG]
	add	%o1, CH_PC_DATA, %o1		! %o1 = data area of the struct
	mov	%g0, %o3			! %o3 = byte offset, from 0
2:
	ldxa	[%o0 + %o3]ASI_PC_DATA, %o2
	stx	%o2, [%o1 + %o3]
	cmp	%o3, CH_PC_DATA_REG_SIZE - 8
	blt	2b
	  add	%o3, 8, %o3			! next 8 bytes

	retl
	  wrpr	%g0, %o5, %pstate		! restore the caller's pstate
	SET_SIZE(get_pcache_dtag)

#endif	/* lint */
1026
1027#endif	/* CPU_IMP_L1_CACHE_PARITY */
1028
#if defined(lint)

/* ARGSUSED */
void
set_dcu(uint64_t dcu)
{
}

#else	/* lint */

/*
 * set_dcu(dcu)
 *	Write the DCU register; used to re-enable the i$, d$, w$, and p$
 *	according to bootup cache state (the WE, HPE, SPE, PE, IC, and DC
 *	bits defined as DCU_CACHE).
 *
 *	%o0 = 64 bit value to store
 */
	ENTRY(set_dcu)
	stxa	%o0, [%g0]ASI_DCU		! write the DCU
	flush	%g0		! flush required after changing the IC bit
	retl
	  nop
	SET_SIZE(set_dcu)

#endif	/* lint */
1051
1052
#if defined(lint)

uint64_t
get_dcu(void)
{
	return ((uint64_t)0);
}

#else	/* lint */

/*
 * get_dcu()
 *	Return the contents of the DCU control register in %o0.
 */
	ENTRY(get_dcu)
	ldxa	[%g0]ASI_DCU, %o0		! %o0 = DCU control register
	retl
	  nop
	SET_SIZE(get_dcu)

#endif	/* lint */
1073
1074/*
1075 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
1076 *
1077 * This handler is used to check for softints generated by error trap
1078 * handlers to report errors.  On Cheetah, this mechanism is used by the
1079 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
1080 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
1081 * NB: Must be 8 instructions or less to fit in trap table and code must
1082 *     be relocatable.
1083 */
#if defined(lint)

void
ch_pil15_interrupt_instr(void)
{
}

#else	/* lint */

	/*
	 * Trap table stub: transfer control to ch_pil15_interrupt,
	 * with %g1 handed to ASM_JMP as its working register.
	 */
	ENTRY_NP(ch_pil15_interrupt_instr)
	ASM_JMP(%g1, ch_pil15_interrupt)
	SET_SIZE(ch_pil15_interrupt_instr)

#endif
1097
1098
#if defined(lint)

void
ch_pil15_interrupt(void)
{
}

#else	/* lint */

	ENTRY_NP(ch_pil15_interrupt)

	/*
	 * pil_interrupt is hacked to assume that every level 15
	 * interrupt is generated by the CPU to indicate a performance
	 * counter overflow, so this gets ugly.  The Error at TL>0
	 * pending status is inspected before pil_interrupt is called.
	 * If it is non-zero, an error has occurred and it is handled
	 * here.  Otherwise control is transferred to pil_interrupt.
	 * Note that when an error is detected pil_interrupt is not
	 * called, so overflow interrupts may be lost, causing erroneous
	 * performance measurements.  However, error-recovery will have
	 * a detrimental effect on performance anyway.
	 */
	CPU_INDEX(%g1, %g4)
	set	ch_err_tl1_pending, %g4
	ldub	[%g1 + %g4], %g2		! %g2 = this CPU's pending flag
	brz	%g2, 1f				! nothing pending: default path
	  nop

	/*
	 * A TL>0 error is pending; clear the TL>0 pending status.
	 */
	stb	%g0, [%g1 + %g4]

	/*
	 * Clear the softint.
	 */
	mov	1, %g5
	sll	%g5, PIL_15, %g5
	wr	%g5, CLEAR_SOFTINT

	/*
	 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
	 * to process the Fast ECC/Cache Parity at TL>0 error.  Clear
	 * panic flag (%g2).
	 */
	set	cpu_tl1_error, %g1
	mov	%g0, %g2
	ba	sys_trap
	  mov	PIL_15, %g4

1:
	/*
	 * The logout is invalid.
	 *
	 * Hand over to the default interrupt handler.
	 */
	sethi	%hi(pil_interrupt), %g1
	jmp	%g1 + %lo(pil_interrupt)
	  mov	PIL_15, %g4

	SET_SIZE(ch_pil15_interrupt)
#endif
1161
1162
1163/*
1164 * Error Handling
1165 *
1166 * Cheetah provides error checking for all memory access paths between
1167 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
1168 * information is logged in the AFSR, (also AFSR_EXT for Panther) and
1169 * AFAR and one of the following traps is generated (provided that it
1170 * is enabled in External Cache Error Enable Register) to handle that
1171 * error:
1172 * 1. trap 0x70: Precise trap
1173 *    tt0_fecc for errors at trap level(TL)>=0
1174 * 2. trap 0x0A and 0x32: Deferred trap
1175 *    async_err for errors at TL>=0
1176 * 3. trap 0x63: Disrupting trap
1177 *    ce_err for errors at TL=0
1178 *    (Note that trap 0x63 cannot happen at trap level > 0)
1179 *
1180 * Trap level one handlers panic the system except for the fast ecc
1181 * error handler which tries to recover from certain errors.
1182 */
1183
1184/*
1185 * FAST ECC TRAP STRATEGY:
1186 *
 * Software must handle single and multi bit errors which occur due to data
 * or instruction cache reads from the external cache. A single or multi bit
 * error occurring in one of these situations results in a precise trap.
1190 *
1191 * The basic flow of this trap handler is as follows:
1192 *
1193 * 1) Record the state and then turn off the Dcache and Icache.  The Dcache
1194 *    is disabled because bad data could have been installed.  The Icache is
1195 *    turned off because we want to capture the Icache line related to the
1196 *    AFAR.
1197 * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing.
 * 3) Park sibling core if caches are shared (to avoid race condition while
 *    accessing shared resources such as L3 data staging register during
 *    CPU logout).
1201 * 4) Read the AFAR and AFSR.
1202 * 5) If CPU logout structure is not being used, then:
1203 *    6) Clear all errors from the AFSR.
1204 *    7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
1205 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
1206 *       state.
1207 *    9) Unpark sibling core if we parked it earlier.
1208 *    10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
1209 *        running at PIL 15.
1210 * 6) Otherwise, if CPU logout structure is being used:
 *    7) Increment the "logout busy count".
1212 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
1213 *       state.
1214 *    9) Unpark sibling core if we parked it earlier.
1215 *    10) Issue a retry since the other CPU error logging code will end up
1216 *       finding this error bit and logging information about it later.
1217 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
1218 *    yet initialized such that we can't even check the logout struct, then
1219 *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
1220 *    call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
1221 *    to determine information such as TL, TT, CEEN and NCEEN settings, etc
1222 *    in the high level trap handler since we don't have access to detailed
1223 *    logout information in cases where the cpu_private struct is not yet
1224 *    initialized.
1225 *
1226 * We flush the E$ and D$ here on TL=1 code to prevent getting nested
1227 * Fast ECC traps in the TL=0 code.  If we get a Fast ECC event here in
1228 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
1229 * since it is uses different code/data from this handler, has a better
1230 * chance of fixing things up than simply recursing through this code
1231 * again (this would probably cause an eventual kernel stack overflow).
1232 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
1233 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
1234 * the Fast ECC at TL>0 handler and eventually enter Red Mode.
1235 *
1236 * Note that for Cheetah (and only Cheetah), we use alias addresses for
1237 * flushing rather than ASI accesses (which don't exist on Cheetah).
1238 * Should we encounter a Fast ECC error within this handler on Cheetah,
1239 * there's a good chance it's within the ecache_flushaddr buffer (since
1240 * it's the largest piece of memory we touch in the handler and it is
1241 * usually kernel text/data).  For that reason the Fast ECC at TL>0
1242 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
1243 */
1244
1245/*
1246 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
1247 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
1248 * architecture-specific files.
1249 * NB: Must be 8 instructions or less to fit in trap table and code must
1250 *     be relocatable.
1251 */
1252
1253#if defined(lint)
1254
/* Empty C definition so that lint can see the assembly entry point. */
1255void
1256fecc_err_instr(void)
1257{}
1258
1259#else	/* lint */
1260
1261	ENTRY_NP(fecc_err_instr)
1262	membar	#Sync			! Cheetah requires membar #Sync
1263
1264	/*
1265	 * Save current DCU state.  Turn off the Dcache and Icache.
	 * %g1 keeps the unmodified DCU value for the rest of the handler;
	 * the fast_ecc_err in this file writes it back to ASI_DCU after
	 * flushing the caches.  %g4 is the copy with DCU_DC and DCU_IC
	 * cleared, which is the value stored here.
1266	 */
1267	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1268	andn	%g1, DCU_DC + DCU_IC, %g4
1269	stxa	%g4, [%g0]ASI_DCU
1270	flush	%g0	/* flush required after changing the IC bit */
1271
	/*
	 * Leave the trap table slot and continue in fast_ecc_err.  %g4 is
	 * not needed by this routine after the store above and is handed
	 * to ASM_JMP, presumably as the scratch register used to form the
	 * target address (the macro is defined elsewhere - confirm).
	 */
1272	ASM_JMP(%g4, fast_ecc_err)
1273	SET_SIZE(fecc_err_instr)
1274
1275#endif	/* lint */
1276
1277
1278#if !(defined(JALAPENO) || defined(SERRANO))
1279
1280#if defined(lint)
1281
/* Empty C definition so that lint can see the assembly entry point. */
1282void
1283fast_ecc_err(void)
1284{}
1285
1286#else	/* lint */
1287
	/*
	 * fast_ecc_err: body of the Fast ECC (trap 70) handler at TL=0,
	 * entered from fecc_err_instr.  On entry %g1 holds the DCU value
	 * that fecc_err_instr saved before turning the caches off; it is
	 * written back to ASI_DCU near the end of this routine.
	 */
1288	.section ".text"
1289	.align	64
1290	ENTRY_NP(fast_ecc_err)
1291
1292	/*
1293	 * Turn off CEEN and NCEEN.
	 * %g3 keeps the value read from ASI_ESTATE_ERR; its CEEN/NCEEN
	 * bits are extracted below to form the logout flags.  %g4 is the
	 * copy with both enable bits cleared.
1294	 */
1295	ldxa	[%g0]ASI_ESTATE_ERR, %g3
1296	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1297	stxa	%g4, [%g0]ASI_ESTATE_ERR
1298	membar	#Sync			! membar sync required
1299
1300	/*
1301	 * Check to see whether we need to park our sibling core
1302	 * before recording diagnostic information from caches
1303	 * which may be shared by both cores.
1304	 * We use %g1 to store information about whether or not
1305	 * we had to park the core (%g1 holds our DCUCR value and
1306	 * we only use bits from that register which are "reserved"
1307	 * to keep track of core parking) so that we know whether
1308	 * or not to unpark later. %g5 and %g4 are scratch registers.
1309	 */
1310	PARK_SIBLING_CORE(%g1, %g5, %g4)
1311
1312	/*
1313	 * Do the CPU log out capture.
1314	 *   %g3 = "failed?" return value.
1315	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1316	 *         into this macro via %g4. Output only valid if cpu_private
1317	 *         struct has not been initialized.
1318	 *   CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
1319	 *   %g4 = Trap information stored in the cpu logout flags field
1320	 *   %g5 = scr1
1321	 *   %g6 = scr2
1322	 *   %g3 = scr3
1323	 *   %g4 = scr4
1324	 */
1325	 /* store the CEEN and NCEEN values, TL=0 */
1326	and	%g3, EN_REG_CEEN + EN_REG_NCEEN, %g4
1327	set	CHPR_FECCTL0_LOGOUT, %g6
1328	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1329
1330	/*
1331	 * Flush the Ecache (and L2 cache for Panther) to get the error out
1332	 * of the Ecache.  If the UCC or UCU is on a dirty line, then the
1333	 * following flush will turn that into a WDC or WDU, respectively.
1334	 */
1335	PN_L2_FLUSHALL(%g4, %g5, %g6)
1336
	/*
	 * %g4 = address of this CPU's cpunodes[] entry (CPU index times
	 * CPU_NODE_SIZE, added to the base of cpunodes).  Its E$ line size
	 * is then loaded into %g5 and its E$ size into %g4.
	 */
1337	CPU_INDEX(%g4, %g5)
1338	mulx	%g4, CPU_NODE_SIZE, %g4
1339	set	cpunodes, %g5
1340	add	%g4, %g5, %g4
1341	ld	[%g4 + ECACHE_LINESIZE], %g5
1342	ld	[%g4 + ECACHE_SIZE], %g4
1343
	/* %g6 = value of ecache_flushaddr, %g7 is passed as scratch. */
1344	ASM_LDX(%g6, ecache_flushaddr)
1345	ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)
1346
1347	/*
1348	 * Flush the Dcache.  Since bad data could have been installed in
1349	 * the Dcache we must flush it before re-enabling it.
1350	 */
1351	ASM_LD(%g5, dcache_size)
1352	ASM_LD(%g6, dcache_linesize)
1353	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1354
1355	/*
1356	 * Flush the Icache.  Since we turned off the Icache to capture the
1357	 * Icache line it is now stale or corrupted and we must flush it
1358	 * before re-enabling it.
	 * The I$ size and line size are taken from the cpu_private area
	 * (CHPR_ICACHE_SIZE / CHPR_ICACHE_LINESIZE).  fast_ecc_err_5 is the
	 * label handed to GET_CPU_PRIVATE_PTR, presumably taken when
	 * cpu_private is not available (confirm against the macro); on that
	 * path the global icache_size/icache_linesize are used instead.
1359	 */
1360	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
1361	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1362	ba,pt	%icc, 6f
1363	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1364fast_ecc_err_5:
1365	ASM_LD(%g5, icache_size)
1366	ASM_LD(%g6, icache_linesize)
13676:
1368	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1369
1370	/*
1371	 * check to see whether we parked our sibling core at the start
1372	 * of this handler. If so, we need to unpark it here.
1373	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1374	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1375	 */
1376	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1377
1378	/*
1379	 * Restore the Dcache and Icache to the previous state.
	 * %g1 is the DCU value saved by fecc_err_instr.
1380	 */
1381	stxa	%g1, [%g0]ASI_DCU
1382	flush	%g0	/* flush required after changing the IC bit */
1383
1384	/*
1385	 * Make sure our CPU logout operation was successful.
	 * %g3 is the "failed?" result of DO_CPU_LOGOUT: zero goes on to
	 * sys_trap at 8f, non-zero is treated below as the nesting count.
1386	 */
1387	cmp	%g3, %g0
1388	be	8f
1389	  nop
1390
1391	/*
1392	 * If the logout structure had been busy, how many times have
1393	 * we tried to use it and failed (nesting count)? If we have
1394	 * already recursed a substantial number of times, then we can
1395	 * assume things are not going to get better by themselves and
1396	 * so it would be best to panic.
1397	 */
1398	cmp	%g3, CLO_NESTING_MAX
1399	blt	7f
1400	  nop
1401
	/* The panic reason is loaded into %g1 in the call's delay slot. */
1402        call ptl1_panic
1403          mov   PTL1_BAD_ECC, %g1
1404
14057:
1406	/*
1407	 * Otherwise, if the logout structure was busy but we have not
1408	 * nested more times than our maximum value, then we simply
1409	 * issue a retry. Our TL=0 trap handler code will check and
1410	 * clear the AFSR after it is done logging what is currently
1411	 * in the logout struct and handle this event at that time.
1412	 */
1413	retry
14148:
1415	/*
1416	 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
1417	 * already at PIL 15.
	 * %g1 = handler address.  %g4 = current %pil, replaced by PIL_14
	 * by the movl in the branch delay slot when the current PIL is
	 * lower than PIL_14.
1418	 */
1419	set	cpu_fast_ecc_error, %g1
1420	rdpr	%pil, %g4
1421	cmp	%g4, PIL_14
1422	ba	sys_trap
1423	  movl	%icc, PIL_14, %g4
1424
1425	SET_SIZE(fast_ecc_err)
1426
1427#endif	/* lint */
1428
1429#endif	/* !(JALAPENO || SERRANO) */
1430
1431
1432/*
1433 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1434 *
1435 * The basic flow of this trap handler is as follows:
1436 *
1437 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1438 *    software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1439 *    will use to save %g1 and %g2.
1440 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1441 *    we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1442 *    handler (using the just saved %g1).
1443 * 3) Turn off the Dcache if it was on and save the state of the Dcache
1444 *    (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1445 *    NB: we don't turn off the Icache because bad data is not installed nor
1446 *        will we be doing any diagnostic accesses.
1447 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1448 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1449 *    %tpc, %tnpc, %tstate values previously saved).
1450 * 6) set %tl to %tl - 1.
1451 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
1452 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
1453 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear.  For
1454 *    Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
1455 *    Save the values in ch_err_tl1_data.  For Panther, read the shadow
1456 *    AFSR_EXT and save the value in ch_err_tl1_data.
1457 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
1458 *    being queued.  We'll report them via the AFSR/AFAR capture in step 13.
1459 * 11) Flush the Ecache.
1460 *    NB: the Ecache is flushed assuming the largest possible size with
1461 *        the smallest possible line size since access to the cpu_nodes may
1462 *        cause an unrecoverable DTLB miss.
1463 * 12) Reenable CEEN/NCEEN with the value saved from step 10.
1464 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
1465 *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
1466 *    Save the read AFSR/AFAR values in ch_err_tl1_data.  For Panther,
1467 *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1468 * 14) Flush and re-enable the Dcache if it was on at step 3.
1469 * 15) Do TRAPTRACE if enabled.
1470 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1471 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1472 * 18) Cause a softint 15.  The pil15_interrupt handler will inspect the
1473 *    event pending flag and call cpu_tl1_error via systrap if set.
1474 * 19) Restore the registers from step 5 and issue retry.
1475 */
1476
1477/*
1478 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1479 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1480 * architecture-specific files.  This generates a "Software Trap 0" at TL>0,
1481 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1482 * NB: Must be 8 instructions or less to fit in trap table and code must
1483 *     be relocatable.
1484 */
1485
1486#if defined(lint)
1487
/* Empty C definition so that lint can see the assembly entry point. */
1488void
1489fecc_err_tl1_instr(void)
1490{}
1491
1492#else	/* lint */
1493
	/*
	 * The whole trap table entry is the CH_ERR_TL1_TRAPENTRY macro
	 * (defined elsewhere), invoked with SWTRAP_0 as the software trap
	 * to raise.
	 */
1494	ENTRY_NP(fecc_err_tl1_instr)
1495	CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
1496	SET_SIZE(fecc_err_tl1_instr)
1497
1498#endif	/* lint */
1499
1500/*
1501 * Software trap 0 at TL>0.
1502 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1503 * the various architecture-specific files.  This is used as a continuation
1504 * of the fast ecc handling where we've bought an extra TL level, so we can
1505 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1506 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1507 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1508 * order two bits from %g1 and %g2 respectively).
1509 * NB: Must be 8 instructions or less to fit in trap table and code must
1510 *     be relocatable.
1511 */
1512#if defined(lint)
1513
/* Empty C definition so that lint can see the assembly entry point. */
1514void
1515fecc_err_tl1_cont_instr(void)
1516{}
1517
1518#else	/* lint */
1519
	/*
	 * The whole trap table entry is the CH_ERR_TL1_SWTRAPENTRY macro
	 * (defined elsewhere), invoked with fast_ecc_tl1_err as the handler
	 * to continue in.
	 */
1520	ENTRY_NP(fecc_err_tl1_cont_instr)
1521	CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
1522	SET_SIZE(fecc_err_tl1_cont_instr)
1523
1524#endif	/* lint */
1525
1526
1527#if defined(lint)
1528
/* Empty C definition so that lint can see the assembly entry point. */
1529void
1530ce_err(void)
1531{}
1532
1533#else	/* lint */
1534
1535/*
1536 * The ce_err function handles disrupting trap type 0x63 at TL=0.
1537 *
1538 * AFSR errors bits which cause this trap are:
1539 *	CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1540 *
1541 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1542 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1543 *
1544 * CEEN Bit of Cheetah External Cache Error Enable Register enables
1545 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1546 *
1547 * Cheetah+ also handles (No additional processing required):
1548 *    DUE, DTO, DBERR	(NCEEN controlled)
1549 *    THCE		(CEEN and ET_ECC_en controlled)
1550 *    TUE		(ET_ECC_en controlled)
1551 *
1552 * Panther further adds:
1553 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1554 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1555 *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1556 *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1557 *    THCE			(CEEN and L2_tag_ECC_en controlled)
1558 *    L3_THCE			(CEEN and ET_ECC_en controlled)
1559 *
1560 * Steps:
1561 *	1. Disable hardware corrected disrupting errors only (CEEN)
1562 *	2. Park sibling core if caches are shared (to avoid race
1563 *	   condition while accessing shared resources such as L3
1564 *	   data staging register during CPU logout).
1565 *	3. If the CPU logout structure is not currently being used:
1566 *		4. Clear AFSR error bits
1567 *		5. Capture Ecache, Dcache and Icache lines associated
1568 *		   with AFAR.
1569 *		6. Unpark sibling core if we parked it earlier.
1570 *		7. call cpu_disrupting_error via sys_trap at PIL 14
1571 *		   unless we're already running at PIL 15.
1572 *	4. Otherwise, if the CPU logout structure is busy:
1573 *		5. Increment "logout busy count" and place into %g3
1574 *		6. Unpark sibling core if we parked it earlier.
1575 *		7. Issue a retry since the other CPU error logging
1576 *		   code will end up finding this error bit and logging
1577 *		   information about it later.
1578 *	5. Alternatively (to 3 and 4 above), if the cpu_private struct is
1579 *         not yet initialized such that we can't even check the logout
1580 *         struct, then we place the clo_flags data into %g2
1581 *         (sys_trap->have_win arg #1) and call cpu_disrupting_error via
1582 *         systrap. The clo_flags parameter is used to determine information
1583 *         such as TL, TT, CEEN settings, etc in the high level trap
1584 *         handler since we don't have access to detailed logout information
1585 *         in cases where the cpu_private struct is not yet initialized.
1586 *
1587 * %g3: [ logout busy count ] - arg #2
1588 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1589 */
1590
1591	.align	128
1592	ENTRY_NP(ce_err)
1593	membar	#Sync			! Cheetah requires membar #Sync
1594
1595	/*
1596	 * Disable trap on hardware corrected errors (CEEN) while at TL=0
1597	 * to prevent recursion.
	 * Only the CEEN bit is cleared here.  %g1 is just a temporary for
	 * this read-modify-write; it is reloaded with the DCU value below.
1598	 */
1599	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1600	bclr	EN_REG_CEEN, %g1
1601	stxa	%g1, [%g0]ASI_ESTATE_ERR
1602	membar	#Sync			! membar sync required
1603
1604	/*
1605	 * Save current DCU state.  Turn off Icache to allow capture of
1606	 * Icache data by DO_CPU_LOGOUT.
	 * %g1 keeps the unmodified DCU value until it is written back
	 * near the end of the routine; only DCU_IC is cleared in the copy
	 * (%g4) that is stored here.
1607	 */
1608	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1609	andn	%g1, DCU_IC, %g4
1610	stxa	%g4, [%g0]ASI_DCU
1611	flush	%g0	/* flush required after changing the IC bit */
1612
1613	/*
1614	 * Check to see whether we need to park our sibling core
1615	 * before recording diagnostic information from caches
1616	 * which may be shared by both cores.
1617	 * We use %g1 to store information about whether or not
1618	 * we had to park the core (%g1 holds our DCUCR value and
1619	 * we only use bits from that register which are "reserved"
1620	 * to keep track of core parking) so that we know whether
1621	 * or not to unpark later. %g5 and %g4 are scratch registers.
1622	 */
1623	PARK_SIBLING_CORE(%g1, %g5, %g4)
1624
1625	/*
1626	 * Do the CPU log out capture.
1627	 *   %g3 = "failed?" return value.
1628	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1629	 *         into this macro via %g4. Output only valid if cpu_private
1630	 *         struct has not been initialized.
1631	 *   CHPR_CECC_LOGOUT = cpu logout structure offset input
1632	 *   %g4 = Trap information stored in the cpu logout flags field
1633	 *   %g5 = scr1
1634	 *   %g6 = scr2
1635	 *   %g3 = scr3
1636	 *   %g4 = scr4
1637	 */
1638	clr	%g4			! TL=0 bit in afsr
1639	set	CHPR_CECC_LOGOUT, %g6
1640	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1641
1642	/*
1643	 * Flush the Icache.  Since we turned off the Icache to capture the
1644	 * Icache line it is now stale or corrupted and we must flush it
1645	 * before re-enabling it.
	 * The I$ size and line size are taken from the cpu_private area
	 * (CHPR_ICACHE_SIZE / CHPR_ICACHE_LINESIZE).  ce_err_1 is the label
	 * handed to GET_CPU_PRIVATE_PTR, presumably taken when cpu_private
	 * is not available (confirm against the macro); on that path the
	 * global icache_size/icache_linesize are used instead.
1646	 */
1647	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
1648	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1649	ba,pt	%icc, 2f
1650	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1651ce_err_1:
1652	ASM_LD(%g5, icache_size)
1653	ASM_LD(%g6, icache_linesize)
16542:
1655	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1656
1657	/*
1658	 * check to see whether we parked our sibling core at the start
1659	 * of this handler. If so, we need to unpark it here.
1660	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1661	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1662	 */
1663	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1664
1665	/*
1666	 * Restore Icache to previous state.
1667	 */
1668	stxa	%g1, [%g0]ASI_DCU
1669	flush	%g0	/* flush required after changing the IC bit */
1670
1671	/*
1672	 * Make sure our CPU logout operation was successful.
	 * %g3 is the "failed?" result of DO_CPU_LOGOUT: zero goes on to
	 * sys_trap at 4f, non-zero is treated below as the nesting count.
1673	 */
1674	cmp	%g3, %g0
1675	be	4f
1676	  nop
1677
1678	/*
1679	 * If the logout structure had been busy, how many times have
1680	 * we tried to use it and failed (nesting count)? If we have
1681	 * already recursed a substantial number of times, then we can
1682	 * assume things are not going to get better by themselves and
1683	 * so it would be best to panic.
1684	 */
1685	cmp	%g3, CLO_NESTING_MAX
1686	blt	3f
1687	  nop
1688
	/* The panic reason is loaded into %g1 in the call's delay slot. */
1689        call ptl1_panic
1690          mov   PTL1_BAD_ECC, %g1
1691
16923:
1693	/*
1694	 * Otherwise, if the logout structure was busy but we have not
1695	 * nested more times than our maximum value, then we simply
1696	 * issue a retry. Our TL=0 trap handler code will check and
1697	 * clear the AFSR after it is done logging what is currently
1698	 * in the logout struct and handle this event at that time.
1699	 */
1700	retry
17014:
1702	/*
1703	 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
1704	 * already at PIL 15.
	 * %g1 = handler address.  %g4 = current %pil, replaced by PIL_14
	 * by the movl in the branch delay slot when the current PIL is
	 * lower than PIL_14.
1705	 */
1706	set	cpu_disrupting_error, %g1
1707	rdpr	%pil, %g4
1708	cmp	%g4, PIL_14
1709	ba	sys_trap
1710	  movl	%icc, PIL_14, %g4
1711	SET_SIZE(ce_err)
1712
1713#endif	/* lint */
1714
1715
1716#if defined(lint)
1717
1718/*
1719 * This trap cannot happen at TL>0 which means this routine will never
1720 * actually be called and so we treat this like a BAD TRAP panic.
1721 */
1722void
1723ce_err_tl1(void)
1724{}
1725
1726#else	/* lint */
1727
	/*
	 * ce_err_tl1: unconditionally panics via ptl1_panic.  The reason
	 * code PTL1_BAD_TRAP is loaded into %g1 in the call's delay slot.
	 */
1728	.align	64
1729	ENTRY_NP(ce_err_tl1)
1730
1731        call ptl1_panic
1732          mov   PTL1_BAD_TRAP, %g1
1733
1734	SET_SIZE(ce_err_tl1)
1735
1736#endif	/* lint */
1737
1738
1739#if defined(lint)
1740
/* Empty C definition so that lint can see the assembly entry point. */
1741void
1742async_err(void)
1743{}
1744
1745#else	/* lint */
1746
1747/*
1748 * The async_err function handles deferred trap types 0xA
1749 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1750 *
1751 * AFSR errors bits which cause this trap are:
1752 *	UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1753 * On some platforms, EMU may cause cheetah to pull the error pin
1754 * never giving Solaris a chance to take a trap.
1755 *
1756 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1757 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1758 *
1759 * Steps:
1760 *	1. Disable CEEN and NCEEN errors to prevent recursive errors.
1761 *	2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1762 *         I$ line in DO_CPU_LOGOUT.
1763 *	3. Park sibling core if caches are shared (to avoid race
1764 *	   condition while accessing shared resources such as L3
1765 *	   data staging register during CPU logout).
1766 *	4. If the CPU logout structure is not currently being used:
1767 *		5. Clear AFSR error bits
1768 *		6. Capture Ecache, Dcache and Icache lines associated
1769 *		   with AFAR.
1770 *		7. Unpark sibling core if we parked it earlier.
1771 *		8. call cpu_deferred_error via sys_trap.
1772 *	5. Otherwise, if the CPU logout structure is busy:
1773 *		6. Increment "logout busy count"
1774 *		7. Unpark sibling core if we parked it earlier.
1775 *		8. Issue a retry since the other CPU error logging
1776 *		   code will end up finding this error bit and logging
1777 *		   information about it later.
1778 *      6. Alternatively (to 4 and 5 above), if the cpu_private struct is
1779 *         not yet initialized such that we can't even check the logout
1780 *         struct, then we place the clo_flags data into %g2
1781 *         (sys_trap->have_win arg #1) and call cpu_deferred_error via
1782 *         systrap. The clo_flags parameter is used to determine information
1783 *         such as TL, TT, CEEN settings, etc in the high level trap handler
1784 *         since we don't have access to detailed logout information in cases
1785 *         where the cpu_private struct is not yet initialized.
1786 *
1787 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1788 * %g3: [ logout busy count ] - arg #2
1789 */
1790
1791	ENTRY_NP(async_err)
1792	membar	#Sync			! Cheetah requires membar #Sync
1793
1794	/*
1795	 * Disable CEEN and NCEEN.
	 * %g3 keeps the value read from ASI_ESTATE_ERR; its CEEN bit is
	 * extracted below for the logout flags.
1796	 */
1797	ldxa	[%g0]ASI_ESTATE_ERR, %g3
1798	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1799	stxa	%g4, [%g0]ASI_ESTATE_ERR
1800	membar	#Sync			! membar sync required
1801
1802	/*
1803	 * Save current DCU state.
1804	 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
1805	 * Do this regardless of whether this is a Data Access Error or
1806	 * Instruction Access Error Trap.
1807	 * Disable Dcache for both Data Access Error and Instruction Access
1808	 * Error per Cheetah PRM P.5 Note 6.
1809	 */
1810	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1811	andn	%g1, DCU_IC + DCU_DC, %g4
1812	stxa	%g4, [%g0]ASI_DCU
1813	flush	%g0	/* flush required after changing the IC bit */
1814
1815	/*
1816	 * Check to see whether we need to park our sibling core
1817	 * before recording diagnostic information from caches
1818	 * which may be shared by both cores.
1819	 * We use %g1 to store information about whether or not
1820	 * we had to park the core (%g1 holds our DCUCR value and
1821	 * we only use bits from that register which are "reserved"
1822	 * to keep track of core parking) so that we know whether
1823	 * or not to unpark later. %g6 and %g4 are scratch registers.
1824	 */
1825	PARK_SIBLING_CORE(%g1, %g6, %g4)
1826
1827	/*
1828	 * Do the CPU logout capture.
1829	 *
1830	 *   %g3 = "failed?" return value.
1831	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1832	 *         into this macro via %g4. Output only valid if cpu_private
1833	 *         struct has not been initialized.
1834	 *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
1835	 *   %g4 = Trap information stored in the cpu logout flags field
1836	 *   %g5 = scr1
1837	 *   %g6 = scr2
1838	 *   %g3 = scr3
1839	 *   %g4 = scr4
	 *
	 * NOTE(review): %g5 is read below without having been written in
	 * this routine.  It is presumably loaded by the trap table entry
	 * with the trap type, with T_TL1 set when the trap was taken at
	 * TL>0 - confirm against the caller.
	 *
	 * The instructions before the macro build the flags word in %g4:
	 * a TL bit (set when T_TL1 is set in %g5), the trap type shifted
	 * by CLO_FLAGS_TT_SHIFT and masked with CLO_FLAGS_TT_MASK, and the
	 * saved CEEN bit from %g3.
1840	 */
1841	andcc	%g5, T_TL1, %g0
1842	clr	%g6
1843	movnz	%xcc, 1, %g6			! set %g6 if T_TL1 set
1844	sllx	%g6, CLO_FLAGS_TL_SHIFT, %g6
1845	sllx	%g5, CLO_FLAGS_TT_SHIFT, %g4
1846	set	CLO_FLAGS_TT_MASK, %g2
1847	and	%g4, %g2, %g4			! ttype
1848	or	%g6, %g4, %g4			! TT and TL
1849	and	%g3, EN_REG_CEEN, %g3		! CEEN value
1850	or	%g3, %g4, %g4			! TT and TL and CEEN
1851	set	CHPR_ASYNC_LOGOUT, %g6
1852	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1853
1854	/*
1855	 * If the logout struct was busy, we may need to pass the
1856	 * TT, TL, and CEEN information to the TL=0 handler via
1857	 * systrap parameter so save it off here.
	 * When %g3 is non-zero, %g4 is shifted into the upper 32 bits of
	 * %g3, leaving the original %g3 value in the lower 32 bits.
1858	 */
1859	cmp	%g3, %g0
1860	be	1f
1861	  nop
1862	sllx	%g4, 32, %g4
1863	or	%g4, %g3, %g3
18641:
1865	/*
1866	 * Flush the Icache.  Since we turned off the Icache to capture the
1867	 * Icache line it is now stale or corrupted and we must flush it
1868	 * before re-enabling it.
	 * The I$ size and line size are taken from the cpu_private area
	 * (CHPR_ICACHE_SIZE / CHPR_ICACHE_LINESIZE).  async_err_1 is the
	 * label handed to GET_CPU_PRIVATE_PTR, presumably taken when
	 * cpu_private is not available (confirm against the macro); on that
	 * path the global icache_size/icache_linesize are used instead.
1869	 */
1870	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
1871	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1872	ba,pt	%icc, 2f
1873	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1874async_err_1:
1875	ASM_LD(%g5, icache_size)
1876	ASM_LD(%g6, icache_linesize)
18772:
1878	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1879
1880	/*
1881	 * XXX - Don't we need to flush the Dcache before turning it back
1882	 *       on to avoid stale or corrupt data? Was this broken?
1883	 */
1884	/*
1885	 * Flush the Dcache before turning it back on since it may now
1886	 * contain stale or corrupt data.
	 * (This is the flush that the XXX note directly above asks about.)
1887	 */
1888	ASM_LD(%g5, dcache_size)
1889	ASM_LD(%g6, dcache_linesize)
1890	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1891
1892	/*
1893	 * check to see whether we parked our sibling core at the start
1894	 * of this handler. If so, we need to unpark it here.
1895	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1896	 * whether or not we need to unpark. %g5 and %g7 are scratch registers.
1897	 */
1898	UNPARK_SIBLING_CORE(%g1, %g5, %g7)
1899
1900	/*
1901	 * Restore Icache and Dcache to previous state.
1902	 */
1903	stxa	%g1, [%g0]ASI_DCU
1904	flush	%g0	/* flush required after changing the IC bit */
1905
1906	/*
1907	 * Make sure our CPU logout operation was successful.
	 * NOTE(review): %g3 may carry the flags word in its upper 32 bits
	 * at this point (see the packing after DO_CPU_LOGOUT).  The be and
	 * blt below name no condition-code register, so they should test
	 * %icc, i.e. only the low 32 bits of the comparison - confirm
	 * against the SPARC V9 branch definitions.
1908	 */
1909	cmp	%g3, %g0
1910	be	4f
1911	  nop
1912
1913	/*
1914	 * If the logout structure had been busy, how many times have
1915	 * we tried to use it and failed (nesting count)? If we have
1916	 * already recursed a substantial number of times, then we can
1917	 * assume things are not going to get better by themselves and
1918	 * so it would be best to panic.
1919	 */
1920	cmp	%g3, CLO_NESTING_MAX
1921	blt	3f
1922	  nop
1923
	/* The panic reason is loaded into %g1 in the call's delay slot. */
1924        call ptl1_panic
1925          mov   PTL1_BAD_ECC, %g1
1926
19273:
1928	/*
1929	 * Otherwise, if the logout structure was busy but we have not
1930	 * nested more times than our maximum value, then we simply
1931	 * issue a retry. Our TL=0 trap handler code will check and
1932	 * clear the AFSR after it is done logging what is currently
1933	 * in the logout struct and handle this event at that time.
1934	 */
1935	retry
19364:
	/*
	 * Logout succeeded: hand off to cpu_deferred_error through sys_trap
	 * with %g1 = handler address and %g4 = PIL_15 (set in the delay
	 * slot).  RESET_USER_RTT_REGS is defined elsewhere; it is given
	 * %g4/%g5 and the label that immediately follows it.
	 */
1937	RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
1938async_err_resetskip:
1939	set	cpu_deferred_error, %g1
1940	ba	sys_trap
1941	  mov	PIL_15, %g4		! run at pil 15
1942	SET_SIZE(async_err)
1943
1944#endif	/* lint */
1945
1946#if defined(CPU_IMP_L1_CACHE_PARITY)
1947
1948/*
1949 * D$ parity error trap (trap 71) at TL=0.
1950 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
1951 * the various architecture-specific files.  This merely sets up the
1952 * arguments for cpu_parity_error and calls it via sys_trap.
1953 * NB: Must be 8 instructions or less to fit in trap table and code must
1954 *     be relocatable.
1955 */
1956#if defined(lint)
1957
/* Empty C definition so that lint can see the assembly entry point. */
1958void
1959dcache_parity_instr(void)
1960{}
1961
1962#else	/* lint */
	/*
	 * Registers set up for sys_trap:
	 *   %g1 = cpu_parity_error (handler address)
	 *   %g2 = CH_ERR_DPE
	 *   %g3 = %tpc of the trap
	 *   %g4 = PIL_15 (loaded in the jmp delay slot)
	 * sys_trap is reached with sethi/jmp through %g7 rather than a
	 * pc-relative branch, presumably because this code is copied into
	 * the trap table and must stay relocatable.
	 */
1963	ENTRY_NP(dcache_parity_instr)
1964	membar	#Sync			! Cheetah+ requires membar #Sync
1965	set	cpu_parity_error, %g1
1966	or	%g0, CH_ERR_DPE, %g2
1967	rdpr	%tpc, %g3
1968	sethi	%hi(sys_trap), %g7
1969	jmp	%g7 + %lo(sys_trap)
1970	  mov	PIL_15, %g4		! run at pil 15
1971	SET_SIZE(dcache_parity_instr)
1972
1973#endif	/* lint */
1974
1975
1976/*
1977 * D$ parity error trap (trap 71) at TL>0.
1978 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
1979 * the various architecture-specific files.  This generates a "Software
1980 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
1981 * continue the handling there.
1982 * NB: Must be 8 instructions or less to fit in trap table and code must
1983 *     be relocatable.
1984 */
1985#if defined(lint)
1986
/* Empty C definition so that lint can see the assembly entry point. */
1987void
1988dcache_parity_tl1_instr(void)
1989{}
1990
1991#else	/* lint */
	/*
	 * The whole trap table entry is the CH_ERR_TL1_TRAPENTRY macro
	 * (defined elsewhere), invoked with SWTRAP_1 as the software trap
	 * to raise.
	 */
1992	ENTRY_NP(dcache_parity_tl1_instr)
1993	CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
1994	SET_SIZE(dcache_parity_tl1_instr)
1995
1996#endif	/* lint */
1997
1998
1999/*
2000 * Software trap 1 at TL>0.
2001 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
2002 * of the various architecture-specific files.  This is used as a continuation
2003 * of the dcache parity handling where we've bought an extra TL level, so we
2004 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2005 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2006 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
2007 * order two bits from %g1 and %g2 respectively).
2008 * NB: Must be 8 instructions or less to fit in trap table and code must
2009 *     be relocatable.
2010 */
2011#if defined(lint)
2012
/* Empty C definition so that lint can see the assembly entry point. */
2013void
2014dcache_parity_tl1_cont_instr(void)
2015{}
2016
2017#else	/* lint */
	/*
	 * The whole trap table entry is the CH_ERR_TL1_SWTRAPENTRY macro
	 * (defined elsewhere), invoked with dcache_parity_tl1_err as the
	 * handler to continue in.
	 */
2018	ENTRY_NP(dcache_parity_tl1_cont_instr)
2019	CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
2020	SET_SIZE(dcache_parity_tl1_cont_instr)
2021
2022#endif	/* lint */
2023
2024/*
2025 * D$ parity error at TL>0 handler
2026 * We get here via trap 71 at TL>0->Software trap 1 at TL>0.  We enter
2027 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2028 */
2029#if defined(lint)
2030
/* Empty C definition so that lint can see the assembly entry point. */
2031void
2032dcache_parity_tl1_err(void)
2033{}
2034
2035#else	/* lint */
2036
2037	ENTRY_NP(dcache_parity_tl1_err)
2038
2039	/*
2040	 * This macro saves all the %g registers in the ch_err_tl1_data
2041	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2042	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
2043	 * the ch_err_tl1_data structure and %g2 will have the original
2044	 * flags in the ch_err_tl1_data structure.  All %g registers
2045	 * except for %g1 and %g2 will be available.
2046	 */
2047	CH_ERR_TL1_ENTER(CH_ERR_DPE);
2048
2049#ifdef TRAPTRACE
2050	/*
2051	 * Get current trap trace entry physical pointer.
	 * %g6 = address of this CPU's trap_trace_ctl entry (CPU index
	 * shifted by TRAPTR_SIZE_SHIFT, added to trap_trace_ctl).  Tracing
	 * is skipped when the TRAPTR_LIMIT field is zero.  Otherwise
	 * %g5 = TRAPTR_PBASE + TRAPTR_OFFSET, the entry to fill in.
2052	 */
2053	CPU_INDEX(%g6, %g5)
2054	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
2055	set	trap_trace_ctl, %g5
2056	add	%g6, %g5, %g6
2057	ld	[%g6 + TRAPTR_LIMIT], %g5
2058	tst	%g5
2059	be	%icc, dpe_tl1_skip_tt
2060	  nop
2061	ldx	[%g6 + TRAPTR_PBASE], %g5
2062	ld	[%g6 + TRAPTR_OFFSET], %g4
2063	add	%g5, %g4, %g5
2064
2065	/*
2066	 * Create trap trace entry.
	 * %asi is saved in %g7, switched to TRAPTR_ASI for the stores, and
	 * restored afterwards.  Recorded fields: STICK, %tl, %tt, %tpc,
	 * %tstate and %sp; the TR and F1-F4 fields are written as zero.
2067	 */
2068	rd	%asi, %g7
2069	wr	%g0, TRAPTR_ASI, %asi
2070	rd	STICK, %g4
2071	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
2072	rdpr	%tl, %g4
2073	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
2074	rdpr	%tt, %g4
2075	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
2076	rdpr	%tpc, %g4
2077	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
2078	rdpr	%tstate, %g4
2079	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
2080	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2081	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2082	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
2083	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
2084	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
2085	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
2086	wr	%g0, %g7, %asi
2087
2088	/*
2089	 * Advance trap trace pointer.
	 * The current offset is remembered in TRAPTR_LAST_OFFSET, then
	 * advanced by TRAP_ENT_SIZE; it wraps to 0 once the new offset is
	 * greater than or equal to TRAPTR_LIMIT - TRAP_ENT_SIZE.
2090	 */
2091	ld	[%g6 + TRAPTR_OFFSET], %g5
2092	ld	[%g6 + TRAPTR_LIMIT], %g4
2093	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
2094	add	%g5, TRAP_ENT_SIZE, %g5
2095	sub	%g4, TRAP_ENT_SIZE, %g4
2096	cmp	%g5, %g4
2097	movge	%icc, 0, %g5
2098	st	%g5, [%g6 + TRAPTR_OFFSET]
2099dpe_tl1_skip_tt:
2100#endif	/* TRAPTRACE */
2101
2102	/*
2103	 * I$ and D$ are automatically turned off by HW when the CPU hits
2104	 * a dcache or icache parity error so we will just leave those two
2105	 * off for now to avoid repeating this trap.
2106	 * For Panther, however, since we trap on P$ data parity errors
2107	 * and HW does not automatically disable P$, we need to disable it
2108	 * here so that we don't encounter any recursive traps when we
2109	 * issue the retry.
	 * The code clears the single DCU bit at position DCU_PE_SHIFT and
	 * writes the result back to ASI_DCU.
2110	 */
2111	ldxa	[%g0]ASI_DCU, %g3
2112	mov	1, %g4
2113	sllx	%g4, DCU_PE_SHIFT, %g4
2114	andn	%g3, %g4, %g3
2115	stxa	%g3, [%g0]ASI_DCU
2116	membar	#Sync
2117
2118	/*
2119	 * We fall into this macro if we've successfully logged the error in
2120	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2121	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
2122	 * Restores the %g registers and issues retry.
2123	 */
2124	CH_ERR_TL1_EXIT;
2125	SET_SIZE(dcache_parity_tl1_err)
2126
2127#endif	/* lint */
2128
2129/*
2130 * I$ parity error trap (trap 72) at TL=0.
2131 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
2132 * the various architecture-specific files.  This merely sets up the
2133 * arguments for cpu_parity_error and calls it via sys_trap.
2134 * NB: Must be 8 instructions or less to fit in trap table and code must
2135 *     be relocatable.
2136 */
2137#if defined(lint)
2138
/* Empty C definition so that lint can see the assembly entry point. */
2139void
2140icache_parity_instr(void)
2141{}
2142
2143#else	/* lint */
2144
	/*
	 * Registers set up for sys_trap:
	 *   %g1 = cpu_parity_error (handler address)
	 *   %g2 = CH_ERR_IPE
	 *   %g3 = %tpc of the trap
	 *   %g4 = PIL_15 (loaded in the jmp delay slot)
	 * sys_trap is reached with sethi/jmp through %g7 rather than a
	 * pc-relative branch, presumably because this code is copied into
	 * the trap table and must stay relocatable.
	 */
2145	ENTRY_NP(icache_parity_instr)
2146	membar	#Sync			! Cheetah+ requires membar #Sync
2147	set	cpu_parity_error, %g1
2148	or	%g0, CH_ERR_IPE, %g2
2149	rdpr	%tpc, %g3
2150	sethi	%hi(sys_trap), %g7
2151	jmp	%g7 + %lo(sys_trap)
2152	  mov	PIL_15, %g4		! run at pil 15
2153	SET_SIZE(icache_parity_instr)
2154
2155#endif	/* lint */
2156
2157/*
2158 * I$ parity error trap (trap 72) at TL>0.
2159 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
2160 * the various architecture-specific files.  This generates a "Software
2161 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
2162 * continue the handling there.
2163 * NB: Must be 8 instructions or less to fit in trap table and code must
2164 *     be relocatable.
2165 */
2166#if defined(lint)
2167
2168void
2169icache_parity_tl1_instr(void)
2170{}
2171
2172#else	/* lint */
	/*
	 * TL>0 I$ parity trap stub.  The whole body is the
	 * CH_ERR_TL1_TRAPENTRY macro (defined outside this file section),
	 * instantiated with SWTRAP_2 as the software trap to raise.
	 */
2173	ENTRY_NP(icache_parity_tl1_instr)
2174	CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
2175	SET_SIZE(icache_parity_tl1_instr)
2176
2177#endif	/* lint */
2178
2179/*
2180 * Software trap 2 at TL>0.
2181 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
2182 * of the various architecture-specific files.  This is used as a continuation
2183 * of the icache parity handling where we've bought an extra TL level, so we
2184 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2185 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2186 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
2187 * order two bits from %g1 and %g2 respectively).
2188 * NB: Must be 8 instructions or less to fit in trap table and code must
2189 *     be relocatable.
2190 */
2191#if defined(lint)
2192
2193void
2194icache_parity_tl1_cont_instr(void)
2195{}
2196
2197#else	/* lint */
	/*
	 * Software trap 2 continuation stub.  The whole body is the
	 * CH_ERR_TL1_SWTRAPENTRY macro (defined outside this file section),
	 * instantiated with icache_parity_tl1_err as the handler to
	 * continue in.
	 */
2198	ENTRY_NP(icache_parity_tl1_cont_instr)
2199	CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
2200	SET_SIZE(icache_parity_tl1_cont_instr)
2201
2202#endif	/* lint */
2203
2204
2205/*
2206 * I$ parity error at TL>0 handler
2207 * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
2208 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2209 */
2210#if defined(lint)
2211
2212void
2213icache_parity_tl1_err(void)
2214{}
2215
2216#else	/* lint */
2217
	/*
	 * Handler outline:
	 *   1. CH_ERR_TL1_ENTER(CH_ERR_IPE) saves state (see comment below).
	 *   2. If TRAPTRACE is built in and this CPU's trace buffer limit
	 *      is non-zero, write one trap trace entry and advance the
	 *      buffer offset.
	 *   3. CH_ERR_TL1_EXIT restores state and retries.
	 *
	 * Registers used by the TRAPTRACE code: %g4, %g5, %g6, %g7.
	 * %g1 and %g2 are left alone, as CH_ERR_TL1_EXIT needs %g1.
	 */
2218	ENTRY_NP(icache_parity_tl1_err)
2219
2220	/*
2221	 * This macro saves all the %g registers in the ch_err_tl1_data
2222	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2223	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
2224	 * the ch_err_tl1_data structure and %g2 will have the original
2225	 * flags in the ch_err_tl1_data structure.  All %g registers
2226	 * except for %g1 and %g2 will be available.
2227	 */
2228	CH_ERR_TL1_ENTER(CH_ERR_IPE);
2229
2230#ifdef TRAPTRACE
2231	/*
2232	 * Get current trap trace entry physical pointer.
2233	 */
	/*
	 * %g6 = trap_trace_ctl + (cpu index << TRAPTR_SIZE_SHIFT), i.e. the
	 * control record for this CPU.  A zero TRAPTR_LIMIT skips tracing.
	 * Otherwise %g5 = TRAPTR_PBASE + TRAPTR_OFFSET, the entry to fill.
	 */
2234	CPU_INDEX(%g6, %g5)
2235	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
2236	set	trap_trace_ctl, %g5
2237	add	%g6, %g5, %g6
2238	ld	[%g6 + TRAPTR_LIMIT], %g5
2239	tst	%g5
2240	be	%icc, ipe_tl1_skip_tt
2241	  nop
2242	ldx	[%g6 + TRAPTR_PBASE], %g5
2243	ld	[%g6 + TRAPTR_OFFSET], %g4
2244	add	%g5, %g4, %g5
2245
2246	/*
2247	 * Create trap trace entry.
2248	 */
	/*
	 * The caller's %asi is parked in %g7 while TRAPTR_ASI is in use and
	 * is put back once the entry is complete.  Fields recorded: STICK,
	 * %tl, %tt, %tpc, %tstate and %sp; TR and F1-F4 are zeroed.
	 */
2249	rd	%asi, %g7
2250	wr	%g0, TRAPTR_ASI, %asi
2251	rd	STICK, %g4
2252	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
2253	rdpr	%tl, %g4
2254	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
2255	rdpr	%tt, %g4
2256	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
2257	rdpr	%tpc, %g4
2258	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
2259	rdpr	%tstate, %g4
2260	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
2261	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2262	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2263	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
2264	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
2265	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
2266	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
2267	wr	%g0, %g7, %asi
2268
2269	/*
2270	 * Advance trap trace pointer.
2271	 */
	/*
	 * The old offset is remembered in TRAPTR_LAST_OFFSET.  The new
	 * offset is old + TRAP_ENT_SIZE, wrapped to 0 when it is >=
	 * (limit - TRAP_ENT_SIZE); the comparison is a signed 32-bit one.
	 */
2272	ld	[%g6 + TRAPTR_OFFSET], %g5
2273	ld	[%g6 + TRAPTR_LIMIT], %g4
2274	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
2275	add	%g5, TRAP_ENT_SIZE, %g5
2276	sub	%g4, TRAP_ENT_SIZE, %g4
2277	cmp	%g5, %g4
2278	movge	%icc, 0, %g5
2279	st	%g5, [%g6 + TRAPTR_OFFSET]
2280ipe_tl1_skip_tt:
2281#endif	/* TRAPTRACE */
2282
2283	/*
2284	 * We fall into this macro if we've successfully logged the error in
2285	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2286	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
2287	 * Restores the %g registers and issues retry.
2288	 */
2289	CH_ERR_TL1_EXIT;
2290
2291	SET_SIZE(icache_parity_tl1_err)
2292
2293#endif	/* lint */
2294
2295#endif	/* CPU_IMP_L1_CACHE_PARITY */
2296
2297
2298/*
2299 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
2300 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
2301 * should only be used in places where you have no choice but to look at the
2302 * tlb itself.
2303 *
2304 * Note: These two routines are required by the Estar "cpr" loadable module.
2305 */
2306
2307#if defined(lint)
2308
2309/* ARGSUSED */
2310void
2311itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2312{}
2313
2314#else	/* lint */
2315
	/*
	 * In:	%o0 = entry (TLB entry number)
	 *	%o1 = tte (receives the 64-bit word read via ASI_ITLB_ACCESS)
	 *	%o2 = va_tag (receives the word read via ASI_ITLB_TAGREAD
	 *	      with the TAGREAD_CTX_MASK bits cleared)
	 * Clobbers: %o0 (becomes entry << 3), %o4, %o5, %g1, %g2.
	 * Leaf routine; the final store sits in the retl delay slot.
	 */
2316	ENTRY_NP(itlb_rd_entry)
2317	sllx	%o0, 3, %o0
2318	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
2319	stx	%g1, [%o1]
2320	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
2321	set	TAGREAD_CTX_MASK, %o4
2322	andn	%g2, %o4, %o5
2323	retl
2324	  stx	%o5, [%o2]
2325	SET_SIZE(itlb_rd_entry)
2326
2327#endif	/* lint */
2328
2329
2330#if defined(lint)
2331
2332/* ARGSUSED */
2333void
2334dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2335{}
2336
2337#else	/* lint */
2338
	/*
	 * In:	%o0 = entry (TLB entry number)
	 *	%o1 = tte (receives the 64-bit word read via ASI_DTLB_ACCESS)
	 *	%o2 = va_tag (receives the word read via ASI_DTLB_TAGREAD
	 *	      with the TAGREAD_CTX_MASK bits cleared)
	 * Clobbers: %o0 (becomes entry << 3), %o4, %o5, %g1, %g2.
	 * Leaf routine; the final store sits in the retl delay slot.
	 */
2339	ENTRY_NP(dtlb_rd_entry)
2340	sllx	%o0, 3, %o0
2341	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
2342	stx	%g1, [%o1]
2343	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
2344	set	TAGREAD_CTX_MASK, %o4
2345	andn	%g2, %o4, %o5
2346	retl
2347	  stx	%o5, [%o2]
2348	SET_SIZE(dtlb_rd_entry)
2349#endif	/* lint */
2350
2351
2352#if !(defined(JALAPENO) || defined(SERRANO))
2353
2354#if defined(lint)
2355
2356uint64_t
2357get_safari_config(void)
2358{ return (0); }
2359
2360#else	/* lint */
2361
	/*
	 * Returns in %o0 the 64-bit value loaded from address 0 of
	 * ASI_SAFARI_CONFIG.  Leaf routine; no other register is touched.
	 */
2362	ENTRY(get_safari_config)
2363	ldxa	[%g0]ASI_SAFARI_CONFIG, %o0
2364	retl
2365	nop
2366	SET_SIZE(get_safari_config)
2367
2368#endif	/* lint */
2369
2370
2371#if defined(lint)
2372
2373/* ARGSUSED */
2374void
2375set_safari_config(uint64_t safari_config)
2376{}
2377
2378#else	/* lint */
2379
	/*
	 * In:	%o0 = safari_config, stored to address 0 of
	 *	ASI_SAFARI_CONFIG.  The membar #Sync is issued before
	 *	returning so the store has completed when the caller resumes.
	 * Leaf routine; no register is modified.
	 */
2380	ENTRY(set_safari_config)
2381	stxa	%o0, [%g0]ASI_SAFARI_CONFIG
2382	membar	#Sync
2383	retl
2384	nop
2385	SET_SIZE(set_safari_config)
2386
2387#endif	/* lint */
2388
2389#endif	/* !(JALAPENO || SERRANO) */
2390
2391
2392#if defined(lint)
2393
2394void
2395cpu_clearticknpt(void)
2396{}
2397
2398#else	/* lint */
2399	/*
2400	 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
2401	 * registers. In an effort to make the change in the
2402	 * tick/stick counter as consistent as possible, we disable
2403	 * all interrupts while we're changing the registers. We also
2404	 * ensure that the read and write instructions are in the same
2405	 * line in the instruction cache.
2406	 */
	/*
	 * Linkage: this routine does NOT return with retl.  It leaves via
	 * "jmp %g4 + 4", so the caller has to supply its resume address
	 * (minus 4) in %g4.
	 * NOTE(review): presumably entered with a jmp whose delay slot is
	 * "rd %pc, %g4" -- confirm against the callers.
	 *
	 * Register usage:
	 *	%g1 = saved %pstate, written back in the jmp delay slot
	 *	%g2 = current %tick / STICK value
	 *	%g3 = %pstate with IE cleared, then the NPT mask (1 << 63)
	 *	%g4 = return linkage (input, not modified)
	 *
	 * NPT is bit 63, so brgez (value >= 0) means "NPT already clear".
	 * wrpr/wr write (rs1 xor rs2); since bit 63 is known to be set on
	 * these paths, xor-ing with the mask clears it.
	 */
2407	ENTRY_NP(cpu_clearticknpt)
2408	rdpr	%pstate, %g1		/* save processor state */
2409	andn	%g1, PSTATE_IE, %g3	/* turn off */
2410	wrpr	%g0, %g3, %pstate	/*   interrupts */
2411	rdpr	%tick, %g2		/* get tick register */
2412	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
2413	mov	1, %g3			/* create mask */
2414	sllx	%g3, 63, %g3		/*   for NPT bit */
2415	ba,a,pt	%xcc, 2f
2416	.align	8			/* Ensure rd/wr in same i$ line */
24172:
2418	rdpr	%tick, %g2		/* get tick register */
2419	wrpr	%g3, %g2, %tick		/* write tick register, */
2420					/*   clearing NPT bit   */
24211:
2422	rd	STICK, %g2		/* get stick register */
2423	brgez,pn %g2, 3f		/* if NPT bit off, we're done */
2424	mov	1, %g3			/* create mask */
2425	sllx	%g3, 63, %g3		/*   for NPT bit */
2426	ba,a,pt	%xcc, 4f
2427	.align	8			/* Ensure rd/wr in same i$ line */
24284:
2429	rd	STICK, %g2		/* get stick register */
2430	wr	%g3, %g2, STICK		/* write stick register, */
2431					/*   clearing NPT bit   */
24323:
2433	jmp	%g4 + 4
2434	wrpr	%g0, %g1, %pstate	/* restore processor state */
2435
2436	SET_SIZE(cpu_clearticknpt)
2437
2438#endif	/* lint */
2439
2440
2441#if defined(CPU_IMP_L1_CACHE_PARITY)
2442
2443#if defined(lint)
2444/*
2445 * correct_dcache_parity(size_t size, size_t linesize)
2446 *
2447 * Correct D$ data parity by zeroing the data and initializing microtag
2448 * for all indexes and all ways of the D$.
2449 *
2450 */
2451/* ARGSUSED */
2452void
2453correct_dcache_parity(size_t size, size_t linesize)
2454{}
2455
2456#else	/* lint */
2457
	/*
	 * Loop structure:
	 *   outer (label 1): %o0 walks line addresses from (size - linesize)
	 *	down to 0 in steps of linesize;
	 *   inner (label 2): %o2 walks 8-byte offsets within the line from
	 *	(linesize - 8) down to 0.
	 * %o3 holds the Panther-only DC_data_parity address bit (0 on other
	 * implementations) and %o4 = %o0 | %o3 is recomputed for each line.
	 */
2458	ENTRY(correct_dcache_parity)
2459	/*
2460	 * Register Usage:
2461	 *
2462	 * %o0 = input D$ size
2463	 * %o1 = input D$ line size
2464	 * %o2 = scratch
2465	 * %o3 = scratch
2466	 * %o4 = scratch
2467	 */
2468
2469	sub	%o0, %o1, %o0			! init cache line address
2470
2471	/*
2472	 * For Panther CPUs, we also need to clear the data parity bits
2473	 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
2474	 */
2475	GET_CPU_IMPL(%o3)
2476	cmp	%o3, PANTHER_IMPL
2477	bne	1f
2478	  clr	%o3				! zero for non-Panther
2479	mov	1, %o3
2480	sll	%o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3
2481
24821:
2483	/*
2484	 * Set utag = way since it must be unique within an index.
2485	 */
2486	srl	%o0, 14, %o2			! get cache way (DC_way)
2487	membar	#Sync				! required before ASI_DC_UTAG
2488	stxa	%o2, [%o0]ASI_DC_UTAG		! set D$ utag = cache way
2489	membar	#Sync				! required after ASI_DC_UTAG
2490
2491	/*
2492	 * Zero line of D$ data (and data parity bits for Panther)
2493	 */
2494	sub	%o1, 8, %o2
2495	or	%o0, %o3, %o4			! same address + DC_data_parity
24962:
2497	membar	#Sync				! required before ASI_DC_DATA
2498	stxa	%g0, [%o0 + %o2]ASI_DC_DATA	! zero 8 bytes of D$ data
2499	membar	#Sync				! required after ASI_DC_DATA
2500	/*
2501	 * We also clear the parity bits if this is a panther. For non-Panther
2502	 * CPUs, we simply end up clearing the $data register twice.
2503	 */
2504	stxa	%g0, [%o4 + %o2]ASI_DC_DATA
2505	membar	#Sync
2506
	/* both loops stop once the subtraction goes negative */
2507	subcc	%o2, 8, %o2
2508	bge	2b
2509	nop
2510
2511	subcc	%o0, %o1, %o0
2512	bge	1b
2513	nop
2514
2515	retl
2516	  nop
2517	SET_SIZE(correct_dcache_parity)
2518
2519#endif	/* lint */
2520
2521#endif	/* CPU_IMP_L1_CACHE_PARITY */
2522
2523
2524#if defined(lint)
2525/*
2526 *  Get timestamp (stick).
2527 */
2528/* ARGSUSED */
2529void
2530stick_timestamp(int64_t *ts)
2531{
2532}
2533
2534#else	/* lint */
2535
	/*
	 * In:	%o0 = ts, receives the STICK value with bit 63 cleared
	 *	(shift left by one, then logical shift right by one).
	 * Clobbers: %g1.  Leaf routine; the store is in the retl delay slot.
	 */
2536	ENTRY_NP(stick_timestamp)
2537	rd	STICK, %g1	! read stick reg
2538	sllx	%g1, 1, %g1
2539	srlx	%g1, 1, %g1	! clear npt bit
2540
2541	retl
2542	stx     %g1, [%o0]	! store the timestamp
2543	SET_SIZE(stick_timestamp)
2544
2545#endif	/* lint */
2546
2547
2548#if defined(lint)
2549/*
2550 * Set STICK adjusted by skew.
2551 */
2552/* ARGSUSED */
2553void
2554stick_adj(int64_t skew)
2555{
2556}
2557
2558#else	/* lint */
2559
	/*
	 * In:	%o0 = skew, added to the current STICK value.
	 * Clobbers: %g1 (saved %pstate), %g3, %g4, %o1.
	 *
	 * Interrupts are disabled in the delay slot of the "ba", which
	 * jumps over the padding to the 16-byte aligned label 1; the
	 * read/add/write of STICK follows immediately after that label.
	 * %pstate is restored in the retl delay slot.
	 */
2560	ENTRY_NP(stick_adj)
2561	rdpr	%pstate, %g1		! save processor state
2562	andn	%g1, PSTATE_IE, %g3
2563	ba	1f			! cache align stick adj
2564	wrpr	%g0, %g3, %pstate	! turn off interrupts
2565
2566	.align	16
25671:	nop
2568
2569	rd	STICK, %g4		! read stick reg
2570	add	%g4, %o0, %o1		! adjust stick with skew
2571	wr	%o1, %g0, STICK		! write stick reg
2572
2573	retl
2574	wrpr	%g1, %pstate		! restore processor state
2575	SET_SIZE(stick_adj)
2576
2577#endif	/* lint */
2578
2579#if defined(lint)
2580/*
2581 * Debugger-specific stick retrieval
2582 */
2583/*ARGSUSED*/
2584int
2585kdi_get_stick(uint64_t *stickp)
2586{
2587	return (0);
2588}
2589
2590#else	/* lint */
2591
	/*
	 * In:	%o0 = stickp, receives the raw STICK register value (bit 63
	 *	is not masked off here, unlike in stick_timestamp).
	 * Out:	%o0 = 0, set in the retl delay slot.
	 * Clobbers: %g1.
	 */
2592	ENTRY_NP(kdi_get_stick)
2593	rd	STICK, %g1
2594	stx	%g1, [%o0]
2595	retl
2596	mov	%g0, %o0
2597	SET_SIZE(kdi_get_stick)
2598
2599#endif	/* lint */
2600
2601#if defined(lint)
2602/*
2603 * Invalidate the specified line from the D$.
2604 *
2605 * Register usage:
2606 *	%o0 - index for the invalidation, specifies DC_way and DC_addr
2607 *
2608 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
2609 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
2610 *
2611 * The format of the stored 64-bit value is:
2612 *
2613 *	+----------+--------+----------+
2614 *	| Reserved | DC_tag | DC_valid |
2615 *	+----------+--------+----------+
2616 *       63      31 30     1	      0
2617 *
2618 * DC_tag is the 30-bit physical tag of the associated line.
2619 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
2620 *
2621 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
2622 *
2623 *	+----------+--------+----------+----------+
2624 *	| Reserved | DC_way | DC_addr  | Reserved |
2625 *	+----------+--------+----------+----------+
2626 *       63      16 15    14 13       5 4        0
2627 *
2628 * DC_way is a 2-bit index that selects one of the 4 ways.
2629 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
2630 *
2631 * Setting the DC_valid bit to zero for the specified DC_way and
2632 * DC_addr index into the D$ results in an invalidation of a D$ line.
2633 */
2634/*ARGSUSED*/
2635void
2636dcache_inval_line(int index)
2637{
2638}
2639#else	/* lint */
	/*
	 * In:	%o0 = index.  It is shifted left by 5 so that it lands in
	 *	bits 5 and up (the DC_addr/DC_way fields described in the
	 *	lint-arm comment), then a zero word is stored to that
	 *	address in ASI_DC_TAG.
	 * Clobbers: %o0.  The shift is a 32-bit "sll".
	 */
2640	ENTRY(dcache_inval_line)
2641	sll	%o0, 5, %o0		! shift index into DC_way and DC_addr
2642	stxa	%g0, [%o0]ASI_DC_TAG	! zero the DC_valid and DC_tag bits
2643	membar	#Sync
2644	retl
2645	nop
2646	SET_SIZE(dcache_inval_line)
2647#endif	/* lint */
2648
2649#if defined(lint)
2650/*
2651 * Invalidate the entire I$
2652 *
2653 * Register usage:
2654 *	%o0 - specifies IC_way, IC_addr, IC_tag
2655 *	%o1 - scratch
2656 *	%o2 - used to save and restore DCU value
2657 *	%o3 - scratch
2658 *	%o5 - used to save and restore PSTATE
2659 *
2660 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
2661 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
2662 * block out snoops and invalidates to the I$, causing I$ consistency
2663 * to be broken. Before turning on the I$, all I$ lines must be invalidated.
2664 *
2665 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
2666 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
2667 * info below describes store (write) use of ASI_IC_TAG. Note that read
2668 * use of ASI_IC_TAG behaves differently.
2669 *
2670 * The format of the stored 64-bit value is:
2671 *
2672 *	+----------+--------+---------------+-----------+
2673 *	| Reserved | Valid  | IC_vpred<7:0> | Undefined |
2674 *	+----------+--------+---------------+-----------+
2675 *       63      55    54    53           46 45        0
2676 *
2677 * Valid is the 1-bit valid field for both the physical and snoop tags.
2678 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
2679 *	the 32-byte boundary aligned address specified by IC_addr.
2680 *
2681 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
2682 *
2683 *	+----------+--------+---------+--------+---------+
2684 *	| Reserved | IC_way | IC_addr | IC_tag |Reserved |
2685 *	+----------+--------+---------+--------+---------+
2686 *       63      16 15    14 13      5 4      3 2       0
2687 *
2688 * IC_way is a 2-bit index that selects one of the 4 ways.
2689 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
2690 * IC_addr[5] is a "don't care" for a store.
2691 * IC_tag set to 2 specifies that the stored value is to be interpreted
2692 *	as containing Valid and IC_vpred as described above.
2693 *
2694 * Setting the Valid bit to zero for the specified IC_way and
2695 * IC_addr index into the I$ results in an invalidation of an I$ line.
2696 */
2697/*ARGSUSED*/
2698void
2699icache_inval_all(void)
2700{
2701}
2702#else	/* lint */
	/*
	 * Flow of the code below:
	 *   - save %pstate in %o5 and run with PSTATE_IE cleared;
	 *   - obtain the I$ size in %o0 and line size in %o1, either from
	 *     the CHPR_ICACHE_SIZE / CHPR_ICACHE_LINESIZE fields reached
	 *     through GET_CPU_PRIVATE_PTR, or from the icache_size /
	 *     icache_linesize variables at icache_inval_all_1;
	 *   - hand both to CH_ICACHE_FLUSHALL, with %o2 and %o4 as its
	 *     remaining arguments;
	 *   - restore %pstate in the retl delay slot.
	 * NOTE(review): icache_inval_all_1 is passed to GET_CPU_PRIVATE_PTR
	 * as a label; presumably the macro branches there when no private
	 * area is available -- confirm in the macro definition.
	 */
2703	ENTRY(icache_inval_all)
2704	rdpr	%pstate, %o5
2705	andn	%o5, PSTATE_IE, %o3
2706	wrpr	%g0, %o3, %pstate	! clear IE bit
2707
2708	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
2709	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
2710	ba,pt	%icc, 2f
2711	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0
2712icache_inval_all_1:
2713	ASM_LD(%o0, icache_size)
2714	ASM_LD(%o1, icache_linesize)
27152:
2716	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
2717
2718	retl
2719	wrpr	%g0, %o5, %pstate	! restore earlier pstate
2720	SET_SIZE(icache_inval_all)
2721#endif	/* lint */
2722
2723
2724#if defined(lint)
2725/* ARGSUSED */
2726void
2727cache_scrubreq_tl1(uint64_t inum, uint64_t index)
2728{
2729}
2730
2731#else	/* lint */
2732/*
2733 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
2734 * crosstrap.  It atomically increments the outstanding request counter and,
2735 * if there was not already an outstanding request, branches to setsoftint_tl1
2736 * to enqueue an intr_vec for the given inum.
2737 */
2738
2739	! Register usage:
2740	!
2741	! Arguments:
2742	! %g1 - inum
2743	! %g2 - index into chsm_outstanding array
2744	!
2745	! Internal:
2746	! %g2, %g3, %g5 - scratch
2747	! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2748	! %g6 - setsoftint_tl1 address
2749
	/*
	 * %g2 is first turned into the byte offset
	 *	index * CHSM_OUTSTANDING_INCR + CHPR_SCRUB_MISC +
	 *	CHSM_OUTSTANDING
	 * and handed to GET_CPU_PRIVATE_PTR together with %g4, %g5 and the
	 * label 1f (the "retry" exit).
	 * NOTE(review): presumably the macro adds that offset to the cpu
	 * private pointer and branches to 1f when there is no private
	 * area -- confirm in the macro definition.
	 *
	 * The incremented counter is always stored (branch delay slot);
	 * setsoftint_tl1 is entered only when the old value was zero.
	 */
2750	ENTRY_NP(cache_scrubreq_tl1)
2751	mulx	%g2, CHSM_OUTSTANDING_INCR, %g2
2752	set	CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
2753	add	%g2, %g3, %g2
2754	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
2755	ld	[%g4], %g2		! cpu's chsm_outstanding[index]
2756	!
2757	! no need to use atomic instructions for the following
2758	! increment - we're at tl1
2759	!
2760	add	%g2, 0x1, %g3
2761	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
2762	  st	%g3, [%g4]		! delay - store incremented counter
2763	ASM_JMP(%g6, setsoftint_tl1)
2764	! not reached
27651:
2766	retry
2767	SET_SIZE(cache_scrubreq_tl1)
2768
2769#endif	/* lint */
2770
2771
2772#if defined(lint)
2773
2774/* ARGSUSED */
2775void
2776get_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
2777{}
2778
2779#else	/* lint */
2780
2781/*
2782 * Get the error state for the processor.
2783 * Note that this must not be used at TL>0
2784 */
	/*
	 * In:	%o0 = cpu_error_regs, the structure to fill in.  %o0 itself
	 *	is never written by this routine.
	 * Clobbers: %o1, %o2, %o3.
	 *
	 * Fields stored, in order:
	 *   CHEETAH_PLUS builds: shadow AFSR and shadow AFAR; then, on
	 *	Panther only, AFSR_EXT and shadow AFSR_EXT (both fields are
	 *	zeroed on other implementations).
	 *   other builds: those four fields are simply zeroed.
	 *   SERRANO builds: AFAR2, read through ASI_MCU_CTRL.
	 *   always: primary AFSR, then primary AFAR (stored in the retl
	 *	delay slot).
	 */
2785	ENTRY(get_cpu_error_state)
2786#if defined(CHEETAH_PLUS)
2787	set	ASI_SHADOW_REG_VA, %o2
2788	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr reg
2789	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2790	ldxa	[%o2]ASI_AFAR, %o1		! shadow afar reg
2791	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2792	GET_CPU_IMPL(%o3)	! Only panther has AFSR_EXT registers
2793	cmp	%o3, PANTHER_IMPL
	/* annulled delay slot: the zeroing store runs only when not Panther */
2794	bne,a	1f
2795	  stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]	! zero for non-PN
2796	set	ASI_AFSR_EXT_VA, %o2
2797	ldxa	[%o2]ASI_AFSR, %o1		! afsr_ext reg
2798	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2799	set	ASI_SHADOW_AFSR_EXT_VA, %o2
2800	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr_ext reg
2801	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2802	b	2f
2803	  nop
28041:
2805	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
28062:
2807#else	/* CHEETAH_PLUS */
2808	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2809	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2810	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2811	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2812#endif	/* CHEETAH_PLUS */
2813#if defined(SERRANO)
2814	/*
2815	 * Serrano has an afar2 which captures the address on FRC/FRU errors.
2816	 * We save this in the afar2 of the register save area.
2817	 */
2818	set	ASI_MCU_AFAR2_VA, %o2
2819	ldxa	[%o2]ASI_MCU_CTRL, %o1
2820	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR2]
2821#endif	/* SERRANO */
2822	ldxa	[%g0]ASI_AFSR, %o1		! primary afsr reg
2823	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR]
2824	ldxa	[%g0]ASI_AFAR, %o1		! primary afar reg
2825	retl
2826	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR]
2827	SET_SIZE(get_cpu_error_state)
2828#endif	/* lint */
2829
2830#if defined(lint)
2831
2832/*
2833 * Check a page of memory for errors.
2834 *
2835 * Load each 64 byte block from physical memory.
2836 * Check AFSR after each load to see if an error
2837 * was caused. If so, log/scrub that error.
2838 *
2839 * Used to determine if a page contains
2840 * CEs when CEEN is disabled.
2841 */
2842/*ARGSUSED*/
2843void
2844cpu_check_block(caddr_t va, uint_t psz)
2845{}
2846
2847#else	/* lint */
2848
	/*
	 * In:	%i0 = va, start of the region (advanced by 64 per block)
	 *	%i1 = psz, region size in bytes
	 * Locals:
	 *	%l2 = saved %fprs (the block load targets FP registers, so
	 *	      FPRS_FEF is forced on for the duration)
	 *	%l3 = AFSR value read after each block load
	 *	%l4 = number of 64-byte blocks still to check
	 * A ch_cpu_errors_t sized area (CH_CPU_ERROR_SIZE bytes) is
	 * reserved at the top of the new stack frame.
	 *
	 * NOTE(review): the block count is not tested before the first
	 * iteration, so psz < 64 would still load one block and then
	 * decrement the count past zero -- callers presumably always pass
	 * a non-zero multiple of 64; confirm.
	 */
2849	ENTRY(cpu_check_block)
2850	!
2851	! get a new window with room for the error regs
2852	!
2853	save	%sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
2854	srl	%i1, 6, %l4		! clear top bits of psz
2855					! and divide by 64
2856	rd	%fprs, %l2		! store FP
2857	wr	%g0, FPRS_FEF, %fprs	! enable FP
28581:
2859	ldda	[%i0]ASI_BLK_P, %d0	! load a block
2860	membar	#Sync
2861	ldxa    [%g0]ASI_AFSR, %l3	! read afsr reg
2862	brz,a,pt %l3, 2f		! check for error
2863	nop
2864
2865	!
2866	! if error, read the error regs and log it
2867	!
2868	call	get_cpu_error_state
2869	add	%fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0
2870
2871	!
2872	! cpu_ce_detected(ch_cpu_errors_t *, flag)
2873	!
	/*
	 * %o0 is not reloaded for this call; it still holds the pointer
	 * set up above because get_cpu_error_state never writes %o0.
	 */
2874	call	cpu_ce_detected		! log the error
2875	mov	CE_CEEN_TIMEOUT, %o1
28762:
2877	dec	%l4			! next 64-byte block
	/* annulled delay slot: the address only advances when looping */
2878	brnz,a,pt  %l4, 1b
2879	add	%i0, 64, %i0		! increment block addr
2880
2881	wr	%l2, %g0, %fprs		! restore FP
2882	ret
2883	restore
2884
2885	SET_SIZE(cpu_check_block)
2886
2887#endif	/* lint */
2888
2889#if defined(lint)
2890
2891/*
2892 * Perform a cpu logout called from C.  This is used where we did not trap
2893 * for the error but still want to gather "what we can".  Caller must make
2894 * sure cpu private area exists and that the indicated logout area is free
2895 * for use, and that we are unable to migrate cpus.
2896 */
2897/*ARGSUSED*/
2898void
2899cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
2900{ }
2901
2902#else
	/*
	 * In:	%o0 = afar
	 *	%o1 = clop; advanced by CH_CLO_DATA + CH_CHD_EC_DATA before
	 *	      being passed to GET_ECACHE_DTAGS
	 * Scratch: %o2-%o5, %g1 (holds the caller's %asi while ASI_P is
	 *	selected for GET_ECACHE_DTAGS).
	 *
	 * Interrupts are disabled on entry.  On exit PSTATE_IE is set
	 * unconditionally rather than restored from a saved copy, so the
	 * routine always returns with interrupts enabled.
	 * NOTE(review): presumably callers always have interrupts enabled
	 * -- confirm.
	 */
2903	ENTRY(cpu_delayed_logout)
2904	rdpr	%pstate, %o2
2905	andn	%o2, PSTATE_IE, %o2
2906	wrpr	%g0, %o2, %pstate		! disable interrupts
2907	PARK_SIBLING_CORE(%o2, %o3, %o4)	! %o2 has DCU value
2908	add	%o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
2909	rd	%asi, %g1
2910	wr	%g0, ASI_P, %asi
2911	GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
2912	wr	%g1, %asi
2913	UNPARK_SIBLING_CORE(%o2, %o3, %o4)	! can use %o2 again
2914	rdpr	%pstate, %o2
2915	or	%o2, PSTATE_IE, %o2
2916	wrpr	%g0, %o2, %pstate
2917	retl
2918	  nop
2919	SET_SIZE(cpu_delayed_logout)
2920
2921#endif	/* lint */
2922
2923#if defined(lint)
2924
2925/*ARGSUSED*/
2926int
2927dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
2928{ return (0); }
2929
2930#else
2931
	/*
	 * In:	%i0 = addr, target of the block-commit store
	 *	%i1 = data, 32-bit word loaded into %f0 before the store
	 *	%i2 = tryagain
	 * Out:	0 on success; on a fault, -1 when tryagain is zero, else
	 *	whatever the tail-called dtrace_blksuword32_err returns.
	 * Locals:
	 *	%l0 = %fprs on entry
	 *	%l1 = %pstate on entry
	 *	%l2 = %pstate with IE cleared
	 *	%l5 = address of the lofault handler (label 0)
	 * The extra 4 bytes in the frame hold the caller's %f0 when the
	 * FPU was already enabled.
	 *
	 * The condition codes set by the "andcc" on FPRS_FEF are reused by
	 * both later "bz,a" branches to pick the matching cleanup: FPU was
	 * disabled -> just restore %fprs; FPU was enabled -> reload %f0.
	 * Nothing between the andcc and those branches writes the
	 * integer condition codes.
	 */
2932	ENTRY(dtrace_blksuword32)
2933	save	%sp, -SA(MINFRAME + 4), %sp
2934
2935	rdpr	%pstate, %l1
2936	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
2937	wrpr	%g0, %l2, %pstate		! protect our FPU diddling
2938
2939	rd	%fprs, %l0
2940	andcc	%l0, FPRS_FEF, %g0
2941	bz,a,pt	%xcc, 1f			! if the fpu is disabled
2942	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu
2943
2944	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
29451:
2946	set	0f, %l5
2947        /*
2948         * We're about to write a block full of either total garbage
2949         * (not kernel data, don't worry) or user floating-point data
2950         * (so it only _looks_ like garbage).
2951         */
2952	ld	[%i1], %f0			! modify the block
2953	membar	#Sync
2954	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
2955	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
2956	membar	#Sync
2957	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2958
2959	bz,a,pt	%xcc, 1f
2960	wr	%g0, %l0, %fprs			! restore %fprs
2961
2962	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
29631:
2964
2965	wrpr	%g0, %l1, %pstate		! restore interrupts
2966
2967	ret
2968	restore	%g0, %g0, %o0
2969
	/*
	 * Label 0 is the address installed in T_LOFAULT above; the code
	 * from here on is the fault path.
	 */
29700:
2971	membar	#Sync
2972	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2973
2974	bz,a,pt	%xcc, 1f
2975	wr	%g0, %l0, %fprs			! restore %fprs
2976
2977	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
29781:
2979
2980	wrpr	%g0, %l1, %pstate		! restore interrupts
2981
2982	/*
2983	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
2984	 * which deals with watchpoints. Otherwise, just return -1.
2985	 */
2986	brnz,pt	%i2, 1f
2987	nop
2988	ret
2989	restore	%g0, -1, %o0
29901:
2991	call	dtrace_blksuword32_err
2992	restore
2993
2994	SET_SIZE(dtrace_blksuword32)
2995
2996#endif /* lint */
2997
2998#ifdef	CHEETAHPLUS_ERRATUM_25
2999
3000#if	defined(lint)
3001/*
3002 * Claim a chunk of physical address space.
3003 */
3004/*ARGSUSED*/
3005void
3006claimlines(uint64_t pa, size_t sz, int stride)
3007{}
3008#else	/* lint */
	/*
	 * In:	%o0 = pa, %o1 = sz, %o2 = stride
	 * Clobbers: %o1, %o3.
	 *
	 * Each pass subtracts stride from sz and forms %o3 = pa + sz.
	 * While the subtraction does not borrow (bgeu), the annulled
	 * delay slot issues a casxa on that physical address with %g0 as
	 * both the compare and the swap operand, which never changes the
	 * value held in memory.  Addresses are therefore visited from
	 * pa + sz - stride downwards; the loop ends on the first borrow,
	 * and the casxa is skipped on that final pass.
	 */
3009	ENTRY(claimlines)
30101:
3011	subcc	%o1, %o2, %o1
3012	add	%o0, %o1, %o3
3013	bgeu,a,pt	%xcc, 1b
3014	casxa	[%o3]ASI_MEM, %g0, %g0
3015	membar  #Sync
3016	retl
3017	nop
3018	SET_SIZE(claimlines)
3019#endif	/* lint */
3020
3021#if	defined(lint)
3022/*
3023 * CPU feature initialization,
3024 * turn BPE off,
3025 * get device id.
3026 */
3027/*ARGSUSED*/
3028void
3029cpu_feature_init(void)
3030{}
3031#else	/* lint */
	/*
	 * Non-leaf routine (takes a register window).  Steps:
	 *   1. If the 32-bit variable cheetah_bpe_off is non-zero, clear
	 *      ASR_DISPATCH_CONTROL_BPE in ASR_DISPATCH_CONTROL.
	 *   2. Read ASI_DEVICE_SERIAL_ID and store it at
	 *      cpunodes + DEVICE_ID + (cpu index * CPU_NODE_SIZE).
	 *   3. When CHEETAHPLUS_ERRATUM_34 is defined, call the ITLB and
	 *      DTLB erratum 34 fixup routines.
	 */
3032	ENTRY(cpu_feature_init)
3033	save	%sp, -SA(MINFRAME), %sp
3034	sethi	%hi(cheetah_bpe_off), %o0
3035	ld	[%o0 + %lo(cheetah_bpe_off)], %o0
3036	brz	%o0, 1f
3037	nop
3038	rd	ASR_DISPATCH_CONTROL, %o0
3039	andn	%o0, ASR_DISPATCH_CONTROL_BPE, %o0
3040	wr	%o0, 0, ASR_DISPATCH_CONTROL
30411:
3042	!
3043	! get the device_id and store the device_id
3044	! in the appropriate cpunodes structure
3045	! given the cpus index
3046	!
3047	CPU_INDEX(%o0, %o1)
3048	mulx %o0, CPU_NODE_SIZE, %o0
3049	set  cpunodes + DEVICE_ID, %o1
3050	ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2
3051	stx  %o2, [%o0 + %o1]
3052#ifdef	CHEETAHPLUS_ERRATUM_34
3053	!
3054	! apply Cheetah+ erratum 34 workaround
3055	!
3056	call itlb_erratum34_fixup
3057	  nop
3058	call dtlb_erratum34_fixup
3059	  nop
3060#endif	/* CHEETAHPLUS_ERRATUM_34 */
3061	ret
3062	  restore
3063	SET_SIZE(cpu_feature_init)
3064#endif	/* lint */
3065
3066#if	defined(lint)
3067/*
3068 * Copy a tsb entry atomically, from src to dest.
3069 * src must be 128 bit aligned.
3070 */
3071/*ARGSUSED*/
3072void
3073copy_tsb_entry(uintptr_t src, uintptr_t dest)
3074{}
3075#else	/* lint */
	/*
	 * In:	%o0 = src, %o1 = dest
	 * Clobbers: %o2, %o3.
	 *
	 * Only the 16-byte read is a single access (one ldda into the
	 * %o2/%o3 pair); the destination is written with two separate
	 * 8-byte stores, tag first, then data.
	 */
3076	ENTRY(copy_tsb_entry)
3077	ldda	[%o0]ASI_NQUAD_LD, %o2		! %o2 = tag, %o3 = data
3078	stx	%o2, [%o1]
3079	stx	%o3, [%o1 + 8 ]
3080	retl
3081	nop
3082	SET_SIZE(copy_tsb_entry)
3083#endif	/* lint */
3084
3085#endif	/* CHEETAHPLUS_ERRATUM_25 */
3086
3087#ifdef	CHEETAHPLUS_ERRATUM_34
3088
3089#if	defined(lint)
3090
3091/*ARGSUSED*/
3092void
3093itlb_erratum34_fixup(void)
3094{}
3095
3096#else	/* lint */
3097
3098	!
3099	! In Cheetah+ erratum 34, under certain conditions an ITLB locked
3100	! index 0 TTE will erroneously be displaced when a new TTE is
3101	! loaded via ASI_ITLB_IN.  In order to avoid cheetah+ erratum 34,
3102	! locked index 0 TTEs must be relocated.
3103	!
3104	! NOTE: Care must be taken to avoid an ITLB miss in this routine.
3105	!
	/*
	 * Register usage:
	 *	%o1 = data word of ITLB entry 0
	 *	%o2 = tag word of ITLB entry 0
	 *	%o3 = %pstate on entry, restored in each retl delay slot
	 *	%o4 = FLUSH_ADDR for "flush"; also the TTE being examined
	 *	      in the search loop
	 *	%g3 = ASI address of the candidate entry (index << 3)
	 *	%g4 = MMU_TAG_ACCESS
	 *
	 * "wrpr %o3, PSTATE_IE, %pstate" writes %o3 xor PSTATE_IE, so it
	 * turns interrupts off only if they were on at entry; DEBUG
	 * builds check that through PANIC_IF_INTR_DISABLED_PSTR.
	 *
	 * The TTE valid bit is the sign bit, hence the signed "bge %xcc"
	 * tests for "not valid".  The andcc in each bge delay slot always
	 * executes and sets the codes used by the following bnz.
	 */
3106	ENTRY_NP(itlb_erratum34_fixup)
3107	rdpr	%pstate, %o3
3108#ifdef DEBUG
3109	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
3110#endif /* DEBUG */
3111	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
3112	ldxa	[%g0]ASI_ITLB_ACCESS, %o1	! %o1 = entry 0 data
3113	ldxa	[%g0]ASI_ITLB_TAGREAD, %o2	! %o2 = entry 0 tag
3114
3115	cmp	%o1, %g0			! Is this entry valid?
3116	bge	%xcc, 1f
3117	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
3118	bnz	%icc, 2f
3119	  nop
31201:
3121	retl					! Nope, outta here...
3122	  wrpr	%g0, %o3, %pstate		! Enable interrupts
31232:
3124	sethi	%hi(FLUSH_ADDR), %o4
3125	stxa	%g0, [%o2]ASI_ITLB_DEMAP	! Flush this mapping
3126	flush	%o4				! Flush required for I-MMU
3127	!
3128	! Start search from index 1 up.  This is because the kernel force
3129	! loads its text page at index 15 in sfmmu_kernel_remap() and we
3130	! don't want our relocated entry evicted later.
3131	!
3132	! NOTE: We assume that we'll be successful in finding an unlocked
3133	! or invalid entry.  If that isn't the case there are bound to
3134	! bigger problems.
3135	!
3136	set	(1 << 3), %g3
31373:
3138	ldxa	[%g3]ASI_ITLB_ACCESS, %o4	! Load TTE from t16
3139	!
3140	! If this entry isn't valid, we'll choose to displace it (regardless
3141	! of the lock bit).
3142	!
3143	cmp	%o4, %g0			! TTE is > 0 iff not valid
3144	bge	%xcc, 4f			! If invalid, go displace
3145	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
3146	bnz,a	%icc, 3b			! If locked, look at next
3147	  add	%g3, (1 << 3), %g3		!  entry
31484:
3149	!
3150	! We found an unlocked or invalid entry; we'll explicitly load
3151	! the former index 0 entry here.
3152	!
3153	sethi	%hi(FLUSH_ADDR), %o4
3154	set	MMU_TAG_ACCESS, %g4
3155	stxa	%o2, [%g4]ASI_IMMU
3156	stxa	%o1, [%g3]ASI_ITLB_ACCESS
3157	flush	%o4				! Flush required for I-MMU
3158	retl
3159	  wrpr	%g0, %o3, %pstate		! Enable interrupts
3160	SET_SIZE(itlb_erratum34_fixup)
3161
3162#endif	/* lint */
3163
3164#if	defined(lint)
3165
3166/*ARGSUSED*/
3167void
3168dtlb_erratum34_fixup(void)
3169{}
3170
3171#else	/* lint */
3172
3173	!
3174	! In Cheetah+ erratum 34, under certain conditions a DTLB locked
3175	! index 0 TTE will erroneously be displaced when a new TTE is
3176	! loaded.  In order to avoid cheetah+ erratum 34, locked index 0
3177	! TTEs must be relocated.
3178	!
	/*
	 * Register usage:
	 *	%o1 = data word of DTLB entry 0
	 *	%o2 = tag word of DTLB entry 0
	 *	%o3 = %pstate on entry, restored in each retl delay slot
	 *	%o4 = TTE being examined in the search loop
	 *	%g3 = ASI address of the candidate entry (index << 3)
	 *	%g4 = MMU_TAG_ACCESS
	 *
	 * "wrpr %o3, PSTATE_IE, %pstate" writes %o3 xor PSTATE_IE, so it
	 * turns interrupts off only if they were on at entry; DEBUG
	 * builds check that through PANIC_IF_INTR_DISABLED_PSTR.
	 *
	 * The TTE valid bit is the sign bit, hence the signed "bge %xcc"
	 * tests for "not valid".  The andcc in each bge delay slot always
	 * executes and sets the codes used by the following bnz.
	 * Here membar #Sync follows each D-MMU store, where the ITLB
	 * variant of this routine uses "flush".
	 */
3179	ENTRY_NP(dtlb_erratum34_fixup)
3180	rdpr	%pstate, %o3
3181#ifdef DEBUG
3182	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
3183#endif /* DEBUG */
3184	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
3185	ldxa	[%g0]ASI_DTLB_ACCESS, %o1	! %o1 = entry 0 data
3186	ldxa	[%g0]ASI_DTLB_TAGREAD, %o2	! %o2 = entry 0 tag
3187
3188	cmp	%o1, %g0			! Is this entry valid?
3189	bge	%xcc, 1f
3190	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
3191	bnz	%icc, 2f
3192	  nop
31931:
3194	retl					! Nope, outta here...
3195	  wrpr	%g0, %o3, %pstate		! Enable interrupts
31962:
3197	stxa	%g0, [%o2]ASI_DTLB_DEMAP	! Flush this mapping
3198	membar	#Sync
3199	!
3200	! Start search from index 1 up.
3201	!
3202	! NOTE: We assume that we'll be successful in finding an unlocked
3203	! or invalid entry.  If that isn't the case there are bound to
3204	! bigger problems.
3205	!
3206	set	(1 << 3), %g3
32073:
3208	ldxa	[%g3]ASI_DTLB_ACCESS, %o4	! Load TTE from t16
3209	!
3210	! If this entry isn't valid, we'll choose to displace it (regardless
3211	! of the lock bit).
3212	!
3213	cmp	%o4, %g0			! TTE is > 0 iff not valid
3214	bge	%xcc, 4f			! If invalid, go displace
3215	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
3216	bnz,a	%icc, 3b			! If locked, look at next
3217	  add	%g3, (1 << 3), %g3		!  entry
32184:
3219	!
3220	! We found an unlocked or invalid entry; we'll explicitly load
3221	! the former index 0 entry here.
3222	!
3223	set	MMU_TAG_ACCESS, %g4
3224	stxa	%o2, [%g4]ASI_DMMU
3225	stxa	%o1, [%g3]ASI_DTLB_ACCESS
3226	membar	#Sync
3227	retl
3228	  wrpr	%g0, %o3, %pstate		! Enable interrupts
3229	SET_SIZE(dtlb_erratum34_fixup)
3230
3231#endif	/* lint */
3232
3233#endif	/* CHEETAHPLUS_ERRATUM_34 */
3234
3235