xref: /titanic_44/usr/src/uts/sun4u/cpu/us3_common_asm.s (revision 3d63ea05cb8474d8036d3588cf8299306a994b8c)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Assembly code support for Cheetah/Cheetah+ modules
26 */
27
28#pragma ident	"%Z%%M%	%I%	%E% SMI"
29
30#if !defined(lint)
31#include "assym.h"
32#endif	/* !lint */
33
34#include <sys/asm_linkage.h>
35#include <sys/mmu.h>
36#include <vm/hat_sfmmu.h>
37#include <sys/machparam.h>
38#include <sys/machcpuvar.h>
39#include <sys/machthread.h>
40#include <sys/machtrap.h>
41#include <sys/privregs.h>
42#include <sys/trap.h>
43#include <sys/cheetahregs.h>
44#include <sys/us3_module.h>
45#include <sys/xc_impl.h>
46#include <sys/intreg.h>
47#include <sys/async.h>
48#include <sys/clock.h>
49#include <sys/cheetahasm.h>
50#include <sys/cmpregs.h>
51
52#ifdef TRAPTRACE
53#include <sys/traptrace.h>
54#endif /* TRAPTRACE */
55
56#if !defined(lint)
57
58/* BEGIN CSTYLED */
59
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Invalidate D$ lines for one page.  Does nothing when DCU_DC is clear.
 * The strategy is chosen at run time from the value of dflush_type:
 *   FLUSHPAGE_TYPE  - invalidate by physical address through ASI_DC_INVAL,
 *		       covering MMU_PAGESIZE bytes at (arg1 << MMU_PAGESHIFT)
 *   FLUSHMATCH_TYPE - walk every line and clear only tags equal to arg1
 *   any other value - clear every tag (treated as FLUSHALL_TYPE)
 *
 * arg1 = pfn (shifted in place once the D$ is found enabled)
 * arg2 = virtual color; only written (as a loop counter) on the
 *	  FLUSHMATCH_TYPE path
 * tmp1, tmp2, tmp3 = scratch registers, clobbered
 *
 * Uses numeric local labels 1 through 5 and modifies the condition codes.
 */
60#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
61	ldxa	[%g0]ASI_DCU, tmp1					;\
62	btst	DCU_DC, tmp1		/* is dcache enabled? */	;\
63	bz,pn	%icc, 1f		/* no: skip the flush */	;\
64	ASM_LD(tmp1, dcache_linesize)					;\
65	ASM_LD(tmp2, dflush_type)					;\
66	cmp	tmp2, FLUSHPAGE_TYPE					;\
67	be,pt	%icc, 2f		/* flush by physical address */	;\
68	nop								;\
69	sllx	arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */	;\
70	ASM_LD(tmp3, dcache_size)					;\
71	cmp	tmp2, FLUSHMATCH_TYPE					;\
72	be,pt	%icc, 3f		/* flush matching tags only */	;\
73	nop								;\
74	/*								\
75	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
76	 * tmp3 = cache size						\
77	 * tmp1 = cache line size					\
78	 */								\
79	sub	tmp3, tmp1, tmp2	/* start at the last line */	;\
804:									\
81	stxa	%g0, [tmp2]ASI_DC_TAG					;\
82	membar	#Sync							;\
83	cmp	%g0, tmp2		/* was that index 0? */		;\
84	bne,pt	%icc, 4b						;\
85	sub	tmp2, tmp1, tmp2	/* delay: step down a line */	;\
86	ba,pt	%icc, 1f						;\
87	nop								;\
88	/*								\
89	 * flushtype = FLUSHPAGE_TYPE					\
90	 * arg1 = pfn							\
91	 * arg2 = virtual color						\
92	 * tmp1 = cache line size					\
93	 * tmp2 = tag from cache					\
94	 * tmp3 = counter						\
95	 */								\
962:									\
97	set	MMU_PAGESIZE, tmp3					;\
98        sllx    arg1, MMU_PAGESHIFT, arg1  /* pfn to 43 bit PA	   */   ;\
99	sub	tmp3, tmp1, tmp3	/* offset of last line */	;\
1004:									\
101	stxa	%g0, [arg1 + tmp3]ASI_DC_INVAL				;\
102	membar	#Sync							;\
1035:									\
104	cmp	%g0, tmp3						;\
105	bnz,pt	%icc, 4b		/* branch if not done */	;\
106	sub	tmp3, tmp1, tmp3	/* delay: step down a line */	;\
107	ba,pt	%icc, 1f						;\
108	nop								;\
109	/*								\
110	 * flushtype = FLUSHMATCH_TYPE					\
111	 * arg1 = tag to compare against				\
112	 * tmp1 = cache line size					\
113	 * tmp3 = cache size						\
114	 * arg2 = counter						\
115	 * tmp2 = cache tag						\
116	 */								\
1173:									\
118	sub	tmp3, tmp1, arg2	/* start at the last line */	;\
1194:									\
120	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
121	btst	CHEETAH_DC_VBIT_MASK, tmp2				;\
122	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
123	andn	tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */	;\
124	cmp	tmp2, arg1						;\
125	bne,pn	%icc, 5f		/* branch if tag miss */	;\
126	nop								;\
127	stxa	%g0, [arg2]ASI_DC_TAG	/* tag hit: clear the line */	;\
128	membar	#Sync							;\
1295:									\
130	cmp	%g0, arg2						;\
131	bne,pt	%icc, 4b		/* branch if not done */	;\
132	sub	arg2, tmp1, arg2	/* delay: step down a line */	;\
1331:
134
135
136/* END CSTYLED */
137
138#endif	/* !lint */
139
140/*
141 * Cheetah MMU and Cache operations.
142 */
143
144#if defined(lint)
145
146/* ARGSUSED */
147void
148vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
149{}
150
151#else	/* lint */
152
153	ENTRY_NP(vtag_flushpage)
154	/*
155	 * flush page from the tlb
156	 *
157	 * %o0 = vaddr
158	 * %o1 = sfmmup
159	 */
160	rdpr	%pstate, %o5			/* %o5 = entry pstate, written back on return */
161#ifdef DEBUG
162	PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
163#endif /* DEBUG */
164	/*
165	 * disable ints
166	 */
167	andn	%o5, PSTATE_IE, %o4
168	wrpr	%o4, 0, %pstate
169
170	/*
171	 * Then, blow out the tlb
172	 * Interrupts are disabled to prevent the primary ctx register
173	 * from changing underneath us.
174	 */
175	sethi   %hi(ksfmmup), %o3
176        ldx     [%o3 + %lo(ksfmmup)], %o3
177        cmp     %o3, %o1
178        bne,pt   %xcc, 1f			! if not kernel as, go to 1
179	  sethi	%hi(FLUSH_ADDR), %o3		/* delay slot, not annulled: runs on both paths */
180	/*
181	 * For Kernel demaps use primary. type = page implicitly
182	 */
183	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
184	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
185	flush	%o3
186	retl
187	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
1881:
189	/*
190	 * User demap.  We need to set the primary context properly.
191	 * Secondary context cannot be used for Cheetah IMMU.
192	 * %o0 = vaddr
193	 * %o1 = sfmmup
194	 * %o3 = FLUSH_ADDR
195	 */
196	SFMMU_CPU_CNUM(%o1, %g1, %g2)		! %g1 = sfmmu cnum on this CPU
197
198	ldub	[%o1 + SFMMU_CEXT], %o4		! %o4 = sfmmup->sfmmu_cext
199	sll	%o4, CTXREG_EXT_SHIFT, %o4
200	or	%g1, %o4, %g1			! %g1 = pgsz | cnum
201
202	wrpr	%g0, 1, %tl			/* switch contexts at TL=1 */
203	set	MMU_PCONTEXT, %o4
204	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0	/* demap operand: vaddr | primary | page */
205	ldxa	[%o4]ASI_DMMU, %o2		! %o2 = save old ctxnum
206	stxa	%g1, [%o4]ASI_DMMU		! wr new ctxnum
207
208	stxa	%g0, [%o0]ASI_DTLB_DEMAP
209	stxa	%g0, [%o0]ASI_ITLB_DEMAP
210	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
211	flush	%o3
212	wrpr	%g0, 0, %tl			/* back to TL=0 */
213
214	retl
215	wrpr	%g0, %o5, %pstate		/* enable interrupts */
216	SET_SIZE(vtag_flushpage)
217
218#endif	/* lint */
219
220#if defined(lint)
221
222void
223vtag_flushall(void)
224{}
225
226#else	/* lint */
227
228	ENTRY_NP2(vtag_flushall, demap_all)
229	/*
230	 * flush the tlb: no arguments, %o3 and %g1 are used as scratch
231	 */
232	sethi	%hi(FLUSH_ADDR), %o3
233	set	DEMAP_ALL_TYPE, %g1		/* demap operand selecting the "all" type */
234	stxa	%g0, [%g1]ASI_DTLB_DEMAP	/* demap the D-TLB */
235	stxa	%g0, [%g1]ASI_ITLB_DEMAP	/* demap the I-TLB */
236	flush	%o3
237	retl
238	nop
239	SET_SIZE(demap_all)
240	SET_SIZE(vtag_flushall)
241
242#endif	/* lint */
243
244
245#if defined(lint)
246
247/* ARGSUSED */
248void
249vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
250{}
251
252#else	/* lint */
253
254	ENTRY_NP(vtag_flushpage_tl1)
255	/*
256	 * x-trap to flush page from tlb and tsb
257	 *
258	 * %g1 = vaddr, zero-extended on 32-bit kernel
259	 * %g2 = sfmmup
260	 *
261	 * assumes TSBE_TAG = 0
262	 */
263	srln	%g1, MMU_PAGESHIFT, %g1		/* with the slln below: drop the page-offset bits */
264
265	sethi   %hi(ksfmmup), %g3
266        ldx     [%g3 + %lo(ksfmmup)], %g3
267        cmp     %g3, %g2
268        bne,pt	%xcc, 1f                        ! if not kernel as, go to 1
269	  slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
270
271	/* We need to demap in the kernel context */
272	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
273	stxa	%g0, [%g1]ASI_DTLB_DEMAP
274	stxa	%g0, [%g1]ASI_ITLB_DEMAP
275	retry
2761:
277	/* We need to demap in a user context */
278	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
279
280	SFMMU_CPU_CNUM(%g2, %g6, %g3)	! %g6 = sfmmu cnum on this CPU
281
282	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
283	sll	%g4, CTXREG_EXT_SHIFT, %g4
284	or	%g6, %g4, %g6			! %g6 = pgsz | cnum
285
286	set	MMU_PCONTEXT, %g4
287	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
288	stxa	%g6, [%g4]ASI_DMMU		/* wr new ctxnum */
289	stxa	%g0, [%g1]ASI_DTLB_DEMAP
290	stxa	%g0, [%g1]ASI_ITLB_DEMAP
291	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
292	retry
293	SET_SIZE(vtag_flushpage_tl1)
294
295#endif	/* lint */
296
297
298#if defined(lint)
299
300/* ARGSUSED */
301void
302vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
303{}
304
305#else	/* lint */
306
307	ENTRY_NP(vtag_flush_pgcnt_tl1)
308	/*
309	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
310	 *
311	 * %g1 = vaddr, zero-extended on 32-bit kernel
312	 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
313	 *
314	 * NOTE: this handler relies on the fact that no
315	 *	interrupts or traps can occur during the loop
316	 *	issuing the TLB_DEMAP operations. It is assumed
317	 *	that interrupts are disabled and this code is
318	 *	fetching from the kernel locked text address.
319	 *
320	 * assumes TSBE_TAG = 0
321	 */
322	set	SFMMU_PGCNT_MASK, %g4
323	and	%g4, %g2, %g3			/* g3 = pgcnt - 1 */
324	add	%g3, 1, %g3			/* g3 = pgcnt */
325
326	andn	%g2, SFMMU_PGCNT_MASK, %g2	/* g2 = sfmmup */
327	srln	%g1, MMU_PAGESHIFT, %g1		/* with the slln below: drop the page-offset bits */
328
329	sethi   %hi(ksfmmup), %g4
330        ldx     [%g4 + %lo(ksfmmup)], %g4
331        cmp     %g4, %g2
332        bne,pn   %xcc, 1f			/* if not kernel as, go to 1 */
333	  slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
334
335	/* We need to demap in the kernel context */
336	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
337	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
338	sethi   %hi(FLUSH_ADDR), %g5
3394:
340	stxa	%g0, [%g1]ASI_DTLB_DEMAP
341	stxa	%g0, [%g1]ASI_ITLB_DEMAP
342	flush	%g5				! flush required by immu
343
344	deccc	%g3				/* decr pgcnt */
345	bnz,pt	%icc,4b
346	  add	%g1, %g2, %g1			/* next page */
347	retry
3481:
349	/*
350	 * We need to demap in a user context
351	 *
352	 * g2 = sfmmup
353	 * g3 = pgcnt
354	 */
355	SFMMU_CPU_CNUM(%g2, %g5, %g6)		! %g5 = sfmmu cnum on this CPU
356
357	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
358
359	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
360	sll	%g4, CTXREG_EXT_SHIFT, %g4
361	or	%g5, %g4, %g5			/* g5 = cext bits | cnum */
362
363	set	MMU_PCONTEXT, %g4
364	ldxa	[%g4]ASI_DMMU, %g6		/* rd old ctxnum */
365	stxa	%g5, [%g4]ASI_DMMU		/* wr new ctxnum */
366
367	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
368	sethi   %hi(FLUSH_ADDR), %g5
3693:
370	stxa	%g0, [%g1]ASI_DTLB_DEMAP
371	stxa	%g0, [%g1]ASI_ITLB_DEMAP
372	flush	%g5				! flush required by immu
373
374	deccc	%g3				/* decr pgcnt */
375	bnz,pt	%icc,3b
376	  add	%g1, %g2, %g1			/* next page */
377
378	stxa	%g6, [%g4]ASI_DMMU		/* restore old ctxnum */
379	retry
380	SET_SIZE(vtag_flush_pgcnt_tl1)
381
382#endif	/* lint */
383
384#if defined(lint)
385
386/*ARGSUSED*/
387void
388vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
389{}
390
391#else	/* lint */
392
393	ENTRY_NP(vtag_flushall_tl1)
394	/*
395	 * x-trap to flush tlb; no registers are read on entry, %g4 is scratch
396	 */
397	set	DEMAP_ALL_TYPE, %g4
398	stxa	%g0, [%g4]ASI_DTLB_DEMAP	/* demap the D-TLB */
399	stxa	%g0, [%g4]ASI_ITLB_DEMAP	/* demap the I-TLB */
400	retry
401	SET_SIZE(vtag_flushall_tl1)
402
403#endif	/* lint */
404
405
406#if defined(lint)
407
408/* ARGSUSED */
409void
410vac_flushpage(pfn_t pfnum, int vcolor)
411{}
412
413#else	/* lint */
414
415/*
416 * vac_flushpage(pfnum, color)
417 *	Flush 1 8k page of the D-$ with physical page = pfnum
418 *	Algorithm:
419 *		The cheetah dcache is a 64k pseudo 4 way associative cache.
420 *		It is virtual indexed, physically tagged cache.
421 */
422	.seg	".data"
423	.align	8
424	.global	dflush_type
425dflush_type:				/* flush strategy word read by DCACHE_FLUSHPAGE */
426	.word	FLUSHPAGE_TYPE
427
428	ENTRY(vac_flushpage)
429	/*
430	 * flush page from the d$
431	 *
432	 * %o0 = pfnum, %o1 = color
433	 */
434	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)	/* %o2-%o4 are scratch */
435	retl
436	  nop
437	SET_SIZE(vac_flushpage)
438
439#endif	/* lint */
440
441
442#if defined(lint)
443
444/* ARGSUSED */
445void
446vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
447{}
448
449#else	/* lint */
450
451	ENTRY_NP(vac_flushpage_tl1)
452	/*
453	 * x-trap to flush page from the d$
454	 *
455	 * %g1 = pfnum, %g2 = color
456	 */
457	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)	/* %g3-%g5 are scratch */
458	retry
459	SET_SIZE(vac_flushpage_tl1)
460
461#endif	/* lint */
462
463
464#if defined(lint)
465
466/* ARGSUSED */
467void
468vac_flushcolor(int vcolor, pfn_t pfnum)
469{}
470
471#else	/* lint */
472	/*
473	 * In UltraSPARC III flushcolor is the same as flushpage.
474	 * This is because we have an ASI to flush dcache using physical
475	 * address.
476	 * Flushing dcache using physical address is faster because we
477	 * don't have to deal with associativity of dcache.
478	 * The arguments to vac_flushpage() and vac_flushcolor() are same but
479	 * the order is reversed. this is because we maintain compatibility
480	 * with spitfire, in which vac_flushcolor has only one argument, namely
481	 * vcolor.
482	 */
483
484	ENTRY(vac_flushcolor)
485	/*
486	 * %o0 = vcolor, %o1 = pfnum
487	 */
488	DCACHE_FLUSHPAGE(%o1, %o0, %o2, %o3, %o4)	/* same macro as vac_flushpage, first two args swapped */
489	retl
490	  nop
491	SET_SIZE(vac_flushcolor)
492
493#endif	/* lint */
494
495
496#if defined(lint)
497
498/* ARGSUSED */
499void
500vac_flushcolor_tl1(uint64_t vcolor, uint64_t pfnum)
501{}
502
503#else	/* lint */
504
505	ENTRY(vac_flushcolor_tl1)
506	/*
507	 * %g1 = vcolor
508	 * %g2 = pfnum
509	 */
510	DCACHE_FLUSHPAGE(%g2, %g1, %g3, %g4, %g5)	/* pfnum first, then color; %g3-%g5 are scratch */
511	retry
512	SET_SIZE(vac_flushcolor_tl1)
513
514#endif	/* lint */
515
516#if defined(lint)
517
518int
519idsr_busy(void)
520{
521	return (0);
522}
523
524#else	/* lint */
525
526/*
527 * Determine whether or not the IDSR is busy.
528 * Entry: no arguments
529 * Returns: 1 if busy, 0 otherwise (in %o0); %g1 is used as scratch
530 */
531	ENTRY(idsr_busy)
532	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1	/* %g1 = dispatch status */
533	clr	%o0				/* default result: not busy */
534	btst	IDSR_BUSY, %g1
535	bnz,a,pn	%xcc, 1f		/* taken only when the busy bit is set */
536	mov	1, %o0				/* annulled delay slot: runs only if taken */
5371:
538	retl
539	nop
540	SET_SIZE(idsr_busy)
541
542#endif	/* lint */
543
544#if defined(lint)
545
546/* ARGSUSED */
547void
548init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
549{}
550
551/* ARGSUSED */
552void
553init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
554{}
555
556#else	/* lint */
557
558	.global _dispatch_status_busy
559_dispatch_status_busy:			/* panic message for the DEBUG check in init_mondo */
560	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
561	.align	4
562
563/*
564 * Setup interrupt dispatch data registers
565 * Entry:
566 *	%o0 - function or inumber to call
567 *	%o1, %o2 - arguments (2 uint64_t's)
568 */
569	.seg "text"
570
571	ENTRY(init_mondo)
572#ifdef DEBUG
573	!
574	! IDSR should not be busy at the moment
575	!
576	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
577	btst	IDSR_BUSY, %g1
578	bz,pt	%xcc, 1f			/* not busy: go write the data registers */
579	nop
580	sethi	%hi(_dispatch_status_busy), %o0
581	call	panic
582	or	%o0, %lo(_dispatch_status_busy), %o0	/* delay slot: finish the message address */
583#endif /* DEBUG */
584
585	ALTENTRY(init_mondo_nocheck)		/* same body, entered without the DEBUG busy check */
586	!
587	! interrupt vector dispatch data reg 0
588	!
5891:
590	mov	IDDR_0, %g1
591	mov	IDDR_1, %g2
592	mov	IDDR_2, %g3
593	stxa	%o0, [%g1]ASI_INTR_DISPATCH
594
595	!
596	! interrupt vector dispatch data reg 1
597	!
598	stxa	%o1, [%g2]ASI_INTR_DISPATCH
599
600	!
601	! interrupt vector dispatch data reg 2
602	!
603	stxa	%o2, [%g3]ASI_INTR_DISPATCH
604
605	membar	#Sync
606	retl
607	nop
608	SET_SIZE(init_mondo_nocheck)
609	SET_SIZE(init_mondo)
610
611#endif	/* lint */
612
613
614#if !(defined(JALAPENO) || defined(SERRANO))
615
616#if defined(lint)
617
618/* ARGSUSED */
619void
620shipit(int upaid, int bn)
621{ return; }
622
623#else	/* lint */
624
625/*
626 * Ship mondo to aid using busy/nack pair bn
627 */
628	ENTRY_NP(shipit)			/* %o0 = agent id, %o1 = busy/nack pair number */
629	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = agent id
630	sll	%o1, IDCR_BN_SHIFT, %g2		! IDCR<28:24> = b/n pair
631	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
632	or	%g1, %g2, %g1			/* %g1 = complete dispatch address */
633	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
634	membar	#Sync
635	retl
636	nop
637	SET_SIZE(shipit)
638
639#endif	/* lint */
640
641#endif	/* !(JALAPENO || SERRANO) */
642
643
644#if defined(lint)
645
646/* ARGSUSED */
647void
648flush_instr_mem(caddr_t vaddr, size_t len)
649{}
650
651#else	/* lint */
652
653/*
654 * flush_instr_mem:
655 *	Flush 1 page of the I-$ starting at vaddr
656 * 	%o0 vaddr
657 *	%o1 bytes to be flushed
658 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
659 * the stores from all processors so that a FLUSH instruction is only needed
660 * to ensure pipeline is consistent. This means a single flush is sufficient at
661 * the end of a sequence of stores that updates the instruction stream to
662 * ensure correct operation.
663 */
664
665	ENTRY(flush_instr_mem)			/* %o1 (byte count) is not read: one flush is issued */
666	flush	%o0			! address irrelevant
667	retl
668	nop
669	SET_SIZE(flush_instr_mem)
670
671#endif	/* lint */
672
673
674#if defined(CPU_IMP_ECACHE_ASSOC)
675
676#if defined(lint)
677
678/* ARGSUSED */
679uint64_t
680get_ecache_ctrl(void)
681{ return (0); }
682
683#else	/* lint */
684
685	ENTRY(get_ecache_ctrl)			/* returns the E$ control register value in %o0 */
686	GET_CPU_IMPL(%o0)
687	cmp	%o0, JAGUAR_IMPL
688	!
689	! Putting an ASI access in the delay slot may
690	! cause it to be accessed, even when annulled.
691	!
692	bne	1f				/* not Jaguar: read ASI_EC_CTRL instead */
693	  nop
694	ldxa	[%g0]ASI_EC_CFG_TIMING, %o0	! read Jaguar shared E$ ctrl reg
695	b	2f
696	  nop
6971:
698	ldxa	[%g0]ASI_EC_CTRL, %o0		! read Ch/Ch+ E$ control reg
6992:
700	retl
701	  nop
702	SET_SIZE(get_ecache_ctrl)
703
704#endif	/* lint */
705
706#endif	/* CPU_IMP_ECACHE_ASSOC */
707
708
709#if !(defined(JALAPENO) || defined(SERRANO))
710
711/*
712 * flush_ecache:
713 *	%o0 - 64 bit physical address
714 *	%o1 - ecache size
715 *	%o2 - ecache linesize
716 */
717#if defined(lint)
718
719/*ARGSUSED*/
720void
721flush_ecache(uint64_t physaddr, size_t ecache_size, size_t ecache_linesize)
722{}
723
724#else /* !lint */
725
726	ENTRY(flush_ecache)
727
728	/*
729	 * For certain CPU implementations, we have to flush the L2 cache
730	 * before flushing the ecache.
731	 */
732	PN_L2_FLUSHALL(%g3, %g4, %g5)		/* only %g3-%g5 are handed to the macro */
733
734	/*
735	 * Flush the entire Ecache using displacement flush.
736	 */
737	ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)	/* args: size, linesize, physaddr, plus %o4 */
738
739	retl
740	nop
741	SET_SIZE(flush_ecache)
742
743#endif /* lint */
744
745#endif	/* !(JALAPENO || SERRANO) */
746
747
748#if defined(lint)
749
750void
751flush_dcache(void)
752{}
753
754#else	/* lint */
755
756	ENTRY(flush_dcache)			/* no arguments; %o0-%o2 are used as scratch */
757	ASM_LD(%o0, dcache_size)		/* %o0 = value of dcache_size */
758	ASM_LD(%o1, dcache_linesize)		/* %o1 = value of dcache_linesize */
759	CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
760	retl
761	nop
762	SET_SIZE(flush_dcache)
763
764#endif	/* lint */
765
766
767#if defined(lint)
768
769void
770flush_icache(void)
771{}
772
773#else	/* lint */
774
775	ENTRY(flush_icache)
776	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);	/* presumably goes to flush_icache_1 when there is no private area - confirm against the macro */
777	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1	/* per-CPU I$ line size */
778	ba,pt	%icc, 2f
779	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0	/* delay slot: per-CPU I$ size, overwrites the pointer */
780flush_icache_1:
781	ASM_LD(%o0, icache_size)		/* fallback: global icache_size */
782	ASM_LD(%o1, icache_linesize)		/* fallback: global icache_linesize */
7832:
784	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)	/* %o0 = size, %o1 = line size */
785	retl
786	nop
787	SET_SIZE(flush_icache)
788
789#endif	/* lint */
790
791#if defined(lint)
792
793/*ARGSUSED*/
794void
795kdi_flush_idcache(int dcache_size, int dcache_lsize, int icache_size,
796    int icache_lsize)
797{
798}
799
800#else	/* lint */
801
802	ENTRY(kdi_flush_idcache)		/* sizes come from the caller: %o0/%o1 = D$ size/line size, %o2/%o3 = I$ size/line size */
803	CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
804	CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
805	membar	#Sync
806	retl
807	nop
808	SET_SIZE(kdi_flush_idcache)
809
810#endif	/* lint */
811
812#if defined(lint)
813
814void
815flush_pcache(void)
816{}
817
818#else	/* lint */
819
820	ENTRY(flush_pcache)			/* no arguments */
821	PCACHE_FLUSHALL(%o0, %o1, %o2)		/* %o0-%o2 are handed to the macro (presumably as scratch) */
822	retl
823	nop
824	SET_SIZE(flush_pcache)
825
826#endif	/* lint */
827
828
829#if defined(CPU_IMP_L1_CACHE_PARITY)
830
831#if defined(lint)
832
833/* ARGSUSED */
834void
835get_dcache_dtag(uint32_t dcache_idx, uint64_t *data)
836{}
837
838#else	/* lint */
839
840/*
841 * Get dcache data and tag.  The Dcache data is a pointer to a ch_dc_data_t
842 * structure (see cheetahregs.h):
843 * The Dcache *should* be turned off when this code is executed.
844 */
845	.align	128
846	ENTRY(get_dcache_dtag)			/* %o0 = dcache_idx, %o1 = data buffer */
847	rdpr	%pstate, %o5			/* %o5 = entry pstate, written back on return */
848	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
849	wrpr	%g0, %o3, %pstate		/* run with PSTATE_IE and PSTATE_AM clear */
850	b	1f
851	  stx	%o0, [%o1 + CH_DC_IDX]		/* delay slot: record the index */
852
853	.align	128
8541:
855	ldxa	[%o0]ASI_DC_TAG, %o2
856	stx	%o2, [%o1 + CH_DC_TAG]		/* NOTE(review): membar follows the store here but precedes it for the UTAG read below - confirm intended */
857	membar	#Sync
858	ldxa	[%o0]ASI_DC_UTAG, %o2
859	membar	#Sync
860	stx	%o2, [%o1 + CH_DC_UTAG]
861	ldxa	[%o0]ASI_DC_SNP_TAG, %o2
862	stx	%o2, [%o1 + CH_DC_SNTAG]
863	add	%o1, CH_DC_DATA, %o1		/* %o1 = start of the data area */
864	clr	%o3				/* %o3 = byte offset, stepped by 8 */
8652:
866	membar	#Sync				! required before ASI_DC_DATA
867	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
868	membar	#Sync				! required after ASI_DC_DATA
869	stx	%o2, [%o1 + %o3]
870	cmp	%o3, CH_DC_DATA_REG_SIZE - 8	/* was that the last 8-byte word? */
871	blt	2b
872	  add	%o3, 8, %o3			/* delay slot: next word */
873
874	/*
875	 * Unlike other CPUs in the family, D$ data parity bits for Panther
876	 * do not reside in the microtag. Instead, we have to read them
877	 * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
878	 * of just having 8 parity bits to protect all 32 bytes of data
879	 * per line, we now have 32 bits of parity.
880	 */
881	GET_CPU_IMPL(%o3)
882	cmp	%o3, PANTHER_IMPL
883	bne	4f				/* not Panther: done */
884	  clr	%o3				/* delay slot: reset the byte offset */
885
886	/*
887	 * move our pointer to the next field where we store parity bits
888	 * and add the offset of the last parity byte since we will be
889	 * storing all 4 parity bytes within one 64 bit field like this:
890	 *
891	 * +------+------------+------------+------------+------------+
892	 * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
893	 * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
894	 * +------+------------+------------+------------+------------+
895	 *  63:32     31:24        23:16         15:8          7:0
896	 */
897	add	%o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1
898
899	/* add the DC_data_parity bit into our working index */
900	mov	1, %o2
901	sll	%o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
902	or	%o0, %o2, %o0
9033:
904	membar	#Sync				! required before ASI_DC_DATA
905	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
906	membar	#Sync				! required after ASI_DC_DATA
907	stb	%o2, [%o1]			/* keep the low byte only */
908	dec	%o1				/* next parity byte goes one address lower */
909	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
910	blt	3b
911	  add	%o3, 8, %o3			/* delay slot: next word */
9124:
913	retl
914	  wrpr	%g0, %o5, %pstate		/* delay slot: restore the entry pstate */
915	SET_SIZE(get_dcache_dtag)
916
917#endif	/* lint */
918
919
920#if defined(lint)
921
922/* ARGSUSED */
923void
924get_icache_dtag(uint32_t ecache_idx, uint64_t *data)
925{}
926
927#else	/* lint */
928
929/*
930 * Get icache data and tag.  The data argument is a pointer to a ch_ic_data_t
931 * structure (see cheetahregs.h):
932 * The Icache *Must* be turned off when this function is called.
933 * This is because diagnostic accesses to the Icache interfere with cache
934 * consistency.
935 */
936	.align	128
937	ENTRY(get_icache_dtag)			/* %o0 = index, %o1 = data buffer */
938	rdpr	%pstate, %o5			/* %o5 = entry pstate, written back on return */
939	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
940	wrpr	%g0, %o3, %pstate		/* run with PSTATE_IE and PSTATE_AM clear */
941
942	stx	%o0, [%o1 + CH_IC_IDX]
943	ldxa	[%o0]ASI_IC_TAG, %o2
944	stx	%o2, [%o1 + CH_IC_PATAG]
945	add	%o0, CH_ICTAG_UTAG, %o0		/* step the tag address to the UTAG field */
946	ldxa	[%o0]ASI_IC_TAG, %o2
947	add	%o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0	/* then to UPPER */
948	stx	%o2, [%o1 + CH_IC_UTAG]
949	ldxa	[%o0]ASI_IC_TAG, %o2
950	add	%o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0	/* then to LOWER */
951	stx	%o2, [%o1 + CH_IC_UPPER]
952	ldxa	[%o0]ASI_IC_TAG, %o2
953	andn	%o0, CH_ICTAG_TMASK, %o0	/* clear the CH_ICTAG_TMASK bits from the address */
954	stx	%o2, [%o1 + CH_IC_LOWER]
955	ldxa	[%o0]ASI_IC_SNP_TAG, %o2
956	stx	%o2, [%o1 + CH_IC_SNTAG]
957	add	%o1, CH_IC_DATA, %o1		/* %o1 = start of the data area */
958	clr	%o3				/* %o3 = byte offset, stepped by 8 */
9592:
960	ldxa	[%o0 + %o3]ASI_IC_DATA, %o2
961	stx	%o2, [%o1 + %o3]
962	cmp	%o3, PN_IC_DATA_REG_SIZE - 8	/* NOTE(review): bound is the PN_ size on every CPU - confirm intended */
963	blt	2b
964	  add	%o3, 8, %o3			/* delay slot: next word */
965
966	retl
967	  wrpr	%g0, %o5, %pstate		/* delay slot: restore the entry pstate */
968	SET_SIZE(get_icache_dtag)
969
970#endif	/* lint */
971
972#if defined(lint)
973
974/* ARGSUSED */
975void
976get_pcache_dtag(uint32_t pcache_idx, uint64_t *data)
977{}
978
979#else	/* lint */
980
981/*
982 * Get pcache data and tags.
983 * inputs:
984 *   pcache_idx	- fully constructed VA for accessing P$ diagnostic
985 *		  registers. Contains PC_way and PC_addr shifted into
986 *		  the correct bit positions. See the PRM for more details.
987 *   data	- pointer to a ch_pc_data_t
988 * structure (see cheetahregs.h):
989 */
990	.align	128
991	ENTRY(get_pcache_dtag)			/* %o0 = pcache_idx, %o1 = data buffer */
992	rdpr	%pstate, %o5			/* %o5 = entry pstate, written back on return */
993	andn    %o5, PSTATE_IE | PSTATE_AM, %o3
994	wrpr	%g0, %o3, %pstate		/* run with PSTATE_IE and PSTATE_AM clear */
995
996	stx	%o0, [%o1 + CH_PC_IDX]
997	ldxa	[%o0]ASI_PC_STATUS_DATA, %o2
998	stx	%o2, [%o1 + CH_PC_STATUS]
999	ldxa	[%o0]ASI_PC_TAG, %o2
1000	stx	%o2, [%o1 + CH_PC_TAG]
1001	ldxa	[%o0]ASI_PC_SNP_TAG, %o2
1002	stx	%o2, [%o1 + CH_PC_SNTAG]
1003	add	%o1, CH_PC_DATA, %o1		/* %o1 = start of the data area */
1004	clr	%o3				/* %o3 = byte offset, stepped by 8 */
10052:
1006	ldxa	[%o0 + %o3]ASI_PC_DATA, %o2
1007	stx	%o2, [%o1 + %o3]
1008	cmp	%o3, CH_PC_DATA_REG_SIZE - 8	/* was that the last 8-byte word? */
1009	blt	2b
1010	  add	%o3, 8, %o3			/* delay slot: next word */
1011
1012	retl
1013	  wrpr	%g0, %o5, %pstate		/* delay slot: restore the entry pstate */
1014	SET_SIZE(get_pcache_dtag)
1015
1016#endif	/* lint */
1017
1018#endif	/* CPU_IMP_L1_CACHE_PARITY */
1019
1020#if defined(lint)
1021
1022/* ARGSUSED */
1023void
1024set_dcu(uint64_t dcu)
1025{}
1026
1027#else	/* lint */
1028
1029/*
1030 * re-enable the i$, d$, w$, and p$ according to bootup cache state.
1031 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
1032 *   %o0 - 64 bit constant
1033 */
1034	ENTRY(set_dcu)				/* %o0 = new DCU register value */
1035	stxa	%o0, [%g0]ASI_DCU	! Store to DCU
1036	flush	%g0	/* flush required after changing the IC bit */
1037	retl
1038	nop
1039	SET_SIZE(set_dcu)
1040
1041#endif	/* lint */
1042
1043
1044#if defined(lint)
1045
1046uint64_t
1047get_dcu(void)
1048{
1049	return ((uint64_t)0);
1050}
1051
1052#else	/* lint */
1053
1054/*
1055 * Return DCU register.
1056 */
1057	ENTRY(get_dcu)				/* no arguments; result is returned in %o0 */
1058	ldxa	[%g0]ASI_DCU, %o0		/* DCU control register */
1059	retl
1060	nop
1061	SET_SIZE(get_dcu)
1062
1063#endif	/* lint */
1064
1065/*
1066 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
1067 *
1068 * This handler is used to check for softints generated by error trap
1069 * handlers to report errors.  On Cheetah, this mechanism is used by the
1070 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
1071 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
1072 * NB: Must be 8 instructions or less to fit in trap table and code must
1073 *     be relocatable.
1074 */
1075#if defined(lint)
1076
1077void
1078ch_pil15_interrupt_instr(void)
1079{}
1080
1081#else	/* lint */
1082
1083	ENTRY_NP(ch_pil15_interrupt_instr)
1084	ASM_JMP(%g1, ch_pil15_interrupt)	/* transfer to ch_pil15_interrupt; %g1 is handed to the macro (presumably as scratch) */
1085	SET_SIZE(ch_pil15_interrupt_instr)
1086
1087#endif
1088
1089
1090#if defined(lint)
1091
1092void
1093ch_pil15_interrupt(void)
1094{}
1095
1096#else	/* lint */
1097
1098	ENTRY_NP(ch_pil15_interrupt)
1099
1100	/*
1101	 * Since pil_interrupt is hacked to assume that every level 15
1102	 * interrupt is generated by the CPU to indicate a performance
1103	 * counter overflow this gets ugly.  Before calling pil_interrupt
1104	 * the Error at TL>0 pending status is inspected.  If it is
1105	 * non-zero, then an error has occurred and it is handled.
1106	 * Otherwise control is transferred to pil_interrupt.  Note that if
1107	 * an error is detected pil_interrupt will not be called and
1108	 * overflow interrupts may be lost causing erroneous performance
1109	 * measurements.  However, error-recovery will have a detrimental
1110	 * effect on performance anyway.
1111	 */
1112	CPU_INDEX(%g1, %g4)
1113	set	ch_err_tl1_pending, %g4
1114	ldub	[%g1 + %g4], %g2		/* %g2 = ch_err_tl1_pending byte at offset %g1 */
1115	brz	%g2, 1f				/* nothing pending: default handler */
1116	  nop
1117
1118	/*
1119	 * We have a pending TL>0 error, clear the TL>0 pending status.
1120	 */
1121	stb	%g0, [%g1 + %g4]
1122
1123	/*
1124	 * Clear the softint.
1125	 */
1126	mov	1, %g5
1127	sll	%g5, PIL_15, %g5		/* %g5 = 1 << PIL_15 */
1128	wr	%g5, CLEAR_SOFTINT
1129
1130	/*
1131	 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
1132	 * to process the Fast ECC/Cache Parity at TL>0 error.  Clear
1133	 * panic flag (%g2).
1134	 */
1135	set	cpu_tl1_error, %g1
1136	clr	%g2
1137	ba	sys_trap
1138	  mov	PIL_15, %g4			/* delay slot: %g4 = PIL_15 */
1139
11401:
1141	/*
1142	 * The logout is invalid.
1143	 *
1144	 * Call the default interrupt handler.
1145	 */
1146	sethi	%hi(pil_interrupt), %g1
1147	jmp	%g1 + %lo(pil_interrupt)
1148	  mov	PIL_15, %g4			/* delay slot: %g4 = PIL_15 */
1149
1150	SET_SIZE(ch_pil15_interrupt)
1151#endif
1152
1153
1154/*
1155 * Error Handling
1156 *
1157 * Cheetah provides error checking for all memory access paths between
1158 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
1159 * information is logged in the AFSR, (also AFSR_EXT for Panther) and
1160 * AFAR and one of the following traps is generated (provided that it
1161 * is enabled in External Cache Error Enable Register) to handle that
1162 * error:
1163 * 1. trap 0x70: Precise trap
1164 *    tt0_fecc for errors at trap level(TL)>=0
1165 * 2. trap 0x0A and 0x32: Deferred trap
1166 *    async_err for errors at TL>=0
1167 * 3. trap 0x63: Disrupting trap
1168 *    ce_err for errors at TL=0
1169 *    (Note that trap 0x63 cannot happen at trap level > 0)
1170 *
1171 * Trap level one handlers panic the system except for the fast ecc
1172 * error handler which tries to recover from certain errors.
1173 */
1174
1175/*
1176 * FAST ECC TRAP STRATEGY:
1177 *
1178 * Software must handle single and multi bit errors which occur due to data
1179 * or instruction cache reads from the external cache. A single or multi bit
1180 * error occurring in one of these situations results in a precise trap.
1181 *
1182 * The basic flow of this trap handler is as follows:
1183 *
1184 * 1) Record the state and then turn off the Dcache and Icache.  The Dcache
1185 *    is disabled because bad data could have been installed.  The Icache is
1186 *    turned off because we want to capture the Icache line related to the
1187 *    AFAR.
1188 * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing.
1189 * 3) Park sibling core if caches are shared (to avoid race condition while
1190 *    accessing shared resources such as L3 data staging register during
1191 *    CPU logout).
1192 * 4) Read the AFAR and AFSR.
1193 * 5) If CPU logout structure is not being used, then:
1194 *    6) Clear all errors from the AFSR.
1195 *    7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
1196 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
1197 *       state.
1198 *    9) Unpark sibling core if we parked it earlier.
1199 *    10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
1200 *        running at PIL 15.
1201 * 6) Otherwise, if CPU logout structure is being used:
1202 *    7) Increment the "logout busy count".
1203 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
1204 *       state.
1205 *    9) Unpark sibling core if we parked it earlier.
1206 *    10) Issue a retry since the other CPU error logging code will end up
1207 *       finding this error bit and logging information about it later.
1208 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
1209 *    yet initialized such that we can't even check the logout struct, then
1210 *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
1211 *    call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
1212 *    to determine information such as TL, TT, CEEN and NCEEN settings, etc
1213 *    in the high level trap handler since we don't have access to detailed
1214 *    logout information in cases where the cpu_private struct is not yet
1215 *    initialized.
1216 *
1217 * We flush the E$ and D$ here on TL=1 code to prevent getting nested
1218 * Fast ECC traps in the TL=0 code.  If we get a Fast ECC event here in
1219 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
1220 * since it uses different code/data from this handler, has a better
1221 * chance of fixing things up than simply recursing through this code
1222 * again (this would probably cause an eventual kernel stack overflow).
1223 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
1224 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
1225 * the Fast ECC at TL>0 handler and eventually Red Mode.
1226 *
1227 * Note that for Cheetah (and only Cheetah), we use alias addresses for
1228 * flushing rather than ASI accesses (which don't exist on Cheetah).
1229 * Should we encounter a Fast ECC error within this handler on Cheetah,
1230 * there's a good chance it's within the ecache_flushaddr buffer (since
1231 * it's the largest piece of memory we touch in the handler and it is
1232 * usually kernel text/data).  For that reason the Fast ECC at TL>0
1233 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
1234 */
1235
1236/*
1237 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
1238 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
1239 * architecture-specific files.
1240 * NB: Must be 8 instructions or less to fit in trap table and code must
1241 *     be relocatable.
1242 */
1243
1244#if defined(lint)
1245
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
1246void
1247fecc_err_instr(void)
1248{}
1249
1250#else	/* lint */
1251
	/*
	 * Entry stub: save the DCU register, switch the Dcache and Icache
	 * off, then continue in fast_ecc_err.
	 *
	 * Register use:
	 *   %g1 = DCU value as read on entry; left untouched so that the
	 *         fast_ecc_err defined in this file can write it back to
	 *         ASI_DCU on its way out.
	 *   %g4 = scratch (DCU value with DCU_DC and DCU_IC cleared, then
	 *         handed to ASM_JMP).
	 */
1252	ENTRY_NP(fecc_err_instr)
1253	membar	#Sync			! Cheetah requires membar #Sync
1254
1255	/*
1256	 * Save current DCU state.  Turn off the Dcache and Icache.
1257	 */
1258	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1259	andn	%g1, DCU_DC + DCU_IC, %g4
1260	stxa	%g4, [%g0]ASI_DCU
1261	flush	%g0	/* flush required after changing the IC bit */
1262
	/*
	 * Transfer control to fast_ecc_err.  NOTE(review): ASM_JMP is
	 * defined outside this section; %g4 is presumably the scratch
	 * register it uses to form the target address - confirm.
	 */
1263	ASM_JMP(%g4, fast_ecc_err)
1264	SET_SIZE(fecc_err_instr)
1265
1266#endif	/* lint */
1267
1268
1269#if !(defined(JALAPENO) || defined(SERRANO))
1270
1271#if defined(lint)
1272
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
1273void
1274fast_ecc_err(void)
1275{}
1276
1277#else	/* lint */
1278
	/*
	 * Fast ECC error handler body, reached from fecc_err_instr.
	 * This version is compiled only when neither JALAPENO nor SERRANO
	 * is defined.
	 *
	 * On entry %g1 holds the DCU value saved by fecc_err_instr; it is
	 * written back to ASI_DCU near the end of this routine.
	 *
	 * Exits, in the order they appear below:
	 *   - ptl1_panic(PTL1_BAD_ECC) when the logout structure was busy
	 *     and the count in %g3 is not below CLO_NESTING_MAX;
	 *   - retry when the logout structure was busy but the count is
	 *     still below CLO_NESTING_MAX;
	 *   - sys_trap with %g1 = cpu_fast_ecc_error when %g3 is zero.
	 */
1279	.section ".text"
1280	.align	64
1281	ENTRY_NP(fast_ecc_err)
1282
1283	/*
1284	 * Turn off CEEN and NCEEN.
1285	 */
	/*
	 * %g3 keeps the enable register as it was on entry (its CEEN and
	 * NCEEN bits are recorded in the logout flags further down);
	 * %g4 is the copy with both bits cleared that gets written back.
	 */
1286	ldxa	[%g0]ASI_ESTATE_ERR, %g3
1287	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1288	stxa	%g4, [%g0]ASI_ESTATE_ERR
1289	membar	#Sync			! membar sync required
1290
1291	/*
1292	 * Check to see whether we need to park our sibling core
1293	 * before recording diagnostic information from caches
1294	 * which may be shared by both cores.
1295	 * We use %g1 to store information about whether or not
1296	 * we had to park the core (%g1 holds our DCUCR value and
1297	 * we only use bits from that register which are "reserved"
1298	 * to keep track of core parking) so that we know whether
1299	 * or not to unpark later. %g5 and %g4 are scratch registers.
1300	 */
1301	PARK_SIBLING_CORE(%g1, %g5, %g4)
1302
1303	/*
1304	 * Do the CPU log out capture.
1305	 *   %g3 = "failed?" return value.
1306	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1307	 *         into this macro via %g4. Output only valid if cpu_private
1308	 *         struct has not been initialized.
1309	 *   CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
1310	 *   %g4 = Trap information stored in the cpu logout flags field
1311	 *   %g5 = scr1
1312	 *   %g6 = scr2
1313	 *   %g3 = scr3
1314	 *   %g4 = scr4
1315	 */
1316	 /* store the CEEN and NCEEN values, TL=0 */
	/*
	 * %g4 = only the CEEN/NCEEN bits of the entry-time enable register
	 * still held in %g3; %g6 = logout structure offset.  After the
	 * macro, %g3 no longer holds the enable register: it is the
	 * "failed?" result tested below.
	 */
1317	and	%g3, EN_REG_CEEN + EN_REG_NCEEN, %g4
1318	set	CHPR_FECCTL0_LOGOUT, %g6
1319	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1320
1321	/*
1322	 * Flush the Ecache (and L2 cache for Panther) to get the error out
1323	 * of the Ecache.  If the UCC or UCU is on a dirty line, then the
1324	 * following flush will turn that into a WDC or WDU, respectively.
1325	 */
1326	PN_L2_FLUSHALL(%g4, %g5, %g6)
1327
	/*
	 * Index the cpunodes array by this CPU's index (scaled by
	 * CPU_NODE_SIZE) and load the per-CPU E$ geometry:
	 *   %g5 = ECACHE_LINESIZE field, %g4 = ECACHE_SIZE field.
	 */
1328	CPU_INDEX(%g4, %g5)
1329	mulx	%g4, CPU_NODE_SIZE, %g4
1330	set	cpunodes, %g5
1331	add	%g4, %g5, %g4
1332	ld	[%g4 + ECACHE_LINESIZE], %g5
1333	ld	[%g4 + ECACHE_SIZE], %g4
1334
	/*
	 * NOTE(review): ASM_LDX presumably loads the 64-bit value of
	 * ecache_flushaddr into %g6; %g7 is passed to ECACHE_FLUSHALL as
	 * its fourth (presumably scratch) argument - confirm in the macros.
	 */
1335	ASM_LDX(%g6, ecache_flushaddr)
1336	ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)
1337
1338	/*
1339	 * Flush the Dcache.  Since bad data could have been installed in
1340	 * the Dcache we must flush it before re-enabling it.
1341	 */
1342	ASM_LD(%g5, dcache_size)
1343	ASM_LD(%g6, dcache_linesize)
1344	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1345
1346	/*
1347	 * Flush the Icache.  Since we turned off the Icache to capture the
1348	 * Icache line it is now stale or corrupted and we must flush it
1349	 * before re-enabling it.
1350	 */
	/*
	 * Two sources for the I$ geometry (%g5 = size, %g6 = line size):
	 * the CHPR_ICACHE_* fields reached through %g5 on the fall-through
	 * path, or the icache_size/icache_linesize variables at label
	 * fast_ecc_err_5.  NOTE(review): GET_CPU_PRIVATE_PTR presumably
	 * branches to that label when the cpu_private pointer is not
	 * available - confirm in the macro.  The load of CHPR_ICACHE_SIZE
	 * sits in the delay slot of the branch to 6f.
	 */
1351	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
1352	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1353	ba,pt	%icc, 6f
1354	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1355fast_ecc_err_5:
1356	ASM_LD(%g5, icache_size)
1357	ASM_LD(%g6, icache_linesize)
13586:
1359	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1360
1361	/*
1362	 * check to see whether we parked our sibling core at the start
1363	 * of this handler. If so, we need to unpark it here.
1364	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1365	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1366	 */
1367	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1368
1369	/*
1370	 * Restore the Dcache and Icache to the previous state.
1371	 */
1372	stxa	%g1, [%g0]ASI_DCU
1373	flush	%g0	/* flush required after changing the IC bit */
1374
1375	/*
1376	 * Make sure our CPU logout operation was successful.
1377	 */
	/* %g3 == 0: take the sys_trap path at label 8. */
1378	cmp	%g3, %g0
1379	be	8f
1380	  nop
1381
1382	/*
1383	 * If the logout structure had been busy, how many times have
1384	 * we tried to use it and failed (nesting count)? If we have
1385	 * already recursed a substantial number of times, then we can
1386	 * assume things are not going to get better by themselves and
1387	 * so it would be best to panic.
1388	 */
1389	cmp	%g3, CLO_NESTING_MAX
1390	blt	7f
1391	  nop
1392
	/* The delay slot of the call loads the panic reason into %g1. */
1393        call ptl1_panic
1394          mov   PTL1_BAD_ECC, %g1
1395
13967:
1397	/*
1398	 * Otherwise, if the logout structure was busy but we have not
1399	 * nested more times than our maximum value, then we simply
1400	 * issue a retry. Our TL=0 trap handler code will check and
1401	 * clear the AFSR after it is done logging what is currently
1402	 * in the logout struct and handle this event at that time.
1403	 */
1404	retry
14058:
1406	/*
1407	 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
1408	 * already at PIL 15.
1409	 */
	/*
	 * %g1 = handler address, %g4 = PIL argument.  The movl in the
	 * branch delay slot replaces %g4 with PIL_14 only when the current
	 * PIL compared less than PIL_14; otherwise %g4 keeps the current
	 * PIL value read from %pil.
	 */
1410	set	cpu_fast_ecc_error, %g1
1411	rdpr	%pil, %g4
1412	cmp	%g4, PIL_14
1413	ba	sys_trap
1414	  movl	%icc, PIL_14, %g4
1415
1416	SET_SIZE(fast_ecc_err)
1417
1418#endif	/* lint */
1419
1420#endif	/* !(JALAPENO || SERRANO) */
1421
1422
1423/*
1424 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1425 *
1426 * The basic flow of this trap handler is as follows:
1427 *
1428 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1429 *    software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1430 *    will use to save %g1 and %g2.
1431 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1432 *    we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1433 *    handler (using the just saved %g1).
1434 * 3) Turn off the Dcache if it was on and save the state of the Dcache
1435 *    (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1436 *    NB: we don't turn off the Icache because bad data is not installed nor
1437 *        will we be doing any diagnostic accesses.
1438 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1439 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1440 *    %tpc, %tnpc, %tstate values previously saved).
1441 * 6) set %tl to %tl - 1.
1442 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
1443 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
1444 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear.  For
1445 *    Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
1446 *    Save the values in ch_err_tl1_data.  For Panther, read the shadow
1447 *    AFSR_EXT and save the value in ch_err_tl1_data.
1448 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
1449 *    being queued.  We'll report them via the AFSR/AFAR capture in step 13.
1450 * 11) Flush the Ecache.
1451 *    NB: the Ecache is flushed assuming the largest possible size with
1452 *        the smallest possible line size since access to the cpu_nodes may
1453 *        cause an unrecoverable DTLB miss.
1454 * 12) Reenable CEEN/NCEEN with the value saved from step 10.
1455 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
1456 *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
1457 *    Save the read AFSR/AFAR values in ch_err_tl1_data.  For Panther,
1458 *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1459 * 14) Flush and re-enable the Dcache if it was on at step 3.
1460 * 15) Do TRAPTRACE if enabled.
1461 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1462 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1463 * 18) Cause a softint 15.  The pil15_interrupt handler will inspect the
1464 *    event pending flag and call cpu_tl1_error via systrap if set.
1465 * 19) Restore the registers from step 5 and issue retry.
1466 */
1467
1468/*
1469 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1470 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1471 * architecture-specific files.  This generates a "Software Trap 0" at TL>0,
1472 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1473 * NB: Must be 8 instructions or less to fit in trap table and code must
1474 *     be relocatable.
1475 */
1476
1477#if defined(lint)
1478
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
1479void
1480fecc_err_tl1_instr(void)
1481{}
1482
1483#else	/* lint */
1484
	/*
	 * The entire body is one expansion of CH_ERR_TL1_TRAPENTRY with
	 * SWTRAP_0 as its argument.  The macro is defined outside this
	 * section; per the description in this file it raises software
	 * trap 0 at TL>0, which is handled by fecc_err_tl1_cont_instr.
	 */
1485	ENTRY_NP(fecc_err_tl1_instr)
1486	CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
1487	SET_SIZE(fecc_err_tl1_instr)
1488
1489#endif	/* lint */
1490
1491/*
1492 * Software trap 0 at TL>0.
1493 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1494 * the various architecture-specific files.  This is used as a continuation
1495 * of the fast ecc handling where we've bought an extra TL level, so we can
1496 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1497 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1498 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1499 * order two bits from %g1 and %g2 respectively).
1500 * NB: Must be 8 instructions or less to fit in trap table and code must
1501 *     be relocatable.
1502 */
1503#if defined(lint)
1504
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
1505void
1506fecc_err_tl1_cont_instr(void)
1507{}
1508
1509#else	/* lint */
1510
	/*
	 * The entire body is one expansion of CH_ERR_TL1_SWTRAPENTRY with
	 * fast_ecc_tl1_err as the continuation target.  The macro is
	 * defined outside this section; per the description in this file
	 * it stashes %g1 and %g2 in %tpc, %tnpc and %tstate before jumping.
	 */
1511	ENTRY_NP(fecc_err_tl1_cont_instr)
1512	CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
1513	SET_SIZE(fecc_err_tl1_cont_instr)
1514
1515#endif	/* lint */
1516
1517
1518#if defined(lint)
1519
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
1520void
1521ce_err(void)
1522{}
1523
1524#else	/* lint */
1525
1526/*
1527 * The ce_err function handles disrupting trap type 0x63 at TL=0.
1528 *
1529 * AFSR errors bits which cause this trap are:
1530 *	CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1531 *
1532 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1533 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1534 *
1535 * CEEN Bit of Cheetah External Cache Error Enable Register enables
1536 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1537 *
1538 * Cheetah+ also handles (No additional processing required):
1539 *    DUE, DTO, DBERR	(NCEEN controlled)
1540 *    THCE		(CEEN and ET_ECC_en controlled)
1541 *    TUE		(ET_ECC_en controlled)
1542 *
1543 * Panther further adds:
1544 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1545 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1546 *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1547 *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1548 *    THCE			(CEEN and L2_tag_ECC_en controlled)
1549 *    L3_THCE			(CEEN and ET_ECC_en controlled)
1550 *
1551 * Steps:
1552 *	1. Disable hardware corrected disrupting errors only (CEEN)
1553 *	2. Park sibling core if caches are shared (to avoid race
1554 *	   condition while accessing shared resources such as L3
1555 *	   data staging register during CPU logout).
1556 *	3. If the CPU logout structure is not currently being used:
1557 *		4. Clear AFSR error bits
1558 *		5. Capture Ecache, Dcache and Icache lines associated
1559 *		   with AFAR.
1560 *		6. Unpark sibling core if we parked it earlier.
1561 *		7. call cpu_disrupting_error via sys_trap at PIL 14
1562 *		   unless we're already running at PIL 15.
1563 *	4. Otherwise, if the CPU logout structure is busy:
1564 *		5. Increment "logout busy count" and place into %g3
1565 *		6. Unpark sibling core if we parked it earlier.
1566 *		7. Issue a retry since the other CPU error logging
1567 *		   code will end up finding this error bit and logging
1568 *		   information about it later.
1569 *	5. Alternatively (to 3 and 4 above), if the cpu_private struct is
1570 *         not yet initialized such that we can't even check the logout
1571 *         struct, then we place the clo_flags data into %g2
1572 *         (sys_trap->have_win arg #1) and call cpu_disrupting_error via
1573 *         systrap. The clo_flags parameter is used to determine information
1574 *         such as TL, TT, CEEN settings, etc in the high level trap
1575 *         handler since we don't have access to detailed logout information
1576 *         in cases where the cpu_private struct is not yet initialized.
1577 *
1578 * %g3: [ logout busy count ] - arg #2
1579 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1580 */
1581
1582	.align	128
1583	ENTRY_NP(ce_err)
1584	membar	#Sync			! Cheetah requires membar #Sync
1585
1586	/*
1587	 * Disable trap on hardware corrected errors (CEEN) while at TL=0
1588	 * to prevent recursion.
1589	 */
	/*
	 * %g1 is only a temporary here; it is reloaded with the DCU value
	 * immediately below.  Unlike fast_ecc_err and async_err, only the
	 * CEEN bit is cleared; NCEEN is left as it was.
	 */
1590	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1591	bclr	EN_REG_CEEN, %g1
1592	stxa	%g1, [%g0]ASI_ESTATE_ERR
1593	membar	#Sync			! membar sync required
1594
1595	/*
1596	 * Save current DCU state.  Turn off Icache to allow capture of
1597	 * Icache data by DO_CPU_LOGOUT.
1598	 */
	/*
	 * %g1 = DCU value as read here (written back to ASI_DCU near the
	 * end); %g4 = copy with only DCU_IC cleared.  The Dcache enable
	 * bit is not touched in this handler.
	 */
1599	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1600	andn	%g1, DCU_IC, %g4
1601	stxa	%g4, [%g0]ASI_DCU
1602	flush	%g0	/* flush required after changing the IC bit */
1603
1604	/*
1605	 * Check to see whether we need to park our sibling core
1606	 * before recording diagnostic information from caches
1607	 * which may be shared by both cores.
1608	 * We use %g1 to store information about whether or not
1609	 * we had to park the core (%g1 holds our DCUCR value and
1610	 * we only use bits from that register which are "reserved"
1611	 * to keep track of core parking) so that we know whether
1612	 * or not to unpark later. %g5 and %g4 are scratch registers.
1613	 */
1614	PARK_SIBLING_CORE(%g1, %g5, %g4)
1615
1616	/*
1617	 * Do the CPU log out capture.
1618	 *   %g3 = "failed?" return value.
1619	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1620	 *         into this macro via %g4. Output only valid if cpu_private
1621	 *         struct has not been initialized.
1622	 *   CHPR_CECC_LOGOUT = cpu logout structure offset input
1623	 *   %g4 = Trap information stored in the cpu logout flags field
1624	 *   %g5 = scr1
1625	 *   %g6 = scr2
1626	 *   %g3 = scr3
1627	 *   %g4 = scr4
1628	 */
	/*
	 * The flags word passed in %g4 is zero for this handler; %g6
	 * carries the logout structure offset into the macro.
	 */
1629	clr	%g4			! TL=0 bit in afsr
1630	set	CHPR_CECC_LOGOUT, %g6
1631	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1632
1633	/*
1634	 * Flush the Icache.  Since we turned off the Icache to capture the
1635	 * Icache line it is now stale or corrupted and we must flush it
1636	 * before re-enabling it.
1637	 */
	/*
	 * Two sources for the I$ geometry (%g5 = size, %g6 = line size):
	 * the CHPR_ICACHE_* fields reached through %g5 on the fall-through
	 * path, or the icache_size/icache_linesize variables at label
	 * ce_err_1.  NOTE(review): GET_CPU_PRIVATE_PTR presumably branches
	 * to that label when the cpu_private pointer is not available -
	 * confirm in the macro.  The load of CHPR_ICACHE_SIZE sits in the
	 * delay slot of the branch to 2f.
	 */
1638	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
1639	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1640	ba,pt	%icc, 2f
1641	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1642ce_err_1:
1643	ASM_LD(%g5, icache_size)
1644	ASM_LD(%g6, icache_linesize)
16452:
1646	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1647
1648	/*
1649	 * check to see whether we parked our sibling core at the start
1650	 * of this handler. If so, we need to unpark it here.
1651	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1652	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
1653	 */
1654	UNPARK_SIBLING_CORE(%g1, %g5, %g4)
1655
1656	/*
1657	 * Restore Icache to previous state.
1658	 */
1659	stxa	%g1, [%g0]ASI_DCU
1660	flush	%g0	/* flush required after changing the IC bit */
1661
1662	/*
1663	 * Make sure our CPU logout operation was successful.
1664	 */
	/* %g3 == 0: take the sys_trap path at label 4. */
1665	cmp	%g3, %g0
1666	be	4f
1667	  nop
1668
1669	/*
1670	 * If the logout structure had been busy, how many times have
1671	 * we tried to use it and failed (nesting count)? If we have
1672	 * already recursed a substantial number of times, then we can
1673	 * assume things are not going to get better by themselves and
1674	 * so it would be best to panic.
1675	 */
1676	cmp	%g3, CLO_NESTING_MAX
1677	blt	3f
1678	  nop
1679
	/* The delay slot of the call loads the panic reason into %g1. */
1680        call ptl1_panic
1681          mov   PTL1_BAD_ECC, %g1
1682
16833:
1684	/*
1685	 * Otherwise, if the logout structure was busy but we have not
1686	 * nested more times than our maximum value, then we simply
1687	 * issue a retry. Our TL=0 trap handler code will check and
1688	 * clear the AFSR after it is done logging what is currently
1689	 * in the logout struct and handle this event at that time.
1690	 */
1691	retry
16924:
1693	/*
1694	 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
1695	 * already at PIL 15.
1696	 */
	/*
	 * %g1 = handler address, %g4 = PIL argument.  The movl in the
	 * branch delay slot replaces %g4 with PIL_14 only when the current
	 * PIL compared less than PIL_14; otherwise %g4 keeps the current
	 * PIL value read from %pil.
	 */
1697	set	cpu_disrupting_error, %g1
1698	rdpr	%pil, %g4
1699	cmp	%g4, PIL_14
1700	ba	sys_trap
1701	  movl	%icc, PIL_14, %g4
1702	SET_SIZE(ce_err)
1703
1704#endif	/* lint */
1705
1706
1707#if defined(lint)
1708
1709/*
1710 * This trap cannot happen at TL>0 which means this routine will never
1711 * actually be called and so we treat this like a BAD TRAP panic.
1712 */
1713void
1714ce_err_tl1(void)
1715{}
1716
1717#else	/* lint */
1718
	/*
	 * Unconditionally panics through ptl1_panic; the delay slot of
	 * the call loads PTL1_BAD_TRAP into %g1 as the panic reason.
	 * There is no other path out of this routine.
	 */
1719	.align	64
1720	ENTRY_NP(ce_err_tl1)
1721
1722        call ptl1_panic
1723          mov   PTL1_BAD_TRAP, %g1
1724
1725	SET_SIZE(ce_err_tl1)
1726
1727#endif	/* lint */
1728
1729
1730#if defined(lint)
1731
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
1732void
1733async_err(void)
1734{}
1735
1736#else	/* lint */
1737
1738/*
1739 * The async_err function handles deferred trap types 0xA
1740 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1741 *
1742 * AFSR errors bits which cause this trap are:
1743 *	UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1744 * On some platforms, EMU may cause Cheetah to pull the error pin
1745 * never giving Solaris a chance to take a trap.
1746 *
1747 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1748 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1749 *
1750 * Steps:
1751 *	1. Disable CEEN and NCEEN errors to prevent recursive errors.
1752 *	2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1753 *         I$ line in DO_CPU_LOGOUT.
1754 *	3. Park sibling core if caches are shared (to avoid race
1755 *	   condition while accessing shared resources such as L3
1756 *	   data staging register during CPU logout).
1757 *	4. If the CPU logout structure is not currently being used:
1758 *		5. Clear AFSR error bits
1759 *		6. Capture Ecache, Dcache and Icache lines associated
1760 *		   with AFAR.
1761 *		7. Unpark sibling core if we parked it earlier.
1762 *		8. call cpu_deferred_error via sys_trap.
1763 *	5. Otherwise, if the CPU logout structure is busy:
1764 *		6. Increment "logout busy count"
1765 *		7. Unpark sibling core if we parked it earlier.
1766 *		8. Issue a retry since the other CPU error logging
1767 *		   code will end up finding this error bit and logging
1768 *		   information about it later.
1769 *      6. Alternatively (to 4 and 5 above), if the cpu_private struct is
1770 *         not yet initialized such that we can't even check the logout
1771 *         struct, then we place the clo_flags data into %g2
1772 *         (sys_trap->have_win arg #1) and call cpu_deferred_error via
1773 *         systrap. The clo_flags parameter is used to determine information
1774 *         such as TL, TT, CEEN settings, etc in the high level trap handler
1775 *         since we don't have access to detailed logout information in cases
1776 *         where the cpu_private struct is not yet initialized.
1777 *
1778 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1779 * %g3: [ logout busy count ] - arg #2
1780 */
1781
1782	ENTRY_NP(async_err)
1783	membar	#Sync			! Cheetah requires membar #Sync
1784
1785	/*
1786	 * Disable CEEN and NCEEN.
1787	 */
	/*
	 * %g3 keeps the enable register as it was on entry (its CEEN bit
	 * is folded into the logout flags further down); %g4 is the copy
	 * with both bits cleared that gets written back.
	 */
1788	ldxa	[%g0]ASI_ESTATE_ERR, %g3
1789	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1790	stxa	%g4, [%g0]ASI_ESTATE_ERR
1791	membar	#Sync			! membar sync required
1792
1793	/*
1794	 * Save current DCU state.
1795	 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
1796	 * Do this regardless of whether this is a Data Access Error or
1797	 * Instruction Access Error Trap.
1798	 * Disable Dcache for both Data Access Error and Instruction Access
1799	 * Error per Cheetah PRM P.5 Note 6.
1800	 */
1801	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
1802	andn	%g1, DCU_IC + DCU_DC, %g4
1803	stxa	%g4, [%g0]ASI_DCU
1804	flush	%g0	/* flush required after changing the IC bit */
1805
1806	/*
1807	 * Check to see whether we need to park our sibling core
1808	 * before recording diagnostic information from caches
1809	 * which may be shared by both cores.
1810	 * We use %g1 to store information about whether or not
1811	 * we had to park the core (%g1 holds our DCUCR value and
1812	 * we only use bits from that register which are "reserved"
1813	 * to keep track of core parking) so that we know whether
1814	 * or not to unpark later. %g6 and %g4 are scratch registers.
1815	 */
1816	PARK_SIBLING_CORE(%g1, %g6, %g4)
1817
1818	/*
1819	 * Do the CPU logout capture.
1820	 *
1821	 *   %g3 = "failed?" return value.
1822	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
1823	 *         into this macro via %g4. Output only valid if cpu_private
1824	 *         struct has not been initialized.
1825	 *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
1826	 *   %g4 = Trap information stored in the cpu logout flags field
1827	 *   %g5 = scr1
1828	 *   %g6 = scr2
1829	 *   %g3 = scr3
1830	 *   %g4 = scr4
1831	 */
	/*
	 * Build the flags word in %g4 from three pieces:
	 *   - TL: 1 << CLO_FLAGS_TL_SHIFT when the T_TL1 bit is set in %g5,
	 *     else 0;
	 *   - TT: %g5 << CLO_FLAGS_TT_SHIFT, masked with CLO_FLAGS_TT_MASK;
	 *   - the CEEN bit of the entry-time enable register held in %g3.
	 * NOTE(review): %g5 is not written in this routine before this
	 * point, so it is presumably the trap type supplied by the code
	 * that jumped here - confirm against the trap table entry.
	 * %g2 is used as a temporary for the mask and %g6 is reloaded
	 * with the logout structure offset before the macro.
	 */
1832	andcc	%g5, T_TL1, %g0
1833	clr	%g6
1834	movnz	%xcc, 1, %g6			! set %g6 if T_TL1 set
1835	sllx	%g6, CLO_FLAGS_TL_SHIFT, %g6
1836	sllx	%g5, CLO_FLAGS_TT_SHIFT, %g4
1837	set	CLO_FLAGS_TT_MASK, %g2
1838	and	%g4, %g2, %g4			! ttype
1839	or	%g6, %g4, %g4			! TT and TL
1840	and	%g3, EN_REG_CEEN, %g3		! CEEN value
1841	or	%g3, %g4, %g4			! TT and TL and CEEN
1842	set	CHPR_ASYNC_LOGOUT, %g6
1843	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)
1844
1845	/*
1846	 * If the logout struct was busy, we may need to pass the
1847	 * TT, TL, and CEEN information to the TL=0 handler via
1848	 * systrap parameter so save it off here.
1849	 */
	/*
	 * When %g3 is non-zero, %g4 is shifted into bits 63:32 and OR-ed
	 * into %g3, leaving the low 32 bits of %g3 unchanged.
	 */
1850	cmp	%g3, %g0
1851	be	1f
1852	  nop
1853	sllx	%g4, 32, %g4
1854	or	%g4, %g3, %g3
18551:
1856	/*
1857	 * Flush the Icache.  Since we turned off the Icache to capture the
1858	 * Icache line it is now stale or corrupted and we must flush it
1859	 * before re-enabling it.
1860	 */
	/*
	 * Two sources for the I$ geometry (%g5 = size, %g6 = line size):
	 * the CHPR_ICACHE_* fields reached through %g5 on the fall-through
	 * path, or the icache_size/icache_linesize variables at label
	 * async_err_1.  NOTE(review): GET_CPU_PRIVATE_PTR presumably
	 * branches to that label when the cpu_private pointer is not
	 * available - confirm in the macro.  The load of CHPR_ICACHE_SIZE
	 * sits in the delay slot of the branch to 2f.
	 */
1861	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
1862	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
1863	ba,pt	%icc, 2f
1864	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5
1865async_err_1:
1866	ASM_LD(%g5, icache_size)
1867	ASM_LD(%g6, icache_linesize)
18682:
1869	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)
1870
1871	/*
1872	 * XXX - Don't we need to flush the Dcache before turning it back
1873	 *       on to avoid stale or corrupt data? Was this broken?
1874	 */
	/*
	 * NOTE(review): the XXX question above appears to be answered by
	 * the Dcache flush that immediately follows, so that comment looks
	 * stale - confirm before removing it.
	 */
1875	/*
1876	 * Flush the Dcache before turning it back on since it may now
1877	 * contain stale or corrupt data.
1878	 */
1879	ASM_LD(%g5, dcache_size)
1880	ASM_LD(%g6, dcache_linesize)
1881	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)
1882
1883	/*
1884	 * check to see whether we parked our sibling core at the start
1885	 * of this handler. If so, we need to unpark it here.
1886	 * We use DCUCR reserved bits (stored in %g1) to keep track of
1887	 * whether or not we need to unpark. %g5 and %g7 are scratch registers.
1888	 */
1889	UNPARK_SIBLING_CORE(%g1, %g5, %g7)
1890
1891	/*
1892	 * Restore Icache and Dcache to previous state.
1893	 */
1894	stxa	%g1, [%g0]ASI_DCU
1895	flush	%g0	/* flush required after changing the IC bit */
1896
1897	/*
1898	 * Make sure our CPU logout operation was successful.
1899	 */
	/* %g3 == 0: take the sys_trap path at label 4. */
1900	cmp	%g3, %g0
1901	be	4f
1902	  nop
1903
1904	/*
1905	 * If the logout structure had been busy, how many times have
1906	 * we tried to use it and failed (nesting count)? If we have
1907	 * already recursed a substantial number of times, then we can
1908	 * assume things are not going to get better by themselves and
1909	 * so it would be best to panic.
1910	 */
	/*
	 * blt carries no condition-code operand here, so on SPARC V9 it
	 * tests %icc: only the low 32 bits of %g3 take part in this
	 * comparison, not the flags packed into the upper half above.
	 */
1911	cmp	%g3, CLO_NESTING_MAX
1912	blt	3f
1913	  nop
1914
	/* The delay slot of the call loads the panic reason into %g1. */
1915        call ptl1_panic
1916          mov   PTL1_BAD_ECC, %g1
1917
19183:
1919	/*
1920	 * Otherwise, if the logout structure was busy but we have not
1921	 * nested more times than our maximum value, then we simply
1922	 * issue a retry. Our TL=0 trap handler code will check and
1923	 * clear the AFSR after it is done logging what is currently
1924	 * in the logout struct and handle this event at that time.
1925	 */
1926	retry
19274:
	/*
	 * NOTE(review): RESET_USER_RTT_REGS is defined outside this
	 * section; %g4 and %g5 are passed as registers (presumably
	 * scratch) and 5f as a label argument - confirm its contract.
	 * Afterwards %g1 = handler address and the delay slot of the
	 * branch sets the PIL argument in %g4 to PIL_15.
	 */
1928	RESET_USER_RTT_REGS(%g4, %g5, 5f)
19295:
1930	set	cpu_deferred_error, %g1
1931	ba	sys_trap
1932	  mov	PIL_15, %g4		! run at pil 15
1933	SET_SIZE(async_err)
1934
1935#endif	/* lint */
1936
1937#if defined(CPU_IMP_L1_CACHE_PARITY)
1938
1939/*
1940 * D$ parity error trap (trap 71) at TL=0.
1941 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
1942 * the various architecture-specific files.  This merely sets up the
1943 * arguments for cpu_parity_error and calls it via sys_trap.
1944 * NB: Must be 8 instructions or less to fit in trap table and code must
1945 *     be relocatable.
1946 */
1947#if defined(lint)
1948
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
1949void
1950dcache_parity_instr(void)
1951{}
1952
1953#else	/* lint */
	/*
	 * Registers set up before jumping to sys_trap:
	 *   %g1 = address of cpu_parity_error
	 *   %g2 = CH_ERR_DPE
	 *   %g3 = %tpc of the trap
	 *   %g4 = PIL_15 (loaded in the delay slot of the jmp)
	 *   %g7 = scratch, upper part of the sys_trap address
	 * The sethi/jmp pair reaches sys_trap through an absolute address
	 * held in a register rather than a pc-relative branch.
	 */
1954	ENTRY_NP(dcache_parity_instr)
1955	membar	#Sync			! Cheetah+ requires membar #Sync
1956	set	cpu_parity_error, %g1
1957	or	%g0, CH_ERR_DPE, %g2
1958	rdpr	%tpc, %g3
1959	sethi	%hi(sys_trap), %g7
1960	jmp	%g7 + %lo(sys_trap)
1961	  mov	PIL_15, %g4		! run at pil 15
1962	SET_SIZE(dcache_parity_instr)
1963
1964#endif	/* lint */
1965
1966
1967/*
1968 * D$ parity error trap (trap 71) at TL>0.
1969 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
1970 * the various architecture-specific files.  This generates a "Software
1971 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
1972 * continue the handling there.
1973 * NB: Must be 8 instructions or less to fit in trap table and code must
1974 *     be relocatable.
1975 */
1976#if defined(lint)
1977
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
1978void
1979dcache_parity_tl1_instr(void)
1980{}
1981
1982#else	/* lint */
	/*
	 * The entire body is one expansion of CH_ERR_TL1_TRAPENTRY with
	 * SWTRAP_1 as its argument.  The macro is defined outside this
	 * section; per the description in this file it raises software
	 * trap 1 at TL>0, handled by dcache_parity_tl1_cont_instr.
	 */
1983	ENTRY_NP(dcache_parity_tl1_instr)
1984	CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
1985	SET_SIZE(dcache_parity_tl1_instr)
1986
1987#endif	/* lint */
1988
1989
1990/*
1991 * Software trap 1 at TL>0.
1992 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
1993 * of the various architecture-specific files.  This is used as a continuation
1994 * of the dcache parity handling where we've bought an extra TL level, so we
1995 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1996 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1997 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1998 * order two bits from %g1 and %g2 respectively).
1999 * NB: Must be 8 instructions or less to fit in trap table and code must
2000 *     be relocatable.
2001 */
2002#if defined(lint)
2003
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
2004void
2005dcache_parity_tl1_cont_instr(void)
2006{}
2007
2008#else	/* lint */
	/*
	 * The entire body is one expansion of CH_ERR_TL1_SWTRAPENTRY with
	 * dcache_parity_tl1_err as the continuation target.  The macro is
	 * defined outside this section; per the description in this file
	 * it stashes %g1 and %g2 in %tpc, %tnpc and %tstate before jumping.
	 */
2009	ENTRY_NP(dcache_parity_tl1_cont_instr)
2010	CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
2011	SET_SIZE(dcache_parity_tl1_cont_instr)
2012
2013#endif	/* lint */
2014
2015/*
2016 * D$ parity error at TL>0 handler
2017 * We get here via trap 71 at TL>0->Software trap 1 at TL>0.  We enter
2018 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2019 */
2020#if defined(lint)
2021
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
2022void
2023dcache_parity_tl1_err(void)
2024{}
2025
2026#else	/* lint */
2027
2028	ENTRY_NP(dcache_parity_tl1_err)
2029
2030	/*
2031	 * This macro saves all the %g registers in the ch_err_tl1_data
2032	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2033	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
2034	 * the ch_err_tl1_data structure and %g2 will have the original
2035	 * flags in the ch_err_tl1_data structure.  All %g registers
2036	 * except for %g1 and %g2 will be available.
2037	 */
2038	CH_ERR_TL1_ENTER(CH_ERR_DPE);
2039
2040#ifdef TRAPTRACE
2041	/*
2042	 * Get current trap trace entry physical pointer.
2043	 */
	/*
	 * %g6 = trap_trace_ctl + (cpu index << TRAPTR_SIZE_SHIFT), i.e.
	 * this CPU's trace control record.  Tracing is skipped entirely
	 * when its TRAPTR_LIMIT field is zero.  Otherwise
	 * %g5 = TRAPTR_PBASE + TRAPTR_OFFSET, the entry to fill in.
	 */
2044	CPU_INDEX(%g6, %g5)
2045	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
2046	set	trap_trace_ctl, %g5
2047	add	%g6, %g5, %g6
2048	ld	[%g6 + TRAPTR_LIMIT], %g5
2049	tst	%g5
2050	be	%icc, dpe_tl1_skip_tt
2051	  nop
2052	ldx	[%g6 + TRAPTR_PBASE], %g5
2053	ld	[%g6 + TRAPTR_OFFSET], %g4
2054	add	%g5, %g4, %g5
2055
2056	/*
2057	 * Create trap trace entry.
2058	 */
	/*
	 * %asi is saved in %g7, switched to TRAPTR_ASI for the stores,
	 * and restored afterwards.  The entry records STICK, %tl, %tt,
	 * %tpc, %tstate and %sp; the TR and F1-F4 fields are written
	 * as zero.
	 */
2059	rd	%asi, %g7
2060	wr	%g0, TRAPTR_ASI, %asi
2061	rd	STICK, %g4
2062	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
2063	rdpr	%tl, %g4
2064	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
2065	rdpr	%tt, %g4
2066	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
2067	rdpr	%tpc, %g4
2068	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
2069	rdpr	%tstate, %g4
2070	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
2071	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2072	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2073	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
2074	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
2075	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
2076	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
2077	wr	%g0, %g7, %asi
2078
2079	/*
2080	 * Advance trap trace pointer.
2081	 */
	/*
	 * The old offset is remembered in TRAPTR_LAST_OFFSET.  The new
	 * offset is old + TRAP_ENT_SIZE, reset to 0 when it is greater
	 * than or equal to (TRAPTR_LIMIT - TRAP_ENT_SIZE).
	 */
2082	ld	[%g6 + TRAPTR_OFFSET], %g5
2083	ld	[%g6 + TRAPTR_LIMIT], %g4
2084	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
2085	add	%g5, TRAP_ENT_SIZE, %g5
2086	sub	%g4, TRAP_ENT_SIZE, %g4
2087	cmp	%g5, %g4
2088	movge	%icc, 0, %g5
2089	st	%g5, [%g6 + TRAPTR_OFFSET]
2090dpe_tl1_skip_tt:
2091#endif	/* TRAPTRACE */
2092
2093	/*
2094	 * I$ and D$ are automatically turned off by HW when the CPU hits
2095	 * a dcache or icache parity error so we will just leave those two
2096	 * off for now to avoid repeating this trap.
2097	 * For Panther, however, since we trap on P$ data parity errors
2098	 * and HW does not automatically disable P$, we need to disable it
2099	 * here so that we don't encounter any recursive traps when we
2100	 * issue the retry.
2101	 */
	/*
	 * Read-modify-write of the DCU: %g4 = 1 << DCU_PE_SHIFT is used
	 * as a single-bit mask, and that bit is cleared in the value
	 * written back.  No other DCU bit is changed here.
	 */
2102	ldxa	[%g0]ASI_DCU, %g3
2103	mov	1, %g4
2104	sllx	%g4, DCU_PE_SHIFT, %g4
2105	andn	%g3, %g4, %g3
2106	stxa	%g3, [%g0]ASI_DCU
2107	membar	#Sync
2108
2109	/*
2110	 * We fall into this macro if we've successfully logged the error in
2111	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2112	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
2113	 * Restores the %g registers and issues retry.
2114	 */
2115	CH_ERR_TL1_EXIT;
2116	SET_SIZE(dcache_parity_tl1_err)
2117
2118#endif	/* lint */
2119
2120/*
2121 * I$ parity error trap (trap 72) at TL=0.
2122 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
2123 * the various architecture-specific files.  This merely sets up the
2124 * arguments for cpu_parity_error and calls it via sys_trap.
2125 * NB: Must be 8 instructions or less to fit in trap table and code must
2126 *     be relocatable.
2127 */
2128#if defined(lint)
2129
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
2130void
2131icache_parity_instr(void)
2132{}
2133
2134#else	/* lint */
2135
	/*
	 * Registers set up before jumping to sys_trap:
	 *   %g1 = address of cpu_parity_error
	 *   %g2 = CH_ERR_IPE
	 *   %g3 = %tpc of the trap
	 *   %g4 = PIL_15 (loaded in the delay slot of the jmp)
	 *   %g7 = scratch, upper part of the sys_trap address
	 * Identical to dcache_parity_instr except for the flag placed
	 * in %g2.
	 */
2136	ENTRY_NP(icache_parity_instr)
2137	membar	#Sync			! Cheetah+ requires membar #Sync
2138	set	cpu_parity_error, %g1
2139	or	%g0, CH_ERR_IPE, %g2
2140	rdpr	%tpc, %g3
2141	sethi	%hi(sys_trap), %g7
2142	jmp	%g7 + %lo(sys_trap)
2143	  mov	PIL_15, %g4		! run at pil 15
2144	SET_SIZE(icache_parity_instr)
2145
2146#endif	/* lint */
2147
2148/*
2149 * I$ parity error trap (trap 72) at TL>0.
2150 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
2151 * the various architecture-specific files.  This generates a "Software
2152 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
2153 * continue the handling there.
2154 * NB: Must be 8 instructions or less to fit in trap table and code must
2155 *     be relocatable.
2156 */
2157#if defined(lint)
2158
/*
 * Empty C definition, compiled only when lint is defined.  The real
 * implementation is the assembly in the #else branch below.
 */
2159void
2160icache_parity_tl1_instr(void)
2161{}
2162
2163#else	/* lint */
	/*
	 * The entire body is one expansion of CH_ERR_TL1_TRAPENTRY with
	 * SWTRAP_2 as its argument.  The macro is defined outside this
	 * section; per the description in this file it raises software
	 * trap 2 at TL>0, handled by icache_parity_tl1_cont_instr.
	 */
2164	ENTRY_NP(icache_parity_tl1_instr)
2165	CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
2166	SET_SIZE(icache_parity_tl1_instr)
2167
2168#endif	/* lint */
2169
2170/*
2171 * Software trap 2 at TL>0.
2172 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
2173 * of the various architecture-specific files.  This is used as a continuation
2174 * of the icache parity handling where we've bought an extra TL level, so we
2175 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2176 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2177 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
2178 * order two bits from %g1 and %g2 respectively).
2179 * NB: Must be 8 instructions or less to fit in trap table and code must
2180 *     be relocatable.
2181 */
2182#if defined(lint)
2183
2184void
2185icache_parity_tl1_cont_instr(void)
2186{}
2187
2188#else	/* lint */
	/*
	 * The entire body is the CH_ERR_TL1_SWTRAPENTRY macro, invoked with
	 * icache_parity_tl1_err (the full TL>0 handler in this file) as its
	 * argument.  The macro is defined outside this file; presumably it
	 * performs the %g1/%g2 save described above and transfers control to
	 * the named handler -- confirm at its definition.
	 */
2189	ENTRY_NP(icache_parity_tl1_cont_instr)
2190	CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
2191	SET_SIZE(icache_parity_tl1_cont_instr)
2192
2193#endif	/* lint */
2194
2195
2196/*
2197 * I$ parity error at TL>0 handler
2198 * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
2199 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2200 */
2201#if defined(lint)
2202
2203void
2204icache_parity_tl1_err(void)
2205{}
2206
2207#else	/* lint */
2208
2209	ENTRY_NP(icache_parity_tl1_err)
2210
2211	/*
2212	 * This macro saves all the %g registers in the ch_err_tl1_data
2213	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2214	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
2215	 * the ch_err_tl1_data structure and %g2 will have the original
2216	 * flags in the ch_err_tl1_data structure.  All %g registers
2217	 * except for %g1 and %g2 will be available.
2218	 */
2219	CH_ERR_TL1_ENTER(CH_ERR_IPE);
2220
2221#ifdef TRAPTRACE
2222	/*
2223	 * Get current trap trace entry physical pointer.
	 *
	 * %g6 is turned into the address of this CPU's trap_trace_ctl
	 * element (index scaled by TRAPTR_SIZE_SHIFT).  If that element's
	 * TRAPTR_LIMIT field is zero the trace entry is skipped altogether.
	 * Otherwise %g5 = TRAPTR_PBASE + TRAPTR_OFFSET, the address of the
	 * entry to fill in.  %g1 and %g2 are not touched in this section.
2224	 */
2225	CPU_INDEX(%g6, %g5)
2226	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
2227	set	trap_trace_ctl, %g5
2228	add	%g6, %g5, %g6
2229	ld	[%g6 + TRAPTR_LIMIT], %g5
2230	tst	%g5
2231	be	%icc, ipe_tl1_skip_tt
2232	  nop
2233	ldx	[%g6 + TRAPTR_PBASE], %g5
2234	ld	[%g6 + TRAPTR_OFFSET], %g4
2235	add	%g5, %g4, %g5
2236
2237	/*
2238	 * Create trap trace entry.
	 *
	 * %asi is saved in %g7, switched to TRAPTR_ASI for the stores into
	 * the entry, and put back afterwards.  The entry records STICK,
	 * %tl, %tt, %tpc, %tstate and %sp; the TR and F1-F4 fields are
	 * written as zero.
2239	 */
2240	rd	%asi, %g7
2241	wr	%g0, TRAPTR_ASI, %asi
2242	rd	STICK, %g4
2243	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
2244	rdpr	%tl, %g4
2245	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
2246	rdpr	%tt, %g4
2247	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
2248	rdpr	%tpc, %g4
2249	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
2250	rdpr	%tstate, %g4
2251	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
2252	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
2253	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
2254	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
2255	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
2256	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
2257	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
2258	wr	%g0, %g7, %asi
2259
2260	/*
2261	 * Advance trap trace pointer.
	 *
	 * The offset just used is remembered in TRAPTR_LAST_OFFSET.  The
	 * next offset is the old one plus TRAP_ENT_SIZE, wrapping back to 0
	 * once it reaches (TRAPTR_LIMIT - TRAP_ENT_SIZE) or beyond.
2262	 */
2263	ld	[%g6 + TRAPTR_OFFSET], %g5
2264	ld	[%g6 + TRAPTR_LIMIT], %g4
2265	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
2266	add	%g5, TRAP_ENT_SIZE, %g5
2267	sub	%g4, TRAP_ENT_SIZE, %g4
2268	cmp	%g5, %g4
2269	movge	%icc, 0, %g5		! wrap to the start of the buffer
2270	st	%g5, [%g6 + TRAPTR_OFFSET]
2271ipe_tl1_skip_tt:
2272#endif	/* TRAPTRACE */
2273
2274	/*
2275	 * We fall into this macro if we've successfully logged the error in
2276	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2277	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
2278	 * Restores the %g registers and issues retry.
2279	 */
2280	CH_ERR_TL1_EXIT;
2281
2282	SET_SIZE(icache_parity_tl1_err)
2283
2284#endif	/* lint */
2285
2286#endif	/* CPU_IMP_L1_CACHE_PARITY */
2287
2288
2289/*
2290 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
2291 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
2292 * should only be used in places where you have no choice but to look at the
2293 * tlb itself.
2294 *
2295 * Note: These two routines are required by the Estar "cpr" loadable module.
2296 */
2297
#if defined(lint)

/* ARGSUSED */
void
itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{}

#else	/* lint */

	/*
	 * itlb_rd_entry(entry, tte, va_tag)
	 *
	 * In:	%o0 = ITLB entry number
	 *	%o1 = where to store the entry's data (tte)
	 *	%o2 = where to store the entry's tag
	 * Out:	*tte    = value read through ASI_ITLB_ACCESS
	 *	*va_tag = value read through ASI_ITLB_TAGREAD with the
	 *		  TAGREAD_CTX_MASK bits cleared
	 * Uses: %o0, %o4, %o5, %g1, %g2
	 */
	ENTRY_NP(itlb_rd_entry)
	set	TAGREAD_CTX_MASK, %o4		! mask of bits to strip from tag
	sllx	%o0, 3, %o0			! entry number -> entry * 8
	ldxa	[%o0]ASI_ITLB_ACCESS, %g1	! %g1 = data of that entry
	stx	%g1, [%o1]			! *tte = data
	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2	! %g2 = tag of that entry
	andn	%g2, %o4, %o5			! drop the masked bits
	retl
	  stx	%o5, [%o2]			! delay - *va_tag = masked tag
	SET_SIZE(itlb_rd_entry)

#endif	/* lint */
2319
2320
#if defined(lint)

/* ARGSUSED */
void
dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{}

#else	/* lint */

	/*
	 * dtlb_rd_entry(entry, tte, va_tag)
	 *
	 * In:	%o0 = DTLB entry number
	 *	%o1 = where to store the entry's data (tte)
	 *	%o2 = where to store the entry's tag
	 * Out:	*tte    = value read through ASI_DTLB_ACCESS
	 *	*va_tag = value read through ASI_DTLB_TAGREAD with the
	 *		  TAGREAD_CTX_MASK bits cleared
	 * Uses: %o0, %o4, %o5, %g1, %g2
	 */
	ENTRY_NP(dtlb_rd_entry)
	set	TAGREAD_CTX_MASK, %o4		! mask of bits to strip from tag
	sllx	%o0, 3, %o0			! entry number -> entry * 8
	ldxa	[%o0]ASI_DTLB_ACCESS, %g1	! %g1 = data of that entry
	stx	%g1, [%o1]			! *tte = data
	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2	! %g2 = tag of that entry
	andn	%g2, %o4, %o5			! drop the masked bits
	retl
	  stx	%o5, [%o2]			! delay - *va_tag = masked tag
	SET_SIZE(dtlb_rd_entry)
#endif	/* lint */
2341
2342
2343#if !(defined(JALAPENO) || defined(SERRANO))
2344
2345#if defined(lint)
2346
/*
 * Return the current contents of the register reached through
 * ASI_SAFARI_CONFIG.  (The lint stub simply returns 0.)
 */
2347uint64_t
2348get_safari_config(void)
2349{ return (0); }
2350
2351#else	/* lint */
2352
	/*
	 * uint64_t get_safari_config(void)
	 *
	 * Out:	%o0 = 64-bit value loaded from address 0 of ASI_SAFARI_CONFIG
	 * Leaf routine; no other register is written.
	 */
2353	ENTRY(get_safari_config)
2354	ldxa	[%g0]ASI_SAFARI_CONFIG, %o0
2355	retl
2356	nop
2357	SET_SIZE(get_safari_config)
2358
2359#endif	/* lint */
2360
2361
2362#if defined(lint)
2363
/*
 * Write safari_config to the register reached through ASI_SAFARI_CONFIG.
 */
2364/* ARGSUSED */
2365void
2366set_safari_config(uint64_t safari_config)
2367{}
2368
2369#else	/* lint */
2370
	/*
	 * void set_safari_config(uint64_t safari_config)
	 *
	 * In:	%o0 = value to store at address 0 of ASI_SAFARI_CONFIG
	 * The store is followed by membar #Sync before returning.
	 */
2371	ENTRY(set_safari_config)
2372	stxa	%o0, [%g0]ASI_SAFARI_CONFIG
2373	membar	#Sync
2374	retl
2375	nop
2376	SET_SIZE(set_safari_config)
2377
2378#endif	/* lint */
2379
2380#endif	/* !(JALAPENO || SERRANO) */
2381
2382
#if defined(lint)

/*
 * Lint stub for the assembly routine cpu_clearticknpt below.  The stub
 * name must match the ENTRY_NP() name so lint sees the same symbol the
 * rest of the kernel references.
 */
void
cpu_clearticknpt(void)
{}

#else	/* lint */
	/*
	 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
	 * registers. In an effort to make the change in the
	 * tick/stick counter as consistent as possible, we disable
	 * all interrupts while we're changing the registers. We also
	 * ensure that the read and write instructions are in the same
	 * line in the instruction cache.
	 *
	 * NPT is bit 63 of each register: the "brgez" tests skip a register
	 * whose bit 63 is already clear, and the mask built in %g3 is
	 * 1 << 63.  The wrpr/wr instructions write (rs1 xor rs2), so
	 * xor-ing the freshly read value with the mask clears the bit that
	 * was just found set.
	 *
	 * Register usage:
	 *	%g1 = %pstate on entry (restored in the final delay slot)
	 *	%g2 = value read from %tick / STICK
	 *	%g3 = scratch, then the NPT mask
	 *	%g4 = supplied by the caller: this routine returns with
	 *	      "jmp %g4 + 4" instead of retl, so it is not callable
	 *	      with the normal C linkage
	 */
	ENTRY_NP(cpu_clearticknpt)
	rdpr	%pstate, %g1		/* save processor state */
	andn	%g1, PSTATE_IE, %g3	/* turn off */
	wrpr	%g0, %g3, %pstate	/*   interrupts */
	rdpr	%tick, %g2		/* get tick register */
	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
	mov	1, %g3			/* create mask */
	sllx	%g3, 63, %g3		/*   for NPT bit */
	ba,a,pt	%xcc, 2f
	.align	8			/* Ensure rd/wr in same i$ line */
2:
	rdpr	%tick, %g2		/* get tick register */
	wrpr	%g3, %g2, %tick		/* write tick register, */
					/*   clearing NPT bit   */
1:
	rd	STICK, %g2		/* get stick register */
	brgez,pn %g2, 3f		/* if NPT bit off, we're done */
	mov	1, %g3			/* create mask */
	sllx	%g3, 63, %g3		/*   for NPT bit */
	ba,a,pt	%xcc, 4f
	.align	8			/* Ensure rd/wr in same i$ line */
4:
	rd	STICK, %g2		/* get stick register */
	wr	%g3, %g2, STICK		/* write stick register, */
					/*   clearing NPT bit   */
3:
	jmp	%g4 + 4			/* return through caller's %g4 */
	wrpr	%g0, %g1, %pstate	/* restore processor state */

	SET_SIZE(cpu_clearticknpt)

#endif	/* lint */
2430
2431
2432#if defined(CPU_IMP_L1_CACHE_PARITY)
2433
2434#if defined(lint)
2435/*
2436 * correct_dcache_parity(size_t size, size_t linesize)
2437 *
2438 * Correct D$ data parity by zeroing the data and initializing microtag
2439 * for all indexes and all ways of the D$.
2440 *
2441 */
2442/* ARGSUSED */
2443void
2444correct_dcache_parity(size_t size, size_t linesize)
2445{}
2446
2447#else	/* lint */
2448
2449	ENTRY(correct_dcache_parity)
2450	/*
2451	 * Register Usage:
2452	 *
2453	 * %o0 = input D$ size
2454	 * %o1 = input D$ line size
2455	 * %o2 = scratch
2456	 * %o3 = scratch
2457	 * %o4 = scratch
	 *
	 * Loop structure: the outer loop (label 1) walks the line address
	 * in %o0 from (size - linesize) down to 0 in steps of linesize; the
	 * inner loop (label 2) walks the byte offset in %o2 from
	 * (linesize - 8) down to 0 in steps of 8.  Both loops continue
	 * while the subcc result is still >= 0.
2458	 */
2459
2460	sub	%o0, %o1, %o0			! init cache line address
2461
2462	/*
2463	 * For Panther CPUs, we also need to clear the data parity bits
2464	 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
	 *
	 * The "clr" sits in a non-annulled delay slot, so it runs on both
	 * paths; on Panther %o3 is then overwritten with the parity-select
	 * bit.  %o3 is computed once, before the loop is first entered at
	 * label 1, and stays constant for the rest of the routine.
2465	 */
2466	GET_CPU_IMPL(%o3)
2467	cmp	%o3, PANTHER_IMPL
2468	bne	1f
2469	  clr	%o3				! zero for non-Panther
2470	mov	1, %o3
2471	sll	%o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3
2472
24731:
2474	/*
2475	 * Set utag = way since it must be unique within an index.
2476	 */
2477	srl	%o0, 14, %o2			! get cache way (DC_way)
2478	membar	#Sync				! required before ASI_DC_UTAG
2479	stxa	%o2, [%o0]ASI_DC_UTAG		! set D$ utag = cache way
2480	membar	#Sync				! required after ASI_DC_UTAG
2481
2482	/*
2483	 * Zero line of D$ data (and data parity bits for Panther)
2484	 */
2485	sub	%o1, 8, %o2			! offset of last 8 bytes in line
2486	or	%o0, %o3, %o4			! same address + DC_data_parity
24872:
2488	membar	#Sync				! required before ASI_DC_DATA
2489	stxa	%g0, [%o0 + %o2]ASI_DC_DATA	! zero 8 bytes of D$ data
2490	membar	#Sync				! required after ASI_DC_DATA
2491	/*
2492	 * We also clear the parity bits if this is a panther. For non-Panther
2493	 * CPUs, we simply end up clearing the $data register twice.
2494	 */
2495	stxa	%g0, [%o4 + %o2]ASI_DC_DATA
2496	membar	#Sync
2497
2498	subcc	%o2, 8, %o2			! previous 8 bytes of this line
2499	bge	2b
2500	nop
2501
2502	subcc	%o0, %o1, %o0			! previous cache line
2503	bge	1b
2504	nop
2505
2506	retl
2507	  nop
2508	SET_SIZE(correct_dcache_parity)
2509
2510#endif	/* lint */
2511
2512#endif	/* CPU_IMP_L1_CACHE_PARITY */
2513
2514
#if defined(lint)
/*
 * Store the current %stick value, with its top (NPT) bit cleared, in *ts.
 */
/* ARGSUSED */
void
stick_timestamp(int64_t *ts)
{
}

#else	/* lint */

	/*
	 * void stick_timestamp(int64_t *ts)
	 *
	 * In:	%o0 = ts
	 * Out:	*ts = STICK with bit 63 forced to zero
	 * Uses: %g1
	 */
	ENTRY_NP(stick_timestamp)
	rd	STICK, %g1		! %g1 = raw stick register
	sllx	%g1, 1, %g1		! shift bit 63 (npt) out the top ...
	srlx	%g1, 1, %g1		! ... and shift a zero back in
	stx	%g1, [%o0]		! *ts = stick without the npt bit
	retl
	  nop
	SET_SIZE(stick_timestamp)

#endif	/* lint */
2537
2538
2539#if defined(lint)
2540/*
2541 * Set STICK adjusted by skew.
2542 */
2543/* ARGSUSED */
2544void
2545stick_adj(int64_t skew)
2546{
2547}
2548
2549#else	/* lint */
2550
	/*
	 * void stick_adj(int64_t skew)
	 *
	 * In:	%o0 = skew to add to STICK
	 * Uses: %g1 (saved %pstate), %g3, %g4, %o1
	 *
	 * Interrupts are disabled around the update and the previous
	 * %pstate is put back in the delay slot of the return.  The branch
	 * to the 16-byte aligned label keeps the read/add/write of STICK in
	 * one aligned group of instructions; the nop at the label pads that
	 * group.
	 */
2551	ENTRY_NP(stick_adj)
2552	rdpr	%pstate, %g1		! save processor state
2553	andn	%g1, PSTATE_IE, %g3
2554	ba	1f			! cache align stick adj
2555	wrpr	%g0, %g3, %pstate	! turn off interrupts
2556
2557	.align	16
25581:	nop
2559
2560	rd	STICK, %g4		! read stick reg
2561	add	%g4, %o0, %o1		! adjust stick with skew
2562	wr	%o1, %g0, STICK		! write stick reg
2563
2564	retl
2565	wrpr	%g1, %pstate		! restore processor state
2566	SET_SIZE(stick_adj)
2567
2568#endif	/* lint */
2569
#if defined(lint)
/*
 * Debugger-specific stick retrieval: store the raw %stick value in
 * *stickp and return 0.
 */
/*ARGSUSED*/
int
kdi_get_stick(uint64_t *stickp)
{
	return (0);
}

#else	/* lint */

	/*
	 * int kdi_get_stick(uint64_t *stickp)
	 *
	 * In:	%o0 = stickp
	 * Out:	*stickp = STICK exactly as read (bit 63 is not masked here)
	 *	%o0 = 0, always
	 * Uses: %g1
	 */
	ENTRY_NP(kdi_get_stick)
	rd	STICK, %g1		! sample the stick register
	stx	%g1, [%o0]		! *stickp = sampled value
	retl
	  clr	%o0			! delay - return 0
	SET_SIZE(kdi_get_stick)

#endif	/* lint */
2591
2592#if defined(lint)
2593/*
2594 * Invalidate the specified line from the D$.
2595 *
2596 * Register usage:
2597 *	%o0 - index for the invalidation, specifies DC_way and DC_addr
2598 *
2599 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
2600 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
2601 *
2602 * The format of the stored 64-bit value is:
2603 *
2604 *	+----------+--------+----------+
2605 *	| Reserved | DC_tag | DC_valid |
2606 *	+----------+--------+----------+
2607 *       63      31 30     1	      0
2608 *
2609 * DC_tag is the 30-bit physical tag of the associated line.
2610 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
2611 *
2612 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
2613 *
2614 *	+----------+--------+----------+----------+
2615 *	| Reserved | DC_way | DC_addr  | Reserved |
2616 *	+----------+--------+----------+----------+
2617 *       63      16 15    14 13       5 4        0
2618 *
2619 * DC_way is a 2-bit index that selects one of the 4 ways.
2620 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
2621 *
2622 * Setting the DC_valid bit to zero for the specified DC_way and
2623 * DC_addr index into the D$ results in an invalidation of a D$ line.
2624 */
2625/*ARGSUSED*/
2626void
2627dcache_inval_line(int index)
2628{
2629}
2630#else	/* lint */
	/*
	 * void dcache_inval_line(int index)
	 *
	 * In:	%o0 = index; it is shifted left by 5 so that its low bits
	 *	      land in the DC_addr/DC_way positions (bit 5 upwards)
	 *	      of the ASI_DC_TAG address described in the lint-side
	 *	      comment for this routine.
	 * A 64-bit zero is then stored to that address (clearing DC_tag
	 * and DC_valid), followed by membar #Sync.
	 */
2631	ENTRY(dcache_inval_line)
2632	sll	%o0, 5, %o0		! shift index into DC_way and DC_addr
2633	stxa	%g0, [%o0]ASI_DC_TAG	! zero the DC_valid and DC_tag bits
2634	membar	#Sync
2635	retl
2636	nop
2637	SET_SIZE(dcache_inval_line)
2638#endif	/* lint */
2639
2640#if defined(lint)
2641/*
2642 * Invalidate the entire I$
2643 *
2644 * Register usage:
2645 *	%o0 - specifies IC_way, IC_addr, IC_tag
2646 *	%o1 - scratch
2647 *	%o2 - used to save and restore DCU value
2648 *	%o3 - scratch
2649 *	%o5 - used to save and restore PSTATE
2650 *
2651 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
2652 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
2653 * block out snoops and invalidates to the I$, causing I$ consistency
2654 * to be broken. Before turning on the I$, all I$ lines must be invalidated.
2655 *
2656 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
2657 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
2658 * info below describes store (write) use of ASI_IC_TAG. Note that read
2659 * use of ASI_IC_TAG behaves differently.
2660 *
2661 * The format of the stored 64-bit value is:
2662 *
2663 *	+----------+--------+---------------+-----------+
2664 *	| Reserved | Valid  | IC_vpred<7:0> | Undefined |
2665 *	+----------+--------+---------------+-----------+
2666 *       63      55    54    53           46 45        0
2667 *
2668 * Valid is the 1-bit valid field for both the physical and snoop tags.
2669 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
2670 *	the 32-byte boundary aligned address specified by IC_addr.
2671 *
2672 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
2673 *
2674 *	+----------+--------+---------+--------+---------+
2675 *	| Reserved | IC_way | IC_addr | IC_tag |Reserved |
2676 *	+----------+--------+---------+--------+---------+
2677 *       63      16 15    14 13      5 4      3 2       0
2678 *
2679 * IC_way is a 2-bit index that selects one of the 4 ways.
2680 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
2681 * IC_addr[5] is a "don't care" for a store.
2682 * IC_tag set to 2 specifies that the stored value is to be interpreted
2683 *	as containing Valid and IC_vpred as described above.
2684 *
2685 * Setting the Valid bit to zero for the specified IC_way and
2686 * IC_addr index into the I$ results in an invalidation of an I$ line.
2687 */
2688/*ARGSUSED*/
2689void
2690icache_inval_all(void)
2691{
2692}
2693#else	/* lint */
	/*
	 * void icache_inval_all(void)
	 *
	 * Runs with interrupts disabled; the entry %pstate is kept in %o5
	 * and written back in the delay slot of the return.
	 *
	 * The I$ size and line size handed to CH_ICACHE_FLUSHALL come from
	 * one of two places:
	 *   - the CHPR_ICACHE_SIZE / CHPR_ICACHE_LINESIZE fields of the
	 *     pointer produced in %o0 by GET_CPU_PRIVATE_PTR, or
	 *   - the globals icache_size / icache_linesize, on the path that
	 *     reaches icache_inval_all_1.
	 * NOTE(review): GET_CPU_PRIVATE_PTR and CH_ICACHE_FLUSHALL are
	 * defined elsewhere; presumably the former branches to its label
	 * argument when no cpu private area exists, and the latter uses
	 * %o2/%o4 as temporaries -- confirm at their definitions.
	 */
2694	ENTRY(icache_inval_all)
2695	rdpr	%pstate, %o5
2696	andn	%o5, PSTATE_IE, %o3
2697	wrpr	%g0, %o3, %pstate	! clear IE bit
2698
2699	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
2700	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
2701	ba,pt	%icc, 2f
2702	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0	! delay - %o0 reused for size
2703icache_inval_all_1:
2704	ASM_LD(%o0, icache_size)
2705	ASM_LD(%o1, icache_linesize)
27062:
2707	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
2708
2709	retl
2710	wrpr	%g0, %o5, %pstate	! restore earlier pstate
2711	SET_SIZE(icache_inval_all)
2712#endif	/* lint */
2713
2714
2715#if defined(lint)
2716/* ARGSUSED */
2717void
2718cache_scrubreq_tl1(uint64_t inum, uint64_t index)
2719{
2720}
2721
2722#else	/* lint */
2723/*
2724 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
2725 * crosstrap.  It increments the outstanding request counter (a plain
2726 * load/add/store suffices because we run at TL1) and, if there was not
2727 * already an outstanding request, jumps to setsoftint_tl1 to enqueue an
2728 * intr_vec for the given inum.  In every other case it simply retries.
2728 */
2729
2730	! Register usage:
2731	!
2732	! Arguments:
2733	! %g1 - inum
2734	! %g2 - index into chsm_outstanding array
2735	!
2736	! Internal:
2737	! %g2, %g3, %g5 - scratch
2738	! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2739	! %g6 - setsoftint_tl1 address
	!
	! %g2 is first turned into the byte offset of chsm_outstanding[index]
	! (index * CHSM_OUTSTANDING_INCR + CHPR_SCRUB_MISC + CHSM_OUTSTANDING)
	! and passed to GET_CPU_PRIVATE_PTR.
	! NOTE(review): GET_CPU_PRIVATE_PTR is defined elsewhere; presumably
	! it leaves (cpu private pointer + %g2) in %g4 and branches to its
	! label argument (1f, the retry) when this cpu has no private area.
2740
2741	ENTRY_NP(cache_scrubreq_tl1)
2742	mulx	%g2, CHSM_OUTSTANDING_INCR, %g2
2743	set	CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
2744	add	%g2, %g3, %g2
2745	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
2746	ld	[%g4], %g2		! cpu's chsm_outstanding[index]
2747	!
2748	! no need to use atomic instructions for the following
2749	! increment - we're at tl1
2750	!
2751	add	%g2, 0x1, %g3
2752	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
2753	  st	%g3, [%g4]		! delay - store incremented counter
2754	ASM_JMP(%g6, setsoftint_tl1)
2755	! not reached
27561:
2757	retry
2758	SET_SIZE(cache_scrubreq_tl1)
2759
2760#endif	/* lint */
2761
2762
2763#if defined(lint)
2764
2765/* ARGSUSED */
2766void
2767get_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
2768{}
2769
2770#else	/* lint */
2771
2772/*
2773 * Get the error state for the processor.
2774 * Note that this must not be used at TL>0
 *
 * In:	%o0 = cpu_error_regs; never modified by this routine
 * Uses: %o1 (value being copied), %o2 (register address), %o3 (cpu impl)
 *
 * Every field written here is either read from the hardware or explicitly
 * zeroed:
 *	SHADOW_AFSR, SHADOW_AFAR	  read on CHEETAH_PLUS builds, else 0
 *	AFSR_EXT, SHADOW_AFSR_EXT	  read only on CHEETAH_PLUS builds when
 *					  the cpu is a Panther, else 0
 *	AFAR2				  written on SERRANO builds only
 *	AFSR, AFAR			  always read (primary registers)
2775 */
2776	ENTRY(get_cpu_error_state)
2777#if defined(CHEETAH_PLUS)
2778	set	ASI_SHADOW_REG_VA, %o2
2779	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr reg
2780	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2781	ldxa	[%o2]ASI_AFAR, %o1		! shadow afar reg
2782	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2783	GET_CPU_IMPL(%o3)	! Only panther has AFSR_EXT registers
2784	cmp	%o3, PANTHER_IMPL
	! The delay slot below is annulled: the zeroing store only executes
	! when the branch is taken, i.e. when this is not a Panther.
2785	bne,a	1f
2786	  stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]	! zero for non-PN
2787	set	ASI_AFSR_EXT_VA, %o2
2788	ldxa	[%o2]ASI_AFSR, %o1		! afsr_ext reg
2789	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2790	set	ASI_SHADOW_AFSR_EXT_VA, %o2
2791	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr_ext reg
2792	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2793	b	2f
2794	  nop
27951:
2796	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
27972:
2798#else	/* CHEETAH_PLUS */
2799	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2800	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2801	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2802	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2803#endif	/* CHEETAH_PLUS */
2804#if defined(SERRANO)
2805	/*
2806	 * Serrano has an afar2 which captures the address on FRC/FRU errors.
2807	 * We save this in the afar2 of the register save area.
2808	 */
2809	set	ASI_MCU_AFAR2_VA, %o2
2810	ldxa	[%o2]ASI_MCU_CTRL, %o1
2811	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR2]
2812#endif	/* SERRANO */
2813	ldxa	[%g0]ASI_AFSR, %o1		! primary afsr reg
2814	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR]
2815	ldxa	[%g0]ASI_AFAR, %o1		! primary afar reg
2816	retl
2817	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR]
2818	SET_SIZE(get_cpu_error_state)
2819#endif	/* lint */
2820
#if defined(lint)

/*
 * Check a page of memory for errors.
 *
 * Load each 64 byte block from physical memory.
 * Check AFSR after each load to see if an error
 * was caused. If so, log/scrub that error.
 *
 * Used to determine if a page contains
 * CEs when CEEN is disabled.
 *
 * Only whole 64-byte blocks are checked; if psz is smaller than 64
 * nothing is loaded at all.
 */
/*ARGSUSED*/
void
cpu_check_block(caddr_t va, uint_t psz)
{}

#else	/* lint */

	/*
	 * void cpu_check_block(caddr_t va, uint_t psz)
	 *
	 * In:	%i0 = va of the first 64-byte block
	 *	%i1 = psz, size in bytes (only the low 32 bits are used)
	 *
	 * Locals:
	 *	%l2 = %fprs on entry (restored before returning)
	 *	%l3 = AFSR value read after each block load
	 *	%l4 = number of 64-byte blocks still to check
	 *
	 * The new frame reserves CH_CPU_ERROR_SIZE bytes, addressed as
	 * %fp + STACK_BIAS - CH_CPU_ERROR_SIZE, for the error register
	 * snapshot handed to get_cpu_error_state and cpu_ce_detected.
	 * The block loads overwrite the floating point registers named by
	 * %d0 for a 64-byte block load; they are not saved here.
	 */
	ENTRY(cpu_check_block)
	!
	! get a new window with room for the error regs
	!
	save	%sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
	srl	%i1, 6, %l4		! clear top bits of psz
					! and divide by 64
	!
	! A block count of zero must not enter the loop: the loop tests the
	! count only after decrementing it, so zero would wrap around and
	! the loop would run far past the end of the caller's buffer.
	!
	brz,pn	%l4, 3f			! no whole block - nothing to do
	  nop
	rd	%fprs, %l2		! store FP
	wr	%g0, FPRS_FEF, %fprs	! enable FP
1:
	ldda	[%i0]ASI_BLK_P, %d0	! load a block
	membar	#Sync
	ldxa    [%g0]ASI_AFSR, %l3	! read afsr reg
	brz,a,pt %l3, 2f		! check for error
	nop

	!
	! if error, read the error regs and log it
	!
	call	get_cpu_error_state
	add	%fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0

	!
	! cpu_ce_detected(ch_cpu_errors_t *, flag)
	!
	! %o0 is not reloaded: it still holds the snapshot pointer set up
	! for get_cpu_error_state, which leaves %o0 unchanged.
	!
	call	cpu_ce_detected		! log the error
	mov	CE_CEEN_TIMEOUT, %o1
2:
	dec	%l4			! next 64-byte block
	brnz,a,pt  %l4, 1b
	add	%i0, 64, %i0		! increment block addr

	wr	%l2, %g0, %fprs		! restore FP
3:
	ret
	restore

	SET_SIZE(cpu_check_block)

#endif	/* lint */
2879
2880#if defined(lint)
2881
2882/*
2883 * Perform a cpu logout called from C.  This is used where we did not trap
2884 * for the error but still want to gather "what we can".  Caller must make
2885 * sure cpu private area exists and that the indicated logout area is free
2886 * for use, and that we are unable to migrate cpus.
2887 */
2888/*ARGSUSED*/
2889void
2890cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
2891{ }
2892
2893#else
	/*
	 * void cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
	 *
	 * In:	%o0 = afar
	 *	%o1 = clop; advanced to clop + CH_CLO_DATA + CH_CHD_EC_DATA
	 *	      before being handed to GET_ECACHE_DTAGS
	 * Uses: %o2-%o5, %g1 (holds the caller's %asi while ASI_P is in use)
	 *
	 * Interrupts are disabled for the duration of the sibling-core
	 * park / E$ tag capture / unpark sequence.
	 *
	 * NOTE(review): the exit path does not restore the %pstate seen on
	 * entry (it is not kept anywhere); it re-reads %pstate and sets
	 * PSTATE_IE, so the routine always returns with interrupts enabled.
	 * Confirm that no caller enters with interrupts already disabled.
	 */
2894	ENTRY(cpu_delayed_logout)
2895	rdpr	%pstate, %o2
2896	andn	%o2, PSTATE_IE, %o2
2897	wrpr	%g0, %o2, %pstate		! disable interrupts
2898	PARK_SIBLING_CORE(%o2, %o3, %o4)	! %o2 has DCU value
2899	add	%o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
2900	rd	%asi, %g1			! save caller's %asi
2901	wr	%g0, ASI_P, %asi
2902	GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
2903	wr	%g1, %asi			! put caller's %asi back
2904	UNPARK_SIBLING_CORE(%o2, %o3, %o4)	! can use %o2 again
2905	rdpr	%pstate, %o2
2906	or	%o2, PSTATE_IE, %o2
2907	wrpr	%g0, %o2, %pstate		! interrupts back on
2908	retl
2909	  nop
2910	SET_SIZE(cpu_delayed_logout)
2911
2912#endif	/* lint */
2913
2914#if defined(lint)
2915
2916/*ARGSUSED*/
2917int
2918dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
2919{ return (0); }
2920
2921#else
2922
	/*
	 * int dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
	 *
	 * In:	%i0 = addr, target of the block-commit store (ASI_BLK_COMMIT_S)
	 *	%i1 = data, pointer to the 32-bit word loaded into %f0
	 *	%i2 = tryagain
	 * Out:	0 when the store completes; if the lofault handler at label 0
	 *	is entered instead, -1 when tryagain is zero, otherwise the
	 *	result of tail-calling dtrace_blksuword32_err.
	 *
	 * Locals:
	 *	%l0 = %fprs on entry
	 *	%l1 = %pstate on entry
	 *	%l5 = address of the lofault handler (label 0)
	 *
	 * Only %f0 is loaded before the 64-byte block starting at %d0 is
	 * stored; the rest of the block is whatever the FP registers held.
	 *
	 * The "bz,a" branches after the store and in the fault handler do
	 * not follow a compare of their own; they reuse the condition codes
	 * set by the "andcc %l0, FPRS_FEF" near the top.  None of the
	 * instructions written out between that andcc and those branches
	 * sets the condition codes, so anything inserted there must not
	 * either.
	 */
2923	ENTRY(dtrace_blksuword32)
2924	save	%sp, -SA(MINFRAME + 4), %sp
2925
2926	rdpr	%pstate, %l1
2927	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
2928	wrpr	%g0, %l2, %pstate		! protect our FPU diddling
2929
2930	rd	%fprs, %l0
2931	andcc	%l0, FPRS_FEF, %g0
2932	bz,a,pt	%xcc, 1f			! if the fpu is disabled
2933	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu
2934
	! fpu was already enabled: %f0 may be live, so preserve it
2935	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
29361:
2937	set	0f, %l5
2938        /*
2939         * We're about to write a block full of either total garbage
2940         * (not kernel data, don't worry) or user floating-point data
2941         * (so it only _looks_ like garbage).
2942         */
2943	ld	[%i1], %f0			! modify the block
2944	membar	#Sync
2945	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
2946	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
2947	membar	#Sync
2948	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2949
	! fpu was disabled on entry: put %fprs back, nothing else to restore
2950	bz,a,pt	%xcc, 1f
2951	wr	%g0, %l0, %fprs			! restore %fprs
2952
2953	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
29541:
2955
2956	wrpr	%g0, %l1, %pstate		! restore interrupts
2957
2958	ret
2959	restore	%g0, %g0, %o0			! return 0
2960
	! lofault handler: same cleanup as the success path, then fail
29610:
2962	membar	#Sync
2963	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2964
2965	bz,a,pt	%xcc, 1f
2966	wr	%g0, %l0, %fprs			! restore %fprs
2967
2968	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
29691:
2970
2971	wrpr	%g0, %l1, %pstate		! restore interrupts
2972
2973	/*
2974	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
2975	 * which deals with watchpoints. Otherwise, just return -1.
2976	 */
2977	brnz,pt	%i2, 1f
2978	nop
2979	ret
2980	restore	%g0, -1, %o0
29811:
2982	call	dtrace_blksuword32_err
2983	restore
2984
2985	SET_SIZE(dtrace_blksuword32)
2986
2987#endif /* lint */
2988
2989#ifdef	CHEETAHPLUS_ERRATUM_25
2990
2991#if	defined(lint)
2992/*
2993 * Claim a chunk of physical address space.
2994 */
2995/*ARGSUSED*/
2996void
2997claimlines(uint64_t pa, size_t sz, int stride)
2998{}
2999#else	/* lint */
	/*
	 * void claimlines(uint64_t pa, size_t sz, int stride)
	 *
	 * In:	%o0 = pa, %o1 = sz, %o2 = stride
	 * Uses: %o3 (address being touched)
	 *
	 * Each pass subtracts stride from the remaining size and, as long
	 * as that subtraction did not borrow (bgeu), performs a casxa on
	 * physical address pa + remaining.  The casxa compares against zero
	 * and swaps in zero, so memory contents are never changed by it.
	 * The casxa sits in an annulled delay slot, so it is skipped on the
	 * final pass, where the subtraction borrows and the loop exits.
	 * Addresses are therefore visited from pa + sz - stride downward.
	 */
3000	ENTRY(claimlines)
30011:
3002	subcc	%o1, %o2, %o1			! remaining -= stride
3003	add	%o0, %o1, %o3			! %o3 = pa + remaining
3004	bgeu,a,pt	%xcc, 1b
3005	casxa	[%o3]ASI_MEM, %g0, %g0
3006	membar  #Sync
3007	retl
3008	nop
3009	SET_SIZE(claimlines)
3010#endif	/* lint */
3011
3012#if	defined(lint)
3013/*
3014 * CPU feature initialization,
3015 * turn BPE off,
3016 * get device id.
3017 */
3018/*ARGSUSED*/
3019void
3020cpu_feature_init(void)
3021{}
3022#else	/* lint */
	/*
	 * void cpu_feature_init(void)
	 *
	 * 1. If the 32-bit variable cheetah_bpe_off is non-zero, clear
	 *    ASR_DISPATCH_CONTROL_BPE in the dispatch control register.
	 * 2. Read ASI_DEVICE_SERIAL_ID and store it in the DEVICE_ID field
	 *    of this cpu's element of cpunodes[] (index * CPU_NODE_SIZE).
	 * 3. On CHEETAHPLUS_ERRATUM_34 builds, call itlb_erratum34_fixup.
	 *
	 * A register window is taken with save/restore, so the %o registers
	 * used below belong to the new window.
	 */
3023	ENTRY(cpu_feature_init)
3024	save	%sp, -SA(MINFRAME), %sp
3025	sethi	%hi(cheetah_bpe_off), %o0
3026	ld	[%o0 + %lo(cheetah_bpe_off)], %o0
3027	brz	%o0, 1f				! leave BPE alone if zero
3028	nop
3029	rd	ASR_DISPATCH_CONTROL, %o0
3030	andn	%o0, ASR_DISPATCH_CONTROL_BPE, %o0
3031	wr	%o0, 0, ASR_DISPATCH_CONTROL
30321:
3033	!
3034	! get the device_id and store the device_id
3035	! in the appropriate cpunodes structure
3036	! given the cpus index
3037	!
3038	CPU_INDEX(%o0, %o1)
3039	mulx %o0, CPU_NODE_SIZE, %o0
3040	set  cpunodes + DEVICE_ID, %o1
3041	ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2
3042	stx  %o2, [%o0 + %o1]
3043#ifdef	CHEETAHPLUS_ERRATUM_34
3044	!
3045	! apply Cheetah+ erratum 34 workaround
3046	!
3047	call itlb_erratum34_fixup
3048	  nop
3049#endif	/* CHEETAHPLUS_ERRATUM_34 */
3050	ret
3051	  restore
3052	SET_SIZE(cpu_feature_init)
3053#endif	/* lint */
3054
#if	defined(lint)
/*
 * Copy one 16-byte tsb entry from src to dest.  The entry is fetched
 * from src with a single 128-bit load, so src must be 128 bit aligned;
 * it is written to dest as two separate 64-bit stores.
 */
/*ARGSUSED*/
void
copy_tsb_entry(uintptr_t src, uintptr_t dest)
{}
#else	/* lint */
	/*
	 * void copy_tsb_entry(uintptr_t src, uintptr_t dest)
	 *
	 * In:	%o0 = src, %o1 = dest
	 * Uses: %o2 (tag), %o3 (data)
	 */
	ENTRY(copy_tsb_entry)
	ldda	[%o0]ASI_NQUAD_LD, %o2		! %o2 = tag, %o3 = data
	stx	%o2, [%o1]			! dest[0] = tag
	retl
	  stx	%o3, [%o1 + 8 ]			! delay - dest[1] = data
	SET_SIZE(copy_tsb_entry)
#endif	/* lint */
3073
3074#endif	/* CHEETAHPLUS_ERRATUM_25 */
3075
3076#ifdef	CHEETAHPLUS_ERRATUM_34
3077
3078#if	defined(lint)
3079
3080/*ARGSUSED*/
3081void
3082itlb_erratum34_fixup(void)
3083{}
3084
3085#else	/* lint */
3086
3087	!
3088	! In Cheetah+ erratum 34, under certain conditions an ITLB locked
3089	! index 0 TTE will erroneously be displaced when a new TTE is
3090	! loaded via ASI_ITLB_IN.  In order to avoid cheetah+ erratum 34,
3091	! locked index 0 TTEs must be relocated.
3092	!
3093	! NOTE: Care must be taken to avoid an ITLB miss in this routine.
3094	!
	! Register usage:
	!	%o1 = data of ITLB entry 0
	!	%o2 = tag of ITLB entry 0
	!	%o3 = %pstate on entry (written back on both return paths)
	!	%o4 = flush address / TTE being examined in the search loop
	!	%g3 = byte offset (index << 3) of the candidate entry
	!	%g4 = MMU_TAG_ACCESS
	!
	! Interrupts are expected to be enabled on entry: the wrpr below
	! writes (%o3 xor PSTATE_IE), which clears IE only if it was set.
	! DEBUG kernels check this with PANIC_IF_INTR_DISABLED_PSTR.
	!
	! A TTE is treated as valid when it is negative as a signed 64-bit
	! value, i.e. when its top bit is set.  In both valid/locked tests
	! the andcc sits in a non-annulled delay slot, so it executes
	! whether or not the preceding bge is taken.
	!
3095	ENTRY_NP(itlb_erratum34_fixup)
3096	rdpr	%pstate, %o3
3097#ifdef DEBUG
3098	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
3099#endif /* DEBUG */
3100	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
3101	ldxa	[%g0]ASI_ITLB_ACCESS, %o1	! %o1 = entry 0 data
3102	ldxa	[%g0]ASI_ITLB_TAGREAD, %o2	! %o2 = entry 0 tag
3103
3104	cmp	%o1, %g0			! Is this entry valid?
3105	bge	%xcc, 1f
3106	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
3107	bnz	%icc, 2f
3108	  nop
31091:
3110	retl					! Nope, outta here...
3111	  wrpr	%g0, %o3, %pstate		! Enable interrupts
31122:
3113	sethi	%hi(FLUSH_ADDR), %o4
3114	stxa	%g0, [%o2]ASI_ITLB_DEMAP	! Flush this mapping
3115	flush	%o4				! Flush required for I-MMU
3116	!
3117	! Start search from index 1 up.  This is because the kernel force
3118	! loads its text page at index 15 in sfmmu_kernel_remap() and we
3119	! don't want our relocated entry evicted later.
3120	!
3121	! NOTE: We assume that we'll be successful in finding an unlocked
3122	! or invalid entry.  If that isn't the case there are bound to be
3123	! bigger problems.  (The search loop below has no upper bound.)
3124	!
3125	set	(1 << 3), %g3
31263:
3127	ldxa	[%g3]ASI_ITLB_ACCESS, %o4	! Load TTE from t16
3128	!
3129	! If this entry isn't valid, we'll choose to displace it (regardless
3130	! of the lock bit).
3131	!
3132	cmp	%o4, %g0			! TTE is >= 0 iff not valid
3133	bge	%xcc, 4f			! If invalid, go displace
3134	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
3135	bnz,a	%icc, 3b			! If locked, look at next
3136	  add	%g3, (1 << 3), %g3		!  entry
31374:
3138	!
3139	! We found an unlocked or invalid entry; we'll explicitly load
3140	! the former index 0 entry here.
3141	!
3142	sethi	%hi(FLUSH_ADDR), %o4
3143	set	MMU_TAG_ACCESS, %g4
3144	stxa	%o2, [%g4]ASI_IMMU		! tag access = old entry 0 tag
3145	stxa	%o1, [%g3]ASI_ITLB_ACCESS	! chosen entry = old entry 0 data
3146	flush	%o4				! Flush required for I-MMU
3147	retl
3148	  wrpr	%g0, %o3, %pstate		! Enable interrupts
3149	SET_SIZE(itlb_erratum34_fixup)
3150
3151#endif	/* lint */
3152
3153#endif	/* CHEETAHPLUS_ERRATUM_34 */
3154
3155