xref: /titanic_41/usr/src/uts/sun4u/cpu/spitfire_asm.s (revision 749f21d359d8fbd020c974a1a5227316221bfc9c)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#if !defined(lint)
30#include "assym.h"
31#endif	/* lint */
32
33#include <sys/asm_linkage.h>
34#include <sys/mmu.h>
35#include <vm/hat_sfmmu.h>
36#include <sys/machparam.h>
37#include <sys/machcpuvar.h>
38#include <sys/machthread.h>
39#include <sys/privregs.h>
40#include <sys/asm_linkage.h>
41#include <sys/machasi.h>
42#include <sys/trap.h>
43#include <sys/spitregs.h>
44#include <sys/xc_impl.h>
45#include <sys/intreg.h>
46#include <sys/async.h>
47
48#ifdef TRAPTRACE
49#include <sys/traptrace.h>
50#endif /* TRAPTRACE */
51
52#ifndef	lint
53
54/* BEGIN CSTYLED */
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Invalidate D$ lines by writing zero to their tags through ASI_DC_TAG.
 * The strategy is picked at run time from the dflush_type variable:
 * FLUSHPAGE_TYPE, FLUSHMATCH_TYPE, or (any other value) flush everything.
 * Nothing is flushed when the LSU_DC bit read from ASI_LSU is clear.
 *
 * arg1:	shifted left by SF_DC_VBIT_SHIFT to form the tag to match
 * arg2:	virtual color; reused as the loop counter for FLUSHMATCH_TYPE
 * tmp1-tmp3:	scratch
 *
 * arg1, arg2, the scratch registers and the condition codes may all be
 * modified.  The expansion defines numeric local labels 1 through 5.
 * Every loop walks from the highest line offset down and stops only when
 * the offset compares equal to zero.
 */
55#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
56	ldxa	[%g0]ASI_LSU, tmp1					;\
57	btst	LSU_DC, tmp1		/* is dcache enabled? */	;\
58	bz,pn	%icc, 1f						;\
59	sethi	%hi(dcache_linesize), tmp1				;\
60	ld	[tmp1 + %lo(dcache_linesize)], tmp1			;\
61	sethi	%hi(dflush_type), tmp2					;\
62	ld	[tmp2 + %lo(dflush_type)], tmp2				;\
63	cmp	tmp2, FLUSHPAGE_TYPE					;\
64	be,pt	%icc, 2f						;\
65	sllx	arg1, SF_DC_VBIT_SHIFT, arg1	/* tag to compare */	;\
66	sethi	%hi(dcache_size), tmp3					;\
67	ld	[tmp3 + %lo(dcache_size)], tmp3				;\
68	cmp	tmp2, FLUSHMATCH_TYPE					;\
69	be,pt	%icc, 3f						;\
70	nop								;\
71	/*								\
72	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
73	 * tmp3 = cache size						\
74	 * tmp1 = cache line size					\
75	 */								\
76	sub	tmp3, tmp1, tmp2					;\
774:									\
78	stxa	%g0, [tmp2]ASI_DC_TAG					;\
79	membar	#Sync							;\
80	cmp	%g0, tmp2						;\
81	bne,pt	%icc, 4b						;\
82	sub	tmp2, tmp1, tmp2					;\
83	ba,pt	%icc, 1f						;\
84	nop								;\
85	/*								\
86	 * flushtype = FLUSHPAGE_TYPE					\
87	 * arg1 = tag to compare against				\
88	 * arg2 = virtual color						\
89	 * tmp1 = cache line size					\
90	 * tmp2 = tag from cache					\
91	 * tmp3 = counter						\
92	 */								\
932:									\
94	set	MMU_PAGESIZE, tmp3					;\
95	sllx	arg2, MMU_PAGESHIFT, arg2  /* color to dcache page */	;\
96	sub	tmp3, tmp1, tmp3					;\
974:									\
98	ldxa	[arg2 + tmp3]ASI_DC_TAG, tmp2	/* read tag */		;\
99	btst	SF_DC_VBIT_MASK, tmp2					;\
100	bz,pn	%icc, 5f	  /* branch if no valid sub-blocks */	;\
101	andn	tmp2, SF_DC_VBIT_MASK, tmp2	/* clear out v bits */	;\
102	cmp	tmp2, arg1						;\
103	bne,pn	%icc, 5f			/* br if tag miss */	;\
104	nop								;\
105	stxa	%g0, [arg2 + tmp3]ASI_DC_TAG				;\
106	membar	#Sync							;\
1075:									\
108	cmp	%g0, tmp3						;\
109	bnz,pt	%icc, 4b		/* branch if not done */	;\
110	sub	tmp3, tmp1, tmp3					;\
111	ba,pt	%icc, 1f						;\
112	nop								;\
113	/*								\
114	 * flushtype = FLUSHMATCH_TYPE					\
115	 * arg1 = tag to compare against				\
116	 * tmp1 = cache line size					\
117	 * tmp3 = cache size						\
118	 * arg2 = counter						\
119	 * tmp2 = cache tag						\
120	 */								\
1213:									\
122	sub	tmp3, tmp1, arg2					;\
1234:									\
124	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
125	btst	SF_DC_VBIT_MASK, tmp2					;\
126	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
127	andn	tmp2, SF_DC_VBIT_MASK, tmp2	/* clear out v bits */	;\
128	cmp	tmp2, arg1						;\
129	bne,pn	%icc, 5f		/* branch if tag miss */	;\
130	nop								;\
131	stxa	%g0, [arg2]ASI_DC_TAG					;\
132	membar	#Sync							;\
1335:									\
134	cmp	%g0, arg2						;\
135	bne,pt	%icc, 4b		/* branch if not done */	;\
136	sub	arg2, tmp1, arg2					;\
1371:
138
139/*
140 * macro that flushes the entire dcache color
 *
 * Writes zero to the D$ tag (ASI_DC_TAG) of every line in the
 * MMU_PAGESIZE-sized region selected by the virtual color, walking from
 * the last line offset down to offset 0.  Nothing is flushed when the
 * LSU_DC bit read from ASI_LSU is clear.
 *
 * arg:		virtual color; overwritten with (color << MMU_PAGESHIFT)
 * tmp1:	scratch, holds dcache_linesize
 * tmp2:	scratch, holds the running offset within the page
 *
 * The condition codes are modified and numeric local labels 1 and 2 are
 * defined by the expansion.
141 */
142#define	DCACHE_FLUSHCOLOR(arg, tmp1, tmp2)				\
143	ldxa	[%g0]ASI_LSU, tmp1;					\
144	btst	LSU_DC, tmp1;		/* is dcache enabled? */	\
145	bz,pn	%icc, 1f;						\
146	sethi	%hi(dcache_linesize), tmp1;				\
147	ld	[tmp1 + %lo(dcache_linesize)], tmp1;			\
148	set	MMU_PAGESIZE, tmp2;					\
149	/*								\
150	 * arg = virtual color						\
151	 * tmp2 = page size						\
152	 * tmp1 = cache line size					\
153	 */								\
154	sllx	arg, MMU_PAGESHIFT, arg; /* color to dcache page */	\
155	sub	tmp2, tmp1, tmp2;					\
1562:									\
157	stxa	%g0, [arg + tmp2]ASI_DC_TAG;				\
158	membar	#Sync;							\
159	cmp	%g0, tmp2;						\
160	bne,pt	%icc, 2b;						\
161	sub	tmp2, tmp1, tmp2;					\
1621:
163
164/*
165 * macro that flushes the entire dcache
 *
 * size:	register holding the D$ size
 * linesize:	register holding the D$ line size
 * tmp:		scratch, holds the running tag offset
 *
 * Writes zero to ASI_DC_TAG at offsets size - linesize, size - 2*linesize,
 * ... down to 0.  Nothing is flushed when the LSU_DC bit read from ASI_LSU
 * is clear.  The loop ends only when the offset compares equal to zero, so
 * size is presumably a multiple of linesize (not checked here).
 * The initial "sub" sits in the delay slot of the enable test, so tmp is
 * written on both paths.  Defines numeric local labels 1 and 2.
166 */
167#define	DCACHE_FLUSHALL(size, linesize, tmp)				\
168	ldxa	[%g0]ASI_LSU, tmp;					\
169	btst	LSU_DC, tmp;		/* is dcache enabled? */	\
170	bz,pn	%icc, 1f;						\
171									\
172	sub	size, linesize, tmp;					\
1732:									\
174	stxa	%g0, [tmp]ASI_DC_TAG;					\
175	membar	#Sync;							\
176	cmp	%g0, tmp;						\
177	bne,pt	%icc, 2b;						\
178	sub	tmp, linesize, tmp;					\
1791:
180
181/*
182 * macro that flushes the entire icache
 *
 * size:	register holding the I$ size
 * linesize:	register holding the I$ line size
 * tmp:		scratch, holds the running tag offset
 *
 * Same shape as DCACHE_FLUSHALL, but tests the LSU_IC bit and writes zero
 * through ASI_IC_TAG.  The loop ends only when the offset compares equal
 * to zero, so size is presumably a multiple of linesize (not checked here).
 * Defines numeric local labels 1 and 2.
183 */
184#define	ICACHE_FLUSHALL(size, linesize, tmp)				\
185	ldxa	[%g0]ASI_LSU, tmp;					\
186	btst	LSU_IC, tmp;		/* is icache enabled? */	\
187	bz,pn	%icc, 1f;						\
188									\
189	sub	size, linesize, tmp;					\
1902:									\
191	stxa	%g0, [tmp]ASI_IC_TAG;					\
192	membar	#Sync;							\
193	cmp	%g0, tmp;						\
194	bne,pt	%icc, 2b;						\
195	sub	tmp, linesize, tmp;					\
1961:
197
198/*
199 * Macro for getting to offset from 'cpu_private' ptr. The 'cpu_private'
200 * ptr is in the machcpu structure.
201 * r_or_s:	Register or symbol for the offset from the 'cpu_private' ptr.
202 * scr1:	Scratch, ptr is returned in this register.
203 * scr2:	Scratch
 * label:	Branched to when the 'cpu_private' ptr is NULL; scr1 then
 *		holds 0 and the offset has not been added.
 *
 * The condition codes are modified by the NULL test.
204 */
205#define GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr2, label)		\
206	CPU_ADDR(scr1, scr2);						\
207	ldn	[scr1 + CPU_PRIVATE], scr1; 				\
208	cmp	scr1, 0; 						\
209	be	label;							\
210	 nop; 								\
211	add	scr1, r_or_s, scr1;  					\
211
212
213#ifdef HUMMINGBIRD
214/*
215 * UltraSPARC-IIe processor supports both 4-way set associative and
216 * direct map E$. For performance reasons, we flush E$ by placing it
217 * in direct map mode for data load/store and restore the state after
218 * we are done flushing it. Keep interrupts off while flushing in this
219 * manner.
220 *
221 * We flush the entire ecache by starting at one end and loading each
222 * successive ecache line for the 2*ecache-size range. We have to repeat
223 * the flush operation to guarantee that the entire ecache has been
224 * flushed.
225 *
226 * For flushing a specific physical address, we start at the aliased
227 * address and load at set-size stride, wrapping around at 2*ecache-size
228 * boundary and skipping the physical address being flushed. It takes
229 * 10 loads to guarantee that the physical address has been flushed.
230 */
231
232#define	HB_ECACHE_FLUSH_CNT	2
233#define	HB_PHYS_FLUSH_CNT	10	/* #loads to flush specific paddr */
234#endif /* HUMMINGBIRD */
235
236/* END CSTYLED */
237
238#endif	/* !lint */
239
240/*
241 * Spitfire MMU and Cache operations.
242 */
243
244#if defined(lint)
245
/*
 * Lint-only C definitions.  They are compiled only when "lint" is defined,
 * do no work, and exist so that lint can check callers against these
 * signatures.  The assembly entry points of the same names are in the
 * non-lint half of this file (get_ecache_tag and check_ecache_line are
 * presumably defined further down; they are not in the part reviewed here).
 */
246/*ARGSUSED*/
247void
248vtag_flushpage(caddr_t vaddr, uint_t ctxnum)
249{}
250
251/*ARGSUSED*/
252void
253vtag_flushctx(uint_t ctxnum)
254{}
255
256/*ARGSUSED*/
257void
258vtag_flushall(void)
259{}
260
261/*ARGSUSED*/
262void
263vtag_flushpage_tl1(uint64_t vaddr, uint64_t ctxnum)
264{}
265
266/*ARGSUSED*/
267void
268vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t ctx_pgcnt)
269{}
270
271/*ARGSUSED*/
272void
273vtag_flushctx_tl1(uint64_t ctxnum, uint64_t dummy)
274{}
275
276/*ARGSUSED*/
277void
278vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
279{}
280
281/*ARGSUSED*/
282void
283vac_flushpage(pfn_t pfnum, int vcolor)
284{}
285
286/*ARGSUSED*/
287void
288vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
289{}
290
291/*ARGSUSED*/
292void
293init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
294{}
295
296/*ARGSUSED*/
297void
298init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
299{}
300
301/*ARGSUSED*/
302void
303flush_instr_mem(caddr_t vaddr, size_t len)
304{}
305
306/*ARGSUSED*/
307void
308flush_ecache(uint64_t physaddr, size_t size, size_t linesize)
309{}
310
311/*ARGSUSED*/
312void
313get_ecache_dtag(uint32_t ecache_idx, uint64_t *ecache_data,
314		uint64_t *ecache_tag, uint64_t *oafsr, uint64_t *acc_afsr)
315{}
316
317/* ARGSUSED */
318uint64_t
319get_ecache_tag(uint32_t id, uint64_t *nafsr, uint64_t *acc_afsr)
320{
321	return ((uint64_t)0);
322}
323
324/* ARGSUSED */
325uint64_t
326check_ecache_line(uint32_t id, uint64_t *acc_afsr)
327{
328	return ((uint64_t)0);
329}
330
331/*ARGSUSED*/
332void
333kdi_flush_idcache(int dcache_size, int dcache_lsize,
334    int icache_size, int icache_lsize)
335{}
336
337#else	/* lint */
338
339	ENTRY_NP(vtag_flushpage)
340	/*
341	 * flush page from the tlb
342	 *
343	 * %o0 = vaddr
344	 * %o1 = ctxnum
	 *
	 * ctxnum == 0 takes the KCONTEXT path and demaps using vaddr as
	 * passed in.  Otherwise the secondary context register is loaded
	 * with ctxnum (only if it differs from the current value), the
	 * page is demapped from both the D-TLB and the I-TLB, and the old
	 * secondary context is written back.
	 *
	 * Runs with PSTATE_IE cleared and restores the entry %pstate on
	 * return.  %o0 and %o2-%o5 are used as scratch.  On DEBUG kernels
	 * the routine panics if PSTATE_IE is already clear on entry.
345	 */
346	rdpr	%pstate, %o5
347#ifdef DEBUG
348	andcc	%o5, PSTATE_IE, %g0		/* if interrupts already */
349	bnz,a,pt %icc, 3f			/* disabled, panic */
350	nop
351	save	%sp, -SA(MINFRAME), %sp
352	sethi	%hi(sfmmu_panic1), %o0
353	call	panic
354	  or	%o0, %lo(sfmmu_panic1), %o0
355	ret
356	restore
3573:
358#endif /* DEBUG */
359	/*
360	 * disable ints
361	 */
362	andn	%o5, PSTATE_IE, %o4
363	wrpr	%o4, 0, %pstate
364
365	/*
366	 * Then, blow out the tlb
367	 * Interrupts are disabled to prevent the secondary ctx register
368	 * from changing underneath us.
369	 */
370	brnz,pt	%o1, 1f			/* KCONTEXT? */
371	sethi	%hi(FLUSH_ADDR), %o3	/* delay slot: runs on both paths */
372	/*
373	 * For KCONTEXT demaps use primary. type = page implicitly
374	 */
375	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
376	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
377	b	5f
378	  flush	%o3
3791:
380	/*
381	 * User demap.  We need to set the secondary context properly.
382	 * %o0 = vaddr
383	 * %o1 = ctxnum
384	 * %o3 = FLUSH_ADDR
385	 */
386	set	MMU_SCONTEXT, %o4
387	ldxa	[%o4]ASI_DMMU, %o2		/* rd old ctxnum */
388	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %o0, %o0
389	cmp	%o2, %o1
390	be,a,pt	%icc, 4f
391	  nop
392	stxa	%o1, [%o4]ASI_DMMU		/* wr new ctxnum */
3934:
394	stxa	%g0, [%o0]ASI_DTLB_DEMAP
395	stxa	%g0, [%o0]ASI_ITLB_DEMAP
396	flush	%o3
	/*
	 * The condition codes still hold the result of the cmp above
	 * (nothing in between sets them): skip the restore when the
	 * secondary context was never changed.
	 */
397	be,a,pt	%icc, 5f
398	  nop
399	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
400	flush	%o3
4015:
402	retl
403	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
404	SET_SIZE(vtag_flushpage)
405
406	ENTRY_NP(vtag_flushctx)
407	/*
408	 * flush context from the tlb
409	 *
410	 * %o0 = ctxnum
411	 * We disable interrupts to prevent the secondary ctx register changing
412	 * underneath us.
	 *
	 * The secondary context register is loaded with ctxnum (only if it
	 * differs from the current value), a context demap is issued to
	 * both the D-TLB and the I-TLB, and the old secondary context is
	 * written back.  The entry %pstate is restored on return.
	 *
	 * Register use: %g1 = demap operand, %o2 = saved %pstate,
	 * %o3 = FLUSH_ADDR, %o4 = scratch / MMU_SCONTEXT, %o5 = old ctxnum.
	 * On DEBUG kernels the routine panics if PSTATE_IE is already clear
	 * on entry.
413	 */
414	sethi	%hi(FLUSH_ADDR), %o3
415	set	DEMAP_CTX_TYPE | DEMAP_SECOND, %g1
416	rdpr	%pstate, %o2
417
418#ifdef DEBUG
419	andcc	%o2, PSTATE_IE, %g0		/* if interrupts already */
420	bnz,a,pt %icc, 1f			/* disabled, panic	 */
421	  nop
422	sethi	%hi(sfmmu_panic1), %o0
423	call	panic
424	  or	%o0, %lo(sfmmu_panic1), %o0
4251:
426#endif /* DEBUG */
427
	/*
	 * wrpr writes the XOR of its two source operands, so clear IE
	 * explicitly (as vtag_flushpage does) instead of toggling it;
	 * a toggle would turn interrupts on if IE were already clear.
	 */
	andn	%o2, PSTATE_IE, %o4
428	wrpr	%o4, 0, %pstate			/* disable interrupts */
429	set	MMU_SCONTEXT, %o4
430	ldxa	[%o4]ASI_DMMU, %o5		/* rd old ctxnum */
431	cmp	%o5, %o0
432	be,a,pt	%icc, 4f
433	  nop
434	stxa	%o0, [%o4]ASI_DMMU		/* wr new ctxnum */
4354:
436	stxa	%g0, [%g1]ASI_DTLB_DEMAP
437	stxa	%g0, [%g1]ASI_ITLB_DEMAP
438	flush	%o3
	/*
	 * The condition codes still hold the result of the cmp above:
	 * skip the restore when the secondary context was never changed.
	 */
439	be,a,pt	%icc, 5f
440	  nop
441	stxa	%o5, [%o4]ASI_DMMU		/* restore old ctxnum */
442	flush	%o3
4435:
444	retl
445	  wrpr	%g0, %o2, %pstate		/* enable interrupts */
446	SET_SIZE(vtag_flushctx)
447
	/*
	 * vtag_flushall is not implemented in this module: calling it
	 * panics with the message below.  There is no return sequence
	 * after the call to panic.
	 */
448	.seg	".text"
449.flushallmsg:
450	.asciz	"sfmmu_asm: unimplemented flush operation"
451
452	ENTRY_NP(vtag_flushall)
453	sethi	%hi(.flushallmsg), %o0
454	call	panic
455	  or	%o0, %lo(.flushallmsg), %o0
456	SET_SIZE(vtag_flushall)
457
458	ENTRY_NP(vtag_flushpage_tl1)
459	/*
460	 * x-trap to flush page from tlb and tsb
461	 *
462	 * %g1 = vaddr, zero-extended on 32-bit kernel
463	 * %g2 = ctxnum
464	 *
465	 * assumes TSBE_TAG = 0
	 *
	 * The low MMU_PAGESHIFT bits of vaddr are cleared before the demap
	 * type bits are or-ed in.  Unlike vtag_flushpage, the secondary
	 * context is always written and always restored.  %g4 and %g5 are
	 * scratch.  Exits with retry.
466	 */
467	srln	%g1, MMU_PAGESHIFT, %g1
468	slln	%g1, MMU_PAGESHIFT, %g1			/* g1 = vaddr */
469	/* We need to set the secondary context properly. */
470	set	MMU_SCONTEXT, %g4
471	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
472	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
473	stxa	%g2, [%g4]ASI_DMMU		/* wr new ctxnum */
474	stxa	%g0, [%g1]ASI_DTLB_DEMAP
475	stxa	%g0, [%g1]ASI_ITLB_DEMAP
476	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
477	membar #Sync
478	retry
479	SET_SIZE(vtag_flushpage_tl1)
480
481	ENTRY_NP(vtag_flush_pgcnt_tl1)
482	/*
483	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
484	 *
485	 * %g1 = vaddr, zero-extended on 32-bit kernel
486	 * %g2 = <zero32|ctx16|pgcnt16>
487	 *
488	 * NOTE: this handler relies on the fact that no
489	 *	interrupts or traps can occur during the loop
490	 *	issuing the TLB_DEMAP operations. It is assumed
491	 *	that interrupts are disabled and this code is
492	 *	fetching from the kernel locked text address.
493	 *
494	 * assumes TSBE_TAG = 0
	 *
	 * %g3, %g4 and %g5 are scratch; %g2 is reused to hold the page
	 * size once the context number has been extracted from it.
	 * NOTE(review): the loop decrements pgcnt before testing it, so
	 * pgcnt is presumably never 0 on entry - confirm against callers.
495	 */
496	srln	%g1, MMU_PAGESHIFT, %g1
497	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
498	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
499	set	0xffff, %g4
500	and	%g4, %g2, %g3			/* g3 = pgcnt */
501	srln	%g2, 16, %g2			/* g2 = ctxnum */
502	/* We need to set the secondary context properly. */
503	set	MMU_SCONTEXT, %g4
504	ldxa	[%g4]ASI_DMMU, %g5		/* read old ctxnum */
505	stxa	%g2, [%g4]ASI_DMMU		/* write new ctxnum */
506
507	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
5081:
509	stxa	%g0, [%g1]ASI_DTLB_DEMAP
510	stxa	%g0, [%g1]ASI_ITLB_DEMAP
511	deccc	%g3				/* decr pgcnt */
512	bnz,pt	%icc,1b
513	add	%g1, %g2, %g1			/* go to nextpage */
514
515	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
516	membar #Sync
517	retry
518	SET_SIZE(vtag_flush_pgcnt_tl1)
519
520	ENTRY_NP(vtag_flushctx_tl1)
521	/*
522	 * x-trap to flush context from tlb
523	 *
524	 * %g1 = ctxnum
	 *
	 * Loads ctxnum into the secondary context register, issues a
	 * context demap to both the D-TLB and the I-TLB, then writes the
	 * old secondary context back.  %g3, %g4 and %g5 are scratch.
	 * Exits with retry.
525	 */
526	set	DEMAP_CTX_TYPE | DEMAP_SECOND, %g4
527	set	MMU_SCONTEXT, %g3
528	ldxa	[%g3]ASI_DMMU, %g5		/* rd old ctxnum */
529	stxa	%g1, [%g3]ASI_DMMU		/* wr new ctxnum */
530	stxa	%g0, [%g4]ASI_DTLB_DEMAP
531	stxa	%g0, [%g4]ASI_ITLB_DEMAP
532	stxa	%g5, [%g3]ASI_DMMU		/* restore old ctxnum */
533	membar #Sync
534	retry
535	SET_SIZE(vtag_flushctx_tl1)
536
537	! Not implemented on US1/US2
	! The handler does no flushing at all: it only executes retry.
538	ENTRY_NP(vtag_flushall_tl1)
539	retry
540	SET_SIZE(vtag_flushall_tl1)
541
542/*
543 * vac_flushpage(pfnum, color)
544 *	Flush 1 8k page of the D-$ with physical page = pfnum
545 *	Algorithm:
546 *		The spitfire dcache is a 16k direct mapped virtual indexed,
547 *		physically tagged cache.  Given the pfnum we read all cache
548 *		lines for the corresponding page in the cache (determined by
549 *		the color).  Each cache line is compared with
550 *		the tag created from the pfnum. If the tags match we flush
551 *		the line.
 *
 *	The work is done by the DCACHE_FLUSHPAGE macro, which selects its
 *	strategy from the dflush_type word defined below.  dflush_type is
 *	a global in .data and is initialized to FLUSHPAGE_TYPE.
 *	%o0-%o4 are all overwritten.
552 */
553	.seg	".data"
554	.align	8
555	.global	dflush_type
556dflush_type:
557	.word	FLUSHPAGE_TYPE
558	.seg	".text"
559
560	ENTRY(vac_flushpage)
561	/*
562	 * flush page from the d$
563	 *
564	 * %o0 = pfnum, %o1 = color
565	 */
566	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
567	retl
568	nop
569	SET_SIZE(vac_flushpage)
570
571	ENTRY_NP(vac_flushpage_tl1)
572	/*
573	 * x-trap to flush page from the d$
574	 *
575	 * %g1 = pfnum, %g2 = color
	 *
	 * Same DCACHE_FLUSHPAGE expansion as vac_flushpage, using global
	 * registers; %g1-%g5 are all overwritten.  Exits with retry.
576	 */
577	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
578	retry
579	SET_SIZE(vac_flushpage_tl1)
580
581	ENTRY(vac_flushcolor)
582	/*
	 * flush every d$ line of one virtual color
	 *
583	 * %o0 = vcolor
	 *
	 * %o0, %o1 and %o2 are overwritten by DCACHE_FLUSHCOLOR.
584	 */
585	DCACHE_FLUSHCOLOR(%o0, %o1, %o2)
586	retl
587	  nop
588	SET_SIZE(vac_flushcolor)
589
590	ENTRY(vac_flushcolor_tl1)
591	/*
	 * x-trap variant of vac_flushcolor
	 *
592	 * %g1 = vcolor
	 *
	 * %g1, %g2 and %g3 are overwritten by DCACHE_FLUSHCOLOR.
	 * Exits with retry.
593	 */
594	DCACHE_FLUSHCOLOR(%g1, %g2, %g3)
595	retry
596	SET_SIZE(vac_flushcolor_tl1)
597
597
598
599	.global _dispatch_status_busy
600_dispatch_status_busy:
601	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
602	.align	4
603
604/*
605 * Determine whether or not the IDSR is busy.
606 * Entry: no arguments
607 * Returns: 1 if busy, 0 otherwise
 *
 * %o0 is preset to 0.  An annulled conditional branch executes its delay
 * slot only when the branch is taken, so the branch must be taken on the
 * busy (non-zero) result for "mov 1, %o0" to run.  Branching on zero
 * would return 1 for an idle IDSR and 0 for a busy one.
 * %g1 and the condition codes are modified.
608 */
609	ENTRY(idsr_busy)
610	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
611	clr	%o0
612	btst	IDSR_BUSY, %g1
613	bnz,a,pn %xcc, 1f		! busy: taken, delay slot runs
614	  mov	1, %o0			! annulled when the IDSR is idle
6151:
616	retl
617	nop
618	SET_SIZE(idsr_busy)
619
620/*
621 * Setup interrupt dispatch data registers
622 * Entry:
623 *	%o0 - function or inumber to call
624 *	%o1, %o2 - arguments (2 uint64_t's)
 *
 * %o0, %o1 and %o2 are stored to dispatch data registers IDDR_0, IDDR_1
 * and IDDR_2 respectively.  %g1-%g3 are used as scratch.
 *
 * On DEBUG kernels init_mondo first panics if the IDSR_BUSY bit is set in
 * the interrupt dispatch status register.  init_mondo_nocheck is an
 * alternate entry point that starts after that check.
625 */
626	.seg "text"
627
628	ENTRY(init_mondo)
629#ifdef DEBUG
630	!
631	! IDSR should not be busy at the moment
632	!
633	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
634	btst	IDSR_BUSY, %g1
635	bz,pt	%xcc, 1f
636	nop
637
638	sethi	%hi(_dispatch_status_busy), %o0
639	call	panic
640	or	%o0, %lo(_dispatch_status_busy), %o0
641#endif /* DEBUG */
642
643	ALTENTRY(init_mondo_nocheck)
644	!
645	! interrupt vector dispatch data reg 0
646	!
6471:
648	mov	IDDR_0, %g1
649	mov	IDDR_1, %g2
650	mov	IDDR_2, %g3
651	stxa	%o0, [%g1]ASI_INTR_DISPATCH
652
653	!
654	! interrupt vector dispatch data reg 1
655	!
656	stxa	%o1, [%g2]ASI_INTR_DISPATCH
657
658	!
659	! interrupt vector dispatch data reg 2
660	!
661	stxa	%o2, [%g3]ASI_INTR_DISPATCH
662
663	retl
664	membar	#Sync			! allowed to be in the delay slot
665	SET_SIZE(init_mondo)
666
667/*
668 * Ship mondo to upaid
 *
 * Entry:
 *	%o0 - target upa id
 *
 * Builds the dispatch address from the upa id shifted by IDCR_PID_SHIFT
 * or-ed with IDCR_OFFSET, then stores to it through ASI_INTR_DISPATCH.
 * %g1 is scratch.  When SF_ERRATA_54 is defined the store is followed by
 * a membar and a read of the UDBH control register whose result is
 * discarded; %g3 is then also used as scratch.
669 */
670	ENTRY_NP(shipit)
671	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = upa id
672	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
673	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
674#if defined(SF_ERRATA_54)
675	membar	#Sync				! store must occur before load
676	mov	0x20, %g3			! UDBH Control Register Read
677	ldxa	[%g3]ASI_SDB_INTR_R, %g0
678#endif
679	retl
680	membar	#Sync
681	SET_SIZE(shipit)
682
683
684/*
685 * flush_instr_mem:
686 *	Flush a portion of the I-$ starting at vaddr
687 * 	%o0 vaddr
688 *	%o1 bytes to be flushed
 *
 *	One flush is issued per ICACHE_FLUSHSZ bytes.  The loop body runs
 *	before the count is tested, so at least one flush is always issued,
 *	even when the byte count is 0.  %o0 and %o1 are modified.
689 */
690
691	ENTRY(flush_instr_mem)
692	membar	#StoreStore				! Ensure the stores
693							! are globally visible
6941:
695	flush	%o0
696	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes = bytes-0x20
697	bgu,pt	%ncc, 1b
698	add	%o0, ICACHE_FLUSHSZ, %o0		! vaddr = vaddr+0x20
699
700	retl
701	nop
702	SET_SIZE(flush_instr_mem)
703
704/*
705 * flush_ecache:
706 * Flush the entire e$ using displacement flush by reading through a
707 * physically contiguous area. We use mmu bypass asi (ASI_MEM) while
708 * reading this physical address range so that data doesn't go to d$.
709 * incoming arguments:
710 *	%o0 - 64 bit physical address
711 *	%o1 - size of address range to read
712 *	%o2 - ecache linesize
 *
 * Both variants read at offsets size - linesize, size - 2*linesize, ...
 * down to 0 from the physical address; loaded data is discarded (%g0).
 * The non-HUMMINGBIRD variant counts down in %o1 itself and performs no
 * load when size is smaller than linesize.  The HUMMINGBIRD variant uses
 * %g1 and %g3-%g5 as scratch and makes HB_ECACHE_FLUSH_CNT passes.
713 */
714	ENTRY(flush_ecache)
715#ifndef HUMMINGBIRD
716	b	2f
717	  nop
7181:
719	ldxa	[%o0 + %o1]ASI_MEM, %g0	! start reading from physaddr + size
7202:
721	subcc	%o1, %o2, %o1		! step down; borrow ends the loop
722	bcc,a,pt %ncc, 1b
723	  nop
724
725#else /* HUMMINGBIRD */
726	/*
727	 * UltraSPARC-IIe processor supports both 4-way set associative
728	 * and direct map E$. For performance reasons, we flush E$ by
729	 * placing it in direct map mode for data load/store and restore
730	 * the state after we are done flushing it. It takes 2 iterations
731	 * to guarantee that the entire ecache has been flushed.
732	 *
733	 * Keep the interrupts disabled while flushing E$ in this manner.
734	 */
735	rdpr	%pstate, %g4		! current pstate (restored later)
736	andn	%g4, PSTATE_IE, %g5
737	wrpr	%g0, %g5, %pstate	! disable interrupts
738
739	! Place E$ in direct map mode for data access
740	or	%g0, 1, %g5
741	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
742	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
743	or	%g1, %g5, %g5
744	membar	#Sync
745	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
746	membar	#Sync
747
748	! flush entire ecache HB_ECACHE_FLUSH_CNT times
749	mov	HB_ECACHE_FLUSH_CNT-1, %g5	! %g5 = passes left after this one
7502:
751	sub	%o1, %o2, %g3		! start from last entry
7521:
753	ldxa	[%o0 + %g3]ASI_MEM, %g0	! start reading from physaddr + size
754	subcc	%g3, %o2, %g3
755	bgeu,a,pt %ncc, 1b
756	  nop
757	brgz,a,pt %g5, 2b		! another pass while %g5 > 0
758	  dec	%g5			! (annulled on the final pass)
759
760	membar	#Sync
761	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config reg
762	membar	#Sync
763	wrpr	%g0, %g4, %pstate	! restore earlier pstate
764#endif /* HUMMINGBIRD */
765
766	retl
767	nop
768	SET_SIZE(flush_ecache)
769
770/*
771 * void kdi_flush_idcache(int dcache_size, int dcache_linesize,
772 *			int icache_size, int icache_linesize)
 *
 * Invalidates every D$ tag and then every I$ tag using the DCACHE_FLUSHALL
 * and ICACHE_FLUSHALL macros.
 *	%o0 - dcache size	%o1 - dcache line size
 *	%o2 - icache size	%o3 - icache line size
 * %g1 is scratch.  Each macro skips its cache when the matching enable
 * bit read from ASI_LSU is clear.
773 */
774	ENTRY(kdi_flush_idcache)
775	DCACHE_FLUSHALL(%o0, %o1, %g1)
776	ICACHE_FLUSHALL(%o2, %o3, %g1)
777	membar	#Sync
778	retl
779	nop
780	SET_SIZE(kdi_flush_idcache)
781
782
783/*
784 * void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
785 * 			uint64_t *oafsr, uint64_t *acc_afsr)
786 *
787 * Get ecache data and tag.  The ecache_idx argument is assumed to be aligned
788 * on a 64-byte boundary.  The corresponding AFSR value is also read for each
789 * 8 byte ecache data obtained. The ecache data is assumed to be a pointer
790 * to an array of 16 uint64_t's (e$data & afsr value).  The action to read the
791 * data and tag should be atomic to make sense.  We will be executing at PIL15
792 * and will disable IE, so nothing can occur between the two reads.  We also
793 * assume that the execution of this code does not interfere with what we are
794 * reading - not really possible, but we'll live with it for now.
795 * We also pass the old AFSR value before clearing it, and caller will take
796 * appropriate actions if the important bits are non-zero.
797 *
798 * If the caller wishes to track the AFSR in cases where the CP bit is
799 * set, an address should be passed in for acc_afsr.  Otherwise, this
800 * argument may be null.
801 *
802 * Register Usage:
803 * i0: In: 32-bit e$ index
804 * i1: In: addr of e$ data
805 * i2: In: addr of e$ tag
806 * i3: In: addr of old afsr
807 * i4: In: addr of accumulated afsr - may be null
 *
 * After the arguments are consumed: %i0 is the working AFSR/data value,
 * %i2 is the loop byte count (0..64 in steps of 8), %i5 holds the entry
 * %pstate, %g1 holds the entry ASI_ESTATE_ERR value, %g6 and %l4 are the
 * e$ data and tag access addresses, %l0 and %g4 are scratch.
 * The output array is filled as eight (data, afsr) pairs.
808 */
809	ENTRY(get_ecache_dtag)
810	save	%sp, -SA(MINFRAME), %sp
811	or	%g0, 1, %l4
812	sllx	%l4, 39, %l4	! set bit 39 for e$ data access
813	or	%i0, %l4, %g6	! %g6 = e$ addr for data read
814	sllx	%l4, 1, %l4	! set bit 40 for e$ tag access
815	or	%i0, %l4, %l4	! %l4 = e$ addr for tag read
816
817	rdpr    %pstate, %i5
818	andn    %i5, PSTATE_IE | PSTATE_AM, %i0
819	wrpr    %i0, %g0, %pstate       ! clear IE, AM bits
820
821	ldxa    [%g0]ASI_ESTATE_ERR, %g1
822	stxa    %g0, [%g0]ASI_ESTATE_ERR        ! disable errors
823	membar  #Sync
824
825	ldxa	[%g0]ASI_AFSR, %i0      ! grab the old-afsr before tag read
826	stx     %i0, [%i3]		! write back the old-afsr
827
828	ldxa    [%l4]ASI_EC_R, %g0      ! read tag into E$ tag reg
829	ldxa    [%g0]ASI_EC_DIAG, %i0   ! read tag from E$ tag reg
830	stx     %i0, [%i2]              ! write back tag result
831
832	clr	%i2			! loop count
833
834	brz	%i4, 1f			! acc_afsr == NULL?
835	  ldxa	[%g0]ASI_AFSR, %i0      ! grab the old-afsr before clearing
836	srlx	%i0, P_AFSR_CP_SHIFT, %l0
837	btst	1, %l0			! CP bit set in the AFSR?
838	bz	1f
839	  nop
840	ldx	[%i4], %g4
841	or	%g4, %i0, %g4		! aggregate AFSR in cpu private
842	stx	%g4, [%i4]
8431:
844	stxa    %i0, [%g0]ASI_AFSR	! clear AFSR
845	membar  #Sync
846	ldxa    [%g6]ASI_EC_R, %i0      ! read the 8byte E$data
847	stx     %i0, [%i1]              ! save the E$data
848	add     %g6, 8, %g6
849	add     %i1, 8, %i1
850	ldxa    [%g0]ASI_AFSR, %i0      ! read AFSR for this 8byte read
851	srlx	%i0, P_AFSR_CP_SHIFT, %l0
852	btst	1, %l0
853	bz	2f
854	  stx     %i0, [%i1]		! save the AFSR
855
856	brz	%i4, 2f			! acc_afsr == NULL?
857	  nop
858	ldx	[%i4], %g4
859	or	%g4, %i0, %g4		! aggregate AFSR in cpu private
860	stx	%g4, [%i4]
8612:
862	add     %i2, 8, %i2
863	cmp     %i2, 64
864	bl,a    1b
865	  add     %i1, 8, %i1		! next pair (annulled on loop exit)
866	stxa    %i0, [%g0]ASI_AFSR              ! clear AFSR
867	membar  #Sync
868	stxa    %g1, [%g0]ASI_ESTATE_ERR        ! restore error enable
869	membar  #Sync
870	wrpr    %g0, %i5, %pstate
871	ret
872	  restore
873	SET_SIZE(get_ecache_dtag)
874#endif /* lint */
875
876#if defined(lint)
877/*
878 * The ce_err function handles trap type 0x63 (corrected_ECC_error) at tl=0.
879 * Steps: 1. GET AFSR  2. Get AFAR <40:4> 3. Get datapath error status
880 *	  4. Clear datapath error bit(s) 5. Clear AFSR error bit
881 *	  6. package data in %g2 and %g3 7. call cpu_ce_error via sys_trap
882 * %g3: [ 52:43 UDB lower | 42:33 UDB upper | 32:0 afsr ] - arg #3/arg #1
883 * %g2: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
884 */
885void
886ce_err(void)
887{}
888
/*
 * Lint stand-in for ce_err_tl1, the corrected-error handler entry point
 * defined in the non-lint half of this file.
 */
889void
890ce_err_tl1(void)
891{}
892
893
894/*
895 * The async_err function handles trap types 0x0A (instruction_access_error)
896 * and 0x32 (data_access_error) at TL = 0 and TL > 0.  When we branch here,
897 * %g5 will have the trap type (with 0x200 set if we're at TL > 0).
898 *
899 * Steps: 1. Get AFSR 2. Get AFAR <40:4> 3. If not UE error skip UDB registers.
900 *	  4. Else get and clear datapath error bit(s) 5. Clear AFSR error bits
901 *	  6. package data in %g2 and %g3 7. disable all cpu errors, because
902 *	  trap is likely to be fatal 8. call cpu_async_error via sys_trap
903 *
904 * %g3: [ 63:53 tt | 52:43 UDB_L | 42:33 UDB_U | 32:0 afsr ] - arg #3/arg #1
905 * %g2: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
906 */
907void
908async_err(void)
909{}
910
911/*
912 * The clr_datapath function clears any error bits set in the UDB regs.
913 */
914void
915clr_datapath(void)
916{}
917
918/*
919 * The get_udb_errors() function gets the current value of the
920 * Datapath Error Registers.
921 */
922/*ARGSUSED*/
923void
924get_udb_errors(uint64_t *udbh, uint64_t *udbl)
925{
926	*udbh = 0;
927	*udbl = 0;
928}
929
930#else 	/* lint */
931
	!
	! ce_err: corrected ECC error handler.
	! If the AFSR UE bit (bit 21) is set, control goes to async_err with
	! %g5 = 0x63.  Otherwise CEEN is cleared in ASI_ESTATE_ERR, the AFSR
	! and both UDB halves are packed into %g3, the AFAR is placed in
	! %g2, the CE bits found are cleared, and control goes to sys_trap
	! with %g1 = cpu_ce_error and %g4 = PIL_14 or PIL_15.
	! Uses %g1-%g6.
	!
932	ENTRY_NP(ce_err)
933	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3
934
935	!
936	! Check for a UE... From Kevin.Normoyle:
937	! We try to switch to the trap for the UE, but since that's
938	! a hardware pipeline, we might get to the CE trap before we
939	! can switch. The UDB and AFSR registers will have both the
940	! UE and CE bits set but the UDB syndrome and the AFAR will be
941	! for the UE.
942	!
943	or	%g0, 1, %g1		! put 1 in g1
944	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
945	andcc	%g1, %g3, %g0		! check for UE in afsr
946	bnz	async_err		! handle the UE, not the CE
947	  or	%g0, 0x63, %g5		! pass along the CE ttype
948	!
949	! Disable further CE traps to avoid recursion (stack overflow)
950	! and staying above XCALL_PIL for extended periods.
951	!
952	ldxa	[%g0]ASI_ESTATE_ERR, %g2
953	andn	%g2, 0x1, %g2		! clear bit 0 - CEEN
954	stxa	%g2, [%g0]ASI_ESTATE_ERR
955	membar	#Sync			! required
956	!
957	! handle the CE
958	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2
959
960	set	P_DER_H, %g4		! put P_DER_H in g4
961	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
962	or	%g0, 1, %g6		! put 1 in g6
963	sllx	%g6, 8, %g6		! shift g6 to <8> sdb CE
964	andcc	%g5, %g6, %g1		! check for CE in upper half
965	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
966	or	%g3, %g5, %g3		! or with afsr bits
967	bz,a	1f			! no error, goto 1f
968	  nop
969	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
970	membar	#Sync			! membar sync required
9711:
972	set	P_DER_L, %g4		! put P_DER_L in g4
973	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
974	andcc	%g5, %g6, %g1		! check for CE in lower half
975	sllx	%g5, 43, %g5		! shift lower-half bits to <52:43>
976	or	%g3, %g5, %g3		! or with afsr bits
977	bz,a	2f			! no error, goto 2f
978	  nop
979	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
980	membar	#Sync			! membar sync required
9812:
982	or	%g0, 1, %g4		! put 1 in g4
983	sllx	%g4, 20, %g4		! shift left to <20> afsr CE
984	stxa	%g4, [%g0]ASI_AFSR	! use g4 to clear afsr CE error
985	membar	#Sync			! membar sync required
986
987	set	cpu_ce_error, %g1	! put *cpu_ce_error() in g1
988	rdpr	%pil, %g6		! read pil into %g6
989	subcc	%g6, PIL_15, %g0	! negative when current pil is below 15
990	  movneg	%icc, PIL_14, %g4 ! run at pil 14 unless already at 15
991	sethi	%hi(sys_trap), %g5
992	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
993	  movge	%icc, PIL_15, %g4	! already at pil 15
994	SET_SIZE(ce_err)
995
	!
	! ce_err_tl1: corrected-error handler variant.
	! Without TRAPTRACE the AFSR is read and written straight back and
	! the handler retries; %g7 is scratch.  With TRAPTRACE it jumps to
	! dis_err_panic1 with %g1 = ce_trap_tl1 as the routine to run.
	!
996	ENTRY_NP(ce_err_tl1)
997#ifndef	TRAPTRACE
998	ldxa	[%g0]ASI_AFSR, %g7
999	stxa	%g7, [%g0]ASI_AFSR
1000	membar	#Sync
1001	retry
1002#else
1003	set	ce_trap_tl1, %g1
1004	sethi	%hi(dis_err_panic1), %g4
1005	jmp	%g4 + %lo(dis_err_panic1)
1006	nop
1007#endif
1008	SET_SIZE(ce_err_tl1)
1009
1010#ifdef	TRAPTRACE
1011.celevel1msg:
1012	.asciz	"Softerror with trap tracing at tl1: AFAR 0x%08x.%08x AFSR 0x%08x.%08x";
1013
	!
	! ce_trap_tl1: panic with the AFAR and AFSR, TRAPTRACE kernels only.
	! Per the comments below, on entry %o1 = AFAR low 32 bits,
	! %o2 = AFSR low 32 bits, %o3 = AFSR high 32 bits and
	! %o4 = AFAR high 32 bits.  They are shuffled into the order the
	! format string expects: %o1/%o2 = AFAR high/low,
	! %o3/%o4 = AFSR high/low, with %o0 = the message.
	!
1014	ENTRY_NP(ce_trap_tl1)
1015	! upper 32 bits of AFSR already in o3
1016	mov	%o4, %o0		! save AFAR upper 32 bits
1017	mov	%o2, %o4		! lower 32 bits of AFSR
1018	mov	%o1, %o2		! lower 32 bits of AFAR
1019	mov	%o0, %o1		! upper 32 bits of AFAR
1020	set	.celevel1msg, %o0
1021	call	panic
1022	nop
1023	SET_SIZE(ce_trap_tl1)
1024#endif
1025
1026	!
1027	! async_err is the assembly glue code to get us from the actual trap
1028	! into the CPU module's C error handler.  Note that we also branch
1029	! here from ce_err() above.
	!
	! Entry: %g5 = trap type.
	! Exit to sys_trap with %g1 = cpu_async_error, %g2 = AFAR,
	! %g3 = trap type in <63:53> or-ed with the AFSR (and, when the AFSR
	! UE bit is set, the UDB halves in <42:33> and <52:43>), and
	! %g4 = PIL_15.  All error traps are disabled first and are not
	! re-enabled here.
1030	!
1031	ENTRY_NP(async_err)
1032	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable ecc and other cpu errors
1033	membar	#Sync			! membar sync required
1034
1035	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3
1036	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2
1037
1038	sllx	%g5, 53, %g5		! move ttype to <63:53>
1039	or	%g3, %g5, %g3		! or to afsr in g3
1040
1041	or	%g0, 1, %g1		! put 1 in g1
1042	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
1043	andcc	%g1, %g3, %g0		! check for UE in afsr
1044	bz,a,pn %icc, 2f		! if !UE skip sdb read/clear
1045	  nop
1046
1047	set	P_DER_H, %g4		! put P_DER_H in g4
1048	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
1049	or	%g0, 1, %g6		! put 1 in g6
1050	sllx	%g6, 9, %g6		! shift g6 to <9> sdb UE
1051	andcc	%g5, %g6, %g1		! check for UE in upper half
1052	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
1053	or	%g3, %g5, %g3		! or with afsr bits
1054	bz,a	1f			! no error, goto 1f
1055	  nop
1056	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
1057	membar	#Sync			! membar sync required
10581:
1059	set	P_DER_L, %g4		! put P_DER_L in g4
1060	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
1061	andcc	%g5, %g6, %g1		! check for UE in lower half
1062	sllx	%g5, 43, %g5		! shift lower-half bits to <52:43>
1063	or	%g3, %g5, %g3		! or with afsr bits
1064	bz,a	2f			! no error, goto 2f
1065	  nop
1066	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
1067	membar	#Sync			! membar sync required
10682:
1069	stxa	%g3, [%g0]ASI_AFSR	! clear all the sticky bits
1070	membar	#Sync			! membar sync required
1071
1072	RESET_USER_RTT_REGS(%g4, %g5, 3f)
10733:
1074
1075	set	cpu_async_error, %g1	! put cpu_async_error in g1
1076	sethi	%hi(sys_trap), %g5
1077	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
1078	  or	%g0, PIL_15, %g4	! run at pil 15
1079	SET_SIZE(async_err)
1080
	!
	! dis_err_panic1: disable all error traps, gather the error state
	! and hand off to sys_trap.
	! Entry: %g1 = routine for sys_trap to run (left untouched here).
	! Exit:  %g2 = AFAR, %g3 = AFSR or-ed with the UDB upper half in
	!	 <42:33> and the UDB lower half in <52:43>, %g4 = -1.
	! Unlike async_err, no AFSR or UDB error bits are cleared.
	!
1081	ENTRY_NP(dis_err_panic1)
1082	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable all error traps
1083	membar	#Sync
1084	! save destination routine is in g1
1085	ldxa	[%g0]ASI_AFAR, %g2	! read afar
1086	ldxa	[%g0]ASI_AFSR, %g3	! read afsr
1087	set	P_DER_H, %g4		! put P_DER_H in g4
1088	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
1089	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
1090	or	%g3, %g5, %g3		! or with afsr bits
1091	set	P_DER_L, %g4		! put P_DER_L in g4
1092	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
1093	sllx	%g5, 43, %g5		! shift lower-half bits to <52:43>
1094	or	%g3, %g5, %g3		! or with afsr bits
1095
1096	RESET_USER_RTT_REGS(%g4, %g5, 1f)
10971:
1098
1099	sethi	%hi(sys_trap), %g5
1100	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
1101	  sub	%g0, 1, %g4		! %g4 = -1
1102	SET_SIZE(dis_err_panic1)
1103
	!
	! clr_datapath: clear the UE and CE bits (<9:8>) in both UDB error
	! registers.  For each half the register is read and, if either bit
	! is set, only the bits found set are written back to clear them.
	! No arguments, no return value; %o1, %o2, %o4 and %o5 are scratch.
	!
1104	ENTRY(clr_datapath)
1105	set	P_DER_H, %o4			! put P_DER_H in o4
1106	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! read sdb upper half into o5
1107	or	%g0, 0x3, %o2			! put 0x3 in o2
1108	sllx	%o2, 8, %o2			! shift o2 to <9:8> sdb
1109	andcc	%o5, %o2, %o1			! check for UE,CE in upper half
1110	bz,a	1f				! no error, goto 1f
1111	  nop
1112	stxa	%o1, [%o4]ASI_SDB_INTR_W	! clear sdb reg UE,CE error bits
1113	membar	#Sync				! membar sync required
11141:
1115	set	P_DER_L, %o4			! put P_DER_L in o4
1116	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! read sdb lower half into o5
1117	andcc	%o5, %o2, %o1			! check for UE,CE in lower half
1118	bz,a	2f				! no error, goto 2f
1119	  nop
1120	stxa	%o1, [%o4]ASI_SDB_INTR_W	! clear sdb reg UE,CE error bits
1121	membar	#Sync
11222:
1123	retl
1124	  nop
1125	SET_SIZE(clr_datapath)
1126
/*
 * get_udb_errors: read both halves of the SDB error register.
 *	%o0 - address that receives the upper half (P_DER_H), 64-bit store
 *	%o1 - address that receives the lower half (P_DER_L), 64-bit store
 * %o2 and %o3 are used as scratch.
 */
1127	ENTRY(get_udb_errors)
1128	set	P_DER_H, %o3
1129	ldxa	[%o3]ASI_SDB_INTR_R, %o2	! %o2 = sdb upper half
1130	stx	%o2, [%o0]			! store it at [%o0]
1131	set	P_DER_L, %o3
1132	ldxa	[%o3]ASI_SDB_INTR_R, %o2	! %o2 = sdb lower half
1133	retl
1134	  stx	%o2, [%o1]			! delay slot: store it at [%o1]
1135	SET_SIZE(get_udb_errors)
1136
1137#endif /* lint */
1138
1139#if defined(lint)
1140/*
1141 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
1142 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
1143 * should only be used in places where you have no choice but to look at the
1144 * tlb itself.
1145 *
1146 * Note: These two routines are required by the Estar "cpr" loadable module.
1147 */
/* Lint-only empty stub; the real routine is ENTRY_NP(itlb_rd_entry). */
1148/*ARGSUSED*/
1149void
1150itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
1151{}
1152
/* Lint-only empty stub; the real routine is ENTRY_NP(dtlb_rd_entry). */
1153/*ARGSUSED*/
1154void
1155dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
1156{}
1157#else 	/* lint */
1158/*
1159 * NB - In Spitfire cpus, when reading a tte from the hardware, we
1160 * need to clear [42-41] because the general definitions in pte.h
1161 * define the PA to be [42-13] whereas Spitfire really uses [40-13].
1162 * When cloning these routines for other cpus the "andn" below is not
1163 * necessary.
1164 */
/*
 * itlb_rd_entry: read one I-TLB entry.
 *	%o0 - entry number; shifted left by 3 to form the access address
 *	%o1 - address that receives the 64-bit TTE data, with the
 *	      Spitfire-unused PA bits cleared (see the NB comment)
 *	%o2 - address that receives the tag-read value with the
 *	      TAGREAD_CTX_MASK bits cleared
 * Uses %g1, %g2, %o4 and %o5 as scratch and overwrites %o0.
 * When SF_ERRATA_32 is defined, zero is first written to MMU_PCONTEXT
 * (KCONTEXT); this routine does not restore the previous value.
 */
1165	ENTRY_NP(itlb_rd_entry)
1166	sllx	%o0, 3, %o0				! entry * 8
1167#if defined(SF_ERRATA_32)
1168	sethi	%hi(FLUSH_ADDR), %g2
1169	set	MMU_PCONTEXT, %g1
1170	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
1171	flush	%g2
1172#endif
1173	ldxa	[%o0]ASI_ITLB_ACCESS, %g1		! %g1 = tte data
1174	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
1175	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
1176	andn	%g1, %g2, %g1				! for details
1177	stx	%g1, [%o1]				! *tte = masked data
1178	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2		! %g2 = tag read value
1179	set	TAGREAD_CTX_MASK, %o4
1180	andn	%g2, %o4, %o5				! drop the context bits
1181	retl
1182	  stx	%o5, [%o2]				! delay slot: *va_tag
1183	SET_SIZE(itlb_rd_entry)
1184
/*
 * dtlb_rd_entry: read one D-TLB entry.
 *	%o0 - entry number; shifted left by 3 to form the access address
 *	%o1 - address that receives the 64-bit TTE data, with the
 *	      Spitfire-unused PA bits cleared (see the NB comment above
 *	      itlb_rd_entry)
 *	%o2 - address that receives the tag-read value with the
 *	      TAGREAD_CTX_MASK bits cleared
 * Uses %g1, %g2, %o4 and %o5 as scratch and overwrites %o0.
 * When SF_ERRATA_32 is defined, zero is first written to MMU_PCONTEXT
 * (KCONTEXT); this routine does not restore the previous value.
 */
1185	ENTRY_NP(dtlb_rd_entry)
1186	sllx	%o0, 3, %o0				! entry * 8
1187#if defined(SF_ERRATA_32)
1188	sethi	%hi(FLUSH_ADDR), %g2
1189	set	MMU_PCONTEXT, %g1
1190	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
1191	flush	%g2
1192#endif
1193	ldxa	[%o0]ASI_DTLB_ACCESS, %g1		! %g1 = tte data
1194	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
1195	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
1196	andn	%g1, %g2, %g1				! itlb_rd_entry
1197	stx	%g1, [%o1]				! *tte = masked data
1198	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2		! %g2 = tag read value
1199	set	TAGREAD_CTX_MASK, %o4
1200	andn	%g2, %o4, %o5				! drop the context bits
1201	retl
1202	  stx	%o5, [%o2]				! delay slot: *va_tag
1203	SET_SIZE(dtlb_rd_entry)
1204#endif /* lint */
1205
1206#if defined(lint)
1207
1208/*
1209 * routines to get and set the LSU register
1210 */
/* Lint-only stub that returns 0; the real routine is ENTRY(get_lsu). */
1211uint64_t
1212get_lsu(void)
1213{
1214	return ((uint64_t)0);
1215}
1216
/* Lint-only empty stub; the real routine is ENTRY(set_lsu). */
1217/*ARGSUSED*/
1218void
1219set_lsu(uint64_t lsu)
1220{}
1221
1222#else /* lint */
1223
/*
 * set_lsu: write %o0 to the LSU register.  The membar #Sync that
 * follows the store sits in the retl delay slot.
 */
1224	ENTRY(set_lsu)
1225	stxa	%o0, [%g0]ASI_LSU		! store to LSU
1226	retl
1227	membar	#Sync				! delay slot
1228	SET_SIZE(set_lsu)
1229
/*
 * get_lsu: return the LSU register in %o0.  The load is issued from
 * the retl delay slot.
 */
1230	ENTRY(get_lsu)
1231	retl
1232	ldxa	[%g0]ASI_LSU, %o0		! load LSU
1233	SET_SIZE(get_lsu)
1234
1235#endif /* lint */
1236
1237#ifndef lint
1238	/*
1239	 * Clear the NPT (non-privileged trap) bit in the %tick
1240	 * registers. In an effort to make the change in the
1241	 * tick counter as consistent as possible, we disable
1242	 * all interrupts while we're changing the registers. We also
1243	 * ensure that the read and write instructions are in the same
1244	 * line in the instruction cache.
	 *
	 * Register usage (globals only):
	 *	%g1 - saved %pstate, restored in the delay slot of the exit
	 *	%g2 - %tick value
	 *	%g3 - %pstate with IE cleared, then the NPT mask (1 << 63)
	 *	%g4 - not written here; the routine exits with jmp %g4 + 4,
	 *	      so the caller presumably leaves its return address
	 *	      there - confirm against the callers
	 *
	 * The write uses wrpr, which xors its two source operands, so
	 * writing (mask ^ tick) clears the NPT bit that was found set.
1245	 */
1246	ENTRY_NP(cpu_clearticknpt)
1247	rdpr	%pstate, %g1		/* save processor state */
1248	andn	%g1, PSTATE_IE, %g3	/* turn off */
1249	wrpr	%g0, %g3, %pstate	/*   interrupts */
1250	rdpr	%tick, %g2		/* get tick register */
1251	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
1252	mov	1, %g3			/* create mask (delay slot) */
1253	sllx	%g3, 63, %g3		/*   for NPT bit */
1254	ba,a,pt	%xcc, 2f
1255	.align	64			/* Align to I$ boundary */
12562:
1257	rdpr	%tick, %g2		/* get tick register */
1258	wrpr	%g3, %g2, %tick		/* write tick register, */
1259					/*   clearing NPT bit   */
1260#if defined(BB_ERRATA_1)
1261	rdpr	%tick, %g0		/* read (s)tick (BB_ERRATA_1) */
1262#endif
12631:
1264	jmp	%g4 + 4			/* exit through %g4 */
1265	wrpr	%g0, %g1, %pstate	/* restore processor state */
1266	SET_SIZE(cpu_clearticknpt)
1267
1268	/*
1269	 * get_ecache_tag()
1270	 * Register Usage:
1271	 * %o0: In: 32-bit E$ index
1272	 *      Out: 64-bit E$ tag value
1273	 * %o1: In: address that receives the 64-bit AFSR value read after
	 *      the tag access (i.e. after the sticky bits were cleared)
1274	 * %o2: In: address of cpu private afsr storage
	 *
	 * Runs with interrupts (IE) and address masking (AM) off and with
	 * the error enable register zeroed; both are restored on return.
	 * An AFSR value is or-ed into the cpu private storage only when
	 * its CP bit is set.  Scratch: %o3, %o4, %o5, %g1, %g4.
1275	 */
1276	ENTRY(get_ecache_tag)
1277	or	%g0, 1, %o4
1278	sllx	%o4, 40, %o4			! set bit 40 for e$ tag access
1279	or	%o0, %o4, %o4			! %o4 = e$ addr for tag read
1280	rdpr	%pstate, %o5			! %o5 = saved pstate
1281	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
1282	wrpr	%o0, %g0, %pstate		! clear IE, AM bits
1283
1284	ldxa	[%g0]ASI_ESTATE_ERR, %g1	! %g1 = saved error enable
1285	stxa	%g0, [%g0]ASI_ESTATE_ERR	! Turn off Error enable
1286	membar	#Sync
1287
1288	ldxa	[%g0]ASI_AFSR, %o0
1289	srlx	%o0, P_AFSR_CP_SHIFT, %o3
1290	btst	1, %o3				! is the CP bit set?
1291	bz	1f				! no, skip the aggregation
1292	  nop
1293	ldx	[%o2], %g4
1294	or	%g4, %o0, %g4			! aggregate AFSR in cpu private
1295	stx	%g4, [%o2]
12961:
1297	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1298	membar  #Sync
1299
1300	ldxa	[%o4]ASI_EC_R, %g0		! tag access, result discarded
1301	ldxa	[%g0]ASI_EC_DIAG, %o0		! read tag from e$ tag reg
1302
1303	ldxa	[%g0]ASI_AFSR, %o3
1304	srlx	%o3, P_AFSR_CP_SHIFT, %o4
1305	btst	1, %o4				! is the CP bit set?
1306	bz	2f
1307	  stx	%o3, [%o1]			! AFSR after sticky clear
1308	ldx	[%o2], %g4
1309	or	%g4, %o3, %g4			! aggregate AFSR in cpu private
1310	stx	%g4, [%o2]
13112:
1312	membar	#Sync
1313
1314	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
1315	membar	#Sync
1316	retl
1317	wrpr	%g0, %o5, %pstate		! delay slot: restore pstate
1318	SET_SIZE(get_ecache_tag)
1319
1320	/*
1321	 * check_ecache_line()
1322	 * Register Usage:
1323	 * %o0: In: 32-bit E$ index
1324	 *      Out: 64-bit accumulated AFSR
1325	 * %o1: In: address of cpu private afsr storage
	 *
	 * Reads eight consecutive doublewords (64 bytes) of E$ data
	 * starting at the given index and returns the AFSR observed
	 * afterwards.  Runs with IE and AM off and with the error enable
	 * register zeroed; both are restored on return.  An AFSR value is
	 * or-ed into the cpu private storage only when its CP bit is set.
	 * Scratch: %o2 (also the loop count), %o3, %o4, %o5, %g1.
1326	 */
1327	ENTRY(check_ecache_line)
1328	or	%g0, 1, %o4
1329	sllx	%o4, 39, %o4			! set bit 39 for e$ data access
1330	or	%o0, %o4, %o4		 	! %o4 = e$ addr for data read
1331
1332	rdpr	%pstate, %o5			! %o5 = saved pstate
1333	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
1334	wrpr	%o0, %g0, %pstate		! clear IE, AM bits
1335
1336	ldxa	[%g0]ASI_ESTATE_ERR, %g1	! %g1 = saved error enable
1337	stxa	%g0, [%g0]ASI_ESTATE_ERR 	! Turn off Error enable
1338	membar	#Sync
1339
1340	ldxa 	[%g0]ASI_AFSR, %o0
1341	srlx	%o0, P_AFSR_CP_SHIFT, %o2
1342	btst	1, %o2				! is the CP bit set?
1343	bz	1f
1344	  clr	%o2				! loop count
1345	ldx	[%o1], %o3
1346	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
1347	stx	%o3, [%o1]
13481:
1349	stxa    %o0, [%g0]ASI_AFSR              ! clear AFSR
1350	membar	#Sync
1351
13522:
1353	ldxa	[%o4]ASI_EC_R, %g0		! Read the E$ data 8bytes each
1354	add	%o2, 1, %o2
1355	cmp	%o2, 8
1356	bl,a 	2b
1357	  add	%o4, 8, %o4			! next doubleword (annulled on exit)
1358
1359	membar	#Sync
1360	ldxa	[%g0]ASI_AFSR, %o0		! read accumulated AFSR
1361	srlx	%o0, P_AFSR_CP_SHIFT, %o2
1362	btst	1, %o2				! is the CP bit set?
1363	bz	3f
1364	  nop
1365	ldx	[%o1], %o3
1366	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
1367	stx	%o3, [%o1]
13683:
1369	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
1370	membar	#Sync
1371	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
1372	membar	#Sync
1373	retl
1374	wrpr	%g0, %o5, %pstate		! delay slot: restore pstate
1375	SET_SIZE(check_ecache_line)
1376#endif /* lint */
1377
1378#if defined(lint)
/*
 * Lint stub for read_and_clear_afsr(); the real routine is written in
 * assembly.  The parameter list is spelled (void) so that the definition
 * is also a prototype, in the same way as get_lsu(void).
 */
uint64_t
read_and_clear_afsr(void)
{
	return ((uint64_t)0);
}
1384#else	/* lint */
/*
 * read_and_clear_afsr: return the current AFSR in %o0 and write the same
 * value back to the AFSR to clear it.
 *
 * The store to the AFSR is followed by a membar #Sync, as it is after
 * every other AFSR store in this file, so that the clear has completed
 * before the caller resumes.  The membar sits in the retl delay slot,
 * the same arrangement set_lsu uses.
 */
	ENTRY(read_and_clear_afsr)
	ldxa	[%g0]ASI_AFSR, %o0		! %o0 = AFSR (return value)
	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
	retl
	  membar	#Sync			! delay slot: complete the clear
	SET_SIZE(read_and_clear_afsr)
1390#endif	/* lint */
1391
1392#if defined(lint)
/* Lint-only empty stub; the real routine is ENTRY(scrubphys). */
1393/* ARGSUSED */
1394void
1395scrubphys(uint64_t paddr, int ecache_size)
1396{
1397}
1398
1399#else	/* lint */
1400
1401/*
1402 * scrubphys - Pass in the aligned physical memory address that you want
1403 * to scrub, along with the ecache size.
1404 *
1405 *	1) Displacement flush the E$ line corresponding to %addr.
1406 *	   The first ldxa guarantees that the %addr is no longer in
1407 *	   M, O, or E (goes to I, or to S if instruction fetch also happens).
1408 *	2) "Write" the data using a CAS %addr,%g0,%g0.
1409 *	   The casxa guarantees a transition from I to M or S to M.
1410 *	3) Displacement flush the E$ line corresponding to %addr.
1411 *	   The second ldxa pushes the M line out of the ecache, into the
1412 *	   writeback buffers, on the way to memory.
1413 *	4) The "membar #Sync" pushes the cache line out of the writeback
1414 *	   buffers onto the bus, on the way to dram finally.
1415 *
1416 * This is a modified version of the algorithm suggested by Gary Lauterbach.
1417 * In theory the CAS %addr,%g0,%g0 is supposed to mark the addr's cache line
1418 * as modified, but then we found out that for spitfire, if it misses in the
1419 * E$ it will probably install as an M, but if it hits in the E$, then it
1420 * will stay E, if the store doesn't happen. So the first displacement flush
1421 * should ensure that the CAS will miss in the E$.  Arrgh.
 *
 * Register usage common to both variants:
 *	%o0 - physical address to scrub (in)
 *	%o1 - ecache size (in), then the alias offset
 *	      (paddr ^ ecache_size) & (2 * ecache_size - 1)
 *	%o2 - ecache size
 *	%o3 - contents of ecache_flushaddr (mask scratch before that in
 *	      the non-HUMMINGBIRD variant)
 *	%o4 - saved %pstate, restored before returning
 *	%o5 - %pstate with IE and AM cleared; reused in the HUMMINGBIRD
 *	      variant for physaddr - ecache_flushaddr
1422 */
1423
1424	ENTRY(scrubphys)
1425	or	%o1, %g0, %o2	! put ecache size in %o2
1426#ifndef HUMMINGBIRD
1427	xor	%o0, %o2, %o1	! calculate alias address
1428	add	%o2, %o2, %o3	! 2 * ecachesize in case
1429				! addr == ecache_flushaddr
1430	sub	%o3, 1, %o3	! -1 == mask
1431	and	%o1, %o3, %o1	! and with xor'd address
1432	set	ecache_flushaddr, %o3
1433	ldx	[%o3], %o3
1434
1435	rdpr	%pstate, %o4
1436	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1437	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1438
1439	ldxa	[%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1440	casxa	[%o0]ASI_MEM, %g0, %g0	! cas 0 with 0: data is never changed
1441	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1442
1443#else /* HUMMINGBIRD */
1444	/*
1445	 * UltraSPARC-IIe processor supports both 4-way set associative
1446	 * and direct map E$. We need to reconfigure E$ to direct map
1447	 * mode for data load/store before displacement flush. Also, we
1448	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1449	 * has been flushed. Keep the interrupts disabled while flushing
1450	 * E$ in this manner.
1451	 *
1452	 * For flushing a specific physical address, we start at the
1453	 * aliased address and load at set-size stride, wrapping around
1454	 * at 2*ecache-size boundary and skipping fault physical address.
1455	 * It takes 10 loads to guarantee that the physical address has
1456	 * been flushed.
1457	 *
1458	 * Usage:
1459	 *	%o0	physaddr
1460	 *	%o5	physaddr - ecache_flushaddr
1461	 *	%g1	UPA config (restored later)
1462	 *	%g2	E$ set size
1463	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1464	 *	%g4	#loads to flush phys address
1465	 *	%g5	temp
1466	 */
1467
1468	sethi	%hi(ecache_associativity), %g5
1469	ld	[%g5 + %lo(ecache_associativity)], %g5
1470	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1471	xor	%o0, %o2, %o1	! calculate alias address
1472	add	%o2, %o2, %g3	! 2 * ecachesize in case
1473				! addr == ecache_flushaddr
1474	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1475	and	%o1, %g3, %o1	! and with xor'd address
1476	sethi	%hi(ecache_flushaddr), %o3
1477	ldx	[%o3 + %lo(ecache_flushaddr)], %o3
1478
1479	rdpr	%pstate, %o4
1480	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1481	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1482
1483	! Place E$ in direct map mode for data access
1484	or	%g0, 1, %g5
1485	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1486	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
1487	or	%g1, %g5, %g5
1488	membar	#Sync
1489	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1490	membar	#Sync
1491
1492	! Displace cache line from each set of E$ starting at the
1493	! aliased address, at set-size stride, wrapping at 2*ecache_size
1494	! and skipping load from physaddr. We need 10 loads to flush the
1495	! physaddr from E$.
1496	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
1497	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
1498	or	%o1, %g0, %g5		! starting aliased offset
14992:
1500	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
15011:
1502	add	%g5, %g2, %g5		! calculate offset in next set
1503	and	%g5, %g3, %g5		! force offset within aliased range
1504	cmp	%g5, %o5		! skip loads from physaddr
1505	be,pn %ncc, 1b
1506	  nop
1507	brgz,pt	%g4, 2b			! loop while loads remain
1508	  dec	%g4			! delay slot: one load fewer to go
1509
1510	casxa	[%o0]ASI_MEM, %g0, %g0	! cas 0 with 0: data is never changed
1511
1512	! Flush %o0 from ecache again.
1513	! Need single displacement flush at offset %o1 this time as
1514	! the E$ is already in direct map mode.
1515	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1516
1517	membar	#Sync
1518	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1519	membar	#Sync
1520#endif /* HUMMINGBIRD */
1521	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value
1522
1523	retl
1524	membar	#Sync			! move the data out of the load buffer
1525	SET_SIZE(scrubphys)
1526
1527#endif	/* lint */
1528
1529#if defined(lint)
1530
1531/*
1532 * clearphys - Pass in the aligned physical memory address that you want
1533 * to push out, as a 64 byte block of zeros, from the ecache zero-filled.
1534 * Since this routine does not bypass the ecache, it is possible that
1535 * it could generate a UE error while trying to clear a bad line.
1536 * This routine clears and restores the error enable flag.
1537 * TBD - Hummingbird may need similar protection
 *
 * The definition below is the empty lint stub; the real routine is
 * ENTRY(clearphys).
1538 */
1539/* ARGSUSED */
1540void
1541clearphys(uint64_t paddr, int ecache_size, int ecache_linesize)
1542{
1543}
1544
1545#else	/* lint */
1546
/*
 * clearphys register usage common to both variants:
 *	%o0 - physical address of the line to clear (in)
 *	%o1 - ecache size (in), then the alias offset
 *	      (paddr ^ ecache_size) & (2 * ecache_size - 1)
 *	%o2 - ecache linesize (in), ecache size while the alias is
 *	      computed, then the byte offset of the doubleword being
 *	      zeroed (counts down from linesize - 8 to 0)
 *	%o3 - contents of ecache_flushaddr
 *	%o4 - saved %pstate, restored in the retl delay slot
 *	%o5 - %pstate with IE and AM cleared; reused in the HUMMINGBIRD
 *	      variant for physaddr - ecache_flushaddr
 * Only the non-HUMMINGBIRD variant disables and restores the error
 * enable register (kept in %g1).
 */
1547	ENTRY(clearphys)
1548	or	%o2, %g0, %o3	! ecache linesize
1549	or	%o1, %g0, %o2	! ecache size
1550#ifndef HUMMINGBIRD
1551	or	%o3, %g0, %o4	! save ecache linesize
1552	xor	%o0, %o2, %o1	! calculate alias address
1553	add	%o2, %o2, %o3	! 2 * ecachesize
1554	sub	%o3, 1, %o3	! -1 == mask
1555	and	%o1, %o3, %o1	! and with xor'd address
1556	set	ecache_flushaddr, %o3
1557	ldx	[%o3], %o3
1558	or	%o4, %g0, %o2	! saved ecache linesize
1559
1560	rdpr	%pstate, %o4
1561	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1562	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1563
1564	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1565	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
1566	membar	#Sync
1567
1568	! need to put zeros in the cache line before displacing it
1569
1570	sub	%o2, 8, %o2	! get offset of last double word in ecache line
15711:
1572	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
1573	sub	%o2, 8, %o2
1574	brgez,a,pt %o2, 1b		! loop until the offset goes negative
1575	nop
1576	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1577	casxa	[%o0]ASI_MEM, %g0, %g0
1578	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1579
1580	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
1581	membar	#Sync
1582
1583#else /* HUMMINGBIRD... */
1584	/*
1585	 * UltraSPARC-IIe processor supports both 4-way set associative
1586	 * and direct map E$. We need to reconfigure E$ to direct map
1587	 * mode for data load/store before displacement flush. Also, we
1588	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1589	 * has been flushed. Keep the interrupts disabled while flushing
1590	 * E$ in this manner.
1591	 *
1592	 * For flushing a specific physical address, we start at the
1593	 * aliased address and load at set-size stride, wrapping around
1594	 * at 2*ecache-size boundary and skipping fault physical address.
1595	 * It takes 10 loads to guarantee that the physical address has
1596	 * been flushed.
1597	 *
1598	 * Usage:
1599	 *	%o0	physaddr
1600	 *	%o5	physaddr - ecache_flushaddr
1601	 *	%g1	UPA config (restored later)
1602	 *	%g2	E$ set size
1603	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1604	 *	%g4	#loads to flush phys address
1605	 *	%g5	temp
1606	 */
1607
1608	or	%o3, %g0, %o4	! save ecache linesize
1609	sethi	%hi(ecache_associativity), %g5
1610	ld	[%g5 + %lo(ecache_associativity)], %g5
1611	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1612
1613	xor	%o0, %o2, %o1	! calculate alias address
1614	add	%o2, %o2, %g3	! 2 * ecachesize
1615	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1616	and	%o1, %g3, %o1	! and with xor'd address
1617	sethi	%hi(ecache_flushaddr), %o3
1618	ldx	[%o3 +%lo(ecache_flushaddr)], %o3
1619	or	%o4, %g0, %o2	! saved ecache linesize
1620
1621	rdpr	%pstate, %o4
1622	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1623	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1624
1625	! Place E$ in direct map mode for data access
1626	or	%g0, 1, %g5
1627	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1628	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
1629	or	%g1, %g5, %g5
1630	membar	#Sync
1631	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1632	membar	#Sync
1633
1634	! need to put zeros in the cache line before displacing it
1635
1636	sub	%o2, 8, %o2	! get offset of last double word in ecache line
16371:
1638	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
1639	sub	%o2, 8, %o2
1640	brgez,a,pt %o2, 1b		! loop until the offset goes negative
1641	nop
1642
1643	! Displace cache line from each set of E$ starting at the
1644	! aliased address, at set-size stride, wrapping at 2*ecache_size
1645	! and skipping load from physaddr. We need 10 loads to flush the
1646	! physaddr from E$.
1647	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
1648	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
1649	or	%o1, %g0, %g5		! starting offset
16502:
1651	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
16523:
1653	add	%g5, %g2, %g5		! calculate offset in next set
1654	and	%g5, %g3, %g5		! force offset within aliased range
1655	cmp	%g5, %o5		! skip loads from physaddr
1656	be,pn %ncc, 3b
1657	  nop
1658	brgz,pt	%g4, 2b			! loop while loads remain
1659	  dec	%g4			! delay slot: one load fewer to go
1660
1661	casxa	[%o0]ASI_MEM, %g0, %g0
1662
1663	! Flush %o0 from ecache again.
1664	! Need single displacement flush at offset %o1 this time as
1665	! the E$ is already in direct map mode.
1666	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1667
1668	membar	#Sync
1669	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1670	membar	#Sync
1671#endif /* HUMMINGBIRD... */
1672
1673	retl
1674	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value
1675	SET_SIZE(clearphys)
1676
1677#endif	/* lint */
1678
1679#if defined(lint)
/* Lint-only empty stub; the real routine is ENTRY(flushecacheline). */
1680/* ARGSUSED */
1681void
1682flushecacheline(uint64_t paddr, int ecache_size)
1683{
1684}
1685
1686#else	/* lint */
1687/*
1688 * flushecacheline - This is a simpler version of scrubphys
1689 * which simply does a displacement flush of the line in
1690 * question. This routine is mainly used in handling async
1691 * errors where we want to get rid of a bad line in ecache.
1692 * Note that if the line is modified and it has suffered
1693 * data corruption - we are guaranteed that the hw will write
1694 * a UE back to mark the page poisoned.
 *
 * Register usage common to both variants:
 *	%o0 - physical address of the line to flush (in)
 *	%o1 - ecache size (in), then the alias offset
 *	      (paddr ^ ecache_size) & (2 * ecache_size - 1)
 *	%o2 - ecache size
 *	%o3 - contents of ecache_flushaddr
 *	%o4 - saved %pstate, restored in the retl delay slot
 *	%o5 - %pstate with IE and AM cleared
 *	%g1 - saved error enable register; errors are disabled around
 *	      the flush loads and restored afterwards
1695 */
1696        ENTRY(flushecacheline)
1697        or      %o1, %g0, %o2   ! put ecache size in %o2
1698#ifndef HUMMINGBIRD
1699        xor     %o0, %o2, %o1   ! calculate alias address
1700        add     %o2, %o2, %o3   ! 2 * ecachesize in case
1701                                ! addr == ecache_flushaddr
1702        sub     %o3, 1, %o3     ! -1 == mask
1703        and     %o1, %o3, %o1   ! and with xor'd address
1704        set     ecache_flushaddr, %o3
1705        ldx     [%o3], %o3
1706
1707        rdpr    %pstate, %o4
1708        andn    %o4, PSTATE_IE | PSTATE_AM, %o5
1709        wrpr    %o5, %g0, %pstate       ! clear IE, AM bits
1710
1711	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1712	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
1713	membar	#Sync
1714
1715        ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1716	membar	#Sync
1717	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
1718        membar  #Sync
1719#else /* HUMMINGBIRD */
1720	/*
1721	 * UltraSPARC-IIe processor supports both 4-way set associative
1722	 * and direct map E$. We need to reconfigure E$ to direct map
1723	 * mode for data load/store before displacement flush. Also, we
1724	 * need to flush all 4 sets of the E$ to ensure that the physaddr
1725	 * has been flushed. Keep the interrupts disabled while flushing
1726	 * E$ in this manner.
1727	 *
1728	 * For flushing a specific physical address, we start at the
1729	 * aliased address and load at set-size stride, wrapping around
1730	 * at 2*ecache-size boundary and skipping fault physical address.
1731	 * It takes 10 loads to guarantee that the physical address has
1732	 * been flushed.
1733	 *
1734	 * Usage:
1735	 *	%o0	physaddr
1736	 *	%o5	physaddr - ecache_flushaddr
1737	 *	%g1	error enable register
1738	 *	%g2	E$ set size
1739	 *	%g3	E$ flush address range mask (i.e. 2 * E$ -1)
1740	 *	%g4	UPA config (restored later)
1741	 *	%g5	temp, then #loads to flush phys address
1742	 */
1743
1744	sethi	%hi(ecache_associativity), %g5
1745	ld	[%g5 + %lo(ecache_associativity)], %g5
1746	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
1747	xor	%o0, %o2, %o1	! calculate alias address
1748	add	%o2, %o2, %g3	! 2 * ecachesize in case
1749				! addr == ecache_flushaddr
1750	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
1751	and	%o1, %g3, %o1	! and with xor'd address
1752	sethi	%hi(ecache_flushaddr), %o3
1753	ldx	[%o3 + %lo(ecache_flushaddr)], %o3
1754
1755	rdpr	%pstate, %o4
1756	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
1757	wrpr	%o5, %g0, %pstate	! clear IE, AM bits
1758
1759	! Place E$ in direct map mode for data access
1760	or	%g0, 1, %g5
1761	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
1762	ldxa	[%g0]ASI_UPA_CONFIG, %g4 ! current UPA config (restored later)
1763	or	%g4, %g5, %g5
1764	membar	#Sync
1765	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1766	membar	#Sync
1767
1768	ldxa	[%g0]ASI_ESTATE_ERR, %g1
1769	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
1770	membar	#Sync
1771
1772	! Displace cache line from each set of E$ starting at the
1773	! aliased address, at set-size stride, wrapping at 2*ecache_size
1774	! and skipping load from physaddr. We need 10 loads to flush the
1775	! physaddr from E$.
1776	mov	HB_PHYS_FLUSH_CNT-1, %g5 ! #loads to flush physaddr
1777	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
17782:
1779	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
17803:
1781	add	%o1, %g2, %o1		! calculate offset in next set
1782	and	%o1, %g3, %o1		! force offset within aliased range
1783	cmp	%o1, %o5		! skip loads from physaddr
1784	be,pn %ncc, 3b
1785	  nop
1786	brgz,pt	%g5, 2b			! loop while loads remain
1787	  dec	%g5			! delay slot: one load fewer to go
1788
1789	membar	#Sync
1790	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
1791        membar  #Sync
1792
1793	stxa	%g4, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1794	membar	#Sync
1795#endif /* HUMMINGBIRD */
1796        retl
1797        wrpr    %g0, %o4, %pstate	! delay slot: restore pstate
1798        SET_SIZE(flushecacheline)
1799
1800#endif	/* lint */
1801
1802#if defined(lint)
/* Lint-only empty stub; the real handler is ENTRY_NP(ecache_scrubreq_tl1). */
1803/* ARGSUSED */
1804void
1805ecache_scrubreq_tl1(uint64_t inum, uint64_t dummy)
1806{
1807}
1808
1809#else	/* lint */
1810/*
1811 * ecache_scrubreq_tl1 is the crosstrap handler called at ecache_calls_a_sec Hz
1812 * from the clock CPU.  It atomically increments the outstanding request
1813 * counter and, if there was not already an outstanding request,
1814 * branches to setsoftint_tl1 to enqueue an intr_req for the given inum.
1815 */
1816
1817	! Register usage:
1818	!
1819	! Arguments:
1820	! %g1 - inum
1821	!
1822	! Internal:
1823	! %g2, %g3, %g5 - scratch
1824	! %g4 - ptr. to spitfire_scrub_misc ec_scrub_outstanding.
1825	! %g6 - setsoftint_tl1 address
1826
1827	ENTRY_NP(ecache_scrubreq_tl1)
1828	set	SFPR_SCRUB_MISC + EC_SCRUB_OUTSTANDING, %g2
	! %g4 = cpu private area + the offset in %g2; the macro is given
	! label 1 as its bail-out target (presumably taken when the cpu
	! has no private area - confirm against the macro definition)
1829	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
1830	ld	[%g4], %g2		! cpu's ec_scrub_outstanding.
1831	set	setsoftint_tl1, %g6
1832	!
1833	! no need to use atomic instructions for the following
1834	! increment - we're at tl1
1835	!
1836	add	%g2, 0x1, %g3
1837	brnz,pn	%g2, 1f			! no need to enqueue more intr_req
1838	  st	%g3, [%g4]		! delay - store incremented counter
1839	jmp	%g6			! setsoftint_tl1(%g1) - queue intr_req
1840	  nop
1841	! not reached
18421:
1843	retry				! nothing to enqueue, leave the trap
1844	SET_SIZE(ecache_scrubreq_tl1)
1845
1846#endif	/* lint */
1847
1848#if defined(lint)
/* Lint-only empty stub; the real routine is ENTRY(write_ec_tag_parity). */
1849/*ARGSUSED*/
1850void
1851write_ec_tag_parity(uint32_t id)
1852{}
1853#else /* lint */
1854
1855	/*
1856         * write_ec_tag_parity(), which zero's the ecache tag,
1857         * marks the state as invalid and writes good parity to the tag.
1858         * Input %o0 = 32 bit E$ index
         *
         * %o1 is used as scratch here, not as an input.  Other scratch
         * registers: %o3, %o4, %o5 (saved pstate), %g1 (saved error
         * enable).  Runs with IE and AM off and with the error enable
         * register zeroed; both are restored on return.
         *
         * NOTE(review): the shift below sets bit 39, while its comment
         * and get_ecache_tag (which shifts by 40) refer to bit 40 for
         * tag access, and check_ecache_line uses bit 39 for data
         * access.  Confirm which bit is intended.
1859         */
1860        ENTRY(write_ec_tag_parity)
1861        or      %g0, 1, %o4
1862        sllx    %o4, 39, %o4                    ! set bit 40 for e$ tag access
1863        or      %o0, %o4, %o4                 ! %o4 = ecache addr for tag write
1864
1865        rdpr    %pstate, %o5
1866        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
1867        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits
1868
1869        ldxa    [%g0]ASI_ESTATE_ERR, %g1
1870        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
1871        membar  #Sync
1872
1873        ba      1f
1874         nop
1875	/*
1876         * Align on the ecache boundary in order to force
1877         * critical code section onto the same ecache line.
1878         */
1879         .align 64
1880
18811:
1882        set     S_EC_PARITY, %o3         	! clear tag, state invalid
1883        sllx    %o3, S_ECPAR_SHIFT, %o3   	! and with good tag parity
1884        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
1885        stxa    %g0, [%o4]ASI_EC_W
1886        membar  #Sync
1887
1888        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
1889        membar  #Sync
1890        retl
1891        wrpr    %g0, %o5, %pstate		! delay slot: restore pstate
1892        SET_SIZE(write_ec_tag_parity)
1893
1894#endif /* lint */
1895
1896#if defined(lint)
/* Lint-only empty stub; the real routine is ENTRY(write_hb_ec_tag_parity). */
1897/*ARGSUSED*/
1898void
1899write_hb_ec_tag_parity(uint32_t id)
1900{}
1901#else /* lint */
1902
1903	/*
1904         * write_hb_ec_tag_parity(), which zero's the ecache tag,
1905         * marks the state as invalid and writes good parity to the tag.
1906         * Input %o0 = 32 bit E$ index
         *
         * The parity constant and shift are HB_EC_PARITY/HB_ECPAR_SHIFT
         * when HUMMINGBIRD is defined and SB_EC_PARITY/SB_ECPAR_SHIFT
         * otherwise.  %o1 is used as scratch here, not as an input.
         * Other scratch registers: %o3, %o4, %o5 (saved pstate), %g1
         * (saved error enable).  Runs with IE and AM off and with the
         * error enable register zeroed; both are restored on return.
         *
         * NOTE(review): the shift below sets bit 39, while its comment
         * and get_ecache_tag (which shifts by 40) refer to bit 40 for
         * tag access, and check_ecache_line uses bit 39 for data
         * access.  Confirm which bit is intended.
1907         */
1908        ENTRY(write_hb_ec_tag_parity)
1909        or      %g0, 1, %o4
1910        sllx    %o4, 39, %o4                    ! set bit 40 for e$ tag access
1911        or      %o0, %o4, %o4               ! %o4 = ecache addr for tag write
1912
1913        rdpr    %pstate, %o5
1914        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
1915        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits
1916
1917        ldxa    [%g0]ASI_ESTATE_ERR, %g1
1918        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
1919        membar  #Sync
1920
1921        ba      1f
1922         nop
1923	/*
1924         * Align on the ecache boundary in order to force
1925         * critical code section onto the same ecache line.
1926         */
1927         .align 64
19281:
1929#ifdef HUMMINGBIRD
1930        set     HB_EC_PARITY, %o3         	! clear tag, state invalid
1931        sllx    %o3, HB_ECPAR_SHIFT, %o3   	! and with good tag parity
1932#else /* !HUMMINGBIRD */
1933        set     SB_EC_PARITY, %o3         	! clear tag, state invalid
1934        sllx    %o3, SB_ECPAR_SHIFT, %o3   	! and with good tag parity
1935#endif /* !HUMMINGBIRD */
1936
1937        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
1938        stxa    %g0, [%o4]ASI_EC_W
1939        membar  #Sync
1940
1941        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
1942        membar  #Sync
1943        retl
1944        wrpr    %g0, %o5, %pstate		! delay slot: restore pstate
1945        SET_SIZE(write_hb_ec_tag_parity)
1946
1947#endif /* lint */
1948
1949#define	VIS_BLOCKSIZE		64
1950
1951#if defined(lint)
1952
/* Lint-only stub that returns 0; the real routine is ENTRY(dtrace_blksuword32). */
1953/*ARGSUSED*/
1954int
1955dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
1956{ return (0); }
1957
1958#else
1959
/*
 * dtrace_blksuword32: load a 32-bit word into %f0 and write it out with
 * a block-commit store through ASI_BLK_COMMIT_S.
 *	%i0 (addr)     - destination address of the block store
 *	%i1 (data)     - address of the 32-bit word loaded into %f0
 *	%i2 (tryagain) - if non-zero, a fault tail-calls
 *			 dtrace_blksuword32_err(); if zero, a fault
 *			 returns -1
 * Returns 0 when the store completes without faulting.
 *
 * Interrupts are disabled for the duration.  %l0 holds the entry %fprs
 * and %l1 the entry %pstate.  If the fpu was disabled on entry it is
 * enabled here and %fprs is restored afterwards; if it was enabled,
 * %f0 is saved on the stack and reloaded afterwards.  The later
 * bz,a branches reuse the condition codes set by the andcc on FPRS_FEF.
 */
1960	ENTRY(dtrace_blksuword32)
1961	save	%sp, -SA(MINFRAME + 4), %sp
1962
1963	rdpr	%pstate, %l1
1964	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
1965	wrpr	%g0, %l2, %pstate		! protect our FPU diddling
1966
1967	rd	%fprs, %l0
1968	andcc	%l0, FPRS_FEF, %g0
1969	bz,a,pt	%xcc, 1f			! if the fpu is disabled
1970	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu
1971
1972	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
19731:
1974	set	0f, %l5
1975        /*
1976         * We're about to write a block full of either total garbage
1977         * (not kernel data, don't worry) or user floating-point data
1978         * (so it only _looks_ like garbage).
1979         */
1980	ld	[%i1], %f0			! modify the block
1981	membar	#Sync
1982	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
1983	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
1984	membar	#Sync
1985	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
1986
1987	bz,a,pt	%xcc, 1f
1988	wr	%g0, %l0, %fprs			! restore %fprs
1989
1990	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
19911:
1992
1993	wrpr	%g0, %l1, %pstate		! restore interrupts
1994
1995	ret
1996	restore	%g0, %g0, %o0			! return 0
1997
	! lofault handler: reached when the block store above faults
19980:
1999	membar	#Sync
2000	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler
2001
2002	bz,a,pt	%xcc, 1f
2003	wr	%g0, %l0, %fprs			! restore %fprs
2004
2005	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
20061:
2007
2008	wrpr	%g0, %l1, %pstate		! restore interrupts
2009
2010	/*
2011	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
2012	 * which deals with watchpoints. Otherwise, just return -1.
2013	 */
2014	brnz,pt	%i2, 1f
2015	nop
2016	ret
2017	restore	%g0, -1, %o0			! return -1
20181:
2019	call	dtrace_blksuword32_err
2020	restore
2021
2022	SET_SIZE(dtrace_blksuword32)
2023
2024#endif /* lint */
2025