xref: /illumos-gate/usr/src/uts/sun4u/cpu/spitfire.c (revision 9dd828891378a0a6a509ab601b4c5c20ca5562ec)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/archsystm.h>
31 #include <sys/machparam.h>
32 #include <sys/machsystm.h>
33 #include <sys/cpu.h>
34 #include <sys/elf_SPARC.h>
35 #include <vm/hat_sfmmu.h>
36 #include <vm/page.h>
37 #include <sys/cpuvar.h>
38 #include <sys/spitregs.h>
39 #include <sys/async.h>
40 #include <sys/cmn_err.h>
41 #include <sys/debug.h>
42 #include <sys/dditypes.h>
43 #include <sys/sunddi.h>
44 #include <sys/cpu_module.h>
45 #include <sys/prom_debug.h>
46 #include <sys/vmsystm.h>
47 #include <sys/prom_plat.h>
48 #include <sys/sysmacros.h>
49 #include <sys/intreg.h>
50 #include <sys/machtrap.h>
51 #include <sys/ontrap.h>
52 #include <sys/ivintr.h>
53 #include <sys/atomic.h>
54 #include <sys/panic.h>
55 #include <sys/ndifm.h>
56 #include <sys/fm/protocol.h>
57 #include <sys/fm/util.h>
58 #include <sys/fm/cpu/UltraSPARC-II.h>
59 #include <sys/ddi.h>
60 #include <sys/ecc_kstat.h>
61 #include <sys/watchpoint.h>
62 #include <sys/dtrace.h>
63 #include <sys/errclassify.h>
64 
65 uint_t	cpu_impl_dual_pgsz = 0;
66 
67 /*
68  * Structure for the 8 byte ecache data dump and the associated AFSR state.
69  * There will be 8 of these structures used to dump an ecache line (64 bytes).
70  */
71 typedef struct sf_ec_data_elm {
72 	uint64_t ec_d8;
73 	uint64_t ec_afsr;
74 } ec_data_t;
75 
76 /*
77  * Define spitfire (Ultra I/II) specific asynchronous error structure
78  */
79 typedef struct spitfire_async_flt {
80 	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
81 	ushort_t flt_type;		/* types of faults - cpu specific */
82 	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
83 	uint64_t flt_ec_tag;		/* E$ tag info */
84 	int flt_ec_lcnt;		/* number of bad E$ lines */
85 	ushort_t flt_sdbh;		/* UDBH reg */
86 	ushort_t flt_sdbl;		/* UDBL reg */
87 } spitf_async_flt;
88 
89 /*
90  * Prototypes for support routines in spitfire_asm.s:
91  */
92 extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
93 extern uint64_t get_lsu(void);
94 extern void set_lsu(uint64_t ncc);
95 extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
96 				uint64_t *oafsr, uint64_t *acc_afsr);
97 extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
98 extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
99 				uint64_t *acc_afsr);
100 extern uint64_t read_and_clear_afsr();
101 extern void write_ec_tag_parity(uint32_t id);
102 extern void write_hb_ec_tag_parity(uint32_t id);
103 
104 /*
105  * Spitfire module routines:
106  */
107 static void cpu_async_log_err(void *flt);
108 /*PRINTFLIKE6*/
109 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
110     uint_t logflags, const char *endstr, const char *fmt, ...);
111 
112 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
113 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
114 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
115 
116 static void log_ce_err(struct async_flt *aflt, char *unum);
117 static void log_ue_err(struct async_flt *aflt, char *unum);
118 static void check_misc_err(spitf_async_flt *spf_flt);
119 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
120 static int check_ecc(struct async_flt *aflt);
121 static uint_t get_cpu_status(uint64_t arg);
122 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
123 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
124 		int *m, uint64_t *afsr);
125 static void ecache_kstat_init(struct cpu *cp);
126 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
127 		uint64_t paddr, int mpb, uint64_t);
128 static uint64_t ecache_scrub_misc_err(int, uint64_t);
129 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
130 static void ecache_page_retire(void *);
131 static int ecc_kstat_update(kstat_t *ksp, int rw);
132 static int ce_count_unum(int status, int len, char *unum);
133 static void add_leaky_bucket_timeout(void);
134 static int synd_to_synd_code(int synd_status, ushort_t synd);
135 
136 extern uint_t read_all_memscrub;
137 extern void memscrub_run(void);
138 
139 static uchar_t	isus2i;			/* set if sabre */
140 static uchar_t	isus2e;			/* set if hummingbird */
141 
142 /*
143  * Default ecache mask and shift settings for Spitfire.  If we detect a
144  * different CPU implementation, we will modify these values at boot time.
145  */
146 static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
147 static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
148 static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
149 static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
150 static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
151 static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
152 static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
153 static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
154 static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
155 static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;
156 
157 /*
158  * Default ecache state bits for Spitfire.  These individual bits indicate if
159  * the given line is in any of the valid or modified states, respectively.
160  * Again, we modify these at boot if we detect a different CPU.
161  */
162 static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
163 static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
164 static uchar_t cpu_ec_parity		= S_EC_PARITY;
165 static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;
166 
167 /*
168  * This table is used to determine which bit(s) is(are) bad when an ECC
169  * error occurrs.  The array is indexed an 8-bit syndrome.  The entries
170  * of this array have the following semantics:
171  *
172  *      00-63   The number of the bad bit, when only one bit is bad.
173  *      64      ECC bit C0 is bad.
174  *      65      ECC bit C1 is bad.
175  *      66      ECC bit C2 is bad.
176  *      67      ECC bit C3 is bad.
177  *      68      ECC bit C4 is bad.
178  *      69      ECC bit C5 is bad.
179  *      70      ECC bit C6 is bad.
180  *      71      ECC bit C7 is bad.
181  *      72      Two bits are bad.
182  *      73      Three bits are bad.
183  *      74      Four bits are bad.
184  *      75      More than Four bits are bad.
185  *      76      NO bits are bad.
186  * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
187  */
188 
189 #define	C0	64
190 #define	C1	65
191 #define	C2	66
192 #define	C3	67
193 #define	C4	68
194 #define	C5	69
195 #define	C6	70
196 #define	C7	71
197 #define	M2	72
198 #define	M3	73
199 #define	M4	74
200 #define	MX	75
201 #define	NA	76
202 
203 #define	SYND_IS_SINGLE_BIT_DATA(synd_code)	((synd_code >= 0) && \
204 						    (synd_code < C0))
205 #define	SYND_IS_SINGLE_BIT_CHK(synd_code)	((synd_code >= C0) && \
206 						    (synd_code <= C7))
207 
208 static char ecc_syndrome_tab[] =
209 {
210 	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
211 	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
212 	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
213 	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
214 	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
215 	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
216 	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
217 	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
218 	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
219 	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
220 	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
221 	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
222 	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
223 	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
224 	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
225 	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
226 };
227 
228 #define	SYND_TBL_SIZE 256
229 
230 /*
231  * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
232  * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
233  */
234 #define	UDBL_REG	0x8000
235 #define	UDBL(synd)	((synd & UDBL_REG) >> 15)
236 #define	SYND(synd)	(synd & 0x7FFF)
237 
238 /*
239  * These error types are specific to Spitfire and are used internally for the
240  * spitfire fault structure flt_type field.
241  */
242 #define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
243 #define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
244 #define	CPU_WP_ERR		2	/* WP parity error */
245 #define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
246 #define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
247 #define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
248 #define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
249 #define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
250 #define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
251 #define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
252 #define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
253 #define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
254 #define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
255 #define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
256 #define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
257 #define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */
258 
259 /*
260  * Macro to access the "Spitfire cpu private" data structure.
261  */
262 #define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))
263 
264 /*
265  * set to 0 to disable automatic retiring of pages on
266  * DIMMs that have excessive soft errors
267  */
268 int automatic_page_removal = 1;
269 
270 /*
271  * Heuristic for figuring out which module to replace.
272  * Relative likelihood that this P_SYND indicates that this module is bad.
273  * We call it a "score", though, not a relative likelihood.
274  *
275  * Step 1.
276  * Assign a score to each byte of P_SYND according to the following rules:
277  * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
278  * If one bit on, give it a 95.
279  * If seven bits on, give it a 10.
280  * If two bits on:
281  *   in different nybbles, a 90
282  *   in same nybble, but unaligned, 85
283  *   in same nybble and as an aligned pair, 80
284  * If six bits on, look at the bits that are off:
285  *   in same nybble and as an aligned pair, 15
286  *   in same nybble, but unaligned, 20
287  *   in different nybbles, a 25
288  * If three bits on:
289  *   in diferent nybbles, no aligned pairs, 75
290  *   in diferent nybbles, one aligned pair, 70
291  *   in the same nybble, 65
292  * If five bits on, look at the bits that are off:
293  *   in the same nybble, 30
294  *   in diferent nybbles, one aligned pair, 35
295  *   in diferent nybbles, no aligned pairs, 40
296  * If four bits on:
297  *   all in one nybble, 45
298  *   as two aligned pairs, 50
299  *   one aligned pair, 55
300  *   no aligned pairs, 60
301  *
302  * Step 2:
303  * Take the higher of the two scores (one for each byte) as the score
304  * for the module.
305  *
306  * Print the score for each module, and field service should replace the
307  * module with the highest score.
308  */
309 
310 /*
311  * In the table below, the first row/column comment indicates the
312  * number of bits on in that nybble; the second row/column comment is
313  * the hex digit.
314  */
315 
316 static int
317 p_synd_score_table[256] = {
318 	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
319 	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
320 /* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
321 /* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
322 /* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
323 /* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
324 /* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
325 /* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
326 /* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
327 /* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
328 /* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
329 /* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
330 /* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
331 /* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
332 /* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
333 /* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
334 /* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
335 /* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
336 };
337 
338 int
339 ecc_psynd_score(ushort_t p_synd)
340 {
341 	int i, j, a, b;
342 
343 	i = p_synd & 0xFF;
344 	j = (p_synd >> 8) & 0xFF;
345 
346 	a = p_synd_score_table[i];
347 	b = p_synd_score_table[j];
348 
349 	return (a > b ? a : b);
350 }
351 
352 /*
353  * Async Fault Logging
354  *
355  * To ease identifying, reading, and filtering async fault log messages, the
356  * label [AFT#] is now prepended to each async fault message.  These messages
357  * and the logging rules are implemented by cpu_aflt_log(), below.
358  *
359  * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
360  *          This includes both corrected ECC memory and ecache faults.
361  *
362  * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
363  *          else except CE errors) with a priority of 1 (highest).  This tag
364  *          is also used for panic messages that result from an async fault.
365  *
366  * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
367  * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
368  *          of the E-$ data and tags.
369  *
370  * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
371  * printed on the console.  To send all AFT logs to both the log and the
372  * console, set aft_verbose = 1.
373  */
374 
375 #define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
376 #define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
377 #define	CPU_ERRID		0x0004	/* print flt_id */
378 #define	CPU_TL			0x0008	/* print flt_tl */
379 #define	CPU_ERRID_FIRST 	0x0010	/* print flt_id first in message */
380 #define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
381 #define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
382 #define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
383 #define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
384 #define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
385 #define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
386 #define	CPU_FAULTPC		0x0800	/* print flt_pc */
387 #define	CPU_SYND		0x1000	/* print flt_synd and unum */
388 
389 #define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
390 				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
391 				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
392 				CPU_FAULTPC)
393 #define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
394 #define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
395 				~CPU_SPACE)
396 #define	PARERR_LFLAGS	(CMN_LFLAGS)
397 #define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
398 #define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
399 				~CPU_FLTCPU & ~CPU_FAULTPC)
400 #define	BERRTO_LFLAGS	(CMN_LFLAGS)
401 #define	NO_LFLAGS	(0)
402 
403 #define	AFSR_FMTSTR0	"\020\1ME"
404 #define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
405 			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
406 #define	UDB_FMTSTR	"\020\012UE\011CE"
407 
408 /*
409  * Save the cache bootup state for use when internal
410  * caches are to be re-enabled after an error occurs.
411  */
412 uint64_t	cache_boot_state = 0;
413 
414 /*
415  * PA[31:0] represent Displacement in UPA configuration space.
416  */
417 uint_t	root_phys_addr_lo_mask = 0xffffffff;
418 
419 /*
420  * Spitfire legacy globals
421  */
422 int	itlb_entries;
423 int	dtlb_entries;
424 
425 void
426 cpu_setup(void)
427 {
428 	extern int page_retire_messages;
429 	extern int page_retire_first_ue;
430 	extern int at_flags;
431 #if defined(SF_ERRATA_57)
432 	extern caddr_t errata57_limit;
433 #endif
434 	extern int disable_text_largepages;
435 	extern int disable_initdata_largepages;
436 
437 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
438 
439 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
440 
441 	/*
442 	 * Spitfire isn't currently FMA-aware, so we have to enable the
443 	 * page retirement messages. We also change the default policy
444 	 * for UE retirement to allow clearing of transient errors.
445 	 */
446 	page_retire_messages = 1;
447 	page_retire_first_ue = 0;
448 
449 	/*
450 	 * save the cache bootup state.
451 	 */
452 	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);
453 
454 	if (use_page_coloring) {
455 		do_pg_coloring = 1;
456 		if (use_virtual_coloring)
457 			do_virtual_coloring = 1;
458 	}
459 
460 	/*
461 	 * Tune pp_slots to use up to 1/8th of the tlb entries.
462 	 */
463 	pp_slots = MIN(8, MAXPP_SLOTS);
464 
465 	/*
466 	 * Block stores invalidate all pages of the d$ so pagecopy
467 	 * et. al. do not need virtual translations with virtual
468 	 * coloring taken into consideration.
469 	 */
470 	pp_consistent_coloring = 0;
471 
472 	isa_list =
473 	    "sparcv9+vis sparcv9 "
474 	    "sparcv8plus+vis sparcv8plus "
475 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
476 
477 	cpu_hwcap_flags = AV_SPARC_VIS;
478 
479 	/*
480 	 * On Spitfire, there's a hole in the address space
481 	 * that we must never map (the hardware only support 44-bits of
482 	 * virtual address).  Later CPUs are expected to have wider
483 	 * supported address ranges.
484 	 *
485 	 * See address map on p23 of the UltraSPARC 1 user's manual.
486 	 */
487 	hole_start = (caddr_t)0x80000000000ull;
488 	hole_end = (caddr_t)0xfffff80000000000ull;
489 
490 	/*
491 	 * A spitfire call bug requires us to be a further 4Gbytes of
492 	 * firewall from the spec.
493 	 *
494 	 * See Spitfire Errata #21
495 	 */
496 	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
497 	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));
498 
499 	/*
500 	 * The kpm mapping window.
501 	 * kpm_size:
502 	 *	The size of a single kpm range.
503 	 *	The overall size will be: kpm_size * vac_colors.
504 	 * kpm_vbase:
505 	 *	The virtual start address of the kpm range within the kernel
506 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
507 	 */
508 	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
509 	kpm_size_shift = 41;
510 	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
511 
512 #if defined(SF_ERRATA_57)
513 	errata57_limit = (caddr_t)0x80000000ul;
514 #endif
515 
516 	/*
517 	 * Allow only 8K, 64K and 4M pages for text by default.
518 	 * Allow only 8K and 64K page for initialized data segments by
519 	 * default.
520 	 */
521 	disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) |
522 	    (1 << TTE256M);
523 	disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) |
524 	    (1 << TTE32M) | (1 << TTE256M);
525 }
526 
527 static int
528 getintprop(pnode_t node, char *name, int deflt)
529 {
530 	int	value;
531 
532 	switch (prom_getproplen(node, name)) {
533 	case 0:
534 		value = 1;	/* boolean properties */
535 		break;
536 
537 	case sizeof (int):
538 		(void) prom_getprop(node, name, (caddr_t)&value);
539 		break;
540 
541 	default:
542 		value = deflt;
543 		break;
544 	}
545 
546 	return (value);
547 }
548 
549 /*
550  * Set the magic constants of the implementation.
551  */
552 void
553 cpu_fiximp(pnode_t dnode)
554 {
555 	extern int vac_size, vac_shift;
556 	extern uint_t vac_mask;
557 	extern int dcache_line_mask;
558 	int i, a;
559 	static struct {
560 		char	*name;
561 		int	*var;
562 	} prop[] = {
563 		"dcache-size",		&dcache_size,
564 		"dcache-line-size",	&dcache_linesize,
565 		"icache-size",		&icache_size,
566 		"icache-line-size",	&icache_linesize,
567 		"ecache-size",		&ecache_size,
568 		"ecache-line-size",	&ecache_alignsize,
569 		"ecache-associativity", &ecache_associativity,
570 		"#itlb-entries",	&itlb_entries,
571 		"#dtlb-entries",	&dtlb_entries,
572 		};
573 
574 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
575 		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
576 			*prop[i].var = a;
577 		}
578 	}
579 
580 	ecache_setsize = ecache_size / ecache_associativity;
581 
582 	vac_size = S_VAC_SIZE;
583 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
584 	i = 0; a = vac_size;
585 	while (a >>= 1)
586 		++i;
587 	vac_shift = i;
588 	shm_alignment = vac_size;
589 	vac = 1;
590 
591 	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);
592 
593 	/*
594 	 * UltraSPARC I & II have ecache sizes running
595 	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
596 	 * and 8 MB. Adjust the copyin/copyout limits
597 	 * according to the cache size. The magic number
598 	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
599 	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
600 	 * VIS instructions.
601 	 *
602 	 * We assume that all CPUs on the system have the same size
603 	 * ecache. We're also called very early in the game.
604 	 * /etc/system will be parsed *after* we're called so
605 	 * these values can be overwritten.
606 	 */
607 
608 	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
609 	if (ecache_size <= 524288) {
610 		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
611 		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
612 		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
613 	} else if (ecache_size == 1048576) {
614 		hw_copy_limit_2 = 1024;
615 		hw_copy_limit_4 = 1280;
616 		hw_copy_limit_8 = 1536;
617 	} else if (ecache_size == 2097152) {
618 		hw_copy_limit_2 = 1536;
619 		hw_copy_limit_4 = 2048;
620 		hw_copy_limit_8 = 2560;
621 	} else if (ecache_size == 4194304) {
622 		hw_copy_limit_2 = 2048;
623 		hw_copy_limit_4 = 2560;
624 		hw_copy_limit_8 = 3072;
625 	} else {
626 		hw_copy_limit_2 = 2560;
627 		hw_copy_limit_4 = 3072;
628 		hw_copy_limit_8 = 3584;
629 	}
630 }
631 
632 /*
633  * Called by setcpudelay
634  */
635 void
636 cpu_init_tick_freq(void)
637 {
638 	/*
639 	 * Determine the cpu frequency by calling
640 	 * tod_get_cpufrequency. Use an approximate freqency
641 	 * value computed by the prom if the tod module
642 	 * is not initialized and loaded yet.
643 	 */
644 	if (tod_ops.tod_get_cpufrequency != NULL) {
645 		mutex_enter(&tod_lock);
646 		sys_tick_freq = tod_ops.tod_get_cpufrequency();
647 		mutex_exit(&tod_lock);
648 	} else {
649 #if defined(HUMMINGBIRD)
650 		/*
651 		 * the hummingbird version of %stick is used as the basis for
652 		 * low level timing; this provides an independent constant-rate
653 		 * clock for general system use, and frees power mgmt to set
654 		 * various cpu clock speeds.
655 		 */
656 		if (system_clock_freq == 0)
657 			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
658 			    system_clock_freq);
659 		sys_tick_freq = system_clock_freq;
660 #else /* SPITFIRE */
661 		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
662 #endif
663 	}
664 }
665 
666 
667 void shipit(int upaid);
668 extern uint64_t xc_tick_limit;
669 extern uint64_t xc_tick_jump_limit;
670 
671 #ifdef SEND_MONDO_STATS
672 uint64_t x_early[NCPU][64];
673 #endif
674 
675 /*
676  * Note: A version of this function is used by the debugger via the KDI,
677  * and must be kept in sync with this version.  Any changes made to this
678  * function to support new chips or to accomodate errata must also be included
679  * in the KDI-specific version.  See spitfire_kdi.c.
680  */
681 void
682 send_one_mondo(int cpuid)
683 {
684 	uint64_t idsr, starttick, endtick;
685 	int upaid, busy, nack;
686 	uint64_t tick, tick_prev;
687 	ulong_t ticks;
688 
689 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
690 	upaid = CPUID_TO_UPAID(cpuid);
691 	tick = starttick = gettick();
692 	shipit(upaid);
693 	endtick = starttick + xc_tick_limit;
694 	busy = nack = 0;
695 	for (;;) {
696 		idsr = getidsr();
697 		if (idsr == 0)
698 			break;
699 		/*
700 		 * When we detect an irregular tick jump, we adjust
701 		 * the timer window to the current tick value.
702 		 */
703 		tick_prev = tick;
704 		tick = gettick();
705 		ticks = tick - tick_prev;
706 		if (ticks > xc_tick_jump_limit) {
707 			endtick = tick + xc_tick_limit;
708 		} else if (tick > endtick) {
709 			if (panic_quiesce)
710 				return;
711 			cmn_err(CE_PANIC,
712 			"send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
713 			upaid, nack, busy);
714 		}
715 		if (idsr & IDSR_BUSY) {
716 			busy++;
717 			continue;
718 		}
719 		drv_usecwait(1);
720 		shipit(upaid);
721 		nack++;
722 		busy = 0;
723 	}
724 #ifdef SEND_MONDO_STATS
725 	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
726 #endif
727 }
728 
729 void
730 send_mondo_set(cpuset_t set)
731 {
732 	int i;
733 
734 	for (i = 0; i < NCPU; i++)
735 		if (CPU_IN_SET(set, i)) {
736 			send_one_mondo(i);
737 			CPUSET_DEL(set, i);
738 			if (CPUSET_ISNULL(set))
739 				break;
740 		}
741 }
742 
743 void
744 syncfpu(void)
745 {
746 }
747 
748 /*
749  * Determine the size of the CPU module's error structure in bytes.  This is
750  * called once during boot to initialize the error queues.
751  */
752 int
753 cpu_aflt_size(void)
754 {
755 	/*
756 	 * We need to determine whether this is a sabre, Hummingbird or a
757 	 * Spitfire/Blackbird impl and set the appropriate state variables for
758 	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
759 	 * too early in the boot flow and the cpunodes are not initialized.
760 	 * This routine will be called once after cpunodes[] is ready, so do
761 	 * it here.
762 	 */
763 	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
764 		isus2i = 1;
765 		cpu_ec_tag_mask = SB_ECTAG_MASK;
766 		cpu_ec_state_mask = SB_ECSTATE_MASK;
767 		cpu_ec_par_mask = SB_ECPAR_MASK;
768 		cpu_ec_par_shift = SB_ECPAR_SHIFT;
769 		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
770 		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
771 		cpu_ec_state_exl = SB_ECSTATE_EXL;
772 		cpu_ec_state_mod = SB_ECSTATE_MOD;
773 
774 		/* These states do not exist in sabre - set to 0xFF */
775 		cpu_ec_state_shr = 0xFF;
776 		cpu_ec_state_own = 0xFF;
777 
778 		cpu_ec_state_valid = SB_ECSTATE_VALID;
779 		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
780 		cpu_ec_state_parity = SB_ECSTATE_PARITY;
781 		cpu_ec_parity = SB_EC_PARITY;
782 	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
783 		isus2e = 1;
784 		cpu_ec_tag_mask = HB_ECTAG_MASK;
785 		cpu_ec_state_mask = HB_ECSTATE_MASK;
786 		cpu_ec_par_mask = HB_ECPAR_MASK;
787 		cpu_ec_par_shift = HB_ECPAR_SHIFT;
788 		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
789 		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
790 		cpu_ec_state_exl = HB_ECSTATE_EXL;
791 		cpu_ec_state_mod = HB_ECSTATE_MOD;
792 
793 		/* These states do not exist in hummingbird - set to 0xFF */
794 		cpu_ec_state_shr = 0xFF;
795 		cpu_ec_state_own = 0xFF;
796 
797 		cpu_ec_state_valid = HB_ECSTATE_VALID;
798 		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
799 		cpu_ec_state_parity = HB_ECSTATE_PARITY;
800 		cpu_ec_parity = HB_EC_PARITY;
801 	}
802 
803 	return (sizeof (spitf_async_flt));
804 }
805 
806 
807 /*
808  * Correctable ecc error trap handler
809  */
810 /*ARGSUSED*/
811 void
812 cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
813 	uint_t p_afsr_high, uint_t p_afar_high)
814 {
815 	ushort_t sdbh, sdbl;
816 	ushort_t e_syndh, e_syndl;
817 	spitf_async_flt spf_flt;
818 	struct async_flt *ecc;
819 	int queue = 1;
820 
821 	uint64_t t_afar = p_afar;
822 	uint64_t t_afsr = p_afsr;
823 
824 	/*
825 	 * Note: the Spitfire data buffer error registers
826 	 * (upper and lower halves) are or'ed into the upper
827 	 * word of the afsr by ce_err().
828 	 */
829 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
830 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
831 
832 	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
833 	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);
834 
835 	t_afsr &= S_AFSR_MASK;
836 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
837 
838 	/* Setup the async fault structure */
839 	bzero(&spf_flt, sizeof (spitf_async_flt));
840 	ecc = (struct async_flt *)&spf_flt;
841 	ecc->flt_id = gethrtime_waitfree();
842 	ecc->flt_stat = t_afsr;
843 	ecc->flt_addr = t_afar;
844 	ecc->flt_status = ECC_C_TRAP;
845 	ecc->flt_bus_id = getprocessorid();
846 	ecc->flt_inst = CPU->cpu_id;
847 	ecc->flt_pc = (caddr_t)rp->r_pc;
848 	ecc->flt_func = log_ce_err;
849 	ecc->flt_in_memory =
850 		(pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
851 	spf_flt.flt_sdbh = sdbh;
852 	spf_flt.flt_sdbl = sdbl;
853 
854 	/*
855 	 * Check for fatal conditions.
856 	 */
857 	check_misc_err(&spf_flt);
858 
859 	/*
860 	 * Pananoid checks for valid AFSR and UDBs
861 	 */
862 	if ((t_afsr & P_AFSR_CE) == 0) {
863 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
864 			"** Panic due to CE bit not set in the AFSR",
865 			"  Corrected Memory Error on");
866 	}
867 
868 	/*
869 	 * We want to skip logging only if ALL the following
870 	 * conditions are true:
871 	 *
872 	 *	1. There is only one error
873 	 *	2. That error is a correctable memory error
874 	 *	3. The error is caused by the memory scrubber (in which case
875 	 *	    the error will have occurred under on_trap protection)
876 	 *	4. The error is on a retired page
877 	 *
878 	 * Note: OT_DATA_EC is used places other than the memory scrubber.
879 	 * However, none of those errors should occur on a retired page.
880 	 */
881 	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
882 	    curthread->t_ontrap != NULL) {
883 
884 		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
885 			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
886 				queue = 0;
887 			}
888 		}
889 	}
890 
891 	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
892 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
893 			"** Panic due to CE bits not set in the UDBs",
894 			" Corrected Memory Error on");
895 	}
896 
897 	if ((sdbh >> 8) & 1) {
898 		ecc->flt_synd = e_syndh;
899 		ce_scrub(ecc);
900 		if (queue) {
901 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
902 			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
903 		}
904 	}
905 
906 	if ((sdbl >> 8) & 1) {
907 		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
908 		ecc->flt_synd = e_syndl | UDBL_REG;
909 		ce_scrub(ecc);
910 		if (queue) {
911 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
912 			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
913 		}
914 	}
915 
916 	/*
917 	 * Re-enable all error trapping (CEEN currently cleared).
918 	 */
919 	clr_datapath();
920 	set_asyncflt(P_AFSR_CE);
921 	set_error_enable(EER_ENABLE);
922 }
923 
924 /*
925  * Cpu specific CE logging routine
926  */
927 static void
928 log_ce_err(struct async_flt *aflt, char *unum)
929 {
930 	spitf_async_flt spf_flt;
931 
932 	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
933 		return;
934 	}
935 
936 	spf_flt.cmn_asyncflt = *aflt;
937 	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
938 	    " Corrected Memory Error detected by");
939 }
940 
941 /*
942  * Spitfire does not perform any further CE classification refinement
943  */
944 /*ARGSUSED*/
945 int
946 ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
947     size_t afltoffset)
948 {
949 	return (0);
950 }
951 
952 char *
953 flt_to_error_type(struct async_flt *aflt)
954 {
955 	if (aflt->flt_status & ECC_INTERMITTENT)
956 		return (ERR_TYPE_DESC_INTERMITTENT);
957 	if (aflt->flt_status & ECC_PERSISTENT)
958 		return (ERR_TYPE_DESC_PERSISTENT);
959 	if (aflt->flt_status & ECC_STICKY)
960 		return (ERR_TYPE_DESC_STICKY);
961 	return (ERR_TYPE_DESC_UNKNOWN);
962 }
963 
964 /*
965  * Called by correctable ecc error logging code to print out
966  * the stick/persistent/intermittent status of the error.
967  */
968 static void
969 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
970 {
971 	ushort_t status;
972 	char *status1_str = "Memory";
973 	char *status2_str = "Intermittent";
974 	struct async_flt *aflt = (struct async_flt *)spf_flt;
975 
976 	status = aflt->flt_status;
977 
978 	if (status & ECC_ECACHE)
979 		status1_str = "Ecache";
980 
981 	if (status & ECC_STICKY)
982 		status2_str = "Sticky";
983 	else if (status & ECC_PERSISTENT)
984 		status2_str = "Persistent";
985 
986 	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
987 		NULL, " Corrected %s Error on %s is %s",
988 		status1_str, unum, status2_str);
989 }
990 
991 /*
992  * check for a valid ce syndrome, then call the
993  * displacement flush scrubbing code, and then check the afsr to see if
994  * the error was persistent or intermittent. Reread the afar/afsr to see
995  * if the error was not scrubbed successfully, and is therefore sticky.
996  */
997 /*ARGSUSED1*/
998 void
999 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
1000 {
1001 	uint64_t eer, afsr;
1002 	ushort_t status;
1003 
1004 	ASSERT(getpil() > LOCK_LEVEL);
1005 
1006 	/*
1007 	 * It is possible that the flt_addr is not a valid
1008 	 * physical address. To deal with this, we disable
1009 	 * NCEEN while we scrub that address. If this causes
1010 	 * a TIMEOUT/BERR, we know this is an invalid
1011 	 * memory location.
1012 	 */
1013 	kpreempt_disable();
1014 	eer = get_error_enable();
1015 	if (eer & (EER_CEEN | EER_NCEEN))
1016 	    set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));
1017 
1018 	/*
1019 	 * To check if the error detected by IO is persistent, sticky or
1020 	 * intermittent.
1021 	 */
1022 	if (ecc->flt_status & ECC_IOBUS) {
1023 		ecc->flt_stat = P_AFSR_CE;
1024 	}
1025 
1026 	scrubphys(P2ALIGN(ecc->flt_addr, 64),
1027 	    cpunodes[CPU->cpu_id].ecache_size);
1028 
1029 	get_asyncflt(&afsr);
1030 	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1031 		/*
1032 		 * Must ensure that we don't get the TIMEOUT/BERR
1033 		 * when we reenable NCEEN, so we clear the AFSR.
1034 		 */
1035 		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
1036 		if (eer & (EER_CEEN | EER_NCEEN))
1037 		    set_error_enable(eer);
1038 		kpreempt_enable();
1039 		return;
1040 	}
1041 
1042 	if (eer & EER_NCEEN)
1043 	    set_error_enable(eer & ~EER_CEEN);
1044 
1045 	/*
1046 	 * Check and clear any ECC errors from the scrub.  If the scrub did
1047 	 * not trip over the error, mark it intermittent.  If the scrub did
1048 	 * trip the error again and it did not scrub away, mark it sticky.
1049 	 * Otherwise mark it persistent.
1050 	 */
1051 	if (check_ecc(ecc) != 0) {
1052 		cpu_read_paddr(ecc, 0, 1);
1053 
1054 		if (check_ecc(ecc) != 0)
1055 			status = ECC_STICKY;
1056 		else
1057 			status = ECC_PERSISTENT;
1058 	} else
1059 		status = ECC_INTERMITTENT;
1060 
1061 	if (eer & (EER_CEEN | EER_NCEEN))
1062 	    set_error_enable(eer);
1063 	kpreempt_enable();
1064 
1065 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
1066 	ecc->flt_status |= status;
1067 }
1068 
1069 /*
1070  * get the syndrome and unum, and then call the routines
1071  * to check the other cpus and iobuses, and then do the error logging.
1072  */
1073 /*ARGSUSED1*/
1074 void
1075 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
1076 {
1077 	char unum[UNUM_NAMLEN];
1078 	int len = 0;
1079 	int ce_verbose = 0;
1080 	int err;
1081 
1082 	ASSERT(ecc->flt_func != NULL);
1083 
1084 	/* Get the unum string for logging purposes */
1085 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
1086 	    UNUM_NAMLEN, &len);
1087 
1088 	/* Call specific error logging routine */
1089 	(void) (*ecc->flt_func)(ecc, unum);
1090 
1091 	/*
1092 	 * Count errors per unum.
1093 	 * Non-memory errors are all counted via a special unum string.
1094 	 */
1095 	if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
1096 	    automatic_page_removal) {
1097 		(void) page_retire(ecc->flt_addr, err);
1098 	}
1099 
1100 	if (ecc->flt_panic) {
1101 		ce_verbose = 1;
1102 	} else if ((ecc->flt_class == BUS_FAULT) ||
1103 	    (ecc->flt_stat & P_AFSR_CE)) {
1104 		ce_verbose = (ce_verbose_memory > 0);
1105 	} else {
1106 		ce_verbose = 1;
1107 	}
1108 
1109 	if (ce_verbose) {
1110 		spitf_async_flt sflt;
1111 		int synd_code;
1112 
1113 		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */
1114 
1115 		cpu_ce_log_status(&sflt, unum);
1116 
1117 		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
1118 				SYND(ecc->flt_synd));
1119 
1120 		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
1121 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1122 			    NULL, " ECC Data Bit %2d was in error "
1123 			    "and corrected", synd_code);
1124 		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
1125 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1126 			    NULL, " ECC Check Bit %2d was in error "
1127 			    "and corrected", synd_code - C0);
1128 		} else {
1129 			/*
1130 			 * These are UE errors - we shouldn't be getting CE
1131 			 * traps for these; handle them in case of bad h/w.
1132 			 */
1133 			switch (synd_code) {
1134 			case M2:
1135 				cpu_aflt_log(CE_CONT, 0, &sflt,
1136 				    CPU_ERRID_FIRST, NULL,
1137 				    " Two ECC Bits were in error");
1138 				break;
1139 			case M3:
1140 				cpu_aflt_log(CE_CONT, 0, &sflt,
1141 				    CPU_ERRID_FIRST, NULL,
1142 				    " Three ECC Bits were in error");
1143 				break;
1144 			case M4:
1145 				cpu_aflt_log(CE_CONT, 0, &sflt,
1146 				    CPU_ERRID_FIRST, NULL,
1147 				    " Four ECC Bits were in error");
1148 				break;
1149 			case MX:
1150 				cpu_aflt_log(CE_CONT, 0, &sflt,
1151 				    CPU_ERRID_FIRST, NULL,
1152 				    " More than Four ECC bits were "
1153 				    "in error");
1154 				break;
1155 			default:
1156 				cpu_aflt_log(CE_CONT, 0, &sflt,
1157 				    CPU_ERRID_FIRST, NULL,
1158 				    " Unknown fault syndrome %d",
1159 				    synd_code);
1160 				break;
1161 			}
1162 		}
1163 	}
1164 
1165 	/* Display entire cache line, if valid address */
1166 	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
1167 		read_ecc_data(ecc, 1, 1);
1168 }
1169 
1170 /*
1171  * We route all errors through a single switch statement.
1172  */
1173 void
1174 cpu_ue_log_err(struct async_flt *aflt)
1175 {
1176 
1177 	switch (aflt->flt_class) {
1178 	case CPU_FAULT:
1179 		cpu_async_log_err(aflt);
1180 		break;
1181 
1182 	case BUS_FAULT:
1183 		bus_async_log_err(aflt);
1184 		break;
1185 
1186 	default:
1187 		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
1188 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1189 		break;
1190 	}
1191 }
1192 
1193 /* Values for action variable in cpu_async_error() */
1194 #define	ACTION_NONE		0
1195 #define	ACTION_TRAMPOLINE	1
1196 #define	ACTION_AST_FLAGS	2
1197 
1198 /*
1199  * Access error trap handler for asynchronous cpu errors.  This routine is
1200  * called to handle a data or instruction access error.  All fatal errors are
1201  * completely handled by this routine (by panicking).  Non fatal error logging
1202  * is queued for later processing either via AST or softint at a lower PIL.
1203  * In case of panic, the error log queue will also be processed as part of the
1204  * panic flow to ensure all errors are logged.  This routine is called with all
1205  * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
1206  * error bits are also cleared.  The hardware has also disabled the I and
1207  * D-caches for us, so we must re-enable them before returning.
1208  *
1209  * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
1210  *
1211  *		_______________________________________________________________
1212  *		|        Privileged tl0		|         Unprivileged	      |
1213  *		| Protected	| Unprotected	| Protected	| Unprotected |
1214  *		|on_trap|lofault|		|		|	      |
1215  * -------------|-------|-------+---------------+---------------+-------------|
1216  *		|	|	|		|		|	      |
1217  * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
1218  *		|	|	|		|		|	      |
1219  * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
1220  *		|	|	|		|		|	      |
1221  * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
1222  *		|	|	|		|		|	      |
1223  * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
1224  * ____________________________________________________________________________
1225  *
1226  *
1227  * Action codes:
1228  *
1229  * L - log
1230  * M - kick off memscrubber if flt_in_memory
1231  * P - panic
1232  * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
1233  * R - i)  if aft_panic is set, panic
1234  *     ii) otherwise, send hwerr event to contract and SIGKILL to process
1235  * S - send SIGBUS to process
1236  * T - trampoline
1237  *
1238  * Special cases:
1239  *
1240  * 1) if aft_testfatal is set, all faults result in a panic regardless
1241  *    of type (even WP), protection (even on_trap), or privilege.
1242  */
1243 /*ARGSUSED*/
1244 void
1245 cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
1246 	uint_t p_afsr_high, uint_t p_afar_high)
1247 {
1248 	ushort_t sdbh, sdbl, ttype, tl;
1249 	spitf_async_flt spf_flt;
1250 	struct async_flt *aflt;
1251 	char pr_reason[28];
1252 	uint64_t oafsr;
1253 	uint64_t acc_afsr = 0;			/* accumulated afsr */
1254 	int action = ACTION_NONE;
1255 	uint64_t t_afar = p_afar;
1256 	uint64_t t_afsr = p_afsr;
1257 	int expected = DDI_FM_ERR_UNEXPECTED;
1258 	ddi_acc_hdl_t *hp;
1259 
1260 	/*
1261 	 * We need to look at p_flag to determine if the thread detected an
1262 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1263 	 * because we just need a consistent snapshot and we know that everyone
1264 	 * else will store a consistent set of bits while holding p_lock.  We
1265 	 * don't have to worry about a race because SDOCORE is set once prior
1266 	 * to doing i/o from the process's address space and is never cleared.
1267 	 */
1268 	uint_t pflag = ttoproc(curthread)->p_flag;
1269 
1270 	pr_reason[0] = '\0';
1271 
1272 	/*
1273 	 * Note: the Spitfire data buffer error registers
1274 	 * (upper and lower halves) are or'ed into the upper
1275 	 * word of the afsr by async_err() if P_AFSR_UE is set.
1276 	 */
1277 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
1278 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
1279 
1280 	/*
1281 	 * Grab the ttype encoded in <63:53> of the saved
1282 	 * afsr passed from async_err()
1283 	 */
1284 	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
1285 	tl = (ushort_t)(t_afsr >> 62);
1286 
1287 	t_afsr &= S_AFSR_MASK;
1288 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
1289 
1290 	/*
1291 	 * Initialize most of the common and CPU-specific structure.  We derive
1292 	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
1293 	 * initial setting of aflt->flt_panic is based on TL: we must panic if
1294 	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
1295 	 * tuneable aft_testfatal is set (not the default).
1296 	 */
1297 	bzero(&spf_flt, sizeof (spitf_async_flt));
1298 	aflt = (struct async_flt *)&spf_flt;
1299 	aflt->flt_id = gethrtime_waitfree();
1300 	aflt->flt_stat = t_afsr;
1301 	aflt->flt_addr = t_afar;
1302 	aflt->flt_bus_id = getprocessorid();
1303 	aflt->flt_inst = CPU->cpu_id;
1304 	aflt->flt_pc = (caddr_t)rp->r_pc;
1305 	aflt->flt_prot = AFLT_PROT_NONE;
1306 	aflt->flt_class = CPU_FAULT;
1307 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1308 	aflt->flt_tl = (uchar_t)tl;
1309 	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
1310 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1311 
1312 	/*
1313 	 * Set flt_status based on the trap type.  If we end up here as the
1314 	 * result of a UE detected by the CE handling code, leave status 0.
1315 	 */
1316 	switch (ttype) {
1317 	case T_DATA_ERROR:
1318 		aflt->flt_status = ECC_D_TRAP;
1319 		break;
1320 	case T_INSTR_ERROR:
1321 		aflt->flt_status = ECC_I_TRAP;
1322 		break;
1323 	}
1324 
1325 	spf_flt.flt_sdbh = sdbh;
1326 	spf_flt.flt_sdbl = sdbl;
1327 
1328 	/*
1329 	 * Check for fatal async errors.
1330 	 */
1331 	check_misc_err(&spf_flt);
1332 
1333 	/*
1334 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1335 	 * see if we were executing in the kernel under on_trap() or t_lofault
1336 	 * protection.  If so, modify the saved registers so that we return
1337 	 * from the trap to the appropriate trampoline routine.
1338 	 */
1339 	if (aflt->flt_priv && tl == 0) {
1340 		if (curthread->t_ontrap != NULL) {
1341 			on_trap_data_t *otp = curthread->t_ontrap;
1342 
1343 			if (otp->ot_prot & OT_DATA_EC) {
1344 				aflt->flt_prot = AFLT_PROT_EC;
1345 				otp->ot_trap |= OT_DATA_EC;
1346 				rp->r_pc = otp->ot_trampoline;
1347 				rp->r_npc = rp->r_pc + 4;
1348 				action = ACTION_TRAMPOLINE;
1349 			}
1350 
1351 			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
1352 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1353 				aflt->flt_prot = AFLT_PROT_ACCESS;
1354 				otp->ot_trap |= OT_DATA_ACCESS;
1355 				rp->r_pc = otp->ot_trampoline;
1356 				rp->r_npc = rp->r_pc + 4;
1357 				action = ACTION_TRAMPOLINE;
1358 				/*
1359 				 * for peeks and caut_gets errors are expected
1360 				 */
1361 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1362 				if (!hp)
1363 					expected = DDI_FM_ERR_PEEK;
1364 				else if (hp->ah_acc.devacc_attr_access ==
1365 				    DDI_CAUTIOUS_ACC)
1366 					expected = DDI_FM_ERR_EXPECTED;
1367 			}
1368 
1369 		} else if (curthread->t_lofault) {
1370 			aflt->flt_prot = AFLT_PROT_COPY;
1371 			rp->r_g1 = EFAULT;
1372 			rp->r_pc = curthread->t_lofault;
1373 			rp->r_npc = rp->r_pc + 4;
1374 			action = ACTION_TRAMPOLINE;
1375 		}
1376 	}
1377 
1378 	/*
1379 	 * Determine if this error needs to be treated as fatal.  Note that
1380 	 * multiple errors detected upon entry to this trap handler does not
1381 	 * necessarily warrant a panic.  We only want to panic if the trap
1382 	 * happened in privileged mode and not under t_ontrap or t_lofault
1383 	 * protection.  The exception is WP: if we *only* get WP, it is not
1384 	 * fatal even if the trap occurred in privileged mode, except on Sabre.
1385 	 *
1386 	 * aft_panic, if set, effectively makes us treat usermode
1387 	 * UE/EDP/LDP faults as if they were privileged - so we we will
1388 	 * panic instead of sending a contract event.  A lofault-protected
1389 	 * fault will normally follow the contract event; if aft_panic is
1390 	 * set this will be changed to a panic.
1391 	 *
1392 	 * For usermode BERR/BTO errors, eg from processes performing device
1393 	 * control through mapped device memory, we need only deliver
1394 	 * a SIGBUS to the offending process.
1395 	 *
1396 	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
1397 	 * checked later; for now we implement the common reasons.
1398 	 */
1399 	if (aflt->flt_prot == AFLT_PROT_NONE) {
1400 		/*
1401 		 * Beware - multiple bits may be set in AFSR
1402 		 */
1403 		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
1404 			if (aflt->flt_priv || aft_panic)
1405 				aflt->flt_panic = 1;
1406 		}
1407 
1408 		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1409 			if (aflt->flt_priv)
1410 				aflt->flt_panic = 1;
1411 		}
1412 	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
1413 		aflt->flt_panic = 1;
1414 	}
1415 
1416 	/*
1417 	 * UE/BERR/TO: Call our bus nexus friends to check for
1418 	 * IO errors that may have resulted in this trap.
1419 	 */
1420 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
1421 		cpu_run_bus_error_handlers(aflt, expected);
1422 	}
1423 
1424 	/*
1425 	 * Handle UE: If the UE is in memory, we need to flush the bad line from
1426 	 * the E-cache.  We also need to query the bus nexus for fatal errors.
1427 	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
1428 	 * caches may introduce more parity errors (especially when the module
1429 	 * is bad) and in sabre there is no guarantee that such errors
1430 	 * (if introduced) are written back as poisoned data.
1431 	 */
1432 	if (t_afsr & P_AFSR_UE) {
1433 		int i;
1434 
1435 		(void) strcat(pr_reason, "UE ");
1436 
1437 		spf_flt.flt_type = CPU_UE_ERR;
1438 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1439 			MMU_PAGESHIFT)) ? 1: 0;
1440 
1441 		/*
1442 		 * With UE, we have the PA of the fault.
1443 		 * Let do a diagnostic read to get the ecache
1444 		 * data and tag info of the bad line for logging.
1445 		 */
1446 		if (aflt->flt_in_memory) {
1447 			uint32_t ec_set_size;
1448 			uchar_t state;
1449 			uint32_t ecache_idx;
1450 			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);
1451 
1452 			/* touch the line to put it in ecache */
1453 			acc_afsr |= read_and_clear_afsr();
1454 			(void) lddphys(faultpa);
1455 			acc_afsr |= (read_and_clear_afsr() &
1456 				    ~(P_AFSR_EDP | P_AFSR_UE));
1457 
1458 			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
1459 			    ecache_associativity;
1460 
1461 			for (i = 0; i < ecache_associativity; i++) {
1462 				ecache_idx = i * ec_set_size +
1463 				    (aflt->flt_addr % ec_set_size);
1464 				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
1465 					(uint64_t *)&spf_flt.flt_ec_data[0],
1466 					&spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
1467 				acc_afsr |= oafsr;
1468 
1469 				state = (uchar_t)((spf_flt.flt_ec_tag &
1470 				    cpu_ec_state_mask) >> cpu_ec_state_shift);
1471 
1472 				if ((state & cpu_ec_state_valid) &&
1473 				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
1474 				    ((uint64_t)aflt->flt_addr >>
1475 				    cpu_ec_tag_shift)))
1476 					break;
1477 			}
1478 
1479 			/*
1480 			 * Check to see if the ecache tag is valid for the
1481 			 * fault PA. In the very unlikely event where the
1482 			 * line could be victimized, no ecache info will be
1483 			 * available. If this is the case, capture the line
1484 			 * from memory instead.
1485 			 */
1486 			if ((state & cpu_ec_state_valid) == 0 ||
1487 			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
1488 			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
1489 				for (i = 0; i < 8; i++, faultpa += 8) {
1490 				    ec_data_t *ecdptr;
1491 
1492 					ecdptr = &spf_flt.flt_ec_data[i];
1493 					acc_afsr |= read_and_clear_afsr();
1494 					ecdptr->ec_d8 = lddphys(faultpa);
1495 					acc_afsr |= (read_and_clear_afsr() &
1496 						    ~(P_AFSR_EDP | P_AFSR_UE));
1497 					ecdptr->ec_afsr = 0;
1498 							/* null afsr value */
1499 				}
1500 
1501 				/*
1502 				 * Mark tag invalid to indicate mem dump
1503 				 * when we print out the info.
1504 				 */
1505 				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
1506 			}
1507 			spf_flt.flt_ec_lcnt = 1;
1508 
1509 			/*
1510 			 * Flush out the bad line
1511 			 */
1512 			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
1513 				cpunodes[CPU->cpu_id].ecache_size);
1514 
1515 			acc_afsr |= clear_errors(NULL, NULL);
1516 		}
1517 
1518 		/*
1519 		 * Ask our bus nexus friends if they have any fatal errors. If
1520 		 * so, they will log appropriate error messages and panic as a
1521 		 * result. We then queue an event for each UDB that reports a
1522 		 * UE. Each UE reported in a UDB will have its own log message.
1523 		 *
1524 		 * Note from kbn: In the case where there are multiple UEs
1525 		 * (ME bit is set) - the AFAR address is only accurate to
1526 		 * the 16-byte granularity. One cannot tell whether the AFAR
1527 		 * belongs to the UDBH or UDBL syndromes. In this case, we
1528 		 * always report the AFAR address to be 16-byte aligned.
1529 		 *
1530 		 * If we're on a Sabre, there is no SDBL, but it will always
1531 		 * read as zero, so the sdbl test below will safely fail.
1532 		 */
1533 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
1534 			aflt->flt_panic = 1;
1535 
1536 		if (sdbh & P_DER_UE) {
1537 			aflt->flt_synd = sdbh & P_DER_E_SYND;
1538 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1539 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1540 			    aflt->flt_panic);
1541 		}
1542 		if (sdbl & P_DER_UE) {
1543 			aflt->flt_synd = sdbl & P_DER_E_SYND;
1544 			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
1545 			if (!(aflt->flt_stat & P_AFSR_ME))
1546 				aflt->flt_addr |= 0x8;
1547 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1548 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1549 			    aflt->flt_panic);
1550 		}
1551 
1552 		/*
1553 		 * We got a UE and are panicking, save the fault PA in a known
1554 		 * location so that the platform specific panic code can check
1555 		 * for copyback errors.
1556 		 */
1557 		if (aflt->flt_panic && aflt->flt_in_memory) {
1558 			panic_aflt = *aflt;
1559 		}
1560 	}
1561 
1562 	/*
1563 	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
1564 	 * async error for logging. For Sabre, we panic on EDP or LDP.
1565 	 */
1566 	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
1567 		spf_flt.flt_type = CPU_EDP_LDP_ERR;
1568 
1569 		if (t_afsr & P_AFSR_EDP)
1570 			(void) strcat(pr_reason, "EDP ");
1571 
1572 		if (t_afsr & P_AFSR_LDP)
1573 			(void) strcat(pr_reason, "LDP ");
1574 
1575 		/*
1576 		 * Here we have no PA to work with.
1577 		 * Scan each line in the ecache to look for
1578 		 * the one with bad parity.
1579 		 */
1580 		aflt->flt_addr = AFLT_INV_ADDR;
1581 		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1582 			&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
1583 		acc_afsr |= (oafsr & ~P_AFSR_WP);
1584 
1585 		/*
1586 		 * If we found a bad PA, update the state to indicate if it is
1587 		 * memory or I/O space.  This code will be important if we ever
1588 		 * support cacheable frame buffers.
1589 		 */
1590 		if (aflt->flt_addr != AFLT_INV_ADDR) {
1591 			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1592 				MMU_PAGESHIFT)) ? 1 : 0;
1593 		}
1594 
1595 		if (isus2i || isus2e)
1596 			aflt->flt_panic = 1;
1597 
1598 		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
1599 		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
1600 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1601 		    aflt->flt_panic);
1602 	}
1603 
1604 	/*
1605 	 * Timeout and bus error handling.  There are two cases to consider:
1606 	 *
1607 	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we
1608 	 * have already modified the saved registers so that we will return
1609 	 * from the trap to the appropriate trampoline routine; otherwise panic.
1610 	 *
1611 	 * (2) In user mode, we can simply use our AST mechanism to deliver
1612 	 * a SIGBUS.  We do not log the occurence - processes performing
1613 	 * device control would generate lots of uninteresting messages.
1614 	 */
1615 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1616 		if (t_afsr & P_AFSR_TO)
1617 			(void) strcat(pr_reason, "BTO ");
1618 
1619 		if (t_afsr & P_AFSR_BERR)
1620 			(void) strcat(pr_reason, "BERR ");
1621 
1622 		spf_flt.flt_type = CPU_BTO_BERR_ERR;
1623 		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
1624 			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
1625 			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
1626 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1627 			    aflt->flt_panic);
1628 		}
1629 	}
1630 
1631 	/*
1632 	 * Handle WP: WP happens when the ecache is victimized and a parity
1633 	 * error was detected on a writeback.  The data in question will be
1634 	 * poisoned as a UE will be written back.  The PA is not logged and
1635 	 * it is possible that it doesn't belong to the trapped thread.  The
1636 	 * WP trap is not fatal, but it could be fatal to someone that
1637 	 * subsequently accesses the toxic page.  We set read_all_memscrub
1638 	 * to force the memscrubber to read all of memory when it awakens.
1639 	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
1640 	 * UE back to poison the data.
1641 	 */
1642 	if (t_afsr & P_AFSR_WP) {
1643 		(void) strcat(pr_reason, "WP ");
1644 		if (isus2i || isus2e) {
1645 			aflt->flt_panic = 1;
1646 		} else {
1647 			read_all_memscrub = 1;
1648 		}
1649 		spf_flt.flt_type = CPU_WP_ERR;
1650 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
1651 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1652 		    aflt->flt_panic);
1653 	}
1654 
1655 	/*
1656 	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
1657 	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
1658 	 * This is fatal.
1659 	 */
1660 
1661 	if (t_afsr & P_AFSR_CP) {
1662 		if (isus2i || isus2e) {
1663 			(void) strcat(pr_reason, "CP ");
1664 			aflt->flt_panic = 1;
1665 			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
1666 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1667 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1668 			    aflt->flt_panic);
1669 		} else {
1670 			/*
1671 			 * Orphan CP: Happens due to signal integrity problem
1672 			 * on a CPU, where a CP is reported, without reporting
1673 			 * its associated UE. This is handled by locating the
1674 			 * bad parity line and would kick off the memscrubber
1675 			 * to find the UE if in memory or in another's cache.
1676 			 */
1677 			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
1678 			(void) strcat(pr_reason, "ORPHAN_CP ");
1679 
1680 			/*
1681 			 * Here we have no PA to work with.
1682 			 * Scan each line in the ecache to look for
1683 			 * the one with bad parity.
1684 			 */
1685 			aflt->flt_addr = AFLT_INV_ADDR;
1686 			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1687 				&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
1688 				&oafsr);
1689 			acc_afsr |= oafsr;
1690 
1691 			/*
1692 			 * If we found a bad PA, update the state to indicate
1693 			 * if it is memory or I/O space.
1694 			 */
1695 			if (aflt->flt_addr != AFLT_INV_ADDR) {
1696 				aflt->flt_in_memory =
1697 					(pf_is_memory(aflt->flt_addr >>
1698 						MMU_PAGESHIFT)) ? 1 : 0;
1699 			}
1700 			read_all_memscrub = 1;
1701 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1702 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1703 			    aflt->flt_panic);
1704 
1705 		}
1706 	}
1707 
1708 	/*
1709 	 * If we queued an error other than WP or CP and we are going to return
1710 	 * from the trap and the error was in user mode or inside of a
1711 	 * copy routine, set AST flag so the queue will be drained before
1712 	 * returning to user mode.
1713 	 *
1714 	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
1715 	 * and send an event to its process contract.
1716 	 *
1717 	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
1718 	 * will have been no error queued in this case.
1719 	 */
1720 	if ((t_afsr &
1721 	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
1722 	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
1723 			int pcb_flag = 0;
1724 
1725 			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
1726 				pcb_flag |= ASYNC_HWERR;
1727 
1728 			if (t_afsr & P_AFSR_BERR)
1729 				pcb_flag |= ASYNC_BERR;
1730 
1731 			if (t_afsr & P_AFSR_TO)
1732 				pcb_flag |= ASYNC_BTO;
1733 
1734 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1735 			aston(curthread);
1736 			action = ACTION_AST_FLAGS;
1737 	}
1738 
1739 	/*
1740 	 * In response to a deferred error, we must do one of three things:
1741 	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
1742 	 * set in cases (1) and (2) - check that either action is set or
1743 	 * (3) is true.
1744 	 *
1745 	 * On II, the WP writes poisoned data back to memory, which will
1746 	 * cause a UE and a panic or reboot when read.  In this case, we
1747 	 * don't need to panic at this time.  On IIi and IIe,
1748 	 * aflt->flt_panic is already set above.
1749 	 */
1750 	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
1751 	    (t_afsr & P_AFSR_WP));
1752 
1753 	/*
1754 	 * Make a final sanity check to make sure we did not get any more async
1755 	 * errors and accumulate the afsr.
1756 	 */
1757 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
1758 	    cpunodes[CPU->cpu_id].ecache_linesize);
1759 	(void) clear_errors(&spf_flt, NULL);
1760 
1761 	/*
1762 	 * Take care of a special case: If there is a UE in the ecache flush
1763 	 * area, we'll see it in flush_ecache().  This will trigger the
1764 	 * CPU_ADDITIONAL_ERRORS case below.
1765 	 *
1766 	 * This could occur if the original error was a UE in the flush area,
1767 	 * or if the original error was an E$ error that was flushed out of
1768 	 * the E$ in scan_ecache().
1769 	 *
1770 	 * If it's at the same address that we're already logging, then it's
1771 	 * probably one of these cases.  Clear the bit so we don't trip over
1772 	 * it on the additional errors case, which could cause an unnecessary
1773 	 * panic.
1774 	 */
1775 	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
1776 		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
1777 	else
1778 		acc_afsr |= aflt->flt_stat;
1779 
1780 	/*
1781 	 * Check the acumulated afsr for the important bits.
1782 	 * Make sure the spf_flt.flt_type value is set, and
1783 	 * enque an error.
1784 	 */
1785 	if (acc_afsr &
1786 	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
1787 		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
1788 		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
1789 		    P_AFSR_ISAP))
1790 			aflt->flt_panic = 1;
1791 
1792 		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
1793 		aflt->flt_stat = acc_afsr;
1794 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
1795 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1796 		    aflt->flt_panic);
1797 	}
1798 
1799 	/*
1800 	 * If aflt->flt_panic is set at this point, we need to panic as the
1801 	 * result of a trap at TL > 0, or an error we determined to be fatal.
1802 	 * We've already enqueued the error in one of the if-clauses above,
1803 	 * and it will be dequeued and logged as part of the panic flow.
1804 	 */
1805 	if (aflt->flt_panic) {
1806 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
1807 		    "See previous message(s) for details", " %sError(s)",
1808 		    pr_reason);
1809 	}
1810 
1811 	/*
1812 	 * Before returning, we must re-enable errors, and
1813 	 * reset the caches to their boot-up state.
1814 	 */
1815 	set_lsu(get_lsu() | cache_boot_state);
1816 	set_error_enable(EER_ENABLE);
1817 }
1818 
1819 /*
1820  * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1821  * This routine is shared by the CE and UE handling code.
1822  */
1823 static void
1824 check_misc_err(spitf_async_flt *spf_flt)
1825 {
1826 	struct async_flt *aflt = (struct async_flt *)spf_flt;
1827 	char *fatal_str = NULL;
1828 
1829 	/*
1830 	 * The ISAP and ETP errors are supposed to cause a POR
1831 	 * from the system, so in theory we never, ever see these messages.
1832 	 * ISAP, ETP and IVUE are considered to be fatal.
1833 	 */
1834 	if (aflt->flt_stat & P_AFSR_ISAP)
1835 		fatal_str = " System Address Parity Error on";
1836 	else if (aflt->flt_stat & P_AFSR_ETP)
1837 		fatal_str = " Ecache Tag Parity Error on";
1838 	else if (aflt->flt_stat & P_AFSR_IVUE)
1839 		fatal_str = " Interrupt Vector Uncorrectable Error on";
1840 	if (fatal_str != NULL) {
1841 		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
1842 			NULL, fatal_str);
1843 	}
1844 }
1845 
1846 /*
1847  * Routine to convert a syndrome into a syndrome code.
1848  */
1849 static int
1850 synd_to_synd_code(int synd_status, ushort_t synd)
1851 {
1852 	if (synd_status != AFLT_STAT_VALID)
1853 		return (-1);
1854 
1855 	/*
1856 	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
1857 	 * to get the code indicating which bit(s) is(are) bad.
1858 	 */
1859 	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
1860 		return (-1);
1861 	else
1862 		return (ecc_syndrome_tab[synd]);
1863 }
1864 
1865 /* ARGSUSED */
1866 int
1867 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
1868 {
1869 	return (ENOTSUP);
1870 }
1871 
1872 /* ARGSUSED */
1873 int
1874 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
1875 {
1876 	return (ENOTSUP);
1877 }
1878 
1879 /* ARGSUSED */
1880 int
1881 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
1882 {
1883 	return (ENOTSUP);
1884 }
1885 
1886 /*
1887  * Routine to return a string identifying the physical name
1888  * associated with a memory/cache error.
1889  */
1890 /* ARGSUSED */
1891 int
1892 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
1893     uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
1894     char *buf, int buflen, int *lenp)
1895 {
1896 	short synd_code;
1897 	int ret;
1898 
1899 	if (flt_in_memory) {
1900 		synd_code = synd_to_synd_code(synd_status, synd);
1901 		if (synd_code == -1) {
1902 			ret = EINVAL;
1903 		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
1904 		    buf, buflen, lenp) != 0) {
1905 			ret = EIO;
1906 		} else if (*lenp <= 1) {
1907 			ret = EINVAL;
1908 		} else {
1909 			ret = 0;
1910 		}
1911 	} else {
1912 		ret = ENOTSUP;
1913 	}
1914 
1915 	if (ret != 0) {
1916 		buf[0] = '\0';
1917 		*lenp = 0;
1918 	}
1919 
1920 	return (ret);
1921 }
1922 
1923 /*
1924  * Wrapper for cpu_get_mem_unum() routine that takes an
1925  * async_flt struct rather than explicit arguments.
1926  */
1927 int
1928 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1929     char *buf, int buflen, int *lenp)
1930 {
1931 	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
1932 		aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
1933 		aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
1934 }
1935 
1936 /*
1937  * This routine is a more generic interface to cpu_get_mem_unum(),
1938  * that may be used by other modules (e.g. mm).
1939  */
1940 int
1941 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1942 		char *buf, int buflen, int *lenp)
1943 {
1944 	int synd_status, flt_in_memory, ret;
1945 	char unum[UNUM_NAMLEN];
1946 
1947 	/*
1948 	 * Check for an invalid address.
1949 	 */
1950 	if (afar == (uint64_t)-1)
1951 		return (ENXIO);
1952 
1953 	if (synd == (uint64_t)-1)
1954 		synd_status = AFLT_STAT_INVALID;
1955 	else
1956 		synd_status = AFLT_STAT_VALID;
1957 
1958 	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
1959 
1960 	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1961 	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
1962 	    != 0)
1963 		return (ret);
1964 
1965 	if (*lenp >= buflen)
1966 		return (ENAMETOOLONG);
1967 
1968 	(void) strncpy(buf, unum, buflen);
1969 
1970 	return (0);
1971 }
1972 
1973 /*
1974  * Routine to return memory information associated
1975  * with a physical address and syndrome.
1976  */
1977 /* ARGSUSED */
1978 int
1979 cpu_get_mem_info(uint64_t synd, uint64_t afar,
1980     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1981     int *segsp, int *banksp, int *mcidp)
1982 {
1983 	return (ENOTSUP);
1984 }
1985 
1986 /*
1987  * Routine to return a string identifying the physical
1988  * name associated with a cpuid.
1989  */
1990 /* ARGSUSED */
1991 int
1992 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1993 {
1994 	return (ENOTSUP);
1995 }
1996 
1997 /*
1998  * This routine returns the size of the kernel's FRU name buffer.
1999  */
2000 size_t
2001 cpu_get_name_bufsize()
2002 {
2003 	return (UNUM_NAMLEN);
2004 }
2005 
2006 /*
2007  * Cpu specific log func for UEs.
2008  */
2009 static void
2010 log_ue_err(struct async_flt *aflt, char *unum)
2011 {
2012 	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
2013 	int len = 0;
2014 
2015 #ifdef DEBUG
2016 	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
2017 
2018 	/*
2019 	 * Paranoid Check for priv mismatch
2020 	 * Only applicable for UEs
2021 	 */
2022 	if (afsr_priv != aflt->flt_priv) {
2023 		/*
2024 		 * The priv bits in %tstate and %afsr did not match; we expect
2025 		 * this to be very rare, so flag it with a message.
2026 		 */
2027 		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
2028 		    ": PRIV bit in TSTATE and AFSR mismatched; "
2029 		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
2030 
2031 		/* update saved afsr to reflect the correct priv */
2032 		aflt->flt_stat &= ~P_AFSR_PRIV;
2033 		if (aflt->flt_priv)
2034 			aflt->flt_stat |= P_AFSR_PRIV;
2035 	}
2036 #endif /* DEBUG */
2037 
2038 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
2039 	    UNUM_NAMLEN, &len);
2040 
2041 	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
2042 	    " Uncorrectable Memory Error on");
2043 
2044 	if (SYND(aflt->flt_synd) == 0x3) {
2045 		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
2046 		    " Syndrome 0x3 indicates that this may not be a "
2047 		    "memory module problem");
2048 	}
2049 
2050 	if (aflt->flt_in_memory)
2051 		cpu_log_ecmem_info(spf_flt);
2052 }
2053 
2054 
2055 /*
2056  * The cpu_async_log_err() function is called via the ue_drain() function to
2057  * handle logging for CPU events that are dequeued.  As such, it can be invoked
2058  * from softint context, from AST processing in the trap() flow, or from the
2059  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
2060  */
2061 static void
2062 cpu_async_log_err(void *flt)
2063 {
2064 	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
2065 	struct async_flt *aflt = (struct async_flt *)flt;
2066 	char unum[UNUM_NAMLEN];
2067 	char *space;
2068 	char *ecache_scrub_logstr = NULL;
2069 
2070 	switch (spf_flt->flt_type) {
2071 	    case CPU_UE_ERR:
2072 		/*
2073 		 * We want to skip logging only if ALL the following
2074 		 * conditions are true:
2075 		 *
2076 		 *	1. We are not panicking
2077 		 *	2. There is only one error
2078 		 *	3. That error is a memory error
2079 		 *	4. The error is caused by the memory scrubber (in
2080 		 *	   which case the error will have occurred under
2081 		 *	   on_trap protection)
2082 		 *	5. The error is on a retired page
2083 		 *
2084 		 * Note 1: AFLT_PROT_EC is used places other than the memory
2085 		 * scrubber.  However, none of those errors should occur
2086 		 * on a retired page.
2087 		 *
2088 		 * Note 2: In the CE case, these errors are discarded before
2089 		 * the errorq.  In the UE case, we must wait until now --
2090 		 * softcall() grabs a mutex, which we can't do at a high PIL.
2091 		 */
2092 		if (!panicstr &&
2093 		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
2094 		    aflt->flt_prot == AFLT_PROT_EC) {
2095 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2096 				/* Zero the address to clear the error */
2097 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2098 				return;
2099 			}
2100 		}
2101 
2102 		/*
2103 		 * Log the UE and check for causes of this UE error that
2104 		 * don't cause a trap (Copyback error).  cpu_async_error()
2105 		 * has already checked the i/o buses for us.
2106 		 */
2107 		log_ue_err(aflt, unum);
2108 		if (aflt->flt_in_memory)
2109 			cpu_check_allcpus(aflt);
2110 		break;
2111 
2112 	    case CPU_EDP_LDP_ERR:
2113 		if (aflt->flt_stat & P_AFSR_EDP)
2114 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2115 			    NULL, " EDP event on");
2116 
2117 		if (aflt->flt_stat & P_AFSR_LDP)
2118 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2119 			    NULL, " LDP event on");
2120 
2121 		/* Log ecache info if exist */
2122 		if (spf_flt->flt_ec_lcnt > 0) {
2123 			cpu_log_ecmem_info(spf_flt);
2124 
2125 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2126 			    NULL, " AFAR was derived from E$Tag");
2127 		} else {
2128 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2129 			    NULL, " No error found in ecache (No fault "
2130 			    "PA available)");
2131 		}
2132 		break;
2133 
2134 	    case CPU_WP_ERR:
2135 		/*
2136 		 * If the memscrub thread hasn't yet read
2137 		 * all of memory, as we requested in the
2138 		 * trap handler, then give it a kick to
2139 		 * make sure it does.
2140 		 */
2141 		if (!isus2i && !isus2e && read_all_memscrub)
2142 			memscrub_run();
2143 
2144 		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
2145 		    " WP event on");
2146 		return;
2147 
2148 	    case CPU_BTO_BERR_ERR:
2149 		/*
2150 		 * A bus timeout or error occurred that was in user mode or not
2151 		 * in a protected kernel code region.
2152 		 */
2153 		if (aflt->flt_stat & P_AFSR_BERR) {
2154 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2155 			    spf_flt, BERRTO_LFLAGS, NULL,
2156 			    " Bus Error on System Bus in %s mode from",
2157 			    aflt->flt_priv ? "privileged" : "user");
2158 		}
2159 
2160 		if (aflt->flt_stat & P_AFSR_TO) {
2161 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2162 			    spf_flt, BERRTO_LFLAGS, NULL,
2163 			    " Timeout on System Bus in %s mode from",
2164 			    aflt->flt_priv ? "privileged" : "user");
2165 		}
2166 
2167 		return;
2168 
2169 	    case CPU_PANIC_CP_ERR:
2170 		/*
2171 		 * Process the Copyback (CP) error info (if any) obtained from
2172 		 * polling all the cpus in the panic flow. This case is only
2173 		 * entered if we are panicking.
2174 		 */
2175 		ASSERT(panicstr != NULL);
2176 		ASSERT(aflt->flt_id == panic_aflt.flt_id);
2177 
2178 		/* See which space - this info may not exist */
2179 		if (panic_aflt.flt_status & ECC_D_TRAP)
2180 			space = "Data ";
2181 		else if (panic_aflt.flt_status & ECC_I_TRAP)
2182 			space = "Instruction ";
2183 		else
2184 			space = "";
2185 
2186 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2187 		    " AFAR was derived from UE report,"
2188 		    " CP event on CPU%d (caused %saccess error on %s%d)",
2189 		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
2190 		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
2191 
2192 		if (spf_flt->flt_ec_lcnt > 0)
2193 			cpu_log_ecmem_info(spf_flt);
2194 		else
2195 			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
2196 			    NULL, " No cache dump available");
2197 
2198 		return;
2199 
2200 	    case CPU_TRAPPING_CP_ERR:
2201 		/*
2202 		 * For sabre only.  This is a copyback ecache parity error due
2203 		 * to a PCI DMA read.  We should be panicking if we get here.
2204 		 */
2205 		ASSERT(panicstr != NULL);
2206 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2207 		    " AFAR was derived from UE report,"
2208 		    " CP event on CPU%d (caused Data access error "
2209 		    "on PCIBus)", aflt->flt_inst);
2210 		return;
2211 
2212 		/*
2213 		 * We log the ecache lines of the following states,
2214 		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
2215 		 * dirty_bad_busy if ecache_scrub_verbose is set and panic
2216 		 * in addition to logging if ecache_scrub_panic is set.
2217 		 */
2218 	    case CPU_BADLINE_CI_ERR:
2219 		ecache_scrub_logstr = "CBI";
2220 		/* FALLTHRU */
2221 
2222 	    case CPU_BADLINE_CB_ERR:
2223 		if (ecache_scrub_logstr == NULL)
2224 			ecache_scrub_logstr = "CBB";
2225 		/* FALLTHRU */
2226 
2227 	    case CPU_BADLINE_DI_ERR:
2228 		if (ecache_scrub_logstr == NULL)
2229 			ecache_scrub_logstr = "DBI";
2230 		/* FALLTHRU */
2231 
2232 	    case CPU_BADLINE_DB_ERR:
2233 		if (ecache_scrub_logstr == NULL)
2234 			ecache_scrub_logstr = "DBB";
2235 
2236 		cpu_aflt_log(CE_NOTE, 2, spf_flt,
2237 			(CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
2238 			" %s event on", ecache_scrub_logstr);
2239 		cpu_log_ecmem_info(spf_flt);
2240 
2241 		return;
2242 
2243 	    case CPU_ORPHAN_CP_ERR:
2244 		/*
2245 		 * Orphan CPs, where the CP bit is set, but when a CPU
2246 		 * doesn't report a UE.
2247 		 */
2248 		if (read_all_memscrub)
2249 			memscrub_run();
2250 
2251 		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
2252 			NULL, " Orphan CP event on");
2253 
2254 		/* Log ecache info if exist */
2255 		if (spf_flt->flt_ec_lcnt > 0)
2256 			cpu_log_ecmem_info(spf_flt);
2257 		else
2258 			cpu_aflt_log(CE_NOTE, 2, spf_flt,
2259 				(CP_LFLAGS | CPU_FLTCPU), NULL,
2260 				" No error found in ecache (No fault "
2261 				"PA available");
2262 		return;
2263 
2264 	    case CPU_ECACHE_ADDR_PAR_ERR:
2265 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2266 				" E$ Tag Address Parity error on");
2267 		cpu_log_ecmem_info(spf_flt);
2268 		return;
2269 
2270 	    case CPU_ECACHE_STATE_ERR:
2271 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2272 				" E$ Tag State Parity error on");
2273 		cpu_log_ecmem_info(spf_flt);
2274 		return;
2275 
2276 	    case CPU_ECACHE_TAG_ERR:
2277 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2278 				" E$ Tag scrub event on");
2279 		cpu_log_ecmem_info(spf_flt);
2280 		return;
2281 
2282 	    case CPU_ECACHE_ETP_ETS_ERR:
2283 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2284 				" AFSR.ETP is set and AFSR.ETS is zero on");
2285 		cpu_log_ecmem_info(spf_flt);
2286 		return;
2287 
2288 
2289 	    case CPU_ADDITIONAL_ERR:
2290 		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
2291 		    " Additional errors detected during error processing on");
2292 		return;
2293 
2294 	    default:
2295 		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
2296 		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
2297 		return;
2298 	}
2299 
2300 	/* ... fall through from the UE, EDP, or LDP cases */
2301 
2302 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2303 		if (!panicstr) {
2304 			(void) page_retire(aflt->flt_addr, PR_UE);
2305 		} else {
2306 			/*
2307 			 * Clear UEs on panic so that we don't
2308 			 * get haunted by them during panic or
2309 			 * after reboot
2310 			 */
2311 			clearphys(P2ALIGN(aflt->flt_addr, 64),
2312 			    cpunodes[CPU->cpu_id].ecache_size,
2313 			    cpunodes[CPU->cpu_id].ecache_linesize);
2314 
2315 			(void) clear_errors(NULL, NULL);
2316 		}
2317 	}
2318 
2319 	/*
2320 	 * Log final recover message
2321 	 */
2322 	if (!panicstr) {
2323 		if (!aflt->flt_priv) {
2324 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2325 			    NULL, " Above Error is in User Mode"
2326 			    "\n    and is fatal: "
2327 			    "will SIGKILL process and notify contract");
2328 		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
2329 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2330 			    NULL, " Above Error detected while dumping core;"
2331 			    "\n    core file will be truncated");
2332 		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
2333 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2334 			    NULL, " Above Error is due to Kernel access"
2335 			    "\n    to User space and is fatal: "
2336 			    "will SIGKILL process and notify contract");
2337 		} else if (aflt->flt_prot == AFLT_PROT_EC) {
2338 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
2339 			    " Above Error detected by protected Kernel code"
2340 			    "\n    that will try to clear error from system");
2341 		}
2342 	}
2343 }
2344 
2345 
2346 /*
2347  * Check all cpus for non-trapping UE-causing errors
2348  * In Ultra I/II, we look for copyback errors (CPs)
2349  */
2350 void
2351 cpu_check_allcpus(struct async_flt *aflt)
2352 {
2353 	spitf_async_flt cp;
2354 	spitf_async_flt *spf_cpflt = &cp;
2355 	struct async_flt *cpflt = (struct async_flt *)&cp;
2356 	int pix;
2357 
2358 	cpflt->flt_id = aflt->flt_id;
2359 	cpflt->flt_addr = aflt->flt_addr;
2360 
2361 	for (pix = 0; pix < NCPU; pix++) {
2362 		if (CPU_XCALL_READY(pix)) {
2363 			xc_one(pix, (xcfunc_t *)get_cpu_status,
2364 			    (uint64_t)cpflt, 0);
2365 
2366 			if (cpflt->flt_stat & P_AFSR_CP) {
2367 				char *space;
2368 
2369 				/* See which space - this info may not exist */
2370 				if (aflt->flt_status & ECC_D_TRAP)
2371 					space = "Data ";
2372 				else if (aflt->flt_status & ECC_I_TRAP)
2373 					space = "Instruction ";
2374 				else
2375 					space = "";
2376 
2377 				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
2378 				    NULL, " AFAR was derived from UE report,"
2379 				    " CP event on CPU%d (caused %saccess "
2380 				    "error on %s%d)", pix, space,
2381 				    (aflt->flt_status & ECC_IOBUS) ?
2382 				    "IOBUS" : "CPU", aflt->flt_bus_id);
2383 
2384 				if (spf_cpflt->flt_ec_lcnt > 0)
2385 					cpu_log_ecmem_info(spf_cpflt);
2386 				else
2387 					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
2388 					    CPU_ERRID_FIRST, NULL,
2389 					    " No cache dump available");
2390 			}
2391 		}
2392 	}
2393 }
2394 
2395 #ifdef DEBUG
2396 int test_mp_cp = 0;
2397 #endif
2398 
2399 /*
2400  * Cross-call callback routine to tell a CPU to read its own %afsr to check
2401  * for copyback errors and capture relevant information.
2402  */
2403 static uint_t
2404 get_cpu_status(uint64_t arg)
2405 {
2406 	struct async_flt *aflt = (struct async_flt *)arg;
2407 	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
2408 	uint64_t afsr;
2409 	uint32_t ec_idx;
2410 	uint64_t sdbh, sdbl;
2411 	int i;
2412 	uint32_t ec_set_size;
2413 	uchar_t valid;
2414 	ec_data_t ec_data[8];
2415 	uint64_t ec_tag, flt_addr_tag, oafsr;
2416 	uint64_t *acc_afsr = NULL;
2417 
2418 	get_asyncflt(&afsr);
2419 	if (CPU_PRIVATE(CPU) != NULL) {
2420 		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2421 		afsr |= *acc_afsr;
2422 		*acc_afsr = 0;
2423 	}
2424 
2425 #ifdef DEBUG
2426 	if (test_mp_cp)
2427 		afsr |= P_AFSR_CP;
2428 #endif
2429 	aflt->flt_stat = afsr;
2430 
2431 	if (afsr & P_AFSR_CP) {
2432 		/*
2433 		 * Capture the UDBs
2434 		 */
2435 		get_udb_errors(&sdbh, &sdbl);
2436 		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
2437 		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);
2438 
2439 		/*
2440 		 * Clear CP bit before capturing ecache data
2441 		 * and AFSR info.
2442 		 */
2443 		set_asyncflt(P_AFSR_CP);
2444 
2445 		/*
2446 		 * See if we can capture the ecache line for the
2447 		 * fault PA.
2448 		 *
2449 		 * Return a valid matching ecache line, if any.
2450 		 * Otherwise, return the first matching ecache
2451 		 * line marked invalid.
2452 		 */
2453 		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
2454 		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
2455 		    ecache_associativity;
2456 		spf_flt->flt_ec_lcnt = 0;
2457 
2458 		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
2459 		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
2460 			get_ecache_dtag(P2ALIGN(ec_idx, 64),
2461 				(uint64_t *)&ec_data[0], &ec_tag, &oafsr,
2462 				    acc_afsr);
2463 
2464 			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
2465 				continue;
2466 
2467 			valid = cpu_ec_state_valid &
2468 			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
2469 			    cpu_ec_state_shift);
2470 
2471 			if (valid || spf_flt->flt_ec_lcnt == 0) {
2472 				spf_flt->flt_ec_tag = ec_tag;
2473 				bcopy(&ec_data, &spf_flt->flt_ec_data,
2474 				    sizeof (ec_data));
2475 				spf_flt->flt_ec_lcnt = 1;
2476 
2477 				if (valid)
2478 					break;
2479 			}
2480 		}
2481 	}
2482 	return (0);
2483 }
2484 
2485 /*
2486  * CPU-module callback for the non-panicking CPUs.  This routine is invoked
2487  * from panic_idle() as part of the other CPUs stopping themselves when a
2488  * panic occurs.  We need to be VERY careful what we do here, since panicstr
2489  * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
2490  * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
2491  * CP error information.
2492  */
2493 void
2494 cpu_async_panic_callb(void)
2495 {
2496 	spitf_async_flt cp;
2497 	struct async_flt *aflt = (struct async_flt *)&cp;
2498 	uint64_t *scrub_afsr;
2499 
2500 	if (panic_aflt.flt_id != 0) {
2501 		aflt->flt_addr = panic_aflt.flt_addr;
2502 		(void) get_cpu_status((uint64_t)aflt);
2503 
2504 		if (CPU_PRIVATE(CPU) != NULL) {
2505 			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2506 			if (*scrub_afsr & P_AFSR_CP) {
2507 				aflt->flt_stat |= *scrub_afsr;
2508 				*scrub_afsr = 0;
2509 			}
2510 		}
2511 		if (aflt->flt_stat & P_AFSR_CP) {
2512 			aflt->flt_id = panic_aflt.flt_id;
2513 			aflt->flt_panic = 1;
2514 			aflt->flt_inst = CPU->cpu_id;
2515 			aflt->flt_class = CPU_FAULT;
2516 			cp.flt_type = CPU_PANIC_CP_ERR;
2517 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
2518 			    (void *)&cp, sizeof (cp), ue_queue,
2519 			    aflt->flt_panic);
2520 		}
2521 	}
2522 }
2523 
2524 /*
2525  * Turn off all cpu error detection, normally only used for panics.
2526  */
2527 void
2528 cpu_disable_errors(void)
2529 {
2530 	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
2531 }
2532 
2533 /*
2534  * Enable errors.
2535  */
2536 void
2537 cpu_enable_errors(void)
2538 {
2539 	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
2540 }
2541 
2542 static void
2543 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
2544 {
2545 	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
2546 	int i, loop = 1;
2547 	ushort_t ecc_0;
2548 	uint64_t paddr;
2549 	uint64_t data;
2550 
2551 	if (verbose)
2552 		loop = 8;
2553 	for (i = 0; i < loop; i++) {
2554 		paddr = aligned_addr + (i * 8);
2555 		data = lddphys(paddr);
2556 		if (verbose) {
2557 			if (ce_err) {
2558 			    ecc_0 = ecc_gen((uint32_t)(data>>32),
2559 			    (uint32_t)data);
2560 			    cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2561 				NULL, "    Paddr 0x%" PRIx64 ", "
2562 				"Data 0x%08x.%08x, ECC 0x%x", paddr,
2563 				(uint32_t)(data>>32), (uint32_t)data, ecc_0);
2564 			} else {
2565 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2566 				    NULL, "    Paddr 0x%" PRIx64 ", "
2567 				    "Data 0x%08x.%08x", paddr,
2568 				    (uint32_t)(data>>32), (uint32_t)data);
2569 			}
2570 		}
2571 	}
2572 }
2573 
2574 static struct {		/* sec-ded-s4ed ecc code */
2575 	uint_t hi, lo;
2576 } ecc_code[8] = {
2577 	{ 0xee55de23U, 0x16161161U },
2578 	{ 0x55eede93U, 0x61612212U },
2579 	{ 0xbb557b8cU, 0x49494494U },
2580 	{ 0x55bb7b6cU, 0x94948848U },
2581 	{ 0x16161161U, 0xee55de23U },
2582 	{ 0x61612212U, 0x55eede93U },
2583 	{ 0x49494494U, 0xbb557b8cU },
2584 	{ 0x94948848U, 0x55bb7b6cU }
2585 };
2586 
2587 static ushort_t
2588 ecc_gen(uint_t high_bytes, uint_t low_bytes)
2589 {
2590 	int i, j;
2591 	uchar_t checker, bit_mask;
2592 	struct {
2593 		uint_t hi, lo;
2594 	} hex_data, masked_data[8];
2595 
2596 	hex_data.hi = high_bytes;
2597 	hex_data.lo = low_bytes;
2598 
2599 	/* mask out bits according to sec-ded-s4ed ecc code */
2600 	for (i = 0; i < 8; i++) {
2601 		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
2602 		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
2603 	}
2604 
2605 	/*
2606 	 * xor all bits in masked_data[i] to get bit_i of checker,
2607 	 * where i = 0 to 7
2608 	 */
2609 	checker = 0;
2610 	for (i = 0; i < 8; i++) {
2611 		bit_mask = 1 << i;
2612 		for (j = 0; j < 32; j++) {
2613 			if (masked_data[i].lo & 1) checker ^= bit_mask;
2614 			if (masked_data[i].hi & 1) checker ^= bit_mask;
2615 			masked_data[i].hi >>= 1;
2616 			masked_data[i].lo >>= 1;
2617 		}
2618 	}
2619 	return (checker);
2620 }
2621 
2622 /*
2623  * Flush the entire ecache using displacement flush by reading through a
2624  * physical address range as large as the ecache.
2625  */
2626 void
2627 cpu_flush_ecache(void)
2628 {
2629 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2630 	    cpunodes[CPU->cpu_id].ecache_linesize);
2631 }
2632 
2633 /*
2634  * read and display the data in the cache line where the
2635  * original ce error occurred.
2636  * This routine is mainly used for debugging new hardware.
2637  */
2638 void
2639 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
2640 {
2641 	kpreempt_disable();
2642 	/* disable ECC error traps */
2643 	set_error_enable(EER_ECC_DISABLE);
2644 
2645 	/*
2646 	 * flush the ecache
2647 	 * read the data
2648 	 * check to see if an ECC error occured
2649 	 */
2650 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2651 	    cpunodes[CPU->cpu_id].ecache_linesize);
2652 	set_lsu(get_lsu() | cache_boot_state);
2653 	cpu_read_paddr(ecc, verbose, ce_err);
2654 	(void) check_ecc(ecc);
2655 
2656 	/* enable ECC error traps */
2657 	set_error_enable(EER_ENABLE);
2658 	kpreempt_enable();
2659 }
2660 
2661 /*
2662  * Check the AFSR bits for UE/CE persistence.
2663  * If UE or CE errors are detected, the routine will
2664  * clears all the AFSR sticky bits (except CP for
2665  * spitfire/blackbird) and the UDBs.
2666  * if ce_debug or ue_debug is set, log any ue/ce errors detected.
2667  */
2668 static int
2669 check_ecc(struct async_flt *ecc)
2670 {
2671 	uint64_t t_afsr;
2672 	uint64_t t_afar;
2673 	uint64_t udbh;
2674 	uint64_t udbl;
2675 	ushort_t udb;
2676 	int persistent = 0;
2677 
2678 	/*
2679 	 * Capture the AFSR, AFAR and UDBs info
2680 	 */
2681 	get_asyncflt(&t_afsr);
2682 	get_asyncaddr(&t_afar);
2683 	t_afar &= SABRE_AFAR_PA;
2684 	get_udb_errors(&udbh, &udbl);
2685 
2686 	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
2687 		/*
2688 		 * Clear the errors
2689 		 */
2690 		clr_datapath();
2691 
2692 		if (isus2i || isus2e)
2693 			set_asyncflt(t_afsr);
2694 		else
2695 			set_asyncflt(t_afsr & ~P_AFSR_CP);
2696 
2697 		/*
2698 		 * determine whether to check UDBH or UDBL for persistence
2699 		 */
2700 		if (ecc->flt_synd & UDBL_REG) {
2701 			udb = (ushort_t)udbl;
2702 			t_afar |= 0x8;
2703 		} else {
2704 			udb = (ushort_t)udbh;
2705 		}
2706 
2707 		if (ce_debug || ue_debug) {
2708 			spitf_async_flt spf_flt; /* for logging */
2709 			struct async_flt *aflt =
2710 				(struct async_flt *)&spf_flt;
2711 
2712 			/* Package the info nicely in the spf_flt struct */
2713 			bzero(&spf_flt, sizeof (spitf_async_flt));
2714 			aflt->flt_stat = t_afsr;
2715 			aflt->flt_addr = t_afar;
2716 			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
2717 			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);
2718 
2719 			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
2720 			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
2721 			    " check_ecc: Dumping captured error states ...");
2722 		}
2723 
2724 		/*
2725 		 * if the fault addresses don't match, not persistent
2726 		 */
2727 		if (t_afar != ecc->flt_addr) {
2728 			return (persistent);
2729 		}
2730 
2731 		/*
2732 		 * check for UE persistence
2733 		 * since all DIMMs in the bank are identified for a UE,
2734 		 * there's no reason to check the syndrome
2735 		 */
2736 		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
2737 			persistent = 1;
2738 		}
2739 
2740 		/*
2741 		 * check for CE persistence
2742 		 */
2743 		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
2744 			if ((udb & P_DER_E_SYND) ==
2745 			    (ecc->flt_synd & P_DER_E_SYND)) {
2746 				persistent = 1;
2747 			}
2748 		}
2749 	}
2750 	return (persistent);
2751 }
2752 
2753 #ifdef HUMMINGBIRD
2754 #define	HB_FULL_DIV		1
2755 #define	HB_HALF_DIV		2
2756 #define	HB_LOWEST_DIV		8
2757 #define	HB_ECLK_INVALID		0xdeadbad
2758 static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
2759 	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
2760 	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
2761 	HB_ECLK_8 };
2762 
2763 #define	HB_SLOW_DOWN		0
2764 #define	HB_SPEED_UP		1
2765 
2766 #define	SET_ESTAR_MODE(mode)					\
2767 	stdphysio(HB_ESTAR_MODE, (mode));			\
2768 	/*							\
2769 	 * PLL logic requires minimum of 16 clock		\
2770 	 * cycles to lock to the new clock speed.		\
2771 	 * Wait 1 usec to satisfy this requirement.		\
2772 	 */							\
2773 	drv_usecwait(1);
2774 
2775 #define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
2776 {								\
2777 	volatile uint64_t data;					\
2778 	uint64_t count, new_count;				\
2779 	clock_t delay;						\
2780 	data = lddphysio(HB_MEM_CNTRL0);			\
2781 	count = (data & HB_REFRESH_COUNT_MASK) >> 		\
2782 	    HB_REFRESH_COUNT_SHIFT;				\
2783 	new_count = (HB_REFRESH_INTERVAL *			\
2784 	    cpunodes[CPU->cpu_id].clock_freq) /			\
2785 	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
2786 	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
2787 	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
2788 	stdphysio(HB_MEM_CNTRL0, data);				\
2789 	data = lddphysio(HB_MEM_CNTRL0);        		\
2790 	/*							\
2791 	 * If we are slowing down the cpu and Memory		\
2792 	 * Self Refresh is not enabled, it is required		\
2793 	 * to wait for old refresh count to count-down and	\
2794 	 * new refresh count to go into effect (let new value	\
2795 	 * counts down once).					\
2796 	 */							\
2797 	if ((direction) == HB_SLOW_DOWN &&			\
2798 	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
2799 		/*						\
2800 		 * Each count takes 64 cpu clock cycles		\
2801 		 * to decrement.  Wait for current refresh	\
2802 		 * count plus new refresh count at current	\
2803 		 * cpu speed to count down to zero.  Round	\
2804 		 * up the delay time.				\
2805 		 */						\
2806 		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
2807 		    (count + new_count) * MICROSEC * (cur_div)) /\
2808 		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
2809 		drv_usecwait(delay);				\
2810 	}							\
2811 }
2812 
2813 #define	SET_SELF_REFRESH(bit)					\
2814 {								\
2815 	volatile uint64_t data;					\
2816 	data = lddphysio(HB_MEM_CNTRL0);			\
2817 	data = (data & ~HB_SELF_REFRESH_MASK) |			\
2818 	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
2819 	stdphysio(HB_MEM_CNTRL0, data);				\
2820 	data = lddphysio(HB_MEM_CNTRL0);			\
2821 }
2822 #endif	/* HUMMINGBIRD */
2823 
2824 /* ARGSUSED */
2825 void
2826 cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
2827 {
2828 #ifdef HUMMINGBIRD
2829 	uint64_t cur_mask, cur_divisor = 0;
2830 	volatile uint64_t reg;
2831 	int index;
2832 
2833 	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
2834 	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
2835 		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
2836 		    new_divisor);
2837 		return;
2838 	}
2839 
2840 	reg = lddphysio(HB_ESTAR_MODE);
2841 	cur_mask = reg & HB_ECLK_MASK;
2842 	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
2843 		if (hb_eclk[index] == cur_mask) {
2844 			cur_divisor = index;
2845 			break;
2846 		}
2847 	}
2848 
2849 	if (cur_divisor == 0)
2850 		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
2851 		    "can't be determined!");
2852 
2853 	/*
2854 	 * If we are already at the requested divisor speed, just
2855 	 * return.
2856 	 */
2857 	if (cur_divisor == new_divisor)
2858 		return;
2859 
2860 	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
2861 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
2862 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2863 		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
2864 
2865 	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
2866 		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
2867 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2868 		/* LINTED: E_FALSE_LOGICAL_EXPR */
2869 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
2870 
2871 	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
2872 		/*
2873 		 * Transition to 1/2 speed first, then to
2874 		 * lower speed.
2875 		 */
2876 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
2877 		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
2878 		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
2879 
2880 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
2881 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2882 
2883 	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
2884 		/*
2885 		 * Transition to 1/2 speed first, then to
2886 		 * full speed.
2887 		 */
2888 		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
2889 		/* LINTED: E_FALSE_LOGICAL_EXPR */
2890 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);
2891 
2892 		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
2893 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2894 		/* LINTED: E_FALSE_LOGICAL_EXPR */
2895 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);
2896 
2897 	} else if (cur_divisor < new_divisor) {
2898 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
2899 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2900 
2901 	} else if (cur_divisor > new_divisor) {
2902 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2903 		/* LINTED: E_FALSE_LOGICAL_EXPR */
2904 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
2905 	}
2906 	CPU->cpu_m.divisor = (uchar_t)new_divisor;
2907 #endif
2908 }
2909 
2910 /*
2911  * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
2912  * we clear all the sticky bits. If a non-null pointer to a async fault
2913  * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
2914  * info will be returned in the structure.  If a non-null pointer to a
2915  * uint64_t is passed in, this will be updated if the CP bit is set in the
2916  * AFSR.  The afsr will be returned.
2917  */
2918 static uint64_t
2919 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
2920 {
2921 	struct async_flt *aflt = (struct async_flt *)spf_flt;
2922 	uint64_t afsr;
2923 	uint64_t udbh, udbl;
2924 
2925 	get_asyncflt(&afsr);
2926 
2927 	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
2928 		*acc_afsr |= afsr;
2929 
2930 	if (spf_flt != NULL) {
2931 		aflt->flt_stat = afsr;
2932 		get_asyncaddr(&aflt->flt_addr);
2933 		aflt->flt_addr &= SABRE_AFAR_PA;
2934 
2935 		get_udb_errors(&udbh, &udbl);
2936 		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
2937 		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
2938 	}
2939 
2940 	set_asyncflt(afsr);		/* clear afsr */
2941 	clr_datapath();			/* clear udbs */
2942 	return (afsr);
2943 }
2944 
2945 /*
2946  * Scan the ecache to look for bad lines.  If found, the afsr, afar, e$ data
2947  * tag of the first bad line will be returned. We also return the old-afsr
2948  * (before clearing the sticky bits). The linecnt data will be updated to
2949  * indicate the number of bad lines detected.
2950  */
2951 static void
2952 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
2953 	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
2954 {
2955 	ec_data_t t_ecdata[8];
2956 	uint64_t t_etag, oafsr;
2957 	uint64_t pa = AFLT_INV_ADDR;
2958 	uint32_t i, j, ecache_sz;
2959 	uint64_t acc_afsr = 0;
2960 	uint64_t *cpu_afsr = NULL;
2961 
2962 	if (CPU_PRIVATE(CPU) != NULL)
2963 		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2964 
2965 	*linecnt = 0;
2966 	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;
2967 
2968 	for (i = 0; i < ecache_sz; i += 64) {
2969 		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
2970 		    cpu_afsr);
2971 		acc_afsr |= oafsr;
2972 
2973 		/*
2974 		 * Scan through the whole 64 bytes line in 8 8-byte chunks
2975 		 * looking for the first occurrence of an EDP error.  The AFSR
2976 		 * info is captured for each 8-byte chunk.  Note that for
2977 		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
2978 		 * 16-byte chunk granularity (i.e. the AFSR will be the same
2979 		 * for the high and low 8-byte words within the 16-byte chunk).
2980 		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
2981 		 * granularity and only PSYND bits [7:0] are used.
2982 		 */
2983 		for (j = 0; j < 8; j++) {
2984 			ec_data_t *ecdptr = &t_ecdata[j];
2985 
2986 			if (ecdptr->ec_afsr & P_AFSR_EDP) {
2987 				uint64_t errpa;
2988 				ushort_t psynd;
2989 				uint32_t ec_set_size = ecache_sz /
2990 				    ecache_associativity;
2991 
2992 				/*
2993 				 * For Spitfire/Blackbird, we need to look at
2994 				 * the PSYND to make sure that this 8-byte chunk
2995 				 * is the right one.  PSYND bits [15:8] belong
2996 				 * to the upper 8-byte (even) chunk.  Bits
2997 				 * [7:0] belong to the lower 8-byte chunk (odd).
2998 				 */
2999 				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3000 				if (!isus2i && !isus2e) {
3001 					if (j & 0x1)
3002 						psynd = psynd & 0xFF;
3003 					else
3004 						psynd = psynd >> 8;
3005 
3006 					if (!psynd)
3007 						continue; /* wrong chunk */
3008 				}
3009 
3010 				/* Construct the PA */
3011 				errpa = ((t_etag & cpu_ec_tag_mask) <<
3012 				    cpu_ec_tag_shift) | ((i | (j << 3)) %
3013 				    ec_set_size);
3014 
3015 				/* clean up the cache line */
3016 				flushecacheline(P2ALIGN(errpa, 64),
3017 					cpunodes[CPU->cpu_id].ecache_size);
3018 
3019 				oafsr = clear_errors(NULL, cpu_afsr);
3020 				acc_afsr |= oafsr;
3021 
3022 				(*linecnt)++;
3023 
3024 				/*
3025 				 * Capture the PA for the first bad line found.
3026 				 * Return the ecache dump and tag info.
3027 				 */
3028 				if (pa == AFLT_INV_ADDR) {
3029 					int k;
3030 
3031 					pa = errpa;
3032 					for (k = 0; k < 8; k++)
3033 						ecache_data[k] = t_ecdata[k];
3034 					*ecache_tag = t_etag;
3035 				}
3036 				break;
3037 			}
3038 		}
3039 	}
3040 	*t_afar = pa;
3041 	*t_afsr = acc_afsr;
3042 }
3043 
3044 static void
3045 cpu_log_ecmem_info(spitf_async_flt *spf_flt)
3046 {
3047 	struct async_flt *aflt = (struct async_flt *)spf_flt;
3048 	uint64_t ecache_tag = spf_flt->flt_ec_tag;
3049 	char linestr[30];
3050 	char *state_str;
3051 	int i;
3052 
3053 	/*
3054 	 * Check the ecache tag to make sure it
3055 	 * is valid. If invalid, a memory dump was
3056 	 * captured instead of a ecache dump.
3057 	 */
3058 	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
3059 		uchar_t eparity = (uchar_t)
3060 		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);
3061 
3062 		uchar_t estate = (uchar_t)
3063 		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);
3064 
3065 		if (estate == cpu_ec_state_shr)
3066 			state_str = "Shared";
3067 		else if (estate == cpu_ec_state_exl)
3068 			state_str = "Exclusive";
3069 		else if (estate == cpu_ec_state_own)
3070 			state_str = "Owner";
3071 		else if (estate == cpu_ec_state_mod)
3072 			state_str = "Modified";
3073 		else
3074 			state_str = "Invalid";
3075 
3076 		if (spf_flt->flt_ec_lcnt > 1) {
3077 			(void) snprintf(linestr, sizeof (linestr),
3078 			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
3079 		} else {
3080 			linestr[0] = '\0';
3081 		}
3082 
3083 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3084 		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
3085 		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
3086 		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
3087 		    (uint32_t)ecache_tag, state_str,
3088 		    (uint32_t)eparity, linestr);
3089 	} else {
3090 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3091 		    " E$tag != PA from AFAR; E$line was victimized"
3092 		    "\n    dumping memory from PA 0x%08x.%08x instead",
3093 		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
3094 		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
3095 	}
3096 
3097 	/*
3098 	 * Dump out all 8 8-byte ecache data captured
3099 	 * For each 8-byte data captured, we check the
3100 	 * captured afsr's parity syndrome to find out
3101 	 * which 8-byte chunk is bad. For memory dump, the
3102 	 * AFSR values were initialized to 0.
3103 	 */
3104 	for (i = 0; i < 8; i++) {
3105 		ec_data_t *ecdptr;
3106 		uint_t offset;
3107 		ushort_t psynd;
3108 		ushort_t bad;
3109 		uint64_t edp;
3110 
3111 		offset = i << 3;	/* multiply by 8 */
3112 		ecdptr = &spf_flt->flt_ec_data[i];
3113 		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3114 		edp = ecdptr->ec_afsr & P_AFSR_EDP;
3115 
3116 		/*
3117 		 * For Sabre/Hummingbird, parity synd is captured only
3118 		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
3119 		 * For spitfire/blackbird, AFSR.PSYND is captured
3120 		 * in 16-byte granularity. [15:8] represent
3121 		 * the upper 8 byte and [7:0] the lower 8 byte.
3122 		 */
3123 		if (isus2i || isus2e || (i & 0x1))
3124 			bad = (psynd & 0xFF);		/* check bits [7:0] */
3125 		else
3126 			bad = (psynd & 0xFF00);		/* check bits [15:8] */
3127 
3128 		if (bad && edp) {
3129 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3130 			    " E$Data (0x%02x): 0x%08x.%08x "
3131 			    "*Bad* PSYND=0x%04x", offset,
3132 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3133 			    (uint32_t)ecdptr->ec_d8, psynd);
3134 		} else {
3135 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3136 			    " E$Data (0x%02x): 0x%08x.%08x", offset,
3137 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3138 			    (uint32_t)ecdptr->ec_d8);
3139 		}
3140 	}
3141 }
3142 
3143 /*
3144  * Common logging function for all cpu async errors.  This function allows the
3145  * caller to generate a single cmn_err() call that logs the appropriate items
3146  * from the fault structure, and implements our rules for AFT logging levels.
3147  *
3148  *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
3149  *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
3150  *	spflt: pointer to spitfire async fault structure
3151  *	logflags: bitflags indicating what to output
3152  *	endstr: a end string to appear at the end of this log
3153  *	fmt: a format string to appear at the beginning of the log
3154  *
3155  * The logflags allows the construction of predetermined output from the spflt
3156  * structure.  The individual data items always appear in a consistent order.
3157  * Note that either or both of the spflt structure pointer and logflags may be
3158  * NULL or zero respectively, indicating that the predetermined output
3159  * substrings are not requested in this log.  The output looks like this:
3160  *
3161  *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
3162  *	<CPU_SPACE><CPU_ERRID>
3163  *	newline+4spaces<CPU_AFSR><CPU_AFAR>
3164  *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
3165  *	newline+4spaces<CPU_UDBH><CPU_UDBL>
3166  *	newline+4spaces<CPU_SYND>
3167  *	newline+4spaces<endstr>
3168  *
3169  * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>;
3170  * it is assumed that <endstr> will be the unum string in this case.  The size
3171  * of our intermediate formatting buf[] is based on the worst case of all flags
3172  * being enabled.  We pass the caller's varargs directly to vcmn_err() for
3173  * formatting so we don't need additional stack space to format them here.
3174  */
3175 /*PRINTFLIKE6*/
3176 static void
3177 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
3178 	const char *endstr, const char *fmt, ...)
3179 {
3180 	struct async_flt *aflt = (struct async_flt *)spflt;
3181 	char buf[400], *p, *q; /* see comments about buf[] size above */
3182 	va_list ap;
3183 	int console_log_flag;
3184 
3185 	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
3186 				(aflt->flt_stat & P_AFSR_LEVEL1)) ||
3187 	    (aflt->flt_panic)) {
3188 		console_log_flag = (tagnum < 2) || aft_verbose;
3189 	} else {
3190 		int verbose = ((aflt->flt_class == BUS_FAULT) ||
3191 		    (aflt->flt_stat & P_AFSR_CE)) ?
3192 		    ce_verbose_memory : ce_verbose_other;
3193 
3194 		if (!verbose)
3195 			return;
3196 
3197 		console_log_flag = (verbose > 1);
3198 	}
3199 
3200 	if (console_log_flag)
3201 		(void) sprintf(buf, "[AFT%d]", tagnum);
3202 	else
3203 		(void) sprintf(buf, "![AFT%d]", tagnum);
3204 
3205 	p = buf + strlen(buf);	/* current buffer position */
3206 	q = buf + sizeof (buf);	/* pointer past end of buffer */
3207 
3208 	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
3209 		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
3210 		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
3211 		p += strlen(p);
3212 	}
3213 
3214 	/*
3215 	 * Copy the caller's format string verbatim into buf[].  It will be
3216 	 * formatted by the call to vcmn_err() at the end of this function.
3217 	 */
3218 	if (fmt != NULL && p < q) {
3219 		(void) strncpy(p, fmt, (size_t)(q - p - 1));
3220 		buf[sizeof (buf) - 1] = '\0';
3221 		p += strlen(p);
3222 	}
3223 
3224 	if (spflt != NULL) {
3225 		if (logflags & CPU_FLTCPU) {
3226 			(void) snprintf(p, (size_t)(q - p), " CPU%d",
3227 			    aflt->flt_inst);
3228 			p += strlen(p);
3229 		}
3230 
3231 		if (logflags & CPU_SPACE) {
3232 			if (aflt->flt_status & ECC_D_TRAP)
3233 				(void) snprintf(p, (size_t)(q - p),
3234 				    " Data access");
3235 			else if (aflt->flt_status & ECC_I_TRAP)
3236 				(void) snprintf(p, (size_t)(q - p),
3237 				    " Instruction access");
3238 			p += strlen(p);
3239 		}
3240 
3241 		if (logflags & CPU_TL) {
3242 			(void) snprintf(p, (size_t)(q - p), " at TL%s",
3243 			    aflt->flt_tl ? ">0" : "=0");
3244 			p += strlen(p);
3245 		}
3246 
3247 		if (logflags & CPU_ERRID) {
3248 			(void) snprintf(p, (size_t)(q - p),
3249 			    ", errID 0x%08x.%08x",
3250 			    (uint32_t)(aflt->flt_id >> 32),
3251 			    (uint32_t)aflt->flt_id);
3252 			p += strlen(p);
3253 		}
3254 
3255 		if (logflags & CPU_AFSR) {
3256 			(void) snprintf(p, (size_t)(q - p),
3257 			    "\n    AFSR 0x%08b.%08b",
3258 			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
3259 			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
3260 			p += strlen(p);
3261 		}
3262 
3263 		if (logflags & CPU_AFAR) {
3264 			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
3265 			    (uint32_t)(aflt->flt_addr >> 32),
3266 			    (uint32_t)aflt->flt_addr);
3267 			p += strlen(p);
3268 		}
3269 
3270 		if (logflags & CPU_AF_PSYND) {
3271 			ushort_t psynd = (ushort_t)
3272 			    (aflt->flt_stat & P_AFSR_P_SYND);
3273 
3274 			(void) snprintf(p, (size_t)(q - p),
3275 			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
3276 			    psynd, ecc_psynd_score(psynd));
3277 			p += strlen(p);
3278 		}
3279 
3280 		if (logflags & CPU_AF_ETS) {
3281 			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
3282 			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
3283 			p += strlen(p);
3284 		}
3285 
3286 		if (logflags & CPU_FAULTPC) {
3287 			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
3288 			    (void *)aflt->flt_pc);
3289 			p += strlen(p);
3290 		}
3291 
3292 		if (logflags & CPU_UDBH) {
3293 			(void) snprintf(p, (size_t)(q - p),
3294 			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
3295 			    spflt->flt_sdbh, UDB_FMTSTR,
3296 			    spflt->flt_sdbh & 0xFF);
3297 			p += strlen(p);
3298 		}
3299 
3300 		if (logflags & CPU_UDBL) {
3301 			(void) snprintf(p, (size_t)(q - p),
3302 			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
3303 			    spflt->flt_sdbl, UDB_FMTSTR,
3304 			    spflt->flt_sdbl & 0xFF);
3305 			p += strlen(p);
3306 		}
3307 
3308 		if (logflags & CPU_SYND) {
3309 			ushort_t synd = SYND(aflt->flt_synd);
3310 
3311 			(void) snprintf(p, (size_t)(q - p),
3312 			    "\n    %s Syndrome 0x%x Memory Module ",
3313 			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
3314 			p += strlen(p);
3315 		}
3316 	}
3317 
3318 	if (endstr != NULL) {
3319 		if (!(logflags & CPU_SYND))
3320 			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
3321 		else
3322 			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
3323 		p += strlen(p);
3324 	}
3325 
3326 	if (ce_code == CE_CONT && (p < q - 1))
3327 		(void) strcpy(p, "\n"); /* add final \n if needed */
3328 
3329 	va_start(ap, fmt);
3330 	vcmn_err(ce_code, buf, ap);
3331 	va_end(ap);
3332 }
3333 
3334 /*
3335  * Ecache Scrubbing
3336  *
3337  * The basic idea is to prevent lines from sitting in the ecache long enough
3338  * to build up soft errors which can lead to ecache parity errors.
3339  *
3340  * The following rules are observed when flushing the ecache:
3341  *
3342  * 1. When the system is busy, flush bad clean lines
3343  * 2. When the system is idle, flush all clean lines
3344  * 3. When the system is idle, flush good dirty lines
3345  * 4. Never flush bad dirty lines.
3346  *
3347  *	modify	parity	busy   idle
3348  *	----------------------------
3349  *	clean	good		X
3350  * 	clean	bad	X	X
3351  * 	dirty	good		X
3352  *	dirty	bad
3353  *
3354  * Bad or good refers to whether a line has an E$ parity error or not.
3355  * Clean or dirty refers to the state of the modified bit.  We currently
3356  * default the scan rate to 100 (scan 10% of the cache per second).
3357  *
3358  * The following are E$ states and actions.
3359  *
3360  * We encode our state as a 3-bit number, consisting of:
3361  *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
3362  *	ECACHE_STATE_PARITY	(0=good,  1=bad)
3363  *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
3364  *
3365  * We associate a flushing and a logging action with each state.
3366  *
3367  * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3368  * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
3369  * E$ only, in addition to value being set by ec_flush.
3370  */
3371 
3372 #define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
3373 #define	NEVER_FLUSH		0x0	/* never the flush the E$ line */
3374 #define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */
3375 
3376 struct {
3377 	char	ec_flush;		/* whether to flush or not */
3378 	char	ec_log;			/* ecache logging */
3379 	char	ec_log_type;		/* log type info */
3380 } ec_action[] = {	/* states of the E$ line in M P B */
3381 	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
3382 	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
3383 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
3384 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
3385 	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
3386 	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
3387 	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
3388 	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
3389 };
3390 
3391 /*
3392  * Offsets into the ec_action[] that determines clean_good_busy and
3393  * dirty_good_busy lines.
3394  */
3395 #define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
3396 #define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */
3397 
3398 /*
3399  * We are flushing lines which are Clean_Good_Busy and also the lines
3400  * Dirty_Good_Busy. And we only follow it for non-mirrored E$.
3401  */
3402 #define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR))
3403 #define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR))
3404 
3405 #define	ECACHE_STATE_MODIFIED	0x4
3406 #define	ECACHE_STATE_PARITY	0x2
3407 #define	ECACHE_STATE_BUSY	0x1
3408 
3409 /*
3410  * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced.
3411  */
3412 int ecache_calls_a_sec_mirrored = 1;
3413 int ecache_lines_per_call_mirrored = 1;
3414 
3415 int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
3416 int ecache_scrub_verbose = 1;		/* prints clean and dirty lines */
3417 int ecache_scrub_panic = 0;		/* panics on a clean and dirty line */
3418 int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
3419 int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
3420 int ecache_idle_factor = 1;		/* increase the scan rate when idle */
3421 int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
3422 int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */
3423 
3424 volatile int ec_timeout_calls = 1;	/* timeout calls */
3425 
3426 /*
3427  * Interrupt number and pil for ecache scrubber cross-trap calls.
3428  */
3429 static uint_t ecache_scrub_inum;
3430 uint_t ecache_scrub_pil = PIL_9;
3431 
3432 /*
3433  * Kstats for the E$ scrubber.
3434  */
3435 typedef struct ecache_kstat {
3436 	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
3437 	kstat_named_t clean_good_busy;		/* # of lines skipped */
3438 	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
3439 	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
3440 	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
3441 	kstat_named_t dirty_good_busy;		/* # of lines skipped */
3442 	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
3443 	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
3444 	kstat_named_t invalid_lines;		/* # of invalid lines */
3445 	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
3446 	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
3447 	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
3448 } ecache_kstat_t;
3449 
3450 static ecache_kstat_t ec_kstat_template = {
3451 	{ "clean_good_idle", KSTAT_DATA_ULONG },
3452 	{ "clean_good_busy", KSTAT_DATA_ULONG },
3453 	{ "clean_bad_idle", KSTAT_DATA_ULONG },
3454 	{ "clean_bad_busy", KSTAT_DATA_ULONG },
3455 	{ "dirty_good_idle", KSTAT_DATA_ULONG },
3456 	{ "dirty_good_busy", KSTAT_DATA_ULONG },
3457 	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
3458 	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
3459 	{ "invalid_lines", KSTAT_DATA_ULONG },
3460 	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
3461 	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
3462 	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
3463 };
3464 
3465 struct kmem_cache *sf_private_cache;
3466 
3467 /*
3468  * Called periodically on each CPU to scan the ecache once a sec.
3469  * adjusting the ecache line index appropriately
3470  */
3471 void
3472 scrub_ecache_line()
3473 {
3474 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3475 	int cpuid = CPU->cpu_id;
3476 	uint32_t index = ssmp->ecache_flush_index;
3477 	uint64_t ec_size = cpunodes[cpuid].ecache_size;
3478 	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
3479 	int nlines = ssmp->ecache_nlines;
3480 	uint32_t ec_set_size = ec_size / ecache_associativity;
3481 	int ec_mirror = ssmp->ecache_mirror;
3482 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
3483 
3484 	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
3485 	int mpb;		/* encode Modified, Parity, Busy for action */
3486 	uchar_t state;
3487 	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
3488 	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3489 	ec_data_t ec_data[8];
3490 	kstat_named_t *ec_knp;
3491 
3492 	switch (ec_mirror) {
3493 		default:
3494 		case ECACHE_CPU_NON_MIRROR:
3495 			/*
3496 			 * The E$ scan rate is expressed in units of tenths of
3497 			 * a percent.  ecache_scan_rate = 1000 (100%) means the
3498 			 * whole cache is scanned every second.
3499 			 */
3500 			scan_lines = (nlines * ecache_scan_rate) /
3501 					(1000 * ecache_calls_a_sec);
3502 			if (!(ssmp->ecache_busy)) {
3503 				if (ecache_idle_factor > 0) {
3504 					scan_lines *= ecache_idle_factor;
3505 				}
3506 			} else {
3507 				flush_clean_busy = (scan_lines *
3508 					ecache_flush_clean_good_busy) / 100;
3509 				flush_dirty_busy = (scan_lines *
3510 					ecache_flush_dirty_good_busy) / 100;
3511 			}
3512 
3513 			ec_timeout_calls = (ecache_calls_a_sec ?
3514 						ecache_calls_a_sec : 1);
3515 			break;
3516 
3517 		case ECACHE_CPU_MIRROR:
3518 			scan_lines = ecache_lines_per_call_mirrored;
3519 			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
3520 					ecache_calls_a_sec_mirrored : 1);
3521 			break;
3522 	}
3523 
3524 	/*
3525 	 * The ecache scrubber algorithm operates by reading and
3526 	 * decoding the E$ tag to determine whether the corresponding E$ line
3527 	 * can be scrubbed. There is a implicit assumption in the scrubber
3528 	 * logic that the E$ tag is valid. Unfortunately, this assertion is
3529 	 * flawed since the E$ tag may also be corrupted and have parity errors
3530 	 * The scrubber logic is enhanced to check the validity of the E$ tag
3531 	 * before scrubbing. When a parity error is detected in the E$ tag,
3532 	 * it is possible to recover and scrub the tag under certain conditions
3533 	 * so that a ETP error condition can be avoided.
3534 	 */
3535 
3536 	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
3537 		/*
3538 		 * We get the old-AFSR before clearing the AFSR sticky bits
3539 		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
3540 		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
3541 		 */
3542 		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
3543 		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
3544 				cpu_ec_state_shift);
3545 
3546 		/*
3547 		 * ETP is set try to scrub the ecache tag.
3548 		 */
3549 		if (nafsr & P_AFSR_ETP) {
3550 			ecache_scrub_tag_err(nafsr, state, index);
3551 		} else if (state & cpu_ec_state_valid) {
3552 			/*
3553 			 * ETP is not set, E$ tag is valid.
3554 			 * Proceed with the E$ scrubbing.
3555 			 */
3556 			if (state & cpu_ec_state_dirty)
3557 				mpb |= ECACHE_STATE_MODIFIED;
3558 
3559 			tafsr = check_ecache_line(index, acc_afsr);
3560 
3561 			if (tafsr & P_AFSR_EDP) {
3562 				mpb |= ECACHE_STATE_PARITY;
3563 
3564 				if (ecache_scrub_verbose ||
3565 							ecache_scrub_panic) {
3566 					get_ecache_dtag(P2ALIGN(index, 64),
3567 						(uint64_t *)&ec_data[0],
3568 						&ec_tag, &oafsr, acc_afsr);
3569 				}
3570 			}
3571 
3572 			if (ssmp->ecache_busy)
3573 				mpb |= ECACHE_STATE_BUSY;
3574 
3575 			ec_knp = (kstat_named_t *)ec_ksp + mpb;
3576 			ec_knp->value.ul++;
3577 
3578 			paddr = ((ec_tag & cpu_ec_tag_mask) <<
3579 				cpu_ec_tag_shift) | (index % ec_set_size);
3580 
3581 			/*
3582 			 * We flush the E$ lines depending on the ec_flush,
3583 			 * we additionally flush clean_good_busy and
3584 			 * dirty_good_busy lines for mirrored E$.
3585 			 */
3586 			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
3587 				flushecacheline(paddr, ec_size);
3588 			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
3589 				(ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
3590 					flushecacheline(paddr, ec_size);
3591 			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
3592 				softcall(ecache_page_retire, (void *)paddr);
3593 			}
3594 
3595 			/*
3596 			 * Conditionally flush both the clean_good and
3597 			 * dirty_good lines when busy.
3598 			 */
3599 			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
3600 				flush_clean_busy--;
3601 				flushecacheline(paddr, ec_size);
3602 				ec_ksp->clean_good_busy_flush.value.ul++;
3603 			} else if (DGB(mpb, ec_mirror) &&
3604 						(flush_dirty_busy > 0)) {
3605 				flush_dirty_busy--;
3606 				flushecacheline(paddr, ec_size);
3607 				ec_ksp->dirty_good_busy_flush.value.ul++;
3608 			}
3609 
3610 			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
3611 						ecache_scrub_panic)) {
3612 				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
3613 						tafsr);
3614 			}
3615 
3616 		} else {
3617 			ec_ksp->invalid_lines.value.ul++;
3618 		}
3619 
3620 		if ((index += ec_linesize) >= ec_size)
3621 			index = 0;
3622 
3623 	}
3624 
3625 	/*
3626 	 * set the ecache scrub index for the next time around
3627 	 */
3628 	ssmp->ecache_flush_index = index;
3629 
3630 	if (*acc_afsr & P_AFSR_CP) {
3631 		uint64_t ret_afsr;
3632 
3633 		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
3634 		if ((ret_afsr & P_AFSR_CP) == 0)
3635 			*acc_afsr = 0;
3636 	}
3637 }
3638 
3639 /*
3640  * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
3641  * we decrement the outstanding request count to zero.
3642  */
3643 
3644 /*ARGSUSED*/
3645 uint_t
3646 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
3647 {
3648 	int i;
3649 	int outstanding;
3650 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3651 	uint32_t *countp = &ssmp->ec_scrub_outstanding;
3652 
3653 	do {
3654 		outstanding = *countp;
3655 		ASSERT(outstanding > 0);
3656 		for (i = 0; i < outstanding; i++)
3657 			scrub_ecache_line();
3658 	} while (atomic_add_32_nv(countp, -outstanding));
3659 
3660 	return (DDI_INTR_CLAIMED);
3661 }
3662 
3663 /*
3664  * force each cpu to perform an ecache scrub, called from a timeout
3665  */
3666 extern xcfunc_t ecache_scrubreq_tl1;
3667 
3668 void
3669 do_scrub_ecache_line(void)
3670 {
3671 	long delta;
3672 
3673 	if (ecache_calls_a_sec > hz)
3674 		ecache_calls_a_sec = hz;
3675 	else if (ecache_calls_a_sec <= 0)
3676 	    ecache_calls_a_sec = 1;
3677 
3678 	if (ecache_calls_a_sec_mirrored > hz)
3679 		ecache_calls_a_sec_mirrored = hz;
3680 	else if (ecache_calls_a_sec_mirrored <= 0)
3681 	    ecache_calls_a_sec_mirrored = 1;
3682 
3683 	if (ecache_scrub_enable) {
3684 		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
3685 		delta = hz / ec_timeout_calls;
3686 	} else {
3687 		delta = hz;
3688 	}
3689 
3690 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3691 		delta);
3692 }
3693 
3694 /*
3695  * initialization for ecache scrubbing
3696  * This routine is called AFTER all cpus have had cpu_init_private called
3697  * to initialize their private data areas.
3698  */
3699 void
3700 cpu_init_cache_scrub(void)
3701 {
3702 	if (ecache_calls_a_sec > hz) {
3703 		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
3704 		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
3705 		ecache_calls_a_sec = hz;
3706 	}
3707 
3708 	/*
3709 	 * Register softint for ecache scrubbing.
3710 	 */
3711 	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
3712 	    scrub_ecache_line_intr, NULL);
3713 
3714 	/*
3715 	 * kick off the scrubbing using realtime timeout
3716 	 */
3717 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3718 	    hz / ecache_calls_a_sec);
3719 }
3720 
3721 /*
3722  * Unset the busy flag for this cpu.
3723  */
3724 void
3725 cpu_idle_ecache_scrub(struct cpu *cp)
3726 {
3727 	if (CPU_PRIVATE(cp) != NULL) {
3728 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3729 							sfpr_scrub_misc);
3730 		ssmp->ecache_busy = ECACHE_CPU_IDLE;
3731 	}
3732 }
3733 
3734 /*
3735  * Set the busy flag for this cpu.
3736  */
3737 void
3738 cpu_busy_ecache_scrub(struct cpu *cp)
3739 {
3740 	if (CPU_PRIVATE(cp) != NULL) {
3741 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3742 							sfpr_scrub_misc);
3743 		ssmp->ecache_busy = ECACHE_CPU_BUSY;
3744 	}
3745 }
3746 
3747 /*
3748  * initialize the ecache scrubber data structures
3749  * The global entry point cpu_init_private replaces this entry point.
3750  *
3751  */
3752 static void
3753 cpu_init_ecache_scrub_dr(struct cpu *cp)
3754 {
3755 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3756 	int cpuid = cp->cpu_id;
3757 
3758 	/*
3759 	 * intialize bookkeeping for cache scrubbing
3760 	 */
3761 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3762 
3763 	ssmp->ecache_flush_index = 0;
3764 
3765 	ssmp->ecache_nlines =
3766 		cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
3767 
3768 	/*
3769 	 * Determine whether we are running on mirrored SRAM
3770 	 */
3771 
3772 	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
3773 		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
3774 	else
3775 		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
3776 
3777 	cpu_busy_ecache_scrub(cp);
3778 
3779 	/*
3780 	 * initialize the kstats
3781 	 */
3782 	ecache_kstat_init(cp);
3783 }
3784 
3785 /*
3786  * uninitialize the ecache scrubber data structures
3787  * The global entry point cpu_uninit_private replaces this entry point.
3788  */
3789 static void
3790 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
3791 {
3792 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3793 
3794 	if (ssmp->ecache_ksp != NULL) {
3795 		kstat_delete(ssmp->ecache_ksp);
3796 		ssmp->ecache_ksp = NULL;
3797 	}
3798 
3799 	/*
3800 	 * un-initialize bookkeeping for cache scrubbing
3801 	 */
3802 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3803 
3804 	cpu_idle_ecache_scrub(cp);
3805 }
3806 
3807 struct kmem_cache *sf_private_cache;
3808 
3809 /*
3810  * Cpu private initialization.  This includes allocating the cpu_private
3811  * data structure, initializing it, and initializing the scrubber for this
3812  * cpu.  This is called once for EVERY cpu, including CPU 0. This function
3813  * calls cpu_init_ecache_scrub_dr to init the scrubber.
3814  * We use kmem_cache_create for the spitfire private data structure because it
3815  * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
3816  */
3817 void
3818 cpu_init_private(struct cpu *cp)
3819 {
3820 	spitfire_private_t *sfprp;
3821 
3822 	ASSERT(CPU_PRIVATE(cp) == NULL);
3823 
3824 	/*
3825 	 * If the sf_private_cache has not been created, create it.
3826 	 */
3827 	if (sf_private_cache == NULL) {
3828 		sf_private_cache = kmem_cache_create("sf_private_cache",
3829 			sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3830 			NULL, NULL, NULL, NULL, 0);
3831 		ASSERT(sf_private_cache);
3832 	}
3833 
3834 	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3835 
3836 	bzero(sfprp, sizeof (spitfire_private_t));
3837 
3838 	cpu_init_ecache_scrub_dr(cp);
3839 }
3840 
3841 /*
3842  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
3843  * deallocate the scrubber data structures and cpu_private data structure.
3844  * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit
3845  * the scrubber for the specified cpu.
3846  */
3847 void
3848 cpu_uninit_private(struct cpu *cp)
3849 {
3850 	ASSERT(CPU_PRIVATE(cp));
3851 
3852 	cpu_uninit_ecache_scrub_dr(cp);
3853 	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3854 	CPU_PRIVATE(cp) = NULL;
3855 }
3856 
3857 /*
3858  * initialize the ecache kstats for each cpu
3859  */
3860 static void
3861 ecache_kstat_init(struct cpu *cp)
3862 {
3863 	struct kstat *ksp;
3864 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3865 
3866 	ASSERT(ssmp != NULL);
3867 
3868 	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3869 	    KSTAT_TYPE_NAMED,
3870 	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3871 	    KSTAT_FLAG_WRITABLE)) == NULL) {
3872 		ssmp->ecache_ksp = NULL;
3873 		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3874 		return;
3875 	}
3876 
3877 	ssmp->ecache_ksp = ksp;
3878 	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3879 	kstat_install(ksp);
3880 }
3881 
3882 /*
3883  * log the bad ecache information
3884  */
3885 static void
3886 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3887 		uint64_t afsr)
3888 {
3889 	spitf_async_flt spf_flt;
3890 	struct async_flt *aflt;
3891 	int i;
3892 	char *class;
3893 
3894 	bzero(&spf_flt, sizeof (spitf_async_flt));
3895 	aflt = &spf_flt.cmn_asyncflt;
3896 
3897 	for (i = 0; i < 8; i++) {
3898 		spf_flt.flt_ec_data[i] = ec_data[i];
3899 	}
3900 
3901 	spf_flt.flt_ec_tag = ec_tag;
3902 
3903 	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
3904 		spf_flt.flt_type = ec_action[mpb].ec_log_type;
3905 	} else spf_flt.flt_type = (ushort_t)mpb;
3906 
3907 	aflt->flt_inst = CPU->cpu_id;
3908 	aflt->flt_class = CPU_FAULT;
3909 	aflt->flt_id = gethrtime_waitfree();
3910 	aflt->flt_addr = paddr;
3911 	aflt->flt_stat = afsr;
3912 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3913 
3914 	switch (mpb) {
3915 	case CPU_ECACHE_TAG_ERR:
3916 	case CPU_ECACHE_ADDR_PAR_ERR:
3917 	case CPU_ECACHE_ETP_ETS_ERR:
3918 	case CPU_ECACHE_STATE_ERR:
3919 		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3920 		break;
3921 	default:
3922 		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3923 		break;
3924 	}
3925 
3926 	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3927 	    ue_queue, aflt->flt_panic);
3928 
3929 	if (aflt->flt_panic)
3930 		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
3931 					"line detected");
3932 }
3933 
3934 /*
3935  * Process an ecache error that occured during the E$ scrubbing.
3936  * We do the ecache scan to find the bad line, flush the bad line
3937  * and start the memscrubber to find any UE (in memory or in another cache)
3938  */
3939 static uint64_t
3940 ecache_scrub_misc_err(int type, uint64_t afsr)
3941 {
3942 	spitf_async_flt spf_flt;
3943 	struct async_flt *aflt;
3944 	uint64_t oafsr;
3945 
3946 	bzero(&spf_flt, sizeof (spitf_async_flt));
3947 	aflt = &spf_flt.cmn_asyncflt;
3948 
3949 	/*
3950 	 * Scan each line in the cache to look for the one
3951 	 * with bad parity
3952 	 */
3953 	aflt->flt_addr = AFLT_INV_ADDR;
3954 	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3955 		&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3956 
3957 	if (oafsr & P_AFSR_CP) {
3958 		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3959 		*cp_afsr |= oafsr;
3960 	}
3961 
3962 	/*
3963 	 * If we found a bad PA, update the state to indicate if it is
3964 	 * memory or I/O space.
3965 	 */
3966 	if (aflt->flt_addr != AFLT_INV_ADDR) {
3967 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3968 			MMU_PAGESHIFT)) ? 1 : 0;
3969 	}
3970 
3971 	spf_flt.flt_type = (ushort_t)type;
3972 
3973 	aflt->flt_inst = CPU->cpu_id;
3974 	aflt->flt_class = CPU_FAULT;
3975 	aflt->flt_id = gethrtime_waitfree();
3976 	aflt->flt_status = afsr;
3977 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3978 
3979 	/*
3980 	 * We have the bad line, flush that line and start
3981 	 * the memscrubber.
3982 	 */
3983 	if (spf_flt.flt_ec_lcnt > 0) {
3984 		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
3985 			cpunodes[CPU->cpu_id].ecache_size);
3986 		read_all_memscrub = 1;
3987 		memscrub_run();
3988 	}
3989 
3990 	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
3991 	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
3992 	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
3993 
3994 	return (oafsr);
3995 }
3996 
3997 static void
3998 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
3999 {
4000 	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
4001 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
4002 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
4003 	uint64_t ec_tag, paddr, oafsr;
4004 	ec_data_t ec_data[8];
4005 	int cpuid = CPU->cpu_id;
4006 	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
4007 						ecache_associativity;
4008 	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
4009 
4010 	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
4011 			&oafsr, cpu_afsr);
4012 	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
4013 						(index % ec_set_size);
4014 
4015 	/*
4016 	 * E$ tag state has good parity
4017 	 */
4018 	if ((afsr_ets & cpu_ec_state_parity) == 0) {
4019 		if (afsr_ets & cpu_ec_parity) {
4020 			/*
4021 			 * E$ tag state bits indicate the line is clean,
4022 			 * invalidate the E$ tag and continue.
4023 			 */
4024 			if (!(state & cpu_ec_state_dirty)) {
4025 				/*
4026 				 * Zero the tag and mark the state invalid
4027 				 * with good parity for the tag.
4028 				 */
4029 				if (isus2i || isus2e)
4030 					write_hb_ec_tag_parity(index);
4031 				else
4032 					write_ec_tag_parity(index);
4033 
4034 				/* Sync with the dual tag */
4035 				flushecacheline(0,
4036 					cpunodes[CPU->cpu_id].ecache_size);
4037 				ec_ksp->tags_cleared.value.ul++;
4038 				ecache_scrub_log(ec_data, ec_tag, paddr,
4039 					CPU_ECACHE_TAG_ERR, afsr);
4040 				return;
4041 			} else {
4042 				ecache_scrub_log(ec_data, ec_tag, paddr,
4043 					CPU_ECACHE_ADDR_PAR_ERR, afsr);
4044 				cmn_err(CE_PANIC, " E$ tag address has bad"
4045 							" parity");
4046 			}
4047 		} else if ((afsr_ets & cpu_ec_parity) == 0) {
4048 			/*
4049 			 * ETS is zero but ETP is set
4050 			 */
4051 			ecache_scrub_log(ec_data, ec_tag, paddr,
4052 				CPU_ECACHE_ETP_ETS_ERR, afsr);
4053 			cmn_err(CE_PANIC, "AFSR.ETP is set and"
4054 				" AFSR.ETS is zero");
4055 		}
4056 	} else {
4057 		/*
4058 		 * E$ tag state bit has a bad parity
4059 		 */
4060 		ecache_scrub_log(ec_data, ec_tag, paddr,
4061 				CPU_ECACHE_STATE_ERR, afsr);
4062 		cmn_err(CE_PANIC, "E$ tag state has bad parity");
4063 	}
4064 }
4065 
4066 static void
4067 ecache_page_retire(void *arg)
4068 {
4069 	uint64_t paddr = (uint64_t)arg;
4070 	(void) page_retire(paddr, PR_UE);
4071 }
4072 
4073 void
4074 sticksync_slave(void)
4075 {}
4076 
4077 void
4078 sticksync_master(void)
4079 {}
4080 
4081 /*ARGSUSED*/
4082 void
4083 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
4084 {}
4085 
4086 void
4087 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4088 {
4089 	int status;
4090 	ddi_fm_error_t de;
4091 
4092 	bzero(&de, sizeof (ddi_fm_error_t));
4093 
4094 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4095 	    FM_ENA_FMT1);
4096 	de.fme_flag = expected;
4097 	de.fme_bus_specific = (void *)aflt->flt_addr;
4098 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4099 
4100 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4101 		aflt->flt_panic = 1;
4102 }
4103 
4104 /*ARGSUSED*/
4105 void
4106 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
4107     errorq_t *eqp, uint_t flag)
4108 {
4109 	struct async_flt *aflt = (struct async_flt *)payload;
4110 
4111 	aflt->flt_erpt_class = error_class;
4112 	errorq_dispatch(eqp, payload, payload_sz, flag);
4113 }
4114 
4115 #define	MAX_SIMM	8
4116 
4117 struct ce_info {
4118 	char    name[UNUM_NAMLEN];
4119 	uint64_t intermittent_total;
4120 	uint64_t persistent_total;
4121 	uint64_t sticky_total;
4122 	unsigned short leaky_bucket_cnt;
4123 };
4124 
4125 /*
4126  * Separately-defined structure for use in reporting the ce_info
4127  * to SunVTS without exposing the internal layout and implementation
4128  * of struct ce_info.
4129  */
4130 static struct ecc_error_info ecc_error_info_data = {
4131 	{ "version", KSTAT_DATA_UINT32 },
4132 	{ "maxcount", KSTAT_DATA_UINT32 },
4133 	{ "count", KSTAT_DATA_UINT32 }
4134 };
4135 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
4136     sizeof (struct kstat_named);
4137 
4138 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
4139 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
4140 #endif
4141 
4142 struct ce_info  *mem_ce_simm = NULL;
4143 size_t mem_ce_simm_size = 0;
4144 
4145 /*
4146  * Default values for the number of CE's allowed per interval.
4147  * Interval is defined in minutes
4148  * SOFTERR_MIN_TIMEOUT is defined in microseconds
4149  */
4150 #define	SOFTERR_LIMIT_DEFAULT		2
4151 #define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
4152 #define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
4153 #define	TIMEOUT_NONE			((timeout_id_t)0)
4154 #define	TIMEOUT_SET			((timeout_id_t)1)
4155 
4156 /*
4157  * timeout identifer for leaky_bucket
4158  */
4159 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
4160 
4161 /*
4162  * Tunables for maximum number of allowed CE's in a given time
4163  */
4164 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4165 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4166 
4167 void
4168 cpu_mp_init(void)
4169 {
4170 	size_t size = cpu_aflt_size();
4171 	size_t i;
4172 	kstat_t *ksp;
4173 
4174 	/*
4175 	 * Initialize the CE error handling buffers.
4176 	 */
4177 	mem_ce_simm_size = MAX_SIMM * max_ncpus;
4178 	size = sizeof (struct ce_info) * mem_ce_simm_size;
4179 	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
4180 
4181 	ksp = kstat_create("unix", 0, "ecc-info", "misc",
4182 	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
4183 	if (ksp != NULL) {
4184 		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
4185 		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
4186 		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
4187 		ecc_error_info_data.count.value.ui32 = 0;
4188 		kstat_install(ksp);
4189 	}
4190 
4191 	for (i = 0; i < mem_ce_simm_size; i++) {
4192 		struct kstat_ecc_mm_info *kceip;
4193 
4194 		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
4195 		    KM_SLEEP);
4196 		ksp = kstat_create("mm", i, "ecc-info", "misc",
4197 		    KSTAT_TYPE_NAMED,
4198 		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
4199 		    KSTAT_FLAG_VIRTUAL);
4200 		if (ksp != NULL) {
4201 			/*
4202 			 * Re-declare ks_data_size to include room for the
4203 			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
4204 			 * set.
4205 			 */
4206 			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
4207 			    KSTAT_CE_UNUM_NAMLEN;
4208 			ksp->ks_data = kceip;
4209 			kstat_named_init(&kceip->name,
4210 			    "name", KSTAT_DATA_STRING);
4211 			kstat_named_init(&kceip->intermittent_total,
4212 			    "intermittent_total", KSTAT_DATA_UINT64);
4213 			kstat_named_init(&kceip->persistent_total,
4214 			    "persistent_total", KSTAT_DATA_UINT64);
4215 			kstat_named_init(&kceip->sticky_total,
4216 			    "sticky_total", KSTAT_DATA_UINT64);
4217 			/*
4218 			 * Use the default snapshot routine as it knows how to
4219 			 * deal with named kstats with long strings.
4220 			 */
4221 			ksp->ks_update = ecc_kstat_update;
4222 			kstat_install(ksp);
4223 		} else {
4224 			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4225 		}
4226 	}
4227 }
4228 
4229 /*ARGSUSED*/
4230 static void
4231 leaky_bucket_timeout(void *arg)
4232 {
4233 	int i;
4234 	struct ce_info *psimm = mem_ce_simm;
4235 
4236 	for (i = 0; i < mem_ce_simm_size; i++) {
4237 		if (psimm[i].leaky_bucket_cnt > 0)
4238 			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
4239 	}
4240 	add_leaky_bucket_timeout();
4241 }
4242 
4243 static void
4244 add_leaky_bucket_timeout(void)
4245 {
4246 	long timeout_in_microsecs;
4247 
4248 	/*
4249 	 * create timeout for next leak.
4250 	 *
4251 	 * The timeout interval is calculated as follows
4252 	 *
4253 	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4254 	 *
4255 	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4256 	 * in a minute), then multiply this by MICROSEC to get the interval
4257 	 * in microseconds.  Divide this total by ecc_softerr_limit so that
4258 	 * the timeout interval is accurate to within a few microseconds.
4259 	 */
4260 
4261 	if (ecc_softerr_limit <= 0)
4262 		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4263 	if (ecc_softerr_interval <= 0)
4264 		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4265 
4266 	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
4267 	    ecc_softerr_limit;
4268 
4269 	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
4270 		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
4271 
4272 	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
4273 	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
4274 }
4275 
4276 /*
4277  * Legacy Correctable ECC Error Hash
4278  *
4279  * All of the code below this comment is used to implement a legacy array
4280  * which counted intermittent, persistent, and sticky CE errors by unum,
4281  * and then was later extended to publish the data as a kstat for SunVTS.
4282  * All of this code is replaced by FMA, and remains here until such time
4283  * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
4284  *
4285  * Errors are saved in three buckets per-unum:
4286  * (1) sticky - scrub was unsuccessful, cannot be scrubbed
4287  *     This could represent a problem, and is immediately printed out.
4288  * (2) persistent - was successfully scrubbed
4289  *     These errors use the leaky bucket algorithm to determine
4290  *     if there is a serious problem.
4291  * (3) intermittent - may have originated from the cpu or upa/safari bus,
4292  *     and does not necessarily indicate any problem with the dimm itself,
4293  *     is critical information for debugging new hardware.
4294  *     Because we do not know if it came from the dimm, it would be
4295  *     inappropriate to include these in the leaky bucket counts.
4296  *
4297  * If the E$ line was modified before the scrub operation began, then the
4298  * displacement flush at the beginning of scrubphys() will cause the modified
4299  * line to be written out, which will clean up the CE.  Then, any subsequent
4300  * read will not cause an error, which will cause persistent errors to be
4301  * identified as intermittent.
4302  *
4303  * If a DIMM is going bad, it will produce true persistents as well as
4304  * false intermittents, so these intermittents can be safely ignored.
4305  *
4306  * If the error count is excessive for a DIMM, this function will return
4307  * PR_MCE, and the CPU module may then decide to remove that page from use.
4308  */
4309 static int
4310 ce_count_unum(int status, int len, char *unum)
4311 {
4312 	int i;
4313 	struct ce_info *psimm = mem_ce_simm;
4314 	int page_status = PR_OK;
4315 
4316 	ASSERT(psimm != NULL);
4317 
4318 	if (len <= 0 ||
4319 	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
4320 		return (page_status);
4321 
4322 	/*
4323 	 * Initialize the leaky_bucket timeout
4324 	 */
4325 	if (casptr(&leaky_bucket_timeout_id,
4326 	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
4327 		add_leaky_bucket_timeout();
4328 
4329 	for (i = 0; i < mem_ce_simm_size; i++) {
4330 		if (psimm[i].name[0] == '\0') {
4331 			/*
4332 			 * Hit the end of the valid entries, add
4333 			 * a new one.
4334 			 */
4335 			(void) strncpy(psimm[i].name, unum, len);
4336 			if (status & ECC_STICKY) {
4337 				/*
4338 				 * Sticky - the leaky bucket is used to track
4339 				 * soft errors.  Since a sticky error is a
4340 				 * hard error and likely to be retired soon,
4341 				 * we do not count it in the leaky bucket.
4342 				 */
4343 				psimm[i].leaky_bucket_cnt = 0;
4344 				psimm[i].intermittent_total = 0;
4345 				psimm[i].persistent_total = 0;
4346 				psimm[i].sticky_total = 1;
4347 				cmn_err(CE_WARN,
4348 				    "[AFT0] Sticky Softerror encountered "
4349 				    "on Memory Module %s\n", unum);
4350 				page_status = PR_MCE;
4351 			} else if (status & ECC_PERSISTENT) {
4352 				psimm[i].leaky_bucket_cnt = 1;
4353 				psimm[i].intermittent_total = 0;
4354 				psimm[i].persistent_total = 1;
4355 				psimm[i].sticky_total = 0;
4356 			} else {
4357 				/*
4358 				 * Intermittent - Because the scrub operation
4359 				 * cannot find the error in the DIMM, we will
4360 				 * not count these in the leaky bucket
4361 				 */
4362 				psimm[i].leaky_bucket_cnt = 0;
4363 				psimm[i].intermittent_total = 1;
4364 				psimm[i].persistent_total = 0;
4365 				psimm[i].sticky_total = 0;
4366 			}
4367 			ecc_error_info_data.count.value.ui32++;
4368 			break;
4369 		} else if (strncmp(unum, psimm[i].name, len) == 0) {
4370 			/*
4371 			 * Found an existing entry for the current
4372 			 * memory module, adjust the counts.
4373 			 */
4374 			if (status & ECC_STICKY) {
4375 				psimm[i].sticky_total++;
4376 				cmn_err(CE_WARN,
4377 				    "[AFT0] Sticky Softerror encountered "
4378 				    "on Memory Module %s\n", unum);
4379 				page_status = PR_MCE;
4380 			} else if (status & ECC_PERSISTENT) {
4381 				int new_value;
4382 
4383 				new_value = atomic_add_16_nv(
4384 				    &psimm[i].leaky_bucket_cnt, 1);
4385 				psimm[i].persistent_total++;
4386 				if (new_value > ecc_softerr_limit) {
4387 					cmn_err(CE_WARN, "[AFT0] Most recent %d"
4388 					    " soft errors from Memory Module"
4389 					    " %s exceed threshold (N=%d,"
4390 					    " T=%dh:%02dm) triggering page"
4391 					    " retire", new_value, unum,
4392 					    ecc_softerr_limit,
4393 					    ecc_softerr_interval / 60,
4394 					    ecc_softerr_interval % 60);
4395 					atomic_add_16(
4396 					    &psimm[i].leaky_bucket_cnt, -1);
4397 					page_status = PR_MCE;
4398 				}
4399 			} else { /* Intermittent */
4400 				psimm[i].intermittent_total++;
4401 			}
4402 			break;
4403 		}
4404 	}
4405 
4406 	if (i >= mem_ce_simm_size)
4407 		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
4408 		    "space.\n");
4409 
4410 	return (page_status);
4411 }
4412 
4413 /*
4414  * Function to support counting of IO detected CEs.
4415  */
4416 void
4417 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
4418 {
4419 	int err;
4420 
4421 	err = ce_count_unum(ecc->flt_status, len, unum);
4422 	if (err != PR_OK && automatic_page_removal) {
4423 		(void) page_retire(ecc->flt_addr, err);
4424 	}
4425 }
4426 
4427 static int
4428 ecc_kstat_update(kstat_t *ksp, int rw)
4429 {
4430 	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
4431 	struct ce_info *ceip = mem_ce_simm;
4432 	int i = ksp->ks_instance;
4433 
4434 	if (rw == KSTAT_WRITE)
4435 		return (EACCES);
4436 
4437 	ASSERT(ksp->ks_data != NULL);
4438 	ASSERT(i < mem_ce_simm_size && i >= 0);
4439 
4440 	/*
4441 	 * Since we're not using locks, make sure that we don't get partial
4442 	 * data. The name is always copied before the counters are incremented
4443 	 * so only do this update routine if at least one of the counters is
4444 	 * non-zero, which ensures that ce_count_unum() is done, and the
4445 	 * string is fully copied.
4446 	 */
4447 	if (ceip[i].intermittent_total == 0 &&
4448 	    ceip[i].persistent_total == 0 &&
4449 	    ceip[i].sticky_total == 0) {
4450 		/*
4451 		 * Uninitialized or partially initialized. Ignore.
4452 		 * The ks_data buffer was allocated via kmem_zalloc,
4453 		 * so no need to bzero it.
4454 		 */
4455 		return (0);
4456 	}
4457 
4458 	kstat_named_setstr(&kceip->name, ceip[i].name);
4459 	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
4460 	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
4461 	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
4462 
4463 	return (0);
4464 }
4465 
4466 #define	VIS_BLOCKSIZE		64
4467 
4468 int
4469 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
4470 {
4471 	int ret, watched;
4472 
4473 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4474 	ret = dtrace_blksuword32(addr, data, 0);
4475 	if (watched)
4476 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4477 
4478 	return (ret);
4479 }
4480 
4481 /*ARGSUSED*/
4482 void
4483 cpu_faulted_enter(struct cpu *cp)
4484 {
4485 }
4486 
4487 /*ARGSUSED*/
4488 void
4489 cpu_faulted_exit(struct cpu *cp)
4490 {
4491 }
4492 
4493 static int mmu_disable_ism_large_pages = ((1 << TTE512K) |
4494 	(1 << TTE32M) | (1 << TTE256M));
4495 static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));
4496 
4497 /*
4498  * The function returns the US_II mmu-specific values for the
4499  * hat's disable_large_pages and disable_ism_large_pages variables.
4500  */
4501 int
4502 mmu_large_pages_disabled(uint_t flag)
4503 {
4504 	int pages_disable = 0;
4505 
4506 	if (flag == HAT_LOAD) {
4507 		pages_disable = mmu_disable_large_pages;
4508 	} else if (flag == HAT_LOAD_SHARE) {
4509 		pages_disable = mmu_disable_ism_large_pages;
4510 	}
4511 	return (pages_disable);
4512 }
4513 
4514 /*ARGSUSED*/
4515 void
4516 mmu_init_kernel_pgsz(struct hat *hat)
4517 {
4518 }
4519 
4520 size_t
4521 mmu_get_kernel_lpsize(size_t lpsize)
4522 {
4523 	uint_t tte;
4524 
4525 	if (lpsize == 0) {
4526 		/* no setting for segkmem_lpsize in /etc/system: use default */
4527 		return (MMU_PAGESIZE4M);
4528 	}
4529 
4530 	for (tte = TTE8K; tte <= TTE4M; tte++) {
4531 		if (lpsize == TTEBYTES(tte))
4532 			return (lpsize);
4533 	}
4534 
4535 	return (TTEBYTES(TTE8K));
4536 }
4537