xref: /titanic_51/usr/src/uts/sun4u/cpu/spitfire.c (revision ed78bdc4c40aa72221120749961b690306299ab8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/archsystm.h>
31 #include <sys/machparam.h>
32 #include <sys/machsystm.h>
33 #include <sys/cpu.h>
34 #include <sys/elf_SPARC.h>
35 #include <vm/hat_sfmmu.h>
36 #include <vm/page.h>
37 #include <vm/vm_dep.h>
38 #include <sys/cpuvar.h>
39 #include <sys/spitregs.h>
40 #include <sys/async.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/dditypes.h>
44 #include <sys/sunddi.h>
45 #include <sys/cpu_module.h>
46 #include <sys/prom_debug.h>
47 #include <sys/vmsystm.h>
48 #include <sys/prom_plat.h>
49 #include <sys/sysmacros.h>
50 #include <sys/intreg.h>
51 #include <sys/machtrap.h>
52 #include <sys/ontrap.h>
53 #include <sys/ivintr.h>
54 #include <sys/atomic.h>
55 #include <sys/panic.h>
56 #include <sys/ndifm.h>
57 #include <sys/fm/protocol.h>
58 #include <sys/fm/util.h>
59 #include <sys/fm/cpu/UltraSPARC-II.h>
60 #include <sys/ddi.h>
61 #include <sys/ecc_kstat.h>
62 #include <sys/watchpoint.h>
63 #include <sys/dtrace.h>
64 #include <sys/errclassify.h>
65 
66 uint_t	cpu_impl_dual_pgsz = 0;
67 
68 /*
69  * Structure for the 8 byte ecache data dump and the associated AFSR state.
70  * There will be 8 of these structures used to dump an ecache line (64 bytes).
71  */
72 typedef struct sf_ec_data_elm {
73 	uint64_t ec_d8;
74 	uint64_t ec_afsr;
75 } ec_data_t;
76 
77 /*
78  * Define spitfire (Ultra I/II) specific asynchronous error structure
79  */
80 typedef struct spitfire_async_flt {
81 	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
82 	ushort_t flt_type;		/* types of faults - cpu specific */
83 	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
84 	uint64_t flt_ec_tag;		/* E$ tag info */
85 	int flt_ec_lcnt;		/* number of bad E$ lines */
86 	ushort_t flt_sdbh;		/* UDBH reg */
87 	ushort_t flt_sdbl;		/* UDBL reg */
88 } spitf_async_flt;
89 
90 /*
91  * Prototypes for support routines in spitfire_asm.s:
92  */
93 extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
94 extern uint64_t get_lsu(void);
95 extern void set_lsu(uint64_t ncc);
96 extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
97 				uint64_t *oafsr, uint64_t *acc_afsr);
98 extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
99 extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
100 				uint64_t *acc_afsr);
101 extern uint64_t read_and_clear_afsr();
102 extern void write_ec_tag_parity(uint32_t id);
103 extern void write_hb_ec_tag_parity(uint32_t id);
104 
105 /*
106  * Spitfire module routines:
107  */
108 static void cpu_async_log_err(void *flt);
109 /*PRINTFLIKE6*/
110 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
111     uint_t logflags, const char *endstr, const char *fmt, ...);
112 
113 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
114 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
115 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
116 
117 static void log_ce_err(struct async_flt *aflt, char *unum);
118 static void log_ue_err(struct async_flt *aflt, char *unum);
119 static void check_misc_err(spitf_async_flt *spf_flt);
120 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
121 static int check_ecc(struct async_flt *aflt);
122 static uint_t get_cpu_status(uint64_t arg);
123 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
124 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
125 		int *m, uint64_t *afsr);
126 static void ecache_kstat_init(struct cpu *cp);
127 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
128 		uint64_t paddr, int mpb, uint64_t);
129 static uint64_t ecache_scrub_misc_err(int, uint64_t);
130 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
131 static void ecache_page_retire(void *);
132 static int ecc_kstat_update(kstat_t *ksp, int rw);
133 static int ce_count_unum(int status, int len, char *unum);
134 static void add_leaky_bucket_timeout(void);
135 static int synd_to_synd_code(int synd_status, ushort_t synd);
136 
137 extern uint_t read_all_memscrub;
138 extern void memscrub_run(void);
139 
140 static uchar_t	isus2i;			/* set if sabre */
141 static uchar_t	isus2e;			/* set if hummingbird */
142 
143 /*
144  * Default ecache mask and shift settings for Spitfire.  If we detect a
145  * different CPU implementation, we will modify these values at boot time.
146  */
147 static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
148 static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
149 static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
150 static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
151 static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
152 static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
153 static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
154 static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
155 static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
156 static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;
157 
158 /*
159  * Default ecache state bits for Spitfire.  These individual bits indicate if
160  * the given line is in any of the valid or modified states, respectively.
161  * Again, we modify these at boot if we detect a different CPU.
162  */
163 static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
164 static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
165 static uchar_t cpu_ec_parity		= S_EC_PARITY;
166 static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;
167 
168 /*
169  * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
171  * of this array have the following semantics:
172  *
173  *      00-63   The number of the bad bit, when only one bit is bad.
174  *      64      ECC bit C0 is bad.
175  *      65      ECC bit C1 is bad.
176  *      66      ECC bit C2 is bad.
177  *      67      ECC bit C3 is bad.
178  *      68      ECC bit C4 is bad.
179  *      69      ECC bit C5 is bad.
180  *      70      ECC bit C6 is bad.
181  *      71      ECC bit C7 is bad.
182  *      72      Two bits are bad.
183  *      73      Three bits are bad.
184  *      74      Four bits are bad.
185  *      75      More than Four bits are bad.
186  *      76      NO bits are bad.
187  * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
188  */
189 
190 #define	C0	64
191 #define	C1	65
192 #define	C2	66
193 #define	C3	67
194 #define	C4	68
195 #define	C5	69
196 #define	C6	70
197 #define	C7	71
198 #define	M2	72
199 #define	M3	73
200 #define	M4	74
201 #define	MX	75
202 #define	NA	76
203 
204 #define	SYND_IS_SINGLE_BIT_DATA(synd_code)	((synd_code >= 0) && \
205 						    (synd_code < C0))
206 #define	SYND_IS_SINGLE_BIT_CHK(synd_code)	((synd_code >= C0) && \
207 						    (synd_code <= C7))
208 
/*
 * Syndrome decode table, indexed by the 8-bit ECC syndrome value.
 * Entry semantics (data bit number 0-63, check bits C0-C7, multi-bit
 * codes M2/M3/M4/MX, and NA for "no bits bad") are described in the
 * block comment above.
 */
static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};
228 
229 #define	SYND_TBL_SIZE 256
230 
231 /*
232  * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
233  * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
234  */
235 #define	UDBL_REG	0x8000
236 #define	UDBL(synd)	((synd & UDBL_REG) >> 15)
237 #define	SYND(synd)	(synd & 0x7FFF)
238 
239 /*
240  * These error types are specific to Spitfire and are used internally for the
241  * spitfire fault structure flt_type field.
242  */
243 #define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
244 #define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
245 #define	CPU_WP_ERR		2	/* WP parity error */
246 #define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
247 #define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
248 #define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
249 #define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
250 #define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
251 #define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
252 #define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
253 #define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
254 #define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
255 #define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
256 #define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
257 #define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
258 #define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */
259 
260 /*
261  * Macro to access the "Spitfire cpu private" data structure.
262  */
263 #define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))
264 
265 /*
266  * set to 0 to disable automatic retiring of pages on
267  * DIMMs that have excessive soft errors
268  */
269 int automatic_page_removal = 1;
270 
271 /*
272  * Heuristic for figuring out which module to replace.
273  * Relative likelihood that this P_SYND indicates that this module is bad.
274  * We call it a "score", though, not a relative likelihood.
275  *
276  * Step 1.
277  * Assign a score to each byte of P_SYND according to the following rules:
278  * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
279  * If one bit on, give it a 95.
280  * If seven bits on, give it a 10.
281  * If two bits on:
282  *   in different nybbles, a 90
283  *   in same nybble, but unaligned, 85
284  *   in same nybble and as an aligned pair, 80
285  * If six bits on, look at the bits that are off:
286  *   in same nybble and as an aligned pair, 15
287  *   in same nybble, but unaligned, 20
288  *   in different nybbles, a 25
289  * If three bits on:
 *   in different nybbles, no aligned pairs, 75
 *   in different nybbles, one aligned pair, 70
292  *   in the same nybble, 65
293  * If five bits on, look at the bits that are off:
294  *   in the same nybble, 30
 *   in different nybbles, one aligned pair, 35
 *   in different nybbles, no aligned pairs, 40
297  * If four bits on:
298  *   all in one nybble, 45
299  *   as two aligned pairs, 50
300  *   one aligned pair, 55
301  *   no aligned pairs, 60
302  *
303  * Step 2:
304  * Take the higher of the two scores (one for each byte) as the score
305  * for the module.
306  *
307  * Print the score for each module, and field service should replace the
308  * module with the highest score.
309  */
310 
311 /*
312  * In the table below, the first row/column comment indicates the
313  * number of bits on in that nybble; the second row/column comment is
314  * the hex digit.
315  */
316 
317 static int
318 p_synd_score_table[256] = {
319 	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
320 	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
321 /* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
322 /* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
323 /* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
324 /* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
325 /* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
326 /* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
327 /* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
328 /* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
329 /* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
330 /* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
331 /* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
332 /* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
333 /* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
334 /* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
335 /* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
336 /* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
337 };
338 
339 int
340 ecc_psynd_score(ushort_t p_synd)
341 {
342 	int i, j, a, b;
343 
344 	i = p_synd & 0xFF;
345 	j = (p_synd >> 8) & 0xFF;
346 
347 	a = p_synd_score_table[i];
348 	b = p_synd_score_table[j];
349 
350 	return (a > b ? a : b);
351 }
352 
353 /*
354  * Async Fault Logging
355  *
356  * To ease identifying, reading, and filtering async fault log messages, the
357  * label [AFT#] is now prepended to each async fault message.  These messages
358  * and the logging rules are implemented by cpu_aflt_log(), below.
359  *
360  * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
361  *          This includes both corrected ECC memory and ecache faults.
362  *
363  * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
364  *          else except CE errors) with a priority of 1 (highest).  This tag
365  *          is also used for panic messages that result from an async fault.
366  *
367  * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
368  * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
369  *          of the E-$ data and tags.
370  *
371  * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
372  * printed on the console.  To send all AFT logs to both the log and the
373  * console, set aft_verbose = 1.
374  */
375 
376 #define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
377 #define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
378 #define	CPU_ERRID		0x0004	/* print flt_id */
379 #define	CPU_TL			0x0008	/* print flt_tl */
380 #define	CPU_ERRID_FIRST 	0x0010	/* print flt_id first in message */
381 #define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
382 #define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
383 #define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
384 #define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
385 #define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
386 #define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
387 #define	CPU_FAULTPC		0x0800	/* print flt_pc */
388 #define	CPU_SYND		0x1000	/* print flt_synd and unum */
389 
390 #define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
391 				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
392 				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
393 				CPU_FAULTPC)
394 #define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
395 #define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
396 				~CPU_SPACE)
397 #define	PARERR_LFLAGS	(CMN_LFLAGS)
398 #define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
399 #define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
400 				~CPU_FLTCPU & ~CPU_FAULTPC)
401 #define	BERRTO_LFLAGS	(CMN_LFLAGS)
402 #define	NO_LFLAGS	(0)
403 
404 #define	AFSR_FMTSTR0	"\020\1ME"
405 #define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
406 			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
407 #define	UDB_FMTSTR	"\020\012UE\011CE"
408 
409 /*
410  * Save the cache bootup state for use when internal
411  * caches are to be re-enabled after an error occurs.
412  */
413 uint64_t	cache_boot_state = 0;
414 
415 /*
416  * PA[31:0] represent Displacement in UPA configuration space.
417  */
418 uint_t	root_phys_addr_lo_mask = 0xffffffff;
419 
420 /*
421  * Spitfire legacy globals
422  */
423 int	itlb_entries;
424 int	dtlb_entries;
425 
/*
 * Early boot-time CPU setup for Spitfire-class (UltraSPARC I/II) CPUs.
 * Initializes cache flags, ELF hwcap/at_flags, page retirement policy,
 * page coloring, the VA hole boundaries, and the kpm mapping window.
 * Runs before cpunodes[] is initialized (see cpu_aflt_size() for work
 * deferred for that reason).
 */
void
cpu_setup(void)
{
	extern int page_retire_messages;
	extern int page_retire_first_ue;
	extern int at_flags;
#if defined(SF_ERRATA_57)
	extern caddr_t errata57_limit;
#endif
	/* Virtually-addressed, physically-tagged, I/O-coherent caches */
	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;

	/*
	 * Spitfire isn't currently FMA-aware, so we have to enable the
	 * page retirement messages. We also change the default policy
	 * for UE retirement to allow clearing of transient errors.
	 */
	page_retire_messages = 1;
	page_retire_first_ue = 0;

	/*
	 * Save the cache bootup state (I$/D$ enable bits in the LSU)
	 * for use when the internal caches are re-enabled after an error.
	 */
	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);

	if (use_page_coloring) {
		do_pg_coloring = 1;
	}

	/*
	 * Tune pp_slots to use up to 1/8th of the tlb entries.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores invalidate all pages of the d$ so pagecopy
	 * et. al. do not need virtual translations with virtual
	 * coloring taken into consideration.
	 */
	pp_consistent_coloring = 0;

	isa_list =
	    "sparcv9+vis sparcv9 "
	    "sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS;

	/*
	 * On Spitfire, there's a hole in the address space
	 * that we must never map (the hardware only supports 44 bits of
	 * virtual address).  Later CPUs are expected to have wider
	 * supported address ranges.
	 *
	 * See address map on p23 of the UltraSPARC 1 user's manual.
	 */
	hole_start = (caddr_t)0x80000000000ull;
	hole_end = (caddr_t)0xfffff80000000000ull;

	/*
	 * A spitfire call bug requires us to be a further 4Gbytes of
	 * firewall from the spec.
	 *
	 * See Spitfire Errata #21
	 */
	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
	kpm_size_shift = 41;
	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */

#if defined(SF_ERRATA_57)
	errata57_limit = (caddr_t)0x80000000ul;
#endif

	/*
	 * Disable large pages for user text by default (cap the user-text
	 * large-page size at the base page size).
	 * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
	 */
	max_utext_lpsize = MMU_PAGESIZE;
}
518 
519 static int
520 getintprop(pnode_t node, char *name, int deflt)
521 {
522 	int	value;
523 
524 	switch (prom_getproplen(node, name)) {
525 	case 0:
526 		value = 1;	/* boolean properties */
527 		break;
528 
529 	case sizeof (int):
530 		(void) prom_getprop(node, name, (caddr_t)&value);
531 		break;
532 
533 	default:
534 		value = deflt;
535 		break;
536 	}
537 
538 	return (value);
539 }
540 
541 /*
542  * Set the magic constants of the implementation.
543  */
/*
 * Read the cache/TLB geometry properties from the PROM node for this CPU
 * and derive the VAC parameters and copyin/copyout hardware-copy limits.
 */
void
cpu_fiximp(pnode_t dnode)
{
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;
	extern int dcache_line_mask;
	int i, a;
	/* PROM property name -> kernel variable it populates */
	static struct {
		char	*name;
		int	*var;
	} prop[] = {
		"dcache-size",		&dcache_size,
		"dcache-line-size",	&dcache_linesize,
		"icache-size",		&icache_size,
		"icache-line-size",	&icache_linesize,
		"ecache-size",		&ecache_size,
		"ecache-line-size",	&ecache_alignsize,
		"ecache-associativity", &ecache_associativity,
		"#itlb-entries",	&itlb_entries,
		"#dtlb-entries",	&dtlb_entries,
		};

	/* Only overwrite a variable if the PROM actually has the property */
	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
			*prop[i].var = a;
		}
	}

	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = S_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	/* compute vac_shift = log2(vac_size) */
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;

	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);

	/*
	 * UltraSPARC I & II have ecache sizes running
	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
	 * and 8 MB. Adjust the copyin/copyout limits
	 * according to the cache size. The magic number
	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
	 * VIS instructions.
	 *
	 * We assume that all CPUs on the system have the same size
	 * ecache. We're also called very early in the game.
	 * /etc/system will be parsed *after* we're called so
	 * these values can be overwritten.
	 */

	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
	if (ecache_size <= 524288) {
		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
	} else if (ecache_size == 1048576) {
		hw_copy_limit_2 = 1024;
		hw_copy_limit_4 = 1280;
		hw_copy_limit_8 = 1536;
	} else if (ecache_size == 2097152) {
		hw_copy_limit_2 = 1536;
		hw_copy_limit_4 = 2048;
		hw_copy_limit_8 = 2560;
	} else if (ecache_size == 4194304) {
		hw_copy_limit_2 = 2048;
		hw_copy_limit_4 = 2560;
		hw_copy_limit_8 = 3072;
	} else {
		/* 8 MB (or anything larger) */
		hw_copy_limit_2 = 2560;
		hw_copy_limit_4 = 3072;
		hw_copy_limit_8 = 3584;
	}
}
623 
624 /*
625  * Called by setcpudelay
626  */
/*
 * Initialize sys_tick_freq, preferring the TOD driver's notion of the
 * CPU frequency when the tod module is available.
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * Determine the cpu frequency by calling
	 * tod_get_cpufrequency. Use an approximate frequency
	 * value computed by the prom if the tod module
	 * is not initialized and loaded yet.
	 */
	if (tod_ops.tod_get_cpufrequency != NULL) {
		/* tod_lock serializes access to the TOD ops */
		mutex_enter(&tod_lock);
		sys_tick_freq = tod_ops.tod_get_cpufrequency();
		mutex_exit(&tod_lock);
	} else {
#if defined(HUMMINGBIRD)
		/*
		 * the hummingbird version of %stick is used as the basis for
		 * low level timing; this provides an independent constant-rate
		 * clock for general system use, and frees power mgmt to set
		 * various cpu clock speeds.
		 */
		if (system_clock_freq == 0)
			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
			    system_clock_freq);
		sys_tick_freq = system_clock_freq;
#else /* SPITFIRE */
		/* fall back to the PROM-derived per-CPU clock frequency */
		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
#endif
	}
}
657 
658 
659 void shipit(int upaid);
660 extern uint64_t xc_tick_limit;
661 extern uint64_t xc_tick_jump_limit;
662 
663 #ifdef SEND_MONDO_STATS
664 uint64_t x_early[NCPU][64];
665 #endif
666 
667 /*
668  * Note: A version of this function is used by the debugger via the KDI,
669  * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be included
671  * in the KDI-specific version.  See spitfire_kdi.c.
672  */
/*
 * Send a mondo (cross-call) interrupt to a single target CPU and spin
 * until the interrupt dispatch status register (IDSR) drains.  Resends
 * the mondo when the target NACKs, and panics if the dispatch does not
 * complete within the xc_tick_limit window (adjusted across irregular
 * tick jumps).
 */
void
send_one_mondo(int cpuid)
{
	uint64_t idsr, starttick, endtick;
	int upaid, busy, nack;
	uint64_t tick, tick_prev;
	ulong_t ticks;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	upaid = CPUID_TO_UPAID(cpuid);
	tick = starttick = gettick();
	shipit(upaid);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;	/* dispatch completed */
		/*
		 * When we detect an irregular tick jump, we adjust
		 * the timer window to the current tick value.
		 */
		tick_prev = tick;
		tick = gettick();
		ticks = tick - tick_prev;
		if (ticks > xc_tick_jump_limit) {
			endtick = tick + xc_tick_limit;
		} else if (tick > endtick) {
			/* during panic quiesce, give up quietly */
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC,
			"send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
			upaid, nack, busy);
		}
		if (idsr & IDSR_BUSY) {
			/* target still dispatching; keep polling */
			busy++;
			continue;
		}
		/* not busy but not drained: treat as NACK and resend */
		drv_usecwait(1);
		shipit(upaid);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
#endif
}
720 
721 void
722 send_mondo_set(cpuset_t set)
723 {
724 	int i;
725 
726 	for (i = 0; i < NCPU; i++)
727 		if (CPU_IN_SET(set, i)) {
728 			send_one_mondo(i);
729 			CPUSET_DEL(set, i);
730 			if (CPUSET_ISNULL(set))
731 				break;
732 		}
733 }
734 
/*
 * CPU-module entry point for FPU synchronization; intentionally a
 * no-op on this implementation (nothing to do here for Spitfire).
 */
void
syncfpu(void)
{
}
739 
/*
 * Determine the size of the CPU module's error structure in bytes.  This is
 * called once during boot to initialize the error queues.
 *
 * Side effect: this is also where we detect a Sabre or Hummingbird
 * implementation and rewrite the file-scope cpu_ec_* mask/shift/state
 * variables with that implementation's E$ tag layout (see the comment
 * in the body for why this can't happen in cpu_setup()).
 */
int
cpu_aflt_size(void)
{
	/*
	 * We need to determine whether this is a sabre, Hummingbird or a
	 * Spitfire/Blackbird impl and set the appropriate state variables for
	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
	 * too early in the boot flow and the cpunodes are not initialized.
	 * This routine will be called once after cpunodes[] is ready, so do
	 * it here.
	 */
	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
		isus2i = 1;
		cpu_ec_tag_mask = SB_ECTAG_MASK;
		cpu_ec_state_mask = SB_ECSTATE_MASK;
		cpu_ec_par_mask = SB_ECPAR_MASK;
		cpu_ec_par_shift = SB_ECPAR_SHIFT;
		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
		cpu_ec_state_exl = SB_ECSTATE_EXL;
		cpu_ec_state_mod = SB_ECSTATE_MOD;

		/* These states do not exist in sabre - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = SB_ECSTATE_VALID;
		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
		cpu_ec_state_parity = SB_ECSTATE_PARITY;
		cpu_ec_parity = SB_EC_PARITY;
	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
		isus2e = 1;
		cpu_ec_tag_mask = HB_ECTAG_MASK;
		cpu_ec_state_mask = HB_ECSTATE_MASK;
		cpu_ec_par_mask = HB_ECPAR_MASK;
		cpu_ec_par_shift = HB_ECPAR_SHIFT;
		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
		cpu_ec_state_exl = HB_ECSTATE_EXL;
		cpu_ec_state_mod = HB_ECSTATE_MOD;

		/* These states do not exist in hummingbird - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = HB_ECSTATE_VALID;
		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
		cpu_ec_state_parity = HB_ECSTATE_PARITY;
		cpu_ec_parity = HB_EC_PARITY;
	}

	return (sizeof (spitf_async_flt));
}
797 
798 
/*
 * Correctable ECC error trap handler.  Extracts the UDB high/low halves
 * and their syndromes from the AFSR (as packed by ce_err()), builds a
 * spitf_async_flt, scrubs the faulting location(s), optionally queues an
 * ereport, and re-enables error traps before returning.
 */
/*ARGSUSED*/
void
cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl;
	ushort_t e_syndh, e_syndl;
	spitf_async_flt spf_flt;
	struct async_flt *ecc;
	int queue = 1;		/* cleared below to suppress ereport */

	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by ce_err().
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
		(pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
			"** Panic due to CE bit not set in the AFSR",
			"  Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	    the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
			"** Panic due to CE bits not set in the UDBs",
			" Corrected Memory Error on");
	}

	/* CE in the upper data buffer half */
	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/* CE in the lower data buffer half */
	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * Re-enable all error trapping (CEEN currently cleared).
	 */
	clr_datapath();
	set_asyncflt(P_AFSR_CE);
	set_error_enable(EER_ENABLE);
}
915 
916 /*
917  * Cpu specific CE logging routine
918  */
919 static void
920 log_ce_err(struct async_flt *aflt, char *unum)
921 {
922 	spitf_async_flt spf_flt;
923 
924 	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
925 		return;
926 	}
927 
928 	spf_flt.cmn_asyncflt = *aflt;
929 	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
930 	    " Corrected Memory Error detected by");
931 }
932 
933 /*
934  * Spitfire does not perform any further CE classification refinement
935  */
/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
    size_t afltoffset)
{
	/*
	 * Spitfire does no additional CE classification (see the comment
	 * above), so this hook always reports 0 and the caller proceeds
	 * with the error as-is.
	 */
	return (0);
}
943 
944 char *
945 flt_to_error_type(struct async_flt *aflt)
946 {
947 	if (aflt->flt_status & ECC_INTERMITTENT)
948 		return (ERR_TYPE_DESC_INTERMITTENT);
949 	if (aflt->flt_status & ECC_PERSISTENT)
950 		return (ERR_TYPE_DESC_PERSISTENT);
951 	if (aflt->flt_status & ECC_STICKY)
952 		return (ERR_TYPE_DESC_STICKY);
953 	return (ERR_TYPE_DESC_UNKNOWN);
954 }
955 
956 /*
957  * Called by correctable ecc error logging code to print out
958  * the stick/persistent/intermittent status of the error.
959  */
960 static void
961 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
962 {
963 	ushort_t status;
964 	char *status1_str = "Memory";
965 	char *status2_str = "Intermittent";
966 	struct async_flt *aflt = (struct async_flt *)spf_flt;
967 
968 	status = aflt->flt_status;
969 
970 	if (status & ECC_ECACHE)
971 		status1_str = "Ecache";
972 
973 	if (status & ECC_STICKY)
974 		status2_str = "Sticky";
975 	else if (status & ECC_PERSISTENT)
976 		status2_str = "Persistent";
977 
978 	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
979 		NULL, " Corrected %s Error on %s is %s",
980 		status1_str, unum, status2_str);
981 }
982 
983 /*
984  * check for a valid ce syndrome, then call the
985  * displacement flush scrubbing code, and then check the afsr to see if
986  * the error was persistent or intermittent. Reread the afar/afsr to see
987  * if the error was not scrubbed successfully, and is therefore sticky.
988  */
/*ARGSUSED1*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
{
	uint64_t eer, afsr;
	ushort_t status;

	/* Must be called above LOCK_LEVEL so the scrub cannot be preempted */
	ASSERT(getpil() > LOCK_LEVEL);

	/*
	 * It is possible that the flt_addr is not a valid
	 * physical address. To deal with this, we disable
	 * NCEEN while we scrub that address. If this causes
	 * a TIMEOUT/BERR, we know this is an invalid
	 * memory location.
	 */
	kpreempt_disable();
	eer = get_error_enable();
	if (eer & (EER_CEEN | EER_NCEEN))
	    set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));

	/*
	 * To check if the error detected by IO is persistent, sticky or
	 * intermittent.
	 */
	if (ecc->flt_status & ECC_IOBUS) {
		ecc->flt_stat = P_AFSR_CE;
	}

	/* Displacement-flush the 64-byte aligned line holding the fault PA */
	scrubphys(P2ALIGN(ecc->flt_addr, 64),
	    cpunodes[CPU->cpu_id].ecache_size);

	get_asyncflt(&afsr);
	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		/*
		 * Must ensure that we don't get the TIMEOUT/BERR
		 * when we reenable NCEEN, so we clear the AFSR.
		 */
		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
		if (eer & (EER_CEEN | EER_NCEEN))
		    set_error_enable(eer);
		kpreempt_enable();
		return;
	}

	/* Keep CEEN off during classification so the reads below don't trap */
	if (eer & EER_NCEEN)
	    set_error_enable(eer & ~EER_CEEN);

	/*
	 * Check and clear any ECC errors from the scrub.  If the scrub did
	 * not trip over the error, mark it intermittent.  If the scrub did
	 * trip the error again and it did not scrub away, mark it sticky.
	 * Otherwise mark it persistent.
	 */
	if (check_ecc(ecc) != 0) {
		cpu_read_paddr(ecc, 0, 1);

		if (check_ecc(ecc) != 0)
			status = ECC_STICKY;
		else
			status = ECC_PERSISTENT;
	} else
		status = ECC_INTERMITTENT;

	/* Restore the error-enable register to its original state */
	if (eer & (EER_CEEN | EER_NCEEN))
	    set_error_enable(eer);
	kpreempt_enable();

	/* Replace any previous classification with the new one */
	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
	ecc->flt_status |= status;
}
1060 
1061 /*
1062  * get the syndrome and unum, and then call the routines
1063  * to check the other cpus and iobuses, and then do the error logging.
1064  */
/*ARGSUSED1*/
void
cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
{
	char unum[UNUM_NAMLEN];
	int len = 0;
	int ce_verbose = 0;
	int err;

	ASSERT(ecc->flt_func != NULL);

	/* Get the unum string for logging purposes */
	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
	    UNUM_NAMLEN, &len);

	/* Call specific error logging routine */
	(void) (*ecc->flt_func)(ecc, unum);

	/*
	 * Count errors per unum.
	 * Non-memory errors are all counted via a special unum string.
	 * If the per-unum error count crosses a threshold, retire the page
	 * (when automatic_page_removal is enabled).
	 */
	if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
	    automatic_page_removal) {
		(void) page_retire(ecc->flt_addr, err);
	}

	/*
	 * Decide whether to emit the detailed syndrome messages below:
	 * always for panics and for non-bus, non-memory errors; for memory
	 * and bus CEs only if ce_verbose_memory is set.
	 */
	if (ecc->flt_panic) {
		ce_verbose = 1;
	} else if ((ecc->flt_class == BUS_FAULT) ||
	    (ecc->flt_stat & P_AFSR_CE)) {
		ce_verbose = (ce_verbose_memory > 0);
	} else {
		ce_verbose = 1;
	}

	if (ce_verbose) {
		spitf_async_flt sflt;
		int synd_code;

		/*
		 * NOTE(review): only the common portion of sflt is
		 * initialized; the spitfire-specific fields are stack
		 * garbage.  cpu_aflt_log() with CPU_ERRID_FIRST appears to
		 * read only the common data -- confirm before adding flags.
		 */
		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */

		cpu_ce_log_status(&sflt, unum);

		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
				SYND(ecc->flt_synd));

		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
			    NULL, " ECC Data Bit %2d was in error "
			    "and corrected", synd_code);
		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
			    NULL, " ECC Check Bit %2d was in error "
			    "and corrected", synd_code - C0);
		} else {
			/*
			 * These are UE errors - we shouldn't be getting CE
			 * traps for these; handle them in case of bad h/w.
			 */
			switch (synd_code) {
			case M2:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Two ECC Bits were in error");
				break;
			case M3:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Three ECC Bits were in error");
				break;
			case M4:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Four ECC Bits were in error");
				break;
			case MX:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " More than Four ECC bits were "
				    "in error");
				break;
			default:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Unknown fault syndrome %d",
				    synd_code);
				break;
			}
		}
	}

	/* Display entire cache line, if valid address */
	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
		read_ecc_data(ecc, 1, 1);
}
1161 
1162 /*
1163  * We route all errors through a single switch statement.
1164  */
1165 void
1166 cpu_ue_log_err(struct async_flt *aflt)
1167 {
1168 
1169 	switch (aflt->flt_class) {
1170 	case CPU_FAULT:
1171 		cpu_async_log_err(aflt);
1172 		break;
1173 
1174 	case BUS_FAULT:
1175 		bus_async_log_err(aflt);
1176 		break;
1177 
1178 	default:
1179 		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
1180 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1181 		break;
1182 	}
1183 }
1184 
1185 /* Values for action variable in cpu_async_error() */
1186 #define	ACTION_NONE		0
1187 #define	ACTION_TRAMPOLINE	1
1188 #define	ACTION_AST_FLAGS	2
1189 
1190 /*
1191  * Access error trap handler for asynchronous cpu errors.  This routine is
1192  * called to handle a data or instruction access error.  All fatal errors are
1193  * completely handled by this routine (by panicking).  Non fatal error logging
1194  * is queued for later processing either via AST or softint at a lower PIL.
1195  * In case of panic, the error log queue will also be processed as part of the
1196  * panic flow to ensure all errors are logged.  This routine is called with all
1197  * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
1198  * error bits are also cleared.  The hardware has also disabled the I and
1199  * D-caches for us, so we must re-enable them before returning.
1200  *
1201  * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
1202  *
1203  *		_______________________________________________________________
1204  *		|        Privileged tl0		|         Unprivileged	      |
1205  *		| Protected	| Unprotected	| Protected	| Unprotected |
1206  *		|on_trap|lofault|		|		|	      |
1207  * -------------|-------|-------+---------------+---------------+-------------|
1208  *		|	|	|		|		|	      |
1209  * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
1210  *		|	|	|		|		|	      |
1211  * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
1212  *		|	|	|		|		|	      |
1213  * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
1214  *		|	|	|		|		|	      |
1215  * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
1216  * ____________________________________________________________________________
1217  *
1218  *
1219  * Action codes:
1220  *
1221  * L - log
1222  * M - kick off memscrubber if flt_in_memory
1223  * P - panic
1224  * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
1225  * R - i)  if aft_panic is set, panic
1226  *     ii) otherwise, send hwerr event to contract and SIGKILL to process
1227  * S - send SIGBUS to process
1228  * T - trampoline
1229  *
1230  * Special cases:
1231  *
1232  * 1) if aft_testfatal is set, all faults result in a panic regardless
1233  *    of type (even WP), protection (even on_trap), or privilege.
1234  */
/*ARGSUSED*/
void
cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl, ttype, tl;
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	char pr_reason[28];
	uint64_t oafsr;
	uint64_t acc_afsr = 0;			/* accumulated afsr */
	int action = ACTION_NONE;
	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by async_err() if P_AFSR_UE is set.
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	/*
	 * Grab the ttype encoded in <63:53> of the saved
	 * afsr passed from async_err()
	 */
	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
	tl = (ushort_t)(t_afsr >> 62);

	/* Strip the ttype/tl encoding, leaving only real AFSR error bits */
	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/*
	 * Initialize most of the common and CPU-specific structure.  We derive
	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
	 * initial setting of aflt->flt_panic is based on TL: we must panic if
	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
	 * tuneable aft_testfatal is set (not the default).
	 */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = (struct async_flt *)&spf_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;

	/*
	 * Set flt_status based on the trap type.  If we end up here as the
	 * result of a UE detected by the CE handling code, leave status 0.
	 */
	switch (ttype) {
	case T_DATA_ERROR:
		aflt->flt_status = ECC_D_TRAP;
		break;
	case T_INSTR_ERROR:
		aflt->flt_status = ECC_I_TRAP;
		break;
	}

	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal async errors.  check_misc_err() panics (CE_PANIC)
	 * if any of ISAP/ETP/IVUE are set, so we do not return from here
	 * in that case.
	 */
	check_misc_err(&spf_flt);

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = AFLT_PROT_EC;
				otp->ot_trap |= OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
			}

			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = AFLT_PROT_ACCESS;
				otp->ot_trap |= OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			action = ACTION_TRAMPOLINE;
		}
	}

	/*
	 * Determine if this error needs to be treated as fatal.  Note that
	 * multiple errors detected upon entry to this trap handler does not
	 * necessarily warrant a panic.  We only want to panic if the trap
	 * happened in privileged mode and not under t_ontrap or t_lofault
	 * protection.  The exception is WP: if we *only* get WP, it is not
	 * fatal even if the trap occurred in privileged mode, except on Sabre.
	 *
	 * aft_panic, if set, effectively makes us treat usermode
	 * UE/EDP/LDP faults as if they were privileged - so we will
	 * panic instead of sending a contract event.  A lofault-protected
	 * fault will normally follow the contract event; if aft_panic is
	 * set this will be changed to a panic.
	 *
	 * For usermode BERR/BTO errors, eg from processes performing device
	 * control through mapped device memory, we need only deliver
	 * a SIGBUS to the offending process.
	 *
	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
	 * checked later; for now we implement the common reasons.
	 */
	if (aflt->flt_prot == AFLT_PROT_NONE) {
		/*
		 * Beware - multiple bits may be set in AFSR
		 */
		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
			if (aflt->flt_priv || aft_panic)
				aflt->flt_panic = 1;
		}

		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
			if (aflt->flt_priv)
				aflt->flt_panic = 1;
		}
	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
		aflt->flt_panic = 1;
	}

	/*
	 * UE/BERR/TO: Call our bus nexus friends to check for
	 * IO errors that may have resulted in this trap.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Handle UE: If the UE is in memory, we need to flush the bad line from
	 * the E-cache.  We also need to query the bus nexus for fatal errors.
	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
	 * caches may introduce more parity errors (especially when the module
	 * is bad) and in sabre there is no guarantee that such errors
	 * (if introduced) are written back as poisoned data.
	 */
	if (t_afsr & P_AFSR_UE) {
		int i;

		(void) strcat(pr_reason, "UE ");

		spf_flt.flt_type = CPU_UE_ERR;
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
			MMU_PAGESHIFT)) ? 1: 0;

		/*
		 * With UE, we have the PA of the fault.
		 * Let do a diagnostic read to get the ecache
		 * data and tag info of the bad line for logging.
		 */
		if (aflt->flt_in_memory) {
			uint32_t ec_set_size;
			uchar_t state;
			uint32_t ecache_idx;
			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);

			/* touch the line to put it in ecache */
			acc_afsr |= read_and_clear_afsr();
			(void) lddphys(faultpa);
			acc_afsr |= (read_and_clear_afsr() &
				    ~(P_AFSR_EDP | P_AFSR_UE));

			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
			    ecache_associativity;

			/*
			 * Search every way of the set for a valid tag that
			 * matches the fault PA.
			 */
			for (i = 0; i < ecache_associativity; i++) {
				ecache_idx = i * ec_set_size +
				    (aflt->flt_addr % ec_set_size);
				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
					(uint64_t *)&spf_flt.flt_ec_data[0],
					&spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
				acc_afsr |= oafsr;

				state = (uchar_t)((spf_flt.flt_ec_tag &
				    cpu_ec_state_mask) >> cpu_ec_state_shift);

				if ((state & cpu_ec_state_valid) &&
				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
				    ((uint64_t)aflt->flt_addr >>
				    cpu_ec_tag_shift)))
					break;
			}

			/*
			 * Check to see if the ecache tag is valid for the
			 * fault PA. In the very unlikely event where the
			 * line could be victimized, no ecache info will be
			 * available. If this is the case, capture the line
			 * from memory instead.
			 */
			if ((state & cpu_ec_state_valid) == 0 ||
			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
				/* capture the 64-byte line, 8 bytes at a time */
				for (i = 0; i < 8; i++, faultpa += 8) {
				    ec_data_t *ecdptr;

					ecdptr = &spf_flt.flt_ec_data[i];
					acc_afsr |= read_and_clear_afsr();
					ecdptr->ec_d8 = lddphys(faultpa);
					acc_afsr |= (read_and_clear_afsr() &
						    ~(P_AFSR_EDP | P_AFSR_UE));
					ecdptr->ec_afsr = 0;
							/* null afsr value */
				}

				/*
				 * Mark tag invalid to indicate mem dump
				 * when we print out the info.
				 */
				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
			}
			spf_flt.flt_ec_lcnt = 1;

			/*
			 * Flush out the bad line
			 */
			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
				cpunodes[CPU->cpu_id].ecache_size);

			acc_afsr |= clear_errors(NULL, NULL);
		}

		/*
		 * Ask our bus nexus friends if they have any fatal errors. If
		 * so, they will log appropriate error messages and panic as a
		 * result. We then queue an event for each UDB that reports a
		 * UE. Each UE reported in a UDB will have its own log message.
		 *
		 * Note from kbn: In the case where there are multiple UEs
		 * (ME bit is set) - the AFAR address is only accurate to
		 * the 16-byte granularity. One cannot tell whether the AFAR
		 * belongs to the UDBH or UDBL syndromes. In this case, we
		 * always report the AFAR address to be 16-byte aligned.
		 *
		 * If we're on a Sabre, there is no SDBL, but it will always
		 * read as zero, so the sdbl test below will safely fail.
		 */
		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
			aflt->flt_panic = 1;

		if (sdbh & P_DER_UE) {
			aflt->flt_synd = sdbh & P_DER_E_SYND;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
		if (sdbl & P_DER_UE) {
			aflt->flt_synd = sdbl & P_DER_E_SYND;
			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
			if (!(aflt->flt_stat & P_AFSR_ME))
				aflt->flt_addr |= 0x8;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}

		/*
		 * We got a UE and are panicking, save the fault PA in a known
		 * location so that the platform specific panic code can check
		 * for copyback errors.
		 */
		if (aflt->flt_panic && aflt->flt_in_memory) {
			panic_aflt = *aflt;
		}
	}

	/*
	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
	 * async error for logging. For Sabre, we panic on EDP or LDP.
	 */
	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
		spf_flt.flt_type = CPU_EDP_LDP_ERR;

		if (t_afsr & P_AFSR_EDP)
			(void) strcat(pr_reason, "EDP ");

		if (t_afsr & P_AFSR_LDP)
			(void) strcat(pr_reason, "LDP ");

		/*
		 * Here we have no PA to work with.
		 * Scan each line in the ecache to look for
		 * the one with bad parity.
		 */
		aflt->flt_addr = AFLT_INV_ADDR;
		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
			&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
		acc_afsr |= (oafsr & ~P_AFSR_WP);

		/*
		 * If we found a bad PA, update the state to indicate if it is
		 * memory or I/O space.  This code will be important if we ever
		 * support cacheable frame buffers.
		 */
		if (aflt->flt_addr != AFLT_INV_ADDR) {
			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
				MMU_PAGESHIFT)) ? 1 : 0;
		}

		if (isus2i || isus2e)
			aflt->flt_panic = 1;

		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Timeout and bus error handling.  There are two cases to consider:
	 *
	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
	 * have already modified the saved registers so that we will return
	 * from the trap to the appropriate trampoline routine; otherwise panic.
	 *
	 * (2) In user mode, we can simply use our AST mechanism to deliver
	 * a SIGBUS.  We do not log the occurrence - processes performing
	 * device control would generate lots of uninteresting messages.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		if (t_afsr & P_AFSR_TO)
			(void) strcat(pr_reason, "BTO ");

		if (t_afsr & P_AFSR_BERR)
			(void) strcat(pr_reason, "BERR ");

		spf_flt.flt_type = CPU_BTO_BERR_ERR;
		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
	}

	/*
	 * Handle WP: WP happens when the ecache is victimized and a parity
	 * error was detected on a writeback.  The data in question will be
	 * poisoned as a UE will be written back.  The PA is not logged and
	 * it is possible that it doesn't belong to the trapped thread.  The
	 * WP trap is not fatal, but it could be fatal to someone that
	 * subsequently accesses the toxic page.  We set read_all_memscrub
	 * to force the memscrubber to read all of memory when it awakens.
	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
	 * UE back to poison the data.
	 */
	if (t_afsr & P_AFSR_WP) {
		(void) strcat(pr_reason, "WP ");
		if (isus2i || isus2e) {
			aflt->flt_panic = 1;
		} else {
			read_all_memscrub = 1;
		}
		spf_flt.flt_type = CPU_WP_ERR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
	 * This is fatal.
	 */

	if (t_afsr & P_AFSR_CP) {
		if (isus2i || isus2e) {
			(void) strcat(pr_reason, "CP ");
			aflt->flt_panic = 1;
			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		} else {
			/*
			 * Orphan CP: Happens due to signal integrity problem
			 * on a CPU, where a CP is reported, without reporting
			 * its associated UE. This is handled by locating the
			 * bad parity line and would kick off the memscrubber
			 * to find the UE if in memory or in another's cache.
			 */
			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
			(void) strcat(pr_reason, "ORPHAN_CP ");

			/*
			 * Here we have no PA to work with.
			 * Scan each line in the ecache to look for
			 * the one with bad parity.
			 */
			aflt->flt_addr = AFLT_INV_ADDR;
			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
				&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
				&oafsr);
			acc_afsr |= oafsr;

			/*
			 * If we found a bad PA, update the state to indicate
			 * if it is memory or I/O space.
			 */
			if (aflt->flt_addr != AFLT_INV_ADDR) {
				aflt->flt_in_memory =
					(pf_is_memory(aflt->flt_addr >>
						MMU_PAGESHIFT)) ? 1 : 0;
			}
			read_all_memscrub = 1;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);

		}
	}

	/*
	 * If we queued an error other than WP or CP and we are going to return
	 * from the trap and the error was in user mode or inside of a
	 * copy routine, set AST flag so the queue will be drained before
	 * returning to user mode.
	 *
	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
	 * and send an event to its process contract.
	 *
	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
	 * will have been no error queued in this case.
	 */
	if ((t_afsr &
	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
			int pcb_flag = 0;

			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
				pcb_flag |= ASYNC_HWERR;

			if (t_afsr & P_AFSR_BERR)
				pcb_flag |= ASYNC_BERR;

			if (t_afsr & P_AFSR_TO)
				pcb_flag |= ASYNC_BTO;

			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
			aston(curthread);
			action = ACTION_AST_FLAGS;
	}

	/*
	 * In response to a deferred error, we must do one of three things:
	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
	 * set in cases (1) and (2) - check that either action is set or
	 * (3) is true.
	 *
	 * On II, the WP writes poisoned data back to memory, which will
	 * cause a UE and a panic or reboot when read.  In this case, we
	 * don't need to panic at this time.  On IIi and IIe,
	 * aflt->flt_panic is already set above.
	 */
	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
	    (t_afsr & P_AFSR_WP));

	/*
	 * Make a final sanity check to make sure we did not get any more async
	 * errors and accumulate the afsr.
	 */
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
	(void) clear_errors(&spf_flt, NULL);

	/*
	 * Take care of a special case: If there is a UE in the ecache flush
	 * area, we'll see it in flush_ecache().  This will trigger the
	 * CPU_ADDITIONAL_ERRORS case below.
	 *
	 * This could occur if the original error was a UE in the flush area,
	 * or if the original error was an E$ error that was flushed out of
	 * the E$ in scan_ecache().
	 *
	 * If it's at the same address that we're already logging, then it's
	 * probably one of these cases.  Clear the bit so we don't trip over
	 * it on the additional errors case, which could cause an unnecessary
	 * panic.
	 */
	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
	else
		acc_afsr |= aflt->flt_stat;

	/*
	 * Check the accumulated afsr for the important bits.
	 * Make sure the spf_flt.flt_type value is set, and
	 * enque an error.
	 */
	if (acc_afsr &
	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
		    P_AFSR_ISAP))
			aflt->flt_panic = 1;

		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
		aflt->flt_stat = acc_afsr;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * If aflt->flt_panic is set at this point, we need to panic as the
	 * result of a trap at TL > 0, or an error we determined to be fatal.
	 * We've already enqueued the error in one of the if-clauses above,
	 * and it will be dequeued and logged as part of the panic flow.
	 */
	if (aflt->flt_panic) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
		    "See previous message(s) for details", " %sError(s)",
		    pr_reason);
	}

	/*
	 * Before returning, we must re-enable errors, and
	 * reset the caches to their boot-up state.
	 */
	set_lsu(get_lsu() | cache_boot_state);
	set_error_enable(EER_ENABLE);
}
1810 
1811 /*
1812  * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1813  * This routine is shared by the CE and UE handling code.
1814  */
1815 static void
1816 check_misc_err(spitf_async_flt *spf_flt)
1817 {
1818 	struct async_flt *aflt = (struct async_flt *)spf_flt;
1819 	char *fatal_str = NULL;
1820 
1821 	/*
1822 	 * The ISAP and ETP errors are supposed to cause a POR
1823 	 * from the system, so in theory we never, ever see these messages.
1824 	 * ISAP, ETP and IVUE are considered to be fatal.
1825 	 */
1826 	if (aflt->flt_stat & P_AFSR_ISAP)
1827 		fatal_str = " System Address Parity Error on";
1828 	else if (aflt->flt_stat & P_AFSR_ETP)
1829 		fatal_str = " Ecache Tag Parity Error on";
1830 	else if (aflt->flt_stat & P_AFSR_IVUE)
1831 		fatal_str = " Interrupt Vector Uncorrectable Error on";
1832 	if (fatal_str != NULL) {
1833 		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
1834 			NULL, fatal_str);
1835 	}
1836 }
1837 
1838 /*
1839  * Routine to convert a syndrome into a syndrome code.
1840  */
1841 static int
1842 synd_to_synd_code(int synd_status, ushort_t synd)
1843 {
1844 	if (synd_status != AFLT_STAT_VALID)
1845 		return (-1);
1846 
1847 	/*
1848 	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
1849 	 * to get the code indicating which bit(s) is(are) bad.
1850 	 */
1851 	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
1852 		return (-1);
1853 	else
1854 		return (ecc_syndrome_tab[synd]);
1855 }
1856 
/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	/* Memory serial-id lookup is not supported by this cpu module. */
	return (ENOTSUP);
}
1863 
/* ARGSUSED */
int
cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
{
	/* Address-to-offset translation is not supported by this cpu module. */
	return (ENOTSUP);
}
1870 
/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	/* unum/sid/offset-to-address translation is not supported here. */
	return (ENOTSUP);
}
1877 
1878 /*
1879  * Routine to return a string identifying the physical name
1880  * associated with a memory/cache error.
1881  */
1882 /* ARGSUSED */
1883 int
1884 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
1885     uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
1886     char *buf, int buflen, int *lenp)
1887 {
1888 	short synd_code;
1889 	int ret;
1890 
1891 	if (flt_in_memory) {
1892 		synd_code = synd_to_synd_code(synd_status, synd);
1893 		if (synd_code == -1) {
1894 			ret = EINVAL;
1895 		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
1896 		    buf, buflen, lenp) != 0) {
1897 			ret = EIO;
1898 		} else if (*lenp <= 1) {
1899 			ret = EINVAL;
1900 		} else {
1901 			ret = 0;
1902 		}
1903 	} else {
1904 		ret = ENOTSUP;
1905 	}
1906 
1907 	if (ret != 0) {
1908 		buf[0] = '\0';
1909 		*lenp = 0;
1910 	}
1911 
1912 	return (ret);
1913 }
1914 
1915 /*
1916  * Wrapper for cpu_get_mem_unum() routine that takes an
1917  * async_flt struct rather than explicit arguments.
1918  */
1919 int
1920 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1921     char *buf, int buflen, int *lenp)
1922 {
1923 	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
1924 		aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
1925 		aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
1926 }
1927 
1928 /*
1929  * This routine is a more generic interface to cpu_get_mem_unum(),
1930  * that may be used by other modules (e.g. mm).
1931  */
1932 int
1933 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1934 		char *buf, int buflen, int *lenp)
1935 {
1936 	int synd_status, flt_in_memory, ret;
1937 	char unum[UNUM_NAMLEN];
1938 
1939 	/*
1940 	 * Check for an invalid address.
1941 	 */
1942 	if (afar == (uint64_t)-1)
1943 		return (ENXIO);
1944 
1945 	if (synd == (uint64_t)-1)
1946 		synd_status = AFLT_STAT_INVALID;
1947 	else
1948 		synd_status = AFLT_STAT_VALID;
1949 
1950 	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
1951 
1952 	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1953 	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
1954 	    != 0)
1955 		return (ret);
1956 
1957 	if (*lenp >= buflen)
1958 		return (ENAMETOOLONG);
1959 
1960 	(void) strncpy(buf, unum, buflen);
1961 
1962 	return (0);
1963 }
1964 
1965 /*
1966  * Routine to return memory information associated
1967  * with a physical address and syndrome.
1968  */
1969 /* ARGSUSED */
1970 int
1971 cpu_get_mem_info(uint64_t synd, uint64_t afar,
1972     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1973     int *segsp, int *banksp, int *mcidp)
1974 {
1975 	return (ENOTSUP);
1976 }
1977 
1978 /*
1979  * Routine to return a string identifying the physical
1980  * name associated with a cpuid.
1981  */
1982 /* ARGSUSED */
1983 int
1984 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1985 {
1986 	return (ENOTSUP);
1987 }
1988 
1989 /*
1990  * This routine returns the size of the kernel's FRU name buffer.
1991  */
1992 size_t
1993 cpu_get_name_bufsize()
1994 {
1995 	return (UNUM_NAMLEN);
1996 }
1997 
1998 /*
1999  * Cpu specific log func for UEs.
2000  */
2001 static void
2002 log_ue_err(struct async_flt *aflt, char *unum)
2003 {
2004 	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
2005 	int len = 0;
2006 
2007 #ifdef DEBUG
2008 	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
2009 
2010 	/*
2011 	 * Paranoid Check for priv mismatch
2012 	 * Only applicable for UEs
2013 	 */
2014 	if (afsr_priv != aflt->flt_priv) {
2015 		/*
2016 		 * The priv bits in %tstate and %afsr did not match; we expect
2017 		 * this to be very rare, so flag it with a message.
2018 		 */
2019 		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
2020 		    ": PRIV bit in TSTATE and AFSR mismatched; "
2021 		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
2022 
2023 		/* update saved afsr to reflect the correct priv */
2024 		aflt->flt_stat &= ~P_AFSR_PRIV;
2025 		if (aflt->flt_priv)
2026 			aflt->flt_stat |= P_AFSR_PRIV;
2027 	}
2028 #endif /* DEBUG */
2029 
2030 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
2031 	    UNUM_NAMLEN, &len);
2032 
2033 	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
2034 	    " Uncorrectable Memory Error on");
2035 
2036 	if (SYND(aflt->flt_synd) == 0x3) {
2037 		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
2038 		    " Syndrome 0x3 indicates that this may not be a "
2039 		    "memory module problem");
2040 	}
2041 
2042 	if (aflt->flt_in_memory)
2043 		cpu_log_ecmem_info(spf_flt);
2044 }
2045 
2046 
2047 /*
2048  * The cpu_async_log_err() function is called via the ue_drain() function to
2049  * handle logging for CPU events that are dequeued.  As such, it can be invoked
2050  * from softint context, from AST processing in the trap() flow, or from the
2051  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
2052  */
2053 static void
2054 cpu_async_log_err(void *flt)
2055 {
2056 	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
2057 	struct async_flt *aflt = (struct async_flt *)flt;
2058 	char unum[UNUM_NAMLEN];
2059 	char *space;
2060 	char *ecache_scrub_logstr = NULL;
2061 
2062 	switch (spf_flt->flt_type) {
2063 	    case CPU_UE_ERR:
2064 		/*
2065 		 * We want to skip logging only if ALL the following
2066 		 * conditions are true:
2067 		 *
2068 		 *	1. We are not panicking
2069 		 *	2. There is only one error
2070 		 *	3. That error is a memory error
2071 		 *	4. The error is caused by the memory scrubber (in
2072 		 *	   which case the error will have occurred under
2073 		 *	   on_trap protection)
2074 		 *	5. The error is on a retired page
2075 		 *
2076 		 * Note 1: AFLT_PROT_EC is used places other than the memory
2077 		 * scrubber.  However, none of those errors should occur
2078 		 * on a retired page.
2079 		 *
2080 		 * Note 2: In the CE case, these errors are discarded before
2081 		 * the errorq.  In the UE case, we must wait until now --
2082 		 * softcall() grabs a mutex, which we can't do at a high PIL.
2083 		 */
2084 		if (!panicstr &&
2085 		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
2086 		    aflt->flt_prot == AFLT_PROT_EC) {
2087 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2088 				/* Zero the address to clear the error */
2089 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2090 				return;
2091 			}
2092 		}
2093 
2094 		/*
2095 		 * Log the UE and check for causes of this UE error that
2096 		 * don't cause a trap (Copyback error).  cpu_async_error()
2097 		 * has already checked the i/o buses for us.
2098 		 */
2099 		log_ue_err(aflt, unum);
2100 		if (aflt->flt_in_memory)
2101 			cpu_check_allcpus(aflt);
2102 		break;
2103 
2104 	    case CPU_EDP_LDP_ERR:
2105 		if (aflt->flt_stat & P_AFSR_EDP)
2106 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2107 			    NULL, " EDP event on");
2108 
2109 		if (aflt->flt_stat & P_AFSR_LDP)
2110 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2111 			    NULL, " LDP event on");
2112 
2113 		/* Log ecache info if exist */
2114 		if (spf_flt->flt_ec_lcnt > 0) {
2115 			cpu_log_ecmem_info(spf_flt);
2116 
2117 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2118 			    NULL, " AFAR was derived from E$Tag");
2119 		} else {
2120 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2121 			    NULL, " No error found in ecache (No fault "
2122 			    "PA available)");
2123 		}
2124 		break;
2125 
2126 	    case CPU_WP_ERR:
2127 		/*
2128 		 * If the memscrub thread hasn't yet read
2129 		 * all of memory, as we requested in the
2130 		 * trap handler, then give it a kick to
2131 		 * make sure it does.
2132 		 */
2133 		if (!isus2i && !isus2e && read_all_memscrub)
2134 			memscrub_run();
2135 
2136 		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
2137 		    " WP event on");
2138 		return;
2139 
2140 	    case CPU_BTO_BERR_ERR:
2141 		/*
2142 		 * A bus timeout or error occurred that was in user mode or not
2143 		 * in a protected kernel code region.
2144 		 */
2145 		if (aflt->flt_stat & P_AFSR_BERR) {
2146 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2147 			    spf_flt, BERRTO_LFLAGS, NULL,
2148 			    " Bus Error on System Bus in %s mode from",
2149 			    aflt->flt_priv ? "privileged" : "user");
2150 		}
2151 
2152 		if (aflt->flt_stat & P_AFSR_TO) {
2153 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2154 			    spf_flt, BERRTO_LFLAGS, NULL,
2155 			    " Timeout on System Bus in %s mode from",
2156 			    aflt->flt_priv ? "privileged" : "user");
2157 		}
2158 
2159 		return;
2160 
2161 	    case CPU_PANIC_CP_ERR:
2162 		/*
2163 		 * Process the Copyback (CP) error info (if any) obtained from
2164 		 * polling all the cpus in the panic flow. This case is only
2165 		 * entered if we are panicking.
2166 		 */
2167 		ASSERT(panicstr != NULL);
2168 		ASSERT(aflt->flt_id == panic_aflt.flt_id);
2169 
2170 		/* See which space - this info may not exist */
2171 		if (panic_aflt.flt_status & ECC_D_TRAP)
2172 			space = "Data ";
2173 		else if (panic_aflt.flt_status & ECC_I_TRAP)
2174 			space = "Instruction ";
2175 		else
2176 			space = "";
2177 
2178 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2179 		    " AFAR was derived from UE report,"
2180 		    " CP event on CPU%d (caused %saccess error on %s%d)",
2181 		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
2182 		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
2183 
2184 		if (spf_flt->flt_ec_lcnt > 0)
2185 			cpu_log_ecmem_info(spf_flt);
2186 		else
2187 			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
2188 			    NULL, " No cache dump available");
2189 
2190 		return;
2191 
2192 	    case CPU_TRAPPING_CP_ERR:
2193 		/*
2194 		 * For sabre only.  This is a copyback ecache parity error due
2195 		 * to a PCI DMA read.  We should be panicking if we get here.
2196 		 */
2197 		ASSERT(panicstr != NULL);
2198 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2199 		    " AFAR was derived from UE report,"
2200 		    " CP event on CPU%d (caused Data access error "
2201 		    "on PCIBus)", aflt->flt_inst);
2202 		return;
2203 
2204 		/*
2205 		 * We log the ecache lines of the following states,
2206 		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
2207 		 * dirty_bad_busy if ecache_scrub_verbose is set and panic
2208 		 * in addition to logging if ecache_scrub_panic is set.
2209 		 */
2210 	    case CPU_BADLINE_CI_ERR:
2211 		ecache_scrub_logstr = "CBI";
2212 		/* FALLTHRU */
2213 
2214 	    case CPU_BADLINE_CB_ERR:
2215 		if (ecache_scrub_logstr == NULL)
2216 			ecache_scrub_logstr = "CBB";
2217 		/* FALLTHRU */
2218 
2219 	    case CPU_BADLINE_DI_ERR:
2220 		if (ecache_scrub_logstr == NULL)
2221 			ecache_scrub_logstr = "DBI";
2222 		/* FALLTHRU */
2223 
2224 	    case CPU_BADLINE_DB_ERR:
2225 		if (ecache_scrub_logstr == NULL)
2226 			ecache_scrub_logstr = "DBB";
2227 
2228 		cpu_aflt_log(CE_NOTE, 2, spf_flt,
2229 			(CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
2230 			" %s event on", ecache_scrub_logstr);
2231 		cpu_log_ecmem_info(spf_flt);
2232 
2233 		return;
2234 
2235 	    case CPU_ORPHAN_CP_ERR:
2236 		/*
2237 		 * Orphan CPs, where the CP bit is set, but when a CPU
2238 		 * doesn't report a UE.
2239 		 */
2240 		if (read_all_memscrub)
2241 			memscrub_run();
2242 
2243 		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
2244 			NULL, " Orphan CP event on");
2245 
2246 		/* Log ecache info if exist */
2247 		if (spf_flt->flt_ec_lcnt > 0)
2248 			cpu_log_ecmem_info(spf_flt);
2249 		else
2250 			cpu_aflt_log(CE_NOTE, 2, spf_flt,
2251 				(CP_LFLAGS | CPU_FLTCPU), NULL,
2252 				" No error found in ecache (No fault "
2253 				"PA available");
2254 		return;
2255 
2256 	    case CPU_ECACHE_ADDR_PAR_ERR:
2257 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2258 				" E$ Tag Address Parity error on");
2259 		cpu_log_ecmem_info(spf_flt);
2260 		return;
2261 
2262 	    case CPU_ECACHE_STATE_ERR:
2263 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2264 				" E$ Tag State Parity error on");
2265 		cpu_log_ecmem_info(spf_flt);
2266 		return;
2267 
2268 	    case CPU_ECACHE_TAG_ERR:
2269 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2270 				" E$ Tag scrub event on");
2271 		cpu_log_ecmem_info(spf_flt);
2272 		return;
2273 
2274 	    case CPU_ECACHE_ETP_ETS_ERR:
2275 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2276 				" AFSR.ETP is set and AFSR.ETS is zero on");
2277 		cpu_log_ecmem_info(spf_flt);
2278 		return;
2279 
2280 
2281 	    case CPU_ADDITIONAL_ERR:
2282 		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
2283 		    " Additional errors detected during error processing on");
2284 		return;
2285 
2286 	    default:
2287 		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
2288 		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
2289 		return;
2290 	}
2291 
2292 	/* ... fall through from the UE, EDP, or LDP cases */
2293 
2294 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2295 		if (!panicstr) {
2296 			(void) page_retire(aflt->flt_addr, PR_UE);
2297 		} else {
2298 			/*
2299 			 * Clear UEs on panic so that we don't
2300 			 * get haunted by them during panic or
2301 			 * after reboot
2302 			 */
2303 			clearphys(P2ALIGN(aflt->flt_addr, 64),
2304 			    cpunodes[CPU->cpu_id].ecache_size,
2305 			    cpunodes[CPU->cpu_id].ecache_linesize);
2306 
2307 			(void) clear_errors(NULL, NULL);
2308 		}
2309 	}
2310 
2311 	/*
2312 	 * Log final recover message
2313 	 */
2314 	if (!panicstr) {
2315 		if (!aflt->flt_priv) {
2316 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2317 			    NULL, " Above Error is in User Mode"
2318 			    "\n    and is fatal: "
2319 			    "will SIGKILL process and notify contract");
2320 		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
2321 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2322 			    NULL, " Above Error detected while dumping core;"
2323 			    "\n    core file will be truncated");
2324 		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
2325 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2326 			    NULL, " Above Error is due to Kernel access"
2327 			    "\n    to User space and is fatal: "
2328 			    "will SIGKILL process and notify contract");
2329 		} else if (aflt->flt_prot == AFLT_PROT_EC) {
2330 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
2331 			    " Above Error detected by protected Kernel code"
2332 			    "\n    that will try to clear error from system");
2333 		}
2334 	}
2335 }
2336 
2337 
2338 /*
2339  * Check all cpus for non-trapping UE-causing errors
2340  * In Ultra I/II, we look for copyback errors (CPs)
2341  */
2342 void
2343 cpu_check_allcpus(struct async_flt *aflt)
2344 {
2345 	spitf_async_flt cp;
2346 	spitf_async_flt *spf_cpflt = &cp;
2347 	struct async_flt *cpflt = (struct async_flt *)&cp;
2348 	int pix;
2349 
2350 	cpflt->flt_id = aflt->flt_id;
2351 	cpflt->flt_addr = aflt->flt_addr;
2352 
2353 	for (pix = 0; pix < NCPU; pix++) {
2354 		if (CPU_XCALL_READY(pix)) {
2355 			xc_one(pix, (xcfunc_t *)get_cpu_status,
2356 			    (uint64_t)cpflt, 0);
2357 
2358 			if (cpflt->flt_stat & P_AFSR_CP) {
2359 				char *space;
2360 
2361 				/* See which space - this info may not exist */
2362 				if (aflt->flt_status & ECC_D_TRAP)
2363 					space = "Data ";
2364 				else if (aflt->flt_status & ECC_I_TRAP)
2365 					space = "Instruction ";
2366 				else
2367 					space = "";
2368 
2369 				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
2370 				    NULL, " AFAR was derived from UE report,"
2371 				    " CP event on CPU%d (caused %saccess "
2372 				    "error on %s%d)", pix, space,
2373 				    (aflt->flt_status & ECC_IOBUS) ?
2374 				    "IOBUS" : "CPU", aflt->flt_bus_id);
2375 
2376 				if (spf_cpflt->flt_ec_lcnt > 0)
2377 					cpu_log_ecmem_info(spf_cpflt);
2378 				else
2379 					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
2380 					    CPU_ERRID_FIRST, NULL,
2381 					    " No cache dump available");
2382 			}
2383 		}
2384 	}
2385 }
2386 
#ifdef DEBUG
/* When non-zero, get_cpu_status() fakes a CP error for testing. */
int test_mp_cp = 0;
#endif
2390 
2391 /*
2392  * Cross-call callback routine to tell a CPU to read its own %afsr to check
2393  * for copyback errors and capture relevant information.
2394  */
2395 static uint_t
2396 get_cpu_status(uint64_t arg)
2397 {
2398 	struct async_flt *aflt = (struct async_flt *)arg;
2399 	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
2400 	uint64_t afsr;
2401 	uint32_t ec_idx;
2402 	uint64_t sdbh, sdbl;
2403 	int i;
2404 	uint32_t ec_set_size;
2405 	uchar_t valid;
2406 	ec_data_t ec_data[8];
2407 	uint64_t ec_tag, flt_addr_tag, oafsr;
2408 	uint64_t *acc_afsr = NULL;
2409 
2410 	get_asyncflt(&afsr);
2411 	if (CPU_PRIVATE(CPU) != NULL) {
2412 		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2413 		afsr |= *acc_afsr;
2414 		*acc_afsr = 0;
2415 	}
2416 
2417 #ifdef DEBUG
2418 	if (test_mp_cp)
2419 		afsr |= P_AFSR_CP;
2420 #endif
2421 	aflt->flt_stat = afsr;
2422 
2423 	if (afsr & P_AFSR_CP) {
2424 		/*
2425 		 * Capture the UDBs
2426 		 */
2427 		get_udb_errors(&sdbh, &sdbl);
2428 		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
2429 		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);
2430 
2431 		/*
2432 		 * Clear CP bit before capturing ecache data
2433 		 * and AFSR info.
2434 		 */
2435 		set_asyncflt(P_AFSR_CP);
2436 
2437 		/*
2438 		 * See if we can capture the ecache line for the
2439 		 * fault PA.
2440 		 *
2441 		 * Return a valid matching ecache line, if any.
2442 		 * Otherwise, return the first matching ecache
2443 		 * line marked invalid.
2444 		 */
2445 		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
2446 		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
2447 		    ecache_associativity;
2448 		spf_flt->flt_ec_lcnt = 0;
2449 
2450 		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
2451 		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
2452 			get_ecache_dtag(P2ALIGN(ec_idx, 64),
2453 				(uint64_t *)&ec_data[0], &ec_tag, &oafsr,
2454 				    acc_afsr);
2455 
2456 			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
2457 				continue;
2458 
2459 			valid = cpu_ec_state_valid &
2460 			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
2461 			    cpu_ec_state_shift);
2462 
2463 			if (valid || spf_flt->flt_ec_lcnt == 0) {
2464 				spf_flt->flt_ec_tag = ec_tag;
2465 				bcopy(&ec_data, &spf_flt->flt_ec_data,
2466 				    sizeof (ec_data));
2467 				spf_flt->flt_ec_lcnt = 1;
2468 
2469 				if (valid)
2470 					break;
2471 			}
2472 		}
2473 	}
2474 	return (0);
2475 }
2476 
2477 /*
2478  * CPU-module callback for the non-panicking CPUs.  This routine is invoked
2479  * from panic_idle() as part of the other CPUs stopping themselves when a
2480  * panic occurs.  We need to be VERY careful what we do here, since panicstr
2481  * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
2482  * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
2483  * CP error information.
2484  */
2485 void
2486 cpu_async_panic_callb(void)
2487 {
2488 	spitf_async_flt cp;
2489 	struct async_flt *aflt = (struct async_flt *)&cp;
2490 	uint64_t *scrub_afsr;
2491 
2492 	if (panic_aflt.flt_id != 0) {
2493 		aflt->flt_addr = panic_aflt.flt_addr;
2494 		(void) get_cpu_status((uint64_t)aflt);
2495 
2496 		if (CPU_PRIVATE(CPU) != NULL) {
2497 			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2498 			if (*scrub_afsr & P_AFSR_CP) {
2499 				aflt->flt_stat |= *scrub_afsr;
2500 				*scrub_afsr = 0;
2501 			}
2502 		}
2503 		if (aflt->flt_stat & P_AFSR_CP) {
2504 			aflt->flt_id = panic_aflt.flt_id;
2505 			aflt->flt_panic = 1;
2506 			aflt->flt_inst = CPU->cpu_id;
2507 			aflt->flt_class = CPU_FAULT;
2508 			cp.flt_type = CPU_PANIC_CP_ERR;
2509 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
2510 			    (void *)&cp, sizeof (cp), ue_queue,
2511 			    aflt->flt_panic);
2512 		}
2513 	}
2514 }
2515 
2516 /*
2517  * Turn off all cpu error detection, normally only used for panics.
2518  */
2519 void
2520 cpu_disable_errors(void)
2521 {
2522 	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
2523 }
2524 
2525 /*
2526  * Enable errors.
2527  */
2528 void
2529 cpu_enable_errors(void)
2530 {
2531 	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
2532 }
2533 
2534 static void
2535 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
2536 {
2537 	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
2538 	int i, loop = 1;
2539 	ushort_t ecc_0;
2540 	uint64_t paddr;
2541 	uint64_t data;
2542 
2543 	if (verbose)
2544 		loop = 8;
2545 	for (i = 0; i < loop; i++) {
2546 		paddr = aligned_addr + (i * 8);
2547 		data = lddphys(paddr);
2548 		if (verbose) {
2549 			if (ce_err) {
2550 			    ecc_0 = ecc_gen((uint32_t)(data>>32),
2551 			    (uint32_t)data);
2552 			    cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2553 				NULL, "    Paddr 0x%" PRIx64 ", "
2554 				"Data 0x%08x.%08x, ECC 0x%x", paddr,
2555 				(uint32_t)(data>>32), (uint32_t)data, ecc_0);
2556 			} else {
2557 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2558 				    NULL, "    Paddr 0x%" PRIx64 ", "
2559 				    "Data 0x%08x.%08x", paddr,
2560 				    (uint32_t)(data>>32), (uint32_t)data);
2561 			}
2562 		}
2563 	}
2564 }
2565 
/*
 * Bit-selection masks for each of the 8 check bits: check bit i is the
 * parity of the data bits selected by ecc_code[i].hi/.lo (see ecc_gen()).
 */
static struct {		/* sec-ded-s4ed ecc code */
	uint_t hi, lo;
} ecc_code[8] = {
	{ 0xee55de23U, 0x16161161U },
	{ 0x55eede93U, 0x61612212U },
	{ 0xbb557b8cU, 0x49494494U },
	{ 0x55bb7b6cU, 0x94948848U },
	{ 0x16161161U, 0xee55de23U },
	{ 0x61612212U, 0x55eede93U },
	{ 0x49494494U, 0xbb557b8cU },
	{ 0x94948848U, 0x55bb7b6cU }
};
2578 
2579 static ushort_t
2580 ecc_gen(uint_t high_bytes, uint_t low_bytes)
2581 {
2582 	int i, j;
2583 	uchar_t checker, bit_mask;
2584 	struct {
2585 		uint_t hi, lo;
2586 	} hex_data, masked_data[8];
2587 
2588 	hex_data.hi = high_bytes;
2589 	hex_data.lo = low_bytes;
2590 
2591 	/* mask out bits according to sec-ded-s4ed ecc code */
2592 	for (i = 0; i < 8; i++) {
2593 		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
2594 		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
2595 	}
2596 
2597 	/*
2598 	 * xor all bits in masked_data[i] to get bit_i of checker,
2599 	 * where i = 0 to 7
2600 	 */
2601 	checker = 0;
2602 	for (i = 0; i < 8; i++) {
2603 		bit_mask = 1 << i;
2604 		for (j = 0; j < 32; j++) {
2605 			if (masked_data[i].lo & 1) checker ^= bit_mask;
2606 			if (masked_data[i].hi & 1) checker ^= bit_mask;
2607 			masked_data[i].hi >>= 1;
2608 			masked_data[i].lo >>= 1;
2609 		}
2610 	}
2611 	return (checker);
2612 }
2613 
2614 /*
2615  * Flush the entire ecache using displacement flush by reading through a
2616  * physical address range as large as the ecache.
2617  */
2618 void
2619 cpu_flush_ecache(void)
2620 {
2621 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2622 	    cpunodes[CPU->cpu_id].ecache_linesize);
2623 }
2624 
2625 /*
2626  * read and display the data in the cache line where the
2627  * original ce error occurred.
2628  * This routine is mainly used for debugging new hardware.
2629  */
2630 void
2631 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
2632 {
2633 	kpreempt_disable();
2634 	/* disable ECC error traps */
2635 	set_error_enable(EER_ECC_DISABLE);
2636 
2637 	/*
2638 	 * flush the ecache
2639 	 * read the data
2640 	 * check to see if an ECC error occured
2641 	 */
2642 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2643 	    cpunodes[CPU->cpu_id].ecache_linesize);
2644 	set_lsu(get_lsu() | cache_boot_state);
2645 	cpu_read_paddr(ecc, verbose, ce_err);
2646 	(void) check_ecc(ecc);
2647 
2648 	/* enable ECC error traps */
2649 	set_error_enable(EER_ENABLE);
2650 	kpreempt_enable();
2651 }
2652 
2653 /*
2654  * Check the AFSR bits for UE/CE persistence.
2655  * If UE or CE errors are detected, the routine will
2656  * clears all the AFSR sticky bits (except CP for
2657  * spitfire/blackbird) and the UDBs.
2658  * if ce_debug or ue_debug is set, log any ue/ce errors detected.
2659  */
2660 static int
2661 check_ecc(struct async_flt *ecc)
2662 {
2663 	uint64_t t_afsr;
2664 	uint64_t t_afar;
2665 	uint64_t udbh;
2666 	uint64_t udbl;
2667 	ushort_t udb;
2668 	int persistent = 0;
2669 
2670 	/*
2671 	 * Capture the AFSR, AFAR and UDBs info
2672 	 */
2673 	get_asyncflt(&t_afsr);
2674 	get_asyncaddr(&t_afar);
2675 	t_afar &= SABRE_AFAR_PA;
2676 	get_udb_errors(&udbh, &udbl);
2677 
2678 	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
2679 		/*
2680 		 * Clear the errors
2681 		 */
2682 		clr_datapath();
2683 
2684 		if (isus2i || isus2e)
2685 			set_asyncflt(t_afsr);
2686 		else
2687 			set_asyncflt(t_afsr & ~P_AFSR_CP);
2688 
2689 		/*
2690 		 * determine whether to check UDBH or UDBL for persistence
2691 		 */
2692 		if (ecc->flt_synd & UDBL_REG) {
2693 			udb = (ushort_t)udbl;
2694 			t_afar |= 0x8;
2695 		} else {
2696 			udb = (ushort_t)udbh;
2697 		}
2698 
2699 		if (ce_debug || ue_debug) {
2700 			spitf_async_flt spf_flt; /* for logging */
2701 			struct async_flt *aflt =
2702 				(struct async_flt *)&spf_flt;
2703 
2704 			/* Package the info nicely in the spf_flt struct */
2705 			bzero(&spf_flt, sizeof (spitf_async_flt));
2706 			aflt->flt_stat = t_afsr;
2707 			aflt->flt_addr = t_afar;
2708 			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
2709 			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);
2710 
2711 			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
2712 			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
2713 			    " check_ecc: Dumping captured error states ...");
2714 		}
2715 
2716 		/*
2717 		 * if the fault addresses don't match, not persistent
2718 		 */
2719 		if (t_afar != ecc->flt_addr) {
2720 			return (persistent);
2721 		}
2722 
2723 		/*
2724 		 * check for UE persistence
2725 		 * since all DIMMs in the bank are identified for a UE,
2726 		 * there's no reason to check the syndrome
2727 		 */
2728 		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
2729 			persistent = 1;
2730 		}
2731 
2732 		/*
2733 		 * check for CE persistence
2734 		 */
2735 		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
2736 			if ((udb & P_DER_E_SYND) ==
2737 			    (ecc->flt_synd & P_DER_E_SYND)) {
2738 				persistent = 1;
2739 			}
2740 		}
2741 	}
2742 	return (persistent);
2743 }
2744 
#ifdef HUMMINGBIRD
/*
 * E* (energy star) clock-divisor support for Hummingbird (US-IIe).
 * hb_eclk[] maps a divisor (1..8) to its HB_ECLK_* register encoding;
 * unsupported divisors map to HB_ECLK_INVALID.
 */
#define	HB_FULL_DIV		1
#define	HB_HALF_DIV		2
#define	HB_LOWEST_DIV		8
#define	HB_ECLK_INVALID		0xdeadbad
static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
	HB_ECLK_8 };

/* Direction argument for CHANGE_REFRESH_COUNT below. */
#define	HB_SLOW_DOWN		0
#define	HB_SPEED_UP		1

/* Program the E* mode register, then wait for the PLL to re-lock. */
#define	SET_ESTAR_MODE(mode)					\
	stdphysio(HB_ESTAR_MODE, (mode));			\
	/*							\
	 * PLL logic requires minimum of 16 clock		\
	 * cycles to lock to the new clock speed.		\
	 * Wait 1 usec to satisfy this requirement.		\
	 */							\
	drv_usecwait(1);

/*
 * Reprogram the memory refresh count for the new divisor, waiting out
 * the old count when slowing down without self-refresh enabled.
 */
#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
{								\
	volatile uint64_t data;					\
	uint64_t count, new_count;				\
	clock_t delay;						\
	data = lddphysio(HB_MEM_CNTRL0);			\
	count = (data & HB_REFRESH_COUNT_MASK) >> 		\
	    HB_REFRESH_COUNT_SHIFT;				\
	new_count = (HB_REFRESH_INTERVAL *			\
	    cpunodes[CPU->cpu_id].clock_freq) /			\
	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);        		\
	/*							\
	 * If we are slowing down the cpu and Memory		\
	 * Self Refresh is not enabled, it is required		\
	 * to wait for old refresh count to count-down and	\
	 * new refresh count to go into effect (let new value	\
	 * counts down once).					\
	 */							\
	if ((direction) == HB_SLOW_DOWN &&			\
	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
		/*						\
		 * Each count takes 64 cpu clock cycles		\
		 * to decrement.  Wait for current refresh	\
		 * count plus new refresh count at current	\
		 * cpu speed to count down to zero.  Round	\
		 * up the delay time.				\
		 */						\
		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
		    (count + new_count) * MICROSEC * (cur_div)) /\
		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
		drv_usecwait(delay);				\
	}							\
}

/* Set or clear the memory self-refresh enable bit in HB_MEM_CNTRL0. */
#define	SET_SELF_REFRESH(bit)					\
{								\
	volatile uint64_t data;					\
	data = lddphysio(HB_MEM_CNTRL0);			\
	data = (data & ~HB_SELF_REFRESH_MASK) |			\
	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
}
#endif	/* HUMMINGBIRD */
2815 
/*
 * Change this cpu's E* clock divisor to new_divisor (Hummingbird only;
 * a no-op elsewhere).  Transitions through 1/2 speed where required and
 * keeps the memory refresh count and self-refresh state consistent with
 * the selected speed.  arg2 is unused.
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
{
#ifdef HUMMINGBIRD
	uint64_t cur_mask, cur_divisor = 0;
	volatile uint64_t reg;
	processor_info_t *pi = &(CPU->cpu_type_info);
	int index;

	/* Reject divisors out of range or without an HB_ECLK_* encoding. */
	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
		    new_divisor);
		return;
	}

	/* Derive the current divisor from the E* mode register. */
	reg = lddphysio(HB_ESTAR_MODE);
	cur_mask = reg & HB_ECLK_MASK;
	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
		if (hb_eclk[index] == cur_mask) {
			cur_divisor = index;
			break;
		}
	}

	if (cur_divisor == 0)
		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
		    "can't be determined!");

	/*
	 * If we are already at the requested divisor speed, just
	 * return.
	 */
	if (cur_divisor == new_divisor)
		return;

	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);

	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * lower speed.
		 */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * full speed.
		 */
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);

		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);

	} else if (cur_divisor < new_divisor) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > new_divisor) {
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
	}
	/* Publish the new speed for psrinfo and friends. */
	CPU->cpu_m.divisor = (uchar_t)new_divisor;
	pi->pi_curr_clock =
	    (((uint64_t)pi->pi_clock * 1000000) / new_divisor);
#endif
}
2904 
2905 /*
2906  * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
2907  * we clear all the sticky bits. If a non-null pointer to a async fault
2908  * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
2909  * info will be returned in the structure.  If a non-null pointer to a
2910  * uint64_t is passed in, this will be updated if the CP bit is set in the
2911  * AFSR.  The afsr will be returned.
2912  */
static uint64_t
clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	uint64_t afsr;
	uint64_t udbh, udbl;

	/* Capture the AFSR before it is cleared below. */
	get_asyncflt(&afsr);

	/* Fold the AFSR into the caller's accumulator when CP is set. */
	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
		*acc_afsr |= afsr;

	/* If a fault structure was supplied, save AFSR/AFAR/UDB state. */
	if (spf_flt != NULL) {
		aflt->flt_stat = afsr;
		get_asyncaddr(&aflt->flt_addr);
		aflt->flt_addr &= SABRE_AFAR_PA;

		/* Only the low 10 bits of each UDB register are kept. */
		get_udb_errors(&udbh, &udbl);
		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
	}

	set_asyncflt(afsr);		/* clear afsr */
	clr_datapath();			/* clear udbs */
	return (afsr);
}
2939 
2940 /*
2941  * Scan the ecache to look for bad lines.  If found, the afsr, afar, e$ data
2942  * tag of the first bad line will be returned. We also return the old-afsr
2943  * (before clearing the sticky bits). The linecnt data will be updated to
2944  * indicate the number of bad lines detected.
2945  */
static void
scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
{
	ec_data_t t_ecdata[8];
	uint64_t t_etag, oafsr;
	uint64_t pa = AFLT_INV_ADDR;	/* stays invalid until a line is hit */
	uint32_t i, j, ecache_sz;
	uint64_t acc_afsr = 0;
	uint64_t *cpu_afsr = NULL;

	/* Point at the per-cpu scrub AFSR accumulator, if one exists. */
	if (CPU_PRIVATE(CPU) != NULL)
		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	*linecnt = 0;
	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;

	/* Walk the whole E$ one 64-byte line at a time. */
	for (i = 0; i < ecache_sz; i += 64) {
		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
		    cpu_afsr);
		acc_afsr |= oafsr;

		/*
		 * Scan through the whole 64 bytes line in 8 8-byte chunks
		 * looking for the first occurrence of an EDP error.  The AFSR
		 * info is captured for each 8-byte chunk.  Note that for
		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
		 * 16-byte chunk granularity (i.e. the AFSR will be the same
		 * for the high and low 8-byte words within the 16-byte chunk).
		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
		 * granularity and only PSYND bits [7:0] are used.
		 */
		for (j = 0; j < 8; j++) {
			ec_data_t *ecdptr = &t_ecdata[j];

			if (ecdptr->ec_afsr & P_AFSR_EDP) {
				uint64_t errpa;
				ushort_t psynd;
				uint32_t ec_set_size = ecache_sz /
				    ecache_associativity;

				/*
				 * For Spitfire/Blackbird, we need to look at
				 * the PSYND to make sure that this 8-byte chunk
				 * is the right one.  PSYND bits [15:8] belong
				 * to the upper 8-byte (even) chunk.  Bits
				 * [7:0] belong to the lower 8-byte chunk (odd).
				 */
				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
				if (!isus2i && !isus2e) {
					if (j & 0x1)
						psynd = psynd & 0xFF;
					else
						psynd = psynd >> 8;

					if (!psynd)
						continue; /* wrong chunk */
				}

				/* Construct the PA */
				errpa = ((t_etag & cpu_ec_tag_mask) <<
				    cpu_ec_tag_shift) | ((i | (j << 3)) %
				    ec_set_size);

				/* clean up the cache line */
				flushecacheline(P2ALIGN(errpa, 64),
					cpunodes[CPU->cpu_id].ecache_size);

				/* Fold in any errors raised by the flush. */
				oafsr = clear_errors(NULL, cpu_afsr);
				acc_afsr |= oafsr;

				(*linecnt)++;

				/*
				 * Capture the PA for the first bad line found.
				 * Return the ecache dump and tag info.
				 */
				if (pa == AFLT_INV_ADDR) {
					int k;

					pa = errpa;
					for (k = 0; k < 8; k++)
						ecache_data[k] = t_ecdata[k];
					*ecache_tag = t_etag;
				}
				break;
			}
		}
	}
	*t_afar = pa;
	*t_afsr = acc_afsr;
}
3038 
/*
 * Log the captured E$ tag/state/parity and all eight 8-byte data words
 * for a faulted E$ line.  If the captured tag is AFLT_INV_ADDR, a memory
 * dump was captured instead of an E$ dump and is logged from the
 * 64-byte-aligned AFAR.
 */
static void
cpu_log_ecmem_info(spitf_async_flt *spf_flt)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	uint64_t ecache_tag = spf_flt->flt_ec_tag;
	char linestr[30];
	char *state_str;
	int i;

	/*
	 * Check the ecache tag to make sure it
	 * is valid. If invalid, a memory dump was
	 * captured instead of a ecache dump.
	 */
	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
		uchar_t eparity = (uchar_t)
		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);

		uchar_t estate = (uchar_t)
		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);

		/* Decode the MOESI line state encoded in the tag. */
		if (estate == cpu_ec_state_shr)
			state_str = "Shared";
		else if (estate == cpu_ec_state_exl)
			state_str = "Exclusive";
		else if (estate == cpu_ec_state_own)
			state_str = "Owner";
		else if (estate == cpu_ec_state_mod)
			state_str = "Modified";
		else
			state_str = "Invalid";

		if (spf_flt->flt_ec_lcnt > 1) {
			(void) snprintf(linestr, sizeof (linestr),
			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
		} else {
			linestr[0] = '\0';
		}

		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
		    (uint32_t)ecache_tag, state_str,
		    (uint32_t)eparity, linestr);
	} else {
		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " E$tag != PA from AFAR; E$line was victimized"
		    "\n    dumping memory from PA 0x%08x.%08x instead",
		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
	}

	/*
	 * Dump out all 8 8-byte ecache data captured
	 * For each 8-byte data captured, we check the
	 * captured afsr's parity syndrome to find out
	 * which 8-byte chunk is bad. For memory dump, the
	 * AFSR values were initialized to 0.
	 */
	for (i = 0; i < 8; i++) {
		ec_data_t *ecdptr;
		uint_t offset;
		ushort_t psynd;
		ushort_t bad;
		uint64_t edp;

		offset = i << 3;	/* multiply by 8 */
		ecdptr = &spf_flt->flt_ec_data[i];
		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
		edp = ecdptr->ec_afsr & P_AFSR_EDP;

		/*
		 * For Sabre/Hummingbird, parity synd is captured only
		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
		 * For spitfire/blackbird, AFSR.PSYND is captured
		 * in 16-byte granularity. [15:8] represent
		 * the upper 8 byte and [7:0] the lower 8 byte.
		 */
		if (isus2i || isus2e || (i & 0x1))
			bad = (psynd & 0xFF);		/* check bits [7:0] */
		else
			bad = (psynd & 0xFF00);		/* check bits [15:8] */

		if (bad && edp) {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x "
			    "*Bad* PSYND=0x%04x", offset,
			    (uint32_t)(ecdptr->ec_d8 >> 32),
			    (uint32_t)ecdptr->ec_d8, psynd);
		} else {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x", offset,
			    (uint32_t)(ecdptr->ec_d8 >> 32),
			    (uint32_t)ecdptr->ec_d8);
		}
	}
}
3137 
3138 /*
3139  * Common logging function for all cpu async errors.  This function allows the
3140  * caller to generate a single cmn_err() call that logs the appropriate items
3141  * from the fault structure, and implements our rules for AFT logging levels.
3142  *
3143  *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
3144  *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
3145  *	spflt: pointer to spitfire async fault structure
3146  *	logflags: bitflags indicating what to output
3147  *	endstr: a end string to appear at the end of this log
3148  *	fmt: a format string to appear at the beginning of the log
3149  *
3150  * The logflags allows the construction of predetermined output from the spflt
3151  * structure.  The individual data items always appear in a consistent order.
3152  * Note that either or both of the spflt structure pointer and logflags may be
3153  * NULL or zero respectively, indicating that the predetermined output
3154  * substrings are not requested in this log.  The output looks like this:
3155  *
3156  *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
3157  *	<CPU_SPACE><CPU_ERRID>
3158  *	newline+4spaces<CPU_AFSR><CPU_AFAR>
3159  *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
3160  *	newline+4spaces<CPU_UDBH><CPU_UDBL>
3161  *	newline+4spaces<CPU_SYND>
3162  *	newline+4spaces<endstr>
3163  *
3164  * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>;
3165  * it is assumed that <endstr> will be the unum string in this case.  The size
3166  * of our intermediate formatting buf[] is based on the worst case of all flags
3167  * being enabled.  We pass the caller's varargs directly to vcmn_err() for
3168  * formatting so we don't need additional stack space to format them here.
3169  */
/*PRINTFLIKE6*/
static void
cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
	const char *endstr, const char *fmt, ...)
{
	struct async_flt *aflt = (struct async_flt *)spflt;
	char buf[400], *p, *q; /* see comments about buf[] size above */
	va_list ap;
	int console_log_flag;

	/*
	 * Decide whether this message also goes to the console.  Null-aflt
	 * messages, CPU faults with AFSR.LEVEL1 set, and panic faults are
	 * governed by the AFT tag number and aft_verbose; everything else
	 * is governed by the ce_verbose_memory/ce_verbose_other tunables
	 * (0 = drop entirely, 1 = log only, >1 = log and console).
	 */
	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
				(aflt->flt_stat & P_AFSR_LEVEL1)) ||
	    (aflt->flt_panic)) {
		console_log_flag = (tagnum < 2) || aft_verbose;
	} else {
		int verbose = ((aflt->flt_class == BUS_FAULT) ||
		    (aflt->flt_stat & P_AFSR_CE)) ?
		    ce_verbose_memory : ce_verbose_other;

		if (!verbose)
			return;

		console_log_flag = (verbose > 1);
	}

	/* A leading '!' tells cmn_err(9F) not to echo this to the console. */
	if (console_log_flag)
		(void) sprintf(buf, "[AFT%d]", tagnum);
	else
		(void) sprintf(buf, "![AFT%d]", tagnum);

	p = buf + strlen(buf);	/* current buffer position */
	q = buf + sizeof (buf);	/* pointer past end of buffer */

	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
		p += strlen(p);
	}

	/*
	 * Copy the caller's format string verbatim into buf[].  It will be
	 * formatted by the call to vcmn_err() at the end of this function.
	 * Note strncpy() may leave p unterminated if fmt is long, but the
	 * final buf[] byte is forced to '\0' so buf is always terminated.
	 */
	if (fmt != NULL && p < q) {
		(void) strncpy(p, fmt, (size_t)(q - p - 1));
		buf[sizeof (buf) - 1] = '\0';
		p += strlen(p);
	}

	/* Append each requested data item in its fixed, documented order. */
	if (spflt != NULL) {
		if (logflags & CPU_FLTCPU) {
			(void) snprintf(p, (size_t)(q - p), " CPU%d",
			    aflt->flt_inst);
			p += strlen(p);
		}

		if (logflags & CPU_SPACE) {
			if (aflt->flt_status & ECC_D_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Data access");
			else if (aflt->flt_status & ECC_I_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Instruction access");
			p += strlen(p);
		}

		if (logflags & CPU_TL) {
			(void) snprintf(p, (size_t)(q - p), " at TL%s",
			    aflt->flt_tl ? ">0" : "=0");
			p += strlen(p);
		}

		if (logflags & CPU_ERRID) {
			(void) snprintf(p, (size_t)(q - p),
			    ", errID 0x%08x.%08x",
			    (uint32_t)(aflt->flt_id >> 32),
			    (uint32_t)aflt->flt_id);
			p += strlen(p);
		}

		if (logflags & CPU_AFSR) {
			(void) snprintf(p, (size_t)(q - p),
			    "\n    AFSR 0x%08b.%08b",
			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
			p += strlen(p);
		}

		if (logflags & CPU_AFAR) {
			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
			    (uint32_t)(aflt->flt_addr >> 32),
			    (uint32_t)aflt->flt_addr);
			p += strlen(p);
		}

		if (logflags & CPU_AF_PSYND) {
			ushort_t psynd = (ushort_t)
			    (aflt->flt_stat & P_AFSR_P_SYND);

			(void) snprintf(p, (size_t)(q - p),
			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
			    psynd, ecc_psynd_score(psynd));
			p += strlen(p);
		}

		if (logflags & CPU_AF_ETS) {
			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
			p += strlen(p);
		}

		if (logflags & CPU_FAULTPC) {
			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
			    (void *)aflt->flt_pc);
			p += strlen(p);
		}

		if (logflags & CPU_UDBH) {
			(void) snprintf(p, (size_t)(q - p),
			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
			    spflt->flt_sdbh, UDB_FMTSTR,
			    spflt->flt_sdbh & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_UDBL) {
			(void) snprintf(p, (size_t)(q - p),
			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
			    spflt->flt_sdbl, UDB_FMTSTR,
			    spflt->flt_sdbl & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_SYND) {
			ushort_t synd = SYND(aflt->flt_synd);

			(void) snprintf(p, (size_t)(q - p),
			    "\n    %s Syndrome 0x%x Memory Module ",
			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
			p += strlen(p);
		}
	}

	/*
	 * When CPU_SYND was logged, endstr is assumed to be the unum and
	 * continues the same line; otherwise it starts a new indented line.
	 */
	if (endstr != NULL) {
		if (!(logflags & CPU_SYND))
			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
		else
			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
		p += strlen(p);
	}

	if (ce_code == CE_CONT && (p < q - 1))
		(void) strcpy(p, "\n"); /* add final \n if needed */

	/* Format the caller's varargs against the assembled buffer. */
	va_start(ap, fmt);
	vcmn_err(ce_code, buf, ap);
	va_end(ap);
}
3328 
3329 /*
3330  * Ecache Scrubbing
3331  *
3332  * The basic idea is to prevent lines from sitting in the ecache long enough
3333  * to build up soft errors which can lead to ecache parity errors.
3334  *
3335  * The following rules are observed when flushing the ecache:
3336  *
3337  * 1. When the system is busy, flush bad clean lines
3338  * 2. When the system is idle, flush all clean lines
3339  * 3. When the system is idle, flush good dirty lines
3340  * 4. Never flush bad dirty lines.
3341  *
3342  *	modify	parity	busy   idle
3343  *	----------------------------
3344  *	clean	good		X
3345  * 	clean	bad	X	X
3346  * 	dirty	good		X
3347  *	dirty	bad
3348  *
3349  * Bad or good refers to whether a line has an E$ parity error or not.
3350  * Clean or dirty refers to the state of the modified bit.  We currently
3351  * default the scan rate to 100 (scan 10% of the cache per second).
3352  *
3353  * The following are E$ states and actions.
3354  *
3355  * We encode our state as a 3-bit number, consisting of:
3356  *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
3357  *	ECACHE_STATE_PARITY	(0=good,  1=bad)
3358  *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
3359  *
3360  * We associate a flushing and a logging action with each state.
3361  *
3362  * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3363  * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
3364  * E$ only, in addition to value being set by ec_flush.
3365  */
3366 
#define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
#define	NEVER_FLUSH		0x0	/* never flush the E$ line */
#define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */

/*
 * Per-state scrub action table, indexed by the 3-bit
 * Modified/Parity/Busy (mpb) value assembled in scrub_ecache_line().
 */
struct {
	char	ec_flush;		/* whether to flush or not */
	char	ec_log;			/* ecache logging */
	char	ec_log_type;		/* log type info */
} ec_action[] = {	/* states of the E$ line in M P B */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
};
3385 
3386 /*
3387  * Offsets into the ec_action[] that determines clean_good_busy and
3388  * dirty_good_busy lines.
3389  */
3390 #define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
3391 #define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */
3392 
3393 /*
3394  * We are flushing lines which are Clean_Good_Busy and also the lines
3395  * Dirty_Good_Busy. And we only follow it for non-mirrored E$.
3396  */
3397 #define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR))
3398 #define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR))
3399 
3400 #define	ECACHE_STATE_MODIFIED	0x4
3401 #define	ECACHE_STATE_PARITY	0x2
3402 #define	ECACHE_STATE_BUSY	0x1
3403 
3404 /*
3405  * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced.
3406  */
3407 int ecache_calls_a_sec_mirrored = 1;
3408 int ecache_lines_per_call_mirrored = 1;
3409 
3410 int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
3411 int ecache_scrub_verbose = 1;		/* prints clean and dirty lines */
3412 int ecache_scrub_panic = 0;		/* panics on a clean and dirty line */
3413 int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
3414 int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
3415 int ecache_idle_factor = 1;		/* increase the scan rate when idle */
3416 int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
3417 int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */
3418 
3419 volatile int ec_timeout_calls = 1;	/* timeout calls */
3420 
3421 /*
3422  * Interrupt number and pil for ecache scrubber cross-trap calls.
3423  */
3424 static uint64_t ecache_scrub_inum;
3425 uint_t ecache_scrub_pil = PIL_9;
3426 
3427 /*
3428  * Kstats for the E$ scrubber.
3429  */
3430 typedef struct ecache_kstat {
3431 	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
3432 	kstat_named_t clean_good_busy;		/* # of lines skipped */
3433 	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
3434 	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
3435 	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
3436 	kstat_named_t dirty_good_busy;		/* # of lines skipped */
3437 	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
3438 	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
3439 	kstat_named_t invalid_lines;		/* # of invalid lines */
3440 	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
3441 	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
3442 	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
3443 } ecache_kstat_t;
3444 
3445 static ecache_kstat_t ec_kstat_template = {
3446 	{ "clean_good_idle", KSTAT_DATA_ULONG },
3447 	{ "clean_good_busy", KSTAT_DATA_ULONG },
3448 	{ "clean_bad_idle", KSTAT_DATA_ULONG },
3449 	{ "clean_bad_busy", KSTAT_DATA_ULONG },
3450 	{ "dirty_good_idle", KSTAT_DATA_ULONG },
3451 	{ "dirty_good_busy", KSTAT_DATA_ULONG },
3452 	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
3453 	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
3454 	{ "invalid_lines", KSTAT_DATA_ULONG },
3455 	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
3456 	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
3457 	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
3458 };
3459 
3460 struct kmem_cache *sf_private_cache;
3461 
3462 /*
3463  * Called periodically on each CPU to scan the ecache once a sec.
3464  * adjusting the ecache line index appropriately
3465  */
void
scrub_ecache_line()
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	int cpuid = CPU->cpu_id;
	uint32_t index = ssmp->ecache_flush_index;
	uint64_t ec_size = cpunodes[cpuid].ecache_size;
	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
	int nlines = ssmp->ecache_nlines;
	uint32_t ec_set_size = ec_size / ecache_associativity;
	int ec_mirror = ssmp->ecache_mirror;
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;

	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
	int mpb;		/* encode Modified, Parity, Busy for action */
	uchar_t state;
	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
	ec_data_t ec_data[8];
	kstat_named_t *ec_knp;

	/* Pick the scan rate and busy-flush budgets for this invocation. */
	switch (ec_mirror) {
		default:
		case ECACHE_CPU_NON_MIRROR:
			/*
			 * The E$ scan rate is expressed in units of tenths of
			 * a percent.  ecache_scan_rate = 1000 (100%) means the
			 * whole cache is scanned every second.
			 */
			scan_lines = (nlines * ecache_scan_rate) /
					(1000 * ecache_calls_a_sec);
			if (!(ssmp->ecache_busy)) {
				if (ecache_idle_factor > 0) {
					scan_lines *= ecache_idle_factor;
				}
			} else {
				flush_clean_busy = (scan_lines *
					ecache_flush_clean_good_busy) / 100;
				flush_dirty_busy = (scan_lines *
					ecache_flush_dirty_good_busy) / 100;
			}

			ec_timeout_calls = (ecache_calls_a_sec ?
						ecache_calls_a_sec : 1);
			break;

		case ECACHE_CPU_MIRROR:
			scan_lines = ecache_lines_per_call_mirrored;
			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
					ecache_calls_a_sec_mirrored : 1);
			break;
	}

	/*
	 * The ecache scrubber algorithm operates by reading and
	 * decoding the E$ tag to determine whether the corresponding E$ line
	 * can be scrubbed. There is an implicit assumption in the scrubber
	 * logic that the E$ tag is valid. Unfortunately, this assumption is
	 * flawed since the E$ tag may also be corrupted and have parity errors
	 * The scrubber logic is enhanced to check the validity of the E$ tag
	 * before scrubbing. When a parity error is detected in the E$ tag,
	 * it is possible to recover and scrub the tag under certain conditions
	 * so that an ETP error condition can be avoided.
	 */

	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
		/*
		 * We get the old-AFSR before clearing the AFSR sticky bits
		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
		 */
		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
				cpu_ec_state_shift);

		/*
		 * If ETP is set, try to scrub the ecache tag.
		 */
		if (nafsr & P_AFSR_ETP) {
			ecache_scrub_tag_err(nafsr, state, index);
		} else if (state & cpu_ec_state_valid) {
			/*
			 * ETP is not set, E$ tag is valid.
			 * Proceed with the E$ scrubbing.
			 */
			if (state & cpu_ec_state_dirty)
				mpb |= ECACHE_STATE_MODIFIED;

			tafsr = check_ecache_line(index, acc_afsr);

			if (tafsr & P_AFSR_EDP) {
				mpb |= ECACHE_STATE_PARITY;

				/* Capture the line data only if we may log */
				if (ecache_scrub_verbose ||
							ecache_scrub_panic) {
					get_ecache_dtag(P2ALIGN(index, 64),
						(uint64_t *)&ec_data[0],
						&ec_tag, &oafsr, acc_afsr);
				}
			}

			if (ssmp->ecache_busy)
				mpb |= ECACHE_STATE_BUSY;

			/* Bump the kstat matching this line's M/P/B state. */
			ec_knp = (kstat_named_t *)ec_ksp + mpb;
			ec_knp->value.ul++;

			paddr = ((ec_tag & cpu_ec_tag_mask) <<
				cpu_ec_tag_shift) | (index % ec_set_size);

			/*
			 * We flush the E$ lines depending on the ec_flush,
			 * we additionally flush clean_good_busy and
			 * dirty_good_busy lines for mirrored E$.
			 */
			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
				flushecacheline(paddr, ec_size);
			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
				(ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
					flushecacheline(paddr, ec_size);
			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
				softcall(ecache_page_retire, (void *)paddr);
			}

			/*
			 * Conditionally flush both the clean_good and
			 * dirty_good lines when busy.
			 */
			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
				flush_clean_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->clean_good_busy_flush.value.ul++;
			} else if (DGB(mpb, ec_mirror) &&
						(flush_dirty_busy > 0)) {
				flush_dirty_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->dirty_good_busy_flush.value.ul++;
			}

			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
						ecache_scrub_panic)) {
				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
						tafsr);
			}

		} else {
			ec_ksp->invalid_lines.value.ul++;
		}

		/* Advance one line, wrapping back to the start of the E$. */
		if ((index += ec_linesize) >= ec_size)
			index = 0;

	}

	/*
	 * set the ecache scrub index for the next time around
	 */
	ssmp->ecache_flush_index = index;

	/*
	 * If a CP error accumulated, log an Orphan CP event and clear the
	 * accumulator once the event has been consumed.
	 */
	if (*acc_afsr & P_AFSR_CP) {
		uint64_t ret_afsr;

		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
		if ((ret_afsr & P_AFSR_CP) == 0)
			*acc_afsr = 0;
	}
}
3633 
3634 /*
3635  * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
3636  * we decrement the outstanding request count to zero.
3637  */
3638 
/*ARGSUSED*/
uint_t
scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
{
	int i;
	int outstanding;
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	uint32_t *countp = &ssmp->ec_scrub_outstanding;

	/*
	 * Scrub once for every request posted so far, then atomically
	 * subtract the batch we just serviced; loop again if more
	 * requests arrived while we were scrubbing.
	 */
	do {
		outstanding = *countp;
		ASSERT(outstanding > 0);
		for (i = 0; i < outstanding; i++)
			scrub_ecache_line();
	} while (atomic_add_32_nv(countp, -outstanding));

	return (DDI_INTR_CLAIMED);
}
3657 
3658 /*
3659  * force each cpu to perform an ecache scrub, called from a timeout
3660  */
3661 extern xcfunc_t ecache_scrubreq_tl1;
3662 
3663 void
3664 do_scrub_ecache_line(void)
3665 {
3666 	long delta;
3667 
3668 	if (ecache_calls_a_sec > hz)
3669 		ecache_calls_a_sec = hz;
3670 	else if (ecache_calls_a_sec <= 0)
3671 	    ecache_calls_a_sec = 1;
3672 
3673 	if (ecache_calls_a_sec_mirrored > hz)
3674 		ecache_calls_a_sec_mirrored = hz;
3675 	else if (ecache_calls_a_sec_mirrored <= 0)
3676 	    ecache_calls_a_sec_mirrored = 1;
3677 
3678 	if (ecache_scrub_enable) {
3679 		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
3680 		delta = hz / ec_timeout_calls;
3681 	} else {
3682 		delta = hz;
3683 	}
3684 
3685 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3686 		delta);
3687 }
3688 
3689 /*
3690  * initialization for ecache scrubbing
3691  * This routine is called AFTER all cpus have had cpu_init_private called
3692  * to initialize their private data areas.
3693  */
void
cpu_init_cache_scrub(void)
{
	/*
	 * Clamp ecache_calls_a_sec to hz so the realtime_timeout interval
	 * below (hz / ecache_calls_a_sec) cannot round down to zero ticks.
	 */
	if (ecache_calls_a_sec > hz) {
		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
		ecache_calls_a_sec = hz;
	}

	/*
	 * Register softint for ecache scrubbing.
	 */
	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
	    scrub_ecache_line_intr, NULL, SOFTINT_MT);

	/*
	 * kick off the scrubbing using realtime timeout
	 */
	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
	    hz / ecache_calls_a_sec);
}
3715 
3716 /*
3717  * Unset the busy flag for this cpu.
3718  */
3719 void
3720 cpu_idle_ecache_scrub(struct cpu *cp)
3721 {
3722 	if (CPU_PRIVATE(cp) != NULL) {
3723 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3724 							sfpr_scrub_misc);
3725 		ssmp->ecache_busy = ECACHE_CPU_IDLE;
3726 	}
3727 }
3728 
3729 /*
3730  * Set the busy flag for this cpu.
3731  */
3732 void
3733 cpu_busy_ecache_scrub(struct cpu *cp)
3734 {
3735 	if (CPU_PRIVATE(cp) != NULL) {
3736 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3737 							sfpr_scrub_misc);
3738 		ssmp->ecache_busy = ECACHE_CPU_BUSY;
3739 	}
3740 }
3741 
3742 /*
3743  * initialize the ecache scrubber data structures
3744  * The global entry point cpu_init_private replaces this entry point.
3745  *
3746  */
static void
cpu_init_ecache_scrub_dr(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
	int cpuid = cp->cpu_id;

	/*
	 * initialize bookkeeping for cache scrubbing
	 */
	bzero(ssmp, sizeof (spitfire_scrub_misc_t));

	ssmp->ecache_flush_index = 0;

	ssmp->ecache_nlines =
		cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;

	/*
	 * Determine whether we are running on mirrored SRAM
	 */

	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
	else
		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;

	/* Start out marked busy; the idle loop will clear this flag. */
	cpu_busy_ecache_scrub(cp);

	/*
	 * initialize the kstats
	 */
	ecache_kstat_init(cp);
}
3779 
3780 /*
3781  * uninitialize the ecache scrubber data structures
3782  * The global entry point cpu_uninit_private replaces this entry point.
3783  */
static void
cpu_uninit_ecache_scrub_dr(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);

	/* Tear down the kstats before wiping the bookkeeping below. */
	if (ssmp->ecache_ksp != NULL) {
		kstat_delete(ssmp->ecache_ksp);
		ssmp->ecache_ksp = NULL;
	}

	/*
	 * un-initialize bookkeeping for cache scrubbing
	 */
	bzero(ssmp, sizeof (spitfire_scrub_misc_t));

	cpu_idle_ecache_scrub(cp);
}
3801 
3802 struct kmem_cache *sf_private_cache;
3803 
3804 /*
3805  * Cpu private initialization.  This includes allocating the cpu_private
3806  * data structure, initializing it, and initializing the scrubber for this
3807  * cpu.  This is called once for EVERY cpu, including CPU 0. This function
3808  * calls cpu_init_ecache_scrub_dr to init the scrubber.
3809  * We use kmem_cache_create for the spitfire private data structure because it
3810  * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
3811  */
3812 void
3813 cpu_init_private(struct cpu *cp)
3814 {
3815 	spitfire_private_t *sfprp;
3816 
3817 	ASSERT(CPU_PRIVATE(cp) == NULL);
3818 
3819 	/*
3820 	 * If the sf_private_cache has not been created, create it.
3821 	 */
3822 	if (sf_private_cache == NULL) {
3823 		sf_private_cache = kmem_cache_create("sf_private_cache",
3824 			sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3825 			NULL, NULL, NULL, NULL, 0);
3826 		ASSERT(sf_private_cache);
3827 	}
3828 
3829 	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3830 
3831 	bzero(sfprp, sizeof (spitfire_private_t));
3832 
3833 	cpu_init_ecache_scrub_dr(cp);
3834 }
3835 
3836 /*
3837  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
3838  * deallocate the scrubber data structures and cpu_private data structure.
3839  * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit
3840  * the scrubber for the specified cpu.
3841  */
3842 void
3843 cpu_uninit_private(struct cpu *cp)
3844 {
3845 	ASSERT(CPU_PRIVATE(cp));
3846 
3847 	cpu_uninit_ecache_scrub_dr(cp);
3848 	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3849 	CPU_PRIVATE(cp) = NULL;
3850 }
3851 
3852 /*
3853  * initialize the ecache kstats for each cpu
3854  */
3855 static void
3856 ecache_kstat_init(struct cpu *cp)
3857 {
3858 	struct kstat *ksp;
3859 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3860 
3861 	ASSERT(ssmp != NULL);
3862 
3863 	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3864 	    KSTAT_TYPE_NAMED,
3865 	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3866 	    KSTAT_FLAG_WRITABLE)) == NULL) {
3867 		ssmp->ecache_ksp = NULL;
3868 		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3869 		return;
3870 	}
3871 
3872 	ssmp->ecache_ksp = ksp;
3873 	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3874 	kstat_install(ksp);
3875 }
3876 
3877 /*
3878  * log the bad ecache information
3879  */
3880 static void
3881 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3882 		uint64_t afsr)
3883 {
3884 	spitf_async_flt spf_flt;
3885 	struct async_flt *aflt;
3886 	int i;
3887 	char *class;
3888 
3889 	bzero(&spf_flt, sizeof (spitf_async_flt));
3890 	aflt = &spf_flt.cmn_asyncflt;
3891 
3892 	for (i = 0; i < 8; i++) {
3893 		spf_flt.flt_ec_data[i] = ec_data[i];
3894 	}
3895 
3896 	spf_flt.flt_ec_tag = ec_tag;
3897 
3898 	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
3899 		spf_flt.flt_type = ec_action[mpb].ec_log_type;
3900 	} else spf_flt.flt_type = (ushort_t)mpb;
3901 
3902 	aflt->flt_inst = CPU->cpu_id;
3903 	aflt->flt_class = CPU_FAULT;
3904 	aflt->flt_id = gethrtime_waitfree();
3905 	aflt->flt_addr = paddr;
3906 	aflt->flt_stat = afsr;
3907 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3908 
3909 	switch (mpb) {
3910 	case CPU_ECACHE_TAG_ERR:
3911 	case CPU_ECACHE_ADDR_PAR_ERR:
3912 	case CPU_ECACHE_ETP_ETS_ERR:
3913 	case CPU_ECACHE_STATE_ERR:
3914 		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3915 		break;
3916 	default:
3917 		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3918 		break;
3919 	}
3920 
3921 	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3922 	    ue_queue, aflt->flt_panic);
3923 
3924 	if (aflt->flt_panic)
3925 		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
3926 					"line detected");
3927 }
3928 
3929 /*
3930  * Process an ecache error that occured during the E$ scrubbing.
3931  * We do the ecache scan to find the bad line, flush the bad line
3932  * and start the memscrubber to find any UE (in memory or in another cache)
3933  */
3934 static uint64_t
3935 ecache_scrub_misc_err(int type, uint64_t afsr)
3936 {
3937 	spitf_async_flt spf_flt;
3938 	struct async_flt *aflt;
3939 	uint64_t oafsr;
3940 
3941 	bzero(&spf_flt, sizeof (spitf_async_flt));
3942 	aflt = &spf_flt.cmn_asyncflt;
3943 
3944 	/*
3945 	 * Scan each line in the cache to look for the one
3946 	 * with bad parity
3947 	 */
3948 	aflt->flt_addr = AFLT_INV_ADDR;
3949 	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3950 		&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3951 
3952 	if (oafsr & P_AFSR_CP) {
3953 		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3954 		*cp_afsr |= oafsr;
3955 	}
3956 
3957 	/*
3958 	 * If we found a bad PA, update the state to indicate if it is
3959 	 * memory or I/O space.
3960 	 */
3961 	if (aflt->flt_addr != AFLT_INV_ADDR) {
3962 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3963 			MMU_PAGESHIFT)) ? 1 : 0;
3964 	}
3965 
3966 	spf_flt.flt_type = (ushort_t)type;
3967 
3968 	aflt->flt_inst = CPU->cpu_id;
3969 	aflt->flt_class = CPU_FAULT;
3970 	aflt->flt_id = gethrtime_waitfree();
3971 	aflt->flt_status = afsr;
3972 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3973 
3974 	/*
3975 	 * We have the bad line, flush that line and start
3976 	 * the memscrubber.
3977 	 */
3978 	if (spf_flt.flt_ec_lcnt > 0) {
3979 		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
3980 			cpunodes[CPU->cpu_id].ecache_size);
3981 		read_all_memscrub = 1;
3982 		memscrub_run();
3983 	}
3984 
3985 	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
3986 	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
3987 	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
3988 
3989 	return (oafsr);
3990 }
3991 
/*
 * Handle an E$ tag/state parity error found by the scrubber.  Reads the
 * tag and data for the indexed line, reconstructs the physical address,
 * and either invalidates the tag (clean line with a tag parity error)
 * or panics for the unrecoverable cases.
 */
static void
ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
{
	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
	uint64_t ec_tag, paddr, oafsr;
	ec_data_t ec_data[8];
	int cpuid = CPU->cpu_id;
	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
						ecache_associativity;
	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
			&oafsr, cpu_afsr);
	/* Rebuild the physical address from the tag and the set offset. */
	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
						(index % ec_set_size);

	/*
	 * E$ tag state has good parity
	 */
	if ((afsr_ets & cpu_ec_state_parity) == 0) {
		if (afsr_ets & cpu_ec_parity) {
			/*
			 * E$ tag state bits indicate the line is clean,
			 * invalidate the E$ tag and continue.
			 */
			if (!(state & cpu_ec_state_dirty)) {
				/*
				 * Zero the tag and mark the state invalid
				 * with good parity for the tag.
				 */
				if (isus2i || isus2e)
					write_hb_ec_tag_parity(index);
				else
					write_ec_tag_parity(index);

				/* Sync with the dual tag */
				flushecacheline(0,
					cpunodes[CPU->cpu_id].ecache_size);
				ec_ksp->tags_cleared.value.ul++;
				ecache_scrub_log(ec_data, ec_tag, paddr,
					CPU_ECACHE_TAG_ERR, afsr);
				return;
			} else {
				/* Dirty line with bad tag: unrecoverable. */
				ecache_scrub_log(ec_data, ec_tag, paddr,
					CPU_ECACHE_ADDR_PAR_ERR, afsr);
				cmn_err(CE_PANIC, " E$ tag address has bad"
							" parity");
			}
		} else if ((afsr_ets & cpu_ec_parity) == 0) {
			/*
			 * ETS is zero but ETP is set
			 */
			ecache_scrub_log(ec_data, ec_tag, paddr,
				CPU_ECACHE_ETP_ETS_ERR, afsr);
			cmn_err(CE_PANIC, "AFSR.ETP is set and"
				" AFSR.ETS is zero");
		}
	} else {
		/*
		 * E$ tag state bit has a bad parity
		 */
		ecache_scrub_log(ec_data, ec_tag, paddr,
				CPU_ECACHE_STATE_ERR, afsr);
		cmn_err(CE_PANIC, "E$ tag state has bad parity");
	}
}
4060 
4061 static void
4062 ecache_page_retire(void *arg)
4063 {
4064 	uint64_t paddr = (uint64_t)arg;
4065 	(void) page_retire(paddr, PR_UE);
4066 }
4067 
/*
 * No-op stub: this cpu module performs no slave-side stick-register
 * synchronization; present to satisfy the common-code entry point.
 */
void
sticksync_slave(void)
{}
4071 
/*
 * No-op stub: this cpu module performs no master-side stick-register
 * synchronization; present to satisfy the common-code entry point.
 */
void
sticksync_master(void)
{}
4075 
/*
 * No-op stub: this cpu module implements no additional CE checking;
 * present to satisfy the cpu-module interface.
 */
/*ARGSUSED*/
void
cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
{}
4080 
4081 void
4082 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4083 {
4084 	int status;
4085 	ddi_fm_error_t de;
4086 
4087 	bzero(&de, sizeof (ddi_fm_error_t));
4088 
4089 	de.fme_version = DDI_FME_VERSION;
4090 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4091 	    FM_ENA_FMT1);
4092 	de.fme_flag = expected;
4093 	de.fme_bus_specific = (void *)aflt->flt_addr;
4094 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4095 
4096 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4097 		aflt->flt_panic = 1;
4098 }
4099 
4100 /*ARGSUSED*/
4101 void
4102 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
4103     errorq_t *eqp, uint_t flag)
4104 {
4105 	struct async_flt *aflt = (struct async_flt *)payload;
4106 
4107 	aflt->flt_erpt_class = error_class;
4108 	errorq_dispatch(eqp, payload, payload_sz, flag);
4109 }
4110 
#define	MAX_SIMM	8

/* Per-memory-module (unum) correctable-error accounting record. */
struct ce_info {
	char    name[UNUM_NAMLEN];	/* unum string identifying the DIMM */
	uint64_t intermittent_total;	/* CEs the scrub could not reproduce */
	uint64_t persistent_total;	/* CEs successfully scrubbed */
	uint64_t sticky_total;		/* CEs that scrubbing could not fix */
	unsigned short leaky_bucket_cnt; /* recent persistent CEs (decays) */
};
4120 
4121 /*
4122  * Separately-defined structure for use in reporting the ce_info
4123  * to SunVTS without exposing the internal layout and implementation
4124  * of struct ce_info.
4125  */
4126 static struct ecc_error_info ecc_error_info_data = {
4127 	{ "version", KSTAT_DATA_UINT32 },
4128 	{ "maxcount", KSTAT_DATA_UINT32 },
4129 	{ "count", KSTAT_DATA_UINT32 }
4130 };
4131 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
4132     sizeof (struct kstat_named);
4133 
4134 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
4135 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
4136 #endif
4137 
4138 struct ce_info  *mem_ce_simm = NULL;
4139 size_t mem_ce_simm_size = 0;
4140 
4141 /*
4142  * Default values for the number of CE's allowed per interval.
4143  * Interval is defined in minutes
4144  * SOFTERR_MIN_TIMEOUT is defined in microseconds
4145  */
#define	SOFTERR_LIMIT_DEFAULT		2
#define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
#define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
#define	TIMEOUT_NONE			((timeout_id_t)0)
#define	TIMEOUT_SET			((timeout_id_t)1)

/*
 * timeout identifier for leaky_bucket; TIMEOUT_SET is a sentinel used by
 * ce_count_unum() to claim the one-time initialization via casptr().
 */
static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;

/*
 * Tunables for maximum number of allowed CE's in a given time
 */
int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4162 
4163 void
4164 cpu_mp_init(void)
4165 {
4166 	size_t size = cpu_aflt_size();
4167 	size_t i;
4168 	kstat_t *ksp;
4169 
4170 	/*
4171 	 * Initialize the CE error handling buffers.
4172 	 */
4173 	mem_ce_simm_size = MAX_SIMM * max_ncpus;
4174 	size = sizeof (struct ce_info) * mem_ce_simm_size;
4175 	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
4176 
4177 	ksp = kstat_create("unix", 0, "ecc-info", "misc",
4178 	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
4179 	if (ksp != NULL) {
4180 		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
4181 		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
4182 		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
4183 		ecc_error_info_data.count.value.ui32 = 0;
4184 		kstat_install(ksp);
4185 	}
4186 
4187 	for (i = 0; i < mem_ce_simm_size; i++) {
4188 		struct kstat_ecc_mm_info *kceip;
4189 
4190 		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
4191 		    KM_SLEEP);
4192 		ksp = kstat_create("mm", i, "ecc-info", "misc",
4193 		    KSTAT_TYPE_NAMED,
4194 		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
4195 		    KSTAT_FLAG_VIRTUAL);
4196 		if (ksp != NULL) {
4197 			/*
4198 			 * Re-declare ks_data_size to include room for the
4199 			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
4200 			 * set.
4201 			 */
4202 			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
4203 			    KSTAT_CE_UNUM_NAMLEN;
4204 			ksp->ks_data = kceip;
4205 			kstat_named_init(&kceip->name,
4206 			    "name", KSTAT_DATA_STRING);
4207 			kstat_named_init(&kceip->intermittent_total,
4208 			    "intermittent_total", KSTAT_DATA_UINT64);
4209 			kstat_named_init(&kceip->persistent_total,
4210 			    "persistent_total", KSTAT_DATA_UINT64);
4211 			kstat_named_init(&kceip->sticky_total,
4212 			    "sticky_total", KSTAT_DATA_UINT64);
4213 			/*
4214 			 * Use the default snapshot routine as it knows how to
4215 			 * deal with named kstats with long strings.
4216 			 */
4217 			ksp->ks_update = ecc_kstat_update;
4218 			kstat_install(ksp);
4219 		} else {
4220 			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4221 		}
4222 	}
4223 }
4224 
4225 /*ARGSUSED*/
4226 static void
4227 leaky_bucket_timeout(void *arg)
4228 {
4229 	int i;
4230 	struct ce_info *psimm = mem_ce_simm;
4231 
4232 	for (i = 0; i < mem_ce_simm_size; i++) {
4233 		if (psimm[i].leaky_bucket_cnt > 0)
4234 			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
4235 	}
4236 	add_leaky_bucket_timeout();
4237 }
4238 
4239 static void
4240 add_leaky_bucket_timeout(void)
4241 {
4242 	long timeout_in_microsecs;
4243 
4244 	/*
4245 	 * create timeout for next leak.
4246 	 *
4247 	 * The timeout interval is calculated as follows
4248 	 *
4249 	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4250 	 *
4251 	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4252 	 * in a minute), then multiply this by MICROSEC to get the interval
4253 	 * in microseconds.  Divide this total by ecc_softerr_limit so that
4254 	 * the timeout interval is accurate to within a few microseconds.
4255 	 */
4256 
4257 	if (ecc_softerr_limit <= 0)
4258 		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4259 	if (ecc_softerr_interval <= 0)
4260 		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4261 
4262 	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
4263 	    ecc_softerr_limit;
4264 
4265 	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
4266 		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
4267 
4268 	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
4269 	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
4270 }
4271 
4272 /*
4273  * Legacy Correctable ECC Error Hash
4274  *
4275  * All of the code below this comment is used to implement a legacy array
4276  * which counted intermittent, persistent, and sticky CE errors by unum,
4277  * and then was later extended to publish the data as a kstat for SunVTS.
4278  * All of this code is replaced by FMA, and remains here until such time
4279  * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
4280  *
4281  * Errors are saved in three buckets per-unum:
4282  * (1) sticky - scrub was unsuccessful, cannot be scrubbed
4283  *     This could represent a problem, and is immediately printed out.
4284  * (2) persistent - was successfully scrubbed
4285  *     These errors use the leaky bucket algorithm to determine
4286  *     if there is a serious problem.
4287  * (3) intermittent - may have originated from the cpu or upa/safari bus,
4288  *     and does not necessarily indicate any problem with the dimm itself,
4289  *     is critical information for debugging new hardware.
4290  *     Because we do not know if it came from the dimm, it would be
4291  *     inappropriate to include these in the leaky bucket counts.
4292  *
4293  * If the E$ line was modified before the scrub operation began, then the
4294  * displacement flush at the beginning of scrubphys() will cause the modified
4295  * line to be written out, which will clean up the CE.  Then, any subsequent
4296  * read will not cause an error, which will cause persistent errors to be
4297  * identified as intermittent.
4298  *
4299  * If a DIMM is going bad, it will produce true persistents as well as
4300  * false intermittents, so these intermittents can be safely ignored.
4301  *
4302  * If the error count is excessive for a DIMM, this function will return
4303  * PR_MCE, and the CPU module may then decide to remove that page from use.
4304  */
/*
 * Record a CE against its memory module (see the block comment above
 * for the bucket semantics).  Returns PR_MCE when the error kind or
 * rate indicates the page should be retired, PR_OK otherwise.
 */
static int
ce_count_unum(int status, int len, char *unum)
{
	int i;
	struct ce_info *psimm = mem_ce_simm;
	int page_status = PR_OK;

	ASSERT(psimm != NULL);

	/* Nothing to record without a unum or a recognized error kind. */
	if (len <= 0 ||
	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
		return (page_status);

	/*
	 * Initialize the leaky_bucket timeout; the CAS from TIMEOUT_NONE
	 * to TIMEOUT_SET guarantees only the first caller arms it.
	 */
	if (casptr(&leaky_bucket_timeout_id,
	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
		add_leaky_bucket_timeout();

	for (i = 0; i < mem_ce_simm_size; i++) {
		if (psimm[i].name[0] == '\0') {
			/*
			 * Hit the end of the valid entries, add
			 * a new one.
			 *
			 * NOTE(review): strncpy() will not NUL-terminate
			 * psimm[i].name if len >= UNUM_NAMLEN; presumably
			 * unum strings are always shorter -- confirm.
			 */
			(void) strncpy(psimm[i].name, unum, len);
			if (status & ECC_STICKY) {
				/*
				 * Sticky - the leaky bucket is used to track
				 * soft errors.  Since a sticky error is a
				 * hard error and likely to be retired soon,
				 * we do not count it in the leaky bucket.
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 1;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PR_MCE;
			} else if (status & ECC_PERSISTENT) {
				psimm[i].leaky_bucket_cnt = 1;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 1;
				psimm[i].sticky_total = 0;
			} else {
				/*
				 * Intermittent - Because the scrub operation
				 * cannot find the error in the DIMM, we will
				 * not count these in the leaky bucket
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 1;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 0;
			}
			ecc_error_info_data.count.value.ui32++;
			break;
		} else if (strncmp(unum, psimm[i].name, len) == 0) {
			/*
			 * Found an existing entry for the current
			 * memory module, adjust the counts.
			 */
			if (status & ECC_STICKY) {
				psimm[i].sticky_total++;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PR_MCE;
			} else if (status & ECC_PERSISTENT) {
				int new_value;

				/*
				 * Atomically bump the bucket; if it exceeds
				 * the limit, warn, undo the bump, and ask
				 * for page retirement.
				 */
				new_value = atomic_add_16_nv(
				    &psimm[i].leaky_bucket_cnt, 1);
				psimm[i].persistent_total++;
				if (new_value > ecc_softerr_limit) {
					cmn_err(CE_WARN, "[AFT0] Most recent %d"
					    " soft errors from Memory Module"
					    " %s exceed threshold (N=%d,"
					    " T=%dh:%02dm) triggering page"
					    " retire", new_value, unum,
					    ecc_softerr_limit,
					    ecc_softerr_interval / 60,
					    ecc_softerr_interval % 60);
					atomic_add_16(
					    &psimm[i].leaky_bucket_cnt, -1);
					page_status = PR_MCE;
				}
			} else { /* Intermittent */
				psimm[i].intermittent_total++;
			}
			break;
		}
	}

	/* The loop ran off the end: every slot is already occupied. */
	if (i >= mem_ce_simm_size)
		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
		    "space.\n");

	return (page_status);
}
4408 
4409 /*
4410  * Function to support counting of IO detected CEs.
4411  */
4412 void
4413 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
4414 {
4415 	int err;
4416 
4417 	err = ce_count_unum(ecc->flt_status, len, unum);
4418 	if (err != PR_OK && automatic_page_removal) {
4419 		(void) page_retire(ecc->flt_addr, err);
4420 	}
4421 }
4422 
4423 static int
4424 ecc_kstat_update(kstat_t *ksp, int rw)
4425 {
4426 	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
4427 	struct ce_info *ceip = mem_ce_simm;
4428 	int i = ksp->ks_instance;
4429 
4430 	if (rw == KSTAT_WRITE)
4431 		return (EACCES);
4432 
4433 	ASSERT(ksp->ks_data != NULL);
4434 	ASSERT(i < mem_ce_simm_size && i >= 0);
4435 
4436 	/*
4437 	 * Since we're not using locks, make sure that we don't get partial
4438 	 * data. The name is always copied before the counters are incremented
4439 	 * so only do this update routine if at least one of the counters is
4440 	 * non-zero, which ensures that ce_count_unum() is done, and the
4441 	 * string is fully copied.
4442 	 */
4443 	if (ceip[i].intermittent_total == 0 &&
4444 	    ceip[i].persistent_total == 0 &&
4445 	    ceip[i].sticky_total == 0) {
4446 		/*
4447 		 * Uninitialized or partially initialized. Ignore.
4448 		 * The ks_data buffer was allocated via kmem_zalloc,
4449 		 * so no need to bzero it.
4450 		 */
4451 		return (0);
4452 	}
4453 
4454 	kstat_named_setstr(&kceip->name, ceip[i].name);
4455 	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
4456 	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
4457 	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
4458 
4459 	return (0);
4460 }
4461 
4462 #define	VIS_BLOCKSIZE		64
4463 
4464 int
4465 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
4466 {
4467 	int ret, watched;
4468 
4469 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4470 	ret = dtrace_blksuword32(addr, data, 0);
4471 	if (watched)
4472 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4473 
4474 	return (ret);
4475 }
4476 
/*
 * No-op stub: entry hook for a cpu moving into the faulted state;
 * nothing to do for this cpu module.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
}
4482 
/*
 * No-op stub: exit hook for a cpu leaving the faulted state;
 * nothing to do for this cpu module.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
}
4488 
/*
 * No-op stub: this cpu module needs no special kernel page-size
 * initialization for the given hat.
 */
/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
{
}
4494 
4495 size_t
4496 mmu_get_kernel_lpsize(size_t lpsize)
4497 {
4498 	uint_t tte;
4499 
4500 	if (lpsize == 0) {
4501 		/* no setting for segkmem_lpsize in /etc/system: use default */
4502 		return (MMU_PAGESIZE4M);
4503 	}
4504 
4505 	for (tte = TTE8K; tte <= TTE4M; tte++) {
4506 		if (lpsize == TTEBYTES(tte))
4507 			return (lpsize);
4508 	}
4509 
4510 	return (TTEBYTES(TTE8K));
4511 }
4512