xref: /titanic_52/usr/src/uts/sun4u/cpu/spitfire.c (revision 554ff184129088135ad2643c1c9832174a17be88)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/archsystm.h>
32 #include <sys/machparam.h>
33 #include <sys/machsystm.h>
34 #include <sys/cpu.h>
35 #include <sys/elf_SPARC.h>
36 #include <vm/hat_sfmmu.h>
37 #include <vm/page.h>
38 #include <sys/cpuvar.h>
39 #include <sys/spitregs.h>
40 #include <sys/async.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/dditypes.h>
44 #include <sys/sunddi.h>
45 #include <sys/cpu_module.h>
46 #include <sys/prom_debug.h>
47 #include <sys/vmsystm.h>
48 #include <sys/prom_plat.h>
49 #include <sys/sysmacros.h>
50 #include <sys/intreg.h>
51 #include <sys/machtrap.h>
52 #include <sys/ontrap.h>
53 #include <sys/ivintr.h>
54 #include <sys/atomic.h>
55 #include <sys/panic.h>
56 #include <sys/ndifm.h>
57 #include <sys/fm/protocol.h>
58 #include <sys/fm/util.h>
59 #include <sys/fm/cpu/UltraSPARC-II.h>
60 #include <sys/ddi.h>
61 #include <sys/ecc_kstat.h>
62 #include <sys/watchpoint.h>
63 #include <sys/dtrace.h>
64 #include <sys/errclassify.h>
65 
66 uchar_t	*ctx_pgsz_array = NULL;
67 
/*
 * One 8-byte slice of an ecache line together with the AFSR state that goes
 * with it.  A full ecache line (64 bytes) is dumped as 8 of these elements.
 */
struct sf_ec_data_elm {
	uint64_t ec_d8;		/* 8 bytes of ecache data */
	uint64_t ec_afsr;	/* AFSR state associated with ec_d8 */
};
typedef struct sf_ec_data_elm ec_data_t;
76 
77 /*
78  * Define spitfire (Ultra I/II) specific asynchronous error structure
79  */
80 typedef struct spitfire_async_flt {
81 	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
82 	ushort_t flt_type;		/* types of faults - cpu specific */
83 	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
84 	uint64_t flt_ec_tag;		/* E$ tag info */
85 	int flt_ec_lcnt;		/* number of bad E$ lines */
86 	ushort_t flt_sdbh;		/* UDBH reg */
87 	ushort_t flt_sdbl;		/* UDBL reg */
88 } spitf_async_flt;
89 
90 /*
91  * Prototypes for support routines in spitfire_asm.s:
92  */
93 extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
94 extern uint64_t get_lsu(void);
95 extern void set_lsu(uint64_t ncc);
96 extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
97 				uint64_t *oafsr, uint64_t *acc_afsr);
98 extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
99 extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
100 				uint64_t *acc_afsr);
101 extern uint64_t read_and_clear_afsr();
102 extern void write_ec_tag_parity(uint32_t id);
103 extern void write_hb_ec_tag_parity(uint32_t id);
104 
105 /*
106  * Spitfire module routines:
107  */
108 static void cpu_async_log_err(void *flt);
109 /*PRINTFLIKE6*/
110 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
111     uint_t logflags, const char *endstr, const char *fmt, ...);
112 
113 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
114 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
115 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
116 
117 static void log_ce_err(struct async_flt *aflt, char *unum);
118 static void log_ue_err(struct async_flt *aflt, char *unum);
119 static void check_misc_err(spitf_async_flt *spf_flt);
120 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
121 static int check_ecc(struct async_flt *aflt);
122 static uint_t get_cpu_status(uint64_t arg);
123 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
124 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
125 		int *m, uint64_t *afsr);
126 static void ecache_kstat_init(struct cpu *cp);
127 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
128 		uint64_t paddr, int mpb, uint64_t);
129 static uint64_t ecache_scrub_misc_err(int, uint64_t);
130 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
131 static void ecache_page_retire(void *);
132 static int ecc_kstat_update(kstat_t *ksp, int rw);
133 static int ce_count_unum(int status, int len, char *unum);
134 static void add_leaky_bucket_timeout(void);
135 static int synd_to_synd_code(int synd_status, ushort_t synd);
136 
137 extern uint_t read_all_memscrub;
138 extern void memscrub_run(void);
139 
140 static uchar_t	isus2i;			/* set if sabre */
141 static uchar_t	isus2e;			/* set if hummingbird */
142 
143 /*
144  * Default ecache mask and shift settings for Spitfire.  If we detect a
145  * different CPU implementation, we will modify these values at boot time.
146  */
147 static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
148 static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
149 static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
150 static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
151 static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
152 static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
153 static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
154 static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
155 static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
156 static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;
157 
158 /*
159  * Default ecache state bits for Spitfire.  These individual bits indicate if
160  * the given line is in any of the valid or modified states, respectively.
161  * Again, we modify these at boot if we detect a different CPU.
162  */
163 static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
164 static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
165 static uchar_t cpu_ec_parity		= S_EC_PARITY;
166 static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;
167 
168 /*
169  * This table is used to determine which bit(s) is(are) bad when an ECC
170  * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
171  * of this array have the following semantics:
172  *
173  *      00-63   The number of the bad bit, when only one bit is bad.
174  *      64      ECC bit C0 is bad.
175  *      65      ECC bit C1 is bad.
176  *      66      ECC bit C2 is bad.
177  *      67      ECC bit C3 is bad.
178  *      68      ECC bit C4 is bad.
179  *      69      ECC bit C5 is bad.
180  *      70      ECC bit C6 is bad.
181  *      71      ECC bit C7 is bad.
182  *      72      Two bits are bad.
183  *      73      Three bits are bad.
184  *      74      Four bits are bad.
185  *      75      More than Four bits are bad.
186  *      76      NO bits are bad.
187  * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
188  */
189 
#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

/*
 * Classify a syndrome code: a single bad data bit is any code below C0,
 * a single bad check bit is any code from C0 through C7.  The argument is
 * parenthesized so that an expression argument is compared as a whole; it
 * is still evaluated twice, so it must not have side effects.
 */
#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	(((synd_code) >= 0) && \
						    ((synd_code) < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	(((synd_code) >= C0) && \
						    ((synd_code) <= C7))
208 
209 static char ecc_syndrome_tab[] =
210 {
211 	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
212 	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
213 	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
214 	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
215 	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
216 	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
217 	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
218 	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
219 	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
220 	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
221 	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
222 	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
223 	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
224 	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
225 	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
226 	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
227 };
228 
229 #define	SYND_TBL_SIZE 256
230 
231 /*
232  * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
233  * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
234  */
#define	UDBL_REG	0x8000			/* flag bit: syndrome is UDBL */
#define	UDBL(synd)	(((synd) & UDBL_REG) >> 15)	/* 1 if flag set */
#define	SYND(synd)	((synd) & 0x7FFF)	/* syndrome without flag */
238 
239 /*
240  * These error types are specific to Spitfire and are used internally for the
241  * spitfire fault structure flt_type field.
242  */
243 #define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
244 #define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
245 #define	CPU_WP_ERR		2	/* WP parity error */
246 #define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
247 #define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
248 #define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
249 #define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
250 #define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
251 #define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
252 #define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
253 #define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
254 #define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
255 #define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
256 #define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
257 #define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
258 #define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */
259 
260 /*
261  * Macro to access the "Spitfire cpu private" data structure.
262  */
263 #define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))
264 
265 /*
266  * set to 0 to disable automatic retiring of pages on
267  * DIMMs that have excessive soft errors
268  */
269 int automatic_page_removal = 1;
270 
271 /*
272  * Heuristic for figuring out which module to replace.
273  * Relative likelihood that this P_SYND indicates that this module is bad.
274  * We call it a "score", though, not a relative likelihood.
275  *
276  * Step 1.
277  * Assign a score to each byte of P_SYND according to the following rules:
278  * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
279  * If one bit on, give it a 95.
280  * If seven bits on, give it a 10.
281  * If two bits on:
282  *   in different nybbles, a 90
283  *   in same nybble, but unaligned, 85
284  *   in same nybble and as an aligned pair, 80
285  * If six bits on, look at the bits that are off:
286  *   in same nybble and as an aligned pair, 15
287  *   in same nybble, but unaligned, 20
288  *   in different nybbles, a 25
289  * If three bits on:
290  *   in different nybbles, no aligned pairs, 75
291  *   in different nybbles, one aligned pair, 70
292  *   in the same nybble, 65
293  * If five bits on, look at the bits that are off:
294  *   in the same nybble, 30
295  *   in different nybbles, one aligned pair, 35
296  *   in different nybbles, no aligned pairs, 40
297  * If four bits on:
298  *   all in one nybble, 45
299  *   as two aligned pairs, 50
300  *   one aligned pair, 55
301  *   no aligned pairs, 60
302  *
303  * Step 2:
304  * Take the higher of the two scores (one for each byte) as the score
305  * for the module.
306  *
307  * Print the score for each module, and field service should replace the
308  * module with the highest score.
309  */
310 
311 /*
312  * In the table below, the first row/column comment indicates the
313  * number of bits on in that nybble; the second row/column comment is
314  * the hex digit.
315  */
316 
static int
p_synd_score_table[256] = {
	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
};

/*
 * Score a 16-bit P_SYND value: look up each of its two bytes in
 * p_synd_score_table[] and return the larger of the two byte scores.
 */
int
ecc_psynd_score(ushort_t p_synd)
{
	int lo_score = p_synd_score_table[p_synd & 0xFF];
	int hi_score = p_synd_score_table[(p_synd >> 8) & 0xFF];

	if (hi_score >= lo_score)
		return (hi_score);
	return (lo_score);
}
352 
353 /*
354  * Async Fault Logging
355  *
356  * To ease identifying, reading, and filtering async fault log messages, the
357  * label [AFT#] is now prepended to each async fault message.  These messages
358  * and the logging rules are implemented by cpu_aflt_log(), below.
359  *
360  * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
361  *          This includes both corrected ECC memory and ecache faults.
362  *
363  * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
364  *          else except CE errors) with a priority of 1 (highest).  This tag
365  *          is also used for panic messages that result from an async fault.
366  *
367  * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
368  * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
369  *          of the E-$ data and tags.
370  *
371  * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
372  * printed on the console.  To send all AFT logs to both the log and the
373  * console, set aft_verbose = 1.
374  */
375 
376 #define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
377 #define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
378 #define	CPU_ERRID		0x0004	/* print flt_id */
379 #define	CPU_TL			0x0008	/* print flt_tl */
380 #define	CPU_ERRID_FIRST 	0x0010	/* print flt_id first in message */
381 #define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
382 #define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
383 #define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
384 #define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
385 #define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
386 #define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
387 #define	CPU_FAULTPC		0x0800	/* print flt_pc */
388 #define	CPU_SYND		0x1000	/* print flt_synd and unum */
389 
390 #define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
391 				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
392 				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
393 				CPU_FAULTPC)
394 #define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
395 #define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
396 				~CPU_SPACE)
397 #define	PARERR_LFLAGS	(CMN_LFLAGS)
398 #define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
399 #define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
400 				~CPU_FLTCPU & ~CPU_FAULTPC)
401 #define	BERRTO_LFLAGS	(CMN_LFLAGS)
402 #define	NO_LFLAGS	(0)
403 
404 #define	AFSR_FMTSTR0	"\020\1ME"
405 #define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
406 			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
407 #define	UDB_FMTSTR	"\020\012UE\011CE"
408 
409 /*
410  * Maximum number of contexts for Spitfire.
411  */
412 #define	MAX_NCTXS	(1 << 13)
413 
414 /*
415  * Save the cache bootup state for use when internal
416  * caches are to be re-enabled after an error occurs.
417  */
418 uint64_t	cache_boot_state = 0;
419 
420 /*
421  * PA[31:0] represent Displacement in UPA configuration space.
422  */
423 uint_t	root_phys_addr_lo_mask = 0xffffffff;
424 
425 /*
426  * Spitfire legacy globals
427  */
428 int	itlb_entries;
429 int	dtlb_entries;
430 
431 void
432 cpu_setup(void)
433 {
434 	extern int page_retire_messages;
435 	extern int at_flags;
436 #if defined(SF_ERRATA_57)
437 	extern caddr_t errata57_limit;
438 #endif
439 	extern int disable_text_largepages;
440 	extern int disable_initdata_largepages;
441 
442 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
443 
444 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
445 
446 	/*
447 	 * Spitfire isn't currently FMA-aware, so we have to enable the
448 	 * page retirement messages.
449 	 */
450 	page_retire_messages = 1;
451 
452 	/*
453 	 * save the cache bootup state.
454 	 */
455 	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);
456 
457 	/*
458 	 * Use the maximum number of contexts available for Spitfire unless
459 	 * it has been tuned for debugging.
460 	 * We are checking against 0 here since this value can be patched
461 	 * while booting.  It can not be patched via /etc/system since it
462 	 * will be patched too late and thus cause the system to panic.
463 	 */
464 	if (nctxs == 0)
465 		nctxs = MAX_NCTXS;
466 
467 	if (use_page_coloring) {
468 		do_pg_coloring = 1;
469 		if (use_virtual_coloring)
470 			do_virtual_coloring = 1;
471 	}
472 
473 	/*
474 	 * Tune pp_slots to use up to 1/8th of the tlb entries.
475 	 */
476 	pp_slots = MIN(8, MAXPP_SLOTS);
477 
478 	/*
479 	 * Block stores invalidate all pages of the d$ so pagecopy
480 	 * et. al. do not need virtual translations with virtual
481 	 * coloring taken into consideration.
482 	 */
483 	pp_consistent_coloring = 0;
484 
485 	isa_list =
486 	    "sparcv9+vis sparcv9 "
487 	    "sparcv8plus+vis sparcv8plus "
488 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
489 
490 	cpu_hwcap_flags = AV_SPARC_VIS;
491 
492 	/*
493 	 * On Spitfire, there's a hole in the address space
494 	 * that we must never map (the hardware only support 44-bits of
495 	 * virtual address).  Later CPUs are expected to have wider
496 	 * supported address ranges.
497 	 *
498 	 * See address map on p23 of the UltraSPARC 1 user's manual.
499 	 */
500 	hole_start = (caddr_t)0x80000000000ull;
501 	hole_end = (caddr_t)0xfffff80000000000ull;
502 
503 	/*
504 	 * A spitfire call bug requires us to be a further 4Gbytes of
505 	 * firewall from the spec.
506 	 *
507 	 * See Spitfire Errata #21
508 	 */
509 	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
510 	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));
511 
512 	/*
513 	 * The kpm mapping window.
514 	 * kpm_size:
515 	 *	The size of a single kpm range.
516 	 *	The overall size will be: kpm_size * vac_colors.
517 	 * kpm_vbase:
518 	 *	The virtual start address of the kpm range within the kernel
519 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
520 	 */
521 	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
522 	kpm_size_shift = 41;
523 	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
524 
525 #if defined(SF_ERRATA_57)
526 	errata57_limit = (caddr_t)0x80000000ul;
527 #endif
528 
529 	/*
530 	 * Allow only 8K, 64K and 4M pages for text by default.
531 	 * Allow only 8K and 64K page for initialized data segments by
532 	 * default.
533 	 */
534 	disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) |
535 	    (1 << TTE256M);
536 	disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) |
537 	    (1 << TTE32M) | (1 << TTE256M);
538 }
539 
540 static int
541 getintprop(dnode_t node, char *name, int deflt)
542 {
543 	int	value;
544 
545 	switch (prom_getproplen(node, name)) {
546 	case 0:
547 		value = 1;	/* boolean properties */
548 		break;
549 
550 	case sizeof (int):
551 		(void) prom_getprop(node, name, (caddr_t)&value);
552 		break;
553 
554 	default:
555 		value = deflt;
556 		break;
557 	}
558 
559 	return (value);
560 }
561 
562 /*
563  * Set the magic constants of the implementation.
564  */
565 void
566 cpu_fiximp(dnode_t dnode)
567 {
568 	extern int vac_size, vac_shift;
569 	extern uint_t vac_mask;
570 	extern int dcache_line_mask;
571 	int i, a;
572 	static struct {
573 		char	*name;
574 		int	*var;
575 	} prop[] = {
576 		"dcache-size",		&dcache_size,
577 		"dcache-line-size",	&dcache_linesize,
578 		"icache-size",		&icache_size,
579 		"icache-line-size",	&icache_linesize,
580 		"ecache-size",		&ecache_size,
581 		"ecache-line-size",	&ecache_alignsize,
582 		"ecache-associativity", &ecache_associativity,
583 		"#itlb-entries",	&itlb_entries,
584 		"#dtlb-entries",	&dtlb_entries,
585 		};
586 
587 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
588 		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
589 			*prop[i].var = a;
590 		}
591 	}
592 
593 	ecache_setsize = ecache_size / ecache_associativity;
594 
595 	vac_size = S_VAC_SIZE;
596 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
597 	i = 0; a = vac_size;
598 	while (a >>= 1)
599 		++i;
600 	vac_shift = i;
601 	shm_alignment = vac_size;
602 	vac = 1;
603 
604 	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);
605 
606 	/*
607 	 * UltraSPARC I & II have ecache sizes running
608 	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
609 	 * and 8 MB. Adjust the copyin/copyout limits
610 	 * according to the cache size. The magic number
611 	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
612 	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
613 	 * VIS instructions.
614 	 *
615 	 * We assume that all CPUs on the system have the same size
616 	 * ecache. We're also called very early in the game.
617 	 * /etc/system will be parsed *after* we're called so
618 	 * these values can be overwritten.
619 	 */
620 
621 	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
622 	if (ecache_size <= 524288) {
623 		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
624 		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
625 		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
626 	} else if (ecache_size == 1048576) {
627 		hw_copy_limit_2 = 1024;
628 		hw_copy_limit_4 = 1280;
629 		hw_copy_limit_8 = 1536;
630 	} else if (ecache_size == 2097152) {
631 		hw_copy_limit_2 = 1536;
632 		hw_copy_limit_4 = 2048;
633 		hw_copy_limit_8 = 2560;
634 	} else if (ecache_size == 4194304) {
635 		hw_copy_limit_2 = 2048;
636 		hw_copy_limit_4 = 2560;
637 		hw_copy_limit_8 = 3072;
638 	} else {
639 		hw_copy_limit_2 = 2560;
640 		hw_copy_limit_4 = 3072;
641 		hw_copy_limit_8 = 3584;
642 	}
643 }
644 
645 /*
646  * Called by setcpudelay
647  */
648 void
649 cpu_init_tick_freq(void)
650 {
651 	/*
652 	 * Determine the cpu frequency by calling
653 	 * tod_get_cpufrequency. Use an approximate freqency
654 	 * value computed by the prom if the tod module
655 	 * is not initialized and loaded yet.
656 	 */
657 	if (tod_ops.tod_get_cpufrequency != NULL) {
658 		mutex_enter(&tod_lock);
659 		sys_tick_freq = tod_ops.tod_get_cpufrequency();
660 		mutex_exit(&tod_lock);
661 	} else {
662 #if defined(HUMMINGBIRD)
663 		/*
664 		 * the hummingbird version of %stick is used as the basis for
665 		 * low level timing; this provides an independent constant-rate
666 		 * clock for general system use, and frees power mgmt to set
667 		 * various cpu clock speeds.
668 		 */
669 		if (system_clock_freq == 0)
670 			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
671 			    system_clock_freq);
672 		sys_tick_freq = system_clock_freq;
673 #else /* SPITFIRE */
674 		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
675 #endif
676 	}
677 }
678 
679 
680 void shipit(int upaid);
681 extern uint64_t xc_tick_limit;
682 extern uint64_t xc_tick_jump_limit;
683 
684 #ifdef SEND_MONDO_STATS
685 uint64_t x_early[NCPU][64];
686 #endif
687 
688 /*
689  * Note: A version of this function is used by the debugger via the KDI,
690  * and must be kept in sync with this version.  Any changes made to this
691  * function to support new chips or to accomodate errata must also be included
692  * in the KDI-specific version.  See spitfire_kdi.c.
693  */
694 void
695 send_one_mondo(int cpuid)
696 {
697 	uint64_t idsr, starttick, endtick;
698 	int upaid, busy, nack;
699 	uint64_t tick, tick_prev;
700 	ulong_t ticks;
701 
702 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
703 	upaid = CPUID_TO_UPAID(cpuid);
704 	tick = starttick = gettick();
705 	shipit(upaid);
706 	endtick = starttick + xc_tick_limit;
707 	busy = nack = 0;
708 	for (;;) {
709 		idsr = getidsr();
710 		if (idsr == 0)
711 			break;
712 		/*
713 		 * When we detect an irregular tick jump, we adjust
714 		 * the timer window to the current tick value.
715 		 */
716 		tick_prev = tick;
717 		tick = gettick();
718 		ticks = tick - tick_prev;
719 		if (ticks > xc_tick_jump_limit) {
720 			endtick = tick + xc_tick_limit;
721 		} else if (tick > endtick) {
722 			if (panic_quiesce)
723 				return;
724 			cmn_err(CE_PANIC,
725 			"send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
726 			upaid, nack, busy);
727 		}
728 		if (idsr & IDSR_BUSY) {
729 			busy++;
730 			continue;
731 		}
732 		drv_usecwait(1);
733 		shipit(upaid);
734 		nack++;
735 		busy = 0;
736 	}
737 #ifdef SEND_MONDO_STATS
738 	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
739 #endif
740 }
741 
742 void
743 send_mondo_set(cpuset_t set)
744 {
745 	int i;
746 
747 	for (i = 0; i < NCPU; i++)
748 		if (CPU_IN_SET(set, i)) {
749 			send_one_mondo(i);
750 			CPUSET_DEL(set, i);
751 			if (CPUSET_ISNULL(set))
752 				break;
753 		}
754 }
755 
/*
 * Intentionally empty: this CPU module does no work in syncfpu().
 */
void
syncfpu(void)
{
	/* nothing to do */
}
760 
761 /*
762  * Determine the size of the CPU module's error structure in bytes.  This is
763  * called once during boot to initialize the error queues.
764  */
765 int
766 cpu_aflt_size(void)
767 {
768 	/*
769 	 * We need to determine whether this is a sabre, Hummingbird or a
770 	 * Spitfire/Blackbird impl and set the appropriate state variables for
771 	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
772 	 * too early in the boot flow and the cpunodes are not initialized.
773 	 * This routine will be called once after cpunodes[] is ready, so do
774 	 * it here.
775 	 */
776 	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
777 		isus2i = 1;
778 		cpu_ec_tag_mask = SB_ECTAG_MASK;
779 		cpu_ec_state_mask = SB_ECSTATE_MASK;
780 		cpu_ec_par_mask = SB_ECPAR_MASK;
781 		cpu_ec_par_shift = SB_ECPAR_SHIFT;
782 		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
783 		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
784 		cpu_ec_state_exl = SB_ECSTATE_EXL;
785 		cpu_ec_state_mod = SB_ECSTATE_MOD;
786 
787 		/* These states do not exist in sabre - set to 0xFF */
788 		cpu_ec_state_shr = 0xFF;
789 		cpu_ec_state_own = 0xFF;
790 
791 		cpu_ec_state_valid = SB_ECSTATE_VALID;
792 		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
793 		cpu_ec_state_parity = SB_ECSTATE_PARITY;
794 		cpu_ec_parity = SB_EC_PARITY;
795 	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
796 		isus2e = 1;
797 		cpu_ec_tag_mask = HB_ECTAG_MASK;
798 		cpu_ec_state_mask = HB_ECSTATE_MASK;
799 		cpu_ec_par_mask = HB_ECPAR_MASK;
800 		cpu_ec_par_shift = HB_ECPAR_SHIFT;
801 		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
802 		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
803 		cpu_ec_state_exl = HB_ECSTATE_EXL;
804 		cpu_ec_state_mod = HB_ECSTATE_MOD;
805 
806 		/* These states do not exist in hummingbird - set to 0xFF */
807 		cpu_ec_state_shr = 0xFF;
808 		cpu_ec_state_own = 0xFF;
809 
810 		cpu_ec_state_valid = HB_ECSTATE_VALID;
811 		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
812 		cpu_ec_state_parity = HB_ECSTATE_PARITY;
813 		cpu_ec_parity = HB_EC_PARITY;
814 	}
815 
816 	return (sizeof (spitf_async_flt));
817 }
818 
819 
820 /*
821  * Correctable ecc error trap handler
822  */
823 /*ARGSUSED*/
824 void
825 cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
826 	uint_t p_afsr_high, uint_t p_afar_high)
827 {
828 	ushort_t sdbh, sdbl;
829 	ushort_t e_syndh, e_syndl;
830 	spitf_async_flt spf_flt;
831 	struct async_flt *ecc;
832 	int queue = 1;
833 
834 	uint64_t t_afar = p_afar;
835 	uint64_t t_afsr = p_afsr;
836 
837 	/*
838 	 * Note: the Spitfire data buffer error registers
839 	 * (upper and lower halves) are or'ed into the upper
840 	 * word of the afsr by ce_err().
841 	 */
842 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
843 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
844 
845 	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
846 	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);
847 
848 	t_afsr &= S_AFSR_MASK;
849 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
850 
851 	/* Setup the async fault structure */
852 	bzero(&spf_flt, sizeof (spitf_async_flt));
853 	ecc = (struct async_flt *)&spf_flt;
854 	ecc->flt_id = gethrtime_waitfree();
855 	ecc->flt_stat = t_afsr;
856 	ecc->flt_addr = t_afar;
857 	ecc->flt_status = ECC_C_TRAP;
858 	ecc->flt_bus_id = getprocessorid();
859 	ecc->flt_inst = CPU->cpu_id;
860 	ecc->flt_pc = (caddr_t)rp->r_pc;
861 	ecc->flt_func = log_ce_err;
862 	ecc->flt_in_memory =
863 		(pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
864 	spf_flt.flt_sdbh = sdbh;
865 	spf_flt.flt_sdbl = sdbl;
866 
867 	/*
868 	 * Check for fatal conditions.
869 	 */
870 	check_misc_err(&spf_flt);
871 
872 	/*
873 	 * Pananoid checks for valid AFSR and UDBs
874 	 */
875 	if ((t_afsr & P_AFSR_CE) == 0) {
876 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
877 			"** Panic due to CE bit not set in the AFSR",
878 			"  Corrected Memory Error on");
879 	}
880 
881 	/*
882 	 * We want to skip logging only if ALL the following
883 	 * conditions are true:
884 	 *
885 	 *	1. There is only one error
886 	 *	2. That error is a correctable memory error
887 	 *	3. The error is caused by the memory scrubber (in which case
888 	 *	    the error will have occurred under on_trap protection)
889 	 *	4. The error is on a retired page
890 	 *
891 	 * Note: OT_DATA_EC is used places other than the memory scrubber.
892 	 * However, none of those errors should occur on a retired page.
893 	 */
894 	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
895 	    curthread->t_ontrap != NULL) {
896 
897 		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
898 			page_t *pp = page_numtopp_nolock((pfn_t)
899 			    (ecc->flt_addr >> MMU_PAGESHIFT));
900 
901 			if (pp != NULL && page_isretired(pp)) {
902 				queue = 0;
903 			}
904 		}
905 	}
906 
907 	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
908 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
909 			"** Panic due to CE bits not set in the UDBs",
910 			" Corrected Memory Error on");
911 	}
912 
913 	if ((sdbh >> 8) & 1) {
914 		ecc->flt_synd = e_syndh;
915 		ce_scrub(ecc);
916 		if (queue) {
917 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
918 			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
919 		}
920 	}
921 
922 	if ((sdbl >> 8) & 1) {
923 		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
924 		ecc->flt_synd = e_syndl | UDBL_REG;
925 		ce_scrub(ecc);
926 		if (queue) {
927 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
928 			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
929 		}
930 	}
931 
932 	/*
933 	 * Re-enable all error trapping (CEEN currently cleared).
934 	 */
935 	clr_datapath();
936 	set_asyncflt(P_AFSR_CE);
937 	set_error_enable(EER_ENABLE);
938 }
939 
940 /*
941  * Cpu specific CE logging routine
942  */
943 static void
944 log_ce_err(struct async_flt *aflt, char *unum)
945 {
946 	spitf_async_flt spf_flt;
947 
948 	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
949 		return;
950 	}
951 
952 	spf_flt.cmn_asyncflt = *aflt;
953 	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
954 	    " Corrected Memory Error detected by");
955 }
956 
957 /*
958  * Spitfire does not perform any further CE classification refinement
959  */
960 /*ARGSUSED*/
961 int
962 ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
963     size_t afltoffset)
964 {
965 	return (0);
966 }
967 
968 char *
969 flt_to_error_type(struct async_flt *aflt)
970 {
971 	if (aflt->flt_status & ECC_INTERMITTENT)
972 		return (ERR_TYPE_DESC_INTERMITTENT);
973 	if (aflt->flt_status & ECC_PERSISTENT)
974 		return (ERR_TYPE_DESC_PERSISTENT);
975 	if (aflt->flt_status & ECC_STICKY)
976 		return (ERR_TYPE_DESC_STICKY);
977 	return (ERR_TYPE_DESC_UNKNOWN);
978 }
979 
980 /*
981  * Called by correctable ecc error logging code to print out
982  * the stick/persistent/intermittent status of the error.
983  */
984 static void
985 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
986 {
987 	ushort_t status;
988 	char *status1_str = "Memory";
989 	char *status2_str = "Intermittent";
990 	struct async_flt *aflt = (struct async_flt *)spf_flt;
991 
992 	status = aflt->flt_status;
993 
994 	if (status & ECC_ECACHE)
995 		status1_str = "Ecache";
996 
997 	if (status & ECC_STICKY)
998 		status2_str = "Sticky";
999 	else if (status & ECC_PERSISTENT)
1000 		status2_str = "Persistent";
1001 
1002 	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
1003 		NULL, " Corrected %s Error on %s is %s",
1004 		status1_str, unum, status2_str);
1005 }
1006 
1007 /*
1008  * check for a valid ce syndrome, then call the
1009  * displacement flush scrubbing code, and then check the afsr to see if
1010  * the error was persistent or intermittent. Reread the afar/afsr to see
1011  * if the error was not scrubbed successfully, and is therefore sticky.
1012  */
1013 /*ARGSUSED1*/
1014 void
1015 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
1016 {
1017 	uint64_t eer, afsr;
1018 	ushort_t status;
1019 
1020 	ASSERT(getpil() > LOCK_LEVEL);
1021 
1022 	/*
1023 	 * It is possible that the flt_addr is not a valid
1024 	 * physical address. To deal with this, we disable
1025 	 * NCEEN while we scrub that address. If this causes
1026 	 * a TIMEOUT/BERR, we know this is an invalid
1027 	 * memory location.
1028 	 */
1029 	kpreempt_disable();
1030 	eer = get_error_enable();
1031 	if (eer & (EER_CEEN | EER_NCEEN))
1032 	    set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));
1033 
1034 	/*
1035 	 * To check if the error detected by IO is persistent, sticky or
1036 	 * intermittent.
1037 	 */
1038 	if (ecc->flt_status & ECC_IOBUS) {
1039 		ecc->flt_stat = P_AFSR_CE;
1040 	}
1041 
1042 	scrubphys(P2ALIGN(ecc->flt_addr, 64),
1043 	    cpunodes[CPU->cpu_id].ecache_size);
1044 
1045 	get_asyncflt(&afsr);
1046 	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1047 		/*
1048 		 * Must ensure that we don't get the TIMEOUT/BERR
1049 		 * when we reenable NCEEN, so we clear the AFSR.
1050 		 */
1051 		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
1052 		if (eer & (EER_CEEN | EER_NCEEN))
1053 		    set_error_enable(eer);
1054 		kpreempt_enable();
1055 		return;
1056 	}
1057 
1058 	if (eer & EER_NCEEN)
1059 	    set_error_enable(eer & ~EER_CEEN);
1060 
1061 	/*
1062 	 * Check and clear any ECC errors from the scrub.  If the scrub did
1063 	 * not trip over the error, mark it intermittent.  If the scrub did
1064 	 * trip the error again and it did not scrub away, mark it sticky.
1065 	 * Otherwise mark it persistent.
1066 	 */
1067 	if (check_ecc(ecc) != 0) {
1068 		cpu_read_paddr(ecc, 0, 1);
1069 
1070 		if (check_ecc(ecc) != 0)
1071 			status = ECC_STICKY;
1072 		else
1073 			status = ECC_PERSISTENT;
1074 	} else
1075 		status = ECC_INTERMITTENT;
1076 
1077 	if (eer & (EER_CEEN | EER_NCEEN))
1078 	    set_error_enable(eer);
1079 	kpreempt_enable();
1080 
1081 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
1082 	ecc->flt_status |= status;
1083 }
1084 
1085 /*
1086  * get the syndrome and unum, and then call the routines
1087  * to check the other cpus and iobuses, and then do the error logging.
1088  */
1089 /*ARGSUSED1*/
1090 void
1091 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
1092 {
1093 	char unum[UNUM_NAMLEN];
1094 	int len = 0;
1095 	int ce_verbose = 0;
1096 
1097 	ASSERT(ecc->flt_func != NULL);
1098 
1099 	/* Get the unum string for logging purposes */
1100 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
1101 	    UNUM_NAMLEN, &len);
1102 
1103 	/* Call specific error logging routine */
1104 	(void) (*ecc->flt_func)(ecc, unum);
1105 
1106 	/*
1107 	 * Count errors per unum.
1108 	 * Non-memory errors are all counted via a special unum string.
1109 	 */
1110 	if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING &&
1111 	    automatic_page_removal) {
1112 		page_t *pp = page_numtopp_nolock((pfn_t)
1113 		    (ecc->flt_addr >> MMU_PAGESHIFT));
1114 
1115 		if (pp) {
1116 			page_settoxic(pp, PAGE_IS_FAULTY);
1117 			(void) page_retire(pp, PAGE_IS_FAILING);
1118 		}
1119 	}
1120 
1121 	if (ecc->flt_panic) {
1122 		ce_verbose = 1;
1123 	} else if ((ecc->flt_class == BUS_FAULT) ||
1124 	    (ecc->flt_stat & P_AFSR_CE)) {
1125 		ce_verbose = (ce_verbose_memory > 0);
1126 	} else {
1127 		ce_verbose = 1;
1128 	}
1129 
1130 	if (ce_verbose) {
1131 		spitf_async_flt sflt;
1132 		int synd_code;
1133 
1134 		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */
1135 
1136 		cpu_ce_log_status(&sflt, unum);
1137 
1138 		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
1139 				SYND(ecc->flt_synd));
1140 
1141 		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
1142 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1143 			    NULL, " ECC Data Bit %2d was in error "
1144 			    "and corrected", synd_code);
1145 		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
1146 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1147 			    NULL, " ECC Check Bit %2d was in error "
1148 			    "and corrected", synd_code - C0);
1149 		} else {
1150 			/*
1151 			 * These are UE errors - we shouldn't be getting CE
1152 			 * traps for these; handle them in case of bad h/w.
1153 			 */
1154 			switch (synd_code) {
1155 			case M2:
1156 				cpu_aflt_log(CE_CONT, 0, &sflt,
1157 				    CPU_ERRID_FIRST, NULL,
1158 				    " Two ECC Bits were in error");
1159 				break;
1160 			case M3:
1161 				cpu_aflt_log(CE_CONT, 0, &sflt,
1162 				    CPU_ERRID_FIRST, NULL,
1163 				    " Three ECC Bits were in error");
1164 				break;
1165 			case M4:
1166 				cpu_aflt_log(CE_CONT, 0, &sflt,
1167 				    CPU_ERRID_FIRST, NULL,
1168 				    " Four ECC Bits were in error");
1169 				break;
1170 			case MX:
1171 				cpu_aflt_log(CE_CONT, 0, &sflt,
1172 				    CPU_ERRID_FIRST, NULL,
1173 				    " More than Four ECC bits were "
1174 				    "in error");
1175 				break;
1176 			default:
1177 				cpu_aflt_log(CE_CONT, 0, &sflt,
1178 				    CPU_ERRID_FIRST, NULL,
1179 				    " Unknown fault syndrome %d",
1180 				    synd_code);
1181 				break;
1182 			}
1183 		}
1184 	}
1185 
1186 	/* Display entire cache line, if valid address */
1187 	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
1188 		read_ecc_data(ecc, 1, 1);
1189 }
1190 
1191 /*
1192  * We route all errors through a single switch statement.
1193  */
1194 void
1195 cpu_ue_log_err(struct async_flt *aflt)
1196 {
1197 
1198 	switch (aflt->flt_class) {
1199 	case CPU_FAULT:
1200 		cpu_async_log_err(aflt);
1201 		break;
1202 
1203 	case BUS_FAULT:
1204 		bus_async_log_err(aflt);
1205 		break;
1206 
1207 	default:
1208 		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
1209 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1210 		break;
1211 	}
1212 }
1213 
1214 /* Values for action variable in cpu_async_error() */
1215 #define	ACTION_NONE		0
1216 #define	ACTION_TRAMPOLINE	1
1217 #define	ACTION_AST_FLAGS	2
1218 
1219 /*
1220  * Access error trap handler for asynchronous cpu errors.  This routine is
1221  * called to handle a data or instruction access error.  All fatal errors are
1222  * completely handled by this routine (by panicking).  Non fatal error logging
1223  * is queued for later processing either via AST or softint at a lower PIL.
1224  * In case of panic, the error log queue will also be processed as part of the
1225  * panic flow to ensure all errors are logged.  This routine is called with all
1226  * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
1227  * error bits are also cleared.  The hardware has also disabled the I and
1228  * D-caches for us, so we must re-enable them before returning.
1229  *
1230  * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
1231  *
1232  *		_______________________________________________________________
1233  *		|        Privileged tl0		|         Unprivileged	      |
1234  *		| Protected	| Unprotected	| Protected	| Unprotected |
1235  *		|on_trap|lofault|		|		|	      |
1236  * -------------|-------|-------+---------------+---------------+-------------|
1237  *		|	|	|		|		|	      |
1238  * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
1239  *		|	|	|		|		|	      |
1240  * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
1241  *		|	|	|		|		|	      |
1242  * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
1243  *		|	|	|		|		|	      |
1244  * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
1245  * ____________________________________________________________________________
1246  *
1247  *
1248  * Action codes:
1249  *
1250  * L - log
1251  * M - kick off memscrubber if flt_in_memory
1252  * P - panic
1253  * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
1254  * R - i)  if aft_panic is set, panic
1255  *     ii) otherwise, send hwerr event to contract and SIGKILL to process
1256  * S - send SIGBUS to process
1257  * T - trampoline
1258  *
1259  * Special cases:
1260  *
1261  * 1) if aft_testfatal is set, all faults result in a panic regardless
1262  *    of type (even WP), protection (even on_trap), or privilege.
1263  */
1264 /*ARGSUSED*/
1265 void
1266 cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
1267 	uint_t p_afsr_high, uint_t p_afar_high)
1268 {
1269 	ushort_t sdbh, sdbl, ttype, tl;
1270 	spitf_async_flt spf_flt;
1271 	struct async_flt *aflt;
1272 	char pr_reason[28];
1273 	uint64_t oafsr;
1274 	uint64_t acc_afsr = 0;			/* accumulated afsr */
1275 	int action = ACTION_NONE;
1276 	uint64_t t_afar = p_afar;
1277 	uint64_t t_afsr = p_afsr;
1278 	int expected = DDI_FM_ERR_UNEXPECTED;
1279 	ddi_acc_hdl_t *hp;
1280 
1281 	/*
1282 	 * We need to look at p_flag to determine if the thread detected an
1283 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1284 	 * because we just need a consistent snapshot and we know that everyone
1285 	 * else will store a consistent set of bits while holding p_lock.  We
1286 	 * don't have to worry about a race because SDOCORE is set once prior
1287 	 * to doing i/o from the process's address space and is never cleared.
1288 	 */
1289 	uint_t pflag = ttoproc(curthread)->p_flag;
1290 
1291 	pr_reason[0] = '\0';
1292 
1293 	/*
1294 	 * Note: the Spitfire data buffer error registers
1295 	 * (upper and lower halves) are or'ed into the upper
1296 	 * word of the afsr by async_err() if P_AFSR_UE is set.
1297 	 */
1298 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
1299 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
1300 
1301 	/*
1302 	 * Grab the ttype encoded in <63:53> of the saved
1303 	 * afsr passed from async_err()
1304 	 */
1305 	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
1306 	tl = (ushort_t)(t_afsr >> 62);
1307 
1308 	t_afsr &= S_AFSR_MASK;
1309 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
1310 
1311 	/*
1312 	 * Initialize most of the common and CPU-specific structure.  We derive
1313 	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
1314 	 * initial setting of aflt->flt_panic is based on TL: we must panic if
1315 	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
1316 	 * tuneable aft_testfatal is set (not the default).
1317 	 */
1318 	bzero(&spf_flt, sizeof (spitf_async_flt));
1319 	aflt = (struct async_flt *)&spf_flt;
1320 	aflt->flt_id = gethrtime_waitfree();
1321 	aflt->flt_stat = t_afsr;
1322 	aflt->flt_addr = t_afar;
1323 	aflt->flt_bus_id = getprocessorid();
1324 	aflt->flt_inst = CPU->cpu_id;
1325 	aflt->flt_pc = (caddr_t)rp->r_pc;
1326 	aflt->flt_prot = AFLT_PROT_NONE;
1327 	aflt->flt_class = CPU_FAULT;
1328 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1329 	aflt->flt_tl = (uchar_t)tl;
1330 	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
1331 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1332 
1333 	/*
1334 	 * Set flt_status based on the trap type.  If we end up here as the
1335 	 * result of a UE detected by the CE handling code, leave status 0.
1336 	 */
1337 	switch (ttype) {
1338 	case T_DATA_ERROR:
1339 		aflt->flt_status = ECC_D_TRAP;
1340 		break;
1341 	case T_INSTR_ERROR:
1342 		aflt->flt_status = ECC_I_TRAP;
1343 		break;
1344 	}
1345 
1346 	spf_flt.flt_sdbh = sdbh;
1347 	spf_flt.flt_sdbl = sdbl;
1348 
1349 	/*
1350 	 * Check for fatal async errors.
1351 	 */
1352 	check_misc_err(&spf_flt);
1353 
1354 	/*
1355 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1356 	 * see if we were executing in the kernel under on_trap() or t_lofault
1357 	 * protection.  If so, modify the saved registers so that we return
1358 	 * from the trap to the appropriate trampoline routine.
1359 	 */
1360 	if (aflt->flt_priv && tl == 0) {
1361 		if (curthread->t_ontrap != NULL) {
1362 			on_trap_data_t *otp = curthread->t_ontrap;
1363 
1364 			if (otp->ot_prot & OT_DATA_EC) {
1365 				aflt->flt_prot = AFLT_PROT_EC;
1366 				otp->ot_trap |= OT_DATA_EC;
1367 				rp->r_pc = otp->ot_trampoline;
1368 				rp->r_npc = rp->r_pc + 4;
1369 				action = ACTION_TRAMPOLINE;
1370 			}
1371 
1372 			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
1373 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1374 				aflt->flt_prot = AFLT_PROT_ACCESS;
1375 				otp->ot_trap |= OT_DATA_ACCESS;
1376 				rp->r_pc = otp->ot_trampoline;
1377 				rp->r_npc = rp->r_pc + 4;
1378 				action = ACTION_TRAMPOLINE;
1379 				/*
1380 				 * for peeks and caut_gets errors are expected
1381 				 */
1382 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1383 				if (!hp)
1384 					expected = DDI_FM_ERR_PEEK;
1385 				else if (hp->ah_acc.devacc_attr_access ==
1386 				    DDI_CAUTIOUS_ACC)
1387 					expected = DDI_FM_ERR_EXPECTED;
1388 			}
1389 
1390 		} else if (curthread->t_lofault) {
1391 			aflt->flt_prot = AFLT_PROT_COPY;
1392 			rp->r_g1 = EFAULT;
1393 			rp->r_pc = curthread->t_lofault;
1394 			rp->r_npc = rp->r_pc + 4;
1395 			action = ACTION_TRAMPOLINE;
1396 		}
1397 	}
1398 
1399 	/*
1400 	 * Determine if this error needs to be treated as fatal.  Note that
1401 	 * multiple errors detected upon entry to this trap handler does not
1402 	 * necessarily warrant a panic.  We only want to panic if the trap
1403 	 * happened in privileged mode and not under t_ontrap or t_lofault
1404 	 * protection.  The exception is WP: if we *only* get WP, it is not
1405 	 * fatal even if the trap occurred in privileged mode, except on Sabre.
1406 	 *
1407 	 * aft_panic, if set, effectively makes us treat usermode
1408 	 * UE/EDP/LDP faults as if they were privileged - so we we will
1409 	 * panic instead of sending a contract event.  A lofault-protected
1410 	 * fault will normally follow the contract event; if aft_panic is
1411 	 * set this will be changed to a panic.
1412 	 *
1413 	 * For usermode BERR/BTO errors, eg from processes performing device
1414 	 * control through mapped device memory, we need only deliver
1415 	 * a SIGBUS to the offending process.
1416 	 *
1417 	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
1418 	 * checked later; for now we implement the common reasons.
1419 	 */
1420 	if (aflt->flt_prot == AFLT_PROT_NONE) {
1421 		/*
1422 		 * Beware - multiple bits may be set in AFSR
1423 		 */
1424 		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
1425 			if (aflt->flt_priv || aft_panic)
1426 				aflt->flt_panic = 1;
1427 		}
1428 
1429 		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1430 			if (aflt->flt_priv)
1431 				aflt->flt_panic = 1;
1432 		}
1433 	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
1434 		aflt->flt_panic = 1;
1435 	}
1436 
1437 	/*
1438 	 * UE/BERR/TO: Call our bus nexus friends to check for
1439 	 * IO errors that may have resulted in this trap.
1440 	 */
1441 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
1442 		cpu_run_bus_error_handlers(aflt, expected);
1443 	}
1444 
1445 	/*
1446 	 * Handle UE: If the UE is in memory, we need to flush the bad line from
1447 	 * the E-cache.  We also need to query the bus nexus for fatal errors.
1448 	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
1449 	 * caches may introduce more parity errors (especially when the module
1450 	 * is bad) and in sabre there is no guarantee that such errors
1451 	 * (if introduced) are written back as poisoned data.
1452 	 */
1453 	if (t_afsr & P_AFSR_UE) {
1454 		int i;
1455 
1456 		(void) strcat(pr_reason, "UE ");
1457 
1458 		spf_flt.flt_type = CPU_UE_ERR;
1459 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1460 			MMU_PAGESHIFT)) ? 1: 0;
1461 
1462 		/*
1463 		 * With UE, we have the PA of the fault.
1464 		 * Let do a diagnostic read to get the ecache
1465 		 * data and tag info of the bad line for logging.
1466 		 */
1467 		if (aflt->flt_in_memory) {
1468 			uint32_t ec_set_size;
1469 			uchar_t state;
1470 			uint32_t ecache_idx;
1471 			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);
1472 
1473 			/* touch the line to put it in ecache */
1474 			acc_afsr |= read_and_clear_afsr();
1475 			(void) lddphys(faultpa);
1476 			acc_afsr |= (read_and_clear_afsr() &
1477 				    ~(P_AFSR_EDP | P_AFSR_UE));
1478 
1479 			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
1480 			    ecache_associativity;
1481 
1482 			for (i = 0; i < ecache_associativity; i++) {
1483 				ecache_idx = i * ec_set_size +
1484 				    (aflt->flt_addr % ec_set_size);
1485 				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
1486 					(uint64_t *)&spf_flt.flt_ec_data[0],
1487 					&spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
1488 				acc_afsr |= oafsr;
1489 
1490 				state = (uchar_t)((spf_flt.flt_ec_tag &
1491 				    cpu_ec_state_mask) >> cpu_ec_state_shift);
1492 
1493 				if ((state & cpu_ec_state_valid) &&
1494 				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
1495 				    ((uint64_t)aflt->flt_addr >>
1496 				    cpu_ec_tag_shift)))
1497 					break;
1498 			}
1499 
1500 			/*
1501 			 * Check to see if the ecache tag is valid for the
1502 			 * fault PA. In the very unlikely event where the
1503 			 * line could be victimized, no ecache info will be
1504 			 * available. If this is the case, capture the line
1505 			 * from memory instead.
1506 			 */
1507 			if ((state & cpu_ec_state_valid) == 0 ||
1508 			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
1509 			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
1510 				for (i = 0; i < 8; i++, faultpa += 8) {
1511 				    ec_data_t *ecdptr;
1512 
1513 					ecdptr = &spf_flt.flt_ec_data[i];
1514 					acc_afsr |= read_and_clear_afsr();
1515 					ecdptr->ec_d8 = lddphys(faultpa);
1516 					acc_afsr |= (read_and_clear_afsr() &
1517 						    ~(P_AFSR_EDP | P_AFSR_UE));
1518 					ecdptr->ec_afsr = 0;
1519 							/* null afsr value */
1520 				}
1521 
1522 				/*
1523 				 * Mark tag invalid to indicate mem dump
1524 				 * when we print out the info.
1525 				 */
1526 				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
1527 			}
1528 			spf_flt.flt_ec_lcnt = 1;
1529 
1530 			/*
1531 			 * Flush out the bad line
1532 			 */
1533 			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
1534 				cpunodes[CPU->cpu_id].ecache_size);
1535 
1536 			acc_afsr |= clear_errors(NULL, NULL);
1537 		}
1538 
1539 		/*
1540 		 * Ask our bus nexus friends if they have any fatal errors. If
1541 		 * so, they will log appropriate error messages and panic as a
1542 		 * result. We then queue an event for each UDB that reports a
1543 		 * UE. Each UE reported in a UDB will have its own log message.
1544 		 *
1545 		 * Note from kbn: In the case where there are multiple UEs
1546 		 * (ME bit is set) - the AFAR address is only accurate to
1547 		 * the 16-byte granularity. One cannot tell whether the AFAR
1548 		 * belongs to the UDBH or UDBL syndromes. In this case, we
1549 		 * always report the AFAR address to be 16-byte aligned.
1550 		 *
1551 		 * If we're on a Sabre, there is no SDBL, but it will always
1552 		 * read as zero, so the sdbl test below will safely fail.
1553 		 */
1554 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
1555 			aflt->flt_panic = 1;
1556 
1557 		if (sdbh & P_DER_UE) {
1558 			aflt->flt_synd = sdbh & P_DER_E_SYND;
1559 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1560 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1561 			    aflt->flt_panic);
1562 		}
1563 		if (sdbl & P_DER_UE) {
1564 			aflt->flt_synd = sdbl & P_DER_E_SYND;
1565 			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
1566 			if (!(aflt->flt_stat & P_AFSR_ME))
1567 				aflt->flt_addr |= 0x8;
1568 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1569 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1570 			    aflt->flt_panic);
1571 		}
1572 
1573 		/*
1574 		 * We got a UE and are panicking, save the fault PA in a known
1575 		 * location so that the platform specific panic code can check
1576 		 * for copyback errors.
1577 		 */
1578 		if (aflt->flt_panic && aflt->flt_in_memory) {
1579 			panic_aflt = *aflt;
1580 		}
1581 	}
1582 
1583 	/*
1584 	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
1585 	 * async error for logging. For Sabre, we panic on EDP or LDP.
1586 	 */
1587 	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
1588 		spf_flt.flt_type = CPU_EDP_LDP_ERR;
1589 
1590 		if (t_afsr & P_AFSR_EDP)
1591 			(void) strcat(pr_reason, "EDP ");
1592 
1593 		if (t_afsr & P_AFSR_LDP)
1594 			(void) strcat(pr_reason, "LDP ");
1595 
1596 		/*
1597 		 * Here we have no PA to work with.
1598 		 * Scan each line in the ecache to look for
1599 		 * the one with bad parity.
1600 		 */
1601 		aflt->flt_addr = AFLT_INV_ADDR;
1602 		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1603 			&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
1604 		acc_afsr |= (oafsr & ~P_AFSR_WP);
1605 
1606 		/*
1607 		 * If we found a bad PA, update the state to indicate if it is
1608 		 * memory or I/O space.  This code will be important if we ever
1609 		 * support cacheable frame buffers.
1610 		 */
1611 		if (aflt->flt_addr != AFLT_INV_ADDR) {
1612 			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1613 				MMU_PAGESHIFT)) ? 1 : 0;
1614 		}
1615 
1616 		if (isus2i || isus2e)
1617 			aflt->flt_panic = 1;
1618 
1619 		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
1620 		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
1621 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1622 		    aflt->flt_panic);
1623 	}
1624 
1625 	/*
1626 	 * Timeout and bus error handling.  There are two cases to consider:
1627 	 *
1628 	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we
1629 	 * have already modified the saved registers so that we will return
1630 	 * from the trap to the appropriate trampoline routine; otherwise panic.
1631 	 *
1632 	 * (2) In user mode, we can simply use our AST mechanism to deliver
1633 	 * a SIGBUS.  We do not log the occurence - processes performing
1634 	 * device control would generate lots of uninteresting messages.
1635 	 */
1636 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1637 		if (t_afsr & P_AFSR_TO)
1638 			(void) strcat(pr_reason, "BTO ");
1639 
1640 		if (t_afsr & P_AFSR_BERR)
1641 			(void) strcat(pr_reason, "BERR ");
1642 
1643 		spf_flt.flt_type = CPU_BTO_BERR_ERR;
1644 		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
1645 			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
1646 			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
1647 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1648 			    aflt->flt_panic);
1649 		}
1650 	}
1651 
1652 	/*
1653 	 * Handle WP: WP happens when the ecache is victimized and a parity
1654 	 * error was detected on a writeback.  The data in question will be
1655 	 * poisoned as a UE will be written back.  The PA is not logged and
1656 	 * it is possible that it doesn't belong to the trapped thread.  The
1657 	 * WP trap is not fatal, but it could be fatal to someone that
1658 	 * subsequently accesses the toxic page.  We set read_all_memscrub
1659 	 * to force the memscrubber to read all of memory when it awakens.
1660 	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
1661 	 * UE back to poison the data.
1662 	 */
1663 	if (t_afsr & P_AFSR_WP) {
1664 		(void) strcat(pr_reason, "WP ");
1665 		if (isus2i || isus2e) {
1666 			aflt->flt_panic = 1;
1667 		} else {
1668 			read_all_memscrub = 1;
1669 		}
1670 		spf_flt.flt_type = CPU_WP_ERR;
1671 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
1672 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1673 		    aflt->flt_panic);
1674 	}
1675 
1676 	/*
1677 	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
1678 	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
1679 	 * This is fatal.
1680 	 */
1681 
1682 	if (t_afsr & P_AFSR_CP) {
1683 		if (isus2i || isus2e) {
1684 			(void) strcat(pr_reason, "CP ");
1685 			aflt->flt_panic = 1;
1686 			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
1687 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1688 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1689 			    aflt->flt_panic);
1690 		} else {
1691 			/*
1692 			 * Orphan CP: Happens due to signal integrity problem
1693 			 * on a CPU, where a CP is reported, without reporting
1694 			 * its associated UE. This is handled by locating the
1695 			 * bad parity line and would kick off the memscrubber
1696 			 * to find the UE if in memory or in another's cache.
1697 			 */
1698 			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
1699 			(void) strcat(pr_reason, "ORPHAN_CP ");
1700 
1701 			/*
1702 			 * Here we have no PA to work with.
1703 			 * Scan each line in the ecache to look for
1704 			 * the one with bad parity.
1705 			 */
1706 			aflt->flt_addr = AFLT_INV_ADDR;
1707 			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1708 				&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
1709 				&oafsr);
1710 			acc_afsr |= oafsr;
1711 
1712 			/*
1713 			 * If we found a bad PA, update the state to indicate
1714 			 * if it is memory or I/O space.
1715 			 */
1716 			if (aflt->flt_addr != AFLT_INV_ADDR) {
1717 				aflt->flt_in_memory =
1718 					(pf_is_memory(aflt->flt_addr >>
1719 						MMU_PAGESHIFT)) ? 1 : 0;
1720 			}
1721 			read_all_memscrub = 1;
1722 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1723 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1724 			    aflt->flt_panic);
1725 
1726 		}
1727 	}
1728 
1729 	/*
1730 	 * If we queued an error other than WP or CP and we are going to return
1731 	 * from the trap and the error was in user mode or inside of a
1732 	 * copy routine, set AST flag so the queue will be drained before
1733 	 * returning to user mode.
1734 	 *
1735 	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
1736 	 * and send an event to its process contract.
1737 	 *
1738 	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
1739 	 * will have been no error queued in this case.
1740 	 */
1741 	if ((t_afsr &
1742 	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
1743 	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
1744 			int pcb_flag = 0;
1745 
1746 			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
1747 				pcb_flag |= ASYNC_HWERR;
1748 
1749 			if (t_afsr & P_AFSR_BERR)
1750 				pcb_flag |= ASYNC_BERR;
1751 
1752 			if (t_afsr & P_AFSR_TO)
1753 				pcb_flag |= ASYNC_BTO;
1754 
1755 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1756 			aston(curthread);
1757 			action = ACTION_AST_FLAGS;
1758 	}
1759 
1760 	/*
1761 	 * In response to a deferred error, we must do one of three things:
1762 	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
1763 	 * set in cases (1) and (2) - check that either action is set or
1764 	 * (3) is true.
1765 	 *
1766 	 * On II, the WP writes poisoned data back to memory, which will
1767 	 * cause a UE and a panic or reboot when read.  In this case, we
1768 	 * don't need to panic at this time.  On IIi and IIe,
1769 	 * aflt->flt_panic is already set above.
1770 	 */
1771 	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
1772 	    (t_afsr & P_AFSR_WP));
1773 
1774 	/*
1775 	 * Make a final sanity check to make sure we did not get any more async
1776 	 * errors and accumulate the afsr.
1777 	 */
1778 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
1779 	    cpunodes[CPU->cpu_id].ecache_linesize);
1780 	(void) clear_errors(&spf_flt, NULL);
1781 
1782 	/*
1783 	 * Take care of a special case: If there is a UE in the ecache flush
1784 	 * area, we'll see it in flush_ecache().  This will trigger the
1785 	 * CPU_ADDITIONAL_ERRORS case below.
1786 	 *
1787 	 * This could occur if the original error was a UE in the flush area,
1788 	 * or if the original error was an E$ error that was flushed out of
1789 	 * the E$ in scan_ecache().
1790 	 *
1791 	 * If it's at the same address that we're already logging, then it's
1792 	 * probably one of these cases.  Clear the bit so we don't trip over
1793 	 * it on the additional errors case, which could cause an unnecessary
1794 	 * panic.
1795 	 */
1796 	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
1797 		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
1798 	else
1799 		acc_afsr |= aflt->flt_stat;
1800 
1801 	/*
1802 	 * Check the acumulated afsr for the important bits.
1803 	 * Make sure the spf_flt.flt_type value is set, and
1804 	 * enque an error.
1805 	 */
1806 	if (acc_afsr &
1807 	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
1808 		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
1809 		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
1810 		    P_AFSR_ISAP))
1811 			aflt->flt_panic = 1;
1812 
1813 		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
1814 		aflt->flt_stat = acc_afsr;
1815 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
1816 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1817 		    aflt->flt_panic);
1818 	}
1819 
1820 	/*
1821 	 * If aflt->flt_panic is set at this point, we need to panic as the
1822 	 * result of a trap at TL > 0, or an error we determined to be fatal.
1823 	 * We've already enqueued the error in one of the if-clauses above,
1824 	 * and it will be dequeued and logged as part of the panic flow.
1825 	 */
1826 	if (aflt->flt_panic) {
1827 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
1828 		    "See previous message(s) for details", " %sError(s)",
1829 		    pr_reason);
1830 	}
1831 
1832 	/*
1833 	 * Before returning, we must re-enable errors, and
1834 	 * reset the caches to their boot-up state.
1835 	 */
1836 	set_lsu(get_lsu() | cache_boot_state);
1837 	set_error_enable(EER_ENABLE);
1838 }
1839 
1840 /*
1841  * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1842  * This routine is shared by the CE and UE handling code.
1843  */
1844 static void
1845 check_misc_err(spitf_async_flt *spf_flt)
1846 {
1847 	struct async_flt *aflt = (struct async_flt *)spf_flt;
1848 	char *fatal_str = NULL;
1849 
1850 	/*
1851 	 * The ISAP and ETP errors are supposed to cause a POR
1852 	 * from the system, so in theory we never, ever see these messages.
1853 	 * ISAP, ETP and IVUE are considered to be fatal.
1854 	 */
1855 	if (aflt->flt_stat & P_AFSR_ISAP)
1856 		fatal_str = " System Address Parity Error on";
1857 	else if (aflt->flt_stat & P_AFSR_ETP)
1858 		fatal_str = " Ecache Tag Parity Error on";
1859 	else if (aflt->flt_stat & P_AFSR_IVUE)
1860 		fatal_str = " Interrupt Vector Uncorrectable Error on";
1861 	if (fatal_str != NULL) {
1862 		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
1863 			NULL, fatal_str);
1864 	}
1865 }
1866 
1867 /*
1868  * Routine to convert a syndrome into a syndrome code.
1869  */
1870 static int
1871 synd_to_synd_code(int synd_status, ushort_t synd)
1872 {
1873 	if (synd_status != AFLT_STAT_VALID)
1874 		return (-1);
1875 
1876 	/*
1877 	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
1878 	 * to get the code indicating which bit(s) is(are) bad.
1879 	 */
1880 	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
1881 		return (-1);
1882 	else
1883 		return (ecc_syndrome_tab[synd]);
1884 }
1885 
1886 /*
1887  * Routine to return a string identifying the physical name
1888  * associated with a memory/cache error.
1889  */
1890 /* ARGSUSED */
1891 int
1892 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
1893     uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
1894     char *buf, int buflen, int *lenp)
1895 {
1896 	short synd_code;
1897 	int ret;
1898 
1899 	if (flt_in_memory) {
1900 		synd_code = synd_to_synd_code(synd_status, synd);
1901 		if (synd_code == -1) {
1902 			ret = EINVAL;
1903 		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
1904 		    buf, buflen, lenp) != 0) {
1905 			ret = EIO;
1906 		} else if (*lenp <= 1) {
1907 			ret = EINVAL;
1908 		} else {
1909 			ret = 0;
1910 		}
1911 	} else {
1912 		ret = ENOTSUP;
1913 	}
1914 
1915 	if (ret != 0) {
1916 		buf[0] = '\0';
1917 		*lenp = 0;
1918 	}
1919 
1920 	return (ret);
1921 }
1922 
1923 /*
1924  * Wrapper for cpu_get_mem_unum() routine that takes an
1925  * async_flt struct rather than explicit arguments.
1926  */
1927 int
1928 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1929     char *buf, int buflen, int *lenp)
1930 {
1931 	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
1932 		aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
1933 		aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
1934 }
1935 
1936 /*
1937  * This routine is a more generic interface to cpu_get_mem_unum(),
1938  * that may be used by other modules (e.g. mm).
1939  */
1940 int
1941 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1942 		char *buf, int buflen, int *lenp)
1943 {
1944 	int synd_status, flt_in_memory, ret;
1945 	char unum[UNUM_NAMLEN];
1946 
1947 	/*
1948 	 * Check for an invalid address.
1949 	 */
1950 	if (afar == (uint64_t)-1)
1951 		return (ENXIO);
1952 
1953 	if (synd == (uint64_t)-1)
1954 		synd_status = AFLT_STAT_INVALID;
1955 	else
1956 		synd_status = AFLT_STAT_VALID;
1957 
1958 	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
1959 
1960 	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1961 	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
1962 	    != 0)
1963 		return (ret);
1964 
1965 	if (*lenp >= buflen)
1966 		return (ENAMETOOLONG);
1967 
1968 	(void) strncpy(buf, unum, buflen);
1969 
1970 	return (0);
1971 }
1972 
1973 /*
1974  * Routine to return memory information associated
1975  * with a physical address and syndrome.
1976  */
1977 /* ARGSUSED */
1978 int
1979 cpu_get_mem_info(uint64_t synd, uint64_t afar,
1980     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1981     int *segsp, int *banksp, int *mcidp)
1982 {
1983 	return (ENOTSUP);
1984 }
1985 
/*
 * Return a string giving the physical name associated with a cpuid.
 *
 * This CPU module does not implement the query: buf and *lenp are left
 * untouched and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
1996 
1997 /*
1998  * This routine returns the size of the kernel's FRU name buffer.
1999  */
2000 size_t
2001 cpu_get_name_bufsize()
2002 {
2003 	return (UNUM_NAMLEN);
2004 }
2005 
2006 /*
2007  * Cpu specific log func for UEs.
2008  */
2009 static void
2010 log_ue_err(struct async_flt *aflt, char *unum)
2011 {
2012 	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
2013 	int len = 0;
2014 
2015 #ifdef DEBUG
2016 	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
2017 
2018 	/*
2019 	 * Paranoid Check for priv mismatch
2020 	 * Only applicable for UEs
2021 	 */
2022 	if (afsr_priv != aflt->flt_priv) {
2023 		/*
2024 		 * The priv bits in %tstate and %afsr did not match; we expect
2025 		 * this to be very rare, so flag it with a message.
2026 		 */
2027 		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
2028 		    ": PRIV bit in TSTATE and AFSR mismatched; "
2029 		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
2030 
2031 		/* update saved afsr to reflect the correct priv */
2032 		aflt->flt_stat &= ~P_AFSR_PRIV;
2033 		if (aflt->flt_priv)
2034 			aflt->flt_stat |= P_AFSR_PRIV;
2035 	}
2036 #endif /* DEBUG */
2037 
2038 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
2039 	    UNUM_NAMLEN, &len);
2040 
2041 	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
2042 	    " Uncorrectable Memory Error on");
2043 
2044 	if (SYND(aflt->flt_synd) == 0x3) {
2045 		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
2046 		    " Syndrome 0x3 indicates that this may not be a "
2047 		    "memory module problem");
2048 	}
2049 
2050 	if (aflt->flt_in_memory)
2051 		cpu_log_ecmem_info(spf_flt);
2052 }
2053 
2054 
2055 /*
2056  * The cpu_async_log_err() function is called via the ue_drain() function to
2057  * handle logging for CPU events that are dequeued.  As such, it can be invoked
2058  * from softint context, from AST processing in the trap() flow, or from the
2059  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
2060  */
2061 static void
2062 cpu_async_log_err(void *flt)
2063 {
2064 	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
2065 	struct async_flt *aflt = (struct async_flt *)flt;
2066 	char unum[UNUM_NAMLEN];
2067 	char *space;
2068 	char *ecache_scrub_logstr = NULL;
2069 
2070 	switch (spf_flt->flt_type) {
2071 	    case CPU_UE_ERR:
2072 		/*
2073 		 * We want to skip logging only if ALL the following
2074 		 * conditions are true:
2075 		 *
2076 		 *	1. We are not panicking
2077 		 *	2. There is only one error
2078 		 *	3. That error is a memory error
2079 		 *	4. The error is caused by the memory scrubber (in
2080 		 *	   which case the error will have occurred under
2081 		 *	   on_trap protection)
2082 		 *	5. The error is on a retired page
2083 		 *
2084 		 * Note 1: AFLT_PROT_EC is used places other than the memory
2085 		 * scrubber.  However, none of those errors should occur
2086 		 * on a retired page.
2087 		 *
2088 		 * Note 2: In the CE case, these errors are discarded before
2089 		 * the errorq.  In the UE case, we must wait until now --
2090 		 * softcall() grabs a mutex, which we can't do at a high PIL.
2091 		 */
2092 		if (!panicstr &&
2093 		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
2094 		    aflt->flt_prot == AFLT_PROT_EC) {
2095 			page_t *pp = page_numtopp_nolock((pfn_t)
2096 			    (aflt->flt_addr >> MMU_PAGESHIFT));
2097 
2098 			if (pp != NULL && page_isretired(pp)) {
2099 
2100 				/* Zero the address to clear the error */
2101 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2102 				return;
2103 			}
2104 		}
2105 
2106 		/*
2107 		 * Log the UE and check for causes of this UE error that
2108 		 * don't cause a trap (Copyback error).  cpu_async_error()
2109 		 * has already checked the i/o buses for us.
2110 		 */
2111 		log_ue_err(aflt, unum);
2112 		if (aflt->flt_in_memory)
2113 			cpu_check_allcpus(aflt);
2114 		break;
2115 
2116 	    case CPU_EDP_LDP_ERR:
2117 		if (aflt->flt_stat & P_AFSR_EDP)
2118 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2119 			    NULL, " EDP event on");
2120 
2121 		if (aflt->flt_stat & P_AFSR_LDP)
2122 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2123 			    NULL, " LDP event on");
2124 
2125 		/* Log ecache info if exist */
2126 		if (spf_flt->flt_ec_lcnt > 0) {
2127 			cpu_log_ecmem_info(spf_flt);
2128 
2129 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2130 			    NULL, " AFAR was derived from E$Tag");
2131 		} else {
2132 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2133 			    NULL, " No error found in ecache (No fault "
2134 			    "PA available)");
2135 		}
2136 		break;
2137 
2138 	    case CPU_WP_ERR:
2139 		/*
2140 		 * If the memscrub thread hasn't yet read
2141 		 * all of memory, as we requested in the
2142 		 * trap handler, then give it a kick to
2143 		 * make sure it does.
2144 		 */
2145 		if (!isus2i && !isus2e && read_all_memscrub)
2146 			memscrub_run();
2147 
2148 		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
2149 		    " WP event on");
2150 		return;
2151 
2152 	    case CPU_BTO_BERR_ERR:
2153 		/*
2154 		 * A bus timeout or error occurred that was in user mode or not
2155 		 * in a protected kernel code region.
2156 		 */
2157 		if (aflt->flt_stat & P_AFSR_BERR) {
2158 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2159 			    spf_flt, BERRTO_LFLAGS, NULL,
2160 			    " Bus Error on System Bus in %s mode from",
2161 			    aflt->flt_priv ? "privileged" : "user");
2162 		}
2163 
2164 		if (aflt->flt_stat & P_AFSR_TO) {
2165 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2166 			    spf_flt, BERRTO_LFLAGS, NULL,
2167 			    " Timeout on System Bus in %s mode from",
2168 			    aflt->flt_priv ? "privileged" : "user");
2169 		}
2170 
2171 		return;
2172 
2173 	    case CPU_PANIC_CP_ERR:
2174 		/*
2175 		 * Process the Copyback (CP) error info (if any) obtained from
2176 		 * polling all the cpus in the panic flow. This case is only
2177 		 * entered if we are panicking.
2178 		 */
2179 		ASSERT(panicstr != NULL);
2180 		ASSERT(aflt->flt_id == panic_aflt.flt_id);
2181 
2182 		/* See which space - this info may not exist */
2183 		if (panic_aflt.flt_status & ECC_D_TRAP)
2184 			space = "Data ";
2185 		else if (panic_aflt.flt_status & ECC_I_TRAP)
2186 			space = "Instruction ";
2187 		else
2188 			space = "";
2189 
2190 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2191 		    " AFAR was derived from UE report,"
2192 		    " CP event on CPU%d (caused %saccess error on %s%d)",
2193 		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
2194 		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
2195 
2196 		if (spf_flt->flt_ec_lcnt > 0)
2197 			cpu_log_ecmem_info(spf_flt);
2198 		else
2199 			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
2200 			    NULL, " No cache dump available");
2201 
2202 		return;
2203 
2204 	    case CPU_TRAPPING_CP_ERR:
2205 		/*
2206 		 * For sabre only.  This is a copyback ecache parity error due
2207 		 * to a PCI DMA read.  We should be panicking if we get here.
2208 		 */
2209 		ASSERT(panicstr != NULL);
2210 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2211 		    " AFAR was derived from UE report,"
2212 		    " CP event on CPU%d (caused Data access error "
2213 		    "on PCIBus)", aflt->flt_inst);
2214 		return;
2215 
2216 		/*
2217 		 * We log the ecache lines of the following states,
2218 		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
2219 		 * dirty_bad_busy if ecache_scrub_verbose is set and panic
2220 		 * in addition to logging if ecache_scrub_panic is set.
2221 		 */
2222 	    case CPU_BADLINE_CI_ERR:
2223 		ecache_scrub_logstr = "CBI";
2224 		/* FALLTHRU */
2225 
2226 	    case CPU_BADLINE_CB_ERR:
2227 		if (ecache_scrub_logstr == NULL)
2228 			ecache_scrub_logstr = "CBB";
2229 		/* FALLTHRU */
2230 
2231 	    case CPU_BADLINE_DI_ERR:
2232 		if (ecache_scrub_logstr == NULL)
2233 			ecache_scrub_logstr = "DBI";
2234 		/* FALLTHRU */
2235 
2236 	    case CPU_BADLINE_DB_ERR:
2237 		if (ecache_scrub_logstr == NULL)
2238 			ecache_scrub_logstr = "DBB";
2239 
2240 		cpu_aflt_log(CE_NOTE, 2, spf_flt,
2241 			(CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
2242 			" %s event on", ecache_scrub_logstr);
2243 		cpu_log_ecmem_info(spf_flt);
2244 
2245 		return;
2246 
2247 	    case CPU_ORPHAN_CP_ERR:
2248 		/*
2249 		 * Orphan CPs, where the CP bit is set, but when a CPU
2250 		 * doesn't report a UE.
2251 		 */
2252 		if (read_all_memscrub)
2253 			memscrub_run();
2254 
2255 		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
2256 			NULL, " Orphan CP event on");
2257 
2258 		/* Log ecache info if exist */
2259 		if (spf_flt->flt_ec_lcnt > 0)
2260 			cpu_log_ecmem_info(spf_flt);
2261 		else
2262 			cpu_aflt_log(CE_NOTE, 2, spf_flt,
2263 				(CP_LFLAGS | CPU_FLTCPU), NULL,
2264 				" No error found in ecache (No fault "
2265 				"PA available");
2266 		return;
2267 
2268 	    case CPU_ECACHE_ADDR_PAR_ERR:
2269 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2270 				" E$ Tag Address Parity error on");
2271 		cpu_log_ecmem_info(spf_flt);
2272 		return;
2273 
2274 	    case CPU_ECACHE_STATE_ERR:
2275 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2276 				" E$ Tag State Parity error on");
2277 		cpu_log_ecmem_info(spf_flt);
2278 		return;
2279 
2280 	    case CPU_ECACHE_TAG_ERR:
2281 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2282 				" E$ Tag scrub event on");
2283 		cpu_log_ecmem_info(spf_flt);
2284 		return;
2285 
2286 	    case CPU_ECACHE_ETP_ETS_ERR:
2287 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2288 				" AFSR.ETP is set and AFSR.ETS is zero on");
2289 		cpu_log_ecmem_info(spf_flt);
2290 		return;
2291 
2292 
2293 	    case CPU_ADDITIONAL_ERR:
2294 		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
2295 		    " Additional errors detected during error processing on");
2296 		return;
2297 
2298 	    default:
2299 		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
2300 		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
2301 		return;
2302 	}
2303 
2304 	/* ... fall through from the UE, EDP, or LDP cases */
2305 
2306 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2307 		if (!panicstr) {
2308 			/*
2309 			 * Retire the bad page that caused the error
2310 			 */
2311 			page_t *pp = page_numtopp_nolock((pfn_t)
2312 			    (aflt->flt_addr >> MMU_PAGESHIFT));
2313 
2314 			if (pp != NULL) {
2315 				page_settoxic(pp, PAGE_IS_FAULTY);
2316 				(void) page_retire(pp, PAGE_IS_TOXIC);
2317 			} else {
2318 				uint64_t pa =
2319 				    P2ALIGN(aflt->flt_addr, MMU_PAGESIZE);
2320 
2321 				cpu_aflt_log(CE_CONT, 3, spf_flt,
2322 				    CPU_ERRID_FIRST, NULL,
2323 				    ": cannot schedule clearing of error on "
2324 				    "page 0x%08x.%08x; page not in VM system",
2325 				    (uint32_t)(pa >> 32), (uint32_t)pa);
2326 			}
2327 		} else {
2328 			/*
2329 			 * Clear UEs on panic so that we don't
2330 			 * get haunted by them during panic or
2331 			 * after reboot
2332 			 */
2333 			clearphys(P2ALIGN(aflt->flt_addr, 64),
2334 			    cpunodes[CPU->cpu_id].ecache_size,
2335 			    cpunodes[CPU->cpu_id].ecache_linesize);
2336 
2337 			(void) clear_errors(NULL, NULL);
2338 		}
2339 	}
2340 
2341 	/*
2342 	 * Log final recover message
2343 	 */
2344 	if (!panicstr) {
2345 		if (!aflt->flt_priv) {
2346 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2347 			    NULL, " Above Error is in User Mode"
2348 			    "\n    and is fatal: "
2349 			    "will SIGKILL process and notify contract");
2350 		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
2351 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2352 			    NULL, " Above Error detected while dumping core;"
2353 			    "\n    core file will be truncated");
2354 		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
2355 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2356 			    NULL, " Above Error is due to Kernel access"
2357 			    "\n    to User space and is fatal: "
2358 			    "will SIGKILL process and notify contract");
2359 		} else if (aflt->flt_prot == AFLT_PROT_EC) {
2360 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
2361 			    " Above Error detected by protected Kernel code"
2362 			    "\n    that will try to clear error from system");
2363 		}
2364 	}
2365 }
2366 
2367 
2368 /*
2369  * Check all cpus for non-trapping UE-causing errors
2370  * In Ultra I/II, we look for copyback errors (CPs)
2371  */
2372 void
2373 cpu_check_allcpus(struct async_flt *aflt)
2374 {
2375 	spitf_async_flt cp;
2376 	spitf_async_flt *spf_cpflt = &cp;
2377 	struct async_flt *cpflt = (struct async_flt *)&cp;
2378 	int pix;
2379 
2380 	cpflt->flt_id = aflt->flt_id;
2381 	cpflt->flt_addr = aflt->flt_addr;
2382 
2383 	for (pix = 0; pix < NCPU; pix++) {
2384 		if (CPU_XCALL_READY(pix)) {
2385 			xc_one(pix, (xcfunc_t *)get_cpu_status,
2386 			    (uint64_t)cpflt, 0);
2387 
2388 			if (cpflt->flt_stat & P_AFSR_CP) {
2389 				char *space;
2390 
2391 				/* See which space - this info may not exist */
2392 				if (aflt->flt_status & ECC_D_TRAP)
2393 					space = "Data ";
2394 				else if (aflt->flt_status & ECC_I_TRAP)
2395 					space = "Instruction ";
2396 				else
2397 					space = "";
2398 
2399 				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
2400 				    NULL, " AFAR was derived from UE report,"
2401 				    " CP event on CPU%d (caused %saccess "
2402 				    "error on %s%d)", pix, space,
2403 				    (aflt->flt_status & ECC_IOBUS) ?
2404 				    "IOBUS" : "CPU", aflt->flt_bus_id);
2405 
2406 				if (spf_cpflt->flt_ec_lcnt > 0)
2407 					cpu_log_ecmem_info(spf_cpflt);
2408 				else
2409 					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
2410 					    CPU_ERRID_FIRST, NULL,
2411 					    " No cache dump available");
2412 			}
2413 		}
2414 	}
2415 }
2416 
#ifdef DEBUG
/*
 * DEBUG-only test switch.  When non-zero, get_cpu_status() ORs P_AFSR_CP
 * into the AFSR value it has read, so the copyback (CP) handling path runs
 * even though no CP error was reported.
 */
int test_mp_cp = 0;
#endif
2420 
/*
 * Cross-call callback routine to tell a CPU to read its own %afsr to check
 * for copyback errors and capture relevant information.
 *
 * arg is a pointer (passed as a uint64_t) to a spitf_async_flt whose
 * flt_addr has been set by the caller.  On return flt_stat holds the AFSR
 * read here, OR-ed with (and consuming) any AFSR bits accumulated in this
 * CPU's sfpr_scrub_afsr.  If P_AFSR_CP is set, the UDB values, and the
 * ecache tag/data of a line matching flt_addr (if any), are stored as well
 * and flt_ec_lcnt is set to 1 or 0 accordingly.
 *
 * Always returns 0.
 */
static uint_t
get_cpu_status(uint64_t arg)
{
	struct async_flt *aflt = (struct async_flt *)arg;
	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
	uint64_t afsr;
	uint32_t ec_idx;
	uint64_t sdbh, sdbl;
	int i;
	uint32_t ec_set_size;
	uchar_t valid;
	ec_data_t ec_data[8];
	uint64_t ec_tag, flt_addr_tag, oafsr;
	uint64_t *acc_afsr = NULL;

	get_asyncflt(&afsr);
	/* fold in, then reset, the AFSR bits accumulated for this CPU */
	if (CPU_PRIVATE(CPU) != NULL) {
		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		afsr |= *acc_afsr;
		*acc_afsr = 0;
	}

#ifdef DEBUG
	/* test hook: pretend a CP error was reported */
	if (test_mp_cp)
		afsr |= P_AFSR_CP;
#endif
	aflt->flt_stat = afsr;

	if (afsr & P_AFSR_CP) {
		/*
		 * Capture the UDBs
		 */
		get_udb_errors(&sdbh, &sdbl);
		/* only the low 10 bits of each UDB value are kept */
		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);

		/*
		 * Clear CP bit before capturing ecache data
		 * and AFSR info.
		 */
		set_asyncflt(P_AFSR_CP);

		/*
		 * See if we can capture the ecache line for the
		 * fault PA.
		 *
		 * Return a valid matching ecache line, if any.
		 * Otherwise, return the first matching ecache
		 * line marked invalid.
		 */
		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
		    ecache_associativity;
		spf_flt->flt_ec_lcnt = 0;

		/*
		 * Probe the same offset in each of the ecache_associativity
		 * sets; ec_idx advances by one set size per iteration.
		 */
		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
			get_ecache_dtag(P2ALIGN(ec_idx, 64),
				(uint64_t *)&ec_data[0], &ec_tag, &oafsr,
				    acc_afsr);

			/* tag does not match the fault address; try next set */
			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
				continue;

			valid = cpu_ec_state_valid &
			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
			    cpu_ec_state_shift);

			/*
			 * Record this line if it is valid, or if nothing has
			 * been recorded yet; a valid line replaces an
			 * earlier invalid one and ends the search.
			 */
			if (valid || spf_flt->flt_ec_lcnt == 0) {
				spf_flt->flt_ec_tag = ec_tag;
				bcopy(&ec_data, &spf_flt->flt_ec_data,
				    sizeof (ec_data));
				spf_flt->flt_ec_lcnt = 1;

				if (valid)
					break;
			}
		}
	}
	return (0);
}
2506 
2507 /*
2508  * CPU-module callback for the non-panicking CPUs.  This routine is invoked
2509  * from panic_idle() as part of the other CPUs stopping themselves when a
2510  * panic occurs.  We need to be VERY careful what we do here, since panicstr
2511  * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
2512  * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
2513  * CP error information.
2514  */
2515 void
2516 cpu_async_panic_callb(void)
2517 {
2518 	spitf_async_flt cp;
2519 	struct async_flt *aflt = (struct async_flt *)&cp;
2520 	uint64_t *scrub_afsr;
2521 
2522 	if (panic_aflt.flt_id != 0) {
2523 		aflt->flt_addr = panic_aflt.flt_addr;
2524 		(void) get_cpu_status((uint64_t)aflt);
2525 
2526 		if (CPU_PRIVATE(CPU) != NULL) {
2527 			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2528 			if (*scrub_afsr & P_AFSR_CP) {
2529 				aflt->flt_stat |= *scrub_afsr;
2530 				*scrub_afsr = 0;
2531 			}
2532 		}
2533 		if (aflt->flt_stat & P_AFSR_CP) {
2534 			aflt->flt_id = panic_aflt.flt_id;
2535 			aflt->flt_panic = 1;
2536 			aflt->flt_inst = CPU->cpu_id;
2537 			aflt->flt_class = CPU_FAULT;
2538 			cp.flt_type = CPU_PANIC_CP_ERR;
2539 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
2540 			    (void *)&cp, sizeof (cp), ue_queue,
2541 			    aflt->flt_panic);
2542 		}
2543 	}
2544 }
2545 
2546 /*
2547  * Turn off all cpu error detection, normally only used for panics.
2548  */
2549 void
2550 cpu_disable_errors(void)
2551 {
2552 	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
2553 }
2554 
2555 /*
2556  * Enable errors.
2557  */
2558 void
2559 cpu_enable_errors(void)
2560 {
2561 	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
2562 }
2563 
2564 static void
2565 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
2566 {
2567 	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
2568 	int i, loop = 1;
2569 	ushort_t ecc_0;
2570 	uint64_t paddr;
2571 	uint64_t data;
2572 
2573 	if (verbose)
2574 		loop = 8;
2575 	for (i = 0; i < loop; i++) {
2576 		paddr = aligned_addr + (i * 8);
2577 		data = lddphys(paddr);
2578 		if (verbose) {
2579 			if (ce_err) {
2580 			    ecc_0 = ecc_gen((uint32_t)(data>>32),
2581 			    (uint32_t)data);
2582 			    cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2583 				NULL, "    Paddr 0x%" PRIx64 ", "
2584 				"Data 0x%08x.%08x, ECC 0x%x", paddr,
2585 				(uint32_t)(data>>32), (uint32_t)data, ecc_0);
2586 			} else {
2587 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2588 				    NULL, "    Paddr 0x%" PRIx64 ", "
2589 				    "Data 0x%08x.%08x", paddr,
2590 				    (uint32_t)(data>>32), (uint32_t)data);
2591 			}
2592 		}
2593 	}
2594 }
2595 
/*
 * Bit-selection masks for the sec-ded-s4ed ecc code, one entry per check
 * bit.  ecc_gen() ANDs the hi and lo 32-bit halves of the data with
 * entry i and XORs together all of the surviving bits to produce bit i of
 * the check byte.  Entries 4-7 are entries 0-3 with hi and lo swapped.
 */
static struct {		/* sec-ded-s4ed ecc code */
	uint_t hi, lo;
} ecc_code[8] = {
	{ 0xee55de23U, 0x16161161U },
	{ 0x55eede93U, 0x61612212U },
	{ 0xbb557b8cU, 0x49494494U },
	{ 0x55bb7b6cU, 0x94948848U },
	{ 0x16161161U, 0xee55de23U },
	{ 0x61612212U, 0x55eede93U },
	{ 0x49494494U, 0xbb557b8cU },
	{ 0x94948848U, 0x55bb7b6cU }
};
2608 
2609 static ushort_t
2610 ecc_gen(uint_t high_bytes, uint_t low_bytes)
2611 {
2612 	int i, j;
2613 	uchar_t checker, bit_mask;
2614 	struct {
2615 		uint_t hi, lo;
2616 	} hex_data, masked_data[8];
2617 
2618 	hex_data.hi = high_bytes;
2619 	hex_data.lo = low_bytes;
2620 
2621 	/* mask out bits according to sec-ded-s4ed ecc code */
2622 	for (i = 0; i < 8; i++) {
2623 		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
2624 		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
2625 	}
2626 
2627 	/*
2628 	 * xor all bits in masked_data[i] to get bit_i of checker,
2629 	 * where i = 0 to 7
2630 	 */
2631 	checker = 0;
2632 	for (i = 0; i < 8; i++) {
2633 		bit_mask = 1 << i;
2634 		for (j = 0; j < 32; j++) {
2635 			if (masked_data[i].lo & 1) checker ^= bit_mask;
2636 			if (masked_data[i].hi & 1) checker ^= bit_mask;
2637 			masked_data[i].hi >>= 1;
2638 			masked_data[i].lo >>= 1;
2639 		}
2640 	}
2641 	return (checker);
2642 }
2643 
2644 /*
2645  * Flush the entire ecache using displacement flush by reading through a
2646  * physical address range as large as the ecache.
2647  */
2648 void
2649 cpu_flush_ecache(void)
2650 {
2651 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2652 	    cpunodes[CPU->cpu_id].ecache_linesize);
2653 }
2654 
/*
 * read and display the data in the cache line where the
 * original ce error occurred.
 * This routine is mainly used for debugging new hardware.
 *
 * ecc describes the original fault.  verbose and ce_err are passed
 * through to cpu_read_paddr() and select how much is read and logged.
 * The whole sequence runs with preemption disabled and with error traps
 * set to EER_ECC_DISABLE; EER_ENABLE is restored before returning.
 */
void
read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
{
	kpreempt_disable();
	/* disable ECC error traps */
	set_error_enable(EER_ECC_DISABLE);

	/*
	 * flush the ecache
	 * read the data
	 * check to see if an ECC error occurred
	 */
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
	set_lsu(get_lsu() | cache_boot_state);
	cpu_read_paddr(ecc, verbose, ce_err);
	/* the persistence result is not used here */
	(void) check_ecc(ecc);

	/* enable ECC error traps */
	set_error_enable(EER_ENABLE);
	kpreempt_enable();
}
2682 
2683 /*
2684  * Check the AFSR bits for UE/CE persistence.
2685  * If UE or CE errors are detected, the routine will
2686  * clears all the AFSR sticky bits (except CP for
2687  * spitfire/blackbird) and the UDBs.
2688  * if ce_debug or ue_debug is set, log any ue/ce errors detected.
2689  */
2690 static int
2691 check_ecc(struct async_flt *ecc)
2692 {
2693 	uint64_t t_afsr;
2694 	uint64_t t_afar;
2695 	uint64_t udbh;
2696 	uint64_t udbl;
2697 	ushort_t udb;
2698 	int persistent = 0;
2699 
2700 	/*
2701 	 * Capture the AFSR, AFAR and UDBs info
2702 	 */
2703 	get_asyncflt(&t_afsr);
2704 	get_asyncaddr(&t_afar);
2705 	t_afar &= SABRE_AFAR_PA;
2706 	get_udb_errors(&udbh, &udbl);
2707 
2708 	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
2709 		/*
2710 		 * Clear the errors
2711 		 */
2712 		clr_datapath();
2713 
2714 		if (isus2i || isus2e)
2715 			set_asyncflt(t_afsr);
2716 		else
2717 			set_asyncflt(t_afsr & ~P_AFSR_CP);
2718 
2719 		/*
2720 		 * determine whether to check UDBH or UDBL for persistence
2721 		 */
2722 		if (ecc->flt_synd & UDBL_REG) {
2723 			udb = (ushort_t)udbl;
2724 			t_afar |= 0x8;
2725 		} else {
2726 			udb = (ushort_t)udbh;
2727 		}
2728 
2729 		if (ce_debug || ue_debug) {
2730 			spitf_async_flt spf_flt; /* for logging */
2731 			struct async_flt *aflt =
2732 				(struct async_flt *)&spf_flt;
2733 
2734 			/* Package the info nicely in the spf_flt struct */
2735 			bzero(&spf_flt, sizeof (spitf_async_flt));
2736 			aflt->flt_stat = t_afsr;
2737 			aflt->flt_addr = t_afar;
2738 			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
2739 			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);
2740 
2741 			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
2742 			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
2743 			    " check_ecc: Dumping captured error states ...");
2744 		}
2745 
2746 		/*
2747 		 * if the fault addresses don't match, not persistent
2748 		 */
2749 		if (t_afar != ecc->flt_addr) {
2750 			return (persistent);
2751 		}
2752 
2753 		/*
2754 		 * check for UE persistence
2755 		 * since all DIMMs in the bank are identified for a UE,
2756 		 * there's no reason to check the syndrome
2757 		 */
2758 		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
2759 			persistent = 1;
2760 		}
2761 
2762 		/*
2763 		 * check for CE persistence
2764 		 */
2765 		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
2766 			if ((udb & P_DER_E_SYND) ==
2767 			    (ecc->flt_synd & P_DER_E_SYND)) {
2768 				persistent = 1;
2769 			}
2770 		}
2771 	}
2772 	return (persistent);
2773 }
2774 
#ifdef HUMMINGBIRD
/*
 * Clock divisors used by cpu_change_speed(): full speed, half speed and
 * the lowest supported speed.
 */
#define	HB_FULL_DIV		1
#define	HB_HALF_DIV		2
#define	HB_LOWEST_DIV		8
/* marker for divisors that have no E-star mode setting */
#define	HB_ECLK_INVALID		0xdeadbad
/*
 * E-star mode register value for each divisor, indexed by the divisor
 * itself.  Only divisors 1, 2, 4, 6 and 8 have an entry; the rest
 * (including index 0) hold HB_ECLK_INVALID.
 */
static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
	HB_ECLK_8 };

/* values for the direction argument of CHANGE_REFRESH_COUNT() */
#define	HB_SLOW_DOWN		0
#define	HB_SPEED_UP		1

/*
 * Write a new value to the HB_ESTAR_MODE register and wait 1 usec.
 * NOTE: this expands to two statements and already ends in a semicolon;
 * it is not wrapped in a block, so it must not be used as the sole body
 * of an unbraced if/else/loop.
 */
#define	SET_ESTAR_MODE(mode)					\
	stdphysio(HB_ESTAR_MODE, (mode));			\
	/*							\
	 * PLL logic requires minimum of 16 clock		\
	 * cycles to lock to the new clock speed.		\
	 * Wait 1 usec to satisfy this requirement.		\
	 */							\
	drv_usecwait(1);

/*
 * Reprogram the refresh count field of HB_MEM_CNTRL0 for new_div, computed
 * from HB_REFRESH_INTERVAL and the current CPU's clock_freq, then read the
 * register back.  When direction is HB_SLOW_DOWN and self refresh is not
 * enabled, additionally wait for the old and the new count to run down at
 * the cur_div speed.
 */
#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
{								\
	volatile uint64_t data;					\
	uint64_t count, new_count;				\
	clock_t delay;						\
	data = lddphysio(HB_MEM_CNTRL0);			\
	count = (data & HB_REFRESH_COUNT_MASK) >> 		\
	    HB_REFRESH_COUNT_SHIFT;				\
	new_count = (HB_REFRESH_INTERVAL *			\
	    cpunodes[CPU->cpu_id].clock_freq) /			\
	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);        		\
	/*							\
	 * If we are slowing down the cpu and Memory		\
	 * Self Refresh is not enabled, it is required		\
	 * to wait for old refresh count to count-down and	\
	 * new refresh count to go into effect (let new value	\
	 * counts down once).					\
	 */							\
	if ((direction) == HB_SLOW_DOWN &&			\
	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
		/*						\
		 * Each count takes 64 cpu clock cycles		\
		 * to decrement.  Wait for current refresh	\
		 * count plus new refresh count at current	\
		 * cpu speed to count down to zero.  Round	\
		 * up the delay time.				\
		 */						\
		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
		    (count + new_count) * MICROSEC * (cur_div)) /\
		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
		drv_usecwait(delay);				\
	}							\
}

/*
 * Set the self refresh field of HB_MEM_CNTRL0 to bit, leaving the other
 * fields as read, and read the register back after the write.
 */
#define	SET_SELF_REFRESH(bit)					\
{								\
	volatile uint64_t data;					\
	data = lddphysio(HB_MEM_CNTRL0);			\
	data = (data & ~HB_SELF_REFRESH_MASK) |			\
	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
}
#endif	/* HUMMINGBIRD */
2845 
/*
 * Switch the CPU clock to the divisor `new_divisor'.  The body is compiled
 * only when HUMMINGBIRD is defined; otherwise the function does nothing.
 *
 * A divisor outside [HB_FULL_DIV, HB_LOWEST_DIV], or one whose hb_eclk[]
 * entry is HB_ECLK_INVALID, is rejected with a warning.  The current divisor
 * is found by matching the HB_ECLK_MASK bits of HB_ESTAR_MODE against
 * hb_eclk[]; if nothing matches we panic.
 *
 * Sequencing implemented below:
 *	- when slowing down, the refresh count is changed before the E* mode;
 *	- when speeding up, the E* mode is changed before the refresh count;
 *	- self refresh is enabled after dropping from full to 1/2 speed and
 *	  disabled before returning from 1/2 to full speed;
 *	- a move between full speed and anything slower than 1/2 speed is
 *	  done in two steps, passing through 1/2 speed.
 *
 * After the transition the new divisor is stored in CPU->cpu_m.divisor.
 * `arg2' is unused.
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
{
#ifdef HUMMINGBIRD
	uint64_t cur_mask, cur_divisor = 0;
	volatile uint64_t reg;
	int index;
	int bad_div;

	/*
	 * Range-check first so that hb_eclk[] is only indexed with a
	 * divisor that is known to be in bounds.
	 */
	bad_div = (new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV);
	if (!bad_div)
		bad_div = (hb_eclk[new_divisor] == HB_ECLK_INVALID);

	if (bad_div) {
		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
		    new_divisor);
		return;
	}

	/* Translate the current E* clock bits back into a divisor. */
	reg = lddphysio(HB_ESTAR_MODE);
	cur_mask = reg & HB_ECLK_MASK;
	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
		if (hb_eclk[index] != cur_mask)
			continue;
		cur_divisor = index;
		break;
	}

	if (cur_divisor == 0)
		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
		    "can't be determined!");

	/* Nothing to do when we already run at the requested divisor. */
	if (cur_divisor == new_divisor)
		return;

	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
		/* full -> 1/2 */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/* 1/2 -> full */
		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);

	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
		/* full -> slower than 1/2: step through 1/2 speed first */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/* slower than 1/2 -> full: step through 1/2 speed first */
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);

		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);

	} else if (cur_divisor < new_divisor) {
		/* any other slow-down */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > new_divisor) {
		/* any other speed-up */
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
	}
	CPU->cpu_m.divisor = (uchar_t)new_divisor;
#endif
}
2931 
2932 /*
2933  * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
2934  * we clear all the sticky bits. If a non-null pointer to a async fault
2935  * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
2936  * info will be returned in the structure.  If a non-null pointer to a
2937  * uint64_t is passed in, this will be updated if the CP bit is set in the
2938  * AFSR.  The afsr will be returned.
2939  */
2940 static uint64_t
2941 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
2942 {
2943 	struct async_flt *aflt = (struct async_flt *)spf_flt;
2944 	uint64_t afsr;
2945 	uint64_t udbh, udbl;
2946 
2947 	get_asyncflt(&afsr);
2948 
2949 	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
2950 		*acc_afsr |= afsr;
2951 
2952 	if (spf_flt != NULL) {
2953 		aflt->flt_stat = afsr;
2954 		get_asyncaddr(&aflt->flt_addr);
2955 		aflt->flt_addr &= SABRE_AFAR_PA;
2956 
2957 		get_udb_errors(&udbh, &udbl);
2958 		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
2959 		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
2960 	}
2961 
2962 	set_asyncflt(afsr);		/* clear afsr */
2963 	clr_datapath();			/* clear udbs */
2964 	return (afsr);
2965 }
2966 
2967 /*
2968  * Scan the ecache to look for bad lines.  If found, the afsr, afar, e$ data
2969  * tag of the first bad line will be returned. We also return the old-afsr
2970  * (before clearing the sticky bits). The linecnt data will be updated to
2971  * indicate the number of bad lines detected.
2972  */
2973 static void
2974 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
2975 	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
2976 {
2977 	ec_data_t t_ecdata[8];
2978 	uint64_t t_etag, oafsr;
2979 	uint64_t pa = AFLT_INV_ADDR;
2980 	uint32_t i, j, ecache_sz;
2981 	uint64_t acc_afsr = 0;
2982 	uint64_t *cpu_afsr = NULL;
2983 
2984 	if (CPU_PRIVATE(CPU) != NULL)
2985 		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2986 
2987 	*linecnt = 0;
2988 	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;
2989 
2990 	for (i = 0; i < ecache_sz; i += 64) {
2991 		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
2992 		    cpu_afsr);
2993 		acc_afsr |= oafsr;
2994 
2995 		/*
2996 		 * Scan through the whole 64 bytes line in 8 8-byte chunks
2997 		 * looking for the first occurrence of an EDP error.  The AFSR
2998 		 * info is captured for each 8-byte chunk.  Note that for
2999 		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
3000 		 * 16-byte chunk granularity (i.e. the AFSR will be the same
3001 		 * for the high and low 8-byte words within the 16-byte chunk).
3002 		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
3003 		 * granularity and only PSYND bits [7:0] are used.
3004 		 */
3005 		for (j = 0; j < 8; j++) {
3006 			ec_data_t *ecdptr = &t_ecdata[j];
3007 
3008 			if (ecdptr->ec_afsr & P_AFSR_EDP) {
3009 				uint64_t errpa;
3010 				ushort_t psynd;
3011 				uint32_t ec_set_size = ecache_sz /
3012 				    ecache_associativity;
3013 
3014 				/*
3015 				 * For Spitfire/Blackbird, we need to look at
3016 				 * the PSYND to make sure that this 8-byte chunk
3017 				 * is the right one.  PSYND bits [15:8] belong
3018 				 * to the upper 8-byte (even) chunk.  Bits
3019 				 * [7:0] belong to the lower 8-byte chunk (odd).
3020 				 */
3021 				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3022 				if (!isus2i && !isus2e) {
3023 					if (j & 0x1)
3024 						psynd = psynd & 0xFF;
3025 					else
3026 						psynd = psynd >> 8;
3027 
3028 					if (!psynd)
3029 						continue; /* wrong chunk */
3030 				}
3031 
3032 				/* Construct the PA */
3033 				errpa = ((t_etag & cpu_ec_tag_mask) <<
3034 				    cpu_ec_tag_shift) | ((i | (j << 3)) %
3035 				    ec_set_size);
3036 
3037 				/* clean up the cache line */
3038 				flushecacheline(P2ALIGN(errpa, 64),
3039 					cpunodes[CPU->cpu_id].ecache_size);
3040 
3041 				oafsr = clear_errors(NULL, cpu_afsr);
3042 				acc_afsr |= oafsr;
3043 
3044 				(*linecnt)++;
3045 
3046 				/*
3047 				 * Capture the PA for the first bad line found.
3048 				 * Return the ecache dump and tag info.
3049 				 */
3050 				if (pa == AFLT_INV_ADDR) {
3051 					int k;
3052 
3053 					pa = errpa;
3054 					for (k = 0; k < 8; k++)
3055 						ecache_data[k] = t_ecdata[k];
3056 					*ecache_tag = t_etag;
3057 				}
3058 				break;
3059 			}
3060 		}
3061 	}
3062 	*t_afar = pa;
3063 	*t_afsr = acc_afsr;
3064 }
3065 
3066 static void
3067 cpu_log_ecmem_info(spitf_async_flt *spf_flt)
3068 {
3069 	struct async_flt *aflt = (struct async_flt *)spf_flt;
3070 	uint64_t ecache_tag = spf_flt->flt_ec_tag;
3071 	char linestr[30];
3072 	char *state_str;
3073 	int i;
3074 
3075 	/*
3076 	 * Check the ecache tag to make sure it
3077 	 * is valid. If invalid, a memory dump was
3078 	 * captured instead of a ecache dump.
3079 	 */
3080 	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
3081 		uchar_t eparity = (uchar_t)
3082 		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);
3083 
3084 		uchar_t estate = (uchar_t)
3085 		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);
3086 
3087 		if (estate == cpu_ec_state_shr)
3088 			state_str = "Shared";
3089 		else if (estate == cpu_ec_state_exl)
3090 			state_str = "Exclusive";
3091 		else if (estate == cpu_ec_state_own)
3092 			state_str = "Owner";
3093 		else if (estate == cpu_ec_state_mod)
3094 			state_str = "Modified";
3095 		else
3096 			state_str = "Invalid";
3097 
3098 		if (spf_flt->flt_ec_lcnt > 1) {
3099 			(void) snprintf(linestr, sizeof (linestr),
3100 			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
3101 		} else {
3102 			linestr[0] = '\0';
3103 		}
3104 
3105 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3106 		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
3107 		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
3108 		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
3109 		    (uint32_t)ecache_tag, state_str,
3110 		    (uint32_t)eparity, linestr);
3111 	} else {
3112 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3113 		    " E$tag != PA from AFAR; E$line was victimized"
3114 		    "\n    dumping memory from PA 0x%08x.%08x instead",
3115 		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
3116 		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
3117 	}
3118 
3119 	/*
3120 	 * Dump out all 8 8-byte ecache data captured
3121 	 * For each 8-byte data captured, we check the
3122 	 * captured afsr's parity syndrome to find out
3123 	 * which 8-byte chunk is bad. For memory dump, the
3124 	 * AFSR values were initialized to 0.
3125 	 */
3126 	for (i = 0; i < 8; i++) {
3127 		ec_data_t *ecdptr;
3128 		uint_t offset;
3129 		ushort_t psynd;
3130 		ushort_t bad;
3131 		uint64_t edp;
3132 
3133 		offset = i << 3;	/* multiply by 8 */
3134 		ecdptr = &spf_flt->flt_ec_data[i];
3135 		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3136 		edp = ecdptr->ec_afsr & P_AFSR_EDP;
3137 
3138 		/*
3139 		 * For Sabre/Hummingbird, parity synd is captured only
3140 		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
3141 		 * For spitfire/blackbird, AFSR.PSYND is captured
3142 		 * in 16-byte granularity. [15:8] represent
3143 		 * the upper 8 byte and [7:0] the lower 8 byte.
3144 		 */
3145 		if (isus2i || isus2e || (i & 0x1))
3146 			bad = (psynd & 0xFF);		/* check bits [7:0] */
3147 		else
3148 			bad = (psynd & 0xFF00);		/* check bits [15:8] */
3149 
3150 		if (bad && edp) {
3151 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3152 			    " E$Data (0x%02x): 0x%08x.%08x "
3153 			    "*Bad* PSYND=0x%04x", offset,
3154 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3155 			    (uint32_t)ecdptr->ec_d8, psynd);
3156 		} else {
3157 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3158 			    " E$Data (0x%02x): 0x%08x.%08x", offset,
3159 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3160 			    (uint32_t)ecdptr->ec_d8);
3161 		}
3162 	}
3163 }
3164 
3165 /*
3166  * Common logging function for all cpu async errors.  This function allows the
3167  * caller to generate a single cmn_err() call that logs the appropriate items
3168  * from the fault structure, and implements our rules for AFT logging levels.
3169  *
3170  *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
3171  *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
3172  *	spflt: pointer to spitfire async fault structure
3173  *	logflags: bitflags indicating what to output
3174  *	endstr: a end string to appear at the end of this log
3175  *	fmt: a format string to appear at the beginning of the log
3176  *
3177  * The logflags allows the construction of predetermined output from the spflt
3178  * structure.  The individual data items always appear in a consistent order.
3179  * Note that either or both of the spflt structure pointer and logflags may be
3180  * NULL or zero respectively, indicating that the predetermined output
3181  * substrings are not requested in this log.  The output looks like this:
3182  *
3183  *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
3184  *	<CPU_SPACE><CPU_ERRID>
3185  *	newline+4spaces<CPU_AFSR><CPU_AFAR>
3186  *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
3187  *	newline+4spaces<CPU_UDBH><CPU_UDBL>
3188  *	newline+4spaces<CPU_SYND>
3189  *	newline+4spaces<endstr>
3190  *
3191  * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>;
3192  * it is assumed that <endstr> will be the unum string in this case.  The size
3193  * of our intermediate formatting buf[] is based on the worst case of all flags
3194  * being enabled.  We pass the caller's varargs directly to vcmn_err() for
3195  * formatting so we don't need additional stack space to format them here.
3196  */
3197 /*PRINTFLIKE6*/
3198 static void
3199 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
3200 	const char *endstr, const char *fmt, ...)
3201 {
3202 	struct async_flt *aflt = (struct async_flt *)spflt;
3203 	char buf[400], *p, *q; /* see comments about buf[] size above */
3204 	va_list ap;
3205 	int console_log_flag;
3206 
3207 	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
3208 				(aflt->flt_stat & P_AFSR_LEVEL1)) ||
3209 	    (aflt->flt_panic)) {
3210 		console_log_flag = (tagnum < 2) || aft_verbose;
3211 	} else {
3212 		int verbose = ((aflt->flt_class == BUS_FAULT) ||
3213 		    (aflt->flt_stat & P_AFSR_CE)) ?
3214 		    ce_verbose_memory : ce_verbose_other;
3215 
3216 		if (!verbose)
3217 			return;
3218 
3219 		console_log_flag = (verbose > 1);
3220 	}
3221 
3222 	if (console_log_flag)
3223 		(void) sprintf(buf, "[AFT%d]", tagnum);
3224 	else
3225 		(void) sprintf(buf, "![AFT%d]", tagnum);
3226 
3227 	p = buf + strlen(buf);	/* current buffer position */
3228 	q = buf + sizeof (buf);	/* pointer past end of buffer */
3229 
3230 	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
3231 		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
3232 		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
3233 		p += strlen(p);
3234 	}
3235 
3236 	/*
3237 	 * Copy the caller's format string verbatim into buf[].  It will be
3238 	 * formatted by the call to vcmn_err() at the end of this function.
3239 	 */
3240 	if (fmt != NULL && p < q) {
3241 		(void) strncpy(p, fmt, (size_t)(q - p - 1));
3242 		buf[sizeof (buf) - 1] = '\0';
3243 		p += strlen(p);
3244 	}
3245 
3246 	if (spflt != NULL) {
3247 		if (logflags & CPU_FLTCPU) {
3248 			(void) snprintf(p, (size_t)(q - p), " CPU%d",
3249 			    aflt->flt_inst);
3250 			p += strlen(p);
3251 		}
3252 
3253 		if (logflags & CPU_SPACE) {
3254 			if (aflt->flt_status & ECC_D_TRAP)
3255 				(void) snprintf(p, (size_t)(q - p),
3256 				    " Data access");
3257 			else if (aflt->flt_status & ECC_I_TRAP)
3258 				(void) snprintf(p, (size_t)(q - p),
3259 				    " Instruction access");
3260 			p += strlen(p);
3261 		}
3262 
3263 		if (logflags & CPU_TL) {
3264 			(void) snprintf(p, (size_t)(q - p), " at TL%s",
3265 			    aflt->flt_tl ? ">0" : "=0");
3266 			p += strlen(p);
3267 		}
3268 
3269 		if (logflags & CPU_ERRID) {
3270 			(void) snprintf(p, (size_t)(q - p),
3271 			    ", errID 0x%08x.%08x",
3272 			    (uint32_t)(aflt->flt_id >> 32),
3273 			    (uint32_t)aflt->flt_id);
3274 			p += strlen(p);
3275 		}
3276 
3277 		if (logflags & CPU_AFSR) {
3278 			(void) snprintf(p, (size_t)(q - p),
3279 			    "\n    AFSR 0x%08b.%08b",
3280 			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
3281 			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
3282 			p += strlen(p);
3283 		}
3284 
3285 		if (logflags & CPU_AFAR) {
3286 			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
3287 			    (uint32_t)(aflt->flt_addr >> 32),
3288 			    (uint32_t)aflt->flt_addr);
3289 			p += strlen(p);
3290 		}
3291 
3292 		if (logflags & CPU_AF_PSYND) {
3293 			ushort_t psynd = (ushort_t)
3294 			    (aflt->flt_stat & P_AFSR_P_SYND);
3295 
3296 			(void) snprintf(p, (size_t)(q - p),
3297 			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
3298 			    psynd, ecc_psynd_score(psynd));
3299 			p += strlen(p);
3300 		}
3301 
3302 		if (logflags & CPU_AF_ETS) {
3303 			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
3304 			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
3305 			p += strlen(p);
3306 		}
3307 
3308 		if (logflags & CPU_FAULTPC) {
3309 			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
3310 			    (void *)aflt->flt_pc);
3311 			p += strlen(p);
3312 		}
3313 
3314 		if (logflags & CPU_UDBH) {
3315 			(void) snprintf(p, (size_t)(q - p),
3316 			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
3317 			    spflt->flt_sdbh, UDB_FMTSTR,
3318 			    spflt->flt_sdbh & 0xFF);
3319 			p += strlen(p);
3320 		}
3321 
3322 		if (logflags & CPU_UDBL) {
3323 			(void) snprintf(p, (size_t)(q - p),
3324 			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
3325 			    spflt->flt_sdbl, UDB_FMTSTR,
3326 			    spflt->flt_sdbl & 0xFF);
3327 			p += strlen(p);
3328 		}
3329 
3330 		if (logflags & CPU_SYND) {
3331 			ushort_t synd = SYND(aflt->flt_synd);
3332 
3333 			(void) snprintf(p, (size_t)(q - p),
3334 			    "\n    %s Syndrome 0x%x Memory Module ",
3335 			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
3336 			p += strlen(p);
3337 		}
3338 	}
3339 
3340 	if (endstr != NULL) {
3341 		if (!(logflags & CPU_SYND))
3342 			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
3343 		else
3344 			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
3345 		p += strlen(p);
3346 	}
3347 
3348 	if (ce_code == CE_CONT && (p < q - 1))
3349 		(void) strcpy(p, "\n"); /* add final \n if needed */
3350 
3351 	va_start(ap, fmt);
3352 	vcmn_err(ce_code, buf, ap);
3353 	va_end(ap);
3354 }
3355 
3356 /*
3357  * Ecache Scrubbing
3358  *
3359  * The basic idea is to prevent lines from sitting in the ecache long enough
3360  * to build up soft errors which can lead to ecache parity errors.
3361  *
3362  * The following rules are observed when flushing the ecache:
3363  *
3364  * 1. When the system is busy, flush bad clean lines
3365  * 2. When the system is idle, flush all clean lines
3366  * 3. When the system is idle, flush good dirty lines
3367  * 4. Never flush bad dirty lines.
3368  *
3369  *	modify	parity	busy   idle
3370  *	----------------------------
3371  *	clean	good		X
3372  * 	clean	bad	X	X
3373  * 	dirty	good		X
3374  *	dirty	bad
3375  *
3376  * Bad or good refers to whether a line has an E$ parity error or not.
3377  * Clean or dirty refers to the state of the modified bit.  We currently
3378  * default the scan rate to 100 (scan 10% of the cache per second).
3379  *
3380  * The following are E$ states and actions.
3381  *
3382  * We encode our state as a 3-bit number, consisting of:
3383  *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
3384  *	ECACHE_STATE_PARITY	(0=good,  1=bad)
3385  *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
3386  *
3387  * We associate a flushing and a logging action with each state.
3388  *
3389  * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3390  * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
3391  * E$ only, in addition to value being set by ec_flush.
3392  */
3393 
3394 #define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
3395 #define	NEVER_FLUSH		0x0	/* never the flush the E$ line */
3396 #define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */
3397 
3398 struct {
3399 	char	ec_flush;		/* whether to flush or not */
3400 	char	ec_log;			/* ecache logging */
3401 	char	ec_log_type;		/* log type info */
3402 } ec_action[] = {	/* states of the E$ line in M P B */
3403 	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
3404 	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
3405 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
3406 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
3407 	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
3408 	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
3409 	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
3410 	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
3411 };
3412 
/*
 * Offsets into ec_action[] that identify the clean_good_busy and
 * dirty_good_busy lines.
 */
#define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
#define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */

/*
 * CGB(x, m) / DGB(x, m) are true when the state index `x' is the
 * clean_good_busy / dirty_good_busy line and the E$ type `m' is not
 * ECACHE_CPU_MIRROR.  They select the lines that are flushed conditionally
 * while the system is busy, which is only done for a non-mirrored E$.
 *
 * Both arguments are fully parenthesized so that any expression (for
 * instance a conditional expression) can be passed for them.
 */
#define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && ((m) != ECACHE_CPU_MIRROR))
#define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && ((m) != ECACHE_CPU_MIRROR))

/*
 * Bits of the 3-bit E$ line state used to index ec_action[].
 */
#define	ECACHE_STATE_MODIFIED	0x4	/* 0=clean, 1=dirty */
#define	ECACHE_STATE_PARITY	0x2	/* 0=good,  1=bad */
#define	ECACHE_STATE_BUSY	0x1	/* 0=idle,  1=busy */
3430 
/*
 * Scrubber settings used when the ecache is mirrored: they take the place
 * of ecache_calls_a_sec and of the line count derived from ecache_scan_rate.
 */
int ecache_calls_a_sec_mirrored = 1;	/* scrubber calls per sec (mirrored) */
int ecache_lines_per_call_mirrored = 1;	/* lines scanned per call (mirrored) */

/*
 * General scrubber tunables.
 */
int ecache_scrub_enable = 1;		/* ecache scrubbing is on by default */
int ecache_scrub_verbose = 1;		/* prints clean and dirty lines */
int ecache_scrub_panic = 0;		/* panics on a clean and dirty line */
int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
int ecache_idle_factor = 1;		/* scan rate multiplier when idle */
int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */

/*
 * Number of timeout calls per second; written by the scrubber and used to
 * compute the delay until the next scrub request.
 */
volatile int ec_timeout_calls = 1;
3447 
3448 /*
3449  * Interrupt number and pil for ecache scrubber cross-trap calls.
3450  */
3451 static uint_t ecache_scrub_inum;
3452 uint_t ecache_scrub_pil = PIL_9;
3453 
3454 /*
3455  * Kstats for the E$ scrubber.
3456  */
3457 typedef struct ecache_kstat {
3458 	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
3459 	kstat_named_t clean_good_busy;		/* # of lines skipped */
3460 	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
3461 	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
3462 	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
3463 	kstat_named_t dirty_good_busy;		/* # of lines skipped */
3464 	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
3465 	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
3466 	kstat_named_t invalid_lines;		/* # of invalid lines */
3467 	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
3468 	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
3469 	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
3470 } ecache_kstat_t;
3471 
3472 static ecache_kstat_t ec_kstat_template = {
3473 	{ "clean_good_idle", KSTAT_DATA_ULONG },
3474 	{ "clean_good_busy", KSTAT_DATA_ULONG },
3475 	{ "clean_bad_idle", KSTAT_DATA_ULONG },
3476 	{ "clean_bad_busy", KSTAT_DATA_ULONG },
3477 	{ "dirty_good_idle", KSTAT_DATA_ULONG },
3478 	{ "dirty_good_busy", KSTAT_DATA_ULONG },
3479 	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
3480 	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
3481 	{ "invalid_lines", KSTAT_DATA_ULONG },
3482 	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
3483 	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
3484 	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
3485 };
3486 
3487 struct kmem_cache *sf_private_cache;
3488 
3489 /*
3490  * Called periodically on each CPU to scan the ecache once a sec.
3491  * adjusting the ecache line index appropriately
3492  */
3493 void
3494 scrub_ecache_line()
3495 {
3496 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3497 	int cpuid = CPU->cpu_id;
3498 	uint32_t index = ssmp->ecache_flush_index;
3499 	uint64_t ec_size = cpunodes[cpuid].ecache_size;
3500 	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
3501 	int nlines = ssmp->ecache_nlines;
3502 	uint32_t ec_set_size = ec_size / ecache_associativity;
3503 	int ec_mirror = ssmp->ecache_mirror;
3504 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
3505 
3506 	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
3507 	int mpb;		/* encode Modified, Parity, Busy for action */
3508 	uchar_t state;
3509 	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
3510 	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3511 	ec_data_t ec_data[8];
3512 	kstat_named_t *ec_knp;
3513 
3514 	switch (ec_mirror) {
3515 		default:
3516 		case ECACHE_CPU_NON_MIRROR:
3517 			/*
3518 			 * The E$ scan rate is expressed in units of tenths of
3519 			 * a percent.  ecache_scan_rate = 1000 (100%) means the
3520 			 * whole cache is scanned every second.
3521 			 */
3522 			scan_lines = (nlines * ecache_scan_rate) /
3523 					(1000 * ecache_calls_a_sec);
3524 			if (!(ssmp->ecache_busy)) {
3525 				if (ecache_idle_factor > 0) {
3526 					scan_lines *= ecache_idle_factor;
3527 				}
3528 			} else {
3529 				flush_clean_busy = (scan_lines *
3530 					ecache_flush_clean_good_busy) / 100;
3531 				flush_dirty_busy = (scan_lines *
3532 					ecache_flush_dirty_good_busy) / 100;
3533 			}
3534 
3535 			ec_timeout_calls = (ecache_calls_a_sec ?
3536 						ecache_calls_a_sec : 1);
3537 			break;
3538 
3539 		case ECACHE_CPU_MIRROR:
3540 			scan_lines = ecache_lines_per_call_mirrored;
3541 			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
3542 					ecache_calls_a_sec_mirrored : 1);
3543 			break;
3544 	}
3545 
3546 	/*
3547 	 * The ecache scrubber algorithm operates by reading and
3548 	 * decoding the E$ tag to determine whether the corresponding E$ line
3549 	 * can be scrubbed. There is a implicit assumption in the scrubber
3550 	 * logic that the E$ tag is valid. Unfortunately, this assertion is
3551 	 * flawed since the E$ tag may also be corrupted and have parity errors
3552 	 * The scrubber logic is enhanced to check the validity of the E$ tag
3553 	 * before scrubbing. When a parity error is detected in the E$ tag,
3554 	 * it is possible to recover and scrub the tag under certain conditions
3555 	 * so that a ETP error condition can be avoided.
3556 	 */
3557 
3558 	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
3559 		/*
3560 		 * We get the old-AFSR before clearing the AFSR sticky bits
3561 		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
3562 		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
3563 		 */
3564 		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
3565 		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
3566 				cpu_ec_state_shift);
3567 
3568 		/*
3569 		 * ETP is set try to scrub the ecache tag.
3570 		 */
3571 		if (nafsr & P_AFSR_ETP) {
3572 			ecache_scrub_tag_err(nafsr, state, index);
3573 		} else if (state & cpu_ec_state_valid) {
3574 			/*
3575 			 * ETP is not set, E$ tag is valid.
3576 			 * Proceed with the E$ scrubbing.
3577 			 */
3578 			if (state & cpu_ec_state_dirty)
3579 				mpb |= ECACHE_STATE_MODIFIED;
3580 
3581 			tafsr = check_ecache_line(index, acc_afsr);
3582 
3583 			if (tafsr & P_AFSR_EDP) {
3584 				mpb |= ECACHE_STATE_PARITY;
3585 
3586 				if (ecache_scrub_verbose ||
3587 							ecache_scrub_panic) {
3588 					get_ecache_dtag(P2ALIGN(index, 64),
3589 						(uint64_t *)&ec_data[0],
3590 						&ec_tag, &oafsr, acc_afsr);
3591 				}
3592 			}
3593 
3594 			if (ssmp->ecache_busy)
3595 				mpb |= ECACHE_STATE_BUSY;
3596 
3597 			ec_knp = (kstat_named_t *)ec_ksp + mpb;
3598 			ec_knp->value.ul++;
3599 
3600 			paddr = ((ec_tag & cpu_ec_tag_mask) <<
3601 				cpu_ec_tag_shift) | (index % ec_set_size);
3602 
3603 			/*
3604 			 * We flush the E$ lines depending on the ec_flush,
3605 			 * we additionally flush clean_good_busy and
3606 			 * dirty_good_busy lines for mirrored E$.
3607 			 */
3608 			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
3609 				flushecacheline(paddr, ec_size);
3610 			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
3611 				(ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
3612 					flushecacheline(paddr, ec_size);
3613 			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
3614 				softcall(ecache_page_retire, (void *)paddr);
3615 			}
3616 
3617 			/*
3618 			 * Conditionally flush both the clean_good and
3619 			 * dirty_good lines when busy.
3620 			 */
3621 			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
3622 				flush_clean_busy--;
3623 				flushecacheline(paddr, ec_size);
3624 				ec_ksp->clean_good_busy_flush.value.ul++;
3625 			} else if (DGB(mpb, ec_mirror) &&
3626 						(flush_dirty_busy > 0)) {
3627 				flush_dirty_busy--;
3628 				flushecacheline(paddr, ec_size);
3629 				ec_ksp->dirty_good_busy_flush.value.ul++;
3630 			}
3631 
3632 			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
3633 						ecache_scrub_panic)) {
3634 				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
3635 						tafsr);
3636 			}
3637 
3638 		} else {
3639 			ec_ksp->invalid_lines.value.ul++;
3640 		}
3641 
3642 		if ((index += ec_linesize) >= ec_size)
3643 			index = 0;
3644 
3645 	}
3646 
3647 	/*
3648 	 * set the ecache scrub index for the next time around
3649 	 */
3650 	ssmp->ecache_flush_index = index;
3651 
3652 	if (*acc_afsr & P_AFSR_CP) {
3653 		uint64_t ret_afsr;
3654 
3655 		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
3656 		if ((ret_afsr & P_AFSR_CP) == 0)
3657 			*acc_afsr = 0;
3658 	}
3659 }
3660 
3661 /*
3662  * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
3663  * we decrement the outstanding request count to zero.
3664  */
3665 
3666 /*ARGSUSED*/
3667 uint_t
3668 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
3669 {
3670 	int i;
3671 	int outstanding;
3672 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3673 	uint32_t *countp = &ssmp->ec_scrub_outstanding;
3674 
3675 	do {
3676 		outstanding = *countp;
3677 		ASSERT(outstanding > 0);
3678 		for (i = 0; i < outstanding; i++)
3679 			scrub_ecache_line();
3680 	} while (atomic_add_32_nv(countp, -outstanding));
3681 
3682 	return (DDI_INTR_CLAIMED);
3683 }
3684 
3685 /*
3686  * force each cpu to perform an ecache scrub, called from a timeout
3687  */
3688 extern xcfunc_t ecache_scrubreq_tl1;
3689 
3690 void
3691 do_scrub_ecache_line(void)
3692 {
3693 	long delta;
3694 
3695 	if (ecache_calls_a_sec > hz)
3696 		ecache_calls_a_sec = hz;
3697 	else if (ecache_calls_a_sec <= 0)
3698 	    ecache_calls_a_sec = 1;
3699 
3700 	if (ecache_calls_a_sec_mirrored > hz)
3701 		ecache_calls_a_sec_mirrored = hz;
3702 	else if (ecache_calls_a_sec_mirrored <= 0)
3703 	    ecache_calls_a_sec_mirrored = 1;
3704 
3705 	if (ecache_scrub_enable) {
3706 		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
3707 		delta = hz / ec_timeout_calls;
3708 	} else {
3709 		delta = hz;
3710 	}
3711 
3712 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3713 		delta);
3714 }
3715 
3716 /*
3717  * initialization for ecache scrubbing
3718  * This routine is called AFTER all cpus have had cpu_init_private called
3719  * to initialize their private data areas.
3720  */
3721 void
3722 cpu_init_cache_scrub(void)
3723 {
3724 	if (ecache_calls_a_sec > hz) {
3725 		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
3726 		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
3727 		ecache_calls_a_sec = hz;
3728 	}
3729 
3730 	/*
3731 	 * Register softint for ecache scrubbing.
3732 	 */
3733 	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
3734 	    scrub_ecache_line_intr, NULL);
3735 
3736 	/*
3737 	 * kick off the scrubbing using realtime timeout
3738 	 */
3739 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3740 	    hz / ecache_calls_a_sec);
3741 }
3742 
3743 /*
3744  * Unset the busy flag for this cpu.
3745  */
3746 void
3747 cpu_idle_ecache_scrub(struct cpu *cp)
3748 {
3749 	if (CPU_PRIVATE(cp) != NULL) {
3750 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3751 							sfpr_scrub_misc);
3752 		ssmp->ecache_busy = ECACHE_CPU_IDLE;
3753 	}
3754 }
3755 
3756 /*
3757  * Set the busy flag for this cpu.
3758  */
3759 void
3760 cpu_busy_ecache_scrub(struct cpu *cp)
3761 {
3762 	if (CPU_PRIVATE(cp) != NULL) {
3763 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3764 							sfpr_scrub_misc);
3765 		ssmp->ecache_busy = ECACHE_CPU_BUSY;
3766 	}
3767 }
3768 
3769 /*
3770  * initialize the ecache scrubber data structures
3771  * The global entry point cpu_init_private replaces this entry point.
3772  *
3773  */
3774 static void
3775 cpu_init_ecache_scrub_dr(struct cpu *cp)
3776 {
3777 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3778 	int cpuid = cp->cpu_id;
3779 
3780 	/*
3781 	 * intialize bookkeeping for cache scrubbing
3782 	 */
3783 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3784 
3785 	ssmp->ecache_flush_index = 0;
3786 
3787 	ssmp->ecache_nlines =
3788 		cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
3789 
3790 	/*
3791 	 * Determine whether we are running on mirrored SRAM
3792 	 */
3793 
3794 	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
3795 		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
3796 	else
3797 		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
3798 
3799 	cpu_busy_ecache_scrub(cp);
3800 
3801 	/*
3802 	 * initialize the kstats
3803 	 */
3804 	ecache_kstat_init(cp);
3805 }
3806 
3807 /*
3808  * uninitialize the ecache scrubber data structures
3809  * The global entry point cpu_uninit_private replaces this entry point.
3810  */
3811 static void
3812 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
3813 {
3814 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3815 
3816 	if (ssmp->ecache_ksp != NULL) {
3817 		kstat_delete(ssmp->ecache_ksp);
3818 		ssmp->ecache_ksp = NULL;
3819 	}
3820 
3821 	/*
3822 	 * un-initialize bookkeeping for cache scrubbing
3823 	 */
3824 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3825 
3826 	cpu_idle_ecache_scrub(cp);
3827 }
3828 
3829 struct kmem_cache *sf_private_cache;
3830 
3831 /*
3832  * Cpu private initialization.  This includes allocating the cpu_private
3833  * data structure, initializing it, and initializing the scrubber for this
3834  * cpu.  This is called once for EVERY cpu, including CPU 0. This function
3835  * calls cpu_init_ecache_scrub_dr to init the scrubber.
3836  * We use kmem_cache_create for the spitfire private data structure because it
3837  * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
3838  */
3839 void
3840 cpu_init_private(struct cpu *cp)
3841 {
3842 	spitfire_private_t *sfprp;
3843 
3844 	ASSERT(CPU_PRIVATE(cp) == NULL);
3845 
3846 	/*
3847 	 * If the sf_private_cache has not been created, create it.
3848 	 */
3849 	if (sf_private_cache == NULL) {
3850 		sf_private_cache = kmem_cache_create("sf_private_cache",
3851 			sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3852 			NULL, NULL, NULL, NULL, 0);
3853 		ASSERT(sf_private_cache);
3854 	}
3855 
3856 	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3857 
3858 	bzero(sfprp, sizeof (spitfire_private_t));
3859 
3860 	cpu_init_ecache_scrub_dr(cp);
3861 }
3862 
3863 /*
3864  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
3865  * deallocate the scrubber data structures and cpu_private data structure.
3866  * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit
3867  * the scrubber for the specified cpu.
3868  */
3869 void
3870 cpu_uninit_private(struct cpu *cp)
3871 {
3872 	ASSERT(CPU_PRIVATE(cp));
3873 
3874 	cpu_uninit_ecache_scrub_dr(cp);
3875 	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3876 	CPU_PRIVATE(cp) = NULL;
3877 }
3878 
3879 /*
3880  * initialize the ecache kstats for each cpu
3881  */
3882 static void
3883 ecache_kstat_init(struct cpu *cp)
3884 {
3885 	struct kstat *ksp;
3886 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3887 
3888 	ASSERT(ssmp != NULL);
3889 
3890 	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3891 	    KSTAT_TYPE_NAMED,
3892 	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3893 	    KSTAT_FLAG_WRITABLE)) == NULL) {
3894 		ssmp->ecache_ksp = NULL;
3895 		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3896 		return;
3897 	}
3898 
3899 	ssmp->ecache_ksp = ksp;
3900 	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3901 	kstat_install(ksp);
3902 }
3903 
3904 /*
3905  * log the bad ecache information
3906  */
3907 static void
3908 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3909 		uint64_t afsr)
3910 {
3911 	spitf_async_flt spf_flt;
3912 	struct async_flt *aflt;
3913 	int i;
3914 	char *class;
3915 
3916 	bzero(&spf_flt, sizeof (spitf_async_flt));
3917 	aflt = &spf_flt.cmn_asyncflt;
3918 
3919 	for (i = 0; i < 8; i++) {
3920 		spf_flt.flt_ec_data[i] = ec_data[i];
3921 	}
3922 
3923 	spf_flt.flt_ec_tag = ec_tag;
3924 
3925 	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
3926 		spf_flt.flt_type = ec_action[mpb].ec_log_type;
3927 	} else spf_flt.flt_type = (ushort_t)mpb;
3928 
3929 	aflt->flt_inst = CPU->cpu_id;
3930 	aflt->flt_class = CPU_FAULT;
3931 	aflt->flt_id = gethrtime_waitfree();
3932 	aflt->flt_addr = paddr;
3933 	aflt->flt_stat = afsr;
3934 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3935 
3936 	switch (mpb) {
3937 	case CPU_ECACHE_TAG_ERR:
3938 	case CPU_ECACHE_ADDR_PAR_ERR:
3939 	case CPU_ECACHE_ETP_ETS_ERR:
3940 	case CPU_ECACHE_STATE_ERR:
3941 		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3942 		break;
3943 	default:
3944 		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3945 		break;
3946 	}
3947 
3948 	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3949 	    ue_queue, aflt->flt_panic);
3950 
3951 	if (aflt->flt_panic)
3952 		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
3953 					"line detected");
3954 }
3955 
3956 /*
3957  * Process an ecache error that occured during the E$ scrubbing.
3958  * We do the ecache scan to find the bad line, flush the bad line
3959  * and start the memscrubber to find any UE (in memory or in another cache)
3960  */
3961 static uint64_t
3962 ecache_scrub_misc_err(int type, uint64_t afsr)
3963 {
3964 	spitf_async_flt spf_flt;
3965 	struct async_flt *aflt;
3966 	uint64_t oafsr;
3967 
3968 	bzero(&spf_flt, sizeof (spitf_async_flt));
3969 	aflt = &spf_flt.cmn_asyncflt;
3970 
3971 	/*
3972 	 * Scan each line in the cache to look for the one
3973 	 * with bad parity
3974 	 */
3975 	aflt->flt_addr = AFLT_INV_ADDR;
3976 	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3977 		&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3978 
3979 	if (oafsr & P_AFSR_CP) {
3980 		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3981 		*cp_afsr |= oafsr;
3982 	}
3983 
3984 	/*
3985 	 * If we found a bad PA, update the state to indicate if it is
3986 	 * memory or I/O space.
3987 	 */
3988 	if (aflt->flt_addr != AFLT_INV_ADDR) {
3989 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3990 			MMU_PAGESHIFT)) ? 1 : 0;
3991 	}
3992 
3993 	spf_flt.flt_type = (ushort_t)type;
3994 
3995 	aflt->flt_inst = CPU->cpu_id;
3996 	aflt->flt_class = CPU_FAULT;
3997 	aflt->flt_id = gethrtime_waitfree();
3998 	aflt->flt_status = afsr;
3999 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
4000 
4001 	/*
4002 	 * We have the bad line, flush that line and start
4003 	 * the memscrubber.
4004 	 */
4005 	if (spf_flt.flt_ec_lcnt > 0) {
4006 		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
4007 			cpunodes[CPU->cpu_id].ecache_size);
4008 		read_all_memscrub = 1;
4009 		memscrub_run();
4010 	}
4011 
4012 	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
4013 	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
4014 	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
4015 
4016 	return (oafsr);
4017 }
4018 
4019 static void
4020 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
4021 {
4022 	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
4023 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
4024 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
4025 	uint64_t ec_tag, paddr, oafsr;
4026 	ec_data_t ec_data[8];
4027 	int cpuid = CPU->cpu_id;
4028 	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
4029 						ecache_associativity;
4030 	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
4031 
4032 	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
4033 			&oafsr, cpu_afsr);
4034 	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
4035 						(index % ec_set_size);
4036 
4037 	/*
4038 	 * E$ tag state has good parity
4039 	 */
4040 	if ((afsr_ets & cpu_ec_state_parity) == 0) {
4041 		if (afsr_ets & cpu_ec_parity) {
4042 			/*
4043 			 * E$ tag state bits indicate the line is clean,
4044 			 * invalidate the E$ tag and continue.
4045 			 */
4046 			if (!(state & cpu_ec_state_dirty)) {
4047 				/*
4048 				 * Zero the tag and mark the state invalid
4049 				 * with good parity for the tag.
4050 				 */
4051 				if (isus2i || isus2e)
4052 					write_hb_ec_tag_parity(index);
4053 				else
4054 					write_ec_tag_parity(index);
4055 
4056 				/* Sync with the dual tag */
4057 				flushecacheline(0,
4058 					cpunodes[CPU->cpu_id].ecache_size);
4059 				ec_ksp->tags_cleared.value.ul++;
4060 				ecache_scrub_log(ec_data, ec_tag, paddr,
4061 					CPU_ECACHE_TAG_ERR, afsr);
4062 				return;
4063 			} else {
4064 				ecache_scrub_log(ec_data, ec_tag, paddr,
4065 					CPU_ECACHE_ADDR_PAR_ERR, afsr);
4066 				cmn_err(CE_PANIC, " E$ tag address has bad"
4067 							" parity");
4068 			}
4069 		} else if ((afsr_ets & cpu_ec_parity) == 0) {
4070 			/*
4071 			 * ETS is zero but ETP is set
4072 			 */
4073 			ecache_scrub_log(ec_data, ec_tag, paddr,
4074 				CPU_ECACHE_ETP_ETS_ERR, afsr);
4075 			cmn_err(CE_PANIC, "AFSR.ETP is set and"
4076 				" AFSR.ETS is zero");
4077 		}
4078 	} else {
4079 		/*
4080 		 * E$ tag state bit has a bad parity
4081 		 */
4082 		ecache_scrub_log(ec_data, ec_tag, paddr,
4083 				CPU_ECACHE_STATE_ERR, afsr);
4084 		cmn_err(CE_PANIC, "E$ tag state has bad parity");
4085 	}
4086 }
4087 
4088 static void
4089 ecache_page_retire(void *arg)
4090 {
4091 	uint64_t paddr = (uint64_t)arg;
4092 	page_t *pp = page_numtopp_nolock((pfn_t)(paddr >> MMU_PAGESHIFT));
4093 
4094 	if (pp) {
4095 		page_settoxic(pp, PAGE_IS_FAULTY);
4096 		(void) page_retire(pp, PAGE_IS_TOXIC);
4097 	}
4098 }
4099 
/*
 * This cpu module provides an empty implementation of sticksync_slave.
 */
void
sticksync_slave(void)
{
}
4103 
/*
 * This cpu module provides an empty implementation of sticksync_master.
 */
void
sticksync_master(void)
{
}
4107 
4108 /*ARGSUSED*/
4109 void
4110 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
4111 {}
4112 
4113 void
4114 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4115 {
4116 	int status;
4117 	ddi_fm_error_t de;
4118 
4119 	bzero(&de, sizeof (ddi_fm_error_t));
4120 
4121 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4122 	    FM_ENA_FMT1);
4123 	de.fme_flag = expected;
4124 	de.fme_bus_specific = (void *)aflt->flt_addr;
4125 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4126 
4127 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4128 		aflt->flt_panic = 1;
4129 }
4130 
4131 /*ARGSUSED*/
4132 void
4133 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
4134     errorq_t *eqp, uint_t flag)
4135 {
4136 	struct async_flt *aflt = (struct async_flt *)payload;
4137 
4138 	aflt->flt_erpt_class = error_class;
4139 	errorq_dispatch(eqp, payload, payload_sz, flag);
4140 }
4141 
4142 #define	MAX_SIMM	8
4143 
4144 struct ce_info {
4145 	char    name[UNUM_NAMLEN];
4146 	uint64_t intermittent_total;
4147 	uint64_t persistent_total;
4148 	uint64_t sticky_total;
4149 	unsigned short leaky_bucket_cnt;
4150 };
4151 
4152 /*
4153  * Separately-defined structure for use in reporting the ce_info
4154  * to SunVTS without exposing the internal layout and implementation
4155  * of struct ce_info.
4156  */
4157 static struct ecc_error_info ecc_error_info_data = {
4158 	{ "version", KSTAT_DATA_UINT32 },
4159 	{ "maxcount", KSTAT_DATA_UINT32 },
4160 	{ "count", KSTAT_DATA_UINT32 }
4161 };
4162 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
4163     sizeof (struct kstat_named);
4164 
4165 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
4166 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
4167 #endif
4168 
4169 struct ce_info  *mem_ce_simm = NULL;
4170 size_t mem_ce_simm_size = 0;
4171 
4172 /*
4173  * Default values for the number of CE's allowed per interval.
4174  * Interval is defined in minutes
4175  * SOFTERR_MIN_TIMEOUT is defined in microseconds
4176  */
4177 #define	SOFTERR_LIMIT_DEFAULT		2
4178 #define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
4179 #define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
4180 #define	TIMEOUT_NONE			((timeout_id_t)0)
4181 #define	TIMEOUT_SET			((timeout_id_t)1)
4182 
4183 /*
4184  * timeout identifer for leaky_bucket
4185  */
4186 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
4187 
4188 /*
4189  * Tunables for maximum number of allowed CE's in a given time
4190  */
4191 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4192 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4193 
4194 void
4195 cpu_mp_init(void)
4196 {
4197 	size_t size = cpu_aflt_size();
4198 	size_t i;
4199 	kstat_t *ksp;
4200 
4201 	/*
4202 	 * Initialize the CE error handling buffers.
4203 	 */
4204 	mem_ce_simm_size = MAX_SIMM * max_ncpus;
4205 	size = sizeof (struct ce_info) * mem_ce_simm_size;
4206 	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
4207 
4208 	ksp = kstat_create("unix", 0, "ecc-info", "misc",
4209 	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
4210 	if (ksp != NULL) {
4211 		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
4212 		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
4213 		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
4214 		ecc_error_info_data.count.value.ui32 = 0;
4215 		kstat_install(ksp);
4216 	}
4217 
4218 	for (i = 0; i < mem_ce_simm_size; i++) {
4219 		struct kstat_ecc_mm_info *kceip;
4220 
4221 		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
4222 		    KM_SLEEP);
4223 		ksp = kstat_create("mm", i, "ecc-info", "misc",
4224 		    KSTAT_TYPE_NAMED,
4225 		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
4226 		    KSTAT_FLAG_VIRTUAL);
4227 		if (ksp != NULL) {
4228 			/*
4229 			 * Re-declare ks_data_size to include room for the
4230 			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
4231 			 * set.
4232 			 */
4233 			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
4234 			    KSTAT_CE_UNUM_NAMLEN;
4235 			ksp->ks_data = kceip;
4236 			kstat_named_init(&kceip->name,
4237 			    "name", KSTAT_DATA_STRING);
4238 			kstat_named_init(&kceip->intermittent_total,
4239 			    "intermittent_total", KSTAT_DATA_UINT64);
4240 			kstat_named_init(&kceip->persistent_total,
4241 			    "persistent_total", KSTAT_DATA_UINT64);
4242 			kstat_named_init(&kceip->sticky_total,
4243 			    "sticky_total", KSTAT_DATA_UINT64);
4244 			/*
4245 			 * Use the default snapshot routine as it knows how to
4246 			 * deal with named kstats with long strings.
4247 			 */
4248 			ksp->ks_update = ecc_kstat_update;
4249 			kstat_install(ksp);
4250 		} else {
4251 			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4252 		}
4253 	}
4254 }
4255 
4256 /*ARGSUSED*/
4257 static void
4258 leaky_bucket_timeout(void *arg)
4259 {
4260 	int i;
4261 	struct ce_info *psimm = mem_ce_simm;
4262 
4263 	for (i = 0; i < mem_ce_simm_size; i++) {
4264 		if (psimm[i].leaky_bucket_cnt > 0)
4265 			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
4266 	}
4267 	add_leaky_bucket_timeout();
4268 }
4269 
4270 static void
4271 add_leaky_bucket_timeout(void)
4272 {
4273 	long timeout_in_microsecs;
4274 
4275 	/*
4276 	 * create timeout for next leak.
4277 	 *
4278 	 * The timeout interval is calculated as follows
4279 	 *
4280 	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4281 	 *
4282 	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4283 	 * in a minute), then multiply this by MICROSEC to get the interval
4284 	 * in microseconds.  Divide this total by ecc_softerr_limit so that
4285 	 * the timeout interval is accurate to within a few microseconds.
4286 	 */
4287 
4288 	if (ecc_softerr_limit <= 0)
4289 		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4290 	if (ecc_softerr_interval <= 0)
4291 		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4292 
4293 	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
4294 	    ecc_softerr_limit;
4295 
4296 	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
4297 		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
4298 
4299 	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
4300 	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
4301 }
4302 
4303 /*
4304  * Legacy Correctable ECC Error Hash
4305  *
4306  * All of the code below this comment is used to implement a legacy array
4307  * which counted intermittent, persistent, and sticky CE errors by unum,
4308  * and then was later extended to publish the data as a kstat for SunVTS.
4309  * All of this code is replaced by FMA, and remains here until such time
4310  * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
4311  *
4312  * Errors are saved in three buckets per-unum:
4313  * (1) sticky - scrub was unsuccessful, cannot be scrubbed
4314  *     This could represent a problem, and is immediately printed out.
4315  * (2) persistent - was successfully scrubbed
4316  *     These errors use the leaky bucket algorithm to determine
4317  *     if there is a serious problem.
4318  * (3) intermittent - may have originated from the cpu or upa/safari bus,
4319  *     and does not necessarily indicate any problem with the dimm itself,
4320  *     is critical information for debugging new hardware.
4321  *     Because we do not know if it came from the dimm, it would be
4322  *     inappropriate to include these in the leaky bucket counts.
4323  *
4324  * If the E$ line was modified before the scrub operation began, then the
4325  * displacement flush at the beginning of scrubphys() will cause the modified
4326  * line to be written out, which will clean up the CE.  Then, any subsequent
4327  * read will not cause an error, which will cause persistent errors to be
4328  * identified as intermittent.
4329  *
4330  * If a DIMM is going bad, it will produce true persistents as well as
4331  * false intermittents, so these intermittents can be safely ignored.
4332  *
4333  * If the error count is excessive for a DIMM, this function will return
4334  * PAGE_IS_FAILING, and the CPU module may then decide to remove that page
4335  * from use.
4336  */
4337 static int
4338 ce_count_unum(int status, int len, char *unum)
4339 {
4340 	int i;
4341 	struct ce_info *psimm = mem_ce_simm;
4342 	int page_status = PAGE_IS_OK;
4343 
4344 	ASSERT(psimm != NULL);
4345 
4346 	if (len <= 0 ||
4347 	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
4348 		return (page_status);
4349 
4350 	/*
4351 	 * Initialize the leaky_bucket timeout
4352 	 */
4353 	if (casptr(&leaky_bucket_timeout_id,
4354 	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
4355 		add_leaky_bucket_timeout();
4356 
4357 	for (i = 0; i < mem_ce_simm_size; i++) {
4358 		if (psimm[i].name[0] == '\0') {
4359 			/*
4360 			 * Hit the end of the valid entries, add
4361 			 * a new one.
4362 			 */
4363 			(void) strncpy(psimm[i].name, unum, len);
4364 			if (status & ECC_STICKY) {
4365 				/*
4366 				 * Sticky - the leaky bucket is used to track
4367 				 * soft errors.  Since a sticky error is a
4368 				 * hard error and likely to be retired soon,
4369 				 * we do not count it in the leaky bucket.
4370 				 */
4371 				psimm[i].leaky_bucket_cnt = 0;
4372 				psimm[i].intermittent_total = 0;
4373 				psimm[i].persistent_total = 0;
4374 				psimm[i].sticky_total = 1;
4375 				cmn_err(CE_WARN,
4376 				    "[AFT0] Sticky Softerror encountered "
4377 				    "on Memory Module %s\n", unum);
4378 				page_status = PAGE_IS_FAILING;
4379 			} else if (status & ECC_PERSISTENT) {
4380 				psimm[i].leaky_bucket_cnt = 1;
4381 				psimm[i].intermittent_total = 0;
4382 				psimm[i].persistent_total = 1;
4383 				psimm[i].sticky_total = 0;
4384 			} else {
4385 				/*
4386 				 * Intermittent - Because the scrub operation
4387 				 * cannot find the error in the DIMM, we will
4388 				 * not count these in the leaky bucket
4389 				 */
4390 				psimm[i].leaky_bucket_cnt = 0;
4391 				psimm[i].intermittent_total = 1;
4392 				psimm[i].persistent_total = 0;
4393 				psimm[i].sticky_total = 0;
4394 			}
4395 			ecc_error_info_data.count.value.ui32++;
4396 			break;
4397 		} else if (strncmp(unum, psimm[i].name, len) == 0) {
4398 			/*
4399 			 * Found an existing entry for the current
4400 			 * memory module, adjust the counts.
4401 			 */
4402 			if (status & ECC_STICKY) {
4403 				psimm[i].sticky_total++;
4404 				cmn_err(CE_WARN,
4405 				    "[AFT0] Sticky Softerror encountered "
4406 				    "on Memory Module %s\n", unum);
4407 				page_status = PAGE_IS_FAILING;
4408 			} else if (status & ECC_PERSISTENT) {
4409 				int new_value;
4410 
4411 				new_value = atomic_add_16_nv(
4412 				    &psimm[i].leaky_bucket_cnt, 1);
4413 				psimm[i].persistent_total++;
4414 				if (new_value > ecc_softerr_limit) {
4415 					cmn_err(CE_WARN, "[AFT0] Most recent %d"
4416 					    " soft errors from Memory Module"
4417 					    " %s exceed threshold (N=%d,"
4418 					    " T=%dh:%02dm) triggering page"
4419 					    " retire", new_value, unum,
4420 					    ecc_softerr_limit,
4421 					    ecc_softerr_interval / 60,
4422 					    ecc_softerr_interval % 60);
4423 					atomic_add_16(
4424 					    &psimm[i].leaky_bucket_cnt, -1);
4425 					page_status = PAGE_IS_FAILING;
4426 				}
4427 			} else { /* Intermittent */
4428 				psimm[i].intermittent_total++;
4429 			}
4430 			break;
4431 		}
4432 	}
4433 
4434 	if (i >= mem_ce_simm_size)
4435 		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
4436 		    "space.\n");
4437 
4438 	return (page_status);
4439 }
4440 
4441 /*
4442  * Function to support counting of IO detected CEs.
4443  */
4444 void
4445 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
4446 {
4447 	if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING &&
4448 	    automatic_page_removal) {
4449 		page_t *pp = page_numtopp_nolock((pfn_t)
4450 		    (ecc->flt_addr >> MMU_PAGESHIFT));
4451 
4452 		if (pp) {
4453 			page_settoxic(pp, PAGE_IS_FAULTY);
4454 			(void) page_retire(pp, PAGE_IS_FAILING);
4455 		}
4456 	}
4457 }
4458 
4459 static int
4460 ecc_kstat_update(kstat_t *ksp, int rw)
4461 {
4462 	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
4463 	struct ce_info *ceip = mem_ce_simm;
4464 	int i = ksp->ks_instance;
4465 
4466 	if (rw == KSTAT_WRITE)
4467 		return (EACCES);
4468 
4469 	ASSERT(ksp->ks_data != NULL);
4470 	ASSERT(i < mem_ce_simm_size && i >= 0);
4471 
4472 	/*
4473 	 * Since we're not using locks, make sure that we don't get partial
4474 	 * data. The name is always copied before the counters are incremented
4475 	 * so only do this update routine if at least one of the counters is
4476 	 * non-zero, which ensures that ce_count_unum() is done, and the
4477 	 * string is fully copied.
4478 	 */
4479 	if (ceip[i].intermittent_total == 0 &&
4480 	    ceip[i].persistent_total == 0 &&
4481 	    ceip[i].sticky_total == 0) {
4482 		/*
4483 		 * Uninitialized or partially initialized. Ignore.
4484 		 * The ks_data buffer was allocated via kmem_zalloc,
4485 		 * so no need to bzero it.
4486 		 */
4487 		return (0);
4488 	}
4489 
4490 	kstat_named_setstr(&kceip->name, ceip[i].name);
4491 	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
4492 	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
4493 	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
4494 
4495 	return (0);
4496 }
4497 
4498 #define	VIS_BLOCKSIZE		64
4499 
4500 int
4501 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
4502 {
4503 	int ret, watched;
4504 
4505 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4506 	ret = dtrace_blksuword32(addr, data, 0);
4507 	if (watched)
4508 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4509 
4510 	return (ret);
4511 }
4512 
/*
 * This cpu module provides an empty implementation of
 * cpu_faulted_enter; cp is ignored.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
	/* nothing to do */
}
4518 
/*
 * This cpu module provides an empty implementation of
 * cpu_faulted_exit; cp is ignored.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
	/* nothing to do */
}
4524 
4525 static int mmu_disable_ism_large_pages = ((1 << TTE512K) |
4526 	(1 << TTE32M) | (1 << TTE256M));
4527 static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));
4528 
4529 /*
4530  * The function returns the US_II mmu-specific values for the
4531  * hat's disable_large_pages and disable_ism_large_pages variables.
4532  */
4533 int
4534 mmu_large_pages_disabled(uint_t flag)
4535 {
4536 	int pages_disable = 0;
4537 
4538 	if (flag == HAT_LOAD) {
4539 		pages_disable = mmu_disable_large_pages;
4540 	} else if (flag == HAT_LOAD_SHARE) {
4541 		pages_disable = mmu_disable_ism_large_pages;
4542 	}
4543 	return (pages_disable);
4544 }
4545 
/*
 * This cpu module provides an empty implementation of
 * mmu_init_kernel_pgsz; hat is ignored.
 */
/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
{
	/* nothing to do */
}
4551 
4552 size_t
4553 mmu_get_kernel_lpsize(size_t lpsize)
4554 {
4555 	uint_t tte;
4556 
4557 	if (lpsize == 0) {
4558 		/* no setting for segkmem_lpsize in /etc/system: use default */
4559 		return (MMU_PAGESIZE4M);
4560 	}
4561 
4562 	for (tte = TTE8K; tte <= TTE4M; tte++) {
4563 		if (lpsize == TTEBYTES(tte))
4564 			return (lpsize);
4565 	}
4566 
4567 	return (TTEBYTES(TTE8K));
4568 }
4569