xref: /titanic_52/usr/src/uts/sun4u/cpu/spitfire.c (revision 25293bed88d38c68b38433904649010d0103e8c5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/archsystm.h>
31 #include <sys/machparam.h>
32 #include <sys/machsystm.h>
33 #include <sys/cpu.h>
34 #include <sys/elf_SPARC.h>
35 #include <vm/hat_sfmmu.h>
36 #include <vm/page.h>
37 #include <vm/vm_dep.h>
38 #include <sys/cpuvar.h>
39 #include <sys/spitregs.h>
40 #include <sys/async.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/dditypes.h>
44 #include <sys/sunddi.h>
45 #include <sys/cpu_module.h>
46 #include <sys/prom_debug.h>
47 #include <sys/vmsystm.h>
48 #include <sys/prom_plat.h>
49 #include <sys/sysmacros.h>
50 #include <sys/intreg.h>
51 #include <sys/machtrap.h>
52 #include <sys/ontrap.h>
53 #include <sys/ivintr.h>
54 #include <sys/atomic.h>
55 #include <sys/panic.h>
56 #include <sys/ndifm.h>
57 #include <sys/fm/protocol.h>
58 #include <sys/fm/util.h>
59 #include <sys/fm/cpu/UltraSPARC-II.h>
60 #include <sys/ddi.h>
61 #include <sys/ecc_kstat.h>
62 #include <sys/watchpoint.h>
63 #include <sys/dtrace.h>
64 #include <sys/errclassify.h>
65 
66 uint_t	cpu_impl_dual_pgsz = 0;
67 
68 /*
69  * Structure for the 8 byte ecache data dump and the associated AFSR state.
70  * There will be 8 of these structures used to dump an ecache line (64 bytes).
71  */
72 typedef struct sf_ec_data_elm {
73 	uint64_t ec_d8;		/* 8 bytes of dumped ecache data */
74 	uint64_t ec_afsr;	/* AFSR state associated with ec_d8 */
75 } ec_data_t;
76 
77 /*
78  * Define spitfire (Ultra I/II) specific asynchronous error structure
79  */
/*
 * NOTE: cmn_asyncflt must stay the first member.  Code in this file casts a
 * spitf_async_flt pointer directly to a struct async_flt pointer (see
 * cpu_ce_error() and cpu_ce_log_status()).
 */
80 typedef struct spitfire_async_flt {
81 	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
82 	ushort_t flt_type;		/* types of faults - cpu specific */
83 	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
84 	uint64_t flt_ec_tag;		/* E$ tag info */
85 	int flt_ec_lcnt;		/* number of bad E$ lines */
86 	ushort_t flt_sdbh;		/* UDBH reg */
87 	ushort_t flt_sdbl;		/* UDBL reg */
88 } spitf_async_flt;
89 
/*
 * Prototypes for support routines in spitfire_asm.s:
 */
extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
extern uint64_t get_lsu(void);
extern void set_lsu(uint64_t ncc);
extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
				uint64_t *oafsr, uint64_t *acc_afsr);
extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
				uint64_t *acc_afsr);
/*
 * Declared with (void) rather than an empty parameter list so that this is
 * a real prototype and calls are argument-checked like the others above.
 */
extern uint64_t read_and_clear_afsr(void);
extern void write_ec_tag_parity(uint32_t id);
extern void write_hb_ec_tag_parity(uint32_t id);
104 
105 /*
106  * Spitfire module routines:
107  */
108 static void cpu_async_log_err(void *flt);
109 /*PRINTFLIKE6*/
110 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
111     uint_t logflags, const char *endstr, const char *fmt, ...);
112 
113 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
114 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
115 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
116 
117 static void log_ce_err(struct async_flt *aflt, char *unum);
118 static void log_ue_err(struct async_flt *aflt, char *unum);
119 static void check_misc_err(spitf_async_flt *spf_flt);
120 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
121 static int check_ecc(struct async_flt *aflt);
122 static uint_t get_cpu_status(uint64_t arg);
123 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
124 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
125 		int *m, uint64_t *afsr);
126 static void ecache_kstat_init(struct cpu *cp);
127 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
128 		uint64_t paddr, int mpb, uint64_t);
129 static uint64_t ecache_scrub_misc_err(int, uint64_t);
130 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
131 static void ecache_page_retire(void *);
132 static int ecc_kstat_update(kstat_t *ksp, int rw);
133 static int ce_count_unum(int status, int len, char *unum);
134 static void add_leaky_bucket_timeout(void);
135 static int synd_to_synd_code(int synd_status, ushort_t synd);
136 
137 extern uint_t read_all_memscrub;
138 extern void memscrub_run(void);
139 
140 static uchar_t	isus2i;			/* set if sabre */
141 static uchar_t	isus2e;			/* set if hummingbird */
142 
143 /*
144  * Default ecache mask and shift settings for Spitfire.  If we detect a
145  * different CPU implementation, we will modify these values at boot time.
146  */
147 static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
148 static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
149 static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
150 static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
151 static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
152 static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
153 static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
154 static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
155 static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
156 static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;
157 
158 /*
159  * Default ecache state bits for Spitfire.  These individual bits indicate if
160  * the given line is in any of the valid or modified states, respectively.
161  * Again, we modify these at boot if we detect a different CPU.
162  */
163 static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
164 static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
165 static uchar_t cpu_ec_parity		= S_EC_PARITY;
166 static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;
167 
168 /*
169  * This table is used to determine which bit(s) is(are) bad when an ECC
170  * error occurrs.  The array is indexed an 8-bit syndrome.  The entries
171  * of this array have the following semantics:
172  *
173  *      00-63   The number of the bad bit, when only one bit is bad.
174  *      64      ECC bit C0 is bad.
175  *      65      ECC bit C1 is bad.
176  *      66      ECC bit C2 is bad.
177  *      67      ECC bit C3 is bad.
178  *      68      ECC bit C4 is bad.
179  *      69      ECC bit C5 is bad.
180  *      70      ECC bit C6 is bad.
181  *      71      ECC bit C7 is bad.
182  *      72      Two bits are bad.
183  *      73      Three bits are bad.
184  *      74      Four bits are bad.
185  *      75      More than Four bits are bad.
186  *      76      NO bits are bad.
187  * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
188  */
189 
#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

/*
 * Classify a syndrome code (one of the values listed above):
 *   SYND_IS_SINGLE_BIT_DATA - code names a single bad data bit (0 .. C0-1)
 *   SYND_IS_SINGLE_BIT_CHK  - code names a single bad check bit (C0 .. C7)
 *
 * The argument is parenthesized so that expression arguments such as
 * "x & 0xFF" are classified correctly.  It is evaluated twice, so do not
 * pass an expression with side effects.
 */
#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	(((synd_code) >= 0) && \
						    ((synd_code) < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	(((synd_code) >= C0) && \
						    ((synd_code) <= C7))
208 
209 static char ecc_syndrome_tab[] =
210 {
211 	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
212 	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
213 	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
214 	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
215 	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
216 	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
217 	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
218 	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
219 	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
220 	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
221 	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
222 	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
223 	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
224 	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
225 	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
226 	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
227 };
228 
229 #define	SYND_TBL_SIZE 256
230 
/*
 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
 *
 * UDBL_REG (bit 15) is or'ed into a stored syndrome to mark it as coming
 * from UDBL.  UDBL() extracts that flag as 0/1 and SYND() strips it.  The
 * macro argument is parenthesized so expression arguments (e.g. "a | b")
 * are handled correctly.
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	(((synd) & UDBL_REG) >> 15)
#define	SYND(synd)	((synd) & 0x7FFF)
238 
239 /*
240  * These error types are specific to Spitfire and are used internally for the
241  * spitfire fault structure flt_type field.
242  */
243 #define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
244 #define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
245 #define	CPU_WP_ERR		2	/* WP parity error */
246 #define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
247 #define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
248 #define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
249 #define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
250 #define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
251 #define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
252 #define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
253 #define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
254 #define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
255 #define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
256 #define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
257 #define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
258 #define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */
259 
260 /*
261  * Macro to access the "Spitfire cpu private" data structure.
262  */
263 #define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))
264 
265 /*
266  * set to 0 to disable automatic retiring of pages on
267  * DIMMs that have excessive soft errors
268  */
269 int automatic_page_removal = 1;
270 
271 /*
272  * Heuristic for figuring out which module to replace.
273  * Relative likelihood that this P_SYND indicates that this module is bad.
274  * We call it a "score", though, not a relative likelihood.
275  *
276  * Step 1.
277  * Assign a score to each byte of P_SYND according to the following rules:
278  * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
279  * If one bit on, give it a 95.
280  * If seven bits on, give it a 10.
281  * If two bits on:
282  *   in different nybbles, a 90
283  *   in same nybble, but unaligned, 85
284  *   in same nybble and as an aligned pair, 80
285  * If six bits on, look at the bits that are off:
286  *   in same nybble and as an aligned pair, 15
287  *   in same nybble, but unaligned, 20
288  *   in different nybbles, a 25
289  * If three bits on:
290  *   in different nybbles, no aligned pairs, 75
291  *   in different nybbles, one aligned pair, 70
292  *   in the same nybble, 65
293  * If five bits on, look at the bits that are off:
294  *   in the same nybble, 30
295  *   in different nybbles, one aligned pair, 35
296  *   in different nybbles, no aligned pairs, 40
297  * If four bits on:
298  *   all in one nybble, 45
299  *   as two aligned pairs, 50
300  *   one aligned pair, 55
301  *   no aligned pairs, 60
302  *
303  * Step 2:
304  * Take the higher of the two scores (one for each byte) as the score
305  * for the module.
306  *
307  * Print the score for each module, and field service should replace the
308  * module with the highest score.
309  */
310 
311 /*
312  * In the table below, the first row/column comment indicates the
313  * number of bits on in that nybble; the second row/column comment is
314  * the hex digit.
315  */
316 
/*
 * Score for a single P_SYND byte, indexed by the byte value (0-255).
 * ecc_psynd_score() looks up both bytes of a P_SYND here and keeps the
 * larger score.
 */
317 static int
318 p_synd_score_table[256] = {
319 	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
320 	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
321 /* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
322 /* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
323 /* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
324 /* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
325 /* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
326 /* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
327 /* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
328 /* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
329 /* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
330 /* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
331 /* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
332 /* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
333 /* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
334 /* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
335 /* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
336 /* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
337 };
338 
339 int
340 ecc_psynd_score(ushort_t p_synd)
341 {
342 	int i, j, a, b;
343 
344 	i = p_synd & 0xFF;
345 	j = (p_synd >> 8) & 0xFF;
346 
347 	a = p_synd_score_table[i];
348 	b = p_synd_score_table[j];
349 
350 	return (a > b ? a : b);
351 }
352 
353 /*
354  * Async Fault Logging
355  *
356  * To ease identifying, reading, and filtering async fault log messages, the
357  * label [AFT#] is now prepended to each async fault message.  These messages
358  * and the logging rules are implemented by cpu_aflt_log(), below.
359  *
360  * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
361  *          This includes both corrected ECC memory and ecache faults.
362  *
363  * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
364  *          else except CE errors) with a priority of 1 (highest).  This tag
365  *          is also used for panic messages that result from an async fault.
366  *
367  * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
368  * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
369  *          of the E-$ data and tags.
370  *
371  * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
372  * printed on the console.  To send all AFT logs to both the log and the
373  * console, set aft_verbose = 1.
374  */
375 
376 #define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
377 #define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
378 #define	CPU_ERRID		0x0004	/* print flt_id */
379 #define	CPU_TL			0x0008	/* print flt_tl */
380 #define	CPU_ERRID_FIRST 	0x0010	/* print flt_id first in message */
381 #define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
382 #define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
383 #define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
384 #define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
385 #define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
386 #define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
387 #define	CPU_FAULTPC		0x0800	/* print flt_pc */
388 #define	CPU_SYND		0x1000	/* print flt_synd and unum */
389 
390 #define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
391 				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
392 				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
393 				CPU_FAULTPC)
394 #define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
395 #define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
396 				~CPU_SPACE)
397 #define	PARERR_LFLAGS	(CMN_LFLAGS)
398 #define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
399 #define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
400 				~CPU_FLTCPU & ~CPU_FAULTPC)
401 #define	BERRTO_LFLAGS	(CMN_LFLAGS)
402 #define	NO_LFLAGS	(0)
403 
404 #define	AFSR_FMTSTR0	"\020\1ME"
405 #define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
406 			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
407 #define	UDB_FMTSTR	"\020\012UE\011CE"
408 
409 /*
410  * Save the cache bootup state for use when internal
411  * caches are to be re-enabled after an error occurs.
412  */
413 uint64_t	cache_boot_state = 0;
414 
415 /*
416  * PA[31:0] represent Displacement in UPA configuration space.
417  */
418 uint_t	root_phys_addr_lo_mask = 0xffffffff;
419 
420 /*
421  * Spitfire legacy globals
422  */
423 int	itlb_entries;
424 int	dtlb_entries;
425 
426 void
427 cpu_setup(void)
428 {
429 	extern int page_retire_messages;
430 	extern int page_retire_first_ue;
431 	extern int at_flags;
432 #if defined(SF_ERRATA_57)
433 	extern caddr_t errata57_limit;
434 #endif
435 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
436 
437 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
438 
439 	/*
440 	 * Spitfire isn't currently FMA-aware, so we have to enable the
441 	 * page retirement messages. We also change the default policy
442 	 * for UE retirement to allow clearing of transient errors.
443 	 */
444 	page_retire_messages = 1;
445 	page_retire_first_ue = 0;
446 
447 	/*
448 	 * save the cache bootup state.
449 	 */
450 	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);
451 
452 	if (use_page_coloring) {
453 		do_pg_coloring = 1;
454 		if (use_virtual_coloring)
455 			do_virtual_coloring = 1;
456 	}
457 
458 	/*
459 	 * Tune pp_slots to use up to 1/8th of the tlb entries.
460 	 */
461 	pp_slots = MIN(8, MAXPP_SLOTS);
462 
463 	/*
464 	 * Block stores invalidate all pages of the d$ so pagecopy
465 	 * et. al. do not need virtual translations with virtual
466 	 * coloring taken into consideration.
467 	 */
468 	pp_consistent_coloring = 0;
469 
470 	isa_list =
471 	    "sparcv9+vis sparcv9 "
472 	    "sparcv8plus+vis sparcv8plus "
473 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
474 
475 	cpu_hwcap_flags = AV_SPARC_VIS;
476 
477 	/*
478 	 * On Spitfire, there's a hole in the address space
479 	 * that we must never map (the hardware only support 44-bits of
480 	 * virtual address).  Later CPUs are expected to have wider
481 	 * supported address ranges.
482 	 *
483 	 * See address map on p23 of the UltraSPARC 1 user's manual.
484 	 */
485 	hole_start = (caddr_t)0x80000000000ull;
486 	hole_end = (caddr_t)0xfffff80000000000ull;
487 
488 	/*
489 	 * A spitfire call bug requires us to be a further 4Gbytes of
490 	 * firewall from the spec.
491 	 *
492 	 * See Spitfire Errata #21
493 	 */
494 	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
495 	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));
496 
497 	/*
498 	 * The kpm mapping window.
499 	 * kpm_size:
500 	 *	The size of a single kpm range.
501 	 *	The overall size will be: kpm_size * vac_colors.
502 	 * kpm_vbase:
503 	 *	The virtual start address of the kpm range within the kernel
504 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
505 	 */
506 	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
507 	kpm_size_shift = 41;
508 	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
509 
510 #if defined(SF_ERRATA_57)
511 	errata57_limit = (caddr_t)0x80000000ul;
512 #endif
513 
514 	/*
515 	 * Disable text by default.
516 	 * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
517 	 */
518 	max_utext_lpsize = MMU_PAGESIZE;
519 }
520 
521 static int
522 getintprop(pnode_t node, char *name, int deflt)
523 {
524 	int	value;
525 
526 	switch (prom_getproplen(node, name)) {
527 	case 0:
528 		value = 1;	/* boolean properties */
529 		break;
530 
531 	case sizeof (int):
532 		(void) prom_getprop(node, name, (caddr_t)&value);
533 		break;
534 
535 	default:
536 		value = deflt;
537 		break;
538 	}
539 
540 	return (value);
541 }
542 
543 /*
544  * Set the magic constants of the implementation.
545  */
546 void
547 cpu_fiximp(pnode_t dnode)
548 {
549 	extern int vac_size, vac_shift;
550 	extern uint_t vac_mask;
551 	extern int dcache_line_mask;
552 	int i, a;
553 	static struct {
554 		char	*name;
555 		int	*var;
556 	} prop[] = {
557 		"dcache-size",		&dcache_size,
558 		"dcache-line-size",	&dcache_linesize,
559 		"icache-size",		&icache_size,
560 		"icache-line-size",	&icache_linesize,
561 		"ecache-size",		&ecache_size,
562 		"ecache-line-size",	&ecache_alignsize,
563 		"ecache-associativity", &ecache_associativity,
564 		"#itlb-entries",	&itlb_entries,
565 		"#dtlb-entries",	&dtlb_entries,
566 		};
567 
568 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
569 		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
570 			*prop[i].var = a;
571 		}
572 	}
573 
574 	ecache_setsize = ecache_size / ecache_associativity;
575 
576 	vac_size = S_VAC_SIZE;
577 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
578 	i = 0; a = vac_size;
579 	while (a >>= 1)
580 		++i;
581 	vac_shift = i;
582 	shm_alignment = vac_size;
583 	vac = 1;
584 
585 	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);
586 
587 	/*
588 	 * UltraSPARC I & II have ecache sizes running
589 	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
590 	 * and 8 MB. Adjust the copyin/copyout limits
591 	 * according to the cache size. The magic number
592 	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
593 	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
594 	 * VIS instructions.
595 	 *
596 	 * We assume that all CPUs on the system have the same size
597 	 * ecache. We're also called very early in the game.
598 	 * /etc/system will be parsed *after* we're called so
599 	 * these values can be overwritten.
600 	 */
601 
602 	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
603 	if (ecache_size <= 524288) {
604 		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
605 		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
606 		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
607 	} else if (ecache_size == 1048576) {
608 		hw_copy_limit_2 = 1024;
609 		hw_copy_limit_4 = 1280;
610 		hw_copy_limit_8 = 1536;
611 	} else if (ecache_size == 2097152) {
612 		hw_copy_limit_2 = 1536;
613 		hw_copy_limit_4 = 2048;
614 		hw_copy_limit_8 = 2560;
615 	} else if (ecache_size == 4194304) {
616 		hw_copy_limit_2 = 2048;
617 		hw_copy_limit_4 = 2560;
618 		hw_copy_limit_8 = 3072;
619 	} else {
620 		hw_copy_limit_2 = 2560;
621 		hw_copy_limit_4 = 3072;
622 		hw_copy_limit_8 = 3584;
623 	}
624 }
625 
626 /*
627  * Called by setcpudelay
628  */
629 void
630 cpu_init_tick_freq(void)
631 {
632 	/*
633 	 * Determine the cpu frequency by calling
634 	 * tod_get_cpufrequency. Use an approximate freqency
635 	 * value computed by the prom if the tod module
636 	 * is not initialized and loaded yet.
637 	 */
638 	if (tod_ops.tod_get_cpufrequency != NULL) {
639 		mutex_enter(&tod_lock);
640 		sys_tick_freq = tod_ops.tod_get_cpufrequency();
641 		mutex_exit(&tod_lock);
642 	} else {
643 #if defined(HUMMINGBIRD)
644 		/*
645 		 * the hummingbird version of %stick is used as the basis for
646 		 * low level timing; this provides an independent constant-rate
647 		 * clock for general system use, and frees power mgmt to set
648 		 * various cpu clock speeds.
649 		 */
650 		if (system_clock_freq == 0)
651 			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
652 			    system_clock_freq);
653 		sys_tick_freq = system_clock_freq;
654 #else /* SPITFIRE */
655 		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
656 #endif
657 	}
658 }
659 
660 
661 void shipit(int upaid);
662 extern uint64_t xc_tick_limit;
663 extern uint64_t xc_tick_jump_limit;
664 
665 #ifdef SEND_MONDO_STATS
666 uint64_t x_early[NCPU][64];
667 #endif
668 
669 /*
670  * Note: A version of this function is used by the debugger via the KDI,
671  * and must be kept in sync with this version.  Any changes made to this
672  * function to support new chips or to accomodate errata must also be included
673  * in the KDI-specific version.  See spitfire_kdi.c.
674  */
675 void
676 send_one_mondo(int cpuid)
677 {
678 	uint64_t idsr, starttick, endtick;
679 	int upaid, busy, nack;
680 	uint64_t tick, tick_prev;
681 	ulong_t ticks;
682 
683 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
684 	upaid = CPUID_TO_UPAID(cpuid);
685 	tick = starttick = gettick();
686 	shipit(upaid);
687 	endtick = starttick + xc_tick_limit;
688 	busy = nack = 0;
689 	for (;;) {
690 		idsr = getidsr();
691 		if (idsr == 0)
692 			break;
693 		/*
694 		 * When we detect an irregular tick jump, we adjust
695 		 * the timer window to the current tick value.
696 		 */
697 		tick_prev = tick;
698 		tick = gettick();
699 		ticks = tick - tick_prev;
700 		if (ticks > xc_tick_jump_limit) {
701 			endtick = tick + xc_tick_limit;
702 		} else if (tick > endtick) {
703 			if (panic_quiesce)
704 				return;
705 			cmn_err(CE_PANIC,
706 			"send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
707 			upaid, nack, busy);
708 		}
709 		if (idsr & IDSR_BUSY) {
710 			busy++;
711 			continue;
712 		}
713 		drv_usecwait(1);
714 		shipit(upaid);
715 		nack++;
716 		busy = 0;
717 	}
718 #ifdef SEND_MONDO_STATS
719 	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
720 #endif
721 }
722 
723 void
724 send_mondo_set(cpuset_t set)
725 {
726 	int i;
727 
728 	for (i = 0; i < NCPU; i++)
729 		if (CPU_IN_SET(set, i)) {
730 			send_one_mondo(i);
731 			CPUSET_DEL(set, i);
732 			if (CPUSET_ISNULL(set))
733 				break;
734 		}
735 }
736 
/*
 * syncfpu() has an empty body in this CPU module: calling it does nothing.
 */
737 void
738 syncfpu(void)
739 {
740 }
741 
742 /*
743  * Determine the size of the CPU module's error structure in bytes.  This is
744  * called once during boot to initialize the error queues.
745  */
746 int
747 cpu_aflt_size(void)
748 {
749 	/*
750 	 * We need to determine whether this is a sabre, Hummingbird or a
751 	 * Spitfire/Blackbird impl and set the appropriate state variables for
752 	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
753 	 * too early in the boot flow and the cpunodes are not initialized.
754 	 * This routine will be called once after cpunodes[] is ready, so do
755 	 * it here.
756 	 */
757 	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
758 		isus2i = 1;
759 		cpu_ec_tag_mask = SB_ECTAG_MASK;
760 		cpu_ec_state_mask = SB_ECSTATE_MASK;
761 		cpu_ec_par_mask = SB_ECPAR_MASK;
762 		cpu_ec_par_shift = SB_ECPAR_SHIFT;
763 		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
764 		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
765 		cpu_ec_state_exl = SB_ECSTATE_EXL;
766 		cpu_ec_state_mod = SB_ECSTATE_MOD;
767 
768 		/* These states do not exist in sabre - set to 0xFF */
769 		cpu_ec_state_shr = 0xFF;
770 		cpu_ec_state_own = 0xFF;
771 
772 		cpu_ec_state_valid = SB_ECSTATE_VALID;
773 		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
774 		cpu_ec_state_parity = SB_ECSTATE_PARITY;
775 		cpu_ec_parity = SB_EC_PARITY;
776 	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
777 		isus2e = 1;
778 		cpu_ec_tag_mask = HB_ECTAG_MASK;
779 		cpu_ec_state_mask = HB_ECSTATE_MASK;
780 		cpu_ec_par_mask = HB_ECPAR_MASK;
781 		cpu_ec_par_shift = HB_ECPAR_SHIFT;
782 		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
783 		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
784 		cpu_ec_state_exl = HB_ECSTATE_EXL;
785 		cpu_ec_state_mod = HB_ECSTATE_MOD;
786 
787 		/* These states do not exist in hummingbird - set to 0xFF */
788 		cpu_ec_state_shr = 0xFF;
789 		cpu_ec_state_own = 0xFF;
790 
791 		cpu_ec_state_valid = HB_ECSTATE_VALID;
792 		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
793 		cpu_ec_state_parity = HB_ECSTATE_PARITY;
794 		cpu_ec_parity = HB_EC_PARITY;
795 	}
796 
797 	return (sizeof (spitf_async_flt));
798 }
799 
800 
801 /*
802  * Correctable ecc error trap handler
803  */
804 /*ARGSUSED*/
805 void
806 cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
807 	uint_t p_afsr_high, uint_t p_afar_high)
808 {
809 	ushort_t sdbh, sdbl;
810 	ushort_t e_syndh, e_syndl;
811 	spitf_async_flt spf_flt;
812 	struct async_flt *ecc;
813 	int queue = 1;
814 
815 	uint64_t t_afar = p_afar;
816 	uint64_t t_afsr = p_afsr;
817 
818 	/*
819 	 * Note: the Spitfire data buffer error registers
820 	 * (upper and lower halves) are or'ed into the upper
821 	 * word of the afsr by ce_err().
822 	 */
823 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
824 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
825 
826 	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
827 	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);
828 
829 	t_afsr &= S_AFSR_MASK;
830 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
831 
832 	/* Setup the async fault structure */
833 	bzero(&spf_flt, sizeof (spitf_async_flt));
834 	ecc = (struct async_flt *)&spf_flt;
835 	ecc->flt_id = gethrtime_waitfree();
836 	ecc->flt_stat = t_afsr;
837 	ecc->flt_addr = t_afar;
838 	ecc->flt_status = ECC_C_TRAP;
839 	ecc->flt_bus_id = getprocessorid();
840 	ecc->flt_inst = CPU->cpu_id;
841 	ecc->flt_pc = (caddr_t)rp->r_pc;
842 	ecc->flt_func = log_ce_err;
843 	ecc->flt_in_memory =
844 		(pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
845 	spf_flt.flt_sdbh = sdbh;
846 	spf_flt.flt_sdbl = sdbl;
847 
848 	/*
849 	 * Check for fatal conditions.
850 	 */
851 	check_misc_err(&spf_flt);
852 
853 	/*
854 	 * Pananoid checks for valid AFSR and UDBs
855 	 */
856 	if ((t_afsr & P_AFSR_CE) == 0) {
857 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
858 			"** Panic due to CE bit not set in the AFSR",
859 			"  Corrected Memory Error on");
860 	}
861 
862 	/*
863 	 * We want to skip logging only if ALL the following
864 	 * conditions are true:
865 	 *
866 	 *	1. There is only one error
867 	 *	2. That error is a correctable memory error
868 	 *	3. The error is caused by the memory scrubber (in which case
869 	 *	    the error will have occurred under on_trap protection)
870 	 *	4. The error is on a retired page
871 	 *
872 	 * Note: OT_DATA_EC is used places other than the memory scrubber.
873 	 * However, none of those errors should occur on a retired page.
874 	 */
875 	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
876 	    curthread->t_ontrap != NULL) {
877 
878 		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
879 			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
880 				queue = 0;
881 			}
882 		}
883 	}
884 
885 	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
886 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
887 			"** Panic due to CE bits not set in the UDBs",
888 			" Corrected Memory Error on");
889 	}
890 
891 	if ((sdbh >> 8) & 1) {
892 		ecc->flt_synd = e_syndh;
893 		ce_scrub(ecc);
894 		if (queue) {
895 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
896 			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
897 		}
898 	}
899 
900 	if ((sdbl >> 8) & 1) {
901 		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
902 		ecc->flt_synd = e_syndl | UDBL_REG;
903 		ce_scrub(ecc);
904 		if (queue) {
905 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
906 			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
907 		}
908 	}
909 
910 	/*
911 	 * Re-enable all error trapping (CEEN currently cleared).
912 	 */
913 	clr_datapath();
914 	set_asyncflt(P_AFSR_CE);
915 	set_error_enable(EER_ENABLE);
916 }
917 
918 /*
919  * Cpu specific CE logging routine
920  */
921 static void
922 log_ce_err(struct async_flt *aflt, char *unum)
923 {
924 	spitf_async_flt spf_flt;
925 
926 	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
927 		return;
928 	}
929 
930 	spf_flt.cmn_asyncflt = *aflt;
931 	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
932 	    " Corrected Memory Error detected by");
933 }
934 
935 /*
936  * Spitfire does not perform any further CE classification refinement
937  */
938 /*ARGSUSED*/
939 int
940 ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
941     size_t afltoffset)
942 {
943 	return (0);
944 }
945 
946 char *
947 flt_to_error_type(struct async_flt *aflt)
948 {
949 	if (aflt->flt_status & ECC_INTERMITTENT)
950 		return (ERR_TYPE_DESC_INTERMITTENT);
951 	if (aflt->flt_status & ECC_PERSISTENT)
952 		return (ERR_TYPE_DESC_PERSISTENT);
953 	if (aflt->flt_status & ECC_STICKY)
954 		return (ERR_TYPE_DESC_STICKY);
955 	return (ERR_TYPE_DESC_UNKNOWN);
956 }
957 
958 /*
959  * Called by correctable ecc error logging code to print out
960  * the stick/persistent/intermittent status of the error.
961  */
962 static void
963 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
964 {
965 	ushort_t status;
966 	char *status1_str = "Memory";
967 	char *status2_str = "Intermittent";
968 	struct async_flt *aflt = (struct async_flt *)spf_flt;
969 
970 	status = aflt->flt_status;
971 
972 	if (status & ECC_ECACHE)
973 		status1_str = "Ecache";
974 
975 	if (status & ECC_STICKY)
976 		status2_str = "Sticky";
977 	else if (status & ECC_PERSISTENT)
978 		status2_str = "Persistent";
979 
980 	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
981 		NULL, " Corrected %s Error on %s is %s",
982 		status1_str, unum, status2_str);
983 }
984 
985 /*
986  * check for a valid ce syndrome, then call the
987  * displacement flush scrubbing code, and then check the afsr to see if
988  * the error was persistent or intermittent. Reread the afar/afsr to see
989  * if the error was not scrubbed successfully, and is therefore sticky.
990  */
991 /*ARGSUSED1*/
992 void
993 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
994 {
995 	uint64_t eer, afsr;
996 	ushort_t status;
997 
998 	ASSERT(getpil() > LOCK_LEVEL);
999 
1000 	/*
1001 	 * It is possible that the flt_addr is not a valid
1002 	 * physical address. To deal with this, we disable
1003 	 * NCEEN while we scrub that address. If this causes
1004 	 * a TIMEOUT/BERR, we know this is an invalid
1005 	 * memory location.
1006 	 */
1007 	kpreempt_disable();
1008 	eer = get_error_enable();
1009 	if (eer & (EER_CEEN | EER_NCEEN))
1010 	    set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));
1011 
1012 	/*
1013 	 * To check if the error detected by IO is persistent, sticky or
1014 	 * intermittent.
1015 	 */
1016 	if (ecc->flt_status & ECC_IOBUS) {
1017 		ecc->flt_stat = P_AFSR_CE;
1018 	}
1019 
1020 	scrubphys(P2ALIGN(ecc->flt_addr, 64),
1021 	    cpunodes[CPU->cpu_id].ecache_size);
1022 
1023 	get_asyncflt(&afsr);
1024 	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1025 		/*
1026 		 * Must ensure that we don't get the TIMEOUT/BERR
1027 		 * when we reenable NCEEN, so we clear the AFSR.
1028 		 */
1029 		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
1030 		if (eer & (EER_CEEN | EER_NCEEN))
1031 		    set_error_enable(eer);
1032 		kpreempt_enable();
1033 		return;
1034 	}
1035 
1036 	if (eer & EER_NCEEN)
1037 	    set_error_enable(eer & ~EER_CEEN);
1038 
1039 	/*
1040 	 * Check and clear any ECC errors from the scrub.  If the scrub did
1041 	 * not trip over the error, mark it intermittent.  If the scrub did
1042 	 * trip the error again and it did not scrub away, mark it sticky.
1043 	 * Otherwise mark it persistent.
1044 	 */
1045 	if (check_ecc(ecc) != 0) {
1046 		cpu_read_paddr(ecc, 0, 1);
1047 
1048 		if (check_ecc(ecc) != 0)
1049 			status = ECC_STICKY;
1050 		else
1051 			status = ECC_PERSISTENT;
1052 	} else
1053 		status = ECC_INTERMITTENT;
1054 
1055 	if (eer & (EER_CEEN | EER_NCEEN))
1056 	    set_error_enable(eer);
1057 	kpreempt_enable();
1058 
1059 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
1060 	ecc->flt_status |= status;
1061 }
1062 
1063 /*
1064  * get the syndrome and unum, and then call the routines
1065  * to check the other cpus and iobuses, and then do the error logging.
1066  */
1067 /*ARGSUSED1*/
1068 void
1069 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
1070 {
1071 	char unum[UNUM_NAMLEN];
1072 	int len = 0;
1073 	int ce_verbose = 0;
1074 	int err;
1075 
1076 	ASSERT(ecc->flt_func != NULL);
1077 
1078 	/* Get the unum string for logging purposes */
1079 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
1080 	    UNUM_NAMLEN, &len);
1081 
1082 	/* Call specific error logging routine */
1083 	(void) (*ecc->flt_func)(ecc, unum);
1084 
1085 	/*
1086 	 * Count errors per unum.
1087 	 * Non-memory errors are all counted via a special unum string.
1088 	 */
1089 	if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
1090 	    automatic_page_removal) {
1091 		(void) page_retire(ecc->flt_addr, err);
1092 	}
1093 
1094 	if (ecc->flt_panic) {
1095 		ce_verbose = 1;
1096 	} else if ((ecc->flt_class == BUS_FAULT) ||
1097 	    (ecc->flt_stat & P_AFSR_CE)) {
1098 		ce_verbose = (ce_verbose_memory > 0);
1099 	} else {
1100 		ce_verbose = 1;
1101 	}
1102 
1103 	if (ce_verbose) {
1104 		spitf_async_flt sflt;
1105 		int synd_code;
1106 
1107 		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */
1108 
1109 		cpu_ce_log_status(&sflt, unum);
1110 
1111 		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
1112 				SYND(ecc->flt_synd));
1113 
1114 		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
1115 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1116 			    NULL, " ECC Data Bit %2d was in error "
1117 			    "and corrected", synd_code);
1118 		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
1119 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1120 			    NULL, " ECC Check Bit %2d was in error "
1121 			    "and corrected", synd_code - C0);
1122 		} else {
1123 			/*
1124 			 * These are UE errors - we shouldn't be getting CE
1125 			 * traps for these; handle them in case of bad h/w.
1126 			 */
1127 			switch (synd_code) {
1128 			case M2:
1129 				cpu_aflt_log(CE_CONT, 0, &sflt,
1130 				    CPU_ERRID_FIRST, NULL,
1131 				    " Two ECC Bits were in error");
1132 				break;
1133 			case M3:
1134 				cpu_aflt_log(CE_CONT, 0, &sflt,
1135 				    CPU_ERRID_FIRST, NULL,
1136 				    " Three ECC Bits were in error");
1137 				break;
1138 			case M4:
1139 				cpu_aflt_log(CE_CONT, 0, &sflt,
1140 				    CPU_ERRID_FIRST, NULL,
1141 				    " Four ECC Bits were in error");
1142 				break;
1143 			case MX:
1144 				cpu_aflt_log(CE_CONT, 0, &sflt,
1145 				    CPU_ERRID_FIRST, NULL,
1146 				    " More than Four ECC bits were "
1147 				    "in error");
1148 				break;
1149 			default:
1150 				cpu_aflt_log(CE_CONT, 0, &sflt,
1151 				    CPU_ERRID_FIRST, NULL,
1152 				    " Unknown fault syndrome %d",
1153 				    synd_code);
1154 				break;
1155 			}
1156 		}
1157 	}
1158 
1159 	/* Display entire cache line, if valid address */
1160 	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
1161 		read_ecc_data(ecc, 1, 1);
1162 }
1163 
1164 /*
1165  * We route all errors through a single switch statement.
1166  */
1167 void
1168 cpu_ue_log_err(struct async_flt *aflt)
1169 {
1170 
1171 	switch (aflt->flt_class) {
1172 	case CPU_FAULT:
1173 		cpu_async_log_err(aflt);
1174 		break;
1175 
1176 	case BUS_FAULT:
1177 		bus_async_log_err(aflt);
1178 		break;
1179 
1180 	default:
1181 		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
1182 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1183 		break;
1184 	}
1185 }
1186 
/*
 * Values for action variable in cpu_async_error().  The variable records
 * how the handler arranged for the trap to be resolved; the handler asserts
 * that, unless it is panicking (or only WP was seen), one of the non-NONE
 * values has been chosen.
 */
/* Initial value: nothing has been arranged yet. */
#define	ACTION_NONE		0
/* Saved PC/nPC were redirected to an on_trap trampoline or t_lofault. */
#define	ACTION_TRAMPOLINE	1
/* Error flags were set in the lwp's pcb_flags and aston() was called. */
#define	ACTION_AST_FLAGS	2
1191 
1192 /*
1193  * Access error trap handler for asynchronous cpu errors.  This routine is
1194  * called to handle a data or instruction access error.  All fatal errors are
1195  * completely handled by this routine (by panicking).  Non fatal error logging
1196  * is queued for later processing either via AST or softint at a lower PIL.
1197  * In case of panic, the error log queue will also be processed as part of the
1198  * panic flow to ensure all errors are logged.  This routine is called with all
1199  * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
1200  * error bits are also cleared.  The hardware has also disabled the I and
1201  * D-caches for us, so we must re-enable them before returning.
1202  *
1203  * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
1204  *
1205  *		_______________________________________________________________
1206  *		|        Privileged tl0		|         Unprivileged	      |
1207  *		| Protected	| Unprotected	| Protected	| Unprotected |
1208  *		|on_trap|lofault|		|		|	      |
1209  * -------------|-------|-------+---------------+---------------+-------------|
1210  *		|	|	|		|		|	      |
1211  * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
1212  *		|	|	|		|		|	      |
1213  * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
1214  *		|	|	|		|		|	      |
1215  * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
1216  *		|	|	|		|		|	      |
1217  * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
1218  * ____________________________________________________________________________
1219  *
1220  *
1221  * Action codes:
1222  *
1223  * L - log
1224  * M - kick off memscrubber if flt_in_memory
1225  * P - panic
1226  * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
1227  * R - i)  if aft_panic is set, panic
1228  *     ii) otherwise, send hwerr event to contract and SIGKILL to process
1229  * S - send SIGBUS to process
1230  * T - trampoline
1231  *
1232  * Special cases:
1233  *
1234  * 1) if aft_testfatal is set, all faults result in a panic regardless
1235  *    of type (even WP), protection (even on_trap), or privilege.
1236  */
1237 /*ARGSUSED*/
1238 void
1239 cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
1240 	uint_t p_afsr_high, uint_t p_afar_high)
1241 {
1242 	ushort_t sdbh, sdbl, ttype, tl;
1243 	spitf_async_flt spf_flt;
1244 	struct async_flt *aflt;
1245 	char pr_reason[28];
1246 	uint64_t oafsr;
1247 	uint64_t acc_afsr = 0;			/* accumulated afsr */
1248 	int action = ACTION_NONE;
1249 	uint64_t t_afar = p_afar;
1250 	uint64_t t_afsr = p_afsr;
1251 	int expected = DDI_FM_ERR_UNEXPECTED;
1252 	ddi_acc_hdl_t *hp;
1253 
1254 	/*
1255 	 * We need to look at p_flag to determine if the thread detected an
1256 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1257 	 * because we just need a consistent snapshot and we know that everyone
1258 	 * else will store a consistent set of bits while holding p_lock.  We
1259 	 * don't have to worry about a race because SDOCORE is set once prior
1260 	 * to doing i/o from the process's address space and is never cleared.
1261 	 */
1262 	uint_t pflag = ttoproc(curthread)->p_flag;
1263 
1264 	pr_reason[0] = '\0';
1265 
1266 	/*
1267 	 * Note: the Spitfire data buffer error registers
1268 	 * (upper and lower halves) are or'ed into the upper
1269 	 * word of the afsr by async_err() if P_AFSR_UE is set.
1270 	 */
1271 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
1272 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
1273 
1274 	/*
1275 	 * Grab the ttype encoded in <63:53> of the saved
1276 	 * afsr passed from async_err()
1277 	 */
1278 	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
1279 	tl = (ushort_t)(t_afsr >> 62);
1280 
1281 	t_afsr &= S_AFSR_MASK;
1282 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
1283 
1284 	/*
1285 	 * Initialize most of the common and CPU-specific structure.  We derive
1286 	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
1287 	 * initial setting of aflt->flt_panic is based on TL: we must panic if
1288 	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
1289 	 * tuneable aft_testfatal is set (not the default).
1290 	 */
1291 	bzero(&spf_flt, sizeof (spitf_async_flt));
1292 	aflt = (struct async_flt *)&spf_flt;
1293 	aflt->flt_id = gethrtime_waitfree();
1294 	aflt->flt_stat = t_afsr;
1295 	aflt->flt_addr = t_afar;
1296 	aflt->flt_bus_id = getprocessorid();
1297 	aflt->flt_inst = CPU->cpu_id;
1298 	aflt->flt_pc = (caddr_t)rp->r_pc;
1299 	aflt->flt_prot = AFLT_PROT_NONE;
1300 	aflt->flt_class = CPU_FAULT;
1301 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1302 	aflt->flt_tl = (uchar_t)tl;
1303 	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
1304 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1305 
1306 	/*
1307 	 * Set flt_status based on the trap type.  If we end up here as the
1308 	 * result of a UE detected by the CE handling code, leave status 0.
1309 	 */
1310 	switch (ttype) {
1311 	case T_DATA_ERROR:
1312 		aflt->flt_status = ECC_D_TRAP;
1313 		break;
1314 	case T_INSTR_ERROR:
1315 		aflt->flt_status = ECC_I_TRAP;
1316 		break;
1317 	}
1318 
1319 	spf_flt.flt_sdbh = sdbh;
1320 	spf_flt.flt_sdbl = sdbl;
1321 
1322 	/*
1323 	 * Check for fatal async errors.
1324 	 */
1325 	check_misc_err(&spf_flt);
1326 
1327 	/*
1328 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1329 	 * see if we were executing in the kernel under on_trap() or t_lofault
1330 	 * protection.  If so, modify the saved registers so that we return
1331 	 * from the trap to the appropriate trampoline routine.
1332 	 */
1333 	if (aflt->flt_priv && tl == 0) {
1334 		if (curthread->t_ontrap != NULL) {
1335 			on_trap_data_t *otp = curthread->t_ontrap;
1336 
1337 			if (otp->ot_prot & OT_DATA_EC) {
1338 				aflt->flt_prot = AFLT_PROT_EC;
1339 				otp->ot_trap |= OT_DATA_EC;
1340 				rp->r_pc = otp->ot_trampoline;
1341 				rp->r_npc = rp->r_pc + 4;
1342 				action = ACTION_TRAMPOLINE;
1343 			}
1344 
1345 			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
1346 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1347 				aflt->flt_prot = AFLT_PROT_ACCESS;
1348 				otp->ot_trap |= OT_DATA_ACCESS;
1349 				rp->r_pc = otp->ot_trampoline;
1350 				rp->r_npc = rp->r_pc + 4;
1351 				action = ACTION_TRAMPOLINE;
1352 				/*
1353 				 * for peeks and caut_gets errors are expected
1354 				 */
1355 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1356 				if (!hp)
1357 					expected = DDI_FM_ERR_PEEK;
1358 				else if (hp->ah_acc.devacc_attr_access ==
1359 				    DDI_CAUTIOUS_ACC)
1360 					expected = DDI_FM_ERR_EXPECTED;
1361 			}
1362 
1363 		} else if (curthread->t_lofault) {
1364 			aflt->flt_prot = AFLT_PROT_COPY;
1365 			rp->r_g1 = EFAULT;
1366 			rp->r_pc = curthread->t_lofault;
1367 			rp->r_npc = rp->r_pc + 4;
1368 			action = ACTION_TRAMPOLINE;
1369 		}
1370 	}
1371 
1372 	/*
1373 	 * Determine if this error needs to be treated as fatal.  Note that
1374 	 * multiple errors detected upon entry to this trap handler does not
1375 	 * necessarily warrant a panic.  We only want to panic if the trap
1376 	 * happened in privileged mode and not under t_ontrap or t_lofault
1377 	 * protection.  The exception is WP: if we *only* get WP, it is not
1378 	 * fatal even if the trap occurred in privileged mode, except on Sabre.
1379 	 *
1380 	 * aft_panic, if set, effectively makes us treat usermode
1381 	 * UE/EDP/LDP faults as if they were privileged - so we we will
1382 	 * panic instead of sending a contract event.  A lofault-protected
1383 	 * fault will normally follow the contract event; if aft_panic is
1384 	 * set this will be changed to a panic.
1385 	 *
1386 	 * For usermode BERR/BTO errors, eg from processes performing device
1387 	 * control through mapped device memory, we need only deliver
1388 	 * a SIGBUS to the offending process.
1389 	 *
1390 	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
1391 	 * checked later; for now we implement the common reasons.
1392 	 */
1393 	if (aflt->flt_prot == AFLT_PROT_NONE) {
1394 		/*
1395 		 * Beware - multiple bits may be set in AFSR
1396 		 */
1397 		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
1398 			if (aflt->flt_priv || aft_panic)
1399 				aflt->flt_panic = 1;
1400 		}
1401 
1402 		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1403 			if (aflt->flt_priv)
1404 				aflt->flt_panic = 1;
1405 		}
1406 	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
1407 		aflt->flt_panic = 1;
1408 	}
1409 
1410 	/*
1411 	 * UE/BERR/TO: Call our bus nexus friends to check for
1412 	 * IO errors that may have resulted in this trap.
1413 	 */
1414 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
1415 		cpu_run_bus_error_handlers(aflt, expected);
1416 	}
1417 
1418 	/*
1419 	 * Handle UE: If the UE is in memory, we need to flush the bad line from
1420 	 * the E-cache.  We also need to query the bus nexus for fatal errors.
1421 	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
1422 	 * caches may introduce more parity errors (especially when the module
1423 	 * is bad) and in sabre there is no guarantee that such errors
1424 	 * (if introduced) are written back as poisoned data.
1425 	 */
1426 	if (t_afsr & P_AFSR_UE) {
1427 		int i;
1428 
1429 		(void) strcat(pr_reason, "UE ");
1430 
1431 		spf_flt.flt_type = CPU_UE_ERR;
1432 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1433 			MMU_PAGESHIFT)) ? 1: 0;
1434 
1435 		/*
1436 		 * With UE, we have the PA of the fault.
1437 		 * Let do a diagnostic read to get the ecache
1438 		 * data and tag info of the bad line for logging.
1439 		 */
1440 		if (aflt->flt_in_memory) {
1441 			uint32_t ec_set_size;
1442 			uchar_t state;
1443 			uint32_t ecache_idx;
1444 			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);
1445 
1446 			/* touch the line to put it in ecache */
1447 			acc_afsr |= read_and_clear_afsr();
1448 			(void) lddphys(faultpa);
1449 			acc_afsr |= (read_and_clear_afsr() &
1450 				    ~(P_AFSR_EDP | P_AFSR_UE));
1451 
1452 			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
1453 			    ecache_associativity;
1454 
1455 			for (i = 0; i < ecache_associativity; i++) {
1456 				ecache_idx = i * ec_set_size +
1457 				    (aflt->flt_addr % ec_set_size);
1458 				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
1459 					(uint64_t *)&spf_flt.flt_ec_data[0],
1460 					&spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
1461 				acc_afsr |= oafsr;
1462 
1463 				state = (uchar_t)((spf_flt.flt_ec_tag &
1464 				    cpu_ec_state_mask) >> cpu_ec_state_shift);
1465 
1466 				if ((state & cpu_ec_state_valid) &&
1467 				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
1468 				    ((uint64_t)aflt->flt_addr >>
1469 				    cpu_ec_tag_shift)))
1470 					break;
1471 			}
1472 
1473 			/*
1474 			 * Check to see if the ecache tag is valid for the
1475 			 * fault PA. In the very unlikely event where the
1476 			 * line could be victimized, no ecache info will be
1477 			 * available. If this is the case, capture the line
1478 			 * from memory instead.
1479 			 */
1480 			if ((state & cpu_ec_state_valid) == 0 ||
1481 			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
1482 			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
1483 				for (i = 0; i < 8; i++, faultpa += 8) {
1484 				    ec_data_t *ecdptr;
1485 
1486 					ecdptr = &spf_flt.flt_ec_data[i];
1487 					acc_afsr |= read_and_clear_afsr();
1488 					ecdptr->ec_d8 = lddphys(faultpa);
1489 					acc_afsr |= (read_and_clear_afsr() &
1490 						    ~(P_AFSR_EDP | P_AFSR_UE));
1491 					ecdptr->ec_afsr = 0;
1492 							/* null afsr value */
1493 				}
1494 
1495 				/*
1496 				 * Mark tag invalid to indicate mem dump
1497 				 * when we print out the info.
1498 				 */
1499 				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
1500 			}
1501 			spf_flt.flt_ec_lcnt = 1;
1502 
1503 			/*
1504 			 * Flush out the bad line
1505 			 */
1506 			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
1507 				cpunodes[CPU->cpu_id].ecache_size);
1508 
1509 			acc_afsr |= clear_errors(NULL, NULL);
1510 		}
1511 
1512 		/*
1513 		 * Ask our bus nexus friends if they have any fatal errors. If
1514 		 * so, they will log appropriate error messages and panic as a
1515 		 * result. We then queue an event for each UDB that reports a
1516 		 * UE. Each UE reported in a UDB will have its own log message.
1517 		 *
1518 		 * Note from kbn: In the case where there are multiple UEs
1519 		 * (ME bit is set) - the AFAR address is only accurate to
1520 		 * the 16-byte granularity. One cannot tell whether the AFAR
1521 		 * belongs to the UDBH or UDBL syndromes. In this case, we
1522 		 * always report the AFAR address to be 16-byte aligned.
1523 		 *
1524 		 * If we're on a Sabre, there is no SDBL, but it will always
1525 		 * read as zero, so the sdbl test below will safely fail.
1526 		 */
1527 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
1528 			aflt->flt_panic = 1;
1529 
1530 		if (sdbh & P_DER_UE) {
1531 			aflt->flt_synd = sdbh & P_DER_E_SYND;
1532 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1533 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1534 			    aflt->flt_panic);
1535 		}
1536 		if (sdbl & P_DER_UE) {
1537 			aflt->flt_synd = sdbl & P_DER_E_SYND;
1538 			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
1539 			if (!(aflt->flt_stat & P_AFSR_ME))
1540 				aflt->flt_addr |= 0x8;
1541 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1542 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1543 			    aflt->flt_panic);
1544 		}
1545 
1546 		/*
1547 		 * We got a UE and are panicking, save the fault PA in a known
1548 		 * location so that the platform specific panic code can check
1549 		 * for copyback errors.
1550 		 */
1551 		if (aflt->flt_panic && aflt->flt_in_memory) {
1552 			panic_aflt = *aflt;
1553 		}
1554 	}
1555 
1556 	/*
1557 	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
1558 	 * async error for logging. For Sabre, we panic on EDP or LDP.
1559 	 */
1560 	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
1561 		spf_flt.flt_type = CPU_EDP_LDP_ERR;
1562 
1563 		if (t_afsr & P_AFSR_EDP)
1564 			(void) strcat(pr_reason, "EDP ");
1565 
1566 		if (t_afsr & P_AFSR_LDP)
1567 			(void) strcat(pr_reason, "LDP ");
1568 
1569 		/*
1570 		 * Here we have no PA to work with.
1571 		 * Scan each line in the ecache to look for
1572 		 * the one with bad parity.
1573 		 */
1574 		aflt->flt_addr = AFLT_INV_ADDR;
1575 		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1576 			&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
1577 		acc_afsr |= (oafsr & ~P_AFSR_WP);
1578 
1579 		/*
1580 		 * If we found a bad PA, update the state to indicate if it is
1581 		 * memory or I/O space.  This code will be important if we ever
1582 		 * support cacheable frame buffers.
1583 		 */
1584 		if (aflt->flt_addr != AFLT_INV_ADDR) {
1585 			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1586 				MMU_PAGESHIFT)) ? 1 : 0;
1587 		}
1588 
1589 		if (isus2i || isus2e)
1590 			aflt->flt_panic = 1;
1591 
1592 		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
1593 		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
1594 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1595 		    aflt->flt_panic);
1596 	}
1597 
1598 	/*
1599 	 * Timeout and bus error handling.  There are two cases to consider:
1600 	 *
1601 	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we
1602 	 * have already modified the saved registers so that we will return
1603 	 * from the trap to the appropriate trampoline routine; otherwise panic.
1604 	 *
1605 	 * (2) In user mode, we can simply use our AST mechanism to deliver
1606 	 * a SIGBUS.  We do not log the occurence - processes performing
1607 	 * device control would generate lots of uninteresting messages.
1608 	 */
1609 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1610 		if (t_afsr & P_AFSR_TO)
1611 			(void) strcat(pr_reason, "BTO ");
1612 
1613 		if (t_afsr & P_AFSR_BERR)
1614 			(void) strcat(pr_reason, "BERR ");
1615 
1616 		spf_flt.flt_type = CPU_BTO_BERR_ERR;
1617 		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
1618 			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
1619 			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
1620 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1621 			    aflt->flt_panic);
1622 		}
1623 	}
1624 
1625 	/*
1626 	 * Handle WP: WP happens when the ecache is victimized and a parity
1627 	 * error was detected on a writeback.  The data in question will be
1628 	 * poisoned as a UE will be written back.  The PA is not logged and
1629 	 * it is possible that it doesn't belong to the trapped thread.  The
1630 	 * WP trap is not fatal, but it could be fatal to someone that
1631 	 * subsequently accesses the toxic page.  We set read_all_memscrub
1632 	 * to force the memscrubber to read all of memory when it awakens.
1633 	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
1634 	 * UE back to poison the data.
1635 	 */
1636 	if (t_afsr & P_AFSR_WP) {
1637 		(void) strcat(pr_reason, "WP ");
1638 		if (isus2i || isus2e) {
1639 			aflt->flt_panic = 1;
1640 		} else {
1641 			read_all_memscrub = 1;
1642 		}
1643 		spf_flt.flt_type = CPU_WP_ERR;
1644 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
1645 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1646 		    aflt->flt_panic);
1647 	}
1648 
1649 	/*
1650 	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
1651 	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
1652 	 * This is fatal.
1653 	 */
1654 
1655 	if (t_afsr & P_AFSR_CP) {
1656 		if (isus2i || isus2e) {
1657 			(void) strcat(pr_reason, "CP ");
1658 			aflt->flt_panic = 1;
1659 			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
1660 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1661 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1662 			    aflt->flt_panic);
1663 		} else {
1664 			/*
1665 			 * Orphan CP: Happens due to signal integrity problem
1666 			 * on a CPU, where a CP is reported, without reporting
1667 			 * its associated UE. This is handled by locating the
1668 			 * bad parity line and would kick off the memscrubber
1669 			 * to find the UE if in memory or in another's cache.
1670 			 */
1671 			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
1672 			(void) strcat(pr_reason, "ORPHAN_CP ");
1673 
1674 			/*
1675 			 * Here we have no PA to work with.
1676 			 * Scan each line in the ecache to look for
1677 			 * the one with bad parity.
1678 			 */
1679 			aflt->flt_addr = AFLT_INV_ADDR;
1680 			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1681 				&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
1682 				&oafsr);
1683 			acc_afsr |= oafsr;
1684 
1685 			/*
1686 			 * If we found a bad PA, update the state to indicate
1687 			 * if it is memory or I/O space.
1688 			 */
1689 			if (aflt->flt_addr != AFLT_INV_ADDR) {
1690 				aflt->flt_in_memory =
1691 					(pf_is_memory(aflt->flt_addr >>
1692 						MMU_PAGESHIFT)) ? 1 : 0;
1693 			}
1694 			read_all_memscrub = 1;
1695 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1696 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1697 			    aflt->flt_panic);
1698 
1699 		}
1700 	}
1701 
1702 	/*
1703 	 * If we queued an error other than WP or CP and we are going to return
1704 	 * from the trap and the error was in user mode or inside of a
1705 	 * copy routine, set AST flag so the queue will be drained before
1706 	 * returning to user mode.
1707 	 *
1708 	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
1709 	 * and send an event to its process contract.
1710 	 *
1711 	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
1712 	 * will have been no error queued in this case.
1713 	 */
1714 	if ((t_afsr &
1715 	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
1716 	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
1717 			int pcb_flag = 0;
1718 
1719 			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
1720 				pcb_flag |= ASYNC_HWERR;
1721 
1722 			if (t_afsr & P_AFSR_BERR)
1723 				pcb_flag |= ASYNC_BERR;
1724 
1725 			if (t_afsr & P_AFSR_TO)
1726 				pcb_flag |= ASYNC_BTO;
1727 
1728 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1729 			aston(curthread);
1730 			action = ACTION_AST_FLAGS;
1731 	}
1732 
1733 	/*
1734 	 * In response to a deferred error, we must do one of three things:
1735 	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
1736 	 * set in cases (1) and (2) - check that either action is set or
1737 	 * (3) is true.
1738 	 *
1739 	 * On II, the WP writes poisoned data back to memory, which will
1740 	 * cause a UE and a panic or reboot when read.  In this case, we
1741 	 * don't need to panic at this time.  On IIi and IIe,
1742 	 * aflt->flt_panic is already set above.
1743 	 */
1744 	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
1745 	    (t_afsr & P_AFSR_WP));
1746 
1747 	/*
1748 	 * Make a final sanity check to make sure we did not get any more async
1749 	 * errors and accumulate the afsr.
1750 	 */
1751 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
1752 	    cpunodes[CPU->cpu_id].ecache_linesize);
1753 	(void) clear_errors(&spf_flt, NULL);
1754 
1755 	/*
1756 	 * Take care of a special case: If there is a UE in the ecache flush
1757 	 * area, we'll see it in flush_ecache().  This will trigger the
1758 	 * CPU_ADDITIONAL_ERRORS case below.
1759 	 *
1760 	 * This could occur if the original error was a UE in the flush area,
1761 	 * or if the original error was an E$ error that was flushed out of
1762 	 * the E$ in scan_ecache().
1763 	 *
1764 	 * If it's at the same address that we're already logging, then it's
1765 	 * probably one of these cases.  Clear the bit so we don't trip over
1766 	 * it on the additional errors case, which could cause an unnecessary
1767 	 * panic.
1768 	 */
1769 	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
1770 		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
1771 	else
1772 		acc_afsr |= aflt->flt_stat;
1773 
1774 	/*
1775 	 * Check the acumulated afsr for the important bits.
1776 	 * Make sure the spf_flt.flt_type value is set, and
1777 	 * enque an error.
1778 	 */
1779 	if (acc_afsr &
1780 	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
1781 		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
1782 		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
1783 		    P_AFSR_ISAP))
1784 			aflt->flt_panic = 1;
1785 
1786 		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
1787 		aflt->flt_stat = acc_afsr;
1788 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
1789 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1790 		    aflt->flt_panic);
1791 	}
1792 
1793 	/*
1794 	 * If aflt->flt_panic is set at this point, we need to panic as the
1795 	 * result of a trap at TL > 0, or an error we determined to be fatal.
1796 	 * We've already enqueued the error in one of the if-clauses above,
1797 	 * and it will be dequeued and logged as part of the panic flow.
1798 	 */
1799 	if (aflt->flt_panic) {
1800 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
1801 		    "See previous message(s) for details", " %sError(s)",
1802 		    pr_reason);
1803 	}
1804 
1805 	/*
1806 	 * Before returning, we must re-enable errors, and
1807 	 * reset the caches to their boot-up state.
1808 	 */
1809 	set_lsu(get_lsu() | cache_boot_state);
1810 	set_error_enable(EER_ENABLE);
1811 }
1812 
1813 /*
1814  * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1815  * This routine is shared by the CE and UE handling code.
1816  */
1817 static void
1818 check_misc_err(spitf_async_flt *spf_flt)
1819 {
1820 	struct async_flt *aflt = (struct async_flt *)spf_flt;
1821 	char *fatal_str = NULL;
1822 
1823 	/*
1824 	 * The ISAP and ETP errors are supposed to cause a POR
1825 	 * from the system, so in theory we never, ever see these messages.
1826 	 * ISAP, ETP and IVUE are considered to be fatal.
1827 	 */
1828 	if (aflt->flt_stat & P_AFSR_ISAP)
1829 		fatal_str = " System Address Parity Error on";
1830 	else if (aflt->flt_stat & P_AFSR_ETP)
1831 		fatal_str = " Ecache Tag Parity Error on";
1832 	else if (aflt->flt_stat & P_AFSR_IVUE)
1833 		fatal_str = " Interrupt Vector Uncorrectable Error on";
1834 	if (fatal_str != NULL) {
1835 		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
1836 			NULL, fatal_str);
1837 	}
1838 }
1839 
1840 /*
1841  * Routine to convert a syndrome into a syndrome code.
1842  */
1843 static int
1844 synd_to_synd_code(int synd_status, ushort_t synd)
1845 {
1846 	if (synd_status != AFLT_STAT_VALID)
1847 		return (-1);
1848 
1849 	/*
1850 	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
1851 	 * to get the code indicating which bit(s) is(are) bad.
1852 	 */
1853 	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
1854 		return (-1);
1855 	else
1856 		return (ecc_syndrome_tab[synd]);
1857 }
1858 
/*
 * Stub: this implementation examines none of its arguments, writes nothing
 * to buf or lenp, and always returns ENOTSUP.
 */
/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
1865 
/*
 * Stub: this implementation ignores flt_addr, leaves *offp untouched, and
 * always returns ENOTSUP.
 */
/* ARGSUSED */
int
cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
{
	return (ENOTSUP);
}
1872 
/*
 * Stub: this implementation ignores all of its arguments, leaves *addrp
 * untouched, and always returns ENOTSUP.
 */
/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	return (ENOTSUP);
}
1879 
1880 /*
1881  * Routine to return a string identifying the physical name
1882  * associated with a memory/cache error.
1883  */
1884 /* ARGSUSED */
1885 int
1886 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
1887     uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
1888     char *buf, int buflen, int *lenp)
1889 {
1890 	short synd_code;
1891 	int ret;
1892 
1893 	if (flt_in_memory) {
1894 		synd_code = synd_to_synd_code(synd_status, synd);
1895 		if (synd_code == -1) {
1896 			ret = EINVAL;
1897 		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
1898 		    buf, buflen, lenp) != 0) {
1899 			ret = EIO;
1900 		} else if (*lenp <= 1) {
1901 			ret = EINVAL;
1902 		} else {
1903 			ret = 0;
1904 		}
1905 	} else {
1906 		ret = ENOTSUP;
1907 	}
1908 
1909 	if (ret != 0) {
1910 		buf[0] = '\0';
1911 		*lenp = 0;
1912 	}
1913 
1914 	return (ret);
1915 }
1916 
1917 /*
1918  * Wrapper for cpu_get_mem_unum() routine that takes an
1919  * async_flt struct rather than explicit arguments.
1920  */
1921 int
1922 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1923     char *buf, int buflen, int *lenp)
1924 {
1925 	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
1926 		aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
1927 		aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
1928 }
1929 
1930 /*
1931  * This routine is a more generic interface to cpu_get_mem_unum(),
1932  * that may be used by other modules (e.g. mm).
1933  */
1934 int
1935 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1936 		char *buf, int buflen, int *lenp)
1937 {
1938 	int synd_status, flt_in_memory, ret;
1939 	char unum[UNUM_NAMLEN];
1940 
1941 	/*
1942 	 * Check for an invalid address.
1943 	 */
1944 	if (afar == (uint64_t)-1)
1945 		return (ENXIO);
1946 
1947 	if (synd == (uint64_t)-1)
1948 		synd_status = AFLT_STAT_INVALID;
1949 	else
1950 		synd_status = AFLT_STAT_VALID;
1951 
1952 	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
1953 
1954 	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1955 	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
1956 	    != 0)
1957 		return (ret);
1958 
1959 	if (*lenp >= buflen)
1960 		return (ENAMETOOLONG);
1961 
1962 	(void) strncpy(buf, unum, buflen);
1963 
1964 	return (0);
1965 }
1966 
/*
 * Routine to return memory information associated
 * with a physical address and syndrome.
 *
 * In this implementation the routine is a stub: no argument is examined,
 * none of the output pointers is written, and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_info(uint64_t synd, uint64_t afar,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
	return (ENOTSUP);
}
1979 
/*
 * Routine to return a string identifying the physical
 * name associated with a cpuid.
 *
 * In this implementation the routine is a stub: buf and *lenp are left
 * untouched and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
1990 
1991 /*
1992  * This routine returns the size of the kernel's FRU name buffer.
1993  */
1994 size_t
1995 cpu_get_name_bufsize()
1996 {
1997 	return (UNUM_NAMLEN);
1998 }
1999 
2000 /*
2001  * Cpu specific log func for UEs.
2002  */
2003 static void
2004 log_ue_err(struct async_flt *aflt, char *unum)
2005 {
2006 	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
2007 	int len = 0;
2008 
2009 #ifdef DEBUG
2010 	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
2011 
2012 	/*
2013 	 * Paranoid Check for priv mismatch
2014 	 * Only applicable for UEs
2015 	 */
2016 	if (afsr_priv != aflt->flt_priv) {
2017 		/*
2018 		 * The priv bits in %tstate and %afsr did not match; we expect
2019 		 * this to be very rare, so flag it with a message.
2020 		 */
2021 		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
2022 		    ": PRIV bit in TSTATE and AFSR mismatched; "
2023 		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
2024 
2025 		/* update saved afsr to reflect the correct priv */
2026 		aflt->flt_stat &= ~P_AFSR_PRIV;
2027 		if (aflt->flt_priv)
2028 			aflt->flt_stat |= P_AFSR_PRIV;
2029 	}
2030 #endif /* DEBUG */
2031 
2032 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
2033 	    UNUM_NAMLEN, &len);
2034 
2035 	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
2036 	    " Uncorrectable Memory Error on");
2037 
2038 	if (SYND(aflt->flt_synd) == 0x3) {
2039 		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
2040 		    " Syndrome 0x3 indicates that this may not be a "
2041 		    "memory module problem");
2042 	}
2043 
2044 	if (aflt->flt_in_memory)
2045 		cpu_log_ecmem_info(spf_flt);
2046 }
2047 
2048 
2049 /*
2050  * The cpu_async_log_err() function is called via the ue_drain() function to
2051  * handle logging for CPU events that are dequeued.  As such, it can be invoked
2052  * from softint context, from AST processing in the trap() flow, or from the
2053  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
2054  */
2055 static void
2056 cpu_async_log_err(void *flt)
2057 {
2058 	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
2059 	struct async_flt *aflt = (struct async_flt *)flt;
2060 	char unum[UNUM_NAMLEN];
2061 	char *space;
2062 	char *ecache_scrub_logstr = NULL;
2063 
2064 	switch (spf_flt->flt_type) {
2065 	    case CPU_UE_ERR:
2066 		/*
2067 		 * We want to skip logging only if ALL the following
2068 		 * conditions are true:
2069 		 *
2070 		 *	1. We are not panicking
2071 		 *	2. There is only one error
2072 		 *	3. That error is a memory error
2073 		 *	4. The error is caused by the memory scrubber (in
2074 		 *	   which case the error will have occurred under
2075 		 *	   on_trap protection)
2076 		 *	5. The error is on a retired page
2077 		 *
2078 		 * Note 1: AFLT_PROT_EC is used places other than the memory
2079 		 * scrubber.  However, none of those errors should occur
2080 		 * on a retired page.
2081 		 *
2082 		 * Note 2: In the CE case, these errors are discarded before
2083 		 * the errorq.  In the UE case, we must wait until now --
2084 		 * softcall() grabs a mutex, which we can't do at a high PIL.
2085 		 */
2086 		if (!panicstr &&
2087 		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
2088 		    aflt->flt_prot == AFLT_PROT_EC) {
2089 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2090 				/* Zero the address to clear the error */
2091 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2092 				return;
2093 			}
2094 		}
2095 
2096 		/*
2097 		 * Log the UE and check for causes of this UE error that
2098 		 * don't cause a trap (Copyback error).  cpu_async_error()
2099 		 * has already checked the i/o buses for us.
2100 		 */
2101 		log_ue_err(aflt, unum);
2102 		if (aflt->flt_in_memory)
2103 			cpu_check_allcpus(aflt);
2104 		break;
2105 
2106 	    case CPU_EDP_LDP_ERR:
2107 		if (aflt->flt_stat & P_AFSR_EDP)
2108 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2109 			    NULL, " EDP event on");
2110 
2111 		if (aflt->flt_stat & P_AFSR_LDP)
2112 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2113 			    NULL, " LDP event on");
2114 
2115 		/* Log ecache info if exist */
2116 		if (spf_flt->flt_ec_lcnt > 0) {
2117 			cpu_log_ecmem_info(spf_flt);
2118 
2119 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2120 			    NULL, " AFAR was derived from E$Tag");
2121 		} else {
2122 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2123 			    NULL, " No error found in ecache (No fault "
2124 			    "PA available)");
2125 		}
2126 		break;
2127 
2128 	    case CPU_WP_ERR:
2129 		/*
2130 		 * If the memscrub thread hasn't yet read
2131 		 * all of memory, as we requested in the
2132 		 * trap handler, then give it a kick to
2133 		 * make sure it does.
2134 		 */
2135 		if (!isus2i && !isus2e && read_all_memscrub)
2136 			memscrub_run();
2137 
2138 		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
2139 		    " WP event on");
2140 		return;
2141 
2142 	    case CPU_BTO_BERR_ERR:
2143 		/*
2144 		 * A bus timeout or error occurred that was in user mode or not
2145 		 * in a protected kernel code region.
2146 		 */
2147 		if (aflt->flt_stat & P_AFSR_BERR) {
2148 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2149 			    spf_flt, BERRTO_LFLAGS, NULL,
2150 			    " Bus Error on System Bus in %s mode from",
2151 			    aflt->flt_priv ? "privileged" : "user");
2152 		}
2153 
2154 		if (aflt->flt_stat & P_AFSR_TO) {
2155 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2156 			    spf_flt, BERRTO_LFLAGS, NULL,
2157 			    " Timeout on System Bus in %s mode from",
2158 			    aflt->flt_priv ? "privileged" : "user");
2159 		}
2160 
2161 		return;
2162 
2163 	    case CPU_PANIC_CP_ERR:
2164 		/*
2165 		 * Process the Copyback (CP) error info (if any) obtained from
2166 		 * polling all the cpus in the panic flow. This case is only
2167 		 * entered if we are panicking.
2168 		 */
2169 		ASSERT(panicstr != NULL);
2170 		ASSERT(aflt->flt_id == panic_aflt.flt_id);
2171 
2172 		/* See which space - this info may not exist */
2173 		if (panic_aflt.flt_status & ECC_D_TRAP)
2174 			space = "Data ";
2175 		else if (panic_aflt.flt_status & ECC_I_TRAP)
2176 			space = "Instruction ";
2177 		else
2178 			space = "";
2179 
2180 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2181 		    " AFAR was derived from UE report,"
2182 		    " CP event on CPU%d (caused %saccess error on %s%d)",
2183 		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
2184 		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
2185 
2186 		if (spf_flt->flt_ec_lcnt > 0)
2187 			cpu_log_ecmem_info(spf_flt);
2188 		else
2189 			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
2190 			    NULL, " No cache dump available");
2191 
2192 		return;
2193 
2194 	    case CPU_TRAPPING_CP_ERR:
2195 		/*
2196 		 * For sabre only.  This is a copyback ecache parity error due
2197 		 * to a PCI DMA read.  We should be panicking if we get here.
2198 		 */
2199 		ASSERT(panicstr != NULL);
2200 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2201 		    " AFAR was derived from UE report,"
2202 		    " CP event on CPU%d (caused Data access error "
2203 		    "on PCIBus)", aflt->flt_inst);
2204 		return;
2205 
2206 		/*
2207 		 * We log the ecache lines of the following states,
2208 		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
2209 		 * dirty_bad_busy if ecache_scrub_verbose is set and panic
2210 		 * in addition to logging if ecache_scrub_panic is set.
2211 		 */
2212 	    case CPU_BADLINE_CI_ERR:
2213 		ecache_scrub_logstr = "CBI";
2214 		/* FALLTHRU */
2215 
2216 	    case CPU_BADLINE_CB_ERR:
2217 		if (ecache_scrub_logstr == NULL)
2218 			ecache_scrub_logstr = "CBB";
2219 		/* FALLTHRU */
2220 
2221 	    case CPU_BADLINE_DI_ERR:
2222 		if (ecache_scrub_logstr == NULL)
2223 			ecache_scrub_logstr = "DBI";
2224 		/* FALLTHRU */
2225 
2226 	    case CPU_BADLINE_DB_ERR:
2227 		if (ecache_scrub_logstr == NULL)
2228 			ecache_scrub_logstr = "DBB";
2229 
2230 		cpu_aflt_log(CE_NOTE, 2, spf_flt,
2231 			(CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
2232 			" %s event on", ecache_scrub_logstr);
2233 		cpu_log_ecmem_info(spf_flt);
2234 
2235 		return;
2236 
2237 	    case CPU_ORPHAN_CP_ERR:
2238 		/*
2239 		 * Orphan CPs, where the CP bit is set, but when a CPU
2240 		 * doesn't report a UE.
2241 		 */
2242 		if (read_all_memscrub)
2243 			memscrub_run();
2244 
2245 		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
2246 			NULL, " Orphan CP event on");
2247 
2248 		/* Log ecache info if exist */
2249 		if (spf_flt->flt_ec_lcnt > 0)
2250 			cpu_log_ecmem_info(spf_flt);
2251 		else
2252 			cpu_aflt_log(CE_NOTE, 2, spf_flt,
2253 				(CP_LFLAGS | CPU_FLTCPU), NULL,
2254 				" No error found in ecache (No fault "
2255 				"PA available");
2256 		return;
2257 
2258 	    case CPU_ECACHE_ADDR_PAR_ERR:
2259 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2260 				" E$ Tag Address Parity error on");
2261 		cpu_log_ecmem_info(spf_flt);
2262 		return;
2263 
2264 	    case CPU_ECACHE_STATE_ERR:
2265 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2266 				" E$ Tag State Parity error on");
2267 		cpu_log_ecmem_info(spf_flt);
2268 		return;
2269 
2270 	    case CPU_ECACHE_TAG_ERR:
2271 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2272 				" E$ Tag scrub event on");
2273 		cpu_log_ecmem_info(spf_flt);
2274 		return;
2275 
2276 	    case CPU_ECACHE_ETP_ETS_ERR:
2277 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2278 				" AFSR.ETP is set and AFSR.ETS is zero on");
2279 		cpu_log_ecmem_info(spf_flt);
2280 		return;
2281 
2282 
2283 	    case CPU_ADDITIONAL_ERR:
2284 		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
2285 		    " Additional errors detected during error processing on");
2286 		return;
2287 
2288 	    default:
2289 		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
2290 		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
2291 		return;
2292 	}
2293 
2294 	/* ... fall through from the UE, EDP, or LDP cases */
2295 
2296 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2297 		if (!panicstr) {
2298 			(void) page_retire(aflt->flt_addr, PR_UE);
2299 		} else {
2300 			/*
2301 			 * Clear UEs on panic so that we don't
2302 			 * get haunted by them during panic or
2303 			 * after reboot
2304 			 */
2305 			clearphys(P2ALIGN(aflt->flt_addr, 64),
2306 			    cpunodes[CPU->cpu_id].ecache_size,
2307 			    cpunodes[CPU->cpu_id].ecache_linesize);
2308 
2309 			(void) clear_errors(NULL, NULL);
2310 		}
2311 	}
2312 
2313 	/*
2314 	 * Log final recover message
2315 	 */
2316 	if (!panicstr) {
2317 		if (!aflt->flt_priv) {
2318 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2319 			    NULL, " Above Error is in User Mode"
2320 			    "\n    and is fatal: "
2321 			    "will SIGKILL process and notify contract");
2322 		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
2323 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2324 			    NULL, " Above Error detected while dumping core;"
2325 			    "\n    core file will be truncated");
2326 		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
2327 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2328 			    NULL, " Above Error is due to Kernel access"
2329 			    "\n    to User space and is fatal: "
2330 			    "will SIGKILL process and notify contract");
2331 		} else if (aflt->flt_prot == AFLT_PROT_EC) {
2332 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
2333 			    " Above Error detected by protected Kernel code"
2334 			    "\n    that will try to clear error from system");
2335 		}
2336 	}
2337 }
2338 
2339 
2340 /*
2341  * Check all cpus for non-trapping UE-causing errors
2342  * In Ultra I/II, we look for copyback errors (CPs)
2343  */
2344 void
2345 cpu_check_allcpus(struct async_flt *aflt)
2346 {
2347 	spitf_async_flt cp;
2348 	spitf_async_flt *spf_cpflt = &cp;
2349 	struct async_flt *cpflt = (struct async_flt *)&cp;
2350 	int pix;
2351 
2352 	cpflt->flt_id = aflt->flt_id;
2353 	cpflt->flt_addr = aflt->flt_addr;
2354 
2355 	for (pix = 0; pix < NCPU; pix++) {
2356 		if (CPU_XCALL_READY(pix)) {
2357 			xc_one(pix, (xcfunc_t *)get_cpu_status,
2358 			    (uint64_t)cpflt, 0);
2359 
2360 			if (cpflt->flt_stat & P_AFSR_CP) {
2361 				char *space;
2362 
2363 				/* See which space - this info may not exist */
2364 				if (aflt->flt_status & ECC_D_TRAP)
2365 					space = "Data ";
2366 				else if (aflt->flt_status & ECC_I_TRAP)
2367 					space = "Instruction ";
2368 				else
2369 					space = "";
2370 
2371 				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
2372 				    NULL, " AFAR was derived from UE report,"
2373 				    " CP event on CPU%d (caused %saccess "
2374 				    "error on %s%d)", pix, space,
2375 				    (aflt->flt_status & ECC_IOBUS) ?
2376 				    "IOBUS" : "CPU", aflt->flt_bus_id);
2377 
2378 				if (spf_cpflt->flt_ec_lcnt > 0)
2379 					cpu_log_ecmem_info(spf_cpflt);
2380 				else
2381 					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
2382 					    CPU_ERRID_FIRST, NULL,
2383 					    " No cache dump available");
2384 			}
2385 		}
2386 	}
2387 }
2388 
#ifdef DEBUG
/*
 * DEBUG-only test knob: when non-zero, get_cpu_status() forces the CP bit
 * on in the AFSR value it captures, exercising the copyback error path.
 */
int test_mp_cp = 0;
#endif
2392 
/*
 * Cross-call callback routine to tell a CPU to read its own %afsr to check
 * for copyback errors and capture relevant information.
 *
 * arg is the address of a spitf_async_flt (also accessed as a struct
 * async_flt) whose flt_addr has been set by the caller.  On return flt_stat
 * holds the captured AFSR merged with any AFSR bits accumulated in this
 * CPU's private sfpr_scrub_afsr (which is then zeroed).  If the CP bit is
 * set, the UDB values, flt_ec_lcnt and, when a matching line is found, the
 * ecache tag and data are filled in as well.
 *
 * Always returns 0.
 */
static uint_t
get_cpu_status(uint64_t arg)
{
	struct async_flt *aflt = (struct async_flt *)arg;
	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
	uint64_t afsr;
	uint32_t ec_idx;
	uint64_t sdbh, sdbl;
	int i;
	uint32_t ec_set_size;
	uchar_t valid;
	ec_data_t ec_data[8];
	uint64_t ec_tag, flt_addr_tag, oafsr;
	uint64_t *acc_afsr = NULL;

	/*
	 * Read the AFSR and fold in (then reset) the accumulated scrub AFSR
	 * bits, if this CPU has private data.
	 */
	get_asyncflt(&afsr);
	if (CPU_PRIVATE(CPU) != NULL) {
		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		afsr |= *acc_afsr;
		*acc_afsr = 0;
	}

#ifdef DEBUG
	/* Test hook: pretend a copyback error was seen. */
	if (test_mp_cp)
		afsr |= P_AFSR_CP;
#endif
	aflt->flt_stat = afsr;

	if (afsr & P_AFSR_CP) {
		/*
		 * Capture the UDBs
		 */
		get_udb_errors(&sdbh, &sdbl);
		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);

		/*
		 * Clear CP bit before capturing ecache data
		 * and AFSR info.
		 */
		set_asyncflt(P_AFSR_CP);

		/*
		 * See if we can capture the ecache line for the
		 * fault PA.
		 *
		 * Return a valid matching ecache line, if any.
		 * Otherwise, return the first matching ecache
		 * line marked invalid.
		 */
		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
		    ecache_associativity;
		spf_flt->flt_ec_lcnt = 0;

		/*
		 * Probe the same offset in each set (one iteration per way
		 * of associativity), stepping by the set size.
		 */
		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
			get_ecache_dtag(P2ALIGN(ec_idx, 64),
				(uint64_t *)&ec_data[0], &ec_tag, &oafsr,
				    acc_afsr);

			/* Skip lines whose tag does not match the fault PA. */
			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
				continue;

			valid = cpu_ec_state_valid &
			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
			    cpu_ec_state_shift);

			/*
			 * Record a valid match, or the first match of any
			 * kind; a valid match ends the search.
			 */
			if (valid || spf_flt->flt_ec_lcnt == 0) {
				spf_flt->flt_ec_tag = ec_tag;
				bcopy(&ec_data, &spf_flt->flt_ec_data,
				    sizeof (ec_data));
				spf_flt->flt_ec_lcnt = 1;

				if (valid)
					break;
			}
		}
	}
	return (0);
}
2478 
2479 /*
2480  * CPU-module callback for the non-panicking CPUs.  This routine is invoked
2481  * from panic_idle() as part of the other CPUs stopping themselves when a
2482  * panic occurs.  We need to be VERY careful what we do here, since panicstr
2483  * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
2484  * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
2485  * CP error information.
2486  */
2487 void
2488 cpu_async_panic_callb(void)
2489 {
2490 	spitf_async_flt cp;
2491 	struct async_flt *aflt = (struct async_flt *)&cp;
2492 	uint64_t *scrub_afsr;
2493 
2494 	if (panic_aflt.flt_id != 0) {
2495 		aflt->flt_addr = panic_aflt.flt_addr;
2496 		(void) get_cpu_status((uint64_t)aflt);
2497 
2498 		if (CPU_PRIVATE(CPU) != NULL) {
2499 			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2500 			if (*scrub_afsr & P_AFSR_CP) {
2501 				aflt->flt_stat |= *scrub_afsr;
2502 				*scrub_afsr = 0;
2503 			}
2504 		}
2505 		if (aflt->flt_stat & P_AFSR_CP) {
2506 			aflt->flt_id = panic_aflt.flt_id;
2507 			aflt->flt_panic = 1;
2508 			aflt->flt_inst = CPU->cpu_id;
2509 			aflt->flt_class = CPU_FAULT;
2510 			cp.flt_type = CPU_PANIC_CP_ERR;
2511 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
2512 			    (void *)&cp, sizeof (cp), ue_queue,
2513 			    aflt->flt_panic);
2514 		}
2515 	}
2516 }
2517 
/*
 * Turn off all cpu error detection, normally only used for panics.
 *
 * Done by passing set_error_enable_tl1 with EER_DISABLE and
 * EER_SET_ABSOLUTE to xt_all().
 */
void
cpu_disable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
}
2526 
/*
 * Enable errors.
 *
 * Counterpart of cpu_disable_errors(): passes set_error_enable_tl1 with
 * EER_ENABLE and EER_SET_ABSOLUTE to xt_all().
 */
void
cpu_enable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
}
2535 
2536 static void
2537 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
2538 {
2539 	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
2540 	int i, loop = 1;
2541 	ushort_t ecc_0;
2542 	uint64_t paddr;
2543 	uint64_t data;
2544 
2545 	if (verbose)
2546 		loop = 8;
2547 	for (i = 0; i < loop; i++) {
2548 		paddr = aligned_addr + (i * 8);
2549 		data = lddphys(paddr);
2550 		if (verbose) {
2551 			if (ce_err) {
2552 			    ecc_0 = ecc_gen((uint32_t)(data>>32),
2553 			    (uint32_t)data);
2554 			    cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2555 				NULL, "    Paddr 0x%" PRIx64 ", "
2556 				"Data 0x%08x.%08x, ECC 0x%x", paddr,
2557 				(uint32_t)(data>>32), (uint32_t)data, ecc_0);
2558 			} else {
2559 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2560 				    NULL, "    Paddr 0x%" PRIx64 ", "
2561 				    "Data 0x%08x.%08x", paddr,
2562 				    (uint32_t)(data>>32), (uint32_t)data);
2563 			}
2564 		}
2565 	}
2566 }
2567 
/*
 * Mask table for the sec-ded-s4ed ecc code.  Entry i holds the {hi, lo}
 * masks that ecc_gen() ANDs with the high and low 32 bits of the data; the
 * XOR of all surviving bits becomes bit i of the generated check byte.
 */
static struct {		/* sec-ded-s4ed ecc code */
	uint_t hi, lo;
} ecc_code[8] = {
	{ 0xee55de23U, 0x16161161U },
	{ 0x55eede93U, 0x61612212U },
	{ 0xbb557b8cU, 0x49494494U },
	{ 0x55bb7b6cU, 0x94948848U },
	{ 0x16161161U, 0xee55de23U },
	{ 0x61612212U, 0x55eede93U },
	{ 0x49494494U, 0xbb557b8cU },
	{ 0x94948848U, 0x55bb7b6cU }
};
2580 
2581 static ushort_t
2582 ecc_gen(uint_t high_bytes, uint_t low_bytes)
2583 {
2584 	int i, j;
2585 	uchar_t checker, bit_mask;
2586 	struct {
2587 		uint_t hi, lo;
2588 	} hex_data, masked_data[8];
2589 
2590 	hex_data.hi = high_bytes;
2591 	hex_data.lo = low_bytes;
2592 
2593 	/* mask out bits according to sec-ded-s4ed ecc code */
2594 	for (i = 0; i < 8; i++) {
2595 		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
2596 		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
2597 	}
2598 
2599 	/*
2600 	 * xor all bits in masked_data[i] to get bit_i of checker,
2601 	 * where i = 0 to 7
2602 	 */
2603 	checker = 0;
2604 	for (i = 0; i < 8; i++) {
2605 		bit_mask = 1 << i;
2606 		for (j = 0; j < 32; j++) {
2607 			if (masked_data[i].lo & 1) checker ^= bit_mask;
2608 			if (masked_data[i].hi & 1) checker ^= bit_mask;
2609 			masked_data[i].hi >>= 1;
2610 			masked_data[i].lo >>= 1;
2611 		}
2612 	}
2613 	return (checker);
2614 }
2615 
/*
 * Flush the entire ecache using displacement flush by reading through a
 * physical address range as large as the ecache.
 *
 * The range handed to flush_ecache() starts at ecache_flushaddr and is
 * twice the current CPU's ecache_size, with that CPU's ecache_linesize as
 * the line size argument.
 */
void
cpu_flush_ecache(void)
{
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}
2626 
2627 /*
2628  * read and display the data in the cache line where the
2629  * original ce error occurred.
2630  * This routine is mainly used for debugging new hardware.
2631  */
2632 void
2633 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
2634 {
2635 	kpreempt_disable();
2636 	/* disable ECC error traps */
2637 	set_error_enable(EER_ECC_DISABLE);
2638 
2639 	/*
2640 	 * flush the ecache
2641 	 * read the data
2642 	 * check to see if an ECC error occured
2643 	 */
2644 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2645 	    cpunodes[CPU->cpu_id].ecache_linesize);
2646 	set_lsu(get_lsu() | cache_boot_state);
2647 	cpu_read_paddr(ecc, verbose, ce_err);
2648 	(void) check_ecc(ecc);
2649 
2650 	/* enable ECC error traps */
2651 	set_error_enable(EER_ENABLE);
2652 	kpreempt_enable();
2653 }
2654 
2655 /*
2656  * Check the AFSR bits for UE/CE persistence.
2657  * If UE or CE errors are detected, the routine will
2658  * clears all the AFSR sticky bits (except CP for
2659  * spitfire/blackbird) and the UDBs.
2660  * if ce_debug or ue_debug is set, log any ue/ce errors detected.
2661  */
2662 static int
2663 check_ecc(struct async_flt *ecc)
2664 {
2665 	uint64_t t_afsr;
2666 	uint64_t t_afar;
2667 	uint64_t udbh;
2668 	uint64_t udbl;
2669 	ushort_t udb;
2670 	int persistent = 0;
2671 
2672 	/*
2673 	 * Capture the AFSR, AFAR and UDBs info
2674 	 */
2675 	get_asyncflt(&t_afsr);
2676 	get_asyncaddr(&t_afar);
2677 	t_afar &= SABRE_AFAR_PA;
2678 	get_udb_errors(&udbh, &udbl);
2679 
2680 	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
2681 		/*
2682 		 * Clear the errors
2683 		 */
2684 		clr_datapath();
2685 
2686 		if (isus2i || isus2e)
2687 			set_asyncflt(t_afsr);
2688 		else
2689 			set_asyncflt(t_afsr & ~P_AFSR_CP);
2690 
2691 		/*
2692 		 * determine whether to check UDBH or UDBL for persistence
2693 		 */
2694 		if (ecc->flt_synd & UDBL_REG) {
2695 			udb = (ushort_t)udbl;
2696 			t_afar |= 0x8;
2697 		} else {
2698 			udb = (ushort_t)udbh;
2699 		}
2700 
2701 		if (ce_debug || ue_debug) {
2702 			spitf_async_flt spf_flt; /* for logging */
2703 			struct async_flt *aflt =
2704 				(struct async_flt *)&spf_flt;
2705 
2706 			/* Package the info nicely in the spf_flt struct */
2707 			bzero(&spf_flt, sizeof (spitf_async_flt));
2708 			aflt->flt_stat = t_afsr;
2709 			aflt->flt_addr = t_afar;
2710 			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
2711 			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);
2712 
2713 			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
2714 			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
2715 			    " check_ecc: Dumping captured error states ...");
2716 		}
2717 
2718 		/*
2719 		 * if the fault addresses don't match, not persistent
2720 		 */
2721 		if (t_afar != ecc->flt_addr) {
2722 			return (persistent);
2723 		}
2724 
2725 		/*
2726 		 * check for UE persistence
2727 		 * since all DIMMs in the bank are identified for a UE,
2728 		 * there's no reason to check the syndrome
2729 		 */
2730 		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
2731 			persistent = 1;
2732 		}
2733 
2734 		/*
2735 		 * check for CE persistence
2736 		 */
2737 		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
2738 			if ((udb & P_DER_E_SYND) ==
2739 			    (ecc->flt_synd & P_DER_E_SYND)) {
2740 				persistent = 1;
2741 			}
2742 		}
2743 	}
2744 	return (persistent);
2745 }
2746 
#ifdef HUMMINGBIRD
/*
 * Clock divisors accepted by cpu_change_speed(), and the table mapping a
 * divisor to its E* mode register encoding.  Divisors with no encoding are
 * marked HB_ECLK_INVALID.
 */
#define	HB_FULL_DIV		1
#define	HB_HALF_DIV		2
#define	HB_LOWEST_DIV		8
#define	HB_ECLK_INVALID		0xdeadbad
static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
	HB_ECLK_8 };

/* Values for the "direction" argument of CHANGE_REFRESH_COUNT() */
#define	HB_SLOW_DOWN		0
#define	HB_SPEED_UP		1

/*
 * Write a new mode to the E* mode register and wait for the PLL to lock.
 * The two statements are wrapped in a block, like the other multi-statement
 * macros below, so that the macro behaves as a single statement even when
 * used as the body of an unbraced "if" or loop.
 */
#define	SET_ESTAR_MODE(mode)					\
{								\
	stdphysio(HB_ESTAR_MODE, (mode));			\
	/*							\
	 * PLL logic requires minimum of 16 clock		\
	 * cycles to lock to the new clock speed.		\
	 * Wait 1 usec to satisfy this requirement.		\
	 */							\
	drv_usecwait(1);					\
}

/*
 * Reprogram the memory refresh count for the new divisor, and when slowing
 * down without Memory Self Refresh enabled, wait for the new count to take
 * effect.
 */
#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
{								\
	volatile uint64_t data;					\
	uint64_t count, new_count;				\
	clock_t delay;						\
	data = lddphysio(HB_MEM_CNTRL0);			\
	count = (data & HB_REFRESH_COUNT_MASK) >>		\
	    HB_REFRESH_COUNT_SHIFT;				\
	new_count = (HB_REFRESH_INTERVAL *			\
	    cpunodes[CPU->cpu_id].clock_freq) /			\
	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
	/*							\
	 * If we are slowing down the cpu and Memory		\
	 * Self Refresh is not enabled, it is required		\
	 * to wait for old refresh count to count-down and	\
	 * new refresh count to go into effect (let new value	\
	 * counts down once).					\
	 */							\
	if ((direction) == HB_SLOW_DOWN &&			\
	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
		/*						\
		 * Each count takes 64 cpu clock cycles		\
		 * to decrement.  Wait for current refresh	\
		 * count plus new refresh count at current	\
		 * cpu speed to count down to zero.  Round	\
		 * up the delay time.				\
		 */						\
		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
		    (count + new_count) * MICROSEC * (cur_div)) /\
		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
		drv_usecwait(delay);				\
	}							\
}

/*
 * Set the Memory Self Refresh field of HB_MEM_CNTRL0 to "bit" and read the
 * register back.
 */
#define	SET_SELF_REFRESH(bit)					\
{								\
	volatile uint64_t data;					\
	data = lddphysio(HB_MEM_CNTRL0);			\
	data = (data & ~HB_SELF_REFRESH_MASK) |			\
	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
}
#endif	/* HUMMINGBIRD */
2817 
/*
 * Change the CPU clock divisor to new_divisor.
 *
 * Without HUMMINGBIRD defined this routine is a no-op.  With it, the
 * requested divisor must lie in [HB_FULL_DIV, HB_LOWEST_DIV] and have a
 * valid hb_eclk[] entry, otherwise a warning is logged and nothing changes.
 * The current divisor is derived from the E* mode register; failing to
 * determine it is a panic.  Transitions between full speed and anything
 * slower than half speed are made in two steps through half speed.  On
 * completion CPU->cpu_m.divisor records the new divisor.  arg2 is unused.
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
{
#ifdef HUMMINGBIRD
	uint64_t cur_mask, cur_divisor = 0;
	volatile uint64_t reg;
	int index;

	/* Reject divisors that are out of range or have no encoding. */
	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
		    new_divisor);
		return;
	}

	/* Map the current E* mode register value back to a divisor. */
	reg = lddphysio(HB_ESTAR_MODE);
	cur_mask = reg & HB_ECLK_MASK;
	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
		if (hb_eclk[index] == cur_mask) {
			cur_divisor = index;
			break;
		}
	}

	if (cur_divisor == 0)
		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
		    "can't be determined!");

	/*
	 * If we are already at the requested divisor speed, just
	 * return.
	 */
	if (cur_divisor == new_divisor)
		return;

	/*
	 * When slowing down, the refresh count is changed before the clock
	 * mode; when speeding up, the clock mode is changed first.  Self
	 * refresh is enabled after dropping to half speed and disabled
	 * before returning to full speed.
	 */
	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);

	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * lower speed.
		 */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * full speed.
		 */
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);

		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);

	} else if (cur_divisor < new_divisor) {
		/* Slowing down between two sub-full speeds. */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > new_divisor) {
		/* Speeding up between two sub-full speeds. */
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
	}
	CPU->cpu_m.divisor = (uchar_t)new_divisor;
#endif
}
2903 
2904 /*
2905  * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
2906  * we clear all the sticky bits. If a non-null pointer to a async fault
2907  * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
2908  * info will be returned in the structure.  If a non-null pointer to a
2909  * uint64_t is passed in, this will be updated if the CP bit is set in the
2910  * AFSR.  The afsr will be returned.
2911  */
2912 static uint64_t
2913 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
2914 {
2915 	struct async_flt *aflt = (struct async_flt *)spf_flt;
2916 	uint64_t afsr;
2917 	uint64_t udbh, udbl;
2918 
2919 	get_asyncflt(&afsr);
2920 
2921 	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
2922 		*acc_afsr |= afsr;
2923 
2924 	if (spf_flt != NULL) {
2925 		aflt->flt_stat = afsr;
2926 		get_asyncaddr(&aflt->flt_addr);
2927 		aflt->flt_addr &= SABRE_AFAR_PA;
2928 
2929 		get_udb_errors(&udbh, &udbl);
2930 		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
2931 		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
2932 	}
2933 
2934 	set_asyncflt(afsr);		/* clear afsr */
2935 	clr_datapath();			/* clear udbs */
2936 	return (afsr);
2937 }
2938 
2939 /*
2940  * Scan the ecache to look for bad lines.  If found, the afsr, afar, e$ data
2941  * tag of the first bad line will be returned. We also return the old-afsr
2942  * (before clearing the sticky bits). The linecnt data will be updated to
2943  * indicate the number of bad lines detected.
2944  */
2945 static void
2946 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
2947 	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
2948 {
2949 	ec_data_t t_ecdata[8];
2950 	uint64_t t_etag, oafsr;
2951 	uint64_t pa = AFLT_INV_ADDR;
2952 	uint32_t i, j, ecache_sz;
2953 	uint64_t acc_afsr = 0;
2954 	uint64_t *cpu_afsr = NULL;
2955 
2956 	if (CPU_PRIVATE(CPU) != NULL)
2957 		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2958 
2959 	*linecnt = 0;
2960 	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;
2961 
2962 	for (i = 0; i < ecache_sz; i += 64) {
2963 		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
2964 		    cpu_afsr);
2965 		acc_afsr |= oafsr;
2966 
2967 		/*
2968 		 * Scan through the whole 64 bytes line in 8 8-byte chunks
2969 		 * looking for the first occurrence of an EDP error.  The AFSR
2970 		 * info is captured for each 8-byte chunk.  Note that for
2971 		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
2972 		 * 16-byte chunk granularity (i.e. the AFSR will be the same
2973 		 * for the high and low 8-byte words within the 16-byte chunk).
2974 		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
2975 		 * granularity and only PSYND bits [7:0] are used.
2976 		 */
2977 		for (j = 0; j < 8; j++) {
2978 			ec_data_t *ecdptr = &t_ecdata[j];
2979 
2980 			if (ecdptr->ec_afsr & P_AFSR_EDP) {
2981 				uint64_t errpa;
2982 				ushort_t psynd;
2983 				uint32_t ec_set_size = ecache_sz /
2984 				    ecache_associativity;
2985 
2986 				/*
2987 				 * For Spitfire/Blackbird, we need to look at
2988 				 * the PSYND to make sure that this 8-byte chunk
2989 				 * is the right one.  PSYND bits [15:8] belong
2990 				 * to the upper 8-byte (even) chunk.  Bits
2991 				 * [7:0] belong to the lower 8-byte chunk (odd).
2992 				 */
2993 				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
2994 				if (!isus2i && !isus2e) {
2995 					if (j & 0x1)
2996 						psynd = psynd & 0xFF;
2997 					else
2998 						psynd = psynd >> 8;
2999 
3000 					if (!psynd)
3001 						continue; /* wrong chunk */
3002 				}
3003 
3004 				/* Construct the PA */
3005 				errpa = ((t_etag & cpu_ec_tag_mask) <<
3006 				    cpu_ec_tag_shift) | ((i | (j << 3)) %
3007 				    ec_set_size);
3008 
3009 				/* clean up the cache line */
3010 				flushecacheline(P2ALIGN(errpa, 64),
3011 					cpunodes[CPU->cpu_id].ecache_size);
3012 
3013 				oafsr = clear_errors(NULL, cpu_afsr);
3014 				acc_afsr |= oafsr;
3015 
3016 				(*linecnt)++;
3017 
3018 				/*
3019 				 * Capture the PA for the first bad line found.
3020 				 * Return the ecache dump and tag info.
3021 				 */
3022 				if (pa == AFLT_INV_ADDR) {
3023 					int k;
3024 
3025 					pa = errpa;
3026 					for (k = 0; k < 8; k++)
3027 						ecache_data[k] = t_ecdata[k];
3028 					*ecache_tag = t_etag;
3029 				}
3030 				break;
3031 			}
3032 		}
3033 	}
3034 	*t_afar = pa;
3035 	*t_afsr = acc_afsr;
3036 }
3037 
3038 static void
3039 cpu_log_ecmem_info(spitf_async_flt *spf_flt)
3040 {
3041 	struct async_flt *aflt = (struct async_flt *)spf_flt;
3042 	uint64_t ecache_tag = spf_flt->flt_ec_tag;
3043 	char linestr[30];
3044 	char *state_str;
3045 	int i;
3046 
3047 	/*
3048 	 * Check the ecache tag to make sure it
3049 	 * is valid. If invalid, a memory dump was
3050 	 * captured instead of a ecache dump.
3051 	 */
3052 	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
3053 		uchar_t eparity = (uchar_t)
3054 		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);
3055 
3056 		uchar_t estate = (uchar_t)
3057 		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);
3058 
3059 		if (estate == cpu_ec_state_shr)
3060 			state_str = "Shared";
3061 		else if (estate == cpu_ec_state_exl)
3062 			state_str = "Exclusive";
3063 		else if (estate == cpu_ec_state_own)
3064 			state_str = "Owner";
3065 		else if (estate == cpu_ec_state_mod)
3066 			state_str = "Modified";
3067 		else
3068 			state_str = "Invalid";
3069 
3070 		if (spf_flt->flt_ec_lcnt > 1) {
3071 			(void) snprintf(linestr, sizeof (linestr),
3072 			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
3073 		} else {
3074 			linestr[0] = '\0';
3075 		}
3076 
3077 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3078 		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
3079 		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
3080 		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
3081 		    (uint32_t)ecache_tag, state_str,
3082 		    (uint32_t)eparity, linestr);
3083 	} else {
3084 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3085 		    " E$tag != PA from AFAR; E$line was victimized"
3086 		    "\n    dumping memory from PA 0x%08x.%08x instead",
3087 		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
3088 		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
3089 	}
3090 
3091 	/*
3092 	 * Dump out all 8 8-byte ecache data captured
3093 	 * For each 8-byte data captured, we check the
3094 	 * captured afsr's parity syndrome to find out
3095 	 * which 8-byte chunk is bad. For memory dump, the
3096 	 * AFSR values were initialized to 0.
3097 	 */
3098 	for (i = 0; i < 8; i++) {
3099 		ec_data_t *ecdptr;
3100 		uint_t offset;
3101 		ushort_t psynd;
3102 		ushort_t bad;
3103 		uint64_t edp;
3104 
3105 		offset = i << 3;	/* multiply by 8 */
3106 		ecdptr = &spf_flt->flt_ec_data[i];
3107 		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3108 		edp = ecdptr->ec_afsr & P_AFSR_EDP;
3109 
3110 		/*
3111 		 * For Sabre/Hummingbird, parity synd is captured only
3112 		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
3113 		 * For spitfire/blackbird, AFSR.PSYND is captured
3114 		 * in 16-byte granularity. [15:8] represent
3115 		 * the upper 8 byte and [7:0] the lower 8 byte.
3116 		 */
3117 		if (isus2i || isus2e || (i & 0x1))
3118 			bad = (psynd & 0xFF);		/* check bits [7:0] */
3119 		else
3120 			bad = (psynd & 0xFF00);		/* check bits [15:8] */
3121 
3122 		if (bad && edp) {
3123 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3124 			    " E$Data (0x%02x): 0x%08x.%08x "
3125 			    "*Bad* PSYND=0x%04x", offset,
3126 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3127 			    (uint32_t)ecdptr->ec_d8, psynd);
3128 		} else {
3129 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3130 			    " E$Data (0x%02x): 0x%08x.%08x", offset,
3131 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3132 			    (uint32_t)ecdptr->ec_d8);
3133 		}
3134 	}
3135 }
3136 
3137 /*
3138  * Common logging function for all cpu async errors.  This function allows the
3139  * caller to generate a single cmn_err() call that logs the appropriate items
3140  * from the fault structure, and implements our rules for AFT logging levels.
3141  *
3142  *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
3143  *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
3144  *	spflt: pointer to spitfire async fault structure
3145  *	logflags: bitflags indicating what to output
3146  *	endstr: a end string to appear at the end of this log
3147  *	fmt: a format string to appear at the beginning of the log
3148  *
3149  * The logflags allows the construction of predetermined output from the spflt
3150  * structure.  The individual data items always appear in a consistent order.
3151  * Note that either or both of the spflt structure pointer and logflags may be
3152  * NULL or zero respectively, indicating that the predetermined output
3153  * substrings are not requested in this log.  The output looks like this:
3154  *
3155  *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
 *	<CPU_SPACE><CPU_TL><CPU_ERRID>
3157  *	newline+4spaces<CPU_AFSR><CPU_AFAR>
3158  *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
3159  *	newline+4spaces<CPU_UDBH><CPU_UDBL>
3160  *	newline+4spaces<CPU_SYND>
3161  *	newline+4spaces<endstr>
3162  *
 * Note that <endstr> may not start on a newline if we are logging <CPU_SYND>;
3164  * it is assumed that <endstr> will be the unum string in this case.  The size
3165  * of our intermediate formatting buf[] is based on the worst case of all flags
3166  * being enabled.  We pass the caller's varargs directly to vcmn_err() for
3167  * formatting so we don't need additional stack space to format them here.
3168  */
3169 /*PRINTFLIKE6*/
3170 static void
3171 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
3172 	const char *endstr, const char *fmt, ...)
3173 {
3174 	struct async_flt *aflt = (struct async_flt *)spflt;
3175 	char buf[400], *p, *q; /* see comments about buf[] size above */
3176 	va_list ap;
3177 	int console_log_flag;
3178 
3179 	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
3180 				(aflt->flt_stat & P_AFSR_LEVEL1)) ||
3181 	    (aflt->flt_panic)) {
3182 		console_log_flag = (tagnum < 2) || aft_verbose;
3183 	} else {
3184 		int verbose = ((aflt->flt_class == BUS_FAULT) ||
3185 		    (aflt->flt_stat & P_AFSR_CE)) ?
3186 		    ce_verbose_memory : ce_verbose_other;
3187 
3188 		if (!verbose)
3189 			return;
3190 
3191 		console_log_flag = (verbose > 1);
3192 	}
3193 
3194 	if (console_log_flag)
3195 		(void) sprintf(buf, "[AFT%d]", tagnum);
3196 	else
3197 		(void) sprintf(buf, "![AFT%d]", tagnum);
3198 
3199 	p = buf + strlen(buf);	/* current buffer position */
3200 	q = buf + sizeof (buf);	/* pointer past end of buffer */
3201 
3202 	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
3203 		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
3204 		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
3205 		p += strlen(p);
3206 	}
3207 
3208 	/*
3209 	 * Copy the caller's format string verbatim into buf[].  It will be
3210 	 * formatted by the call to vcmn_err() at the end of this function.
3211 	 */
3212 	if (fmt != NULL && p < q) {
3213 		(void) strncpy(p, fmt, (size_t)(q - p - 1));
3214 		buf[sizeof (buf) - 1] = '\0';
3215 		p += strlen(p);
3216 	}
3217 
3218 	if (spflt != NULL) {
3219 		if (logflags & CPU_FLTCPU) {
3220 			(void) snprintf(p, (size_t)(q - p), " CPU%d",
3221 			    aflt->flt_inst);
3222 			p += strlen(p);
3223 		}
3224 
3225 		if (logflags & CPU_SPACE) {
3226 			if (aflt->flt_status & ECC_D_TRAP)
3227 				(void) snprintf(p, (size_t)(q - p),
3228 				    " Data access");
3229 			else if (aflt->flt_status & ECC_I_TRAP)
3230 				(void) snprintf(p, (size_t)(q - p),
3231 				    " Instruction access");
3232 			p += strlen(p);
3233 		}
3234 
3235 		if (logflags & CPU_TL) {
3236 			(void) snprintf(p, (size_t)(q - p), " at TL%s",
3237 			    aflt->flt_tl ? ">0" : "=0");
3238 			p += strlen(p);
3239 		}
3240 
3241 		if (logflags & CPU_ERRID) {
3242 			(void) snprintf(p, (size_t)(q - p),
3243 			    ", errID 0x%08x.%08x",
3244 			    (uint32_t)(aflt->flt_id >> 32),
3245 			    (uint32_t)aflt->flt_id);
3246 			p += strlen(p);
3247 		}
3248 
3249 		if (logflags & CPU_AFSR) {
3250 			(void) snprintf(p, (size_t)(q - p),
3251 			    "\n    AFSR 0x%08b.%08b",
3252 			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
3253 			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
3254 			p += strlen(p);
3255 		}
3256 
3257 		if (logflags & CPU_AFAR) {
3258 			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
3259 			    (uint32_t)(aflt->flt_addr >> 32),
3260 			    (uint32_t)aflt->flt_addr);
3261 			p += strlen(p);
3262 		}
3263 
3264 		if (logflags & CPU_AF_PSYND) {
3265 			ushort_t psynd = (ushort_t)
3266 			    (aflt->flt_stat & P_AFSR_P_SYND);
3267 
3268 			(void) snprintf(p, (size_t)(q - p),
3269 			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
3270 			    psynd, ecc_psynd_score(psynd));
3271 			p += strlen(p);
3272 		}
3273 
3274 		if (logflags & CPU_AF_ETS) {
3275 			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
3276 			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
3277 			p += strlen(p);
3278 		}
3279 
3280 		if (logflags & CPU_FAULTPC) {
3281 			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
3282 			    (void *)aflt->flt_pc);
3283 			p += strlen(p);
3284 		}
3285 
3286 		if (logflags & CPU_UDBH) {
3287 			(void) snprintf(p, (size_t)(q - p),
3288 			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
3289 			    spflt->flt_sdbh, UDB_FMTSTR,
3290 			    spflt->flt_sdbh & 0xFF);
3291 			p += strlen(p);
3292 		}
3293 
3294 		if (logflags & CPU_UDBL) {
3295 			(void) snprintf(p, (size_t)(q - p),
3296 			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
3297 			    spflt->flt_sdbl, UDB_FMTSTR,
3298 			    spflt->flt_sdbl & 0xFF);
3299 			p += strlen(p);
3300 		}
3301 
3302 		if (logflags & CPU_SYND) {
3303 			ushort_t synd = SYND(aflt->flt_synd);
3304 
3305 			(void) snprintf(p, (size_t)(q - p),
3306 			    "\n    %s Syndrome 0x%x Memory Module ",
3307 			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
3308 			p += strlen(p);
3309 		}
3310 	}
3311 
3312 	if (endstr != NULL) {
3313 		if (!(logflags & CPU_SYND))
3314 			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
3315 		else
3316 			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
3317 		p += strlen(p);
3318 	}
3319 
3320 	if (ce_code == CE_CONT && (p < q - 1))
3321 		(void) strcpy(p, "\n"); /* add final \n if needed */
3322 
3323 	va_start(ap, fmt);
3324 	vcmn_err(ce_code, buf, ap);
3325 	va_end(ap);
3326 }
3327 
3328 /*
3329  * Ecache Scrubbing
3330  *
3331  * The basic idea is to prevent lines from sitting in the ecache long enough
3332  * to build up soft errors which can lead to ecache parity errors.
3333  *
3334  * The following rules are observed when flushing the ecache:
3335  *
3336  * 1. When the system is busy, flush bad clean lines
3337  * 2. When the system is idle, flush all clean lines
3338  * 3. When the system is idle, flush good dirty lines
3339  * 4. Never flush bad dirty lines.
3340  *
3341  *	modify	parity	busy   idle
3342  *	----------------------------
3343  *	clean	good		X
3344  * 	clean	bad	X	X
3345  * 	dirty	good		X
3346  *	dirty	bad
3347  *
3348  * Bad or good refers to whether a line has an E$ parity error or not.
3349  * Clean or dirty refers to the state of the modified bit.  We currently
3350  * default the scan rate to 100 (scan 10% of the cache per second).
3351  *
3352  * The following are E$ states and actions.
3353  *
3354  * We encode our state as a 3-bit number, consisting of:
3355  *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
3356  *	ECACHE_STATE_PARITY	(0=good,  1=bad)
3357  *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
3358  *
3359  * We associate a flushing and a logging action with each state.
3360  *
3361  * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3362  * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
3363  * E$ only, in addition to value being set by ec_flush.
3364  */
3365 
3366 #define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
3367 #define	NEVER_FLUSH		0x0	/* never the flush the E$ line */
3368 #define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */
3369 
3370 struct {
3371 	char	ec_flush;		/* whether to flush or not */
3372 	char	ec_log;			/* ecache logging */
3373 	char	ec_log_type;		/* log type info */
3374 } ec_action[] = {	/* states of the E$ line in M P B */
3375 	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
3376 	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
3377 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
3378 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
3379 	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
3380 	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
3381 	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
3382 	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
3383 };
3384 
/*
 * Offsets into the ec_action[] that determines clean_good_busy and
 * dirty_good_busy lines.
 */
#define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
#define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */

/*
 * CGB(x, m) / DGB(x, m) are true when the line state x is clean_good_busy /
 * dirty_good_busy and the E$ type m is not ECACHE_CPU_MIRROR; these are the
 * lines that may be flushed conditionally while the cpu is busy.  Both
 * arguments are fully parenthesized so that any expression may be passed.
 */
#define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && ((m) != ECACHE_CPU_MIRROR))
#define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && ((m) != ECACHE_CPU_MIRROR))

/* Bits of the 3-bit E$ line state used to index ec_action[]. */
#define	ECACHE_STATE_MODIFIED	0x4	/* 0=clean, 1=dirty */
#define	ECACHE_STATE_PARITY	0x2	/* 0=good,  1=bad */
#define	ECACHE_STATE_BUSY	0x1	/* 0=idle,  1=busy */
3402 
3403 /*
3404  * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced.
3405  */
3406 int ecache_calls_a_sec_mirrored = 1;
3407 int ecache_lines_per_call_mirrored = 1;
3408 
3409 int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
3410 int ecache_scrub_verbose = 1;		/* prints clean and dirty lines */
3411 int ecache_scrub_panic = 0;		/* panics on a clean and dirty line */
3412 int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
3413 int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
3414 int ecache_idle_factor = 1;		/* increase the scan rate when idle */
3415 int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
3416 int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */
3417 
3418 volatile int ec_timeout_calls = 1;	/* timeout calls */
3419 
3420 /*
3421  * Interrupt number and pil for ecache scrubber cross-trap calls.
3422  */
3423 static uint64_t ecache_scrub_inum;
3424 uint_t ecache_scrub_pil = PIL_9;
3425 
3426 /*
3427  * Kstats for the E$ scrubber.
3428  */
3429 typedef struct ecache_kstat {
3430 	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
3431 	kstat_named_t clean_good_busy;		/* # of lines skipped */
3432 	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
3433 	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
3434 	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
3435 	kstat_named_t dirty_good_busy;		/* # of lines skipped */
3436 	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
3437 	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
3438 	kstat_named_t invalid_lines;		/* # of invalid lines */
3439 	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
3440 	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
3441 	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
3442 } ecache_kstat_t;
3443 
3444 static ecache_kstat_t ec_kstat_template = {
3445 	{ "clean_good_idle", KSTAT_DATA_ULONG },
3446 	{ "clean_good_busy", KSTAT_DATA_ULONG },
3447 	{ "clean_bad_idle", KSTAT_DATA_ULONG },
3448 	{ "clean_bad_busy", KSTAT_DATA_ULONG },
3449 	{ "dirty_good_idle", KSTAT_DATA_ULONG },
3450 	{ "dirty_good_busy", KSTAT_DATA_ULONG },
3451 	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
3452 	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
3453 	{ "invalid_lines", KSTAT_DATA_ULONG },
3454 	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
3455 	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
3456 	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
3457 };
3458 
3459 struct kmem_cache *sf_private_cache;
3460 
3461 /*
3462  * Called periodically on each CPU to scan the ecache once a sec.
3463  * adjusting the ecache line index appropriately
3464  */
3465 void
3466 scrub_ecache_line()
3467 {
3468 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3469 	int cpuid = CPU->cpu_id;
3470 	uint32_t index = ssmp->ecache_flush_index;
3471 	uint64_t ec_size = cpunodes[cpuid].ecache_size;
3472 	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
3473 	int nlines = ssmp->ecache_nlines;
3474 	uint32_t ec_set_size = ec_size / ecache_associativity;
3475 	int ec_mirror = ssmp->ecache_mirror;
3476 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
3477 
3478 	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
3479 	int mpb;		/* encode Modified, Parity, Busy for action */
3480 	uchar_t state;
3481 	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
3482 	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3483 	ec_data_t ec_data[8];
3484 	kstat_named_t *ec_knp;
3485 
3486 	switch (ec_mirror) {
3487 		default:
3488 		case ECACHE_CPU_NON_MIRROR:
3489 			/*
3490 			 * The E$ scan rate is expressed in units of tenths of
3491 			 * a percent.  ecache_scan_rate = 1000 (100%) means the
3492 			 * whole cache is scanned every second.
3493 			 */
3494 			scan_lines = (nlines * ecache_scan_rate) /
3495 					(1000 * ecache_calls_a_sec);
3496 			if (!(ssmp->ecache_busy)) {
3497 				if (ecache_idle_factor > 0) {
3498 					scan_lines *= ecache_idle_factor;
3499 				}
3500 			} else {
3501 				flush_clean_busy = (scan_lines *
3502 					ecache_flush_clean_good_busy) / 100;
3503 				flush_dirty_busy = (scan_lines *
3504 					ecache_flush_dirty_good_busy) / 100;
3505 			}
3506 
3507 			ec_timeout_calls = (ecache_calls_a_sec ?
3508 						ecache_calls_a_sec : 1);
3509 			break;
3510 
3511 		case ECACHE_CPU_MIRROR:
3512 			scan_lines = ecache_lines_per_call_mirrored;
3513 			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
3514 					ecache_calls_a_sec_mirrored : 1);
3515 			break;
3516 	}
3517 
3518 	/*
3519 	 * The ecache scrubber algorithm operates by reading and
3520 	 * decoding the E$ tag to determine whether the corresponding E$ line
3521 	 * can be scrubbed. There is a implicit assumption in the scrubber
3522 	 * logic that the E$ tag is valid. Unfortunately, this assertion is
3523 	 * flawed since the E$ tag may also be corrupted and have parity errors
3524 	 * The scrubber logic is enhanced to check the validity of the E$ tag
3525 	 * before scrubbing. When a parity error is detected in the E$ tag,
3526 	 * it is possible to recover and scrub the tag under certain conditions
3527 	 * so that a ETP error condition can be avoided.
3528 	 */
3529 
3530 	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
3531 		/*
3532 		 * We get the old-AFSR before clearing the AFSR sticky bits
3533 		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
3534 		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
3535 		 */
3536 		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
3537 		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
3538 				cpu_ec_state_shift);
3539 
3540 		/*
3541 		 * ETP is set try to scrub the ecache tag.
3542 		 */
3543 		if (nafsr & P_AFSR_ETP) {
3544 			ecache_scrub_tag_err(nafsr, state, index);
3545 		} else if (state & cpu_ec_state_valid) {
3546 			/*
3547 			 * ETP is not set, E$ tag is valid.
3548 			 * Proceed with the E$ scrubbing.
3549 			 */
3550 			if (state & cpu_ec_state_dirty)
3551 				mpb |= ECACHE_STATE_MODIFIED;
3552 
3553 			tafsr = check_ecache_line(index, acc_afsr);
3554 
3555 			if (tafsr & P_AFSR_EDP) {
3556 				mpb |= ECACHE_STATE_PARITY;
3557 
3558 				if (ecache_scrub_verbose ||
3559 							ecache_scrub_panic) {
3560 					get_ecache_dtag(P2ALIGN(index, 64),
3561 						(uint64_t *)&ec_data[0],
3562 						&ec_tag, &oafsr, acc_afsr);
3563 				}
3564 			}
3565 
3566 			if (ssmp->ecache_busy)
3567 				mpb |= ECACHE_STATE_BUSY;
3568 
3569 			ec_knp = (kstat_named_t *)ec_ksp + mpb;
3570 			ec_knp->value.ul++;
3571 
3572 			paddr = ((ec_tag & cpu_ec_tag_mask) <<
3573 				cpu_ec_tag_shift) | (index % ec_set_size);
3574 
3575 			/*
3576 			 * We flush the E$ lines depending on the ec_flush,
3577 			 * we additionally flush clean_good_busy and
3578 			 * dirty_good_busy lines for mirrored E$.
3579 			 */
3580 			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
3581 				flushecacheline(paddr, ec_size);
3582 			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
3583 				(ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
3584 					flushecacheline(paddr, ec_size);
3585 			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
3586 				softcall(ecache_page_retire, (void *)paddr);
3587 			}
3588 
3589 			/*
3590 			 * Conditionally flush both the clean_good and
3591 			 * dirty_good lines when busy.
3592 			 */
3593 			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
3594 				flush_clean_busy--;
3595 				flushecacheline(paddr, ec_size);
3596 				ec_ksp->clean_good_busy_flush.value.ul++;
3597 			} else if (DGB(mpb, ec_mirror) &&
3598 						(flush_dirty_busy > 0)) {
3599 				flush_dirty_busy--;
3600 				flushecacheline(paddr, ec_size);
3601 				ec_ksp->dirty_good_busy_flush.value.ul++;
3602 			}
3603 
3604 			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
3605 						ecache_scrub_panic)) {
3606 				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
3607 						tafsr);
3608 			}
3609 
3610 		} else {
3611 			ec_ksp->invalid_lines.value.ul++;
3612 		}
3613 
3614 		if ((index += ec_linesize) >= ec_size)
3615 			index = 0;
3616 
3617 	}
3618 
3619 	/*
3620 	 * set the ecache scrub index for the next time around
3621 	 */
3622 	ssmp->ecache_flush_index = index;
3623 
3624 	if (*acc_afsr & P_AFSR_CP) {
3625 		uint64_t ret_afsr;
3626 
3627 		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
3628 		if ((ret_afsr & P_AFSR_CP) == 0)
3629 			*acc_afsr = 0;
3630 	}
3631 }
3632 
3633 /*
3634  * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
3635  * we decrement the outstanding request count to zero.
3636  */
3637 
3638 /*ARGSUSED*/
3639 uint_t
3640 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
3641 {
3642 	int i;
3643 	int outstanding;
3644 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3645 	uint32_t *countp = &ssmp->ec_scrub_outstanding;
3646 
3647 	do {
3648 		outstanding = *countp;
3649 		ASSERT(outstanding > 0);
3650 		for (i = 0; i < outstanding; i++)
3651 			scrub_ecache_line();
3652 	} while (atomic_add_32_nv(countp, -outstanding));
3653 
3654 	return (DDI_INTR_CLAIMED);
3655 }
3656 
3657 /*
3658  * force each cpu to perform an ecache scrub, called from a timeout
3659  */
3660 extern xcfunc_t ecache_scrubreq_tl1;
3661 
3662 void
3663 do_scrub_ecache_line(void)
3664 {
3665 	long delta;
3666 
3667 	if (ecache_calls_a_sec > hz)
3668 		ecache_calls_a_sec = hz;
3669 	else if (ecache_calls_a_sec <= 0)
3670 	    ecache_calls_a_sec = 1;
3671 
3672 	if (ecache_calls_a_sec_mirrored > hz)
3673 		ecache_calls_a_sec_mirrored = hz;
3674 	else if (ecache_calls_a_sec_mirrored <= 0)
3675 	    ecache_calls_a_sec_mirrored = 1;
3676 
3677 	if (ecache_scrub_enable) {
3678 		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
3679 		delta = hz / ec_timeout_calls;
3680 	} else {
3681 		delta = hz;
3682 	}
3683 
3684 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3685 		delta);
3686 }
3687 
3688 /*
3689  * initialization for ecache scrubbing
3690  * This routine is called AFTER all cpus have had cpu_init_private called
3691  * to initialize their private data areas.
3692  */
3693 void
3694 cpu_init_cache_scrub(void)
3695 {
3696 	if (ecache_calls_a_sec > hz) {
3697 		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
3698 		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
3699 		ecache_calls_a_sec = hz;
3700 	}
3701 
3702 	/*
3703 	 * Register softint for ecache scrubbing.
3704 	 */
3705 	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
3706 	    scrub_ecache_line_intr, NULL, SOFTINT_MT);
3707 
3708 	/*
3709 	 * kick off the scrubbing using realtime timeout
3710 	 */
3711 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3712 	    hz / ecache_calls_a_sec);
3713 }
3714 
3715 /*
3716  * Unset the busy flag for this cpu.
3717  */
3718 void
3719 cpu_idle_ecache_scrub(struct cpu *cp)
3720 {
3721 	if (CPU_PRIVATE(cp) != NULL) {
3722 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3723 							sfpr_scrub_misc);
3724 		ssmp->ecache_busy = ECACHE_CPU_IDLE;
3725 	}
3726 }
3727 
3728 /*
3729  * Set the busy flag for this cpu.
3730  */
3731 void
3732 cpu_busy_ecache_scrub(struct cpu *cp)
3733 {
3734 	if (CPU_PRIVATE(cp) != NULL) {
3735 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3736 							sfpr_scrub_misc);
3737 		ssmp->ecache_busy = ECACHE_CPU_BUSY;
3738 	}
3739 }
3740 
3741 /*
3742  * initialize the ecache scrubber data structures
3743  * The global entry point cpu_init_private replaces this entry point.
3744  *
3745  */
3746 static void
3747 cpu_init_ecache_scrub_dr(struct cpu *cp)
3748 {
3749 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3750 	int cpuid = cp->cpu_id;
3751 
3752 	/*
3753 	 * intialize bookkeeping for cache scrubbing
3754 	 */
3755 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3756 
3757 	ssmp->ecache_flush_index = 0;
3758 
3759 	ssmp->ecache_nlines =
3760 		cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
3761 
3762 	/*
3763 	 * Determine whether we are running on mirrored SRAM
3764 	 */
3765 
3766 	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
3767 		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
3768 	else
3769 		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
3770 
3771 	cpu_busy_ecache_scrub(cp);
3772 
3773 	/*
3774 	 * initialize the kstats
3775 	 */
3776 	ecache_kstat_init(cp);
3777 }
3778 
3779 /*
3780  * uninitialize the ecache scrubber data structures
3781  * The global entry point cpu_uninit_private replaces this entry point.
3782  */
3783 static void
3784 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
3785 {
3786 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3787 
3788 	if (ssmp->ecache_ksp != NULL) {
3789 		kstat_delete(ssmp->ecache_ksp);
3790 		ssmp->ecache_ksp = NULL;
3791 	}
3792 
3793 	/*
3794 	 * un-initialize bookkeeping for cache scrubbing
3795 	 */
3796 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3797 
3798 	cpu_idle_ecache_scrub(cp);
3799 }
3800 
3801 struct kmem_cache *sf_private_cache;
3802 
3803 /*
3804  * Cpu private initialization.  This includes allocating the cpu_private
3805  * data structure, initializing it, and initializing the scrubber for this
3806  * cpu.  This is called once for EVERY cpu, including CPU 0. This function
3807  * calls cpu_init_ecache_scrub_dr to init the scrubber.
3808  * We use kmem_cache_create for the spitfire private data structure because it
3809  * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
3810  */
3811 void
3812 cpu_init_private(struct cpu *cp)
3813 {
3814 	spitfire_private_t *sfprp;
3815 
3816 	ASSERT(CPU_PRIVATE(cp) == NULL);
3817 
3818 	/*
3819 	 * If the sf_private_cache has not been created, create it.
3820 	 */
3821 	if (sf_private_cache == NULL) {
3822 		sf_private_cache = kmem_cache_create("sf_private_cache",
3823 			sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3824 			NULL, NULL, NULL, NULL, 0);
3825 		ASSERT(sf_private_cache);
3826 	}
3827 
3828 	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3829 
3830 	bzero(sfprp, sizeof (spitfire_private_t));
3831 
3832 	cpu_init_ecache_scrub_dr(cp);
3833 }
3834 
3835 /*
3836  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
3837  * deallocate the scrubber data structures and cpu_private data structure.
3838  * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit
3839  * the scrubber for the specified cpu.
3840  */
3841 void
3842 cpu_uninit_private(struct cpu *cp)
3843 {
3844 	ASSERT(CPU_PRIVATE(cp));
3845 
3846 	cpu_uninit_ecache_scrub_dr(cp);
3847 	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3848 	CPU_PRIVATE(cp) = NULL;
3849 }
3850 
3851 /*
3852  * initialize the ecache kstats for each cpu
3853  */
3854 static void
3855 ecache_kstat_init(struct cpu *cp)
3856 {
3857 	struct kstat *ksp;
3858 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3859 
3860 	ASSERT(ssmp != NULL);
3861 
3862 	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3863 	    KSTAT_TYPE_NAMED,
3864 	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3865 	    KSTAT_FLAG_WRITABLE)) == NULL) {
3866 		ssmp->ecache_ksp = NULL;
3867 		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3868 		return;
3869 	}
3870 
3871 	ssmp->ecache_ksp = ksp;
3872 	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3873 	kstat_install(ksp);
3874 }
3875 
3876 /*
3877  * log the bad ecache information
3878  */
3879 static void
3880 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3881 		uint64_t afsr)
3882 {
3883 	spitf_async_flt spf_flt;
3884 	struct async_flt *aflt;
3885 	int i;
3886 	char *class;
3887 
3888 	bzero(&spf_flt, sizeof (spitf_async_flt));
3889 	aflt = &spf_flt.cmn_asyncflt;
3890 
3891 	for (i = 0; i < 8; i++) {
3892 		spf_flt.flt_ec_data[i] = ec_data[i];
3893 	}
3894 
3895 	spf_flt.flt_ec_tag = ec_tag;
3896 
3897 	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
3898 		spf_flt.flt_type = ec_action[mpb].ec_log_type;
3899 	} else spf_flt.flt_type = (ushort_t)mpb;
3900 
3901 	aflt->flt_inst = CPU->cpu_id;
3902 	aflt->flt_class = CPU_FAULT;
3903 	aflt->flt_id = gethrtime_waitfree();
3904 	aflt->flt_addr = paddr;
3905 	aflt->flt_stat = afsr;
3906 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3907 
3908 	switch (mpb) {
3909 	case CPU_ECACHE_TAG_ERR:
3910 	case CPU_ECACHE_ADDR_PAR_ERR:
3911 	case CPU_ECACHE_ETP_ETS_ERR:
3912 	case CPU_ECACHE_STATE_ERR:
3913 		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3914 		break;
3915 	default:
3916 		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3917 		break;
3918 	}
3919 
3920 	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3921 	    ue_queue, aflt->flt_panic);
3922 
3923 	if (aflt->flt_panic)
3924 		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
3925 					"line detected");
3926 }
3927 
3928 /*
3929  * Process an ecache error that occured during the E$ scrubbing.
3930  * We do the ecache scan to find the bad line, flush the bad line
3931  * and start the memscrubber to find any UE (in memory or in another cache)
3932  */
3933 static uint64_t
3934 ecache_scrub_misc_err(int type, uint64_t afsr)
3935 {
3936 	spitf_async_flt spf_flt;
3937 	struct async_flt *aflt;
3938 	uint64_t oafsr;
3939 
3940 	bzero(&spf_flt, sizeof (spitf_async_flt));
3941 	aflt = &spf_flt.cmn_asyncflt;
3942 
3943 	/*
3944 	 * Scan each line in the cache to look for the one
3945 	 * with bad parity
3946 	 */
3947 	aflt->flt_addr = AFLT_INV_ADDR;
3948 	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3949 		&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3950 
3951 	if (oafsr & P_AFSR_CP) {
3952 		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3953 		*cp_afsr |= oafsr;
3954 	}
3955 
3956 	/*
3957 	 * If we found a bad PA, update the state to indicate if it is
3958 	 * memory or I/O space.
3959 	 */
3960 	if (aflt->flt_addr != AFLT_INV_ADDR) {
3961 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3962 			MMU_PAGESHIFT)) ? 1 : 0;
3963 	}
3964 
3965 	spf_flt.flt_type = (ushort_t)type;
3966 
3967 	aflt->flt_inst = CPU->cpu_id;
3968 	aflt->flt_class = CPU_FAULT;
3969 	aflt->flt_id = gethrtime_waitfree();
3970 	aflt->flt_status = afsr;
3971 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3972 
3973 	/*
3974 	 * We have the bad line, flush that line and start
3975 	 * the memscrubber.
3976 	 */
3977 	if (spf_flt.flt_ec_lcnt > 0) {
3978 		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
3979 			cpunodes[CPU->cpu_id].ecache_size);
3980 		read_all_memscrub = 1;
3981 		memscrub_run();
3982 	}
3983 
3984 	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
3985 	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
3986 	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
3987 
3988 	return (oafsr);
3989 }
3990 
3991 static void
3992 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
3993 {
3994 	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
3995 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3996 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
3997 	uint64_t ec_tag, paddr, oafsr;
3998 	ec_data_t ec_data[8];
3999 	int cpuid = CPU->cpu_id;
4000 	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
4001 						ecache_associativity;
4002 	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
4003 
4004 	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
4005 			&oafsr, cpu_afsr);
4006 	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
4007 						(index % ec_set_size);
4008 
4009 	/*
4010 	 * E$ tag state has good parity
4011 	 */
4012 	if ((afsr_ets & cpu_ec_state_parity) == 0) {
4013 		if (afsr_ets & cpu_ec_parity) {
4014 			/*
4015 			 * E$ tag state bits indicate the line is clean,
4016 			 * invalidate the E$ tag and continue.
4017 			 */
4018 			if (!(state & cpu_ec_state_dirty)) {
4019 				/*
4020 				 * Zero the tag and mark the state invalid
4021 				 * with good parity for the tag.
4022 				 */
4023 				if (isus2i || isus2e)
4024 					write_hb_ec_tag_parity(index);
4025 				else
4026 					write_ec_tag_parity(index);
4027 
4028 				/* Sync with the dual tag */
4029 				flushecacheline(0,
4030 					cpunodes[CPU->cpu_id].ecache_size);
4031 				ec_ksp->tags_cleared.value.ul++;
4032 				ecache_scrub_log(ec_data, ec_tag, paddr,
4033 					CPU_ECACHE_TAG_ERR, afsr);
4034 				return;
4035 			} else {
4036 				ecache_scrub_log(ec_data, ec_tag, paddr,
4037 					CPU_ECACHE_ADDR_PAR_ERR, afsr);
4038 				cmn_err(CE_PANIC, " E$ tag address has bad"
4039 							" parity");
4040 			}
4041 		} else if ((afsr_ets & cpu_ec_parity) == 0) {
4042 			/*
4043 			 * ETS is zero but ETP is set
4044 			 */
4045 			ecache_scrub_log(ec_data, ec_tag, paddr,
4046 				CPU_ECACHE_ETP_ETS_ERR, afsr);
4047 			cmn_err(CE_PANIC, "AFSR.ETP is set and"
4048 				" AFSR.ETS is zero");
4049 		}
4050 	} else {
4051 		/*
4052 		 * E$ tag state bit has a bad parity
4053 		 */
4054 		ecache_scrub_log(ec_data, ec_tag, paddr,
4055 				CPU_ECACHE_STATE_ERR, afsr);
4056 		cmn_err(CE_PANIC, "E$ tag state has bad parity");
4057 	}
4058 }
4059 
4060 static void
4061 ecache_page_retire(void *arg)
4062 {
4063 	uint64_t paddr = (uint64_t)arg;
4064 	(void) page_retire(paddr, PR_UE);
4065 }
4066 
/*
 * Empty in this cpu module: performs no work.
 */
void
sticksync_slave(void)
{
}
4070 
/*
 * Empty in this cpu module: performs no work.
 */
void
sticksync_master(void)
{
}
4074 
4075 /*ARGSUSED*/
4076 void
4077 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
4078 {}
4079 
4080 void
4081 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4082 {
4083 	int status;
4084 	ddi_fm_error_t de;
4085 
4086 	bzero(&de, sizeof (ddi_fm_error_t));
4087 
4088 	de.fme_version = DDI_FME_VERSION;
4089 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4090 	    FM_ENA_FMT1);
4091 	de.fme_flag = expected;
4092 	de.fme_bus_specific = (void *)aflt->flt_addr;
4093 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4094 
4095 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4096 		aflt->flt_panic = 1;
4097 }
4098 
4099 /*ARGSUSED*/
4100 void
4101 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
4102     errorq_t *eqp, uint_t flag)
4103 {
4104 	struct async_flt *aflt = (struct async_flt *)payload;
4105 
4106 	aflt->flt_erpt_class = error_class;
4107 	errorq_dispatch(eqp, payload, payload_sz, flag);
4108 }
4109 
4110 #define	MAX_SIMM	8
4111 
4112 struct ce_info {
4113 	char    name[UNUM_NAMLEN];
4114 	uint64_t intermittent_total;
4115 	uint64_t persistent_total;
4116 	uint64_t sticky_total;
4117 	unsigned short leaky_bucket_cnt;
4118 };
4119 
4120 /*
4121  * Separately-defined structure for use in reporting the ce_info
4122  * to SunVTS without exposing the internal layout and implementation
4123  * of struct ce_info.
4124  */
4125 static struct ecc_error_info ecc_error_info_data = {
4126 	{ "version", KSTAT_DATA_UINT32 },
4127 	{ "maxcount", KSTAT_DATA_UINT32 },
4128 	{ "count", KSTAT_DATA_UINT32 }
4129 };
4130 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
4131     sizeof (struct kstat_named);
4132 
4133 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
4134 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
4135 #endif
4136 
4137 struct ce_info  *mem_ce_simm = NULL;
4138 size_t mem_ce_simm_size = 0;
4139 
4140 /*
4141  * Default values for the number of CE's allowed per interval.
4142  * Interval is defined in minutes
4143  * SOFTERR_MIN_TIMEOUT is defined in microseconds
4144  */
4145 #define	SOFTERR_LIMIT_DEFAULT		2
4146 #define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
4147 #define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
4148 #define	TIMEOUT_NONE			((timeout_id_t)0)
4149 #define	TIMEOUT_SET			((timeout_id_t)1)
4150 
4151 /*
4152  * timeout identifer for leaky_bucket
4153  */
4154 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
4155 
4156 /*
4157  * Tunables for maximum number of allowed CE's in a given time
4158  */
4159 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4160 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4161 
4162 void
4163 cpu_mp_init(void)
4164 {
4165 	size_t size = cpu_aflt_size();
4166 	size_t i;
4167 	kstat_t *ksp;
4168 
4169 	/*
4170 	 * Initialize the CE error handling buffers.
4171 	 */
4172 	mem_ce_simm_size = MAX_SIMM * max_ncpus;
4173 	size = sizeof (struct ce_info) * mem_ce_simm_size;
4174 	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
4175 
4176 	ksp = kstat_create("unix", 0, "ecc-info", "misc",
4177 	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
4178 	if (ksp != NULL) {
4179 		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
4180 		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
4181 		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
4182 		ecc_error_info_data.count.value.ui32 = 0;
4183 		kstat_install(ksp);
4184 	}
4185 
4186 	for (i = 0; i < mem_ce_simm_size; i++) {
4187 		struct kstat_ecc_mm_info *kceip;
4188 
4189 		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
4190 		    KM_SLEEP);
4191 		ksp = kstat_create("mm", i, "ecc-info", "misc",
4192 		    KSTAT_TYPE_NAMED,
4193 		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
4194 		    KSTAT_FLAG_VIRTUAL);
4195 		if (ksp != NULL) {
4196 			/*
4197 			 * Re-declare ks_data_size to include room for the
4198 			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
4199 			 * set.
4200 			 */
4201 			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
4202 			    KSTAT_CE_UNUM_NAMLEN;
4203 			ksp->ks_data = kceip;
4204 			kstat_named_init(&kceip->name,
4205 			    "name", KSTAT_DATA_STRING);
4206 			kstat_named_init(&kceip->intermittent_total,
4207 			    "intermittent_total", KSTAT_DATA_UINT64);
4208 			kstat_named_init(&kceip->persistent_total,
4209 			    "persistent_total", KSTAT_DATA_UINT64);
4210 			kstat_named_init(&kceip->sticky_total,
4211 			    "sticky_total", KSTAT_DATA_UINT64);
4212 			/*
4213 			 * Use the default snapshot routine as it knows how to
4214 			 * deal with named kstats with long strings.
4215 			 */
4216 			ksp->ks_update = ecc_kstat_update;
4217 			kstat_install(ksp);
4218 		} else {
4219 			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4220 		}
4221 	}
4222 }
4223 
4224 /*ARGSUSED*/
4225 static void
4226 leaky_bucket_timeout(void *arg)
4227 {
4228 	int i;
4229 	struct ce_info *psimm = mem_ce_simm;
4230 
4231 	for (i = 0; i < mem_ce_simm_size; i++) {
4232 		if (psimm[i].leaky_bucket_cnt > 0)
4233 			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
4234 	}
4235 	add_leaky_bucket_timeout();
4236 }
4237 
4238 static void
4239 add_leaky_bucket_timeout(void)
4240 {
4241 	long timeout_in_microsecs;
4242 
4243 	/*
4244 	 * create timeout for next leak.
4245 	 *
4246 	 * The timeout interval is calculated as follows
4247 	 *
4248 	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4249 	 *
4250 	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4251 	 * in a minute), then multiply this by MICROSEC to get the interval
4252 	 * in microseconds.  Divide this total by ecc_softerr_limit so that
4253 	 * the timeout interval is accurate to within a few microseconds.
4254 	 */
4255 
4256 	if (ecc_softerr_limit <= 0)
4257 		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4258 	if (ecc_softerr_interval <= 0)
4259 		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4260 
4261 	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
4262 	    ecc_softerr_limit;
4263 
4264 	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
4265 		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
4266 
4267 	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
4268 	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
4269 }
4270 
4271 /*
4272  * Legacy Correctable ECC Error Hash
4273  *
4274  * All of the code below this comment is used to implement a legacy array
4275  * which counted intermittent, persistent, and sticky CE errors by unum,
4276  * and then was later extended to publish the data as a kstat for SunVTS.
4277  * All of this code is replaced by FMA, and remains here until such time
4278  * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
4279  *
4280  * Errors are saved in three buckets per-unum:
4281  * (1) sticky - scrub was unsuccessful, cannot be scrubbed
4282  *     This could represent a problem, and is immediately printed out.
4283  * (2) persistent - was successfully scrubbed
4284  *     These errors use the leaky bucket algorithm to determine
4285  *     if there is a serious problem.
4286  * (3) intermittent - may have originated from the cpu or upa/safari bus,
4287  *     and does not necessarily indicate any problem with the dimm itself,
4288  *     is critical information for debugging new hardware.
4289  *     Because we do not know if it came from the dimm, it would be
4290  *     inappropriate to include these in the leaky bucket counts.
4291  *
4292  * If the E$ line was modified before the scrub operation began, then the
4293  * displacement flush at the beginning of scrubphys() will cause the modified
4294  * line to be written out, which will clean up the CE.  Then, any subsequent
4295  * read will not cause an error, which will cause persistent errors to be
4296  * identified as intermittent.
4297  *
4298  * If a DIMM is going bad, it will produce true persistents as well as
4299  * false intermittents, so these intermittents can be safely ignored.
4300  *
4301  * If the error count is excessive for a DIMM, this function will return
4302  * PR_MCE, and the CPU module may then decide to remove that page from use.
4303  */
4304 static int
4305 ce_count_unum(int status, int len, char *unum)
4306 {
4307 	int i;
4308 	struct ce_info *psimm = mem_ce_simm;
4309 	int page_status = PR_OK;
4310 
4311 	ASSERT(psimm != NULL);
4312 
4313 	if (len <= 0 ||
4314 	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
4315 		return (page_status);
4316 
4317 	/*
4318 	 * Initialize the leaky_bucket timeout
4319 	 */
4320 	if (casptr(&leaky_bucket_timeout_id,
4321 	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
4322 		add_leaky_bucket_timeout();
4323 
4324 	for (i = 0; i < mem_ce_simm_size; i++) {
4325 		if (psimm[i].name[0] == '\0') {
4326 			/*
4327 			 * Hit the end of the valid entries, add
4328 			 * a new one.
4329 			 */
4330 			(void) strncpy(psimm[i].name, unum, len);
4331 			if (status & ECC_STICKY) {
4332 				/*
4333 				 * Sticky - the leaky bucket is used to track
4334 				 * soft errors.  Since a sticky error is a
4335 				 * hard error and likely to be retired soon,
4336 				 * we do not count it in the leaky bucket.
4337 				 */
4338 				psimm[i].leaky_bucket_cnt = 0;
4339 				psimm[i].intermittent_total = 0;
4340 				psimm[i].persistent_total = 0;
4341 				psimm[i].sticky_total = 1;
4342 				cmn_err(CE_WARN,
4343 				    "[AFT0] Sticky Softerror encountered "
4344 				    "on Memory Module %s\n", unum);
4345 				page_status = PR_MCE;
4346 			} else if (status & ECC_PERSISTENT) {
4347 				psimm[i].leaky_bucket_cnt = 1;
4348 				psimm[i].intermittent_total = 0;
4349 				psimm[i].persistent_total = 1;
4350 				psimm[i].sticky_total = 0;
4351 			} else {
4352 				/*
4353 				 * Intermittent - Because the scrub operation
4354 				 * cannot find the error in the DIMM, we will
4355 				 * not count these in the leaky bucket
4356 				 */
4357 				psimm[i].leaky_bucket_cnt = 0;
4358 				psimm[i].intermittent_total = 1;
4359 				psimm[i].persistent_total = 0;
4360 				psimm[i].sticky_total = 0;
4361 			}
4362 			ecc_error_info_data.count.value.ui32++;
4363 			break;
4364 		} else if (strncmp(unum, psimm[i].name, len) == 0) {
4365 			/*
4366 			 * Found an existing entry for the current
4367 			 * memory module, adjust the counts.
4368 			 */
4369 			if (status & ECC_STICKY) {
4370 				psimm[i].sticky_total++;
4371 				cmn_err(CE_WARN,
4372 				    "[AFT0] Sticky Softerror encountered "
4373 				    "on Memory Module %s\n", unum);
4374 				page_status = PR_MCE;
4375 			} else if (status & ECC_PERSISTENT) {
4376 				int new_value;
4377 
4378 				new_value = atomic_add_16_nv(
4379 				    &psimm[i].leaky_bucket_cnt, 1);
4380 				psimm[i].persistent_total++;
4381 				if (new_value > ecc_softerr_limit) {
4382 					cmn_err(CE_WARN, "[AFT0] Most recent %d"
4383 					    " soft errors from Memory Module"
4384 					    " %s exceed threshold (N=%d,"
4385 					    " T=%dh:%02dm) triggering page"
4386 					    " retire", new_value, unum,
4387 					    ecc_softerr_limit,
4388 					    ecc_softerr_interval / 60,
4389 					    ecc_softerr_interval % 60);
4390 					atomic_add_16(
4391 					    &psimm[i].leaky_bucket_cnt, -1);
4392 					page_status = PR_MCE;
4393 				}
4394 			} else { /* Intermittent */
4395 				psimm[i].intermittent_total++;
4396 			}
4397 			break;
4398 		}
4399 	}
4400 
4401 	if (i >= mem_ce_simm_size)
4402 		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
4403 		    "space.\n");
4404 
4405 	return (page_status);
4406 }
4407 
4408 /*
4409  * Function to support counting of IO detected CEs.
4410  */
4411 void
4412 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
4413 {
4414 	int err;
4415 
4416 	err = ce_count_unum(ecc->flt_status, len, unum);
4417 	if (err != PR_OK && automatic_page_removal) {
4418 		(void) page_retire(ecc->flt_addr, err);
4419 	}
4420 }
4421 
4422 static int
4423 ecc_kstat_update(kstat_t *ksp, int rw)
4424 {
4425 	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
4426 	struct ce_info *ceip = mem_ce_simm;
4427 	int i = ksp->ks_instance;
4428 
4429 	if (rw == KSTAT_WRITE)
4430 		return (EACCES);
4431 
4432 	ASSERT(ksp->ks_data != NULL);
4433 	ASSERT(i < mem_ce_simm_size && i >= 0);
4434 
4435 	/*
4436 	 * Since we're not using locks, make sure that we don't get partial
4437 	 * data. The name is always copied before the counters are incremented
4438 	 * so only do this update routine if at least one of the counters is
4439 	 * non-zero, which ensures that ce_count_unum() is done, and the
4440 	 * string is fully copied.
4441 	 */
4442 	if (ceip[i].intermittent_total == 0 &&
4443 	    ceip[i].persistent_total == 0 &&
4444 	    ceip[i].sticky_total == 0) {
4445 		/*
4446 		 * Uninitialized or partially initialized. Ignore.
4447 		 * The ks_data buffer was allocated via kmem_zalloc,
4448 		 * so no need to bzero it.
4449 		 */
4450 		return (0);
4451 	}
4452 
4453 	kstat_named_setstr(&kceip->name, ceip[i].name);
4454 	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
4455 	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
4456 	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
4457 
4458 	return (0);
4459 }
4460 
4461 #define	VIS_BLOCKSIZE		64
4462 
4463 int
4464 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
4465 {
4466 	int ret, watched;
4467 
4468 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4469 	ret = dtrace_blksuword32(addr, data, 0);
4470 	if (watched)
4471 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4472 
4473 	return (ret);
4474 }
4475 
/*
 * Empty in this cpu module: the cpu argument is ignored.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{}
4481 
/*
 * Empty in this cpu module: the cpu argument is ignored.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{}
4487 
/*
 * Empty in this cpu module: the hat argument is ignored.
 */
/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
{}
4493 
4494 size_t
4495 mmu_get_kernel_lpsize(size_t lpsize)
4496 {
4497 	uint_t tte;
4498 
4499 	if (lpsize == 0) {
4500 		/* no setting for segkmem_lpsize in /etc/system: use default */
4501 		return (MMU_PAGESIZE4M);
4502 	}
4503 
4504 	for (tte = TTE8K; tte <= TTE4M; tte++) {
4505 		if (lpsize == TTEBYTES(tte))
4506 			return (lpsize);
4507 	}
4508 
4509 	return (TTEBYTES(TTE8K));
4510 }
4511