xref: /illumos-gate/usr/src/uts/sun4u/cpu/spitfire.c (revision 002c70ff32f5df6f93c15f88d351ce26443e6ee7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/archsystm.h>
31 #include <sys/machparam.h>
32 #include <sys/machsystm.h>
33 #include <sys/cpu.h>
34 #include <sys/elf_SPARC.h>
35 #include <vm/hat_sfmmu.h>
36 #include <vm/page.h>
37 #include <vm/vm_dep.h>
38 #include <sys/cpuvar.h>
39 #include <sys/spitregs.h>
40 #include <sys/async.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/dditypes.h>
44 #include <sys/sunddi.h>
45 #include <sys/cpu_module.h>
46 #include <sys/prom_debug.h>
47 #include <sys/vmsystm.h>
48 #include <sys/prom_plat.h>
49 #include <sys/sysmacros.h>
50 #include <sys/intreg.h>
51 #include <sys/machtrap.h>
52 #include <sys/ontrap.h>
53 #include <sys/ivintr.h>
54 #include <sys/atomic.h>
55 #include <sys/panic.h>
56 #include <sys/ndifm.h>
57 #include <sys/fm/protocol.h>
58 #include <sys/fm/util.h>
59 #include <sys/fm/cpu/UltraSPARC-II.h>
60 #include <sys/ddi.h>
61 #include <sys/ecc_kstat.h>
62 #include <sys/watchpoint.h>
63 #include <sys/dtrace.h>
64 #include <sys/errclassify.h>
65 
66 uint_t	cpu_impl_dual_pgsz = 0;
67 
/*
 * Structure for the 8 byte ecache data dump and the associated AFSR state.
 * There will be 8 of these structures used to dump an ecache line (64 bytes).
 */
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;		/* 8 bytes of E$ data for this slot */
	uint64_t ec_afsr;	/* AFSR state associated with this 8-byte read */
} ec_data_t;

/*
 * Define spitfire (Ultra I/II) specific asynchronous error structure
 */
typedef struct spitfire_async_flt {
	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
	ushort_t flt_type;		/* types of faults - cpu specific */
	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
	uint64_t flt_ec_tag;		/* E$ tag info */
	int flt_ec_lcnt;		/* number of bad E$ lines */
	ushort_t flt_sdbh;		/* UDBH reg */
	ushort_t flt_sdbl;		/* UDBL reg */
} spitf_async_flt;
89 
90 /*
91  * Prototypes for support routines in spitfire_asm.s:
92  */
93 extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
94 extern uint64_t get_lsu(void);
95 extern void set_lsu(uint64_t ncc);
96 extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
97 				uint64_t *oafsr, uint64_t *acc_afsr);
98 extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
99 extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
100 				uint64_t *acc_afsr);
101 extern uint64_t read_and_clear_afsr();
102 extern void write_ec_tag_parity(uint32_t id);
103 extern void write_hb_ec_tag_parity(uint32_t id);
104 
105 /*
106  * Spitfire module routines:
107  */
108 static void cpu_async_log_err(void *flt);
109 /*PRINTFLIKE6*/
110 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
111     uint_t logflags, const char *endstr, const char *fmt, ...);
112 
113 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
114 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
115 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
116 
117 static void log_ce_err(struct async_flt *aflt, char *unum);
118 static void log_ue_err(struct async_flt *aflt, char *unum);
119 static void check_misc_err(spitf_async_flt *spf_flt);
120 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
121 static int check_ecc(struct async_flt *aflt);
122 static uint_t get_cpu_status(uint64_t arg);
123 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
124 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
125 		int *m, uint64_t *afsr);
126 static void ecache_kstat_init(struct cpu *cp);
127 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
128 		uint64_t paddr, int mpb, uint64_t);
129 static uint64_t ecache_scrub_misc_err(int, uint64_t);
130 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
131 static void ecache_page_retire(void *);
132 static int ecc_kstat_update(kstat_t *ksp, int rw);
133 static int ce_count_unum(int status, int len, char *unum);
134 static void add_leaky_bucket_timeout(void);
135 static int synd_to_synd_code(int synd_status, ushort_t synd);
136 
137 extern uint_t read_all_memscrub;
138 extern void memscrub_run(void);
139 
static uchar_t	isus2i;			/* set if sabre */
static uchar_t	isus2e;			/* set if hummingbird */

/*
 * Default ecache mask and shift settings for Spitfire.  If we detect a
 * different CPU implementation, we will modify these values at boot time.
 * (The Sabre/Hummingbird overrides are applied in cpu_aflt_size(), which
 * runs once cpunodes[] is initialized.)
 */
static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;

/*
 * Default ecache state bits for Spitfire.  These individual bits indicate if
 * the given line is in any of the valid or modified states, respectively.
 * Again, we modify these at boot if we detect a different CPU.
 */
static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
static uchar_t cpu_ec_parity		= S_EC_PARITY;
static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;
167 
168 /*
169  * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
171  * of this array have the following semantics:
172  *
173  *      00-63   The number of the bad bit, when only one bit is bad.
174  *      64      ECC bit C0 is bad.
175  *      65      ECC bit C1 is bad.
176  *      66      ECC bit C2 is bad.
177  *      67      ECC bit C3 is bad.
178  *      68      ECC bit C4 is bad.
179  *      69      ECC bit C5 is bad.
180  *      70      ECC bit C6 is bad.
181  *      71      ECC bit C7 is bad.
182  *      72      Two bits are bad.
183  *      73      Three bits are bad.
184  *      74      Four bits are bad.
185  *      75      More than Four bits are bad.
186  *      76      NO bits are bad.
187  * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
188  */
189 
/* Special syndrome codes beyond the 0-63 single-data-bit range (see above) */
#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

/*
 * Classify a decoded syndrome code.  The macro argument is fully
 * parenthesized so expression arguments expand safely.
 */
#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	(((synd_code) >= 0) && \
						    ((synd_code) < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	(((synd_code) >= C0) && \
						    ((synd_code) <= C7))
208 
/* 256 entries, indexed by the 8-bit ECC syndrome; values per the key above */
static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};
228 
229 #define	SYND_TBL_SIZE 256
230 
231 /*
232  * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
233  * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
234  */
#define	UDBL_REG	0x8000
/* Arguments are parenthesized so expression operands expand safely. */
#define	UDBL(synd)	((((synd) & UDBL_REG)) >> 15)
#define	SYND(synd)	((synd) & 0x7FFF)
238 
239 /*
240  * These error types are specific to Spitfire and are used internally for the
241  * spitfire fault structure flt_type field.
242  */
243 #define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
244 #define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
245 #define	CPU_WP_ERR		2	/* WP parity error */
246 #define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
247 #define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
248 #define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
249 #define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
250 #define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
251 #define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
252 #define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
253 #define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
254 #define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
255 #define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
256 #define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
257 #define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
258 #define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */
259 
260 /*
261  * Macro to access the "Spitfire cpu private" data structure.
262  */
263 #define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))
264 
265 /*
266  * set to 0 to disable automatic retiring of pages on
267  * DIMMs that have excessive soft errors
268  */
269 int automatic_page_removal = 1;
270 
271 /*
272  * Heuristic for figuring out which module to replace.
273  * Relative likelihood that this P_SYND indicates that this module is bad.
274  * We call it a "score", though, not a relative likelihood.
275  *
276  * Step 1.
277  * Assign a score to each byte of P_SYND according to the following rules:
278  * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
279  * If one bit on, give it a 95.
280  * If seven bits on, give it a 10.
281  * If two bits on:
282  *   in different nybbles, a 90
283  *   in same nybble, but unaligned, 85
284  *   in same nybble and as an aligned pair, 80
285  * If six bits on, look at the bits that are off:
286  *   in same nybble and as an aligned pair, 15
287  *   in same nybble, but unaligned, 20
288  *   in different nybbles, a 25
289  * If three bits on:
 *   in different nybbles, no aligned pairs, 75
 *   in different nybbles, one aligned pair, 70
292  *   in the same nybble, 65
293  * If five bits on, look at the bits that are off:
294  *   in the same nybble, 30
 *   in different nybbles, one aligned pair, 35
 *   in different nybbles, no aligned pairs, 40
297  * If four bits on:
298  *   all in one nybble, 45
299  *   as two aligned pairs, 50
300  *   one aligned pair, 55
301  *   no aligned pairs, 60
302  *
303  * Step 2:
304  * Take the higher of the two scores (one for each byte) as the score
305  * for the module.
306  *
307  * Print the score for each module, and field service should replace the
308  * module with the highest score.
309  */
310 
311 /*
312  * In the table below, the first row/column comment indicates the
313  * number of bits on in that nybble; the second row/column comment is
314  * the hex digit.
315  */
316 
/* Score for each possible P_SYND byte value, per the heuristic above. */
static int
p_synd_score_table[256] = {
	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
};
338 
339 int
340 ecc_psynd_score(ushort_t p_synd)
341 {
342 	int i, j, a, b;
343 
344 	i = p_synd & 0xFF;
345 	j = (p_synd >> 8) & 0xFF;
346 
347 	a = p_synd_score_table[i];
348 	b = p_synd_score_table[j];
349 
350 	return (a > b ? a : b);
351 }
352 
353 /*
354  * Async Fault Logging
355  *
356  * To ease identifying, reading, and filtering async fault log messages, the
357  * label [AFT#] is now prepended to each async fault message.  These messages
358  * and the logging rules are implemented by cpu_aflt_log(), below.
359  *
360  * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
361  *          This includes both corrected ECC memory and ecache faults.
362  *
363  * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
364  *          else except CE errors) with a priority of 1 (highest).  This tag
365  *          is also used for panic messages that result from an async fault.
366  *
367  * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
368  * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
369  *          of the E-$ data and tags.
370  *
371  * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
372  * printed on the console.  To send all AFT logs to both the log and the
373  * console, set aft_verbose = 1.
374  */
375 
376 #define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
377 #define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
378 #define	CPU_ERRID		0x0004	/* print flt_id */
379 #define	CPU_TL			0x0008	/* print flt_tl */
380 #define	CPU_ERRID_FIRST 	0x0010	/* print flt_id first in message */
381 #define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
382 #define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
383 #define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
384 #define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
385 #define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
386 #define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
387 #define	CPU_FAULTPC		0x0800	/* print flt_pc */
388 #define	CPU_SYND		0x1000	/* print flt_synd and unum */
389 
390 #define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
391 				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
392 				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
393 				CPU_FAULTPC)
394 #define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
395 #define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
396 				~CPU_SPACE)
397 #define	PARERR_LFLAGS	(CMN_LFLAGS)
398 #define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
399 #define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
400 				~CPU_FLTCPU & ~CPU_FAULTPC)
401 #define	BERRTO_LFLAGS	(CMN_LFLAGS)
402 #define	NO_LFLAGS	(0)
403 
404 #define	AFSR_FMTSTR0	"\020\1ME"
405 #define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
406 			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
407 #define	UDB_FMTSTR	"\020\012UE\011CE"
408 
409 /*
410  * Save the cache bootup state for use when internal
411  * caches are to be re-enabled after an error occurs.
412  */
413 uint64_t	cache_boot_state = 0;
414 
415 /*
416  * PA[31:0] represent Displacement in UPA configuration space.
417  */
418 uint_t	root_phys_addr_lo_mask = 0xffffffff;
419 
420 /*
421  * Spitfire legacy globals
422  */
423 int	itlb_entries;
424 int	dtlb_entries;
425 
/*
 * Boot-time CPU module setup for Spitfire (UltraSPARC I/II).
 * Configures cache attributes, ELF capability flags, page coloring,
 * the VA hole, and the kpm mapping window.  This runs very early,
 * before cpunodes[] is initialized (see cpu_aflt_size()).
 */
void
cpu_setup(void)
{
	extern int page_retire_messages;
	extern int page_retire_first_ue;
	extern int at_flags;
#if defined(SF_ERRATA_57)
	extern caddr_t errata57_limit;
#endif
	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	/* ELF EF_SPARC capability flags exported to userland */
	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;

	/*
	 * Spitfire isn't currently FMA-aware, so we have to enable the
	 * page retirement messages. We also change the default policy
	 * for UE retirement to allow clearing of transient errors.
	 */
	page_retire_messages = 1;
	page_retire_first_ue = 0;

	/*
	 * save the cache bootup state so caches can be re-enabled
	 * after an error occurs.
	 */
	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);

	if (use_page_coloring) {
		do_pg_coloring = 1;
	}

	/*
	 * Tune pp_slots to use up to 1/8th of the tlb entries.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores invalidate all pages of the d$ so pagecopy
	 * et. al. do not need virtual translations with virtual
	 * coloring taken into consideration.
	 */
	pp_consistent_coloring = 0;

	/* ISA list, ordered most- to least-preferred */
	isa_list =
	    "sparcv9+vis sparcv9 "
	    "sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS;

	/*
	 * On Spitfire, there's a hole in the address space
	 * that we must never map (the hardware only support 44-bits of
	 * virtual address).  Later CPUs are expected to have wider
	 * supported address ranges.
	 *
	 * See address map on p23 of the UltraSPARC 1 user's manual.
	 */
	hole_start = (caddr_t)0x80000000000ull;
	hole_end = (caddr_t)0xfffff80000000000ull;

	/*
	 * A spitfire call bug requires us to be a further 4Gbytes of
	 * firewall from the spec.
	 *
	 * See Spitfire Errata #21
	 */
	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
	kpm_size_shift = 41;
	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */

#if defined(SF_ERRATA_57)
	errata57_limit = (caddr_t)0x80000000ul;
#endif

	/*
	 * Disable large text pages by default.
	 * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
	 */
	max_utext_lpsize = MMU_PAGESIZE;
}
518 
519 static int
520 getintprop(pnode_t node, char *name, int deflt)
521 {
522 	int	value;
523 
524 	switch (prom_getproplen(node, name)) {
525 	case 0:
526 		value = 1;	/* boolean properties */
527 		break;
528 
529 	case sizeof (int):
530 		(void) prom_getprop(node, name, (caddr_t)&value);
531 		break;
532 
533 	default:
534 		value = deflt;
535 		break;
536 	}
537 
538 	return (value);
539 }
540 
541 /*
542  * Set the magic constants of the implementation.
543  */
void
cpu_fiximp(pnode_t dnode)
{
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;
	extern int dcache_line_mask;
	int i, a;
	static struct {
		char	*name;
		int	*var;
	} prop[] = {
		"dcache-size",		&dcache_size,
		"dcache-line-size",	&dcache_linesize,
		"icache-size",		&icache_size,
		"icache-line-size",	&icache_linesize,
		"ecache-size",		&ecache_size,
		"ecache-line-size",	&ecache_alignsize,
		"ecache-associativity", &ecache_associativity,
		"#itlb-entries",	&itlb_entries,
		"#dtlb-entries",	&dtlb_entries,
		};

	/*
	 * Pull cache/TLB geometry from the PROM node; a missing property
	 * (getintprop() returns the -1 default) leaves the compiled-in
	 * value untouched.
	 */
	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
			*prop[i].var = a;
		}
	}

	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = S_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	/* compute vac_shift = log2(vac_size) */
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;

	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);

	/*
	 * UltraSPARC I & II have ecache sizes running
	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
	 * and 8 MB. Adjust the copyin/copyout limits
	 * according to the cache size. The magic number
	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
	 * VIS instructions.
	 *
	 * We assume that all CPUs on the system have the same size
	 * ecache. We're also called very early in the game.
	 * /etc/system will be parsed *after* we're called so
	 * these values can be overwritten.
	 */

	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
	if (ecache_size <= 524288) {
		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
	} else if (ecache_size == 1048576) {
		hw_copy_limit_2 = 1024;
		hw_copy_limit_4 = 1280;
		hw_copy_limit_8 = 1536;
	} else if (ecache_size == 2097152) {
		hw_copy_limit_2 = 1536;
		hw_copy_limit_4 = 2048;
		hw_copy_limit_8 = 2560;
	} else if (ecache_size == 4194304) {
		hw_copy_limit_2 = 2048;
		hw_copy_limit_4 = 2560;
		hw_copy_limit_8 = 3072;
	} else {
		/* 8 MB (or anything larger) */
		hw_copy_limit_2 = 2560;
		hw_copy_limit_4 = 3072;
		hw_copy_limit_8 = 3584;
	}
}
623 
624 /*
625  * Called by setcpudelay
626  */
void
cpu_init_tick_freq(void)
{
	/*
	 * Determine the cpu frequency by calling
	 * tod_get_cpufrequency. Use an approximate frequency
	 * value computed by the prom if the tod module
	 * is not initialized and loaded yet.
	 */
	if (tod_ops.tod_get_cpufrequency != NULL) {
		mutex_enter(&tod_lock);
		sys_tick_freq = tod_ops.tod_get_cpufrequency();
		mutex_exit(&tod_lock);
	} else {
#if defined(HUMMINGBIRD)
		/*
		 * the hummingbird version of %stick is used as the basis for
		 * low level timing; this provides an independent constant-rate
		 * clock for general system use, and frees power mgmt to set
		 * various cpu clock speeds.
		 */
		if (system_clock_freq == 0)
			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
			    system_clock_freq);
		sys_tick_freq = system_clock_freq;
#else /* SPITFIRE */
		/* %tick runs at the cpu clock rate; use the PROM's value */
		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
#endif
	}
}
657 
658 
659 void shipit(int upaid);
660 extern uint64_t xc_tick_limit;
661 extern uint64_t xc_tick_jump_limit;
662 
663 #ifdef SEND_MONDO_STATS
664 uint64_t x_early[NCPU][64];
665 #endif
666 
667 /*
668  * Note: A version of this function is used by the debugger via the KDI,
669  * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be included
671  * in the KDI-specific version.  See spitfire_kdi.c.
672  */
/*
 * Ship a single mondo interrupt to the cpu identified by cpuid, then
 * poll the interrupt dispatch status register (IDSR) until the dispatch
 * completes.  A BUSY indication keeps us polling; a NACK causes a
 * resend after a short delay.  If the dispatch does not complete within
 * the (jump-adjusted) tick window, we panic with the NACK/BUSY counts.
 * NOTE: per the comment above, any change here must be mirrored in the
 * KDI version (spitfire_kdi.c); the logic is left byte-for-byte intact.
 */
void
send_one_mondo(int cpuid)
{
	uint64_t idsr, starttick, endtick;
	int upaid, busy, nack;
	uint64_t tick, tick_prev;
	ulong_t ticks;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	upaid = CPUID_TO_UPAID(cpuid);
	tick = starttick = gettick();
	shipit(upaid);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)	/* dispatch complete */
			break;
		/*
		 * When we detect an irregular tick jump, we adjust
		 * the timer window to the current tick value.
		 */
		tick_prev = tick;
		tick = gettick();
		ticks = tick - tick_prev;
		if (ticks > xc_tick_jump_limit) {
			endtick = tick + xc_tick_limit;
		} else if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC,
			    "send mondo timeout (target 0x%x) [%d NACK %d "
			    "BUSY]", upaid, nack, busy);
		}
		if (idsr & IDSR_BUSY) {
			busy++;
			continue;
		}
		/* not busy and not done: treat as NACK and resend */
		drv_usecwait(1);
		shipit(upaid);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
#endif
}
720 
721 void
722 send_mondo_set(cpuset_t set)
723 {
724 	int i;
725 
726 	for (i = 0; i < NCPU; i++)
727 		if (CPU_IN_SET(set, i)) {
728 			send_one_mondo(i);
729 			CPUSET_DEL(set, i);
730 			if (CPUSET_ISNULL(set))
731 				break;
732 		}
733 }
734 
/*
 * No-op on this CPU module; empty stub kept so the common code's
 * syncfpu() entry point resolves.
 */
void
syncfpu(void)
{
}
739 
740 /*
741  * Determine the size of the CPU module's error structure in bytes.  This is
742  * called once during boot to initialize the error queues.
743  */
744 int
745 cpu_aflt_size(void)
746 {
747 	/*
748 	 * We need to determine whether this is a sabre, Hummingbird or a
749 	 * Spitfire/Blackbird impl and set the appropriate state variables for
750 	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
751 	 * too early in the boot flow and the cpunodes are not initialized.
752 	 * This routine will be called once after cpunodes[] is ready, so do
753 	 * it here.
754 	 */
755 	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
756 		isus2i = 1;
757 		cpu_ec_tag_mask = SB_ECTAG_MASK;
758 		cpu_ec_state_mask = SB_ECSTATE_MASK;
759 		cpu_ec_par_mask = SB_ECPAR_MASK;
760 		cpu_ec_par_shift = SB_ECPAR_SHIFT;
761 		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
762 		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
763 		cpu_ec_state_exl = SB_ECSTATE_EXL;
764 		cpu_ec_state_mod = SB_ECSTATE_MOD;
765 
766 		/* These states do not exist in sabre - set to 0xFF */
767 		cpu_ec_state_shr = 0xFF;
768 		cpu_ec_state_own = 0xFF;
769 
770 		cpu_ec_state_valid = SB_ECSTATE_VALID;
771 		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
772 		cpu_ec_state_parity = SB_ECSTATE_PARITY;
773 		cpu_ec_parity = SB_EC_PARITY;
774 	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
775 		isus2e = 1;
776 		cpu_ec_tag_mask = HB_ECTAG_MASK;
777 		cpu_ec_state_mask = HB_ECSTATE_MASK;
778 		cpu_ec_par_mask = HB_ECPAR_MASK;
779 		cpu_ec_par_shift = HB_ECPAR_SHIFT;
780 		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
781 		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
782 		cpu_ec_state_exl = HB_ECSTATE_EXL;
783 		cpu_ec_state_mod = HB_ECSTATE_MOD;
784 
785 		/* These states do not exist in hummingbird - set to 0xFF */
786 		cpu_ec_state_shr = 0xFF;
787 		cpu_ec_state_own = 0xFF;
788 
789 		cpu_ec_state_valid = HB_ECSTATE_VALID;
790 		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
791 		cpu_ec_state_parity = HB_ECSTATE_PARITY;
792 		cpu_ec_parity = HB_EC_PARITY;
793 	}
794 
795 	return (sizeof (spitf_async_flt));
796 }
797 
798 
799 /*
800  * Correctable ecc error trap handler
801  */
802 /*ARGSUSED*/
void
cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl;
	ushort_t e_syndh, e_syndl;
	spitf_async_flt spf_flt;
	struct async_flt *ecc;
	int queue = 1;		/* cleared to skip dispatching the ereport */

	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by ce_err().
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	/* Extract the ECC syndrome from each UDB half */
	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
	    (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bit not set in the AFSR",
		    "  Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	    the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bits not set in the UDBs",
		    " Corrected Memory Error on");
	}

	/* UDBH reports a CE: scrub and (optionally) queue an ereport */
	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/* UDBL reports a CE: adjust the address/syndrome and repeat */
	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * Re-enable all error trapping (CEEN currently cleared).
	 */
	clr_datapath();
	set_asyncflt(P_AFSR_CE);
	set_error_enable(EER_ENABLE);
}
915 
916 /*
917  * Cpu specific CE logging routine
918  */
919 static void
920 log_ce_err(struct async_flt *aflt, char *unum)
921 {
922 	spitf_async_flt spf_flt;
923 
924 	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
925 		return;
926 	}
927 
928 	spf_flt.cmn_asyncflt = *aflt;
929 	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
930 	    " Corrected Memory Error detected by");
931 }
932 
933 /*
934  * Spitfire does not perform any further CE classification refinement
935  */
936 /*ARGSUSED*/
937 int
938 ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
939     size_t afltoffset)
940 {
941 	return (0);
942 }
943 
944 char *
945 flt_to_error_type(struct async_flt *aflt)
946 {
947 	if (aflt->flt_status & ECC_INTERMITTENT)
948 		return (ERR_TYPE_DESC_INTERMITTENT);
949 	if (aflt->flt_status & ECC_PERSISTENT)
950 		return (ERR_TYPE_DESC_PERSISTENT);
951 	if (aflt->flt_status & ECC_STICKY)
952 		return (ERR_TYPE_DESC_STICKY);
953 	return (ERR_TYPE_DESC_UNKNOWN);
954 }
955 
956 /*
957  * Called by correctable ecc error logging code to print out
958  * the stick/persistent/intermittent status of the error.
959  */
960 static void
961 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
962 {
963 	ushort_t status;
964 	char *status1_str = "Memory";
965 	char *status2_str = "Intermittent";
966 	struct async_flt *aflt = (struct async_flt *)spf_flt;
967 
968 	status = aflt->flt_status;
969 
970 	if (status & ECC_ECACHE)
971 		status1_str = "Ecache";
972 
973 	if (status & ECC_STICKY)
974 		status2_str = "Sticky";
975 	else if (status & ECC_PERSISTENT)
976 		status2_str = "Persistent";
977 
978 	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
979 	    NULL, " Corrected %s Error on %s is %s",
980 	    status1_str, unum, status2_str);
981 }
982 
983 /*
984  * check for a valid ce syndrome, then call the
985  * displacement flush scrubbing code, and then check the afsr to see if
986  * the error was persistent or intermittent. Reread the afar/afsr to see
987  * if the error was not scrubbed successfully, and is therefore sticky.
988  */
989 /*ARGSUSED1*/
990 void
991 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
992 {
993 	uint64_t eer, afsr;
994 	ushort_t status;
995 
996 	ASSERT(getpil() > LOCK_LEVEL);
997 
998 	/*
999 	 * It is possible that the flt_addr is not a valid
1000 	 * physical address. To deal with this, we disable
1001 	 * NCEEN while we scrub that address. If this causes
1002 	 * a TIMEOUT/BERR, we know this is an invalid
1003 	 * memory location.
1004 	 */
1005 	kpreempt_disable();
1006 	eer = get_error_enable();
1007 	if (eer & (EER_CEEN | EER_NCEEN))
1008 		set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));
1009 
1010 	/*
1011 	 * To check if the error detected by IO is persistent, sticky or
1012 	 * intermittent.
1013 	 */
1014 	if (ecc->flt_status & ECC_IOBUS) {
1015 		ecc->flt_stat = P_AFSR_CE;
1016 	}
1017 
1018 	scrubphys(P2ALIGN(ecc->flt_addr, 64),
1019 	    cpunodes[CPU->cpu_id].ecache_size);
1020 
1021 	get_asyncflt(&afsr);
1022 	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1023 		/*
1024 		 * Must ensure that we don't get the TIMEOUT/BERR
1025 		 * when we reenable NCEEN, so we clear the AFSR.
1026 		 */
1027 		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
1028 		if (eer & (EER_CEEN | EER_NCEEN))
1029 			set_error_enable(eer);
1030 		kpreempt_enable();
1031 		return;
1032 	}
1033 
1034 	if (eer & EER_NCEEN)
1035 		set_error_enable(eer & ~EER_CEEN);
1036 
1037 	/*
1038 	 * Check and clear any ECC errors from the scrub.  If the scrub did
1039 	 * not trip over the error, mark it intermittent.  If the scrub did
1040 	 * trip the error again and it did not scrub away, mark it sticky.
1041 	 * Otherwise mark it persistent.
1042 	 */
1043 	if (check_ecc(ecc) != 0) {
1044 		cpu_read_paddr(ecc, 0, 1);
1045 
1046 		if (check_ecc(ecc) != 0)
1047 			status = ECC_STICKY;
1048 		else
1049 			status = ECC_PERSISTENT;
1050 	} else
1051 		status = ECC_INTERMITTENT;
1052 
1053 	if (eer & (EER_CEEN | EER_NCEEN))
1054 		set_error_enable(eer);
1055 	kpreempt_enable();
1056 
1057 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
1058 	ecc->flt_status |= status;
1059 }
1060 
1061 /*
1062  * get the syndrome and unum, and then call the routines
1063  * to check the other cpus and iobuses, and then do the error logging.
1064  */
1065 /*ARGSUSED1*/
1066 void
1067 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
1068 {
1069 	char unum[UNUM_NAMLEN];
1070 	int len = 0;
1071 	int ce_verbose = 0;
1072 	int err;
1073 
1074 	ASSERT(ecc->flt_func != NULL);
1075 
1076 	/* Get the unum string for logging purposes */
1077 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
1078 	    UNUM_NAMLEN, &len);
1079 
1080 	/* Call specific error logging routine */
1081 	(void) (*ecc->flt_func)(ecc, unum);
1082 
1083 	/*
1084 	 * Count errors per unum.
1085 	 * Non-memory errors are all counted via a special unum string.
1086 	 */
1087 	if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
1088 	    automatic_page_removal) {
1089 		(void) page_retire(ecc->flt_addr, err);
1090 	}
1091 
1092 	if (ecc->flt_panic) {
1093 		ce_verbose = 1;
1094 	} else if ((ecc->flt_class == BUS_FAULT) ||
1095 	    (ecc->flt_stat & P_AFSR_CE)) {
1096 		ce_verbose = (ce_verbose_memory > 0);
1097 	} else {
1098 		ce_verbose = 1;
1099 	}
1100 
1101 	if (ce_verbose) {
1102 		spitf_async_flt sflt;
1103 		int synd_code;
1104 
1105 		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */
1106 
1107 		cpu_ce_log_status(&sflt, unum);
1108 
1109 		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
1110 		    SYND(ecc->flt_synd));
1111 
1112 		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
1113 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1114 			    NULL, " ECC Data Bit %2d was in error "
1115 			    "and corrected", synd_code);
1116 		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
1117 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1118 			    NULL, " ECC Check Bit %2d was in error "
1119 			    "and corrected", synd_code - C0);
1120 		} else {
1121 			/*
1122 			 * These are UE errors - we shouldn't be getting CE
1123 			 * traps for these; handle them in case of bad h/w.
1124 			 */
1125 			switch (synd_code) {
1126 			case M2:
1127 				cpu_aflt_log(CE_CONT, 0, &sflt,
1128 				    CPU_ERRID_FIRST, NULL,
1129 				    " Two ECC Bits were in error");
1130 				break;
1131 			case M3:
1132 				cpu_aflt_log(CE_CONT, 0, &sflt,
1133 				    CPU_ERRID_FIRST, NULL,
1134 				    " Three ECC Bits were in error");
1135 				break;
1136 			case M4:
1137 				cpu_aflt_log(CE_CONT, 0, &sflt,
1138 				    CPU_ERRID_FIRST, NULL,
1139 				    " Four ECC Bits were in error");
1140 				break;
1141 			case MX:
1142 				cpu_aflt_log(CE_CONT, 0, &sflt,
1143 				    CPU_ERRID_FIRST, NULL,
1144 				    " More than Four ECC bits were "
1145 				    "in error");
1146 				break;
1147 			default:
1148 				cpu_aflt_log(CE_CONT, 0, &sflt,
1149 				    CPU_ERRID_FIRST, NULL,
1150 				    " Unknown fault syndrome %d",
1151 				    synd_code);
1152 				break;
1153 			}
1154 		}
1155 	}
1156 
1157 	/* Display entire cache line, if valid address */
1158 	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
1159 		read_ecc_data(ecc, 1, 1);
1160 }
1161 
1162 /*
1163  * We route all errors through a single switch statement.
1164  */
1165 void
1166 cpu_ue_log_err(struct async_flt *aflt)
1167 {
1168 
1169 	switch (aflt->flt_class) {
1170 	case CPU_FAULT:
1171 		cpu_async_log_err(aflt);
1172 		break;
1173 
1174 	case BUS_FAULT:
1175 		bus_async_log_err(aflt);
1176 		break;
1177 
1178 	default:
1179 		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
1180 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1181 		break;
1182 	}
1183 }
1184 
1185 /* Values for action variable in cpu_async_error() */
1186 #define	ACTION_NONE		0
1187 #define	ACTION_TRAMPOLINE	1
1188 #define	ACTION_AST_FLAGS	2
1189 
1190 /*
1191  * Access error trap handler for asynchronous cpu errors.  This routine is
1192  * called to handle a data or instruction access error.  All fatal errors are
1193  * completely handled by this routine (by panicking).  Non fatal error logging
1194  * is queued for later processing either via AST or softint at a lower PIL.
1195  * In case of panic, the error log queue will also be processed as part of the
1196  * panic flow to ensure all errors are logged.  This routine is called with all
1197  * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
1198  * error bits are also cleared.  The hardware has also disabled the I and
1199  * D-caches for us, so we must re-enable them before returning.
1200  *
1201  * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
1202  *
1203  *		_______________________________________________________________
1204  *		|        Privileged tl0		|         Unprivileged	      |
1205  *		| Protected	| Unprotected	| Protected	| Unprotected |
1206  *		|on_trap|lofault|		|		|	      |
1207  * -------------|-------|-------+---------------+---------------+-------------|
1208  *		|	|	|		|		|	      |
1209  * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
1210  *		|	|	|		|		|	      |
1211  * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
1212  *		|	|	|		|		|	      |
1213  * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
1214  *		|	|	|		|		|	      |
1215  * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
1216  * ____________________________________________________________________________
1217  *
1218  *
1219  * Action codes:
1220  *
1221  * L - log
1222  * M - kick off memscrubber if flt_in_memory
1223  * P - panic
1224  * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
1225  * R - i)  if aft_panic is set, panic
1226  *     ii) otherwise, send hwerr event to contract and SIGKILL to process
1227  * S - send SIGBUS to process
1228  * T - trampoline
1229  *
1230  * Special cases:
1231  *
1232  * 1) if aft_testfatal is set, all faults result in a panic regardless
1233  *    of type (even WP), protection (even on_trap), or privilege.
1234  */
/*ARGSUSED*/
void
cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl, ttype, tl;
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	char pr_reason[28];
	uint64_t oafsr;
	uint64_t acc_afsr = 0;			/* accumulated afsr */
	int action = ACTION_NONE;
	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by async_err() if P_AFSR_UE is set.
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	/*
	 * Grab the ttype encoded in <63:53> of the saved
	 * afsr passed from async_err()
	 */
	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
	tl = (ushort_t)(t_afsr >> 62);

	/* Discard the encoded ttype/tl bits, keeping real AFSR error bits. */
	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/*
	 * Initialize most of the common and CPU-specific structure.  We derive
	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
	 * initial setting of aflt->flt_panic is based on TL: we must panic if
	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
	 * tuneable aft_testfatal is set (not the default).
	 */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = (struct async_flt *)&spf_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;

	/*
	 * Set flt_status based on the trap type.  If we end up here as the
	 * result of a UE detected by the CE handling code, leave status 0.
	 */
	switch (ttype) {
	case T_DATA_ERROR:
		aflt->flt_status = ECC_D_TRAP;
		break;
	case T_INSTR_ERROR:
		aflt->flt_status = ECC_I_TRAP;
		break;
	}

	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal async errors (ISAP/ETP/IVUE panic immediately).
	 */
	check_misc_err(&spf_flt);

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = AFLT_PROT_EC;
				otp->ot_trap |= OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
			}

			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = AFLT_PROT_ACCESS;
				otp->ot_trap |= OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			/* lofault (e.g. copyin/copyout) protection. */
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			action = ACTION_TRAMPOLINE;
		}
	}

	/*
	 * Determine if this error needs to be treated as fatal.  Note that
	 * multiple errors detected upon entry to this trap handler does not
	 * necessarily warrant a panic.  We only want to panic if the trap
	 * happened in privileged mode and not under t_ontrap or t_lofault
	 * protection.  The exception is WP: if we *only* get WP, it is not
	 * fatal even if the trap occurred in privileged mode, except on Sabre.
	 *
	 * aft_panic, if set, effectively makes us treat usermode
	 * UE/EDP/LDP faults as if they were privileged - so we will
	 * panic instead of sending a contract event.  A lofault-protected
	 * fault will normally follow the contract event; if aft_panic is
	 * set this will be changed to a panic.
	 *
	 * For usermode BERR/BTO errors, eg from processes performing device
	 * control through mapped device memory, we need only deliver
	 * a SIGBUS to the offending process.
	 *
	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
	 * checked later; for now we implement the common reasons.
	 */
	if (aflt->flt_prot == AFLT_PROT_NONE) {
		/*
		 * Beware - multiple bits may be set in AFSR
		 */
		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
			if (aflt->flt_priv || aft_panic)
				aflt->flt_panic = 1;
		}

		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
			if (aflt->flt_priv)
				aflt->flt_panic = 1;
		}
	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
		aflt->flt_panic = 1;
	}

	/*
	 * UE/BERR/TO: Call our bus nexus friends to check for
	 * IO errors that may have resulted in this trap.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Handle UE: If the UE is in memory, we need to flush the bad line from
	 * the E-cache.  We also need to query the bus nexus for fatal errors.
	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
	 * caches may introduce more parity errors (especially when the module
	 * is bad) and in sabre there is no guarantee that such errors
	 * (if introduced) are written back as poisoned data.
	 */
	if (t_afsr & P_AFSR_UE) {
		int i;

		(void) strcat(pr_reason, "UE ");

		spf_flt.flt_type = CPU_UE_ERR;
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
		    MMU_PAGESHIFT)) ? 1: 0;

		/*
		 * With UE, we have the PA of the fault.
		 * Let do a diagnostic read to get the ecache
		 * data and tag info of the bad line for logging.
		 */
		if (aflt->flt_in_memory) {
			uint32_t ec_set_size;
			uchar_t state;
			uint32_t ecache_idx;
			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);

			/* touch the line to put it in ecache */
			acc_afsr |= read_and_clear_afsr();
			(void) lddphys(faultpa);
			acc_afsr |= (read_and_clear_afsr() &
			    ~(P_AFSR_EDP | P_AFSR_UE));

			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
			    ecache_associativity;

			/* probe each way of the set for the faulting line */
			for (i = 0; i < ecache_associativity; i++) {
				ecache_idx = i * ec_set_size +
				    (aflt->flt_addr % ec_set_size);
				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
				    (uint64_t *)&spf_flt.flt_ec_data[0],
				    &spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
				acc_afsr |= oafsr;

				state = (uchar_t)((spf_flt.flt_ec_tag &
				    cpu_ec_state_mask) >> cpu_ec_state_shift);

				if ((state & cpu_ec_state_valid) &&
				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
				    ((uint64_t)aflt->flt_addr >>
				    cpu_ec_tag_shift)))
					break;
			}

			/*
			 * Check to see if the ecache tag is valid for the
			 * fault PA. In the very unlikely event where the
			 * line could be victimized, no ecache info will be
			 * available. If this is the case, capture the line
			 * from memory instead.
			 */
			if ((state & cpu_ec_state_valid) == 0 ||
			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
				for (i = 0; i < 8; i++, faultpa += 8) {
					ec_data_t *ecdptr;

					ecdptr = &spf_flt.flt_ec_data[i];
					acc_afsr |= read_and_clear_afsr();
					ecdptr->ec_d8 = lddphys(faultpa);
					acc_afsr |= (read_and_clear_afsr() &
					    ~(P_AFSR_EDP | P_AFSR_UE));
					ecdptr->ec_afsr = 0;
							/* null afsr value */
				}

				/*
				 * Mark tag invalid to indicate mem dump
				 * when we print out the info.
				 */
				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
			}
			spf_flt.flt_ec_lcnt = 1;

			/*
			 * Flush out the bad line
			 */
			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
			    cpunodes[CPU->cpu_id].ecache_size);

			acc_afsr |= clear_errors(NULL, NULL);
		}

		/*
		 * Ask our bus nexus friends if they have any fatal errors. If
		 * so, they will log appropriate error messages and panic as a
		 * result. We then queue an event for each UDB that reports a
		 * UE. Each UE reported in a UDB will have its own log message.
		 *
		 * Note from kbn: In the case where there are multiple UEs
		 * (ME bit is set) - the AFAR address is only accurate to
		 * the 16-byte granularity. One cannot tell whether the AFAR
		 * belongs to the UDBH or UDBL syndromes. In this case, we
		 * always report the AFAR address to be 16-byte aligned.
		 *
		 * If we're on a Sabre, there is no SDBL, but it will always
		 * read as zero, so the sdbl test below will safely fail.
		 */
		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
			aflt->flt_panic = 1;

		if (sdbh & P_DER_UE) {
			aflt->flt_synd = sdbh & P_DER_E_SYND;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
		if (sdbl & P_DER_UE) {
			aflt->flt_synd = sdbl & P_DER_E_SYND;
			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
			if (!(aflt->flt_stat & P_AFSR_ME))
				aflt->flt_addr |= 0x8;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}

		/*
		 * We got a UE and are panicking, save the fault PA in a known
		 * location so that the platform specific panic code can check
		 * for copyback errors.
		 */
		if (aflt->flt_panic && aflt->flt_in_memory) {
			panic_aflt = *aflt;
		}
	}

	/*
	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
	 * async error for logging. For Sabre, we panic on EDP or LDP.
	 */
	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
		spf_flt.flt_type = CPU_EDP_LDP_ERR;

		if (t_afsr & P_AFSR_EDP)
			(void) strcat(pr_reason, "EDP ");

		if (t_afsr & P_AFSR_LDP)
			(void) strcat(pr_reason, "LDP ");

		/*
		 * Here we have no PA to work with.
		 * Scan each line in the ecache to look for
		 * the one with bad parity.
		 */
		aflt->flt_addr = AFLT_INV_ADDR;
		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
		    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
		acc_afsr |= (oafsr & ~P_AFSR_WP);

		/*
		 * If we found a bad PA, update the state to indicate if it is
		 * memory or I/O space.  This code will be important if we ever
		 * support cacheable frame buffers.
		 */
		if (aflt->flt_addr != AFLT_INV_ADDR) {
			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
			    MMU_PAGESHIFT)) ? 1 : 0;
		}

		if (isus2i || isus2e)
			aflt->flt_panic = 1;

		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Timeout and bus error handling.  There are two cases to consider:
	 *
	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
	 * have already modified the saved registers so that we will return
	 * from the trap to the appropriate trampoline routine; otherwise panic.
	 *
	 * (2) In user mode, we can simply use our AST mechanism to deliver
	 * a SIGBUS.  We do not log the occurrence - processes performing
	 * device control would generate lots of uninteresting messages.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		if (t_afsr & P_AFSR_TO)
			(void) strcat(pr_reason, "BTO ");

		if (t_afsr & P_AFSR_BERR)
			(void) strcat(pr_reason, "BERR ");

		spf_flt.flt_type = CPU_BTO_BERR_ERR;
		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
	}

	/*
	 * Handle WP: WP happens when the ecache is victimized and a parity
	 * error was detected on a writeback.  The data in question will be
	 * poisoned as a UE will be written back.  The PA is not logged and
	 * it is possible that it doesn't belong to the trapped thread.  The
	 * WP trap is not fatal, but it could be fatal to someone that
	 * subsequently accesses the toxic page.  We set read_all_memscrub
	 * to force the memscrubber to read all of memory when it awakens.
	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
	 * UE back to poison the data.
	 */
	if (t_afsr & P_AFSR_WP) {
		(void) strcat(pr_reason, "WP ");
		if (isus2i || isus2e) {
			aflt->flt_panic = 1;
		} else {
			read_all_memscrub = 1;
		}
		spf_flt.flt_type = CPU_WP_ERR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
	 * This is fatal.
	 */

	if (t_afsr & P_AFSR_CP) {
		if (isus2i || isus2e) {
			(void) strcat(pr_reason, "CP ");
			aflt->flt_panic = 1;
			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		} else {
			/*
			 * Orphan CP: Happens due to signal integrity problem
			 * on a CPU, where a CP is reported, without reporting
			 * its associated UE. This is handled by locating the
			 * bad parity line and would kick off the memscrubber
			 * to find the UE if in memory or in another's cache.
			 */
			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
			(void) strcat(pr_reason, "ORPHAN_CP ");

			/*
			 * Here we have no PA to work with.
			 * Scan each line in the ecache to look for
			 * the one with bad parity.
			 */
			aflt->flt_addr = AFLT_INV_ADDR;
			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
			    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
			    &oafsr);
			acc_afsr |= oafsr;

			/*
			 * If we found a bad PA, update the state to indicate
			 * if it is memory or I/O space.
			 */
			if (aflt->flt_addr != AFLT_INV_ADDR) {
				aflt->flt_in_memory =
				    (pf_is_memory(aflt->flt_addr >>
				    MMU_PAGESHIFT)) ? 1 : 0;
			}
			read_all_memscrub = 1;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);

		}
	}

	/*
	 * If we queued an error other than WP or CP and we are going to return
	 * from the trap and the error was in user mode or inside of a
	 * copy routine, set AST flag so the queue will be drained before
	 * returning to user mode.
	 *
	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
	 * and send an event to its process contract.
	 *
	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
	 * will have been no error queued in this case.
	 */
	if ((t_afsr &
	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
			int pcb_flag = 0;

			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
				pcb_flag |= ASYNC_HWERR;

			if (t_afsr & P_AFSR_BERR)
				pcb_flag |= ASYNC_BERR;

			if (t_afsr & P_AFSR_TO)
				pcb_flag |= ASYNC_BTO;

			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
			aston(curthread);
			action = ACTION_AST_FLAGS;
	}

	/*
	 * In response to a deferred error, we must do one of three things:
	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
	 * set in cases (1) and (2) - check that either action is set or
	 * (3) is true.
	 *
	 * On II, the WP writes poisoned data back to memory, which will
	 * cause a UE and a panic or reboot when read.  In this case, we
	 * don't need to panic at this time.  On IIi and IIe,
	 * aflt->flt_panic is already set above.
	 */
	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
	    (t_afsr & P_AFSR_WP));

	/*
	 * Make a final sanity check to make sure we did not get any more async
	 * errors and accumulate the afsr.
	 */
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
	(void) clear_errors(&spf_flt, NULL);

	/*
	 * Take care of a special case: If there is a UE in the ecache flush
	 * area, we'll see it in flush_ecache().  This will trigger the
	 * CPU_ADDITIONAL_ERRORS case below.
	 *
	 * This could occur if the original error was a UE in the flush area,
	 * or if the original error was an E$ error that was flushed out of
	 * the E$ in scan_ecache().
	 *
	 * If it's at the same address that we're already logging, then it's
	 * probably one of these cases.  Clear the bit so we don't trip over
	 * it on the additional errors case, which could cause an unnecessary
	 * panic.
	 */
	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
	else
		acc_afsr |= aflt->flt_stat;

	/*
	 * Check the accumulated afsr for the important bits.
	 * Make sure the spf_flt.flt_type value is set, and
	 * enqueue an error.
	 */
	if (acc_afsr &
	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
		    P_AFSR_ISAP))
			aflt->flt_panic = 1;

		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
		aflt->flt_stat = acc_afsr;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * If aflt->flt_panic is set at this point, we need to panic as the
	 * result of a trap at TL > 0, or an error we determined to be fatal.
	 * We've already enqueued the error in one of the if-clauses above,
	 * and it will be dequeued and logged as part of the panic flow.
	 */
	if (aflt->flt_panic) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
		    "See previous message(s) for details", " %sError(s)",
		    pr_reason);
	}

	/*
	 * Before returning, we must re-enable errors, and
	 * reset the caches to their boot-up state.
	 */
	set_lsu(get_lsu() | cache_boot_state);
	set_error_enable(EER_ENABLE);
}
1810 
1811 /*
1812  * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1813  * This routine is shared by the CE and UE handling code.
1814  */
1815 static void
1816 check_misc_err(spitf_async_flt *spf_flt)
1817 {
1818 	struct async_flt *aflt = (struct async_flt *)spf_flt;
1819 	char *fatal_str = NULL;
1820 
1821 	/*
1822 	 * The ISAP and ETP errors are supposed to cause a POR
1823 	 * from the system, so in theory we never, ever see these messages.
1824 	 * ISAP, ETP and IVUE are considered to be fatal.
1825 	 */
1826 	if (aflt->flt_stat & P_AFSR_ISAP)
1827 		fatal_str = " System Address Parity Error on";
1828 	else if (aflt->flt_stat & P_AFSR_ETP)
1829 		fatal_str = " Ecache Tag Parity Error on";
1830 	else if (aflt->flt_stat & P_AFSR_IVUE)
1831 		fatal_str = " Interrupt Vector Uncorrectable Error on";
1832 	if (fatal_str != NULL) {
1833 		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
1834 		    NULL, fatal_str);
1835 	}
1836 }
1837 
1838 /*
1839  * Routine to convert a syndrome into a syndrome code.
1840  */
1841 static int
1842 synd_to_synd_code(int synd_status, ushort_t synd)
1843 {
1844 	if (synd_status != AFLT_STAT_VALID)
1845 		return (-1);
1846 
1847 	/*
1848 	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
1849 	 * to get the code indicating which bit(s) is(are) bad.
1850 	 */
1851 	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
1852 		return (-1);
1853 	else
1854 		return (ecc_syndrome_tab[synd]);
1855 }
1856 
/*
 * Stub: returning a serial id (SID) for a memory unum is not
 * supported by this cpu module.
 */
/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
1863 
/*
 * Stub: translating a fault address to a memory offset is not
 * supported by this cpu module.
 */
/* ARGSUSED */
int
cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
{
	return (ENOTSUP);
}
1870 
/*
 * Stub: deriving a memory address from a unum/sid/offset tuple is
 * not supported by this cpu module.
 */
/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	return (ENOTSUP);
}
1877 
1878 /*
1879  * Routine to return a string identifying the physical name
1880  * associated with a memory/cache error.
1881  */
1882 /* ARGSUSED */
1883 int
1884 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
1885     uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
1886     char *buf, int buflen, int *lenp)
1887 {
1888 	short synd_code;
1889 	int ret;
1890 
1891 	if (flt_in_memory) {
1892 		synd_code = synd_to_synd_code(synd_status, synd);
1893 		if (synd_code == -1) {
1894 			ret = EINVAL;
1895 		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
1896 		    buf, buflen, lenp) != 0) {
1897 			ret = EIO;
1898 		} else if (*lenp <= 1) {
1899 			ret = EINVAL;
1900 		} else {
1901 			ret = 0;
1902 		}
1903 	} else {
1904 		ret = ENOTSUP;
1905 	}
1906 
1907 	if (ret != 0) {
1908 		buf[0] = '\0';
1909 		*lenp = 0;
1910 	}
1911 
1912 	return (ret);
1913 }
1914 
/*
 * Wrapper for cpu_get_mem_unum() routine that takes an
 * async_flt struct rather than explicit arguments.
 */
int
cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
    char *buf, int buflen, int *lenp)
{
	/* Unpack the fault structure into cpu_get_mem_unum()'s arguments. */
	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
	    aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
	    aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
}
1927 
1928 /*
1929  * This routine is a more generic interface to cpu_get_mem_unum(),
1930  * that may be used by other modules (e.g. mm).
1931  */
1932 int
1933 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1934 		char *buf, int buflen, int *lenp)
1935 {
1936 	int synd_status, flt_in_memory, ret;
1937 	char unum[UNUM_NAMLEN];
1938 
1939 	/*
1940 	 * Check for an invalid address.
1941 	 */
1942 	if (afar == (uint64_t)-1)
1943 		return (ENXIO);
1944 
1945 	if (synd == (uint64_t)-1)
1946 		synd_status = AFLT_STAT_INVALID;
1947 	else
1948 		synd_status = AFLT_STAT_VALID;
1949 
1950 	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
1951 
1952 	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1953 	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
1954 	    != 0)
1955 		return (ret);
1956 
1957 	if (*lenp >= buflen)
1958 		return (ENAMETOOLONG);
1959 
1960 	(void) strncpy(buf, unum, buflen);
1961 
1962 	return (0);
1963 }
1964 
/*
 * Routine to return memory information associated
 * with a physical address and syndrome.
 * Stub: not supported by this cpu module.
 */
/* ARGSUSED */
int
cpu_get_mem_info(uint64_t synd, uint64_t afar,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
	return (ENOTSUP);
}
1977 
/*
 * Routine to return a string identifying the physical
 * name associated with a cpuid.
 * Stub: not supported by this cpu module.
 */
/* ARGSUSED */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
1988 
1989 /*
1990  * This routine returns the size of the kernel's FRU name buffer.
1991  */
1992 size_t
1993 cpu_get_name_bufsize()
1994 {
1995 	return (UNUM_NAMLEN);
1996 }
1997 
1998 /*
1999  * Cpu specific log func for UEs.
2000  */
2001 static void
2002 log_ue_err(struct async_flt *aflt, char *unum)
2003 {
2004 	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
2005 	int len = 0;
2006 
2007 #ifdef DEBUG
2008 	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
2009 
2010 	/*
2011 	 * Paranoid Check for priv mismatch
2012 	 * Only applicable for UEs
2013 	 */
2014 	if (afsr_priv != aflt->flt_priv) {
2015 		/*
2016 		 * The priv bits in %tstate and %afsr did not match; we expect
2017 		 * this to be very rare, so flag it with a message.
2018 		 */
2019 		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
2020 		    ": PRIV bit in TSTATE and AFSR mismatched; "
2021 		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
2022 
2023 		/* update saved afsr to reflect the correct priv */
2024 		aflt->flt_stat &= ~P_AFSR_PRIV;
2025 		if (aflt->flt_priv)
2026 			aflt->flt_stat |= P_AFSR_PRIV;
2027 	}
2028 #endif /* DEBUG */
2029 
2030 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
2031 	    UNUM_NAMLEN, &len);
2032 
2033 	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
2034 	    " Uncorrectable Memory Error on");
2035 
2036 	if (SYND(aflt->flt_synd) == 0x3) {
2037 		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
2038 		    " Syndrome 0x3 indicates that this may not be a "
2039 		    "memory module problem");
2040 	}
2041 
2042 	if (aflt->flt_in_memory)
2043 		cpu_log_ecmem_info(spf_flt);
2044 }
2045 
2046 
2047 /*
2048  * The cpu_async_log_err() function is called via the ue_drain() function to
2049  * handle logging for CPU events that are dequeued.  As such, it can be invoked
2050  * from softint context, from AST processing in the trap() flow, or from the
2051  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
2052  */
2053 static void
2054 cpu_async_log_err(void *flt)
2055 {
2056 	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
2057 	struct async_flt *aflt = (struct async_flt *)flt;
2058 	char unum[UNUM_NAMLEN];
2059 	char *space;
2060 	char *ecache_scrub_logstr = NULL;
2061 
2062 	switch (spf_flt->flt_type) {
2063 	case CPU_UE_ERR:
2064 		/*
2065 		 * We want to skip logging only if ALL the following
2066 		 * conditions are true:
2067 		 *
2068 		 *	1. We are not panicking
2069 		 *	2. There is only one error
2070 		 *	3. That error is a memory error
2071 		 *	4. The error is caused by the memory scrubber (in
2072 		 *	   which case the error will have occurred under
2073 		 *	   on_trap protection)
2074 		 *	5. The error is on a retired page
2075 		 *
2076 		 * Note 1: AFLT_PROT_EC is used places other than the memory
2077 		 * scrubber.  However, none of those errors should occur
2078 		 * on a retired page.
2079 		 *
2080 		 * Note 2: In the CE case, these errors are discarded before
2081 		 * the errorq.  In the UE case, we must wait until now --
2082 		 * softcall() grabs a mutex, which we can't do at a high PIL.
2083 		 */
2084 		if (!panicstr &&
2085 		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
2086 		    aflt->flt_prot == AFLT_PROT_EC) {
2087 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2088 				/* Zero the address to clear the error */
2089 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2090 				return;
2091 			}
2092 		}
2093 
2094 		/*
2095 		 * Log the UE and check for causes of this UE error that
2096 		 * don't cause a trap (Copyback error).  cpu_async_error()
2097 		 * has already checked the i/o buses for us.
2098 		 */
2099 		log_ue_err(aflt, unum);
2100 		if (aflt->flt_in_memory)
2101 			cpu_check_allcpus(aflt);
2102 		break;
2103 
2104 	case CPU_EDP_LDP_ERR:
2105 		if (aflt->flt_stat & P_AFSR_EDP)
2106 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2107 			    NULL, " EDP event on");
2108 
2109 		if (aflt->flt_stat & P_AFSR_LDP)
2110 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2111 			    NULL, " LDP event on");
2112 
2113 		/* Log ecache info if exist */
2114 		if (spf_flt->flt_ec_lcnt > 0) {
2115 			cpu_log_ecmem_info(spf_flt);
2116 
2117 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2118 			    NULL, " AFAR was derived from E$Tag");
2119 		} else {
2120 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2121 			    NULL, " No error found in ecache (No fault "
2122 			    "PA available)");
2123 		}
2124 		break;
2125 
2126 	case CPU_WP_ERR:
2127 		/*
2128 		 * If the memscrub thread hasn't yet read
2129 		 * all of memory, as we requested in the
2130 		 * trap handler, then give it a kick to
2131 		 * make sure it does.
2132 		 */
2133 		if (!isus2i && !isus2e && read_all_memscrub)
2134 			memscrub_run();
2135 
2136 		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
2137 		    " WP event on");
2138 		return;
2139 
2140 	case CPU_BTO_BERR_ERR:
2141 		/*
2142 		 * A bus timeout or error occurred that was in user mode or not
2143 		 * in a protected kernel code region.
2144 		 */
2145 		if (aflt->flt_stat & P_AFSR_BERR) {
2146 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2147 			    spf_flt, BERRTO_LFLAGS, NULL,
2148 			    " Bus Error on System Bus in %s mode from",
2149 			    aflt->flt_priv ? "privileged" : "user");
2150 		}
2151 
2152 		if (aflt->flt_stat & P_AFSR_TO) {
2153 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2154 			    spf_flt, BERRTO_LFLAGS, NULL,
2155 			    " Timeout on System Bus in %s mode from",
2156 			    aflt->flt_priv ? "privileged" : "user");
2157 		}
2158 
2159 		return;
2160 
2161 	case CPU_PANIC_CP_ERR:
2162 		/*
2163 		 * Process the Copyback (CP) error info (if any) obtained from
2164 		 * polling all the cpus in the panic flow. This case is only
2165 		 * entered if we are panicking.
2166 		 */
2167 		ASSERT(panicstr != NULL);
2168 		ASSERT(aflt->flt_id == panic_aflt.flt_id);
2169 
2170 		/* See which space - this info may not exist */
2171 		if (panic_aflt.flt_status & ECC_D_TRAP)
2172 			space = "Data ";
2173 		else if (panic_aflt.flt_status & ECC_I_TRAP)
2174 			space = "Instruction ";
2175 		else
2176 			space = "";
2177 
2178 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2179 		    " AFAR was derived from UE report,"
2180 		    " CP event on CPU%d (caused %saccess error on %s%d)",
2181 		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
2182 		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
2183 
2184 		if (spf_flt->flt_ec_lcnt > 0)
2185 			cpu_log_ecmem_info(spf_flt);
2186 		else
2187 			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
2188 			    NULL, " No cache dump available");
2189 
2190 		return;
2191 
2192 	case CPU_TRAPPING_CP_ERR:
2193 		/*
2194 		 * For sabre only.  This is a copyback ecache parity error due
2195 		 * to a PCI DMA read.  We should be panicking if we get here.
2196 		 */
2197 		ASSERT(panicstr != NULL);
2198 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2199 		    " AFAR was derived from UE report,"
2200 		    " CP event on CPU%d (caused Data access error "
2201 		    "on PCIBus)", aflt->flt_inst);
2202 		return;
2203 
2204 		/*
2205 		 * We log the ecache lines of the following states,
2206 		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
2207 		 * dirty_bad_busy if ecache_scrub_verbose is set and panic
2208 		 * in addition to logging if ecache_scrub_panic is set.
2209 		 */
2210 	case CPU_BADLINE_CI_ERR:
2211 		ecache_scrub_logstr = "CBI";
2212 		/* FALLTHRU */
2213 
2214 	case CPU_BADLINE_CB_ERR:
2215 		if (ecache_scrub_logstr == NULL)
2216 			ecache_scrub_logstr = "CBB";
2217 		/* FALLTHRU */
2218 
2219 	case CPU_BADLINE_DI_ERR:
2220 		if (ecache_scrub_logstr == NULL)
2221 			ecache_scrub_logstr = "DBI";
2222 		/* FALLTHRU */
2223 
2224 	case CPU_BADLINE_DB_ERR:
2225 		if (ecache_scrub_logstr == NULL)
2226 			ecache_scrub_logstr = "DBB";
2227 
2228 		cpu_aflt_log(CE_NOTE, 2, spf_flt,
2229 		    (CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
2230 		    " %s event on", ecache_scrub_logstr);
2231 		cpu_log_ecmem_info(spf_flt);
2232 
2233 		return;
2234 
2235 	case CPU_ORPHAN_CP_ERR:
2236 		/*
2237 		 * Orphan CPs, where the CP bit is set, but when a CPU
2238 		 * doesn't report a UE.
2239 		 */
2240 		if (read_all_memscrub)
2241 			memscrub_run();
2242 
2243 		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
2244 		    NULL, " Orphan CP event on");
2245 
2246 		/* Log ecache info if exist */
2247 		if (spf_flt->flt_ec_lcnt > 0)
2248 			cpu_log_ecmem_info(spf_flt);
2249 		else
2250 			cpu_aflt_log(CE_NOTE, 2, spf_flt,
2251 			    (CP_LFLAGS | CPU_FLTCPU), NULL,
2252 			    " No error found in ecache (No fault "
2253 			    "PA available");
2254 		return;
2255 
2256 	case CPU_ECACHE_ADDR_PAR_ERR:
2257 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2258 		    " E$ Tag Address Parity error on");
2259 		cpu_log_ecmem_info(spf_flt);
2260 		return;
2261 
2262 	case CPU_ECACHE_STATE_ERR:
2263 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2264 		    " E$ Tag State Parity error on");
2265 		cpu_log_ecmem_info(spf_flt);
2266 		return;
2267 
2268 	case CPU_ECACHE_TAG_ERR:
2269 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2270 		    " E$ Tag scrub event on");
2271 		cpu_log_ecmem_info(spf_flt);
2272 		return;
2273 
2274 	case CPU_ECACHE_ETP_ETS_ERR:
2275 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2276 		    " AFSR.ETP is set and AFSR.ETS is zero on");
2277 		cpu_log_ecmem_info(spf_flt);
2278 		return;
2279 
2280 
2281 	case CPU_ADDITIONAL_ERR:
2282 		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
2283 		    " Additional errors detected during error processing on");
2284 		return;
2285 
2286 	default:
2287 		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
2288 		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
2289 		return;
2290 	}
2291 
2292 	/* ... fall through from the UE, EDP, or LDP cases */
2293 
2294 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2295 		if (!panicstr) {
2296 			(void) page_retire(aflt->flt_addr, PR_UE);
2297 		} else {
2298 			/*
2299 			 * Clear UEs on panic so that we don't
2300 			 * get haunted by them during panic or
2301 			 * after reboot
2302 			 */
2303 			clearphys(P2ALIGN(aflt->flt_addr, 64),
2304 			    cpunodes[CPU->cpu_id].ecache_size,
2305 			    cpunodes[CPU->cpu_id].ecache_linesize);
2306 
2307 			(void) clear_errors(NULL, NULL);
2308 		}
2309 	}
2310 
2311 	/*
2312 	 * Log final recover message
2313 	 */
2314 	if (!panicstr) {
2315 		if (!aflt->flt_priv) {
2316 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2317 			    NULL, " Above Error is in User Mode"
2318 			    "\n    and is fatal: "
2319 			    "will SIGKILL process and notify contract");
2320 		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
2321 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2322 			    NULL, " Above Error detected while dumping core;"
2323 			    "\n    core file will be truncated");
2324 		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
2325 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2326 			    NULL, " Above Error is due to Kernel access"
2327 			    "\n    to User space and is fatal: "
2328 			    "will SIGKILL process and notify contract");
2329 		} else if (aflt->flt_prot == AFLT_PROT_EC) {
2330 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
2331 			    " Above Error detected by protected Kernel code"
2332 			    "\n    that will try to clear error from system");
2333 		}
2334 	}
2335 }
2336 
2337 
2338 /*
2339  * Check all cpus for non-trapping UE-causing errors
2340  * In Ultra I/II, we look for copyback errors (CPs)
2341  */
2342 void
2343 cpu_check_allcpus(struct async_flt *aflt)
2344 {
2345 	spitf_async_flt cp;
2346 	spitf_async_flt *spf_cpflt = &cp;
2347 	struct async_flt *cpflt = (struct async_flt *)&cp;
2348 	int pix;
2349 
2350 	cpflt->flt_id = aflt->flt_id;
2351 	cpflt->flt_addr = aflt->flt_addr;
2352 
2353 	for (pix = 0; pix < NCPU; pix++) {
2354 		if (CPU_XCALL_READY(pix)) {
2355 			xc_one(pix, (xcfunc_t *)get_cpu_status,
2356 			    (uint64_t)cpflt, 0);
2357 
2358 			if (cpflt->flt_stat & P_AFSR_CP) {
2359 				char *space;
2360 
2361 				/* See which space - this info may not exist */
2362 				if (aflt->flt_status & ECC_D_TRAP)
2363 					space = "Data ";
2364 				else if (aflt->flt_status & ECC_I_TRAP)
2365 					space = "Instruction ";
2366 				else
2367 					space = "";
2368 
2369 				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
2370 				    NULL, " AFAR was derived from UE report,"
2371 				    " CP event on CPU%d (caused %saccess "
2372 				    "error on %s%d)", pix, space,
2373 				    (aflt->flt_status & ECC_IOBUS) ?
2374 				    "IOBUS" : "CPU", aflt->flt_bus_id);
2375 
2376 				if (spf_cpflt->flt_ec_lcnt > 0)
2377 					cpu_log_ecmem_info(spf_cpflt);
2378 				else
2379 					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
2380 					    CPU_ERRID_FIRST, NULL,
2381 					    " No cache dump available");
2382 			}
2383 		}
2384 	}
2385 }
2386 
#ifdef DEBUG
/* Debug knob: when nonzero, get_cpu_status() fakes a CP error. */
int test_mp_cp = 0;
#endif
2390 
/*
 * Cross-call callback routine to tell a CPU to read its own %afsr to check
 * for copyback errors and capture relevant information.
 *
 * The caller's async_flt (passed via 'arg') supplies flt_addr; on return
 * flt_stat holds this cpu's AFSR (plus any accumulated scrubber bits), and
 * if CP was set, the UDB values and a matching ecache line are captured
 * into the spitf_async_flt portion.  Always returns 0.
 */
static uint_t
get_cpu_status(uint64_t arg)
{
	struct async_flt *aflt = (struct async_flt *)arg;
	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
	uint64_t afsr;
	uint32_t ec_idx;
	uint64_t sdbh, sdbl;
	int i;
	uint32_t ec_set_size;
	uchar_t valid;
	ec_data_t ec_data[8];
	uint64_t ec_tag, flt_addr_tag, oafsr;
	uint64_t *acc_afsr = NULL;

	/* fold in (and clear) any AFSR bits accumulated by the scrubber */
	get_asyncflt(&afsr);
	if (CPU_PRIVATE(CPU) != NULL) {
		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		afsr |= *acc_afsr;
		*acc_afsr = 0;
	}

#ifdef DEBUG
	/* debug hook: pretend this cpu saw a copyback error */
	if (test_mp_cp)
		afsr |= P_AFSR_CP;
#endif
	aflt->flt_stat = afsr;

	if (afsr & P_AFSR_CP) {
		/*
		 * Capture the UDBs
		 */
		get_udb_errors(&sdbh, &sdbl);
		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);

		/*
		 * Clear CP bit before capturing ecache data
		 * and AFSR info.
		 */
		set_asyncflt(P_AFSR_CP);

		/*
		 * See if we can capture the ecache line for the
		 * fault PA.
		 *
		 * Return a valid matching ecache line, if any.
		 * Otherwise, return the first matching ecache
		 * line marked invalid.
		 */
		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
		    ecache_associativity;
		spf_flt->flt_ec_lcnt = 0;

		/* probe each way of the set that maps the fault PA */
		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
			get_ecache_dtag(P2ALIGN(ec_idx, 64),
			    (uint64_t *)&ec_data[0], &ec_tag, &oafsr,
			    acc_afsr);

			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
				continue;

			valid = cpu_ec_state_valid &
			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
			    cpu_ec_state_shift);

			/* keep the first match; stop at the first valid one */
			if (valid || spf_flt->flt_ec_lcnt == 0) {
				spf_flt->flt_ec_tag = ec_tag;
				bcopy(&ec_data, &spf_flt->flt_ec_data,
				    sizeof (ec_data));
				spf_flt->flt_ec_lcnt = 1;

				if (valid)
					break;
			}
		}
	}
	return (0);
}
2476 
/*
 * CPU-module callback for the non-panicking CPUs.  This routine is invoked
 * from panic_idle() as part of the other CPUs stopping themselves when a
 * panic occurs.  We need to be VERY careful what we do here, since panicstr
 * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
 * CP error information.
 */
void
cpu_async_panic_callb(void)
{
	spitf_async_flt cp;
	struct async_flt *aflt = (struct async_flt *)&cp;
	uint64_t *scrub_afsr;

	if (panic_aflt.flt_id != 0) {
		aflt->flt_addr = panic_aflt.flt_addr;
		(void) get_cpu_status((uint64_t)aflt);

		/* fold in any CP state accumulated by the ecache scrubber */
		if (CPU_PRIVATE(CPU) != NULL) {
			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
			if (*scrub_afsr & P_AFSR_CP) {
				aflt->flt_stat |= *scrub_afsr;
				*scrub_afsr = 0;
			}
		}
		/* if a copyback error was seen, queue it for the panic flow */
		if (aflt->flt_stat & P_AFSR_CP) {
			aflt->flt_id = panic_aflt.flt_id;
			aflt->flt_panic = 1;
			aflt->flt_inst = CPU->cpu_id;
			aflt->flt_class = CPU_FAULT;
			cp.flt_type = CPU_PANIC_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&cp, sizeof (cp), ue_queue,
			    aflt->flt_panic);
		}
	}
}
2515 
/*
 * Turn off all cpu error detection, normally only used for panics.
 */
void
cpu_disable_errors(void)
{
	/* cross-trap every cpu to overwrite its error-enable register */
	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
}
2524 
/*
 * Enable errors.
 */
void
cpu_enable_errors(void)
{
	/* cross-trap every cpu to overwrite its error-enable register */
	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
}
2533 
2534 static void
2535 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
2536 {
2537 	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
2538 	int i, loop = 1;
2539 	ushort_t ecc_0;
2540 	uint64_t paddr;
2541 	uint64_t data;
2542 
2543 	if (verbose)
2544 		loop = 8;
2545 	for (i = 0; i < loop; i++) {
2546 		paddr = aligned_addr + (i * 8);
2547 		data = lddphys(paddr);
2548 		if (verbose) {
2549 			if (ce_err) {
2550 				ecc_0 = ecc_gen((uint32_t)(data>>32),
2551 				    (uint32_t)data);
2552 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2553 				    NULL, "    Paddr 0x%" PRIx64 ", "
2554 				    "Data 0x%08x.%08x, ECC 0x%x", paddr,
2555 				    (uint32_t)(data>>32), (uint32_t)data,
2556 				    ecc_0);
2557 			} else {
2558 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2559 				    NULL, "    Paddr 0x%" PRIx64 ", "
2560 				    "Data 0x%08x.%08x", paddr,
2561 				    (uint32_t)(data>>32), (uint32_t)data);
2562 			}
2563 		}
2564 	}
2565 }
2566 
/*
 * Check-bit generation masks used by ecc_gen(): row i holds the data
 * mask (hi/lo 32-bit words) that is XOR-folded to produce check bit i.
 */
static struct {		/* sec-ded-s4ed ecc code */
	uint_t hi, lo;
} ecc_code[8] = {
	{ 0xee55de23U, 0x16161161U },
	{ 0x55eede93U, 0x61612212U },
	{ 0xbb557b8cU, 0x49494494U },
	{ 0x55bb7b6cU, 0x94948848U },
	{ 0x16161161U, 0xee55de23U },
	{ 0x61612212U, 0x55eede93U },
	{ 0x49494494U, 0xbb557b8cU },
	{ 0x94948848U, 0x55bb7b6cU }
};
2579 
2580 static ushort_t
2581 ecc_gen(uint_t high_bytes, uint_t low_bytes)
2582 {
2583 	int i, j;
2584 	uchar_t checker, bit_mask;
2585 	struct {
2586 		uint_t hi, lo;
2587 	} hex_data, masked_data[8];
2588 
2589 	hex_data.hi = high_bytes;
2590 	hex_data.lo = low_bytes;
2591 
2592 	/* mask out bits according to sec-ded-s4ed ecc code */
2593 	for (i = 0; i < 8; i++) {
2594 		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
2595 		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
2596 	}
2597 
2598 	/*
2599 	 * xor all bits in masked_data[i] to get bit_i of checker,
2600 	 * where i = 0 to 7
2601 	 */
2602 	checker = 0;
2603 	for (i = 0; i < 8; i++) {
2604 		bit_mask = 1 << i;
2605 		for (j = 0; j < 32; j++) {
2606 			if (masked_data[i].lo & 1) checker ^= bit_mask;
2607 			if (masked_data[i].hi & 1) checker ^= bit_mask;
2608 			masked_data[i].hi >>= 1;
2609 			masked_data[i].lo >>= 1;
2610 		}
2611 	}
2612 	return (checker);
2613 }
2614 
/*
 * Flush the entire ecache using displacement flush by reading through a
 * physical address range as large as the ecache.
 */
void
cpu_flush_ecache(void)
{
	/* read through twice the ecache size starting at ecache_flushaddr */
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}
2625 
2626 /*
2627  * read and display the data in the cache line where the
2628  * original ce error occurred.
2629  * This routine is mainly used for debugging new hardware.
2630  */
2631 void
2632 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
2633 {
2634 	kpreempt_disable();
2635 	/* disable ECC error traps */
2636 	set_error_enable(EER_ECC_DISABLE);
2637 
2638 	/*
2639 	 * flush the ecache
2640 	 * read the data
2641 	 * check to see if an ECC error occured
2642 	 */
2643 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2644 	    cpunodes[CPU->cpu_id].ecache_linesize);
2645 	set_lsu(get_lsu() | cache_boot_state);
2646 	cpu_read_paddr(ecc, verbose, ce_err);
2647 	(void) check_ecc(ecc);
2648 
2649 	/* enable ECC error traps */
2650 	set_error_enable(EER_ENABLE);
2651 	kpreempt_enable();
2652 }
2653 
/*
 * Check the AFSR bits for UE/CE persistence.
 * If UE or CE errors are detected, the routine will
 * clears all the AFSR sticky bits (except CP for
 * spitfire/blackbird) and the UDBs.
 * if ce_debug or ue_debug is set, log any ue/ce errors detected.
 *
 * Returns 1 if the same error (same address, and for CEs the same
 * syndrome) is still present, 0 otherwise.
 */
static int
check_ecc(struct async_flt *ecc)
{
	uint64_t t_afsr;
	uint64_t t_afar;
	uint64_t udbh;
	uint64_t udbl;
	ushort_t udb;
	int persistent = 0;

	/*
	 * Capture the AFSR, AFAR and UDBs info
	 */
	get_asyncflt(&t_afsr);
	get_asyncaddr(&t_afar);
	t_afar &= SABRE_AFAR_PA;
	get_udb_errors(&udbh, &udbl);

	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
		/*
		 * Clear the errors
		 */
		clr_datapath();

		/* IIi/IIe clear everything; others preserve the CP bit */
		if (isus2i || isus2e)
			set_asyncflt(t_afsr);
		else
			set_asyncflt(t_afsr & ~P_AFSR_CP);

		/*
		 * determine whether to check UDBH or UDBL for persistence
		 */
		if (ecc->flt_synd & UDBL_REG) {
			udb = (ushort_t)udbl;
			t_afar |= 0x8;
		} else {
			udb = (ushort_t)udbh;
		}

		if (ce_debug || ue_debug) {
			spitf_async_flt spf_flt; /* for logging */
			struct async_flt *aflt =
			    (struct async_flt *)&spf_flt;

			/* Package the info nicely in the spf_flt struct */
			bzero(&spf_flt, sizeof (spitf_async_flt));
			aflt->flt_stat = t_afsr;
			aflt->flt_addr = t_afar;
			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);

			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
			    " check_ecc: Dumping captured error states ...");
		}

		/*
		 * if the fault addresses don't match, not persistent
		 */
		if (t_afar != ecc->flt_addr) {
			return (persistent);
		}

		/*
		 * check for UE persistence
		 * since all DIMMs in the bank are identified for a UE,
		 * there's no reason to check the syndrome
		 */
		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
			persistent = 1;
		}

		/*
		 * check for CE persistence
		 */
		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
			if ((udb & P_DER_E_SYND) ==
			    (ecc->flt_synd & P_DER_E_SYND)) {
				persistent = 1;
			}
		}
	}
	return (persistent);
}
2745 
#ifdef HUMMINGBIRD
#define	HB_FULL_DIV		1
#define	HB_HALF_DIV		2
#define	HB_LOWEST_DIV		8
#define	HB_ECLK_INVALID		0xdeadbad
/*
 * Map a clock divisor (index 1..HB_LOWEST_DIV) to its HB_ECLK_* register
 * encoding; divisors with no encoding map to HB_ECLK_INVALID.
 */
static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
	HB_ECLK_8 };

#define	HB_SLOW_DOWN		0
#define	HB_SPEED_UP		1

/* Program the estar mode register, then wait for the PLL to settle. */
#define	SET_ESTAR_MODE(mode)					\
	stdphysio(HB_ESTAR_MODE, (mode));			\
	/*							\
	 * PLL logic requires minimum of 16 clock		\
	 * cycles to lock to the new clock speed.		\
	 * Wait 1 usec to satisfy this requirement.		\
	 */							\
	drv_usecwait(1);

/*
 * Rescale the memory refresh count in HB_MEM_CNTRL0 for the new cpu
 * clock divisor.  When slowing down with memory self refresh disabled,
 * also wait for the outgoing count to drain (see comment in the body).
 */
#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
{								\
	volatile uint64_t data;					\
	uint64_t count, new_count;				\
	clock_t delay;						\
	data = lddphysio(HB_MEM_CNTRL0);			\
	count = (data & HB_REFRESH_COUNT_MASK) >> 		\
	    HB_REFRESH_COUNT_SHIFT;				\
	new_count = (HB_REFRESH_INTERVAL *			\
	    cpunodes[CPU->cpu_id].clock_freq) /			\
	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);        		\
	/*							\
	 * If we are slowing down the cpu and Memory		\
	 * Self Refresh is not enabled, it is required		\
	 * to wait for old refresh count to count-down and	\
	 * new refresh count to go into effect (let new value	\
	 * counts down once).					\
	 */							\
	if ((direction) == HB_SLOW_DOWN &&			\
	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
		/*						\
		 * Each count takes 64 cpu clock cycles		\
		 * to decrement.  Wait for current refresh	\
		 * count plus new refresh count at current	\
		 * cpu speed to count down to zero.  Round	\
		 * up the delay time.				\
		 */						\
		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
		    (count + new_count) * MICROSEC * (cur_div)) /\
		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
		drv_usecwait(delay);				\
	}							\
}

/* Write the memory self-refresh field of HB_MEM_CNTRL0 to 'bit'. */
#define	SET_SELF_REFRESH(bit)					\
{								\
	volatile uint64_t data;					\
	data = lddphysio(HB_MEM_CNTRL0);			\
	data = (data & ~HB_SELF_REFRESH_MASK) |			\
	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
}
#endif	/* HUMMINGBIRD */
2816 
/*
 * Change the cpu clock divisor (Hummingbird E-star power management).
 * Memory refresh and self-refresh settings are adjusted around each
 * transition, and changes between full speed and divisors greater than
 * two are staged through half speed.  A no-op on non-HUMMINGBIRD builds.
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
{
#ifdef HUMMINGBIRD
	uint64_t cur_mask, cur_divisor = 0;
	volatile uint64_t reg;
	processor_info_t *pi = &(CPU->cpu_type_info);
	int index;

	/* reject divisors that have no valid clock encoding */
	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
		    new_divisor);
		return;
	}

	/* derive the current divisor from the estar mode register */
	reg = lddphysio(HB_ESTAR_MODE);
	cur_mask = reg & HB_ECLK_MASK;
	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
		if (hb_eclk[index] == cur_mask) {
			cur_divisor = index;
			break;
		}
	}

	if (cur_divisor == 0)
		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
		    "can't be determined!");

	/*
	 * If we are already at the requested divisor speed, just
	 * return.
	 */
	if (cur_divisor == new_divisor)
		return;

	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);

	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * lower speed.
		 */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * full speed.
		 */
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);

		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);

	} else if (cur_divisor < new_divisor) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > new_divisor) {
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
	}
	/* record the new divisor and the resulting effective clock rate */
	CPU->cpu_m.divisor = (uchar_t)new_divisor;
	CPU->cpu_curr_clock =
	    (((uint64_t)pi->pi_clock * 1000000) / new_divisor);
#endif
}
2905 
2906 /*
2907  * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
2908  * we clear all the sticky bits. If a non-null pointer to a async fault
2909  * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
2910  * info will be returned in the structure.  If a non-null pointer to a
2911  * uint64_t is passed in, this will be updated if the CP bit is set in the
2912  * AFSR.  The afsr will be returned.
2913  */
2914 static uint64_t
2915 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
2916 {
2917 	struct async_flt *aflt = (struct async_flt *)spf_flt;
2918 	uint64_t afsr;
2919 	uint64_t udbh, udbl;
2920 
2921 	get_asyncflt(&afsr);
2922 
2923 	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
2924 		*acc_afsr |= afsr;
2925 
2926 	if (spf_flt != NULL) {
2927 		aflt->flt_stat = afsr;
2928 		get_asyncaddr(&aflt->flt_addr);
2929 		aflt->flt_addr &= SABRE_AFAR_PA;
2930 
2931 		get_udb_errors(&udbh, &udbl);
2932 		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
2933 		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
2934 	}
2935 
2936 	set_asyncflt(afsr);		/* clear afsr */
2937 	clr_datapath();			/* clear udbs */
2938 	return (afsr);
2939 }
2940 
/*
 * Scan the ecache to look for bad lines.  If found, the afsr, afar, e$ data
 * tag of the first bad line will be returned. We also return the old-afsr
 * (before clearing the sticky bits). The linecnt data will be updated to
 * indicate the number of bad lines detected.  Every bad line detected is
 * flushed from the ecache; if no bad line is found, *t_afar is left as
 * AFLT_INV_ADDR.
 */
static void
scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
{
	ec_data_t t_ecdata[8];		/* dtag capture for one 64-byte line */
	uint64_t t_etag, oafsr;
	uint64_t pa = AFLT_INV_ADDR;	/* PA of first bad line, if any */
	uint32_t i, j, ecache_sz;
	uint64_t acc_afsr = 0;		/* AFSRs accumulated over the scan */
	uint64_t *cpu_afsr = NULL;

	/* Per-CPU scrubber AFSR accumulator, when private data exists. */
	if (CPU_PRIVATE(CPU) != NULL)
		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	*linecnt = 0;
	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;

	/* Walk the whole ecache, one 64-byte line per iteration. */
	for (i = 0; i < ecache_sz; i += 64) {
		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
		    cpu_afsr);
		acc_afsr |= oafsr;

		/*
		 * Scan through the whole 64 bytes line in 8 8-byte chunks
		 * looking for the first occurrence of an EDP error.  The AFSR
		 * info is captured for each 8-byte chunk.  Note that for
		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
		 * 16-byte chunk granularity (i.e. the AFSR will be the same
		 * for the high and low 8-byte words within the 16-byte chunk).
		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
		 * granularity and only PSYND bits [7:0] are used.
		 */
		for (j = 0; j < 8; j++) {
			ec_data_t *ecdptr = &t_ecdata[j];

			if (ecdptr->ec_afsr & P_AFSR_EDP) {
				uint64_t errpa;
				ushort_t psynd;
				uint32_t ec_set_size = ecache_sz /
				    ecache_associativity;

				/*
				 * For Spitfire/Blackbird, we need to look at
				 * the PSYND to make sure that this 8-byte chunk
				 * is the right one.  PSYND bits [15:8] belong
				 * to the upper 8-byte (even) chunk.  Bits
				 * [7:0] belong to the lower 8-byte chunk (odd).
				 */
				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
				if (!isus2i && !isus2e) {
					if (j & 0x1)
						psynd = psynd & 0xFF;
					else
						psynd = psynd >> 8;

					if (!psynd)
						continue; /* wrong chunk */
				}

				/* Construct the PA */
				errpa = ((t_etag & cpu_ec_tag_mask) <<
				    cpu_ec_tag_shift) | ((i | (j << 3)) %
				    ec_set_size);

				/* clean up the cache line */
				flushecacheline(P2ALIGN(errpa, 64),
				    cpunodes[CPU->cpu_id].ecache_size);

				/*
				 * Clear the sticky AFSR/UDB state and fold
				 * the pre-clear AFSR into our accumulator.
				 */
				oafsr = clear_errors(NULL, cpu_afsr);
				acc_afsr |= oafsr;

				(*linecnt)++;

				/*
				 * Capture the PA for the first bad line found.
				 * Return the ecache dump and tag info.
				 */
				if (pa == AFLT_INV_ADDR) {
					int k;

					pa = errpa;
					for (k = 0; k < 8; k++)
						ecache_data[k] = t_ecdata[k];
					*ecache_tag = t_etag;
				}
				break;
			}
		}
	}
	*t_afar = pa;
	*t_afsr = acc_afsr;
}
3039 
/*
 * Log the captured ecache (or memory) dump for an async fault: first a
 * summary line with the PA, E$ tag, MOESI state and tag parity (or a
 * note that memory was dumped instead when the captured tag is invalid),
 * then all eight 8-byte data words, flagging any word whose captured
 * AFSR parity syndrome marks it as the bad chunk.
 */
static void
cpu_log_ecmem_info(spitf_async_flt *spf_flt)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	uint64_t ecache_tag = spf_flt->flt_ec_tag;
	char linestr[30];	/* optional "Badlines found=N" suffix */
	char *state_str;
	int i;

	/*
	 * Check the ecache tag to make sure it
	 * is valid. If invalid, a memory dump was
	 * captured instead of a ecache dump.
	 */
	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
		uchar_t eparity = (uchar_t)
		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);

		uchar_t estate = (uchar_t)
		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);

		/* Decode the tag's cache-coherency state for the log. */
		if (estate == cpu_ec_state_shr)
			state_str = "Shared";
		else if (estate == cpu_ec_state_exl)
			state_str = "Exclusive";
		else if (estate == cpu_ec_state_own)
			state_str = "Owner";
		else if (estate == cpu_ec_state_mod)
			state_str = "Modified";
		else
			state_str = "Invalid";

		if (spf_flt->flt_ec_lcnt > 1) {
			(void) snprintf(linestr, sizeof (linestr),
			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
		} else {
			linestr[0] = '\0';
		}

		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
		    (uint32_t)ecache_tag, state_str,
		    (uint32_t)eparity, linestr);
	} else {
		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " E$tag != PA from AFAR; E$line was victimized"
		    "\n    dumping memory from PA 0x%08x.%08x instead",
		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
	}

	/*
	 * Dump out all 8 8-byte ecache data captured
	 * For each 8-byte data captured, we check the
	 * captured afsr's parity syndrome to find out
	 * which 8-byte chunk is bad. For memory dump, the
	 * AFSR values were initialized to 0.
	 */
	for (i = 0; i < 8; i++) {
		ec_data_t *ecdptr;
		uint_t offset;
		ushort_t psynd;
		ushort_t bad;
		uint64_t edp;

		offset = i << 3;	/* multiply by 8 */
		ecdptr = &spf_flt->flt_ec_data[i];
		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
		edp = ecdptr->ec_afsr & P_AFSR_EDP;

		/*
		 * For Sabre/Hummingbird, parity synd is captured only
		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
		 * For spitfire/blackbird, AFSR.PSYND is captured
		 * in 16-byte granularity. [15:8] represent
		 * the upper 8 byte and [7:0] the lower 8 byte.
		 */
		if (isus2i || isus2e || (i & 0x1))
			bad = (psynd & 0xFF);		/* check bits [7:0] */
		else
			bad = (psynd & 0xFF00);		/* check bits [15:8] */

		if (bad && edp) {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x "
			    "*Bad* PSYND=0x%04x", offset,
			    (uint32_t)(ecdptr->ec_d8 >> 32),
			    (uint32_t)ecdptr->ec_d8, psynd);
		} else {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x", offset,
			    (uint32_t)(ecdptr->ec_d8 >> 32),
			    (uint32_t)ecdptr->ec_d8);
		}
	}
}
3138 
3139 /*
3140  * Common logging function for all cpu async errors.  This function allows the
3141  * caller to generate a single cmn_err() call that logs the appropriate items
3142  * from the fault structure, and implements our rules for AFT logging levels.
3143  *
3144  *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
3145  *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
3146  *	spflt: pointer to spitfire async fault structure
3147  *	logflags: bitflags indicating what to output
3148  *	endstr: a end string to appear at the end of this log
3149  *	fmt: a format string to appear at the beginning of the log
3150  *
3151  * The logflags allows the construction of predetermined output from the spflt
3152  * structure.  The individual data items always appear in a consistent order.
3153  * Note that either or both of the spflt structure pointer and logflags may be
3154  * NULL or zero respectively, indicating that the predetermined output
3155  * substrings are not requested in this log.  The output looks like this:
3156  *
3157  *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
3158  *	<CPU_SPACE><CPU_ERRID>
3159  *	newline+4spaces<CPU_AFSR><CPU_AFAR>
3160  *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
3161  *	newline+4spaces<CPU_UDBH><CPU_UDBL>
3162  *	newline+4spaces<CPU_SYND>
3163  *	newline+4spaces<endstr>
3164  *
3165  * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>;
3166  * it is assumed that <endstr> will be the unum string in this case.  The size
3167  * of our intermediate formatting buf[] is based on the worst case of all flags
3168  * being enabled.  We pass the caller's varargs directly to vcmn_err() for
3169  * formatting so we don't need additional stack space to format them here.
3170  */
/*PRINTFLIKE6*/
/*
 * Build and emit one [AFT#]-tagged log message for an async fault.
 * See the block comment above for the argument meanings and the layout
 * of the generated message.  The predetermined substrings selected by
 * logflags are appended in a fixed order; the caller's varargs are
 * handed straight to vcmn_err() for final formatting.
 */
static void
cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
	const char *endstr, const char *fmt, ...)
{
	struct async_flt *aflt = (struct async_flt *)spflt;
	char buf[400], *p, *q; /* see comments about buf[] size above */
	va_list ap;
	int console_log_flag;

	/*
	 * Decide whether the message should also reach the console.
	 * Level-1 CPU faults, panics, and the no-fault case use the
	 * AFT tag number (plus aft_verbose); everything else is gated
	 * by the ce_verbose_memory/ce_verbose_other tunables and may
	 * be suppressed entirely.
	 */
	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
	    (aflt->flt_stat & P_AFSR_LEVEL1)) ||
	    (aflt->flt_panic)) {
		console_log_flag = (tagnum < 2) || aft_verbose;
	} else {
		int verbose = ((aflt->flt_class == BUS_FAULT) ||
		    (aflt->flt_stat & P_AFSR_CE)) ?
		    ce_verbose_memory : ce_verbose_other;

		if (!verbose)
			return;

		console_log_flag = (verbose > 1);
	}

	/* A leading '!' makes cmn_err log to the message buffer only. */
	if (console_log_flag)
		(void) sprintf(buf, "[AFT%d]", tagnum);
	else
		(void) sprintf(buf, "![AFT%d]", tagnum);

	p = buf + strlen(buf);	/* current buffer position */
	q = buf + sizeof (buf);	/* pointer past end of buffer */

	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
		p += strlen(p);
	}

	/*
	 * Copy the caller's format string verbatim into buf[].  It will be
	 * formatted by the call to vcmn_err() at the end of this function.
	 */
	if (fmt != NULL && p < q) {
		(void) strncpy(p, fmt, (size_t)(q - p - 1));
		buf[sizeof (buf) - 1] = '\0';
		p += strlen(p);
	}

	/* Append each requested substring in the documented fixed order. */
	if (spflt != NULL) {
		if (logflags & CPU_FLTCPU) {
			(void) snprintf(p, (size_t)(q - p), " CPU%d",
			    aflt->flt_inst);
			p += strlen(p);
		}

		if (logflags & CPU_SPACE) {
			if (aflt->flt_status & ECC_D_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Data access");
			else if (aflt->flt_status & ECC_I_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Instruction access");
			p += strlen(p);
		}

		if (logflags & CPU_TL) {
			(void) snprintf(p, (size_t)(q - p), " at TL%s",
			    aflt->flt_tl ? ">0" : "=0");
			p += strlen(p);
		}

		if (logflags & CPU_ERRID) {
			(void) snprintf(p, (size_t)(q - p),
			    ", errID 0x%08x.%08x",
			    (uint32_t)(aflt->flt_id >> 32),
			    (uint32_t)aflt->flt_id);
			p += strlen(p);
		}

		if (logflags & CPU_AFSR) {
			/* %b decodes the bit names via AFSR_FMTSTR0/1. */
			(void) snprintf(p, (size_t)(q - p),
			    "\n    AFSR 0x%08b.%08b",
			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
			p += strlen(p);
		}

		if (logflags & CPU_AFAR) {
			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
			    (uint32_t)(aflt->flt_addr >> 32),
			    (uint32_t)aflt->flt_addr);
			p += strlen(p);
		}

		if (logflags & CPU_AF_PSYND) {
			ushort_t psynd = (ushort_t)
			    (aflt->flt_stat & P_AFSR_P_SYND);

			(void) snprintf(p, (size_t)(q - p),
			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
			    psynd, ecc_psynd_score(psynd));
			p += strlen(p);
		}

		if (logflags & CPU_AF_ETS) {
			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
			p += strlen(p);
		}

		if (logflags & CPU_FAULTPC) {
			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
			    (void *)aflt->flt_pc);
			p += strlen(p);
		}

		if (logflags & CPU_UDBH) {
			(void) snprintf(p, (size_t)(q - p),
			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
			    spflt->flt_sdbh, UDB_FMTSTR,
			    spflt->flt_sdbh & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_UDBL) {
			(void) snprintf(p, (size_t)(q - p),
			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
			    spflt->flt_sdbl, UDB_FMTSTR,
			    spflt->flt_sdbl & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_SYND) {
			ushort_t synd = SYND(aflt->flt_synd);

			(void) snprintf(p, (size_t)(q - p),
			    "\n    %s Syndrome 0x%x Memory Module ",
			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
			p += strlen(p);
		}
	}

	if (endstr != NULL) {
		/* When CPU_SYND was logged, endstr is the unum — no newline. */
		if (!(logflags & CPU_SYND))
			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
		else
			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
		p += strlen(p);
	}

	if (ce_code == CE_CONT && (p < q - 1))
		(void) strcpy(p, "\n"); /* add final \n if needed */

	va_start(ap, fmt);
	vcmn_err(ce_code, buf, ap);
	va_end(ap);
}
3329 
3330 /*
3331  * Ecache Scrubbing
3332  *
3333  * The basic idea is to prevent lines from sitting in the ecache long enough
3334  * to build up soft errors which can lead to ecache parity errors.
3335  *
3336  * The following rules are observed when flushing the ecache:
3337  *
3338  * 1. When the system is busy, flush bad clean lines
3339  * 2. When the system is idle, flush all clean lines
3340  * 3. When the system is idle, flush good dirty lines
3341  * 4. Never flush bad dirty lines.
3342  *
3343  *	modify	parity	busy   idle
3344  *	----------------------------
3345  *	clean	good		X
3346  * 	clean	bad	X	X
3347  * 	dirty	good		X
3348  *	dirty	bad
3349  *
3350  * Bad or good refers to whether a line has an E$ parity error or not.
3351  * Clean or dirty refers to the state of the modified bit.  We currently
3352  * default the scan rate to 100 (scan 10% of the cache per second).
3353  *
3354  * The following are E$ states and actions.
3355  *
3356  * We encode our state as a 3-bit number, consisting of:
3357  *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
3358  *	ECACHE_STATE_PARITY	(0=good,  1=bad)
3359  *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
3360  *
3361  * We associate a flushing and a logging action with each state.
3362  *
3363  * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3364  * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
3365  * E$ only, in addition to value being set by ec_flush.
3366  */
3367 
#define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
#define	NEVER_FLUSH		0x0	/* never flush the E$ line */
#define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */

/*
 * Action table indexed by the 3-bit Modified/Parity/Busy (MPB) state
 * built in scrub_ecache_line().
 */
struct {
	char	ec_flush;		/* whether to flush or not */
	char	ec_log;			/* ecache logging */
	char	ec_log_type;		/* log type info */
} ec_action[] = {	/* states of the E$ line in M P B */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
};

/*
 * Offsets into the ec_action[] that determine clean_good_busy and
 * dirty_good_busy lines.
 */
#define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
#define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */

/*
 * We are flushing lines which are Clean_Good_Busy and also the lines
 * Dirty_Good_Busy. And we only follow it for non-mirrored E$.
 */
#define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR))
#define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR))

/* Bit positions within the MPB state index used above. */
#define	ECACHE_STATE_MODIFIED	0x4
#define	ECACHE_STATE_PARITY	0x2
#define	ECACHE_STATE_BUSY	0x1
3404 
/*
 * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced.
 */
int ecache_calls_a_sec_mirrored = 1;
int ecache_lines_per_call_mirrored = 1;

int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
int ecache_scrub_verbose = 1;		/* prints clean and dirty lines */
int ecache_scrub_panic = 0;		/* panics on a clean and dirty line */
int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
int ecache_idle_factor = 1;		/* increase the scan rate when idle */
int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */

/* Scrub timeouts issued per second; hz is divided by this in the timeout. */
volatile int ec_timeout_calls = 1;	/* timeout calls */

/*
 * Interrupt number and pil for ecache scrubber cross-trap calls.
 */
static uint64_t ecache_scrub_inum;
uint_t ecache_scrub_pil = PIL_9;
3427 
/*
 * Kstats for the E$ scrubber.  The first eight counters line up with the
 * eight MPB states in ec_action[]; scrub_ecache_line() indexes into this
 * struct by the MPB value to bump the matching counter.
 */
typedef struct ecache_kstat {
	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
	kstat_named_t clean_good_busy;		/* # of lines skipped */
	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
	kstat_named_t dirty_good_busy;		/* # of lines skipped */
	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
	kstat_named_t invalid_lines;		/* # of invalid lines */
	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
} ecache_kstat_t;

/* Template used to initialize each CPU's scrubber kstat names/types. */
static ecache_kstat_t ec_kstat_template = {
	{ "clean_good_idle", KSTAT_DATA_ULONG },
	{ "clean_good_busy", KSTAT_DATA_ULONG },
	{ "clean_bad_idle", KSTAT_DATA_ULONG },
	{ "clean_bad_busy", KSTAT_DATA_ULONG },
	{ "dirty_good_idle", KSTAT_DATA_ULONG },
	{ "dirty_good_busy", KSTAT_DATA_ULONG },
	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
	{ "invalid_lines", KSTAT_DATA_ULONG },
	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
};
3460 
/*
 * kmem cache for per-CPU spitfire_private_t areas; created on first use
 * in cpu_init_private() with S_ECACHE_MAX_LSIZE (64-byte) alignment.
 */
struct kmem_cache *sf_private_cache;
3462 
/*
 * Called periodically on each CPU to scan the ecache once a sec,
 * adjusting the ecache line index appropriately.
 *
 * Each call scans scan_lines lines starting at the saved per-CPU index,
 * classifies each line into a Modified/Parity/Busy (MPB) state, and
 * flushes, retires, logs and counts it per the ec_action[] table and the
 * flush-rate tunables.
 */
void
scrub_ecache_line()
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	int cpuid = CPU->cpu_id;
	uint32_t index = ssmp->ecache_flush_index;
	uint64_t ec_size = cpunodes[cpuid].ecache_size;
	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
	int nlines = ssmp->ecache_nlines;
	uint32_t ec_set_size = ec_size / ecache_associativity;
	int ec_mirror = ssmp->ecache_mirror;
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;

	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
	int mpb;		/* encode Modified, Parity, Busy for action */
	uchar_t state;
	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
	ec_data_t ec_data[8];
	kstat_named_t *ec_knp;

	/* Work out how many lines to scan this call, and the flush budgets. */
	switch (ec_mirror) {
		default:
		case ECACHE_CPU_NON_MIRROR:
			/*
			 * The E$ scan rate is expressed in units of tenths of
			 * a percent.  ecache_scan_rate = 1000 (100%) means the
			 * whole cache is scanned every second.
			 */
			scan_lines = (nlines * ecache_scan_rate) /
			    (1000 * ecache_calls_a_sec);
			if (!(ssmp->ecache_busy)) {
				if (ecache_idle_factor > 0) {
					scan_lines *= ecache_idle_factor;
				}
			} else {
				flush_clean_busy = (scan_lines *
				    ecache_flush_clean_good_busy) / 100;
				flush_dirty_busy = (scan_lines *
				    ecache_flush_dirty_good_busy) / 100;
			}

			ec_timeout_calls = (ecache_calls_a_sec ?
			    ecache_calls_a_sec : 1);
			break;

		case ECACHE_CPU_MIRROR:
			scan_lines = ecache_lines_per_call_mirrored;
			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
			    ecache_calls_a_sec_mirrored : 1);
			break;
	}

	/*
	 * The ecache scrubber algorithm operates by reading and
	 * decoding the E$ tag to determine whether the corresponding E$ line
	 * can be scrubbed. There is a implicit assumption in the scrubber
	 * logic that the E$ tag is valid. Unfortunately, this assertion is
	 * flawed since the E$ tag may also be corrupted and have parity errors
	 * The scrubber logic is enhanced to check the validity of the E$ tag
	 * before scrubbing. When a parity error is detected in the E$ tag,
	 * it is possible to recover and scrub the tag under certain conditions
	 * so that a ETP error condition can be avoided.
	 */

	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
		/*
		 * We get the old-AFSR before clearing the AFSR sticky bits
		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
		 */
		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
		    cpu_ec_state_shift);

		/*
		 * ETP is set try to scrub the ecache tag.
		 */
		if (nafsr & P_AFSR_ETP) {
			ecache_scrub_tag_err(nafsr, state, index);
		} else if (state & cpu_ec_state_valid) {
			/*
			 * ETP is not set, E$ tag is valid.
			 * Proceed with the E$ scrubbing.
			 */
			if (state & cpu_ec_state_dirty)
				mpb |= ECACHE_STATE_MODIFIED;

			tafsr = check_ecache_line(index, acc_afsr);

			if (tafsr & P_AFSR_EDP) {
				mpb |= ECACHE_STATE_PARITY;

				/* Capture the full dtag for logging. */
				if (ecache_scrub_verbose ||
				    ecache_scrub_panic) {
					get_ecache_dtag(P2ALIGN(index, 64),
					    (uint64_t *)&ec_data[0],
					    &ec_tag, &oafsr, acc_afsr);
				}
			}

			if (ssmp->ecache_busy)
				mpb |= ECACHE_STATE_BUSY;

			/* Bump the kstat counter matching this MPB state. */
			ec_knp = (kstat_named_t *)ec_ksp + mpb;
			ec_knp->value.ul++;

			paddr = ((ec_tag & cpu_ec_tag_mask) <<
			    cpu_ec_tag_shift) | (index % ec_set_size);

			/*
			 * We flush the E$ lines depending on the ec_flush,
			 * we additionally flush clean_good_busy and
			 * dirty_good_busy lines for mirrored E$.
			 */
			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
				flushecacheline(paddr, ec_size);
			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
			    (ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
				flushecacheline(paddr, ec_size);
			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
				/* dirty+bad lines: retire the page instead */
				softcall(ecache_page_retire, (void *)paddr);
			}

			/*
			 * Conditionally flush both the clean_good and
			 * dirty_good lines when busy.
			 */
			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
				flush_clean_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->clean_good_busy_flush.value.ul++;
			} else if (DGB(mpb, ec_mirror) &&
			    (flush_dirty_busy > 0)) {
				flush_dirty_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->dirty_good_busy_flush.value.ul++;
			}

			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
			    ecache_scrub_panic)) {
				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
				    tafsr);
			}

		} else {
			ec_ksp->invalid_lines.value.ul++;
		}

		/* Advance to the next line, wrapping at the end of the E$. */
		if ((index += ec_linesize) >= ec_size)
			index = 0;

	}

	/*
	 * set the ecache scrub index for the next time around
	 */
	ssmp->ecache_flush_index = index;

	if (*acc_afsr & P_AFSR_CP) {
		uint64_t ret_afsr;

		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
		if ((ret_afsr & P_AFSR_CP) == 0)
			*acc_afsr = 0;
	}
}
3634 
3635 /*
3636  * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
3637  * we decrement the outstanding request count to zero.
3638  */
3639 
3640 /*ARGSUSED*/
3641 uint_t
3642 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
3643 {
3644 	int i;
3645 	int outstanding;
3646 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3647 	uint32_t *countp = &ssmp->ec_scrub_outstanding;
3648 
3649 	do {
3650 		outstanding = *countp;
3651 		ASSERT(outstanding > 0);
3652 		for (i = 0; i < outstanding; i++)
3653 			scrub_ecache_line();
3654 	} while (atomic_add_32_nv(countp, -outstanding));
3655 
3656 	return (DDI_INTR_CLAIMED);
3657 }
3658 
3659 /*
3660  * force each cpu to perform an ecache scrub, called from a timeout
3661  */
3662 extern xcfunc_t ecache_scrubreq_tl1;
3663 
3664 void
3665 do_scrub_ecache_line(void)
3666 {
3667 	long delta;
3668 
3669 	if (ecache_calls_a_sec > hz)
3670 		ecache_calls_a_sec = hz;
3671 	else if (ecache_calls_a_sec <= 0)
3672 		ecache_calls_a_sec = 1;
3673 
3674 	if (ecache_calls_a_sec_mirrored > hz)
3675 		ecache_calls_a_sec_mirrored = hz;
3676 	else if (ecache_calls_a_sec_mirrored <= 0)
3677 		ecache_calls_a_sec_mirrored = 1;
3678 
3679 	if (ecache_scrub_enable) {
3680 		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
3681 		delta = hz / ec_timeout_calls;
3682 	} else {
3683 		delta = hz;
3684 	}
3685 
3686 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3687 	    delta);
3688 }
3689 
3690 /*
3691  * initialization for ecache scrubbing
3692  * This routine is called AFTER all cpus have had cpu_init_private called
3693  * to initialize their private data areas.
3694  */
3695 void
3696 cpu_init_cache_scrub(void)
3697 {
3698 	if (ecache_calls_a_sec > hz) {
3699 		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
3700 		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
3701 		ecache_calls_a_sec = hz;
3702 	}
3703 
3704 	/*
3705 	 * Register softint for ecache scrubbing.
3706 	 */
3707 	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
3708 	    scrub_ecache_line_intr, NULL, SOFTINT_MT);
3709 
3710 	/*
3711 	 * kick off the scrubbing using realtime timeout
3712 	 */
3713 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3714 	    hz / ecache_calls_a_sec);
3715 }
3716 
3717 /*
3718  * Unset the busy flag for this cpu.
3719  */
3720 void
3721 cpu_idle_ecache_scrub(struct cpu *cp)
3722 {
3723 	if (CPU_PRIVATE(cp) != NULL) {
3724 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3725 		    sfpr_scrub_misc);
3726 		ssmp->ecache_busy = ECACHE_CPU_IDLE;
3727 	}
3728 }
3729 
3730 /*
3731  * Set the busy flag for this cpu.
3732  */
3733 void
3734 cpu_busy_ecache_scrub(struct cpu *cp)
3735 {
3736 	if (CPU_PRIVATE(cp) != NULL) {
3737 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3738 		    sfpr_scrub_misc);
3739 		ssmp->ecache_busy = ECACHE_CPU_BUSY;
3740 	}
3741 }
3742 
3743 /*
3744  * initialize the ecache scrubber data structures
3745  * The global entry point cpu_init_private replaces this entry point.
3746  *
3747  */
3748 static void
3749 cpu_init_ecache_scrub_dr(struct cpu *cp)
3750 {
3751 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3752 	int cpuid = cp->cpu_id;
3753 
3754 	/*
3755 	 * intialize bookkeeping for cache scrubbing
3756 	 */
3757 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3758 
3759 	ssmp->ecache_flush_index = 0;
3760 
3761 	ssmp->ecache_nlines =
3762 	    cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
3763 
3764 	/*
3765 	 * Determine whether we are running on mirrored SRAM
3766 	 */
3767 
3768 	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
3769 		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
3770 	else
3771 		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
3772 
3773 	cpu_busy_ecache_scrub(cp);
3774 
3775 	/*
3776 	 * initialize the kstats
3777 	 */
3778 	ecache_kstat_init(cp);
3779 }
3780 
3781 /*
3782  * uninitialize the ecache scrubber data structures
3783  * The global entry point cpu_uninit_private replaces this entry point.
3784  */
3785 static void
3786 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
3787 {
3788 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3789 
3790 	if (ssmp->ecache_ksp != NULL) {
3791 		kstat_delete(ssmp->ecache_ksp);
3792 		ssmp->ecache_ksp = NULL;
3793 	}
3794 
3795 	/*
3796 	 * un-initialize bookkeeping for cache scrubbing
3797 	 */
3798 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3799 
3800 	cpu_idle_ecache_scrub(cp);
3801 }
3802 
/* kmem cache for spitfire_private_t, aligned on S_ECACHE_MAX_LSIZE */
struct kmem_cache *sf_private_cache;
3804 
3805 /*
3806  * Cpu private initialization.  This includes allocating the cpu_private
3807  * data structure, initializing it, and initializing the scrubber for this
3808  * cpu.  This is called once for EVERY cpu, including CPU 0. This function
3809  * calls cpu_init_ecache_scrub_dr to init the scrubber.
3810  * We use kmem_cache_create for the spitfire private data structure because it
3811  * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
3812  */
3813 void
3814 cpu_init_private(struct cpu *cp)
3815 {
3816 	spitfire_private_t *sfprp;
3817 
3818 	ASSERT(CPU_PRIVATE(cp) == NULL);
3819 
3820 	/*
3821 	 * If the sf_private_cache has not been created, create it.
3822 	 */
3823 	if (sf_private_cache == NULL) {
3824 		sf_private_cache = kmem_cache_create("sf_private_cache",
3825 		    sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3826 		    NULL, NULL, NULL, NULL, 0);
3827 		ASSERT(sf_private_cache);
3828 	}
3829 
3830 	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3831 
3832 	bzero(sfprp, sizeof (spitfire_private_t));
3833 
3834 	cpu_init_ecache_scrub_dr(cp);
3835 }
3836 
3837 /*
3838  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
3839  * deallocate the scrubber data structures and cpu_private data structure.
3840  * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit
3841  * the scrubber for the specified cpu.
3842  */
3843 void
3844 cpu_uninit_private(struct cpu *cp)
3845 {
3846 	ASSERT(CPU_PRIVATE(cp));
3847 
3848 	cpu_uninit_ecache_scrub_dr(cp);
3849 	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3850 	CPU_PRIVATE(cp) = NULL;
3851 }
3852 
3853 /*
3854  * initialize the ecache kstats for each cpu
3855  */
3856 static void
3857 ecache_kstat_init(struct cpu *cp)
3858 {
3859 	struct kstat *ksp;
3860 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3861 
3862 	ASSERT(ssmp != NULL);
3863 
3864 	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3865 	    KSTAT_TYPE_NAMED,
3866 	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3867 	    KSTAT_FLAG_WRITABLE)) == NULL) {
3868 		ssmp->ecache_ksp = NULL;
3869 		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3870 		return;
3871 	}
3872 
3873 	ssmp->ecache_ksp = ksp;
3874 	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3875 	kstat_install(ksp);
3876 }
3877 
3878 /*
3879  * log the bad ecache information
3880  */
3881 static void
3882 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3883 		uint64_t afsr)
3884 {
3885 	spitf_async_flt spf_flt;
3886 	struct async_flt *aflt;
3887 	int i;
3888 	char *class;
3889 
3890 	bzero(&spf_flt, sizeof (spitf_async_flt));
3891 	aflt = &spf_flt.cmn_asyncflt;
3892 
3893 	for (i = 0; i < 8; i++) {
3894 		spf_flt.flt_ec_data[i] = ec_data[i];
3895 	}
3896 
3897 	spf_flt.flt_ec_tag = ec_tag;
3898 
3899 	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
3900 		spf_flt.flt_type = ec_action[mpb].ec_log_type;
3901 	} else spf_flt.flt_type = (ushort_t)mpb;
3902 
3903 	aflt->flt_inst = CPU->cpu_id;
3904 	aflt->flt_class = CPU_FAULT;
3905 	aflt->flt_id = gethrtime_waitfree();
3906 	aflt->flt_addr = paddr;
3907 	aflt->flt_stat = afsr;
3908 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3909 
3910 	switch (mpb) {
3911 	case CPU_ECACHE_TAG_ERR:
3912 	case CPU_ECACHE_ADDR_PAR_ERR:
3913 	case CPU_ECACHE_ETP_ETS_ERR:
3914 	case CPU_ECACHE_STATE_ERR:
3915 		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3916 		break;
3917 	default:
3918 		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3919 		break;
3920 	}
3921 
3922 	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3923 	    ue_queue, aflt->flt_panic);
3924 
3925 	if (aflt->flt_panic)
3926 		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
3927 		    "line detected");
3928 }
3929 
3930 /*
3931  * Process an ecache error that occured during the E$ scrubbing.
3932  * We do the ecache scan to find the bad line, flush the bad line
3933  * and start the memscrubber to find any UE (in memory or in another cache)
3934  */
3935 static uint64_t
3936 ecache_scrub_misc_err(int type, uint64_t afsr)
3937 {
3938 	spitf_async_flt spf_flt;
3939 	struct async_flt *aflt;
3940 	uint64_t oafsr;
3941 
3942 	bzero(&spf_flt, sizeof (spitf_async_flt));
3943 	aflt = &spf_flt.cmn_asyncflt;
3944 
3945 	/*
3946 	 * Scan each line in the cache to look for the one
3947 	 * with bad parity
3948 	 */
3949 	aflt->flt_addr = AFLT_INV_ADDR;
3950 	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3951 	    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3952 
3953 	if (oafsr & P_AFSR_CP) {
3954 		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3955 		*cp_afsr |= oafsr;
3956 	}
3957 
3958 	/*
3959 	 * If we found a bad PA, update the state to indicate if it is
3960 	 * memory or I/O space.
3961 	 */
3962 	if (aflt->flt_addr != AFLT_INV_ADDR) {
3963 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3964 		    MMU_PAGESHIFT)) ? 1 : 0;
3965 	}
3966 
3967 	spf_flt.flt_type = (ushort_t)type;
3968 
3969 	aflt->flt_inst = CPU->cpu_id;
3970 	aflt->flt_class = CPU_FAULT;
3971 	aflt->flt_id = gethrtime_waitfree();
3972 	aflt->flt_status = afsr;
3973 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3974 
3975 	/*
3976 	 * We have the bad line, flush that line and start
3977 	 * the memscrubber.
3978 	 */
3979 	if (spf_flt.flt_ec_lcnt > 0) {
3980 		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
3981 		    cpunodes[CPU->cpu_id].ecache_size);
3982 		read_all_memscrub = 1;
3983 		memscrub_run();
3984 	}
3985 
3986 	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
3987 	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
3988 	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
3989 
3990 	return (oafsr);
3991 }
3992 
/*
 * Handle an E$ tag error found by the scrubber at the given index:
 * re-read the line's data and tag, reconstruct the physical address
 * from the tag, then decode the AFSR.ETS syndrome.  A clean line whose
 * tag has bad parity is invalidated (tag rewritten with good parity)
 * and logged; every other combination is logged and then panics.
 */
static void
ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
{
	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
	uint64_t ec_tag, paddr, oafsr;
	ec_data_t ec_data[8];
	int cpuid = CPU->cpu_id;
	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
	    ecache_associativity;
	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
	    &oafsr, cpu_afsr);
	/* reassemble the PA from the tag plus the offset within the set */
	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
	    (index % ec_set_size);

	/*
	 * E$ tag state has good parity
	 */
	if ((afsr_ets & cpu_ec_state_parity) == 0) {
		if (afsr_ets & cpu_ec_parity) {
			/*
			 * E$ tag state bits indicate the line is clean,
			 * invalidate the E$ tag and continue.
			 */
			if (!(state & cpu_ec_state_dirty)) {
				/*
				 * Zero the tag and mark the state invalid
				 * with good parity for the tag.
				 */
				if (isus2i || isus2e)
					write_hb_ec_tag_parity(index);
				else
					write_ec_tag_parity(index);

				/* Sync with the dual tag */
				flushecacheline(0,
				    cpunodes[CPU->cpu_id].ecache_size);
				ec_ksp->tags_cleared.value.ul++;
				ecache_scrub_log(ec_data, ec_tag, paddr,
				    CPU_ECACHE_TAG_ERR, afsr);
				return;
			} else {
				/* dirty line with bad tag parity: fatal */
				ecache_scrub_log(ec_data, ec_tag, paddr,
				    CPU_ECACHE_ADDR_PAR_ERR, afsr);
				cmn_err(CE_PANIC, " E$ tag address has bad"
				    " parity");
			}
		} else if ((afsr_ets & cpu_ec_parity) == 0) {
			/*
			 * ETS is zero but ETP is set
			 */
			ecache_scrub_log(ec_data, ec_tag, paddr,
			    CPU_ECACHE_ETP_ETS_ERR, afsr);
			cmn_err(CE_PANIC, "AFSR.ETP is set and"
			    " AFSR.ETS is zero");
		}
	} else {
		/*
		 * E$ tag state bit has a bad parity
		 */
		ecache_scrub_log(ec_data, ec_tag, paddr,
		    CPU_ECACHE_STATE_ERR, afsr);
		cmn_err(CE_PANIC, "E$ tag state has bad parity");
	}
}
4061 
4062 static void
4063 ecache_page_retire(void *arg)
4064 {
4065 	uint64_t paddr = (uint64_t)arg;
4066 	(void) page_retire(paddr, PR_UE);
4067 }
4068 
/* Empty stub: stick-counter slave synchronization is a no-op here. */
void
sticksync_slave(void)
{}
4072 
/* Empty stub: stick-counter master synchronization is a no-op here. */
void
sticksync_master(void)
{}
4076 
/* Empty stub: no CE check/scrub work is performed by this module. */
/*ARGSUSED*/
void
cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
{}
4081 
4082 void
4083 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4084 {
4085 	int status;
4086 	ddi_fm_error_t de;
4087 
4088 	bzero(&de, sizeof (ddi_fm_error_t));
4089 
4090 	de.fme_version = DDI_FME_VERSION;
4091 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4092 	    FM_ENA_FMT1);
4093 	de.fme_flag = expected;
4094 	de.fme_bus_specific = (void *)aflt->flt_addr;
4095 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4096 
4097 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4098 		aflt->flt_panic = 1;
4099 }
4100 
4101 /*ARGSUSED*/
4102 void
4103 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
4104     errorq_t *eqp, uint_t flag)
4105 {
4106 	struct async_flt *aflt = (struct async_flt *)payload;
4107 
4108 	aflt->flt_erpt_class = error_class;
4109 	errorq_dispatch(eqp, payload, payload_sz, flag);
4110 }
4111 
#define	MAX_SIMM	8	/* CE records reserved per cpu */

/*
 * Per-memory-module (unum) correctable-error accounting record.
 */
struct ce_info {
	char    name[UNUM_NAMLEN];	/* unum naming the memory module */
	uint64_t intermittent_total;	/* CEs the scrub could not reproduce */
	uint64_t persistent_total;	/* CEs successfully scrubbed */
	uint64_t sticky_total;		/* CEs the scrub could not fix */
	unsigned short leaky_bucket_cnt; /* decaying soft-error count */
};

/*
 * Separately-defined structure for use in reporting the ce_info
 * to SunVTS without exposing the internal layout and implementation
 * of struct ce_info.
 */
static struct ecc_error_info ecc_error_info_data = {
	{ "version", KSTAT_DATA_UINT32 },
	{ "maxcount", KSTAT_DATA_UINT32 },
	{ "count", KSTAT_DATA_UINT32 }
};
static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
    sizeof (struct kstat_named);

#if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
#error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
#endif

/* CE record array and its element count; allocated in cpu_mp_init() */
struct ce_info  *mem_ce_simm = NULL;
size_t mem_ce_simm_size = 0;

/*
 * Default values for the number of CE's allowed per interval.
 * Interval is defined in minutes
 * SOFTERR_MIN_TIMEOUT is defined in microseconds
 */
#define	SOFTERR_LIMIT_DEFAULT		2
#define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
#define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
#define	TIMEOUT_NONE			((timeout_id_t)0)
#define	TIMEOUT_SET			((timeout_id_t)1)

/*
 * timeout identifer for leaky_bucket
 */
static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;

/*
 * Tunables for maximum number of allowed CE's in a given time
 */
int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4163 
4164 void
4165 cpu_mp_init(void)
4166 {
4167 	size_t size = cpu_aflt_size();
4168 	size_t i;
4169 	kstat_t *ksp;
4170 
4171 	/*
4172 	 * Initialize the CE error handling buffers.
4173 	 */
4174 	mem_ce_simm_size = MAX_SIMM * max_ncpus;
4175 	size = sizeof (struct ce_info) * mem_ce_simm_size;
4176 	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
4177 
4178 	ksp = kstat_create("unix", 0, "ecc-info", "misc",
4179 	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
4180 	if (ksp != NULL) {
4181 		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
4182 		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
4183 		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
4184 		ecc_error_info_data.count.value.ui32 = 0;
4185 		kstat_install(ksp);
4186 	}
4187 
4188 	for (i = 0; i < mem_ce_simm_size; i++) {
4189 		struct kstat_ecc_mm_info *kceip;
4190 
4191 		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
4192 		    KM_SLEEP);
4193 		ksp = kstat_create("mm", i, "ecc-info", "misc",
4194 		    KSTAT_TYPE_NAMED,
4195 		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
4196 		    KSTAT_FLAG_VIRTUAL);
4197 		if (ksp != NULL) {
4198 			/*
4199 			 * Re-declare ks_data_size to include room for the
4200 			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
4201 			 * set.
4202 			 */
4203 			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
4204 			    KSTAT_CE_UNUM_NAMLEN;
4205 			ksp->ks_data = kceip;
4206 			kstat_named_init(&kceip->name,
4207 			    "name", KSTAT_DATA_STRING);
4208 			kstat_named_init(&kceip->intermittent_total,
4209 			    "intermittent_total", KSTAT_DATA_UINT64);
4210 			kstat_named_init(&kceip->persistent_total,
4211 			    "persistent_total", KSTAT_DATA_UINT64);
4212 			kstat_named_init(&kceip->sticky_total,
4213 			    "sticky_total", KSTAT_DATA_UINT64);
4214 			/*
4215 			 * Use the default snapshot routine as it knows how to
4216 			 * deal with named kstats with long strings.
4217 			 */
4218 			ksp->ks_update = ecc_kstat_update;
4219 			kstat_install(ksp);
4220 		} else {
4221 			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4222 		}
4223 	}
4224 }
4225 
4226 /*ARGSUSED*/
4227 static void
4228 leaky_bucket_timeout(void *arg)
4229 {
4230 	int i;
4231 	struct ce_info *psimm = mem_ce_simm;
4232 
4233 	for (i = 0; i < mem_ce_simm_size; i++) {
4234 		if (psimm[i].leaky_bucket_cnt > 0)
4235 			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
4236 	}
4237 	add_leaky_bucket_timeout();
4238 }
4239 
4240 static void
4241 add_leaky_bucket_timeout(void)
4242 {
4243 	long timeout_in_microsecs;
4244 
4245 	/*
4246 	 * create timeout for next leak.
4247 	 *
4248 	 * The timeout interval is calculated as follows
4249 	 *
4250 	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4251 	 *
4252 	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4253 	 * in a minute), then multiply this by MICROSEC to get the interval
4254 	 * in microseconds.  Divide this total by ecc_softerr_limit so that
4255 	 * the timeout interval is accurate to within a few microseconds.
4256 	 */
4257 
4258 	if (ecc_softerr_limit <= 0)
4259 		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4260 	if (ecc_softerr_interval <= 0)
4261 		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4262 
4263 	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
4264 	    ecc_softerr_limit;
4265 
4266 	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
4267 		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
4268 
4269 	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
4270 	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
4271 }
4272 
4273 /*
4274  * Legacy Correctable ECC Error Hash
4275  *
4276  * All of the code below this comment is used to implement a legacy array
4277  * which counted intermittent, persistent, and sticky CE errors by unum,
4278  * and then was later extended to publish the data as a kstat for SunVTS.
4279  * All of this code is replaced by FMA, and remains here until such time
4280  * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
4281  *
4282  * Errors are saved in three buckets per-unum:
4283  * (1) sticky - scrub was unsuccessful, cannot be scrubbed
4284  *     This could represent a problem, and is immediately printed out.
4285  * (2) persistent - was successfully scrubbed
4286  *     These errors use the leaky bucket algorithm to determine
4287  *     if there is a serious problem.
4288  * (3) intermittent - may have originated from the cpu or upa/safari bus,
4289  *     and does not necessarily indicate any problem with the dimm itself,
4290  *     is critical information for debugging new hardware.
4291  *     Because we do not know if it came from the dimm, it would be
4292  *     inappropriate to include these in the leaky bucket counts.
4293  *
4294  * If the E$ line was modified before the scrub operation began, then the
4295  * displacement flush at the beginning of scrubphys() will cause the modified
4296  * line to be written out, which will clean up the CE.  Then, any subsequent
4297  * read will not cause an error, which will cause persistent errors to be
4298  * identified as intermittent.
4299  *
4300  * If a DIMM is going bad, it will produce true persistents as well as
4301  * false intermittents, so these intermittents can be safely ignored.
4302  *
4303  * If the error count is excessive for a DIMM, this function will return
4304  * PR_MCE, and the CPU module may then decide to remove that page from use.
4305  */
static int
ce_count_unum(int status, int len, char *unum)
{
	int i;
	struct ce_info *psimm = mem_ce_simm;
	int page_status = PR_OK;

	ASSERT(psimm != NULL);

	/* nothing to count without a unum or a recognized CE status */
	if (len <= 0 ||
	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
		return (page_status);

	/*
	 * Initialize the leaky_bucket timeout exactly once; casptr
	 * picks a single winner if multiple cpus race here.
	 */
	if (casptr(&leaky_bucket_timeout_id,
	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
		add_leaky_bucket_timeout();

	for (i = 0; i < mem_ce_simm_size; i++) {
		if (psimm[i].name[0] == '\0') {
			/*
			 * Hit the end of the valid entries, add
			 * a new one.
			 *
			 * NOTE(review): assumes len < UNUM_NAMLEN so the
			 * zero-filled tail keeps the name NUL-terminated;
			 * confirm callers never pass len >= UNUM_NAMLEN.
			 */
			(void) strncpy(psimm[i].name, unum, len);
			if (status & ECC_STICKY) {
				/*
				 * Sticky - the leaky bucket is used to track
				 * soft errors.  Since a sticky error is a
				 * hard error and likely to be retired soon,
				 * we do not count it in the leaky bucket.
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 1;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PR_MCE;
			} else if (status & ECC_PERSISTENT) {
				psimm[i].leaky_bucket_cnt = 1;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 1;
				psimm[i].sticky_total = 0;
			} else {
				/*
				 * Intermittent - Because the scrub operation
				 * cannot find the error in the DIMM, we will
				 * not count these in the leaky bucket
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 1;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 0;
			}
			ecc_error_info_data.count.value.ui32++;
			break;
		} else if (strncmp(unum, psimm[i].name, len) == 0) {
			/*
			 * Found an existing entry for the current
			 * memory module, adjust the counts.
			 */
			if (status & ECC_STICKY) {
				psimm[i].sticky_total++;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PR_MCE;
			} else if (status & ECC_PERSISTENT) {
				int new_value;

				new_value = atomic_add_16_nv(
				    &psimm[i].leaky_bucket_cnt, 1);
				psimm[i].persistent_total++;
				if (new_value > ecc_softerr_limit) {
					cmn_err(CE_WARN, "[AFT0] Most recent %d"
					    " soft errors from Memory Module"
					    " %s exceed threshold (N=%d,"
					    " T=%dh:%02dm) triggering page"
					    " retire", new_value, unum,
					    ecc_softerr_limit,
					    ecc_softerr_interval / 60,
					    ecc_softerr_interval % 60);
					/* keep the bucket pegged at the limit */
					atomic_add_16(
					    &psimm[i].leaky_bucket_cnt, -1);
					page_status = PR_MCE;
				}
			} else { /* Intermittent */
				psimm[i].intermittent_total++;
			}
			break;
		}
	}

	if (i >= mem_ce_simm_size)
		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
		    "space.\n");

	return (page_status);
}
4409 
4410 /*
4411  * Function to support counting of IO detected CEs.
4412  */
4413 void
4414 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
4415 {
4416 	int err;
4417 
4418 	err = ce_count_unum(ecc->flt_status, len, unum);
4419 	if (err != PR_OK && automatic_page_removal) {
4420 		(void) page_retire(ecc->flt_addr, err);
4421 	}
4422 }
4423 
4424 static int
4425 ecc_kstat_update(kstat_t *ksp, int rw)
4426 {
4427 	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
4428 	struct ce_info *ceip = mem_ce_simm;
4429 	int i = ksp->ks_instance;
4430 
4431 	if (rw == KSTAT_WRITE)
4432 		return (EACCES);
4433 
4434 	ASSERT(ksp->ks_data != NULL);
4435 	ASSERT(i < mem_ce_simm_size && i >= 0);
4436 
4437 	/*
4438 	 * Since we're not using locks, make sure that we don't get partial
4439 	 * data. The name is always copied before the counters are incremented
4440 	 * so only do this update routine if at least one of the counters is
4441 	 * non-zero, which ensures that ce_count_unum() is done, and the
4442 	 * string is fully copied.
4443 	 */
4444 	if (ceip[i].intermittent_total == 0 &&
4445 	    ceip[i].persistent_total == 0 &&
4446 	    ceip[i].sticky_total == 0) {
4447 		/*
4448 		 * Uninitialized or partially initialized. Ignore.
4449 		 * The ks_data buffer was allocated via kmem_zalloc,
4450 		 * so no need to bzero it.
4451 		 */
4452 		return (0);
4453 	}
4454 
4455 	kstat_named_setstr(&kceip->name, ceip[i].name);
4456 	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
4457 	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
4458 	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
4459 
4460 	return (0);
4461 }
4462 
4463 #define	VIS_BLOCKSIZE		64
4464 
4465 int
4466 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
4467 {
4468 	int ret, watched;
4469 
4470 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4471 	ret = dtrace_blksuword32(addr, data, 0);
4472 	if (watched)
4473 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4474 
4475 	return (ret);
4476 }
4477 
/* Empty stub: no work when a cpu enters the faulted state. */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
}
4483 
/* Empty stub: no work when a cpu leaves the faulted state. */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
}
4489 
/* Empty stub: no extra kernel page-size MMU setup on this platform. */
/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
{
}
4495 
4496 size_t
4497 mmu_get_kernel_lpsize(size_t lpsize)
4498 {
4499 	uint_t tte;
4500 
4501 	if (lpsize == 0) {
4502 		/* no setting for segkmem_lpsize in /etc/system: use default */
4503 		return (MMU_PAGESIZE4M);
4504 	}
4505 
4506 	for (tte = TTE8K; tte <= TTE4M; tte++) {
4507 		if (lpsize == TTEBYTES(tte))
4508 			return (lpsize);
4509 	}
4510 
4511 	return (TTEBYTES(TTE8K));
4512 }
4513