xref: /titanic_50/usr/src/uts/sun4u/cpu/spitfire.c (revision 8200fe25ffab8b2032d046c88710a949f361b700)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/archsystm.h>
32 #include <sys/machparam.h>
33 #include <sys/machsystm.h>
34 #include <sys/cpu.h>
35 #include <sys/elf_SPARC.h>
36 #include <vm/hat_sfmmu.h>
37 #include <vm/page.h>
38 #include <sys/cpuvar.h>
39 #include <sys/spitregs.h>
40 #include <sys/async.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/dditypes.h>
44 #include <sys/sunddi.h>
45 #include <sys/cpu_module.h>
46 #include <sys/prom_debug.h>
47 #include <sys/vmsystm.h>
48 #include <sys/prom_plat.h>
49 #include <sys/sysmacros.h>
50 #include <sys/intreg.h>
51 #include <sys/machtrap.h>
52 #include <sys/ontrap.h>
53 #include <sys/ivintr.h>
54 #include <sys/atomic.h>
55 #include <sys/panic.h>
56 #include <sys/ndifm.h>
57 #include <sys/fm/protocol.h>
58 #include <sys/fm/util.h>
59 #include <sys/fm/cpu/UltraSPARC-II.h>
60 #include <sys/ddi.h>
61 #include <sys/ecc_kstat.h>
62 #include <sys/watchpoint.h>
63 #include <sys/dtrace.h>
64 #include <sys/errclassify.h>
65 
66 uchar_t	*ctx_pgsz_array = NULL;
67 
/*
 * One 8-byte slice of an ecache (E$) line dump, paired with the AFSR state
 * that goes with it.  A full 64-byte E$ line is dumped as 8 of these.
 */
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;		/* 8 bytes of E$ data */
	uint64_t ec_afsr;	/* AFSR state associated with ec_d8 */
} ec_data_t;
76 
77 /*
78  * Define spitfire (Ultra I/II) specific asynchronous error structure
79  */
80 typedef struct spitfire_async_flt {
81 	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
82 	ushort_t flt_type;		/* types of faults - cpu specific */
83 	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
84 	uint64_t flt_ec_tag;		/* E$ tag info */
85 	int flt_ec_lcnt;		/* number of bad E$ lines */
86 	ushort_t flt_sdbh;		/* UDBH reg */
87 	ushort_t flt_sdbl;		/* UDBL reg */
88 } spitf_async_flt;
89 
/*
 * Prototypes for support routines in spitfire_asm.s.
 *
 * Every routine is declared with a full prototype (including "(void)" for
 * the ones that take no arguments) so the compiler can check call sites.
 */
extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
extern uint64_t get_lsu(void);
extern void set_lsu(uint64_t ncc);
extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
    uint64_t *oafsr, uint64_t *acc_afsr);
extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
    uint64_t *acc_afsr);
extern uint64_t read_and_clear_afsr(void);
extern void write_ec_tag_parity(uint32_t id);
extern void write_hb_ec_tag_parity(uint32_t id);
104 
105 /*
106  * Spitfire module routines:
107  */
108 static void cpu_async_log_err(void *flt);
109 /*PRINTFLIKE6*/
110 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
111     uint_t logflags, const char *endstr, const char *fmt, ...);
112 
113 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
114 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
115 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
116 
117 static void log_ce_err(struct async_flt *aflt, char *unum);
118 static void log_ue_err(struct async_flt *aflt, char *unum);
119 static void check_misc_err(spitf_async_flt *spf_flt);
120 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
121 static int check_ecc(struct async_flt *aflt);
122 static uint_t get_cpu_status(uint64_t arg);
123 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
124 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
125 		int *m, uint64_t *afsr);
126 static void ecache_kstat_init(struct cpu *cp);
127 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
128 		uint64_t paddr, int mpb, uint64_t);
129 static uint64_t ecache_scrub_misc_err(int, uint64_t);
130 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
131 static void ecache_page_retire(void *);
132 static int ecc_kstat_update(kstat_t *ksp, int rw);
133 static int ce_count_unum(int status, int len, char *unum);
134 static void add_leaky_bucket_timeout(void);
135 static int synd_to_synd_code(int synd_status, ushort_t synd);
136 
137 extern uint_t read_all_memscrub;
138 extern void memscrub_run(void);
139 
140 static uchar_t	isus2i;			/* set if sabre */
141 static uchar_t	isus2e;			/* set if hummingbird */
142 
143 /*
144  * Default ecache mask and shift settings for Spitfire.  If we detect a
145  * different CPU implementation, we will modify these values at boot time.
146  */
147 static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
148 static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
149 static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
150 static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
151 static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
152 static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
153 static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
154 static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
155 static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
156 static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;
157 
158 /*
159  * Default ecache state bits for Spitfire.  These individual bits indicate if
160  * the given line is in any of the valid or modified states, respectively.
161  * Again, we modify these at boot if we detect a different CPU.
162  */
163 static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
164 static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
165 static uchar_t cpu_ec_parity		= S_EC_PARITY;
166 static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;
167 
168 /*
169  * This table is used to determine which bit(s) is(are) bad when an ECC
170  * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
171  * of this array have the following semantics:
172  *
173  *      00-63   The number of the bad bit, when only one bit is bad.
174  *      64      ECC bit C0 is bad.
175  *      65      ECC bit C1 is bad.
176  *      66      ECC bit C2 is bad.
177  *      67      ECC bit C3 is bad.
178  *      68      ECC bit C4 is bad.
179  *      69      ECC bit C5 is bad.
180  *      70      ECC bit C6 is bad.
181  *      71      ECC bit C7 is bad.
182  *      72      Two bits are bad.
183  *      73      Three bits are bad.
184  *      74      Four bits are bad.
185  *      75      More than Four bits are bad.
186  *      76      NO bits are bad.
187  * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
188  */
189 
/*
 * Syndrome codes stored in ecc_syndrome_tab[] for everything other than a
 * single bad data bit (which is encoded as the bit number, 0-63).
 */
#define	C0	64	/* ECC bit C0 is bad */
#define	C1	65	/* ECC bit C1 is bad */
#define	C2	66	/* ECC bit C2 is bad */
#define	C3	67	/* ECC bit C3 is bad */
#define	C4	68	/* ECC bit C4 is bad */
#define	C5	69	/* ECC bit C5 is bad */
#define	C6	70	/* ECC bit C6 is bad */
#define	C7	71	/* ECC bit C7 is bad */
#define	M2	72	/* two bits are bad */
#define	M3	73	/* three bits are bad */
#define	M4	74	/* four bits are bad */
#define	MX	75	/* more than four bits are bad */
#define	NA	76	/* no bits are bad */

/*
 * Classify a syndrome code: a single bad data bit is 0 .. C0-1, a single
 * bad check bit is C0 .. C7.  The argument is parenthesized so that an
 * expression argument (e.g. "a & b") is compared as a whole; note that it
 * is still evaluated twice.
 */
#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	(((synd_code) >= 0) && \
						    ((synd_code) < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	(((synd_code) >= C0) && \
						    ((synd_code) <= C7))
208 
209 static char ecc_syndrome_tab[] =
210 {
211 	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
212 	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
213 	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
214 	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
215 	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
216 	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
217 	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
218 	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
219 	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
220 	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
221 	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
222 	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
223 	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
224 	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
225 	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
226 	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
227 };
228 
/* Number of entries in an 8-bit syndrome lookup table. */
#define	SYND_TBL_SIZE 256

/*
 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
 *
 * Bit 15 of a stored syndrome marks "came from UDBL"; the low 15 bits hold
 * the syndrome itself.  The macro arguments are parenthesized so that an
 * expression argument (e.g. "a | b") is masked as a whole.
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	(((synd) & UDBL_REG) >> 15)
#define	SYND(synd)	((synd) & 0x7FFF)
238 
/*
 * Spitfire-private error types.  These are only used internally, as values
 * of the flt_type field of the spitfire fault structure.
 */
#define	CPU_UE_ERR		0	/* uncorrectable error (UE) */
#define	CPU_EDP_LDP_ERR		1	/* EDP or LDP parity error */
#define	CPU_WP_ERR		2	/* WP parity error */
#define	CPU_BTO_BERR_ERR	3	/* bus timeout / bus error */
#define	CPU_PANIC_CP_ERR	4	/* CP error found by panic polling */
#define	CPU_TRAPPING_CP_ERR	5	/* CP error, sabre/hbird only */
#define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
#define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
#define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
#define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
#define	CPU_ORPHAN_CP_ERR	10	/* orphan CP error */
#define	CPU_ECACHE_ADDR_PAR_ERR	11	/* E$ address parity error */
#define	CPU_ECACHE_STATE_ERR	12	/* E$ state error */
#define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
#define	CPU_ECACHE_TAG_ERR	14	/* scrub the E$ tag, if state clean */
#define	CPU_ADDITIONAL_ERR	15	/* additional errors occurred */
259 
/*
 * Yield a pointer to member x of the "Spitfire cpu private" data structure
 * hanging off cpu cp.
 */
#define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))

/*
 * Automatic retiring of pages on DIMMs that have excessive soft errors.
 * Enabled by default; set to 0 to disable.
 */
int automatic_page_removal = 1;
270 
271 /*
272  * Heuristic for figuring out which module to replace.
273  * Relative likelihood that this P_SYND indicates that this module is bad.
274  * We call it a "score", though, not a relative likelihood.
275  *
276  * Step 1.
277  * Assign a score to each byte of P_SYND according to the following rules:
278  * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
279  * If one bit on, give it a 95.
280  * If seven bits on, give it a 10.
281  * If two bits on:
282  *   in different nybbles, a 90
283  *   in same nybble, but unaligned, 85
284  *   in same nybble and as an aligned pair, 80
285  * If six bits on, look at the bits that are off:
286  *   in same nybble and as an aligned pair, 15
287  *   in same nybble, but unaligned, 20
288  *   in different nybbles, a 25
289  * If three bits on:
290  *   in different nybbles, no aligned pairs, 75
291  *   in different nybbles, one aligned pair, 70
292  *   in the same nybble, 65
293  * If five bits on, look at the bits that are off:
294  *   in the same nybble, 30
295  *   in different nybbles, one aligned pair, 35
296  *   in different nybbles, no aligned pairs, 40
297  * If four bits on:
298  *   all in one nybble, 45
299  *   as two aligned pairs, 50
300  *   one aligned pair, 55
301  *   no aligned pairs, 60
302  *
303  * Step 2:
304  * Take the higher of the two scores (one for each byte) as the score
305  * for the module.
306  *
307  * Print the score for each module, and field service should replace the
308  * module with the highest score.
309  */
310 
311 /*
312  * In the table below, the first row/column comment indicates the
313  * number of bits on in that nybble; the second row/column comment is
314  * the hex digit.
315  */
316 
/*
 * Score for one byte of P_SYND, indexed by the byte value.  Each row holds
 * the 16 entries that share a high nybble; the row comment gives that high
 * nybble (hex) and how many of its bits are on.  Within a row the entries
 * run over low nybbles 0 through F.
 */
static int p_synd_score_table[256] = {
	/* high nybble 0 (0 bits on) */
	 5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
	/* high nybble 1 (1 bit on) */
	95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
	/* high nybble 2 (1 bit on) */
	95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
	/* high nybble 3 (2 bits on) */
	80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
	/* high nybble 4 (1 bit on) */
	95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
	/* high nybble 5 (2 bits on) */
	85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
	/* high nybble 6 (2 bits on) */
	85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
	/* high nybble 7 (3 bits on) */
	65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
	/* high nybble 8 (1 bit on) */
	95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
	/* high nybble 9 (2 bits on) */
	85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
	/* high nybble A (2 bits on) */
	85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
	/* high nybble B (3 bits on) */
	65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
	/* high nybble C (2 bits on) */
	80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
	/* high nybble D (3 bits on) */
	65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
	/* high nybble E (3 bits on) */
	65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
	/* high nybble F (4 bits on) */
	45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
};
338 
339 int
340 ecc_psynd_score(ushort_t p_synd)
341 {
342 	int i, j, a, b;
343 
344 	i = p_synd & 0xFF;
345 	j = (p_synd >> 8) & 0xFF;
346 
347 	a = p_synd_score_table[i];
348 	b = p_synd_score_table[j];
349 
350 	return (a > b ? a : b);
351 }
352 
353 /*
354  * Async Fault Logging
355  *
356  * To ease identifying, reading, and filtering async fault log messages, the
357  * label [AFT#] is now prepended to each async fault message.  These messages
358  * and the logging rules are implemented by cpu_aflt_log(), below.
359  *
360  * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
361  *          This includes both corrected ECC memory and ecache faults.
362  *
363  * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
364  *          else except CE errors) with a priority of 1 (highest).  This tag
365  *          is also used for panic messages that result from an async fault.
366  *
367  * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
368  * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
369  *          of the E-$ data and tags.
370  *
371  * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
372  * printed on the console.  To send all AFT logs to both the log and the
373  * console, set aft_verbose = 1.
374  */
375 
/*
 * Individual logflags bits accepted by cpu_aflt_log(); each one selects a
 * piece of the fault to print.
 */
#define	CPU_FLTCPU		0x0001	/* flt_inst, as a CPU id */
#define	CPU_SPACE		0x0002	/* flt_status (data or instr) */
#define	CPU_ERRID		0x0004	/* flt_id */
#define	CPU_TL			0x0008	/* flt_tl */
#define	CPU_ERRID_FIRST		0x0010	/* flt_id, first in the message */
#define	CPU_AFSR		0x0020	/* flt_stat, as decoded %afsr */
#define	CPU_AFAR		0x0040	/* flt_addr, as %afar */
#define	CPU_AF_PSYND		0x0080	/* flt_stat %afsr.PSYND */
#define	CPU_AF_ETS		0x0100	/* flt_stat %afsr.ETS */
#define	CPU_UDBH		0x0200	/* flt_sdbh and syndrome */
#define	CPU_UDBL		0x0400	/* flt_sdbl and syndrome */
#define	CPU_FAULTPC		0x0800	/* flt_pc */
#define	CPU_SYND		0x1000	/* flt_synd and unum */

/*
 * Pre-built flag sets, one per class of error message.
 */
#define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
				CPU_FAULTPC)
#define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
#define	CE_LFLAGS	(UE_LFLAGS & ~(CPU_UDBH | CPU_UDBL | CPU_TL |	\
				CPU_SPACE))
#define	PARERR_LFLAGS	(CMN_LFLAGS)
#define	WP_LFLAGS	(CMN_LFLAGS & ~(CPU_SPACE | CPU_TL))
#define	CP_LFLAGS	(CMN_LFLAGS & ~(CPU_SPACE | CPU_TL |		\
				CPU_FLTCPU | CPU_FAULTPC))
#define	BERRTO_LFLAGS	(CMN_LFLAGS)
#define	NO_LFLAGS	(0)

/*
 * Bit-name strings for decoded register output.  AFSR_FMTSTR0 and
 * AFSR_FMTSTR1 name %afsr bits; UDB_FMTSTR names the UDB register bits.
 */
#define	AFSR_FMTSTR0	"\020\1ME"
#define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
#define	UDB_FMTSTR	"\020\012UE\011CE"
408 
409 /*
410  * Maximum number of contexts for Spitfire.
411  */
412 #define	MAX_NCTXS	(1 << 13)
413 
414 /*
415  * Save the cache bootup state for use when internal
416  * caches are to be re-enabled after an error occurs.
417  */
418 uint64_t	cache_boot_state = 0;
419 
420 /*
421  * PA[31:0] represent Displacement in UPA configuration space.
422  */
423 uint_t	root_phys_addr_lo_mask = 0xffffffff;
424 
425 /*
426  * Spitfire legacy globals
427  */
428 int	itlb_entries;
429 int	dtlb_entries;
430 
431 void
432 cpu_setup(void)
433 {
434 	extern int page_retire_messages;
435 	extern int page_retire_first_ue;
436 	extern int at_flags;
437 #if defined(SF_ERRATA_57)
438 	extern caddr_t errata57_limit;
439 #endif
440 	extern int disable_text_largepages;
441 	extern int disable_initdata_largepages;
442 
443 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
444 
445 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
446 
447 	/*
448 	 * Spitfire isn't currently FMA-aware, so we have to enable the
449 	 * page retirement messages. We also change the default policy
450 	 * for UE retirement to allow clearing of transient errors.
451 	 */
452 	page_retire_messages = 1;
453 	page_retire_first_ue = 0;
454 
455 	/*
456 	 * save the cache bootup state.
457 	 */
458 	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);
459 
460 	/*
461 	 * Use the maximum number of contexts available for Spitfire unless
462 	 * it has been tuned for debugging.
463 	 * We are checking against 0 here since this value can be patched
464 	 * while booting.  It can not be patched via /etc/system since it
465 	 * will be patched too late and thus cause the system to panic.
466 	 */
467 	if (nctxs == 0)
468 		nctxs = MAX_NCTXS;
469 
470 	if (use_page_coloring) {
471 		do_pg_coloring = 1;
472 		if (use_virtual_coloring)
473 			do_virtual_coloring = 1;
474 	}
475 
476 	/*
477 	 * Tune pp_slots to use up to 1/8th of the tlb entries.
478 	 */
479 	pp_slots = MIN(8, MAXPP_SLOTS);
480 
481 	/*
482 	 * Block stores invalidate all pages of the d$ so pagecopy
483 	 * et. al. do not need virtual translations with virtual
484 	 * coloring taken into consideration.
485 	 */
486 	pp_consistent_coloring = 0;
487 
488 	isa_list =
489 	    "sparcv9+vis sparcv9 "
490 	    "sparcv8plus+vis sparcv8plus "
491 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
492 
493 	cpu_hwcap_flags = AV_SPARC_VIS;
494 
495 	/*
496 	 * On Spitfire, there's a hole in the address space
497 	 * that we must never map (the hardware only support 44-bits of
498 	 * virtual address).  Later CPUs are expected to have wider
499 	 * supported address ranges.
500 	 *
501 	 * See address map on p23 of the UltraSPARC 1 user's manual.
502 	 */
503 	hole_start = (caddr_t)0x80000000000ull;
504 	hole_end = (caddr_t)0xfffff80000000000ull;
505 
506 	/*
507 	 * A spitfire call bug requires us to be a further 4Gbytes of
508 	 * firewall from the spec.
509 	 *
510 	 * See Spitfire Errata #21
511 	 */
512 	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
513 	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));
514 
515 	/*
516 	 * The kpm mapping window.
517 	 * kpm_size:
518 	 *	The size of a single kpm range.
519 	 *	The overall size will be: kpm_size * vac_colors.
520 	 * kpm_vbase:
521 	 *	The virtual start address of the kpm range within the kernel
522 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
523 	 */
524 	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
525 	kpm_size_shift = 41;
526 	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
527 
528 #if defined(SF_ERRATA_57)
529 	errata57_limit = (caddr_t)0x80000000ul;
530 #endif
531 
532 	/*
533 	 * Allow only 8K, 64K and 4M pages for text by default.
534 	 * Allow only 8K and 64K page for initialized data segments by
535 	 * default.
536 	 */
537 	disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) |
538 	    (1 << TTE256M);
539 	disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) |
540 	    (1 << TTE32M) | (1 << TTE256M);
541 }
542 
543 static int
544 getintprop(pnode_t node, char *name, int deflt)
545 {
546 	int	value;
547 
548 	switch (prom_getproplen(node, name)) {
549 	case 0:
550 		value = 1;	/* boolean properties */
551 		break;
552 
553 	case sizeof (int):
554 		(void) prom_getprop(node, name, (caddr_t)&value);
555 		break;
556 
557 	default:
558 		value = deflt;
559 		break;
560 	}
561 
562 	return (value);
563 }
564 
565 /*
566  * Set the magic constants of the implementation.
567  */
568 void
569 cpu_fiximp(pnode_t dnode)
570 {
571 	extern int vac_size, vac_shift;
572 	extern uint_t vac_mask;
573 	extern int dcache_line_mask;
574 	int i, a;
575 	static struct {
576 		char	*name;
577 		int	*var;
578 	} prop[] = {
579 		"dcache-size",		&dcache_size,
580 		"dcache-line-size",	&dcache_linesize,
581 		"icache-size",		&icache_size,
582 		"icache-line-size",	&icache_linesize,
583 		"ecache-size",		&ecache_size,
584 		"ecache-line-size",	&ecache_alignsize,
585 		"ecache-associativity", &ecache_associativity,
586 		"#itlb-entries",	&itlb_entries,
587 		"#dtlb-entries",	&dtlb_entries,
588 		};
589 
590 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
591 		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
592 			*prop[i].var = a;
593 		}
594 	}
595 
596 	ecache_setsize = ecache_size / ecache_associativity;
597 
598 	vac_size = S_VAC_SIZE;
599 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
600 	i = 0; a = vac_size;
601 	while (a >>= 1)
602 		++i;
603 	vac_shift = i;
604 	shm_alignment = vac_size;
605 	vac = 1;
606 
607 	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);
608 
609 	/*
610 	 * UltraSPARC I & II have ecache sizes running
611 	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
612 	 * and 8 MB. Adjust the copyin/copyout limits
613 	 * according to the cache size. The magic number
614 	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
615 	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
616 	 * VIS instructions.
617 	 *
618 	 * We assume that all CPUs on the system have the same size
619 	 * ecache. We're also called very early in the game.
620 	 * /etc/system will be parsed *after* we're called so
621 	 * these values can be overwritten.
622 	 */
623 
624 	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
625 	if (ecache_size <= 524288) {
626 		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
627 		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
628 		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
629 	} else if (ecache_size == 1048576) {
630 		hw_copy_limit_2 = 1024;
631 		hw_copy_limit_4 = 1280;
632 		hw_copy_limit_8 = 1536;
633 	} else if (ecache_size == 2097152) {
634 		hw_copy_limit_2 = 1536;
635 		hw_copy_limit_4 = 2048;
636 		hw_copy_limit_8 = 2560;
637 	} else if (ecache_size == 4194304) {
638 		hw_copy_limit_2 = 2048;
639 		hw_copy_limit_4 = 2560;
640 		hw_copy_limit_8 = 3072;
641 	} else {
642 		hw_copy_limit_2 = 2560;
643 		hw_copy_limit_4 = 3072;
644 		hw_copy_limit_8 = 3584;
645 	}
646 }
647 
648 /*
649  * Called by setcpudelay
650  */
651 void
652 cpu_init_tick_freq(void)
653 {
654 	/*
655 	 * Determine the cpu frequency by calling
656 	 * tod_get_cpufrequency. Use an approximate freqency
657 	 * value computed by the prom if the tod module
658 	 * is not initialized and loaded yet.
659 	 */
660 	if (tod_ops.tod_get_cpufrequency != NULL) {
661 		mutex_enter(&tod_lock);
662 		sys_tick_freq = tod_ops.tod_get_cpufrequency();
663 		mutex_exit(&tod_lock);
664 	} else {
665 #if defined(HUMMINGBIRD)
666 		/*
667 		 * the hummingbird version of %stick is used as the basis for
668 		 * low level timing; this provides an independent constant-rate
669 		 * clock for general system use, and frees power mgmt to set
670 		 * various cpu clock speeds.
671 		 */
672 		if (system_clock_freq == 0)
673 			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
674 			    system_clock_freq);
675 		sys_tick_freq = system_clock_freq;
676 #else /* SPITFIRE */
677 		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
678 #endif
679 	}
680 }
681 
682 
/* Mondo dispatch primitive and cross-call timeout limits, defined elsewhere. */
void shipit(int upaid);
extern uint64_t xc_tick_limit;
extern uint64_t xc_tick_jump_limit;

#ifdef SEND_MONDO_STATS
/*
 * Mondo send statistics: send_one_mondo() bumps the counter selected by the
 * sending processor id and the highest set bit of the elapsed tick count.
 */
uint64_t x_early[NCPU][64];
#endif
690 
691 /*
692  * Note: A version of this function is used by the debugger via the KDI,
693  * and must be kept in sync with this version.  Any changes made to this
694  * function to support new chips or to accomodate errata must also be included
695  * in the KDI-specific version.  See spitfire_kdi.c.
696  */
697 void
698 send_one_mondo(int cpuid)
699 {
700 	uint64_t idsr, starttick, endtick;
701 	int upaid, busy, nack;
702 	uint64_t tick, tick_prev;
703 	ulong_t ticks;
704 
705 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
706 	upaid = CPUID_TO_UPAID(cpuid);
707 	tick = starttick = gettick();
708 	shipit(upaid);
709 	endtick = starttick + xc_tick_limit;
710 	busy = nack = 0;
711 	for (;;) {
712 		idsr = getidsr();
713 		if (idsr == 0)
714 			break;
715 		/*
716 		 * When we detect an irregular tick jump, we adjust
717 		 * the timer window to the current tick value.
718 		 */
719 		tick_prev = tick;
720 		tick = gettick();
721 		ticks = tick - tick_prev;
722 		if (ticks > xc_tick_jump_limit) {
723 			endtick = tick + xc_tick_limit;
724 		} else if (tick > endtick) {
725 			if (panic_quiesce)
726 				return;
727 			cmn_err(CE_PANIC,
728 			"send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
729 			upaid, nack, busy);
730 		}
731 		if (idsr & IDSR_BUSY) {
732 			busy++;
733 			continue;
734 		}
735 		drv_usecwait(1);
736 		shipit(upaid);
737 		nack++;
738 		busy = 0;
739 	}
740 #ifdef SEND_MONDO_STATS
741 	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
742 #endif
743 }
744 
745 void
746 send_mondo_set(cpuset_t set)
747 {
748 	int i;
749 
750 	for (i = 0; i < NCPU; i++)
751 		if (CPU_IN_SET(set, i)) {
752 			send_one_mondo(i);
753 			CPUSET_DEL(set, i);
754 			if (CPUSET_ISNULL(set))
755 				break;
756 		}
757 }
758 
/*
 * Nothing to do in this CPU module; the body is empty.
 */
void
syncfpu(void)
{
}
763 
764 /*
765  * Determine the size of the CPU module's error structure in bytes.  This is
766  * called once during boot to initialize the error queues.
767  */
768 int
769 cpu_aflt_size(void)
770 {
771 	/*
772 	 * We need to determine whether this is a sabre, Hummingbird or a
773 	 * Spitfire/Blackbird impl and set the appropriate state variables for
774 	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
775 	 * too early in the boot flow and the cpunodes are not initialized.
776 	 * This routine will be called once after cpunodes[] is ready, so do
777 	 * it here.
778 	 */
779 	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
780 		isus2i = 1;
781 		cpu_ec_tag_mask = SB_ECTAG_MASK;
782 		cpu_ec_state_mask = SB_ECSTATE_MASK;
783 		cpu_ec_par_mask = SB_ECPAR_MASK;
784 		cpu_ec_par_shift = SB_ECPAR_SHIFT;
785 		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
786 		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
787 		cpu_ec_state_exl = SB_ECSTATE_EXL;
788 		cpu_ec_state_mod = SB_ECSTATE_MOD;
789 
790 		/* These states do not exist in sabre - set to 0xFF */
791 		cpu_ec_state_shr = 0xFF;
792 		cpu_ec_state_own = 0xFF;
793 
794 		cpu_ec_state_valid = SB_ECSTATE_VALID;
795 		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
796 		cpu_ec_state_parity = SB_ECSTATE_PARITY;
797 		cpu_ec_parity = SB_EC_PARITY;
798 	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
799 		isus2e = 1;
800 		cpu_ec_tag_mask = HB_ECTAG_MASK;
801 		cpu_ec_state_mask = HB_ECSTATE_MASK;
802 		cpu_ec_par_mask = HB_ECPAR_MASK;
803 		cpu_ec_par_shift = HB_ECPAR_SHIFT;
804 		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
805 		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
806 		cpu_ec_state_exl = HB_ECSTATE_EXL;
807 		cpu_ec_state_mod = HB_ECSTATE_MOD;
808 
809 		/* These states do not exist in hummingbird - set to 0xFF */
810 		cpu_ec_state_shr = 0xFF;
811 		cpu_ec_state_own = 0xFF;
812 
813 		cpu_ec_state_valid = HB_ECSTATE_VALID;
814 		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
815 		cpu_ec_state_parity = HB_ECSTATE_PARITY;
816 		cpu_ec_parity = HB_EC_PARITY;
817 	}
818 
819 	return (sizeof (spitf_async_flt));
820 }
821 
822 
823 /*
824  * Correctable ecc error trap handler
825  */
826 /*ARGSUSED*/
827 void
828 cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
829 	uint_t p_afsr_high, uint_t p_afar_high)
830 {
831 	ushort_t sdbh, sdbl;
832 	ushort_t e_syndh, e_syndl;
833 	spitf_async_flt spf_flt;
834 	struct async_flt *ecc;
835 	int queue = 1;
836 
837 	uint64_t t_afar = p_afar;
838 	uint64_t t_afsr = p_afsr;
839 
840 	/*
841 	 * Note: the Spitfire data buffer error registers
842 	 * (upper and lower halves) are or'ed into the upper
843 	 * word of the afsr by ce_err().
844 	 */
845 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
846 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
847 
848 	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
849 	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);
850 
851 	t_afsr &= S_AFSR_MASK;
852 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
853 
854 	/* Setup the async fault structure */
855 	bzero(&spf_flt, sizeof (spitf_async_flt));
856 	ecc = (struct async_flt *)&spf_flt;
857 	ecc->flt_id = gethrtime_waitfree();
858 	ecc->flt_stat = t_afsr;
859 	ecc->flt_addr = t_afar;
860 	ecc->flt_status = ECC_C_TRAP;
861 	ecc->flt_bus_id = getprocessorid();
862 	ecc->flt_inst = CPU->cpu_id;
863 	ecc->flt_pc = (caddr_t)rp->r_pc;
864 	ecc->flt_func = log_ce_err;
865 	ecc->flt_in_memory =
866 		(pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
867 	spf_flt.flt_sdbh = sdbh;
868 	spf_flt.flt_sdbl = sdbl;
869 
870 	/*
871 	 * Check for fatal conditions.
872 	 */
873 	check_misc_err(&spf_flt);
874 
875 	/*
876 	 * Pananoid checks for valid AFSR and UDBs
877 	 */
878 	if ((t_afsr & P_AFSR_CE) == 0) {
879 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
880 			"** Panic due to CE bit not set in the AFSR",
881 			"  Corrected Memory Error on");
882 	}
883 
884 	/*
885 	 * We want to skip logging only if ALL the following
886 	 * conditions are true:
887 	 *
888 	 *	1. There is only one error
889 	 *	2. That error is a correctable memory error
890 	 *	3. The error is caused by the memory scrubber (in which case
891 	 *	    the error will have occurred under on_trap protection)
892 	 *	4. The error is on a retired page
893 	 *
894 	 * Note: OT_DATA_EC is used places other than the memory scrubber.
895 	 * However, none of those errors should occur on a retired page.
896 	 */
897 	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
898 	    curthread->t_ontrap != NULL) {
899 
900 		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
901 			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
902 				queue = 0;
903 			}
904 		}
905 	}
906 
907 	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
908 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
909 			"** Panic due to CE bits not set in the UDBs",
910 			" Corrected Memory Error on");
911 	}
912 
913 	if ((sdbh >> 8) & 1) {
914 		ecc->flt_synd = e_syndh;
915 		ce_scrub(ecc);
916 		if (queue) {
917 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
918 			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
919 		}
920 	}
921 
922 	if ((sdbl >> 8) & 1) {
923 		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
924 		ecc->flt_synd = e_syndl | UDBL_REG;
925 		ce_scrub(ecc);
926 		if (queue) {
927 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
928 			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
929 		}
930 	}
931 
932 	/*
933 	 * Re-enable all error trapping (CEEN currently cleared).
934 	 */
935 	clr_datapath();
936 	set_asyncflt(P_AFSR_CE);
937 	set_error_enable(EER_ENABLE);
938 }
939 
940 /*
941  * Cpu specific CE logging routine
942  */
943 static void
944 log_ce_err(struct async_flt *aflt, char *unum)
945 {
946 	spitf_async_flt spf_flt;
947 
948 	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
949 		return;
950 	}
951 
952 	spf_flt.cmn_asyncflt = *aflt;
953 	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
954 	    " Corrected Memory Error detected by");
955 }
956 
957 /*
958  * Spitfire does not perform any further CE classification refinement
959  */
960 /*ARGSUSED*/
961 int
962 ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
963     size_t afltoffset)
964 {
965 	return (0);
966 }
967 
968 char *
969 flt_to_error_type(struct async_flt *aflt)
970 {
971 	if (aflt->flt_status & ECC_INTERMITTENT)
972 		return (ERR_TYPE_DESC_INTERMITTENT);
973 	if (aflt->flt_status & ECC_PERSISTENT)
974 		return (ERR_TYPE_DESC_PERSISTENT);
975 	if (aflt->flt_status & ECC_STICKY)
976 		return (ERR_TYPE_DESC_STICKY);
977 	return (ERR_TYPE_DESC_UNKNOWN);
978 }
979 
980 /*
981  * Called by correctable ecc error logging code to print out
982  * the stick/persistent/intermittent status of the error.
983  */
984 static void
985 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
986 {
987 	ushort_t status;
988 	char *status1_str = "Memory";
989 	char *status2_str = "Intermittent";
990 	struct async_flt *aflt = (struct async_flt *)spf_flt;
991 
992 	status = aflt->flt_status;
993 
994 	if (status & ECC_ECACHE)
995 		status1_str = "Ecache";
996 
997 	if (status & ECC_STICKY)
998 		status2_str = "Sticky";
999 	else if (status & ECC_PERSISTENT)
1000 		status2_str = "Persistent";
1001 
1002 	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
1003 		NULL, " Corrected %s Error on %s is %s",
1004 		status1_str, unum, status2_str);
1005 }
1006 
1007 /*
1008  * check for a valid ce syndrome, then call the
1009  * displacement flush scrubbing code, and then check the afsr to see if
1010  * the error was persistent or intermittent. Reread the afar/afsr to see
1011  * if the error was not scrubbed successfully, and is therefore sticky.
1012  */
1013 /*ARGSUSED1*/
1014 void
1015 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
1016 {
1017 	uint64_t eer, afsr;
1018 	ushort_t status;
1019 
1020 	ASSERT(getpil() > LOCK_LEVEL);
1021 
1022 	/*
1023 	 * It is possible that the flt_addr is not a valid
1024 	 * physical address. To deal with this, we disable
1025 	 * NCEEN while we scrub that address. If this causes
1026 	 * a TIMEOUT/BERR, we know this is an invalid
1027 	 * memory location.
1028 	 */
1029 	kpreempt_disable();
1030 	eer = get_error_enable();
1031 	if (eer & (EER_CEEN | EER_NCEEN))
1032 	    set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));
1033 
1034 	/*
1035 	 * To check if the error detected by IO is persistent, sticky or
1036 	 * intermittent.
1037 	 */
1038 	if (ecc->flt_status & ECC_IOBUS) {
1039 		ecc->flt_stat = P_AFSR_CE;
1040 	}
1041 
1042 	scrubphys(P2ALIGN(ecc->flt_addr, 64),
1043 	    cpunodes[CPU->cpu_id].ecache_size);
1044 
1045 	get_asyncflt(&afsr);
1046 	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1047 		/*
1048 		 * Must ensure that we don't get the TIMEOUT/BERR
1049 		 * when we reenable NCEEN, so we clear the AFSR.
1050 		 */
1051 		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
1052 		if (eer & (EER_CEEN | EER_NCEEN))
1053 		    set_error_enable(eer);
1054 		kpreempt_enable();
1055 		return;
1056 	}
1057 
1058 	if (eer & EER_NCEEN)
1059 	    set_error_enable(eer & ~EER_CEEN);
1060 
1061 	/*
1062 	 * Check and clear any ECC errors from the scrub.  If the scrub did
1063 	 * not trip over the error, mark it intermittent.  If the scrub did
1064 	 * trip the error again and it did not scrub away, mark it sticky.
1065 	 * Otherwise mark it persistent.
1066 	 */
1067 	if (check_ecc(ecc) != 0) {
1068 		cpu_read_paddr(ecc, 0, 1);
1069 
1070 		if (check_ecc(ecc) != 0)
1071 			status = ECC_STICKY;
1072 		else
1073 			status = ECC_PERSISTENT;
1074 	} else
1075 		status = ECC_INTERMITTENT;
1076 
1077 	if (eer & (EER_CEEN | EER_NCEEN))
1078 	    set_error_enable(eer);
1079 	kpreempt_enable();
1080 
1081 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
1082 	ecc->flt_status |= status;
1083 }
1084 
1085 /*
1086  * get the syndrome and unum, and then call the routines
1087  * to check the other cpus and iobuses, and then do the error logging.
1088  */
1089 /*ARGSUSED1*/
1090 void
1091 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
1092 {
1093 	char unum[UNUM_NAMLEN];
1094 	int len = 0;
1095 	int ce_verbose = 0;
1096 	int err;
1097 
1098 	ASSERT(ecc->flt_func != NULL);
1099 
1100 	/* Get the unum string for logging purposes */
1101 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
1102 	    UNUM_NAMLEN, &len);
1103 
1104 	/* Call specific error logging routine */
1105 	(void) (*ecc->flt_func)(ecc, unum);
1106 
1107 	/*
1108 	 * Count errors per unum.
1109 	 * Non-memory errors are all counted via a special unum string.
1110 	 */
1111 	if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
1112 	    automatic_page_removal) {
1113 		(void) page_retire(ecc->flt_addr, err);
1114 	}
1115 
1116 	if (ecc->flt_panic) {
1117 		ce_verbose = 1;
1118 	} else if ((ecc->flt_class == BUS_FAULT) ||
1119 	    (ecc->flt_stat & P_AFSR_CE)) {
1120 		ce_verbose = (ce_verbose_memory > 0);
1121 	} else {
1122 		ce_verbose = 1;
1123 	}
1124 
1125 	if (ce_verbose) {
1126 		spitf_async_flt sflt;
1127 		int synd_code;
1128 
1129 		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */
1130 
1131 		cpu_ce_log_status(&sflt, unum);
1132 
1133 		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
1134 				SYND(ecc->flt_synd));
1135 
1136 		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
1137 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1138 			    NULL, " ECC Data Bit %2d was in error "
1139 			    "and corrected", synd_code);
1140 		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
1141 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1142 			    NULL, " ECC Check Bit %2d was in error "
1143 			    "and corrected", synd_code - C0);
1144 		} else {
1145 			/*
1146 			 * These are UE errors - we shouldn't be getting CE
1147 			 * traps for these; handle them in case of bad h/w.
1148 			 */
1149 			switch (synd_code) {
1150 			case M2:
1151 				cpu_aflt_log(CE_CONT, 0, &sflt,
1152 				    CPU_ERRID_FIRST, NULL,
1153 				    " Two ECC Bits were in error");
1154 				break;
1155 			case M3:
1156 				cpu_aflt_log(CE_CONT, 0, &sflt,
1157 				    CPU_ERRID_FIRST, NULL,
1158 				    " Three ECC Bits were in error");
1159 				break;
1160 			case M4:
1161 				cpu_aflt_log(CE_CONT, 0, &sflt,
1162 				    CPU_ERRID_FIRST, NULL,
1163 				    " Four ECC Bits were in error");
1164 				break;
1165 			case MX:
1166 				cpu_aflt_log(CE_CONT, 0, &sflt,
1167 				    CPU_ERRID_FIRST, NULL,
1168 				    " More than Four ECC bits were "
1169 				    "in error");
1170 				break;
1171 			default:
1172 				cpu_aflt_log(CE_CONT, 0, &sflt,
1173 				    CPU_ERRID_FIRST, NULL,
1174 				    " Unknown fault syndrome %d",
1175 				    synd_code);
1176 				break;
1177 			}
1178 		}
1179 	}
1180 
1181 	/* Display entire cache line, if valid address */
1182 	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
1183 		read_ecc_data(ecc, 1, 1);
1184 }
1185 
1186 /*
1187  * We route all errors through a single switch statement.
1188  */
1189 void
1190 cpu_ue_log_err(struct async_flt *aflt)
1191 {
1192 
1193 	switch (aflt->flt_class) {
1194 	case CPU_FAULT:
1195 		cpu_async_log_err(aflt);
1196 		break;
1197 
1198 	case BUS_FAULT:
1199 		bus_async_log_err(aflt);
1200 		break;
1201 
1202 	default:
1203 		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
1204 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1205 		break;
1206 	}
1207 }
1208 
1209 /* Values for action variable in cpu_async_error() */
1210 #define	ACTION_NONE		0
1211 #define	ACTION_TRAMPOLINE	1
1212 #define	ACTION_AST_FLAGS	2
1213 
1214 /*
1215  * Access error trap handler for asynchronous cpu errors.  This routine is
1216  * called to handle a data or instruction access error.  All fatal errors are
1217  * completely handled by this routine (by panicking).  Non fatal error logging
1218  * is queued for later processing either via AST or softint at a lower PIL.
1219  * In case of panic, the error log queue will also be processed as part of the
1220  * panic flow to ensure all errors are logged.  This routine is called with all
1221  * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
1222  * error bits are also cleared.  The hardware has also disabled the I and
1223  * D-caches for us, so we must re-enable them before returning.
1224  *
1225  * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
1226  *
1227  *		_______________________________________________________________
1228  *		|        Privileged tl0		|         Unprivileged	      |
1229  *		| Protected	| Unprotected	| Protected	| Unprotected |
1230  *		|on_trap|lofault|		|		|	      |
1231  * -------------|-------|-------+---------------+---------------+-------------|
1232  *		|	|	|		|		|	      |
1233  * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
1234  *		|	|	|		|		|	      |
1235  * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
1236  *		|	|	|		|		|	      |
1237  * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
1238  *		|	|	|		|		|	      |
1239  * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
1240  * ____________________________________________________________________________
1241  *
1242  *
1243  * Action codes:
1244  *
1245  * L - log
1246  * M - kick off memscrubber if flt_in_memory
1247  * P - panic
1248  * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
1249  * R - i)  if aft_panic is set, panic
1250  *     ii) otherwise, send hwerr event to contract and SIGKILL to process
1251  * S - send SIGBUS to process
1252  * T - trampoline
1253  *
1254  * Special cases:
1255  *
1256  * 1) if aft_testfatal is set, all faults result in a panic regardless
1257  *    of type (even WP), protection (even on_trap), or privilege.
1258  */
1259 /*ARGSUSED*/
1260 void
1261 cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
1262 	uint_t p_afsr_high, uint_t p_afar_high)
1263 {
1264 	ushort_t sdbh, sdbl, ttype, tl;
1265 	spitf_async_flt spf_flt;
1266 	struct async_flt *aflt;
1267 	char pr_reason[28];
1268 	uint64_t oafsr;
1269 	uint64_t acc_afsr = 0;			/* accumulated afsr */
1270 	int action = ACTION_NONE;
1271 	uint64_t t_afar = p_afar;
1272 	uint64_t t_afsr = p_afsr;
1273 	int expected = DDI_FM_ERR_UNEXPECTED;
1274 	ddi_acc_hdl_t *hp;
1275 
1276 	/*
1277 	 * We need to look at p_flag to determine if the thread detected an
1278 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1279 	 * because we just need a consistent snapshot and we know that everyone
1280 	 * else will store a consistent set of bits while holding p_lock.  We
1281 	 * don't have to worry about a race because SDOCORE is set once prior
1282 	 * to doing i/o from the process's address space and is never cleared.
1283 	 */
1284 	uint_t pflag = ttoproc(curthread)->p_flag;
1285 
1286 	pr_reason[0] = '\0';
1287 
1288 	/*
1289 	 * Note: the Spitfire data buffer error registers
1290 	 * (upper and lower halves) are or'ed into the upper
1291 	 * word of the afsr by async_err() if P_AFSR_UE is set.
1292 	 */
1293 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
1294 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
1295 
1296 	/*
1297 	 * Grab the ttype encoded in <63:53> of the saved
1298 	 * afsr passed from async_err()
1299 	 */
1300 	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
1301 	tl = (ushort_t)(t_afsr >> 62);
1302 
1303 	t_afsr &= S_AFSR_MASK;
1304 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
1305 
1306 	/*
1307 	 * Initialize most of the common and CPU-specific structure.  We derive
1308 	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
1309 	 * initial setting of aflt->flt_panic is based on TL: we must panic if
1310 	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
1311 	 * tuneable aft_testfatal is set (not the default).
1312 	 */
1313 	bzero(&spf_flt, sizeof (spitf_async_flt));
1314 	aflt = (struct async_flt *)&spf_flt;
1315 	aflt->flt_id = gethrtime_waitfree();
1316 	aflt->flt_stat = t_afsr;
1317 	aflt->flt_addr = t_afar;
1318 	aflt->flt_bus_id = getprocessorid();
1319 	aflt->flt_inst = CPU->cpu_id;
1320 	aflt->flt_pc = (caddr_t)rp->r_pc;
1321 	aflt->flt_prot = AFLT_PROT_NONE;
1322 	aflt->flt_class = CPU_FAULT;
1323 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1324 	aflt->flt_tl = (uchar_t)tl;
1325 	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
1326 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1327 
1328 	/*
1329 	 * Set flt_status based on the trap type.  If we end up here as the
1330 	 * result of a UE detected by the CE handling code, leave status 0.
1331 	 */
1332 	switch (ttype) {
1333 	case T_DATA_ERROR:
1334 		aflt->flt_status = ECC_D_TRAP;
1335 		break;
1336 	case T_INSTR_ERROR:
1337 		aflt->flt_status = ECC_I_TRAP;
1338 		break;
1339 	}
1340 
1341 	spf_flt.flt_sdbh = sdbh;
1342 	spf_flt.flt_sdbl = sdbl;
1343 
1344 	/*
1345 	 * Check for fatal async errors.
1346 	 */
1347 	check_misc_err(&spf_flt);
1348 
1349 	/*
1350 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1351 	 * see if we were executing in the kernel under on_trap() or t_lofault
1352 	 * protection.  If so, modify the saved registers so that we return
1353 	 * from the trap to the appropriate trampoline routine.
1354 	 */
1355 	if (aflt->flt_priv && tl == 0) {
1356 		if (curthread->t_ontrap != NULL) {
1357 			on_trap_data_t *otp = curthread->t_ontrap;
1358 
1359 			if (otp->ot_prot & OT_DATA_EC) {
1360 				aflt->flt_prot = AFLT_PROT_EC;
1361 				otp->ot_trap |= OT_DATA_EC;
1362 				rp->r_pc = otp->ot_trampoline;
1363 				rp->r_npc = rp->r_pc + 4;
1364 				action = ACTION_TRAMPOLINE;
1365 			}
1366 
1367 			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
1368 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1369 				aflt->flt_prot = AFLT_PROT_ACCESS;
1370 				otp->ot_trap |= OT_DATA_ACCESS;
1371 				rp->r_pc = otp->ot_trampoline;
1372 				rp->r_npc = rp->r_pc + 4;
1373 				action = ACTION_TRAMPOLINE;
1374 				/*
1375 				 * for peeks and caut_gets errors are expected
1376 				 */
1377 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1378 				if (!hp)
1379 					expected = DDI_FM_ERR_PEEK;
1380 				else if (hp->ah_acc.devacc_attr_access ==
1381 				    DDI_CAUTIOUS_ACC)
1382 					expected = DDI_FM_ERR_EXPECTED;
1383 			}
1384 
1385 		} else if (curthread->t_lofault) {
1386 			aflt->flt_prot = AFLT_PROT_COPY;
1387 			rp->r_g1 = EFAULT;
1388 			rp->r_pc = curthread->t_lofault;
1389 			rp->r_npc = rp->r_pc + 4;
1390 			action = ACTION_TRAMPOLINE;
1391 		}
1392 	}
1393 
1394 	/*
1395 	 * Determine if this error needs to be treated as fatal.  Note that
1396 	 * multiple errors detected upon entry to this trap handler does not
1397 	 * necessarily warrant a panic.  We only want to panic if the trap
1398 	 * happened in privileged mode and not under t_ontrap or t_lofault
1399 	 * protection.  The exception is WP: if we *only* get WP, it is not
1400 	 * fatal even if the trap occurred in privileged mode, except on Sabre.
1401 	 *
1402 	 * aft_panic, if set, effectively makes us treat usermode
1403 	 * UE/EDP/LDP faults as if they were privileged - so we we will
1404 	 * panic instead of sending a contract event.  A lofault-protected
1405 	 * fault will normally follow the contract event; if aft_panic is
1406 	 * set this will be changed to a panic.
1407 	 *
1408 	 * For usermode BERR/BTO errors, eg from processes performing device
1409 	 * control through mapped device memory, we need only deliver
1410 	 * a SIGBUS to the offending process.
1411 	 *
1412 	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
1413 	 * checked later; for now we implement the common reasons.
1414 	 */
1415 	if (aflt->flt_prot == AFLT_PROT_NONE) {
1416 		/*
1417 		 * Beware - multiple bits may be set in AFSR
1418 		 */
1419 		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
1420 			if (aflt->flt_priv || aft_panic)
1421 				aflt->flt_panic = 1;
1422 		}
1423 
1424 		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1425 			if (aflt->flt_priv)
1426 				aflt->flt_panic = 1;
1427 		}
1428 	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
1429 		aflt->flt_panic = 1;
1430 	}
1431 
1432 	/*
1433 	 * UE/BERR/TO: Call our bus nexus friends to check for
1434 	 * IO errors that may have resulted in this trap.
1435 	 */
1436 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
1437 		cpu_run_bus_error_handlers(aflt, expected);
1438 	}
1439 
1440 	/*
1441 	 * Handle UE: If the UE is in memory, we need to flush the bad line from
1442 	 * the E-cache.  We also need to query the bus nexus for fatal errors.
1443 	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
1444 	 * caches may introduce more parity errors (especially when the module
1445 	 * is bad) and in sabre there is no guarantee that such errors
1446 	 * (if introduced) are written back as poisoned data.
1447 	 */
1448 	if (t_afsr & P_AFSR_UE) {
1449 		int i;
1450 
1451 		(void) strcat(pr_reason, "UE ");
1452 
1453 		spf_flt.flt_type = CPU_UE_ERR;
1454 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1455 			MMU_PAGESHIFT)) ? 1: 0;
1456 
1457 		/*
1458 		 * With UE, we have the PA of the fault.
1459 		 * Let do a diagnostic read to get the ecache
1460 		 * data and tag info of the bad line for logging.
1461 		 */
1462 		if (aflt->flt_in_memory) {
1463 			uint32_t ec_set_size;
1464 			uchar_t state;
1465 			uint32_t ecache_idx;
1466 			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);
1467 
1468 			/* touch the line to put it in ecache */
1469 			acc_afsr |= read_and_clear_afsr();
1470 			(void) lddphys(faultpa);
1471 			acc_afsr |= (read_and_clear_afsr() &
1472 				    ~(P_AFSR_EDP | P_AFSR_UE));
1473 
1474 			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
1475 			    ecache_associativity;
1476 
1477 			for (i = 0; i < ecache_associativity; i++) {
1478 				ecache_idx = i * ec_set_size +
1479 				    (aflt->flt_addr % ec_set_size);
1480 				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
1481 					(uint64_t *)&spf_flt.flt_ec_data[0],
1482 					&spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
1483 				acc_afsr |= oafsr;
1484 
1485 				state = (uchar_t)((spf_flt.flt_ec_tag &
1486 				    cpu_ec_state_mask) >> cpu_ec_state_shift);
1487 
1488 				if ((state & cpu_ec_state_valid) &&
1489 				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
1490 				    ((uint64_t)aflt->flt_addr >>
1491 				    cpu_ec_tag_shift)))
1492 					break;
1493 			}
1494 
1495 			/*
1496 			 * Check to see if the ecache tag is valid for the
1497 			 * fault PA. In the very unlikely event where the
1498 			 * line could be victimized, no ecache info will be
1499 			 * available. If this is the case, capture the line
1500 			 * from memory instead.
1501 			 */
1502 			if ((state & cpu_ec_state_valid) == 0 ||
1503 			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
1504 			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
1505 				for (i = 0; i < 8; i++, faultpa += 8) {
1506 				    ec_data_t *ecdptr;
1507 
1508 					ecdptr = &spf_flt.flt_ec_data[i];
1509 					acc_afsr |= read_and_clear_afsr();
1510 					ecdptr->ec_d8 = lddphys(faultpa);
1511 					acc_afsr |= (read_and_clear_afsr() &
1512 						    ~(P_AFSR_EDP | P_AFSR_UE));
1513 					ecdptr->ec_afsr = 0;
1514 							/* null afsr value */
1515 				}
1516 
1517 				/*
1518 				 * Mark tag invalid to indicate mem dump
1519 				 * when we print out the info.
1520 				 */
1521 				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
1522 			}
1523 			spf_flt.flt_ec_lcnt = 1;
1524 
1525 			/*
1526 			 * Flush out the bad line
1527 			 */
1528 			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
1529 				cpunodes[CPU->cpu_id].ecache_size);
1530 
1531 			acc_afsr |= clear_errors(NULL, NULL);
1532 		}
1533 
1534 		/*
1535 		 * Ask our bus nexus friends if they have any fatal errors. If
1536 		 * so, they will log appropriate error messages and panic as a
1537 		 * result. We then queue an event for each UDB that reports a
1538 		 * UE. Each UE reported in a UDB will have its own log message.
1539 		 *
1540 		 * Note from kbn: In the case where there are multiple UEs
1541 		 * (ME bit is set) - the AFAR address is only accurate to
1542 		 * the 16-byte granularity. One cannot tell whether the AFAR
1543 		 * belongs to the UDBH or UDBL syndromes. In this case, we
1544 		 * always report the AFAR address to be 16-byte aligned.
1545 		 *
1546 		 * If we're on a Sabre, there is no SDBL, but it will always
1547 		 * read as zero, so the sdbl test below will safely fail.
1548 		 */
1549 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
1550 			aflt->flt_panic = 1;
1551 
1552 		if (sdbh & P_DER_UE) {
1553 			aflt->flt_synd = sdbh & P_DER_E_SYND;
1554 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1555 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1556 			    aflt->flt_panic);
1557 		}
1558 		if (sdbl & P_DER_UE) {
1559 			aflt->flt_synd = sdbl & P_DER_E_SYND;
1560 			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
1561 			if (!(aflt->flt_stat & P_AFSR_ME))
1562 				aflt->flt_addr |= 0x8;
1563 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1564 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1565 			    aflt->flt_panic);
1566 		}
1567 
1568 		/*
1569 		 * We got a UE and are panicking, save the fault PA in a known
1570 		 * location so that the platform specific panic code can check
1571 		 * for copyback errors.
1572 		 */
1573 		if (aflt->flt_panic && aflt->flt_in_memory) {
1574 			panic_aflt = *aflt;
1575 		}
1576 	}
1577 
1578 	/*
1579 	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
1580 	 * async error for logging. For Sabre, we panic on EDP or LDP.
1581 	 */
1582 	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
1583 		spf_flt.flt_type = CPU_EDP_LDP_ERR;
1584 
1585 		if (t_afsr & P_AFSR_EDP)
1586 			(void) strcat(pr_reason, "EDP ");
1587 
1588 		if (t_afsr & P_AFSR_LDP)
1589 			(void) strcat(pr_reason, "LDP ");
1590 
1591 		/*
1592 		 * Here we have no PA to work with.
1593 		 * Scan each line in the ecache to look for
1594 		 * the one with bad parity.
1595 		 */
1596 		aflt->flt_addr = AFLT_INV_ADDR;
1597 		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1598 			&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
1599 		acc_afsr |= (oafsr & ~P_AFSR_WP);
1600 
1601 		/*
1602 		 * If we found a bad PA, update the state to indicate if it is
1603 		 * memory or I/O space.  This code will be important if we ever
1604 		 * support cacheable frame buffers.
1605 		 */
1606 		if (aflt->flt_addr != AFLT_INV_ADDR) {
1607 			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1608 				MMU_PAGESHIFT)) ? 1 : 0;
1609 		}
1610 
1611 		if (isus2i || isus2e)
1612 			aflt->flt_panic = 1;
1613 
1614 		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
1615 		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
1616 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1617 		    aflt->flt_panic);
1618 	}
1619 
1620 	/*
1621 	 * Timeout and bus error handling.  There are two cases to consider:
1622 	 *
1623 	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we
1624 	 * have already modified the saved registers so that we will return
1625 	 * from the trap to the appropriate trampoline routine; otherwise panic.
1626 	 *
1627 	 * (2) In user mode, we can simply use our AST mechanism to deliver
1628 	 * a SIGBUS.  We do not log the occurence - processes performing
1629 	 * device control would generate lots of uninteresting messages.
1630 	 */
1631 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1632 		if (t_afsr & P_AFSR_TO)
1633 			(void) strcat(pr_reason, "BTO ");
1634 
1635 		if (t_afsr & P_AFSR_BERR)
1636 			(void) strcat(pr_reason, "BERR ");
1637 
1638 		spf_flt.flt_type = CPU_BTO_BERR_ERR;
1639 		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
1640 			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
1641 			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
1642 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1643 			    aflt->flt_panic);
1644 		}
1645 	}
1646 
1647 	/*
1648 	 * Handle WP: WP happens when the ecache is victimized and a parity
1649 	 * error was detected on a writeback.  The data in question will be
1650 	 * poisoned as a UE will be written back.  The PA is not logged and
1651 	 * it is possible that it doesn't belong to the trapped thread.  The
1652 	 * WP trap is not fatal, but it could be fatal to someone that
1653 	 * subsequently accesses the toxic page.  We set read_all_memscrub
1654 	 * to force the memscrubber to read all of memory when it awakens.
1655 	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
1656 	 * UE back to poison the data.
1657 	 */
1658 	if (t_afsr & P_AFSR_WP) {
1659 		(void) strcat(pr_reason, "WP ");
1660 		if (isus2i || isus2e) {
1661 			aflt->flt_panic = 1;
1662 		} else {
1663 			read_all_memscrub = 1;
1664 		}
1665 		spf_flt.flt_type = CPU_WP_ERR;
1666 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
1667 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1668 		    aflt->flt_panic);
1669 	}
1670 
1671 	/*
1672 	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
1673 	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
1674 	 * This is fatal.
1675 	 */
1676 
1677 	if (t_afsr & P_AFSR_CP) {
1678 		if (isus2i || isus2e) {
1679 			(void) strcat(pr_reason, "CP ");
1680 			aflt->flt_panic = 1;
1681 			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
1682 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1683 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1684 			    aflt->flt_panic);
1685 		} else {
1686 			/*
1687 			 * Orphan CP: Happens due to signal integrity problem
1688 			 * on a CPU, where a CP is reported, without reporting
1689 			 * its associated UE. This is handled by locating the
1690 			 * bad parity line and would kick off the memscrubber
1691 			 * to find the UE if in memory or in another's cache.
1692 			 */
1693 			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
1694 			(void) strcat(pr_reason, "ORPHAN_CP ");
1695 
1696 			/*
1697 			 * Here we have no PA to work with.
1698 			 * Scan each line in the ecache to look for
1699 			 * the one with bad parity.
1700 			 */
1701 			aflt->flt_addr = AFLT_INV_ADDR;
1702 			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1703 				&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
1704 				&oafsr);
1705 			acc_afsr |= oafsr;
1706 
1707 			/*
1708 			 * If we found a bad PA, update the state to indicate
1709 			 * if it is memory or I/O space.
1710 			 */
1711 			if (aflt->flt_addr != AFLT_INV_ADDR) {
1712 				aflt->flt_in_memory =
1713 					(pf_is_memory(aflt->flt_addr >>
1714 						MMU_PAGESHIFT)) ? 1 : 0;
1715 			}
1716 			read_all_memscrub = 1;
1717 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1718 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1719 			    aflt->flt_panic);
1720 
1721 		}
1722 	}
1723 
1724 	/*
1725 	 * If we queued an error other than WP or CP and we are going to return
1726 	 * from the trap and the error was in user mode or inside of a
1727 	 * copy routine, set AST flag so the queue will be drained before
1728 	 * returning to user mode.
1729 	 *
1730 	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
1731 	 * and send an event to its process contract.
1732 	 *
1733 	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
1734 	 * will have been no error queued in this case.
1735 	 */
1736 	if ((t_afsr &
1737 	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
1738 	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
1739 			int pcb_flag = 0;
1740 
1741 			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
1742 				pcb_flag |= ASYNC_HWERR;
1743 
1744 			if (t_afsr & P_AFSR_BERR)
1745 				pcb_flag |= ASYNC_BERR;
1746 
1747 			if (t_afsr & P_AFSR_TO)
1748 				pcb_flag |= ASYNC_BTO;
1749 
1750 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1751 			aston(curthread);
1752 			action = ACTION_AST_FLAGS;
1753 	}
1754 
1755 	/*
1756 	 * In response to a deferred error, we must do one of three things:
1757 	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
1758 	 * set in cases (1) and (2) - check that either action is set or
1759 	 * (3) is true.
1760 	 *
1761 	 * On II, the WP writes poisoned data back to memory, which will
1762 	 * cause a UE and a panic or reboot when read.  In this case, we
1763 	 * don't need to panic at this time.  On IIi and IIe,
1764 	 * aflt->flt_panic is already set above.
1765 	 */
1766 	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
1767 	    (t_afsr & P_AFSR_WP));
1768 
1769 	/*
1770 	 * Make a final sanity check to make sure we did not get any more async
1771 	 * errors and accumulate the afsr.
1772 	 */
1773 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
1774 	    cpunodes[CPU->cpu_id].ecache_linesize);
1775 	(void) clear_errors(&spf_flt, NULL);
1776 
1777 	/*
1778 	 * Take care of a special case: If there is a UE in the ecache flush
1779 	 * area, we'll see it in flush_ecache().  This will trigger the
1780 	 * CPU_ADDITIONAL_ERRORS case below.
1781 	 *
1782 	 * This could occur if the original error was a UE in the flush area,
1783 	 * or if the original error was an E$ error that was flushed out of
1784 	 * the E$ in scan_ecache().
1785 	 *
1786 	 * If it's at the same address that we're already logging, then it's
1787 	 * probably one of these cases.  Clear the bit so we don't trip over
1788 	 * it on the additional errors case, which could cause an unnecessary
1789 	 * panic.
1790 	 */
1791 	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
1792 		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
1793 	else
1794 		acc_afsr |= aflt->flt_stat;
1795 
1796 	/*
1797 	 * Check the acumulated afsr for the important bits.
1798 	 * Make sure the spf_flt.flt_type value is set, and
1799 	 * enque an error.
1800 	 */
1801 	if (acc_afsr &
1802 	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
1803 		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
1804 		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
1805 		    P_AFSR_ISAP))
1806 			aflt->flt_panic = 1;
1807 
1808 		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
1809 		aflt->flt_stat = acc_afsr;
1810 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
1811 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1812 		    aflt->flt_panic);
1813 	}
1814 
1815 	/*
1816 	 * If aflt->flt_panic is set at this point, we need to panic as the
1817 	 * result of a trap at TL > 0, or an error we determined to be fatal.
1818 	 * We've already enqueued the error in one of the if-clauses above,
1819 	 * and it will be dequeued and logged as part of the panic flow.
1820 	 */
1821 	if (aflt->flt_panic) {
1822 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
1823 		    "See previous message(s) for details", " %sError(s)",
1824 		    pr_reason);
1825 	}
1826 
1827 	/*
1828 	 * Before returning, we must re-enable errors, and
1829 	 * reset the caches to their boot-up state.
1830 	 */
1831 	set_lsu(get_lsu() | cache_boot_state);
1832 	set_error_enable(EER_ENABLE);
1833 }
1834 
1835 /*
1836  * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1837  * This routine is shared by the CE and UE handling code.
1838  */
1839 static void
1840 check_misc_err(spitf_async_flt *spf_flt)
1841 {
1842 	struct async_flt *aflt = (struct async_flt *)spf_flt;
1843 	char *fatal_str = NULL;
1844 
1845 	/*
1846 	 * The ISAP and ETP errors are supposed to cause a POR
1847 	 * from the system, so in theory we never, ever see these messages.
1848 	 * ISAP, ETP and IVUE are considered to be fatal.
1849 	 */
1850 	if (aflt->flt_stat & P_AFSR_ISAP)
1851 		fatal_str = " System Address Parity Error on";
1852 	else if (aflt->flt_stat & P_AFSR_ETP)
1853 		fatal_str = " Ecache Tag Parity Error on";
1854 	else if (aflt->flt_stat & P_AFSR_IVUE)
1855 		fatal_str = " Interrupt Vector Uncorrectable Error on";
1856 	if (fatal_str != NULL) {
1857 		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
1858 			NULL, fatal_str);
1859 	}
1860 }
1861 
1862 /*
1863  * Routine to convert a syndrome into a syndrome code.
1864  */
1865 static int
1866 synd_to_synd_code(int synd_status, ushort_t synd)
1867 {
1868 	if (synd_status != AFLT_STAT_VALID)
1869 		return (-1);
1870 
1871 	/*
1872 	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
1873 	 * to get the code indicating which bit(s) is(are) bad.
1874 	 */
1875 	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
1876 		return (-1);
1877 	else
1878 		return (ecc_syndrome_tab[synd]);
1879 }
1880 
/*
 * The cpu_get_mem_sid() interface is not implemented by this CPU module:
 * all arguments are ignored and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
1887 
/*
 * The cpu_get_mem_offset() interface is not implemented by this CPU
 * module: all arguments are ignored (*offp is left untouched) and
 * ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
{
	return (ENOTSUP);
}
1894 
/*
 * The cpu_get_mem_addr() interface is not implemented by this CPU
 * module: all arguments are ignored (*addrp is left untouched) and
 * ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	return (ENOTSUP);
}
1901 
1902 /*
1903  * Routine to return a string identifying the physical name
1904  * associated with a memory/cache error.
1905  */
1906 /* ARGSUSED */
1907 int
1908 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
1909     uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
1910     char *buf, int buflen, int *lenp)
1911 {
1912 	short synd_code;
1913 	int ret;
1914 
1915 	if (flt_in_memory) {
1916 		synd_code = synd_to_synd_code(synd_status, synd);
1917 		if (synd_code == -1) {
1918 			ret = EINVAL;
1919 		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
1920 		    buf, buflen, lenp) != 0) {
1921 			ret = EIO;
1922 		} else if (*lenp <= 1) {
1923 			ret = EINVAL;
1924 		} else {
1925 			ret = 0;
1926 		}
1927 	} else {
1928 		ret = ENOTSUP;
1929 	}
1930 
1931 	if (ret != 0) {
1932 		buf[0] = '\0';
1933 		*lenp = 0;
1934 	}
1935 
1936 	return (ret);
1937 }
1938 
/*
 * Wrapper for cpu_get_mem_unum() routine that takes an
 * async_flt struct rather than explicit arguments.
 *
 * The syndrome (via SYND()), flt_stat, flt_addr, flt_in_memory and
 * flt_status fields of aflt supply the corresponding arguments;
 * flt_bus_id is what gets passed in the cpuid position.  synd_status,
 * buf, buflen and lenp are passed through unchanged, and the return
 * value is whatever cpu_get_mem_unum() returns.
 */
int
cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
    char *buf, int buflen, int *lenp)
{
	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
		aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
		aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
}
1951 
1952 /*
1953  * This routine is a more generic interface to cpu_get_mem_unum(),
1954  * that may be used by other modules (e.g. mm).
1955  */
1956 int
1957 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1958 		char *buf, int buflen, int *lenp)
1959 {
1960 	int synd_status, flt_in_memory, ret;
1961 	char unum[UNUM_NAMLEN];
1962 
1963 	/*
1964 	 * Check for an invalid address.
1965 	 */
1966 	if (afar == (uint64_t)-1)
1967 		return (ENXIO);
1968 
1969 	if (synd == (uint64_t)-1)
1970 		synd_status = AFLT_STAT_INVALID;
1971 	else
1972 		synd_status = AFLT_STAT_VALID;
1973 
1974 	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
1975 
1976 	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1977 	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
1978 	    != 0)
1979 		return (ret);
1980 
1981 	if (*lenp >= buflen)
1982 		return (ENAMETOOLONG);
1983 
1984 	(void) strncpy(buf, unum, buflen);
1985 
1986 	return (0);
1987 }
1988 
/*
 * Routine to return memory information associated
 * with a physical address and syndrome.
 *
 * Not implemented by this CPU module: all arguments are ignored, none of
 * the output pointers is written, and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_info(uint64_t synd, uint64_t afar,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
	return (ENOTSUP);
}
2001 
/*
 * Routine to return a string identifying the physical
 * name associated with a cpuid.
 *
 * Not implemented by this CPU module: all arguments are ignored, buf and
 * *lenp are left untouched, and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
2012 
2013 /*
2014  * This routine returns the size of the kernel's FRU name buffer.
2015  */
2016 size_t
2017 cpu_get_name_bufsize()
2018 {
2019 	return (UNUM_NAMLEN);
2020 }
2021 
2022 /*
2023  * Cpu specific log func for UEs.
2024  */
2025 static void
2026 log_ue_err(struct async_flt *aflt, char *unum)
2027 {
2028 	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
2029 	int len = 0;
2030 
2031 #ifdef DEBUG
2032 	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
2033 
2034 	/*
2035 	 * Paranoid Check for priv mismatch
2036 	 * Only applicable for UEs
2037 	 */
2038 	if (afsr_priv != aflt->flt_priv) {
2039 		/*
2040 		 * The priv bits in %tstate and %afsr did not match; we expect
2041 		 * this to be very rare, so flag it with a message.
2042 		 */
2043 		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
2044 		    ": PRIV bit in TSTATE and AFSR mismatched; "
2045 		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
2046 
2047 		/* update saved afsr to reflect the correct priv */
2048 		aflt->flt_stat &= ~P_AFSR_PRIV;
2049 		if (aflt->flt_priv)
2050 			aflt->flt_stat |= P_AFSR_PRIV;
2051 	}
2052 #endif /* DEBUG */
2053 
2054 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
2055 	    UNUM_NAMLEN, &len);
2056 
2057 	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
2058 	    " Uncorrectable Memory Error on");
2059 
2060 	if (SYND(aflt->flt_synd) == 0x3) {
2061 		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
2062 		    " Syndrome 0x3 indicates that this may not be a "
2063 		    "memory module problem");
2064 	}
2065 
2066 	if (aflt->flt_in_memory)
2067 		cpu_log_ecmem_info(spf_flt);
2068 }
2069 
2070 
2071 /*
2072  * The cpu_async_log_err() function is called via the ue_drain() function to
2073  * handle logging for CPU events that are dequeued.  As such, it can be invoked
2074  * from softint context, from AST processing in the trap() flow, or from the
2075  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
2076  */
2077 static void
2078 cpu_async_log_err(void *flt)
2079 {
2080 	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
2081 	struct async_flt *aflt = (struct async_flt *)flt;
2082 	char unum[UNUM_NAMLEN];
2083 	char *space;
2084 	char *ecache_scrub_logstr = NULL;
2085 
2086 	switch (spf_flt->flt_type) {
2087 	    case CPU_UE_ERR:
2088 		/*
2089 		 * We want to skip logging only if ALL the following
2090 		 * conditions are true:
2091 		 *
2092 		 *	1. We are not panicking
2093 		 *	2. There is only one error
2094 		 *	3. That error is a memory error
2095 		 *	4. The error is caused by the memory scrubber (in
2096 		 *	   which case the error will have occurred under
2097 		 *	   on_trap protection)
2098 		 *	5. The error is on a retired page
2099 		 *
2100 		 * Note 1: AFLT_PROT_EC is used places other than the memory
2101 		 * scrubber.  However, none of those errors should occur
2102 		 * on a retired page.
2103 		 *
2104 		 * Note 2: In the CE case, these errors are discarded before
2105 		 * the errorq.  In the UE case, we must wait until now --
2106 		 * softcall() grabs a mutex, which we can't do at a high PIL.
2107 		 */
2108 		if (!panicstr &&
2109 		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
2110 		    aflt->flt_prot == AFLT_PROT_EC) {
2111 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2112 				/* Zero the address to clear the error */
2113 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2114 				return;
2115 			}
2116 		}
2117 
2118 		/*
2119 		 * Log the UE and check for causes of this UE error that
2120 		 * don't cause a trap (Copyback error).  cpu_async_error()
2121 		 * has already checked the i/o buses for us.
2122 		 */
2123 		log_ue_err(aflt, unum);
2124 		if (aflt->flt_in_memory)
2125 			cpu_check_allcpus(aflt);
2126 		break;
2127 
2128 	    case CPU_EDP_LDP_ERR:
2129 		if (aflt->flt_stat & P_AFSR_EDP)
2130 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2131 			    NULL, " EDP event on");
2132 
2133 		if (aflt->flt_stat & P_AFSR_LDP)
2134 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2135 			    NULL, " LDP event on");
2136 
2137 		/* Log ecache info if exist */
2138 		if (spf_flt->flt_ec_lcnt > 0) {
2139 			cpu_log_ecmem_info(spf_flt);
2140 
2141 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2142 			    NULL, " AFAR was derived from E$Tag");
2143 		} else {
2144 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2145 			    NULL, " No error found in ecache (No fault "
2146 			    "PA available)");
2147 		}
2148 		break;
2149 
2150 	    case CPU_WP_ERR:
2151 		/*
2152 		 * If the memscrub thread hasn't yet read
2153 		 * all of memory, as we requested in the
2154 		 * trap handler, then give it a kick to
2155 		 * make sure it does.
2156 		 */
2157 		if (!isus2i && !isus2e && read_all_memscrub)
2158 			memscrub_run();
2159 
2160 		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
2161 		    " WP event on");
2162 		return;
2163 
2164 	    case CPU_BTO_BERR_ERR:
2165 		/*
2166 		 * A bus timeout or error occurred that was in user mode or not
2167 		 * in a protected kernel code region.
2168 		 */
2169 		if (aflt->flt_stat & P_AFSR_BERR) {
2170 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2171 			    spf_flt, BERRTO_LFLAGS, NULL,
2172 			    " Bus Error on System Bus in %s mode from",
2173 			    aflt->flt_priv ? "privileged" : "user");
2174 		}
2175 
2176 		if (aflt->flt_stat & P_AFSR_TO) {
2177 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2178 			    spf_flt, BERRTO_LFLAGS, NULL,
2179 			    " Timeout on System Bus in %s mode from",
2180 			    aflt->flt_priv ? "privileged" : "user");
2181 		}
2182 
2183 		return;
2184 
2185 	    case CPU_PANIC_CP_ERR:
2186 		/*
2187 		 * Process the Copyback (CP) error info (if any) obtained from
2188 		 * polling all the cpus in the panic flow. This case is only
2189 		 * entered if we are panicking.
2190 		 */
2191 		ASSERT(panicstr != NULL);
2192 		ASSERT(aflt->flt_id == panic_aflt.flt_id);
2193 
2194 		/* See which space - this info may not exist */
2195 		if (panic_aflt.flt_status & ECC_D_TRAP)
2196 			space = "Data ";
2197 		else if (panic_aflt.flt_status & ECC_I_TRAP)
2198 			space = "Instruction ";
2199 		else
2200 			space = "";
2201 
2202 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2203 		    " AFAR was derived from UE report,"
2204 		    " CP event on CPU%d (caused %saccess error on %s%d)",
2205 		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
2206 		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
2207 
2208 		if (spf_flt->flt_ec_lcnt > 0)
2209 			cpu_log_ecmem_info(spf_flt);
2210 		else
2211 			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
2212 			    NULL, " No cache dump available");
2213 
2214 		return;
2215 
2216 	    case CPU_TRAPPING_CP_ERR:
2217 		/*
2218 		 * For sabre only.  This is a copyback ecache parity error due
2219 		 * to a PCI DMA read.  We should be panicking if we get here.
2220 		 */
2221 		ASSERT(panicstr != NULL);
2222 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2223 		    " AFAR was derived from UE report,"
2224 		    " CP event on CPU%d (caused Data access error "
2225 		    "on PCIBus)", aflt->flt_inst);
2226 		return;
2227 
2228 		/*
2229 		 * We log the ecache lines of the following states,
2230 		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
2231 		 * dirty_bad_busy if ecache_scrub_verbose is set and panic
2232 		 * in addition to logging if ecache_scrub_panic is set.
2233 		 */
2234 	    case CPU_BADLINE_CI_ERR:
2235 		ecache_scrub_logstr = "CBI";
2236 		/* FALLTHRU */
2237 
2238 	    case CPU_BADLINE_CB_ERR:
2239 		if (ecache_scrub_logstr == NULL)
2240 			ecache_scrub_logstr = "CBB";
2241 		/* FALLTHRU */
2242 
2243 	    case CPU_BADLINE_DI_ERR:
2244 		if (ecache_scrub_logstr == NULL)
2245 			ecache_scrub_logstr = "DBI";
2246 		/* FALLTHRU */
2247 
2248 	    case CPU_BADLINE_DB_ERR:
2249 		if (ecache_scrub_logstr == NULL)
2250 			ecache_scrub_logstr = "DBB";
2251 
2252 		cpu_aflt_log(CE_NOTE, 2, spf_flt,
2253 			(CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
2254 			" %s event on", ecache_scrub_logstr);
2255 		cpu_log_ecmem_info(spf_flt);
2256 
2257 		return;
2258 
2259 	    case CPU_ORPHAN_CP_ERR:
2260 		/*
2261 		 * Orphan CPs, where the CP bit is set, but when a CPU
2262 		 * doesn't report a UE.
2263 		 */
2264 		if (read_all_memscrub)
2265 			memscrub_run();
2266 
2267 		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
2268 			NULL, " Orphan CP event on");
2269 
2270 		/* Log ecache info if exist */
2271 		if (spf_flt->flt_ec_lcnt > 0)
2272 			cpu_log_ecmem_info(spf_flt);
2273 		else
2274 			cpu_aflt_log(CE_NOTE, 2, spf_flt,
2275 				(CP_LFLAGS | CPU_FLTCPU), NULL,
2276 				" No error found in ecache (No fault "
2277 				"PA available");
2278 		return;
2279 
2280 	    case CPU_ECACHE_ADDR_PAR_ERR:
2281 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2282 				" E$ Tag Address Parity error on");
2283 		cpu_log_ecmem_info(spf_flt);
2284 		return;
2285 
2286 	    case CPU_ECACHE_STATE_ERR:
2287 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2288 				" E$ Tag State Parity error on");
2289 		cpu_log_ecmem_info(spf_flt);
2290 		return;
2291 
2292 	    case CPU_ECACHE_TAG_ERR:
2293 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2294 				" E$ Tag scrub event on");
2295 		cpu_log_ecmem_info(spf_flt);
2296 		return;
2297 
2298 	    case CPU_ECACHE_ETP_ETS_ERR:
2299 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2300 				" AFSR.ETP is set and AFSR.ETS is zero on");
2301 		cpu_log_ecmem_info(spf_flt);
2302 		return;
2303 
2304 
2305 	    case CPU_ADDITIONAL_ERR:
2306 		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
2307 		    " Additional errors detected during error processing on");
2308 		return;
2309 
2310 	    default:
2311 		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
2312 		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
2313 		return;
2314 	}
2315 
2316 	/* ... fall through from the UE, EDP, or LDP cases */
2317 
2318 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2319 		if (!panicstr) {
2320 			(void) page_retire(aflt->flt_addr, PR_UE);
2321 		} else {
2322 			/*
2323 			 * Clear UEs on panic so that we don't
2324 			 * get haunted by them during panic or
2325 			 * after reboot
2326 			 */
2327 			clearphys(P2ALIGN(aflt->flt_addr, 64),
2328 			    cpunodes[CPU->cpu_id].ecache_size,
2329 			    cpunodes[CPU->cpu_id].ecache_linesize);
2330 
2331 			(void) clear_errors(NULL, NULL);
2332 		}
2333 	}
2334 
2335 	/*
2336 	 * Log final recover message
2337 	 */
2338 	if (!panicstr) {
2339 		if (!aflt->flt_priv) {
2340 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2341 			    NULL, " Above Error is in User Mode"
2342 			    "\n    and is fatal: "
2343 			    "will SIGKILL process and notify contract");
2344 		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
2345 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2346 			    NULL, " Above Error detected while dumping core;"
2347 			    "\n    core file will be truncated");
2348 		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
2349 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2350 			    NULL, " Above Error is due to Kernel access"
2351 			    "\n    to User space and is fatal: "
2352 			    "will SIGKILL process and notify contract");
2353 		} else if (aflt->flt_prot == AFLT_PROT_EC) {
2354 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
2355 			    " Above Error detected by protected Kernel code"
2356 			    "\n    that will try to clear error from system");
2357 		}
2358 	}
2359 }
2360 
2361 
2362 /*
2363  * Check all cpus for non-trapping UE-causing errors
2364  * In Ultra I/II, we look for copyback errors (CPs)
2365  */
2366 void
2367 cpu_check_allcpus(struct async_flt *aflt)
2368 {
2369 	spitf_async_flt cp;
2370 	spitf_async_flt *spf_cpflt = &cp;
2371 	struct async_flt *cpflt = (struct async_flt *)&cp;
2372 	int pix;
2373 
2374 	cpflt->flt_id = aflt->flt_id;
2375 	cpflt->flt_addr = aflt->flt_addr;
2376 
2377 	for (pix = 0; pix < NCPU; pix++) {
2378 		if (CPU_XCALL_READY(pix)) {
2379 			xc_one(pix, (xcfunc_t *)get_cpu_status,
2380 			    (uint64_t)cpflt, 0);
2381 
2382 			if (cpflt->flt_stat & P_AFSR_CP) {
2383 				char *space;
2384 
2385 				/* See which space - this info may not exist */
2386 				if (aflt->flt_status & ECC_D_TRAP)
2387 					space = "Data ";
2388 				else if (aflt->flt_status & ECC_I_TRAP)
2389 					space = "Instruction ";
2390 				else
2391 					space = "";
2392 
2393 				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
2394 				    NULL, " AFAR was derived from UE report,"
2395 				    " CP event on CPU%d (caused %saccess "
2396 				    "error on %s%d)", pix, space,
2397 				    (aflt->flt_status & ECC_IOBUS) ?
2398 				    "IOBUS" : "CPU", aflt->flt_bus_id);
2399 
2400 				if (spf_cpflt->flt_ec_lcnt > 0)
2401 					cpu_log_ecmem_info(spf_cpflt);
2402 				else
2403 					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
2404 					    CPU_ERRID_FIRST, NULL,
2405 					    " No cache dump available");
2406 			}
2407 		}
2408 	}
2409 }
2410 
#ifdef DEBUG
/*
 * DEBUG-only knob: when non-zero, get_cpu_status() ORs P_AFSR_CP into the
 * AFSR value it captured, so the copyback (CP) handling path is taken
 * even though the hardware did not report a CP error.
 */
int test_mp_cp = 0;
#endif
2414 
/*
 * Cross-call callback routine to tell a CPU to read its own %afsr to check
 * for copyback errors and capture relevant information.
 *
 * arg is a pointer (passed as a uint64_t) to a spitf_async_flt whose
 * flt_addr has been set by the caller.  On return flt_stat holds the AFSR
 * read on this CPU, merged with the per-CPU accumulated scrub AFSR (which
 * is then reset to 0) when CPU_PRIVATE(CPU) is available.  If P_AFSR_CP
 * is set in that value, flt_sdbh/flt_sdbl, flt_ec_lcnt and, when a
 * matching ecache line is found, flt_ec_tag/flt_ec_data are filled in too.
 * The function always returns 0.
 */
static uint_t
get_cpu_status(uint64_t arg)
{
	struct async_flt *aflt = (struct async_flt *)arg;
	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
	uint64_t afsr;
	uint32_t ec_idx;
	uint64_t sdbh, sdbl;
	int i;
	uint32_t ec_set_size;
	uchar_t valid;
	ec_data_t ec_data[8];
	uint64_t ec_tag, flt_addr_tag, oafsr;
	uint64_t *acc_afsr = NULL;

	/* Fold any AFSR bits accumulated for this CPU into the live AFSR. */
	get_asyncflt(&afsr);
	if (CPU_PRIVATE(CPU) != NULL) {
		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		afsr |= *acc_afsr;
		*acc_afsr = 0;
	}

#ifdef DEBUG
	/* Test hook: pretend a copyback error was reported. */
	if (test_mp_cp)
		afsr |= P_AFSR_CP;
#endif
	aflt->flt_stat = afsr;

	if (afsr & P_AFSR_CP) {
		/*
		 * Capture the UDBs
		 * (only the low 10 bits of each are kept).
		 */
		get_udb_errors(&sdbh, &sdbl);
		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);

		/*
		 * Clear CP bit before capturing ecache data
		 * and AFSR info.
		 */
		set_asyncflt(P_AFSR_CP);

		/*
		 * See if we can capture the ecache line for the
		 * fault PA.
		 *
		 * Return a valid matching ecache line, if any.
		 * Otherwise, return the first matching ecache
		 * line marked invalid.
		 */
		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
		    ecache_associativity;
		spf_flt->flt_ec_lcnt = 0;

		/*
		 * Visit the candidate line in each set: the index starts
		 * at the fault address modulo the set size and advances
		 * by one set size per iteration.
		 * NOTE(review): acc_afsr is still NULL here when
		 * CPU_PRIVATE(CPU) is NULL -- confirm get_ecache_dtag()
		 * tolerates a NULL last argument.
		 */
		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
			get_ecache_dtag(P2ALIGN(ec_idx, 64),
				(uint64_t *)&ec_data[0], &ec_tag, &oafsr,
				    acc_afsr);

			/* Skip lines whose tag is for a different address. */
			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
				continue;

			valid = cpu_ec_state_valid &
			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
			    cpu_ec_state_shift);

			/*
			 * Record a valid match (and stop), or the first
			 * match seen so far if it is not valid.
			 */
			if (valid || spf_flt->flt_ec_lcnt == 0) {
				spf_flt->flt_ec_tag = ec_tag;
				bcopy(&ec_data, &spf_flt->flt_ec_data,
				    sizeof (ec_data));
				spf_flt->flt_ec_lcnt = 1;

				if (valid)
					break;
			}
		}
	}
	return (0);
}
2500 
2501 /*
2502  * CPU-module callback for the non-panicking CPUs.  This routine is invoked
2503  * from panic_idle() as part of the other CPUs stopping themselves when a
2504  * panic occurs.  We need to be VERY careful what we do here, since panicstr
2505  * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
2506  * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
2507  * CP error information.
2508  */
2509 void
2510 cpu_async_panic_callb(void)
2511 {
2512 	spitf_async_flt cp;
2513 	struct async_flt *aflt = (struct async_flt *)&cp;
2514 	uint64_t *scrub_afsr;
2515 
2516 	if (panic_aflt.flt_id != 0) {
2517 		aflt->flt_addr = panic_aflt.flt_addr;
2518 		(void) get_cpu_status((uint64_t)aflt);
2519 
2520 		if (CPU_PRIVATE(CPU) != NULL) {
2521 			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2522 			if (*scrub_afsr & P_AFSR_CP) {
2523 				aflt->flt_stat |= *scrub_afsr;
2524 				*scrub_afsr = 0;
2525 			}
2526 		}
2527 		if (aflt->flt_stat & P_AFSR_CP) {
2528 			aflt->flt_id = panic_aflt.flt_id;
2529 			aflt->flt_panic = 1;
2530 			aflt->flt_inst = CPU->cpu_id;
2531 			aflt->flt_class = CPU_FAULT;
2532 			cp.flt_type = CPU_PANIC_CP_ERR;
2533 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
2534 			    (void *)&cp, sizeof (cp), ue_queue,
2535 			    aflt->flt_panic);
2536 		}
2537 	}
2538 }
2539 
/*
 * Turn off all cpu error detection, normally only used for panics.
 *
 * Done by handing set_error_enable_tl1, with the arguments EER_DISABLE
 * and EER_SET_ABSOLUTE, to xt_all().
 */
void
cpu_disable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
}
2548 
/*
 * Enable errors.
 *
 * Counterpart of cpu_disable_errors(): hands set_error_enable_tl1, with
 * the arguments EER_ENABLE and EER_SET_ABSOLUTE, to xt_all().
 */
void
cpu_enable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
}
2557 
2558 static void
2559 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
2560 {
2561 	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
2562 	int i, loop = 1;
2563 	ushort_t ecc_0;
2564 	uint64_t paddr;
2565 	uint64_t data;
2566 
2567 	if (verbose)
2568 		loop = 8;
2569 	for (i = 0; i < loop; i++) {
2570 		paddr = aligned_addr + (i * 8);
2571 		data = lddphys(paddr);
2572 		if (verbose) {
2573 			if (ce_err) {
2574 			    ecc_0 = ecc_gen((uint32_t)(data>>32),
2575 			    (uint32_t)data);
2576 			    cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2577 				NULL, "    Paddr 0x%" PRIx64 ", "
2578 				"Data 0x%08x.%08x, ECC 0x%x", paddr,
2579 				(uint32_t)(data>>32), (uint32_t)data, ecc_0);
2580 			} else {
2581 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2582 				    NULL, "    Paddr 0x%" PRIx64 ", "
2583 				    "Data 0x%08x.%08x", paddr,
2584 				    (uint32_t)(data>>32), (uint32_t)data);
2585 			}
2586 		}
2587 	}
2588 }
2589 
/*
 * Bit masks of the sec-ded-s4ed ecc code, one entry per check bit.
 * ecc_gen() ANDs entry i (hi with the upper 32 data bits, lo with the
 * lower 32) against the data word and XORs the surviving bits together
 * to obtain bit i of the 8-bit check value.  Entries 4-7 are entries
 * 0-3 with the hi and lo masks swapped.
 */
static struct {		/* sec-ded-s4ed ecc code */
	uint_t hi, lo;
} ecc_code[8] = {
	{ 0xee55de23U, 0x16161161U },
	{ 0x55eede93U, 0x61612212U },
	{ 0xbb557b8cU, 0x49494494U },
	{ 0x55bb7b6cU, 0x94948848U },
	{ 0x16161161U, 0xee55de23U },
	{ 0x61612212U, 0x55eede93U },
	{ 0x49494494U, 0xbb557b8cU },
	{ 0x94948848U, 0x55bb7b6cU }
};
2602 
2603 static ushort_t
2604 ecc_gen(uint_t high_bytes, uint_t low_bytes)
2605 {
2606 	int i, j;
2607 	uchar_t checker, bit_mask;
2608 	struct {
2609 		uint_t hi, lo;
2610 	} hex_data, masked_data[8];
2611 
2612 	hex_data.hi = high_bytes;
2613 	hex_data.lo = low_bytes;
2614 
2615 	/* mask out bits according to sec-ded-s4ed ecc code */
2616 	for (i = 0; i < 8; i++) {
2617 		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
2618 		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
2619 	}
2620 
2621 	/*
2622 	 * xor all bits in masked_data[i] to get bit_i of checker,
2623 	 * where i = 0 to 7
2624 	 */
2625 	checker = 0;
2626 	for (i = 0; i < 8; i++) {
2627 		bit_mask = 1 << i;
2628 		for (j = 0; j < 32; j++) {
2629 			if (masked_data[i].lo & 1) checker ^= bit_mask;
2630 			if (masked_data[i].hi & 1) checker ^= bit_mask;
2631 			masked_data[i].hi >>= 1;
2632 			masked_data[i].lo >>= 1;
2633 		}
2634 	}
2635 	return (checker);
2636 }
2637 
/*
 * Flush the entire ecache using displacement flush by reading through a
 * physical address range starting at ecache_flushaddr.
 *
 * Note that the length handed to flush_ecache() is twice the current
 * CPU's ecache size, together with that CPU's ecache line size.
 */
void
cpu_flush_ecache(void)
{
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}
2648 
/*
 * read and display the data in the cache line where the
 * original ce error occurred.
 * This routine is mainly used for debugging new hardware.
 *
 * ecc describes the original fault; verbose and ce_err are passed on to
 * cpu_read_paddr() and control how much is read and logged.  The whole
 * sequence runs with kernel preemption disabled, and the result of
 * check_ecc() is ignored.
 */
void
read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
{
	kpreempt_disable();
	/* disable ECC error traps */
	set_error_enable(EER_ECC_DISABLE);

	/*
	 * flush the ecache
	 * read the data
	 * check to see if an ECC error occurred
	 */
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
	/* OR the boot-time cache state bits back into the LSU register. */
	set_lsu(get_lsu() | cache_boot_state);
	cpu_read_paddr(ecc, verbose, ce_err);
	(void) check_ecc(ecc);

	/* enable ECC error traps */
	set_error_enable(EER_ENABLE);
	kpreempt_enable();
}
2676 
/*
 * Check the AFSR bits for UE/CE persistence.
 * If UE or CE errors are detected, the routine
 * clears all the AFSR sticky bits (except CP for
 * spitfire/blackbird) and the UDBs.
 * If ce_debug or ue_debug is set, log any ue/ce errors detected.
 *
 * ecc describes the original fault.  Returns 1 when the freshly captured
 * error is at the same address as ecc->flt_addr and is of the same kind
 * (a UE again, or a CE with the same syndrome); returns 0 otherwise,
 * including when neither UE nor CE is currently set in the AFSR.
 */
static int
check_ecc(struct async_flt *ecc)
{
	uint64_t t_afsr;
	uint64_t t_afar;
	uint64_t udbh;
	uint64_t udbl;
	ushort_t udb;
	int persistent = 0;

	/*
	 * Capture the AFSR, AFAR and UDBs info
	 */
	get_asyncflt(&t_afsr);
	get_asyncaddr(&t_afar);
	t_afar &= SABRE_AFAR_PA;
	get_udb_errors(&udbh, &udbl);

	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
		/*
		 * Clear the errors
		 */
		clr_datapath();

		/* On us2i/us2e every captured bit is written back, CP too. */
		if (isus2i || isus2e)
			set_asyncflt(t_afsr);
		else
			set_asyncflt(t_afsr & ~P_AFSR_CP);

		/*
		 * determine whether to check UDBH or UDBL for persistence
		 * (for UDBL, bit 3 of the captured address is also set)
		 */
		if (ecc->flt_synd & UDBL_REG) {
			udb = (ushort_t)udbl;
			t_afar |= 0x8;
		} else {
			udb = (ushort_t)udbh;
		}

		if (ce_debug || ue_debug) {
			spitf_async_flt spf_flt; /* for logging */
			struct async_flt *aflt =
				(struct async_flt *)&spf_flt;

			/* Package the info nicely in the spf_flt struct */
			bzero(&spf_flt, sizeof (spitf_async_flt));
			aflt->flt_stat = t_afsr;
			aflt->flt_addr = t_afar;
			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);

			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
			    " check_ecc: Dumping captured error states ...");
		}

		/*
		 * if the fault addresses don't match, not persistent
		 */
		if (t_afar != ecc->flt_addr) {
			return (persistent);
		}

		/*
		 * check for UE persistence
		 * since all DIMMs in the bank are identified for a UE,
		 * there's no reason to check the syndrome
		 */
		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
			persistent = 1;
		}

		/*
		 * check for CE persistence
		 * (the syndrome in the selected UDB must match as well)
		 */
		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
			if ((udb & P_DER_E_SYND) ==
			    (ecc->flt_synd & P_DER_E_SYND)) {
				persistent = 1;
			}
		}
	}
	return (persistent);
}
2768 
#ifdef HUMMINGBIRD
/*
 * Clock divisor values handled by cpu_change_speed(), and the marker
 * used for divisors that have no E* mode encoding.
 */
#define	HB_FULL_DIV		1
#define	HB_HALF_DIV		2
#define	HB_LOWEST_DIV		8
#define	HB_ECLK_INVALID		0xdeadbad
/*
 * E* mode clock value for each divisor, indexed by the divisor itself
 * (0 .. HB_LOWEST_DIV).  Divisors 0, 3, 5 and 7 have no encoding and are
 * marked HB_ECLK_INVALID.
 */
static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
	HB_ECLK_8 };

/* Values for the "direction" argument of CHANGE_REFRESH_COUNT(). */
#define	HB_SLOW_DOWN		0
#define	HB_SPEED_UP		1

/*
 * Write "mode" to HB_ESTAR_MODE and wait 1 usec.
 * Note: this expands to two statements (plus the caller's trailing
 * semicolon), not to a single statement, so it must only be used where
 * a statement sequence is valid, e.g. inside a braced block.
 */
#define	SET_ESTAR_MODE(mode)					\
	stdphysio(HB_ESTAR_MODE, (mode));			\
	/*							\
	 * PLL logic requires minimum of 16 clock		\
	 * cycles to lock to the new clock speed.		\
	 * Wait 1 usec to satisfy this requirement.		\
	 */							\
	drv_usecwait(1);

/*
 * Program the refresh count field of HB_MEM_CNTRL0 for the clock divisor
 * new_div, and, when slowing down (direction == HB_SLOW_DOWN) with self
 * refresh not enabled, wait long enough for the old and new counts to
 * count down at the current divisor cur_div.
 * Note: this expands to a brace-enclosed block with its own locals.
 */
#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
{								\
	volatile uint64_t data;					\
	uint64_t count, new_count;				\
	clock_t delay;						\
	data = lddphysio(HB_MEM_CNTRL0);			\
	count = (data & HB_REFRESH_COUNT_MASK) >> 		\
	    HB_REFRESH_COUNT_SHIFT;				\
	new_count = (HB_REFRESH_INTERVAL *			\
	    cpunodes[CPU->cpu_id].clock_freq) /			\
	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);        		\
	/*							\
	 * If we are slowing down the cpu and Memory		\
	 * Self Refresh is not enabled, it is required		\
	 * to wait for old refresh count to count-down and	\
	 * new refresh count to go into effect (let new value	\
	 * counts down once).					\
	 */							\
	if ((direction) == HB_SLOW_DOWN &&			\
	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
		/*						\
		 * Each count takes 64 cpu clock cycles		\
		 * to decrement.  Wait for current refresh	\
		 * count plus new refresh count at current	\
		 * cpu speed to count down to zero.  Round	\
		 * up the delay time.				\
		 */						\
		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
		    (count + new_count) * MICROSEC * (cur_div)) /\
		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
		drv_usecwait(delay);				\
	}							\
}

/*
 * Set the self refresh field of HB_MEM_CNTRL0 to "bit" using a
 * read-modify-write, then read the register back once more.
 * Note: this expands to a brace-enclosed block with its own local.
 */
#define	SET_SELF_REFRESH(bit)					\
{								\
	volatile uint64_t data;					\
	data = lddphysio(HB_MEM_CNTRL0);			\
	data = (data & ~HB_SELF_REFRESH_MASK) |			\
	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
}
#endif	/* HUMMINGBIRD */
2839 
/*
 * Change the clock divisor of the current CPU.
 *
 *	new_divisor: requested divisor; must lie in
 *	    [HB_FULL_DIV, HB_LOWEST_DIV] and have a valid hb_eclk[] entry.
 *	arg2: unused.
 *
 * The function body is compiled only when HUMMINGBIRD is defined; otherwise
 * this is a no-op.
 *
 * The current divisor is recovered by matching the E-clock field of
 * HB_ESTAR_MODE against hb_eclk[].  When slowing down, the refresh count is
 * changed before the E-star mode; when speeding up, the E-star mode is
 * changed first and the refresh count afterwards.  Transitions between full
 * speed and anything slower than 1/2 speed are done in two steps through
 * 1/2 speed.  Self refresh is enabled after leaving full speed and disabled
 * before returning to it.
 *
 * A bad divisor is reported with CE_WARN and ignored; failure to identify
 * the current divisor panics.
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
{
#ifdef HUMMINGBIRD
	uint64_t cur_mask, cur_divisor = 0;
	volatile uint64_t reg;
	int index;

	/*
	 * The range check comes first so that hb_eclk[] is only indexed
	 * with an in-range divisor.
	 */
	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
		    new_divisor);
		return;
	}

	/*
	 * Find the divisor whose hb_eclk[] encoding matches the E-clock
	 * bits currently programmed in HB_ESTAR_MODE.
	 */
	reg = lddphysio(HB_ESTAR_MODE);
	cur_mask = reg & HB_ECLK_MASK;
	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
		if (hb_eclk[index] == cur_mask) {
			cur_divisor = index;
			break;
		}
	}

	/* cur_divisor still 0 means no hb_eclk[] entry matched. */
	if (cur_divisor == 0)
		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
		    "can't be determined!");

	/*
	 * If we are already at the requested divisor speed, just
	 * return.
	 */
	if (cur_divisor == new_divisor)
		return;

	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
		/* full -> 1/2: refresh count, clock, then self refresh on */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/* 1/2 -> full: self refresh off, clock, then refresh count */
		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);

	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * lower speed.
		 */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * full speed.
		 */
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);

		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);

	} else if (cur_divisor < new_divisor) {
		/* slowing down without touching full speed */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > new_divisor) {
		/* speeding up without reaching full speed */
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
	}
	/* Record the divisor now in effect in the per-CPU machine state. */
	CPU->cpu_m.divisor = (uchar_t)new_divisor;
#endif
}
2925 
2926 /*
2927  * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
2928  * we clear all the sticky bits. If a non-null pointer to a async fault
2929  * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
2930  * info will be returned in the structure.  If a non-null pointer to a
2931  * uint64_t is passed in, this will be updated if the CP bit is set in the
2932  * AFSR.  The afsr will be returned.
2933  */
2934 static uint64_t
2935 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
2936 {
2937 	struct async_flt *aflt = (struct async_flt *)spf_flt;
2938 	uint64_t afsr;
2939 	uint64_t udbh, udbl;
2940 
2941 	get_asyncflt(&afsr);
2942 
2943 	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
2944 		*acc_afsr |= afsr;
2945 
2946 	if (spf_flt != NULL) {
2947 		aflt->flt_stat = afsr;
2948 		get_asyncaddr(&aflt->flt_addr);
2949 		aflt->flt_addr &= SABRE_AFAR_PA;
2950 
2951 		get_udb_errors(&udbh, &udbl);
2952 		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
2953 		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
2954 	}
2955 
2956 	set_asyncflt(afsr);		/* clear afsr */
2957 	clr_datapath();			/* clear udbs */
2958 	return (afsr);
2959 }
2960 
/*
 * Scan the whole ecache of the current CPU, 64 bytes at a time, looking for
 * lines with an E$ data parity (EDP) error.
 *
 *	t_afar: out; PA constructed for the first bad line found, or
 *	    AFLT_INV_ADDR if no bad line was found.
 *	ecache_data: out; the 8 captured ec_data_t chunks of the first bad
 *	    line (only written when a bad line is found).
 *	ecache_tag: out; E$ tag of the first bad line (only written when a
 *	    bad line is found).
 *	linecnt: out; number of bad lines detected.
 *	t_afsr: out; OR of every old-AFSR value returned by get_ecache_dtag()
 *	    and clear_errors() during the scan.
 *
 * Every bad line found is flushed and the error state is cleared before the
 * scan continues with the next line.
 */
static void
scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
{
	ec_data_t t_ecdata[8];
	uint64_t t_etag, oafsr;
	uint64_t pa = AFLT_INV_ADDR;
	uint32_t i, j, ecache_sz;
	uint64_t acc_afsr = 0;
	uint64_t *cpu_afsr = NULL;

	/*
	 * Use the per-CPU scrub AFSR accumulator only when this CPU's
	 * private data exists.
	 */
	if (CPU_PRIVATE(CPU) != NULL)
		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	*linecnt = 0;
	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;

	for (i = 0; i < ecache_sz; i += 64) {
		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
		    cpu_afsr);
		acc_afsr |= oafsr;

		/*
		 * Scan through the whole 64 bytes line in 8 8-byte chunks
		 * looking for the first occurrence of an EDP error.  The AFSR
		 * info is captured for each 8-byte chunk.  Note that for
		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
		 * 16-byte chunk granularity (i.e. the AFSR will be the same
		 * for the high and low 8-byte words within the 16-byte chunk).
		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
		 * granularity and only PSYND bits [7:0] are used.
		 */
		for (j = 0; j < 8; j++) {
			ec_data_t *ecdptr = &t_ecdata[j];

			if (ecdptr->ec_afsr & P_AFSR_EDP) {
				uint64_t errpa;
				ushort_t psynd;
				uint32_t ec_set_size = ecache_sz /
				    ecache_associativity;

				/*
				 * For Spitfire/Blackbird, we need to look at
				 * the PSYND to make sure that this 8-byte chunk
				 * is the right one.  PSYND bits [15:8] belong
				 * to the upper 8-byte (even) chunk.  Bits
				 * [7:0] belong to the lower 8-byte chunk (odd).
				 */
				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
				if (!isus2i && !isus2e) {
					if (j & 0x1)
						psynd = psynd & 0xFF;
					else
						psynd = psynd >> 8;

					if (!psynd)
						continue; /* wrong chunk */
				}

				/*
				 * Construct the PA: tag bits supply the high
				 * part, the offset within the E$ set supplies
				 * the low part.
				 */
				errpa = ((t_etag & cpu_ec_tag_mask) <<
				    cpu_ec_tag_shift) | ((i | (j << 3)) %
				    ec_set_size);

				/* clean up the cache line */
				flushecacheline(P2ALIGN(errpa, 64),
					cpunodes[CPU->cpu_id].ecache_size);

				oafsr = clear_errors(NULL, cpu_afsr);
				acc_afsr |= oafsr;

				(*linecnt)++;

				/*
				 * Capture the PA for the first bad line found.
				 * Return the ecache dump and tag info.
				 */
				if (pa == AFLT_INV_ADDR) {
					int k;

					pa = errpa;
					for (k = 0; k < 8; k++)
						ecache_data[k] = t_ecdata[k];
					*ecache_tag = t_etag;
				}
				/* one bad chunk is enough; go to next line */
				break;
			}
		}
	}
	*t_afar = pa;
	*t_afsr = acc_afsr;
}
3059 
3060 static void
3061 cpu_log_ecmem_info(spitf_async_flt *spf_flt)
3062 {
3063 	struct async_flt *aflt = (struct async_flt *)spf_flt;
3064 	uint64_t ecache_tag = spf_flt->flt_ec_tag;
3065 	char linestr[30];
3066 	char *state_str;
3067 	int i;
3068 
3069 	/*
3070 	 * Check the ecache tag to make sure it
3071 	 * is valid. If invalid, a memory dump was
3072 	 * captured instead of a ecache dump.
3073 	 */
3074 	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
3075 		uchar_t eparity = (uchar_t)
3076 		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);
3077 
3078 		uchar_t estate = (uchar_t)
3079 		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);
3080 
3081 		if (estate == cpu_ec_state_shr)
3082 			state_str = "Shared";
3083 		else if (estate == cpu_ec_state_exl)
3084 			state_str = "Exclusive";
3085 		else if (estate == cpu_ec_state_own)
3086 			state_str = "Owner";
3087 		else if (estate == cpu_ec_state_mod)
3088 			state_str = "Modified";
3089 		else
3090 			state_str = "Invalid";
3091 
3092 		if (spf_flt->flt_ec_lcnt > 1) {
3093 			(void) snprintf(linestr, sizeof (linestr),
3094 			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
3095 		} else {
3096 			linestr[0] = '\0';
3097 		}
3098 
3099 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3100 		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
3101 		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
3102 		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
3103 		    (uint32_t)ecache_tag, state_str,
3104 		    (uint32_t)eparity, linestr);
3105 	} else {
3106 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3107 		    " E$tag != PA from AFAR; E$line was victimized"
3108 		    "\n    dumping memory from PA 0x%08x.%08x instead",
3109 		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
3110 		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
3111 	}
3112 
3113 	/*
3114 	 * Dump out all 8 8-byte ecache data captured
3115 	 * For each 8-byte data captured, we check the
3116 	 * captured afsr's parity syndrome to find out
3117 	 * which 8-byte chunk is bad. For memory dump, the
3118 	 * AFSR values were initialized to 0.
3119 	 */
3120 	for (i = 0; i < 8; i++) {
3121 		ec_data_t *ecdptr;
3122 		uint_t offset;
3123 		ushort_t psynd;
3124 		ushort_t bad;
3125 		uint64_t edp;
3126 
3127 		offset = i << 3;	/* multiply by 8 */
3128 		ecdptr = &spf_flt->flt_ec_data[i];
3129 		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3130 		edp = ecdptr->ec_afsr & P_AFSR_EDP;
3131 
3132 		/*
3133 		 * For Sabre/Hummingbird, parity synd is captured only
3134 		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
3135 		 * For spitfire/blackbird, AFSR.PSYND is captured
3136 		 * in 16-byte granularity. [15:8] represent
3137 		 * the upper 8 byte and [7:0] the lower 8 byte.
3138 		 */
3139 		if (isus2i || isus2e || (i & 0x1))
3140 			bad = (psynd & 0xFF);		/* check bits [7:0] */
3141 		else
3142 			bad = (psynd & 0xFF00);		/* check bits [15:8] */
3143 
3144 		if (bad && edp) {
3145 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3146 			    " E$Data (0x%02x): 0x%08x.%08x "
3147 			    "*Bad* PSYND=0x%04x", offset,
3148 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3149 			    (uint32_t)ecdptr->ec_d8, psynd);
3150 		} else {
3151 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3152 			    " E$Data (0x%02x): 0x%08x.%08x", offset,
3153 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3154 			    (uint32_t)ecdptr->ec_d8);
3155 		}
3156 	}
3157 }
3158 
/*
 * Common logging function for all cpu async errors.  This function allows the
 * caller to generate a single cmn_err() call that logs the appropriate items
 * from the fault structure, and implements our rules for AFT logging levels.
 *
 *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
 *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
 *	spflt: pointer to spitfire async fault structure
 *	logflags: bitflags indicating what to output
 *	endstr: a end string to appear at the end of this log
 *	fmt: a format string to appear at the beginning of the log
 *
 * The logflags allows the construction of predetermined output from the spflt
 * structure.  The individual data items always appear in a consistent order.
 * Note that either or both of the spflt structure pointer and logflags may be
 * NULL or zero respectively, indicating that the predetermined output
 * substrings are not requested in this log.  The output looks like this:
 *
 *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
 *	<CPU_SPACE><CPU_TL><CPU_ERRID>
 *	newline+4spaces<CPU_AFSR><CPU_AFAR>
 *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
 *	newline+4spaces<CPU_UDBH><CPU_UDBL>
 *	newline+4spaces<CPU_SYND>
 *	newline+4spaces<endstr>
 *
 * Note that <endstr> does not start on a newline if we are logging <CPU_SYND>;
 * it is assumed that <endstr> will be the unum string in this case.  The size
 * of our intermediate formatting buf[] is based on the worst case of all flags
 * being enabled.  We pass the caller's varargs directly to vcmn_err() for
 * formatting so we don't need additional stack space to format them here.
 *
 * Logging rules implemented below:
 *  - No fault structure, a CPU_FAULT with a P_AFSR_LEVEL1 bit set, or a
 *    fault marked flt_panic: always logged; the tag has no '!' prefix when
 *    tagnum < 2 or aft_verbose is set.
 *  - Anything else: governed by ce_verbose_memory (BUS_FAULT or P_AFSR_CE
 *    set) or ce_verbose_other; 0 suppresses the message entirely, and a
 *    value > 1 selects the tag without the '!' prefix.
 *
 * NOTE(review): buf[] is handed to vcmn_err() as the format string, and
 * endstr is copied into it verbatim, so a '%' in endstr would be interpreted
 * as a conversion -- confirm callers never pass such strings.
 */
/*PRINTFLIKE6*/
static void
cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
	const char *endstr, const char *fmt, ...)
{
	struct async_flt *aflt = (struct async_flt *)spflt;
	char buf[400], *p, *q; /* see comments about buf[] size above */
	va_list ap;
	int console_log_flag;

	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
				(aflt->flt_stat & P_AFSR_LEVEL1)) ||
	    (aflt->flt_panic)) {
		console_log_flag = (tagnum < 2) || aft_verbose;
	} else {
		int verbose = ((aflt->flt_class == BUS_FAULT) ||
		    (aflt->flt_stat & P_AFSR_CE)) ?
		    ce_verbose_memory : ce_verbose_other;

		if (!verbose)
			return;

		console_log_flag = (verbose > 1);
	}

	/*
	 * A leading '!' in a cmn_err() format conventionally keeps the
	 * message off the console (see cmn_err(9F)).
	 */
	if (console_log_flag)
		(void) sprintf(buf, "[AFT%d]", tagnum);
	else
		(void) sprintf(buf, "![AFT%d]", tagnum);

	p = buf + strlen(buf);	/* current buffer position */
	q = buf + sizeof (buf);	/* pointer past end of buffer */

	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
		p += strlen(p);
	}

	/*
	 * Copy the caller's format string verbatim into buf[].  It will be
	 * formatted by the call to vcmn_err() at the end of this function.
	 * strncpy() may not terminate the copy, so the last byte of buf[]
	 * is forced to NUL.
	 */
	if (fmt != NULL && p < q) {
		(void) strncpy(p, fmt, (size_t)(q - p - 1));
		buf[sizeof (buf) - 1] = '\0';
		p += strlen(p);
	}

	/*
	 * Append the predetermined substrings selected by logflags.  Each
	 * snprintf() is bounded by the space left in buf[], and p is then
	 * advanced to the new terminating NUL.
	 */
	if (spflt != NULL) {
		if (logflags & CPU_FLTCPU) {
			(void) snprintf(p, (size_t)(q - p), " CPU%d",
			    aflt->flt_inst);
			p += strlen(p);
		}

		if (logflags & CPU_SPACE) {
			/* nothing is appended if neither trap bit is set */
			if (aflt->flt_status & ECC_D_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Data access");
			else if (aflt->flt_status & ECC_I_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Instruction access");
			p += strlen(p);
		}

		if (logflags & CPU_TL) {
			(void) snprintf(p, (size_t)(q - p), " at TL%s",
			    aflt->flt_tl ? ">0" : "=0");
			p += strlen(p);
		}

		if (logflags & CPU_ERRID) {
			(void) snprintf(p, (size_t)(q - p),
			    ", errID 0x%08x.%08x",
			    (uint32_t)(aflt->flt_id >> 32),
			    (uint32_t)aflt->flt_id);
			p += strlen(p);
		}

		if (logflags & CPU_AFSR) {
			(void) snprintf(p, (size_t)(q - p),
			    "\n    AFSR 0x%08b.%08b",
			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
			p += strlen(p);
		}

		if (logflags & CPU_AFAR) {
			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
			    (uint32_t)(aflt->flt_addr >> 32),
			    (uint32_t)aflt->flt_addr);
			p += strlen(p);
		}

		if (logflags & CPU_AF_PSYND) {
			ushort_t psynd = (ushort_t)
			    (aflt->flt_stat & P_AFSR_P_SYND);

			(void) snprintf(p, (size_t)(q - p),
			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
			    psynd, ecc_psynd_score(psynd));
			p += strlen(p);
		}

		if (logflags & CPU_AF_ETS) {
			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
			p += strlen(p);
		}

		if (logflags & CPU_FAULTPC) {
			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
			    (void *)aflt->flt_pc);
			p += strlen(p);
		}

		if (logflags & CPU_UDBH) {
			(void) snprintf(p, (size_t)(q - p),
			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
			    spflt->flt_sdbh, UDB_FMTSTR,
			    spflt->flt_sdbh & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_UDBL) {
			(void) snprintf(p, (size_t)(q - p),
			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
			    spflt->flt_sdbl, UDB_FMTSTR,
			    spflt->flt_sdbl & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_SYND) {
			ushort_t synd = SYND(aflt->flt_synd);

			(void) snprintf(p, (size_t)(q - p),
			    "\n    %s Syndrome 0x%x Memory Module ",
			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
			p += strlen(p);
		}
	}

	/*
	 * With CPU_SYND the end string continues the "Memory Module " text
	 * on the same line; otherwise it starts on its own indented line.
	 */
	if (endstr != NULL) {
		if (!(logflags & CPU_SYND))
			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
		else
			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
		p += strlen(p);
	}

	/* p < q - 1 guarantees room for both the '\n' and the NUL */
	if (ce_code == CE_CONT && (p < q - 1))
		(void) strcpy(p, "\n"); /* add final \n if needed */

	va_start(ap, fmt);
	vcmn_err(ce_code, buf, ap);
	va_end(ap);
}
3349 
3350 /*
3351  * Ecache Scrubbing
3352  *
3353  * The basic idea is to prevent lines from sitting in the ecache long enough
3354  * to build up soft errors which can lead to ecache parity errors.
3355  *
3356  * The following rules are observed when flushing the ecache:
3357  *
3358  * 1. When the system is busy, flush bad clean lines
3359  * 2. When the system is idle, flush all clean lines
3360  * 3. When the system is idle, flush good dirty lines
3361  * 4. Never flush bad dirty lines.
3362  *
3363  *	modify	parity	busy   idle
3364  *	----------------------------
3365  *	clean	good		X
3366  * 	clean	bad	X	X
3367  * 	dirty	good		X
3368  *	dirty	bad
3369  *
3370  * Bad or good refers to whether a line has an E$ parity error or not.
3371  * Clean or dirty refers to the state of the modified bit.  We currently
3372  * default the scan rate to 100 (scan 10% of the cache per second).
3373  *
3374  * The following are E$ states and actions.
3375  *
3376  * We encode our state as a 3-bit number, consisting of:
3377  *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
3378  *	ECACHE_STATE_PARITY	(0=good,  1=bad)
3379  *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
3380  *
3381  * We associate a flushing and a logging action with each state.
3382  *
3383  * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3384  * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
3385  * E$ only, in addition to value being set by ec_flush.
3386  */
3387 
/* Values for ec_action[].ec_flush. */
#define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
#define	NEVER_FLUSH		0x0	/* never flush the E$ line */
#define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */

/*
 * Per-state scrubber actions.  The table is indexed by the 3-bit state
 * built from ECACHE_STATE_MODIFIED (M), ECACHE_STATE_PARITY (P) and
 * ECACHE_STATE_BUSY (B), so the order of the entries must not change.
 */
struct {
	char	ec_flush;		/* whether to flush or not */
	char	ec_log;			/* ecache logging */
	char	ec_log_type;		/* log type info */
} ec_action[] = {	/* states of the E$ line in M P B */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
};
3406 
/*
 * Offsets into the ec_action[] that determines clean_good_busy and
 * dirty_good_busy lines.
 */
#define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
#define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */

/*
 * CGB(x, m) / DGB(x, m): true when state x is the clean_good_busy /
 * dirty_good_busy state and the E$ type m is not ECACHE_CPU_MIRROR.
 * These select the lines that are conditionally flushed on non-mirrored
 * E$ while the cpu is busy.
 *
 * Both arguments are fully parenthesized so that an expression argument
 * (e.g. one containing '|' or '?:') is compared as a whole.
 */
#define	CGB(x, m)	\
	(((x) == ECACHE_CGB_LINE) && ((m) != ECACHE_CPU_MIRROR))
#define	DGB(x, m)	\
	(((x) == ECACHE_DGB_LINE) && ((m) != ECACHE_CPU_MIRROR))
3420 
/*
 * Bits of the 3-bit E$ line state used to index ec_action[] and the first
 * eight counters of ecache_kstat_t.
 */
#define	ECACHE_STATE_MODIFIED	0x4	/* 0=clean, 1=dirty */
#define	ECACHE_STATE_PARITY	0x2	/* 0=good, 1=bad */
#define	ECACHE_STATE_BUSY	0x1	/* 0=idle, 1=busy */
3424 
/*
 * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced.
 * For mirrored E$ the scrubber uses these two values in place of
 * ecache_calls_a_sec and the scan-rate computation.
 */
int ecache_calls_a_sec_mirrored = 1;
int ecache_lines_per_call_mirrored = 1;

/*
 * Scrubber tunables.  ecache_calls_a_sec and ecache_calls_a_sec_mirrored
 * are clamped to the range [1, hz] by do_scrub_ecache_line().
 */
int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
int ecache_scrub_verbose = 1;		/* log bad lines found by scrubber */
int ecache_scrub_panic = 0;		/* also enables bad-line reporting */
int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
int ecache_idle_factor = 1;		/* scan_lines multiplier when idle */
int ecache_flush_clean_good_busy = 50;	/* % of scan_lines flushed if busy */
int ecache_flush_dirty_good_busy = 100;	/* % of scan_lines flushed if busy */

/*
 * Calls per second currently in effect; written by scrub_ecache_line() and
 * used by do_scrub_ecache_line() to compute the next timeout interval.
 */
volatile int ec_timeout_calls = 1;	/* timeout calls */

/*
 * Interrupt number and pil for ecache scrubber cross-trap calls.
 * ecache_scrub_inum is assigned by cpu_init_cache_scrub().
 */
static uint_t ecache_scrub_inum;
uint_t ecache_scrub_pil = PIL_9;
3447 
/*
 * Kstats for the E$ scrubber.
 *
 * The first eight members must stay in this order: scrub_ecache_line()
 * treats the structure as an array of kstat_named_t and indexes it with
 * the 3-bit M/P/B state (ECACHE_STATE_MODIFIED | ECACHE_STATE_PARITY |
 * ECACHE_STATE_BUSY).
 */
typedef struct ecache_kstat {
	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
	kstat_named_t clean_good_busy;		/* # of lines skipped */
	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
	kstat_named_t dirty_good_busy;		/* # of lines skipped */
	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
	kstat_named_t invalid_lines;		/* # of invalid lines */
	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
} ecache_kstat_t;
3465 
/*
 * Names and data types for the ecache_kstat_t counters, listed in the same
 * order as the structure members.  Note that the tags_cleared member is
 * published under the name "ecache_tags_cleared".
 */
static ecache_kstat_t ec_kstat_template = {
	{ "clean_good_idle", KSTAT_DATA_ULONG },
	{ "clean_good_busy", KSTAT_DATA_ULONG },
	{ "clean_bad_idle", KSTAT_DATA_ULONG },
	{ "clean_bad_busy", KSTAT_DATA_ULONG },
	{ "dirty_good_idle", KSTAT_DATA_ULONG },
	{ "dirty_good_busy", KSTAT_DATA_ULONG },
	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
	{ "invalid_lines", KSTAT_DATA_ULONG },
	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
};
3480 
/*
 * kmem cache from which the per-cpu spitfire_private_t structures are
 * allocated; created on first use by cpu_init_private().
 */
struct kmem_cache *sf_private_cache;
3482 
/*
 * Scrub a batch of ecache lines on the current CPU.
 *
 * Called from scrub_ecache_line_intr().  Each call examines scan_lines
 * consecutive E$ lines starting at the saved per-cpu flush index, wraps the
 * index at the end of the ecache, and stores it back for the next call.
 * For every valid line the M/P/B state is computed, the matching kstat is
 * incremented, and the line is flushed, left alone, or queued for page
 * retirement according to ec_action[].  Lines with a tag parity error (ETP)
 * are handed to ecache_scrub_tag_err().  If the CP bit has been accumulated
 * in the per-cpu scrub AFSR, an orphan CP event is reported at the end.
 *
 * The function dereferences the CPU private area unconditionally, so it
 * must only run on a CPU whose private data has been initialized.
 */
void
scrub_ecache_line()
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	int cpuid = CPU->cpu_id;
	uint32_t index = ssmp->ecache_flush_index;
	uint64_t ec_size = cpunodes[cpuid].ecache_size;
	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
	int nlines = ssmp->ecache_nlines;
	uint32_t ec_set_size = ec_size / ecache_associativity;
	int ec_mirror = ssmp->ecache_mirror;
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;

	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
	int mpb;		/* encode Modified, Parity, Busy for action */
	uchar_t state;
	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
	ec_data_t ec_data[8];
	kstat_named_t *ec_knp;

	/* Work out how many lines to scan and the timeout call rate. */
	switch (ec_mirror) {
		default:
		case ECACHE_CPU_NON_MIRROR:
			/*
			 * The E$ scan rate is expressed in units of tenths of
			 * a percent.  ecache_scan_rate = 1000 (100%) means the
			 * whole cache is scanned every second.
			 *
			 * NOTE(review): this divides by ecache_calls_a_sec
			 * before the zero check a few lines below; it relies
			 * on the tunable having been clamped to >= 1
			 * elsewhere -- confirm.
			 */
			scan_lines = (nlines * ecache_scan_rate) /
					(1000 * ecache_calls_a_sec);
			if (!(ssmp->ecache_busy)) {
				if (ecache_idle_factor > 0) {
					scan_lines *= ecache_idle_factor;
				}
			} else {
				/*
				 * Budgets for the conditional flushing of
				 * good lines while busy, as a percentage
				 * of scan_lines.
				 */
				flush_clean_busy = (scan_lines *
					ecache_flush_clean_good_busy) / 100;
				flush_dirty_busy = (scan_lines *
					ecache_flush_dirty_good_busy) / 100;
			}

			ec_timeout_calls = (ecache_calls_a_sec ?
						ecache_calls_a_sec : 1);
			break;

		case ECACHE_CPU_MIRROR:
			scan_lines = ecache_lines_per_call_mirrored;
			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
					ecache_calls_a_sec_mirrored : 1);
			break;
	}

	/*
	 * The ecache scrubber algorithm operates by reading and
	 * decoding the E$ tag to determine whether the corresponding E$ line
	 * can be scrubbed. There is an implicit assumption in the scrubber
	 * logic that the E$ tag is valid. Unfortunately, this assumption is
	 * flawed since the E$ tag may also be corrupted and have parity
	 * errors. The scrubber logic is enhanced to check the validity of
	 * the E$ tag before scrubbing. When a parity error is detected in
	 * the E$ tag, it is possible to recover and scrub the tag under
	 * certain conditions so that a ETP error condition can be avoided.
	 */

	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
		/*
		 * We get the old-AFSR before clearing the AFSR sticky bits
		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
		 */
		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
				cpu_ec_state_shift);

		/*
		 * ETP is set try to scrub the ecache tag.
		 */
		if (nafsr & P_AFSR_ETP) {
			ecache_scrub_tag_err(nafsr, state, index);
		} else if (state & cpu_ec_state_valid) {
			/*
			 * ETP is not set, E$ tag is valid.
			 * Proceed with the E$ scrubbing.
			 */
			if (state & cpu_ec_state_dirty)
				mpb |= ECACHE_STATE_MODIFIED;

			tafsr = check_ecache_line(index, acc_afsr);

			if (tafsr & P_AFSR_EDP) {
				mpb |= ECACHE_STATE_PARITY;

				/*
				 * Capture the line data and tag only when
				 * it is going to be reported below.
				 */
				if (ecache_scrub_verbose ||
							ecache_scrub_panic) {
					get_ecache_dtag(P2ALIGN(index, 64),
						(uint64_t *)&ec_data[0],
						&ec_tag, &oafsr, acc_afsr);
				}
			}

			if (ssmp->ecache_busy)
				mpb |= ECACHE_STATE_BUSY;

			/*
			 * The first eight kstats are laid out in mpb order,
			 * so the state doubles as the counter index.
			 */
			ec_knp = (kstat_named_t *)ec_ksp + mpb;
			ec_knp->value.ul++;

			paddr = ((ec_tag & cpu_ec_tag_mask) <<
				cpu_ec_tag_shift) | (index % ec_set_size);

			/*
			 * We flush the E$ lines depending on the ec_flush,
			 * we additionally flush clean_good_busy and
			 * dirty_good_busy lines for mirrored E$.
			 * NEVER_FLUSH (dirty, bad) lines are instead handed
			 * to ecache_page_retire() via softcall().
			 */
			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
				flushecacheline(paddr, ec_size);
			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
				(ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
					flushecacheline(paddr, ec_size);
			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
				softcall(ecache_page_retire, (void *)paddr);
			}

			/*
			 * Conditionally flush both the clean_good and
			 * dirty_good lines when busy.  This applies to
			 * non-mirrored E$ only and is limited by the
			 * budgets computed above.
			 */
			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
				flush_clean_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->clean_good_busy_flush.value.ul++;
			} else if (DGB(mpb, ec_mirror) &&
						(flush_dirty_busy > 0)) {
				flush_dirty_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->dirty_good_busy_flush.value.ul++;
			}

			/*
			 * ec_log is only set for the bad-parity states, for
			 * which ec_data[] was captured above under the same
			 * verbose/panic condition.
			 */
			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
						ecache_scrub_panic)) {
				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
						tafsr);
			}

		} else {
			ec_ksp->invalid_lines.value.ul++;
		}

		/* advance to the next line, wrapping at the end of the E$ */
		if ((index += ec_linesize) >= ec_size)
			index = 0;

	}

	/*
	 * set the ecache scrub index for the next time around
	 */
	ssmp->ecache_flush_index = index;

	/*
	 * Report an orphan CP event if one was accumulated; the accumulator
	 * is reset only when the returned AFSR no longer has CP set.
	 */
	if (*acc_afsr & P_AFSR_CP) {
		uint64_t ret_afsr;

		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
		if ((ret_afsr & P_AFSR_CP) == 0)
			*acc_afsr = 0;
	}
}
3654 
3655 /*
3656  * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
3657  * we decrement the outstanding request count to zero.
3658  */
3659 
3660 /*ARGSUSED*/
3661 uint_t
3662 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
3663 {
3664 	int i;
3665 	int outstanding;
3666 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3667 	uint32_t *countp = &ssmp->ec_scrub_outstanding;
3668 
3669 	do {
3670 		outstanding = *countp;
3671 		ASSERT(outstanding > 0);
3672 		for (i = 0; i < outstanding; i++)
3673 			scrub_ecache_line();
3674 	} while (atomic_add_32_nv(countp, -outstanding));
3675 
3676 	return (DDI_INTR_CLAIMED);
3677 }
3678 
3679 /*
3680  * force each cpu to perform an ecache scrub, called from a timeout
3681  */
3682 extern xcfunc_t ecache_scrubreq_tl1;
3683 
3684 void
3685 do_scrub_ecache_line(void)
3686 {
3687 	long delta;
3688 
3689 	if (ecache_calls_a_sec > hz)
3690 		ecache_calls_a_sec = hz;
3691 	else if (ecache_calls_a_sec <= 0)
3692 	    ecache_calls_a_sec = 1;
3693 
3694 	if (ecache_calls_a_sec_mirrored > hz)
3695 		ecache_calls_a_sec_mirrored = hz;
3696 	else if (ecache_calls_a_sec_mirrored <= 0)
3697 	    ecache_calls_a_sec_mirrored = 1;
3698 
3699 	if (ecache_scrub_enable) {
3700 		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
3701 		delta = hz / ec_timeout_calls;
3702 	} else {
3703 		delta = hz;
3704 	}
3705 
3706 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3707 		delta);
3708 }
3709 
3710 /*
3711  * initialization for ecache scrubbing
3712  * This routine is called AFTER all cpus have had cpu_init_private called
3713  * to initialize their private data areas.
3714  */
3715 void
3716 cpu_init_cache_scrub(void)
3717 {
3718 	if (ecache_calls_a_sec > hz) {
3719 		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
3720 		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
3721 		ecache_calls_a_sec = hz;
3722 	}
3723 
3724 	/*
3725 	 * Register softint for ecache scrubbing.
3726 	 */
3727 	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
3728 	    scrub_ecache_line_intr, NULL);
3729 
3730 	/*
3731 	 * kick off the scrubbing using realtime timeout
3732 	 */
3733 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3734 	    hz / ecache_calls_a_sec);
3735 }
3736 
3737 /*
3738  * Unset the busy flag for this cpu.
3739  */
3740 void
3741 cpu_idle_ecache_scrub(struct cpu *cp)
3742 {
3743 	if (CPU_PRIVATE(cp) != NULL) {
3744 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3745 							sfpr_scrub_misc);
3746 		ssmp->ecache_busy = ECACHE_CPU_IDLE;
3747 	}
3748 }
3749 
3750 /*
3751  * Set the busy flag for this cpu.
3752  */
3753 void
3754 cpu_busy_ecache_scrub(struct cpu *cp)
3755 {
3756 	if (CPU_PRIVATE(cp) != NULL) {
3757 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3758 							sfpr_scrub_misc);
3759 		ssmp->ecache_busy = ECACHE_CPU_BUSY;
3760 	}
3761 }
3762 
3763 /*
3764  * initialize the ecache scrubber data structures
3765  * The global entry point cpu_init_private replaces this entry point.
3766  *
3767  */
3768 static void
3769 cpu_init_ecache_scrub_dr(struct cpu *cp)
3770 {
3771 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3772 	int cpuid = cp->cpu_id;
3773 
3774 	/*
3775 	 * intialize bookkeeping for cache scrubbing
3776 	 */
3777 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3778 
3779 	ssmp->ecache_flush_index = 0;
3780 
3781 	ssmp->ecache_nlines =
3782 		cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
3783 
3784 	/*
3785 	 * Determine whether we are running on mirrored SRAM
3786 	 */
3787 
3788 	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
3789 		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
3790 	else
3791 		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
3792 
3793 	cpu_busy_ecache_scrub(cp);
3794 
3795 	/*
3796 	 * initialize the kstats
3797 	 */
3798 	ecache_kstat_init(cp);
3799 }
3800 
3801 /*
3802  * uninitialize the ecache scrubber data structures
3803  * The global entry point cpu_uninit_private replaces this entry point.
3804  */
3805 static void
3806 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
3807 {
3808 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3809 
3810 	if (ssmp->ecache_ksp != NULL) {
3811 		kstat_delete(ssmp->ecache_ksp);
3812 		ssmp->ecache_ksp = NULL;
3813 	}
3814 
3815 	/*
3816 	 * un-initialize bookkeeping for cache scrubbing
3817 	 */
3818 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3819 
3820 	cpu_idle_ecache_scrub(cp);
3821 }
3822 
3823 struct kmem_cache *sf_private_cache;
3824 
3825 /*
3826  * Cpu private initialization.  This includes allocating the cpu_private
3827  * data structure, initializing it, and initializing the scrubber for this
3828  * cpu.  This is called once for EVERY cpu, including CPU 0. This function
3829  * calls cpu_init_ecache_scrub_dr to init the scrubber.
3830  * We use kmem_cache_create for the spitfire private data structure because it
3831  * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
3832  */
3833 void
3834 cpu_init_private(struct cpu *cp)
3835 {
3836 	spitfire_private_t *sfprp;
3837 
3838 	ASSERT(CPU_PRIVATE(cp) == NULL);
3839 
3840 	/*
3841 	 * If the sf_private_cache has not been created, create it.
3842 	 */
3843 	if (sf_private_cache == NULL) {
3844 		sf_private_cache = kmem_cache_create("sf_private_cache",
3845 			sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3846 			NULL, NULL, NULL, NULL, 0);
3847 		ASSERT(sf_private_cache);
3848 	}
3849 
3850 	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3851 
3852 	bzero(sfprp, sizeof (spitfire_private_t));
3853 
3854 	cpu_init_ecache_scrub_dr(cp);
3855 }
3856 
3857 /*
3858  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
3859  * deallocate the scrubber data structures and cpu_private data structure.
3860  * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit
3861  * the scrubber for the specified cpu.
3862  */
3863 void
3864 cpu_uninit_private(struct cpu *cp)
3865 {
3866 	ASSERT(CPU_PRIVATE(cp));
3867 
3868 	cpu_uninit_ecache_scrub_dr(cp);
3869 	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3870 	CPU_PRIVATE(cp) = NULL;
3871 }
3872 
3873 /*
3874  * initialize the ecache kstats for each cpu
3875  */
3876 static void
3877 ecache_kstat_init(struct cpu *cp)
3878 {
3879 	struct kstat *ksp;
3880 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3881 
3882 	ASSERT(ssmp != NULL);
3883 
3884 	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3885 	    KSTAT_TYPE_NAMED,
3886 	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3887 	    KSTAT_FLAG_WRITABLE)) == NULL) {
3888 		ssmp->ecache_ksp = NULL;
3889 		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3890 		return;
3891 	}
3892 
3893 	ssmp->ecache_ksp = ksp;
3894 	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3895 	kstat_install(ksp);
3896 }
3897 
3898 /*
3899  * log the bad ecache information
3900  */
3901 static void
3902 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3903 		uint64_t afsr)
3904 {
3905 	spitf_async_flt spf_flt;
3906 	struct async_flt *aflt;
3907 	int i;
3908 	char *class;
3909 
3910 	bzero(&spf_flt, sizeof (spitf_async_flt));
3911 	aflt = &spf_flt.cmn_asyncflt;
3912 
3913 	for (i = 0; i < 8; i++) {
3914 		spf_flt.flt_ec_data[i] = ec_data[i];
3915 	}
3916 
3917 	spf_flt.flt_ec_tag = ec_tag;
3918 
3919 	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
3920 		spf_flt.flt_type = ec_action[mpb].ec_log_type;
3921 	} else spf_flt.flt_type = (ushort_t)mpb;
3922 
3923 	aflt->flt_inst = CPU->cpu_id;
3924 	aflt->flt_class = CPU_FAULT;
3925 	aflt->flt_id = gethrtime_waitfree();
3926 	aflt->flt_addr = paddr;
3927 	aflt->flt_stat = afsr;
3928 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3929 
3930 	switch (mpb) {
3931 	case CPU_ECACHE_TAG_ERR:
3932 	case CPU_ECACHE_ADDR_PAR_ERR:
3933 	case CPU_ECACHE_ETP_ETS_ERR:
3934 	case CPU_ECACHE_STATE_ERR:
3935 		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3936 		break;
3937 	default:
3938 		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3939 		break;
3940 	}
3941 
3942 	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3943 	    ue_queue, aflt->flt_panic);
3944 
3945 	if (aflt->flt_panic)
3946 		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
3947 					"line detected");
3948 }
3949 
3950 /*
3951  * Process an ecache error that occured during the E$ scrubbing.
3952  * We do the ecache scan to find the bad line, flush the bad line
3953  * and start the memscrubber to find any UE (in memory or in another cache)
3954  */
3955 static uint64_t
3956 ecache_scrub_misc_err(int type, uint64_t afsr)
3957 {
3958 	spitf_async_flt spf_flt;
3959 	struct async_flt *aflt;
3960 	uint64_t oafsr;
3961 
3962 	bzero(&spf_flt, sizeof (spitf_async_flt));
3963 	aflt = &spf_flt.cmn_asyncflt;
3964 
3965 	/*
3966 	 * Scan each line in the cache to look for the one
3967 	 * with bad parity
3968 	 */
3969 	aflt->flt_addr = AFLT_INV_ADDR;
3970 	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3971 		&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3972 
3973 	if (oafsr & P_AFSR_CP) {
3974 		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3975 		*cp_afsr |= oafsr;
3976 	}
3977 
3978 	/*
3979 	 * If we found a bad PA, update the state to indicate if it is
3980 	 * memory or I/O space.
3981 	 */
3982 	if (aflt->flt_addr != AFLT_INV_ADDR) {
3983 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3984 			MMU_PAGESHIFT)) ? 1 : 0;
3985 	}
3986 
3987 	spf_flt.flt_type = (ushort_t)type;
3988 
3989 	aflt->flt_inst = CPU->cpu_id;
3990 	aflt->flt_class = CPU_FAULT;
3991 	aflt->flt_id = gethrtime_waitfree();
3992 	aflt->flt_status = afsr;
3993 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3994 
3995 	/*
3996 	 * We have the bad line, flush that line and start
3997 	 * the memscrubber.
3998 	 */
3999 	if (spf_flt.flt_ec_lcnt > 0) {
4000 		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
4001 			cpunodes[CPU->cpu_id].ecache_size);
4002 		read_all_memscrub = 1;
4003 		memscrub_run();
4004 	}
4005 
4006 	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
4007 	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
4008 	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
4009 
4010 	return (oafsr);
4011 }
4012 
4013 static void
4014 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
4015 {
4016 	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
4017 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
4018 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
4019 	uint64_t ec_tag, paddr, oafsr;
4020 	ec_data_t ec_data[8];
4021 	int cpuid = CPU->cpu_id;
4022 	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
4023 						ecache_associativity;
4024 	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
4025 
4026 	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
4027 			&oafsr, cpu_afsr);
4028 	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
4029 						(index % ec_set_size);
4030 
4031 	/*
4032 	 * E$ tag state has good parity
4033 	 */
4034 	if ((afsr_ets & cpu_ec_state_parity) == 0) {
4035 		if (afsr_ets & cpu_ec_parity) {
4036 			/*
4037 			 * E$ tag state bits indicate the line is clean,
4038 			 * invalidate the E$ tag and continue.
4039 			 */
4040 			if (!(state & cpu_ec_state_dirty)) {
4041 				/*
4042 				 * Zero the tag and mark the state invalid
4043 				 * with good parity for the tag.
4044 				 */
4045 				if (isus2i || isus2e)
4046 					write_hb_ec_tag_parity(index);
4047 				else
4048 					write_ec_tag_parity(index);
4049 
4050 				/* Sync with the dual tag */
4051 				flushecacheline(0,
4052 					cpunodes[CPU->cpu_id].ecache_size);
4053 				ec_ksp->tags_cleared.value.ul++;
4054 				ecache_scrub_log(ec_data, ec_tag, paddr,
4055 					CPU_ECACHE_TAG_ERR, afsr);
4056 				return;
4057 			} else {
4058 				ecache_scrub_log(ec_data, ec_tag, paddr,
4059 					CPU_ECACHE_ADDR_PAR_ERR, afsr);
4060 				cmn_err(CE_PANIC, " E$ tag address has bad"
4061 							" parity");
4062 			}
4063 		} else if ((afsr_ets & cpu_ec_parity) == 0) {
4064 			/*
4065 			 * ETS is zero but ETP is set
4066 			 */
4067 			ecache_scrub_log(ec_data, ec_tag, paddr,
4068 				CPU_ECACHE_ETP_ETS_ERR, afsr);
4069 			cmn_err(CE_PANIC, "AFSR.ETP is set and"
4070 				" AFSR.ETS is zero");
4071 		}
4072 	} else {
4073 		/*
4074 		 * E$ tag state bit has a bad parity
4075 		 */
4076 		ecache_scrub_log(ec_data, ec_tag, paddr,
4077 				CPU_ECACHE_STATE_ERR, afsr);
4078 		cmn_err(CE_PANIC, "E$ tag state has bad parity");
4079 	}
4080 }
4081 
4082 static void
4083 ecache_page_retire(void *arg)
4084 {
4085 	uint64_t paddr = (uint64_t)arg;
4086 	(void) page_retire(paddr, PR_UE);
4087 }
4088 
/*
 * Intentionally empty: this cpu module does nothing for the slave side of
 * sticksync.
 */
void
sticksync_slave(void)
{
}
4092 
/*
 * Intentionally empty: this cpu module does nothing for the master side of
 * sticksync.
 */
void
sticksync_master(void)
{
}
4096 
4097 /*ARGSUSED*/
4098 void
4099 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
4100 {}
4101 
4102 void
4103 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4104 {
4105 	int status;
4106 	ddi_fm_error_t de;
4107 
4108 	bzero(&de, sizeof (ddi_fm_error_t));
4109 
4110 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4111 	    FM_ENA_FMT1);
4112 	de.fme_flag = expected;
4113 	de.fme_bus_specific = (void *)aflt->flt_addr;
4114 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4115 
4116 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4117 		aflt->flt_panic = 1;
4118 }
4119 
4120 /*ARGSUSED*/
4121 void
4122 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
4123     errorq_t *eqp, uint_t flag)
4124 {
4125 	struct async_flt *aflt = (struct async_flt *)payload;
4126 
4127 	aflt->flt_erpt_class = error_class;
4128 	errorq_dispatch(eqp, payload, payload_sz, flag);
4129 }
4130 
4131 #define	MAX_SIMM	8
4132 
4133 struct ce_info {
4134 	char    name[UNUM_NAMLEN];
4135 	uint64_t intermittent_total;
4136 	uint64_t persistent_total;
4137 	uint64_t sticky_total;
4138 	unsigned short leaky_bucket_cnt;
4139 };
4140 
4141 /*
4142  * Separately-defined structure for use in reporting the ce_info
4143  * to SunVTS without exposing the internal layout and implementation
4144  * of struct ce_info.
4145  */
4146 static struct ecc_error_info ecc_error_info_data = {
4147 	{ "version", KSTAT_DATA_UINT32 },
4148 	{ "maxcount", KSTAT_DATA_UINT32 },
4149 	{ "count", KSTAT_DATA_UINT32 }
4150 };
4151 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
4152     sizeof (struct kstat_named);
4153 
4154 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
4155 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
4156 #endif
4157 
4158 struct ce_info  *mem_ce_simm = NULL;
4159 size_t mem_ce_simm_size = 0;
4160 
4161 /*
4162  * Default values for the number of CE's allowed per interval.
4163  * Interval is defined in minutes
4164  * SOFTERR_MIN_TIMEOUT is defined in microseconds
4165  */
4166 #define	SOFTERR_LIMIT_DEFAULT		2
4167 #define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
4168 #define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
4169 #define	TIMEOUT_NONE			((timeout_id_t)0)
4170 #define	TIMEOUT_SET			((timeout_id_t)1)
4171 
4172 /*
4173  * timeout identifer for leaky_bucket
4174  */
4175 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
4176 
4177 /*
4178  * Tunables for maximum number of allowed CE's in a given time
4179  */
4180 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4181 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4182 
4183 void
4184 cpu_mp_init(void)
4185 {
4186 	size_t size = cpu_aflt_size();
4187 	size_t i;
4188 	kstat_t *ksp;
4189 
4190 	/*
4191 	 * Initialize the CE error handling buffers.
4192 	 */
4193 	mem_ce_simm_size = MAX_SIMM * max_ncpus;
4194 	size = sizeof (struct ce_info) * mem_ce_simm_size;
4195 	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
4196 
4197 	ksp = kstat_create("unix", 0, "ecc-info", "misc",
4198 	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
4199 	if (ksp != NULL) {
4200 		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
4201 		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
4202 		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
4203 		ecc_error_info_data.count.value.ui32 = 0;
4204 		kstat_install(ksp);
4205 	}
4206 
4207 	for (i = 0; i < mem_ce_simm_size; i++) {
4208 		struct kstat_ecc_mm_info *kceip;
4209 
4210 		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
4211 		    KM_SLEEP);
4212 		ksp = kstat_create("mm", i, "ecc-info", "misc",
4213 		    KSTAT_TYPE_NAMED,
4214 		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
4215 		    KSTAT_FLAG_VIRTUAL);
4216 		if (ksp != NULL) {
4217 			/*
4218 			 * Re-declare ks_data_size to include room for the
4219 			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
4220 			 * set.
4221 			 */
4222 			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
4223 			    KSTAT_CE_UNUM_NAMLEN;
4224 			ksp->ks_data = kceip;
4225 			kstat_named_init(&kceip->name,
4226 			    "name", KSTAT_DATA_STRING);
4227 			kstat_named_init(&kceip->intermittent_total,
4228 			    "intermittent_total", KSTAT_DATA_UINT64);
4229 			kstat_named_init(&kceip->persistent_total,
4230 			    "persistent_total", KSTAT_DATA_UINT64);
4231 			kstat_named_init(&kceip->sticky_total,
4232 			    "sticky_total", KSTAT_DATA_UINT64);
4233 			/*
4234 			 * Use the default snapshot routine as it knows how to
4235 			 * deal with named kstats with long strings.
4236 			 */
4237 			ksp->ks_update = ecc_kstat_update;
4238 			kstat_install(ksp);
4239 		} else {
4240 			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4241 		}
4242 	}
4243 }
4244 
4245 /*ARGSUSED*/
4246 static void
4247 leaky_bucket_timeout(void *arg)
4248 {
4249 	int i;
4250 	struct ce_info *psimm = mem_ce_simm;
4251 
4252 	for (i = 0; i < mem_ce_simm_size; i++) {
4253 		if (psimm[i].leaky_bucket_cnt > 0)
4254 			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
4255 	}
4256 	add_leaky_bucket_timeout();
4257 }
4258 
4259 static void
4260 add_leaky_bucket_timeout(void)
4261 {
4262 	long timeout_in_microsecs;
4263 
4264 	/*
4265 	 * create timeout for next leak.
4266 	 *
4267 	 * The timeout interval is calculated as follows
4268 	 *
4269 	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4270 	 *
4271 	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4272 	 * in a minute), then multiply this by MICROSEC to get the interval
4273 	 * in microseconds.  Divide this total by ecc_softerr_limit so that
4274 	 * the timeout interval is accurate to within a few microseconds.
4275 	 */
4276 
4277 	if (ecc_softerr_limit <= 0)
4278 		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4279 	if (ecc_softerr_interval <= 0)
4280 		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4281 
4282 	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
4283 	    ecc_softerr_limit;
4284 
4285 	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
4286 		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
4287 
4288 	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
4289 	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
4290 }
4291 
4292 /*
4293  * Legacy Correctable ECC Error Hash
4294  *
4295  * All of the code below this comment is used to implement a legacy array
4296  * which counted intermittent, persistent, and sticky CE errors by unum,
4297  * and then was later extended to publish the data as a kstat for SunVTS.
4298  * All of this code is replaced by FMA, and remains here until such time
4299  * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
4300  *
4301  * Errors are saved in three buckets per-unum:
4302  * (1) sticky - scrub was unsuccessful, cannot be scrubbed
4303  *     This could represent a problem, and is immediately printed out.
4304  * (2) persistent - was successfully scrubbed
4305  *     These errors use the leaky bucket algorithm to determine
4306  *     if there is a serious problem.
4307  * (3) intermittent - may have originated from the cpu or upa/safari bus,
4308  *     and does not necessarily indicate any problem with the dimm itself,
4309  *     is critical information for debugging new hardware.
4310  *     Because we do not know if it came from the dimm, it would be
4311  *     inappropriate to include these in the leaky bucket counts.
4312  *
4313  * If the E$ line was modified before the scrub operation began, then the
4314  * displacement flush at the beginning of scrubphys() will cause the modified
4315  * line to be written out, which will clean up the CE.  Then, any subsequent
4316  * read will not cause an error, which will cause persistent errors to be
4317  * identified as intermittent.
4318  *
4319  * If a DIMM is going bad, it will produce true persistents as well as
4320  * false intermittents, so these intermittents can be safely ignored.
4321  *
4322  * If the error count is excessive for a DIMM, this function will return
4323  * PR_MCE, and the CPU module may then decide to remove that page from use.
4324  */
4325 static int
4326 ce_count_unum(int status, int len, char *unum)
4327 {
4328 	int i;
4329 	struct ce_info *psimm = mem_ce_simm;
4330 	int page_status = PR_OK;
4331 
4332 	ASSERT(psimm != NULL);
4333 
4334 	if (len <= 0 ||
4335 	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
4336 		return (page_status);
4337 
4338 	/*
4339 	 * Initialize the leaky_bucket timeout
4340 	 */
4341 	if (casptr(&leaky_bucket_timeout_id,
4342 	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
4343 		add_leaky_bucket_timeout();
4344 
4345 	for (i = 0; i < mem_ce_simm_size; i++) {
4346 		if (psimm[i].name[0] == '\0') {
4347 			/*
4348 			 * Hit the end of the valid entries, add
4349 			 * a new one.
4350 			 */
4351 			(void) strncpy(psimm[i].name, unum, len);
4352 			if (status & ECC_STICKY) {
4353 				/*
4354 				 * Sticky - the leaky bucket is used to track
4355 				 * soft errors.  Since a sticky error is a
4356 				 * hard error and likely to be retired soon,
4357 				 * we do not count it in the leaky bucket.
4358 				 */
4359 				psimm[i].leaky_bucket_cnt = 0;
4360 				psimm[i].intermittent_total = 0;
4361 				psimm[i].persistent_total = 0;
4362 				psimm[i].sticky_total = 1;
4363 				cmn_err(CE_WARN,
4364 				    "[AFT0] Sticky Softerror encountered "
4365 				    "on Memory Module %s\n", unum);
4366 				page_status = PR_MCE;
4367 			} else if (status & ECC_PERSISTENT) {
4368 				psimm[i].leaky_bucket_cnt = 1;
4369 				psimm[i].intermittent_total = 0;
4370 				psimm[i].persistent_total = 1;
4371 				psimm[i].sticky_total = 0;
4372 			} else {
4373 				/*
4374 				 * Intermittent - Because the scrub operation
4375 				 * cannot find the error in the DIMM, we will
4376 				 * not count these in the leaky bucket
4377 				 */
4378 				psimm[i].leaky_bucket_cnt = 0;
4379 				psimm[i].intermittent_total = 1;
4380 				psimm[i].persistent_total = 0;
4381 				psimm[i].sticky_total = 0;
4382 			}
4383 			ecc_error_info_data.count.value.ui32++;
4384 			break;
4385 		} else if (strncmp(unum, psimm[i].name, len) == 0) {
4386 			/*
4387 			 * Found an existing entry for the current
4388 			 * memory module, adjust the counts.
4389 			 */
4390 			if (status & ECC_STICKY) {
4391 				psimm[i].sticky_total++;
4392 				cmn_err(CE_WARN,
4393 				    "[AFT0] Sticky Softerror encountered "
4394 				    "on Memory Module %s\n", unum);
4395 				page_status = PR_MCE;
4396 			} else if (status & ECC_PERSISTENT) {
4397 				int new_value;
4398 
4399 				new_value = atomic_add_16_nv(
4400 				    &psimm[i].leaky_bucket_cnt, 1);
4401 				psimm[i].persistent_total++;
4402 				if (new_value > ecc_softerr_limit) {
4403 					cmn_err(CE_WARN, "[AFT0] Most recent %d"
4404 					    " soft errors from Memory Module"
4405 					    " %s exceed threshold (N=%d,"
4406 					    " T=%dh:%02dm) triggering page"
4407 					    " retire", new_value, unum,
4408 					    ecc_softerr_limit,
4409 					    ecc_softerr_interval / 60,
4410 					    ecc_softerr_interval % 60);
4411 					atomic_add_16(
4412 					    &psimm[i].leaky_bucket_cnt, -1);
4413 					page_status = PR_MCE;
4414 				}
4415 			} else { /* Intermittent */
4416 				psimm[i].intermittent_total++;
4417 			}
4418 			break;
4419 		}
4420 	}
4421 
4422 	if (i >= mem_ce_simm_size)
4423 		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
4424 		    "space.\n");
4425 
4426 	return (page_status);
4427 }
4428 
4429 /*
4430  * Function to support counting of IO detected CEs.
4431  */
4432 void
4433 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
4434 {
4435 	int err;
4436 
4437 	err = ce_count_unum(ecc->flt_status, len, unum);
4438 	if (err != PR_OK && automatic_page_removal) {
4439 		(void) page_retire(ecc->flt_addr, err);
4440 	}
4441 }
4442 
4443 static int
4444 ecc_kstat_update(kstat_t *ksp, int rw)
4445 {
4446 	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
4447 	struct ce_info *ceip = mem_ce_simm;
4448 	int i = ksp->ks_instance;
4449 
4450 	if (rw == KSTAT_WRITE)
4451 		return (EACCES);
4452 
4453 	ASSERT(ksp->ks_data != NULL);
4454 	ASSERT(i < mem_ce_simm_size && i >= 0);
4455 
4456 	/*
4457 	 * Since we're not using locks, make sure that we don't get partial
4458 	 * data. The name is always copied before the counters are incremented
4459 	 * so only do this update routine if at least one of the counters is
4460 	 * non-zero, which ensures that ce_count_unum() is done, and the
4461 	 * string is fully copied.
4462 	 */
4463 	if (ceip[i].intermittent_total == 0 &&
4464 	    ceip[i].persistent_total == 0 &&
4465 	    ceip[i].sticky_total == 0) {
4466 		/*
4467 		 * Uninitialized or partially initialized. Ignore.
4468 		 * The ks_data buffer was allocated via kmem_zalloc,
4469 		 * so no need to bzero it.
4470 		 */
4471 		return (0);
4472 	}
4473 
4474 	kstat_named_setstr(&kceip->name, ceip[i].name);
4475 	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
4476 	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
4477 	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
4478 
4479 	return (0);
4480 }
4481 
4482 #define	VIS_BLOCKSIZE		64
4483 
4484 int
4485 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
4486 {
4487 	int ret, watched;
4488 
4489 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4490 	ret = dtrace_blksuword32(addr, data, 0);
4491 	if (watched)
4492 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4493 
4494 	return (ret);
4495 }
4496 
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
	/* Nothing to do in this cpu module. */
}
4502 
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
	/* Nothing to do in this cpu module. */
}
4508 
4509 static int mmu_disable_ism_large_pages = ((1 << TTE512K) |
4510 	(1 << TTE32M) | (1 << TTE256M));
4511 static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));
4512 
4513 /*
4514  * The function returns the US_II mmu-specific values for the
4515  * hat's disable_large_pages and disable_ism_large_pages variables.
4516  */
4517 int
4518 mmu_large_pages_disabled(uint_t flag)
4519 {
4520 	int pages_disable = 0;
4521 
4522 	if (flag == HAT_LOAD) {
4523 		pages_disable = mmu_disable_large_pages;
4524 	} else if (flag == HAT_LOAD_SHARE) {
4525 		pages_disable = mmu_disable_ism_large_pages;
4526 	}
4527 	return (pages_disable);
4528 }
4529 
/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
{
	/* Nothing to do in this cpu module. */
}
4535 
4536 size_t
4537 mmu_get_kernel_lpsize(size_t lpsize)
4538 {
4539 	uint_t tte;
4540 
4541 	if (lpsize == 0) {
4542 		/* no setting for segkmem_lpsize in /etc/system: use default */
4543 		return (MMU_PAGESIZE4M);
4544 	}
4545 
4546 	for (tte = TTE8K; tte <= TTE4M; tte++) {
4547 		if (lpsize == TTEBYTES(tte))
4548 			return (lpsize);
4549 	}
4550 
4551 	return (TTEBYTES(TTE8K));
4552 }
4553