xref: /titanic_51/usr/src/uts/sun4u/cpu/spitfire.c (revision 85025c032d701094e5f35de4f42ce66082924fc1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/archsystm.h>
31 #include <sys/machparam.h>
32 #include <sys/machsystm.h>
33 #include <sys/cpu.h>
34 #include <sys/elf_SPARC.h>
35 #include <vm/hat_sfmmu.h>
36 #include <vm/page.h>
37 #include <vm/vm_dep.h>
38 #include <sys/cpuvar.h>
39 #include <sys/spitregs.h>
40 #include <sys/async.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/dditypes.h>
44 #include <sys/sunddi.h>
45 #include <sys/cpu_module.h>
46 #include <sys/prom_debug.h>
47 #include <sys/vmsystm.h>
48 #include <sys/prom_plat.h>
49 #include <sys/sysmacros.h>
50 #include <sys/intreg.h>
51 #include <sys/machtrap.h>
52 #include <sys/ontrap.h>
53 #include <sys/ivintr.h>
54 #include <sys/atomic.h>
55 #include <sys/panic.h>
56 #include <sys/ndifm.h>
57 #include <sys/fm/protocol.h>
58 #include <sys/fm/util.h>
59 #include <sys/fm/cpu/UltraSPARC-II.h>
60 #include <sys/ddi.h>
61 #include <sys/ecc_kstat.h>
62 #include <sys/watchpoint.h>
63 #include <sys/dtrace.h>
64 #include <sys/errclassify.h>
65 
66 uint_t	cpu_impl_dual_pgsz = 0;
67 
68 /*
69  * Structure for the 8 byte ecache data dump and the associated AFSR state.
70  * There will be 8 of these structures used to dump an ecache line (64 bytes).
71  */
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;		/* one 8-byte chunk of dumped E$ data */
	uint64_t ec_afsr;	/* AFSR state associated with that chunk */
} ec_data_t;
76 
77 /*
78  * Define spitfire (Ultra I/II) specific asynchronous error structure
79  */
typedef struct spitfire_async_flt {
	/*
	 * Keep cmn_asyncflt as the first member: code in this file casts
	 * a spitf_async_flt pointer directly to a struct async_flt pointer
	 * (see cpu_ce_error() and cpu_ce_log_status()).
	 */
	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
	ushort_t flt_type;		/* types of faults - cpu specific */
	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
	uint64_t flt_ec_tag;		/* E$ tag info */
	int flt_ec_lcnt;		/* number of bad E$ lines */
	ushort_t flt_sdbh;		/* UDBH reg */
	ushort_t flt_sdbl;		/* UDBL reg */
} spitf_async_flt;
89 
90 /*
91  * Prototypes for support routines in spitfire_asm.s:
92  */
extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
extern uint64_t get_lsu(void);
extern void set_lsu(uint64_t ncc);
extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
				uint64_t *oafsr, uint64_t *acc_afsr);
extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
				uint64_t *acc_afsr);
/*
 * NOTE(review): the empty parameter list below is an old-style declaration,
 * not a prototype, so calls are not argument-checked.  Presumably this
 * should be (void) like get_lsu() - confirm against spitfire_asm.s.
 */
extern uint64_t read_and_clear_afsr();
extern void write_ec_tag_parity(uint32_t id);
extern void write_hb_ec_tag_parity(uint32_t id);
104 
105 /*
106  * Spitfire module routines:
107  */
108 static void cpu_async_log_err(void *flt);
109 /*PRINTFLIKE6*/
110 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
111     uint_t logflags, const char *endstr, const char *fmt, ...);
112 
113 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
114 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
115 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
116 
117 static void log_ce_err(struct async_flt *aflt, char *unum);
118 static void log_ue_err(struct async_flt *aflt, char *unum);
119 static void check_misc_err(spitf_async_flt *spf_flt);
120 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
121 static int check_ecc(struct async_flt *aflt);
122 static uint_t get_cpu_status(uint64_t arg);
123 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
124 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
125 		int *m, uint64_t *afsr);
126 static void ecache_kstat_init(struct cpu *cp);
127 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
128 		uint64_t paddr, int mpb, uint64_t);
129 static uint64_t ecache_scrub_misc_err(int, uint64_t);
130 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
131 static void ecache_page_retire(void *);
132 static int ecc_kstat_update(kstat_t *ksp, int rw);
133 static int ce_count_unum(int status, int len, char *unum);
134 static void add_leaky_bucket_timeout(void);
135 static int synd_to_synd_code(int synd_status, ushort_t synd);
136 
137 extern uint_t read_all_memscrub;
138 extern void memscrub_run(void);
139 
140 static uchar_t	isus2i;			/* set if sabre */
141 static uchar_t	isus2e;			/* set if hummingbird */
142 
143 /*
144  * Default ecache mask and shift settings for Spitfire.  If we detect a
145  * different CPU implementation, we will modify these values at boot time.
146  */
147 static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
148 static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
149 static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
150 static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
151 static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
152 static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
153 static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
154 static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
155 static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
156 static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;
157 
158 /*
159  * Default ecache state bits for Spitfire.  These individual bits indicate if
160  * the given line is in any of the valid or modified states, respectively.
161  * Again, we modify these at boot if we detect a different CPU.
162  */
163 static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
164 static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
165 static uchar_t cpu_ec_parity		= S_EC_PARITY;
166 static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;
167 
168 /*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
 * of this array have the following semantics:
172  *
173  *      00-63   The number of the bad bit, when only one bit is bad.
174  *      64      ECC bit C0 is bad.
175  *      65      ECC bit C1 is bad.
176  *      66      ECC bit C2 is bad.
177  *      67      ECC bit C3 is bad.
178  *      68      ECC bit C4 is bad.
179  *      69      ECC bit C5 is bad.
180  *      70      ECC bit C6 is bad.
181  *      71      ECC bit C7 is bad.
182  *      72      Two bits are bad.
183  *      73      Three bits are bad.
184  *      74      Four bits are bad.
185  *      75      More than Four bits are bad.
186  *      76      NO bits are bad.
187  * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
188  */
189 
/*
 * Syndrome codes above the data-bit numbers 0-63: a single bad ECC check
 * bit (C0-C7), a multi-bit error (M2, M3, M4, MX), or no bad bits (NA).
 */
#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

/*
 * Classify a syndrome code as a single bad data bit (0 .. C0-1) or a single
 * bad check bit (C0 .. C7).  The argument is parenthesized at every use so
 * that an expression argument (e.g. "a | b") is compared as a whole; note
 * that it is still evaluated twice, so it must not have side effects.
 */
#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	(((synd_code) >= 0) && \
						    ((synd_code) < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	(((synd_code) >= C0) && \
						    ((synd_code) <= C7))
208 
/*
 * Syndrome lookup table: entry [s] is the code (data bit number, C0-C7,
 * M2-MX or NA) for 8-bit syndrome value s.  Laid out 16 entries per row,
 * so row r holds syndromes 0x{r}0 through 0x{r}F.
 */
static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};

/* Number of entries in ecc_syndrome_tab[]: one per 8-bit syndrome value. */
#define	SYND_TBL_SIZE 256
230 
231 /*
232  * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
233  * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
234  */
/* Flag bit folded into a stored syndrome to mark it as coming from UDBL. */
#define	UDBL_REG	0x8000
/*
 * UDBL(synd) yields 1 when the UDBL flag is set in synd, else 0; SYND(synd)
 * strips the flag and yields the low 15 bits.  The argument is parenthesized
 * so an expression argument (e.g. "a | b") is masked as a whole.
 */
#define	UDBL(synd)	(((synd) & UDBL_REG) >> 15)
#define	SYND(synd)	((synd) & 0x7FFF)
238 
239 /*
240  * These error types are specific to Spitfire and are used internally for the
241  * spitfire fault structure flt_type field.
242  */
243 #define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
244 #define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
245 #define	CPU_WP_ERR		2	/* WP parity error */
246 #define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
247 #define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
248 #define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
249 #define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
250 #define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
251 #define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
252 #define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
253 #define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
254 #define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
255 #define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
256 #define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
257 #define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
258 #define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */
259 
260 /*
261  * Macro to access the "Spitfire cpu private" data structure.
262  */
263 #define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))
264 
265 /*
266  * set to 0 to disable automatic retiring of pages on
267  * DIMMs that have excessive soft errors
268  */
269 int automatic_page_removal = 1;
270 
271 /*
272  * Heuristic for figuring out which module to replace.
273  * Relative likelihood that this P_SYND indicates that this module is bad.
274  * We call it a "score", though, not a relative likelihood.
275  *
276  * Step 1.
277  * Assign a score to each byte of P_SYND according to the following rules:
278  * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
279  * If one bit on, give it a 95.
280  * If seven bits on, give it a 10.
281  * If two bits on:
282  *   in different nybbles, a 90
283  *   in same nybble, but unaligned, 85
284  *   in same nybble and as an aligned pair, 80
285  * If six bits on, look at the bits that are off:
286  *   in same nybble and as an aligned pair, 15
287  *   in same nybble, but unaligned, 20
288  *   in different nybbles, a 25
289  * If three bits on:
 *   in different nybbles, no aligned pairs, 75
 *   in different nybbles, one aligned pair, 70
 *   in the same nybble, 65
 * If five bits on, look at the bits that are off:
 *   in the same nybble, 30
 *   in different nybbles, one aligned pair, 35
 *   in different nybbles, no aligned pairs, 40
297  * If four bits on:
298  *   all in one nybble, 45
299  *   as two aligned pairs, 50
300  *   one aligned pair, 55
301  *   no aligned pairs, 60
302  *
303  * Step 2:
304  * Take the higher of the two scores (one for each byte) as the score
305  * for the module.
306  *
307  * Print the score for each module, and field service should replace the
308  * module with the highest score.
309  */
310 
311 /*
312  * In the table below, the first row/column comment indicates the
313  * number of bits on in that nybble; the second row/column comment is
314  * the hex digit.
315  */
316 
/*
 * Score for each possible P_SYND byte value.  The table is indexed by the
 * byte itself: each row below is selected by the byte's high nybble and
 * each column by its low nybble.
 */
static int
p_synd_score_table[256] = {
	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
};
338 
339 int
340 ecc_psynd_score(ushort_t p_synd)
341 {
342 	int i, j, a, b;
343 
344 	i = p_synd & 0xFF;
345 	j = (p_synd >> 8) & 0xFF;
346 
347 	a = p_synd_score_table[i];
348 	b = p_synd_score_table[j];
349 
350 	return (a > b ? a : b);
351 }
352 
353 /*
354  * Async Fault Logging
355  *
356  * To ease identifying, reading, and filtering async fault log messages, the
357  * label [AFT#] is now prepended to each async fault message.  These messages
358  * and the logging rules are implemented by cpu_aflt_log(), below.
359  *
360  * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
361  *          This includes both corrected ECC memory and ecache faults.
362  *
363  * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
364  *          else except CE errors) with a priority of 1 (highest).  This tag
365  *          is also used for panic messages that result from an async fault.
366  *
367  * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
368  * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
369  *          of the E-$ data and tags.
370  *
371  * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
372  * printed on the console.  To send all AFT logs to both the log and the
373  * console, set aft_verbose = 1.
374  */
375 
376 #define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
377 #define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
378 #define	CPU_ERRID		0x0004	/* print flt_id */
379 #define	CPU_TL			0x0008	/* print flt_tl */
380 #define	CPU_ERRID_FIRST 	0x0010	/* print flt_id first in message */
381 #define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
382 #define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
383 #define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
384 #define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
385 #define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
386 #define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
387 #define	CPU_FAULTPC		0x0800	/* print flt_pc */
388 #define	CPU_SYND		0x1000	/* print flt_synd and unum */
389 
390 #define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
391 				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
392 				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
393 				CPU_FAULTPC)
394 #define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
395 #define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
396 				~CPU_SPACE)
397 #define	PARERR_LFLAGS	(CMN_LFLAGS)
398 #define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
399 #define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
400 				~CPU_FLTCPU & ~CPU_FAULTPC)
401 #define	BERRTO_LFLAGS	(CMN_LFLAGS)
402 #define	NO_LFLAGS	(0)
403 
404 #define	AFSR_FMTSTR0	"\020\1ME"
405 #define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
406 			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
407 #define	UDB_FMTSTR	"\020\012UE\011CE"
408 
409 /*
410  * Save the cache bootup state for use when internal
411  * caches are to be re-enabled after an error occurs.
412  */
413 uint64_t	cache_boot_state = 0;
414 
415 /*
416  * PA[31:0] represent Displacement in UPA configuration space.
417  */
418 uint_t	root_phys_addr_lo_mask = 0xffffffff;
419 
420 /*
421  * Spitfire legacy globals
422  */
423 int	itlb_entries;
424 int	dtlb_entries;
425 
426 void
427 cpu_setup(void)
428 {
429 	extern int page_retire_messages;
430 	extern int page_retire_first_ue;
431 	extern int at_flags;
432 #if defined(SF_ERRATA_57)
433 	extern caddr_t errata57_limit;
434 #endif
435 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
436 
437 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
438 
439 	/*
440 	 * Spitfire isn't currently FMA-aware, so we have to enable the
441 	 * page retirement messages. We also change the default policy
442 	 * for UE retirement to allow clearing of transient errors.
443 	 */
444 	page_retire_messages = 1;
445 	page_retire_first_ue = 0;
446 
447 	/*
448 	 * save the cache bootup state.
449 	 */
450 	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);
451 
452 	if (use_page_coloring) {
453 		do_pg_coloring = 1;
454 	}
455 
456 	/*
457 	 * Tune pp_slots to use up to 1/8th of the tlb entries.
458 	 */
459 	pp_slots = MIN(8, MAXPP_SLOTS);
460 
461 	/*
462 	 * Block stores invalidate all pages of the d$ so pagecopy
463 	 * et. al. do not need virtual translations with virtual
464 	 * coloring taken into consideration.
465 	 */
466 	pp_consistent_coloring = 0;
467 
468 	isa_list =
469 	    "sparcv9+vis sparcv9 "
470 	    "sparcv8plus+vis sparcv8plus "
471 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
472 
473 	cpu_hwcap_flags = AV_SPARC_VIS;
474 
475 	/*
476 	 * On Spitfire, there's a hole in the address space
477 	 * that we must never map (the hardware only support 44-bits of
478 	 * virtual address).  Later CPUs are expected to have wider
479 	 * supported address ranges.
480 	 *
481 	 * See address map on p23 of the UltraSPARC 1 user's manual.
482 	 */
483 	hole_start = (caddr_t)0x80000000000ull;
484 	hole_end = (caddr_t)0xfffff80000000000ull;
485 
486 	/*
487 	 * A spitfire call bug requires us to be a further 4Gbytes of
488 	 * firewall from the spec.
489 	 *
490 	 * See Spitfire Errata #21
491 	 */
492 	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
493 	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));
494 
495 	/*
496 	 * The kpm mapping window.
497 	 * kpm_size:
498 	 *	The size of a single kpm range.
499 	 *	The overall size will be: kpm_size * vac_colors.
500 	 * kpm_vbase:
501 	 *	The virtual start address of the kpm range within the kernel
502 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
503 	 */
504 	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
505 	kpm_size_shift = 41;
506 	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
507 
508 #if defined(SF_ERRATA_57)
509 	errata57_limit = (caddr_t)0x80000000ul;
510 #endif
511 
512 	/*
513 	 * Disable text by default.
514 	 * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
515 	 */
516 	max_utext_lpsize = MMU_PAGESIZE;
517 }
518 
519 static int
520 getintprop(pnode_t node, char *name, int deflt)
521 {
522 	int	value;
523 
524 	switch (prom_getproplen(node, name)) {
525 	case 0:
526 		value = 1;	/* boolean properties */
527 		break;
528 
529 	case sizeof (int):
530 		(void) prom_getprop(node, name, (caddr_t)&value);
531 		break;
532 
533 	default:
534 		value = deflt;
535 		break;
536 	}
537 
538 	return (value);
539 }
540 
541 /*
542  * Set the magic constants of the implementation.
543  */
544 void
545 cpu_fiximp(pnode_t dnode)
546 {
547 	extern int vac_size, vac_shift;
548 	extern uint_t vac_mask;
549 	extern int dcache_line_mask;
550 	int i, a;
551 	static struct {
552 		char	*name;
553 		int	*var;
554 	} prop[] = {
555 		"dcache-size",		&dcache_size,
556 		"dcache-line-size",	&dcache_linesize,
557 		"icache-size",		&icache_size,
558 		"icache-line-size",	&icache_linesize,
559 		"ecache-size",		&ecache_size,
560 		"ecache-line-size",	&ecache_alignsize,
561 		"ecache-associativity", &ecache_associativity,
562 		"#itlb-entries",	&itlb_entries,
563 		"#dtlb-entries",	&dtlb_entries,
564 		};
565 
566 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
567 		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
568 			*prop[i].var = a;
569 		}
570 	}
571 
572 	ecache_setsize = ecache_size / ecache_associativity;
573 
574 	vac_size = S_VAC_SIZE;
575 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
576 	i = 0; a = vac_size;
577 	while (a >>= 1)
578 		++i;
579 	vac_shift = i;
580 	shm_alignment = vac_size;
581 	vac = 1;
582 
583 	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);
584 
585 	/*
586 	 * UltraSPARC I & II have ecache sizes running
587 	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
588 	 * and 8 MB. Adjust the copyin/copyout limits
589 	 * according to the cache size. The magic number
590 	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
591 	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
592 	 * VIS instructions.
593 	 *
594 	 * We assume that all CPUs on the system have the same size
595 	 * ecache. We're also called very early in the game.
596 	 * /etc/system will be parsed *after* we're called so
597 	 * these values can be overwritten.
598 	 */
599 
600 	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
601 	if (ecache_size <= 524288) {
602 		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
603 		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
604 		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
605 	} else if (ecache_size == 1048576) {
606 		hw_copy_limit_2 = 1024;
607 		hw_copy_limit_4 = 1280;
608 		hw_copy_limit_8 = 1536;
609 	} else if (ecache_size == 2097152) {
610 		hw_copy_limit_2 = 1536;
611 		hw_copy_limit_4 = 2048;
612 		hw_copy_limit_8 = 2560;
613 	} else if (ecache_size == 4194304) {
614 		hw_copy_limit_2 = 2048;
615 		hw_copy_limit_4 = 2560;
616 		hw_copy_limit_8 = 3072;
617 	} else {
618 		hw_copy_limit_2 = 2560;
619 		hw_copy_limit_4 = 3072;
620 		hw_copy_limit_8 = 3584;
621 	}
622 }
623 
624 /*
625  * Called by setcpudelay
626  */
627 void
628 cpu_init_tick_freq(void)
629 {
630 	/*
631 	 * Determine the cpu frequency by calling
632 	 * tod_get_cpufrequency. Use an approximate freqency
633 	 * value computed by the prom if the tod module
634 	 * is not initialized and loaded yet.
635 	 */
636 	if (tod_ops.tod_get_cpufrequency != NULL) {
637 		mutex_enter(&tod_lock);
638 		sys_tick_freq = tod_ops.tod_get_cpufrequency();
639 		mutex_exit(&tod_lock);
640 	} else {
641 #if defined(HUMMINGBIRD)
642 		/*
643 		 * the hummingbird version of %stick is used as the basis for
644 		 * low level timing; this provides an independent constant-rate
645 		 * clock for general system use, and frees power mgmt to set
646 		 * various cpu clock speeds.
647 		 */
648 		if (system_clock_freq == 0)
649 			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
650 			    system_clock_freq);
651 		sys_tick_freq = system_clock_freq;
652 #else /* SPITFIRE */
653 		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
654 #endif
655 	}
656 }
657 
658 
659 void shipit(int upaid);
660 extern uint64_t xc_tick_limit;
661 extern uint64_t xc_tick_jump_limit;
662 
663 #ifdef SEND_MONDO_STATS
664 uint64_t x_early[NCPU][64];
665 #endif
666 
667 /*
668  * Note: A version of this function is used by the debugger via the KDI,
669  * and must be kept in sync with this version.  Any changes made to this
670  * function to support new chips or to accomodate errata must also be included
671  * in the KDI-specific version.  See spitfire_kdi.c.
672  */
673 void
674 send_one_mondo(int cpuid)
675 {
676 	uint64_t idsr, starttick, endtick;
677 	int upaid, busy, nack;
678 	uint64_t tick, tick_prev;
679 	ulong_t ticks;
680 
681 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
682 	upaid = CPUID_TO_UPAID(cpuid);
683 	tick = starttick = gettick();
684 	shipit(upaid);
685 	endtick = starttick + xc_tick_limit;
686 	busy = nack = 0;
687 	for (;;) {
688 		idsr = getidsr();
689 		if (idsr == 0)
690 			break;
691 		/*
692 		 * When we detect an irregular tick jump, we adjust
693 		 * the timer window to the current tick value.
694 		 */
695 		tick_prev = tick;
696 		tick = gettick();
697 		ticks = tick - tick_prev;
698 		if (ticks > xc_tick_jump_limit) {
699 			endtick = tick + xc_tick_limit;
700 		} else if (tick > endtick) {
701 			if (panic_quiesce)
702 				return;
703 			cmn_err(CE_PANIC,
704 			"send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
705 			upaid, nack, busy);
706 		}
707 		if (idsr & IDSR_BUSY) {
708 			busy++;
709 			continue;
710 		}
711 		drv_usecwait(1);
712 		shipit(upaid);
713 		nack++;
714 		busy = 0;
715 	}
716 #ifdef SEND_MONDO_STATS
717 	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
718 #endif
719 }
720 
721 void
722 send_mondo_set(cpuset_t set)
723 {
724 	int i;
725 
726 	for (i = 0; i < NCPU; i++)
727 		if (CPU_IN_SET(set, i)) {
728 			send_one_mondo(i);
729 			CPUSET_DEL(set, i);
730 			if (CPUSET_ISNULL(set))
731 				break;
732 		}
733 }
734 
/*
 * Intentionally empty: this CPU module has nothing to do here.
 */
void
syncfpu(void)
{
}
739 
740 /*
741  * Determine the size of the CPU module's error structure in bytes.  This is
742  * called once during boot to initialize the error queues.
743  */
744 int
745 cpu_aflt_size(void)
746 {
747 	/*
748 	 * We need to determine whether this is a sabre, Hummingbird or a
749 	 * Spitfire/Blackbird impl and set the appropriate state variables for
750 	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
751 	 * too early in the boot flow and the cpunodes are not initialized.
752 	 * This routine will be called once after cpunodes[] is ready, so do
753 	 * it here.
754 	 */
755 	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
756 		isus2i = 1;
757 		cpu_ec_tag_mask = SB_ECTAG_MASK;
758 		cpu_ec_state_mask = SB_ECSTATE_MASK;
759 		cpu_ec_par_mask = SB_ECPAR_MASK;
760 		cpu_ec_par_shift = SB_ECPAR_SHIFT;
761 		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
762 		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
763 		cpu_ec_state_exl = SB_ECSTATE_EXL;
764 		cpu_ec_state_mod = SB_ECSTATE_MOD;
765 
766 		/* These states do not exist in sabre - set to 0xFF */
767 		cpu_ec_state_shr = 0xFF;
768 		cpu_ec_state_own = 0xFF;
769 
770 		cpu_ec_state_valid = SB_ECSTATE_VALID;
771 		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
772 		cpu_ec_state_parity = SB_ECSTATE_PARITY;
773 		cpu_ec_parity = SB_EC_PARITY;
774 	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
775 		isus2e = 1;
776 		cpu_ec_tag_mask = HB_ECTAG_MASK;
777 		cpu_ec_state_mask = HB_ECSTATE_MASK;
778 		cpu_ec_par_mask = HB_ECPAR_MASK;
779 		cpu_ec_par_shift = HB_ECPAR_SHIFT;
780 		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
781 		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
782 		cpu_ec_state_exl = HB_ECSTATE_EXL;
783 		cpu_ec_state_mod = HB_ECSTATE_MOD;
784 
785 		/* These states do not exist in hummingbird - set to 0xFF */
786 		cpu_ec_state_shr = 0xFF;
787 		cpu_ec_state_own = 0xFF;
788 
789 		cpu_ec_state_valid = HB_ECSTATE_VALID;
790 		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
791 		cpu_ec_state_parity = HB_ECSTATE_PARITY;
792 		cpu_ec_parity = HB_EC_PARITY;
793 	}
794 
795 	return (sizeof (spitf_async_flt));
796 }
797 
798 
/*
 * Correctable ecc error trap handler
 *
 * rp     - trap-time registers; only r_pc is used (recorded as flt_pc)
 * p_afar - AFAR captured at trap time
 * p_afsr - AFSR captured at trap time, with the UDBH/UDBL error register
 *          contents folded into its upper word
 * p_afsr_high, p_afar_high - unused
 *
 * Builds an async fault record on the stack, panics if the AFSR or the
 * UDBs do not actually show a CE, then scrubs and (unless suppressed) queues
 * one event per UDB half that reports a CE.  Error trapping is re-enabled
 * before returning.
 */
/*ARGSUSED*/
void
cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl;
	ushort_t e_syndh, e_syndl;
	spitf_async_flt spf_flt;
	struct async_flt *ecc;
	int queue = 1;		/* cleared when the event should not be logged */

	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by ce_err().
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	/* isolate the ECC syndrome field of each UDB half */
	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	/* strip the folded-in UDB bits now that they have been extracted */
	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
		(pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
			"** Panic due to CE bit not set in the AFSR",
			"  Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	    the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
			"** Panic due to CE bits not set in the UDBs",
			" Corrected Memory Error on");
	}

	/*
	 * Handle each UDB half that has bit 8 set.
	 * NOTE(review): bit 8 is presumably the same bit as P_DER_CE, which
	 * is what the panic check above tests - confirm.
	 */
	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * The lower half reuses the same fault record: the address gets
	 * bit 3 set and the syndrome is tagged with UDBL_REG so that later
	 * reporting can tell which half it came from.
	 */
	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * Re-enable all error trapping (CEEN currently cleared).
	 */
	clr_datapath();
	set_asyncflt(P_AFSR_CE);
	set_error_enable(EER_ENABLE);
}
915 
916 /*
917  * Cpu specific CE logging routine
918  */
919 static void
920 log_ce_err(struct async_flt *aflt, char *unum)
921 {
922 	spitf_async_flt spf_flt;
923 
924 	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
925 		return;
926 	}
927 
928 	spf_flt.cmn_asyncflt = *aflt;
929 	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
930 	    " Corrected Memory Error detected by");
931 }
932 
/*
 * Spitfire does not perform any further CE classification refinement.
 *
 * None of the arguments are examined; the routine unconditionally
 * returns 0.
 */
/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
    size_t afltoffset)
{
	return (0);
}
943 
944 char *
945 flt_to_error_type(struct async_flt *aflt)
946 {
947 	if (aflt->flt_status & ECC_INTERMITTENT)
948 		return (ERR_TYPE_DESC_INTERMITTENT);
949 	if (aflt->flt_status & ECC_PERSISTENT)
950 		return (ERR_TYPE_DESC_PERSISTENT);
951 	if (aflt->flt_status & ECC_STICKY)
952 		return (ERR_TYPE_DESC_STICKY);
953 	return (ERR_TYPE_DESC_UNKNOWN);
954 }
955 
956 /*
957  * Called by correctable ecc error logging code to print out
958  * the stick/persistent/intermittent status of the error.
959  */
960 static void
961 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
962 {
963 	ushort_t status;
964 	char *status1_str = "Memory";
965 	char *status2_str = "Intermittent";
966 	struct async_flt *aflt = (struct async_flt *)spf_flt;
967 
968 	status = aflt->flt_status;
969 
970 	if (status & ECC_ECACHE)
971 		status1_str = "Ecache";
972 
973 	if (status & ECC_STICKY)
974 		status2_str = "Sticky";
975 	else if (status & ECC_PERSISTENT)
976 		status2_str = "Persistent";
977 
978 	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
979 		NULL, " Corrected %s Error on %s is %s",
980 		status1_str, unum, status2_str);
981 }
982 
983 /*
984  * check for a valid ce syndrome, then call the
985  * displacement flush scrubbing code, and then check the afsr to see if
986  * the error was persistent or intermittent. Reread the afar/afsr to see
987  * if the error was not scrubbed successfully, and is therefore sticky.
988  */
989 /*ARGSUSED1*/
990 void
991 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
992 {
993 	uint64_t eer, afsr;
994 	ushort_t status;
995 
996 	ASSERT(getpil() > LOCK_LEVEL);
997 
998 	/*
999 	 * It is possible that the flt_addr is not a valid
1000 	 * physical address. To deal with this, we disable
1001 	 * NCEEN while we scrub that address. If this causes
1002 	 * a TIMEOUT/BERR, we know this is an invalid
1003 	 * memory location.
1004 	 */
1005 	kpreempt_disable();
1006 	eer = get_error_enable();
1007 	if (eer & (EER_CEEN | EER_NCEEN))
1008 	    set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));
1009 
1010 	/*
1011 	 * To check if the error detected by IO is persistent, sticky or
1012 	 * intermittent.
1013 	 */
1014 	if (ecc->flt_status & ECC_IOBUS) {
1015 		ecc->flt_stat = P_AFSR_CE;
1016 	}
1017 
1018 	scrubphys(P2ALIGN(ecc->flt_addr, 64),
1019 	    cpunodes[CPU->cpu_id].ecache_size);
1020 
1021 	get_asyncflt(&afsr);
1022 	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1023 		/*
1024 		 * Must ensure that we don't get the TIMEOUT/BERR
1025 		 * when we reenable NCEEN, so we clear the AFSR.
1026 		 */
1027 		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
1028 		if (eer & (EER_CEEN | EER_NCEEN))
1029 		    set_error_enable(eer);
1030 		kpreempt_enable();
1031 		return;
1032 	}
1033 
1034 	if (eer & EER_NCEEN)
1035 	    set_error_enable(eer & ~EER_CEEN);
1036 
1037 	/*
1038 	 * Check and clear any ECC errors from the scrub.  If the scrub did
1039 	 * not trip over the error, mark it intermittent.  If the scrub did
1040 	 * trip the error again and it did not scrub away, mark it sticky.
1041 	 * Otherwise mark it persistent.
1042 	 */
1043 	if (check_ecc(ecc) != 0) {
1044 		cpu_read_paddr(ecc, 0, 1);
1045 
1046 		if (check_ecc(ecc) != 0)
1047 			status = ECC_STICKY;
1048 		else
1049 			status = ECC_PERSISTENT;
1050 	} else
1051 		status = ECC_INTERMITTENT;
1052 
1053 	if (eer & (EER_CEEN | EER_NCEEN))
1054 	    set_error_enable(eer);
1055 	kpreempt_enable();
1056 
1057 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
1058 	ecc->flt_status |= status;
1059 }
1060 
1061 /*
1062  * get the syndrome and unum, and then call the routines
1063  * to check the other cpus and iobuses, and then do the error logging.
1064  */
1065 /*ARGSUSED1*/
1066 void
1067 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
1068 {
1069 	char unum[UNUM_NAMLEN];
1070 	int len = 0;
1071 	int ce_verbose = 0;
1072 	int err;
1073 
1074 	ASSERT(ecc->flt_func != NULL);
1075 
1076 	/* Get the unum string for logging purposes */
1077 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
1078 	    UNUM_NAMLEN, &len);
1079 
1080 	/* Call specific error logging routine */
1081 	(void) (*ecc->flt_func)(ecc, unum);
1082 
1083 	/*
1084 	 * Count errors per unum.
1085 	 * Non-memory errors are all counted via a special unum string.
1086 	 */
1087 	if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
1088 	    automatic_page_removal) {
1089 		(void) page_retire(ecc->flt_addr, err);
1090 	}
1091 
1092 	if (ecc->flt_panic) {
1093 		ce_verbose = 1;
1094 	} else if ((ecc->flt_class == BUS_FAULT) ||
1095 	    (ecc->flt_stat & P_AFSR_CE)) {
1096 		ce_verbose = (ce_verbose_memory > 0);
1097 	} else {
1098 		ce_verbose = 1;
1099 	}
1100 
1101 	if (ce_verbose) {
1102 		spitf_async_flt sflt;
1103 		int synd_code;
1104 
1105 		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */
1106 
1107 		cpu_ce_log_status(&sflt, unum);
1108 
1109 		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
1110 				SYND(ecc->flt_synd));
1111 
1112 		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
1113 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1114 			    NULL, " ECC Data Bit %2d was in error "
1115 			    "and corrected", synd_code);
1116 		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
1117 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1118 			    NULL, " ECC Check Bit %2d was in error "
1119 			    "and corrected", synd_code - C0);
1120 		} else {
1121 			/*
1122 			 * These are UE errors - we shouldn't be getting CE
1123 			 * traps for these; handle them in case of bad h/w.
1124 			 */
1125 			switch (synd_code) {
1126 			case M2:
1127 				cpu_aflt_log(CE_CONT, 0, &sflt,
1128 				    CPU_ERRID_FIRST, NULL,
1129 				    " Two ECC Bits were in error");
1130 				break;
1131 			case M3:
1132 				cpu_aflt_log(CE_CONT, 0, &sflt,
1133 				    CPU_ERRID_FIRST, NULL,
1134 				    " Three ECC Bits were in error");
1135 				break;
1136 			case M4:
1137 				cpu_aflt_log(CE_CONT, 0, &sflt,
1138 				    CPU_ERRID_FIRST, NULL,
1139 				    " Four ECC Bits were in error");
1140 				break;
1141 			case MX:
1142 				cpu_aflt_log(CE_CONT, 0, &sflt,
1143 				    CPU_ERRID_FIRST, NULL,
1144 				    " More than Four ECC bits were "
1145 				    "in error");
1146 				break;
1147 			default:
1148 				cpu_aflt_log(CE_CONT, 0, &sflt,
1149 				    CPU_ERRID_FIRST, NULL,
1150 				    " Unknown fault syndrome %d",
1151 				    synd_code);
1152 				break;
1153 			}
1154 		}
1155 	}
1156 
1157 	/* Display entire cache line, if valid address */
1158 	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
1159 		read_ecc_data(ecc, 1, 1);
1160 }
1161 
1162 /*
1163  * We route all errors through a single switch statement.
1164  */
1165 void
1166 cpu_ue_log_err(struct async_flt *aflt)
1167 {
1168 
1169 	switch (aflt->flt_class) {
1170 	case CPU_FAULT:
1171 		cpu_async_log_err(aflt);
1172 		break;
1173 
1174 	case BUS_FAULT:
1175 		bus_async_log_err(aflt);
1176 		break;
1177 
1178 	default:
1179 		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
1180 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1181 		break;
1182 	}
1183 }
1184 
1185 /* Values for action variable in cpu_async_error() */
1186 #define	ACTION_NONE		0
1187 #define	ACTION_TRAMPOLINE	1
1188 #define	ACTION_AST_FLAGS	2
1189 
1190 /*
1191  * Access error trap handler for asynchronous cpu errors.  This routine is
1192  * called to handle a data or instruction access error.  All fatal errors are
1193  * completely handled by this routine (by panicking).  Non fatal error logging
1194  * is queued for later processing either via AST or softint at a lower PIL.
1195  * In case of panic, the error log queue will also be processed as part of the
1196  * panic flow to ensure all errors are logged.  This routine is called with all
1197  * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
1198  * error bits are also cleared.  The hardware has also disabled the I and
1199  * D-caches for us, so we must re-enable them before returning.
1200  *
1201  * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
1202  *
1203  *		_______________________________________________________________
1204  *		|        Privileged tl0		|         Unprivileged	      |
1205  *		| Protected	| Unprotected	| Protected	| Unprotected |
1206  *		|on_trap|lofault|		|		|	      |
1207  * -------------|-------|-------+---------------+---------------+-------------|
1208  *		|	|	|		|		|	      |
1209  * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
1210  *		|	|	|		|		|	      |
1211  * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
1212  *		|	|	|		|		|	      |
1213  * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
1214  *		|	|	|		|		|	      |
1215  * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
1216  * ____________________________________________________________________________
1217  *
1218  *
1219  * Action codes:
1220  *
1221  * L - log
1222  * M - kick off memscrubber if flt_in_memory
1223  * P - panic
1224  * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
1225  * R - i)  if aft_panic is set, panic
1226  *     ii) otherwise, send hwerr event to contract and SIGKILL to process
1227  * S - send SIGBUS to process
1228  * T - trampoline
1229  *
1230  * Special cases:
1231  *
1232  * 1) if aft_testfatal is set, all faults result in a panic regardless
1233  *    of type (even WP), protection (even on_trap), or privilege.
1234  */
1235 /*ARGSUSED*/
1236 void
1237 cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
1238 	uint_t p_afsr_high, uint_t p_afar_high)
1239 {
1240 	ushort_t sdbh, sdbl, ttype, tl;
1241 	spitf_async_flt spf_flt;
1242 	struct async_flt *aflt;
1243 	char pr_reason[28];
1244 	uint64_t oafsr;
1245 	uint64_t acc_afsr = 0;			/* accumulated afsr */
1246 	int action = ACTION_NONE;
1247 	uint64_t t_afar = p_afar;
1248 	uint64_t t_afsr = p_afsr;
1249 	int expected = DDI_FM_ERR_UNEXPECTED;
1250 	ddi_acc_hdl_t *hp;
1251 
1252 	/*
1253 	 * We need to look at p_flag to determine if the thread detected an
1254 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1255 	 * because we just need a consistent snapshot and we know that everyone
1256 	 * else will store a consistent set of bits while holding p_lock.  We
1257 	 * don't have to worry about a race because SDOCORE is set once prior
1258 	 * to doing i/o from the process's address space and is never cleared.
1259 	 */
1260 	uint_t pflag = ttoproc(curthread)->p_flag;
1261 
1262 	pr_reason[0] = '\0';
1263 
1264 	/*
1265 	 * Note: the Spitfire data buffer error registers
1266 	 * (upper and lower halves) are or'ed into the upper
1267 	 * word of the afsr by async_err() if P_AFSR_UE is set.
1268 	 */
1269 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
1270 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
1271 
1272 	/*
1273 	 * Grab the ttype encoded in <63:53> of the saved
1274 	 * afsr passed from async_err()
1275 	 */
1276 	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
1277 	tl = (ushort_t)(t_afsr >> 62);
1278 
1279 	t_afsr &= S_AFSR_MASK;
1280 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
1281 
1282 	/*
1283 	 * Initialize most of the common and CPU-specific structure.  We derive
1284 	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
1285 	 * initial setting of aflt->flt_panic is based on TL: we must panic if
1286 	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
1287 	 * tuneable aft_testfatal is set (not the default).
1288 	 */
1289 	bzero(&spf_flt, sizeof (spitf_async_flt));
1290 	aflt = (struct async_flt *)&spf_flt;
1291 	aflt->flt_id = gethrtime_waitfree();
1292 	aflt->flt_stat = t_afsr;
1293 	aflt->flt_addr = t_afar;
1294 	aflt->flt_bus_id = getprocessorid();
1295 	aflt->flt_inst = CPU->cpu_id;
1296 	aflt->flt_pc = (caddr_t)rp->r_pc;
1297 	aflt->flt_prot = AFLT_PROT_NONE;
1298 	aflt->flt_class = CPU_FAULT;
1299 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1300 	aflt->flt_tl = (uchar_t)tl;
1301 	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
1302 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1303 
1304 	/*
1305 	 * Set flt_status based on the trap type.  If we end up here as the
1306 	 * result of a UE detected by the CE handling code, leave status 0.
1307 	 */
1308 	switch (ttype) {
1309 	case T_DATA_ERROR:
1310 		aflt->flt_status = ECC_D_TRAP;
1311 		break;
1312 	case T_INSTR_ERROR:
1313 		aflt->flt_status = ECC_I_TRAP;
1314 		break;
1315 	}
1316 
1317 	spf_flt.flt_sdbh = sdbh;
1318 	spf_flt.flt_sdbl = sdbl;
1319 
1320 	/*
1321 	 * Check for fatal async errors.
1322 	 */
1323 	check_misc_err(&spf_flt);
1324 
1325 	/*
1326 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1327 	 * see if we were executing in the kernel under on_trap() or t_lofault
1328 	 * protection.  If so, modify the saved registers so that we return
1329 	 * from the trap to the appropriate trampoline routine.
1330 	 */
1331 	if (aflt->flt_priv && tl == 0) {
1332 		if (curthread->t_ontrap != NULL) {
1333 			on_trap_data_t *otp = curthread->t_ontrap;
1334 
1335 			if (otp->ot_prot & OT_DATA_EC) {
1336 				aflt->flt_prot = AFLT_PROT_EC;
1337 				otp->ot_trap |= OT_DATA_EC;
1338 				rp->r_pc = otp->ot_trampoline;
1339 				rp->r_npc = rp->r_pc + 4;
1340 				action = ACTION_TRAMPOLINE;
1341 			}
1342 
1343 			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
1344 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1345 				aflt->flt_prot = AFLT_PROT_ACCESS;
1346 				otp->ot_trap |= OT_DATA_ACCESS;
1347 				rp->r_pc = otp->ot_trampoline;
1348 				rp->r_npc = rp->r_pc + 4;
1349 				action = ACTION_TRAMPOLINE;
1350 				/*
1351 				 * for peeks and caut_gets errors are expected
1352 				 */
1353 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1354 				if (!hp)
1355 					expected = DDI_FM_ERR_PEEK;
1356 				else if (hp->ah_acc.devacc_attr_access ==
1357 				    DDI_CAUTIOUS_ACC)
1358 					expected = DDI_FM_ERR_EXPECTED;
1359 			}
1360 
1361 		} else if (curthread->t_lofault) {
1362 			aflt->flt_prot = AFLT_PROT_COPY;
1363 			rp->r_g1 = EFAULT;
1364 			rp->r_pc = curthread->t_lofault;
1365 			rp->r_npc = rp->r_pc + 4;
1366 			action = ACTION_TRAMPOLINE;
1367 		}
1368 	}
1369 
1370 	/*
1371 	 * Determine if this error needs to be treated as fatal.  Note that
1372 	 * multiple errors detected upon entry to this trap handler does not
1373 	 * necessarily warrant a panic.  We only want to panic if the trap
1374 	 * happened in privileged mode and not under t_ontrap or t_lofault
1375 	 * protection.  The exception is WP: if we *only* get WP, it is not
1376 	 * fatal even if the trap occurred in privileged mode, except on Sabre.
1377 	 *
1378 	 * aft_panic, if set, effectively makes us treat usermode
1379 	 * UE/EDP/LDP faults as if they were privileged - so we we will
1380 	 * panic instead of sending a contract event.  A lofault-protected
1381 	 * fault will normally follow the contract event; if aft_panic is
1382 	 * set this will be changed to a panic.
1383 	 *
1384 	 * For usermode BERR/BTO errors, eg from processes performing device
1385 	 * control through mapped device memory, we need only deliver
1386 	 * a SIGBUS to the offending process.
1387 	 *
1388 	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
1389 	 * checked later; for now we implement the common reasons.
1390 	 */
1391 	if (aflt->flt_prot == AFLT_PROT_NONE) {
1392 		/*
1393 		 * Beware - multiple bits may be set in AFSR
1394 		 */
1395 		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
1396 			if (aflt->flt_priv || aft_panic)
1397 				aflt->flt_panic = 1;
1398 		}
1399 
1400 		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1401 			if (aflt->flt_priv)
1402 				aflt->flt_panic = 1;
1403 		}
1404 	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
1405 		aflt->flt_panic = 1;
1406 	}
1407 
1408 	/*
1409 	 * UE/BERR/TO: Call our bus nexus friends to check for
1410 	 * IO errors that may have resulted in this trap.
1411 	 */
1412 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
1413 		cpu_run_bus_error_handlers(aflt, expected);
1414 	}
1415 
1416 	/*
1417 	 * Handle UE: If the UE is in memory, we need to flush the bad line from
1418 	 * the E-cache.  We also need to query the bus nexus for fatal errors.
1419 	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
1420 	 * caches may introduce more parity errors (especially when the module
1421 	 * is bad) and in sabre there is no guarantee that such errors
1422 	 * (if introduced) are written back as poisoned data.
1423 	 */
1424 	if (t_afsr & P_AFSR_UE) {
1425 		int i;
1426 
1427 		(void) strcat(pr_reason, "UE ");
1428 
1429 		spf_flt.flt_type = CPU_UE_ERR;
1430 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1431 			MMU_PAGESHIFT)) ? 1: 0;
1432 
1433 		/*
1434 		 * With UE, we have the PA of the fault.
1435 		 * Let do a diagnostic read to get the ecache
1436 		 * data and tag info of the bad line for logging.
1437 		 */
1438 		if (aflt->flt_in_memory) {
1439 			uint32_t ec_set_size;
1440 			uchar_t state;
1441 			uint32_t ecache_idx;
1442 			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);
1443 
1444 			/* touch the line to put it in ecache */
1445 			acc_afsr |= read_and_clear_afsr();
1446 			(void) lddphys(faultpa);
1447 			acc_afsr |= (read_and_clear_afsr() &
1448 				    ~(P_AFSR_EDP | P_AFSR_UE));
1449 
1450 			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
1451 			    ecache_associativity;
1452 
1453 			for (i = 0; i < ecache_associativity; i++) {
1454 				ecache_idx = i * ec_set_size +
1455 				    (aflt->flt_addr % ec_set_size);
1456 				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
1457 					(uint64_t *)&spf_flt.flt_ec_data[0],
1458 					&spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
1459 				acc_afsr |= oafsr;
1460 
1461 				state = (uchar_t)((spf_flt.flt_ec_tag &
1462 				    cpu_ec_state_mask) >> cpu_ec_state_shift);
1463 
1464 				if ((state & cpu_ec_state_valid) &&
1465 				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
1466 				    ((uint64_t)aflt->flt_addr >>
1467 				    cpu_ec_tag_shift)))
1468 					break;
1469 			}
1470 
1471 			/*
1472 			 * Check to see if the ecache tag is valid for the
1473 			 * fault PA. In the very unlikely event where the
1474 			 * line could be victimized, no ecache info will be
1475 			 * available. If this is the case, capture the line
1476 			 * from memory instead.
1477 			 */
1478 			if ((state & cpu_ec_state_valid) == 0 ||
1479 			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
1480 			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
1481 				for (i = 0; i < 8; i++, faultpa += 8) {
1482 				    ec_data_t *ecdptr;
1483 
1484 					ecdptr = &spf_flt.flt_ec_data[i];
1485 					acc_afsr |= read_and_clear_afsr();
1486 					ecdptr->ec_d8 = lddphys(faultpa);
1487 					acc_afsr |= (read_and_clear_afsr() &
1488 						    ~(P_AFSR_EDP | P_AFSR_UE));
1489 					ecdptr->ec_afsr = 0;
1490 							/* null afsr value */
1491 				}
1492 
1493 				/*
1494 				 * Mark tag invalid to indicate mem dump
1495 				 * when we print out the info.
1496 				 */
1497 				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
1498 			}
1499 			spf_flt.flt_ec_lcnt = 1;
1500 
1501 			/*
1502 			 * Flush out the bad line
1503 			 */
1504 			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
1505 				cpunodes[CPU->cpu_id].ecache_size);
1506 
1507 			acc_afsr |= clear_errors(NULL, NULL);
1508 		}
1509 
1510 		/*
1511 		 * Ask our bus nexus friends if they have any fatal errors. If
1512 		 * so, they will log appropriate error messages and panic as a
1513 		 * result. We then queue an event for each UDB that reports a
1514 		 * UE. Each UE reported in a UDB will have its own log message.
1515 		 *
1516 		 * Note from kbn: In the case where there are multiple UEs
1517 		 * (ME bit is set) - the AFAR address is only accurate to
1518 		 * the 16-byte granularity. One cannot tell whether the AFAR
1519 		 * belongs to the UDBH or UDBL syndromes. In this case, we
1520 		 * always report the AFAR address to be 16-byte aligned.
1521 		 *
1522 		 * If we're on a Sabre, there is no SDBL, but it will always
1523 		 * read as zero, so the sdbl test below will safely fail.
1524 		 */
1525 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
1526 			aflt->flt_panic = 1;
1527 
1528 		if (sdbh & P_DER_UE) {
1529 			aflt->flt_synd = sdbh & P_DER_E_SYND;
1530 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1531 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1532 			    aflt->flt_panic);
1533 		}
1534 		if (sdbl & P_DER_UE) {
1535 			aflt->flt_synd = sdbl & P_DER_E_SYND;
1536 			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
1537 			if (!(aflt->flt_stat & P_AFSR_ME))
1538 				aflt->flt_addr |= 0x8;
1539 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1540 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1541 			    aflt->flt_panic);
1542 		}
1543 
1544 		/*
1545 		 * We got a UE and are panicking, save the fault PA in a known
1546 		 * location so that the platform specific panic code can check
1547 		 * for copyback errors.
1548 		 */
1549 		if (aflt->flt_panic && aflt->flt_in_memory) {
1550 			panic_aflt = *aflt;
1551 		}
1552 	}
1553 
1554 	/*
1555 	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
1556 	 * async error for logging. For Sabre, we panic on EDP or LDP.
1557 	 */
1558 	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
1559 		spf_flt.flt_type = CPU_EDP_LDP_ERR;
1560 
1561 		if (t_afsr & P_AFSR_EDP)
1562 			(void) strcat(pr_reason, "EDP ");
1563 
1564 		if (t_afsr & P_AFSR_LDP)
1565 			(void) strcat(pr_reason, "LDP ");
1566 
1567 		/*
1568 		 * Here we have no PA to work with.
1569 		 * Scan each line in the ecache to look for
1570 		 * the one with bad parity.
1571 		 */
1572 		aflt->flt_addr = AFLT_INV_ADDR;
1573 		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1574 			&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
1575 		acc_afsr |= (oafsr & ~P_AFSR_WP);
1576 
1577 		/*
1578 		 * If we found a bad PA, update the state to indicate if it is
1579 		 * memory or I/O space.  This code will be important if we ever
1580 		 * support cacheable frame buffers.
1581 		 */
1582 		if (aflt->flt_addr != AFLT_INV_ADDR) {
1583 			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1584 				MMU_PAGESHIFT)) ? 1 : 0;
1585 		}
1586 
1587 		if (isus2i || isus2e)
1588 			aflt->flt_panic = 1;
1589 
1590 		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
1591 		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
1592 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1593 		    aflt->flt_panic);
1594 	}
1595 
1596 	/*
1597 	 * Timeout and bus error handling.  There are two cases to consider:
1598 	 *
1599 	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we
1600 	 * have already modified the saved registers so that we will return
1601 	 * from the trap to the appropriate trampoline routine; otherwise panic.
1602 	 *
1603 	 * (2) In user mode, we can simply use our AST mechanism to deliver
1604 	 * a SIGBUS.  We do not log the occurence - processes performing
1605 	 * device control would generate lots of uninteresting messages.
1606 	 */
1607 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1608 		if (t_afsr & P_AFSR_TO)
1609 			(void) strcat(pr_reason, "BTO ");
1610 
1611 		if (t_afsr & P_AFSR_BERR)
1612 			(void) strcat(pr_reason, "BERR ");
1613 
1614 		spf_flt.flt_type = CPU_BTO_BERR_ERR;
1615 		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
1616 			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
1617 			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
1618 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1619 			    aflt->flt_panic);
1620 		}
1621 	}
1622 
1623 	/*
1624 	 * Handle WP: WP happens when the ecache is victimized and a parity
1625 	 * error was detected on a writeback.  The data in question will be
1626 	 * poisoned as a UE will be written back.  The PA is not logged and
1627 	 * it is possible that it doesn't belong to the trapped thread.  The
1628 	 * WP trap is not fatal, but it could be fatal to someone that
1629 	 * subsequently accesses the toxic page.  We set read_all_memscrub
1630 	 * to force the memscrubber to read all of memory when it awakens.
1631 	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
1632 	 * UE back to poison the data.
1633 	 */
1634 	if (t_afsr & P_AFSR_WP) {
1635 		(void) strcat(pr_reason, "WP ");
1636 		if (isus2i || isus2e) {
1637 			aflt->flt_panic = 1;
1638 		} else {
1639 			read_all_memscrub = 1;
1640 		}
1641 		spf_flt.flt_type = CPU_WP_ERR;
1642 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
1643 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1644 		    aflt->flt_panic);
1645 	}
1646 
1647 	/*
1648 	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
1649 	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
1650 	 * This is fatal.
1651 	 */
1652 
1653 	if (t_afsr & P_AFSR_CP) {
1654 		if (isus2i || isus2e) {
1655 			(void) strcat(pr_reason, "CP ");
1656 			aflt->flt_panic = 1;
1657 			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
1658 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1659 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1660 			    aflt->flt_panic);
1661 		} else {
1662 			/*
1663 			 * Orphan CP: Happens due to signal integrity problem
1664 			 * on a CPU, where a CP is reported, without reporting
1665 			 * its associated UE. This is handled by locating the
1666 			 * bad parity line and would kick off the memscrubber
1667 			 * to find the UE if in memory or in another's cache.
1668 			 */
1669 			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
1670 			(void) strcat(pr_reason, "ORPHAN_CP ");
1671 
1672 			/*
1673 			 * Here we have no PA to work with.
1674 			 * Scan each line in the ecache to look for
1675 			 * the one with bad parity.
1676 			 */
1677 			aflt->flt_addr = AFLT_INV_ADDR;
1678 			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1679 				&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
1680 				&oafsr);
1681 			acc_afsr |= oafsr;
1682 
1683 			/*
1684 			 * If we found a bad PA, update the state to indicate
1685 			 * if it is memory or I/O space.
1686 			 */
1687 			if (aflt->flt_addr != AFLT_INV_ADDR) {
1688 				aflt->flt_in_memory =
1689 					(pf_is_memory(aflt->flt_addr >>
1690 						MMU_PAGESHIFT)) ? 1 : 0;
1691 			}
1692 			read_all_memscrub = 1;
1693 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1694 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1695 			    aflt->flt_panic);
1696 
1697 		}
1698 	}
1699 
1700 	/*
1701 	 * If we queued an error other than WP or CP and we are going to return
1702 	 * from the trap and the error was in user mode or inside of a
1703 	 * copy routine, set AST flag so the queue will be drained before
1704 	 * returning to user mode.
1705 	 *
1706 	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
1707 	 * and send an event to its process contract.
1708 	 *
1709 	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
1710 	 * will have been no error queued in this case.
1711 	 */
1712 	if ((t_afsr &
1713 	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
1714 	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
1715 			int pcb_flag = 0;
1716 
1717 			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
1718 				pcb_flag |= ASYNC_HWERR;
1719 
1720 			if (t_afsr & P_AFSR_BERR)
1721 				pcb_flag |= ASYNC_BERR;
1722 
1723 			if (t_afsr & P_AFSR_TO)
1724 				pcb_flag |= ASYNC_BTO;
1725 
1726 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1727 			aston(curthread);
1728 			action = ACTION_AST_FLAGS;
1729 	}
1730 
1731 	/*
1732 	 * In response to a deferred error, we must do one of three things:
1733 	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
1734 	 * set in cases (1) and (2) - check that either action is set or
1735 	 * (3) is true.
1736 	 *
1737 	 * On II, the WP writes poisoned data back to memory, which will
1738 	 * cause a UE and a panic or reboot when read.  In this case, we
1739 	 * don't need to panic at this time.  On IIi and IIe,
1740 	 * aflt->flt_panic is already set above.
1741 	 */
1742 	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
1743 	    (t_afsr & P_AFSR_WP));
1744 
1745 	/*
1746 	 * Make a final sanity check to make sure we did not get any more async
1747 	 * errors and accumulate the afsr.
1748 	 */
1749 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
1750 	    cpunodes[CPU->cpu_id].ecache_linesize);
1751 	(void) clear_errors(&spf_flt, NULL);
1752 
1753 	/*
1754 	 * Take care of a special case: If there is a UE in the ecache flush
1755 	 * area, we'll see it in flush_ecache().  This will trigger the
1756 	 * CPU_ADDITIONAL_ERRORS case below.
1757 	 *
1758 	 * This could occur if the original error was a UE in the flush area,
1759 	 * or if the original error was an E$ error that was flushed out of
1760 	 * the E$ in scan_ecache().
1761 	 *
1762 	 * If it's at the same address that we're already logging, then it's
1763 	 * probably one of these cases.  Clear the bit so we don't trip over
1764 	 * it on the additional errors case, which could cause an unnecessary
1765 	 * panic.
1766 	 */
1767 	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
1768 		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
1769 	else
1770 		acc_afsr |= aflt->flt_stat;
1771 
1772 	/*
1773 	 * Check the acumulated afsr for the important bits.
1774 	 * Make sure the spf_flt.flt_type value is set, and
1775 	 * enque an error.
1776 	 */
1777 	if (acc_afsr &
1778 	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
1779 		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
1780 		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
1781 		    P_AFSR_ISAP))
1782 			aflt->flt_panic = 1;
1783 
1784 		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
1785 		aflt->flt_stat = acc_afsr;
1786 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
1787 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1788 		    aflt->flt_panic);
1789 	}
1790 
1791 	/*
1792 	 * If aflt->flt_panic is set at this point, we need to panic as the
1793 	 * result of a trap at TL > 0, or an error we determined to be fatal.
1794 	 * We've already enqueued the error in one of the if-clauses above,
1795 	 * and it will be dequeued and logged as part of the panic flow.
1796 	 */
1797 	if (aflt->flt_panic) {
1798 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
1799 		    "See previous message(s) for details", " %sError(s)",
1800 		    pr_reason);
1801 	}
1802 
1803 	/*
1804 	 * Before returning, we must re-enable errors, and
1805 	 * reset the caches to their boot-up state.
1806 	 */
1807 	set_lsu(get_lsu() | cache_boot_state);
1808 	set_error_enable(EER_ENABLE);
1809 }
1810 
1811 /*
1812  * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1813  * This routine is shared by the CE and UE handling code.
1814  */
1815 static void
1816 check_misc_err(spitf_async_flt *spf_flt)
1817 {
1818 	struct async_flt *aflt = (struct async_flt *)spf_flt;
1819 	char *fatal_str = NULL;
1820 
1821 	/*
1822 	 * The ISAP and ETP errors are supposed to cause a POR
1823 	 * from the system, so in theory we never, ever see these messages.
1824 	 * ISAP, ETP and IVUE are considered to be fatal.
1825 	 */
1826 	if (aflt->flt_stat & P_AFSR_ISAP)
1827 		fatal_str = " System Address Parity Error on";
1828 	else if (aflt->flt_stat & P_AFSR_ETP)
1829 		fatal_str = " Ecache Tag Parity Error on";
1830 	else if (aflt->flt_stat & P_AFSR_IVUE)
1831 		fatal_str = " Interrupt Vector Uncorrectable Error on";
1832 	if (fatal_str != NULL) {
1833 		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
1834 			NULL, fatal_str);
1835 	}
1836 }
1837 
1838 /*
1839  * Routine to convert a syndrome into a syndrome code.
1840  */
1841 static int
1842 synd_to_synd_code(int synd_status, ushort_t synd)
1843 {
1844 	if (synd_status != AFLT_STAT_VALID)
1845 		return (-1);
1846 
1847 	/*
1848 	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
1849 	 * to get the code indicating which bit(s) is(are) bad.
1850 	 */
1851 	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
1852 		return (-1);
1853 	else
1854 		return (ecc_syndrome_tab[synd]);
1855 }
1856 
/*
 * Memory serial-id lookup.  This CPU module provides no implementation:
 * every argument is ignored and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
1863 
/*
 * Fault-address-to-offset lookup.  This CPU module provides no
 * implementation: both arguments are ignored and ENOTSUP is always
 * returned.
 */
/* ARGSUSED */
int
cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
{
	return (ENOTSUP);
}
1870 
/*
 * unum/serial-id/offset to physical address lookup.  This CPU module
 * provides no implementation: every argument is ignored and ENOTSUP is
 * always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	return (ENOTSUP);
}
1877 
1878 /*
1879  * Routine to return a string identifying the physical name
1880  * associated with a memory/cache error.
1881  */
1882 /* ARGSUSED */
1883 int
1884 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
1885     uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
1886     char *buf, int buflen, int *lenp)
1887 {
1888 	short synd_code;
1889 	int ret;
1890 
1891 	if (flt_in_memory) {
1892 		synd_code = synd_to_synd_code(synd_status, synd);
1893 		if (synd_code == -1) {
1894 			ret = EINVAL;
1895 		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
1896 		    buf, buflen, lenp) != 0) {
1897 			ret = EIO;
1898 		} else if (*lenp <= 1) {
1899 			ret = EINVAL;
1900 		} else {
1901 			ret = 0;
1902 		}
1903 	} else {
1904 		ret = ENOTSUP;
1905 	}
1906 
1907 	if (ret != 0) {
1908 		buf[0] = '\0';
1909 		*lenp = 0;
1910 	}
1911 
1912 	return (ret);
1913 }
1914 
1915 /*
1916  * Wrapper for cpu_get_mem_unum() routine that takes an
1917  * async_flt struct rather than explicit arguments.
1918  */
1919 int
1920 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1921     char *buf, int buflen, int *lenp)
1922 {
1923 	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
1924 		aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
1925 		aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
1926 }
1927 
1928 /*
1929  * This routine is a more generic interface to cpu_get_mem_unum(),
1930  * that may be used by other modules (e.g. mm).
1931  */
1932 int
1933 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1934 		char *buf, int buflen, int *lenp)
1935 {
1936 	int synd_status, flt_in_memory, ret;
1937 	char unum[UNUM_NAMLEN];
1938 
1939 	/*
1940 	 * Check for an invalid address.
1941 	 */
1942 	if (afar == (uint64_t)-1)
1943 		return (ENXIO);
1944 
1945 	if (synd == (uint64_t)-1)
1946 		synd_status = AFLT_STAT_INVALID;
1947 	else
1948 		synd_status = AFLT_STAT_VALID;
1949 
1950 	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
1951 
1952 	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1953 	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
1954 	    != 0)
1955 		return (ret);
1956 
1957 	if (*lenp >= buflen)
1958 		return (ENAMETOOLONG);
1959 
1960 	(void) strncpy(buf, unum, buflen);
1961 
1962 	return (0);
1963 }
1964 
/*
 * Return memory information (sizes, segment/bank counts, controller id)
 * associated with a physical address and syndrome.  This CPU module
 * provides no implementation: the arguments are ignored, none of the
 * output pointers is written, and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_mem_info(uint64_t synd, uint64_t afar,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
	return (ENOTSUP);
}
1977 
/*
 * Return a string identifying the physical name associated with a cpuid.
 * This CPU module provides no implementation: the arguments are ignored
 * and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
1988 
1989 /*
1990  * This routine returns the size of the kernel's FRU name buffer.
1991  */
1992 size_t
1993 cpu_get_name_bufsize()
1994 {
1995 	return (UNUM_NAMLEN);
1996 }
1997 
1998 /*
1999  * Cpu specific log func for UEs.
2000  */
2001 static void
2002 log_ue_err(struct async_flt *aflt, char *unum)
2003 {
2004 	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
2005 	int len = 0;
2006 
2007 #ifdef DEBUG
2008 	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
2009 
2010 	/*
2011 	 * Paranoid Check for priv mismatch
2012 	 * Only applicable for UEs
2013 	 */
2014 	if (afsr_priv != aflt->flt_priv) {
2015 		/*
2016 		 * The priv bits in %tstate and %afsr did not match; we expect
2017 		 * this to be very rare, so flag it with a message.
2018 		 */
2019 		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
2020 		    ": PRIV bit in TSTATE and AFSR mismatched; "
2021 		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
2022 
2023 		/* update saved afsr to reflect the correct priv */
2024 		aflt->flt_stat &= ~P_AFSR_PRIV;
2025 		if (aflt->flt_priv)
2026 			aflt->flt_stat |= P_AFSR_PRIV;
2027 	}
2028 #endif /* DEBUG */
2029 
2030 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
2031 	    UNUM_NAMLEN, &len);
2032 
2033 	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
2034 	    " Uncorrectable Memory Error on");
2035 
2036 	if (SYND(aflt->flt_synd) == 0x3) {
2037 		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
2038 		    " Syndrome 0x3 indicates that this may not be a "
2039 		    "memory module problem");
2040 	}
2041 
2042 	if (aflt->flt_in_memory)
2043 		cpu_log_ecmem_info(spf_flt);
2044 }
2045 
2046 
2047 /*
2048  * The cpu_async_log_err() function is called via the ue_drain() function to
2049  * handle logging for CPU events that are dequeued.  As such, it can be invoked
2050  * from softint context, from AST processing in the trap() flow, or from the
2051  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
2052  */
2053 static void
2054 cpu_async_log_err(void *flt)
2055 {
2056 	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
2057 	struct async_flt *aflt = (struct async_flt *)flt;
2058 	char unum[UNUM_NAMLEN];
2059 	char *space;
2060 	char *ecache_scrub_logstr = NULL;
2061 
2062 	switch (spf_flt->flt_type) {
2063 	    case CPU_UE_ERR:
2064 		/*
2065 		 * We want to skip logging only if ALL the following
2066 		 * conditions are true:
2067 		 *
2068 		 *	1. We are not panicking
2069 		 *	2. There is only one error
2070 		 *	3. That error is a memory error
2071 		 *	4. The error is caused by the memory scrubber (in
2072 		 *	   which case the error will have occurred under
2073 		 *	   on_trap protection)
2074 		 *	5. The error is on a retired page
2075 		 *
2076 		 * Note 1: AFLT_PROT_EC is used places other than the memory
2077 		 * scrubber.  However, none of those errors should occur
2078 		 * on a retired page.
2079 		 *
2080 		 * Note 2: In the CE case, these errors are discarded before
2081 		 * the errorq.  In the UE case, we must wait until now --
2082 		 * softcall() grabs a mutex, which we can't do at a high PIL.
2083 		 */
2084 		if (!panicstr &&
2085 		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
2086 		    aflt->flt_prot == AFLT_PROT_EC) {
2087 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2088 				/* Zero the address to clear the error */
2089 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2090 				return;
2091 			}
2092 		}
2093 
2094 		/*
2095 		 * Log the UE and check for causes of this UE error that
2096 		 * don't cause a trap (Copyback error).  cpu_async_error()
2097 		 * has already checked the i/o buses for us.
2098 		 */
2099 		log_ue_err(aflt, unum);
2100 		if (aflt->flt_in_memory)
2101 			cpu_check_allcpus(aflt);
2102 		break;
2103 
2104 	    case CPU_EDP_LDP_ERR:
2105 		if (aflt->flt_stat & P_AFSR_EDP)
2106 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2107 			    NULL, " EDP event on");
2108 
2109 		if (aflt->flt_stat & P_AFSR_LDP)
2110 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2111 			    NULL, " LDP event on");
2112 
2113 		/* Log ecache info if exist */
2114 		if (spf_flt->flt_ec_lcnt > 0) {
2115 			cpu_log_ecmem_info(spf_flt);
2116 
2117 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2118 			    NULL, " AFAR was derived from E$Tag");
2119 		} else {
2120 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2121 			    NULL, " No error found in ecache (No fault "
2122 			    "PA available)");
2123 		}
2124 		break;
2125 
2126 	    case CPU_WP_ERR:
2127 		/*
2128 		 * If the memscrub thread hasn't yet read
2129 		 * all of memory, as we requested in the
2130 		 * trap handler, then give it a kick to
2131 		 * make sure it does.
2132 		 */
2133 		if (!isus2i && !isus2e && read_all_memscrub)
2134 			memscrub_run();
2135 
2136 		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
2137 		    " WP event on");
2138 		return;
2139 
2140 	    case CPU_BTO_BERR_ERR:
2141 		/*
2142 		 * A bus timeout or error occurred that was in user mode or not
2143 		 * in a protected kernel code region.
2144 		 */
2145 		if (aflt->flt_stat & P_AFSR_BERR) {
2146 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2147 			    spf_flt, BERRTO_LFLAGS, NULL,
2148 			    " Bus Error on System Bus in %s mode from",
2149 			    aflt->flt_priv ? "privileged" : "user");
2150 		}
2151 
2152 		if (aflt->flt_stat & P_AFSR_TO) {
2153 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2154 			    spf_flt, BERRTO_LFLAGS, NULL,
2155 			    " Timeout on System Bus in %s mode from",
2156 			    aflt->flt_priv ? "privileged" : "user");
2157 		}
2158 
2159 		return;
2160 
2161 	    case CPU_PANIC_CP_ERR:
2162 		/*
2163 		 * Process the Copyback (CP) error info (if any) obtained from
2164 		 * polling all the cpus in the panic flow. This case is only
2165 		 * entered if we are panicking.
2166 		 */
2167 		ASSERT(panicstr != NULL);
2168 		ASSERT(aflt->flt_id == panic_aflt.flt_id);
2169 
2170 		/* See which space - this info may not exist */
2171 		if (panic_aflt.flt_status & ECC_D_TRAP)
2172 			space = "Data ";
2173 		else if (panic_aflt.flt_status & ECC_I_TRAP)
2174 			space = "Instruction ";
2175 		else
2176 			space = "";
2177 
2178 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2179 		    " AFAR was derived from UE report,"
2180 		    " CP event on CPU%d (caused %saccess error on %s%d)",
2181 		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
2182 		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
2183 
2184 		if (spf_flt->flt_ec_lcnt > 0)
2185 			cpu_log_ecmem_info(spf_flt);
2186 		else
2187 			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
2188 			    NULL, " No cache dump available");
2189 
2190 		return;
2191 
2192 	    case CPU_TRAPPING_CP_ERR:
2193 		/*
2194 		 * For sabre only.  This is a copyback ecache parity error due
2195 		 * to a PCI DMA read.  We should be panicking if we get here.
2196 		 */
2197 		ASSERT(panicstr != NULL);
2198 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2199 		    " AFAR was derived from UE report,"
2200 		    " CP event on CPU%d (caused Data access error "
2201 		    "on PCIBus)", aflt->flt_inst);
2202 		return;
2203 
2204 		/*
2205 		 * We log the ecache lines of the following states,
2206 		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
2207 		 * dirty_bad_busy if ecache_scrub_verbose is set and panic
2208 		 * in addition to logging if ecache_scrub_panic is set.
2209 		 */
2210 	    case CPU_BADLINE_CI_ERR:
2211 		ecache_scrub_logstr = "CBI";
2212 		/* FALLTHRU */
2213 
2214 	    case CPU_BADLINE_CB_ERR:
2215 		if (ecache_scrub_logstr == NULL)
2216 			ecache_scrub_logstr = "CBB";
2217 		/* FALLTHRU */
2218 
2219 	    case CPU_BADLINE_DI_ERR:
2220 		if (ecache_scrub_logstr == NULL)
2221 			ecache_scrub_logstr = "DBI";
2222 		/* FALLTHRU */
2223 
2224 	    case CPU_BADLINE_DB_ERR:
2225 		if (ecache_scrub_logstr == NULL)
2226 			ecache_scrub_logstr = "DBB";
2227 
2228 		cpu_aflt_log(CE_NOTE, 2, spf_flt,
2229 			(CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
2230 			" %s event on", ecache_scrub_logstr);
2231 		cpu_log_ecmem_info(spf_flt);
2232 
2233 		return;
2234 
2235 	    case CPU_ORPHAN_CP_ERR:
2236 		/*
2237 		 * Orphan CPs, where the CP bit is set, but when a CPU
2238 		 * doesn't report a UE.
2239 		 */
2240 		if (read_all_memscrub)
2241 			memscrub_run();
2242 
2243 		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
2244 			NULL, " Orphan CP event on");
2245 
2246 		/* Log ecache info if exist */
2247 		if (spf_flt->flt_ec_lcnt > 0)
2248 			cpu_log_ecmem_info(spf_flt);
2249 		else
2250 			cpu_aflt_log(CE_NOTE, 2, spf_flt,
2251 				(CP_LFLAGS | CPU_FLTCPU), NULL,
2252 				" No error found in ecache (No fault "
2253 				"PA available");
2254 		return;
2255 
2256 	    case CPU_ECACHE_ADDR_PAR_ERR:
2257 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2258 				" E$ Tag Address Parity error on");
2259 		cpu_log_ecmem_info(spf_flt);
2260 		return;
2261 
2262 	    case CPU_ECACHE_STATE_ERR:
2263 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2264 				" E$ Tag State Parity error on");
2265 		cpu_log_ecmem_info(spf_flt);
2266 		return;
2267 
2268 	    case CPU_ECACHE_TAG_ERR:
2269 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2270 				" E$ Tag scrub event on");
2271 		cpu_log_ecmem_info(spf_flt);
2272 		return;
2273 
2274 	    case CPU_ECACHE_ETP_ETS_ERR:
2275 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2276 				" AFSR.ETP is set and AFSR.ETS is zero on");
2277 		cpu_log_ecmem_info(spf_flt);
2278 		return;
2279 
2280 
2281 	    case CPU_ADDITIONAL_ERR:
2282 		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
2283 		    " Additional errors detected during error processing on");
2284 		return;
2285 
2286 	    default:
2287 		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
2288 		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
2289 		return;
2290 	}
2291 
2292 	/* ... fall through from the UE, EDP, or LDP cases */
2293 
2294 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2295 		if (!panicstr) {
2296 			(void) page_retire(aflt->flt_addr, PR_UE);
2297 		} else {
2298 			/*
2299 			 * Clear UEs on panic so that we don't
2300 			 * get haunted by them during panic or
2301 			 * after reboot
2302 			 */
2303 			clearphys(P2ALIGN(aflt->flt_addr, 64),
2304 			    cpunodes[CPU->cpu_id].ecache_size,
2305 			    cpunodes[CPU->cpu_id].ecache_linesize);
2306 
2307 			(void) clear_errors(NULL, NULL);
2308 		}
2309 	}
2310 
2311 	/*
2312 	 * Log final recover message
2313 	 */
2314 	if (!panicstr) {
2315 		if (!aflt->flt_priv) {
2316 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2317 			    NULL, " Above Error is in User Mode"
2318 			    "\n    and is fatal: "
2319 			    "will SIGKILL process and notify contract");
2320 		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
2321 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2322 			    NULL, " Above Error detected while dumping core;"
2323 			    "\n    core file will be truncated");
2324 		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
2325 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2326 			    NULL, " Above Error is due to Kernel access"
2327 			    "\n    to User space and is fatal: "
2328 			    "will SIGKILL process and notify contract");
2329 		} else if (aflt->flt_prot == AFLT_PROT_EC) {
2330 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
2331 			    " Above Error detected by protected Kernel code"
2332 			    "\n    that will try to clear error from system");
2333 		}
2334 	}
2335 }
2336 
2337 
2338 /*
2339  * Check all cpus for non-trapping UE-causing errors
2340  * In Ultra I/II, we look for copyback errors (CPs)
2341  */
2342 void
2343 cpu_check_allcpus(struct async_flt *aflt)
2344 {
2345 	spitf_async_flt cp;
2346 	spitf_async_flt *spf_cpflt = &cp;
2347 	struct async_flt *cpflt = (struct async_flt *)&cp;
2348 	int pix;
2349 
2350 	cpflt->flt_id = aflt->flt_id;
2351 	cpflt->flt_addr = aflt->flt_addr;
2352 
2353 	for (pix = 0; pix < NCPU; pix++) {
2354 		if (CPU_XCALL_READY(pix)) {
2355 			xc_one(pix, (xcfunc_t *)get_cpu_status,
2356 			    (uint64_t)cpflt, 0);
2357 
2358 			if (cpflt->flt_stat & P_AFSR_CP) {
2359 				char *space;
2360 
2361 				/* See which space - this info may not exist */
2362 				if (aflt->flt_status & ECC_D_TRAP)
2363 					space = "Data ";
2364 				else if (aflt->flt_status & ECC_I_TRAP)
2365 					space = "Instruction ";
2366 				else
2367 					space = "";
2368 
2369 				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
2370 				    NULL, " AFAR was derived from UE report,"
2371 				    " CP event on CPU%d (caused %saccess "
2372 				    "error on %s%d)", pix, space,
2373 				    (aflt->flt_status & ECC_IOBUS) ?
2374 				    "IOBUS" : "CPU", aflt->flt_bus_id);
2375 
2376 				if (spf_cpflt->flt_ec_lcnt > 0)
2377 					cpu_log_ecmem_info(spf_cpflt);
2378 				else
2379 					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
2380 					    CPU_ERRID_FIRST, NULL,
2381 					    " No cache dump available");
2382 			}
2383 		}
2384 	}
2385 }
2386 
2387 #ifdef DEBUG
2388 int test_mp_cp = 0;
2389 #endif
2390 
2391 /*
2392  * Cross-call callback routine to tell a CPU to read its own %afsr to check
2393  * for copyback errors and capture relevant information.
2394  */
2395 static uint_t
2396 get_cpu_status(uint64_t arg)
2397 {
2398 	struct async_flt *aflt = (struct async_flt *)arg;
2399 	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
2400 	uint64_t afsr;
2401 	uint32_t ec_idx;
2402 	uint64_t sdbh, sdbl;
2403 	int i;
2404 	uint32_t ec_set_size;
2405 	uchar_t valid;
2406 	ec_data_t ec_data[8];
2407 	uint64_t ec_tag, flt_addr_tag, oafsr;
2408 	uint64_t *acc_afsr = NULL;
2409 
2410 	get_asyncflt(&afsr);
2411 	if (CPU_PRIVATE(CPU) != NULL) {
2412 		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2413 		afsr |= *acc_afsr;
2414 		*acc_afsr = 0;
2415 	}
2416 
2417 #ifdef DEBUG
2418 	if (test_mp_cp)
2419 		afsr |= P_AFSR_CP;
2420 #endif
2421 	aflt->flt_stat = afsr;
2422 
2423 	if (afsr & P_AFSR_CP) {
2424 		/*
2425 		 * Capture the UDBs
2426 		 */
2427 		get_udb_errors(&sdbh, &sdbl);
2428 		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
2429 		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);
2430 
2431 		/*
2432 		 * Clear CP bit before capturing ecache data
2433 		 * and AFSR info.
2434 		 */
2435 		set_asyncflt(P_AFSR_CP);
2436 
2437 		/*
2438 		 * See if we can capture the ecache line for the
2439 		 * fault PA.
2440 		 *
2441 		 * Return a valid matching ecache line, if any.
2442 		 * Otherwise, return the first matching ecache
2443 		 * line marked invalid.
2444 		 */
2445 		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
2446 		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
2447 		    ecache_associativity;
2448 		spf_flt->flt_ec_lcnt = 0;
2449 
2450 		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
2451 		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
2452 			get_ecache_dtag(P2ALIGN(ec_idx, 64),
2453 				(uint64_t *)&ec_data[0], &ec_tag, &oafsr,
2454 				    acc_afsr);
2455 
2456 			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
2457 				continue;
2458 
2459 			valid = cpu_ec_state_valid &
2460 			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
2461 			    cpu_ec_state_shift);
2462 
2463 			if (valid || spf_flt->flt_ec_lcnt == 0) {
2464 				spf_flt->flt_ec_tag = ec_tag;
2465 				bcopy(&ec_data, &spf_flt->flt_ec_data,
2466 				    sizeof (ec_data));
2467 				spf_flt->flt_ec_lcnt = 1;
2468 
2469 				if (valid)
2470 					break;
2471 			}
2472 		}
2473 	}
2474 	return (0);
2475 }
2476 
2477 /*
2478  * CPU-module callback for the non-panicking CPUs.  This routine is invoked
2479  * from panic_idle() as part of the other CPUs stopping themselves when a
2480  * panic occurs.  We need to be VERY careful what we do here, since panicstr
2481  * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
2482  * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
2483  * CP error information.
2484  */
2485 void
2486 cpu_async_panic_callb(void)
2487 {
2488 	spitf_async_flt cp;
2489 	struct async_flt *aflt = (struct async_flt *)&cp;
2490 	uint64_t *scrub_afsr;
2491 
2492 	if (panic_aflt.flt_id != 0) {
2493 		aflt->flt_addr = panic_aflt.flt_addr;
2494 		(void) get_cpu_status((uint64_t)aflt);
2495 
2496 		if (CPU_PRIVATE(CPU) != NULL) {
2497 			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2498 			if (*scrub_afsr & P_AFSR_CP) {
2499 				aflt->flt_stat |= *scrub_afsr;
2500 				*scrub_afsr = 0;
2501 			}
2502 		}
2503 		if (aflt->flt_stat & P_AFSR_CP) {
2504 			aflt->flt_id = panic_aflt.flt_id;
2505 			aflt->flt_panic = 1;
2506 			aflt->flt_inst = CPU->cpu_id;
2507 			aflt->flt_class = CPU_FAULT;
2508 			cp.flt_type = CPU_PANIC_CP_ERR;
2509 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
2510 			    (void *)&cp, sizeof (cp), ue_queue,
2511 			    aflt->flt_panic);
2512 		}
2513 	}
2514 }
2515 
2516 /*
2517  * Turn off all cpu error detection, normally only used for panics.
2518  */
2519 void
2520 cpu_disable_errors(void)
2521 {
2522 	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
2523 }
2524 
2525 /*
2526  * Enable errors.
2527  */
2528 void
2529 cpu_enable_errors(void)
2530 {
2531 	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
2532 }
2533 
2534 static void
2535 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
2536 {
2537 	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
2538 	int i, loop = 1;
2539 	ushort_t ecc_0;
2540 	uint64_t paddr;
2541 	uint64_t data;
2542 
2543 	if (verbose)
2544 		loop = 8;
2545 	for (i = 0; i < loop; i++) {
2546 		paddr = aligned_addr + (i * 8);
2547 		data = lddphys(paddr);
2548 		if (verbose) {
2549 			if (ce_err) {
2550 			    ecc_0 = ecc_gen((uint32_t)(data>>32),
2551 			    (uint32_t)data);
2552 			    cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2553 				NULL, "    Paddr 0x%" PRIx64 ", "
2554 				"Data 0x%08x.%08x, ECC 0x%x", paddr,
2555 				(uint32_t)(data>>32), (uint32_t)data, ecc_0);
2556 			} else {
2557 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2558 				    NULL, "    Paddr 0x%" PRIx64 ", "
2559 				    "Data 0x%08x.%08x", paddr,
2560 				    (uint32_t)(data>>32), (uint32_t)data);
2561 			}
2562 		}
2563 	}
2564 }
2565 
2566 static struct {		/* sec-ded-s4ed ecc code */
2567 	uint_t hi, lo;
2568 } ecc_code[8] = {
2569 	{ 0xee55de23U, 0x16161161U },
2570 	{ 0x55eede93U, 0x61612212U },
2571 	{ 0xbb557b8cU, 0x49494494U },
2572 	{ 0x55bb7b6cU, 0x94948848U },
2573 	{ 0x16161161U, 0xee55de23U },
2574 	{ 0x61612212U, 0x55eede93U },
2575 	{ 0x49494494U, 0xbb557b8cU },
2576 	{ 0x94948848U, 0x55bb7b6cU }
2577 };
2578 
2579 static ushort_t
2580 ecc_gen(uint_t high_bytes, uint_t low_bytes)
2581 {
2582 	int i, j;
2583 	uchar_t checker, bit_mask;
2584 	struct {
2585 		uint_t hi, lo;
2586 	} hex_data, masked_data[8];
2587 
2588 	hex_data.hi = high_bytes;
2589 	hex_data.lo = low_bytes;
2590 
2591 	/* mask out bits according to sec-ded-s4ed ecc code */
2592 	for (i = 0; i < 8; i++) {
2593 		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
2594 		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
2595 	}
2596 
2597 	/*
2598 	 * xor all bits in masked_data[i] to get bit_i of checker,
2599 	 * where i = 0 to 7
2600 	 */
2601 	checker = 0;
2602 	for (i = 0; i < 8; i++) {
2603 		bit_mask = 1 << i;
2604 		for (j = 0; j < 32; j++) {
2605 			if (masked_data[i].lo & 1) checker ^= bit_mask;
2606 			if (masked_data[i].hi & 1) checker ^= bit_mask;
2607 			masked_data[i].hi >>= 1;
2608 			masked_data[i].lo >>= 1;
2609 		}
2610 	}
2611 	return (checker);
2612 }
2613 
2614 /*
2615  * Flush the entire ecache using displacement flush by reading through a
2616  * physical address range as large as the ecache.
2617  */
2618 void
2619 cpu_flush_ecache(void)
2620 {
2621 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2622 	    cpunodes[CPU->cpu_id].ecache_linesize);
2623 }
2624 
2625 /*
2626  * read and display the data in the cache line where the
2627  * original ce error occurred.
2628  * This routine is mainly used for debugging new hardware.
2629  */
2630 void
2631 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
2632 {
2633 	kpreempt_disable();
2634 	/* disable ECC error traps */
2635 	set_error_enable(EER_ECC_DISABLE);
2636 
2637 	/*
2638 	 * flush the ecache
2639 	 * read the data
2640 	 * check to see if an ECC error occured
2641 	 */
2642 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2643 	    cpunodes[CPU->cpu_id].ecache_linesize);
2644 	set_lsu(get_lsu() | cache_boot_state);
2645 	cpu_read_paddr(ecc, verbose, ce_err);
2646 	(void) check_ecc(ecc);
2647 
2648 	/* enable ECC error traps */
2649 	set_error_enable(EER_ENABLE);
2650 	kpreempt_enable();
2651 }
2652 
2653 /*
2654  * Check the AFSR bits for UE/CE persistence.
2655  * If UE or CE errors are detected, the routine will
2656  * clears all the AFSR sticky bits (except CP for
2657  * spitfire/blackbird) and the UDBs.
2658  * if ce_debug or ue_debug is set, log any ue/ce errors detected.
2659  */
2660 static int
2661 check_ecc(struct async_flt *ecc)
2662 {
2663 	uint64_t t_afsr;
2664 	uint64_t t_afar;
2665 	uint64_t udbh;
2666 	uint64_t udbl;
2667 	ushort_t udb;
2668 	int persistent = 0;
2669 
2670 	/*
2671 	 * Capture the AFSR, AFAR and UDBs info
2672 	 */
2673 	get_asyncflt(&t_afsr);
2674 	get_asyncaddr(&t_afar);
2675 	t_afar &= SABRE_AFAR_PA;
2676 	get_udb_errors(&udbh, &udbl);
2677 
2678 	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
2679 		/*
2680 		 * Clear the errors
2681 		 */
2682 		clr_datapath();
2683 
2684 		if (isus2i || isus2e)
2685 			set_asyncflt(t_afsr);
2686 		else
2687 			set_asyncflt(t_afsr & ~P_AFSR_CP);
2688 
2689 		/*
2690 		 * determine whether to check UDBH or UDBL for persistence
2691 		 */
2692 		if (ecc->flt_synd & UDBL_REG) {
2693 			udb = (ushort_t)udbl;
2694 			t_afar |= 0x8;
2695 		} else {
2696 			udb = (ushort_t)udbh;
2697 		}
2698 
2699 		if (ce_debug || ue_debug) {
2700 			spitf_async_flt spf_flt; /* for logging */
2701 			struct async_flt *aflt =
2702 				(struct async_flt *)&spf_flt;
2703 
2704 			/* Package the info nicely in the spf_flt struct */
2705 			bzero(&spf_flt, sizeof (spitf_async_flt));
2706 			aflt->flt_stat = t_afsr;
2707 			aflt->flt_addr = t_afar;
2708 			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
2709 			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);
2710 
2711 			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
2712 			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
2713 			    " check_ecc: Dumping captured error states ...");
2714 		}
2715 
2716 		/*
2717 		 * if the fault addresses don't match, not persistent
2718 		 */
2719 		if (t_afar != ecc->flt_addr) {
2720 			return (persistent);
2721 		}
2722 
2723 		/*
2724 		 * check for UE persistence
2725 		 * since all DIMMs in the bank are identified for a UE,
2726 		 * there's no reason to check the syndrome
2727 		 */
2728 		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
2729 			persistent = 1;
2730 		}
2731 
2732 		/*
2733 		 * check for CE persistence
2734 		 */
2735 		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
2736 			if ((udb & P_DER_E_SYND) ==
2737 			    (ecc->flt_synd & P_DER_E_SYND)) {
2738 				persistent = 1;
2739 			}
2740 		}
2741 	}
2742 	return (persistent);
2743 }
2744 
#ifdef HUMMINGBIRD
/* Clock divisors accepted by cpu_change_speed(). */
#define	HB_FULL_DIV		1
#define	HB_HALF_DIV		2
#define	HB_LOWEST_DIV		8
#define	HB_ECLK_INVALID		0xdeadbad
/* E-clock setting for each divisor; unsupported divisors are invalid. */
static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
	HB_ECLK_8 };

/* Direction argument for CHANGE_REFRESH_COUNT(). */
#define	HB_SLOW_DOWN		0
#define	HB_SPEED_UP		1

/*
 * Write a new mode to the E* mode register and wait for the PLL to lock.
 * The two statements are wrapped in braces, like the other multi-statement
 * macros below, so that the register write and its settle delay always
 * stay together even when the macro is the body of an unbraced
 * conditional.
 */
#define	SET_ESTAR_MODE(mode)					\
{								\
	stdphysio(HB_ESTAR_MODE, (mode));			\
	/*							\
	 * PLL logic requires minimum of 16 clock		\
	 * cycles to lock to the new clock speed.		\
	 * Wait 1 usec to satisfy this requirement.		\
	 */							\
	drv_usecwait(1);					\
}

/*
 * Reprogram the memory refresh count for the divisor new_div.  When
 * slowing down without Memory Self Refresh enabled, additionally wait
 * long enough (computed at cur_div) for the old and the new count to
 * each count down once.
 */
#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
{								\
	volatile uint64_t data;					\
	uint64_t count, new_count;				\
	clock_t delay;						\
	data = lddphysio(HB_MEM_CNTRL0);			\
	count = (data & HB_REFRESH_COUNT_MASK) >>		\
	    HB_REFRESH_COUNT_SHIFT;				\
	new_count = (HB_REFRESH_INTERVAL *			\
	    cpunodes[CPU->cpu_id].clock_freq) /			\
	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
	/*							\
	 * If we are slowing down the cpu and Memory		\
	 * Self Refresh is not enabled, it is required		\
	 * to wait for old refresh count to count-down and	\
	 * new refresh count to go into effect (let new value	\
	 * counts down once).					\
	 */							\
	if ((direction) == HB_SLOW_DOWN &&			\
	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
		/*						\
		 * Each count takes 64 cpu clock cycles		\
		 * to decrement.  Wait for current refresh	\
		 * count plus new refresh count at current	\
		 * cpu speed to count down to zero.  Round	\
		 * up the delay time.				\
		 */						\
		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
		    (count + new_count) * MICROSEC * (cur_div)) /\
		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
		drv_usecwait(delay);				\
	}							\
}

/*
 * Set the Memory Self Refresh field of HB_MEM_CNTRL0 to bit, then read
 * the register back.
 */
#define	SET_SELF_REFRESH(bit)					\
{								\
	volatile uint64_t data;					\
	data = lddphysio(HB_MEM_CNTRL0);			\
	data = (data & ~HB_SELF_REFRESH_MASK) |			\
	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
}
#endif	/* HUMMINGBIRD */
2815 
/*
 * Change the CPU clock divisor.  The body only exists when HUMMINGBIRD
 * is defined; otherwise the routine does nothing.
 *
 * new_divisor must lie between HB_FULL_DIV and HB_LOWEST_DIV and have a
 * valid hb_eclk[] entry, otherwise a warning is logged and nothing is
 * changed.  The routine panics if the current divisor cannot be derived
 * from the E* mode register.  arg2 is unused.
 *
 * Moves between full speed and any divisor beyond 1/2 always step
 * through 1/2 speed, and the refresh count / self refresh / E* mode
 * updates are issued in a fixed order for each kind of transition.
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
{
#ifdef HUMMINGBIRD
	uint64_t eclk_bits, old_divisor = 0;
	volatile uint64_t estar;
	int div;

	if (new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV ||
	    hb_eclk[new_divisor] == HB_ECLK_INVALID) {
		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
		    new_divisor);
		return;
	}

	/* map the current E-clock setting back to its divisor */
	estar = lddphysio(HB_ESTAR_MODE);
	eclk_bits = estar & HB_ECLK_MASK;
	for (div = HB_FULL_DIV;
	    div <= HB_LOWEST_DIV && old_divisor == 0; div++) {
		if (hb_eclk[div] == eclk_bits)
			old_divisor = div;
	}

	if (old_divisor == 0)
		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
		    "can't be determined!");

	/* already running at the requested divisor */
	if (old_divisor == new_divisor)
		return;

	if (old_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
		/* full -> 1/2 */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, old_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

	} else if (old_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/* 1/2 -> full */
		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, old_divisor, new_divisor);

	} else if (old_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
		/* full -> below 1/2: go to 1/2 speed first, then lower */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, old_divisor, HB_HALF_DIV);
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (old_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/* below 1/2 -> full: go to 1/2 speed first, then full */
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, old_divisor, HB_HALF_DIV);

		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);

	} else if (old_divisor < new_divisor) {
		/* slowing down, full speed not involved */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, old_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (old_divisor > new_divisor) {
		/* speeding up, full speed not involved */
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, old_divisor, new_divisor);
	}

	/* remember the divisor now in effect */
	CPU->cpu_m.divisor = (uchar_t)new_divisor;
#endif
}
2901 
2902 /*
2903  * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
2904  * we clear all the sticky bits. If a non-null pointer to a async fault
2905  * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
2906  * info will be returned in the structure.  If a non-null pointer to a
2907  * uint64_t is passed in, this will be updated if the CP bit is set in the
2908  * AFSR.  The afsr will be returned.
2909  */
2910 static uint64_t
2911 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
2912 {
2913 	struct async_flt *aflt = (struct async_flt *)spf_flt;
2914 	uint64_t afsr;
2915 	uint64_t udbh, udbl;
2916 
2917 	get_asyncflt(&afsr);
2918 
2919 	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
2920 		*acc_afsr |= afsr;
2921 
2922 	if (spf_flt != NULL) {
2923 		aflt->flt_stat = afsr;
2924 		get_asyncaddr(&aflt->flt_addr);
2925 		aflt->flt_addr &= SABRE_AFAR_PA;
2926 
2927 		get_udb_errors(&udbh, &udbl);
2928 		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
2929 		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
2930 	}
2931 
2932 	set_asyncflt(afsr);		/* clear afsr */
2933 	clr_datapath();			/* clear udbs */
2934 	return (afsr);
2935 }
2936 
2937 /*
2938  * Scan the ecache to look for bad lines.  If found, the afsr, afar, e$ data
2939  * tag of the first bad line will be returned. We also return the old-afsr
2940  * (before clearing the sticky bits). The linecnt data will be updated to
2941  * indicate the number of bad lines detected.
2942  */
2943 static void
2944 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
2945 	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
2946 {
2947 	ec_data_t t_ecdata[8];
2948 	uint64_t t_etag, oafsr;
2949 	uint64_t pa = AFLT_INV_ADDR;
2950 	uint32_t i, j, ecache_sz;
2951 	uint64_t acc_afsr = 0;
2952 	uint64_t *cpu_afsr = NULL;
2953 
2954 	if (CPU_PRIVATE(CPU) != NULL)
2955 		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2956 
2957 	*linecnt = 0;
2958 	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;
2959 
2960 	for (i = 0; i < ecache_sz; i += 64) {
2961 		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
2962 		    cpu_afsr);
2963 		acc_afsr |= oafsr;
2964 
2965 		/*
2966 		 * Scan through the whole 64 bytes line in 8 8-byte chunks
2967 		 * looking for the first occurrence of an EDP error.  The AFSR
2968 		 * info is captured for each 8-byte chunk.  Note that for
2969 		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
2970 		 * 16-byte chunk granularity (i.e. the AFSR will be the same
2971 		 * for the high and low 8-byte words within the 16-byte chunk).
2972 		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
2973 		 * granularity and only PSYND bits [7:0] are used.
2974 		 */
2975 		for (j = 0; j < 8; j++) {
2976 			ec_data_t *ecdptr = &t_ecdata[j];
2977 
2978 			if (ecdptr->ec_afsr & P_AFSR_EDP) {
2979 				uint64_t errpa;
2980 				ushort_t psynd;
2981 				uint32_t ec_set_size = ecache_sz /
2982 				    ecache_associativity;
2983 
2984 				/*
2985 				 * For Spitfire/Blackbird, we need to look at
2986 				 * the PSYND to make sure that this 8-byte chunk
2987 				 * is the right one.  PSYND bits [15:8] belong
2988 				 * to the upper 8-byte (even) chunk.  Bits
2989 				 * [7:0] belong to the lower 8-byte chunk (odd).
2990 				 */
2991 				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
2992 				if (!isus2i && !isus2e) {
2993 					if (j & 0x1)
2994 						psynd = psynd & 0xFF;
2995 					else
2996 						psynd = psynd >> 8;
2997 
2998 					if (!psynd)
2999 						continue; /* wrong chunk */
3000 				}
3001 
3002 				/* Construct the PA */
3003 				errpa = ((t_etag & cpu_ec_tag_mask) <<
3004 				    cpu_ec_tag_shift) | ((i | (j << 3)) %
3005 				    ec_set_size);
3006 
3007 				/* clean up the cache line */
3008 				flushecacheline(P2ALIGN(errpa, 64),
3009 					cpunodes[CPU->cpu_id].ecache_size);
3010 
3011 				oafsr = clear_errors(NULL, cpu_afsr);
3012 				acc_afsr |= oafsr;
3013 
3014 				(*linecnt)++;
3015 
3016 				/*
3017 				 * Capture the PA for the first bad line found.
3018 				 * Return the ecache dump and tag info.
3019 				 */
3020 				if (pa == AFLT_INV_ADDR) {
3021 					int k;
3022 
3023 					pa = errpa;
3024 					for (k = 0; k < 8; k++)
3025 						ecache_data[k] = t_ecdata[k];
3026 					*ecache_tag = t_etag;
3027 				}
3028 				break;
3029 			}
3030 		}
3031 	}
3032 	*t_afar = pa;
3033 	*t_afsr = acc_afsr;
3034 }
3035 
3036 static void
3037 cpu_log_ecmem_info(spitf_async_flt *spf_flt)
3038 {
3039 	struct async_flt *aflt = (struct async_flt *)spf_flt;
3040 	uint64_t ecache_tag = spf_flt->flt_ec_tag;
3041 	char linestr[30];
3042 	char *state_str;
3043 	int i;
3044 
3045 	/*
3046 	 * Check the ecache tag to make sure it
3047 	 * is valid. If invalid, a memory dump was
3048 	 * captured instead of a ecache dump.
3049 	 */
3050 	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
3051 		uchar_t eparity = (uchar_t)
3052 		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);
3053 
3054 		uchar_t estate = (uchar_t)
3055 		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);
3056 
3057 		if (estate == cpu_ec_state_shr)
3058 			state_str = "Shared";
3059 		else if (estate == cpu_ec_state_exl)
3060 			state_str = "Exclusive";
3061 		else if (estate == cpu_ec_state_own)
3062 			state_str = "Owner";
3063 		else if (estate == cpu_ec_state_mod)
3064 			state_str = "Modified";
3065 		else
3066 			state_str = "Invalid";
3067 
3068 		if (spf_flt->flt_ec_lcnt > 1) {
3069 			(void) snprintf(linestr, sizeof (linestr),
3070 			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
3071 		} else {
3072 			linestr[0] = '\0';
3073 		}
3074 
3075 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3076 		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
3077 		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
3078 		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
3079 		    (uint32_t)ecache_tag, state_str,
3080 		    (uint32_t)eparity, linestr);
3081 	} else {
3082 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3083 		    " E$tag != PA from AFAR; E$line was victimized"
3084 		    "\n    dumping memory from PA 0x%08x.%08x instead",
3085 		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
3086 		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
3087 	}
3088 
3089 	/*
3090 	 * Dump out all 8 8-byte ecache data captured
3091 	 * For each 8-byte data captured, we check the
3092 	 * captured afsr's parity syndrome to find out
3093 	 * which 8-byte chunk is bad. For memory dump, the
3094 	 * AFSR values were initialized to 0.
3095 	 */
3096 	for (i = 0; i < 8; i++) {
3097 		ec_data_t *ecdptr;
3098 		uint_t offset;
3099 		ushort_t psynd;
3100 		ushort_t bad;
3101 		uint64_t edp;
3102 
3103 		offset = i << 3;	/* multiply by 8 */
3104 		ecdptr = &spf_flt->flt_ec_data[i];
3105 		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3106 		edp = ecdptr->ec_afsr & P_AFSR_EDP;
3107 
3108 		/*
3109 		 * For Sabre/Hummingbird, parity synd is captured only
3110 		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
3111 		 * For spitfire/blackbird, AFSR.PSYND is captured
3112 		 * in 16-byte granularity. [15:8] represent
3113 		 * the upper 8 byte and [7:0] the lower 8 byte.
3114 		 */
3115 		if (isus2i || isus2e || (i & 0x1))
3116 			bad = (psynd & 0xFF);		/* check bits [7:0] */
3117 		else
3118 			bad = (psynd & 0xFF00);		/* check bits [15:8] */
3119 
3120 		if (bad && edp) {
3121 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3122 			    " E$Data (0x%02x): 0x%08x.%08x "
3123 			    "*Bad* PSYND=0x%04x", offset,
3124 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3125 			    (uint32_t)ecdptr->ec_d8, psynd);
3126 		} else {
3127 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3128 			    " E$Data (0x%02x): 0x%08x.%08x", offset,
3129 			    (uint32_t)(ecdptr->ec_d8 >> 32),
3130 			    (uint32_t)ecdptr->ec_d8);
3131 		}
3132 	}
3133 }
3134 
3135 /*
3136  * Common logging function for all cpu async errors.  This function allows the
3137  * caller to generate a single cmn_err() call that logs the appropriate items
3138  * from the fault structure, and implements our rules for AFT logging levels.
3139  *
3140  *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
3141  *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
3142  *	spflt: pointer to spitfire async fault structure
3143  *	logflags: bitflags indicating what to output
3144  *	endstr: a end string to appear at the end of this log
3145  *	fmt: a format string to appear at the beginning of the log
3146  *
3147  * The logflags allows the construction of predetermined output from the spflt
3148  * structure.  The individual data items always appear in a consistent order.
3149  * Note that either or both of the spflt structure pointer and logflags may be
3150  * NULL or zero respectively, indicating that the predetermined output
3151  * substrings are not requested in this log.  The output looks like this:
3152  *
3153  *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
3154  *	<CPU_SPACE><CPU_ERRID>
3155  *	newline+4spaces<CPU_AFSR><CPU_AFAR>
3156  *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
3157  *	newline+4spaces<CPU_UDBH><CPU_UDBL>
3158  *	newline+4spaces<CPU_SYND>
3159  *	newline+4spaces<endstr>
3160  *
3161  * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>;
3162  * it is assumed that <endstr> will be the unum string in this case.  The size
3163  * of our intermediate formatting buf[] is based on the worst case of all flags
3164  * being enabled.  We pass the caller's varargs directly to vcmn_err() for
3165  * formatting so we don't need additional stack space to format them here.
3166  */
3167 /*PRINTFLIKE6*/
3168 static void
3169 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
3170 	const char *endstr, const char *fmt, ...)
3171 {
3172 	struct async_flt *aflt = (struct async_flt *)spflt;
3173 	char buf[400], *p, *q; /* see comments about buf[] size above */
3174 	va_list ap;
3175 	int console_log_flag;
3176 
3177 	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
3178 				(aflt->flt_stat & P_AFSR_LEVEL1)) ||
3179 	    (aflt->flt_panic)) {
3180 		console_log_flag = (tagnum < 2) || aft_verbose;
3181 	} else {
3182 		int verbose = ((aflt->flt_class == BUS_FAULT) ||
3183 		    (aflt->flt_stat & P_AFSR_CE)) ?
3184 		    ce_verbose_memory : ce_verbose_other;
3185 
3186 		if (!verbose)
3187 			return;
3188 
3189 		console_log_flag = (verbose > 1);
3190 	}
3191 
3192 	if (console_log_flag)
3193 		(void) sprintf(buf, "[AFT%d]", tagnum);
3194 	else
3195 		(void) sprintf(buf, "![AFT%d]", tagnum);
3196 
3197 	p = buf + strlen(buf);	/* current buffer position */
3198 	q = buf + sizeof (buf);	/* pointer past end of buffer */
3199 
3200 	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
3201 		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
3202 		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
3203 		p += strlen(p);
3204 	}
3205 
3206 	/*
3207 	 * Copy the caller's format string verbatim into buf[].  It will be
3208 	 * formatted by the call to vcmn_err() at the end of this function.
3209 	 */
3210 	if (fmt != NULL && p < q) {
3211 		(void) strncpy(p, fmt, (size_t)(q - p - 1));
3212 		buf[sizeof (buf) - 1] = '\0';
3213 		p += strlen(p);
3214 	}
3215 
3216 	if (spflt != NULL) {
3217 		if (logflags & CPU_FLTCPU) {
3218 			(void) snprintf(p, (size_t)(q - p), " CPU%d",
3219 			    aflt->flt_inst);
3220 			p += strlen(p);
3221 		}
3222 
3223 		if (logflags & CPU_SPACE) {
3224 			if (aflt->flt_status & ECC_D_TRAP)
3225 				(void) snprintf(p, (size_t)(q - p),
3226 				    " Data access");
3227 			else if (aflt->flt_status & ECC_I_TRAP)
3228 				(void) snprintf(p, (size_t)(q - p),
3229 				    " Instruction access");
3230 			p += strlen(p);
3231 		}
3232 
3233 		if (logflags & CPU_TL) {
3234 			(void) snprintf(p, (size_t)(q - p), " at TL%s",
3235 			    aflt->flt_tl ? ">0" : "=0");
3236 			p += strlen(p);
3237 		}
3238 
3239 		if (logflags & CPU_ERRID) {
3240 			(void) snprintf(p, (size_t)(q - p),
3241 			    ", errID 0x%08x.%08x",
3242 			    (uint32_t)(aflt->flt_id >> 32),
3243 			    (uint32_t)aflt->flt_id);
3244 			p += strlen(p);
3245 		}
3246 
3247 		if (logflags & CPU_AFSR) {
3248 			(void) snprintf(p, (size_t)(q - p),
3249 			    "\n    AFSR 0x%08b.%08b",
3250 			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
3251 			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
3252 			p += strlen(p);
3253 		}
3254 
3255 		if (logflags & CPU_AFAR) {
3256 			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
3257 			    (uint32_t)(aflt->flt_addr >> 32),
3258 			    (uint32_t)aflt->flt_addr);
3259 			p += strlen(p);
3260 		}
3261 
3262 		if (logflags & CPU_AF_PSYND) {
3263 			ushort_t psynd = (ushort_t)
3264 			    (aflt->flt_stat & P_AFSR_P_SYND);
3265 
3266 			(void) snprintf(p, (size_t)(q - p),
3267 			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
3268 			    psynd, ecc_psynd_score(psynd));
3269 			p += strlen(p);
3270 		}
3271 
3272 		if (logflags & CPU_AF_ETS) {
3273 			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
3274 			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
3275 			p += strlen(p);
3276 		}
3277 
3278 		if (logflags & CPU_FAULTPC) {
3279 			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
3280 			    (void *)aflt->flt_pc);
3281 			p += strlen(p);
3282 		}
3283 
3284 		if (logflags & CPU_UDBH) {
3285 			(void) snprintf(p, (size_t)(q - p),
3286 			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
3287 			    spflt->flt_sdbh, UDB_FMTSTR,
3288 			    spflt->flt_sdbh & 0xFF);
3289 			p += strlen(p);
3290 		}
3291 
3292 		if (logflags & CPU_UDBL) {
3293 			(void) snprintf(p, (size_t)(q - p),
3294 			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
3295 			    spflt->flt_sdbl, UDB_FMTSTR,
3296 			    spflt->flt_sdbl & 0xFF);
3297 			p += strlen(p);
3298 		}
3299 
3300 		if (logflags & CPU_SYND) {
3301 			ushort_t synd = SYND(aflt->flt_synd);
3302 
3303 			(void) snprintf(p, (size_t)(q - p),
3304 			    "\n    %s Syndrome 0x%x Memory Module ",
3305 			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
3306 			p += strlen(p);
3307 		}
3308 	}
3309 
3310 	if (endstr != NULL) {
3311 		if (!(logflags & CPU_SYND))
3312 			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
3313 		else
3314 			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
3315 		p += strlen(p);
3316 	}
3317 
3318 	if (ce_code == CE_CONT && (p < q - 1))
3319 		(void) strcpy(p, "\n"); /* add final \n if needed */
3320 
3321 	va_start(ap, fmt);
3322 	vcmn_err(ce_code, buf, ap);
3323 	va_end(ap);
3324 }
3325 
3326 /*
3327  * Ecache Scrubbing
3328  *
3329  * The basic idea is to prevent lines from sitting in the ecache long enough
3330  * to build up soft errors which can lead to ecache parity errors.
3331  *
3332  * The following rules are observed when flushing the ecache:
3333  *
3334  * 1. When the system is busy, flush bad clean lines
3335  * 2. When the system is idle, flush all clean lines
3336  * 3. When the system is idle, flush good dirty lines
3337  * 4. Never flush bad dirty lines.
3338  *
3339  *	modify	parity	busy   idle
3340  *	----------------------------
3341  *	clean	good		X
3342  * 	clean	bad	X	X
3343  * 	dirty	good		X
3344  *	dirty	bad
3345  *
3346  * Bad or good refers to whether a line has an E$ parity error or not.
3347  * Clean or dirty refers to the state of the modified bit.  We currently
3348  * default the scan rate to 100 (scan 10% of the cache per second).
3349  *
3350  * The following are E$ states and actions.
3351  *
3352  * We encode our state as a 3-bit number, consisting of:
3353  *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
3354  *	ECACHE_STATE_PARITY	(0=good,  1=bad)
3355  *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
3356  *
3357  * We associate a flushing and a logging action with each state.
3358  *
3359  * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3360  * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
3361  * E$ only, in addition to value being set by ec_flush.
3362  */
3363 
3364 #define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
3365 #define	NEVER_FLUSH		0x0	/* never the flush the E$ line */
3366 #define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */
3367 
3368 struct {
3369 	char	ec_flush;		/* whether to flush or not */
3370 	char	ec_log;			/* ecache logging */
3371 	char	ec_log_type;		/* log type info */
3372 } ec_action[] = {	/* states of the E$ line in M P B */
3373 	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
3374 	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
3375 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
3376 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
3377 	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
3378 	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
3379 	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
3380 	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
3381 };
3382 
/*
 * Offsets into the ec_action[] that determines clean_good_busy and
 * dirty_good_busy lines.
 */
#define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
#define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */

/*
 * CGB(x, m) / DGB(x, m) are true when state index x is the clean_good_busy
 * (resp. dirty_good_busy) entry and the E$ mode m is not ECACHE_CPU_MIRROR,
 * i.e. the extra busy-time flushing applies to non-mirrored E$ only.
 *
 * Both arguments are fully parenthesized so that an expression argument
 * (for example "a & b") is compared as a whole rather than being split by
 * operator precedence.
 */
#define	CGB(x, m)	\
	(((x) == ECACHE_CGB_LINE) && ((m) != ECACHE_CPU_MIRROR))
#define	DGB(x, m)	\
	(((x) == ECACHE_DGB_LINE) && ((m) != ECACHE_CPU_MIRROR))

/* Bits of the 3-bit M/P/B state used to index ec_action[]. */
#define	ECACHE_STATE_MODIFIED	0x4
#define	ECACHE_STATE_PARITY	0x2
#define	ECACHE_STATE_BUSY	0x1
3400 
3401 /*
3402  * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced.
3403  */
3404 int ecache_calls_a_sec_mirrored = 1;
3405 int ecache_lines_per_call_mirrored = 1;
3406 
3407 int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
3408 int ecache_scrub_verbose = 1;		/* prints clean and dirty lines */
3409 int ecache_scrub_panic = 0;		/* panics on a clean and dirty line */
3410 int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
3411 int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
3412 int ecache_idle_factor = 1;		/* increase the scan rate when idle */
3413 int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
3414 int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */
3415 
3416 volatile int ec_timeout_calls = 1;	/* timeout calls */
3417 
3418 /*
3419  * Interrupt number and pil for ecache scrubber cross-trap calls.
3420  */
3421 static uint64_t ecache_scrub_inum;
3422 uint_t ecache_scrub_pil = PIL_9;
3423 
3424 /*
3425  * Kstats for the E$ scrubber.
3426  */
3427 typedef struct ecache_kstat {
3428 	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
3429 	kstat_named_t clean_good_busy;		/* # of lines skipped */
3430 	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
3431 	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
3432 	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
3433 	kstat_named_t dirty_good_busy;		/* # of lines skipped */
3434 	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
3435 	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
3436 	kstat_named_t invalid_lines;		/* # of invalid lines */
3437 	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
3438 	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
3439 	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
3440 } ecache_kstat_t;
3441 
3442 static ecache_kstat_t ec_kstat_template = {
3443 	{ "clean_good_idle", KSTAT_DATA_ULONG },
3444 	{ "clean_good_busy", KSTAT_DATA_ULONG },
3445 	{ "clean_bad_idle", KSTAT_DATA_ULONG },
3446 	{ "clean_bad_busy", KSTAT_DATA_ULONG },
3447 	{ "dirty_good_idle", KSTAT_DATA_ULONG },
3448 	{ "dirty_good_busy", KSTAT_DATA_ULONG },
3449 	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
3450 	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
3451 	{ "invalid_lines", KSTAT_DATA_ULONG },
3452 	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
3453 	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
3454 	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
3455 };
3456 
3457 struct kmem_cache *sf_private_cache;
3458 
3459 /*
3460  * Called periodically on each CPU to scan the ecache once a sec.
3461  * adjusting the ecache line index appropriately
3462  */
3463 void
3464 scrub_ecache_line()
3465 {
3466 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3467 	int cpuid = CPU->cpu_id;
3468 	uint32_t index = ssmp->ecache_flush_index;
3469 	uint64_t ec_size = cpunodes[cpuid].ecache_size;
3470 	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
3471 	int nlines = ssmp->ecache_nlines;
3472 	uint32_t ec_set_size = ec_size / ecache_associativity;
3473 	int ec_mirror = ssmp->ecache_mirror;
3474 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
3475 
3476 	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
3477 	int mpb;		/* encode Modified, Parity, Busy for action */
3478 	uchar_t state;
3479 	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
3480 	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3481 	ec_data_t ec_data[8];
3482 	kstat_named_t *ec_knp;
3483 
3484 	switch (ec_mirror) {
3485 		default:
3486 		case ECACHE_CPU_NON_MIRROR:
3487 			/*
3488 			 * The E$ scan rate is expressed in units of tenths of
3489 			 * a percent.  ecache_scan_rate = 1000 (100%) means the
3490 			 * whole cache is scanned every second.
3491 			 */
3492 			scan_lines = (nlines * ecache_scan_rate) /
3493 					(1000 * ecache_calls_a_sec);
3494 			if (!(ssmp->ecache_busy)) {
3495 				if (ecache_idle_factor > 0) {
3496 					scan_lines *= ecache_idle_factor;
3497 				}
3498 			} else {
3499 				flush_clean_busy = (scan_lines *
3500 					ecache_flush_clean_good_busy) / 100;
3501 				flush_dirty_busy = (scan_lines *
3502 					ecache_flush_dirty_good_busy) / 100;
3503 			}
3504 
3505 			ec_timeout_calls = (ecache_calls_a_sec ?
3506 						ecache_calls_a_sec : 1);
3507 			break;
3508 
3509 		case ECACHE_CPU_MIRROR:
3510 			scan_lines = ecache_lines_per_call_mirrored;
3511 			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
3512 					ecache_calls_a_sec_mirrored : 1);
3513 			break;
3514 	}
3515 
3516 	/*
3517 	 * The ecache scrubber algorithm operates by reading and
3518 	 * decoding the E$ tag to determine whether the corresponding E$ line
3519 	 * can be scrubbed. There is a implicit assumption in the scrubber
3520 	 * logic that the E$ tag is valid. Unfortunately, this assertion is
3521 	 * flawed since the E$ tag may also be corrupted and have parity errors
3522 	 * The scrubber logic is enhanced to check the validity of the E$ tag
3523 	 * before scrubbing. When a parity error is detected in the E$ tag,
3524 	 * it is possible to recover and scrub the tag under certain conditions
3525 	 * so that a ETP error condition can be avoided.
3526 	 */
3527 
3528 	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
3529 		/*
3530 		 * We get the old-AFSR before clearing the AFSR sticky bits
3531 		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
3532 		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
3533 		 */
3534 		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
3535 		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
3536 				cpu_ec_state_shift);
3537 
3538 		/*
3539 		 * ETP is set try to scrub the ecache tag.
3540 		 */
3541 		if (nafsr & P_AFSR_ETP) {
3542 			ecache_scrub_tag_err(nafsr, state, index);
3543 		} else if (state & cpu_ec_state_valid) {
3544 			/*
3545 			 * ETP is not set, E$ tag is valid.
3546 			 * Proceed with the E$ scrubbing.
3547 			 */
3548 			if (state & cpu_ec_state_dirty)
3549 				mpb |= ECACHE_STATE_MODIFIED;
3550 
3551 			tafsr = check_ecache_line(index, acc_afsr);
3552 
3553 			if (tafsr & P_AFSR_EDP) {
3554 				mpb |= ECACHE_STATE_PARITY;
3555 
3556 				if (ecache_scrub_verbose ||
3557 							ecache_scrub_panic) {
3558 					get_ecache_dtag(P2ALIGN(index, 64),
3559 						(uint64_t *)&ec_data[0],
3560 						&ec_tag, &oafsr, acc_afsr);
3561 				}
3562 			}
3563 
3564 			if (ssmp->ecache_busy)
3565 				mpb |= ECACHE_STATE_BUSY;
3566 
3567 			ec_knp = (kstat_named_t *)ec_ksp + mpb;
3568 			ec_knp->value.ul++;
3569 
3570 			paddr = ((ec_tag & cpu_ec_tag_mask) <<
3571 				cpu_ec_tag_shift) | (index % ec_set_size);
3572 
3573 			/*
3574 			 * We flush the E$ lines depending on the ec_flush,
3575 			 * we additionally flush clean_good_busy and
3576 			 * dirty_good_busy lines for mirrored E$.
3577 			 */
3578 			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
3579 				flushecacheline(paddr, ec_size);
3580 			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
3581 				(ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
3582 					flushecacheline(paddr, ec_size);
3583 			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
3584 				softcall(ecache_page_retire, (void *)paddr);
3585 			}
3586 
3587 			/*
3588 			 * Conditionally flush both the clean_good and
3589 			 * dirty_good lines when busy.
3590 			 */
3591 			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
3592 				flush_clean_busy--;
3593 				flushecacheline(paddr, ec_size);
3594 				ec_ksp->clean_good_busy_flush.value.ul++;
3595 			} else if (DGB(mpb, ec_mirror) &&
3596 						(flush_dirty_busy > 0)) {
3597 				flush_dirty_busy--;
3598 				flushecacheline(paddr, ec_size);
3599 				ec_ksp->dirty_good_busy_flush.value.ul++;
3600 			}
3601 
3602 			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
3603 						ecache_scrub_panic)) {
3604 				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
3605 						tafsr);
3606 			}
3607 
3608 		} else {
3609 			ec_ksp->invalid_lines.value.ul++;
3610 		}
3611 
3612 		if ((index += ec_linesize) >= ec_size)
3613 			index = 0;
3614 
3615 	}
3616 
3617 	/*
3618 	 * set the ecache scrub index for the next time around
3619 	 */
3620 	ssmp->ecache_flush_index = index;
3621 
3622 	if (*acc_afsr & P_AFSR_CP) {
3623 		uint64_t ret_afsr;
3624 
3625 		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
3626 		if ((ret_afsr & P_AFSR_CP) == 0)
3627 			*acc_afsr = 0;
3628 	}
3629 }
3630 
3631 /*
3632  * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
3633  * we decrement the outstanding request count to zero.
3634  */
3635 
3636 /*ARGSUSED*/
3637 uint_t
3638 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
3639 {
3640 	int i;
3641 	int outstanding;
3642 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3643 	uint32_t *countp = &ssmp->ec_scrub_outstanding;
3644 
3645 	do {
3646 		outstanding = *countp;
3647 		ASSERT(outstanding > 0);
3648 		for (i = 0; i < outstanding; i++)
3649 			scrub_ecache_line();
3650 	} while (atomic_add_32_nv(countp, -outstanding));
3651 
3652 	return (DDI_INTR_CLAIMED);
3653 }
3654 
3655 /*
3656  * force each cpu to perform an ecache scrub, called from a timeout
3657  */
3658 extern xcfunc_t ecache_scrubreq_tl1;
3659 
3660 void
3661 do_scrub_ecache_line(void)
3662 {
3663 	long delta;
3664 
3665 	if (ecache_calls_a_sec > hz)
3666 		ecache_calls_a_sec = hz;
3667 	else if (ecache_calls_a_sec <= 0)
3668 	    ecache_calls_a_sec = 1;
3669 
3670 	if (ecache_calls_a_sec_mirrored > hz)
3671 		ecache_calls_a_sec_mirrored = hz;
3672 	else if (ecache_calls_a_sec_mirrored <= 0)
3673 	    ecache_calls_a_sec_mirrored = 1;
3674 
3675 	if (ecache_scrub_enable) {
3676 		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
3677 		delta = hz / ec_timeout_calls;
3678 	} else {
3679 		delta = hz;
3680 	}
3681 
3682 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3683 		delta);
3684 }
3685 
3686 /*
3687  * initialization for ecache scrubbing
3688  * This routine is called AFTER all cpus have had cpu_init_private called
3689  * to initialize their private data areas.
3690  */
3691 void
3692 cpu_init_cache_scrub(void)
3693 {
3694 	if (ecache_calls_a_sec > hz) {
3695 		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
3696 		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
3697 		ecache_calls_a_sec = hz;
3698 	}
3699 
3700 	/*
3701 	 * Register softint for ecache scrubbing.
3702 	 */
3703 	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
3704 	    scrub_ecache_line_intr, NULL, SOFTINT_MT);
3705 
3706 	/*
3707 	 * kick off the scrubbing using realtime timeout
3708 	 */
3709 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3710 	    hz / ecache_calls_a_sec);
3711 }
3712 
3713 /*
3714  * Unset the busy flag for this cpu.
3715  */
3716 void
3717 cpu_idle_ecache_scrub(struct cpu *cp)
3718 {
3719 	if (CPU_PRIVATE(cp) != NULL) {
3720 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3721 							sfpr_scrub_misc);
3722 		ssmp->ecache_busy = ECACHE_CPU_IDLE;
3723 	}
3724 }
3725 
3726 /*
3727  * Set the busy flag for this cpu.
3728  */
3729 void
3730 cpu_busy_ecache_scrub(struct cpu *cp)
3731 {
3732 	if (CPU_PRIVATE(cp) != NULL) {
3733 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3734 							sfpr_scrub_misc);
3735 		ssmp->ecache_busy = ECACHE_CPU_BUSY;
3736 	}
3737 }
3738 
3739 /*
3740  * initialize the ecache scrubber data structures
3741  * The global entry point cpu_init_private replaces this entry point.
3742  *
3743  */
3744 static void
3745 cpu_init_ecache_scrub_dr(struct cpu *cp)
3746 {
3747 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3748 	int cpuid = cp->cpu_id;
3749 
3750 	/*
3751 	 * intialize bookkeeping for cache scrubbing
3752 	 */
3753 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3754 
3755 	ssmp->ecache_flush_index = 0;
3756 
3757 	ssmp->ecache_nlines =
3758 		cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
3759 
3760 	/*
3761 	 * Determine whether we are running on mirrored SRAM
3762 	 */
3763 
3764 	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
3765 		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
3766 	else
3767 		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
3768 
3769 	cpu_busy_ecache_scrub(cp);
3770 
3771 	/*
3772 	 * initialize the kstats
3773 	 */
3774 	ecache_kstat_init(cp);
3775 }
3776 
3777 /*
3778  * uninitialize the ecache scrubber data structures
3779  * The global entry point cpu_uninit_private replaces this entry point.
3780  */
3781 static void
3782 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
3783 {
3784 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3785 
3786 	if (ssmp->ecache_ksp != NULL) {
3787 		kstat_delete(ssmp->ecache_ksp);
3788 		ssmp->ecache_ksp = NULL;
3789 	}
3790 
3791 	/*
3792 	 * un-initialize bookkeeping for cache scrubbing
3793 	 */
3794 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3795 
3796 	cpu_idle_ecache_scrub(cp);
3797 }
3798 
3799 struct kmem_cache *sf_private_cache;
3800 
3801 /*
3802  * Cpu private initialization.  This includes allocating the cpu_private
3803  * data structure, initializing it, and initializing the scrubber for this
3804  * cpu.  This is called once for EVERY cpu, including CPU 0. This function
3805  * calls cpu_init_ecache_scrub_dr to init the scrubber.
3806  * We use kmem_cache_create for the spitfire private data structure because it
3807  * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
3808  */
3809 void
3810 cpu_init_private(struct cpu *cp)
3811 {
3812 	spitfire_private_t *sfprp;
3813 
3814 	ASSERT(CPU_PRIVATE(cp) == NULL);
3815 
3816 	/*
3817 	 * If the sf_private_cache has not been created, create it.
3818 	 */
3819 	if (sf_private_cache == NULL) {
3820 		sf_private_cache = kmem_cache_create("sf_private_cache",
3821 			sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3822 			NULL, NULL, NULL, NULL, 0);
3823 		ASSERT(sf_private_cache);
3824 	}
3825 
3826 	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3827 
3828 	bzero(sfprp, sizeof (spitfire_private_t));
3829 
3830 	cpu_init_ecache_scrub_dr(cp);
3831 }
3832 
3833 /*
3834  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
3835  * deallocate the scrubber data structures and cpu_private data structure.
3836  * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit
3837  * the scrubber for the specified cpu.
3838  */
3839 void
3840 cpu_uninit_private(struct cpu *cp)
3841 {
3842 	ASSERT(CPU_PRIVATE(cp));
3843 
3844 	cpu_uninit_ecache_scrub_dr(cp);
3845 	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3846 	CPU_PRIVATE(cp) = NULL;
3847 }
3848 
3849 /*
3850  * initialize the ecache kstats for each cpu
3851  */
3852 static void
3853 ecache_kstat_init(struct cpu *cp)
3854 {
3855 	struct kstat *ksp;
3856 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3857 
3858 	ASSERT(ssmp != NULL);
3859 
3860 	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3861 	    KSTAT_TYPE_NAMED,
3862 	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3863 	    KSTAT_FLAG_WRITABLE)) == NULL) {
3864 		ssmp->ecache_ksp = NULL;
3865 		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3866 		return;
3867 	}
3868 
3869 	ssmp->ecache_ksp = ksp;
3870 	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3871 	kstat_install(ksp);
3872 }
3873 
3874 /*
3875  * log the bad ecache information
3876  */
3877 static void
3878 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3879 		uint64_t afsr)
3880 {
3881 	spitf_async_flt spf_flt;
3882 	struct async_flt *aflt;
3883 	int i;
3884 	char *class;
3885 
3886 	bzero(&spf_flt, sizeof (spitf_async_flt));
3887 	aflt = &spf_flt.cmn_asyncflt;
3888 
3889 	for (i = 0; i < 8; i++) {
3890 		spf_flt.flt_ec_data[i] = ec_data[i];
3891 	}
3892 
3893 	spf_flt.flt_ec_tag = ec_tag;
3894 
3895 	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
3896 		spf_flt.flt_type = ec_action[mpb].ec_log_type;
3897 	} else spf_flt.flt_type = (ushort_t)mpb;
3898 
3899 	aflt->flt_inst = CPU->cpu_id;
3900 	aflt->flt_class = CPU_FAULT;
3901 	aflt->flt_id = gethrtime_waitfree();
3902 	aflt->flt_addr = paddr;
3903 	aflt->flt_stat = afsr;
3904 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3905 
3906 	switch (mpb) {
3907 	case CPU_ECACHE_TAG_ERR:
3908 	case CPU_ECACHE_ADDR_PAR_ERR:
3909 	case CPU_ECACHE_ETP_ETS_ERR:
3910 	case CPU_ECACHE_STATE_ERR:
3911 		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3912 		break;
3913 	default:
3914 		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3915 		break;
3916 	}
3917 
3918 	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3919 	    ue_queue, aflt->flt_panic);
3920 
3921 	if (aflt->flt_panic)
3922 		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
3923 					"line detected");
3924 }
3925 
3926 /*
3927  * Process an ecache error that occured during the E$ scrubbing.
3928  * We do the ecache scan to find the bad line, flush the bad line
3929  * and start the memscrubber to find any UE (in memory or in another cache)
3930  */
3931 static uint64_t
3932 ecache_scrub_misc_err(int type, uint64_t afsr)
3933 {
3934 	spitf_async_flt spf_flt;
3935 	struct async_flt *aflt;
3936 	uint64_t oafsr;
3937 
3938 	bzero(&spf_flt, sizeof (spitf_async_flt));
3939 	aflt = &spf_flt.cmn_asyncflt;
3940 
3941 	/*
3942 	 * Scan each line in the cache to look for the one
3943 	 * with bad parity
3944 	 */
3945 	aflt->flt_addr = AFLT_INV_ADDR;
3946 	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3947 		&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3948 
3949 	if (oafsr & P_AFSR_CP) {
3950 		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3951 		*cp_afsr |= oafsr;
3952 	}
3953 
3954 	/*
3955 	 * If we found a bad PA, update the state to indicate if it is
3956 	 * memory or I/O space.
3957 	 */
3958 	if (aflt->flt_addr != AFLT_INV_ADDR) {
3959 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3960 			MMU_PAGESHIFT)) ? 1 : 0;
3961 	}
3962 
3963 	spf_flt.flt_type = (ushort_t)type;
3964 
3965 	aflt->flt_inst = CPU->cpu_id;
3966 	aflt->flt_class = CPU_FAULT;
3967 	aflt->flt_id = gethrtime_waitfree();
3968 	aflt->flt_status = afsr;
3969 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3970 
3971 	/*
3972 	 * We have the bad line, flush that line and start
3973 	 * the memscrubber.
3974 	 */
3975 	if (spf_flt.flt_ec_lcnt > 0) {
3976 		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
3977 			cpunodes[CPU->cpu_id].ecache_size);
3978 		read_all_memscrub = 1;
3979 		memscrub_run();
3980 	}
3981 
3982 	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
3983 	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
3984 	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
3985 
3986 	return (oafsr);
3987 }
3988 
3989 static void
3990 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
3991 {
3992 	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
3993 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3994 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
3995 	uint64_t ec_tag, paddr, oafsr;
3996 	ec_data_t ec_data[8];
3997 	int cpuid = CPU->cpu_id;
3998 	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
3999 						ecache_associativity;
4000 	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
4001 
4002 	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
4003 			&oafsr, cpu_afsr);
4004 	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
4005 						(index % ec_set_size);
4006 
4007 	/*
4008 	 * E$ tag state has good parity
4009 	 */
4010 	if ((afsr_ets & cpu_ec_state_parity) == 0) {
4011 		if (afsr_ets & cpu_ec_parity) {
4012 			/*
4013 			 * E$ tag state bits indicate the line is clean,
4014 			 * invalidate the E$ tag and continue.
4015 			 */
4016 			if (!(state & cpu_ec_state_dirty)) {
4017 				/*
4018 				 * Zero the tag and mark the state invalid
4019 				 * with good parity for the tag.
4020 				 */
4021 				if (isus2i || isus2e)
4022 					write_hb_ec_tag_parity(index);
4023 				else
4024 					write_ec_tag_parity(index);
4025 
4026 				/* Sync with the dual tag */
4027 				flushecacheline(0,
4028 					cpunodes[CPU->cpu_id].ecache_size);
4029 				ec_ksp->tags_cleared.value.ul++;
4030 				ecache_scrub_log(ec_data, ec_tag, paddr,
4031 					CPU_ECACHE_TAG_ERR, afsr);
4032 				return;
4033 			} else {
4034 				ecache_scrub_log(ec_data, ec_tag, paddr,
4035 					CPU_ECACHE_ADDR_PAR_ERR, afsr);
4036 				cmn_err(CE_PANIC, " E$ tag address has bad"
4037 							" parity");
4038 			}
4039 		} else if ((afsr_ets & cpu_ec_parity) == 0) {
4040 			/*
4041 			 * ETS is zero but ETP is set
4042 			 */
4043 			ecache_scrub_log(ec_data, ec_tag, paddr,
4044 				CPU_ECACHE_ETP_ETS_ERR, afsr);
4045 			cmn_err(CE_PANIC, "AFSR.ETP is set and"
4046 				" AFSR.ETS is zero");
4047 		}
4048 	} else {
4049 		/*
4050 		 * E$ tag state bit has a bad parity
4051 		 */
4052 		ecache_scrub_log(ec_data, ec_tag, paddr,
4053 				CPU_ECACHE_STATE_ERR, afsr);
4054 		cmn_err(CE_PANIC, "E$ tag state has bad parity");
4055 	}
4056 }
4057 
4058 static void
4059 ecache_page_retire(void *arg)
4060 {
4061 	uint64_t paddr = (uint64_t)arg;
4062 	(void) page_retire(paddr, PR_UE);
4063 }
4064 
/*
 * Empty in this file: the function body contains no statements.
 */
4065 void
4066 sticksync_slave(void)
4067 {}
4068 
/*
 * Empty in this file: the function body contains no statements.
 */
4069 void
4070 sticksync_master(void)
4071 {}
4072 
/*
 * Empty in this file: none of the arguments are used (hence ARGSUSED).
 */
4073 /*ARGSUSED*/
4074 void
4075 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
4076 {}
4077 
4078 void
4079 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4080 {
4081 	int status;
4082 	ddi_fm_error_t de;
4083 
4084 	bzero(&de, sizeof (ddi_fm_error_t));
4085 
4086 	de.fme_version = DDI_FME_VERSION;
4087 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4088 	    FM_ENA_FMT1);
4089 	de.fme_flag = expected;
4090 	de.fme_bus_specific = (void *)aflt->flt_addr;
4091 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4092 
4093 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4094 		aflt->flt_panic = 1;
4095 }
4096 
4097 /*ARGSUSED*/
4098 void
4099 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
4100     errorq_t *eqp, uint_t flag)
4101 {
4102 	struct async_flt *aflt = (struct async_flt *)payload;
4103 
4104 	aflt->flt_erpt_class = error_class;
4105 	errorq_dispatch(eqp, payload, payload_sz, flag);
4106 }
4107 
/* cpu_mp_init() sizes mem_ce_simm[] as MAX_SIMM * max_ncpus entries. */
4108 #define	MAX_SIMM	8
4109 
/*
 * One slot of mem_ce_simm[]: correctable error counts for one unum.
 * ce_count_unum() treats a slot whose name[0] is '\0' as unused.
 */
4110 struct ce_info {
4111 	char    name[UNUM_NAMLEN];	/* unum string for this slot */
4112 	uint64_t intermittent_total;	/* ECC_INTERMITTENT errors seen */
4113 	uint64_t persistent_total;	/* ECC_PERSISTENT errors seen */
4114 	uint64_t sticky_total;		/* ECC_STICKY errors seen */
4115 	unsigned short leaky_bucket_cnt; /* raised per persistent error, */
					/* lowered by leaky_bucket_timeout() */
4116 };
4117 
4118 /*
4119  * Separately-defined structure for use in reporting the ce_info
4120  * to SunVTS without exposing the internal layout and implementation
4121  * of struct ce_info.
4122  */
4123 static struct ecc_error_info ecc_error_info_data = {
4124 	{ "version", KSTAT_DATA_UINT32 },
4125 	{ "maxcount", KSTAT_DATA_UINT32 },
4126 	{ "count", KSTAT_DATA_UINT32 }
4127 };
/* Number of named kstat entries in ecc_error_info_data. */
4128 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
4129     sizeof (struct kstat_named);
4130 
/* Build-time check that the kstat name length can hold a full unum. */
4131 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
4132 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
4133 #endif
4134 
/*
 * The ce_info array and its element count; both are set by cpu_mp_init().
 */
4135 struct ce_info  *mem_ce_simm = NULL;
4136 size_t mem_ce_simm_size = 0;
4137 
4138 /*
4139  * Default values for the number of CE's allowed per interval.
4140  * Interval is defined in minutes
4141  * SOFTERR_MIN_TIMEOUT is defined in microseconds
4142  *
4142  * TIMEOUT_NONE means the leaky bucket timeout has not been started.
4142  * TIMEOUT_SET is the placeholder ce_count_unum() swaps in (via casptr)
4142  * so that only one caller starts the timeout.
4142  */
4143 #define	SOFTERR_LIMIT_DEFAULT		2
4144 #define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
4145 #define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
4146 #define	TIMEOUT_NONE			((timeout_id_t)0)
4147 #define	TIMEOUT_SET			((timeout_id_t)1)
4148 
4149 /*
4150  * timeout identifier for leaky_bucket
4151  */
4152 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
4153 
4154 /*
4155  * Tunables for maximum number of allowed CE's in a given time
4155  * (ecc_softerr_interval is in minutes).  add_leaky_bucket_timeout()
4155  * resets either one to its default if it is found to be <= 0.
4156  */
4157 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4158 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4159 
4160 void
4161 cpu_mp_init(void)
4162 {
4163 	size_t size = cpu_aflt_size();
4164 	size_t i;
4165 	kstat_t *ksp;
4166 
4167 	/*
4168 	 * Initialize the CE error handling buffers.
4169 	 */
4170 	mem_ce_simm_size = MAX_SIMM * max_ncpus;
4171 	size = sizeof (struct ce_info) * mem_ce_simm_size;
4172 	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
4173 
4174 	ksp = kstat_create("unix", 0, "ecc-info", "misc",
4175 	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
4176 	if (ksp != NULL) {
4177 		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
4178 		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
4179 		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
4180 		ecc_error_info_data.count.value.ui32 = 0;
4181 		kstat_install(ksp);
4182 	}
4183 
4184 	for (i = 0; i < mem_ce_simm_size; i++) {
4185 		struct kstat_ecc_mm_info *kceip;
4186 
4187 		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
4188 		    KM_SLEEP);
4189 		ksp = kstat_create("mm", i, "ecc-info", "misc",
4190 		    KSTAT_TYPE_NAMED,
4191 		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
4192 		    KSTAT_FLAG_VIRTUAL);
4193 		if (ksp != NULL) {
4194 			/*
4195 			 * Re-declare ks_data_size to include room for the
4196 			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
4197 			 * set.
4198 			 */
4199 			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
4200 			    KSTAT_CE_UNUM_NAMLEN;
4201 			ksp->ks_data = kceip;
4202 			kstat_named_init(&kceip->name,
4203 			    "name", KSTAT_DATA_STRING);
4204 			kstat_named_init(&kceip->intermittent_total,
4205 			    "intermittent_total", KSTAT_DATA_UINT64);
4206 			kstat_named_init(&kceip->persistent_total,
4207 			    "persistent_total", KSTAT_DATA_UINT64);
4208 			kstat_named_init(&kceip->sticky_total,
4209 			    "sticky_total", KSTAT_DATA_UINT64);
4210 			/*
4211 			 * Use the default snapshot routine as it knows how to
4212 			 * deal with named kstats with long strings.
4213 			 */
4214 			ksp->ks_update = ecc_kstat_update;
4215 			kstat_install(ksp);
4216 		} else {
4217 			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4218 		}
4219 	}
4220 }
4221 
4222 /*ARGSUSED*/
4223 static void
4224 leaky_bucket_timeout(void *arg)
4225 {
4226 	int i;
4227 	struct ce_info *psimm = mem_ce_simm;
4228 
4229 	for (i = 0; i < mem_ce_simm_size; i++) {
4230 		if (psimm[i].leaky_bucket_cnt > 0)
4231 			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
4232 	}
4233 	add_leaky_bucket_timeout();
4234 }
4235 
4236 static void
4237 add_leaky_bucket_timeout(void)
4238 {
4239 	long timeout_in_microsecs;
4240 
4241 	/*
4242 	 * create timeout for next leak.
4243 	 *
4244 	 * The timeout interval is calculated as follows
4245 	 *
4246 	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4247 	 *
4248 	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4249 	 * in a minute), then multiply this by MICROSEC to get the interval
4250 	 * in microseconds.  Divide this total by ecc_softerr_limit so that
4251 	 * the timeout interval is accurate to within a few microseconds.
4252 	 */
4253 
4254 	if (ecc_softerr_limit <= 0)
4255 		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4256 	if (ecc_softerr_interval <= 0)
4257 		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4258 
4259 	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
4260 	    ecc_softerr_limit;
4261 
4262 	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
4263 		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
4264 
4265 	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
4266 	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
4267 }
4268 
4269 /*
4270  * Legacy Correctable ECC Error Hash
4271  *
4272  * All of the code below this comment is used to implement a legacy array
4273  * which counted intermittent, persistent, and sticky CE errors by unum,
4274  * and then was later extended to publish the data as a kstat for SunVTS.
4275  * All of this code is replaced by FMA, and remains here until such time
4276  * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
4277  *
4278  * Errors are saved in three buckets per-unum:
4279  * (1) sticky - scrub was unsuccessful, cannot be scrubbed
4280  *     This could represent a problem, and is immediately printed out.
4281  * (2) persistent - was successfully scrubbed
4282  *     These errors use the leaky bucket algorithm to determine
4283  *     if there is a serious problem.
4284  * (3) intermittent - may have originated from the cpu or upa/safari bus,
4285  *     and does not necessarily indicate any problem with the dimm itself,
4286  *     is critical information for debugging new hardware.
4287  *     Because we do not know if it came from the dimm, it would be
4288  *     inappropriate to include these in the leaky bucket counts.
4289  *
4290  * If the E$ line was modified before the scrub operation began, then the
4291  * displacement flush at the beginning of scrubphys() will cause the modified
4292  * line to be written out, which will clean up the CE.  Then, any subsequent
4293  * read will not cause an error, which will cause persistent errors to be
4294  * identified as intermittent.
4295  *
4296  * If a DIMM is going bad, it will produce true persistents as well as
4297  * false intermittents, so these intermittents can be safely ignored.
4298  *
4299  * If the error count is excessive for a DIMM, this function will return
4300  * PR_MCE, and the CPU module may then decide to remove that page from use.
4301  */
4302 static int
4303 ce_count_unum(int status, int len, char *unum)
4304 {
4305 	int i;
4306 	struct ce_info *psimm = mem_ce_simm;
4307 	int page_status = PR_OK;
4308 
4309 	ASSERT(psimm != NULL);
4310 
4311 	if (len <= 0 ||
4312 	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
4313 		return (page_status);
4314 
4315 	/*
4316 	 * Initialize the leaky_bucket timeout
4317 	 */
4318 	if (casptr(&leaky_bucket_timeout_id,
4319 	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
4320 		add_leaky_bucket_timeout();
4321 
4322 	for (i = 0; i < mem_ce_simm_size; i++) {
4323 		if (psimm[i].name[0] == '\0') {
4324 			/*
4325 			 * Hit the end of the valid entries, add
4326 			 * a new one.
4327 			 */
4328 			(void) strncpy(psimm[i].name, unum, len);
4329 			if (status & ECC_STICKY) {
4330 				/*
4331 				 * Sticky - the leaky bucket is used to track
4332 				 * soft errors.  Since a sticky error is a
4333 				 * hard error and likely to be retired soon,
4334 				 * we do not count it in the leaky bucket.
4335 				 */
4336 				psimm[i].leaky_bucket_cnt = 0;
4337 				psimm[i].intermittent_total = 0;
4338 				psimm[i].persistent_total = 0;
4339 				psimm[i].sticky_total = 1;
4340 				cmn_err(CE_WARN,
4341 				    "[AFT0] Sticky Softerror encountered "
4342 				    "on Memory Module %s\n", unum);
4343 				page_status = PR_MCE;
4344 			} else if (status & ECC_PERSISTENT) {
4345 				psimm[i].leaky_bucket_cnt = 1;
4346 				psimm[i].intermittent_total = 0;
4347 				psimm[i].persistent_total = 1;
4348 				psimm[i].sticky_total = 0;
4349 			} else {
4350 				/*
4351 				 * Intermittent - Because the scrub operation
4352 				 * cannot find the error in the DIMM, we will
4353 				 * not count these in the leaky bucket
4354 				 */
4355 				psimm[i].leaky_bucket_cnt = 0;
4356 				psimm[i].intermittent_total = 1;
4357 				psimm[i].persistent_total = 0;
4358 				psimm[i].sticky_total = 0;
4359 			}
4360 			ecc_error_info_data.count.value.ui32++;
4361 			break;
4362 		} else if (strncmp(unum, psimm[i].name, len) == 0) {
4363 			/*
4364 			 * Found an existing entry for the current
4365 			 * memory module, adjust the counts.
4366 			 */
4367 			if (status & ECC_STICKY) {
4368 				psimm[i].sticky_total++;
4369 				cmn_err(CE_WARN,
4370 				    "[AFT0] Sticky Softerror encountered "
4371 				    "on Memory Module %s\n", unum);
4372 				page_status = PR_MCE;
4373 			} else if (status & ECC_PERSISTENT) {
4374 				int new_value;
4375 
4376 				new_value = atomic_add_16_nv(
4377 				    &psimm[i].leaky_bucket_cnt, 1);
4378 				psimm[i].persistent_total++;
4379 				if (new_value > ecc_softerr_limit) {
4380 					cmn_err(CE_WARN, "[AFT0] Most recent %d"
4381 					    " soft errors from Memory Module"
4382 					    " %s exceed threshold (N=%d,"
4383 					    " T=%dh:%02dm) triggering page"
4384 					    " retire", new_value, unum,
4385 					    ecc_softerr_limit,
4386 					    ecc_softerr_interval / 60,
4387 					    ecc_softerr_interval % 60);
4388 					atomic_add_16(
4389 					    &psimm[i].leaky_bucket_cnt, -1);
4390 					page_status = PR_MCE;
4391 				}
4392 			} else { /* Intermittent */
4393 				psimm[i].intermittent_total++;
4394 			}
4395 			break;
4396 		}
4397 	}
4398 
4399 	if (i >= mem_ce_simm_size)
4400 		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
4401 		    "space.\n");
4402 
4403 	return (page_status);
4404 }
4405 
4406 /*
4407  * Function to support counting of IO detected CEs.
4408  */
4409 void
4410 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
4411 {
4412 	int err;
4413 
4414 	err = ce_count_unum(ecc->flt_status, len, unum);
4415 	if (err != PR_OK && automatic_page_removal) {
4416 		(void) page_retire(ecc->flt_addr, err);
4417 	}
4418 }
4419 
4420 static int
4421 ecc_kstat_update(kstat_t *ksp, int rw)
4422 {
4423 	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
4424 	struct ce_info *ceip = mem_ce_simm;
4425 	int i = ksp->ks_instance;
4426 
4427 	if (rw == KSTAT_WRITE)
4428 		return (EACCES);
4429 
4430 	ASSERT(ksp->ks_data != NULL);
4431 	ASSERT(i < mem_ce_simm_size && i >= 0);
4432 
4433 	/*
4434 	 * Since we're not using locks, make sure that we don't get partial
4435 	 * data. The name is always copied before the counters are incremented
4436 	 * so only do this update routine if at least one of the counters is
4437 	 * non-zero, which ensures that ce_count_unum() is done, and the
4438 	 * string is fully copied.
4439 	 */
4440 	if (ceip[i].intermittent_total == 0 &&
4441 	    ceip[i].persistent_total == 0 &&
4442 	    ceip[i].sticky_total == 0) {
4443 		/*
4444 		 * Uninitialized or partially initialized. Ignore.
4445 		 * The ks_data buffer was allocated via kmem_zalloc,
4446 		 * so no need to bzero it.
4447 		 */
4448 		return (0);
4449 	}
4450 
4451 	kstat_named_setstr(&kceip->name, ceip[i].name);
4452 	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
4453 	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
4454 	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
4455 
4456 	return (0);
4457 }
4458 
4459 #define	VIS_BLOCKSIZE		64
4460 
4461 int
4462 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
4463 {
4464 	int ret, watched;
4465 
4466 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4467 	ret = dtrace_blksuword32(addr, data, 0);
4468 	if (watched)
4469 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4470 
4471 	return (ret);
4472 }
4473 
/*
 * Empty in this file: the cpu argument is unused and nothing is done.
 */
4474 /*ARGSUSED*/
4475 void
4476 cpu_faulted_enter(struct cpu *cp)
4477 {
4478 }
4479 
/*
 * Empty in this file: the cpu argument is unused and nothing is done.
 */
4480 /*ARGSUSED*/
4481 void
4482 cpu_faulted_exit(struct cpu *cp)
4483 {
4484 }
4485 
/*
 * Empty in this file: the hat argument is unused and nothing is done.
 */
4486 /*ARGSUSED*/
4487 void
4488 mmu_init_kernel_pgsz(struct hat *hat)
4489 {
4490 }
4491 
4492 size_t
4493 mmu_get_kernel_lpsize(size_t lpsize)
4494 {
4495 	uint_t tte;
4496 
4497 	if (lpsize == 0) {
4498 		/* no setting for segkmem_lpsize in /etc/system: use default */
4499 		return (MMU_PAGESIZE4M);
4500 	}
4501 
4502 	for (tte = TTE8K; tte <= TTE4M; tte++) {
4503 		if (lpsize == TTEBYTES(tte))
4504 			return (lpsize);
4505 	}
4506 
4507 	return (TTEBYTES(TTE8K));
4508 }
4509