xref: /titanic_51/usr/src/uts/sun4u/cpu/us3_common.c (revision 050c9ebdc9d01dca610febe083c1796c5e013868)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/ddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/archsystm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/machthread.h>
37 #include <sys/cpu.h>
38 #include <sys/cmp.h>
39 #include <sys/elf_SPARC.h>
40 #include <vm/vm_dep.h>
41 #include <vm/hat_sfmmu.h>
42 #include <vm/seg_kpm.h>
43 #include <sys/cpuvar.h>
44 #include <sys/cheetahregs.h>
45 #include <sys/us3_module.h>
46 #include <sys/async.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/dditypes.h>
50 #include <sys/prom_debug.h>
51 #include <sys/prom_plat.h>
52 #include <sys/cpu_module.h>
53 #include <sys/sysmacros.h>
54 #include <sys/intreg.h>
55 #include <sys/clock.h>
56 #include <sys/platform_module.h>
57 #include <sys/machtrap.h>
58 #include <sys/ontrap.h>
59 #include <sys/panic.h>
60 #include <sys/memlist.h>
61 #include <sys/bootconf.h>
62 #include <sys/ivintr.h>
63 #include <sys/atomic.h>
64 #include <sys/taskq.h>
65 #include <sys/note.h>
66 #include <sys/ndifm.h>
67 #include <sys/ddifm.h>
68 #include <sys/fm/protocol.h>
69 #include <sys/fm/util.h>
70 #include <sys/fm/cpu/UltraSPARC-III.h>
71 #include <sys/fpras_impl.h>
72 #include <sys/dtrace.h>
73 #include <sys/watchpoint.h>
74 #include <sys/plat_ecc_unum.h>
75 #include <sys/cyclic.h>
76 #include <sys/errorq.h>
77 #include <sys/errclassify.h>
78 #include <sys/pghw.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 ch_cpu_logout_t	clop_before_flush;
85 ch_cpu_logout_t	clop_after_flush;
86 uint_t	flush_retries_done = 0;
87 /*
88  * Note that 'Cheetah PRM' refers to:
89  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
90  */
91 
92 /*
93  * Per CPU pointers to physical address of TL>0 logout data areas.
94  * These pointers have to be in the kernel nucleus to avoid MMU
95  * misses.
96  */
97 uint64_t ch_err_tl1_paddrs[NCPU];
98 
99 /*
100  * One statically allocated structure to use during startup/DR
101  * to prevent unnecessary panics.
102  */
103 ch_err_tl1_data_t ch_err_tl1_data;
104 
105 /*
106  * Per CPU pending error at TL>0, used by level15 softint handler
107  */
108 uchar_t ch_err_tl1_pending[NCPU];
109 
110 /*
111  * For deferred CE re-enable after trap.
112  */
113 taskq_t		*ch_check_ce_tq;
114 
115 /*
116  * Internal functions.
117  */
118 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
119 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
120 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
121     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
122 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
123     uint64_t t_afsr_bit);
124 static int clear_ecc(struct async_flt *ecc);
125 #if defined(CPU_IMP_ECACHE_ASSOC)
126 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
127 #endif
128 int cpu_ecache_set_size(struct cpu *cp);
129 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
130 int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
131 uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
132 int cpu_ectag_pa_to_subblk_state(int cachesize,
133 				uint64_t subaddr, uint64_t tag);
134 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
135 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
136 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
137 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
138 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
139 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
140 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
141 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
142 static void cpu_scrubphys(struct async_flt *aflt);
143 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
144     int *, int *);
145 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
146 static void cpu_ereport_init(struct async_flt *aflt);
147 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
148 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
149 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
150     uint64_t nceen, ch_cpu_logout_t *clop);
151 static int cpu_ce_delayed_ec_logout(uint64_t);
152 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
153 static int cpu_error_is_ecache_data(int, uint64_t);
154 static void cpu_fmri_cpu_set(nvlist_t *, int);
155 static int cpu_error_to_resource_type(struct async_flt *aflt);
156 
157 #ifdef	CHEETAHPLUS_ERRATUM_25
158 static int mondo_recover_proc(uint16_t, int);
159 static void cheetah_nudge_init(void);
160 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
161     cyc_time_t *when);
162 static void cheetah_nudge_buddy(void);
163 #endif	/* CHEETAHPLUS_ERRATUM_25 */
164 
165 #if defined(CPU_IMP_L1_CACHE_PARITY)
166 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
167 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
168 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
169     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
170 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
171 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
172 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
173 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
174 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
175 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
176 #endif	/* CPU_IMP_L1_CACHE_PARITY */
177 
178 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
179     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
180     int *segsp, int *banksp, int *mcidp);
181 
182 /*
183  * This table is used to determine which bit(s) is(are) bad when an ECC
184  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
185  * of this array have the following semantics:
186  *
187  *      00-127  The number of the bad bit, when only one bit is bad.
188  *      128     ECC bit C0 is bad.
189  *      129     ECC bit C1 is bad.
190  *      130     ECC bit C2 is bad.
191  *      131     ECC bit C3 is bad.
192  *      132     ECC bit C4 is bad.
193  *      133     ECC bit C5 is bad.
194  *      134     ECC bit C6 is bad.
195  *      135     ECC bit C7 is bad.
196  *      136     ECC bit C8 is bad.
197  *	137-143 reserved for Mtag Data and ECC.
198  *      144(M2) Two bits are bad within a nibble.
199  *      145(M3) Three bits are bad within a nibble.
200  *      146(M4) Four bits are bad within a nibble.
201  *      147(M)  Multiple bits (5 or more) are bad.
202  *      148     NO bits are bad.
203  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
204  */
205 
206 #define	C0	128
207 #define	C1	129
208 #define	C2	130
209 #define	C3	131
210 #define	C4	132
211 #define	C5	133
212 #define	C6	134
213 #define	C7	135
214 #define	C8	136
215 #define	MT0	137	/* Mtag Data bit 0 */
216 #define	MT1	138
217 #define	MT2	139
218 #define	MTC0	140	/* Mtag Check bit 0 */
219 #define	MTC1	141
220 #define	MTC2	142
221 #define	MTC3	143
222 #define	M2	144
223 #define	M3	145
224 #define	M4	146
225 #define	M	147
226 #define	NA	148
227 #if defined(JALAPENO) || defined(SERRANO)
228 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
229 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
230 #define	SLAST	S003MEM	/* last special syndrome */
231 #else /* JALAPENO || SERRANO */
232 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
233 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
234 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
235 #define	SLAST	S11C	/* last special syndrome */
236 #endif /* JALAPENO || SERRANO */
237 #if defined(JALAPENO) || defined(SERRANO)
238 #define	BPAR0	152	/* syndrom 152 through 167 for bus parity */
239 #define	BPAR15	167
240 #endif	/* JALAPENO || SERRANO */
241 
242 static uint8_t ecc_syndrome_tab[] =
243 {
244 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
245 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
246 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
247 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
248 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
249 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
250 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
251 #if defined(JALAPENO) || defined(SERRANO)
252 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
253 #else	/* JALAPENO || SERRANO */
254 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
255 #endif	/* JALAPENO || SERRANO */
256 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
257 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
258 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
259 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
260 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
261 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
262 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
263 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
264 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
265 #if defined(JALAPENO) || defined(SERRANO)
266 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
267 #else	/* JALAPENO || SERRANO */
268 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
269 #endif	/* JALAPENO || SERRANO */
270 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
271 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
272 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
273 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
274 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
275 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
276 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
277 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
278 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
279 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
280 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
281 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
282 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
283 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
284 };
285 
286 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
287 
288 #if !(defined(JALAPENO) || defined(SERRANO))
289 /*
290  * This table is used to determine which bit(s) is(are) bad when a Mtag
291  * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
292  * of this array have the following semantics:
293  *
294  *      -1	Invalid mtag syndrome.
295  *      137     Mtag Data 0 is bad.
296  *      138     Mtag Data 1 is bad.
297  *      139     Mtag Data 2 is bad.
298  *      140     Mtag ECC 0 is bad.
299  *      141     Mtag ECC 1 is bad.
300  *      142     Mtag ECC 2 is bad.
301  *      143     Mtag ECC 3 is bad.
302  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6.
303  */
304 short mtag_syndrome_tab[] =
305 {
306 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
307 };
308 
309 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
310 
311 #else /* !(JALAPENO || SERRANO) */
312 
313 #define	BSYND_TBL_SIZE	16
314 
315 #endif /* !(JALAPENO || SERRANO) */
316 
317 /*
318  * Types returned from cpu_error_to_resource_type()
319  */
320 #define	ERRTYPE_UNKNOWN		0
321 #define	ERRTYPE_CPU		1
322 #define	ERRTYPE_MEMORY		2
323 #define	ERRTYPE_ECACHE_DATA	3
324 
325 /*
326  * CE initial classification and subsequent action lookup table
327  */
328 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
329 static int ce_disp_inited;
330 
331 /*
332  * Set to disable leaky and partner check for memory correctables
333  */
334 int ce_xdiag_off;
335 
336 /*
337  * The following are not incremented atomically so are indicative only
338  */
339 static int ce_xdiag_drops;
340 static int ce_xdiag_lkydrops;
341 static int ce_xdiag_ptnrdrops;
342 static int ce_xdiag_bad;
343 
344 /*
345  * CE leaky check callback structure
346  */
347 typedef struct {
348 	struct async_flt *lkycb_aflt;
349 	errorq_t *lkycb_eqp;
350 	errorq_elem_t *lkycb_eqep;
351 } ce_lkychk_cb_t;
352 
353 /*
354  * defines for various ecache_flush_flag's
355  */
356 #define	ECACHE_FLUSH_LINE	1
357 #define	ECACHE_FLUSH_ALL	2
358 
359 /*
360  * STICK sync
361  */
362 #define	STICK_ITERATION 10
363 #define	MAX_TSKEW	1
364 #define	EV_A_START	0
365 #define	EV_A_END	1
366 #define	EV_B_START	2
367 #define	EV_B_END	3
368 #define	EVENTS		4
369 
370 static int64_t stick_iter = STICK_ITERATION;
371 static int64_t stick_tsk = MAX_TSKEW;
372 
373 typedef enum {
374 	EVENT_NULL = 0,
375 	SLAVE_START,
376 	SLAVE_CONT,
377 	MASTER_START
378 } event_cmd_t;
379 
380 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
381 static int64_t timestamp[EVENTS];
382 static volatile int slave_done;
383 
384 #ifdef DEBUG
385 #define	DSYNC_ATTEMPTS 64
386 typedef struct {
387 	int64_t	skew_val[DSYNC_ATTEMPTS];
388 } ss_t;
389 
390 ss_t stick_sync_stats[NCPU];
391 #endif /* DEBUG */
392 
393 uint_t cpu_impl_dual_pgsz = 0;
394 #if defined(CPU_IMP_DUAL_PAGESIZE)
395 uint_t disable_dual_pgsz = 0;
396 #endif	/* CPU_IMP_DUAL_PAGESIZE */
397 
398 /*
399  * Save the cache bootup state for use when internal
400  * caches are to be re-enabled after an error occurs.
401  */
402 uint64_t cache_boot_state;
403 
404 /*
405  * PA[22:0] represent Displacement in Safari configuration space.
406  */
407 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
408 
409 bus_config_eclk_t bus_config_eclk[] = {
410 #if defined(JALAPENO) || defined(SERRANO)
411 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
412 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
413 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
414 #else /* JALAPENO || SERRANO */
415 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
416 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
417 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
418 #endif /* JALAPENO || SERRANO */
419 	{0, 0}
420 };
421 
422 /*
423  * Interval for deferred CEEN reenable
424  */
425 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
426 
427 /*
428  * set in /etc/system to control logging of user BERR/TO's
429  */
430 int cpu_berr_to_verbose = 0;
431 
432 /*
433  * set to 0 in /etc/system to defer CEEN reenable for all CEs
434  */
435 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
436 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
437 
438 /*
439  * Set of all offline cpus
440  */
441 cpuset_t cpu_offline_set;
442 
443 static void cpu_delayed_check_ce_errors(void *);
444 static void cpu_check_ce_errors(void *);
445 void cpu_error_ecache_flush(ch_async_flt_t *);
446 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
447 static void cpu_log_and_clear_ce(ch_async_flt_t *);
448 void cpu_ce_detected(ch_cpu_errors_t *, int);
449 
450 /*
451  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
452  * memory refresh interval of current DIMMs (64ms).  After initial fix that
453  * gives at least one full refresh cycle in which the cell can leak
454  * (whereafter further refreshes simply reinforce any incorrect bit value).
455  */
456 clock_t cpu_ce_lkychk_timeout_usec = 128000;
457 
458 /*
459  * CE partner check partner caching period in seconds
460  */
461 int cpu_ce_ptnr_cachetime_sec = 60;
462 
463 /*
464  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
465  */
466 #define	CH_SET_TRAP(ttentry, ttlabel)			\
467 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
468 		flush_instr_mem((caddr_t)&ttentry, 32);
469 
470 static int min_ecache_size;
471 static uint_t priv_hcl_1;
472 static uint_t priv_hcl_2;
473 static uint_t priv_hcl_4;
474 static uint_t priv_hcl_8;
475 
476 void
477 cpu_setup(void)
478 {
479 	extern int at_flags;
480 	extern int cpc_has_overflow_intr;
481 
482 	/*
483 	 * Setup chip-specific trap handlers.
484 	 */
485 	cpu_init_trap();
486 
487 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
488 
489 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
490 
491 	/*
492 	 * save the cache bootup state.
493 	 */
494 	cache_boot_state = get_dcu() & DCU_CACHE;
495 
496 	/*
497 	 * Due to the number of entries in the fully-associative tlb
498 	 * this may have to be tuned lower than in spitfire.
499 	 */
500 	pp_slots = MIN(8, MAXPP_SLOTS);
501 
502 	/*
503 	 * Block stores do not invalidate all pages of the d$, pagecopy
504 	 * et. al. need virtual translations with virtual coloring taken
505 	 * into consideration.  prefetch/ldd will pollute the d$ on the
506 	 * load side.
507 	 */
508 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
509 
510 	if (use_page_coloring) {
511 		do_pg_coloring = 1;
512 	}
513 
514 	isa_list =
515 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
516 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
517 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
518 
519 	/*
520 	 * On Panther-based machines, this should
521 	 * also include AV_SPARC_POPC too
522 	 */
523 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
524 
525 	/*
526 	 * On cheetah, there's no hole in the virtual address space
527 	 */
528 	hole_start = hole_end = 0;
529 
530 	/*
531 	 * The kpm mapping window.
532 	 * kpm_size:
533 	 *	The size of a single kpm range.
534 	 *	The overall size will be: kpm_size * vac_colors.
535 	 * kpm_vbase:
536 	 *	The virtual start address of the kpm range within the kernel
537 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
538 	 */
539 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
540 	kpm_size_shift = 43;
541 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
542 	kpm_smallpages = 1;
543 
544 	/*
545 	 * The traptrace code uses either %tick or %stick for
546 	 * timestamping.  We have %stick so we can use it.
547 	 */
548 	traptrace_use_stick = 1;
549 
550 	/*
551 	 * Cheetah has a performance counter overflow interrupt
552 	 */
553 	cpc_has_overflow_intr = 1;
554 
555 #if defined(CPU_IMP_DUAL_PAGESIZE)
556 	/*
557 	 * Use Cheetah+ and later dual page size support.
558 	 */
559 	if (!disable_dual_pgsz) {
560 		cpu_impl_dual_pgsz = 1;
561 	}
562 #endif	/* CPU_IMP_DUAL_PAGESIZE */
563 
564 	/*
565 	 * Declare that this architecture/cpu combination does fpRAS.
566 	 */
567 	fpras_implemented = 1;
568 
569 	/*
570 	 * Setup CE lookup table
571 	 */
572 	CE_INITDISPTBL_POPULATE(ce_disp_table);
573 	ce_disp_inited = 1;
574 }
575 
576 /*
577  * Called by setcpudelay
578  */
579 void
580 cpu_init_tick_freq(void)
581 {
582 	/*
583 	 * For UltraSPARC III and beyond we want to use the
584 	 * system clock rate as the basis for low level timing,
585 	 * due to support of mixed speed CPUs and power managment.
586 	 */
587 	if (system_clock_freq == 0)
588 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
589 
590 	sys_tick_freq = system_clock_freq;
591 }
592 
593 #ifdef CHEETAHPLUS_ERRATUM_25
594 /*
595  * Tunables
596  */
597 int cheetah_bpe_off = 0;
598 int cheetah_sendmondo_recover = 1;
599 int cheetah_sendmondo_fullscan = 0;
600 int cheetah_sendmondo_recover_delay = 5;
601 
602 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
603 
604 /*
605  * Recovery Statistics
606  */
607 typedef struct cheetah_livelock_entry	{
608 	int cpuid;		/* fallen cpu */
609 	int buddy;		/* cpu that ran recovery */
610 	clock_t lbolt;		/* when recovery started */
611 	hrtime_t recovery_time;	/* time spent in recovery */
612 } cheetah_livelock_entry_t;
613 
614 #define	CHEETAH_LIVELOCK_NENTRY	32
615 
616 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
617 int cheetah_livelock_entry_nxt;
618 
619 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
620 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
621 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
622 		cheetah_livelock_entry_nxt = 0;				\
623 	}								\
624 }
625 
/*
 * Store val in member item of the history entry statp points to.
 * Arguments and the expansion are parenthesized so that any pointer
 * or value expression can be passed safely.
 */
#define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	\
	((statp)->item = (val))
627 
628 struct {
629 	hrtime_t hrt;		/* maximum recovery time */
630 	int recovery;		/* recovered */
631 	int full_claimed;	/* maximum pages claimed in full recovery */
632 	int proc_entry;		/* attempted to claim TSB */
633 	int proc_tsb_scan;	/* tsb scanned */
634 	int proc_tsb_partscan;	/* tsb partially scanned */
635 	int proc_tsb_fullscan;	/* whole tsb scanned */
636 	int proc_claimed;	/* maximum pages claimed in tsb scan */
637 	int proc_user;		/* user thread */
638 	int proc_kernel;	/* kernel thread */
639 	int proc_onflt;		/* bad stack */
640 	int proc_cpu;		/* null cpu */
641 	int proc_thread;	/* null thread */
642 	int proc_proc;		/* null proc */
643 	int proc_as;		/* null as */
644 	int proc_hat;		/* null hat */
645 	int proc_hat_inval;	/* hat contents don't make sense */
646 	int proc_hat_busy;	/* hat is changing TSBs */
647 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
648 	int proc_cnum_bad;	/* cnum out of range */
649 	int proc_cnum;		/* last cnum processed */
650 	tte_t proc_tte;		/* last tte processed */
651 } cheetah_livelock_stat;
652 
653 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
654 
655 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
656 	cheetah_livelock_stat.item = value
657 
/*
 * Raise cheetah_livelock_stat.item to value if value is larger than
 * what is currently recorded.  value is parenthesized so that operators
 * binding more loosely than '>' are compared as a whole; note that it
 * is still evaluated twice, so it must be free of side effects.
 */
#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
	if ((value) > cheetah_livelock_stat.item)	\
		cheetah_livelock_stat.item = (value);	\
}
662 
663 /*
664  * Attempt to recover a cpu by claiming every cache line as saved
665  * in the TSB that the non-responsive cpu is using. Since we can't
666  * grab any adaptive lock, this is at best an attempt to do so. Because
667  * we don't grab any locks, we must operate under the protection of
668  * on_fault().
669  *
670  * Return 1 if cpuid could be recovered, 0 if failed.
671  */
672 int
673 mondo_recover_proc(uint16_t cpuid, int bn)
674 {
675 	label_t ljb;
676 	cpu_t *cp;
677 	kthread_t *t;
678 	proc_t *p;
679 	struct as *as;
680 	struct hat *hat;
681 	uint_t  cnum;
682 	struct tsb_info *tsbinfop;
683 	struct tsbe *tsbep;
684 	caddr_t tsbp;
685 	caddr_t end_tsbp;
686 	uint64_t paddr;
687 	uint64_t idsr;
688 	u_longlong_t pahi, palo;
689 	int pages_claimed = 0;
690 	tte_t tsbe_tte;
691 	int tried_kernel_tsb = 0;
692 	mmu_ctx_t *mmu_ctxp;
693 
694 	CHEETAH_LIVELOCK_STAT(proc_entry);
695 
696 	if (on_fault(&ljb)) {
697 		CHEETAH_LIVELOCK_STAT(proc_onflt);
698 		goto badstruct;
699 	}
700 
701 	if ((cp = cpu[cpuid]) == NULL) {
702 		CHEETAH_LIVELOCK_STAT(proc_cpu);
703 		goto badstruct;
704 	}
705 
706 	if ((t = cp->cpu_thread) == NULL) {
707 		CHEETAH_LIVELOCK_STAT(proc_thread);
708 		goto badstruct;
709 	}
710 
711 	if ((p = ttoproc(t)) == NULL) {
712 		CHEETAH_LIVELOCK_STAT(proc_proc);
713 		goto badstruct;
714 	}
715 
716 	if ((as = p->p_as) == NULL) {
717 		CHEETAH_LIVELOCK_STAT(proc_as);
718 		goto badstruct;
719 	}
720 
721 	if ((hat = as->a_hat) == NULL) {
722 		CHEETAH_LIVELOCK_STAT(proc_hat);
723 		goto badstruct;
724 	}
725 
726 	if (hat != ksfmmup) {
727 		CHEETAH_LIVELOCK_STAT(proc_user);
728 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
729 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
730 			goto badstruct;
731 		}
732 		tsbinfop = hat->sfmmu_tsb;
733 		if (tsbinfop == NULL) {
734 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
735 			goto badstruct;
736 		}
737 		tsbp = tsbinfop->tsb_va;
738 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
739 	} else {
740 		CHEETAH_LIVELOCK_STAT(proc_kernel);
741 		tsbinfop = NULL;
742 		tsbp = ktsb_base;
743 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
744 	}
745 
746 	/* Verify as */
747 	if (hat->sfmmu_as != as) {
748 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
749 		goto badstruct;
750 	}
751 
752 	mmu_ctxp = CPU_MMU_CTXP(cp);
753 	ASSERT(mmu_ctxp);
754 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
755 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
756 
757 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
758 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
759 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
760 		goto badstruct;
761 	}
762 
763 	do {
764 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
765 
766 		/*
767 		 * Skip TSBs being relocated.  This is important because
768 		 * we want to avoid the following deadlock scenario:
769 		 *
770 		 * 1) when we came in we set ourselves to "in recover" state.
771 		 * 2) when we try to touch TSB being relocated the mapping
772 		 *    will be in the suspended state so we'll spin waiting
773 		 *    for it to be unlocked.
774 		 * 3) when the CPU that holds the TSB mapping locked tries to
775 		 *    unlock it it will send a xtrap which will fail to xcall
776 		 *    us or the CPU we're trying to recover, and will in turn
777 		 *    enter the mondo code.
778 		 * 4) since we are still spinning on the locked mapping
779 		 *    no further progress will be made and the system will
780 		 *    inevitably hard hang.
781 		 *
782 		 * A TSB not being relocated can't begin being relocated
783 		 * while we're accessing it because we check
784 		 * sendmondo_in_recover before relocating TSBs.
785 		 */
786 		if (hat != ksfmmup &&
787 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
788 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
789 			goto next_tsbinfo;
790 		}
791 
792 		for (tsbep = (struct tsbe *)tsbp;
793 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
794 			tsbe_tte = tsbep->tte_data;
795 
796 			if (tsbe_tte.tte_val == 0) {
797 				/*
798 				 * Invalid tte
799 				 */
800 				continue;
801 			}
802 			if (tsbe_tte.tte_se) {
803 				/*
804 				 * Don't want device registers
805 				 */
806 				continue;
807 			}
808 			if (tsbe_tte.tte_cp == 0) {
809 				/*
810 				 * Must be cached in E$
811 				 */
812 				continue;
813 			}
814 			if (tsbep->tte_tag.tag_invalid != 0) {
815 				/*
816 				 * Invalid tag, ingnore this entry.
817 				 */
818 				continue;
819 			}
820 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
821 			idsr = getidsr();
822 			if ((idsr & (IDSR_NACK_BIT(bn) |
823 			    IDSR_BUSY_BIT(bn))) == 0) {
824 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
825 				goto done;
826 			}
827 			pahi = tsbe_tte.tte_pahi;
828 			palo = tsbe_tte.tte_palo;
829 			paddr = (uint64_t)((pahi << 32) |
830 			    (palo << MMU_PAGESHIFT));
831 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
832 			    CH_ECACHE_SUBBLK_SIZE);
833 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
834 				shipit(cpuid, bn);
835 			}
836 			pages_claimed++;
837 		}
838 next_tsbinfo:
839 		if (tsbinfop != NULL)
840 			tsbinfop = tsbinfop->tsb_next;
841 		if (tsbinfop != NULL) {
842 			tsbp = tsbinfop->tsb_va;
843 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
844 		} else if (tsbp == ktsb_base) {
845 			tried_kernel_tsb = 1;
846 		} else if (!tried_kernel_tsb) {
847 			tsbp = ktsb_base;
848 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
849 			hat = ksfmmup;
850 			tsbinfop = NULL;
851 		}
852 	} while (tsbinfop != NULL ||
853 	    ((tsbp == ktsb_base) && !tried_kernel_tsb));
854 
855 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
856 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
857 	no_fault();
858 	idsr = getidsr();
859 	if ((idsr & (IDSR_NACK_BIT(bn) |
860 	    IDSR_BUSY_BIT(bn))) == 0) {
861 		return (1);
862 	} else {
863 		return (0);
864 	}
865 
866 done:
867 	no_fault();
868 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
869 	return (1);
870 
871 badstruct:
872 	no_fault();
873 	return (0);
874 }
875 
876 /*
877  * Attempt to claim ownership, temporarily, of every cache line that a
878  * non-responsive cpu might be using.  This might kick that cpu out of
879  * this state.
880  *
881  * The return value indicates to the caller if we have exhausted all recovery
882  * techniques. If 1 is returned, it is useless to call this function again
883  * even for a different target CPU.
884  */
885 int
886 mondo_recover(uint16_t cpuid, int bn)
887 {
888 	struct memseg *seg;
889 	uint64_t begin_pa, end_pa, cur_pa;
890 	hrtime_t begin_hrt, end_hrt;
891 	int retval = 0;
892 	int pages_claimed = 0;
893 	cheetah_livelock_entry_t *histp;
894 	uint64_t idsr;
895 
896 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
897 		/*
898 		 * Wait while recovery takes place
899 		 */
900 		while (sendmondo_in_recover) {
901 			drv_usecwait(1);
902 		}
903 		/*
904 		 * Assume we didn't claim the whole memory. If
905 		 * the target of this caller is not recovered,
906 		 * it will come back.
907 		 */
908 		return (retval);
909 	}
910 
911 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
912 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
913 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
914 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
915 
916 	begin_hrt = gethrtime_waitfree();
917 	/*
918 	 * First try to claim the lines in the TSB the target
919 	 * may have been using.
920 	 */
921 	if (mondo_recover_proc(cpuid, bn) == 1) {
922 		/*
923 		 * Didn't claim the whole memory
924 		 */
925 		goto done;
926 	}
927 
928 	/*
929 	 * We tried using the TSB. The target is still
930 	 * not recovered. Check if complete memory scan is
931 	 * enabled.
932 	 */
933 	if (cheetah_sendmondo_fullscan == 0) {
934 		/*
935 		 * Full memory scan is disabled.
936 		 */
937 		retval = 1;
938 		goto done;
939 	}
940 
941 	/*
942 	 * Try claiming the whole memory.
943 	 */
944 	for (seg = memsegs; seg; seg = seg->next) {
945 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
946 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
947 		for (cur_pa = begin_pa; cur_pa < end_pa;
948 		    cur_pa += MMU_PAGESIZE) {
949 			idsr = getidsr();
950 			if ((idsr & (IDSR_NACK_BIT(bn) |
951 			    IDSR_BUSY_BIT(bn))) == 0) {
952 				/*
953 				 * Didn't claim all memory
954 				 */
955 				goto done;
956 			}
957 			claimlines(cur_pa, MMU_PAGESIZE,
958 			    CH_ECACHE_SUBBLK_SIZE);
959 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
960 				shipit(cpuid, bn);
961 			}
962 			pages_claimed++;
963 		}
964 	}
965 
966 	/*
967 	 * We did all we could.
968 	 */
969 	retval = 1;
970 
971 done:
972 	/*
973 	 * Update statistics
974 	 */
975 	end_hrt = gethrtime_waitfree();
976 	CHEETAH_LIVELOCK_STAT(recovery);
977 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
978 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
979 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
980 	    (end_hrt -  begin_hrt));
981 
982 	while (cas32(&sendmondo_in_recover, 1, 0) != 1)
983 		;
984 
985 	return (retval);
986 }
987 
988 /*
989  * This is called by the cyclic framework when this CPU becomes online
990  */
991 /*ARGSUSED*/
992 static void
993 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
994 {
995 
996 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
997 	hdlr->cyh_level = CY_LOW_LEVEL;
998 	hdlr->cyh_arg = NULL;
999 
1000 	/*
1001 	 * Stagger the start time
1002 	 */
1003 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1004 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1005 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1006 	}
1007 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1008 }
1009 
1010 /*
1011  * Create a low level cyclic to send a xtrap to the next cpu online.
1012  * However, there's no need to have this running on a uniprocessor system.
1013  */
1014 static void
1015 cheetah_nudge_init(void)
1016 {
1017 	cyc_omni_handler_t hdlr;
1018 
1019 	if (max_ncpus == 1) {
1020 		return;
1021 	}
1022 
1023 	hdlr.cyo_online = cheetah_nudge_onln;
1024 	hdlr.cyo_offline = NULL;
1025 	hdlr.cyo_arg = NULL;
1026 
1027 	mutex_enter(&cpu_lock);
1028 	(void) cyclic_add_omni(&hdlr);
1029 	mutex_exit(&cpu_lock);
1030 }
1031 
1032 /*
1033  * Cyclic handler to wake up buddy
1034  */
1035 void
1036 cheetah_nudge_buddy(void)
1037 {
1038 	/*
1039 	 * Disable kernel preemption to protect the cpu list
1040 	 */
1041 	kpreempt_disable();
1042 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1043 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1044 		    0, 0);
1045 	}
1046 	kpreempt_enable();
1047 }
1048 
1049 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1050 
#ifdef SEND_MONDO_STATS
/*
 * Optional mondo dispatch counters, compiled in only with SEND_MONDO_STATS.
 *
 * send_one_mondo() bumps x_one_stimes[ticks >> 7] when a dispatch took
 * fewer than 8192 ticks (64 buckets of 128 ticks each), and otherwise
 * x_one_ltimes[(ticks >> 13) & 0xf].
 *
 * NOTE(review): the x_set_* and x_nack_stimes arrays are not touched in
 * this part of the file; presumably they are the matching counters for the
 * multi-target send path -- confirm against their users.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
1059 
/*
 * Dispatch a mondo interrupt to a single target cpu and poll the interrupt
 * dispatch status until it reads back as zero.
 *
 * While the dispatch is outstanding:
 *   - a set busy bit means keep polling;
 *   - otherwise (counted as a NACK) wait one microsecond and re-send.
 * If the dispatch has not completed within xc_tick_limit ticks, the function
 * returns silently when panic_quiesce is set, attempts mondo_recover() when
 * CHEETAHPLUS_ERRATUM_25 recovery is compiled in and enabled, and otherwise
 * panics.
 *
 * cpuid: id of the target cpu.
 *
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be included
 * in the KDI-specific version.  See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	int busy, nack;
	uint64_t idsr, starttick, endtick, tick, lasttick;
	uint64_t busymask;
#ifdef	CHEETAHPLUS_ERRATUM_25
	/* Becomes nonzero once mondo_recover() says retrying is useless. */
	int recovered = 0;
#endif

	/* Count this cross call in the per-cpu sys.xcalls statistic. */
	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	starttick = lasttick = gettick();
	shipit(cpuid, 0);
	/* Deadline, in ticks, after which the dispatch is considered stuck. */
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
#if defined(JALAPENO) || defined(SERRANO)
	/*
	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
	 * will be used for dispatching interrupt. For now, assume
	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
	 * issues with respect to BUSY/NACK pair usage.
	 */
	busymask  = IDSR_BUSY_BIT(cpuid);
#else /* JALAPENO || SERRANO */
	busymask = IDSR_BUSY;
#endif /* JALAPENO || SERRANO */
	for (;;) {
		/* A status of zero ends the wait: nothing is pending. */
		idsr = getidsr();
		if (idsr == 0)
			break;

		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			/* Timed out: give up quietly if a panic is quiescing. */
			if (panic_quiesce)
				return;
#ifdef	CHEETAHPLUS_ERRATUM_25
			if (cheetah_sendmondo_recover && recovered == 0) {
				if (mondo_recover(cpuid, 0)) {
					/*
					 * We claimed the whole memory or
					 * full scan is disabled.
					 */
					recovered++;
				}
				/* Start a fresh timeout window after recovery. */
				tick = gettick();
				endtick = tick + xc_tick_limit;
				lasttick = tick;
				/*
				 * Recheck idsr
				 */
				continue;
			} else
#endif	/* CHEETAHPLUS_ERRATUM_25 */
			{
				cmn_err(CE_PANIC, "send mondo timeout "
				    "(target 0x%x) [%d NACK %d BUSY]",
				    cpuid, nack, busy);
			}
		}

		/* Still busy: just poll again. */
		if (idsr & busymask) {
			busy++;
			continue;
		}
		/* Not busy but not clear: back off briefly and re-send. */
		drv_usecwait(1);
		shipit(cpuid, 0);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		/*
		 * Histogram the elapsed ticks: 128-tick buckets below 8192
		 * ticks, coarser buckets (n >> 13, masked to 0..15) above.
		 */
		int n = gettick() - starttick;
		if (n < 8192)
			x_one_stimes[n >> 7]++;
		else
			x_one_ltimes[(n >> 13) & 0xf]++;
	}
#endif
}
1153 
/*
 * syncfpu() has an empty body in this module: calling it does nothing.
 */
void
syncfpu(void)
{
}
1158 
1159 /*
1160  * Return processor specific async error structure
1161  * size used.
1162  */
1163 int
1164 cpu_aflt_size(void)
1165 {
1166 	return (sizeof (ch_async_flt_t));
1167 }
1168 
/*
 * Tunable to disable the checking of other cpu logout areas during panic for
 * potential syndrome 71 generating errors.  Initialized to 1.
 * NOTE(review): the code that reads this tunable is not in this part of the
 * file; presumably a value of 0 turns the check off -- confirm.
 */
int enable_check_other_cpus_logout = 1;
1174 
1175 /*
1176  * Check other cpus logout area for potential synd 71 generating
1177  * errors.
1178  */
1179 static void
1180 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1181     ch_cpu_logout_t *clop)
1182 {
1183 	struct async_flt *aflt;
1184 	ch_async_flt_t ch_flt;
1185 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1186 
1187 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1188 		return;
1189 	}
1190 
1191 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1192 
1193 	t_afar = clop->clo_data.chd_afar;
1194 	t_afsr = clop->clo_data.chd_afsr;
1195 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1196 #if defined(SERRANO)
1197 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1198 #endif	/* SERRANO */
1199 
1200 	/*
1201 	 * In order to simplify code, we maintain this afsr_errs
1202 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1203 	 * sticky bits.
1204 	 */
1205 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1206 	    (t_afsr & C_AFSR_ALL_ERRS);
1207 
1208 	/* Setup the async fault structure */
1209 	aflt = (struct async_flt *)&ch_flt;
1210 	aflt->flt_id = gethrtime_waitfree();
1211 	ch_flt.afsr_ext = t_afsr_ext;
1212 	ch_flt.afsr_errs = t_afsr_errs;
1213 	aflt->flt_stat = t_afsr;
1214 	aflt->flt_addr = t_afar;
1215 	aflt->flt_bus_id = cpuid;
1216 	aflt->flt_inst = cpuid;
1217 	aflt->flt_pc = tpc;
1218 	aflt->flt_prot = AFLT_PROT_NONE;
1219 	aflt->flt_class = CPU_FAULT;
1220 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1221 	aflt->flt_tl = tl;
1222 	aflt->flt_status = ecc_type;
1223 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1224 
1225 	/*
1226 	 * Queue events on the async event queue, one event per error bit.
1227 	 * If no events are queued, queue an event to complain.
1228 	 */
1229 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1230 		ch_flt.flt_type = CPU_INV_AFSR;
1231 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1232 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1233 		    aflt->flt_panic);
1234 	}
1235 
1236 	/*
1237 	 * Zero out + invalidate CPU logout.
1238 	 */
1239 	bzero(clop, sizeof (ch_cpu_logout_t));
1240 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1241 }
1242 
1243 /*
1244  * Check the logout areas of all other cpus for unlogged errors.
1245  */
1246 static void
1247 cpu_check_other_cpus_logout(void)
1248 {
1249 	int i, j;
1250 	processorid_t myid;
1251 	struct cpu *cp;
1252 	ch_err_tl1_data_t *cl1p;
1253 
1254 	myid = CPU->cpu_id;
1255 	for (i = 0; i < NCPU; i++) {
1256 		cp = cpu[i];
1257 
1258 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1259 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1260 			continue;
1261 		}
1262 
1263 		/*
1264 		 * Check each of the tl>0 logout areas
1265 		 */
1266 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1267 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1268 			if (cl1p->ch_err_tl1_flags == 0)
1269 				continue;
1270 
1271 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1272 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1273 		}
1274 
1275 		/*
1276 		 * Check each of the remaining logout areas
1277 		 */
1278 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1279 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1280 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1281 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1282 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1283 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1284 	}
1285 }
1286 
1287 /*
1288  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1289  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1290  * flush the error that caused the UCU/UCC, then again here at the end to
1291  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1292  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1293  * another Fast ECC trap.
1294  *
1295  * Cheetah+ also handles: TSCE: No additional processing required.
1296  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1297  *
1298  * Note that the p_clo_flags input is only valid in cases where the
1299  * cpu_private struct is not yet initialized (since that is the only
1300  * time that information cannot be obtained from the logout struct.)
1301  */
1302 /*ARGSUSED*/
1303 void
1304 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1305 {
1306 	ch_cpu_logout_t *clop;
1307 	uint64_t ceen, nceen;
1308 
1309 	/*
1310 	 * Get the CPU log out info. If we can't find our CPU private
1311 	 * pointer, then we will have to make due without any detailed
1312 	 * logout information.
1313 	 */
1314 	if (CPU_PRIVATE(CPU) == NULL) {
1315 		clop = NULL;
1316 		ceen = p_clo_flags & EN_REG_CEEN;
1317 		nceen = p_clo_flags & EN_REG_NCEEN;
1318 	} else {
1319 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1320 		ceen = clop->clo_flags & EN_REG_CEEN;
1321 		nceen = clop->clo_flags & EN_REG_NCEEN;
1322 	}
1323 
1324 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1325 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1326 }
1327 
/*
 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
 * ECC at TL>0.  Need to supply either a error register pointer or a
 * cpu logout structure pointer.
 *
 *   tpc   - trap PC recorded in the fault.
 *   priv  - recorded as the fault's privileged-mode flag.
 *   tl    - trap level recorded in the fault.
 *   ceen  - CEEN enable bit to OR back into the error enable register.
 *   nceen - NCEEN enable bit to OR back into the error enable register.
 *   clop  - cpu logout area to take AFSR/AFAR from, or NULL to read the
 *           live error registers instead.  A non-NULL area is zeroed and
 *           invalidated before returning.
 *
 * Panics via fm_panic() if the fault ends up marked as a panic; otherwise
 * finishes by flushing the Ecache.
 */
static void
cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
    uint64_t nceen, ch_cpu_logout_t *clop)
{
	struct async_flt *aflt;
	ch_async_flt_t ch_flt;
	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
	char pr_reason[MAX_REASON_STRING];
	ch_cpu_errors_t cpu_error_regs;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	/*
	 * If no cpu logout data, then we will have to make do without
	 * any detailed logout information.
	 */
	if (clop == NULL) {
		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
		/*
		 * Read the error registers and write the same values
		 * straight back.
		 */
		get_cpu_error_state(&cpu_error_regs);
		set_cpu_error_state(&cpu_error_regs);
		t_afar = cpu_error_regs.afar;
		t_afsr = cpu_error_regs.afsr;
		t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
	} else {
		t_afar = clop->clo_data.chd_afar;
		t_afsr = clop->clo_data.chd_afsr;
		t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif	/* SERRANO */
	}

	/*
	 * In order to simplify code, we maintain this afsr_errs
	 * variable which holds the aggregate of AFSR and AFSR_EXT
	 * sticky bits.
	 */
	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
	    (t_afsr & C_AFSR_ALL_ERRS);
	/* Start with an empty reason string for the panic message. */
	pr_reason[0] = '\0';

	/* Setup the async fault structure */
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	ch_flt.afsr_ext = t_afsr_ext;
	ch_flt.afsr_errs = t_afsr_errs;
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = tpc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = priv;
	aflt->flt_tl = tl;
	aflt->flt_status = ECC_F_TRAP;
	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

	/*
	 * XXXX - Phenomenal hack to get around Solaris not getting all the
	 * cmn_err messages out to the console.  The situation is a UCU (in
	 * priv mode) which causes a WDU which causes a UE (on the retry).
	 * The messages for the UCU and WDU are enqueued and then pulled off
	 * the async queue via softint and syslogd starts to process them
	 * but doesn't get them to the console.  The UE causes a panic, but
	 * since the UCU/WDU messages are already in transit, those aren't
	 * on the async queue.  The hack is to check if we have a matching
	 * WDU event for the UCU, and if it matches, we're more than likely
	 * going to panic with a UE, unless we're under protection.  So, we
	 * check to see if we got a matching WDU event and if we're under
	 * protection.
	 *
	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
	 * looks like this:
	 *    UCU->WDU->UE
	 * For Panther, it could look like either of these:
	 *    UCU---->WDU->L3_WDU->UE
	 *    L3_UCU->WDU->L3_WDU->UE
	 */
	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
		get_cpu_error_state(&cpu_error_regs);
		/* A live WDU at the same AFAR is the matching event. */
		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
		    (cpu_error_regs.afar == t_afar));
		/*
		 * Without logout data the captured state came from the
		 * live registers, so a WDU there counts as well.
		 */
		aflt->flt_panic |= ((clop == NULL) &&
		    (t_afsr_errs & C_AFSR_WDU));
	}

	/*
	 * Queue events on the async event queue, one event per error bit.
	 * If no events are queued or no Fast ECC events are on in the AFSR,
	 * queue an event to complain.
	 */
	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
		ch_flt.flt_type = CPU_INV_AFSR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Zero out + invalidate CPU logout.
	 */
	if (clop) {
		bzero(clop, sizeof (ch_cpu_logout_t));
		clop->clo_data.chd_afar = LOGOUT_INVALID;
	}

	/*
	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
	 * or disrupting errors have happened.  We do this because if a
	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
	 * deferred or disrupting error happening between checking the AFSR and
	 * enabling NCEEN/CEEN.
	 *
	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
	 * taken.
	 */
	set_error_enable(get_error_enable() | (nceen | ceen));
	if (clear_errors(&ch_flt)) {
		/* Any async error picked up here forces a panic. */
		aflt->flt_panic |= ((ch_flt.afsr_errs &
		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
		    NULL);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic)
		fm_panic("%sError(s)", pr_reason);

	/*
	 * Flushing the Ecache here gets the part of the trap handler that
	 * is run at TL=1 out of the Ecache.
	 */
	cpu_flush_ecache();
}
1479 
1480 /*
1481  * This is called via sys_trap from pil15_interrupt code if the
1482  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1483  * various ch_err_tl1_data structures for valid entries based on the bit
1484  * settings in the ch_err_tl1_flags entry of the structure.
1485  */
1486 /*ARGSUSED*/
1487 void
1488 cpu_tl1_error(struct regs *rp, int panic)
1489 {
1490 	ch_err_tl1_data_t *cl1p, cl1;
1491 	int i, ncl1ps;
1492 	uint64_t me_flags;
1493 	uint64_t ceen, nceen;
1494 
1495 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1496 		cl1p = &ch_err_tl1_data;
1497 		ncl1ps = 1;
1498 	} else if (CPU_PRIVATE(CPU) != NULL) {
1499 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1500 		ncl1ps = CH_ERR_TL1_TLMAX;
1501 	} else {
1502 		ncl1ps = 0;
1503 	}
1504 
1505 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1506 		if (cl1p->ch_err_tl1_flags == 0)
1507 			continue;
1508 
1509 		/*
1510 		 * Grab a copy of the logout data and invalidate
1511 		 * the logout area.
1512 		 */
1513 		cl1 = *cl1p;
1514 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1515 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1516 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1517 
1518 		/*
1519 		 * Log "first error" in ch_err_tl1_data.
1520 		 */
1521 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1522 			ceen = get_error_enable() & EN_REG_CEEN;
1523 			nceen = get_error_enable() & EN_REG_NCEEN;
1524 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1525 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1526 		}
1527 #if defined(CPU_IMP_L1_CACHE_PARITY)
1528 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1529 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1530 			    (caddr_t)cl1.ch_err_tl1_tpc);
1531 		}
1532 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1533 
1534 		/*
1535 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1536 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1537 		 * if the structure is busy, we just do the cache flushing
1538 		 * we have to do and then do the retry.  So the AFSR/AFAR
1539 		 * at this point *should* have some relevant info.  If there
1540 		 * are no valid errors in the AFSR, we'll assume they've
1541 		 * already been picked up and logged.  For I$/D$ parity,
1542 		 * we just log an event with an "Unknown" (NULL) TPC.
1543 		 */
1544 		if (me_flags & CH_ERR_FECC) {
1545 			ch_cpu_errors_t cpu_error_regs;
1546 			uint64_t t_afsr_errs;
1547 
1548 			/*
1549 			 * Get the error registers and see if there's
1550 			 * a pending error.  If not, don't bother
1551 			 * generating an "Invalid AFSR" error event.
1552 			 */
1553 			get_cpu_error_state(&cpu_error_regs);
1554 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1555 			    C_AFSR_EXT_ALL_ERRS) |
1556 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1557 			if (t_afsr_errs != 0) {
1558 				ceen = get_error_enable() & EN_REG_CEEN;
1559 				nceen = get_error_enable() & EN_REG_NCEEN;
1560 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1561 				    1, ceen, nceen, NULL);
1562 			}
1563 		}
1564 #if defined(CPU_IMP_L1_CACHE_PARITY)
1565 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1566 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1567 		}
1568 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1569 	}
1570 }
1571 
/*
 * Called from Fast ECC TL>0 handler in case of fatal error.
 * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
 * but if we don't, we'll panic with something reasonable.
 *
 *   rp    - saved registers, handed on to cpu_tl1_error().
 *   flags - not used by this function.
 */
/*ARGSUSED*/
void
cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
{
	/* Log whatever TL>0 error data is pending for this cpu. */
	cpu_tl1_error(rp, 1);
	/*
	 * Should never return, but just in case.
	 */
	fm_panic("Unsurvivable ECC Error at TL>0");
}
1587 
1588 /*
1589  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1590  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1591  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1592  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1593  *
1594  * Cheetah+ also handles (No additional processing required):
1595  *    DUE, DTO, DBERR	(NCEEN controlled)
1596  *    THCE		(CEEN and ET_ECC_en controlled)
1597  *    TUE		(ET_ECC_en controlled)
1598  *
1599  * Panther further adds:
1600  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1601  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1602  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1603  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1604  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1605  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1606  *
1607  * Note that the p_clo_flags input is only valid in cases where the
1608  * cpu_private struct is not yet initialized (since that is the only
1609  * time that information cannot be obtained from the logout struct.)
1610  */
1611 /*ARGSUSED*/
1612 void
1613 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1614 {
1615 	struct async_flt *aflt;
1616 	ch_async_flt_t ch_flt;
1617 	char pr_reason[MAX_REASON_STRING];
1618 	ch_cpu_logout_t *clop;
1619 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1620 	ch_cpu_errors_t cpu_error_regs;
1621 
1622 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1623 	/*
1624 	 * Get the CPU log out info. If we can't find our CPU private
1625 	 * pointer, then we will have to make due without any detailed
1626 	 * logout information.
1627 	 */
1628 	if (CPU_PRIVATE(CPU) == NULL) {
1629 		clop = NULL;
1630 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1631 		get_cpu_error_state(&cpu_error_regs);
1632 		set_cpu_error_state(&cpu_error_regs);
1633 		t_afar = cpu_error_regs.afar;
1634 		t_afsr = cpu_error_regs.afsr;
1635 		t_afsr_ext = cpu_error_regs.afsr_ext;
1636 #if defined(SERRANO)
1637 		ch_flt.afar2 = cpu_error_regs.afar2;
1638 #endif	/* SERRANO */
1639 	} else {
1640 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1641 		t_afar = clop->clo_data.chd_afar;
1642 		t_afsr = clop->clo_data.chd_afsr;
1643 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1644 #if defined(SERRANO)
1645 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1646 #endif	/* SERRANO */
1647 	}
1648 
1649 	/*
1650 	 * In order to simplify code, we maintain this afsr_errs
1651 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1652 	 * sticky bits.
1653 	 */
1654 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1655 	    (t_afsr & C_AFSR_ALL_ERRS);
1656 
1657 	pr_reason[0] = '\0';
1658 	/* Setup the async fault structure */
1659 	aflt = (struct async_flt *)&ch_flt;
1660 	ch_flt.afsr_ext = t_afsr_ext;
1661 	ch_flt.afsr_errs = t_afsr_errs;
1662 	aflt->flt_stat = t_afsr;
1663 	aflt->flt_addr = t_afar;
1664 	aflt->flt_pc = (caddr_t)rp->r_pc;
1665 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1666 	aflt->flt_tl = 0;
1667 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1668 
1669 	/*
1670 	 * If this trap is a result of one of the errors not masked
1671 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1672 	 * indicate that a timeout is to be set later.
1673 	 */
1674 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1675 	    !aflt->flt_panic)
1676 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1677 	else
1678 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1679 
1680 	/*
1681 	 * log the CE and clean up
1682 	 */
1683 	cpu_log_and_clear_ce(&ch_flt);
1684 
1685 	/*
1686 	 * We re-enable CEEN (if required) and check if any disrupting errors
1687 	 * have happened.  We do this because if a disrupting error had occurred
1688 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1689 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1690 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1691 	 * of a error happening between checking the AFSR and enabling CEEN.
1692 	 */
1693 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1694 		set_error_enable(get_error_enable() | EN_REG_CEEN);
1695 	if (clear_errors(&ch_flt)) {
1696 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1697 		    NULL);
1698 	}
1699 
1700 	/*
1701 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1702 	 * be logged as part of the panic flow.
1703 	 */
1704 	if (aflt->flt_panic)
1705 		fm_panic("%sError(s)", pr_reason);
1706 }
1707 
1708 /*
1709  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1710  * L3_EDU:BLD, TO, and BERR events.
1711  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1712  *
1713  * Cheetah+: No additional errors handled.
1714  *
1715  * Note that the p_clo_flags input is only valid in cases where the
1716  * cpu_private struct is not yet initialized (since that is the only
1717  * time that information cannot be obtained from the logout struct.)
1718  */
1719 /*ARGSUSED*/
1720 void
1721 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1722 {
1723 	ushort_t ttype, tl;
1724 	ch_async_flt_t ch_flt;
1725 	struct async_flt *aflt;
1726 	int trampolined = 0;
1727 	char pr_reason[MAX_REASON_STRING];
1728 	ch_cpu_logout_t *clop;
1729 	uint64_t ceen, clo_flags;
1730 	uint64_t log_afsr;
1731 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1732 	ch_cpu_errors_t cpu_error_regs;
1733 	int expected = DDI_FM_ERR_UNEXPECTED;
1734 	ddi_acc_hdl_t *hp;
1735 
1736 	/*
1737 	 * We need to look at p_flag to determine if the thread detected an
1738 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1739 	 * because we just need a consistent snapshot and we know that everyone
1740 	 * else will store a consistent set of bits while holding p_lock.  We
1741 	 * don't have to worry about a race because SDOCORE is set once prior
1742 	 * to doing i/o from the process's address space and is never cleared.
1743 	 */
1744 	uint_t pflag = ttoproc(curthread)->p_flag;
1745 
1746 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1747 	/*
1748 	 * Get the CPU log out info. If we can't find our CPU private
1749 	 * pointer then we will have to make do without any detailed
1750 	 * logout information.
1751 	 */
1752 	if (CPU_PRIVATE(CPU) == NULL) {
1753 		clop = NULL;
1754 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1755 		get_cpu_error_state(&cpu_error_regs);
1756 		set_cpu_error_state(&cpu_error_regs);
1757 		t_afar = cpu_error_regs.afar;
1758 		t_afsr = cpu_error_regs.afsr;
1759 		t_afsr_ext = cpu_error_regs.afsr_ext;
1760 #if defined(SERRANO)
1761 		ch_flt.afar2 = cpu_error_regs.afar2;
1762 #endif	/* SERRANO */
1763 		clo_flags = p_clo_flags;
1764 	} else {
1765 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1766 		t_afar = clop->clo_data.chd_afar;
1767 		t_afsr = clop->clo_data.chd_afsr;
1768 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1769 #if defined(SERRANO)
1770 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1771 #endif	/* SERRANO */
1772 		clo_flags = clop->clo_flags;
1773 	}
1774 
1775 	/*
1776 	 * In order to simplify code, we maintain this afsr_errs
1777 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1778 	 * sticky bits.
1779 	 */
1780 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1781 	    (t_afsr & C_AFSR_ALL_ERRS);
1782 	pr_reason[0] = '\0';
1783 
1784 	/*
1785 	 * Grab information encoded into our clo_flags field.
1786 	 */
1787 	ceen = clo_flags & EN_REG_CEEN;
1788 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1789 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1790 
1791 	/*
1792 	 * handle the specific error
1793 	 */
1794 	aflt = (struct async_flt *)&ch_flt;
1795 	aflt->flt_id = gethrtime_waitfree();
1796 	aflt->flt_bus_id = getprocessorid();
1797 	aflt->flt_inst = CPU->cpu_id;
1798 	ch_flt.afsr_ext = t_afsr_ext;
1799 	ch_flt.afsr_errs = t_afsr_errs;
1800 	aflt->flt_stat = t_afsr;
1801 	aflt->flt_addr = t_afar;
1802 	aflt->flt_pc = (caddr_t)rp->r_pc;
1803 	aflt->flt_prot = AFLT_PROT_NONE;
1804 	aflt->flt_class = CPU_FAULT;
1805 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1806 	aflt->flt_tl = (uchar_t)tl;
1807 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1808 	    C_AFSR_PANIC(t_afsr_errs));
1809 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1810 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1811 
1812 	/*
1813 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1814 	 * see if we were executing in the kernel under on_trap() or t_lofault
1815 	 * protection.  If so, modify the saved registers so that we return
1816 	 * from the trap to the appropriate trampoline routine.
1817 	 */
1818 	if (aflt->flt_priv && tl == 0) {
1819 		if (curthread->t_ontrap != NULL) {
1820 			on_trap_data_t *otp = curthread->t_ontrap;
1821 
1822 			if (otp->ot_prot & OT_DATA_EC) {
1823 				aflt->flt_prot = AFLT_PROT_EC;
1824 				otp->ot_trap |= OT_DATA_EC;
1825 				rp->r_pc = otp->ot_trampoline;
1826 				rp->r_npc = rp->r_pc + 4;
1827 				trampolined = 1;
1828 			}
1829 
1830 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1831 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1832 				aflt->flt_prot = AFLT_PROT_ACCESS;
1833 				otp->ot_trap |= OT_DATA_ACCESS;
1834 				rp->r_pc = otp->ot_trampoline;
1835 				rp->r_npc = rp->r_pc + 4;
1836 				trampolined = 1;
1837 				/*
1838 				 * for peeks and caut_gets errors are expected
1839 				 */
1840 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1841 				if (!hp)
1842 					expected = DDI_FM_ERR_PEEK;
1843 				else if (hp->ah_acc.devacc_attr_access ==
1844 				    DDI_CAUTIOUS_ACC)
1845 					expected = DDI_FM_ERR_EXPECTED;
1846 			}
1847 
1848 		} else if (curthread->t_lofault) {
1849 			aflt->flt_prot = AFLT_PROT_COPY;
1850 			rp->r_g1 = EFAULT;
1851 			rp->r_pc = curthread->t_lofault;
1852 			rp->r_npc = rp->r_pc + 4;
1853 			trampolined = 1;
1854 		}
1855 	}
1856 
1857 	/*
1858 	 * If we're in user mode or we're doing a protected copy, we either
1859 	 * want the ASTON code below to send a signal to the user process
1860 	 * or we want to panic if aft_panic is set.
1861 	 *
1862 	 * If we're in privileged mode and we're not doing a copy, then we
1863 	 * need to check if we've trampolined.  If we haven't trampolined,
1864 	 * we should panic.
1865 	 */
1866 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1867 		if (t_afsr_errs &
1868 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1869 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1870 			aflt->flt_panic |= aft_panic;
1871 	} else if (!trampolined) {
1872 			aflt->flt_panic = 1;
1873 	}
1874 
1875 	/*
1876 	 * If we've trampolined due to a privileged TO or BERR, or if an
1877 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1878 	 * event for that TO or BERR.  Queue all other events (if any) besides
1879 	 * the TO/BERR.  Since we may not be enqueuing any events, we need to
1880 	 * ignore the number of events queued.  If we haven't trampolined due
1881 	 * to a TO or BERR, just enqueue events normally.
1882 	 */
1883 	log_afsr = t_afsr_errs;
1884 	if (trampolined) {
1885 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1886 	} else if (!aflt->flt_priv) {
1887 		/*
1888 		 * User mode, suppress messages if
1889 		 * cpu_berr_to_verbose is not set.
1890 		 */
1891 		if (!cpu_berr_to_verbose)
1892 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1893 	}
1894 
1895 	/*
1896 	 * Log any errors that occurred
1897 	 */
1898 	if (((log_afsr &
1899 	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1900 	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1901 	    (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1902 		ch_flt.flt_type = CPU_INV_AFSR;
1903 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1904 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1905 		    aflt->flt_panic);
1906 	}
1907 
1908 	/*
1909 	 * Zero out + invalidate CPU logout.
1910 	 */
1911 	if (clop) {
1912 		bzero(clop, sizeof (ch_cpu_logout_t));
1913 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1914 	}
1915 
1916 #if defined(JALAPENO) || defined(SERRANO)
1917 	/*
1918 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1919 	 * IO errors that may have resulted in this trap.
1920 	 */
1921 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1922 		cpu_run_bus_error_handlers(aflt, expected);
1923 	}
1924 
1925 	/*
1926 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1927 	 * line from the Ecache.  We also need to query the bus nexus for
1928 	 * fatal errors.  Attempts to do diagnostic read on caches may
1929 	 * introduce more errors (especially when the module is bad).
1930 	 */
1931 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1932 		/*
1933 		 * Ask our bus nexus friends if they have any fatal errors.  If
1934 		 * so, they will log appropriate error messages.
1935 		 */
1936 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1937 			aflt->flt_panic = 1;
1938 
1939 		/*
1940 		 * We got a UE or RUE and are panicking, save the fault PA in
1941 		 * a known location so that the platform specific panic code
1942 		 * can check for copyback errors.
1943 		 */
1944 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1945 			panic_aflt = *aflt;
1946 		}
1947 	}
1948 
1949 	/*
1950 	 * Flush Ecache line or entire Ecache
1951 	 */
1952 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1953 		cpu_error_ecache_flush(&ch_flt);
1954 #else /* JALAPENO || SERRANO */
1955 	/*
1956 	 * UE/BERR/TO: Call our bus nexus friends to check for
1957 	 * IO errors that may have resulted in this trap.
1958 	 */
1959 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1960 		cpu_run_bus_error_handlers(aflt, expected);
1961 	}
1962 
1963 	/*
1964 	 * UE: If the UE is in memory, we need to flush the bad
1965 	 * line from the Ecache.  We also need to query the bus nexus for
1966 	 * fatal errors.  Attempts to do diagnostic read on caches may
1967 	 * introduce more errors (especially when the module is bad).
1968 	 */
1969 	if (t_afsr & C_AFSR_UE) {
1970 		/*
1971 		 * Ask our legacy bus nexus friends if they have any fatal
1972 		 * errors.  If so, they will log appropriate error messages.
1973 		 */
1974 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1975 			aflt->flt_panic = 1;
1976 
1977 		/*
1978 		 * We got a UE and are panicking, save the fault PA in a known
1979 		 * location so that the platform specific panic code can check
1980 		 * for copyback errors.
1981 		 */
1982 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1983 			panic_aflt = *aflt;
1984 		}
1985 	}
1986 
1987 	/*
1988 	 * Flush Ecache line or entire Ecache
1989 	 */
1990 	if (t_afsr_errs &
1991 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1992 		cpu_error_ecache_flush(&ch_flt);
1993 #endif /* JALAPENO || SERRANO */
1994 
1995 	/*
1996 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1997 	 * or disrupting errors have happened.  We do this because if a
1998 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1999 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2000 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2001 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2002 	 * deferred or disrupting error happening between checking the AFSR and
2003 	 * enabling NCEEN/CEEN.
2004 	 *
2005 	 * Note: CEEN reenabled only if it was on when trap taken.
2006 	 */
2007 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2008 	if (clear_errors(&ch_flt)) {
2009 		/*
2010 		 * Check for secondary errors, and avoid panicking if we
2011 		 * have them
2012 		 */
2013 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2014 		    t_afar) == 0) {
2015 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2016 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2017 		}
2018 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2019 		    NULL);
2020 	}
2021 
2022 	/*
2023 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2024 	 * be logged as part of the panic flow.
2025 	 */
2026 	if (aflt->flt_panic)
2027 		fm_panic("%sError(s)", pr_reason);
2028 
2029 	/*
2030 	 * If we queued an error and we are going to return from the trap and
2031 	 * the error was in user mode or inside of a copy routine, set AST flag
2032 	 * so the queue will be drained before returning to user mode.  The
2033 	 * AST processing will also act on our failure policy.
2034 	 */
2035 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2036 		int pcb_flag = 0;
2037 
2038 		if (t_afsr_errs &
2039 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2040 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2041 			pcb_flag |= ASYNC_HWERR;
2042 
2043 		if (t_afsr & C_AFSR_BERR)
2044 			pcb_flag |= ASYNC_BERR;
2045 
2046 		if (t_afsr & C_AFSR_TO)
2047 			pcb_flag |= ASYNC_BTO;
2048 
2049 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2050 		aston(curthread);
2051 	}
2052 }
2053 
2054 #if defined(CPU_IMP_L1_CACHE_PARITY)
2055 /*
2056  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2057  *
2058  * For Panther, P$ data parity errors during floating point load hits
2059  * are also detected (reported as TT 0x71) and handled by this trap
2060  * handler.
2061  *
2062  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2063  * is available.
2064  */
2065 /*ARGSUSED*/
2066 void
2067 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2068 {
2069 	ch_async_flt_t ch_flt;
2070 	struct async_flt *aflt;
2071 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2072 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2073 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2074 	char *error_class;
2075 
2076 	/*
2077 	 * Log the error.
2078 	 * For icache parity errors the fault address is the trap PC.
2079 	 * For dcache/pcache parity errors the instruction would have to
2080 	 * be decoded to determine the address and that isn't possible
2081 	 * at high PIL.
2082 	 */
2083 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2084 	aflt = (struct async_flt *)&ch_flt;
2085 	aflt->flt_id = gethrtime_waitfree();
2086 	aflt->flt_bus_id = getprocessorid();
2087 	aflt->flt_inst = CPU->cpu_id;
2088 	aflt->flt_pc = tpc;
2089 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2090 	aflt->flt_prot = AFLT_PROT_NONE;
2091 	aflt->flt_class = CPU_FAULT;
2092 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2093 	aflt->flt_tl = tl;
2094 	aflt->flt_panic = panic;
2095 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2096 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2097 
2098 	if (iparity) {
2099 		cpu_icache_parity_info(&ch_flt);
2100 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2101 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2102 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2103 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2104 		else
2105 			error_class = FM_EREPORT_CPU_USIII_IPE;
2106 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2107 	} else {
2108 		cpu_dcache_parity_info(&ch_flt);
2109 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2110 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2111 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2112 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2113 		else
2114 			error_class = FM_EREPORT_CPU_USIII_DPE;
2115 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2116 		/*
2117 		 * For panther we also need to check the P$ for parity errors.
2118 		 */
2119 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2120 			cpu_pcache_parity_info(&ch_flt);
2121 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2122 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2123 				aflt->flt_payload =
2124 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2125 			}
2126 		}
2127 	}
2128 
2129 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2130 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2131 
2132 	if (iparity) {
2133 		/*
2134 		 * Invalidate entire I$.
2135 		 * This is required due to the use of diagnostic ASI
2136 		 * accesses that may result in a loss of I$ coherency.
2137 		 */
2138 		if (cache_boot_state & DCU_IC) {
2139 			flush_icache();
2140 		}
2141 		/*
2142 		 * According to section P.3.1 of the Panther PRM, we
2143 		 * need to do a little more for recovery on those
2144 		 * CPUs after encountering an I$ parity error.
2145 		 */
2146 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2147 			flush_ipb();
2148 			correct_dcache_parity(dcache_size,
2149 			    dcache_linesize);
2150 			flush_pcache();
2151 		}
2152 	} else {
2153 		/*
2154 		 * Since the valid bit is ignored when checking parity the
2155 		 * D$ data and tag must also be corrected.  Set D$ data bits
2156 		 * to zero and set utag to 0, 1, 2, 3.
2157 		 */
2158 		correct_dcache_parity(dcache_size, dcache_linesize);
2159 
2160 		/*
2161 		 * According to section P.3.3 of the Panther PRM, we
2162 		 * need to do a little more for recovery on those
2163 		 * CPUs after encountering a D$ or P$ parity error.
2164 		 *
2165 		 * As far as clearing P$ parity errors, it is enough to
2166 		 * simply invalidate all entries in the P$ since P$ parity
2167 		 * error traps are only generated for floating point load
2168 		 * hits.
2169 		 */
2170 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2171 			flush_icache();
2172 			flush_ipb();
2173 			flush_pcache();
2174 		}
2175 	}
2176 
2177 	/*
2178 	 * Invalidate entire D$ if it was enabled.
2179 	 * This is done to avoid stale data in the D$ which might
2180 	 * occur with the D$ disabled and the trap handler doing
2181 	 * stores affecting lines already in the D$.
2182 	 */
2183 	if (cache_boot_state & DCU_DC) {
2184 		flush_dcache();
2185 	}
2186 
2187 	/*
2188 	 * Restore caches to their bootup state.
2189 	 */
2190 	set_dcu(get_dcu() | cache_boot_state);
2191 
2192 	/*
2193 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2194 	 * be logged as part of the panic flow.
2195 	 */
2196 	if (aflt->flt_panic)
2197 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2198 
2199 	/*
2200 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2201 	 * the chance of getting an unrecoverable Fast ECC error.  This
2202 	 * flush will evict the part of the parity trap handler that is run
2203 	 * at TL>1.
2204 	 */
2205 	if (tl) {
2206 		cpu_flush_ecache();
2207 	}
2208 }
2209 
2210 /*
2211  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2212  * to indicate which portions of the captured data should be in the ereport.
2213  */
2214 void
2215 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2216 {
2217 	int way = ch_flt->parity_data.ipe.cpl_way;
2218 	int offset = ch_flt->parity_data.ipe.cpl_off;
2219 	int tag_index;
2220 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2221 
2222 
2223 	if ((offset != -1) || (way != -1)) {
2224 		/*
2225 		 * Parity error in I$ tag or data
2226 		 */
2227 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2228 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2229 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2230 			    PN_ICIDX_TO_WAY(tag_index);
2231 		else
2232 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2233 			    CH_ICIDX_TO_WAY(tag_index);
2234 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2235 		    IC_LOGFLAG_MAGIC;
2236 	} else {
2237 		/*
2238 		 * Parity error was not identified.
2239 		 * Log tags and data for all ways.
2240 		 */
2241 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2242 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2243 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2244 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2245 				    PN_ICIDX_TO_WAY(tag_index);
2246 			else
2247 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2248 				    CH_ICIDX_TO_WAY(tag_index);
2249 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2250 			    IC_LOGFLAG_MAGIC;
2251 		}
2252 	}
2253 }
2254 
2255 /*
2256  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2257  * to indicate which portions of the captured data should be in the ereport.
2258  */
2259 void
2260 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2261 {
2262 	int way = ch_flt->parity_data.dpe.cpl_way;
2263 	int offset = ch_flt->parity_data.dpe.cpl_off;
2264 	int tag_index;
2265 
2266 	if (offset != -1) {
2267 		/*
2268 		 * Parity error in D$ or P$ data array.
2269 		 *
2270 		 * First check to see whether the parity error is in D$ or P$
2271 		 * since P$ data parity errors are reported in Panther using
2272 		 * the same trap.
2273 		 */
2274 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2275 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2276 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2277 			    CH_PCIDX_TO_WAY(tag_index);
2278 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2279 			    PC_LOGFLAG_MAGIC;
2280 		} else {
2281 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2282 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2283 			    CH_DCIDX_TO_WAY(tag_index);
2284 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2285 			    DC_LOGFLAG_MAGIC;
2286 		}
2287 	} else if (way != -1) {
2288 		/*
2289 		 * Parity error in D$ tag.
2290 		 */
2291 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2292 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2293 		    CH_DCIDX_TO_WAY(tag_index);
2294 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2295 		    DC_LOGFLAG_MAGIC;
2296 	}
2297 }
2298 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2299 
2300 /*
2301  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2302  * post-process CPU events that are dequeued.  As such, it can be invoked
2303  * from softint context, from AST processing in the trap() flow, or from the
2304  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2305  * Historically this entry point was used to log the actual cmn_err(9F) text;
2306  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2307  * With FMA this function now also returns a flag which indicates to the
2308  * caller whether the ereport should be posted (1) or suppressed (0).
2309  */
2310 static int
2311 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2312 {
2313 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2314 	struct async_flt *aflt = (struct async_flt *)flt;
2315 	uint64_t errors;
2316 	extern void memscrub_induced_error(void);
2317 
2318 	switch (ch_flt->flt_type) {
2319 	case CPU_INV_AFSR:
2320 		/*
2321 		 * If it is a disrupting trap and the AFSR is zero, then
2322 		 * the event has probably already been noted. Do not post
2323 		 * an ereport.
2324 		 */
2325 		if ((aflt->flt_status & ECC_C_TRAP) &&
2326 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2327 			return (0);
2328 		else
2329 			return (1);
2330 	case CPU_TO:
2331 	case CPU_BERR:
2332 	case CPU_FATAL:
2333 	case CPU_FPUERR:
2334 		return (1);
2335 
2336 	case CPU_UE_ECACHE_RETIRE:
2337 		cpu_log_err(aflt);
2338 		cpu_page_retire(ch_flt);
2339 		return (1);
2340 
2341 	/*
2342 	 * Cases where we may want to suppress logging or perform
2343 	 * extended diagnostics.
2344 	 */
2345 	case CPU_CE:
2346 	case CPU_EMC:
2347 		/*
2348 		 * We want to skip logging and further classification
2349 		 * only if ALL the following conditions are true:
2350 		 *
2351 		 *	1. There is only one error
2352 		 *	2. That error is a correctable memory error
2353 		 *	3. The error is caused by the memory scrubber (in
2354 		 *	   which case the error will have occurred under
2355 		 *	   on_trap protection)
2356 		 *	4. The error is on a retired page
2357 		 *
2358 		 * Note: AFLT_PROT_EC is used places other than the memory
2359 		 * scrubber.  However, none of those errors should occur
2360 		 * on a retired page.
2361 		 */
2362 		if ((ch_flt->afsr_errs &
2363 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2364 		    aflt->flt_prot == AFLT_PROT_EC) {
2365 
2366 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2367 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2368 
2369 				/*
2370 				 * Since we're skipping logging, we'll need
2371 				 * to schedule the re-enabling of CEEN
2372 				 */
2373 				(void) timeout(cpu_delayed_check_ce_errors,
2374 				    (void *)(uintptr_t)aflt->flt_inst,
2375 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2376 				    * MICROSEC));
2377 				}
2378 
2379 				/*
2380 				 * Inform memscrubber - scrubbing induced
2381 				 * CE on a retired page.
2382 				 */
2383 				memscrub_induced_error();
2384 				return (0);
2385 			}
2386 		}
2387 
2388 		/*
2389 		 * Perform/schedule further classification actions, but
2390 		 * only if the page is healthy (we don't want bad
2391 		 * pages inducing too much diagnostic activity).  If we could
2392 		 * not find a page pointer then we also skip this.  If
2393 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2394 		 * to copy and recirculate the event (for further diagnostics)
2395 		 * and we should not proceed to log it here.
2396 		 *
2397 		 * This must be the last step here before the cpu_log_err()
2398 		 * below - if an event recirculates cpu_ce_log_err() will
2399 		 * not call the current function but just proceed directly
2400 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2401 		 *
2402 		 * Note: Check cpu_impl_async_log_err if changing this
2403 		 */
2404 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2405 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2406 			    CE_XDIAG_SKIP_NOPP);
2407 		} else {
2408 			if (errors != PR_OK) {
2409 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2410 				    CE_XDIAG_SKIP_PAGEDET);
2411 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2412 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2413 				return (0);
2414 			}
2415 		}
2416 		/*FALLTHRU*/
2417 
2418 	/*
2419 	 * Cases where we just want to report the error and continue.
2420 	 */
2421 	case CPU_CE_ECACHE:
2422 	case CPU_UE_ECACHE:
2423 	case CPU_IV:
2424 	case CPU_ORPH:
2425 		cpu_log_err(aflt);
2426 		return (1);
2427 
2428 	/*
2429 	 * Cases where we want to fall through to handle panicking.
2430 	 */
2431 	case CPU_UE:
2432 		/*
2433 		 * We want to skip logging in the same conditions as the
2434 		 * CE case.  In addition, we want to make sure we're not
2435 		 * panicking.
2436 		 */
2437 		if (!panicstr && (ch_flt->afsr_errs &
2438 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2439 		    aflt->flt_prot == AFLT_PROT_EC) {
2440 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2441 				/* Zero the address to clear the error */
2442 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2443 				/*
2444 				 * Inform memscrubber - scrubbing induced
2445 				 * UE on a retired page.
2446 				 */
2447 				memscrub_induced_error();
2448 				return (0);
2449 			}
2450 		}
2451 		cpu_log_err(aflt);
2452 		break;
2453 
2454 	default:
2455 		/*
2456 		 * If the us3_common.c code doesn't know the flt_type, it may
2457 		 * be an implementation-specific code.  Call into the impldep
2458 		 * backend to find out what to do: if it tells us to continue,
2459 		 * break and handle as if falling through from a UE; if not,
2460 		 * the impldep backend has handled the error and we're done.
2461 		 */
2462 		switch (cpu_impl_async_log_err(flt, eqep)) {
2463 		case CH_ASYNC_LOG_DONE:
2464 			return (1);
2465 		case CH_ASYNC_LOG_RECIRC:
2466 			return (0);
2467 		case CH_ASYNC_LOG_CONTINUE:
2468 			break; /* continue on to handle UE-like error */
2469 		default:
2470 			cmn_err(CE_WARN, "discarding error 0x%p with "
2471 			    "invalid fault type (0x%x)",
2472 			    (void *)aflt, ch_flt->flt_type);
2473 			return (0);
2474 		}
2475 	}
2476 
2477 	/* ... fall through from the UE case */
2478 
2479 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2480 		if (!panicstr) {
2481 			cpu_page_retire(ch_flt);
2482 		} else {
2483 			/*
2484 			 * Clear UEs on panic so that we don't
2485 			 * get haunted by them during panic or
2486 			 * after reboot
2487 			 */
2488 			cpu_clearphys(aflt);
2489 			(void) clear_errors(NULL);
2490 		}
2491 	}
2492 
2493 	return (1);
2494 }
2495 
2496 /*
2497  * Retire the bad page that may contain the flushed error.
2498  */
2499 void
2500 cpu_page_retire(ch_async_flt_t *ch_flt)
2501 {
2502 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2503 	(void) page_retire(aflt->flt_addr, PR_UE);
2504 }
2505 
2506 /*
2507  * Return true if the error specified in the AFSR indicates
2508  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2509  * for Panther, none for Jalapeno/Serrano).
2510  */
2511 /* ARGSUSED */
2512 static int
2513 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2514 {
2515 #if defined(JALAPENO) || defined(SERRANO)
2516 	return (0);
2517 #elif defined(CHEETAH_PLUS)
2518 	if (IS_PANTHER(cpunodes[cpuid].implementation))
2519 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2520 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2521 #else	/* CHEETAH_PLUS */
2522 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2523 #endif
2524 }
2525 
2526 /*
2527  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2528  * generic event post-processing for correctable and uncorrectable memory,
2529  * E$, and MTag errors.  Historically this entry point was used to log bits of
2530  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2531  * converted into an ereport.  In addition, it transmits the error to any
2532  * platform-specific service-processor FRU logging routines, if available.
2533  */
2534 void
2535 cpu_log_err(struct async_flt *aflt)
2536 {
2537 	char unum[UNUM_NAMLEN];
2538 	int synd_status, synd_code, afar_status;
2539 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2540 
2541 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2542 		aflt->flt_status |= ECC_ECACHE;
2543 	else
2544 		aflt->flt_status &= ~ECC_ECACHE;
2545 	/*
2546 	 * Determine syndrome status.
2547 	 */
2548 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2549 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2550 
2551 	/*
2552 	 * Determine afar status.
2553 	 */
2554 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2555 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2556 		    ch_flt->flt_bit);
2557 	else
2558 		afar_status = AFLT_STAT_INVALID;
2559 
2560 	synd_code = synd_to_synd_code(synd_status,
2561 	    aflt->flt_synd, ch_flt->flt_bit);
2562 
2563 	/*
2564 	 * If afar status is not invalid do a unum lookup.
2565 	 */
2566 	if (afar_status != AFLT_STAT_INVALID) {
2567 		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2568 	} else {
2569 		unum[0] = '\0';
2570 	}
2571 
2572 	/*
2573 	 * Do not send the fruid message (plat_ecc_error_data_t)
2574 	 * to the SC if it can handle the enhanced error information
2575 	 * (plat_ecc_error2_data_t) or when the tunable
2576 	 * ecc_log_fruid_enable is set to 0.
2577 	 */
2578 
2579 	if (&plat_ecc_capability_sc_get &&
2580 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2581 		if (&plat_log_fruid_error)
2582 			plat_log_fruid_error(synd_code, aflt, unum,
2583 			    ch_flt->flt_bit);
2584 	}
2585 
2586 	if (aflt->flt_func != NULL)
2587 		aflt->flt_func(aflt, unum);
2588 
2589 	if (afar_status != AFLT_STAT_INVALID)
2590 		cpu_log_diag_info(ch_flt);
2591 
2592 	/*
2593 	 * If we have a CEEN error , we do not reenable CEEN until after
2594 	 * we exit the trap handler. Otherwise, another error may
2595 	 * occur causing the handler to be entered recursively.
2596 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2597 	 * to try and ensure that the CPU makes progress in the face
2598 	 * of a CE storm.
2599 	 */
2600 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2601 		(void) timeout(cpu_delayed_check_ce_errors,
2602 		    (void *)(uintptr_t)aflt->flt_inst,
2603 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2604 	}
2605 }
2606 
2607 /*
2608  * Invoked by error_init() early in startup and therefore before
2609  * startup_errorq() is called to drain any error Q -
2610  *
2611  * startup()
2612  *   startup_end()
2613  *     error_init()
2614  *       cpu_error_init()
2615  * errorq_init()
2616  *   errorq_drain()
2617  * start_other_cpus()
2618  *
2619  * The purpose of this routine is to create error-related taskqs.  Taskqs
2620  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2621  * context.
2622  */
2623 void
2624 cpu_error_init(int items)
2625 {
2626 	/*
2627 	 * Create taskq(s) to reenable CE
2628 	 */
2629 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2630 	    items, items, TASKQ_PREPOPULATE);
2631 }
2632 
2633 void
2634 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2635 {
2636 	char unum[UNUM_NAMLEN];
2637 	int len;
2638 
2639 	switch (aflt->flt_class) {
2640 	case CPU_FAULT:
2641 		cpu_ereport_init(aflt);
2642 		if (cpu_async_log_err(aflt, eqep))
2643 			cpu_ereport_post(aflt);
2644 		break;
2645 
2646 	case BUS_FAULT:
2647 		if (aflt->flt_func != NULL) {
2648 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2649 			    unum, UNUM_NAMLEN, &len);
2650 			aflt->flt_func(aflt, unum);
2651 		}
2652 		break;
2653 
2654 	case RECIRC_CPU_FAULT:
2655 		aflt->flt_class = CPU_FAULT;
2656 		cpu_log_err(aflt);
2657 		cpu_ereport_post(aflt);
2658 		break;
2659 
2660 	case RECIRC_BUS_FAULT:
2661 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2662 		/*FALLTHRU*/
2663 	default:
2664 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2665 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2666 		return;
2667 	}
2668 }
2669 
2670 /*
2671  * Scrub and classify a CE.  This function must not modify the
2672  * fault structure passed to it but instead should return the classification
2673  * information.
2674  */
2675 
2676 static uchar_t
2677 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2678 {
2679 	uchar_t disp = CE_XDIAG_EXTALG;
2680 	on_trap_data_t otd;
2681 	uint64_t orig_err;
2682 	ch_cpu_logout_t *clop;
2683 
2684 	/*
2685 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2686 	 * this, but our other callers have not.  Disable preemption to
2687 	 * avoid CPU migration so that we restore CEEN on the correct
2688 	 * cpu later.
2689 	 *
2690 	 * CEEN is cleared so that further CEs that our instruction and
2691 	 * data footprint induce do not cause use to either creep down
2692 	 * kernel stack to the point of overflow, or do so much CE
2693 	 * notification as to make little real forward progress.
2694 	 *
2695 	 * NCEEN must not be cleared.  However it is possible that
2696 	 * our accesses to the flt_addr may provoke a bus error or timeout
2697 	 * if the offending address has just been unconfigured as part of
2698 	 * a DR action.  So we must operate under on_trap protection.
2699 	 */
2700 	kpreempt_disable();
2701 	orig_err = get_error_enable();
2702 	if (orig_err & EN_REG_CEEN)
2703 		set_error_enable(orig_err & ~EN_REG_CEEN);
2704 
2705 	/*
2706 	 * Our classification algorithm includes the line state before
2707 	 * the scrub; we'd like this captured after the detection and
2708 	 * before the algorithm below - the earlier the better.
2709 	 *
2710 	 * If we've come from a cpu CE trap then this info already exists
2711 	 * in the cpu logout area.
2712 	 *
2713 	 * For a CE detected by memscrub for which there was no trap
2714 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2715 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2716 	 * marked the fault structure as incomplete as a flag to later
2717 	 * logging code.
2718 	 *
2719 	 * If called directly from an IO detected CE there has been
2720 	 * no line data capture.  In this case we logout to the cpu logout
2721 	 * area - that's appropriate since it's the cpu cache data we need
2722 	 * for classification.  We thus borrow the cpu logout area for a
2723 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2724 	 * this time (we will invalidate it again below).
2725 	 *
2726 	 * If called from the partner check xcall handler then this cpu
2727 	 * (the partner) has not necessarily experienced a CE at this
2728 	 * address.  But we want to capture line state before its scrub
2729 	 * attempt since we use that in our classification.
2730 	 */
2731 	if (logout_tried == B_FALSE) {
2732 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2733 			disp |= CE_XDIAG_NOLOGOUT;
2734 	}
2735 
2736 	/*
2737 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2738 	 * no longer be valid (if DR'd since the initial event) so we
2739 	 * perform this scrub under on_trap protection.  If this access is
2740 	 * ok then further accesses below will also be ok - DR cannot
2741 	 * proceed while this thread is active (preemption is disabled);
2742 	 * to be safe we'll nonetheless use on_trap again below.
2743 	 */
2744 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2745 		cpu_scrubphys(ecc);
2746 	} else {
2747 		no_trap();
2748 		if (orig_err & EN_REG_CEEN)
2749 			set_error_enable(orig_err);
2750 		kpreempt_enable();
2751 		return (disp);
2752 	}
2753 	no_trap();
2754 
2755 	/*
2756 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2757 	 * Note that it's quite possible that the read sourced the data from
2758 	 * another cpu.
2759 	 */
2760 	if (clear_ecc(ecc))
2761 		disp |= CE_XDIAG_CE1;
2762 
2763 	/*
2764 	 * Read the data again.  This time the read is very likely to
2765 	 * come from memory since the scrub induced a writeback to memory.
2766 	 */
2767 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2768 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2769 	} else {
2770 		no_trap();
2771 		if (orig_err & EN_REG_CEEN)
2772 			set_error_enable(orig_err);
2773 		kpreempt_enable();
2774 		return (disp);
2775 	}
2776 	no_trap();
2777 
2778 	/* Did that read induce a CE that matches the AFAR? */
2779 	if (clear_ecc(ecc))
2780 		disp |= CE_XDIAG_CE2;
2781 
2782 	/*
2783 	 * Look at the logout information and record whether we found the
2784 	 * line in l2/l3 cache.  For Panther we are interested in whether
2785 	 * we found it in either cache (it won't reside in both but
2786 	 * it is possible to read it that way given the moving target).
2787 	 */
2788 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2789 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2790 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2791 		int hit, level;
2792 		int state;
2793 		int totalsize;
2794 		ch_ec_data_t *ecp;
2795 
2796 		/*
2797 		 * If hit is nonzero then a match was found and hit will
2798 		 * be one greater than the index which hit.  For Panther we
2799 		 * also need to pay attention to level to see which of l2$ or
2800 		 * l3$ it hit in.
2801 		 */
2802 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2803 		    0, &level);
2804 
2805 		if (hit) {
2806 			--hit;
2807 			disp |= CE_XDIAG_AFARMATCH;
2808 
2809 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2810 				if (level == 2)
2811 					ecp = &clop->clo_data.chd_l2_data[hit];
2812 				else
2813 					ecp = &clop->clo_data.chd_ec_data[hit];
2814 			} else {
2815 				ASSERT(level == 2);
2816 				ecp = &clop->clo_data.chd_ec_data[hit];
2817 			}
2818 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2819 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2820 			    ecc->flt_addr, ecp->ec_tag);
2821 
2822 			/*
2823 			 * Cheetah variants use different state encodings -
2824 			 * the CH_ECSTATE_* defines vary depending on the
2825 			 * module we're compiled for.  Translate into our
2826 			 * one true version.  Conflate Owner-Shared state
2827 			 * of SSM mode with Owner as victimisation of such
2828 			 * lines may cause a writeback.
2829 			 */
2830 			switch (state) {
2831 			case CH_ECSTATE_MOD:
2832 				disp |= EC_STATE_M;
2833 				break;
2834 
2835 			case CH_ECSTATE_OWN:
2836 			case CH_ECSTATE_OWS:
2837 				disp |= EC_STATE_O;
2838 				break;
2839 
2840 			case CH_ECSTATE_EXL:
2841 				disp |= EC_STATE_E;
2842 				break;
2843 
2844 			case CH_ECSTATE_SHR:
2845 				disp |= EC_STATE_S;
2846 				break;
2847 
2848 			default:
2849 				disp |= EC_STATE_I;
2850 				break;
2851 			}
2852 		}
2853 
2854 		/*
2855 		 * If we initiated the delayed logout then we are responsible
2856 		 * for invalidating the logout area.
2857 		 */
2858 		if (logout_tried == B_FALSE) {
2859 			bzero(clop, sizeof (ch_cpu_logout_t));
2860 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2861 		}
2862 	}
2863 
2864 	/*
2865 	 * Re-enable CEEN if we turned it off.
2866 	 */
2867 	if (orig_err & EN_REG_CEEN)
2868 		set_error_enable(orig_err);
2869 	kpreempt_enable();
2870 
2871 	return (disp);
2872 }
2873 
2874 /*
2875  * Scrub a correctable memory error and collect data for classification
2876  * of CE type.  This function is called in the detection path, ie tl0 handling
2877  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2878  */
2879 void
2880 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2881 {
2882 	/*
2883 	 * Cheetah CE classification does not set any bits in flt_status.
2884 	 * Instead we will record classification datapoints in flt_disp.
2885 	 */
2886 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2887 
2888 	/*
2889 	 * To check if the error detected by IO is persistent, sticky or
2890 	 * intermittent.  This is noticed by clear_ecc().
2891 	 */
2892 	if (ecc->flt_status & ECC_IOBUS)
2893 		ecc->flt_stat = C_AFSR_MEMORY;
2894 
2895 	/*
2896 	 * Record information from this first part of the algorithm in
2897 	 * flt_disp.
2898 	 */
2899 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2900 }
2901 
2902 /*
2903  * Select a partner to perform a further CE classification check from.
2904  * Must be called with kernel preemption disabled (to stop the cpu list
2905  * from changing).  The detecting cpu we are partnering has cpuid
2906  * aflt->flt_inst; we might not be running on the detecting cpu.
2907  *
2908  * Restrict choice to active cpus in the same cpu partition as ourselves in
2909  * an effort to stop bad cpus in one partition causing other partitions to
2910  * perform excessive diagnostic activity.  Actually since the errorq drain
2911  * is run from a softint most of the time and that is a global mechanism
2912  * this isolation is only partial.  Return NULL if we fail to find a
2913  * suitable partner.
2914  *
2915  * We prefer a partner that is in a different latency group to ourselves as
2916  * we will share fewer datapaths.  If such a partner is unavailable then
2917  * choose one in the same lgroup but prefer a different chip and only allow
2918  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2919  * flags includes PTNR_SELFOK then permit selection of the original detector.
2920  *
2921  * We keep a cache of the last partner selected for a cpu, and we'll try to
2922  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2923  * have passed since that selection was made.  This provides the benefit
2924  * of the point-of-view of different partners over time but without
2925  * requiring frequent cpu list traversals.
2926  */
2927 
2928 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2929 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2930 
2931 static cpu_t *
2932 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2933 {
2934 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2935 	hrtime_t lasttime, thistime;
2936 
2937 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2938 
2939 	dtcr = cpu[aflt->flt_inst];
2940 
2941 	/*
2942 	 * Short-circuit for the following cases:
2943 	 *	. the dtcr is not flagged active
2944 	 *	. there is just one cpu present
2945 	 *	. the detector has disappeared
2946 	 *	. we were given a bad flt_inst cpuid; this should not happen
2947 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2948 	 *	  reason to panic.
2949 	 *	. there is just one cpu left online in the cpu partition
2950 	 *
2951 	 * If we return NULL after this point then we do not update the
2952 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2953 	 * again next time; this is the case where the only other cpu online
2954 	 * in the detector's partition is on the same chip as the detector
2955 	 * and since CEEN re-enable is throttled even that case should not
2956 	 * hurt performance.
2957 	 */
2958 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2959 		return (NULL);
2960 	}
2961 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2962 		if (flags & PTNR_SELFOK) {
2963 			*typep = CE_XDIAG_PTNR_SELF;
2964 			return (dtcr);
2965 		} else {
2966 			return (NULL);
2967 		}
2968 	}
2969 
2970 	thistime = gethrtime();
2971 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2972 
2973 	/*
2974 	 * Select a starting point.
2975 	 */
2976 	if (!lasttime) {
2977 		/*
2978 		 * We've never selected a partner for this detector before.
2979 		 * Start the scan at the next online cpu in the same cpu
2980 		 * partition.
2981 		 */
2982 		sp = dtcr->cpu_next_part;
2983 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2984 		/*
2985 		 * Our last selection has not aged yet.  If this partner:
2986 		 *	. is still a valid cpu,
2987 		 *	. is still in the same partition as the detector
2988 		 *	. is still marked active
2989 		 *	. satisfies the 'flags' argument criteria
2990 		 * then select it again without updating the timestamp.
2991 		 */
2992 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2993 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2994 		    !cpu_flagged_active(sp->cpu_flags) ||
2995 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2996 		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
2997 		    !(flags & PTNR_SIBLINGOK))) {
2998 			sp = dtcr->cpu_next_part;
2999 		} else {
3000 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3001 				*typep = CE_XDIAG_PTNR_REMOTE;
3002 			} else if (sp == dtcr) {
3003 				*typep = CE_XDIAG_PTNR_SELF;
3004 			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3005 				*typep = CE_XDIAG_PTNR_SIBLING;
3006 			} else {
3007 				*typep = CE_XDIAG_PTNR_LOCAL;
3008 			}
3009 			return (sp);
3010 		}
3011 	} else {
3012 		/*
3013 		 * Our last selection has aged.  If it is nonetheless still a
3014 		 * valid cpu then start the scan at the next cpu in the
3015 		 * partition after our last partner.  If the last selection
3016 		 * is no longer a valid cpu then go with our default.  In
3017 		 * this way we slowly cycle through possible partners to
3018 		 * obtain multiple viewpoints over time.
3019 		 */
3020 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3021 		if (sp == NULL) {
3022 			sp = dtcr->cpu_next_part;
3023 		} else {
3024 			sp = sp->cpu_next_part;		/* may be dtcr */
3025 			if (sp->cpu_part != dtcr->cpu_part)
3026 				sp = dtcr;
3027 		}
3028 	}
3029 
3030 	/*
3031 	 * We have a proposed starting point for our search, but if this
3032 	 * cpu is offline then its cpu_next_part will point to itself
3033 	 * so we can't use that to iterate over cpus in this partition in
3034 	 * the loop below.  We still want to avoid iterating over cpus not
3035 	 * in our partition, so in the case that our starting point is offline
3036 	 * we will repoint it to be the detector itself;  and if the detector
3037 	 * happens to be offline we'll return NULL from the following loop.
3038 	 */
3039 	if (!cpu_flagged_active(sp->cpu_flags)) {
3040 		sp = dtcr;
3041 	}
3042 
3043 	ptnr = sp;
3044 	locptnr = NULL;
3045 	sibptnr = NULL;
3046 	do {
3047 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3048 			continue;
3049 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3050 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3051 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3052 			*typep = CE_XDIAG_PTNR_REMOTE;
3053 			return (ptnr);
3054 		}
3055 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3056 			if (sibptnr == NULL)
3057 				sibptnr = ptnr;
3058 			continue;
3059 		}
3060 		if (locptnr == NULL)
3061 			locptnr = ptnr;
3062 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3063 
3064 	/*
3065 	 * A foreign partner has already been returned if one was available.
3066 	 *
3067 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3068 	 * detector, is active, and is not a sibling of the detector.
3069 	 *
3070 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3071 	 * active.
3072 	 *
3073 	 * If we have to resort to using the detector itself we have already
3074 	 * checked that it is active.
3075 	 */
3076 	if (locptnr) {
3077 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3078 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3079 		*typep = CE_XDIAG_PTNR_LOCAL;
3080 		return (locptnr);
3081 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3082 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3083 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3084 		*typep = CE_XDIAG_PTNR_SIBLING;
3085 		return (sibptnr);
3086 	} else if (flags & PTNR_SELFOK) {
3087 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3088 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3089 		*typep = CE_XDIAG_PTNR_SELF;
3090 		return (dtcr);
3091 	}
3092 
3093 	return (NULL);
3094 }
3095 
3096 /*
3097  * Cross call handler that is requested to run on the designated partner of
3098  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3099  */
3100 static void
3101 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3102 {
3103 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3104 }
3105 
3106 /*
3107  * The associated errorqs are never destroyed so we do not need to deal with
3108  * them disappearing before this timeout fires.  If the affected memory
3109  * has been DR'd out since the original event the scrub algrithm will catch
3110  * any errors and return null disposition info.  If the original detecting
3111  * cpu has been DR'd out then ereport detector info will not be able to
3112  * lookup CPU type;  with a small timeout this is unlikely.
3113  */
3114 static void
3115 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3116 {
3117 	struct async_flt *aflt = cbarg->lkycb_aflt;
3118 	uchar_t disp;
3119 	cpu_t *cp;
3120 	int ptnrtype;
3121 
3122 	kpreempt_disable();
3123 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3124 	    &ptnrtype)) {
3125 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3126 		    (uint64_t)&disp);
3127 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3128 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3129 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3130 	} else {
3131 		ce_xdiag_lkydrops++;
3132 		if (ncpus > 1)
3133 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3134 			    CE_XDIAG_SKIP_NOPTNR);
3135 	}
3136 	kpreempt_enable();
3137 
3138 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3139 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3140 }
3141 
3142 /*
3143  * Called from errorq drain code when processing a CE error, both from
3144  * CPU and PCI drain functions.  Decide what further classification actions,
3145  * if any, we will perform.  Perform immediate actions now, and schedule
3146  * delayed actions as required.  Note that we are no longer necessarily running
3147  * on the detecting cpu, and that the async_flt structure will not persist on
3148  * return from this function.
3149  *
 * Calls to this function should aim to be self-throttling in some way.  With
3151  * the delayed re-enable of CEEN the absolute rate of calls should not
3152  * be excessive.  Callers should also avoid performing in-depth classification
3153  * for events in pages that are already known to be suspect.
3154  *
3155  * We return nonzero to indicate that the event has been copied and
3156  * recirculated for further testing.  The caller should not log the event
3157  * in this case - it will be logged when further test results are available.
3158  *
3159  * Our possible contexts are that of errorq_drain: below lock level or from
3160  * panic context.  We can assume that the cpu we are running on is online.
3161  */
3162 
3163 
3164 #ifdef DEBUG
3165 static int ce_xdiag_forceaction;
3166 #endif
3167 
3168 int
3169 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3170     errorq_elem_t *eqep, size_t afltoffset)
3171 {
3172 	ce_dispact_t dispact, action;
3173 	cpu_t *cp;
3174 	uchar_t dtcrinfo, disp;
3175 	int ptnrtype;
3176 
3177 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3178 		ce_xdiag_drops++;
3179 		return (0);
3180 	} else if (!aflt->flt_in_memory) {
3181 		ce_xdiag_drops++;
3182 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3183 		return (0);
3184 	}
3185 
3186 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3187 
3188 	/*
3189 	 * Some correctable events are not scrubbed/classified, such as those
3190 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3191 	 * initial detector classification go no further.
3192 	 */
3193 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3194 		ce_xdiag_drops++;
3195 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3196 		return (0);
3197 	}
3198 
3199 	dispact = CE_DISPACT(ce_disp_table,
3200 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3201 	    CE_XDIAG_STATE(dtcrinfo),
3202 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3203 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3204 
3205 
3206 	action = CE_ACT(dispact);	/* bad lookup caught below */
3207 #ifdef DEBUG
3208 	if (ce_xdiag_forceaction != 0)
3209 		action = ce_xdiag_forceaction;
3210 #endif
3211 
3212 	switch (action) {
3213 	case CE_ACT_LKYCHK: {
3214 		caddr_t ndata;
3215 		errorq_elem_t *neqep;
3216 		struct async_flt *ecc;
3217 		ce_lkychk_cb_t *cbargp;
3218 
3219 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3220 			ce_xdiag_lkydrops++;
3221 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3222 			    CE_XDIAG_SKIP_DUPFAIL);
3223 			break;
3224 		}
3225 		ecc = (struct async_flt *)(ndata + afltoffset);
3226 
3227 		ASSERT(ecc->flt_class == CPU_FAULT ||
3228 		    ecc->flt_class == BUS_FAULT);
3229 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3230 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3231 
3232 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3233 		cbargp->lkycb_aflt = ecc;
3234 		cbargp->lkycb_eqp = eqp;
3235 		cbargp->lkycb_eqep = neqep;
3236 
3237 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3238 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3239 		return (1);
3240 	}
3241 
3242 	case CE_ACT_PTNRCHK:
3243 		kpreempt_disable();	/* stop cpu list changing */
3244 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3245 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3246 			    (uint64_t)aflt, (uint64_t)&disp);
3247 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3248 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3249 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3250 		} else if (ncpus > 1) {
3251 			ce_xdiag_ptnrdrops++;
3252 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3253 			    CE_XDIAG_SKIP_NOPTNR);
3254 		} else {
3255 			ce_xdiag_ptnrdrops++;
3256 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3257 			    CE_XDIAG_SKIP_UNIPROC);
3258 		}
3259 		kpreempt_enable();
3260 		break;
3261 
3262 	case CE_ACT_DONE:
3263 		break;
3264 
3265 	case CE_ACT(CE_DISP_BAD):
3266 	default:
3267 #ifdef DEBUG
3268 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3269 #endif
3270 		ce_xdiag_bad++;
3271 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3272 		break;
3273 	}
3274 
3275 	return (0);
3276 }
3277 
3278 /*
3279  * We route all errors through a single switch statement.
3280  */
3281 void
3282 cpu_ue_log_err(struct async_flt *aflt)
3283 {
3284 	switch (aflt->flt_class) {
3285 	case CPU_FAULT:
3286 		cpu_ereport_init(aflt);
3287 		if (cpu_async_log_err(aflt, NULL))
3288 			cpu_ereport_post(aflt);
3289 		break;
3290 
3291 	case BUS_FAULT:
3292 		bus_async_log_err(aflt);
3293 		break;
3294 
3295 	default:
3296 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3297 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3298 		return;
3299 	}
3300 }
3301 
3302 /*
3303  * Routine for panic hook callback from panic_idle().
3304  */
3305 void
3306 cpu_async_panic_callb(void)
3307 {
3308 	ch_async_flt_t ch_flt;
3309 	struct async_flt *aflt;
3310 	ch_cpu_errors_t cpu_error_regs;
3311 	uint64_t afsr_errs;
3312 
3313 	get_cpu_error_state(&cpu_error_regs);
3314 
3315 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3316 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3317 
3318 	if (afsr_errs) {
3319 
3320 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3321 		aflt = (struct async_flt *)&ch_flt;
3322 		aflt->flt_id = gethrtime_waitfree();
3323 		aflt->flt_bus_id = getprocessorid();
3324 		aflt->flt_inst = CPU->cpu_id;
3325 		aflt->flt_stat = cpu_error_regs.afsr;
3326 		aflt->flt_addr = cpu_error_regs.afar;
3327 		aflt->flt_prot = AFLT_PROT_NONE;
3328 		aflt->flt_class = CPU_FAULT;
3329 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3330 		aflt->flt_panic = 1;
3331 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3332 		ch_flt.afsr_errs = afsr_errs;
3333 #if defined(SERRANO)
3334 		ch_flt.afar2 = cpu_error_regs.afar2;
3335 #endif	/* SERRANO */
3336 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3337 	}
3338 }
3339 
3340 /*
3341  * Routine to convert a syndrome into a syndrome code.
3342  */
3343 static int
3344 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3345 {
3346 	if (synd_status == AFLT_STAT_INVALID)
3347 		return (-1);
3348 
3349 	/*
3350 	 * Use the syndrome to index the appropriate syndrome table,
3351 	 * to get the code indicating which bit(s) is(are) bad.
3352 	 */
3353 	if (afsr_bit &
3354 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3355 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3356 #if defined(JALAPENO) || defined(SERRANO)
3357 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3358 				return (-1);
3359 			else
3360 				return (BPAR0 + synd);
3361 #else /* JALAPENO || SERRANO */
3362 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3363 				return (-1);
3364 			else
3365 				return (mtag_syndrome_tab[synd]);
3366 #endif /* JALAPENO || SERRANO */
3367 		} else {
3368 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3369 				return (-1);
3370 			else
3371 				return (ecc_syndrome_tab[synd]);
3372 		}
3373 	} else {
3374 		return (-1);
3375 	}
3376 }
3377 
3378 int
3379 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3380 {
3381 	if (&plat_get_mem_sid)
3382 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3383 	else
3384 		return (ENOTSUP);
3385 }
3386 
3387 int
3388 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3389 {
3390 	if (&plat_get_mem_offset)
3391 		return (plat_get_mem_offset(flt_addr, offp));
3392 	else
3393 		return (ENOTSUP);
3394 }
3395 
3396 int
3397 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3398 {
3399 	if (&plat_get_mem_addr)
3400 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3401 	else
3402 		return (ENOTSUP);
3403 }
3404 
3405 /*
3406  * Routine to return a string identifying the physical name
3407  * associated with a memory/cache error.
3408  */
3409 int
3410 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3411     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3412     ushort_t flt_status, char *buf, int buflen, int *lenp)
3413 {
3414 	int synd_code;
3415 	int ret;
3416 
3417 	/*
3418 	 * An AFSR of -1 defaults to a memory syndrome.
3419 	 */
3420 	if (flt_stat == (uint64_t)-1)
3421 		flt_stat = C_AFSR_CE;
3422 
3423 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3424 
3425 	/*
3426 	 * Syndrome code must be either a single-bit error code
3427 	 * (0...143) or -1 for unum lookup.
3428 	 */
3429 	if (synd_code < 0 || synd_code >= M2)
3430 		synd_code = -1;
3431 	if (&plat_get_mem_unum) {
3432 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3433 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3434 			buf[0] = '\0';
3435 			*lenp = 0;
3436 		}
3437 
3438 		return (ret);
3439 	}
3440 
3441 	return (ENOTSUP);
3442 }
3443 
3444 /*
3445  * Wrapper for cpu_get_mem_unum() routine that takes an
3446  * async_flt struct rather than explicit arguments.
3447  */
3448 int
3449 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3450     char *buf, int buflen, int *lenp)
3451 {
3452 	/*
3453 	 * If we come thru here for an IO bus error aflt->flt_stat will
3454 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3455 	 * so it will interpret this as a memory error.
3456 	 */
3457 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3458 	    (aflt->flt_class == BUS_FAULT) ?
3459 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3460 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3461 	    aflt->flt_status, buf, buflen, lenp));
3462 }
3463 
3464 /*
3465  * Return unum string given synd_code and async_flt into
3466  * the buf with size UNUM_NAMLEN
3467  */
3468 static int
3469 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3470 {
3471 	int ret, len;
3472 
3473 	/*
3474 	 * Syndrome code must be either a single-bit error code
3475 	 * (0...143) or -1 for unum lookup.
3476 	 */
3477 	if (synd_code < 0 || synd_code >= M2)
3478 		synd_code = -1;
3479 	if (&plat_get_mem_unum) {
3480 		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3481 		    aflt->flt_bus_id, aflt->flt_in_memory,
3482 		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3483 			buf[0] = '\0';
3484 		}
3485 		return (ret);
3486 	}
3487 
3488 	buf[0] = '\0';
3489 	return (ENOTSUP);
3490 }
3491 
3492 /*
3493  * This routine is a more generic interface to cpu_get_mem_unum()
3494  * that may be used by other modules (e.g. the 'mm' driver, through
3495  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3496  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3497  */
3498 int
3499 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3500     char *buf, int buflen, int *lenp)
3501 {
3502 	int synd_status, flt_in_memory, ret;
3503 	ushort_t flt_status = 0;
3504 	char unum[UNUM_NAMLEN];
3505 	uint64_t t_afsr_errs;
3506 
3507 	/*
3508 	 * Check for an invalid address.
3509 	 */
3510 	if (afar == (uint64_t)-1)
3511 		return (ENXIO);
3512 
3513 	if (synd == (uint64_t)-1)
3514 		synd_status = AFLT_STAT_INVALID;
3515 	else
3516 		synd_status = AFLT_STAT_VALID;
3517 
3518 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3519 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3520 
3521 	/*
3522 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3523 	 */
3524 	if (*afsr == (uint64_t)-1)
3525 		t_afsr_errs = C_AFSR_CE;
3526 	else {
3527 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3528 #if defined(CHEETAH_PLUS)
3529 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3530 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3531 #endif	/* CHEETAH_PLUS */
3532 	}
3533 
3534 	/*
3535 	 * Turn on ECC_ECACHE if error type is E$ Data.
3536 	 */
3537 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3538 		flt_status |= ECC_ECACHE;
3539 
3540 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3541 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3542 	if (ret != 0)
3543 		return (ret);
3544 
3545 	if (*lenp >= buflen)
3546 		return (ENAMETOOLONG);
3547 
3548 	(void) strncpy(buf, unum, buflen);
3549 
3550 	return (0);
3551 }
3552 
3553 /*
3554  * Routine to return memory information associated
3555  * with a physical address and syndrome.
3556  */
3557 int
3558 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3559     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3560     int *segsp, int *banksp, int *mcidp)
3561 {
3562 	int synd_status, synd_code;
3563 
3564 	if (afar == (uint64_t)-1)
3565 		return (ENXIO);
3566 
3567 	if (synd == (uint64_t)-1)
3568 		synd_status = AFLT_STAT_INVALID;
3569 	else
3570 		synd_status = AFLT_STAT_VALID;
3571 
3572 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3573 
3574 	if (p2get_mem_info != NULL)
3575 		return ((p2get_mem_info)(synd_code, afar,
3576 		    mem_sizep, seg_sizep, bank_sizep,
3577 		    segsp, banksp, mcidp));
3578 	else
3579 		return (ENOTSUP);
3580 }
3581 
3582 /*
3583  * Routine to return a string identifying the physical
3584  * name associated with a cpuid.
3585  */
3586 int
3587 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3588 {
3589 	int ret;
3590 	char unum[UNUM_NAMLEN];
3591 
3592 	if (&plat_get_cpu_unum) {
3593 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3594 		    != 0)
3595 			return (ret);
3596 	} else {
3597 		return (ENOTSUP);
3598 	}
3599 
3600 	if (*lenp >= buflen)
3601 		return (ENAMETOOLONG);
3602 
3603 	(void) strncpy(buf, unum, buflen);
3604 
3605 	return (0);
3606 }
3607 
3608 /*
3609  * This routine exports the name buffer size.
3610  */
3611 size_t
3612 cpu_get_name_bufsize()
3613 {
3614 	return (UNUM_NAMLEN);
3615 }
3616 
/*
 * Historical function, apparently not used.  Intentionally does nothing.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3624 
/*
 * Historical function only called for SBus errors in debugging.
 * Intentionally does nothing.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3632 
3633 /*
3634  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3635  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3636  * an async fault structure argument is passed in, the captured error state
3637  * (AFSR, AFAR) info will be returned in the structure.
3638  */
3639 int
3640 clear_errors(ch_async_flt_t *ch_flt)
3641 {
3642 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3643 	ch_cpu_errors_t	cpu_error_regs;
3644 
3645 	get_cpu_error_state(&cpu_error_regs);
3646 
3647 	if (ch_flt != NULL) {
3648 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3649 		aflt->flt_addr = cpu_error_regs.afar;
3650 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3651 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3652 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3653 #if defined(SERRANO)
3654 		ch_flt->afar2 = cpu_error_regs.afar2;
3655 #endif	/* SERRANO */
3656 	}
3657 
3658 	set_cpu_error_state(&cpu_error_regs);
3659 
3660 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3661 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3662 }
3663 
3664 /*
3665  * Clear any AFSR error bits, and check for persistence.
3666  *
3667  * It would be desirable to also insist that syndrome match.  PCI handling
3668  * has already filled flt_synd.  For errors trapped by CPU we only fill
3669  * flt_synd when we queue the event, so we do not have a valid flt_synd
3670  * during initial classification (it is valid if we're called as part of
3671  * subsequent low-pil additional classification attempts).  We could try
3672  * to determine which syndrome to use: we know we're only called for
3673  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3674  * would be esynd/none and esynd/msynd, respectively.  If that is
3675  * implemented then what do we do in the case that we do experience an
3676  * error on the same afar but with different syndrome?  At the very least
 * we should count such occurrences.  Anyway, for now, we'll leave it as
3678  * it has been for ages.
3679  */
3680 static int
3681 clear_ecc(struct async_flt *aflt)
3682 {
3683 	ch_cpu_errors_t	cpu_error_regs;
3684 
3685 	/*
3686 	 * Snapshot the AFSR and AFAR and clear any errors
3687 	 */
3688 	get_cpu_error_state(&cpu_error_regs);
3689 	set_cpu_error_state(&cpu_error_regs);
3690 
3691 	/*
3692 	 * If any of the same memory access error bits are still on and
3693 	 * the AFAR matches, return that the error is persistent.
3694 	 */
3695 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3696 	    cpu_error_regs.afar == aflt->flt_addr);
3697 }
3698 
3699 /*
3700  * Turn off all cpu error detection, normally only used for panics.
3701  */
3702 void
3703 cpu_disable_errors(void)
3704 {
3705 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3706 
3707 	/*
3708 	 * With error detection now turned off, check the other cpus
3709 	 * logout areas for any unlogged errors.
3710 	 */
3711 	if (enable_check_other_cpus_logout) {
3712 		cpu_check_other_cpus_logout();
3713 		/*
3714 		 * Make a second pass over the logout areas, in case
3715 		 * there is a failing CPU in an error-trap loop which
3716 		 * will write to the logout area once it is emptied.
3717 		 */
3718 		cpu_check_other_cpus_logout();
3719 	}
3720 }
3721 
3722 /*
3723  * Enable errors.
3724  */
3725 void
3726 cpu_enable_errors(void)
3727 {
3728 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3729 }
3730 
3731 /*
3732  * Flush the entire ecache using displacement flush by reading through a
3733  * physical address range twice as large as the Ecache.
3734  */
3735 void
3736 cpu_flush_ecache(void)
3737 {
3738 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3739 	    cpunodes[CPU->cpu_id].ecache_linesize);
3740 }
3741 
3742 /*
3743  * Return CPU E$ set size - E$ size divided by the associativity.
3744  * We use this function in places where the CPU_PRIVATE ptr may not be
3745  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3746  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3747  * up before the kernel switches from OBP's to the kernel's trap table, so
3748  * we don't have to worry about cpunodes being unitialized.
3749  */
3750 int
3751 cpu_ecache_set_size(struct cpu *cp)
3752 {
3753 	if (CPU_PRIVATE(cp))
3754 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3755 
3756 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3757 }
3758 
3759 /*
3760  * Flush Ecache line.
3761  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3762  * Uses normal displacement flush for Cheetah.
3763  */
3764 static void
3765 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3766 {
3767 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3768 	int ec_set_size = cpu_ecache_set_size(CPU);
3769 
3770 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3771 }
3772 
3773 /*
3774  * Scrub physical address.
3775  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3776  * Ecache or direct-mapped Ecache.
3777  */
3778 static void
3779 cpu_scrubphys(struct async_flt *aflt)
3780 {
3781 	int ec_set_size = cpu_ecache_set_size(CPU);
3782 
3783 	scrubphys(aflt->flt_addr, ec_set_size);
3784 }
3785 
3786 /*
3787  * Clear physical address.
3788  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3789  * Ecache or direct-mapped Ecache.
3790  */
3791 void
3792 cpu_clearphys(struct async_flt *aflt)
3793 {
3794 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3795 	int ec_set_size = cpu_ecache_set_size(CPU);
3796 
3797 
3798 	clearphys(aflt->flt_addr, ec_set_size, lsize);
3799 }
3800 
3801 #if defined(CPU_IMP_ECACHE_ASSOC)
3802 /*
3803  * Check for a matching valid line in all the sets.
3804  * If found, return set# + 1. Otherwise return 0.
3805  */
3806 static int
3807 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3808 {
3809 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3810 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3811 	int ec_set_size = cpu_ecache_set_size(CPU);
3812 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3813 	int nway = cpu_ecache_nway();
3814 	int i;
3815 
3816 	for (i = 0; i < nway; i++, ecp++) {
3817 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3818 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3819 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3820 			return (i+1);
3821 	}
3822 	return (0);
3823 }
3824 #endif /* CPU_IMP_ECACHE_ASSOC */
3825 
3826 /*
3827  * Check whether a line in the given logout info matches the specified
3828  * fault address.  If reqval is set then the line must not be Invalid.
3829  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3830  * set to 2 for l2$ or 3 for l3$.
3831  */
3832 static int
3833 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3834 {
3835 	ch_diag_data_t *cdp = data;
3836 	ch_ec_data_t *ecp;
3837 	int totalsize, ec_set_size;
3838 	int i, ways;
3839 	int match = 0;
3840 	int tagvalid;
3841 	uint64_t addr, tagpa;
3842 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3843 
3844 	/*
3845 	 * Check the l2$ logout data
3846 	 */
3847 	if (ispanther) {
3848 		ecp = &cdp->chd_l2_data[0];
3849 		ec_set_size = PN_L2_SET_SIZE;
3850 		ways = PN_L2_NWAYS;
3851 	} else {
3852 		ecp = &cdp->chd_ec_data[0];
3853 		ec_set_size = cpu_ecache_set_size(CPU);
3854 		ways = cpu_ecache_nway();
3855 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3856 	}
3857 	/* remove low order PA bits from fault address not used in PA tag */
3858 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3859 	for (i = 0; i < ways; i++, ecp++) {
3860 		if (ispanther) {
3861 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3862 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3863 		} else {
3864 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3865 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3866 			    ecp->ec_tag);
3867 		}
3868 		if (tagpa == addr && (!reqval || tagvalid)) {
3869 			match = i + 1;
3870 			*level = 2;
3871 			break;
3872 		}
3873 	}
3874 
3875 	if (match || !ispanther)
3876 		return (match);
3877 
3878 	/* For Panther we also check the l3$ */
3879 	ecp = &cdp->chd_ec_data[0];
3880 	ec_set_size = PN_L3_SET_SIZE;
3881 	ways = PN_L3_NWAYS;
3882 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3883 
3884 	for (i = 0; i < ways; i++, ecp++) {
3885 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3886 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3887 			match = i + 1;
3888 			*level = 3;
3889 			break;
3890 		}
3891 	}
3892 
3893 	return (match);
3894 }
3895 
3896 #if defined(CPU_IMP_L1_CACHE_PARITY)
3897 /*
3898  * Record information related to the source of an Dcache Parity Error.
3899  */
3900 static void
3901 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3902 {
3903 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3904 	int index;
3905 
3906 	/*
3907 	 * Since instruction decode cannot be done at high PIL
3908 	 * just examine the entire Dcache to locate the error.
3909 	 */
3910 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3911 		ch_flt->parity_data.dpe.cpl_way = -1;
3912 		ch_flt->parity_data.dpe.cpl_off = -1;
3913 	}
3914 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3915 		cpu_dcache_parity_check(ch_flt, index);
3916 }
3917 
3918 /*
3919  * Check all ways of the Dcache at a specified index for good parity.
3920  */
3921 static void
3922 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3923 {
3924 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3925 	uint64_t parity_bits, pbits, data_word;
3926 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3927 	int way, word, data_byte;
3928 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3929 	ch_dc_data_t tmp_dcp;
3930 
3931 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3932 		/*
3933 		 * Perform diagnostic read.
3934 		 */
3935 		get_dcache_dtag(index + way * dc_set_size,
3936 		    (uint64_t *)&tmp_dcp);
3937 
3938 		/*
3939 		 * Check tag for even parity.
3940 		 * Sum of 1 bits (including parity bit) should be even.
3941 		 */
3942 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3943 			/*
3944 			 * If this is the first error log detailed information
3945 			 * about it and check the snoop tag. Otherwise just
3946 			 * record the fact that we found another error.
3947 			 */
3948 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3949 				ch_flt->parity_data.dpe.cpl_way = way;
3950 				ch_flt->parity_data.dpe.cpl_cache =
3951 				    CPU_DC_PARITY;
3952 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3953 
3954 				if (popc64(tmp_dcp.dc_sntag &
3955 				    CHP_DCSNTAG_PARMASK) & 1) {
3956 					ch_flt->parity_data.dpe.cpl_tag |=
3957 					    CHP_DC_SNTAG;
3958 					ch_flt->parity_data.dpe.cpl_lcnt++;
3959 				}
3960 
3961 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3962 			}
3963 
3964 			ch_flt->parity_data.dpe.cpl_lcnt++;
3965 		}
3966 
3967 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3968 			/*
3969 			 * Panther has more parity bits than the other
3970 			 * processors for covering dcache data and so each
3971 			 * byte of data in each word has its own parity bit.
3972 			 */
3973 			parity_bits = tmp_dcp.dc_pn_data_parity;
3974 			for (word = 0; word < 4; word++) {
3975 				data_word = tmp_dcp.dc_data[word];
3976 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3977 				for (data_byte = 0; data_byte < 8;
3978 				    data_byte++) {
3979 					if (((popc64(data_word &
3980 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3981 					    (pbits & 1)) {
3982 						cpu_record_dc_data_parity(
3983 						    ch_flt, dcp, &tmp_dcp, way,
3984 						    word);
3985 					}
3986 					pbits >>= 1;
3987 					data_word >>= 8;
3988 				}
3989 				parity_bits >>= 8;
3990 			}
3991 		} else {
3992 			/*
3993 			 * Check data array for even parity.
3994 			 * The 8 parity bits are grouped into 4 pairs each
3995 			 * of which covers a 64-bit word.  The endianness is
3996 			 * reversed -- the low-order parity bits cover the
3997 			 * high-order data words.
3998 			 */
3999 			parity_bits = tmp_dcp.dc_utag >> 8;
4000 			for (word = 0; word < 4; word++) {
4001 				pbits = (parity_bits >> (6 - word * 2)) & 3;
4002 				if ((popc64(tmp_dcp.dc_data[word]) +
4003 				    parity_bits_popc[pbits]) & 1) {
4004 					cpu_record_dc_data_parity(ch_flt, dcp,
4005 					    &tmp_dcp, way, word);
4006 				}
4007 			}
4008 		}
4009 	}
4010 }
4011 
4012 static void
4013 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4014     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4015 {
4016 	/*
4017 	 * If this is the first error log detailed information about it.
4018 	 * Otherwise just record the fact that we found another error.
4019 	 */
4020 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4021 		ch_flt->parity_data.dpe.cpl_way = way;
4022 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4023 		ch_flt->parity_data.dpe.cpl_off = word * 8;
4024 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4025 	}
4026 	ch_flt->parity_data.dpe.cpl_lcnt++;
4027 }
4028 
4029 /*
4030  * Record information related to the source of an Icache Parity Error.
4031  *
4032  * Called with the Icache disabled so any diagnostic accesses are safe.
4033  */
4034 static void
4035 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4036 {
4037 	int	ic_set_size;
4038 	int	ic_linesize;
4039 	int	index;
4040 
4041 	if (CPU_PRIVATE(CPU)) {
4042 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4043 		    CH_ICACHE_NWAY;
4044 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4045 	} else {
4046 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4047 		ic_linesize = icache_linesize;
4048 	}
4049 
4050 	ch_flt->parity_data.ipe.cpl_way = -1;
4051 	ch_flt->parity_data.ipe.cpl_off = -1;
4052 
4053 	for (index = 0; index < ic_set_size; index += ic_linesize)
4054 		cpu_icache_parity_check(ch_flt, index);
4055 }
4056 
4057 /*
4058  * Check all ways of the Icache at a specified index for good parity.
4059  */
4060 static void
4061 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4062 {
4063 	uint64_t parmask, pn_inst_parity;
4064 	int ic_set_size;
4065 	int ic_linesize;
4066 	int flt_index, way, instr, num_instr;
4067 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4068 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4069 	ch_ic_data_t tmp_icp;
4070 
4071 	if (CPU_PRIVATE(CPU)) {
4072 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4073 		    CH_ICACHE_NWAY;
4074 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4075 	} else {
4076 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4077 		ic_linesize = icache_linesize;
4078 	}
4079 
4080 	/*
4081 	 * Panther has twice as many instructions per icache line and the
4082 	 * instruction parity bit is in a different location.
4083 	 */
4084 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4085 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4086 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4087 	} else {
4088 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4089 		pn_inst_parity = 0;
4090 	}
4091 
4092 	/*
4093 	 * Index at which we expect to find the parity error.
4094 	 */
4095 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4096 
4097 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4098 		/*
4099 		 * Diagnostic reads expect address argument in ASI format.
4100 		 */
4101 		get_icache_dtag(2 * (index + way * ic_set_size),
4102 		    (uint64_t *)&tmp_icp);
4103 
4104 		/*
4105 		 * If this is the index in which we expect to find the
4106 		 * error log detailed information about each of the ways.
4107 		 * This information will be displayed later if we can't
4108 		 * determine the exact way in which the error is located.
4109 		 */
4110 		if (flt_index == index)
4111 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4112 
4113 		/*
4114 		 * Check tag for even parity.
4115 		 * Sum of 1 bits (including parity bit) should be even.
4116 		 */
4117 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4118 			/*
4119 			 * If this way is the one in which we expected
4120 			 * to find the error record the way and check the
4121 			 * snoop tag. Otherwise just record the fact we
4122 			 * found another error.
4123 			 */
4124 			if (flt_index == index) {
4125 				ch_flt->parity_data.ipe.cpl_way = way;
4126 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4127 
4128 				if (popc64(tmp_icp.ic_sntag &
4129 				    CHP_ICSNTAG_PARMASK) & 1) {
4130 					ch_flt->parity_data.ipe.cpl_tag |=
4131 					    CHP_IC_SNTAG;
4132 					ch_flt->parity_data.ipe.cpl_lcnt++;
4133 				}
4134 
4135 			}
4136 			ch_flt->parity_data.ipe.cpl_lcnt++;
4137 			continue;
4138 		}
4139 
4140 		/*
4141 		 * Check instruction data for even parity.
4142 		 * Bits participating in parity differ for PC-relative
4143 		 * versus non-PC-relative instructions.
4144 		 */
4145 		for (instr = 0; instr < num_instr; instr++) {
4146 			parmask = (tmp_icp.ic_data[instr] &
4147 			    CH_ICDATA_PRED_ISPCREL) ?
4148 			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4149 			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4150 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4151 				/*
4152 				 * If this way is the one in which we expected
4153 				 * to find the error record the way and offset.
4154 				 * Otherwise just log the fact we found another
4155 				 * error.
4156 				 */
4157 				if (flt_index == index) {
4158 					ch_flt->parity_data.ipe.cpl_way = way;
4159 					ch_flt->parity_data.ipe.cpl_off =
4160 					    instr * 4;
4161 				}
4162 				ch_flt->parity_data.ipe.cpl_lcnt++;
4163 				continue;
4164 			}
4165 		}
4166 	}
4167 }
4168 
4169 /*
4170  * Record information related to the source of an Pcache Parity Error.
4171  */
4172 static void
4173 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4174 {
4175 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4176 	int index;
4177 
4178 	/*
4179 	 * Since instruction decode cannot be done at high PIL just
4180 	 * examine the entire Pcache to check for any parity errors.
4181 	 */
4182 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4183 		ch_flt->parity_data.dpe.cpl_way = -1;
4184 		ch_flt->parity_data.dpe.cpl_off = -1;
4185 	}
4186 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4187 		cpu_pcache_parity_check(ch_flt, index);
4188 }
4189 
4190 /*
4191  * Check all ways of the Pcache at a specified index for good parity.
4192  */
4193 static void
4194 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4195 {
4196 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4197 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4198 	int way, word, pbit, parity_bits;
4199 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4200 	ch_pc_data_t tmp_pcp;
4201 
4202 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4203 		/*
4204 		 * Perform diagnostic read.
4205 		 */
4206 		get_pcache_dtag(index + way * pc_set_size,
4207 		    (uint64_t *)&tmp_pcp);
4208 		/*
4209 		 * Check data array for odd parity. There are 8 parity
4210 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4211 		 * of those bits covers exactly 8 bytes of the data
4212 		 * array:
4213 		 *
4214 		 *	parity bit	P$ data bytes covered
4215 		 *	----------	---------------------
4216 		 *	50		63:56
4217 		 *	51		55:48
4218 		 *	52		47:40
4219 		 *	53		39:32
4220 		 *	54		31:24
4221 		 *	55		23:16
4222 		 *	56		15:8
4223 		 *	57		7:0
4224 		 */
4225 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4226 		for (word = 0; word < pc_data_words; word++) {
4227 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4228 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4229 				/*
4230 				 * If this is the first error log detailed
4231 				 * information about it. Otherwise just record
4232 				 * the fact that we found another error.
4233 				 */
4234 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4235 					ch_flt->parity_data.dpe.cpl_way = way;
4236 					ch_flt->parity_data.dpe.cpl_cache =
4237 					    CPU_PC_PARITY;
4238 					ch_flt->parity_data.dpe.cpl_off =
4239 					    word * sizeof (uint64_t);
4240 					bcopy(&tmp_pcp, pcp,
4241 					    sizeof (ch_pc_data_t));
4242 				}
4243 				ch_flt->parity_data.dpe.cpl_lcnt++;
4244 			}
4245 		}
4246 	}
4247 }
4248 
4249 
4250 /*
4251  * Add L1 Data cache data to the ereport payload.
4252  */
4253 static void
4254 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4255 {
4256 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4257 	ch_dc_data_t *dcp;
4258 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4259 	uint_t nelem;
4260 	int i, ways_to_check, ways_logged = 0;
4261 
4262 	/*
4263 	 * If this is an D$ fault then there may be multiple
4264 	 * ways captured in the ch_parity_log_t structure.
4265 	 * Otherwise, there will be at most one way captured
4266 	 * in the ch_diag_data_t struct.
4267 	 * Check each way to see if it should be encoded.
4268 	 */
4269 	if (ch_flt->flt_type == CPU_DC_PARITY)
4270 		ways_to_check = CH_DCACHE_NWAY;
4271 	else
4272 		ways_to_check = 1;
4273 	for (i = 0; i < ways_to_check; i++) {
4274 		if (ch_flt->flt_type == CPU_DC_PARITY)
4275 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4276 		else
4277 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4278 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4279 			bcopy(dcp, &dcdata[ways_logged],
4280 			    sizeof (ch_dc_data_t));
4281 			ways_logged++;
4282 		}
4283 	}
4284 
4285 	/*
4286 	 * Add the dcache data to the payload.
4287 	 */
4288 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4289 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4290 	if (ways_logged != 0) {
4291 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4292 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4293 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4294 	}
4295 }
4296 
4297 /*
4298  * Add L1 Instruction cache data to the ereport payload.
4299  */
4300 static void
4301 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4302 {
4303 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4304 	ch_ic_data_t *icp;
4305 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4306 	uint_t nelem;
4307 	int i, ways_to_check, ways_logged = 0;
4308 
4309 	/*
4310 	 * If this is an I$ fault then there may be multiple
4311 	 * ways captured in the ch_parity_log_t structure.
4312 	 * Otherwise, there will be at most one way captured
4313 	 * in the ch_diag_data_t struct.
4314 	 * Check each way to see if it should be encoded.
4315 	 */
4316 	if (ch_flt->flt_type == CPU_IC_PARITY)
4317 		ways_to_check = CH_ICACHE_NWAY;
4318 	else
4319 		ways_to_check = 1;
4320 	for (i = 0; i < ways_to_check; i++) {
4321 		if (ch_flt->flt_type == CPU_IC_PARITY)
4322 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4323 		else
4324 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4325 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4326 			bcopy(icp, &icdata[ways_logged],
4327 			    sizeof (ch_ic_data_t));
4328 			ways_logged++;
4329 		}
4330 	}
4331 
4332 	/*
4333 	 * Add the icache data to the payload.
4334 	 */
4335 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4336 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4337 	if (ways_logged != 0) {
4338 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4339 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4340 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4341 	}
4342 }
4343 
4344 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4345 
4346 /*
4347  * Add ecache data to payload.
4348  */
4349 static void
4350 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4351 {
4352 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4353 	ch_ec_data_t *ecp;
4354 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4355 	uint_t nelem;
4356 	int i, ways_logged = 0;
4357 
4358 	/*
4359 	 * Check each way to see if it should be encoded
4360 	 * and concatinate it into a temporary buffer.
4361 	 */
4362 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4363 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4364 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4365 			bcopy(ecp, &ecdata[ways_logged],
4366 			    sizeof (ch_ec_data_t));
4367 			ways_logged++;
4368 		}
4369 	}
4370 
4371 	/*
4372 	 * Panther CPUs have an additional level of cache and so
4373 	 * what we just collected was the L3 (ecache) and not the
4374 	 * L2 cache.
4375 	 */
4376 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4377 		/*
4378 		 * Add the L3 (ecache) data to the payload.
4379 		 */
4380 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4381 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4382 		if (ways_logged != 0) {
4383 			nelem = sizeof (ch_ec_data_t) /
4384 			    sizeof (uint64_t) * ways_logged;
4385 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4386 			    DATA_TYPE_UINT64_ARRAY, nelem,
4387 			    (uint64_t *)ecdata, NULL);
4388 		}
4389 
4390 		/*
4391 		 * Now collect the L2 cache.
4392 		 */
4393 		ways_logged = 0;
4394 		for (i = 0; i < PN_L2_NWAYS; i++) {
4395 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4396 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4397 				bcopy(ecp, &ecdata[ways_logged],
4398 				    sizeof (ch_ec_data_t));
4399 				ways_logged++;
4400 			}
4401 		}
4402 	}
4403 
4404 	/*
4405 	 * Add the L2 cache data to the payload.
4406 	 */
4407 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4408 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4409 	if (ways_logged != 0) {
4410 		nelem = sizeof (ch_ec_data_t) /
4411 		    sizeof (uint64_t) * ways_logged;
4412 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4413 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4414 	}
4415 }
4416 
4417 /*
4418  * Initialize cpu scheme for specified cpu.
4419  */
4420 static void
4421 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4422 {
4423 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4424 	uint8_t mask;
4425 
4426 	mask = cpunodes[cpuid].version;
4427 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4428 	    (u_longlong_t)cpunodes[cpuid].device_id);
4429 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4430 	    cpuid, &mask, (const char *)sbuf);
4431 }
4432 
4433 /*
4434  * Returns ereport resource type.
4435  */
4436 static int
4437 cpu_error_to_resource_type(struct async_flt *aflt)
4438 {
4439 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4440 
4441 	switch (ch_flt->flt_type) {
4442 
4443 	case CPU_CE_ECACHE:
4444 	case CPU_UE_ECACHE:
4445 	case CPU_UE_ECACHE_RETIRE:
4446 	case CPU_ORPH:
4447 		/*
4448 		 * If AFSR error bit indicates L2$ Data for Cheetah,
4449 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4450 		 * E$ Data type, otherwise, return CPU type.
4451 		 */
4452 		if (cpu_error_is_ecache_data(aflt->flt_inst,
4453 		    ch_flt->flt_bit))
4454 			return (ERRTYPE_ECACHE_DATA);
4455 		return (ERRTYPE_CPU);
4456 
4457 	case CPU_CE:
4458 	case CPU_UE:
4459 	case CPU_EMC:
4460 	case CPU_DUE:
4461 	case CPU_RCE:
4462 	case CPU_RUE:
4463 	case CPU_FRC:
4464 	case CPU_FRU:
4465 		return (ERRTYPE_MEMORY);
4466 
4467 	case CPU_IC_PARITY:
4468 	case CPU_DC_PARITY:
4469 	case CPU_FPUERR:
4470 	case CPU_PC_PARITY:
4471 	case CPU_ITLB_PARITY:
4472 	case CPU_DTLB_PARITY:
4473 		return (ERRTYPE_CPU);
4474 	}
4475 	return (ERRTYPE_UNKNOWN);
4476 }
4477 
4478 /*
4479  * Encode the data saved in the ch_async_flt_t struct into
4480  * the FM ereport payload.
 *
 * aflt        - the fault; also viewed as a ch_async_flt_t
 * payload     - nvlist receiving the ereport members
 * resource    - nvlist that is filled in as the resource FMRI and then
 *               added to the payload, when a resource is requested
 * afar_status - out: AFLT_STAT_INVALID unless the AFAR status member is
 *               requested, in which case the computed status
 * synd_status - out: AFLT_STAT_INVALID unless the syndrome status member
 *               is requested, in which case the computed status
 *
 * Each member is added only when its flag is set in aflt->flt_payload.
 * Side effect: when a memory or E$ data resource is looked up, the
 * ECC_ECACHE bit in aflt->flt_status is set or cleared to match the
 * resource type.
4481  */
4482 static void
4483 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4484 	nvlist_t *resource, int *afar_status, int *synd_status)
4485 {
4486 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	/* Both statuses stay invalid unless recomputed below. */
4487 	*synd_status = AFLT_STAT_INVALID;
4488 	*afar_status = AFLT_STAT_INVALID;
4489 
4490 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4491 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4492 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4493 	}
4494 
	/* The extended AFSR is only reported for Panther. */
4495 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4496 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4497 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4498 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4499 	}
4500 
4501 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4502 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4503 		    ch_flt->flt_bit);
4504 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4505 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4506 	}
4507 
4508 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4509 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4510 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4511 	}
4512 
4513 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4514 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4515 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4516 	}
4517 
4518 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4519 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4520 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4521 	}
4522 
4523 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4524 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4525 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4526 	}
4527 
4528 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4529 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4530 		    DATA_TYPE_BOOLEAN_VALUE,
4531 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4532 	}
4533 
4534 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4535 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4536 		    DATA_TYPE_BOOLEAN_VALUE,
4537 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4538 	}
4539 
4540 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4541 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4542 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4543 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4544 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4545 	}
4546 
4547 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4548 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4549 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4550 	}
4551 
4552 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4553 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4554 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4555 	}
4556 
4557 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4558 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4559 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4560 	}
4561 
4562 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4563 		cpu_payload_add_ecache(aflt, payload);
4564 
	/* The copy function is the low byte of flt_status. */
4565 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4566 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4567 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4568 	}
4569 
	/* How-detected is the next byte up (bits 15:8) of flt_status. */
4570 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4571 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4572 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4573 	}
4574 
	/* flt_fpdata is reported as an array of 16 32-bit words. */
4575 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4576 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4577 		    DATA_TYPE_UINT32_ARRAY, 16,
4578 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4579 	}
4580 
4581 #if defined(CPU_IMP_L1_CACHE_PARITY)
4582 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4583 		cpu_payload_add_dcache(aflt, payload);
4584 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4585 		cpu_payload_add_icache(aflt, payload);
4586 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4587 
4588 #if defined(CHEETAH_PLUS)
4589 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4590 		cpu_payload_add_pcache(aflt, payload);
4591 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4592 		cpu_payload_add_tlb(aflt, payload);
4593 #endif	/* CHEETAH_PLUS */
4594 	/*
4595 	 * Create the FMRI that goes into the payload
4596 	 * and contains the unum info if necessary.
4597 	 */
4598 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4599 		char unum[UNUM_NAMLEN] = "";
4600 		char sid[DIMM_SERIAL_ID_LEN] = "";
4601 		int len, ret, rtype, synd_code;
4602 		uint64_t offset = (uint64_t)-1;
4603 
4604 		rtype = cpu_error_to_resource_type(aflt);
4605 		switch (rtype) {
4606 
4607 		case ERRTYPE_MEMORY:
4608 		case ERRTYPE_ECACHE_DATA:
4609 
4610 			/*
4611 			 * Memory errors, do unum lookup
			 *
			 * No resource is added when the AFAR status is
			 * invalid (which includes the case where it was
			 * not requested above) or when the unum lookup
			 * fails.
4612 			 */
4613 			if (*afar_status == AFLT_STAT_INVALID)
4614 				break;
4615 
4616 			if (rtype == ERRTYPE_ECACHE_DATA)
4617 				aflt->flt_status |= ECC_ECACHE;
4618 			else
4619 				aflt->flt_status &= ~ECC_ECACHE;
4620 
4621 			synd_code = synd_to_synd_code(*synd_status,
4622 			    aflt->flt_synd, ch_flt->flt_bit);
4623 
4624 			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4625 				break;
4626 
4627 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4628 			    &len);
4629 
			/*
			 * The offset is only looked up when a serial id
			 * was obtained; otherwise it stays at (uint64_t)-1
			 * and no serial id is passed on.
			 */
4630 			if (ret == 0) {
4631 				(void) cpu_get_mem_offset(aflt->flt_addr,
4632 				    &offset);
4633 			}
4634 
4635 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4636 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4637 			fm_payload_set(payload,
4638 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4639 			    DATA_TYPE_NVLIST, resource, NULL);
4640 			break;
4641 
4642 		case ERRTYPE_CPU:
4643 			/*
4644 			 * On-board processor array error, add cpu resource.
4645 			 */
4646 			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4647 			fm_payload_set(payload,
4648 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4649 			    DATA_TYPE_NVLIST, resource, NULL);
4650 			break;
4651 		}
4652 	}
4653 }
4654 
4655 /*
4656  * Initialize the way info if necessary.
4657  */
4658 void
4659 cpu_ereport_init(struct async_flt *aflt)
4660 {
4661 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4662 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4663 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4664 	int i;
4665 
4666 	/*
4667 	 * Initialize the info in the CPU logout structure.
4668 	 * The I$/D$ way information is not initialized here
4669 	 * since it is captured in the logout assembly code.
4670 	 */
4671 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4672 		(ecp + i)->ec_way = i;
4673 
4674 	for (i = 0; i < PN_L2_NWAYS; i++)
4675 		(l2p + i)->ec_way = i;
4676 }
4677 
4678 /*
4679  * Returns whether fault address is valid for this error bit and
4680  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4681  */
4682 int
4683 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4684 {
4685 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4686 
4687 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4688 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4689 	    AFLT_STAT_VALID &&
4690 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4691 }
4692 
4693 /*
4694  * Returns whether fault address is valid based on the error bit for the
4695  * one event being queued and whether the address is "in memory".
4696  */
4697 static int
4698 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4699 {
4700 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4701 	int afar_status;
4702 	uint64_t afsr_errs, afsr_ow, *ow_bits;
4703 
4704 	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4705 	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4706 		return (0);
4707 
4708 	afsr_errs = ch_flt->afsr_errs;
4709 	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4710 
4711 	switch (afar_status) {
4712 	case AFLT_STAT_VALID:
4713 		return (1);
4714 
4715 	case AFLT_STAT_AMBIGUOUS:
4716 		/*
4717 		 * Status is ambiguous since another error bit (or bits)
4718 		 * of equal priority to the specified bit on in the afsr,
4719 		 * so check those bits. Return 1 only if the bits on in the
4720 		 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4721 		 * Otherwise not all the equal priority bits are for memory
4722 		 * errors, so return 0.
4723 		 */
4724 		ow_bits = afar_overwrite;
4725 		while ((afsr_ow = *ow_bits++) != 0) {
4726 			/*
4727 			 * Get other bits that are on in t_afsr_bit's priority
4728 			 * class to check for Memory Error bits only.
4729 			 */
4730 			if (afsr_ow & t_afsr_bit) {
4731 				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4732 					return (0);
4733 				else
4734 					return (1);
4735 			}
4736 		}
4737 		/*FALLTHRU*/
4738 
4739 	default:
4740 		return (0);
4741 	}
4742 }
4743 
4744 static void
4745 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4746 {
4747 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4748 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4749 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4750 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4751 #if defined(CPU_IMP_ECACHE_ASSOC)
4752 	int i, nway;
4753 #endif /* CPU_IMP_ECACHE_ASSOC */
4754 
4755 	/*
4756 	 * Check if the CPU log out captured was valid.
4757 	 */
4758 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4759 	    ch_flt->flt_data_incomplete)
4760 		return;
4761 
4762 #if defined(CPU_IMP_ECACHE_ASSOC)
4763 	nway = cpu_ecache_nway();
4764 	i =  cpu_ecache_line_valid(ch_flt);
4765 	if (i == 0 || i > nway) {
4766 		for (i = 0; i < nway; i++)
4767 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4768 	} else
4769 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4770 #else /* CPU_IMP_ECACHE_ASSOC */
4771 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4772 #endif /* CPU_IMP_ECACHE_ASSOC */
4773 
4774 #if defined(CHEETAH_PLUS)
4775 	pn_cpu_log_diag_l2_info(ch_flt);
4776 #endif /* CHEETAH_PLUS */
4777 
4778 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4779 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4780 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4781 	}
4782 
4783 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4784 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4785 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4786 		else
4787 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4788 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4789 	}
4790 }
4791 
/*
 * Cheetah ECC calculation.
 *
 * Only the data bits take part in the calculation; the check bit and Mtag
 * bit terms can be ignored.
 *
 * One row per generated ECC bit (us3_gen_ecc() indexes the rows by bit
 * number).  Column 0 is the mask applied to the low order 64 data bits and
 * column 1 the mask applied to the high order 64 data bits.
 */
static uint64_t ch_ecc_table[9][2] = {
	/*   low order 64-bits       high order 64-bits   */
	{ 0x46bffffeccd1177fULL, 0x488800022100014cULL },	/* bit 0 */
	{ 0x42fccc81331ff77fULL, 0x14424f1010249184ULL },	/* bit 1 */
	{ 0x8898827c222f1ffeULL, 0x22c1222808184aafULL },	/* bit 2 */
	{ 0xf7632203e131ccf1ULL, 0xe1241121848292b8ULL },	/* bit 3 */
	{ 0x7f5511421b113809ULL, 0x901c88d84288aafeULL },	/* bit 4 */
	{ 0x1d49412184882487ULL, 0x8f338c87c044c6efULL },	/* bit 5 */
	{ 0xf552181014448344ULL, 0x7ff8f4443e411911ULL },	/* bit 6 */
	{ 0x2189240808f24228ULL, 0xfeeff8cc81333f42ULL },	/* bit 7 */
	{ 0x3280008440001112ULL, 0xfee88b337ffffd62ULL },	/* bit 8 */
};
4812 
/*
 * 64-bit population count: returns the number of 1 bits in val.
 *
 * Uses the well-known trick of repeatedly clearing the lowest set bit, so
 * the loop runs once per bit that is set.  We could use the UltraSPARC V9
 * POPC instruction, but some CPUs including Cheetahplus and Jaguar do not
 * support that instruction.
 */
int
popc64(uint64_t val)
{
	int nbits = 0;

	while (val != 0) {
		val &= val - 1;		/* drop the lowest set bit */
		nbits++;
	}
	return (nbits);
}
4828 
4829 /*
4830  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4831  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4832  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4833  * instead of doing all the xor's.
4834  */
4835 uint32_t
4836 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4837 {
4838 	int bitno, s;
4839 	int synd = 0;
4840 
4841 	for (bitno = 0; bitno < 9; bitno++) {
4842 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4843 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4844 		synd |= (s << bitno);
4845 	}
4846 	return (synd);
4847 
4848 }
4849 
4850 /*
4851  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4852  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4853  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4854  */
4855 static void
4856 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4857     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4858 {
4859 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4860 
4861 	if (reason &&
4862 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4863 		(void) strcat(reason, eccp->ec_reason);
4864 	}
4865 
4866 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4867 	ch_flt->flt_type = eccp->ec_flt_type;
4868 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4869 		ch_flt->flt_diag_data = *cdp;
4870 	else
4871 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4872 	aflt->flt_in_memory =
4873 	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4874 
4875 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4876 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4877 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4878 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4879 	else
4880 		aflt->flt_synd = 0;
4881 
4882 	aflt->flt_payload = eccp->ec_err_payload;
4883 
4884 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4885 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4886 		cpu_errorq_dispatch(eccp->ec_err_class,
4887 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4888 		    aflt->flt_panic);
4889 	else
4890 		cpu_errorq_dispatch(eccp->ec_err_class,
4891 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4892 		    aflt->flt_panic);
4893 }
4894 
4895 /*
4896  * Queue events on async event queue one event per error bit.  First we
4897  * queue the events that we "expect" for the given trap, then we queue events
4898  * that we may not expect.  Return number of events queued.
4899  */
4900 int
4901 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4902     ch_cpu_logout_t *clop)
4903 {
4904 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4905 	ecc_type_to_info_t *eccp;
4906 	int nevents = 0;
4907 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4908 #if defined(CHEETAH_PLUS)
4909 	uint64_t orig_t_afsr_errs;
4910 #endif
4911 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4912 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4913 	ch_diag_data_t *cdp = NULL;
4914 
4915 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4916 
4917 #if defined(CHEETAH_PLUS)
4918 	orig_t_afsr_errs = t_afsr_errs;
4919 
4920 	/*
4921 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4922 	 */
4923 	if (clop != NULL) {
4924 		/*
4925 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4926 		 * flt_addr and flt_stat fields will be reset to the primaries
4927 		 * below, but the sdw_addr and sdw_stat will stay as the
4928 		 * secondaries.
4929 		 */
4930 		cdp = &clop->clo_sdw_data;
4931 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4932 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4933 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4934 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4935 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4936 
4937 		/*
4938 		 * If the primary and shadow AFSR differ, tag the shadow as
4939 		 * the first fault.
4940 		 */
4941 		if ((primary_afar != cdp->chd_afar) ||
4942 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4943 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4944 		}
4945 
4946 		/*
4947 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4948 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4949 		 * is expected to be zero for those CPUs which do not have
4950 		 * an AFSR_EXT register.
4951 		 */
4952 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4953 			if ((eccp->ec_afsr_bit &
4954 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4955 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4956 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4957 				cdp = NULL;
4958 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4959 				nevents++;
4960 			}
4961 		}
4962 
4963 		/*
4964 		 * If the ME bit is on in the primary AFSR turn all the
4965 		 * error bits on again that may set the ME bit to make
4966 		 * sure we see the ME AFSR error logs.
4967 		 */
4968 		if ((primary_afsr & C_AFSR_ME) != 0)
4969 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4970 	}
4971 #endif	/* CHEETAH_PLUS */
4972 
4973 	if (clop != NULL)
4974 		cdp = &clop->clo_data;
4975 
4976 	/*
4977 	 * Queue expected errors, error bit and fault type must match
4978 	 * in the ecc_type_to_info table.
4979 	 */
4980 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4981 	    eccp++) {
4982 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4983 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4984 #if defined(SERRANO)
4985 			/*
4986 			 * For FRC/FRU errors on Serrano the afar2 captures
4987 			 * the address and the associated data is
4988 			 * in the shadow logout area.
4989 			 */
4990 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4991 				if (clop != NULL)
4992 					cdp = &clop->clo_sdw_data;
4993 				aflt->flt_addr = ch_flt->afar2;
4994 			} else {
4995 				if (clop != NULL)
4996 					cdp = &clop->clo_data;
4997 				aflt->flt_addr = primary_afar;
4998 			}
4999 #else	/* SERRANO */
5000 			aflt->flt_addr = primary_afar;
5001 #endif	/* SERRANO */
5002 			aflt->flt_stat = primary_afsr;
5003 			ch_flt->afsr_ext = primary_afsr_ext;
5004 			ch_flt->afsr_errs = primary_afsr_errs;
5005 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5006 			cdp = NULL;
5007 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5008 			nevents++;
5009 		}
5010 	}
5011 
5012 	/*
5013 	 * Queue unexpected errors, error bit only match.
5014 	 */
5015 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5016 	    eccp++) {
5017 		if (eccp->ec_afsr_bit & t_afsr_errs) {
5018 #if defined(SERRANO)
5019 			/*
5020 			 * For FRC/FRU errors on Serrano the afar2 captures
5021 			 * the address and the associated data is
5022 			 * in the shadow logout area.
5023 			 */
5024 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5025 				if (clop != NULL)
5026 					cdp = &clop->clo_sdw_data;
5027 				aflt->flt_addr = ch_flt->afar2;
5028 			} else {
5029 				if (clop != NULL)
5030 					cdp = &clop->clo_data;
5031 				aflt->flt_addr = primary_afar;
5032 			}
5033 #else	/* SERRANO */
5034 			aflt->flt_addr = primary_afar;
5035 #endif	/* SERRANO */
5036 			aflt->flt_stat = primary_afsr;
5037 			ch_flt->afsr_ext = primary_afsr_ext;
5038 			ch_flt->afsr_errs = primary_afsr_errs;
5039 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5040 			cdp = NULL;
5041 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5042 			nevents++;
5043 		}
5044 	}
5045 	return (nevents);
5046 }
5047 
5048 /*
5049  * Return trap type number.
5050  */
5051 uint8_t
5052 flt_to_trap_type(struct async_flt *aflt)
5053 {
5054 	if (aflt->flt_status & ECC_I_TRAP)
5055 		return (TRAP_TYPE_ECC_I);
5056 	if (aflt->flt_status & ECC_D_TRAP)
5057 		return (TRAP_TYPE_ECC_D);
5058 	if (aflt->flt_status & ECC_F_TRAP)
5059 		return (TRAP_TYPE_ECC_F);
5060 	if (aflt->flt_status & ECC_C_TRAP)
5061 		return (TRAP_TYPE_ECC_C);
5062 	if (aflt->flt_status & ECC_DP_TRAP)
5063 		return (TRAP_TYPE_ECC_DP);
5064 	if (aflt->flt_status & ECC_IP_TRAP)
5065 		return (TRAP_TYPE_ECC_IP);
5066 	if (aflt->flt_status & ECC_ITLB_TRAP)
5067 		return (TRAP_TYPE_ECC_ITLB);
5068 	if (aflt->flt_status & ECC_DTLB_TRAP)
5069 		return (TRAP_TYPE_ECC_DTLB);
5070 	return (TRAP_TYPE_UNKNOWN);
5071 }
5072 
5073 /*
5074  * Decide an error type based on detector and leaky/partner tests.
5075  * The following array is used for quick translation - it must
5076  * stay in sync with ce_dispact_t.
5077  */
5078 
5079 static char *cetypes[] = {
5080 	CE_DISP_DESC_U,
5081 	CE_DISP_DESC_I,
5082 	CE_DISP_DESC_PP,
5083 	CE_DISP_DESC_P,
5084 	CE_DISP_DESC_L,
5085 	CE_DISP_DESC_PS,
5086 	CE_DISP_DESC_S
5087 };
5088 
5089 char *
5090 flt_to_error_type(struct async_flt *aflt)
5091 {
5092 	ce_dispact_t dispact, disp;
5093 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5094 
5095 	/*
5096 	 * The memory payload bundle is shared by some events that do
5097 	 * not perform any classification.  For those flt_disp will be
5098 	 * 0 and we will return "unknown".
5099 	 */
5100 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5101 		return (cetypes[CE_DISP_UNKNOWN]);
5102 
5103 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5104 
5105 	/*
5106 	 * It is also possible that no scrub/classification was performed
5107 	 * by the detector, for instance where a disrupting error logged
5108 	 * in the AFSR while CEEN was off in cpu_deferred_error.
5109 	 */
5110 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5111 		return (cetypes[CE_DISP_UNKNOWN]);
5112 
5113 	/*
5114 	 * Lookup type in initial classification/action table
5115 	 */
5116 	dispact = CE_DISPACT(ce_disp_table,
5117 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
5118 	    CE_XDIAG_STATE(dtcrinfo),
5119 	    CE_XDIAG_CE1SEEN(dtcrinfo),
5120 	    CE_XDIAG_CE2SEEN(dtcrinfo));
5121 
5122 	/*
5123 	 * A bad lookup is not something to panic production systems for.
5124 	 */
5125 	ASSERT(dispact != CE_DISP_BAD);
5126 	if (dispact == CE_DISP_BAD)
5127 		return (cetypes[CE_DISP_UNKNOWN]);
5128 
5129 	disp = CE_DISP(dispact);
5130 
5131 	switch (disp) {
5132 	case CE_DISP_UNKNOWN:
5133 	case CE_DISP_INTERMITTENT:
5134 		break;
5135 
5136 	case CE_DISP_POSS_PERS:
5137 		/*
5138 		 * "Possible persistent" errors to which we have applied a valid
5139 		 * leaky test can be separated into "persistent" or "leaky".
5140 		 */
5141 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5142 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
5143 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5144 			    CE_XDIAG_CE2SEEN(lkyinfo))
5145 				disp = CE_DISP_LEAKY;
5146 			else
5147 				disp = CE_DISP_PERS;
5148 		}
5149 		break;
5150 
5151 	case CE_DISP_POSS_STICKY:
5152 		/*
5153 		 * Promote "possible sticky" results that have been
5154 		 * confirmed by a partner test to "sticky".  Unconfirmed
5155 		 * "possible sticky" events are left at that status - we do not
5156 		 * guess at any bad reader/writer etc status here.
5157 		 */
5158 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5159 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5160 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5161 			disp = CE_DISP_STICKY;
5162 
5163 		/*
5164 		 * Promote "possible sticky" results on a uniprocessor
5165 		 * to "sticky"
5166 		 */
5167 		if (disp == CE_DISP_POSS_STICKY &&
5168 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5169 			disp = CE_DISP_STICKY;
5170 		break;
5171 
5172 	default:
5173 		disp = CE_DISP_UNKNOWN;
5174 		break;
5175 	}
5176 
5177 	return (cetypes[disp]);
5178 }
5179 
5180 /*
5181  * Given the entire afsr, the specific bit to check and a prioritized list of
5182  * error bits, determine the validity of the various overwrite priority
5183  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5184  * different overwrite priorities.
5185  *
5186  * Given a specific afsr error bit and the entire afsr, there are three cases:
5187  *   INVALID:	The specified bit is lower overwrite priority than some other
5188  *		error bit which is on in the afsr (or IVU/IVC).
5189  *   VALID:	The specified bit is higher priority than all other error bits
5190  *		which are on in the afsr.
5191  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5192  *		bit is on in the afsr.
5193  */
5194 int
5195 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5196 {
5197 	uint64_t afsr_ow;
5198 
5199 	while ((afsr_ow = *ow_bits++) != 0) {
5200 		/*
5201 		 * If bit is in the priority class, check to see if another
5202 		 * bit in the same class is on => ambiguous.  Otherwise,
5203 		 * the value is valid.  If the bit is not on at this priority
5204 		 * class, but a higher priority bit is on, then the value is
5205 		 * invalid.
5206 		 */
5207 		if (afsr_ow & afsr_bit) {
5208 			/*
5209 			 * If equal pri bit is on, ambiguous.
5210 			 */
5211 			if (afsr & (afsr_ow & ~afsr_bit))
5212 				return (AFLT_STAT_AMBIGUOUS);
5213 			return (AFLT_STAT_VALID);
5214 		} else if (afsr & afsr_ow)
5215 			break;
5216 	}
5217 
5218 	/*
5219 	 * We didn't find a match or a higher priority bit was on.  Not
5220 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5221 	 */
5222 	return (AFLT_STAT_INVALID);
5223 }
5224 
5225 static int
5226 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5227 {
5228 #if defined(SERRANO)
5229 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5230 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5231 	else
5232 #endif	/* SERRANO */
5233 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5234 }
5235 
5236 static int
5237 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5238 {
5239 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5240 }
5241 
5242 static int
5243 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5244 {
5245 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5246 }
5247 
5248 static int
5249 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5250 {
5251 #ifdef lint
5252 	cpuid = cpuid;
5253 #endif
5254 #if defined(CHEETAH_PLUS)
5255 	/*
5256 	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5257 	 * policy for Cheetah+ and separate for Panther CPUs.
5258 	 */
5259 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5260 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5261 			return (afsr_to_msynd_status(afsr, afsr_bit));
5262 		else
5263 			return (afsr_to_esynd_status(afsr, afsr_bit));
5264 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5265 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5266 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5267 		else
5268 			return (afsr_to_esynd_status(afsr, afsr_bit));
5269 #else /* CHEETAH_PLUS */
5270 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5271 		return (afsr_to_msynd_status(afsr, afsr_bit));
5272 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5273 		return (afsr_to_esynd_status(afsr, afsr_bit));
5274 #endif /* CHEETAH_PLUS */
5275 	} else {
5276 		return (AFLT_STAT_INVALID);
5277 	}
5278 }
5279 
5280 /*
5281  * Slave CPU stick synchronization.
5282  */
5283 void
5284 sticksync_slave(void)
5285 {
5286 	int 		i;
5287 	int		tries = 0;
5288 	int64_t		tskew;
5289 	int64_t		av_tskew;
5290 
5291 	kpreempt_disable();
5292 	/* wait for the master side */
5293 	while (stick_sync_cmd != SLAVE_START)
5294 		;
5295 	/*
5296 	 * Synchronization should only take a few tries at most. But in the
5297 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5298 	 * without it's stick synchronized wouldn't be a good citizen.
5299 	 */
5300 	while (slave_done == 0) {
5301 		/*
5302 		 * Time skew calculation.
5303 		 */
5304 		av_tskew = tskew = 0;
5305 
5306 		for (i = 0; i < stick_iter; i++) {
5307 			/* make location hot */
5308 			timestamp[EV_A_START] = 0;
5309 			stick_timestamp(&timestamp[EV_A_START]);
5310 
5311 			/* tell the master we're ready */
5312 			stick_sync_cmd = MASTER_START;
5313 
5314 			/* and wait */
5315 			while (stick_sync_cmd != SLAVE_CONT)
5316 				;
5317 			/* Event B end */
5318 			stick_timestamp(&timestamp[EV_B_END]);
5319 
5320 			/* calculate time skew */
5321 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5322 			    - (timestamp[EV_A_END] - timestamp[EV_A_START]))
5323 			    / 2;
5324 
5325 			/* keep running count */
5326 			av_tskew += tskew;
5327 		} /* for */
5328 
5329 		/*
5330 		 * Adjust stick for time skew if not within the max allowed;
5331 		 * otherwise we're all done.
5332 		 */
5333 		if (stick_iter != 0)
5334 			av_tskew = av_tskew/stick_iter;
5335 		if (ABS(av_tskew) > stick_tsk) {
5336 			/*
5337 			 * If the skew is 1 (the slave's STICK register
5338 			 * is 1 STICK ahead of the master's), stick_adj
5339 			 * could fail to adjust the slave's STICK register
5340 			 * if the STICK read on the slave happens to
5341 			 * align with the increment of the STICK.
5342 			 * Therefore, we increment the skew to 2.
5343 			 */
5344 			if (av_tskew == 1)
5345 				av_tskew++;
5346 			stick_adj(-av_tskew);
5347 		} else
5348 			slave_done = 1;
5349 #ifdef DEBUG
5350 		if (tries < DSYNC_ATTEMPTS)
5351 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5352 			    av_tskew;
5353 		++tries;
5354 #endif /* DEBUG */
5355 #ifdef lint
5356 		tries = tries;
5357 #endif
5358 
5359 	} /* while */
5360 
5361 	/* allow the master to finish */
5362 	stick_sync_cmd = EVENT_NULL;
5363 	kpreempt_enable();
5364 }
5365 
5366 /*
5367  * Master CPU side of stick synchronization.
5368  *  - timestamp end of Event A
5369  *  - timestamp beginning of Event B
5370  */
5371 void
5372 sticksync_master(void)
5373 {
5374 	int		i;
5375 
5376 	kpreempt_disable();
5377 	/* tell the slave we've started */
5378 	slave_done = 0;
5379 	stick_sync_cmd = SLAVE_START;
5380 
5381 	while (slave_done == 0) {
5382 		for (i = 0; i < stick_iter; i++) {
5383 			/* wait for the slave */
5384 			while (stick_sync_cmd != MASTER_START)
5385 				;
5386 			/* Event A end */
5387 			stick_timestamp(&timestamp[EV_A_END]);
5388 
5389 			/* make location hot */
5390 			timestamp[EV_B_START] = 0;
5391 			stick_timestamp(&timestamp[EV_B_START]);
5392 
5393 			/* tell the slave to continue */
5394 			stick_sync_cmd = SLAVE_CONT;
5395 		} /* for */
5396 
5397 		/* wait while slave calculates time skew */
5398 		while (stick_sync_cmd == SLAVE_CONT)
5399 			;
5400 	} /* while */
5401 	kpreempt_enable();
5402 }
5403 
/*
 * Intentionally a no-op.
 *
 * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to
 * do Spitfire hack of xcall'ing all the cpus to ask to check for them.  Also,
 * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
 * panic idle.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
}
5414 
5415 struct kmem_cache *ch_private_cache;
5416 
5417 /*
5418  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5419  * deallocate the scrubber data structures and cpu_private data structure.
5420  */
5421 void
5422 cpu_uninit_private(struct cpu *cp)
5423 {
5424 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5425 
5426 	ASSERT(chprp);
5427 	cpu_uninit_ecache_scrub_dr(cp);
5428 	CPU_PRIVATE(cp) = NULL;
5429 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5430 	kmem_cache_free(ch_private_cache, chprp);
5431 	cmp_delete_cpu(cp->cpu_id);
5432 
5433 }
5434 
5435 /*
5436  * Cheetah Cache Scrubbing
5437  *
5438  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5439  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5440  * protected by either parity or ECC.
5441  *
5442  * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
5443  * cache per second). Due to the specifics of how the I$ control
5444  * logic works with respect to the ASI used to scrub I$ lines, the entire
5445  * I$ is scanned at once.
5446  */
5447 
5448 /*
5449  * Tuneables to enable and disable the scrubbing of the caches, and to tune
5450  * scrubbing behavior.  These may be changed via /etc/system or using mdb
5451  * on a running system.
5452  */
5453 int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5454 
5455 /*
5456  * The following are the PIL levels that the softints/cross traps will fire at.
5457  */
5458 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5459 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5460 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5461 
#if !defined(JALAPENO)

/*
 * With every cpu type other than Jalapeno, E$ scrubbing is on by default
 */
int ecache_scrub_enable = 1;

#else	/* !JALAPENO */

/*
 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
 * on Jalapeno.
 */
int ecache_scrub_enable = 0;

#endif	/* !JALAPENO */
5478 
5479 
/*
 * I$ scrubber enable flag, plus the tuneables specifying the scrub calls
 * per second and the scan rate for each cache.
 *
 * The cyclic times are set during boot based on the ?cache_calls_a_sec
 * values.  Changing these values in mdb after this time will have no
 * effect.  If a different value is desired, it must be set in /etc/system
 * before a reboot.
 */
#if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)

/*
 * The I$ scrubber tends to cause latency problems for real-time SW, so it
 * is disabled by default on non-Cheetah systems
 */
int icache_scrub_enable = 0;

/* E$ */
int ecache_calls_a_sec = 1;
int ecache_scan_rate_idle = 1;
int ecache_scan_rate_busy = 1;

/* D$ */
int dcache_calls_a_sec = 2;
int dcache_scan_rate_idle = 1;
int dcache_scan_rate_busy = 1;

/* I$ */
int icache_calls_a_sec = 2;
int icache_scan_rate_idle = 1;
int icache_scan_rate_busy = 1;

#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */

int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */

/* E$ */
int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */

/* D$ */
int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */

/* I$ */
int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */
int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */

#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5524 
5525 /*
5526  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5527  * increment the outstanding request counter and schedule a softint to run
5528  * the scrubber.
5529  */
5530 extern xcfunc_t cache_scrubreq_tl1;
5531 
5532 /*
5533  * These are the softint functions for each cache scrubber
5534  */
5535 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5536 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5537 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5538 
5539 /*
5540  * The cache scrub info table contains cache specific information
5541  * and allows for some of the scrub code to be table driven, reducing
5542  * duplication of cache similar code.
5543  *
5544  * This table keeps a copy of the value in the calls per second variable
5545  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5546  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5547  * mdb in a misguided attempt to disable the scrubber).
5548  */
5549 struct scrub_info {
5550 	int		*csi_enable;	/* scrubber enable flag */
5551 	int		csi_freq;	/* scrubber calls per second */
5552 	int		csi_index;	/* index to chsm_outstanding[] */
5553 	uint64_t	csi_inum;	/* scrubber interrupt number */
5554 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5555 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5556 	char		csi_name[3];	/* cache name for this scrub entry */
5557 } cache_scrub_info[] = {
5558 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5559 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5560 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5561 };
5562 
5563 /*
5564  * If scrubbing is enabled, increment the outstanding request counter.  If it
5565  * is 1 (meaning there were no previous requests outstanding), call
5566  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5567  * a self trap.
5568  */
5569 static void
5570 do_scrub(struct scrub_info *csi)
5571 {
5572 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5573 	int index = csi->csi_index;
5574 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5575 
5576 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5577 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5578 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5579 			    csi->csi_inum, 0);
5580 		}
5581 	}
5582 }
5583 
5584 /*
5585  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5586  * cross-trap the offline cpus.
5587  */
5588 static void
5589 do_scrub_offline(struct scrub_info *csi)
5590 {
5591 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5592 
5593 	if (CPUSET_ISNULL(cpu_offline_set)) {
5594 		/*
5595 		 * No offline cpus - nothing to do
5596 		 */
5597 		return;
5598 	}
5599 
5600 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5601 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5602 		    csi->csi_index);
5603 	}
5604 }
5605 
5606 /*
5607  * This is the initial setup for the scrubber cyclics - it sets the
5608  * interrupt level, frequency, and function to call.
5609  */
5610 /*ARGSUSED*/
5611 static void
5612 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5613     cyc_time_t *when)
5614 {
5615 	struct scrub_info *csi = (struct scrub_info *)arg;
5616 
5617 	ASSERT(csi != NULL);
5618 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5619 	hdlr->cyh_level = CY_LOW_LEVEL;
5620 	hdlr->cyh_arg = arg;
5621 
5622 	when->cyt_when = 0;	/* Start immediately */
5623 	when->cyt_interval = NANOSEC / csi->csi_freq;
5624 }
5625 
5626 /*
5627  * Initialization for cache scrubbing.
5628  * This routine is called AFTER all cpus have had cpu_init_private called
5629  * to initialize their private data areas.
5630  */
5631 void
5632 cpu_init_cache_scrub(void)
5633 {
5634 	int i;
5635 	struct scrub_info *csi;
5636 	cyc_omni_handler_t omni_hdlr;
5637 	cyc_handler_t offline_hdlr;
5638 	cyc_time_t when;
5639 
5640 	/*
5641 	 * save away the maximum number of lines for the D$
5642 	 */
5643 	dcache_nlines = dcache_size / dcache_linesize;
5644 
5645 	/*
5646 	 * register the softints for the cache scrubbing
5647 	 */
5648 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5649 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5650 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5651 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5652 
5653 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5654 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5655 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5656 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5657 
5658 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5659 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5660 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5661 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5662 
5663 	/*
5664 	 * start the scrubbing for all the caches
5665 	 */
5666 	mutex_enter(&cpu_lock);
5667 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5668 
5669 		csi = &cache_scrub_info[i];
5670 
5671 		if (!(*csi->csi_enable))
5672 			continue;
5673 
5674 		/*
5675 		 * force the following to be true:
5676 		 *	1 <= calls_a_sec <= hz
5677 		 */
5678 		if (csi->csi_freq > hz) {
5679 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5680 			    "(%d); resetting to hz (%d)", csi->csi_name,
5681 			    csi->csi_freq, hz);
5682 			csi->csi_freq = hz;
5683 		} else if (csi->csi_freq < 1) {
5684 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5685 			    "(%d); resetting to 1", csi->csi_name,
5686 			    csi->csi_freq);
5687 			csi->csi_freq = 1;
5688 		}
5689 
5690 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5691 		omni_hdlr.cyo_offline = NULL;
5692 		omni_hdlr.cyo_arg = (void *)csi;
5693 
5694 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5695 		offline_hdlr.cyh_arg = (void *)csi;
5696 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5697 
5698 		when.cyt_when = 0;	/* Start immediately */
5699 		when.cyt_interval = NANOSEC / csi->csi_freq;
5700 
5701 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5702 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5703 	}
5704 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5705 	mutex_exit(&cpu_lock);
5706 }
5707 
5708 /*
5709  * Indicate that the specified cpu is idle.
5710  */
5711 void
5712 cpu_idle_ecache_scrub(struct cpu *cp)
5713 {
5714 	if (CPU_PRIVATE(cp) != NULL) {
5715 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5716 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5717 	}
5718 }
5719 
5720 /*
5721  * Indicate that the specified cpu is busy.
5722  */
5723 void
5724 cpu_busy_ecache_scrub(struct cpu *cp)
5725 {
5726 	if (CPU_PRIVATE(cp) != NULL) {
5727 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5728 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5729 	}
5730 }
5731 
5732 /*
5733  * Initialization for cache scrubbing for the specified cpu.
5734  */
5735 void
5736 cpu_init_ecache_scrub_dr(struct cpu *cp)
5737 {
5738 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5739 	int cpuid = cp->cpu_id;
5740 
5741 	/* initialize the number of lines in the caches */
5742 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5743 	    cpunodes[cpuid].ecache_linesize;
5744 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5745 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5746 
5747 	/*
5748 	 * do_scrub() and do_scrub_offline() check both the global
5749 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5750 	 * check this value before scrubbing.  Currently, we use it to
5751 	 * disable the E$ scrubber on multi-core cpus or while running at
5752 	 * slowed speed.  For now, just turn everything on and allow
5753 	 * cpu_init_private() to change it if necessary.
5754 	 */
5755 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5756 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5757 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5758 
5759 	cpu_busy_ecache_scrub(cp);
5760 }
5761 
5762 /*
5763  * Un-initialization for cache scrubbing for the specified cpu.
5764  */
5765 static void
5766 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5767 {
5768 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5769 
5770 	/*
5771 	 * un-initialize bookkeeping for cache scrubbing
5772 	 */
5773 	bzero(csmp, sizeof (ch_scrub_misc_t));
5774 
5775 	cpu_idle_ecache_scrub(cp);
5776 }
5777 
5778 /*
5779  * Called periodically on each CPU to scrub the D$.
5780  */
5781 static void
5782 scrub_dcache(int how_many)
5783 {
5784 	int i;
5785 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5786 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5787 
5788 	/*
5789 	 * scrub the desired number of lines
5790 	 */
5791 	for (i = 0; i < how_many; i++) {
5792 		/*
5793 		 * scrub a D$ line
5794 		 */
5795 		dcache_inval_line(index);
5796 
5797 		/*
5798 		 * calculate the next D$ line to scrub, assumes
5799 		 * that dcache_nlines is a power of 2
5800 		 */
5801 		index = (index + 1) & (dcache_nlines - 1);
5802 	}
5803 
5804 	/*
5805 	 * set the scrub index for the next visit
5806 	 */
5807 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5808 }
5809 
5810 /*
5811  * Handler for D$ scrub inum softint. Call scrub_dcache until
5812  * we decrement the outstanding request count to zero.
5813  */
5814 /*ARGSUSED*/
5815 static uint_t
5816 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5817 {
5818 	int i;
5819 	int how_many;
5820 	int outstanding;
5821 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5822 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5823 	struct scrub_info *csi = (struct scrub_info *)arg1;
5824 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5825 	    dcache_scan_rate_idle : dcache_scan_rate_busy;
5826 
5827 	/*
5828 	 * The scan rates are expressed in units of tenths of a
5829 	 * percent.  A scan rate of 1000 (100%) means the whole
5830 	 * cache is scanned every second.
5831 	 */
5832 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5833 
5834 	do {
5835 		outstanding = *countp;
5836 		for (i = 0; i < outstanding; i++) {
5837 			scrub_dcache(how_many);
5838 		}
5839 	} while (atomic_add_32_nv(countp, -outstanding));
5840 
5841 	return (DDI_INTR_CLAIMED);
5842 }
5843 
5844 /*
5845  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5846  * by invalidating lines. Due to the characteristics of the ASI which
5847  * is used to invalidate an I$ line, the entire I$ must be invalidated
5848  * vs. an individual I$ line.
5849  */
5850 static void
5851 scrub_icache(int how_many)
5852 {
5853 	int i;
5854 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5855 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5856 	int icache_nlines = csmp->chsm_icache_nlines;
5857 
5858 	/*
5859 	 * scrub the desired number of lines
5860 	 */
5861 	for (i = 0; i < how_many; i++) {
5862 		/*
5863 		 * since the entire I$ must be scrubbed at once,
5864 		 * wait until the index wraps to zero to invalidate
5865 		 * the entire I$
5866 		 */
5867 		if (index == 0) {
5868 			icache_inval_all();
5869 		}
5870 
5871 		/*
5872 		 * calculate the next I$ line to scrub, assumes
5873 		 * that chsm_icache_nlines is a power of 2
5874 		 */
5875 		index = (index + 1) & (icache_nlines - 1);
5876 	}
5877 
5878 	/*
5879 	 * set the scrub index for the next visit
5880 	 */
5881 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5882 }
5883 
5884 /*
5885  * Handler for I$ scrub inum softint. Call scrub_icache until
5886  * we decrement the outstanding request count to zero.
5887  */
5888 /*ARGSUSED*/
5889 static uint_t
5890 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5891 {
5892 	int i;
5893 	int how_many;
5894 	int outstanding;
5895 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5896 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5897 	struct scrub_info *csi = (struct scrub_info *)arg1;
5898 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5899 	    icache_scan_rate_idle : icache_scan_rate_busy;
5900 	int icache_nlines = csmp->chsm_icache_nlines;
5901 
5902 	/*
5903 	 * The scan rates are expressed in units of tenths of a
5904 	 * percent.  A scan rate of 1000 (100%) means the whole
5905 	 * cache is scanned every second.
5906 	 */
5907 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5908 
5909 	do {
5910 		outstanding = *countp;
5911 		for (i = 0; i < outstanding; i++) {
5912 			scrub_icache(how_many);
5913 		}
5914 	} while (atomic_add_32_nv(countp, -outstanding));
5915 
5916 	return (DDI_INTR_CLAIMED);
5917 }
5918 
5919 /*
5920  * Called periodically on each CPU to scrub the E$.
5921  */
5922 static void
5923 scrub_ecache(int how_many)
5924 {
5925 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5926 	int i;
5927 	int cpuid = CPU->cpu_id;
5928 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5929 	int nlines = csmp->chsm_ecache_nlines;
5930 	int linesize = cpunodes[cpuid].ecache_linesize;
5931 	int ec_set_size = cpu_ecache_set_size(CPU);
5932 
5933 	/*
5934 	 * scrub the desired number of lines
5935 	 */
5936 	for (i = 0; i < how_many; i++) {
5937 		/*
5938 		 * scrub the E$ line
5939 		 */
5940 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5941 		    ec_set_size);
5942 
5943 		/*
5944 		 * calculate the next E$ line to scrub based on twice
5945 		 * the number of E$ lines (to displace lines containing
5946 		 * flush area data), assumes that the number of lines
5947 		 * is a power of 2
5948 		 */
5949 		index = (index + 1) & ((nlines << 1) - 1);
5950 	}
5951 
5952 	/*
5953 	 * set the ecache scrub index for the next visit
5954 	 */
5955 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5956 }
5957 
5958 /*
5959  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5960  * we decrement the outstanding request count to zero.
5961  *
5962  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5963  * become negative after the atomic_add_32_nv().  This is not a problem, as
5964  * the next trip around the loop won't scrub anything, and the next add will
5965  * reset the count back to zero.
5966  */
5967 /*ARGSUSED*/
5968 static uint_t
5969 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5970 {
5971 	int i;
5972 	int how_many;
5973 	int outstanding;
5974 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5975 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5976 	struct scrub_info *csi = (struct scrub_info *)arg1;
5977 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5978 	    ecache_scan_rate_idle : ecache_scan_rate_busy;
5979 	int ecache_nlines = csmp->chsm_ecache_nlines;
5980 
5981 	/*
5982 	 * The scan rates are expressed in units of tenths of a
5983 	 * percent.  A scan rate of 1000 (100%) means the whole
5984 	 * cache is scanned every second.
5985 	 */
5986 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5987 
5988 	do {
5989 		outstanding = *countp;
5990 		for (i = 0; i < outstanding; i++) {
5991 			scrub_ecache(how_many);
5992 		}
5993 	} while (atomic_add_32_nv(countp, -outstanding));
5994 
5995 	return (DDI_INTR_CLAIMED);
5996 }
5997 
5998 /*
5999  * Timeout function to reenable CE
6000  */
6001 static void
6002 cpu_delayed_check_ce_errors(void *arg)
6003 {
6004 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6005 	    TQ_NOSLEEP)) {
6006 		(void) timeout(cpu_delayed_check_ce_errors, arg,
6007 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6008 	}
6009 }
6010 
6011 /*
6012  * CE Deferred Re-enable after trap.
6013  *
6014  * When the CPU gets a disrupting trap for any of the errors
6015  * controlled by the CEEN bit, CEEN is disabled in the trap handler
6016  * immediately. To eliminate the possibility of multiple CEs causing
6017  * recursive stack overflow in the trap handler, we cannot
6018  * reenable CEEN while still running in the trap handler. Instead,
6019  * after a CE is logged on a CPU, we schedule a timeout function,
6020  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6021  * seconds. This function will check whether any further CEs
6022  * have occurred on that CPU, and if none have, will reenable CEEN.
6023  *
6024  * If further CEs have occurred while CEEN is disabled, another
6025  * timeout will be scheduled. This is to ensure that the CPU can
6026  * make progress in the face of CE 'storms', and that it does not
6027  * spend all its time logging CE errors.
6028  */
6029 static void
6030 cpu_check_ce_errors(void *arg)
6031 {
6032 	int	cpuid = (int)(uintptr_t)arg;
6033 	cpu_t	*cp;
6034 
6035 	/*
6036 	 * We acquire cpu_lock.
6037 	 */
6038 	ASSERT(curthread->t_pil == 0);
6039 
6040 	/*
6041 	 * verify that the cpu is still around, DR
6042 	 * could have got there first ...
6043 	 */
6044 	mutex_enter(&cpu_lock);
6045 	cp = cpu_get(cpuid);
6046 	if (cp == NULL) {
6047 		mutex_exit(&cpu_lock);
6048 		return;
6049 	}
6050 	/*
6051 	 * make sure we don't migrate across CPUs
6052 	 * while checking our CE status.
6053 	 */
6054 	kpreempt_disable();
6055 
6056 	/*
6057 	 * If we are running on the CPU that got the
6058 	 * CE, we can do the checks directly.
6059 	 */
6060 	if (cp->cpu_id == CPU->cpu_id) {
6061 		mutex_exit(&cpu_lock);
6062 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6063 		kpreempt_enable();
6064 		return;
6065 	}
6066 	kpreempt_enable();
6067 
6068 	/*
6069 	 * send an x-call to get the CPU that originally
6070 	 * got the CE to do the necessary checks. If we can't
6071 	 * send the x-call, reschedule the timeout, otherwise we
6072 	 * lose CEEN forever on that CPU.
6073 	 */
6074 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6075 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6076 		    TIMEOUT_CEEN_CHECK, 0);
6077 		mutex_exit(&cpu_lock);
6078 	} else {
6079 		/*
6080 		 * When the CPU is not accepting xcalls, or
6081 		 * the processor is offlined, we don't want to
6082 		 * incur the extra overhead of trying to schedule the
6083 		 * CE timeout indefinitely. However, we don't want to lose
6084 		 * CE checking forever.
6085 		 *
6086 		 * Keep rescheduling the timeout, accepting the additional
6087 		 * overhead as the cost of correctness in the case where we get
6088 		 * a CE, disable CEEN, offline the CPU during the
6089 		 * the timeout interval, and then online it at some
6090 		 * point in the future. This is unlikely given the short
6091 		 * cpu_ceen_delay_secs.
6092 		 */
6093 		mutex_exit(&cpu_lock);
6094 		(void) timeout(cpu_delayed_check_ce_errors,
6095 		    (void *)(uintptr_t)cp->cpu_id,
6096 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6097 	}
6098 }
6099 
6100 /*
6101  * This routine will check whether CEs have occurred while
6102  * CEEN is disabled. Any CEs detected will be logged and, if
6103  * possible, scrubbed.
6104  *
6105  * The memscrubber will also use this routine to clear any errors
6106  * caused by its scrubbing with CEEN disabled.
6107  *
6108  * flag == SCRUBBER_CEEN_CHECK
6109  *		called from memscrubber, just check/scrub, no reset
6110  *		paddr 	physical addr. for start of scrub pages
6111  *		vaddr 	virtual addr. for scrub area
6112  *		psz	page size of area to be scrubbed
6113  *
6114  * flag == TIMEOUT_CEEN_CHECK
6115  *		timeout function has triggered, reset timeout or CEEN
6116  *
6117  * Note: We must not migrate cpus during this function.  This can be
6118  * achieved by one of:
6119  *    - invoking as target of an x-call in which case we're at XCALL_PIL
6120  *	The flag value must be first xcall argument.
6121  *    - disabling kernel preemption.  This should be done for very short
6122  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6123  *	scrub an extended area with cpu_check_block.  The call for
6124  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6125  *	brief for this case.
6126  *    - binding to a cpu, eg with thread_affinity_set().  This is used
6127  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6128  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6129  */
6130 void
6131 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6132 {
6133 	ch_cpu_errors_t	cpu_error_regs;
6134 	uint64_t	ec_err_enable;
6135 	uint64_t	page_offset;
6136 
6137 	/* Read AFSR */
6138 	get_cpu_error_state(&cpu_error_regs);
6139 
6140 	/*
6141 	 * If no CEEN errors have occurred during the timeout
6142 	 * interval, it is safe to re-enable CEEN and exit.
6143 	 */
6144 	if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
6145 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
6146 		if (flag == TIMEOUT_CEEN_CHECK &&
6147 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6148 			set_error_enable(ec_err_enable | EN_REG_CEEN);
6149 		return;
6150 	}
6151 
6152 	/*
6153 	 * Ensure that CEEN was not reenabled (maybe by DR) before
6154 	 * we log/clear the error.
6155 	 */
6156 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6157 		set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6158 
6159 	/*
6160 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6161 	 * timeout will be rescheduled when the error is logged.
6162 	 */
6163 	if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
6164 	    (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
6165 		cpu_ce_detected(&cpu_error_regs,
6166 		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6167 	else
6168 		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6169 
6170 	/*
6171 	 * If the memory scrubber runs while CEEN is
6172 	 * disabled, (or if CEEN is disabled during the
6173 	 * scrub as a result of a CE being triggered by
6174 	 * it), the range being scrubbed will not be
6175 	 * completely cleaned. If there are multiple CEs
6176 	 * in the range at most two of these will be dealt
6177 	 * with, (one by the trap handler and one by the
6178 	 * timeout). It is also possible that none are dealt
6179 	 * with, (CEEN disabled and another CE occurs before
6180 	 * the timeout triggers). So to ensure that the
6181 	 * memory is actually scrubbed, we have to access each
6182 	 * memory location in the range and then check whether
6183 	 * that access causes a CE.
6184 	 */
6185 	if (flag == SCRUBBER_CEEN_CHECK && va) {
6186 		if ((cpu_error_regs.afar >= pa) &&
6187 		    (cpu_error_regs.afar < (pa + psz))) {
6188 			/*
6189 			 * Force a load from physical memory for each
6190 			 * 64-byte block, then check AFSR to determine
6191 			 * whether this access caused an error.
6192 			 *
6193 			 * This is a slow way to do a scrub, but as it will
6194 			 * only be invoked when the memory scrubber actually
6195 			 * triggered a CE, it should not happen too
6196 			 * frequently.
6197 			 *
6198 			 * cut down what we need to check as the scrubber
6199 			 * has verified up to AFAR, so get it's offset
6200 			 * into the page and start there.
6201 			 */
6202 			page_offset = (uint64_t)(cpu_error_regs.afar &
6203 			    (psz - 1));
6204 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6205 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6206 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6207 			    psz);
6208 		}
6209 	}
6210 
6211 	/*
6212 	 * Reset error enable if this CE is not masked.
6213 	 */
6214 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6215 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6216 		set_error_enable(ec_err_enable | EN_REG_CEEN);
6217 
6218 }
6219 
6220 /*
6221  * Attempt a cpu logout for an error that we did not trap for, such
6222  * as a CE noticed with CEEN off.  It is assumed that we are still running
6223  * on the cpu that took the error and that we cannot migrate.  Returns
6224  * 0 on success, otherwise nonzero.
6225  */
6226 static int
6227 cpu_ce_delayed_ec_logout(uint64_t afar)
6228 {
6229 	ch_cpu_logout_t *clop;
6230 
6231 	if (CPU_PRIVATE(CPU) == NULL)
6232 		return (0);
6233 
6234 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6235 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6236 	    LOGOUT_INVALID)
6237 		return (0);
6238 
6239 	cpu_delayed_logout(afar, clop);
6240 	return (1);
6241 }
6242 
6243 /*
6244  * We got an error while CEEN was disabled. We
6245  * need to clean up after it and log whatever
6246  * information we have on the CE.
6247  */
6248 void
6249 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6250 {
6251 	ch_async_flt_t 	ch_flt;
6252 	struct async_flt *aflt;
6253 	char 		pr_reason[MAX_REASON_STRING];
6254 
6255 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6256 	ch_flt.flt_trapped_ce = flag;
6257 	aflt = (struct async_flt *)&ch_flt;
6258 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6259 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6260 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6261 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6262 	aflt->flt_addr = cpu_error_regs->afar;
6263 #if defined(SERRANO)
6264 	ch_flt.afar2 = cpu_error_regs->afar2;
6265 #endif	/* SERRANO */
6266 	aflt->flt_pc = NULL;
6267 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6268 	aflt->flt_tl = 0;
6269 	aflt->flt_panic = 0;
6270 	cpu_log_and_clear_ce(&ch_flt);
6271 
6272 	/*
6273 	 * check if we caused any errors during cleanup
6274 	 */
6275 	if (clear_errors(&ch_flt)) {
6276 		pr_reason[0] = '\0';
6277 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6278 		    NULL);
6279 	}
6280 }
6281 
/*
 * Log/clear CEEN-controlled disrupting errors
 *
 * ch_flt describes the error; its flt_stat, afsr_errs and flt_trapped_ce
 * fields are read here and the remaining identification fields are filled
 * in.  The routine captures logout data where applicable, scrubs memory
 * for memory CEs, queues one event per error bit, invalidates the CE
 * logout area and finally flushes the E$ if the error type requires it.
 */
static void
cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt;
	uint64_t afsr, afsr_errs;
	ch_cpu_logout_t *clop;
	char 		pr_reason[MAX_REASON_STRING];
	on_trap_data_t	*otp = curthread->t_ontrap;

	/* Fill in the generic fault description for the current cpu. */
	aflt = (struct async_flt *)ch_flt;
	afsr = aflt->flt_stat;
	afsr_errs = ch_flt->afsr_errs;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_status = ECC_C_TRAP;

	pr_reason[0] = '\0';
	/*
	 * Get the CPU log out info for Disrupting Trap.
	 * Without a private data area there is no logout area, so mark
	 * the fault's own diagnostic data as invalid instead.
	 */
	if (CPU_PRIVATE(CPU) == NULL) {
		clop = NULL;
		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
	} else {
		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
	}

	/*
	 * For an error found by the CEEN timeout check (CE_CEEN_TIMEOUT)
	 * there was no trap to capture logout data, so do it here from a
	 * fresh read of the error registers.
	 */
	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
		ch_cpu_errors_t cpu_error_regs;

		get_cpu_error_state(&cpu_error_regs);
		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
		clop->clo_data.chd_afar = cpu_error_regs.afar;
		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
		clop->clo_sdw_data.chd_afsr_ext =
		    cpu_error_regs.shadow_afsr_ext;
#if defined(SERRANO)
		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
		ch_flt->flt_data_incomplete = 1;

		/*
		 * The logging/clear code expects AFSR/AFAR to be cleared.
		 * The trap handler does it for CEEN enabled errors
		 * so we need to do it here.
		 */
		set_cpu_error_state(&cpu_error_regs);
	}

#if defined(JALAPENO) || defined(SERRANO)
	/*
	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
	 * For Serrano, even though we do have the AFAR, we still do the
	 * scrub on the RCE side since that's where the error type can
	 * be properly classified as intermittent, persistent, etc.
	 *
	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
	 * the flt_status bits.
	 */
	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
		cpu_ce_scrub_mem_err(aflt, B_TRUE);
	}
#else /* JALAPENO || SERRANO */
	/*
	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
	 * the flt_status bits.
	 */
	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
			cpu_ce_scrub_mem_err(aflt, B_TRUE);
		}
	}

#endif /* JALAPENO || SERRANO */

	/*
	 * Update flt_prot if this error occurred under on_trap protection.
	 */
	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
		aflt->flt_prot = AFLT_PROT_EC;

	/*
	 * Queue events on the async event queue, one event per error bit.
	 * If cpu_queue_events() returns 0, or no CEEN-controlled error bit
	 * is set at all, report the fault as an invalid AFSR instead.
	 */
	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
		ch_flt->flt_type = CPU_INV_AFSR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Zero out + invalidate CPU logout.
	 */
	if (clop) {
		bzero(clop, sizeof (ch_cpu_logout_t));
		clop->clo_data.chd_afar = LOGOUT_INVALID;
	}

	/*
	 * If either a CPC, WDC or EDC error has occurred while CEEN
	 * was disabled, we need to flush either the entire
	 * E$ or an E$ line.
	 *
	 * Note that the #if below only selects the condition; the
	 * cpu_error_ecache_flush() call is the body of whichever "if"
	 * is compiled in.
	 */
#if defined(JALAPENO) || defined(SERRANO)
	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
#else	/* JALAPENO || SERRANO */
	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
#endif	/* JALAPENO || SERRANO */
		cpu_error_ecache_flush(ch_flt);

}
6410 
6411 /*
6412  * depending on the error type, we determine whether we
6413  * need to flush the entire ecache or just a line.
6414  */
6415 static int
6416 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6417 {
6418 	struct async_flt *aflt;
6419 	uint64_t	afsr;
6420 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6421 
6422 	aflt = (struct async_flt *)ch_flt;
6423 	afsr = aflt->flt_stat;
6424 
6425 	/*
6426 	 * If we got multiple errors, no point in trying
6427 	 * the individual cases, just flush the whole cache
6428 	 */
6429 	if (afsr & C_AFSR_ME) {
6430 		return (ECACHE_FLUSH_ALL);
6431 	}
6432 
6433 	/*
6434 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6435 	 * was disabled, we need to flush entire E$. We can't just
6436 	 * flush the cache line affected as the ME bit
6437 	 * is not set when multiple correctable errors of the same
6438 	 * type occur, so we might have multiple CPC or EDC errors,
6439 	 * with only the first recorded.
6440 	 */
6441 #if defined(JALAPENO) || defined(SERRANO)
6442 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6443 #else	/* JALAPENO || SERRANO */
6444 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6445 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6446 #endif	/* JALAPENO || SERRANO */
6447 		return (ECACHE_FLUSH_ALL);
6448 	}
6449 
6450 #if defined(JALAPENO) || defined(SERRANO)
6451 	/*
6452 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6453 	 * flush the entire Ecache.
6454 	 */
6455 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6456 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6457 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6458 			return (ECACHE_FLUSH_LINE);
6459 		} else {
6460 			return (ECACHE_FLUSH_ALL);
6461 		}
6462 	}
6463 #else /* JALAPENO || SERRANO */
6464 	/*
6465 	 * If UE only is set, flush the Ecache line, otherwise
6466 	 * flush the entire Ecache.
6467 	 */
6468 	if (afsr_errs & C_AFSR_UE) {
6469 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6470 		    C_AFSR_UE) {
6471 			return (ECACHE_FLUSH_LINE);
6472 		} else {
6473 			return (ECACHE_FLUSH_ALL);
6474 		}
6475 	}
6476 #endif /* JALAPENO || SERRANO */
6477 
6478 	/*
6479 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6480 	 * flush the entire Ecache.
6481 	 */
6482 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6483 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6484 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6485 			return (ECACHE_FLUSH_LINE);
6486 		} else {
6487 			return (ECACHE_FLUSH_ALL);
6488 		}
6489 	}
6490 
6491 	/*
6492 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6493 	 * flush the entire Ecache.
6494 	 */
6495 	if (afsr_errs & C_AFSR_BERR) {
6496 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6497 			return (ECACHE_FLUSH_LINE);
6498 		} else {
6499 			return (ECACHE_FLUSH_ALL);
6500 		}
6501 	}
6502 
6503 	return (0);
6504 }
6505 
6506 void
6507 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6508 {
6509 	int	ecache_flush_flag =
6510 	    cpu_error_ecache_flush_required(ch_flt);
6511 
6512 	/*
6513 	 * Flush Ecache line or entire Ecache based on above checks.
6514 	 */
6515 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6516 		cpu_flush_ecache();
6517 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6518 		cpu_flush_ecache_line(ch_flt);
6519 	}
6520 
6521 }
6522 
6523 /*
6524  * Extract the PA portion from the E$ tag.
6525  */
6526 uint64_t
6527 cpu_ectag_to_pa(int setsize, uint64_t tag)
6528 {
6529 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6530 		return (JG_ECTAG_TO_PA(setsize, tag));
6531 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6532 		return (PN_L3TAG_TO_PA(tag));
6533 	else
6534 		return (CH_ECTAG_TO_PA(setsize, tag));
6535 }
6536 
6537 /*
6538  * Convert the E$ tag PA into an E$ subblock index.
6539  */
6540 int
6541 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6542 {
6543 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6544 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6545 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6546 		/* Panther has only one subblock per line */
6547 		return (0);
6548 	else
6549 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6550 }
6551 
6552 /*
6553  * All subblocks in an E$ line must be invalid for
6554  * the line to be invalid.
6555  */
6556 int
6557 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6558 {
6559 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6560 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6561 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6562 		return (PN_L3_LINE_INVALID(tag));
6563 	else
6564 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6565 }
6566 
6567 /*
6568  * Extract state bits for a subblock given the tag.  Note that for Panther
6569  * this works on both l2 and l3 tags.
6570  */
6571 int
6572 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6573 {
6574 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6575 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6576 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6577 		return (tag & CH_ECSTATE_MASK);
6578 	else
6579 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6580 }
6581 
/*
 * Cpu module specific MP initialization.  The only work done is for
 * builds with CHEETAHPLUS_ERRATUM_25, where the nudge support is set up
 * if cheetah_sendmondo_recover is enabled.
 */
void
cpu_mp_init(void)
{
#if defined(CHEETAHPLUS_ERRATUM_25)
	if (cheetah_sendmondo_recover)
		cheetah_nudge_init();
#endif	/* CHEETAHPLUS_ERRATUM_25 */
}
6594 
6595 void
6596 cpu_ereport_post(struct async_flt *aflt)
6597 {
6598 	char *cpu_type, buf[FM_MAX_CLASS];
6599 	nv_alloc_t *nva = NULL;
6600 	nvlist_t *ereport, *detector, *resource;
6601 	errorq_elem_t *eqep;
6602 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6603 	char unum[UNUM_NAMLEN];
6604 	int synd_code;
6605 	uint8_t msg_type;
6606 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6607 
6608 	if (aflt->flt_panic || panicstr) {
6609 		eqep = errorq_reserve(ereport_errorq);
6610 		if (eqep == NULL)
6611 			return;
6612 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6613 		nva = errorq_elem_nva(ereport_errorq, eqep);
6614 	} else {
6615 		ereport = fm_nvlist_create(nva);
6616 	}
6617 
6618 	/*
6619 	 * Create the scheme "cpu" FMRI.
6620 	 */
6621 	detector = fm_nvlist_create(nva);
6622 	resource = fm_nvlist_create(nva);
6623 	switch (cpunodes[aflt->flt_inst].implementation) {
6624 	case CHEETAH_IMPL:
6625 		cpu_type = FM_EREPORT_CPU_USIII;
6626 		break;
6627 	case CHEETAH_PLUS_IMPL:
6628 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6629 		break;
6630 	case JALAPENO_IMPL:
6631 		cpu_type = FM_EREPORT_CPU_USIIIi;
6632 		break;
6633 	case SERRANO_IMPL:
6634 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6635 		break;
6636 	case JAGUAR_IMPL:
6637 		cpu_type = FM_EREPORT_CPU_USIV;
6638 		break;
6639 	case PANTHER_IMPL:
6640 		cpu_type = FM_EREPORT_CPU_USIVplus;
6641 		break;
6642 	default:
6643 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6644 		break;
6645 	}
6646 
6647 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
6648 
6649 	/*
6650 	 * Encode all the common data into the ereport.
6651 	 */
6652 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6653 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6654 
6655 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6656 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6657 	    detector, NULL);
6658 
6659 	/*
6660 	 * Encode the error specific data that was saved in
6661 	 * the async_flt structure into the ereport.
6662 	 */
6663 	cpu_payload_add_aflt(aflt, ereport, resource,
6664 	    &plat_ecc_ch_flt.ecaf_afar_status,
6665 	    &plat_ecc_ch_flt.ecaf_synd_status);
6666 
6667 	if (aflt->flt_panic || panicstr) {
6668 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6669 	} else {
6670 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6671 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6672 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6673 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6674 	}
6675 	/*
6676 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6677 	 * to the SC olny if it can process it.
6678 	 */
6679 
6680 	if (&plat_ecc_capability_sc_get &&
6681 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6682 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6683 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6684 			/*
6685 			 * If afar status is not invalid do a unum lookup.
6686 			 */
6687 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6688 			    AFLT_STAT_INVALID) {
6689 				synd_code = synd_to_synd_code(
6690 				    plat_ecc_ch_flt.ecaf_synd_status,
6691 				    aflt->flt_synd, ch_flt->flt_bit);
6692 				(void) cpu_get_mem_unum_synd(synd_code,
6693 				    aflt, unum);
6694 			} else {
6695 				unum[0] = '\0';
6696 			}
6697 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6698 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6699 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6700 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6701 			    ch_flt->flt_sdw_afsr_ext;
6702 
6703 			if (&plat_log_fruid_error2)
6704 				plat_log_fruid_error2(msg_type, unum, aflt,
6705 				    &plat_ecc_ch_flt);
6706 		}
6707 	}
6708 }
6709 
6710 void
6711 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6712 {
6713 	int status;
6714 	ddi_fm_error_t de;
6715 
6716 	bzero(&de, sizeof (ddi_fm_error_t));
6717 
6718 	de.fme_version = DDI_FME_VERSION;
6719 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6720 	    FM_ENA_FMT1);
6721 	de.fme_flag = expected;
6722 	de.fme_bus_specific = (void *)aflt->flt_addr;
6723 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6724 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6725 		aflt->flt_panic = 1;
6726 }
6727 
6728 void
6729 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6730     errorq_t *eqp, uint_t flag)
6731 {
6732 	struct async_flt *aflt = (struct async_flt *)payload;
6733 
6734 	aflt->flt_erpt_class = error_class;
6735 	errorq_dispatch(eqp, payload, payload_sz, flag);
6736 }
6737 
/*
 * Deliberately empty in this cpu module.  The IO module may still call
 * this routine, but the SERD algorithm is handled by the
 * cpumem-diagnosis engine instead, so there is nothing to do here.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
}
6747 
6748 void
6749 adjust_hw_copy_limits(int ecache_size)
6750 {
6751 	/*
6752 	 * Set hw copy limits.
6753 	 *
6754 	 * /etc/system will be parsed later and can override one or more
6755 	 * of these settings.
6756 	 *
6757 	 * At this time, ecache size seems only mildly relevant.
6758 	 * We seem to run into issues with the d-cache and stalls
6759 	 * we see on misses.
6760 	 *
6761 	 * Cycle measurement indicates that 2 byte aligned copies fare
6762 	 * little better than doing things with VIS at around 512 bytes.
6763 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6764 	 * aligned is faster whenever the source and destination data
6765 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6766 	 * limit seems to be driven by the 2K write cache.
6767 	 * When more than 2K of copies are done in non-VIS mode, stores
6768 	 * backup in the write cache.  In VIS mode, the write cache is
6769 	 * bypassed, allowing faster cache-line writes aligned on cache
6770 	 * boundaries.
6771 	 *
6772 	 * In addition, in non-VIS mode, there is no prefetching, so
6773 	 * for larger copies, the advantage of prefetching to avoid even
6774 	 * occasional cache misses is enough to justify using the VIS code.
6775 	 *
6776 	 * During testing, it was discovered that netbench ran 3% slower
6777 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6778 	 * applications, data is only used once (copied to the output
6779 	 * buffer, then copied by the network device off the system).  Using
6780 	 * the VIS copy saves more L2 cache state.  Network copies are
6781 	 * around 1.3K to 1.5K in size for historical reasons.
6782 	 *
6783 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6784 	 * aligned copy even for large caches and 8 MB ecache.  The
6785 	 * infrastructure to allow different limits for different sized
6786 	 * caches is kept to allow further tuning in later releases.
6787 	 */
6788 
6789 	if (min_ecache_size == 0 && use_hw_bcopy) {
6790 		/*
6791 		 * First time through - should be before /etc/system
6792 		 * is read.
6793 		 * Could skip the checks for zero but this lets us
6794 		 * preserve any debugger rewrites.
6795 		 */
6796 		if (hw_copy_limit_1 == 0) {
6797 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6798 			priv_hcl_1 = hw_copy_limit_1;
6799 		}
6800 		if (hw_copy_limit_2 == 0) {
6801 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6802 			priv_hcl_2 = hw_copy_limit_2;
6803 		}
6804 		if (hw_copy_limit_4 == 0) {
6805 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6806 			priv_hcl_4 = hw_copy_limit_4;
6807 		}
6808 		if (hw_copy_limit_8 == 0) {
6809 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6810 			priv_hcl_8 = hw_copy_limit_8;
6811 		}
6812 		min_ecache_size = ecache_size;
6813 	} else {
6814 		/*
6815 		 * MP initialization. Called *after* /etc/system has
6816 		 * been parsed. One CPU has already been initialized.
6817 		 * Need to cater for /etc/system having scragged one
6818 		 * of our values.
6819 		 */
6820 		if (ecache_size == min_ecache_size) {
6821 			/*
6822 			 * Same size ecache. We do nothing unless we
6823 			 * have a pessimistic ecache setting. In that
6824 			 * case we become more optimistic (if the cache is
6825 			 * large enough).
6826 			 */
6827 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6828 				/*
6829 				 * Need to adjust hw_copy_limit* from our
6830 				 * pessimistic uniprocessor value to a more
6831 				 * optimistic UP value *iff* it hasn't been
6832 				 * reset.
6833 				 */
6834 				if ((ecache_size > 1048576) &&
6835 				    (priv_hcl_8 == hw_copy_limit_8)) {
6836 					if (ecache_size <= 2097152)
6837 						hw_copy_limit_8 = 4 *
6838 						    VIS_COPY_THRESHOLD;
6839 					else if (ecache_size <= 4194304)
6840 						hw_copy_limit_8 = 4 *
6841 						    VIS_COPY_THRESHOLD;
6842 					else
6843 						hw_copy_limit_8 = 4 *
6844 						    VIS_COPY_THRESHOLD;
6845 					priv_hcl_8 = hw_copy_limit_8;
6846 				}
6847 			}
6848 		} else if (ecache_size < min_ecache_size) {
6849 			/*
6850 			 * A different ecache size. Can this even happen?
6851 			 */
6852 			if (priv_hcl_8 == hw_copy_limit_8) {
6853 				/*
6854 				 * The previous value that we set
6855 				 * is unchanged (i.e., it hasn't been
6856 				 * scragged by /etc/system). Rewrite it.
6857 				 */
6858 				if (ecache_size <= 1048576)
6859 					hw_copy_limit_8 = 8 *
6860 					    VIS_COPY_THRESHOLD;
6861 				else if (ecache_size <= 2097152)
6862 					hw_copy_limit_8 = 8 *
6863 					    VIS_COPY_THRESHOLD;
6864 				else if (ecache_size <= 4194304)
6865 					hw_copy_limit_8 = 8 *
6866 					    VIS_COPY_THRESHOLD;
6867 				else
6868 					hw_copy_limit_8 = 10 *
6869 					    VIS_COPY_THRESHOLD;
6870 				priv_hcl_8 = hw_copy_limit_8;
6871 				min_ecache_size = ecache_size;
6872 			}
6873 		}
6874 	}
6875 }
6876 
6877 /*
6878  * Called from illegal instruction trap handler to see if we can attribute
6879  * the trap to a fpras check.
6880  */
6881 int
6882 fpras_chktrap(struct regs *rp)
6883 {
6884 	int op;
6885 	struct fpras_chkfngrp *cgp;
6886 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6887 
6888 	if (fpras_chkfngrps == NULL)
6889 		return (0);
6890 
6891 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6892 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6893 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6894 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6895 			break;
6896 	}
6897 	if (op == FPRAS_NCOPYOPS)
6898 		return (0);
6899 
6900 	/*
6901 	 * This is an fpRAS failure caught through an illegal
6902 	 * instruction - trampoline.
6903 	 */
6904 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6905 	rp->r_npc = rp->r_pc + 4;
6906 	return (1);
6907 }
6908 
6909 /*
6910  * fpras_failure is called when a fpras check detects a bad calculation
6911  * result or an illegal instruction trap is attributed to an fpras
6912  * check.  In all cases we are still bound to CPU.
6913  */
6914 int
6915 fpras_failure(int op, int how)
6916 {
6917 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6918 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6919 	ch_async_flt_t ch_flt;
6920 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6921 	struct fpras_chkfn *sfp, *cfp;
6922 	uint32_t *sip, *cip;
6923 	int i;
6924 
6925 	/*
6926 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6927 	 * the time in which we dispatch an ereport and (if applicable) panic.
6928 	 */
6929 	use_hw_bcopy_orig = use_hw_bcopy;
6930 	use_hw_bzero_orig = use_hw_bzero;
6931 	hcl1_orig = hw_copy_limit_1;
6932 	hcl2_orig = hw_copy_limit_2;
6933 	hcl4_orig = hw_copy_limit_4;
6934 	hcl8_orig = hw_copy_limit_8;
6935 	use_hw_bcopy = use_hw_bzero = 0;
6936 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6937 	    hw_copy_limit_8 = 0;
6938 
6939 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6940 	aflt->flt_id = gethrtime_waitfree();
6941 	aflt->flt_class = CPU_FAULT;
6942 	aflt->flt_inst = CPU->cpu_id;
6943 	aflt->flt_status = (how << 8) | op;
6944 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6945 	ch_flt.flt_type = CPU_FPUERR;
6946 
6947 	/*
6948 	 * We must panic if the copy operation had no lofault protection -
6949 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6950 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6951 	 */
6952 	aflt->flt_panic = (curthread->t_lofault == NULL);
6953 
6954 	/*
6955 	 * XOR the source instruction block with the copied instruction
6956 	 * block - this will show us which bit(s) are corrupted.
6957 	 */
6958 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6959 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6960 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6961 		sip = &sfp->fpras_blk0[0];
6962 		cip = &cfp->fpras_blk0[0];
6963 	} else {
6964 		sip = &sfp->fpras_blk1[0];
6965 		cip = &cfp->fpras_blk1[0];
6966 	}
6967 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6968 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6969 
6970 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6971 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6972 
6973 	if (aflt->flt_panic)
6974 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6975 
6976 	/*
6977 	 * We get here for copyin/copyout and kcopy or bcopy where the
6978 	 * caller has used on_fault.  We will flag the error so that
6979 	 * the process may be killed  The trap_async_hwerr mechanism will
6980 	 * take appropriate further action (such as a reboot, contract
6981 	 * notification etc).  Since we may be continuing we will
6982 	 * restore the global hardware copy acceleration switches.
6983 	 *
6984 	 * When we return from this function to the copy function we want to
6985 	 * avoid potentially bad data being used, ie we want the affected
6986 	 * copy function to return an error.  The caller should therefore
6987 	 * invoke its lofault handler (which always exists for these functions)
6988 	 * which will return the appropriate error.
6989 	 */
6990 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6991 	aston(curthread);
6992 
6993 	use_hw_bcopy = use_hw_bcopy_orig;
6994 	use_hw_bzero = use_hw_bzero_orig;
6995 	hw_copy_limit_1 = hcl1_orig;
6996 	hw_copy_limit_2 = hcl2_orig;
6997 	hw_copy_limit_4 = hcl4_orig;
6998 	hw_copy_limit_8 = hcl8_orig;
6999 
7000 	return (1);
7001 }
7002 
7003 #define	VIS_BLOCKSIZE		64
7004 
7005 int
7006 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7007 {
7008 	int ret, watched;
7009 
7010 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7011 	ret = dtrace_blksuword32(addr, data, 0);
7012 	if (watched)
7013 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7014 
7015 	return (ret);
7016 }
7017 
7018 /*
7019  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7020  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
7021  * CEEN from the EER to disable traps for further disrupting error types
7022  * on that cpu.  We could cross-call instead, but that has a larger
7023  * instruction and data footprint than cross-trapping, and the cpu is known
7024  * to be faulted.
7025  */
7026 
7027 void
7028 cpu_faulted_enter(struct cpu *cp)
7029 {
7030 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7031 }
7032 
7033 /*
7034  * Called when a cpu leaves the CPU_FAULTED state to return to one of
7035  * offline, spare, or online (by the cpu requesting this state change).
7036  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7037  * disrupting error bits that have accumulated without trapping, then
7038  * we cross-trap to re-enable CEEN controlled traps.
7039  */
7040 void
7041 cpu_faulted_exit(struct cpu *cp)
7042 {
7043 	ch_cpu_errors_t cpu_error_regs;
7044 
7045 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7046 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7047 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7048 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7049 	    (uint64_t)&cpu_error_regs, 0);
7050 
7051 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7052 }
7053 
7054 /*
7055  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7056  * the errors in the original AFSR, 0 otherwise.
7057  *
7058  * For all procs if the initial error was a BERR or TO, then it is possible
7059  * that we may have caused a secondary BERR or TO in the process of logging the
7060  * inital error via cpu_run_bus_error_handlers().  If this is the case then
7061  * if the request was protected then a panic is still not necessary, if not
7062  * protected then aft_panic is already set - so either way there's no need
7063  * to set aft_panic for the secondary error.
7064  *
7065  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7066  * a store merge, then the error handling code will call cpu_deferred_error().
7067  * When clear_errors() is called, it will determine that secondary errors have
7068  * occurred - in particular, the store merge also caused a EDU and WDU that
7069  * weren't discovered until this point.
7070  *
7071  * We do three checks to verify that we are in this case.  If we pass all three
7072  * checks, we return 1 to indicate that we should not panic.  If any unexpected
7073  * errors occur, we return 0.
7074  *
7075  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7076  * handled in cpu_disrupting_errors().  Since this function is not even called
7077  * in the case we are interested in, we just return 0 for these processors.
7078  */
7079 /*ARGSUSED*/
7080 static int
7081 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7082     uint64_t t_afar)
7083 {
7084 #if defined(CHEETAH_PLUS)
7085 #else	/* CHEETAH_PLUS */
7086 	struct async_flt *aflt = (struct async_flt *)ch_flt;
7087 #endif	/* CHEETAH_PLUS */
7088 
7089 	/*
7090 	 * Was the original error a BERR or TO and only a BERR or TO
7091 	 * (multiple errors are also OK)
7092 	 */
7093 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7094 		/*
7095 		 * Is the new error a BERR or TO and only a BERR or TO
7096 		 * (multiple errors are also OK)
7097 		 */
7098 		if ((ch_flt->afsr_errs &
7099 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7100 			return (1);
7101 	}
7102 
7103 #if defined(CHEETAH_PLUS)
7104 	return (0);
7105 #else	/* CHEETAH_PLUS */
7106 	/*
7107 	 * Now look for secondary effects of a UE on cheetah/jalapeno
7108 	 *
7109 	 * Check the original error was a UE, and only a UE.  Note that
7110 	 * the ME bit will cause us to fail this check.
7111 	 */
7112 	if (t_afsr_errs != C_AFSR_UE)
7113 		return (0);
7114 
7115 	/*
7116 	 * Check the secondary errors were exclusively an EDU and/or WDU.
7117 	 */
7118 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7119 		return (0);
7120 
7121 	/*
7122 	 * Check the AFAR of the original error and secondary errors
7123 	 * match to the 64-byte boundary
7124 	 */
7125 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7126 		return (0);
7127 
7128 	/*
7129 	 * We've passed all the checks, so it's a secondary error!
7130 	 */
7131 	return (1);
7132 #endif	/* CHEETAH_PLUS */
7133 }
7134 
7135 /*
7136  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
7137  * is checked for any valid errors.  If found, the error type is
7138  * returned. If not found, the flt_type is checked for L1$ parity errors.
7139  */
7140 /*ARGSUSED*/
7141 static uint8_t
7142 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7143 {
7144 #if defined(JALAPENO)
7145 	/*
7146 	 * Currently, logging errors to the SC is not supported on Jalapeno
7147 	 */
7148 	return (PLAT_ECC_ERROR2_NONE);
7149 #else
7150 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7151 
7152 	switch (ch_flt->flt_bit) {
7153 	case C_AFSR_CE:
7154 		return (PLAT_ECC_ERROR2_CE);
7155 	case C_AFSR_UCC:
7156 	case C_AFSR_EDC:
7157 	case C_AFSR_WDC:
7158 	case C_AFSR_CPC:
7159 		return (PLAT_ECC_ERROR2_L2_CE);
7160 	case C_AFSR_EMC:
7161 		return (PLAT_ECC_ERROR2_EMC);
7162 	case C_AFSR_IVC:
7163 		return (PLAT_ECC_ERROR2_IVC);
7164 	case C_AFSR_UE:
7165 		return (PLAT_ECC_ERROR2_UE);
7166 	case C_AFSR_UCU:
7167 	case C_AFSR_EDU:
7168 	case C_AFSR_WDU:
7169 	case C_AFSR_CPU:
7170 		return (PLAT_ECC_ERROR2_L2_UE);
7171 	case C_AFSR_IVU:
7172 		return (PLAT_ECC_ERROR2_IVU);
7173 	case C_AFSR_TO:
7174 		return (PLAT_ECC_ERROR2_TO);
7175 	case C_AFSR_BERR:
7176 		return (PLAT_ECC_ERROR2_BERR);
7177 #if defined(CHEETAH_PLUS)
7178 	case C_AFSR_L3_EDC:
7179 	case C_AFSR_L3_UCC:
7180 	case C_AFSR_L3_CPC:
7181 	case C_AFSR_L3_WDC:
7182 		return (PLAT_ECC_ERROR2_L3_CE);
7183 	case C_AFSR_IMC:
7184 		return (PLAT_ECC_ERROR2_IMC);
7185 	case C_AFSR_TSCE:
7186 		return (PLAT_ECC_ERROR2_L2_TSCE);
7187 	case C_AFSR_THCE:
7188 		return (PLAT_ECC_ERROR2_L2_THCE);
7189 	case C_AFSR_L3_MECC:
7190 		return (PLAT_ECC_ERROR2_L3_MECC);
7191 	case C_AFSR_L3_THCE:
7192 		return (PLAT_ECC_ERROR2_L3_THCE);
7193 	case C_AFSR_L3_CPU:
7194 	case C_AFSR_L3_EDU:
7195 	case C_AFSR_L3_UCU:
7196 	case C_AFSR_L3_WDU:
7197 		return (PLAT_ECC_ERROR2_L3_UE);
7198 	case C_AFSR_DUE:
7199 		return (PLAT_ECC_ERROR2_DUE);
7200 	case C_AFSR_DTO:
7201 		return (PLAT_ECC_ERROR2_DTO);
7202 	case C_AFSR_DBERR:
7203 		return (PLAT_ECC_ERROR2_DBERR);
7204 #endif	/* CHEETAH_PLUS */
7205 	default:
7206 		switch (ch_flt->flt_type) {
7207 #if defined(CPU_IMP_L1_CACHE_PARITY)
7208 		case CPU_IC_PARITY:
7209 			return (PLAT_ECC_ERROR2_IPE);
7210 		case CPU_DC_PARITY:
7211 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7212 				if (ch_flt->parity_data.dpe.cpl_cache ==
7213 				    CPU_PC_PARITY) {
7214 					return (PLAT_ECC_ERROR2_PCACHE);
7215 				}
7216 			}
7217 			return (PLAT_ECC_ERROR2_DPE);
7218 #endif /* CPU_IMP_L1_CACHE_PARITY */
7219 		case CPU_ITLB_PARITY:
7220 			return (PLAT_ECC_ERROR2_ITLB);
7221 		case CPU_DTLB_PARITY:
7222 			return (PLAT_ECC_ERROR2_DTLB);
7223 		default:
7224 			return (PLAT_ECC_ERROR2_NONE);
7225 		}
7226 	}
7227 #endif	/* JALAPENO */
7228 }
7229