xref: /titanic_51/usr/src/uts/sun4u/cpu/us3_common.c (revision 6f45ec7b0b964c3be967c4880e8867ac1e7763a5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/ddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/archsystm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/machthread.h>
37 #include <sys/cpu.h>
38 #include <sys/cmp.h>
39 #include <sys/elf_SPARC.h>
40 #include <vm/vm_dep.h>
41 #include <vm/hat_sfmmu.h>
42 #include <vm/seg_kpm.h>
43 #include <sys/cpuvar.h>
44 #include <sys/cheetahregs.h>
45 #include <sys/us3_module.h>
46 #include <sys/async.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/dditypes.h>
50 #include <sys/prom_debug.h>
51 #include <sys/prom_plat.h>
52 #include <sys/cpu_module.h>
53 #include <sys/sysmacros.h>
54 #include <sys/intreg.h>
55 #include <sys/clock.h>
56 #include <sys/platform_module.h>
57 #include <sys/machtrap.h>
58 #include <sys/ontrap.h>
59 #include <sys/panic.h>
60 #include <sys/memlist.h>
61 #include <sys/bootconf.h>
62 #include <sys/ivintr.h>
63 #include <sys/atomic.h>
64 #include <sys/taskq.h>
65 #include <sys/note.h>
66 #include <sys/ndifm.h>
67 #include <sys/ddifm.h>
68 #include <sys/fm/protocol.h>
69 #include <sys/fm/util.h>
70 #include <sys/fm/cpu/UltraSPARC-III.h>
71 #include <sys/fpras_impl.h>
72 #include <sys/dtrace.h>
73 #include <sys/watchpoint.h>
74 #include <sys/plat_ecc_unum.h>
75 #include <sys/cyclic.h>
76 #include <sys/errorq.h>
77 #include <sys/errclassify.h>
78 #include <sys/pghw.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 /*
85  * Note that 'Cheetah PRM' refers to:
86  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
87  */
88 
89 /*
90  * Per CPU pointers to physical address of TL>0 logout data areas.
91  * These pointers have to be in the kernel nucleus to avoid MMU
92  * misses.
93  */
94 uint64_t ch_err_tl1_paddrs[NCPU];
95 
96 /*
97  * One statically allocated structure to use during startup/DR
98  * to prevent unnecessary panics.
99  */
100 ch_err_tl1_data_t ch_err_tl1_data;
101 
102 /*
103  * Per CPU pending error at TL>0, used by level15 softint handler
104  */
105 uchar_t ch_err_tl1_pending[NCPU];
106 
107 /*
108  * For deferred CE re-enable after trap.
109  */
110 taskq_t		*ch_check_ce_tq;
111 
112 /*
113  * Internal functions.
114  */
115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
118     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
119 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
120     uint64_t t_afsr_bit);
121 static int clear_ecc(struct async_flt *ecc);
122 #if defined(CPU_IMP_ECACHE_ASSOC)
123 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
124 #endif
125 static int cpu_ecache_set_size(struct cpu *cp);
126 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
127 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
128 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
129 static int cpu_ectag_pa_to_subblk_state(int cachesize,
130 				uint64_t subaddr, uint64_t tag);
131 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
132 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
133 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
134 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
135 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
136 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
137 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
138 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
139 static void cpu_scrubphys(struct async_flt *aflt);
140 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
141     int *, int *);
142 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
143 static void cpu_ereport_init(struct async_flt *aflt);
144 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
145 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
146 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
147     uint64_t nceen, ch_cpu_logout_t *clop);
148 static int cpu_ce_delayed_ec_logout(uint64_t);
149 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
150 static int cpu_error_is_ecache_data(int, uint64_t);
151 static void cpu_fmri_cpu_set(nvlist_t *, int);
152 static int cpu_error_to_resource_type(struct async_flt *aflt);
153 
154 #ifdef	CHEETAHPLUS_ERRATUM_25
155 static int mondo_recover_proc(uint16_t, int);
156 static void cheetah_nudge_init(void);
157 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
158     cyc_time_t *when);
159 static void cheetah_nudge_buddy(void);
160 #endif	/* CHEETAHPLUS_ERRATUM_25 */
161 
162 #if defined(CPU_IMP_L1_CACHE_PARITY)
163 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
164 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
165 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
166     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
167 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
168 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
169 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
170 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
171 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
172 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
173 #endif	/* CPU_IMP_L1_CACHE_PARITY */
174 
175 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
176     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
177     int *segsp, int *banksp, int *mcidp);
178 
179 /*
180  * This table is used to determine which bit(s) is(are) bad when an ECC
181  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
182  * of this array have the following semantics:
183  *
184  *      00-127  The number of the bad bit, when only one bit is bad.
185  *      128     ECC bit C0 is bad.
186  *      129     ECC bit C1 is bad.
187  *      130     ECC bit C2 is bad.
188  *      131     ECC bit C3 is bad.
189  *      132     ECC bit C4 is bad.
190  *      133     ECC bit C5 is bad.
191  *      134     ECC bit C6 is bad.
192  *      135     ECC bit C7 is bad.
193  *      136     ECC bit C8 is bad.
194  *	137-143 reserved for Mtag Data and ECC.
195  *      144(M2) Two bits are bad within a nibble.
196  *      145(M3) Three bits are bad within a nibble.
197  *      146(M4) Four bits are bad within a nibble.
198  *      147(M)  Multiple bits (5 or more) are bad.
199  *      148     NO bits are bad.
200  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
201  */
202 
203 #define	C0	128
204 #define	C1	129
205 #define	C2	130
206 #define	C3	131
207 #define	C4	132
208 #define	C5	133
209 #define	C6	134
210 #define	C7	135
211 #define	C8	136
212 #define	MT0	137	/* Mtag Data bit 0 */
213 #define	MT1	138
214 #define	MT2	139
215 #define	MTC0	140	/* Mtag Check bit 0 */
216 #define	MTC1	141
217 #define	MTC2	142
218 #define	MTC3	143
219 #define	M2	144
220 #define	M3	145
221 #define	M4	146
222 #define	M	147
223 #define	NA	148
224 #if defined(JALAPENO) || defined(SERRANO)
225 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
226 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
227 #define	SLAST	S003MEM	/* last special syndrome */
228 #else /* JALAPENO || SERRANO */
229 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
230 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
231 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
232 #define	SLAST	S11C	/* last special syndrome */
233 #endif /* JALAPENO || SERRANO */
234 #if defined(JALAPENO) || defined(SERRANO)
235 #define	BPAR0	152	/* syndrom 152 through 167 for bus parity */
236 #define	BPAR15	167
237 #endif	/* JALAPENO || SERRANO */
238 
239 static uint8_t ecc_syndrome_tab[] =
240 {
241 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
242 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
243 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
244 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
245 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
246 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
247 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
248 #if defined(JALAPENO) || defined(SERRANO)
249 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
250 #else	/* JALAPENO || SERRANO */
251 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
252 #endif	/* JALAPENO || SERRANO */
253 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
254 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
255 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
256 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
257 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
258 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
259 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
260 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
261 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
262 #if defined(JALAPENO) || defined(SERRANO)
263 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
264 #else	/* JALAPENO || SERRANO */
265 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
266 #endif	/* JALAPENO || SERRANO */
267 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
268 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
269 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
270 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
271 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
272 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
273 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
274 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
275 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
276 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
277 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
278 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
279 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
280 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
281 };
282 
283 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
284 
285 #if !(defined(JALAPENO) || defined(SERRANO))
286 /*
287  * This table is used to determine which bit(s) is(are) bad when a Mtag
288  * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
289  * of this array have the following semantics:
290  *
291  *      -1	Invalid mtag syndrome.
292  *      137     Mtag Data 0 is bad.
293  *      138     Mtag Data 1 is bad.
294  *      139     Mtag Data 2 is bad.
295  *      140     Mtag ECC 0 is bad.
296  *      141     Mtag ECC 1 is bad.
297  *      142     Mtag ECC 2 is bad.
298  *      143     Mtag ECC 3 is bad.
299  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
300  */
301 short mtag_syndrome_tab[] =
302 {
303 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
304 };
305 
306 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
307 
308 #else /* !(JALAPENO || SERRANO) */
309 
310 #define	BSYND_TBL_SIZE	16
311 
312 #endif /* !(JALAPENO || SERRANO) */
313 
314 /*
315  * Types returned from cpu_error_to_resource_type()
316  */
317 #define	ERRTYPE_UNKNOWN		0
318 #define	ERRTYPE_CPU		1
319 #define	ERRTYPE_MEMORY		2
320 #define	ERRTYPE_ECACHE_DATA	3
321 
322 /*
323  * CE initial classification and subsequent action lookup table
324  */
325 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
326 static int ce_disp_inited;
327 
328 /*
329  * Set to disable leaky and partner check for memory correctables
330  */
331 int ce_xdiag_off;
332 
333 /*
334  * The following are not incremented atomically so are indicative only
335  */
336 static int ce_xdiag_drops;
337 static int ce_xdiag_lkydrops;
338 static int ce_xdiag_ptnrdrops;
339 static int ce_xdiag_bad;
340 
341 /*
342  * CE leaky check callback structure
343  */
344 typedef struct {
345 	struct async_flt *lkycb_aflt;
346 	errorq_t *lkycb_eqp;
347 	errorq_elem_t *lkycb_eqep;
348 } ce_lkychk_cb_t;
349 
350 /*
351  * defines for various ecache_flush_flag's
352  */
353 #define	ECACHE_FLUSH_LINE	1
354 #define	ECACHE_FLUSH_ALL	2
355 
356 /*
357  * STICK sync
358  */
359 #define	STICK_ITERATION 10
360 #define	MAX_TSKEW	1
361 #define	EV_A_START	0
362 #define	EV_A_END	1
363 #define	EV_B_START	2
364 #define	EV_B_END	3
365 #define	EVENTS		4
366 
367 static int64_t stick_iter = STICK_ITERATION;
368 static int64_t stick_tsk = MAX_TSKEW;
369 
370 typedef enum {
371 	EVENT_NULL = 0,
372 	SLAVE_START,
373 	SLAVE_CONT,
374 	MASTER_START
375 } event_cmd_t;
376 
377 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
378 static int64_t timestamp[EVENTS];
379 static volatile int slave_done;
380 
381 #ifdef DEBUG
382 #define	DSYNC_ATTEMPTS 64
383 typedef struct {
384 	int64_t	skew_val[DSYNC_ATTEMPTS];
385 } ss_t;
386 
387 ss_t stick_sync_stats[NCPU];
388 #endif /* DEBUG */
389 
390 uint_t cpu_impl_dual_pgsz = 0;
391 #if defined(CPU_IMP_DUAL_PAGESIZE)
392 uint_t disable_dual_pgsz = 0;
393 #endif	/* CPU_IMP_DUAL_PAGESIZE */
394 
395 /*
396  * Save the cache bootup state for use when internal
397  * caches are to be re-enabled after an error occurs.
398  */
399 uint64_t cache_boot_state;
400 
401 /*
402  * PA[22:0] represent Displacement in Safari configuration space.
403  */
404 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
405 
406 bus_config_eclk_t bus_config_eclk[] = {
407 #if defined(JALAPENO) || defined(SERRANO)
408 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
409 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
410 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
411 #else /* JALAPENO || SERRANO */
412 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
413 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
414 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
415 #endif /* JALAPENO || SERRANO */
416 	{0, 0}
417 };
418 
419 /*
420  * Interval for deferred CEEN reenable
421  */
422 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
423 
424 /*
425  * set in /etc/system to control logging of user BERR/TO's
426  */
427 int cpu_berr_to_verbose = 0;
428 
429 /*
430  * set to 0 in /etc/system to defer CEEN reenable for all CEs
431  */
432 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
433 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
434 
435 /*
436  * Set of all offline cpus
437  */
438 cpuset_t cpu_offline_set;
439 
440 static void cpu_delayed_check_ce_errors(void *);
441 static void cpu_check_ce_errors(void *);
442 void cpu_error_ecache_flush(ch_async_flt_t *);
443 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
444 static void cpu_log_and_clear_ce(ch_async_flt_t *);
445 void cpu_ce_detected(ch_cpu_errors_t *, int);
446 
447 /*
448  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
449  * memory refresh interval of current DIMMs (64ms).  After initial fix that
450  * gives at least one full refresh cycle in which the cell can leak
451  * (whereafter further refreshes simply reinforce any incorrect bit value).
452  */
453 clock_t cpu_ce_lkychk_timeout_usec = 128000;
454 
455 /*
456  * CE partner check partner caching period in seconds
457  */
458 int cpu_ce_ptnr_cachetime_sec = 60;
459 
/*
 * Sets trap table entry ttentry by overwriting eight instructions (32 bytes)
 * with the contents found at ttlabel, then calls flush_instr_mem() on the
 * same 32 bytes of ttentry.
 *
 * The two calls are enclosed in a brace block so that the macro always
 * expands to a single statement: without the braces, using the macro as the
 * body of an unbraced `if' or loop would guard only the bcopy().  The
 * arguments are parenthesized so that any lvalue expression may be passed.
 */
#define	CH_SET_TRAP(ttentry, ttlabel)	{				\
	bcopy((const void *)&(ttlabel), &(ttentry), 32);		\
	flush_instr_mem((caddr_t)&(ttentry), 32);			\
}
466 
467 static int min_ecache_size;
468 static uint_t priv_hcl_1;
469 static uint_t priv_hcl_2;
470 static uint_t priv_hcl_4;
471 static uint_t priv_hcl_8;
472 
473 void
474 cpu_setup(void)
475 {
476 	extern int at_flags;
477 	extern int disable_delay_tlb_flush, delay_tlb_flush;
478 	extern int cpc_has_overflow_intr;
479 
480 	/*
481 	 * Setup chip-specific trap handlers.
482 	 */
483 	cpu_init_trap();
484 
485 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
486 
487 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
488 
489 	/*
490 	 * save the cache bootup state.
491 	 */
492 	cache_boot_state = get_dcu() & DCU_CACHE;
493 
494 	/*
495 	 * Due to the number of entries in the fully-associative tlb
496 	 * this may have to be tuned lower than in spitfire.
497 	 */
498 	pp_slots = MIN(8, MAXPP_SLOTS);
499 
500 	/*
501 	 * Block stores do not invalidate all pages of the d$, pagecopy
502 	 * et. al. need virtual translations with virtual coloring taken
503 	 * into consideration.  prefetch/ldd will pollute the d$ on the
504 	 * load side.
505 	 */
506 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
507 
508 	if (use_page_coloring) {
509 		do_pg_coloring = 1;
510 		if (use_virtual_coloring)
511 			do_virtual_coloring = 1;
512 	}
513 
514 	isa_list =
515 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
516 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
517 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
518 
519 	/*
520 	 * On Panther-based machines, this should
521 	 * also include AV_SPARC_POPC too
522 	 */
523 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
524 
525 	/*
526 	 * On cheetah, there's no hole in the virtual address space
527 	 */
528 	hole_start = hole_end = 0;
529 
530 	/*
531 	 * The kpm mapping window.
532 	 * kpm_size:
533 	 *	The size of a single kpm range.
534 	 *	The overall size will be: kpm_size * vac_colors.
535 	 * kpm_vbase:
536 	 *	The virtual start address of the kpm range within the kernel
537 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
538 	 */
539 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
540 	kpm_size_shift = 43;
541 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
542 	kpm_smallpages = 1;
543 
544 	/*
545 	 * The traptrace code uses either %tick or %stick for
546 	 * timestamping.  We have %stick so we can use it.
547 	 */
548 	traptrace_use_stick = 1;
549 
550 	/*
551 	 * Cheetah has a performance counter overflow interrupt
552 	 */
553 	cpc_has_overflow_intr = 1;
554 
555 	/*
556 	 * Use cheetah flush-all support
557 	 */
558 	if (!disable_delay_tlb_flush)
559 		delay_tlb_flush = 1;
560 
561 #if defined(CPU_IMP_DUAL_PAGESIZE)
562 	/*
563 	 * Use Cheetah+ and later dual page size support.
564 	 */
565 	if (!disable_dual_pgsz) {
566 		cpu_impl_dual_pgsz = 1;
567 	}
568 #endif	/* CPU_IMP_DUAL_PAGESIZE */
569 
570 	/*
571 	 * Declare that this architecture/cpu combination does fpRAS.
572 	 */
573 	fpras_implemented = 1;
574 
575 	/*
576 	 * Setup CE lookup table
577 	 */
578 	CE_INITDISPTBL_POPULATE(ce_disp_table);
579 	ce_disp_inited = 1;
580 }
581 
582 /*
583  * Called by setcpudelay
584  */
585 void
586 cpu_init_tick_freq(void)
587 {
588 	/*
589 	 * For UltraSPARC III and beyond we want to use the
590 	 * system clock rate as the basis for low level timing,
591 	 * due to support of mixed speed CPUs and power managment.
592 	 */
593 	if (system_clock_freq == 0)
594 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
595 
596 	sys_tick_freq = system_clock_freq;
597 }
598 
599 #ifdef CHEETAHPLUS_ERRATUM_25
600 /*
601  * Tunables
602  */
603 int cheetah_bpe_off = 0;
604 int cheetah_sendmondo_recover = 1;
605 int cheetah_sendmondo_fullscan = 0;
606 int cheetah_sendmondo_recover_delay = 5;
607 
608 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
609 
610 /*
611  * Recovery Statistics
612  */
613 typedef struct cheetah_livelock_entry	{
614 	int cpuid;		/* fallen cpu */
615 	int buddy;		/* cpu that ran recovery */
616 	clock_t lbolt;		/* when recovery started */
617 	hrtime_t recovery_time;	/* time spent in recovery */
618 } cheetah_livelock_entry_t;
619 
620 #define	CHEETAH_LIVELOCK_NENTRY	32
621 
622 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
623 int cheetah_livelock_entry_nxt;
624 
625 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
626 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
627 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
628 		cheetah_livelock_entry_nxt = 0;				\
629 	}								\
630 }
631 
632 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
633 
634 struct {
635 	hrtime_t hrt;		/* maximum recovery time */
636 	int recovery;		/* recovered */
637 	int full_claimed;	/* maximum pages claimed in full recovery */
638 	int proc_entry;		/* attempted to claim TSB */
639 	int proc_tsb_scan;	/* tsb scanned */
640 	int proc_tsb_partscan;	/* tsb partially scanned */
641 	int proc_tsb_fullscan;	/* whole tsb scanned */
642 	int proc_claimed;	/* maximum pages claimed in tsb scan */
643 	int proc_user;		/* user thread */
644 	int proc_kernel;	/* kernel thread */
645 	int proc_onflt;		/* bad stack */
646 	int proc_cpu;		/* null cpu */
647 	int proc_thread;	/* null thread */
648 	int proc_proc;		/* null proc */
649 	int proc_as;		/* null as */
650 	int proc_hat;		/* null hat */
651 	int proc_hat_inval;	/* hat contents don't make sense */
652 	int proc_hat_busy;	/* hat is changing TSBs */
653 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
654 	int proc_cnum_bad;	/* cnum out of range */
655 	int proc_cnum;		/* last cnum processed */
656 	tte_t proc_tte;		/* last tte processed */
657 } cheetah_livelock_stat;
658 
/*
 * Helpers for updating cheetah_livelock_stat.  In each macro `item' is the
 * name of a member of cheetah_livelock_stat.
 *
 *   CHEETAH_LIVELOCK_STAT	increments the member.
 *   CHEETAH_LIVELOCK_STATSET	stores `value' into the member.
 *   CHEETAH_LIVELOCK_MAXSTAT	stores `value' only if it is greater than
 *				the member's current contents.
 *
 * `value' is parenthesized so that arguments containing low-precedence
 * operators are compared and assigned as a whole.  CHEETAH_LIVELOCK_MAXSTAT
 * may evaluate `value' twice, so it must be free of side effects.
 */
#define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++

#define	CHEETAH_LIVELOCK_STATSET(item, value)		\
	cheetah_livelock_stat.item = (value)

#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
	if ((value) > cheetah_livelock_stat.item)	\
		cheetah_livelock_stat.item = (value);	\
}
668 
669 /*
670  * Attempt to recover a cpu by claiming every cache line as saved
671  * in the TSB that the non-responsive cpu is using. Since we can't
672  * grab any adaptive lock, this is at best an attempt to do so. Because
673  * we don't grab any locks, we must operate under the protection of
674  * on_fault().
675  *
676  * Return 1 if cpuid could be recovered, 0 if failed.
677  */
678 int
679 mondo_recover_proc(uint16_t cpuid, int bn)
680 {
681 	label_t ljb;
682 	cpu_t *cp;
683 	kthread_t *t;
684 	proc_t *p;
685 	struct as *as;
686 	struct hat *hat;
687 	uint_t  cnum;
688 	struct tsb_info *tsbinfop;
689 	struct tsbe *tsbep;
690 	caddr_t tsbp;
691 	caddr_t end_tsbp;
692 	uint64_t paddr;
693 	uint64_t idsr;
694 	u_longlong_t pahi, palo;
695 	int pages_claimed = 0;
696 	tte_t tsbe_tte;
697 	int tried_kernel_tsb = 0;
698 	mmu_ctx_t *mmu_ctxp;
699 
700 	CHEETAH_LIVELOCK_STAT(proc_entry);
701 
702 	if (on_fault(&ljb)) {
703 		CHEETAH_LIVELOCK_STAT(proc_onflt);
704 		goto badstruct;
705 	}
706 
707 	if ((cp = cpu[cpuid]) == NULL) {
708 		CHEETAH_LIVELOCK_STAT(proc_cpu);
709 		goto badstruct;
710 	}
711 
712 	if ((t = cp->cpu_thread) == NULL) {
713 		CHEETAH_LIVELOCK_STAT(proc_thread);
714 		goto badstruct;
715 	}
716 
717 	if ((p = ttoproc(t)) == NULL) {
718 		CHEETAH_LIVELOCK_STAT(proc_proc);
719 		goto badstruct;
720 	}
721 
722 	if ((as = p->p_as) == NULL) {
723 		CHEETAH_LIVELOCK_STAT(proc_as);
724 		goto badstruct;
725 	}
726 
727 	if ((hat = as->a_hat) == NULL) {
728 		CHEETAH_LIVELOCK_STAT(proc_hat);
729 		goto badstruct;
730 	}
731 
732 	if (hat != ksfmmup) {
733 		CHEETAH_LIVELOCK_STAT(proc_user);
734 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
735 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
736 			goto badstruct;
737 		}
738 		tsbinfop = hat->sfmmu_tsb;
739 		if (tsbinfop == NULL) {
740 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
741 			goto badstruct;
742 		}
743 		tsbp = tsbinfop->tsb_va;
744 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
745 	} else {
746 		CHEETAH_LIVELOCK_STAT(proc_kernel);
747 		tsbinfop = NULL;
748 		tsbp = ktsb_base;
749 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
750 	}
751 
752 	/* Verify as */
753 	if (hat->sfmmu_as != as) {
754 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
755 		goto badstruct;
756 	}
757 
758 	mmu_ctxp = CPU_MMU_CTXP(cp);
759 	ASSERT(mmu_ctxp);
760 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
761 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
762 
763 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
764 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
765 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
766 		goto badstruct;
767 	}
768 
769 	do {
770 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
771 
772 		/*
773 		 * Skip TSBs being relocated.  This is important because
774 		 * we want to avoid the following deadlock scenario:
775 		 *
776 		 * 1) when we came in we set ourselves to "in recover" state.
777 		 * 2) when we try to touch TSB being relocated the mapping
778 		 *    will be in the suspended state so we'll spin waiting
779 		 *    for it to be unlocked.
780 		 * 3) when the CPU that holds the TSB mapping locked tries to
781 		 *    unlock it it will send a xtrap which will fail to xcall
782 		 *    us or the CPU we're trying to recover, and will in turn
783 		 *    enter the mondo code.
784 		 * 4) since we are still spinning on the locked mapping
785 		 *    no further progress will be made and the system will
786 		 *    inevitably hard hang.
787 		 *
788 		 * A TSB not being relocated can't begin being relocated
789 		 * while we're accessing it because we check
790 		 * sendmondo_in_recover before relocating TSBs.
791 		 */
792 		if (hat != ksfmmup &&
793 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
794 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
795 			goto next_tsbinfo;
796 		}
797 
798 		for (tsbep = (struct tsbe *)tsbp;
799 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
800 			tsbe_tte = tsbep->tte_data;
801 
802 			if (tsbe_tte.tte_val == 0) {
803 				/*
804 				 * Invalid tte
805 				 */
806 				continue;
807 			}
808 			if (tsbe_tte.tte_se) {
809 				/*
810 				 * Don't want device registers
811 				 */
812 				continue;
813 			}
814 			if (tsbe_tte.tte_cp == 0) {
815 				/*
816 				 * Must be cached in E$
817 				 */
818 				continue;
819 			}
820 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
821 			idsr = getidsr();
822 			if ((idsr & (IDSR_NACK_BIT(bn) |
823 			    IDSR_BUSY_BIT(bn))) == 0) {
824 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
825 				goto done;
826 			}
827 			pahi = tsbe_tte.tte_pahi;
828 			palo = tsbe_tte.tte_palo;
829 			paddr = (uint64_t)((pahi << 32) |
830 			    (palo << MMU_PAGESHIFT));
831 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
832 			    CH_ECACHE_SUBBLK_SIZE);
833 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
834 				shipit(cpuid, bn);
835 			}
836 			pages_claimed++;
837 		}
838 next_tsbinfo:
839 		if (tsbinfop != NULL)
840 			tsbinfop = tsbinfop->tsb_next;
841 		if (tsbinfop != NULL) {
842 			tsbp = tsbinfop->tsb_va;
843 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
844 		} else if (tsbp == ktsb_base) {
845 			tried_kernel_tsb = 1;
846 		} else if (!tried_kernel_tsb) {
847 			tsbp = ktsb_base;
848 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
849 			hat = ksfmmup;
850 			tsbinfop = NULL;
851 		}
852 	} while (tsbinfop != NULL ||
853 			((tsbp == ktsb_base) && !tried_kernel_tsb));
854 
855 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
856 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
857 	no_fault();
858 	idsr = getidsr();
859 	if ((idsr & (IDSR_NACK_BIT(bn) |
860 	    IDSR_BUSY_BIT(bn))) == 0) {
861 		return (1);
862 	} else {
863 		return (0);
864 	}
865 
866 done:
867 	no_fault();
868 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
869 	return (1);
870 
871 badstruct:
872 	no_fault();
873 	return (0);
874 }
875 
876 /*
877  * Attempt to claim ownership, temporarily, of every cache line that a
878  * non-responsive cpu might be using.  This might kick that cpu out of
879  * this state.
880  *
881  * The return value indicates to the caller if we have exhausted all recovery
882  * techniques. If 1 is returned, it is useless to call this function again
883  * even for a different target CPU.
884  */
885 int
886 mondo_recover(uint16_t cpuid, int bn)
887 {
888 	struct memseg *seg;
889 	uint64_t begin_pa, end_pa, cur_pa;
890 	hrtime_t begin_hrt, end_hrt;
891 	int retval = 0;
892 	int pages_claimed = 0;
893 	cheetah_livelock_entry_t *histp;
894 	uint64_t idsr;
895 
896 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
897 		/*
898 		 * Wait while recovery takes place
899 		 */
900 		while (sendmondo_in_recover) {
901 			drv_usecwait(1);
902 		}
903 		/*
904 		 * Assume we didn't claim the whole memory. If
905 		 * the target of this caller is not recovered,
906 		 * it will come back.
907 		 */
908 		return (retval);
909 	}
910 
911 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
912 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
913 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
914 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
915 
916 	begin_hrt = gethrtime_waitfree();
917 	/*
918 	 * First try to claim the lines in the TSB the target
919 	 * may have been using.
920 	 */
921 	if (mondo_recover_proc(cpuid, bn) == 1) {
922 		/*
923 		 * Didn't claim the whole memory
924 		 */
925 		goto done;
926 	}
927 
928 	/*
929 	 * We tried using the TSB. The target is still
930 	 * not recovered. Check if complete memory scan is
931 	 * enabled.
932 	 */
933 	if (cheetah_sendmondo_fullscan == 0) {
934 		/*
935 		 * Full memory scan is disabled.
936 		 */
937 		retval = 1;
938 		goto done;
939 	}
940 
941 	/*
942 	 * Try claiming the whole memory.
943 	 */
944 	for (seg = memsegs; seg; seg = seg->next) {
945 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
946 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
947 		for (cur_pa = begin_pa; cur_pa < end_pa;
948 		    cur_pa += MMU_PAGESIZE) {
949 			idsr = getidsr();
950 			if ((idsr & (IDSR_NACK_BIT(bn) |
951 			    IDSR_BUSY_BIT(bn))) == 0) {
952 				/*
953 				 * Didn't claim all memory
954 				 */
955 				goto done;
956 			}
957 			claimlines(cur_pa, MMU_PAGESIZE,
958 			    CH_ECACHE_SUBBLK_SIZE);
959 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
960 				shipit(cpuid, bn);
961 			}
962 			pages_claimed++;
963 		}
964 	}
965 
966 	/*
967 	 * We did all we could.
968 	 */
969 	retval = 1;
970 
971 done:
972 	/*
973 	 * Update statistics
974 	 */
975 	end_hrt = gethrtime_waitfree();
976 	CHEETAH_LIVELOCK_STAT(recovery);
977 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
978 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
979 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
980 	    (end_hrt -  begin_hrt));
981 
982 	while (cas32(&sendmondo_in_recover, 1, 0) != 1);
983 
984 	return (retval);
985 }
986 
987 /*
988  * This is called by the cyclic framework when this CPU becomes online
989  */
990 /*ARGSUSED*/
991 static void
992 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
993 {
994 
995 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
996 	hdlr->cyh_level = CY_LOW_LEVEL;
997 	hdlr->cyh_arg = NULL;
998 
999 	/*
1000 	 * Stagger the start time
1001 	 */
1002 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1003 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1004 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1005 	}
1006 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1007 }
1008 
1009 /*
1010  * Create a low level cyclic to send a xtrap to the next cpu online.
1011  * However, there's no need to have this running on a uniprocessor system.
1012  */
1013 static void
1014 cheetah_nudge_init(void)
1015 {
1016 	cyc_omni_handler_t hdlr;
1017 
1018 	if (max_ncpus == 1) {
1019 		return;
1020 	}
1021 
1022 	hdlr.cyo_online = cheetah_nudge_onln;
1023 	hdlr.cyo_offline = NULL;
1024 	hdlr.cyo_arg = NULL;
1025 
1026 	mutex_enter(&cpu_lock);
1027 	(void) cyclic_add_omni(&hdlr);
1028 	mutex_exit(&cpu_lock);
1029 }
1030 
1031 /*
1032  * Cyclic handler to wake up buddy
1033  */
1034 void
1035 cheetah_nudge_buddy(void)
1036 {
1037 	/*
1038 	 * Disable kernel preemption to protect the cpu list
1039 	 */
1040 	kpreempt_disable();
1041 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1042 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1043 		    0, 0);
1044 	}
1045 	kpreempt_enable();
1046 }
1047 
1048 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1049 
#ifdef SEND_MONDO_STATS
/*
 * Mondo send statistics, only built when SEND_MONDO_STATS is defined.
 *
 * send_one_mondo() accounts each completed send by its duration in ticks:
 * durations below 8192 ticks go to x_one_stimes[ticks >> 7], longer ones
 * to x_one_ltimes[(ticks >> 13) & 0xf].
 *
 * NOTE(review): the x_set_* and x_nack_* counters are not referenced in
 * this part of the file; presumably they are the equivalent counters for
 * the multi-target send path -- confirm against their users.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
1058 
1059 /*
1060  * Note: A version of this function is used by the debugger via the KDI,
1061  * and must be kept in sync with this version.  Any changes made to this
1062  * function to support new chips or to accomodate errata must also be included
1063  * in the KDI-specific version.  See us3_kdi.c.
1064  */
1065 void
1066 send_one_mondo(int cpuid)
1067 {
1068 	int busy, nack;
1069 	uint64_t idsr, starttick, endtick, tick, lasttick;
1070 	uint64_t busymask;
1071 #ifdef	CHEETAHPLUS_ERRATUM_25
1072 	int recovered = 0;
1073 #endif
1074 
1075 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1076 	starttick = lasttick = gettick();
1077 	shipit(cpuid, 0);
1078 	endtick = starttick + xc_tick_limit;
1079 	busy = nack = 0;
1080 #if defined(JALAPENO) || defined(SERRANO)
1081 	/*
1082 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1083 	 * will be used for dispatching interrupt. For now, assume
1084 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1085 	 * issues with respect to BUSY/NACK pair usage.
1086 	 */
1087 	busymask  = IDSR_BUSY_BIT(cpuid);
1088 #else /* JALAPENO || SERRANO */
1089 	busymask = IDSR_BUSY;
1090 #endif /* JALAPENO || SERRANO */
1091 	for (;;) {
1092 		idsr = getidsr();
1093 		if (idsr == 0)
1094 			break;
1095 
1096 		tick = gettick();
1097 		/*
1098 		 * If there is a big jump between the current tick
1099 		 * count and lasttick, we have probably hit a break
1100 		 * point.  Adjust endtick accordingly to avoid panic.
1101 		 */
1102 		if (tick > (lasttick + xc_tick_jump_limit))
1103 			endtick += (tick - lasttick);
1104 		lasttick = tick;
1105 		if (tick > endtick) {
1106 			if (panic_quiesce)
1107 				return;
1108 #ifdef	CHEETAHPLUS_ERRATUM_25
1109 			if (cheetah_sendmondo_recover && recovered == 0) {
1110 				if (mondo_recover(cpuid, 0)) {
1111 					/*
1112 					 * We claimed the whole memory or
1113 					 * full scan is disabled.
1114 					 */
1115 					recovered++;
1116 				}
1117 				tick = gettick();
1118 				endtick = tick + xc_tick_limit;
1119 				lasttick = tick;
1120 				/*
1121 				 * Recheck idsr
1122 				 */
1123 				continue;
1124 			} else
1125 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1126 			{
1127 				cmn_err(CE_PANIC, "send mondo timeout "
1128 				    "(target 0x%x) [%d NACK %d BUSY]",
1129 				    cpuid, nack, busy);
1130 			}
1131 		}
1132 
1133 		if (idsr & busymask) {
1134 			busy++;
1135 			continue;
1136 		}
1137 		drv_usecwait(1);
1138 		shipit(cpuid, 0);
1139 		nack++;
1140 		busy = 0;
1141 	}
1142 #ifdef SEND_MONDO_STATS
1143 	{
1144 		int n = gettick() - starttick;
1145 		if (n < 8192)
1146 			x_one_stimes[n >> 7]++;
1147 		else
1148 			x_one_ltimes[(n >> 13) & 0xf]++;
1149 	}
1150 #endif
1151 }
1152 
void
syncfpu(void)
{
	/* No work is performed by this implementation. */
}
1157 
1158 /*
1159  * Return processor specific async error structure
1160  * size used.
1161  */
1162 int
1163 cpu_aflt_size(void)
1164 {
1165 	return (sizeof (ch_async_flt_t));
1166 }
1167 
/*
 * Tunable controlling whether the logout areas of the other cpus are
 * checked during panic for potential syndrome 71 generating errors.
 * Non-zero (the default) enables the check; set it to 0 to disable it.
 */
int enable_check_other_cpus_logout = 1;
1173 
1174 /*
1175  * Check other cpus logout area for potential synd 71 generating
1176  * errors.
1177  */
1178 static void
1179 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1180     ch_cpu_logout_t *clop)
1181 {
1182 	struct async_flt *aflt;
1183 	ch_async_flt_t ch_flt;
1184 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1185 
1186 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1187 		return;
1188 	}
1189 
1190 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1191 
1192 	t_afar = clop->clo_data.chd_afar;
1193 	t_afsr = clop->clo_data.chd_afsr;
1194 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1195 #if defined(SERRANO)
1196 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1197 #endif	/* SERRANO */
1198 
1199 	/*
1200 	 * In order to simplify code, we maintain this afsr_errs
1201 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1202 	 * sticky bits.
1203 	 */
1204 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1205 	    (t_afsr & C_AFSR_ALL_ERRS);
1206 
1207 	/* Setup the async fault structure */
1208 	aflt = (struct async_flt *)&ch_flt;
1209 	aflt->flt_id = gethrtime_waitfree();
1210 	ch_flt.afsr_ext = t_afsr_ext;
1211 	ch_flt.afsr_errs = t_afsr_errs;
1212 	aflt->flt_stat = t_afsr;
1213 	aflt->flt_addr = t_afar;
1214 	aflt->flt_bus_id = cpuid;
1215 	aflt->flt_inst = cpuid;
1216 	aflt->flt_pc = tpc;
1217 	aflt->flt_prot = AFLT_PROT_NONE;
1218 	aflt->flt_class = CPU_FAULT;
1219 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1220 	aflt->flt_tl = tl;
1221 	aflt->flt_status = ecc_type;
1222 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1223 
1224 	/*
1225 	 * Queue events on the async event queue, one event per error bit.
1226 	 * If no events are queued, queue an event to complain.
1227 	 */
1228 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1229 		ch_flt.flt_type = CPU_INV_AFSR;
1230 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1231 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1232 		    aflt->flt_panic);
1233 	}
1234 
1235 	/*
1236 	 * Zero out + invalidate CPU logout.
1237 	 */
1238 	bzero(clop, sizeof (ch_cpu_logout_t));
1239 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1240 }
1241 
1242 /*
1243  * Check the logout areas of all other cpus for unlogged errors.
1244  */
1245 static void
1246 cpu_check_other_cpus_logout(void)
1247 {
1248 	int i, j;
1249 	processorid_t myid;
1250 	struct cpu *cp;
1251 	ch_err_tl1_data_t *cl1p;
1252 
1253 	myid = CPU->cpu_id;
1254 	for (i = 0; i < NCPU; i++) {
1255 		cp = cpu[i];
1256 
1257 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1258 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1259 			continue;
1260 		}
1261 
1262 		/*
1263 		 * Check each of the tl>0 logout areas
1264 		 */
1265 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1266 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1267 			if (cl1p->ch_err_tl1_flags == 0)
1268 				continue;
1269 
1270 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1271 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1272 		}
1273 
1274 		/*
1275 		 * Check each of the remaining logout areas
1276 		 */
1277 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1278 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1279 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1280 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1281 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1282 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1283 	}
1284 }
1285 
1286 /*
1287  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1288  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1289  * flush the error that caused the UCU/UCC, then again here at the end to
1290  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1291  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1292  * another Fast ECC trap.
1293  *
1294  * Cheetah+ also handles: TSCE: No additional processing required.
1295  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1296  *
1297  * Note that the p_clo_flags input is only valid in cases where the
1298  * cpu_private struct is not yet initialized (since that is the only
1299  * time that information cannot be obtained from the logout struct.)
1300  */
1301 /*ARGSUSED*/
1302 void
1303 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1304 {
1305 	ch_cpu_logout_t *clop;
1306 	uint64_t ceen, nceen;
1307 
1308 	/*
1309 	 * Get the CPU log out info. If we can't find our CPU private
1310 	 * pointer, then we will have to make due without any detailed
1311 	 * logout information.
1312 	 */
1313 	if (CPU_PRIVATE(CPU) == NULL) {
1314 		clop = NULL;
1315 		ceen = p_clo_flags & EN_REG_CEEN;
1316 		nceen = p_clo_flags & EN_REG_NCEEN;
1317 	} else {
1318 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1319 		ceen = clop->clo_flags & EN_REG_CEEN;
1320 		nceen = clop->clo_flags & EN_REG_NCEEN;
1321 	}
1322 
1323 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1324 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1325 }
1326 
1327 /*
1328  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1329  * ECC at TL>0.  Need to supply either a error register pointer or a
1330  * cpu logout structure pointer.
1331  */
1332 static void
1333 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1334     uint64_t nceen, ch_cpu_logout_t *clop)
1335 {
1336 	struct async_flt *aflt;
1337 	ch_async_flt_t ch_flt;
1338 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1339 	char pr_reason[MAX_REASON_STRING];
1340 	ch_cpu_errors_t cpu_error_regs;
1341 
1342 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1343 	/*
1344 	 * If no cpu logout data, then we will have to make due without
1345 	 * any detailed logout information.
1346 	 */
1347 	if (clop == NULL) {
1348 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1349 		get_cpu_error_state(&cpu_error_regs);
1350 		set_cpu_error_state(&cpu_error_regs);
1351 		t_afar = cpu_error_regs.afar;
1352 		t_afsr = cpu_error_regs.afsr;
1353 		t_afsr_ext = cpu_error_regs.afsr_ext;
1354 #if defined(SERRANO)
1355 		ch_flt.afar2 = cpu_error_regs.afar2;
1356 #endif	/* SERRANO */
1357 	} else {
1358 		t_afar = clop->clo_data.chd_afar;
1359 		t_afsr = clop->clo_data.chd_afsr;
1360 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1361 #if defined(SERRANO)
1362 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1363 #endif	/* SERRANO */
1364 	}
1365 
1366 	/*
1367 	 * In order to simplify code, we maintain this afsr_errs
1368 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1369 	 * sticky bits.
1370 	 */
1371 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1372 	    (t_afsr & C_AFSR_ALL_ERRS);
1373 	pr_reason[0] = '\0';
1374 
1375 	/* Setup the async fault structure */
1376 	aflt = (struct async_flt *)&ch_flt;
1377 	aflt->flt_id = gethrtime_waitfree();
1378 	ch_flt.afsr_ext = t_afsr_ext;
1379 	ch_flt.afsr_errs = t_afsr_errs;
1380 	aflt->flt_stat = t_afsr;
1381 	aflt->flt_addr = t_afar;
1382 	aflt->flt_bus_id = getprocessorid();
1383 	aflt->flt_inst = CPU->cpu_id;
1384 	aflt->flt_pc = tpc;
1385 	aflt->flt_prot = AFLT_PROT_NONE;
1386 	aflt->flt_class = CPU_FAULT;
1387 	aflt->flt_priv = priv;
1388 	aflt->flt_tl = tl;
1389 	aflt->flt_status = ECC_F_TRAP;
1390 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1391 
1392 	/*
1393 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1394 	 * cmn_err messages out to the console.  The situation is a UCU (in
1395 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1396 	 * The messages for the UCU and WDU are enqueued and then pulled off
1397 	 * the async queue via softint and syslogd starts to process them
1398 	 * but doesn't get them to the console.  The UE causes a panic, but
1399 	 * since the UCU/WDU messages are already in transit, those aren't
1400 	 * on the async queue.  The hack is to check if we have a matching
1401 	 * WDU event for the UCU, and if it matches, we're more than likely
1402 	 * going to panic with a UE, unless we're under protection.  So, we
1403 	 * check to see if we got a matching WDU event and if we're under
1404 	 * protection.
1405 	 *
1406 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1407 	 * looks like this:
1408 	 *    UCU->WDU->UE
1409 	 * For Panther, it could look like either of these:
1410 	 *    UCU---->WDU->L3_WDU->UE
1411 	 *    L3_UCU->WDU->L3_WDU->UE
1412 	 */
1413 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1414 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1415 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1416 		get_cpu_error_state(&cpu_error_regs);
1417 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1418 		    (cpu_error_regs.afar == t_afar));
1419 		aflt->flt_panic |= ((clop == NULL) &&
1420 		    (t_afsr_errs & C_AFSR_WDU));
1421 	}
1422 
1423 	/*
1424 	 * Queue events on the async event queue, one event per error bit.
1425 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1426 	 * queue an event to complain.
1427 	 */
1428 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1429 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1430 		ch_flt.flt_type = CPU_INV_AFSR;
1431 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1432 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1433 		    aflt->flt_panic);
1434 	}
1435 
1436 	/*
1437 	 * Zero out + invalidate CPU logout.
1438 	 */
1439 	if (clop) {
1440 		bzero(clop, sizeof (ch_cpu_logout_t));
1441 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1442 	}
1443 
1444 	/*
1445 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1446 	 * or disrupting errors have happened.  We do this because if a
1447 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1448 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1449 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1450 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1451 	 * deferred or disrupting error happening between checking the AFSR and
1452 	 * enabling NCEEN/CEEN.
1453 	 *
1454 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1455 	 * taken.
1456 	 */
1457 	set_error_enable(get_error_enable() | (nceen | ceen));
1458 	if (clear_errors(&ch_flt)) {
1459 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1460 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1461 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1462 		    NULL);
1463 	}
1464 
1465 	/*
1466 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1467 	 * be logged as part of the panic flow.
1468 	 */
1469 	if (aflt->flt_panic)
1470 		fm_panic("%sError(s)", pr_reason);
1471 
1472 	/*
1473 	 * Flushing the Ecache here gets the part of the trap handler that
1474 	 * is run at TL=1 out of the Ecache.
1475 	 */
1476 	cpu_flush_ecache();
1477 }
1478 
1479 /*
1480  * This is called via sys_trap from pil15_interrupt code if the
1481  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1482  * various ch_err_tl1_data structures for valid entries based on the bit
1483  * settings in the ch_err_tl1_flags entry of the structure.
1484  */
1485 /*ARGSUSED*/
1486 void
1487 cpu_tl1_error(struct regs *rp, int panic)
1488 {
1489 	ch_err_tl1_data_t *cl1p, cl1;
1490 	int i, ncl1ps;
1491 	uint64_t me_flags;
1492 	uint64_t ceen, nceen;
1493 
1494 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1495 		cl1p = &ch_err_tl1_data;
1496 		ncl1ps = 1;
1497 	} else if (CPU_PRIVATE(CPU) != NULL) {
1498 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1499 		ncl1ps = CH_ERR_TL1_TLMAX;
1500 	} else {
1501 		ncl1ps = 0;
1502 	}
1503 
1504 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1505 		if (cl1p->ch_err_tl1_flags == 0)
1506 			continue;
1507 
1508 		/*
1509 		 * Grab a copy of the logout data and invalidate
1510 		 * the logout area.
1511 		 */
1512 		cl1 = *cl1p;
1513 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1514 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1515 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1516 
1517 		/*
1518 		 * Log "first error" in ch_err_tl1_data.
1519 		 */
1520 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1521 			ceen = get_error_enable() & EN_REG_CEEN;
1522 			nceen = get_error_enable() & EN_REG_NCEEN;
1523 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1524 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1525 		}
1526 #if defined(CPU_IMP_L1_CACHE_PARITY)
1527 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1528 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1529 			    (caddr_t)cl1.ch_err_tl1_tpc);
1530 		}
1531 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1532 
1533 		/*
1534 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1535 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1536 		 * if the structure is busy, we just do the cache flushing
1537 		 * we have to do and then do the retry.  So the AFSR/AFAR
1538 		 * at this point *should* have some relevant info.  If there
1539 		 * are no valid errors in the AFSR, we'll assume they've
1540 		 * already been picked up and logged.  For I$/D$ parity,
1541 		 * we just log an event with an "Unknown" (NULL) TPC.
1542 		 */
1543 		if (me_flags & CH_ERR_FECC) {
1544 			ch_cpu_errors_t cpu_error_regs;
1545 			uint64_t t_afsr_errs;
1546 
1547 			/*
1548 			 * Get the error registers and see if there's
1549 			 * a pending error.  If not, don't bother
1550 			 * generating an "Invalid AFSR" error event.
1551 			 */
1552 			get_cpu_error_state(&cpu_error_regs);
1553 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1554 			    C_AFSR_EXT_ALL_ERRS) |
1555 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1556 			if (t_afsr_errs != 0) {
1557 				ceen = get_error_enable() & EN_REG_CEEN;
1558 				nceen = get_error_enable() & EN_REG_NCEEN;
1559 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1560 				    1, ceen, nceen, NULL);
1561 			}
1562 		}
1563 #if defined(CPU_IMP_L1_CACHE_PARITY)
1564 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1565 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1566 		}
1567 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1568 	}
1569 }
1570 
1571 /*
1572  * Called from Fast ECC TL>0 handler in case of fatal error.
1573  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1574  * but if we don't, we'll panic with something reasonable.
1575  */
1576 /*ARGSUSED*/
1577 void
1578 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1579 {
1580 	cpu_tl1_error(rp, 1);
1581 	/*
1582 	 * Should never return, but just in case.
1583 	 */
1584 	fm_panic("Unsurvivable ECC Error at TL>0");
1585 }
1586 
1587 /*
1588  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1589  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1590  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1591  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1592  *
1593  * Cheetah+ also handles (No additional processing required):
1594  *    DUE, DTO, DBERR	(NCEEN controlled)
1595  *    THCE		(CEEN and ET_ECC_en controlled)
1596  *    TUE		(ET_ECC_en controlled)
1597  *
1598  * Panther further adds:
1599  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1600  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1601  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1602  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1603  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1604  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1605  *
1606  * Note that the p_clo_flags input is only valid in cases where the
1607  * cpu_private struct is not yet initialized (since that is the only
1608  * time that information cannot be obtained from the logout struct.)
1609  */
1610 /*ARGSUSED*/
1611 void
1612 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1613 {
1614 	struct async_flt *aflt;
1615 	ch_async_flt_t ch_flt;
1616 	char pr_reason[MAX_REASON_STRING];
1617 	ch_cpu_logout_t *clop;
1618 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1619 	ch_cpu_errors_t cpu_error_regs;
1620 
1621 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1622 	/*
1623 	 * Get the CPU log out info. If we can't find our CPU private
1624 	 * pointer, then we will have to make due without any detailed
1625 	 * logout information.
1626 	 */
1627 	if (CPU_PRIVATE(CPU) == NULL) {
1628 		clop = NULL;
1629 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1630 		get_cpu_error_state(&cpu_error_regs);
1631 		set_cpu_error_state(&cpu_error_regs);
1632 		t_afar = cpu_error_regs.afar;
1633 		t_afsr = cpu_error_regs.afsr;
1634 		t_afsr_ext = cpu_error_regs.afsr_ext;
1635 #if defined(SERRANO)
1636 		ch_flt.afar2 = cpu_error_regs.afar2;
1637 #endif	/* SERRANO */
1638 	} else {
1639 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1640 		t_afar = clop->clo_data.chd_afar;
1641 		t_afsr = clop->clo_data.chd_afsr;
1642 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1643 #if defined(SERRANO)
1644 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1645 #endif	/* SERRANO */
1646 	}
1647 
1648 	/*
1649 	 * In order to simplify code, we maintain this afsr_errs
1650 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1651 	 * sticky bits.
1652 	 */
1653 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1654 	    (t_afsr & C_AFSR_ALL_ERRS);
1655 
1656 	pr_reason[0] = '\0';
1657 	/* Setup the async fault structure */
1658 	aflt = (struct async_flt *)&ch_flt;
1659 	ch_flt.afsr_ext = t_afsr_ext;
1660 	ch_flt.afsr_errs = t_afsr_errs;
1661 	aflt->flt_stat = t_afsr;
1662 	aflt->flt_addr = t_afar;
1663 	aflt->flt_pc = (caddr_t)rp->r_pc;
1664 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1665 	aflt->flt_tl = 0;
1666 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1667 
1668 	/*
1669 	 * If this trap is a result of one of the errors not masked
1670 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1671 	 * indicate that a timeout is to be set later.
1672 	 */
1673 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1674 	    !aflt->flt_panic)
1675 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1676 	else
1677 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1678 
1679 	/*
1680 	 * log the CE and clean up
1681 	 */
1682 	cpu_log_and_clear_ce(&ch_flt);
1683 
1684 	/*
1685 	 * We re-enable CEEN (if required) and check if any disrupting errors
1686 	 * have happened.  We do this because if a disrupting error had occurred
1687 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1688 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1689 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1690 	 * of a error happening between checking the AFSR and enabling CEEN.
1691 	 */
1692 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1693 	    set_error_enable(get_error_enable() | EN_REG_CEEN);
1694 	if (clear_errors(&ch_flt)) {
1695 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1696 		    NULL);
1697 	}
1698 
1699 	/*
1700 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1701 	 * be logged as part of the panic flow.
1702 	 */
1703 	if (aflt->flt_panic)
1704 		fm_panic("%sError(s)", pr_reason);
1705 }
1706 
1707 /*
1708  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1709  * L3_EDU:BLD, TO, and BERR events.
1710  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1711  *
1712  * Cheetah+: No additional errors handled.
1713  *
1714  * Note that the p_clo_flags input is only valid in cases where the
1715  * cpu_private struct is not yet initialized (since that is the only
1716  * time that information cannot be obtained from the logout struct.)
1717  */
1718 /*ARGSUSED*/
1719 void
1720 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1721 {
1722 	ushort_t ttype, tl;
1723 	ch_async_flt_t ch_flt;
1724 	struct async_flt *aflt;
1725 	int trampolined = 0;
1726 	char pr_reason[MAX_REASON_STRING];
1727 	ch_cpu_logout_t *clop;
1728 	uint64_t ceen, clo_flags;
1729 	uint64_t log_afsr;
1730 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1731 	ch_cpu_errors_t cpu_error_regs;
1732 	int expected = DDI_FM_ERR_UNEXPECTED;
1733 	ddi_acc_hdl_t *hp;
1734 
1735 	/*
1736 	 * We need to look at p_flag to determine if the thread detected an
1737 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1738 	 * because we just need a consistent snapshot and we know that everyone
1739 	 * else will store a consistent set of bits while holding p_lock.  We
1740 	 * don't have to worry about a race because SDOCORE is set once prior
1741 	 * to doing i/o from the process's address space and is never cleared.
1742 	 */
1743 	uint_t pflag = ttoproc(curthread)->p_flag;
1744 
1745 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1746 	/*
1747 	 * Get the CPU log out info. If we can't find our CPU private
1748 	 * pointer then we will have to make due without any detailed
1749 	 * logout information.
1750 	 */
1751 	if (CPU_PRIVATE(CPU) == NULL) {
1752 		clop = NULL;
1753 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1754 		get_cpu_error_state(&cpu_error_regs);
1755 		set_cpu_error_state(&cpu_error_regs);
1756 		t_afar = cpu_error_regs.afar;
1757 		t_afsr = cpu_error_regs.afsr;
1758 		t_afsr_ext = cpu_error_regs.afsr_ext;
1759 #if defined(SERRANO)
1760 		ch_flt.afar2 = cpu_error_regs.afar2;
1761 #endif	/* SERRANO */
1762 		clo_flags = p_clo_flags;
1763 	} else {
1764 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1765 		t_afar = clop->clo_data.chd_afar;
1766 		t_afsr = clop->clo_data.chd_afsr;
1767 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1768 #if defined(SERRANO)
1769 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1770 #endif	/* SERRANO */
1771 		clo_flags = clop->clo_flags;
1772 	}
1773 
1774 	/*
1775 	 * In order to simplify code, we maintain this afsr_errs
1776 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1777 	 * sticky bits.
1778 	 */
1779 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1780 	    (t_afsr & C_AFSR_ALL_ERRS);
1781 	pr_reason[0] = '\0';
1782 
1783 	/*
1784 	 * Grab information encoded into our clo_flags field.
1785 	 */
1786 	ceen = clo_flags & EN_REG_CEEN;
1787 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1788 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1789 
1790 	/*
1791 	 * handle the specific error
1792 	 */
1793 	aflt = (struct async_flt *)&ch_flt;
1794 	aflt->flt_id = gethrtime_waitfree();
1795 	aflt->flt_bus_id = getprocessorid();
1796 	aflt->flt_inst = CPU->cpu_id;
1797 	ch_flt.afsr_ext = t_afsr_ext;
1798 	ch_flt.afsr_errs = t_afsr_errs;
1799 	aflt->flt_stat = t_afsr;
1800 	aflt->flt_addr = t_afar;
1801 	aflt->flt_pc = (caddr_t)rp->r_pc;
1802 	aflt->flt_prot = AFLT_PROT_NONE;
1803 	aflt->flt_class = CPU_FAULT;
1804 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1805 	aflt->flt_tl = (uchar_t)tl;
1806 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1807 	    C_AFSR_PANIC(t_afsr_errs));
1808 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1809 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1810 
1811 	/*
1812 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1813 	 * see if we were executing in the kernel under on_trap() or t_lofault
1814 	 * protection.  If so, modify the saved registers so that we return
1815 	 * from the trap to the appropriate trampoline routine.
1816 	 */
1817 	if (aflt->flt_priv && tl == 0) {
1818 		if (curthread->t_ontrap != NULL) {
1819 			on_trap_data_t *otp = curthread->t_ontrap;
1820 
1821 			if (otp->ot_prot & OT_DATA_EC) {
1822 				aflt->flt_prot = AFLT_PROT_EC;
1823 				otp->ot_trap |= OT_DATA_EC;
1824 				rp->r_pc = otp->ot_trampoline;
1825 				rp->r_npc = rp->r_pc + 4;
1826 				trampolined = 1;
1827 			}
1828 
1829 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1830 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1831 				aflt->flt_prot = AFLT_PROT_ACCESS;
1832 				otp->ot_trap |= OT_DATA_ACCESS;
1833 				rp->r_pc = otp->ot_trampoline;
1834 				rp->r_npc = rp->r_pc + 4;
1835 				trampolined = 1;
1836 				/*
1837 				 * for peeks and caut_gets errors are expected
1838 				 */
1839 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1840 				if (!hp)
1841 					expected = DDI_FM_ERR_PEEK;
1842 				else if (hp->ah_acc.devacc_attr_access ==
1843 				    DDI_CAUTIOUS_ACC)
1844 					expected = DDI_FM_ERR_EXPECTED;
1845 			}
1846 
1847 		} else if (curthread->t_lofault) {
1848 			aflt->flt_prot = AFLT_PROT_COPY;
1849 			rp->r_g1 = EFAULT;
1850 			rp->r_pc = curthread->t_lofault;
1851 			rp->r_npc = rp->r_pc + 4;
1852 			trampolined = 1;
1853 		}
1854 	}
1855 
1856 	/*
1857 	 * If we're in user mode or we're doing a protected copy, we either
1858 	 * want the ASTON code below to send a signal to the user process
1859 	 * or we want to panic if aft_panic is set.
1860 	 *
1861 	 * If we're in privileged mode and we're not doing a copy, then we
1862 	 * need to check if we've trampolined.  If we haven't trampolined,
1863 	 * we should panic.
1864 	 */
1865 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1866 		if (t_afsr_errs &
1867 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1868 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1869 			aflt->flt_panic |= aft_panic;
1870 	} else if (!trampolined) {
1871 			aflt->flt_panic = 1;
1872 	}
1873 
1874 	/*
1875 	 * If we've trampolined due to a privileged TO or BERR, or if an
1876 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1877 	 * event for that TO or BERR.  Queue all other events (if any) besides
1878 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1879 	 * ignore the number of events queued.  If we haven't trampolined due
1880 	 * to a TO or BERR, just enqueue events normally.
1881 	 */
1882 	log_afsr = t_afsr_errs;
1883 	if (trampolined) {
1884 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1885 	} else if (!aflt->flt_priv) {
1886 		/*
1887 		 * User mode, suppress messages if
1888 		 * cpu_berr_to_verbose is not set.
1889 		 */
1890 		if (!cpu_berr_to_verbose)
1891 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1892 	}
1893 
1894 	/*
1895 	 * Log any errors that occurred
1896 	 */
1897 	if (((log_afsr &
1898 		((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1899 		cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1900 		(t_afsr_errs &
1901 		(C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1902 		ch_flt.flt_type = CPU_INV_AFSR;
1903 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1904 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1905 		    aflt->flt_panic);
1906 	}
1907 
1908 	/*
1909 	 * Zero out + invalidate CPU logout.
1910 	 */
1911 	if (clop) {
1912 		bzero(clop, sizeof (ch_cpu_logout_t));
1913 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1914 	}
1915 
1916 #if defined(JALAPENO) || defined(SERRANO)
1917 	/*
1918 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1919 	 * IO errors that may have resulted in this trap.
1920 	 */
1921 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1922 		cpu_run_bus_error_handlers(aflt, expected);
1923 	}
1924 
1925 	/*
1926 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1927 	 * line from the Ecache.  We also need to query the bus nexus for
1928 	 * fatal errors.  Attempts to do diagnostic read on caches may
1929 	 * introduce more errors (especially when the module is bad).
1930 	 */
1931 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1932 		/*
1933 		 * Ask our bus nexus friends if they have any fatal errors.  If
1934 		 * so, they will log appropriate error messages.
1935 		 */
1936 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1937 			aflt->flt_panic = 1;
1938 
1939 		/*
1940 		 * We got a UE or RUE and are panicking, save the fault PA in
1941 		 * a known location so that the platform specific panic code
1942 		 * can check for copyback errors.
1943 		 */
1944 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1945 			panic_aflt = *aflt;
1946 		}
1947 	}
1948 
1949 	/*
1950 	 * Flush Ecache line or entire Ecache
1951 	 */
1952 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1953 		cpu_error_ecache_flush(&ch_flt);
1954 #else /* JALAPENO || SERRANO */
1955 	/*
1956 	 * UE/BERR/TO: Call our bus nexus friends to check for
1957 	 * IO errors that may have resulted in this trap.
1958 	 */
1959 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1960 		cpu_run_bus_error_handlers(aflt, expected);
1961 	}
1962 
1963 	/*
1964 	 * UE: If the UE is in memory, we need to flush the bad
1965 	 * line from the Ecache.  We also need to query the bus nexus for
1966 	 * fatal errors.  Attempts to do diagnostic read on caches may
1967 	 * introduce more errors (especially when the module is bad).
1968 	 */
1969 	if (t_afsr & C_AFSR_UE) {
1970 		/*
1971 		 * Ask our legacy bus nexus friends if they have any fatal
1972 		 * errors.  If so, they will log appropriate error messages.
1973 		 */
1974 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1975 			aflt->flt_panic = 1;
1976 
1977 		/*
1978 		 * We got a UE and are panicking, save the fault PA in a known
1979 		 * location so that the platform specific panic code can check
1980 		 * for copyback errors.
1981 		 */
1982 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1983 			panic_aflt = *aflt;
1984 		}
1985 	}
1986 
1987 	/*
1988 	 * Flush Ecache line or entire Ecache
1989 	 */
1990 	if (t_afsr_errs &
1991 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1992 		cpu_error_ecache_flush(&ch_flt);
1993 #endif /* JALAPENO || SERRANO */
1994 
1995 	/*
1996 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1997 	 * or disrupting errors have happened.  We do this because if a
1998 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1999 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2000 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2001 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2002 	 * deferred or disrupting error happening between checking the AFSR and
2003 	 * enabling NCEEN/CEEN.
2004 	 *
2005 	 * Note: CEEN reenabled only if it was on when trap taken.
2006 	 */
2007 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2008 	if (clear_errors(&ch_flt)) {
2009 		/*
2010 		 * Check for secondary errors, and avoid panicking if we
2011 		 * have them
2012 		 */
2013 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2014 		    t_afar) == 0) {
2015 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2016 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2017 		}
2018 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2019 		    NULL);
2020 	}
2021 
2022 	/*
2023 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2024 	 * be logged as part of the panic flow.
2025 	 */
2026 	if (aflt->flt_panic)
2027 		fm_panic("%sError(s)", pr_reason);
2028 
2029 	/*
2030 	 * If we queued an error and we are going to return from the trap and
2031 	 * the error was in user mode or inside of a copy routine, set AST flag
2032 	 * so the queue will be drained before returning to user mode.  The
2033 	 * AST processing will also act on our failure policy.
2034 	 */
2035 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2036 		int pcb_flag = 0;
2037 
2038 		if (t_afsr_errs &
2039 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2040 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2041 			pcb_flag |= ASYNC_HWERR;
2042 
2043 		if (t_afsr & C_AFSR_BERR)
2044 			pcb_flag |= ASYNC_BERR;
2045 
2046 		if (t_afsr & C_AFSR_TO)
2047 			pcb_flag |= ASYNC_BTO;
2048 
2049 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2050 		aston(curthread);
2051 	}
2052 }
2053 
2054 #if defined(CPU_IMP_L1_CACHE_PARITY)
2055 /*
2056  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2057  *
2058  * For Panther, P$ data parity errors during floating point load hits
2059  * are also detected (reported as TT 0x71) and handled by this trap
2060  * handler.
2061  *
2062  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2063  * is available.
2064  */
2065 /*ARGSUSED*/
2066 void
2067 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2068 {
2069 	ch_async_flt_t ch_flt;
2070 	struct async_flt *aflt;
2071 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2072 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2073 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2074 	char *error_class;
2075 
2076 	/*
2077 	 * Log the error.
2078 	 * For icache parity errors the fault address is the trap PC.
2079 	 * For dcache/pcache parity errors the instruction would have to
2080 	 * be decoded to determine the address and that isn't possible
2081 	 * at high PIL.
2082 	 */
2083 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2084 	aflt = (struct async_flt *)&ch_flt;
2085 	aflt->flt_id = gethrtime_waitfree();
2086 	aflt->flt_bus_id = getprocessorid();
2087 	aflt->flt_inst = CPU->cpu_id;
2088 	aflt->flt_pc = tpc;
2089 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2090 	aflt->flt_prot = AFLT_PROT_NONE;
2091 	aflt->flt_class = CPU_FAULT;
2092 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2093 	aflt->flt_tl = tl;
2094 	aflt->flt_panic = panic;
2095 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2096 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2097 
2098 	if (iparity) {
2099 		cpu_icache_parity_info(&ch_flt);
2100 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2101 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2102 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2103 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2104 		else
2105 			error_class = FM_EREPORT_CPU_USIII_IPE;
2106 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2107 	} else {
2108 		cpu_dcache_parity_info(&ch_flt);
2109 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2110 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2111 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2112 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2113 		else
2114 			error_class = FM_EREPORT_CPU_USIII_DPE;
2115 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2116 		/*
2117 		 * For panther we also need to check the P$ for parity errors.
2118 		 */
2119 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2120 			cpu_pcache_parity_info(&ch_flt);
2121 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2122 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2123 				aflt->flt_payload =
2124 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2125 			}
2126 		}
2127 	}
2128 
2129 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2130 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2131 
2132 	if (iparity) {
2133 		/*
2134 		 * Invalidate entire I$.
2135 		 * This is required due to the use of diagnostic ASI
2136 		 * accesses that may result in a loss of I$ coherency.
2137 		 */
2138 		if (cache_boot_state & DCU_IC) {
2139 			flush_icache();
2140 		}
2141 		/*
2142 		 * According to section P.3.1 of the Panther PRM, we
2143 		 * need to do a little more for recovery on those
2144 		 * CPUs after encountering an I$ parity error.
2145 		 */
2146 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2147 			flush_ipb();
2148 			correct_dcache_parity(dcache_size,
2149 			    dcache_linesize);
2150 			flush_pcache();
2151 		}
2152 	} else {
2153 		/*
2154 		 * Since the valid bit is ignored when checking parity the
2155 		 * D$ data and tag must also be corrected.  Set D$ data bits
2156 		 * to zero and set utag to 0, 1, 2, 3.
2157 		 */
2158 		correct_dcache_parity(dcache_size, dcache_linesize);
2159 
2160 		/*
2161 		 * According to section P.3.3 of the Panther PRM, we
2162 		 * need to do a little more for recovery on those
2163 		 * CPUs after encountering a D$ or P$ parity error.
2164 		 *
2165 		 * As far as clearing P$ parity errors, it is enough to
2166 		 * simply invalidate all entries in the P$ since P$ parity
2167 		 * error traps are only generated for floating point load
2168 		 * hits.
2169 		 */
2170 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2171 			flush_icache();
2172 			flush_ipb();
2173 			flush_pcache();
2174 		}
2175 	}
2176 
2177 	/*
2178 	 * Invalidate entire D$ if it was enabled.
2179 	 * This is done to avoid stale data in the D$ which might
2180 	 * occur with the D$ disabled and the trap handler doing
2181 	 * stores affecting lines already in the D$.
2182 	 */
2183 	if (cache_boot_state & DCU_DC) {
2184 		flush_dcache();
2185 	}
2186 
2187 	/*
2188 	 * Restore caches to their bootup state.
2189 	 */
2190 	set_dcu(get_dcu() | cache_boot_state);
2191 
2192 	/*
2193 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2194 	 * be logged as part of the panic flow.
2195 	 */
2196 	if (aflt->flt_panic)
2197 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2198 
2199 	/*
2200 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2201 	 * the chance of getting an unrecoverable Fast ECC error.  This
2202 	 * flush will evict the part of the parity trap handler that is run
2203 	 * at TL>1.
2204 	 */
2205 	if (tl) {
2206 		cpu_flush_ecache();
2207 	}
2208 }
2209 
2210 /*
2211  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2212  * to indicate which portions of the captured data should be in the ereport.
2213  */
2214 void
2215 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2216 {
2217 	int way = ch_flt->parity_data.ipe.cpl_way;
2218 	int offset = ch_flt->parity_data.ipe.cpl_off;
2219 	int tag_index;
2220 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2221 
2222 
2223 	if ((offset != -1) || (way != -1)) {
2224 		/*
2225 		 * Parity error in I$ tag or data
2226 		 */
2227 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2228 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2229 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2230 			    PN_ICIDX_TO_WAY(tag_index);
2231 		else
2232 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2233 			    CH_ICIDX_TO_WAY(tag_index);
2234 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2235 		    IC_LOGFLAG_MAGIC;
2236 	} else {
2237 		/*
2238 		 * Parity error was not identified.
2239 		 * Log tags and data for all ways.
2240 		 */
2241 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2242 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2243 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2244 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2245 				    PN_ICIDX_TO_WAY(tag_index);
2246 			else
2247 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2248 				    CH_ICIDX_TO_WAY(tag_index);
2249 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2250 			    IC_LOGFLAG_MAGIC;
2251 		}
2252 	}
2253 }
2254 
2255 /*
2256  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2257  * to indicate which portions of the captured data should be in the ereport.
2258  */
2259 void
2260 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2261 {
2262 	int way = ch_flt->parity_data.dpe.cpl_way;
2263 	int offset = ch_flt->parity_data.dpe.cpl_off;
2264 	int tag_index;
2265 
2266 	if (offset != -1) {
2267 		/*
2268 		 * Parity error in D$ or P$ data array.
2269 		 *
2270 		 * First check to see whether the parity error is in D$ or P$
2271 		 * since P$ data parity errors are reported in Panther using
2272 		 * the same trap.
2273 		 */
2274 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2275 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2276 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2277 			    CH_PCIDX_TO_WAY(tag_index);
2278 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2279 			    PC_LOGFLAG_MAGIC;
2280 		} else {
2281 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2282 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2283 			    CH_DCIDX_TO_WAY(tag_index);
2284 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2285 			    DC_LOGFLAG_MAGIC;
2286 		}
2287 	} else if (way != -1) {
2288 		/*
2289 		 * Parity error in D$ tag.
2290 		 */
2291 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2292 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2293 		    CH_DCIDX_TO_WAY(tag_index);
2294 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2295 		    DC_LOGFLAG_MAGIC;
2296 	}
2297 }
2298 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2299 
2300 /*
2301  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2302  * post-process CPU events that are dequeued.  As such, it can be invoked
2303  * from softint context, from AST processing in the trap() flow, or from the
2304  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2305  * Historically this entry point was used to log the actual cmn_err(9F) text;
2306  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2307  * With FMA this function now also returns a flag which indicates to the
2308  * caller whether the ereport should be posted (1) or suppressed (0).
2309  */
2310 static int
2311 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2312 {
2313 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2314 	struct async_flt *aflt = (struct async_flt *)flt;
2315 	uint64_t errors;
2316 	extern void memscrub_induced_error(void);
2317 
2318 	switch (ch_flt->flt_type) {
2319 	case CPU_INV_AFSR:
2320 		/*
2321 		 * If it is a disrupting trap and the AFSR is zero, then
2322 		 * the event has probably already been noted. Do not post
2323 		 * an ereport.
2324 		 */
2325 		if ((aflt->flt_status & ECC_C_TRAP) &&
2326 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2327 			return (0);
2328 		else
2329 			return (1);
2330 	case CPU_TO:
2331 	case CPU_BERR:
2332 	case CPU_FATAL:
2333 	case CPU_FPUERR:
2334 		return (1);
2335 
2336 	case CPU_UE_ECACHE_RETIRE:
2337 		cpu_log_err(aflt);
2338 		cpu_page_retire(ch_flt);
2339 		return (1);
2340 
2341 	/*
2342 	 * Cases where we may want to suppress logging or perform
2343 	 * extended diagnostics.
2344 	 */
2345 	case CPU_CE:
2346 	case CPU_EMC:
2347 		/*
2348 		 * We want to skip logging and further classification
2349 		 * only if ALL the following conditions are true:
2350 		 *
2351 		 *	1. There is only one error
2352 		 *	2. That error is a correctable memory error
2353 		 *	3. The error is caused by the memory scrubber (in
2354 		 *	   which case the error will have occurred under
2355 		 *	   on_trap protection)
2356 		 *	4. The error is on a retired page
2357 		 *
2358 		 * Note: AFLT_PROT_EC is used places other than the memory
2359 		 * scrubber.  However, none of those errors should occur
2360 		 * on a retired page.
2361 		 */
2362 		if ((ch_flt->afsr_errs &
2363 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2364 		    aflt->flt_prot == AFLT_PROT_EC) {
2365 
2366 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2367 			    if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2368 
2369 				/*
2370 				 * Since we're skipping logging, we'll need
2371 				 * to schedule the re-enabling of CEEN
2372 				 */
2373 				(void) timeout(cpu_delayed_check_ce_errors,
2374 				    (void *)(uintptr_t)aflt->flt_inst,
2375 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2376 						 * MICROSEC));
2377 			    }
2378 				/*
2379 				 * Inform memscrubber - scrubbing induced
2380 				 * CE on a retired page.
2381 				 */
2382 				memscrub_induced_error();
2383 				return (0);
2384 			}
2385 		}
2386 
2387 		/*
2388 		 * Perform/schedule further classification actions, but
2389 		 * only if the page is healthy (we don't want bad
2390 		 * pages inducing too much diagnostic activity).  If we could
2391 		 * not find a page pointer then we also skip this.  If
2392 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2393 		 * to copy and recirculate the event (for further diagnostics)
2394 		 * and we should not proceed to log it here.
2395 		 *
2396 		 * This must be the last step here before the cpu_log_err()
2397 		 * below - if an event recirculates cpu_ce_log_err() will
2398 		 * not call the current function but just proceed directly
2399 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2400 		 *
2401 		 * Note: Check cpu_impl_async_log_err if changing this
2402 		 */
2403 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2404 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2405 			    CE_XDIAG_SKIP_NOPP);
2406 		} else {
2407 			if (errors != PR_OK) {
2408 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2409 				    CE_XDIAG_SKIP_PAGEDET);
2410 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2411 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2412 				return (0);
2413 			}
2414 		}
2415 		/*FALLTHRU*/
2416 
2417 	/*
2418 	 * Cases where we just want to report the error and continue.
2419 	 */
2420 	case CPU_CE_ECACHE:
2421 	case CPU_UE_ECACHE:
2422 	case CPU_IV:
2423 	case CPU_ORPH:
2424 		cpu_log_err(aflt);
2425 		return (1);
2426 
2427 	/*
2428 	 * Cases where we want to fall through to handle panicking.
2429 	 */
2430 	case CPU_UE:
2431 		/*
2432 		 * We want to skip logging in the same conditions as the
2433 		 * CE case.  In addition, we want to make sure we're not
2434 		 * panicking.
2435 		 */
2436 		if (!panicstr && (ch_flt->afsr_errs &
2437 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2438 		    aflt->flt_prot == AFLT_PROT_EC) {
2439 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2440 				/* Zero the address to clear the error */
2441 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2442 				/*
2443 				 * Inform memscrubber - scrubbing induced
2444 				 * UE on a retired page.
2445 				 */
2446 				memscrub_induced_error();
2447 				return (0);
2448 			}
2449 		}
2450 		cpu_log_err(aflt);
2451 		break;
2452 
2453 	default:
2454 		/*
2455 		 * If the us3_common.c code doesn't know the flt_type, it may
2456 		 * be an implementation-specific code.  Call into the impldep
2457 		 * backend to find out what to do: if it tells us to continue,
2458 		 * break and handle as if falling through from a UE; if not,
2459 		 * the impldep backend has handled the error and we're done.
2460 		 */
2461 		switch (cpu_impl_async_log_err(flt, eqep)) {
2462 		case CH_ASYNC_LOG_DONE:
2463 			return (1);
2464 		case CH_ASYNC_LOG_RECIRC:
2465 			return (0);
2466 		case CH_ASYNC_LOG_CONTINUE:
2467 			break; /* continue on to handle UE-like error */
2468 		default:
2469 			cmn_err(CE_WARN, "discarding error 0x%p with "
2470 			    "invalid fault type (0x%x)",
2471 			    (void *)aflt, ch_flt->flt_type);
2472 			return (0);
2473 		}
2474 	}
2475 
2476 	/* ... fall through from the UE case */
2477 
2478 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2479 		if (!panicstr) {
2480 			cpu_page_retire(ch_flt);
2481 		} else {
2482 			/*
2483 			 * Clear UEs on panic so that we don't
2484 			 * get haunted by them during panic or
2485 			 * after reboot
2486 			 */
2487 			cpu_clearphys(aflt);
2488 			(void) clear_errors(NULL);
2489 		}
2490 	}
2491 
2492 	return (1);
2493 }
2494 
2495 /*
2496  * Retire the bad page that may contain the flushed error.
2497  */
2498 void
2499 cpu_page_retire(ch_async_flt_t *ch_flt)
2500 {
2501 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2502 	(void) page_retire(aflt->flt_addr, PR_UE);
2503 }
2504 
2505 /*
2506  * Return true if the error specified in the AFSR indicates
2507  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2508  * for Panther, none for Jalapeno/Serrano).
2509  */
2510 /* ARGSUSED */
2511 static int
2512 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2513 {
2514 #if defined(JALAPENO) || defined(SERRANO)
2515 	return (0);
2516 #elif defined(CHEETAH_PLUS)
2517 	if (IS_PANTHER(cpunodes[cpuid].implementation))
2518 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2519 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2520 #else	/* CHEETAH_PLUS */
2521 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2522 #endif
2523 }
2524 
2525 /*
2526  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2527  * generic event post-processing for correctable and uncorrectable memory,
2528  * E$, and MTag errors.  Historically this entry point was used to log bits of
2529  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2530  * converted into an ereport.  In addition, it transmits the error to any
2531  * platform-specific service-processor FRU logging routines, if available.
2532  */
2533 void
2534 cpu_log_err(struct async_flt *aflt)
2535 {
2536 	char unum[UNUM_NAMLEN];
2537 	int synd_status, synd_code, afar_status;
2538 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2539 
2540 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2541 		aflt->flt_status |= ECC_ECACHE;
2542 	else
2543 		aflt->flt_status &= ~ECC_ECACHE;
2544 	/*
2545 	 * Determine syndrome status.
2546 	 */
2547 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2548 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2549 
2550 	/*
2551 	 * Determine afar status.
2552 	 */
2553 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2554 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2555 				ch_flt->flt_bit);
2556 	else
2557 		afar_status = AFLT_STAT_INVALID;
2558 
2559 	synd_code = synd_to_synd_code(synd_status,
2560 	    aflt->flt_synd, ch_flt->flt_bit);
2561 
2562 	/*
2563 	 * If afar status is not invalid do a unum lookup.
2564 	 */
2565 	if (afar_status != AFLT_STAT_INVALID) {
2566 		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2567 	} else {
2568 		unum[0] = '\0';
2569 	}
2570 
2571 	/*
2572 	 * Do not send the fruid message (plat_ecc_error_data_t)
2573 	 * to the SC if it can handle the enhanced error information
2574 	 * (plat_ecc_error2_data_t) or when the tunable
2575 	 * ecc_log_fruid_enable is set to 0.
2576 	 */
2577 
2578 	if (&plat_ecc_capability_sc_get &&
2579 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2580 		if (&plat_log_fruid_error)
2581 			plat_log_fruid_error(synd_code, aflt, unum,
2582 			    ch_flt->flt_bit);
2583 	}
2584 
2585 	if (aflt->flt_func != NULL)
2586 		aflt->flt_func(aflt, unum);
2587 
2588 	if (afar_status != AFLT_STAT_INVALID)
2589 		cpu_log_diag_info(ch_flt);
2590 
2591 	/*
2592 	 * If we have a CEEN error , we do not reenable CEEN until after
2593 	 * we exit the trap handler. Otherwise, another error may
2594 	 * occur causing the handler to be entered recursively.
2595 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2596 	 * to try and ensure that the CPU makes progress in the face
2597 	 * of a CE storm.
2598 	 */
2599 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2600 		(void) timeout(cpu_delayed_check_ce_errors,
2601 		    (void *)(uintptr_t)aflt->flt_inst,
2602 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2603 	}
2604 }
2605 
2606 /*
2607  * Invoked by error_init() early in startup and therefore before
2608  * startup_errorq() is called to drain any error Q -
2609  *
2610  * startup()
2611  *   startup_end()
2612  *     error_init()
2613  *       cpu_error_init()
2614  * errorq_init()
2615  *   errorq_drain()
2616  * start_other_cpus()
2617  *
2618  * The purpose of this routine is to create error-related taskqs.  Taskqs
2619  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2620  * context.
2621  */
2622 void
2623 cpu_error_init(int items)
2624 {
2625 	/*
2626 	 * Create taskq(s) to reenable CE
2627 	 */
2628 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2629 	    items, items, TASKQ_PREPOPULATE);
2630 }
2631 
2632 void
2633 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2634 {
2635 	char unum[UNUM_NAMLEN];
2636 	int len;
2637 
2638 	switch (aflt->flt_class) {
2639 	case CPU_FAULT:
2640 		cpu_ereport_init(aflt);
2641 		if (cpu_async_log_err(aflt, eqep))
2642 			cpu_ereport_post(aflt);
2643 		break;
2644 
2645 	case BUS_FAULT:
2646 		if (aflt->flt_func != NULL) {
2647 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2648 			    unum, UNUM_NAMLEN, &len);
2649 			aflt->flt_func(aflt, unum);
2650 		}
2651 		break;
2652 
2653 	case RECIRC_CPU_FAULT:
2654 		aflt->flt_class = CPU_FAULT;
2655 		cpu_log_err(aflt);
2656 		cpu_ereport_post(aflt);
2657 		break;
2658 
2659 	case RECIRC_BUS_FAULT:
2660 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2661 		/*FALLTHRU*/
2662 	default:
2663 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2664 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2665 		return;
2666 	}
2667 }
2668 
2669 /*
2670  * Scrub and classify a CE.  This function must not modify the
2671  * fault structure passed to it but instead should return the classification
2672  * information.
2673  */
2674 
2675 static uchar_t
2676 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2677 {
2678 	uchar_t disp = CE_XDIAG_EXTALG;
2679 	on_trap_data_t otd;
2680 	uint64_t orig_err;
2681 	ch_cpu_logout_t *clop;
2682 
2683 	/*
2684 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2685 	 * this, but our other callers have not.  Disable preemption to
2686 	 * avoid CPU migration so that we restore CEEN on the correct
2687 	 * cpu later.
2688 	 *
2689 	 * CEEN is cleared so that further CEs that our instruction and
2690 	 * data footprint induce do not cause use to either creep down
2691 	 * kernel stack to the point of overflow, or do so much CE
2692 	 * notification as to make little real forward progress.
2693 	 *
2694 	 * NCEEN must not be cleared.  However it is possible that
2695 	 * our accesses to the flt_addr may provoke a bus error or timeout
2696 	 * if the offending address has just been unconfigured as part of
2697 	 * a DR action.  So we must operate under on_trap protection.
2698 	 */
2699 	kpreempt_disable();
2700 	orig_err = get_error_enable();
2701 	if (orig_err & EN_REG_CEEN)
2702 	    set_error_enable(orig_err & ~EN_REG_CEEN);
2703 
2704 	/*
2705 	 * Our classification algorithm includes the line state before
2706 	 * the scrub; we'd like this captured after the detection and
2707 	 * before the algorithm below - the earlier the better.
2708 	 *
2709 	 * If we've come from a cpu CE trap then this info already exists
2710 	 * in the cpu logout area.
2711 	 *
2712 	 * For a CE detected by memscrub for which there was no trap
2713 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2714 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2715 	 * marked the fault structure as incomplete as a flag to later
2716 	 * logging code.
2717 	 *
2718 	 * If called directly from an IO detected CE there has been
2719 	 * no line data capture.  In this case we logout to the cpu logout
2720 	 * area - that's appropriate since it's the cpu cache data we need
2721 	 * for classification.  We thus borrow the cpu logout area for a
2722 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2723 	 * this time (we will invalidate it again below).
2724 	 *
2725 	 * If called from the partner check xcall handler then this cpu
2726 	 * (the partner) has not necessarily experienced a CE at this
2727 	 * address.  But we want to capture line state before its scrub
2728 	 * attempt since we use that in our classification.
2729 	 */
2730 	if (logout_tried == B_FALSE) {
2731 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2732 			disp |= CE_XDIAG_NOLOGOUT;
2733 	}
2734 
2735 	/*
2736 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2737 	 * no longer be valid (if DR'd since the initial event) so we
2738 	 * perform this scrub under on_trap protection.  If this access is
2739 	 * ok then further accesses below will also be ok - DR cannot
2740 	 * proceed while this thread is active (preemption is disabled);
2741 	 * to be safe we'll nonetheless use on_trap again below.
2742 	 */
2743 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2744 		cpu_scrubphys(ecc);
2745 	} else {
2746 		no_trap();
2747 		if (orig_err & EN_REG_CEEN)
2748 		    set_error_enable(orig_err);
2749 		kpreempt_enable();
2750 		return (disp);
2751 	}
2752 	no_trap();
2753 
2754 	/*
2755 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2756 	 * Note that it's quite possible that the read sourced the data from
2757 	 * another cpu.
2758 	 */
2759 	if (clear_ecc(ecc))
2760 		disp |= CE_XDIAG_CE1;
2761 
2762 	/*
2763 	 * Read the data again.  This time the read is very likely to
2764 	 * come from memory since the scrub induced a writeback to memory.
2765 	 */
2766 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2767 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2768 	} else {
2769 		no_trap();
2770 		if (orig_err & EN_REG_CEEN)
2771 		    set_error_enable(orig_err);
2772 		kpreempt_enable();
2773 		return (disp);
2774 	}
2775 	no_trap();
2776 
2777 	/* Did that read induce a CE that matches the AFAR? */
2778 	if (clear_ecc(ecc))
2779 		disp |= CE_XDIAG_CE2;
2780 
2781 	/*
2782 	 * Look at the logout information and record whether we found the
2783 	 * line in l2/l3 cache.  For Panther we are interested in whether
2784 	 * we found it in either cache (it won't reside in both but
2785 	 * it is possible to read it that way given the moving target).
2786 	 */
2787 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2788 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2789 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2790 		int hit, level;
2791 		int state;
2792 		int totalsize;
2793 		ch_ec_data_t *ecp;
2794 
2795 		/*
2796 		 * If hit is nonzero then a match was found and hit will
2797 		 * be one greater than the index which hit.  For Panther we
2798 		 * also need to pay attention to level to see which of l2$ or
2799 		 * l3$ it hit in.
2800 		 */
2801 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2802 		    0, &level);
2803 
2804 		if (hit) {
2805 			--hit;
2806 			disp |= CE_XDIAG_AFARMATCH;
2807 
2808 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2809 				if (level == 2)
2810 					ecp = &clop->clo_data.chd_l2_data[hit];
2811 				else
2812 					ecp = &clop->clo_data.chd_ec_data[hit];
2813 			} else {
2814 				ASSERT(level == 2);
2815 				ecp = &clop->clo_data.chd_ec_data[hit];
2816 			}
2817 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2818 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2819 			    ecc->flt_addr, ecp->ec_tag);
2820 
2821 			/*
2822 			 * Cheetah variants use different state encodings -
2823 			 * the CH_ECSTATE_* defines vary depending on the
2824 			 * module we're compiled for.  Translate into our
2825 			 * one true version.  Conflate Owner-Shared state
2826 			 * of SSM mode with Owner as victimisation of such
2827 			 * lines may cause a writeback.
2828 			 */
2829 			switch (state) {
2830 			case CH_ECSTATE_MOD:
2831 				disp |= EC_STATE_M;
2832 				break;
2833 
2834 			case CH_ECSTATE_OWN:
2835 			case CH_ECSTATE_OWS:
2836 				disp |= EC_STATE_O;
2837 				break;
2838 
2839 			case CH_ECSTATE_EXL:
2840 				disp |= EC_STATE_E;
2841 				break;
2842 
2843 			case CH_ECSTATE_SHR:
2844 				disp |= EC_STATE_S;
2845 				break;
2846 
2847 			default:
2848 				disp |= EC_STATE_I;
2849 				break;
2850 			}
2851 		}
2852 
2853 		/*
2854 		 * If we initiated the delayed logout then we are responsible
2855 		 * for invalidating the logout area.
2856 		 */
2857 		if (logout_tried == B_FALSE) {
2858 			bzero(clop, sizeof (ch_cpu_logout_t));
2859 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2860 		}
2861 	}
2862 
2863 	/*
2864 	 * Re-enable CEEN if we turned it off.
2865 	 */
2866 	if (orig_err & EN_REG_CEEN)
2867 	    set_error_enable(orig_err);
2868 	kpreempt_enable();
2869 
2870 	return (disp);
2871 }
2872 
2873 /*
2874  * Scrub a correctable memory error and collect data for classification
2875  * of CE type.  This function is called in the detection path, ie tl0 handling
2876  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2877  */
2878 void
2879 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2880 {
2881 	/*
2882 	 * Cheetah CE classification does not set any bits in flt_status.
2883 	 * Instead we will record classification datapoints in flt_disp.
2884 	 */
2885 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2886 
2887 	/*
2888 	 * To check if the error detected by IO is persistent, sticky or
2889 	 * intermittent.  This is noticed by clear_ecc().
2890 	 */
2891 	if (ecc->flt_status & ECC_IOBUS)
2892 		ecc->flt_stat = C_AFSR_MEMORY;
2893 
2894 	/*
2895 	 * Record information from this first part of the algorithm in
2896 	 * flt_disp.
2897 	 */
2898 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2899 }
2900 
2901 /*
2902  * Select a partner to perform a further CE classification check from.
2903  * Must be called with kernel preemption disabled (to stop the cpu list
2904  * from changing).  The detecting cpu we are partnering has cpuid
2905  * aflt->flt_inst; we might not be running on the detecting cpu.
2906  *
2907  * Restrict choice to active cpus in the same cpu partition as ourselves in
2908  * an effort to stop bad cpus in one partition causing other partitions to
2909  * perform excessive diagnostic activity.  Actually since the errorq drain
2910  * is run from a softint most of the time and that is a global mechanism
2911  * this isolation is only partial.  Return NULL if we fail to find a
2912  * suitable partner.
2913  *
2914  * We prefer a partner that is in a different latency group to ourselves as
2915  * we will share fewer datapaths.  If such a partner is unavailable then
2916  * choose one in the same lgroup but prefer a different chip and only allow
2917  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2918  * flags includes PTNR_SELFOK then permit selection of the original detector.
2919  *
2920  * We keep a cache of the last partner selected for a cpu, and we'll try to
2921  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2922  * have passed since that selection was made.  This provides the benefit
2923  * of the point-of-view of different partners over time but without
2924  * requiring frequent cpu list traversals.
2925  */
2926 
2927 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2928 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2929 
2930 static cpu_t *
2931 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2932 {
2933 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2934 	hrtime_t lasttime, thistime;
2935 
2936 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2937 
2938 	dtcr = cpu[aflt->flt_inst];
2939 
2940 	/*
2941 	 * Short-circuit for the following cases:
2942 	 *	. the dtcr is not flagged active
2943 	 *	. there is just one cpu present
2944 	 *	. the detector has disappeared
2945 	 *	. we were given a bad flt_inst cpuid; this should not happen
2946 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2947 	 *	  reason to panic.
2948 	 *	. there is just one cpu left online in the cpu partition
2949 	 *
2950 	 * If we return NULL after this point then we do not update the
2951 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2952 	 * again next time; this is the case where the only other cpu online
2953 	 * in the detector's partition is on the same chip as the detector
2954 	 * and since CEEN re-enable is throttled even that case should not
2955 	 * hurt performance.
2956 	 */
2957 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2958 		return (NULL);
2959 	}
2960 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2961 		if (flags & PTNR_SELFOK) {
2962 			*typep = CE_XDIAG_PTNR_SELF;
2963 			return (dtcr);
2964 		} else {
2965 			return (NULL);
2966 		}
2967 	}
2968 
2969 	thistime = gethrtime();
2970 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2971 
2972 	/*
2973 	 * Select a starting point.
2974 	 */
2975 	if (!lasttime) {
2976 		/*
2977 		 * We've never selected a partner for this detector before.
2978 		 * Start the scan at the next online cpu in the same cpu
2979 		 * partition.
2980 		 */
2981 		sp = dtcr->cpu_next_part;
2982 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2983 		/*
2984 		 * Our last selection has not aged yet.  If this partner:
2985 		 *	. is still a valid cpu,
2986 		 *	. is still in the same partition as the detector
2987 		 *	. is still marked active
2988 		 *	. satisfies the 'flags' argument criteria
2989 		 * then select it again without updating the timestamp.
2990 		 */
2991 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2992 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2993 		    !cpu_flagged_active(sp->cpu_flags) ||
2994 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2995 		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
2996 		    !(flags & PTNR_SIBLINGOK))) {
2997 			sp = dtcr->cpu_next_part;
2998 		} else {
2999 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3000 				*typep = CE_XDIAG_PTNR_REMOTE;
3001 			} else if (sp == dtcr) {
3002 				*typep = CE_XDIAG_PTNR_SELF;
3003 			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3004 				*typep = CE_XDIAG_PTNR_SIBLING;
3005 			} else {
3006 				*typep = CE_XDIAG_PTNR_LOCAL;
3007 			}
3008 			return (sp);
3009 		}
3010 	} else {
3011 		/*
3012 		 * Our last selection has aged.  If it is nonetheless still a
3013 		 * valid cpu then start the scan at the next cpu in the
3014 		 * partition after our last partner.  If the last selection
3015 		 * is no longer a valid cpu then go with our default.  In
3016 		 * this way we slowly cycle through possible partners to
3017 		 * obtain multiple viewpoints over time.
3018 		 */
3019 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3020 		if (sp == NULL) {
3021 			sp = dtcr->cpu_next_part;
3022 		} else {
3023 			sp = sp->cpu_next_part;		/* may be dtcr */
3024 			if (sp->cpu_part != dtcr->cpu_part)
3025 				sp = dtcr;
3026 		}
3027 	}
3028 
3029 	/*
3030 	 * We have a proposed starting point for our search, but if this
3031 	 * cpu is offline then its cpu_next_part will point to itself
3032 	 * so we can't use that to iterate over cpus in this partition in
3033 	 * the loop below.  We still want to avoid iterating over cpus not
3034 	 * in our partition, so in the case that our starting point is offline
3035 	 * we will repoint it to be the detector itself;  and if the detector
3036 	 * happens to be offline we'll return NULL from the following loop.
3037 	 */
3038 	if (!cpu_flagged_active(sp->cpu_flags)) {
3039 		sp = dtcr;
3040 	}
3041 
3042 	ptnr = sp;
3043 	locptnr = NULL;
3044 	sibptnr = NULL;
3045 	do {
3046 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3047 			continue;
3048 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3049 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3050 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3051 			*typep = CE_XDIAG_PTNR_REMOTE;
3052 			return (ptnr);
3053 		}
3054 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3055 			if (sibptnr == NULL)
3056 				sibptnr = ptnr;
3057 			continue;
3058 		}
3059 		if (locptnr == NULL)
3060 			locptnr = ptnr;
3061 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3062 
3063 	/*
3064 	 * A foreign partner has already been returned if one was available.
3065 	 *
3066 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3067 	 * detector, is active, and is not a sibling of the detector.
3068 	 *
3069 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3070 	 * active.
3071 	 *
3072 	 * If we have to resort to using the detector itself we have already
3073 	 * checked that it is active.
3074 	 */
3075 	if (locptnr) {
3076 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3077 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3078 		*typep = CE_XDIAG_PTNR_LOCAL;
3079 		return (locptnr);
3080 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3081 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3082 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3083 		*typep = CE_XDIAG_PTNR_SIBLING;
3084 		return (sibptnr);
3085 	} else if (flags & PTNR_SELFOK) {
3086 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3087 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3088 		*typep = CE_XDIAG_PTNR_SELF;
3089 		return (dtcr);
3090 	}
3091 
3092 	return (NULL);
3093 }
3094 
3095 /*
3096  * Cross call handler that is requested to run on the designated partner of
3097  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3098  */
3099 static void
3100 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3101 {
3102 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3103 }
3104 
3105 /*
3106  * The associated errorqs are never destroyed so we do not need to deal with
3107  * them disappearing before this timeout fires.  If the affected memory
3108  * has been DR'd out since the original event the scrub algrithm will catch
3109  * any errors and return null disposition info.  If the original detecting
3110  * cpu has been DR'd out then ereport detector info will not be able to
3111  * lookup CPU type;  with a small timeout this is unlikely.
3112  */
3113 static void
3114 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3115 {
3116 	struct async_flt *aflt = cbarg->lkycb_aflt;
3117 	uchar_t disp;
3118 	cpu_t *cp;
3119 	int ptnrtype;
3120 
3121 	kpreempt_disable();
3122 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3123 	    &ptnrtype)) {
3124 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3125 		    (uint64_t)&disp);
3126 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3127 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3128 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3129 	} else {
3130 		ce_xdiag_lkydrops++;
3131 		if (ncpus > 1)
3132 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3133 			    CE_XDIAG_SKIP_NOPTNR);
3134 	}
3135 	kpreempt_enable();
3136 
3137 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3138 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3139 }
3140 
3141 /*
3142  * Called from errorq drain code when processing a CE error, both from
3143  * CPU and PCI drain functions.  Decide what further classification actions,
3144  * if any, we will perform.  Perform immediate actions now, and schedule
3145  * delayed actions as required.  Note that we are no longer necessarily running
3146  * on the detecting cpu, and that the async_flt structure will not persist on
3147  * return from this function.
3148  *
 * Calls to this function should aim to be self-throttling in some way.  With
3150  * the delayed re-enable of CEEN the absolute rate of calls should not
3151  * be excessive.  Callers should also avoid performing in-depth classification
3152  * for events in pages that are already known to be suspect.
3153  *
3154  * We return nonzero to indicate that the event has been copied and
3155  * recirculated for further testing.  The caller should not log the event
3156  * in this case - it will be logged when further test results are available.
3157  *
3158  * Our possible contexts are that of errorq_drain: below lock level or from
3159  * panic context.  We can assume that the cpu we are running on is online.
3160  */
3161 
3162 
3163 #ifdef DEBUG
3164 static int ce_xdiag_forceaction;
3165 #endif
3166 
3167 int
3168 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3169     errorq_elem_t *eqep, size_t afltoffset)
3170 {
3171 	ce_dispact_t dispact, action;
3172 	cpu_t *cp;
3173 	uchar_t dtcrinfo, disp;
3174 	int ptnrtype;
3175 
3176 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3177 		ce_xdiag_drops++;
3178 		return (0);
3179 	} else if (!aflt->flt_in_memory) {
3180 		ce_xdiag_drops++;
3181 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3182 		return (0);
3183 	}
3184 
3185 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3186 
3187 	/*
3188 	 * Some correctable events are not scrubbed/classified, such as those
3189 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3190 	 * initial detector classification go no further.
3191 	 */
3192 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3193 		ce_xdiag_drops++;
3194 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3195 		return (0);
3196 	}
3197 
3198 	dispact = CE_DISPACT(ce_disp_table,
3199 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3200 	    CE_XDIAG_STATE(dtcrinfo),
3201 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3202 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3203 
3204 
3205 	action = CE_ACT(dispact);	/* bad lookup caught below */
3206 #ifdef DEBUG
3207 	if (ce_xdiag_forceaction != 0)
3208 		action = ce_xdiag_forceaction;
3209 #endif
3210 
3211 	switch (action) {
3212 	case CE_ACT_LKYCHK: {
3213 		caddr_t ndata;
3214 		errorq_elem_t *neqep;
3215 		struct async_flt *ecc;
3216 		ce_lkychk_cb_t *cbargp;
3217 
3218 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3219 			ce_xdiag_lkydrops++;
3220 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3221 			    CE_XDIAG_SKIP_DUPFAIL);
3222 			break;
3223 		}
3224 		ecc = (struct async_flt *)(ndata + afltoffset);
3225 
3226 		ASSERT(ecc->flt_class == CPU_FAULT ||
3227 		    ecc->flt_class == BUS_FAULT);
3228 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3229 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3230 
3231 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3232 		cbargp->lkycb_aflt = ecc;
3233 		cbargp->lkycb_eqp = eqp;
3234 		cbargp->lkycb_eqep = neqep;
3235 
3236 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3237 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3238 		return (1);
3239 	}
3240 
3241 	case CE_ACT_PTNRCHK:
3242 		kpreempt_disable();	/* stop cpu list changing */
3243 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3244 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3245 			    (uint64_t)aflt, (uint64_t)&disp);
3246 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3247 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3248 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3249 		} else if (ncpus > 1) {
3250 			ce_xdiag_ptnrdrops++;
3251 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3252 			    CE_XDIAG_SKIP_NOPTNR);
3253 		} else {
3254 			ce_xdiag_ptnrdrops++;
3255 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3256 			    CE_XDIAG_SKIP_UNIPROC);
3257 		}
3258 		kpreempt_enable();
3259 		break;
3260 
3261 	case CE_ACT_DONE:
3262 		break;
3263 
3264 	case CE_ACT(CE_DISP_BAD):
3265 	default:
3266 #ifdef DEBUG
3267 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3268 #endif
3269 		ce_xdiag_bad++;
3270 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3271 		break;
3272 	}
3273 
3274 	return (0);
3275 }
3276 
3277 /*
3278  * We route all errors through a single switch statement.
3279  */
3280 void
3281 cpu_ue_log_err(struct async_flt *aflt)
3282 {
3283 	switch (aflt->flt_class) {
3284 	case CPU_FAULT:
3285 		cpu_ereport_init(aflt);
3286 		if (cpu_async_log_err(aflt, NULL))
3287 			cpu_ereport_post(aflt);
3288 		break;
3289 
3290 	case BUS_FAULT:
3291 		bus_async_log_err(aflt);
3292 		break;
3293 
3294 	default:
3295 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3296 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3297 		return;
3298 	}
3299 }
3300 
3301 /*
3302  * Routine for panic hook callback from panic_idle().
3303  */
3304 void
3305 cpu_async_panic_callb(void)
3306 {
3307 	ch_async_flt_t ch_flt;
3308 	struct async_flt *aflt;
3309 	ch_cpu_errors_t cpu_error_regs;
3310 	uint64_t afsr_errs;
3311 
3312 	get_cpu_error_state(&cpu_error_regs);
3313 
3314 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3315 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3316 
3317 	if (afsr_errs) {
3318 
3319 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3320 		aflt = (struct async_flt *)&ch_flt;
3321 		aflt->flt_id = gethrtime_waitfree();
3322 		aflt->flt_bus_id = getprocessorid();
3323 		aflt->flt_inst = CPU->cpu_id;
3324 		aflt->flt_stat = cpu_error_regs.afsr;
3325 		aflt->flt_addr = cpu_error_regs.afar;
3326 		aflt->flt_prot = AFLT_PROT_NONE;
3327 		aflt->flt_class = CPU_FAULT;
3328 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3329 		aflt->flt_panic = 1;
3330 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3331 		ch_flt.afsr_errs = afsr_errs;
3332 #if defined(SERRANO)
3333 		ch_flt.afar2 = cpu_error_regs.afar2;
3334 #endif	/* SERRANO */
3335 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3336 	}
3337 }
3338 
3339 /*
3340  * Routine to convert a syndrome into a syndrome code.
3341  */
3342 static int
3343 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3344 {
3345 	if (synd_status == AFLT_STAT_INVALID)
3346 		return (-1);
3347 
3348 	/*
3349 	 * Use the syndrome to index the appropriate syndrome table,
3350 	 * to get the code indicating which bit(s) is(are) bad.
3351 	 */
3352 	if (afsr_bit &
3353 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3354 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3355 #if defined(JALAPENO) || defined(SERRANO)
3356 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3357 				return (-1);
3358 			else
3359 				return (BPAR0 + synd);
3360 #else /* JALAPENO || SERRANO */
3361 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3362 				return (-1);
3363 			else
3364 				return (mtag_syndrome_tab[synd]);
3365 #endif /* JALAPENO || SERRANO */
3366 		} else {
3367 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3368 				return (-1);
3369 			else
3370 				return (ecc_syndrome_tab[synd]);
3371 		}
3372 	} else {
3373 		return (-1);
3374 	}
3375 }
3376 
3377 int
3378 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3379 {
3380 	if (&plat_get_mem_sid)
3381 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3382 	else
3383 		return (ENOTSUP);
3384 }
3385 
3386 int
3387 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3388 {
3389 	if (&plat_get_mem_offset)
3390 		return (plat_get_mem_offset(flt_addr, offp));
3391 	else
3392 		return (ENOTSUP);
3393 }
3394 
3395 int
3396 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3397 {
3398 	if (&plat_get_mem_addr)
3399 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3400 	else
3401 		return (ENOTSUP);
3402 }
3403 
3404 /*
3405  * Routine to return a string identifying the physical name
3406  * associated with a memory/cache error.
3407  */
3408 int
3409 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3410     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3411     ushort_t flt_status, char *buf, int buflen, int *lenp)
3412 {
3413 	int synd_code;
3414 	int ret;
3415 
3416 	/*
3417 	 * An AFSR of -1 defaults to a memory syndrome.
3418 	 */
3419 	if (flt_stat == (uint64_t)-1)
3420 		flt_stat = C_AFSR_CE;
3421 
3422 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3423 
3424 	/*
3425 	 * Syndrome code must be either a single-bit error code
3426 	 * (0...143) or -1 for unum lookup.
3427 	 */
3428 	if (synd_code < 0 || synd_code >= M2)
3429 		synd_code = -1;
3430 	if (&plat_get_mem_unum) {
3431 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3432 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3433 			buf[0] = '\0';
3434 			*lenp = 0;
3435 		}
3436 
3437 		return (ret);
3438 	}
3439 
3440 	return (ENOTSUP);
3441 }
3442 
3443 /*
3444  * Wrapper for cpu_get_mem_unum() routine that takes an
3445  * async_flt struct rather than explicit arguments.
3446  */
3447 int
3448 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3449     char *buf, int buflen, int *lenp)
3450 {
3451 	/*
3452 	 * If we come thru here for an IO bus error aflt->flt_stat will
3453 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3454 	 * so it will interpret this as a memory error.
3455 	 */
3456 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3457 	    (aflt->flt_class == BUS_FAULT) ?
3458 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3459 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3460 	    aflt->flt_status, buf, buflen, lenp));
3461 }
3462 
3463 /*
3464  * Return unum string given synd_code and async_flt into
3465  * the buf with size UNUM_NAMLEN
3466  */
3467 static int
3468 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3469 {
3470 	int ret, len;
3471 
3472 	/*
3473 	 * Syndrome code must be either a single-bit error code
3474 	 * (0...143) or -1 for unum lookup.
3475 	 */
3476 	if (synd_code < 0 || synd_code >= M2)
3477 		synd_code = -1;
3478 	if (&plat_get_mem_unum) {
3479 		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3480 		    aflt->flt_bus_id, aflt->flt_in_memory,
3481 		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3482 			buf[0] = '\0';
3483 		}
3484 		return (ret);
3485 	}
3486 
3487 	buf[0] = '\0';
3488 	return (ENOTSUP);
3489 }
3490 
3491 /*
3492  * This routine is a more generic interface to cpu_get_mem_unum()
3493  * that may be used by other modules (e.g. the 'mm' driver, through
3494  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3495  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3496  */
3497 int
3498 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3499     char *buf, int buflen, int *lenp)
3500 {
3501 	int synd_status, flt_in_memory, ret;
3502 	ushort_t flt_status = 0;
3503 	char unum[UNUM_NAMLEN];
3504 	uint64_t t_afsr_errs;
3505 
3506 	/*
3507 	 * Check for an invalid address.
3508 	 */
3509 	if (afar == (uint64_t)-1)
3510 		return (ENXIO);
3511 
3512 	if (synd == (uint64_t)-1)
3513 		synd_status = AFLT_STAT_INVALID;
3514 	else
3515 		synd_status = AFLT_STAT_VALID;
3516 
3517 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3518 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3519 
3520 	/*
3521 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3522 	 */
3523 	if (*afsr == (uint64_t)-1)
3524 		t_afsr_errs = C_AFSR_CE;
3525 	else {
3526 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3527 #if defined(CHEETAH_PLUS)
3528 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3529 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3530 #endif	/* CHEETAH_PLUS */
3531 	}
3532 
3533 	/*
3534 	 * Turn on ECC_ECACHE if error type is E$ Data.
3535 	 */
3536 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3537 		flt_status |= ECC_ECACHE;
3538 
3539 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3540 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3541 	if (ret != 0)
3542 		return (ret);
3543 
3544 	if (*lenp >= buflen)
3545 		return (ENAMETOOLONG);
3546 
3547 	(void) strncpy(buf, unum, buflen);
3548 
3549 	return (0);
3550 }
3551 
3552 /*
3553  * Routine to return memory information associated
3554  * with a physical address and syndrome.
3555  */
3556 int
3557 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3558     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3559     int *segsp, int *banksp, int *mcidp)
3560 {
3561 	int synd_status, synd_code;
3562 
3563 	if (afar == (uint64_t)-1)
3564 		return (ENXIO);
3565 
3566 	if (synd == (uint64_t)-1)
3567 		synd_status = AFLT_STAT_INVALID;
3568 	else
3569 		synd_status = AFLT_STAT_VALID;
3570 
3571 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3572 
3573 	if (p2get_mem_info != NULL)
3574 		return ((p2get_mem_info)(synd_code, afar,
3575 			mem_sizep, seg_sizep, bank_sizep,
3576 			segsp, banksp, mcidp));
3577 	else
3578 		return (ENOTSUP);
3579 }
3580 
3581 /*
3582  * Routine to return a string identifying the physical
3583  * name associated with a cpuid.
3584  */
3585 int
3586 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3587 {
3588 	int ret;
3589 	char unum[UNUM_NAMLEN];
3590 
3591 	if (&plat_get_cpu_unum) {
3592 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3593 		    != 0)
3594 			return (ret);
3595 	} else {
3596 		return (ENOTSUP);
3597 	}
3598 
3599 	if (*lenp >= buflen)
3600 		return (ENAMETOOLONG);
3601 
3602 	(void) strncpy(buf, unum, buflen);
3603 
3604 	return (0);
3605 }
3606 
3607 /*
3608  * This routine exports the name buffer size.
3609  */
3610 size_t
3611 cpu_get_name_bufsize()
3612 {
3613 	return (UNUM_NAMLEN);
3614 }
3615 
/*
 * Historical function, apparently not used.  Intentionally does nothing.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3623 
/*
 * Historical function only called for SBus errors in debugging.
 * Intentionally does nothing.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3631 
3632 /*
3633  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3634  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3635  * an async fault structure argument is passed in, the captured error state
3636  * (AFSR, AFAR) info will be returned in the structure.
3637  */
3638 int
3639 clear_errors(ch_async_flt_t *ch_flt)
3640 {
3641 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3642 	ch_cpu_errors_t	cpu_error_regs;
3643 
3644 	get_cpu_error_state(&cpu_error_regs);
3645 
3646 	if (ch_flt != NULL) {
3647 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3648 		aflt->flt_addr = cpu_error_regs.afar;
3649 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3650 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3651 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3652 #if defined(SERRANO)
3653 		ch_flt->afar2 = cpu_error_regs.afar2;
3654 #endif	/* SERRANO */
3655 	}
3656 
3657 	set_cpu_error_state(&cpu_error_regs);
3658 
3659 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3660 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3661 }
3662 
3663 /*
3664  * Clear any AFSR error bits, and check for persistence.
3665  *
3666  * It would be desirable to also insist that syndrome match.  PCI handling
3667  * has already filled flt_synd.  For errors trapped by CPU we only fill
3668  * flt_synd when we queue the event, so we do not have a valid flt_synd
3669  * during initial classification (it is valid if we're called as part of
3670  * subsequent low-pil additional classification attempts).  We could try
3671  * to determine which syndrome to use: we know we're only called for
3672  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3673  * would be esynd/none and esynd/msynd, respectively.  If that is
3674  * implemented then what do we do in the case that we do experience an
3675  * error on the same afar but with different syndrome?  At the very least
3676  * we should count such occurences.  Anyway, for now, we'll leave it as
3677  * it has been for ages.
3678  */
3679 static int
3680 clear_ecc(struct async_flt *aflt)
3681 {
3682 	ch_cpu_errors_t	cpu_error_regs;
3683 
3684 	/*
3685 	 * Snapshot the AFSR and AFAR and clear any errors
3686 	 */
3687 	get_cpu_error_state(&cpu_error_regs);
3688 	set_cpu_error_state(&cpu_error_regs);
3689 
3690 	/*
3691 	 * If any of the same memory access error bits are still on and
3692 	 * the AFAR matches, return that the error is persistent.
3693 	 */
3694 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3695 	    cpu_error_regs.afar == aflt->flt_addr);
3696 }
3697 
3698 /*
3699  * Turn off all cpu error detection, normally only used for panics.
3700  */
3701 void
3702 cpu_disable_errors(void)
3703 {
3704 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3705 
3706 	/*
3707 	 * With error detection now turned off, check the other cpus
3708 	 * logout areas for any unlogged errors.
3709 	 */
3710 	if (enable_check_other_cpus_logout) {
3711 		cpu_check_other_cpus_logout();
3712 		/*
3713 		 * Make a second pass over the logout areas, in case
3714 		 * there is a failing CPU in an error-trap loop which
3715 		 * will write to the logout area once it is emptied.
3716 		 */
3717 		cpu_check_other_cpus_logout();
3718 	}
3719 }
3720 
3721 /*
3722  * Enable errors.
3723  */
3724 void
3725 cpu_enable_errors(void)
3726 {
3727 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3728 }
3729 
3730 /*
3731  * Flush the entire ecache using displacement flush by reading through a
3732  * physical address range twice as large as the Ecache.
3733  */
3734 void
3735 cpu_flush_ecache(void)
3736 {
3737 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3738 	    cpunodes[CPU->cpu_id].ecache_linesize);
3739 }
3740 
3741 /*
3742  * Return CPU E$ set size - E$ size divided by the associativity.
3743  * We use this function in places where the CPU_PRIVATE ptr may not be
3744  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3745  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3746  * up before the kernel switches from OBP's to the kernel's trap table, so
3747  * we don't have to worry about cpunodes being unitialized.
3748  */
3749 int
3750 cpu_ecache_set_size(struct cpu *cp)
3751 {
3752 	if (CPU_PRIVATE(cp))
3753 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3754 
3755 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3756 }
3757 
3758 /*
3759  * Flush Ecache line.
3760  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3761  * Uses normal displacement flush for Cheetah.
3762  */
3763 static void
3764 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3765 {
3766 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3767 	int ec_set_size = cpu_ecache_set_size(CPU);
3768 
3769 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3770 }
3771 
3772 /*
3773  * Scrub physical address.
3774  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3775  * Ecache or direct-mapped Ecache.
3776  */
3777 static void
3778 cpu_scrubphys(struct async_flt *aflt)
3779 {
3780 	int ec_set_size = cpu_ecache_set_size(CPU);
3781 
3782 	scrubphys(aflt->flt_addr, ec_set_size);
3783 }
3784 
3785 /*
3786  * Clear physical address.
3787  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3788  * Ecache or direct-mapped Ecache.
3789  */
3790 void
3791 cpu_clearphys(struct async_flt *aflt)
3792 {
3793 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3794 	int ec_set_size = cpu_ecache_set_size(CPU);
3795 
3796 
3797 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3798 }
3799 
3800 #if defined(CPU_IMP_ECACHE_ASSOC)
3801 /*
3802  * Check for a matching valid line in all the sets.
3803  * If found, return set# + 1. Otherwise return 0.
3804  */
3805 static int
3806 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3807 {
3808 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3809 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3810 	int ec_set_size = cpu_ecache_set_size(CPU);
3811 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3812 	int nway = cpu_ecache_nway();
3813 	int i;
3814 
3815 	for (i = 0; i < nway; i++, ecp++) {
3816 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3817 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3818 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3819 			return (i+1);
3820 	}
3821 	return (0);
3822 }
3823 #endif /* CPU_IMP_ECACHE_ASSOC */
3824 
3825 /*
3826  * Check whether a line in the given logout info matches the specified
3827  * fault address.  If reqval is set then the line must not be Invalid.
3828  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3829  * set to 2 for l2$ or 3 for l3$.
3830  */
3831 static int
3832 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3833 {
3834 	ch_diag_data_t *cdp = data;
3835 	ch_ec_data_t *ecp;
3836 	int totalsize, ec_set_size;
3837 	int i, ways;
3838 	int match = 0;
3839 	int tagvalid;
3840 	uint64_t addr, tagpa;
3841 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3842 
3843 	/*
3844 	 * Check the l2$ logout data
3845 	 */
3846 	if (ispanther) {
3847 		ecp = &cdp->chd_l2_data[0];
3848 		ec_set_size = PN_L2_SET_SIZE;
3849 		ways = PN_L2_NWAYS;
3850 	} else {
3851 		ecp = &cdp->chd_ec_data[0];
3852 		ec_set_size = cpu_ecache_set_size(CPU);
3853 		ways = cpu_ecache_nway();
3854 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3855 	}
3856 	/* remove low order PA bits from fault address not used in PA tag */
3857 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3858 	for (i = 0; i < ways; i++, ecp++) {
3859 		if (ispanther) {
3860 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3861 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3862 		} else {
3863 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3864 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3865 			    ecp->ec_tag);
3866 		}
3867 		if (tagpa == addr && (!reqval || tagvalid)) {
3868 			match = i + 1;
3869 			*level = 2;
3870 			break;
3871 		}
3872 	}
3873 
3874 	if (match || !ispanther)
3875 		return (match);
3876 
3877 	/* For Panther we also check the l3$ */
3878 	ecp = &cdp->chd_ec_data[0];
3879 	ec_set_size = PN_L3_SET_SIZE;
3880 	ways = PN_L3_NWAYS;
3881 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3882 
3883 	for (i = 0; i < ways; i++, ecp++) {
3884 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3885 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3886 			match = i + 1;
3887 			*level = 3;
3888 			break;
3889 		}
3890 	}
3891 
3892 	return (match);
3893 }
3894 
3895 #if defined(CPU_IMP_L1_CACHE_PARITY)
3896 /*
3897  * Record information related to the source of an Dcache Parity Error.
3898  */
3899 static void
3900 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3901 {
3902 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3903 	int index;
3904 
3905 	/*
3906 	 * Since instruction decode cannot be done at high PIL
3907 	 * just examine the entire Dcache to locate the error.
3908 	 */
3909 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3910 		ch_flt->parity_data.dpe.cpl_way = -1;
3911 		ch_flt->parity_data.dpe.cpl_off = -1;
3912 	}
3913 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3914 		cpu_dcache_parity_check(ch_flt, index);
3915 }
3916 
3917 /*
3918  * Check all ways of the Dcache at a specified index for good parity.
3919  */
3920 static void
3921 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3922 {
3923 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3924 	uint64_t parity_bits, pbits, data_word;
3925 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3926 	int way, word, data_byte;
3927 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3928 	ch_dc_data_t tmp_dcp;
3929 
3930 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3931 		/*
3932 		 * Perform diagnostic read.
3933 		 */
3934 		get_dcache_dtag(index + way * dc_set_size,
3935 				(uint64_t *)&tmp_dcp);
3936 
3937 		/*
3938 		 * Check tag for even parity.
3939 		 * Sum of 1 bits (including parity bit) should be even.
3940 		 */
3941 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3942 			/*
3943 			 * If this is the first error log detailed information
3944 			 * about it and check the snoop tag. Otherwise just
3945 			 * record the fact that we found another error.
3946 			 */
3947 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3948 				ch_flt->parity_data.dpe.cpl_way = way;
3949 				ch_flt->parity_data.dpe.cpl_cache =
3950 				    CPU_DC_PARITY;
3951 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3952 
3953 				if (popc64(tmp_dcp.dc_sntag &
3954 						CHP_DCSNTAG_PARMASK) & 1) {
3955 					ch_flt->parity_data.dpe.cpl_tag |=
3956 								CHP_DC_SNTAG;
3957 					ch_flt->parity_data.dpe.cpl_lcnt++;
3958 				}
3959 
3960 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3961 			}
3962 
3963 			ch_flt->parity_data.dpe.cpl_lcnt++;
3964 		}
3965 
3966 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3967 			/*
3968 			 * Panther has more parity bits than the other
3969 			 * processors for covering dcache data and so each
3970 			 * byte of data in each word has its own parity bit.
3971 			 */
3972 			parity_bits = tmp_dcp.dc_pn_data_parity;
3973 			for (word = 0; word < 4; word++) {
3974 				data_word = tmp_dcp.dc_data[word];
3975 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3976 				for (data_byte = 0; data_byte < 8;
3977 				    data_byte++) {
3978 					if (((popc64(data_word &
3979 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3980 					    (pbits & 1)) {
3981 						cpu_record_dc_data_parity(
3982 						ch_flt, dcp, &tmp_dcp, way,
3983 						word);
3984 					}
3985 					pbits >>= 1;
3986 					data_word >>= 8;
3987 				}
3988 				parity_bits >>= 8;
3989 			}
3990 		} else {
3991 			/*
3992 			 * Check data array for even parity.
3993 			 * The 8 parity bits are grouped into 4 pairs each
3994 			 * of which covers a 64-bit word.  The endianness is
3995 			 * reversed -- the low-order parity bits cover the
3996 			 * high-order data words.
3997 			 */
3998 			parity_bits = tmp_dcp.dc_utag >> 8;
3999 			for (word = 0; word < 4; word++) {
4000 				pbits = (parity_bits >> (6 - word * 2)) & 3;
4001 				if ((popc64(tmp_dcp.dc_data[word]) +
4002 				    parity_bits_popc[pbits]) & 1) {
4003 					cpu_record_dc_data_parity(ch_flt, dcp,
4004 					    &tmp_dcp, way, word);
4005 				}
4006 			}
4007 		}
4008 	}
4009 }
4010 
4011 static void
4012 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4013     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4014 {
4015 	/*
4016 	 * If this is the first error log detailed information about it.
4017 	 * Otherwise just record the fact that we found another error.
4018 	 */
4019 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4020 		ch_flt->parity_data.dpe.cpl_way = way;
4021 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4022 		ch_flt->parity_data.dpe.cpl_off = word * 8;
4023 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4024 	}
4025 	ch_flt->parity_data.dpe.cpl_lcnt++;
4026 }
4027 
4028 /*
4029  * Record information related to the source of an Icache Parity Error.
4030  *
4031  * Called with the Icache disabled so any diagnostic accesses are safe.
4032  */
4033 static void
4034 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4035 {
4036 	int	ic_set_size;
4037 	int	ic_linesize;
4038 	int	index;
4039 
4040 	if (CPU_PRIVATE(CPU)) {
4041 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4042 		    CH_ICACHE_NWAY;
4043 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4044 	} else {
4045 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4046 		ic_linesize = icache_linesize;
4047 	}
4048 
4049 	ch_flt->parity_data.ipe.cpl_way = -1;
4050 	ch_flt->parity_data.ipe.cpl_off = -1;
4051 
4052 	for (index = 0; index < ic_set_size; index += ic_linesize)
4053 		cpu_icache_parity_check(ch_flt, index);
4054 }
4055 
4056 /*
4057  * Check all ways of the Icache at a specified index for good parity.
4058  */
4059 static void
4060 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4061 {
4062 	uint64_t parmask, pn_inst_parity;
4063 	int ic_set_size;
4064 	int ic_linesize;
4065 	int flt_index, way, instr, num_instr;
4066 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4067 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4068 	ch_ic_data_t tmp_icp;
4069 
4070 	if (CPU_PRIVATE(CPU)) {
4071 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4072 		    CH_ICACHE_NWAY;
4073 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4074 	} else {
4075 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4076 		ic_linesize = icache_linesize;
4077 	}
4078 
4079 	/*
4080 	 * Panther has twice as many instructions per icache line and the
4081 	 * instruction parity bit is in a different location.
4082 	 */
4083 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4084 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4085 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4086 	} else {
4087 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4088 		pn_inst_parity = 0;
4089 	}
4090 
4091 	/*
4092 	 * Index at which we expect to find the parity error.
4093 	 */
4094 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4095 
4096 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4097 		/*
4098 		 * Diagnostic reads expect address argument in ASI format.
4099 		 */
4100 		get_icache_dtag(2 * (index + way * ic_set_size),
4101 				(uint64_t *)&tmp_icp);
4102 
4103 		/*
4104 		 * If this is the index in which we expect to find the
4105 		 * error log detailed information about each of the ways.
4106 		 * This information will be displayed later if we can't
4107 		 * determine the exact way in which the error is located.
4108 		 */
4109 		if (flt_index == index)
4110 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4111 
4112 		/*
4113 		 * Check tag for even parity.
4114 		 * Sum of 1 bits (including parity bit) should be even.
4115 		 */
4116 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4117 			/*
4118 			 * If this way is the one in which we expected
4119 			 * to find the error record the way and check the
4120 			 * snoop tag. Otherwise just record the fact we
4121 			 * found another error.
4122 			 */
4123 			if (flt_index == index) {
4124 				ch_flt->parity_data.ipe.cpl_way = way;
4125 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4126 
4127 				if (popc64(tmp_icp.ic_sntag &
4128 						CHP_ICSNTAG_PARMASK) & 1) {
4129 					ch_flt->parity_data.ipe.cpl_tag |=
4130 								CHP_IC_SNTAG;
4131 					ch_flt->parity_data.ipe.cpl_lcnt++;
4132 				}
4133 
4134 			}
4135 			ch_flt->parity_data.ipe.cpl_lcnt++;
4136 			continue;
4137 		}
4138 
4139 		/*
4140 		 * Check instruction data for even parity.
4141 		 * Bits participating in parity differ for PC-relative
4142 		 * versus non-PC-relative instructions.
4143 		 */
4144 		for (instr = 0; instr < num_instr; instr++) {
4145 			parmask = (tmp_icp.ic_data[instr] &
4146 					CH_ICDATA_PRED_ISPCREL) ?
4147 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4148 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4149 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4150 				/*
4151 				 * If this way is the one in which we expected
4152 				 * to find the error record the way and offset.
4153 				 * Otherwise just log the fact we found another
4154 				 * error.
4155 				 */
4156 				if (flt_index == index) {
4157 					ch_flt->parity_data.ipe.cpl_way = way;
4158 					ch_flt->parity_data.ipe.cpl_off =
4159 								instr * 4;
4160 				}
4161 				ch_flt->parity_data.ipe.cpl_lcnt++;
4162 				continue;
4163 			}
4164 		}
4165 	}
4166 }
4167 
4168 /*
4169  * Record information related to the source of an Pcache Parity Error.
4170  */
4171 static void
4172 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4173 {
4174 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4175 	int index;
4176 
4177 	/*
4178 	 * Since instruction decode cannot be done at high PIL just
4179 	 * examine the entire Pcache to check for any parity errors.
4180 	 */
4181 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4182 		ch_flt->parity_data.dpe.cpl_way = -1;
4183 		ch_flt->parity_data.dpe.cpl_off = -1;
4184 	}
4185 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4186 		cpu_pcache_parity_check(ch_flt, index);
4187 }
4188 
4189 /*
4190  * Check all ways of the Pcache at a specified index for good parity.
4191  */
4192 static void
4193 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4194 {
4195 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4196 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4197 	int way, word, pbit, parity_bits;
4198 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4199 	ch_pc_data_t tmp_pcp;
4200 
4201 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4202 		/*
4203 		 * Perform diagnostic read.
4204 		 */
4205 		get_pcache_dtag(index + way * pc_set_size,
4206 				(uint64_t *)&tmp_pcp);
4207 		/*
4208 		 * Check data array for odd parity. There are 8 parity
4209 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4210 		 * of those bits covers exactly 8 bytes of the data
4211 		 * array:
4212 		 *
4213 		 *	parity bit	P$ data bytes covered
4214 		 *	----------	---------------------
4215 		 *	50		63:56
4216 		 *	51		55:48
4217 		 *	52		47:40
4218 		 *	53		39:32
4219 		 *	54		31:24
4220 		 *	55		23:16
4221 		 *	56		15:8
4222 		 *	57		7:0
4223 		 */
4224 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4225 		for (word = 0; word < pc_data_words; word++) {
4226 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4227 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4228 				/*
4229 				 * If this is the first error log detailed
4230 				 * information about it. Otherwise just record
4231 				 * the fact that we found another error.
4232 				 */
4233 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4234 					ch_flt->parity_data.dpe.cpl_way = way;
4235 					ch_flt->parity_data.dpe.cpl_cache =
4236 					    CPU_PC_PARITY;
4237 					ch_flt->parity_data.dpe.cpl_off =
4238 					    word * sizeof (uint64_t);
4239 					bcopy(&tmp_pcp, pcp,
4240 							sizeof (ch_pc_data_t));
4241 				}
4242 				ch_flt->parity_data.dpe.cpl_lcnt++;
4243 			}
4244 		}
4245 	}
4246 }
4247 
4248 
4249 /*
4250  * Add L1 Data cache data to the ereport payload.
4251  */
4252 static void
4253 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4254 {
4255 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4256 	ch_dc_data_t *dcp;
4257 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4258 	uint_t nelem;
4259 	int i, ways_to_check, ways_logged = 0;
4260 
4261 	/*
4262 	 * If this is an D$ fault then there may be multiple
4263 	 * ways captured in the ch_parity_log_t structure.
4264 	 * Otherwise, there will be at most one way captured
4265 	 * in the ch_diag_data_t struct.
4266 	 * Check each way to see if it should be encoded.
4267 	 */
4268 	if (ch_flt->flt_type == CPU_DC_PARITY)
4269 		ways_to_check = CH_DCACHE_NWAY;
4270 	else
4271 		ways_to_check = 1;
4272 	for (i = 0; i < ways_to_check; i++) {
4273 		if (ch_flt->flt_type == CPU_DC_PARITY)
4274 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4275 		else
4276 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4277 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4278 			bcopy(dcp, &dcdata[ways_logged],
4279 				sizeof (ch_dc_data_t));
4280 			ways_logged++;
4281 		}
4282 	}
4283 
4284 	/*
4285 	 * Add the dcache data to the payload.
4286 	 */
4287 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4288 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4289 	if (ways_logged != 0) {
4290 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4291 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4292 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4293 	}
4294 }
4295 
4296 /*
4297  * Add L1 Instruction cache data to the ereport payload.
4298  */
4299 static void
4300 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4301 {
4302 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4303 	ch_ic_data_t *icp;
4304 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4305 	uint_t nelem;
4306 	int i, ways_to_check, ways_logged = 0;
4307 
4308 	/*
4309 	 * If this is an I$ fault then there may be multiple
4310 	 * ways captured in the ch_parity_log_t structure.
4311 	 * Otherwise, there will be at most one way captured
4312 	 * in the ch_diag_data_t struct.
4313 	 * Check each way to see if it should be encoded.
4314 	 */
4315 	if (ch_flt->flt_type == CPU_IC_PARITY)
4316 		ways_to_check = CH_ICACHE_NWAY;
4317 	else
4318 		ways_to_check = 1;
4319 	for (i = 0; i < ways_to_check; i++) {
4320 		if (ch_flt->flt_type == CPU_IC_PARITY)
4321 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4322 		else
4323 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4324 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4325 			bcopy(icp, &icdata[ways_logged],
4326 				sizeof (ch_ic_data_t));
4327 			ways_logged++;
4328 		}
4329 	}
4330 
4331 	/*
4332 	 * Add the icache data to the payload.
4333 	 */
4334 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4335 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4336 	if (ways_logged != 0) {
4337 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4338 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4339 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4340 	}
4341 }
4342 
4343 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4344 
4345 /*
4346  * Add ecache data to payload.
4347  */
4348 static void
4349 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4350 {
4351 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4352 	ch_ec_data_t *ecp;
4353 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4354 	uint_t nelem;
4355 	int i, ways_logged = 0;
4356 
4357 	/*
4358 	 * Check each way to see if it should be encoded
4359 	 * and concatinate it into a temporary buffer.
4360 	 */
4361 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4362 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4363 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4364 			bcopy(ecp, &ecdata[ways_logged],
4365 				sizeof (ch_ec_data_t));
4366 			ways_logged++;
4367 		}
4368 	}
4369 
4370 	/*
4371 	 * Panther CPUs have an additional level of cache and so
4372 	 * what we just collected was the L3 (ecache) and not the
4373 	 * L2 cache.
4374 	 */
4375 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4376 		/*
4377 		 * Add the L3 (ecache) data to the payload.
4378 		 */
4379 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4380 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4381 		if (ways_logged != 0) {
4382 			nelem = sizeof (ch_ec_data_t) /
4383 			    sizeof (uint64_t) * ways_logged;
4384 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4385 			    DATA_TYPE_UINT64_ARRAY, nelem,
4386 			    (uint64_t *)ecdata, NULL);
4387 		}
4388 
4389 		/*
4390 		 * Now collect the L2 cache.
4391 		 */
4392 		ways_logged = 0;
4393 		for (i = 0; i < PN_L2_NWAYS; i++) {
4394 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4395 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4396 				bcopy(ecp, &ecdata[ways_logged],
4397 				    sizeof (ch_ec_data_t));
4398 				ways_logged++;
4399 			}
4400 		}
4401 	}
4402 
4403 	/*
4404 	 * Add the L2 cache data to the payload.
4405 	 */
4406 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4407 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4408 	if (ways_logged != 0) {
4409 		nelem = sizeof (ch_ec_data_t) /
4410 			sizeof (uint64_t) * ways_logged;
4411 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4412 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4413 	}
4414 }
4415 
4416 /*
4417  * Initialize cpu scheme for specified cpu.
4418  */
4419 static void
4420 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4421 {
4422 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4423 	uint8_t mask;
4424 
4425 	mask = cpunodes[cpuid].version;
4426 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4427 	    (u_longlong_t)cpunodes[cpuid].device_id);
4428 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4429 	    cpuid, &mask, (const char *)sbuf);
4430 }
4431 
4432 /*
4433  * Returns ereport resource type.
4434  */
4435 static int
4436 cpu_error_to_resource_type(struct async_flt *aflt)
4437 {
4438 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4439 
4440 	switch (ch_flt->flt_type) {
4441 
4442 	case CPU_CE_ECACHE:
4443 	case CPU_UE_ECACHE:
4444 	case CPU_UE_ECACHE_RETIRE:
4445 	case CPU_ORPH:
4446 		/*
4447 		 * If AFSR error bit indicates L2$ Data for Cheetah,
4448 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4449 		 * E$ Data type, otherwise, return CPU type.
4450 		 */
4451 		if (cpu_error_is_ecache_data(aflt->flt_inst,
4452 		    ch_flt->flt_bit))
4453 			return (ERRTYPE_ECACHE_DATA);
4454 		return (ERRTYPE_CPU);
4455 
4456 	case CPU_CE:
4457 	case CPU_UE:
4458 	case CPU_EMC:
4459 	case CPU_DUE:
4460 	case CPU_RCE:
4461 	case CPU_RUE:
4462 	case CPU_FRC:
4463 	case CPU_FRU:
4464 		return (ERRTYPE_MEMORY);
4465 
4466 	case CPU_IC_PARITY:
4467 	case CPU_DC_PARITY:
4468 	case CPU_FPUERR:
4469 	case CPU_PC_PARITY:
4470 	case CPU_ITLB_PARITY:
4471 	case CPU_DTLB_PARITY:
4472 		return (ERRTYPE_CPU);
4473 	}
4474 	return (ERRTYPE_UNKNOWN);
4475 }
4476 
4477 /*
4478  * Encode the data saved in the ch_async_flt_t struct into
4479  * the FM ereport payload.
4480  */
4481 static void
4482 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4483 	nvlist_t *resource, int *afar_status, int *synd_status)
4484 {
4485 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4486 	*synd_status = AFLT_STAT_INVALID;
4487 	*afar_status = AFLT_STAT_INVALID;
4488 
4489 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4490 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4491 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4492 	}
4493 
4494 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4495 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4496 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4497 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4498 	}
4499 
4500 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4501 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4502 		    ch_flt->flt_bit);
4503 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4504 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4505 	}
4506 
4507 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4508 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4509 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4510 	}
4511 
4512 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4513 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4514 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4515 	}
4516 
4517 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4518 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4519 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4520 	}
4521 
4522 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4523 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4524 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4525 	}
4526 
4527 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4528 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4529 		    DATA_TYPE_BOOLEAN_VALUE,
4530 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4531 	}
4532 
4533 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4534 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4535 		    DATA_TYPE_BOOLEAN_VALUE,
4536 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4537 	}
4538 
4539 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4540 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4541 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4542 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4543 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4544 	}
4545 
4546 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4547 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4548 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4549 	}
4550 
4551 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4552 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4553 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4554 	}
4555 
4556 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4557 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4558 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4559 	}
4560 
4561 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4562 		cpu_payload_add_ecache(aflt, payload);
4563 
4564 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4565 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4566 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4567 	}
4568 
4569 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4570 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4571 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4572 	}
4573 
4574 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4575 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4576 		    DATA_TYPE_UINT32_ARRAY, 16,
4577 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4578 	}
4579 
4580 #if defined(CPU_IMP_L1_CACHE_PARITY)
4581 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4582 		cpu_payload_add_dcache(aflt, payload);
4583 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4584 		cpu_payload_add_icache(aflt, payload);
4585 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4586 
4587 #if defined(CHEETAH_PLUS)
4588 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4589 		cpu_payload_add_pcache(aflt, payload);
4590 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4591 		cpu_payload_add_tlb(aflt, payload);
4592 #endif	/* CHEETAH_PLUS */
4593 	/*
4594 	 * Create the FMRI that goes into the payload
4595 	 * and contains the unum info if necessary.
4596 	 */
4597 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4598 		char unum[UNUM_NAMLEN] = "";
4599 		char sid[DIMM_SERIAL_ID_LEN] = "";
4600 		int len, ret, rtype, synd_code;
4601 		uint64_t offset = (uint64_t)-1;
4602 
4603 		rtype = cpu_error_to_resource_type(aflt);
4604 		switch (rtype) {
4605 
4606 		case ERRTYPE_MEMORY:
4607 		case ERRTYPE_ECACHE_DATA:
4608 
4609 			/*
4610 			 * Memory errors, do unum lookup
4611 			 */
4612 			if (*afar_status == AFLT_STAT_INVALID)
4613 				break;
4614 
4615 			if (rtype == ERRTYPE_ECACHE_DATA)
4616 				aflt->flt_status |= ECC_ECACHE;
4617 			else
4618 				aflt->flt_status &= ~ECC_ECACHE;
4619 
4620 			synd_code = synd_to_synd_code(*synd_status,
4621 			    aflt->flt_synd, ch_flt->flt_bit);
4622 
4623 			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4624 				break;
4625 
4626 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4627 			    &len);
4628 
4629 			if (ret == 0) {
4630 				(void) cpu_get_mem_offset(aflt->flt_addr,
4631 				    &offset);
4632 			}
4633 
4634 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4635 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4636 			fm_payload_set(payload,
4637 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4638 			    DATA_TYPE_NVLIST, resource, NULL);
4639 			break;
4640 
4641 		case ERRTYPE_CPU:
4642 			/*
4643 			 * On-board processor array error, add cpu resource.
4644 			 */
4645 			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4646 			fm_payload_set(payload,
4647 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4648 			    DATA_TYPE_NVLIST, resource, NULL);
4649 			break;
4650 		}
4651 	}
4652 }
4653 
4654 /*
4655  * Initialize the way info if necessary.
4656  */
4657 void
4658 cpu_ereport_init(struct async_flt *aflt)
4659 {
4660 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4661 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4662 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4663 	int i;
4664 
4665 	/*
4666 	 * Initialize the info in the CPU logout structure.
4667 	 * The I$/D$ way information is not initialized here
4668 	 * since it is captured in the logout assembly code.
4669 	 */
4670 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4671 		(ecp + i)->ec_way = i;
4672 
4673 	for (i = 0; i < PN_L2_NWAYS; i++)
4674 		(l2p + i)->ec_way = i;
4675 }
4676 
4677 /*
4678  * Returns whether fault address is valid for this error bit and
4679  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4680  */
4681 int
4682 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4683 {
4684 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4685 
4686 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4687 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4688 	    AFLT_STAT_VALID &&
4689 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4690 }
4691 
4692 /*
4693  * Returns whether fault address is valid based on the error bit for the
4694  * one event being queued and whether the address is "in memory".
4695  */
4696 static int
4697 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4698 {
4699 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4700 	int afar_status;
4701 	uint64_t afsr_errs, afsr_ow, *ow_bits;
4702 
4703 	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4704 	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4705 		return (0);
4706 
4707 	afsr_errs = ch_flt->afsr_errs;
4708 	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4709 
4710 	switch (afar_status) {
4711 	case AFLT_STAT_VALID:
4712 		return (1);
4713 
4714 	case AFLT_STAT_AMBIGUOUS:
4715 		/*
4716 		 * Status is ambiguous since another error bit (or bits)
4717 		 * of equal priority to the specified bit on in the afsr,
4718 		 * so check those bits. Return 1 only if the bits on in the
4719 		 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4720 		 * Otherwise not all the equal priority bits are for memory
4721 		 * errors, so return 0.
4722 		 */
4723 		ow_bits = afar_overwrite;
4724 		while ((afsr_ow = *ow_bits++) != 0) {
4725 			/*
4726 			 * Get other bits that are on in t_afsr_bit's priority
4727 			 * class to check for Memory Error bits only.
4728 			 */
4729 			if (afsr_ow & t_afsr_bit) {
4730 				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4731 					return (0);
4732 				else
4733 					return (1);
4734 			}
4735 		}
4736 		/*FALLTHRU*/
4737 
4738 	default:
4739 		return (0);
4740 	}
4741 }
4742 
4743 static void
4744 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4745 {
4746 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4747 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4748 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4749 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4750 #if defined(CPU_IMP_ECACHE_ASSOC)
4751 	int i, nway;
4752 #endif /* CPU_IMP_ECACHE_ASSOC */
4753 
4754 	/*
4755 	 * Check if the CPU log out captured was valid.
4756 	 */
4757 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4758 	    ch_flt->flt_data_incomplete)
4759 		return;
4760 
4761 #if defined(CPU_IMP_ECACHE_ASSOC)
4762 	nway = cpu_ecache_nway();
4763 	i =  cpu_ecache_line_valid(ch_flt);
4764 	if (i == 0 || i > nway) {
4765 		for (i = 0; i < nway; i++)
4766 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4767 	} else
4768 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4769 #else /* CPU_IMP_ECACHE_ASSOC */
4770 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4771 #endif /* CPU_IMP_ECACHE_ASSOC */
4772 
4773 #if defined(CHEETAH_PLUS)
4774 	pn_cpu_log_diag_l2_info(ch_flt);
4775 #endif /* CHEETAH_PLUS */
4776 
4777 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4778 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4779 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4780 	}
4781 
4782 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4783 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4784 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4785 		else
4786 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4787 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4788 	}
4789 }
4790 
/*
 * Cheetah ECC calculation.
 *
 * Only the data bits take part in the calculation; the check bit and Mtag
 * bit terms can be ignored.
 *
 * Row N holds the pair of data masks us3_gen_ecc() uses to produce ECC
 * bit N: column 0 is applied to the low-order 64 bits of the 128-bit chunk
 * and column 1 to the high-order 64 bits.
 */
static uint64_t ch_ecc_table[9][2] = {
	/* low-order 64 bits	  high-order 64 bits */
	{ 0x46bffffeccd1177fULL, 0x488800022100014cULL },	/* bit 0 */
	{ 0x42fccc81331ff77fULL, 0x14424f1010249184ULL },	/* bit 1 */
	{ 0x8898827c222f1ffeULL, 0x22c1222808184aafULL },	/* bit 2 */
	{ 0xf7632203e131ccf1ULL, 0xe1241121848292b8ULL },	/* bit 3 */
	{ 0x7f5511421b113809ULL, 0x901c88d84288aafeULL },	/* bit 4 */
	{ 0x1d49412184882487ULL, 0x8f338c87c044c6efULL },	/* bit 5 */
	{ 0xf552181014448344ULL, 0x7ff8f4443e411911ULL },	/* bit 6 */
	{ 0x2189240808f24228ULL, 0xfeeff8cc81333f42ULL },	/* bit 7 */
	{ 0x3280008440001112ULL, 0xfee88b337ffffd62ULL },	/* bit 8 */
};
4811 
/*
 * 64-bit population count: return the number of 1 bits in val.
 *
 * This uses the well-known trick of repeatedly clearing the lowest set
 * bit.  The UltraSPARC V9 POPC instruction is not used because some CPUs,
 * including Cheetahplus and Jaguar, do not support it.
 */
int
popc64(uint64_t val)
{
	int nbits = 0;

	while (val != 0) {
		val &= val - 1;		/* clear the lowest set bit */
		nbits++;
	}
	return (nbits);
}
4827 
4828 /*
4829  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4830  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4831  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4832  * instead of doing all the xor's.
4833  */
4834 uint32_t
4835 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4836 {
4837 	int bitno, s;
4838 	int synd = 0;
4839 
4840 	for (bitno = 0; bitno < 9; bitno++) {
4841 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4842 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4843 		synd |= (s << bitno);
4844 	}
4845 	return (synd);
4846 
4847 }
4848 
4849 /*
4850  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4851  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4852  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4853  */
4854 static void
4855 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4856     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4857 {
4858 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4859 
4860 	if (reason &&
4861 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4862 		(void) strcat(reason, eccp->ec_reason);
4863 	}
4864 
4865 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4866 	ch_flt->flt_type = eccp->ec_flt_type;
4867 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4868 		ch_flt->flt_diag_data = *cdp;
4869 	else
4870 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4871 	aflt->flt_in_memory =
4872 	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4873 
4874 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4875 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4876 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4877 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4878 	else
4879 		aflt->flt_synd = 0;
4880 
4881 	aflt->flt_payload = eccp->ec_err_payload;
4882 
4883 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4884 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4885 		cpu_errorq_dispatch(eccp->ec_err_class,
4886 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4887 		    aflt->flt_panic);
4888 	else
4889 		cpu_errorq_dispatch(eccp->ec_err_class,
4890 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4891 		    aflt->flt_panic);
4892 }
4893 
4894 /*
4895  * Queue events on async event queue one event per error bit.  First we
4896  * queue the events that we "expect" for the given trap, then we queue events
4897  * that we may not expect.  Return number of events queued.
4898  */
4899 int
4900 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4901     ch_cpu_logout_t *clop)
4902 {
4903 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4904 	ecc_type_to_info_t *eccp;
4905 	int nevents = 0;
4906 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4907 #if defined(CHEETAH_PLUS)
4908 	uint64_t orig_t_afsr_errs;
4909 #endif
4910 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4911 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4912 	ch_diag_data_t *cdp = NULL;
4913 
4914 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4915 
4916 #if defined(CHEETAH_PLUS)
4917 	orig_t_afsr_errs = t_afsr_errs;
4918 
4919 	/*
4920 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4921 	 */
4922 	if (clop != NULL) {
4923 		/*
4924 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4925 		 * flt_addr and flt_stat fields will be reset to the primaries
4926 		 * below, but the sdw_addr and sdw_stat will stay as the
4927 		 * secondaries.
4928 		 */
4929 		cdp = &clop->clo_sdw_data;
4930 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4931 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4932 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4933 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4934 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4935 
4936 		/*
4937 		 * If the primary and shadow AFSR differ, tag the shadow as
4938 		 * the first fault.
4939 		 */
4940 		if ((primary_afar != cdp->chd_afar) ||
4941 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4942 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4943 		}
4944 
4945 		/*
4946 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4947 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4948 		 * is expected to be zero for those CPUs which do not have
4949 		 * an AFSR_EXT register.
4950 		 */
4951 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4952 			if ((eccp->ec_afsr_bit &
4953 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4954 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4955 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4956 				cdp = NULL;
4957 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4958 				nevents++;
4959 			}
4960 		}
4961 
4962 		/*
4963 		 * If the ME bit is on in the primary AFSR turn all the
4964 		 * error bits on again that may set the ME bit to make
4965 		 * sure we see the ME AFSR error logs.
4966 		 */
4967 		if ((primary_afsr & C_AFSR_ME) != 0)
4968 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4969 	}
4970 #endif	/* CHEETAH_PLUS */
4971 
4972 	if (clop != NULL)
4973 		cdp = &clop->clo_data;
4974 
4975 	/*
4976 	 * Queue expected errors, error bit and fault type must match
4977 	 * in the ecc_type_to_info table.
4978 	 */
4979 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4980 	    eccp++) {
4981 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4982 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4983 #if defined(SERRANO)
4984 			/*
4985 			 * For FRC/FRU errors on Serrano the afar2 captures
4986 			 * the address and the associated data is
4987 			 * in the shadow logout area.
4988 			 */
4989 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4990 				if (clop != NULL)
4991 					cdp = &clop->clo_sdw_data;
4992 				aflt->flt_addr = ch_flt->afar2;
4993 			} else {
4994 				if (clop != NULL)
4995 					cdp = &clop->clo_data;
4996 				aflt->flt_addr = primary_afar;
4997 			}
4998 #else	/* SERRANO */
4999 			aflt->flt_addr = primary_afar;
5000 #endif	/* SERRANO */
5001 			aflt->flt_stat = primary_afsr;
5002 			ch_flt->afsr_ext = primary_afsr_ext;
5003 			ch_flt->afsr_errs = primary_afsr_errs;
5004 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5005 			cdp = NULL;
5006 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5007 			nevents++;
5008 		}
5009 	}
5010 
5011 	/*
5012 	 * Queue unexpected errors, error bit only match.
5013 	 */
5014 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5015 	    eccp++) {
5016 		if (eccp->ec_afsr_bit & t_afsr_errs) {
5017 #if defined(SERRANO)
5018 			/*
5019 			 * For FRC/FRU errors on Serrano the afar2 captures
5020 			 * the address and the associated data is
5021 			 * in the shadow logout area.
5022 			 */
5023 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5024 				if (clop != NULL)
5025 					cdp = &clop->clo_sdw_data;
5026 				aflt->flt_addr = ch_flt->afar2;
5027 			} else {
5028 				if (clop != NULL)
5029 					cdp = &clop->clo_data;
5030 				aflt->flt_addr = primary_afar;
5031 			}
5032 #else	/* SERRANO */
5033 			aflt->flt_addr = primary_afar;
5034 #endif	/* SERRANO */
5035 			aflt->flt_stat = primary_afsr;
5036 			ch_flt->afsr_ext = primary_afsr_ext;
5037 			ch_flt->afsr_errs = primary_afsr_errs;
5038 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5039 			cdp = NULL;
5040 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5041 			nevents++;
5042 		}
5043 	}
5044 	return (nevents);
5045 }
5046 
5047 /*
5048  * Return trap type number.
5049  */
5050 uint8_t
5051 flt_to_trap_type(struct async_flt *aflt)
5052 {
5053 	if (aflt->flt_status & ECC_I_TRAP)
5054 		return (TRAP_TYPE_ECC_I);
5055 	if (aflt->flt_status & ECC_D_TRAP)
5056 		return (TRAP_TYPE_ECC_D);
5057 	if (aflt->flt_status & ECC_F_TRAP)
5058 		return (TRAP_TYPE_ECC_F);
5059 	if (aflt->flt_status & ECC_C_TRAP)
5060 		return (TRAP_TYPE_ECC_C);
5061 	if (aflt->flt_status & ECC_DP_TRAP)
5062 		return (TRAP_TYPE_ECC_DP);
5063 	if (aflt->flt_status & ECC_IP_TRAP)
5064 		return (TRAP_TYPE_ECC_IP);
5065 	if (aflt->flt_status & ECC_ITLB_TRAP)
5066 		return (TRAP_TYPE_ECC_ITLB);
5067 	if (aflt->flt_status & ECC_DTLB_TRAP)
5068 		return (TRAP_TYPE_ECC_DTLB);
5069 	return (TRAP_TYPE_UNKNOWN);
5070 }
5071 
5072 /*
5073  * Decide an error type based on detector and leaky/partner tests.
5074  * The following array is used for quick translation - it must
5075  * stay in sync with ce_dispact_t.
5076  */
5077 
5078 static char *cetypes[] = {
5079 	CE_DISP_DESC_U,
5080 	CE_DISP_DESC_I,
5081 	CE_DISP_DESC_PP,
5082 	CE_DISP_DESC_P,
5083 	CE_DISP_DESC_L,
5084 	CE_DISP_DESC_PS,
5085 	CE_DISP_DESC_S
5086 };
5087 
5088 char *
5089 flt_to_error_type(struct async_flt *aflt)
5090 {
5091 	ce_dispact_t dispact, disp;
5092 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5093 
5094 	/*
5095 	 * The memory payload bundle is shared by some events that do
5096 	 * not perform any classification.  For those flt_disp will be
5097 	 * 0 and we will return "unknown".
5098 	 */
5099 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5100 		return (cetypes[CE_DISP_UNKNOWN]);
5101 
5102 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5103 
5104 	/*
5105 	 * It is also possible that no scrub/classification was performed
5106 	 * by the detector, for instance where a disrupting error logged
5107 	 * in the AFSR while CEEN was off in cpu_deferred_error.
5108 	 */
5109 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5110 		return (cetypes[CE_DISP_UNKNOWN]);
5111 
5112 	/*
5113 	 * Lookup type in initial classification/action table
5114 	 */
5115 	dispact = CE_DISPACT(ce_disp_table,
5116 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
5117 	    CE_XDIAG_STATE(dtcrinfo),
5118 	    CE_XDIAG_CE1SEEN(dtcrinfo),
5119 	    CE_XDIAG_CE2SEEN(dtcrinfo));
5120 
5121 	/*
5122 	 * A bad lookup is not something to panic production systems for.
5123 	 */
5124 	ASSERT(dispact != CE_DISP_BAD);
5125 	if (dispact == CE_DISP_BAD)
5126 		return (cetypes[CE_DISP_UNKNOWN]);
5127 
5128 	disp = CE_DISP(dispact);
5129 
5130 	switch (disp) {
5131 	case CE_DISP_UNKNOWN:
5132 	case CE_DISP_INTERMITTENT:
5133 		break;
5134 
5135 	case CE_DISP_POSS_PERS:
5136 		/*
5137 		 * "Possible persistent" errors to which we have applied a valid
5138 		 * leaky test can be separated into "persistent" or "leaky".
5139 		 */
5140 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5141 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
5142 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5143 			    CE_XDIAG_CE2SEEN(lkyinfo))
5144 				disp = CE_DISP_LEAKY;
5145 			else
5146 				disp = CE_DISP_PERS;
5147 		}
5148 		break;
5149 
5150 	case CE_DISP_POSS_STICKY:
5151 		/*
5152 		 * Promote "possible sticky" results that have been
5153 		 * confirmed by a partner test to "sticky".  Unconfirmed
5154 		 * "possible sticky" events are left at that status - we do not
5155 		 * guess at any bad reader/writer etc status here.
5156 		 */
5157 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5158 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5159 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5160 			disp = CE_DISP_STICKY;
5161 
5162 		/*
5163 		 * Promote "possible sticky" results on a uniprocessor
5164 		 * to "sticky"
5165 		 */
5166 		if (disp == CE_DISP_POSS_STICKY &&
5167 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5168 			disp = CE_DISP_STICKY;
5169 		break;
5170 
5171 	default:
5172 		disp = CE_DISP_UNKNOWN;
5173 		break;
5174 	}
5175 
5176 	return (cetypes[disp]);
5177 }
5178 
5179 /*
5180  * Given the entire afsr, the specific bit to check and a prioritized list of
5181  * error bits, determine the validity of the various overwrite priority
5182  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5183  * different overwrite priorities.
5184  *
5185  * Given a specific afsr error bit and the entire afsr, there are three cases:
5186  *   INVALID:	The specified bit is lower overwrite priority than some other
5187  *		error bit which is on in the afsr (or IVU/IVC).
5188  *   VALID:	The specified bit is higher priority than all other error bits
5189  *		which are on in the afsr.
5190  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5191  *		bit is on in the afsr.
5192  */
5193 int
5194 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5195 {
5196 	uint64_t afsr_ow;
5197 
5198 	while ((afsr_ow = *ow_bits++) != 0) {
5199 		/*
5200 		 * If bit is in the priority class, check to see if another
5201 		 * bit in the same class is on => ambiguous.  Otherwise,
5202 		 * the value is valid.  If the bit is not on at this priority
5203 		 * class, but a higher priority bit is on, then the value is
5204 		 * invalid.
5205 		 */
5206 		if (afsr_ow & afsr_bit) {
5207 			/*
5208 			 * If equal pri bit is on, ambiguous.
5209 			 */
5210 			if (afsr & (afsr_ow & ~afsr_bit))
5211 				return (AFLT_STAT_AMBIGUOUS);
5212 			return (AFLT_STAT_VALID);
5213 		} else if (afsr & afsr_ow)
5214 			break;
5215 	}
5216 
5217 	/*
5218 	 * We didn't find a match or a higher priority bit was on.  Not
5219 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5220 	 */
5221 	return (AFLT_STAT_INVALID);
5222 }
5223 
5224 static int
5225 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5226 {
5227 #if defined(SERRANO)
5228 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5229 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5230 	else
5231 #endif	/* SERRANO */
5232 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5233 }
5234 
5235 static int
5236 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5237 {
5238 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5239 }
5240 
5241 static int
5242 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5243 {
5244 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5245 }
5246 
5247 static int
5248 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5249 {
5250 #ifdef lint
5251 	cpuid = cpuid;
5252 #endif
5253 #if defined(CHEETAH_PLUS)
5254 	/*
5255 	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5256 	 * policy for Cheetah+ and separate for Panther CPUs.
5257 	 */
5258 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5259 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5260 			return (afsr_to_msynd_status(afsr, afsr_bit));
5261 		else
5262 			return (afsr_to_esynd_status(afsr, afsr_bit));
5263 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5264 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5265 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5266 		else
5267 			return (afsr_to_esynd_status(afsr, afsr_bit));
5268 #else /* CHEETAH_PLUS */
5269 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5270 		return (afsr_to_msynd_status(afsr, afsr_bit));
5271 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5272 		return (afsr_to_esynd_status(afsr, afsr_bit));
5273 #endif /* CHEETAH_PLUS */
5274 	} else {
5275 		return (AFLT_STAT_INVALID);
5276 	}
5277 }
5278 
5279 /*
5280  * Slave CPU stick synchronization.
5281  */
5282 void
5283 sticksync_slave(void)
5284 {
5285 	int 		i;
5286 	int		tries = 0;
5287 	int64_t		tskew;
5288 	int64_t		av_tskew;
5289 
5290 	kpreempt_disable();
5291 	/* wait for the master side */
5292 	while (stick_sync_cmd != SLAVE_START)
5293 		;
5294 	/*
5295 	 * Synchronization should only take a few tries at most. But in the
5296 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5297 	 * without it's stick synchronized wouldn't be a good citizen.
5298 	 */
5299 	while (slave_done == 0) {
5300 		/*
5301 		 * Time skew calculation.
5302 		 */
5303 		av_tskew = tskew = 0;
5304 
5305 		for (i = 0; i < stick_iter; i++) {
5306 			/* make location hot */
5307 			timestamp[EV_A_START] = 0;
5308 			stick_timestamp(&timestamp[EV_A_START]);
5309 
5310 			/* tell the master we're ready */
5311 			stick_sync_cmd = MASTER_START;
5312 
5313 			/* and wait */
5314 			while (stick_sync_cmd != SLAVE_CONT)
5315 				;
5316 			/* Event B end */
5317 			stick_timestamp(&timestamp[EV_B_END]);
5318 
5319 			/* calculate time skew */
5320 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5321 				- (timestamp[EV_A_END] -
5322 				timestamp[EV_A_START])) / 2;
5323 
5324 			/* keep running count */
5325 			av_tskew += tskew;
5326 		} /* for */
5327 
5328 		/*
5329 		 * Adjust stick for time skew if not within the max allowed;
5330 		 * otherwise we're all done.
5331 		 */
5332 		if (stick_iter != 0)
5333 			av_tskew = av_tskew/stick_iter;
5334 		if (ABS(av_tskew) > stick_tsk) {
5335 			/*
5336 			 * If the skew is 1 (the slave's STICK register
5337 			 * is 1 STICK ahead of the master's), stick_adj
5338 			 * could fail to adjust the slave's STICK register
5339 			 * if the STICK read on the slave happens to
5340 			 * align with the increment of the STICK.
5341 			 * Therefore, we increment the skew to 2.
5342 			 */
5343 			if (av_tskew == 1)
5344 				av_tskew++;
5345 			stick_adj(-av_tskew);
5346 		} else
5347 			slave_done = 1;
5348 #ifdef DEBUG
5349 		if (tries < DSYNC_ATTEMPTS)
5350 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5351 				av_tskew;
5352 		++tries;
5353 #endif /* DEBUG */
5354 #ifdef lint
5355 		tries = tries;
5356 #endif
5357 
5358 	} /* while */
5359 
5360 	/* allow the master to finish */
5361 	stick_sync_cmd = EVENT_NULL;
5362 	kpreempt_enable();
5363 }
5364 
5365 /*
5366  * Master CPU side of stick synchronization.
5367  *  - timestamp end of Event A
5368  *  - timestamp beginning of Event B
5369  */
5370 void
5371 sticksync_master(void)
5372 {
5373 	int		i;
5374 
5375 	kpreempt_disable();
5376 	/* tell the slave we've started */
5377 	slave_done = 0;
5378 	stick_sync_cmd = SLAVE_START;
5379 
5380 	while (slave_done == 0) {
5381 		for (i = 0; i < stick_iter; i++) {
5382 			/* wait for the slave */
5383 			while (stick_sync_cmd != MASTER_START)
5384 				;
5385 			/* Event A end */
5386 			stick_timestamp(&timestamp[EV_A_END]);
5387 
5388 			/* make location hot */
5389 			timestamp[EV_B_START] = 0;
5390 			stick_timestamp(&timestamp[EV_B_START]);
5391 
5392 			/* tell the slave to continue */
5393 			stick_sync_cmd = SLAVE_CONT;
5394 		} /* for */
5395 
5396 		/* wait while slave calculates time skew */
5397 		while (stick_sync_cmd == SLAVE_CONT)
5398 			;
5399 	} /* while */
5400 	kpreempt_enable();
5401 }
5402 
/*
 * Nothing to do here: Cheetah/Cheetah+ have a disrupting error for
 * copyback's, so the Spitfire hack of xcall'ing all the cpus to ask them
 * to check for such errors is not needed.  Also, in cpu_async_panic_callb,
 * each cpu checks for CPU events on its way to panic idle.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
	/* intentionally empty */
}
5413 
/*
 * kmem cache that cpu_uninit_private() frees each cpu's cheetah_private_t
 * back to.
 */
struct kmem_cache *ch_private_cache;
5415 
5416 /*
5417  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5418  * deallocate the scrubber data structures and cpu_private data structure.
5419  */
5420 void
5421 cpu_uninit_private(struct cpu *cp)
5422 {
5423 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5424 
5425 	ASSERT(chprp);
5426 	cpu_uninit_ecache_scrub_dr(cp);
5427 	CPU_PRIVATE(cp) = NULL;
5428 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5429 	kmem_cache_free(ch_private_cache, chprp);
5430 	cmp_delete_cpu(cp->cpu_id);
5431 
5432 }
5433 
5434 /*
5435  * Cheetah Cache Scrubbing
5436  *
5437  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5438  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5439  * protected by either parity or ECC.
5440  *
5441  * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
5442  * cache per second). Due to the the specifics of how the I$ control
5443  * logic works with respect to the ASI used to scrub I$ lines, the entire
5444  * I$ is scanned at once.
5445  */
5446 
5447 /*
5448  * Tuneables to enable and disable the scrubbing of the caches, and to tune
5449  * scrubbing behavior.  These may be changed via /etc/system or using mdb
5450  * on a running system.
5451  */
5452 int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5453 
5454 /*
5455  * The following are the PIL levels that the softints/cross traps will fire at.
5456  */
5457 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5458 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5459 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5460 
5461 #if defined(JALAPENO)
5462 
5463 /*
5464  * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5465  * on Jalapeno.
5466  */
5467 int ecache_scrub_enable = 0;
5468 
5469 #else	/* JALAPENO */
5470 
5471 /*
5472  * With all other cpu types, E$ scrubbing is on by default
5473  */
5474 int ecache_scrub_enable = 1;
5475 
5476 #endif	/* JALAPENO */
5477 
5478 
5479 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5480 
5481 /*
5482  * The I$ scrubber tends to cause latency problems for real-time SW, so it
5483  * is disabled by default on non-Cheetah systems
5484  */
5485 int icache_scrub_enable = 0;
5486 
5487 /*
5488  * Tuneables specifying the scrub calls per second and the scan rate
5489  * for each cache
5490  *
5491  * The cyclic times are set during boot based on the following values.
5492  * Changing these values in mdb after this time will have no effect.  If
5493  * a different value is desired, it must be set in /etc/system before a
5494  * reboot.
5495  */
5496 int ecache_calls_a_sec = 1;
5497 int dcache_calls_a_sec = 2;
5498 int icache_calls_a_sec = 2;
5499 
5500 int ecache_scan_rate_idle = 1;
5501 int ecache_scan_rate_busy = 1;
5502 int dcache_scan_rate_idle = 1;
5503 int dcache_scan_rate_busy = 1;
5504 int icache_scan_rate_idle = 1;
5505 int icache_scan_rate_busy = 1;
5506 
5507 #else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5508 
5509 int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
5510 
5511 int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
5512 int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
5513 int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */
5514 
5515 int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
5516 int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
5517 int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
5518 int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
5519 int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
5520 int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
5521 
5522 #endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5523 
5524 /*
5525  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5526  * increment the outstanding request counter and schedule a softint to run
5527  * the scrubber.
5528  */
5529 extern xcfunc_t cache_scrubreq_tl1;
5530 
5531 /*
5532  * These are the softint functions for each cache scrubber
5533  */
5534 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5535 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5536 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5537 
5538 /*
5539  * The cache scrub info table contains cache specific information
5540  * and allows for some of the scrub code to be table driven, reducing
5541  * duplication of cache similar code.
5542  *
5543  * This table keeps a copy of the value in the calls per second variable
5544  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5545  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5546  * mdb in a misguided attempt to disable the scrubber).
5547  */
5548 struct scrub_info {
5549 	int		*csi_enable;	/* scrubber enable flag */
5550 	int		csi_freq;	/* scrubber calls per second */
5551 	int		csi_index;	/* index to chsm_outstanding[] */
5552 	uint64_t	csi_inum;	/* scrubber interrupt number */
5553 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5554 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5555 	char		csi_name[3];	/* cache name for this scrub entry */
5556 } cache_scrub_info[] = {
5557 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5558 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5559 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5560 };
5561 
5562 /*
5563  * If scrubbing is enabled, increment the outstanding request counter.  If it
5564  * is 1 (meaning there were no previous requests outstanding), call
5565  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5566  * a self trap.
5567  */
5568 static void
5569 do_scrub(struct scrub_info *csi)
5570 {
5571 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5572 	int index = csi->csi_index;
5573 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5574 
5575 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5576 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5577 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5578 			    csi->csi_inum, 0);
5579 		}
5580 	}
5581 }
5582 
5583 /*
5584  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5585  * cross-trap the offline cpus.
5586  */
5587 static void
5588 do_scrub_offline(struct scrub_info *csi)
5589 {
5590 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5591 
5592 	if (CPUSET_ISNULL(cpu_offline_set)) {
5593 		/*
5594 		 * No offline cpus - nothing to do
5595 		 */
5596 		return;
5597 	}
5598 
5599 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5600 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5601 		    csi->csi_index);
5602 	}
5603 }
5604 
5605 /*
5606  * This is the initial setup for the scrubber cyclics - it sets the
5607  * interrupt level, frequency, and function to call.
5608  */
5609 /*ARGSUSED*/
5610 static void
5611 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5612     cyc_time_t *when)
5613 {
5614 	struct scrub_info *csi = (struct scrub_info *)arg;
5615 
5616 	ASSERT(csi != NULL);
5617 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5618 	hdlr->cyh_level = CY_LOW_LEVEL;
5619 	hdlr->cyh_arg = arg;
5620 
5621 	when->cyt_when = 0;	/* Start immediately */
5622 	when->cyt_interval = NANOSEC / csi->csi_freq;
5623 }
5624 
5625 /*
5626  * Initialization for cache scrubbing.
5627  * This routine is called AFTER all cpus have had cpu_init_private called
5628  * to initialize their private data areas.
5629  */
5630 void
5631 cpu_init_cache_scrub(void)
5632 {
5633 	int i;
5634 	struct scrub_info *csi;
5635 	cyc_omni_handler_t omni_hdlr;
5636 	cyc_handler_t offline_hdlr;
5637 	cyc_time_t when;
5638 
5639 	/*
5640 	 * save away the maximum number of lines for the D$
5641 	 */
5642 	dcache_nlines = dcache_size / dcache_linesize;
5643 
5644 	/*
5645 	 * register the softints for the cache scrubbing
5646 	 */
5647 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5648 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5649 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5650 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5651 
5652 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5653 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5654 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5655 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5656 
5657 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5658 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5659 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5660 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5661 
5662 	/*
5663 	 * start the scrubbing for all the caches
5664 	 */
5665 	mutex_enter(&cpu_lock);
5666 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5667 
5668 		csi = &cache_scrub_info[i];
5669 
5670 		if (!(*csi->csi_enable))
5671 			continue;
5672 
5673 		/*
5674 		 * force the following to be true:
5675 		 *	1 <= calls_a_sec <= hz
5676 		 */
5677 		if (csi->csi_freq > hz) {
5678 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5679 				"(%d); resetting to hz (%d)", csi->csi_name,
5680 				csi->csi_freq, hz);
5681 			csi->csi_freq = hz;
5682 		} else if (csi->csi_freq < 1) {
5683 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5684 				"(%d); resetting to 1", csi->csi_name,
5685 				csi->csi_freq);
5686 			csi->csi_freq = 1;
5687 		}
5688 
5689 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5690 		omni_hdlr.cyo_offline = NULL;
5691 		omni_hdlr.cyo_arg = (void *)csi;
5692 
5693 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5694 		offline_hdlr.cyh_arg = (void *)csi;
5695 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5696 
5697 		when.cyt_when = 0;	/* Start immediately */
5698 		when.cyt_interval = NANOSEC / csi->csi_freq;
5699 
5700 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5701 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5702 	}
5703 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5704 	mutex_exit(&cpu_lock);
5705 }
5706 
5707 /*
5708  * Indicate that the specified cpu is idle.
5709  */
5710 void
5711 cpu_idle_ecache_scrub(struct cpu *cp)
5712 {
5713 	if (CPU_PRIVATE(cp) != NULL) {
5714 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5715 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5716 	}
5717 }
5718 
5719 /*
5720  * Indicate that the specified cpu is busy.
5721  */
5722 void
5723 cpu_busy_ecache_scrub(struct cpu *cp)
5724 {
5725 	if (CPU_PRIVATE(cp) != NULL) {
5726 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5727 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5728 	}
5729 }
5730 
5731 /*
5732  * Initialization for cache scrubbing for the specified cpu.
5733  */
5734 void
5735 cpu_init_ecache_scrub_dr(struct cpu *cp)
5736 {
5737 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5738 	int cpuid = cp->cpu_id;
5739 
5740 	/* initialize the number of lines in the caches */
5741 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5742 	    cpunodes[cpuid].ecache_linesize;
5743 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5744 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5745 
5746 	/*
5747 	 * do_scrub() and do_scrub_offline() check both the global
5748 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5749 	 * check this value before scrubbing.  Currently, we use it to
5750 	 * disable the E$ scrubber on multi-core cpus or while running at
5751 	 * slowed speed.  For now, just turn everything on and allow
5752 	 * cpu_init_private() to change it if necessary.
5753 	 */
5754 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5755 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5756 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5757 
5758 	cpu_busy_ecache_scrub(cp);
5759 }
5760 
5761 /*
5762  * Un-initialization for cache scrubbing for the specified cpu.
5763  */
5764 static void
5765 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5766 {
5767 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5768 
5769 	/*
5770 	 * un-initialize bookkeeping for cache scrubbing
5771 	 */
5772 	bzero(csmp, sizeof (ch_scrub_misc_t));
5773 
5774 	cpu_idle_ecache_scrub(cp);
5775 }
5776 
5777 /*
5778  * Called periodically on each CPU to scrub the D$.
5779  */
5780 static void
5781 scrub_dcache(int how_many)
5782 {
5783 	int i;
5784 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5785 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5786 
5787 	/*
5788 	 * scrub the desired number of lines
5789 	 */
5790 	for (i = 0; i < how_many; i++) {
5791 		/*
5792 		 * scrub a D$ line
5793 		 */
5794 		dcache_inval_line(index);
5795 
5796 		/*
5797 		 * calculate the next D$ line to scrub, assumes
5798 		 * that dcache_nlines is a power of 2
5799 		 */
5800 		index = (index + 1) & (dcache_nlines - 1);
5801 	}
5802 
5803 	/*
5804 	 * set the scrub index for the next visit
5805 	 */
5806 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5807 }
5808 
5809 /*
5810  * Handler for D$ scrub inum softint. Call scrub_dcache until
5811  * we decrement the outstanding request count to zero.
5812  */
5813 /*ARGSUSED*/
5814 static uint_t
5815 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5816 {
5817 	int i;
5818 	int how_many;
5819 	int outstanding;
5820 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5821 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5822 	struct scrub_info *csi = (struct scrub_info *)arg1;
5823 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5824 		dcache_scan_rate_idle : dcache_scan_rate_busy;
5825 
5826 	/*
5827 	 * The scan rates are expressed in units of tenths of a
5828 	 * percent.  A scan rate of 1000 (100%) means the whole
5829 	 * cache is scanned every second.
5830 	 */
5831 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5832 
5833 	do {
5834 		outstanding = *countp;
5835 		for (i = 0; i < outstanding; i++) {
5836 			scrub_dcache(how_many);
5837 		}
5838 	} while (atomic_add_32_nv(countp, -outstanding));
5839 
5840 	return (DDI_INTR_CLAIMED);
5841 }
5842 
5843 /*
5844  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5845  * by invalidating lines. Due to the characteristics of the ASI which
5846  * is used to invalidate an I$ line, the entire I$ must be invalidated
5847  * vs. an individual I$ line.
5848  */
5849 static void
5850 scrub_icache(int how_many)
5851 {
5852 	int i;
5853 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5854 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5855 	int icache_nlines = csmp->chsm_icache_nlines;
5856 
5857 	/*
5858 	 * scrub the desired number of lines
5859 	 */
5860 	for (i = 0; i < how_many; i++) {
5861 		/*
5862 		 * since the entire I$ must be scrubbed at once,
5863 		 * wait until the index wraps to zero to invalidate
5864 		 * the entire I$
5865 		 */
5866 		if (index == 0) {
5867 			icache_inval_all();
5868 		}
5869 
5870 		/*
5871 		 * calculate the next I$ line to scrub, assumes
5872 		 * that chsm_icache_nlines is a power of 2
5873 		 */
5874 		index = (index + 1) & (icache_nlines - 1);
5875 	}
5876 
5877 	/*
5878 	 * set the scrub index for the next visit
5879 	 */
5880 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5881 }
5882 
5883 /*
5884  * Handler for I$ scrub inum softint. Call scrub_icache until
5885  * we decrement the outstanding request count to zero.
5886  */
5887 /*ARGSUSED*/
5888 static uint_t
5889 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5890 {
5891 	int i;
5892 	int how_many;
5893 	int outstanding;
5894 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5895 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5896 	struct scrub_info *csi = (struct scrub_info *)arg1;
5897 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5898 	    icache_scan_rate_idle : icache_scan_rate_busy;
5899 	int icache_nlines = csmp->chsm_icache_nlines;
5900 
5901 	/*
5902 	 * The scan rates are expressed in units of tenths of a
5903 	 * percent.  A scan rate of 1000 (100%) means the whole
5904 	 * cache is scanned every second.
5905 	 */
5906 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5907 
5908 	do {
5909 		outstanding = *countp;
5910 		for (i = 0; i < outstanding; i++) {
5911 			scrub_icache(how_many);
5912 		}
5913 	} while (atomic_add_32_nv(countp, -outstanding));
5914 
5915 	return (DDI_INTR_CLAIMED);
5916 }
5917 
5918 /*
5919  * Called periodically on each CPU to scrub the E$.
5920  */
5921 static void
5922 scrub_ecache(int how_many)
5923 {
5924 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5925 	int i;
5926 	int cpuid = CPU->cpu_id;
5927 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5928 	int nlines = csmp->chsm_ecache_nlines;
5929 	int linesize = cpunodes[cpuid].ecache_linesize;
5930 	int ec_set_size = cpu_ecache_set_size(CPU);
5931 
5932 	/*
5933 	 * scrub the desired number of lines
5934 	 */
5935 	for (i = 0; i < how_many; i++) {
5936 		/*
5937 		 * scrub the E$ line
5938 		 */
5939 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5940 		    ec_set_size);
5941 
5942 		/*
5943 		 * calculate the next E$ line to scrub based on twice
5944 		 * the number of E$ lines (to displace lines containing
5945 		 * flush area data), assumes that the number of lines
5946 		 * is a power of 2
5947 		 */
5948 		index = (index + 1) & ((nlines << 1) - 1);
5949 	}
5950 
5951 	/*
5952 	 * set the ecache scrub index for the next visit
5953 	 */
5954 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5955 }
5956 
5957 /*
5958  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5959  * we decrement the outstanding request count to zero.
5960  *
5961  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5962  * become negative after the atomic_add_32_nv().  This is not a problem, as
5963  * the next trip around the loop won't scrub anything, and the next add will
5964  * reset the count back to zero.
5965  */
5966 /*ARGSUSED*/
5967 static uint_t
5968 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5969 {
5970 	int i;
5971 	int how_many;
5972 	int outstanding;
5973 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5974 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5975 	struct scrub_info *csi = (struct scrub_info *)arg1;
5976 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5977 		ecache_scan_rate_idle : ecache_scan_rate_busy;
5978 	int ecache_nlines = csmp->chsm_ecache_nlines;
5979 
5980 	/*
5981 	 * The scan rates are expressed in units of tenths of a
5982 	 * percent.  A scan rate of 1000 (100%) means the whole
5983 	 * cache is scanned every second.
5984 	 */
5985 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5986 
5987 	do {
5988 		outstanding = *countp;
5989 		for (i = 0; i < outstanding; i++) {
5990 			scrub_ecache(how_many);
5991 		}
5992 	} while (atomic_add_32_nv(countp, -outstanding));
5993 
5994 	return (DDI_INTR_CLAIMED);
5995 }
5996 
5997 /*
5998  * Timeout function to reenable CE
5999  */
6000 static void
6001 cpu_delayed_check_ce_errors(void *arg)
6002 {
6003 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6004 	    TQ_NOSLEEP)) {
6005 		(void) timeout(cpu_delayed_check_ce_errors, arg,
6006 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6007 	}
6008 }
6009 
6010 /*
6011  * CE Deferred Re-enable after trap.
6012  *
6013  * When the CPU gets a disrupting trap for any of the errors
6014  * controlled by the CEEN bit, CEEN is disabled in the trap handler
6015  * immediately. To eliminate the possibility of multiple CEs causing
6016  * recursive stack overflow in the trap handler, we cannot
6017  * reenable CEEN while still running in the trap handler. Instead,
6018  * after a CE is logged on a CPU, we schedule a timeout function,
6019  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6020  * seconds. This function will check whether any further CEs
6021  * have occurred on that CPU, and if none have, will reenable CEEN.
6022  *
6023  * If further CEs have occurred while CEEN is disabled, another
6024  * timeout will be scheduled. This is to ensure that the CPU can
6025  * make progress in the face of CE 'storms', and that it does not
6026  * spend all its time logging CE errors.
6027  */
6028 static void
6029 cpu_check_ce_errors(void *arg)
6030 {
6031 	int	cpuid = (int)(uintptr_t)arg;
6032 	cpu_t	*cp;
6033 
6034 	/*
6035 	 * We acquire cpu_lock.
6036 	 */
6037 	ASSERT(curthread->t_pil == 0);
6038 
6039 	/*
6040 	 * verify that the cpu is still around, DR
6041 	 * could have got there first ...
6042 	 */
6043 	mutex_enter(&cpu_lock);
6044 	cp = cpu_get(cpuid);
6045 	if (cp == NULL) {
6046 		mutex_exit(&cpu_lock);
6047 		return;
6048 	}
6049 	/*
6050 	 * make sure we don't migrate across CPUs
6051 	 * while checking our CE status.
6052 	 */
6053 	kpreempt_disable();
6054 
6055 	/*
6056 	 * If we are running on the CPU that got the
6057 	 * CE, we can do the checks directly.
6058 	 */
6059 	if (cp->cpu_id == CPU->cpu_id) {
6060 		mutex_exit(&cpu_lock);
6061 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6062 		kpreempt_enable();
6063 		return;
6064 	}
6065 	kpreempt_enable();
6066 
6067 	/*
6068 	 * send an x-call to get the CPU that originally
6069 	 * got the CE to do the necessary checks. If we can't
6070 	 * send the x-call, reschedule the timeout, otherwise we
6071 	 * lose CEEN forever on that CPU.
6072 	 */
6073 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6074 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6075 		    TIMEOUT_CEEN_CHECK, 0);
6076 		mutex_exit(&cpu_lock);
6077 	} else {
6078 		/*
6079 		 * When the CPU is not accepting xcalls, or
6080 		 * the processor is offlined, we don't want to
6081 		 * incur the extra overhead of trying to schedule the
6082 		 * CE timeout indefinitely. However, we don't want to lose
6083 		 * CE checking forever.
6084 		 *
6085 		 * Keep rescheduling the timeout, accepting the additional
6086 		 * overhead as the cost of correctness in the case where we get
6087 		 * a CE, disable CEEN, offline the CPU during the
6088 		 * the timeout interval, and then online it at some
6089 		 * point in the future. This is unlikely given the short
6090 		 * cpu_ceen_delay_secs.
6091 		 */
6092 		mutex_exit(&cpu_lock);
6093 		(void) timeout(cpu_delayed_check_ce_errors,
6094 		    (void *)(uintptr_t)cp->cpu_id,
6095 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6096 	}
6097 }
6098 
6099 /*
6100  * This routine will check whether CEs have occurred while
6101  * CEEN is disabled. Any CEs detected will be logged and, if
6102  * possible, scrubbed.
6103  *
6104  * The memscrubber will also use this routine to clear any errors
6105  * caused by its scrubbing with CEEN disabled.
6106  *
6107  * flag == SCRUBBER_CEEN_CHECK
6108  *		called from memscrubber, just check/scrub, no reset
6109  *		paddr 	physical addr. for start of scrub pages
6110  *		vaddr 	virtual addr. for scrub area
6111  *		psz	page size of area to be scrubbed
6112  *
6113  * flag == TIMEOUT_CEEN_CHECK
6114  *		timeout function has triggered, reset timeout or CEEN
6115  *
6116  * Note: We must not migrate cpus during this function.  This can be
6117  * achieved by one of:
6118  *    - invoking as target of an x-call in which case we're at XCALL_PIL
6119  *	The flag value must be first xcall argument.
6120  *    - disabling kernel preemption.  This should be done for very short
6121  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6122  *	scrub an extended area with cpu_check_block.  The call for
6123  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6124  *	brief for this case.
6125  *    - binding to a cpu, eg with thread_affinity_set().  This is used
6126  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6127  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6128  */
6129 void
6130 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6131 {
6132 	ch_cpu_errors_t	cpu_error_regs;
6133 	uint64_t	ec_err_enable;
6134 	uint64_t	page_offset;
6135 
6136 	/* Read AFSR */
6137 	get_cpu_error_state(&cpu_error_regs);
6138 
6139 	/*
6140 	 * If no CEEN errors have occurred during the timeout
6141 	 * interval, it is safe to re-enable CEEN and exit.
6142 	 */
6143 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
6144 		if (flag == TIMEOUT_CEEN_CHECK &&
6145 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6146 			set_error_enable(ec_err_enable | EN_REG_CEEN);
6147 		return;
6148 	}
6149 
6150 	/*
6151 	 * Ensure that CEEN was not reenabled (maybe by DR) before
6152 	 * we log/clear the error.
6153 	 */
6154 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6155 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6156 
6157 	/*
6158 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6159 	 * timeout will be rescheduled when the error is logged.
6160 	 */
6161 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
6162 	    cpu_ce_detected(&cpu_error_regs,
6163 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6164 	else
6165 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6166 
6167 	/*
6168 	 * If the memory scrubber runs while CEEN is
6169 	 * disabled, (or if CEEN is disabled during the
6170 	 * scrub as a result of a CE being triggered by
6171 	 * it), the range being scrubbed will not be
6172 	 * completely cleaned. If there are multiple CEs
6173 	 * in the range at most two of these will be dealt
6174 	 * with, (one by the trap handler and one by the
6175 	 * timeout). It is also possible that none are dealt
6176 	 * with, (CEEN disabled and another CE occurs before
6177 	 * the timeout triggers). So to ensure that the
6178 	 * memory is actually scrubbed, we have to access each
6179 	 * memory location in the range and then check whether
6180 	 * that access causes a CE.
6181 	 */
6182 	if (flag == SCRUBBER_CEEN_CHECK && va) {
6183 		if ((cpu_error_regs.afar >= pa) &&
6184 		    (cpu_error_regs.afar < (pa + psz))) {
6185 			/*
6186 			 * Force a load from physical memory for each
6187 			 * 64-byte block, then check AFSR to determine
6188 			 * whether this access caused an error.
6189 			 *
6190 			 * This is a slow way to do a scrub, but as it will
6191 			 * only be invoked when the memory scrubber actually
6192 			 * triggered a CE, it should not happen too
6193 			 * frequently.
6194 			 *
6195 			 * cut down what we need to check as the scrubber
6196 			 * has verified up to AFAR, so get it's offset
6197 			 * into the page and start there.
6198 			 */
6199 			page_offset = (uint64_t)(cpu_error_regs.afar &
6200 			    (psz - 1));
6201 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6202 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6203 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6204 			    psz);
6205 		}
6206 	}
6207 
6208 	/*
6209 	 * Reset error enable if this CE is not masked.
6210 	 */
6211 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6212 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6213 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
6214 
6215 }
6216 
6217 /*
6218  * Attempt a cpu logout for an error that we did not trap for, such
6219  * as a CE noticed with CEEN off.  It is assumed that we are still running
6220  * on the cpu that took the error and that we cannot migrate.  Returns
6221  * 0 on success, otherwise nonzero.
6222  */
6223 static int
6224 cpu_ce_delayed_ec_logout(uint64_t afar)
6225 {
6226 	ch_cpu_logout_t *clop;
6227 
6228 	if (CPU_PRIVATE(CPU) == NULL)
6229 		return (0);
6230 
6231 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6232 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6233 	    LOGOUT_INVALID)
6234 		return (0);
6235 
6236 	cpu_delayed_logout(afar, clop);
6237 	return (1);
6238 }
6239 
6240 /*
6241  * We got an error while CEEN was disabled. We
6242  * need to clean up after it and log whatever
6243  * information we have on the CE.
6244  */
6245 void
6246 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6247 {
6248 	ch_async_flt_t 	ch_flt;
6249 	struct async_flt *aflt;
6250 	char 		pr_reason[MAX_REASON_STRING];
6251 
6252 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6253 	ch_flt.flt_trapped_ce = flag;
6254 	aflt = (struct async_flt *)&ch_flt;
6255 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6256 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6257 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6258 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6259 	aflt->flt_addr = cpu_error_regs->afar;
6260 #if defined(SERRANO)
6261 	ch_flt.afar2 = cpu_error_regs->afar2;
6262 #endif	/* SERRANO */
6263 	aflt->flt_pc = NULL;
6264 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6265 	aflt->flt_tl = 0;
6266 	aflt->flt_panic = 0;
6267 	cpu_log_and_clear_ce(&ch_flt);
6268 
6269 	/*
6270 	 * check if we caused any errors during cleanup
6271 	 */
6272 	if (clear_errors(&ch_flt)) {
6273 		pr_reason[0] = '\0';
6274 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6275 		    NULL);
6276 	}
6277 }
6278 
6279 /*
6280  * Log/clear CEEN-controlled disrupting errors
6281  */
6282 static void
6283 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6284 {
6285 	struct async_flt *aflt;
6286 	uint64_t afsr, afsr_errs;
6287 	ch_cpu_logout_t *clop;
6288 	char 		pr_reason[MAX_REASON_STRING];
6289 	on_trap_data_t	*otp = curthread->t_ontrap;
6290 
6291 	aflt = (struct async_flt *)ch_flt;
6292 	afsr = aflt->flt_stat;
6293 	afsr_errs = ch_flt->afsr_errs;
6294 	aflt->flt_id = gethrtime_waitfree();
6295 	aflt->flt_bus_id = getprocessorid();
6296 	aflt->flt_inst = CPU->cpu_id;
6297 	aflt->flt_prot = AFLT_PROT_NONE;
6298 	aflt->flt_class = CPU_FAULT;
6299 	aflt->flt_status = ECC_C_TRAP;
6300 
6301 	pr_reason[0] = '\0';
6302 	/*
6303 	 * Get the CPU log out info for Disrupting Trap.
6304 	 */
6305 	if (CPU_PRIVATE(CPU) == NULL) {
6306 		clop = NULL;
6307 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6308 	} else {
6309 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6310 	}
6311 
6312 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6313 		ch_cpu_errors_t cpu_error_regs;
6314 
6315 		get_cpu_error_state(&cpu_error_regs);
6316 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6317 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6318 		clop->clo_data.chd_afar = cpu_error_regs.afar;
6319 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6320 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6321 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6322 		clop->clo_sdw_data.chd_afsr_ext =
6323 		    cpu_error_regs.shadow_afsr_ext;
6324 #if defined(SERRANO)
6325 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6326 #endif	/* SERRANO */
6327 		ch_flt->flt_data_incomplete = 1;
6328 
6329 		/*
6330 		 * The logging/clear code expects AFSR/AFAR to be cleared.
6331 		 * The trap handler does it for CEEN enabled errors
6332 		 * so we need to do it here.
6333 		 */
6334 		set_cpu_error_state(&cpu_error_regs);
6335 	}
6336 
6337 #if defined(JALAPENO) || defined(SERRANO)
6338 	/*
6339 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6340 	 * For Serrano, even thou we do have the AFAR, we still do the
6341 	 * scrub on the RCE side since that's where the error type can
6342 	 * be properly classified as intermittent, persistent, etc.
6343 	 *
6344 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
6345 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6346 	 * the flt_status bits.
6347 	 */
6348 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6349 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6350 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6351 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
6352 	}
6353 #else /* JALAPENO || SERRANO */
6354 	/*
6355 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
6356 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6357 	 * the flt_status bits.
6358 	 */
6359 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6360 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6361 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6362 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6363 		}
6364 	}
6365 
6366 #endif /* JALAPENO || SERRANO */
6367 
6368 	/*
6369 	 * Update flt_prot if this error occurred under on_trap protection.
6370 	 */
6371 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6372 		aflt->flt_prot = AFLT_PROT_EC;
6373 
6374 	/*
6375 	 * Queue events on the async event queue, one event per error bit.
6376 	 */
6377 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6378 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6379 		ch_flt->flt_type = CPU_INV_AFSR;
6380 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6381 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6382 		    aflt->flt_panic);
6383 	}
6384 
6385 	/*
6386 	 * Zero out + invalidate CPU logout.
6387 	 */
6388 	if (clop) {
6389 		bzero(clop, sizeof (ch_cpu_logout_t));
6390 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6391 	}
6392 
6393 	/*
6394 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6395 	 * was disabled, we need to flush either the entire
6396 	 * E$ or an E$ line.
6397 	 */
6398 #if defined(JALAPENO) || defined(SERRANO)
6399 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6400 #else	/* JALAPENO || SERRANO */
6401 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6402 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6403 #endif	/* JALAPENO || SERRANO */
6404 		cpu_error_ecache_flush(ch_flt);
6405 
6406 }
6407 
6408 /*
6409  * depending on the error type, we determine whether we
6410  * need to flush the entire ecache or just a line.
6411  */
6412 static int
6413 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6414 {
6415 	struct async_flt *aflt;
6416 	uint64_t	afsr;
6417 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6418 
6419 	aflt = (struct async_flt *)ch_flt;
6420 	afsr = aflt->flt_stat;
6421 
6422 	/*
6423 	 * If we got multiple errors, no point in trying
6424 	 * the individual cases, just flush the whole cache
6425 	 */
6426 	if (afsr & C_AFSR_ME) {
6427 		return (ECACHE_FLUSH_ALL);
6428 	}
6429 
6430 	/*
6431 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6432 	 * was disabled, we need to flush entire E$. We can't just
6433 	 * flush the cache line affected as the ME bit
6434 	 * is not set when multiple correctable errors of the same
6435 	 * type occur, so we might have multiple CPC or EDC errors,
6436 	 * with only the first recorded.
6437 	 */
6438 #if defined(JALAPENO) || defined(SERRANO)
6439 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6440 #else	/* JALAPENO || SERRANO */
6441 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6442 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6443 #endif	/* JALAPENO || SERRANO */
6444 		return (ECACHE_FLUSH_ALL);
6445 	}
6446 
6447 #if defined(JALAPENO) || defined(SERRANO)
6448 	/*
6449 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6450 	 * flush the entire Ecache.
6451 	 */
6452 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6453 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6454 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6455 			return (ECACHE_FLUSH_LINE);
6456 		} else {
6457 			return (ECACHE_FLUSH_ALL);
6458 		}
6459 	}
6460 #else /* JALAPENO || SERRANO */
6461 	/*
6462 	 * If UE only is set, flush the Ecache line, otherwise
6463 	 * flush the entire Ecache.
6464 	 */
6465 	if (afsr_errs & C_AFSR_UE) {
6466 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6467 		    C_AFSR_UE) {
6468 			return (ECACHE_FLUSH_LINE);
6469 		} else {
6470 			return (ECACHE_FLUSH_ALL);
6471 		}
6472 	}
6473 #endif /* JALAPENO || SERRANO */
6474 
6475 	/*
6476 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6477 	 * flush the entire Ecache.
6478 	 */
6479 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6480 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6481 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6482 			return (ECACHE_FLUSH_LINE);
6483 		} else {
6484 			return (ECACHE_FLUSH_ALL);
6485 		}
6486 	}
6487 
6488 	/*
6489 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6490 	 * flush the entire Ecache.
6491 	 */
6492 	if (afsr_errs & C_AFSR_BERR) {
6493 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6494 			return (ECACHE_FLUSH_LINE);
6495 		} else {
6496 			return (ECACHE_FLUSH_ALL);
6497 		}
6498 	}
6499 
6500 	return (0);
6501 }
6502 
6503 void
6504 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6505 {
6506 	int	ecache_flush_flag =
6507 	    cpu_error_ecache_flush_required(ch_flt);
6508 
6509 	/*
6510 	 * Flush Ecache line or entire Ecache based on above checks.
6511 	 */
6512 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6513 		cpu_flush_ecache();
6514 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6515 		cpu_flush_ecache_line(ch_flt);
6516 	}
6517 
6518 }
6519 
6520 /*
6521  * Extract the PA portion from the E$ tag.
6522  */
6523 uint64_t
6524 cpu_ectag_to_pa(int setsize, uint64_t tag)
6525 {
6526 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6527 		return (JG_ECTAG_TO_PA(setsize, tag));
6528 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6529 		return (PN_L3TAG_TO_PA(tag));
6530 	else
6531 		return (CH_ECTAG_TO_PA(setsize, tag));
6532 }
6533 
6534 /*
6535  * Convert the E$ tag PA into an E$ subblock index.
6536  */
6537 static int
6538 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6539 {
6540 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6541 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6542 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6543 		/* Panther has only one subblock per line */
6544 		return (0);
6545 	else
6546 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6547 }
6548 
6549 /*
6550  * All subblocks in an E$ line must be invalid for
6551  * the line to be invalid.
6552  */
6553 int
6554 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6555 {
6556 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6557 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6558 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6559 		return (PN_L3_LINE_INVALID(tag));
6560 	else
6561 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6562 }
6563 
6564 /*
6565  * Extract state bits for a subblock given the tag.  Note that for Panther
6566  * this works on both l2 and l3 tags.
6567  */
6568 static int
6569 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6570 {
6571 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6572 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6573 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6574 		return (tag & CH_ECSTATE_MASK);
6575 	else
6576 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6577 }
6578 
/*
 * Cpu specific initialization.  When built with CHEETAHPLUS_ERRATUM_25
 * and cheetah_sendmondo_recover is set, this calls cheetah_nudge_init();
 * otherwise it does nothing.
 */
void
cpu_mp_init(void)
{
#ifdef	CHEETAHPLUS_ERRATUM_25
	if (cheetah_sendmondo_recover != 0)
		cheetah_nudge_init();
#endif
}
6591 
6592 void
6593 cpu_ereport_post(struct async_flt *aflt)
6594 {
6595 	char *cpu_type, buf[FM_MAX_CLASS];
6596 	nv_alloc_t *nva = NULL;
6597 	nvlist_t *ereport, *detector, *resource;
6598 	errorq_elem_t *eqep;
6599 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6600 	char unum[UNUM_NAMLEN];
6601 	int synd_code;
6602 	uint8_t msg_type;
6603 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6604 
6605 	if (aflt->flt_panic || panicstr) {
6606 		eqep = errorq_reserve(ereport_errorq);
6607 		if (eqep == NULL)
6608 			return;
6609 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6610 		nva = errorq_elem_nva(ereport_errorq, eqep);
6611 	} else {
6612 		ereport = fm_nvlist_create(nva);
6613 	}
6614 
6615 	/*
6616 	 * Create the scheme "cpu" FMRI.
6617 	 */
6618 	detector = fm_nvlist_create(nva);
6619 	resource = fm_nvlist_create(nva);
6620 	switch (cpunodes[aflt->flt_inst].implementation) {
6621 	case CHEETAH_IMPL:
6622 		cpu_type = FM_EREPORT_CPU_USIII;
6623 		break;
6624 	case CHEETAH_PLUS_IMPL:
6625 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6626 		break;
6627 	case JALAPENO_IMPL:
6628 		cpu_type = FM_EREPORT_CPU_USIIIi;
6629 		break;
6630 	case SERRANO_IMPL:
6631 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6632 		break;
6633 	case JAGUAR_IMPL:
6634 		cpu_type = FM_EREPORT_CPU_USIV;
6635 		break;
6636 	case PANTHER_IMPL:
6637 		cpu_type = FM_EREPORT_CPU_USIVplus;
6638 		break;
6639 	default:
6640 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6641 		break;
6642 	}
6643 
6644 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
6645 
6646 	/*
6647 	 * Encode all the common data into the ereport.
6648 	 */
6649 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6650 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6651 
6652 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6653 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6654 	    detector, NULL);
6655 
6656 	/*
6657 	 * Encode the error specific data that was saved in
6658 	 * the async_flt structure into the ereport.
6659 	 */
6660 	cpu_payload_add_aflt(aflt, ereport, resource,
6661 	    &plat_ecc_ch_flt.ecaf_afar_status,
6662 	    &plat_ecc_ch_flt.ecaf_synd_status);
6663 
6664 	if (aflt->flt_panic || panicstr) {
6665 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6666 	} else {
6667 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6668 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6669 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6670 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6671 	}
6672 	/*
6673 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6674 	 * to the SC olny if it can process it.
6675 	 */
6676 
6677 	if (&plat_ecc_capability_sc_get &&
6678 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6679 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6680 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6681 			/*
6682 			 * If afar status is not invalid do a unum lookup.
6683 			 */
6684 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6685 			    AFLT_STAT_INVALID) {
6686 				synd_code = synd_to_synd_code(
6687 				    plat_ecc_ch_flt.ecaf_synd_status,
6688 				    aflt->flt_synd, ch_flt->flt_bit);
6689 				(void) cpu_get_mem_unum_synd(synd_code,
6690 				    aflt, unum);
6691 			} else {
6692 				unum[0] = '\0';
6693 			}
6694 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6695 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6696 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6697 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6698 			    ch_flt->flt_sdw_afsr_ext;
6699 
6700 			if (&plat_log_fruid_error2)
6701 				plat_log_fruid_error2(msg_type, unum, aflt,
6702 				    &plat_ecc_ch_flt);
6703 		}
6704 	}
6705 }
6706 
6707 void
6708 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6709 {
6710 	int status;
6711 	ddi_fm_error_t de;
6712 
6713 	bzero(&de, sizeof (ddi_fm_error_t));
6714 
6715 	de.fme_version = DDI_FME_VERSION;
6716 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6717 	    FM_ENA_FMT1);
6718 	de.fme_flag = expected;
6719 	de.fme_bus_specific = (void *)aflt->flt_addr;
6720 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6721 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6722 		aflt->flt_panic = 1;
6723 }
6724 
6725 void
6726 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6727     errorq_t *eqp, uint_t flag)
6728 {
6729 	struct async_flt *aflt = (struct async_flt *)payload;
6730 
6731 	aflt->flt_erpt_class = error_class;
6732 	errorq_dispatch(eqp, payload, payload_sz, flag);
6733 }
6734 
/*
 * Deliberate no-op.  The IO module may call this routine, but this cpu
 * module has nothing to do here: the SERD algorithm is handled by the
 * cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	/* Nothing to do. */
}
6744 
6745 void
6746 adjust_hw_copy_limits(int ecache_size)
6747 {
6748 	/*
6749 	 * Set hw copy limits.
6750 	 *
6751 	 * /etc/system will be parsed later and can override one or more
6752 	 * of these settings.
6753 	 *
6754 	 * At this time, ecache size seems only mildly relevant.
6755 	 * We seem to run into issues with the d-cache and stalls
6756 	 * we see on misses.
6757 	 *
6758 	 * Cycle measurement indicates that 2 byte aligned copies fare
6759 	 * little better than doing things with VIS at around 512 bytes.
6760 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6761 	 * aligned is faster whenever the source and destination data
6762 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6763 	 * limit seems to be driven by the 2K write cache.
6764 	 * When more than 2K of copies are done in non-VIS mode, stores
6765 	 * backup in the write cache.  In VIS mode, the write cache is
6766 	 * bypassed, allowing faster cache-line writes aligned on cache
6767 	 * boundaries.
6768 	 *
6769 	 * In addition, in non-VIS mode, there is no prefetching, so
6770 	 * for larger copies, the advantage of prefetching to avoid even
6771 	 * occasional cache misses is enough to justify using the VIS code.
6772 	 *
6773 	 * During testing, it was discovered that netbench ran 3% slower
6774 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6775 	 * applications, data is only used once (copied to the output
6776 	 * buffer, then copied by the network device off the system).  Using
6777 	 * the VIS copy saves more L2 cache state.  Network copies are
6778 	 * around 1.3K to 1.5K in size for historical reasons.
6779 	 *
6780 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6781 	 * aligned copy even for large caches and 8 MB ecache.  The
6782 	 * infrastructure to allow different limits for different sized
6783 	 * caches is kept to allow further tuning in later releases.
6784 	 */
6785 
6786 	if (min_ecache_size == 0 && use_hw_bcopy) {
6787 		/*
6788 		 * First time through - should be before /etc/system
6789 		 * is read.
6790 		 * Could skip the checks for zero but this lets us
6791 		 * preserve any debugger rewrites.
6792 		 */
6793 		if (hw_copy_limit_1 == 0) {
6794 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6795 			priv_hcl_1 = hw_copy_limit_1;
6796 		}
6797 		if (hw_copy_limit_2 == 0) {
6798 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6799 			priv_hcl_2 = hw_copy_limit_2;
6800 		}
6801 		if (hw_copy_limit_4 == 0) {
6802 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6803 			priv_hcl_4 = hw_copy_limit_4;
6804 		}
6805 		if (hw_copy_limit_8 == 0) {
6806 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6807 			priv_hcl_8 = hw_copy_limit_8;
6808 		}
6809 		min_ecache_size = ecache_size;
6810 	} else {
6811 		/*
6812 		 * MP initialization. Called *after* /etc/system has
6813 		 * been parsed. One CPU has already been initialized.
6814 		 * Need to cater for /etc/system having scragged one
6815 		 * of our values.
6816 		 */
6817 		if (ecache_size == min_ecache_size) {
6818 			/*
6819 			 * Same size ecache. We do nothing unless we
6820 			 * have a pessimistic ecache setting. In that
6821 			 * case we become more optimistic (if the cache is
6822 			 * large enough).
6823 			 */
6824 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6825 				/*
6826 				 * Need to adjust hw_copy_limit* from our
6827 				 * pessimistic uniprocessor value to a more
6828 				 * optimistic UP value *iff* it hasn't been
6829 				 * reset.
6830 				 */
6831 				if ((ecache_size > 1048576) &&
6832 				    (priv_hcl_8 == hw_copy_limit_8)) {
6833 					if (ecache_size <= 2097152)
6834 						hw_copy_limit_8 = 4 *
6835 						    VIS_COPY_THRESHOLD;
6836 					else if (ecache_size <= 4194304)
6837 						hw_copy_limit_8 = 4 *
6838 						    VIS_COPY_THRESHOLD;
6839 					else
6840 						hw_copy_limit_8 = 4 *
6841 						    VIS_COPY_THRESHOLD;
6842 					priv_hcl_8 = hw_copy_limit_8;
6843 				}
6844 			}
6845 		} else if (ecache_size < min_ecache_size) {
6846 			/*
6847 			 * A different ecache size. Can this even happen?
6848 			 */
6849 			if (priv_hcl_8 == hw_copy_limit_8) {
6850 				/*
6851 				 * The previous value that we set
6852 				 * is unchanged (i.e., it hasn't been
6853 				 * scragged by /etc/system). Rewrite it.
6854 				 */
6855 				if (ecache_size <= 1048576)
6856 					hw_copy_limit_8 = 8 *
6857 					    VIS_COPY_THRESHOLD;
6858 				else if (ecache_size <= 2097152)
6859 					hw_copy_limit_8 = 8 *
6860 					    VIS_COPY_THRESHOLD;
6861 				else if (ecache_size <= 4194304)
6862 					hw_copy_limit_8 = 8 *
6863 					    VIS_COPY_THRESHOLD;
6864 				else
6865 					hw_copy_limit_8 = 10 *
6866 					    VIS_COPY_THRESHOLD;
6867 				priv_hcl_8 = hw_copy_limit_8;
6868 				min_ecache_size = ecache_size;
6869 			}
6870 		}
6871 	}
6872 }
6873 
6874 /*
6875  * Called from illegal instruction trap handler to see if we can attribute
6876  * the trap to a fpras check.
6877  */
6878 int
6879 fpras_chktrap(struct regs *rp)
6880 {
6881 	int op;
6882 	struct fpras_chkfngrp *cgp;
6883 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6884 
6885 	if (fpras_chkfngrps == NULL)
6886 		return (0);
6887 
6888 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6889 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6890 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6891 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6892 			break;
6893 	}
6894 	if (op == FPRAS_NCOPYOPS)
6895 		return (0);
6896 
6897 	/*
6898 	 * This is an fpRAS failure caught through an illegal
6899 	 * instruction - trampoline.
6900 	 */
6901 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6902 	rp->r_npc = rp->r_pc + 4;
6903 	return (1);
6904 }
6905 
6906 /*
6907  * fpras_failure is called when a fpras check detects a bad calculation
6908  * result or an illegal instruction trap is attributed to an fpras
6909  * check.  In all cases we are still bound to CPU.
6910  */
6911 int
6912 fpras_failure(int op, int how)
6913 {
6914 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6915 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6916 	ch_async_flt_t ch_flt;
6917 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6918 	struct fpras_chkfn *sfp, *cfp;
6919 	uint32_t *sip, *cip;
6920 	int i;
6921 
6922 	/*
6923 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6924 	 * the time in which we dispatch an ereport and (if applicable) panic.
6925 	 */
6926 	use_hw_bcopy_orig = use_hw_bcopy;
6927 	use_hw_bzero_orig = use_hw_bzero;
6928 	hcl1_orig = hw_copy_limit_1;
6929 	hcl2_orig = hw_copy_limit_2;
6930 	hcl4_orig = hw_copy_limit_4;
6931 	hcl8_orig = hw_copy_limit_8;
6932 	use_hw_bcopy = use_hw_bzero = 0;
6933 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6934 	    hw_copy_limit_8 = 0;
6935 
6936 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6937 	aflt->flt_id = gethrtime_waitfree();
6938 	aflt->flt_class = CPU_FAULT;
6939 	aflt->flt_inst = CPU->cpu_id;
6940 	aflt->flt_status = (how << 8) | op;
6941 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6942 	ch_flt.flt_type = CPU_FPUERR;
6943 
6944 	/*
6945 	 * We must panic if the copy operation had no lofault protection -
6946 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6947 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6948 	 */
6949 	aflt->flt_panic = (curthread->t_lofault == NULL);
6950 
6951 	/*
6952 	 * XOR the source instruction block with the copied instruction
6953 	 * block - this will show us which bit(s) are corrupted.
6954 	 */
6955 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6956 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6957 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6958 		sip = &sfp->fpras_blk0[0];
6959 		cip = &cfp->fpras_blk0[0];
6960 	} else {
6961 		sip = &sfp->fpras_blk1[0];
6962 		cip = &cfp->fpras_blk1[0];
6963 	}
6964 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6965 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6966 
6967 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6968 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6969 
6970 	if (aflt->flt_panic)
6971 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6972 
6973 	/*
6974 	 * We get here for copyin/copyout and kcopy or bcopy where the
6975 	 * caller has used on_fault.  We will flag the error so that
6976 	 * the process may be killed  The trap_async_hwerr mechanism will
6977 	 * take appropriate further action (such as a reboot, contract
6978 	 * notification etc).  Since we may be continuing we will
6979 	 * restore the global hardware copy acceleration switches.
6980 	 *
6981 	 * When we return from this function to the copy function we want to
6982 	 * avoid potentially bad data being used, ie we want the affected
6983 	 * copy function to return an error.  The caller should therefore
6984 	 * invoke its lofault handler (which always exists for these functions)
6985 	 * which will return the appropriate error.
6986 	 */
6987 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6988 	aston(curthread);
6989 
6990 	use_hw_bcopy = use_hw_bcopy_orig;
6991 	use_hw_bzero = use_hw_bzero_orig;
6992 	hw_copy_limit_1 = hcl1_orig;
6993 	hw_copy_limit_2 = hcl2_orig;
6994 	hw_copy_limit_4 = hcl4_orig;
6995 	hw_copy_limit_8 = hcl8_orig;
6996 
6997 	return (1);
6998 }
6999 
7000 #define	VIS_BLOCKSIZE		64
7001 
7002 int
7003 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7004 {
7005 	int ret, watched;
7006 
7007 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7008 	ret = dtrace_blksuword32(addr, data, 0);
7009 	if (watched)
7010 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7011 
7012 	return (ret);
7013 }
7014 
7015 /*
7016  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7017  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
7018  * CEEN from the EER to disable traps for further disrupting error types
7019  * on that cpu.  We could cross-call instead, but that has a larger
7020  * instruction and data footprint than cross-trapping, and the cpu is known
7021  * to be faulted.
7022  */
7023 
7024 void
7025 cpu_faulted_enter(struct cpu *cp)
7026 {
7027 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7028 }
7029 
7030 /*
7031  * Called when a cpu leaves the CPU_FAULTED state to return to one of
7032  * offline, spare, or online (by the cpu requesting this state change).
7033  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7034  * disrupting error bits that have accumulated without trapping, then
7035  * we cross-trap to re-enable CEEN controlled traps.
7036  */
7037 void
7038 cpu_faulted_exit(struct cpu *cp)
7039 {
7040 	ch_cpu_errors_t cpu_error_regs;
7041 
7042 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7043 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7044 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7045 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7046 	    (uint64_t)&cpu_error_regs, 0);
7047 
7048 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7049 }
7050 
7051 /*
7052  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7053  * the errors in the original AFSR, 0 otherwise.
7054  *
7055  * For all procs if the initial error was a BERR or TO, then it is possible
7056  * that we may have caused a secondary BERR or TO in the process of logging the
7057  * inital error via cpu_run_bus_error_handlers().  If this is the case then
7058  * if the request was protected then a panic is still not necessary, if not
7059  * protected then aft_panic is already set - so either way there's no need
7060  * to set aft_panic for the secondary error.
7061  *
7062  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7063  * a store merge, then the error handling code will call cpu_deferred_error().
7064  * When clear_errors() is called, it will determine that secondary errors have
7065  * occurred - in particular, the store merge also caused a EDU and WDU that
7066  * weren't discovered until this point.
7067  *
7068  * We do three checks to verify that we are in this case.  If we pass all three
7069  * checks, we return 1 to indicate that we should not panic.  If any unexpected
7070  * errors occur, we return 0.
7071  *
7072  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7073  * handled in cpu_disrupting_errors().  Since this function is not even called
7074  * in the case we are interested in, we just return 0 for these processors.
7075  */
7076 /*ARGSUSED*/
7077 static int
7078 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7079     uint64_t t_afar)
7080 {
7081 #if defined(CHEETAH_PLUS)
7082 #else	/* CHEETAH_PLUS */
7083 	struct async_flt *aflt = (struct async_flt *)ch_flt;
7084 #endif	/* CHEETAH_PLUS */
7085 
7086 	/*
7087 	 * Was the original error a BERR or TO and only a BERR or TO
7088 	 * (multiple errors are also OK)
7089 	 */
7090 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7091 		/*
7092 		 * Is the new error a BERR or TO and only a BERR or TO
7093 		 * (multiple errors are also OK)
7094 		 */
7095 		if ((ch_flt->afsr_errs &
7096 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7097 			return (1);
7098 	}
7099 
7100 #if defined(CHEETAH_PLUS)
7101 	return (0);
7102 #else	/* CHEETAH_PLUS */
7103 	/*
7104 	 * Now look for secondary effects of a UE on cheetah/jalapeno
7105 	 *
7106 	 * Check the original error was a UE, and only a UE.  Note that
7107 	 * the ME bit will cause us to fail this check.
7108 	 */
7109 	if (t_afsr_errs != C_AFSR_UE)
7110 		return (0);
7111 
7112 	/*
7113 	 * Check the secondary errors were exclusively an EDU and/or WDU.
7114 	 */
7115 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7116 		return (0);
7117 
7118 	/*
7119 	 * Check the AFAR of the original error and secondary errors
7120 	 * match to the 64-byte boundary
7121 	 */
7122 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7123 		return (0);
7124 
7125 	/*
7126 	 * We've passed all the checks, so it's a secondary error!
7127 	 */
7128 	return (1);
7129 #endif	/* CHEETAH_PLUS */
7130 }
7131 
7132 /*
7133  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
7134  * is checked for any valid errors.  If found, the error type is
7135  * returned. If not found, the flt_type is checked for L1$ parity errors.
7136  */
7137 /*ARGSUSED*/
7138 static uint8_t
7139 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7140 {
7141 #if defined(JALAPENO)
7142 	/*
7143 	 * Currently, logging errors to the SC is not supported on Jalapeno
7144 	 */
7145 	return (PLAT_ECC_ERROR2_NONE);
7146 #else
7147 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7148 
7149 	switch (ch_flt->flt_bit) {
7150 	case C_AFSR_CE:
7151 		return (PLAT_ECC_ERROR2_CE);
7152 	case C_AFSR_UCC:
7153 	case C_AFSR_EDC:
7154 	case C_AFSR_WDC:
7155 	case C_AFSR_CPC:
7156 		return (PLAT_ECC_ERROR2_L2_CE);
7157 	case C_AFSR_EMC:
7158 		return (PLAT_ECC_ERROR2_EMC);
7159 	case C_AFSR_IVC:
7160 		return (PLAT_ECC_ERROR2_IVC);
7161 	case C_AFSR_UE:
7162 		return (PLAT_ECC_ERROR2_UE);
7163 	case C_AFSR_UCU:
7164 	case C_AFSR_EDU:
7165 	case C_AFSR_WDU:
7166 	case C_AFSR_CPU:
7167 		return (PLAT_ECC_ERROR2_L2_UE);
7168 	case C_AFSR_IVU:
7169 		return (PLAT_ECC_ERROR2_IVU);
7170 	case C_AFSR_TO:
7171 		return (PLAT_ECC_ERROR2_TO);
7172 	case C_AFSR_BERR:
7173 		return (PLAT_ECC_ERROR2_BERR);
7174 #if defined(CHEETAH_PLUS)
7175 	case C_AFSR_L3_EDC:
7176 	case C_AFSR_L3_UCC:
7177 	case C_AFSR_L3_CPC:
7178 	case C_AFSR_L3_WDC:
7179 		return (PLAT_ECC_ERROR2_L3_CE);
7180 	case C_AFSR_IMC:
7181 		return (PLAT_ECC_ERROR2_IMC);
7182 	case C_AFSR_TSCE:
7183 		return (PLAT_ECC_ERROR2_L2_TSCE);
7184 	case C_AFSR_THCE:
7185 		return (PLAT_ECC_ERROR2_L2_THCE);
7186 	case C_AFSR_L3_MECC:
7187 		return (PLAT_ECC_ERROR2_L3_MECC);
7188 	case C_AFSR_L3_THCE:
7189 		return (PLAT_ECC_ERROR2_L3_THCE);
7190 	case C_AFSR_L3_CPU:
7191 	case C_AFSR_L3_EDU:
7192 	case C_AFSR_L3_UCU:
7193 	case C_AFSR_L3_WDU:
7194 		return (PLAT_ECC_ERROR2_L3_UE);
7195 	case C_AFSR_DUE:
7196 		return (PLAT_ECC_ERROR2_DUE);
7197 	case C_AFSR_DTO:
7198 		return (PLAT_ECC_ERROR2_DTO);
7199 	case C_AFSR_DBERR:
7200 		return (PLAT_ECC_ERROR2_DBERR);
7201 #endif	/* CHEETAH_PLUS */
7202 	default:
7203 		switch (ch_flt->flt_type) {
7204 #if defined(CPU_IMP_L1_CACHE_PARITY)
7205 		case CPU_IC_PARITY:
7206 			return (PLAT_ECC_ERROR2_IPE);
7207 		case CPU_DC_PARITY:
7208 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7209 				if (ch_flt->parity_data.dpe.cpl_cache ==
7210 				    CPU_PC_PARITY) {
7211 					return (PLAT_ECC_ERROR2_PCACHE);
7212 				}
7213 			}
7214 			return (PLAT_ECC_ERROR2_DPE);
7215 #endif /* CPU_IMP_L1_CACHE_PARITY */
7216 		case CPU_ITLB_PARITY:
7217 			return (PLAT_ECC_ERROR2_ITLB);
7218 		case CPU_DTLB_PARITY:
7219 			return (PLAT_ECC_ERROR2_DTLB);
7220 		default:
7221 			return (PLAT_ECC_ERROR2_NONE);
7222 		}
7223 	}
7224 #endif	/* JALAPENO */
7225 }
7226