xref: /titanic_41/usr/src/uts/sun4u/cpu/us3_common.c (revision 864221ad7169608e293fbeaa9df563afc9f345a0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/ddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/archsystm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/machthread.h>
37 #include <sys/cpu.h>
38 #include <sys/cmp.h>
39 #include <sys/elf_SPARC.h>
40 #include <vm/vm_dep.h>
41 #include <vm/hat_sfmmu.h>
42 #include <vm/seg_kpm.h>
43 #include <sys/cpuvar.h>
44 #include <sys/cheetahregs.h>
45 #include <sys/us3_module.h>
46 #include <sys/async.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/dditypes.h>
50 #include <sys/prom_debug.h>
51 #include <sys/prom_plat.h>
52 #include <sys/cpu_module.h>
53 #include <sys/sysmacros.h>
54 #include <sys/intreg.h>
55 #include <sys/clock.h>
56 #include <sys/platform_module.h>
57 #include <sys/machtrap.h>
58 #include <sys/ontrap.h>
59 #include <sys/panic.h>
60 #include <sys/memlist.h>
61 #include <sys/bootconf.h>
62 #include <sys/ivintr.h>
63 #include <sys/atomic.h>
64 #include <sys/taskq.h>
65 #include <sys/note.h>
66 #include <sys/ndifm.h>
67 #include <sys/ddifm.h>
68 #include <sys/fm/protocol.h>
69 #include <sys/fm/util.h>
70 #include <sys/fm/cpu/UltraSPARC-III.h>
71 #include <sys/fpras_impl.h>
72 #include <sys/dtrace.h>
73 #include <sys/watchpoint.h>
74 #include <sys/plat_ecc_unum.h>
75 #include <sys/cyclic.h>
76 #include <sys/errorq.h>
77 #include <sys/errclassify.h>
78 #include <sys/pghw.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 /*
85  * Note that 'Cheetah PRM' refers to:
86  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
87  */
88 
89 /*
90  * Per CPU pointers to physical address of TL>0 logout data areas.
91  * These pointers have to be in the kernel nucleus to avoid MMU
92  * misses.
93  */
94 uint64_t ch_err_tl1_paddrs[NCPU];
95 
96 /*
97  * One statically allocated structure to use during startup/DR
98  * to prevent unnecessary panics.
99  */
100 ch_err_tl1_data_t ch_err_tl1_data;
101 
102 /*
103  * Per CPU pending error at TL>0, used by level15 softint handler
104  */
105 uchar_t ch_err_tl1_pending[NCPU];
106 
107 /*
108  * For deferred CE re-enable after trap.
109  */
110 taskq_t		*ch_check_ce_tq;
111 
112 /*
113  * Internal functions.
114  */
115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
118     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
119 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
120     uint64_t t_afsr_bit);
121 static int clear_ecc(struct async_flt *ecc);
122 #if defined(CPU_IMP_ECACHE_ASSOC)
123 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
124 #endif
125 static int cpu_ecache_set_size(struct cpu *cp);
126 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
127 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
128 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
129 static int cpu_ectag_pa_to_subblk_state(int cachesize,
130 				uint64_t subaddr, uint64_t tag);
131 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
132 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
133 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
134 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
135 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
136 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
137 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
138 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
139 static void cpu_scrubphys(struct async_flt *aflt);
140 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
141     int *, int *);
142 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
143 static void cpu_ereport_init(struct async_flt *aflt);
144 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
145 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
146 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
147     uint64_t nceen, ch_cpu_logout_t *clop);
148 static int cpu_ce_delayed_ec_logout(uint64_t);
149 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
150 static int cpu_error_is_ecache_data(int, uint64_t);
151 static void cpu_fmri_cpu_set(nvlist_t *, int);
152 static int cpu_error_to_resource_type(struct async_flt *aflt);
153 
154 #ifdef	CHEETAHPLUS_ERRATUM_25
155 static int mondo_recover_proc(uint16_t, int);
156 static void cheetah_nudge_init(void);
157 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
158     cyc_time_t *when);
159 static void cheetah_nudge_buddy(void);
160 #endif	/* CHEETAHPLUS_ERRATUM_25 */
161 
162 #if defined(CPU_IMP_L1_CACHE_PARITY)
163 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
164 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
165 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
166     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
167 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
168 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
169 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
170 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
171 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
172 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
173 #endif	/* CPU_IMP_L1_CACHE_PARITY */
174 
175 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
176     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
177     int *segsp, int *banksp, int *mcidp);
178 
179 /*
180  * This table is used to determine which bit(s) is(are) bad when an ECC
181  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
182  * of this array have the following semantics:
183  *
184  *      00-127  The number of the bad bit, when only one bit is bad.
185  *      128     ECC bit C0 is bad.
186  *      129     ECC bit C1 is bad.
187  *      130     ECC bit C2 is bad.
188  *      131     ECC bit C3 is bad.
189  *      132     ECC bit C4 is bad.
190  *      133     ECC bit C5 is bad.
191  *      134     ECC bit C6 is bad.
192  *      135     ECC bit C7 is bad.
193  *      136     ECC bit C8 is bad.
194  *	137-143 reserved for Mtag Data and ECC.
195  *      144(M2) Two bits are bad within a nibble.
196  *      145(M3) Three bits are bad within a nibble.
197  *      146(M4) Four bits are bad within a nibble.
198  *      147(M)  Multiple bits (5 or more) are bad.
199  *      148     NO bits are bad.
200  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
201  */
202 
203 #define	C0	128
204 #define	C1	129
205 #define	C2	130
206 #define	C3	131
207 #define	C4	132
208 #define	C5	133
209 #define	C6	134
210 #define	C7	135
211 #define	C8	136
212 #define	MT0	137	/* Mtag Data bit 0 */
213 #define	MT1	138
214 #define	MT2	139
215 #define	MTC0	140	/* Mtag Check bit 0 */
216 #define	MTC1	141
217 #define	MTC2	142
218 #define	MTC3	143
219 #define	M2	144
220 #define	M3	145
221 #define	M4	146
222 #define	M	147
223 #define	NA	148
224 #if defined(JALAPENO) || defined(SERRANO)
225 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
226 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
227 #define	SLAST	S003MEM	/* last special syndrome */
228 #else /* JALAPENO || SERRANO */
229 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
230 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
231 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
232 #define	SLAST	S11C	/* last special syndrome */
233 #endif /* JALAPENO || SERRANO */
234 #if defined(JALAPENO) || defined(SERRANO)
235 #define	BPAR0	152	/* syndrom 152 through 167 for bus parity */
236 #define	BPAR15	167
237 #endif	/* JALAPENO || SERRANO */
238 
239 static uint8_t ecc_syndrome_tab[] =
240 {
241 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
242 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
243 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
244 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
245 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
246 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
247 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
248 #if defined(JALAPENO) || defined(SERRANO)
249 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
250 #else	/* JALAPENO || SERRANO */
251 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
252 #endif	/* JALAPENO || SERRANO */
253 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
254 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
255 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
256 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
257 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
258 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
259 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
260 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
261 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
262 #if defined(JALAPENO) || defined(SERRANO)
263 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
264 #else	/* JALAPENO || SERRANO */
265 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
266 #endif	/* JALAPENO || SERRANO */
267 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
268 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
269 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
270 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
271 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
272 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
273 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
274 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
275 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
276 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
277 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
278 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
279 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
280 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
281 };
282 
283 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
284 
285 #if !(defined(JALAPENO) || defined(SERRANO))
286 /*
287  * This table is used to determine which bit(s) is(are) bad when a Mtag
288  * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
289  * of this array have the following semantics:
290  *
291  *      -1	Invalid mtag syndrome.
292  *      137     Mtag Data 0 is bad.
293  *      138     Mtag Data 1 is bad.
294  *      139     Mtag Data 2 is bad.
295  *      140     Mtag ECC 0 is bad.
296  *      141     Mtag ECC 1 is bad.
297  *      142     Mtag ECC 2 is bad.
298  *      143     Mtag ECC 3 is bad.
299  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6.
300  */
301 short mtag_syndrome_tab[] =
302 {
303 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
304 };
305 
306 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
307 
308 #else /* !(JALAPENO || SERRANO) */
309 
310 #define	BSYND_TBL_SIZE	16
311 
312 #endif /* !(JALAPENO || SERRANO) */
313 
314 /*
315  * Types returned from cpu_error_to_resource_type()
316  */
317 #define	ERRTYPE_UNKNOWN		0
318 #define	ERRTYPE_CPU		1
319 #define	ERRTYPE_MEMORY		2
320 #define	ERRTYPE_ECACHE_DATA	3
321 
322 /*
323  * CE initial classification and subsequent action lookup table
324  */
325 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
326 static int ce_disp_inited;
327 
328 /*
329  * Set to disable leaky and partner check for memory correctables
330  */
331 int ce_xdiag_off;
332 
333 /*
334  * The following are not incremented atomically so are indicative only
335  */
336 static int ce_xdiag_drops;
337 static int ce_xdiag_lkydrops;
338 static int ce_xdiag_ptnrdrops;
339 static int ce_xdiag_bad;
340 
341 /*
342  * CE leaky check callback structure
343  */
344 typedef struct {
345 	struct async_flt *lkycb_aflt;
346 	errorq_t *lkycb_eqp;
347 	errorq_elem_t *lkycb_eqep;
348 } ce_lkychk_cb_t;
349 
350 /*
351  * defines for various ecache_flush_flag's
352  */
353 #define	ECACHE_FLUSH_LINE	1
354 #define	ECACHE_FLUSH_ALL	2
355 
356 /*
357  * STICK sync
358  */
359 #define	STICK_ITERATION 10
360 #define	MAX_TSKEW	1
361 #define	EV_A_START	0
362 #define	EV_A_END	1
363 #define	EV_B_START	2
364 #define	EV_B_END	3
365 #define	EVENTS		4
366 
367 static int64_t stick_iter = STICK_ITERATION;
368 static int64_t stick_tsk = MAX_TSKEW;
369 
/*
 * Command values carried by stick_sync_cmd.  The numeric values are
 * spelled out explicitly; EVENT_NULL is the value stick_sync_cmd is
 * initialized with.
 */
typedef enum {
	EVENT_NULL = 0,
	SLAVE_START = 1,
	SLAVE_CONT = 2,
	MASTER_START = 3
} event_cmd_t;
376 
377 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
378 static int64_t timestamp[EVENTS];
379 static volatile int slave_done;
380 
381 #ifdef DEBUG
382 #define	DSYNC_ATTEMPTS 64
383 typedef struct {
384 	int64_t	skew_val[DSYNC_ATTEMPTS];
385 } ss_t;
386 
387 ss_t stick_sync_stats[NCPU];
388 #endif /* DEBUG */
389 
390 uint_t cpu_impl_dual_pgsz = 0;
391 #if defined(CPU_IMP_DUAL_PAGESIZE)
392 uint_t disable_dual_pgsz = 0;
393 #endif	/* CPU_IMP_DUAL_PAGESIZE */
394 
395 /*
396  * Save the cache bootup state for use when internal
397  * caches are to be re-enabled after an error occurs.
398  */
399 uint64_t cache_boot_state;
400 
401 /*
402  * PA[22:0] represent Displacement in Safari configuration space.
403  */
404 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
405 
406 bus_config_eclk_t bus_config_eclk[] = {
407 #if defined(JALAPENO) || defined(SERRANO)
408 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
409 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
410 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
411 #else /* JALAPENO || SERRANO */
412 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
413 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
414 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
415 #endif /* JALAPENO || SERRANO */
416 	{0, 0}
417 };
418 
419 /*
420  * Interval for deferred CEEN reenable
421  */
422 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
423 
424 /*
425  * set in /etc/system to control logging of user BERR/TO's
426  */
427 int cpu_berr_to_verbose = 0;
428 
429 /*
430  * set to 0 in /etc/system to defer CEEN reenable for all CEs
431  */
432 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
433 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
434 
435 /*
436  * Set of all offline cpus
437  */
438 cpuset_t cpu_offline_set;
439 
440 static void cpu_delayed_check_ce_errors(void *);
441 static void cpu_check_ce_errors(void *);
442 void cpu_error_ecache_flush(ch_async_flt_t *);
443 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
444 static void cpu_log_and_clear_ce(ch_async_flt_t *);
445 void cpu_ce_detected(ch_cpu_errors_t *, int);
446 
447 /*
448  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
449  * memory refresh interval of current DIMMs (64ms).  After initial fix that
450  * gives at least one full refresh cycle in which the cell can leak
451  * (whereafter further refreshes simply reinforce any incorrect bit value).
452  */
453 clock_t cpu_ce_lkychk_timeout_usec = 128000;
454 
455 /*
456  * CE partner check partner caching period in seconds
457  */
458 int cpu_ce_ptnr_cachetime_sec = 60;
459 
460 /*
461  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
462  */
463 #define	CH_SET_TRAP(ttentry, ttlabel)			\
464 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
465 		flush_instr_mem((caddr_t)&ttentry, 32);
466 
467 static int min_ecache_size;
468 static uint_t priv_hcl_1;
469 static uint_t priv_hcl_2;
470 static uint_t priv_hcl_4;
471 static uint_t priv_hcl_8;
472 
473 void
474 cpu_setup(void)
475 {
476 	extern int at_flags;
477 	extern int cpc_has_overflow_intr;
478 
479 	/*
480 	 * Setup chip-specific trap handlers.
481 	 */
482 	cpu_init_trap();
483 
484 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
485 
486 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
487 
488 	/*
489 	 * save the cache bootup state.
490 	 */
491 	cache_boot_state = get_dcu() & DCU_CACHE;
492 
493 	/*
494 	 * Due to the number of entries in the fully-associative tlb
495 	 * this may have to be tuned lower than in spitfire.
496 	 */
497 	pp_slots = MIN(8, MAXPP_SLOTS);
498 
499 	/*
500 	 * Block stores do not invalidate all pages of the d$, pagecopy
501 	 * et. al. need virtual translations with virtual coloring taken
502 	 * into consideration.  prefetch/ldd will pollute the d$ on the
503 	 * load side.
504 	 */
505 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
506 
507 	if (use_page_coloring) {
508 		do_pg_coloring = 1;
509 	}
510 
511 	isa_list =
512 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
513 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
514 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
515 
516 	/*
517 	 * On Panther-based machines, this should
518 	 * also include AV_SPARC_POPC too
519 	 */
520 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
521 
522 	/*
523 	 * On cheetah, there's no hole in the virtual address space
524 	 */
525 	hole_start = hole_end = 0;
526 
527 	/*
528 	 * The kpm mapping window.
529 	 * kpm_size:
530 	 *	The size of a single kpm range.
531 	 *	The overall size will be: kpm_size * vac_colors.
532 	 * kpm_vbase:
533 	 *	The virtual start address of the kpm range within the kernel
534 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
535 	 */
536 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
537 	kpm_size_shift = 43;
538 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
539 	kpm_smallpages = 1;
540 
541 	/*
542 	 * The traptrace code uses either %tick or %stick for
543 	 * timestamping.  We have %stick so we can use it.
544 	 */
545 	traptrace_use_stick = 1;
546 
547 	/*
548 	 * Cheetah has a performance counter overflow interrupt
549 	 */
550 	cpc_has_overflow_intr = 1;
551 
552 #if defined(CPU_IMP_DUAL_PAGESIZE)
553 	/*
554 	 * Use Cheetah+ and later dual page size support.
555 	 */
556 	if (!disable_dual_pgsz) {
557 		cpu_impl_dual_pgsz = 1;
558 	}
559 #endif	/* CPU_IMP_DUAL_PAGESIZE */
560 
561 	/*
562 	 * Declare that this architecture/cpu combination does fpRAS.
563 	 */
564 	fpras_implemented = 1;
565 
566 	/*
567 	 * Setup CE lookup table
568 	 */
569 	CE_INITDISPTBL_POPULATE(ce_disp_table);
570 	ce_disp_inited = 1;
571 }
572 
573 /*
574  * Called by setcpudelay
575  */
576 void
577 cpu_init_tick_freq(void)
578 {
579 	/*
580 	 * For UltraSPARC III and beyond we want to use the
581 	 * system clock rate as the basis for low level timing,
582 	 * due to support of mixed speed CPUs and power managment.
583 	 */
584 	if (system_clock_freq == 0)
585 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
586 
587 	sys_tick_freq = system_clock_freq;
588 }
589 
590 #ifdef CHEETAHPLUS_ERRATUM_25
591 /*
592  * Tunables
593  */
594 int cheetah_bpe_off = 0;
595 int cheetah_sendmondo_recover = 1;
596 int cheetah_sendmondo_fullscan = 0;
597 int cheetah_sendmondo_recover_delay = 5;
598 
599 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
600 
601 /*
602  * Recovery Statistics
603  */
604 typedef struct cheetah_livelock_entry	{
605 	int cpuid;		/* fallen cpu */
606 	int buddy;		/* cpu that ran recovery */
607 	clock_t lbolt;		/* when recovery started */
608 	hrtime_t recovery_time;	/* time spent in recovery */
609 } cheetah_livelock_entry_t;
610 
611 #define	CHEETAH_LIVELOCK_NENTRY	32
612 
613 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
614 int cheetah_livelock_entry_nxt;
615 
616 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
617 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
618 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
619 		cheetah_livelock_entry_nxt = 0;				\
620 	}								\
621 }
622 
623 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
624 
625 struct {
626 	hrtime_t hrt;		/* maximum recovery time */
627 	int recovery;		/* recovered */
628 	int full_claimed;	/* maximum pages claimed in full recovery */
629 	int proc_entry;		/* attempted to claim TSB */
630 	int proc_tsb_scan;	/* tsb scanned */
631 	int proc_tsb_partscan;	/* tsb partially scanned */
632 	int proc_tsb_fullscan;	/* whole tsb scanned */
633 	int proc_claimed;	/* maximum pages claimed in tsb scan */
634 	int proc_user;		/* user thread */
635 	int proc_kernel;	/* kernel thread */
636 	int proc_onflt;		/* bad stack */
637 	int proc_cpu;		/* null cpu */
638 	int proc_thread;	/* null thread */
639 	int proc_proc;		/* null proc */
640 	int proc_as;		/* null as */
641 	int proc_hat;		/* null hat */
642 	int proc_hat_inval;	/* hat contents don't make sense */
643 	int proc_hat_busy;	/* hat is changing TSBs */
644 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
645 	int proc_cnum_bad;	/* cnum out of range */
646 	int proc_cnum;		/* last cnum processed */
647 	tte_t proc_tte;		/* last tte processed */
648 } cheetah_livelock_stat;
649 
650 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
651 
652 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
653 	cheetah_livelock_stat.item = value
654 
655 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
656 	if (value > cheetah_livelock_stat.item)		\
657 		cheetah_livelock_stat.item = value;	\
658 }
659 
660 /*
661  * Attempt to recover a cpu by claiming every cache line as saved
662  * in the TSB that the non-responsive cpu is using. Since we can't
663  * grab any adaptive lock, this is at best an attempt to do so. Because
664  * we don't grab any locks, we must operate under the protection of
665  * on_fault().
666  *
667  * Return 1 if cpuid could be recovered, 0 if failed.
668  */
669 int
670 mondo_recover_proc(uint16_t cpuid, int bn)
671 {
672 	label_t ljb;
673 	cpu_t *cp;
674 	kthread_t *t;
675 	proc_t *p;
676 	struct as *as;
677 	struct hat *hat;
678 	uint_t  cnum;
679 	struct tsb_info *tsbinfop;
680 	struct tsbe *tsbep;
681 	caddr_t tsbp;
682 	caddr_t end_tsbp;
683 	uint64_t paddr;
684 	uint64_t idsr;
685 	u_longlong_t pahi, palo;
686 	int pages_claimed = 0;
687 	tte_t tsbe_tte;
688 	int tried_kernel_tsb = 0;
689 	mmu_ctx_t *mmu_ctxp;
690 
691 	CHEETAH_LIVELOCK_STAT(proc_entry);
692 
693 	if (on_fault(&ljb)) {
694 		CHEETAH_LIVELOCK_STAT(proc_onflt);
695 		goto badstruct;
696 	}
697 
698 	if ((cp = cpu[cpuid]) == NULL) {
699 		CHEETAH_LIVELOCK_STAT(proc_cpu);
700 		goto badstruct;
701 	}
702 
703 	if ((t = cp->cpu_thread) == NULL) {
704 		CHEETAH_LIVELOCK_STAT(proc_thread);
705 		goto badstruct;
706 	}
707 
708 	if ((p = ttoproc(t)) == NULL) {
709 		CHEETAH_LIVELOCK_STAT(proc_proc);
710 		goto badstruct;
711 	}
712 
713 	if ((as = p->p_as) == NULL) {
714 		CHEETAH_LIVELOCK_STAT(proc_as);
715 		goto badstruct;
716 	}
717 
718 	if ((hat = as->a_hat) == NULL) {
719 		CHEETAH_LIVELOCK_STAT(proc_hat);
720 		goto badstruct;
721 	}
722 
723 	if (hat != ksfmmup) {
724 		CHEETAH_LIVELOCK_STAT(proc_user);
725 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
726 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
727 			goto badstruct;
728 		}
729 		tsbinfop = hat->sfmmu_tsb;
730 		if (tsbinfop == NULL) {
731 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
732 			goto badstruct;
733 		}
734 		tsbp = tsbinfop->tsb_va;
735 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
736 	} else {
737 		CHEETAH_LIVELOCK_STAT(proc_kernel);
738 		tsbinfop = NULL;
739 		tsbp = ktsb_base;
740 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
741 	}
742 
743 	/* Verify as */
744 	if (hat->sfmmu_as != as) {
745 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
746 		goto badstruct;
747 	}
748 
749 	mmu_ctxp = CPU_MMU_CTXP(cp);
750 	ASSERT(mmu_ctxp);
751 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
752 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
753 
754 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
755 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
756 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
757 		goto badstruct;
758 	}
759 
760 	do {
761 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
762 
763 		/*
764 		 * Skip TSBs being relocated.  This is important because
765 		 * we want to avoid the following deadlock scenario:
766 		 *
767 		 * 1) when we came in we set ourselves to "in recover" state.
768 		 * 2) when we try to touch TSB being relocated the mapping
769 		 *    will be in the suspended state so we'll spin waiting
770 		 *    for it to be unlocked.
771 		 * 3) when the CPU that holds the TSB mapping locked tries to
772 		 *    unlock it it will send a xtrap which will fail to xcall
773 		 *    us or the CPU we're trying to recover, and will in turn
774 		 *    enter the mondo code.
775 		 * 4) since we are still spinning on the locked mapping
776 		 *    no further progress will be made and the system will
777 		 *    inevitably hard hang.
778 		 *
779 		 * A TSB not being relocated can't begin being relocated
780 		 * while we're accessing it because we check
781 		 * sendmondo_in_recover before relocating TSBs.
782 		 */
783 		if (hat != ksfmmup &&
784 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
785 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
786 			goto next_tsbinfo;
787 		}
788 
789 		for (tsbep = (struct tsbe *)tsbp;
790 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
791 			tsbe_tte = tsbep->tte_data;
792 
793 			if (tsbe_tte.tte_val == 0) {
794 				/*
795 				 * Invalid tte
796 				 */
797 				continue;
798 			}
799 			if (tsbe_tte.tte_se) {
800 				/*
801 				 * Don't want device registers
802 				 */
803 				continue;
804 			}
805 			if (tsbe_tte.tte_cp == 0) {
806 				/*
807 				 * Must be cached in E$
808 				 */
809 				continue;
810 			}
811 			if (tsbep->tte_tag.tag_invalid != 0) {
812 				/*
813 				 * Invalid tag, ingnore this entry.
814 				 */
815 				continue;
816 			}
817 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
818 			idsr = getidsr();
819 			if ((idsr & (IDSR_NACK_BIT(bn) |
820 			    IDSR_BUSY_BIT(bn))) == 0) {
821 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
822 				goto done;
823 			}
824 			pahi = tsbe_tte.tte_pahi;
825 			palo = tsbe_tte.tte_palo;
826 			paddr = (uint64_t)((pahi << 32) |
827 			    (palo << MMU_PAGESHIFT));
828 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
829 			    CH_ECACHE_SUBBLK_SIZE);
830 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
831 				shipit(cpuid, bn);
832 			}
833 			pages_claimed++;
834 		}
835 next_tsbinfo:
836 		if (tsbinfop != NULL)
837 			tsbinfop = tsbinfop->tsb_next;
838 		if (tsbinfop != NULL) {
839 			tsbp = tsbinfop->tsb_va;
840 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
841 		} else if (tsbp == ktsb_base) {
842 			tried_kernel_tsb = 1;
843 		} else if (!tried_kernel_tsb) {
844 			tsbp = ktsb_base;
845 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
846 			hat = ksfmmup;
847 			tsbinfop = NULL;
848 		}
849 	} while (tsbinfop != NULL ||
850 	    ((tsbp == ktsb_base) && !tried_kernel_tsb));
851 
852 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
853 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
854 	no_fault();
855 	idsr = getidsr();
856 	if ((idsr & (IDSR_NACK_BIT(bn) |
857 	    IDSR_BUSY_BIT(bn))) == 0) {
858 		return (1);
859 	} else {
860 		return (0);
861 	}
862 
863 done:
864 	no_fault();
865 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
866 	return (1);
867 
868 badstruct:
869 	no_fault();
870 	return (0);
871 }
872 
873 /*
874  * Attempt to claim ownership, temporarily, of every cache line that a
875  * non-responsive cpu might be using.  This might kick that cpu out of
876  * this state.
877  *
878  * The return value indicates to the caller if we have exhausted all recovery
879  * techniques. If 1 is returned, it is useless to call this function again
880  * even for a different target CPU.
881  */
882 int
883 mondo_recover(uint16_t cpuid, int bn)
884 {
885 	struct memseg *seg;
886 	uint64_t begin_pa, end_pa, cur_pa;
887 	hrtime_t begin_hrt, end_hrt;
888 	int retval = 0;
889 	int pages_claimed = 0;
890 	cheetah_livelock_entry_t *histp;
891 	uint64_t idsr;
892 
893 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
894 		/*
895 		 * Wait while recovery takes place
896 		 */
897 		while (sendmondo_in_recover) {
898 			drv_usecwait(1);
899 		}
900 		/*
901 		 * Assume we didn't claim the whole memory. If
902 		 * the target of this caller is not recovered,
903 		 * it will come back.
904 		 */
905 		return (retval);
906 	}
907 
908 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
909 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
910 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
911 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
912 
913 	begin_hrt = gethrtime_waitfree();
914 	/*
915 	 * First try to claim the lines in the TSB the target
916 	 * may have been using.
917 	 */
918 	if (mondo_recover_proc(cpuid, bn) == 1) {
919 		/*
920 		 * Didn't claim the whole memory
921 		 */
922 		goto done;
923 	}
924 
925 	/*
926 	 * We tried using the TSB. The target is still
927 	 * not recovered. Check if complete memory scan is
928 	 * enabled.
929 	 */
930 	if (cheetah_sendmondo_fullscan == 0) {
931 		/*
932 		 * Full memory scan is disabled.
933 		 */
934 		retval = 1;
935 		goto done;
936 	}
937 
938 	/*
939 	 * Try claiming the whole memory.
940 	 */
941 	for (seg = memsegs; seg; seg = seg->next) {
942 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
943 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
944 		for (cur_pa = begin_pa; cur_pa < end_pa;
945 		    cur_pa += MMU_PAGESIZE) {
946 			idsr = getidsr();
947 			if ((idsr & (IDSR_NACK_BIT(bn) |
948 			    IDSR_BUSY_BIT(bn))) == 0) {
949 				/*
950 				 * Didn't claim all memory
951 				 */
952 				goto done;
953 			}
954 			claimlines(cur_pa, MMU_PAGESIZE,
955 			    CH_ECACHE_SUBBLK_SIZE);
956 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
957 				shipit(cpuid, bn);
958 			}
959 			pages_claimed++;
960 		}
961 	}
962 
963 	/*
964 	 * We did all we could.
965 	 */
966 	retval = 1;
967 
968 done:
969 	/*
970 	 * Update statistics
971 	 */
972 	end_hrt = gethrtime_waitfree();
973 	CHEETAH_LIVELOCK_STAT(recovery);
974 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
975 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
976 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
977 	    (end_hrt -  begin_hrt));
978 
979 	while (cas32(&sendmondo_in_recover, 1, 0) != 1)
980 		;
981 
982 	return (retval);
983 }
984 
985 /*
986  * This is called by the cyclic framework when this CPU becomes online
987  */
988 /*ARGSUSED*/
989 static void
990 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
991 {
992 
993 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
994 	hdlr->cyh_level = CY_LOW_LEVEL;
995 	hdlr->cyh_arg = NULL;
996 
997 	/*
998 	 * Stagger the start time
999 	 */
1000 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1001 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1002 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1003 	}
1004 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1005 }
1006 
1007 /*
1008  * Create a low level cyclic to send a xtrap to the next cpu online.
1009  * However, there's no need to have this running on a uniprocessor system.
1010  */
1011 static void
1012 cheetah_nudge_init(void)
1013 {
1014 	cyc_omni_handler_t hdlr;
1015 
1016 	if (max_ncpus == 1) {
1017 		return;
1018 	}
1019 
1020 	hdlr.cyo_online = cheetah_nudge_onln;
1021 	hdlr.cyo_offline = NULL;
1022 	hdlr.cyo_arg = NULL;
1023 
1024 	mutex_enter(&cpu_lock);
1025 	(void) cyclic_add_omni(&hdlr);
1026 	mutex_exit(&cpu_lock);
1027 }
1028 
1029 /*
1030  * Cyclic handler to wake up buddy
1031  */
1032 void
1033 cheetah_nudge_buddy(void)
1034 {
1035 	/*
1036 	 * Disable kernel preemption to protect the cpu list
1037 	 */
1038 	kpreempt_disable();
1039 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1040 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1041 		    0, 0);
1042 	}
1043 	kpreempt_enable();
1044 }
1045 
1046 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1047 
#ifdef SEND_MONDO_STATS
/*
 * Mondo dispatch statistics, only compiled in when SEND_MONDO_STATS is
 * defined.
 *
 * send_one_mondo() files every dispatch by the number of ticks it took:
 * dispatches shorter than 8192 ticks bump x_one_stimes[] (one bucket per
 * 128 ticks), longer ones bump x_one_ltimes[] (one bucket per 8192 ticks,
 * bucket index taken modulo 16).
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];

/*
 * NOTE(review): the counters below are not touched by send_one_mondo();
 * presumably they are maintained by the cpu-set send path -- confirm
 * against their users.
 */
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif	/* SEND_MONDO_STATS */
1056 
1057 /*
1058  * Note: A version of this function is used by the debugger via the KDI,
1059  * and must be kept in sync with this version.  Any changes made to this
1060  * function to support new chips or to accomodate errata must also be included
1061  * in the KDI-specific version.  See us3_kdi.c.
1062  */
1063 void
1064 send_one_mondo(int cpuid)
1065 {
1066 	int busy, nack;
1067 	uint64_t idsr, starttick, endtick, tick, lasttick;
1068 	uint64_t busymask;
1069 #ifdef	CHEETAHPLUS_ERRATUM_25
1070 	int recovered = 0;
1071 #endif
1072 
1073 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1074 	starttick = lasttick = gettick();
1075 	shipit(cpuid, 0);
1076 	endtick = starttick + xc_tick_limit;
1077 	busy = nack = 0;
1078 #if defined(JALAPENO) || defined(SERRANO)
1079 	/*
1080 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1081 	 * will be used for dispatching interrupt. For now, assume
1082 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1083 	 * issues with respect to BUSY/NACK pair usage.
1084 	 */
1085 	busymask  = IDSR_BUSY_BIT(cpuid);
1086 #else /* JALAPENO || SERRANO */
1087 	busymask = IDSR_BUSY;
1088 #endif /* JALAPENO || SERRANO */
1089 	for (;;) {
1090 		idsr = getidsr();
1091 		if (idsr == 0)
1092 			break;
1093 
1094 		tick = gettick();
1095 		/*
1096 		 * If there is a big jump between the current tick
1097 		 * count and lasttick, we have probably hit a break
1098 		 * point.  Adjust endtick accordingly to avoid panic.
1099 		 */
1100 		if (tick > (lasttick + xc_tick_jump_limit))
1101 			endtick += (tick - lasttick);
1102 		lasttick = tick;
1103 		if (tick > endtick) {
1104 			if (panic_quiesce)
1105 				return;
1106 #ifdef	CHEETAHPLUS_ERRATUM_25
1107 			if (cheetah_sendmondo_recover && recovered == 0) {
1108 				if (mondo_recover(cpuid, 0)) {
1109 					/*
1110 					 * We claimed the whole memory or
1111 					 * full scan is disabled.
1112 					 */
1113 					recovered++;
1114 				}
1115 				tick = gettick();
1116 				endtick = tick + xc_tick_limit;
1117 				lasttick = tick;
1118 				/*
1119 				 * Recheck idsr
1120 				 */
1121 				continue;
1122 			} else
1123 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1124 			{
1125 				cmn_err(CE_PANIC, "send mondo timeout "
1126 				    "(target 0x%x) [%d NACK %d BUSY]",
1127 				    cpuid, nack, busy);
1128 			}
1129 		}
1130 
1131 		if (idsr & busymask) {
1132 			busy++;
1133 			continue;
1134 		}
1135 		drv_usecwait(1);
1136 		shipit(cpuid, 0);
1137 		nack++;
1138 		busy = 0;
1139 	}
1140 #ifdef SEND_MONDO_STATS
1141 	{
1142 		int n = gettick() - starttick;
1143 		if (n < 8192)
1144 			x_one_stimes[n >> 7]++;
1145 		else
1146 			x_one_ltimes[(n >> 13) & 0xf]++;
1147 	}
1148 #endif
1149 }
1150 
void
syncfpu(void)
{
	/*
	 * Intentionally empty.
	 * NOTE(review): presumably a cpu-module hook for which this cpu
	 * family has nothing to do -- confirm against the callers.
	 */
}
1155 
1156 /*
1157  * Return processor specific async error structure
1158  * size used.
1159  */
1160 int
1161 cpu_aflt_size(void)
1162 {
1163 	return (sizeof (ch_async_flt_t));
1164 }
1165 
/*
 * Tunable: non-zero (the default) enables checking the logout areas of the
 * other cpus during panic for errors that could generate syndrome 71.
 * Set to zero to disable that check.
 */
int enable_check_other_cpus_logout = 1;
1171 
1172 /*
1173  * Check other cpus logout area for potential synd 71 generating
1174  * errors.
1175  */
1176 static void
1177 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1178     ch_cpu_logout_t *clop)
1179 {
1180 	struct async_flt *aflt;
1181 	ch_async_flt_t ch_flt;
1182 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1183 
1184 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1185 		return;
1186 	}
1187 
1188 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1189 
1190 	t_afar = clop->clo_data.chd_afar;
1191 	t_afsr = clop->clo_data.chd_afsr;
1192 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1193 #if defined(SERRANO)
1194 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1195 #endif	/* SERRANO */
1196 
1197 	/*
1198 	 * In order to simplify code, we maintain this afsr_errs
1199 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1200 	 * sticky bits.
1201 	 */
1202 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1203 	    (t_afsr & C_AFSR_ALL_ERRS);
1204 
1205 	/* Setup the async fault structure */
1206 	aflt = (struct async_flt *)&ch_flt;
1207 	aflt->flt_id = gethrtime_waitfree();
1208 	ch_flt.afsr_ext = t_afsr_ext;
1209 	ch_flt.afsr_errs = t_afsr_errs;
1210 	aflt->flt_stat = t_afsr;
1211 	aflt->flt_addr = t_afar;
1212 	aflt->flt_bus_id = cpuid;
1213 	aflt->flt_inst = cpuid;
1214 	aflt->flt_pc = tpc;
1215 	aflt->flt_prot = AFLT_PROT_NONE;
1216 	aflt->flt_class = CPU_FAULT;
1217 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1218 	aflt->flt_tl = tl;
1219 	aflt->flt_status = ecc_type;
1220 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1221 
1222 	/*
1223 	 * Queue events on the async event queue, one event per error bit.
1224 	 * If no events are queued, queue an event to complain.
1225 	 */
1226 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1227 		ch_flt.flt_type = CPU_INV_AFSR;
1228 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1229 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1230 		    aflt->flt_panic);
1231 	}
1232 
1233 	/*
1234 	 * Zero out + invalidate CPU logout.
1235 	 */
1236 	bzero(clop, sizeof (ch_cpu_logout_t));
1237 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1238 }
1239 
1240 /*
1241  * Check the logout areas of all other cpus for unlogged errors.
1242  */
1243 static void
1244 cpu_check_other_cpus_logout(void)
1245 {
1246 	int i, j;
1247 	processorid_t myid;
1248 	struct cpu *cp;
1249 	ch_err_tl1_data_t *cl1p;
1250 
1251 	myid = CPU->cpu_id;
1252 	for (i = 0; i < NCPU; i++) {
1253 		cp = cpu[i];
1254 
1255 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1256 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1257 			continue;
1258 		}
1259 
1260 		/*
1261 		 * Check each of the tl>0 logout areas
1262 		 */
1263 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1264 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1265 			if (cl1p->ch_err_tl1_flags == 0)
1266 				continue;
1267 
1268 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1269 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1270 		}
1271 
1272 		/*
1273 		 * Check each of the remaining logout areas
1274 		 */
1275 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1276 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1277 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1278 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1279 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1280 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1281 	}
1282 }
1283 
1284 /*
1285  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1286  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1287  * flush the error that caused the UCU/UCC, then again here at the end to
1288  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1289  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1290  * another Fast ECC trap.
1291  *
1292  * Cheetah+ also handles: TSCE: No additional processing required.
1293  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1294  *
1295  * Note that the p_clo_flags input is only valid in cases where the
1296  * cpu_private struct is not yet initialized (since that is the only
1297  * time that information cannot be obtained from the logout struct.)
1298  */
1299 /*ARGSUSED*/
1300 void
1301 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1302 {
1303 	ch_cpu_logout_t *clop;
1304 	uint64_t ceen, nceen;
1305 
1306 	/*
1307 	 * Get the CPU log out info. If we can't find our CPU private
1308 	 * pointer, then we will have to make due without any detailed
1309 	 * logout information.
1310 	 */
1311 	if (CPU_PRIVATE(CPU) == NULL) {
1312 		clop = NULL;
1313 		ceen = p_clo_flags & EN_REG_CEEN;
1314 		nceen = p_clo_flags & EN_REG_NCEEN;
1315 	} else {
1316 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1317 		ceen = clop->clo_flags & EN_REG_CEEN;
1318 		nceen = clop->clo_flags & EN_REG_NCEEN;
1319 	}
1320 
1321 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1322 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1323 }
1324 
1325 /*
1326  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1327  * ECC at TL>0.  Need to supply either a error register pointer or a
1328  * cpu logout structure pointer.
1329  */
1330 static void
1331 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1332     uint64_t nceen, ch_cpu_logout_t *clop)
1333 {
1334 	struct async_flt *aflt;
1335 	ch_async_flt_t ch_flt;
1336 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1337 	char pr_reason[MAX_REASON_STRING];
1338 	ch_cpu_errors_t cpu_error_regs;
1339 
1340 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1341 	/*
1342 	 * If no cpu logout data, then we will have to make due without
1343 	 * any detailed logout information.
1344 	 */
1345 	if (clop == NULL) {
1346 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1347 		get_cpu_error_state(&cpu_error_regs);
1348 		set_cpu_error_state(&cpu_error_regs);
1349 		t_afar = cpu_error_regs.afar;
1350 		t_afsr = cpu_error_regs.afsr;
1351 		t_afsr_ext = cpu_error_regs.afsr_ext;
1352 #if defined(SERRANO)
1353 		ch_flt.afar2 = cpu_error_regs.afar2;
1354 #endif	/* SERRANO */
1355 	} else {
1356 		t_afar = clop->clo_data.chd_afar;
1357 		t_afsr = clop->clo_data.chd_afsr;
1358 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1359 #if defined(SERRANO)
1360 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1361 #endif	/* SERRANO */
1362 	}
1363 
1364 	/*
1365 	 * In order to simplify code, we maintain this afsr_errs
1366 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1367 	 * sticky bits.
1368 	 */
1369 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1370 	    (t_afsr & C_AFSR_ALL_ERRS);
1371 	pr_reason[0] = '\0';
1372 
1373 	/* Setup the async fault structure */
1374 	aflt = (struct async_flt *)&ch_flt;
1375 	aflt->flt_id = gethrtime_waitfree();
1376 	ch_flt.afsr_ext = t_afsr_ext;
1377 	ch_flt.afsr_errs = t_afsr_errs;
1378 	aflt->flt_stat = t_afsr;
1379 	aflt->flt_addr = t_afar;
1380 	aflt->flt_bus_id = getprocessorid();
1381 	aflt->flt_inst = CPU->cpu_id;
1382 	aflt->flt_pc = tpc;
1383 	aflt->flt_prot = AFLT_PROT_NONE;
1384 	aflt->flt_class = CPU_FAULT;
1385 	aflt->flt_priv = priv;
1386 	aflt->flt_tl = tl;
1387 	aflt->flt_status = ECC_F_TRAP;
1388 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1389 
1390 	/*
1391 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1392 	 * cmn_err messages out to the console.  The situation is a UCU (in
1393 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1394 	 * The messages for the UCU and WDU are enqueued and then pulled off
1395 	 * the async queue via softint and syslogd starts to process them
1396 	 * but doesn't get them to the console.  The UE causes a panic, but
1397 	 * since the UCU/WDU messages are already in transit, those aren't
1398 	 * on the async queue.  The hack is to check if we have a matching
1399 	 * WDU event for the UCU, and if it matches, we're more than likely
1400 	 * going to panic with a UE, unless we're under protection.  So, we
1401 	 * check to see if we got a matching WDU event and if we're under
1402 	 * protection.
1403 	 *
1404 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1405 	 * looks like this:
1406 	 *    UCU->WDU->UE
1407 	 * For Panther, it could look like either of these:
1408 	 *    UCU---->WDU->L3_WDU->UE
1409 	 *    L3_UCU->WDU->L3_WDU->UE
1410 	 */
1411 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1412 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1413 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1414 		get_cpu_error_state(&cpu_error_regs);
1415 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1416 		    (cpu_error_regs.afar == t_afar));
1417 		aflt->flt_panic |= ((clop == NULL) &&
1418 		    (t_afsr_errs & C_AFSR_WDU));
1419 	}
1420 
1421 	/*
1422 	 * Queue events on the async event queue, one event per error bit.
1423 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1424 	 * queue an event to complain.
1425 	 */
1426 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1427 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1428 		ch_flt.flt_type = CPU_INV_AFSR;
1429 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1430 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1431 		    aflt->flt_panic);
1432 	}
1433 
1434 	/*
1435 	 * Zero out + invalidate CPU logout.
1436 	 */
1437 	if (clop) {
1438 		bzero(clop, sizeof (ch_cpu_logout_t));
1439 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1440 	}
1441 
1442 	/*
1443 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1444 	 * or disrupting errors have happened.  We do this because if a
1445 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1446 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1447 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1448 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1449 	 * deferred or disrupting error happening between checking the AFSR and
1450 	 * enabling NCEEN/CEEN.
1451 	 *
1452 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1453 	 * taken.
1454 	 */
1455 	set_error_enable(get_error_enable() | (nceen | ceen));
1456 	if (clear_errors(&ch_flt)) {
1457 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1458 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1459 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1460 		    NULL);
1461 	}
1462 
1463 	/*
1464 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1465 	 * be logged as part of the panic flow.
1466 	 */
1467 	if (aflt->flt_panic)
1468 		fm_panic("%sError(s)", pr_reason);
1469 
1470 	/*
1471 	 * Flushing the Ecache here gets the part of the trap handler that
1472 	 * is run at TL=1 out of the Ecache.
1473 	 */
1474 	cpu_flush_ecache();
1475 }
1476 
1477 /*
1478  * This is called via sys_trap from pil15_interrupt code if the
1479  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1480  * various ch_err_tl1_data structures for valid entries based on the bit
1481  * settings in the ch_err_tl1_flags entry of the structure.
1482  */
1483 /*ARGSUSED*/
1484 void
1485 cpu_tl1_error(struct regs *rp, int panic)
1486 {
1487 	ch_err_tl1_data_t *cl1p, cl1;
1488 	int i, ncl1ps;
1489 	uint64_t me_flags;
1490 	uint64_t ceen, nceen;
1491 
1492 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1493 		cl1p = &ch_err_tl1_data;
1494 		ncl1ps = 1;
1495 	} else if (CPU_PRIVATE(CPU) != NULL) {
1496 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1497 		ncl1ps = CH_ERR_TL1_TLMAX;
1498 	} else {
1499 		ncl1ps = 0;
1500 	}
1501 
1502 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1503 		if (cl1p->ch_err_tl1_flags == 0)
1504 			continue;
1505 
1506 		/*
1507 		 * Grab a copy of the logout data and invalidate
1508 		 * the logout area.
1509 		 */
1510 		cl1 = *cl1p;
1511 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1512 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1513 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1514 
1515 		/*
1516 		 * Log "first error" in ch_err_tl1_data.
1517 		 */
1518 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1519 			ceen = get_error_enable() & EN_REG_CEEN;
1520 			nceen = get_error_enable() & EN_REG_NCEEN;
1521 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1522 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1523 		}
1524 #if defined(CPU_IMP_L1_CACHE_PARITY)
1525 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1526 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1527 			    (caddr_t)cl1.ch_err_tl1_tpc);
1528 		}
1529 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1530 
1531 		/*
1532 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1533 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1534 		 * if the structure is busy, we just do the cache flushing
1535 		 * we have to do and then do the retry.  So the AFSR/AFAR
1536 		 * at this point *should* have some relevant info.  If there
1537 		 * are no valid errors in the AFSR, we'll assume they've
1538 		 * already been picked up and logged.  For I$/D$ parity,
1539 		 * we just log an event with an "Unknown" (NULL) TPC.
1540 		 */
1541 		if (me_flags & CH_ERR_FECC) {
1542 			ch_cpu_errors_t cpu_error_regs;
1543 			uint64_t t_afsr_errs;
1544 
1545 			/*
1546 			 * Get the error registers and see if there's
1547 			 * a pending error.  If not, don't bother
1548 			 * generating an "Invalid AFSR" error event.
1549 			 */
1550 			get_cpu_error_state(&cpu_error_regs);
1551 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1552 			    C_AFSR_EXT_ALL_ERRS) |
1553 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1554 			if (t_afsr_errs != 0) {
1555 				ceen = get_error_enable() & EN_REG_CEEN;
1556 				nceen = get_error_enable() & EN_REG_NCEEN;
1557 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1558 				    1, ceen, nceen, NULL);
1559 			}
1560 		}
1561 #if defined(CPU_IMP_L1_CACHE_PARITY)
1562 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1563 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1564 		}
1565 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1566 	}
1567 }
1568 
1569 /*
1570  * Called from Fast ECC TL>0 handler in case of fatal error.
1571  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1572  * but if we don't, we'll panic with something reasonable.
1573  */
1574 /*ARGSUSED*/
1575 void
1576 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1577 {
1578 	cpu_tl1_error(rp, 1);
1579 	/*
1580 	 * Should never return, but just in case.
1581 	 */
1582 	fm_panic("Unsurvivable ECC Error at TL>0");
1583 }
1584 
1585 /*
1586  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1587  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1588  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1589  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1590  *
1591  * Cheetah+ also handles (No additional processing required):
1592  *    DUE, DTO, DBERR	(NCEEN controlled)
1593  *    THCE		(CEEN and ET_ECC_en controlled)
1594  *    TUE		(ET_ECC_en controlled)
1595  *
1596  * Panther further adds:
1597  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1598  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1599  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1600  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1601  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1602  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1603  *
1604  * Note that the p_clo_flags input is only valid in cases where the
1605  * cpu_private struct is not yet initialized (since that is the only
1606  * time that information cannot be obtained from the logout struct.)
1607  */
1608 /*ARGSUSED*/
1609 void
1610 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1611 {
1612 	struct async_flt *aflt;
1613 	ch_async_flt_t ch_flt;
1614 	char pr_reason[MAX_REASON_STRING];
1615 	ch_cpu_logout_t *clop;
1616 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1617 	ch_cpu_errors_t cpu_error_regs;
1618 
1619 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1620 	/*
1621 	 * Get the CPU log out info. If we can't find our CPU private
1622 	 * pointer, then we will have to make due without any detailed
1623 	 * logout information.
1624 	 */
1625 	if (CPU_PRIVATE(CPU) == NULL) {
1626 		clop = NULL;
1627 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1628 		get_cpu_error_state(&cpu_error_regs);
1629 		set_cpu_error_state(&cpu_error_regs);
1630 		t_afar = cpu_error_regs.afar;
1631 		t_afsr = cpu_error_regs.afsr;
1632 		t_afsr_ext = cpu_error_regs.afsr_ext;
1633 #if defined(SERRANO)
1634 		ch_flt.afar2 = cpu_error_regs.afar2;
1635 #endif	/* SERRANO */
1636 	} else {
1637 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1638 		t_afar = clop->clo_data.chd_afar;
1639 		t_afsr = clop->clo_data.chd_afsr;
1640 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1641 #if defined(SERRANO)
1642 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1643 #endif	/* SERRANO */
1644 	}
1645 
1646 	/*
1647 	 * In order to simplify code, we maintain this afsr_errs
1648 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1649 	 * sticky bits.
1650 	 */
1651 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1652 	    (t_afsr & C_AFSR_ALL_ERRS);
1653 
1654 	pr_reason[0] = '\0';
1655 	/* Setup the async fault structure */
1656 	aflt = (struct async_flt *)&ch_flt;
1657 	ch_flt.afsr_ext = t_afsr_ext;
1658 	ch_flt.afsr_errs = t_afsr_errs;
1659 	aflt->flt_stat = t_afsr;
1660 	aflt->flt_addr = t_afar;
1661 	aflt->flt_pc = (caddr_t)rp->r_pc;
1662 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1663 	aflt->flt_tl = 0;
1664 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1665 
1666 	/*
1667 	 * If this trap is a result of one of the errors not masked
1668 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1669 	 * indicate that a timeout is to be set later.
1670 	 */
1671 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1672 	    !aflt->flt_panic)
1673 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1674 	else
1675 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1676 
1677 	/*
1678 	 * log the CE and clean up
1679 	 */
1680 	cpu_log_and_clear_ce(&ch_flt);
1681 
1682 	/*
1683 	 * We re-enable CEEN (if required) and check if any disrupting errors
1684 	 * have happened.  We do this because if a disrupting error had occurred
1685 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1686 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1687 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1688 	 * of a error happening between checking the AFSR and enabling CEEN.
1689 	 */
1690 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1691 		set_error_enable(get_error_enable() | EN_REG_CEEN);
1692 	if (clear_errors(&ch_flt)) {
1693 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1694 		    NULL);
1695 	}
1696 
1697 	/*
1698 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1699 	 * be logged as part of the panic flow.
1700 	 */
1701 	if (aflt->flt_panic)
1702 		fm_panic("%sError(s)", pr_reason);
1703 }
1704 
1705 /*
1706  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1707  * L3_EDU:BLD, TO, and BERR events.
1708  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1709  *
1710  * Cheetah+: No additional errors handled.
1711  *
1712  * Note that the p_clo_flags input is only valid in cases where the
1713  * cpu_private struct is not yet initialized (since that is the only
1714  * time that information cannot be obtained from the logout struct.)
1715  */
1716 /*ARGSUSED*/
1717 void
1718 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1719 {
1720 	ushort_t ttype, tl;
1721 	ch_async_flt_t ch_flt;
1722 	struct async_flt *aflt;
1723 	int trampolined = 0;
1724 	char pr_reason[MAX_REASON_STRING];
1725 	ch_cpu_logout_t *clop;
1726 	uint64_t ceen, clo_flags;
1727 	uint64_t log_afsr;
1728 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1729 	ch_cpu_errors_t cpu_error_regs;
1730 	int expected = DDI_FM_ERR_UNEXPECTED;
1731 	ddi_acc_hdl_t *hp;
1732 
1733 	/*
1734 	 * We need to look at p_flag to determine if the thread detected an
1735 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1736 	 * because we just need a consistent snapshot and we know that everyone
1737 	 * else will store a consistent set of bits while holding p_lock.  We
1738 	 * don't have to worry about a race because SDOCORE is set once prior
1739 	 * to doing i/o from the process's address space and is never cleared.
1740 	 */
1741 	uint_t pflag = ttoproc(curthread)->p_flag;
1742 
1743 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1744 	/*
1745 	 * Get the CPU log out info. If we can't find our CPU private
1746 	 * pointer then we will have to make do without any detailed
1747 	 * logout information.
1748 	 */
1749 	if (CPU_PRIVATE(CPU) == NULL) {
1750 		clop = NULL;
1751 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1752 		get_cpu_error_state(&cpu_error_regs);
1753 		set_cpu_error_state(&cpu_error_regs);
1754 		t_afar = cpu_error_regs.afar;
1755 		t_afsr = cpu_error_regs.afsr;
1756 		t_afsr_ext = cpu_error_regs.afsr_ext;
1757 #if defined(SERRANO)
1758 		ch_flt.afar2 = cpu_error_regs.afar2;
1759 #endif	/* SERRANO */
1760 		clo_flags = p_clo_flags;
1761 	} else {
1762 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1763 		t_afar = clop->clo_data.chd_afar;
1764 		t_afsr = clop->clo_data.chd_afsr;
1765 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1766 #if defined(SERRANO)
1767 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1768 #endif	/* SERRANO */
1769 		clo_flags = clop->clo_flags;
1770 	}
1771 
1772 	/*
1773 	 * In order to simplify code, we maintain this afsr_errs
1774 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1775 	 * sticky bits.
1776 	 */
1777 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1778 	    (t_afsr & C_AFSR_ALL_ERRS);
1779 	pr_reason[0] = '\0';
1780 
1781 	/*
1782 	 * Grab information encoded into our clo_flags field.
1783 	 */
1784 	ceen = clo_flags & EN_REG_CEEN;
1785 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1786 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1787 
1788 	/*
1789 	 * handle the specific error
1790 	 */
1791 	aflt = (struct async_flt *)&ch_flt;
1792 	aflt->flt_id = gethrtime_waitfree();
1793 	aflt->flt_bus_id = getprocessorid();
1794 	aflt->flt_inst = CPU->cpu_id;
1795 	ch_flt.afsr_ext = t_afsr_ext;
1796 	ch_flt.afsr_errs = t_afsr_errs;
1797 	aflt->flt_stat = t_afsr;
1798 	aflt->flt_addr = t_afar;
1799 	aflt->flt_pc = (caddr_t)rp->r_pc;
1800 	aflt->flt_prot = AFLT_PROT_NONE;
1801 	aflt->flt_class = CPU_FAULT;
1802 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1803 	aflt->flt_tl = (uchar_t)tl;
1804 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1805 	    C_AFSR_PANIC(t_afsr_errs));
1806 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1807 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1808 
1809 	/*
1810 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1811 	 * see if we were executing in the kernel under on_trap() or t_lofault
1812 	 * protection.  If so, modify the saved registers so that we return
1813 	 * from the trap to the appropriate trampoline routine.
1814 	 */
1815 	if (aflt->flt_priv && tl == 0) {
1816 		if (curthread->t_ontrap != NULL) {
1817 			on_trap_data_t *otp = curthread->t_ontrap;
1818 
1819 			if (otp->ot_prot & OT_DATA_EC) {
1820 				aflt->flt_prot = AFLT_PROT_EC;
1821 				otp->ot_trap |= OT_DATA_EC;
1822 				rp->r_pc = otp->ot_trampoline;
1823 				rp->r_npc = rp->r_pc + 4;
1824 				trampolined = 1;
1825 			}
1826 
1827 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1828 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1829 				aflt->flt_prot = AFLT_PROT_ACCESS;
1830 				otp->ot_trap |= OT_DATA_ACCESS;
1831 				rp->r_pc = otp->ot_trampoline;
1832 				rp->r_npc = rp->r_pc + 4;
1833 				trampolined = 1;
1834 				/*
1835 				 * for peeks and caut_gets errors are expected
1836 				 */
1837 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1838 				if (!hp)
1839 					expected = DDI_FM_ERR_PEEK;
1840 				else if (hp->ah_acc.devacc_attr_access ==
1841 				    DDI_CAUTIOUS_ACC)
1842 					expected = DDI_FM_ERR_EXPECTED;
1843 			}
1844 
1845 		} else if (curthread->t_lofault) {
1846 			aflt->flt_prot = AFLT_PROT_COPY;
1847 			rp->r_g1 = EFAULT;
1848 			rp->r_pc = curthread->t_lofault;
1849 			rp->r_npc = rp->r_pc + 4;
1850 			trampolined = 1;
1851 		}
1852 	}
1853 
1854 	/*
1855 	 * If we're in user mode or we're doing a protected copy, we either
1856 	 * want the ASTON code below to send a signal to the user process
1857 	 * or we want to panic if aft_panic is set.
1858 	 *
1859 	 * If we're in privileged mode and we're not doing a copy, then we
1860 	 * need to check if we've trampolined.  If we haven't trampolined,
1861 	 * we should panic.
1862 	 */
1863 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1864 		if (t_afsr_errs &
1865 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1866 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1867 			aflt->flt_panic |= aft_panic;
1868 	} else if (!trampolined) {
1869 			aflt->flt_panic = 1;
1870 	}
1871 
1872 	/*
1873 	 * If we've trampolined due to a privileged TO or BERR, or if an
1874 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1875 	 * event for that TO or BERR.  Queue all other events (if any) besides
1876 	 * the TO/BERR.  Since we may not be enqueuing any events, we need to
1877 	 * ignore the number of events queued.  If we haven't trampolined due
1878 	 * to a TO or BERR, just enqueue events normally.
1879 	 */
1880 	log_afsr = t_afsr_errs;
1881 	if (trampolined) {
1882 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1883 	} else if (!aflt->flt_priv) {
1884 		/*
1885 		 * User mode, suppress messages if
1886 		 * cpu_berr_to_verbose is not set.
1887 		 */
1888 		if (!cpu_berr_to_verbose)
1889 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1890 	}
1891 
1892 	/*
1893 	 * Log any errors that occurred
1894 	 */
1895 	if (((log_afsr &
1896 	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1897 	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1898 	    (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1899 		ch_flt.flt_type = CPU_INV_AFSR;
1900 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1901 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1902 		    aflt->flt_panic);
1903 	}
1904 
1905 	/*
1906 	 * Zero out + invalidate CPU logout.
1907 	 */
1908 	if (clop) {
1909 		bzero(clop, sizeof (ch_cpu_logout_t));
1910 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1911 	}
1912 
1913 #if defined(JALAPENO) || defined(SERRANO)
1914 	/*
1915 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1916 	 * IO errors that may have resulted in this trap.
1917 	 */
1918 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1919 		cpu_run_bus_error_handlers(aflt, expected);
1920 	}
1921 
1922 	/*
1923 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1924 	 * line from the Ecache.  We also need to query the bus nexus for
1925 	 * fatal errors.  Attempts to do diagnostic read on caches may
1926 	 * introduce more errors (especially when the module is bad).
1927 	 */
1928 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1929 		/*
1930 		 * Ask our bus nexus friends if they have any fatal errors.  If
1931 		 * so, they will log appropriate error messages.
1932 		 */
1933 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1934 			aflt->flt_panic = 1;
1935 
1936 		/*
1937 		 * We got a UE or RUE and are panicking, save the fault PA in
1938 		 * a known location so that the platform specific panic code
1939 		 * can check for copyback errors.
1940 		 */
1941 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1942 			panic_aflt = *aflt;
1943 		}
1944 	}
1945 
1946 	/*
1947 	 * Flush Ecache line or entire Ecache
1948 	 */
1949 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1950 		cpu_error_ecache_flush(&ch_flt);
1951 #else /* JALAPENO || SERRANO */
1952 	/*
1953 	 * UE/BERR/TO: Call our bus nexus friends to check for
1954 	 * IO errors that may have resulted in this trap.
1955 	 */
1956 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1957 		cpu_run_bus_error_handlers(aflt, expected);
1958 	}
1959 
1960 	/*
1961 	 * UE: If the UE is in memory, we need to flush the bad
1962 	 * line from the Ecache.  We also need to query the bus nexus for
1963 	 * fatal errors.  Attempts to do diagnostic read on caches may
1964 	 * introduce more errors (especially when the module is bad).
1965 	 */
1966 	if (t_afsr & C_AFSR_UE) {
1967 		/*
1968 		 * Ask our legacy bus nexus friends if they have any fatal
1969 		 * errors.  If so, they will log appropriate error messages.
1970 		 */
1971 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1972 			aflt->flt_panic = 1;
1973 
1974 		/*
1975 		 * We got a UE and are panicking, save the fault PA in a known
1976 		 * location so that the platform specific panic code can check
1977 		 * for copyback errors.
1978 		 */
1979 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1980 			panic_aflt = *aflt;
1981 		}
1982 	}
1983 
1984 	/*
1985 	 * Flush Ecache line or entire Ecache
1986 	 */
1987 	if (t_afsr_errs &
1988 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1989 		cpu_error_ecache_flush(&ch_flt);
1990 #endif /* JALAPENO || SERRANO */
1991 
1992 	/*
1993 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1994 	 * or disrupting errors have happened.  We do this because if a
1995 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1996 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1997 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1998 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1999 	 * deferred or disrupting error happening between checking the AFSR and
2000 	 * enabling NCEEN/CEEN.
2001 	 *
2002 	 * Note: CEEN reenabled only if it was on when trap taken.
2003 	 */
2004 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2005 	if (clear_errors(&ch_flt)) {
2006 		/*
2007 		 * Check for secondary errors, and avoid panicking if we
2008 		 * have them
2009 		 */
2010 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2011 		    t_afar) == 0) {
2012 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2013 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2014 		}
2015 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2016 		    NULL);
2017 	}
2018 
2019 	/*
2020 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2021 	 * be logged as part of the panic flow.
2022 	 */
2023 	if (aflt->flt_panic)
2024 		fm_panic("%sError(s)", pr_reason);
2025 
2026 	/*
2027 	 * If we queued an error and we are going to return from the trap and
2028 	 * the error was in user mode or inside of a copy routine, set AST flag
2029 	 * so the queue will be drained before returning to user mode.  The
2030 	 * AST processing will also act on our failure policy.
2031 	 */
2032 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2033 		int pcb_flag = 0;
2034 
2035 		if (t_afsr_errs &
2036 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2037 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2038 			pcb_flag |= ASYNC_HWERR;
2039 
2040 		if (t_afsr & C_AFSR_BERR)
2041 			pcb_flag |= ASYNC_BERR;
2042 
2043 		if (t_afsr & C_AFSR_TO)
2044 			pcb_flag |= ASYNC_BTO;
2045 
2046 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2047 		aston(curthread);
2048 	}
2049 }
2050 
2051 #if defined(CPU_IMP_L1_CACHE_PARITY)
/*
 * Handling of data and instruction parity errors (traps 0x71, 0x72).
 *
 * For Panther, P$ data parity errors during floating point load hits
 * are also detected (reported as TT 0x71) and handled by this trap
 * handler.
 *
 * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
 * is available.
 *
 * Arguments:
 *	rp	- saved registers for the trap; only r_tstate is read here,
 *		  to decide whether the fault was privileged.
 *	flags	- CH_ERR_* bits.  CH_ERR_TL marks an error taken at TL>0,
 *		  CH_ERR_IPE selects the I$ path (otherwise D$/P$), and
 *		  CH_ERR_PANIC seeds flt_panic.
 *	tpc	- trap PC; stored as flt_pc and, for I$ errors only, as the
 *		  fault address.
 *
 * The routine fills in a stack-local ch_async_flt_t, picks the ereport
 * class, dispatches the fault to ue_queue, repairs/invalidates the L1
 * caches, restores the DCU cache enables, and finally panics if flt_panic
 * is set.
 */
/*ARGSUSED*/
void
cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
{
	ch_async_flt_t ch_flt;
	struct async_flt *aflt;
	uchar_t tl = ((flags & CH_ERR_TL) != 0);
	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
	char *error_class;

	/*
	 * Log the error.
	 * For icache parity errors the fault address is the trap PC.
	 * For dcache/pcache parity errors the instruction would have to
	 * be decoded to determine the address and that isn't possible
	 * at high PIL.
	 */
	bzero(&ch_flt, sizeof (ch_async_flt_t));
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = tpc;
	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	/* Anything taken at TL>0 is treated as privileged. */
	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
	aflt->flt_tl = tl;
	aflt->flt_panic = panic;
	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;

	/*
	 * Choose the ereport class from what the parity scan located:
	 * a valid cpl_off selects the data-array class, otherwise a valid
	 * cpl_way selects the tag class, otherwise the generic class.
	 */
	if (iparity) {
		cpu_icache_parity_info(&ch_flt);
		if (ch_flt.parity_data.ipe.cpl_off != -1)
			error_class = FM_EREPORT_CPU_USIII_IDSPE;
		else if (ch_flt.parity_data.ipe.cpl_way != -1)
			error_class = FM_EREPORT_CPU_USIII_ITSPE;
		else
			error_class = FM_EREPORT_CPU_USIII_IPE;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
	} else {
		cpu_dcache_parity_info(&ch_flt);
		if (ch_flt.parity_data.dpe.cpl_off != -1)
			error_class = FM_EREPORT_CPU_USIII_DDSPE;
		else if (ch_flt.parity_data.dpe.cpl_way != -1)
			error_class = FM_EREPORT_CPU_USIII_DTSPE;
		else
			error_class = FM_EREPORT_CPU_USIII_DPE;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
		/*
		 * For panther we also need to check the P$ for parity errors.
		 * A P$ hit overrides the D$ class and payload chosen above.
		 */
		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
			cpu_pcache_parity_info(&ch_flt);
			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
				error_class = FM_EREPORT_CPU_USIII_PDSPE;
				aflt->flt_payload =
				    FM_EREPORT_PAYLOAD_PCACHE_PE;
			}
		}
	}

	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);

	if (iparity) {
		/*
		 * Invalidate entire I$.
		 * This is required due to the use of diagnostic ASI
		 * accesses that may result in a loss of I$ coherency.
		 */
		if (cache_boot_state & DCU_IC) {
			flush_icache();
		}
		/*
		 * According to section P.3.1 of the Panther PRM, we
		 * need to do a little more for recovery on those
		 * CPUs after encountering an I$ parity error.
		 */
		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
			flush_ipb();
			correct_dcache_parity(dcache_size,
			    dcache_linesize);
			flush_pcache();
		}
	} else {
		/*
		 * Since the valid bit is ignored when checking parity the
		 * D$ data and tag must also be corrected.  Set D$ data bits
		 * to zero and set utag to 0, 1, 2, 3.
		 */
		correct_dcache_parity(dcache_size, dcache_linesize);

		/*
		 * According to section P.3.3 of the Panther PRM, we
		 * need to do a little more for recovery on those
		 * CPUs after encountering a D$ or P$ parity error.
		 *
		 * As far as clearing P$ parity errors, it is enough to
		 * simply invalidate all entries in the P$ since P$ parity
		 * error traps are only generated for floating point load
		 * hits.
		 */
		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
			flush_icache();
			flush_ipb();
			flush_pcache();
		}
	}

	/*
	 * Invalidate entire D$ if it was enabled.
	 * This is done to avoid stale data in the D$ which might
	 * occur with the D$ disabled and the trap handler doing
	 * stores affecting lines already in the D$.
	 */
	if (cache_boot_state & DCU_DC) {
		flush_dcache();
	}

	/*
	 * Restore caches to their bootup state.
	 */
	set_dcu(get_dcu() | cache_boot_state);

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic)
		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");

	/*
	 * If this error occurred at TL>0 then flush the E$ here to reduce
	 * the chance of getting an unrecoverable Fast ECC error.  This
	 * flush will evict the part of the parity trap handler that is run
	 * at TL>1.
	 */
	if (tl) {
		cpu_flush_ecache();
	}
}
2206 
2207 /*
2208  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2209  * to indicate which portions of the captured data should be in the ereport.
2210  */
2211 void
2212 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2213 {
2214 	int way = ch_flt->parity_data.ipe.cpl_way;
2215 	int offset = ch_flt->parity_data.ipe.cpl_off;
2216 	int tag_index;
2217 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2218 
2219 
2220 	if ((offset != -1) || (way != -1)) {
2221 		/*
2222 		 * Parity error in I$ tag or data
2223 		 */
2224 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2225 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2226 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2227 			    PN_ICIDX_TO_WAY(tag_index);
2228 		else
2229 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2230 			    CH_ICIDX_TO_WAY(tag_index);
2231 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2232 		    IC_LOGFLAG_MAGIC;
2233 	} else {
2234 		/*
2235 		 * Parity error was not identified.
2236 		 * Log tags and data for all ways.
2237 		 */
2238 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2239 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2240 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2241 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2242 				    PN_ICIDX_TO_WAY(tag_index);
2243 			else
2244 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2245 				    CH_ICIDX_TO_WAY(tag_index);
2246 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2247 			    IC_LOGFLAG_MAGIC;
2248 		}
2249 	}
2250 }
2251 
2252 /*
2253  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2254  * to indicate which portions of the captured data should be in the ereport.
2255  */
2256 void
2257 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2258 {
2259 	int way = ch_flt->parity_data.dpe.cpl_way;
2260 	int offset = ch_flt->parity_data.dpe.cpl_off;
2261 	int tag_index;
2262 
2263 	if (offset != -1) {
2264 		/*
2265 		 * Parity error in D$ or P$ data array.
2266 		 *
2267 		 * First check to see whether the parity error is in D$ or P$
2268 		 * since P$ data parity errors are reported in Panther using
2269 		 * the same trap.
2270 		 */
2271 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2272 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2273 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2274 			    CH_PCIDX_TO_WAY(tag_index);
2275 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2276 			    PC_LOGFLAG_MAGIC;
2277 		} else {
2278 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2279 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2280 			    CH_DCIDX_TO_WAY(tag_index);
2281 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2282 			    DC_LOGFLAG_MAGIC;
2283 		}
2284 	} else if (way != -1) {
2285 		/*
2286 		 * Parity error in D$ tag.
2287 		 */
2288 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2289 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2290 		    CH_DCIDX_TO_WAY(tag_index);
2291 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2292 		    DC_LOGFLAG_MAGIC;
2293 	}
2294 }
2295 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2296 
/*
 * The cpu_async_log_err() function is called via the [uc]e_drain() function to
 * post-process CPU events that are dequeued.  As such, it can be invoked
 * from softint context, from AST processing in the trap() flow, or from the
 * panic flow.  We decode the CPU-specific data, and take appropriate actions.
 * Historically this entry point was used to log the actual cmn_err(9F) text;
 * now with FMA it is used to prepare 'flt' to be converted into an ereport.
 * With FMA this function now also returns a flag which indicates to the
 * caller whether the ereport should be posted (1) or suppressed (0).
 *
 * Arguments:
 *	flt	- the dequeued fault; viewed both as a ch_async_flt_t and as
 *		  its leading struct async_flt.
 *	eqep	- the error queue element, handed on to
 *		  ce_scrub_xdiag_recirc() and cpu_impl_async_log_err().
 *
 * Control flow: most fault types return directly from the switch.  CPU_UE
 * and a CH_ASYNC_LOG_CONTINUE answer from the impldep backend break out of
 * the switch into the page-retire / clear-on-panic tail at the bottom.
 */
static int
cpu_async_log_err(void *flt, errorq_elem_t *eqep)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;
	uint64_t errors;
	extern void memscrub_induced_error(void);

	switch (ch_flt->flt_type) {
	case CPU_INV_AFSR:
		/*
		 * If it is a disrupting trap and the AFSR is zero, then
		 * the event has probably already been noted. Do not post
		 * an ereport.
		 */
		if ((aflt->flt_status & ECC_C_TRAP) &&
		    (!(aflt->flt_stat & C_AFSR_MASK)))
			return (0);
		else
			return (1);
	/* These types need no post-processing; always post the ereport. */
	case CPU_TO:
	case CPU_BERR:
	case CPU_FATAL:
	case CPU_FPUERR:
		return (1);

	/* Log the error, then unconditionally retire the page. */
	case CPU_UE_ECACHE_RETIRE:
		cpu_log_err(aflt);
		cpu_page_retire(ch_flt);
		return (1);

	/*
	 * Cases where we may want to suppress logging or perform
	 * extended diagnostics.
	 */
	case CPU_CE:
	case CPU_EMC:
		/*
		 * We want to skip logging and further classification
		 * only if ALL the following conditions are true:
		 *
		 *	1. There is only one error
		 *	2. That error is a correctable memory error
		 *	3. The error is caused by the memory scrubber (in
		 *	   which case the error will have occurred under
		 *	   on_trap protection)
		 *	4. The error is on a retired page
		 *
		 * Note: AFLT_PROT_EC is used places other than the memory
		 * scrubber.  However, none of those errors should occur
		 * on a retired page.
		 */
		if ((ch_flt->afsr_errs &
		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
		    aflt->flt_prot == AFLT_PROT_EC) {

			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {

				/*
				 * Since we're skipping logging, we'll need
				 * to schedule the re-enabling of CEEN
				 */
				(void) timeout(cpu_delayed_check_ce_errors,
				    (void *)(uintptr_t)aflt->flt_inst,
				    drv_usectohz((clock_t)cpu_ceen_delay_secs
				    * MICROSEC));
				}

				/*
				 * Inform memscrubber - scrubbing induced
				 * CE on a retired page.
				 */
				memscrub_induced_error();
				return (0);
			}
		}

		/*
		 * Perform/schedule further classification actions, but
		 * only if the page is healthy (we don't want bad
		 * pages inducing too much diagnostic activity).  If we could
		 * not find a page pointer then we also skip this.  If
		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
		 * to copy and recirculate the event (for further diagnostics)
		 * and we should not proceed to log it here.
		 *
		 * This must be the last step here before the cpu_log_err()
		 * below - if an event recirculates cpu_ce_log_err() will
		 * not call the current function but just proceed directly
		 * to cpu_ereport_post after the cpu_log_err() avoided below.
		 *
		 * Note: Check cpu_impl_async_log_err if changing this
		 */
		/* 'errors' is only examined when the call did not give EINVAL */
		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
			    CE_XDIAG_SKIP_NOPP);
		} else {
			if (errors != PR_OK) {
				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
				    CE_XDIAG_SKIP_PAGEDET);
			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
				return (0);
			}
		}
		/*FALLTHRU*/

	/*
	 * Cases where we just want to report the error and continue.
	 */
	case CPU_CE_ECACHE:
	case CPU_UE_ECACHE:
	case CPU_IV:
	case CPU_ORPH:
		cpu_log_err(aflt);
		return (1);

	/*
	 * Cases where we want to fall through to handle panicking.
	 */
	case CPU_UE:
		/*
		 * We want to skip logging in the same conditions as the
		 * CE case.  In addition, we want to make sure we're not
		 * panicking.
		 */
		if (!panicstr && (ch_flt->afsr_errs &
		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
		    aflt->flt_prot == AFLT_PROT_EC) {
			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
				/* Zero the address to clear the error */
				softcall(ecc_page_zero, (void *)aflt->flt_addr);
				/*
				 * Inform memscrubber - scrubbing induced
				 * UE on a retired page.
				 */
				memscrub_induced_error();
				return (0);
			}
		}
		cpu_log_err(aflt);
		break;

	default:
		/*
		 * If the us3_common.c code doesn't know the flt_type, it may
		 * be an implementation-specific code.  Call into the impldep
		 * backend to find out what to do: if it tells us to continue,
		 * break and handle as if falling through from a UE; if not,
		 * the impldep backend has handled the error and we're done.
		 */
		switch (cpu_impl_async_log_err(flt, eqep)) {
		case CH_ASYNC_LOG_DONE:
			return (1);
		case CH_ASYNC_LOG_RECIRC:
			return (0);
		case CH_ASYNC_LOG_CONTINUE:
			break; /* continue on to handle UE-like error */
		default:
			cmn_err(CE_WARN, "discarding error 0x%p with "
			    "invalid fault type (0x%x)",
			    (void *)aflt, ch_flt->flt_type);
			return (0);
		}
	}

	/* ... fall through from the UE case */

	/*
	 * Only faults with a valid address that lies in memory are acted
	 * on: retire the page in normal operation, or scrub the error away
	 * when already panicking.
	 */
	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
		if (!panicstr) {
			cpu_page_retire(ch_flt);
		} else {
			/*
			 * Clear UEs on panic so that we don't
			 * get haunted by them during panic or
			 * after reboot
			 */
			cpu_clearphys(aflt);
			(void) clear_errors(NULL);
		}
	}

	return (1);
}
2492 
2493 /*
2494  * Retire the bad page that may contain the flushed error.
2495  */
2496 void
2497 cpu_page_retire(ch_async_flt_t *ch_flt)
2498 {
2499 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2500 	(void) page_retire(aflt->flt_addr, PR_UE);
2501 }
2502 
2503 /*
2504  * Return true if the error specified in the AFSR indicates
2505  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2506  * for Panther, none for Jalapeno/Serrano).
2507  */
2508 /* ARGSUSED */
2509 static int
2510 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2511 {
2512 #if defined(JALAPENO) || defined(SERRANO)
2513 	return (0);
2514 #elif defined(CHEETAH_PLUS)
2515 	if (IS_PANTHER(cpunodes[cpuid].implementation))
2516 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2517 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2518 #else	/* CHEETAH_PLUS */
2519 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2520 #endif
2521 }
2522 
2523 /*
2524  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2525  * generic event post-processing for correctable and uncorrectable memory,
2526  * E$, and MTag errors.  Historically this entry point was used to log bits of
2527  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2528  * converted into an ereport.  In addition, it transmits the error to any
2529  * platform-specific service-processor FRU logging routines, if available.
2530  */
2531 void
2532 cpu_log_err(struct async_flt *aflt)
2533 {
2534 	char unum[UNUM_NAMLEN];
2535 	int synd_status, synd_code, afar_status;
2536 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2537 
2538 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2539 		aflt->flt_status |= ECC_ECACHE;
2540 	else
2541 		aflt->flt_status &= ~ECC_ECACHE;
2542 	/*
2543 	 * Determine syndrome status.
2544 	 */
2545 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2546 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2547 
2548 	/*
2549 	 * Determine afar status.
2550 	 */
2551 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2552 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2553 		    ch_flt->flt_bit);
2554 	else
2555 		afar_status = AFLT_STAT_INVALID;
2556 
2557 	synd_code = synd_to_synd_code(synd_status,
2558 	    aflt->flt_synd, ch_flt->flt_bit);
2559 
2560 	/*
2561 	 * If afar status is not invalid do a unum lookup.
2562 	 */
2563 	if (afar_status != AFLT_STAT_INVALID) {
2564 		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2565 	} else {
2566 		unum[0] = '\0';
2567 	}
2568 
2569 	/*
2570 	 * Do not send the fruid message (plat_ecc_error_data_t)
2571 	 * to the SC if it can handle the enhanced error information
2572 	 * (plat_ecc_error2_data_t) or when the tunable
2573 	 * ecc_log_fruid_enable is set to 0.
2574 	 */
2575 
2576 	if (&plat_ecc_capability_sc_get &&
2577 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2578 		if (&plat_log_fruid_error)
2579 			plat_log_fruid_error(synd_code, aflt, unum,
2580 			    ch_flt->flt_bit);
2581 	}
2582 
2583 	if (aflt->flt_func != NULL)
2584 		aflt->flt_func(aflt, unum);
2585 
2586 	if (afar_status != AFLT_STAT_INVALID)
2587 		cpu_log_diag_info(ch_flt);
2588 
2589 	/*
2590 	 * If we have a CEEN error , we do not reenable CEEN until after
2591 	 * we exit the trap handler. Otherwise, another error may
2592 	 * occur causing the handler to be entered recursively.
2593 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2594 	 * to try and ensure that the CPU makes progress in the face
2595 	 * of a CE storm.
2596 	 */
2597 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2598 		(void) timeout(cpu_delayed_check_ce_errors,
2599 		    (void *)(uintptr_t)aflt->flt_inst,
2600 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2601 	}
2602 }
2603 
2604 /*
2605  * Invoked by error_init() early in startup and therefore before
2606  * startup_errorq() is called to drain any error Q -
2607  *
2608  * startup()
2609  *   startup_end()
2610  *     error_init()
2611  *       cpu_error_init()
2612  * errorq_init()
2613  *   errorq_drain()
2614  * start_other_cpus()
2615  *
2616  * The purpose of this routine is to create error-related taskqs.  Taskqs
2617  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2618  * context.
2619  */
2620 void
2621 cpu_error_init(int items)
2622 {
2623 	/*
2624 	 * Create taskq(s) to reenable CE
2625 	 */
2626 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2627 	    items, items, TASKQ_PREPOPULATE);
2628 }
2629 
2630 void
2631 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2632 {
2633 	char unum[UNUM_NAMLEN];
2634 	int len;
2635 
2636 	switch (aflt->flt_class) {
2637 	case CPU_FAULT:
2638 		cpu_ereport_init(aflt);
2639 		if (cpu_async_log_err(aflt, eqep))
2640 			cpu_ereport_post(aflt);
2641 		break;
2642 
2643 	case BUS_FAULT:
2644 		if (aflt->flt_func != NULL) {
2645 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2646 			    unum, UNUM_NAMLEN, &len);
2647 			aflt->flt_func(aflt, unum);
2648 		}
2649 		break;
2650 
2651 	case RECIRC_CPU_FAULT:
2652 		aflt->flt_class = CPU_FAULT;
2653 		cpu_log_err(aflt);
2654 		cpu_ereport_post(aflt);
2655 		break;
2656 
2657 	case RECIRC_BUS_FAULT:
2658 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2659 		/*FALLTHRU*/
2660 	default:
2661 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2662 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2663 		return;
2664 	}
2665 }
2666 
/*
 * Scrub and classify a CE.  This function must not modify the
 * fault structure passed to it but instead should return the classification
 * information.
 *
 * Arguments:
 *	ecc		- the fault; flt_addr is the address that is
 *			  scrubbed and re-read.
 *	logout_tried	- B_FALSE if no line-state logout has been attempted
 *			  yet, in which case one is taken here and the
 *			  logout area is invalidated again before returning.
 *
 * Returns a disposition byte: CE_XDIAG_EXTALG, plus CE_XDIAG_NOLOGOUT,
 * CE_XDIAG_CE1, CE_XDIAG_CE2, CE_XDIAG_AFARMATCH and one EC_STATE_* value
 * as determined below.  If either protected access traps, the disposition
 * gathered so far is returned early.
 */

static uchar_t
cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
{
	uchar_t disp = CE_XDIAG_EXTALG;
	on_trap_data_t otd;
	uint64_t orig_err;
	ch_cpu_logout_t *clop;

	/*
	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
	 * this, but our other callers have not.  Disable preemption to
	 * avoid CPU migration so that we restore CEEN on the correct
	 * cpu later.
	 *
	 * CEEN is cleared so that further CEs that our instruction and
	 * data footprint induce do not cause us to either creep down
	 * kernel stack to the point of overflow, or do so much CE
	 * notification as to make little real forward progress.
	 *
	 * NCEEN must not be cleared.  However it is possible that
	 * our accesses to the flt_addr may provoke a bus error or timeout
	 * if the offending address has just been unconfigured as part of
	 * a DR action.  So we must operate under on_trap protection.
	 */
	kpreempt_disable();
	orig_err = get_error_enable();
	if (orig_err & EN_REG_CEEN)
		set_error_enable(orig_err & ~EN_REG_CEEN);

	/*
	 * Our classification algorithm includes the line state before
	 * the scrub; we'd like this captured after the detection and
	 * before the algorithm below - the earlier the better.
	 *
	 * If we've come from a cpu CE trap then this info already exists
	 * in the cpu logout area.
	 *
	 * For a CE detected by memscrub for which there was no trap
	 * (running with CEEN off) cpu_log_and_clear_ce has called
	 * cpu_ce_delayed_ec_logout to capture some cache data, and
	 * marked the fault structure as incomplete as a flag to later
	 * logging code.
	 *
	 * If called directly from an IO detected CE there has been
	 * no line data capture.  In this case we logout to the cpu logout
	 * area - that's appropriate since it's the cpu cache data we need
	 * for classification.  We thus borrow the cpu logout area for a
	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
	 * this time (we will invalidate it again below).
	 *
	 * If called from the partner check xcall handler then this cpu
	 * (the partner) has not necessarily experienced a CE at this
	 * address.  But we want to capture line state before its scrub
	 * attempt since we use that in our classification.
	 */
	if (logout_tried == B_FALSE) {
		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
			disp |= CE_XDIAG_NOLOGOUT;
	}

	/*
	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
	 * no longer be valid (if DR'd since the initial event) so we
	 * perform this scrub under on_trap protection.  If this access is
	 * ok then further accesses below will also be ok - DR cannot
	 * proceed while this thread is active (preemption is disabled);
	 * to be safe we'll nonetheless use on_trap again below.
	 */
	if (!on_trap(&otd, OT_DATA_ACCESS)) {
		cpu_scrubphys(ecc);
	} else {
		/* The scrub trapped: undo our state changes and give up. */
		no_trap();
		if (orig_err & EN_REG_CEEN)
			set_error_enable(orig_err);
		kpreempt_enable();
		return (disp);
	}
	no_trap();

	/*
	 * Did the casx read of the scrub log a CE that matches the AFAR?
	 * Note that it's quite possible that the read sourced the data from
	 * another cpu.
	 */
	if (clear_ecc(ecc))
		disp |= CE_XDIAG_CE1;

	/*
	 * Read the data again.  This time the read is very likely to
	 * come from memory since the scrub induced a writeback to memory.
	 */
	if (!on_trap(&otd, OT_DATA_ACCESS)) {
		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
	} else {
		/* The re-read trapped: undo our state changes and give up. */
		no_trap();
		if (orig_err & EN_REG_CEEN)
			set_error_enable(orig_err);
		kpreempt_enable();
		return (disp);
	}
	no_trap();

	/* Did that read induce a CE that matches the AFAR? */
	if (clear_ecc(ecc))
		disp |= CE_XDIAG_CE2;

	/*
	 * Look at the logout information and record whether we found the
	 * line in l2/l3 cache.  For Panther we are interested in whether
	 * we found it in either cache (it won't reside in both but
	 * it is possible to read it that way given the moving target).
	 */
	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
		int hit, level;
		int state;
		int totalsize;
		ch_ec_data_t *ecp;

		/*
		 * If hit is nonzero then a match was found and hit will
		 * be one greater than the index which hit.  For Panther we
		 * also need to pay attention to level to see which of l2$ or
		 * l3$ it hit in.
		 */
		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
		    0, &level);

		if (hit) {
			/* Convert the 1-based hit into an array index. */
			--hit;
			disp |= CE_XDIAG_AFARMATCH;

			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
				if (level == 2)
					ecp = &clop->clo_data.chd_l2_data[hit];
				else
					ecp = &clop->clo_data.chd_ec_data[hit];
			} else {
				ASSERT(level == 2);
				ecp = &clop->clo_data.chd_ec_data[hit];
			}
			totalsize = cpunodes[CPU->cpu_id].ecache_size;
			state = cpu_ectag_pa_to_subblk_state(totalsize,
			    ecc->flt_addr, ecp->ec_tag);

			/*
			 * Cheetah variants use different state encodings -
			 * the CH_ECSTATE_* defines vary depending on the
			 * module we're compiled for.  Translate into our
			 * one true version.  Conflate Owner-Shared state
			 * of SSM mode with Owner as victimisation of such
			 * lines may cause a writeback.
			 */
			switch (state) {
			case CH_ECSTATE_MOD:
				disp |= EC_STATE_M;
				break;

			case CH_ECSTATE_OWN:
			case CH_ECSTATE_OWS:
				disp |= EC_STATE_O;
				break;

			case CH_ECSTATE_EXL:
				disp |= EC_STATE_E;
				break;

			case CH_ECSTATE_SHR:
				disp |= EC_STATE_S;
				break;

			default:
				disp |= EC_STATE_I;
				break;
			}
		}

		/*
		 * If we initiated the delayed logout then we are responsible
		 * for invalidating the logout area.
		 */
		if (logout_tried == B_FALSE) {
			bzero(clop, sizeof (ch_cpu_logout_t));
			clop->clo_data.chd_afar = LOGOUT_INVALID;
		}
	}

	/*
	 * Re-enable CEEN if we turned it off.
	 */
	if (orig_err & EN_REG_CEEN)
		set_error_enable(orig_err);
	kpreempt_enable();

	return (disp);
}
2870 
2871 /*
2872  * Scrub a correctable memory error and collect data for classification
2873  * of CE type.  This function is called in the detection path, ie tl0 handling
2874  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2875  */
2876 void
2877 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2878 {
2879 	/*
2880 	 * Cheetah CE classification does not set any bits in flt_status.
2881 	 * Instead we will record classification datapoints in flt_disp.
2882 	 */
2883 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2884 
2885 	/*
2886 	 * To check if the error detected by IO is persistent, sticky or
2887 	 * intermittent.  This is noticed by clear_ecc().
2888 	 */
2889 	if (ecc->flt_status & ECC_IOBUS)
2890 		ecc->flt_stat = C_AFSR_MEMORY;
2891 
2892 	/*
2893 	 * Record information from this first part of the algorithm in
2894 	 * flt_disp.
2895 	 */
2896 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2897 }
2898 
2899 /*
2900  * Select a partner to perform a further CE classification check from.
2901  * Must be called with kernel preemption disabled (to stop the cpu list
2902  * from changing).  The detecting cpu we are partnering has cpuid
2903  * aflt->flt_inst; we might not be running on the detecting cpu.
2904  *
2905  * Restrict choice to active cpus in the same cpu partition as ourselves in
2906  * an effort to stop bad cpus in one partition causing other partitions to
2907  * perform excessive diagnostic activity.  Actually since the errorq drain
2908  * is run from a softint most of the time and that is a global mechanism
2909  * this isolation is only partial.  Return NULL if we fail to find a
2910  * suitable partner.
2911  *
2912  * We prefer a partner that is in a different latency group to ourselves as
2913  * we will share fewer datapaths.  If such a partner is unavailable then
2914  * choose one in the same lgroup but prefer a different chip and only allow
2915  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2916  * flags includes PTNR_SELFOK then permit selection of the original detector.
2917  *
2918  * We keep a cache of the last partner selected for a cpu, and we'll try to
2919  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2920  * have passed since that selection was made.  This provides the benefit
2921  * of the point-of-view of different partners over time but without
2922  * requiring frequent cpu list traversals.
2923  */
2924 
2925 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2926 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2927 
2928 static cpu_t *
2929 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2930 {
2931 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2932 	hrtime_t lasttime, thistime;
2933 
2934 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2935 
2936 	dtcr = cpu[aflt->flt_inst];
2937 
2938 	/*
2939 	 * Short-circuit for the following cases:
2940 	 *	. the dtcr is not flagged active
2941 	 *	. there is just one cpu present
2942 	 *	. the detector has disappeared
2943 	 *	. we were given a bad flt_inst cpuid; this should not happen
2944 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2945 	 *	  reason to panic.
2946 	 *	. there is just one cpu left online in the cpu partition
2947 	 *
2948 	 * If we return NULL after this point then we do not update the
2949 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2950 	 * again next time; this is the case where the only other cpu online
2951 	 * in the detector's partition is on the same chip as the detector
2952 	 * and since CEEN re-enable is throttled even that case should not
2953 	 * hurt performance.
2954 	 */
2955 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2956 		return (NULL);
2957 	}
2958 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2959 		if (flags & PTNR_SELFOK) {
2960 			*typep = CE_XDIAG_PTNR_SELF;
2961 			return (dtcr);
2962 		} else {
2963 			return (NULL);
2964 		}
2965 	}
2966 
2967 	thistime = gethrtime();
2968 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2969 
2970 	/*
2971 	 * Select a starting point.
2972 	 */
2973 	if (!lasttime) {
2974 		/*
2975 		 * We've never selected a partner for this detector before.
2976 		 * Start the scan at the next online cpu in the same cpu
2977 		 * partition.
2978 		 */
2979 		sp = dtcr->cpu_next_part;
2980 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2981 		/*
2982 		 * Our last selection has not aged yet.  If this partner:
2983 		 *	. is still a valid cpu,
2984 		 *	. is still in the same partition as the detector
2985 		 *	. is still marked active
2986 		 *	. satisfies the 'flags' argument criteria
2987 		 * then select it again without updating the timestamp.
2988 		 */
2989 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2990 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2991 		    !cpu_flagged_active(sp->cpu_flags) ||
2992 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2993 		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
2994 		    !(flags & PTNR_SIBLINGOK))) {
2995 			sp = dtcr->cpu_next_part;
2996 		} else {
2997 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2998 				*typep = CE_XDIAG_PTNR_REMOTE;
2999 			} else if (sp == dtcr) {
3000 				*typep = CE_XDIAG_PTNR_SELF;
3001 			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3002 				*typep = CE_XDIAG_PTNR_SIBLING;
3003 			} else {
3004 				*typep = CE_XDIAG_PTNR_LOCAL;
3005 			}
3006 			return (sp);
3007 		}
3008 	} else {
3009 		/*
3010 		 * Our last selection has aged.  If it is nonetheless still a
3011 		 * valid cpu then start the scan at the next cpu in the
3012 		 * partition after our last partner.  If the last selection
3013 		 * is no longer a valid cpu then go with our default.  In
3014 		 * this way we slowly cycle through possible partners to
3015 		 * obtain multiple viewpoints over time.
3016 		 */
3017 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3018 		if (sp == NULL) {
3019 			sp = dtcr->cpu_next_part;
3020 		} else {
3021 			sp = sp->cpu_next_part;		/* may be dtcr */
3022 			if (sp->cpu_part != dtcr->cpu_part)
3023 				sp = dtcr;
3024 		}
3025 	}
3026 
3027 	/*
3028 	 * We have a proposed starting point for our search, but if this
3029 	 * cpu is offline then its cpu_next_part will point to itself
3030 	 * so we can't use that to iterate over cpus in this partition in
3031 	 * the loop below.  We still want to avoid iterating over cpus not
3032 	 * in our partition, so in the case that our starting point is offline
3033 	 * we will repoint it to be the detector itself;  and if the detector
3034 	 * happens to be offline we'll return NULL from the following loop.
3035 	 */
3036 	if (!cpu_flagged_active(sp->cpu_flags)) {
3037 		sp = dtcr;
3038 	}
3039 
3040 	ptnr = sp;
3041 	locptnr = NULL;
3042 	sibptnr = NULL;
3043 	do {
3044 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3045 			continue;
3046 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3047 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3048 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3049 			*typep = CE_XDIAG_PTNR_REMOTE;
3050 			return (ptnr);
3051 		}
3052 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3053 			if (sibptnr == NULL)
3054 				sibptnr = ptnr;
3055 			continue;
3056 		}
3057 		if (locptnr == NULL)
3058 			locptnr = ptnr;
3059 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3060 
3061 	/*
3062 	 * A foreign partner has already been returned if one was available.
3063 	 *
3064 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3065 	 * detector, is active, and is not a sibling of the detector.
3066 	 *
3067 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3068 	 * active.
3069 	 *
3070 	 * If we have to resort to using the detector itself we have already
3071 	 * checked that it is active.
3072 	 */
3073 	if (locptnr) {
3074 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3075 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3076 		*typep = CE_XDIAG_PTNR_LOCAL;
3077 		return (locptnr);
3078 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3079 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3080 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3081 		*typep = CE_XDIAG_PTNR_SIBLING;
3082 		return (sibptnr);
3083 	} else if (flags & PTNR_SELFOK) {
3084 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3085 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3086 		*typep = CE_XDIAG_PTNR_SELF;
3087 		return (dtcr);
3088 	}
3089 
3090 	return (NULL);
3091 }
3092 
3093 /*
3094  * Cross call handler that is requested to run on the designated partner of
3095  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3096  */
3097 static void
3098 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3099 {
3100 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3101 }
3102 
3103 /*
3104  * The associated errorqs are never destroyed so we do not need to deal with
3105  * them disappearing before this timeout fires.  If the affected memory
3106  * has been DR'd out since the original event the scrub algrithm will catch
3107  * any errors and return null disposition info.  If the original detecting
3108  * cpu has been DR'd out then ereport detector info will not be able to
3109  * lookup CPU type;  with a small timeout this is unlikely.
3110  */
3111 static void
3112 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3113 {
3114 	struct async_flt *aflt = cbarg->lkycb_aflt;
3115 	uchar_t disp;
3116 	cpu_t *cp;
3117 	int ptnrtype;
3118 
3119 	kpreempt_disable();
3120 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3121 	    &ptnrtype)) {
3122 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3123 		    (uint64_t)&disp);
3124 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3125 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3126 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3127 	} else {
3128 		ce_xdiag_lkydrops++;
3129 		if (ncpus > 1)
3130 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3131 			    CE_XDIAG_SKIP_NOPTNR);
3132 	}
3133 	kpreempt_enable();
3134 
3135 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3136 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3137 }
3138 
3139 /*
3140  * Called from errorq drain code when processing a CE error, both from
3141  * CPU and PCI drain functions.  Decide what further classification actions,
3142  * if any, we will perform.  Perform immediate actions now, and schedule
3143  * delayed actions as required.  Note that we are no longer necessarily running
3144  * on the detecting cpu, and that the async_flt structure will not persist on
3145  * return from this function.
3146  *
3147  * Calls to this function should aim to be self-throttling in some way.  With
3148  * the delayed re-enable of CEEN the absolute rate of calls should not
3149  * be excessive.  Callers should also avoid performing in-depth classification
3150  * for events in pages that are already known to be suspect.
3151  *
3152  * We return nonzero to indicate that the event has been copied and
3153  * recirculated for further testing.  The caller should not log the event
3154  * in this case - it will be logged when further test results are available.
3155  *
3156  * Our possible contexts are that of errorq_drain: below lock level or from
3157  * panic context.  We can assume that the cpu we are running on is online.
3158  */
3159 
3160 
3161 #ifdef DEBUG
3162 static int ce_xdiag_forceaction;
3163 #endif
3164 
3165 int
3166 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3167     errorq_elem_t *eqep, size_t afltoffset)
3168 {
3169 	ce_dispact_t dispact, action;
3170 	cpu_t *cp;
3171 	uchar_t dtcrinfo, disp;
3172 	int ptnrtype;
3173 
3174 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3175 		ce_xdiag_drops++;
3176 		return (0);
3177 	} else if (!aflt->flt_in_memory) {
3178 		ce_xdiag_drops++;
3179 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3180 		return (0);
3181 	}
3182 
3183 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3184 
3185 	/*
3186 	 * Some correctable events are not scrubbed/classified, such as those
3187 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3188 	 * initial detector classification go no further.
3189 	 */
3190 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3191 		ce_xdiag_drops++;
3192 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3193 		return (0);
3194 	}
3195 
3196 	dispact = CE_DISPACT(ce_disp_table,
3197 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3198 	    CE_XDIAG_STATE(dtcrinfo),
3199 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3200 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3201 
3202 
3203 	action = CE_ACT(dispact);	/* bad lookup caught below */
3204 #ifdef DEBUG
3205 	if (ce_xdiag_forceaction != 0)
3206 		action = ce_xdiag_forceaction;
3207 #endif
3208 
3209 	switch (action) {
3210 	case CE_ACT_LKYCHK: {
3211 		caddr_t ndata;
3212 		errorq_elem_t *neqep;
3213 		struct async_flt *ecc;
3214 		ce_lkychk_cb_t *cbargp;
3215 
3216 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3217 			ce_xdiag_lkydrops++;
3218 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3219 			    CE_XDIAG_SKIP_DUPFAIL);
3220 			break;
3221 		}
3222 		ecc = (struct async_flt *)(ndata + afltoffset);
3223 
3224 		ASSERT(ecc->flt_class == CPU_FAULT ||
3225 		    ecc->flt_class == BUS_FAULT);
3226 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3227 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3228 
3229 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3230 		cbargp->lkycb_aflt = ecc;
3231 		cbargp->lkycb_eqp = eqp;
3232 		cbargp->lkycb_eqep = neqep;
3233 
3234 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3235 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3236 		return (1);
3237 	}
3238 
3239 	case CE_ACT_PTNRCHK:
3240 		kpreempt_disable();	/* stop cpu list changing */
3241 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3242 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3243 			    (uint64_t)aflt, (uint64_t)&disp);
3244 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3245 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3246 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3247 		} else if (ncpus > 1) {
3248 			ce_xdiag_ptnrdrops++;
3249 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3250 			    CE_XDIAG_SKIP_NOPTNR);
3251 		} else {
3252 			ce_xdiag_ptnrdrops++;
3253 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3254 			    CE_XDIAG_SKIP_UNIPROC);
3255 		}
3256 		kpreempt_enable();
3257 		break;
3258 
3259 	case CE_ACT_DONE:
3260 		break;
3261 
3262 	case CE_ACT(CE_DISP_BAD):
3263 	default:
3264 #ifdef DEBUG
3265 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3266 #endif
3267 		ce_xdiag_bad++;
3268 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3269 		break;
3270 	}
3271 
3272 	return (0);
3273 }
3274 
3275 /*
3276  * We route all errors through a single switch statement.
3277  */
3278 void
3279 cpu_ue_log_err(struct async_flt *aflt)
3280 {
3281 	switch (aflt->flt_class) {
3282 	case CPU_FAULT:
3283 		cpu_ereport_init(aflt);
3284 		if (cpu_async_log_err(aflt, NULL))
3285 			cpu_ereport_post(aflt);
3286 		break;
3287 
3288 	case BUS_FAULT:
3289 		bus_async_log_err(aflt);
3290 		break;
3291 
3292 	default:
3293 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3294 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3295 		return;
3296 	}
3297 }
3298 
3299 /*
3300  * Routine for panic hook callback from panic_idle().
3301  */
3302 void
3303 cpu_async_panic_callb(void)
3304 {
3305 	ch_async_flt_t ch_flt;
3306 	struct async_flt *aflt;
3307 	ch_cpu_errors_t cpu_error_regs;
3308 	uint64_t afsr_errs;
3309 
3310 	get_cpu_error_state(&cpu_error_regs);
3311 
3312 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3313 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3314 
3315 	if (afsr_errs) {
3316 
3317 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3318 		aflt = (struct async_flt *)&ch_flt;
3319 		aflt->flt_id = gethrtime_waitfree();
3320 		aflt->flt_bus_id = getprocessorid();
3321 		aflt->flt_inst = CPU->cpu_id;
3322 		aflt->flt_stat = cpu_error_regs.afsr;
3323 		aflt->flt_addr = cpu_error_regs.afar;
3324 		aflt->flt_prot = AFLT_PROT_NONE;
3325 		aflt->flt_class = CPU_FAULT;
3326 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3327 		aflt->flt_panic = 1;
3328 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3329 		ch_flt.afsr_errs = afsr_errs;
3330 #if defined(SERRANO)
3331 		ch_flt.afar2 = cpu_error_regs.afar2;
3332 #endif	/* SERRANO */
3333 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3334 	}
3335 }
3336 
3337 /*
3338  * Routine to convert a syndrome into a syndrome code.
3339  */
3340 static int
3341 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3342 {
3343 	if (synd_status == AFLT_STAT_INVALID)
3344 		return (-1);
3345 
3346 	/*
3347 	 * Use the syndrome to index the appropriate syndrome table,
3348 	 * to get the code indicating which bit(s) is(are) bad.
3349 	 */
3350 	if (afsr_bit &
3351 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3352 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3353 #if defined(JALAPENO) || defined(SERRANO)
3354 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3355 				return (-1);
3356 			else
3357 				return (BPAR0 + synd);
3358 #else /* JALAPENO || SERRANO */
3359 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3360 				return (-1);
3361 			else
3362 				return (mtag_syndrome_tab[synd]);
3363 #endif /* JALAPENO || SERRANO */
3364 		} else {
3365 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3366 				return (-1);
3367 			else
3368 				return (ecc_syndrome_tab[synd]);
3369 		}
3370 	} else {
3371 		return (-1);
3372 	}
3373 }
3374 
3375 int
3376 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3377 {
3378 	if (&plat_get_mem_sid)
3379 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3380 	else
3381 		return (ENOTSUP);
3382 }
3383 
3384 int
3385 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3386 {
3387 	if (&plat_get_mem_offset)
3388 		return (plat_get_mem_offset(flt_addr, offp));
3389 	else
3390 		return (ENOTSUP);
3391 }
3392 
3393 int
3394 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3395 {
3396 	if (&plat_get_mem_addr)
3397 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3398 	else
3399 		return (ENOTSUP);
3400 }
3401 
3402 /*
3403  * Routine to return a string identifying the physical name
3404  * associated with a memory/cache error.
3405  */
3406 int
3407 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3408     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3409     ushort_t flt_status, char *buf, int buflen, int *lenp)
3410 {
3411 	int synd_code;
3412 	int ret;
3413 
3414 	/*
3415 	 * An AFSR of -1 defaults to a memory syndrome.
3416 	 */
3417 	if (flt_stat == (uint64_t)-1)
3418 		flt_stat = C_AFSR_CE;
3419 
3420 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3421 
3422 	/*
3423 	 * Syndrome code must be either a single-bit error code
3424 	 * (0...143) or -1 for unum lookup.
3425 	 */
3426 	if (synd_code < 0 || synd_code >= M2)
3427 		synd_code = -1;
3428 	if (&plat_get_mem_unum) {
3429 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3430 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3431 			buf[0] = '\0';
3432 			*lenp = 0;
3433 		}
3434 
3435 		return (ret);
3436 	}
3437 
3438 	return (ENOTSUP);
3439 }
3440 
3441 /*
3442  * Wrapper for cpu_get_mem_unum() routine that takes an
3443  * async_flt struct rather than explicit arguments.
3444  */
3445 int
3446 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3447     char *buf, int buflen, int *lenp)
3448 {
3449 	/*
3450 	 * If we come thru here for an IO bus error aflt->flt_stat will
3451 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3452 	 * so it will interpret this as a memory error.
3453 	 */
3454 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3455 	    (aflt->flt_class == BUS_FAULT) ?
3456 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3457 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3458 	    aflt->flt_status, buf, buflen, lenp));
3459 }
3460 
3461 /*
3462  * Return unum string given synd_code and async_flt into
3463  * the buf with size UNUM_NAMLEN
3464  */
3465 static int
3466 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3467 {
3468 	int ret, len;
3469 
3470 	/*
3471 	 * Syndrome code must be either a single-bit error code
3472 	 * (0...143) or -1 for unum lookup.
3473 	 */
3474 	if (synd_code < 0 || synd_code >= M2)
3475 		synd_code = -1;
3476 	if (&plat_get_mem_unum) {
3477 		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3478 		    aflt->flt_bus_id, aflt->flt_in_memory,
3479 		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3480 			buf[0] = '\0';
3481 		}
3482 		return (ret);
3483 	}
3484 
3485 	buf[0] = '\0';
3486 	return (ENOTSUP);
3487 }
3488 
3489 /*
3490  * This routine is a more generic interface to cpu_get_mem_unum()
3491  * that may be used by other modules (e.g. the 'mm' driver, through
3492  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3493  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3494  */
3495 int
3496 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3497     char *buf, int buflen, int *lenp)
3498 {
3499 	int synd_status, flt_in_memory, ret;
3500 	ushort_t flt_status = 0;
3501 	char unum[UNUM_NAMLEN];
3502 	uint64_t t_afsr_errs;
3503 
3504 	/*
3505 	 * Check for an invalid address.
3506 	 */
3507 	if (afar == (uint64_t)-1)
3508 		return (ENXIO);
3509 
3510 	if (synd == (uint64_t)-1)
3511 		synd_status = AFLT_STAT_INVALID;
3512 	else
3513 		synd_status = AFLT_STAT_VALID;
3514 
3515 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3516 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3517 
3518 	/*
3519 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3520 	 */
3521 	if (*afsr == (uint64_t)-1)
3522 		t_afsr_errs = C_AFSR_CE;
3523 	else {
3524 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3525 #if defined(CHEETAH_PLUS)
3526 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3527 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3528 #endif	/* CHEETAH_PLUS */
3529 	}
3530 
3531 	/*
3532 	 * Turn on ECC_ECACHE if error type is E$ Data.
3533 	 */
3534 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3535 		flt_status |= ECC_ECACHE;
3536 
3537 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3538 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3539 	if (ret != 0)
3540 		return (ret);
3541 
3542 	if (*lenp >= buflen)
3543 		return (ENAMETOOLONG);
3544 
3545 	(void) strncpy(buf, unum, buflen);
3546 
3547 	return (0);
3548 }
3549 
3550 /*
3551  * Routine to return memory information associated
3552  * with a physical address and syndrome.
3553  */
3554 int
3555 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3556     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3557     int *segsp, int *banksp, int *mcidp)
3558 {
3559 	int synd_status, synd_code;
3560 
3561 	if (afar == (uint64_t)-1)
3562 		return (ENXIO);
3563 
3564 	if (synd == (uint64_t)-1)
3565 		synd_status = AFLT_STAT_INVALID;
3566 	else
3567 		synd_status = AFLT_STAT_VALID;
3568 
3569 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3570 
3571 	if (p2get_mem_info != NULL)
3572 		return ((p2get_mem_info)(synd_code, afar,
3573 		    mem_sizep, seg_sizep, bank_sizep,
3574 		    segsp, banksp, mcidp));
3575 	else
3576 		return (ENOTSUP);
3577 }
3578 
3579 /*
3580  * Routine to return a string identifying the physical
3581  * name associated with a cpuid.
3582  */
3583 int
3584 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3585 {
3586 	int ret;
3587 	char unum[UNUM_NAMLEN];
3588 
3589 	if (&plat_get_cpu_unum) {
3590 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3591 		    != 0)
3592 			return (ret);
3593 	} else {
3594 		return (ENOTSUP);
3595 	}
3596 
3597 	if (*lenp >= buflen)
3598 		return (ENAMETOOLONG);
3599 
3600 	(void) strncpy(buf, unum, buflen);
3601 
3602 	return (0);
3603 }
3604 
3605 /*
3606  * This routine exports the name buffer size.
3607  */
3608 size_t
3609 cpu_get_name_bufsize()
3610 {
3611 	return (UNUM_NAMLEN);
3612 }
3613 
/*
 * Historical function, apparently not used.  Intentionally a no-op; all
 * arguments are ignored.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3621 
/*
 * Historical function only called for SBus errors in debugging.
 * Intentionally a no-op; all arguments are ignored.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3629 
3630 /*
3631  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3632  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3633  * an async fault structure argument is passed in, the captured error state
3634  * (AFSR, AFAR) info will be returned in the structure.
3635  */
3636 int
3637 clear_errors(ch_async_flt_t *ch_flt)
3638 {
3639 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3640 	ch_cpu_errors_t	cpu_error_regs;
3641 
3642 	get_cpu_error_state(&cpu_error_regs);
3643 
3644 	if (ch_flt != NULL) {
3645 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3646 		aflt->flt_addr = cpu_error_regs.afar;
3647 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3648 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3649 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3650 #if defined(SERRANO)
3651 		ch_flt->afar2 = cpu_error_regs.afar2;
3652 #endif	/* SERRANO */
3653 	}
3654 
3655 	set_cpu_error_state(&cpu_error_regs);
3656 
3657 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3658 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3659 }
3660 
3661 /*
3662  * Clear any AFSR error bits, and check for persistence.
3663  *
3664  * It would be desirable to also insist that syndrome match.  PCI handling
3665  * has already filled flt_synd.  For errors trapped by CPU we only fill
3666  * flt_synd when we queue the event, so we do not have a valid flt_synd
3667  * during initial classification (it is valid if we're called as part of
3668  * subsequent low-pil additional classification attempts).  We could try
3669  * to determine which syndrome to use: we know we're only called for
3670  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3671  * would be esynd/none and esynd/msynd, respectively.  If that is
3672  * implemented then what do we do in the case that we do experience an
3673  * error on the same afar but with different syndrome?  At the very least
3674  * we should count such occurences.  Anyway, for now, we'll leave it as
3675  * it has been for ages.
3676  */
3677 static int
3678 clear_ecc(struct async_flt *aflt)
3679 {
3680 	ch_cpu_errors_t	cpu_error_regs;
3681 
3682 	/*
3683 	 * Snapshot the AFSR and AFAR and clear any errors
3684 	 */
3685 	get_cpu_error_state(&cpu_error_regs);
3686 	set_cpu_error_state(&cpu_error_regs);
3687 
3688 	/*
3689 	 * If any of the same memory access error bits are still on and
3690 	 * the AFAR matches, return that the error is persistent.
3691 	 */
3692 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3693 	    cpu_error_regs.afar == aflt->flt_addr);
3694 }
3695 
3696 /*
3697  * Turn off all cpu error detection, normally only used for panics.
3698  */
3699 void
3700 cpu_disable_errors(void)
3701 {
3702 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3703 
3704 	/*
3705 	 * With error detection now turned off, check the other cpus
3706 	 * logout areas for any unlogged errors.
3707 	 */
3708 	if (enable_check_other_cpus_logout) {
3709 		cpu_check_other_cpus_logout();
3710 		/*
3711 		 * Make a second pass over the logout areas, in case
3712 		 * there is a failing CPU in an error-trap loop which
3713 		 * will write to the logout area once it is emptied.
3714 		 */
3715 		cpu_check_other_cpus_logout();
3716 	}
3717 }
3718 
3719 /*
3720  * Enable errors.
3721  */
3722 void
3723 cpu_enable_errors(void)
3724 {
3725 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3726 }
3727 
3728 /*
3729  * Flush the entire ecache using displacement flush by reading through a
3730  * physical address range twice as large as the Ecache.
3731  */
3732 void
3733 cpu_flush_ecache(void)
3734 {
3735 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3736 	    cpunodes[CPU->cpu_id].ecache_linesize);
3737 }
3738 
3739 /*
3740  * Return CPU E$ set size - E$ size divided by the associativity.
3741  * We use this function in places where the CPU_PRIVATE ptr may not be
3742  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3743  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3744  * up before the kernel switches from OBP's to the kernel's trap table, so
3745  * we don't have to worry about cpunodes being unitialized.
3746  */
3747 int
3748 cpu_ecache_set_size(struct cpu *cp)
3749 {
3750 	if (CPU_PRIVATE(cp))
3751 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3752 
3753 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3754 }
3755 
3756 /*
3757  * Flush Ecache line.
3758  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3759  * Uses normal displacement flush for Cheetah.
3760  */
3761 static void
3762 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3763 {
3764 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3765 	int ec_set_size = cpu_ecache_set_size(CPU);
3766 
3767 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3768 }
3769 
3770 /*
3771  * Scrub physical address.
3772  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3773  * Ecache or direct-mapped Ecache.
3774  */
3775 static void
3776 cpu_scrubphys(struct async_flt *aflt)
3777 {
3778 	int ec_set_size = cpu_ecache_set_size(CPU);
3779 
3780 	scrubphys(aflt->flt_addr, ec_set_size);
3781 }
3782 
3783 /*
3784  * Clear physical address.
3785  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3786  * Ecache or direct-mapped Ecache.
3787  */
3788 void
3789 cpu_clearphys(struct async_flt *aflt)
3790 {
3791 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3792 	int ec_set_size = cpu_ecache_set_size(CPU);
3793 
3794 
3795 	clearphys(aflt->flt_addr, ec_set_size, lsize);
3796 }
3797 
3798 #if defined(CPU_IMP_ECACHE_ASSOC)
3799 /*
3800  * Check for a matching valid line in all the sets.
3801  * If found, return set# + 1. Otherwise return 0.
3802  */
3803 static int
3804 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3805 {
3806 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3807 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3808 	int ec_set_size = cpu_ecache_set_size(CPU);
3809 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3810 	int nway = cpu_ecache_nway();
3811 	int i;
3812 
3813 	for (i = 0; i < nway; i++, ecp++) {
3814 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3815 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3816 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3817 			return (i+1);
3818 	}
3819 	return (0);
3820 }
3821 #endif /* CPU_IMP_ECACHE_ASSOC */
3822 
3823 /*
3824  * Check whether a line in the given logout info matches the specified
3825  * fault address.  If reqval is set then the line must not be Invalid.
3826  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3827  * set to 2 for l2$ or 3 for l3$.
3828  */
3829 static int
3830 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3831 {
3832 	ch_diag_data_t *cdp = data;
3833 	ch_ec_data_t *ecp;
3834 	int totalsize, ec_set_size;
3835 	int i, ways;
3836 	int match = 0;
3837 	int tagvalid;
3838 	uint64_t addr, tagpa;
3839 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3840 
3841 	/*
3842 	 * Check the l2$ logout data
3843 	 */
3844 	if (ispanther) {
3845 		ecp = &cdp->chd_l2_data[0];
3846 		ec_set_size = PN_L2_SET_SIZE;
3847 		ways = PN_L2_NWAYS;
3848 	} else {
3849 		ecp = &cdp->chd_ec_data[0];
3850 		ec_set_size = cpu_ecache_set_size(CPU);
3851 		ways = cpu_ecache_nway();
3852 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3853 	}
3854 	/* remove low order PA bits from fault address not used in PA tag */
3855 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3856 	for (i = 0; i < ways; i++, ecp++) {
3857 		if (ispanther) {
3858 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3859 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3860 		} else {
3861 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3862 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3863 			    ecp->ec_tag);
3864 		}
3865 		if (tagpa == addr && (!reqval || tagvalid)) {
3866 			match = i + 1;
3867 			*level = 2;
3868 			break;
3869 		}
3870 	}
3871 
3872 	if (match || !ispanther)
3873 		return (match);
3874 
3875 	/* For Panther we also check the l3$ */
3876 	ecp = &cdp->chd_ec_data[0];
3877 	ec_set_size = PN_L3_SET_SIZE;
3878 	ways = PN_L3_NWAYS;
3879 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3880 
3881 	for (i = 0; i < ways; i++, ecp++) {
3882 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3883 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3884 			match = i + 1;
3885 			*level = 3;
3886 			break;
3887 		}
3888 	}
3889 
3890 	return (match);
3891 }
3892 
3893 #if defined(CPU_IMP_L1_CACHE_PARITY)
3894 /*
3895  * Record information related to the source of an Dcache Parity Error.
3896  */
3897 static void
3898 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3899 {
3900 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3901 	int index;
3902 
3903 	/*
3904 	 * Since instruction decode cannot be done at high PIL
3905 	 * just examine the entire Dcache to locate the error.
3906 	 */
3907 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3908 		ch_flt->parity_data.dpe.cpl_way = -1;
3909 		ch_flt->parity_data.dpe.cpl_off = -1;
3910 	}
3911 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3912 		cpu_dcache_parity_check(ch_flt, index);
3913 }
3914 
3915 /*
3916  * Check all ways of the Dcache at a specified index for good parity.
3917  */
3918 static void
3919 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3920 {
3921 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3922 	uint64_t parity_bits, pbits, data_word;
3923 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3924 	int way, word, data_byte;
3925 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3926 	ch_dc_data_t tmp_dcp;
3927 
3928 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3929 		/*
3930 		 * Perform diagnostic read.
3931 		 */
3932 		get_dcache_dtag(index + way * dc_set_size,
3933 		    (uint64_t *)&tmp_dcp);
3934 
3935 		/*
3936 		 * Check tag for even parity.
3937 		 * Sum of 1 bits (including parity bit) should be even.
3938 		 */
3939 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3940 			/*
3941 			 * If this is the first error log detailed information
3942 			 * about it and check the snoop tag. Otherwise just
3943 			 * record the fact that we found another error.
3944 			 */
3945 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3946 				ch_flt->parity_data.dpe.cpl_way = way;
3947 				ch_flt->parity_data.dpe.cpl_cache =
3948 				    CPU_DC_PARITY;
3949 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3950 
3951 				if (popc64(tmp_dcp.dc_sntag &
3952 				    CHP_DCSNTAG_PARMASK) & 1) {
3953 					ch_flt->parity_data.dpe.cpl_tag |=
3954 					    CHP_DC_SNTAG;
3955 					ch_flt->parity_data.dpe.cpl_lcnt++;
3956 				}
3957 
3958 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3959 			}
3960 
3961 			ch_flt->parity_data.dpe.cpl_lcnt++;
3962 		}
3963 
3964 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3965 			/*
3966 			 * Panther has more parity bits than the other
3967 			 * processors for covering dcache data and so each
3968 			 * byte of data in each word has its own parity bit.
3969 			 */
3970 			parity_bits = tmp_dcp.dc_pn_data_parity;
3971 			for (word = 0; word < 4; word++) {
3972 				data_word = tmp_dcp.dc_data[word];
3973 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3974 				for (data_byte = 0; data_byte < 8;
3975 				    data_byte++) {
3976 					if (((popc64(data_word &
3977 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3978 					    (pbits & 1)) {
3979 						cpu_record_dc_data_parity(
3980 						    ch_flt, dcp, &tmp_dcp, way,
3981 						    word);
3982 					}
3983 					pbits >>= 1;
3984 					data_word >>= 8;
3985 				}
3986 				parity_bits >>= 8;
3987 			}
3988 		} else {
3989 			/*
3990 			 * Check data array for even parity.
3991 			 * The 8 parity bits are grouped into 4 pairs each
3992 			 * of which covers a 64-bit word.  The endianness is
3993 			 * reversed -- the low-order parity bits cover the
3994 			 * high-order data words.
3995 			 */
3996 			parity_bits = tmp_dcp.dc_utag >> 8;
3997 			for (word = 0; word < 4; word++) {
3998 				pbits = (parity_bits >> (6 - word * 2)) & 3;
3999 				if ((popc64(tmp_dcp.dc_data[word]) +
4000 				    parity_bits_popc[pbits]) & 1) {
4001 					cpu_record_dc_data_parity(ch_flt, dcp,
4002 					    &tmp_dcp, way, word);
4003 				}
4004 			}
4005 		}
4006 	}
4007 }
4008 
4009 static void
4010 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4011     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4012 {
4013 	/*
4014 	 * If this is the first error log detailed information about it.
4015 	 * Otherwise just record the fact that we found another error.
4016 	 */
4017 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4018 		ch_flt->parity_data.dpe.cpl_way = way;
4019 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4020 		ch_flt->parity_data.dpe.cpl_off = word * 8;
4021 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4022 	}
4023 	ch_flt->parity_data.dpe.cpl_lcnt++;
4024 }
4025 
4026 /*
4027  * Record information related to the source of an Icache Parity Error.
4028  *
4029  * Called with the Icache disabled so any diagnostic accesses are safe.
4030  */
4031 static void
4032 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4033 {
4034 	int	ic_set_size;
4035 	int	ic_linesize;
4036 	int	index;
4037 
4038 	if (CPU_PRIVATE(CPU)) {
4039 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4040 		    CH_ICACHE_NWAY;
4041 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4042 	} else {
4043 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4044 		ic_linesize = icache_linesize;
4045 	}
4046 
4047 	ch_flt->parity_data.ipe.cpl_way = -1;
4048 	ch_flt->parity_data.ipe.cpl_off = -1;
4049 
4050 	for (index = 0; index < ic_set_size; index += ic_linesize)
4051 		cpu_icache_parity_check(ch_flt, index);
4052 }
4053 
4054 /*
4055  * Check all ways of the Icache at a specified index for good parity.
4056  */
4057 static void
4058 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4059 {
4060 	uint64_t parmask, pn_inst_parity;
4061 	int ic_set_size;
4062 	int ic_linesize;
4063 	int flt_index, way, instr, num_instr;
4064 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4065 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4066 	ch_ic_data_t tmp_icp;
4067 
4068 	if (CPU_PRIVATE(CPU)) {
4069 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4070 		    CH_ICACHE_NWAY;
4071 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4072 	} else {
4073 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4074 		ic_linesize = icache_linesize;
4075 	}
4076 
4077 	/*
4078 	 * Panther has twice as many instructions per icache line and the
4079 	 * instruction parity bit is in a different location.
4080 	 */
4081 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4082 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4083 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4084 	} else {
4085 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4086 		pn_inst_parity = 0;
4087 	}
4088 
4089 	/*
4090 	 * Index at which we expect to find the parity error.
4091 	 */
4092 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4093 
4094 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4095 		/*
4096 		 * Diagnostic reads expect address argument in ASI format.
4097 		 */
4098 		get_icache_dtag(2 * (index + way * ic_set_size),
4099 		    (uint64_t *)&tmp_icp);
4100 
4101 		/*
4102 		 * If this is the index in which we expect to find the
4103 		 * error log detailed information about each of the ways.
4104 		 * This information will be displayed later if we can't
4105 		 * determine the exact way in which the error is located.
4106 		 */
4107 		if (flt_index == index)
4108 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4109 
4110 		/*
4111 		 * Check tag for even parity.
4112 		 * Sum of 1 bits (including parity bit) should be even.
4113 		 */
4114 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4115 			/*
4116 			 * If this way is the one in which we expected
4117 			 * to find the error record the way and check the
4118 			 * snoop tag. Otherwise just record the fact we
4119 			 * found another error.
4120 			 */
4121 			if (flt_index == index) {
4122 				ch_flt->parity_data.ipe.cpl_way = way;
4123 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4124 
4125 				if (popc64(tmp_icp.ic_sntag &
4126 				    CHP_ICSNTAG_PARMASK) & 1) {
4127 					ch_flt->parity_data.ipe.cpl_tag |=
4128 					    CHP_IC_SNTAG;
4129 					ch_flt->parity_data.ipe.cpl_lcnt++;
4130 				}
4131 
4132 			}
4133 			ch_flt->parity_data.ipe.cpl_lcnt++;
4134 			continue;
4135 		}
4136 
4137 		/*
4138 		 * Check instruction data for even parity.
4139 		 * Bits participating in parity differ for PC-relative
4140 		 * versus non-PC-relative instructions.
4141 		 */
4142 		for (instr = 0; instr < num_instr; instr++) {
4143 			parmask = (tmp_icp.ic_data[instr] &
4144 			    CH_ICDATA_PRED_ISPCREL) ?
4145 			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4146 			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4147 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4148 				/*
4149 				 * If this way is the one in which we expected
4150 				 * to find the error record the way and offset.
4151 				 * Otherwise just log the fact we found another
4152 				 * error.
4153 				 */
4154 				if (flt_index == index) {
4155 					ch_flt->parity_data.ipe.cpl_way = way;
4156 					ch_flt->parity_data.ipe.cpl_off =
4157 					    instr * 4;
4158 				}
4159 				ch_flt->parity_data.ipe.cpl_lcnt++;
4160 				continue;
4161 			}
4162 		}
4163 	}
4164 }
4165 
4166 /*
4167  * Record information related to the source of an Pcache Parity Error.
4168  */
4169 static void
4170 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4171 {
4172 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4173 	int index;
4174 
4175 	/*
4176 	 * Since instruction decode cannot be done at high PIL just
4177 	 * examine the entire Pcache to check for any parity errors.
4178 	 */
4179 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4180 		ch_flt->parity_data.dpe.cpl_way = -1;
4181 		ch_flt->parity_data.dpe.cpl_off = -1;
4182 	}
4183 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4184 		cpu_pcache_parity_check(ch_flt, index);
4185 }
4186 
4187 /*
4188  * Check all ways of the Pcache at a specified index for good parity.
4189  */
4190 static void
4191 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4192 {
4193 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4194 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4195 	int way, word, pbit, parity_bits;
4196 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4197 	ch_pc_data_t tmp_pcp;
4198 
4199 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4200 		/*
4201 		 * Perform diagnostic read.
4202 		 */
4203 		get_pcache_dtag(index + way * pc_set_size,
4204 		    (uint64_t *)&tmp_pcp);
4205 		/*
4206 		 * Check data array for odd parity. There are 8 parity
4207 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4208 		 * of those bits covers exactly 8 bytes of the data
4209 		 * array:
4210 		 *
4211 		 *	parity bit	P$ data bytes covered
4212 		 *	----------	---------------------
4213 		 *	50		63:56
4214 		 *	51		55:48
4215 		 *	52		47:40
4216 		 *	53		39:32
4217 		 *	54		31:24
4218 		 *	55		23:16
4219 		 *	56		15:8
4220 		 *	57		7:0
4221 		 */
4222 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4223 		for (word = 0; word < pc_data_words; word++) {
4224 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4225 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4226 				/*
4227 				 * If this is the first error log detailed
4228 				 * information about it. Otherwise just record
4229 				 * the fact that we found another error.
4230 				 */
4231 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4232 					ch_flt->parity_data.dpe.cpl_way = way;
4233 					ch_flt->parity_data.dpe.cpl_cache =
4234 					    CPU_PC_PARITY;
4235 					ch_flt->parity_data.dpe.cpl_off =
4236 					    word * sizeof (uint64_t);
4237 					bcopy(&tmp_pcp, pcp,
4238 					    sizeof (ch_pc_data_t));
4239 				}
4240 				ch_flt->parity_data.dpe.cpl_lcnt++;
4241 			}
4242 		}
4243 	}
4244 }
4245 
4246 
4247 /*
4248  * Add L1 Data cache data to the ereport payload.
4249  */
4250 static void
4251 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4252 {
4253 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4254 	ch_dc_data_t *dcp;
4255 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4256 	uint_t nelem;
4257 	int i, ways_to_check, ways_logged = 0;
4258 
4259 	/*
4260 	 * If this is an D$ fault then there may be multiple
4261 	 * ways captured in the ch_parity_log_t structure.
4262 	 * Otherwise, there will be at most one way captured
4263 	 * in the ch_diag_data_t struct.
4264 	 * Check each way to see if it should be encoded.
4265 	 */
4266 	if (ch_flt->flt_type == CPU_DC_PARITY)
4267 		ways_to_check = CH_DCACHE_NWAY;
4268 	else
4269 		ways_to_check = 1;
4270 	for (i = 0; i < ways_to_check; i++) {
4271 		if (ch_flt->flt_type == CPU_DC_PARITY)
4272 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4273 		else
4274 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4275 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4276 			bcopy(dcp, &dcdata[ways_logged],
4277 			    sizeof (ch_dc_data_t));
4278 			ways_logged++;
4279 		}
4280 	}
4281 
4282 	/*
4283 	 * Add the dcache data to the payload.
4284 	 */
4285 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4286 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4287 	if (ways_logged != 0) {
4288 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4289 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4290 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4291 	}
4292 }
4293 
4294 /*
4295  * Add L1 Instruction cache data to the ereport payload.
4296  */
4297 static void
4298 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4299 {
4300 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4301 	ch_ic_data_t *icp;
4302 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4303 	uint_t nelem;
4304 	int i, ways_to_check, ways_logged = 0;
4305 
4306 	/*
4307 	 * If this is an I$ fault then there may be multiple
4308 	 * ways captured in the ch_parity_log_t structure.
4309 	 * Otherwise, there will be at most one way captured
4310 	 * in the ch_diag_data_t struct.
4311 	 * Check each way to see if it should be encoded.
4312 	 */
4313 	if (ch_flt->flt_type == CPU_IC_PARITY)
4314 		ways_to_check = CH_ICACHE_NWAY;
4315 	else
4316 		ways_to_check = 1;
4317 	for (i = 0; i < ways_to_check; i++) {
4318 		if (ch_flt->flt_type == CPU_IC_PARITY)
4319 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4320 		else
4321 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4322 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4323 			bcopy(icp, &icdata[ways_logged],
4324 			    sizeof (ch_ic_data_t));
4325 			ways_logged++;
4326 		}
4327 	}
4328 
4329 	/*
4330 	 * Add the icache data to the payload.
4331 	 */
4332 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4333 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4334 	if (ways_logged != 0) {
4335 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4336 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4337 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4338 	}
4339 }
4340 
4341 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4342 
4343 /*
4344  * Add ecache data to payload.
4345  */
4346 static void
4347 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4348 {
4349 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4350 	ch_ec_data_t *ecp;
4351 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4352 	uint_t nelem;
4353 	int i, ways_logged = 0;
4354 
4355 	/*
4356 	 * Check each way to see if it should be encoded
4357 	 * and concatinate it into a temporary buffer.
4358 	 */
4359 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4360 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4361 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4362 			bcopy(ecp, &ecdata[ways_logged],
4363 			    sizeof (ch_ec_data_t));
4364 			ways_logged++;
4365 		}
4366 	}
4367 
4368 	/*
4369 	 * Panther CPUs have an additional level of cache and so
4370 	 * what we just collected was the L3 (ecache) and not the
4371 	 * L2 cache.
4372 	 */
4373 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4374 		/*
4375 		 * Add the L3 (ecache) data to the payload.
4376 		 */
4377 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4378 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4379 		if (ways_logged != 0) {
4380 			nelem = sizeof (ch_ec_data_t) /
4381 			    sizeof (uint64_t) * ways_logged;
4382 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4383 			    DATA_TYPE_UINT64_ARRAY, nelem,
4384 			    (uint64_t *)ecdata, NULL);
4385 		}
4386 
4387 		/*
4388 		 * Now collect the L2 cache.
4389 		 */
4390 		ways_logged = 0;
4391 		for (i = 0; i < PN_L2_NWAYS; i++) {
4392 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4393 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4394 				bcopy(ecp, &ecdata[ways_logged],
4395 				    sizeof (ch_ec_data_t));
4396 				ways_logged++;
4397 			}
4398 		}
4399 	}
4400 
4401 	/*
4402 	 * Add the L2 cache data to the payload.
4403 	 */
4404 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4405 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4406 	if (ways_logged != 0) {
4407 		nelem = sizeof (ch_ec_data_t) /
4408 		    sizeof (uint64_t) * ways_logged;
4409 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4410 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4411 	}
4412 }
4413 
4414 /*
4415  * Initialize cpu scheme for specified cpu.
4416  */
4417 static void
4418 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4419 {
4420 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4421 	uint8_t mask;
4422 
4423 	mask = cpunodes[cpuid].version;
4424 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4425 	    (u_longlong_t)cpunodes[cpuid].device_id);
4426 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4427 	    cpuid, &mask, (const char *)sbuf);
4428 }
4429 
4430 /*
4431  * Returns ereport resource type.
4432  */
4433 static int
4434 cpu_error_to_resource_type(struct async_flt *aflt)
4435 {
4436 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4437 
4438 	switch (ch_flt->flt_type) {
4439 
4440 	case CPU_CE_ECACHE:
4441 	case CPU_UE_ECACHE:
4442 	case CPU_UE_ECACHE_RETIRE:
4443 	case CPU_ORPH:
4444 		/*
4445 		 * If AFSR error bit indicates L2$ Data for Cheetah,
4446 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4447 		 * E$ Data type, otherwise, return CPU type.
4448 		 */
4449 		if (cpu_error_is_ecache_data(aflt->flt_inst,
4450 		    ch_flt->flt_bit))
4451 			return (ERRTYPE_ECACHE_DATA);
4452 		return (ERRTYPE_CPU);
4453 
4454 	case CPU_CE:
4455 	case CPU_UE:
4456 	case CPU_EMC:
4457 	case CPU_DUE:
4458 	case CPU_RCE:
4459 	case CPU_RUE:
4460 	case CPU_FRC:
4461 	case CPU_FRU:
4462 		return (ERRTYPE_MEMORY);
4463 
4464 	case CPU_IC_PARITY:
4465 	case CPU_DC_PARITY:
4466 	case CPU_FPUERR:
4467 	case CPU_PC_PARITY:
4468 	case CPU_ITLB_PARITY:
4469 	case CPU_DTLB_PARITY:
4470 		return (ERRTYPE_CPU);
4471 	}
4472 	return (ERRTYPE_UNKNOWN);
4473 }
4474 
4475 /*
4476  * Encode the data saved in the ch_async_flt_t struct into
4477  * the FM ereport payload.
4478  */
4479 static void
4480 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4481 	nvlist_t *resource, int *afar_status, int *synd_status)
4482 {
4483 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4484 	*synd_status = AFLT_STAT_INVALID;
4485 	*afar_status = AFLT_STAT_INVALID;
4486 
4487 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4488 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4489 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4490 	}
4491 
4492 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4493 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4494 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4495 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4496 	}
4497 
4498 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4499 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4500 		    ch_flt->flt_bit);
4501 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4502 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4503 	}
4504 
4505 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4506 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4507 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4508 	}
4509 
4510 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4511 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4512 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4513 	}
4514 
4515 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4516 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4517 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4518 	}
4519 
4520 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4521 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4522 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4523 	}
4524 
4525 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4526 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4527 		    DATA_TYPE_BOOLEAN_VALUE,
4528 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4529 	}
4530 
4531 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4532 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4533 		    DATA_TYPE_BOOLEAN_VALUE,
4534 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4535 	}
4536 
4537 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4538 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4539 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4540 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4541 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4542 	}
4543 
4544 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4545 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4546 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4547 	}
4548 
4549 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4550 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4551 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4552 	}
4553 
4554 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4555 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4556 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4557 	}
4558 
4559 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4560 		cpu_payload_add_ecache(aflt, payload);
4561 
4562 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4563 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4564 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4565 	}
4566 
4567 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4568 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4569 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4570 	}
4571 
4572 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4573 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4574 		    DATA_TYPE_UINT32_ARRAY, 16,
4575 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4576 	}
4577 
4578 #if defined(CPU_IMP_L1_CACHE_PARITY)
4579 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4580 		cpu_payload_add_dcache(aflt, payload);
4581 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4582 		cpu_payload_add_icache(aflt, payload);
4583 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4584 
4585 #if defined(CHEETAH_PLUS)
4586 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4587 		cpu_payload_add_pcache(aflt, payload);
4588 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4589 		cpu_payload_add_tlb(aflt, payload);
4590 #endif	/* CHEETAH_PLUS */
4591 	/*
4592 	 * Create the FMRI that goes into the payload
4593 	 * and contains the unum info if necessary.
4594 	 */
4595 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4596 		char unum[UNUM_NAMLEN] = "";
4597 		char sid[DIMM_SERIAL_ID_LEN] = "";
4598 		int len, ret, rtype, synd_code;
4599 		uint64_t offset = (uint64_t)-1;
4600 
4601 		rtype = cpu_error_to_resource_type(aflt);
4602 		switch (rtype) {
4603 
4604 		case ERRTYPE_MEMORY:
4605 		case ERRTYPE_ECACHE_DATA:
4606 
4607 			/*
4608 			 * Memory errors, do unum lookup
4609 			 */
4610 			if (*afar_status == AFLT_STAT_INVALID)
4611 				break;
4612 
4613 			if (rtype == ERRTYPE_ECACHE_DATA)
4614 				aflt->flt_status |= ECC_ECACHE;
4615 			else
4616 				aflt->flt_status &= ~ECC_ECACHE;
4617 
4618 			synd_code = synd_to_synd_code(*synd_status,
4619 			    aflt->flt_synd, ch_flt->flt_bit);
4620 
4621 			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4622 				break;
4623 
4624 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4625 			    &len);
4626 
4627 			if (ret == 0) {
4628 				(void) cpu_get_mem_offset(aflt->flt_addr,
4629 				    &offset);
4630 			}
4631 
4632 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4633 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4634 			fm_payload_set(payload,
4635 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4636 			    DATA_TYPE_NVLIST, resource, NULL);
4637 			break;
4638 
4639 		case ERRTYPE_CPU:
4640 			/*
4641 			 * On-board processor array error, add cpu resource.
4642 			 */
4643 			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4644 			fm_payload_set(payload,
4645 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4646 			    DATA_TYPE_NVLIST, resource, NULL);
4647 			break;
4648 		}
4649 	}
4650 }
4651 
4652 /*
4653  * Initialize the way info if necessary.
4654  */
4655 void
4656 cpu_ereport_init(struct async_flt *aflt)
4657 {
4658 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4659 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4660 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4661 	int i;
4662 
4663 	/*
4664 	 * Initialize the info in the CPU logout structure.
4665 	 * The I$/D$ way information is not initialized here
4666 	 * since it is captured in the logout assembly code.
4667 	 */
4668 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4669 		(ecp + i)->ec_way = i;
4670 
4671 	for (i = 0; i < PN_L2_NWAYS; i++)
4672 		(l2p + i)->ec_way = i;
4673 }
4674 
4675 /*
4676  * Returns whether fault address is valid for this error bit and
4677  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4678  */
4679 int
4680 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4681 {
4682 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4683 
4684 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4685 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4686 	    AFLT_STAT_VALID &&
4687 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4688 }
4689 
4690 /*
4691  * Returns whether fault address is valid based on the error bit for the
4692  * one event being queued and whether the address is "in memory".
4693  */
4694 static int
4695 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4696 {
4697 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4698 	int afar_status;
4699 	uint64_t afsr_errs, afsr_ow, *ow_bits;
4700 
4701 	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4702 	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4703 		return (0);
4704 
4705 	afsr_errs = ch_flt->afsr_errs;
4706 	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4707 
4708 	switch (afar_status) {
4709 	case AFLT_STAT_VALID:
4710 		return (1);
4711 
4712 	case AFLT_STAT_AMBIGUOUS:
4713 		/*
4714 		 * Status is ambiguous since another error bit (or bits)
4715 		 * of equal priority to the specified bit on in the afsr,
4716 		 * so check those bits. Return 1 only if the bits on in the
4717 		 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4718 		 * Otherwise not all the equal priority bits are for memory
4719 		 * errors, so return 0.
4720 		 */
4721 		ow_bits = afar_overwrite;
4722 		while ((afsr_ow = *ow_bits++) != 0) {
4723 			/*
4724 			 * Get other bits that are on in t_afsr_bit's priority
4725 			 * class to check for Memory Error bits only.
4726 			 */
4727 			if (afsr_ow & t_afsr_bit) {
4728 				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4729 					return (0);
4730 				else
4731 					return (1);
4732 			}
4733 		}
4734 		/*FALLTHRU*/
4735 
4736 	default:
4737 		return (0);
4738 	}
4739 }
4740 
4741 static void
4742 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4743 {
4744 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4745 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4746 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4747 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4748 #if defined(CPU_IMP_ECACHE_ASSOC)
4749 	int i, nway;
4750 #endif /* CPU_IMP_ECACHE_ASSOC */
4751 
4752 	/*
4753 	 * Check if the CPU log out captured was valid.
4754 	 */
4755 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4756 	    ch_flt->flt_data_incomplete)
4757 		return;
4758 
4759 #if defined(CPU_IMP_ECACHE_ASSOC)
4760 	nway = cpu_ecache_nway();
4761 	i =  cpu_ecache_line_valid(ch_flt);
4762 	if (i == 0 || i > nway) {
4763 		for (i = 0; i < nway; i++)
4764 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4765 	} else
4766 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4767 #else /* CPU_IMP_ECACHE_ASSOC */
4768 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4769 #endif /* CPU_IMP_ECACHE_ASSOC */
4770 
4771 #if defined(CHEETAH_PLUS)
4772 	pn_cpu_log_diag_l2_info(ch_flt);
4773 #endif /* CHEETAH_PLUS */
4774 
4775 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4776 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4777 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4778 	}
4779 
4780 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4781 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4782 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4783 		else
4784 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4785 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4786 	}
4787 }
4788 
/*
 * Cheetah ECC calculation.
 *
 * Only the data bits take part in the calculation; the check bit and
 * Mtag bit terms can be ignored.  Each of the nine rows is the pair of
 * data-bit masks for one ECC bit:
 *
 *	[n][0]	mask for the low-order 64 data bits
 *	[n][1]	mask for the high-order 64 data bits
 */
static uint64_t ch_ecc_table[9][2] = {
	{ 0x46bffffeccd1177fULL, 0x488800022100014cULL },	/* bit 0 */
	{ 0x42fccc81331ff77fULL, 0x14424f1010249184ULL },	/* bit 1 */
	{ 0x8898827c222f1ffeULL, 0x22c1222808184aafULL },	/* bit 2 */
	{ 0xf7632203e131ccf1ULL, 0xe1241121848292b8ULL },	/* bit 3 */
	{ 0x7f5511421b113809ULL, 0x901c88d84288aafeULL },	/* bit 4 */
	{ 0x1d49412184882487ULL, 0x8f338c87c044c6efULL },	/* bit 5 */
	{ 0xf552181014448344ULL, 0x7ff8f4443e411911ULL },	/* bit 6 */
	{ 0x2189240808f24228ULL, 0xfeeff8cc81333f42ULL },	/* bit 7 */
	{ 0x3280008440001112ULL, 0xfee88b337ffffd62ULL },	/* bit 8 */
};
4809 
/*
 * Count the 1 bits in a 64-bit value.
 *
 * Each pass clears the lowest set bit (val &= val - 1), so the loop runs
 * once per set bit.  This is done in software because the UltraSPARC V9
 * POPC instruction is not supported by some CPUs, including Cheetahplus
 * and Jaguar.
 */
int
popc64(uint64_t val)
{
	int nbits = 0;

	while (val != 0) {
		val &= val - 1;		/* drop the lowest set bit */
		nbits++;
	}
	return (nbits);
}
4825 
4826 /*
4827  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4828  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4829  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4830  * instead of doing all the xor's.
4831  */
4832 uint32_t
4833 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4834 {
4835 	int bitno, s;
4836 	int synd = 0;
4837 
4838 	for (bitno = 0; bitno < 9; bitno++) {
4839 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4840 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4841 		synd |= (s << bitno);
4842 	}
4843 	return (synd);
4844 
4845 }
4846 
4847 /*
4848  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4849  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4850  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4851  */
4852 static void
4853 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4854     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4855 {
4856 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4857 
4858 	if (reason &&
4859 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4860 		(void) strcat(reason, eccp->ec_reason);
4861 	}
4862 
4863 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4864 	ch_flt->flt_type = eccp->ec_flt_type;
4865 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4866 		ch_flt->flt_diag_data = *cdp;
4867 	else
4868 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4869 	aflt->flt_in_memory =
4870 	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4871 
4872 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4873 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4874 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4875 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4876 	else
4877 		aflt->flt_synd = 0;
4878 
4879 	aflt->flt_payload = eccp->ec_err_payload;
4880 
4881 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4882 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4883 		cpu_errorq_dispatch(eccp->ec_err_class,
4884 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4885 		    aflt->flt_panic);
4886 	else
4887 		cpu_errorq_dispatch(eccp->ec_err_class,
4888 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4889 		    aflt->flt_panic);
4890 }
4891 
4892 /*
4893  * Queue events on async event queue one event per error bit.  First we
4894  * queue the events that we "expect" for the given trap, then we queue events
4895  * that we may not expect.  Return number of events queued.
4896  */
4897 int
4898 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4899     ch_cpu_logout_t *clop)
4900 {
4901 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4902 	ecc_type_to_info_t *eccp;
4903 	int nevents = 0;
4904 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4905 #if defined(CHEETAH_PLUS)
4906 	uint64_t orig_t_afsr_errs;
4907 #endif
4908 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4909 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4910 	ch_diag_data_t *cdp = NULL;
4911 
4912 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4913 
4914 #if defined(CHEETAH_PLUS)
4915 	orig_t_afsr_errs = t_afsr_errs;
4916 
4917 	/*
4918 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4919 	 */
4920 	if (clop != NULL) {
4921 		/*
4922 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4923 		 * flt_addr and flt_stat fields will be reset to the primaries
4924 		 * below, but the sdw_addr and sdw_stat will stay as the
4925 		 * secondaries.
4926 		 */
4927 		cdp = &clop->clo_sdw_data;
4928 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4929 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4930 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4931 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4932 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4933 
4934 		/*
4935 		 * If the primary and shadow AFSR differ, tag the shadow as
4936 		 * the first fault.
4937 		 */
4938 		if ((primary_afar != cdp->chd_afar) ||
4939 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4940 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4941 		}
4942 
4943 		/*
4944 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4945 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4946 		 * is expected to be zero for those CPUs which do not have
4947 		 * an AFSR_EXT register.
4948 		 */
4949 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4950 			if ((eccp->ec_afsr_bit &
4951 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4952 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4953 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4954 				cdp = NULL;
4955 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4956 				nevents++;
4957 			}
4958 		}
4959 
4960 		/*
4961 		 * If the ME bit is on in the primary AFSR turn all the
4962 		 * error bits on again that may set the ME bit to make
4963 		 * sure we see the ME AFSR error logs.
4964 		 */
4965 		if ((primary_afsr & C_AFSR_ME) != 0)
4966 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4967 	}
4968 #endif	/* CHEETAH_PLUS */
4969 
4970 	if (clop != NULL)
4971 		cdp = &clop->clo_data;
4972 
4973 	/*
4974 	 * Queue expected errors, error bit and fault type must match
4975 	 * in the ecc_type_to_info table.
4976 	 */
4977 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4978 	    eccp++) {
4979 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4980 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4981 #if defined(SERRANO)
4982 			/*
4983 			 * For FRC/FRU errors on Serrano the afar2 captures
4984 			 * the address and the associated data is
4985 			 * in the shadow logout area.
4986 			 */
4987 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4988 				if (clop != NULL)
4989 					cdp = &clop->clo_sdw_data;
4990 				aflt->flt_addr = ch_flt->afar2;
4991 			} else {
4992 				if (clop != NULL)
4993 					cdp = &clop->clo_data;
4994 				aflt->flt_addr = primary_afar;
4995 			}
4996 #else	/* SERRANO */
4997 			aflt->flt_addr = primary_afar;
4998 #endif	/* SERRANO */
4999 			aflt->flt_stat = primary_afsr;
5000 			ch_flt->afsr_ext = primary_afsr_ext;
5001 			ch_flt->afsr_errs = primary_afsr_errs;
5002 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5003 			cdp = NULL;
5004 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5005 			nevents++;
5006 		}
5007 	}
5008 
5009 	/*
5010 	 * Queue unexpected errors, error bit only match.
5011 	 */
5012 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5013 	    eccp++) {
5014 		if (eccp->ec_afsr_bit & t_afsr_errs) {
5015 #if defined(SERRANO)
5016 			/*
5017 			 * For FRC/FRU errors on Serrano the afar2 captures
5018 			 * the address and the associated data is
5019 			 * in the shadow logout area.
5020 			 */
5021 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5022 				if (clop != NULL)
5023 					cdp = &clop->clo_sdw_data;
5024 				aflt->flt_addr = ch_flt->afar2;
5025 			} else {
5026 				if (clop != NULL)
5027 					cdp = &clop->clo_data;
5028 				aflt->flt_addr = primary_afar;
5029 			}
5030 #else	/* SERRANO */
5031 			aflt->flt_addr = primary_afar;
5032 #endif	/* SERRANO */
5033 			aflt->flt_stat = primary_afsr;
5034 			ch_flt->afsr_ext = primary_afsr_ext;
5035 			ch_flt->afsr_errs = primary_afsr_errs;
5036 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5037 			cdp = NULL;
5038 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5039 			nevents++;
5040 		}
5041 	}
5042 	return (nevents);
5043 }
5044 
5045 /*
5046  * Return trap type number.
5047  */
5048 uint8_t
5049 flt_to_trap_type(struct async_flt *aflt)
5050 {
5051 	if (aflt->flt_status & ECC_I_TRAP)
5052 		return (TRAP_TYPE_ECC_I);
5053 	if (aflt->flt_status & ECC_D_TRAP)
5054 		return (TRAP_TYPE_ECC_D);
5055 	if (aflt->flt_status & ECC_F_TRAP)
5056 		return (TRAP_TYPE_ECC_F);
5057 	if (aflt->flt_status & ECC_C_TRAP)
5058 		return (TRAP_TYPE_ECC_C);
5059 	if (aflt->flt_status & ECC_DP_TRAP)
5060 		return (TRAP_TYPE_ECC_DP);
5061 	if (aflt->flt_status & ECC_IP_TRAP)
5062 		return (TRAP_TYPE_ECC_IP);
5063 	if (aflt->flt_status & ECC_ITLB_TRAP)
5064 		return (TRAP_TYPE_ECC_ITLB);
5065 	if (aflt->flt_status & ECC_DTLB_TRAP)
5066 		return (TRAP_TYPE_ECC_DTLB);
5067 	return (TRAP_TYPE_UNKNOWN);
5068 }
5069 
5070 /*
5071  * Decide an error type based on detector and leaky/partner tests.
5072  * The following array is used for quick translation - it must
5073  * stay in sync with ce_dispact_t.
5074  */
5075 
5076 static char *cetypes[] = {
5077 	CE_DISP_DESC_U,
5078 	CE_DISP_DESC_I,
5079 	CE_DISP_DESC_PP,
5080 	CE_DISP_DESC_P,
5081 	CE_DISP_DESC_L,
5082 	CE_DISP_DESC_PS,
5083 	CE_DISP_DESC_S
5084 };
5085 
5086 char *
5087 flt_to_error_type(struct async_flt *aflt)
5088 {
5089 	ce_dispact_t dispact, disp;
5090 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5091 
5092 	/*
5093 	 * The memory payload bundle is shared by some events that do
5094 	 * not perform any classification.  For those flt_disp will be
5095 	 * 0 and we will return "unknown".
5096 	 */
5097 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5098 		return (cetypes[CE_DISP_UNKNOWN]);
5099 
5100 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5101 
5102 	/*
5103 	 * It is also possible that no scrub/classification was performed
5104 	 * by the detector, for instance where a disrupting error logged
5105 	 * in the AFSR while CEEN was off in cpu_deferred_error.
5106 	 */
5107 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5108 		return (cetypes[CE_DISP_UNKNOWN]);
5109 
5110 	/*
5111 	 * Lookup type in initial classification/action table
5112 	 */
5113 	dispact = CE_DISPACT(ce_disp_table,
5114 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
5115 	    CE_XDIAG_STATE(dtcrinfo),
5116 	    CE_XDIAG_CE1SEEN(dtcrinfo),
5117 	    CE_XDIAG_CE2SEEN(dtcrinfo));
5118 
5119 	/*
5120 	 * A bad lookup is not something to panic production systems for.
5121 	 */
5122 	ASSERT(dispact != CE_DISP_BAD);
5123 	if (dispact == CE_DISP_BAD)
5124 		return (cetypes[CE_DISP_UNKNOWN]);
5125 
5126 	disp = CE_DISP(dispact);
5127 
5128 	switch (disp) {
5129 	case CE_DISP_UNKNOWN:
5130 	case CE_DISP_INTERMITTENT:
5131 		break;
5132 
5133 	case CE_DISP_POSS_PERS:
5134 		/*
5135 		 * "Possible persistent" errors to which we have applied a valid
5136 		 * leaky test can be separated into "persistent" or "leaky".
5137 		 */
5138 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5139 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
5140 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5141 			    CE_XDIAG_CE2SEEN(lkyinfo))
5142 				disp = CE_DISP_LEAKY;
5143 			else
5144 				disp = CE_DISP_PERS;
5145 		}
5146 		break;
5147 
5148 	case CE_DISP_POSS_STICKY:
5149 		/*
5150 		 * Promote "possible sticky" results that have been
5151 		 * confirmed by a partner test to "sticky".  Unconfirmed
5152 		 * "possible sticky" events are left at that status - we do not
5153 		 * guess at any bad reader/writer etc status here.
5154 		 */
5155 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5156 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5157 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5158 			disp = CE_DISP_STICKY;
5159 
5160 		/*
5161 		 * Promote "possible sticky" results on a uniprocessor
5162 		 * to "sticky"
5163 		 */
5164 		if (disp == CE_DISP_POSS_STICKY &&
5165 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5166 			disp = CE_DISP_STICKY;
5167 		break;
5168 
5169 	default:
5170 		disp = CE_DISP_UNKNOWN;
5171 		break;
5172 	}
5173 
5174 	return (cetypes[disp]);
5175 }
5176 
5177 /*
5178  * Given the entire afsr, the specific bit to check and a prioritized list of
5179  * error bits, determine the validity of the various overwrite priority
5180  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5181  * different overwrite priorities.
5182  *
5183  * Given a specific afsr error bit and the entire afsr, there are three cases:
5184  *   INVALID:	The specified bit is lower overwrite priority than some other
5185  *		error bit which is on in the afsr (or IVU/IVC).
5186  *   VALID:	The specified bit is higher priority than all other error bits
5187  *		which are on in the afsr.
5188  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5189  *		bit is on in the afsr.
5190  */
5191 int
5192 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5193 {
5194 	uint64_t afsr_ow;
5195 
5196 	while ((afsr_ow = *ow_bits++) != 0) {
5197 		/*
5198 		 * If bit is in the priority class, check to see if another
5199 		 * bit in the same class is on => ambiguous.  Otherwise,
5200 		 * the value is valid.  If the bit is not on at this priority
5201 		 * class, but a higher priority bit is on, then the value is
5202 		 * invalid.
5203 		 */
5204 		if (afsr_ow & afsr_bit) {
5205 			/*
5206 			 * If equal pri bit is on, ambiguous.
5207 			 */
5208 			if (afsr & (afsr_ow & ~afsr_bit))
5209 				return (AFLT_STAT_AMBIGUOUS);
5210 			return (AFLT_STAT_VALID);
5211 		} else if (afsr & afsr_ow)
5212 			break;
5213 	}
5214 
5215 	/*
5216 	 * We didn't find a match or a higher priority bit was on.  Not
5217 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5218 	 */
5219 	return (AFLT_STAT_INVALID);
5220 }
5221 
5222 static int
5223 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5224 {
5225 #if defined(SERRANO)
5226 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5227 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5228 	else
5229 #endif	/* SERRANO */
5230 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5231 }
5232 
5233 static int
5234 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5235 {
5236 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5237 }
5238 
5239 static int
5240 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5241 {
5242 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5243 }
5244 
5245 static int
5246 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5247 {
5248 #ifdef lint
5249 	cpuid = cpuid;
5250 #endif
5251 #if defined(CHEETAH_PLUS)
5252 	/*
5253 	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5254 	 * policy for Cheetah+ and separate for Panther CPUs.
5255 	 */
5256 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5257 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5258 			return (afsr_to_msynd_status(afsr, afsr_bit));
5259 		else
5260 			return (afsr_to_esynd_status(afsr, afsr_bit));
5261 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5262 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5263 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5264 		else
5265 			return (afsr_to_esynd_status(afsr, afsr_bit));
5266 #else /* CHEETAH_PLUS */
5267 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5268 		return (afsr_to_msynd_status(afsr, afsr_bit));
5269 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5270 		return (afsr_to_esynd_status(afsr, afsr_bit));
5271 #endif /* CHEETAH_PLUS */
5272 	} else {
5273 		return (AFLT_STAT_INVALID);
5274 	}
5275 }
5276 
5277 /*
5278  * Slave CPU stick synchronization.
5279  */
5280 void
5281 sticksync_slave(void)
5282 {
5283 	int 		i;
5284 	int		tries = 0;
5285 	int64_t		tskew;
5286 	int64_t		av_tskew;
5287 
5288 	kpreempt_disable();
5289 	/* wait for the master side */
5290 	while (stick_sync_cmd != SLAVE_START)
5291 		;
5292 	/*
5293 	 * Synchronization should only take a few tries at most. But in the
5294 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5295 	 * without it's stick synchronized wouldn't be a good citizen.
5296 	 */
5297 	while (slave_done == 0) {
5298 		/*
5299 		 * Time skew calculation.
5300 		 */
5301 		av_tskew = tskew = 0;
5302 
5303 		for (i = 0; i < stick_iter; i++) {
5304 			/* make location hot */
5305 			timestamp[EV_A_START] = 0;
5306 			stick_timestamp(&timestamp[EV_A_START]);
5307 
5308 			/* tell the master we're ready */
5309 			stick_sync_cmd = MASTER_START;
5310 
5311 			/* and wait */
5312 			while (stick_sync_cmd != SLAVE_CONT)
5313 				;
5314 			/* Event B end */
5315 			stick_timestamp(&timestamp[EV_B_END]);
5316 
5317 			/* calculate time skew */
5318 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5319 			    - (timestamp[EV_A_END] - timestamp[EV_A_START]))
5320 			    / 2;
5321 
5322 			/* keep running count */
5323 			av_tskew += tskew;
5324 		} /* for */
5325 
5326 		/*
5327 		 * Adjust stick for time skew if not within the max allowed;
5328 		 * otherwise we're all done.
5329 		 */
5330 		if (stick_iter != 0)
5331 			av_tskew = av_tskew/stick_iter;
5332 		if (ABS(av_tskew) > stick_tsk) {
5333 			/*
5334 			 * If the skew is 1 (the slave's STICK register
5335 			 * is 1 STICK ahead of the master's), stick_adj
5336 			 * could fail to adjust the slave's STICK register
5337 			 * if the STICK read on the slave happens to
5338 			 * align with the increment of the STICK.
5339 			 * Therefore, we increment the skew to 2.
5340 			 */
5341 			if (av_tskew == 1)
5342 				av_tskew++;
5343 			stick_adj(-av_tskew);
5344 		} else
5345 			slave_done = 1;
5346 #ifdef DEBUG
5347 		if (tries < DSYNC_ATTEMPTS)
5348 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5349 			    av_tskew;
5350 		++tries;
5351 #endif /* DEBUG */
5352 #ifdef lint
5353 		tries = tries;
5354 #endif
5355 
5356 	} /* while */
5357 
5358 	/* allow the master to finish */
5359 	stick_sync_cmd = EVENT_NULL;
5360 	kpreempt_enable();
5361 }
5362 
5363 /*
5364  * Master CPU side of stick synchronization.
5365  *  - timestamp end of Event A
5366  *  - timestamp beginning of Event B
5367  */
5368 void
5369 sticksync_master(void)
5370 {
5371 	int		i;
5372 
5373 	kpreempt_disable();
5374 	/* tell the slave we've started */
5375 	slave_done = 0;
5376 	stick_sync_cmd = SLAVE_START;
5377 
5378 	while (slave_done == 0) {
5379 		for (i = 0; i < stick_iter; i++) {
5380 			/* wait for the slave */
5381 			while (stick_sync_cmd != MASTER_START)
5382 				;
5383 			/* Event A end */
5384 			stick_timestamp(&timestamp[EV_A_END]);
5385 
5386 			/* make location hot */
5387 			timestamp[EV_B_START] = 0;
5388 			stick_timestamp(&timestamp[EV_B_START]);
5389 
5390 			/* tell the slave to continue */
5391 			stick_sync_cmd = SLAVE_CONT;
5392 		} /* for */
5393 
5394 		/* wait while slave calculates time skew */
5395 		while (stick_sync_cmd == SLAVE_CONT)
5396 			;
5397 	} /* while */
5398 	kpreempt_enable();
5399 }
5400 
/*
 * Nothing to do here: Cheetah/Cheetah+ have a disrupting error for
 * copybacks, so the Spitfire hack of xcall'ing all the cpus to ask them
 * to check for such errors is not needed.  Also, in cpu_async_panic_callb,
 * each cpu checks for CPU events on its way to panic idle.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
	/* intentionally empty */
}
5411 
5412 struct kmem_cache *ch_private_cache;
5413 
5414 /*
5415  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5416  * deallocate the scrubber data structures and cpu_private data structure.
5417  */
5418 void
5419 cpu_uninit_private(struct cpu *cp)
5420 {
5421 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5422 
5423 	ASSERT(chprp);
5424 	cpu_uninit_ecache_scrub_dr(cp);
5425 	CPU_PRIVATE(cp) = NULL;
5426 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5427 	kmem_cache_free(ch_private_cache, chprp);
5428 	cmp_delete_cpu(cp->cpu_id);
5429 
5430 }
5431 
/*
 * Cheetah Cache Scrubbing
 *
 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
 * protected by either parity or ECC.
 *
 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
 * logic works with respect to the ASI used to scrub I$ lines, the entire
 * I$ is scanned at once.
 */
5444 
5445 /*
5446  * Tuneables to enable and disable the scrubbing of the caches, and to tune
5447  * scrubbing behavior.  These may be changed via /etc/system or using mdb
5448  * on a running system.
5449  */
5450 int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5451 
5452 /*
5453  * The following are the PIL levels that the softints/cross traps will fire at.
5454  */
5455 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5456 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5457 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5458 
5459 #if defined(JALAPENO)
5460 
5461 /*
5462  * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5463  * on Jalapeno.
5464  */
5465 int ecache_scrub_enable = 0;
5466 
5467 #else	/* JALAPENO */
5468 
5469 /*
5470  * With all other cpu types, E$ scrubbing is on by default
5471  */
5472 int ecache_scrub_enable = 1;
5473 
5474 #endif	/* JALAPENO */
5475 
5476 
5477 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5478 
5479 /*
5480  * The I$ scrubber tends to cause latency problems for real-time SW, so it
5481  * is disabled by default on non-Cheetah systems
5482  */
5483 int icache_scrub_enable = 0;
5484 
5485 /*
5486  * Tuneables specifying the scrub calls per second and the scan rate
5487  * for each cache
5488  *
5489  * The cyclic times are set during boot based on the following values.
5490  * Changing these values in mdb after this time will have no effect.  If
5491  * a different value is desired, it must be set in /etc/system before a
5492  * reboot.
5493  */
5494 int ecache_calls_a_sec = 1;
5495 int dcache_calls_a_sec = 2;
5496 int icache_calls_a_sec = 2;
5497 
5498 int ecache_scan_rate_idle = 1;
5499 int ecache_scan_rate_busy = 1;
5500 int dcache_scan_rate_idle = 1;
5501 int dcache_scan_rate_busy = 1;
5502 int icache_scan_rate_idle = 1;
5503 int icache_scan_rate_busy = 1;
5504 
5505 #else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5506 
5507 int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
5508 
5509 int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
5510 int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
5511 int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */
5512 
5513 int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
5514 int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
5515 int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
5516 int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
5517 int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
5518 int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
5519 
5520 #endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5521 
5522 /*
5523  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5524  * increment the outstanding request counter and schedule a softint to run
5525  * the scrubber.
5526  */
5527 extern xcfunc_t cache_scrubreq_tl1;
5528 
5529 /*
5530  * These are the softint functions for each cache scrubber
5531  */
5532 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5533 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5534 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5535 
5536 /*
5537  * The cache scrub info table contains cache specific information
5538  * and allows for some of the scrub code to be table driven, reducing
5539  * duplication of cache similar code.
5540  *
5541  * This table keeps a copy of the value in the calls per second variable
5542  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5543  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5544  * mdb in a misguided attempt to disable the scrubber).
5545  */
5546 struct scrub_info {
5547 	int		*csi_enable;	/* scrubber enable flag */
5548 	int		csi_freq;	/* scrubber calls per second */
5549 	int		csi_index;	/* index to chsm_outstanding[] */
5550 	uint64_t	csi_inum;	/* scrubber interrupt number */
5551 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5552 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5553 	char		csi_name[3];	/* cache name for this scrub entry */
5554 } cache_scrub_info[] = {
5555 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5556 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5557 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5558 };
5559 
5560 /*
5561  * If scrubbing is enabled, increment the outstanding request counter.  If it
5562  * is 1 (meaning there were no previous requests outstanding), call
5563  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5564  * a self trap.
5565  */
5566 static void
5567 do_scrub(struct scrub_info *csi)
5568 {
5569 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5570 	int index = csi->csi_index;
5571 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5572 
5573 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5574 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5575 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5576 			    csi->csi_inum, 0);
5577 		}
5578 	}
5579 }
5580 
5581 /*
5582  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5583  * cross-trap the offline cpus.
5584  */
5585 static void
5586 do_scrub_offline(struct scrub_info *csi)
5587 {
5588 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5589 
5590 	if (CPUSET_ISNULL(cpu_offline_set)) {
5591 		/*
5592 		 * No offline cpus - nothing to do
5593 		 */
5594 		return;
5595 	}
5596 
5597 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5598 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5599 		    csi->csi_index);
5600 	}
5601 }
5602 
5603 /*
5604  * This is the initial setup for the scrubber cyclics - it sets the
5605  * interrupt level, frequency, and function to call.
5606  */
5607 /*ARGSUSED*/
5608 static void
5609 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5610     cyc_time_t *when)
5611 {
5612 	struct scrub_info *csi = (struct scrub_info *)arg;
5613 
5614 	ASSERT(csi != NULL);
5615 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5616 	hdlr->cyh_level = CY_LOW_LEVEL;
5617 	hdlr->cyh_arg = arg;
5618 
5619 	when->cyt_when = 0;	/* Start immediately */
5620 	when->cyt_interval = NANOSEC / csi->csi_freq;
5621 }
5622 
5623 /*
5624  * Initialization for cache scrubbing.
5625  * This routine is called AFTER all cpus have had cpu_init_private called
5626  * to initialize their private data areas.
5627  */
5628 void
5629 cpu_init_cache_scrub(void)
5630 {
5631 	int i;
5632 	struct scrub_info *csi;
5633 	cyc_omni_handler_t omni_hdlr;
5634 	cyc_handler_t offline_hdlr;
5635 	cyc_time_t when;
5636 
5637 	/*
5638 	 * save away the maximum number of lines for the D$
5639 	 */
5640 	dcache_nlines = dcache_size / dcache_linesize;
5641 
5642 	/*
5643 	 * register the softints for the cache scrubbing
5644 	 */
5645 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5646 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5647 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5648 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5649 
5650 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5651 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5652 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5653 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5654 
5655 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5656 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5657 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5658 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5659 
5660 	/*
5661 	 * start the scrubbing for all the caches
5662 	 */
5663 	mutex_enter(&cpu_lock);
5664 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5665 
5666 		csi = &cache_scrub_info[i];
5667 
5668 		if (!(*csi->csi_enable))
5669 			continue;
5670 
5671 		/*
5672 		 * force the following to be true:
5673 		 *	1 <= calls_a_sec <= hz
5674 		 */
5675 		if (csi->csi_freq > hz) {
5676 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5677 			    "(%d); resetting to hz (%d)", csi->csi_name,
5678 			    csi->csi_freq, hz);
5679 			csi->csi_freq = hz;
5680 		} else if (csi->csi_freq < 1) {
5681 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5682 			    "(%d); resetting to 1", csi->csi_name,
5683 			    csi->csi_freq);
5684 			csi->csi_freq = 1;
5685 		}
5686 
5687 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5688 		omni_hdlr.cyo_offline = NULL;
5689 		omni_hdlr.cyo_arg = (void *)csi;
5690 
5691 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5692 		offline_hdlr.cyh_arg = (void *)csi;
5693 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5694 
5695 		when.cyt_when = 0;	/* Start immediately */
5696 		when.cyt_interval = NANOSEC / csi->csi_freq;
5697 
5698 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5699 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5700 	}
5701 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5702 	mutex_exit(&cpu_lock);
5703 }
5704 
5705 /*
5706  * Indicate that the specified cpu is idle.
5707  */
5708 void
5709 cpu_idle_ecache_scrub(struct cpu *cp)
5710 {
5711 	if (CPU_PRIVATE(cp) != NULL) {
5712 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5713 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5714 	}
5715 }
5716 
5717 /*
5718  * Indicate that the specified cpu is busy.
5719  */
5720 void
5721 cpu_busy_ecache_scrub(struct cpu *cp)
5722 {
5723 	if (CPU_PRIVATE(cp) != NULL) {
5724 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5725 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5726 	}
5727 }
5728 
5729 /*
5730  * Initialization for cache scrubbing for the specified cpu.
5731  */
5732 void
5733 cpu_init_ecache_scrub_dr(struct cpu *cp)
5734 {
5735 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5736 	int cpuid = cp->cpu_id;
5737 
5738 	/* initialize the number of lines in the caches */
5739 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5740 	    cpunodes[cpuid].ecache_linesize;
5741 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5742 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5743 
5744 	/*
5745 	 * do_scrub() and do_scrub_offline() check both the global
5746 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5747 	 * check this value before scrubbing.  Currently, we use it to
5748 	 * disable the E$ scrubber on multi-core cpus or while running at
5749 	 * slowed speed.  For now, just turn everything on and allow
5750 	 * cpu_init_private() to change it if necessary.
5751 	 */
5752 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5753 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5754 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5755 
5756 	cpu_busy_ecache_scrub(cp);
5757 }
5758 
5759 /*
5760  * Un-initialization for cache scrubbing for the specified cpu.
5761  */
5762 static void
5763 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5764 {
5765 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5766 
5767 	/*
5768 	 * un-initialize bookkeeping for cache scrubbing
5769 	 */
5770 	bzero(csmp, sizeof (ch_scrub_misc_t));
5771 
5772 	cpu_idle_ecache_scrub(cp);
5773 }
5774 
5775 /*
5776  * Called periodically on each CPU to scrub the D$.
5777  */
5778 static void
5779 scrub_dcache(int how_many)
5780 {
5781 	int i;
5782 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5783 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5784 
5785 	/*
5786 	 * scrub the desired number of lines
5787 	 */
5788 	for (i = 0; i < how_many; i++) {
5789 		/*
5790 		 * scrub a D$ line
5791 		 */
5792 		dcache_inval_line(index);
5793 
5794 		/*
5795 		 * calculate the next D$ line to scrub, assumes
5796 		 * that dcache_nlines is a power of 2
5797 		 */
5798 		index = (index + 1) & (dcache_nlines - 1);
5799 	}
5800 
5801 	/*
5802 	 * set the scrub index for the next visit
5803 	 */
5804 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5805 }
5806 
5807 /*
5808  * Handler for D$ scrub inum softint. Call scrub_dcache until
5809  * we decrement the outstanding request count to zero.
5810  */
5811 /*ARGSUSED*/
5812 static uint_t
5813 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5814 {
5815 	int i;
5816 	int how_many;
5817 	int outstanding;
5818 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5819 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5820 	struct scrub_info *csi = (struct scrub_info *)arg1;
5821 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5822 	    dcache_scan_rate_idle : dcache_scan_rate_busy;
5823 
5824 	/*
5825 	 * The scan rates are expressed in units of tenths of a
5826 	 * percent.  A scan rate of 1000 (100%) means the whole
5827 	 * cache is scanned every second.
5828 	 */
5829 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5830 
5831 	do {
5832 		outstanding = *countp;
5833 		for (i = 0; i < outstanding; i++) {
5834 			scrub_dcache(how_many);
5835 		}
5836 	} while (atomic_add_32_nv(countp, -outstanding));
5837 
5838 	return (DDI_INTR_CLAIMED);
5839 }
5840 
5841 /*
5842  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5843  * by invalidating lines. Due to the characteristics of the ASI which
5844  * is used to invalidate an I$ line, the entire I$ must be invalidated
5845  * vs. an individual I$ line.
5846  */
5847 static void
5848 scrub_icache(int how_many)
5849 {
5850 	int i;
5851 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5852 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5853 	int icache_nlines = csmp->chsm_icache_nlines;
5854 
5855 	/*
5856 	 * scrub the desired number of lines
5857 	 */
5858 	for (i = 0; i < how_many; i++) {
5859 		/*
5860 		 * since the entire I$ must be scrubbed at once,
5861 		 * wait until the index wraps to zero to invalidate
5862 		 * the entire I$
5863 		 */
5864 		if (index == 0) {
5865 			icache_inval_all();
5866 		}
5867 
5868 		/*
5869 		 * calculate the next I$ line to scrub, assumes
5870 		 * that chsm_icache_nlines is a power of 2
5871 		 */
5872 		index = (index + 1) & (icache_nlines - 1);
5873 	}
5874 
5875 	/*
5876 	 * set the scrub index for the next visit
5877 	 */
5878 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5879 }
5880 
5881 /*
5882  * Handler for I$ scrub inum softint. Call scrub_icache until
5883  * we decrement the outstanding request count to zero.
5884  */
5885 /*ARGSUSED*/
5886 static uint_t
5887 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5888 {
5889 	int i;
5890 	int how_many;
5891 	int outstanding;
5892 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5893 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5894 	struct scrub_info *csi = (struct scrub_info *)arg1;
5895 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5896 	    icache_scan_rate_idle : icache_scan_rate_busy;
5897 	int icache_nlines = csmp->chsm_icache_nlines;
5898 
5899 	/*
5900 	 * The scan rates are expressed in units of tenths of a
5901 	 * percent.  A scan rate of 1000 (100%) means the whole
5902 	 * cache is scanned every second.
5903 	 */
5904 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5905 
5906 	do {
5907 		outstanding = *countp;
5908 		for (i = 0; i < outstanding; i++) {
5909 			scrub_icache(how_many);
5910 		}
5911 	} while (atomic_add_32_nv(countp, -outstanding));
5912 
5913 	return (DDI_INTR_CLAIMED);
5914 }
5915 
5916 /*
5917  * Called periodically on each CPU to scrub the E$.
5918  */
5919 static void
5920 scrub_ecache(int how_many)
5921 {
5922 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5923 	int i;
5924 	int cpuid = CPU->cpu_id;
5925 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5926 	int nlines = csmp->chsm_ecache_nlines;
5927 	int linesize = cpunodes[cpuid].ecache_linesize;
5928 	int ec_set_size = cpu_ecache_set_size(CPU);
5929 
5930 	/*
5931 	 * scrub the desired number of lines
5932 	 */
5933 	for (i = 0; i < how_many; i++) {
5934 		/*
5935 		 * scrub the E$ line
5936 		 */
5937 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5938 		    ec_set_size);
5939 
5940 		/*
5941 		 * calculate the next E$ line to scrub based on twice
5942 		 * the number of E$ lines (to displace lines containing
5943 		 * flush area data), assumes that the number of lines
5944 		 * is a power of 2
5945 		 */
5946 		index = (index + 1) & ((nlines << 1) - 1);
5947 	}
5948 
5949 	/*
5950 	 * set the ecache scrub index for the next visit
5951 	 */
5952 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5953 }
5954 
5955 /*
5956  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5957  * we decrement the outstanding request count to zero.
5958  *
5959  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5960  * become negative after the atomic_add_32_nv().  This is not a problem, as
5961  * the next trip around the loop won't scrub anything, and the next add will
5962  * reset the count back to zero.
5963  */
5964 /*ARGSUSED*/
5965 static uint_t
5966 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5967 {
5968 	int i;
5969 	int how_many;
5970 	int outstanding;
5971 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5972 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5973 	struct scrub_info *csi = (struct scrub_info *)arg1;
5974 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5975 	    ecache_scan_rate_idle : ecache_scan_rate_busy;
5976 	int ecache_nlines = csmp->chsm_ecache_nlines;
5977 
5978 	/*
5979 	 * The scan rates are expressed in units of tenths of a
5980 	 * percent.  A scan rate of 1000 (100%) means the whole
5981 	 * cache is scanned every second.
5982 	 */
5983 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5984 
5985 	do {
5986 		outstanding = *countp;
5987 		for (i = 0; i < outstanding; i++) {
5988 			scrub_ecache(how_many);
5989 		}
5990 	} while (atomic_add_32_nv(countp, -outstanding));
5991 
5992 	return (DDI_INTR_CLAIMED);
5993 }
5994 
5995 /*
5996  * Timeout function to reenable CE
5997  */
5998 static void
5999 cpu_delayed_check_ce_errors(void *arg)
6000 {
6001 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6002 	    TQ_NOSLEEP)) {
6003 		(void) timeout(cpu_delayed_check_ce_errors, arg,
6004 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6005 	}
6006 }
6007 
6008 /*
6009  * CE Deferred Re-enable after trap.
6010  *
6011  * When the CPU gets a disrupting trap for any of the errors
6012  * controlled by the CEEN bit, CEEN is disabled in the trap handler
6013  * immediately. To eliminate the possibility of multiple CEs causing
6014  * recursive stack overflow in the trap handler, we cannot
6015  * reenable CEEN while still running in the trap handler. Instead,
6016  * after a CE is logged on a CPU, we schedule a timeout function,
6017  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6018  * seconds. This function will check whether any further CEs
6019  * have occurred on that CPU, and if none have, will reenable CEEN.
6020  *
6021  * If further CEs have occurred while CEEN is disabled, another
6022  * timeout will be scheduled. This is to ensure that the CPU can
6023  * make progress in the face of CE 'storms', and that it does not
6024  * spend all its time logging CE errors.
6025  */
6026 static void
6027 cpu_check_ce_errors(void *arg)
6028 {
6029 	int	cpuid = (int)(uintptr_t)arg;
6030 	cpu_t	*cp;
6031 
6032 	/*
6033 	 * We acquire cpu_lock.
6034 	 */
6035 	ASSERT(curthread->t_pil == 0);
6036 
6037 	/*
6038 	 * verify that the cpu is still around, DR
6039 	 * could have got there first ...
6040 	 */
6041 	mutex_enter(&cpu_lock);
6042 	cp = cpu_get(cpuid);
6043 	if (cp == NULL) {
6044 		mutex_exit(&cpu_lock);
6045 		return;
6046 	}
6047 	/*
6048 	 * make sure we don't migrate across CPUs
6049 	 * while checking our CE status.
6050 	 */
6051 	kpreempt_disable();
6052 
6053 	/*
6054 	 * If we are running on the CPU that got the
6055 	 * CE, we can do the checks directly.
6056 	 */
6057 	if (cp->cpu_id == CPU->cpu_id) {
6058 		mutex_exit(&cpu_lock);
6059 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6060 		kpreempt_enable();
6061 		return;
6062 	}
6063 	kpreempt_enable();
6064 
6065 	/*
6066 	 * send an x-call to get the CPU that originally
6067 	 * got the CE to do the necessary checks. If we can't
6068 	 * send the x-call, reschedule the timeout, otherwise we
6069 	 * lose CEEN forever on that CPU.
6070 	 */
6071 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6072 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6073 		    TIMEOUT_CEEN_CHECK, 0);
6074 		mutex_exit(&cpu_lock);
6075 	} else {
6076 		/*
6077 		 * When the CPU is not accepting xcalls, or
6078 		 * the processor is offlined, we don't want to
6079 		 * incur the extra overhead of trying to schedule the
6080 		 * CE timeout indefinitely. However, we don't want to lose
6081 		 * CE checking forever.
6082 		 *
6083 		 * Keep rescheduling the timeout, accepting the additional
6084 		 * overhead as the cost of correctness in the case where we get
6085 		 * a CE, disable CEEN, offline the CPU during the
6086 		 * the timeout interval, and then online it at some
6087 		 * point in the future. This is unlikely given the short
6088 		 * cpu_ceen_delay_secs.
6089 		 */
6090 		mutex_exit(&cpu_lock);
6091 		(void) timeout(cpu_delayed_check_ce_errors,
6092 		    (void *)(uintptr_t)cp->cpu_id,
6093 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6094 	}
6095 }
6096 
6097 /*
6098  * This routine will check whether CEs have occurred while
6099  * CEEN is disabled. Any CEs detected will be logged and, if
6100  * possible, scrubbed.
6101  *
6102  * The memscrubber will also use this routine to clear any errors
6103  * caused by its scrubbing with CEEN disabled.
6104  *
6105  * flag == SCRUBBER_CEEN_CHECK
6106  *		called from memscrubber, just check/scrub, no reset
6107  *		paddr 	physical addr. for start of scrub pages
6108  *		vaddr 	virtual addr. for scrub area
6109  *		psz	page size of area to be scrubbed
6110  *
6111  * flag == TIMEOUT_CEEN_CHECK
6112  *		timeout function has triggered, reset timeout or CEEN
6113  *
6114  * Note: We must not migrate cpus during this function.  This can be
6115  * achieved by one of:
6116  *    - invoking as target of an x-call in which case we're at XCALL_PIL
6117  *	The flag value must be first xcall argument.
6118  *    - disabling kernel preemption.  This should be done for very short
6119  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6120  *	scrub an extended area with cpu_check_block.  The call for
6121  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6122  *	brief for this case.
6123  *    - binding to a cpu, eg with thread_affinity_set().  This is used
6124  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6125  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6126  */
6127 void
6128 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6129 {
6130 	ch_cpu_errors_t	cpu_error_regs;
6131 	uint64_t	ec_err_enable;
6132 	uint64_t	page_offset;
6133 
6134 	/* Read AFSR */
6135 	get_cpu_error_state(&cpu_error_regs);
6136 
6137 	/*
6138 	 * If no CEEN errors have occurred during the timeout
6139 	 * interval, it is safe to re-enable CEEN and exit.
6140 	 */
6141 	if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
6142 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
6143 		if (flag == TIMEOUT_CEEN_CHECK &&
6144 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6145 			set_error_enable(ec_err_enable | EN_REG_CEEN);
6146 		return;
6147 	}
6148 
6149 	/*
6150 	 * Ensure that CEEN was not reenabled (maybe by DR) before
6151 	 * we log/clear the error.
6152 	 */
6153 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6154 		set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6155 
6156 	/*
6157 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6158 	 * timeout will be rescheduled when the error is logged.
6159 	 */
6160 	if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
6161 	    (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
6162 		cpu_ce_detected(&cpu_error_regs,
6163 		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6164 	else
6165 		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6166 
6167 	/*
6168 	 * If the memory scrubber runs while CEEN is
6169 	 * disabled, (or if CEEN is disabled during the
6170 	 * scrub as a result of a CE being triggered by
6171 	 * it), the range being scrubbed will not be
6172 	 * completely cleaned. If there are multiple CEs
6173 	 * in the range at most two of these will be dealt
6174 	 * with, (one by the trap handler and one by the
6175 	 * timeout). It is also possible that none are dealt
6176 	 * with, (CEEN disabled and another CE occurs before
6177 	 * the timeout triggers). So to ensure that the
6178 	 * memory is actually scrubbed, we have to access each
6179 	 * memory location in the range and then check whether
6180 	 * that access causes a CE.
6181 	 */
6182 	if (flag == SCRUBBER_CEEN_CHECK && va) {
6183 		if ((cpu_error_regs.afar >= pa) &&
6184 		    (cpu_error_regs.afar < (pa + psz))) {
6185 			/*
6186 			 * Force a load from physical memory for each
6187 			 * 64-byte block, then check AFSR to determine
6188 			 * whether this access caused an error.
6189 			 *
6190 			 * This is a slow way to do a scrub, but as it will
6191 			 * only be invoked when the memory scrubber actually
6192 			 * triggered a CE, it should not happen too
6193 			 * frequently.
6194 			 *
6195 			 * cut down what we need to check as the scrubber
6196 			 * has verified up to AFAR, so get it's offset
6197 			 * into the page and start there.
6198 			 */
6199 			page_offset = (uint64_t)(cpu_error_regs.afar &
6200 			    (psz - 1));
6201 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6202 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6203 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6204 			    psz);
6205 		}
6206 	}
6207 
6208 	/*
6209 	 * Reset error enable if this CE is not masked.
6210 	 */
6211 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6212 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6213 		set_error_enable(ec_err_enable | EN_REG_CEEN);
6214 
6215 }
6216 
6217 /*
6218  * Attempt a cpu logout for an error that we did not trap for, such
6219  * as a CE noticed with CEEN off.  It is assumed that we are still running
6220  * on the cpu that took the error and that we cannot migrate.  Returns
6221  * 0 on success, otherwise nonzero.
6222  */
6223 static int
6224 cpu_ce_delayed_ec_logout(uint64_t afar)
6225 {
6226 	ch_cpu_logout_t *clop;
6227 
6228 	if (CPU_PRIVATE(CPU) == NULL)
6229 		return (0);
6230 
6231 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6232 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6233 	    LOGOUT_INVALID)
6234 		return (0);
6235 
6236 	cpu_delayed_logout(afar, clop);
6237 	return (1);
6238 }
6239 
6240 /*
6241  * We got an error while CEEN was disabled. We
6242  * need to clean up after it and log whatever
6243  * information we have on the CE.
6244  */
6245 void
6246 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6247 {
6248 	ch_async_flt_t 	ch_flt;
6249 	struct async_flt *aflt;
6250 	char 		pr_reason[MAX_REASON_STRING];
6251 
6252 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6253 	ch_flt.flt_trapped_ce = flag;
6254 	aflt = (struct async_flt *)&ch_flt;
6255 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6256 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6257 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6258 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6259 	aflt->flt_addr = cpu_error_regs->afar;
6260 #if defined(SERRANO)
6261 	ch_flt.afar2 = cpu_error_regs->afar2;
6262 #endif	/* SERRANO */
6263 	aflt->flt_pc = NULL;
6264 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6265 	aflt->flt_tl = 0;
6266 	aflt->flt_panic = 0;
6267 	cpu_log_and_clear_ce(&ch_flt);
6268 
6269 	/*
6270 	 * check if we caused any errors during cleanup
6271 	 */
6272 	if (clear_errors(&ch_flt)) {
6273 		pr_reason[0] = '\0';
6274 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6275 		    NULL);
6276 	}
6277 }
6278 
6279 /*
6280  * Log/clear CEEN-controlled disrupting errors
6281  */
6282 static void
6283 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6284 {
6285 	struct async_flt *aflt;
6286 	uint64_t afsr, afsr_errs;
6287 	ch_cpu_logout_t *clop;
6288 	char 		pr_reason[MAX_REASON_STRING];
6289 	on_trap_data_t	*otp = curthread->t_ontrap;
6290 
6291 	aflt = (struct async_flt *)ch_flt;
6292 	afsr = aflt->flt_stat;
6293 	afsr_errs = ch_flt->afsr_errs;
6294 	aflt->flt_id = gethrtime_waitfree();
6295 	aflt->flt_bus_id = getprocessorid();
6296 	aflt->flt_inst = CPU->cpu_id;
6297 	aflt->flt_prot = AFLT_PROT_NONE;
6298 	aflt->flt_class = CPU_FAULT;
6299 	aflt->flt_status = ECC_C_TRAP;
6300 
6301 	pr_reason[0] = '\0';
6302 	/*
6303 	 * Get the CPU log out info for Disrupting Trap.
6304 	 */
6305 	if (CPU_PRIVATE(CPU) == NULL) {
6306 		clop = NULL;
6307 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6308 	} else {
6309 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6310 	}
6311 
6312 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6313 		ch_cpu_errors_t cpu_error_regs;
6314 
6315 		get_cpu_error_state(&cpu_error_regs);
6316 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6317 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6318 		clop->clo_data.chd_afar = cpu_error_regs.afar;
6319 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6320 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6321 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6322 		clop->clo_sdw_data.chd_afsr_ext =
6323 		    cpu_error_regs.shadow_afsr_ext;
6324 #if defined(SERRANO)
6325 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6326 #endif	/* SERRANO */
6327 		ch_flt->flt_data_incomplete = 1;
6328 
6329 		/*
6330 		 * The logging/clear code expects AFSR/AFAR to be cleared.
6331 		 * The trap handler does it for CEEN enabled errors
6332 		 * so we need to do it here.
6333 		 */
6334 		set_cpu_error_state(&cpu_error_regs);
6335 	}
6336 
6337 #if defined(JALAPENO) || defined(SERRANO)
6338 	/*
6339 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6340 	 * For Serrano, even thou we do have the AFAR, we still do the
6341 	 * scrub on the RCE side since that's where the error type can
6342 	 * be properly classified as intermittent, persistent, etc.
6343 	 *
6344 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
6345 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6346 	 * the flt_status bits.
6347 	 */
6348 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6349 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6350 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6351 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
6352 	}
6353 #else /* JALAPENO || SERRANO */
6354 	/*
6355 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
6356 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6357 	 * the flt_status bits.
6358 	 */
6359 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6360 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6361 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6362 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6363 		}
6364 	}
6365 
6366 #endif /* JALAPENO || SERRANO */
6367 
6368 	/*
6369 	 * Update flt_prot if this error occurred under on_trap protection.
6370 	 */
6371 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6372 		aflt->flt_prot = AFLT_PROT_EC;
6373 
6374 	/*
6375 	 * Queue events on the async event queue, one event per error bit.
6376 	 */
6377 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6378 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6379 		ch_flt->flt_type = CPU_INV_AFSR;
6380 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6381 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6382 		    aflt->flt_panic);
6383 	}
6384 
6385 	/*
6386 	 * Zero out + invalidate CPU logout.
6387 	 */
6388 	if (clop) {
6389 		bzero(clop, sizeof (ch_cpu_logout_t));
6390 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6391 	}
6392 
6393 	/*
6394 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6395 	 * was disabled, we need to flush either the entire
6396 	 * E$ or an E$ line.
6397 	 */
6398 #if defined(JALAPENO) || defined(SERRANO)
6399 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6400 #else	/* JALAPENO || SERRANO */
6401 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6402 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6403 #endif	/* JALAPENO || SERRANO */
6404 		cpu_error_ecache_flush(ch_flt);
6405 
6406 }
6407 
6408 /*
6409  * depending on the error type, we determine whether we
6410  * need to flush the entire ecache or just a line.
6411  */
6412 static int
6413 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6414 {
6415 	struct async_flt *aflt;
6416 	uint64_t	afsr;
6417 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6418 
6419 	aflt = (struct async_flt *)ch_flt;
6420 	afsr = aflt->flt_stat;
6421 
6422 	/*
6423 	 * If we got multiple errors, no point in trying
6424 	 * the individual cases, just flush the whole cache
6425 	 */
6426 	if (afsr & C_AFSR_ME) {
6427 		return (ECACHE_FLUSH_ALL);
6428 	}
6429 
6430 	/*
6431 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6432 	 * was disabled, we need to flush entire E$. We can't just
6433 	 * flush the cache line affected as the ME bit
6434 	 * is not set when multiple correctable errors of the same
6435 	 * type occur, so we might have multiple CPC or EDC errors,
6436 	 * with only the first recorded.
6437 	 */
6438 #if defined(JALAPENO) || defined(SERRANO)
6439 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6440 #else	/* JALAPENO || SERRANO */
6441 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6442 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6443 #endif	/* JALAPENO || SERRANO */
6444 		return (ECACHE_FLUSH_ALL);
6445 	}
6446 
6447 #if defined(JALAPENO) || defined(SERRANO)
6448 	/*
6449 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6450 	 * flush the entire Ecache.
6451 	 */
6452 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6453 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6454 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6455 			return (ECACHE_FLUSH_LINE);
6456 		} else {
6457 			return (ECACHE_FLUSH_ALL);
6458 		}
6459 	}
6460 #else /* JALAPENO || SERRANO */
6461 	/*
6462 	 * If UE only is set, flush the Ecache line, otherwise
6463 	 * flush the entire Ecache.
6464 	 */
6465 	if (afsr_errs & C_AFSR_UE) {
6466 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6467 		    C_AFSR_UE) {
6468 			return (ECACHE_FLUSH_LINE);
6469 		} else {
6470 			return (ECACHE_FLUSH_ALL);
6471 		}
6472 	}
6473 #endif /* JALAPENO || SERRANO */
6474 
6475 	/*
6476 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6477 	 * flush the entire Ecache.
6478 	 */
6479 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6480 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6481 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6482 			return (ECACHE_FLUSH_LINE);
6483 		} else {
6484 			return (ECACHE_FLUSH_ALL);
6485 		}
6486 	}
6487 
6488 	/*
6489 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6490 	 * flush the entire Ecache.
6491 	 */
6492 	if (afsr_errs & C_AFSR_BERR) {
6493 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6494 			return (ECACHE_FLUSH_LINE);
6495 		} else {
6496 			return (ECACHE_FLUSH_ALL);
6497 		}
6498 	}
6499 
6500 	return (0);
6501 }
6502 
6503 void
6504 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6505 {
6506 	int	ecache_flush_flag =
6507 	    cpu_error_ecache_flush_required(ch_flt);
6508 
6509 	/*
6510 	 * Flush Ecache line or entire Ecache based on above checks.
6511 	 */
6512 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6513 		cpu_flush_ecache();
6514 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6515 		cpu_flush_ecache_line(ch_flt);
6516 	}
6517 
6518 }
6519 
6520 /*
6521  * Extract the PA portion from the E$ tag.
6522  */
6523 uint64_t
6524 cpu_ectag_to_pa(int setsize, uint64_t tag)
6525 {
6526 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6527 		return (JG_ECTAG_TO_PA(setsize, tag));
6528 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6529 		return (PN_L3TAG_TO_PA(tag));
6530 	else
6531 		return (CH_ECTAG_TO_PA(setsize, tag));
6532 }
6533 
6534 /*
6535  * Convert the E$ tag PA into an E$ subblock index.
6536  */
6537 static int
6538 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6539 {
6540 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6541 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6542 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6543 		/* Panther has only one subblock per line */
6544 		return (0);
6545 	else
6546 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6547 }
6548 
6549 /*
6550  * All subblocks in an E$ line must be invalid for
6551  * the line to be invalid.
6552  */
6553 int
6554 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6555 {
6556 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6557 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6558 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6559 		return (PN_L3_LINE_INVALID(tag));
6560 	else
6561 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6562 }
6563 
6564 /*
6565  * Extract state bits for a subblock given the tag.  Note that for Panther
6566  * this works on both l2 and l3 tags.
6567  */
6568 static int
6569 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6570 {
6571 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6572 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6573 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6574 		return (tag & CH_ECSTATE_MASK);
6575 	else
6576 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6577 }
6578 
/*
 * Cpu specific initialization.  The only work done here is for builds
 * with CHEETAHPLUS_ERRATUM_25 defined, where cheetah_nudge_init() is
 * called if cheetah_sendmondo_recover is set.
 */
void
cpu_mp_init(void)
{
#ifdef	CHEETAHPLUS_ERRATUM_25
	if (cheetah_sendmondo_recover != 0)
		cheetah_nudge_init();
#endif	/* CHEETAHPLUS_ERRATUM_25 */
}
6591 
6592 void
6593 cpu_ereport_post(struct async_flt *aflt)
6594 {
6595 	char *cpu_type, buf[FM_MAX_CLASS];
6596 	nv_alloc_t *nva = NULL;
6597 	nvlist_t *ereport, *detector, *resource;
6598 	errorq_elem_t *eqep;
6599 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6600 	char unum[UNUM_NAMLEN];
6601 	int synd_code;
6602 	uint8_t msg_type;
6603 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6604 
6605 	if (aflt->flt_panic || panicstr) {
6606 		eqep = errorq_reserve(ereport_errorq);
6607 		if (eqep == NULL)
6608 			return;
6609 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6610 		nva = errorq_elem_nva(ereport_errorq, eqep);
6611 	} else {
6612 		ereport = fm_nvlist_create(nva);
6613 	}
6614 
6615 	/*
6616 	 * Create the scheme "cpu" FMRI.
6617 	 */
6618 	detector = fm_nvlist_create(nva);
6619 	resource = fm_nvlist_create(nva);
6620 	switch (cpunodes[aflt->flt_inst].implementation) {
6621 	case CHEETAH_IMPL:
6622 		cpu_type = FM_EREPORT_CPU_USIII;
6623 		break;
6624 	case CHEETAH_PLUS_IMPL:
6625 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6626 		break;
6627 	case JALAPENO_IMPL:
6628 		cpu_type = FM_EREPORT_CPU_USIIIi;
6629 		break;
6630 	case SERRANO_IMPL:
6631 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6632 		break;
6633 	case JAGUAR_IMPL:
6634 		cpu_type = FM_EREPORT_CPU_USIV;
6635 		break;
6636 	case PANTHER_IMPL:
6637 		cpu_type = FM_EREPORT_CPU_USIVplus;
6638 		break;
6639 	default:
6640 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6641 		break;
6642 	}
6643 
6644 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
6645 
6646 	/*
6647 	 * Encode all the common data into the ereport.
6648 	 */
6649 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6650 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6651 
6652 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6653 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6654 	    detector, NULL);
6655 
6656 	/*
6657 	 * Encode the error specific data that was saved in
6658 	 * the async_flt structure into the ereport.
6659 	 */
6660 	cpu_payload_add_aflt(aflt, ereport, resource,
6661 	    &plat_ecc_ch_flt.ecaf_afar_status,
6662 	    &plat_ecc_ch_flt.ecaf_synd_status);
6663 
6664 	if (aflt->flt_panic || panicstr) {
6665 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6666 	} else {
6667 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6668 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6669 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6670 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6671 	}
6672 	/*
6673 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6674 	 * to the SC olny if it can process it.
6675 	 */
6676 
6677 	if (&plat_ecc_capability_sc_get &&
6678 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6679 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6680 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6681 			/*
6682 			 * If afar status is not invalid do a unum lookup.
6683 			 */
6684 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6685 			    AFLT_STAT_INVALID) {
6686 				synd_code = synd_to_synd_code(
6687 				    plat_ecc_ch_flt.ecaf_synd_status,
6688 				    aflt->flt_synd, ch_flt->flt_bit);
6689 				(void) cpu_get_mem_unum_synd(synd_code,
6690 				    aflt, unum);
6691 			} else {
6692 				unum[0] = '\0';
6693 			}
6694 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6695 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6696 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6697 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6698 			    ch_flt->flt_sdw_afsr_ext;
6699 
6700 			if (&plat_log_fruid_error2)
6701 				plat_log_fruid_error2(msg_type, unum, aflt,
6702 				    &plat_ecc_ch_flt);
6703 		}
6704 	}
6705 }
6706 
6707 void
6708 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6709 {
6710 	int status;
6711 	ddi_fm_error_t de;
6712 
6713 	bzero(&de, sizeof (ddi_fm_error_t));
6714 
6715 	de.fme_version = DDI_FME_VERSION;
6716 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6717 	    FM_ENA_FMT1);
6718 	de.fme_flag = expected;
6719 	de.fme_bus_specific = (void *)aflt->flt_addr;
6720 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6721 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6722 		aflt->flt_panic = 1;
6723 }
6724 
6725 void
6726 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6727     errorq_t *eqp, uint_t flag)
6728 {
6729 	struct async_flt *aflt = (struct async_flt *)payload;
6730 
6731 	aflt->flt_erpt_class = error_class;
6732 	errorq_dispatch(eqp, payload, payload_sz, flag);
6733 }
6734 
/*
 * Deliberate no-op.  The IO module may call this routine, but this cpu
 * module has nothing to do here: the SERD algorithm is handled by the
 * cpumem-diagnosis engine instead.  None of the arguments are examined or
 * modified.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
}
6744 
6745 void
6746 adjust_hw_copy_limits(int ecache_size)
6747 {
6748 	/*
6749 	 * Set hw copy limits.
6750 	 *
6751 	 * /etc/system will be parsed later and can override one or more
6752 	 * of these settings.
6753 	 *
6754 	 * At this time, ecache size seems only mildly relevant.
6755 	 * We seem to run into issues with the d-cache and stalls
6756 	 * we see on misses.
6757 	 *
6758 	 * Cycle measurement indicates that 2 byte aligned copies fare
6759 	 * little better than doing things with VIS at around 512 bytes.
6760 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6761 	 * aligned is faster whenever the source and destination data
6762 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6763 	 * limit seems to be driven by the 2K write cache.
6764 	 * When more than 2K of copies are done in non-VIS mode, stores
6765 	 * backup in the write cache.  In VIS mode, the write cache is
6766 	 * bypassed, allowing faster cache-line writes aligned on cache
6767 	 * boundaries.
6768 	 *
6769 	 * In addition, in non-VIS mode, there is no prefetching, so
6770 	 * for larger copies, the advantage of prefetching to avoid even
6771 	 * occasional cache misses is enough to justify using the VIS code.
6772 	 *
6773 	 * During testing, it was discovered that netbench ran 3% slower
6774 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6775 	 * applications, data is only used once (copied to the output
6776 	 * buffer, then copied by the network device off the system).  Using
6777 	 * the VIS copy saves more L2 cache state.  Network copies are
6778 	 * around 1.3K to 1.5K in size for historical reasons.
6779 	 *
6780 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6781 	 * aligned copy even for large caches and 8 MB ecache.  The
6782 	 * infrastructure to allow different limits for different sized
6783 	 * caches is kept to allow further tuning in later releases.
6784 	 */
6785 
6786 	if (min_ecache_size == 0 && use_hw_bcopy) {
6787 		/*
6788 		 * First time through - should be before /etc/system
6789 		 * is read.
6790 		 * Could skip the checks for zero but this lets us
6791 		 * preserve any debugger rewrites.
6792 		 */
6793 		if (hw_copy_limit_1 == 0) {
6794 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6795 			priv_hcl_1 = hw_copy_limit_1;
6796 		}
6797 		if (hw_copy_limit_2 == 0) {
6798 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6799 			priv_hcl_2 = hw_copy_limit_2;
6800 		}
6801 		if (hw_copy_limit_4 == 0) {
6802 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6803 			priv_hcl_4 = hw_copy_limit_4;
6804 		}
6805 		if (hw_copy_limit_8 == 0) {
6806 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6807 			priv_hcl_8 = hw_copy_limit_8;
6808 		}
6809 		min_ecache_size = ecache_size;
6810 	} else {
6811 		/*
6812 		 * MP initialization. Called *after* /etc/system has
6813 		 * been parsed. One CPU has already been initialized.
6814 		 * Need to cater for /etc/system having scragged one
6815 		 * of our values.
6816 		 */
6817 		if (ecache_size == min_ecache_size) {
6818 			/*
6819 			 * Same size ecache. We do nothing unless we
6820 			 * have a pessimistic ecache setting. In that
6821 			 * case we become more optimistic (if the cache is
6822 			 * large enough).
6823 			 */
6824 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6825 				/*
6826 				 * Need to adjust hw_copy_limit* from our
6827 				 * pessimistic uniprocessor value to a more
6828 				 * optimistic UP value *iff* it hasn't been
6829 				 * reset.
6830 				 */
6831 				if ((ecache_size > 1048576) &&
6832 				    (priv_hcl_8 == hw_copy_limit_8)) {
6833 					if (ecache_size <= 2097152)
6834 						hw_copy_limit_8 = 4 *
6835 						    VIS_COPY_THRESHOLD;
6836 					else if (ecache_size <= 4194304)
6837 						hw_copy_limit_8 = 4 *
6838 						    VIS_COPY_THRESHOLD;
6839 					else
6840 						hw_copy_limit_8 = 4 *
6841 						    VIS_COPY_THRESHOLD;
6842 					priv_hcl_8 = hw_copy_limit_8;
6843 				}
6844 			}
6845 		} else if (ecache_size < min_ecache_size) {
6846 			/*
6847 			 * A different ecache size. Can this even happen?
6848 			 */
6849 			if (priv_hcl_8 == hw_copy_limit_8) {
6850 				/*
6851 				 * The previous value that we set
6852 				 * is unchanged (i.e., it hasn't been
6853 				 * scragged by /etc/system). Rewrite it.
6854 				 */
6855 				if (ecache_size <= 1048576)
6856 					hw_copy_limit_8 = 8 *
6857 					    VIS_COPY_THRESHOLD;
6858 				else if (ecache_size <= 2097152)
6859 					hw_copy_limit_8 = 8 *
6860 					    VIS_COPY_THRESHOLD;
6861 				else if (ecache_size <= 4194304)
6862 					hw_copy_limit_8 = 8 *
6863 					    VIS_COPY_THRESHOLD;
6864 				else
6865 					hw_copy_limit_8 = 10 *
6866 					    VIS_COPY_THRESHOLD;
6867 				priv_hcl_8 = hw_copy_limit_8;
6868 				min_ecache_size = ecache_size;
6869 			}
6870 		}
6871 	}
6872 }
6873 
6874 /*
6875  * Called from illegal instruction trap handler to see if we can attribute
6876  * the trap to a fpras check.
6877  */
6878 int
6879 fpras_chktrap(struct regs *rp)
6880 {
6881 	int op;
6882 	struct fpras_chkfngrp *cgp;
6883 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6884 
6885 	if (fpras_chkfngrps == NULL)
6886 		return (0);
6887 
6888 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6889 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6890 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6891 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6892 			break;
6893 	}
6894 	if (op == FPRAS_NCOPYOPS)
6895 		return (0);
6896 
6897 	/*
6898 	 * This is an fpRAS failure caught through an illegal
6899 	 * instruction - trampoline.
6900 	 */
6901 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6902 	rp->r_npc = rp->r_pc + 4;
6903 	return (1);
6904 }
6905 
6906 /*
6907  * fpras_failure is called when a fpras check detects a bad calculation
6908  * result or an illegal instruction trap is attributed to an fpras
6909  * check.  In all cases we are still bound to CPU.
6910  */
6911 int
6912 fpras_failure(int op, int how)
6913 {
6914 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6915 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6916 	ch_async_flt_t ch_flt;
6917 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6918 	struct fpras_chkfn *sfp, *cfp;
6919 	uint32_t *sip, *cip;
6920 	int i;
6921 
6922 	/*
6923 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6924 	 * the time in which we dispatch an ereport and (if applicable) panic.
6925 	 */
6926 	use_hw_bcopy_orig = use_hw_bcopy;
6927 	use_hw_bzero_orig = use_hw_bzero;
6928 	hcl1_orig = hw_copy_limit_1;
6929 	hcl2_orig = hw_copy_limit_2;
6930 	hcl4_orig = hw_copy_limit_4;
6931 	hcl8_orig = hw_copy_limit_8;
6932 	use_hw_bcopy = use_hw_bzero = 0;
6933 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6934 	    hw_copy_limit_8 = 0;
6935 
6936 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6937 	aflt->flt_id = gethrtime_waitfree();
6938 	aflt->flt_class = CPU_FAULT;
6939 	aflt->flt_inst = CPU->cpu_id;
6940 	aflt->flt_status = (how << 8) | op;
6941 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6942 	ch_flt.flt_type = CPU_FPUERR;
6943 
6944 	/*
6945 	 * We must panic if the copy operation had no lofault protection -
6946 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6947 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6948 	 */
6949 	aflt->flt_panic = (curthread->t_lofault == NULL);
6950 
6951 	/*
6952 	 * XOR the source instruction block with the copied instruction
6953 	 * block - this will show us which bit(s) are corrupted.
6954 	 */
6955 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6956 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6957 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6958 		sip = &sfp->fpras_blk0[0];
6959 		cip = &cfp->fpras_blk0[0];
6960 	} else {
6961 		sip = &sfp->fpras_blk1[0];
6962 		cip = &cfp->fpras_blk1[0];
6963 	}
6964 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6965 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6966 
6967 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6968 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6969 
6970 	if (aflt->flt_panic)
6971 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6972 
6973 	/*
6974 	 * We get here for copyin/copyout and kcopy or bcopy where the
6975 	 * caller has used on_fault.  We will flag the error so that
6976 	 * the process may be killed  The trap_async_hwerr mechanism will
6977 	 * take appropriate further action (such as a reboot, contract
6978 	 * notification etc).  Since we may be continuing we will
6979 	 * restore the global hardware copy acceleration switches.
6980 	 *
6981 	 * When we return from this function to the copy function we want to
6982 	 * avoid potentially bad data being used, ie we want the affected
6983 	 * copy function to return an error.  The caller should therefore
6984 	 * invoke its lofault handler (which always exists for these functions)
6985 	 * which will return the appropriate error.
6986 	 */
6987 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6988 	aston(curthread);
6989 
6990 	use_hw_bcopy = use_hw_bcopy_orig;
6991 	use_hw_bzero = use_hw_bzero_orig;
6992 	hw_copy_limit_1 = hcl1_orig;
6993 	hw_copy_limit_2 = hcl2_orig;
6994 	hw_copy_limit_4 = hcl4_orig;
6995 	hw_copy_limit_8 = hcl8_orig;
6996 
6997 	return (1);
6998 }
6999 
7000 #define	VIS_BLOCKSIZE		64
7001 
7002 int
7003 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7004 {
7005 	int ret, watched;
7006 
7007 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7008 	ret = dtrace_blksuword32(addr, data, 0);
7009 	if (watched)
7010 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7011 
7012 	return (ret);
7013 }
7014 
7015 /*
7016  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7017  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
7018  * CEEN from the EER to disable traps for further disrupting error types
7019  * on that cpu.  We could cross-call instead, but that has a larger
7020  * instruction and data footprint than cross-trapping, and the cpu is known
7021  * to be faulted.
7022  */
7023 
7024 void
7025 cpu_faulted_enter(struct cpu *cp)
7026 {
7027 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7028 }
7029 
7030 /*
7031  * Called when a cpu leaves the CPU_FAULTED state to return to one of
7032  * offline, spare, or online (by the cpu requesting this state change).
7033  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7034  * disrupting error bits that have accumulated without trapping, then
7035  * we cross-trap to re-enable CEEN controlled traps.
7036  */
7037 void
7038 cpu_faulted_exit(struct cpu *cp)
7039 {
7040 	ch_cpu_errors_t cpu_error_regs;
7041 
7042 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7043 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7044 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7045 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7046 	    (uint64_t)&cpu_error_regs, 0);
7047 
7048 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7049 }
7050 
7051 /*
7052  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7053  * the errors in the original AFSR, 0 otherwise.
7054  *
7055  * For all procs if the initial error was a BERR or TO, then it is possible
7056  * that we may have caused a secondary BERR or TO in the process of logging the
7057  * inital error via cpu_run_bus_error_handlers().  If this is the case then
7058  * if the request was protected then a panic is still not necessary, if not
7059  * protected then aft_panic is already set - so either way there's no need
7060  * to set aft_panic for the secondary error.
7061  *
7062  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7063  * a store merge, then the error handling code will call cpu_deferred_error().
7064  * When clear_errors() is called, it will determine that secondary errors have
7065  * occurred - in particular, the store merge also caused a EDU and WDU that
7066  * weren't discovered until this point.
7067  *
7068  * We do three checks to verify that we are in this case.  If we pass all three
7069  * checks, we return 1 to indicate that we should not panic.  If any unexpected
7070  * errors occur, we return 0.
7071  *
7072  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7073  * handled in cpu_disrupting_errors().  Since this function is not even called
7074  * in the case we are interested in, we just return 0 for these processors.
7075  */
7076 /*ARGSUSED*/
7077 static int
7078 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7079     uint64_t t_afar)
7080 {
7081 #if defined(CHEETAH_PLUS)
7082 #else	/* CHEETAH_PLUS */
7083 	struct async_flt *aflt = (struct async_flt *)ch_flt;
7084 #endif	/* CHEETAH_PLUS */
7085 
7086 	/*
7087 	 * Was the original error a BERR or TO and only a BERR or TO
7088 	 * (multiple errors are also OK)
7089 	 */
7090 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7091 		/*
7092 		 * Is the new error a BERR or TO and only a BERR or TO
7093 		 * (multiple errors are also OK)
7094 		 */
7095 		if ((ch_flt->afsr_errs &
7096 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7097 			return (1);
7098 	}
7099 
7100 #if defined(CHEETAH_PLUS)
7101 	return (0);
7102 #else	/* CHEETAH_PLUS */
7103 	/*
7104 	 * Now look for secondary effects of a UE on cheetah/jalapeno
7105 	 *
7106 	 * Check the original error was a UE, and only a UE.  Note that
7107 	 * the ME bit will cause us to fail this check.
7108 	 */
7109 	if (t_afsr_errs != C_AFSR_UE)
7110 		return (0);
7111 
7112 	/*
7113 	 * Check the secondary errors were exclusively an EDU and/or WDU.
7114 	 */
7115 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7116 		return (0);
7117 
7118 	/*
7119 	 * Check the AFAR of the original error and secondary errors
7120 	 * match to the 64-byte boundary
7121 	 */
7122 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7123 		return (0);
7124 
7125 	/*
7126 	 * We've passed all the checks, so it's a secondary error!
7127 	 */
7128 	return (1);
7129 #endif	/* CHEETAH_PLUS */
7130 }
7131 
7132 /*
7133  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
7134  * is checked for any valid errors.  If found, the error type is
7135  * returned. If not found, the flt_type is checked for L1$ parity errors.
7136  */
7137 /*ARGSUSED*/
7138 static uint8_t
7139 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7140 {
7141 #if defined(JALAPENO)
7142 	/*
7143 	 * Currently, logging errors to the SC is not supported on Jalapeno
7144 	 */
7145 	return (PLAT_ECC_ERROR2_NONE);
7146 #else
7147 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7148 
7149 	switch (ch_flt->flt_bit) {
7150 	case C_AFSR_CE:
7151 		return (PLAT_ECC_ERROR2_CE);
7152 	case C_AFSR_UCC:
7153 	case C_AFSR_EDC:
7154 	case C_AFSR_WDC:
7155 	case C_AFSR_CPC:
7156 		return (PLAT_ECC_ERROR2_L2_CE);
7157 	case C_AFSR_EMC:
7158 		return (PLAT_ECC_ERROR2_EMC);
7159 	case C_AFSR_IVC:
7160 		return (PLAT_ECC_ERROR2_IVC);
7161 	case C_AFSR_UE:
7162 		return (PLAT_ECC_ERROR2_UE);
7163 	case C_AFSR_UCU:
7164 	case C_AFSR_EDU:
7165 	case C_AFSR_WDU:
7166 	case C_AFSR_CPU:
7167 		return (PLAT_ECC_ERROR2_L2_UE);
7168 	case C_AFSR_IVU:
7169 		return (PLAT_ECC_ERROR2_IVU);
7170 	case C_AFSR_TO:
7171 		return (PLAT_ECC_ERROR2_TO);
7172 	case C_AFSR_BERR:
7173 		return (PLAT_ECC_ERROR2_BERR);
7174 #if defined(CHEETAH_PLUS)
7175 	case C_AFSR_L3_EDC:
7176 	case C_AFSR_L3_UCC:
7177 	case C_AFSR_L3_CPC:
7178 	case C_AFSR_L3_WDC:
7179 		return (PLAT_ECC_ERROR2_L3_CE);
7180 	case C_AFSR_IMC:
7181 		return (PLAT_ECC_ERROR2_IMC);
7182 	case C_AFSR_TSCE:
7183 		return (PLAT_ECC_ERROR2_L2_TSCE);
7184 	case C_AFSR_THCE:
7185 		return (PLAT_ECC_ERROR2_L2_THCE);
7186 	case C_AFSR_L3_MECC:
7187 		return (PLAT_ECC_ERROR2_L3_MECC);
7188 	case C_AFSR_L3_THCE:
7189 		return (PLAT_ECC_ERROR2_L3_THCE);
7190 	case C_AFSR_L3_CPU:
7191 	case C_AFSR_L3_EDU:
7192 	case C_AFSR_L3_UCU:
7193 	case C_AFSR_L3_WDU:
7194 		return (PLAT_ECC_ERROR2_L3_UE);
7195 	case C_AFSR_DUE:
7196 		return (PLAT_ECC_ERROR2_DUE);
7197 	case C_AFSR_DTO:
7198 		return (PLAT_ECC_ERROR2_DTO);
7199 	case C_AFSR_DBERR:
7200 		return (PLAT_ECC_ERROR2_DBERR);
7201 #endif	/* CHEETAH_PLUS */
7202 	default:
7203 		switch (ch_flt->flt_type) {
7204 #if defined(CPU_IMP_L1_CACHE_PARITY)
7205 		case CPU_IC_PARITY:
7206 			return (PLAT_ECC_ERROR2_IPE);
7207 		case CPU_DC_PARITY:
7208 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7209 				if (ch_flt->parity_data.dpe.cpl_cache ==
7210 				    CPU_PC_PARITY) {
7211 					return (PLAT_ECC_ERROR2_PCACHE);
7212 				}
7213 			}
7214 			return (PLAT_ECC_ERROR2_DPE);
7215 #endif /* CPU_IMP_L1_CACHE_PARITY */
7216 		case CPU_ITLB_PARITY:
7217 			return (PLAT_ECC_ERROR2_ITLB);
7218 		case CPU_DTLB_PARITY:
7219 			return (PLAT_ECC_ERROR2_DTLB);
7220 		default:
7221 			return (PLAT_ECC_ERROR2_NONE);
7222 		}
7223 	}
7224 #endif	/* JALAPENO */
7225 }
7226