xref: /titanic_41/usr/src/uts/sun4u/cpu/us3_common.c (revision 239e91abc172c1397b1e94869c5d0e8ab67bfc22)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/ddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/archsystm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/machthread.h>
37 #include <sys/cpu.h>
38 #include <sys/cmp.h>
39 #include <sys/elf_SPARC.h>
40 #include <vm/vm_dep.h>
41 #include <vm/hat_sfmmu.h>
42 #include <vm/seg_kpm.h>
43 #include <sys/cpuvar.h>
44 #include <sys/cheetahregs.h>
45 #include <sys/us3_module.h>
46 #include <sys/async.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/dditypes.h>
50 #include <sys/prom_debug.h>
51 #include <sys/prom_plat.h>
52 #include <sys/cpu_module.h>
53 #include <sys/sysmacros.h>
54 #include <sys/intreg.h>
55 #include <sys/clock.h>
56 #include <sys/platform_module.h>
57 #include <sys/machtrap.h>
58 #include <sys/ontrap.h>
59 #include <sys/panic.h>
60 #include <sys/memlist.h>
61 #include <sys/bootconf.h>
62 #include <sys/ivintr.h>
63 #include <sys/atomic.h>
64 #include <sys/taskq.h>
65 #include <sys/note.h>
66 #include <sys/ndifm.h>
67 #include <sys/ddifm.h>
68 #include <sys/fm/protocol.h>
69 #include <sys/fm/util.h>
70 #include <sys/fm/cpu/UltraSPARC-III.h>
71 #include <sys/fpras_impl.h>
72 #include <sys/dtrace.h>
73 #include <sys/watchpoint.h>
74 #include <sys/plat_ecc_unum.h>
75 #include <sys/cyclic.h>
76 #include <sys/errorq.h>
77 #include <sys/errclassify.h>
78 #include <sys/pghw.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 /*
85  * Note that 'Cheetah PRM' refers to:
86  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
87  */
88 
89 /*
90  * Per CPU pointers to physical address of TL>0 logout data areas.
91  * These pointers have to be in the kernel nucleus to avoid MMU
92  * misses.
93  */
94 uint64_t ch_err_tl1_paddrs[NCPU];
95 
96 /*
97  * One statically allocated structure to use during startup/DR
98  * to prevent unnecessary panics.
99  */
100 ch_err_tl1_data_t ch_err_tl1_data;
101 
102 /*
103  * Per CPU pending error at TL>0, used by level15 softint handler
104  */
105 uchar_t ch_err_tl1_pending[NCPU];
106 
107 /*
108  * For deferred CE re-enable after trap.
109  */
110 taskq_t		*ch_check_ce_tq;
111 
112 /*
113  * Internal functions.
114  */
115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
118     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
119 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
120     uint64_t t_afsr_bit);
121 static int clear_ecc(struct async_flt *ecc);
122 #if defined(CPU_IMP_ECACHE_ASSOC)
123 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
124 #endif
125 static int cpu_ecache_set_size(struct cpu *cp);
126 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
127 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
128 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
129 static int cpu_ectag_pa_to_subblk_state(int cachesize,
130 				uint64_t subaddr, uint64_t tag);
131 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
132 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
133 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
134 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
135 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
136 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
137 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
138 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
139 static void cpu_scrubphys(struct async_flt *aflt);
140 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
141     int *, int *);
142 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
143 static void cpu_ereport_init(struct async_flt *aflt);
144 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
145 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
146 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
147     uint64_t nceen, ch_cpu_logout_t *clop);
148 static int cpu_ce_delayed_ec_logout(uint64_t);
149 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
150 static int cpu_error_is_ecache_data(int, uint64_t);
151 static void cpu_fmri_cpu_set(nvlist_t *, int);
152 static int cpu_error_to_resource_type(struct async_flt *aflt);
153 
154 #ifdef	CHEETAHPLUS_ERRATUM_25
155 static int mondo_recover_proc(uint16_t, int);
156 static void cheetah_nudge_init(void);
157 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
158     cyc_time_t *when);
159 static void cheetah_nudge_buddy(void);
160 #endif	/* CHEETAHPLUS_ERRATUM_25 */
161 
162 #if defined(CPU_IMP_L1_CACHE_PARITY)
163 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
164 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
165 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
166     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
167 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
168 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
169 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
170 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
171 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
172 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
173 #endif	/* CPU_IMP_L1_CACHE_PARITY */
174 
175 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
176     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
177     int *segsp, int *banksp, int *mcidp);
178 
179 /*
180  * This table is used to determine which bit(s) is(are) bad when an ECC
181  * error occurs.  The array is indexed by an 9-bit syndrome.  The entries
182  * of this array have the following semantics:
183  *
184  *      00-127  The number of the bad bit, when only one bit is bad.
185  *      128     ECC bit C0 is bad.
186  *      129     ECC bit C1 is bad.
187  *      130     ECC bit C2 is bad.
188  *      131     ECC bit C3 is bad.
189  *      132     ECC bit C4 is bad.
190  *      133     ECC bit C5 is bad.
191  *      134     ECC bit C6 is bad.
192  *      135     ECC bit C7 is bad.
193  *      136     ECC bit C8 is bad.
194  *	137-143 reserved for Mtag Data and ECC.
195  *      144(M2) Two bits are bad within a nibble.
196  *      145(M3) Three bits are bad within a nibble.
197  *      146(M3) Four bits are bad within a nibble.
198  *      147(M)  Multiple bits (5 or more) are bad.
199  *      148     NO bits are bad.
200  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
201  */
202 
203 #define	C0	128
204 #define	C1	129
205 #define	C2	130
206 #define	C3	131
207 #define	C4	132
208 #define	C5	133
209 #define	C6	134
210 #define	C7	135
211 #define	C8	136
212 #define	MT0	137	/* Mtag Data bit 0 */
213 #define	MT1	138
214 #define	MT2	139
215 #define	MTC0	140	/* Mtag Check bit 0 */
216 #define	MTC1	141
217 #define	MTC2	142
218 #define	MTC3	143
219 #define	M2	144
220 #define	M3	145
221 #define	M4	146
222 #define	M	147
223 #define	NA	148
224 #if defined(JALAPENO) || defined(SERRANO)
225 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
226 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
227 #define	SLAST	S003MEM	/* last special syndrome */
228 #else /* JALAPENO || SERRANO */
229 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
230 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
231 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
232 #define	SLAST	S11C	/* last special syndrome */
233 #endif /* JALAPENO || SERRANO */
234 #if defined(JALAPENO) || defined(SERRANO)
235 #define	BPAR0	152	/* syndrom 152 through 167 for bus parity */
236 #define	BPAR15	167
237 #endif	/* JALAPENO || SERRANO */
238 
239 static uint8_t ecc_syndrome_tab[] =
240 {
241 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
242 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
243 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
244 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
245 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
246 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
247 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
248 #if defined(JALAPENO) || defined(SERRANO)
249 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
250 #else	/* JALAPENO || SERRANO */
251 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
252 #endif	/* JALAPENO || SERRANO */
253 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
254 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
255 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
256 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
257 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
258 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
259 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
260 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
261 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
262 #if defined(JALAPENO) || defined(SERRANO)
263 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
264 #else	/* JALAPENO || SERRANO */
265 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
266 #endif	/* JALAPENO || SERRANO */
267 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
268 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
269 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
270 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
271 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
272 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
273 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
274 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
275 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
276 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
277 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
278 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
279 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
280 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
281 };
282 
283 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
284 
285 #if !(defined(JALAPENO) || defined(SERRANO))
286 /*
287  * This table is used to determine which bit(s) is(are) bad when a Mtag
288  * error occurs.  The array is indexed by an 4-bit ECC syndrome. The entries
289  * of this array have the following semantics:
290  *
291  *      -1	Invalid mtag syndrome.
292  *      137     Mtag Data 0 is bad.
293  *      138     Mtag Data 1 is bad.
294  *      139     Mtag Data 2 is bad.
295  *      140     Mtag ECC 0 is bad.
296  *      141     Mtag ECC 1 is bad.
297  *      142     Mtag ECC 2 is bad.
298  *      143     Mtag ECC 3 is bad.
299  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6.
300  */
301 short mtag_syndrome_tab[] =
302 {
303 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
304 };
305 
306 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
307 
308 #else /* !(JALAPENO || SERRANO) */
309 
310 #define	BSYND_TBL_SIZE	16
311 
312 #endif /* !(JALAPENO || SERRANO) */
313 
314 /*
315  * Types returned from cpu_error_to_resource_type()
316  */
317 #define	ERRTYPE_UNKNOWN		0
318 #define	ERRTYPE_CPU		1
319 #define	ERRTYPE_MEMORY		2
320 #define	ERRTYPE_ECACHE_DATA	3
321 
322 /*
323  * CE initial classification and subsequent action lookup table
324  */
325 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
326 static int ce_disp_inited;
327 
328 /*
329  * Set to disable leaky and partner check for memory correctables
330  */
331 int ce_xdiag_off;
332 
333 /*
334  * The following are not incremented atomically so are indicative only
335  */
336 static int ce_xdiag_drops;
337 static int ce_xdiag_lkydrops;
338 static int ce_xdiag_ptnrdrops;
339 static int ce_xdiag_bad;
340 
341 /*
342  * CE leaky check callback structure
343  */
344 typedef struct {
345 	struct async_flt *lkycb_aflt;
346 	errorq_t *lkycb_eqp;
347 	errorq_elem_t *lkycb_eqep;
348 } ce_lkychk_cb_t;
349 
350 /*
351  * defines for various ecache_flush_flag's
352  */
353 #define	ECACHE_FLUSH_LINE	1
354 #define	ECACHE_FLUSH_ALL	2
355 
356 /*
357  * STICK sync
358  */
359 #define	STICK_ITERATION 10
360 #define	MAX_TSKEW	1
361 #define	EV_A_START	0
362 #define	EV_A_END	1
363 #define	EV_B_START	2
364 #define	EV_B_END	3
365 #define	EVENTS		4
366 
367 static int64_t stick_iter = STICK_ITERATION;
368 static int64_t stick_tsk = MAX_TSKEW;
369 
370 typedef enum {
371 	EVENT_NULL = 0,
372 	SLAVE_START,
373 	SLAVE_CONT,
374 	MASTER_START
375 } event_cmd_t;
376 
377 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
378 static int64_t timestamp[EVENTS];
379 static volatile int slave_done;
380 
381 #ifdef DEBUG
382 #define	DSYNC_ATTEMPTS 64
383 typedef struct {
384 	int64_t	skew_val[DSYNC_ATTEMPTS];
385 } ss_t;
386 
387 ss_t stick_sync_stats[NCPU];
388 #endif /* DEBUG */
389 
390 uint_t cpu_impl_dual_pgsz = 0;
391 #if defined(CPU_IMP_DUAL_PAGESIZE)
392 uint_t disable_dual_pgsz = 0;
393 #endif	/* CPU_IMP_DUAL_PAGESIZE */
394 
395 /*
396  * Save the cache bootup state for use when internal
397  * caches are to be re-enabled after an error occurs.
398  */
399 uint64_t cache_boot_state;
400 
401 /*
402  * PA[22:0] represent Displacement in Safari configuration space.
403  */
404 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
405 
406 bus_config_eclk_t bus_config_eclk[] = {
407 #if defined(JALAPENO) || defined(SERRANO)
408 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
409 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
410 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
411 #else /* JALAPENO || SERRANO */
412 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
413 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
414 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
415 #endif /* JALAPENO || SERRANO */
416 	{0, 0}
417 };
418 
419 /*
420  * Interval for deferred CEEN reenable
421  */
422 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
423 
424 /*
425  * set in /etc/system to control logging of user BERR/TO's
426  */
427 int cpu_berr_to_verbose = 0;
428 
429 /*
430  * set to 0 in /etc/system to defer CEEN reenable for all CEs
431  */
432 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
433 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
434 
435 /*
436  * Set of all offline cpus
437  */
438 cpuset_t cpu_offline_set;
439 
440 static void cpu_delayed_check_ce_errors(void *);
441 static void cpu_check_ce_errors(void *);
442 void cpu_error_ecache_flush(ch_async_flt_t *);
443 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
444 static void cpu_log_and_clear_ce(ch_async_flt_t *);
445 void cpu_ce_detected(ch_cpu_errors_t *, int);
446 
447 /*
448  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
449  * memory refresh interval of current DIMMs (64ms).  After initial fix that
450  * gives at least one full refresh cycle in which the cell can leak
451  * (whereafter further refreshes simply reinforce any incorrect bit value).
452  */
453 clock_t cpu_ce_lkychk_timeout_usec = 128000;
454 
455 /*
456  * CE partner check partner caching period in seconds
457  */
458 int cpu_ce_ptnr_cachetime_sec = 60;
459 
460 /*
461  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
462  */
463 #define	CH_SET_TRAP(ttentry, ttlabel)			\
464 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
465 		flush_instr_mem((caddr_t)&ttentry, 32);
466 
467 static int min_ecache_size;
468 static uint_t priv_hcl_1;
469 static uint_t priv_hcl_2;
470 static uint_t priv_hcl_4;
471 static uint_t priv_hcl_8;
472 
473 void
474 cpu_setup(void)
475 {
476 	extern int at_flags;
477 	extern int disable_delay_tlb_flush, delay_tlb_flush;
478 	extern int cpc_has_overflow_intr;
479 
480 	/*
481 	 * Setup chip-specific trap handlers.
482 	 */
483 	cpu_init_trap();
484 
485 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
486 
487 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
488 
489 	/*
490 	 * save the cache bootup state.
491 	 */
492 	cache_boot_state = get_dcu() & DCU_CACHE;
493 
494 	/*
495 	 * Due to the number of entries in the fully-associative tlb
496 	 * this may have to be tuned lower than in spitfire.
497 	 */
498 	pp_slots = MIN(8, MAXPP_SLOTS);
499 
500 	/*
501 	 * Block stores do not invalidate all pages of the d$, pagecopy
502 	 * et. al. need virtual translations with virtual coloring taken
503 	 * into consideration.  prefetch/ldd will pollute the d$ on the
504 	 * load side.
505 	 */
506 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
507 
508 	if (use_page_coloring) {
509 		do_pg_coloring = 1;
510 		if (use_virtual_coloring)
511 			do_virtual_coloring = 1;
512 	}
513 
514 	isa_list =
515 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
516 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
517 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
518 
519 	/*
520 	 * On Panther-based machines, this should
521 	 * also include AV_SPARC_POPC too
522 	 */
523 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
524 
525 	/*
526 	 * On cheetah, there's no hole in the virtual address space
527 	 */
528 	hole_start = hole_end = 0;
529 
530 	/*
531 	 * The kpm mapping window.
532 	 * kpm_size:
533 	 *	The size of a single kpm range.
534 	 *	The overall size will be: kpm_size * vac_colors.
535 	 * kpm_vbase:
536 	 *	The virtual start address of the kpm range within the kernel
537 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
538 	 */
539 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
540 	kpm_size_shift = 43;
541 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
542 	kpm_smallpages = 1;
543 
544 	/*
545 	 * The traptrace code uses either %tick or %stick for
546 	 * timestamping.  We have %stick so we can use it.
547 	 */
548 	traptrace_use_stick = 1;
549 
550 	/*
551 	 * Cheetah has a performance counter overflow interrupt
552 	 */
553 	cpc_has_overflow_intr = 1;
554 
555 	/*
556 	 * Use cheetah flush-all support
557 	 */
558 	if (!disable_delay_tlb_flush)
559 		delay_tlb_flush = 1;
560 
561 #if defined(CPU_IMP_DUAL_PAGESIZE)
562 	/*
563 	 * Use Cheetah+ and later dual page size support.
564 	 */
565 	if (!disable_dual_pgsz) {
566 		cpu_impl_dual_pgsz = 1;
567 	}
568 #endif	/* CPU_IMP_DUAL_PAGESIZE */
569 
570 	/*
571 	 * Declare that this architecture/cpu combination does fpRAS.
572 	 */
573 	fpras_implemented = 1;
574 
575 	/*
576 	 * Setup CE lookup table
577 	 */
578 	CE_INITDISPTBL_POPULATE(ce_disp_table);
579 	ce_disp_inited = 1;
580 }
581 
582 /*
583  * Called by setcpudelay
584  */
585 void
586 cpu_init_tick_freq(void)
587 {
588 	/*
589 	 * For UltraSPARC III and beyond we want to use the
590 	 * system clock rate as the basis for low level timing,
591 	 * due to support of mixed speed CPUs and power managment.
592 	 */
593 	if (system_clock_freq == 0)
594 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
595 
596 	sys_tick_freq = system_clock_freq;
597 }
598 
599 #ifdef CHEETAHPLUS_ERRATUM_25
600 /*
601  * Tunables
602  */
603 int cheetah_bpe_off = 0;
604 int cheetah_sendmondo_recover = 1;
605 int cheetah_sendmondo_fullscan = 0;
606 int cheetah_sendmondo_recover_delay = 5;
607 
608 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
609 
610 /*
611  * Recovery Statistics
612  */
613 typedef struct cheetah_livelock_entry	{
614 	int cpuid;		/* fallen cpu */
615 	int buddy;		/* cpu that ran recovery */
616 	clock_t lbolt;		/* when recovery started */
617 	hrtime_t recovery_time;	/* time spent in recovery */
618 } cheetah_livelock_entry_t;
619 
620 #define	CHEETAH_LIVELOCK_NENTRY	32
621 
622 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
623 int cheetah_livelock_entry_nxt;
624 
625 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
626 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
627 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
628 		cheetah_livelock_entry_nxt = 0;				\
629 	}								\
630 }
631 
632 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
633 
634 struct {
635 	hrtime_t hrt;		/* maximum recovery time */
636 	int recovery;		/* recovered */
637 	int full_claimed;	/* maximum pages claimed in full recovery */
638 	int proc_entry;		/* attempted to claim TSB */
639 	int proc_tsb_scan;	/* tsb scanned */
640 	int proc_tsb_partscan;	/* tsb partially scanned */
641 	int proc_tsb_fullscan;	/* whole tsb scanned */
642 	int proc_claimed;	/* maximum pages claimed in tsb scan */
643 	int proc_user;		/* user thread */
644 	int proc_kernel;	/* kernel thread */
645 	int proc_onflt;		/* bad stack */
646 	int proc_cpu;		/* null cpu */
647 	int proc_thread;	/* null thread */
648 	int proc_proc;		/* null proc */
649 	int proc_as;		/* null as */
650 	int proc_hat;		/* null hat */
651 	int proc_hat_inval;	/* hat contents don't make sense */
652 	int proc_hat_busy;	/* hat is changing TSBs */
653 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
654 	int proc_cnum_bad;	/* cnum out of range */
655 	int proc_cnum;		/* last cnum processed */
656 	tte_t proc_tte;		/* last tte processed */
657 } cheetah_livelock_stat;
658 
659 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
660 
661 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
662 	cheetah_livelock_stat.item = value
663 
664 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
665 	if (value > cheetah_livelock_stat.item)		\
666 		cheetah_livelock_stat.item = value;	\
667 }
668 
669 /*
670  * Attempt to recover a cpu by claiming every cache line as saved
671  * in the TSB that the non-responsive cpu is using. Since we can't
672  * grab any adaptive lock, this is at best an attempt to do so. Because
673  * we don't grab any locks, we must operate under the protection of
674  * on_fault().
675  *
676  * Return 1 if cpuid could be recovered, 0 if failed.
677  */
678 int
679 mondo_recover_proc(uint16_t cpuid, int bn)
680 {
681 	label_t ljb;
682 	cpu_t *cp;
683 	kthread_t *t;
684 	proc_t *p;
685 	struct as *as;
686 	struct hat *hat;
687 	uint_t  cnum;
688 	struct tsb_info *tsbinfop;
689 	struct tsbe *tsbep;
690 	caddr_t tsbp;
691 	caddr_t end_tsbp;
692 	uint64_t paddr;
693 	uint64_t idsr;
694 	u_longlong_t pahi, palo;
695 	int pages_claimed = 0;
696 	tte_t tsbe_tte;
697 	int tried_kernel_tsb = 0;
698 	mmu_ctx_t *mmu_ctxp;
699 
700 	CHEETAH_LIVELOCK_STAT(proc_entry);
701 
702 	if (on_fault(&ljb)) {
703 		CHEETAH_LIVELOCK_STAT(proc_onflt);
704 		goto badstruct;
705 	}
706 
707 	if ((cp = cpu[cpuid]) == NULL) {
708 		CHEETAH_LIVELOCK_STAT(proc_cpu);
709 		goto badstruct;
710 	}
711 
712 	if ((t = cp->cpu_thread) == NULL) {
713 		CHEETAH_LIVELOCK_STAT(proc_thread);
714 		goto badstruct;
715 	}
716 
717 	if ((p = ttoproc(t)) == NULL) {
718 		CHEETAH_LIVELOCK_STAT(proc_proc);
719 		goto badstruct;
720 	}
721 
722 	if ((as = p->p_as) == NULL) {
723 		CHEETAH_LIVELOCK_STAT(proc_as);
724 		goto badstruct;
725 	}
726 
727 	if ((hat = as->a_hat) == NULL) {
728 		CHEETAH_LIVELOCK_STAT(proc_hat);
729 		goto badstruct;
730 	}
731 
732 	if (hat != ksfmmup) {
733 		CHEETAH_LIVELOCK_STAT(proc_user);
734 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
735 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
736 			goto badstruct;
737 		}
738 		tsbinfop = hat->sfmmu_tsb;
739 		if (tsbinfop == NULL) {
740 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
741 			goto badstruct;
742 		}
743 		tsbp = tsbinfop->tsb_va;
744 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
745 	} else {
746 		CHEETAH_LIVELOCK_STAT(proc_kernel);
747 		tsbinfop = NULL;
748 		tsbp = ktsb_base;
749 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
750 	}
751 
752 	/* Verify as */
753 	if (hat->sfmmu_as != as) {
754 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
755 		goto badstruct;
756 	}
757 
758 	mmu_ctxp = CPU_MMU_CTXP(cp);
759 	ASSERT(mmu_ctxp);
760 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
761 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
762 
763 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
764 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
765 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
766 		goto badstruct;
767 	}
768 
769 	do {
770 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
771 
772 		/*
773 		 * Skip TSBs being relocated.  This is important because
774 		 * we want to avoid the following deadlock scenario:
775 		 *
776 		 * 1) when we came in we set ourselves to "in recover" state.
777 		 * 2) when we try to touch TSB being relocated the mapping
778 		 *    will be in the suspended state so we'll spin waiting
779 		 *    for it to be unlocked.
780 		 * 3) when the CPU that holds the TSB mapping locked tries to
781 		 *    unlock it it will send a xtrap which will fail to xcall
782 		 *    us or the CPU we're trying to recover, and will in turn
783 		 *    enter the mondo code.
784 		 * 4) since we are still spinning on the locked mapping
785 		 *    no further progress will be made and the system will
786 		 *    inevitably hard hang.
787 		 *
788 		 * A TSB not being relocated can't begin being relocated
789 		 * while we're accessing it because we check
790 		 * sendmondo_in_recover before relocating TSBs.
791 		 */
792 		if (hat != ksfmmup &&
793 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
794 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
795 			goto next_tsbinfo;
796 		}
797 
798 		for (tsbep = (struct tsbe *)tsbp;
799 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
800 			tsbe_tte = tsbep->tte_data;
801 
802 			if (tsbe_tte.tte_val == 0) {
803 				/*
804 				 * Invalid tte
805 				 */
806 				continue;
807 			}
808 			if (tsbe_tte.tte_se) {
809 				/*
810 				 * Don't want device registers
811 				 */
812 				continue;
813 			}
814 			if (tsbe_tte.tte_cp == 0) {
815 				/*
816 				 * Must be cached in E$
817 				 */
818 				continue;
819 			}
820 			if (tsbep->tte_tag.tag_invalid != 0) {
821 				/*
822 				 * Invalid tag, ingnore this entry.
823 				 */
824 				continue;
825 			}
826 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
827 			idsr = getidsr();
828 			if ((idsr & (IDSR_NACK_BIT(bn) |
829 			    IDSR_BUSY_BIT(bn))) == 0) {
830 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
831 				goto done;
832 			}
833 			pahi = tsbe_tte.tte_pahi;
834 			palo = tsbe_tte.tte_palo;
835 			paddr = (uint64_t)((pahi << 32) |
836 			    (palo << MMU_PAGESHIFT));
837 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
838 			    CH_ECACHE_SUBBLK_SIZE);
839 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
840 				shipit(cpuid, bn);
841 			}
842 			pages_claimed++;
843 		}
844 next_tsbinfo:
845 		if (tsbinfop != NULL)
846 			tsbinfop = tsbinfop->tsb_next;
847 		if (tsbinfop != NULL) {
848 			tsbp = tsbinfop->tsb_va;
849 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
850 		} else if (tsbp == ktsb_base) {
851 			tried_kernel_tsb = 1;
852 		} else if (!tried_kernel_tsb) {
853 			tsbp = ktsb_base;
854 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
855 			hat = ksfmmup;
856 			tsbinfop = NULL;
857 		}
858 	} while (tsbinfop != NULL ||
859 	    ((tsbp == ktsb_base) && !tried_kernel_tsb));
860 
861 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
862 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
863 	no_fault();
864 	idsr = getidsr();
865 	if ((idsr & (IDSR_NACK_BIT(bn) |
866 	    IDSR_BUSY_BIT(bn))) == 0) {
867 		return (1);
868 	} else {
869 		return (0);
870 	}
871 
872 done:
873 	no_fault();
874 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
875 	return (1);
876 
877 badstruct:
878 	no_fault();
879 	return (0);
880 }
881 
882 /*
883  * Attempt to claim ownership, temporarily, of every cache line that a
884  * non-responsive cpu might be using.  This might kick that cpu out of
885  * this state.
886  *
887  * The return value indicates to the caller if we have exhausted all recovery
888  * techniques. If 1 is returned, it is useless to call this function again
889  * even for a different target CPU.
890  */
891 int
892 mondo_recover(uint16_t cpuid, int bn)
893 {
894 	struct memseg *seg;
895 	uint64_t begin_pa, end_pa, cur_pa;
896 	hrtime_t begin_hrt, end_hrt;
897 	int retval = 0;
898 	int pages_claimed = 0;
899 	cheetah_livelock_entry_t *histp;
900 	uint64_t idsr;
901 
902 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
903 		/*
904 		 * Wait while recovery takes place
905 		 */
906 		while (sendmondo_in_recover) {
907 			drv_usecwait(1);
908 		}
909 		/*
910 		 * Assume we didn't claim the whole memory. If
911 		 * the target of this caller is not recovered,
912 		 * it will come back.
913 		 */
914 		return (retval);
915 	}
916 
917 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
918 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
919 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
920 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
921 
922 	begin_hrt = gethrtime_waitfree();
923 	/*
924 	 * First try to claim the lines in the TSB the target
925 	 * may have been using.
926 	 */
927 	if (mondo_recover_proc(cpuid, bn) == 1) {
928 		/*
929 		 * Didn't claim the whole memory
930 		 */
931 		goto done;
932 	}
933 
934 	/*
935 	 * We tried using the TSB. The target is still
936 	 * not recovered. Check if complete memory scan is
937 	 * enabled.
938 	 */
939 	if (cheetah_sendmondo_fullscan == 0) {
940 		/*
941 		 * Full memory scan is disabled.
942 		 */
943 		retval = 1;
944 		goto done;
945 	}
946 
947 	/*
948 	 * Try claiming the whole memory.
949 	 */
950 	for (seg = memsegs; seg; seg = seg->next) {
951 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
952 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
953 		for (cur_pa = begin_pa; cur_pa < end_pa;
954 		    cur_pa += MMU_PAGESIZE) {
955 			idsr = getidsr();
956 			if ((idsr & (IDSR_NACK_BIT(bn) |
957 			    IDSR_BUSY_BIT(bn))) == 0) {
958 				/*
959 				 * Didn't claim all memory
960 				 */
961 				goto done;
962 			}
963 			claimlines(cur_pa, MMU_PAGESIZE,
964 			    CH_ECACHE_SUBBLK_SIZE);
965 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
966 				shipit(cpuid, bn);
967 			}
968 			pages_claimed++;
969 		}
970 	}
971 
972 	/*
973 	 * We did all we could.
974 	 */
975 	retval = 1;
976 
977 done:
978 	/*
979 	 * Update statistics
980 	 */
981 	end_hrt = gethrtime_waitfree();
982 	CHEETAH_LIVELOCK_STAT(recovery);
983 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
984 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
985 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
986 	    (end_hrt -  begin_hrt));
987 
988 	while (cas32(&sendmondo_in_recover, 1, 0) != 1)
989 		;
990 
991 	return (retval);
992 }
993 
994 /*
995  * This is called by the cyclic framework when this CPU becomes online
996  */
997 /*ARGSUSED*/
998 static void
999 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
1000 {
1001 
1002 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1003 	hdlr->cyh_level = CY_LOW_LEVEL;
1004 	hdlr->cyh_arg = NULL;
1005 
1006 	/*
1007 	 * Stagger the start time
1008 	 */
1009 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1010 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1011 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1012 	}
1013 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1014 }
1015 
1016 /*
1017  * Create a low level cyclic to send a xtrap to the next cpu online.
1018  * However, there's no need to have this running on a uniprocessor system.
1019  */
1020 static void
1021 cheetah_nudge_init(void)
1022 {
1023 	cyc_omni_handler_t hdlr;
1024 
1025 	if (max_ncpus == 1) {
1026 		return;
1027 	}
1028 
1029 	hdlr.cyo_online = cheetah_nudge_onln;
1030 	hdlr.cyo_offline = NULL;
1031 	hdlr.cyo_arg = NULL;
1032 
1033 	mutex_enter(&cpu_lock);
1034 	(void) cyclic_add_omni(&hdlr);
1035 	mutex_exit(&cpu_lock);
1036 }
1037 
1038 /*
1039  * Cyclic handler to wake up buddy
1040  */
1041 void
1042 cheetah_nudge_buddy(void)
1043 {
1044 	/*
1045 	 * Disable kernel preemption to protect the cpu list
1046 	 */
1047 	kpreempt_disable();
1048 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1049 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1050 		    0, 0);
1051 	}
1052 	kpreempt_enable();
1053 }
1054 
1055 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1056 
1057 #ifdef SEND_MONDO_STATS
1058 uint32_t x_one_stimes[64];
1059 uint32_t x_one_ltimes[16];
1060 uint32_t x_set_stimes[64];
1061 uint32_t x_set_ltimes[16];
1062 uint32_t x_set_cpus[NCPU];
1063 uint32_t x_nack_stimes[64];
1064 #endif
1065 
1066 /*
1067  * Note: A version of this function is used by the debugger via the KDI,
1068  * and must be kept in sync with this version.  Any changes made to this
1069  * function to support new chips or to accomodate errata must also be included
1070  * in the KDI-specific version.  See us3_kdi.c.
1071  */
1072 void
1073 send_one_mondo(int cpuid)
1074 {
1075 	int busy, nack;
1076 	uint64_t idsr, starttick, endtick, tick, lasttick;
1077 	uint64_t busymask;
1078 #ifdef	CHEETAHPLUS_ERRATUM_25
1079 	int recovered = 0;
1080 #endif
1081 
1082 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1083 	starttick = lasttick = gettick();
1084 	shipit(cpuid, 0);
1085 	endtick = starttick + xc_tick_limit;
1086 	busy = nack = 0;
1087 #if defined(JALAPENO) || defined(SERRANO)
1088 	/*
1089 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1090 	 * will be used for dispatching interrupt. For now, assume
1091 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1092 	 * issues with respect to BUSY/NACK pair usage.
1093 	 */
1094 	busymask  = IDSR_BUSY_BIT(cpuid);
1095 #else /* JALAPENO || SERRANO */
1096 	busymask = IDSR_BUSY;
1097 #endif /* JALAPENO || SERRANO */
1098 	for (;;) {
1099 		idsr = getidsr();
1100 		if (idsr == 0)
1101 			break;
1102 
1103 		tick = gettick();
1104 		/*
1105 		 * If there is a big jump between the current tick
1106 		 * count and lasttick, we have probably hit a break
1107 		 * point.  Adjust endtick accordingly to avoid panic.
1108 		 */
1109 		if (tick > (lasttick + xc_tick_jump_limit))
1110 			endtick += (tick - lasttick);
1111 		lasttick = tick;
1112 		if (tick > endtick) {
1113 			if (panic_quiesce)
1114 				return;
1115 #ifdef	CHEETAHPLUS_ERRATUM_25
1116 			if (cheetah_sendmondo_recover && recovered == 0) {
1117 				if (mondo_recover(cpuid, 0)) {
1118 					/*
1119 					 * We claimed the whole memory or
1120 					 * full scan is disabled.
1121 					 */
1122 					recovered++;
1123 				}
1124 				tick = gettick();
1125 				endtick = tick + xc_tick_limit;
1126 				lasttick = tick;
1127 				/*
1128 				 * Recheck idsr
1129 				 */
1130 				continue;
1131 			} else
1132 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1133 			{
1134 				cmn_err(CE_PANIC, "send mondo timeout "
1135 				    "(target 0x%x) [%d NACK %d BUSY]",
1136 				    cpuid, nack, busy);
1137 			}
1138 		}
1139 
1140 		if (idsr & busymask) {
1141 			busy++;
1142 			continue;
1143 		}
1144 		drv_usecwait(1);
1145 		shipit(cpuid, 0);
1146 		nack++;
1147 		busy = 0;
1148 	}
1149 #ifdef SEND_MONDO_STATS
1150 	{
1151 		int n = gettick() - starttick;
1152 		if (n < 8192)
1153 			x_one_stimes[n >> 7]++;
1154 		else
1155 			x_one_ltimes[(n >> 13) & 0xf]++;
1156 	}
1157 #endif
1158 }
1159 
1160 void
1161 syncfpu(void)
1162 {
1163 }
1164 
1165 /*
1166  * Return processor specific async error structure
1167  * size used.
1168  */
1169 int
1170 cpu_aflt_size(void)
1171 {
1172 	return (sizeof (ch_async_flt_t));
1173 }
1174 
1175 /*
1176  * Tunable to disable the checking of other cpu logout areas during panic for
1177  * potential syndrome 71 generating errors.
1178  */
1179 int enable_check_other_cpus_logout = 1;
1180 
1181 /*
1182  * Check other cpus logout area for potential synd 71 generating
1183  * errors.
1184  */
1185 static void
1186 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1187     ch_cpu_logout_t *clop)
1188 {
1189 	struct async_flt *aflt;
1190 	ch_async_flt_t ch_flt;
1191 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1192 
1193 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1194 		return;
1195 	}
1196 
1197 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1198 
1199 	t_afar = clop->clo_data.chd_afar;
1200 	t_afsr = clop->clo_data.chd_afsr;
1201 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1202 #if defined(SERRANO)
1203 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1204 #endif	/* SERRANO */
1205 
1206 	/*
1207 	 * In order to simplify code, we maintain this afsr_errs
1208 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1209 	 * sticky bits.
1210 	 */
1211 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1212 	    (t_afsr & C_AFSR_ALL_ERRS);
1213 
1214 	/* Setup the async fault structure */
1215 	aflt = (struct async_flt *)&ch_flt;
1216 	aflt->flt_id = gethrtime_waitfree();
1217 	ch_flt.afsr_ext = t_afsr_ext;
1218 	ch_flt.afsr_errs = t_afsr_errs;
1219 	aflt->flt_stat = t_afsr;
1220 	aflt->flt_addr = t_afar;
1221 	aflt->flt_bus_id = cpuid;
1222 	aflt->flt_inst = cpuid;
1223 	aflt->flt_pc = tpc;
1224 	aflt->flt_prot = AFLT_PROT_NONE;
1225 	aflt->flt_class = CPU_FAULT;
1226 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1227 	aflt->flt_tl = tl;
1228 	aflt->flt_status = ecc_type;
1229 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1230 
1231 	/*
1232 	 * Queue events on the async event queue, one event per error bit.
1233 	 * If no events are queued, queue an event to complain.
1234 	 */
1235 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1236 		ch_flt.flt_type = CPU_INV_AFSR;
1237 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1238 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1239 		    aflt->flt_panic);
1240 	}
1241 
1242 	/*
1243 	 * Zero out + invalidate CPU logout.
1244 	 */
1245 	bzero(clop, sizeof (ch_cpu_logout_t));
1246 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1247 }
1248 
1249 /*
1250  * Check the logout areas of all other cpus for unlogged errors.
1251  */
1252 static void
1253 cpu_check_other_cpus_logout(void)
1254 {
1255 	int i, j;
1256 	processorid_t myid;
1257 	struct cpu *cp;
1258 	ch_err_tl1_data_t *cl1p;
1259 
1260 	myid = CPU->cpu_id;
1261 	for (i = 0; i < NCPU; i++) {
1262 		cp = cpu[i];
1263 
1264 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1265 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1266 			continue;
1267 		}
1268 
1269 		/*
1270 		 * Check each of the tl>0 logout areas
1271 		 */
1272 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1273 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1274 			if (cl1p->ch_err_tl1_flags == 0)
1275 				continue;
1276 
1277 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1278 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1279 		}
1280 
1281 		/*
1282 		 * Check each of the remaining logout areas
1283 		 */
1284 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1285 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1286 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1287 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1288 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1289 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1290 	}
1291 }
1292 
1293 /*
1294  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1295  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1296  * flush the error that caused the UCU/UCC, then again here at the end to
1297  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1298  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1299  * another Fast ECC trap.
1300  *
1301  * Cheetah+ also handles: TSCE: No additional processing required.
1302  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1303  *
1304  * Note that the p_clo_flags input is only valid in cases where the
1305  * cpu_private struct is not yet initialized (since that is the only
1306  * time that information cannot be obtained from the logout struct.)
1307  */
1308 /*ARGSUSED*/
1309 void
1310 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1311 {
1312 	ch_cpu_logout_t *clop;
1313 	uint64_t ceen, nceen;
1314 
1315 	/*
1316 	 * Get the CPU log out info. If we can't find our CPU private
1317 	 * pointer, then we will have to make due without any detailed
1318 	 * logout information.
1319 	 */
1320 	if (CPU_PRIVATE(CPU) == NULL) {
1321 		clop = NULL;
1322 		ceen = p_clo_flags & EN_REG_CEEN;
1323 		nceen = p_clo_flags & EN_REG_NCEEN;
1324 	} else {
1325 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1326 		ceen = clop->clo_flags & EN_REG_CEEN;
1327 		nceen = clop->clo_flags & EN_REG_NCEEN;
1328 	}
1329 
1330 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1331 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1332 }
1333 
1334 /*
1335  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1336  * ECC at TL>0.  Need to supply either a error register pointer or a
1337  * cpu logout structure pointer.
1338  */
1339 static void
1340 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1341     uint64_t nceen, ch_cpu_logout_t *clop)
1342 {
1343 	struct async_flt *aflt;
1344 	ch_async_flt_t ch_flt;
1345 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1346 	char pr_reason[MAX_REASON_STRING];
1347 	ch_cpu_errors_t cpu_error_regs;
1348 
1349 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1350 	/*
1351 	 * If no cpu logout data, then we will have to make due without
1352 	 * any detailed logout information.
1353 	 */
1354 	if (clop == NULL) {
1355 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1356 		get_cpu_error_state(&cpu_error_regs);
1357 		set_cpu_error_state(&cpu_error_regs);
1358 		t_afar = cpu_error_regs.afar;
1359 		t_afsr = cpu_error_regs.afsr;
1360 		t_afsr_ext = cpu_error_regs.afsr_ext;
1361 #if defined(SERRANO)
1362 		ch_flt.afar2 = cpu_error_regs.afar2;
1363 #endif	/* SERRANO */
1364 	} else {
1365 		t_afar = clop->clo_data.chd_afar;
1366 		t_afsr = clop->clo_data.chd_afsr;
1367 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1368 #if defined(SERRANO)
1369 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1370 #endif	/* SERRANO */
1371 	}
1372 
1373 	/*
1374 	 * In order to simplify code, we maintain this afsr_errs
1375 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1376 	 * sticky bits.
1377 	 */
1378 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1379 	    (t_afsr & C_AFSR_ALL_ERRS);
1380 	pr_reason[0] = '\0';
1381 
1382 	/* Setup the async fault structure */
1383 	aflt = (struct async_flt *)&ch_flt;
1384 	aflt->flt_id = gethrtime_waitfree();
1385 	ch_flt.afsr_ext = t_afsr_ext;
1386 	ch_flt.afsr_errs = t_afsr_errs;
1387 	aflt->flt_stat = t_afsr;
1388 	aflt->flt_addr = t_afar;
1389 	aflt->flt_bus_id = getprocessorid();
1390 	aflt->flt_inst = CPU->cpu_id;
1391 	aflt->flt_pc = tpc;
1392 	aflt->flt_prot = AFLT_PROT_NONE;
1393 	aflt->flt_class = CPU_FAULT;
1394 	aflt->flt_priv = priv;
1395 	aflt->flt_tl = tl;
1396 	aflt->flt_status = ECC_F_TRAP;
1397 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1398 
1399 	/*
1400 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1401 	 * cmn_err messages out to the console.  The situation is a UCU (in
1402 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1403 	 * The messages for the UCU and WDU are enqueued and then pulled off
1404 	 * the async queue via softint and syslogd starts to process them
1405 	 * but doesn't get them to the console.  The UE causes a panic, but
1406 	 * since the UCU/WDU messages are already in transit, those aren't
1407 	 * on the async queue.  The hack is to check if we have a matching
1408 	 * WDU event for the UCU, and if it matches, we're more than likely
1409 	 * going to panic with a UE, unless we're under protection.  So, we
1410 	 * check to see if we got a matching WDU event and if we're under
1411 	 * protection.
1412 	 *
1413 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1414 	 * looks like this:
1415 	 *    UCU->WDU->UE
1416 	 * For Panther, it could look like either of these:
1417 	 *    UCU---->WDU->L3_WDU->UE
1418 	 *    L3_UCU->WDU->L3_WDU->UE
1419 	 */
1420 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1421 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1422 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1423 		get_cpu_error_state(&cpu_error_regs);
1424 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1425 		    (cpu_error_regs.afar == t_afar));
1426 		aflt->flt_panic |= ((clop == NULL) &&
1427 		    (t_afsr_errs & C_AFSR_WDU));
1428 	}
1429 
1430 	/*
1431 	 * Queue events on the async event queue, one event per error bit.
1432 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1433 	 * queue an event to complain.
1434 	 */
1435 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1436 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1437 		ch_flt.flt_type = CPU_INV_AFSR;
1438 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1439 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1440 		    aflt->flt_panic);
1441 	}
1442 
1443 	/*
1444 	 * Zero out + invalidate CPU logout.
1445 	 */
1446 	if (clop) {
1447 		bzero(clop, sizeof (ch_cpu_logout_t));
1448 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1449 	}
1450 
1451 	/*
1452 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1453 	 * or disrupting errors have happened.  We do this because if a
1454 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1455 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1456 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1457 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1458 	 * deferred or disrupting error happening between checking the AFSR and
1459 	 * enabling NCEEN/CEEN.
1460 	 *
1461 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1462 	 * taken.
1463 	 */
1464 	set_error_enable(get_error_enable() | (nceen | ceen));
1465 	if (clear_errors(&ch_flt)) {
1466 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1467 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1468 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1469 		    NULL);
1470 	}
1471 
1472 	/*
1473 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1474 	 * be logged as part of the panic flow.
1475 	 */
1476 	if (aflt->flt_panic)
1477 		fm_panic("%sError(s)", pr_reason);
1478 
1479 	/*
1480 	 * Flushing the Ecache here gets the part of the trap handler that
1481 	 * is run at TL=1 out of the Ecache.
1482 	 */
1483 	cpu_flush_ecache();
1484 }
1485 
1486 /*
1487  * This is called via sys_trap from pil15_interrupt code if the
1488  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1489  * various ch_err_tl1_data structures for valid entries based on the bit
1490  * settings in the ch_err_tl1_flags entry of the structure.
1491  */
1492 /*ARGSUSED*/
1493 void
1494 cpu_tl1_error(struct regs *rp, int panic)
1495 {
1496 	ch_err_tl1_data_t *cl1p, cl1;
1497 	int i, ncl1ps;
1498 	uint64_t me_flags;
1499 	uint64_t ceen, nceen;
1500 
1501 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1502 		cl1p = &ch_err_tl1_data;
1503 		ncl1ps = 1;
1504 	} else if (CPU_PRIVATE(CPU) != NULL) {
1505 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1506 		ncl1ps = CH_ERR_TL1_TLMAX;
1507 	} else {
1508 		ncl1ps = 0;
1509 	}
1510 
1511 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1512 		if (cl1p->ch_err_tl1_flags == 0)
1513 			continue;
1514 
1515 		/*
1516 		 * Grab a copy of the logout data and invalidate
1517 		 * the logout area.
1518 		 */
1519 		cl1 = *cl1p;
1520 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1521 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1522 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1523 
1524 		/*
1525 		 * Log "first error" in ch_err_tl1_data.
1526 		 */
1527 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1528 			ceen = get_error_enable() & EN_REG_CEEN;
1529 			nceen = get_error_enable() & EN_REG_NCEEN;
1530 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1531 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1532 		}
1533 #if defined(CPU_IMP_L1_CACHE_PARITY)
1534 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1535 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1536 			    (caddr_t)cl1.ch_err_tl1_tpc);
1537 		}
1538 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1539 
1540 		/*
1541 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1542 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1543 		 * if the structure is busy, we just do the cache flushing
1544 		 * we have to do and then do the retry.  So the AFSR/AFAR
1545 		 * at this point *should* have some relevant info.  If there
1546 		 * are no valid errors in the AFSR, we'll assume they've
1547 		 * already been picked up and logged.  For I$/D$ parity,
1548 		 * we just log an event with an "Unknown" (NULL) TPC.
1549 		 */
1550 		if (me_flags & CH_ERR_FECC) {
1551 			ch_cpu_errors_t cpu_error_regs;
1552 			uint64_t t_afsr_errs;
1553 
1554 			/*
1555 			 * Get the error registers and see if there's
1556 			 * a pending error.  If not, don't bother
1557 			 * generating an "Invalid AFSR" error event.
1558 			 */
1559 			get_cpu_error_state(&cpu_error_regs);
1560 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1561 			    C_AFSR_EXT_ALL_ERRS) |
1562 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1563 			if (t_afsr_errs != 0) {
1564 				ceen = get_error_enable() & EN_REG_CEEN;
1565 				nceen = get_error_enable() & EN_REG_NCEEN;
1566 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1567 				    1, ceen, nceen, NULL);
1568 			}
1569 		}
1570 #if defined(CPU_IMP_L1_CACHE_PARITY)
1571 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1572 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1573 		}
1574 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1575 	}
1576 }
1577 
1578 /*
1579  * Called from Fast ECC TL>0 handler in case of fatal error.
1580  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1581  * but if we don't, we'll panic with something reasonable.
1582  */
1583 /*ARGSUSED*/
1584 void
1585 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1586 {
1587 	cpu_tl1_error(rp, 1);
1588 	/*
1589 	 * Should never return, but just in case.
1590 	 */
1591 	fm_panic("Unsurvivable ECC Error at TL>0");
1592 }
1593 
1594 /*
1595  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1596  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1597  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1598  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1599  *
1600  * Cheetah+ also handles (No additional processing required):
1601  *    DUE, DTO, DBERR	(NCEEN controlled)
1602  *    THCE		(CEEN and ET_ECC_en controlled)
1603  *    TUE		(ET_ECC_en controlled)
1604  *
1605  * Panther further adds:
1606  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1607  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1608  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1609  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1610  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1611  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1612  *
1613  * Note that the p_clo_flags input is only valid in cases where the
1614  * cpu_private struct is not yet initialized (since that is the only
1615  * time that information cannot be obtained from the logout struct.)
1616  */
1617 /*ARGSUSED*/
1618 void
1619 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1620 {
1621 	struct async_flt *aflt;
1622 	ch_async_flt_t ch_flt;
1623 	char pr_reason[MAX_REASON_STRING];
1624 	ch_cpu_logout_t *clop;
1625 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1626 	ch_cpu_errors_t cpu_error_regs;
1627 
1628 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1629 	/*
1630 	 * Get the CPU log out info. If we can't find our CPU private
1631 	 * pointer, then we will have to make due without any detailed
1632 	 * logout information.
1633 	 */
1634 	if (CPU_PRIVATE(CPU) == NULL) {
1635 		clop = NULL;
1636 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1637 		get_cpu_error_state(&cpu_error_regs);
1638 		set_cpu_error_state(&cpu_error_regs);
1639 		t_afar = cpu_error_regs.afar;
1640 		t_afsr = cpu_error_regs.afsr;
1641 		t_afsr_ext = cpu_error_regs.afsr_ext;
1642 #if defined(SERRANO)
1643 		ch_flt.afar2 = cpu_error_regs.afar2;
1644 #endif	/* SERRANO */
1645 	} else {
1646 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1647 		t_afar = clop->clo_data.chd_afar;
1648 		t_afsr = clop->clo_data.chd_afsr;
1649 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1650 #if defined(SERRANO)
1651 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1652 #endif	/* SERRANO */
1653 	}
1654 
1655 	/*
1656 	 * In order to simplify code, we maintain this afsr_errs
1657 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1658 	 * sticky bits.
1659 	 */
1660 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1661 	    (t_afsr & C_AFSR_ALL_ERRS);
1662 
1663 	pr_reason[0] = '\0';
1664 	/* Setup the async fault structure */
1665 	aflt = (struct async_flt *)&ch_flt;
1666 	ch_flt.afsr_ext = t_afsr_ext;
1667 	ch_flt.afsr_errs = t_afsr_errs;
1668 	aflt->flt_stat = t_afsr;
1669 	aflt->flt_addr = t_afar;
1670 	aflt->flt_pc = (caddr_t)rp->r_pc;
1671 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1672 	aflt->flt_tl = 0;
1673 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1674 
1675 	/*
1676 	 * If this trap is a result of one of the errors not masked
1677 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1678 	 * indicate that a timeout is to be set later.
1679 	 */
1680 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1681 	    !aflt->flt_panic)
1682 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1683 	else
1684 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1685 
1686 	/*
1687 	 * log the CE and clean up
1688 	 */
1689 	cpu_log_and_clear_ce(&ch_flt);
1690 
1691 	/*
1692 	 * We re-enable CEEN (if required) and check if any disrupting errors
1693 	 * have happened.  We do this because if a disrupting error had occurred
1694 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1695 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1696 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1697 	 * of a error happening between checking the AFSR and enabling CEEN.
1698 	 */
1699 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1700 	    set_error_enable(get_error_enable() | EN_REG_CEEN);
1701 	if (clear_errors(&ch_flt)) {
1702 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1703 		    NULL);
1704 	}
1705 
1706 	/*
1707 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1708 	 * be logged as part of the panic flow.
1709 	 */
1710 	if (aflt->flt_panic)
1711 		fm_panic("%sError(s)", pr_reason);
1712 }
1713 
1714 /*
1715  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1716  * L3_EDU:BLD, TO, and BERR events.
1717  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1718  *
1719  * Cheetah+: No additional errors handled.
1720  *
1721  * Note that the p_clo_flags input is only valid in cases where the
1722  * cpu_private struct is not yet initialized (since that is the only
1723  * time that information cannot be obtained from the logout struct.)
1724  */
1725 /*ARGSUSED*/
1726 void
1727 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1728 {
1729 	ushort_t ttype, tl;
1730 	ch_async_flt_t ch_flt;
1731 	struct async_flt *aflt;
1732 	int trampolined = 0;
1733 	char pr_reason[MAX_REASON_STRING];
1734 	ch_cpu_logout_t *clop;
1735 	uint64_t ceen, clo_flags;
1736 	uint64_t log_afsr;
1737 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1738 	ch_cpu_errors_t cpu_error_regs;
1739 	int expected = DDI_FM_ERR_UNEXPECTED;
1740 	ddi_acc_hdl_t *hp;
1741 
1742 	/*
1743 	 * We need to look at p_flag to determine if the thread detected an
1744 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1745 	 * because we just need a consistent snapshot and we know that everyone
1746 	 * else will store a consistent set of bits while holding p_lock.  We
1747 	 * don't have to worry about a race because SDOCORE is set once prior
1748 	 * to doing i/o from the process's address space and is never cleared.
1749 	 */
1750 	uint_t pflag = ttoproc(curthread)->p_flag;
1751 
1752 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1753 	/*
1754 	 * Get the CPU log out info. If we can't find our CPU private
1755 	 * pointer then we will have to make due without any detailed
1756 	 * logout information.
1757 	 */
1758 	if (CPU_PRIVATE(CPU) == NULL) {
1759 		clop = NULL;
1760 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1761 		get_cpu_error_state(&cpu_error_regs);
1762 		set_cpu_error_state(&cpu_error_regs);
1763 		t_afar = cpu_error_regs.afar;
1764 		t_afsr = cpu_error_regs.afsr;
1765 		t_afsr_ext = cpu_error_regs.afsr_ext;
1766 #if defined(SERRANO)
1767 		ch_flt.afar2 = cpu_error_regs.afar2;
1768 #endif	/* SERRANO */
1769 		clo_flags = p_clo_flags;
1770 	} else {
1771 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1772 		t_afar = clop->clo_data.chd_afar;
1773 		t_afsr = clop->clo_data.chd_afsr;
1774 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1775 #if defined(SERRANO)
1776 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1777 #endif	/* SERRANO */
1778 		clo_flags = clop->clo_flags;
1779 	}
1780 
1781 	/*
1782 	 * In order to simplify code, we maintain this afsr_errs
1783 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1784 	 * sticky bits.
1785 	 */
1786 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1787 	    (t_afsr & C_AFSR_ALL_ERRS);
1788 	pr_reason[0] = '\0';
1789 
1790 	/*
1791 	 * Grab information encoded into our clo_flags field.
1792 	 */
1793 	ceen = clo_flags & EN_REG_CEEN;
1794 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1795 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1796 
1797 	/*
1798 	 * handle the specific error
1799 	 */
1800 	aflt = (struct async_flt *)&ch_flt;
1801 	aflt->flt_id = gethrtime_waitfree();
1802 	aflt->flt_bus_id = getprocessorid();
1803 	aflt->flt_inst = CPU->cpu_id;
1804 	ch_flt.afsr_ext = t_afsr_ext;
1805 	ch_flt.afsr_errs = t_afsr_errs;
1806 	aflt->flt_stat = t_afsr;
1807 	aflt->flt_addr = t_afar;
1808 	aflt->flt_pc = (caddr_t)rp->r_pc;
1809 	aflt->flt_prot = AFLT_PROT_NONE;
1810 	aflt->flt_class = CPU_FAULT;
1811 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1812 	aflt->flt_tl = (uchar_t)tl;
1813 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1814 	    C_AFSR_PANIC(t_afsr_errs));
1815 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1816 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1817 
1818 	/*
1819 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1820 	 * see if we were executing in the kernel under on_trap() or t_lofault
1821 	 * protection.  If so, modify the saved registers so that we return
1822 	 * from the trap to the appropriate trampoline routine.
1823 	 */
1824 	if (aflt->flt_priv && tl == 0) {
1825 		if (curthread->t_ontrap != NULL) {
1826 			on_trap_data_t *otp = curthread->t_ontrap;
1827 
1828 			if (otp->ot_prot & OT_DATA_EC) {
1829 				aflt->flt_prot = AFLT_PROT_EC;
1830 				otp->ot_trap |= OT_DATA_EC;
1831 				rp->r_pc = otp->ot_trampoline;
1832 				rp->r_npc = rp->r_pc + 4;
1833 				trampolined = 1;
1834 			}
1835 
1836 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1837 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1838 				aflt->flt_prot = AFLT_PROT_ACCESS;
1839 				otp->ot_trap |= OT_DATA_ACCESS;
1840 				rp->r_pc = otp->ot_trampoline;
1841 				rp->r_npc = rp->r_pc + 4;
1842 				trampolined = 1;
1843 				/*
1844 				 * for peeks and caut_gets errors are expected
1845 				 */
1846 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1847 				if (!hp)
1848 					expected = DDI_FM_ERR_PEEK;
1849 				else if (hp->ah_acc.devacc_attr_access ==
1850 				    DDI_CAUTIOUS_ACC)
1851 					expected = DDI_FM_ERR_EXPECTED;
1852 			}
1853 
1854 		} else if (curthread->t_lofault) {
1855 			aflt->flt_prot = AFLT_PROT_COPY;
1856 			rp->r_g1 = EFAULT;
1857 			rp->r_pc = curthread->t_lofault;
1858 			rp->r_npc = rp->r_pc + 4;
1859 			trampolined = 1;
1860 		}
1861 	}
1862 
1863 	/*
1864 	 * If we're in user mode or we're doing a protected copy, we either
1865 	 * want the ASTON code below to send a signal to the user process
1866 	 * or we want to panic if aft_panic is set.
1867 	 *
1868 	 * If we're in privileged mode and we're not doing a copy, then we
1869 	 * need to check if we've trampolined.  If we haven't trampolined,
1870 	 * we should panic.
1871 	 */
1872 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1873 		if (t_afsr_errs &
1874 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1875 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1876 			aflt->flt_panic |= aft_panic;
1877 	} else if (!trampolined) {
1878 			aflt->flt_panic = 1;
1879 	}
1880 
1881 	/*
1882 	 * If we've trampolined due to a privileged TO or BERR, or if an
1883 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1884 	 * event for that TO or BERR.  Queue all other events (if any) besides
1885 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1886 	 * ignore the number of events queued.  If we haven't trampolined due
1887 	 * to a TO or BERR, just enqueue events normally.
1888 	 */
1889 	log_afsr = t_afsr_errs;
1890 	if (trampolined) {
1891 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1892 	} else if (!aflt->flt_priv) {
1893 		/*
1894 		 * User mode, suppress messages if
1895 		 * cpu_berr_to_verbose is not set.
1896 		 */
1897 		if (!cpu_berr_to_verbose)
1898 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1899 	}
1900 
1901 	/*
1902 	 * Log any errors that occurred
1903 	 */
1904 	if (((log_afsr &
1905 		((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1906 		cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1907 		(t_afsr_errs &
1908 		(C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1909 		ch_flt.flt_type = CPU_INV_AFSR;
1910 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1911 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1912 		    aflt->flt_panic);
1913 	}
1914 
1915 	/*
1916 	 * Zero out + invalidate CPU logout.
1917 	 */
1918 	if (clop) {
1919 		bzero(clop, sizeof (ch_cpu_logout_t));
1920 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1921 	}
1922 
1923 #if defined(JALAPENO) || defined(SERRANO)
1924 	/*
1925 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1926 	 * IO errors that may have resulted in this trap.
1927 	 */
1928 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1929 		cpu_run_bus_error_handlers(aflt, expected);
1930 	}
1931 
1932 	/*
1933 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1934 	 * line from the Ecache.  We also need to query the bus nexus for
1935 	 * fatal errors.  Attempts to do diagnostic read on caches may
1936 	 * introduce more errors (especially when the module is bad).
1937 	 */
1938 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1939 		/*
1940 		 * Ask our bus nexus friends if they have any fatal errors.  If
1941 		 * so, they will log appropriate error messages.
1942 		 */
1943 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1944 			aflt->flt_panic = 1;
1945 
1946 		/*
1947 		 * We got a UE or RUE and are panicking, save the fault PA in
1948 		 * a known location so that the platform specific panic code
1949 		 * can check for copyback errors.
1950 		 */
1951 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1952 			panic_aflt = *aflt;
1953 		}
1954 	}
1955 
1956 	/*
1957 	 * Flush Ecache line or entire Ecache
1958 	 */
1959 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1960 		cpu_error_ecache_flush(&ch_flt);
1961 #else /* JALAPENO || SERRANO */
1962 	/*
1963 	 * UE/BERR/TO: Call our bus nexus friends to check for
1964 	 * IO errors that may have resulted in this trap.
1965 	 */
1966 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1967 		cpu_run_bus_error_handlers(aflt, expected);
1968 	}
1969 
1970 	/*
1971 	 * UE: If the UE is in memory, we need to flush the bad
1972 	 * line from the Ecache.  We also need to query the bus nexus for
1973 	 * fatal errors.  Attempts to do diagnostic read on caches may
1974 	 * introduce more errors (especially when the module is bad).
1975 	 */
1976 	if (t_afsr & C_AFSR_UE) {
1977 		/*
1978 		 * Ask our legacy bus nexus friends if they have any fatal
1979 		 * errors.  If so, they will log appropriate error messages.
1980 		 */
1981 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1982 			aflt->flt_panic = 1;
1983 
1984 		/*
1985 		 * We got a UE and are panicking, save the fault PA in a known
1986 		 * location so that the platform specific panic code can check
1987 		 * for copyback errors.
1988 		 */
1989 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1990 			panic_aflt = *aflt;
1991 		}
1992 	}
1993 
1994 	/*
1995 	 * Flush Ecache line or entire Ecache
1996 	 */
1997 	if (t_afsr_errs &
1998 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1999 		cpu_error_ecache_flush(&ch_flt);
2000 #endif /* JALAPENO || SERRANO */
2001 
2002 	/*
2003 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2004 	 * or disrupting errors have happened.  We do this because if a
2005 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2006 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2007 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2008 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2009 	 * deferred or disrupting error happening between checking the AFSR and
2010 	 * enabling NCEEN/CEEN.
2011 	 *
2012 	 * Note: CEEN reenabled only if it was on when trap taken.
2013 	 */
2014 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2015 	if (clear_errors(&ch_flt)) {
2016 		/*
2017 		 * Check for secondary errors, and avoid panicking if we
2018 		 * have them
2019 		 */
2020 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2021 		    t_afar) == 0) {
2022 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2023 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2024 		}
2025 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2026 		    NULL);
2027 	}
2028 
2029 	/*
2030 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2031 	 * be logged as part of the panic flow.
2032 	 */
2033 	if (aflt->flt_panic)
2034 		fm_panic("%sError(s)", pr_reason);
2035 
2036 	/*
2037 	 * If we queued an error and we are going to return from the trap and
2038 	 * the error was in user mode or inside of a copy routine, set AST flag
2039 	 * so the queue will be drained before returning to user mode.  The
2040 	 * AST processing will also act on our failure policy.
2041 	 */
2042 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2043 		int pcb_flag = 0;
2044 
2045 		if (t_afsr_errs &
2046 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2047 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2048 			pcb_flag |= ASYNC_HWERR;
2049 
2050 		if (t_afsr & C_AFSR_BERR)
2051 			pcb_flag |= ASYNC_BERR;
2052 
2053 		if (t_afsr & C_AFSR_TO)
2054 			pcb_flag |= ASYNC_BTO;
2055 
2056 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2057 		aston(curthread);
2058 	}
2059 }
2060 
2061 #if defined(CPU_IMP_L1_CACHE_PARITY)
2062 /*
2063  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2064  *
2065  * For Panther, P$ data parity errors during floating point load hits
2066  * are also detected (reported as TT 0x71) and handled by this trap
2067  * handler.
2068  *
2069  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2070  * is available.
2071  */
2072 /*ARGSUSED*/
2073 void
2074 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2075 {
2076 	ch_async_flt_t ch_flt;
2077 	struct async_flt *aflt;
2078 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2079 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2080 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2081 	char *error_class;
2082 
2083 	/*
2084 	 * Log the error.
2085 	 * For icache parity errors the fault address is the trap PC.
2086 	 * For dcache/pcache parity errors the instruction would have to
2087 	 * be decoded to determine the address and that isn't possible
2088 	 * at high PIL.
2089 	 */
2090 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2091 	aflt = (struct async_flt *)&ch_flt;
2092 	aflt->flt_id = gethrtime_waitfree();
2093 	aflt->flt_bus_id = getprocessorid();
2094 	aflt->flt_inst = CPU->cpu_id;
2095 	aflt->flt_pc = tpc;
2096 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2097 	aflt->flt_prot = AFLT_PROT_NONE;
2098 	aflt->flt_class = CPU_FAULT;
2099 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2100 	aflt->flt_tl = tl;
2101 	aflt->flt_panic = panic;
2102 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2103 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2104 
2105 	if (iparity) {
2106 		cpu_icache_parity_info(&ch_flt);
2107 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2108 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2109 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2110 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2111 		else
2112 			error_class = FM_EREPORT_CPU_USIII_IPE;
2113 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2114 	} else {
2115 		cpu_dcache_parity_info(&ch_flt);
2116 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2117 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2118 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2119 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2120 		else
2121 			error_class = FM_EREPORT_CPU_USIII_DPE;
2122 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2123 		/*
2124 		 * For panther we also need to check the P$ for parity errors.
2125 		 */
2126 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2127 			cpu_pcache_parity_info(&ch_flt);
2128 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2129 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2130 				aflt->flt_payload =
2131 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2132 			}
2133 		}
2134 	}
2135 
2136 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2137 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2138 
2139 	if (iparity) {
2140 		/*
2141 		 * Invalidate entire I$.
2142 		 * This is required due to the use of diagnostic ASI
2143 		 * accesses that may result in a loss of I$ coherency.
2144 		 */
2145 		if (cache_boot_state & DCU_IC) {
2146 			flush_icache();
2147 		}
2148 		/*
2149 		 * According to section P.3.1 of the Panther PRM, we
2150 		 * need to do a little more for recovery on those
2151 		 * CPUs after encountering an I$ parity error.
2152 		 */
2153 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2154 			flush_ipb();
2155 			correct_dcache_parity(dcache_size,
2156 			    dcache_linesize);
2157 			flush_pcache();
2158 		}
2159 	} else {
2160 		/*
2161 		 * Since the valid bit is ignored when checking parity the
2162 		 * D$ data and tag must also be corrected.  Set D$ data bits
2163 		 * to zero and set utag to 0, 1, 2, 3.
2164 		 */
2165 		correct_dcache_parity(dcache_size, dcache_linesize);
2166 
2167 		/*
2168 		 * According to section P.3.3 of the Panther PRM, we
2169 		 * need to do a little more for recovery on those
2170 		 * CPUs after encountering a D$ or P$ parity error.
2171 		 *
2172 		 * As far as clearing P$ parity errors, it is enough to
2173 		 * simply invalidate all entries in the P$ since P$ parity
2174 		 * error traps are only generated for floating point load
2175 		 * hits.
2176 		 */
2177 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2178 			flush_icache();
2179 			flush_ipb();
2180 			flush_pcache();
2181 		}
2182 	}
2183 
2184 	/*
2185 	 * Invalidate entire D$ if it was enabled.
2186 	 * This is done to avoid stale data in the D$ which might
2187 	 * occur with the D$ disabled and the trap handler doing
2188 	 * stores affecting lines already in the D$.
2189 	 */
2190 	if (cache_boot_state & DCU_DC) {
2191 		flush_dcache();
2192 	}
2193 
2194 	/*
2195 	 * Restore caches to their bootup state.
2196 	 */
2197 	set_dcu(get_dcu() | cache_boot_state);
2198 
2199 	/*
2200 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2201 	 * be logged as part of the panic flow.
2202 	 */
2203 	if (aflt->flt_panic)
2204 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2205 
2206 	/*
2207 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2208 	 * the chance of getting an unrecoverable Fast ECC error.  This
2209 	 * flush will evict the part of the parity trap handler that is run
2210 	 * at TL>1.
2211 	 */
2212 	if (tl) {
2213 		cpu_flush_ecache();
2214 	}
2215 }
2216 
2217 /*
2218  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2219  * to indicate which portions of the captured data should be in the ereport.
2220  */
2221 void
2222 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2223 {
2224 	int way = ch_flt->parity_data.ipe.cpl_way;
2225 	int offset = ch_flt->parity_data.ipe.cpl_off;
2226 	int tag_index;
2227 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2228 
2229 
2230 	if ((offset != -1) || (way != -1)) {
2231 		/*
2232 		 * Parity error in I$ tag or data
2233 		 */
2234 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2235 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2236 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2237 			    PN_ICIDX_TO_WAY(tag_index);
2238 		else
2239 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2240 			    CH_ICIDX_TO_WAY(tag_index);
2241 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2242 		    IC_LOGFLAG_MAGIC;
2243 	} else {
2244 		/*
2245 		 * Parity error was not identified.
2246 		 * Log tags and data for all ways.
2247 		 */
2248 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2249 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2250 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2251 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2252 				    PN_ICIDX_TO_WAY(tag_index);
2253 			else
2254 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2255 				    CH_ICIDX_TO_WAY(tag_index);
2256 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2257 			    IC_LOGFLAG_MAGIC;
2258 		}
2259 	}
2260 }
2261 
2262 /*
2263  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2264  * to indicate which portions of the captured data should be in the ereport.
2265  */
2266 void
2267 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2268 {
2269 	int way = ch_flt->parity_data.dpe.cpl_way;
2270 	int offset = ch_flt->parity_data.dpe.cpl_off;
2271 	int tag_index;
2272 
2273 	if (offset != -1) {
2274 		/*
2275 		 * Parity error in D$ or P$ data array.
2276 		 *
2277 		 * First check to see whether the parity error is in D$ or P$
2278 		 * since P$ data parity errors are reported in Panther using
2279 		 * the same trap.
2280 		 */
2281 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2282 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2283 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2284 			    CH_PCIDX_TO_WAY(tag_index);
2285 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2286 			    PC_LOGFLAG_MAGIC;
2287 		} else {
2288 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2289 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2290 			    CH_DCIDX_TO_WAY(tag_index);
2291 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2292 			    DC_LOGFLAG_MAGIC;
2293 		}
2294 	} else if (way != -1) {
2295 		/*
2296 		 * Parity error in D$ tag.
2297 		 */
2298 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2299 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2300 		    CH_DCIDX_TO_WAY(tag_index);
2301 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2302 		    DC_LOGFLAG_MAGIC;
2303 	}
2304 }
2305 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2306 
2307 /*
2308  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2309  * post-process CPU events that are dequeued.  As such, it can be invoked
2310  * from softint context, from AST processing in the trap() flow, or from the
2311  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2312  * Historically this entry point was used to log the actual cmn_err(9F) text;
2313  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2314  * With FMA this function now also returns a flag which indicates to the
2315  * caller whether the ereport should be posted (1) or suppressed (0).
2316  */
2317 static int
2318 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2319 {
2320 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2321 	struct async_flt *aflt = (struct async_flt *)flt;
2322 	uint64_t errors;
2323 	extern void memscrub_induced_error(void);
2324 
2325 	switch (ch_flt->flt_type) {
2326 	case CPU_INV_AFSR:
2327 		/*
2328 		 * If it is a disrupting trap and the AFSR is zero, then
2329 		 * the event has probably already been noted. Do not post
2330 		 * an ereport.
2331 		 */
2332 		if ((aflt->flt_status & ECC_C_TRAP) &&
2333 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2334 			return (0);
2335 		else
2336 			return (1);
2337 	case CPU_TO:
2338 	case CPU_BERR:
2339 	case CPU_FATAL:
2340 	case CPU_FPUERR:
2341 		return (1);
2342 
2343 	case CPU_UE_ECACHE_RETIRE:
2344 		cpu_log_err(aflt);
2345 		cpu_page_retire(ch_flt);
2346 		return (1);
2347 
2348 	/*
2349 	 * Cases where we may want to suppress logging or perform
2350 	 * extended diagnostics.
2351 	 */
2352 	case CPU_CE:
2353 	case CPU_EMC:
2354 		/*
2355 		 * We want to skip logging and further classification
2356 		 * only if ALL the following conditions are true:
2357 		 *
2358 		 *	1. There is only one error
2359 		 *	2. That error is a correctable memory error
2360 		 *	3. The error is caused by the memory scrubber (in
2361 		 *	   which case the error will have occurred under
2362 		 *	   on_trap protection)
2363 		 *	4. The error is on a retired page
2364 		 *
2365 		 * Note: AFLT_PROT_EC is used places other than the memory
2366 		 * scrubber.  However, none of those errors should occur
2367 		 * on a retired page.
2368 		 */
2369 		if ((ch_flt->afsr_errs &
2370 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2371 		    aflt->flt_prot == AFLT_PROT_EC) {
2372 
2373 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2374 			    if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2375 
2376 				/*
2377 				 * Since we're skipping logging, we'll need
2378 				 * to schedule the re-enabling of CEEN
2379 				 */
2380 				(void) timeout(cpu_delayed_check_ce_errors,
2381 				    (void *)(uintptr_t)aflt->flt_inst,
2382 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2383 						 * MICROSEC));
2384 			    }
2385 				/*
2386 				 * Inform memscrubber - scrubbing induced
2387 				 * CE on a retired page.
2388 				 */
2389 				memscrub_induced_error();
2390 				return (0);
2391 			}
2392 		}
2393 
2394 		/*
2395 		 * Perform/schedule further classification actions, but
2396 		 * only if the page is healthy (we don't want bad
2397 		 * pages inducing too much diagnostic activity).  If we could
2398 		 * not find a page pointer then we also skip this.  If
2399 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2400 		 * to copy and recirculate the event (for further diagnostics)
2401 		 * and we should not proceed to log it here.
2402 		 *
2403 		 * This must be the last step here before the cpu_log_err()
2404 		 * below - if an event recirculates cpu_ce_log_err() will
2405 		 * not call the current function but just proceed directly
2406 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2407 		 *
2408 		 * Note: Check cpu_impl_async_log_err if changing this
2409 		 */
2410 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2411 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2412 			    CE_XDIAG_SKIP_NOPP);
2413 		} else {
2414 			if (errors != PR_OK) {
2415 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2416 				    CE_XDIAG_SKIP_PAGEDET);
2417 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2418 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2419 				return (0);
2420 			}
2421 		}
2422 		/*FALLTHRU*/
2423 
2424 	/*
2425 	 * Cases where we just want to report the error and continue.
2426 	 */
2427 	case CPU_CE_ECACHE:
2428 	case CPU_UE_ECACHE:
2429 	case CPU_IV:
2430 	case CPU_ORPH:
2431 		cpu_log_err(aflt);
2432 		return (1);
2433 
2434 	/*
2435 	 * Cases where we want to fall through to handle panicking.
2436 	 */
2437 	case CPU_UE:
2438 		/*
2439 		 * We want to skip logging in the same conditions as the
2440 		 * CE case.  In addition, we want to make sure we're not
2441 		 * panicking.
2442 		 */
2443 		if (!panicstr && (ch_flt->afsr_errs &
2444 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2445 		    aflt->flt_prot == AFLT_PROT_EC) {
2446 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2447 				/* Zero the address to clear the error */
2448 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2449 				/*
2450 				 * Inform memscrubber - scrubbing induced
2451 				 * UE on a retired page.
2452 				 */
2453 				memscrub_induced_error();
2454 				return (0);
2455 			}
2456 		}
2457 		cpu_log_err(aflt);
2458 		break;
2459 
2460 	default:
2461 		/*
2462 		 * If the us3_common.c code doesn't know the flt_type, it may
2463 		 * be an implementation-specific code.  Call into the impldep
2464 		 * backend to find out what to do: if it tells us to continue,
2465 		 * break and handle as if falling through from a UE; if not,
2466 		 * the impldep backend has handled the error and we're done.
2467 		 */
2468 		switch (cpu_impl_async_log_err(flt, eqep)) {
2469 		case CH_ASYNC_LOG_DONE:
2470 			return (1);
2471 		case CH_ASYNC_LOG_RECIRC:
2472 			return (0);
2473 		case CH_ASYNC_LOG_CONTINUE:
2474 			break; /* continue on to handle UE-like error */
2475 		default:
2476 			cmn_err(CE_WARN, "discarding error 0x%p with "
2477 			    "invalid fault type (0x%x)",
2478 			    (void *)aflt, ch_flt->flt_type);
2479 			return (0);
2480 		}
2481 	}
2482 
2483 	/* ... fall through from the UE case */
2484 
2485 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2486 		if (!panicstr) {
2487 			cpu_page_retire(ch_flt);
2488 		} else {
2489 			/*
2490 			 * Clear UEs on panic so that we don't
2491 			 * get haunted by them during panic or
2492 			 * after reboot
2493 			 */
2494 			cpu_clearphys(aflt);
2495 			(void) clear_errors(NULL);
2496 		}
2497 	}
2498 
2499 	return (1);
2500 }
2501 
2502 /*
2503  * Retire the bad page that may contain the flushed error.
2504  */
2505 void
2506 cpu_page_retire(ch_async_flt_t *ch_flt)
2507 {
2508 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2509 	(void) page_retire(aflt->flt_addr, PR_UE);
2510 }
2511 
2512 /*
2513  * Return true if the error specified in the AFSR indicates
2514  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2515  * for Panther, none for Jalapeno/Serrano).
2516  */
2517 /* ARGSUSED */
2518 static int
2519 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2520 {
2521 #if defined(JALAPENO) || defined(SERRANO)
2522 	return (0);
2523 #elif defined(CHEETAH_PLUS)
2524 	if (IS_PANTHER(cpunodes[cpuid].implementation))
2525 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2526 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2527 #else	/* CHEETAH_PLUS */
2528 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2529 #endif
2530 }
2531 
2532 /*
2533  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2534  * generic event post-processing for correctable and uncorrectable memory,
2535  * E$, and MTag errors.  Historically this entry point was used to log bits of
2536  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2537  * converted into an ereport.  In addition, it transmits the error to any
2538  * platform-specific service-processor FRU logging routines, if available.
2539  */
2540 void
2541 cpu_log_err(struct async_flt *aflt)
2542 {
2543 	char unum[UNUM_NAMLEN];
2544 	int synd_status, synd_code, afar_status;
2545 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2546 
2547 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2548 		aflt->flt_status |= ECC_ECACHE;
2549 	else
2550 		aflt->flt_status &= ~ECC_ECACHE;
2551 	/*
2552 	 * Determine syndrome status.
2553 	 */
2554 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2555 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2556 
2557 	/*
2558 	 * Determine afar status.
2559 	 */
2560 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2561 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2562 				ch_flt->flt_bit);
2563 	else
2564 		afar_status = AFLT_STAT_INVALID;
2565 
2566 	synd_code = synd_to_synd_code(synd_status,
2567 	    aflt->flt_synd, ch_flt->flt_bit);
2568 
2569 	/*
2570 	 * If afar status is not invalid do a unum lookup.
2571 	 */
2572 	if (afar_status != AFLT_STAT_INVALID) {
2573 		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2574 	} else {
2575 		unum[0] = '\0';
2576 	}
2577 
2578 	/*
2579 	 * Do not send the fruid message (plat_ecc_error_data_t)
2580 	 * to the SC if it can handle the enhanced error information
2581 	 * (plat_ecc_error2_data_t) or when the tunable
2582 	 * ecc_log_fruid_enable is set to 0.
2583 	 */
2584 
2585 	if (&plat_ecc_capability_sc_get &&
2586 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2587 		if (&plat_log_fruid_error)
2588 			plat_log_fruid_error(synd_code, aflt, unum,
2589 			    ch_flt->flt_bit);
2590 	}
2591 
2592 	if (aflt->flt_func != NULL)
2593 		aflt->flt_func(aflt, unum);
2594 
2595 	if (afar_status != AFLT_STAT_INVALID)
2596 		cpu_log_diag_info(ch_flt);
2597 
2598 	/*
2599 	 * If we have a CEEN error , we do not reenable CEEN until after
2600 	 * we exit the trap handler. Otherwise, another error may
2601 	 * occur causing the handler to be entered recursively.
2602 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2603 	 * to try and ensure that the CPU makes progress in the face
2604 	 * of a CE storm.
2605 	 */
2606 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2607 		(void) timeout(cpu_delayed_check_ce_errors,
2608 		    (void *)(uintptr_t)aflt->flt_inst,
2609 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2610 	}
2611 }
2612 
2613 /*
2614  * Invoked by error_init() early in startup and therefore before
2615  * startup_errorq() is called to drain any error Q -
2616  *
2617  * startup()
2618  *   startup_end()
2619  *     error_init()
2620  *       cpu_error_init()
2621  * errorq_init()
2622  *   errorq_drain()
2623  * start_other_cpus()
2624  *
2625  * The purpose of this routine is to create error-related taskqs.  Taskqs
2626  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2627  * context.
2628  */
2629 void
2630 cpu_error_init(int items)
2631 {
2632 	/*
2633 	 * Create taskq(s) to reenable CE
2634 	 */
2635 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2636 	    items, items, TASKQ_PREPOPULATE);
2637 }
2638 
2639 void
2640 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2641 {
2642 	char unum[UNUM_NAMLEN];
2643 	int len;
2644 
2645 	switch (aflt->flt_class) {
2646 	case CPU_FAULT:
2647 		cpu_ereport_init(aflt);
2648 		if (cpu_async_log_err(aflt, eqep))
2649 			cpu_ereport_post(aflt);
2650 		break;
2651 
2652 	case BUS_FAULT:
2653 		if (aflt->flt_func != NULL) {
2654 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2655 			    unum, UNUM_NAMLEN, &len);
2656 			aflt->flt_func(aflt, unum);
2657 		}
2658 		break;
2659 
2660 	case RECIRC_CPU_FAULT:
2661 		aflt->flt_class = CPU_FAULT;
2662 		cpu_log_err(aflt);
2663 		cpu_ereport_post(aflt);
2664 		break;
2665 
2666 	case RECIRC_BUS_FAULT:
2667 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2668 		/*FALLTHRU*/
2669 	default:
2670 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2671 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2672 		return;
2673 	}
2674 }
2675 
2676 /*
2677  * Scrub and classify a CE.  This function must not modify the
2678  * fault structure passed to it but instead should return the classification
2679  * information.
2680  */
2681 
2682 static uchar_t
2683 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2684 {
2685 	uchar_t disp = CE_XDIAG_EXTALG;
2686 	on_trap_data_t otd;
2687 	uint64_t orig_err;
2688 	ch_cpu_logout_t *clop;
2689 
2690 	/*
2691 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2692 	 * this, but our other callers have not.  Disable preemption to
2693 	 * avoid CPU migration so that we restore CEEN on the correct
2694 	 * cpu later.
2695 	 *
2696 	 * CEEN is cleared so that further CEs that our instruction and
2697 	 * data footprint induce do not cause use to either creep down
2698 	 * kernel stack to the point of overflow, or do so much CE
2699 	 * notification as to make little real forward progress.
2700 	 *
2701 	 * NCEEN must not be cleared.  However it is possible that
2702 	 * our accesses to the flt_addr may provoke a bus error or timeout
2703 	 * if the offending address has just been unconfigured as part of
2704 	 * a DR action.  So we must operate under on_trap protection.
2705 	 */
2706 	kpreempt_disable();
2707 	orig_err = get_error_enable();
2708 	if (orig_err & EN_REG_CEEN)
2709 	    set_error_enable(orig_err & ~EN_REG_CEEN);
2710 
2711 	/*
2712 	 * Our classification algorithm includes the line state before
2713 	 * the scrub; we'd like this captured after the detection and
2714 	 * before the algorithm below - the earlier the better.
2715 	 *
2716 	 * If we've come from a cpu CE trap then this info already exists
2717 	 * in the cpu logout area.
2718 	 *
2719 	 * For a CE detected by memscrub for which there was no trap
2720 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2721 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2722 	 * marked the fault structure as incomplete as a flag to later
2723 	 * logging code.
2724 	 *
2725 	 * If called directly from an IO detected CE there has been
2726 	 * no line data capture.  In this case we logout to the cpu logout
2727 	 * area - that's appropriate since it's the cpu cache data we need
2728 	 * for classification.  We thus borrow the cpu logout area for a
2729 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2730 	 * this time (we will invalidate it again below).
2731 	 *
2732 	 * If called from the partner check xcall handler then this cpu
2733 	 * (the partner) has not necessarily experienced a CE at this
2734 	 * address.  But we want to capture line state before its scrub
2735 	 * attempt since we use that in our classification.
2736 	 */
2737 	if (logout_tried == B_FALSE) {
2738 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2739 			disp |= CE_XDIAG_NOLOGOUT;
2740 	}
2741 
2742 	/*
2743 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2744 	 * no longer be valid (if DR'd since the initial event) so we
2745 	 * perform this scrub under on_trap protection.  If this access is
2746 	 * ok then further accesses below will also be ok - DR cannot
2747 	 * proceed while this thread is active (preemption is disabled);
2748 	 * to be safe we'll nonetheless use on_trap again below.
2749 	 */
2750 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2751 		cpu_scrubphys(ecc);
2752 	} else {
2753 		no_trap();
2754 		if (orig_err & EN_REG_CEEN)
2755 		    set_error_enable(orig_err);
2756 		kpreempt_enable();
2757 		return (disp);
2758 	}
2759 	no_trap();
2760 
2761 	/*
2762 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2763 	 * Note that it's quite possible that the read sourced the data from
2764 	 * another cpu.
2765 	 */
2766 	if (clear_ecc(ecc))
2767 		disp |= CE_XDIAG_CE1;
2768 
2769 	/*
2770 	 * Read the data again.  This time the read is very likely to
2771 	 * come from memory since the scrub induced a writeback to memory.
2772 	 */
2773 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2774 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2775 	} else {
2776 		no_trap();
2777 		if (orig_err & EN_REG_CEEN)
2778 		    set_error_enable(orig_err);
2779 		kpreempt_enable();
2780 		return (disp);
2781 	}
2782 	no_trap();
2783 
2784 	/* Did that read induce a CE that matches the AFAR? */
2785 	if (clear_ecc(ecc))
2786 		disp |= CE_XDIAG_CE2;
2787 
2788 	/*
2789 	 * Look at the logout information and record whether we found the
2790 	 * line in l2/l3 cache.  For Panther we are interested in whether
2791 	 * we found it in either cache (it won't reside in both but
2792 	 * it is possible to read it that way given the moving target).
2793 	 */
2794 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2795 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2796 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2797 		int hit, level;
2798 		int state;
2799 		int totalsize;
2800 		ch_ec_data_t *ecp;
2801 
2802 		/*
2803 		 * If hit is nonzero then a match was found and hit will
2804 		 * be one greater than the index which hit.  For Panther we
2805 		 * also need to pay attention to level to see which of l2$ or
2806 		 * l3$ it hit in.
2807 		 */
2808 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2809 		    0, &level);
2810 
2811 		if (hit) {
2812 			--hit;
2813 			disp |= CE_XDIAG_AFARMATCH;
2814 
2815 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2816 				if (level == 2)
2817 					ecp = &clop->clo_data.chd_l2_data[hit];
2818 				else
2819 					ecp = &clop->clo_data.chd_ec_data[hit];
2820 			} else {
2821 				ASSERT(level == 2);
2822 				ecp = &clop->clo_data.chd_ec_data[hit];
2823 			}
2824 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2825 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2826 			    ecc->flt_addr, ecp->ec_tag);
2827 
2828 			/*
2829 			 * Cheetah variants use different state encodings -
2830 			 * the CH_ECSTATE_* defines vary depending on the
2831 			 * module we're compiled for.  Translate into our
2832 			 * one true version.  Conflate Owner-Shared state
2833 			 * of SSM mode with Owner as victimisation of such
2834 			 * lines may cause a writeback.
2835 			 */
2836 			switch (state) {
2837 			case CH_ECSTATE_MOD:
2838 				disp |= EC_STATE_M;
2839 				break;
2840 
2841 			case CH_ECSTATE_OWN:
2842 			case CH_ECSTATE_OWS:
2843 				disp |= EC_STATE_O;
2844 				break;
2845 
2846 			case CH_ECSTATE_EXL:
2847 				disp |= EC_STATE_E;
2848 				break;
2849 
2850 			case CH_ECSTATE_SHR:
2851 				disp |= EC_STATE_S;
2852 				break;
2853 
2854 			default:
2855 				disp |= EC_STATE_I;
2856 				break;
2857 			}
2858 		}
2859 
2860 		/*
2861 		 * If we initiated the delayed logout then we are responsible
2862 		 * for invalidating the logout area.
2863 		 */
2864 		if (logout_tried == B_FALSE) {
2865 			bzero(clop, sizeof (ch_cpu_logout_t));
2866 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2867 		}
2868 	}
2869 
2870 	/*
2871 	 * Re-enable CEEN if we turned it off.
2872 	 */
2873 	if (orig_err & EN_REG_CEEN)
2874 	    set_error_enable(orig_err);
2875 	kpreempt_enable();
2876 
2877 	return (disp);
2878 }
2879 
2880 /*
2881  * Scrub a correctable memory error and collect data for classification
2882  * of CE type.  This function is called in the detection path, ie tl0 handling
2883  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2884  */
2885 void
2886 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2887 {
2888 	/*
2889 	 * Cheetah CE classification does not set any bits in flt_status.
2890 	 * Instead we will record classification datapoints in flt_disp.
2891 	 */
2892 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2893 
2894 	/*
2895 	 * To check if the error detected by IO is persistent, sticky or
2896 	 * intermittent.  This is noticed by clear_ecc().
2897 	 */
2898 	if (ecc->flt_status & ECC_IOBUS)
2899 		ecc->flt_stat = C_AFSR_MEMORY;
2900 
2901 	/*
2902 	 * Record information from this first part of the algorithm in
2903 	 * flt_disp.
2904 	 */
2905 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2906 }
2907 
2908 /*
2909  * Select a partner to perform a further CE classification check from.
2910  * Must be called with kernel preemption disabled (to stop the cpu list
2911  * from changing).  The detecting cpu we are partnering has cpuid
2912  * aflt->flt_inst; we might not be running on the detecting cpu.
2913  *
2914  * Restrict choice to active cpus in the same cpu partition as ourselves in
2915  * an effort to stop bad cpus in one partition causing other partitions to
2916  * perform excessive diagnostic activity.  Actually since the errorq drain
2917  * is run from a softint most of the time and that is a global mechanism
2918  * this isolation is only partial.  Return NULL if we fail to find a
2919  * suitable partner.
2920  *
2921  * We prefer a partner that is in a different latency group to ourselves as
2922  * we will share fewer datapaths.  If such a partner is unavailable then
2923  * choose one in the same lgroup but prefer a different chip and only allow
2924  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2925  * flags includes PTNR_SELFOK then permit selection of the original detector.
2926  *
2927  * We keep a cache of the last partner selected for a cpu, and we'll try to
2928  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2929  * have passed since that selection was made.  This provides the benefit
2930  * of the point-of-view of different partners over time but without
2931  * requiring frequent cpu list traversals.
2932  */
2933 
2934 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2935 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2936 
2937 static cpu_t *
2938 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2939 {
2940 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2941 	hrtime_t lasttime, thistime;
2942 
2943 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2944 
2945 	dtcr = cpu[aflt->flt_inst];
2946 
2947 	/*
2948 	 * Short-circuit for the following cases:
2949 	 *	. the dtcr is not flagged active
2950 	 *	. there is just one cpu present
2951 	 *	. the detector has disappeared
2952 	 *	. we were given a bad flt_inst cpuid; this should not happen
2953 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2954 	 *	  reason to panic.
2955 	 *	. there is just one cpu left online in the cpu partition
2956 	 *
2957 	 * If we return NULL after this point then we do not update the
2958 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2959 	 * again next time; this is the case where the only other cpu online
2960 	 * in the detector's partition is on the same chip as the detector
2961 	 * and since CEEN re-enable is throttled even that case should not
2962 	 * hurt performance.
2963 	 */
2964 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2965 		return (NULL);
2966 	}
2967 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2968 		if (flags & PTNR_SELFOK) {
2969 			*typep = CE_XDIAG_PTNR_SELF;
2970 			return (dtcr);
2971 		} else {
2972 			return (NULL);
2973 		}
2974 	}
2975 
2976 	thistime = gethrtime();
2977 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2978 
2979 	/*
2980 	 * Select a starting point.
2981 	 */
2982 	if (!lasttime) {
2983 		/*
2984 		 * We've never selected a partner for this detector before.
2985 		 * Start the scan at the next online cpu in the same cpu
2986 		 * partition.
2987 		 */
2988 		sp = dtcr->cpu_next_part;
2989 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2990 		/*
2991 		 * Our last selection has not aged yet.  If this partner:
2992 		 *	. is still a valid cpu,
2993 		 *	. is still in the same partition as the detector
2994 		 *	. is still marked active
2995 		 *	. satisfies the 'flags' argument criteria
2996 		 * then select it again without updating the timestamp.
2997 		 */
2998 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2999 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
3000 		    !cpu_flagged_active(sp->cpu_flags) ||
3001 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
3002 		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
3003 		    !(flags & PTNR_SIBLINGOK))) {
3004 			sp = dtcr->cpu_next_part;
3005 		} else {
3006 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3007 				*typep = CE_XDIAG_PTNR_REMOTE;
3008 			} else if (sp == dtcr) {
3009 				*typep = CE_XDIAG_PTNR_SELF;
3010 			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3011 				*typep = CE_XDIAG_PTNR_SIBLING;
3012 			} else {
3013 				*typep = CE_XDIAG_PTNR_LOCAL;
3014 			}
3015 			return (sp);
3016 		}
3017 	} else {
3018 		/*
3019 		 * Our last selection has aged.  If it is nonetheless still a
3020 		 * valid cpu then start the scan at the next cpu in the
3021 		 * partition after our last partner.  If the last selection
3022 		 * is no longer a valid cpu then go with our default.  In
3023 		 * this way we slowly cycle through possible partners to
3024 		 * obtain multiple viewpoints over time.
3025 		 */
3026 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3027 		if (sp == NULL) {
3028 			sp = dtcr->cpu_next_part;
3029 		} else {
3030 			sp = sp->cpu_next_part;		/* may be dtcr */
3031 			if (sp->cpu_part != dtcr->cpu_part)
3032 				sp = dtcr;
3033 		}
3034 	}
3035 
3036 	/*
3037 	 * We have a proposed starting point for our search, but if this
3038 	 * cpu is offline then its cpu_next_part will point to itself
3039 	 * so we can't use that to iterate over cpus in this partition in
3040 	 * the loop below.  We still want to avoid iterating over cpus not
3041 	 * in our partition, so in the case that our starting point is offline
3042 	 * we will repoint it to be the detector itself;  and if the detector
3043 	 * happens to be offline we'll return NULL from the following loop.
3044 	 */
3045 	if (!cpu_flagged_active(sp->cpu_flags)) {
3046 		sp = dtcr;
3047 	}
3048 
3049 	ptnr = sp;
3050 	locptnr = NULL;
3051 	sibptnr = NULL;
3052 	do {
3053 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3054 			continue;
3055 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3056 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3057 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3058 			*typep = CE_XDIAG_PTNR_REMOTE;
3059 			return (ptnr);
3060 		}
3061 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3062 			if (sibptnr == NULL)
3063 				sibptnr = ptnr;
3064 			continue;
3065 		}
3066 		if (locptnr == NULL)
3067 			locptnr = ptnr;
3068 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3069 
3070 	/*
3071 	 * A foreign partner has already been returned if one was available.
3072 	 *
3073 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3074 	 * detector, is active, and is not a sibling of the detector.
3075 	 *
3076 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3077 	 * active.
3078 	 *
3079 	 * If we have to resort to using the detector itself we have already
3080 	 * checked that it is active.
3081 	 */
3082 	if (locptnr) {
3083 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3084 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3085 		*typep = CE_XDIAG_PTNR_LOCAL;
3086 		return (locptnr);
3087 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3088 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3089 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3090 		*typep = CE_XDIAG_PTNR_SIBLING;
3091 		return (sibptnr);
3092 	} else if (flags & PTNR_SELFOK) {
3093 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3094 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3095 		*typep = CE_XDIAG_PTNR_SELF;
3096 		return (dtcr);
3097 	}
3098 
3099 	return (NULL);
3100 }
3101 
3102 /*
3103  * Cross call handler that is requested to run on the designated partner of
3104  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3105  */
3106 static void
3107 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3108 {
3109 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3110 }
3111 
3112 /*
3113  * The associated errorqs are never destroyed so we do not need to deal with
3114  * them disappearing before this timeout fires.  If the affected memory
3115  * has been DR'd out since the original event the scrub algrithm will catch
3116  * any errors and return null disposition info.  If the original detecting
3117  * cpu has been DR'd out then ereport detector info will not be able to
3118  * lookup CPU type;  with a small timeout this is unlikely.
3119  */
3120 static void
3121 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3122 {
3123 	struct async_flt *aflt = cbarg->lkycb_aflt;
3124 	uchar_t disp;
3125 	cpu_t *cp;
3126 	int ptnrtype;
3127 
3128 	kpreempt_disable();
3129 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3130 	    &ptnrtype)) {
3131 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3132 		    (uint64_t)&disp);
3133 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3134 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3135 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3136 	} else {
3137 		ce_xdiag_lkydrops++;
3138 		if (ncpus > 1)
3139 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3140 			    CE_XDIAG_SKIP_NOPTNR);
3141 	}
3142 	kpreempt_enable();
3143 
3144 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3145 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3146 }
3147 
3148 /*
3149  * Called from errorq drain code when processing a CE error, both from
3150  * CPU and PCI drain functions.  Decide what further classification actions,
3151  * if any, we will perform.  Perform immediate actions now, and schedule
3152  * delayed actions as required.  Note that we are no longer necessarily running
3153  * on the detecting cpu, and that the async_flt structure will not persist on
3154  * return from this function.
3155  *
3156  * Calls to this function should aim to be self-throtlling in some way.  With
3157  * the delayed re-enable of CEEN the absolute rate of calls should not
3158  * be excessive.  Callers should also avoid performing in-depth classification
3159  * for events in pages that are already known to be suspect.
3160  *
3161  * We return nonzero to indicate that the event has been copied and
3162  * recirculated for further testing.  The caller should not log the event
3163  * in this case - it will be logged when further test results are available.
3164  *
3165  * Our possible contexts are that of errorq_drain: below lock level or from
3166  * panic context.  We can assume that the cpu we are running on is online.
3167  */
3168 
3169 
3170 #ifdef DEBUG
3171 static int ce_xdiag_forceaction;
3172 #endif
3173 
3174 int
3175 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3176     errorq_elem_t *eqep, size_t afltoffset)
3177 {
3178 	ce_dispact_t dispact, action;
3179 	cpu_t *cp;
3180 	uchar_t dtcrinfo, disp;
3181 	int ptnrtype;
3182 
3183 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3184 		ce_xdiag_drops++;
3185 		return (0);
3186 	} else if (!aflt->flt_in_memory) {
3187 		ce_xdiag_drops++;
3188 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3189 		return (0);
3190 	}
3191 
3192 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3193 
3194 	/*
3195 	 * Some correctable events are not scrubbed/classified, such as those
3196 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3197 	 * initial detector classification go no further.
3198 	 */
3199 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3200 		ce_xdiag_drops++;
3201 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3202 		return (0);
3203 	}
3204 
3205 	dispact = CE_DISPACT(ce_disp_table,
3206 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3207 	    CE_XDIAG_STATE(dtcrinfo),
3208 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3209 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3210 
3211 
3212 	action = CE_ACT(dispact);	/* bad lookup caught below */
3213 #ifdef DEBUG
3214 	if (ce_xdiag_forceaction != 0)
3215 		action = ce_xdiag_forceaction;
3216 #endif
3217 
3218 	switch (action) {
3219 	case CE_ACT_LKYCHK: {
3220 		caddr_t ndata;
3221 		errorq_elem_t *neqep;
3222 		struct async_flt *ecc;
3223 		ce_lkychk_cb_t *cbargp;
3224 
3225 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3226 			ce_xdiag_lkydrops++;
3227 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3228 			    CE_XDIAG_SKIP_DUPFAIL);
3229 			break;
3230 		}
3231 		ecc = (struct async_flt *)(ndata + afltoffset);
3232 
3233 		ASSERT(ecc->flt_class == CPU_FAULT ||
3234 		    ecc->flt_class == BUS_FAULT);
3235 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3236 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3237 
3238 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3239 		cbargp->lkycb_aflt = ecc;
3240 		cbargp->lkycb_eqp = eqp;
3241 		cbargp->lkycb_eqep = neqep;
3242 
3243 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3244 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3245 		return (1);
3246 	}
3247 
3248 	case CE_ACT_PTNRCHK:
3249 		kpreempt_disable();	/* stop cpu list changing */
3250 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3251 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3252 			    (uint64_t)aflt, (uint64_t)&disp);
3253 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3254 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3255 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3256 		} else if (ncpus > 1) {
3257 			ce_xdiag_ptnrdrops++;
3258 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3259 			    CE_XDIAG_SKIP_NOPTNR);
3260 		} else {
3261 			ce_xdiag_ptnrdrops++;
3262 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3263 			    CE_XDIAG_SKIP_UNIPROC);
3264 		}
3265 		kpreempt_enable();
3266 		break;
3267 
3268 	case CE_ACT_DONE:
3269 		break;
3270 
3271 	case CE_ACT(CE_DISP_BAD):
3272 	default:
3273 #ifdef DEBUG
3274 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3275 #endif
3276 		ce_xdiag_bad++;
3277 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3278 		break;
3279 	}
3280 
3281 	return (0);
3282 }
3283 
3284 /*
3285  * We route all errors through a single switch statement.
3286  */
3287 void
3288 cpu_ue_log_err(struct async_flt *aflt)
3289 {
3290 	switch (aflt->flt_class) {
3291 	case CPU_FAULT:
3292 		cpu_ereport_init(aflt);
3293 		if (cpu_async_log_err(aflt, NULL))
3294 			cpu_ereport_post(aflt);
3295 		break;
3296 
3297 	case BUS_FAULT:
3298 		bus_async_log_err(aflt);
3299 		break;
3300 
3301 	default:
3302 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3303 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3304 		return;
3305 	}
3306 }
3307 
3308 /*
3309  * Routine for panic hook callback from panic_idle().
3310  */
3311 void
3312 cpu_async_panic_callb(void)
3313 {
3314 	ch_async_flt_t ch_flt;
3315 	struct async_flt *aflt;
3316 	ch_cpu_errors_t cpu_error_regs;
3317 	uint64_t afsr_errs;
3318 
3319 	get_cpu_error_state(&cpu_error_regs);
3320 
3321 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3322 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3323 
3324 	if (afsr_errs) {
3325 
3326 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3327 		aflt = (struct async_flt *)&ch_flt;
3328 		aflt->flt_id = gethrtime_waitfree();
3329 		aflt->flt_bus_id = getprocessorid();
3330 		aflt->flt_inst = CPU->cpu_id;
3331 		aflt->flt_stat = cpu_error_regs.afsr;
3332 		aflt->flt_addr = cpu_error_regs.afar;
3333 		aflt->flt_prot = AFLT_PROT_NONE;
3334 		aflt->flt_class = CPU_FAULT;
3335 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3336 		aflt->flt_panic = 1;
3337 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3338 		ch_flt.afsr_errs = afsr_errs;
3339 #if defined(SERRANO)
3340 		ch_flt.afar2 = cpu_error_regs.afar2;
3341 #endif	/* SERRANO */
3342 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3343 	}
3344 }
3345 
3346 /*
3347  * Routine to convert a syndrome into a syndrome code.
3348  */
3349 static int
3350 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3351 {
3352 	if (synd_status == AFLT_STAT_INVALID)
3353 		return (-1);
3354 
3355 	/*
3356 	 * Use the syndrome to index the appropriate syndrome table,
3357 	 * to get the code indicating which bit(s) is(are) bad.
3358 	 */
3359 	if (afsr_bit &
3360 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3361 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3362 #if defined(JALAPENO) || defined(SERRANO)
3363 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3364 				return (-1);
3365 			else
3366 				return (BPAR0 + synd);
3367 #else /* JALAPENO || SERRANO */
3368 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3369 				return (-1);
3370 			else
3371 				return (mtag_syndrome_tab[synd]);
3372 #endif /* JALAPENO || SERRANO */
3373 		} else {
3374 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3375 				return (-1);
3376 			else
3377 				return (ecc_syndrome_tab[synd]);
3378 		}
3379 	} else {
3380 		return (-1);
3381 	}
3382 }
3383 
3384 int
3385 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3386 {
3387 	if (&plat_get_mem_sid)
3388 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3389 	else
3390 		return (ENOTSUP);
3391 }
3392 
3393 int
3394 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3395 {
3396 	if (&plat_get_mem_offset)
3397 		return (plat_get_mem_offset(flt_addr, offp));
3398 	else
3399 		return (ENOTSUP);
3400 }
3401 
3402 int
3403 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3404 {
3405 	if (&plat_get_mem_addr)
3406 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3407 	else
3408 		return (ENOTSUP);
3409 }
3410 
3411 /*
3412  * Routine to return a string identifying the physical name
3413  * associated with a memory/cache error.
3414  */
3415 int
3416 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3417     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3418     ushort_t flt_status, char *buf, int buflen, int *lenp)
3419 {
3420 	int synd_code;
3421 	int ret;
3422 
3423 	/*
3424 	 * An AFSR of -1 defaults to a memory syndrome.
3425 	 */
3426 	if (flt_stat == (uint64_t)-1)
3427 		flt_stat = C_AFSR_CE;
3428 
3429 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3430 
3431 	/*
3432 	 * Syndrome code must be either a single-bit error code
3433 	 * (0...143) or -1 for unum lookup.
3434 	 */
3435 	if (synd_code < 0 || synd_code >= M2)
3436 		synd_code = -1;
3437 	if (&plat_get_mem_unum) {
3438 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3439 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3440 			buf[0] = '\0';
3441 			*lenp = 0;
3442 		}
3443 
3444 		return (ret);
3445 	}
3446 
3447 	return (ENOTSUP);
3448 }
3449 
3450 /*
3451  * Wrapper for cpu_get_mem_unum() routine that takes an
3452  * async_flt struct rather than explicit arguments.
3453  */
3454 int
3455 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3456     char *buf, int buflen, int *lenp)
3457 {
3458 	/*
3459 	 * If we come thru here for an IO bus error aflt->flt_stat will
3460 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3461 	 * so it will interpret this as a memory error.
3462 	 */
3463 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3464 	    (aflt->flt_class == BUS_FAULT) ?
3465 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3466 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3467 	    aflt->flt_status, buf, buflen, lenp));
3468 }
3469 
3470 /*
3471  * Return unum string given synd_code and async_flt into
3472  * the buf with size UNUM_NAMLEN
3473  */
3474 static int
3475 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3476 {
3477 	int ret, len;
3478 
3479 	/*
3480 	 * Syndrome code must be either a single-bit error code
3481 	 * (0...143) or -1 for unum lookup.
3482 	 */
3483 	if (synd_code < 0 || synd_code >= M2)
3484 		synd_code = -1;
3485 	if (&plat_get_mem_unum) {
3486 		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3487 		    aflt->flt_bus_id, aflt->flt_in_memory,
3488 		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3489 			buf[0] = '\0';
3490 		}
3491 		return (ret);
3492 	}
3493 
3494 	buf[0] = '\0';
3495 	return (ENOTSUP);
3496 }
3497 
3498 /*
3499  * This routine is a more generic interface to cpu_get_mem_unum()
3500  * that may be used by other modules (e.g. the 'mm' driver, through
3501  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3502  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3503  */
3504 int
3505 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3506     char *buf, int buflen, int *lenp)
3507 {
3508 	int synd_status, flt_in_memory, ret;
3509 	ushort_t flt_status = 0;
3510 	char unum[UNUM_NAMLEN];
3511 	uint64_t t_afsr_errs;
3512 
3513 	/*
3514 	 * Check for an invalid address.
3515 	 */
3516 	if (afar == (uint64_t)-1)
3517 		return (ENXIO);
3518 
3519 	if (synd == (uint64_t)-1)
3520 		synd_status = AFLT_STAT_INVALID;
3521 	else
3522 		synd_status = AFLT_STAT_VALID;
3523 
3524 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3525 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3526 
3527 	/*
3528 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3529 	 */
3530 	if (*afsr == (uint64_t)-1)
3531 		t_afsr_errs = C_AFSR_CE;
3532 	else {
3533 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3534 #if defined(CHEETAH_PLUS)
3535 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3536 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3537 #endif	/* CHEETAH_PLUS */
3538 	}
3539 
3540 	/*
3541 	 * Turn on ECC_ECACHE if error type is E$ Data.
3542 	 */
3543 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3544 		flt_status |= ECC_ECACHE;
3545 
3546 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3547 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3548 	if (ret != 0)
3549 		return (ret);
3550 
3551 	if (*lenp >= buflen)
3552 		return (ENAMETOOLONG);
3553 
3554 	(void) strncpy(buf, unum, buflen);
3555 
3556 	return (0);
3557 }
3558 
3559 /*
3560  * Routine to return memory information associated
3561  * with a physical address and syndrome.
3562  */
3563 int
3564 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3565     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3566     int *segsp, int *banksp, int *mcidp)
3567 {
3568 	int synd_status, synd_code;
3569 
3570 	if (afar == (uint64_t)-1)
3571 		return (ENXIO);
3572 
3573 	if (synd == (uint64_t)-1)
3574 		synd_status = AFLT_STAT_INVALID;
3575 	else
3576 		synd_status = AFLT_STAT_VALID;
3577 
3578 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3579 
3580 	if (p2get_mem_info != NULL)
3581 		return ((p2get_mem_info)(synd_code, afar,
3582 			mem_sizep, seg_sizep, bank_sizep,
3583 			segsp, banksp, mcidp));
3584 	else
3585 		return (ENOTSUP);
3586 }
3587 
3588 /*
3589  * Routine to return a string identifying the physical
3590  * name associated with a cpuid.
3591  */
3592 int
3593 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3594 {
3595 	int ret;
3596 	char unum[UNUM_NAMLEN];
3597 
3598 	if (&plat_get_cpu_unum) {
3599 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3600 		    != 0)
3601 			return (ret);
3602 	} else {
3603 		return (ENOTSUP);
3604 	}
3605 
3606 	if (*lenp >= buflen)
3607 		return (ENAMETOOLONG);
3608 
3609 	(void) strncpy(buf, unum, buflen);
3610 
3611 	return (0);
3612 }
3613 
3614 /*
3615  * This routine exports the name buffer size.
3616  */
3617 size_t
3618 cpu_get_name_bufsize()
3619 {
3620 	return (UNUM_NAMLEN);
3621 }
3622 
3623 /*
3624  * Historical function, apparantly not used.
3625  */
3626 /* ARGSUSED */
3627 void
3628 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
3629 {}
3630 
3631 /*
3632  * Historical function only called for SBus errors in debugging.
3633  */
3634 /*ARGSUSED*/
3635 void
3636 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
3637 {}
3638 
3639 /*
3640  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3641  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3642  * an async fault structure argument is passed in, the captured error state
3643  * (AFSR, AFAR) info will be returned in the structure.
3644  */
3645 int
3646 clear_errors(ch_async_flt_t *ch_flt)
3647 {
3648 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3649 	ch_cpu_errors_t	cpu_error_regs;
3650 
3651 	get_cpu_error_state(&cpu_error_regs);
3652 
3653 	if (ch_flt != NULL) {
3654 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3655 		aflt->flt_addr = cpu_error_regs.afar;
3656 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3657 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3658 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3659 #if defined(SERRANO)
3660 		ch_flt->afar2 = cpu_error_regs.afar2;
3661 #endif	/* SERRANO */
3662 	}
3663 
3664 	set_cpu_error_state(&cpu_error_regs);
3665 
3666 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3667 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3668 }
3669 
3670 /*
3671  * Clear any AFSR error bits, and check for persistence.
3672  *
3673  * It would be desirable to also insist that syndrome match.  PCI handling
3674  * has already filled flt_synd.  For errors trapped by CPU we only fill
3675  * flt_synd when we queue the event, so we do not have a valid flt_synd
3676  * during initial classification (it is valid if we're called as part of
3677  * subsequent low-pil additional classification attempts).  We could try
3678  * to determine which syndrome to use: we know we're only called for
3679  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3680  * would be esynd/none and esynd/msynd, respectively.  If that is
3681  * implemented then what do we do in the case that we do experience an
3682  * error on the same afar but with different syndrome?  At the very least
3683  * we should count such occurences.  Anyway, for now, we'll leave it as
3684  * it has been for ages.
3685  */
3686 static int
3687 clear_ecc(struct async_flt *aflt)
3688 {
3689 	ch_cpu_errors_t	cpu_error_regs;
3690 
3691 	/*
3692 	 * Snapshot the AFSR and AFAR and clear any errors
3693 	 */
3694 	get_cpu_error_state(&cpu_error_regs);
3695 	set_cpu_error_state(&cpu_error_regs);
3696 
3697 	/*
3698 	 * If any of the same memory access error bits are still on and
3699 	 * the AFAR matches, return that the error is persistent.
3700 	 */
3701 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3702 	    cpu_error_regs.afar == aflt->flt_addr);
3703 }
3704 
3705 /*
3706  * Turn off all cpu error detection, normally only used for panics.
3707  */
3708 void
3709 cpu_disable_errors(void)
3710 {
3711 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3712 
3713 	/*
3714 	 * With error detection now turned off, check the other cpus
3715 	 * logout areas for any unlogged errors.
3716 	 */
3717 	if (enable_check_other_cpus_logout) {
3718 		cpu_check_other_cpus_logout();
3719 		/*
3720 		 * Make a second pass over the logout areas, in case
3721 		 * there is a failing CPU in an error-trap loop which
3722 		 * will write to the logout area once it is emptied.
3723 		 */
3724 		cpu_check_other_cpus_logout();
3725 	}
3726 }
3727 
3728 /*
3729  * Enable errors.
3730  */
3731 void
3732 cpu_enable_errors(void)
3733 {
3734 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3735 }
3736 
3737 /*
3738  * Flush the entire ecache using displacement flush by reading through a
3739  * physical address range twice as large as the Ecache.
3740  */
3741 void
3742 cpu_flush_ecache(void)
3743 {
3744 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3745 	    cpunodes[CPU->cpu_id].ecache_linesize);
3746 }
3747 
3748 /*
3749  * Return CPU E$ set size - E$ size divided by the associativity.
3750  * We use this function in places where the CPU_PRIVATE ptr may not be
3751  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3752  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3753  * up before the kernel switches from OBP's to the kernel's trap table, so
3754  * we don't have to worry about cpunodes being unitialized.
3755  */
3756 int
3757 cpu_ecache_set_size(struct cpu *cp)
3758 {
3759 	if (CPU_PRIVATE(cp))
3760 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3761 
3762 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3763 }
3764 
3765 /*
3766  * Flush Ecache line.
3767  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3768  * Uses normal displacement flush for Cheetah.
3769  */
3770 static void
3771 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3772 {
3773 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3774 	int ec_set_size = cpu_ecache_set_size(CPU);
3775 
3776 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3777 }
3778 
3779 /*
3780  * Scrub physical address.
3781  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3782  * Ecache or direct-mapped Ecache.
3783  */
3784 static void
3785 cpu_scrubphys(struct async_flt *aflt)
3786 {
3787 	int ec_set_size = cpu_ecache_set_size(CPU);
3788 
3789 	scrubphys(aflt->flt_addr, ec_set_size);
3790 }
3791 
3792 /*
3793  * Clear physical address.
3794  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3795  * Ecache or direct-mapped Ecache.
3796  */
3797 void
3798 cpu_clearphys(struct async_flt *aflt)
3799 {
3800 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3801 	int ec_set_size = cpu_ecache_set_size(CPU);
3802 
3803 
3804 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3805 }
3806 
3807 #if defined(CPU_IMP_ECACHE_ASSOC)
3808 /*
3809  * Check for a matching valid line in all the sets.
3810  * If found, return set# + 1. Otherwise return 0.
3811  */
3812 static int
3813 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3814 {
3815 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3816 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3817 	int ec_set_size = cpu_ecache_set_size(CPU);
3818 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3819 	int nway = cpu_ecache_nway();
3820 	int i;
3821 
3822 	for (i = 0; i < nway; i++, ecp++) {
3823 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3824 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3825 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3826 			return (i+1);
3827 	}
3828 	return (0);
3829 }
3830 #endif /* CPU_IMP_ECACHE_ASSOC */
3831 
3832 /*
3833  * Check whether a line in the given logout info matches the specified
3834  * fault address.  If reqval is set then the line must not be Invalid.
3835  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3836  * set to 2 for l2$ or 3 for l3$.
3837  */
3838 static int
3839 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3840 {
3841 	ch_diag_data_t *cdp = data;
3842 	ch_ec_data_t *ecp;
3843 	int totalsize, ec_set_size;
3844 	int i, ways;
3845 	int match = 0;
3846 	int tagvalid;
3847 	uint64_t addr, tagpa;
3848 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3849 
3850 	/*
3851 	 * Check the l2$ logout data
3852 	 */
3853 	if (ispanther) {
3854 		ecp = &cdp->chd_l2_data[0];
3855 		ec_set_size = PN_L2_SET_SIZE;
3856 		ways = PN_L2_NWAYS;
3857 	} else {
3858 		ecp = &cdp->chd_ec_data[0];
3859 		ec_set_size = cpu_ecache_set_size(CPU);
3860 		ways = cpu_ecache_nway();
3861 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3862 	}
3863 	/* remove low order PA bits from fault address not used in PA tag */
3864 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3865 	for (i = 0; i < ways; i++, ecp++) {
3866 		if (ispanther) {
3867 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3868 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3869 		} else {
3870 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3871 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3872 			    ecp->ec_tag);
3873 		}
3874 		if (tagpa == addr && (!reqval || tagvalid)) {
3875 			match = i + 1;
3876 			*level = 2;
3877 			break;
3878 		}
3879 	}
3880 
3881 	if (match || !ispanther)
3882 		return (match);
3883 
3884 	/* For Panther we also check the l3$ */
3885 	ecp = &cdp->chd_ec_data[0];
3886 	ec_set_size = PN_L3_SET_SIZE;
3887 	ways = PN_L3_NWAYS;
3888 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3889 
3890 	for (i = 0; i < ways; i++, ecp++) {
3891 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3892 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3893 			match = i + 1;
3894 			*level = 3;
3895 			break;
3896 		}
3897 	}
3898 
3899 	return (match);
3900 }
3901 
3902 #if defined(CPU_IMP_L1_CACHE_PARITY)
3903 /*
3904  * Record information related to the source of an Dcache Parity Error.
3905  */
3906 static void
3907 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3908 {
3909 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3910 	int index;
3911 
3912 	/*
3913 	 * Since instruction decode cannot be done at high PIL
3914 	 * just examine the entire Dcache to locate the error.
3915 	 */
3916 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3917 		ch_flt->parity_data.dpe.cpl_way = -1;
3918 		ch_flt->parity_data.dpe.cpl_off = -1;
3919 	}
3920 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3921 		cpu_dcache_parity_check(ch_flt, index);
3922 }
3923 
3924 /*
3925  * Check all ways of the Dcache at a specified index for good parity.
3926  */
3927 static void
3928 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3929 {
3930 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3931 	uint64_t parity_bits, pbits, data_word;
3932 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3933 	int way, word, data_byte;
3934 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3935 	ch_dc_data_t tmp_dcp;
3936 
3937 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3938 		/*
3939 		 * Perform diagnostic read.
3940 		 */
3941 		get_dcache_dtag(index + way * dc_set_size,
3942 				(uint64_t *)&tmp_dcp);
3943 
3944 		/*
3945 		 * Check tag for even parity.
3946 		 * Sum of 1 bits (including parity bit) should be even.
3947 		 */
3948 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3949 			/*
3950 			 * If this is the first error log detailed information
3951 			 * about it and check the snoop tag. Otherwise just
3952 			 * record the fact that we found another error.
3953 			 */
3954 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3955 				ch_flt->parity_data.dpe.cpl_way = way;
3956 				ch_flt->parity_data.dpe.cpl_cache =
3957 				    CPU_DC_PARITY;
3958 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3959 
3960 				if (popc64(tmp_dcp.dc_sntag &
3961 						CHP_DCSNTAG_PARMASK) & 1) {
3962 					ch_flt->parity_data.dpe.cpl_tag |=
3963 								CHP_DC_SNTAG;
3964 					ch_flt->parity_data.dpe.cpl_lcnt++;
3965 				}
3966 
3967 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3968 			}
3969 
3970 			ch_flt->parity_data.dpe.cpl_lcnt++;
3971 		}
3972 
3973 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3974 			/*
3975 			 * Panther has more parity bits than the other
3976 			 * processors for covering dcache data and so each
3977 			 * byte of data in each word has its own parity bit.
3978 			 */
3979 			parity_bits = tmp_dcp.dc_pn_data_parity;
3980 			for (word = 0; word < 4; word++) {
3981 				data_word = tmp_dcp.dc_data[word];
3982 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3983 				for (data_byte = 0; data_byte < 8;
3984 				    data_byte++) {
3985 					if (((popc64(data_word &
3986 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3987 					    (pbits & 1)) {
3988 						cpu_record_dc_data_parity(
3989 						ch_flt, dcp, &tmp_dcp, way,
3990 						word);
3991 					}
3992 					pbits >>= 1;
3993 					data_word >>= 8;
3994 				}
3995 				parity_bits >>= 8;
3996 			}
3997 		} else {
3998 			/*
3999 			 * Check data array for even parity.
4000 			 * The 8 parity bits are grouped into 4 pairs each
4001 			 * of which covers a 64-bit word.  The endianness is
4002 			 * reversed -- the low-order parity bits cover the
4003 			 * high-order data words.
4004 			 */
4005 			parity_bits = tmp_dcp.dc_utag >> 8;
4006 			for (word = 0; word < 4; word++) {
4007 				pbits = (parity_bits >> (6 - word * 2)) & 3;
4008 				if ((popc64(tmp_dcp.dc_data[word]) +
4009 				    parity_bits_popc[pbits]) & 1) {
4010 					cpu_record_dc_data_parity(ch_flt, dcp,
4011 					    &tmp_dcp, way, word);
4012 				}
4013 			}
4014 		}
4015 	}
4016 }
4017 
4018 static void
4019 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4020     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4021 {
4022 	/*
4023 	 * If this is the first error log detailed information about it.
4024 	 * Otherwise just record the fact that we found another error.
4025 	 */
4026 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4027 		ch_flt->parity_data.dpe.cpl_way = way;
4028 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4029 		ch_flt->parity_data.dpe.cpl_off = word * 8;
4030 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4031 	}
4032 	ch_flt->parity_data.dpe.cpl_lcnt++;
4033 }
4034 
4035 /*
4036  * Record information related to the source of an Icache Parity Error.
4037  *
4038  * Called with the Icache disabled so any diagnostic accesses are safe.
4039  */
4040 static void
4041 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4042 {
4043 	int	ic_set_size;
4044 	int	ic_linesize;
4045 	int	index;
4046 
4047 	if (CPU_PRIVATE(CPU)) {
4048 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4049 		    CH_ICACHE_NWAY;
4050 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4051 	} else {
4052 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4053 		ic_linesize = icache_linesize;
4054 	}
4055 
4056 	ch_flt->parity_data.ipe.cpl_way = -1;
4057 	ch_flt->parity_data.ipe.cpl_off = -1;
4058 
4059 	for (index = 0; index < ic_set_size; index += ic_linesize)
4060 		cpu_icache_parity_check(ch_flt, index);
4061 }
4062 
4063 /*
4064  * Check all ways of the Icache at a specified index for good parity.
4065  */
4066 static void
4067 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4068 {
4069 	uint64_t parmask, pn_inst_parity;
4070 	int ic_set_size;
4071 	int ic_linesize;
4072 	int flt_index, way, instr, num_instr;
4073 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4074 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4075 	ch_ic_data_t tmp_icp;
4076 
4077 	if (CPU_PRIVATE(CPU)) {
4078 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4079 		    CH_ICACHE_NWAY;
4080 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4081 	} else {
4082 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4083 		ic_linesize = icache_linesize;
4084 	}
4085 
4086 	/*
4087 	 * Panther has twice as many instructions per icache line and the
4088 	 * instruction parity bit is in a different location.
4089 	 */
4090 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4091 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4092 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4093 	} else {
4094 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4095 		pn_inst_parity = 0;
4096 	}
4097 
4098 	/*
4099 	 * Index at which we expect to find the parity error.
4100 	 */
4101 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4102 
4103 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4104 		/*
4105 		 * Diagnostic reads expect address argument in ASI format.
4106 		 */
4107 		get_icache_dtag(2 * (index + way * ic_set_size),
4108 				(uint64_t *)&tmp_icp);
4109 
4110 		/*
4111 		 * If this is the index in which we expect to find the
4112 		 * error log detailed information about each of the ways.
4113 		 * This information will be displayed later if we can't
4114 		 * determine the exact way in which the error is located.
4115 		 */
4116 		if (flt_index == index)
4117 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4118 
4119 		/*
4120 		 * Check tag for even parity.
4121 		 * Sum of 1 bits (including parity bit) should be even.
4122 		 */
4123 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4124 			/*
4125 			 * If this way is the one in which we expected
4126 			 * to find the error record the way and check the
4127 			 * snoop tag. Otherwise just record the fact we
4128 			 * found another error.
4129 			 */
4130 			if (flt_index == index) {
4131 				ch_flt->parity_data.ipe.cpl_way = way;
4132 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4133 
4134 				if (popc64(tmp_icp.ic_sntag &
4135 						CHP_ICSNTAG_PARMASK) & 1) {
4136 					ch_flt->parity_data.ipe.cpl_tag |=
4137 								CHP_IC_SNTAG;
4138 					ch_flt->parity_data.ipe.cpl_lcnt++;
4139 				}
4140 
4141 			}
4142 			ch_flt->parity_data.ipe.cpl_lcnt++;
4143 			continue;
4144 		}
4145 
4146 		/*
4147 		 * Check instruction data for even parity.
4148 		 * Bits participating in parity differ for PC-relative
4149 		 * versus non-PC-relative instructions.
4150 		 */
4151 		for (instr = 0; instr < num_instr; instr++) {
4152 			parmask = (tmp_icp.ic_data[instr] &
4153 					CH_ICDATA_PRED_ISPCREL) ?
4154 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4155 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4156 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4157 				/*
4158 				 * If this way is the one in which we expected
4159 				 * to find the error record the way and offset.
4160 				 * Otherwise just log the fact we found another
4161 				 * error.
4162 				 */
4163 				if (flt_index == index) {
4164 					ch_flt->parity_data.ipe.cpl_way = way;
4165 					ch_flt->parity_data.ipe.cpl_off =
4166 								instr * 4;
4167 				}
4168 				ch_flt->parity_data.ipe.cpl_lcnt++;
4169 				continue;
4170 			}
4171 		}
4172 	}
4173 }
4174 
4175 /*
4176  * Record information related to the source of an Pcache Parity Error.
4177  */
4178 static void
4179 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4180 {
4181 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4182 	int index;
4183 
4184 	/*
4185 	 * Since instruction decode cannot be done at high PIL just
4186 	 * examine the entire Pcache to check for any parity errors.
4187 	 */
4188 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4189 		ch_flt->parity_data.dpe.cpl_way = -1;
4190 		ch_flt->parity_data.dpe.cpl_off = -1;
4191 	}
4192 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4193 		cpu_pcache_parity_check(ch_flt, index);
4194 }
4195 
4196 /*
4197  * Check all ways of the Pcache at a specified index for good parity.
4198  */
4199 static void
4200 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4201 {
4202 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4203 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4204 	int way, word, pbit, parity_bits;
4205 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4206 	ch_pc_data_t tmp_pcp;
4207 
4208 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4209 		/*
4210 		 * Perform diagnostic read.
4211 		 */
4212 		get_pcache_dtag(index + way * pc_set_size,
4213 				(uint64_t *)&tmp_pcp);
4214 		/*
4215 		 * Check data array for odd parity. There are 8 parity
4216 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4217 		 * of those bits covers exactly 8 bytes of the data
4218 		 * array:
4219 		 *
4220 		 *	parity bit	P$ data bytes covered
4221 		 *	----------	---------------------
4222 		 *	50		63:56
4223 		 *	51		55:48
4224 		 *	52		47:40
4225 		 *	53		39:32
4226 		 *	54		31:24
4227 		 *	55		23:16
4228 		 *	56		15:8
4229 		 *	57		7:0
4230 		 */
4231 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4232 		for (word = 0; word < pc_data_words; word++) {
4233 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4234 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4235 				/*
4236 				 * If this is the first error log detailed
4237 				 * information about it. Otherwise just record
4238 				 * the fact that we found another error.
4239 				 */
4240 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4241 					ch_flt->parity_data.dpe.cpl_way = way;
4242 					ch_flt->parity_data.dpe.cpl_cache =
4243 					    CPU_PC_PARITY;
4244 					ch_flt->parity_data.dpe.cpl_off =
4245 					    word * sizeof (uint64_t);
4246 					bcopy(&tmp_pcp, pcp,
4247 							sizeof (ch_pc_data_t));
4248 				}
4249 				ch_flt->parity_data.dpe.cpl_lcnt++;
4250 			}
4251 		}
4252 	}
4253 }
4254 
4255 
4256 /*
4257  * Add L1 Data cache data to the ereport payload.
4258  */
4259 static void
4260 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4261 {
4262 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4263 	ch_dc_data_t *dcp;
4264 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4265 	uint_t nelem;
4266 	int i, ways_to_check, ways_logged = 0;
4267 
4268 	/*
4269 	 * If this is an D$ fault then there may be multiple
4270 	 * ways captured in the ch_parity_log_t structure.
4271 	 * Otherwise, there will be at most one way captured
4272 	 * in the ch_diag_data_t struct.
4273 	 * Check each way to see if it should be encoded.
4274 	 */
4275 	if (ch_flt->flt_type == CPU_DC_PARITY)
4276 		ways_to_check = CH_DCACHE_NWAY;
4277 	else
4278 		ways_to_check = 1;
4279 	for (i = 0; i < ways_to_check; i++) {
4280 		if (ch_flt->flt_type == CPU_DC_PARITY)
4281 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4282 		else
4283 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4284 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4285 			bcopy(dcp, &dcdata[ways_logged],
4286 				sizeof (ch_dc_data_t));
4287 			ways_logged++;
4288 		}
4289 	}
4290 
4291 	/*
4292 	 * Add the dcache data to the payload.
4293 	 */
4294 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4295 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4296 	if (ways_logged != 0) {
4297 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4298 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4299 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4300 	}
4301 }
4302 
4303 /*
4304  * Add L1 Instruction cache data to the ereport payload.
4305  */
4306 static void
4307 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4308 {
4309 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4310 	ch_ic_data_t *icp;
4311 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4312 	uint_t nelem;
4313 	int i, ways_to_check, ways_logged = 0;
4314 
4315 	/*
4316 	 * If this is an I$ fault then there may be multiple
4317 	 * ways captured in the ch_parity_log_t structure.
4318 	 * Otherwise, there will be at most one way captured
4319 	 * in the ch_diag_data_t struct.
4320 	 * Check each way to see if it should be encoded.
4321 	 */
4322 	if (ch_flt->flt_type == CPU_IC_PARITY)
4323 		ways_to_check = CH_ICACHE_NWAY;
4324 	else
4325 		ways_to_check = 1;
4326 	for (i = 0; i < ways_to_check; i++) {
4327 		if (ch_flt->flt_type == CPU_IC_PARITY)
4328 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4329 		else
4330 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4331 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4332 			bcopy(icp, &icdata[ways_logged],
4333 				sizeof (ch_ic_data_t));
4334 			ways_logged++;
4335 		}
4336 	}
4337 
4338 	/*
4339 	 * Add the icache data to the payload.
4340 	 */
4341 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4342 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4343 	if (ways_logged != 0) {
4344 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4345 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4346 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4347 	}
4348 }
4349 
4350 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4351 
4352 /*
4353  * Add ecache data to payload.
4354  */
4355 static void
4356 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4357 {
4358 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4359 	ch_ec_data_t *ecp;
4360 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4361 	uint_t nelem;
4362 	int i, ways_logged = 0;
4363 
4364 	/*
4365 	 * Check each way to see if it should be encoded
4366 	 * and concatinate it into a temporary buffer.
4367 	 */
4368 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4369 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4370 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4371 			bcopy(ecp, &ecdata[ways_logged],
4372 				sizeof (ch_ec_data_t));
4373 			ways_logged++;
4374 		}
4375 	}
4376 
4377 	/*
4378 	 * Panther CPUs have an additional level of cache and so
4379 	 * what we just collected was the L3 (ecache) and not the
4380 	 * L2 cache.
4381 	 */
4382 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4383 		/*
4384 		 * Add the L3 (ecache) data to the payload.
4385 		 */
4386 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4387 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4388 		if (ways_logged != 0) {
4389 			nelem = sizeof (ch_ec_data_t) /
4390 			    sizeof (uint64_t) * ways_logged;
4391 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4392 			    DATA_TYPE_UINT64_ARRAY, nelem,
4393 			    (uint64_t *)ecdata, NULL);
4394 		}
4395 
4396 		/*
4397 		 * Now collect the L2 cache.
4398 		 */
4399 		ways_logged = 0;
4400 		for (i = 0; i < PN_L2_NWAYS; i++) {
4401 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4402 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4403 				bcopy(ecp, &ecdata[ways_logged],
4404 				    sizeof (ch_ec_data_t));
4405 				ways_logged++;
4406 			}
4407 		}
4408 	}
4409 
4410 	/*
4411 	 * Add the L2 cache data to the payload.
4412 	 */
4413 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4414 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4415 	if (ways_logged != 0) {
4416 		nelem = sizeof (ch_ec_data_t) /
4417 			sizeof (uint64_t) * ways_logged;
4418 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4419 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4420 	}
4421 }
4422 
4423 /*
4424  * Initialize cpu scheme for specified cpu.
4425  */
4426 static void
4427 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4428 {
4429 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4430 	uint8_t mask;
4431 
4432 	mask = cpunodes[cpuid].version;
4433 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4434 	    (u_longlong_t)cpunodes[cpuid].device_id);
4435 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4436 	    cpuid, &mask, (const char *)sbuf);
4437 }
4438 
4439 /*
4440  * Returns ereport resource type.
4441  */
4442 static int
4443 cpu_error_to_resource_type(struct async_flt *aflt)
4444 {
4445 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4446 
4447 	switch (ch_flt->flt_type) {
4448 
4449 	case CPU_CE_ECACHE:
4450 	case CPU_UE_ECACHE:
4451 	case CPU_UE_ECACHE_RETIRE:
4452 	case CPU_ORPH:
4453 		/*
4454 		 * If AFSR error bit indicates L2$ Data for Cheetah,
4455 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4456 		 * E$ Data type, otherwise, return CPU type.
4457 		 */
4458 		if (cpu_error_is_ecache_data(aflt->flt_inst,
4459 		    ch_flt->flt_bit))
4460 			return (ERRTYPE_ECACHE_DATA);
4461 		return (ERRTYPE_CPU);
4462 
4463 	case CPU_CE:
4464 	case CPU_UE:
4465 	case CPU_EMC:
4466 	case CPU_DUE:
4467 	case CPU_RCE:
4468 	case CPU_RUE:
4469 	case CPU_FRC:
4470 	case CPU_FRU:
4471 		return (ERRTYPE_MEMORY);
4472 
4473 	case CPU_IC_PARITY:
4474 	case CPU_DC_PARITY:
4475 	case CPU_FPUERR:
4476 	case CPU_PC_PARITY:
4477 	case CPU_ITLB_PARITY:
4478 	case CPU_DTLB_PARITY:
4479 		return (ERRTYPE_CPU);
4480 	}
4481 	return (ERRTYPE_UNKNOWN);
4482 }
4483 
4484 /*
4485  * Encode the data saved in the ch_async_flt_t struct into
4486  * the FM ereport payload.
4487  */
4488 static void
4489 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4490 	nvlist_t *resource, int *afar_status, int *synd_status)
4491 {
4492 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4493 	*synd_status = AFLT_STAT_INVALID;
4494 	*afar_status = AFLT_STAT_INVALID;
4495 
4496 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4497 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4498 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4499 	}
4500 
4501 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4502 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4503 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4504 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4505 	}
4506 
4507 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4508 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4509 		    ch_flt->flt_bit);
4510 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4511 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4512 	}
4513 
4514 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4515 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4516 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4517 	}
4518 
4519 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4520 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4521 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4522 	}
4523 
4524 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4525 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4526 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4527 	}
4528 
4529 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4530 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4531 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4532 	}
4533 
4534 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4535 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4536 		    DATA_TYPE_BOOLEAN_VALUE,
4537 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4538 	}
4539 
4540 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4541 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4542 		    DATA_TYPE_BOOLEAN_VALUE,
4543 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4544 	}
4545 
4546 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4547 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4548 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4549 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4550 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4551 	}
4552 
4553 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4554 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4555 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4556 	}
4557 
4558 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4559 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4560 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4561 	}
4562 
4563 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4564 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4565 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4566 	}
4567 
4568 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4569 		cpu_payload_add_ecache(aflt, payload);
4570 
4571 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4572 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4573 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4574 	}
4575 
4576 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4577 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4578 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4579 	}
4580 
4581 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4582 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4583 		    DATA_TYPE_UINT32_ARRAY, 16,
4584 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4585 	}
4586 
4587 #if defined(CPU_IMP_L1_CACHE_PARITY)
4588 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4589 		cpu_payload_add_dcache(aflt, payload);
4590 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4591 		cpu_payload_add_icache(aflt, payload);
4592 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4593 
4594 #if defined(CHEETAH_PLUS)
4595 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4596 		cpu_payload_add_pcache(aflt, payload);
4597 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4598 		cpu_payload_add_tlb(aflt, payload);
4599 #endif	/* CHEETAH_PLUS */
4600 	/*
4601 	 * Create the FMRI that goes into the payload
4602 	 * and contains the unum info if necessary.
4603 	 */
4604 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4605 		char unum[UNUM_NAMLEN] = "";
4606 		char sid[DIMM_SERIAL_ID_LEN] = "";
4607 		int len, ret, rtype, synd_code;
4608 		uint64_t offset = (uint64_t)-1;
4609 
4610 		rtype = cpu_error_to_resource_type(aflt);
4611 		switch (rtype) {
4612 
4613 		case ERRTYPE_MEMORY:
4614 		case ERRTYPE_ECACHE_DATA:
4615 
4616 			/*
4617 			 * Memory errors, do unum lookup
4618 			 */
4619 			if (*afar_status == AFLT_STAT_INVALID)
4620 				break;
4621 
4622 			if (rtype == ERRTYPE_ECACHE_DATA)
4623 				aflt->flt_status |= ECC_ECACHE;
4624 			else
4625 				aflt->flt_status &= ~ECC_ECACHE;
4626 
4627 			synd_code = synd_to_synd_code(*synd_status,
4628 			    aflt->flt_synd, ch_flt->flt_bit);
4629 
4630 			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4631 				break;
4632 
4633 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4634 			    &len);
4635 
4636 			if (ret == 0) {
4637 				(void) cpu_get_mem_offset(aflt->flt_addr,
4638 				    &offset);
4639 			}
4640 
4641 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4642 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4643 			fm_payload_set(payload,
4644 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4645 			    DATA_TYPE_NVLIST, resource, NULL);
4646 			break;
4647 
4648 		case ERRTYPE_CPU:
4649 			/*
4650 			 * On-board processor array error, add cpu resource.
4651 			 */
4652 			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4653 			fm_payload_set(payload,
4654 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4655 			    DATA_TYPE_NVLIST, resource, NULL);
4656 			break;
4657 		}
4658 	}
4659 }
4660 
4661 /*
4662  * Initialize the way info if necessary.
4663  */
4664 void
4665 cpu_ereport_init(struct async_flt *aflt)
4666 {
4667 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4668 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4669 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4670 	int i;
4671 
4672 	/*
4673 	 * Initialize the info in the CPU logout structure.
4674 	 * The I$/D$ way information is not initialized here
4675 	 * since it is captured in the logout assembly code.
4676 	 */
4677 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4678 		(ecp + i)->ec_way = i;
4679 
4680 	for (i = 0; i < PN_L2_NWAYS; i++)
4681 		(l2p + i)->ec_way = i;
4682 }
4683 
4684 /*
4685  * Returns whether fault address is valid for this error bit and
4686  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4687  */
4688 int
4689 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4690 {
4691 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4692 
4693 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4694 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4695 	    AFLT_STAT_VALID &&
4696 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4697 }
4698 
4699 /*
4700  * Returns whether fault address is valid based on the error bit for the
4701  * one event being queued and whether the address is "in memory".
4702  */
4703 static int
4704 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4705 {
4706 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4707 	int afar_status;
4708 	uint64_t afsr_errs, afsr_ow, *ow_bits;
4709 
4710 	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4711 	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4712 		return (0);
4713 
4714 	afsr_errs = ch_flt->afsr_errs;
4715 	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4716 
4717 	switch (afar_status) {
4718 	case AFLT_STAT_VALID:
4719 		return (1);
4720 
4721 	case AFLT_STAT_AMBIGUOUS:
4722 		/*
4723 		 * Status is ambiguous since another error bit (or bits)
4724 		 * of equal priority to the specified bit on in the afsr,
4725 		 * so check those bits. Return 1 only if the bits on in the
4726 		 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4727 		 * Otherwise not all the equal priority bits are for memory
4728 		 * errors, so return 0.
4729 		 */
4730 		ow_bits = afar_overwrite;
4731 		while ((afsr_ow = *ow_bits++) != 0) {
4732 			/*
4733 			 * Get other bits that are on in t_afsr_bit's priority
4734 			 * class to check for Memory Error bits only.
4735 			 */
4736 			if (afsr_ow & t_afsr_bit) {
4737 				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4738 					return (0);
4739 				else
4740 					return (1);
4741 			}
4742 		}
4743 		/*FALLTHRU*/
4744 
4745 	default:
4746 		return (0);
4747 	}
4748 }
4749 
4750 static void
4751 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4752 {
4753 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4754 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4755 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4756 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4757 #if defined(CPU_IMP_ECACHE_ASSOC)
4758 	int i, nway;
4759 #endif /* CPU_IMP_ECACHE_ASSOC */
4760 
4761 	/*
4762 	 * Check if the CPU log out captured was valid.
4763 	 */
4764 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4765 	    ch_flt->flt_data_incomplete)
4766 		return;
4767 
4768 #if defined(CPU_IMP_ECACHE_ASSOC)
4769 	nway = cpu_ecache_nway();
4770 	i =  cpu_ecache_line_valid(ch_flt);
4771 	if (i == 0 || i > nway) {
4772 		for (i = 0; i < nway; i++)
4773 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4774 	} else
4775 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4776 #else /* CPU_IMP_ECACHE_ASSOC */
4777 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4778 #endif /* CPU_IMP_ECACHE_ASSOC */
4779 
4780 #if defined(CHEETAH_PLUS)
4781 	pn_cpu_log_diag_l2_info(ch_flt);
4782 #endif /* CHEETAH_PLUS */
4783 
4784 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4785 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4786 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4787 	}
4788 
4789 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4790 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4791 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4792 		else
4793 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4794 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4795 	}
4796 }
4797 
4798 /*
4799  * Cheetah ECC calculation.
4800  *
4801  * We only need to do the calculation on the data bits and can ignore check
4802  * bit and Mtag bit terms in the calculation.
4803  */
4804 static uint64_t ch_ecc_table[9][2] = {
4805 	/*
4806 	 * low order 64-bits   high-order 64-bits
4807 	 */
4808 	{ 0x46bffffeccd1177f, 0x488800022100014c },
4809 	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
4810 	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
4811 	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
4812 	{ 0x7f5511421b113809, 0x901c88d84288aafe },
4813 	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
4814 	{ 0xf552181014448344, 0x7ff8f4443e411911 },
4815 	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
4816 	{ 0x3280008440001112, 0xfee88b337ffffd62 },
4817 };
4818 
4819 /*
4820  * 64-bit population count, use well-known popcnt trick.
4821  * We could use the UltraSPARC V9 POPC instruction, but some
4822  * CPUs including Cheetahplus and Jaguar do not support that
4823  * instruction.
4824  */
4825 int
4826 popc64(uint64_t val)
4827 {
4828 	int cnt;
4829 
4830 	for (cnt = 0; val != 0; val &= val - 1)
4831 		cnt++;
4832 	return (cnt);
4833 }
4834 
4835 /*
4836  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4837  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4838  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4839  * instead of doing all the xor's.
4840  */
4841 uint32_t
4842 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4843 {
4844 	int bitno, s;
4845 	int synd = 0;
4846 
4847 	for (bitno = 0; bitno < 9; bitno++) {
4848 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4849 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4850 		synd |= (s << bitno);
4851 	}
4852 	return (synd);
4853 
4854 }
4855 
4856 /*
4857  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4858  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4859  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4860  */
4861 static void
4862 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4863     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4864 {
4865 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4866 
4867 	if (reason &&
4868 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4869 		(void) strcat(reason, eccp->ec_reason);
4870 	}
4871 
4872 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4873 	ch_flt->flt_type = eccp->ec_flt_type;
4874 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4875 		ch_flt->flt_diag_data = *cdp;
4876 	else
4877 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4878 	aflt->flt_in_memory =
4879 	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4880 
4881 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4882 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4883 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4884 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4885 	else
4886 		aflt->flt_synd = 0;
4887 
4888 	aflt->flt_payload = eccp->ec_err_payload;
4889 
4890 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4891 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4892 		cpu_errorq_dispatch(eccp->ec_err_class,
4893 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4894 		    aflt->flt_panic);
4895 	else
4896 		cpu_errorq_dispatch(eccp->ec_err_class,
4897 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4898 		    aflt->flt_panic);
4899 }
4900 
4901 /*
4902  * Queue events on async event queue one event per error bit.  First we
4903  * queue the events that we "expect" for the given trap, then we queue events
4904  * that we may not expect.  Return number of events queued.
4905  */
4906 int
4907 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4908     ch_cpu_logout_t *clop)
4909 {
4910 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4911 	ecc_type_to_info_t *eccp;
4912 	int nevents = 0;
4913 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4914 #if defined(CHEETAH_PLUS)
4915 	uint64_t orig_t_afsr_errs;
4916 #endif
4917 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4918 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4919 	ch_diag_data_t *cdp = NULL;
4920 
4921 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4922 
4923 #if defined(CHEETAH_PLUS)
4924 	orig_t_afsr_errs = t_afsr_errs;
4925 
4926 	/*
4927 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4928 	 */
4929 	if (clop != NULL) {
4930 		/*
4931 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4932 		 * flt_addr and flt_stat fields will be reset to the primaries
4933 		 * below, but the sdw_addr and sdw_stat will stay as the
4934 		 * secondaries.
4935 		 */
4936 		cdp = &clop->clo_sdw_data;
4937 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4938 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4939 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4940 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4941 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4942 
4943 		/*
4944 		 * If the primary and shadow AFSR differ, tag the shadow as
4945 		 * the first fault.
4946 		 */
4947 		if ((primary_afar != cdp->chd_afar) ||
4948 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4949 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4950 		}
4951 
4952 		/*
4953 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4954 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4955 		 * is expected to be zero for those CPUs which do not have
4956 		 * an AFSR_EXT register.
4957 		 */
4958 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4959 			if ((eccp->ec_afsr_bit &
4960 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4961 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4962 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4963 				cdp = NULL;
4964 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4965 				nevents++;
4966 			}
4967 		}
4968 
4969 		/*
4970 		 * If the ME bit is on in the primary AFSR turn all the
4971 		 * error bits on again that may set the ME bit to make
4972 		 * sure we see the ME AFSR error logs.
4973 		 */
4974 		if ((primary_afsr & C_AFSR_ME) != 0)
4975 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4976 	}
4977 #endif	/* CHEETAH_PLUS */
4978 
4979 	if (clop != NULL)
4980 		cdp = &clop->clo_data;
4981 
4982 	/*
4983 	 * Queue expected errors, error bit and fault type must match
4984 	 * in the ecc_type_to_info table.
4985 	 */
4986 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4987 	    eccp++) {
4988 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4989 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4990 #if defined(SERRANO)
4991 			/*
4992 			 * For FRC/FRU errors on Serrano the afar2 captures
4993 			 * the address and the associated data is
4994 			 * in the shadow logout area.
4995 			 */
4996 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4997 				if (clop != NULL)
4998 					cdp = &clop->clo_sdw_data;
4999 				aflt->flt_addr = ch_flt->afar2;
5000 			} else {
5001 				if (clop != NULL)
5002 					cdp = &clop->clo_data;
5003 				aflt->flt_addr = primary_afar;
5004 			}
5005 #else	/* SERRANO */
5006 			aflt->flt_addr = primary_afar;
5007 #endif	/* SERRANO */
5008 			aflt->flt_stat = primary_afsr;
5009 			ch_flt->afsr_ext = primary_afsr_ext;
5010 			ch_flt->afsr_errs = primary_afsr_errs;
5011 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5012 			cdp = NULL;
5013 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5014 			nevents++;
5015 		}
5016 	}
5017 
5018 	/*
5019 	 * Queue unexpected errors, error bit only match.
5020 	 */
5021 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5022 	    eccp++) {
5023 		if (eccp->ec_afsr_bit & t_afsr_errs) {
5024 #if defined(SERRANO)
5025 			/*
5026 			 * For FRC/FRU errors on Serrano the afar2 captures
5027 			 * the address and the associated data is
5028 			 * in the shadow logout area.
5029 			 */
5030 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5031 				if (clop != NULL)
5032 					cdp = &clop->clo_sdw_data;
5033 				aflt->flt_addr = ch_flt->afar2;
5034 			} else {
5035 				if (clop != NULL)
5036 					cdp = &clop->clo_data;
5037 				aflt->flt_addr = primary_afar;
5038 			}
5039 #else	/* SERRANO */
5040 			aflt->flt_addr = primary_afar;
5041 #endif	/* SERRANO */
5042 			aflt->flt_stat = primary_afsr;
5043 			ch_flt->afsr_ext = primary_afsr_ext;
5044 			ch_flt->afsr_errs = primary_afsr_errs;
5045 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5046 			cdp = NULL;
5047 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5048 			nevents++;
5049 		}
5050 	}
5051 	return (nevents);
5052 }
5053 
5054 /*
5055  * Return trap type number.
5056  */
5057 uint8_t
5058 flt_to_trap_type(struct async_flt *aflt)
5059 {
5060 	if (aflt->flt_status & ECC_I_TRAP)
5061 		return (TRAP_TYPE_ECC_I);
5062 	if (aflt->flt_status & ECC_D_TRAP)
5063 		return (TRAP_TYPE_ECC_D);
5064 	if (aflt->flt_status & ECC_F_TRAP)
5065 		return (TRAP_TYPE_ECC_F);
5066 	if (aflt->flt_status & ECC_C_TRAP)
5067 		return (TRAP_TYPE_ECC_C);
5068 	if (aflt->flt_status & ECC_DP_TRAP)
5069 		return (TRAP_TYPE_ECC_DP);
5070 	if (aflt->flt_status & ECC_IP_TRAP)
5071 		return (TRAP_TYPE_ECC_IP);
5072 	if (aflt->flt_status & ECC_ITLB_TRAP)
5073 		return (TRAP_TYPE_ECC_ITLB);
5074 	if (aflt->flt_status & ECC_DTLB_TRAP)
5075 		return (TRAP_TYPE_ECC_DTLB);
5076 	return (TRAP_TYPE_UNKNOWN);
5077 }
5078 
5079 /*
5080  * Decide an error type based on detector and leaky/partner tests.
5081  * The following array is used for quick translation - it must
5082  * stay in sync with ce_dispact_t.
5083  */
5084 
5085 static char *cetypes[] = {
5086 	CE_DISP_DESC_U,
5087 	CE_DISP_DESC_I,
5088 	CE_DISP_DESC_PP,
5089 	CE_DISP_DESC_P,
5090 	CE_DISP_DESC_L,
5091 	CE_DISP_DESC_PS,
5092 	CE_DISP_DESC_S
5093 };
5094 
5095 char *
5096 flt_to_error_type(struct async_flt *aflt)
5097 {
5098 	ce_dispact_t dispact, disp;
5099 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5100 
5101 	/*
5102 	 * The memory payload bundle is shared by some events that do
5103 	 * not perform any classification.  For those flt_disp will be
5104 	 * 0 and we will return "unknown".
5105 	 */
5106 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5107 		return (cetypes[CE_DISP_UNKNOWN]);
5108 
5109 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5110 
5111 	/*
5112 	 * It is also possible that no scrub/classification was performed
5113 	 * by the detector, for instance where a disrupting error logged
5114 	 * in the AFSR while CEEN was off in cpu_deferred_error.
5115 	 */
5116 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5117 		return (cetypes[CE_DISP_UNKNOWN]);
5118 
5119 	/*
5120 	 * Lookup type in initial classification/action table
5121 	 */
5122 	dispact = CE_DISPACT(ce_disp_table,
5123 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
5124 	    CE_XDIAG_STATE(dtcrinfo),
5125 	    CE_XDIAG_CE1SEEN(dtcrinfo),
5126 	    CE_XDIAG_CE2SEEN(dtcrinfo));
5127 
5128 	/*
5129 	 * A bad lookup is not something to panic production systems for.
5130 	 */
5131 	ASSERT(dispact != CE_DISP_BAD);
5132 	if (dispact == CE_DISP_BAD)
5133 		return (cetypes[CE_DISP_UNKNOWN]);
5134 
5135 	disp = CE_DISP(dispact);
5136 
5137 	switch (disp) {
5138 	case CE_DISP_UNKNOWN:
5139 	case CE_DISP_INTERMITTENT:
5140 		break;
5141 
5142 	case CE_DISP_POSS_PERS:
5143 		/*
5144 		 * "Possible persistent" errors to which we have applied a valid
5145 		 * leaky test can be separated into "persistent" or "leaky".
5146 		 */
5147 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5148 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
5149 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5150 			    CE_XDIAG_CE2SEEN(lkyinfo))
5151 				disp = CE_DISP_LEAKY;
5152 			else
5153 				disp = CE_DISP_PERS;
5154 		}
5155 		break;
5156 
5157 	case CE_DISP_POSS_STICKY:
5158 		/*
5159 		 * Promote "possible sticky" results that have been
5160 		 * confirmed by a partner test to "sticky".  Unconfirmed
5161 		 * "possible sticky" events are left at that status - we do not
5162 		 * guess at any bad reader/writer etc status here.
5163 		 */
5164 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5165 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5166 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5167 			disp = CE_DISP_STICKY;
5168 
5169 		/*
5170 		 * Promote "possible sticky" results on a uniprocessor
5171 		 * to "sticky"
5172 		 */
5173 		if (disp == CE_DISP_POSS_STICKY &&
5174 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5175 			disp = CE_DISP_STICKY;
5176 		break;
5177 
5178 	default:
5179 		disp = CE_DISP_UNKNOWN;
5180 		break;
5181 	}
5182 
5183 	return (cetypes[disp]);
5184 }
5185 
5186 /*
5187  * Given the entire afsr, the specific bit to check and a prioritized list of
5188  * error bits, determine the validity of the various overwrite priority
5189  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5190  * different overwrite priorities.
5191  *
5192  * Given a specific afsr error bit and the entire afsr, there are three cases:
5193  *   INVALID:	The specified bit is lower overwrite priority than some other
5194  *		error bit which is on in the afsr (or IVU/IVC).
5195  *   VALID:	The specified bit is higher priority than all other error bits
5196  *		which are on in the afsr.
5197  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5198  *		bit is on in the afsr.
5199  */
5200 int
5201 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5202 {
5203 	uint64_t afsr_ow;
5204 
5205 	while ((afsr_ow = *ow_bits++) != 0) {
5206 		/*
5207 		 * If bit is in the priority class, check to see if another
5208 		 * bit in the same class is on => ambiguous.  Otherwise,
5209 		 * the value is valid.  If the bit is not on at this priority
5210 		 * class, but a higher priority bit is on, then the value is
5211 		 * invalid.
5212 		 */
5213 		if (afsr_ow & afsr_bit) {
5214 			/*
5215 			 * If equal pri bit is on, ambiguous.
5216 			 */
5217 			if (afsr & (afsr_ow & ~afsr_bit))
5218 				return (AFLT_STAT_AMBIGUOUS);
5219 			return (AFLT_STAT_VALID);
5220 		} else if (afsr & afsr_ow)
5221 			break;
5222 	}
5223 
5224 	/*
5225 	 * We didn't find a match or a higher priority bit was on.  Not
5226 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5227 	 */
5228 	return (AFLT_STAT_INVALID);
5229 }
5230 
5231 static int
5232 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5233 {
5234 #if defined(SERRANO)
5235 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5236 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5237 	else
5238 #endif	/* SERRANO */
5239 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5240 }
5241 
5242 static int
5243 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5244 {
5245 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5246 }
5247 
5248 static int
5249 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5250 {
5251 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5252 }
5253 
5254 static int
5255 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5256 {
5257 #ifdef lint
5258 	cpuid = cpuid;
5259 #endif
5260 #if defined(CHEETAH_PLUS)
5261 	/*
5262 	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5263 	 * policy for Cheetah+ and separate for Panther CPUs.
5264 	 */
5265 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5266 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5267 			return (afsr_to_msynd_status(afsr, afsr_bit));
5268 		else
5269 			return (afsr_to_esynd_status(afsr, afsr_bit));
5270 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5271 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5272 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5273 		else
5274 			return (afsr_to_esynd_status(afsr, afsr_bit));
5275 #else /* CHEETAH_PLUS */
5276 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5277 		return (afsr_to_msynd_status(afsr, afsr_bit));
5278 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5279 		return (afsr_to_esynd_status(afsr, afsr_bit));
5280 #endif /* CHEETAH_PLUS */
5281 	} else {
5282 		return (AFLT_STAT_INVALID);
5283 	}
5284 }
5285 
5286 /*
5287  * Slave CPU stick synchronization.
5288  */
5289 void
5290 sticksync_slave(void)
5291 {
5292 	int 		i;
5293 	int		tries = 0;
5294 	int64_t		tskew;
5295 	int64_t		av_tskew;
5296 
5297 	kpreempt_disable();
5298 	/* wait for the master side */
5299 	while (stick_sync_cmd != SLAVE_START)
5300 		;
5301 	/*
5302 	 * Synchronization should only take a few tries at most. But in the
5303 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5304 	 * without it's stick synchronized wouldn't be a good citizen.
5305 	 */
5306 	while (slave_done == 0) {
5307 		/*
5308 		 * Time skew calculation.
5309 		 */
5310 		av_tskew = tskew = 0;
5311 
5312 		for (i = 0; i < stick_iter; i++) {
5313 			/* make location hot */
5314 			timestamp[EV_A_START] = 0;
5315 			stick_timestamp(&timestamp[EV_A_START]);
5316 
5317 			/* tell the master we're ready */
5318 			stick_sync_cmd = MASTER_START;
5319 
5320 			/* and wait */
5321 			while (stick_sync_cmd != SLAVE_CONT)
5322 				;
5323 			/* Event B end */
5324 			stick_timestamp(&timestamp[EV_B_END]);
5325 
5326 			/* calculate time skew */
5327 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5328 				- (timestamp[EV_A_END] -
5329 				timestamp[EV_A_START])) / 2;
5330 
5331 			/* keep running count */
5332 			av_tskew += tskew;
5333 		} /* for */
5334 
5335 		/*
5336 		 * Adjust stick for time skew if not within the max allowed;
5337 		 * otherwise we're all done.
5338 		 */
5339 		if (stick_iter != 0)
5340 			av_tskew = av_tskew/stick_iter;
5341 		if (ABS(av_tskew) > stick_tsk) {
5342 			/*
5343 			 * If the skew is 1 (the slave's STICK register
5344 			 * is 1 STICK ahead of the master's), stick_adj
5345 			 * could fail to adjust the slave's STICK register
5346 			 * if the STICK read on the slave happens to
5347 			 * align with the increment of the STICK.
5348 			 * Therefore, we increment the skew to 2.
5349 			 */
5350 			if (av_tskew == 1)
5351 				av_tskew++;
5352 			stick_adj(-av_tskew);
5353 		} else
5354 			slave_done = 1;
5355 #ifdef DEBUG
5356 		if (tries < DSYNC_ATTEMPTS)
5357 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5358 				av_tskew;
5359 		++tries;
5360 #endif /* DEBUG */
5361 #ifdef lint
5362 		tries = tries;
5363 #endif
5364 
5365 	} /* while */
5366 
5367 	/* allow the master to finish */
5368 	stick_sync_cmd = EVENT_NULL;
5369 	kpreempt_enable();
5370 }
5371 
5372 /*
5373  * Master CPU side of stick synchronization.
5374  *  - timestamp end of Event A
5375  *  - timestamp beginning of Event B
5376  */
5377 void
5378 sticksync_master(void)
5379 {
5380 	int		i;
5381 
5382 	kpreempt_disable();
5383 	/* tell the slave we've started */
5384 	slave_done = 0;
5385 	stick_sync_cmd = SLAVE_START;
5386 
5387 	while (slave_done == 0) {
5388 		for (i = 0; i < stick_iter; i++) {
5389 			/* wait for the slave */
5390 			while (stick_sync_cmd != MASTER_START)
5391 				;
5392 			/* Event A end */
5393 			stick_timestamp(&timestamp[EV_A_END]);
5394 
5395 			/* make location hot */
5396 			timestamp[EV_B_START] = 0;
5397 			stick_timestamp(&timestamp[EV_B_START]);
5398 
5399 			/* tell the slave to continue */
5400 			stick_sync_cmd = SLAVE_CONT;
5401 		} /* for */
5402 
5403 		/* wait while slave calculates time skew */
5404 		while (stick_sync_cmd == SLAVE_CONT)
5405 			;
5406 	} /* while */
5407 	kpreempt_enable();
5408 }
5409 
5410 /*
5411  * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to
5412  * do Spitfire hack of xcall'ing all the cpus to ask to check for them.  Also,
5413  * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
5414  * panic idle.
5415  */
5416 /*ARGSUSED*/
5417 void
5418 cpu_check_allcpus(struct async_flt *aflt)
5419 {}
5420 
5421 struct kmem_cache *ch_private_cache;
5422 
5423 /*
5424  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5425  * deallocate the scrubber data structures and cpu_private data structure.
5426  */
5427 void
5428 cpu_uninit_private(struct cpu *cp)
5429 {
5430 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5431 
5432 	ASSERT(chprp);
5433 	cpu_uninit_ecache_scrub_dr(cp);
5434 	CPU_PRIVATE(cp) = NULL;
5435 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5436 	kmem_cache_free(ch_private_cache, chprp);
5437 	cmp_delete_cpu(cp->cpu_id);
5438 
5439 }
5440 
5441 /*
5442  * Cheetah Cache Scrubbing
5443  *
5444  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5445  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5446  * protected by either parity or ECC.
5447  *
5448  * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
5449  * cache per second). Due to the the specifics of how the I$ control
5450  * logic works with respect to the ASI used to scrub I$ lines, the entire
5451  * I$ is scanned at once.
5452  */
5453 
5454 /*
5455  * Tuneables to enable and disable the scrubbing of the caches, and to tune
5456  * scrubbing behavior.  These may be changed via /etc/system or using mdb
5457  * on a running system.
5458  */
5459 int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5460 
5461 /*
5462  * The following are the PIL levels that the softints/cross traps will fire at.
5463  */
5464 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5465 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5466 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5467 
5468 #if defined(JALAPENO)
5469 
5470 /*
5471  * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5472  * on Jalapeno.
5473  */
5474 int ecache_scrub_enable = 0;
5475 
5476 #else	/* JALAPENO */
5477 
5478 /*
5479  * With all other cpu types, E$ scrubbing is on by default
5480  */
5481 int ecache_scrub_enable = 1;
5482 
5483 #endif	/* JALAPENO */
5484 
5485 
5486 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5487 
5488 /*
5489  * The I$ scrubber tends to cause latency problems for real-time SW, so it
5490  * is disabled by default on non-Cheetah systems
5491  */
5492 int icache_scrub_enable = 0;
5493 
5494 /*
5495  * Tuneables specifying the scrub calls per second and the scan rate
5496  * for each cache
5497  *
5498  * The cyclic times are set during boot based on the following values.
5499  * Changing these values in mdb after this time will have no effect.  If
5500  * a different value is desired, it must be set in /etc/system before a
5501  * reboot.
5502  */
5503 int ecache_calls_a_sec = 1;
5504 int dcache_calls_a_sec = 2;
5505 int icache_calls_a_sec = 2;
5506 
5507 int ecache_scan_rate_idle = 1;
5508 int ecache_scan_rate_busy = 1;
5509 int dcache_scan_rate_idle = 1;
5510 int dcache_scan_rate_busy = 1;
5511 int icache_scan_rate_idle = 1;
5512 int icache_scan_rate_busy = 1;
5513 
5514 #else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5515 
5516 int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
5517 
5518 int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
5519 int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
5520 int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */
5521 
5522 int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
5523 int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
5524 int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
5525 int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
5526 int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
5527 int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
5528 
5529 #endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5530 
5531 /*
5532  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5533  * increment the outstanding request counter and schedule a softint to run
5534  * the scrubber.
5535  */
5536 extern xcfunc_t cache_scrubreq_tl1;
5537 
5538 /*
5539  * These are the softint functions for each cache scrubber
5540  */
5541 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5542 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5543 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5544 
5545 /*
5546  * The cache scrub info table contains cache specific information
5547  * and allows for some of the scrub code to be table driven, reducing
5548  * duplication of cache similar code.
5549  *
5550  * This table keeps a copy of the value in the calls per second variable
5551  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5552  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5553  * mdb in a misguided attempt to disable the scrubber).
5554  */
5555 struct scrub_info {
5556 	int		*csi_enable;	/* scrubber enable flag */
5557 	int		csi_freq;	/* scrubber calls per second */
5558 	int		csi_index;	/* index to chsm_outstanding[] */
5559 	uint64_t	csi_inum;	/* scrubber interrupt number */
5560 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5561 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5562 	char		csi_name[3];	/* cache name for this scrub entry */
5563 } cache_scrub_info[] = {
5564 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5565 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5566 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5567 };
5568 
5569 /*
5570  * If scrubbing is enabled, increment the outstanding request counter.  If it
5571  * is 1 (meaning there were no previous requests outstanding), call
5572  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5573  * a self trap.
5574  */
5575 static void
5576 do_scrub(struct scrub_info *csi)
5577 {
5578 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5579 	int index = csi->csi_index;
5580 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5581 
5582 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5583 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5584 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5585 			    csi->csi_inum, 0);
5586 		}
5587 	}
5588 }
5589 
5590 /*
5591  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5592  * cross-trap the offline cpus.
5593  */
5594 static void
5595 do_scrub_offline(struct scrub_info *csi)
5596 {
5597 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5598 
5599 	if (CPUSET_ISNULL(cpu_offline_set)) {
5600 		/*
5601 		 * No offline cpus - nothing to do
5602 		 */
5603 		return;
5604 	}
5605 
5606 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5607 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5608 		    csi->csi_index);
5609 	}
5610 }
5611 
5612 /*
5613  * This is the initial setup for the scrubber cyclics - it sets the
5614  * interrupt level, frequency, and function to call.
5615  */
5616 /*ARGSUSED*/
5617 static void
5618 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5619     cyc_time_t *when)
5620 {
5621 	struct scrub_info *csi = (struct scrub_info *)arg;
5622 
5623 	ASSERT(csi != NULL);
5624 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5625 	hdlr->cyh_level = CY_LOW_LEVEL;
5626 	hdlr->cyh_arg = arg;
5627 
5628 	when->cyt_when = 0;	/* Start immediately */
5629 	when->cyt_interval = NANOSEC / csi->csi_freq;
5630 }
5631 
5632 /*
5633  * Initialization for cache scrubbing.
5634  * This routine is called AFTER all cpus have had cpu_init_private called
5635  * to initialize their private data areas.
5636  */
5637 void
5638 cpu_init_cache_scrub(void)
5639 {
5640 	int i;
5641 	struct scrub_info *csi;
5642 	cyc_omni_handler_t omni_hdlr;
5643 	cyc_handler_t offline_hdlr;
5644 	cyc_time_t when;
5645 
5646 	/*
5647 	 * save away the maximum number of lines for the D$
5648 	 */
5649 	dcache_nlines = dcache_size / dcache_linesize;
5650 
5651 	/*
5652 	 * register the softints for the cache scrubbing
5653 	 */
5654 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5655 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5656 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5657 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5658 
5659 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5660 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5661 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5662 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5663 
5664 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5665 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5666 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5667 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5668 
5669 	/*
5670 	 * start the scrubbing for all the caches
5671 	 */
5672 	mutex_enter(&cpu_lock);
5673 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5674 
5675 		csi = &cache_scrub_info[i];
5676 
5677 		if (!(*csi->csi_enable))
5678 			continue;
5679 
5680 		/*
5681 		 * force the following to be true:
5682 		 *	1 <= calls_a_sec <= hz
5683 		 */
5684 		if (csi->csi_freq > hz) {
5685 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5686 				"(%d); resetting to hz (%d)", csi->csi_name,
5687 				csi->csi_freq, hz);
5688 			csi->csi_freq = hz;
5689 		} else if (csi->csi_freq < 1) {
5690 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5691 				"(%d); resetting to 1", csi->csi_name,
5692 				csi->csi_freq);
5693 			csi->csi_freq = 1;
5694 		}
5695 
5696 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5697 		omni_hdlr.cyo_offline = NULL;
5698 		omni_hdlr.cyo_arg = (void *)csi;
5699 
5700 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5701 		offline_hdlr.cyh_arg = (void *)csi;
5702 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5703 
5704 		when.cyt_when = 0;	/* Start immediately */
5705 		when.cyt_interval = NANOSEC / csi->csi_freq;
5706 
5707 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5708 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5709 	}
5710 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5711 	mutex_exit(&cpu_lock);
5712 }
5713 
5714 /*
5715  * Indicate that the specified cpu is idle.
5716  */
5717 void
5718 cpu_idle_ecache_scrub(struct cpu *cp)
5719 {
5720 	if (CPU_PRIVATE(cp) != NULL) {
5721 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5722 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5723 	}
5724 }
5725 
5726 /*
5727  * Indicate that the specified cpu is busy.
5728  */
5729 void
5730 cpu_busy_ecache_scrub(struct cpu *cp)
5731 {
5732 	if (CPU_PRIVATE(cp) != NULL) {
5733 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5734 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5735 	}
5736 }
5737 
5738 /*
5739  * Initialization for cache scrubbing for the specified cpu.
5740  */
5741 void
5742 cpu_init_ecache_scrub_dr(struct cpu *cp)
5743 {
5744 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5745 	int cpuid = cp->cpu_id;
5746 
5747 	/* initialize the number of lines in the caches */
5748 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5749 	    cpunodes[cpuid].ecache_linesize;
5750 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5751 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5752 
5753 	/*
5754 	 * do_scrub() and do_scrub_offline() check both the global
5755 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5756 	 * check this value before scrubbing.  Currently, we use it to
5757 	 * disable the E$ scrubber on multi-core cpus or while running at
5758 	 * slowed speed.  For now, just turn everything on and allow
5759 	 * cpu_init_private() to change it if necessary.
5760 	 */
5761 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5762 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5763 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5764 
5765 	cpu_busy_ecache_scrub(cp);
5766 }
5767 
5768 /*
5769  * Un-initialization for cache scrubbing for the specified cpu.
5770  */
5771 static void
5772 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5773 {
5774 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5775 
5776 	/*
5777 	 * un-initialize bookkeeping for cache scrubbing
5778 	 */
5779 	bzero(csmp, sizeof (ch_scrub_misc_t));
5780 
5781 	cpu_idle_ecache_scrub(cp);
5782 }
5783 
5784 /*
5785  * Called periodically on each CPU to scrub the D$.
5786  */
5787 static void
5788 scrub_dcache(int how_many)
5789 {
5790 	int i;
5791 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5792 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5793 
5794 	/*
5795 	 * scrub the desired number of lines
5796 	 */
5797 	for (i = 0; i < how_many; i++) {
5798 		/*
5799 		 * scrub a D$ line
5800 		 */
5801 		dcache_inval_line(index);
5802 
5803 		/*
5804 		 * calculate the next D$ line to scrub, assumes
5805 		 * that dcache_nlines is a power of 2
5806 		 */
5807 		index = (index + 1) & (dcache_nlines - 1);
5808 	}
5809 
5810 	/*
5811 	 * set the scrub index for the next visit
5812 	 */
5813 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5814 }
5815 
5816 /*
5817  * Handler for D$ scrub inum softint. Call scrub_dcache until
5818  * we decrement the outstanding request count to zero.
5819  */
5820 /*ARGSUSED*/
5821 static uint_t
5822 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5823 {
5824 	int i;
5825 	int how_many;
5826 	int outstanding;
5827 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5828 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5829 	struct scrub_info *csi = (struct scrub_info *)arg1;
5830 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5831 		dcache_scan_rate_idle : dcache_scan_rate_busy;
5832 
5833 	/*
5834 	 * The scan rates are expressed in units of tenths of a
5835 	 * percent.  A scan rate of 1000 (100%) means the whole
5836 	 * cache is scanned every second.
5837 	 */
5838 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5839 
5840 	do {
5841 		outstanding = *countp;
5842 		for (i = 0; i < outstanding; i++) {
5843 			scrub_dcache(how_many);
5844 		}
5845 	} while (atomic_add_32_nv(countp, -outstanding));
5846 
5847 	return (DDI_INTR_CLAIMED);
5848 }
5849 
5850 /*
5851  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5852  * by invalidating lines. Due to the characteristics of the ASI which
5853  * is used to invalidate an I$ line, the entire I$ must be invalidated
5854  * vs. an individual I$ line.
5855  */
5856 static void
5857 scrub_icache(int how_many)
5858 {
5859 	int i;
5860 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5861 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5862 	int icache_nlines = csmp->chsm_icache_nlines;
5863 
5864 	/*
5865 	 * scrub the desired number of lines
5866 	 */
5867 	for (i = 0; i < how_many; i++) {
5868 		/*
5869 		 * since the entire I$ must be scrubbed at once,
5870 		 * wait until the index wraps to zero to invalidate
5871 		 * the entire I$
5872 		 */
5873 		if (index == 0) {
5874 			icache_inval_all();
5875 		}
5876 
5877 		/*
5878 		 * calculate the next I$ line to scrub, assumes
5879 		 * that chsm_icache_nlines is a power of 2
5880 		 */
5881 		index = (index + 1) & (icache_nlines - 1);
5882 	}
5883 
5884 	/*
5885 	 * set the scrub index for the next visit
5886 	 */
5887 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5888 }
5889 
5890 /*
5891  * Handler for I$ scrub inum softint. Call scrub_icache until
5892  * we decrement the outstanding request count to zero.
5893  */
5894 /*ARGSUSED*/
5895 static uint_t
5896 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5897 {
5898 	int i;
5899 	int how_many;
5900 	int outstanding;
5901 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5902 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5903 	struct scrub_info *csi = (struct scrub_info *)arg1;
5904 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5905 	    icache_scan_rate_idle : icache_scan_rate_busy;
5906 	int icache_nlines = csmp->chsm_icache_nlines;
5907 
5908 	/*
5909 	 * The scan rates are expressed in units of tenths of a
5910 	 * percent.  A scan rate of 1000 (100%) means the whole
5911 	 * cache is scanned every second.
5912 	 */
5913 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5914 
5915 	do {
5916 		outstanding = *countp;
5917 		for (i = 0; i < outstanding; i++) {
5918 			scrub_icache(how_many);
5919 		}
5920 	} while (atomic_add_32_nv(countp, -outstanding));
5921 
5922 	return (DDI_INTR_CLAIMED);
5923 }
5924 
5925 /*
5926  * Called periodically on each CPU to scrub the E$.
5927  */
5928 static void
5929 scrub_ecache(int how_many)
5930 {
5931 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5932 	int i;
5933 	int cpuid = CPU->cpu_id;
5934 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5935 	int nlines = csmp->chsm_ecache_nlines;
5936 	int linesize = cpunodes[cpuid].ecache_linesize;
5937 	int ec_set_size = cpu_ecache_set_size(CPU);
5938 
5939 	/*
5940 	 * scrub the desired number of lines
5941 	 */
5942 	for (i = 0; i < how_many; i++) {
5943 		/*
5944 		 * scrub the E$ line
5945 		 */
5946 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5947 		    ec_set_size);
5948 
5949 		/*
5950 		 * calculate the next E$ line to scrub based on twice
5951 		 * the number of E$ lines (to displace lines containing
5952 		 * flush area data), assumes that the number of lines
5953 		 * is a power of 2
5954 		 */
5955 		index = (index + 1) & ((nlines << 1) - 1);
5956 	}
5957 
5958 	/*
5959 	 * set the ecache scrub index for the next visit
5960 	 */
5961 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5962 }
5963 
5964 /*
5965  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5966  * we decrement the outstanding request count to zero.
5967  *
5968  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5969  * become negative after the atomic_add_32_nv().  This is not a problem, as
5970  * the next trip around the loop won't scrub anything, and the next add will
5971  * reset the count back to zero.
5972  */
5973 /*ARGSUSED*/
5974 static uint_t
5975 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5976 {
5977 	int i;
5978 	int how_many;
5979 	int outstanding;
5980 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5981 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5982 	struct scrub_info *csi = (struct scrub_info *)arg1;
5983 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5984 		ecache_scan_rate_idle : ecache_scan_rate_busy;
5985 	int ecache_nlines = csmp->chsm_ecache_nlines;
5986 
5987 	/*
5988 	 * The scan rates are expressed in units of tenths of a
5989 	 * percent.  A scan rate of 1000 (100%) means the whole
5990 	 * cache is scanned every second.
5991 	 */
5992 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5993 
5994 	do {
5995 		outstanding = *countp;
5996 		for (i = 0; i < outstanding; i++) {
5997 			scrub_ecache(how_many);
5998 		}
5999 	} while (atomic_add_32_nv(countp, -outstanding));
6000 
6001 	return (DDI_INTR_CLAIMED);
6002 }
6003 
6004 /*
6005  * Timeout function to reenable CE
6006  */
6007 static void
6008 cpu_delayed_check_ce_errors(void *arg)
6009 {
6010 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6011 	    TQ_NOSLEEP)) {
6012 		(void) timeout(cpu_delayed_check_ce_errors, arg,
6013 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6014 	}
6015 }
6016 
6017 /*
6018  * CE Deferred Re-enable after trap.
6019  *
6020  * When the CPU gets a disrupting trap for any of the errors
6021  * controlled by the CEEN bit, CEEN is disabled in the trap handler
6022  * immediately. To eliminate the possibility of multiple CEs causing
6023  * recursive stack overflow in the trap handler, we cannot
6024  * reenable CEEN while still running in the trap handler. Instead,
6025  * after a CE is logged on a CPU, we schedule a timeout function,
6026  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6027  * seconds. This function will check whether any further CEs
6028  * have occurred on that CPU, and if none have, will reenable CEEN.
6029  *
6030  * If further CEs have occurred while CEEN is disabled, another
6031  * timeout will be scheduled. This is to ensure that the CPU can
6032  * make progress in the face of CE 'storms', and that it does not
6033  * spend all its time logging CE errors.
6034  */
6035 static void
6036 cpu_check_ce_errors(void *arg)
6037 {
6038 	int	cpuid = (int)(uintptr_t)arg;
6039 	cpu_t	*cp;
6040 
6041 	/*
6042 	 * We acquire cpu_lock.
6043 	 */
6044 	ASSERT(curthread->t_pil == 0);
6045 
6046 	/*
6047 	 * verify that the cpu is still around, DR
6048 	 * could have got there first ...
6049 	 */
6050 	mutex_enter(&cpu_lock);
6051 	cp = cpu_get(cpuid);
6052 	if (cp == NULL) {
6053 		mutex_exit(&cpu_lock);
6054 		return;
6055 	}
6056 	/*
6057 	 * make sure we don't migrate across CPUs
6058 	 * while checking our CE status.
6059 	 */
6060 	kpreempt_disable();
6061 
6062 	/*
6063 	 * If we are running on the CPU that got the
6064 	 * CE, we can do the checks directly.
6065 	 */
6066 	if (cp->cpu_id == CPU->cpu_id) {
6067 		mutex_exit(&cpu_lock);
6068 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6069 		kpreempt_enable();
6070 		return;
6071 	}
6072 	kpreempt_enable();
6073 
6074 	/*
6075 	 * send an x-call to get the CPU that originally
6076 	 * got the CE to do the necessary checks. If we can't
6077 	 * send the x-call, reschedule the timeout, otherwise we
6078 	 * lose CEEN forever on that CPU.
6079 	 */
6080 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6081 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6082 		    TIMEOUT_CEEN_CHECK, 0);
6083 		mutex_exit(&cpu_lock);
6084 	} else {
6085 		/*
6086 		 * When the CPU is not accepting xcalls, or
6087 		 * the processor is offlined, we don't want to
6088 		 * incur the extra overhead of trying to schedule the
6089 		 * CE timeout indefinitely. However, we don't want to lose
6090 		 * CE checking forever.
6091 		 *
6092 		 * Keep rescheduling the timeout, accepting the additional
6093 		 * overhead as the cost of correctness in the case where we get
6094 		 * a CE, disable CEEN, offline the CPU during the
6095 		 * the timeout interval, and then online it at some
6096 		 * point in the future. This is unlikely given the short
6097 		 * cpu_ceen_delay_secs.
6098 		 */
6099 		mutex_exit(&cpu_lock);
6100 		(void) timeout(cpu_delayed_check_ce_errors,
6101 		    (void *)(uintptr_t)cp->cpu_id,
6102 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6103 	}
6104 }
6105 
6106 /*
6107  * This routine will check whether CEs have occurred while
6108  * CEEN is disabled. Any CEs detected will be logged and, if
6109  * possible, scrubbed.
6110  *
6111  * The memscrubber will also use this routine to clear any errors
6112  * caused by its scrubbing with CEEN disabled.
6113  *
6114  * flag == SCRUBBER_CEEN_CHECK
6115  *		called from memscrubber, just check/scrub, no reset
6116  *		paddr 	physical addr. for start of scrub pages
6117  *		vaddr 	virtual addr. for scrub area
6118  *		psz	page size of area to be scrubbed
6119  *
6120  * flag == TIMEOUT_CEEN_CHECK
6121  *		timeout function has triggered, reset timeout or CEEN
6122  *
6123  * Note: We must not migrate cpus during this function.  This can be
6124  * achieved by one of:
6125  *    - invoking as target of an x-call in which case we're at XCALL_PIL
6126  *	The flag value must be first xcall argument.
6127  *    - disabling kernel preemption.  This should be done for very short
6128  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6129  *	scrub an extended area with cpu_check_block.  The call for
6130  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6131  *	brief for this case.
6132  *    - binding to a cpu, eg with thread_affinity_set().  This is used
6133  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6134  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6135  */
6136 void
6137 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6138 {
6139 	ch_cpu_errors_t	cpu_error_regs;
6140 	uint64_t	ec_err_enable;
6141 	uint64_t	page_offset;
6142 
6143 	/* Read AFSR */
6144 	get_cpu_error_state(&cpu_error_regs);
6145 
6146 	/*
6147 	 * If no CEEN errors have occurred during the timeout
6148 	 * interval, it is safe to re-enable CEEN and exit.
6149 	 */
6150 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
6151 		if (flag == TIMEOUT_CEEN_CHECK &&
6152 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6153 			set_error_enable(ec_err_enable | EN_REG_CEEN);
6154 		return;
6155 	}
6156 
6157 	/*
6158 	 * Ensure that CEEN was not reenabled (maybe by DR) before
6159 	 * we log/clear the error.
6160 	 */
6161 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6162 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6163 
6164 	/*
6165 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6166 	 * timeout will be rescheduled when the error is logged.
6167 	 */
6168 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
6169 	    cpu_ce_detected(&cpu_error_regs,
6170 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6171 	else
6172 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6173 
6174 	/*
6175 	 * If the memory scrubber runs while CEEN is
6176 	 * disabled, (or if CEEN is disabled during the
6177 	 * scrub as a result of a CE being triggered by
6178 	 * it), the range being scrubbed will not be
6179 	 * completely cleaned. If there are multiple CEs
6180 	 * in the range at most two of these will be dealt
6181 	 * with, (one by the trap handler and one by the
6182 	 * timeout). It is also possible that none are dealt
6183 	 * with, (CEEN disabled and another CE occurs before
6184 	 * the timeout triggers). So to ensure that the
6185 	 * memory is actually scrubbed, we have to access each
6186 	 * memory location in the range and then check whether
6187 	 * that access causes a CE.
6188 	 */
6189 	if (flag == SCRUBBER_CEEN_CHECK && va) {
6190 		if ((cpu_error_regs.afar >= pa) &&
6191 		    (cpu_error_regs.afar < (pa + psz))) {
6192 			/*
6193 			 * Force a load from physical memory for each
6194 			 * 64-byte block, then check AFSR to determine
6195 			 * whether this access caused an error.
6196 			 *
6197 			 * This is a slow way to do a scrub, but as it will
6198 			 * only be invoked when the memory scrubber actually
6199 			 * triggered a CE, it should not happen too
6200 			 * frequently.
6201 			 *
6202 			 * cut down what we need to check as the scrubber
6203 			 * has verified up to AFAR, so get it's offset
6204 			 * into the page and start there.
6205 			 */
6206 			page_offset = (uint64_t)(cpu_error_regs.afar &
6207 			    (psz - 1));
6208 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6209 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6210 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6211 			    psz);
6212 		}
6213 	}
6214 
6215 	/*
6216 	 * Reset error enable if this CE is not masked.
6217 	 */
6218 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6219 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6220 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
6221 
6222 }
6223 
6224 /*
6225  * Attempt a cpu logout for an error that we did not trap for, such
6226  * as a CE noticed with CEEN off.  It is assumed that we are still running
6227  * on the cpu that took the error and that we cannot migrate.  Returns
6228  * 0 on success, otherwise nonzero.
6229  */
6230 static int
6231 cpu_ce_delayed_ec_logout(uint64_t afar)
6232 {
6233 	ch_cpu_logout_t *clop;
6234 
6235 	if (CPU_PRIVATE(CPU) == NULL)
6236 		return (0);
6237 
6238 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6239 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6240 	    LOGOUT_INVALID)
6241 		return (0);
6242 
6243 	cpu_delayed_logout(afar, clop);
6244 	return (1);
6245 }
6246 
6247 /*
6248  * We got an error while CEEN was disabled. We
6249  * need to clean up after it and log whatever
6250  * information we have on the CE.
6251  */
6252 void
6253 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6254 {
6255 	ch_async_flt_t 	ch_flt;
6256 	struct async_flt *aflt;
6257 	char 		pr_reason[MAX_REASON_STRING];
6258 
6259 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6260 	ch_flt.flt_trapped_ce = flag;
6261 	aflt = (struct async_flt *)&ch_flt;
6262 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6263 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6264 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6265 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6266 	aflt->flt_addr = cpu_error_regs->afar;
6267 #if defined(SERRANO)
6268 	ch_flt.afar2 = cpu_error_regs->afar2;
6269 #endif	/* SERRANO */
6270 	aflt->flt_pc = NULL;
6271 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6272 	aflt->flt_tl = 0;
6273 	aflt->flt_panic = 0;
6274 	cpu_log_and_clear_ce(&ch_flt);
6275 
6276 	/*
6277 	 * check if we caused any errors during cleanup
6278 	 */
6279 	if (clear_errors(&ch_flt)) {
6280 		pr_reason[0] = '\0';
6281 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6282 		    NULL);
6283 	}
6284 }
6285 
6286 /*
6287  * Log/clear CEEN-controlled disrupting errors
6288  */
6289 static void
6290 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6291 {
6292 	struct async_flt *aflt;
6293 	uint64_t afsr, afsr_errs;
6294 	ch_cpu_logout_t *clop;
6295 	char 		pr_reason[MAX_REASON_STRING];
6296 	on_trap_data_t	*otp = curthread->t_ontrap;
6297 
6298 	aflt = (struct async_flt *)ch_flt;
6299 	afsr = aflt->flt_stat;
6300 	afsr_errs = ch_flt->afsr_errs;
6301 	aflt->flt_id = gethrtime_waitfree();
6302 	aflt->flt_bus_id = getprocessorid();
6303 	aflt->flt_inst = CPU->cpu_id;
6304 	aflt->flt_prot = AFLT_PROT_NONE;
6305 	aflt->flt_class = CPU_FAULT;
6306 	aflt->flt_status = ECC_C_TRAP;
6307 
6308 	pr_reason[0] = '\0';
6309 	/*
6310 	 * Get the CPU log out info for Disrupting Trap.
6311 	 */
6312 	if (CPU_PRIVATE(CPU) == NULL) {
6313 		clop = NULL;
6314 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6315 	} else {
6316 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6317 	}
6318 
6319 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6320 		ch_cpu_errors_t cpu_error_regs;
6321 
6322 		get_cpu_error_state(&cpu_error_regs);
6323 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6324 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6325 		clop->clo_data.chd_afar = cpu_error_regs.afar;
6326 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6327 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6328 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6329 		clop->clo_sdw_data.chd_afsr_ext =
6330 		    cpu_error_regs.shadow_afsr_ext;
6331 #if defined(SERRANO)
6332 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6333 #endif	/* SERRANO */
6334 		ch_flt->flt_data_incomplete = 1;
6335 
6336 		/*
6337 		 * The logging/clear code expects AFSR/AFAR to be cleared.
6338 		 * The trap handler does it for CEEN enabled errors
6339 		 * so we need to do it here.
6340 		 */
6341 		set_cpu_error_state(&cpu_error_regs);
6342 	}
6343 
6344 #if defined(JALAPENO) || defined(SERRANO)
6345 	/*
6346 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6347 	 * For Serrano, even thou we do have the AFAR, we still do the
6348 	 * scrub on the RCE side since that's where the error type can
6349 	 * be properly classified as intermittent, persistent, etc.
6350 	 *
6351 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
6352 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6353 	 * the flt_status bits.
6354 	 */
6355 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6356 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6357 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6358 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
6359 	}
6360 #else /* JALAPENO || SERRANO */
6361 	/*
6362 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
6363 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6364 	 * the flt_status bits.
6365 	 */
6366 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6367 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6368 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6369 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6370 		}
6371 	}
6372 
6373 #endif /* JALAPENO || SERRANO */
6374 
6375 	/*
6376 	 * Update flt_prot if this error occurred under on_trap protection.
6377 	 */
6378 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6379 		aflt->flt_prot = AFLT_PROT_EC;
6380 
6381 	/*
6382 	 * Queue events on the async event queue, one event per error bit.
6383 	 */
6384 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6385 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6386 		ch_flt->flt_type = CPU_INV_AFSR;
6387 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6388 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6389 		    aflt->flt_panic);
6390 	}
6391 
6392 	/*
6393 	 * Zero out + invalidate CPU logout.
6394 	 */
6395 	if (clop) {
6396 		bzero(clop, sizeof (ch_cpu_logout_t));
6397 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6398 	}
6399 
6400 	/*
6401 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6402 	 * was disabled, we need to flush either the entire
6403 	 * E$ or an E$ line.
6404 	 */
6405 #if defined(JALAPENO) || defined(SERRANO)
6406 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6407 #else	/* JALAPENO || SERRANO */
6408 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6409 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6410 #endif	/* JALAPENO || SERRANO */
6411 		cpu_error_ecache_flush(ch_flt);
6412 
6413 }
6414 
6415 /*
6416  * depending on the error type, we determine whether we
6417  * need to flush the entire ecache or just a line.
6418  */
6419 static int
6420 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6421 {
6422 	struct async_flt *aflt;
6423 	uint64_t	afsr;
6424 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6425 
6426 	aflt = (struct async_flt *)ch_flt;
6427 	afsr = aflt->flt_stat;
6428 
6429 	/*
6430 	 * If we got multiple errors, no point in trying
6431 	 * the individual cases, just flush the whole cache
6432 	 */
6433 	if (afsr & C_AFSR_ME) {
6434 		return (ECACHE_FLUSH_ALL);
6435 	}
6436 
6437 	/*
6438 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6439 	 * was disabled, we need to flush entire E$. We can't just
6440 	 * flush the cache line affected as the ME bit
6441 	 * is not set when multiple correctable errors of the same
6442 	 * type occur, so we might have multiple CPC or EDC errors,
6443 	 * with only the first recorded.
6444 	 */
6445 #if defined(JALAPENO) || defined(SERRANO)
6446 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6447 #else	/* JALAPENO || SERRANO */
6448 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6449 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6450 #endif	/* JALAPENO || SERRANO */
6451 		return (ECACHE_FLUSH_ALL);
6452 	}
6453 
6454 #if defined(JALAPENO) || defined(SERRANO)
6455 	/*
6456 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6457 	 * flush the entire Ecache.
6458 	 */
6459 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6460 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6461 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6462 			return (ECACHE_FLUSH_LINE);
6463 		} else {
6464 			return (ECACHE_FLUSH_ALL);
6465 		}
6466 	}
6467 #else /* JALAPENO || SERRANO */
6468 	/*
6469 	 * If UE only is set, flush the Ecache line, otherwise
6470 	 * flush the entire Ecache.
6471 	 */
6472 	if (afsr_errs & C_AFSR_UE) {
6473 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6474 		    C_AFSR_UE) {
6475 			return (ECACHE_FLUSH_LINE);
6476 		} else {
6477 			return (ECACHE_FLUSH_ALL);
6478 		}
6479 	}
6480 #endif /* JALAPENO || SERRANO */
6481 
6482 	/*
6483 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6484 	 * flush the entire Ecache.
6485 	 */
6486 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6487 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6488 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6489 			return (ECACHE_FLUSH_LINE);
6490 		} else {
6491 			return (ECACHE_FLUSH_ALL);
6492 		}
6493 	}
6494 
6495 	/*
6496 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6497 	 * flush the entire Ecache.
6498 	 */
6499 	if (afsr_errs & C_AFSR_BERR) {
6500 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6501 			return (ECACHE_FLUSH_LINE);
6502 		} else {
6503 			return (ECACHE_FLUSH_ALL);
6504 		}
6505 	}
6506 
6507 	return (0);
6508 }
6509 
6510 void
6511 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6512 {
6513 	int	ecache_flush_flag =
6514 	    cpu_error_ecache_flush_required(ch_flt);
6515 
6516 	/*
6517 	 * Flush Ecache line or entire Ecache based on above checks.
6518 	 */
6519 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6520 		cpu_flush_ecache();
6521 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6522 		cpu_flush_ecache_line(ch_flt);
6523 	}
6524 
6525 }
6526 
6527 /*
6528  * Extract the PA portion from the E$ tag.
6529  */
6530 uint64_t
6531 cpu_ectag_to_pa(int setsize, uint64_t tag)
6532 {
6533 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6534 		return (JG_ECTAG_TO_PA(setsize, tag));
6535 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6536 		return (PN_L3TAG_TO_PA(tag));
6537 	else
6538 		return (CH_ECTAG_TO_PA(setsize, tag));
6539 }
6540 
6541 /*
6542  * Convert the E$ tag PA into an E$ subblock index.
6543  */
6544 static int
6545 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6546 {
6547 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6548 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6549 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6550 		/* Panther has only one subblock per line */
6551 		return (0);
6552 	else
6553 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6554 }
6555 
6556 /*
6557  * All subblocks in an E$ line must be invalid for
6558  * the line to be invalid.
6559  */
6560 int
6561 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6562 {
6563 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6564 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6565 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6566 		return (PN_L3_LINE_INVALID(tag));
6567 	else
6568 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6569 }
6570 
6571 /*
6572  * Extract state bits for a subblock given the tag.  Note that for Panther
6573  * this works on both l2 and l3 tags.
6574  */
6575 static int
6576 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6577 {
6578 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6579 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6580 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6581 		return (tag & CH_ECSTATE_MASK);
6582 	else
6583 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6584 }
6585 
6586 /*
6587  * Cpu specific initialization.
6588  */
6589 void
6590 cpu_mp_init(void)
6591 {
6592 #ifdef	CHEETAHPLUS_ERRATUM_25
6593 	if (cheetah_sendmondo_recover) {
6594 		cheetah_nudge_init();
6595 	}
6596 #endif
6597 }
6598 
6599 void
6600 cpu_ereport_post(struct async_flt *aflt)
6601 {
6602 	char *cpu_type, buf[FM_MAX_CLASS];
6603 	nv_alloc_t *nva = NULL;
6604 	nvlist_t *ereport, *detector, *resource;
6605 	errorq_elem_t *eqep;
6606 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6607 	char unum[UNUM_NAMLEN];
6608 	int synd_code;
6609 	uint8_t msg_type;
6610 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6611 
6612 	if (aflt->flt_panic || panicstr) {
6613 		eqep = errorq_reserve(ereport_errorq);
6614 		if (eqep == NULL)
6615 			return;
6616 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6617 		nva = errorq_elem_nva(ereport_errorq, eqep);
6618 	} else {
6619 		ereport = fm_nvlist_create(nva);
6620 	}
6621 
6622 	/*
6623 	 * Create the scheme "cpu" FMRI.
6624 	 */
6625 	detector = fm_nvlist_create(nva);
6626 	resource = fm_nvlist_create(nva);
6627 	switch (cpunodes[aflt->flt_inst].implementation) {
6628 	case CHEETAH_IMPL:
6629 		cpu_type = FM_EREPORT_CPU_USIII;
6630 		break;
6631 	case CHEETAH_PLUS_IMPL:
6632 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6633 		break;
6634 	case JALAPENO_IMPL:
6635 		cpu_type = FM_EREPORT_CPU_USIIIi;
6636 		break;
6637 	case SERRANO_IMPL:
6638 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6639 		break;
6640 	case JAGUAR_IMPL:
6641 		cpu_type = FM_EREPORT_CPU_USIV;
6642 		break;
6643 	case PANTHER_IMPL:
6644 		cpu_type = FM_EREPORT_CPU_USIVplus;
6645 		break;
6646 	default:
6647 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6648 		break;
6649 	}
6650 
6651 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
6652 
6653 	/*
6654 	 * Encode all the common data into the ereport.
6655 	 */
6656 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6657 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6658 
6659 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6660 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6661 	    detector, NULL);
6662 
6663 	/*
6664 	 * Encode the error specific data that was saved in
6665 	 * the async_flt structure into the ereport.
6666 	 */
6667 	cpu_payload_add_aflt(aflt, ereport, resource,
6668 	    &plat_ecc_ch_flt.ecaf_afar_status,
6669 	    &plat_ecc_ch_flt.ecaf_synd_status);
6670 
6671 	if (aflt->flt_panic || panicstr) {
6672 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6673 	} else {
6674 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6675 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6676 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6677 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6678 	}
6679 	/*
6680 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6681 	 * to the SC olny if it can process it.
6682 	 */
6683 
6684 	if (&plat_ecc_capability_sc_get &&
6685 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6686 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6687 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6688 			/*
6689 			 * If afar status is not invalid do a unum lookup.
6690 			 */
6691 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6692 			    AFLT_STAT_INVALID) {
6693 				synd_code = synd_to_synd_code(
6694 				    plat_ecc_ch_flt.ecaf_synd_status,
6695 				    aflt->flt_synd, ch_flt->flt_bit);
6696 				(void) cpu_get_mem_unum_synd(synd_code,
6697 				    aflt, unum);
6698 			} else {
6699 				unum[0] = '\0';
6700 			}
6701 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6702 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6703 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6704 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6705 			    ch_flt->flt_sdw_afsr_ext;
6706 
6707 			if (&plat_log_fruid_error2)
6708 				plat_log_fruid_error2(msg_type, unum, aflt,
6709 				    &plat_ecc_ch_flt);
6710 		}
6711 	}
6712 }
6713 
6714 void
6715 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6716 {
6717 	int status;
6718 	ddi_fm_error_t de;
6719 
6720 	bzero(&de, sizeof (ddi_fm_error_t));
6721 
6722 	de.fme_version = DDI_FME_VERSION;
6723 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6724 	    FM_ENA_FMT1);
6725 	de.fme_flag = expected;
6726 	de.fme_bus_specific = (void *)aflt->flt_addr;
6727 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6728 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6729 		aflt->flt_panic = 1;
6730 }
6731 
6732 void
6733 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6734     errorq_t *eqp, uint_t flag)
6735 {
6736 	struct async_flt *aflt = (struct async_flt *)payload;
6737 
6738 	aflt->flt_erpt_class = error_class;
6739 	errorq_dispatch(eqp, payload, payload_sz, flag);
6740 }
6741 
6742 /*
6743  * This routine may be called by the IO module, but does not do
6744  * anything in this cpu module. The SERD algorithm is handled by
6745  * cpumem-diagnosis engine instead.
6746  */
6747 /*ARGSUSED*/
6748 void
6749 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
6750 {}
6751 
6752 void
6753 adjust_hw_copy_limits(int ecache_size)
6754 {
6755 	/*
6756 	 * Set hw copy limits.
6757 	 *
6758 	 * /etc/system will be parsed later and can override one or more
6759 	 * of these settings.
6760 	 *
6761 	 * At this time, ecache size seems only mildly relevant.
6762 	 * We seem to run into issues with the d-cache and stalls
6763 	 * we see on misses.
6764 	 *
6765 	 * Cycle measurement indicates that 2 byte aligned copies fare
6766 	 * little better than doing things with VIS at around 512 bytes.
6767 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6768 	 * aligned is faster whenever the source and destination data
6769 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6770 	 * limit seems to be driven by the 2K write cache.
6771 	 * When more than 2K of copies are done in non-VIS mode, stores
6772 	 * backup in the write cache.  In VIS mode, the write cache is
6773 	 * bypassed, allowing faster cache-line writes aligned on cache
6774 	 * boundaries.
6775 	 *
6776 	 * In addition, in non-VIS mode, there is no prefetching, so
6777 	 * for larger copies, the advantage of prefetching to avoid even
6778 	 * occasional cache misses is enough to justify using the VIS code.
6779 	 *
6780 	 * During testing, it was discovered that netbench ran 3% slower
6781 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6782 	 * applications, data is only used once (copied to the output
6783 	 * buffer, then copied by the network device off the system).  Using
6784 	 * the VIS copy saves more L2 cache state.  Network copies are
6785 	 * around 1.3K to 1.5K in size for historical reasons.
6786 	 *
6787 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6788 	 * aligned copy even for large caches and 8 MB ecache.  The
6789 	 * infrastructure to allow different limits for different sized
6790 	 * caches is kept to allow further tuning in later releases.
6791 	 */
6792 
6793 	if (min_ecache_size == 0 && use_hw_bcopy) {
6794 		/*
6795 		 * First time through - should be before /etc/system
6796 		 * is read.
6797 		 * Could skip the checks for zero but this lets us
6798 		 * preserve any debugger rewrites.
6799 		 */
6800 		if (hw_copy_limit_1 == 0) {
6801 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6802 			priv_hcl_1 = hw_copy_limit_1;
6803 		}
6804 		if (hw_copy_limit_2 == 0) {
6805 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6806 			priv_hcl_2 = hw_copy_limit_2;
6807 		}
6808 		if (hw_copy_limit_4 == 0) {
6809 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6810 			priv_hcl_4 = hw_copy_limit_4;
6811 		}
6812 		if (hw_copy_limit_8 == 0) {
6813 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6814 			priv_hcl_8 = hw_copy_limit_8;
6815 		}
6816 		min_ecache_size = ecache_size;
6817 	} else {
6818 		/*
6819 		 * MP initialization. Called *after* /etc/system has
6820 		 * been parsed. One CPU has already been initialized.
6821 		 * Need to cater for /etc/system having scragged one
6822 		 * of our values.
6823 		 */
6824 		if (ecache_size == min_ecache_size) {
6825 			/*
6826 			 * Same size ecache. We do nothing unless we
6827 			 * have a pessimistic ecache setting. In that
6828 			 * case we become more optimistic (if the cache is
6829 			 * large enough).
6830 			 */
6831 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6832 				/*
6833 				 * Need to adjust hw_copy_limit* from our
6834 				 * pessimistic uniprocessor value to a more
6835 				 * optimistic UP value *iff* it hasn't been
6836 				 * reset.
6837 				 */
6838 				if ((ecache_size > 1048576) &&
6839 				    (priv_hcl_8 == hw_copy_limit_8)) {
6840 					if (ecache_size <= 2097152)
6841 						hw_copy_limit_8 = 4 *
6842 						    VIS_COPY_THRESHOLD;
6843 					else if (ecache_size <= 4194304)
6844 						hw_copy_limit_8 = 4 *
6845 						    VIS_COPY_THRESHOLD;
6846 					else
6847 						hw_copy_limit_8 = 4 *
6848 						    VIS_COPY_THRESHOLD;
6849 					priv_hcl_8 = hw_copy_limit_8;
6850 				}
6851 			}
6852 		} else if (ecache_size < min_ecache_size) {
6853 			/*
6854 			 * A different ecache size. Can this even happen?
6855 			 */
6856 			if (priv_hcl_8 == hw_copy_limit_8) {
6857 				/*
6858 				 * The previous value that we set
6859 				 * is unchanged (i.e., it hasn't been
6860 				 * scragged by /etc/system). Rewrite it.
6861 				 */
6862 				if (ecache_size <= 1048576)
6863 					hw_copy_limit_8 = 8 *
6864 					    VIS_COPY_THRESHOLD;
6865 				else if (ecache_size <= 2097152)
6866 					hw_copy_limit_8 = 8 *
6867 					    VIS_COPY_THRESHOLD;
6868 				else if (ecache_size <= 4194304)
6869 					hw_copy_limit_8 = 8 *
6870 					    VIS_COPY_THRESHOLD;
6871 				else
6872 					hw_copy_limit_8 = 10 *
6873 					    VIS_COPY_THRESHOLD;
6874 				priv_hcl_8 = hw_copy_limit_8;
6875 				min_ecache_size = ecache_size;
6876 			}
6877 		}
6878 	}
6879 }
6880 
6881 /*
6882  * Called from illegal instruction trap handler to see if we can attribute
6883  * the trap to a fpras check.
6884  */
6885 int
6886 fpras_chktrap(struct regs *rp)
6887 {
6888 	int op;
6889 	struct fpras_chkfngrp *cgp;
6890 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6891 
6892 	if (fpras_chkfngrps == NULL)
6893 		return (0);
6894 
6895 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6896 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6897 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6898 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6899 			break;
6900 	}
6901 	if (op == FPRAS_NCOPYOPS)
6902 		return (0);
6903 
6904 	/*
6905 	 * This is an fpRAS failure caught through an illegal
6906 	 * instruction - trampoline.
6907 	 */
6908 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6909 	rp->r_npc = rp->r_pc + 4;
6910 	return (1);
6911 }
6912 
6913 /*
6914  * fpras_failure is called when a fpras check detects a bad calculation
6915  * result or an illegal instruction trap is attributed to an fpras
6916  * check.  In all cases we are still bound to CPU.
6917  */
6918 int
6919 fpras_failure(int op, int how)
6920 {
6921 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6922 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6923 	ch_async_flt_t ch_flt;
6924 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6925 	struct fpras_chkfn *sfp, *cfp;
6926 	uint32_t *sip, *cip;
6927 	int i;
6928 
6929 	/*
6930 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6931 	 * the time in which we dispatch an ereport and (if applicable) panic.
6932 	 */
6933 	use_hw_bcopy_orig = use_hw_bcopy;
6934 	use_hw_bzero_orig = use_hw_bzero;
6935 	hcl1_orig = hw_copy_limit_1;
6936 	hcl2_orig = hw_copy_limit_2;
6937 	hcl4_orig = hw_copy_limit_4;
6938 	hcl8_orig = hw_copy_limit_8;
6939 	use_hw_bcopy = use_hw_bzero = 0;
6940 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6941 	    hw_copy_limit_8 = 0;
6942 
6943 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6944 	aflt->flt_id = gethrtime_waitfree();
6945 	aflt->flt_class = CPU_FAULT;
6946 	aflt->flt_inst = CPU->cpu_id;
6947 	aflt->flt_status = (how << 8) | op;
6948 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6949 	ch_flt.flt_type = CPU_FPUERR;
6950 
6951 	/*
6952 	 * We must panic if the copy operation had no lofault protection -
6953 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6954 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6955 	 */
6956 	aflt->flt_panic = (curthread->t_lofault == NULL);
6957 
6958 	/*
6959 	 * XOR the source instruction block with the copied instruction
6960 	 * block - this will show us which bit(s) are corrupted.
6961 	 */
6962 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6963 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6964 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6965 		sip = &sfp->fpras_blk0[0];
6966 		cip = &cfp->fpras_blk0[0];
6967 	} else {
6968 		sip = &sfp->fpras_blk1[0];
6969 		cip = &cfp->fpras_blk1[0];
6970 	}
6971 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6972 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6973 
6974 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6975 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6976 
6977 	if (aflt->flt_panic)
6978 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6979 
6980 	/*
6981 	 * We get here for copyin/copyout and kcopy or bcopy where the
6982 	 * caller has used on_fault.  We will flag the error so that
6983 	 * the process may be killed  The trap_async_hwerr mechanism will
6984 	 * take appropriate further action (such as a reboot, contract
6985 	 * notification etc).  Since we may be continuing we will
6986 	 * restore the global hardware copy acceleration switches.
6987 	 *
6988 	 * When we return from this function to the copy function we want to
6989 	 * avoid potentially bad data being used, ie we want the affected
6990 	 * copy function to return an error.  The caller should therefore
6991 	 * invoke its lofault handler (which always exists for these functions)
6992 	 * which will return the appropriate error.
6993 	 */
6994 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6995 	aston(curthread);
6996 
6997 	use_hw_bcopy = use_hw_bcopy_orig;
6998 	use_hw_bzero = use_hw_bzero_orig;
6999 	hw_copy_limit_1 = hcl1_orig;
7000 	hw_copy_limit_2 = hcl2_orig;
7001 	hw_copy_limit_4 = hcl4_orig;
7002 	hw_copy_limit_8 = hcl8_orig;
7003 
7004 	return (1);
7005 }
7006 
7007 #define	VIS_BLOCKSIZE		64
7008 
7009 int
7010 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7011 {
7012 	int ret, watched;
7013 
7014 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7015 	ret = dtrace_blksuword32(addr, data, 0);
7016 	if (watched)
7017 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7018 
7019 	return (ret);
7020 }
7021 
7022 /*
7023  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7024  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
7025  * CEEN from the EER to disable traps for further disrupting error types
7026  * on that cpu.  We could cross-call instead, but that has a larger
7027  * instruction and data footprint than cross-trapping, and the cpu is known
7028  * to be faulted.
7029  */
7030 
7031 void
7032 cpu_faulted_enter(struct cpu *cp)
7033 {
7034 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7035 }
7036 
7037 /*
7038  * Called when a cpu leaves the CPU_FAULTED state to return to one of
7039  * offline, spare, or online (by the cpu requesting this state change).
7040  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7041  * disrupting error bits that have accumulated without trapping, then
7042  * we cross-trap to re-enable CEEN controlled traps.
7043  */
7044 void
7045 cpu_faulted_exit(struct cpu *cp)
7046 {
7047 	ch_cpu_errors_t cpu_error_regs;
7048 
7049 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7050 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7051 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7052 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7053 	    (uint64_t)&cpu_error_regs, 0);
7054 
7055 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7056 }
7057 
7058 /*
7059  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7060  * the errors in the original AFSR, 0 otherwise.
7061  *
7062  * For all procs if the initial error was a BERR or TO, then it is possible
7063  * that we may have caused a secondary BERR or TO in the process of logging the
7064  * inital error via cpu_run_bus_error_handlers().  If this is the case then
7065  * if the request was protected then a panic is still not necessary, if not
7066  * protected then aft_panic is already set - so either way there's no need
7067  * to set aft_panic for the secondary error.
7068  *
7069  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7070  * a store merge, then the error handling code will call cpu_deferred_error().
7071  * When clear_errors() is called, it will determine that secondary errors have
7072  * occurred - in particular, the store merge also caused a EDU and WDU that
7073  * weren't discovered until this point.
7074  *
7075  * We do three checks to verify that we are in this case.  If we pass all three
7076  * checks, we return 1 to indicate that we should not panic.  If any unexpected
7077  * errors occur, we return 0.
7078  *
7079  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7080  * handled in cpu_disrupting_errors().  Since this function is not even called
7081  * in the case we are interested in, we just return 0 for these processors.
7082  */
7083 /*ARGSUSED*/
7084 static int
7085 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7086     uint64_t t_afar)
7087 {
7088 #if defined(CHEETAH_PLUS)
7089 #else	/* CHEETAH_PLUS */
7090 	struct async_flt *aflt = (struct async_flt *)ch_flt;
7091 #endif	/* CHEETAH_PLUS */
7092 
7093 	/*
7094 	 * Was the original error a BERR or TO and only a BERR or TO
7095 	 * (multiple errors are also OK)
7096 	 */
7097 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7098 		/*
7099 		 * Is the new error a BERR or TO and only a BERR or TO
7100 		 * (multiple errors are also OK)
7101 		 */
7102 		if ((ch_flt->afsr_errs &
7103 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7104 			return (1);
7105 	}
7106 
7107 #if defined(CHEETAH_PLUS)
7108 	return (0);
7109 #else	/* CHEETAH_PLUS */
7110 	/*
7111 	 * Now look for secondary effects of a UE on cheetah/jalapeno
7112 	 *
7113 	 * Check the original error was a UE, and only a UE.  Note that
7114 	 * the ME bit will cause us to fail this check.
7115 	 */
7116 	if (t_afsr_errs != C_AFSR_UE)
7117 		return (0);
7118 
7119 	/*
7120 	 * Check the secondary errors were exclusively an EDU and/or WDU.
7121 	 */
7122 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7123 		return (0);
7124 
7125 	/*
7126 	 * Check the AFAR of the original error and secondary errors
7127 	 * match to the 64-byte boundary
7128 	 */
7129 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7130 		return (0);
7131 
7132 	/*
7133 	 * We've passed all the checks, so it's a secondary error!
7134 	 */
7135 	return (1);
7136 #endif	/* CHEETAH_PLUS */
7137 }
7138 
7139 /*
7140  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
7141  * is checked for any valid errors.  If found, the error type is
7142  * returned. If not found, the flt_type is checked for L1$ parity errors.
7143  */
7144 /*ARGSUSED*/
7145 static uint8_t
7146 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7147 {
7148 #if defined(JALAPENO)
7149 	/*
7150 	 * Currently, logging errors to the SC is not supported on Jalapeno
7151 	 */
7152 	return (PLAT_ECC_ERROR2_NONE);
7153 #else
7154 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7155 
7156 	switch (ch_flt->flt_bit) {
7157 	case C_AFSR_CE:
7158 		return (PLAT_ECC_ERROR2_CE);
7159 	case C_AFSR_UCC:
7160 	case C_AFSR_EDC:
7161 	case C_AFSR_WDC:
7162 	case C_AFSR_CPC:
7163 		return (PLAT_ECC_ERROR2_L2_CE);
7164 	case C_AFSR_EMC:
7165 		return (PLAT_ECC_ERROR2_EMC);
7166 	case C_AFSR_IVC:
7167 		return (PLAT_ECC_ERROR2_IVC);
7168 	case C_AFSR_UE:
7169 		return (PLAT_ECC_ERROR2_UE);
7170 	case C_AFSR_UCU:
7171 	case C_AFSR_EDU:
7172 	case C_AFSR_WDU:
7173 	case C_AFSR_CPU:
7174 		return (PLAT_ECC_ERROR2_L2_UE);
7175 	case C_AFSR_IVU:
7176 		return (PLAT_ECC_ERROR2_IVU);
7177 	case C_AFSR_TO:
7178 		return (PLAT_ECC_ERROR2_TO);
7179 	case C_AFSR_BERR:
7180 		return (PLAT_ECC_ERROR2_BERR);
7181 #if defined(CHEETAH_PLUS)
7182 	case C_AFSR_L3_EDC:
7183 	case C_AFSR_L3_UCC:
7184 	case C_AFSR_L3_CPC:
7185 	case C_AFSR_L3_WDC:
7186 		return (PLAT_ECC_ERROR2_L3_CE);
7187 	case C_AFSR_IMC:
7188 		return (PLAT_ECC_ERROR2_IMC);
7189 	case C_AFSR_TSCE:
7190 		return (PLAT_ECC_ERROR2_L2_TSCE);
7191 	case C_AFSR_THCE:
7192 		return (PLAT_ECC_ERROR2_L2_THCE);
7193 	case C_AFSR_L3_MECC:
7194 		return (PLAT_ECC_ERROR2_L3_MECC);
7195 	case C_AFSR_L3_THCE:
7196 		return (PLAT_ECC_ERROR2_L3_THCE);
7197 	case C_AFSR_L3_CPU:
7198 	case C_AFSR_L3_EDU:
7199 	case C_AFSR_L3_UCU:
7200 	case C_AFSR_L3_WDU:
7201 		return (PLAT_ECC_ERROR2_L3_UE);
7202 	case C_AFSR_DUE:
7203 		return (PLAT_ECC_ERROR2_DUE);
7204 	case C_AFSR_DTO:
7205 		return (PLAT_ECC_ERROR2_DTO);
7206 	case C_AFSR_DBERR:
7207 		return (PLAT_ECC_ERROR2_DBERR);
7208 #endif	/* CHEETAH_PLUS */
7209 	default:
7210 		switch (ch_flt->flt_type) {
7211 #if defined(CPU_IMP_L1_CACHE_PARITY)
7212 		case CPU_IC_PARITY:
7213 			return (PLAT_ECC_ERROR2_IPE);
7214 		case CPU_DC_PARITY:
7215 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7216 				if (ch_flt->parity_data.dpe.cpl_cache ==
7217 				    CPU_PC_PARITY) {
7218 					return (PLAT_ECC_ERROR2_PCACHE);
7219 				}
7220 			}
7221 			return (PLAT_ECC_ERROR2_DPE);
7222 #endif /* CPU_IMP_L1_CACHE_PARITY */
7223 		case CPU_ITLB_PARITY:
7224 			return (PLAT_ECC_ERROR2_ITLB);
7225 		case CPU_DTLB_PARITY:
7226 			return (PLAT_ECC_ERROR2_DTLB);
7227 		default:
7228 			return (PLAT_ECC_ERROR2_NONE);
7229 		}
7230 	}
7231 #endif	/* JALAPENO */
7232 }
7233