xref: /titanic_50/usr/src/uts/sun4u/cpu/us3_common.c (revision 888e055994b8b0dc77b98c53dd97026237caec5d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/ddi.h>
29 #include <sys/sysmacros.h>
30 #include <sys/archsystm.h>
31 #include <sys/vmsystm.h>
32 #include <sys/machparam.h>
33 #include <sys/machsystm.h>
34 #include <sys/machthread.h>
35 #include <sys/cpu.h>
36 #include <sys/cmp.h>
37 #include <sys/elf_SPARC.h>
38 #include <vm/vm_dep.h>
39 #include <vm/hat_sfmmu.h>
40 #include <vm/seg_kpm.h>
41 #include <sys/cpuvar.h>
42 #include <sys/cheetahregs.h>
43 #include <sys/us3_module.h>
44 #include <sys/async.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/dditypes.h>
48 #include <sys/prom_debug.h>
49 #include <sys/prom_plat.h>
50 #include <sys/cpu_module.h>
51 #include <sys/sysmacros.h>
52 #include <sys/intreg.h>
53 #include <sys/clock.h>
54 #include <sys/platform_module.h>
55 #include <sys/machtrap.h>
56 #include <sys/ontrap.h>
57 #include <sys/panic.h>
58 #include <sys/memlist.h>
59 #include <sys/bootconf.h>
60 #include <sys/ivintr.h>
61 #include <sys/atomic.h>
62 #include <sys/taskq.h>
63 #include <sys/note.h>
64 #include <sys/ndifm.h>
65 #include <sys/ddifm.h>
66 #include <sys/fm/protocol.h>
67 #include <sys/fm/util.h>
68 #include <sys/fm/cpu/UltraSPARC-III.h>
69 #include <sys/fpras_impl.h>
70 #include <sys/dtrace.h>
71 #include <sys/watchpoint.h>
72 #include <sys/plat_ecc_unum.h>
73 #include <sys/cyclic.h>
74 #include <sys/errorq.h>
75 #include <sys/errclassify.h>
76 #include <sys/pghw.h>
77 
78 #ifdef	CHEETAHPLUS_ERRATUM_25
79 #include <sys/xc_impl.h>
80 #endif	/* CHEETAHPLUS_ERRATUM_25 */
81 
/*
 * Two CPU logout structures and a retry counter.  Nothing in this part of
 * the file reads or writes them.
 * NOTE(review): the names suggest logout snapshots taken around an E$ flush
 * plus a count of flush retries -- confirm against the code that fills them.
 */
82 ch_cpu_logout_t	clop_before_flush;
83 ch_cpu_logout_t	clop_after_flush;
84 uint_t	flush_retries_done = 0;
85 /*
86  * Note that 'Cheetah PRM' refers to:
87  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
88  */
89 
90 /*
91  * Per CPU pointers to physical address of TL>0 logout data areas.
92  * These pointers have to be in the kernel nucleus to avoid MMU
93  * misses.
94  */
95 uint64_t ch_err_tl1_paddrs[NCPU];
96 
97 /*
98  * One statically allocated structure to use during startup/DR
99  * to prevent unnecessary panics.
100  */
101 ch_err_tl1_data_t ch_err_tl1_data;
102 
103 /*
104  * Per CPU pending error at TL>0, used by level15 softint handler
105  */
106 uchar_t ch_err_tl1_pending[NCPU];
107 
108 /*
109  * For deferred CE re-enable after trap.
110  */
111 taskq_t		*ch_check_ce_tq;
112 
113 /*
114  * Internal functions.
115  */
116 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
117 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
118 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
119     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
120 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
121     uint64_t t_afsr_bit);
122 static int clear_ecc(struct async_flt *ecc);
123 #if defined(CPU_IMP_ECACHE_ASSOC)
124 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
125 #endif
126 int cpu_ecache_set_size(struct cpu *cp);
127 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
128 int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
129 uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
130 int cpu_ectag_pa_to_subblk_state(int cachesize,
131 				uint64_t subaddr, uint64_t tag);
132 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
133 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
134 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
135 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
136 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
137 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
138 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
139 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
140 static void cpu_scrubphys(struct async_flt *aflt);
141 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
142     int *, int *);
143 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
144 static void cpu_ereport_init(struct async_flt *aflt);
145 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
146 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
147 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
148     uint64_t nceen, ch_cpu_logout_t *clop);
149 static int cpu_ce_delayed_ec_logout(uint64_t);
150 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
151 static int cpu_error_is_ecache_data(int, uint64_t);
152 static void cpu_fmri_cpu_set(nvlist_t *, int);
153 static int cpu_error_to_resource_type(struct async_flt *aflt);
154 
155 #ifdef	CHEETAHPLUS_ERRATUM_25
156 static int mondo_recover_proc(uint16_t, int);
157 static void cheetah_nudge_init(void);
158 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
159     cyc_time_t *when);
160 static void cheetah_nudge_buddy(void);
161 #endif	/* CHEETAHPLUS_ERRATUM_25 */
162 
163 #if defined(CPU_IMP_L1_CACHE_PARITY)
164 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
165 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
166 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
167     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
168 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
169 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
170 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
171 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
172 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
173 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
174 #endif	/* CPU_IMP_L1_CACHE_PARITY */
175 
176 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
177     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
178     int *segsp, int *banksp, int *mcidp);
179 
180 /*
181  * This table is used to determine which bit(s) is(are) bad when an ECC
182  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
183  * of this array have the following semantics:
184  *
185  *      00-127  The number of the bad bit, when only one bit is bad.
186  *      128     ECC bit C0 is bad.
187  *      129     ECC bit C1 is bad.
188  *      130     ECC bit C2 is bad.
189  *      131     ECC bit C3 is bad.
190  *      132     ECC bit C4 is bad.
191  *      133     ECC bit C5 is bad.
192  *      134     ECC bit C6 is bad.
193  *      135     ECC bit C7 is bad.
194  *      136     ECC bit C8 is bad.
195  *	137-143 reserved for Mtag Data and ECC.
196  *      144(M2) Two bits are bad within a nibble.
197  *      145(M3) Three bits are bad within a nibble.
198  *      146(M4) Four bits are bad within a nibble.
199  *      147(M)  Multiple bits (5 or more) are bad.
200  *      148     NO bits are bad.
201  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
202  */
203 
/* Codes 128-136: a single ECC check bit (C0-C8) is bad. */
204 #define	C0	128
205 #define	C1	129
206 #define	C2	130
207 #define	C3	131
208 #define	C4	132
209 #define	C5	133
210 #define	C6	134
211 #define	C7	135
212 #define	C8	136
/* Codes 137-143: Mtag data bits (MT*) and Mtag check bits (MTC*). */
213 #define	MT0	137	/* Mtag Data bit 0 */
214 #define	MT1	138
215 #define	MT2	139
216 #define	MTC0	140	/* Mtag Check bit 0 */
217 #define	MTC1	141
218 #define	MTC2	142
219 #define	MTC3	143
/* Codes 144-148: multi-bit classes and the "no error" code. */
220 #define	M2	144	/* two bits bad within a nibble */
221 #define	M3	145	/* three bits bad within a nibble */
222 #define	M4	146	/* four bits bad within a nibble */
223 #define	M	147	/* multiple (5 or more) bits bad */
224 #define	NA	148	/* no bits are bad */
/*
 * Special syndromes.  The set of codes, and therefore SLAST, depends on
 * whether this is a Jalapeno/Serrano build.
 */
225 #if defined(JALAPENO) || defined(SERRANO)
226 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
227 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
228 #define	SLAST	S003MEM	/* last special syndrome */
229 #else /* JALAPENO || SERRANO */
230 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
231 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
232 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
233 #define	SLAST	S11C	/* last special syndrome */
234 #endif /* JALAPENO || SERRANO */
235 #if defined(JALAPENO) || defined(SERRANO)
236 #define	BPAR0	152	/* syndromes 152 through 167 for bus parity */
237 #define	BPAR15	167
238 #endif	/* JALAPENO || SERRANO */
239 
/*
 * 512 entries, laid out as 32 rows of 16, one entry per 9-bit syndrome
 * value.  The two rows selected by #if differ in exactly one entry each:
 * syndrome 0x071 (S071) and syndrome 0x11c (S11C) carry their special codes
 * on non-Jalapeno/Serrano builds and M2 on Jalapeno/Serrano builds.
 */
240 static uint8_t ecc_syndrome_tab[] =
241 {
242 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
243 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
244 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
245 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
246 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
247 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
248 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
249 #if defined(JALAPENO) || defined(SERRANO)
250 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
251 #else	/* JALAPENO || SERRANO */
252 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
253 #endif	/* JALAPENO || SERRANO */
254 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
255 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
256 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
257 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
258 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
259 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
260 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
261 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
262 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
263 #if defined(JALAPENO) || defined(SERRANO)
264 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
265 #else	/* JALAPENO || SERRANO */
266 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
267 #endif	/* JALAPENO || SERRANO */
268 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
269 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
270 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
271 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
272 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
273 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
274 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
275 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
276 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
277 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
278 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
279 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
280 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
281 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
282 };
283 
/* Number of entries in ecc_syndrome_tab. */
284 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
285 
286 #if !(defined(JALAPENO) || defined(SERRANO))
287 /*
288  * This table is used to determine which bit(s) is(are) bad when a Mtag
289  * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
290  * of this array have the following semantics:
291  *
292  *      -1	Invalid mtag syndrome.
293  *      137     Mtag Data 0 is bad.
294  *      138     Mtag Data 1 is bad.
295  *      139     Mtag Data 2 is bad.
296  *      140     Mtag ECC 0 is bad.
297  *      141     Mtag ECC 1 is bad.
298  *      142     Mtag ECC 2 is bad.
299  *      143     Mtag ECC 3 is bad.
 *
 * The initializer below also uses NA (148) for syndrome 0 and M2 (144) for
 * the remaining syndromes; it contains no -1 entry.
300  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
301  */
302 short mtag_syndrome_tab[] =
303 {
304 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
305 };
306 
/* Number of entries in mtag_syndrome_tab. */
307 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
308 
309 #else /* !(JALAPENO || SERRANO) */
310 
311 #define	BSYND_TBL_SIZE	16
312 
313 #endif /* !(JALAPENO || SERRANO) */
314 
315 /*
316  * Types returned from cpu_error_to_resource_type()
317  */
318 #define	ERRTYPE_UNKNOWN		0
319 #define	ERRTYPE_CPU		1
320 #define	ERRTYPE_MEMORY		2
321 #define	ERRTYPE_ECACHE_DATA	3
322 
323 /*
324  * CE initial classification and subsequent action lookup table
325  */
326 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
327 static int ce_disp_inited;
328 
329 /*
330  * Set to disable leaky and partner check for memory correctables
331  */
332 int ce_xdiag_off;
333 
334 /*
335  * The following are not incremented atomically so are indicative only
336  */
337 static int ce_xdiag_drops;
338 static int ce_xdiag_lkydrops;
339 static int ce_xdiag_ptnrdrops;
340 static int ce_xdiag_bad;
341 
342 /*
343  * CE leaky check callback structure
 *
 * Bundles a fault record with an error queue and one element of that queue.
 * NOTE(review): presumably handed to the deferred leaky-check callback; the
 * code that fills and consumes it is not in this part of the file.
344  */
345 typedef struct {
346 	struct async_flt *lkycb_aflt;
347 	errorq_t *lkycb_eqp;
348 	errorq_elem_t *lkycb_eqep;
349 } ce_lkychk_cb_t;
350 
351 /*
352  * defines for various ecache_flush_flag's
353  */
354 #define	ECACHE_FLUSH_LINE	1
355 #define	ECACHE_FLUSH_ALL	2
356 
357 /*
358  * STICK sync
 *
 * EV_A_START..EV_B_END are the four indices into timestamp[], which is
 * sized by EVENTS.  stick_iter and stick_tsk start out as STICK_ITERATION
 * and MAX_TSKEW.  The code that drives stick_sync_cmd and slave_done is
 * not in this part of the file.
359  */
360 #define	STICK_ITERATION 10
361 #define	MAX_TSKEW	1
362 #define	EV_A_START	0
363 #define	EV_A_END	1
364 #define	EV_B_START	2
365 #define	EV_B_END	3
366 #define	EVENTS		4
367 
368 static int64_t stick_iter = STICK_ITERATION;
369 static int64_t stick_tsk = MAX_TSKEW;
370 
/* Values taken by stick_sync_cmd; EVENT_NULL is its initial value. */
371 typedef enum {
372 	EVENT_NULL = 0,
373 	SLAVE_START,
374 	SLAVE_CONT,
375 	MASTER_START
376 } event_cmd_t;
377 
378 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
379 static int64_t timestamp[EVENTS];
380 static volatile int slave_done;
381 
382 #ifdef DEBUG
383 #define	DSYNC_ATTEMPTS 64
384 typedef struct {
385 	int64_t	skew_val[DSYNC_ATTEMPTS];
386 } ss_t;
387 
388 ss_t stick_sync_stats[NCPU];
389 #endif /* DEBUG */
390 
391 uint_t cpu_impl_dual_pgsz = 0;
392 #if defined(CPU_IMP_DUAL_PAGESIZE)
393 uint_t disable_dual_pgsz = 0;
394 #endif	/* CPU_IMP_DUAL_PAGESIZE */
395 
396 /*
397  * Save the cache bootup state for use when internal
398  * caches are to be re-enabled after an error occurs.
399  */
400 uint64_t cache_boot_state;
401 
402 /*
403  * PA[22:0] represent Displacement in Safari configuration space.
404  */
405 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
406 
/*
 * Table pairing each *_CONFIG_ECLK_n_DIV constant with the matching
 * *_CONFIG_ECLK_n constant for n = 1, 2 and 32.  Jalapeno/Serrano builds use
 * the JBUS_ constants, all other builds the SAFARI_ ones.  The table ends
 * with an all-zero entry.
 * NOTE(review): presumably {clock divisor, config register bits}; confirm
 * against the bus_config_eclk_t definition.
 */
407 bus_config_eclk_t bus_config_eclk[] = {
408 #if defined(JALAPENO) || defined(SERRANO)
409 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
410 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
411 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
412 #else /* JALAPENO || SERRANO */
413 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
414 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
415 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
416 #endif /* JALAPENO || SERRANO */
417 	{0, 0}
418 };
419 
420 /*
421  * Interval for deferred CEEN reenable
422  */
423 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
424 
425 /*
426  * set in /etc/system to control logging of user BERR/TO's
427  */
428 int cpu_berr_to_verbose = 0;
429 
430 /*
431  * set to 0 in /etc/system to defer CEEN reenable for all CEs
432  */
433 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
434 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
435 
436 /*
437  * Set of all offline cpus
438  */
439 cpuset_t cpu_offline_set;
440 
441 static void cpu_delayed_check_ce_errors(void *);
442 static void cpu_check_ce_errors(void *);
443 void cpu_error_ecache_flush(ch_async_flt_t *);
444 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
445 static void cpu_log_and_clear_ce(ch_async_flt_t *);
446 void cpu_ce_detected(ch_cpu_errors_t *, int);
447 
448 /*
449  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
450  * memory refresh interval of current DIMMs (64ms).  After initial fix that
451  * gives at least one full refresh cycle in which the cell can leak
452  * (whereafter further refreshes simply reinforce any incorrect bit value).
453  */
454 clock_t cpu_ce_lkychk_timeout_usec = 128000;
455 
456 /*
457  * CE partner check partner caching period in seconds
458  */
459 int cpu_ce_ptnr_cachetime_sec = 60;
460 
461 /*
462  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
 *
 * Copies 32 bytes from ttlabel over ttentry with bcopy(), then calls
 * flush_instr_mem() on the same 32 bytes of ttentry.
 *
 * NOTE(review): the expansion is two separate statements and already ends in
 * a semicolon, so it must not be used as the body of an unbraced if/else or
 * loop -- only the bcopy() would be governed by the condition.
463  */
464 #define	CH_SET_TRAP(ttentry, ttlabel)			\
465 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
466 		flush_instr_mem((caddr_t)&ttentry, 32);
467 
468 static int min_ecache_size;
469 static uint_t priv_hcl_1;
470 static uint_t priv_hcl_2;
471 static uint_t priv_hcl_4;
472 static uint_t priv_hcl_8;
473 
/*
 * Set up the CPU-module globals for this processor family.
 *
 * Installs the chip-specific trap handlers through cpu_init_trap() and then
 * stores into a series of globals: cache attributes and the boot-time DCU
 * cache-enable bits, page-coloring policy, the ELF flags, ISA list and
 * hardware capability flags, the VA hole (none), the kpm window geometry,
 * and feature flags for traptrace, performance-counter overflow, dual page
 * size and fpRAS.  Finishes by populating the CE disposition table.
 *
 * Takes no arguments and returns nothing.
 */
474 void
475 cpu_setup(void)
476 {
477 	extern int at_flags;
478 	extern int cpc_has_overflow_intr;
479 
480 	/*
481 	 * Setup chip-specific trap handlers.
482 	 */
483 	cpu_init_trap();
484 
485 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
486 
487 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
488 
489 	/*
490 	 * save the cache bootup state.
491 	 */
492 	cache_boot_state = get_dcu() & DCU_CACHE;
493 
494 	/*
495 	 * Due to the number of entries in the fully-associative tlb
496 	 * this may have to be tuned lower than in spitfire.
497 	 */
498 	pp_slots = MIN(8, MAXPP_SLOTS);
499 
500 	/*
501 	 * Block stores do not invalidate all pages of the d$, pagecopy
502 	 * et. al. need virtual translations with virtual coloring taken
503 	 * into consideration.  prefetch/ldd will pollute the d$ on the
504 	 * load side.
505 	 */
506 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
507 
508 	if (use_page_coloring) {
509 		do_pg_coloring = 1;
510 	}
511 
512 	isa_list =
513 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
514 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
515 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
516 
517 	/*
518 	 * On Panther-based machines, this should
519 	 * also include AV_SPARC_POPC.
520 	 */
521 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
522 
523 	/*
524 	 * On cheetah, there's no hole in the virtual address space
525 	 */
526 	hole_start = hole_end = 0;
527 
528 	/*
529 	 * The kpm mapping window.
530 	 * kpm_size:
531 	 *	The size of a single kpm range.
532 	 *	The overall size will be: kpm_size * vac_colors.
533 	 * kpm_vbase:
534 	 *	The virtual start address of the kpm range within the kernel
535 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 * kpm_size_shift is log2(kpm_size): 8TB == 1 << 43.
536 	 */
537 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
538 	kpm_size_shift = 43;
539 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
540 	kpm_smallpages = 1;
541 
542 	/*
543 	 * The traptrace code uses either %tick or %stick for
544 	 * timestamping.  We have %stick so we can use it.
545 	 */
546 	traptrace_use_stick = 1;
547 
548 	/*
549 	 * Cheetah has a performance counter overflow interrupt
550 	 */
551 	cpc_has_overflow_intr = 1;
552 
553 #if defined(CPU_IMP_DUAL_PAGESIZE)
554 	/*
555 	 * Use Cheetah+ and later dual page size support.
	 * Setting disable_dual_pgsz leaves cpu_impl_dual_pgsz at 0.
556 	 */
557 	if (!disable_dual_pgsz) {
558 		cpu_impl_dual_pgsz = 1;
559 	}
560 #endif	/* CPU_IMP_DUAL_PAGESIZE */
561 
562 	/*
563 	 * Declare that this architecture/cpu combination does fpRAS.
564 	 */
565 	fpras_implemented = 1;
566 
567 	/*
568 	 * Setup CE lookup table
	 * ce_disp_inited records that ce_disp_table has been populated.
569 	 */
570 	CE_INITDISPTBL_POPULATE(ce_disp_table);
571 	ce_disp_inited = 1;
572 }
573 
574 /*
575  * Called by setcpudelay
 *
 * Sets sys_tick_freq to system_clock_freq.  Panics if system_clock_freq
 * is still zero at this point.
576  */
577 void
578 cpu_init_tick_freq(void)
579 {
580 	/*
581 	 * For UltraSPARC III and beyond we want to use the
582 	 * system clock rate as the basis for low level timing,
583 	 * due to support of mixed speed CPUs and power management.
584 	 */
585 	if (system_clock_freq == 0)
586 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
587 
588 	sys_tick_freq = system_clock_freq;
589 }
590 
591 #ifdef CHEETAHPLUS_ERRATUM_25
592 /*
593  * Tunables
 *
 * cheetah_sendmondo_fullscan: when 0, mondo_recover() gives up after the
 *	TSB-based attempt instead of claiming all of memory.
 * cheetah_sendmondo_recover_delay: nudge cyclic interval in seconds;
 *	cheetah_nudge_onln() raises it to CHEETAH_LIVELOCK_MIN_DELAY if it
 *	has been set lower.
 * cheetah_bpe_off and cheetah_sendmondo_recover are consumed outside this
 *	part of the file.
594  */
595 int cheetah_bpe_off = 0;
596 int cheetah_sendmondo_recover = 1;
597 int cheetah_sendmondo_fullscan = 0;
598 int cheetah_sendmondo_recover_delay = 5;
599 
600 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
601 
602 /*
603  * Recovery Statistics
 *
 * cheetah_livelock_hist is a ring of the last CHEETAH_LIVELOCK_NENTRY
 * recovery attempts; cheetah_livelock_entry_nxt is the next slot to use.
604  */
605 typedef struct cheetah_livelock_entry	{
606 	int cpuid;		/* fallen cpu */
607 	int buddy;		/* cpu that ran recovery */
608 	clock_t lbolt;		/* when recovery started */
609 	hrtime_t recovery_time;	/* time spent in recovery */
610 } cheetah_livelock_entry_t;
611 
612 #define	CHEETAH_LIVELOCK_NENTRY	32
613 
614 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
615 int cheetah_livelock_entry_nxt;
616 
/*
 * Point statp at the next history slot and advance the ring index, wrapping
 * to 0 at CHEETAH_LIVELOCK_NENTRY.  The index update is not atomic.
 * Expands to a braced block; mondo_recover() invokes it without a trailing
 * semicolon.
 */
617 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
618 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
619 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
620 		cheetah_livelock_entry_nxt = 0;				\
621 	}								\
622 }
623 
/* Store val into field item of the history entry statp points at. */
624 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
625 
/*
 * Cumulative livelock-recovery counters.  The proc_* members are updated by
 * mondo_recover_proc(); hrt, recovery and full_claimed by mondo_recover().
 * Updates are plain increments and stores, not atomic operations.
 */
626 struct {
627 	hrtime_t hrt;		/* maximum recovery time */
628 	int recovery;		/* recovery passes completed */
629 	int full_claimed;	/* maximum pages claimed in full recovery */
630 	int proc_entry;		/* calls to mondo_recover_proc() */
631 	int proc_tsb_scan;	/* tsb scanned */
632 	int proc_tsb_partscan;	/* tsb partially scanned */
633 	int proc_tsb_fullscan;	/* whole tsb scanned */
634 	int proc_claimed;	/* maximum pages claimed in tsb scan */
635 	int proc_user;		/* user thread */
636 	int proc_kernel;	/* kernel thread */
637 	int proc_onflt;		/* on_fault() tripped during the scan */
638 	int proc_cpu;		/* null cpu */
639 	int proc_thread;	/* null thread */
640 	int proc_proc;		/* null proc */
641 	int proc_as;		/* null as */
642 	int proc_hat;		/* null hat */
643 	int proc_hat_inval;	/* hat contents don't make sense */
644 	int proc_hat_busy;	/* hat is changing TSBs */
645 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
646 	int proc_cnum_bad;	/* cnum out of range */
647 	int proc_cnum;		/* last cnum processed */
648 	tte_t proc_tte;		/* last tte processed */
649 } cheetah_livelock_stat;
650 
/* Increment counter item. */
651 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
652 
/* Overwrite item with value. */
653 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
654 	cheetah_livelock_stat.item = value
655 
/*
 * Raise item to value if value is larger.  value is evaluated twice, so it
 * must be free of side effects.
 */
656 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
657 	if (value > cheetah_livelock_stat.item)		\
658 		cheetah_livelock_stat.item = value;	\
659 }
660 
661 /*
662  * Attempt to recover a cpu by claiming every cache line as saved
663  * in the TSB that the non-responsive cpu is using. Since we can't
664  * grab any adaptive lock, this is at best an attempt to do so. Because
665  * we don't grab any locks, we must operate under the protection of
666  * on_fault().
667  *
668  * Return 1 if cpuid could be recovered, 0 if failed.
 *
 * cpuid - id of the non-responsive cpu; an id outside cpu[] is rejected
 *	   the same way as an empty cpu[] slot.
 * bn	 - selects the IDSR busy/nack bit pair that is polled for this
 *	   target; also passed through to shipit().
 *
 * The walk goes cpu -> thread -> proc -> as -> hat and then over the hat's
 * TSB chain followed by the kernel TSB (or the kernel TSB alone for a kernel
 * hat).  "Recovered" means the IDSR busy and nack bits for bn are both clear.
 * Every exit path calls no_fault() to drop the on_fault() protection.
669  */
670 int
671 mondo_recover_proc(uint16_t cpuid, int bn)
672 {
673 	label_t ljb;
674 	cpu_t *cp;
675 	kthread_t *t;
676 	proc_t *p;
677 	struct as *as;
678 	struct hat *hat;
679 	uint_t  cnum;
680 	struct tsb_info *tsbinfop;
681 	struct tsbe *tsbep;
682 	caddr_t tsbp;
683 	caddr_t end_tsbp;
684 	uint64_t paddr;
685 	uint64_t idsr;
686 	u_longlong_t pahi, palo;
687 	int pages_claimed = 0;
688 	tte_t tsbe_tte;
689 	int tried_kernel_tsb = 0;
690 	mmu_ctx_t *mmu_ctxp;
691 
692 	CHEETAH_LIVELOCK_STAT(proc_entry);
693 
	/* A fault anywhere below resumes here with a nonzero return. */
694 	if (on_fault(&ljb)) {
695 		CHEETAH_LIVELOCK_STAT(proc_onflt);
696 		goto badstruct;
697 	}
698 
	/* Never index cpu[] with an id beyond its NCPU entries. */
699 	if (cpuid >= NCPU || (cp = cpu[cpuid]) == NULL) {
700 		CHEETAH_LIVELOCK_STAT(proc_cpu);
701 		goto badstruct;
702 	}
703 
704 	if ((t = cp->cpu_thread) == NULL) {
705 		CHEETAH_LIVELOCK_STAT(proc_thread);
706 		goto badstruct;
707 	}
708 
709 	if ((p = ttoproc(t)) == NULL) {
710 		CHEETAH_LIVELOCK_STAT(proc_proc);
711 		goto badstruct;
712 	}
713 
714 	if ((as = p->p_as) == NULL) {
715 		CHEETAH_LIVELOCK_STAT(proc_as);
716 		goto badstruct;
717 	}
718 
719 	if ((hat = as->a_hat) == NULL) {
720 		CHEETAH_LIVELOCK_STAT(proc_hat);
721 		goto badstruct;
722 	}
723 
	/* Pick the first TSB to scan: the hat's own, or the kernel TSB. */
724 	if (hat != ksfmmup) {
725 		CHEETAH_LIVELOCK_STAT(proc_user);
726 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
727 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
728 			goto badstruct;
729 		}
730 		tsbinfop = hat->sfmmu_tsb;
731 		if (tsbinfop == NULL) {
732 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
733 			goto badstruct;
734 		}
735 		tsbp = tsbinfop->tsb_va;
736 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
737 	} else {
738 		CHEETAH_LIVELOCK_STAT(proc_kernel);
739 		tsbinfop = NULL;
740 		tsbp = ktsb_base;
741 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
742 	}
743 
744 	/* Verify as */
745 	if (hat->sfmmu_as != as) {
746 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
747 		goto badstruct;
748 	}
749 
750 	mmu_ctxp = CPU_MMU_CTXP(cp);
751 	ASSERT(mmu_ctxp);
752 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
753 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
754 
	/* cnum is unsigned, so only the upper bound needs checking. */
755 	if ((cnum == INVALID_CONTEXT) ||
756 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
757 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
758 		goto badstruct;
759 	}
760 
761 	do {
762 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
763 
764 		/*
765 		 * Skip TSBs being relocated.  This is important because
766 		 * we want to avoid the following deadlock scenario:
767 		 *
768 		 * 1) when we came in we set ourselves to "in recover" state.
769 		 * 2) when we try to touch TSB being relocated the mapping
770 		 *    will be in the suspended state so we'll spin waiting
771 		 *    for it to be unlocked.
772 		 * 3) when the CPU that holds the TSB mapping locked tries to
773 		 *    unlock it it will send a xtrap which will fail to xcall
774 		 *    us or the CPU we're trying to recover, and will in turn
775 		 *    enter the mondo code.
776 		 * 4) since we are still spinning on the locked mapping
777 		 *    no further progress will be made and the system will
778 		 *    inevitably hard hang.
779 		 *
780 		 * A TSB not being relocated can't begin being relocated
781 		 * while we're accessing it because we check
782 		 * sendmondo_in_recover before relocating TSBs.
783 		 */
784 		if (hat != ksfmmup &&
785 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
786 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
787 			goto next_tsbinfo;
788 		}
789 
790 		for (tsbep = (struct tsbe *)tsbp;
791 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
792 			tsbe_tte = tsbep->tte_data;
793 
794 			if (tsbe_tte.tte_val == 0) {
795 				/*
796 				 * Invalid tte
797 				 */
798 				continue;
799 			}
800 			if (tsbe_tte.tte_se) {
801 				/*
802 				 * Don't want device registers
803 				 */
804 				continue;
805 			}
806 			if (tsbe_tte.tte_cp == 0) {
807 				/*
808 				 * Must be cached in E$
809 				 */
810 				continue;
811 			}
812 			if (tsbep->tte_tag.tag_invalid != 0) {
813 				/*
814 				 * Invalid tag, ignore this entry.
815 				 */
816 				continue;
817 			}
818 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
			/* Stop as soon as both busy and nack have cleared. */
819 			idsr = getidsr();
820 			if ((idsr & (IDSR_NACK_BIT(bn) |
821 			    IDSR_BUSY_BIT(bn))) == 0) {
822 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
823 				goto done;
824 			}
825 			pahi = tsbe_tte.tte_pahi;
826 			palo = tsbe_tte.tte_palo;
827 			paddr = (uint64_t)((pahi << 32) |
828 			    (palo << MMU_PAGESHIFT));
829 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
830 			    CH_ECACHE_SUBBLK_SIZE);
			/* Not busy (so nack is set): send the mondo again. */
831 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
832 				shipit(cpuid, bn);
833 			}
834 			pages_claimed++;
835 		}
836 next_tsbinfo:
		/*
		 * Advance to the hat's next TSB; once that chain is
		 * exhausted, scan the kernel TSB exactly once.
		 */
837 		if (tsbinfop != NULL)
838 			tsbinfop = tsbinfop->tsb_next;
839 		if (tsbinfop != NULL) {
840 			tsbp = tsbinfop->tsb_va;
841 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
842 		} else if (tsbp == ktsb_base) {
843 			tried_kernel_tsb = 1;
844 		} else if (!tried_kernel_tsb) {
845 			tsbp = ktsb_base;
846 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
847 			hat = ksfmmup;
848 			tsbinfop = NULL;
849 		}
850 	} while (tsbinfop != NULL ||
851 	    ((tsbp == ktsb_base) && !tried_kernel_tsb));
852 
853 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
854 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
855 	no_fault();
	/* Everything was scanned; report whether the target responded. */
856 	idsr = getidsr();
857 	if ((idsr & (IDSR_NACK_BIT(bn) |
858 	    IDSR_BUSY_BIT(bn))) == 0) {
859 		return (1);
860 	} else {
861 		return (0);
862 	}
863 
864 done:
865 	no_fault();
866 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
867 	return (1);
868 
869 badstruct:
870 	no_fault();
871 	return (0);
872 }
873 
874 /*
875  * Attempt to claim ownership, temporarily, of every cache line that a
876  * non-responsive cpu might be using.  This might kick that cpu out of
877  * this state.
878  *
879  * The return value indicates to the caller if we have exhausted all recovery
880  * techniques. If 1 is returned, it is useless to call this function again
881  * even for a different target CPU.
 *
 * cpuid - id of the non-responsive cpu.
 * bn	 - selects the IDSR busy/nack bit pair polled for this target; also
 *	   passed through to shipit().
 *
 * Only one caller at a time runs the recovery: sendmondo_in_recover is
 * taken with cas32(0 -> 1) and released with cas32(1 -> 0).  A caller that
 * loses the race spins until the flag drops and then returns 0 without
 * doing any recovery work itself.
882  */
883 int
884 mondo_recover(uint16_t cpuid, int bn)
885 {
886 	struct memseg *seg;
887 	uint64_t begin_pa, end_pa, cur_pa;
888 	hrtime_t begin_hrt, end_hrt;
889 	int retval = 0;
890 	int pages_claimed = 0;
891 	cheetah_livelock_entry_t *histp;
892 	uint64_t idsr;
893 
894 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
895 		/*
896 		 * Wait while recovery takes place
897 		 */
898 		while (sendmondo_in_recover) {
899 			drv_usecwait(1);
900 		}
901 		/*
902 		 * Assume we didn't claim the whole memory. If
903 		 * the target of this caller is not recovered,
904 		 * it will come back.
905 		 */
906 		return (retval);
907 	}
908 
	/* Record this attempt in the next slot of the history ring. */
909 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
910 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
911 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
912 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
913 
914 	begin_hrt = gethrtime_waitfree();
915 	/*
916 	 * First try to claim the lines in the TSB the target
917 	 * may have been using.
918 	 */
919 	if (mondo_recover_proc(cpuid, bn) == 1) {
920 		/*
921 		 * Didn't claim the whole memory
922 		 */
923 		goto done;
924 	}
925 
926 	/*
927 	 * We tried using the TSB. The target is still
928 	 * not recovered. Check if complete memory scan is
929 	 * enabled.
930 	 */
931 	if (cheetah_sendmondo_fullscan == 0) {
932 		/*
933 		 * Full memory scan is disabled.
934 		 */
935 		retval = 1;
936 		goto done;
937 	}
938 
939 	/*
940 	 * Try claiming the whole memory.
	 * Walks every memseg one MMU page at a time and stops early, with
	 * retval still 0, once the busy and nack bits for bn are both clear.
941 	 */
942 	for (seg = memsegs; seg; seg = seg->next) {
943 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
944 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
945 		for (cur_pa = begin_pa; cur_pa < end_pa;
946 		    cur_pa += MMU_PAGESIZE) {
947 			idsr = getidsr();
948 			if ((idsr & (IDSR_NACK_BIT(bn) |
949 			    IDSR_BUSY_BIT(bn))) == 0) {
950 				/*
951 				 * Didn't claim all memory
952 				 */
953 				goto done;
954 			}
955 			claimlines(cur_pa, MMU_PAGESIZE,
956 			    CH_ECACHE_SUBBLK_SIZE);
			/* Not busy (so nack is set): send the mondo again. */
957 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
958 				shipit(cpuid, bn);
959 			}
960 			pages_claimed++;
961 		}
962 	}
963 
964 	/*
965 	 * We did all we could.
966 	 */
967 	retval = 1;
968 
969 done:
970 	/*
971 	 * Update statistics
	 * The recovery counter is bumped on every pass through here,
	 * whatever the outcome.
972 	 */
973 	end_hrt = gethrtime_waitfree();
974 	CHEETAH_LIVELOCK_STAT(recovery);
975 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
976 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
977 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
978 	    (end_hrt -  begin_hrt));
979 
	/* Release the recovery flag so waiters and later callers can go. */
980 	while (cas32(&sendmondo_in_recover, 1, 0) != 1)
981 		;
982 
983 	return (retval);
984 }
985 
986 /*
987  * This is called by the cyclic framework when this CPU becomes online
988  */
989 /*ARGSUSED*/
990 static void
991 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
992 {
993 
994 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
995 	hdlr->cyh_level = CY_LOW_LEVEL;
996 	hdlr->cyh_arg = NULL;
997 
998 	/*
999 	 * Stagger the start time
1000 	 */
1001 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1002 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1003 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1004 	}
1005 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1006 }
1007 
1008 /*
1009  * Create a low level cyclic to send a xtrap to the next cpu online.
1010  * However, there's no need to have this running on a uniprocessor system.
1011  */
1012 static void
1013 cheetah_nudge_init(void)
1014 {
1015 	cyc_omni_handler_t hdlr;
1016 
1017 	if (max_ncpus == 1) {
1018 		return;
1019 	}
1020 
1021 	hdlr.cyo_online = cheetah_nudge_onln;
1022 	hdlr.cyo_offline = NULL;
1023 	hdlr.cyo_arg = NULL;
1024 
1025 	mutex_enter(&cpu_lock);
1026 	(void) cyclic_add_omni(&hdlr);
1027 	mutex_exit(&cpu_lock);
1028 }
1029 
1030 /*
1031  * Cyclic handler to wake up buddy
1032  */
1033 void
1034 cheetah_nudge_buddy(void)
1035 {
1036 	/*
1037 	 * Disable kernel preemption to protect the cpu list
1038 	 */
1039 	kpreempt_disable();
1040 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1041 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1042 		    0, 0);
1043 	}
1044 	kpreempt_enable();
1045 }
1046 
1047 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1048 
1049 #ifdef SEND_MONDO_STATS	/* optional mondo dispatch time histograms */
1050 uint32_t x_one_stimes[64];	/* send_one_mondo: < 8192 ticks, 128 ticks/bucket */
1051 uint32_t x_one_ltimes[16];	/* send_one_mondo: >= 8192 ticks, (ticks >> 13) & 0xf */
1052 uint32_t x_set_stimes[64];	/* not used in this part of the file; presumably */
1053 uint32_t x_set_ltimes[16];	/* the cpu-set counterparts of the two x_one */
1054 uint32_t x_set_cpus[NCPU];	/* histograms plus a per-cpu counter and a NACK */
1055 uint32_t x_nack_stimes[64];	/* histogram -- TODO confirm against their users */
1056 #endif	/* SEND_MONDO_STATS */
1057 
1058 /*
1059  * Note: A version of this function is used by the debugger via the KDI,
1060  * and must be kept in sync with this version.  Any changes made to this
1061  * function to support new chips or to accomodate errata must also be included
1062  * in the KDI-specific version.  See us3_kdi.c.
1063  */
1064 void
1065 send_one_mondo(int cpuid)
1066 {
1067 	int busy, nack;
1068 	uint64_t idsr, starttick, endtick, tick, lasttick;
1069 	uint64_t busymask;
1070 #ifdef	CHEETAHPLUS_ERRATUM_25
1071 	int recovered = 0;
1072 #endif
1073 
1074 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1075 	starttick = lasttick = gettick();
1076 	shipit(cpuid, 0);
1077 	endtick = starttick + xc_tick_limit;
1078 	busy = nack = 0;
1079 #if defined(JALAPENO) || defined(SERRANO)
1080 	/*
1081 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1082 	 * will be used for dispatching interrupt. For now, assume
1083 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1084 	 * issues with respect to BUSY/NACK pair usage.
1085 	 */
1086 	busymask  = IDSR_BUSY_BIT(cpuid);
1087 #else /* JALAPENO || SERRANO */
1088 	busymask = IDSR_BUSY;
1089 #endif /* JALAPENO || SERRANO */
1090 	for (;;) {
1091 		idsr = getidsr();
1092 		if (idsr == 0)
1093 			break;
1094 
1095 		tick = gettick();
1096 		/*
1097 		 * If there is a big jump between the current tick
1098 		 * count and lasttick, we have probably hit a break
1099 		 * point.  Adjust endtick accordingly to avoid panic.
1100 		 */
1101 		if (tick > (lasttick + xc_tick_jump_limit))
1102 			endtick += (tick - lasttick);
1103 		lasttick = tick;
1104 		if (tick > endtick) {
1105 			if (panic_quiesce)
1106 				return;
1107 #ifdef	CHEETAHPLUS_ERRATUM_25
1108 			if (cheetah_sendmondo_recover && recovered == 0) {
1109 				if (mondo_recover(cpuid, 0)) {
1110 					/*
1111 					 * We claimed the whole memory or
1112 					 * full scan is disabled.
1113 					 */
1114 					recovered++;
1115 				}
1116 				tick = gettick();
1117 				endtick = tick + xc_tick_limit;
1118 				lasttick = tick;
1119 				/*
1120 				 * Recheck idsr
1121 				 */
1122 				continue;
1123 			} else
1124 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1125 			{
1126 				cmn_err(CE_PANIC, "send mondo timeout "
1127 				    "(target 0x%x) [%d NACK %d BUSY]",
1128 				    cpuid, nack, busy);
1129 			}
1130 		}
1131 
1132 		if (idsr & busymask) {
1133 			busy++;
1134 			continue;
1135 		}
1136 		drv_usecwait(1);
1137 		shipit(cpuid, 0);
1138 		nack++;
1139 		busy = 0;
1140 	}
1141 #ifdef SEND_MONDO_STATS
1142 	{
1143 		int n = gettick() - starttick;
1144 		if (n < 8192)
1145 			x_one_stimes[n >> 7]++;
1146 		else
1147 			x_one_ltimes[(n >> 13) & 0xf]++;
1148 	}
1149 #endif
1150 }
1151 
/*
 * Intentionally empty in this file: syncfpu() takes no arguments, does no
 * work and returns nothing.
 */
void
syncfpu(void)
{
	return;
}
1156 
1157 /*
1158  * Return processor specific async error structure
1159  * size used.
1160  */
1161 int
1162 cpu_aflt_size(void)
1163 {
1164 	return (sizeof (ch_async_flt_t));
1165 }
1166 
1167 /*
1168  * Tunable: set to 0 to disable the checking of other cpus' logout areas
1169  * during panic for potential syndrome 71 generating errors (default: 1).
1170  */
1171 int enable_check_other_cpus_logout = 1;
1172 
1173 /*
1174  * Check other cpus logout area for potential synd 71 generating
1175  * errors.
1176  */
1177 static void
1178 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1179     ch_cpu_logout_t *clop)
1180 {
1181 	struct async_flt *aflt;
1182 	ch_async_flt_t ch_flt;
1183 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1184 
1185 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1186 		return;
1187 	}
1188 
1189 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1190 
1191 	t_afar = clop->clo_data.chd_afar;
1192 	t_afsr = clop->clo_data.chd_afsr;
1193 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1194 #if defined(SERRANO)
1195 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1196 #endif	/* SERRANO */
1197 
1198 	/*
1199 	 * In order to simplify code, we maintain this afsr_errs
1200 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1201 	 * sticky bits.
1202 	 */
1203 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1204 	    (t_afsr & C_AFSR_ALL_ERRS);
1205 
1206 	/* Setup the async fault structure */
1207 	aflt = (struct async_flt *)&ch_flt;
1208 	aflt->flt_id = gethrtime_waitfree();
1209 	ch_flt.afsr_ext = t_afsr_ext;
1210 	ch_flt.afsr_errs = t_afsr_errs;
1211 	aflt->flt_stat = t_afsr;
1212 	aflt->flt_addr = t_afar;
1213 	aflt->flt_bus_id = cpuid;
1214 	aflt->flt_inst = cpuid;
1215 	aflt->flt_pc = tpc;
1216 	aflt->flt_prot = AFLT_PROT_NONE;
1217 	aflt->flt_class = CPU_FAULT;
1218 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1219 	aflt->flt_tl = tl;
1220 	aflt->flt_status = ecc_type;
1221 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1222 
1223 	/*
1224 	 * Queue events on the async event queue, one event per error bit.
1225 	 * If no events are queued, queue an event to complain.
1226 	 */
1227 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1228 		ch_flt.flt_type = CPU_INV_AFSR;
1229 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1230 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1231 		    aflt->flt_panic);
1232 	}
1233 
1234 	/*
1235 	 * Zero out + invalidate CPU logout.
1236 	 */
1237 	bzero(clop, sizeof (ch_cpu_logout_t));
1238 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1239 }
1240 
1241 /*
1242  * Check the logout areas of all other cpus for unlogged errors.
1243  */
1244 static void
1245 cpu_check_other_cpus_logout(void)
1246 {
1247 	int i, j;
1248 	processorid_t myid;
1249 	struct cpu *cp;
1250 	ch_err_tl1_data_t *cl1p;
1251 
1252 	myid = CPU->cpu_id;
1253 	for (i = 0; i < NCPU; i++) {
1254 		cp = cpu[i];
1255 
1256 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1257 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1258 			continue;
1259 		}
1260 
1261 		/*
1262 		 * Check each of the tl>0 logout areas
1263 		 */
1264 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1265 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1266 			if (cl1p->ch_err_tl1_flags == 0)
1267 				continue;
1268 
1269 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1270 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1271 		}
1272 
1273 		/*
1274 		 * Check each of the remaining logout areas
1275 		 */
1276 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1277 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1278 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1279 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1280 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1281 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1282 	}
1283 }
1284 
1285 /*
1286  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1287  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1288  * flush the error that caused the UCU/UCC, then again here at the end to
1289  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1290  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1291  * another Fast ECC trap.
1292  *
1293  * Cheetah+ also handles: TSCE: No additional processing required.
1294  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1295  *
1296  * Note that the p_clo_flags input is only valid in cases where the
1297  * cpu_private struct is not yet initialized (since that is the only
1298  * time that information cannot be obtained from the logout struct.)
1299  */
1300 /*ARGSUSED*/
1301 void
1302 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1303 {
1304 	ch_cpu_logout_t *clop;
1305 	uint64_t ceen, nceen;
1306 
1307 	/*
1308 	 * Get the CPU log out info. If we can't find our CPU private
1309 	 * pointer, then we will have to make due without any detailed
1310 	 * logout information.
1311 	 */
1312 	if (CPU_PRIVATE(CPU) == NULL) {
1313 		clop = NULL;
1314 		ceen = p_clo_flags & EN_REG_CEEN;
1315 		nceen = p_clo_flags & EN_REG_NCEEN;
1316 	} else {
1317 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1318 		ceen = clop->clo_flags & EN_REG_CEEN;
1319 		nceen = clop->clo_flags & EN_REG_NCEEN;
1320 	}
1321 
1322 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1323 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1324 }
1325 
1326 /*
1327  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1328  * ECC at TL>0.  Need to supply either a error register pointer or a
1329  * cpu logout structure pointer.
1330  */
1331 static void
1332 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1333     uint64_t nceen, ch_cpu_logout_t *clop)
1334 {
1335 	struct async_flt *aflt;
1336 	ch_async_flt_t ch_flt;
1337 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1338 	char pr_reason[MAX_REASON_STRING];
1339 	ch_cpu_errors_t cpu_error_regs;
1340 
1341 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1342 	/*
1343 	 * If no cpu logout data, then we will have to make due without
1344 	 * any detailed logout information.
1345 	 */
1346 	if (clop == NULL) {
1347 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1348 		get_cpu_error_state(&cpu_error_regs);
1349 		set_cpu_error_state(&cpu_error_regs);
1350 		t_afar = cpu_error_regs.afar;
1351 		t_afsr = cpu_error_regs.afsr;
1352 		t_afsr_ext = cpu_error_regs.afsr_ext;
1353 #if defined(SERRANO)
1354 		ch_flt.afar2 = cpu_error_regs.afar2;
1355 #endif	/* SERRANO */
1356 	} else {
1357 		t_afar = clop->clo_data.chd_afar;
1358 		t_afsr = clop->clo_data.chd_afsr;
1359 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1360 #if defined(SERRANO)
1361 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1362 #endif	/* SERRANO */
1363 	}
1364 
1365 	/*
1366 	 * In order to simplify code, we maintain this afsr_errs
1367 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1368 	 * sticky bits.
1369 	 */
1370 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1371 	    (t_afsr & C_AFSR_ALL_ERRS);
1372 	pr_reason[0] = '\0';
1373 
1374 	/* Setup the async fault structure */
1375 	aflt = (struct async_flt *)&ch_flt;
1376 	aflt->flt_id = gethrtime_waitfree();
1377 	ch_flt.afsr_ext = t_afsr_ext;
1378 	ch_flt.afsr_errs = t_afsr_errs;
1379 	aflt->flt_stat = t_afsr;
1380 	aflt->flt_addr = t_afar;
1381 	aflt->flt_bus_id = getprocessorid();
1382 	aflt->flt_inst = CPU->cpu_id;
1383 	aflt->flt_pc = tpc;
1384 	aflt->flt_prot = AFLT_PROT_NONE;
1385 	aflt->flt_class = CPU_FAULT;
1386 	aflt->flt_priv = priv;
1387 	aflt->flt_tl = tl;
1388 	aflt->flt_status = ECC_F_TRAP;
1389 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1390 
1391 	/*
1392 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1393 	 * cmn_err messages out to the console.  The situation is a UCU (in
1394 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1395 	 * The messages for the UCU and WDU are enqueued and then pulled off
1396 	 * the async queue via softint and syslogd starts to process them
1397 	 * but doesn't get them to the console.  The UE causes a panic, but
1398 	 * since the UCU/WDU messages are already in transit, those aren't
1399 	 * on the async queue.  The hack is to check if we have a matching
1400 	 * WDU event for the UCU, and if it matches, we're more than likely
1401 	 * going to panic with a UE, unless we're under protection.  So, we
1402 	 * check to see if we got a matching WDU event and if we're under
1403 	 * protection.
1404 	 *
1405 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1406 	 * looks like this:
1407 	 *    UCU->WDU->UE
1408 	 * For Panther, it could look like either of these:
1409 	 *    UCU---->WDU->L3_WDU->UE
1410 	 *    L3_UCU->WDU->L3_WDU->UE
1411 	 */
1412 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1413 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1414 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1415 		get_cpu_error_state(&cpu_error_regs);
1416 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
1417 			aflt->flt_panic |=
1418 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1419 			    (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
1420 			    (cpu_error_regs.afar == t_afar));
1421 			aflt->flt_panic |= ((clop == NULL) &&
1422 			    (t_afsr_errs & C_AFSR_WDU) &&
1423 			    (t_afsr_errs & C_AFSR_L3_WDU));
1424 		} else {
1425 			aflt->flt_panic |=
1426 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1427 			    (cpu_error_regs.afar == t_afar));
1428 			aflt->flt_panic |= ((clop == NULL) &&
1429 			    (t_afsr_errs & C_AFSR_WDU));
1430 		}
1431 	}
1432 
1433 	/*
1434 	 * Queue events on the async event queue, one event per error bit.
1435 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1436 	 * queue an event to complain.
1437 	 */
1438 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1439 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1440 		ch_flt.flt_type = CPU_INV_AFSR;
1441 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1442 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1443 		    aflt->flt_panic);
1444 	}
1445 
1446 	/*
1447 	 * Zero out + invalidate CPU logout.
1448 	 */
1449 	if (clop) {
1450 		bzero(clop, sizeof (ch_cpu_logout_t));
1451 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1452 	}
1453 
1454 	/*
1455 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1456 	 * or disrupting errors have happened.  We do this because if a
1457 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1458 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1459 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1460 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1461 	 * deferred or disrupting error happening between checking the AFSR and
1462 	 * enabling NCEEN/CEEN.
1463 	 *
1464 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1465 	 * taken.
1466 	 */
1467 	set_error_enable(get_error_enable() | (nceen | ceen));
1468 	if (clear_errors(&ch_flt)) {
1469 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1470 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1471 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1472 		    NULL);
1473 	}
1474 
1475 	/*
1476 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1477 	 * be logged as part of the panic flow.
1478 	 */
1479 	if (aflt->flt_panic)
1480 		fm_panic("%sError(s)", pr_reason);
1481 
1482 	/*
1483 	 * Flushing the Ecache here gets the part of the trap handler that
1484 	 * is run at TL=1 out of the Ecache.
1485 	 */
1486 	cpu_flush_ecache();
1487 }
1488 
1489 /*
1490  * This is called via sys_trap from pil15_interrupt code if the
1491  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1492  * various ch_err_tl1_data structures for valid entries based on the bit
1493  * settings in the ch_err_tl1_flags entry of the structure.
1494  */
1495 /*ARGSUSED*/
1496 void
1497 cpu_tl1_error(struct regs *rp, int panic)
1498 {
1499 	ch_err_tl1_data_t *cl1p, cl1;
1500 	int i, ncl1ps;
1501 	uint64_t me_flags;
1502 	uint64_t ceen, nceen;
1503 
1504 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1505 		cl1p = &ch_err_tl1_data;
1506 		ncl1ps = 1;
1507 	} else if (CPU_PRIVATE(CPU) != NULL) {
1508 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1509 		ncl1ps = CH_ERR_TL1_TLMAX;
1510 	} else {
1511 		ncl1ps = 0;
1512 	}
1513 
1514 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1515 		if (cl1p->ch_err_tl1_flags == 0)
1516 			continue;
1517 
1518 		/*
1519 		 * Grab a copy of the logout data and invalidate
1520 		 * the logout area.
1521 		 */
1522 		cl1 = *cl1p;
1523 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1524 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1525 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1526 
1527 		/*
1528 		 * Log "first error" in ch_err_tl1_data.
1529 		 */
1530 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1531 			ceen = get_error_enable() & EN_REG_CEEN;
1532 			nceen = get_error_enable() & EN_REG_NCEEN;
1533 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1534 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1535 		}
1536 #if defined(CPU_IMP_L1_CACHE_PARITY)
1537 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1538 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1539 			    (caddr_t)cl1.ch_err_tl1_tpc);
1540 		}
1541 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1542 
1543 		/*
1544 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1545 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1546 		 * if the structure is busy, we just do the cache flushing
1547 		 * we have to do and then do the retry.  So the AFSR/AFAR
1548 		 * at this point *should* have some relevant info.  If there
1549 		 * are no valid errors in the AFSR, we'll assume they've
1550 		 * already been picked up and logged.  For I$/D$ parity,
1551 		 * we just log an event with an "Unknown" (NULL) TPC.
1552 		 */
1553 		if (me_flags & CH_ERR_FECC) {
1554 			ch_cpu_errors_t cpu_error_regs;
1555 			uint64_t t_afsr_errs;
1556 
1557 			/*
1558 			 * Get the error registers and see if there's
1559 			 * a pending error.  If not, don't bother
1560 			 * generating an "Invalid AFSR" error event.
1561 			 */
1562 			get_cpu_error_state(&cpu_error_regs);
1563 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1564 			    C_AFSR_EXT_ALL_ERRS) |
1565 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1566 			if (t_afsr_errs != 0) {
1567 				ceen = get_error_enable() & EN_REG_CEEN;
1568 				nceen = get_error_enable() & EN_REG_NCEEN;
1569 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1570 				    1, ceen, nceen, NULL);
1571 			}
1572 		}
1573 #if defined(CPU_IMP_L1_CACHE_PARITY)
1574 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1575 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1576 		}
1577 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1578 	}
1579 }
1580 
1581 /*
1582  * Called from Fast ECC TL>0 handler in case of fatal error.
1583  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1584  * but if we don't, we'll panic with something reasonable.
1585  */
1586 /*ARGSUSED*/
1587 void
1588 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1589 {
1590 	cpu_tl1_error(rp, 1);
1591 	/*
1592 	 * Should never return, but just in case.
1593 	 */
1594 	fm_panic("Unsurvivable ECC Error at TL>0");
1595 }
1596 
1597 /*
1598  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1599  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1600  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1601  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1602  *
1603  * Cheetah+ also handles (No additional processing required):
1604  *    DUE, DTO, DBERR	(NCEEN controlled)
1605  *    THCE		(CEEN and ET_ECC_en controlled)
1606  *    TUE		(ET_ECC_en controlled)
1607  *
1608  * Panther further adds:
1609  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1610  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1611  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1612  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1613  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1614  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1615  *
1616  * Note that the p_clo_flags input is only valid in cases where the
1617  * cpu_private struct is not yet initialized (since that is the only
1618  * time that information cannot be obtained from the logout struct.)
1619  */
1620 /*ARGSUSED*/
1621 void
1622 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1623 {
1624 	struct async_flt *aflt;
1625 	ch_async_flt_t ch_flt;
1626 	char pr_reason[MAX_REASON_STRING];
1627 	ch_cpu_logout_t *clop;
1628 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1629 	ch_cpu_errors_t cpu_error_regs;
1630 
1631 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1632 	/*
1633 	 * Get the CPU log out info. If we can't find our CPU private
1634 	 * pointer, then we will have to make due without any detailed
1635 	 * logout information.
1636 	 */
1637 	if (CPU_PRIVATE(CPU) == NULL) {
1638 		clop = NULL;
1639 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1640 		get_cpu_error_state(&cpu_error_regs);
1641 		set_cpu_error_state(&cpu_error_regs);
1642 		t_afar = cpu_error_regs.afar;
1643 		t_afsr = cpu_error_regs.afsr;
1644 		t_afsr_ext = cpu_error_regs.afsr_ext;
1645 #if defined(SERRANO)
1646 		ch_flt.afar2 = cpu_error_regs.afar2;
1647 #endif	/* SERRANO */
1648 	} else {
1649 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1650 		t_afar = clop->clo_data.chd_afar;
1651 		t_afsr = clop->clo_data.chd_afsr;
1652 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1653 #if defined(SERRANO)
1654 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1655 #endif	/* SERRANO */
1656 	}
1657 
1658 	/*
1659 	 * In order to simplify code, we maintain this afsr_errs
1660 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1661 	 * sticky bits.
1662 	 */
1663 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1664 	    (t_afsr & C_AFSR_ALL_ERRS);
1665 
1666 	pr_reason[0] = '\0';
1667 	/* Setup the async fault structure */
1668 	aflt = (struct async_flt *)&ch_flt;
1669 	ch_flt.afsr_ext = t_afsr_ext;
1670 	ch_flt.afsr_errs = t_afsr_errs;
1671 	aflt->flt_stat = t_afsr;
1672 	aflt->flt_addr = t_afar;
1673 	aflt->flt_pc = (caddr_t)rp->r_pc;
1674 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1675 	aflt->flt_tl = 0;
1676 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1677 
1678 	/*
1679 	 * If this trap is a result of one of the errors not masked
1680 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1681 	 * indicate that a timeout is to be set later.
1682 	 */
1683 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1684 	    !aflt->flt_panic)
1685 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1686 	else
1687 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1688 
1689 	/*
1690 	 * log the CE and clean up
1691 	 */
1692 	cpu_log_and_clear_ce(&ch_flt);
1693 
1694 	/*
1695 	 * We re-enable CEEN (if required) and check if any disrupting errors
1696 	 * have happened.  We do this because if a disrupting error had occurred
1697 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1698 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1699 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1700 	 * of a error happening between checking the AFSR and enabling CEEN.
1701 	 */
1702 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1703 		set_error_enable(get_error_enable() | EN_REG_CEEN);
1704 	if (clear_errors(&ch_flt)) {
1705 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1706 		    NULL);
1707 	}
1708 
1709 	/*
1710 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1711 	 * be logged as part of the panic flow.
1712 	 */
1713 	if (aflt->flt_panic)
1714 		fm_panic("%sError(s)", pr_reason);
1715 }
1716 
1717 /*
1718  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1719  * L3_EDU:BLD, TO, and BERR events.
1720  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1721  *
1722  * Cheetah+: No additional errors handled.
1723  *
1724  * Note that the p_clo_flags input is only valid in cases where the
1725  * cpu_private struct is not yet initialized (since that is the only
1726  * time that information cannot be obtained from the logout struct.)
1727  */
1728 /*ARGSUSED*/
1729 void
1730 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1731 {
1732 	ushort_t ttype, tl;
1733 	ch_async_flt_t ch_flt;
1734 	struct async_flt *aflt;
1735 	int trampolined = 0;
1736 	char pr_reason[MAX_REASON_STRING];
1737 	ch_cpu_logout_t *clop;
1738 	uint64_t ceen, clo_flags;
1739 	uint64_t log_afsr;
1740 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1741 	ch_cpu_errors_t cpu_error_regs;
1742 	int expected = DDI_FM_ERR_UNEXPECTED;
1743 	ddi_acc_hdl_t *hp;
1744 
1745 	/*
1746 	 * We need to look at p_flag to determine if the thread detected an
1747 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1748 	 * because we just need a consistent snapshot and we know that everyone
1749 	 * else will store a consistent set of bits while holding p_lock.  We
1750 	 * don't have to worry about a race because SDOCORE is set once prior
1751 	 * to doing i/o from the process's address space and is never cleared.
1752 	 */
1753 	uint_t pflag = ttoproc(curthread)->p_flag;
1754 
1755 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1756 	/*
1757 	 * Get the CPU log out info. If we can't find our CPU private
1758 	 * pointer then we will have to make due without any detailed
1759 	 * logout information.
1760 	 */
1761 	if (CPU_PRIVATE(CPU) == NULL) {
1762 		clop = NULL;
1763 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1764 		get_cpu_error_state(&cpu_error_regs);
1765 		set_cpu_error_state(&cpu_error_regs);
1766 		t_afar = cpu_error_regs.afar;
1767 		t_afsr = cpu_error_regs.afsr;
1768 		t_afsr_ext = cpu_error_regs.afsr_ext;
1769 #if defined(SERRANO)
1770 		ch_flt.afar2 = cpu_error_regs.afar2;
1771 #endif	/* SERRANO */
1772 		clo_flags = p_clo_flags;
1773 	} else {
1774 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1775 		t_afar = clop->clo_data.chd_afar;
1776 		t_afsr = clop->clo_data.chd_afsr;
1777 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1778 #if defined(SERRANO)
1779 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1780 #endif	/* SERRANO */
1781 		clo_flags = clop->clo_flags;
1782 	}
1783 
1784 	/*
1785 	 * In order to simplify code, we maintain this afsr_errs
1786 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1787 	 * sticky bits.
1788 	 */
1789 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1790 	    (t_afsr & C_AFSR_ALL_ERRS);
1791 	pr_reason[0] = '\0';
1792 
1793 	/*
1794 	 * Grab information encoded into our clo_flags field.
1795 	 */
1796 	ceen = clo_flags & EN_REG_CEEN;
1797 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1798 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1799 
1800 	/*
1801 	 * handle the specific error
1802 	 */
1803 	aflt = (struct async_flt *)&ch_flt;
1804 	aflt->flt_id = gethrtime_waitfree();
1805 	aflt->flt_bus_id = getprocessorid();
1806 	aflt->flt_inst = CPU->cpu_id;
1807 	ch_flt.afsr_ext = t_afsr_ext;
1808 	ch_flt.afsr_errs = t_afsr_errs;
1809 	aflt->flt_stat = t_afsr;
1810 	aflt->flt_addr = t_afar;
1811 	aflt->flt_pc = (caddr_t)rp->r_pc;
1812 	aflt->flt_prot = AFLT_PROT_NONE;
1813 	aflt->flt_class = CPU_FAULT;
1814 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1815 	aflt->flt_tl = (uchar_t)tl;
1816 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1817 	    C_AFSR_PANIC(t_afsr_errs));
1818 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1819 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1820 
1821 	/*
1822 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1823 	 * see if we were executing in the kernel under on_trap() or t_lofault
1824 	 * protection.  If so, modify the saved registers so that we return
1825 	 * from the trap to the appropriate trampoline routine.
1826 	 */
1827 	if (aflt->flt_priv && tl == 0) {
1828 		if (curthread->t_ontrap != NULL) {
1829 			on_trap_data_t *otp = curthread->t_ontrap;
1830 
1831 			if (otp->ot_prot & OT_DATA_EC) {
1832 				aflt->flt_prot = AFLT_PROT_EC;
1833 				otp->ot_trap |= OT_DATA_EC;
1834 				rp->r_pc = otp->ot_trampoline;
1835 				rp->r_npc = rp->r_pc + 4;
1836 				trampolined = 1;
1837 			}
1838 
1839 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1840 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1841 				aflt->flt_prot = AFLT_PROT_ACCESS;
1842 				otp->ot_trap |= OT_DATA_ACCESS;
1843 				rp->r_pc = otp->ot_trampoline;
1844 				rp->r_npc = rp->r_pc + 4;
1845 				trampolined = 1;
1846 				/*
1847 				 * for peeks and caut_gets errors are expected
1848 				 */
1849 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1850 				if (!hp)
1851 					expected = DDI_FM_ERR_PEEK;
1852 				else if (hp->ah_acc.devacc_attr_access ==
1853 				    DDI_CAUTIOUS_ACC)
1854 					expected = DDI_FM_ERR_EXPECTED;
1855 			}
1856 
1857 		} else if (curthread->t_lofault) {
1858 			aflt->flt_prot = AFLT_PROT_COPY;
1859 			rp->r_g1 = EFAULT;
1860 			rp->r_pc = curthread->t_lofault;
1861 			rp->r_npc = rp->r_pc + 4;
1862 			trampolined = 1;
1863 		}
1864 	}
1865 
1866 	/*
1867 	 * If we're in user mode or we're doing a protected copy, we either
1868 	 * want the ASTON code below to send a signal to the user process
1869 	 * or we want to panic if aft_panic is set.
1870 	 *
1871 	 * If we're in privileged mode and we're not doing a copy, then we
1872 	 * need to check if we've trampolined.  If we haven't trampolined,
1873 	 * we should panic.
1874 	 */
1875 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1876 		if (t_afsr_errs &
1877 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1878 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1879 			aflt->flt_panic |= aft_panic;
1880 	} else if (!trampolined) {
1881 			aflt->flt_panic = 1;
1882 	}
1883 
1884 	/*
1885 	 * If we've trampolined due to a privileged TO or BERR, or if an
1886 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1887 	 * event for that TO or BERR.  Queue all other events (if any) besides
1888 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1889 	 * ignore the number of events queued.  If we haven't trampolined due
1890 	 * to a TO or BERR, just enqueue events normally.
1891 	 */
1892 	log_afsr = t_afsr_errs;
1893 	if (trampolined) {
1894 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1895 	} else if (!aflt->flt_priv) {
1896 		/*
1897 		 * User mode, suppress messages if
1898 		 * cpu_berr_to_verbose is not set.
1899 		 */
1900 		if (!cpu_berr_to_verbose)
1901 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1902 	}
1903 
1904 	/*
1905 	 * Log any errors that occurred
1906 	 */
1907 	if (((log_afsr &
1908 	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1909 	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1910 	    (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1911 		ch_flt.flt_type = CPU_INV_AFSR;
1912 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1913 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1914 		    aflt->flt_panic);
1915 	}
1916 
1917 	/*
1918 	 * Zero out + invalidate CPU logout.
1919 	 */
1920 	if (clop) {
1921 		bzero(clop, sizeof (ch_cpu_logout_t));
1922 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1923 	}
1924 
1925 #if defined(JALAPENO) || defined(SERRANO)
1926 	/*
1927 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1928 	 * IO errors that may have resulted in this trap.
1929 	 */
1930 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1931 		cpu_run_bus_error_handlers(aflt, expected);
1932 	}
1933 
1934 	/*
1935 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1936 	 * line from the Ecache.  We also need to query the bus nexus for
1937 	 * fatal errors.  Attempts to do diagnostic read on caches may
1938 	 * introduce more errors (especially when the module is bad).
1939 	 */
1940 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1941 		/*
1942 		 * Ask our bus nexus friends if they have any fatal errors.  If
1943 		 * so, they will log appropriate error messages.
1944 		 */
1945 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1946 			aflt->flt_panic = 1;
1947 
1948 		/*
1949 		 * We got a UE or RUE and are panicking, save the fault PA in
1950 		 * a known location so that the platform specific panic code
1951 		 * can check for copyback errors.
1952 		 */
1953 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1954 			panic_aflt = *aflt;
1955 		}
1956 	}
1957 
1958 	/*
1959 	 * Flush Ecache line or entire Ecache
1960 	 */
1961 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1962 		cpu_error_ecache_flush(&ch_flt);
1963 #else /* JALAPENO || SERRANO */
1964 	/*
1965 	 * UE/BERR/TO: Call our bus nexus friends to check for
1966 	 * IO errors that may have resulted in this trap.
1967 	 */
1968 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1969 		cpu_run_bus_error_handlers(aflt, expected);
1970 	}
1971 
1972 	/*
1973 	 * UE: If the UE is in memory, we need to flush the bad
1974 	 * line from the Ecache.  We also need to query the bus nexus for
1975 	 * fatal errors.  Attempts to do diagnostic read on caches may
1976 	 * introduce more errors (especially when the module is bad).
1977 	 */
1978 	if (t_afsr & C_AFSR_UE) {
1979 		/*
1980 		 * Ask our legacy bus nexus friends if they have any fatal
1981 		 * errors.  If so, they will log appropriate error messages.
1982 		 */
1983 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1984 			aflt->flt_panic = 1;
1985 
1986 		/*
1987 		 * We got a UE and are panicking, save the fault PA in a known
1988 		 * location so that the platform specific panic code can check
1989 		 * for copyback errors.
1990 		 */
1991 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1992 			panic_aflt = *aflt;
1993 		}
1994 	}
1995 
1996 	/*
1997 	 * Flush Ecache line or entire Ecache
1998 	 */
1999 	if (t_afsr_errs &
2000 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
2001 		cpu_error_ecache_flush(&ch_flt);
2002 #endif /* JALAPENO || SERRANO */
2003 
2004 	/*
2005 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2006 	 * or disrupting errors have happened.  We do this because if a
2007 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2008 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2009 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2010 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2011 	 * deferred or disrupting error happening between checking the AFSR and
2012 	 * enabling NCEEN/CEEN.
2013 	 *
2014 	 * Note: CEEN reenabled only if it was on when trap taken.
2015 	 */
2016 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2017 	if (clear_errors(&ch_flt)) {
2018 		/*
2019 		 * Check for secondary errors, and avoid panicking if we
2020 		 * have them
2021 		 */
2022 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2023 		    t_afar) == 0) {
2024 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2025 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2026 		}
2027 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2028 		    NULL);
2029 	}
2030 
2031 	/*
2032 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2033 	 * be logged as part of the panic flow.
2034 	 */
2035 	if (aflt->flt_panic)
2036 		fm_panic("%sError(s)", pr_reason);
2037 
2038 	/*
2039 	 * If we queued an error and we are going to return from the trap and
2040 	 * the error was in user mode or inside of a copy routine, set AST flag
2041 	 * so the queue will be drained before returning to user mode.  The
2042 	 * AST processing will also act on our failure policy.
2043 	 */
2044 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2045 		int pcb_flag = 0;
2046 
2047 		if (t_afsr_errs &
2048 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2049 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2050 			pcb_flag |= ASYNC_HWERR;
2051 
2052 		if (t_afsr & C_AFSR_BERR)
2053 			pcb_flag |= ASYNC_BERR;
2054 
2055 		if (t_afsr & C_AFSR_TO)
2056 			pcb_flag |= ASYNC_BTO;
2057 
2058 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2059 		aston(curthread);
2060 	}
2061 }
2062 
2063 #if defined(CPU_IMP_L1_CACHE_PARITY)
2064 /*
2065  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2066  *
2067  * For Panther, P$ data parity errors during floating point load hits
2068  * are also detected (reported as TT 0x71) and handled by this trap
2069  * handler.
2070  *
2071  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2072  * is available.
2073  */
2074 /*ARGSUSED*/
2075 void
2076 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2077 {
2078 	ch_async_flt_t ch_flt;
2079 	struct async_flt *aflt;
2080 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2081 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2082 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2083 	char *error_class;
2084 
2085 	/*
2086 	 * Log the error.
2087 	 * For icache parity errors the fault address is the trap PC.
2088 	 * For dcache/pcache parity errors the instruction would have to
2089 	 * be decoded to determine the address and that isn't possible
2090 	 * at high PIL.
2091 	 */
2092 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2093 	aflt = (struct async_flt *)&ch_flt;
2094 	aflt->flt_id = gethrtime_waitfree();
2095 	aflt->flt_bus_id = getprocessorid();
2096 	aflt->flt_inst = CPU->cpu_id;
2097 	aflt->flt_pc = tpc;
2098 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2099 	aflt->flt_prot = AFLT_PROT_NONE;
2100 	aflt->flt_class = CPU_FAULT;
2101 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2102 	aflt->flt_tl = tl;
2103 	aflt->flt_panic = panic;
2104 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2105 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2106 
2107 	if (iparity) {
2108 		cpu_icache_parity_info(&ch_flt);
2109 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2110 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2111 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2112 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2113 		else
2114 			error_class = FM_EREPORT_CPU_USIII_IPE;
2115 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2116 	} else {
2117 		cpu_dcache_parity_info(&ch_flt);
2118 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2119 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2120 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2121 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2122 		else
2123 			error_class = FM_EREPORT_CPU_USIII_DPE;
2124 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2125 		/*
2126 		 * For panther we also need to check the P$ for parity errors.
2127 		 */
2128 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2129 			cpu_pcache_parity_info(&ch_flt);
2130 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2131 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2132 				aflt->flt_payload =
2133 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2134 			}
2135 		}
2136 	}
2137 
2138 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2139 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2140 
2141 	if (iparity) {
2142 		/*
2143 		 * Invalidate entire I$.
2144 		 * This is required due to the use of diagnostic ASI
2145 		 * accesses that may result in a loss of I$ coherency.
2146 		 */
2147 		if (cache_boot_state & DCU_IC) {
2148 			flush_icache();
2149 		}
2150 		/*
2151 		 * According to section P.3.1 of the Panther PRM, we
2152 		 * need to do a little more for recovery on those
2153 		 * CPUs after encountering an I$ parity error.
2154 		 */
2155 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2156 			flush_ipb();
2157 			correct_dcache_parity(dcache_size,
2158 			    dcache_linesize);
2159 			flush_pcache();
2160 		}
2161 	} else {
2162 		/*
2163 		 * Since the valid bit is ignored when checking parity the
2164 		 * D$ data and tag must also be corrected.  Set D$ data bits
2165 		 * to zero and set utag to 0, 1, 2, 3.
2166 		 */
2167 		correct_dcache_parity(dcache_size, dcache_linesize);
2168 
2169 		/*
2170 		 * According to section P.3.3 of the Panther PRM, we
2171 		 * need to do a little more for recovery on those
2172 		 * CPUs after encountering a D$ or P$ parity error.
2173 		 *
2174 		 * As far as clearing P$ parity errors, it is enough to
2175 		 * simply invalidate all entries in the P$ since P$ parity
2176 		 * error traps are only generated for floating point load
2177 		 * hits.
2178 		 */
2179 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2180 			flush_icache();
2181 			flush_ipb();
2182 			flush_pcache();
2183 		}
2184 	}
2185 
2186 	/*
2187 	 * Invalidate entire D$ if it was enabled.
2188 	 * This is done to avoid stale data in the D$ which might
2189 	 * occur with the D$ disabled and the trap handler doing
2190 	 * stores affecting lines already in the D$.
2191 	 */
2192 	if (cache_boot_state & DCU_DC) {
2193 		flush_dcache();
2194 	}
2195 
2196 	/*
2197 	 * Restore caches to their bootup state.
2198 	 */
2199 	set_dcu(get_dcu() | cache_boot_state);
2200 
2201 	/*
2202 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2203 	 * be logged as part of the panic flow.
2204 	 */
2205 	if (aflt->flt_panic)
2206 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2207 
2208 	/*
2209 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2210 	 * the chance of getting an unrecoverable Fast ECC error.  This
2211 	 * flush will evict the part of the parity trap handler that is run
2212 	 * at TL>1.
2213 	 */
2214 	if (tl) {
2215 		cpu_flush_ecache();
2216 	}
2217 }
2218 
2219 /*
2220  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2221  * to indicate which portions of the captured data should be in the ereport.
2222  */
2223 void
2224 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2225 {
2226 	int way = ch_flt->parity_data.ipe.cpl_way;
2227 	int offset = ch_flt->parity_data.ipe.cpl_off;
2228 	int tag_index;
2229 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2230 
2231 
2232 	if ((offset != -1) || (way != -1)) {
2233 		/*
2234 		 * Parity error in I$ tag or data
2235 		 */
2236 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2237 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2238 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2239 			    PN_ICIDX_TO_WAY(tag_index);
2240 		else
2241 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2242 			    CH_ICIDX_TO_WAY(tag_index);
2243 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2244 		    IC_LOGFLAG_MAGIC;
2245 	} else {
2246 		/*
2247 		 * Parity error was not identified.
2248 		 * Log tags and data for all ways.
2249 		 */
2250 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2251 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2252 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2253 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2254 				    PN_ICIDX_TO_WAY(tag_index);
2255 			else
2256 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2257 				    CH_ICIDX_TO_WAY(tag_index);
2258 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2259 			    IC_LOGFLAG_MAGIC;
2260 		}
2261 	}
2262 }
2263 
2264 /*
2265  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2266  * to indicate which portions of the captured data should be in the ereport.
2267  */
2268 void
2269 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2270 {
2271 	int way = ch_flt->parity_data.dpe.cpl_way;
2272 	int offset = ch_flt->parity_data.dpe.cpl_off;
2273 	int tag_index;
2274 
2275 	if (offset != -1) {
2276 		/*
2277 		 * Parity error in D$ or P$ data array.
2278 		 *
2279 		 * First check to see whether the parity error is in D$ or P$
2280 		 * since P$ data parity errors are reported in Panther using
2281 		 * the same trap.
2282 		 */
2283 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2284 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2285 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2286 			    CH_PCIDX_TO_WAY(tag_index);
2287 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2288 			    PC_LOGFLAG_MAGIC;
2289 		} else {
2290 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2291 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2292 			    CH_DCIDX_TO_WAY(tag_index);
2293 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2294 			    DC_LOGFLAG_MAGIC;
2295 		}
2296 	} else if (way != -1) {
2297 		/*
2298 		 * Parity error in D$ tag.
2299 		 */
2300 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2301 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2302 		    CH_DCIDX_TO_WAY(tag_index);
2303 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2304 		    DC_LOGFLAG_MAGIC;
2305 	}
2306 }
2307 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2308 
2309 /*
2310  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2311  * post-process CPU events that are dequeued.  As such, it can be invoked
2312  * from softint context, from AST processing in the trap() flow, or from the
2313  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2314  * Historically this entry point was used to log the actual cmn_err(9F) text;
2315  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2316  * With FMA this function now also returns a flag which indicates to the
2317  * caller whether the ereport should be posted (1) or suppressed (0).
2318  */
2319 static int
2320 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2321 {
2322 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2323 	struct async_flt *aflt = (struct async_flt *)flt;
2324 	uint64_t errors;
2325 	extern void memscrub_induced_error(void);
2326 
2327 	switch (ch_flt->flt_type) {
2328 	case CPU_INV_AFSR:
2329 		/*
2330 		 * If it is a disrupting trap and the AFSR is zero, then
2331 		 * the event has probably already been noted. Do not post
2332 		 * an ereport.
2333 		 */
2334 		if ((aflt->flt_status & ECC_C_TRAP) &&
2335 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2336 			return (0);
2337 		else
2338 			return (1);
2339 	case CPU_TO:
2340 	case CPU_BERR:
2341 	case CPU_FATAL:
2342 	case CPU_FPUERR:
2343 		return (1);
2344 
2345 	case CPU_UE_ECACHE_RETIRE:
2346 		cpu_log_err(aflt);
2347 		cpu_page_retire(ch_flt);
2348 		return (1);
2349 
2350 	/*
2351 	 * Cases where we may want to suppress logging or perform
2352 	 * extended diagnostics.
2353 	 */
2354 	case CPU_CE:
2355 	case CPU_EMC:
2356 		/*
2357 		 * We want to skip logging and further classification
2358 		 * only if ALL the following conditions are true:
2359 		 *
2360 		 *	1. There is only one error
2361 		 *	2. That error is a correctable memory error
2362 		 *	3. The error is caused by the memory scrubber (in
2363 		 *	   which case the error will have occurred under
2364 		 *	   on_trap protection)
2365 		 *	4. The error is on a retired page
2366 		 *
2367 		 * Note: AFLT_PROT_EC is used places other than the memory
2368 		 * scrubber.  However, none of those errors should occur
2369 		 * on a retired page.
2370 		 */
2371 		if ((ch_flt->afsr_errs &
2372 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2373 		    aflt->flt_prot == AFLT_PROT_EC) {
2374 
2375 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2376 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2377 
2378 				/*
2379 				 * Since we're skipping logging, we'll need
2380 				 * to schedule the re-enabling of CEEN
2381 				 */
2382 				(void) timeout(cpu_delayed_check_ce_errors,
2383 				    (void *)(uintptr_t)aflt->flt_inst,
2384 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2385 				    * MICROSEC));
2386 				}
2387 
2388 				/*
2389 				 * Inform memscrubber - scrubbing induced
2390 				 * CE on a retired page.
2391 				 */
2392 				memscrub_induced_error();
2393 				return (0);
2394 			}
2395 		}
2396 
2397 		/*
2398 		 * Perform/schedule further classification actions, but
2399 		 * only if the page is healthy (we don't want bad
2400 		 * pages inducing too much diagnostic activity).  If we could
2401 		 * not find a page pointer then we also skip this.  If
2402 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2403 		 * to copy and recirculate the event (for further diagnostics)
2404 		 * and we should not proceed to log it here.
2405 		 *
2406 		 * This must be the last step here before the cpu_log_err()
2407 		 * below - if an event recirculates cpu_ce_log_err() will
2408 		 * not call the current function but just proceed directly
2409 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2410 		 *
2411 		 * Note: Check cpu_impl_async_log_err if changing this
2412 		 */
2413 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2414 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2415 			    CE_XDIAG_SKIP_NOPP);
2416 		} else {
2417 			if (errors != PR_OK) {
2418 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2419 				    CE_XDIAG_SKIP_PAGEDET);
2420 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2421 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2422 				return (0);
2423 			}
2424 		}
2425 		/*FALLTHRU*/
2426 
2427 	/*
2428 	 * Cases where we just want to report the error and continue.
2429 	 */
2430 	case CPU_CE_ECACHE:
2431 	case CPU_UE_ECACHE:
2432 	case CPU_IV:
2433 	case CPU_ORPH:
2434 		cpu_log_err(aflt);
2435 		return (1);
2436 
2437 	/*
2438 	 * Cases where we want to fall through to handle panicking.
2439 	 */
2440 	case CPU_UE:
2441 		/*
2442 		 * We want to skip logging in the same conditions as the
2443 		 * CE case.  In addition, we want to make sure we're not
2444 		 * panicking.
2445 		 */
2446 		if (!panicstr && (ch_flt->afsr_errs &
2447 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2448 		    aflt->flt_prot == AFLT_PROT_EC) {
2449 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2450 				/* Zero the address to clear the error */
2451 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2452 				/*
2453 				 * Inform memscrubber - scrubbing induced
2454 				 * UE on a retired page.
2455 				 */
2456 				memscrub_induced_error();
2457 				return (0);
2458 			}
2459 		}
2460 		cpu_log_err(aflt);
2461 		break;
2462 
2463 	default:
2464 		/*
2465 		 * If the us3_common.c code doesn't know the flt_type, it may
2466 		 * be an implementation-specific code.  Call into the impldep
2467 		 * backend to find out what to do: if it tells us to continue,
2468 		 * break and handle as if falling through from a UE; if not,
2469 		 * the impldep backend has handled the error and we're done.
2470 		 */
2471 		switch (cpu_impl_async_log_err(flt, eqep)) {
2472 		case CH_ASYNC_LOG_DONE:
2473 			return (1);
2474 		case CH_ASYNC_LOG_RECIRC:
2475 			return (0);
2476 		case CH_ASYNC_LOG_CONTINUE:
2477 			break; /* continue on to handle UE-like error */
2478 		default:
2479 			cmn_err(CE_WARN, "discarding error 0x%p with "
2480 			    "invalid fault type (0x%x)",
2481 			    (void *)aflt, ch_flt->flt_type);
2482 			return (0);
2483 		}
2484 	}
2485 
2486 	/* ... fall through from the UE case */
2487 
2488 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2489 		if (!panicstr) {
2490 			cpu_page_retire(ch_flt);
2491 		} else {
2492 			/*
2493 			 * Clear UEs on panic so that we don't
2494 			 * get haunted by them during panic or
2495 			 * after reboot
2496 			 */
2497 			cpu_clearphys(aflt);
2498 			(void) clear_errors(NULL);
2499 		}
2500 	}
2501 
2502 	return (1);
2503 }
2504 
2505 /*
2506  * Retire the bad page that may contain the flushed error.
2507  */
2508 void
2509 cpu_page_retire(ch_async_flt_t *ch_flt)
2510 {
2511 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2512 	(void) page_retire(aflt->flt_addr, PR_UE);
2513 }
2514 
2515 /*
2516  * Return true if the error specified in the AFSR indicates
2517  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2518  * for Panther, none for Jalapeno/Serrano).
2519  */
2520 /* ARGSUSED */
2521 static int
2522 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2523 {
2524 #if defined(JALAPENO) || defined(SERRANO)
2525 	return (0);
2526 #elif defined(CHEETAH_PLUS)
2527 	if (IS_PANTHER(cpunodes[cpuid].implementation))
2528 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2529 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2530 #else	/* CHEETAH_PLUS */
2531 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2532 #endif
2533 }
2534 
2535 /*
2536  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2537  * generic event post-processing for correctable and uncorrectable memory,
2538  * E$, and MTag errors.  Historically this entry point was used to log bits of
2539  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2540  * converted into an ereport.  In addition, it transmits the error to any
2541  * platform-specific service-processor FRU logging routines, if available.
2542  */
2543 void
2544 cpu_log_err(struct async_flt *aflt)
2545 {
2546 	char unum[UNUM_NAMLEN];
2547 	int synd_status, synd_code, afar_status;
2548 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2549 
2550 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2551 		aflt->flt_status |= ECC_ECACHE;
2552 	else
2553 		aflt->flt_status &= ~ECC_ECACHE;
2554 	/*
2555 	 * Determine syndrome status.
2556 	 */
2557 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2558 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2559 
2560 	/*
2561 	 * Determine afar status.
2562 	 */
2563 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2564 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2565 		    ch_flt->flt_bit);
2566 	else
2567 		afar_status = AFLT_STAT_INVALID;
2568 
2569 	synd_code = synd_to_synd_code(synd_status,
2570 	    aflt->flt_synd, ch_flt->flt_bit);
2571 
2572 	/*
2573 	 * If afar status is not invalid do a unum lookup.
2574 	 */
2575 	if (afar_status != AFLT_STAT_INVALID) {
2576 		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2577 	} else {
2578 		unum[0] = '\0';
2579 	}
2580 
2581 	/*
2582 	 * Do not send the fruid message (plat_ecc_error_data_t)
2583 	 * to the SC if it can handle the enhanced error information
2584 	 * (plat_ecc_error2_data_t) or when the tunable
2585 	 * ecc_log_fruid_enable is set to 0.
2586 	 */
2587 
2588 	if (&plat_ecc_capability_sc_get &&
2589 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2590 		if (&plat_log_fruid_error)
2591 			plat_log_fruid_error(synd_code, aflt, unum,
2592 			    ch_flt->flt_bit);
2593 	}
2594 
2595 	if (aflt->flt_func != NULL)
2596 		aflt->flt_func(aflt, unum);
2597 
2598 	if (afar_status != AFLT_STAT_INVALID)
2599 		cpu_log_diag_info(ch_flt);
2600 
2601 	/*
2602 	 * If we have a CEEN error , we do not reenable CEEN until after
2603 	 * we exit the trap handler. Otherwise, another error may
2604 	 * occur causing the handler to be entered recursively.
2605 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2606 	 * to try and ensure that the CPU makes progress in the face
2607 	 * of a CE storm.
2608 	 */
2609 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2610 		(void) timeout(cpu_delayed_check_ce_errors,
2611 		    (void *)(uintptr_t)aflt->flt_inst,
2612 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2613 	}
2614 }
2615 
2616 /*
2617  * Invoked by error_init() early in startup and therefore before
2618  * startup_errorq() is called to drain any error Q -
2619  *
2620  * startup()
2621  *   startup_end()
2622  *     error_init()
2623  *       cpu_error_init()
2624  * errorq_init()
2625  *   errorq_drain()
2626  * start_other_cpus()
2627  *
2628  * The purpose of this routine is to create error-related taskqs.  Taskqs
2629  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2630  * context.
2631  */
2632 void
2633 cpu_error_init(int items)
2634 {
2635 	/*
2636 	 * Create taskq(s) to reenable CE
2637 	 */
2638 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2639 	    items, items, TASKQ_PREPOPULATE);
2640 }
2641 
2642 void
2643 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2644 {
2645 	char unum[UNUM_NAMLEN];
2646 	int len;
2647 
2648 	switch (aflt->flt_class) {
2649 	case CPU_FAULT:
2650 		cpu_ereport_init(aflt);
2651 		if (cpu_async_log_err(aflt, eqep))
2652 			cpu_ereport_post(aflt);
2653 		break;
2654 
2655 	case BUS_FAULT:
2656 		if (aflt->flt_func != NULL) {
2657 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2658 			    unum, UNUM_NAMLEN, &len);
2659 			aflt->flt_func(aflt, unum);
2660 		}
2661 		break;
2662 
2663 	case RECIRC_CPU_FAULT:
2664 		aflt->flt_class = CPU_FAULT;
2665 		cpu_log_err(aflt);
2666 		cpu_ereport_post(aflt);
2667 		break;
2668 
2669 	case RECIRC_BUS_FAULT:
2670 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2671 		/*FALLTHRU*/
2672 	default:
2673 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2674 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2675 		return;
2676 	}
2677 }
2678 
2679 /*
2680  * Scrub and classify a CE.  This function must not modify the
2681  * fault structure passed to it but instead should return the classification
2682  * information.
2683  */
2684 
2685 static uchar_t
2686 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2687 {
2688 	uchar_t disp = CE_XDIAG_EXTALG;
2689 	on_trap_data_t otd;
2690 	uint64_t orig_err;
2691 	ch_cpu_logout_t *clop;
2692 
2693 	/*
2694 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2695 	 * this, but our other callers have not.  Disable preemption to
2696 	 * avoid CPU migration so that we restore CEEN on the correct
2697 	 * cpu later.
2698 	 *
2699 	 * CEEN is cleared so that further CEs that our instruction and
2700 	 * data footprint induce do not cause us to either creep down
2701 	 * kernel stack to the point of overflow, or do so much CE
2702 	 * notification as to make little real forward progress.
2703 	 *
2704 	 * NCEEN must not be cleared.  However it is possible that
2705 	 * our accesses to the flt_addr may provoke a bus error or timeout
2706 	 * if the offending address has just been unconfigured as part of
2707 	 * a DR action.  So we must operate under on_trap protection.
2708 	 */
2709 	kpreempt_disable();
2710 	orig_err = get_error_enable();
2711 	if (orig_err & EN_REG_CEEN)
2712 		set_error_enable(orig_err & ~EN_REG_CEEN);
2713 
2714 	/*
2715 	 * Our classification algorithm includes the line state before
2716 	 * the scrub; we'd like this captured after the detection and
2717 	 * before the algorithm below - the earlier the better.
2718 	 *
2719 	 * If we've come from a cpu CE trap then this info already exists
2720 	 * in the cpu logout area.
2721 	 *
2722 	 * For a CE detected by memscrub for which there was no trap
2723 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2724 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2725 	 * marked the fault structure as incomplete as a flag to later
2726 	 * logging code.
2727 	 *
2728 	 * If called directly from an IO detected CE there has been
2729 	 * no line data capture.  In this case we logout to the cpu logout
2730 	 * area - that's appropriate since it's the cpu cache data we need
2731 	 * for classification.  We thus borrow the cpu logout area for a
2732 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2733 	 * this time (we will invalidate it again below).
2734 	 *
2735 	 * If called from the partner check xcall handler then this cpu
2736 	 * (the partner) has not necessarily experienced a CE at this
2737 	 * address.  But we want to capture line state before its scrub
2738 	 * attempt since we use that in our classification.
2739 	 */
2740 	if (logout_tried == B_FALSE) {
2741 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2742 			disp |= CE_XDIAG_NOLOGOUT;
2743 	}
2744 
2745 	/*
2746 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2747 	 * no longer be valid (if DR'd since the initial event) so we
2748 	 * perform this scrub under on_trap protection.  If this access is
2749 	 * ok then further accesses below will also be ok - DR cannot
2750 	 * proceed while this thread is active (preemption is disabled);
2751 	 * to be safe we'll nonetheless use on_trap again below.
2752 	 */
2753 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2754 		cpu_scrubphys(ecc);
2755 	} else {
2756 		no_trap();
2757 		if (orig_err & EN_REG_CEEN)
2758 			set_error_enable(orig_err);
2759 		kpreempt_enable();
2760 		return (disp);
2761 	}
2762 	no_trap();
2763 
2764 	/*
2765 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2766 	 * Note that it's quite possible that the read sourced the data from
2767 	 * another cpu.
2768 	 */
2769 	if (clear_ecc(ecc))
2770 		disp |= CE_XDIAG_CE1;
2771 
2772 	/*
2773 	 * Read the data again.  This time the read is very likely to
2774 	 * come from memory since the scrub induced a writeback to memory.
2775 	 */
2776 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2777 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2778 	} else {
2779 		no_trap();
2780 		if (orig_err & EN_REG_CEEN)
2781 			set_error_enable(orig_err);
2782 		kpreempt_enable();
2783 		return (disp);
2784 	}
2785 	no_trap();
2786 
2787 	/* Did that read induce a CE that matches the AFAR? */
2788 	if (clear_ecc(ecc))
2789 		disp |= CE_XDIAG_CE2;
2790 
2791 	/*
2792 	 * Look at the logout information and record whether we found the
2793 	 * line in l2/l3 cache.  For Panther we are interested in whether
2794 	 * we found it in either cache (it won't reside in both but
2795 	 * it is possible to read it that way given the moving target).
2796 	 */
2797 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2798 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2799 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2800 		int hit, level;
2801 		int state;
2802 		int totalsize;
2803 		ch_ec_data_t *ecp;
2804 
2805 		/*
2806 		 * If hit is nonzero then a match was found and hit will
2807 		 * be one greater than the index which hit.  For Panther we
2808 		 * also need to pay attention to level to see which of l2$ or
2809 		 * l3$ it hit in.
2810 		 */
2811 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2812 		    0, &level);
2813 
2814 		if (hit) {
2815 			--hit;
2816 			disp |= CE_XDIAG_AFARMATCH;
2817 
2818 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2819 				if (level == 2)
2820 					ecp = &clop->clo_data.chd_l2_data[hit];
2821 				else
2822 					ecp = &clop->clo_data.chd_ec_data[hit];
2823 			} else {
2824 				ASSERT(level == 2);
2825 				ecp = &clop->clo_data.chd_ec_data[hit];
2826 			}
2827 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2828 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2829 			    ecc->flt_addr, ecp->ec_tag);
2830 
2831 			/*
2832 			 * Cheetah variants use different state encodings -
2833 			 * the CH_ECSTATE_* defines vary depending on the
2834 			 * module we're compiled for.  Translate into our
2835 			 * one true version.  Conflate Owner-Shared state
2836 			 * of SSM mode with Owner as victimisation of such
2837 			 * lines may cause a writeback.
2838 			 */
2839 			switch (state) {
2840 			case CH_ECSTATE_MOD:
2841 				disp |= EC_STATE_M;
2842 				break;
2843 
2844 			case CH_ECSTATE_OWN:
2845 			case CH_ECSTATE_OWS:
2846 				disp |= EC_STATE_O;
2847 				break;
2848 
2849 			case CH_ECSTATE_EXL:
2850 				disp |= EC_STATE_E;
2851 				break;
2852 
2853 			case CH_ECSTATE_SHR:
2854 				disp |= EC_STATE_S;
2855 				break;
2856 
2857 			default:
2858 				disp |= EC_STATE_I;
2859 				break;
2860 			}
2861 		}
2862 
2863 		/*
2864 		 * If we initiated the delayed logout then we are responsible
2865 		 * for invalidating the logout area.
2866 		 */
2867 		if (logout_tried == B_FALSE) {
2868 			bzero(clop, sizeof (ch_cpu_logout_t));
2869 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2870 		}
2871 	}
2872 
2873 	/*
2874 	 * Re-enable CEEN if we turned it off.
2875 	 */
2876 	if (orig_err & EN_REG_CEEN)
2877 		set_error_enable(orig_err);
2878 	kpreempt_enable();
2879 
2880 	return (disp);
2881 }
2882 
2883 /*
2884  * Scrub a correctable memory error and collect data for classification
2885  * of CE type.  This function is called in the detection path, ie tl0 handling
2886  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2887  */
2888 void
2889 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2890 {
2891 	/*
2892 	 * Cheetah CE classification does not set any bits in flt_status.
2893 	 * Instead we will record classification datapoints in flt_disp.
2894 	 */
2895 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2896 
2897 	/*
2898 	 * To check if the error detected by IO is persistent, sticky or
2899 	 * intermittent.  This is noticed by clear_ecc().
2900 	 */
2901 	if (ecc->flt_status & ECC_IOBUS)
2902 		ecc->flt_stat = C_AFSR_MEMORY;
2903 
2904 	/*
2905 	 * Record information from this first part of the algorithm in
2906 	 * flt_disp.
2907 	 */
2908 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2909 }
2910 
2911 /*
2912  * Select a partner to perform a further CE classification check from.
2913  * Must be called with kernel preemption disabled (to stop the cpu list
2914  * from changing).  The detecting cpu we are partnering has cpuid
2915  * aflt->flt_inst; we might not be running on the detecting cpu.
2916  *
2917  * Restrict choice to active cpus in the same cpu partition as ourselves in
2918  * an effort to stop bad cpus in one partition causing other partitions to
2919  * perform excessive diagnostic activity.  Actually since the errorq drain
2920  * is run from a softint most of the time and that is a global mechanism
2921  * this isolation is only partial.  Return NULL if we fail to find a
2922  * suitable partner.
2923  *
2924  * We prefer a partner that is in a different latency group to ourselves as
2925  * we will share fewer datapaths.  If such a partner is unavailable then
2926  * choose one in the same lgroup but prefer a different chip and only allow
2927  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2928  * flags includes PTNR_SELFOK then permit selection of the original detector.
2929  *
2930  * We keep a cache of the last partner selected for a cpu, and we'll try to
2931  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2932  * have passed since that selection was made.  This provides the benefit
2933  * of the point-of-view of different partners over time but without
2934  * requiring frequent cpu list traversals.
2935  */
2936 
2937 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2938 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2939 
2940 static cpu_t *
2941 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2942 {
2943 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2944 	hrtime_t lasttime, thistime;
2945 
2946 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2947 
2948 	dtcr = cpu[aflt->flt_inst];
2949 
2950 	/*
2951 	 * Short-circuit for the following cases:
2952 	 *	. the dtcr is not flagged active
2953 	 *	. there is just one cpu present
2954 	 *	. the detector has disappeared
2955 	 *	. we were given a bad flt_inst cpuid; this should not happen
2956 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2957 	 *	  reason to panic.
2958 	 *	. there is just one cpu left online in the cpu partition
2959 	 *
2960 	 * If we return NULL after this point then we do not update the
2961 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2962 	 * again next time; this is the case where the only other cpu online
2963 	 * in the detector's partition is on the same chip as the detector
2964 	 * and since CEEN re-enable is throttled even that case should not
2965 	 * hurt performance.
2966 	 */
2967 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2968 		return (NULL);
2969 	}
2970 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2971 		if (flags & PTNR_SELFOK) {
2972 			*typep = CE_XDIAG_PTNR_SELF;
2973 			return (dtcr);
2974 		} else {
2975 			return (NULL);
2976 		}
2977 	}
2978 
2979 	thistime = gethrtime();
2980 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2981 
2982 	/*
2983 	 * Select a starting point.
2984 	 */
2985 	if (!lasttime) {
2986 		/*
2987 		 * We've never selected a partner for this detector before.
2988 		 * Start the scan at the next online cpu in the same cpu
2989 		 * partition.
2990 		 */
2991 		sp = dtcr->cpu_next_part;
2992 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2993 		/*
2994 		 * Our last selection has not aged yet.  If this partner:
2995 		 *	. is still a valid cpu,
2996 		 *	. is still in the same partition as the detector
2997 		 *	. is still marked active
2998 		 *	. satisfies the 'flags' argument criteria
2999 		 * then select it again without updating the timestamp.
3000 		 */
3001 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3002 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
3003 		    !cpu_flagged_active(sp->cpu_flags) ||
3004 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
3005 		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
3006 		    !(flags & PTNR_SIBLINGOK))) {
3007 			sp = dtcr->cpu_next_part;
3008 		} else {
3009 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3010 				*typep = CE_XDIAG_PTNR_REMOTE;
3011 			} else if (sp == dtcr) {
3012 				*typep = CE_XDIAG_PTNR_SELF;
3013 			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3014 				*typep = CE_XDIAG_PTNR_SIBLING;
3015 			} else {
3016 				*typep = CE_XDIAG_PTNR_LOCAL;
3017 			}
3018 			return (sp);
3019 		}
3020 	} else {
3021 		/*
3022 		 * Our last selection has aged.  If it is nonetheless still a
3023 		 * valid cpu then start the scan at the next cpu in the
3024 		 * partition after our last partner.  If the last selection
3025 		 * is no longer a valid cpu then go with our default.  In
3026 		 * this way we slowly cycle through possible partners to
3027 		 * obtain multiple viewpoints over time.
3028 		 */
3029 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3030 		if (sp == NULL) {
3031 			sp = dtcr->cpu_next_part;
3032 		} else {
3033 			sp = sp->cpu_next_part;		/* may be dtcr */
3034 			if (sp->cpu_part != dtcr->cpu_part)
3035 				sp = dtcr;
3036 		}
3037 	}
3038 
3039 	/*
3040 	 * We have a proposed starting point for our search, but if this
3041 	 * cpu is offline then its cpu_next_part will point to itself
3042 	 * so we can't use that to iterate over cpus in this partition in
3043 	 * the loop below.  We still want to avoid iterating over cpus not
3044 	 * in our partition, so in the case that our starting point is offline
3045 	 * we will repoint it to be the detector itself;  and if the detector
3046 	 * happens to be offline we'll return NULL from the following loop.
3047 	 */
3048 	if (!cpu_flagged_active(sp->cpu_flags)) {
3049 		sp = dtcr;
3050 	}
3051 
3052 	ptnr = sp;
3053 	locptnr = NULL;
3054 	sibptnr = NULL;
3055 	do {
3056 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3057 			continue;
3058 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3059 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3060 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3061 			*typep = CE_XDIAG_PTNR_REMOTE;
3062 			return (ptnr);
3063 		}
3064 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3065 			if (sibptnr == NULL)
3066 				sibptnr = ptnr;
3067 			continue;
3068 		}
3069 		if (locptnr == NULL)
3070 			locptnr = ptnr;
3071 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3072 
3073 	/*
3074 	 * A foreign partner has already been returned if one was available.
3075 	 *
3076 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3077 	 * detector, is active, and is not a sibling of the detector.
3078 	 *
3079 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3080 	 * active.
3081 	 *
3082 	 * If we have to resort to using the detector itself we have already
3083 	 * checked that it is active.
3084 	 */
3085 	if (locptnr) {
3086 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3087 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3088 		*typep = CE_XDIAG_PTNR_LOCAL;
3089 		return (locptnr);
3090 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3091 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3092 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3093 		*typep = CE_XDIAG_PTNR_SIBLING;
3094 		return (sibptnr);
3095 	} else if (flags & PTNR_SELFOK) {
3096 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3097 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3098 		*typep = CE_XDIAG_PTNR_SELF;
3099 		return (dtcr);
3100 	}
3101 
3102 	return (NULL);
3103 }
3104 
3105 /*
3106  * Cross call handler that is requested to run on the designated partner of
3107  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3108  */
3109 static void
3110 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3111 {
3112 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3113 }
3114 
3115 /*
3116  * The associated errorqs are never destroyed so we do not need to deal with
3117  * them disappearing before this timeout fires.  If the affected memory
3118  * has been DR'd out since the original event the scrub algrithm will catch
3119  * any errors and return null disposition info.  If the original detecting
3120  * cpu has been DR'd out then ereport detector info will not be able to
3121  * lookup CPU type;  with a small timeout this is unlikely.
3122  */
3123 static void
3124 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3125 {
3126 	struct async_flt *aflt = cbarg->lkycb_aflt;
3127 	uchar_t disp;
3128 	cpu_t *cp;
3129 	int ptnrtype;
3130 
3131 	kpreempt_disable();
3132 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3133 	    &ptnrtype)) {
3134 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3135 		    (uint64_t)&disp);
3136 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3137 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3138 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3139 	} else {
3140 		ce_xdiag_lkydrops++;
3141 		if (ncpus > 1)
3142 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3143 			    CE_XDIAG_SKIP_NOPTNR);
3144 	}
3145 	kpreempt_enable();
3146 
3147 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3148 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3149 }
3150 
3151 /*
3152  * Called from errorq drain code when processing a CE error, both from
3153  * CPU and PCI drain functions.  Decide what further classification actions,
3154  * if any, we will perform.  Perform immediate actions now, and schedule
3155  * delayed actions as required.  Note that we are no longer necessarily running
3156  * on the detecting cpu, and that the async_flt structure will not persist on
3157  * return from this function.
3158  *
3159  * Calls to this function should aim to be self-throtlling in some way.  With
3160  * the delayed re-enable of CEEN the absolute rate of calls should not
3161  * be excessive.  Callers should also avoid performing in-depth classification
3162  * for events in pages that are already known to be suspect.
3163  *
3164  * We return nonzero to indicate that the event has been copied and
3165  * recirculated for further testing.  The caller should not log the event
3166  * in this case - it will be logged when further test results are available.
3167  *
3168  * Our possible contexts are that of errorq_drain: below lock level or from
3169  * panic context.  We can assume that the cpu we are running on is online.
3170  */
3171 
3172 
3173 #ifdef DEBUG
3174 static int ce_xdiag_forceaction;
3175 #endif
3176 
3177 int
3178 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3179     errorq_elem_t *eqep, size_t afltoffset)
3180 {
3181 	ce_dispact_t dispact, action;
3182 	cpu_t *cp;
3183 	uchar_t dtcrinfo, disp;
3184 	int ptnrtype;
3185 
3186 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3187 		ce_xdiag_drops++;
3188 		return (0);
3189 	} else if (!aflt->flt_in_memory) {
3190 		ce_xdiag_drops++;
3191 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3192 		return (0);
3193 	}
3194 
3195 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3196 
3197 	/*
3198 	 * Some correctable events are not scrubbed/classified, such as those
3199 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3200 	 * initial detector classification go no further.
3201 	 */
3202 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3203 		ce_xdiag_drops++;
3204 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3205 		return (0);
3206 	}
3207 
3208 	dispact = CE_DISPACT(ce_disp_table,
3209 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3210 	    CE_XDIAG_STATE(dtcrinfo),
3211 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3212 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3213 
3214 
3215 	action = CE_ACT(dispact);	/* bad lookup caught below */
3216 #ifdef DEBUG
3217 	if (ce_xdiag_forceaction != 0)
3218 		action = ce_xdiag_forceaction;
3219 #endif
3220 
3221 	switch (action) {
3222 	case CE_ACT_LKYCHK: {
3223 		caddr_t ndata;
3224 		errorq_elem_t *neqep;
3225 		struct async_flt *ecc;
3226 		ce_lkychk_cb_t *cbargp;
3227 
3228 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3229 			ce_xdiag_lkydrops++;
3230 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3231 			    CE_XDIAG_SKIP_DUPFAIL);
3232 			break;
3233 		}
3234 		ecc = (struct async_flt *)(ndata + afltoffset);
3235 
3236 		ASSERT(ecc->flt_class == CPU_FAULT ||
3237 		    ecc->flt_class == BUS_FAULT);
3238 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3239 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3240 
3241 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3242 		cbargp->lkycb_aflt = ecc;
3243 		cbargp->lkycb_eqp = eqp;
3244 		cbargp->lkycb_eqep = neqep;
3245 
3246 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3247 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3248 		return (1);
3249 	}
3250 
3251 	case CE_ACT_PTNRCHK:
3252 		kpreempt_disable();	/* stop cpu list changing */
3253 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3254 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3255 			    (uint64_t)aflt, (uint64_t)&disp);
3256 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3257 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3258 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3259 		} else if (ncpus > 1) {
3260 			ce_xdiag_ptnrdrops++;
3261 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3262 			    CE_XDIAG_SKIP_NOPTNR);
3263 		} else {
3264 			ce_xdiag_ptnrdrops++;
3265 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3266 			    CE_XDIAG_SKIP_UNIPROC);
3267 		}
3268 		kpreempt_enable();
3269 		break;
3270 
3271 	case CE_ACT_DONE:
3272 		break;
3273 
3274 	case CE_ACT(CE_DISP_BAD):
3275 	default:
3276 #ifdef DEBUG
3277 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3278 #endif
3279 		ce_xdiag_bad++;
3280 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3281 		break;
3282 	}
3283 
3284 	return (0);
3285 }
3286 
3287 /*
3288  * We route all errors through a single switch statement.
3289  */
3290 void
3291 cpu_ue_log_err(struct async_flt *aflt)
3292 {
3293 	switch (aflt->flt_class) {
3294 	case CPU_FAULT:
3295 		cpu_ereport_init(aflt);
3296 		if (cpu_async_log_err(aflt, NULL))
3297 			cpu_ereport_post(aflt);
3298 		break;
3299 
3300 	case BUS_FAULT:
3301 		bus_async_log_err(aflt);
3302 		break;
3303 
3304 	default:
3305 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3306 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3307 		return;
3308 	}
3309 }
3310 
3311 /*
3312  * Routine for panic hook callback from panic_idle().
3313  */
3314 void
3315 cpu_async_panic_callb(void)
3316 {
3317 	ch_async_flt_t ch_flt;
3318 	struct async_flt *aflt;
3319 	ch_cpu_errors_t cpu_error_regs;
3320 	uint64_t afsr_errs;
3321 
3322 	get_cpu_error_state(&cpu_error_regs);
3323 
3324 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3325 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3326 
3327 	if (afsr_errs) {
3328 
3329 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3330 		aflt = (struct async_flt *)&ch_flt;
3331 		aflt->flt_id = gethrtime_waitfree();
3332 		aflt->flt_bus_id = getprocessorid();
3333 		aflt->flt_inst = CPU->cpu_id;
3334 		aflt->flt_stat = cpu_error_regs.afsr;
3335 		aflt->flt_addr = cpu_error_regs.afar;
3336 		aflt->flt_prot = AFLT_PROT_NONE;
3337 		aflt->flt_class = CPU_FAULT;
3338 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3339 		aflt->flt_panic = 1;
3340 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3341 		ch_flt.afsr_errs = afsr_errs;
3342 #if defined(SERRANO)
3343 		ch_flt.afar2 = cpu_error_regs.afar2;
3344 #endif	/* SERRANO */
3345 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3346 	}
3347 }
3348 
3349 /*
3350  * Routine to convert a syndrome into a syndrome code.
3351  */
3352 static int
3353 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3354 {
3355 	if (synd_status == AFLT_STAT_INVALID)
3356 		return (-1);
3357 
3358 	/*
3359 	 * Use the syndrome to index the appropriate syndrome table,
3360 	 * to get the code indicating which bit(s) is(are) bad.
3361 	 */
3362 	if (afsr_bit &
3363 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3364 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3365 #if defined(JALAPENO) || defined(SERRANO)
3366 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3367 				return (-1);
3368 			else
3369 				return (BPAR0 + synd);
3370 #else /* JALAPENO || SERRANO */
3371 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3372 				return (-1);
3373 			else
3374 				return (mtag_syndrome_tab[synd]);
3375 #endif /* JALAPENO || SERRANO */
3376 		} else {
3377 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3378 				return (-1);
3379 			else
3380 				return (ecc_syndrome_tab[synd]);
3381 		}
3382 	} else {
3383 		return (-1);
3384 	}
3385 }
3386 
3387 int
3388 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3389 {
3390 	if (&plat_get_mem_sid)
3391 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3392 	else
3393 		return (ENOTSUP);
3394 }
3395 
3396 int
3397 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3398 {
3399 	if (&plat_get_mem_offset)
3400 		return (plat_get_mem_offset(flt_addr, offp));
3401 	else
3402 		return (ENOTSUP);
3403 }
3404 
3405 int
3406 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3407 {
3408 	if (&plat_get_mem_addr)
3409 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3410 	else
3411 		return (ENOTSUP);
3412 }
3413 
3414 /*
3415  * Routine to return a string identifying the physical name
3416  * associated with a memory/cache error.
3417  */
3418 int
3419 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3420     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3421     ushort_t flt_status, char *buf, int buflen, int *lenp)
3422 {
3423 	int synd_code;
3424 	int ret;
3425 
3426 	/*
3427 	 * An AFSR of -1 defaults to a memory syndrome.
3428 	 */
3429 	if (flt_stat == (uint64_t)-1)
3430 		flt_stat = C_AFSR_CE;
3431 
3432 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3433 
3434 	/*
3435 	 * Syndrome code must be either a single-bit error code
3436 	 * (0...143) or -1 for unum lookup.
3437 	 */
3438 	if (synd_code < 0 || synd_code >= M2)
3439 		synd_code = -1;
3440 	if (&plat_get_mem_unum) {
3441 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3442 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3443 			buf[0] = '\0';
3444 			*lenp = 0;
3445 		}
3446 
3447 		return (ret);
3448 	}
3449 
3450 	return (ENOTSUP);
3451 }
3452 
3453 /*
3454  * Wrapper for cpu_get_mem_unum() routine that takes an
3455  * async_flt struct rather than explicit arguments.
3456  */
3457 int
3458 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3459     char *buf, int buflen, int *lenp)
3460 {
3461 	/*
3462 	 * If we come thru here for an IO bus error aflt->flt_stat will
3463 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3464 	 * so it will interpret this as a memory error.
3465 	 */
3466 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3467 	    (aflt->flt_class == BUS_FAULT) ?
3468 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3469 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3470 	    aflt->flt_status, buf, buflen, lenp));
3471 }
3472 
3473 /*
3474  * Return unum string given synd_code and async_flt into
3475  * the buf with size UNUM_NAMLEN
3476  */
3477 static int
3478 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3479 {
3480 	int ret, len;
3481 
3482 	/*
3483 	 * Syndrome code must be either a single-bit error code
3484 	 * (0...143) or -1 for unum lookup.
3485 	 */
3486 	if (synd_code < 0 || synd_code >= M2)
3487 		synd_code = -1;
3488 	if (&plat_get_mem_unum) {
3489 		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3490 		    aflt->flt_bus_id, aflt->flt_in_memory,
3491 		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3492 			buf[0] = '\0';
3493 		}
3494 		return (ret);
3495 	}
3496 
3497 	buf[0] = '\0';
3498 	return (ENOTSUP);
3499 }
3500 
3501 /*
3502  * This routine is a more generic interface to cpu_get_mem_unum()
3503  * that may be used by other modules (e.g. the 'mm' driver, through
3504  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3505  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3506  */
3507 int
3508 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3509     char *buf, int buflen, int *lenp)
3510 {
3511 	int synd_status, flt_in_memory, ret;
3512 	ushort_t flt_status = 0;
3513 	char unum[UNUM_NAMLEN];
3514 	uint64_t t_afsr_errs;
3515 
3516 	/*
3517 	 * Check for an invalid address.
3518 	 */
3519 	if (afar == (uint64_t)-1)
3520 		return (ENXIO);
3521 
3522 	if (synd == (uint64_t)-1)
3523 		synd_status = AFLT_STAT_INVALID;
3524 	else
3525 		synd_status = AFLT_STAT_VALID;
3526 
3527 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3528 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3529 
3530 	/*
3531 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3532 	 */
3533 	if (*afsr == (uint64_t)-1)
3534 		t_afsr_errs = C_AFSR_CE;
3535 	else {
3536 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3537 #if defined(CHEETAH_PLUS)
3538 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3539 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3540 #endif	/* CHEETAH_PLUS */
3541 	}
3542 
3543 	/*
3544 	 * Turn on ECC_ECACHE if error type is E$ Data.
3545 	 */
3546 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3547 		flt_status |= ECC_ECACHE;
3548 
3549 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3550 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3551 	if (ret != 0)
3552 		return (ret);
3553 
3554 	if (*lenp >= buflen)
3555 		return (ENAMETOOLONG);
3556 
3557 	(void) strncpy(buf, unum, buflen);
3558 
3559 	return (0);
3560 }
3561 
3562 /*
3563  * Routine to return memory information associated
3564  * with a physical address and syndrome.
3565  */
3566 int
3567 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3568     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3569     int *segsp, int *banksp, int *mcidp)
3570 {
3571 	int synd_status, synd_code;
3572 
3573 	if (afar == (uint64_t)-1)
3574 		return (ENXIO);
3575 
3576 	if (synd == (uint64_t)-1)
3577 		synd_status = AFLT_STAT_INVALID;
3578 	else
3579 		synd_status = AFLT_STAT_VALID;
3580 
3581 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3582 
3583 	if (p2get_mem_info != NULL)
3584 		return ((p2get_mem_info)(synd_code, afar,
3585 		    mem_sizep, seg_sizep, bank_sizep,
3586 		    segsp, banksp, mcidp));
3587 	else
3588 		return (ENOTSUP);
3589 }
3590 
3591 /*
3592  * Routine to return a string identifying the physical
3593  * name associated with a cpuid.
3594  */
3595 int
3596 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3597 {
3598 	int ret;
3599 	char unum[UNUM_NAMLEN];
3600 
3601 	if (&plat_get_cpu_unum) {
3602 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3603 		    != 0)
3604 			return (ret);
3605 	} else {
3606 		return (ENOTSUP);
3607 	}
3608 
3609 	if (*lenp >= buflen)
3610 		return (ENAMETOOLONG);
3611 
3612 	(void) strncpy(buf, unum, buflen);
3613 
3614 	return (0);
3615 }
3616 
3617 /*
3618  * This routine exports the name buffer size.
3619  */
3620 size_t
3621 cpu_get_name_bufsize()
3622 {
3623 	return (UNUM_NAMLEN);
3624 }
3625 
/*
 * Historical function, apparently not used.  Deliberately does nothing.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3633 
/*
 * Historical function only called for SBus errors in debugging.
 * Deliberately does nothing.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3641 
3642 /*
3643  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3644  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3645  * an async fault structure argument is passed in, the captured error state
3646  * (AFSR, AFAR) info will be returned in the structure.
3647  */
3648 int
3649 clear_errors(ch_async_flt_t *ch_flt)
3650 {
3651 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3652 	ch_cpu_errors_t	cpu_error_regs;
3653 
3654 	get_cpu_error_state(&cpu_error_regs);
3655 
3656 	if (ch_flt != NULL) {
3657 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3658 		aflt->flt_addr = cpu_error_regs.afar;
3659 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3660 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3661 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3662 #if defined(SERRANO)
3663 		ch_flt->afar2 = cpu_error_regs.afar2;
3664 #endif	/* SERRANO */
3665 	}
3666 
3667 	set_cpu_error_state(&cpu_error_regs);
3668 
3669 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3670 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3671 }
3672 
3673 /*
3674  * Clear any AFSR error bits, and check for persistence.
3675  *
3676  * It would be desirable to also insist that syndrome match.  PCI handling
3677  * has already filled flt_synd.  For errors trapped by CPU we only fill
3678  * flt_synd when we queue the event, so we do not have a valid flt_synd
3679  * during initial classification (it is valid if we're called as part of
3680  * subsequent low-pil additional classification attempts).  We could try
3681  * to determine which syndrome to use: we know we're only called for
3682  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3683  * would be esynd/none and esynd/msynd, respectively.  If that is
3684  * implemented then what do we do in the case that we do experience an
3685  * error on the same afar but with different syndrome?  At the very least
3686  * we should count such occurences.  Anyway, for now, we'll leave it as
3687  * it has been for ages.
3688  */
3689 static int
3690 clear_ecc(struct async_flt *aflt)
3691 {
3692 	ch_cpu_errors_t	cpu_error_regs;
3693 
3694 	/*
3695 	 * Snapshot the AFSR and AFAR and clear any errors
3696 	 */
3697 	get_cpu_error_state(&cpu_error_regs);
3698 	set_cpu_error_state(&cpu_error_regs);
3699 
3700 	/*
3701 	 * If any of the same memory access error bits are still on and
3702 	 * the AFAR matches, return that the error is persistent.
3703 	 */
3704 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3705 	    cpu_error_regs.afar == aflt->flt_addr);
3706 }
3707 
3708 /*
3709  * Turn off all cpu error detection, normally only used for panics.
3710  */
3711 void
3712 cpu_disable_errors(void)
3713 {
3714 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3715 
3716 	/*
3717 	 * With error detection now turned off, check the other cpus
3718 	 * logout areas for any unlogged errors.
3719 	 */
3720 	if (enable_check_other_cpus_logout) {
3721 		cpu_check_other_cpus_logout();
3722 		/*
3723 		 * Make a second pass over the logout areas, in case
3724 		 * there is a failing CPU in an error-trap loop which
3725 		 * will write to the logout area once it is emptied.
3726 		 */
3727 		cpu_check_other_cpus_logout();
3728 	}
3729 }
3730 
3731 /*
3732  * Enable errors.
3733  */
3734 void
3735 cpu_enable_errors(void)
3736 {
3737 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3738 }
3739 
3740 /*
3741  * Flush the entire ecache using displacement flush by reading through a
3742  * physical address range twice as large as the Ecache.
3743  */
3744 void
3745 cpu_flush_ecache(void)
3746 {
3747 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3748 	    cpunodes[CPU->cpu_id].ecache_linesize);
3749 }
3750 
3751 /*
3752  * Return CPU E$ set size - E$ size divided by the associativity.
3753  * We use this function in places where the CPU_PRIVATE ptr may not be
3754  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3755  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3756  * up before the kernel switches from OBP's to the kernel's trap table, so
3757  * we don't have to worry about cpunodes being unitialized.
3758  */
3759 int
3760 cpu_ecache_set_size(struct cpu *cp)
3761 {
3762 	if (CPU_PRIVATE(cp))
3763 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3764 
3765 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3766 }
3767 
3768 /*
3769  * Flush Ecache line.
3770  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3771  * Uses normal displacement flush for Cheetah.
3772  */
3773 static void
3774 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3775 {
3776 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3777 	int ec_set_size = cpu_ecache_set_size(CPU);
3778 
3779 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3780 }
3781 
3782 /*
3783  * Scrub physical address.
3784  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3785  * Ecache or direct-mapped Ecache.
3786  */
3787 static void
3788 cpu_scrubphys(struct async_flt *aflt)
3789 {
3790 	int ec_set_size = cpu_ecache_set_size(CPU);
3791 
3792 	scrubphys(aflt->flt_addr, ec_set_size);
3793 }
3794 
3795 /*
3796  * Clear physical address.
3797  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3798  * Ecache or direct-mapped Ecache.
3799  */
3800 void
3801 cpu_clearphys(struct async_flt *aflt)
3802 {
3803 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3804 	int ec_set_size = cpu_ecache_set_size(CPU);
3805 
3806 
3807 	clearphys(aflt->flt_addr, ec_set_size, lsize);
3808 }
3809 
#if defined(CPU_IMP_ECACHE_ASSOC)
/*
 * Search the captured E$ tags of every way for a line that is not invalid
 * and whose tag PA equals the fault address (with the low-order bits that
 * are not part of the tag masked off).
 *
 * Returns the matching set number plus one, or 0 if no way matches.
 */
static int
cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ch_ec_data_t *ways = &ch_flt->flt_diag_data.chd_ec_data[0];
	int ec_size = cpunodes[CPU->cpu_id].ecache_size;
	int set_size = cpu_ecache_set_size(CPU);
	int nway = cpu_ecache_nway();
	uint64_t want;
	int way;

	/* The portion of the fault address that the tag PA is compared to. */
	want = aflt->flt_addr & P2ALIGN(C_AFAR_PA, set_size);

	for (way = 0; way < nway; way++) {
		if (cpu_ectag_line_invalid(ec_size, ways[way].ec_tag))
			continue;
		if (cpu_ectag_to_pa(set_size, ways[way].ec_tag) == want)
			return (way + 1);
	}

	return (0);
}
#endif /* CPU_IMP_ECACHE_ASSOC */
3834 
3835 /*
3836  * Check whether a line in the given logout info matches the specified
3837  * fault address.  If reqval is set then the line must not be Invalid.
3838  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3839  * set to 2 for l2$ or 3 for l3$.
3840  */
3841 static int
3842 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3843 {
3844 	ch_diag_data_t *cdp = data;
3845 	ch_ec_data_t *ecp;
3846 	int totalsize, ec_set_size;
3847 	int i, ways;
3848 	int match = 0;
3849 	int tagvalid;
3850 	uint64_t addr, tagpa;
3851 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3852 
3853 	/*
3854 	 * Check the l2$ logout data
3855 	 */
3856 	if (ispanther) {
3857 		ecp = &cdp->chd_l2_data[0];
3858 		ec_set_size = PN_L2_SET_SIZE;
3859 		ways = PN_L2_NWAYS;
3860 	} else {
3861 		ecp = &cdp->chd_ec_data[0];
3862 		ec_set_size = cpu_ecache_set_size(CPU);
3863 		ways = cpu_ecache_nway();
3864 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3865 	}
3866 	/* remove low order PA bits from fault address not used in PA tag */
3867 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3868 	for (i = 0; i < ways; i++, ecp++) {
3869 		if (ispanther) {
3870 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3871 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3872 		} else {
3873 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3874 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3875 			    ecp->ec_tag);
3876 		}
3877 		if (tagpa == addr && (!reqval || tagvalid)) {
3878 			match = i + 1;
3879 			*level = 2;
3880 			break;
3881 		}
3882 	}
3883 
3884 	if (match || !ispanther)
3885 		return (match);
3886 
3887 	/* For Panther we also check the l3$ */
3888 	ecp = &cdp->chd_ec_data[0];
3889 	ec_set_size = PN_L3_SET_SIZE;
3890 	ways = PN_L3_NWAYS;
3891 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3892 
3893 	for (i = 0; i < ways; i++, ecp++) {
3894 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3895 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3896 			match = i + 1;
3897 			*level = 3;
3898 			break;
3899 		}
3900 	}
3901 
3902 	return (match);
3903 }
3904 
3905 #if defined(CPU_IMP_L1_CACHE_PARITY)
3906 /*
3907  * Record information related to the source of an Dcache Parity Error.
3908  */
3909 static void
3910 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3911 {
3912 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3913 	int index;
3914 
3915 	/*
3916 	 * Since instruction decode cannot be done at high PIL
3917 	 * just examine the entire Dcache to locate the error.
3918 	 */
3919 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3920 		ch_flt->parity_data.dpe.cpl_way = -1;
3921 		ch_flt->parity_data.dpe.cpl_off = -1;
3922 	}
3923 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3924 		cpu_dcache_parity_check(ch_flt, index);
3925 }
3926 
3927 /*
3928  * Check all ways of the Dcache at a specified index for good parity.
3929  */
3930 static void
3931 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3932 {
3933 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3934 	uint64_t parity_bits, pbits, data_word;
3935 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3936 	int way, word, data_byte;
3937 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3938 	ch_dc_data_t tmp_dcp;
3939 
3940 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3941 		/*
3942 		 * Perform diagnostic read.
3943 		 */
3944 		get_dcache_dtag(index + way * dc_set_size,
3945 		    (uint64_t *)&tmp_dcp);
3946 
3947 		/*
3948 		 * Check tag for even parity.
3949 		 * Sum of 1 bits (including parity bit) should be even.
3950 		 */
3951 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3952 			/*
3953 			 * If this is the first error log detailed information
3954 			 * about it and check the snoop tag. Otherwise just
3955 			 * record the fact that we found another error.
3956 			 */
3957 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3958 				ch_flt->parity_data.dpe.cpl_way = way;
3959 				ch_flt->parity_data.dpe.cpl_cache =
3960 				    CPU_DC_PARITY;
3961 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3962 
3963 				if (popc64(tmp_dcp.dc_sntag &
3964 				    CHP_DCSNTAG_PARMASK) & 1) {
3965 					ch_flt->parity_data.dpe.cpl_tag |=
3966 					    CHP_DC_SNTAG;
3967 					ch_flt->parity_data.dpe.cpl_lcnt++;
3968 				}
3969 
3970 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3971 			}
3972 
3973 			ch_flt->parity_data.dpe.cpl_lcnt++;
3974 		}
3975 
3976 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3977 			/*
3978 			 * Panther has more parity bits than the other
3979 			 * processors for covering dcache data and so each
3980 			 * byte of data in each word has its own parity bit.
3981 			 */
3982 			parity_bits = tmp_dcp.dc_pn_data_parity;
3983 			for (word = 0; word < 4; word++) {
3984 				data_word = tmp_dcp.dc_data[word];
3985 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3986 				for (data_byte = 0; data_byte < 8;
3987 				    data_byte++) {
3988 					if (((popc64(data_word &
3989 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3990 					    (pbits & 1)) {
3991 						cpu_record_dc_data_parity(
3992 						    ch_flt, dcp, &tmp_dcp, way,
3993 						    word);
3994 					}
3995 					pbits >>= 1;
3996 					data_word >>= 8;
3997 				}
3998 				parity_bits >>= 8;
3999 			}
4000 		} else {
4001 			/*
4002 			 * Check data array for even parity.
4003 			 * The 8 parity bits are grouped into 4 pairs each
4004 			 * of which covers a 64-bit word.  The endianness is
4005 			 * reversed -- the low-order parity bits cover the
4006 			 * high-order data words.
4007 			 */
4008 			parity_bits = tmp_dcp.dc_utag >> 8;
4009 			for (word = 0; word < 4; word++) {
4010 				pbits = (parity_bits >> (6 - word * 2)) & 3;
4011 				if ((popc64(tmp_dcp.dc_data[word]) +
4012 				    parity_bits_popc[pbits]) & 1) {
4013 					cpu_record_dc_data_parity(ch_flt, dcp,
4014 					    &tmp_dcp, way, word);
4015 				}
4016 			}
4017 		}
4018 	}
4019 }
4020 
4021 static void
4022 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4023     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4024 {
4025 	/*
4026 	 * If this is the first error log detailed information about it.
4027 	 * Otherwise just record the fact that we found another error.
4028 	 */
4029 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4030 		ch_flt->parity_data.dpe.cpl_way = way;
4031 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4032 		ch_flt->parity_data.dpe.cpl_off = word * 8;
4033 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4034 	}
4035 	ch_flt->parity_data.dpe.cpl_lcnt++;
4036 }
4037 
4038 /*
4039  * Record information related to the source of an Icache Parity Error.
4040  *
4041  * Called with the Icache disabled so any diagnostic accesses are safe.
4042  */
4043 static void
4044 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4045 {
4046 	int	ic_set_size;
4047 	int	ic_linesize;
4048 	int	index;
4049 
4050 	if (CPU_PRIVATE(CPU)) {
4051 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4052 		    CH_ICACHE_NWAY;
4053 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4054 	} else {
4055 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4056 		ic_linesize = icache_linesize;
4057 	}
4058 
4059 	ch_flt->parity_data.ipe.cpl_way = -1;
4060 	ch_flt->parity_data.ipe.cpl_off = -1;
4061 
4062 	for (index = 0; index < ic_set_size; index += ic_linesize)
4063 		cpu_icache_parity_check(ch_flt, index);
4064 }
4065 
4066 /*
4067  * Check all ways of the Icache at a specified index for good parity.
4068  */
4069 static void
4070 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4071 {
4072 	uint64_t parmask, pn_inst_parity;
4073 	int ic_set_size;
4074 	int ic_linesize;
4075 	int flt_index, way, instr, num_instr;
4076 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4077 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4078 	ch_ic_data_t tmp_icp;
4079 
4080 	if (CPU_PRIVATE(CPU)) {
4081 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4082 		    CH_ICACHE_NWAY;
4083 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4084 	} else {
4085 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4086 		ic_linesize = icache_linesize;
4087 	}
4088 
4089 	/*
4090 	 * Panther has twice as many instructions per icache line and the
4091 	 * instruction parity bit is in a different location.
4092 	 */
4093 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4094 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4095 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4096 	} else {
4097 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4098 		pn_inst_parity = 0;
4099 	}
4100 
4101 	/*
4102 	 * Index at which we expect to find the parity error.
4103 	 */
4104 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4105 
4106 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4107 		/*
4108 		 * Diagnostic reads expect address argument in ASI format.
4109 		 */
4110 		get_icache_dtag(2 * (index + way * ic_set_size),
4111 		    (uint64_t *)&tmp_icp);
4112 
4113 		/*
4114 		 * If this is the index in which we expect to find the
4115 		 * error log detailed information about each of the ways.
4116 		 * This information will be displayed later if we can't
4117 		 * determine the exact way in which the error is located.
4118 		 */
4119 		if (flt_index == index)
4120 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4121 
4122 		/*
4123 		 * Check tag for even parity.
4124 		 * Sum of 1 bits (including parity bit) should be even.
4125 		 */
4126 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4127 			/*
4128 			 * If this way is the one in which we expected
4129 			 * to find the error record the way and check the
4130 			 * snoop tag. Otherwise just record the fact we
4131 			 * found another error.
4132 			 */
4133 			if (flt_index == index) {
4134 				ch_flt->parity_data.ipe.cpl_way = way;
4135 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4136 
4137 				if (popc64(tmp_icp.ic_sntag &
4138 				    CHP_ICSNTAG_PARMASK) & 1) {
4139 					ch_flt->parity_data.ipe.cpl_tag |=
4140 					    CHP_IC_SNTAG;
4141 					ch_flt->parity_data.ipe.cpl_lcnt++;
4142 				}
4143 
4144 			}
4145 			ch_flt->parity_data.ipe.cpl_lcnt++;
4146 			continue;
4147 		}
4148 
4149 		/*
4150 		 * Check instruction data for even parity.
4151 		 * Bits participating in parity differ for PC-relative
4152 		 * versus non-PC-relative instructions.
4153 		 */
4154 		for (instr = 0; instr < num_instr; instr++) {
4155 			parmask = (tmp_icp.ic_data[instr] &
4156 			    CH_ICDATA_PRED_ISPCREL) ?
4157 			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4158 			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4159 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4160 				/*
4161 				 * If this way is the one in which we expected
4162 				 * to find the error record the way and offset.
4163 				 * Otherwise just log the fact we found another
4164 				 * error.
4165 				 */
4166 				if (flt_index == index) {
4167 					ch_flt->parity_data.ipe.cpl_way = way;
4168 					ch_flt->parity_data.ipe.cpl_off =
4169 					    instr * 4;
4170 				}
4171 				ch_flt->parity_data.ipe.cpl_lcnt++;
4172 				continue;
4173 			}
4174 		}
4175 	}
4176 }
4177 
4178 /*
4179  * Record information related to the source of an Pcache Parity Error.
4180  */
4181 static void
4182 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4183 {
4184 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4185 	int index;
4186 
4187 	/*
4188 	 * Since instruction decode cannot be done at high PIL just
4189 	 * examine the entire Pcache to check for any parity errors.
4190 	 */
4191 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4192 		ch_flt->parity_data.dpe.cpl_way = -1;
4193 		ch_flt->parity_data.dpe.cpl_off = -1;
4194 	}
4195 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4196 		cpu_pcache_parity_check(ch_flt, index);
4197 }
4198 
4199 /*
4200  * Check all ways of the Pcache at a specified index for good parity.
4201  */
4202 static void
4203 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4204 {
4205 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4206 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4207 	int way, word, pbit, parity_bits;
4208 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4209 	ch_pc_data_t tmp_pcp;
4210 
4211 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4212 		/*
4213 		 * Perform diagnostic read.
4214 		 */
4215 		get_pcache_dtag(index + way * pc_set_size,
4216 		    (uint64_t *)&tmp_pcp);
4217 		/*
4218 		 * Check data array for odd parity. There are 8 parity
4219 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4220 		 * of those bits covers exactly 8 bytes of the data
4221 		 * array:
4222 		 *
4223 		 *	parity bit	P$ data bytes covered
4224 		 *	----------	---------------------
4225 		 *	50		63:56
4226 		 *	51		55:48
4227 		 *	52		47:40
4228 		 *	53		39:32
4229 		 *	54		31:24
4230 		 *	55		23:16
4231 		 *	56		15:8
4232 		 *	57		7:0
4233 		 */
4234 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4235 		for (word = 0; word < pc_data_words; word++) {
4236 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4237 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4238 				/*
4239 				 * If this is the first error log detailed
4240 				 * information about it. Otherwise just record
4241 				 * the fact that we found another error.
4242 				 */
4243 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4244 					ch_flt->parity_data.dpe.cpl_way = way;
4245 					ch_flt->parity_data.dpe.cpl_cache =
4246 					    CPU_PC_PARITY;
4247 					ch_flt->parity_data.dpe.cpl_off =
4248 					    word * sizeof (uint64_t);
4249 					bcopy(&tmp_pcp, pcp,
4250 					    sizeof (ch_pc_data_t));
4251 				}
4252 				ch_flt->parity_data.dpe.cpl_lcnt++;
4253 			}
4254 		}
4255 	}
4256 }
4257 
4258 
4259 /*
4260  * Add L1 Data cache data to the ereport payload.
4261  */
4262 static void
4263 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4264 {
4265 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4266 	ch_dc_data_t *dcp;
4267 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4268 	uint_t nelem;
4269 	int i, ways_to_check, ways_logged = 0;
4270 
4271 	/*
4272 	 * If this is an D$ fault then there may be multiple
4273 	 * ways captured in the ch_parity_log_t structure.
4274 	 * Otherwise, there will be at most one way captured
4275 	 * in the ch_diag_data_t struct.
4276 	 * Check each way to see if it should be encoded.
4277 	 */
4278 	if (ch_flt->flt_type == CPU_DC_PARITY)
4279 		ways_to_check = CH_DCACHE_NWAY;
4280 	else
4281 		ways_to_check = 1;
4282 	for (i = 0; i < ways_to_check; i++) {
4283 		if (ch_flt->flt_type == CPU_DC_PARITY)
4284 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4285 		else
4286 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4287 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4288 			bcopy(dcp, &dcdata[ways_logged],
4289 			    sizeof (ch_dc_data_t));
4290 			ways_logged++;
4291 		}
4292 	}
4293 
4294 	/*
4295 	 * Add the dcache data to the payload.
4296 	 */
4297 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4298 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4299 	if (ways_logged != 0) {
4300 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4301 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4302 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4303 	}
4304 }
4305 
4306 /*
4307  * Add L1 Instruction cache data to the ereport payload.
4308  */
4309 static void
4310 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4311 {
4312 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4313 	ch_ic_data_t *icp;
4314 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4315 	uint_t nelem;
4316 	int i, ways_to_check, ways_logged = 0;
4317 
4318 	/*
4319 	 * If this is an I$ fault then there may be multiple
4320 	 * ways captured in the ch_parity_log_t structure.
4321 	 * Otherwise, there will be at most one way captured
4322 	 * in the ch_diag_data_t struct.
4323 	 * Check each way to see if it should be encoded.
4324 	 */
4325 	if (ch_flt->flt_type == CPU_IC_PARITY)
4326 		ways_to_check = CH_ICACHE_NWAY;
4327 	else
4328 		ways_to_check = 1;
4329 	for (i = 0; i < ways_to_check; i++) {
4330 		if (ch_flt->flt_type == CPU_IC_PARITY)
4331 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4332 		else
4333 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4334 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4335 			bcopy(icp, &icdata[ways_logged],
4336 			    sizeof (ch_ic_data_t));
4337 			ways_logged++;
4338 		}
4339 	}
4340 
4341 	/*
4342 	 * Add the icache data to the payload.
4343 	 */
4344 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4345 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4346 	if (ways_logged != 0) {
4347 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4348 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4349 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4350 	}
4351 }
4352 
4353 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4354 
4355 /*
4356  * Add ecache data to payload.
4357  */
4358 static void
4359 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4360 {
4361 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4362 	ch_ec_data_t *ecp;
4363 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4364 	uint_t nelem;
4365 	int i, ways_logged = 0;
4366 
4367 	/*
4368 	 * Check each way to see if it should be encoded
4369 	 * and concatinate it into a temporary buffer.
4370 	 */
4371 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4372 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4373 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4374 			bcopy(ecp, &ecdata[ways_logged],
4375 			    sizeof (ch_ec_data_t));
4376 			ways_logged++;
4377 		}
4378 	}
4379 
4380 	/*
4381 	 * Panther CPUs have an additional level of cache and so
4382 	 * what we just collected was the L3 (ecache) and not the
4383 	 * L2 cache.
4384 	 */
4385 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4386 		/*
4387 		 * Add the L3 (ecache) data to the payload.
4388 		 */
4389 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4390 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4391 		if (ways_logged != 0) {
4392 			nelem = sizeof (ch_ec_data_t) /
4393 			    sizeof (uint64_t) * ways_logged;
4394 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4395 			    DATA_TYPE_UINT64_ARRAY, nelem,
4396 			    (uint64_t *)ecdata, NULL);
4397 		}
4398 
4399 		/*
4400 		 * Now collect the L2 cache.
4401 		 */
4402 		ways_logged = 0;
4403 		for (i = 0; i < PN_L2_NWAYS; i++) {
4404 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4405 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4406 				bcopy(ecp, &ecdata[ways_logged],
4407 				    sizeof (ch_ec_data_t));
4408 				ways_logged++;
4409 			}
4410 		}
4411 	}
4412 
4413 	/*
4414 	 * Add the L2 cache data to the payload.
4415 	 */
4416 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4417 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4418 	if (ways_logged != 0) {
4419 		nelem = sizeof (ch_ec_data_t) /
4420 		    sizeof (uint64_t) * ways_logged;
4421 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4422 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4423 	}
4424 }
4425 
4426 /*
4427  * Initialize cpu scheme for specified cpu.
4428  */
4429 static void
4430 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4431 {
4432 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4433 	uint8_t mask;
4434 
4435 	mask = cpunodes[cpuid].version;
4436 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4437 	    (u_longlong_t)cpunodes[cpuid].device_id);
4438 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4439 	    cpuid, &mask, (const char *)sbuf);
4440 }
4441 
4442 /*
4443  * Returns ereport resource type.
4444  */
4445 static int
4446 cpu_error_to_resource_type(struct async_flt *aflt)
4447 {
4448 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4449 
4450 	switch (ch_flt->flt_type) {
4451 
4452 	case CPU_CE_ECACHE:
4453 	case CPU_UE_ECACHE:
4454 	case CPU_UE_ECACHE_RETIRE:
4455 	case CPU_ORPH:
4456 		/*
4457 		 * If AFSR error bit indicates L2$ Data for Cheetah,
4458 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4459 		 * E$ Data type, otherwise, return CPU type.
4460 		 */
4461 		if (cpu_error_is_ecache_data(aflt->flt_inst,
4462 		    ch_flt->flt_bit))
4463 			return (ERRTYPE_ECACHE_DATA);
4464 		return (ERRTYPE_CPU);
4465 
4466 	case CPU_CE:
4467 	case CPU_UE:
4468 	case CPU_EMC:
4469 	case CPU_DUE:
4470 	case CPU_RCE:
4471 	case CPU_RUE:
4472 	case CPU_FRC:
4473 	case CPU_FRU:
4474 		return (ERRTYPE_MEMORY);
4475 
4476 	case CPU_IC_PARITY:
4477 	case CPU_DC_PARITY:
4478 	case CPU_FPUERR:
4479 	case CPU_PC_PARITY:
4480 	case CPU_ITLB_PARITY:
4481 	case CPU_DTLB_PARITY:
4482 		return (ERRTYPE_CPU);
4483 	}
4484 	return (ERRTYPE_UNKNOWN);
4485 }
4486 
4487 /*
4488  * Encode the data saved in the ch_async_flt_t struct into
4489  * the FM ereport payload.
4490  */
4491 static void
4492 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4493 	nvlist_t *resource, int *afar_status, int *synd_status)
4494 {
4495 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4496 	*synd_status = AFLT_STAT_INVALID;
4497 	*afar_status = AFLT_STAT_INVALID;
4498 
4499 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4500 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4501 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4502 	}
4503 
4504 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4505 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4506 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4507 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4508 	}
4509 
4510 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4511 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4512 		    ch_flt->flt_bit);
4513 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4514 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4515 	}
4516 
4517 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4518 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4519 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4520 	}
4521 
4522 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4523 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4524 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4525 	}
4526 
4527 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4528 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4529 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4530 	}
4531 
4532 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4533 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4534 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4535 	}
4536 
4537 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4538 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4539 		    DATA_TYPE_BOOLEAN_VALUE,
4540 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4541 	}
4542 
4543 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4544 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4545 		    DATA_TYPE_BOOLEAN_VALUE,
4546 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4547 	}
4548 
4549 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4550 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4551 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4552 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4553 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4554 	}
4555 
4556 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4557 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4558 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4559 	}
4560 
4561 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4562 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4563 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4564 	}
4565 
4566 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4567 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4568 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4569 	}
4570 
4571 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4572 		cpu_payload_add_ecache(aflt, payload);
4573 
4574 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4575 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4576 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4577 	}
4578 
4579 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4580 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4581 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4582 	}
4583 
4584 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4585 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4586 		    DATA_TYPE_UINT32_ARRAY, 16,
4587 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4588 	}
4589 
4590 #if defined(CPU_IMP_L1_CACHE_PARITY)
4591 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4592 		cpu_payload_add_dcache(aflt, payload);
4593 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4594 		cpu_payload_add_icache(aflt, payload);
4595 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4596 
4597 #if defined(CHEETAH_PLUS)
4598 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4599 		cpu_payload_add_pcache(aflt, payload);
4600 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4601 		cpu_payload_add_tlb(aflt, payload);
4602 #endif	/* CHEETAH_PLUS */
4603 	/*
4604 	 * Create the FMRI that goes into the payload
4605 	 * and contains the unum info if necessary.
4606 	 */
4607 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4608 		char unum[UNUM_NAMLEN] = "";
4609 		char sid[DIMM_SERIAL_ID_LEN] = "";
4610 		int len, ret, rtype, synd_code;
4611 		uint64_t offset = (uint64_t)-1;
4612 
4613 		rtype = cpu_error_to_resource_type(aflt);
4614 		switch (rtype) {
4615 
4616 		case ERRTYPE_MEMORY:
4617 		case ERRTYPE_ECACHE_DATA:
4618 
4619 			/*
4620 			 * Memory errors, do unum lookup
4621 			 */
4622 			if (*afar_status == AFLT_STAT_INVALID)
4623 				break;
4624 
4625 			if (rtype == ERRTYPE_ECACHE_DATA)
4626 				aflt->flt_status |= ECC_ECACHE;
4627 			else
4628 				aflt->flt_status &= ~ECC_ECACHE;
4629 
4630 			synd_code = synd_to_synd_code(*synd_status,
4631 			    aflt->flt_synd, ch_flt->flt_bit);
4632 
4633 			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4634 				break;
4635 
4636 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4637 			    &len);
4638 
4639 			if (ret == 0) {
4640 				(void) cpu_get_mem_offset(aflt->flt_addr,
4641 				    &offset);
4642 			}
4643 
4644 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4645 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4646 			fm_payload_set(payload,
4647 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4648 			    DATA_TYPE_NVLIST, resource, NULL);
4649 			break;
4650 
4651 		case ERRTYPE_CPU:
4652 			/*
4653 			 * On-board processor array error, add cpu resource.
4654 			 */
4655 			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4656 			fm_payload_set(payload,
4657 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4658 			    DATA_TYPE_NVLIST, resource, NULL);
4659 			break;
4660 		}
4661 	}
4662 }
4663 
4664 /*
4665  * Initialize the way info if necessary.
4666  */
4667 void
4668 cpu_ereport_init(struct async_flt *aflt)
4669 {
4670 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4671 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4672 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4673 	int i;
4674 
4675 	/*
4676 	 * Initialize the info in the CPU logout structure.
4677 	 * The I$/D$ way information is not initialized here
4678 	 * since it is captured in the logout assembly code.
4679 	 */
4680 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4681 		(ecp + i)->ec_way = i;
4682 
4683 	for (i = 0; i < PN_L2_NWAYS; i++)
4684 		(l2p + i)->ec_way = i;
4685 }
4686 
4687 /*
4688  * Returns whether fault address is valid for this error bit and
4689  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4690  */
4691 int
4692 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4693 {
4694 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4695 
4696 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4697 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4698 	    AFLT_STAT_VALID &&
4699 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4700 }
4701 
4702 /*
4703  * Returns whether fault address is valid based on the error bit for the
4704  * one event being queued and whether the address is "in memory".
4705  */
4706 static int
4707 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4708 {
4709 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4710 	int afar_status;
4711 	uint64_t afsr_errs, afsr_ow, *ow_bits;
4712 
4713 	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4714 	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4715 		return (0);
4716 
4717 	afsr_errs = ch_flt->afsr_errs;
4718 	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4719 
4720 	switch (afar_status) {
4721 	case AFLT_STAT_VALID:
4722 		return (1);
4723 
4724 	case AFLT_STAT_AMBIGUOUS:
4725 		/*
4726 		 * Status is ambiguous since another error bit (or bits)
4727 		 * of equal priority to the specified bit on in the afsr,
4728 		 * so check those bits. Return 1 only if the bits on in the
4729 		 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4730 		 * Otherwise not all the equal priority bits are for memory
4731 		 * errors, so return 0.
4732 		 */
4733 		ow_bits = afar_overwrite;
4734 		while ((afsr_ow = *ow_bits++) != 0) {
4735 			/*
4736 			 * Get other bits that are on in t_afsr_bit's priority
4737 			 * class to check for Memory Error bits only.
4738 			 */
4739 			if (afsr_ow & t_afsr_bit) {
4740 				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4741 					return (0);
4742 				else
4743 					return (1);
4744 			}
4745 		}
4746 		/*FALLTHRU*/
4747 
4748 	default:
4749 		return (0);
4750 	}
4751 }
4752 
4753 static void
4754 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4755 {
4756 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4757 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4758 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4759 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4760 #if defined(CPU_IMP_ECACHE_ASSOC)
4761 	int i, nway;
4762 #endif /* CPU_IMP_ECACHE_ASSOC */
4763 
4764 	/*
4765 	 * Check if the CPU log out captured was valid.
4766 	 */
4767 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4768 	    ch_flt->flt_data_incomplete)
4769 		return;
4770 
4771 #if defined(CPU_IMP_ECACHE_ASSOC)
4772 	nway = cpu_ecache_nway();
4773 	i =  cpu_ecache_line_valid(ch_flt);
4774 	if (i == 0 || i > nway) {
4775 		for (i = 0; i < nway; i++)
4776 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4777 	} else
4778 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4779 #else /* CPU_IMP_ECACHE_ASSOC */
4780 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4781 #endif /* CPU_IMP_ECACHE_ASSOC */
4782 
4783 #if defined(CHEETAH_PLUS)
4784 	pn_cpu_log_diag_l2_info(ch_flt);
4785 #endif /* CHEETAH_PLUS */
4786 
4787 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4788 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4789 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4790 	}
4791 
4792 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4793 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4794 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4795 		else
4796 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4797 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4798 	}
4799 }
4800 
/*
 * Cheetah ECC calculation.
 *
 * Only the data bits take part in the calculation; the check bit and Mtag
 * bit terms can be ignored.  Row N holds the two 64-bit data masks used by
 * us3_gen_ecc() to produce ECC bit N.
 */
static uint64_t ch_ecc_table[9][2] = {
	/* { low-order 64 bits, high-order 64 bits } */
	{ 0x46bffffeccd1177fULL, 0x488800022100014cULL },	/* bit 0 */
	{ 0x42fccc81331ff77fULL, 0x14424f1010249184ULL },	/* bit 1 */
	{ 0x8898827c222f1ffeULL, 0x22c1222808184aafULL },	/* bit 2 */
	{ 0xf7632203e131ccf1ULL, 0xe1241121848292b8ULL },	/* bit 3 */
	{ 0x7f5511421b113809ULL, 0x901c88d84288aafeULL },	/* bit 4 */
	{ 0x1d49412184882487ULL, 0x8f338c87c044c6efULL },	/* bit 5 */
	{ 0xf552181014448344ULL, 0x7ff8f4443e411911ULL },	/* bit 6 */
	{ 0x2189240808f24228ULL, 0xfeeff8cc81333f42ULL },	/* bit 7 */
	{ 0x3280008440001112ULL, 0xfee88b337ffffd62ULL },	/* bit 8 */
};
4821 
/*
 * Count the 1 bits in a 64-bit value.
 *
 * This is done in software by repeatedly clearing the lowest set bit.  The
 * UltraSPARC V9 POPC instruction is not used because some CPUs, including
 * Cheetahplus and Jaguar, do not support it.
 */
int
popc64(uint64_t val)
{
	int nbits = 0;

	while (val != 0) {
		val &= val - 1;		/* drop the lowest set bit */
		nbits++;
	}
	return (nbits);
}
4837 
4838 /*
4839  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4840  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4841  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4842  * instead of doing all the xor's.
4843  */
4844 uint32_t
4845 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4846 {
4847 	int bitno, s;
4848 	int synd = 0;
4849 
4850 	for (bitno = 0; bitno < 9; bitno++) {
4851 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4852 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4853 		synd |= (s << bitno);
4854 	}
4855 	return (synd);
4856 
4857 }
4858 
4859 /*
4860  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4861  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4862  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4863  */
4864 static void
4865 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4866     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4867 {
4868 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4869 
4870 	if (reason &&
4871 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4872 		(void) strcat(reason, eccp->ec_reason);
4873 	}
4874 
4875 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4876 	ch_flt->flt_type = eccp->ec_flt_type;
4877 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4878 		ch_flt->flt_diag_data = *cdp;
4879 	else
4880 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4881 	aflt->flt_in_memory =
4882 	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4883 
4884 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4885 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4886 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4887 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4888 	else
4889 		aflt->flt_synd = 0;
4890 
4891 	aflt->flt_payload = eccp->ec_err_payload;
4892 
4893 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4894 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4895 		cpu_errorq_dispatch(eccp->ec_err_class,
4896 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4897 		    aflt->flt_panic);
4898 	else
4899 		cpu_errorq_dispatch(eccp->ec_err_class,
4900 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4901 		    aflt->flt_panic);
4902 }
4903 
4904 /*
4905  * Queue events on async event queue one event per error bit.  First we
4906  * queue the events that we "expect" for the given trap, then we queue events
4907  * that we may not expect.  Return number of events queued.
4908  */
4909 int
4910 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4911     ch_cpu_logout_t *clop)
4912 {
4913 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4914 	ecc_type_to_info_t *eccp;
4915 	int nevents = 0;
4916 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4917 #if defined(CHEETAH_PLUS)
4918 	uint64_t orig_t_afsr_errs;
4919 #endif
4920 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4921 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4922 	ch_diag_data_t *cdp = NULL;
4923 
4924 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4925 
4926 #if defined(CHEETAH_PLUS)
4927 	orig_t_afsr_errs = t_afsr_errs;
4928 
4929 	/*
4930 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4931 	 */
4932 	if (clop != NULL) {
4933 		/*
4934 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4935 		 * flt_addr and flt_stat fields will be reset to the primaries
4936 		 * below, but the sdw_addr and sdw_stat will stay as the
4937 		 * secondaries.
4938 		 */
4939 		cdp = &clop->clo_sdw_data;
4940 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4941 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4942 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4943 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4944 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4945 
4946 		/*
4947 		 * If the primary and shadow AFSR differ, tag the shadow as
4948 		 * the first fault.
4949 		 */
4950 		if ((primary_afar != cdp->chd_afar) ||
4951 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4952 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4953 		}
4954 
4955 		/*
4956 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4957 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4958 		 * is expected to be zero for those CPUs which do not have
4959 		 * an AFSR_EXT register.
4960 		 */
4961 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4962 			if ((eccp->ec_afsr_bit &
4963 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4964 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4965 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4966 				cdp = NULL;
4967 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4968 				nevents++;
4969 			}
4970 		}
4971 
4972 		/*
4973 		 * If the ME bit is on in the primary AFSR turn all the
4974 		 * error bits on again that may set the ME bit to make
4975 		 * sure we see the ME AFSR error logs.
4976 		 */
4977 		if ((primary_afsr & C_AFSR_ME) != 0)
4978 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4979 	}
4980 #endif	/* CHEETAH_PLUS */
4981 
4982 	if (clop != NULL)
4983 		cdp = &clop->clo_data;
4984 
4985 	/*
4986 	 * Queue expected errors, error bit and fault type must match
4987 	 * in the ecc_type_to_info table.
4988 	 */
4989 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4990 	    eccp++) {
4991 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4992 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4993 #if defined(SERRANO)
4994 			/*
4995 			 * For FRC/FRU errors on Serrano the afar2 captures
4996 			 * the address and the associated data is
4997 			 * in the shadow logout area.
4998 			 */
4999 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5000 				if (clop != NULL)
5001 					cdp = &clop->clo_sdw_data;
5002 				aflt->flt_addr = ch_flt->afar2;
5003 			} else {
5004 				if (clop != NULL)
5005 					cdp = &clop->clo_data;
5006 				aflt->flt_addr = primary_afar;
5007 			}
5008 #else	/* SERRANO */
5009 			aflt->flt_addr = primary_afar;
5010 #endif	/* SERRANO */
5011 			aflt->flt_stat = primary_afsr;
5012 			ch_flt->afsr_ext = primary_afsr_ext;
5013 			ch_flt->afsr_errs = primary_afsr_errs;
5014 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5015 			cdp = NULL;
5016 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5017 			nevents++;
5018 		}
5019 	}
5020 
5021 	/*
5022 	 * Queue unexpected errors, error bit only match.
5023 	 */
5024 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5025 	    eccp++) {
5026 		if (eccp->ec_afsr_bit & t_afsr_errs) {
5027 #if defined(SERRANO)
5028 			/*
5029 			 * For FRC/FRU errors on Serrano the afar2 captures
5030 			 * the address and the associated data is
5031 			 * in the shadow logout area.
5032 			 */
5033 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5034 				if (clop != NULL)
5035 					cdp = &clop->clo_sdw_data;
5036 				aflt->flt_addr = ch_flt->afar2;
5037 			} else {
5038 				if (clop != NULL)
5039 					cdp = &clop->clo_data;
5040 				aflt->flt_addr = primary_afar;
5041 			}
5042 #else	/* SERRANO */
5043 			aflt->flt_addr = primary_afar;
5044 #endif	/* SERRANO */
5045 			aflt->flt_stat = primary_afsr;
5046 			ch_flt->afsr_ext = primary_afsr_ext;
5047 			ch_flt->afsr_errs = primary_afsr_errs;
5048 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5049 			cdp = NULL;
5050 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5051 			nevents++;
5052 		}
5053 	}
5054 	return (nevents);
5055 }
5056 
5057 /*
5058  * Return trap type number.
5059  */
5060 uint8_t
5061 flt_to_trap_type(struct async_flt *aflt)
5062 {
5063 	if (aflt->flt_status & ECC_I_TRAP)
5064 		return (TRAP_TYPE_ECC_I);
5065 	if (aflt->flt_status & ECC_D_TRAP)
5066 		return (TRAP_TYPE_ECC_D);
5067 	if (aflt->flt_status & ECC_F_TRAP)
5068 		return (TRAP_TYPE_ECC_F);
5069 	if (aflt->flt_status & ECC_C_TRAP)
5070 		return (TRAP_TYPE_ECC_C);
5071 	if (aflt->flt_status & ECC_DP_TRAP)
5072 		return (TRAP_TYPE_ECC_DP);
5073 	if (aflt->flt_status & ECC_IP_TRAP)
5074 		return (TRAP_TYPE_ECC_IP);
5075 	if (aflt->flt_status & ECC_ITLB_TRAP)
5076 		return (TRAP_TYPE_ECC_ITLB);
5077 	if (aflt->flt_status & ECC_DTLB_TRAP)
5078 		return (TRAP_TYPE_ECC_DTLB);
5079 	return (TRAP_TYPE_UNKNOWN);
5080 }
5081 
5082 /*
5083  * Decide an error type based on detector and leaky/partner tests.
5084  * The following array is used for quick translation - it must
5085  * stay in sync with ce_dispact_t.
5086  */
5087 
5088 static char *cetypes[] = {
5089 	CE_DISP_DESC_U,
5090 	CE_DISP_DESC_I,
5091 	CE_DISP_DESC_PP,
5092 	CE_DISP_DESC_P,
5093 	CE_DISP_DESC_L,
5094 	CE_DISP_DESC_PS,
5095 	CE_DISP_DESC_S
5096 };
5097 
5098 char *
5099 flt_to_error_type(struct async_flt *aflt)
5100 {
5101 	ce_dispact_t dispact, disp;
5102 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5103 
5104 	/*
5105 	 * The memory payload bundle is shared by some events that do
5106 	 * not perform any classification.  For those flt_disp will be
5107 	 * 0 and we will return "unknown".
5108 	 */
5109 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5110 		return (cetypes[CE_DISP_UNKNOWN]);
5111 
5112 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5113 
5114 	/*
5115 	 * It is also possible that no scrub/classification was performed
5116 	 * by the detector, for instance where a disrupting error logged
5117 	 * in the AFSR while CEEN was off in cpu_deferred_error.
5118 	 */
5119 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5120 		return (cetypes[CE_DISP_UNKNOWN]);
5121 
5122 	/*
5123 	 * Lookup type in initial classification/action table
5124 	 */
5125 	dispact = CE_DISPACT(ce_disp_table,
5126 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
5127 	    CE_XDIAG_STATE(dtcrinfo),
5128 	    CE_XDIAG_CE1SEEN(dtcrinfo),
5129 	    CE_XDIAG_CE2SEEN(dtcrinfo));
5130 
5131 	/*
5132 	 * A bad lookup is not something to panic production systems for.
5133 	 */
5134 	ASSERT(dispact != CE_DISP_BAD);
5135 	if (dispact == CE_DISP_BAD)
5136 		return (cetypes[CE_DISP_UNKNOWN]);
5137 
5138 	disp = CE_DISP(dispact);
5139 
5140 	switch (disp) {
5141 	case CE_DISP_UNKNOWN:
5142 	case CE_DISP_INTERMITTENT:
5143 		break;
5144 
5145 	case CE_DISP_POSS_PERS:
5146 		/*
5147 		 * "Possible persistent" errors to which we have applied a valid
5148 		 * leaky test can be separated into "persistent" or "leaky".
5149 		 */
5150 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5151 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
5152 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5153 			    CE_XDIAG_CE2SEEN(lkyinfo))
5154 				disp = CE_DISP_LEAKY;
5155 			else
5156 				disp = CE_DISP_PERS;
5157 		}
5158 		break;
5159 
5160 	case CE_DISP_POSS_STICKY:
5161 		/*
5162 		 * Promote "possible sticky" results that have been
5163 		 * confirmed by a partner test to "sticky".  Unconfirmed
5164 		 * "possible sticky" events are left at that status - we do not
5165 		 * guess at any bad reader/writer etc status here.
5166 		 */
5167 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5168 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5169 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5170 			disp = CE_DISP_STICKY;
5171 
5172 		/*
5173 		 * Promote "possible sticky" results on a uniprocessor
5174 		 * to "sticky"
5175 		 */
5176 		if (disp == CE_DISP_POSS_STICKY &&
5177 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5178 			disp = CE_DISP_STICKY;
5179 		break;
5180 
5181 	default:
5182 		disp = CE_DISP_UNKNOWN;
5183 		break;
5184 	}
5185 
5186 	return (cetypes[disp]);
5187 }
5188 
5189 /*
5190  * Given the entire afsr, the specific bit to check and a prioritized list of
5191  * error bits, determine the validity of the various overwrite priority
5192  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5193  * different overwrite priorities.
5194  *
5195  * Given a specific afsr error bit and the entire afsr, there are three cases:
5196  *   INVALID:	The specified bit is lower overwrite priority than some other
5197  *		error bit which is on in the afsr (or IVU/IVC).
5198  *   VALID:	The specified bit is higher priority than all other error bits
5199  *		which are on in the afsr.
5200  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5201  *		bit is on in the afsr.
5202  */
5203 int
5204 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5205 {
5206 	uint64_t afsr_ow;
5207 
5208 	while ((afsr_ow = *ow_bits++) != 0) {
5209 		/*
5210 		 * If bit is in the priority class, check to see if another
5211 		 * bit in the same class is on => ambiguous.  Otherwise,
5212 		 * the value is valid.  If the bit is not on at this priority
5213 		 * class, but a higher priority bit is on, then the value is
5214 		 * invalid.
5215 		 */
5216 		if (afsr_ow & afsr_bit) {
5217 			/*
5218 			 * If equal pri bit is on, ambiguous.
5219 			 */
5220 			if (afsr & (afsr_ow & ~afsr_bit))
5221 				return (AFLT_STAT_AMBIGUOUS);
5222 			return (AFLT_STAT_VALID);
5223 		} else if (afsr & afsr_ow)
5224 			break;
5225 	}
5226 
5227 	/*
5228 	 * We didn't find a match or a higher priority bit was on.  Not
5229 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5230 	 */
5231 	return (AFLT_STAT_INVALID);
5232 }
5233 
5234 static int
5235 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5236 {
5237 #if defined(SERRANO)
5238 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5239 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5240 	else
5241 #endif	/* SERRANO */
5242 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5243 }
5244 
5245 static int
5246 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5247 {
5248 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5249 }
5250 
5251 static int
5252 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5253 {
5254 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5255 }
5256 
5257 static int
5258 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5259 {
5260 #ifdef lint
5261 	cpuid = cpuid;
5262 #endif
5263 #if defined(CHEETAH_PLUS)
5264 	/*
5265 	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5266 	 * policy for Cheetah+ and separate for Panther CPUs.
5267 	 */
5268 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5269 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5270 			return (afsr_to_msynd_status(afsr, afsr_bit));
5271 		else
5272 			return (afsr_to_esynd_status(afsr, afsr_bit));
5273 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5274 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5275 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5276 		else
5277 			return (afsr_to_esynd_status(afsr, afsr_bit));
5278 #else /* CHEETAH_PLUS */
5279 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5280 		return (afsr_to_msynd_status(afsr, afsr_bit));
5281 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5282 		return (afsr_to_esynd_status(afsr, afsr_bit));
5283 #endif /* CHEETAH_PLUS */
5284 	} else {
5285 		return (AFLT_STAT_INVALID);
5286 	}
5287 }
5288 
5289 /*
5290  * Slave CPU stick synchronization.
5291  */
5292 void
5293 sticksync_slave(void)
5294 {
5295 	int 		i;
5296 	int		tries = 0;
5297 	int64_t		tskew;
5298 	int64_t		av_tskew;
5299 
5300 	kpreempt_disable();
5301 	/* wait for the master side */
5302 	while (stick_sync_cmd != SLAVE_START)
5303 		;
5304 	/*
5305 	 * Synchronization should only take a few tries at most. But in the
5306 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5307 	 * without it's stick synchronized wouldn't be a good citizen.
5308 	 */
5309 	while (slave_done == 0) {
5310 		/*
5311 		 * Time skew calculation.
5312 		 */
5313 		av_tskew = tskew = 0;
5314 
5315 		for (i = 0; i < stick_iter; i++) {
5316 			/* make location hot */
5317 			timestamp[EV_A_START] = 0;
5318 			stick_timestamp(&timestamp[EV_A_START]);
5319 
5320 			/* tell the master we're ready */
5321 			stick_sync_cmd = MASTER_START;
5322 
5323 			/* and wait */
5324 			while (stick_sync_cmd != SLAVE_CONT)
5325 				;
5326 			/* Event B end */
5327 			stick_timestamp(&timestamp[EV_B_END]);
5328 
5329 			/* calculate time skew */
5330 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5331 			    - (timestamp[EV_A_END] - timestamp[EV_A_START]))
5332 			    / 2;
5333 
5334 			/* keep running count */
5335 			av_tskew += tskew;
5336 		} /* for */
5337 
5338 		/*
5339 		 * Adjust stick for time skew if not within the max allowed;
5340 		 * otherwise we're all done.
5341 		 */
5342 		if (stick_iter != 0)
5343 			av_tskew = av_tskew/stick_iter;
5344 		if (ABS(av_tskew) > stick_tsk) {
5345 			/*
5346 			 * If the skew is 1 (the slave's STICK register
5347 			 * is 1 STICK ahead of the master's), stick_adj
5348 			 * could fail to adjust the slave's STICK register
5349 			 * if the STICK read on the slave happens to
5350 			 * align with the increment of the STICK.
5351 			 * Therefore, we increment the skew to 2.
5352 			 */
5353 			if (av_tskew == 1)
5354 				av_tskew++;
5355 			stick_adj(-av_tskew);
5356 		} else
5357 			slave_done = 1;
5358 #ifdef DEBUG
5359 		if (tries < DSYNC_ATTEMPTS)
5360 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5361 			    av_tskew;
5362 		++tries;
5363 #endif /* DEBUG */
5364 #ifdef lint
5365 		tries = tries;
5366 #endif
5367 
5368 	} /* while */
5369 
5370 	/* allow the master to finish */
5371 	stick_sync_cmd = EVENT_NULL;
5372 	kpreempt_enable();
5373 }
5374 
5375 /*
5376  * Master CPU side of stick synchronization.
5377  *  - timestamp end of Event A
5378  *  - timestamp beginning of Event B
5379  */
5380 void
5381 sticksync_master(void)
5382 {
5383 	int		i;
5384 
5385 	kpreempt_disable();
5386 	/* tell the slave we've started */
5387 	slave_done = 0;
5388 	stick_sync_cmd = SLAVE_START;
5389 
5390 	while (slave_done == 0) {
5391 		for (i = 0; i < stick_iter; i++) {
5392 			/* wait for the slave */
5393 			while (stick_sync_cmd != MASTER_START)
5394 				;
5395 			/* Event A end */
5396 			stick_timestamp(&timestamp[EV_A_END]);
5397 
5398 			/* make location hot */
5399 			timestamp[EV_B_START] = 0;
5400 			stick_timestamp(&timestamp[EV_B_START]);
5401 
5402 			/* tell the slave to continue */
5403 			stick_sync_cmd = SLAVE_CONT;
5404 		} /* for */
5405 
5406 		/* wait while slave calculates time skew */
5407 		while (stick_sync_cmd == SLAVE_CONT)
5408 			;
5409 	} /* while */
5410 	kpreempt_enable();
5411 }
5412 
/*
 * Intentionally empty.  Cheetah/Cheetah+ raise a disrupting error for
 * copybacks, so the Spitfire hack of xcall'ing every cpu to ask it to check
 * for them is not needed.  In addition, each cpu checks for CPU events in
 * cpu_async_panic_callb on its way to panic idle.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
}
5423 
5424 struct kmem_cache *ch_private_cache;
5425 
5426 /*
5427  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5428  * deallocate the scrubber data structures and cpu_private data structure.
5429  */
5430 void
5431 cpu_uninit_private(struct cpu *cp)
5432 {
5433 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5434 
5435 	ASSERT(chprp);
5436 	cpu_uninit_ecache_scrub_dr(cp);
5437 	CPU_PRIVATE(cp) = NULL;
5438 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5439 	kmem_cache_free(ch_private_cache, chprp);
5440 	cmp_delete_cpu(cp->cpu_id);
5441 
5442 }
5443 
/*
 * Cheetah Cache Scrubbing
 *
 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
 * protected by either parity or ECC.
 *
 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
 * logic works with respect to the ASI used to scrub I$ lines, the entire
 * I$ is scanned at once.
 */
5456 
5457 /*
5458  * Tuneables to enable and disable the scrubbing of the caches, and to tune
5459  * scrubbing behavior.  These may be changed via /etc/system or using mdb
5460  * on a running system.
5461  */
5462 int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5463 
5464 /*
5465  * The following are the PIL levels that the softints/cross traps will fire at.
5466  */
5467 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5468 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5469 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5470 
5471 #if defined(JALAPENO)
5472 
5473 /*
5474  * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5475  * on Jalapeno.
5476  */
5477 int ecache_scrub_enable = 0;
5478 
5479 #else	/* JALAPENO */
5480 
5481 /*
5482  * With all other cpu types, E$ scrubbing is on by default
5483  */
5484 int ecache_scrub_enable = 1;
5485 
5486 #endif	/* JALAPENO */
5487 
5488 
5489 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5490 
5491 /*
5492  * The I$ scrubber tends to cause latency problems for real-time SW, so it
5493  * is disabled by default on non-Cheetah systems
5494  */
5495 int icache_scrub_enable = 0;
5496 
5497 /*
5498  * Tuneables specifying the scrub calls per second and the scan rate
5499  * for each cache
5500  *
5501  * The cyclic times are set during boot based on the following values.
5502  * Changing these values in mdb after this time will have no effect.  If
5503  * a different value is desired, it must be set in /etc/system before a
5504  * reboot.
5505  */
5506 int ecache_calls_a_sec = 1;
5507 int dcache_calls_a_sec = 2;
5508 int icache_calls_a_sec = 2;
5509 
5510 int ecache_scan_rate_idle = 1;
5511 int ecache_scan_rate_busy = 1;
5512 int dcache_scan_rate_idle = 1;
5513 int dcache_scan_rate_busy = 1;
5514 int icache_scan_rate_idle = 1;
5515 int icache_scan_rate_busy = 1;
5516 
5517 #else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5518 
5519 int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
5520 
5521 int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
5522 int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
5523 int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */
5524 
5525 int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
5526 int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
5527 int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
5528 int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
5529 int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
5530 int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
5531 
5532 #endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5533 
5534 /*
5535  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5536  * increment the outstanding request counter and schedule a softint to run
5537  * the scrubber.
5538  */
5539 extern xcfunc_t cache_scrubreq_tl1;
5540 
5541 /*
5542  * These are the softint functions for each cache scrubber
5543  */
5544 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5545 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5546 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5547 
5548 /*
5549  * The cache scrub info table contains cache specific information
5550  * and allows for some of the scrub code to be table driven, reducing
5551  * duplication of cache similar code.
5552  *
5553  * This table keeps a copy of the value in the calls per second variable
5554  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5555  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5556  * mdb in a misguided attempt to disable the scrubber).
5557  */
5558 struct scrub_info {
5559 	int		*csi_enable;	/* scrubber enable flag */
5560 	int		csi_freq;	/* scrubber calls per second */
5561 	int		csi_index;	/* index to chsm_outstanding[] */
5562 	uint64_t	csi_inum;	/* scrubber interrupt number */
5563 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5564 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5565 	char		csi_name[3];	/* cache name for this scrub entry */
5566 } cache_scrub_info[] = {
5567 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5568 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5569 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5570 };
5571 
5572 /*
5573  * If scrubbing is enabled, increment the outstanding request counter.  If it
5574  * is 1 (meaning there were no previous requests outstanding), call
5575  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5576  * a self trap.
5577  */
5578 static void
5579 do_scrub(struct scrub_info *csi)
5580 {
5581 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5582 	int index = csi->csi_index;
5583 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5584 
5585 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5586 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5587 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5588 			    csi->csi_inum, 0);
5589 		}
5590 	}
5591 }
5592 
5593 /*
5594  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5595  * cross-trap the offline cpus.
5596  */
5597 static void
5598 do_scrub_offline(struct scrub_info *csi)
5599 {
5600 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5601 
5602 	if (CPUSET_ISNULL(cpu_offline_set)) {
5603 		/*
5604 		 * No offline cpus - nothing to do
5605 		 */
5606 		return;
5607 	}
5608 
5609 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5610 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5611 		    csi->csi_index);
5612 	}
5613 }
5614 
5615 /*
5616  * This is the initial setup for the scrubber cyclics - it sets the
5617  * interrupt level, frequency, and function to call.
5618  */
5619 /*ARGSUSED*/
5620 static void
5621 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5622     cyc_time_t *when)
5623 {
5624 	struct scrub_info *csi = (struct scrub_info *)arg;
5625 
5626 	ASSERT(csi != NULL);
5627 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5628 	hdlr->cyh_level = CY_LOW_LEVEL;
5629 	hdlr->cyh_arg = arg;
5630 
5631 	when->cyt_when = 0;	/* Start immediately */
5632 	when->cyt_interval = NANOSEC / csi->csi_freq;
5633 }
5634 
5635 /*
5636  * Initialization for cache scrubbing.
5637  * This routine is called AFTER all cpus have had cpu_init_private called
5638  * to initialize their private data areas.
5639  */
5640 void
5641 cpu_init_cache_scrub(void)
5642 {
5643 	int i;
5644 	struct scrub_info *csi;
5645 	cyc_omni_handler_t omni_hdlr;
5646 	cyc_handler_t offline_hdlr;
5647 	cyc_time_t when;
5648 
5649 	/*
5650 	 * save away the maximum number of lines for the D$
5651 	 */
5652 	dcache_nlines = dcache_size / dcache_linesize;
5653 
5654 	/*
5655 	 * register the softints for the cache scrubbing
5656 	 */
5657 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5658 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5659 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5660 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5661 
5662 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5663 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5664 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5665 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5666 
5667 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5668 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5669 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5670 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5671 
5672 	/*
5673 	 * start the scrubbing for all the caches
5674 	 */
5675 	mutex_enter(&cpu_lock);
5676 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5677 
5678 		csi = &cache_scrub_info[i];
5679 
5680 		if (!(*csi->csi_enable))
5681 			continue;
5682 
5683 		/*
5684 		 * force the following to be true:
5685 		 *	1 <= calls_a_sec <= hz
5686 		 */
5687 		if (csi->csi_freq > hz) {
5688 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5689 			    "(%d); resetting to hz (%d)", csi->csi_name,
5690 			    csi->csi_freq, hz);
5691 			csi->csi_freq = hz;
5692 		} else if (csi->csi_freq < 1) {
5693 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5694 			    "(%d); resetting to 1", csi->csi_name,
5695 			    csi->csi_freq);
5696 			csi->csi_freq = 1;
5697 		}
5698 
5699 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5700 		omni_hdlr.cyo_offline = NULL;
5701 		omni_hdlr.cyo_arg = (void *)csi;
5702 
5703 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5704 		offline_hdlr.cyh_arg = (void *)csi;
5705 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5706 
5707 		when.cyt_when = 0;	/* Start immediately */
5708 		when.cyt_interval = NANOSEC / csi->csi_freq;
5709 
5710 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5711 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5712 	}
5713 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5714 	mutex_exit(&cpu_lock);
5715 }
5716 
5717 /*
5718  * Indicate that the specified cpu is idle.
5719  */
5720 void
5721 cpu_idle_ecache_scrub(struct cpu *cp)
5722 {
5723 	if (CPU_PRIVATE(cp) != NULL) {
5724 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5725 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5726 	}
5727 }
5728 
5729 /*
5730  * Indicate that the specified cpu is busy.
5731  */
5732 void
5733 cpu_busy_ecache_scrub(struct cpu *cp)
5734 {
5735 	if (CPU_PRIVATE(cp) != NULL) {
5736 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5737 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5738 	}
5739 }
5740 
5741 /*
5742  * Initialization for cache scrubbing for the specified cpu.
5743  */
5744 void
5745 cpu_init_ecache_scrub_dr(struct cpu *cp)
5746 {
5747 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5748 	int cpuid = cp->cpu_id;
5749 
5750 	/* initialize the number of lines in the caches */
5751 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5752 	    cpunodes[cpuid].ecache_linesize;
5753 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5754 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5755 
5756 	/*
5757 	 * do_scrub() and do_scrub_offline() check both the global
5758 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5759 	 * check this value before scrubbing.  Currently, we use it to
5760 	 * disable the E$ scrubber on multi-core cpus or while running at
5761 	 * slowed speed.  For now, just turn everything on and allow
5762 	 * cpu_init_private() to change it if necessary.
5763 	 */
5764 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5765 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5766 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5767 
5768 	cpu_busy_ecache_scrub(cp);
5769 }
5770 
5771 /*
5772  * Un-initialization for cache scrubbing for the specified cpu.
5773  */
5774 static void
5775 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5776 {
5777 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5778 
5779 	/*
5780 	 * un-initialize bookkeeping for cache scrubbing
5781 	 */
5782 	bzero(csmp, sizeof (ch_scrub_misc_t));
5783 
5784 	cpu_idle_ecache_scrub(cp);
5785 }
5786 
5787 /*
5788  * Called periodically on each CPU to scrub the D$.
5789  */
5790 static void
5791 scrub_dcache(int how_many)
5792 {
5793 	int i;
5794 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5795 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5796 
5797 	/*
5798 	 * scrub the desired number of lines
5799 	 */
5800 	for (i = 0; i < how_many; i++) {
5801 		/*
5802 		 * scrub a D$ line
5803 		 */
5804 		dcache_inval_line(index);
5805 
5806 		/*
5807 		 * calculate the next D$ line to scrub, assumes
5808 		 * that dcache_nlines is a power of 2
5809 		 */
5810 		index = (index + 1) & (dcache_nlines - 1);
5811 	}
5812 
5813 	/*
5814 	 * set the scrub index for the next visit
5815 	 */
5816 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5817 }
5818 
5819 /*
5820  * Handler for D$ scrub inum softint. Call scrub_dcache until
5821  * we decrement the outstanding request count to zero.
5822  */
5823 /*ARGSUSED*/
5824 static uint_t
5825 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5826 {
5827 	int i;
5828 	int how_many;
5829 	int outstanding;
5830 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5831 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5832 	struct scrub_info *csi = (struct scrub_info *)arg1;
5833 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5834 	    dcache_scan_rate_idle : dcache_scan_rate_busy;
5835 
5836 	/*
5837 	 * The scan rates are expressed in units of tenths of a
5838 	 * percent.  A scan rate of 1000 (100%) means the whole
5839 	 * cache is scanned every second.
5840 	 */
5841 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5842 
5843 	do {
5844 		outstanding = *countp;
5845 		for (i = 0; i < outstanding; i++) {
5846 			scrub_dcache(how_many);
5847 		}
5848 	} while (atomic_add_32_nv(countp, -outstanding));
5849 
5850 	return (DDI_INTR_CLAIMED);
5851 }
5852 
5853 /*
5854  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5855  * by invalidating lines. Due to the characteristics of the ASI which
5856  * is used to invalidate an I$ line, the entire I$ must be invalidated
5857  * vs. an individual I$ line.
5858  */
5859 static void
5860 scrub_icache(int how_many)
5861 {
5862 	int i;
5863 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5864 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5865 	int icache_nlines = csmp->chsm_icache_nlines;
5866 
5867 	/*
5868 	 * scrub the desired number of lines
5869 	 */
5870 	for (i = 0; i < how_many; i++) {
5871 		/*
5872 		 * since the entire I$ must be scrubbed at once,
5873 		 * wait until the index wraps to zero to invalidate
5874 		 * the entire I$
5875 		 */
5876 		if (index == 0) {
5877 			icache_inval_all();
5878 		}
5879 
5880 		/*
5881 		 * calculate the next I$ line to scrub, assumes
5882 		 * that chsm_icache_nlines is a power of 2
5883 		 */
5884 		index = (index + 1) & (icache_nlines - 1);
5885 	}
5886 
5887 	/*
5888 	 * set the scrub index for the next visit
5889 	 */
5890 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5891 }
5892 
5893 /*
5894  * Handler for I$ scrub inum softint. Call scrub_icache until
5895  * we decrement the outstanding request count to zero.
5896  */
5897 /*ARGSUSED*/
5898 static uint_t
5899 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5900 {
5901 	int i;
5902 	int how_many;
5903 	int outstanding;
5904 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5905 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5906 	struct scrub_info *csi = (struct scrub_info *)arg1;
5907 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5908 	    icache_scan_rate_idle : icache_scan_rate_busy;
5909 	int icache_nlines = csmp->chsm_icache_nlines;
5910 
5911 	/*
5912 	 * The scan rates are expressed in units of tenths of a
5913 	 * percent.  A scan rate of 1000 (100%) means the whole
5914 	 * cache is scanned every second.
5915 	 */
5916 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5917 
5918 	do {
5919 		outstanding = *countp;
5920 		for (i = 0; i < outstanding; i++) {
5921 			scrub_icache(how_many);
5922 		}
5923 	} while (atomic_add_32_nv(countp, -outstanding));
5924 
5925 	return (DDI_INTR_CLAIMED);
5926 }
5927 
5928 /*
5929  * Called periodically on each CPU to scrub the E$.
5930  */
5931 static void
5932 scrub_ecache(int how_many)
5933 {
5934 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5935 	int i;
5936 	int cpuid = CPU->cpu_id;
5937 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5938 	int nlines = csmp->chsm_ecache_nlines;
5939 	int linesize = cpunodes[cpuid].ecache_linesize;
5940 	int ec_set_size = cpu_ecache_set_size(CPU);
5941 
5942 	/*
5943 	 * scrub the desired number of lines
5944 	 */
5945 	for (i = 0; i < how_many; i++) {
5946 		/*
5947 		 * scrub the E$ line
5948 		 */
5949 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5950 		    ec_set_size);
5951 
5952 		/*
5953 		 * calculate the next E$ line to scrub based on twice
5954 		 * the number of E$ lines (to displace lines containing
5955 		 * flush area data), assumes that the number of lines
5956 		 * is a power of 2
5957 		 */
5958 		index = (index + 1) & ((nlines << 1) - 1);
5959 	}
5960 
5961 	/*
5962 	 * set the ecache scrub index for the next visit
5963 	 */
5964 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5965 }
5966 
5967 /*
5968  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5969  * we decrement the outstanding request count to zero.
5970  *
5971  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5972  * become negative after the atomic_add_32_nv().  This is not a problem, as
5973  * the next trip around the loop won't scrub anything, and the next add will
5974  * reset the count back to zero.
5975  */
5976 /*ARGSUSED*/
5977 static uint_t
5978 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5979 {
5980 	int i;
5981 	int how_many;
5982 	int outstanding;
5983 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5984 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5985 	struct scrub_info *csi = (struct scrub_info *)arg1;
5986 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5987 	    ecache_scan_rate_idle : ecache_scan_rate_busy;
5988 	int ecache_nlines = csmp->chsm_ecache_nlines;
5989 
5990 	/*
5991 	 * The scan rates are expressed in units of tenths of a
5992 	 * percent.  A scan rate of 1000 (100%) means the whole
5993 	 * cache is scanned every second.
5994 	 */
5995 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5996 
5997 	do {
5998 		outstanding = *countp;
5999 		for (i = 0; i < outstanding; i++) {
6000 			scrub_ecache(how_many);
6001 		}
6002 	} while (atomic_add_32_nv(countp, -outstanding));
6003 
6004 	return (DDI_INTR_CLAIMED);
6005 }
6006 
6007 /*
6008  * Timeout function to reenable CE
6009  */
6010 static void
6011 cpu_delayed_check_ce_errors(void *arg)
6012 {
6013 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6014 	    TQ_NOSLEEP)) {
6015 		(void) timeout(cpu_delayed_check_ce_errors, arg,
6016 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6017 	}
6018 }
6019 
6020 /*
6021  * CE Deferred Re-enable after trap.
6022  *
6023  * When the CPU gets a disrupting trap for any of the errors
6024  * controlled by the CEEN bit, CEEN is disabled in the trap handler
6025  * immediately. To eliminate the possibility of multiple CEs causing
6026  * recursive stack overflow in the trap handler, we cannot
6027  * reenable CEEN while still running in the trap handler. Instead,
6028  * after a CE is logged on a CPU, we schedule a timeout function,
6029  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6030  * seconds. This function will check whether any further CEs
6031  * have occurred on that CPU, and if none have, will reenable CEEN.
6032  *
6033  * If further CEs have occurred while CEEN is disabled, another
6034  * timeout will be scheduled. This is to ensure that the CPU can
6035  * make progress in the face of CE 'storms', and that it does not
6036  * spend all its time logging CE errors.
6037  */
6038 static void
6039 cpu_check_ce_errors(void *arg)
6040 {
6041 	int	cpuid = (int)(uintptr_t)arg;
6042 	cpu_t	*cp;
6043 
6044 	/*
6045 	 * We acquire cpu_lock.
6046 	 */
6047 	ASSERT(curthread->t_pil == 0);
6048 
6049 	/*
6050 	 * verify that the cpu is still around, DR
6051 	 * could have got there first ...
6052 	 */
6053 	mutex_enter(&cpu_lock);
6054 	cp = cpu_get(cpuid);
6055 	if (cp == NULL) {
6056 		mutex_exit(&cpu_lock);
6057 		return;
6058 	}
6059 	/*
6060 	 * make sure we don't migrate across CPUs
6061 	 * while checking our CE status.
6062 	 */
6063 	kpreempt_disable();
6064 
6065 	/*
6066 	 * If we are running on the CPU that got the
6067 	 * CE, we can do the checks directly.
6068 	 */
6069 	if (cp->cpu_id == CPU->cpu_id) {
6070 		mutex_exit(&cpu_lock);
6071 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6072 		kpreempt_enable();
6073 		return;
6074 	}
6075 	kpreempt_enable();
6076 
6077 	/*
6078 	 * send an x-call to get the CPU that originally
6079 	 * got the CE to do the necessary checks. If we can't
6080 	 * send the x-call, reschedule the timeout, otherwise we
6081 	 * lose CEEN forever on that CPU.
6082 	 */
6083 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6084 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6085 		    TIMEOUT_CEEN_CHECK, 0);
6086 		mutex_exit(&cpu_lock);
6087 	} else {
6088 		/*
6089 		 * When the CPU is not accepting xcalls, or
6090 		 * the processor is offlined, we don't want to
6091 		 * incur the extra overhead of trying to schedule the
6092 		 * CE timeout indefinitely. However, we don't want to lose
6093 		 * CE checking forever.
6094 		 *
6095 		 * Keep rescheduling the timeout, accepting the additional
6096 		 * overhead as the cost of correctness in the case where we get
6097 		 * a CE, disable CEEN, offline the CPU during the
6098 		 * the timeout interval, and then online it at some
6099 		 * point in the future. This is unlikely given the short
6100 		 * cpu_ceen_delay_secs.
6101 		 */
6102 		mutex_exit(&cpu_lock);
6103 		(void) timeout(cpu_delayed_check_ce_errors,
6104 		    (void *)(uintptr_t)cp->cpu_id,
6105 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6106 	}
6107 }
6108 
6109 /*
6110  * This routine will check whether CEs have occurred while
6111  * CEEN is disabled. Any CEs detected will be logged and, if
6112  * possible, scrubbed.
6113  *
6114  * The memscrubber will also use this routine to clear any errors
6115  * caused by its scrubbing with CEEN disabled.
6116  *
6117  * flag == SCRUBBER_CEEN_CHECK
6118  *		called from memscrubber, just check/scrub, no reset
6119  *		paddr 	physical addr. for start of scrub pages
6120  *		vaddr 	virtual addr. for scrub area
6121  *		psz	page size of area to be scrubbed
6122  *
6123  * flag == TIMEOUT_CEEN_CHECK
6124  *		timeout function has triggered, reset timeout or CEEN
6125  *
6126  * Note: We must not migrate cpus during this function.  This can be
6127  * achieved by one of:
6128  *    - invoking as target of an x-call in which case we're at XCALL_PIL
6129  *	The flag value must be first xcall argument.
6130  *    - disabling kernel preemption.  This should be done for very short
6131  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6132  *	scrub an extended area with cpu_check_block.  The call for
6133  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6134  *	brief for this case.
6135  *    - binding to a cpu, eg with thread_affinity_set().  This is used
6136  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6137  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6138  */
6139 void
6140 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6141 {
6142 	ch_cpu_errors_t	cpu_error_regs;
6143 	uint64_t	ec_err_enable;
6144 	uint64_t	page_offset;
6145 
6146 	/* Read AFSR */
6147 	get_cpu_error_state(&cpu_error_regs);
6148 
6149 	/*
6150 	 * If no CEEN errors have occurred during the timeout
6151 	 * interval, it is safe to re-enable CEEN and exit.
6152 	 */
6153 	if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
6154 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
6155 		if (flag == TIMEOUT_CEEN_CHECK &&
6156 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6157 			set_error_enable(ec_err_enable | EN_REG_CEEN);
6158 		return;
6159 	}
6160 
6161 	/*
6162 	 * Ensure that CEEN was not reenabled (maybe by DR) before
6163 	 * we log/clear the error.
6164 	 */
6165 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6166 		set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6167 
6168 	/*
6169 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6170 	 * timeout will be rescheduled when the error is logged.
6171 	 */
6172 	if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
6173 	    (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
6174 		cpu_ce_detected(&cpu_error_regs,
6175 		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6176 	else
6177 		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6178 
6179 	/*
6180 	 * If the memory scrubber runs while CEEN is
6181 	 * disabled, (or if CEEN is disabled during the
6182 	 * scrub as a result of a CE being triggered by
6183 	 * it), the range being scrubbed will not be
6184 	 * completely cleaned. If there are multiple CEs
6185 	 * in the range at most two of these will be dealt
6186 	 * with, (one by the trap handler and one by the
6187 	 * timeout). It is also possible that none are dealt
6188 	 * with, (CEEN disabled and another CE occurs before
6189 	 * the timeout triggers). So to ensure that the
6190 	 * memory is actually scrubbed, we have to access each
6191 	 * memory location in the range and then check whether
6192 	 * that access causes a CE.
6193 	 */
6194 	if (flag == SCRUBBER_CEEN_CHECK && va) {
6195 		if ((cpu_error_regs.afar >= pa) &&
6196 		    (cpu_error_regs.afar < (pa + psz))) {
6197 			/*
6198 			 * Force a load from physical memory for each
6199 			 * 64-byte block, then check AFSR to determine
6200 			 * whether this access caused an error.
6201 			 *
6202 			 * This is a slow way to do a scrub, but as it will
6203 			 * only be invoked when the memory scrubber actually
6204 			 * triggered a CE, it should not happen too
6205 			 * frequently.
6206 			 *
6207 			 * cut down what we need to check as the scrubber
6208 			 * has verified up to AFAR, so get it's offset
6209 			 * into the page and start there.
6210 			 */
6211 			page_offset = (uint64_t)(cpu_error_regs.afar &
6212 			    (psz - 1));
6213 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6214 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6215 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6216 			    psz);
6217 		}
6218 	}
6219 
6220 	/*
6221 	 * Reset error enable if this CE is not masked.
6222 	 */
6223 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6224 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6225 		set_error_enable(ec_err_enable | EN_REG_CEEN);
6226 
6227 }
6228 
6229 /*
6230  * Attempt a cpu logout for an error that we did not trap for, such
6231  * as a CE noticed with CEEN off.  It is assumed that we are still running
6232  * on the cpu that took the error and that we cannot migrate.  Returns
6233  * 0 on success, otherwise nonzero.
6234  */
6235 static int
6236 cpu_ce_delayed_ec_logout(uint64_t afar)
6237 {
6238 	ch_cpu_logout_t *clop;
6239 
6240 	if (CPU_PRIVATE(CPU) == NULL)
6241 		return (0);
6242 
6243 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6244 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6245 	    LOGOUT_INVALID)
6246 		return (0);
6247 
6248 	cpu_delayed_logout(afar, clop);
6249 	return (1);
6250 }
6251 
6252 /*
6253  * We got an error while CEEN was disabled. We
6254  * need to clean up after it and log whatever
6255  * information we have on the CE.
6256  */
6257 void
6258 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6259 {
6260 	ch_async_flt_t 	ch_flt;
6261 	struct async_flt *aflt;
6262 	char 		pr_reason[MAX_REASON_STRING];
6263 
6264 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6265 	ch_flt.flt_trapped_ce = flag;
6266 	aflt = (struct async_flt *)&ch_flt;
6267 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6268 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6269 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6270 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6271 	aflt->flt_addr = cpu_error_regs->afar;
6272 #if defined(SERRANO)
6273 	ch_flt.afar2 = cpu_error_regs->afar2;
6274 #endif	/* SERRANO */
6275 	aflt->flt_pc = NULL;
6276 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6277 	aflt->flt_tl = 0;
6278 	aflt->flt_panic = 0;
6279 	cpu_log_and_clear_ce(&ch_flt);
6280 
6281 	/*
6282 	 * check if we caused any errors during cleanup
6283 	 */
6284 	if (clear_errors(&ch_flt)) {
6285 		pr_reason[0] = '\0';
6286 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6287 		    NULL);
6288 	}
6289 }
6290 
6291 /*
6292  * Log/clear CEEN-controlled disrupting errors
6293  */
6294 static void
6295 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6296 {
6297 	struct async_flt *aflt;
6298 	uint64_t afsr, afsr_errs;
6299 	ch_cpu_logout_t *clop;
6300 	char 		pr_reason[MAX_REASON_STRING];
6301 	on_trap_data_t	*otp = curthread->t_ontrap;
6302 
6303 	aflt = (struct async_flt *)ch_flt;
6304 	afsr = aflt->flt_stat;
6305 	afsr_errs = ch_flt->afsr_errs;
6306 	aflt->flt_id = gethrtime_waitfree();
6307 	aflt->flt_bus_id = getprocessorid();
6308 	aflt->flt_inst = CPU->cpu_id;
6309 	aflt->flt_prot = AFLT_PROT_NONE;
6310 	aflt->flt_class = CPU_FAULT;
6311 	aflt->flt_status = ECC_C_TRAP;
6312 
6313 	pr_reason[0] = '\0';
6314 	/*
6315 	 * Get the CPU log out info for Disrupting Trap.
6316 	 */
6317 	if (CPU_PRIVATE(CPU) == NULL) {
6318 		clop = NULL;
6319 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6320 	} else {
6321 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6322 	}
6323 
6324 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6325 		ch_cpu_errors_t cpu_error_regs;
6326 
6327 		get_cpu_error_state(&cpu_error_regs);
6328 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6329 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6330 		clop->clo_data.chd_afar = cpu_error_regs.afar;
6331 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6332 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6333 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6334 		clop->clo_sdw_data.chd_afsr_ext =
6335 		    cpu_error_regs.shadow_afsr_ext;
6336 #if defined(SERRANO)
6337 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6338 #endif	/* SERRANO */
6339 		ch_flt->flt_data_incomplete = 1;
6340 
6341 		/*
6342 		 * The logging/clear code expects AFSR/AFAR to be cleared.
6343 		 * The trap handler does it for CEEN enabled errors
6344 		 * so we need to do it here.
6345 		 */
6346 		set_cpu_error_state(&cpu_error_regs);
6347 	}
6348 
6349 #if defined(JALAPENO) || defined(SERRANO)
6350 	/*
6351 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6352 	 * For Serrano, even thou we do have the AFAR, we still do the
6353 	 * scrub on the RCE side since that's where the error type can
6354 	 * be properly classified as intermittent, persistent, etc.
6355 	 *
6356 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
6357 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6358 	 * the flt_status bits.
6359 	 */
6360 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6361 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6362 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6363 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
6364 	}
6365 #else /* JALAPENO || SERRANO */
6366 	/*
6367 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
6368 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6369 	 * the flt_status bits.
6370 	 */
6371 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6372 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6373 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6374 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6375 		}
6376 	}
6377 
6378 #endif /* JALAPENO || SERRANO */
6379 
6380 	/*
6381 	 * Update flt_prot if this error occurred under on_trap protection.
6382 	 */
6383 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6384 		aflt->flt_prot = AFLT_PROT_EC;
6385 
6386 	/*
6387 	 * Queue events on the async event queue, one event per error bit.
6388 	 */
6389 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6390 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6391 		ch_flt->flt_type = CPU_INV_AFSR;
6392 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6393 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6394 		    aflt->flt_panic);
6395 	}
6396 
6397 	/*
6398 	 * Zero out + invalidate CPU logout.
6399 	 */
6400 	if (clop) {
6401 		bzero(clop, sizeof (ch_cpu_logout_t));
6402 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6403 	}
6404 
6405 	/*
6406 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6407 	 * was disabled, we need to flush either the entire
6408 	 * E$ or an E$ line.
6409 	 */
6410 #if defined(JALAPENO) || defined(SERRANO)
6411 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6412 #else	/* JALAPENO || SERRANO */
6413 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6414 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6415 #endif	/* JALAPENO || SERRANO */
6416 		cpu_error_ecache_flush(ch_flt);
6417 
6418 }
6419 
6420 /*
6421  * depending on the error type, we determine whether we
6422  * need to flush the entire ecache or just a line.
6423  */
6424 static int
6425 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6426 {
6427 	struct async_flt *aflt;
6428 	uint64_t	afsr;
6429 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6430 
6431 	aflt = (struct async_flt *)ch_flt;
6432 	afsr = aflt->flt_stat;
6433 
6434 	/*
6435 	 * If we got multiple errors, no point in trying
6436 	 * the individual cases, just flush the whole cache
6437 	 */
6438 	if (afsr & C_AFSR_ME) {
6439 		return (ECACHE_FLUSH_ALL);
6440 	}
6441 
6442 	/*
6443 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6444 	 * was disabled, we need to flush entire E$. We can't just
6445 	 * flush the cache line affected as the ME bit
6446 	 * is not set when multiple correctable errors of the same
6447 	 * type occur, so we might have multiple CPC or EDC errors,
6448 	 * with only the first recorded.
6449 	 */
6450 #if defined(JALAPENO) || defined(SERRANO)
6451 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6452 #else	/* JALAPENO || SERRANO */
6453 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6454 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6455 #endif	/* JALAPENO || SERRANO */
6456 		return (ECACHE_FLUSH_ALL);
6457 	}
6458 
6459 #if defined(JALAPENO) || defined(SERRANO)
6460 	/*
6461 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6462 	 * flush the entire Ecache.
6463 	 */
6464 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6465 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6466 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6467 			return (ECACHE_FLUSH_LINE);
6468 		} else {
6469 			return (ECACHE_FLUSH_ALL);
6470 		}
6471 	}
6472 #else /* JALAPENO || SERRANO */
6473 	/*
6474 	 * If UE only is set, flush the Ecache line, otherwise
6475 	 * flush the entire Ecache.
6476 	 */
6477 	if (afsr_errs & C_AFSR_UE) {
6478 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6479 		    C_AFSR_UE) {
6480 			return (ECACHE_FLUSH_LINE);
6481 		} else {
6482 			return (ECACHE_FLUSH_ALL);
6483 		}
6484 	}
6485 #endif /* JALAPENO || SERRANO */
6486 
6487 	/*
6488 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6489 	 * flush the entire Ecache.
6490 	 */
6491 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6492 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6493 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6494 			return (ECACHE_FLUSH_LINE);
6495 		} else {
6496 			return (ECACHE_FLUSH_ALL);
6497 		}
6498 	}
6499 
6500 	/*
6501 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6502 	 * flush the entire Ecache.
6503 	 */
6504 	if (afsr_errs & C_AFSR_BERR) {
6505 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6506 			return (ECACHE_FLUSH_LINE);
6507 		} else {
6508 			return (ECACHE_FLUSH_ALL);
6509 		}
6510 	}
6511 
6512 	return (0);
6513 }
6514 
6515 void
6516 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6517 {
6518 	int	ecache_flush_flag =
6519 	    cpu_error_ecache_flush_required(ch_flt);
6520 
6521 	/*
6522 	 * Flush Ecache line or entire Ecache based on above checks.
6523 	 */
6524 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6525 		cpu_flush_ecache();
6526 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6527 		cpu_flush_ecache_line(ch_flt);
6528 	}
6529 
6530 }
6531 
6532 /*
6533  * Extract the PA portion from the E$ tag.
6534  */
6535 uint64_t
6536 cpu_ectag_to_pa(int setsize, uint64_t tag)
6537 {
6538 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6539 		return (JG_ECTAG_TO_PA(setsize, tag));
6540 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6541 		return (PN_L3TAG_TO_PA(tag));
6542 	else
6543 		return (CH_ECTAG_TO_PA(setsize, tag));
6544 }
6545 
6546 /*
6547  * Convert the E$ tag PA into an E$ subblock index.
6548  */
6549 int
6550 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6551 {
6552 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6553 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6554 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6555 		/* Panther has only one subblock per line */
6556 		return (0);
6557 	else
6558 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6559 }
6560 
6561 /*
6562  * All subblocks in an E$ line must be invalid for
6563  * the line to be invalid.
6564  */
6565 int
6566 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6567 {
6568 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6569 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6570 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6571 		return (PN_L3_LINE_INVALID(tag));
6572 	else
6573 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6574 }
6575 
6576 /*
6577  * Extract state bits for a subblock given the tag.  Note that for Panther
6578  * this works on both l2 and l3 tags.
6579  */
6580 int
6581 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6582 {
6583 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6584 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6585 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6586 		return (tag & CH_ECSTATE_MASK);
6587 	else
6588 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6589 }
6590 
/*
 * Cpu specific initialization.  The only work done here is for builds
 * with CHEETAHPLUS_ERRATUM_25: cheetah_nudge_init() is called when
 * cheetah_sendmondo_recover is set.
 */
void
cpu_mp_init(void)
{
#ifdef	CHEETAHPLUS_ERRATUM_25
	if (!cheetah_sendmondo_recover)
		return;

	cheetah_nudge_init();
#endif
}
6603 
6604 void
6605 cpu_ereport_post(struct async_flt *aflt)
6606 {
6607 	char *cpu_type, buf[FM_MAX_CLASS];
6608 	nv_alloc_t *nva = NULL;
6609 	nvlist_t *ereport, *detector, *resource;
6610 	errorq_elem_t *eqep;
6611 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6612 	char unum[UNUM_NAMLEN];
6613 	int synd_code;
6614 	uint8_t msg_type;
6615 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6616 
6617 	if (aflt->flt_panic || panicstr) {
6618 		eqep = errorq_reserve(ereport_errorq);
6619 		if (eqep == NULL)
6620 			return;
6621 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6622 		nva = errorq_elem_nva(ereport_errorq, eqep);
6623 	} else {
6624 		ereport = fm_nvlist_create(nva);
6625 	}
6626 
6627 	/*
6628 	 * Create the scheme "cpu" FMRI.
6629 	 */
6630 	detector = fm_nvlist_create(nva);
6631 	resource = fm_nvlist_create(nva);
6632 	switch (cpunodes[aflt->flt_inst].implementation) {
6633 	case CHEETAH_IMPL:
6634 		cpu_type = FM_EREPORT_CPU_USIII;
6635 		break;
6636 	case CHEETAH_PLUS_IMPL:
6637 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6638 		break;
6639 	case JALAPENO_IMPL:
6640 		cpu_type = FM_EREPORT_CPU_USIIIi;
6641 		break;
6642 	case SERRANO_IMPL:
6643 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6644 		break;
6645 	case JAGUAR_IMPL:
6646 		cpu_type = FM_EREPORT_CPU_USIV;
6647 		break;
6648 	case PANTHER_IMPL:
6649 		cpu_type = FM_EREPORT_CPU_USIVplus;
6650 		break;
6651 	default:
6652 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6653 		break;
6654 	}
6655 
6656 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
6657 
6658 	/*
6659 	 * Encode all the common data into the ereport.
6660 	 */
6661 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6662 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6663 
6664 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6665 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6666 	    detector, NULL);
6667 
6668 	/*
6669 	 * Encode the error specific data that was saved in
6670 	 * the async_flt structure into the ereport.
6671 	 */
6672 	cpu_payload_add_aflt(aflt, ereport, resource,
6673 	    &plat_ecc_ch_flt.ecaf_afar_status,
6674 	    &plat_ecc_ch_flt.ecaf_synd_status);
6675 
6676 	if (aflt->flt_panic || panicstr) {
6677 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6678 	} else {
6679 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6680 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6681 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6682 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6683 	}
6684 	/*
6685 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6686 	 * to the SC only if it can process it.
6687 	 */
6688 
6689 	if (&plat_ecc_capability_sc_get &&
6690 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6691 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6692 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6693 			/*
6694 			 * If afar status is not invalid do a unum lookup.
6695 			 */
6696 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6697 			    AFLT_STAT_INVALID) {
6698 				synd_code = synd_to_synd_code(
6699 				    plat_ecc_ch_flt.ecaf_synd_status,
6700 				    aflt->flt_synd, ch_flt->flt_bit);
6701 				(void) cpu_get_mem_unum_synd(synd_code,
6702 				    aflt, unum);
6703 			} else {
6704 				unum[0] = '\0';
6705 			}
6706 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6707 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6708 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6709 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6710 			    ch_flt->flt_sdw_afsr_ext;
6711 
6712 			if (&plat_log_fruid_error2)
6713 				plat_log_fruid_error2(msg_type, unum, aflt,
6714 				    &plat_ecc_ch_flt);
6715 		}
6716 	}
6717 }
6718 
6719 void
6720 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6721 {
6722 	int status;
6723 	ddi_fm_error_t de;
6724 
6725 	bzero(&de, sizeof (ddi_fm_error_t));
6726 
6727 	de.fme_version = DDI_FME_VERSION;
6728 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6729 	    FM_ENA_FMT1);
6730 	de.fme_flag = expected;
6731 	de.fme_bus_specific = (void *)aflt->flt_addr;
6732 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6733 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6734 		aflt->flt_panic = 1;
6735 }
6736 
6737 void
6738 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6739     errorq_t *eqp, uint_t flag)
6740 {
6741 	struct async_flt *aflt = (struct async_flt *)payload;
6742 
6743 	aflt->flt_erpt_class = error_class;
6744 	errorq_dispatch(eqp, payload, payload_sz, flag);
6745 }
6746 
/*
 * Deliberately empty.  The IO module may still call this entry point,
 * but this cpu module has nothing to do here: the SERD algorithm is
 * handled by the cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
}
6756 
6757 void
6758 adjust_hw_copy_limits(int ecache_size)
6759 {
6760 	/*
6761 	 * Set hw copy limits.
6762 	 *
6763 	 * /etc/system will be parsed later and can override one or more
6764 	 * of these settings.
6765 	 *
6766 	 * At this time, ecache size seems only mildly relevant.
6767 	 * We seem to run into issues with the d-cache and stalls
6768 	 * we see on misses.
6769 	 *
6770 	 * Cycle measurement indicates that 2 byte aligned copies fare
6771 	 * little better than doing things with VIS at around 512 bytes.
6772 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6773 	 * aligned is faster whenever the source and destination data
6774 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6775 	 * limit seems to be driven by the 2K write cache.
6776 	 * When more than 2K of copies are done in non-VIS mode, stores
6777 	 * backup in the write cache.  In VIS mode, the write cache is
6778 	 * bypassed, allowing faster cache-line writes aligned on cache
6779 	 * boundaries.
6780 	 *
6781 	 * In addition, in non-VIS mode, there is no prefetching, so
6782 	 * for larger copies, the advantage of prefetching to avoid even
6783 	 * occasional cache misses is enough to justify using the VIS code.
6784 	 *
6785 	 * During testing, it was discovered that netbench ran 3% slower
6786 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6787 	 * applications, data is only used once (copied to the output
6788 	 * buffer, then copied by the network device off the system).  Using
6789 	 * the VIS copy saves more L2 cache state.  Network copies are
6790 	 * around 1.3K to 1.5K in size for historical reasons.
6791 	 *
6792 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6793 	 * aligned copy even for large caches and 8 MB ecache.  The
6794 	 * infrastructure to allow different limits for different sized
6795 	 * caches is kept to allow further tuning in later releases.
6796 	 */
6797 
6798 	if (min_ecache_size == 0 && use_hw_bcopy) {
6799 		/*
6800 		 * First time through - should be before /etc/system
6801 		 * is read.
6802 		 * Could skip the checks for zero but this lets us
6803 		 * preserve any debugger rewrites.
6804 		 */
6805 		if (hw_copy_limit_1 == 0) {
6806 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6807 			priv_hcl_1 = hw_copy_limit_1;
6808 		}
6809 		if (hw_copy_limit_2 == 0) {
6810 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6811 			priv_hcl_2 = hw_copy_limit_2;
6812 		}
6813 		if (hw_copy_limit_4 == 0) {
6814 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6815 			priv_hcl_4 = hw_copy_limit_4;
6816 		}
6817 		if (hw_copy_limit_8 == 0) {
6818 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6819 			priv_hcl_8 = hw_copy_limit_8;
6820 		}
6821 		min_ecache_size = ecache_size;
6822 	} else {
6823 		/*
6824 		 * MP initialization. Called *after* /etc/system has
6825 		 * been parsed. One CPU has already been initialized.
6826 		 * Need to cater for /etc/system having scragged one
6827 		 * of our values.
6828 		 */
6829 		if (ecache_size == min_ecache_size) {
6830 			/*
6831 			 * Same size ecache. We do nothing unless we
6832 			 * have a pessimistic ecache setting. In that
6833 			 * case we become more optimistic (if the cache is
6834 			 * large enough).
6835 			 */
6836 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6837 				/*
6838 				 * Need to adjust hw_copy_limit* from our
6839 				 * pessimistic uniprocessor value to a more
6840 				 * optimistic UP value *iff* it hasn't been
6841 				 * reset.
6842 				 */
6843 				if ((ecache_size > 1048576) &&
6844 				    (priv_hcl_8 == hw_copy_limit_8)) {
6845 					if (ecache_size <= 2097152)
6846 						hw_copy_limit_8 = 4 *
6847 						    VIS_COPY_THRESHOLD;
6848 					else if (ecache_size <= 4194304)
6849 						hw_copy_limit_8 = 4 *
6850 						    VIS_COPY_THRESHOLD;
6851 					else
6852 						hw_copy_limit_8 = 4 *
6853 						    VIS_COPY_THRESHOLD;
6854 					priv_hcl_8 = hw_copy_limit_8;
6855 				}
6856 			}
6857 		} else if (ecache_size < min_ecache_size) {
6858 			/*
6859 			 * A different ecache size. Can this even happen?
6860 			 */
6861 			if (priv_hcl_8 == hw_copy_limit_8) {
6862 				/*
6863 				 * The previous value that we set
6864 				 * is unchanged (i.e., it hasn't been
6865 				 * scragged by /etc/system). Rewrite it.
6866 				 */
6867 				if (ecache_size <= 1048576)
6868 					hw_copy_limit_8 = 8 *
6869 					    VIS_COPY_THRESHOLD;
6870 				else if (ecache_size <= 2097152)
6871 					hw_copy_limit_8 = 8 *
6872 					    VIS_COPY_THRESHOLD;
6873 				else if (ecache_size <= 4194304)
6874 					hw_copy_limit_8 = 8 *
6875 					    VIS_COPY_THRESHOLD;
6876 				else
6877 					hw_copy_limit_8 = 10 *
6878 					    VIS_COPY_THRESHOLD;
6879 				priv_hcl_8 = hw_copy_limit_8;
6880 				min_ecache_size = ecache_size;
6881 			}
6882 		}
6883 	}
6884 }
6885 
6886 /*
6887  * Called from illegal instruction trap handler to see if we can attribute
6888  * the trap to a fpras check.
6889  */
6890 int
6891 fpras_chktrap(struct regs *rp)
6892 {
6893 	int op;
6894 	struct fpras_chkfngrp *cgp;
6895 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6896 
6897 	if (fpras_chkfngrps == NULL)
6898 		return (0);
6899 
6900 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6901 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6902 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6903 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6904 			break;
6905 	}
6906 	if (op == FPRAS_NCOPYOPS)
6907 		return (0);
6908 
6909 	/*
6910 	 * This is an fpRAS failure caught through an illegal
6911 	 * instruction - trampoline.
6912 	 */
6913 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6914 	rp->r_npc = rp->r_pc + 4;
6915 	return (1);
6916 }
6917 
6918 /*
6919  * fpras_failure is called when a fpras check detects a bad calculation
6920  * result or an illegal instruction trap is attributed to an fpras
6921  * check.  In all cases we are still bound to CPU.
6922  */
6923 int
6924 fpras_failure(int op, int how)
6925 {
6926 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6927 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6928 	ch_async_flt_t ch_flt;
6929 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6930 	struct fpras_chkfn *sfp, *cfp;
6931 	uint32_t *sip, *cip;
6932 	int i;
6933 
6934 	/*
6935 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6936 	 * the time in which we dispatch an ereport and (if applicable) panic.
6937 	 */
6938 	use_hw_bcopy_orig = use_hw_bcopy;
6939 	use_hw_bzero_orig = use_hw_bzero;
6940 	hcl1_orig = hw_copy_limit_1;
6941 	hcl2_orig = hw_copy_limit_2;
6942 	hcl4_orig = hw_copy_limit_4;
6943 	hcl8_orig = hw_copy_limit_8;
6944 	use_hw_bcopy = use_hw_bzero = 0;
6945 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6946 	    hw_copy_limit_8 = 0;
6947 
6948 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6949 	aflt->flt_id = gethrtime_waitfree();
6950 	aflt->flt_class = CPU_FAULT;
6951 	aflt->flt_inst = CPU->cpu_id;
6952 	aflt->flt_status = (how << 8) | op;
6953 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6954 	ch_flt.flt_type = CPU_FPUERR;
6955 
6956 	/*
6957 	 * We must panic if the copy operation had no lofault protection -
6958 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6959 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6960 	 */
6961 	aflt->flt_panic = (curthread->t_lofault == NULL);
6962 
6963 	/*
6964 	 * XOR the source instruction block with the copied instruction
6965 	 * block - this will show us which bit(s) are corrupted.
6966 	 */
6967 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6968 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6969 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6970 		sip = &sfp->fpras_blk0[0];
6971 		cip = &cfp->fpras_blk0[0];
6972 	} else {
6973 		sip = &sfp->fpras_blk1[0];
6974 		cip = &cfp->fpras_blk1[0];
6975 	}
6976 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6977 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6978 
6979 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6980 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6981 
6982 	if (aflt->flt_panic)
6983 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6984 
6985 	/*
6986 	 * We get here for copyin/copyout and kcopy or bcopy where the
6987 	 * caller has used on_fault.  We will flag the error so that
6988 	 * the process may be killed  The trap_async_hwerr mechanism will
6989 	 * take appropriate further action (such as a reboot, contract
6990 	 * notification etc).  Since we may be continuing we will
6991 	 * restore the global hardware copy acceleration switches.
6992 	 *
6993 	 * When we return from this function to the copy function we want to
6994 	 * avoid potentially bad data being used, ie we want the affected
6995 	 * copy function to return an error.  The caller should therefore
6996 	 * invoke its lofault handler (which always exists for these functions)
6997 	 * which will return the appropriate error.
6998 	 */
6999 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
7000 	aston(curthread);
7001 
7002 	use_hw_bcopy = use_hw_bcopy_orig;
7003 	use_hw_bzero = use_hw_bzero_orig;
7004 	hw_copy_limit_1 = hcl1_orig;
7005 	hw_copy_limit_2 = hcl2_orig;
7006 	hw_copy_limit_4 = hcl4_orig;
7007 	hw_copy_limit_8 = hcl8_orig;
7008 
7009 	return (1);
7010 }
7011 
7012 #define	VIS_BLOCKSIZE		64
7013 
7014 int
7015 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7016 {
7017 	int ret, watched;
7018 
7019 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7020 	ret = dtrace_blksuword32(addr, data, 0);
7021 	if (watched)
7022 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7023 
7024 	return (ret);
7025 }
7026 
7027 /*
7028  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7029  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
7030  * CEEN from the EER to disable traps for further disrupting error types
7031  * on that cpu.  We could cross-call instead, but that has a larger
7032  * instruction and data footprint than cross-trapping, and the cpu is known
7033  * to be faulted.
7034  */
7035 
7036 void
7037 cpu_faulted_enter(struct cpu *cp)
7038 {
7039 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7040 }
7041 
7042 /*
7043  * Called when a cpu leaves the CPU_FAULTED state to return to one of
7044  * offline, spare, or online (by the cpu requesting this state change).
7045  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7046  * disrupting error bits that have accumulated without trapping, then
7047  * we cross-trap to re-enable CEEN controlled traps.
7048  */
7049 void
7050 cpu_faulted_exit(struct cpu *cp)
7051 {
7052 	ch_cpu_errors_t cpu_error_regs;
7053 
7054 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7055 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7056 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7057 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7058 	    (uint64_t)&cpu_error_regs, 0);
7059 
7060 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7061 }
7062 
7063 /*
7064  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7065  * the errors in the original AFSR, 0 otherwise.
7066  *
7067  * For all procs if the initial error was a BERR or TO, then it is possible
7068  * that we may have caused a secondary BERR or TO in the process of logging the
7069  * inital error via cpu_run_bus_error_handlers().  If this is the case then
7070  * if the request was protected then a panic is still not necessary, if not
7071  * protected then aft_panic is already set - so either way there's no need
7072  * to set aft_panic for the secondary error.
7073  *
7074  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7075  * a store merge, then the error handling code will call cpu_deferred_error().
7076  * When clear_errors() is called, it will determine that secondary errors have
7077  * occurred - in particular, the store merge also caused a EDU and WDU that
7078  * weren't discovered until this point.
7079  *
7080  * We do three checks to verify that we are in this case.  If we pass all three
7081  * checks, we return 1 to indicate that we should not panic.  If any unexpected
7082  * errors occur, we return 0.
7083  *
7084  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7085  * handled in cpu_disrupting_errors().  Since this function is not even called
7086  * in the case we are interested in, we just return 0 for these processors.
7087  */
7088 /*ARGSUSED*/
7089 static int
7090 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7091     uint64_t t_afar)
7092 {
7093 #if defined(CHEETAH_PLUS)
7094 #else	/* CHEETAH_PLUS */
7095 	struct async_flt *aflt = (struct async_flt *)ch_flt;
7096 #endif	/* CHEETAH_PLUS */
7097 
7098 	/*
7099 	 * Was the original error a BERR or TO and only a BERR or TO
7100 	 * (multiple errors are also OK)
7101 	 */
7102 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7103 		/*
7104 		 * Is the new error a BERR or TO and only a BERR or TO
7105 		 * (multiple errors are also OK)
7106 		 */
7107 		if ((ch_flt->afsr_errs &
7108 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7109 			return (1);
7110 	}
7111 
7112 #if defined(CHEETAH_PLUS)
7113 	return (0);
7114 #else	/* CHEETAH_PLUS */
7115 	/*
7116 	 * Now look for secondary effects of a UE on cheetah/jalapeno
7117 	 *
7118 	 * Check the original error was a UE, and only a UE.  Note that
7119 	 * the ME bit will cause us to fail this check.
7120 	 */
7121 	if (t_afsr_errs != C_AFSR_UE)
7122 		return (0);
7123 
7124 	/*
7125 	 * Check the secondary errors were exclusively an EDU and/or WDU.
7126 	 */
7127 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7128 		return (0);
7129 
7130 	/*
7131 	 * Check the AFAR of the original error and secondary errors
7132 	 * match to the 64-byte boundary
7133 	 */
7134 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7135 		return (0);
7136 
7137 	/*
7138 	 * We've passed all the checks, so it's a secondary error!
7139 	 */
7140 	return (1);
7141 #endif	/* CHEETAH_PLUS */
7142 }
7143 
7144 /*
7145  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
7146  * is checked for any valid errors.  If found, the error type is
7147  * returned. If not found, the flt_type is checked for L1$ parity errors.
7148  */
7149 /*ARGSUSED*/
7150 static uint8_t
7151 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7152 {
7153 #if defined(JALAPENO)
7154 	/*
7155 	 * Currently, logging errors to the SC is not supported on Jalapeno
7156 	 */
7157 	return (PLAT_ECC_ERROR2_NONE);
7158 #else
7159 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7160 
7161 	switch (ch_flt->flt_bit) {
7162 	case C_AFSR_CE:
7163 		return (PLAT_ECC_ERROR2_CE);
7164 	case C_AFSR_UCC:
7165 	case C_AFSR_EDC:
7166 	case C_AFSR_WDC:
7167 	case C_AFSR_CPC:
7168 		return (PLAT_ECC_ERROR2_L2_CE);
7169 	case C_AFSR_EMC:
7170 		return (PLAT_ECC_ERROR2_EMC);
7171 	case C_AFSR_IVC:
7172 		return (PLAT_ECC_ERROR2_IVC);
7173 	case C_AFSR_UE:
7174 		return (PLAT_ECC_ERROR2_UE);
7175 	case C_AFSR_UCU:
7176 	case C_AFSR_EDU:
7177 	case C_AFSR_WDU:
7178 	case C_AFSR_CPU:
7179 		return (PLAT_ECC_ERROR2_L2_UE);
7180 	case C_AFSR_IVU:
7181 		return (PLAT_ECC_ERROR2_IVU);
7182 	case C_AFSR_TO:
7183 		return (PLAT_ECC_ERROR2_TO);
7184 	case C_AFSR_BERR:
7185 		return (PLAT_ECC_ERROR2_BERR);
7186 #if defined(CHEETAH_PLUS)
7187 	case C_AFSR_L3_EDC:
7188 	case C_AFSR_L3_UCC:
7189 	case C_AFSR_L3_CPC:
7190 	case C_AFSR_L3_WDC:
7191 		return (PLAT_ECC_ERROR2_L3_CE);
7192 	case C_AFSR_IMC:
7193 		return (PLAT_ECC_ERROR2_IMC);
7194 	case C_AFSR_TSCE:
7195 		return (PLAT_ECC_ERROR2_L2_TSCE);
7196 	case C_AFSR_THCE:
7197 		return (PLAT_ECC_ERROR2_L2_THCE);
7198 	case C_AFSR_L3_MECC:
7199 		return (PLAT_ECC_ERROR2_L3_MECC);
7200 	case C_AFSR_L3_THCE:
7201 		return (PLAT_ECC_ERROR2_L3_THCE);
7202 	case C_AFSR_L3_CPU:
7203 	case C_AFSR_L3_EDU:
7204 	case C_AFSR_L3_UCU:
7205 	case C_AFSR_L3_WDU:
7206 		return (PLAT_ECC_ERROR2_L3_UE);
7207 	case C_AFSR_DUE:
7208 		return (PLAT_ECC_ERROR2_DUE);
7209 	case C_AFSR_DTO:
7210 		return (PLAT_ECC_ERROR2_DTO);
7211 	case C_AFSR_DBERR:
7212 		return (PLAT_ECC_ERROR2_DBERR);
7213 #endif	/* CHEETAH_PLUS */
7214 	default:
7215 		switch (ch_flt->flt_type) {
7216 #if defined(CPU_IMP_L1_CACHE_PARITY)
7217 		case CPU_IC_PARITY:
7218 			return (PLAT_ECC_ERROR2_IPE);
7219 		case CPU_DC_PARITY:
7220 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7221 				if (ch_flt->parity_data.dpe.cpl_cache ==
7222 				    CPU_PC_PARITY) {
7223 					return (PLAT_ECC_ERROR2_PCACHE);
7224 				}
7225 			}
7226 			return (PLAT_ECC_ERROR2_DPE);
7227 #endif /* CPU_IMP_L1_CACHE_PARITY */
7228 		case CPU_ITLB_PARITY:
7229 			return (PLAT_ECC_ERROR2_ITLB);
7230 		case CPU_DTLB_PARITY:
7231 			return (PLAT_ECC_ERROR2_DTLB);
7232 		default:
7233 			return (PLAT_ECC_ERROR2_NONE);
7234 		}
7235 	}
7236 #endif	/* JALAPENO */
7237 }
7238