xref: /titanic_52/usr/src/uts/sun4u/cpu/us3_common.c (revision 7a80ba23c9972427b3d0a2bc6942bdd8bfa4652b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/ddi.h>
29 #include <sys/sysmacros.h>
30 #include <sys/archsystm.h>
31 #include <sys/vmsystm.h>
32 #include <sys/machparam.h>
33 #include <sys/machsystm.h>
34 #include <sys/machthread.h>
35 #include <sys/cpu.h>
36 #include <sys/cmp.h>
37 #include <sys/elf_SPARC.h>
38 #include <vm/vm_dep.h>
39 #include <vm/hat_sfmmu.h>
40 #include <vm/seg_kpm.h>
41 #include <sys/cpuvar.h>
42 #include <sys/cheetahregs.h>
43 #include <sys/us3_module.h>
44 #include <sys/async.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/dditypes.h>
48 #include <sys/prom_debug.h>
49 #include <sys/prom_plat.h>
50 #include <sys/cpu_module.h>
51 #include <sys/sysmacros.h>
52 #include <sys/intreg.h>
53 #include <sys/clock.h>
54 #include <sys/platform_module.h>
55 #include <sys/machtrap.h>
56 #include <sys/ontrap.h>
57 #include <sys/panic.h>
58 #include <sys/memlist.h>
59 #include <sys/bootconf.h>
60 #include <sys/ivintr.h>
61 #include <sys/atomic.h>
62 #include <sys/taskq.h>
63 #include <sys/note.h>
64 #include <sys/ndifm.h>
65 #include <sys/ddifm.h>
66 #include <sys/fm/protocol.h>
67 #include <sys/fm/util.h>
68 #include <sys/fm/cpu/UltraSPARC-III.h>
69 #include <sys/fpras_impl.h>
70 #include <sys/dtrace.h>
71 #include <sys/watchpoint.h>
72 #include <sys/plat_ecc_unum.h>
73 #include <sys/cyclic.h>
74 #include <sys/errorq.h>
75 #include <sys/errclassify.h>
76 #include <sys/pghw.h>
77 
78 #ifdef	CHEETAHPLUS_ERRATUM_25
79 #include <sys/xc_impl.h>
80 #endif	/* CHEETAHPLUS_ERRATUM_25 */
81 
82 ch_cpu_logout_t	clop_before_flush;
83 ch_cpu_logout_t	clop_after_flush;
84 uint_t	flush_retries_done = 0;
85 int32_t last_l3tag_error_injected_way = 255;
86 uint8_t last_l3tag_error_injected_bit;
87 int32_t last_l2tag_error_injected_way = 255;
88 uint8_t last_l2tag_error_injected_bit;
89 /*
90  * Note that 'Cheetah PRM' refers to:
91  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
92  */
93 
94 /*
95  * Per CPU pointers to physical address of TL>0 logout data areas.
96  * These pointers have to be in the kernel nucleus to avoid MMU
97  * misses.
98  */
99 uint64_t ch_err_tl1_paddrs[NCPU];
100 
101 /*
102  * One statically allocated structure to use during startup/DR
103  * to prevent unnecessary panics.
104  */
105 ch_err_tl1_data_t ch_err_tl1_data;
106 
107 /*
108  * Per CPU pending error at TL>0, used by level15 softint handler
109  */
110 uchar_t ch_err_tl1_pending[NCPU];
111 
112 /*
113  * For deferred CE re-enable after trap.
114  */
115 taskq_t		*ch_check_ce_tq;
116 
117 /*
118  * Internal functions.
119  */
120 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
121 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
122 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
123     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
124 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
125     uint64_t t_afsr_bit);
126 static int clear_ecc(struct async_flt *ecc);
127 #if defined(CPU_IMP_ECACHE_ASSOC)
128 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
129 #endif
130 int cpu_ecache_set_size(struct cpu *cp);
131 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
132 int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
133 uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
134 int cpu_ectag_pa_to_subblk_state(int cachesize,
135 				uint64_t subaddr, uint64_t tag);
136 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
137 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
138 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
139 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
140 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
141 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
142 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
143 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
144 static void cpu_scrubphys(struct async_flt *aflt);
145 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
146     int *, int *);
147 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
148 static void cpu_ereport_init(struct async_flt *aflt);
149 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
150 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
151 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
152     uint64_t nceen, ch_cpu_logout_t *clop);
153 static int cpu_ce_delayed_ec_logout(uint64_t);
154 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
155 static int cpu_error_is_ecache_data(int, uint64_t);
156 static void cpu_fmri_cpu_set(nvlist_t *, int);
157 static int cpu_error_to_resource_type(struct async_flt *aflt);
158 
159 #ifdef	CHEETAHPLUS_ERRATUM_25
160 static int mondo_recover_proc(uint16_t, int);
161 static void cheetah_nudge_init(void);
162 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
163     cyc_time_t *when);
164 static void cheetah_nudge_buddy(void);
165 #endif	/* CHEETAHPLUS_ERRATUM_25 */
166 
167 #if defined(CPU_IMP_L1_CACHE_PARITY)
168 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
169 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
170 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
171     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
172 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
173 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
174 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
175 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
176 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
177 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
178 #endif	/* CPU_IMP_L1_CACHE_PARITY */
179 
180 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
181     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
182     int *segsp, int *banksp, int *mcidp);
183 
184 /*
185  * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
187  * of this array have the following semantics:
188  *
189  *      00-127  The number of the bad bit, when only one bit is bad.
190  *      128     ECC bit C0 is bad.
191  *      129     ECC bit C1 is bad.
192  *      130     ECC bit C2 is bad.
193  *      131     ECC bit C3 is bad.
194  *      132     ECC bit C4 is bad.
195  *      133     ECC bit C5 is bad.
196  *      134     ECC bit C6 is bad.
197  *      135     ECC bit C7 is bad.
198  *      136     ECC bit C8 is bad.
199  *	137-143 reserved for Mtag Data and ECC.
200  *      144(M2) Two bits are bad within a nibble.
201  *      145(M3) Three bits are bad within a nibble.
 *      146(M4) Four bits are bad within a nibble.
203  *      147(M)  Multiple bits (5 or more) are bad.
204  *      148     NO bits are bad.
205  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
206  */
207 
208 #define	C0	128
209 #define	C1	129
210 #define	C2	130
211 #define	C3	131
212 #define	C4	132
213 #define	C5	133
214 #define	C6	134
215 #define	C7	135
216 #define	C8	136
217 #define	MT0	137	/* Mtag Data bit 0 */
218 #define	MT1	138
219 #define	MT2	139
220 #define	MTC0	140	/* Mtag Check bit 0 */
221 #define	MTC1	141
222 #define	MTC2	142
223 #define	MTC3	143
224 #define	M2	144
225 #define	M3	145
226 #define	M4	146
227 #define	M	147
228 #define	NA	148
229 #if defined(JALAPENO) || defined(SERRANO)
230 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
231 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
232 #define	SLAST	S003MEM	/* last special syndrome */
233 #else /* JALAPENO || SERRANO */
234 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
235 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
236 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
237 #define	SLAST	S11C	/* last special syndrome */
238 #endif /* JALAPENO || SERRANO */
239 #if defined(JALAPENO) || defined(SERRANO)
#define	BPAR0	152	/* syndromes 152 through 167 are for bus parity */
#define	BPAR15	167
242 #endif	/* JALAPENO || SERRANO */
243 
/*
 * Syndrome lookup table: 32 rows of 16 entries, i.e. one entry for each
 * possible 9-bit syndrome value.  Two rows are chosen by conditional
 * compilation: the special codes S071 (syndrome 0x071) and S11C (syndrome
 * 0x11c) appear only when neither JALAPENO nor SERRANO is defined; the
 * JALAPENO/SERRANO variants of those rows hold M2 in the same positions.
 */
static uint8_t ecc_syndrome_tab[] =
{
NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
#if defined(JALAPENO) || defined(SERRANO)
116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
#else	/* JALAPENO || SERRANO */
116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
#endif	/* JALAPENO || SERRANO */
C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
#if defined(JALAPENO) || defined(SERRANO)
M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
#else	/* JALAPENO || SERRANO */
M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
#endif	/* JALAPENO || SERRANO */
M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
};
287 
/*
 * Number of entries in ecc_syndrome_tab.  Dividing by the size of the
 * table's own first element keeps the count right if the element type of
 * the table is ever changed.
 */
#define	ESYND_TBL_SIZE	\
	(sizeof (ecc_syndrome_tab) / sizeof (ecc_syndrome_tab[0]))
289 
290 #if !(defined(JALAPENO) || defined(SERRANO))
/*
 * This table is used to determine which bit(s) is(are) bad when a Mtag
 * error occurs.  The array is indexed by a 4-bit ECC syndrome, so it holds
 * 16 entries.  The entries of this array have the following semantics:
 *
 *      -1	Invalid mtag syndrome.
 *      137     Mtag Data 0 is bad.
 *      138     Mtag Data 1 is bad.
 *      139     Mtag Data 2 is bad.
 *      140     Mtag ECC 0 is bad.
 *      141     Mtag ECC 1 is bad.
 *      142     Mtag ECC 2 is bad.
 *      143     Mtag ECC 3 is bad.
 *
 * NOTE(review): the initializer below contains no -1 entries.  Syndrome 0
 * maps to NA and every syndrome that does not identify a single Mtag bit
 * maps to M2; confirm whether the "-1" line above is still accurate.
 *
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6.
 */
short mtag_syndrome_tab[] =
{
NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
};
310 
/*
 * Number of entries in mtag_syndrome_tab.  Dividing by the size of the
 * table's own first element keeps the count right if the element type of
 * the table is ever changed.
 */
#define	MSYND_TBL_SIZE	\
	(sizeof (mtag_syndrome_tab) / sizeof (mtag_syndrome_tab[0]))
312 
313 #else /* !(JALAPENO || SERRANO) */
314 
315 #define	BSYND_TBL_SIZE	16
316 
317 #endif /* !(JALAPENO || SERRANO) */
318 
319 /*
320  * Types returned from cpu_error_to_resource_type()
321  */
322 #define	ERRTYPE_UNKNOWN		0
323 #define	ERRTYPE_CPU		1
324 #define	ERRTYPE_MEMORY		2
325 #define	ERRTYPE_ECACHE_DATA	3
326 
327 /*
328  * CE initial classification and subsequent action lookup table
329  */
330 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
331 static int ce_disp_inited;
332 
333 /*
334  * Set to disable leaky and partner check for memory correctables
335  */
336 int ce_xdiag_off;
337 
338 /*
339  * The following are not incremented atomically so are indicative only
340  */
341 static int ce_xdiag_drops;
342 static int ce_xdiag_lkydrops;
343 static int ce_xdiag_ptnrdrops;
344 static int ce_xdiag_bad;
345 
346 /*
347  * CE leaky check callback structure
348  */
349 typedef struct {
350 	struct async_flt *lkycb_aflt;
351 	errorq_t *lkycb_eqp;
352 	errorq_elem_t *lkycb_eqep;
353 } ce_lkychk_cb_t;
354 
355 /*
356  * defines for various ecache_flush_flag's
357  */
358 #define	ECACHE_FLUSH_LINE	1
359 #define	ECACHE_FLUSH_ALL	2
360 
361 /*
362  * STICK sync
363  */
364 #define	STICK_ITERATION 10
365 #define	MAX_TSKEW	1
366 #define	EV_A_START	0
367 #define	EV_A_END	1
368 #define	EV_B_START	2
369 #define	EV_B_END	3
370 #define	EVENTS		4
371 
372 static int64_t stick_iter = STICK_ITERATION;
373 static int64_t stick_tsk = MAX_TSKEW;
374 
375 typedef enum {
376 	EVENT_NULL = 0,
377 	SLAVE_START,
378 	SLAVE_CONT,
379 	MASTER_START
380 } event_cmd_t;
381 
382 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
383 static int64_t timestamp[EVENTS];
384 static volatile int slave_done;
385 
386 #ifdef DEBUG
387 #define	DSYNC_ATTEMPTS 64
388 typedef struct {
389 	int64_t	skew_val[DSYNC_ATTEMPTS];
390 } ss_t;
391 
392 ss_t stick_sync_stats[NCPU];
393 #endif /* DEBUG */
394 
395 uint_t cpu_impl_dual_pgsz = 0;
396 #if defined(CPU_IMP_DUAL_PAGESIZE)
397 uint_t disable_dual_pgsz = 0;
398 #endif	/* CPU_IMP_DUAL_PAGESIZE */
399 
400 /*
401  * Save the cache bootup state for use when internal
402  * caches are to be re-enabled after an error occurs.
403  */
404 uint64_t cache_boot_state;
405 
406 /*
407  * PA[22:0] represent Displacement in Safari configuration space.
408  */
409 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
410 
411 bus_config_eclk_t bus_config_eclk[] = {
412 #if defined(JALAPENO) || defined(SERRANO)
413 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
414 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
415 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
416 #else /* JALAPENO || SERRANO */
417 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
418 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
419 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
420 #endif /* JALAPENO || SERRANO */
421 	{0, 0}
422 };
423 
424 /*
425  * Interval for deferred CEEN reenable
426  */
427 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
428 
429 /*
430  * set in /etc/system to control logging of user BERR/TO's
431  */
432 int cpu_berr_to_verbose = 0;
433 
434 /*
435  * set to 0 in /etc/system to defer CEEN reenable for all CEs
436  */
437 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
438 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
439 
440 /*
441  * Set of all offline cpus
442  */
443 cpuset_t cpu_offline_set;
444 
445 static void cpu_delayed_check_ce_errors(void *);
446 static void cpu_check_ce_errors(void *);
447 void cpu_error_ecache_flush(ch_async_flt_t *);
448 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
449 static void cpu_log_and_clear_ce(ch_async_flt_t *);
450 void cpu_ce_detected(ch_cpu_errors_t *, int);
451 
452 /*
453  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
454  * memory refresh interval of current DIMMs (64ms).  After initial fix that
455  * gives at least one full refresh cycle in which the cell can leak
456  * (whereafter further refreshes simply reinforce any incorrect bit value).
457  */
458 clock_t cpu_ce_lkychk_timeout_usec = 128000;
459 
460 /*
461  * CE partner check partner caching period in seconds
462  */
463 int cpu_ce_ptnr_cachetime_sec = 60;
464 
/*
 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel:
 * 32 bytes are copied from ttlabel over ttentry and the instruction memory
 * covering ttentry is then flushed.
 *
 * The two steps are wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely as the body of an unbraced
 * if/else.  Invocations must be terminated with a semicolon.
 */
#define	CH_SET_TRAP(ttentry, ttlabel)					\
	do {								\
		bcopy((const void *)&(ttlabel), &(ttentry), 32);	\
		flush_instr_mem((caddr_t)&(ttentry), 32);		\
	} while (0)
471 
472 static int min_ecache_size;
473 static uint_t priv_hcl_1;
474 static uint_t priv_hcl_2;
475 static uint_t priv_hcl_4;
476 static uint_t priv_hcl_8;
477 
478 void
479 cpu_setup(void)
480 {
481 	extern int at_flags;
482 	extern int cpc_has_overflow_intr;
483 
484 	/*
485 	 * Setup chip-specific trap handlers.
486 	 */
487 	cpu_init_trap();
488 
489 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
490 
491 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
492 
493 	/*
494 	 * save the cache bootup state.
495 	 */
496 	cache_boot_state = get_dcu() & DCU_CACHE;
497 
498 	/*
499 	 * Due to the number of entries in the fully-associative tlb
500 	 * this may have to be tuned lower than in spitfire.
501 	 */
502 	pp_slots = MIN(8, MAXPP_SLOTS);
503 
504 	/*
505 	 * Block stores do not invalidate all pages of the d$, pagecopy
506 	 * et. al. need virtual translations with virtual coloring taken
507 	 * into consideration.  prefetch/ldd will pollute the d$ on the
508 	 * load side.
509 	 */
510 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
511 
512 	if (use_page_coloring) {
513 		do_pg_coloring = 1;
514 	}
515 
516 	isa_list =
517 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
518 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
519 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
520 
521 	/*
522 	 * On Panther-based machines, this should
523 	 * also include AV_SPARC_POPC too
524 	 */
525 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
526 
527 	/*
528 	 * On cheetah, there's no hole in the virtual address space
529 	 */
530 	hole_start = hole_end = 0;
531 
532 	/*
533 	 * The kpm mapping window.
534 	 * kpm_size:
535 	 *	The size of a single kpm range.
536 	 *	The overall size will be: kpm_size * vac_colors.
537 	 * kpm_vbase:
538 	 *	The virtual start address of the kpm range within the kernel
539 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
540 	 */
541 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
542 	kpm_size_shift = 43;
543 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
544 	kpm_smallpages = 1;
545 
546 	/*
547 	 * The traptrace code uses either %tick or %stick for
548 	 * timestamping.  We have %stick so we can use it.
549 	 */
550 	traptrace_use_stick = 1;
551 
552 	/*
553 	 * Cheetah has a performance counter overflow interrupt
554 	 */
555 	cpc_has_overflow_intr = 1;
556 
557 #if defined(CPU_IMP_DUAL_PAGESIZE)
558 	/*
559 	 * Use Cheetah+ and later dual page size support.
560 	 */
561 	if (!disable_dual_pgsz) {
562 		cpu_impl_dual_pgsz = 1;
563 	}
564 #endif	/* CPU_IMP_DUAL_PAGESIZE */
565 
566 	/*
567 	 * Declare that this architecture/cpu combination does fpRAS.
568 	 */
569 	fpras_implemented = 1;
570 
571 	/*
572 	 * Setup CE lookup table
573 	 */
574 	CE_INITDISPTBL_POPULATE(ce_disp_table);
575 	ce_disp_inited = 1;
576 }
577 
578 /*
579  * Called by setcpudelay
580  */
581 void
582 cpu_init_tick_freq(void)
583 {
584 	/*
585 	 * For UltraSPARC III and beyond we want to use the
586 	 * system clock rate as the basis for low level timing,
587 	 * due to support of mixed speed CPUs and power managment.
588 	 */
589 	if (system_clock_freq == 0)
590 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
591 
592 	sys_tick_freq = system_clock_freq;
593 }
594 
595 #ifdef CHEETAHPLUS_ERRATUM_25
596 /*
597  * Tunables
598  */
599 int cheetah_bpe_off = 0;
600 int cheetah_sendmondo_recover = 1;
601 int cheetah_sendmondo_fullscan = 0;
602 int cheetah_sendmondo_recover_delay = 5;
603 
604 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
605 
606 /*
607  * Recovery Statistics
608  */
609 typedef struct cheetah_livelock_entry	{
610 	int cpuid;		/* fallen cpu */
611 	int buddy;		/* cpu that ran recovery */
612 	clock_t lbolt;		/* when recovery started */
613 	hrtime_t recovery_time;	/* time spent in recovery */
614 } cheetah_livelock_entry_t;
615 
616 #define	CHEETAH_LIVELOCK_NENTRY	32
617 
618 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
619 int cheetah_livelock_entry_nxt;
620 
/*
 * Point statp at the slot of cheetah_livelock_hist selected by
 * cheetah_livelock_entry_nxt, then advance that index, wrapping back to 0
 * once it reaches CHEETAH_LIVELOCK_NENTRY.
 *
 * The expansion is deliberately kept as a braced block so that it remains
 * a complete statement whether or not the invocation is followed by a
 * semicolon.
 */
#define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
	(statp) = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
		cheetah_livelock_entry_nxt = 0;				\
	}								\
}

/*
 * Store val in member item of the history entry that statp points to.
 * statp and val are parenthesized so that arbitrary expressions may be
 * passed for either of them.
 */
#define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	\
	((statp)->item = (val))
629 
630 struct {
631 	hrtime_t hrt;		/* maximum recovery time */
632 	int recovery;		/* recovered */
633 	int full_claimed;	/* maximum pages claimed in full recovery */
634 	int proc_entry;		/* attempted to claim TSB */
635 	int proc_tsb_scan;	/* tsb scanned */
636 	int proc_tsb_partscan;	/* tsb partially scanned */
637 	int proc_tsb_fullscan;	/* whole tsb scanned */
638 	int proc_claimed;	/* maximum pages claimed in tsb scan */
639 	int proc_user;		/* user thread */
640 	int proc_kernel;	/* kernel thread */
641 	int proc_onflt;		/* bad stack */
642 	int proc_cpu;		/* null cpu */
643 	int proc_thread;	/* null thread */
644 	int proc_proc;		/* null proc */
645 	int proc_as;		/* null as */
646 	int proc_hat;		/* null hat */
647 	int proc_hat_inval;	/* hat contents don't make sense */
648 	int proc_hat_busy;	/* hat is changing TSBs */
649 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
650 	int proc_cnum_bad;	/* cnum out of range */
651 	int proc_cnum;		/* last cnum processed */
652 	tte_t proc_tte;		/* last tte processed */
653 } cheetah_livelock_stat;
654 
/* Increment member item of cheetah_livelock_stat. */
#define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++

/* Overwrite member item of cheetah_livelock_stat with value. */
#define	CHEETAH_LIVELOCK_STATSET(item, value)		\
	cheetah_livelock_stat.item = (value)

/*
 * Raise member item of cheetah_livelock_stat to value when value is larger.
 * value is parenthesized so that expressions using operators of lower
 * precedence than '>' compare as intended.  It is expanded twice, so it
 * must not have side effects.
 */
#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
	if ((value) > cheetah_livelock_stat.item)	\
		cheetah_livelock_stat.item = (value);	\
}
664 
665 /*
666  * Attempt to recover a cpu by claiming every cache line as saved
667  * in the TSB that the non-responsive cpu is using. Since we can't
668  * grab any adaptive lock, this is at best an attempt to do so. Because
669  * we don't grab any locks, we must operate under the protection of
670  * on_fault().
671  *
672  * Return 1 if cpuid could be recovered, 0 if failed.
673  */
674 int
675 mondo_recover_proc(uint16_t cpuid, int bn)
676 {
677 	label_t ljb;
678 	cpu_t *cp;
679 	kthread_t *t;
680 	proc_t *p;
681 	struct as *as;
682 	struct hat *hat;
683 	uint_t  cnum;
684 	struct tsb_info *tsbinfop;
685 	struct tsbe *tsbep;
686 	caddr_t tsbp;
687 	caddr_t end_tsbp;
688 	uint64_t paddr;
689 	uint64_t idsr;
690 	u_longlong_t pahi, palo;
691 	int pages_claimed = 0;
692 	tte_t tsbe_tte;
693 	int tried_kernel_tsb = 0;
694 	mmu_ctx_t *mmu_ctxp;
695 
696 	CHEETAH_LIVELOCK_STAT(proc_entry);
697 
698 	if (on_fault(&ljb)) {
699 		CHEETAH_LIVELOCK_STAT(proc_onflt);
700 		goto badstruct;
701 	}
702 
703 	if ((cp = cpu[cpuid]) == NULL) {
704 		CHEETAH_LIVELOCK_STAT(proc_cpu);
705 		goto badstruct;
706 	}
707 
708 	if ((t = cp->cpu_thread) == NULL) {
709 		CHEETAH_LIVELOCK_STAT(proc_thread);
710 		goto badstruct;
711 	}
712 
713 	if ((p = ttoproc(t)) == NULL) {
714 		CHEETAH_LIVELOCK_STAT(proc_proc);
715 		goto badstruct;
716 	}
717 
718 	if ((as = p->p_as) == NULL) {
719 		CHEETAH_LIVELOCK_STAT(proc_as);
720 		goto badstruct;
721 	}
722 
723 	if ((hat = as->a_hat) == NULL) {
724 		CHEETAH_LIVELOCK_STAT(proc_hat);
725 		goto badstruct;
726 	}
727 
728 	if (hat != ksfmmup) {
729 		CHEETAH_LIVELOCK_STAT(proc_user);
730 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
731 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
732 			goto badstruct;
733 		}
734 		tsbinfop = hat->sfmmu_tsb;
735 		if (tsbinfop == NULL) {
736 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
737 			goto badstruct;
738 		}
739 		tsbp = tsbinfop->tsb_va;
740 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
741 	} else {
742 		CHEETAH_LIVELOCK_STAT(proc_kernel);
743 		tsbinfop = NULL;
744 		tsbp = ktsb_base;
745 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
746 	}
747 
748 	/* Verify as */
749 	if (hat->sfmmu_as != as) {
750 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
751 		goto badstruct;
752 	}
753 
754 	mmu_ctxp = CPU_MMU_CTXP(cp);
755 	ASSERT(mmu_ctxp);
756 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
757 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
758 
759 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
760 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
761 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
762 		goto badstruct;
763 	}
764 
765 	do {
766 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
767 
768 		/*
769 		 * Skip TSBs being relocated.  This is important because
770 		 * we want to avoid the following deadlock scenario:
771 		 *
772 		 * 1) when we came in we set ourselves to "in recover" state.
773 		 * 2) when we try to touch TSB being relocated the mapping
774 		 *    will be in the suspended state so we'll spin waiting
775 		 *    for it to be unlocked.
776 		 * 3) when the CPU that holds the TSB mapping locked tries to
777 		 *    unlock it it will send a xtrap which will fail to xcall
778 		 *    us or the CPU we're trying to recover, and will in turn
779 		 *    enter the mondo code.
780 		 * 4) since we are still spinning on the locked mapping
781 		 *    no further progress will be made and the system will
782 		 *    inevitably hard hang.
783 		 *
784 		 * A TSB not being relocated can't begin being relocated
785 		 * while we're accessing it because we check
786 		 * sendmondo_in_recover before relocating TSBs.
787 		 */
788 		if (hat != ksfmmup &&
789 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
790 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
791 			goto next_tsbinfo;
792 		}
793 
794 		for (tsbep = (struct tsbe *)tsbp;
795 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
796 			tsbe_tte = tsbep->tte_data;
797 
798 			if (tsbe_tte.tte_val == 0) {
799 				/*
800 				 * Invalid tte
801 				 */
802 				continue;
803 			}
804 			if (tsbe_tte.tte_se) {
805 				/*
806 				 * Don't want device registers
807 				 */
808 				continue;
809 			}
810 			if (tsbe_tte.tte_cp == 0) {
811 				/*
812 				 * Must be cached in E$
813 				 */
814 				continue;
815 			}
816 			if (tsbep->tte_tag.tag_invalid != 0) {
817 				/*
818 				 * Invalid tag, ingnore this entry.
819 				 */
820 				continue;
821 			}
822 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
823 			idsr = getidsr();
824 			if ((idsr & (IDSR_NACK_BIT(bn) |
825 			    IDSR_BUSY_BIT(bn))) == 0) {
826 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
827 				goto done;
828 			}
829 			pahi = tsbe_tte.tte_pahi;
830 			palo = tsbe_tte.tte_palo;
831 			paddr = (uint64_t)((pahi << 32) |
832 			    (palo << MMU_PAGESHIFT));
833 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
834 			    CH_ECACHE_SUBBLK_SIZE);
835 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
836 				shipit(cpuid, bn);
837 			}
838 			pages_claimed++;
839 		}
840 next_tsbinfo:
841 		if (tsbinfop != NULL)
842 			tsbinfop = tsbinfop->tsb_next;
843 		if (tsbinfop != NULL) {
844 			tsbp = tsbinfop->tsb_va;
845 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
846 		} else if (tsbp == ktsb_base) {
847 			tried_kernel_tsb = 1;
848 		} else if (!tried_kernel_tsb) {
849 			tsbp = ktsb_base;
850 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
851 			hat = ksfmmup;
852 			tsbinfop = NULL;
853 		}
854 	} while (tsbinfop != NULL ||
855 	    ((tsbp == ktsb_base) && !tried_kernel_tsb));
856 
857 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
858 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
859 	no_fault();
860 	idsr = getidsr();
861 	if ((idsr & (IDSR_NACK_BIT(bn) |
862 	    IDSR_BUSY_BIT(bn))) == 0) {
863 		return (1);
864 	} else {
865 		return (0);
866 	}
867 
868 done:
869 	no_fault();
870 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
871 	return (1);
872 
873 badstruct:
874 	no_fault();
875 	return (0);
876 }
877 
878 /*
879  * Attempt to claim ownership, temporarily, of every cache line that a
880  * non-responsive cpu might be using.  This might kick that cpu out of
881  * this state.
882  *
883  * The return value indicates to the caller if we have exhausted all recovery
884  * techniques. If 1 is returned, it is useless to call this function again
885  * even for a different target CPU.
886  */
887 int
888 mondo_recover(uint16_t cpuid, int bn)
889 {
890 	struct memseg *seg;
891 	uint64_t begin_pa, end_pa, cur_pa;
892 	hrtime_t begin_hrt, end_hrt;
893 	int retval = 0;
894 	int pages_claimed = 0;
895 	cheetah_livelock_entry_t *histp;
896 	uint64_t idsr;
897 
898 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
899 		/*
900 		 * Wait while recovery takes place
901 		 */
902 		while (sendmondo_in_recover) {
903 			drv_usecwait(1);
904 		}
905 		/*
906 		 * Assume we didn't claim the whole memory. If
907 		 * the target of this caller is not recovered,
908 		 * it will come back.
909 		 */
910 		return (retval);
911 	}
912 
913 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
914 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
915 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
916 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
917 
918 	begin_hrt = gethrtime_waitfree();
919 	/*
920 	 * First try to claim the lines in the TSB the target
921 	 * may have been using.
922 	 */
923 	if (mondo_recover_proc(cpuid, bn) == 1) {
924 		/*
925 		 * Didn't claim the whole memory
926 		 */
927 		goto done;
928 	}
929 
930 	/*
931 	 * We tried using the TSB. The target is still
932 	 * not recovered. Check if complete memory scan is
933 	 * enabled.
934 	 */
935 	if (cheetah_sendmondo_fullscan == 0) {
936 		/*
937 		 * Full memory scan is disabled.
938 		 */
939 		retval = 1;
940 		goto done;
941 	}
942 
943 	/*
944 	 * Try claiming the whole memory.
945 	 */
946 	for (seg = memsegs; seg; seg = seg->next) {
947 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
948 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
949 		for (cur_pa = begin_pa; cur_pa < end_pa;
950 		    cur_pa += MMU_PAGESIZE) {
951 			idsr = getidsr();
952 			if ((idsr & (IDSR_NACK_BIT(bn) |
953 			    IDSR_BUSY_BIT(bn))) == 0) {
954 				/*
955 				 * Didn't claim all memory
956 				 */
957 				goto done;
958 			}
959 			claimlines(cur_pa, MMU_PAGESIZE,
960 			    CH_ECACHE_SUBBLK_SIZE);
961 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
962 				shipit(cpuid, bn);
963 			}
964 			pages_claimed++;
965 		}
966 	}
967 
968 	/*
969 	 * We did all we could.
970 	 */
971 	retval = 1;
972 
973 done:
974 	/*
975 	 * Update statistics
976 	 */
977 	end_hrt = gethrtime_waitfree();
978 	CHEETAH_LIVELOCK_STAT(recovery);
979 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
980 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
981 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
982 	    (end_hrt -  begin_hrt));
983 
984 	while (cas32(&sendmondo_in_recover, 1, 0) != 1)
985 		;
986 
987 	return (retval);
988 }
989 
990 /*
991  * This is called by the cyclic framework when this CPU becomes online
992  */
993 /*ARGSUSED*/
994 static void
995 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
996 {
997 
998 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
999 	hdlr->cyh_level = CY_LOW_LEVEL;
1000 	hdlr->cyh_arg = NULL;
1001 
1002 	/*
1003 	 * Stagger the start time
1004 	 */
1005 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1006 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1007 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1008 	}
1009 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1010 }
1011 
1012 /*
1013  * Create a low level cyclic to send a xtrap to the next cpu online.
1014  * However, there's no need to have this running on a uniprocessor system.
1015  */
1016 static void
1017 cheetah_nudge_init(void)
1018 {
1019 	cyc_omni_handler_t hdlr;
1020 
1021 	if (max_ncpus == 1) {
1022 		return;
1023 	}
1024 
1025 	hdlr.cyo_online = cheetah_nudge_onln;
1026 	hdlr.cyo_offline = NULL;
1027 	hdlr.cyo_arg = NULL;
1028 
1029 	mutex_enter(&cpu_lock);
1030 	(void) cyclic_add_omni(&hdlr);
1031 	mutex_exit(&cpu_lock);
1032 }
1033 
1034 /*
1035  * Cyclic handler to wake up buddy
1036  */
1037 void
1038 cheetah_nudge_buddy(void)
1039 {
1040 	/*
1041 	 * Disable kernel preemption to protect the cpu list
1042 	 */
1043 	kpreempt_disable();
1044 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1045 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1046 		    0, 0);
1047 	}
1048 	kpreempt_enable();
1049 }
1050 
1051 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1052 
#ifdef SEND_MONDO_STATS
/*
 * Mondo dispatch histograms, built only when SEND_MONDO_STATS is defined.
 *
 * send_one_mondo() files the tick count of each call into x_one_stimes
 * when it is below 8192 ticks (64 buckets of 128 ticks each) and into
 * x_one_ltimes otherwise (bucket index is the count divided by 8192,
 * taken modulo 16).
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];

/*
 * NOTE(review): the x_set_* and x_nack_stimes counters are not updated by
 * send_one_mondo(); presumably they belong to the cpu-set send path --
 * confirm against that code.
 */
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
1061 
1062 /*
1063  * Note: A version of this function is used by the debugger via the KDI,
1064  * and must be kept in sync with this version.  Any changes made to this
1065  * function to support new chips or to accomodate errata must also be included
1066  * in the KDI-specific version.  See us3_kdi.c.
1067  */
1068 void
1069 send_one_mondo(int cpuid)
1070 {
1071 	int busy, nack;
1072 	uint64_t idsr, starttick, endtick, tick, lasttick;
1073 	uint64_t busymask;
1074 #ifdef	CHEETAHPLUS_ERRATUM_25
1075 	int recovered = 0;
1076 #endif
1077 
1078 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1079 	starttick = lasttick = gettick();
1080 	shipit(cpuid, 0);
1081 	endtick = starttick + xc_tick_limit;
1082 	busy = nack = 0;
1083 #if defined(JALAPENO) || defined(SERRANO)
1084 	/*
1085 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1086 	 * will be used for dispatching interrupt. For now, assume
1087 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1088 	 * issues with respect to BUSY/NACK pair usage.
1089 	 */
1090 	busymask  = IDSR_BUSY_BIT(cpuid);
1091 #else /* JALAPENO || SERRANO */
1092 	busymask = IDSR_BUSY;
1093 #endif /* JALAPENO || SERRANO */
1094 	for (;;) {
1095 		idsr = getidsr();
1096 		if (idsr == 0)
1097 			break;
1098 
1099 		tick = gettick();
1100 		/*
1101 		 * If there is a big jump between the current tick
1102 		 * count and lasttick, we have probably hit a break
1103 		 * point.  Adjust endtick accordingly to avoid panic.
1104 		 */
1105 		if (tick > (lasttick + xc_tick_jump_limit))
1106 			endtick += (tick - lasttick);
1107 		lasttick = tick;
1108 		if (tick > endtick) {
1109 			if (panic_quiesce)
1110 				return;
1111 #ifdef	CHEETAHPLUS_ERRATUM_25
1112 			if (cheetah_sendmondo_recover && recovered == 0) {
1113 				if (mondo_recover(cpuid, 0)) {
1114 					/*
1115 					 * We claimed the whole memory or
1116 					 * full scan is disabled.
1117 					 */
1118 					recovered++;
1119 				}
1120 				tick = gettick();
1121 				endtick = tick + xc_tick_limit;
1122 				lasttick = tick;
1123 				/*
1124 				 * Recheck idsr
1125 				 */
1126 				continue;
1127 			} else
1128 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1129 			{
1130 				cmn_err(CE_PANIC, "send mondo timeout "
1131 				    "(target 0x%x) [%d NACK %d BUSY]",
1132 				    cpuid, nack, busy);
1133 			}
1134 		}
1135 
1136 		if (idsr & busymask) {
1137 			busy++;
1138 			continue;
1139 		}
1140 		drv_usecwait(1);
1141 		shipit(cpuid, 0);
1142 		nack++;
1143 		busy = 0;
1144 	}
1145 #ifdef SEND_MONDO_STATS
1146 	{
1147 		int n = gettick() - starttick;
1148 		if (n < 8192)
1149 			x_one_stimes[n >> 7]++;
1150 		else
1151 			x_one_ltimes[(n >> 13) & 0xf]++;
1152 	}
1153 #endif
1154 }
1155 
/*
 * syncfpu() has an empty body in this cpu module; calling it does
 * nothing.
 */
void
syncfpu(void)
{
}
1160 
1161 /*
1162  * Return processor specific async error structure
1163  * size used.
1164  */
1165 int
1166 cpu_aflt_size(void)
1167 {
1168 	return (sizeof (ch_async_flt_t));
1169 }
1170 
/*
 * Tunable.  Nonzero (the default) lets the panic path check the logout
 * areas of the other cpus for errors that could generate syndrome 71;
 * set it to 0 to disable that check.
 */
int enable_check_other_cpus_logout = 1;
1176 
1177 /*
1178  * Check other cpus logout area for potential synd 71 generating
1179  * errors.
1180  */
1181 static void
1182 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1183     ch_cpu_logout_t *clop)
1184 {
1185 	struct async_flt *aflt;
1186 	ch_async_flt_t ch_flt;
1187 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1188 
1189 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1190 		return;
1191 	}
1192 
1193 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1194 
1195 	t_afar = clop->clo_data.chd_afar;
1196 	t_afsr = clop->clo_data.chd_afsr;
1197 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1198 #if defined(SERRANO)
1199 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1200 #endif	/* SERRANO */
1201 
1202 	/*
1203 	 * In order to simplify code, we maintain this afsr_errs
1204 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1205 	 * sticky bits.
1206 	 */
1207 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1208 	    (t_afsr & C_AFSR_ALL_ERRS);
1209 
1210 	/* Setup the async fault structure */
1211 	aflt = (struct async_flt *)&ch_flt;
1212 	aflt->flt_id = gethrtime_waitfree();
1213 	ch_flt.afsr_ext = t_afsr_ext;
1214 	ch_flt.afsr_errs = t_afsr_errs;
1215 	aflt->flt_stat = t_afsr;
1216 	aflt->flt_addr = t_afar;
1217 	aflt->flt_bus_id = cpuid;
1218 	aflt->flt_inst = cpuid;
1219 	aflt->flt_pc = tpc;
1220 	aflt->flt_prot = AFLT_PROT_NONE;
1221 	aflt->flt_class = CPU_FAULT;
1222 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1223 	aflt->flt_tl = tl;
1224 	aflt->flt_status = ecc_type;
1225 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1226 
1227 	/*
1228 	 * Queue events on the async event queue, one event per error bit.
1229 	 * If no events are queued, queue an event to complain.
1230 	 */
1231 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1232 		ch_flt.flt_type = CPU_INV_AFSR;
1233 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1234 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1235 		    aflt->flt_panic);
1236 	}
1237 
1238 	/*
1239 	 * Zero out + invalidate CPU logout.
1240 	 */
1241 	bzero(clop, sizeof (ch_cpu_logout_t));
1242 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1243 }
1244 
1245 /*
1246  * Check the logout areas of all other cpus for unlogged errors.
1247  */
1248 static void
1249 cpu_check_other_cpus_logout(void)
1250 {
1251 	int i, j;
1252 	processorid_t myid;
1253 	struct cpu *cp;
1254 	ch_err_tl1_data_t *cl1p;
1255 
1256 	myid = CPU->cpu_id;
1257 	for (i = 0; i < NCPU; i++) {
1258 		cp = cpu[i];
1259 
1260 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1261 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1262 			continue;
1263 		}
1264 
1265 		/*
1266 		 * Check each of the tl>0 logout areas
1267 		 */
1268 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1269 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1270 			if (cl1p->ch_err_tl1_flags == 0)
1271 				continue;
1272 
1273 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1274 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1275 		}
1276 
1277 		/*
1278 		 * Check each of the remaining logout areas
1279 		 */
1280 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1281 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1282 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1283 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1284 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1285 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1286 	}
1287 }
1288 
1289 /*
1290  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1291  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1292  * flush the error that caused the UCU/UCC, then again here at the end to
1293  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1294  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1295  * another Fast ECC trap.
1296  *
1297  * Cheetah+ also handles: TSCE: No additional processing required.
1298  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1299  *
1300  * Note that the p_clo_flags input is only valid in cases where the
1301  * cpu_private struct is not yet initialized (since that is the only
1302  * time that information cannot be obtained from the logout struct.)
1303  */
1304 /*ARGSUSED*/
1305 void
1306 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1307 {
1308 	ch_cpu_logout_t *clop;
1309 	uint64_t ceen, nceen;
1310 
1311 	/*
1312 	 * Get the CPU log out info. If we can't find our CPU private
1313 	 * pointer, then we will have to make due without any detailed
1314 	 * logout information.
1315 	 */
1316 	if (CPU_PRIVATE(CPU) == NULL) {
1317 		clop = NULL;
1318 		ceen = p_clo_flags & EN_REG_CEEN;
1319 		nceen = p_clo_flags & EN_REG_NCEEN;
1320 	} else {
1321 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1322 		ceen = clop->clo_flags & EN_REG_CEEN;
1323 		nceen = clop->clo_flags & EN_REG_NCEEN;
1324 	}
1325 
1326 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1327 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1328 }
1329 
1330 /*
1331  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1332  * ECC at TL>0.  Need to supply either a error register pointer or a
1333  * cpu logout structure pointer.
1334  */
1335 static void
1336 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1337     uint64_t nceen, ch_cpu_logout_t *clop)
1338 {
1339 	struct async_flt *aflt;
1340 	ch_async_flt_t ch_flt;
1341 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1342 	char pr_reason[MAX_REASON_STRING];
1343 	ch_cpu_errors_t cpu_error_regs;
1344 
1345 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1346 	/*
1347 	 * If no cpu logout data, then we will have to make due without
1348 	 * any detailed logout information.
1349 	 */
1350 	if (clop == NULL) {
1351 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1352 		get_cpu_error_state(&cpu_error_regs);
1353 		set_cpu_error_state(&cpu_error_regs);
1354 		t_afar = cpu_error_regs.afar;
1355 		t_afsr = cpu_error_regs.afsr;
1356 		t_afsr_ext = cpu_error_regs.afsr_ext;
1357 #if defined(SERRANO)
1358 		ch_flt.afar2 = cpu_error_regs.afar2;
1359 #endif	/* SERRANO */
1360 	} else {
1361 		t_afar = clop->clo_data.chd_afar;
1362 		t_afsr = clop->clo_data.chd_afsr;
1363 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1364 #if defined(SERRANO)
1365 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1366 #endif	/* SERRANO */
1367 	}
1368 
1369 	/*
1370 	 * In order to simplify code, we maintain this afsr_errs
1371 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1372 	 * sticky bits.
1373 	 */
1374 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1375 	    (t_afsr & C_AFSR_ALL_ERRS);
1376 	pr_reason[0] = '\0';
1377 
1378 	/* Setup the async fault structure */
1379 	aflt = (struct async_flt *)&ch_flt;
1380 	aflt->flt_id = gethrtime_waitfree();
1381 	ch_flt.afsr_ext = t_afsr_ext;
1382 	ch_flt.afsr_errs = t_afsr_errs;
1383 	aflt->flt_stat = t_afsr;
1384 	aflt->flt_addr = t_afar;
1385 	aflt->flt_bus_id = getprocessorid();
1386 	aflt->flt_inst = CPU->cpu_id;
1387 	aflt->flt_pc = tpc;
1388 	aflt->flt_prot = AFLT_PROT_NONE;
1389 	aflt->flt_class = CPU_FAULT;
1390 	aflt->flt_priv = priv;
1391 	aflt->flt_tl = tl;
1392 	aflt->flt_status = ECC_F_TRAP;
1393 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1394 
1395 	/*
1396 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1397 	 * cmn_err messages out to the console.  The situation is a UCU (in
1398 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1399 	 * The messages for the UCU and WDU are enqueued and then pulled off
1400 	 * the async queue via softint and syslogd starts to process them
1401 	 * but doesn't get them to the console.  The UE causes a panic, but
1402 	 * since the UCU/WDU messages are already in transit, those aren't
1403 	 * on the async queue.  The hack is to check if we have a matching
1404 	 * WDU event for the UCU, and if it matches, we're more than likely
1405 	 * going to panic with a UE, unless we're under protection.  So, we
1406 	 * check to see if we got a matching WDU event and if we're under
1407 	 * protection.
1408 	 *
1409 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1410 	 * looks like this:
1411 	 *    UCU->WDU->UE
1412 	 * For Panther, it could look like either of these:
1413 	 *    UCU---->WDU->L3_WDU->UE
1414 	 *    L3_UCU->WDU->L3_WDU->UE
1415 	 */
1416 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1417 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1418 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1419 		get_cpu_error_state(&cpu_error_regs);
1420 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
1421 			aflt->flt_panic |=
1422 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1423 			    (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
1424 			    (cpu_error_regs.afar == t_afar));
1425 			aflt->flt_panic |= ((clop == NULL) &&
1426 			    (t_afsr_errs & C_AFSR_WDU) &&
1427 			    (t_afsr_errs & C_AFSR_L3_WDU));
1428 		} else {
1429 			aflt->flt_panic |=
1430 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1431 			    (cpu_error_regs.afar == t_afar));
1432 			aflt->flt_panic |= ((clop == NULL) &&
1433 			    (t_afsr_errs & C_AFSR_WDU));
1434 		}
1435 	}
1436 
1437 	/*
1438 	 * Queue events on the async event queue, one event per error bit.
1439 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1440 	 * queue an event to complain.
1441 	 */
1442 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1443 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1444 		ch_flt.flt_type = CPU_INV_AFSR;
1445 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1446 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1447 		    aflt->flt_panic);
1448 	}
1449 
1450 	/*
1451 	 * Zero out + invalidate CPU logout.
1452 	 */
1453 	if (clop) {
1454 		bzero(clop, sizeof (ch_cpu_logout_t));
1455 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1456 	}
1457 
1458 	/*
1459 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1460 	 * or disrupting errors have happened.  We do this because if a
1461 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1462 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1463 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1464 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1465 	 * deferred or disrupting error happening between checking the AFSR and
1466 	 * enabling NCEEN/CEEN.
1467 	 *
1468 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1469 	 * taken.
1470 	 */
1471 	set_error_enable(get_error_enable() | (nceen | ceen));
1472 	if (clear_errors(&ch_flt)) {
1473 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1474 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1475 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1476 		    NULL);
1477 	}
1478 
1479 	/*
1480 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1481 	 * be logged as part of the panic flow.
1482 	 */
1483 	if (aflt->flt_panic)
1484 		fm_panic("%sError(s)", pr_reason);
1485 
1486 	/*
1487 	 * Flushing the Ecache here gets the part of the trap handler that
1488 	 * is run at TL=1 out of the Ecache.
1489 	 */
1490 	cpu_flush_ecache();
1491 }
1492 
1493 /*
1494  * This is called via sys_trap from pil15_interrupt code if the
1495  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1496  * various ch_err_tl1_data structures for valid entries based on the bit
1497  * settings in the ch_err_tl1_flags entry of the structure.
1498  */
1499 /*ARGSUSED*/
1500 void
1501 cpu_tl1_error(struct regs *rp, int panic)
1502 {
1503 	ch_err_tl1_data_t *cl1p, cl1;
1504 	int i, ncl1ps;
1505 	uint64_t me_flags;
1506 	uint64_t ceen, nceen;
1507 
1508 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1509 		cl1p = &ch_err_tl1_data;
1510 		ncl1ps = 1;
1511 	} else if (CPU_PRIVATE(CPU) != NULL) {
1512 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1513 		ncl1ps = CH_ERR_TL1_TLMAX;
1514 	} else {
1515 		ncl1ps = 0;
1516 	}
1517 
1518 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1519 		if (cl1p->ch_err_tl1_flags == 0)
1520 			continue;
1521 
1522 		/*
1523 		 * Grab a copy of the logout data and invalidate
1524 		 * the logout area.
1525 		 */
1526 		cl1 = *cl1p;
1527 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1528 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1529 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1530 
1531 		/*
1532 		 * Log "first error" in ch_err_tl1_data.
1533 		 */
1534 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1535 			ceen = get_error_enable() & EN_REG_CEEN;
1536 			nceen = get_error_enable() & EN_REG_NCEEN;
1537 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1538 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1539 		}
1540 #if defined(CPU_IMP_L1_CACHE_PARITY)
1541 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1542 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1543 			    (caddr_t)cl1.ch_err_tl1_tpc);
1544 		}
1545 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1546 
1547 		/*
1548 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1549 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1550 		 * if the structure is busy, we just do the cache flushing
1551 		 * we have to do and then do the retry.  So the AFSR/AFAR
1552 		 * at this point *should* have some relevant info.  If there
1553 		 * are no valid errors in the AFSR, we'll assume they've
1554 		 * already been picked up and logged.  For I$/D$ parity,
1555 		 * we just log an event with an "Unknown" (NULL) TPC.
1556 		 */
1557 		if (me_flags & CH_ERR_FECC) {
1558 			ch_cpu_errors_t cpu_error_regs;
1559 			uint64_t t_afsr_errs;
1560 
1561 			/*
1562 			 * Get the error registers and see if there's
1563 			 * a pending error.  If not, don't bother
1564 			 * generating an "Invalid AFSR" error event.
1565 			 */
1566 			get_cpu_error_state(&cpu_error_regs);
1567 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1568 			    C_AFSR_EXT_ALL_ERRS) |
1569 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1570 			if (t_afsr_errs != 0) {
1571 				ceen = get_error_enable() & EN_REG_CEEN;
1572 				nceen = get_error_enable() & EN_REG_NCEEN;
1573 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1574 				    1, ceen, nceen, NULL);
1575 			}
1576 		}
1577 #if defined(CPU_IMP_L1_CACHE_PARITY)
1578 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1579 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1580 		}
1581 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1582 	}
1583 }
1584 
1585 /*
1586  * Called from Fast ECC TL>0 handler in case of fatal error.
1587  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1588  * but if we don't, we'll panic with something reasonable.
1589  */
1590 /*ARGSUSED*/
1591 void
1592 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1593 {
1594 	cpu_tl1_error(rp, 1);
1595 	/*
1596 	 * Should never return, but just in case.
1597 	 */
1598 	fm_panic("Unsurvivable ECC Error at TL>0");
1599 }
1600 
1601 /*
1602  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1603  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1604  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1605  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1606  *
1607  * Cheetah+ also handles (No additional processing required):
1608  *    DUE, DTO, DBERR	(NCEEN controlled)
1609  *    THCE		(CEEN and ET_ECC_en controlled)
1610  *    TUE		(ET_ECC_en controlled)
1611  *
1612  * Panther further adds:
1613  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1614  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1615  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1616  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1617  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1618  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1619  *
1620  * Note that the p_clo_flags input is only valid in cases where the
1621  * cpu_private struct is not yet initialized (since that is the only
1622  * time that information cannot be obtained from the logout struct.)
1623  */
1624 /*ARGSUSED*/
1625 void
1626 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1627 {
1628 	struct async_flt *aflt;
1629 	ch_async_flt_t ch_flt;
1630 	char pr_reason[MAX_REASON_STRING];
1631 	ch_cpu_logout_t *clop;
1632 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1633 	ch_cpu_errors_t cpu_error_regs;
1634 
1635 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1636 	/*
1637 	 * Get the CPU log out info. If we can't find our CPU private
1638 	 * pointer, then we will have to make due without any detailed
1639 	 * logout information.
1640 	 */
1641 	if (CPU_PRIVATE(CPU) == NULL) {
1642 		clop = NULL;
1643 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1644 		get_cpu_error_state(&cpu_error_regs);
1645 		set_cpu_error_state(&cpu_error_regs);
1646 		t_afar = cpu_error_regs.afar;
1647 		t_afsr = cpu_error_regs.afsr;
1648 		t_afsr_ext = cpu_error_regs.afsr_ext;
1649 #if defined(SERRANO)
1650 		ch_flt.afar2 = cpu_error_regs.afar2;
1651 #endif	/* SERRANO */
1652 	} else {
1653 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1654 		t_afar = clop->clo_data.chd_afar;
1655 		t_afsr = clop->clo_data.chd_afsr;
1656 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1657 #if defined(SERRANO)
1658 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1659 #endif	/* SERRANO */
1660 	}
1661 
1662 	/*
1663 	 * In order to simplify code, we maintain this afsr_errs
1664 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1665 	 * sticky bits.
1666 	 */
1667 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1668 	    (t_afsr & C_AFSR_ALL_ERRS);
1669 
1670 	pr_reason[0] = '\0';
1671 	/* Setup the async fault structure */
1672 	aflt = (struct async_flt *)&ch_flt;
1673 	ch_flt.afsr_ext = t_afsr_ext;
1674 	ch_flt.afsr_errs = t_afsr_errs;
1675 	aflt->flt_stat = t_afsr;
1676 	aflt->flt_addr = t_afar;
1677 	aflt->flt_pc = (caddr_t)rp->r_pc;
1678 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1679 	aflt->flt_tl = 0;
1680 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1681 
1682 	/*
1683 	 * If this trap is a result of one of the errors not masked
1684 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1685 	 * indicate that a timeout is to be set later.
1686 	 */
1687 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1688 	    !aflt->flt_panic)
1689 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1690 	else
1691 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1692 
1693 	/*
1694 	 * log the CE and clean up
1695 	 */
1696 	cpu_log_and_clear_ce(&ch_flt);
1697 
1698 	/*
1699 	 * We re-enable CEEN (if required) and check if any disrupting errors
1700 	 * have happened.  We do this because if a disrupting error had occurred
1701 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1702 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1703 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1704 	 * of a error happening between checking the AFSR and enabling CEEN.
1705 	 */
1706 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1707 		set_error_enable(get_error_enable() | EN_REG_CEEN);
1708 	if (clear_errors(&ch_flt)) {
1709 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1710 		    NULL);
1711 	}
1712 
1713 	/*
1714 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1715 	 * be logged as part of the panic flow.
1716 	 */
1717 	if (aflt->flt_panic)
1718 		fm_panic("%sError(s)", pr_reason);
1719 }
1720 
1721 /*
1722  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1723  * L3_EDU:BLD, TO, and BERR events.
1724  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1725  *
1726  * Cheetah+: No additional errors handled.
1727  *
1728  * Note that the p_clo_flags input is only valid in cases where the
1729  * cpu_private struct is not yet initialized (since that is the only
1730  * time that information cannot be obtained from the logout struct.)
1731  */
1732 /*ARGSUSED*/
1733 void
1734 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1735 {
1736 	ushort_t ttype, tl;
1737 	ch_async_flt_t ch_flt;
1738 	struct async_flt *aflt;
1739 	int trampolined = 0;
1740 	char pr_reason[MAX_REASON_STRING];
1741 	ch_cpu_logout_t *clop;
1742 	uint64_t ceen, clo_flags;
1743 	uint64_t log_afsr;
1744 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1745 	ch_cpu_errors_t cpu_error_regs;
1746 	int expected = DDI_FM_ERR_UNEXPECTED;
1747 	ddi_acc_hdl_t *hp;
1748 
1749 	/*
1750 	 * We need to look at p_flag to determine if the thread detected an
1751 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1752 	 * because we just need a consistent snapshot and we know that everyone
1753 	 * else will store a consistent set of bits while holding p_lock.  We
1754 	 * don't have to worry about a race because SDOCORE is set once prior
1755 	 * to doing i/o from the process's address space and is never cleared.
1756 	 */
1757 	uint_t pflag = ttoproc(curthread)->p_flag;
1758 
1759 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1760 	/*
1761 	 * Get the CPU log out info. If we can't find our CPU private
1762 	 * pointer then we will have to make do without any detailed
1763 	 * logout information.
1764 	 */
1765 	if (CPU_PRIVATE(CPU) == NULL) {
1766 		clop = NULL;
1767 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1768 		get_cpu_error_state(&cpu_error_regs);
1769 		set_cpu_error_state(&cpu_error_regs);
1770 		t_afar = cpu_error_regs.afar;
1771 		t_afsr = cpu_error_regs.afsr;
1772 		t_afsr_ext = cpu_error_regs.afsr_ext;
1773 #if defined(SERRANO)
1774 		ch_flt.afar2 = cpu_error_regs.afar2;
1775 #endif	/* SERRANO */
1776 		clo_flags = p_clo_flags;
1777 	} else {
1778 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1779 		t_afar = clop->clo_data.chd_afar;
1780 		t_afsr = clop->clo_data.chd_afsr;
1781 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1782 #if defined(SERRANO)
1783 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1784 #endif	/* SERRANO */
1785 		clo_flags = clop->clo_flags;
1786 	}
1787 
1788 	/*
1789 	 * In order to simplify code, we maintain this afsr_errs
1790 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1791 	 * sticky bits.
1792 	 */
1793 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1794 	    (t_afsr & C_AFSR_ALL_ERRS);
1795 	pr_reason[0] = '\0';
1796 
1797 	/*
1798 	 * Grab information encoded into our clo_flags field.
1799 	 */
1800 	ceen = clo_flags & EN_REG_CEEN;
1801 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1802 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1803 
1804 	/*
1805 	 * handle the specific error
1806 	 */
1807 	aflt = (struct async_flt *)&ch_flt;
1808 	aflt->flt_id = gethrtime_waitfree();
1809 	aflt->flt_bus_id = getprocessorid();
1810 	aflt->flt_inst = CPU->cpu_id;
1811 	ch_flt.afsr_ext = t_afsr_ext;
1812 	ch_flt.afsr_errs = t_afsr_errs;
1813 	aflt->flt_stat = t_afsr;
1814 	aflt->flt_addr = t_afar;
1815 	aflt->flt_pc = (caddr_t)rp->r_pc;
1816 	aflt->flt_prot = AFLT_PROT_NONE;
1817 	aflt->flt_class = CPU_FAULT;
1818 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1819 	aflt->flt_tl = (uchar_t)tl;
1820 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1821 	    C_AFSR_PANIC(t_afsr_errs));
1822 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1823 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1824 
1825 	/*
1826 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1827 	 * see if we were executing in the kernel under on_trap() or t_lofault
1828 	 * protection.  If so, modify the saved registers so that we return
1829 	 * from the trap to the appropriate trampoline routine.
1830 	 */
1831 	if (aflt->flt_priv && tl == 0) {
1832 		if (curthread->t_ontrap != NULL) {
1833 			on_trap_data_t *otp = curthread->t_ontrap;
1834 
1835 			if (otp->ot_prot & OT_DATA_EC) {
1836 				aflt->flt_prot = AFLT_PROT_EC;
1837 				otp->ot_trap |= OT_DATA_EC;
1838 				rp->r_pc = otp->ot_trampoline;
1839 				rp->r_npc = rp->r_pc + 4;
1840 				trampolined = 1;
1841 			}
1842 
1843 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1844 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1845 				aflt->flt_prot = AFLT_PROT_ACCESS;
1846 				otp->ot_trap |= OT_DATA_ACCESS;
1847 				rp->r_pc = otp->ot_trampoline;
1848 				rp->r_npc = rp->r_pc + 4;
1849 				trampolined = 1;
1850 				/*
1851 				 * for peeks and caut_gets errors are expected
1852 				 */
1853 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1854 				if (!hp)
1855 					expected = DDI_FM_ERR_PEEK;
1856 				else if (hp->ah_acc.devacc_attr_access ==
1857 				    DDI_CAUTIOUS_ACC)
1858 					expected = DDI_FM_ERR_EXPECTED;
1859 			}
1860 
1861 		} else if (curthread->t_lofault) {
1862 			aflt->flt_prot = AFLT_PROT_COPY;
1863 			rp->r_g1 = EFAULT;
1864 			rp->r_pc = curthread->t_lofault;
1865 			rp->r_npc = rp->r_pc + 4;
1866 			trampolined = 1;
1867 		}
1868 	}
1869 
1870 	/*
1871 	 * If we're in user mode or we're doing a protected copy, we either
1872 	 * want the ASTON code below to send a signal to the user process
1873 	 * or we want to panic if aft_panic is set.
1874 	 *
1875 	 * If we're in privileged mode and we're not doing a copy, then we
1876 	 * need to check if we've trampolined.  If we haven't trampolined,
1877 	 * we should panic.
1878 	 */
1879 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1880 		if (t_afsr_errs &
1881 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1882 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1883 			aflt->flt_panic |= aft_panic;
1884 	} else if (!trampolined) {
1885 			aflt->flt_panic = 1;
1886 	}
1887 
1888 	/*
1889 	 * If we've trampolined due to a privileged TO or BERR, or if an
1890 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1891 	 * event for that TO or BERR.  Queue all other events (if any) besides
1892 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1893 	 * ignore the number of events queued.  If we haven't trampolined due
1894 	 * to a TO or BERR, just enqueue events normally.
1895 	 */
1896 	log_afsr = t_afsr_errs;
1897 	if (trampolined) {
1898 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1899 	} else if (!aflt->flt_priv) {
1900 		/*
1901 		 * User mode, suppress messages if
1902 		 * cpu_berr_to_verbose is not set.
1903 		 */
1904 		if (!cpu_berr_to_verbose)
1905 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1906 	}
1907 
1908 	/*
1909 	 * Log any errors that occurred
1910 	 */
1911 	if (((log_afsr &
1912 	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1913 	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1914 	    (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1915 		ch_flt.flt_type = CPU_INV_AFSR;
1916 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1917 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1918 		    aflt->flt_panic);
1919 	}
1920 
1921 	/*
1922 	 * Zero out + invalidate CPU logout.
1923 	 */
1924 	if (clop) {
1925 		bzero(clop, sizeof (ch_cpu_logout_t));
1926 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1927 	}
1928 
1929 #if defined(JALAPENO) || defined(SERRANO)
1930 	/*
1931 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1932 	 * IO errors that may have resulted in this trap.
1933 	 */
1934 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1935 		cpu_run_bus_error_handlers(aflt, expected);
1936 	}
1937 
1938 	/*
1939 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1940 	 * line from the Ecache.  We also need to query the bus nexus for
1941 	 * fatal errors.  Attempts to do diagnostic read on caches may
1942 	 * introduce more errors (especially when the module is bad).
1943 	 */
1944 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1945 		/*
1946 		 * Ask our bus nexus friends if they have any fatal errors.  If
1947 		 * so, they will log appropriate error messages.
1948 		 */
1949 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1950 			aflt->flt_panic = 1;
1951 
1952 		/*
1953 		 * We got a UE or RUE and are panicking, save the fault PA in
1954 		 * a known location so that the platform specific panic code
1955 		 * can check for copyback errors.
1956 		 */
1957 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1958 			panic_aflt = *aflt;
1959 		}
1960 	}
1961 
1962 	/*
1963 	 * Flush Ecache line or entire Ecache
1964 	 */
1965 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1966 		cpu_error_ecache_flush(&ch_flt);
1967 #else /* JALAPENO || SERRANO */
1968 	/*
1969 	 * UE/BERR/TO: Call our bus nexus friends to check for
1970 	 * IO errors that may have resulted in this trap.
1971 	 */
1972 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1973 		cpu_run_bus_error_handlers(aflt, expected);
1974 	}
1975 
1976 	/*
1977 	 * UE: If the UE is in memory, we need to flush the bad
1978 	 * line from the Ecache.  We also need to query the bus nexus for
1979 	 * fatal errors.  Attempts to do diagnostic read on caches may
1980 	 * introduce more errors (especially when the module is bad).
1981 	 */
1982 	if (t_afsr & C_AFSR_UE) {
1983 		/*
1984 		 * Ask our legacy bus nexus friends if they have any fatal
1985 		 * errors.  If so, they will log appropriate error messages.
1986 		 */
1987 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1988 			aflt->flt_panic = 1;
1989 
1990 		/*
1991 		 * We got a UE and are panicking, save the fault PA in a known
1992 		 * location so that the platform specific panic code can check
1993 		 * for copyback errors.
1994 		 */
1995 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1996 			panic_aflt = *aflt;
1997 		}
1998 	}
1999 
2000 	/*
2001 	 * Flush Ecache line or entire Ecache
2002 	 */
2003 	if (t_afsr_errs &
2004 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
2005 		cpu_error_ecache_flush(&ch_flt);
2006 #endif /* JALAPENO || SERRANO */
2007 
2008 	/*
2009 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2010 	 * or disrupting errors have happened.  We do this because if a
2011 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2012 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2013 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2014 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2015 	 * deferred or disrupting error happening between checking the AFSR and
2016 	 * enabling NCEEN/CEEN.
2017 	 *
2018 	 * Note: CEEN reenabled only if it was on when trap taken.
2019 	 */
2020 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2021 	if (clear_errors(&ch_flt)) {
2022 		/*
2023 		 * Check for secondary errors, and avoid panicking if we
2024 		 * have them
2025 		 */
2026 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2027 		    t_afar) == 0) {
2028 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2029 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2030 		}
2031 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2032 		    NULL);
2033 	}
2034 
2035 	/*
2036 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2037 	 * be logged as part of the panic flow.
2038 	 */
2039 	if (aflt->flt_panic)
2040 		fm_panic("%sError(s)", pr_reason);
2041 
2042 	/*
2043 	 * If we queued an error and we are going to return from the trap and
2044 	 * the error was in user mode or inside of a copy routine, set AST flag
2045 	 * so the queue will be drained before returning to user mode.  The
2046 	 * AST processing will also act on our failure policy.
2047 	 */
2048 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2049 		int pcb_flag = 0;
2050 
2051 		if (t_afsr_errs &
2052 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2053 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2054 			pcb_flag |= ASYNC_HWERR;
2055 
2056 		if (t_afsr & C_AFSR_BERR)
2057 			pcb_flag |= ASYNC_BERR;
2058 
2059 		if (t_afsr & C_AFSR_TO)
2060 			pcb_flag |= ASYNC_BTO;
2061 
2062 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2063 		aston(curthread);
2064 	}
2065 }
2066 
2067 #if defined(CPU_IMP_L1_CACHE_PARITY)
2068 /*
2069  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2070  *
2071  * For Panther, P$ data parity errors during floating point load hits
2072  * are also detected (reported as TT 0x71) and handled by this trap
2073  * handler.
2074  *
2075  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2076  * is available.
2077  */
2078 /*ARGSUSED*/
2079 void
2080 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2081 {
2082 	ch_async_flt_t ch_flt;
2083 	struct async_flt *aflt;
2084 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2085 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2086 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2087 	char *error_class;
2088 
2089 	/*
2090 	 * Log the error.
2091 	 * For icache parity errors the fault address is the trap PC.
2092 	 * For dcache/pcache parity errors the instruction would have to
2093 	 * be decoded to determine the address and that isn't possible
2094 	 * at high PIL.
2095 	 */
2096 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2097 	aflt = (struct async_flt *)&ch_flt;
2098 	aflt->flt_id = gethrtime_waitfree();
2099 	aflt->flt_bus_id = getprocessorid();
2100 	aflt->flt_inst = CPU->cpu_id;
2101 	aflt->flt_pc = tpc;
2102 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2103 	aflt->flt_prot = AFLT_PROT_NONE;
2104 	aflt->flt_class = CPU_FAULT;
2105 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2106 	aflt->flt_tl = tl;
2107 	aflt->flt_panic = panic;
2108 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2109 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2110 
2111 	if (iparity) {
2112 		cpu_icache_parity_info(&ch_flt);
2113 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2114 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2115 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2116 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2117 		else
2118 			error_class = FM_EREPORT_CPU_USIII_IPE;
2119 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2120 	} else {
2121 		cpu_dcache_parity_info(&ch_flt);
2122 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2123 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2124 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2125 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2126 		else
2127 			error_class = FM_EREPORT_CPU_USIII_DPE;
2128 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2129 		/*
2130 		 * For panther we also need to check the P$ for parity errors.
2131 		 */
2132 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2133 			cpu_pcache_parity_info(&ch_flt);
2134 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2135 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2136 				aflt->flt_payload =
2137 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2138 			}
2139 		}
2140 	}
2141 
2142 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2143 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2144 
2145 	if (iparity) {
2146 		/*
2147 		 * Invalidate entire I$.
2148 		 * This is required due to the use of diagnostic ASI
2149 		 * accesses that may result in a loss of I$ coherency.
2150 		 */
2151 		if (cache_boot_state & DCU_IC) {
2152 			flush_icache();
2153 		}
2154 		/*
2155 		 * According to section P.3.1 of the Panther PRM, we
2156 		 * need to do a little more for recovery on those
2157 		 * CPUs after encountering an I$ parity error.
2158 		 */
2159 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2160 			flush_ipb();
2161 			correct_dcache_parity(dcache_size,
2162 			    dcache_linesize);
2163 			flush_pcache();
2164 		}
2165 	} else {
2166 		/*
2167 		 * Since the valid bit is ignored when checking parity the
2168 		 * D$ data and tag must also be corrected.  Set D$ data bits
2169 		 * to zero and set utag to 0, 1, 2, 3.
2170 		 */
2171 		correct_dcache_parity(dcache_size, dcache_linesize);
2172 
2173 		/*
2174 		 * According to section P.3.3 of the Panther PRM, we
2175 		 * need to do a little more for recovery on those
2176 		 * CPUs after encountering a D$ or P$ parity error.
2177 		 *
2178 		 * As far as clearing P$ parity errors, it is enough to
2179 		 * simply invalidate all entries in the P$ since P$ parity
2180 		 * error traps are only generated for floating point load
2181 		 * hits.
2182 		 */
2183 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2184 			flush_icache();
2185 			flush_ipb();
2186 			flush_pcache();
2187 		}
2188 	}
2189 
2190 	/*
2191 	 * Invalidate entire D$ if it was enabled.
2192 	 * This is done to avoid stale data in the D$ which might
2193 	 * occur with the D$ disabled and the trap handler doing
2194 	 * stores affecting lines already in the D$.
2195 	 */
2196 	if (cache_boot_state & DCU_DC) {
2197 		flush_dcache();
2198 	}
2199 
2200 	/*
2201 	 * Restore caches to their bootup state.
2202 	 */
2203 	set_dcu(get_dcu() | cache_boot_state);
2204 
2205 	/*
2206 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2207 	 * be logged as part of the panic flow.
2208 	 */
2209 	if (aflt->flt_panic)
2210 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2211 
2212 	/*
2213 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2214 	 * the chance of getting an unrecoverable Fast ECC error.  This
2215 	 * flush will evict the part of the parity trap handler that is run
2216 	 * at TL>1.
2217 	 */
2218 	if (tl) {
2219 		cpu_flush_ecache();
2220 	}
2221 }
2222 
2223 /*
2224  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2225  * to indicate which portions of the captured data should be in the ereport.
2226  */
2227 void
2228 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2229 {
2230 	int way = ch_flt->parity_data.ipe.cpl_way;
2231 	int offset = ch_flt->parity_data.ipe.cpl_off;
2232 	int tag_index;
2233 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2234 
2235 
2236 	if ((offset != -1) || (way != -1)) {
2237 		/*
2238 		 * Parity error in I$ tag or data
2239 		 */
2240 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2241 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2242 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2243 			    PN_ICIDX_TO_WAY(tag_index);
2244 		else
2245 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2246 			    CH_ICIDX_TO_WAY(tag_index);
2247 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2248 		    IC_LOGFLAG_MAGIC;
2249 	} else {
2250 		/*
2251 		 * Parity error was not identified.
2252 		 * Log tags and data for all ways.
2253 		 */
2254 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2255 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2256 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2257 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2258 				    PN_ICIDX_TO_WAY(tag_index);
2259 			else
2260 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2261 				    CH_ICIDX_TO_WAY(tag_index);
2262 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2263 			    IC_LOGFLAG_MAGIC;
2264 		}
2265 	}
2266 }
2267 
2268 /*
2269  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2270  * to indicate which portions of the captured data should be in the ereport.
2271  */
2272 void
2273 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2274 {
2275 	int way = ch_flt->parity_data.dpe.cpl_way;
2276 	int offset = ch_flt->parity_data.dpe.cpl_off;
2277 	int tag_index;
2278 
2279 	if (offset != -1) {
2280 		/*
2281 		 * Parity error in D$ or P$ data array.
2282 		 *
2283 		 * First check to see whether the parity error is in D$ or P$
2284 		 * since P$ data parity errors are reported in Panther using
2285 		 * the same trap.
2286 		 */
2287 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2288 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2289 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2290 			    CH_PCIDX_TO_WAY(tag_index);
2291 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2292 			    PC_LOGFLAG_MAGIC;
2293 		} else {
2294 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2295 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2296 			    CH_DCIDX_TO_WAY(tag_index);
2297 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2298 			    DC_LOGFLAG_MAGIC;
2299 		}
2300 	} else if (way != -1) {
2301 		/*
2302 		 * Parity error in D$ tag.
2303 		 */
2304 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2305 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2306 		    CH_DCIDX_TO_WAY(tag_index);
2307 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2308 		    DC_LOGFLAG_MAGIC;
2309 	}
2310 }
2311 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2312 
2313 /*
2314  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2315  * post-process CPU events that are dequeued.  As such, it can be invoked
2316  * from softint context, from AST processing in the trap() flow, or from the
2317  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2318  * Historically this entry point was used to log the actual cmn_err(9F) text;
2319  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2320  * With FMA this function now also returns a flag which indicates to the
2321  * caller whether the ereport should be posted (1) or suppressed (0).
2322  */
2323 static int
2324 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2325 {
2326 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2327 	struct async_flt *aflt = (struct async_flt *)flt;
2328 	uint64_t errors;
2329 	extern void memscrub_induced_error(void);
2330 
2331 	switch (ch_flt->flt_type) {
2332 	case CPU_INV_AFSR:
2333 		/*
2334 		 * If it is a disrupting trap and the AFSR is zero, then
2335 		 * the event has probably already been noted. Do not post
2336 		 * an ereport.
2337 		 */
2338 		if ((aflt->flt_status & ECC_C_TRAP) &&
2339 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2340 			return (0);
2341 		else
2342 			return (1);
2343 	case CPU_TO:
2344 	case CPU_BERR:
2345 	case CPU_FATAL:
2346 	case CPU_FPUERR:
2347 		return (1);
2348 
2349 	case CPU_UE_ECACHE_RETIRE:
2350 		cpu_log_err(aflt);
2351 		cpu_page_retire(ch_flt);
2352 		return (1);
2353 
2354 	/*
2355 	 * Cases where we may want to suppress logging or perform
2356 	 * extended diagnostics.
2357 	 */
2358 	case CPU_CE:
2359 	case CPU_EMC:
2360 		/*
2361 		 * We want to skip logging and further classification
2362 		 * only if ALL the following conditions are true:
2363 		 *
2364 		 *	1. There is only one error
2365 		 *	2. That error is a correctable memory error
2366 		 *	3. The error is caused by the memory scrubber (in
2367 		 *	   which case the error will have occurred under
2368 		 *	   on_trap protection)
2369 		 *	4. The error is on a retired page
2370 		 *
2371 		 * Note: AFLT_PROT_EC is used places other than the memory
2372 		 * scrubber.  However, none of those errors should occur
2373 		 * on a retired page.
2374 		 */
2375 		if ((ch_flt->afsr_errs &
2376 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2377 		    aflt->flt_prot == AFLT_PROT_EC) {
2378 
2379 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2380 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2381 
2382 				/*
2383 				 * Since we're skipping logging, we'll need
2384 				 * to schedule the re-enabling of CEEN
2385 				 */
2386 				(void) timeout(cpu_delayed_check_ce_errors,
2387 				    (void *)(uintptr_t)aflt->flt_inst,
2388 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2389 				    * MICROSEC));
2390 				}
2391 
2392 				/*
2393 				 * Inform memscrubber - scrubbing induced
2394 				 * CE on a retired page.
2395 				 */
2396 				memscrub_induced_error();
2397 				return (0);
2398 			}
2399 		}
2400 
2401 		/*
2402 		 * Perform/schedule further classification actions, but
2403 		 * only if the page is healthy (we don't want bad
2404 		 * pages inducing too much diagnostic activity).  If we could
2405 		 * not find a page pointer then we also skip this.  If
2406 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2407 		 * to copy and recirculate the event (for further diagnostics)
2408 		 * and we should not proceed to log it here.
2409 		 *
2410 		 * This must be the last step here before the cpu_log_err()
2411 		 * below - if an event recirculates cpu_ce_log_err() will
2412 		 * not call the current function but just proceed directly
2413 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2414 		 *
2415 		 * Note: Check cpu_impl_async_log_err if changing this
2416 		 */
2417 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2418 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2419 			    CE_XDIAG_SKIP_NOPP);
2420 		} else {
2421 			if (errors != PR_OK) {
2422 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2423 				    CE_XDIAG_SKIP_PAGEDET);
2424 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2425 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2426 				return (0);
2427 			}
2428 		}
2429 		/*FALLTHRU*/
2430 
2431 	/*
2432 	 * Cases where we just want to report the error and continue.
2433 	 */
2434 	case CPU_CE_ECACHE:
2435 	case CPU_UE_ECACHE:
2436 	case CPU_IV:
2437 	case CPU_ORPH:
2438 		cpu_log_err(aflt);
2439 		return (1);
2440 
2441 	/*
2442 	 * Cases where we want to fall through to handle panicking.
2443 	 */
2444 	case CPU_UE:
2445 		/*
2446 		 * We want to skip logging in the same conditions as the
2447 		 * CE case.  In addition, we want to make sure we're not
2448 		 * panicking.
2449 		 */
2450 		if (!panicstr && (ch_flt->afsr_errs &
2451 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2452 		    aflt->flt_prot == AFLT_PROT_EC) {
2453 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2454 				/* Zero the address to clear the error */
2455 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2456 				/*
2457 				 * Inform memscrubber - scrubbing induced
2458 				 * UE on a retired page.
2459 				 */
2460 				memscrub_induced_error();
2461 				return (0);
2462 			}
2463 		}
2464 		cpu_log_err(aflt);
2465 		break;
2466 
2467 	default:
2468 		/*
2469 		 * If the us3_common.c code doesn't know the flt_type, it may
2470 		 * be an implementation-specific code.  Call into the impldep
2471 		 * backend to find out what to do: if it tells us to continue,
2472 		 * break and handle as if falling through from a UE; if not,
2473 		 * the impldep backend has handled the error and we're done.
2474 		 */
2475 		switch (cpu_impl_async_log_err(flt, eqep)) {
2476 		case CH_ASYNC_LOG_DONE:
2477 			return (1);
2478 		case CH_ASYNC_LOG_RECIRC:
2479 			return (0);
2480 		case CH_ASYNC_LOG_CONTINUE:
2481 			break; /* continue on to handle UE-like error */
2482 		default:
2483 			cmn_err(CE_WARN, "discarding error 0x%p with "
2484 			    "invalid fault type (0x%x)",
2485 			    (void *)aflt, ch_flt->flt_type);
2486 			return (0);
2487 		}
2488 	}
2489 
2490 	/* ... fall through from the UE case */
2491 
2492 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2493 		if (!panicstr) {
2494 			cpu_page_retire(ch_flt);
2495 		} else {
2496 			/*
2497 			 * Clear UEs on panic so that we don't
2498 			 * get haunted by them during panic or
2499 			 * after reboot
2500 			 */
2501 			cpu_clearphys(aflt);
2502 			(void) clear_errors(NULL);
2503 		}
2504 	}
2505 
2506 	return (1);
2507 }
2508 
2509 /*
2510  * Retire the bad page that may contain the flushed error.
2511  */
2512 void
2513 cpu_page_retire(ch_async_flt_t *ch_flt)
2514 {
2515 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2516 	(void) page_retire(aflt->flt_addr, PR_UE);
2517 }
2518 
2519 /*
2520  * Return true if the error specified in the AFSR indicates
2521  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2522  * for Panther, none for Jalapeno/Serrano).
2523  */
2524 /* ARGSUSED */
2525 static int
2526 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2527 {
2528 #if defined(JALAPENO) || defined(SERRANO)
2529 	return (0);
2530 #elif defined(CHEETAH_PLUS)
2531 	if (IS_PANTHER(cpunodes[cpuid].implementation))
2532 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2533 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2534 #else	/* CHEETAH_PLUS */
2535 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2536 #endif
2537 }
2538 
2539 /*
2540  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2541  * generic event post-processing for correctable and uncorrectable memory,
2542  * E$, and MTag errors.  Historically this entry point was used to log bits of
2543  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2544  * converted into an ereport.  In addition, it transmits the error to any
2545  * platform-specific service-processor FRU logging routines, if available.
2546  */
2547 void
2548 cpu_log_err(struct async_flt *aflt)
2549 {
2550 	char unum[UNUM_NAMLEN];
2551 	int synd_status, synd_code, afar_status;
2552 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2553 
2554 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2555 		aflt->flt_status |= ECC_ECACHE;
2556 	else
2557 		aflt->flt_status &= ~ECC_ECACHE;
2558 	/*
2559 	 * Determine syndrome status.
2560 	 */
2561 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2562 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2563 
2564 	/*
2565 	 * Determine afar status.
2566 	 */
2567 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2568 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2569 		    ch_flt->flt_bit);
2570 	else
2571 		afar_status = AFLT_STAT_INVALID;
2572 
2573 	synd_code = synd_to_synd_code(synd_status,
2574 	    aflt->flt_synd, ch_flt->flt_bit);
2575 
2576 	/*
2577 	 * If afar status is not invalid do a unum lookup.
2578 	 */
2579 	if (afar_status != AFLT_STAT_INVALID) {
2580 		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2581 	} else {
2582 		unum[0] = '\0';
2583 	}
2584 
2585 	/*
2586 	 * Do not send the fruid message (plat_ecc_error_data_t)
2587 	 * to the SC if it can handle the enhanced error information
2588 	 * (plat_ecc_error2_data_t) or when the tunable
2589 	 * ecc_log_fruid_enable is set to 0.
2590 	 */
2591 
2592 	if (&plat_ecc_capability_sc_get &&
2593 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2594 		if (&plat_log_fruid_error)
2595 			plat_log_fruid_error(synd_code, aflt, unum,
2596 			    ch_flt->flt_bit);
2597 	}
2598 
2599 	if (aflt->flt_func != NULL)
2600 		aflt->flt_func(aflt, unum);
2601 
2602 	if (afar_status != AFLT_STAT_INVALID)
2603 		cpu_log_diag_info(ch_flt);
2604 
2605 	/*
2606 	 * If we have a CEEN error , we do not reenable CEEN until after
2607 	 * we exit the trap handler. Otherwise, another error may
2608 	 * occur causing the handler to be entered recursively.
2609 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2610 	 * to try and ensure that the CPU makes progress in the face
2611 	 * of a CE storm.
2612 	 */
2613 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2614 		(void) timeout(cpu_delayed_check_ce_errors,
2615 		    (void *)(uintptr_t)aflt->flt_inst,
2616 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2617 	}
2618 }
2619 
2620 /*
2621  * Invoked by error_init() early in startup and therefore before
2622  * startup_errorq() is called to drain any error Q -
2623  *
2624  * startup()
2625  *   startup_end()
2626  *     error_init()
2627  *       cpu_error_init()
2628  * errorq_init()
2629  *   errorq_drain()
2630  * start_other_cpus()
2631  *
2632  * The purpose of this routine is to create error-related taskqs.  Taskqs
2633  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2634  * context.
2635  */
2636 void
2637 cpu_error_init(int items)
2638 {
2639 	/*
2640 	 * Create taskq(s) to reenable CE
2641 	 */
2642 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2643 	    items, items, TASKQ_PREPOPULATE);
2644 }
2645 
2646 void
2647 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2648 {
2649 	char unum[UNUM_NAMLEN];
2650 	int len;
2651 
2652 	switch (aflt->flt_class) {
2653 	case CPU_FAULT:
2654 		cpu_ereport_init(aflt);
2655 		if (cpu_async_log_err(aflt, eqep))
2656 			cpu_ereport_post(aflt);
2657 		break;
2658 
2659 	case BUS_FAULT:
2660 		if (aflt->flt_func != NULL) {
2661 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2662 			    unum, UNUM_NAMLEN, &len);
2663 			aflt->flt_func(aflt, unum);
2664 		}
2665 		break;
2666 
2667 	case RECIRC_CPU_FAULT:
2668 		aflt->flt_class = CPU_FAULT;
2669 		cpu_log_err(aflt);
2670 		cpu_ereport_post(aflt);
2671 		break;
2672 
2673 	case RECIRC_BUS_FAULT:
2674 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2675 		/*FALLTHRU*/
2676 	default:
2677 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2678 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2679 		return;
2680 	}
2681 }
2682 
2683 /*
2684  * Scrub and classify a CE.  This function must not modify the
2685  * fault structure passed to it but instead should return the classification
2686  * information.
2687  */
2688 
2689 static uchar_t
2690 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2691 {
2692 	uchar_t disp = CE_XDIAG_EXTALG;
2693 	on_trap_data_t otd;
2694 	uint64_t orig_err;
2695 	ch_cpu_logout_t *clop;
2696 
2697 	/*
2698 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2699 	 * this, but our other callers have not.  Disable preemption to
2700 	 * avoid CPU migration so that we restore CEEN on the correct
2701 	 * cpu later.
2702 	 *
2703 	 * CEEN is cleared so that further CEs that our instruction and
2704 	 * data footprint induce do not cause use to either creep down
2705 	 * kernel stack to the point of overflow, or do so much CE
2706 	 * notification as to make little real forward progress.
2707 	 *
2708 	 * NCEEN must not be cleared.  However it is possible that
2709 	 * our accesses to the flt_addr may provoke a bus error or timeout
2710 	 * if the offending address has just been unconfigured as part of
2711 	 * a DR action.  So we must operate under on_trap protection.
2712 	 */
2713 	kpreempt_disable();
2714 	orig_err = get_error_enable();
2715 	if (orig_err & EN_REG_CEEN)
2716 		set_error_enable(orig_err & ~EN_REG_CEEN);
2717 
2718 	/*
2719 	 * Our classification algorithm includes the line state before
2720 	 * the scrub; we'd like this captured after the detection and
2721 	 * before the algorithm below - the earlier the better.
2722 	 *
2723 	 * If we've come from a cpu CE trap then this info already exists
2724 	 * in the cpu logout area.
2725 	 *
2726 	 * For a CE detected by memscrub for which there was no trap
2727 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2728 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2729 	 * marked the fault structure as incomplete as a flag to later
2730 	 * logging code.
2731 	 *
2732 	 * If called directly from an IO detected CE there has been
2733 	 * no line data capture.  In this case we logout to the cpu logout
2734 	 * area - that's appropriate since it's the cpu cache data we need
2735 	 * for classification.  We thus borrow the cpu logout area for a
2736 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2737 	 * this time (we will invalidate it again below).
2738 	 *
2739 	 * If called from the partner check xcall handler then this cpu
2740 	 * (the partner) has not necessarily experienced a CE at this
2741 	 * address.  But we want to capture line state before its scrub
2742 	 * attempt since we use that in our classification.
2743 	 */
2744 	if (logout_tried == B_FALSE) {
2745 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2746 			disp |= CE_XDIAG_NOLOGOUT;
2747 	}
2748 
2749 	/*
2750 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2751 	 * no longer be valid (if DR'd since the initial event) so we
2752 	 * perform this scrub under on_trap protection.  If this access is
2753 	 * ok then further accesses below will also be ok - DR cannot
2754 	 * proceed while this thread is active (preemption is disabled);
2755 	 * to be safe we'll nonetheless use on_trap again below.
2756 	 */
2757 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2758 		cpu_scrubphys(ecc);
2759 	} else {
2760 		no_trap();
2761 		if (orig_err & EN_REG_CEEN)
2762 			set_error_enable(orig_err);
2763 		kpreempt_enable();
2764 		return (disp);
2765 	}
2766 	no_trap();
2767 
2768 	/*
2769 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2770 	 * Note that it's quite possible that the read sourced the data from
2771 	 * another cpu.
2772 	 */
2773 	if (clear_ecc(ecc))
2774 		disp |= CE_XDIAG_CE1;
2775 
2776 	/*
2777 	 * Read the data again.  This time the read is very likely to
2778 	 * come from memory since the scrub induced a writeback to memory.
2779 	 */
2780 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2781 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2782 	} else {
2783 		no_trap();
2784 		if (orig_err & EN_REG_CEEN)
2785 			set_error_enable(orig_err);
2786 		kpreempt_enable();
2787 		return (disp);
2788 	}
2789 	no_trap();
2790 
2791 	/* Did that read induce a CE that matches the AFAR? */
2792 	if (clear_ecc(ecc))
2793 		disp |= CE_XDIAG_CE2;
2794 
2795 	/*
2796 	 * Look at the logout information and record whether we found the
2797 	 * line in l2/l3 cache.  For Panther we are interested in whether
2798 	 * we found it in either cache (it won't reside in both but
2799 	 * it is possible to read it that way given the moving target).
2800 	 */
2801 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2802 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2803 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2804 		int hit, level;
2805 		int state;
2806 		int totalsize;
2807 		ch_ec_data_t *ecp;
2808 
2809 		/*
2810 		 * If hit is nonzero then a match was found and hit will
2811 		 * be one greater than the index which hit.  For Panther we
2812 		 * also need to pay attention to level to see which of l2$ or
2813 		 * l3$ it hit in.
2814 		 */
2815 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2816 		    0, &level);
2817 
2818 		if (hit) {
2819 			--hit;
2820 			disp |= CE_XDIAG_AFARMATCH;
2821 
2822 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2823 				if (level == 2)
2824 					ecp = &clop->clo_data.chd_l2_data[hit];
2825 				else
2826 					ecp = &clop->clo_data.chd_ec_data[hit];
2827 			} else {
2828 				ASSERT(level == 2);
2829 				ecp = &clop->clo_data.chd_ec_data[hit];
2830 			}
2831 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2832 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2833 			    ecc->flt_addr, ecp->ec_tag);
2834 
2835 			/*
2836 			 * Cheetah variants use different state encodings -
2837 			 * the CH_ECSTATE_* defines vary depending on the
2838 			 * module we're compiled for.  Translate into our
2839 			 * one true version.  Conflate Owner-Shared state
2840 			 * of SSM mode with Owner as victimisation of such
2841 			 * lines may cause a writeback.
2842 			 */
2843 			switch (state) {
2844 			case CH_ECSTATE_MOD:
2845 				disp |= EC_STATE_M;
2846 				break;
2847 
2848 			case CH_ECSTATE_OWN:
2849 			case CH_ECSTATE_OWS:
2850 				disp |= EC_STATE_O;
2851 				break;
2852 
2853 			case CH_ECSTATE_EXL:
2854 				disp |= EC_STATE_E;
2855 				break;
2856 
2857 			case CH_ECSTATE_SHR:
2858 				disp |= EC_STATE_S;
2859 				break;
2860 
2861 			default:
2862 				disp |= EC_STATE_I;
2863 				break;
2864 			}
2865 		}
2866 
2867 		/*
2868 		 * If we initiated the delayed logout then we are responsible
2869 		 * for invalidating the logout area.
2870 		 */
2871 		if (logout_tried == B_FALSE) {
2872 			bzero(clop, sizeof (ch_cpu_logout_t));
2873 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2874 		}
2875 	}
2876 
2877 	/*
2878 	 * Re-enable CEEN if we turned it off.
2879 	 */
2880 	if (orig_err & EN_REG_CEEN)
2881 		set_error_enable(orig_err);
2882 	kpreempt_enable();
2883 
2884 	return (disp);
2885 }
2886 
2887 /*
2888  * Scrub a correctable memory error and collect data for classification
2889  * of CE type.  This function is called in the detection path, ie tl0 handling
2890  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2891  */
2892 void
2893 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2894 {
2895 	/*
2896 	 * Cheetah CE classification does not set any bits in flt_status.
2897 	 * Instead we will record classification datapoints in flt_disp.
2898 	 */
2899 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2900 
2901 	/*
2902 	 * To check if the error detected by IO is persistent, sticky or
2903 	 * intermittent.  This is noticed by clear_ecc().
2904 	 */
2905 	if (ecc->flt_status & ECC_IOBUS)
2906 		ecc->flt_stat = C_AFSR_MEMORY;
2907 
2908 	/*
2909 	 * Record information from this first part of the algorithm in
2910 	 * flt_disp.
2911 	 */
2912 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2913 }
2914 
2915 /*
2916  * Select a partner to perform a further CE classification check from.
2917  * Must be called with kernel preemption disabled (to stop the cpu list
2918  * from changing).  The detecting cpu we are partnering has cpuid
2919  * aflt->flt_inst; we might not be running on the detecting cpu.
2920  *
2921  * Restrict choice to active cpus in the same cpu partition as ourselves in
2922  * an effort to stop bad cpus in one partition causing other partitions to
2923  * perform excessive diagnostic activity.  Actually since the errorq drain
2924  * is run from a softint most of the time and that is a global mechanism
2925  * this isolation is only partial.  Return NULL if we fail to find a
2926  * suitable partner.
2927  *
2928  * We prefer a partner that is in a different latency group to ourselves as
2929  * we will share fewer datapaths.  If such a partner is unavailable then
2930  * choose one in the same lgroup but prefer a different chip and only allow
2931  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2932  * flags includes PTNR_SELFOK then permit selection of the original detector.
2933  *
2934  * We keep a cache of the last partner selected for a cpu, and we'll try to
2935  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2936  * have passed since that selection was made.  This provides the benefit
2937  * of the point-of-view of different partners over time but without
2938  * requiring frequent cpu list traversals.
2939  */
2940 
2941 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2942 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2943 
2944 static cpu_t *
2945 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2946 {
2947 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2948 	hrtime_t lasttime, thistime;
2949 
2950 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2951 
2952 	dtcr = cpu[aflt->flt_inst];
2953 
2954 	/*
2955 	 * Short-circuit for the following cases:
2956 	 *	. the dtcr is not flagged active
2957 	 *	. there is just one cpu present
2958 	 *	. the detector has disappeared
2959 	 *	. we were given a bad flt_inst cpuid; this should not happen
2960 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2961 	 *	  reason to panic.
2962 	 *	. there is just one cpu left online in the cpu partition
2963 	 *
2964 	 * If we return NULL after this point then we do not update the
2965 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2966 	 * again next time; this is the case where the only other cpu online
2967 	 * in the detector's partition is on the same chip as the detector
2968 	 * and since CEEN re-enable is throttled even that case should not
2969 	 * hurt performance.
2970 	 */
2971 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2972 		return (NULL);
2973 	}
2974 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2975 		if (flags & PTNR_SELFOK) {
2976 			*typep = CE_XDIAG_PTNR_SELF;
2977 			return (dtcr);
2978 		} else {
2979 			return (NULL);
2980 		}
2981 	}
2982 
2983 	thistime = gethrtime();
2984 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2985 
2986 	/*
2987 	 * Select a starting point.
2988 	 */
2989 	if (!lasttime) {
2990 		/*
2991 		 * We've never selected a partner for this detector before.
2992 		 * Start the scan at the next online cpu in the same cpu
2993 		 * partition.
2994 		 */
2995 		sp = dtcr->cpu_next_part;
2996 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2997 		/*
2998 		 * Our last selection has not aged yet.  If this partner:
2999 		 *	. is still a valid cpu,
3000 		 *	. is still in the same partition as the detector
3001 		 *	. is still marked active
3002 		 *	. satisfies the 'flags' argument criteria
3003 		 * then select it again without updating the timestamp.
3004 		 */
3005 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3006 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
3007 		    !cpu_flagged_active(sp->cpu_flags) ||
3008 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
3009 		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
3010 		    !(flags & PTNR_SIBLINGOK))) {
3011 			sp = dtcr->cpu_next_part;
3012 		} else {
3013 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3014 				*typep = CE_XDIAG_PTNR_REMOTE;
3015 			} else if (sp == dtcr) {
3016 				*typep = CE_XDIAG_PTNR_SELF;
3017 			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3018 				*typep = CE_XDIAG_PTNR_SIBLING;
3019 			} else {
3020 				*typep = CE_XDIAG_PTNR_LOCAL;
3021 			}
3022 			return (sp);
3023 		}
3024 	} else {
3025 		/*
3026 		 * Our last selection has aged.  If it is nonetheless still a
3027 		 * valid cpu then start the scan at the next cpu in the
3028 		 * partition after our last partner.  If the last selection
3029 		 * is no longer a valid cpu then go with our default.  In
3030 		 * this way we slowly cycle through possible partners to
3031 		 * obtain multiple viewpoints over time.
3032 		 */
3033 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3034 		if (sp == NULL) {
3035 			sp = dtcr->cpu_next_part;
3036 		} else {
3037 			sp = sp->cpu_next_part;		/* may be dtcr */
3038 			if (sp->cpu_part != dtcr->cpu_part)
3039 				sp = dtcr;
3040 		}
3041 	}
3042 
3043 	/*
3044 	 * We have a proposed starting point for our search, but if this
3045 	 * cpu is offline then its cpu_next_part will point to itself
3046 	 * so we can't use that to iterate over cpus in this partition in
3047 	 * the loop below.  We still want to avoid iterating over cpus not
3048 	 * in our partition, so in the case that our starting point is offline
3049 	 * we will repoint it to be the detector itself;  and if the detector
3050 	 * happens to be offline we'll return NULL from the following loop.
3051 	 */
3052 	if (!cpu_flagged_active(sp->cpu_flags)) {
3053 		sp = dtcr;
3054 	}
3055 
3056 	ptnr = sp;
3057 	locptnr = NULL;
3058 	sibptnr = NULL;
3059 	do {
3060 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3061 			continue;
3062 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3063 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3064 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3065 			*typep = CE_XDIAG_PTNR_REMOTE;
3066 			return (ptnr);
3067 		}
3068 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3069 			if (sibptnr == NULL)
3070 				sibptnr = ptnr;
3071 			continue;
3072 		}
3073 		if (locptnr == NULL)
3074 			locptnr = ptnr;
3075 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3076 
3077 	/*
3078 	 * A foreign partner has already been returned if one was available.
3079 	 *
3080 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3081 	 * detector, is active, and is not a sibling of the detector.
3082 	 *
3083 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3084 	 * active.
3085 	 *
3086 	 * If we have to resort to using the detector itself we have already
3087 	 * checked that it is active.
3088 	 */
3089 	if (locptnr) {
3090 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3091 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3092 		*typep = CE_XDIAG_PTNR_LOCAL;
3093 		return (locptnr);
3094 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3095 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3096 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3097 		*typep = CE_XDIAG_PTNR_SIBLING;
3098 		return (sibptnr);
3099 	} else if (flags & PTNR_SELFOK) {
3100 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3101 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3102 		*typep = CE_XDIAG_PTNR_SELF;
3103 		return (dtcr);
3104 	}
3105 
3106 	return (NULL);
3107 }
3108 
3109 /*
3110  * Cross call handler that is requested to run on the designated partner of
3111  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3112  */
3113 static void
3114 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3115 {
3116 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3117 }
3118 
3119 /*
3120  * The associated errorqs are never destroyed so we do not need to deal with
3121  * them disappearing before this timeout fires.  If the affected memory
3122  * has been DR'd out since the original event the scrub algrithm will catch
3123  * any errors and return null disposition info.  If the original detecting
3124  * cpu has been DR'd out then ereport detector info will not be able to
3125  * lookup CPU type;  with a small timeout this is unlikely.
3126  */
3127 static void
3128 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3129 {
3130 	struct async_flt *aflt = cbarg->lkycb_aflt;
3131 	uchar_t disp;
3132 	cpu_t *cp;
3133 	int ptnrtype;
3134 
3135 	kpreempt_disable();
3136 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3137 	    &ptnrtype)) {
3138 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3139 		    (uint64_t)&disp);
3140 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3141 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3142 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3143 	} else {
3144 		ce_xdiag_lkydrops++;
3145 		if (ncpus > 1)
3146 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3147 			    CE_XDIAG_SKIP_NOPTNR);
3148 	}
3149 	kpreempt_enable();
3150 
3151 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3152 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3153 }
3154 
3155 /*
3156  * Called from errorq drain code when processing a CE error, both from
3157  * CPU and PCI drain functions.  Decide what further classification actions,
3158  * if any, we will perform.  Perform immediate actions now, and schedule
3159  * delayed actions as required.  Note that we are no longer necessarily running
3160  * on the detecting cpu, and that the async_flt structure will not persist on
3161  * return from this function.
3162  *
3163  * Calls to this function should aim to be self-throtlling in some way.  With
3164  * the delayed re-enable of CEEN the absolute rate of calls should not
3165  * be excessive.  Callers should also avoid performing in-depth classification
3166  * for events in pages that are already known to be suspect.
3167  *
3168  * We return nonzero to indicate that the event has been copied and
3169  * recirculated for further testing.  The caller should not log the event
3170  * in this case - it will be logged when further test results are available.
3171  *
3172  * Our possible contexts are that of errorq_drain: below lock level or from
3173  * panic context.  We can assume that the cpu we are running on is online.
3174  */
3175 
3176 
3177 #ifdef DEBUG
3178 static int ce_xdiag_forceaction;
3179 #endif
3180 
3181 int
3182 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3183     errorq_elem_t *eqep, size_t afltoffset)
3184 {
3185 	ce_dispact_t dispact, action;
3186 	cpu_t *cp;
3187 	uchar_t dtcrinfo, disp;
3188 	int ptnrtype;
3189 
3190 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3191 		ce_xdiag_drops++;
3192 		return (0);
3193 	} else if (!aflt->flt_in_memory) {
3194 		ce_xdiag_drops++;
3195 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3196 		return (0);
3197 	}
3198 
3199 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3200 
3201 	/*
3202 	 * Some correctable events are not scrubbed/classified, such as those
3203 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3204 	 * initial detector classification go no further.
3205 	 */
3206 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3207 		ce_xdiag_drops++;
3208 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3209 		return (0);
3210 	}
3211 
3212 	dispact = CE_DISPACT(ce_disp_table,
3213 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3214 	    CE_XDIAG_STATE(dtcrinfo),
3215 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3216 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3217 
3218 
3219 	action = CE_ACT(dispact);	/* bad lookup caught below */
3220 #ifdef DEBUG
3221 	if (ce_xdiag_forceaction != 0)
3222 		action = ce_xdiag_forceaction;
3223 #endif
3224 
3225 	switch (action) {
3226 	case CE_ACT_LKYCHK: {
3227 		caddr_t ndata;
3228 		errorq_elem_t *neqep;
3229 		struct async_flt *ecc;
3230 		ce_lkychk_cb_t *cbargp;
3231 
3232 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3233 			ce_xdiag_lkydrops++;
3234 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3235 			    CE_XDIAG_SKIP_DUPFAIL);
3236 			break;
3237 		}
3238 		ecc = (struct async_flt *)(ndata + afltoffset);
3239 
3240 		ASSERT(ecc->flt_class == CPU_FAULT ||
3241 		    ecc->flt_class == BUS_FAULT);
3242 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3243 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3244 
3245 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3246 		cbargp->lkycb_aflt = ecc;
3247 		cbargp->lkycb_eqp = eqp;
3248 		cbargp->lkycb_eqep = neqep;
3249 
3250 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3251 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3252 		return (1);
3253 	}
3254 
3255 	case CE_ACT_PTNRCHK:
3256 		kpreempt_disable();	/* stop cpu list changing */
3257 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3258 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3259 			    (uint64_t)aflt, (uint64_t)&disp);
3260 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3261 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3262 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3263 		} else if (ncpus > 1) {
3264 			ce_xdiag_ptnrdrops++;
3265 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3266 			    CE_XDIAG_SKIP_NOPTNR);
3267 		} else {
3268 			ce_xdiag_ptnrdrops++;
3269 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3270 			    CE_XDIAG_SKIP_UNIPROC);
3271 		}
3272 		kpreempt_enable();
3273 		break;
3274 
3275 	case CE_ACT_DONE:
3276 		break;
3277 
3278 	case CE_ACT(CE_DISP_BAD):
3279 	default:
3280 #ifdef DEBUG
3281 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3282 #endif
3283 		ce_xdiag_bad++;
3284 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3285 		break;
3286 	}
3287 
3288 	return (0);
3289 }
3290 
3291 /*
3292  * We route all errors through a single switch statement.
3293  */
3294 void
3295 cpu_ue_log_err(struct async_flt *aflt)
3296 {
3297 	switch (aflt->flt_class) {
3298 	case CPU_FAULT:
3299 		cpu_ereport_init(aflt);
3300 		if (cpu_async_log_err(aflt, NULL))
3301 			cpu_ereport_post(aflt);
3302 		break;
3303 
3304 	case BUS_FAULT:
3305 		bus_async_log_err(aflt);
3306 		break;
3307 
3308 	default:
3309 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3310 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3311 		return;
3312 	}
3313 }
3314 
3315 /*
3316  * Routine for panic hook callback from panic_idle().
3317  */
3318 void
3319 cpu_async_panic_callb(void)
3320 {
3321 	ch_async_flt_t ch_flt;
3322 	struct async_flt *aflt;
3323 	ch_cpu_errors_t cpu_error_regs;
3324 	uint64_t afsr_errs;
3325 
3326 	get_cpu_error_state(&cpu_error_regs);
3327 
3328 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3329 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3330 
3331 	if (afsr_errs) {
3332 
3333 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3334 		aflt = (struct async_flt *)&ch_flt;
3335 		aflt->flt_id = gethrtime_waitfree();
3336 		aflt->flt_bus_id = getprocessorid();
3337 		aflt->flt_inst = CPU->cpu_id;
3338 		aflt->flt_stat = cpu_error_regs.afsr;
3339 		aflt->flt_addr = cpu_error_regs.afar;
3340 		aflt->flt_prot = AFLT_PROT_NONE;
3341 		aflt->flt_class = CPU_FAULT;
3342 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3343 		aflt->flt_panic = 1;
3344 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3345 		ch_flt.afsr_errs = afsr_errs;
3346 #if defined(SERRANO)
3347 		ch_flt.afar2 = cpu_error_regs.afar2;
3348 #endif	/* SERRANO */
3349 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3350 	}
3351 }
3352 
3353 /*
3354  * Routine to convert a syndrome into a syndrome code.
3355  */
3356 static int
3357 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3358 {
3359 	if (synd_status == AFLT_STAT_INVALID)
3360 		return (-1);
3361 
3362 	/*
3363 	 * Use the syndrome to index the appropriate syndrome table,
3364 	 * to get the code indicating which bit(s) is(are) bad.
3365 	 */
3366 	if (afsr_bit &
3367 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3368 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3369 #if defined(JALAPENO) || defined(SERRANO)
3370 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3371 				return (-1);
3372 			else
3373 				return (BPAR0 + synd);
3374 #else /* JALAPENO || SERRANO */
3375 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3376 				return (-1);
3377 			else
3378 				return (mtag_syndrome_tab[synd]);
3379 #endif /* JALAPENO || SERRANO */
3380 		} else {
3381 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3382 				return (-1);
3383 			else
3384 				return (ecc_syndrome_tab[synd]);
3385 		}
3386 	} else {
3387 		return (-1);
3388 	}
3389 }
3390 
3391 int
3392 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3393 {
3394 	if (&plat_get_mem_sid)
3395 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3396 	else
3397 		return (ENOTSUP);
3398 }
3399 
3400 int
3401 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3402 {
3403 	if (&plat_get_mem_offset)
3404 		return (plat_get_mem_offset(flt_addr, offp));
3405 	else
3406 		return (ENOTSUP);
3407 }
3408 
3409 int
3410 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3411 {
3412 	if (&plat_get_mem_addr)
3413 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3414 	else
3415 		return (ENOTSUP);
3416 }
3417 
3418 /*
3419  * Routine to return a string identifying the physical name
3420  * associated with a memory/cache error.
3421  */
3422 int
3423 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3424     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3425     ushort_t flt_status, char *buf, int buflen, int *lenp)
3426 {
3427 	int synd_code;
3428 	int ret;
3429 
3430 	/*
3431 	 * An AFSR of -1 defaults to a memory syndrome.
3432 	 */
3433 	if (flt_stat == (uint64_t)-1)
3434 		flt_stat = C_AFSR_CE;
3435 
3436 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3437 
3438 	/*
3439 	 * Syndrome code must be either a single-bit error code
3440 	 * (0...143) or -1 for unum lookup.
3441 	 */
3442 	if (synd_code < 0 || synd_code >= M2)
3443 		synd_code = -1;
3444 	if (&plat_get_mem_unum) {
3445 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3446 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3447 			buf[0] = '\0';
3448 			*lenp = 0;
3449 		}
3450 
3451 		return (ret);
3452 	}
3453 
3454 	return (ENOTSUP);
3455 }
3456 
3457 /*
3458  * Wrapper for cpu_get_mem_unum() routine that takes an
3459  * async_flt struct rather than explicit arguments.
3460  */
3461 int
3462 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3463     char *buf, int buflen, int *lenp)
3464 {
3465 	/*
3466 	 * If we come thru here for an IO bus error aflt->flt_stat will
3467 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3468 	 * so it will interpret this as a memory error.
3469 	 */
3470 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3471 	    (aflt->flt_class == BUS_FAULT) ?
3472 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3473 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3474 	    aflt->flt_status, buf, buflen, lenp));
3475 }
3476 
3477 /*
3478  * Return unum string given synd_code and async_flt into
3479  * the buf with size UNUM_NAMLEN
3480  */
3481 static int
3482 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3483 {
3484 	int ret, len;
3485 
3486 	/*
3487 	 * Syndrome code must be either a single-bit error code
3488 	 * (0...143) or -1 for unum lookup.
3489 	 */
3490 	if (synd_code < 0 || synd_code >= M2)
3491 		synd_code = -1;
3492 	if (&plat_get_mem_unum) {
3493 		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3494 		    aflt->flt_bus_id, aflt->flt_in_memory,
3495 		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3496 			buf[0] = '\0';
3497 		}
3498 		return (ret);
3499 	}
3500 
3501 	buf[0] = '\0';
3502 	return (ENOTSUP);
3503 }
3504 
3505 /*
3506  * This routine is a more generic interface to cpu_get_mem_unum()
3507  * that may be used by other modules (e.g. the 'mm' driver, through
3508  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3509  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3510  */
3511 int
3512 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3513     char *buf, int buflen, int *lenp)
3514 {
3515 	int synd_status, flt_in_memory, ret;
3516 	ushort_t flt_status = 0;
3517 	char unum[UNUM_NAMLEN];
3518 	uint64_t t_afsr_errs;
3519 
3520 	/*
3521 	 * Check for an invalid address.
3522 	 */
3523 	if (afar == (uint64_t)-1)
3524 		return (ENXIO);
3525 
3526 	if (synd == (uint64_t)-1)
3527 		synd_status = AFLT_STAT_INVALID;
3528 	else
3529 		synd_status = AFLT_STAT_VALID;
3530 
3531 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3532 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3533 
3534 	/*
3535 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3536 	 */
3537 	if (*afsr == (uint64_t)-1)
3538 		t_afsr_errs = C_AFSR_CE;
3539 	else {
3540 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3541 #if defined(CHEETAH_PLUS)
3542 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3543 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3544 #endif	/* CHEETAH_PLUS */
3545 	}
3546 
3547 	/*
3548 	 * Turn on ECC_ECACHE if error type is E$ Data.
3549 	 */
3550 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3551 		flt_status |= ECC_ECACHE;
3552 
3553 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3554 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3555 	if (ret != 0)
3556 		return (ret);
3557 
3558 	if (*lenp >= buflen)
3559 		return (ENAMETOOLONG);
3560 
3561 	(void) strncpy(buf, unum, buflen);
3562 
3563 	return (0);
3564 }
3565 
3566 /*
3567  * Routine to return memory information associated
3568  * with a physical address and syndrome.
3569  */
3570 int
3571 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3572     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3573     int *segsp, int *banksp, int *mcidp)
3574 {
3575 	int synd_status, synd_code;
3576 
3577 	if (afar == (uint64_t)-1)
3578 		return (ENXIO);
3579 
3580 	if (synd == (uint64_t)-1)
3581 		synd_status = AFLT_STAT_INVALID;
3582 	else
3583 		synd_status = AFLT_STAT_VALID;
3584 
3585 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3586 
3587 	if (p2get_mem_info != NULL)
3588 		return ((p2get_mem_info)(synd_code, afar,
3589 		    mem_sizep, seg_sizep, bank_sizep,
3590 		    segsp, banksp, mcidp));
3591 	else
3592 		return (ENOTSUP);
3593 }
3594 
3595 /*
3596  * Routine to return a string identifying the physical
3597  * name associated with a cpuid.
3598  */
3599 int
3600 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3601 {
3602 	int ret;
3603 	char unum[UNUM_NAMLEN];
3604 
3605 	if (&plat_get_cpu_unum) {
3606 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3607 		    != 0)
3608 			return (ret);
3609 	} else {
3610 		return (ENOTSUP);
3611 	}
3612 
3613 	if (*lenp >= buflen)
3614 		return (ENAMETOOLONG);
3615 
3616 	(void) strncpy(buf, unum, buflen);
3617 
3618 	return (0);
3619 }
3620 
3621 /*
3622  * This routine exports the name buffer size.
3623  */
3624 size_t
3625 cpu_get_name_bufsize()
3626 {
3627 	return (UNUM_NAMLEN);
3628 }
3629 
/*
 * Historical function, apparently not used.  The body is empty: all three
 * arguments are ignored and nothing is done.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{}
3637 
/*
 * Historical function only called for SBus errors in debugging.  The body
 * is empty: all three arguments are ignored and nothing is done.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{}
3645 
/*
 * Clear the AFSR sticky bits.  The routine returns a non-zero value if
 * any of the AFSR's sticky errors are detected.  If a non-null pointer to
 * an async fault structure argument is passed in, the captured error state
 * (AFSR, AFAR) info will be returned in the structure.
 *
 *	ch_flt	optional (may be NULL); when supplied, flt_stat, flt_addr,
 *		afsr_ext, afsr_errs (and afar2 on SERRANO) are filled in
 *		from the captured registers
 *
 * Returns 1 if any C_AFSR_ALL_ERRS / C_AFSR_EXT_ALL_ERRS bit was set in
 * the captured state, 0 otherwise.
 */
int
clear_errors(ch_async_flt_t *ch_flt)
{
	/* Generic view of the same structure; only used if ch_flt != NULL. */
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ch_cpu_errors_t	cpu_error_regs;

	/* Capture the current error register state. */
	get_cpu_error_state(&cpu_error_regs);

	if (ch_flt != NULL) {
		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
		aflt->flt_addr = cpu_error_regs.afar;
		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
#if defined(SERRANO)
		ch_flt->afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
	}

	/*
	 * Write the captured values back; presumably this is what clears
	 * the sticky bits (the register write semantics are not visible
	 * here).
	 */
	set_cpu_error_state(&cpu_error_regs);

	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
}
3676 
/*
 * Clear any AFSR error bits, and check for persistence.
 *
 * It would be desirable to also insist that syndrome match.  PCI handling
 * has already filled flt_synd.  For errors trapped by CPU we only fill
 * flt_synd when we queue the event, so we do not have a valid flt_synd
 * during initial classification (it is valid if we're called as part of
 * subsequent low-pil additional classification attempts).  We could try
 * to determine which syndrome to use: we know we're only called for
 * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
 * would be esynd/none and esynd/msynd, respectively.  If that is
 * implemented then what do we do in the case that we do experience an
 * error on the same afar but with different syndrome?  At the very least
 * we should count such occurrences.  Anyway, for now, we'll leave it as
 * it has been for ages.
 *
 * Returns nonzero when the captured AFSR has at least one C_AFSR_MEMORY
 * bit in common with aflt->flt_stat and the captured AFAR equals
 * aflt->flt_addr; returns 0 otherwise.
 */
static int
clear_ecc(struct async_flt *aflt)
{
	ch_cpu_errors_t	cpu_error_regs;

	/*
	 * Snapshot the AFSR and AFAR and clear any errors
	 */
	get_cpu_error_state(&cpu_error_regs);
	set_cpu_error_state(&cpu_error_regs);

	/*
	 * If any of the same memory access error bits are still on and
	 * the AFAR matches, return that the error is persistent.
	 */
	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
	    cpu_error_regs.afar == aflt->flt_addr);
}
3711 
3712 /*
3713  * Turn off all cpu error detection, normally only used for panics.
3714  */
3715 void
3716 cpu_disable_errors(void)
3717 {
3718 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3719 
3720 	/*
3721 	 * With error detection now turned off, check the other cpus
3722 	 * logout areas for any unlogged errors.
3723 	 */
3724 	if (enable_check_other_cpus_logout) {
3725 		cpu_check_other_cpus_logout();
3726 		/*
3727 		 * Make a second pass over the logout areas, in case
3728 		 * there is a failing CPU in an error-trap loop which
3729 		 * will write to the logout area once it is emptied.
3730 		 */
3731 		cpu_check_other_cpus_logout();
3732 	}
3733 }
3734 
/*
 * Enable errors.  Counterpart of cpu_disable_errors(): issues the same
 * xt_all() request to set_error_enable_tl1 with EER_SET_ABSOLUTE, but
 * passing EN_REG_ENABLE rather than EN_REG_DISABLE.
 */
void
cpu_enable_errors(void)
{
	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
}
3743 
/*
 * Flush the entire ecache using displacement flush by reading through a
 * physical address range twice as large as the Ecache.
 *
 * The flush starts at ecache_flushaddr and uses the E$ size and line size
 * recorded in cpunodes[] for the cpu we are currently running on.
 */
void
cpu_flush_ecache(void)
{
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}
3754 
3755 /*
3756  * Return CPU E$ set size - E$ size divided by the associativity.
3757  * We use this function in places where the CPU_PRIVATE ptr may not be
3758  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3759  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3760  * up before the kernel switches from OBP's to the kernel's trap table, so
3761  * we don't have to worry about cpunodes being unitialized.
3762  */
3763 int
3764 cpu_ecache_set_size(struct cpu *cp)
3765 {
3766 	if (CPU_PRIVATE(cp))
3767 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3768 
3769 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3770 }
3771 
3772 /*
3773  * Flush Ecache line.
3774  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3775  * Uses normal displacement flush for Cheetah.
3776  */
3777 static void
3778 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3779 {
3780 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3781 	int ec_set_size = cpu_ecache_set_size(CPU);
3782 
3783 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3784 }
3785 
3786 /*
3787  * Scrub physical address.
3788  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3789  * Ecache or direct-mapped Ecache.
3790  */
3791 static void
3792 cpu_scrubphys(struct async_flt *aflt)
3793 {
3794 	int ec_set_size = cpu_ecache_set_size(CPU);
3795 
3796 	scrubphys(aflt->flt_addr, ec_set_size);
3797 }
3798 
3799 /*
3800  * Clear physical address.
3801  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3802  * Ecache or direct-mapped Ecache.
3803  */
3804 void
3805 cpu_clearphys(struct async_flt *aflt)
3806 {
3807 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3808 	int ec_set_size = cpu_ecache_set_size(CPU);
3809 
3810 
3811 	clearphys(aflt->flt_addr, ec_set_size, lsize);
3812 }
3813 
3814 #if defined(CPU_IMP_ECACHE_ASSOC)
3815 /*
3816  * Check for a matching valid line in all the sets.
3817  * If found, return set# + 1. Otherwise return 0.
3818  */
3819 static int
3820 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3821 {
3822 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3823 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3824 	int ec_set_size = cpu_ecache_set_size(CPU);
3825 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3826 	int nway = cpu_ecache_nway();
3827 	int i;
3828 
3829 	for (i = 0; i < nway; i++, ecp++) {
3830 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3831 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3832 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3833 			return (i+1);
3834 	}
3835 	return (0);
3836 }
3837 #endif /* CPU_IMP_ECACHE_ASSOC */
3838 
3839 /*
3840  * Check whether a line in the given logout info matches the specified
3841  * fault address.  If reqval is set then the line must not be Invalid.
3842  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3843  * set to 2 for l2$ or 3 for l3$.
3844  */
3845 static int
3846 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3847 {
3848 	ch_diag_data_t *cdp = data;
3849 	ch_ec_data_t *ecp;
3850 	int totalsize, ec_set_size;
3851 	int i, ways;
3852 	int match = 0;
3853 	int tagvalid;
3854 	uint64_t addr, tagpa;
3855 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3856 
3857 	/*
3858 	 * Check the l2$ logout data
3859 	 */
3860 	if (ispanther) {
3861 		ecp = &cdp->chd_l2_data[0];
3862 		ec_set_size = PN_L2_SET_SIZE;
3863 		ways = PN_L2_NWAYS;
3864 	} else {
3865 		ecp = &cdp->chd_ec_data[0];
3866 		ec_set_size = cpu_ecache_set_size(CPU);
3867 		ways = cpu_ecache_nway();
3868 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3869 	}
3870 	/* remove low order PA bits from fault address not used in PA tag */
3871 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3872 	for (i = 0; i < ways; i++, ecp++) {
3873 		if (ispanther) {
3874 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3875 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3876 		} else {
3877 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3878 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3879 			    ecp->ec_tag);
3880 		}
3881 		if (tagpa == addr && (!reqval || tagvalid)) {
3882 			match = i + 1;
3883 			*level = 2;
3884 			break;
3885 		}
3886 	}
3887 
3888 	if (match || !ispanther)
3889 		return (match);
3890 
3891 	/* For Panther we also check the l3$ */
3892 	ecp = &cdp->chd_ec_data[0];
3893 	ec_set_size = PN_L3_SET_SIZE;
3894 	ways = PN_L3_NWAYS;
3895 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3896 
3897 	for (i = 0; i < ways; i++, ecp++) {
3898 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3899 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3900 			match = i + 1;
3901 			*level = 3;
3902 			break;
3903 		}
3904 	}
3905 
3906 	return (match);
3907 }
3908 
3909 #if defined(CPU_IMP_L1_CACHE_PARITY)
3910 /*
3911  * Record information related to the source of an Dcache Parity Error.
3912  */
3913 static void
3914 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3915 {
3916 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3917 	int index;
3918 
3919 	/*
3920 	 * Since instruction decode cannot be done at high PIL
3921 	 * just examine the entire Dcache to locate the error.
3922 	 */
3923 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3924 		ch_flt->parity_data.dpe.cpl_way = -1;
3925 		ch_flt->parity_data.dpe.cpl_off = -1;
3926 	}
3927 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3928 		cpu_dcache_parity_check(ch_flt, index);
3929 }
3930 
3931 /*
3932  * Check all ways of the Dcache at a specified index for good parity.
3933  */
3934 static void
3935 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3936 {
3937 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3938 	uint64_t parity_bits, pbits, data_word;
3939 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3940 	int way, word, data_byte;
3941 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3942 	ch_dc_data_t tmp_dcp;
3943 
3944 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3945 		/*
3946 		 * Perform diagnostic read.
3947 		 */
3948 		get_dcache_dtag(index + way * dc_set_size,
3949 		    (uint64_t *)&tmp_dcp);
3950 
3951 		/*
3952 		 * Check tag for even parity.
3953 		 * Sum of 1 bits (including parity bit) should be even.
3954 		 */
3955 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3956 			/*
3957 			 * If this is the first error log detailed information
3958 			 * about it and check the snoop tag. Otherwise just
3959 			 * record the fact that we found another error.
3960 			 */
3961 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3962 				ch_flt->parity_data.dpe.cpl_way = way;
3963 				ch_flt->parity_data.dpe.cpl_cache =
3964 				    CPU_DC_PARITY;
3965 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3966 
3967 				if (popc64(tmp_dcp.dc_sntag &
3968 				    CHP_DCSNTAG_PARMASK) & 1) {
3969 					ch_flt->parity_data.dpe.cpl_tag |=
3970 					    CHP_DC_SNTAG;
3971 					ch_flt->parity_data.dpe.cpl_lcnt++;
3972 				}
3973 
3974 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3975 			}
3976 
3977 			ch_flt->parity_data.dpe.cpl_lcnt++;
3978 		}
3979 
3980 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3981 			/*
3982 			 * Panther has more parity bits than the other
3983 			 * processors for covering dcache data and so each
3984 			 * byte of data in each word has its own parity bit.
3985 			 */
3986 			parity_bits = tmp_dcp.dc_pn_data_parity;
3987 			for (word = 0; word < 4; word++) {
3988 				data_word = tmp_dcp.dc_data[word];
3989 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3990 				for (data_byte = 0; data_byte < 8;
3991 				    data_byte++) {
3992 					if (((popc64(data_word &
3993 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3994 					    (pbits & 1)) {
3995 						cpu_record_dc_data_parity(
3996 						    ch_flt, dcp, &tmp_dcp, way,
3997 						    word);
3998 					}
3999 					pbits >>= 1;
4000 					data_word >>= 8;
4001 				}
4002 				parity_bits >>= 8;
4003 			}
4004 		} else {
4005 			/*
4006 			 * Check data array for even parity.
4007 			 * The 8 parity bits are grouped into 4 pairs each
4008 			 * of which covers a 64-bit word.  The endianness is
4009 			 * reversed -- the low-order parity bits cover the
4010 			 * high-order data words.
4011 			 */
4012 			parity_bits = tmp_dcp.dc_utag >> 8;
4013 			for (word = 0; word < 4; word++) {
4014 				pbits = (parity_bits >> (6 - word * 2)) & 3;
4015 				if ((popc64(tmp_dcp.dc_data[word]) +
4016 				    parity_bits_popc[pbits]) & 1) {
4017 					cpu_record_dc_data_parity(ch_flt, dcp,
4018 					    &tmp_dcp, way, word);
4019 				}
4020 			}
4021 		}
4022 	}
4023 }
4024 
4025 static void
4026 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4027     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4028 {
4029 	/*
4030 	 * If this is the first error log detailed information about it.
4031 	 * Otherwise just record the fact that we found another error.
4032 	 */
4033 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4034 		ch_flt->parity_data.dpe.cpl_way = way;
4035 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4036 		ch_flt->parity_data.dpe.cpl_off = word * 8;
4037 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4038 	}
4039 	ch_flt->parity_data.dpe.cpl_lcnt++;
4040 }
4041 
4042 /*
4043  * Record information related to the source of an Icache Parity Error.
4044  *
4045  * Called with the Icache disabled so any diagnostic accesses are safe.
4046  */
4047 static void
4048 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4049 {
4050 	int	ic_set_size;
4051 	int	ic_linesize;
4052 	int	index;
4053 
4054 	if (CPU_PRIVATE(CPU)) {
4055 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4056 		    CH_ICACHE_NWAY;
4057 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4058 	} else {
4059 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4060 		ic_linesize = icache_linesize;
4061 	}
4062 
4063 	ch_flt->parity_data.ipe.cpl_way = -1;
4064 	ch_flt->parity_data.ipe.cpl_off = -1;
4065 
4066 	for (index = 0; index < ic_set_size; index += ic_linesize)
4067 		cpu_icache_parity_check(ch_flt, index);
4068 }
4069 
4070 /*
4071  * Check all ways of the Icache at a specified index for good parity.
4072  */
4073 static void
4074 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4075 {
4076 	uint64_t parmask, pn_inst_parity;
4077 	int ic_set_size;
4078 	int ic_linesize;
4079 	int flt_index, way, instr, num_instr;
4080 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4081 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4082 	ch_ic_data_t tmp_icp;
4083 
4084 	if (CPU_PRIVATE(CPU)) {
4085 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4086 		    CH_ICACHE_NWAY;
4087 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4088 	} else {
4089 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4090 		ic_linesize = icache_linesize;
4091 	}
4092 
4093 	/*
4094 	 * Panther has twice as many instructions per icache line and the
4095 	 * instruction parity bit is in a different location.
4096 	 */
4097 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4098 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4099 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4100 	} else {
4101 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4102 		pn_inst_parity = 0;
4103 	}
4104 
4105 	/*
4106 	 * Index at which we expect to find the parity error.
4107 	 */
4108 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4109 
4110 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4111 		/*
4112 		 * Diagnostic reads expect address argument in ASI format.
4113 		 */
4114 		get_icache_dtag(2 * (index + way * ic_set_size),
4115 		    (uint64_t *)&tmp_icp);
4116 
4117 		/*
4118 		 * If this is the index in which we expect to find the
4119 		 * error log detailed information about each of the ways.
4120 		 * This information will be displayed later if we can't
4121 		 * determine the exact way in which the error is located.
4122 		 */
4123 		if (flt_index == index)
4124 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4125 
4126 		/*
4127 		 * Check tag for even parity.
4128 		 * Sum of 1 bits (including parity bit) should be even.
4129 		 */
4130 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4131 			/*
4132 			 * If this way is the one in which we expected
4133 			 * to find the error record the way and check the
4134 			 * snoop tag. Otherwise just record the fact we
4135 			 * found another error.
4136 			 */
4137 			if (flt_index == index) {
4138 				ch_flt->parity_data.ipe.cpl_way = way;
4139 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4140 
4141 				if (popc64(tmp_icp.ic_sntag &
4142 				    CHP_ICSNTAG_PARMASK) & 1) {
4143 					ch_flt->parity_data.ipe.cpl_tag |=
4144 					    CHP_IC_SNTAG;
4145 					ch_flt->parity_data.ipe.cpl_lcnt++;
4146 				}
4147 
4148 			}
4149 			ch_flt->parity_data.ipe.cpl_lcnt++;
4150 			continue;
4151 		}
4152 
4153 		/*
4154 		 * Check instruction data for even parity.
4155 		 * Bits participating in parity differ for PC-relative
4156 		 * versus non-PC-relative instructions.
4157 		 */
4158 		for (instr = 0; instr < num_instr; instr++) {
4159 			parmask = (tmp_icp.ic_data[instr] &
4160 			    CH_ICDATA_PRED_ISPCREL) ?
4161 			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4162 			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4163 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4164 				/*
4165 				 * If this way is the one in which we expected
4166 				 * to find the error record the way and offset.
4167 				 * Otherwise just log the fact we found another
4168 				 * error.
4169 				 */
4170 				if (flt_index == index) {
4171 					ch_flt->parity_data.ipe.cpl_way = way;
4172 					ch_flt->parity_data.ipe.cpl_off =
4173 					    instr * 4;
4174 				}
4175 				ch_flt->parity_data.ipe.cpl_lcnt++;
4176 				continue;
4177 			}
4178 		}
4179 	}
4180 }
4181 
4182 /*
4183  * Record information related to the source of an Pcache Parity Error.
4184  */
4185 static void
4186 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4187 {
4188 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4189 	int index;
4190 
4191 	/*
4192 	 * Since instruction decode cannot be done at high PIL just
4193 	 * examine the entire Pcache to check for any parity errors.
4194 	 */
4195 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4196 		ch_flt->parity_data.dpe.cpl_way = -1;
4197 		ch_flt->parity_data.dpe.cpl_off = -1;
4198 	}
4199 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4200 		cpu_pcache_parity_check(ch_flt, index);
4201 }
4202 
4203 /*
4204  * Check all ways of the Pcache at a specified index for good parity.
4205  */
4206 static void
4207 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4208 {
4209 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4210 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4211 	int way, word, pbit, parity_bits;
4212 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4213 	ch_pc_data_t tmp_pcp;
4214 
4215 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4216 		/*
4217 		 * Perform diagnostic read.
4218 		 */
4219 		get_pcache_dtag(index + way * pc_set_size,
4220 		    (uint64_t *)&tmp_pcp);
4221 		/*
4222 		 * Check data array for odd parity. There are 8 parity
4223 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4224 		 * of those bits covers exactly 8 bytes of the data
4225 		 * array:
4226 		 *
4227 		 *	parity bit	P$ data bytes covered
4228 		 *	----------	---------------------
4229 		 *	50		63:56
4230 		 *	51		55:48
4231 		 *	52		47:40
4232 		 *	53		39:32
4233 		 *	54		31:24
4234 		 *	55		23:16
4235 		 *	56		15:8
4236 		 *	57		7:0
4237 		 */
4238 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4239 		for (word = 0; word < pc_data_words; word++) {
4240 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4241 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4242 				/*
4243 				 * If this is the first error log detailed
4244 				 * information about it. Otherwise just record
4245 				 * the fact that we found another error.
4246 				 */
4247 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4248 					ch_flt->parity_data.dpe.cpl_way = way;
4249 					ch_flt->parity_data.dpe.cpl_cache =
4250 					    CPU_PC_PARITY;
4251 					ch_flt->parity_data.dpe.cpl_off =
4252 					    word * sizeof (uint64_t);
4253 					bcopy(&tmp_pcp, pcp,
4254 					    sizeof (ch_pc_data_t));
4255 				}
4256 				ch_flt->parity_data.dpe.cpl_lcnt++;
4257 			}
4258 		}
4259 	}
4260 }
4261 
4262 
4263 /*
4264  * Add L1 Data cache data to the ereport payload.
4265  */
4266 static void
4267 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4268 {
4269 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4270 	ch_dc_data_t *dcp;
4271 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4272 	uint_t nelem;
4273 	int i, ways_to_check, ways_logged = 0;
4274 
4275 	/*
4276 	 * If this is an D$ fault then there may be multiple
4277 	 * ways captured in the ch_parity_log_t structure.
4278 	 * Otherwise, there will be at most one way captured
4279 	 * in the ch_diag_data_t struct.
4280 	 * Check each way to see if it should be encoded.
4281 	 */
4282 	if (ch_flt->flt_type == CPU_DC_PARITY)
4283 		ways_to_check = CH_DCACHE_NWAY;
4284 	else
4285 		ways_to_check = 1;
4286 	for (i = 0; i < ways_to_check; i++) {
4287 		if (ch_flt->flt_type == CPU_DC_PARITY)
4288 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4289 		else
4290 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4291 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4292 			bcopy(dcp, &dcdata[ways_logged],
4293 			    sizeof (ch_dc_data_t));
4294 			ways_logged++;
4295 		}
4296 	}
4297 
4298 	/*
4299 	 * Add the dcache data to the payload.
4300 	 */
4301 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4302 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4303 	if (ways_logged != 0) {
4304 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4305 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4306 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4307 	}
4308 }
4309 
4310 /*
4311  * Add L1 Instruction cache data to the ereport payload.
4312  */
4313 static void
4314 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4315 {
4316 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4317 	ch_ic_data_t *icp;
4318 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4319 	uint_t nelem;
4320 	int i, ways_to_check, ways_logged = 0;
4321 
4322 	/*
4323 	 * If this is an I$ fault then there may be multiple
4324 	 * ways captured in the ch_parity_log_t structure.
4325 	 * Otherwise, there will be at most one way captured
4326 	 * in the ch_diag_data_t struct.
4327 	 * Check each way to see if it should be encoded.
4328 	 */
4329 	if (ch_flt->flt_type == CPU_IC_PARITY)
4330 		ways_to_check = CH_ICACHE_NWAY;
4331 	else
4332 		ways_to_check = 1;
4333 	for (i = 0; i < ways_to_check; i++) {
4334 		if (ch_flt->flt_type == CPU_IC_PARITY)
4335 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4336 		else
4337 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4338 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4339 			bcopy(icp, &icdata[ways_logged],
4340 			    sizeof (ch_ic_data_t));
4341 			ways_logged++;
4342 		}
4343 	}
4344 
4345 	/*
4346 	 * Add the icache data to the payload.
4347 	 */
4348 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4349 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4350 	if (ways_logged != 0) {
4351 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4352 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4353 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4354 	}
4355 }
4356 
4357 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4358 
4359 /*
4360  * Add ecache data to payload.
4361  */
4362 static void
4363 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4364 {
4365 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4366 	ch_ec_data_t *ecp;
4367 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4368 	uint_t nelem;
4369 	int i, ways_logged = 0;
4370 
4371 	/*
4372 	 * Check each way to see if it should be encoded
4373 	 * and concatinate it into a temporary buffer.
4374 	 */
4375 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4376 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4377 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4378 			bcopy(ecp, &ecdata[ways_logged],
4379 			    sizeof (ch_ec_data_t));
4380 			ways_logged++;
4381 		}
4382 	}
4383 
4384 	/*
4385 	 * Panther CPUs have an additional level of cache and so
4386 	 * what we just collected was the L3 (ecache) and not the
4387 	 * L2 cache.
4388 	 */
4389 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4390 		/*
4391 		 * Add the L3 (ecache) data to the payload.
4392 		 */
4393 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4394 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4395 		if (ways_logged != 0) {
4396 			nelem = sizeof (ch_ec_data_t) /
4397 			    sizeof (uint64_t) * ways_logged;
4398 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4399 			    DATA_TYPE_UINT64_ARRAY, nelem,
4400 			    (uint64_t *)ecdata, NULL);
4401 		}
4402 
4403 		/*
4404 		 * Now collect the L2 cache.
4405 		 */
4406 		ways_logged = 0;
4407 		for (i = 0; i < PN_L2_NWAYS; i++) {
4408 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4409 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4410 				bcopy(ecp, &ecdata[ways_logged],
4411 				    sizeof (ch_ec_data_t));
4412 				ways_logged++;
4413 			}
4414 		}
4415 	}
4416 
4417 	/*
4418 	 * Add the L2 cache data to the payload.
4419 	 */
4420 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4421 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4422 	if (ways_logged != 0) {
4423 		nelem = sizeof (ch_ec_data_t) /
4424 		    sizeof (uint64_t) * ways_logged;
4425 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4426 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4427 	}
4428 }
4429 
4430 /*
4431  * Initialize cpu scheme for specified cpu.
4432  */
4433 static void
4434 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4435 {
4436 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4437 	uint8_t mask;
4438 
4439 	mask = cpunodes[cpuid].version;
4440 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4441 	    (u_longlong_t)cpunodes[cpuid].device_id);
4442 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4443 	    cpuid, &mask, (const char *)sbuf);
4444 }
4445 
4446 /*
4447  * Returns ereport resource type.
4448  */
4449 static int
4450 cpu_error_to_resource_type(struct async_flt *aflt)
4451 {
4452 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4453 
4454 	switch (ch_flt->flt_type) {
4455 
4456 	case CPU_CE_ECACHE:
4457 	case CPU_UE_ECACHE:
4458 	case CPU_UE_ECACHE_RETIRE:
4459 	case CPU_ORPH:
4460 		/*
4461 		 * If AFSR error bit indicates L2$ Data for Cheetah,
4462 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4463 		 * E$ Data type, otherwise, return CPU type.
4464 		 */
4465 		if (cpu_error_is_ecache_data(aflt->flt_inst,
4466 		    ch_flt->flt_bit))
4467 			return (ERRTYPE_ECACHE_DATA);
4468 		return (ERRTYPE_CPU);
4469 
4470 	case CPU_CE:
4471 	case CPU_UE:
4472 	case CPU_EMC:
4473 	case CPU_DUE:
4474 	case CPU_RCE:
4475 	case CPU_RUE:
4476 	case CPU_FRC:
4477 	case CPU_FRU:
4478 		return (ERRTYPE_MEMORY);
4479 
4480 	case CPU_IC_PARITY:
4481 	case CPU_DC_PARITY:
4482 	case CPU_FPUERR:
4483 	case CPU_PC_PARITY:
4484 	case CPU_ITLB_PARITY:
4485 	case CPU_DTLB_PARITY:
4486 		return (ERRTYPE_CPU);
4487 	}
4488 	return (ERRTYPE_UNKNOWN);
4489 }
4490 
4491 /*
4492  * Encode the data saved in the ch_async_flt_t struct into
4493  * the FM ereport payload.
4494  */
4495 static void
4496 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4497 	nvlist_t *resource, int *afar_status, int *synd_status)
4498 {
4499 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4500 	*synd_status = AFLT_STAT_INVALID;
4501 	*afar_status = AFLT_STAT_INVALID;
4502 
4503 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4504 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4505 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4506 	}
4507 
4508 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4509 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4510 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4511 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4512 	}
4513 
4514 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4515 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4516 		    ch_flt->flt_bit);
4517 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4518 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4519 	}
4520 
4521 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4522 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4523 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4524 	}
4525 
4526 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4527 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4528 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4529 	}
4530 
4531 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4532 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4533 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4534 	}
4535 
4536 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4537 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4538 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4539 	}
4540 
4541 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4542 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4543 		    DATA_TYPE_BOOLEAN_VALUE,
4544 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4545 	}
4546 
4547 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4548 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4549 		    DATA_TYPE_BOOLEAN_VALUE,
4550 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4551 	}
4552 
4553 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4554 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4555 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4556 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4557 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4558 	}
4559 
4560 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4561 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4562 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4563 	}
4564 
4565 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4566 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4567 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4568 	}
4569 
4570 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4571 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4572 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4573 	}
4574 
4575 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4576 		cpu_payload_add_ecache(aflt, payload);
4577 
4578 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4579 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4580 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4581 	}
4582 
4583 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4584 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4585 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4586 	}
4587 
4588 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4589 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4590 		    DATA_TYPE_UINT32_ARRAY, 16,
4591 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4592 	}
4593 
4594 #if defined(CPU_IMP_L1_CACHE_PARITY)
4595 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4596 		cpu_payload_add_dcache(aflt, payload);
4597 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4598 		cpu_payload_add_icache(aflt, payload);
4599 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4600 
4601 #if defined(CHEETAH_PLUS)
4602 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4603 		cpu_payload_add_pcache(aflt, payload);
4604 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4605 		cpu_payload_add_tlb(aflt, payload);
4606 #endif	/* CHEETAH_PLUS */
4607 	/*
4608 	 * Create the FMRI that goes into the payload
4609 	 * and contains the unum info if necessary.
4610 	 */
4611 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4612 		char unum[UNUM_NAMLEN] = "";
4613 		char sid[DIMM_SERIAL_ID_LEN] = "";
4614 		int len, ret, rtype, synd_code;
4615 		uint64_t offset = (uint64_t)-1;
4616 
4617 		rtype = cpu_error_to_resource_type(aflt);
4618 		switch (rtype) {
4619 
4620 		case ERRTYPE_MEMORY:
4621 		case ERRTYPE_ECACHE_DATA:
4622 
4623 			/*
4624 			 * Memory errors, do unum lookup
4625 			 */
4626 			if (*afar_status == AFLT_STAT_INVALID)
4627 				break;
4628 
4629 			if (rtype == ERRTYPE_ECACHE_DATA)
4630 				aflt->flt_status |= ECC_ECACHE;
4631 			else
4632 				aflt->flt_status &= ~ECC_ECACHE;
4633 
4634 			synd_code = synd_to_synd_code(*synd_status,
4635 			    aflt->flt_synd, ch_flt->flt_bit);
4636 
4637 			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4638 				break;
4639 
4640 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4641 			    &len);
4642 
4643 			if (ret == 0) {
4644 				(void) cpu_get_mem_offset(aflt->flt_addr,
4645 				    &offset);
4646 			}
4647 
4648 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4649 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4650 			fm_payload_set(payload,
4651 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4652 			    DATA_TYPE_NVLIST, resource, NULL);
4653 			break;
4654 
4655 		case ERRTYPE_CPU:
4656 			/*
4657 			 * On-board processor array error, add cpu resource.
4658 			 */
4659 			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4660 			fm_payload_set(payload,
4661 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4662 			    DATA_TYPE_NVLIST, resource, NULL);
4663 			break;
4664 		}
4665 	}
4666 }
4667 
4668 /*
4669  * Initialize the way info if necessary.
4670  */
4671 void
4672 cpu_ereport_init(struct async_flt *aflt)
4673 {
4674 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4675 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4676 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4677 	int i;
4678 
4679 	/*
4680 	 * Initialize the info in the CPU logout structure.
4681 	 * The I$/D$ way information is not initialized here
4682 	 * since it is captured in the logout assembly code.
4683 	 */
4684 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4685 		(ecp + i)->ec_way = i;
4686 
4687 	for (i = 0; i < PN_L2_NWAYS; i++)
4688 		(l2p + i)->ec_way = i;
4689 }
4690 
4691 /*
4692  * Returns whether fault address is valid for this error bit and
4693  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4694  */
4695 int
4696 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4697 {
4698 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4699 
4700 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4701 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4702 	    AFLT_STAT_VALID &&
4703 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4704 }
4705 
4706 /*
4707  * Returns whether fault address is valid based on the error bit for the
4708  * one event being queued and whether the address is "in memory".
4709  */
4710 static int
4711 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4712 {
4713 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4714 	int afar_status;
4715 	uint64_t afsr_errs, afsr_ow, *ow_bits;
4716 
4717 	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4718 	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4719 		return (0);
4720 
4721 	afsr_errs = ch_flt->afsr_errs;
4722 	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4723 
4724 	switch (afar_status) {
4725 	case AFLT_STAT_VALID:
4726 		return (1);
4727 
4728 	case AFLT_STAT_AMBIGUOUS:
4729 		/*
4730 		 * Status is ambiguous since another error bit (or bits)
4731 		 * of equal priority to the specified bit on in the afsr,
4732 		 * so check those bits. Return 1 only if the bits on in the
4733 		 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4734 		 * Otherwise not all the equal priority bits are for memory
4735 		 * errors, so return 0.
4736 		 */
4737 		ow_bits = afar_overwrite;
4738 		while ((afsr_ow = *ow_bits++) != 0) {
4739 			/*
4740 			 * Get other bits that are on in t_afsr_bit's priority
4741 			 * class to check for Memory Error bits only.
4742 			 */
4743 			if (afsr_ow & t_afsr_bit) {
4744 				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4745 					return (0);
4746 				else
4747 					return (1);
4748 			}
4749 		}
4750 		/*FALLTHRU*/
4751 
4752 	default:
4753 		return (0);
4754 	}
4755 }
4756 
4757 static void
4758 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4759 {
4760 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4761 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4762 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4763 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4764 #if defined(CPU_IMP_ECACHE_ASSOC)
4765 	int i, nway;
4766 #endif /* CPU_IMP_ECACHE_ASSOC */
4767 
4768 	/*
4769 	 * Check if the CPU log out captured was valid.
4770 	 */
4771 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4772 	    ch_flt->flt_data_incomplete)
4773 		return;
4774 
4775 #if defined(CPU_IMP_ECACHE_ASSOC)
4776 	nway = cpu_ecache_nway();
4777 	i =  cpu_ecache_line_valid(ch_flt);
4778 	if (i == 0 || i > nway) {
4779 		for (i = 0; i < nway; i++)
4780 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4781 	} else
4782 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4783 #else /* CPU_IMP_ECACHE_ASSOC */
4784 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4785 #endif /* CPU_IMP_ECACHE_ASSOC */
4786 
4787 #if defined(CHEETAH_PLUS)
4788 	pn_cpu_log_diag_l2_info(ch_flt);
4789 #endif /* CHEETAH_PLUS */
4790 
4791 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4792 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4793 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4794 	}
4795 
4796 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4797 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4798 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4799 		else
4800 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4801 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4802 	}
4803 }
4804 
/*
 * Cheetah ECC calculation.
 *
 * Only the data bits take part in the calculation; check bit and Mtag bit
 * terms are ignored.  Each row of the table is the pair of 64-bit masks
 * (low-order word, high-order word) selecting the data bits that feed one
 * of the 9 ECC bits.
 */
static uint64_t ch_ecc_table[9][2] = {
	/*
	 * low order 64-bits   high-order 64-bits
	 */
	{ 0x46bffffeccd1177f, 0x488800022100014c },
	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
	{ 0x7f5511421b113809, 0x901c88d84288aafe },
	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
	{ 0xf552181014448344, 0x7ff8f4443e411911 },
	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
	{ 0x3280008440001112, 0xfee88b337ffffd62 },
};

/*
 * 64-bit population count: returns the number of 1 bits in val.
 *
 * Uses the well-known "clear the lowest set bit" trick, so the loop runs
 * once per set bit.  The UltraSPARC V9 POPC instruction is not used because
 * some CPUs, including Cheetahplus and Jaguar, do not support it.
 */
int
popc64(uint64_t val)
{
	int nbits = 0;

	while (val != 0) {
		val &= val - 1;		/* drop the lowest set bit */
		nbits++;
	}
	return (nbits);
}

/*
 * Generate the 9 ECC bits for a 128-bit chunk (data_low, data_high) from
 * the table above.  ECC bit N is the parity of the data bits selected by
 * row N.  Since xor'ing an odd number of 1 bits gives 1 and an even number
 * gives 0, the parity is simply the least significant bit of the
 * population count of the masked data.
 */
uint32_t
us3_gen_ecc(uint64_t data_low, uint64_t data_high)
{
	uint32_t ecc = 0;
	int row;

	for (row = 0; row < 9; row++) {
		int ones = popc64(data_low & ch_ecc_table[row][0]) +
		    popc64(data_high & ch_ecc_table[row][1]);

		if (ones & 1)
			ecc |= (1U << row);
	}
	return (ecc);
}
4862 
4863 /*
4864  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4865  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4866  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4867  */
4868 static void
4869 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4870     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4871 {
4872 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4873 
4874 	if (reason &&
4875 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4876 		(void) strcat(reason, eccp->ec_reason);
4877 	}
4878 
4879 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4880 	ch_flt->flt_type = eccp->ec_flt_type;
4881 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4882 		ch_flt->flt_diag_data = *cdp;
4883 	else
4884 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4885 	aflt->flt_in_memory =
4886 	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4887 
4888 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4889 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4890 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4891 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4892 	else
4893 		aflt->flt_synd = 0;
4894 
4895 	aflt->flt_payload = eccp->ec_err_payload;
4896 
4897 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4898 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4899 		cpu_errorq_dispatch(eccp->ec_err_class,
4900 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4901 		    aflt->flt_panic);
4902 	else
4903 		cpu_errorq_dispatch(eccp->ec_err_class,
4904 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4905 		    aflt->flt_panic);
4906 }
4907 
/*
 * Queue events on async event queue one event per error bit.  First we
 * queue the events that we "expect" for the given trap, then we queue events
 * that we may not expect.  Return number of events queued.
 *
 *   ch_flt	fault structure.  Its flt_addr, flt_stat, afsr_ext and
 *		afsr_errs values on entry are saved as the "primary" values;
 *		these fields are rewritten before each event is queued.
 *   reason	passed through to cpu_queue_one_event() for every event.
 *   t_afsr_errs error bits to consider; masked below to the known error
 *		bits with C_AFSR_ME removed.
 *   clop	CPU logout area supplying diagnostic data, or NULL.
 *
 * Each time an event is queued, its bit is cleared from t_afsr_errs so the
 * same bit is not queued again by a later pass, and cdp is set to NULL so
 * that the next event does not reuse the same diagnostic data (on SERRANO
 * the main passes re-select cdp for every event when clop is non-NULL).
 */
int
cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
    ch_cpu_logout_t *clop)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ecc_type_to_info_t *eccp;
	int nevents = 0;
	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
#if defined(CHEETAH_PLUS)
	uint64_t orig_t_afsr_errs;
#endif
	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
	ch_diag_data_t *cdp = NULL;

	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);

#if defined(CHEETAH_PLUS)
	orig_t_afsr_errs = t_afsr_errs;

	/*
	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
	 */
	if (clop != NULL) {
		/*
		 * Set the AFSR and AFAR fields to the shadow registers.  The
		 * flt_addr and flt_stat fields will be reset to the primaries
		 * below, but the sdw_addr and sdw_stat will stay as the
		 * secondaries.
		 */
		cdp = &clop->clo_sdw_data;
		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);

		/*
		 * If the primary and shadow AFAR or error bits differ, tag
		 * the shadow as the first fault.
		 */
		if ((primary_afar != cdp->chd_afar) ||
		    (primary_afsr_errs != ch_flt->afsr_errs)) {
			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
		}

		/*
		 * Check AFSR bits as well as AFSR_EXT bits in order of
		 * the AFAR overwrite priority. Our stored AFSR_EXT value
		 * is expected to be zero for those CPUs which do not have
		 * an AFSR_EXT register.
		 *
		 * Only shadow bits that are also requested in t_afsr_errs
		 * and whose table flags match the fault status are queued.
		 */
		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
			if ((eccp->ec_afsr_bit &
			    (ch_flt->afsr_errs & t_afsr_errs)) &&
			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
				cdp = NULL;
				t_afsr_errs &= ~eccp->ec_afsr_bit;
				nevents++;
			}
		}

		/*
		 * If the ME bit is on in the primary AFSR turn all the
		 * error bits on again that may set the ME bit to make
		 * sure we see the ME AFSR error logs.
		 */
		if ((primary_afsr & C_AFSR_ME) != 0)
			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
	}
#endif	/* CHEETAH_PLUS */

	/* From here on events are logged against the primary logout data. */
	if (clop != NULL)
		cdp = &clop->clo_data;

	/*
	 * Queue expected errors, error bit and fault type must match
	 * in the ecc_type_to_info table.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
		    (eccp->ec_flags & aflt->flt_status) != 0) {
#if defined(SERRANO)
			/*
			 * For FRC/FRU errors on Serrano the afar2 captures
			 * the address and the associated data is
			 * in the shadow logout area.
			 */
			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
				if (clop != NULL)
					cdp = &clop->clo_sdw_data;
				aflt->flt_addr = ch_flt->afar2;
			} else {
				if (clop != NULL)
					cdp = &clop->clo_data;
				aflt->flt_addr = primary_afar;
			}
#else	/* SERRANO */
			aflt->flt_addr = primary_afar;
#endif	/* SERRANO */
			/* Restore the primary register values for this event. */
			aflt->flt_stat = primary_afsr;
			ch_flt->afsr_ext = primary_afsr_ext;
			ch_flt->afsr_errs = primary_afsr_errs;
			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
			cdp = NULL;
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}

	/*
	 * Queue unexpected errors, error bit only match.  Only bits left
	 * over from the pass above are still set in t_afsr_errs.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if (eccp->ec_afsr_bit & t_afsr_errs) {
#if defined(SERRANO)
			/*
			 * For FRC/FRU errors on Serrano the afar2 captures
			 * the address and the associated data is
			 * in the shadow logout area.
			 */
			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
				if (clop != NULL)
					cdp = &clop->clo_sdw_data;
				aflt->flt_addr = ch_flt->afar2;
			} else {
				if (clop != NULL)
					cdp = &clop->clo_data;
				aflt->flt_addr = primary_afar;
			}
#else	/* SERRANO */
			aflt->flt_addr = primary_afar;
#endif	/* SERRANO */
			/* Restore the primary register values for this event. */
			aflt->flt_stat = primary_afsr;
			ch_flt->afsr_ext = primary_afsr_ext;
			ch_flt->afsr_errs = primary_afsr_errs;
			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
			cdp = NULL;
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}
	return (nevents);
}
5060 
5061 /*
5062  * Return trap type number.
5063  */
5064 uint8_t
5065 flt_to_trap_type(struct async_flt *aflt)
5066 {
5067 	if (aflt->flt_status & ECC_I_TRAP)
5068 		return (TRAP_TYPE_ECC_I);
5069 	if (aflt->flt_status & ECC_D_TRAP)
5070 		return (TRAP_TYPE_ECC_D);
5071 	if (aflt->flt_status & ECC_F_TRAP)
5072 		return (TRAP_TYPE_ECC_F);
5073 	if (aflt->flt_status & ECC_C_TRAP)
5074 		return (TRAP_TYPE_ECC_C);
5075 	if (aflt->flt_status & ECC_DP_TRAP)
5076 		return (TRAP_TYPE_ECC_DP);
5077 	if (aflt->flt_status & ECC_IP_TRAP)
5078 		return (TRAP_TYPE_ECC_IP);
5079 	if (aflt->flt_status & ECC_ITLB_TRAP)
5080 		return (TRAP_TYPE_ECC_ITLB);
5081 	if (aflt->flt_status & ECC_DTLB_TRAP)
5082 		return (TRAP_TYPE_ECC_DTLB);
5083 	return (TRAP_TYPE_UNKNOWN);
5084 }
5085 
/*
 * Decide an error type based on detector and leaky/partner tests.
 * The following array is used for quick translation - it must
 * stay in sync with ce_dispact_t.
 *
 * The array is indexed by a CE_DISP_* disposition value (for example
 * cetypes[CE_DISP_UNKNOWN]) and yields the matching description string,
 * so the order of the entries below is significant.
 */

static char *cetypes[] = {
	CE_DISP_DESC_U,
	CE_DISP_DESC_I,
	CE_DISP_DESC_PP,
	CE_DISP_DESC_P,
	CE_DISP_DESC_L,
	CE_DISP_DESC_PS,
	CE_DISP_DESC_S
};
5101 
/*
 * Translate the classification data recorded in aflt->flt_disp into a
 * description string taken from cetypes[].
 *
 * The "unknown" string is returned when the disposition table has not been
 * initialized, the fault is not in memory, no classification was recorded,
 * the detector did not apply the extended algorithm, or the table lookup
 * yields CE_DISP_BAD.  Otherwise the initial disposition from the lookup
 * may be refined using the leaky-test and partner-test information.
 */
char *
flt_to_error_type(struct async_flt *aflt)
{
	ce_dispact_t dispact, disp;
	uchar_t dtcrinfo, ptnrinfo, lkyinfo;

	/*
	 * The memory payload bundle is shared by some events that do
	 * not perform any classification.  For those flt_disp will be
	 * 0 and we will return "unknown".
	 */
	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
		return (cetypes[CE_DISP_UNKNOWN]);

	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);

	/*
	 * It is also possible that no scrub/classification was performed
	 * by the detector, for instance where a disrupting error logged
	 * in the AFSR while CEEN was off in cpu_deferred_error.
	 */
	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
		return (cetypes[CE_DISP_UNKNOWN]);

	/*
	 * Lookup type in initial classification/action table
	 */
	dispact = CE_DISPACT(ce_disp_table,
	    CE_XDIAG_AFARMATCHED(dtcrinfo),
	    CE_XDIAG_STATE(dtcrinfo),
	    CE_XDIAG_CE1SEEN(dtcrinfo),
	    CE_XDIAG_CE2SEEN(dtcrinfo));

	/*
	 * A bad lookup is not something to panic production systems for.
	 * The ASSERT only fires on DEBUG kernels; non-DEBUG kernels fall
	 * back to "unknown".
	 */
	ASSERT(dispact != CE_DISP_BAD);
	if (dispact == CE_DISP_BAD)
		return (cetypes[CE_DISP_UNKNOWN]);

	disp = CE_DISP(dispact);

	switch (disp) {
	case CE_DISP_UNKNOWN:
	case CE_DISP_INTERMITTENT:
		/* Reported as looked up; no further refinement. */
		break;

	case CE_DISP_POSS_PERS:
		/*
		 * "Possible persistent" errors to which we have applied a valid
		 * leaky test can be separated into "persistent" or "leaky".
		 */
		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
		if (CE_XDIAG_TESTVALID(lkyinfo)) {
			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
			    CE_XDIAG_CE2SEEN(lkyinfo))
				disp = CE_DISP_LEAKY;
			else
				disp = CE_DISP_PERS;
		}
		break;

	case CE_DISP_POSS_STICKY:
		/*
		 * Promote "possible sticky" results that have been
		 * confirmed by a partner test to "sticky".  Unconfirmed
		 * "possible sticky" events are left at that status - we do not
		 * guess at any bad reader/writer etc status here.
		 */
		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
			disp = CE_DISP_STICKY;

		/*
		 * Promote "possible sticky" results on a uniprocessor
		 * to "sticky"
		 *
		 * NOTE(review): the skip code is extracted from the local
		 * "disp", which at this point is known to equal
		 * CE_DISP_POSS_STICKY, so the second test does not depend on
		 * the fault at all.  Presumably the recorded xdiag data
		 * (aflt->flt_disp or ptnrinfo) was intended - confirm against
		 * the CE_XDIAG_SKIPCODE definition before changing.
		 */
		if (disp == CE_DISP_POSS_STICKY &&
		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
			disp = CE_DISP_STICKY;
		break;

	default:
		/* Any other disposition has no description of its own. */
		disp = CE_DISP_UNKNOWN;
		break;
	}

	return (cetypes[disp]);
}
5192 
5193 /*
5194  * Given the entire afsr, the specific bit to check and a prioritized list of
5195  * error bits, determine the validity of the various overwrite priority
5196  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5197  * different overwrite priorities.
5198  *
5199  * Given a specific afsr error bit and the entire afsr, there are three cases:
5200  *   INVALID:	The specified bit is lower overwrite priority than some other
5201  *		error bit which is on in the afsr (or IVU/IVC).
5202  *   VALID:	The specified bit is higher priority than all other error bits
5203  *		which are on in the afsr.
5204  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5205  *		bit is on in the afsr.
5206  */
5207 int
5208 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5209 {
5210 	uint64_t afsr_ow;
5211 
5212 	while ((afsr_ow = *ow_bits++) != 0) {
5213 		/*
5214 		 * If bit is in the priority class, check to see if another
5215 		 * bit in the same class is on => ambiguous.  Otherwise,
5216 		 * the value is valid.  If the bit is not on at this priority
5217 		 * class, but a higher priority bit is on, then the value is
5218 		 * invalid.
5219 		 */
5220 		if (afsr_ow & afsr_bit) {
5221 			/*
5222 			 * If equal pri bit is on, ambiguous.
5223 			 */
5224 			if (afsr & (afsr_ow & ~afsr_bit))
5225 				return (AFLT_STAT_AMBIGUOUS);
5226 			return (AFLT_STAT_VALID);
5227 		} else if (afsr & afsr_ow)
5228 			break;
5229 	}
5230 
5231 	/*
5232 	 * We didn't find a match or a higher priority bit was on.  Not
5233 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5234 	 */
5235 	return (AFLT_STAT_INVALID);
5236 }
5237 
5238 static int
5239 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5240 {
5241 #if defined(SERRANO)
5242 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5243 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5244 	else
5245 #endif	/* SERRANO */
5246 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5247 }
5248 
/*
 * Validity of the E_SYND value for afsr_bit: evaluates the bit against the
 * esynd_overwrite priority list and returns the AFLT_STAT_* result of
 * afsr_to_overw_status().
 */
static int
afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
{
	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
}
5254 
/*
 * Validity of the M_SYND value for afsr_bit: evaluates the bit against the
 * msynd_overwrite priority list and returns the AFLT_STAT_* result of
 * afsr_to_overw_status().
 */
static int
afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
{
	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
}
5260 
5261 static int
5262 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5263 {
5264 #ifdef lint
5265 	cpuid = cpuid;
5266 #endif
5267 #if defined(CHEETAH_PLUS)
5268 	/*
5269 	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5270 	 * policy for Cheetah+ and separate for Panther CPUs.
5271 	 */
5272 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5273 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5274 			return (afsr_to_msynd_status(afsr, afsr_bit));
5275 		else
5276 			return (afsr_to_esynd_status(afsr, afsr_bit));
5277 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5278 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5279 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5280 		else
5281 			return (afsr_to_esynd_status(afsr, afsr_bit));
5282 #else /* CHEETAH_PLUS */
5283 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5284 		return (afsr_to_msynd_status(afsr, afsr_bit));
5285 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5286 		return (afsr_to_esynd_status(afsr, afsr_bit));
5287 #endif /* CHEETAH_PLUS */
5288 	} else {
5289 		return (AFLT_STAT_INVALID);
5290 	}
5291 }
5292 
/*
 * Slave CPU stick synchronization.
 *
 * Runs with preemption disabled and cooperates with sticksync_master()
 * through the shared stick_sync_cmd, slave_done and timestamp[] variables:
 *  - timestamp beginning of Event A and end of Event B (the master
 *    timestamps the end of Event A and the beginning of Event B)
 *  - after stick_iter exchanges, average the measured skew
 *  - if the average exceeds stick_tsk in magnitude, adjust the local STICK
 *    and go around again; otherwise set slave_done
 *
 * NOTE(review): the empty-bodied wait loops depend on stick_sync_cmd being
 * re-read on every iteration - presumably it is declared volatile; confirm
 * at its definition.
 */
void
sticksync_slave(void)
{
	int 		i;
	int		tries = 0;
	int64_t		tskew;
	int64_t		av_tskew;

	kpreempt_disable();
	/* wait for the master side */
	while (stick_sync_cmd != SLAVE_START)
		;
	/*
	 * Synchronization should only take a few tries at most. But in the
	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
	 * without its stick synchronized wouldn't be a good citizen.
	 */
	while (slave_done == 0) {
		/*
		 * Time skew calculation.
		 */
		av_tskew = tskew = 0;

		for (i = 0; i < stick_iter; i++) {
			/* make location hot */
			timestamp[EV_A_START] = 0;
			stick_timestamp(&timestamp[EV_A_START]);

			/* tell the master we're ready */
			stick_sync_cmd = MASTER_START;

			/* and wait */
			while (stick_sync_cmd != SLAVE_CONT)
				;
			/* Event B end */
			stick_timestamp(&timestamp[EV_B_END]);

			/*
			 * calculate time skew: half the difference between
			 * the Event B duration and the Event A duration
			 */
			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
			    - (timestamp[EV_A_END] - timestamp[EV_A_START]))
			    / 2;

			/* keep running count */
			av_tskew += tskew;
		} /* for */

		/*
		 * Adjust stick for time skew if not within the max allowed;
		 * otherwise we're all done.  The division is skipped when
		 * stick_iter is 0 to avoid dividing by zero.
		 */
		if (stick_iter != 0)
			av_tskew = av_tskew/stick_iter;
		if (ABS(av_tskew) > stick_tsk) {
			/*
			 * If the skew is 1 (the slave's STICK register
			 * is 1 STICK ahead of the master's), stick_adj
			 * could fail to adjust the slave's STICK register
			 * if the STICK read on the slave happens to
			 * align with the increment of the STICK.
			 * Therefore, we increment the skew to 2.
			 */
			if (av_tskew == 1)
				av_tskew++;
			stick_adj(-av_tskew);
		} else
			slave_done = 1;
#ifdef DEBUG
		/* record the average skew of the first DSYNC_ATTEMPTS tries */
		if (tries < DSYNC_ATTEMPTS)
			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
			    av_tskew;
		++tries;
#endif /* DEBUG */
#ifdef lint
		tries = tries;
#endif

	} /* while */

	/* allow the master to finish */
	stick_sync_cmd = EVENT_NULL;
	kpreempt_enable();
}
5378 
/*
 * Master CPU side of stick synchronization.
 *  - timestamp end of Event A
 *  - timestamp beginning of Event B
 *
 * Runs with preemption disabled.  The master clears slave_done, posts
 * SLAVE_START and then, until slave_done becomes nonzero, serves stick_iter
 * exchanges per round: it waits for MASTER_START in stick_sync_cmd, takes
 * its two timestamps and posts SLAVE_CONT.  After each round it spins while
 * stick_sync_cmd is still SLAVE_CONT, i.e. until the other side changes it.
 */
void
sticksync_master(void)
{
	int		i;

	kpreempt_disable();
	/* tell the slave we've started */
	slave_done = 0;
	stick_sync_cmd = SLAVE_START;

	while (slave_done == 0) {
		for (i = 0; i < stick_iter; i++) {
			/* wait for the slave */
			while (stick_sync_cmd != MASTER_START)
				;
			/* Event A end */
			stick_timestamp(&timestamp[EV_A_END]);

			/* make location hot */
			timestamp[EV_B_START] = 0;
			stick_timestamp(&timestamp[EV_B_START]);

			/* tell the slave to continue */
			stick_sync_cmd = SLAVE_CONT;
		} /* for */

		/* wait while slave calculates time skew */
		while (stick_sync_cmd == SLAVE_CONT)
			;
	} /* while */
	kpreempt_enable();
}
5416 
/*
 * Cheetah/Cheetah+ have a disrupting error for copybacks, so we don't need
 * to do the Spitfire hack of xcall'ing all the cpus to ask them to check
 * for such errors.  Also, in cpu_async_panic_callb, each cpu checks for CPU
 * events on its way to panic idle.
 *
 * This function is therefore intentionally empty; aflt is unused.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{}
5427 
/*
 * kmem cache that per-cpu cheetah_private_t structures are returned to
 * when a cpu's private data is torn down.
 */
struct kmem_cache *ch_private_cache;
5429 
/*
 * Cpu private uninitialization.  Uninitialize the Ecache scrubber and
 * deallocate the scrubber data structures and cpu_private data structure.
 *
 * The cpu must currently have private data (asserted on DEBUG kernels).
 * The private pointer and the cpu's ch_err_tl1_paddrs[] slot are cleared
 * before the structure is handed back to ch_private_cache, and finally
 * cmp_delete_cpu() is called for the cpu id.
 */
void
cpu_uninit_private(struct cpu *cp)
{
	cheetah_private_t *chprp = CPU_PRIVATE(cp);

	ASSERT(chprp);
	/* Tear down the scrubber while the private data is still attached. */
	cpu_uninit_ecache_scrub_dr(cp);
	CPU_PRIVATE(cp) = NULL;
	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
	kmem_cache_free(ch_private_cache, chprp);
	cmp_delete_cpu(cp->cpu_id);

}
5447 
5448 /*
5449  * Cheetah Cache Scrubbing
5450  *
5451  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5452  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5453  * protected by either parity or ECC.
5454  *
5455  * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
5457  * logic works with respect to the ASI used to scrub I$ lines, the entire
5458  * I$ is scanned at once.
5459  */
5460 
/*
 * Tuneables to enable and disable the scrubbing of the caches, and to tune
 * scrubbing behavior.  These may be changed via /etc/system or using mdb
 * on a running system.
 *
 * A nonzero ?cache_scrub_enable value turns the corresponding scrubber on.
 */
int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */

/*
 * The following are the PIL levels that the softints/cross traps will fire at.
 * They are handed to add_softintr() when the scrubber softints are
 * registered.
 */
uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */

#if defined(JALAPENO)

/*
 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
 * on Jalapeno.
 */
int ecache_scrub_enable = 0;

#else	/* JALAPENO */

/*
 * With all other cpu types, E$ scrubbing is on by default
 */
int ecache_scrub_enable = 1;

#endif	/* JALAPENO */
5491 
5492 
#if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)

/*
 * The I$ scrubber tends to cause latency problems for real-time SW, so it
 * is disabled by default on non-Cheetah systems
 */
int icache_scrub_enable = 0;

/*
 * Tuneables specifying the scrub calls per second and the scan rate
 * for each cache
 *
 * The cyclic times are set during boot based on the following values.
 * Changing these values in mdb after this time will have no effect.  If
 * a different value is desired, it must be set in /etc/system before a
 * reboot.
 */
int ecache_calls_a_sec = 1;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 2;		/* D$ scrub calls per second */
int icache_calls_a_sec = 2;		/* I$ scrub calls per second */

/*
 * Scan rates, presumably in tenths of a percent like the values in the
 * #else branch below.
 */
int ecache_scan_rate_idle = 1;
int ecache_scan_rate_busy = 1;
int dcache_scan_rate_idle = 1;
int dcache_scan_rate_busy = 1;
int icache_scan_rate_idle = 1;
int icache_scan_rate_busy = 1;

#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */

int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */

int ecache_calls_a_sec = 100;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 100;		/* D$ scrub calls per second */
int icache_calls_a_sec = 100;		/* I$ scrub calls per second */

int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */

#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5537 
5538 /*
5539  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5540  * increment the outstanding request counter and schedule a softint to run
5541  * the scrubber.
5542  */
5543 extern xcfunc_t cache_scrubreq_tl1;
5544 
5545 /*
5546  * These are the softint functions for each cache scrubber
5547  */
5548 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5549 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5550 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5551 
/*
 * The cache scrub info table contains cache specific information
 * and allows for some of the scrub code to be table driven, reducing
 * duplication of cache similar code.
 *
 * This table keeps a copy of the value in the calls per second variable
 * (?cache_calls_a_sec).  This makes it much more difficult for someone
 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
 * mdb in a misguided attempt to disable the scrubber).
 *
 * The entries are laid out in E$, D$, I$ order.  Only the enable pointer,
 * the index and the name are set statically; the remaining fields start
 * out as 0.
 */
struct scrub_info {
	int		*csi_enable;	/* scrubber enable flag */
	int		csi_freq;	/* scrubber calls per second */
	int		csi_index;	/* index to chsm_outstanding[] */
	uint64_t	csi_inum;	/* scrubber interrupt number */
	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
	/* two characters plus the terminating NUL, e.g. "E$" */
	char		csi_name[3];	/* cache name for this scrub entry */
} cache_scrub_info[] = {
{ &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
{ &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
{ &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
};
5575 
5576 /*
5577  * If scrubbing is enabled, increment the outstanding request counter.  If it
5578  * is 1 (meaning there were no previous requests outstanding), call
5579  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5580  * a self trap.
5581  */
5582 static void
5583 do_scrub(struct scrub_info *csi)
5584 {
5585 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5586 	int index = csi->csi_index;
5587 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5588 
5589 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5590 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5591 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5592 			    csi->csi_inum, 0);
5593 		}
5594 	}
5595 }
5596 
5597 /*
5598  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5599  * cross-trap the offline cpus.
5600  */
5601 static void
5602 do_scrub_offline(struct scrub_info *csi)
5603 {
5604 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5605 
5606 	if (CPUSET_ISNULL(cpu_offline_set)) {
5607 		/*
5608 		 * No offline cpus - nothing to do
5609 		 */
5610 		return;
5611 	}
5612 
5613 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5614 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5615 		    csi->csi_index);
5616 	}
5617 }
5618 
5619 /*
5620  * This is the initial setup for the scrubber cyclics - it sets the
5621  * interrupt level, frequency, and function to call.
5622  */
5623 /*ARGSUSED*/
5624 static void
5625 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5626     cyc_time_t *when)
5627 {
5628 	struct scrub_info *csi = (struct scrub_info *)arg;
5629 
5630 	ASSERT(csi != NULL);
5631 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5632 	hdlr->cyh_level = CY_LOW_LEVEL;
5633 	hdlr->cyh_arg = arg;
5634 
5635 	when->cyt_when = 0;	/* Start immediately */
5636 	when->cyt_interval = NANOSEC / csi->csi_freq;
5637 }
5638 
5639 /*
5640  * Initialization for cache scrubbing.
5641  * This routine is called AFTER all cpus have had cpu_init_private called
5642  * to initialize their private data areas.
5643  */
5644 void
5645 cpu_init_cache_scrub(void)
5646 {
5647 	int i;
5648 	struct scrub_info *csi;
5649 	cyc_omni_handler_t omni_hdlr;
5650 	cyc_handler_t offline_hdlr;
5651 	cyc_time_t when;
5652 
5653 	/*
5654 	 * save away the maximum number of lines for the D$
5655 	 */
5656 	dcache_nlines = dcache_size / dcache_linesize;
5657 
5658 	/*
5659 	 * register the softints for the cache scrubbing
5660 	 */
5661 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5662 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5663 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5664 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5665 
5666 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5667 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5668 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5669 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5670 
5671 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5672 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5673 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5674 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5675 
5676 	/*
5677 	 * start the scrubbing for all the caches
5678 	 */
5679 	mutex_enter(&cpu_lock);
5680 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5681 
5682 		csi = &cache_scrub_info[i];
5683 
5684 		if (!(*csi->csi_enable))
5685 			continue;
5686 
5687 		/*
5688 		 * force the following to be true:
5689 		 *	1 <= calls_a_sec <= hz
5690 		 */
5691 		if (csi->csi_freq > hz) {
5692 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5693 			    "(%d); resetting to hz (%d)", csi->csi_name,
5694 			    csi->csi_freq, hz);
5695 			csi->csi_freq = hz;
5696 		} else if (csi->csi_freq < 1) {
5697 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5698 			    "(%d); resetting to 1", csi->csi_name,
5699 			    csi->csi_freq);
5700 			csi->csi_freq = 1;
5701 		}
5702 
5703 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5704 		omni_hdlr.cyo_offline = NULL;
5705 		omni_hdlr.cyo_arg = (void *)csi;
5706 
5707 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5708 		offline_hdlr.cyh_arg = (void *)csi;
5709 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5710 
5711 		when.cyt_when = 0;	/* Start immediately */
5712 		when.cyt_interval = NANOSEC / csi->csi_freq;
5713 
5714 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5715 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5716 	}
5717 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5718 	mutex_exit(&cpu_lock);
5719 }
5720 
5721 /*
5722  * Indicate that the specified cpu is idle.
5723  */
5724 void
5725 cpu_idle_ecache_scrub(struct cpu *cp)
5726 {
5727 	if (CPU_PRIVATE(cp) != NULL) {
5728 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5729 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5730 	}
5731 }
5732 
5733 /*
5734  * Indicate that the specified cpu is busy.
5735  */
5736 void
5737 cpu_busy_ecache_scrub(struct cpu *cp)
5738 {
5739 	if (CPU_PRIVATE(cp) != NULL) {
5740 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5741 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5742 	}
5743 }
5744 
5745 /*
5746  * Initialization for cache scrubbing for the specified cpu.
5747  */
5748 void
5749 cpu_init_ecache_scrub_dr(struct cpu *cp)
5750 {
5751 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5752 	int cpuid = cp->cpu_id;
5753 
5754 	/* initialize the number of lines in the caches */
5755 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5756 	    cpunodes[cpuid].ecache_linesize;
5757 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5758 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5759 
5760 	/*
5761 	 * do_scrub() and do_scrub_offline() check both the global
5762 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5763 	 * check this value before scrubbing.  Currently, we use it to
5764 	 * disable the E$ scrubber on multi-core cpus or while running at
5765 	 * slowed speed.  For now, just turn everything on and allow
5766 	 * cpu_init_private() to change it if necessary.
5767 	 */
5768 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5769 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5770 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5771 
5772 	cpu_busy_ecache_scrub(cp);
5773 }
5774 
5775 /*
5776  * Un-initialization for cache scrubbing for the specified cpu.
5777  */
5778 static void
5779 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5780 {
5781 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5782 
5783 	/*
5784 	 * un-initialize bookkeeping for cache scrubbing
5785 	 */
5786 	bzero(csmp, sizeof (ch_scrub_misc_t));
5787 
5788 	cpu_idle_ecache_scrub(cp);
5789 }
5790 
5791 /*
5792  * Called periodically on each CPU to scrub the D$.
5793  */
5794 static void
5795 scrub_dcache(int how_many)
5796 {
5797 	int i;
5798 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5799 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5800 
5801 	/*
5802 	 * scrub the desired number of lines
5803 	 */
5804 	for (i = 0; i < how_many; i++) {
5805 		/*
5806 		 * scrub a D$ line
5807 		 */
5808 		dcache_inval_line(index);
5809 
5810 		/*
5811 		 * calculate the next D$ line to scrub, assumes
5812 		 * that dcache_nlines is a power of 2
5813 		 */
5814 		index = (index + 1) & (dcache_nlines - 1);
5815 	}
5816 
5817 	/*
5818 	 * set the scrub index for the next visit
5819 	 */
5820 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5821 }
5822 
5823 /*
5824  * Handler for D$ scrub inum softint. Call scrub_dcache until
5825  * we decrement the outstanding request count to zero.
5826  */
5827 /*ARGSUSED*/
5828 static uint_t
5829 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5830 {
5831 	int i;
5832 	int how_many;
5833 	int outstanding;
5834 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5835 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5836 	struct scrub_info *csi = (struct scrub_info *)arg1;
5837 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5838 	    dcache_scan_rate_idle : dcache_scan_rate_busy;
5839 
5840 	/*
5841 	 * The scan rates are expressed in units of tenths of a
5842 	 * percent.  A scan rate of 1000 (100%) means the whole
5843 	 * cache is scanned every second.
5844 	 */
5845 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5846 
5847 	do {
5848 		outstanding = *countp;
5849 		for (i = 0; i < outstanding; i++) {
5850 			scrub_dcache(how_many);
5851 		}
5852 	} while (atomic_add_32_nv(countp, -outstanding));
5853 
5854 	return (DDI_INTR_CLAIMED);
5855 }
5856 
5857 /*
5858  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5859  * by invalidating lines. Due to the characteristics of the ASI which
5860  * is used to invalidate an I$ line, the entire I$ must be invalidated
5861  * vs. an individual I$ line.
5862  */
5863 static void
5864 scrub_icache(int how_many)
5865 {
5866 	int i;
5867 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5868 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5869 	int icache_nlines = csmp->chsm_icache_nlines;
5870 
5871 	/*
5872 	 * scrub the desired number of lines
5873 	 */
5874 	for (i = 0; i < how_many; i++) {
5875 		/*
5876 		 * since the entire I$ must be scrubbed at once,
5877 		 * wait until the index wraps to zero to invalidate
5878 		 * the entire I$
5879 		 */
5880 		if (index == 0) {
5881 			icache_inval_all();
5882 		}
5883 
5884 		/*
5885 		 * calculate the next I$ line to scrub, assumes
5886 		 * that chsm_icache_nlines is a power of 2
5887 		 */
5888 		index = (index + 1) & (icache_nlines - 1);
5889 	}
5890 
5891 	/*
5892 	 * set the scrub index for the next visit
5893 	 */
5894 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5895 }
5896 
5897 /*
5898  * Handler for I$ scrub inum softint. Call scrub_icache until
5899  * we decrement the outstanding request count to zero.
5900  */
5901 /*ARGSUSED*/
5902 static uint_t
5903 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5904 {
5905 	int i;
5906 	int how_many;
5907 	int outstanding;
5908 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5909 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5910 	struct scrub_info *csi = (struct scrub_info *)arg1;
5911 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5912 	    icache_scan_rate_idle : icache_scan_rate_busy;
5913 	int icache_nlines = csmp->chsm_icache_nlines;
5914 
5915 	/*
5916 	 * The scan rates are expressed in units of tenths of a
5917 	 * percent.  A scan rate of 1000 (100%) means the whole
5918 	 * cache is scanned every second.
5919 	 */
5920 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5921 
5922 	do {
5923 		outstanding = *countp;
5924 		for (i = 0; i < outstanding; i++) {
5925 			scrub_icache(how_many);
5926 		}
5927 	} while (atomic_add_32_nv(countp, -outstanding));
5928 
5929 	return (DDI_INTR_CLAIMED);
5930 }
5931 
5932 /*
5933  * Called periodically on each CPU to scrub the E$.
5934  */
5935 static void
5936 scrub_ecache(int how_many)
5937 {
5938 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5939 	int i;
5940 	int cpuid = CPU->cpu_id;
5941 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5942 	int nlines = csmp->chsm_ecache_nlines;
5943 	int linesize = cpunodes[cpuid].ecache_linesize;
5944 	int ec_set_size = cpu_ecache_set_size(CPU);
5945 
5946 	/*
5947 	 * scrub the desired number of lines
5948 	 */
5949 	for (i = 0; i < how_many; i++) {
5950 		/*
5951 		 * scrub the E$ line
5952 		 */
5953 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5954 		    ec_set_size);
5955 
5956 		/*
5957 		 * calculate the next E$ line to scrub based on twice
5958 		 * the number of E$ lines (to displace lines containing
5959 		 * flush area data), assumes that the number of lines
5960 		 * is a power of 2
5961 		 */
5962 		index = (index + 1) & ((nlines << 1) - 1);
5963 	}
5964 
5965 	/*
5966 	 * set the ecache scrub index for the next visit
5967 	 */
5968 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5969 }
5970 
5971 /*
5972  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5973  * we decrement the outstanding request count to zero.
5974  *
5975  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5976  * become negative after the atomic_add_32_nv().  This is not a problem, as
5977  * the next trip around the loop won't scrub anything, and the next add will
5978  * reset the count back to zero.
5979  */
5980 /*ARGSUSED*/
5981 static uint_t
5982 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5983 {
5984 	int i;
5985 	int how_many;
5986 	int outstanding;
5987 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5988 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5989 	struct scrub_info *csi = (struct scrub_info *)arg1;
5990 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5991 	    ecache_scan_rate_idle : ecache_scan_rate_busy;
5992 	int ecache_nlines = csmp->chsm_ecache_nlines;
5993 
5994 	/*
5995 	 * The scan rates are expressed in units of tenths of a
5996 	 * percent.  A scan rate of 1000 (100%) means the whole
5997 	 * cache is scanned every second.
5998 	 */
5999 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
6000 
6001 	do {
6002 		outstanding = *countp;
6003 		for (i = 0; i < outstanding; i++) {
6004 			scrub_ecache(how_many);
6005 		}
6006 	} while (atomic_add_32_nv(countp, -outstanding));
6007 
6008 	return (DDI_INTR_CLAIMED);
6009 }
6010 
6011 /*
6012  * Timeout function to reenable CE
6013  */
6014 static void
6015 cpu_delayed_check_ce_errors(void *arg)
6016 {
6017 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6018 	    TQ_NOSLEEP)) {
6019 		(void) timeout(cpu_delayed_check_ce_errors, arg,
6020 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6021 	}
6022 }
6023 
6024 /*
6025  * CE Deferred Re-enable after trap.
6026  *
6027  * When the CPU gets a disrupting trap for any of the errors
6028  * controlled by the CEEN bit, CEEN is disabled in the trap handler
6029  * immediately. To eliminate the possibility of multiple CEs causing
6030  * recursive stack overflow in the trap handler, we cannot
6031  * reenable CEEN while still running in the trap handler. Instead,
6032  * after a CE is logged on a CPU, we schedule a timeout function,
6033  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6034  * seconds. This function will check whether any further CEs
6035  * have occurred on that CPU, and if none have, will reenable CEEN.
6036  *
6037  * If further CEs have occurred while CEEN is disabled, another
6038  * timeout will be scheduled. This is to ensure that the CPU can
6039  * make progress in the face of CE 'storms', and that it does not
6040  * spend all its time logging CE errors.
6041  */
6042 static void
6043 cpu_check_ce_errors(void *arg)
6044 {
6045 	int	cpuid = (int)(uintptr_t)arg;
6046 	cpu_t	*cp;
6047 
6048 	/*
6049 	 * We acquire cpu_lock.
6050 	 */
6051 	ASSERT(curthread->t_pil == 0);
6052 
6053 	/*
6054 	 * verify that the cpu is still around, DR
6055 	 * could have got there first ...
6056 	 */
6057 	mutex_enter(&cpu_lock);
6058 	cp = cpu_get(cpuid);
6059 	if (cp == NULL) {
6060 		mutex_exit(&cpu_lock);
6061 		return;
6062 	}
6063 	/*
6064 	 * make sure we don't migrate across CPUs
6065 	 * while checking our CE status.
6066 	 */
6067 	kpreempt_disable();
6068 
6069 	/*
6070 	 * If we are running on the CPU that got the
6071 	 * CE, we can do the checks directly.
6072 	 */
6073 	if (cp->cpu_id == CPU->cpu_id) {
6074 		mutex_exit(&cpu_lock);
6075 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6076 		kpreempt_enable();
6077 		return;
6078 	}
6079 	kpreempt_enable();
6080 
6081 	/*
6082 	 * send an x-call to get the CPU that originally
6083 	 * got the CE to do the necessary checks. If we can't
6084 	 * send the x-call, reschedule the timeout, otherwise we
6085 	 * lose CEEN forever on that CPU.
6086 	 */
6087 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6088 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6089 		    TIMEOUT_CEEN_CHECK, 0);
6090 		mutex_exit(&cpu_lock);
6091 	} else {
6092 		/*
6093 		 * When the CPU is not accepting xcalls, or
6094 		 * the processor is offlined, we don't want to
6095 		 * incur the extra overhead of trying to schedule the
6096 		 * CE timeout indefinitely. However, we don't want to lose
6097 		 * CE checking forever.
6098 		 *
6099 		 * Keep rescheduling the timeout, accepting the additional
6100 		 * overhead as the cost of correctness in the case where we get
6101 		 * a CE, disable CEEN, offline the CPU during the
6102 		 * the timeout interval, and then online it at some
6103 		 * point in the future. This is unlikely given the short
6104 		 * cpu_ceen_delay_secs.
6105 		 */
6106 		mutex_exit(&cpu_lock);
6107 		(void) timeout(cpu_delayed_check_ce_errors,
6108 		    (void *)(uintptr_t)cp->cpu_id,
6109 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6110 	}
6111 }
6112 
6113 /*
6114  * This routine will check whether CEs have occurred while
6115  * CEEN is disabled. Any CEs detected will be logged and, if
6116  * possible, scrubbed.
6117  *
6118  * The memscrubber will also use this routine to clear any errors
6119  * caused by its scrubbing with CEEN disabled.
6120  *
6121  * flag == SCRUBBER_CEEN_CHECK
6122  *		called from memscrubber, just check/scrub, no reset
6123  *		paddr 	physical addr. for start of scrub pages
6124  *		vaddr 	virtual addr. for scrub area
6125  *		psz	page size of area to be scrubbed
6126  *
6127  * flag == TIMEOUT_CEEN_CHECK
6128  *		timeout function has triggered, reset timeout or CEEN
6129  *
6130  * Note: We must not migrate cpus during this function.  This can be
6131  * achieved by one of:
6132  *    - invoking as target of an x-call in which case we're at XCALL_PIL
6133  *	The flag value must be first xcall argument.
6134  *    - disabling kernel preemption.  This should be done for very short
6135  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6136  *	scrub an extended area with cpu_check_block.  The call for
6137  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6138  *	brief for this case.
6139  *    - binding to a cpu, eg with thread_affinity_set().  This is used
6140  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6141  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6142  */
6143 void
6144 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6145 {
6146 	ch_cpu_errors_t	cpu_error_regs;
6147 	uint64_t	ec_err_enable;
6148 	uint64_t	page_offset;
6149 
6150 	/* Read AFSR */
6151 	get_cpu_error_state(&cpu_error_regs);
6152 
6153 	/*
6154 	 * If no CEEN errors have occurred during the timeout
6155 	 * interval, it is safe to re-enable CEEN and exit.
6156 	 */
6157 	if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
6158 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
6159 		if (flag == TIMEOUT_CEEN_CHECK &&
6160 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6161 			set_error_enable(ec_err_enable | EN_REG_CEEN);
6162 		return;
6163 	}
6164 
6165 	/*
6166 	 * Ensure that CEEN was not reenabled (maybe by DR) before
6167 	 * we log/clear the error.
6168 	 */
6169 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6170 		set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6171 
6172 	/*
6173 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6174 	 * timeout will be rescheduled when the error is logged.
6175 	 */
6176 	if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
6177 	    (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
6178 		cpu_ce_detected(&cpu_error_regs,
6179 		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6180 	else
6181 		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6182 
6183 	/*
6184 	 * If the memory scrubber runs while CEEN is
6185 	 * disabled, (or if CEEN is disabled during the
6186 	 * scrub as a result of a CE being triggered by
6187 	 * it), the range being scrubbed will not be
6188 	 * completely cleaned. If there are multiple CEs
6189 	 * in the range at most two of these will be dealt
6190 	 * with, (one by the trap handler and one by the
6191 	 * timeout). It is also possible that none are dealt
6192 	 * with, (CEEN disabled and another CE occurs before
6193 	 * the timeout triggers). So to ensure that the
6194 	 * memory is actually scrubbed, we have to access each
6195 	 * memory location in the range and then check whether
6196 	 * that access causes a CE.
6197 	 */
6198 	if (flag == SCRUBBER_CEEN_CHECK && va) {
6199 		if ((cpu_error_regs.afar >= pa) &&
6200 		    (cpu_error_regs.afar < (pa + psz))) {
6201 			/*
6202 			 * Force a load from physical memory for each
6203 			 * 64-byte block, then check AFSR to determine
6204 			 * whether this access caused an error.
6205 			 *
6206 			 * This is a slow way to do a scrub, but as it will
6207 			 * only be invoked when the memory scrubber actually
6208 			 * triggered a CE, it should not happen too
6209 			 * frequently.
6210 			 *
6211 			 * cut down what we need to check as the scrubber
6212 			 * has verified up to AFAR, so get it's offset
6213 			 * into the page and start there.
6214 			 */
6215 			page_offset = (uint64_t)(cpu_error_regs.afar &
6216 			    (psz - 1));
6217 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6218 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6219 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6220 			    psz);
6221 		}
6222 	}
6223 
6224 	/*
6225 	 * Reset error enable if this CE is not masked.
6226 	 */
6227 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6228 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6229 		set_error_enable(ec_err_enable | EN_REG_CEEN);
6230 
6231 }
6232 
6233 /*
6234  * Attempt a cpu logout for an error that we did not trap for, such
6235  * as a CE noticed with CEEN off.  It is assumed that we are still running
6236  * on the cpu that took the error and that we cannot migrate.  Returns
6237  * 0 on success, otherwise nonzero.
6238  */
6239 static int
6240 cpu_ce_delayed_ec_logout(uint64_t afar)
6241 {
6242 	ch_cpu_logout_t *clop;
6243 
6244 	if (CPU_PRIVATE(CPU) == NULL)
6245 		return (0);
6246 
6247 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6248 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6249 	    LOGOUT_INVALID)
6250 		return (0);
6251 
6252 	cpu_delayed_logout(afar, clop);
6253 	return (1);
6254 }
6255 
6256 /*
6257  * We got an error while CEEN was disabled. We
6258  * need to clean up after it and log whatever
6259  * information we have on the CE.
6260  */
6261 void
6262 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6263 {
6264 	ch_async_flt_t 	ch_flt;
6265 	struct async_flt *aflt;
6266 	char 		pr_reason[MAX_REASON_STRING];
6267 
6268 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6269 	ch_flt.flt_trapped_ce = flag;
6270 	aflt = (struct async_flt *)&ch_flt;
6271 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6272 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6273 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6274 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6275 	aflt->flt_addr = cpu_error_regs->afar;
6276 #if defined(SERRANO)
6277 	ch_flt.afar2 = cpu_error_regs->afar2;
6278 #endif	/* SERRANO */
6279 	aflt->flt_pc = NULL;
6280 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6281 	aflt->flt_tl = 0;
6282 	aflt->flt_panic = 0;
6283 	cpu_log_and_clear_ce(&ch_flt);
6284 
6285 	/*
6286 	 * check if we caused any errors during cleanup
6287 	 */
6288 	if (clear_errors(&ch_flt)) {
6289 		pr_reason[0] = '\0';
6290 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6291 		    NULL);
6292 	}
6293 }
6294 
/*
 * Log/clear CEEN-controlled disrupting errors
 *
 * ch_flt is a fault structure whose flt_stat, afsr_errs and
 * flt_trapped_ce have already been filled in by the caller.  This
 * routine completes the common fault fields, optionally captures a
 * delayed logout, scrubs memory for memory CEs, queues one event per
 * error bit (or an "invalid AFSR" ereport when nothing could be queued),
 * invalidates the logout area and finally flushes the E$ when the
 * error type calls for it.
 */
static void
cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt;
	uint64_t afsr, afsr_errs;
	ch_cpu_logout_t *clop;
	char 		pr_reason[MAX_REASON_STRING];
	on_trap_data_t	*otp = curthread->t_ontrap;

	/* fill in the fields common to every fault logged from here */
	aflt = (struct async_flt *)ch_flt;
	afsr = aflt->flt_stat;
	afsr_errs = ch_flt->afsr_errs;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_status = ECC_C_TRAP;

	pr_reason[0] = '\0';
	/*
	 * Get the CPU log out info for Disrupting Trap.  Without a
	 * private data area there is no logout area, so mark the
	 * fault's own diagnostic data as invalid instead.
	 */
	if (CPU_PRIVATE(CPU) == NULL) {
		clop = NULL;
		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
	} else {
		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
	}

	/*
	 * CE_CEEN_TIMEOUT means we did not come here via the trap handler,
	 * so capture the error registers and the logout data ourselves.
	 */
	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
		ch_cpu_errors_t cpu_error_regs;

		get_cpu_error_state(&cpu_error_regs);
		/* best effort: the result is deliberately ignored */
		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
		clop->clo_data.chd_afar = cpu_error_regs.afar;
		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
		clop->clo_sdw_data.chd_afsr_ext =
		    cpu_error_regs.shadow_afsr_ext;
#if defined(SERRANO)
		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
		ch_flt->flt_data_incomplete = 1;

		/*
		 * The logging/clear code expects AFSR/AFAR to be cleared.
		 * The trap handler does it for CEEN enabled errors
		 * so we need to do it here.
		 */
		set_cpu_error_state(&cpu_error_regs);
	}

#if defined(JALAPENO) || defined(SERRANO)
	/*
	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
	 * For Serrano, even though we do have the AFAR, we still do the
	 * scrub on the RCE side since that's where the error type can
	 * be properly classified as intermittent, persistent, etc.
	 *
	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
	 * the flt_status bits.
	 */
	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
		cpu_ce_scrub_mem_err(aflt, B_TRUE);
	}
#else /* JALAPENO || SERRANO */
	/*
	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
	 * the flt_status bits.
	 */
	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
			cpu_ce_scrub_mem_err(aflt, B_TRUE);
		}
	}

#endif /* JALAPENO || SERRANO */

	/*
	 * Update flt_prot if this error occurred under on_trap protection.
	 */
	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
		aflt->flt_prot = AFLT_PROT_EC;

	/*
	 * Queue events on the async event queue, one event per error bit.
	 * If nothing was queued, or none of the recorded error bits is a
	 * CEEN-controlled one, report the AFSR itself as invalid.
	 */
	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
		ch_flt->flt_type = CPU_INV_AFSR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Zero out + invalidate CPU logout.
	 */
	if (clop) {
		bzero(clop, sizeof (ch_cpu_logout_t));
		clop->clo_data.chd_afar = LOGOUT_INVALID;
	}

	/*
	 * If either a CPC, WDC or EDC error has occurred while CEEN
	 * was disabled, we need to flush either the entire
	 * E$ or an E$ line.
	 */
#if defined(JALAPENO) || defined(SERRANO)
	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
#else	/* JALAPENO || SERRANO */
	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
#endif	/* JALAPENO || SERRANO */
		cpu_error_ecache_flush(ch_flt);

}
6423 
6424 /*
6425  * depending on the error type, we determine whether we
6426  * need to flush the entire ecache or just a line.
6427  */
6428 static int
6429 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6430 {
6431 	struct async_flt *aflt;
6432 	uint64_t	afsr;
6433 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6434 
6435 	aflt = (struct async_flt *)ch_flt;
6436 	afsr = aflt->flt_stat;
6437 
6438 	/*
6439 	 * If we got multiple errors, no point in trying
6440 	 * the individual cases, just flush the whole cache
6441 	 */
6442 	if (afsr & C_AFSR_ME) {
6443 		return (ECACHE_FLUSH_ALL);
6444 	}
6445 
6446 	/*
6447 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6448 	 * was disabled, we need to flush entire E$. We can't just
6449 	 * flush the cache line affected as the ME bit
6450 	 * is not set when multiple correctable errors of the same
6451 	 * type occur, so we might have multiple CPC or EDC errors,
6452 	 * with only the first recorded.
6453 	 */
6454 #if defined(JALAPENO) || defined(SERRANO)
6455 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6456 #else	/* JALAPENO || SERRANO */
6457 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6458 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6459 #endif	/* JALAPENO || SERRANO */
6460 		return (ECACHE_FLUSH_ALL);
6461 	}
6462 
6463 #if defined(JALAPENO) || defined(SERRANO)
6464 	/*
6465 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6466 	 * flush the entire Ecache.
6467 	 */
6468 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6469 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6470 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6471 			return (ECACHE_FLUSH_LINE);
6472 		} else {
6473 			return (ECACHE_FLUSH_ALL);
6474 		}
6475 	}
6476 #else /* JALAPENO || SERRANO */
6477 	/*
6478 	 * If UE only is set, flush the Ecache line, otherwise
6479 	 * flush the entire Ecache.
6480 	 */
6481 	if (afsr_errs & C_AFSR_UE) {
6482 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6483 		    C_AFSR_UE) {
6484 			return (ECACHE_FLUSH_LINE);
6485 		} else {
6486 			return (ECACHE_FLUSH_ALL);
6487 		}
6488 	}
6489 #endif /* JALAPENO || SERRANO */
6490 
6491 	/*
6492 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6493 	 * flush the entire Ecache.
6494 	 */
6495 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6496 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6497 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6498 			return (ECACHE_FLUSH_LINE);
6499 		} else {
6500 			return (ECACHE_FLUSH_ALL);
6501 		}
6502 	}
6503 
6504 	/*
6505 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6506 	 * flush the entire Ecache.
6507 	 */
6508 	if (afsr_errs & C_AFSR_BERR) {
6509 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6510 			return (ECACHE_FLUSH_LINE);
6511 		} else {
6512 			return (ECACHE_FLUSH_ALL);
6513 		}
6514 	}
6515 
6516 	return (0);
6517 }
6518 
6519 void
6520 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6521 {
6522 	int	ecache_flush_flag =
6523 	    cpu_error_ecache_flush_required(ch_flt);
6524 
6525 	/*
6526 	 * Flush Ecache line or entire Ecache based on above checks.
6527 	 */
6528 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6529 		cpu_flush_ecache();
6530 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6531 		cpu_flush_ecache_line(ch_flt);
6532 	}
6533 
6534 }
6535 
6536 /*
6537  * Extract the PA portion from the E$ tag.
6538  */
6539 uint64_t
6540 cpu_ectag_to_pa(int setsize, uint64_t tag)
6541 {
6542 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6543 		return (JG_ECTAG_TO_PA(setsize, tag));
6544 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6545 		return (PN_L3TAG_TO_PA(tag));
6546 	else
6547 		return (CH_ECTAG_TO_PA(setsize, tag));
6548 }
6549 
6550 /*
6551  * Convert the E$ tag PA into an E$ subblock index.
6552  */
6553 int
6554 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6555 {
6556 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6557 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6558 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6559 		/* Panther has only one subblock per line */
6560 		return (0);
6561 	else
6562 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6563 }
6564 
6565 /*
6566  * All subblocks in an E$ line must be invalid for
6567  * the line to be invalid.
6568  */
6569 int
6570 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6571 {
6572 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6573 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6574 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6575 		return (PN_L3_LINE_INVALID(tag));
6576 	else
6577 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6578 }
6579 
6580 /*
6581  * Extract state bits for a subblock given the tag.  Note that for Panther
6582  * this works on both l2 and l3 tags.
6583  */
6584 int
6585 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6586 {
6587 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6588 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6589 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6590 		return (tag & CH_ECSTATE_MASK);
6591 	else
6592 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6593 }
6594 
/*
 * Cpu specific initialization.  When built with CHEETAHPLUS_ERRATUM_25
 * and cheetah_sendmondo_recover is set, this calls cheetah_nudge_init();
 * otherwise it does nothing.
 */
void
cpu_mp_init(void)
{
#ifdef	CHEETAHPLUS_ERRATUM_25
	if (cheetah_sendmondo_recover != 0)
		cheetah_nudge_init();
#endif
}
6607 
6608 void
6609 cpu_ereport_post(struct async_flt *aflt)
6610 {
6611 	char *cpu_type, buf[FM_MAX_CLASS];
6612 	nv_alloc_t *nva = NULL;
6613 	nvlist_t *ereport, *detector, *resource;
6614 	errorq_elem_t *eqep;
6615 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6616 	char unum[UNUM_NAMLEN];
6617 	int synd_code;
6618 	uint8_t msg_type;
6619 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6620 
6621 	if (aflt->flt_panic || panicstr) {
6622 		eqep = errorq_reserve(ereport_errorq);
6623 		if (eqep == NULL)
6624 			return;
6625 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6626 		nva = errorq_elem_nva(ereport_errorq, eqep);
6627 	} else {
6628 		ereport = fm_nvlist_create(nva);
6629 	}
6630 
6631 	/*
6632 	 * Create the scheme "cpu" FMRI.
6633 	 */
6634 	detector = fm_nvlist_create(nva);
6635 	resource = fm_nvlist_create(nva);
6636 	switch (cpunodes[aflt->flt_inst].implementation) {
6637 	case CHEETAH_IMPL:
6638 		cpu_type = FM_EREPORT_CPU_USIII;
6639 		break;
6640 	case CHEETAH_PLUS_IMPL:
6641 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6642 		break;
6643 	case JALAPENO_IMPL:
6644 		cpu_type = FM_EREPORT_CPU_USIIIi;
6645 		break;
6646 	case SERRANO_IMPL:
6647 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6648 		break;
6649 	case JAGUAR_IMPL:
6650 		cpu_type = FM_EREPORT_CPU_USIV;
6651 		break;
6652 	case PANTHER_IMPL:
6653 		cpu_type = FM_EREPORT_CPU_USIVplus;
6654 		break;
6655 	default:
6656 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6657 		break;
6658 	}
6659 
6660 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
6661 
6662 	/*
6663 	 * Encode all the common data into the ereport.
6664 	 */
6665 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6666 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6667 
6668 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6669 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6670 	    detector, NULL);
6671 
6672 	/*
6673 	 * Encode the error specific data that was saved in
6674 	 * the async_flt structure into the ereport.
6675 	 */
6676 	cpu_payload_add_aflt(aflt, ereport, resource,
6677 	    &plat_ecc_ch_flt.ecaf_afar_status,
6678 	    &plat_ecc_ch_flt.ecaf_synd_status);
6679 
6680 	if (aflt->flt_panic || panicstr) {
6681 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6682 	} else {
6683 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6684 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6685 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6686 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6687 	}
6688 	/*
6689 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6690 	 * to the SC only if it can process it.
6691 	 */
6692 
6693 	if (&plat_ecc_capability_sc_get &&
6694 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6695 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6696 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6697 			/*
6698 			 * If afar status is not invalid do a unum lookup.
6699 			 */
6700 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6701 			    AFLT_STAT_INVALID) {
6702 				synd_code = synd_to_synd_code(
6703 				    plat_ecc_ch_flt.ecaf_synd_status,
6704 				    aflt->flt_synd, ch_flt->flt_bit);
6705 				(void) cpu_get_mem_unum_synd(synd_code,
6706 				    aflt, unum);
6707 			} else {
6708 				unum[0] = '\0';
6709 			}
6710 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6711 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6712 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6713 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6714 			    ch_flt->flt_sdw_afsr_ext;
6715 
6716 			if (&plat_log_fruid_error2)
6717 				plat_log_fruid_error2(msg_type, unum, aflt,
6718 				    &plat_ecc_ch_flt);
6719 		}
6720 	}
6721 }
6722 
6723 void
6724 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6725 {
6726 	int status;
6727 	ddi_fm_error_t de;
6728 
6729 	bzero(&de, sizeof (ddi_fm_error_t));
6730 
6731 	de.fme_version = DDI_FME_VERSION;
6732 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6733 	    FM_ENA_FMT1);
6734 	de.fme_flag = expected;
6735 	de.fme_bus_specific = (void *)aflt->flt_addr;
6736 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6737 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6738 		aflt->flt_panic = 1;
6739 }
6740 
6741 void
6742 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6743     errorq_t *eqp, uint_t flag)
6744 {
6745 	struct async_flt *aflt = (struct async_flt *)payload;
6746 
6747 	aflt->flt_erpt_class = error_class;
6748 	errorq_dispatch(eqp, payload, payload_sz, flag);
6749 }
6750 
/*
 * Deliberately empty in this cpu module.  The IO module may still call
 * this routine, but the SERD algorithm is handled by the cpumem-diagnosis
 * engine instead, so none of the arguments are used here.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
}
6760 
6761 void
6762 adjust_hw_copy_limits(int ecache_size)
6763 {
6764 	/*
6765 	 * Set hw copy limits.
6766 	 *
6767 	 * /etc/system will be parsed later and can override one or more
6768 	 * of these settings.
6769 	 *
6770 	 * At this time, ecache size seems only mildly relevant.
6771 	 * We seem to run into issues with the d-cache and stalls
6772 	 * we see on misses.
6773 	 *
6774 	 * Cycle measurement indicates that 2 byte aligned copies fare
6775 	 * little better than doing things with VIS at around 512 bytes.
6776 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6777 	 * aligned is faster whenever the source and destination data
6778 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6779 	 * limit seems to be driven by the 2K write cache.
6780 	 * When more than 2K of copies are done in non-VIS mode, stores
6781 	 * backup in the write cache.  In VIS mode, the write cache is
6782 	 * bypassed, allowing faster cache-line writes aligned on cache
6783 	 * boundaries.
6784 	 *
6785 	 * In addition, in non-VIS mode, there is no prefetching, so
6786 	 * for larger copies, the advantage of prefetching to avoid even
6787 	 * occasional cache misses is enough to justify using the VIS code.
6788 	 *
6789 	 * During testing, it was discovered that netbench ran 3% slower
6790 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6791 	 * applications, data is only used once (copied to the output
6792 	 * buffer, then copied by the network device off the system).  Using
6793 	 * the VIS copy saves more L2 cache state.  Network copies are
6794 	 * around 1.3K to 1.5K in size for historical reasons.
6795 	 *
6796 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6797 	 * aligned copy even for large caches and 8 MB ecache.  The
6798 	 * infrastructure to allow different limits for different sized
6799 	 * caches is kept to allow further tuning in later releases.
6800 	 */
6801 
6802 	if (min_ecache_size == 0 && use_hw_bcopy) {
6803 		/*
6804 		 * First time through - should be before /etc/system
6805 		 * is read.
6806 		 * Could skip the checks for zero but this lets us
6807 		 * preserve any debugger rewrites.
6808 		 */
6809 		if (hw_copy_limit_1 == 0) {
6810 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6811 			priv_hcl_1 = hw_copy_limit_1;
6812 		}
6813 		if (hw_copy_limit_2 == 0) {
6814 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6815 			priv_hcl_2 = hw_copy_limit_2;
6816 		}
6817 		if (hw_copy_limit_4 == 0) {
6818 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6819 			priv_hcl_4 = hw_copy_limit_4;
6820 		}
6821 		if (hw_copy_limit_8 == 0) {
6822 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6823 			priv_hcl_8 = hw_copy_limit_8;
6824 		}
6825 		min_ecache_size = ecache_size;
6826 	} else {
6827 		/*
6828 		 * MP initialization. Called *after* /etc/system has
6829 		 * been parsed. One CPU has already been initialized.
6830 		 * Need to cater for /etc/system having scragged one
6831 		 * of our values.
6832 		 */
6833 		if (ecache_size == min_ecache_size) {
6834 			/*
6835 			 * Same size ecache. We do nothing unless we
6836 			 * have a pessimistic ecache setting. In that
6837 			 * case we become more optimistic (if the cache is
6838 			 * large enough).
6839 			 */
6840 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6841 				/*
6842 				 * Need to adjust hw_copy_limit* from our
6843 				 * pessimistic uniprocessor value to a more
6844 				 * optimistic UP value *iff* it hasn't been
6845 				 * reset.
6846 				 */
6847 				if ((ecache_size > 1048576) &&
6848 				    (priv_hcl_8 == hw_copy_limit_8)) {
6849 					if (ecache_size <= 2097152)
6850 						hw_copy_limit_8 = 4 *
6851 						    VIS_COPY_THRESHOLD;
6852 					else if (ecache_size <= 4194304)
6853 						hw_copy_limit_8 = 4 *
6854 						    VIS_COPY_THRESHOLD;
6855 					else
6856 						hw_copy_limit_8 = 4 *
6857 						    VIS_COPY_THRESHOLD;
6858 					priv_hcl_8 = hw_copy_limit_8;
6859 				}
6860 			}
6861 		} else if (ecache_size < min_ecache_size) {
6862 			/*
6863 			 * A different ecache size. Can this even happen?
6864 			 */
6865 			if (priv_hcl_8 == hw_copy_limit_8) {
6866 				/*
6867 				 * The previous value that we set
6868 				 * is unchanged (i.e., it hasn't been
6869 				 * scragged by /etc/system). Rewrite it.
6870 				 */
6871 				if (ecache_size <= 1048576)
6872 					hw_copy_limit_8 = 8 *
6873 					    VIS_COPY_THRESHOLD;
6874 				else if (ecache_size <= 2097152)
6875 					hw_copy_limit_8 = 8 *
6876 					    VIS_COPY_THRESHOLD;
6877 				else if (ecache_size <= 4194304)
6878 					hw_copy_limit_8 = 8 *
6879 					    VIS_COPY_THRESHOLD;
6880 				else
6881 					hw_copy_limit_8 = 10 *
6882 					    VIS_COPY_THRESHOLD;
6883 				priv_hcl_8 = hw_copy_limit_8;
6884 				min_ecache_size = ecache_size;
6885 			}
6886 		}
6887 	}
6888 }
6889 
6890 /*
6891  * Called from illegal instruction trap handler to see if we can attribute
6892  * the trap to a fpras check.
6893  */
6894 int
6895 fpras_chktrap(struct regs *rp)
6896 {
6897 	int op;
6898 	struct fpras_chkfngrp *cgp;
6899 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6900 
6901 	if (fpras_chkfngrps == NULL)
6902 		return (0);
6903 
6904 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6905 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6906 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6907 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6908 			break;
6909 	}
6910 	if (op == FPRAS_NCOPYOPS)
6911 		return (0);
6912 
6913 	/*
6914 	 * This is an fpRAS failure caught through an illegal
6915 	 * instruction - trampoline.
6916 	 */
6917 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6918 	rp->r_npc = rp->r_pc + 4;
6919 	return (1);
6920 }
6921 
6922 /*
6923  * fpras_failure is called when a fpras check detects a bad calculation
6924  * result or an illegal instruction trap is attributed to an fpras
6925  * check.  In all cases we are still bound to CPU.
6926  */
6927 int
6928 fpras_failure(int op, int how)
6929 {
6930 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6931 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6932 	ch_async_flt_t ch_flt;
6933 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6934 	struct fpras_chkfn *sfp, *cfp;
6935 	uint32_t *sip, *cip;
6936 	int i;
6937 
6938 	/*
6939 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6940 	 * the time in which we dispatch an ereport and (if applicable) panic.
6941 	 */
6942 	use_hw_bcopy_orig = use_hw_bcopy;
6943 	use_hw_bzero_orig = use_hw_bzero;
6944 	hcl1_orig = hw_copy_limit_1;
6945 	hcl2_orig = hw_copy_limit_2;
6946 	hcl4_orig = hw_copy_limit_4;
6947 	hcl8_orig = hw_copy_limit_8;
6948 	use_hw_bcopy = use_hw_bzero = 0;
6949 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6950 	    hw_copy_limit_8 = 0;
6951 
6952 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6953 	aflt->flt_id = gethrtime_waitfree();
6954 	aflt->flt_class = CPU_FAULT;
6955 	aflt->flt_inst = CPU->cpu_id;
6956 	aflt->flt_status = (how << 8) | op;
6957 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6958 	ch_flt.flt_type = CPU_FPUERR;
6959 
6960 	/*
6961 	 * We must panic if the copy operation had no lofault protection -
6962 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6963 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6964 	 */
6965 	aflt->flt_panic = (curthread->t_lofault == NULL);
6966 
6967 	/*
6968 	 * XOR the source instruction block with the copied instruction
6969 	 * block - this will show us which bit(s) are corrupted.
6970 	 */
6971 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6972 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6973 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6974 		sip = &sfp->fpras_blk0[0];
6975 		cip = &cfp->fpras_blk0[0];
6976 	} else {
6977 		sip = &sfp->fpras_blk1[0];
6978 		cip = &cfp->fpras_blk1[0];
6979 	}
6980 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6981 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6982 
6983 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6984 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6985 
6986 	if (aflt->flt_panic)
6987 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6988 
6989 	/*
6990 	 * We get here for copyin/copyout and kcopy or bcopy where the
6991 	 * caller has used on_fault.  We will flag the error so that
6992 	 * the process may be killed  The trap_async_hwerr mechanism will
6993 	 * take appropriate further action (such as a reboot, contract
6994 	 * notification etc).  Since we may be continuing we will
6995 	 * restore the global hardware copy acceleration switches.
6996 	 *
6997 	 * When we return from this function to the copy function we want to
6998 	 * avoid potentially bad data being used, ie we want the affected
6999 	 * copy function to return an error.  The caller should therefore
7000 	 * invoke its lofault handler (which always exists for these functions)
7001 	 * which will return the appropriate error.
7002 	 */
7003 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
7004 	aston(curthread);
7005 
7006 	use_hw_bcopy = use_hw_bcopy_orig;
7007 	use_hw_bzero = use_hw_bzero_orig;
7008 	hw_copy_limit_1 = hcl1_orig;
7009 	hw_copy_limit_2 = hcl2_orig;
7010 	hw_copy_limit_4 = hcl4_orig;
7011 	hw_copy_limit_8 = hcl8_orig;
7012 
7013 	return (1);
7014 }
7015 
7016 #define	VIS_BLOCKSIZE		64
7017 
7018 int
7019 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7020 {
7021 	int ret, watched;
7022 
7023 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7024 	ret = dtrace_blksuword32(addr, data, 0);
7025 	if (watched)
7026 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7027 
7028 	return (ret);
7029 }
7030 
7031 /*
7032  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7033  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
7034  * CEEN from the EER to disable traps for further disrupting error types
7035  * on that cpu.  We could cross-call instead, but that has a larger
7036  * instruction and data footprint than cross-trapping, and the cpu is known
7037  * to be faulted.
7038  */
7039 
7040 void
7041 cpu_faulted_enter(struct cpu *cp)
7042 {
7043 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7044 }
7045 
7046 /*
7047  * Called when a cpu leaves the CPU_FAULTED state to return to one of
7048  * offline, spare, or online (by the cpu requesting this state change).
7049  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7050  * disrupting error bits that have accumulated without trapping, then
7051  * we cross-trap to re-enable CEEN controlled traps.
7052  */
7053 void
7054 cpu_faulted_exit(struct cpu *cp)
7055 {
7056 	ch_cpu_errors_t cpu_error_regs;
7057 
7058 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7059 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7060 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7061 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7062 	    (uint64_t)&cpu_error_regs, 0);
7063 
7064 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7065 }
7066 
7067 /*
7068  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7069  * the errors in the original AFSR, 0 otherwise.
7070  *
7071  * For all procs if the initial error was a BERR or TO, then it is possible
7072  * that we may have caused a secondary BERR or TO in the process of logging the
7073  * inital error via cpu_run_bus_error_handlers().  If this is the case then
7074  * if the request was protected then a panic is still not necessary, if not
7075  * protected then aft_panic is already set - so either way there's no need
7076  * to set aft_panic for the secondary error.
7077  *
7078  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7079  * a store merge, then the error handling code will call cpu_deferred_error().
7080  * When clear_errors() is called, it will determine that secondary errors have
7081  * occurred - in particular, the store merge also caused a EDU and WDU that
7082  * weren't discovered until this point.
7083  *
7084  * We do three checks to verify that we are in this case.  If we pass all three
7085  * checks, we return 1 to indicate that we should not panic.  If any unexpected
7086  * errors occur, we return 0.
7087  *
7088  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7089  * handled in cpu_disrupting_errors().  Since this function is not even called
7090  * in the case we are interested in, we just return 0 for these processors.
7091  */
7092 /*ARGSUSED*/
7093 static int
7094 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7095     uint64_t t_afar)
7096 {
7097 #if defined(CHEETAH_PLUS)
7098 #else	/* CHEETAH_PLUS */
7099 	struct async_flt *aflt = (struct async_flt *)ch_flt;
7100 #endif	/* CHEETAH_PLUS */
7101 
7102 	/*
7103 	 * Was the original error a BERR or TO and only a BERR or TO
7104 	 * (multiple errors are also OK)
7105 	 */
7106 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7107 		/*
7108 		 * Is the new error a BERR or TO and only a BERR or TO
7109 		 * (multiple errors are also OK)
7110 		 */
7111 		if ((ch_flt->afsr_errs &
7112 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7113 			return (1);
7114 	}
7115 
7116 #if defined(CHEETAH_PLUS)
7117 	return (0);
7118 #else	/* CHEETAH_PLUS */
7119 	/*
7120 	 * Now look for secondary effects of a UE on cheetah/jalapeno
7121 	 *
7122 	 * Check the original error was a UE, and only a UE.  Note that
7123 	 * the ME bit will cause us to fail this check.
7124 	 */
7125 	if (t_afsr_errs != C_AFSR_UE)
7126 		return (0);
7127 
7128 	/*
7129 	 * Check the secondary errors were exclusively an EDU and/or WDU.
7130 	 */
7131 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7132 		return (0);
7133 
7134 	/*
7135 	 * Check the AFAR of the original error and secondary errors
7136 	 * match to the 64-byte boundary
7137 	 */
7138 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7139 		return (0);
7140 
7141 	/*
7142 	 * We've passed all the checks, so it's a secondary error!
7143 	 */
7144 	return (1);
7145 #endif	/* CHEETAH_PLUS */
7146 }
7147 
7148 /*
7149  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
7150  * is checked for any valid errors.  If found, the error type is
7151  * returned. If not found, the flt_type is checked for L1$ parity errors.
7152  */
7153 /*ARGSUSED*/
7154 static uint8_t
7155 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7156 {
7157 #if defined(JALAPENO)
7158 	/*
7159 	 * Currently, logging errors to the SC is not supported on Jalapeno
7160 	 */
7161 	return (PLAT_ECC_ERROR2_NONE);
7162 #else
7163 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7164 
7165 	switch (ch_flt->flt_bit) {
7166 	case C_AFSR_CE:
7167 		return (PLAT_ECC_ERROR2_CE);
7168 	case C_AFSR_UCC:
7169 	case C_AFSR_EDC:
7170 	case C_AFSR_WDC:
7171 	case C_AFSR_CPC:
7172 		return (PLAT_ECC_ERROR2_L2_CE);
7173 	case C_AFSR_EMC:
7174 		return (PLAT_ECC_ERROR2_EMC);
7175 	case C_AFSR_IVC:
7176 		return (PLAT_ECC_ERROR2_IVC);
7177 	case C_AFSR_UE:
7178 		return (PLAT_ECC_ERROR2_UE);
7179 	case C_AFSR_UCU:
7180 	case C_AFSR_EDU:
7181 	case C_AFSR_WDU:
7182 	case C_AFSR_CPU:
7183 		return (PLAT_ECC_ERROR2_L2_UE);
7184 	case C_AFSR_IVU:
7185 		return (PLAT_ECC_ERROR2_IVU);
7186 	case C_AFSR_TO:
7187 		return (PLAT_ECC_ERROR2_TO);
7188 	case C_AFSR_BERR:
7189 		return (PLAT_ECC_ERROR2_BERR);
7190 #if defined(CHEETAH_PLUS)
7191 	case C_AFSR_L3_EDC:
7192 	case C_AFSR_L3_UCC:
7193 	case C_AFSR_L3_CPC:
7194 	case C_AFSR_L3_WDC:
7195 		return (PLAT_ECC_ERROR2_L3_CE);
7196 	case C_AFSR_IMC:
7197 		return (PLAT_ECC_ERROR2_IMC);
7198 	case C_AFSR_TSCE:
7199 		return (PLAT_ECC_ERROR2_L2_TSCE);
7200 	case C_AFSR_THCE:
7201 		return (PLAT_ECC_ERROR2_L2_THCE);
7202 	case C_AFSR_L3_MECC:
7203 		return (PLAT_ECC_ERROR2_L3_MECC);
7204 	case C_AFSR_L3_THCE:
7205 		return (PLAT_ECC_ERROR2_L3_THCE);
7206 	case C_AFSR_L3_CPU:
7207 	case C_AFSR_L3_EDU:
7208 	case C_AFSR_L3_UCU:
7209 	case C_AFSR_L3_WDU:
7210 		return (PLAT_ECC_ERROR2_L3_UE);
7211 	case C_AFSR_DUE:
7212 		return (PLAT_ECC_ERROR2_DUE);
7213 	case C_AFSR_DTO:
7214 		return (PLAT_ECC_ERROR2_DTO);
7215 	case C_AFSR_DBERR:
7216 		return (PLAT_ECC_ERROR2_DBERR);
7217 #endif	/* CHEETAH_PLUS */
7218 	default:
7219 		switch (ch_flt->flt_type) {
7220 #if defined(CPU_IMP_L1_CACHE_PARITY)
7221 		case CPU_IC_PARITY:
7222 			return (PLAT_ECC_ERROR2_IPE);
7223 		case CPU_DC_PARITY:
7224 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7225 				if (ch_flt->parity_data.dpe.cpl_cache ==
7226 				    CPU_PC_PARITY) {
7227 					return (PLAT_ECC_ERROR2_PCACHE);
7228 				}
7229 			}
7230 			return (PLAT_ECC_ERROR2_DPE);
7231 #endif /* CPU_IMP_L1_CACHE_PARITY */
7232 		case CPU_ITLB_PARITY:
7233 			return (PLAT_ECC_ERROR2_ITLB);
7234 		case CPU_DTLB_PARITY:
7235 			return (PLAT_ECC_ERROR2_DTLB);
7236 		default:
7237 			return (PLAT_ECC_ERROR2_NONE);
7238 		}
7239 	}
7240 #endif	/* JALAPENO */
7241 }
7242