xref: /titanic_50/usr/src/uts/sun4u/cpu/us3_common.c (revision 7c4dcc5546f9f002dfc2b95de47c90f00d07c066)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/ddi.h>
32 #include <sys/sysmacros.h>
33 #include <sys/archsystm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/machparam.h>
36 #include <sys/machsystm.h>
37 #include <sys/machthread.h>
38 #include <sys/cpu.h>
39 #include <sys/cmp.h>
40 #include <sys/elf_SPARC.h>
41 #include <vm/vm_dep.h>
42 #include <vm/hat_sfmmu.h>
43 #include <vm/seg_kpm.h>
44 #include <sys/cpuvar.h>
45 #include <sys/cheetahregs.h>
46 #include <sys/us3_module.h>
47 #include <sys/async.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/dditypes.h>
51 #include <sys/prom_debug.h>
52 #include <sys/prom_plat.h>
53 #include <sys/cpu_module.h>
54 #include <sys/sysmacros.h>
55 #include <sys/intreg.h>
56 #include <sys/clock.h>
57 #include <sys/platform_module.h>
58 #include <sys/machtrap.h>
59 #include <sys/ontrap.h>
60 #include <sys/panic.h>
61 #include <sys/memlist.h>
62 #include <sys/bootconf.h>
63 #include <sys/ivintr.h>
64 #include <sys/atomic.h>
65 #include <sys/taskq.h>
66 #include <sys/note.h>
67 #include <sys/ndifm.h>
68 #include <sys/ddifm.h>
69 #include <sys/fm/protocol.h>
70 #include <sys/fm/util.h>
71 #include <sys/fm/cpu/UltraSPARC-III.h>
72 #include <sys/fpras_impl.h>
73 #include <sys/dtrace.h>
74 #include <sys/watchpoint.h>
75 #include <sys/plat_ecc_unum.h>
76 #include <sys/cyclic.h>
77 #include <sys/errorq.h>
78 #include <sys/errclassify.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 /*
85  * Note that 'Cheetah PRM' refers to:
86  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
87  */
88 
89 /*
90  * Per CPU pointers to physical address of TL>0 logout data areas.
91  * These pointers have to be in the kernel nucleus to avoid MMU
92  * misses.
93  */
94 uint64_t ch_err_tl1_paddrs[NCPU];
95 
96 /*
97  * One statically allocated structure to use during startup/DR
98  * to prevent unnecessary panics.
99  */
100 ch_err_tl1_data_t ch_err_tl1_data;
101 
102 /*
103  * Per CPU pending error at TL>0, used by level15 softint handler
104  */
105 uchar_t ch_err_tl1_pending[NCPU];
106 
107 /*
108  * For deferred CE re-enable after trap.
109  */
110 taskq_t		*ch_check_ce_tq;
111 
112 /*
113  * Internal functions.
114  */
115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
118     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
119 static int clear_ecc(struct async_flt *ecc);
120 #if defined(CPU_IMP_ECACHE_ASSOC)
121 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
122 #endif
123 static int cpu_ecache_set_size(struct cpu *cp);
124 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
125 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
126 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
127 static int cpu_ectag_pa_to_subblk_state(int cachesize,
128 				uint64_t subaddr, uint64_t tag);
129 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
130 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
131 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
132 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
133 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
134 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
135 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
136 static void cpu_scrubphys(struct async_flt *aflt);
137 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
138     int *, int *);
139 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
140 static void cpu_ereport_init(struct async_flt *aflt);
141 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
142 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
143 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
144     uint64_t nceen, ch_cpu_logout_t *clop);
145 static int cpu_ce_delayed_ec_logout(uint64_t);
146 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
147 
148 #ifdef	CHEETAHPLUS_ERRATUM_25
149 static int mondo_recover_proc(uint16_t, int);
150 static void cheetah_nudge_init(void);
151 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
152     cyc_time_t *when);
153 static void cheetah_nudge_buddy(void);
154 #endif	/* CHEETAHPLUS_ERRATUM_25 */
155 
156 #if defined(CPU_IMP_L1_CACHE_PARITY)
157 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
158 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
159 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
160     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
161 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
162 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
163 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
164 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
165 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
166 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
167 #endif	/* CPU_IMP_L1_CACHE_PARITY */
168 
169 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
170     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
171     int *segsp, int *banksp, int *mcidp);
172 
173 /*
174  * This table is used to determine which bit(s) is(are) bad when an ECC
175  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
176  * of this array have the following semantics:
177  *
178  *      00-127  The number of the bad bit, when only one bit is bad.
179  *      128     ECC bit C0 is bad.
180  *      129     ECC bit C1 is bad.
181  *      130     ECC bit C2 is bad.
182  *      131     ECC bit C3 is bad.
183  *      132     ECC bit C4 is bad.
184  *      133     ECC bit C5 is bad.
185  *      134     ECC bit C6 is bad.
186  *      135     ECC bit C7 is bad.
187  *      136     ECC bit C8 is bad.
188  *	137-143 reserved for Mtag Data and ECC.
189  *      144(M2) Two bits are bad within a nibble.
190  *      145(M3) Three bits are bad within a nibble.
191  *      146(M4) Four bits are bad within a nibble.
192  *      147(M)  Multiple bits (5 or more) are bad.
193  *      148     NO bits are bad.
194  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
195  */
196 
197 #define	C0	128
198 #define	C1	129
199 #define	C2	130
200 #define	C3	131
201 #define	C4	132
202 #define	C5	133
203 #define	C6	134
204 #define	C7	135
205 #define	C8	136
206 #define	MT0	137	/* Mtag Data bit 0 */
207 #define	MT1	138
208 #define	MT2	139
209 #define	MTC0	140	/* Mtag Check bit 0 */
210 #define	MTC1	141
211 #define	MTC2	142
212 #define	MTC3	143
213 #define	M2	144
214 #define	M3	145
215 #define	M4	146
216 #define	M	147
217 #define	NA	148
218 #if defined(JALAPENO) || defined(SERRANO)
219 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
220 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
221 #define	SLAST	S003MEM	/* last special syndrome */
222 #else /* JALAPENO || SERRANO */
223 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
224 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
225 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
226 #define	SLAST	S11C	/* last special syndrome */
227 #endif /* JALAPENO || SERRANO */
228 #if defined(JALAPENO) || defined(SERRANO)
229 #define	BPAR0	152	/* syndrom 152 through 167 for bus parity */
230 #define	BPAR15	167
231 #endif	/* JALAPENO || SERRANO */
232 
233 static uint8_t ecc_syndrome_tab[] =
234 {
235 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
236 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
237 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
238 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
239 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
240 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
241 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
242 #if defined(JALAPENO) || defined(SERRANO)
243 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
244 #else	/* JALAPENO || SERRANO */
245 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
246 #endif	/* JALAPENO || SERRANO */
247 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
248 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
249 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
250 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
251 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
252 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
253 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
254 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
255 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
256 #if defined(JALAPENO) || defined(SERRANO)
257 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
258 #else	/* JALAPENO || SERRANO */
259 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
260 #endif	/* JALAPENO || SERRANO */
261 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
262 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
263 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
264 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
265 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
266 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
267 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
268 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
269 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
270 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
271 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
272 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
273 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
274 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
275 };
276 
277 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
278 
279 #if !(defined(JALAPENO) || defined(SERRANO))
280 /*
281  * This table is used to determine which bit(s) is(are) bad when a Mtag
282  * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
283  * of this array have the following semantics:
284  *
285  *      -1	Invalid mtag syndrome.
286  *      137     Mtag Data 0 is bad.
287  *      138     Mtag Data 1 is bad.
288  *      139     Mtag Data 2 is bad.
289  *      140     Mtag ECC 0 is bad.
290  *      141     Mtag ECC 1 is bad.
291  *      142     Mtag ECC 2 is bad.
292  *      143     Mtag ECC 3 is bad.
293  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6.
294  */
295 short mtag_syndrome_tab[] =
296 {
297 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
298 };
299 
300 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
301 
302 #else /* !(JALAPENO || SERRANO) */
303 
304 #define	BSYND_TBL_SIZE	16
305 
306 #endif /* !(JALAPENO || SERRANO) */
307 
308 /*
309  * CE initial classification and subsequent action lookup table
310  */
311 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
312 static int ce_disp_inited;
313 
314 /*
315  * Set to disable leaky and partner check for memory correctables
316  */
317 int ce_xdiag_off;
318 
319 /*
320  * The following are not incremented atomically so are indicative only
321  */
322 static int ce_xdiag_drops;
323 static int ce_xdiag_lkydrops;
324 static int ce_xdiag_ptnrdrops;
325 static int ce_xdiag_bad;
326 
327 /*
328  * CE leaky check callback structure
329  */
330 typedef struct {
331 	struct async_flt *lkycb_aflt;
332 	errorq_t *lkycb_eqp;
333 	errorq_elem_t *lkycb_eqep;
334 } ce_lkychk_cb_t;
335 
336 /*
337  * defines for various ecache_flush_flag's
338  */
339 #define	ECACHE_FLUSH_LINE	1
340 #define	ECACHE_FLUSH_ALL	2
341 
342 /*
343  * STICK sync
344  */
345 #define	STICK_ITERATION 10
346 #define	MAX_TSKEW	1
347 #define	EV_A_START	0
348 #define	EV_A_END	1
349 #define	EV_B_START	2
350 #define	EV_B_END	3
351 #define	EVENTS		4
352 
353 static int64_t stick_iter = STICK_ITERATION;
354 static int64_t stick_tsk = MAX_TSKEW;
355 
356 typedef enum {
357 	EVENT_NULL = 0,
358 	SLAVE_START,
359 	SLAVE_CONT,
360 	MASTER_START
361 } event_cmd_t;
362 
363 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
364 static int64_t timestamp[EVENTS];
365 static volatile int slave_done;
366 
367 #ifdef DEBUG
368 #define	DSYNC_ATTEMPTS 64
369 typedef struct {
370 	int64_t	skew_val[DSYNC_ATTEMPTS];
371 } ss_t;
372 
373 ss_t stick_sync_stats[NCPU];
374 #endif /* DEBUG */
375 
376 /*
377  * Maximum number of contexts for Cheetah.
378  */
379 #define	MAX_NCTXS	(1 << 13)
380 
381 /* Will be set !NULL for Cheetah+ and derivatives. */
382 uchar_t *ctx_pgsz_array = NULL;
383 #if defined(CPU_IMP_DUAL_PAGESIZE)
384 static uchar_t ctx_pgsz_arr[MAX_NCTXS];
385 uint_t disable_dual_pgsz = 0;
386 #endif	/* CPU_IMP_DUAL_PAGESIZE */
387 
388 /*
389  * Save the cache bootup state for use when internal
390  * caches are to be re-enabled after an error occurs.
391  */
392 uint64_t cache_boot_state;
393 
394 /*
395  * PA[22:0] represent Displacement in Safari configuration space.
396  */
397 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
398 
399 bus_config_eclk_t bus_config_eclk[] = {
400 #if defined(JALAPENO) || defined(SERRANO)
401 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
402 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
403 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
404 #else /* JALAPENO || SERRANO */
405 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
406 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
407 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
408 #endif /* JALAPENO || SERRANO */
409 	{0, 0}
410 };
411 
412 /*
413  * Interval for deferred CEEN reenable
414  */
415 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
416 
417 /*
418  * set in /etc/system to control logging of user BERR/TO's
419  */
420 int cpu_berr_to_verbose = 0;
421 
422 /*
423  * set to 0 in /etc/system to defer CEEN reenable for all CEs
424  */
425 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
426 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
427 
428 /*
429  * Set of all offline cpus
430  */
431 cpuset_t cpu_offline_set;
432 
433 static void cpu_delayed_check_ce_errors(void *);
434 static void cpu_check_ce_errors(void *);
435 void cpu_error_ecache_flush(ch_async_flt_t *);
436 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
437 static void cpu_log_and_clear_ce(ch_async_flt_t *);
438 void cpu_ce_detected(ch_cpu_errors_t *, int);
439 
440 /*
441  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
442  * memory refresh interval of current DIMMs (64ms).  After initial fix that
443  * gives at least one full refresh cycle in which the cell can leak
444  * (whereafter further refreshes simply reinforce any incorrect bit value).
445  */
446 clock_t cpu_ce_lkychk_timeout_usec = 128000;
447 
448 /*
449  * CE partner check partner caching period in seconds
450  */
451 int cpu_ce_ptnr_cachetime_sec = 60;
452 
453 /*
454  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
455  */
456 #define	CH_SET_TRAP(ttentry, ttlabel)			\
457 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
458 		flush_instr_mem((caddr_t)&ttentry, 32);
459 
460 static int min_ecache_size;
461 static uint_t priv_hcl_1;
462 static uint_t priv_hcl_2;
463 static uint_t priv_hcl_4;
464 static uint_t priv_hcl_8;
465 
466 void
467 cpu_setup(void)
468 {
469 	extern int at_flags;
470 	extern int disable_delay_tlb_flush, delay_tlb_flush;
471 	extern int cpc_has_overflow_intr;
472 	extern int disable_text_largepages;
473 	extern int use_text_pgsz4m;
474 
475 	/*
476 	 * Setup chip-specific trap handlers.
477 	 */
478 	cpu_init_trap();
479 
480 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
481 
482 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
483 
484 	/*
485 	 * save the cache bootup state.
486 	 */
487 	cache_boot_state = get_dcu() & DCU_CACHE;
488 
489 	/*
490 	 * Use the maximum number of contexts available for Cheetah
491 	 * unless it has been tuned for debugging.
492 	 * We are checking against 0 here since this value can be patched
493 	 * while booting.  It can not be patched via /etc/system since it
494 	 * will be patched too late and thus cause the system to panic.
495 	 */
496 	if (nctxs == 0)
497 		nctxs = MAX_NCTXS;
498 
499 	/*
500 	 * Due to the number of entries in the fully-associative tlb
501 	 * this may have to be tuned lower than in spitfire.
502 	 */
503 	pp_slots = MIN(8, MAXPP_SLOTS);
504 
505 	/*
506 	 * Block stores do not invalidate all pages of the d$, pagecopy
507 	 * et. al. need virtual translations with virtual coloring taken
508 	 * into consideration.  prefetch/ldd will pollute the d$ on the
509 	 * load side.
510 	 */
511 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
512 
513 	if (use_page_coloring) {
514 		do_pg_coloring = 1;
515 		if (use_virtual_coloring)
516 			do_virtual_coloring = 1;
517 	}
518 
519 	isa_list =
520 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
521 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523 
524 	/*
525 	 * On Panther-based machines, this should
526 	 * also include AV_SPARC_POPC too
527 	 */
528 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529 
530 	/*
531 	 * On cheetah, there's no hole in the virtual address space
532 	 */
533 	hole_start = hole_end = 0;
534 
535 	/*
536 	 * The kpm mapping window.
537 	 * kpm_size:
538 	 *	The size of a single kpm range.
539 	 *	The overall size will be: kpm_size * vac_colors.
540 	 * kpm_vbase:
541 	 *	The virtual start address of the kpm range within the kernel
542 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
543 	 */
544 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545 	kpm_size_shift = 43;
546 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547 	kpm_smallpages = 1;
548 
549 	/*
550 	 * The traptrace code uses either %tick or %stick for
551 	 * timestamping.  We have %stick so we can use it.
552 	 */
553 	traptrace_use_stick = 1;
554 
555 	/*
556 	 * Cheetah has a performance counter overflow interrupt
557 	 */
558 	cpc_has_overflow_intr = 1;
559 
560 	/*
561 	 * Use cheetah flush-all support
562 	 */
563 	if (!disable_delay_tlb_flush)
564 		delay_tlb_flush = 1;
565 
566 #if defined(CPU_IMP_DUAL_PAGESIZE)
567 	/*
568 	 * Use Cheetah+ and later dual page size support.
569 	 */
570 	if (!disable_dual_pgsz) {
571 		ctx_pgsz_array = ctx_pgsz_arr;
572 	}
573 #endif	/* CPU_IMP_DUAL_PAGESIZE */
574 
575 	/*
576 	 * Declare that this architecture/cpu combination does fpRAS.
577 	 */
578 	fpras_implemented = 1;
579 
580 	/*
581 	 * Enable 4M pages to be used for mapping user text by default.  Don't
582 	 * use large pages for initialized data segments since we may not know
583 	 * at exec() time what should be the preferred large page size for DTLB
584 	 * programming.
585 	 */
586 	use_text_pgsz4m = 1;
587 	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
588 	    (1 << TTE32M) | (1 << TTE256M);
589 
590 	/*
591 	 * Setup CE lookup table
592 	 */
593 	CE_INITDISPTBL_POPULATE(ce_disp_table);
594 	ce_disp_inited = 1;
595 }
596 
597 /*
598  * Called by setcpudelay
599  */
600 void
601 cpu_init_tick_freq(void)
602 {
603 	/*
604 	 * For UltraSPARC III and beyond we want to use the
605 	 * system clock rate as the basis for low level timing,
606 	 * due to support of mixed speed CPUs and power managment.
607 	 */
608 	if (system_clock_freq == 0)
609 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
610 
611 	sys_tick_freq = system_clock_freq;
612 }
613 
614 #ifdef CHEETAHPLUS_ERRATUM_25
615 /*
616  * Tunables
617  */
618 int cheetah_bpe_off = 0;
619 int cheetah_sendmondo_recover = 1;
620 int cheetah_sendmondo_fullscan = 0;
621 int cheetah_sendmondo_recover_delay = 5;
622 
623 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
624 
625 /*
626  * Recovery Statistics
627  */
628 typedef struct cheetah_livelock_entry	{
629 	int cpuid;		/* fallen cpu */
630 	int buddy;		/* cpu that ran recovery */
631 	clock_t lbolt;		/* when recovery started */
632 	hrtime_t recovery_time;	/* time spent in recovery */
633 } cheetah_livelock_entry_t;
634 
635 #define	CHEETAH_LIVELOCK_NENTRY	32
636 
637 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
638 int cheetah_livelock_entry_nxt;
639 
/*
 * Point statp at the next slot of the cheetah_livelock_hist ring and advance
 * the ring index, wrapping back to 0 once it reaches CHEETAH_LIVELOCK_NENTRY.
 * The expansion is a brace-enclosed block (not an expression); it is
 * deliberately left in that form because it is invoked without a trailing
 * semicolon.
 */
#define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
	(statp) = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
		cheetah_livelock_entry_nxt = 0;				\
	}								\
}

/*
 * Store val into member item of the history entry statp points to.
 * Arguments are parenthesized so that pointer expressions such as
 * "base + 1" and compound values bind as the caller intended.
 */
#define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	\
	((statp)->item = (val))
648 
649 struct {
650 	hrtime_t hrt;		/* maximum recovery time */
651 	int recovery;		/* recovered */
652 	int full_claimed;	/* maximum pages claimed in full recovery */
653 	int proc_entry;		/* attempted to claim TSB */
654 	int proc_tsb_scan;	/* tsb scanned */
655 	int proc_tsb_partscan;	/* tsb partially scanned */
656 	int proc_tsb_fullscan;	/* whole tsb scanned */
657 	int proc_claimed;	/* maximum pages claimed in tsb scan */
658 	int proc_user;		/* user thread */
659 	int proc_kernel;	/* kernel thread */
660 	int proc_onflt;		/* bad stack */
661 	int proc_cpu;		/* null cpu */
662 	int proc_thread;	/* null thread */
663 	int proc_proc;		/* null proc */
664 	int proc_as;		/* null as */
665 	int proc_hat;		/* null hat */
666 	int proc_hat_inval;	/* hat contents don't make sense */
667 	int proc_hat_busy;	/* hat is changing TSBs */
668 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
669 	int proc_cnum_bad;	/* cnum out of range */
670 	int proc_cnum;		/* last cnum processed */
671 	tte_t proc_tte;		/* last tte processed */
672 } cheetah_livelock_stat;
673 
/*
 * Accessors for the cheetah_livelock_stat counters.  None of these is
 * atomic; they are plain loads and stores on the shared structure.
 */

/* Increment counter item. */
#define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++

/* Overwrite item with value. */
#define	CHEETAH_LIVELOCK_STATSET(item, value)		\
	cheetah_livelock_stat.item = (value)

/*
 * Raise item to value if value is larger.  value is evaluated twice, so it
 * must be free of side effects; it is parenthesized so that conditional or
 * other low-precedence expressions compare as a unit.  Expands to a
 * brace-enclosed block.
 */
#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
	if ((value) > cheetah_livelock_stat.item)	\
		cheetah_livelock_stat.item = (value);	\
}
683 
684 /*
685  * Attempt to recover a cpu by claiming every cache line as saved
686  * in the TSB that the non-responsive cpu is using. Since we can't
687  * grab any adaptive lock, this is at best an attempt to do so. Because
688  * we don't grab any locks, we must operate under the protection of
689  * on_fault().
690  *
691  * Return 1 if cpuid could be recovered, 0 if failed.
692  */
693 int
694 mondo_recover_proc(uint16_t cpuid, int bn)
695 {
696 	label_t ljb;
697 	cpu_t *cp;
698 	kthread_t *t;
699 	proc_t *p;
700 	struct as *as;
701 	struct hat *hat;
702 	short  cnum;
703 	struct tsb_info *tsbinfop;
704 	struct tsbe *tsbep;
705 	caddr_t tsbp;
706 	caddr_t end_tsbp;
707 	uint64_t paddr;
708 	uint64_t idsr;
709 	u_longlong_t pahi, palo;
710 	int pages_claimed = 0;
711 	tte_t tsbe_tte;
712 	int tried_kernel_tsb = 0;
713 
714 	CHEETAH_LIVELOCK_STAT(proc_entry);
715 
716 	if (on_fault(&ljb)) {
717 		CHEETAH_LIVELOCK_STAT(proc_onflt);
718 		goto badstruct;
719 	}
720 
721 	if ((cp = cpu[cpuid]) == NULL) {
722 		CHEETAH_LIVELOCK_STAT(proc_cpu);
723 		goto badstruct;
724 	}
725 
726 	if ((t = cp->cpu_thread) == NULL) {
727 		CHEETAH_LIVELOCK_STAT(proc_thread);
728 		goto badstruct;
729 	}
730 
731 	if ((p = ttoproc(t)) == NULL) {
732 		CHEETAH_LIVELOCK_STAT(proc_proc);
733 		goto badstruct;
734 	}
735 
736 	if ((as = p->p_as) == NULL) {
737 		CHEETAH_LIVELOCK_STAT(proc_as);
738 		goto badstruct;
739 	}
740 
741 	if ((hat = as->a_hat) == NULL) {
742 		CHEETAH_LIVELOCK_STAT(proc_hat);
743 		goto badstruct;
744 	}
745 
746 	if (hat != ksfmmup) {
747 		CHEETAH_LIVELOCK_STAT(proc_user);
748 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
749 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
750 			goto badstruct;
751 		}
752 		tsbinfop = hat->sfmmu_tsb;
753 		if (tsbinfop == NULL) {
754 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
755 			goto badstruct;
756 		}
757 		tsbp = tsbinfop->tsb_va;
758 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
759 	} else {
760 		CHEETAH_LIVELOCK_STAT(proc_kernel);
761 		tsbinfop = NULL;
762 		tsbp = ktsb_base;
763 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
764 	}
765 
766 	/* Verify as */
767 	if (hat->sfmmu_as != as) {
768 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
769 		goto badstruct;
770 	}
771 
772 	cnum = hat->sfmmu_cnum;
773 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
774 
775 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) {
776 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
777 		goto badstruct;
778 	}
779 
780 	do {
781 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
782 
783 		/*
784 		 * Skip TSBs being relocated.  This is important because
785 		 * we want to avoid the following deadlock scenario:
786 		 *
787 		 * 1) when we came in we set ourselves to "in recover" state.
788 		 * 2) when we try to touch TSB being relocated the mapping
789 		 *    will be in the suspended state so we'll spin waiting
790 		 *    for it to be unlocked.
791 		 * 3) when the CPU that holds the TSB mapping locked tries to
792 		 *    unlock it it will send a xtrap which will fail to xcall
793 		 *    us or the CPU we're trying to recover, and will in turn
794 		 *    enter the mondo code.
795 		 * 4) since we are still spinning on the locked mapping
796 		 *    no further progress will be made and the system will
797 		 *    inevitably hard hang.
798 		 *
799 		 * A TSB not being relocated can't begin being relocated
800 		 * while we're accessing it because we check
801 		 * sendmondo_in_recover before relocating TSBs.
802 		 */
803 		if (hat != ksfmmup &&
804 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
805 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
806 			goto next_tsbinfo;
807 		}
808 
809 		for (tsbep = (struct tsbe *)tsbp;
810 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
811 			tsbe_tte = tsbep->tte_data;
812 
813 			if (tsbe_tte.tte_val == 0) {
814 				/*
815 				 * Invalid tte
816 				 */
817 				continue;
818 			}
819 			if (tsbe_tte.tte_se) {
820 				/*
821 				 * Don't want device registers
822 				 */
823 				continue;
824 			}
825 			if (tsbe_tte.tte_cp == 0) {
826 				/*
827 				 * Must be cached in E$
828 				 */
829 				continue;
830 			}
831 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
832 			idsr = getidsr();
833 			if ((idsr & (IDSR_NACK_BIT(bn) |
834 			    IDSR_BUSY_BIT(bn))) == 0) {
835 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
836 				goto done;
837 			}
838 			pahi = tsbe_tte.tte_pahi;
839 			palo = tsbe_tte.tte_palo;
840 			paddr = (uint64_t)((pahi << 32) |
841 			    (palo << MMU_PAGESHIFT));
842 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
843 			    CH_ECACHE_SUBBLK_SIZE);
844 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
845 				shipit(cpuid, bn);
846 			}
847 			pages_claimed++;
848 		}
849 next_tsbinfo:
850 		if (tsbinfop != NULL)
851 			tsbinfop = tsbinfop->tsb_next;
852 		if (tsbinfop != NULL) {
853 			tsbp = tsbinfop->tsb_va;
854 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
855 		} else if (tsbp == ktsb_base) {
856 			tried_kernel_tsb = 1;
857 		} else if (!tried_kernel_tsb) {
858 			tsbp = ktsb_base;
859 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
860 			hat = ksfmmup;
861 			tsbinfop = NULL;
862 		}
863 	} while (tsbinfop != NULL ||
864 			((tsbp == ktsb_base) && !tried_kernel_tsb));
865 
866 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
867 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
868 	no_fault();
869 	idsr = getidsr();
870 	if ((idsr & (IDSR_NACK_BIT(bn) |
871 	    IDSR_BUSY_BIT(bn))) == 0) {
872 		return (1);
873 	} else {
874 		return (0);
875 	}
876 
877 done:
878 	no_fault();
879 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
880 	return (1);
881 
882 badstruct:
883 	no_fault();
884 	return (0);
885 }
886 
887 /*
888  * Attempt to claim ownership, temporarily, of every cache line that a
889  * non-responsive cpu might be using.  This might kick that cpu out of
890  * this state.
891  *
892  * The return value indicates to the caller if we have exhausted all recovery
893  * techniques. If 1 is returned, it is useless to call this function again
894  * even for a different target CPU.
895  */
896 int
897 mondo_recover(uint16_t cpuid, int bn)
898 {
899 	struct memseg *seg;
900 	uint64_t begin_pa, end_pa, cur_pa;
901 	hrtime_t begin_hrt, end_hrt;
902 	int retval = 0;
903 	int pages_claimed = 0;
904 	cheetah_livelock_entry_t *histp;
905 	uint64_t idsr;
906 
907 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
908 		/*
909 		 * Wait while recovery takes place
910 		 */
911 		while (sendmondo_in_recover) {
912 			drv_usecwait(1);
913 		}
914 		/*
915 		 * Assume we didn't claim the whole memory. If
916 		 * the target of this caller is not recovered,
917 		 * it will come back.
918 		 */
919 		return (retval);
920 	}
921 
922 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
923 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
924 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
925 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
926 
927 	begin_hrt = gethrtime_waitfree();
928 	/*
929 	 * First try to claim the lines in the TSB the target
930 	 * may have been using.
931 	 */
932 	if (mondo_recover_proc(cpuid, bn) == 1) {
933 		/*
934 		 * Didn't claim the whole memory
935 		 */
936 		goto done;
937 	}
938 
939 	/*
940 	 * We tried using the TSB. The target is still
941 	 * not recovered. Check if complete memory scan is
942 	 * enabled.
943 	 */
944 	if (cheetah_sendmondo_fullscan == 0) {
945 		/*
946 		 * Full memory scan is disabled.
947 		 */
948 		retval = 1;
949 		goto done;
950 	}
951 
952 	/*
953 	 * Try claiming the whole memory.
954 	 */
955 	for (seg = memsegs; seg; seg = seg->next) {
956 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
957 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
958 		for (cur_pa = begin_pa; cur_pa < end_pa;
959 		    cur_pa += MMU_PAGESIZE) {
960 			idsr = getidsr();
961 			if ((idsr & (IDSR_NACK_BIT(bn) |
962 			    IDSR_BUSY_BIT(bn))) == 0) {
963 				/*
964 				 * Didn't claim all memory
965 				 */
966 				goto done;
967 			}
968 			claimlines(cur_pa, MMU_PAGESIZE,
969 			    CH_ECACHE_SUBBLK_SIZE);
970 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
971 				shipit(cpuid, bn);
972 			}
973 			pages_claimed++;
974 		}
975 	}
976 
977 	/*
978 	 * We did all we could.
979 	 */
980 	retval = 1;
981 
982 done:
983 	/*
984 	 * Update statistics
985 	 */
986 	end_hrt = gethrtime_waitfree();
987 	CHEETAH_LIVELOCK_STAT(recovery);
988 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
989 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
990 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
991 	    (end_hrt -  begin_hrt));
992 
993 	while (cas32(&sendmondo_in_recover, 1, 0) != 1);
994 
995 	return (retval);
996 }
997 
998 /*
999  * This is called by the cyclic framework when this CPU becomes online
1000  */
1001 /*ARGSUSED*/
1002 static void
1003 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
1004 {
1005 
1006 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1007 	hdlr->cyh_level = CY_LOW_LEVEL;
1008 	hdlr->cyh_arg = NULL;
1009 
1010 	/*
1011 	 * Stagger the start time
1012 	 */
1013 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1014 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1015 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1016 	}
1017 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1018 }
1019 
1020 /*
1021  * Create a low level cyclic to send a xtrap to the next cpu online.
1022  * However, there's no need to have this running on a uniprocessor system.
1023  */
1024 static void
1025 cheetah_nudge_init(void)
1026 {
1027 	cyc_omni_handler_t hdlr;
1028 
1029 	if (max_ncpus == 1) {
1030 		return;
1031 	}
1032 
1033 	hdlr.cyo_online = cheetah_nudge_onln;
1034 	hdlr.cyo_offline = NULL;
1035 	hdlr.cyo_arg = NULL;
1036 
1037 	mutex_enter(&cpu_lock);
1038 	(void) cyclic_add_omni(&hdlr);
1039 	mutex_exit(&cpu_lock);
1040 }
1041 
1042 /*
1043  * Cyclic handler to wake up buddy
1044  */
1045 void
1046 cheetah_nudge_buddy(void)
1047 {
1048 	/*
1049 	 * Disable kernel preemption to protect the cpu list
1050 	 */
1051 	kpreempt_disable();
1052 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1053 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1054 		    0, 0);
1055 	}
1056 	kpreempt_enable();
1057 }
1058 
1059 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1060 
#ifdef SEND_MONDO_STATS
/*
 * Debug-only counters, compiled in when SEND_MONDO_STATS is defined.
 *
 * send_one_mondo() buckets the elapsed tick count n of each send:
 *	n < 8192	x_one_stimes[n >> 7]	(64 buckets of 128 ticks)
 *	otherwise	x_one_ltimes[(n >> 13) & 0xf]
 *
 * NOTE(review): the x_set_* and x_nack_stimes arrays are not referenced in
 * this part of the file; presumably they are the equivalent counters for
 * the multi-target send path -- confirm against their users.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
1069 
1070 /*
1071  * Note: A version of this function is used by the debugger via the KDI,
1072  * and must be kept in sync with this version.  Any changes made to this
1073  * function to support new chips or to accomodate errata must also be included
1074  * in the KDI-specific version.  See us3_kdi.c.
1075  */
1076 void
1077 send_one_mondo(int cpuid)
1078 {
1079 	int busy, nack;
1080 	uint64_t idsr, starttick, endtick, tick, lasttick;
1081 	uint64_t busymask;
1082 #ifdef	CHEETAHPLUS_ERRATUM_25
1083 	int recovered = 0;
1084 #endif
1085 
1086 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1087 	starttick = lasttick = gettick();
1088 	shipit(cpuid, 0);
1089 	endtick = starttick + xc_tick_limit;
1090 	busy = nack = 0;
1091 #if defined(JALAPENO) || defined(SERRANO)
1092 	/*
1093 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1094 	 * will be used for dispatching interrupt. For now, assume
1095 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1096 	 * issues with respect to BUSY/NACK pair usage.
1097 	 */
1098 	busymask  = IDSR_BUSY_BIT(cpuid);
1099 #else /* JALAPENO || SERRANO */
1100 	busymask = IDSR_BUSY;
1101 #endif /* JALAPENO || SERRANO */
1102 	for (;;) {
1103 		idsr = getidsr();
1104 		if (idsr == 0)
1105 			break;
1106 
1107 		tick = gettick();
1108 		/*
1109 		 * If there is a big jump between the current tick
1110 		 * count and lasttick, we have probably hit a break
1111 		 * point.  Adjust endtick accordingly to avoid panic.
1112 		 */
1113 		if (tick > (lasttick + xc_tick_jump_limit))
1114 			endtick += (tick - lasttick);
1115 		lasttick = tick;
1116 		if (tick > endtick) {
1117 			if (panic_quiesce)
1118 				return;
1119 #ifdef	CHEETAHPLUS_ERRATUM_25
1120 			if (cheetah_sendmondo_recover && recovered == 0) {
1121 				if (mondo_recover(cpuid, 0)) {
1122 					/*
1123 					 * We claimed the whole memory or
1124 					 * full scan is disabled.
1125 					 */
1126 					recovered++;
1127 				}
1128 				tick = gettick();
1129 				endtick = tick + xc_tick_limit;
1130 				lasttick = tick;
1131 				/*
1132 				 * Recheck idsr
1133 				 */
1134 				continue;
1135 			} else
1136 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1137 			{
1138 				cmn_err(CE_PANIC, "send mondo timeout "
1139 				    "(target 0x%x) [%d NACK %d BUSY]",
1140 				    cpuid, nack, busy);
1141 			}
1142 		}
1143 
1144 		if (idsr & busymask) {
1145 			busy++;
1146 			continue;
1147 		}
1148 		drv_usecwait(1);
1149 		shipit(cpuid, 0);
1150 		nack++;
1151 		busy = 0;
1152 	}
1153 #ifdef SEND_MONDO_STATS
1154 	{
1155 		int n = gettick() - starttick;
1156 		if (n < 8192)
1157 			x_one_stimes[n >> 7]++;
1158 		else
1159 			x_one_ltimes[(n >> 13) & 0xf]++;
1160 	}
1161 #endif
1162 }
1163 
/*
 * syncfpu() is a no-op in this cpu module.
 */
void
syncfpu(void)
{
	/* intentionally empty */
}
1168 
1169 /*
1170  * Return processor specific async error structure
1171  * size used.
1172  */
1173 int
1174 cpu_aflt_size(void)
1175 {
1176 	return (sizeof (ch_async_flt_t));
1177 }
1178 
/*
 * Tunable to disable the checking of other cpu logout areas during panic for
 * potential syndrome 71 generating errors.  Initialized to 1; the code that
 * consults it is not in this part of the file, but the name and the text
 * above imply that setting it to 0 turns the check off.
 */
int enable_check_other_cpus_logout = 1;
1184 
1185 /*
1186  * Check other cpus logout area for potential synd 71 generating
1187  * errors.
1188  */
1189 static void
1190 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1191     ch_cpu_logout_t *clop)
1192 {
1193 	struct async_flt *aflt;
1194 	ch_async_flt_t ch_flt;
1195 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1196 
1197 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1198 		return;
1199 	}
1200 
1201 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1202 
1203 	t_afar = clop->clo_data.chd_afar;
1204 	t_afsr = clop->clo_data.chd_afsr;
1205 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1206 #if defined(SERRANO)
1207 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1208 #endif	/* SERRANO */
1209 
1210 	/*
1211 	 * In order to simplify code, we maintain this afsr_errs
1212 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1213 	 * sticky bits.
1214 	 */
1215 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1216 	    (t_afsr & C_AFSR_ALL_ERRS);
1217 
1218 	/* Setup the async fault structure */
1219 	aflt = (struct async_flt *)&ch_flt;
1220 	aflt->flt_id = gethrtime_waitfree();
1221 	ch_flt.afsr_ext = t_afsr_ext;
1222 	ch_flt.afsr_errs = t_afsr_errs;
1223 	aflt->flt_stat = t_afsr;
1224 	aflt->flt_addr = t_afar;
1225 	aflt->flt_bus_id = cpuid;
1226 	aflt->flt_inst = cpuid;
1227 	aflt->flt_pc = tpc;
1228 	aflt->flt_prot = AFLT_PROT_NONE;
1229 	aflt->flt_class = CPU_FAULT;
1230 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1231 	aflt->flt_tl = tl;
1232 	aflt->flt_status = ecc_type;
1233 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1234 
1235 	/*
1236 	 * Queue events on the async event queue, one event per error bit.
1237 	 * If no events are queued, queue an event to complain.
1238 	 */
1239 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1240 		ch_flt.flt_type = CPU_INV_AFSR;
1241 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1242 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1243 		    aflt->flt_panic);
1244 	}
1245 
1246 	/*
1247 	 * Zero out + invalidate CPU logout.
1248 	 */
1249 	bzero(clop, sizeof (ch_cpu_logout_t));
1250 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1251 }
1252 
1253 /*
1254  * Check the logout areas of all other cpus for unlogged errors.
1255  */
1256 static void
1257 cpu_check_other_cpus_logout(void)
1258 {
1259 	int i, j;
1260 	processorid_t myid;
1261 	struct cpu *cp;
1262 	ch_err_tl1_data_t *cl1p;
1263 
1264 	myid = CPU->cpu_id;
1265 	for (i = 0; i < NCPU; i++) {
1266 		cp = cpu[i];
1267 
1268 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1269 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1270 			continue;
1271 		}
1272 
1273 		/*
1274 		 * Check each of the tl>0 logout areas
1275 		 */
1276 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1277 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1278 			if (cl1p->ch_err_tl1_flags == 0)
1279 				continue;
1280 
1281 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1282 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1283 		}
1284 
1285 		/*
1286 		 * Check each of the remaining logout areas
1287 		 */
1288 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1289 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1290 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1291 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1292 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1293 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1294 	}
1295 }
1296 
1297 /*
1298  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1299  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1300  * flush the error that caused the UCU/UCC, then again here at the end to
1301  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1302  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1303  * another Fast ECC trap.
1304  *
1305  * Cheetah+ also handles: TSCE: No additional processing required.
1306  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1307  *
1308  * Note that the p_clo_flags input is only valid in cases where the
1309  * cpu_private struct is not yet initialized (since that is the only
1310  * time that information cannot be obtained from the logout struct.)
1311  */
1312 /*ARGSUSED*/
1313 void
1314 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1315 {
1316 	ch_cpu_logout_t *clop;
1317 	uint64_t ceen, nceen;
1318 
1319 	/*
1320 	 * Get the CPU log out info. If we can't find our CPU private
1321 	 * pointer, then we will have to make due without any detailed
1322 	 * logout information.
1323 	 */
1324 	if (CPU_PRIVATE(CPU) == NULL) {
1325 		clop = NULL;
1326 		ceen = p_clo_flags & EN_REG_CEEN;
1327 		nceen = p_clo_flags & EN_REG_NCEEN;
1328 	} else {
1329 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1330 		ceen = clop->clo_flags & EN_REG_CEEN;
1331 		nceen = clop->clo_flags & EN_REG_NCEEN;
1332 	}
1333 
1334 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1335 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1336 }
1337 
/*
 * Log fast ecc error, called from either Fast ECC at TL=0
 * (cpu_fast_ecc_error) or Fast ECC at TL>0 (cpu_tl1_error).
 *
 *	tpc	trap PC to record in the fault; may be NULL when unknown
 *	priv	non-zero if the trap was taken in privileged mode
 *	tl	trap level to record in the fault
 *	ceen	EN_REG_CEEN if CEEN is to be turned back on, else 0
 *	nceen	EN_REG_NCEEN if NCEEN is to be turned back on, else 0
 *	clop	captured cpu logout data, or NULL, in which case the error
 *		registers are read directly
 *
 * Queues one event per error bit, invalidates the logout area, re-enables
 * the requested error traps, picks up any errors that arrived meanwhile,
 * and panics if the fault was judged fatal.  Otherwise the Ecache is
 * flushed before returning.
 */
static void
cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
    uint64_t nceen, ch_cpu_logout_t *clop)
{
	struct async_flt *aflt;
	ch_async_flt_t ch_flt;
	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
	char pr_reason[MAX_REASON_STRING];
	ch_cpu_errors_t cpu_error_regs;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	/*
	 * If no cpu logout data, then we will have to make do without
	 * any detailed logout information.
	 */
	if (clop == NULL) {
		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
		/*
		 * Read the live error registers and write the same values
		 * back.  NOTE(review): the write-back presumably clears
		 * the sticky error bits just read -- confirm against
		 * set_cpu_error_state().
		 */
		get_cpu_error_state(&cpu_error_regs);
		set_cpu_error_state(&cpu_error_regs);
		t_afar = cpu_error_regs.afar;
		t_afsr = cpu_error_regs.afsr;
		t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
	} else {
		t_afar = clop->clo_data.chd_afar;
		t_afsr = clop->clo_data.chd_afsr;
		t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif	/* SERRANO */
	}

	/*
	 * In order to simplify code, we maintain this afsr_errs
	 * variable which holds the aggregate of AFSR and AFSR_EXT
	 * sticky bits.
	 */
	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
	    (t_afsr & C_AFSR_ALL_ERRS);
	pr_reason[0] = '\0';

	/* Setup the async fault structure */
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	ch_flt.afsr_ext = t_afsr_ext;
	ch_flt.afsr_errs = t_afsr_errs;
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = tpc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = priv;
	aflt->flt_tl = tl;
	aflt->flt_status = ECC_F_TRAP;
	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

	/*
	 * XXXX - Phenomenal hack to get around Solaris not getting all the
	 * cmn_err messages out to the console.  The situation is a UCU (in
	 * priv mode) which causes a WDU which causes a UE (on the retry).
	 * The messages for the UCU and WDU are enqueued and then pulled off
	 * the async queue via softint and syslogd starts to process them
	 * but doesn't get them to the console.  The UE causes a panic, but
	 * since the UCU/WDU messages are already in transit, those aren't
	 * on the async queue.  The hack is to check if we have a matching
	 * WDU event for the UCU, and if it matches, we're more than likely
	 * going to panic with a UE, unless we're under protection.  So, we
	 * check to see if we got a matching WDU event and if we're under
	 * protection.
	 *
	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
	 * looks like this:
	 *    UCU->WDU->UE
	 * For Panther, it could look like either of these:
	 *    UCU---->WDU->L3_WDU->UE
	 *    L3_UCU->WDU->L3_WDU->UE
	 */
	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
		get_cpu_error_state(&cpu_error_regs);
		/* A WDU is now pending at the same address as the UCU. */
		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
		    (cpu_error_regs.afar == t_afar));
		/*
		 * Without logout data the registers were read directly
		 * above, so the WDU may already be in t_afsr_errs.
		 */
		aflt->flt_panic |= ((clop == NULL) &&
		    (t_afsr_errs & C_AFSR_WDU));
	}

	/*
	 * Queue events on the async event queue, one event per error bit.
	 * If no events are queued or no Fast ECC events are on in the AFSR,
	 * queue an event to complain.
	 */
	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
		ch_flt.flt_type = CPU_INV_AFSR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Zero out + invalidate CPU logout.
	 */
	if (clop) {
		bzero(clop, sizeof (ch_cpu_logout_t));
		clop->clo_data.chd_afar = LOGOUT_INVALID;
	}

	/*
	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
	 * or disrupting errors have happened.  We do this because if a
	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
	 * deferred or disrupting error happening between checking the AFSR and
	 * enabling NCEEN/CEEN.
	 *
	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
	 * taken.
	 */
	set_error_enable(get_error_enable() | (nceen | ceen));
	if (clear_errors(&ch_flt)) {
		/* Any async error picked up here makes the fault fatal. */
		aflt->flt_panic |= ((ch_flt.afsr_errs &
		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
		    NULL);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic)
		fm_panic("%sError(s)", pr_reason);

	/*
	 * Flushing the Ecache here gets the part of the trap handler that
	 * is run at TL=1 out of the Ecache.
	 */
	cpu_flush_ecache();
}
1489 
1490 /*
1491  * This is called via sys_trap from pil15_interrupt code if the
1492  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1493  * various ch_err_tl1_data structures for valid entries based on the bit
1494  * settings in the ch_err_tl1_flags entry of the structure.
1495  */
1496 /*ARGSUSED*/
1497 void
1498 cpu_tl1_error(struct regs *rp, int panic)
1499 {
1500 	ch_err_tl1_data_t *cl1p, cl1;
1501 	int i, ncl1ps;
1502 	uint64_t me_flags;
1503 	uint64_t ceen, nceen;
1504 
1505 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1506 		cl1p = &ch_err_tl1_data;
1507 		ncl1ps = 1;
1508 	} else if (CPU_PRIVATE(CPU) != NULL) {
1509 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1510 		ncl1ps = CH_ERR_TL1_TLMAX;
1511 	} else {
1512 		ncl1ps = 0;
1513 	}
1514 
1515 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1516 		if (cl1p->ch_err_tl1_flags == 0)
1517 			continue;
1518 
1519 		/*
1520 		 * Grab a copy of the logout data and invalidate
1521 		 * the logout area.
1522 		 */
1523 		cl1 = *cl1p;
1524 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1525 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1526 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1527 
1528 		/*
1529 		 * Log "first error" in ch_err_tl1_data.
1530 		 */
1531 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1532 			ceen = get_error_enable() & EN_REG_CEEN;
1533 			nceen = get_error_enable() & EN_REG_NCEEN;
1534 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1535 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1536 		}
1537 #if defined(CPU_IMP_L1_CACHE_PARITY)
1538 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1539 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1540 			    (caddr_t)cl1.ch_err_tl1_tpc);
1541 		}
1542 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1543 
1544 		/*
1545 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1546 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1547 		 * if the structure is busy, we just do the cache flushing
1548 		 * we have to do and then do the retry.  So the AFSR/AFAR
1549 		 * at this point *should* have some relevant info.  If there
1550 		 * are no valid errors in the AFSR, we'll assume they've
1551 		 * already been picked up and logged.  For I$/D$ parity,
1552 		 * we just log an event with an "Unknown" (NULL) TPC.
1553 		 */
1554 		if (me_flags & CH_ERR_FECC) {
1555 			ch_cpu_errors_t cpu_error_regs;
1556 			uint64_t t_afsr_errs;
1557 
1558 			/*
1559 			 * Get the error registers and see if there's
1560 			 * a pending error.  If not, don't bother
1561 			 * generating an "Invalid AFSR" error event.
1562 			 */
1563 			get_cpu_error_state(&cpu_error_regs);
1564 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1565 			    C_AFSR_EXT_ALL_ERRS) |
1566 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1567 			if (t_afsr_errs != 0) {
1568 				ceen = get_error_enable() & EN_REG_CEEN;
1569 				nceen = get_error_enable() & EN_REG_NCEEN;
1570 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1571 				    1, ceen, nceen, NULL);
1572 			}
1573 		}
1574 #if defined(CPU_IMP_L1_CACHE_PARITY)
1575 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1576 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1577 		}
1578 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1579 	}
1580 }
1581 
1582 /*
1583  * Called from Fast ECC TL>0 handler in case of fatal error.
1584  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1585  * but if we don't, we'll panic with something reasonable.
1586  */
1587 /*ARGSUSED*/
1588 void
1589 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1590 {
1591 	cpu_tl1_error(rp, 1);
1592 	/*
1593 	 * Should never return, but just in case.
1594 	 */
1595 	fm_panic("Unsurvivable ECC Error at TL>0");
1596 }
1597 
1598 /*
1599  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1600  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1601  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1602  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1603  *
1604  * Cheetah+ also handles (No additional processing required):
1605  *    DUE, DTO, DBERR	(NCEEN controlled)
1606  *    THCE		(CEEN and ET_ECC_en controlled)
1607  *    TUE		(ET_ECC_en controlled)
1608  *
1609  * Panther further adds:
1610  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1611  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1612  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1613  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1614  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1615  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1616  *
1617  * Note that the p_clo_flags input is only valid in cases where the
1618  * cpu_private struct is not yet initialized (since that is the only
1619  * time that information cannot be obtained from the logout struct.)
1620  */
1621 /*ARGSUSED*/
1622 void
1623 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1624 {
1625 	struct async_flt *aflt;
1626 	ch_async_flt_t ch_flt;
1627 	char pr_reason[MAX_REASON_STRING];
1628 	ch_cpu_logout_t *clop;
1629 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1630 	ch_cpu_errors_t cpu_error_regs;
1631 
1632 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1633 	/*
1634 	 * Get the CPU log out info. If we can't find our CPU private
1635 	 * pointer, then we will have to make due without any detailed
1636 	 * logout information.
1637 	 */
1638 	if (CPU_PRIVATE(CPU) == NULL) {
1639 		clop = NULL;
1640 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1641 		get_cpu_error_state(&cpu_error_regs);
1642 		set_cpu_error_state(&cpu_error_regs);
1643 		t_afar = cpu_error_regs.afar;
1644 		t_afsr = cpu_error_regs.afsr;
1645 		t_afsr_ext = cpu_error_regs.afsr_ext;
1646 #if defined(SERRANO)
1647 		ch_flt.afar2 = cpu_error_regs.afar2;
1648 #endif	/* SERRANO */
1649 	} else {
1650 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1651 		t_afar = clop->clo_data.chd_afar;
1652 		t_afsr = clop->clo_data.chd_afsr;
1653 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1654 #if defined(SERRANO)
1655 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1656 #endif	/* SERRANO */
1657 	}
1658 
1659 	/*
1660 	 * In order to simplify code, we maintain this afsr_errs
1661 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1662 	 * sticky bits.
1663 	 */
1664 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1665 	    (t_afsr & C_AFSR_ALL_ERRS);
1666 
1667 	pr_reason[0] = '\0';
1668 	/* Setup the async fault structure */
1669 	aflt = (struct async_flt *)&ch_flt;
1670 	ch_flt.afsr_ext = t_afsr_ext;
1671 	ch_flt.afsr_errs = t_afsr_errs;
1672 	aflt->flt_stat = t_afsr;
1673 	aflt->flt_addr = t_afar;
1674 	aflt->flt_pc = (caddr_t)rp->r_pc;
1675 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1676 	aflt->flt_tl = 0;
1677 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1678 
1679 	/*
1680 	 * If this trap is a result of one of the errors not masked
1681 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1682 	 * indicate that a timeout is to be set later.
1683 	 */
1684 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1685 	    !aflt->flt_panic)
1686 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1687 	else
1688 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1689 
1690 	/*
1691 	 * log the CE and clean up
1692 	 */
1693 	cpu_log_and_clear_ce(&ch_flt);
1694 
1695 	/*
1696 	 * We re-enable CEEN (if required) and check if any disrupting errors
1697 	 * have happened.  We do this because if a disrupting error had occurred
1698 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1699 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1700 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1701 	 * of a error happening between checking the AFSR and enabling CEEN.
1702 	 */
1703 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1704 	    set_error_enable(get_error_enable() | EN_REG_CEEN);
1705 	if (clear_errors(&ch_flt)) {
1706 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1707 		    NULL);
1708 	}
1709 
1710 	/*
1711 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1712 	 * be logged as part of the panic flow.
1713 	 */
1714 	if (aflt->flt_panic)
1715 		fm_panic("%sError(s)", pr_reason);
1716 }
1717 
1718 /*
1719  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1720  * L3_EDU:BLD, TO, and BERR events.
1721  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1722  *
1723  * Cheetah+: No additional errors handled.
1724  *
1725  * Note that the p_clo_flags input is only valid in cases where the
1726  * cpu_private struct is not yet initialized (since that is the only
1727  * time that information cannot be obtained from the logout struct.)
1728  */
1729 /*ARGSUSED*/
1730 void
1731 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1732 {
1733 	ushort_t ttype, tl;
1734 	ch_async_flt_t ch_flt;
1735 	struct async_flt *aflt;
1736 	int trampolined = 0;
1737 	char pr_reason[MAX_REASON_STRING];
1738 	ch_cpu_logout_t *clop;
1739 	uint64_t ceen, clo_flags;
1740 	uint64_t log_afsr;
1741 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1742 	ch_cpu_errors_t cpu_error_regs;
1743 	int expected = DDI_FM_ERR_UNEXPECTED;
1744 	ddi_acc_hdl_t *hp;
1745 
1746 	/*
1747 	 * We need to look at p_flag to determine if the thread detected an
1748 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1749 	 * because we just need a consistent snapshot and we know that everyone
1750 	 * else will store a consistent set of bits while holding p_lock.  We
1751 	 * don't have to worry about a race because SDOCORE is set once prior
1752 	 * to doing i/o from the process's address space and is never cleared.
1753 	 */
1754 	uint_t pflag = ttoproc(curthread)->p_flag;
1755 
1756 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1757 	/*
1758 	 * Get the CPU log out info. If we can't find our CPU private
	 * pointer then we will have to make do without any detailed
1760 	 * logout information.
1761 	 */
1762 	if (CPU_PRIVATE(CPU) == NULL) {
1763 		clop = NULL;
1764 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1765 		get_cpu_error_state(&cpu_error_regs);
1766 		set_cpu_error_state(&cpu_error_regs);
1767 		t_afar = cpu_error_regs.afar;
1768 		t_afsr = cpu_error_regs.afsr;
1769 		t_afsr_ext = cpu_error_regs.afsr_ext;
1770 #if defined(SERRANO)
1771 		ch_flt.afar2 = cpu_error_regs.afar2;
1772 #endif	/* SERRANO */
1773 		clo_flags = p_clo_flags;
1774 	} else {
1775 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1776 		t_afar = clop->clo_data.chd_afar;
1777 		t_afsr = clop->clo_data.chd_afsr;
1778 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1779 #if defined(SERRANO)
1780 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1781 #endif	/* SERRANO */
1782 		clo_flags = clop->clo_flags;
1783 	}
1784 
1785 	/*
1786 	 * In order to simplify code, we maintain this afsr_errs
1787 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1788 	 * sticky bits.
1789 	 */
1790 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1791 	    (t_afsr & C_AFSR_ALL_ERRS);
1792 	pr_reason[0] = '\0';
1793 
1794 	/*
1795 	 * Grab information encoded into our clo_flags field.
1796 	 */
1797 	ceen = clo_flags & EN_REG_CEEN;
1798 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1799 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1800 
1801 	/*
1802 	 * handle the specific error
1803 	 */
1804 	aflt = (struct async_flt *)&ch_flt;
1805 	aflt->flt_id = gethrtime_waitfree();
1806 	aflt->flt_bus_id = getprocessorid();
1807 	aflt->flt_inst = CPU->cpu_id;
1808 	ch_flt.afsr_ext = t_afsr_ext;
1809 	ch_flt.afsr_errs = t_afsr_errs;
1810 	aflt->flt_stat = t_afsr;
1811 	aflt->flt_addr = t_afar;
1812 	aflt->flt_pc = (caddr_t)rp->r_pc;
1813 	aflt->flt_prot = AFLT_PROT_NONE;
1814 	aflt->flt_class = CPU_FAULT;
1815 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1816 	aflt->flt_tl = (uchar_t)tl;
1817 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1818 	    C_AFSR_PANIC(t_afsr_errs));
1819 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1820 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1821 
1822 	/*
1823 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1824 	 * see if we were executing in the kernel under on_trap() or t_lofault
1825 	 * protection.  If so, modify the saved registers so that we return
1826 	 * from the trap to the appropriate trampoline routine.
1827 	 */
1828 	if (aflt->flt_priv && tl == 0) {
1829 		if (curthread->t_ontrap != NULL) {
1830 			on_trap_data_t *otp = curthread->t_ontrap;
1831 
1832 			if (otp->ot_prot & OT_DATA_EC) {
1833 				aflt->flt_prot = AFLT_PROT_EC;
1834 				otp->ot_trap |= OT_DATA_EC;
1835 				rp->r_pc = otp->ot_trampoline;
1836 				rp->r_npc = rp->r_pc + 4;
1837 				trampolined = 1;
1838 			}
1839 
1840 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1841 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1842 				aflt->flt_prot = AFLT_PROT_ACCESS;
1843 				otp->ot_trap |= OT_DATA_ACCESS;
1844 				rp->r_pc = otp->ot_trampoline;
1845 				rp->r_npc = rp->r_pc + 4;
1846 				trampolined = 1;
1847 				/*
1848 				 * for peeks and caut_gets errors are expected
1849 				 */
1850 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1851 				if (!hp)
1852 					expected = DDI_FM_ERR_PEEK;
1853 				else if (hp->ah_acc.devacc_attr_access ==
1854 				    DDI_CAUTIOUS_ACC)
1855 					expected = DDI_FM_ERR_EXPECTED;
1856 			}
1857 
1858 		} else if (curthread->t_lofault) {
1859 			aflt->flt_prot = AFLT_PROT_COPY;
1860 			rp->r_g1 = EFAULT;
1861 			rp->r_pc = curthread->t_lofault;
1862 			rp->r_npc = rp->r_pc + 4;
1863 			trampolined = 1;
1864 		}
1865 	}
1866 
1867 	/*
1868 	 * If we're in user mode or we're doing a protected copy, we either
1869 	 * want the ASTON code below to send a signal to the user process
1870 	 * or we want to panic if aft_panic is set.
1871 	 *
1872 	 * If we're in privileged mode and we're not doing a copy, then we
1873 	 * need to check if we've trampolined.  If we haven't trampolined,
1874 	 * we should panic.
1875 	 */
1876 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1877 		if (t_afsr_errs &
1878 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1879 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1880 			aflt->flt_panic |= aft_panic;
1881 	} else if (!trampolined) {
1882 			aflt->flt_panic = 1;
1883 	}
1884 
1885 	/*
1886 	 * If we've trampolined due to a privileged TO or BERR, or if an
1887 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1888 	 * event for that TO or BERR.  Queue all other events (if any) besides
1889 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1890 	 * ignore the number of events queued.  If we haven't trampolined due
1891 	 * to a TO or BERR, just enqueue events normally.
1892 	 */
1893 	log_afsr = t_afsr_errs;
1894 	if (trampolined) {
1895 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1896 	} else if (!aflt->flt_priv) {
1897 		/*
1898 		 * User mode, suppress messages if
1899 		 * cpu_berr_to_verbose is not set.
1900 		 */
1901 		if (!cpu_berr_to_verbose)
1902 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1903 	}
1904 
1905 	/*
1906 	 * Log any errors that occurred
1907 	 */
1908 	if (((log_afsr &
1909 		((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1910 		cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1911 		(t_afsr_errs &
1912 		(C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1913 		ch_flt.flt_type = CPU_INV_AFSR;
1914 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1915 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1916 		    aflt->flt_panic);
1917 	}
1918 
1919 	/*
1920 	 * Zero out + invalidate CPU logout.
1921 	 */
1922 	if (clop) {
1923 		bzero(clop, sizeof (ch_cpu_logout_t));
1924 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1925 	}
1926 
1927 #if defined(JALAPENO) || defined(SERRANO)
1928 	/*
1929 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1930 	 * IO errors that may have resulted in this trap.
1931 	 */
1932 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1933 		cpu_run_bus_error_handlers(aflt, expected);
1934 	}
1935 
1936 	/*
1937 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1938 	 * line from the Ecache.  We also need to query the bus nexus for
1939 	 * fatal errors.  Attempts to do diagnostic read on caches may
1940 	 * introduce more errors (especially when the module is bad).
1941 	 */
1942 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1943 		/*
1944 		 * Ask our bus nexus friends if they have any fatal errors.  If
1945 		 * so, they will log appropriate error messages.
1946 		 */
1947 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1948 			aflt->flt_panic = 1;
1949 
1950 		/*
1951 		 * We got a UE or RUE and are panicking, save the fault PA in
1952 		 * a known location so that the platform specific panic code
1953 		 * can check for copyback errors.
1954 		 */
1955 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1956 			panic_aflt = *aflt;
1957 		}
1958 	}
1959 
1960 	/*
1961 	 * Flush Ecache line or entire Ecache
1962 	 */
1963 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1964 		cpu_error_ecache_flush(&ch_flt);
1965 #else /* JALAPENO || SERRANO */
1966 	/*
1967 	 * UE/BERR/TO: Call our bus nexus friends to check for
1968 	 * IO errors that may have resulted in this trap.
1969 	 */
1970 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1971 		cpu_run_bus_error_handlers(aflt, expected);
1972 	}
1973 
1974 	/*
1975 	 * UE: If the UE is in memory, we need to flush the bad
1976 	 * line from the Ecache.  We also need to query the bus nexus for
1977 	 * fatal errors.  Attempts to do diagnostic read on caches may
1978 	 * introduce more errors (especially when the module is bad).
1979 	 */
1980 	if (t_afsr & C_AFSR_UE) {
1981 		/*
1982 		 * Ask our legacy bus nexus friends if they have any fatal
1983 		 * errors.  If so, they will log appropriate error messages.
1984 		 */
1985 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1986 			aflt->flt_panic = 1;
1987 
1988 		/*
1989 		 * We got a UE and are panicking, save the fault PA in a known
1990 		 * location so that the platform specific panic code can check
1991 		 * for copyback errors.
1992 		 */
1993 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1994 			panic_aflt = *aflt;
1995 		}
1996 	}
1997 
1998 	/*
1999 	 * Flush Ecache line or entire Ecache
2000 	 */
2001 	if (t_afsr_errs &
2002 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
2003 		cpu_error_ecache_flush(&ch_flt);
2004 #endif /* JALAPENO || SERRANO */
2005 
2006 	/*
2007 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2008 	 * or disrupting errors have happened.  We do this because if a
2009 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2010 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2011 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2012 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2013 	 * deferred or disrupting error happening between checking the AFSR and
2014 	 * enabling NCEEN/CEEN.
2015 	 *
2016 	 * Note: CEEN reenabled only if it was on when trap taken.
2017 	 */
2018 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2019 	if (clear_errors(&ch_flt)) {
2020 		/*
2021 		 * Check for secondary errors, and avoid panicking if we
2022 		 * have them
2023 		 */
2024 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2025 		    t_afar) == 0) {
2026 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2027 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2028 		}
2029 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2030 		    NULL);
2031 	}
2032 
2033 	/*
2034 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2035 	 * be logged as part of the panic flow.
2036 	 */
2037 	if (aflt->flt_panic)
2038 		fm_panic("%sError(s)", pr_reason);
2039 
2040 	/*
2041 	 * If we queued an error and we are going to return from the trap and
2042 	 * the error was in user mode or inside of a copy routine, set AST flag
2043 	 * so the queue will be drained before returning to user mode.  The
2044 	 * AST processing will also act on our failure policy.
2045 	 */
2046 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2047 		int pcb_flag = 0;
2048 
2049 		if (t_afsr_errs &
2050 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2051 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2052 			pcb_flag |= ASYNC_HWERR;
2053 
2054 		if (t_afsr & C_AFSR_BERR)
2055 			pcb_flag |= ASYNC_BERR;
2056 
2057 		if (t_afsr & C_AFSR_TO)
2058 			pcb_flag |= ASYNC_BTO;
2059 
2060 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2061 		aston(curthread);
2062 	}
2063 }
2064 
2065 #if defined(CPU_IMP_L1_CACHE_PARITY)
2066 /*
2067  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2068  *
2069  * For Panther, P$ data parity errors during floating point load hits
2070  * are also detected (reported as TT 0x71) and handled by this trap
2071  * handler.
2072  *
2073  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2074  * is available.
2075  */
2076 /*ARGSUSED*/
2077 void
2078 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2079 {
2080 	ch_async_flt_t ch_flt;
2081 	struct async_flt *aflt;
2082 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2083 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2084 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2085 	char *error_class;
2086 
2087 	/*
2088 	 * Log the error.
2089 	 * For icache parity errors the fault address is the trap PC.
2090 	 * For dcache/pcache parity errors the instruction would have to
2091 	 * be decoded to determine the address and that isn't possible
2092 	 * at high PIL.
2093 	 */
2094 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2095 	aflt = (struct async_flt *)&ch_flt;
2096 	aflt->flt_id = gethrtime_waitfree();
2097 	aflt->flt_bus_id = getprocessorid();
2098 	aflt->flt_inst = CPU->cpu_id;
2099 	aflt->flt_pc = tpc;
2100 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2101 	aflt->flt_prot = AFLT_PROT_NONE;
2102 	aflt->flt_class = CPU_FAULT;
2103 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2104 	aflt->flt_tl = tl;
2105 	aflt->flt_panic = panic;
2106 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2107 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2108 
2109 	if (iparity) {
2110 		cpu_icache_parity_info(&ch_flt);
2111 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2112 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2113 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2114 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2115 		else
2116 			error_class = FM_EREPORT_CPU_USIII_IPE;
2117 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2118 	} else {
2119 		cpu_dcache_parity_info(&ch_flt);
2120 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2121 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2122 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2123 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2124 		else
2125 			error_class = FM_EREPORT_CPU_USIII_DPE;
2126 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2127 		/*
2128 		 * For panther we also need to check the P$ for parity errors.
2129 		 */
2130 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2131 			cpu_pcache_parity_info(&ch_flt);
2132 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2133 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2134 				aflt->flt_payload =
2135 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2136 			}
2137 		}
2138 	}
2139 
2140 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2141 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2142 
2143 	if (iparity) {
2144 		/*
2145 		 * Invalidate entire I$.
2146 		 * This is required due to the use of diagnostic ASI
2147 		 * accesses that may result in a loss of I$ coherency.
2148 		 */
2149 		if (cache_boot_state & DCU_IC) {
2150 			flush_icache();
2151 		}
2152 		/*
2153 		 * According to section P.3.1 of the Panther PRM, we
2154 		 * need to do a little more for recovery on those
2155 		 * CPUs after encountering an I$ parity error.
2156 		 */
2157 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2158 			flush_ipb();
2159 			correct_dcache_parity(dcache_size,
2160 			    dcache_linesize);
2161 			flush_pcache();
2162 		}
2163 	} else {
2164 		/*
2165 		 * Since the valid bit is ignored when checking parity the
2166 		 * D$ data and tag must also be corrected.  Set D$ data bits
2167 		 * to zero and set utag to 0, 1, 2, 3.
2168 		 */
2169 		correct_dcache_parity(dcache_size, dcache_linesize);
2170 
2171 		/*
2172 		 * According to section P.3.3 of the Panther PRM, we
2173 		 * need to do a little more for recovery on those
2174 		 * CPUs after encountering a D$ or P$ parity error.
2175 		 *
2176 		 * As far as clearing P$ parity errors, it is enough to
2177 		 * simply invalidate all entries in the P$ since P$ parity
2178 		 * error traps are only generated for floating point load
2179 		 * hits.
2180 		 */
2181 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2182 			flush_icache();
2183 			flush_ipb();
2184 			flush_pcache();
2185 		}
2186 	}
2187 
2188 	/*
2189 	 * Invalidate entire D$ if it was enabled.
2190 	 * This is done to avoid stale data in the D$ which might
2191 	 * occur with the D$ disabled and the trap handler doing
2192 	 * stores affecting lines already in the D$.
2193 	 */
2194 	if (cache_boot_state & DCU_DC) {
2195 		flush_dcache();
2196 	}
2197 
2198 	/*
2199 	 * Restore caches to their bootup state.
2200 	 */
2201 	set_dcu(get_dcu() | cache_boot_state);
2202 
2203 	/*
2204 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2205 	 * be logged as part of the panic flow.
2206 	 */
2207 	if (aflt->flt_panic)
2208 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2209 
2210 	/*
2211 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2212 	 * the chance of getting an unrecoverable Fast ECC error.  This
2213 	 * flush will evict the part of the parity trap handler that is run
2214 	 * at TL>1.
2215 	 */
2216 	if (tl) {
2217 		cpu_flush_ecache();
2218 	}
2219 }
2220 
2221 /*
2222  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2223  * to indicate which portions of the captured data should be in the ereport.
2224  */
2225 void
2226 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2227 {
2228 	int way = ch_flt->parity_data.ipe.cpl_way;
2229 	int offset = ch_flt->parity_data.ipe.cpl_off;
2230 	int tag_index;
2231 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2232 
2233 
2234 	if ((offset != -1) || (way != -1)) {
2235 		/*
2236 		 * Parity error in I$ tag or data
2237 		 */
2238 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2239 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2240 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2241 			    PN_ICIDX_TO_WAY(tag_index);
2242 		else
2243 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2244 			    CH_ICIDX_TO_WAY(tag_index);
2245 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2246 		    IC_LOGFLAG_MAGIC;
2247 	} else {
2248 		/*
2249 		 * Parity error was not identified.
2250 		 * Log tags and data for all ways.
2251 		 */
2252 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2253 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2254 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2255 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2256 				    PN_ICIDX_TO_WAY(tag_index);
2257 			else
2258 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2259 				    CH_ICIDX_TO_WAY(tag_index);
2260 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2261 			    IC_LOGFLAG_MAGIC;
2262 		}
2263 	}
2264 }
2265 
2266 /*
2267  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2268  * to indicate which portions of the captured data should be in the ereport.
2269  */
2270 void
2271 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2272 {
2273 	int way = ch_flt->parity_data.dpe.cpl_way;
2274 	int offset = ch_flt->parity_data.dpe.cpl_off;
2275 	int tag_index;
2276 
2277 	if (offset != -1) {
2278 		/*
2279 		 * Parity error in D$ or P$ data array.
2280 		 *
2281 		 * First check to see whether the parity error is in D$ or P$
2282 		 * since P$ data parity errors are reported in Panther using
2283 		 * the same trap.
2284 		 */
2285 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2286 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2287 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2288 			    CH_PCIDX_TO_WAY(tag_index);
2289 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2290 			    PC_LOGFLAG_MAGIC;
2291 		} else {
2292 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2293 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2294 			    CH_DCIDX_TO_WAY(tag_index);
2295 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2296 			    DC_LOGFLAG_MAGIC;
2297 		}
2298 	} else if (way != -1) {
2299 		/*
2300 		 * Parity error in D$ tag.
2301 		 */
2302 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2303 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2304 		    CH_DCIDX_TO_WAY(tag_index);
2305 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2306 		    DC_LOGFLAG_MAGIC;
2307 	}
2308 }
2309 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2310 
2311 /*
2312  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2313  * post-process CPU events that are dequeued.  As such, it can be invoked
2314  * from softint context, from AST processing in the trap() flow, or from the
2315  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2316  * Historically this entry point was used to log the actual cmn_err(9F) text;
2317  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2318  * With FMA this function now also returns a flag which indicates to the
2319  * caller whether the ereport should be posted (1) or suppressed (0).
2320  */
2321 static int
2322 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2323 {
2324 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2325 	struct async_flt *aflt = (struct async_flt *)flt;
2326 	uint64_t errors;
2327 
2328 	switch (ch_flt->flt_type) {
2329 	case CPU_INV_AFSR:
2330 		/*
2331 		 * If it is a disrupting trap and the AFSR is zero, then
2332 		 * the event has probably already been noted. Do not post
2333 		 * an ereport.
2334 		 */
2335 		if ((aflt->flt_status & ECC_C_TRAP) &&
2336 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2337 			return (0);
2338 		else
2339 			return (1);
2340 	case CPU_TO:
2341 	case CPU_BERR:
2342 	case CPU_FATAL:
2343 	case CPU_FPUERR:
2344 		return (1);
2345 
2346 	case CPU_UE_ECACHE_RETIRE:
2347 		cpu_log_err(aflt);
2348 		cpu_page_retire(ch_flt);
2349 		return (1);
2350 
2351 	/*
2352 	 * Cases where we may want to suppress logging or perform
2353 	 * extended diagnostics.
2354 	 */
2355 	case CPU_CE:
2356 	case CPU_EMC:
2357 		/*
2358 		 * We want to skip logging and further classification
2359 		 * only if ALL the following conditions are true:
2360 		 *
2361 		 *	1. There is only one error
2362 		 *	2. That error is a correctable memory error
2363 		 *	3. The error is caused by the memory scrubber (in
2364 		 *	   which case the error will have occurred under
2365 		 *	   on_trap protection)
2366 		 *	4. The error is on a retired page
2367 		 *
2368 		 * Note: AFLT_PROT_EC is used places other than the memory
2369 		 * scrubber.  However, none of those errors should occur
2370 		 * on a retired page.
2371 		 */
2372 		if ((ch_flt->afsr_errs &
2373 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2374 		    aflt->flt_prot == AFLT_PROT_EC) {
2375 
2376 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2377 			    if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2378 
2379 				/*
2380 				 * Since we're skipping logging, we'll need
2381 				 * to schedule the re-enabling of CEEN
2382 				 */
2383 				(void) timeout(cpu_delayed_check_ce_errors,
2384 				    (void *)(uintptr_t)aflt->flt_inst,
2385 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2386 						 * MICROSEC));
2387 			    }
2388 			    return (0);
2389 			}
2390 		}
2391 
2392 		/*
2393 		 * Perform/schedule further classification actions, but
2394 		 * only if the page is healthy (we don't want bad
2395 		 * pages inducing too much diagnostic activity).  If we could
2396 		 * not find a page pointer then we also skip this.  If
2397 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2398 		 * to copy and recirculate the event (for further diagnostics)
2399 		 * and we should not proceed to log it here.
2400 		 *
2401 		 * This must be the last step here before the cpu_log_err()
2402 		 * below - if an event recirculates cpu_ce_log_err() will
2403 		 * not call the current function but just proceed directly
2404 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2405 		 *
2406 		 * Note: Check cpu_impl_async_log_err if changing this
2407 		 */
2408 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2409 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2410 			    CE_XDIAG_SKIP_NOPP);
2411 		} else {
2412 			if (errors != PR_OK) {
2413 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2414 				    CE_XDIAG_SKIP_PAGEDET);
2415 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2416 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2417 				return (0);
2418 			}
2419 		}
2420 		/*FALLTHRU*/
2421 
2422 	/*
2423 	 * Cases where we just want to report the error and continue.
2424 	 */
2425 	case CPU_CE_ECACHE:
2426 	case CPU_UE_ECACHE:
2427 	case CPU_IV:
2428 	case CPU_ORPH:
2429 		cpu_log_err(aflt);
2430 		return (1);
2431 
2432 	/*
2433 	 * Cases where we want to fall through to handle panicking.
2434 	 */
2435 	case CPU_UE:
2436 		/*
2437 		 * We want to skip logging in the same conditions as the
2438 		 * CE case.  In addition, we want to make sure we're not
2439 		 * panicking.
2440 		 */
2441 		if (!panicstr && (ch_flt->afsr_errs &
2442 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2443 		    aflt->flt_prot == AFLT_PROT_EC) {
2444 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2445 				/* Zero the address to clear the error */
2446 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2447 				return (0);
2448 			}
2449 		}
2450 		cpu_log_err(aflt);
2451 		break;
2452 
2453 	default:
2454 		/*
2455 		 * If the us3_common.c code doesn't know the flt_type, it may
2456 		 * be an implementation-specific code.  Call into the impldep
2457 		 * backend to find out what to do: if it tells us to continue,
2458 		 * break and handle as if falling through from a UE; if not,
2459 		 * the impldep backend has handled the error and we're done.
2460 		 */
2461 		switch (cpu_impl_async_log_err(flt, eqep)) {
2462 		case CH_ASYNC_LOG_DONE:
2463 			return (1);
2464 		case CH_ASYNC_LOG_RECIRC:
2465 			return (0);
2466 		case CH_ASYNC_LOG_CONTINUE:
2467 			break; /* continue on to handle UE-like error */
2468 		default:
2469 			cmn_err(CE_WARN, "discarding error 0x%p with "
2470 			    "invalid fault type (0x%x)",
2471 			    (void *)aflt, ch_flt->flt_type);
2472 			return (0);
2473 		}
2474 	}
2475 
2476 	/* ... fall through from the UE case */
2477 
2478 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2479 		if (!panicstr) {
2480 			cpu_page_retire(ch_flt);
2481 		} else {
2482 			/*
2483 			 * Clear UEs on panic so that we don't
2484 			 * get haunted by them during panic or
2485 			 * after reboot
2486 			 */
2487 			cpu_clearphys(aflt);
2488 			(void) clear_errors(NULL);
2489 		}
2490 	}
2491 
2492 	return (1);
2493 }
2494 
2495 /*
2496  * Retire the bad page that may contain the flushed error.
2497  */
2498 void
2499 cpu_page_retire(ch_async_flt_t *ch_flt)
2500 {
2501 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2502 	(void) page_retire(aflt->flt_addr, PR_UE);
2503 }
2504 
2505 /*
2506  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2507  * generic event post-processing for correctable and uncorrectable memory,
2508  * E$, and MTag errors.  Historically this entry point was used to log bits of
2509  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2510  * converted into an ereport.  In addition, it transmits the error to any
2511  * platform-specific service-processor FRU logging routines, if available.
2512  */
2513 void
2514 cpu_log_err(struct async_flt *aflt)
2515 {
2516 	char unum[UNUM_NAMLEN];
2517 	int len = 0;
2518 	int synd_status, synd_code, afar_status;
2519 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2520 
2521 	/*
2522 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
2523 	 * For Panther, L2$ is not external, so we don't want to
2524 	 * generate an E$ unum for those errors.
2525 	 */
2526 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
2527 		if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS)
2528 			aflt->flt_status |= ECC_ECACHE;
2529 	} else {
2530 		if (ch_flt->flt_bit & C_AFSR_ECACHE)
2531 			aflt->flt_status |= ECC_ECACHE;
2532 	}
2533 
2534 	/*
2535 	 * Determine syndrome status.
2536 	 */
2537 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2538 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2539 
2540 	/*
2541 	 * Determine afar status.
2542 	 */
2543 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2544 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2545 				ch_flt->flt_bit);
2546 	else
2547 		afar_status = AFLT_STAT_INVALID;
2548 
2549 	/*
2550 	 * If afar status is not invalid do a unum lookup.
2551 	 */
2552 	if (afar_status != AFLT_STAT_INVALID) {
2553 		(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
2554 			UNUM_NAMLEN, &len);
2555 	} else {
2556 		unum[0] = '\0';
2557 	}
2558 
2559 	synd_code = synd_to_synd_code(synd_status,
2560 	    aflt->flt_synd, ch_flt->flt_bit);
2561 
2562 	/*
2563 	 * Do not send the fruid message (plat_ecc_error_data_t)
2564 	 * to the SC if it can handle the enhanced error information
2565 	 * (plat_ecc_error2_data_t) or when the tunable
2566 	 * ecc_log_fruid_enable is set to 0.
2567 	 */
2568 
2569 	if (&plat_ecc_capability_sc_get &&
2570 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2571 		if (&plat_log_fruid_error)
2572 			plat_log_fruid_error(synd_code, aflt, unum,
2573 			    ch_flt->flt_bit);
2574 	}
2575 
2576 	if (aflt->flt_func != NULL)
2577 		aflt->flt_func(aflt, unum);
2578 
2579 	if (afar_status != AFLT_STAT_INVALID)
2580 		cpu_log_diag_info(ch_flt);
2581 
2582 	/*
2583 	 * If we have a CEEN error , we do not reenable CEEN until after
2584 	 * we exit the trap handler. Otherwise, another error may
2585 	 * occur causing the handler to be entered recursively.
2586 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2587 	 * to try and ensure that the CPU makes progress in the face
2588 	 * of a CE storm.
2589 	 */
2590 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2591 		(void) timeout(cpu_delayed_check_ce_errors,
2592 		    (void *)(uintptr_t)aflt->flt_inst,
2593 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2594 	}
2595 }
2596 
2597 /*
2598  * Invoked by error_init() early in startup and therefore before
2599  * startup_errorq() is called to drain any error Q -
2600  *
2601  * startup()
2602  *   startup_end()
2603  *     error_init()
2604  *       cpu_error_init()
2605  * errorq_init()
2606  *   errorq_drain()
2607  * start_other_cpus()
2608  *
2609  * The purpose of this routine is to create error-related taskqs.  Taskqs
2610  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2611  * context.
2612  */
2613 void
2614 cpu_error_init(int items)
2615 {
2616 	/*
2617 	 * Create taskq(s) to reenable CE
2618 	 */
2619 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2620 	    items, items, TASKQ_PREPOPULATE);
2621 }
2622 
2623 void
2624 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2625 {
2626 	char unum[UNUM_NAMLEN];
2627 	int len;
2628 
2629 	switch (aflt->flt_class) {
2630 	case CPU_FAULT:
2631 		cpu_ereport_init(aflt);
2632 		if (cpu_async_log_err(aflt, eqep))
2633 			cpu_ereport_post(aflt);
2634 		break;
2635 
2636 	case BUS_FAULT:
2637 		if (aflt->flt_func != NULL) {
2638 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2639 			    unum, UNUM_NAMLEN, &len);
2640 			aflt->flt_func(aflt, unum);
2641 		}
2642 		break;
2643 
2644 	case RECIRC_CPU_FAULT:
2645 		aflt->flt_class = CPU_FAULT;
2646 		cpu_log_err(aflt);
2647 		cpu_ereport_post(aflt);
2648 		break;
2649 
2650 	case RECIRC_BUS_FAULT:
2651 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2652 		/*FALLTHRU*/
2653 	default:
2654 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2655 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2656 		return;
2657 	}
2658 }
2659 
2660 /*
2661  * Scrub and classify a CE.  This function must not modify the
2662  * fault structure passed to it but instead should return the classification
2663  * information.
2664  */
2665 
2666 static uchar_t
2667 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2668 {
2669 	uchar_t disp = CE_XDIAG_EXTALG;
2670 	on_trap_data_t otd;
2671 	uint64_t orig_err;
2672 	ch_cpu_logout_t *clop;
2673 
2674 	/*
2675 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2676 	 * this, but our other callers have not.  Disable preemption to
2677 	 * avoid CPU migration so that we restore CEEN on the correct
2678 	 * cpu later.
2679 	 *
2680 	 * CEEN is cleared so that further CEs that our instruction and
2681 	 * data footprint induce do not cause use to either creep down
2682 	 * kernel stack to the point of overflow, or do so much CE
2683 	 * notification as to make little real forward progress.
2684 	 *
2685 	 * NCEEN must not be cleared.  However it is possible that
2686 	 * our accesses to the flt_addr may provoke a bus error or timeout
2687 	 * if the offending address has just been unconfigured as part of
2688 	 * a DR action.  So we must operate under on_trap protection.
2689 	 */
2690 	kpreempt_disable();
2691 	orig_err = get_error_enable();
2692 	if (orig_err & EN_REG_CEEN)
2693 	    set_error_enable(orig_err & ~EN_REG_CEEN);
2694 
2695 	/*
2696 	 * Our classification algorithm includes the line state before
2697 	 * the scrub; we'd like this captured after the detection and
2698 	 * before the algorithm below - the earlier the better.
2699 	 *
2700 	 * If we've come from a cpu CE trap then this info already exists
2701 	 * in the cpu logout area.
2702 	 *
2703 	 * For a CE detected by memscrub for which there was no trap
2704 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2705 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2706 	 * marked the fault structure as incomplete as a flag to later
2707 	 * logging code.
2708 	 *
2709 	 * If called directly from an IO detected CE there has been
2710 	 * no line data capture.  In this case we logout to the cpu logout
2711 	 * area - that's appropriate since it's the cpu cache data we need
2712 	 * for classification.  We thus borrow the cpu logout area for a
2713 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2714 	 * this time (we will invalidate it again below).
2715 	 *
2716 	 * If called from the partner check xcall handler then this cpu
2717 	 * (the partner) has not necessarily experienced a CE at this
2718 	 * address.  But we want to capture line state before its scrub
2719 	 * attempt since we use that in our classification.
2720 	 */
2721 	if (logout_tried == B_FALSE) {
2722 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2723 			disp |= CE_XDIAG_NOLOGOUT;
2724 	}
2725 
2726 	/*
2727 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2728 	 * no longer be valid (if DR'd since the initial event) so we
2729 	 * perform this scrub under on_trap protection.  If this access is
2730 	 * ok then further accesses below will also be ok - DR cannot
2731 	 * proceed while this thread is active (preemption is disabled);
2732 	 * to be safe we'll nonetheless use on_trap again below.
2733 	 */
2734 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2735 		cpu_scrubphys(ecc);
2736 	} else {
2737 		no_trap();
2738 		if (orig_err & EN_REG_CEEN)
2739 		    set_error_enable(orig_err);
2740 		kpreempt_enable();
2741 		return (disp);
2742 	}
2743 	no_trap();
2744 
2745 	/*
2746 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2747 	 * Note that it's quite possible that the read sourced the data from
2748 	 * another cpu.
2749 	 */
2750 	if (clear_ecc(ecc))
2751 		disp |= CE_XDIAG_CE1;
2752 
2753 	/*
2754 	 * Read the data again.  This time the read is very likely to
2755 	 * come from memory since the scrub induced a writeback to memory.
2756 	 */
2757 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2758 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2759 	} else {
2760 		no_trap();
2761 		if (orig_err & EN_REG_CEEN)
2762 		    set_error_enable(orig_err);
2763 		kpreempt_enable();
2764 		return (disp);
2765 	}
2766 	no_trap();
2767 
2768 	/* Did that read induce a CE that matches the AFAR? */
2769 	if (clear_ecc(ecc))
2770 		disp |= CE_XDIAG_CE2;
2771 
2772 	/*
2773 	 * Look at the logout information and record whether we found the
2774 	 * line in l2/l3 cache.  For Panther we are interested in whether
2775 	 * we found it in either cache (it won't reside in both but
2776 	 * it is possible to read it that way given the moving target).
2777 	 */
2778 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2779 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2780 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2781 		int hit, level;
2782 		int state;
2783 		int totalsize;
2784 		ch_ec_data_t *ecp;
2785 
2786 		/*
2787 		 * If hit is nonzero then a match was found and hit will
2788 		 * be one greater than the index which hit.  For Panther we
2789 		 * also need to pay attention to level to see which of l2$ or
2790 		 * l3$ it hit in.
2791 		 */
2792 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2793 		    0, &level);
2794 
2795 		if (hit) {
2796 			--hit;
2797 			disp |= CE_XDIAG_AFARMATCH;
2798 
2799 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2800 				if (level == 2)
2801 					ecp = &clop->clo_data.chd_l2_data[hit];
2802 				else
2803 					ecp = &clop->clo_data.chd_ec_data[hit];
2804 			} else {
2805 				ASSERT(level == 2);
2806 				ecp = &clop->clo_data.chd_ec_data[hit];
2807 			}
2808 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2809 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2810 			    ecc->flt_addr, ecp->ec_tag);
2811 
2812 			/*
2813 			 * Cheetah variants use different state encodings -
2814 			 * the CH_ECSTATE_* defines vary depending on the
2815 			 * module we're compiled for.  Translate into our
2816 			 * one true version.  Conflate Owner-Shared state
2817 			 * of SSM mode with Owner as victimisation of such
2818 			 * lines may cause a writeback.
2819 			 */
2820 			switch (state) {
2821 			case CH_ECSTATE_MOD:
2822 				disp |= EC_STATE_M;
2823 				break;
2824 
2825 			case CH_ECSTATE_OWN:
2826 			case CH_ECSTATE_OWS:
2827 				disp |= EC_STATE_O;
2828 				break;
2829 
2830 			case CH_ECSTATE_EXL:
2831 				disp |= EC_STATE_E;
2832 				break;
2833 
2834 			case CH_ECSTATE_SHR:
2835 				disp |= EC_STATE_S;
2836 				break;
2837 
2838 			default:
2839 				disp |= EC_STATE_I;
2840 				break;
2841 			}
2842 		}
2843 
2844 		/*
2845 		 * If we initiated the delayed logout then we are responsible
2846 		 * for invalidating the logout area.
2847 		 */
2848 		if (logout_tried == B_FALSE) {
2849 			bzero(clop, sizeof (ch_cpu_logout_t));
2850 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2851 		}
2852 	}
2853 
2854 	/*
2855 	 * Re-enable CEEN if we turned it off.
2856 	 */
2857 	if (orig_err & EN_REG_CEEN)
2858 	    set_error_enable(orig_err);
2859 	kpreempt_enable();
2860 
2861 	return (disp);
2862 }
2863 
2864 /*
2865  * Scrub a correctable memory error and collect data for classification
2866  * of CE type.  This function is called in the detection path, ie tl0 handling
2867  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2868  */
2869 void
2870 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2871 {
2872 	/*
2873 	 * Cheetah CE classification does not set any bits in flt_status.
2874 	 * Instead we will record classification datapoints in flt_disp.
2875 	 */
2876 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2877 
2878 	/*
2879 	 * To check if the error detected by IO is persistent, sticky or
2880 	 * intermittent.  This is noticed by clear_ecc().
2881 	 */
2882 	if (ecc->flt_status & ECC_IOBUS)
2883 		ecc->flt_stat = C_AFSR_MEMORY;
2884 
2885 	/*
2886 	 * Record information from this first part of the algorithm in
2887 	 * flt_disp.
2888 	 */
2889 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2890 }
2891 
2892 /*
2893  * Select a partner to perform a further CE classification check from.
2894  * Must be called with kernel preemption disabled (to stop the cpu list
2895  * from changing).  The detecting cpu we are partnering has cpuid
2896  * aflt->flt_inst; we might not be running on the detecting cpu.
2897  *
2898  * Restrict choice to active cpus in the same cpu partition as ourselves in
2899  * an effort to stop bad cpus in one partition causing other partitions to
2900  * perform excessive diagnostic activity.  Actually since the errorq drain
2901  * is run from a softint most of the time and that is a global mechanism
2902  * this isolation is only partial.  Return NULL if we fail to find a
2903  * suitable partner.
2904  *
2905  * We prefer a partner that is in a different latency group to ourselves as
2906  * we will share fewer datapaths.  If such a partner is unavailable then
2907  * choose one in the same lgroup but prefer a different chip and only allow
2908  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2909  * flags includes PTNR_SELFOK then permit selection of the original detector.
2910  *
2911  * We keep a cache of the last partner selected for a cpu, and we'll try to
2912  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2913  * have passed since that selection was made.  This provides the benefit
2914  * of the point-of-view of different partners over time but without
2915  * requiring frequent cpu list traversals.
2916  */
2917 
2918 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2919 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2920 
2921 static cpu_t *
2922 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2923 {
2924 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2925 	hrtime_t lasttime, thistime;
2926 
2927 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2928 
2929 	dtcr = cpu[aflt->flt_inst];
2930 
2931 	/*
2932 	 * Short-circuit for the following cases:
2933 	 *	. the dtcr is not flagged active
2934 	 *	. there is just one cpu present
2935 	 *	. the detector has disappeared
2936 	 *	. we were given a bad flt_inst cpuid; this should not happen
2937 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2938 	 *	  reason to panic.
2939 	 *	. there is just one cpu left online in the cpu partition
2940 	 *
2941 	 * If we return NULL after this point then we do not update the
2942 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2943 	 * again next time; this is the case where the only other cpu online
2944 	 * in the detector's partition is on the same chip as the detector
2945 	 * and since CEEN re-enable is throttled even that case should not
2946 	 * hurt performance.
2947 	 */
2948 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2949 		return (NULL);
2950 	}
2951 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2952 		if (flags & PTNR_SELFOK) {
2953 			*typep = CE_XDIAG_PTNR_SELF;
2954 			return (dtcr);
2955 		} else {
2956 			return (NULL);
2957 		}
2958 	}
2959 
2960 	thistime = gethrtime();
2961 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2962 
2963 	/*
2964 	 * Select a starting point.
2965 	 */
2966 	if (!lasttime) {
2967 		/*
2968 		 * We've never selected a partner for this detector before.
2969 		 * Start the scan at the next online cpu in the same cpu
2970 		 * partition.
2971 		 */
2972 		sp = dtcr->cpu_next_part;
2973 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2974 		/*
2975 		 * Our last selection has not aged yet.  If this partner:
2976 		 *	. is still a valid cpu,
2977 		 *	. is still in the same partition as the detector
2978 		 *	. is still marked active
2979 		 *	. satisfies the 'flags' argument criteria
2980 		 * then select it again without updating the timestamp.
2981 		 */
2982 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2983 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2984 		    !cpu_flagged_active(sp->cpu_flags) ||
2985 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2986 		    (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
2987 		    !(flags & PTNR_SIBLINGOK))) {
2988 			sp = dtcr->cpu_next_part;
2989 		} else {
2990 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2991 				*typep = CE_XDIAG_PTNR_REMOTE;
2992 			} else if (sp == dtcr) {
2993 				*typep = CE_XDIAG_PTNR_SELF;
2994 			} else if (sp->cpu_chip->chip_id ==
2995 			    dtcr->cpu_chip->chip_id) {
2996 				*typep = CE_XDIAG_PTNR_SIBLING;
2997 			} else {
2998 				*typep = CE_XDIAG_PTNR_LOCAL;
2999 			}
3000 			return (sp);
3001 		}
3002 	} else {
3003 		/*
3004 		 * Our last selection has aged.  If it is nonetheless still a
3005 		 * valid cpu then start the scan at the next cpu in the
3006 		 * partition after our last partner.  If the last selection
3007 		 * is no longer a valid cpu then go with our default.  In
3008 		 * this way we slowly cycle through possible partners to
3009 		 * obtain multiple viewpoints over time.
3010 		 */
3011 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3012 		if (sp == NULL) {
3013 			sp = dtcr->cpu_next_part;
3014 		} else {
3015 			sp = sp->cpu_next_part;		/* may be dtcr */
3016 			if (sp->cpu_part != dtcr->cpu_part)
3017 				sp = dtcr;
3018 		}
3019 	}
3020 
3021 	/*
3022 	 * We have a proposed starting point for our search, but if this
3023 	 * cpu is offline then its cpu_next_part will point to itself
3024 	 * so we can't use that to iterate over cpus in this partition in
3025 	 * the loop below.  We still want to avoid iterating over cpus not
3026 	 * in our partition, so in the case that our starting point is offline
3027 	 * we will repoint it to be the detector itself;  and if the detector
3028 	 * happens to be offline we'll return NULL from the following loop.
3029 	 */
3030 	if (!cpu_flagged_active(sp->cpu_flags)) {
3031 		sp = dtcr;
3032 	}
3033 
3034 	ptnr = sp;
3035 	locptnr = NULL;
3036 	sibptnr = NULL;
3037 	do {
3038 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3039 			continue;
3040 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3041 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3042 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3043 			*typep = CE_XDIAG_PTNR_REMOTE;
3044 			return (ptnr);
3045 		}
3046 		if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
3047 			if (sibptnr == NULL)
3048 				sibptnr = ptnr;
3049 			continue;
3050 		}
3051 		if (locptnr == NULL)
3052 			locptnr = ptnr;
3053 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3054 
3055 	/*
3056 	 * A foreign partner has already been returned if one was available.
3057 	 *
3058 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3059 	 * detector, is active, and is not a sibling of the detector.
3060 	 *
3061 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3062 	 * active.
3063 	 *
3064 	 * If we have to resort to using the detector itself we have already
3065 	 * checked that it is active.
3066 	 */
3067 	if (locptnr) {
3068 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3069 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3070 		*typep = CE_XDIAG_PTNR_LOCAL;
3071 		return (locptnr);
3072 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3073 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3074 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3075 		*typep = CE_XDIAG_PTNR_SIBLING;
3076 		return (sibptnr);
3077 	} else if (flags & PTNR_SELFOK) {
3078 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3079 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3080 		*typep = CE_XDIAG_PTNR_SELF;
3081 		return (dtcr);
3082 	}
3083 
3084 	return (NULL);
3085 }
3086 
3087 /*
3088  * Cross call handler that is requested to run on the designated partner of
3089  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3090  */
3091 static void
3092 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3093 {
3094 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3095 }
3096 
3097 /*
3098  * The associated errorqs are never destroyed so we do not need to deal with
3099  * them disappearing before this timeout fires.  If the affected memory
3100  * has been DR'd out since the original event the scrub algrithm will catch
3101  * any errors and return null disposition info.  If the original detecting
3102  * cpu has been DR'd out then ereport detector info will not be able to
3103  * lookup CPU type;  with a small timeout this is unlikely.
3104  */
3105 static void
3106 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3107 {
3108 	struct async_flt *aflt = cbarg->lkycb_aflt;
3109 	uchar_t disp;
3110 	cpu_t *cp;
3111 	int ptnrtype;
3112 
3113 	kpreempt_disable();
3114 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3115 	    &ptnrtype)) {
3116 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3117 		    (uint64_t)&disp);
3118 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3119 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3120 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3121 	} else {
3122 		ce_xdiag_lkydrops++;
3123 		if (ncpus > 1)
3124 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3125 			    CE_XDIAG_SKIP_NOPTNR);
3126 	}
3127 	kpreempt_enable();
3128 
3129 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3130 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3131 }
3132 
3133 /*
3134  * Called from errorq drain code when processing a CE error, both from
3135  * CPU and PCI drain functions.  Decide what further classification actions,
3136  * if any, we will perform.  Perform immediate actions now, and schedule
3137  * delayed actions as required.  Note that we are no longer necessarily running
3138  * on the detecting cpu, and that the async_flt structure will not persist on
3139  * return from this function.
3140  *
3141  * Calls to this function should aim to be self-throtlling in some way.  With
3142  * the delayed re-enable of CEEN the absolute rate of calls should not
3143  * be excessive.  Callers should also avoid performing in-depth classification
3144  * for events in pages that are already known to be suspect.
3145  *
3146  * We return nonzero to indicate that the event has been copied and
3147  * recirculated for further testing.  The caller should not log the event
3148  * in this case - it will be logged when further test results are available.
3149  *
3150  * Our possible contexts are that of errorq_drain: below lock level or from
3151  * panic context.  We can assume that the cpu we are running on is online.
3152  */
3153 
3154 
3155 #ifdef DEBUG
3156 static int ce_xdiag_forceaction;
3157 #endif
3158 
3159 int
3160 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3161     errorq_elem_t *eqep, size_t afltoffset)
3162 {
3163 	ce_dispact_t dispact, action;
3164 	cpu_t *cp;
3165 	uchar_t dtcrinfo, disp;
3166 	int ptnrtype;
3167 
3168 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3169 		ce_xdiag_drops++;
3170 		return (0);
3171 	} else if (!aflt->flt_in_memory) {
3172 		ce_xdiag_drops++;
3173 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3174 		return (0);
3175 	}
3176 
3177 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3178 
3179 	/*
3180 	 * Some correctable events are not scrubbed/classified, such as those
3181 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3182 	 * initial detector classification go no further.
3183 	 */
3184 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3185 		ce_xdiag_drops++;
3186 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3187 		return (0);
3188 	}
3189 
3190 	dispact = CE_DISPACT(ce_disp_table,
3191 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3192 	    CE_XDIAG_STATE(dtcrinfo),
3193 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3194 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3195 
3196 
3197 	action = CE_ACT(dispact);	/* bad lookup caught below */
3198 #ifdef DEBUG
3199 	if (ce_xdiag_forceaction != 0)
3200 		action = ce_xdiag_forceaction;
3201 #endif
3202 
3203 	switch (action) {
3204 	case CE_ACT_LKYCHK: {
3205 		caddr_t ndata;
3206 		errorq_elem_t *neqep;
3207 		struct async_flt *ecc;
3208 		ce_lkychk_cb_t *cbargp;
3209 
3210 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3211 			ce_xdiag_lkydrops++;
3212 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3213 			    CE_XDIAG_SKIP_DUPFAIL);
3214 			break;
3215 		}
3216 		ecc = (struct async_flt *)(ndata + afltoffset);
3217 
3218 		ASSERT(ecc->flt_class == CPU_FAULT ||
3219 		    ecc->flt_class == BUS_FAULT);
3220 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3221 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3222 
3223 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3224 		cbargp->lkycb_aflt = ecc;
3225 		cbargp->lkycb_eqp = eqp;
3226 		cbargp->lkycb_eqep = neqep;
3227 
3228 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3229 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3230 		return (1);
3231 	}
3232 
3233 	case CE_ACT_PTNRCHK:
3234 		kpreempt_disable();	/* stop cpu list changing */
3235 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3236 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3237 			    (uint64_t)aflt, (uint64_t)&disp);
3238 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3239 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3240 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3241 		} else if (ncpus > 1) {
3242 			ce_xdiag_ptnrdrops++;
3243 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3244 			    CE_XDIAG_SKIP_NOPTNR);
3245 		} else {
3246 			ce_xdiag_ptnrdrops++;
3247 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3248 			    CE_XDIAG_SKIP_UNIPROC);
3249 		}
3250 		kpreempt_enable();
3251 		break;
3252 
3253 	case CE_ACT_DONE:
3254 		break;
3255 
3256 	case CE_ACT(CE_DISP_BAD):
3257 	default:
3258 #ifdef DEBUG
3259 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3260 #endif
3261 		ce_xdiag_bad++;
3262 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3263 		break;
3264 	}
3265 
3266 	return (0);
3267 }
3268 
3269 /*
3270  * We route all errors through a single switch statement.
3271  */
3272 void
3273 cpu_ue_log_err(struct async_flt *aflt)
3274 {
3275 	switch (aflt->flt_class) {
3276 	case CPU_FAULT:
3277 		cpu_ereport_init(aflt);
3278 		if (cpu_async_log_err(aflt, NULL))
3279 			cpu_ereport_post(aflt);
3280 		break;
3281 
3282 	case BUS_FAULT:
3283 		bus_async_log_err(aflt);
3284 		break;
3285 
3286 	default:
3287 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3288 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3289 		return;
3290 	}
3291 }
3292 
3293 /*
3294  * Routine for panic hook callback from panic_idle().
3295  */
3296 void
3297 cpu_async_panic_callb(void)
3298 {
3299 	ch_async_flt_t ch_flt;
3300 	struct async_flt *aflt;
3301 	ch_cpu_errors_t cpu_error_regs;
3302 	uint64_t afsr_errs;
3303 
3304 	get_cpu_error_state(&cpu_error_regs);
3305 
3306 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3307 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS);
3308 
3309 	if (afsr_errs) {
3310 
3311 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3312 		aflt = (struct async_flt *)&ch_flt;
3313 		aflt->flt_id = gethrtime_waitfree();
3314 		aflt->flt_bus_id = getprocessorid();
3315 		aflt->flt_inst = CPU->cpu_id;
3316 		aflt->flt_stat = cpu_error_regs.afsr;
3317 		aflt->flt_addr = cpu_error_regs.afar;
3318 		aflt->flt_prot = AFLT_PROT_NONE;
3319 		aflt->flt_class = CPU_FAULT;
3320 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3321 		aflt->flt_panic = 1;
3322 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3323 		ch_flt.afsr_errs = afsr_errs;
3324 #if defined(SERRANO)
3325 		ch_flt.afar2 = cpu_error_regs.afar2;
3326 #endif	/* SERRANO */
3327 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3328 	}
3329 }
3330 
3331 /*
3332  * Routine to convert a syndrome into a syndrome code.
3333  */
3334 static int
3335 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3336 {
3337 	if (synd_status == AFLT_STAT_INVALID)
3338 		return (-1);
3339 
3340 	/*
3341 	 * Use the syndrome to index the appropriate syndrome table,
3342 	 * to get the code indicating which bit(s) is(are) bad.
3343 	 */
3344 	if (afsr_bit &
3345 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3346 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3347 #if defined(JALAPENO) || defined(SERRANO)
3348 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3349 				return (-1);
3350 			else
3351 				return (BPAR0 + synd);
3352 #else /* JALAPENO || SERRANO */
3353 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3354 				return (-1);
3355 			else
3356 				return (mtag_syndrome_tab[synd]);
3357 #endif /* JALAPENO || SERRANO */
3358 		} else {
3359 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3360 				return (-1);
3361 			else
3362 				return (ecc_syndrome_tab[synd]);
3363 		}
3364 	} else {
3365 		return (-1);
3366 	}
3367 }
3368 
3369 int
3370 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3371 {
3372 	if (&plat_get_mem_sid)
3373 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3374 	else
3375 		return (ENOTSUP);
3376 }
3377 
3378 int
3379 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3380 {
3381 	if (&plat_get_mem_offset)
3382 		return (plat_get_mem_offset(flt_addr, offp));
3383 	else
3384 		return (ENOTSUP);
3385 }
3386 
3387 int
3388 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3389 {
3390 	if (&plat_get_mem_addr)
3391 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3392 	else
3393 		return (ENOTSUP);
3394 }
3395 
3396 /*
3397  * Routine to return a string identifying the physical name
3398  * associated with a memory/cache error.
3399  */
3400 int
3401 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3402     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3403     ushort_t flt_status, char *buf, int buflen, int *lenp)
3404 {
3405 	int synd_code;
3406 	int ret;
3407 
3408 	/*
3409 	 * An AFSR of -1 defaults to a memory syndrome.
3410 	 */
3411 	if (flt_stat == (uint64_t)-1)
3412 		flt_stat = C_AFSR_CE;
3413 
3414 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3415 
3416 	/*
3417 	 * Syndrome code must be either a single-bit error code
3418 	 * (0...143) or -1 for unum lookup.
3419 	 */
3420 	if (synd_code < 0 || synd_code >= M2)
3421 		synd_code = -1;
3422 	if (&plat_get_mem_unum) {
3423 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3424 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3425 			buf[0] = '\0';
3426 			*lenp = 0;
3427 		}
3428 
3429 		return (ret);
3430 	}
3431 
3432 	return (ENOTSUP);
3433 }
3434 
3435 /*
3436  * Wrapper for cpu_get_mem_unum() routine that takes an
3437  * async_flt struct rather than explicit arguments.
3438  */
3439 int
3440 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3441     char *buf, int buflen, int *lenp)
3442 {
3443 	/*
3444 	 * If we come thru here for an IO bus error aflt->flt_stat will
3445 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3446 	 * so it will interpret this as a memory error.
3447 	 */
3448 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3449 	    (aflt->flt_class == BUS_FAULT) ?
3450 	    (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs,
3451 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3452 	    aflt->flt_status, buf, buflen, lenp));
3453 }
3454 
3455 /*
3456  * This routine is a more generic interface to cpu_get_mem_unum()
3457  * that may be used by other modules (e.g. mm).
3458  */
3459 int
3460 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3461     char *buf, int buflen, int *lenp)
3462 {
3463 	int synd_status, flt_in_memory, ret;
3464 	ushort_t flt_status = 0;
3465 	char unum[UNUM_NAMLEN];
3466 
3467 	/*
3468 	 * Check for an invalid address.
3469 	 */
3470 	if (afar == (uint64_t)-1)
3471 		return (ENXIO);
3472 
3473 	if (synd == (uint64_t)-1)
3474 		synd_status = AFLT_STAT_INVALID;
3475 	else
3476 		synd_status = AFLT_STAT_VALID;
3477 
3478 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3479 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3480 
3481 	/*
3482 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
3483 	 * For Panther, L2$ is not external, so we don't want to
3484 	 * generate an E$ unum for those errors.
3485 	 */
3486 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3487 		if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS)
3488 			flt_status |= ECC_ECACHE;
3489 	} else {
3490 		if (*afsr & C_AFSR_ECACHE)
3491 			flt_status |= ECC_ECACHE;
3492 	}
3493 
3494 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
3495 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3496 	if (ret != 0)
3497 		return (ret);
3498 
3499 	if (*lenp >= buflen)
3500 		return (ENAMETOOLONG);
3501 
3502 	(void) strncpy(buf, unum, buflen);
3503 
3504 	return (0);
3505 }
3506 
3507 /*
3508  * Routine to return memory information associated
3509  * with a physical address and syndrome.
3510  */
3511 int
3512 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3513     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3514     int *segsp, int *banksp, int *mcidp)
3515 {
3516 	int synd_status, synd_code;
3517 
3518 	if (afar == (uint64_t)-1)
3519 		return (ENXIO);
3520 
3521 	if (synd == (uint64_t)-1)
3522 		synd_status = AFLT_STAT_INVALID;
3523 	else
3524 		synd_status = AFLT_STAT_VALID;
3525 
3526 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3527 
3528 	if (p2get_mem_info != NULL)
3529 		return ((p2get_mem_info)(synd_code, afar,
3530 			mem_sizep, seg_sizep, bank_sizep,
3531 			segsp, banksp, mcidp));
3532 	else
3533 		return (ENOTSUP);
3534 }
3535 
3536 /*
3537  * Routine to return a string identifying the physical
3538  * name associated with a cpuid.
3539  */
3540 int
3541 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3542 {
3543 	int ret;
3544 	char unum[UNUM_NAMLEN];
3545 
3546 	if (&plat_get_cpu_unum) {
3547 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3548 		    != 0)
3549 			return (ret);
3550 	} else {
3551 		return (ENOTSUP);
3552 	}
3553 
3554 	if (*lenp >= buflen)
3555 		return (ENAMETOOLONG);
3556 
3557 	(void) strncpy(buf, unum, buflen);
3558 
3559 	return (0);
3560 }
3561 
3562 /*
3563  * This routine exports the name buffer size.
3564  */
3565 size_t
3566 cpu_get_name_bufsize()
3567 {
3568 	return (UNUM_NAMLEN);
3569 }
3570 
/*
 * Historical function, apparently not used.  Intentionally does nothing.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3578 
/*
 * Historical function only called for SBus errors in debugging.
 * Intentionally does nothing.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3586 
3587 /*
3588  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3589  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3590  * an async fault structure argument is passed in, the captured error state
3591  * (AFSR, AFAR) info will be returned in the structure.
3592  */
3593 int
3594 clear_errors(ch_async_flt_t *ch_flt)
3595 {
3596 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3597 	ch_cpu_errors_t	cpu_error_regs;
3598 
3599 	get_cpu_error_state(&cpu_error_regs);
3600 
3601 	if (ch_flt != NULL) {
3602 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3603 		aflt->flt_addr = cpu_error_regs.afar;
3604 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3605 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3606 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3607 #if defined(SERRANO)
3608 		ch_flt->afar2 = cpu_error_regs.afar2;
3609 #endif	/* SERRANO */
3610 	}
3611 
3612 	set_cpu_error_state(&cpu_error_regs);
3613 
3614 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3615 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3616 }
3617 
3618 /*
3619  * Clear any AFSR error bits, and check for persistence.
3620  *
3621  * It would be desirable to also insist that syndrome match.  PCI handling
3622  * has already filled flt_synd.  For errors trapped by CPU we only fill
3623  * flt_synd when we queue the event, so we do not have a valid flt_synd
3624  * during initial classification (it is valid if we're called as part of
3625  * subsequent low-pil additional classification attempts).  We could try
3626  * to determine which syndrome to use: we know we're only called for
3627  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3628  * would be esynd/none and esynd/msynd, respectively.  If that is
3629  * implemented then what do we do in the case that we do experience an
3630  * error on the same afar but with different syndrome?  At the very least
3631  * we should count such occurences.  Anyway, for now, we'll leave it as
3632  * it has been for ages.
3633  */
3634 static int
3635 clear_ecc(struct async_flt *aflt)
3636 {
3637 	ch_cpu_errors_t	cpu_error_regs;
3638 
3639 	/*
3640 	 * Snapshot the AFSR and AFAR and clear any errors
3641 	 */
3642 	get_cpu_error_state(&cpu_error_regs);
3643 	set_cpu_error_state(&cpu_error_regs);
3644 
3645 	/*
3646 	 * If any of the same memory access error bits are still on and
3647 	 * the AFAR matches, return that the error is persistent.
3648 	 */
3649 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3650 	    cpu_error_regs.afar == aflt->flt_addr);
3651 }
3652 
3653 /*
3654  * Turn off all cpu error detection, normally only used for panics.
3655  */
3656 void
3657 cpu_disable_errors(void)
3658 {
3659 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3660 
3661 	/*
3662 	 * With error detection now turned off, check the other cpus
3663 	 * logout areas for any unlogged errors.
3664 	 */
3665 	if (enable_check_other_cpus_logout) {
3666 		cpu_check_other_cpus_logout();
3667 		/*
3668 		 * Make a second pass over the logout areas, in case
3669 		 * there is a failing CPU in an error-trap loop which
3670 		 * will write to the logout area once it is emptied.
3671 		 */
3672 		cpu_check_other_cpus_logout();
3673 	}
3674 }
3675 
3676 /*
3677  * Enable errors.
3678  */
3679 void
3680 cpu_enable_errors(void)
3681 {
3682 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3683 }
3684 
3685 /*
3686  * Flush the entire ecache using displacement flush by reading through a
3687  * physical address range twice as large as the Ecache.
3688  */
3689 void
3690 cpu_flush_ecache(void)
3691 {
3692 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3693 	    cpunodes[CPU->cpu_id].ecache_linesize);
3694 }
3695 
3696 /*
3697  * Return CPU E$ set size - E$ size divided by the associativity.
3698  * We use this function in places where the CPU_PRIVATE ptr may not be
3699  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3700  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3701  * up before the kernel switches from OBP's to the kernel's trap table, so
3702  * we don't have to worry about cpunodes being unitialized.
3703  */
3704 int
3705 cpu_ecache_set_size(struct cpu *cp)
3706 {
3707 	if (CPU_PRIVATE(cp))
3708 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3709 
3710 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3711 }
3712 
3713 /*
3714  * Flush Ecache line.
3715  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3716  * Uses normal displacement flush for Cheetah.
3717  */
3718 static void
3719 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3720 {
3721 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3722 	int ec_set_size = cpu_ecache_set_size(CPU);
3723 
3724 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3725 }
3726 
3727 /*
3728  * Scrub physical address.
3729  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3730  * Ecache or direct-mapped Ecache.
3731  */
3732 static void
3733 cpu_scrubphys(struct async_flt *aflt)
3734 {
3735 	int ec_set_size = cpu_ecache_set_size(CPU);
3736 
3737 	scrubphys(aflt->flt_addr, ec_set_size);
3738 }
3739 
3740 /*
3741  * Clear physical address.
3742  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3743  * Ecache or direct-mapped Ecache.
3744  */
3745 void
3746 cpu_clearphys(struct async_flt *aflt)
3747 {
3748 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3749 	int ec_set_size = cpu_ecache_set_size(CPU);
3750 
3751 
3752 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3753 }
3754 
#if defined(CPU_IMP_ECACHE_ASSOC)
/*
 * Check for a matching valid line in all the sets.
 * If found, return set# + 1. Otherwise return 0.
 *
 * The captured E$ data in ch_flt is examined one way at a time; a way
 * matches when its tag is not invalid and the PA derived from the tag
 * equals the fault address with the bits not held in the tag removed.
 */
static int
cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ch_ec_data_t *ways = &ch_flt->flt_diag_data.chd_ec_data[0];
	int ec_size = cpunodes[CPU->cpu_id].ecache_size;
	int set_size = cpu_ecache_set_size(CPU);
	int nway = cpu_ecache_nway();
	int way;

	for (way = 0; way < nway; way++) {
		uint64_t tag = ways[way].ec_tag;

		if (cpu_ectag_line_invalid(ec_size, tag))
			continue;

		if ((aflt->flt_addr & P2ALIGN(C_AFAR_PA, set_size)) ==
		    cpu_ectag_to_pa(set_size, tag))
			return (way + 1);
	}

	return (0);
}
#endif /* CPU_IMP_ECACHE_ASSOC */
3779 
3780 /*
3781  * Check whether a line in the given logout info matches the specified
3782  * fault address.  If reqval is set then the line must not be Invalid.
3783  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3784  * set to 2 for l2$ or 3 for l3$.
3785  */
3786 static int
3787 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3788 {
3789 	ch_diag_data_t *cdp = data;
3790 	ch_ec_data_t *ecp;
3791 	int totalsize, ec_set_size;
3792 	int i, ways;
3793 	int match = 0;
3794 	int tagvalid;
3795 	uint64_t addr, tagpa;
3796 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3797 
3798 	/*
3799 	 * Check the l2$ logout data
3800 	 */
3801 	if (ispanther) {
3802 		ecp = &cdp->chd_l2_data[0];
3803 		ec_set_size = PN_L2_SET_SIZE;
3804 		ways = PN_L2_NWAYS;
3805 	} else {
3806 		ecp = &cdp->chd_ec_data[0];
3807 		ec_set_size = cpu_ecache_set_size(CPU);
3808 		ways = cpu_ecache_nway();
3809 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3810 	}
3811 	/* remove low order PA bits from fault address not used in PA tag */
3812 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3813 	for (i = 0; i < ways; i++, ecp++) {
3814 		if (ispanther) {
3815 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3816 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3817 		} else {
3818 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3819 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3820 			    ecp->ec_tag);
3821 		}
3822 		if (tagpa == addr && (!reqval || tagvalid)) {
3823 			match = i + 1;
3824 			*level = 2;
3825 			break;
3826 		}
3827 	}
3828 
3829 	if (match || !ispanther)
3830 		return (match);
3831 
3832 	/* For Panther we also check the l3$ */
3833 	ecp = &cdp->chd_ec_data[0];
3834 	ec_set_size = PN_L3_SET_SIZE;
3835 	ways = PN_L3_NWAYS;
3836 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3837 
3838 	for (i = 0; i < ways; i++, ecp++) {
3839 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3840 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3841 			match = i + 1;
3842 			*level = 3;
3843 			break;
3844 		}
3845 	}
3846 
3847 	return (match);
3848 }
3849 
3850 #if defined(CPU_IMP_L1_CACHE_PARITY)
3851 /*
3852  * Record information related to the source of an Dcache Parity Error.
3853  */
3854 static void
3855 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3856 {
3857 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3858 	int index;
3859 
3860 	/*
3861 	 * Since instruction decode cannot be done at high PIL
3862 	 * just examine the entire Dcache to locate the error.
3863 	 */
3864 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3865 		ch_flt->parity_data.dpe.cpl_way = -1;
3866 		ch_flt->parity_data.dpe.cpl_off = -1;
3867 	}
3868 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3869 		cpu_dcache_parity_check(ch_flt, index);
3870 }
3871 
3872 /*
3873  * Check all ways of the Dcache at a specified index for good parity.
3874  */
3875 static void
3876 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3877 {
3878 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3879 	uint64_t parity_bits, pbits, data_word;
3880 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3881 	int way, word, data_byte;
3882 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3883 	ch_dc_data_t tmp_dcp;
3884 
3885 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3886 		/*
3887 		 * Perform diagnostic read.
3888 		 */
3889 		get_dcache_dtag(index + way * dc_set_size,
3890 				(uint64_t *)&tmp_dcp);
3891 
3892 		/*
3893 		 * Check tag for even parity.
3894 		 * Sum of 1 bits (including parity bit) should be even.
3895 		 */
3896 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3897 			/*
3898 			 * If this is the first error log detailed information
3899 			 * about it and check the snoop tag. Otherwise just
3900 			 * record the fact that we found another error.
3901 			 */
3902 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3903 				ch_flt->parity_data.dpe.cpl_way = way;
3904 				ch_flt->parity_data.dpe.cpl_cache =
3905 				    CPU_DC_PARITY;
3906 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3907 
3908 				if (popc64(tmp_dcp.dc_sntag &
3909 						CHP_DCSNTAG_PARMASK) & 1) {
3910 					ch_flt->parity_data.dpe.cpl_tag |=
3911 								CHP_DC_SNTAG;
3912 					ch_flt->parity_data.dpe.cpl_lcnt++;
3913 				}
3914 
3915 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3916 			}
3917 
3918 			ch_flt->parity_data.dpe.cpl_lcnt++;
3919 		}
3920 
3921 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3922 			/*
3923 			 * Panther has more parity bits than the other
3924 			 * processors for covering dcache data and so each
3925 			 * byte of data in each word has its own parity bit.
3926 			 */
3927 			parity_bits = tmp_dcp.dc_pn_data_parity;
3928 			for (word = 0; word < 4; word++) {
3929 				data_word = tmp_dcp.dc_data[word];
3930 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3931 				for (data_byte = 0; data_byte < 8;
3932 				    data_byte++) {
3933 					if (((popc64(data_word &
3934 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3935 					    (pbits & 1)) {
3936 						cpu_record_dc_data_parity(
3937 						ch_flt, dcp, &tmp_dcp, way,
3938 						word);
3939 					}
3940 					pbits >>= 1;
3941 					data_word >>= 8;
3942 				}
3943 				parity_bits >>= 8;
3944 			}
3945 		} else {
3946 			/*
3947 			 * Check data array for even parity.
3948 			 * The 8 parity bits are grouped into 4 pairs each
3949 			 * of which covers a 64-bit word.  The endianness is
3950 			 * reversed -- the low-order parity bits cover the
3951 			 * high-order data words.
3952 			 */
3953 			parity_bits = tmp_dcp.dc_utag >> 8;
3954 			for (word = 0; word < 4; word++) {
3955 				pbits = (parity_bits >> (6 - word * 2)) & 3;
3956 				if ((popc64(tmp_dcp.dc_data[word]) +
3957 				    parity_bits_popc[pbits]) & 1) {
3958 					cpu_record_dc_data_parity(ch_flt, dcp,
3959 					    &tmp_dcp, way, word);
3960 				}
3961 			}
3962 		}
3963 	}
3964 }
3965 
3966 static void
3967 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
3968     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
3969 {
3970 	/*
3971 	 * If this is the first error log detailed information about it.
3972 	 * Otherwise just record the fact that we found another error.
3973 	 */
3974 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3975 		ch_flt->parity_data.dpe.cpl_way = way;
3976 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
3977 		ch_flt->parity_data.dpe.cpl_off = word * 8;
3978 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
3979 	}
3980 	ch_flt->parity_data.dpe.cpl_lcnt++;
3981 }
3982 
3983 /*
3984  * Record information related to the source of an Icache Parity Error.
3985  *
3986  * Called with the Icache disabled so any diagnostic accesses are safe.
3987  */
3988 static void
3989 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
3990 {
3991 	int	ic_set_size;
3992 	int	ic_linesize;
3993 	int	index;
3994 
3995 	if (CPU_PRIVATE(CPU)) {
3996 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3997 		    CH_ICACHE_NWAY;
3998 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3999 	} else {
4000 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4001 		ic_linesize = icache_linesize;
4002 	}
4003 
4004 	ch_flt->parity_data.ipe.cpl_way = -1;
4005 	ch_flt->parity_data.ipe.cpl_off = -1;
4006 
4007 	for (index = 0; index < ic_set_size; index += ic_linesize)
4008 		cpu_icache_parity_check(ch_flt, index);
4009 }
4010 
4011 /*
4012  * Check all ways of the Icache at a specified index for good parity.
4013  */
4014 static void
4015 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4016 {
4017 	uint64_t parmask, pn_inst_parity;
4018 	int ic_set_size;
4019 	int ic_linesize;
4020 	int flt_index, way, instr, num_instr;
4021 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4022 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4023 	ch_ic_data_t tmp_icp;
4024 
4025 	if (CPU_PRIVATE(CPU)) {
4026 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4027 		    CH_ICACHE_NWAY;
4028 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4029 	} else {
4030 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4031 		ic_linesize = icache_linesize;
4032 	}
4033 
4034 	/*
4035 	 * Panther has twice as many instructions per icache line and the
4036 	 * instruction parity bit is in a different location.
4037 	 */
4038 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4039 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4040 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4041 	} else {
4042 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4043 		pn_inst_parity = 0;
4044 	}
4045 
4046 	/*
4047 	 * Index at which we expect to find the parity error.
4048 	 */
4049 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4050 
4051 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4052 		/*
4053 		 * Diagnostic reads expect address argument in ASI format.
4054 		 */
4055 		get_icache_dtag(2 * (index + way * ic_set_size),
4056 				(uint64_t *)&tmp_icp);
4057 
4058 		/*
4059 		 * If this is the index in which we expect to find the
4060 		 * error log detailed information about each of the ways.
4061 		 * This information will be displayed later if we can't
4062 		 * determine the exact way in which the error is located.
4063 		 */
4064 		if (flt_index == index)
4065 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4066 
4067 		/*
4068 		 * Check tag for even parity.
4069 		 * Sum of 1 bits (including parity bit) should be even.
4070 		 */
4071 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4072 			/*
4073 			 * If this way is the one in which we expected
4074 			 * to find the error record the way and check the
4075 			 * snoop tag. Otherwise just record the fact we
4076 			 * found another error.
4077 			 */
4078 			if (flt_index == index) {
4079 				ch_flt->parity_data.ipe.cpl_way = way;
4080 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4081 
4082 				if (popc64(tmp_icp.ic_sntag &
4083 						CHP_ICSNTAG_PARMASK) & 1) {
4084 					ch_flt->parity_data.ipe.cpl_tag |=
4085 								CHP_IC_SNTAG;
4086 					ch_flt->parity_data.ipe.cpl_lcnt++;
4087 				}
4088 
4089 			}
4090 			ch_flt->parity_data.ipe.cpl_lcnt++;
4091 			continue;
4092 		}
4093 
4094 		/*
4095 		 * Check instruction data for even parity.
4096 		 * Bits participating in parity differ for PC-relative
4097 		 * versus non-PC-relative instructions.
4098 		 */
4099 		for (instr = 0; instr < num_instr; instr++) {
4100 			parmask = (tmp_icp.ic_data[instr] &
4101 					CH_ICDATA_PRED_ISPCREL) ?
4102 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4103 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4104 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4105 				/*
4106 				 * If this way is the one in which we expected
4107 				 * to find the error record the way and offset.
4108 				 * Otherwise just log the fact we found another
4109 				 * error.
4110 				 */
4111 				if (flt_index == index) {
4112 					ch_flt->parity_data.ipe.cpl_way = way;
4113 					ch_flt->parity_data.ipe.cpl_off =
4114 								instr * 4;
4115 				}
4116 				ch_flt->parity_data.ipe.cpl_lcnt++;
4117 				continue;
4118 			}
4119 		}
4120 	}
4121 }
4122 
4123 /*
4124  * Record information related to the source of an Pcache Parity Error.
4125  */
4126 static void
4127 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4128 {
4129 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4130 	int index;
4131 
4132 	/*
4133 	 * Since instruction decode cannot be done at high PIL just
4134 	 * examine the entire Pcache to check for any parity errors.
4135 	 */
4136 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4137 		ch_flt->parity_data.dpe.cpl_way = -1;
4138 		ch_flt->parity_data.dpe.cpl_off = -1;
4139 	}
4140 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4141 		cpu_pcache_parity_check(ch_flt, index);
4142 }
4143 
4144 /*
4145  * Check all ways of the Pcache at a specified index for good parity.
4146  */
4147 static void
4148 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4149 {
4150 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4151 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4152 	int way, word, pbit, parity_bits;
4153 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4154 	ch_pc_data_t tmp_pcp;
4155 
4156 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4157 		/*
4158 		 * Perform diagnostic read.
4159 		 */
4160 		get_pcache_dtag(index + way * pc_set_size,
4161 				(uint64_t *)&tmp_pcp);
4162 		/*
4163 		 * Check data array for odd parity. There are 8 parity
4164 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4165 		 * of those bits covers exactly 8 bytes of the data
4166 		 * array:
4167 		 *
4168 		 *	parity bit	P$ data bytes covered
4169 		 *	----------	---------------------
4170 		 *	50		63:56
4171 		 *	51		55:48
4172 		 *	52		47:40
4173 		 *	53		39:32
4174 		 *	54		31:24
4175 		 *	55		23:16
4176 		 *	56		15:8
4177 		 *	57		7:0
4178 		 */
4179 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4180 		for (word = 0; word < pc_data_words; word++) {
4181 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4182 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4183 				/*
4184 				 * If this is the first error log detailed
4185 				 * information about it. Otherwise just record
4186 				 * the fact that we found another error.
4187 				 */
4188 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4189 					ch_flt->parity_data.dpe.cpl_way = way;
4190 					ch_flt->parity_data.dpe.cpl_cache =
4191 					    CPU_PC_PARITY;
4192 					ch_flt->parity_data.dpe.cpl_off =
4193 					    word * sizeof (uint64_t);
4194 					bcopy(&tmp_pcp, pcp,
4195 							sizeof (ch_pc_data_t));
4196 				}
4197 				ch_flt->parity_data.dpe.cpl_lcnt++;
4198 			}
4199 		}
4200 	}
4201 }
4202 
4203 
4204 /*
4205  * Add L1 Data cache data to the ereport payload.
4206  */
4207 static void
4208 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4209 {
4210 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4211 	ch_dc_data_t *dcp;
4212 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4213 	uint_t nelem;
4214 	int i, ways_to_check, ways_logged = 0;
4215 
4216 	/*
4217 	 * If this is an D$ fault then there may be multiple
4218 	 * ways captured in the ch_parity_log_t structure.
4219 	 * Otherwise, there will be at most one way captured
4220 	 * in the ch_diag_data_t struct.
4221 	 * Check each way to see if it should be encoded.
4222 	 */
4223 	if (ch_flt->flt_type == CPU_DC_PARITY)
4224 		ways_to_check = CH_DCACHE_NWAY;
4225 	else
4226 		ways_to_check = 1;
4227 	for (i = 0; i < ways_to_check; i++) {
4228 		if (ch_flt->flt_type == CPU_DC_PARITY)
4229 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4230 		else
4231 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4232 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4233 			bcopy(dcp, &dcdata[ways_logged],
4234 				sizeof (ch_dc_data_t));
4235 			ways_logged++;
4236 		}
4237 	}
4238 
4239 	/*
4240 	 * Add the dcache data to the payload.
4241 	 */
4242 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4243 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4244 	if (ways_logged != 0) {
4245 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4246 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4247 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4248 	}
4249 }
4250 
4251 /*
4252  * Add L1 Instruction cache data to the ereport payload.
4253  */
4254 static void
4255 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4256 {
4257 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4258 	ch_ic_data_t *icp;
4259 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4260 	uint_t nelem;
4261 	int i, ways_to_check, ways_logged = 0;
4262 
4263 	/*
4264 	 * If this is an I$ fault then there may be multiple
4265 	 * ways captured in the ch_parity_log_t structure.
4266 	 * Otherwise, there will be at most one way captured
4267 	 * in the ch_diag_data_t struct.
4268 	 * Check each way to see if it should be encoded.
4269 	 */
4270 	if (ch_flt->flt_type == CPU_IC_PARITY)
4271 		ways_to_check = CH_ICACHE_NWAY;
4272 	else
4273 		ways_to_check = 1;
4274 	for (i = 0; i < ways_to_check; i++) {
4275 		if (ch_flt->flt_type == CPU_IC_PARITY)
4276 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4277 		else
4278 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4279 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4280 			bcopy(icp, &icdata[ways_logged],
4281 				sizeof (ch_ic_data_t));
4282 			ways_logged++;
4283 		}
4284 	}
4285 
4286 	/*
4287 	 * Add the icache data to the payload.
4288 	 */
4289 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4290 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4291 	if (ways_logged != 0) {
4292 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4293 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4294 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4295 	}
4296 }
4297 
4298 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4299 
4300 /*
4301  * Add ecache data to payload.
4302  */
4303 static void
4304 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4305 {
4306 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4307 	ch_ec_data_t *ecp;
4308 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4309 	uint_t nelem;
4310 	int i, ways_logged = 0;
4311 
4312 	/*
4313 	 * Check each way to see if it should be encoded
4314 	 * and concatinate it into a temporary buffer.
4315 	 */
4316 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4317 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4318 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4319 			bcopy(ecp, &ecdata[ways_logged],
4320 				sizeof (ch_ec_data_t));
4321 			ways_logged++;
4322 		}
4323 	}
4324 
4325 	/*
4326 	 * Panther CPUs have an additional level of cache and so
4327 	 * what we just collected was the L3 (ecache) and not the
4328 	 * L2 cache.
4329 	 */
4330 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4331 		/*
4332 		 * Add the L3 (ecache) data to the payload.
4333 		 */
4334 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4335 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4336 		if (ways_logged != 0) {
4337 			nelem = sizeof (ch_ec_data_t) /
4338 			    sizeof (uint64_t) * ways_logged;
4339 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4340 			    DATA_TYPE_UINT64_ARRAY, nelem,
4341 			    (uint64_t *)ecdata, NULL);
4342 		}
4343 
4344 		/*
4345 		 * Now collect the L2 cache.
4346 		 */
4347 		ways_logged = 0;
4348 		for (i = 0; i < PN_L2_NWAYS; i++) {
4349 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4350 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4351 				bcopy(ecp, &ecdata[ways_logged],
4352 				    sizeof (ch_ec_data_t));
4353 				ways_logged++;
4354 			}
4355 		}
4356 	}
4357 
4358 	/*
4359 	 * Add the L2 cache data to the payload.
4360 	 */
4361 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4362 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4363 	if (ways_logged != 0) {
4364 		nelem = sizeof (ch_ec_data_t) /
4365 			sizeof (uint64_t) * ways_logged;
4366 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4367 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4368 	}
4369 }
4370 
4371 /*
4372  * Encode the data saved in the ch_async_flt_t struct into
4373  * the FM ereport payload.
4374  */
4375 static void
4376 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4377 	nvlist_t *resource, int *afar_status, int *synd_status)
4378 {
4379 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4380 	*synd_status = AFLT_STAT_INVALID;
4381 	*afar_status = AFLT_STAT_INVALID;
4382 
4383 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4384 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4385 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4386 	}
4387 
4388 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4389 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4390 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4391 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4392 	}
4393 
4394 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4395 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4396 		    ch_flt->flt_bit);
4397 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4398 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4399 	}
4400 
4401 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4402 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4403 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4404 	}
4405 
4406 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4407 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4408 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4409 	}
4410 
4411 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4412 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4413 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4414 	}
4415 
4416 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4417 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4418 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4419 	}
4420 
4421 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4422 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4423 		    DATA_TYPE_BOOLEAN_VALUE,
4424 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4425 	}
4426 
4427 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4428 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4429 		    DATA_TYPE_BOOLEAN_VALUE,
4430 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4431 	}
4432 
4433 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4434 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4435 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4436 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4437 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4438 	}
4439 
4440 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4441 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4442 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4443 	}
4444 
4445 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4446 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4447 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4448 	}
4449 
4450 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4451 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4452 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4453 	}
4454 
4455 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4456 		cpu_payload_add_ecache(aflt, payload);
4457 
4458 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4459 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4460 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4461 	}
4462 
4463 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4464 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4465 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4466 	}
4467 
4468 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4469 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4470 		    DATA_TYPE_UINT32_ARRAY, 16,
4471 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4472 	}
4473 
4474 #if defined(CPU_IMP_L1_CACHE_PARITY)
4475 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4476 		cpu_payload_add_dcache(aflt, payload);
4477 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4478 		cpu_payload_add_icache(aflt, payload);
4479 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4480 
4481 #if defined(CHEETAH_PLUS)
4482 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4483 		cpu_payload_add_pcache(aflt, payload);
4484 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4485 		cpu_payload_add_tlb(aflt, payload);
4486 #endif	/* CHEETAH_PLUS */
4487 	/*
4488 	 * Create the FMRI that goes into the payload
4489 	 * and contains the unum info if necessary.
4490 	 */
4491 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) &&
4492 	    (*afar_status == AFLT_STAT_VALID)) {
4493 		char unum[UNUM_NAMLEN] = "";
4494 		char sid[DIMM_SERIAL_ID_LEN] = "";
4495 		int len;
4496 
4497 		if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum,
4498 		    UNUM_NAMLEN, &len) == 0) {
4499 			uint64_t offset = (uint64_t)-1;
4500 			int ret;
4501 
4502 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4503 			    &len);
4504 
4505 			if (ret == 0) {
4506 				(void) cpu_get_mem_offset(aflt->flt_addr,
4507 				    &offset);
4508 			}
4509 
4510 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4511 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4512 			fm_payload_set(payload,
4513 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4514 			    DATA_TYPE_NVLIST, resource, NULL);
4515 		}
4516 	}
4517 }
4518 
4519 /*
4520  * Initialize the way info if necessary.
4521  */
4522 void
4523 cpu_ereport_init(struct async_flt *aflt)
4524 {
4525 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4526 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4527 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4528 	int i;
4529 
4530 	/*
4531 	 * Initialize the info in the CPU logout structure.
4532 	 * The I$/D$ way information is not initialized here
4533 	 * since it is captured in the logout assembly code.
4534 	 */
4535 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4536 		(ecp + i)->ec_way = i;
4537 
4538 	for (i = 0; i < PN_L2_NWAYS; i++)
4539 		(l2p + i)->ec_way = i;
4540 }
4541 
4542 /*
4543  * Returns whether fault address is valid for this error bit and
4544  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4545  */
4546 int
4547 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4548 {
4549 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4550 
4551 	return ((aflt->flt_stat & C_AFSR_MEMORY) &&
4552 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4553 	    AFLT_STAT_VALID &&
4554 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4555 }
4556 
4557 static void
4558 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4559 {
4560 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4561 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4562 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4563 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4564 #if defined(CPU_IMP_ECACHE_ASSOC)
4565 	int i, nway;
4566 #endif /* CPU_IMP_ECACHE_ASSOC */
4567 
4568 	/*
4569 	 * Check if the CPU log out captured was valid.
4570 	 */
4571 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4572 	    ch_flt->flt_data_incomplete)
4573 		return;
4574 
4575 #if defined(CPU_IMP_ECACHE_ASSOC)
4576 	nway = cpu_ecache_nway();
4577 	i =  cpu_ecache_line_valid(ch_flt);
4578 	if (i == 0 || i > nway) {
4579 		for (i = 0; i < nway; i++)
4580 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4581 	} else
4582 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4583 #else /* CPU_IMP_ECACHE_ASSOC */
4584 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4585 #endif /* CPU_IMP_ECACHE_ASSOC */
4586 
4587 #if defined(CHEETAH_PLUS)
4588 	pn_cpu_log_diag_l2_info(ch_flt);
4589 #endif /* CHEETAH_PLUS */
4590 
4591 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4592 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4593 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4594 	}
4595 
4596 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4597 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4598 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4599 		else
4600 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4601 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4602 	}
4603 }
4604 
/*
 * Cheetah ECC calculation.
 *
 * Only the data bits take part in the calculation; the check bit and
 * Mtag bit terms can be ignored.  Row N holds the masks selecting the
 * data bits that feed ECC bit N: column 0 applies to the low order 64
 * bits of the 128-bit chunk, column 1 to the high order 64 bits.
 */
static uint64_t ch_ecc_table[9][2] = {
	/* ECC bit 0 */
	{ 0x46bffffeccd1177fULL, 0x488800022100014cULL },
	/* ECC bit 1 */
	{ 0x42fccc81331ff77fULL, 0x14424f1010249184ULL },
	/* ECC bit 2 */
	{ 0x8898827c222f1ffeULL, 0x22c1222808184aafULL },
	/* ECC bit 3 */
	{ 0xf7632203e131ccf1ULL, 0xe1241121848292b8ULL },
	/* ECC bit 4 */
	{ 0x7f5511421b113809ULL, 0x901c88d84288aafeULL },
	/* ECC bit 5 */
	{ 0x1d49412184882487ULL, 0x8f338c87c044c6efULL },
	/* ECC bit 6 */
	{ 0xf552181014448344ULL, 0x7ff8f4443e411911ULL },
	/* ECC bit 7 */
	{ 0x2189240808f24228ULL, 0xfeeff8cc81333f42ULL },
	/* ECC bit 8 */
	{ 0x3280008440001112ULL, 0xfee88b337ffffd62ULL },
};
4625 
/*
 * 64-bit population count: returns the number of 1 bits in val.
 *
 * Uses the well-known trick that val & (val - 1) clears the lowest set
 * bit, so the loop runs once per set bit.  The UltraSPARC V9 POPC
 * instruction is not used because some CPUs, including Cheetahplus and
 * Jaguar, do not support it.
 */
int
popc64(uint64_t val)
{
	int ones = 0;

	while (val != 0) {
		val &= val - 1;
		ones++;
	}

	return (ones);
}
4641 
4642 /*
4643  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4644  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4645  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4646  * instead of doing all the xor's.
4647  */
4648 uint32_t
4649 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4650 {
4651 	int bitno, s;
4652 	int synd = 0;
4653 
4654 	for (bitno = 0; bitno < 9; bitno++) {
4655 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4656 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4657 		synd |= (s << bitno);
4658 	}
4659 	return (synd);
4660 
4661 }
4662 
4663 /*
4664  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4665  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4666  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4667  */
4668 static void
4669 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4670     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4671 {
4672 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4673 
4674 	if (reason &&
4675 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4676 		(void) strcat(reason, eccp->ec_reason);
4677 	}
4678 
4679 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4680 	ch_flt->flt_type = eccp->ec_flt_type;
4681 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4682 		ch_flt->flt_diag_data = *cdp;
4683 	else
4684 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4685 	aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit);
4686 
4687 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4688 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4689 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4690 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4691 	else
4692 		aflt->flt_synd = 0;
4693 
4694 	aflt->flt_payload = eccp->ec_err_payload;
4695 
4696 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4697 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4698 		cpu_errorq_dispatch(eccp->ec_err_class,
4699 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4700 		    aflt->flt_panic);
4701 	else
4702 		cpu_errorq_dispatch(eccp->ec_err_class,
4703 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4704 		    aflt->flt_panic);
4705 }
4706 
/*
 * Queue events on async event queue one event per error bit.  First we
 * queue the events that we "expect" for the given trap, then we queue events
 * that we may not expect.  Return number of events queued.
 *
 *   ch_flt	 fault structure used as the template for every event; its
 *		 AFSR/AFAR fields are rewritten before each event is queued.
 *   reason	 passed through unchanged to cpu_queue_one_event().
 *   t_afsr_errs error bits for which events are wanted; bits outside the
 *		 known AFSR/AFSR_EXT error sets, and the ME bit, are ignored.
 *   clop	 optional logout area (may be NULL) supplying the diagnostic
 *		 data handed to cpu_queue_one_event().
 */
int
cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
    ch_cpu_logout_t *clop)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ecc_type_to_info_t *eccp;
	int nevents = 0;
	/* Remember the primary register values; the fields get overwritten. */
	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
#if defined(CHEETAH_PLUS)
	uint64_t orig_t_afsr_errs;
#endif
	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
	ch_diag_data_t *cdp = NULL;

	/* Restrict the request to real error bits, excluding ME. */
	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);

#if defined(CHEETAH_PLUS)
	orig_t_afsr_errs = t_afsr_errs;

	/*
	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
	 */
	if (clop != NULL) {
		/*
		 * Set the AFSR and AFAR fields to the shadow registers.  The
		 * flt_addr and flt_stat fields will be reset to the primaries
		 * below, but the sdw_addr and sdw_stat will stay as the
		 * secondaries.
		 */
		cdp = &clop->clo_sdw_data;
		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);

		/*
		 * If the primary and shadow AFSR differ, tag the shadow as
		 * the first fault.
		 */
		if ((primary_afar != cdp->chd_afar) ||
		    (primary_afsr_errs != ch_flt->afsr_errs)) {
			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
		}

		/*
		 * Check AFSR bits as well as AFSR_EXT bits in order of
		 * the AFAR overwrite priority. Our stored AFSR_EXT value
		 * is expected to be zero for those CPUs which do not have
		 * an AFSR_EXT register.
		 */
		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
			if ((eccp->ec_afsr_bit &
			    (ch_flt->afsr_errs & t_afsr_errs)) &&
			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
				/*
				 * Only the first shadow event carries the
				 * shadow logout data; the bit is then
				 * dropped from the set still to be queued.
				 */
				cdp = NULL;
				t_afsr_errs &= ~eccp->ec_afsr_bit;
				nevents++;
			}
		}

		/*
		 * If the ME bit is on in the primary AFSR turn all the
		 * error bits on again that may set the ME bit to make
		 * sure we see the ME AFSR error logs.
		 */
		if ((primary_afsr & C_AFSR_ME) != 0)
			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
	}
#endif	/* CHEETAH_PLUS */

	/* From here on events describe the primary registers. */
	if (clop != NULL)
		cdp = &clop->clo_data;

	/*
	 * Queue expected errors, error bit and fault type must match
	 * in the ecc_type_to_info table.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
		    (eccp->ec_flags & aflt->flt_status) != 0) {
#if defined(SERRANO)
			/*
			 * For FRC/FRU errors on Serrano the afar2 captures
			 * the address and the associated data is
			 * in the shadow logout area.
			 */
			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
				if (clop != NULL)
					cdp = &clop->clo_sdw_data;
				aflt->flt_addr = ch_flt->afar2;
			} else {
				if (clop != NULL)
					cdp = &clop->clo_data;
				aflt->flt_addr = primary_afar;
			}
#else	/* SERRANO */
			aflt->flt_addr = primary_afar;
#endif	/* SERRANO */
			/* Restore the primary values before queueing. */
			aflt->flt_stat = primary_afsr;
			ch_flt->afsr_ext = primary_afsr_ext;
			ch_flt->afsr_errs = primary_afsr_errs;
			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
			/*
			 * Drop the logout data pointer after use (the
			 * SERRANO code above re-selects it per event).
			 */
			cdp = NULL;
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}

	/*
	 * Queue unexpected errors, error bit only match.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if (eccp->ec_afsr_bit & t_afsr_errs) {
#if defined(SERRANO)
			/*
			 * For FRC/FRU errors on Serrano the afar2 captures
			 * the address and the associated data is
			 * in the shadow logout area.
			 */
			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
				if (clop != NULL)
					cdp = &clop->clo_sdw_data;
				aflt->flt_addr = ch_flt->afar2;
			} else {
				if (clop != NULL)
					cdp = &clop->clo_data;
				aflt->flt_addr = primary_afar;
			}
#else	/* SERRANO */
			aflt->flt_addr = primary_afar;
#endif	/* SERRANO */
			/* Restore the primary values before queueing. */
			aflt->flt_stat = primary_afsr;
			ch_flt->afsr_ext = primary_afsr_ext;
			ch_flt->afsr_errs = primary_afsr_errs;
			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
			cdp = NULL;
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}
	return (nevents);
}
4859 
4860 /*
4861  * Return trap type number.
4862  */
4863 uint8_t
4864 flt_to_trap_type(struct async_flt *aflt)
4865 {
4866 	if (aflt->flt_status & ECC_I_TRAP)
4867 		return (TRAP_TYPE_ECC_I);
4868 	if (aflt->flt_status & ECC_D_TRAP)
4869 		return (TRAP_TYPE_ECC_D);
4870 	if (aflt->flt_status & ECC_F_TRAP)
4871 		return (TRAP_TYPE_ECC_F);
4872 	if (aflt->flt_status & ECC_C_TRAP)
4873 		return (TRAP_TYPE_ECC_C);
4874 	if (aflt->flt_status & ECC_DP_TRAP)
4875 		return (TRAP_TYPE_ECC_DP);
4876 	if (aflt->flt_status & ECC_IP_TRAP)
4877 		return (TRAP_TYPE_ECC_IP);
4878 	if (aflt->flt_status & ECC_ITLB_TRAP)
4879 		return (TRAP_TYPE_ECC_ITLB);
4880 	if (aflt->flt_status & ECC_DTLB_TRAP)
4881 		return (TRAP_TYPE_ECC_DTLB);
4882 	return (TRAP_TYPE_UNKNOWN);
4883 }
4884 
/*
 * Error type description strings returned by flt_to_error_type(), which
 * decides an error type based on detector and leaky/partner tests.
 * The array is indexed directly by disposition value (for example
 * cetypes[CE_DISP_UNKNOWN]), so the order of the entries must stay in
 * sync with ce_dispact_t.
 */

static char *cetypes[] = {
	CE_DISP_DESC_U,
	CE_DISP_DESC_I,
	CE_DISP_DESC_PP,
	CE_DISP_DESC_P,
	CE_DISP_DESC_L,
	CE_DISP_DESC_PS,
	CE_DISP_DESC_S
};
4900 
4901 char *
4902 flt_to_error_type(struct async_flt *aflt)
4903 {
4904 	ce_dispact_t dispact, disp;
4905 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
4906 
4907 	/*
4908 	 * The memory payload bundle is shared by some events that do
4909 	 * not perform any classification.  For those flt_disp will be
4910 	 * 0 and we will return "unknown".
4911 	 */
4912 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
4913 		return (cetypes[CE_DISP_UNKNOWN]);
4914 
4915 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
4916 
4917 	/*
4918 	 * It is also possible that no scrub/classification was performed
4919 	 * by the detector, for instance where a disrupting error logged
4920 	 * in the AFSR while CEEN was off in cpu_deferred_error.
4921 	 */
4922 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
4923 		return (cetypes[CE_DISP_UNKNOWN]);
4924 
4925 	/*
4926 	 * Lookup type in initial classification/action table
4927 	 */
4928 	dispact = CE_DISPACT(ce_disp_table,
4929 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
4930 	    CE_XDIAG_STATE(dtcrinfo),
4931 	    CE_XDIAG_CE1SEEN(dtcrinfo),
4932 	    CE_XDIAG_CE2SEEN(dtcrinfo));
4933 
4934 	/*
4935 	 * A bad lookup is not something to panic production systems for.
4936 	 */
4937 	ASSERT(dispact != CE_DISP_BAD);
4938 	if (dispact == CE_DISP_BAD)
4939 		return (cetypes[CE_DISP_UNKNOWN]);
4940 
4941 	disp = CE_DISP(dispact);
4942 
4943 	switch (disp) {
4944 	case CE_DISP_UNKNOWN:
4945 	case CE_DISP_INTERMITTENT:
4946 		break;
4947 
4948 	case CE_DISP_POSS_PERS:
4949 		/*
4950 		 * "Possible persistent" errors to which we have applied a valid
4951 		 * leaky test can be separated into "persistent" or "leaky".
4952 		 */
4953 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
4954 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
4955 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
4956 			    CE_XDIAG_CE2SEEN(lkyinfo))
4957 				disp = CE_DISP_LEAKY;
4958 			else
4959 				disp = CE_DISP_PERS;
4960 		}
4961 		break;
4962 
4963 	case CE_DISP_POSS_STICKY:
4964 		/*
4965 		 * Promote "possible sticky" results that have been
4966 		 * confirmed by a partner test to "sticky".  Unconfirmed
4967 		 * "possible sticky" events are left at that status - we do not
4968 		 * guess at any bad reader/writer etc status here.
4969 		 */
4970 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
4971 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
4972 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
4973 			disp = CE_DISP_STICKY;
4974 
4975 		/*
4976 		 * Promote "possible sticky" results on a uniprocessor
4977 		 * to "sticky"
4978 		 */
4979 		if (disp == CE_DISP_POSS_STICKY &&
4980 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
4981 			disp = CE_DISP_STICKY;
4982 		break;
4983 
4984 	default:
4985 		disp = CE_DISP_UNKNOWN;
4986 		break;
4987 	}
4988 
4989 	return (cetypes[disp]);
4990 }
4991 
4992 /*
4993  * Given the entire afsr, the specific bit to check and a prioritized list of
4994  * error bits, determine the validity of the various overwrite priority
4995  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
4996  * different overwrite priorities.
4997  *
4998  * Given a specific afsr error bit and the entire afsr, there are three cases:
4999  *   INVALID:	The specified bit is lower overwrite priority than some other
5000  *		error bit which is on in the afsr (or IVU/IVC).
5001  *   VALID:	The specified bit is higher priority than all other error bits
5002  *		which are on in the afsr.
5003  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5004  *		bit is on in the afsr.
5005  */
5006 int
5007 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5008 {
5009 	uint64_t afsr_ow;
5010 
5011 	while ((afsr_ow = *ow_bits++) != 0) {
5012 		/*
5013 		 * If bit is in the priority class, check to see if another
5014 		 * bit in the same class is on => ambiguous.  Otherwise,
5015 		 * the value is valid.  If the bit is not on at this priority
5016 		 * class, but a higher priority bit is on, then the value is
5017 		 * invalid.
5018 		 */
5019 		if (afsr_ow & afsr_bit) {
5020 			/*
5021 			 * If equal pri bit is on, ambiguous.
5022 			 */
5023 			if (afsr & (afsr_ow & ~afsr_bit))
5024 				return (AFLT_STAT_AMBIGUOUS);
5025 			return (AFLT_STAT_VALID);
5026 		} else if (afsr & afsr_ow)
5027 			break;
5028 	}
5029 
5030 	/*
5031 	 * We didn't find a match or a higher priority bit was on.  Not
5032 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5033 	 */
5034 	return (AFLT_STAT_INVALID);
5035 }
5036 
5037 static int
5038 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5039 {
5040 #if defined(SERRANO)
5041 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5042 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5043 	else
5044 #endif	/* SERRANO */
5045 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5046 }
5047 
5048 static int
5049 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5050 {
5051 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5052 }
5053 
5054 static int
5055 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5056 {
5057 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5058 }
5059 
5060 static int
5061 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5062 {
5063 #ifdef lint
5064 	cpuid = cpuid;
5065 #endif
5066 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5067 		return (afsr_to_msynd_status(afsr, afsr_bit));
5068 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5069 #if defined(CHEETAH_PLUS)
5070 		/*
5071 		 * The E_SYND overwrite policy is slightly different
5072 		 * for Panther CPUs.
5073 		 */
5074 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5075 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5076 		else
5077 			return (afsr_to_esynd_status(afsr, afsr_bit));
5078 #else /* CHEETAH_PLUS */
5079 		return (afsr_to_esynd_status(afsr, afsr_bit));
5080 #endif /* CHEETAH_PLUS */
5081 	} else {
5082 		return (AFLT_STAT_INVALID);
5083 	}
5084 }
5085 
/*
 * Slave CPU stick synchronization.
 *
 * Runs with preemption disabled and communicates with sticksync_master()
 * purely by spinning on stick_sync_cmd.  Each pass takes stick_iter
 * samples of the skew between the two CPUs, averages them and, if the
 * average exceeds stick_tsk, adjusts the local STICK and tries again.
 * slave_done is set once the average skew is within the limit.
 */
void
sticksync_slave(void)
{
	int 		i;
	int		tries = 0;
	int64_t		tskew;
	int64_t		av_tskew;

	kpreempt_disable();
	/* wait for the master side */
	while (stick_sync_cmd != SLAVE_START)
		;
	/*
	 * Synchronization should only take a few tries at most. But in the
	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
	 * without its stick synchronized wouldn't be a good citizen.
	 */
	while (slave_done == 0) {
		/*
		 * Time skew calculation.
		 */
		av_tskew = tskew = 0;

		for (i = 0; i < stick_iter; i++) {
			/* make location hot */
			timestamp[EV_A_START] = 0;
			stick_timestamp(&timestamp[EV_A_START]);

			/* tell the master we're ready */
			stick_sync_cmd = MASTER_START;

			/* and wait */
			while (stick_sync_cmd != SLAVE_CONT)
				;
			/* Event B end */
			stick_timestamp(&timestamp[EV_B_END]);

			/*
			 * calculate time skew: half the difference between
			 * the duration of event B and that of event A (the
			 * master records EV_A_END and EV_B_START)
			 */
			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
				- (timestamp[EV_A_END] -
				timestamp[EV_A_START])) / 2;

			/* keep running count */
			av_tskew += tskew;
		} /* for */

		/*
		 * Adjust stick for time skew if not within the max allowed;
		 * otherwise we're all done.
		 */
		if (stick_iter != 0)
			av_tskew = av_tskew/stick_iter;
		if (ABS(av_tskew) > stick_tsk) {
			/*
			 * If the skew is 1 (the slave's STICK register
			 * is 1 STICK ahead of the master's), stick_adj
			 * could fail to adjust the slave's STICK register
			 * if the STICK read on the slave happens to
			 * align with the increment of the STICK.
			 * Therefore, we increment the skew to 2.
			 */
			if (av_tskew == 1)
				av_tskew++;
			stick_adj(-av_tskew);
		} else
			slave_done = 1;
#ifdef DEBUG
		/* record the average skew seen on each of the first tries */
		if (tries < DSYNC_ATTEMPTS)
			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
				av_tskew;
		++tries;
#endif /* DEBUG */
#ifdef lint
		tries = tries;
#endif

	} /* while */

	/* allow the master to finish */
	stick_sync_cmd = EVENT_NULL;
	kpreempt_enable();
}
5171 
/*
 * Master CPU side of stick synchronization.
 *  - timestamp end of Event A
 *  - timestamp beginning of Event B
 *
 * Runs with preemption disabled.  After clearing slave_done and posting
 * SLAVE_START, each pass answers stick_iter MASTER_START requests from
 * sticksync_slave() and then spins until the slave moves stick_sync_cmd
 * away from SLAVE_CONT.  Passes repeat until slave_done becomes non-zero.
 */
void
sticksync_master(void)
{
	int		i;

	kpreempt_disable();
	/* tell the slave we've started */
	slave_done = 0;
	stick_sync_cmd = SLAVE_START;

	while (slave_done == 0) {
		for (i = 0; i < stick_iter; i++) {
			/* wait for the slave */
			while (stick_sync_cmd != MASTER_START)
				;
			/* Event A end */
			stick_timestamp(&timestamp[EV_A_END]);

			/* make location hot */
			timestamp[EV_B_START] = 0;
			stick_timestamp(&timestamp[EV_B_START]);

			/* tell the slave to continue */
			stick_sync_cmd = SLAVE_CONT;
		} /* for */

		/* wait while slave calculates time skew */
		while (stick_sync_cmd == SLAVE_CONT)
			;
	} /* while */
	kpreempt_enable();
}
5209 
/*
 * Intentionally a no-op: the argument is ignored and nothing is done.
 *
 * Cheetah/Cheetah+ have a disrupting error for copybacks, so we don't need
 * to do the Spitfire hack of xcall'ing all the cpus to ask them to check for
 * them.  Also, in cpu_async_panic_callb, each cpu checks for CPU events on
 * its way to panic idle.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{}
5220 
/*
 * kmem cache to which cpu_uninit_private() returns a cpu's
 * cheetah_private_t structure.
 */
struct kmem_cache *ch_private_cache;
5222 
5223 /*
5224  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5225  * deallocate the scrubber data structures and cpu_private data structure.
5226  */
5227 void
5228 cpu_uninit_private(struct cpu *cp)
5229 {
5230 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5231 
5232 	ASSERT(chprp);
5233 	cpu_uninit_ecache_scrub_dr(cp);
5234 	CPU_PRIVATE(cp) = NULL;
5235 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5236 	kmem_cache_free(ch_private_cache, chprp);
5237 	cmp_delete_cpu(cp->cpu_id);
5238 
5239 }
5240 
5241 /*
5242  * Cheetah Cache Scrubbing
5243  *
5244  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5245  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5246  * protected by either parity or ECC.
5247  *
 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
 * logic works with respect to the ASI used to scrub I$ lines, the entire
 * I$ is scanned at once.
5252  */
5253 
/*
 * Tuneables to enable and disable the scrubbing of the caches, and to tune
 * scrubbing behavior.  These may be changed via /etc/system or using mdb
 * on a running system.
 */
int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */

/*
 * The following are the PIL levels that the softints/cross traps will fire at.
 */
uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */

#if defined(JALAPENO)

/*
 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
 * on Jalapeno.
 */
int ecache_scrub_enable = 0;

#else	/* JALAPENO */

/*
 * With all other cpu types, E$ scrubbing is on by default
 */
int ecache_scrub_enable = 1;

#endif	/* JALAPENO */


#if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)

/*
 * The I$ scrubber tends to cause latency problems for real-time SW, so it
 * is disabled by default on non-Cheetah systems
 */
int icache_scrub_enable = 0;

/*
 * Tuneables specifying the scrub calls per second and the scan rate
 * for each cache
 *
 * The cyclic times are set during boot based on the following values.
 * Changing these values in mdb after this time will have no effect.  If
 * a different value is desired, it must be set in /etc/system before a
 * reboot.
 */
int ecache_calls_a_sec = 1;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 2;		/* D$ scrub calls per second */
int icache_calls_a_sec = 2;		/* I$ scrub calls per second */

int ecache_scan_rate_idle = 1;		/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 1;		/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 1;		/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 1;		/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 1;		/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 1;		/* I$ scan rate (in tenths of a %) */

#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */

int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */

int ecache_calls_a_sec = 100;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 100;		/* D$ scrub calls per second */
int icache_calls_a_sec = 100;		/* I$ scrub calls per second */

int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */

#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5330 
/*
 * In order to scrub on offline cpus, a cross trap is sent.  The handler will
 * increment the outstanding request counter and schedule a softint to run
 * the scrubber.  do_scrub_offline() sends this handler to the offline cpus.
 */
extern xcfunc_t cache_scrubreq_tl1;

/*
 * These are the softint functions for each cache scrubber; they are
 * registered with add_softintr() in cpu_init_cache_scrub().
 */
static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5344 
5345 /*
5346  * The cache scrub info table contains cache specific information
5347  * and allows for some of the scrub code to be table driven, reducing
5348  * duplication of cache similar code.
5349  *
5350  * This table keeps a copy of the value in the calls per second variable
5351  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5352  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5353  * mdb in a misguided attempt to disable the scrubber).
5354  */
5355 struct scrub_info {
5356 	int		*csi_enable;	/* scrubber enable flag */
5357 	int		csi_freq;	/* scrubber calls per second */
5358 	int		csi_index;	/* index to chsm_outstanding[] */
5359 	uint_t		csi_inum;	/* scrubber interrupt number */
5360 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5361 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5362 	char		csi_name[3];	/* cache name for this scrub entry */
5363 } cache_scrub_info[] = {
5364 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5365 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5366 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5367 };
5368 
5369 /*
5370  * If scrubbing is enabled, increment the outstanding request counter.  If it
5371  * is 1 (meaning there were no previous requests outstanding), call
5372  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5373  * a self trap.
5374  */
5375 static void
5376 do_scrub(struct scrub_info *csi)
5377 {
5378 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5379 	int index = csi->csi_index;
5380 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5381 
5382 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5383 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5384 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5385 			    csi->csi_inum, 0);
5386 		}
5387 	}
5388 }
5389 
5390 /*
5391  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5392  * cross-trap the offline cpus.
5393  */
5394 static void
5395 do_scrub_offline(struct scrub_info *csi)
5396 {
5397 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5398 
5399 	if (CPUSET_ISNULL(cpu_offline_set)) {
5400 		/*
5401 		 * No offline cpus - nothing to do
5402 		 */
5403 		return;
5404 	}
5405 
5406 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5407 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5408 		    csi->csi_index);
5409 	}
5410 }
5411 
5412 /*
5413  * This is the initial setup for the scrubber cyclics - it sets the
5414  * interrupt level, frequency, and function to call.
5415  */
5416 /*ARGSUSED*/
5417 static void
5418 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5419     cyc_time_t *when)
5420 {
5421 	struct scrub_info *csi = (struct scrub_info *)arg;
5422 
5423 	ASSERT(csi != NULL);
5424 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5425 	hdlr->cyh_level = CY_LOW_LEVEL;
5426 	hdlr->cyh_arg = arg;
5427 
5428 	when->cyt_when = 0;	/* Start immediately */
5429 	when->cyt_interval = NANOSEC / csi->csi_freq;
5430 }
5431 
5432 /*
5433  * Initialization for cache scrubbing.
5434  * This routine is called AFTER all cpus have had cpu_init_private called
5435  * to initialize their private data areas.
5436  */
5437 void
5438 cpu_init_cache_scrub(void)
5439 {
5440 	int i;
5441 	struct scrub_info *csi;
5442 	cyc_omni_handler_t omni_hdlr;
5443 	cyc_handler_t offline_hdlr;
5444 	cyc_time_t when;
5445 
5446 	/*
5447 	 * save away the maximum number of lines for the D$
5448 	 */
5449 	dcache_nlines = dcache_size / dcache_linesize;
5450 
5451 	/*
5452 	 * register the softints for the cache scrubbing
5453 	 */
5454 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5455 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5456 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]);
5457 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5458 
5459 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5460 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5461 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]);
5462 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5463 
5464 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5465 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5466 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]);
5467 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5468 
5469 	/*
5470 	 * start the scrubbing for all the caches
5471 	 */
5472 	mutex_enter(&cpu_lock);
5473 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5474 
5475 		csi = &cache_scrub_info[i];
5476 
5477 		if (!(*csi->csi_enable))
5478 			continue;
5479 
5480 		/*
5481 		 * force the following to be true:
5482 		 *	1 <= calls_a_sec <= hz
5483 		 */
5484 		if (csi->csi_freq > hz) {
5485 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5486 				"(%d); resetting to hz (%d)", csi->csi_name,
5487 				csi->csi_freq, hz);
5488 			csi->csi_freq = hz;
5489 		} else if (csi->csi_freq < 1) {
5490 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5491 				"(%d); resetting to 1", csi->csi_name,
5492 				csi->csi_freq);
5493 			csi->csi_freq = 1;
5494 		}
5495 
5496 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5497 		omni_hdlr.cyo_offline = NULL;
5498 		omni_hdlr.cyo_arg = (void *)csi;
5499 
5500 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5501 		offline_hdlr.cyh_arg = (void *)csi;
5502 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5503 
5504 		when.cyt_when = 0;	/* Start immediately */
5505 		when.cyt_interval = NANOSEC / csi->csi_freq;
5506 
5507 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5508 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5509 	}
5510 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5511 	mutex_exit(&cpu_lock);
5512 }
5513 
5514 /*
5515  * Indicate that the specified cpu is idle.
5516  */
5517 void
5518 cpu_idle_ecache_scrub(struct cpu *cp)
5519 {
5520 	if (CPU_PRIVATE(cp) != NULL) {
5521 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5522 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5523 	}
5524 }
5525 
5526 /*
5527  * Indicate that the specified cpu is busy.
5528  */
5529 void
5530 cpu_busy_ecache_scrub(struct cpu *cp)
5531 {
5532 	if (CPU_PRIVATE(cp) != NULL) {
5533 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5534 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5535 	}
5536 }
5537 
5538 /*
5539  * Initialization for cache scrubbing for the specified cpu.
5540  */
5541 void
5542 cpu_init_ecache_scrub_dr(struct cpu *cp)
5543 {
5544 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5545 	int cpuid = cp->cpu_id;
5546 
5547 	/* initialize the number of lines in the caches */
5548 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5549 	    cpunodes[cpuid].ecache_linesize;
5550 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5551 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5552 
5553 	/*
5554 	 * do_scrub() and do_scrub_offline() check both the global
5555 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5556 	 * check this value before scrubbing.  Currently, we use it to
5557 	 * disable the E$ scrubber on multi-core cpus or while running at
5558 	 * slowed speed.  For now, just turn everything on and allow
5559 	 * cpu_init_private() to change it if necessary.
5560 	 */
5561 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5562 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5563 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5564 
5565 	cpu_busy_ecache_scrub(cp);
5566 }
5567 
5568 /*
5569  * Un-initialization for cache scrubbing for the specified cpu.
5570  */
5571 static void
5572 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5573 {
5574 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5575 
5576 	/*
5577 	 * un-initialize bookkeeping for cache scrubbing
5578 	 */
5579 	bzero(csmp, sizeof (ch_scrub_misc_t));
5580 
5581 	cpu_idle_ecache_scrub(cp);
5582 }
5583 
5584 /*
5585  * Called periodically on each CPU to scrub the D$.
5586  */
5587 static void
5588 scrub_dcache(int how_many)
5589 {
5590 	int i;
5591 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5592 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5593 
5594 	/*
5595 	 * scrub the desired number of lines
5596 	 */
5597 	for (i = 0; i < how_many; i++) {
5598 		/*
5599 		 * scrub a D$ line
5600 		 */
5601 		dcache_inval_line(index);
5602 
5603 		/*
5604 		 * calculate the next D$ line to scrub, assumes
5605 		 * that dcache_nlines is a power of 2
5606 		 */
5607 		index = (index + 1) & (dcache_nlines - 1);
5608 	}
5609 
5610 	/*
5611 	 * set the scrub index for the next visit
5612 	 */
5613 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5614 }
5615 
5616 /*
5617  * Handler for D$ scrub inum softint. Call scrub_dcache until
5618  * we decrement the outstanding request count to zero.
5619  */
5620 /*ARGSUSED*/
5621 static uint_t
5622 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5623 {
5624 	int i;
5625 	int how_many;
5626 	int outstanding;
5627 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5628 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5629 	struct scrub_info *csi = (struct scrub_info *)arg1;
5630 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5631 		dcache_scan_rate_idle : dcache_scan_rate_busy;
5632 
5633 	/*
5634 	 * The scan rates are expressed in units of tenths of a
5635 	 * percent.  A scan rate of 1000 (100%) means the whole
5636 	 * cache is scanned every second.
5637 	 */
5638 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5639 
5640 	do {
5641 		outstanding = *countp;
5642 		for (i = 0; i < outstanding; i++) {
5643 			scrub_dcache(how_many);
5644 		}
5645 	} while (atomic_add_32_nv(countp, -outstanding));
5646 
5647 	return (DDI_INTR_CLAIMED);
5648 }
5649 
5650 /*
5651  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5652  * by invalidating lines. Due to the characteristics of the ASI which
5653  * is used to invalidate an I$ line, the entire I$ must be invalidated
5654  * vs. an individual I$ line.
5655  */
5656 static void
5657 scrub_icache(int how_many)
5658 {
5659 	int i;
5660 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5661 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5662 	int icache_nlines = csmp->chsm_icache_nlines;
5663 
5664 	/*
5665 	 * scrub the desired number of lines
5666 	 */
5667 	for (i = 0; i < how_many; i++) {
5668 		/*
5669 		 * since the entire I$ must be scrubbed at once,
5670 		 * wait until the index wraps to zero to invalidate
5671 		 * the entire I$
5672 		 */
5673 		if (index == 0) {
5674 			icache_inval_all();
5675 		}
5676 
5677 		/*
5678 		 * calculate the next I$ line to scrub, assumes
5679 		 * that chsm_icache_nlines is a power of 2
5680 		 */
5681 		index = (index + 1) & (icache_nlines - 1);
5682 	}
5683 
5684 	/*
5685 	 * set the scrub index for the next visit
5686 	 */
5687 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5688 }
5689 
5690 /*
5691  * Handler for I$ scrub inum softint. Call scrub_icache until
5692  * we decrement the outstanding request count to zero.
5693  */
5694 /*ARGSUSED*/
5695 static uint_t
5696 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5697 {
5698 	int i;
5699 	int how_many;
5700 	int outstanding;
5701 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5702 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5703 	struct scrub_info *csi = (struct scrub_info *)arg1;
5704 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5705 	    icache_scan_rate_idle : icache_scan_rate_busy;
5706 	int icache_nlines = csmp->chsm_icache_nlines;
5707 
5708 	/*
5709 	 * The scan rates are expressed in units of tenths of a
5710 	 * percent.  A scan rate of 1000 (100%) means the whole
5711 	 * cache is scanned every second.
5712 	 */
5713 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5714 
5715 	do {
5716 		outstanding = *countp;
5717 		for (i = 0; i < outstanding; i++) {
5718 			scrub_icache(how_many);
5719 		}
5720 	} while (atomic_add_32_nv(countp, -outstanding));
5721 
5722 	return (DDI_INTR_CLAIMED);
5723 }
5724 
5725 /*
5726  * Called periodically on each CPU to scrub the E$.
5727  */
5728 static void
5729 scrub_ecache(int how_many)
5730 {
5731 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5732 	int i;
5733 	int cpuid = CPU->cpu_id;
5734 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5735 	int nlines = csmp->chsm_ecache_nlines;
5736 	int linesize = cpunodes[cpuid].ecache_linesize;
5737 	int ec_set_size = cpu_ecache_set_size(CPU);
5738 
5739 	/*
5740 	 * scrub the desired number of lines
5741 	 */
5742 	for (i = 0; i < how_many; i++) {
5743 		/*
5744 		 * scrub the E$ line
5745 		 */
5746 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5747 		    ec_set_size);
5748 
5749 		/*
5750 		 * calculate the next E$ line to scrub based on twice
5751 		 * the number of E$ lines (to displace lines containing
5752 		 * flush area data), assumes that the number of lines
5753 		 * is a power of 2
5754 		 */
5755 		index = (index + 1) & ((nlines << 1) - 1);
5756 	}
5757 
5758 	/*
5759 	 * set the ecache scrub index for the next visit
5760 	 */
5761 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5762 }
5763 
5764 /*
5765  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5766  * we decrement the outstanding request count to zero.
5767  *
5768  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5769  * become negative after the atomic_add_32_nv().  This is not a problem, as
5770  * the next trip around the loop won't scrub anything, and the next add will
5771  * reset the count back to zero.
5772  */
5773 /*ARGSUSED*/
5774 static uint_t
5775 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5776 {
5777 	int i;
5778 	int how_many;
5779 	int outstanding;
5780 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5781 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5782 	struct scrub_info *csi = (struct scrub_info *)arg1;
5783 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5784 		ecache_scan_rate_idle : ecache_scan_rate_busy;
5785 	int ecache_nlines = csmp->chsm_ecache_nlines;
5786 
5787 	/*
5788 	 * The scan rates are expressed in units of tenths of a
5789 	 * percent.  A scan rate of 1000 (100%) means the whole
5790 	 * cache is scanned every second.
5791 	 */
5792 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5793 
5794 	do {
5795 		outstanding = *countp;
5796 		for (i = 0; i < outstanding; i++) {
5797 			scrub_ecache(how_many);
5798 		}
5799 	} while (atomic_add_32_nv(countp, -outstanding));
5800 
5801 	return (DDI_INTR_CLAIMED);
5802 }
5803 
5804 /*
5805  * Timeout function to reenable CE
5806  */
5807 static void
5808 cpu_delayed_check_ce_errors(void *arg)
5809 {
5810 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
5811 	    TQ_NOSLEEP)) {
5812 		(void) timeout(cpu_delayed_check_ce_errors, arg,
5813 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5814 	}
5815 }
5816 
5817 /*
5818  * CE Deferred Re-enable after trap.
5819  *
5820  * When the CPU gets a disrupting trap for any of the errors
5821  * controlled by the CEEN bit, CEEN is disabled in the trap handler
5822  * immediately. To eliminate the possibility of multiple CEs causing
5823  * recursive stack overflow in the trap handler, we cannot
5824  * reenable CEEN while still running in the trap handler. Instead,
5825  * after a CE is logged on a CPU, we schedule a timeout function,
5826  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
5827  * seconds. This function will check whether any further CEs
5828  * have occurred on that CPU, and if none have, will reenable CEEN.
5829  *
5830  * If further CEs have occurred while CEEN is disabled, another
5831  * timeout will be scheduled. This is to ensure that the CPU can
5832  * make progress in the face of CE 'storms', and that it does not
5833  * spend all its time logging CE errors.
5834  */
5835 static void
5836 cpu_check_ce_errors(void *arg)
5837 {
5838 	int	cpuid = (int)(uintptr_t)arg;
5839 	cpu_t	*cp;
5840 
5841 	/*
5842 	 * We acquire cpu_lock.
5843 	 */
5844 	ASSERT(curthread->t_pil == 0);
5845 
5846 	/*
5847 	 * verify that the cpu is still around, DR
5848 	 * could have got there first ...
5849 	 */
5850 	mutex_enter(&cpu_lock);
5851 	cp = cpu_get(cpuid);
5852 	if (cp == NULL) {
5853 		mutex_exit(&cpu_lock);
5854 		return;
5855 	}
5856 	/*
5857 	 * make sure we don't migrate across CPUs
5858 	 * while checking our CE status.
5859 	 */
5860 	kpreempt_disable();
5861 
5862 	/*
5863 	 * If we are running on the CPU that got the
5864 	 * CE, we can do the checks directly.
5865 	 */
5866 	if (cp->cpu_id == CPU->cpu_id) {
5867 		mutex_exit(&cpu_lock);
5868 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
5869 		kpreempt_enable();
5870 		return;
5871 	}
5872 	kpreempt_enable();
5873 
5874 	/*
5875 	 * send an x-call to get the CPU that originally
5876 	 * got the CE to do the necessary checks. If we can't
5877 	 * send the x-call, reschedule the timeout, otherwise we
5878 	 * lose CEEN forever on that CPU.
5879 	 */
5880 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
5881 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
5882 		    TIMEOUT_CEEN_CHECK, 0);
5883 		mutex_exit(&cpu_lock);
5884 	} else {
5885 		/*
5886 		 * When the CPU is not accepting xcalls, or
5887 		 * the processor is offlined, we don't want to
5888 		 * incur the extra overhead of trying to schedule the
5889 		 * CE timeout indefinitely. However, we don't want to lose
5890 		 * CE checking forever.
5891 		 *
5892 		 * Keep rescheduling the timeout, accepting the additional
5893 		 * overhead as the cost of correctness in the case where we get
5894 		 * a CE, disable CEEN, offline the CPU during the
5895 		 * the timeout interval, and then online it at some
5896 		 * point in the future. This is unlikely given the short
5897 		 * cpu_ceen_delay_secs.
5898 		 */
5899 		mutex_exit(&cpu_lock);
5900 		(void) timeout(cpu_delayed_check_ce_errors,
5901 		    (void *)(uintptr_t)cp->cpu_id,
5902 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5903 	}
5904 }
5905 
5906 /*
5907  * This routine will check whether CEs have occurred while
5908  * CEEN is disabled. Any CEs detected will be logged and, if
5909  * possible, scrubbed.
5910  *
5911  * The memscrubber will also use this routine to clear any errors
5912  * caused by its scrubbing with CEEN disabled.
5913  *
5914  * flag == SCRUBBER_CEEN_CHECK
5915  *		called from memscrubber, just check/scrub, no reset
5916  *		paddr 	physical addr. for start of scrub pages
5917  *		vaddr 	virtual addr. for scrub area
5918  *		psz	page size of area to be scrubbed
5919  *
5920  * flag == TIMEOUT_CEEN_CHECK
5921  *		timeout function has triggered, reset timeout or CEEN
5922  *
5923  * Note: We must not migrate cpus during this function.  This can be
5924  * achieved by one of:
5925  *    - invoking as target of an x-call in which case we're at XCALL_PIL
5926  *	The flag value must be first xcall argument.
5927  *    - disabling kernel preemption.  This should be done for very short
5928  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
5929  *	scrub an extended area with cpu_check_block.  The call for
5930  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
5931  *	brief for this case.
5932  *    - binding to a cpu, eg with thread_affinity_set().  This is used
5933  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
5934  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
5935  */
5936 void
5937 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
5938 {
5939 	ch_cpu_errors_t	cpu_error_regs;
5940 	uint64_t	ec_err_enable;
5941 	uint64_t	page_offset;
5942 
5943 	/* Read AFSR */
5944 	get_cpu_error_state(&cpu_error_regs);
5945 
5946 	/*
5947 	 * If no CEEN errors have occurred during the timeout
5948 	 * interval, it is safe to re-enable CEEN and exit.
5949 	 */
5950 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
5951 		if (flag == TIMEOUT_CEEN_CHECK &&
5952 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
5953 			set_error_enable(ec_err_enable | EN_REG_CEEN);
5954 		return;
5955 	}
5956 
5957 	/*
5958 	 * Ensure that CEEN was not reenabled (maybe by DR) before
5959 	 * we log/clear the error.
5960 	 */
5961 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
5962 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
5963 
5964 	/*
5965 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
5966 	 * timeout will be rescheduled when the error is logged.
5967 	 */
5968 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
5969 	    cpu_ce_detected(&cpu_error_regs,
5970 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
5971 	else
5972 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
5973 
5974 	/*
5975 	 * If the memory scrubber runs while CEEN is
5976 	 * disabled, (or if CEEN is disabled during the
5977 	 * scrub as a result of a CE being triggered by
5978 	 * it), the range being scrubbed will not be
5979 	 * completely cleaned. If there are multiple CEs
5980 	 * in the range at most two of these will be dealt
5981 	 * with, (one by the trap handler and one by the
5982 	 * timeout). It is also possible that none are dealt
5983 	 * with, (CEEN disabled and another CE occurs before
5984 	 * the timeout triggers). So to ensure that the
5985 	 * memory is actually scrubbed, we have to access each
5986 	 * memory location in the range and then check whether
5987 	 * that access causes a CE.
5988 	 */
5989 	if (flag == SCRUBBER_CEEN_CHECK && va) {
5990 		if ((cpu_error_regs.afar >= pa) &&
5991 		    (cpu_error_regs.afar < (pa + psz))) {
5992 			/*
5993 			 * Force a load from physical memory for each
5994 			 * 64-byte block, then check AFSR to determine
5995 			 * whether this access caused an error.
5996 			 *
5997 			 * This is a slow way to do a scrub, but as it will
5998 			 * only be invoked when the memory scrubber actually
5999 			 * triggered a CE, it should not happen too
6000 			 * frequently.
6001 			 *
6002 			 * cut down what we need to check as the scrubber
6003 			 * has verified up to AFAR, so get it's offset
6004 			 * into the page and start there.
6005 			 */
6006 			page_offset = (uint64_t)(cpu_error_regs.afar &
6007 			    (psz - 1));
6008 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6009 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6010 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6011 			    psz);
6012 		}
6013 	}
6014 
6015 	/*
6016 	 * Reset error enable if this CE is not masked.
6017 	 */
6018 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6019 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6020 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
6021 
6022 }
6023 
6024 /*
6025  * Attempt a cpu logout for an error that we did not trap for, such
6026  * as a CE noticed with CEEN off.  It is assumed that we are still running
6027  * on the cpu that took the error and that we cannot migrate.  Returns
6028  * 0 on success, otherwise nonzero.
6029  */
6030 static int
6031 cpu_ce_delayed_ec_logout(uint64_t afar)
6032 {
6033 	ch_cpu_logout_t *clop;
6034 
6035 	if (CPU_PRIVATE(CPU) == NULL)
6036 		return (0);
6037 
6038 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6039 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6040 	    LOGOUT_INVALID)
6041 		return (0);
6042 
6043 	cpu_delayed_logout(afar, clop);
6044 	return (1);
6045 }
6046 
6047 /*
6048  * We got an error while CEEN was disabled. We
6049  * need to clean up after it and log whatever
6050  * information we have on the CE.
6051  */
6052 void
6053 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6054 {
6055 	ch_async_flt_t 	ch_flt;
6056 	struct async_flt *aflt;
6057 	char 		pr_reason[MAX_REASON_STRING];
6058 
6059 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6060 	ch_flt.flt_trapped_ce = flag;
6061 	aflt = (struct async_flt *)&ch_flt;
6062 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6063 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6064 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6065 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6066 	aflt->flt_addr = cpu_error_regs->afar;
6067 #if defined(SERRANO)
6068 	ch_flt.afar2 = cpu_error_regs->afar2;
6069 #endif	/* SERRANO */
6070 	aflt->flt_pc = NULL;
6071 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6072 	aflt->flt_tl = 0;
6073 	aflt->flt_panic = 0;
6074 	cpu_log_and_clear_ce(&ch_flt);
6075 
6076 	/*
6077 	 * check if we caused any errors during cleanup
6078 	 */
6079 	if (clear_errors(&ch_flt)) {
6080 		pr_reason[0] = '\0';
6081 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6082 		    NULL);
6083 	}
6084 }
6085 
6086 /*
6087  * Log/clear CEEN-controlled disrupting errors
6088  */
6089 static void
6090 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6091 {
6092 	struct async_flt *aflt;
6093 	uint64_t afsr, afsr_errs;
6094 	ch_cpu_logout_t *clop;
6095 	char 		pr_reason[MAX_REASON_STRING];
6096 	on_trap_data_t	*otp = curthread->t_ontrap;
6097 
6098 	aflt = (struct async_flt *)ch_flt;
6099 	afsr = aflt->flt_stat;
6100 	afsr_errs = ch_flt->afsr_errs;
6101 	aflt->flt_id = gethrtime_waitfree();
6102 	aflt->flt_bus_id = getprocessorid();
6103 	aflt->flt_inst = CPU->cpu_id;
6104 	aflt->flt_prot = AFLT_PROT_NONE;
6105 	aflt->flt_class = CPU_FAULT;
6106 	aflt->flt_status = ECC_C_TRAP;
6107 
6108 	pr_reason[0] = '\0';
6109 	/*
6110 	 * Get the CPU log out info for Disrupting Trap.
6111 	 */
6112 	if (CPU_PRIVATE(CPU) == NULL) {
6113 		clop = NULL;
6114 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6115 	} else {
6116 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6117 	}
6118 
6119 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6120 		ch_cpu_errors_t cpu_error_regs;
6121 
6122 		get_cpu_error_state(&cpu_error_regs);
6123 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6124 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6125 		clop->clo_data.chd_afar = cpu_error_regs.afar;
6126 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6127 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6128 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6129 		clop->clo_sdw_data.chd_afsr_ext =
6130 		    cpu_error_regs.shadow_afsr_ext;
6131 #if defined(SERRANO)
6132 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6133 #endif	/* SERRANO */
6134 		ch_flt->flt_data_incomplete = 1;
6135 
6136 		/*
6137 		 * The logging/clear code expects AFSR/AFAR to be cleared.
6138 		 * The trap handler does it for CEEN enabled errors
6139 		 * so we need to do it here.
6140 		 */
6141 		set_cpu_error_state(&cpu_error_regs);
6142 	}
6143 
6144 #if defined(JALAPENO) || defined(SERRANO)
6145 	/*
6146 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6147 	 * For Serrano, even thou we do have the AFAR, we still do the
6148 	 * scrub on the RCE side since that's where the error type can
6149 	 * be properly classified as intermittent, persistent, etc.
6150 	 *
6151 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
6152 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6153 	 * the flt_status bits.
6154 	 */
6155 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6156 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6157 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6158 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
6159 	}
6160 #else /* JALAPENO || SERRANO */
6161 	/*
6162 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
6163 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6164 	 * the flt_status bits.
6165 	 */
6166 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6167 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6168 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6169 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6170 		}
6171 	}
6172 
6173 #endif /* JALAPENO || SERRANO */
6174 
6175 	/*
6176 	 * Update flt_prot if this error occurred under on_trap protection.
6177 	 */
6178 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6179 		aflt->flt_prot = AFLT_PROT_EC;
6180 
6181 	/*
6182 	 * Queue events on the async event queue, one event per error bit.
6183 	 */
6184 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6185 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6186 		ch_flt->flt_type = CPU_INV_AFSR;
6187 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6188 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6189 		    aflt->flt_panic);
6190 	}
6191 
6192 	/*
6193 	 * Zero out + invalidate CPU logout.
6194 	 */
6195 	if (clop) {
6196 		bzero(clop, sizeof (ch_cpu_logout_t));
6197 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6198 	}
6199 
6200 	/*
6201 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6202 	 * was disabled, we need to flush either the entire
6203 	 * E$ or an E$ line.
6204 	 */
6205 #if defined(JALAPENO) || defined(SERRANO)
6206 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6207 #else	/* JALAPENO || SERRANO */
6208 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6209 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6210 #endif	/* JALAPENO || SERRANO */
6211 		cpu_error_ecache_flush(ch_flt);
6212 
6213 }
6214 
6215 /*
6216  * depending on the error type, we determine whether we
6217  * need to flush the entire ecache or just a line.
6218  */
6219 static int
6220 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6221 {
6222 	struct async_flt *aflt;
6223 	uint64_t	afsr;
6224 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6225 
6226 	aflt = (struct async_flt *)ch_flt;
6227 	afsr = aflt->flt_stat;
6228 
6229 	/*
6230 	 * If we got multiple errors, no point in trying
6231 	 * the individual cases, just flush the whole cache
6232 	 */
6233 	if (afsr & C_AFSR_ME) {
6234 		return (ECACHE_FLUSH_ALL);
6235 	}
6236 
6237 	/*
6238 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6239 	 * was disabled, we need to flush entire E$. We can't just
6240 	 * flush the cache line affected as the ME bit
6241 	 * is not set when multiple correctable errors of the same
6242 	 * type occur, so we might have multiple CPC or EDC errors,
6243 	 * with only the first recorded.
6244 	 */
6245 #if defined(JALAPENO) || defined(SERRANO)
6246 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6247 #else	/* JALAPENO || SERRANO */
6248 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6249 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6250 #endif	/* JALAPENO || SERRANO */
6251 		return (ECACHE_FLUSH_ALL);
6252 	}
6253 
6254 #if defined(JALAPENO) || defined(SERRANO)
6255 	/*
6256 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6257 	 * flush the entire Ecache.
6258 	 */
6259 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6260 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6261 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6262 			return (ECACHE_FLUSH_LINE);
6263 		} else {
6264 			return (ECACHE_FLUSH_ALL);
6265 		}
6266 	}
6267 #else /* JALAPENO || SERRANO */
6268 	/*
6269 	 * If UE only is set, flush the Ecache line, otherwise
6270 	 * flush the entire Ecache.
6271 	 */
6272 	if (afsr_errs & C_AFSR_UE) {
6273 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6274 		    C_AFSR_UE) {
6275 			return (ECACHE_FLUSH_LINE);
6276 		} else {
6277 			return (ECACHE_FLUSH_ALL);
6278 		}
6279 	}
6280 #endif /* JALAPENO || SERRANO */
6281 
6282 	/*
6283 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6284 	 * flush the entire Ecache.
6285 	 */
6286 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6287 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6288 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6289 			return (ECACHE_FLUSH_LINE);
6290 		} else {
6291 			return (ECACHE_FLUSH_ALL);
6292 		}
6293 	}
6294 
6295 	/*
6296 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6297 	 * flush the entire Ecache.
6298 	 */
6299 	if (afsr_errs & C_AFSR_BERR) {
6300 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6301 			return (ECACHE_FLUSH_LINE);
6302 		} else {
6303 			return (ECACHE_FLUSH_ALL);
6304 		}
6305 	}
6306 
6307 	return (0);
6308 }
6309 
6310 void
6311 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6312 {
6313 	int	ecache_flush_flag =
6314 	    cpu_error_ecache_flush_required(ch_flt);
6315 
6316 	/*
6317 	 * Flush Ecache line or entire Ecache based on above checks.
6318 	 */
6319 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6320 		cpu_flush_ecache();
6321 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6322 		cpu_flush_ecache_line(ch_flt);
6323 	}
6324 
6325 }
6326 
6327 /*
6328  * Extract the PA portion from the E$ tag.
6329  */
6330 uint64_t
6331 cpu_ectag_to_pa(int setsize, uint64_t tag)
6332 {
6333 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6334 		return (JG_ECTAG_TO_PA(setsize, tag));
6335 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6336 		return (PN_L3TAG_TO_PA(tag));
6337 	else
6338 		return (CH_ECTAG_TO_PA(setsize, tag));
6339 }
6340 
6341 /*
6342  * Convert the E$ tag PA into an E$ subblock index.
6343  */
6344 static int
6345 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6346 {
6347 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6348 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6349 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6350 		/* Panther has only one subblock per line */
6351 		return (0);
6352 	else
6353 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6354 }
6355 
6356 /*
6357  * All subblocks in an E$ line must be invalid for
6358  * the line to be invalid.
6359  */
6360 int
6361 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6362 {
6363 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6364 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6365 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6366 		return (PN_L3_LINE_INVALID(tag));
6367 	else
6368 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6369 }
6370 
6371 /*
6372  * Extract state bits for a subblock given the tag.  Note that for Panther
6373  * this works on both l2 and l3 tags.
6374  */
6375 static int
6376 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6377 {
6378 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6379 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6380 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6381 		return (tag & CH_ECSTATE_MASK);
6382 	else
6383 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6384 }
6385 
/*
 * Cpu specific initialization.  Does nothing unless the kernel is built
 * with CHEETAHPLUS_ERRATUM_25, in which case cheetah_nudge_init() is
 * called when cheetah_sendmondo_recover is set.
 */
void
cpu_mp_init(void)
{
#ifdef	CHEETAHPLUS_ERRATUM_25
	if (cheetah_sendmondo_recover != 0)
		cheetah_nudge_init();
#endif
}
6398 
6399 void
6400 cpu_ereport_post(struct async_flt *aflt)
6401 {
6402 	char *cpu_type, buf[FM_MAX_CLASS];
6403 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
6404 	nv_alloc_t *nva = NULL;
6405 	nvlist_t *ereport, *detector, *resource;
6406 	errorq_elem_t *eqep;
6407 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6408 	char unum[UNUM_NAMLEN];
6409 	int len = 0;
6410 	uint8_t msg_type, mask;
6411 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6412 
6413 	if (aflt->flt_panic || panicstr) {
6414 		eqep = errorq_reserve(ereport_errorq);
6415 		if (eqep == NULL)
6416 			return;
6417 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6418 		nva = errorq_elem_nva(ereport_errorq, eqep);
6419 	} else {
6420 		ereport = fm_nvlist_create(nva);
6421 	}
6422 
6423 	/*
6424 	 * Create the scheme "cpu" FMRI.
6425 	 */
6426 	detector = fm_nvlist_create(nva);
6427 	resource = fm_nvlist_create(nva);
6428 	switch (cpunodes[aflt->flt_inst].implementation) {
6429 	case CHEETAH_IMPL:
6430 		cpu_type = FM_EREPORT_CPU_USIII;
6431 		break;
6432 	case CHEETAH_PLUS_IMPL:
6433 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6434 		break;
6435 	case JALAPENO_IMPL:
6436 		cpu_type = FM_EREPORT_CPU_USIIIi;
6437 		break;
6438 	case SERRANO_IMPL:
6439 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6440 		break;
6441 	case JAGUAR_IMPL:
6442 		cpu_type = FM_EREPORT_CPU_USIV;
6443 		break;
6444 	case PANTHER_IMPL:
6445 		cpu_type = FM_EREPORT_CPU_USIVplus;
6446 		break;
6447 	default:
6448 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6449 		break;
6450 	}
6451 	mask = cpunodes[aflt->flt_inst].version;
6452 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
6453 	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
6454 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
6455 	    aflt->flt_inst, &mask, (const char *)sbuf);
6456 
6457 	/*
6458 	 * Encode all the common data into the ereport.
6459 	 */
6460 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6461 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6462 
6463 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6464 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6465 	    detector, NULL);
6466 
6467 	/*
6468 	 * Encode the error specific data that was saved in
6469 	 * the async_flt structure into the ereport.
6470 	 */
6471 	cpu_payload_add_aflt(aflt, ereport, resource,
6472 	    &plat_ecc_ch_flt.ecaf_afar_status,
6473 	    &plat_ecc_ch_flt.ecaf_synd_status);
6474 
6475 	if (aflt->flt_panic || panicstr) {
6476 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6477 	} else {
6478 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6479 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6480 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6481 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6482 	}
6483 	/*
6484 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6485 	 * to the SC olny if it can process it.
6486 	 */
6487 
6488 	if (&plat_ecc_capability_sc_get &&
6489 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6490 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6491 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6492 			/*
6493 			 * If afar status is not invalid do a unum lookup.
6494 			 */
6495 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6496 			    AFLT_STAT_INVALID) {
6497 				(void) cpu_get_mem_unum_aflt(
6498 				    plat_ecc_ch_flt.ecaf_synd_status, aflt,
6499 				    unum, UNUM_NAMLEN, &len);
6500 			} else {
6501 				unum[0] = '\0';
6502 			}
6503 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6504 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6505 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6506 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6507 			    ch_flt->flt_sdw_afsr_ext;
6508 
6509 			if (&plat_log_fruid_error2)
6510 				plat_log_fruid_error2(msg_type, unum, aflt,
6511 				    &plat_ecc_ch_flt);
6512 		}
6513 	}
6514 }
6515 
6516 void
6517 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6518 {
6519 	int status;
6520 	ddi_fm_error_t de;
6521 
6522 	bzero(&de, sizeof (ddi_fm_error_t));
6523 
6524 	de.fme_version = DDI_FME_VERSION;
6525 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6526 	    FM_ENA_FMT1);
6527 	de.fme_flag = expected;
6528 	de.fme_bus_specific = (void *)aflt->flt_addr;
6529 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6530 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6531 		aflt->flt_panic = 1;
6532 }
6533 
6534 void
6535 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6536     errorq_t *eqp, uint_t flag)
6537 {
6538 	struct async_flt *aflt = (struct async_flt *)payload;
6539 
6540 	aflt->flt_erpt_class = error_class;
6541 	errorq_dispatch(eqp, payload, payload_sz, flag);
6542 }
6543 
/*
 * Deliberately empty.  The IO module may call this routine, but this
 * cpu module has nothing to do here: the SERD algorithm is handled by
 * the cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
}
6553 
6554 void
6555 adjust_hw_copy_limits(int ecache_size)
6556 {
6557 	/*
6558 	 * Set hw copy limits.
6559 	 *
6560 	 * /etc/system will be parsed later and can override one or more
6561 	 * of these settings.
6562 	 *
6563 	 * At this time, ecache size seems only mildly relevant.
6564 	 * We seem to run into issues with the d-cache and stalls
6565 	 * we see on misses.
6566 	 *
6567 	 * Cycle measurement indicates that 2 byte aligned copies fare
6568 	 * little better than doing things with VIS at around 512 bytes.
6569 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6570 	 * aligned is faster whenever the source and destination data
6571 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6572 	 * limit seems to be driven by the 2K write cache.
6573 	 * When more than 2K of copies are done in non-VIS mode, stores
6574 	 * backup in the write cache.  In VIS mode, the write cache is
6575 	 * bypassed, allowing faster cache-line writes aligned on cache
6576 	 * boundaries.
6577 	 *
6578 	 * In addition, in non-VIS mode, there is no prefetching, so
6579 	 * for larger copies, the advantage of prefetching to avoid even
6580 	 * occasional cache misses is enough to justify using the VIS code.
6581 	 *
6582 	 * During testing, it was discovered that netbench ran 3% slower
6583 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6584 	 * applications, data is only used once (copied to the output
6585 	 * buffer, then copied by the network device off the system).  Using
6586 	 * the VIS copy saves more L2 cache state.  Network copies are
6587 	 * around 1.3K to 1.5K in size for historical reasons.
6588 	 *
6589 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6590 	 * aligned copy even for large caches and 8 MB ecache.  The
6591 	 * infrastructure to allow different limits for different sized
6592 	 * caches is kept to allow further tuning in later releases.
6593 	 */
6594 
6595 	if (min_ecache_size == 0 && use_hw_bcopy) {
6596 		/*
6597 		 * First time through - should be before /etc/system
6598 		 * is read.
6599 		 * Could skip the checks for zero but this lets us
6600 		 * preserve any debugger rewrites.
6601 		 */
6602 		if (hw_copy_limit_1 == 0) {
6603 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6604 			priv_hcl_1 = hw_copy_limit_1;
6605 		}
6606 		if (hw_copy_limit_2 == 0) {
6607 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6608 			priv_hcl_2 = hw_copy_limit_2;
6609 		}
6610 		if (hw_copy_limit_4 == 0) {
6611 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6612 			priv_hcl_4 = hw_copy_limit_4;
6613 		}
6614 		if (hw_copy_limit_8 == 0) {
6615 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6616 			priv_hcl_8 = hw_copy_limit_8;
6617 		}
6618 		min_ecache_size = ecache_size;
6619 	} else {
6620 		/*
6621 		 * MP initialization. Called *after* /etc/system has
6622 		 * been parsed. One CPU has already been initialized.
6623 		 * Need to cater for /etc/system having scragged one
6624 		 * of our values.
6625 		 */
6626 		if (ecache_size == min_ecache_size) {
6627 			/*
6628 			 * Same size ecache. We do nothing unless we
6629 			 * have a pessimistic ecache setting. In that
6630 			 * case we become more optimistic (if the cache is
6631 			 * large enough).
6632 			 */
6633 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6634 				/*
6635 				 * Need to adjust hw_copy_limit* from our
6636 				 * pessimistic uniprocessor value to a more
6637 				 * optimistic UP value *iff* it hasn't been
6638 				 * reset.
6639 				 */
6640 				if ((ecache_size > 1048576) &&
6641 				    (priv_hcl_8 == hw_copy_limit_8)) {
6642 					if (ecache_size <= 2097152)
6643 						hw_copy_limit_8 = 4 *
6644 						    VIS_COPY_THRESHOLD;
6645 					else if (ecache_size <= 4194304)
6646 						hw_copy_limit_8 = 4 *
6647 						    VIS_COPY_THRESHOLD;
6648 					else
6649 						hw_copy_limit_8 = 4 *
6650 						    VIS_COPY_THRESHOLD;
6651 					priv_hcl_8 = hw_copy_limit_8;
6652 				}
6653 			}
6654 		} else if (ecache_size < min_ecache_size) {
6655 			/*
6656 			 * A different ecache size. Can this even happen?
6657 			 */
6658 			if (priv_hcl_8 == hw_copy_limit_8) {
6659 				/*
6660 				 * The previous value that we set
6661 				 * is unchanged (i.e., it hasn't been
6662 				 * scragged by /etc/system). Rewrite it.
6663 				 */
6664 				if (ecache_size <= 1048576)
6665 					hw_copy_limit_8 = 8 *
6666 					    VIS_COPY_THRESHOLD;
6667 				else if (ecache_size <= 2097152)
6668 					hw_copy_limit_8 = 8 *
6669 					    VIS_COPY_THRESHOLD;
6670 				else if (ecache_size <= 4194304)
6671 					hw_copy_limit_8 = 8 *
6672 					    VIS_COPY_THRESHOLD;
6673 				else
6674 					hw_copy_limit_8 = 10 *
6675 					    VIS_COPY_THRESHOLD;
6676 				priv_hcl_8 = hw_copy_limit_8;
6677 				min_ecache_size = ecache_size;
6678 			}
6679 		}
6680 	}
6681 }
6682 
6683 /*
6684  * Called from illegal instruction trap handler to see if we can attribute
6685  * the trap to a fpras check.
6686  */
6687 int
6688 fpras_chktrap(struct regs *rp)
6689 {
6690 	int op;
6691 	struct fpras_chkfngrp *cgp;
6692 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6693 
6694 	if (fpras_chkfngrps == NULL)
6695 		return (0);
6696 
6697 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6698 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6699 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6700 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6701 			break;
6702 	}
6703 	if (op == FPRAS_NCOPYOPS)
6704 		return (0);
6705 
6706 	/*
6707 	 * This is an fpRAS failure caught through an illegal
6708 	 * instruction - trampoline.
6709 	 */
6710 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6711 	rp->r_npc = rp->r_pc + 4;
6712 	return (1);
6713 }
6714 
6715 /*
6716  * fpras_failure is called when a fpras check detects a bad calculation
6717  * result or an illegal instruction trap is attributed to an fpras
6718  * check.  In all cases we are still bound to CPU.
6719  */
6720 int
6721 fpras_failure(int op, int how)
6722 {
6723 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6724 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6725 	ch_async_flt_t ch_flt;
6726 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6727 	struct fpras_chkfn *sfp, *cfp;
6728 	uint32_t *sip, *cip;
6729 	int i;
6730 
6731 	/*
6732 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6733 	 * the time in which we dispatch an ereport and (if applicable) panic.
6734 	 */
6735 	use_hw_bcopy_orig = use_hw_bcopy;
6736 	use_hw_bzero_orig = use_hw_bzero;
6737 	hcl1_orig = hw_copy_limit_1;
6738 	hcl2_orig = hw_copy_limit_2;
6739 	hcl4_orig = hw_copy_limit_4;
6740 	hcl8_orig = hw_copy_limit_8;
6741 	use_hw_bcopy = use_hw_bzero = 0;
6742 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6743 	    hw_copy_limit_8 = 0;
6744 
6745 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6746 	aflt->flt_id = gethrtime_waitfree();
6747 	aflt->flt_class = CPU_FAULT;
6748 	aflt->flt_inst = CPU->cpu_id;
6749 	aflt->flt_status = (how << 8) | op;
6750 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6751 	ch_flt.flt_type = CPU_FPUERR;
6752 
6753 	/*
6754 	 * We must panic if the copy operation had no lofault protection -
6755 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6756 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6757 	 */
6758 	aflt->flt_panic = (curthread->t_lofault == NULL);
6759 
6760 	/*
6761 	 * XOR the source instruction block with the copied instruction
6762 	 * block - this will show us which bit(s) are corrupted.
6763 	 */
6764 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6765 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6766 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6767 		sip = &sfp->fpras_blk0[0];
6768 		cip = &cfp->fpras_blk0[0];
6769 	} else {
6770 		sip = &sfp->fpras_blk1[0];
6771 		cip = &cfp->fpras_blk1[0];
6772 	}
6773 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6774 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6775 
6776 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6777 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6778 
6779 	if (aflt->flt_panic)
6780 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6781 
6782 	/*
6783 	 * We get here for copyin/copyout and kcopy or bcopy where the
6784 	 * caller has used on_fault.  We will flag the error so that
6785 	 * the process may be killed  The trap_async_hwerr mechanism will
6786 	 * take appropriate further action (such as a reboot, contract
6787 	 * notification etc).  Since we may be continuing we will
6788 	 * restore the global hardware copy acceleration switches.
6789 	 *
6790 	 * When we return from this function to the copy function we want to
6791 	 * avoid potentially bad data being used, ie we want the affected
6792 	 * copy function to return an error.  The caller should therefore
6793 	 * invoke its lofault handler (which always exists for these functions)
6794 	 * which will return the appropriate error.
6795 	 */
6796 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6797 	aston(curthread);
6798 
6799 	use_hw_bcopy = use_hw_bcopy_orig;
6800 	use_hw_bzero = use_hw_bzero_orig;
6801 	hw_copy_limit_1 = hcl1_orig;
6802 	hw_copy_limit_2 = hcl2_orig;
6803 	hw_copy_limit_4 = hcl4_orig;
6804 	hw_copy_limit_8 = hcl8_orig;
6805 
6806 	return (1);
6807 }
6808 
6809 #define	VIS_BLOCKSIZE		64
6810 
6811 int
6812 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
6813 {
6814 	int ret, watched;
6815 
6816 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6817 	ret = dtrace_blksuword32(addr, data, 0);
6818 	if (watched)
6819 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6820 
6821 	return (ret);
6822 }
6823 
/*
 * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
 * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
 * CEEN from the EER to disable traps for further disrupting error types
 * on that cpu.  We could cross-call instead, but that has a larger
 * instruction and data footprint than cross-trapping, and the cpu is known
 * to be faulted.
 *
 * cp is the cpu entering the faulted state; its cpu_id selects the
 * target of the cross-trap.  cpu_faulted_exit() undoes this by sending
 * the same cross-trap with EER_SET_SETBITS.
 */

void
cpu_faulted_enter(struct cpu *cp)
{
	/* Ask the target to run set_error_enable_tl1 and clear CEEN. */
	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
}
6838 
6839 /*
6840  * Called when a cpu leaves the CPU_FAULTED state to return to one of
6841  * offline, spare, or online (by the cpu requesting this state change).
6842  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
6843  * disrupting error bits that have accumulated without trapping, then
6844  * we cross-trap to re-enable CEEN controlled traps.
6845  */
6846 void
6847 cpu_faulted_exit(struct cpu *cp)
6848 {
6849 	ch_cpu_errors_t cpu_error_regs;
6850 
6851 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
6852 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
6853 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
6854 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
6855 	    (uint64_t)&cpu_error_regs, 0);
6856 
6857 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
6858 }
6859 
6860 /*
6861  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
6862  * the errors in the original AFSR, 0 otherwise.
6863  *
6864  * For all procs if the initial error was a BERR or TO, then it is possible
6865  * that we may have caused a secondary BERR or TO in the process of logging the
6866  * inital error via cpu_run_bus_error_handlers().  If this is the case then
6867  * if the request was protected then a panic is still not necessary, if not
6868  * protected then aft_panic is already set - so either way there's no need
6869  * to set aft_panic for the secondary error.
6870  *
6871  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
6872  * a store merge, then the error handling code will call cpu_deferred_error().
6873  * When clear_errors() is called, it will determine that secondary errors have
6874  * occurred - in particular, the store merge also caused a EDU and WDU that
6875  * weren't discovered until this point.
6876  *
6877  * We do three checks to verify that we are in this case.  If we pass all three
6878  * checks, we return 1 to indicate that we should not panic.  If any unexpected
6879  * errors occur, we return 0.
6880  *
6881  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
6882  * handled in cpu_disrupting_errors().  Since this function is not even called
6883  * in the case we are interested in, we just return 0 for these processors.
6884  */
6885 /*ARGSUSED*/
6886 static int
6887 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
6888     uint64_t t_afar)
6889 {
6890 #if defined(CHEETAH_PLUS)
6891 #else	/* CHEETAH_PLUS */
6892 	struct async_flt *aflt = (struct async_flt *)ch_flt;
6893 #endif	/* CHEETAH_PLUS */
6894 
6895 	/*
6896 	 * Was the original error a BERR or TO and only a BERR or TO
6897 	 * (multiple errors are also OK)
6898 	 */
6899 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
6900 		/*
6901 		 * Is the new error a BERR or TO and only a BERR or TO
6902 		 * (multiple errors are also OK)
6903 		 */
6904 		if ((ch_flt->afsr_errs &
6905 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
6906 			return (1);
6907 	}
6908 
6909 #if defined(CHEETAH_PLUS)
6910 	return (0);
6911 #else	/* CHEETAH_PLUS */
6912 	/*
6913 	 * Now look for secondary effects of a UE on cheetah/jalapeno
6914 	 *
6915 	 * Check the original error was a UE, and only a UE.  Note that
6916 	 * the ME bit will cause us to fail this check.
6917 	 */
6918 	if (t_afsr_errs != C_AFSR_UE)
6919 		return (0);
6920 
6921 	/*
6922 	 * Check the secondary errors were exclusively an EDU and/or WDU.
6923 	 */
6924 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
6925 		return (0);
6926 
6927 	/*
6928 	 * Check the AFAR of the original error and secondary errors
6929 	 * match to the 64-byte boundary
6930 	 */
6931 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
6932 		return (0);
6933 
6934 	/*
6935 	 * We've passed all the checks, so it's a secondary error!
6936 	 */
6937 	return (1);
6938 #endif	/* CHEETAH_PLUS */
6939 }
6940 
6941 /*
6942  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
6943  * is checked for any valid errors.  If found, the error type is
6944  * returned. If not found, the flt_type is checked for L1$ parity errors.
6945  */
6946 /*ARGSUSED*/
6947 static uint8_t
6948 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
6949 {
6950 #if defined(JALAPENO)
6951 	/*
6952 	 * Currently, logging errors to the SC is not supported on Jalapeno
6953 	 */
6954 	return (PLAT_ECC_ERROR2_NONE);
6955 #else
6956 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6957 
6958 	switch (ch_flt->flt_bit) {
6959 	case C_AFSR_CE:
6960 		return (PLAT_ECC_ERROR2_CE);
6961 	case C_AFSR_UCC:
6962 	case C_AFSR_EDC:
6963 	case C_AFSR_WDC:
6964 	case C_AFSR_CPC:
6965 		return (PLAT_ECC_ERROR2_L2_CE);
6966 	case C_AFSR_EMC:
6967 		return (PLAT_ECC_ERROR2_EMC);
6968 	case C_AFSR_IVC:
6969 		return (PLAT_ECC_ERROR2_IVC);
6970 	case C_AFSR_UE:
6971 		return (PLAT_ECC_ERROR2_UE);
6972 	case C_AFSR_UCU:
6973 	case C_AFSR_EDU:
6974 	case C_AFSR_WDU:
6975 	case C_AFSR_CPU:
6976 		return (PLAT_ECC_ERROR2_L2_UE);
6977 	case C_AFSR_IVU:
6978 		return (PLAT_ECC_ERROR2_IVU);
6979 	case C_AFSR_TO:
6980 		return (PLAT_ECC_ERROR2_TO);
6981 	case C_AFSR_BERR:
6982 		return (PLAT_ECC_ERROR2_BERR);
6983 #if defined(CHEETAH_PLUS)
6984 	case C_AFSR_L3_EDC:
6985 	case C_AFSR_L3_UCC:
6986 	case C_AFSR_L3_CPC:
6987 	case C_AFSR_L3_WDC:
6988 		return (PLAT_ECC_ERROR2_L3_CE);
6989 	case C_AFSR_IMC:
6990 		return (PLAT_ECC_ERROR2_IMC);
6991 	case C_AFSR_TSCE:
6992 		return (PLAT_ECC_ERROR2_L2_TSCE);
6993 	case C_AFSR_THCE:
6994 		return (PLAT_ECC_ERROR2_L2_THCE);
6995 	case C_AFSR_L3_MECC:
6996 		return (PLAT_ECC_ERROR2_L3_MECC);
6997 	case C_AFSR_L3_THCE:
6998 		return (PLAT_ECC_ERROR2_L3_THCE);
6999 	case C_AFSR_L3_CPU:
7000 	case C_AFSR_L3_EDU:
7001 	case C_AFSR_L3_UCU:
7002 	case C_AFSR_L3_WDU:
7003 		return (PLAT_ECC_ERROR2_L3_UE);
7004 	case C_AFSR_DUE:
7005 		return (PLAT_ECC_ERROR2_DUE);
7006 	case C_AFSR_DTO:
7007 		return (PLAT_ECC_ERROR2_DTO);
7008 	case C_AFSR_DBERR:
7009 		return (PLAT_ECC_ERROR2_DBERR);
7010 #endif	/* CHEETAH_PLUS */
7011 	default:
7012 		switch (ch_flt->flt_type) {
7013 #if defined(CPU_IMP_L1_CACHE_PARITY)
7014 		case CPU_IC_PARITY:
7015 			return (PLAT_ECC_ERROR2_IPE);
7016 		case CPU_DC_PARITY:
7017 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7018 				if (ch_flt->parity_data.dpe.cpl_cache ==
7019 				    CPU_PC_PARITY) {
7020 					return (PLAT_ECC_ERROR2_PCACHE);
7021 				}
7022 			}
7023 			return (PLAT_ECC_ERROR2_DPE);
7024 #endif /* CPU_IMP_L1_CACHE_PARITY */
7025 		case CPU_ITLB_PARITY:
7026 			return (PLAT_ECC_ERROR2_ITLB);
7027 		case CPU_DTLB_PARITY:
7028 			return (PLAT_ECC_ERROR2_DTLB);
7029 		default:
7030 			return (PLAT_ECC_ERROR2_NONE);
7031 		}
7032 	}
7033 #endif	/* JALAPENO */
7034 }
7035