xref: /titanic_51/usr/src/uts/sun4u/cpu/us3_common.c (revision 505d05c73a6e56769f263d4803b22eddd168ee24)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/ddi.h>
32 #include <sys/sysmacros.h>
33 #include <sys/archsystm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/machparam.h>
36 #include <sys/machsystm.h>
37 #include <sys/machthread.h>
38 #include <sys/cpu.h>
39 #include <sys/cmp.h>
40 #include <sys/elf_SPARC.h>
41 #include <vm/vm_dep.h>
42 #include <vm/hat_sfmmu.h>
43 #include <vm/seg_kpm.h>
44 #include <sys/cpuvar.h>
45 #include <sys/cheetahregs.h>
46 #include <sys/us3_module.h>
47 #include <sys/async.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/dditypes.h>
51 #include <sys/prom_debug.h>
52 #include <sys/prom_plat.h>
53 #include <sys/cpu_module.h>
54 #include <sys/sysmacros.h>
55 #include <sys/intreg.h>
56 #include <sys/clock.h>
57 #include <sys/platform_module.h>
58 #include <sys/machtrap.h>
59 #include <sys/ontrap.h>
60 #include <sys/panic.h>
61 #include <sys/memlist.h>
62 #include <sys/bootconf.h>
63 #include <sys/ivintr.h>
64 #include <sys/atomic.h>
65 #include <sys/taskq.h>
66 #include <sys/note.h>
67 #include <sys/ndifm.h>
68 #include <sys/ddifm.h>
69 #include <sys/fm/protocol.h>
70 #include <sys/fm/util.h>
71 #include <sys/fm/cpu/UltraSPARC-III.h>
72 #include <sys/fpras_impl.h>
73 #include <sys/dtrace.h>
74 #include <sys/watchpoint.h>
75 #include <sys/plat_ecc_unum.h>
76 #include <sys/cyclic.h>
77 #include <sys/errorq.h>
78 #include <sys/errclassify.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 /*
85  * Note that 'Cheetah PRM' refers to:
86  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
87  */
88 
89 /*
90  * Per CPU pointers to physical address of TL>0 logout data areas.
91  * These pointers have to be in the kernel nucleus to avoid MMU
92  * misses.
93  */
94 uint64_t ch_err_tl1_paddrs[NCPU];
95 
96 /*
97  * One statically allocated structure to use during startup/DR
98  * to prevent unnecessary panics.
99  */
100 ch_err_tl1_data_t ch_err_tl1_data;
101 
102 /*
103  * Per CPU pending error at TL>0, used by level15 softint handler
104  */
105 uchar_t ch_err_tl1_pending[NCPU];
106 
107 /*
108  * For deferred CE re-enable after trap.
109  */
110 taskq_t		*ch_check_ce_tq;
111 
112 /*
113  * Internal functions.
114  */
115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
118     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
119 static int clear_ecc(struct async_flt *ecc);
120 #if defined(CPU_IMP_ECACHE_ASSOC)
121 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
122 #endif
123 static int cpu_ecache_set_size(struct cpu *cp);
124 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
125 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
126 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
127 static int cpu_ectag_pa_to_subblk_state(int cachesize,
128 				uint64_t subaddr, uint64_t tag);
129 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
130 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
131 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
132 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
133 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
134 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
135 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
136 static void cpu_scrubphys(struct async_flt *aflt);
137 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
138     int *, int *);
139 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
140 static void cpu_ereport_init(struct async_flt *aflt);
141 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
142 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
143 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
144     ch_cpu_logout_t *clop);
145 static int cpu_ce_delayed_ec_logout(uint64_t);
146 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
147 
148 #ifdef	CHEETAHPLUS_ERRATUM_25
149 static int mondo_recover_proc(uint16_t, int);
150 static void cheetah_nudge_init(void);
151 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
152     cyc_time_t *when);
153 static void cheetah_nudge_buddy(void);
154 #endif	/* CHEETAHPLUS_ERRATUM_25 */
155 
156 #if defined(CPU_IMP_L1_CACHE_PARITY)
157 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
158 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
159 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
160     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
161 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
162 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
163 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
164 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
165 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
166 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
167 #endif	/* CPU_IMP_L1_CACHE_PARITY */
168 
169 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
170     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
171     int *segsp, int *banksp, int *mcidp);
172 
173 /*
174  * This table is used to determine which bit(s) is(are) bad when an ECC
175  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
176  * of this array have the following semantics:
177  *
178  *      00-127  The number of the bad bit, when only one bit is bad.
179  *      128     ECC bit C0 is bad.
180  *      129     ECC bit C1 is bad.
181  *      130     ECC bit C2 is bad.
182  *      131     ECC bit C3 is bad.
183  *      132     ECC bit C4 is bad.
184  *      133     ECC bit C5 is bad.
185  *      134     ECC bit C6 is bad.
186  *      135     ECC bit C7 is bad.
187  *      136     ECC bit C8 is bad.
188  *	137-143 reserved for Mtag Data and ECC.
189  *      144(M2) Two bits are bad within a nibble.
190  *      145(M3) Three bits are bad within a nibble.
191  *      146(M4) Four bits are bad within a nibble.
192  *      147(M)  Multiple bits (5 or more) are bad.
193  *      148     NO bits are bad.
194  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
195  */
196 
197 #define	C0	128
198 #define	C1	129
199 #define	C2	130
200 #define	C3	131
201 #define	C4	132
202 #define	C5	133
203 #define	C6	134
204 #define	C7	135
205 #define	C8	136
206 #define	MT0	137	/* Mtag Data bit 0 */
207 #define	MT1	138
208 #define	MT2	139
209 #define	MTC0	140	/* Mtag Check bit 0 */
210 #define	MTC1	141
211 #define	MTC2	142
212 #define	MTC3	143
213 #define	M2	144
214 #define	M3	145
215 #define	M4	146
216 #define	M	147
217 #define	NA	148
218 #if defined(JALAPENO) || defined(SERRANO)
219 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
220 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
221 #define	SLAST	S003MEM	/* last special syndrome */
222 #else /* JALAPENO || SERRANO */
223 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
224 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
225 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
226 #define	SLAST	S11C	/* last special syndrome */
227 #endif /* JALAPENO || SERRANO */
228 #if defined(JALAPENO) || defined(SERRANO)
229 #define	BPAR0	152	/* syndrom 152 through 167 for bus parity */
230 #define	BPAR15	167
231 #endif	/* JALAPENO || SERRANO */
232 
/*
 * Syndrome-to-bad-bit lookup: 32 rows of 16 entries (512 total), one entry
 * per syndrome value.  Two of the rows are chosen at compile time: builds
 * with JALAPENO or SERRANO defined carry M2 where all other builds carry
 * the special codes S071 and S11C.  S003 appears at index 3 in both builds.
 */
233 static uint8_t ecc_syndrome_tab[] =
234 {
235 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
236 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
237 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
238 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
239 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
240 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
241 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
242 #if defined(JALAPENO) || defined(SERRANO)
243 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
244 #else	/* JALAPENO || SERRANO */
245 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
246 #endif	/* JALAPENO || SERRANO */
247 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
248 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
249 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
250 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
251 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
252 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
253 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
254 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
255 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
256 #if defined(JALAPENO) || defined(SERRANO)
257 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
258 #else	/* JALAPENO || SERRANO */
259 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
260 #endif	/* JALAPENO || SERRANO */
261 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
262 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
263 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
264 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
265 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
266 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
267 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
268 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
269 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
270 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
271 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
272 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
273 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
274 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
275 };
276 
277 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
278 
279 #if !(defined(JALAPENO) || defined(SERRANO))
280 /*
281  * This table is used to determine which bit(s) is(are) bad when a Mtag
282  * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
283  * of this array have the following semantics:
284  *
285  *      -1	Invalid mtag syndrome.
286  *      137     Mtag Data 0 is bad.
287  *      138     Mtag Data 1 is bad.
288  *      139     Mtag Data 2 is bad.
289  *      140     Mtag ECC 0 is bad.
290  *      141     Mtag ECC 1 is bad.
291  *      142     Mtag ECC 2 is bad.
292  *      143     Mtag ECC 3 is bad.
293  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
294  */
/*
 * 16-entry lookup, one entry per syndrome value.  Index 0 yields NA; every
 * other entry is one of MT0-MT2, MTC0-MTC3 or M2.
 * NOTE(review): the header comment lists -1 for an invalid mtag syndrome,
 * but no entry in this initializer is -1 -- confirm which is intended.
 */
295 short mtag_syndrome_tab[] =
296 {
297 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
298 };
299 
300 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
301 
302 #else /* !(JALAPENO || SERRANO) */
303 
304 #define	BSYND_TBL_SIZE	16
305 
306 #endif /* !(JALAPENO || SERRANO) */
307 
308 /*
309  * CE initial classification and subsequent action lookup table
310  */
311 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
312 static int ce_disp_inited;
313 
314 /*
315  * Set to disable leaky and partner check for memory correctables
316  */
317 int ce_xdiag_off;
318 
319 /*
320  * The following are not incremented atomically so are indicative only
321  */
322 static int ce_xdiag_drops;
323 static int ce_xdiag_lkydrops;
324 static int ce_xdiag_ptnrdrops;
325 static int ce_xdiag_bad;
326 
327 /*
328  * CE leaky check callback structure
329  */
/*
 * Bundles a fault record with an error queue and one of its elements.
 * NOTE(review): presumably the queue/element pair is where the fault is
 * (re)dispatched once the leaky check has run -- confirm against the
 * callback that consumes this structure.
 */
330 typedef struct {
331 	struct async_flt *lkycb_aflt;	/* fault record */
332 	errorq_t *lkycb_eqp;		/* error queue */
333 	errorq_elem_t *lkycb_eqep;	/* element of that error queue */
334 } ce_lkychk_cb_t;
335 
336 /*
337  * defines for various ecache_flush_flag's
338  */
339 #define	ECACHE_FLUSH_LINE	1
340 #define	ECACHE_FLUSH_ALL	2
341 
342 /*
343  * STICK sync
344  */
345 #define	STICK_ITERATION 10
346 #define	MAX_TSKEW	1
347 #define	EV_A_START	0
348 #define	EV_A_END	1
349 #define	EV_B_START	2
350 #define	EV_B_END	3
351 #define	EVENTS		4
352 
353 static int64_t stick_iter = STICK_ITERATION;
354 static int64_t stick_tsk = MAX_TSKEW;
355 
/*
 * Command codes used by the STICK sync code; the current command is held in
 * stick_sync_cmd, which is initialized to EVENT_NULL.  EVENT_NULL is pinned
 * to 0 and the remaining values follow sequentially.
 */
356 typedef enum {
357 	EVENT_NULL = 0,
358 	SLAVE_START,
359 	SLAVE_CONT,
360 	MASTER_START
361 } event_cmd_t;
362 
363 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
364 static int64_t timestamp[EVENTS];
365 static volatile int slave_done;
366 
367 #ifdef DEBUG
368 #define	DSYNC_ATTEMPTS 64
369 typedef struct {
370 	int64_t	skew_val[DSYNC_ATTEMPTS];
371 } ss_t;
372 
373 ss_t stick_sync_stats[NCPU];
374 #endif /* DEBUG */
375 
376 /*
377  * Maximum number of contexts for Cheetah.
378  */
379 #define	MAX_NCTXS	(1 << 13)
380 
381 /* Will be set !NULL for Cheetah+ and derivatives. */
382 uchar_t *ctx_pgsz_array = NULL;
383 #if defined(CPU_IMP_DUAL_PAGESIZE)
384 static uchar_t ctx_pgsz_arr[MAX_NCTXS];
385 uint_t disable_dual_pgsz = 0;
386 #endif	/* CPU_IMP_DUAL_PAGESIZE */
387 
388 /*
389  * Save the cache bootup state for use when internal
390  * caches are to be re-enabled after an error occurs.
391  */
392 uint64_t cache_boot_state;
393 
394 /*
395  * PA[22:0] represent Displacement in Safari configuration space.
396  */
397 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
398 
/*
 * Table pairing each *_ECLK_n_DIV constant with its *_ECLK_n constant for
 * n = 1, 2 and 32.  JALAPENO/SERRANO builds use the JBUS constants, all
 * other builds the SAFARI ones.  The list ends with an all-zero entry.
 * NOTE(review): presumably divisor -> bus config register field value;
 * confirm against the bus_config_eclk_t definition.
 */
399 bus_config_eclk_t bus_config_eclk[] = {
400 #if defined(JALAPENO) || defined(SERRANO)
401 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
402 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
403 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
404 #else /* JALAPENO || SERRANO */
405 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
406 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
407 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
408 #endif /* JALAPENO || SERRANO */
409 	{0, 0}	/* terminator */
410 };
411 
412 /*
413  * Interval for deferred CEEN reenable
414  */
415 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
416 
417 /*
418  * set in /etc/system to control logging of user BERR/TO's
419  */
420 int cpu_berr_to_verbose = 0;
421 
422 /*
423  * set to 0 in /etc/system to defer CEEN reenable for all CEs
424  */
425 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
426 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
427 
428 /*
429  * Set of all offline cpus
430  */
431 cpuset_t cpu_offline_set;
432 
433 static void cpu_delayed_check_ce_errors(void *);
434 static void cpu_check_ce_errors(void *);
435 void cpu_error_ecache_flush(ch_async_flt_t *);
436 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
437 static void cpu_log_and_clear_ce(ch_async_flt_t *);
438 void cpu_ce_detected(ch_cpu_errors_t *, int);
439 
440 /*
441  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
442  * memory refresh interval of current DIMMs (64ms).  After initial fix that
443  * gives at least one full refresh cycle in which the cell can leak
444  * (whereafter further refreshes simply reinforce any incorrect bit value).
445  */
446 clock_t cpu_ce_lkychk_timeout_usec = 128000;
447 
448 /*
449  * CE partner check partner caching period in seconds
450  */
451 int cpu_ce_ptnr_cachetime_sec = 60;
452 
/*
 * Sets trap table entry ttentry by overwriting it with the eight
 * instructions (32 bytes) found at ttlabel, then calls flush_instr_mem()
 * on those 32 bytes of ttentry.
 *
 * The body is wrapped in do { } while (0) so that both statements stay
 * together when the macro is the body of an unbraced if/else, and both
 * arguments are parenthesized.  Invoke it like a function call, with a
 * trailing semicolon.
 */
#define	CH_SET_TRAP(ttentry, ttlabel)					\
	do {								\
		bcopy((const void *)&(ttlabel), &(ttentry), 32);	\
		flush_instr_mem((caddr_t)&(ttentry), 32);		\
	} while (0)
459 
460 static int min_ecache_size;
461 static uint_t priv_hcl_1;
462 static uint_t priv_hcl_2;
463 static uint_t priv_hcl_4;
464 static uint_t priv_hcl_8;
465 
466 void
467 cpu_setup(void)
468 {
469 	extern int at_flags;
470 	extern int disable_delay_tlb_flush, delay_tlb_flush;
471 	extern int cpc_has_overflow_intr;
472 	extern int disable_text_largepages;
473 	extern int use_text_pgsz4m;
474 
475 	/*
476 	 * Setup chip-specific trap handlers.
477 	 */
478 	cpu_init_trap();
479 
480 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
481 
482 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
483 
484 	/*
485 	 * save the cache bootup state.
486 	 */
487 	cache_boot_state = get_dcu() & DCU_CACHE;
488 
489 	/*
490 	 * Use the maximum number of contexts available for Cheetah
491 	 * unless it has been tuned for debugging.
492 	 * We are checking against 0 here since this value can be patched
493 	 * while booting.  It can not be patched via /etc/system since it
494 	 * will be patched too late and thus cause the system to panic.
495 	 */
496 	if (nctxs == 0)
497 		nctxs = MAX_NCTXS;
498 
499 	/*
500 	 * Due to the number of entries in the fully-associative tlb
501 	 * this may have to be tuned lower than in spitfire.
502 	 */
503 	pp_slots = MIN(8, MAXPP_SLOTS);
504 
505 	/*
506 	 * Block stores do not invalidate all pages of the d$, pagecopy
507 	 * et. al. need virtual translations with virtual coloring taken
508 	 * into consideration.  prefetch/ldd will pollute the d$ on the
509 	 * load side.
510 	 */
511 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
512 
513 	if (use_page_coloring) {
514 		do_pg_coloring = 1;
515 		if (use_virtual_coloring)
516 			do_virtual_coloring = 1;
517 	}
518 
519 	isa_list =
520 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
521 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523 
524 	/*
525 	 * On Panther-based machines, this should
526 	 * also include AV_SPARC_POPC too
527 	 */
528 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529 
530 	/*
531 	 * On cheetah, there's no hole in the virtual address space
532 	 */
533 	hole_start = hole_end = 0;
534 
535 	/*
536 	 * The kpm mapping window.
537 	 * kpm_size:
538 	 *	The size of a single kpm range.
539 	 *	The overall size will be: kpm_size * vac_colors.
540 	 * kpm_vbase:
541 	 *	The virtual start address of the kpm range within the kernel
542 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
543 	 */
544 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545 	kpm_size_shift = 43;
546 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547 	kpm_smallpages = 1;
548 
549 	/*
550 	 * The traptrace code uses either %tick or %stick for
551 	 * timestamping.  We have %stick so we can use it.
552 	 */
553 	traptrace_use_stick = 1;
554 
555 	/*
556 	 * Cheetah has a performance counter overflow interrupt
557 	 */
558 	cpc_has_overflow_intr = 1;
559 
560 	/*
561 	 * Use cheetah flush-all support
562 	 */
563 	if (!disable_delay_tlb_flush)
564 		delay_tlb_flush = 1;
565 
566 #if defined(CPU_IMP_DUAL_PAGESIZE)
567 	/*
568 	 * Use Cheetah+ and later dual page size support.
569 	 */
570 	if (!disable_dual_pgsz) {
571 		ctx_pgsz_array = ctx_pgsz_arr;
572 	}
573 #endif	/* CPU_IMP_DUAL_PAGESIZE */
574 
575 	/*
576 	 * Declare that this architecture/cpu combination does fpRAS.
577 	 */
578 	fpras_implemented = 1;
579 
580 	/*
581 	 * Enable 4M pages to be used for mapping user text by default.  Don't
582 	 * use large pages for initialized data segments since we may not know
583 	 * at exec() time what should be the preferred large page size for DTLB
584 	 * programming.
585 	 */
586 	use_text_pgsz4m = 1;
587 	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
588 	    (1 << TTE32M) | (1 << TTE256M);
589 
590 	/*
591 	 * Setup CE lookup table
592 	 */
593 	CE_INITDISPTBL_POPULATE(ce_disp_table);
594 	ce_disp_inited = 1;
595 }
596 
597 /*
598  * Called by setcpudelay
599  */
600 void
601 cpu_init_tick_freq(void)
602 {
603 	/*
604 	 * For UltraSPARC III and beyond we want to use the
605 	 * system clock rate as the basis for low level timing,
606 	 * due to support of mixed speed CPUs and power managment.
607 	 */
608 	if (system_clock_freq == 0)
609 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
610 
611 	sys_tick_freq = system_clock_freq;
612 }
613 
614 #ifdef CHEETAHPLUS_ERRATUM_25
615 /*
616  * Tunables
617  */
618 int cheetah_bpe_off = 0;
619 int cheetah_sendmondo_recover = 1;
620 int cheetah_sendmondo_fullscan = 0;
621 int cheetah_sendmondo_recover_delay = 5;
622 
623 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
624 
625 /*
626  * Recovery Statistics
627  */
/*
 * One slot of the cheetah_livelock_hist[] history ring.  mondo_recover()
 * takes the next slot for each recovery it runs and fills in all four
 * fields.
 */
628 typedef struct cheetah_livelock_entry	{
629 	int cpuid;		/* fallen cpu */
630 	int buddy;		/* cpu that ran recovery */
631 	clock_t lbolt;		/* when recovery started */
632 	hrtime_t recovery_time;	/* time spent in recovery */
633 } cheetah_livelock_entry_t;
634 
635 #define	CHEETAH_LIVELOCK_NENTRY	32
636 
637 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
638 int cheetah_livelock_entry_nxt;
639 
/*
 * Point statp at the next slot of the cheetah_livelock_hist[] ring and
 * advance the ring index, wrapping to 0 after the last slot.  The index
 * update is a plain (non-atomic) increment.
 *
 * This deliberately stays a bare brace block rather than do { } while (0):
 * it is invoked in this file without a trailing semicolon.
 */
#define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
	(statp) = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
		cheetah_livelock_entry_nxt = 0;				\
	}								\
}

/*
 * Store val into field item of the history entry statp points to.
 * Arguments are parenthesized so that expression arguments (for example
 * "base + i") bind as the caller intends.
 */
#define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	\
	((statp)->item = (val))
648 
/*
 * Recovery counters and high-water marks.  They are updated through the
 * CHEETAH_LIVELOCK_STAT, CHEETAH_LIVELOCK_STATSET and
 * CHEETAH_LIVELOCK_MAXSTAT macros, none of which use atomic operations.
 */
649 struct {
650 	hrtime_t hrt;		/* maximum recovery time */
651 	int recovery;		/* recovered */
652 	int full_claimed;	/* maximum pages claimed in full recovery */
653 	int proc_entry;		/* attempted to claim TSB */
654 	int proc_tsb_scan;	/* tsb scanned */
655 	int proc_tsb_partscan;	/* tsb partially scanned */
656 	int proc_tsb_fullscan;	/* whole tsb scanned */
657 	int proc_claimed;	/* maximum pages claimed in tsb scan */
658 	int proc_user;		/* user thread */
659 	int proc_kernel;	/* kernel thread */
660 	int proc_onflt;		/* bad stack */
661 	int proc_cpu;		/* null cpu */
662 	int proc_thread;	/* null thread */
663 	int proc_proc;		/* null proc */
664 	int proc_as;		/* null as */
665 	int proc_hat;		/* null hat */
666 	int proc_hat_inval;	/* hat contents don't make sense */
667 	int proc_hat_busy;	/* hat is changing TSBs */
668 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
669 	int proc_cnum_bad;	/* cnum out of range */
670 	int proc_cnum;		/* last cnum processed */
671 	tte_t proc_tte;		/* last tte processed */
672 } cheetah_livelock_stat;
673 
/* Bump counter item of cheetah_livelock_stat by one (not atomic). */
#define	CHEETAH_LIVELOCK_STAT(item)	(cheetah_livelock_stat.item++)

/* Overwrite field item of cheetah_livelock_stat with value. */
#define	CHEETAH_LIVELOCK_STATSET(item, value)		\
	(cheetah_livelock_stat.item = (value))

/*
 * Raise field item of cheetah_livelock_stat to value if value is larger.
 * value is parenthesized so that conditional or other low-precedence
 * expressions compare as a whole; note that it is evaluated twice, so it
 * must not have side effects.
 */
#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
	if ((value) > cheetah_livelock_stat.item)	\
		cheetah_livelock_stat.item = (value);	\
}
683 
684 /*
685  * Attempt to recover a cpu by claiming every cache line as saved
686  * in the TSB that the non-responsive cpu is using. Since we can't
687  * grab any adaptive lock, this is at best an attempt to do so. Because
688  * we don't grab any locks, we must operate under the protection of
689  * on_fault().
690  *
691  * Return 1 if cpuid could be recovered, 0 if failed.
692  */
693 int
694 mondo_recover_proc(uint16_t cpuid, int bn)
695 {
696 	label_t ljb;
697 	cpu_t *cp;
698 	kthread_t *t;
699 	proc_t *p;
700 	struct as *as;
701 	struct hat *hat;
702 	short  cnum;
703 	struct tsb_info *tsbinfop;
704 	struct tsbe *tsbep;
705 	caddr_t tsbp;
706 	caddr_t end_tsbp;
707 	uint64_t paddr;
708 	uint64_t idsr;
709 	u_longlong_t pahi, palo;
710 	int pages_claimed = 0;
711 	tte_t tsbe_tte;
712 	int tried_kernel_tsb = 0;
713 
714 	CHEETAH_LIVELOCK_STAT(proc_entry);
715 
716 	if (on_fault(&ljb)) {
717 		CHEETAH_LIVELOCK_STAT(proc_onflt);
718 		goto badstruct;
719 	}
720 
721 	if ((cp = cpu[cpuid]) == NULL) {
722 		CHEETAH_LIVELOCK_STAT(proc_cpu);
723 		goto badstruct;
724 	}
725 
726 	if ((t = cp->cpu_thread) == NULL) {
727 		CHEETAH_LIVELOCK_STAT(proc_thread);
728 		goto badstruct;
729 	}
730 
731 	if ((p = ttoproc(t)) == NULL) {
732 		CHEETAH_LIVELOCK_STAT(proc_proc);
733 		goto badstruct;
734 	}
735 
736 	if ((as = p->p_as) == NULL) {
737 		CHEETAH_LIVELOCK_STAT(proc_as);
738 		goto badstruct;
739 	}
740 
741 	if ((hat = as->a_hat) == NULL) {
742 		CHEETAH_LIVELOCK_STAT(proc_hat);
743 		goto badstruct;
744 	}
745 
746 	if (hat != ksfmmup) {
747 		CHEETAH_LIVELOCK_STAT(proc_user);
748 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
749 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
750 			goto badstruct;
751 		}
752 		tsbinfop = hat->sfmmu_tsb;
753 		if (tsbinfop == NULL) {
754 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
755 			goto badstruct;
756 		}
757 		tsbp = tsbinfop->tsb_va;
758 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
759 	} else {
760 		CHEETAH_LIVELOCK_STAT(proc_kernel);
761 		tsbinfop = NULL;
762 		tsbp = ktsb_base;
763 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
764 	}
765 
766 	/* Verify as */
767 	if (hat->sfmmu_as != as) {
768 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
769 		goto badstruct;
770 	}
771 
772 	cnum = hat->sfmmu_cnum;
773 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
774 
775 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) {
776 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
777 		goto badstruct;
778 	}
779 
780 	do {
781 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
782 
783 		/*
784 		 * Skip TSBs being relocated.  This is important because
785 		 * we want to avoid the following deadlock scenario:
786 		 *
787 		 * 1) when we came in we set ourselves to "in recover" state.
788 		 * 2) when we try to touch TSB being relocated the mapping
789 		 *    will be in the suspended state so we'll spin waiting
790 		 *    for it to be unlocked.
791 		 * 3) when the CPU that holds the TSB mapping locked tries to
792 		 *    unlock it it will send a xtrap which will fail to xcall
793 		 *    us or the CPU we're trying to recover, and will in turn
794 		 *    enter the mondo code.
795 		 * 4) since we are still spinning on the locked mapping
796 		 *    no further progress will be made and the system will
797 		 *    inevitably hard hang.
798 		 *
799 		 * A TSB not being relocated can't begin being relocated
800 		 * while we're accessing it because we check
801 		 * sendmondo_in_recover before relocating TSBs.
802 		 */
803 		if (hat != ksfmmup &&
804 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
805 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
806 			goto next_tsbinfo;
807 		}
808 
809 		for (tsbep = (struct tsbe *)tsbp;
810 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
811 			tsbe_tte = tsbep->tte_data;
812 
813 			if (tsbe_tte.tte_val == 0) {
814 				/*
815 				 * Invalid tte
816 				 */
817 				continue;
818 			}
819 			if (tsbe_tte.tte_se) {
820 				/*
821 				 * Don't want device registers
822 				 */
823 				continue;
824 			}
825 			if (tsbe_tte.tte_cp == 0) {
826 				/*
827 				 * Must be cached in E$
828 				 */
829 				continue;
830 			}
831 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
832 			idsr = getidsr();
833 			if ((idsr & (IDSR_NACK_BIT(bn) |
834 			    IDSR_BUSY_BIT(bn))) == 0) {
835 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
836 				goto done;
837 			}
838 			pahi = tsbe_tte.tte_pahi;
839 			palo = tsbe_tte.tte_palo;
840 			paddr = (uint64_t)((pahi << 32) |
841 			    (palo << MMU_PAGESHIFT));
842 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
843 			    CH_ECACHE_SUBBLK_SIZE);
844 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
845 				shipit(cpuid, bn);
846 			}
847 			pages_claimed++;
848 		}
849 next_tsbinfo:
850 		if (tsbinfop != NULL)
851 			tsbinfop = tsbinfop->tsb_next;
852 		if (tsbinfop != NULL) {
853 			tsbp = tsbinfop->tsb_va;
854 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
855 		} else if (tsbp == ktsb_base) {
856 			tried_kernel_tsb = 1;
857 		} else if (!tried_kernel_tsb) {
858 			tsbp = ktsb_base;
859 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
860 			hat = ksfmmup;
861 			tsbinfop = NULL;
862 		}
863 	} while (tsbinfop != NULL ||
864 			((tsbp == ktsb_base) && !tried_kernel_tsb));
865 
866 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
867 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
868 	no_fault();
869 	idsr = getidsr();
870 	if ((idsr & (IDSR_NACK_BIT(bn) |
871 	    IDSR_BUSY_BIT(bn))) == 0) {
872 		return (1);
873 	} else {
874 		return (0);
875 	}
876 
877 done:
878 	no_fault();
879 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
880 	return (1);
881 
882 badstruct:
883 	no_fault();
884 	return (0);
885 }
886 
887 /*
888  * Attempt to claim ownership, temporarily, of every cache line that a
889  * non-responsive cpu might be using.  This might kick that cpu out of
890  * this state.
891  *
892  * The return value indicates to the caller if we have exhausted all recovery
893  * techniques. If 1 is returned, it is useless to call this function again
894  * even for a different target CPU.
895  */
896 int
897 mondo_recover(uint16_t cpuid, int bn)
898 {
899 	struct memseg *seg;
900 	uint64_t begin_pa, end_pa, cur_pa;
901 	hrtime_t begin_hrt, end_hrt;
902 	int retval = 0;
903 	int pages_claimed = 0;
904 	cheetah_livelock_entry_t *histp;
905 	uint64_t idsr;
906 
907 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
908 		/*
909 		 * Wait while recovery takes place
910 		 */
911 		while (sendmondo_in_recover) {
912 			drv_usecwait(1);
913 		}
914 		/*
915 		 * Assume we didn't claim the whole memory. If
916 		 * the target of this caller is not recovered,
917 		 * it will come back.
918 		 */
919 		return (retval);
920 	}
921 
922 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
923 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
924 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
925 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
926 
927 	begin_hrt = gethrtime_waitfree();
928 	/*
929 	 * First try to claim the lines in the TSB the target
930 	 * may have been using.
931 	 */
932 	if (mondo_recover_proc(cpuid, bn) == 1) {
933 		/*
934 		 * Didn't claim the whole memory
935 		 */
936 		goto done;
937 	}
938 
939 	/*
940 	 * We tried using the TSB. The target is still
941 	 * not recovered. Check if complete memory scan is
942 	 * enabled.
943 	 */
944 	if (cheetah_sendmondo_fullscan == 0) {
945 		/*
946 		 * Full memory scan is disabled.
947 		 */
948 		retval = 1;
949 		goto done;
950 	}
951 
952 	/*
953 	 * Try claiming the whole memory.
954 	 */
955 	for (seg = memsegs; seg; seg = seg->next) {
956 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
957 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
958 		for (cur_pa = begin_pa; cur_pa < end_pa;
959 		    cur_pa += MMU_PAGESIZE) {
960 			idsr = getidsr();
961 			if ((idsr & (IDSR_NACK_BIT(bn) |
962 			    IDSR_BUSY_BIT(bn))) == 0) {
963 				/*
964 				 * Didn't claim all memory
965 				 */
966 				goto done;
967 			}
968 			claimlines(cur_pa, MMU_PAGESIZE,
969 			    CH_ECACHE_SUBBLK_SIZE);
970 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
971 				shipit(cpuid, bn);
972 			}
973 			pages_claimed++;
974 		}
975 	}
976 
977 	/*
978 	 * We did all we could.
979 	 */
980 	retval = 1;
981 
982 done:
983 	/*
984 	 * Update statistics
985 	 */
986 	end_hrt = gethrtime_waitfree();
987 	CHEETAH_LIVELOCK_STAT(recovery);
988 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
989 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
990 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
991 	    (end_hrt -  begin_hrt));
992 
993 	while (cas32(&sendmondo_in_recover, 1, 0) != 1);
994 
995 	return (retval);
996 }
997 
998 /*
999  * This is called by the cyclic framework when this CPU becomes online
1000  */
1001 /*ARGSUSED*/
1002 static void
1003 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
1004 {
1005 
1006 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1007 	hdlr->cyh_level = CY_LOW_LEVEL;
1008 	hdlr->cyh_arg = NULL;
1009 
1010 	/*
1011 	 * Stagger the start time
1012 	 */
1013 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1014 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1015 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1016 	}
1017 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1018 }
1019 
1020 /*
1021  * Create a low level cyclic to send a xtrap to the next cpu online.
1022  * However, there's no need to have this running on a uniprocessor system.
1023  */
1024 static void
1025 cheetah_nudge_init(void)
1026 {
1027 	cyc_omni_handler_t hdlr;
1028 
1029 	if (max_ncpus == 1) {
1030 		return;
1031 	}
1032 
1033 	hdlr.cyo_online = cheetah_nudge_onln;
1034 	hdlr.cyo_offline = NULL;
1035 	hdlr.cyo_arg = NULL;
1036 
1037 	mutex_enter(&cpu_lock);
1038 	(void) cyclic_add_omni(&hdlr);
1039 	mutex_exit(&cpu_lock);
1040 }
1041 
1042 /*
1043  * Cyclic handler to wake up buddy
1044  */
1045 void
1046 cheetah_nudge_buddy(void)
1047 {
1048 	/*
1049 	 * Disable kernel preemption to protect the cpu list
1050 	 */
1051 	kpreempt_disable();
1052 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1053 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1054 		    0, 0);
1055 	}
1056 	kpreempt_enable();
1057 }
1058 
1059 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1060 
#ifdef SEND_MONDO_STATS
/*
 * Mondo dispatch statistics, only present when SEND_MONDO_STATS is
 * defined.
 *
 * send_one_mondo() updates the x_one_* histograms with the number of
 * ticks a dispatch took: times below 8192 ticks are counted in
 * x_one_stimes[] (64 buckets, 128 ticks each); longer times are counted
 * in x_one_ltimes[], indexed by bits 13-16 of the tick count.
 *
 * NOTE(review): the x_set_* and x_nack_stimes counters are updated
 * elsewhere in the file; presumably they play the same role for mondos
 * sent to cpu sets -- confirm against their users.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
1069 
1070 /*
1071  * Note: A version of this function is used by the debugger via the KDI,
1072  * and must be kept in sync with this version.  Any changes made to this
1073  * function to support new chips or to accomodate errata must also be included
1074  * in the KDI-specific version.  See us3_kdi.c.
1075  */
1076 void
1077 send_one_mondo(int cpuid)
1078 {
1079 	int busy, nack;
1080 	uint64_t idsr, starttick, endtick, tick, lasttick;
1081 	uint64_t busymask;
1082 #ifdef	CHEETAHPLUS_ERRATUM_25
1083 	int recovered = 0;
1084 #endif
1085 
1086 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1087 	starttick = lasttick = gettick();
1088 	shipit(cpuid, 0);
1089 	endtick = starttick + xc_tick_limit;
1090 	busy = nack = 0;
1091 #if defined(JALAPENO) || defined(SERRANO)
1092 	/*
1093 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1094 	 * will be used for dispatching interrupt. For now, assume
1095 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1096 	 * issues with respect to BUSY/NACK pair usage.
1097 	 */
1098 	busymask  = IDSR_BUSY_BIT(cpuid);
1099 #else /* JALAPENO || SERRANO */
1100 	busymask = IDSR_BUSY;
1101 #endif /* JALAPENO || SERRANO */
1102 	for (;;) {
1103 		idsr = getidsr();
1104 		if (idsr == 0)
1105 			break;
1106 
1107 		tick = gettick();
1108 		/*
1109 		 * If there is a big jump between the current tick
1110 		 * count and lasttick, we have probably hit a break
1111 		 * point.  Adjust endtick accordingly to avoid panic.
1112 		 */
1113 		if (tick > (lasttick + xc_tick_jump_limit))
1114 			endtick += (tick - lasttick);
1115 		lasttick = tick;
1116 		if (tick > endtick) {
1117 			if (panic_quiesce)
1118 				return;
1119 #ifdef	CHEETAHPLUS_ERRATUM_25
1120 			if (cheetah_sendmondo_recover && recovered == 0) {
1121 				if (mondo_recover(cpuid, 0)) {
1122 					/*
1123 					 * We claimed the whole memory or
1124 					 * full scan is disabled.
1125 					 */
1126 					recovered++;
1127 				}
1128 				tick = gettick();
1129 				endtick = tick + xc_tick_limit;
1130 				lasttick = tick;
1131 				/*
1132 				 * Recheck idsr
1133 				 */
1134 				continue;
1135 			} else
1136 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1137 			{
1138 				cmn_err(CE_PANIC, "send mondo timeout "
1139 				    "(target 0x%x) [%d NACK %d BUSY]",
1140 				    cpuid, nack, busy);
1141 			}
1142 		}
1143 
1144 		if (idsr & busymask) {
1145 			busy++;
1146 			continue;
1147 		}
1148 		drv_usecwait(1);
1149 		shipit(cpuid, 0);
1150 		nack++;
1151 		busy = 0;
1152 	}
1153 #ifdef SEND_MONDO_STATS
1154 	{
1155 		int n = gettick() - starttick;
1156 		if (n < 8192)
1157 			x_one_stimes[n >> 7]++;
1158 		else
1159 			x_one_ltimes[(n >> 13) & 0xf]++;
1160 	}
1161 #endif
1162 }
1163 
/*
 * syncfpu() performs no work in this cpu module; the body is empty.
 */
void
syncfpu(void)
{
}
1168 
1169 /*
1170  * Return processor specific async error structure
1171  * size used.
1172  */
1173 int
1174 cpu_aflt_size(void)
1175 {
1176 	return (sizeof (ch_async_flt_t));
1177 }
1178 
1179 /*
1180  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1181  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1182  * flush the error that caused the UCU/UCC, then again here at the end to
1183  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1184  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1185  * another Fast ECC trap.
1186  *
1187  * Cheetah+ also handles: TSCE: No additional processing required.
1188  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1189  *
1190  * Note that the p_clo_flags input is only valid in cases where the
1191  * cpu_private struct is not yet initialized (since that is the only
1192  * time that information cannot be obtained from the logout struct.)
1193  */
1194 /*ARGSUSED*/
1195 void
1196 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1197 {
1198 	ch_cpu_logout_t *clop;
1199 	uint64_t ceen;
1200 
1201 	/*
1202 	 * Get the CPU log out info. If we can't find our CPU private
1203 	 * pointer, then we will have to make due without any detailed
1204 	 * logout information.
1205 	 */
1206 	if (CPU_PRIVATE(CPU) == NULL) {
1207 		clop = NULL;
1208 		ceen = p_clo_flags & EN_REG_CEEN;
1209 	} else {
1210 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1211 		ceen = clop->clo_flags & EN_REG_CEEN;
1212 	}
1213 
1214 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1215 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, clop);
1216 }
1217 
1218 /*
1219  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1220  * ECC at TL>0.  Need to supply either a error register pointer or a
1221  * cpu logout structure pointer.
1222  */
1223 static void
1224 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1225     ch_cpu_logout_t *clop)
1226 {
1227 	struct async_flt *aflt;
1228 	ch_async_flt_t ch_flt;
1229 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1230 	char pr_reason[MAX_REASON_STRING];
1231 	ch_cpu_errors_t cpu_error_regs;
1232 
1233 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1234 	/*
1235 	 * If no cpu logout data, then we will have to make due without
1236 	 * any detailed logout information.
1237 	 */
1238 	if (clop == NULL) {
1239 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1240 		get_cpu_error_state(&cpu_error_regs);
1241 		set_cpu_error_state(&cpu_error_regs);
1242 		t_afar = cpu_error_regs.afar;
1243 		t_afsr = cpu_error_regs.afsr;
1244 		t_afsr_ext = cpu_error_regs.afsr_ext;
1245 #if defined(SERRANO)
1246 		ch_flt.afar2 = cpu_error_regs.afar2;
1247 #endif	/* SERRANO */
1248 	} else {
1249 		t_afar = clop->clo_data.chd_afar;
1250 		t_afsr = clop->clo_data.chd_afsr;
1251 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1252 #if defined(SERRANO)
1253 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1254 #endif	/* SERRANO */
1255 	}
1256 
1257 	/*
1258 	 * In order to simplify code, we maintain this afsr_errs
1259 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1260 	 * sticky bits.
1261 	 */
1262 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1263 	    (t_afsr & C_AFSR_ALL_ERRS);
1264 	pr_reason[0] = '\0';
1265 
1266 	/* Setup the async fault structure */
1267 	aflt = (struct async_flt *)&ch_flt;
1268 	aflt->flt_id = gethrtime_waitfree();
1269 	ch_flt.afsr_ext = t_afsr_ext;
1270 	ch_flt.afsr_errs = t_afsr_errs;
1271 	aflt->flt_stat = t_afsr;
1272 	aflt->flt_addr = t_afar;
1273 	aflt->flt_bus_id = getprocessorid();
1274 	aflt->flt_inst = CPU->cpu_id;
1275 	aflt->flt_pc = tpc;
1276 	aflt->flt_prot = AFLT_PROT_NONE;
1277 	aflt->flt_class = CPU_FAULT;
1278 	aflt->flt_priv = priv;
1279 	aflt->flt_tl = tl;
1280 	aflt->flt_status = ECC_F_TRAP;
1281 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1282 
1283 	/*
1284 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1285 	 * cmn_err messages out to the console.  The situation is a UCU (in
1286 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1287 	 * The messages for the UCU and WDU are enqueued and then pulled off
1288 	 * the async queue via softint and syslogd starts to process them
1289 	 * but doesn't get them to the console.  The UE causes a panic, but
1290 	 * since the UCU/WDU messages are already in transit, those aren't
1291 	 * on the async queue.  The hack is to check if we have a matching
1292 	 * WDU event for the UCU, and if it matches, we're more than likely
1293 	 * going to panic with a UE, unless we're under protection.  So, we
1294 	 * check to see if we got a matching WDU event and if we're under
1295 	 * protection.
1296 	 *
1297 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1298 	 * looks like this:
1299 	 *    UCU->WDU->UE
1300 	 * For Panther, it could look like either of these:
1301 	 *    UCU---->WDU->L3_WDU->UE
1302 	 *    L3_UCU->WDU->L3_WDU->UE
1303 	 */
1304 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1305 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1306 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1307 		get_cpu_error_state(&cpu_error_regs);
1308 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1309 		    (cpu_error_regs.afar == t_afar));
1310 		aflt->flt_panic |= ((clop == NULL) &&
1311 		    (t_afsr_errs & C_AFSR_WDU));
1312 	}
1313 
1314 	/*
1315 	 * Queue events on the async event queue, one event per error bit.
1316 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1317 	 * queue an event to complain.
1318 	 */
1319 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1320 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1321 		ch_flt.flt_type = CPU_INV_AFSR;
1322 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1323 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1324 		    aflt->flt_panic);
1325 	}
1326 
1327 	/*
1328 	 * Zero out + invalidate CPU logout.
1329 	 */
1330 	if (clop) {
1331 		bzero(clop, sizeof (ch_cpu_logout_t));
1332 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1333 	}
1334 
1335 	/*
1336 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1337 	 * or disrupting errors have happened.  We do this because if a
1338 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1339 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1340 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1341 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1342 	 * deferred or disrupting error happening between checking the AFSR and
1343 	 * enabling NCEEN/CEEN.
1344 	 *
1345 	 * Note: CEEN reenabled only if it was on when trap taken.
1346 	 */
1347 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
1348 	if (clear_errors(&ch_flt)) {
1349 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1350 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1351 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1352 		    NULL);
1353 	}
1354 
1355 	/*
1356 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1357 	 * be logged as part of the panic flow.
1358 	 */
1359 	if (aflt->flt_panic)
1360 		fm_panic("%sError(s)", pr_reason);
1361 
1362 	/*
1363 	 * Flushing the Ecache here gets the part of the trap handler that
1364 	 * is run at TL=1 out of the Ecache.
1365 	 */
1366 	cpu_flush_ecache();
1367 }
1368 
1369 /*
1370  * This is called via sys_trap from pil15_interrupt code if the
1371  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1372  * various ch_err_tl1_data structures for valid entries based on the bit
1373  * settings in the ch_err_tl1_flags entry of the structure.
1374  */
1375 /*ARGSUSED*/
1376 void
1377 cpu_tl1_error(struct regs *rp, int panic)
1378 {
1379 	ch_err_tl1_data_t *cl1p, cl1;
1380 	int i, ncl1ps;
1381 	uint64_t me_flags;
1382 	uint64_t ceen;
1383 
1384 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1385 		cl1p = &ch_err_tl1_data;
1386 		ncl1ps = 1;
1387 	} else if (CPU_PRIVATE(CPU) != NULL) {
1388 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1389 		ncl1ps = CH_ERR_TL1_TLMAX;
1390 	} else {
1391 		ncl1ps = 0;
1392 	}
1393 
1394 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1395 		if (cl1p->ch_err_tl1_flags == 0)
1396 			continue;
1397 
1398 		/*
1399 		 * Grab a copy of the logout data and invalidate
1400 		 * the logout area.
1401 		 */
1402 		cl1 = *cl1p;
1403 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1404 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1405 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1406 
1407 		/*
1408 		 * Log "first error" in ch_err_tl1_data.
1409 		 */
1410 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1411 			ceen = get_error_enable() & EN_REG_CEEN;
1412 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1413 			    1, ceen, &cl1.ch_err_tl1_logout);
1414 		}
1415 #if defined(CPU_IMP_L1_CACHE_PARITY)
1416 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1417 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1418 			    (caddr_t)cl1.ch_err_tl1_tpc);
1419 		}
1420 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1421 
1422 		/*
1423 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1424 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1425 		 * if the structure is busy, we just do the cache flushing
1426 		 * we have to do and then do the retry.  So the AFSR/AFAR
1427 		 * at this point *should* have some relevant info.  If there
1428 		 * are no valid errors in the AFSR, we'll assume they've
1429 		 * already been picked up and logged.  For I$/D$ parity,
1430 		 * we just log an event with an "Unknown" (NULL) TPC.
1431 		 */
1432 		if (me_flags & CH_ERR_FECC) {
1433 			ch_cpu_errors_t cpu_error_regs;
1434 			uint64_t t_afsr_errs;
1435 
1436 			/*
1437 			 * Get the error registers and see if there's
1438 			 * a pending error.  If not, don't bother
1439 			 * generating an "Invalid AFSR" error event.
1440 			 */
1441 			get_cpu_error_state(&cpu_error_regs);
1442 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1443 			    C_AFSR_EXT_ALL_ERRS) |
1444 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1445 			if (t_afsr_errs != 0) {
1446 				ceen = get_error_enable() & EN_REG_CEEN;
1447 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1448 				    1, ceen, NULL);
1449 			}
1450 		}
1451 #if defined(CPU_IMP_L1_CACHE_PARITY)
1452 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1453 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1454 		}
1455 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1456 	}
1457 }
1458 
1459 /*
1460  * Called from Fast ECC TL>0 handler in case of fatal error.
1461  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1462  * but if we don't, we'll panic with something reasonable.
1463  */
1464 /*ARGSUSED*/
1465 void
1466 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1467 {
1468 	cpu_tl1_error(rp, 1);
1469 	/*
1470 	 * Should never return, but just in case.
1471 	 */
1472 	fm_panic("Unsurvivable ECC Error at TL>0");
1473 }
1474 
1475 /*
1476  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1477  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1478  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1479  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1480  *
1481  * Cheetah+ also handles (No additional processing required):
1482  *    DUE, DTO, DBERR	(NCEEN controlled)
1483  *    THCE		(CEEN and ET_ECC_en controlled)
1484  *    TUE		(ET_ECC_en controlled)
1485  *
1486  * Panther further adds:
1487  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1488  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1489  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1490  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1491  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1492  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1493  *
1494  * Note that the p_clo_flags input is only valid in cases where the
1495  * cpu_private struct is not yet initialized (since that is the only
1496  * time that information cannot be obtained from the logout struct.)
1497  */
1498 /*ARGSUSED*/
1499 void
1500 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1501 {
1502 	struct async_flt *aflt;
1503 	ch_async_flt_t ch_flt;
1504 	char pr_reason[MAX_REASON_STRING];
1505 	ch_cpu_logout_t *clop;
1506 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1507 	ch_cpu_errors_t cpu_error_regs;
1508 
1509 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1510 	/*
1511 	 * Get the CPU log out info. If we can't find our CPU private
1512 	 * pointer, then we will have to make due without any detailed
1513 	 * logout information.
1514 	 */
1515 	if (CPU_PRIVATE(CPU) == NULL) {
1516 		clop = NULL;
1517 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1518 		get_cpu_error_state(&cpu_error_regs);
1519 		set_cpu_error_state(&cpu_error_regs);
1520 		t_afar = cpu_error_regs.afar;
1521 		t_afsr = cpu_error_regs.afsr;
1522 		t_afsr_ext = cpu_error_regs.afsr_ext;
1523 #if defined(SERRANO)
1524 		ch_flt.afar2 = cpu_error_regs.afar2;
1525 #endif	/* SERRANO */
1526 	} else {
1527 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1528 		t_afar = clop->clo_data.chd_afar;
1529 		t_afsr = clop->clo_data.chd_afsr;
1530 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1531 #if defined(SERRANO)
1532 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1533 #endif	/* SERRANO */
1534 	}
1535 
1536 	/*
1537 	 * In order to simplify code, we maintain this afsr_errs
1538 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1539 	 * sticky bits.
1540 	 */
1541 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1542 	    (t_afsr & C_AFSR_ALL_ERRS);
1543 
1544 	pr_reason[0] = '\0';
1545 	/* Setup the async fault structure */
1546 	aflt = (struct async_flt *)&ch_flt;
1547 	ch_flt.afsr_ext = t_afsr_ext;
1548 	ch_flt.afsr_errs = t_afsr_errs;
1549 	aflt->flt_stat = t_afsr;
1550 	aflt->flt_addr = t_afar;
1551 	aflt->flt_pc = (caddr_t)rp->r_pc;
1552 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1553 	aflt->flt_tl = 0;
1554 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1555 
1556 	/*
1557 	 * If this trap is a result of one of the errors not masked
1558 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1559 	 * indicate that a timeout is to be set later.
1560 	 */
1561 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1562 	    !aflt->flt_panic)
1563 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1564 	else
1565 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1566 
1567 	/*
1568 	 * log the CE and clean up
1569 	 */
1570 	cpu_log_and_clear_ce(&ch_flt);
1571 
1572 	/*
1573 	 * We re-enable CEEN (if required) and check if any disrupting errors
1574 	 * have happened.  We do this because if a disrupting error had occurred
1575 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1576 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1577 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1578 	 * of a error happening between checking the AFSR and enabling CEEN.
1579 	 */
1580 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1581 	    set_error_enable(get_error_enable() | EN_REG_CEEN);
1582 	if (clear_errors(&ch_flt)) {
1583 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1584 		    NULL);
1585 	}
1586 
1587 	/*
1588 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1589 	 * be logged as part of the panic flow.
1590 	 */
1591 	if (aflt->flt_panic)
1592 		fm_panic("%sError(s)", pr_reason);
1593 }
1594 
1595 /*
1596  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1597  * L3_EDU:BLD, TO, and BERR events.
1598  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1599  *
1600  * Cheetah+: No additional errors handled.
1601  *
1602  * Note that the p_clo_flags input is only valid in cases where the
1603  * cpu_private struct is not yet initialized (since that is the only
1604  * time that information cannot be obtained from the logout struct.)
1605  */
1606 /*ARGSUSED*/
1607 void
1608 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1609 {
1610 	ushort_t ttype, tl;
1611 	ch_async_flt_t ch_flt;
1612 	struct async_flt *aflt;
1613 	int trampolined = 0;
1614 	char pr_reason[MAX_REASON_STRING];
1615 	ch_cpu_logout_t *clop;
1616 	uint64_t ceen, clo_flags;
1617 	uint64_t log_afsr;
1618 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1619 	ch_cpu_errors_t cpu_error_regs;
1620 	int expected = DDI_FM_ERR_UNEXPECTED;
1621 	ddi_acc_hdl_t *hp;
1622 
1623 	/*
1624 	 * We need to look at p_flag to determine if the thread detected an
1625 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1626 	 * because we just need a consistent snapshot and we know that everyone
1627 	 * else will store a consistent set of bits while holding p_lock.  We
1628 	 * don't have to worry about a race because SDOCORE is set once prior
1629 	 * to doing i/o from the process's address space and is never cleared.
1630 	 */
1631 	uint_t pflag = ttoproc(curthread)->p_flag;
1632 
1633 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1634 	/*
1635 	 * Get the CPU log out info. If we can't find our CPU private
	 * pointer then we will have to make do without any detailed
1637 	 * logout information.
1638 	 */
1639 	if (CPU_PRIVATE(CPU) == NULL) {
1640 		clop = NULL;
1641 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1642 		get_cpu_error_state(&cpu_error_regs);
1643 		set_cpu_error_state(&cpu_error_regs);
1644 		t_afar = cpu_error_regs.afar;
1645 		t_afsr = cpu_error_regs.afsr;
1646 		t_afsr_ext = cpu_error_regs.afsr_ext;
1647 #if defined(SERRANO)
1648 		ch_flt.afar2 = cpu_error_regs.afar2;
1649 #endif	/* SERRANO */
1650 		clo_flags = p_clo_flags;
1651 	} else {
1652 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1653 		t_afar = clop->clo_data.chd_afar;
1654 		t_afsr = clop->clo_data.chd_afsr;
1655 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1656 #if defined(SERRANO)
1657 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1658 #endif	/* SERRANO */
1659 		clo_flags = clop->clo_flags;
1660 	}
1661 
1662 	/*
1663 	 * In order to simplify code, we maintain this afsr_errs
1664 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1665 	 * sticky bits.
1666 	 */
1667 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1668 	    (t_afsr & C_AFSR_ALL_ERRS);
1669 	pr_reason[0] = '\0';
1670 
1671 	/*
1672 	 * Grab information encoded into our clo_flags field.
1673 	 */
1674 	ceen = clo_flags & EN_REG_CEEN;
1675 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1676 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1677 
1678 	/*
1679 	 * handle the specific error
1680 	 */
1681 	aflt = (struct async_flt *)&ch_flt;
1682 	aflt->flt_id = gethrtime_waitfree();
1683 	aflt->flt_bus_id = getprocessorid();
1684 	aflt->flt_inst = CPU->cpu_id;
1685 	ch_flt.afsr_ext = t_afsr_ext;
1686 	ch_flt.afsr_errs = t_afsr_errs;
1687 	aflt->flt_stat = t_afsr;
1688 	aflt->flt_addr = t_afar;
1689 	aflt->flt_pc = (caddr_t)rp->r_pc;
1690 	aflt->flt_prot = AFLT_PROT_NONE;
1691 	aflt->flt_class = CPU_FAULT;
1692 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1693 	aflt->flt_tl = (uchar_t)tl;
1694 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1695 	    C_AFSR_PANIC(t_afsr_errs));
1696 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1697 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1698 
1699 	/*
1700 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1701 	 * see if we were executing in the kernel under on_trap() or t_lofault
1702 	 * protection.  If so, modify the saved registers so that we return
1703 	 * from the trap to the appropriate trampoline routine.
1704 	 */
1705 	if (aflt->flt_priv && tl == 0) {
1706 		if (curthread->t_ontrap != NULL) {
1707 			on_trap_data_t *otp = curthread->t_ontrap;
1708 
1709 			if (otp->ot_prot & OT_DATA_EC) {
1710 				aflt->flt_prot = AFLT_PROT_EC;
1711 				otp->ot_trap |= OT_DATA_EC;
1712 				rp->r_pc = otp->ot_trampoline;
1713 				rp->r_npc = rp->r_pc + 4;
1714 				trampolined = 1;
1715 			}
1716 
1717 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1718 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1719 				aflt->flt_prot = AFLT_PROT_ACCESS;
1720 				otp->ot_trap |= OT_DATA_ACCESS;
1721 				rp->r_pc = otp->ot_trampoline;
1722 				rp->r_npc = rp->r_pc + 4;
1723 				trampolined = 1;
1724 				/*
1725 				 * for peeks and caut_gets errors are expected
1726 				 */
1727 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1728 				if (!hp)
1729 					expected = DDI_FM_ERR_PEEK;
1730 				else if (hp->ah_acc.devacc_attr_access ==
1731 				    DDI_CAUTIOUS_ACC)
1732 					expected = DDI_FM_ERR_EXPECTED;
1733 			}
1734 
1735 		} else if (curthread->t_lofault) {
1736 			aflt->flt_prot = AFLT_PROT_COPY;
1737 			rp->r_g1 = EFAULT;
1738 			rp->r_pc = curthread->t_lofault;
1739 			rp->r_npc = rp->r_pc + 4;
1740 			trampolined = 1;
1741 		}
1742 	}
1743 
1744 	/*
1745 	 * If we're in user mode or we're doing a protected copy, we either
1746 	 * want the ASTON code below to send a signal to the user process
1747 	 * or we want to panic if aft_panic is set.
1748 	 *
1749 	 * If we're in privileged mode and we're not doing a copy, then we
1750 	 * need to check if we've trampolined.  If we haven't trampolined,
1751 	 * we should panic.
1752 	 */
1753 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1754 		if (t_afsr_errs &
1755 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1756 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1757 			aflt->flt_panic |= aft_panic;
1758 	} else if (!trampolined) {
1759 			aflt->flt_panic = 1;
1760 	}
1761 
1762 	/*
1763 	 * If we've trampolined due to a privileged TO or BERR, or if an
1764 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1765 	 * event for that TO or BERR.  Queue all other events (if any) besides
	 * the TO/BERR.  Since we may not be enqueueing any events, we need to
1767 	 * ignore the number of events queued.  If we haven't trampolined due
1768 	 * to a TO or BERR, just enqueue events normally.
1769 	 */
1770 	log_afsr = t_afsr_errs;
1771 	if (trampolined) {
1772 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1773 	} else if (!aflt->flt_priv) {
1774 		/*
1775 		 * User mode, suppress messages if
1776 		 * cpu_berr_to_verbose is not set.
1777 		 */
1778 		if (!cpu_berr_to_verbose)
1779 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1780 	}
1781 
1782 	/*
1783 	 * Log any errors that occurred
1784 	 */
1785 	if (((log_afsr &
1786 		((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1787 		cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1788 		(t_afsr_errs &
1789 		(C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1790 		ch_flt.flt_type = CPU_INV_AFSR;
1791 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1792 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1793 		    aflt->flt_panic);
1794 	}
1795 
1796 	/*
1797 	 * Zero out + invalidate CPU logout.
1798 	 */
1799 	if (clop) {
1800 		bzero(clop, sizeof (ch_cpu_logout_t));
1801 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1802 	}
1803 
1804 #if defined(JALAPENO) || defined(SERRANO)
1805 	/*
1806 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1807 	 * IO errors that may have resulted in this trap.
1808 	 */
1809 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1810 		cpu_run_bus_error_handlers(aflt, expected);
1811 	}
1812 
1813 	/*
1814 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1815 	 * line from the Ecache.  We also need to query the bus nexus for
1816 	 * fatal errors.  Attempts to do diagnostic read on caches may
1817 	 * introduce more errors (especially when the module is bad).
1818 	 */
1819 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1820 		/*
1821 		 * Ask our bus nexus friends if they have any fatal errors.  If
1822 		 * so, they will log appropriate error messages.
1823 		 */
1824 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1825 			aflt->flt_panic = 1;
1826 
1827 		/*
1828 		 * We got a UE or RUE and are panicking, save the fault PA in
1829 		 * a known location so that the platform specific panic code
1830 		 * can check for copyback errors.
1831 		 */
1832 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1833 			panic_aflt = *aflt;
1834 		}
1835 	}
1836 
1837 	/*
1838 	 * Flush Ecache line or entire Ecache
1839 	 */
1840 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1841 		cpu_error_ecache_flush(&ch_flt);
1842 #else /* JALAPENO || SERRANO */
1843 	/*
1844 	 * UE/BERR/TO: Call our bus nexus friends to check for
1845 	 * IO errors that may have resulted in this trap.
1846 	 */
1847 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1848 		cpu_run_bus_error_handlers(aflt, expected);
1849 	}
1850 
1851 	/*
1852 	 * UE: If the UE is in memory, we need to flush the bad
1853 	 * line from the Ecache.  We also need to query the bus nexus for
1854 	 * fatal errors.  Attempts to do diagnostic read on caches may
1855 	 * introduce more errors (especially when the module is bad).
1856 	 */
1857 	if (t_afsr & C_AFSR_UE) {
1858 		/*
1859 		 * Ask our legacy bus nexus friends if they have any fatal
1860 		 * errors.  If so, they will log appropriate error messages.
1861 		 */
1862 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1863 			aflt->flt_panic = 1;
1864 
1865 		/*
1866 		 * We got a UE and are panicking, save the fault PA in a known
1867 		 * location so that the platform specific panic code can check
1868 		 * for copyback errors.
1869 		 */
1870 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1871 			panic_aflt = *aflt;
1872 		}
1873 	}
1874 
1875 	/*
1876 	 * Flush Ecache line or entire Ecache
1877 	 */
1878 	if (t_afsr_errs &
1879 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1880 		cpu_error_ecache_flush(&ch_flt);
1881 #endif /* JALAPENO || SERRANO */
1882 
1883 	/*
1884 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1885 	 * or disrupting errors have happened.  We do this because if a
1886 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1887 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1888 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1889 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1890 	 * deferred or disrupting error happening between checking the AFSR and
1891 	 * enabling NCEEN/CEEN.
1892 	 *
1893 	 * Note: CEEN reenabled only if it was on when trap taken.
1894 	 */
1895 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
1896 	if (clear_errors(&ch_flt)) {
1897 		/*
1898 		 * Check for secondary errors, and avoid panicking if we
1899 		 * have them
1900 		 */
1901 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
1902 		    t_afar) == 0) {
1903 			aflt->flt_panic |= ((ch_flt.afsr_errs &
1904 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
1905 		}
1906 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1907 		    NULL);
1908 	}
1909 
1910 	/*
1911 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1912 	 * be logged as part of the panic flow.
1913 	 */
1914 	if (aflt->flt_panic)
1915 		fm_panic("%sError(s)", pr_reason);
1916 
1917 	/*
1918 	 * If we queued an error and we are going to return from the trap and
1919 	 * the error was in user mode or inside of a copy routine, set AST flag
1920 	 * so the queue will be drained before returning to user mode.  The
1921 	 * AST processing will also act on our failure policy.
1922 	 */
1923 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1924 		int pcb_flag = 0;
1925 
1926 		if (t_afsr_errs &
1927 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
1928 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1929 			pcb_flag |= ASYNC_HWERR;
1930 
1931 		if (t_afsr & C_AFSR_BERR)
1932 			pcb_flag |= ASYNC_BERR;
1933 
1934 		if (t_afsr & C_AFSR_TO)
1935 			pcb_flag |= ASYNC_BTO;
1936 
1937 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1938 		aston(curthread);
1939 	}
1940 }
1941 
1942 #if defined(CPU_IMP_L1_CACHE_PARITY)
1943 /*
1944  * Handling of data and instruction parity errors (traps 0x71, 0x72).
1945  *
1946  * For Panther, P$ data parity errors during floating point load hits
1947  * are also detected (reported as TT 0x71) and handled by this trap
1948  * handler.
1949  *
1950  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
1951  * is available.
1952  */
1953 /*ARGSUSED*/
1954 void
1955 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
1956 {
1957 	ch_async_flt_t ch_flt;
1958 	struct async_flt *aflt;
1959 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
1960 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
1961 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
1962 	char *error_class;
1963 
1964 	/*
1965 	 * Log the error.
1966 	 * For icache parity errors the fault address is the trap PC.
1967 	 * For dcache/pcache parity errors the instruction would have to
1968 	 * be decoded to determine the address and that isn't possible
1969 	 * at high PIL.
1970 	 */
1971 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1972 	aflt = (struct async_flt *)&ch_flt;
1973 	aflt->flt_id = gethrtime_waitfree();
1974 	aflt->flt_bus_id = getprocessorid();
1975 	aflt->flt_inst = CPU->cpu_id;
1976 	aflt->flt_pc = tpc;
1977 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
1978 	aflt->flt_prot = AFLT_PROT_NONE;
1979 	aflt->flt_class = CPU_FAULT;
1980 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
1981 	aflt->flt_tl = tl;
1982 	aflt->flt_panic = panic;
1983 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
1984 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
1985 
1986 	if (iparity) {
1987 		cpu_icache_parity_info(&ch_flt);
1988 		if (ch_flt.parity_data.ipe.cpl_off != -1)
1989 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
1990 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
1991 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
1992 		else
1993 			error_class = FM_EREPORT_CPU_USIII_IPE;
1994 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
1995 	} else {
1996 		cpu_dcache_parity_info(&ch_flt);
1997 		if (ch_flt.parity_data.dpe.cpl_off != -1)
1998 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
1999 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2000 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2001 		else
2002 			error_class = FM_EREPORT_CPU_USIII_DPE;
2003 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2004 		/*
2005 		 * For panther we also need to check the P$ for parity errors.
2006 		 */
2007 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2008 			cpu_pcache_parity_info(&ch_flt);
2009 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2010 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2011 				aflt->flt_payload =
2012 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2013 			}
2014 		}
2015 	}
2016 
2017 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2018 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2019 
2020 	if (iparity) {
2021 		/*
2022 		 * Invalidate entire I$.
2023 		 * This is required due to the use of diagnostic ASI
2024 		 * accesses that may result in a loss of I$ coherency.
2025 		 */
2026 		if (cache_boot_state & DCU_IC) {
2027 			flush_icache();
2028 		}
2029 		/*
2030 		 * According to section P.3.1 of the Panther PRM, we
2031 		 * need to do a little more for recovery on those
2032 		 * CPUs after encountering an I$ parity error.
2033 		 */
2034 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2035 			flush_ipb();
2036 			correct_dcache_parity(dcache_size,
2037 			    dcache_linesize);
2038 			flush_pcache();
2039 		}
2040 	} else {
2041 		/*
2042 		 * Since the valid bit is ignored when checking parity the
2043 		 * D$ data and tag must also be corrected.  Set D$ data bits
2044 		 * to zero and set utag to 0, 1, 2, 3.
2045 		 */
2046 		correct_dcache_parity(dcache_size, dcache_linesize);
2047 
2048 		/*
2049 		 * According to section P.3.3 of the Panther PRM, we
2050 		 * need to do a little more for recovery on those
2051 		 * CPUs after encountering a D$ or P$ parity error.
2052 		 *
2053 		 * As far as clearing P$ parity errors, it is enough to
2054 		 * simply invalidate all entries in the P$ since P$ parity
2055 		 * error traps are only generated for floating point load
2056 		 * hits.
2057 		 */
2058 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2059 			flush_icache();
2060 			flush_ipb();
2061 			flush_pcache();
2062 		}
2063 	}
2064 
2065 	/*
2066 	 * Invalidate entire D$ if it was enabled.
2067 	 * This is done to avoid stale data in the D$ which might
2068 	 * occur with the D$ disabled and the trap handler doing
2069 	 * stores affecting lines already in the D$.
2070 	 */
2071 	if (cache_boot_state & DCU_DC) {
2072 		flush_dcache();
2073 	}
2074 
2075 	/*
2076 	 * Restore caches to their bootup state.
2077 	 */
2078 	set_dcu(get_dcu() | cache_boot_state);
2079 
2080 	/*
2081 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2082 	 * be logged as part of the panic flow.
2083 	 */
2084 	if (aflt->flt_panic)
2085 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2086 
2087 	/*
2088 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2089 	 * the chance of getting an unrecoverable Fast ECC error.  This
2090 	 * flush will evict the part of the parity trap handler that is run
2091 	 * at TL>1.
2092 	 */
2093 	if (tl) {
2094 		cpu_flush_ecache();
2095 	}
2096 }
2097 
2098 /*
2099  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2100  * to indicate which portions of the captured data should be in the ereport.
2101  */
2102 void
2103 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2104 {
2105 	int way = ch_flt->parity_data.ipe.cpl_way;
2106 	int offset = ch_flt->parity_data.ipe.cpl_off;
2107 	int tag_index;
2108 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2109 
2110 
2111 	if ((offset != -1) || (way != -1)) {
2112 		/*
2113 		 * Parity error in I$ tag or data
2114 		 */
2115 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2116 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2117 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2118 			    PN_ICIDX_TO_WAY(tag_index);
2119 		else
2120 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2121 			    CH_ICIDX_TO_WAY(tag_index);
2122 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2123 		    IC_LOGFLAG_MAGIC;
2124 	} else {
2125 		/*
2126 		 * Parity error was not identified.
2127 		 * Log tags and data for all ways.
2128 		 */
2129 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2130 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2131 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2132 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2133 				    PN_ICIDX_TO_WAY(tag_index);
2134 			else
2135 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2136 				    CH_ICIDX_TO_WAY(tag_index);
2137 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2138 			    IC_LOGFLAG_MAGIC;
2139 		}
2140 	}
2141 }
2142 
2143 /*
2144  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2145  * to indicate which portions of the captured data should be in the ereport.
2146  */
2147 void
2148 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2149 {
2150 	int way = ch_flt->parity_data.dpe.cpl_way;
2151 	int offset = ch_flt->parity_data.dpe.cpl_off;
2152 	int tag_index;
2153 
2154 	if (offset != -1) {
2155 		/*
2156 		 * Parity error in D$ or P$ data array.
2157 		 *
2158 		 * First check to see whether the parity error is in D$ or P$
2159 		 * since P$ data parity errors are reported in Panther using
2160 		 * the same trap.
2161 		 */
2162 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2163 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2164 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2165 			    CH_PCIDX_TO_WAY(tag_index);
2166 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2167 			    PC_LOGFLAG_MAGIC;
2168 		} else {
2169 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2170 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2171 			    CH_DCIDX_TO_WAY(tag_index);
2172 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2173 			    DC_LOGFLAG_MAGIC;
2174 		}
2175 	} else if (way != -1) {
2176 		/*
2177 		 * Parity error in D$ tag.
2178 		 */
2179 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2180 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2181 		    CH_DCIDX_TO_WAY(tag_index);
2182 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2183 		    DC_LOGFLAG_MAGIC;
2184 	}
2185 }
2186 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2187 
2188 /*
2189  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2190  * post-process CPU events that are dequeued.  As such, it can be invoked
2191  * from softint context, from AST processing in the trap() flow, or from the
2192  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2193  * Historically this entry point was used to log the actual cmn_err(9F) text;
2194  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2195  * With FMA this function now also returns a flag which indicates to the
2196  * caller whether the ereport should be posted (1) or suppressed (0).
2197  */
2198 static int
2199 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2200 {
2201 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2202 	struct async_flt *aflt = (struct async_flt *)flt;
2203 	page_t *pp;
2204 
2205 	switch (ch_flt->flt_type) {
2206 	case CPU_INV_AFSR:
2207 		/*
2208 		 * If it is a disrupting trap and the AFSR is zero, then
2209 		 * the event has probably already been noted. Do not post
2210 		 * an ereport.
2211 		 */
2212 		if ((aflt->flt_status & ECC_C_TRAP) &&
2213 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2214 			return (0);
2215 		else
2216 			return (1);
2217 	case CPU_TO:
2218 	case CPU_BERR:
2219 	case CPU_FATAL:
2220 	case CPU_FPUERR:
2221 		return (1);
2222 
2223 	case CPU_UE_ECACHE_RETIRE:
2224 		cpu_log_err(aflt);
2225 		cpu_page_retire(ch_flt);
2226 		return (1);
2227 
2228 	/*
2229 	 * Cases where we may want to suppress logging or perform
2230 	 * extended diagnostics.
2231 	 */
2232 	case CPU_CE:
2233 	case CPU_EMC:
2234 		pp = page_numtopp_nolock((pfn_t)
2235 		    (aflt->flt_addr >> MMU_PAGESHIFT));
2236 
2237 		/*
2238 		 * We want to skip logging and further classification
2239 		 * only if ALL the following conditions are true:
2240 		 *
2241 		 *	1. There is only one error
2242 		 *	2. That error is a correctable memory error
2243 		 *	3. The error is caused by the memory scrubber (in
2244 		 *	   which case the error will have occurred under
2245 		 *	   on_trap protection)
2246 		 *	4. The error is on a retired page
2247 		 *
2248 		 * Note: AFLT_PROT_EC is used places other than the memory
2249 		 * scrubber.  However, none of those errors should occur
2250 		 * on a retired page.
2251 		 */
2252 		if ((ch_flt->afsr_errs &
2253 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2254 		    aflt->flt_prot == AFLT_PROT_EC) {
2255 
2256 			if (pp != NULL && page_isretired(pp)) {
2257 			    if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2258 
2259 				/*
2260 				 * Since we're skipping logging, we'll need
2261 				 * to schedule the re-enabling of CEEN
2262 				 */
2263 				(void) timeout(cpu_delayed_check_ce_errors,
2264 				    (void *)aflt->flt_inst, drv_usectohz(
2265 				    (clock_t)cpu_ceen_delay_secs * MICROSEC));
2266 			    }
2267 			    return (0);
2268 			}
2269 		}
2270 
2271 		/*
2272 		 * Perform/schedule further classification actions, but
2273 		 * only if the page is healthy (we don't want bad
2274 		 * pages inducing too much diagnostic activity).  If we could
2275 		 * not find a page pointer then we also skip this.  If
2276 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2277 		 * to copy and recirculate the event (for further diagnostics)
2278 		 * and we should not proceed to log it here.
2279 		 *
2280 		 * This must be the last step here before the cpu_log_err()
2281 		 * below - if an event recirculates cpu_ce_log_err() will
2282 		 * not call the current function but just proceed directly
2283 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2284 		 *
2285 		 * Note: Check cpu_impl_async_log_err if changing this
2286 		 */
2287 		if (pp) {
2288 			if (page_isretired(pp) || page_deteriorating(pp)) {
2289 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2290 				    CE_XDIAG_SKIP_PAGEDET);
2291 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2292 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2293 				return (0);
2294 			}
2295 		} else {
2296 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2297 			    CE_XDIAG_SKIP_NOPP);
2298 		}
2299 		/*FALLTHRU*/
2300 
2301 	/*
2302 	 * Cases where we just want to report the error and continue.
2303 	 */
2304 	case CPU_CE_ECACHE:
2305 	case CPU_UE_ECACHE:
2306 	case CPU_IV:
2307 	case CPU_ORPH:
2308 		cpu_log_err(aflt);
2309 		return (1);
2310 
2311 	/*
2312 	 * Cases where we want to fall through to handle panicking.
2313 	 */
2314 	case CPU_UE:
2315 		/*
2316 		 * We want to skip logging in the same conditions as the
2317 		 * CE case.  In addition, we want to make sure we're not
2318 		 * panicking.
2319 		 */
2320 		if (!panicstr && (ch_flt->afsr_errs &
2321 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2322 		    aflt->flt_prot == AFLT_PROT_EC) {
2323 			page_t *pp = page_numtopp_nolock((pfn_t)
2324 			    (aflt->flt_addr >> MMU_PAGESHIFT));
2325 
2326 			if (pp != NULL && page_isretired(pp)) {
2327 
2328 				/* Zero the address to clear the error */
2329 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2330 				return (0);
2331 			}
2332 		}
2333 		cpu_log_err(aflt);
2334 		break;
2335 
2336 	default:
2337 		/*
2338 		 * If the us3_common.c code doesn't know the flt_type, it may
2339 		 * be an implementation-specific code.  Call into the impldep
2340 		 * backend to find out what to do: if it tells us to continue,
2341 		 * break and handle as if falling through from a UE; if not,
2342 		 * the impldep backend has handled the error and we're done.
2343 		 */
2344 		switch (cpu_impl_async_log_err(flt, eqep)) {
2345 		case CH_ASYNC_LOG_DONE:
2346 			return (1);
2347 		case CH_ASYNC_LOG_RECIRC:
2348 			return (0);
2349 		case CH_ASYNC_LOG_CONTINUE:
2350 			break; /* continue on to handle UE-like error */
2351 		default:
2352 			cmn_err(CE_WARN, "discarding error 0x%p with "
2353 			    "invalid fault type (0x%x)",
2354 			    (void *)aflt, ch_flt->flt_type);
2355 			return (0);
2356 		}
2357 	}
2358 
2359 	/* ... fall through from the UE case */
2360 
2361 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2362 		if (!panicstr) {
2363 			cpu_page_retire(ch_flt);
2364 		} else {
2365 			/*
2366 			 * Clear UEs on panic so that we don't
2367 			 * get haunted by them during panic or
2368 			 * after reboot
2369 			 */
2370 			cpu_clearphys(aflt);
2371 			(void) clear_errors(NULL);
2372 		}
2373 	}
2374 
2375 	return (1);
2376 }
2377 
2378 /*
2379  * Retire the bad page that may contain the flushed error.
2380  */
2381 void
2382 cpu_page_retire(ch_async_flt_t *ch_flt)
2383 {
2384 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2385 	page_t *pp = page_numtopp_nolock(aflt->flt_addr >> MMU_PAGESHIFT);
2386 
2387 	if (pp != NULL) {
2388 		page_settoxic(pp, PAGE_IS_FAULTY);
2389 		(void) page_retire(pp, PAGE_IS_TOXIC);
2390 	}
2391 }
2392 
2393 /*
2394  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2395  * generic event post-processing for correctable and uncorrectable memory,
2396  * E$, and MTag errors.  Historically this entry point was used to log bits of
2397  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2398  * converted into an ereport.  In addition, it transmits the error to any
2399  * platform-specific service-processor FRU logging routines, if available.
2400  */
2401 void
2402 cpu_log_err(struct async_flt *aflt)
2403 {
2404 	char unum[UNUM_NAMLEN];
2405 	int len = 0;
2406 	int synd_status, synd_code, afar_status;
2407 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2408 
2409 	/*
2410 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
2411 	 * For Panther, L2$ is not external, so we don't want to
2412 	 * generate an E$ unum for those errors.
2413 	 */
2414 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
2415 		if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS)
2416 			aflt->flt_status |= ECC_ECACHE;
2417 	} else {
2418 		if (ch_flt->flt_bit & C_AFSR_ECACHE)
2419 			aflt->flt_status |= ECC_ECACHE;
2420 	}
2421 
2422 	/*
2423 	 * Determine syndrome status.
2424 	 */
2425 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2426 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2427 
2428 	/*
2429 	 * Determine afar status.
2430 	 */
2431 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2432 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2433 				ch_flt->flt_bit);
2434 	else
2435 		afar_status = AFLT_STAT_INVALID;
2436 
2437 	/*
2438 	 * If afar status is not invalid do a unum lookup.
2439 	 */
2440 	if (afar_status != AFLT_STAT_INVALID) {
2441 		(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
2442 			UNUM_NAMLEN, &len);
2443 	} else {
2444 		unum[0] = '\0';
2445 	}
2446 
2447 	synd_code = synd_to_synd_code(synd_status,
2448 	    aflt->flt_synd, ch_flt->flt_bit);
2449 
2450 	/*
2451 	 * Do not send the fruid message (plat_ecc_error_data_t)
2452 	 * to the SC if it can handle the enhanced error information
2453 	 * (plat_ecc_error2_data_t) or when the tunable
2454 	 * ecc_log_fruid_enable is set to 0.
2455 	 */
2456 
2457 	if (&plat_ecc_capability_sc_get &&
2458 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2459 		if (&plat_log_fruid_error)
2460 			plat_log_fruid_error(synd_code, aflt, unum,
2461 			    ch_flt->flt_bit);
2462 	}
2463 
2464 	if (aflt->flt_func != NULL)
2465 		aflt->flt_func(aflt, unum);
2466 
2467 	if (afar_status != AFLT_STAT_INVALID)
2468 		cpu_log_diag_info(ch_flt);
2469 
2470 	/*
2471 	 * If we have a CEEN error , we do not reenable CEEN until after
2472 	 * we exit the trap handler. Otherwise, another error may
2473 	 * occur causing the handler to be entered recursively.
2474 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2475 	 * to try and ensure that the CPU makes progress in the face
2476 	 * of a CE storm.
2477 	 */
2478 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2479 		(void) timeout(cpu_delayed_check_ce_errors,
2480 		    (void *)aflt->flt_inst,
2481 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2482 	}
2483 }
2484 
2485 /*
2486  * Invoked by error_init() early in startup and therefore before
2487  * startup_errorq() is called to drain any error Q -
2488  *
2489  * startup()
2490  *   startup_end()
2491  *     error_init()
2492  *       cpu_error_init()
2493  * errorq_init()
2494  *   errorq_drain()
2495  * start_other_cpus()
2496  *
2497  * The purpose of this routine is to create error-related taskqs.  Taskqs
2498  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2499  * context.
2500  */
2501 void
2502 cpu_error_init(int items)
2503 {
2504 	/*
2505 	 * Create taskq(s) to reenable CE
2506 	 */
2507 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2508 	    items, items, TASKQ_PREPOPULATE);
2509 }
2510 
2511 void
2512 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2513 {
2514 	char unum[UNUM_NAMLEN];
2515 	int len;
2516 
2517 	switch (aflt->flt_class) {
2518 	case CPU_FAULT:
2519 		cpu_ereport_init(aflt);
2520 		if (cpu_async_log_err(aflt, eqep))
2521 			cpu_ereport_post(aflt);
2522 		break;
2523 
2524 	case BUS_FAULT:
2525 		if (aflt->flt_func != NULL) {
2526 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2527 			    unum, UNUM_NAMLEN, &len);
2528 			aflt->flt_func(aflt, unum);
2529 		}
2530 		break;
2531 
2532 	case RECIRC_CPU_FAULT:
2533 		aflt->flt_class = CPU_FAULT;
2534 		cpu_log_err(aflt);
2535 		cpu_ereport_post(aflt);
2536 		break;
2537 
2538 	case RECIRC_BUS_FAULT:
2539 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2540 		/*FALLTHRU*/
2541 	default:
2542 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2543 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2544 		return;
2545 	}
2546 }
2547 
2548 /*
2549  * Scrub and classify a CE.  This function must not modify the
2550  * fault structure passed to it but instead should return the classification
2551  * information.
2552  */
2553 
2554 static uchar_t
2555 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2556 {
2557 	uchar_t disp = CE_XDIAG_EXTALG;
2558 	on_trap_data_t otd;
2559 	uint64_t orig_err;
2560 	ch_cpu_logout_t *clop;
2561 
2562 	/*
2563 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2564 	 * this, but our other callers have not.  Disable preemption to
2565 	 * avoid CPU migration so that we restore CEEN on the correct
2566 	 * cpu later.
2567 	 *
2568 	 * CEEN is cleared so that further CEs that our instruction and
2569 	 * data footprint induce do not cause use to either creep down
2570 	 * kernel stack to the point of overflow, or do so much CE
2571 	 * notification as to make little real forward progress.
2572 	 *
2573 	 * NCEEN must not be cleared.  However it is possible that
2574 	 * our accesses to the flt_addr may provoke a bus error or timeout
2575 	 * if the offending address has just been unconfigured as part of
2576 	 * a DR action.  So we must operate under on_trap protection.
2577 	 */
2578 	kpreempt_disable();
2579 	orig_err = get_error_enable();
2580 	if (orig_err & EN_REG_CEEN)
2581 	    set_error_enable(orig_err & ~EN_REG_CEEN);
2582 
2583 	/*
2584 	 * Our classification algorithm includes the line state before
2585 	 * the scrub; we'd like this captured after the detection and
2586 	 * before the algorithm below - the earlier the better.
2587 	 *
2588 	 * If we've come from a cpu CE trap then this info already exists
2589 	 * in the cpu logout area.
2590 	 *
2591 	 * For a CE detected by memscrub for which there was no trap
2592 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2593 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2594 	 * marked the fault structure as incomplete as a flag to later
2595 	 * logging code.
2596 	 *
2597 	 * If called directly from an IO detected CE there has been
2598 	 * no line data capture.  In this case we logout to the cpu logout
2599 	 * area - that's appropriate since it's the cpu cache data we need
2600 	 * for classification.  We thus borrow the cpu logout area for a
2601 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2602 	 * this time (we will invalidate it again below).
2603 	 *
2604 	 * If called from the partner check xcall handler then this cpu
2605 	 * (the partner) has not necessarily experienced a CE at this
2606 	 * address.  But we want to capture line state before its scrub
2607 	 * attempt since we use that in our classification.
2608 	 */
2609 	if (logout_tried == B_FALSE) {
2610 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2611 			disp |= CE_XDIAG_NOLOGOUT;
2612 	}
2613 
2614 	/*
2615 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2616 	 * no longer be valid (if DR'd since the initial event) so we
2617 	 * perform this scrub under on_trap protection.  If this access is
2618 	 * ok then further accesses below will also be ok - DR cannot
2619 	 * proceed while this thread is active (preemption is disabled);
2620 	 * to be safe we'll nonetheless use on_trap again below.
2621 	 */
2622 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2623 		cpu_scrubphys(ecc);
2624 	} else {
2625 		no_trap();
2626 		if (orig_err & EN_REG_CEEN)
2627 		    set_error_enable(orig_err);
2628 		kpreempt_enable();
2629 		return (disp);
2630 	}
2631 	no_trap();
2632 
2633 	/*
2634 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2635 	 * Note that it's quite possible that the read sourced the data from
2636 	 * another cpu.
2637 	 */
2638 	if (clear_ecc(ecc))
2639 		disp |= CE_XDIAG_CE1;
2640 
2641 	/*
2642 	 * Read the data again.  This time the read is very likely to
2643 	 * come from memory since the scrub induced a writeback to memory.
2644 	 */
2645 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2646 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2647 	} else {
2648 		no_trap();
2649 		if (orig_err & EN_REG_CEEN)
2650 		    set_error_enable(orig_err);
2651 		kpreempt_enable();
2652 		return (disp);
2653 	}
2654 	no_trap();
2655 
2656 	/* Did that read induce a CE that matches the AFAR? */
2657 	if (clear_ecc(ecc))
2658 		disp |= CE_XDIAG_CE2;
2659 
2660 	/*
2661 	 * Look at the logout information and record whether we found the
2662 	 * line in l2/l3 cache.  For Panther we are interested in whether
2663 	 * we found it in either cache (it won't reside in both but
2664 	 * it is possible to read it that way given the moving target).
2665 	 */
2666 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2667 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2668 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2669 		int hit, level;
2670 		int state;
2671 		int totalsize;
2672 		ch_ec_data_t *ecp;
2673 
2674 		/*
2675 		 * If hit is nonzero then a match was found and hit will
2676 		 * be one greater than the index which hit.  For Panther we
2677 		 * also need to pay attention to level to see which of l2$ or
2678 		 * l3$ it hit in.
2679 		 */
2680 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2681 		    0, &level);
2682 
2683 		if (hit) {
2684 			--hit;
2685 			disp |= CE_XDIAG_AFARMATCH;
2686 
2687 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2688 				if (level == 2)
2689 					ecp = &clop->clo_data.chd_l2_data[hit];
2690 				else
2691 					ecp = &clop->clo_data.chd_ec_data[hit];
2692 			} else {
2693 				ASSERT(level == 2);
2694 				ecp = &clop->clo_data.chd_ec_data[hit];
2695 			}
2696 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2697 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2698 			    ecc->flt_addr, ecp->ec_tag);
2699 
2700 			/*
2701 			 * Cheetah variants use different state encodings -
2702 			 * the CH_ECSTATE_* defines vary depending on the
2703 			 * module we're compiled for.  Translate into our
2704 			 * one true version.  Conflate Owner-Shared state
2705 			 * of SSM mode with Owner as victimisation of such
2706 			 * lines may cause a writeback.
2707 			 */
2708 			switch (state) {
2709 			case CH_ECSTATE_MOD:
2710 				disp |= EC_STATE_M;
2711 				break;
2712 
2713 			case CH_ECSTATE_OWN:
2714 			case CH_ECSTATE_OWS:
2715 				disp |= EC_STATE_O;
2716 				break;
2717 
2718 			case CH_ECSTATE_EXL:
2719 				disp |= EC_STATE_E;
2720 				break;
2721 
2722 			case CH_ECSTATE_SHR:
2723 				disp |= EC_STATE_S;
2724 				break;
2725 
2726 			default:
2727 				disp |= EC_STATE_I;
2728 				break;
2729 			}
2730 		}
2731 
2732 		/*
2733 		 * If we initiated the delayed logout then we are responsible
2734 		 * for invalidating the logout area.
2735 		 */
2736 		if (logout_tried == B_FALSE) {
2737 			bzero(clop, sizeof (ch_cpu_logout_t));
2738 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2739 		}
2740 	}
2741 
2742 	/*
2743 	 * Re-enable CEEN if we turned it off.
2744 	 */
2745 	if (orig_err & EN_REG_CEEN)
2746 	    set_error_enable(orig_err);
2747 	kpreempt_enable();
2748 
2749 	return (disp);
2750 }
2751 
2752 /*
2753  * Scrub a correctable memory error and collect data for classification
2754  * of CE type.  This function is called in the detection path, ie tl0 handling
2755  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2756  */
2757 void
2758 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2759 {
2760 	/*
2761 	 * Cheetah CE classification does not set any bits in flt_status.
2762 	 * Instead we will record classification datapoints in flt_disp.
2763 	 */
2764 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2765 
2766 	/*
2767 	 * To check if the error detected by IO is persistent, sticky or
2768 	 * intermittent.  This is noticed by clear_ecc().
2769 	 */
2770 	if (ecc->flt_status & ECC_IOBUS)
2771 		ecc->flt_stat = C_AFSR_MEMORY;
2772 
2773 	/*
2774 	 * Record information from this first part of the algorithm in
2775 	 * flt_disp.
2776 	 */
2777 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2778 }
2779 
2780 /*
2781  * Select a partner to perform a further CE classification check from.
2782  * Must be called with kernel preemption disabled (to stop the cpu list
2783  * from changing).  The detecting cpu we are partnering has cpuid
2784  * aflt->flt_inst; we might not be running on the detecting cpu.
2785  *
2786  * Restrict choice to active cpus in the same cpu partition as ourselves in
2787  * an effort to stop bad cpus in one partition causing other partitions to
2788  * perform excessive diagnostic activity.  Actually since the errorq drain
2789  * is run from a softint most of the time and that is a global mechanism
2790  * this isolation is only partial.  Return NULL if we fail to find a
2791  * suitable partner.
2792  *
2793  * We prefer a partner that is in a different latency group to ourselves as
2794  * we will share fewer datapaths.  If such a partner is unavailable then
2795  * choose one in the same lgroup but prefer a different chip and only allow
2796  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2797  * flags includes PTNR_SELFOK then permit selection of the original detector.
2798  *
2799  * We keep a cache of the last partner selected for a cpu, and we'll try to
2800  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2801  * have passed since that selection was made.  This provides the benefit
2802  * of the point-of-view of different partners over time but without
2803  * requiring frequent cpu list traversals.
2804  */
2805 
2806 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2807 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2808 
2809 static cpu_t *
2810 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2811 {
2812 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2813 	hrtime_t lasttime, thistime;
2814 
2815 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2816 
2817 	dtcr = cpu[aflt->flt_inst];
2818 
2819 	/*
2820 	 * Short-circuit for the following cases:
2821 	 *	. the dtcr is not flagged active
2822 	 *	. there is just one cpu present
2823 	 *	. the detector has disappeared
2824 	 *	. we were given a bad flt_inst cpuid; this should not happen
2825 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2826 	 *	  reason to panic.
2827 	 *	. there is just one cpu left online in the cpu partition
2828 	 *
2829 	 * If we return NULL after this point then we do not update the
2830 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2831 	 * again next time; this is the case where the only other cpu online
2832 	 * in the detector's partition is on the same chip as the detector
2833 	 * and since CEEN re-enable is throttled even that case should not
2834 	 * hurt performance.
2835 	 */
2836 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2837 		return (NULL);
2838 	}
2839 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2840 		if (flags & PTNR_SELFOK) {
2841 			*typep = CE_XDIAG_PTNR_SELF;
2842 			return (dtcr);
2843 		} else {
2844 			return (NULL);
2845 		}
2846 	}
2847 
2848 	thistime = gethrtime();
2849 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2850 
2851 	/*
2852 	 * Select a starting point.
2853 	 */
2854 	if (!lasttime) {
2855 		/*
2856 		 * We've never selected a partner for this detector before.
2857 		 * Start the scan at the next online cpu in the same cpu
2858 		 * partition.
2859 		 */
2860 		sp = dtcr->cpu_next_part;
2861 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2862 		/*
2863 		 * Our last selection has not aged yet.  If this partner:
2864 		 *	. is still a valid cpu,
2865 		 *	. is still in the same partition as the detector
2866 		 *	. is still marked active
2867 		 *	. satisfies the 'flags' argument criteria
2868 		 * then select it again without updating the timestamp.
2869 		 */
2870 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2871 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2872 		    !cpu_flagged_active(sp->cpu_flags) ||
2873 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2874 		    (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
2875 		    !(flags & PTNR_SIBLINGOK))) {
2876 			sp = dtcr->cpu_next_part;
2877 		} else {
2878 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2879 				*typep = CE_XDIAG_PTNR_REMOTE;
2880 			} else if (sp == dtcr) {
2881 				*typep = CE_XDIAG_PTNR_SELF;
2882 			} else if (sp->cpu_chip->chip_id ==
2883 			    dtcr->cpu_chip->chip_id) {
2884 				*typep = CE_XDIAG_PTNR_SIBLING;
2885 			} else {
2886 				*typep = CE_XDIAG_PTNR_LOCAL;
2887 			}
2888 			return (sp);
2889 		}
2890 	} else {
2891 		/*
2892 		 * Our last selection has aged.  If it is nonetheless still a
2893 		 * valid cpu then start the scan at the next cpu in the
2894 		 * partition after our last partner.  If the last selection
2895 		 * is no longer a valid cpu then go with our default.  In
2896 		 * this way we slowly cycle through possible partners to
2897 		 * obtain multiple viewpoints over time.
2898 		 */
2899 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2900 		if (sp == NULL) {
2901 			sp = dtcr->cpu_next_part;
2902 		} else {
2903 			sp = sp->cpu_next_part;		/* may be dtcr */
2904 			if (sp->cpu_part != dtcr->cpu_part)
2905 				sp = dtcr;
2906 		}
2907 	}
2908 
2909 	/*
2910 	 * We have a proposed starting point for our search, but if this
2911 	 * cpu is offline then its cpu_next_part will point to itself
2912 	 * so we can't use that to iterate over cpus in this partition in
2913 	 * the loop below.  We still want to avoid iterating over cpus not
2914 	 * in our partition, so in the case that our starting point is offline
2915 	 * we will repoint it to be the detector itself;  and if the detector
2916 	 * happens to be offline we'll return NULL from the following loop.
2917 	 */
2918 	if (!cpu_flagged_active(sp->cpu_flags)) {
2919 		sp = dtcr;
2920 	}
2921 
2922 	ptnr = sp;
2923 	locptnr = NULL;
2924 	sibptnr = NULL;
2925 	do {
2926 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
2927 			continue;
2928 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2929 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
2930 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2931 			*typep = CE_XDIAG_PTNR_REMOTE;
2932 			return (ptnr);
2933 		}
2934 		if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
2935 			if (sibptnr == NULL)
2936 				sibptnr = ptnr;
2937 			continue;
2938 		}
2939 		if (locptnr == NULL)
2940 			locptnr = ptnr;
2941 	} while ((ptnr = ptnr->cpu_next_part) != sp);
2942 
2943 	/*
2944 	 * A foreign partner has already been returned if one was available.
2945 	 *
2946 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
2947 	 * detector, is active, and is not a sibling of the detector.
2948 	 *
2949 	 * If sibptnr is not NULL it is a sibling of the detector, and is
2950 	 * active.
2951 	 *
2952 	 * If we have to resort to using the detector itself we have already
2953 	 * checked that it is active.
2954 	 */
2955 	if (locptnr) {
2956 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
2957 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2958 		*typep = CE_XDIAG_PTNR_LOCAL;
2959 		return (locptnr);
2960 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
2961 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
2962 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2963 		*typep = CE_XDIAG_PTNR_SIBLING;
2964 		return (sibptnr);
2965 	} else if (flags & PTNR_SELFOK) {
2966 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
2967 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2968 		*typep = CE_XDIAG_PTNR_SELF;
2969 		return (dtcr);
2970 	}
2971 
2972 	return (NULL);
2973 }
2974 
2975 /*
2976  * Cross call handler that is requested to run on the designated partner of
2977  * a cpu that experienced a possibly sticky or possibly persistnet CE.
2978  */
2979 static void
2980 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
2981 {
2982 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
2983 }
2984 
2985 /*
2986  * The associated errorqs are never destroyed so we do not need to deal with
2987  * them disappearing before this timeout fires.  If the affected memory
2988  * has been DR'd out since the original event the scrub algrithm will catch
2989  * any errors and return null disposition info.  If the original detecting
2990  * cpu has been DR'd out then ereport detector info will not be able to
2991  * lookup CPU type;  with a small timeout this is unlikely.
2992  */
2993 static void
2994 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
2995 {
2996 	struct async_flt *aflt = cbarg->lkycb_aflt;
2997 	uchar_t disp;
2998 	cpu_t *cp;
2999 	int ptnrtype;
3000 
3001 	kpreempt_disable();
3002 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3003 	    &ptnrtype)) {
3004 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3005 		    (uint64_t)&disp);
3006 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3007 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3008 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3009 	} else {
3010 		ce_xdiag_lkydrops++;
3011 		if (ncpus > 1)
3012 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3013 			    CE_XDIAG_SKIP_NOPTNR);
3014 	}
3015 	kpreempt_enable();
3016 
3017 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3018 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3019 }
3020 
3021 /*
3022  * Called from errorq drain code when processing a CE error, both from
3023  * CPU and PCI drain functions.  Decide what further classification actions,
3024  * if any, we will perform.  Perform immediate actions now, and schedule
3025  * delayed actions as required.  Note that we are no longer necessarily running
3026  * on the detecting cpu, and that the async_flt structure will not persist on
3027  * return from this function.
3028  *
3029  * Calls to this function should aim to be self-throtlling in some way.  With
3030  * the delayed re-enable of CEEN the absolute rate of calls should not
3031  * be excessive.  Callers should also avoid performing in-depth classification
3032  * for events in pages that are already known to be suspect.
3033  *
3034  * We return nonzero to indicate that the event has been copied and
3035  * recirculated for further testing.  The caller should not log the event
3036  * in this case - it will be logged when further test results are available.
3037  *
3038  * Our possible contexts are that of errorq_drain: below lock level or from
3039  * panic context.  We can assume that the cpu we are running on is online.
3040  */
3041 
3042 
3043 #ifdef DEBUG
3044 static int ce_xdiag_forceaction;
3045 #endif
3046 
3047 int
3048 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3049     errorq_elem_t *eqep, size_t afltoffset)
3050 {
3051 	ce_dispact_t dispact, action;
3052 	cpu_t *cp;
3053 	uchar_t dtcrinfo, disp;
3054 	int ptnrtype;
3055 
3056 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3057 		ce_xdiag_drops++;
3058 		return (0);
3059 	} else if (!aflt->flt_in_memory) {
3060 		ce_xdiag_drops++;
3061 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3062 		return (0);
3063 	}
3064 
3065 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3066 
3067 	/*
3068 	 * Some correctable events are not scrubbed/classified, such as those
3069 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3070 	 * initial detector classification go no further.
3071 	 */
3072 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3073 		ce_xdiag_drops++;
3074 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3075 		return (0);
3076 	}
3077 
3078 	dispact = CE_DISPACT(ce_disp_table,
3079 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3080 	    CE_XDIAG_STATE(dtcrinfo),
3081 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3082 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3083 
3084 
3085 	action = CE_ACT(dispact);	/* bad lookup caught below */
3086 #ifdef DEBUG
3087 	if (ce_xdiag_forceaction != 0)
3088 		action = ce_xdiag_forceaction;
3089 #endif
3090 
3091 	switch (action) {
3092 	case CE_ACT_LKYCHK: {
3093 		caddr_t ndata;
3094 		errorq_elem_t *neqep;
3095 		struct async_flt *ecc;
3096 		ce_lkychk_cb_t *cbargp;
3097 
3098 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3099 			ce_xdiag_lkydrops++;
3100 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3101 			    CE_XDIAG_SKIP_DUPFAIL);
3102 			break;
3103 		}
3104 		ecc = (struct async_flt *)(ndata + afltoffset);
3105 
3106 		ASSERT(ecc->flt_class == CPU_FAULT ||
3107 		    ecc->flt_class == BUS_FAULT);
3108 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3109 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3110 
3111 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3112 		cbargp->lkycb_aflt = ecc;
3113 		cbargp->lkycb_eqp = eqp;
3114 		cbargp->lkycb_eqep = neqep;
3115 
3116 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3117 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3118 		return (1);
3119 	}
3120 
3121 	case CE_ACT_PTNRCHK:
3122 		kpreempt_disable();	/* stop cpu list changing */
3123 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3124 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3125 			    (uint64_t)aflt, (uint64_t)&disp);
3126 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3127 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3128 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3129 		} else if (ncpus > 1) {
3130 			ce_xdiag_ptnrdrops++;
3131 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3132 			    CE_XDIAG_SKIP_NOPTNR);
3133 		} else {
3134 			ce_xdiag_ptnrdrops++;
3135 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3136 			    CE_XDIAG_SKIP_UNIPROC);
3137 		}
3138 		kpreempt_enable();
3139 		break;
3140 
3141 	case CE_ACT_DONE:
3142 		break;
3143 
3144 	case CE_ACT(CE_DISP_BAD):
3145 	default:
3146 #ifdef DEBUG
3147 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3148 #endif
3149 		ce_xdiag_bad++;
3150 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3151 		break;
3152 	}
3153 
3154 	return (0);
3155 }
3156 
3157 /*
3158  * We route all errors through a single switch statement.
3159  */
3160 void
3161 cpu_ue_log_err(struct async_flt *aflt)
3162 {
3163 	switch (aflt->flt_class) {
3164 	case CPU_FAULT:
3165 		cpu_ereport_init(aflt);
3166 		if (cpu_async_log_err(aflt, NULL))
3167 			cpu_ereport_post(aflt);
3168 		break;
3169 
3170 	case BUS_FAULT:
3171 		bus_async_log_err(aflt);
3172 		break;
3173 
3174 	default:
3175 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3176 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3177 		return;
3178 	}
3179 }
3180 
3181 /*
3182  * Routine for panic hook callback from panic_idle().
3183  */
3184 void
3185 cpu_async_panic_callb(void)
3186 {
3187 	ch_async_flt_t ch_flt;
3188 	struct async_flt *aflt;
3189 	ch_cpu_errors_t cpu_error_regs;
3190 	uint64_t afsr_errs;
3191 
3192 	get_cpu_error_state(&cpu_error_regs);
3193 
3194 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3195 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS);
3196 
3197 	if (afsr_errs) {
3198 
3199 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3200 		aflt = (struct async_flt *)&ch_flt;
3201 		aflt->flt_id = gethrtime_waitfree();
3202 		aflt->flt_bus_id = getprocessorid();
3203 		aflt->flt_inst = CPU->cpu_id;
3204 		aflt->flt_stat = cpu_error_regs.afsr;
3205 		aflt->flt_addr = cpu_error_regs.afar;
3206 		aflt->flt_prot = AFLT_PROT_NONE;
3207 		aflt->flt_class = CPU_FAULT;
3208 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3209 		aflt->flt_panic = 1;
3210 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3211 		ch_flt.afsr_errs = afsr_errs;
3212 #if defined(SERRANO)
3213 		ch_flt.afar2 = cpu_error_regs.afar2;
3214 #endif	/* SERRANO */
3215 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3216 	}
3217 }
3218 
3219 /*
3220  * Routine to convert a syndrome into a syndrome code.
3221  */
3222 static int
3223 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3224 {
3225 	if (synd_status == AFLT_STAT_INVALID)
3226 		return (-1);
3227 
3228 	/*
3229 	 * Use the syndrome to index the appropriate syndrome table,
3230 	 * to get the code indicating which bit(s) is(are) bad.
3231 	 */
3232 	if (afsr_bit &
3233 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3234 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3235 #if defined(JALAPENO) || defined(SERRANO)
3236 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3237 				return (-1);
3238 			else
3239 				return (BPAR0 + synd);
3240 #else /* JALAPENO || SERRANO */
3241 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3242 				return (-1);
3243 			else
3244 				return (mtag_syndrome_tab[synd]);
3245 #endif /* JALAPENO || SERRANO */
3246 		} else {
3247 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3248 				return (-1);
3249 			else
3250 				return (ecc_syndrome_tab[synd]);
3251 		}
3252 	} else {
3253 		return (-1);
3254 	}
3255 }
3256 
3257 /*
3258  * Routine to return a string identifying the physical name
3259  * associated with a memory/cache error.
3260  */
3261 int
3262 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3263     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3264     ushort_t flt_status, char *buf, int buflen, int *lenp)
3265 {
3266 	int synd_code;
3267 	int ret;
3268 
3269 	/*
3270 	 * An AFSR of -1 defaults to a memory syndrome.
3271 	 */
3272 	if (flt_stat == (uint64_t)-1)
3273 		flt_stat = C_AFSR_CE;
3274 
3275 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3276 
3277 	/*
3278 	 * Syndrome code must be either a single-bit error code
3279 	 * (0...143) or -1 for unum lookup.
3280 	 */
3281 	if (synd_code < 0 || synd_code >= M2)
3282 		synd_code = -1;
3283 	if (&plat_get_mem_unum) {
3284 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3285 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3286 			buf[0] = '\0';
3287 			*lenp = 0;
3288 		}
3289 
3290 		return (ret);
3291 	}
3292 
3293 	return (ENOTSUP);
3294 }
3295 
3296 /*
3297  * Wrapper for cpu_get_mem_unum() routine that takes an
3298  * async_flt struct rather than explicit arguments.
3299  */
3300 int
3301 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3302     char *buf, int buflen, int *lenp)
3303 {
3304 	/*
3305 	 * If we come thru here for an IO bus error aflt->flt_stat will
3306 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3307 	 * so it will interpret this as a memory error.
3308 	 */
3309 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3310 	    (aflt->flt_class == BUS_FAULT) ?
3311 	    (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs,
3312 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3313 	    aflt->flt_status, buf, buflen, lenp));
3314 }
3315 
3316 /*
3317  * This routine is a more generic interface to cpu_get_mem_unum()
3318  * that may be used by other modules (e.g. mm).
3319  */
3320 int
3321 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3322     char *buf, int buflen, int *lenp)
3323 {
3324 	int synd_status, flt_in_memory, ret;
3325 	ushort_t flt_status = 0;
3326 	char unum[UNUM_NAMLEN];
3327 
3328 	/*
3329 	 * Check for an invalid address.
3330 	 */
3331 	if (afar == (uint64_t)-1)
3332 		return (ENXIO);
3333 
3334 	if (synd == (uint64_t)-1)
3335 		synd_status = AFLT_STAT_INVALID;
3336 	else
3337 		synd_status = AFLT_STAT_VALID;
3338 
3339 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3340 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3341 
3342 	/*
3343 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
3344 	 * For Panther, L2$ is not external, so we don't want to
3345 	 * generate an E$ unum for those errors.
3346 	 */
3347 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3348 		if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS)
3349 			flt_status |= ECC_ECACHE;
3350 	} else {
3351 		if (*afsr & C_AFSR_ECACHE)
3352 			flt_status |= ECC_ECACHE;
3353 	}
3354 
3355 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
3356 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3357 	if (ret != 0)
3358 		return (ret);
3359 
3360 	if (*lenp >= buflen)
3361 		return (ENAMETOOLONG);
3362 
3363 	(void) strncpy(buf, unum, buflen);
3364 
3365 	return (0);
3366 }
3367 
3368 /*
3369  * Routine to return memory information associated
3370  * with a physical address and syndrome.
3371  */
3372 int
3373 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3374     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3375     int *segsp, int *banksp, int *mcidp)
3376 {
3377 	int synd_status, synd_code;
3378 
3379 	if (afar == (uint64_t)-1)
3380 		return (ENXIO);
3381 
3382 	if (synd == (uint64_t)-1)
3383 		synd_status = AFLT_STAT_INVALID;
3384 	else
3385 		synd_status = AFLT_STAT_VALID;
3386 
3387 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3388 
3389 	if (p2get_mem_info != NULL)
3390 		return ((p2get_mem_info)(synd_code, afar,
3391 			mem_sizep, seg_sizep, bank_sizep,
3392 			segsp, banksp, mcidp));
3393 	else
3394 		return (ENOTSUP);
3395 }
3396 
3397 /*
3398  * Routine to return a string identifying the physical
3399  * name associated with a cpuid.
3400  */
3401 int
3402 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3403 {
3404 	int ret;
3405 	char unum[UNUM_NAMLEN];
3406 
3407 	if (&plat_get_cpu_unum) {
3408 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3409 		    != 0)
3410 			return (ret);
3411 	} else {
3412 		return (ENOTSUP);
3413 	}
3414 
3415 	if (*lenp >= buflen)
3416 		return (ENAMETOOLONG);
3417 
3418 	(void) strncpy(buf, unum, buflen);
3419 
3420 	return (0);
3421 }
3422 
3423 /*
3424  * This routine exports the name buffer size.
3425  */
3426 size_t
3427 cpu_get_name_bufsize()
3428 {
3429 	return (UNUM_NAMLEN);
3430 }
3431 
/*
 * Historical function, apparently not used.  Intentionally does nothing.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3439 
/*
 * Historical function only called for SBus errors in debugging.
 * Intentionally does nothing.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3447 
3448 /*
3449  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3450  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3451  * an async fault structure argument is passed in, the captured error state
3452  * (AFSR, AFAR) info will be returned in the structure.
3453  */
3454 int
3455 clear_errors(ch_async_flt_t *ch_flt)
3456 {
3457 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3458 	ch_cpu_errors_t	cpu_error_regs;
3459 
3460 	get_cpu_error_state(&cpu_error_regs);
3461 
3462 	if (ch_flt != NULL) {
3463 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3464 		aflt->flt_addr = cpu_error_regs.afar;
3465 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3466 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3467 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3468 #if defined(SERRANO)
3469 		ch_flt->afar2 = cpu_error_regs.afar2;
3470 #endif	/* SERRANO */
3471 	}
3472 
3473 	set_cpu_error_state(&cpu_error_regs);
3474 
3475 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3476 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3477 }
3478 
3479 /*
3480  * Clear any AFSR error bits, and check for persistence.
3481  *
3482  * It would be desirable to also insist that syndrome match.  PCI handling
3483  * has already filled flt_synd.  For errors trapped by CPU we only fill
3484  * flt_synd when we queue the event, so we do not have a valid flt_synd
3485  * during initial classification (it is valid if we're called as part of
3486  * subsequent low-pil additional classification attempts).  We could try
3487  * to determine which syndrome to use: we know we're only called for
3488  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3489  * would be esynd/none and esynd/msynd, respectively.  If that is
3490  * implemented then what do we do in the case that we do experience an
3491  * error on the same afar but with different syndrome?  At the very least
3492  * we should count such occurences.  Anyway, for now, we'll leave it as
3493  * it has been for ages.
3494  */
3495 static int
3496 clear_ecc(struct async_flt *aflt)
3497 {
3498 	ch_cpu_errors_t	cpu_error_regs;
3499 
3500 	/*
3501 	 * Snapshot the AFSR and AFAR and clear any errors
3502 	 */
3503 	get_cpu_error_state(&cpu_error_regs);
3504 	set_cpu_error_state(&cpu_error_regs);
3505 
3506 	/*
3507 	 * If any of the same memory access error bits are still on and
3508 	 * the AFAR matches, return that the error is persistent.
3509 	 */
3510 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3511 	    cpu_error_regs.afar == aflt->flt_addr);
3512 }
3513 
3514 /*
3515  * Turn off all cpu error detection, normally only used for panics.
3516  */
3517 void
3518 cpu_disable_errors(void)
3519 {
3520 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3521 }
3522 
3523 /*
3524  * Enable errors.
3525  */
3526 void
3527 cpu_enable_errors(void)
3528 {
3529 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3530 }
3531 
3532 /*
3533  * Flush the entire ecache using displacement flush by reading through a
3534  * physical address range twice as large as the Ecache.
3535  */
3536 void
3537 cpu_flush_ecache(void)
3538 {
3539 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3540 	    cpunodes[CPU->cpu_id].ecache_linesize);
3541 }
3542 
3543 /*
3544  * Return CPU E$ set size - E$ size divided by the associativity.
3545  * We use this function in places where the CPU_PRIVATE ptr may not be
3546  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3547  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3548  * up before the kernel switches from OBP's to the kernel's trap table, so
3549  * we don't have to worry about cpunodes being unitialized.
3550  */
3551 int
3552 cpu_ecache_set_size(struct cpu *cp)
3553 {
3554 	if (CPU_PRIVATE(cp))
3555 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3556 
3557 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3558 }
3559 
3560 /*
3561  * Flush Ecache line.
3562  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3563  * Uses normal displacement flush for Cheetah.
3564  */
3565 static void
3566 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3567 {
3568 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3569 	int ec_set_size = cpu_ecache_set_size(CPU);
3570 
3571 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3572 }
3573 
3574 /*
3575  * Scrub physical address.
3576  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3577  * Ecache or direct-mapped Ecache.
3578  */
3579 static void
3580 cpu_scrubphys(struct async_flt *aflt)
3581 {
3582 	int ec_set_size = cpu_ecache_set_size(CPU);
3583 
3584 	scrubphys(aflt->flt_addr, ec_set_size);
3585 }
3586 
3587 /*
3588  * Clear physical address.
3589  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3590  * Ecache or direct-mapped Ecache.
3591  */
3592 void
3593 cpu_clearphys(struct async_flt *aflt)
3594 {
3595 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3596 	int ec_set_size = cpu_ecache_set_size(CPU);
3597 
3598 
3599 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3600 }
3601 
#if defined(CPU_IMP_ECACHE_ASSOC)
/*
 * Look through the captured Ecache tags of every way for a line that is
 * not invalid and whose tag address matches the fault address.
 * Returns way# + 1 for the first such line, or 0 when there is none.
 */
static int
cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
	int ec_set_size = cpu_ecache_set_size(CPU);
	int nway = cpu_ecache_nway();
	int way;

	for (way = 0; way < nway; way++) {
		ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[way];

		if (cpu_ectag_line_invalid(totalsize, ecp->ec_tag))
			continue;

		/* Compare only the address bits that the tag holds. */
		if ((aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
			return (way + 1);
	}

	return (0);
}
#endif /* CPU_IMP_ECACHE_ASSOC */
3626 
3627 /*
3628  * Check whether a line in the given logout info matches the specified
3629  * fault address.  If reqval is set then the line must not be Invalid.
3630  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3631  * set to 2 for l2$ or 3 for l3$.
3632  */
3633 static int
3634 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3635 {
3636 	ch_diag_data_t *cdp = data;
3637 	ch_ec_data_t *ecp;
3638 	int totalsize, ec_set_size;
3639 	int i, ways;
3640 	int match = 0;
3641 	int tagvalid;
3642 	uint64_t addr, tagpa;
3643 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3644 
3645 	/*
3646 	 * Check the l2$ logout data
3647 	 */
3648 	if (ispanther) {
3649 		ecp = &cdp->chd_l2_data[0];
3650 		ec_set_size = PN_L2_SET_SIZE;
3651 		ways = PN_L2_NWAYS;
3652 	} else {
3653 		ecp = &cdp->chd_ec_data[0];
3654 		ec_set_size = cpu_ecache_set_size(CPU);
3655 		ways = cpu_ecache_nway();
3656 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3657 	}
3658 	/* remove low order PA bits from fault address not used in PA tag */
3659 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3660 	for (i = 0; i < ways; i++, ecp++) {
3661 		if (ispanther) {
3662 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3663 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3664 		} else {
3665 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3666 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3667 			    ecp->ec_tag);
3668 		}
3669 		if (tagpa == addr && (!reqval || tagvalid)) {
3670 			match = i + 1;
3671 			*level = 2;
3672 			break;
3673 		}
3674 	}
3675 
3676 	if (match || !ispanther)
3677 		return (match);
3678 
3679 	/* For Panther we also check the l3$ */
3680 	ecp = &cdp->chd_ec_data[0];
3681 	ec_set_size = PN_L3_SET_SIZE;
3682 	ways = PN_L3_NWAYS;
3683 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3684 
3685 	for (i = 0; i < ways; i++, ecp++) {
3686 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3687 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3688 			match = i + 1;
3689 			*level = 3;
3690 			break;
3691 		}
3692 	}
3693 
3694 	return (match);
3695 }
3696 
3697 #if defined(CPU_IMP_L1_CACHE_PARITY)
3698 /*
3699  * Record information related to the source of an Dcache Parity Error.
3700  */
3701 static void
3702 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3703 {
3704 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3705 	int index;
3706 
3707 	/*
3708 	 * Since instruction decode cannot be done at high PIL
3709 	 * just examine the entire Dcache to locate the error.
3710 	 */
3711 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3712 		ch_flt->parity_data.dpe.cpl_way = -1;
3713 		ch_flt->parity_data.dpe.cpl_off = -1;
3714 	}
3715 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3716 		cpu_dcache_parity_check(ch_flt, index);
3717 }
3718 
3719 /*
3720  * Check all ways of the Dcache at a specified index for good parity.
3721  */
3722 static void
3723 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3724 {
3725 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3726 	uint64_t parity_bits, pbits, data_word;
3727 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3728 	int way, word, data_byte;
3729 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3730 	ch_dc_data_t tmp_dcp;
3731 
3732 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3733 		/*
3734 		 * Perform diagnostic read.
3735 		 */
3736 		get_dcache_dtag(index + way * dc_set_size,
3737 				(uint64_t *)&tmp_dcp);
3738 
3739 		/*
3740 		 * Check tag for even parity.
3741 		 * Sum of 1 bits (including parity bit) should be even.
3742 		 */
3743 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3744 			/*
3745 			 * If this is the first error log detailed information
3746 			 * about it and check the snoop tag. Otherwise just
3747 			 * record the fact that we found another error.
3748 			 */
3749 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3750 				ch_flt->parity_data.dpe.cpl_way = way;
3751 				ch_flt->parity_data.dpe.cpl_cache =
3752 				    CPU_DC_PARITY;
3753 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3754 
3755 				if (popc64(tmp_dcp.dc_sntag &
3756 						CHP_DCSNTAG_PARMASK) & 1) {
3757 					ch_flt->parity_data.dpe.cpl_tag |=
3758 								CHP_DC_SNTAG;
3759 					ch_flt->parity_data.dpe.cpl_lcnt++;
3760 				}
3761 
3762 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3763 			}
3764 
3765 			ch_flt->parity_data.dpe.cpl_lcnt++;
3766 		}
3767 
3768 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3769 			/*
3770 			 * Panther has more parity bits than the other
3771 			 * processors for covering dcache data and so each
3772 			 * byte of data in each word has its own parity bit.
3773 			 */
3774 			parity_bits = tmp_dcp.dc_pn_data_parity;
3775 			for (word = 0; word < 4; word++) {
3776 				data_word = tmp_dcp.dc_data[word];
3777 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3778 				for (data_byte = 0; data_byte < 8;
3779 				    data_byte++) {
3780 					if (((popc64(data_word &
3781 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3782 					    (pbits & 1)) {
3783 						cpu_record_dc_data_parity(
3784 						ch_flt, dcp, &tmp_dcp, way,
3785 						word);
3786 					}
3787 					pbits >>= 1;
3788 					data_word >>= 8;
3789 				}
3790 				parity_bits >>= 8;
3791 			}
3792 		} else {
3793 			/*
3794 			 * Check data array for even parity.
3795 			 * The 8 parity bits are grouped into 4 pairs each
3796 			 * of which covers a 64-bit word.  The endianness is
3797 			 * reversed -- the low-order parity bits cover the
3798 			 * high-order data words.
3799 			 */
3800 			parity_bits = tmp_dcp.dc_utag >> 8;
3801 			for (word = 0; word < 4; word++) {
3802 				pbits = (parity_bits >> (6 - word * 2)) & 3;
3803 				if ((popc64(tmp_dcp.dc_data[word]) +
3804 				    parity_bits_popc[pbits]) & 1) {
3805 					cpu_record_dc_data_parity(ch_flt, dcp,
3806 					    &tmp_dcp, way, word);
3807 				}
3808 			}
3809 		}
3810 	}
3811 }
3812 
3813 static void
3814 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
3815     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
3816 {
3817 	/*
3818 	 * If this is the first error log detailed information about it.
3819 	 * Otherwise just record the fact that we found another error.
3820 	 */
3821 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3822 		ch_flt->parity_data.dpe.cpl_way = way;
3823 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
3824 		ch_flt->parity_data.dpe.cpl_off = word * 8;
3825 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
3826 	}
3827 	ch_flt->parity_data.dpe.cpl_lcnt++;
3828 }
3829 
3830 /*
3831  * Record information related to the source of an Icache Parity Error.
3832  *
3833  * Called with the Icache disabled so any diagnostic accesses are safe.
3834  */
3835 static void
3836 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
3837 {
3838 	int	ic_set_size;
3839 	int	ic_linesize;
3840 	int	index;
3841 
3842 	if (CPU_PRIVATE(CPU)) {
3843 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3844 		    CH_ICACHE_NWAY;
3845 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3846 	} else {
3847 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3848 		ic_linesize = icache_linesize;
3849 	}
3850 
3851 	ch_flt->parity_data.ipe.cpl_way = -1;
3852 	ch_flt->parity_data.ipe.cpl_off = -1;
3853 
3854 	for (index = 0; index < ic_set_size; index += ic_linesize)
3855 		cpu_icache_parity_check(ch_flt, index);
3856 }
3857 
3858 /*
3859  * Check all ways of the Icache at a specified index for good parity.
3860  */
3861 static void
3862 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
3863 {
3864 	uint64_t parmask, pn_inst_parity;
3865 	int ic_set_size;
3866 	int ic_linesize;
3867 	int flt_index, way, instr, num_instr;
3868 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3869 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
3870 	ch_ic_data_t tmp_icp;
3871 
3872 	if (CPU_PRIVATE(CPU)) {
3873 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3874 		    CH_ICACHE_NWAY;
3875 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3876 	} else {
3877 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3878 		ic_linesize = icache_linesize;
3879 	}
3880 
3881 	/*
3882 	 * Panther has twice as many instructions per icache line and the
3883 	 * instruction parity bit is in a different location.
3884 	 */
3885 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3886 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
3887 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
3888 	} else {
3889 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
3890 		pn_inst_parity = 0;
3891 	}
3892 
3893 	/*
3894 	 * Index at which we expect to find the parity error.
3895 	 */
3896 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
3897 
3898 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
3899 		/*
3900 		 * Diagnostic reads expect address argument in ASI format.
3901 		 */
3902 		get_icache_dtag(2 * (index + way * ic_set_size),
3903 				(uint64_t *)&tmp_icp);
3904 
3905 		/*
3906 		 * If this is the index in which we expect to find the
3907 		 * error log detailed information about each of the ways.
3908 		 * This information will be displayed later if we can't
3909 		 * determine the exact way in which the error is located.
3910 		 */
3911 		if (flt_index == index)
3912 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
3913 
3914 		/*
3915 		 * Check tag for even parity.
3916 		 * Sum of 1 bits (including parity bit) should be even.
3917 		 */
3918 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
3919 			/*
3920 			 * If this way is the one in which we expected
3921 			 * to find the error record the way and check the
3922 			 * snoop tag. Otherwise just record the fact we
3923 			 * found another error.
3924 			 */
3925 			if (flt_index == index) {
3926 				ch_flt->parity_data.ipe.cpl_way = way;
3927 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
3928 
3929 				if (popc64(tmp_icp.ic_sntag &
3930 						CHP_ICSNTAG_PARMASK) & 1) {
3931 					ch_flt->parity_data.ipe.cpl_tag |=
3932 								CHP_IC_SNTAG;
3933 					ch_flt->parity_data.ipe.cpl_lcnt++;
3934 				}
3935 
3936 			}
3937 			ch_flt->parity_data.ipe.cpl_lcnt++;
3938 			continue;
3939 		}
3940 
3941 		/*
3942 		 * Check instruction data for even parity.
3943 		 * Bits participating in parity differ for PC-relative
3944 		 * versus non-PC-relative instructions.
3945 		 */
3946 		for (instr = 0; instr < num_instr; instr++) {
3947 			parmask = (tmp_icp.ic_data[instr] &
3948 					CH_ICDATA_PRED_ISPCREL) ?
3949 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
3950 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
3951 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
3952 				/*
3953 				 * If this way is the one in which we expected
3954 				 * to find the error record the way and offset.
3955 				 * Otherwise just log the fact we found another
3956 				 * error.
3957 				 */
3958 				if (flt_index == index) {
3959 					ch_flt->parity_data.ipe.cpl_way = way;
3960 					ch_flt->parity_data.ipe.cpl_off =
3961 								instr * 4;
3962 				}
3963 				ch_flt->parity_data.ipe.cpl_lcnt++;
3964 				continue;
3965 			}
3966 		}
3967 	}
3968 }
3969 
3970 /*
3971  * Record information related to the source of an Pcache Parity Error.
3972  */
3973 static void
3974 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
3975 {
3976 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3977 	int index;
3978 
3979 	/*
3980 	 * Since instruction decode cannot be done at high PIL just
3981 	 * examine the entire Pcache to check for any parity errors.
3982 	 */
3983 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3984 		ch_flt->parity_data.dpe.cpl_way = -1;
3985 		ch_flt->parity_data.dpe.cpl_off = -1;
3986 	}
3987 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
3988 		cpu_pcache_parity_check(ch_flt, index);
3989 }
3990 
3991 /*
3992  * Check all ways of the Pcache at a specified index for good parity.
3993  */
3994 static void
3995 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
3996 {
3997 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3998 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
3999 	int way, word, pbit, parity_bits;
4000 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4001 	ch_pc_data_t tmp_pcp;
4002 
4003 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4004 		/*
4005 		 * Perform diagnostic read.
4006 		 */
4007 		get_pcache_dtag(index + way * pc_set_size,
4008 				(uint64_t *)&tmp_pcp);
4009 		/*
4010 		 * Check data array for odd parity. There are 8 parity
4011 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4012 		 * of those bits covers exactly 8 bytes of the data
4013 		 * array:
4014 		 *
4015 		 *	parity bit	P$ data bytes covered
4016 		 *	----------	---------------------
4017 		 *	50		63:56
4018 		 *	51		55:48
4019 		 *	52		47:40
4020 		 *	53		39:32
4021 		 *	54		31:24
4022 		 *	55		23:16
4023 		 *	56		15:8
4024 		 *	57		7:0
4025 		 */
4026 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4027 		for (word = 0; word < pc_data_words; word++) {
4028 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4029 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4030 				/*
4031 				 * If this is the first error log detailed
4032 				 * information about it. Otherwise just record
4033 				 * the fact that we found another error.
4034 				 */
4035 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4036 					ch_flt->parity_data.dpe.cpl_way = way;
4037 					ch_flt->parity_data.dpe.cpl_cache =
4038 					    CPU_PC_PARITY;
4039 					ch_flt->parity_data.dpe.cpl_off =
4040 					    word * sizeof (uint64_t);
4041 					bcopy(&tmp_pcp, pcp,
4042 							sizeof (ch_pc_data_t));
4043 				}
4044 				ch_flt->parity_data.dpe.cpl_lcnt++;
4045 			}
4046 		}
4047 	}
4048 }
4049 
4050 
4051 /*
4052  * Add L1 Data cache data to the ereport payload.
4053  */
4054 static void
4055 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4056 {
4057 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4058 	ch_dc_data_t *dcp;
4059 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4060 	uint_t nelem;
4061 	int i, ways_to_check, ways_logged = 0;
4062 
4063 	/*
4064 	 * If this is an D$ fault then there may be multiple
4065 	 * ways captured in the ch_parity_log_t structure.
4066 	 * Otherwise, there will be at most one way captured
4067 	 * in the ch_diag_data_t struct.
4068 	 * Check each way to see if it should be encoded.
4069 	 */
4070 	if (ch_flt->flt_type == CPU_DC_PARITY)
4071 		ways_to_check = CH_DCACHE_NWAY;
4072 	else
4073 		ways_to_check = 1;
4074 	for (i = 0; i < ways_to_check; i++) {
4075 		if (ch_flt->flt_type == CPU_DC_PARITY)
4076 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4077 		else
4078 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4079 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4080 			bcopy(dcp, &dcdata[ways_logged],
4081 				sizeof (ch_dc_data_t));
4082 			ways_logged++;
4083 		}
4084 	}
4085 
4086 	/*
4087 	 * Add the dcache data to the payload.
4088 	 */
4089 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4090 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4091 	if (ways_logged != 0) {
4092 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4093 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4094 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4095 	}
4096 }
4097 
4098 /*
4099  * Add L1 Instruction cache data to the ereport payload.
4100  */
4101 static void
4102 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4103 {
4104 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4105 	ch_ic_data_t *icp;
4106 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4107 	uint_t nelem;
4108 	int i, ways_to_check, ways_logged = 0;
4109 
4110 	/*
4111 	 * If this is an I$ fault then there may be multiple
4112 	 * ways captured in the ch_parity_log_t structure.
4113 	 * Otherwise, there will be at most one way captured
4114 	 * in the ch_diag_data_t struct.
4115 	 * Check each way to see if it should be encoded.
4116 	 */
4117 	if (ch_flt->flt_type == CPU_IC_PARITY)
4118 		ways_to_check = CH_ICACHE_NWAY;
4119 	else
4120 		ways_to_check = 1;
4121 	for (i = 0; i < ways_to_check; i++) {
4122 		if (ch_flt->flt_type == CPU_IC_PARITY)
4123 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4124 		else
4125 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4126 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4127 			bcopy(icp, &icdata[ways_logged],
4128 				sizeof (ch_ic_data_t));
4129 			ways_logged++;
4130 		}
4131 	}
4132 
4133 	/*
4134 	 * Add the icache data to the payload.
4135 	 */
4136 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4137 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4138 	if (ways_logged != 0) {
4139 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4140 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4141 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4142 	}
4143 }
4144 
4145 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4146 
4147 /*
4148  * Add ecache data to payload.
4149  */
4150 static void
4151 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4152 {
4153 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4154 	ch_ec_data_t *ecp;
4155 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4156 	uint_t nelem;
4157 	int i, ways_logged = 0;
4158 
4159 	/*
4160 	 * Check each way to see if it should be encoded
4161 	 * and concatinate it into a temporary buffer.
4162 	 */
4163 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4164 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4165 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4166 			bcopy(ecp, &ecdata[ways_logged],
4167 				sizeof (ch_ec_data_t));
4168 			ways_logged++;
4169 		}
4170 	}
4171 
4172 	/*
4173 	 * Panther CPUs have an additional level of cache and so
4174 	 * what we just collected was the L3 (ecache) and not the
4175 	 * L2 cache.
4176 	 */
4177 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4178 		/*
4179 		 * Add the L3 (ecache) data to the payload.
4180 		 */
4181 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4182 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4183 		if (ways_logged != 0) {
4184 			nelem = sizeof (ch_ec_data_t) /
4185 			    sizeof (uint64_t) * ways_logged;
4186 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4187 			    DATA_TYPE_UINT64_ARRAY, nelem,
4188 			    (uint64_t *)ecdata, NULL);
4189 		}
4190 
4191 		/*
4192 		 * Now collect the L2 cache.
4193 		 */
4194 		ways_logged = 0;
4195 		for (i = 0; i < PN_L2_NWAYS; i++) {
4196 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4197 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4198 				bcopy(ecp, &ecdata[ways_logged],
4199 				    sizeof (ch_ec_data_t));
4200 				ways_logged++;
4201 			}
4202 		}
4203 	}
4204 
4205 	/*
4206 	 * Add the L2 cache data to the payload.
4207 	 */
4208 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4209 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4210 	if (ways_logged != 0) {
4211 		nelem = sizeof (ch_ec_data_t) /
4212 			sizeof (uint64_t) * ways_logged;
4213 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4214 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4215 	}
4216 }
4217 
4218 /*
4219  * Encode the data saved in the ch_async_flt_t struct into
4220  * the FM ereport payload.
4221  */
4222 static void
4223 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4224 	nvlist_t *resource, int *afar_status, int *synd_status)
4225 {
4226 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4227 	*synd_status = AFLT_STAT_INVALID;
4228 	*afar_status = AFLT_STAT_INVALID;
4229 
4230 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4231 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4232 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4233 	}
4234 
4235 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4236 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4237 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4238 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4239 	}
4240 
4241 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4242 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4243 		    ch_flt->flt_bit);
4244 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4245 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4246 	}
4247 
4248 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4249 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4250 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4251 	}
4252 
4253 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4254 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4255 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4256 	}
4257 
4258 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4259 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4260 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4261 	}
4262 
4263 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4264 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4265 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4266 	}
4267 
4268 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4269 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4270 		    DATA_TYPE_BOOLEAN_VALUE,
4271 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4272 	}
4273 
4274 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4275 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4276 		    DATA_TYPE_BOOLEAN_VALUE,
4277 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4278 	}
4279 
4280 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4281 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4282 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4283 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4284 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4285 	}
4286 
4287 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4288 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4289 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4290 	}
4291 
4292 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4293 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4294 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4295 	}
4296 
4297 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4298 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4299 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4300 	}
4301 
4302 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4303 		cpu_payload_add_ecache(aflt, payload);
4304 
4305 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4306 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4307 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4308 	}
4309 
4310 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4311 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4312 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4313 	}
4314 
4315 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4316 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4317 		    DATA_TYPE_UINT32_ARRAY, 16,
4318 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4319 	}
4320 
4321 #if defined(CPU_IMP_L1_CACHE_PARITY)
4322 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4323 		cpu_payload_add_dcache(aflt, payload);
4324 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4325 		cpu_payload_add_icache(aflt, payload);
4326 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4327 
4328 #if defined(CHEETAH_PLUS)
4329 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4330 		cpu_payload_add_pcache(aflt, payload);
4331 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4332 		cpu_payload_add_tlb(aflt, payload);
4333 #endif	/* CHEETAH_PLUS */
4334 	/*
4335 	 * Create the FMRI that goes into the payload
4336 	 * and contains the unum info if necessary.
4337 	 */
4338 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) &&
4339 	    (*afar_status == AFLT_STAT_VALID)) {
4340 		char unum[UNUM_NAMLEN];
4341 		int len;
4342 
4343 		if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum,
4344 		    UNUM_NAMLEN, &len) == 0) {
4345 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4346 			    NULL, unum, NULL);
4347 			fm_payload_set(payload,
4348 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4349 			    DATA_TYPE_NVLIST, resource, NULL);
4350 		}
4351 	}
4352 }
4353 
4354 /*
4355  * Initialize the way info if necessary.
4356  */
4357 void
4358 cpu_ereport_init(struct async_flt *aflt)
4359 {
4360 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4361 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4362 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4363 	int i;
4364 
4365 	/*
4366 	 * Initialize the info in the CPU logout structure.
4367 	 * The I$/D$ way information is not initialized here
4368 	 * since it is captured in the logout assembly code.
4369 	 */
4370 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4371 		(ecp + i)->ec_way = i;
4372 
4373 	for (i = 0; i < PN_L2_NWAYS; i++)
4374 		(l2p + i)->ec_way = i;
4375 }
4376 
4377 /*
4378  * Returns whether fault address is valid for this error bit and
4379  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4380  */
4381 int
4382 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4383 {
4384 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4385 
4386 	return ((aflt->flt_stat & C_AFSR_MEMORY) &&
4387 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4388 	    AFLT_STAT_VALID &&
4389 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4390 }
4391 
4392 static void
4393 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4394 {
4395 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4396 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4397 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4398 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4399 #if defined(CPU_IMP_ECACHE_ASSOC)
4400 	int i, nway;
4401 #endif /* CPU_IMP_ECACHE_ASSOC */
4402 
4403 	/*
4404 	 * Check if the CPU log out captured was valid.
4405 	 */
4406 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4407 	    ch_flt->flt_data_incomplete)
4408 		return;
4409 
4410 #if defined(CPU_IMP_ECACHE_ASSOC)
4411 	nway = cpu_ecache_nway();
4412 	i =  cpu_ecache_line_valid(ch_flt);
4413 	if (i == 0 || i > nway) {
4414 		for (i = 0; i < nway; i++)
4415 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4416 	} else
4417 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4418 #else /* CPU_IMP_ECACHE_ASSOC */
4419 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4420 #endif /* CPU_IMP_ECACHE_ASSOC */
4421 
4422 #if defined(CHEETAH_PLUS)
4423 	pn_cpu_log_diag_l2_info(ch_flt);
4424 #endif /* CHEETAH_PLUS */
4425 
4426 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4427 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4428 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4429 	}
4430 
4431 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4432 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4433 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4434 		else
4435 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4436 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4437 	}
4438 }
4439 
/*
 * Cheetah ECC calculation.
 *
 * We only need to do the calculation on the data bits and can ignore check
 * bit and Mtag bit terms in the calculation.
 *
 * Row N holds the two data masks used to generate ECC bit N: the first
 * mask selects bits of the low-order 64 bits of the data and the second
 * selects bits of the high-order 64 bits.
 */
static uint64_t ch_ecc_table[9][2] = {
	/* low-order 64-bits	  high-order 64-bits */
	{ 0x46bffffeccd1177fULL, 0x488800022100014cULL },
	{ 0x42fccc81331ff77fULL, 0x14424f1010249184ULL },
	{ 0x8898827c222f1ffeULL, 0x22c1222808184aafULL },
	{ 0xf7632203e131ccf1ULL, 0xe1241121848292b8ULL },
	{ 0x7f5511421b113809ULL, 0x901c88d84288aafeULL },
	{ 0x1d49412184882487ULL, 0x8f338c87c044c6efULL },
	{ 0xf552181014448344ULL, 0x7ff8f4443e411911ULL },
	{ 0x2189240808f24228ULL, 0xfeeff8cc81333f42ULL },
	{ 0x3280008440001112ULL, 0xfee88b337ffffd62ULL },
};
4460 
/*
 * 64-bit population count: returns the number of 1 bits in val.
 *
 * Uses the well-known trick of clearing the lowest set bit with
 * val &= val - 1, so the loop runs once per 1 bit.  We could use the
 * UltraSPARC V9 POPC instruction, but some CPUs including Cheetahplus
 * and Jaguar do not support that instruction.
 */
int
popc64(uint64_t val)
{
	int ones = 0;

	while (val != 0) {
		val &= val - 1;
		ones++;
	}
	return (ones);
}
4476 
4477 /*
4478  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4479  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4480  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4481  * instead of doing all the xor's.
4482  */
4483 uint32_t
4484 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4485 {
4486 	int bitno, s;
4487 	int synd = 0;
4488 
4489 	for (bitno = 0; bitno < 9; bitno++) {
4490 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4491 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4492 		synd |= (s << bitno);
4493 	}
4494 	return (synd);
4495 
4496 }
4497 
4498 /*
4499  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4500  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4501  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4502  */
4503 static void
4504 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4505     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4506 {
4507 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4508 
4509 	if (reason &&
4510 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4511 		(void) strcat(reason, eccp->ec_reason);
4512 	}
4513 
4514 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4515 	ch_flt->flt_type = eccp->ec_flt_type;
4516 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4517 		ch_flt->flt_diag_data = *cdp;
4518 	else
4519 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4520 	aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit);
4521 
4522 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4523 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4524 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4525 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4526 	else
4527 		aflt->flt_synd = 0;
4528 
4529 	aflt->flt_payload = eccp->ec_err_payload;
4530 
4531 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4532 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4533 		cpu_errorq_dispatch(eccp->ec_err_class,
4534 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4535 		    aflt->flt_panic);
4536 	else
4537 		cpu_errorq_dispatch(eccp->ec_err_class,
4538 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4539 		    aflt->flt_panic);
4540 }
4541 
4542 /*
4543  * Queue events on async event queue one event per error bit.  First we
4544  * queue the events that we "expect" for the given trap, then we queue events
4545  * that we may not expect.  Return number of events queued.
4546  */
4547 int
4548 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4549     ch_cpu_logout_t *clop)
4550 {
4551 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4552 	ecc_type_to_info_t *eccp;
4553 	int nevents = 0;
4554 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4555 #if defined(CHEETAH_PLUS)
4556 	uint64_t orig_t_afsr_errs;
4557 #endif
4558 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4559 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4560 	ch_diag_data_t *cdp = NULL;
4561 
4562 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4563 
4564 #if defined(CHEETAH_PLUS)
4565 	orig_t_afsr_errs = t_afsr_errs;
4566 
4567 	/*
4568 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4569 	 */
4570 	if (clop != NULL) {
4571 		/*
4572 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4573 		 * flt_addr and flt_stat fields will be reset to the primaries
4574 		 * below, but the sdw_addr and sdw_stat will stay as the
4575 		 * secondaries.
4576 		 */
4577 		cdp = &clop->clo_sdw_data;
4578 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4579 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4580 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4581 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4582 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4583 
4584 		/*
4585 		 * If the primary and shadow AFSR differ, tag the shadow as
4586 		 * the first fault.
4587 		 */
4588 		if ((primary_afar != cdp->chd_afar) ||
4589 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4590 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4591 		}
4592 
4593 		/*
4594 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4595 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4596 		 * is expected to be zero for those CPUs which do not have
4597 		 * an AFSR_EXT register.
4598 		 */
4599 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4600 			if ((eccp->ec_afsr_bit &
4601 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4602 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4603 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4604 				cdp = NULL;
4605 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4606 				nevents++;
4607 			}
4608 		}
4609 
4610 		/*
4611 		 * If the ME bit is on in the primary AFSR turn all the
4612 		 * error bits on again that may set the ME bit to make
4613 		 * sure we see the ME AFSR error logs.
4614 		 */
4615 		if ((primary_afsr & C_AFSR_ME) != 0)
4616 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4617 	}
4618 #endif	/* CHEETAH_PLUS */
4619 
4620 	if (clop != NULL)
4621 		cdp = &clop->clo_data;
4622 
4623 	/*
4624 	 * Queue expected errors, error bit and fault type must match
4625 	 * in the ecc_type_to_info table.
4626 	 */
4627 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4628 	    eccp++) {
4629 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4630 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4631 #if defined(SERRANO)
4632 			/*
4633 			 * For FRC/FRU errors on Serrano the afar2 captures
4634 			 * the address and the associated data is
4635 			 * in the shadow logout area.
4636 			 */
4637 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4638 				if (clop != NULL)
4639 					cdp = &clop->clo_sdw_data;
4640 				aflt->flt_addr = ch_flt->afar2;
4641 			} else {
4642 				if (clop != NULL)
4643 					cdp = &clop->clo_data;
4644 				aflt->flt_addr = primary_afar;
4645 			}
4646 #else	/* SERRANO */
4647 			aflt->flt_addr = primary_afar;
4648 #endif	/* SERRANO */
4649 			aflt->flt_stat = primary_afsr;
4650 			ch_flt->afsr_ext = primary_afsr_ext;
4651 			ch_flt->afsr_errs = primary_afsr_errs;
4652 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4653 			cdp = NULL;
4654 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4655 			nevents++;
4656 		}
4657 	}
4658 
4659 	/*
4660 	 * Queue unexpected errors, error bit only match.
4661 	 */
4662 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4663 	    eccp++) {
4664 		if (eccp->ec_afsr_bit & t_afsr_errs) {
4665 #if defined(SERRANO)
4666 			/*
4667 			 * For FRC/FRU errors on Serrano the afar2 captures
4668 			 * the address and the associated data is
4669 			 * in the shadow logout area.
4670 			 */
4671 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4672 				if (clop != NULL)
4673 					cdp = &clop->clo_sdw_data;
4674 				aflt->flt_addr = ch_flt->afar2;
4675 			} else {
4676 				if (clop != NULL)
4677 					cdp = &clop->clo_data;
4678 				aflt->flt_addr = primary_afar;
4679 			}
4680 #else	/* SERRANO */
4681 			aflt->flt_addr = primary_afar;
4682 #endif	/* SERRANO */
4683 			aflt->flt_stat = primary_afsr;
4684 			ch_flt->afsr_ext = primary_afsr_ext;
4685 			ch_flt->afsr_errs = primary_afsr_errs;
4686 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4687 			cdp = NULL;
4688 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4689 			nevents++;
4690 		}
4691 	}
4692 	return (nevents);
4693 }
4694 
4695 /*
4696  * Return trap type number.
4697  */
4698 uint8_t
4699 flt_to_trap_type(struct async_flt *aflt)
4700 {
4701 	if (aflt->flt_status & ECC_I_TRAP)
4702 		return (TRAP_TYPE_ECC_I);
4703 	if (aflt->flt_status & ECC_D_TRAP)
4704 		return (TRAP_TYPE_ECC_D);
4705 	if (aflt->flt_status & ECC_F_TRAP)
4706 		return (TRAP_TYPE_ECC_F);
4707 	if (aflt->flt_status & ECC_C_TRAP)
4708 		return (TRAP_TYPE_ECC_C);
4709 	if (aflt->flt_status & ECC_DP_TRAP)
4710 		return (TRAP_TYPE_ECC_DP);
4711 	if (aflt->flt_status & ECC_IP_TRAP)
4712 		return (TRAP_TYPE_ECC_IP);
4713 	if (aflt->flt_status & ECC_ITLB_TRAP)
4714 		return (TRAP_TYPE_ECC_ITLB);
4715 	if (aflt->flt_status & ECC_DTLB_TRAP)
4716 		return (TRAP_TYPE_ECC_DTLB);
4717 	return (TRAP_TYPE_UNKNOWN);
4718 }
4719 
/*
 * Decide an error type based on detector and leaky/partner tests.
 * The following array is used for quick translation - it must
 * stay in sync with ce_dispact_t.
 *
 * flt_to_error_type() indexes this table directly with a CE_DISP_* value,
 * so the order of the entries below is significant.
 * NOTE(review): the per-entry meanings below are inferred from the
 * CE_DISP_DESC_* suffixes and the CE_DISP_* names used by
 * flt_to_error_type() - confirm against the ce_dispact_t definition.
 */

static char *cetypes[] = {
	CE_DISP_DESC_U,		/* presumably CE_DISP_UNKNOWN */
	CE_DISP_DESC_I,		/* presumably CE_DISP_INTERMITTENT */
	CE_DISP_DESC_PP,	/* presumably CE_DISP_POSS_PERS */
	CE_DISP_DESC_P,		/* presumably CE_DISP_PERS */
	CE_DISP_DESC_L,		/* presumably CE_DISP_LEAKY */
	CE_DISP_DESC_PS,	/* presumably CE_DISP_POSS_STICKY */
	CE_DISP_DESC_S		/* presumably CE_DISP_STICKY */
};
4735 
4736 char *
4737 flt_to_error_type(struct async_flt *aflt)
4738 {
4739 	ce_dispact_t dispact, disp;
4740 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
4741 
4742 	/*
4743 	 * The memory payload bundle is shared by some events that do
4744 	 * not perform any classification.  For those flt_disp will be
4745 	 * 0 and we will return "unknown".
4746 	 */
4747 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
4748 		return (cetypes[CE_DISP_UNKNOWN]);
4749 
4750 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
4751 
4752 	/*
4753 	 * It is also possible that no scrub/classification was performed
4754 	 * by the detector, for instance where a disrupting error logged
4755 	 * in the AFSR while CEEN was off in cpu_deferred_error.
4756 	 */
4757 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
4758 		return (cetypes[CE_DISP_UNKNOWN]);
4759 
4760 	/*
4761 	 * Lookup type in initial classification/action table
4762 	 */
4763 	dispact = CE_DISPACT(ce_disp_table,
4764 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
4765 	    CE_XDIAG_STATE(dtcrinfo),
4766 	    CE_XDIAG_CE1SEEN(dtcrinfo),
4767 	    CE_XDIAG_CE2SEEN(dtcrinfo));
4768 
4769 	/*
4770 	 * A bad lookup is not something to panic production systems for.
4771 	 */
4772 	ASSERT(dispact != CE_DISP_BAD);
4773 	if (dispact == CE_DISP_BAD)
4774 		return (cetypes[CE_DISP_UNKNOWN]);
4775 
4776 	disp = CE_DISP(dispact);
4777 
4778 	switch (disp) {
4779 	case CE_DISP_UNKNOWN:
4780 	case CE_DISP_INTERMITTENT:
4781 		break;
4782 
4783 	case CE_DISP_POSS_PERS:
4784 		/*
4785 		 * "Possible persistent" errors to which we have applied a valid
4786 		 * leaky test can be separated into "persistent" or "leaky".
4787 		 */
4788 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
4789 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
4790 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
4791 			    CE_XDIAG_CE2SEEN(lkyinfo))
4792 				disp = CE_DISP_LEAKY;
4793 			else
4794 				disp = CE_DISP_PERS;
4795 		}
4796 		break;
4797 
4798 	case CE_DISP_POSS_STICKY:
4799 		/*
4800 		 * Promote "possible sticky" results that have been
4801 		 * confirmed by a partner test to "sticky".  Unconfirmed
4802 		 * "possible sticky" events are left at that status - we do not
4803 		 * guess at any bad reader/writer etc status here.
4804 		 */
4805 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
4806 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
4807 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
4808 			disp = CE_DISP_STICKY;
4809 
4810 		/*
4811 		 * Promote "possible sticky" results on a uniprocessor
4812 		 * to "sticky"
4813 		 */
4814 		if (disp == CE_DISP_POSS_STICKY &&
4815 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
4816 			disp = CE_DISP_STICKY;
4817 		break;
4818 
4819 	default:
4820 		disp = CE_DISP_UNKNOWN;
4821 		break;
4822 	}
4823 
4824 	return (cetypes[disp]);
4825 }
4826 
4827 /*
4828  * Given the entire afsr, the specific bit to check and a prioritized list of
4829  * error bits, determine the validity of the various overwrite priority
4830  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
4831  * different overwrite priorities.
4832  *
4833  * Given a specific afsr error bit and the entire afsr, there are three cases:
4834  *   INVALID:	The specified bit is lower overwrite priority than some other
4835  *		error bit which is on in the afsr (or IVU/IVC).
4836  *   VALID:	The specified bit is higher priority than all other error bits
4837  *		which are on in the afsr.
4838  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
4839  *		bit is on in the afsr.
4840  */
4841 int
4842 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
4843 {
4844 	uint64_t afsr_ow;
4845 
4846 	while ((afsr_ow = *ow_bits++) != 0) {
4847 		/*
4848 		 * If bit is in the priority class, check to see if another
4849 		 * bit in the same class is on => ambiguous.  Otherwise,
4850 		 * the value is valid.  If the bit is not on at this priority
4851 		 * class, but a higher priority bit is on, then the value is
4852 		 * invalid.
4853 		 */
4854 		if (afsr_ow & afsr_bit) {
4855 			/*
4856 			 * If equal pri bit is on, ambiguous.
4857 			 */
4858 			if (afsr & (afsr_ow & ~afsr_bit))
4859 				return (AFLT_STAT_AMBIGUOUS);
4860 			return (AFLT_STAT_VALID);
4861 		} else if (afsr & afsr_ow)
4862 			break;
4863 	}
4864 
4865 	/*
4866 	 * We didn't find a match or a higher priority bit was on.  Not
4867 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
4868 	 */
4869 	return (AFLT_STAT_INVALID);
4870 }
4871 
4872 static int
4873 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
4874 {
4875 #if defined(SERRANO)
4876 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
4877 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
4878 	else
4879 #endif	/* SERRANO */
4880 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
4881 }
4882 
/*
 * Return the overwrite status (as computed by afsr_to_overw_status()) of
 * afsr_bit evaluated against the esynd_overwrite priority list.
 */
static int
afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
{
	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
}
4888 
/*
 * Return the overwrite status (as computed by afsr_to_overw_status()) of
 * afsr_bit evaluated against the msynd_overwrite priority list.
 */
static int
afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
{
	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
}
4894 
4895 static int
4896 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
4897 {
4898 #ifdef lint
4899 	cpuid = cpuid;
4900 #endif
4901 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
4902 		return (afsr_to_msynd_status(afsr, afsr_bit));
4903 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
4904 #if defined(CHEETAH_PLUS)
4905 		/*
4906 		 * The E_SYND overwrite policy is slightly different
4907 		 * for Panther CPUs.
4908 		 */
4909 		if (IS_PANTHER(cpunodes[cpuid].implementation))
4910 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
4911 		else
4912 			return (afsr_to_esynd_status(afsr, afsr_bit));
4913 #else /* CHEETAH_PLUS */
4914 		return (afsr_to_esynd_status(afsr, afsr_bit));
4915 #endif /* CHEETAH_PLUS */
4916 	} else {
4917 		return (AFLT_STAT_INVALID);
4918 	}
4919 }
4920 
/*
 * Slave CPU stick synchronization.
 *
 * Runs with preemption disabled and communicates with sticksync_master()
 * through stick_sync_cmd, slave_done and the shared timestamp[] array.
 * Each pass takes stick_iter skew samples, averages them and, if the
 * average is larger in magnitude than stick_tsk, adjusts the local STICK
 * by the negated average and tries again.  Once the average is within
 * stick_tsk, slave_done is set and stick_sync_cmd is reset to EVENT_NULL.
 */
void
sticksync_slave(void)
{
	int 		i;
	int		tries = 0;
	int64_t		tskew;
	int64_t		av_tskew;

	kpreempt_disable();
	/* wait for the master side */
	while (stick_sync_cmd != SLAVE_START)
		;
	/*
	 * Synchronization should only take a few tries at most. But in the
	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
	 * without its stick synchronized wouldn't be a good citizen.
	 */
	while (slave_done == 0) {
		/*
		 * Time skew calculation.
		 */
		av_tskew = tskew = 0;

		for (i = 0; i < stick_iter; i++) {
			/* make location hot */
			timestamp[EV_A_START] = 0;
			stick_timestamp(&timestamp[EV_A_START]);

			/* tell the master we're ready */
			stick_sync_cmd = MASTER_START;

			/* and wait */
			while (stick_sync_cmd != SLAVE_CONT)
				;
			/* Event B end */
			stick_timestamp(&timestamp[EV_B_END]);

			/*
			 * calculate time skew: half the difference between
			 * the duration of event B and the duration of
			 * event A
			 */
			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
				- (timestamp[EV_A_END] -
				timestamp[EV_A_START])) / 2;

			/* keep running count */
			av_tskew += tskew;
		} /* for */

		/*
		 * Adjust stick for time skew if not within the max allowed;
		 * otherwise we're all done.
		 */
		if (stick_iter != 0)
			av_tskew = av_tskew/stick_iter;
		if (ABS(av_tskew) > stick_tsk) {
			/*
			 * If the skew is 1 (the slave's STICK register
			 * is 1 STICK ahead of the master's), stick_adj
			 * could fail to adjust the slave's STICK register
			 * if the STICK read on the slave happens to
			 * align with the increment of the STICK.
			 * Therefore, we increment the skew to 2.
			 */
			if (av_tskew == 1)
				av_tskew++;
			stick_adj(-av_tskew);
		} else
			slave_done = 1;
#ifdef DEBUG
		/* record the average skew of the first DSYNC_ATTEMPTS passes */
		if (tries < DSYNC_ATTEMPTS)
			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
				av_tskew;
		++tries;
#endif /* DEBUG */
#ifdef lint
		tries = tries;
#endif

	} /* while */

	/* allow the master to finish */
	stick_sync_cmd = EVENT_NULL;
	kpreempt_enable();
}
5006 
/*
 * Master CPU side of stick synchronization.
 *  - timestamp end of Event A
 *  - timestamp beginning of Event B
 *
 * Runs with preemption disabled.  The master clears slave_done, releases
 * the slave by setting stick_sync_cmd to SLAVE_START and then services
 * stick_iter sample requests per pass until the slave sets slave_done.
 */
void
sticksync_master(void)
{
	int		i;

	kpreempt_disable();
	/* tell the slave we've started */
	slave_done = 0;
	stick_sync_cmd = SLAVE_START;

	while (slave_done == 0) {
		for (i = 0; i < stick_iter; i++) {
			/* wait for the slave */
			while (stick_sync_cmd != MASTER_START)
				;
			/* Event A end */
			stick_timestamp(&timestamp[EV_A_END]);

			/* make location hot */
			timestamp[EV_B_START] = 0;
			stick_timestamp(&timestamp[EV_B_START]);

			/* tell the slave to continue */
			stick_sync_cmd = SLAVE_CONT;
		} /* for */

		/*
		 * wait while slave calculates time skew; the slave changes
		 * stick_sync_cmd away from SLAVE_CONT either to request the
		 * next sample or when it is finished
		 */
		while (stick_sync_cmd == SLAVE_CONT)
			;
	} /* while */
	kpreempt_enable();
}
5044 
/*
 * Cheetah/Cheetah+ have a disrupting error for copybacks, so we don't need
 * to do the Spitfire hack of xcall'ing all the cpus to ask them to check for
 * them.  Also, in cpu_async_panic_callb, each cpu checks for CPU events on
 * its way to panic idle.
 *
 * This routine is therefore intentionally a no-op; aflt is unused.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{}
5055 
/*
 * kmem cache that the per-cpu cheetah_private_t structures are returned
 * to by cpu_uninit_private().
 */
struct kmem_cache *ch_private_cache;
5057 
/*
 * Cpu private uninitialization.  Uninitialize the Ecache scrubber and
 * deallocate the scrubber data structures and cpu_private data structure.
 *
 * The scrubber state is torn down first, while CPU_PRIVATE(cp) is still
 * set; only then is the private pointer cleared, the cpu's
 * ch_err_tl1_paddrs[] slot reset and the structure returned to
 * ch_private_cache.
 */
void
cpu_uninit_private(struct cpu *cp)
{
	cheetah_private_t *chprp = CPU_PRIVATE(cp);

	ASSERT(chprp);
	/* must run before CPU_PRIVATE(cp) is cleared below */
	cpu_uninit_ecache_scrub_dr(cp);
	CPU_PRIVATE(cp) = NULL;
	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
	kmem_cache_free(ch_private_cache, chprp);
	cmp_delete_cpu(cp->cpu_id);

}
5075 
5076 /*
5077  * Cheetah Cache Scrubbing
5078  *
5079  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5080  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5081  * protected by either parity or ECC.
5082  *
 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
 * logic works with respect to the ASI used to scrub I$ lines, the entire
 * I$ is scanned at once.
5087  */
5088 
/*
 * Tuneables to enable and disable the scrubbing of the caches, and to tune
 * scrubbing behavior.  These may be changed via /etc/system or using mdb
 * on a running system.
 */
int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */

/*
 * The following are the PIL levels that the softints/cross traps will fire at.
 */
uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */

#if defined(JALAPENO)

/*
 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
 * on Jalapeno.
 */
int ecache_scrub_enable = 0;

#else	/* JALAPENO */

/*
 * With all other cpu types, E$ scrubbing is on by default
 */
int ecache_scrub_enable = 1;

#endif	/* JALAPENO */


#if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)

/*
 * The I$ scrubber tends to cause latency problems for real-time SW, so it
 * is disabled by default on non-Cheetah systems
 */
int icache_scrub_enable = 0;

/*
 * Tuneables specifying the scrub calls per second and the scan rate
 * for each cache
 *
 * The cyclic times are set during boot based on the following values.
 * Changing these values in mdb after this time will have no effect.  If
 * a different value is desired, it must be set in /etc/system before a
 * reboot.
 */
int ecache_calls_a_sec = 1;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 2;		/* D$ scrub calls per second */
int icache_calls_a_sec = 2;		/* I$ scrub calls per second */

int ecache_scan_rate_idle = 1;		/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 1;		/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 1;		/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 1;		/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 1;		/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 1;		/* I$ scan rate (in tenths of a %) */

#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */

int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */

int ecache_calls_a_sec = 100;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 100;		/* D$ scrub calls per second */
int icache_calls_a_sec = 100;		/* I$ scrub calls per second */

int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */

#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5165 
5166 /*
5167  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5168  * increment the outstanding request counter and schedule a softint to run
5169  * the scrubber.
5170  */
5171 extern xcfunc_t cache_scrubreq_tl1;
5172 
5173 /*
5174  * These are the softint functions for each cache scrubber
5175  */
5176 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5177 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5178 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5179 
/*
 * The cache scrub info table contains cache specific information
 * and allows for some of the scrub code to be table driven, reducing
 * duplication of cache similar code.
 *
 * This table keeps a copy of the value in the calls per second variable
 * (?cache_calls_a_sec).  This makes it much more difficult for someone
 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
 * mdb in a misguided attempt to disable the scrubber).
 *
 * The csi_freq, csi_inum and cyclic ID members are statically zero here
 * and are filled in by cpu_init_cache_scrub().
 */
struct scrub_info {
	int		*csi_enable;	/* scrubber enable flag */
	int		csi_freq;	/* scrubber calls per second */
	int		csi_index;	/* index to chsm_outstanding[] */
	uint_t		csi_inum;	/* scrubber interrupt number */
	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
	/* two characters plus the terminating NUL */
	char		csi_name[3];	/* cache name for this scrub entry */
} cache_scrub_info[] = {
{ &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
{ &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
{ &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
};
5203 
5204 /*
5205  * If scrubbing is enabled, increment the outstanding request counter.  If it
5206  * is 1 (meaning there were no previous requests outstanding), call
5207  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5208  * a self trap.
5209  */
5210 static void
5211 do_scrub(struct scrub_info *csi)
5212 {
5213 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5214 	int index = csi->csi_index;
5215 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5216 
5217 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5218 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5219 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5220 			    csi->csi_inum, 0);
5221 		}
5222 	}
5223 }
5224 
5225 /*
5226  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5227  * cross-trap the offline cpus.
5228  */
5229 static void
5230 do_scrub_offline(struct scrub_info *csi)
5231 {
5232 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5233 
5234 	if (CPUSET_ISNULL(cpu_offline_set)) {
5235 		/*
5236 		 * No offline cpus - nothing to do
5237 		 */
5238 		return;
5239 	}
5240 
5241 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5242 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5243 		    csi->csi_index);
5244 	}
5245 }
5246 
5247 /*
5248  * This is the initial setup for the scrubber cyclics - it sets the
5249  * interrupt level, frequency, and function to call.
5250  */
5251 /*ARGSUSED*/
5252 static void
5253 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5254     cyc_time_t *when)
5255 {
5256 	struct scrub_info *csi = (struct scrub_info *)arg;
5257 
5258 	ASSERT(csi != NULL);
5259 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5260 	hdlr->cyh_level = CY_LOW_LEVEL;
5261 	hdlr->cyh_arg = arg;
5262 
5263 	when->cyt_when = 0;	/* Start immediately */
5264 	when->cyt_interval = NANOSEC / csi->csi_freq;
5265 }
5266 
5267 /*
5268  * Initialization for cache scrubbing.
5269  * This routine is called AFTER all cpus have had cpu_init_private called
5270  * to initialize their private data areas.
5271  */
5272 void
5273 cpu_init_cache_scrub(void)
5274 {
5275 	int i;
5276 	struct scrub_info *csi;
5277 	cyc_omni_handler_t omni_hdlr;
5278 	cyc_handler_t offline_hdlr;
5279 	cyc_time_t when;
5280 
5281 	/*
5282 	 * save away the maximum number of lines for the D$
5283 	 */
5284 	dcache_nlines = dcache_size / dcache_linesize;
5285 
5286 	/*
5287 	 * register the softints for the cache scrubbing
5288 	 */
5289 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5290 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5291 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]);
5292 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5293 
5294 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5295 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5296 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]);
5297 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5298 
5299 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5300 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5301 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]);
5302 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5303 
5304 	/*
5305 	 * start the scrubbing for all the caches
5306 	 */
5307 	mutex_enter(&cpu_lock);
5308 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5309 
5310 		csi = &cache_scrub_info[i];
5311 
5312 		if (!(*csi->csi_enable))
5313 			continue;
5314 
5315 		/*
5316 		 * force the following to be true:
5317 		 *	1 <= calls_a_sec <= hz
5318 		 */
5319 		if (csi->csi_freq > hz) {
5320 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5321 				"(%d); resetting to hz (%d)", csi->csi_name,
5322 				csi->csi_freq, hz);
5323 			csi->csi_freq = hz;
5324 		} else if (csi->csi_freq < 1) {
5325 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5326 				"(%d); resetting to 1", csi->csi_name,
5327 				csi->csi_freq);
5328 			csi->csi_freq = 1;
5329 		}
5330 
5331 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5332 		omni_hdlr.cyo_offline = NULL;
5333 		omni_hdlr.cyo_arg = (void *)csi;
5334 
5335 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5336 		offline_hdlr.cyh_arg = (void *)csi;
5337 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5338 
5339 		when.cyt_when = 0;	/* Start immediately */
5340 		when.cyt_interval = NANOSEC / csi->csi_freq;
5341 
5342 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5343 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5344 	}
5345 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5346 	mutex_exit(&cpu_lock);
5347 }
5348 
5349 /*
5350  * Indicate that the specified cpu is idle.
5351  */
5352 void
5353 cpu_idle_ecache_scrub(struct cpu *cp)
5354 {
5355 	if (CPU_PRIVATE(cp) != NULL) {
5356 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5357 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5358 	}
5359 }
5360 
5361 /*
5362  * Indicate that the specified cpu is busy.
5363  */
5364 void
5365 cpu_busy_ecache_scrub(struct cpu *cp)
5366 {
5367 	if (CPU_PRIVATE(cp) != NULL) {
5368 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5369 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5370 	}
5371 }
5372 
5373 /*
5374  * Initialization for cache scrubbing for the specified cpu.
5375  */
5376 void
5377 cpu_init_ecache_scrub_dr(struct cpu *cp)
5378 {
5379 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5380 	int cpuid = cp->cpu_id;
5381 
5382 	/* initialize the number of lines in the caches */
5383 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5384 	    cpunodes[cpuid].ecache_linesize;
5385 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5386 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5387 
5388 	/*
5389 	 * do_scrub() and do_scrub_offline() check both the global
5390 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5391 	 * check this value before scrubbing.  Currently, we use it to
5392 	 * disable the E$ scrubber on multi-core cpus or while running at
5393 	 * slowed speed.  For now, just turn everything on and allow
5394 	 * cpu_init_private() to change it if necessary.
5395 	 */
5396 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5397 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5398 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5399 
5400 	cpu_busy_ecache_scrub(cp);
5401 }
5402 
5403 /*
5404  * Un-initialization for cache scrubbing for the specified cpu.
5405  */
5406 static void
5407 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5408 {
5409 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5410 
5411 	/*
5412 	 * un-initialize bookkeeping for cache scrubbing
5413 	 */
5414 	bzero(csmp, sizeof (ch_scrub_misc_t));
5415 
5416 	cpu_idle_ecache_scrub(cp);
5417 }
5418 
5419 /*
5420  * Called periodically on each CPU to scrub the D$.
5421  */
5422 static void
5423 scrub_dcache(int how_many)
5424 {
5425 	int i;
5426 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5427 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5428 
5429 	/*
5430 	 * scrub the desired number of lines
5431 	 */
5432 	for (i = 0; i < how_many; i++) {
5433 		/*
5434 		 * scrub a D$ line
5435 		 */
5436 		dcache_inval_line(index);
5437 
5438 		/*
5439 		 * calculate the next D$ line to scrub, assumes
5440 		 * that dcache_nlines is a power of 2
5441 		 */
5442 		index = (index + 1) & (dcache_nlines - 1);
5443 	}
5444 
5445 	/*
5446 	 * set the scrub index for the next visit
5447 	 */
5448 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5449 }
5450 
/*
 * Handler for D$ scrub inum softint. Call scrub_dcache until
 * we decrement the outstanding request count to zero.
 *
 * arg1 is the scrub_info entry of the D$; arg2 is unused.  The idle or
 * busy scan rate is picked according to the cpu's chsm_ecache_busy
 * state.  Always returns DDI_INTR_CLAIMED.
 */
/*ARGSUSED*/
static uint_t
scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
{
	int i;
	int how_many;
	int outstanding;
	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
	struct scrub_info *csi = (struct scrub_info *)arg1;
	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
		dcache_scan_rate_idle : dcache_scan_rate_busy;

	/*
	 * The scan rates are expressed in units of tenths of a
	 * percent.  A scan rate of 1000 (100%) means the whole
	 * cache is scanned every second.
	 */
	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);

	/*
	 * Service a snapshot of the outstanding requests, then subtract
	 * the number serviced; a non-zero remainder means more requests
	 * arrived in the meantime, so go around again.
	 */
	do {
		outstanding = *countp;
		for (i = 0; i < outstanding; i++) {
			scrub_dcache(how_many);
		}
	} while (atomic_add_32_nv(countp, -outstanding));

	return (DDI_INTR_CLAIMED);
}
5484 
5485 /*
5486  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5487  * by invalidating lines. Due to the characteristics of the ASI which
5488  * is used to invalidate an I$ line, the entire I$ must be invalidated
5489  * vs. an individual I$ line.
5490  */
5491 static void
5492 scrub_icache(int how_many)
5493 {
5494 	int i;
5495 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5496 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5497 	int icache_nlines = csmp->chsm_icache_nlines;
5498 
5499 	/*
5500 	 * scrub the desired number of lines
5501 	 */
5502 	for (i = 0; i < how_many; i++) {
5503 		/*
5504 		 * since the entire I$ must be scrubbed at once,
5505 		 * wait until the index wraps to zero to invalidate
5506 		 * the entire I$
5507 		 */
5508 		if (index == 0) {
5509 			icache_inval_all();
5510 		}
5511 
5512 		/*
5513 		 * calculate the next I$ line to scrub, assumes
5514 		 * that chsm_icache_nlines is a power of 2
5515 		 */
5516 		index = (index + 1) & (icache_nlines - 1);
5517 	}
5518 
5519 	/*
5520 	 * set the scrub index for the next visit
5521 	 */
5522 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5523 }
5524 
/*
 * Handler for I$ scrub inum softint. Call scrub_icache until
 * we decrement the outstanding request count to zero.
 *
 * arg1 is the scrub_info entry of the I$; arg2 is unused.  The idle or
 * busy scan rate is picked according to the cpu's chsm_ecache_busy
 * state.  Always returns DDI_INTR_CLAIMED.
 */
/*ARGSUSED*/
static uint_t
scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
{
	int i;
	int how_many;
	int outstanding;
	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
	struct scrub_info *csi = (struct scrub_info *)arg1;
	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
	    icache_scan_rate_idle : icache_scan_rate_busy;
	int icache_nlines = csmp->chsm_icache_nlines;

	/*
	 * The scan rates are expressed in units of tenths of a
	 * percent.  A scan rate of 1000 (100%) means the whole
	 * cache is scanned every second.
	 */
	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);

	/*
	 * Service a snapshot of the outstanding requests, then subtract
	 * the number serviced; a non-zero remainder means more requests
	 * arrived in the meantime, so go around again.
	 */
	do {
		outstanding = *countp;
		for (i = 0; i < outstanding; i++) {
			scrub_icache(how_many);
		}
	} while (atomic_add_32_nv(countp, -outstanding));

	return (DDI_INTR_CLAIMED);
}
5559 
5560 /*
5561  * Called periodically on each CPU to scrub the E$.
5562  */
5563 static void
5564 scrub_ecache(int how_many)
5565 {
5566 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5567 	int i;
5568 	int cpuid = CPU->cpu_id;
5569 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5570 	int nlines = csmp->chsm_ecache_nlines;
5571 	int linesize = cpunodes[cpuid].ecache_linesize;
5572 	int ec_set_size = cpu_ecache_set_size(CPU);
5573 
5574 	/*
5575 	 * scrub the desired number of lines
5576 	 */
5577 	for (i = 0; i < how_many; i++) {
5578 		/*
5579 		 * scrub the E$ line
5580 		 */
5581 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5582 		    ec_set_size);
5583 
5584 		/*
5585 		 * calculate the next E$ line to scrub based on twice
5586 		 * the number of E$ lines (to displace lines containing
5587 		 * flush area data), assumes that the number of lines
5588 		 * is a power of 2
5589 		 */
5590 		index = (index + 1) & ((nlines << 1) - 1);
5591 	}
5592 
5593 	/*
5594 	 * set the ecache scrub index for the next visit
5595 	 */
5596 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5597 }
5598 
/*
 * Handler for E$ scrub inum softint. Call the E$ scrubber until
 * we decrement the outstanding request count to zero.
 *
 * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
 * become negative after the atomic_add_32_nv().  This is not a problem, as
 * the next trip around the loop won't scrub anything, and the next add will
 * reset the count back to zero.
 *
 * arg1 is the scrub_info entry of the E$; arg2 is unused.  The idle or
 * busy scan rate is picked according to the cpu's chsm_ecache_busy
 * state.  Always returns DDI_INTR_CLAIMED.
 */
/*ARGSUSED*/
static uint_t
scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
{
	int i;
	int how_many;
	int outstanding;
	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
	struct scrub_info *csi = (struct scrub_info *)arg1;
	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
		ecache_scan_rate_idle : ecache_scan_rate_busy;
	int ecache_nlines = csmp->chsm_ecache_nlines;

	/*
	 * The scan rates are expressed in units of tenths of a
	 * percent.  A scan rate of 1000 (100%) means the whole
	 * cache is scanned every second.
	 */
	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);

	/*
	 * Service a snapshot of the outstanding requests, then subtract
	 * the number serviced; a non-zero remainder means the count
	 * changed in the meantime, so go around again.
	 */
	do {
		outstanding = *countp;
		for (i = 0; i < outstanding; i++) {
			scrub_ecache(how_many);
		}
	} while (atomic_add_32_nv(countp, -outstanding));

	return (DDI_INTR_CLAIMED);
}
5638 
5639 /*
5640  * Timeout function to reenable CE
5641  */
5642 static void
5643 cpu_delayed_check_ce_errors(void *arg)
5644 {
5645 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
5646 	    TQ_NOSLEEP)) {
5647 		(void) timeout(cpu_delayed_check_ce_errors, arg,
5648 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5649 	}
5650 }
5651 
5652 /*
5653  * CE Deferred Re-enable after trap.
5654  *
5655  * When the CPU gets a disrupting trap for any of the errors
5656  * controlled by the CEEN bit, CEEN is disabled in the trap handler
5657  * immediately. To eliminate the possibility of multiple CEs causing
5658  * recursive stack overflow in the trap handler, we cannot
5659  * reenable CEEN while still running in the trap handler. Instead,
5660  * after a CE is logged on a CPU, we schedule a timeout function,
5661  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
5662  * seconds. This function will check whether any further CEs
5663  * have occurred on that CPU, and if none have, will reenable CEEN.
5664  *
5665  * If further CEs have occurred while CEEN is disabled, another
5666  * timeout will be scheduled. This is to ensure that the CPU can
5667  * make progress in the face of CE 'storms', and that it does not
5668  * spend all its time logging CE errors.
5669  */
5670 static void
5671 cpu_check_ce_errors(void *arg)
5672 {
5673 	int	cpuid = (int)arg;
5674 	cpu_t	*cp;
5675 
5676 	/*
5677 	 * We acquire cpu_lock.
5678 	 */
5679 	ASSERT(curthread->t_pil == 0);
5680 
5681 	/*
5682 	 * verify that the cpu is still around, DR
5683 	 * could have got there first ...
5684 	 */
5685 	mutex_enter(&cpu_lock);
5686 	cp = cpu_get(cpuid);
5687 	if (cp == NULL) {
5688 		mutex_exit(&cpu_lock);
5689 		return;
5690 	}
5691 	/*
5692 	 * make sure we don't migrate across CPUs
5693 	 * while checking our CE status.
5694 	 */
5695 	kpreempt_disable();
5696 
5697 	/*
5698 	 * If we are running on the CPU that got the
5699 	 * CE, we can do the checks directly.
5700 	 */
5701 	if (cp->cpu_id == CPU->cpu_id) {
5702 		mutex_exit(&cpu_lock);
5703 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
5704 		kpreempt_enable();
5705 		return;
5706 	}
5707 	kpreempt_enable();
5708 
5709 	/*
5710 	 * send an x-call to get the CPU that originally
5711 	 * got the CE to do the necessary checks. If we can't
5712 	 * send the x-call, reschedule the timeout, otherwise we
5713 	 * lose CEEN forever on that CPU.
5714 	 */
5715 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
5716 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
5717 		    TIMEOUT_CEEN_CHECK, 0);
5718 		mutex_exit(&cpu_lock);
5719 	} else {
5720 		/*
5721 		 * When the CPU is not accepting xcalls, or
5722 		 * the processor is offlined, we don't want to
5723 		 * incur the extra overhead of trying to schedule the
5724 		 * CE timeout indefinitely. However, we don't want to lose
5725 		 * CE checking forever.
5726 		 *
5727 		 * Keep rescheduling the timeout, accepting the additional
5728 		 * overhead as the cost of correctness in the case where we get
5729 		 * a CE, disable CEEN, offline the CPU during the
5730 		 * the timeout interval, and then online it at some
5731 		 * point in the future. This is unlikely given the short
5732 		 * cpu_ceen_delay_secs.
5733 		 */
5734 		mutex_exit(&cpu_lock);
5735 		(void) timeout(cpu_delayed_check_ce_errors, (void *)cp->cpu_id,
5736 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5737 	}
5738 }
5739 
5740 /*
5741  * This routine will check whether CEs have occurred while
5742  * CEEN is disabled. Any CEs detected will be logged and, if
5743  * possible, scrubbed.
5744  *
5745  * The memscrubber will also use this routine to clear any errors
5746  * caused by its scrubbing with CEEN disabled.
5747  *
5748  * flag == SCRUBBER_CEEN_CHECK
5749  *		called from memscrubber, just check/scrub, no reset
5750  *		paddr 	physical addr. for start of scrub pages
5751  *		vaddr 	virtual addr. for scrub area
5752  *		psz	page size of area to be scrubbed
5753  *
5754  * flag == TIMEOUT_CEEN_CHECK
5755  *		timeout function has triggered, reset timeout or CEEN
5756  *
5757  * Note: We must not migrate cpus during this function.  This can be
5758  * achieved by one of:
5759  *    - invoking as target of an x-call in which case we're at XCALL_PIL
5760  *	The flag value must be first xcall argument.
5761  *    - disabling kernel preemption.  This should be done for very short
5762  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
5763  *	scrub an extended area with cpu_check_block.  The call for
5764  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
5765  *	brief for this case.
5766  *    - binding to a cpu, eg with thread_affinity_set().  This is used
5767  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
5768  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
5769  */
5770 void
5771 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
5772 {
5773 	ch_cpu_errors_t	cpu_error_regs;
5774 	uint64_t	ec_err_enable;
5775 	uint64_t	page_offset;
5776 
5777 	/* Read AFSR */
5778 	get_cpu_error_state(&cpu_error_regs);
5779 
5780 	/*
5781 	 * If no CEEN errors have occurred during the timeout
5782 	 * interval, it is safe to re-enable CEEN and exit.
5783 	 */
5784 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
5785 		if (flag == TIMEOUT_CEEN_CHECK &&
5786 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
5787 			set_error_enable(ec_err_enable | EN_REG_CEEN);
5788 		return;
5789 	}
5790 
5791 	/*
5792 	 * Ensure that CEEN was not reenabled (maybe by DR) before
5793 	 * we log/clear the error.
5794 	 */
5795 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
5796 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
5797 
5798 	/*
5799 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
5800 	 * timeout will be rescheduled when the error is logged.
5801 	 */
5802 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
5803 	    cpu_ce_detected(&cpu_error_regs,
5804 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
5805 	else
5806 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
5807 
5808 	/*
5809 	 * If the memory scrubber runs while CEEN is
5810 	 * disabled, (or if CEEN is disabled during the
5811 	 * scrub as a result of a CE being triggered by
5812 	 * it), the range being scrubbed will not be
5813 	 * completely cleaned. If there are multiple CEs
5814 	 * in the range at most two of these will be dealt
5815 	 * with, (one by the trap handler and one by the
5816 	 * timeout). It is also possible that none are dealt
5817 	 * with, (CEEN disabled and another CE occurs before
5818 	 * the timeout triggers). So to ensure that the
5819 	 * memory is actually scrubbed, we have to access each
5820 	 * memory location in the range and then check whether
5821 	 * that access causes a CE.
5822 	 */
5823 	if (flag == SCRUBBER_CEEN_CHECK && va) {
5824 		if ((cpu_error_regs.afar >= pa) &&
5825 		    (cpu_error_regs.afar < (pa + psz))) {
5826 			/*
5827 			 * Force a load from physical memory for each
5828 			 * 64-byte block, then check AFSR to determine
5829 			 * whether this access caused an error.
5830 			 *
5831 			 * This is a slow way to do a scrub, but as it will
5832 			 * only be invoked when the memory scrubber actually
5833 			 * triggered a CE, it should not happen too
5834 			 * frequently.
5835 			 *
5836 			 * cut down what we need to check as the scrubber
5837 			 * has verified up to AFAR, so get it's offset
5838 			 * into the page and start there.
5839 			 */
5840 			page_offset = (uint64_t)(cpu_error_regs.afar &
5841 			    (psz - 1));
5842 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
5843 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
5844 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
5845 			    psz);
5846 		}
5847 	}
5848 
5849 	/*
5850 	 * Reset error enable if this CE is not masked.
5851 	 */
5852 	if ((flag == TIMEOUT_CEEN_CHECK) &&
5853 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
5854 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
5855 
5856 }
5857 
5858 /*
5859  * Attempt a cpu logout for an error that we did not trap for, such
5860  * as a CE noticed with CEEN off.  It is assumed that we are still running
5861  * on the cpu that took the error and that we cannot migrate.  Returns
5862  * 0 on success, otherwise nonzero.
5863  */
5864 static int
5865 cpu_ce_delayed_ec_logout(uint64_t afar)
5866 {
5867 	ch_cpu_logout_t *clop;
5868 
5869 	if (CPU_PRIVATE(CPU) == NULL)
5870 		return (0);
5871 
5872 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5873 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
5874 	    LOGOUT_INVALID)
5875 		return (0);
5876 
5877 	cpu_delayed_logout(afar, clop);
5878 	return (1);
5879 }
5880 
5881 /*
5882  * We got an error while CEEN was disabled. We
5883  * need to clean up after it and log whatever
5884  * information we have on the CE.
5885  */
5886 void
5887 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
5888 {
5889 	ch_async_flt_t 	ch_flt;
5890 	struct async_flt *aflt;
5891 	char 		pr_reason[MAX_REASON_STRING];
5892 
5893 	bzero(&ch_flt, sizeof (ch_async_flt_t));
5894 	ch_flt.flt_trapped_ce = flag;
5895 	aflt = (struct async_flt *)&ch_flt;
5896 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
5897 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
5898 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5899 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
5900 	aflt->flt_addr = cpu_error_regs->afar;
5901 #if defined(SERRANO)
5902 	ch_flt.afar2 = cpu_error_regs->afar2;
5903 #endif	/* SERRANO */
5904 	aflt->flt_pc = NULL;
5905 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
5906 	aflt->flt_tl = 0;
5907 	aflt->flt_panic = 0;
5908 	cpu_log_and_clear_ce(&ch_flt);
5909 
5910 	/*
5911 	 * check if we caused any errors during cleanup
5912 	 */
5913 	if (clear_errors(&ch_flt)) {
5914 		pr_reason[0] = '\0';
5915 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
5916 		    NULL);
5917 	}
5918 }
5919 
5920 /*
5921  * Log/clear CEEN-controlled disrupting errors
5922  */
5923 static void
5924 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
5925 {
5926 	struct async_flt *aflt;
5927 	uint64_t afsr, afsr_errs;
5928 	ch_cpu_logout_t *clop;
5929 	char 		pr_reason[MAX_REASON_STRING];
5930 	on_trap_data_t	*otp = curthread->t_ontrap;
5931 
5932 	aflt = (struct async_flt *)ch_flt;
5933 	afsr = aflt->flt_stat;
5934 	afsr_errs = ch_flt->afsr_errs;
5935 	aflt->flt_id = gethrtime_waitfree();
5936 	aflt->flt_bus_id = getprocessorid();
5937 	aflt->flt_inst = CPU->cpu_id;
5938 	aflt->flt_prot = AFLT_PROT_NONE;
5939 	aflt->flt_class = CPU_FAULT;
5940 	aflt->flt_status = ECC_C_TRAP;
5941 
5942 	pr_reason[0] = '\0';
5943 	/*
5944 	 * Get the CPU log out info for Disrupting Trap.
5945 	 */
5946 	if (CPU_PRIVATE(CPU) == NULL) {
5947 		clop = NULL;
5948 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
5949 	} else {
5950 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5951 	}
5952 
5953 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
5954 		ch_cpu_errors_t cpu_error_regs;
5955 
5956 		get_cpu_error_state(&cpu_error_regs);
5957 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
5958 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
5959 		clop->clo_data.chd_afar = cpu_error_regs.afar;
5960 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
5961 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
5962 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
5963 		clop->clo_sdw_data.chd_afsr_ext =
5964 		    cpu_error_regs.shadow_afsr_ext;
5965 #if defined(SERRANO)
5966 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
5967 #endif	/* SERRANO */
5968 		ch_flt->flt_data_incomplete = 1;
5969 
5970 		/*
5971 		 * The logging/clear code expects AFSR/AFAR to be cleared.
5972 		 * The trap handler does it for CEEN enabled errors
5973 		 * so we need to do it here.
5974 		 */
5975 		set_cpu_error_state(&cpu_error_regs);
5976 	}
5977 
5978 #if defined(JALAPENO) || defined(SERRANO)
5979 	/*
5980 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
5981 	 * For Serrano, even thou we do have the AFAR, we still do the
5982 	 * scrub on the RCE side since that's where the error type can
5983 	 * be properly classified as intermittent, persistent, etc.
5984 	 *
5985 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
5986 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
5987 	 * the flt_status bits.
5988 	 */
5989 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
5990 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
5991 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
5992 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
5993 	}
5994 #else /* JALAPENO || SERRANO */
5995 	/*
5996 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
5997 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
5998 	 * the flt_status bits.
5999 	 */
6000 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6001 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6002 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6003 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6004 		}
6005 	}
6006 
6007 #endif /* JALAPENO || SERRANO */
6008 
6009 	/*
6010 	 * Update flt_prot if this error occurred under on_trap protection.
6011 	 */
6012 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6013 		aflt->flt_prot = AFLT_PROT_EC;
6014 
6015 	/*
6016 	 * Queue events on the async event queue, one event per error bit.
6017 	 */
6018 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6019 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6020 		ch_flt->flt_type = CPU_INV_AFSR;
6021 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6022 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6023 		    aflt->flt_panic);
6024 	}
6025 
6026 	/*
6027 	 * Zero out + invalidate CPU logout.
6028 	 */
6029 	if (clop) {
6030 		bzero(clop, sizeof (ch_cpu_logout_t));
6031 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6032 	}
6033 
6034 	/*
6035 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6036 	 * was disabled, we need to flush either the entire
6037 	 * E$ or an E$ line.
6038 	 */
6039 #if defined(JALAPENO) || defined(SERRANO)
6040 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6041 #else	/* JALAPENO || SERRANO */
6042 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6043 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6044 #endif	/* JALAPENO || SERRANO */
6045 		cpu_error_ecache_flush(ch_flt);
6046 
6047 }
6048 
6049 /*
6050  * depending on the error type, we determine whether we
6051  * need to flush the entire ecache or just a line.
6052  */
6053 static int
6054 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6055 {
6056 	struct async_flt *aflt;
6057 	uint64_t	afsr;
6058 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6059 
6060 	aflt = (struct async_flt *)ch_flt;
6061 	afsr = aflt->flt_stat;
6062 
6063 	/*
6064 	 * If we got multiple errors, no point in trying
6065 	 * the individual cases, just flush the whole cache
6066 	 */
6067 	if (afsr & C_AFSR_ME) {
6068 		return (ECACHE_FLUSH_ALL);
6069 	}
6070 
6071 	/*
6072 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6073 	 * was disabled, we need to flush entire E$. We can't just
6074 	 * flush the cache line affected as the ME bit
6075 	 * is not set when multiple correctable errors of the same
6076 	 * type occur, so we might have multiple CPC or EDC errors,
6077 	 * with only the first recorded.
6078 	 */
6079 #if defined(JALAPENO) || defined(SERRANO)
6080 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6081 #else	/* JALAPENO || SERRANO */
6082 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6083 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6084 #endif	/* JALAPENO || SERRANO */
6085 		return (ECACHE_FLUSH_ALL);
6086 	}
6087 
6088 #if defined(JALAPENO) || defined(SERRANO)
6089 	/*
6090 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6091 	 * flush the entire Ecache.
6092 	 */
6093 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6094 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6095 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6096 			return (ECACHE_FLUSH_LINE);
6097 		} else {
6098 			return (ECACHE_FLUSH_ALL);
6099 		}
6100 	}
6101 #else /* JALAPENO || SERRANO */
6102 	/*
6103 	 * If UE only is set, flush the Ecache line, otherwise
6104 	 * flush the entire Ecache.
6105 	 */
6106 	if (afsr_errs & C_AFSR_UE) {
6107 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6108 		    C_AFSR_UE) {
6109 			return (ECACHE_FLUSH_LINE);
6110 		} else {
6111 			return (ECACHE_FLUSH_ALL);
6112 		}
6113 	}
6114 #endif /* JALAPENO || SERRANO */
6115 
6116 	/*
6117 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6118 	 * flush the entire Ecache.
6119 	 */
6120 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6121 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6122 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6123 			return (ECACHE_FLUSH_LINE);
6124 		} else {
6125 			return (ECACHE_FLUSH_ALL);
6126 		}
6127 	}
6128 
6129 	/*
6130 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6131 	 * flush the entire Ecache.
6132 	 */
6133 	if (afsr_errs & C_AFSR_BERR) {
6134 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6135 			return (ECACHE_FLUSH_LINE);
6136 		} else {
6137 			return (ECACHE_FLUSH_ALL);
6138 		}
6139 	}
6140 
6141 	return (0);
6142 }
6143 
6144 void
6145 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6146 {
6147 	int	ecache_flush_flag =
6148 	    cpu_error_ecache_flush_required(ch_flt);
6149 
6150 	/*
6151 	 * Flush Ecache line or entire Ecache based on above checks.
6152 	 */
6153 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6154 		cpu_flush_ecache();
6155 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6156 		cpu_flush_ecache_line(ch_flt);
6157 	}
6158 
6159 }
6160 
6161 /*
6162  * Extract the PA portion from the E$ tag.
6163  */
6164 uint64_t
6165 cpu_ectag_to_pa(int setsize, uint64_t tag)
6166 {
6167 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6168 		return (JG_ECTAG_TO_PA(setsize, tag));
6169 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6170 		return (PN_L3TAG_TO_PA(tag));
6171 	else
6172 		return (CH_ECTAG_TO_PA(setsize, tag));
6173 }
6174 
6175 /*
6176  * Convert the E$ tag PA into an E$ subblock index.
6177  */
6178 static int
6179 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6180 {
6181 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6182 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6183 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6184 		/* Panther has only one subblock per line */
6185 		return (0);
6186 	else
6187 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6188 }
6189 
6190 /*
6191  * All subblocks in an E$ line must be invalid for
6192  * the line to be invalid.
6193  */
6194 int
6195 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6196 {
6197 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6198 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6199 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6200 		return (PN_L3_LINE_INVALID(tag));
6201 	else
6202 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6203 }
6204 
6205 /*
6206  * Extract state bits for a subblock given the tag.  Note that for Panther
6207  * this works on both l2 and l3 tags.
6208  */
6209 static int
6210 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6211 {
6212 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6213 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6214 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6215 		return (tag & CH_ECSTATE_MASK);
6216 	else
6217 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6218 }
6219 
/*
 * Cpu specific initialization.
 *
 * When built with CHEETAHPLUS_ERRATUM_25 and cheetah_sendmondo_recover
 * is set, this calls cheetah_nudge_init(); otherwise it does nothing.
 */
void
cpu_mp_init(void)
{
#if defined(CHEETAHPLUS_ERRATUM_25)
	if (cheetah_sendmondo_recover != 0)
		cheetah_nudge_init();
#endif	/* CHEETAHPLUS_ERRATUM_25 */
}
6232 
6233 void
6234 cpu_ereport_post(struct async_flt *aflt)
6235 {
6236 	char *cpu_type, buf[FM_MAX_CLASS];
6237 	nv_alloc_t *nva = NULL;
6238 	nvlist_t *ereport, *detector, *resource;
6239 	errorq_elem_t *eqep;
6240 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6241 	char unum[UNUM_NAMLEN];
6242 	int len = 0;
6243 	uint8_t  msg_type;
6244 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6245 
6246 	if (aflt->flt_panic || panicstr) {
6247 		eqep = errorq_reserve(ereport_errorq);
6248 		if (eqep == NULL)
6249 			return;
6250 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6251 		nva = errorq_elem_nva(ereport_errorq, eqep);
6252 	} else {
6253 		ereport = fm_nvlist_create(nva);
6254 	}
6255 
6256 	/*
6257 	 * Create the scheme "cpu" FMRI.
6258 	 */
6259 	detector = fm_nvlist_create(nva);
6260 	resource = fm_nvlist_create(nva);
6261 	switch (cpunodes[aflt->flt_inst].implementation) {
6262 	case CHEETAH_IMPL:
6263 		cpu_type = FM_EREPORT_CPU_USIII;
6264 		break;
6265 	case CHEETAH_PLUS_IMPL:
6266 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6267 		break;
6268 	case JALAPENO_IMPL:
6269 		cpu_type = FM_EREPORT_CPU_USIIIi;
6270 		break;
6271 	case SERRANO_IMPL:
6272 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6273 		break;
6274 	case JAGUAR_IMPL:
6275 		cpu_type = FM_EREPORT_CPU_USIV;
6276 		break;
6277 	case PANTHER_IMPL:
6278 		cpu_type = FM_EREPORT_CPU_USIVplus;
6279 		break;
6280 	default:
6281 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6282 		break;
6283 	}
6284 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
6285 	    aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version,
6286 	    cpunodes[aflt->flt_inst].device_id);
6287 
6288 	/*
6289 	 * Encode all the common data into the ereport.
6290 	 */
6291 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6292 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6293 
6294 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6295 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6296 	    detector, NULL);
6297 
6298 	/*
6299 	 * Encode the error specific data that was saved in
6300 	 * the async_flt structure into the ereport.
6301 	 */
6302 	cpu_payload_add_aflt(aflt, ereport, resource,
6303 	    &plat_ecc_ch_flt.ecaf_afar_status,
6304 	    &plat_ecc_ch_flt.ecaf_synd_status);
6305 
6306 	if (aflt->flt_panic || panicstr) {
6307 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6308 	} else {
6309 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6310 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6311 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6312 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6313 	}
6314 	/*
6315 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6316 	 * to the SC olny if it can process it.
6317 	 */
6318 
6319 	if (&plat_ecc_capability_sc_get &&
6320 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6321 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6322 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6323 			/*
6324 			 * If afar status is not invalid do a unum lookup.
6325 			 */
6326 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6327 			    AFLT_STAT_INVALID) {
6328 				(void) cpu_get_mem_unum_aflt(
6329 				    plat_ecc_ch_flt.ecaf_synd_status, aflt,
6330 				    unum, UNUM_NAMLEN, &len);
6331 			} else {
6332 				unum[0] = '\0';
6333 			}
6334 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6335 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6336 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6337 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6338 			    ch_flt->flt_sdw_afsr_ext;
6339 
6340 			if (&plat_log_fruid_error2)
6341 				plat_log_fruid_error2(msg_type, unum, aflt,
6342 				    &plat_ecc_ch_flt);
6343 		}
6344 	}
6345 }
6346 
6347 void
6348 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6349 {
6350 	int status;
6351 	ddi_fm_error_t de;
6352 
6353 	bzero(&de, sizeof (ddi_fm_error_t));
6354 
6355 	de.fme_version = DDI_FME_VERSION;
6356 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6357 	    FM_ENA_FMT1);
6358 	de.fme_flag = expected;
6359 	de.fme_bus_specific = (void *)aflt->flt_addr;
6360 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6361 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6362 		aflt->flt_panic = 1;
6363 }
6364 
6365 void
6366 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6367     errorq_t *eqp, uint_t flag)
6368 {
6369 	struct async_flt *aflt = (struct async_flt *)payload;
6370 
6371 	aflt->flt_erpt_class = error_class;
6372 	errorq_dispatch(eqp, payload, payload_sz, flag);
6373 }
6374 
/*
 * This routine may be called by the IO module, but does not do
 * anything in this cpu module. The SERD algorithm is handled by
 * cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	/* Deliberately empty; all three arguments are ignored. */
}
6384 
6385 void
6386 adjust_hw_copy_limits(int ecache_size)
6387 {
6388 	/*
6389 	 * Set hw copy limits.
6390 	 *
6391 	 * /etc/system will be parsed later and can override one or more
6392 	 * of these settings.
6393 	 *
6394 	 * At this time, ecache size seems only mildly relevant.
6395 	 * We seem to run into issues with the d-cache and stalls
6396 	 * we see on misses.
6397 	 *
6398 	 * Cycle measurement indicates that 2 byte aligned copies fare
6399 	 * little better than doing things with VIS at around 512 bytes.
6400 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6401 	 * aligned is faster whenever the source and destination data
6402 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6403 	 * limit seems to be driven by the 2K write cache.
6404 	 * When more than 2K of copies are done in non-VIS mode, stores
6405 	 * backup in the write cache.  In VIS mode, the write cache is
6406 	 * bypassed, allowing faster cache-line writes aligned on cache
6407 	 * boundaries.
6408 	 *
6409 	 * In addition, in non-VIS mode, there is no prefetching, so
6410 	 * for larger copies, the advantage of prefetching to avoid even
6411 	 * occasional cache misses is enough to justify using the VIS code.
6412 	 *
6413 	 * During testing, it was discovered that netbench ran 3% slower
6414 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6415 	 * applications, data is only used once (copied to the output
6416 	 * buffer, then copied by the network device off the system).  Using
6417 	 * the VIS copy saves more L2 cache state.  Network copies are
6418 	 * around 1.3K to 1.5K in size for historical reasons.
6419 	 *
6420 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6421 	 * aligned copy even for large caches and 8 MB ecache.  The
6422 	 * infrastructure to allow different limits for different sized
6423 	 * caches is kept to allow further tuning in later releases.
6424 	 */
6425 
6426 	if (min_ecache_size == 0 && use_hw_bcopy) {
6427 		/*
6428 		 * First time through - should be before /etc/system
6429 		 * is read.
6430 		 * Could skip the checks for zero but this lets us
6431 		 * preserve any debugger rewrites.
6432 		 */
6433 		if (hw_copy_limit_1 == 0) {
6434 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6435 			priv_hcl_1 = hw_copy_limit_1;
6436 		}
6437 		if (hw_copy_limit_2 == 0) {
6438 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6439 			priv_hcl_2 = hw_copy_limit_2;
6440 		}
6441 		if (hw_copy_limit_4 == 0) {
6442 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6443 			priv_hcl_4 = hw_copy_limit_4;
6444 		}
6445 		if (hw_copy_limit_8 == 0) {
6446 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6447 			priv_hcl_8 = hw_copy_limit_8;
6448 		}
6449 		min_ecache_size = ecache_size;
6450 	} else {
6451 		/*
6452 		 * MP initialization. Called *after* /etc/system has
6453 		 * been parsed. One CPU has already been initialized.
6454 		 * Need to cater for /etc/system having scragged one
6455 		 * of our values.
6456 		 */
6457 		if (ecache_size == min_ecache_size) {
6458 			/*
6459 			 * Same size ecache. We do nothing unless we
6460 			 * have a pessimistic ecache setting. In that
6461 			 * case we become more optimistic (if the cache is
6462 			 * large enough).
6463 			 */
6464 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6465 				/*
6466 				 * Need to adjust hw_copy_limit* from our
6467 				 * pessimistic uniprocessor value to a more
6468 				 * optimistic UP value *iff* it hasn't been
6469 				 * reset.
6470 				 */
6471 				if ((ecache_size > 1048576) &&
6472 				    (priv_hcl_8 == hw_copy_limit_8)) {
6473 					if (ecache_size <= 2097152)
6474 						hw_copy_limit_8 = 4 *
6475 						    VIS_COPY_THRESHOLD;
6476 					else if (ecache_size <= 4194304)
6477 						hw_copy_limit_8 = 4 *
6478 						    VIS_COPY_THRESHOLD;
6479 					else
6480 						hw_copy_limit_8 = 4 *
6481 						    VIS_COPY_THRESHOLD;
6482 					priv_hcl_8 = hw_copy_limit_8;
6483 				}
6484 			}
6485 		} else if (ecache_size < min_ecache_size) {
6486 			/*
6487 			 * A different ecache size. Can this even happen?
6488 			 */
6489 			if (priv_hcl_8 == hw_copy_limit_8) {
6490 				/*
6491 				 * The previous value that we set
6492 				 * is unchanged (i.e., it hasn't been
6493 				 * scragged by /etc/system). Rewrite it.
6494 				 */
6495 				if (ecache_size <= 1048576)
6496 					hw_copy_limit_8 = 8 *
6497 					    VIS_COPY_THRESHOLD;
6498 				else if (ecache_size <= 2097152)
6499 					hw_copy_limit_8 = 8 *
6500 					    VIS_COPY_THRESHOLD;
6501 				else if (ecache_size <= 4194304)
6502 					hw_copy_limit_8 = 8 *
6503 					    VIS_COPY_THRESHOLD;
6504 				else
6505 					hw_copy_limit_8 = 10 *
6506 					    VIS_COPY_THRESHOLD;
6507 				priv_hcl_8 = hw_copy_limit_8;
6508 				min_ecache_size = ecache_size;
6509 			}
6510 		}
6511 	}
6512 }
6513 
6514 /*
6515  * Called from illegal instruction trap handler to see if we can attribute
6516  * the trap to a fpras check.
6517  */
6518 int
6519 fpras_chktrap(struct regs *rp)
6520 {
6521 	int op;
6522 	struct fpras_chkfngrp *cgp;
6523 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6524 
6525 	if (fpras_chkfngrps == NULL)
6526 		return (0);
6527 
6528 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6529 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6530 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6531 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6532 			break;
6533 	}
6534 	if (op == FPRAS_NCOPYOPS)
6535 		return (0);
6536 
6537 	/*
6538 	 * This is an fpRAS failure caught through an illegal
6539 	 * instruction - trampoline.
6540 	 */
6541 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6542 	rp->r_npc = rp->r_pc + 4;
6543 	return (1);
6544 }
6545 
6546 /*
6547  * fpras_failure is called when a fpras check detects a bad calculation
6548  * result or an illegal instruction trap is attributed to an fpras
6549  * check.  In all cases we are still bound to CPU.
6550  */
6551 int
6552 fpras_failure(int op, int how)
6553 {
6554 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6555 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6556 	ch_async_flt_t ch_flt;
6557 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6558 	struct fpras_chkfn *sfp, *cfp;
6559 	uint32_t *sip, *cip;
6560 	int i;
6561 
6562 	/*
6563 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6564 	 * the time in which we dispatch an ereport and (if applicable) panic.
6565 	 */
6566 	use_hw_bcopy_orig = use_hw_bcopy;
6567 	use_hw_bzero_orig = use_hw_bzero;
6568 	hcl1_orig = hw_copy_limit_1;
6569 	hcl2_orig = hw_copy_limit_2;
6570 	hcl4_orig = hw_copy_limit_4;
6571 	hcl8_orig = hw_copy_limit_8;
6572 	use_hw_bcopy = use_hw_bzero = 0;
6573 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6574 	    hw_copy_limit_8 = 0;
6575 
6576 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6577 	aflt->flt_id = gethrtime_waitfree();
6578 	aflt->flt_class = CPU_FAULT;
6579 	aflt->flt_inst = CPU->cpu_id;
6580 	aflt->flt_status = (how << 8) | op;
6581 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6582 	ch_flt.flt_type = CPU_FPUERR;
6583 
6584 	/*
6585 	 * We must panic if the copy operation had no lofault protection -
6586 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6587 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6588 	 */
6589 	aflt->flt_panic = (curthread->t_lofault == NULL);
6590 
6591 	/*
6592 	 * XOR the source instruction block with the copied instruction
6593 	 * block - this will show us which bit(s) are corrupted.
6594 	 */
6595 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6596 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6597 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6598 		sip = &sfp->fpras_blk0[0];
6599 		cip = &cfp->fpras_blk0[0];
6600 	} else {
6601 		sip = &sfp->fpras_blk1[0];
6602 		cip = &cfp->fpras_blk1[0];
6603 	}
6604 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6605 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6606 
6607 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6608 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6609 
6610 	if (aflt->flt_panic)
6611 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6612 
6613 	/*
6614 	 * We get here for copyin/copyout and kcopy or bcopy where the
6615 	 * caller has used on_fault.  We will flag the error so that
6616 	 * the process may be killed  The trap_async_hwerr mechanism will
6617 	 * take appropriate further action (such as a reboot, contract
6618 	 * notification etc).  Since we may be continuing we will
6619 	 * restore the global hardware copy acceleration switches.
6620 	 *
6621 	 * When we return from this function to the copy function we want to
6622 	 * avoid potentially bad data being used, ie we want the affected
6623 	 * copy function to return an error.  The caller should therefore
6624 	 * invoke its lofault handler (which always exists for these functions)
6625 	 * which will return the appropriate error.
6626 	 */
6627 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6628 	aston(curthread);
6629 
6630 	use_hw_bcopy = use_hw_bcopy_orig;
6631 	use_hw_bzero = use_hw_bzero_orig;
6632 	hw_copy_limit_1 = hcl1_orig;
6633 	hw_copy_limit_2 = hcl2_orig;
6634 	hw_copy_limit_4 = hcl4_orig;
6635 	hw_copy_limit_8 = hcl8_orig;
6636 
6637 	return (1);
6638 }
6639 
6640 #define	VIS_BLOCKSIZE		64
6641 
6642 int
6643 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
6644 {
6645 	int ret, watched;
6646 
6647 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6648 	ret = dtrace_blksuword32(addr, data, 0);
6649 	if (watched)
6650 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6651 
6652 	return (ret);
6653 }
6654 
6655 /*
6656  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
6657  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
6658  * CEEN from the EER to disable traps for further disrupting error types
6659  * on that cpu.  We could cross-call instead, but that has a larger
6660  * instruction and data footprint than cross-trapping, and the cpu is known
6661  * to be faulted.
6662  */
6663 
6664 void
6665 cpu_faulted_enter(struct cpu *cp)
6666 {
6667 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
6668 }
6669 
6670 /*
6671  * Called when a cpu leaves the CPU_FAULTED state to return to one of
6672  * offline, spare, or online (by the cpu requesting this state change).
6673  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
6674  * disrupting error bits that have accumulated without trapping, then
6675  * we cross-trap to re-enable CEEN controlled traps.
6676  */
6677 void
6678 cpu_faulted_exit(struct cpu *cp)
6679 {
6680 	ch_cpu_errors_t cpu_error_regs;
6681 
6682 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
6683 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
6684 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
6685 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
6686 	    (uint64_t)&cpu_error_regs, 0);
6687 
6688 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
6689 }
6690 
6691 /*
6692  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
6693  * the errors in the original AFSR, 0 otherwise.
6694  *
6695  * For all procs if the initial error was a BERR or TO, then it is possible
6696  * that we may have caused a secondary BERR or TO in the process of logging the
6697  * initial error via cpu_run_bus_error_handlers().  If this is the case then
6698  * if the request was protected then a panic is still not necessary, if not
6699  * protected then aft_panic is already set - so either way there's no need
6700  * to set aft_panic for the secondary error.
6701  *
6702  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
6703  * a store merge, then the error handling code will call cpu_deferred_error().
6704  * When clear_errors() is called, it will determine that secondary errors have
6705  * occurred - in particular, the store merge also caused a EDU and WDU that
6706  * weren't discovered until this point.
6707  *
6708  * We do three checks to verify that we are in this case.  If we pass all three
6709  * checks, we return 1 to indicate that we should not panic.  If any unexpected
6710  * errors occur, we return 0.
6711  *
6712  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
6713  * handled in cpu_disrupting_errors().  Since this function is not even called
6714  * in the case we are interested in, we just return 0 for these processors.
6715  */
6716 /*ARGSUSED*/
6717 static int
6718 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
6719     uint64_t t_afar)
6720 {
6721 #if defined(CHEETAH_PLUS)
6722 #else	/* CHEETAH_PLUS */
6723 	struct async_flt *aflt = (struct async_flt *)ch_flt;
6724 #endif	/* CHEETAH_PLUS */
6725 
6726 	/*
6727 	 * Was the original error a BERR or TO and only a BERR or TO
6728 	 * (multiple errors are also OK)
6729 	 */
6730 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
6731 		/*
6732 		 * Is the new error a BERR or TO and only a BERR or TO
6733 		 * (multiple errors are also OK)
6734 		 */
6735 		if ((ch_flt->afsr_errs &
6736 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
6737 			return (1);
6738 	}
6739 
6740 #if defined(CHEETAH_PLUS)
6741 	return (0);
6742 #else	/* CHEETAH_PLUS */
6743 	/*
6744 	 * Now look for secondary effects of a UE on cheetah/jalapeno
6745 	 *
6746 	 * Check the original error was a UE, and only a UE.  Note that
6747 	 * the ME bit will cause us to fail this check.
6748 	 */
6749 	if (t_afsr_errs != C_AFSR_UE)
6750 		return (0);
6751 
6752 	/*
6753 	 * Check the secondary errors were exclusively an EDU and/or WDU.
6754 	 */
6755 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
6756 		return (0);
6757 
6758 	/*
6759 	 * Check the AFAR of the original error and secondary errors
6760 	 * match to the 64-byte boundary
6761 	 */
6762 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
6763 		return (0);
6764 
6765 	/*
6766 	 * We've passed all the checks, so it's a secondary error!
6767 	 */
6768 	return (1);
6769 #endif	/* CHEETAH_PLUS */
6770 }
6771 
6772 /*
6773  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
6774  * is checked for any valid errors.  If found, the error type is
6775  * returned. If not found, the flt_type is checked for L1$ parity errors.
6776  */
6777 /*ARGSUSED*/
6778 static uint8_t
6779 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
6780 {
6781 #if defined(JALAPENO)
6782 	/*
6783 	 * Currently, logging errors to the SC is not supported on Jalapeno
6784 	 */
6785 	return (PLAT_ECC_ERROR2_NONE);
6786 #else
6787 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6788 
6789 	switch (ch_flt->flt_bit) {
6790 	case C_AFSR_CE:
6791 		return (PLAT_ECC_ERROR2_CE);
6792 	case C_AFSR_UCC:
6793 	case C_AFSR_EDC:
6794 	case C_AFSR_WDC:
6795 	case C_AFSR_CPC:
6796 		return (PLAT_ECC_ERROR2_L2_CE);
6797 	case C_AFSR_EMC:
6798 		return (PLAT_ECC_ERROR2_EMC);
6799 	case C_AFSR_IVC:
6800 		return (PLAT_ECC_ERROR2_IVC);
6801 	case C_AFSR_UE:
6802 		return (PLAT_ECC_ERROR2_UE);
6803 	case C_AFSR_UCU:
6804 	case C_AFSR_EDU:
6805 	case C_AFSR_WDU:
6806 	case C_AFSR_CPU:
6807 		return (PLAT_ECC_ERROR2_L2_UE);
6808 	case C_AFSR_IVU:
6809 		return (PLAT_ECC_ERROR2_IVU);
6810 	case C_AFSR_TO:
6811 		return (PLAT_ECC_ERROR2_TO);
6812 	case C_AFSR_BERR:
6813 		return (PLAT_ECC_ERROR2_BERR);
6814 #if defined(CHEETAH_PLUS)
6815 	case C_AFSR_L3_EDC:
6816 	case C_AFSR_L3_UCC:
6817 	case C_AFSR_L3_CPC:
6818 	case C_AFSR_L3_WDC:
6819 		return (PLAT_ECC_ERROR2_L3_CE);
6820 	case C_AFSR_IMC:
6821 		return (PLAT_ECC_ERROR2_IMC);
6822 	case C_AFSR_TSCE:
6823 		return (PLAT_ECC_ERROR2_L2_TSCE);
6824 	case C_AFSR_THCE:
6825 		return (PLAT_ECC_ERROR2_L2_THCE);
6826 	case C_AFSR_L3_MECC:
6827 		return (PLAT_ECC_ERROR2_L3_MECC);
6828 	case C_AFSR_L3_THCE:
6829 		return (PLAT_ECC_ERROR2_L3_THCE);
6830 	case C_AFSR_L3_CPU:
6831 	case C_AFSR_L3_EDU:
6832 	case C_AFSR_L3_UCU:
6833 	case C_AFSR_L3_WDU:
6834 		return (PLAT_ECC_ERROR2_L3_UE);
6835 	case C_AFSR_DUE:
6836 		return (PLAT_ECC_ERROR2_DUE);
6837 	case C_AFSR_DTO:
6838 		return (PLAT_ECC_ERROR2_DTO);
6839 	case C_AFSR_DBERR:
6840 		return (PLAT_ECC_ERROR2_DBERR);
6841 #endif	/* CHEETAH_PLUS */
6842 	default:
6843 		switch (ch_flt->flt_type) {
6844 #if defined(CPU_IMP_L1_CACHE_PARITY)
6845 		case CPU_IC_PARITY:
6846 			return (PLAT_ECC_ERROR2_IPE);
6847 		case CPU_DC_PARITY:
6848 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
6849 				if (ch_flt->parity_data.dpe.cpl_cache ==
6850 				    CPU_PC_PARITY) {
6851 					return (PLAT_ECC_ERROR2_PCACHE);
6852 				}
6853 			}
6854 			return (PLAT_ECC_ERROR2_DPE);
6855 #endif /* CPU_IMP_L1_CACHE_PARITY */
6856 		case CPU_ITLB_PARITY:
6857 			return (PLAT_ECC_ERROR2_ITLB);
6858 		case CPU_DTLB_PARITY:
6859 			return (PLAT_ECC_ERROR2_DTLB);
6860 		default:
6861 			return (PLAT_ECC_ERROR2_NONE);
6862 		}
6863 	}
6864 #endif	/* JALAPENO */
6865 }
6866