xref: /titanic_50/usr/src/uts/sun4u/cpu/us3_common.c (revision 7a364d25fde47aa82704b12b5251bf7fac37f02e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/ddi.h>
32 #include <sys/sysmacros.h>
33 #include <sys/archsystm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/machparam.h>
36 #include <sys/machsystm.h>
37 #include <sys/machthread.h>
38 #include <sys/cpu.h>
39 #include <sys/cmp.h>
40 #include <sys/elf_SPARC.h>
41 #include <vm/vm_dep.h>
42 #include <vm/hat_sfmmu.h>
43 #include <vm/seg_kpm.h>
44 #include <sys/cpuvar.h>
45 #include <sys/cheetahregs.h>
46 #include <sys/us3_module.h>
47 #include <sys/async.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/dditypes.h>
51 #include <sys/prom_debug.h>
52 #include <sys/prom_plat.h>
53 #include <sys/cpu_module.h>
54 #include <sys/sysmacros.h>
55 #include <sys/intreg.h>
56 #include <sys/clock.h>
57 #include <sys/platform_module.h>
58 #include <sys/machtrap.h>
59 #include <sys/ontrap.h>
60 #include <sys/panic.h>
61 #include <sys/memlist.h>
62 #include <sys/bootconf.h>
63 #include <sys/ivintr.h>
64 #include <sys/atomic.h>
65 #include <sys/taskq.h>
66 #include <sys/note.h>
67 #include <sys/ndifm.h>
68 #include <sys/ddifm.h>
69 #include <sys/fm/protocol.h>
70 #include <sys/fm/util.h>
71 #include <sys/fm/cpu/UltraSPARC-III.h>
72 #include <sys/fpras_impl.h>
73 #include <sys/dtrace.h>
74 #include <sys/watchpoint.h>
75 #include <sys/plat_ecc_unum.h>
76 #include <sys/cyclic.h>
77 #include <sys/errorq.h>
78 #include <sys/errclassify.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 /*
85  * Note that 'Cheetah PRM' refers to:
86  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
87  */
88 
/*
 * Per CPU pointers to physical address of TL>0 logout data areas.
 * These pointers have to be in the kernel nucleus to avoid MMU
 * misses.  The array has one slot for each of the NCPU possible CPUs.
 */
uint64_t ch_err_tl1_paddrs[NCPU];

/*
 * One statically allocated structure to use during startup/DR
 * to prevent unnecessary panics.
 */
ch_err_tl1_data_t ch_err_tl1_data;

/*
 * Per CPU pending error at TL>0, used by level15 softint handler.
 * One byte per possible CPU (NCPU entries).
 */
uchar_t ch_err_tl1_pending[NCPU];

/*
 * Task queue used for deferred CE re-enable after trap.
 * It is only declared here; it is not created in this part of the file.
 */
taskq_t		*ch_check_ce_tq;
111 
/*
 * Internal functions.
 *
 * Forward declarations for the file-local helpers.  Several groups are
 * only declared when the matching feature macro is defined
 * (CPU_IMP_ECACHE_ASSOC, CHEETAHPLUS_ERRATUM_25, CPU_IMP_L1_CACHE_PARITY).
 */
static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
    ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
static int clear_ecc(struct async_flt *ecc);
#if defined(CPU_IMP_ECACHE_ASSOC)
static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
#endif
static int cpu_ecache_set_size(struct cpu *cp);
static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
static int cpu_ectag_pa_to_subblk_state(int cachesize,
				uint64_t subaddr, uint64_t tag);
static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
static void cpu_scrubphys(struct async_flt *aflt);
static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
    int *, int *);
static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
static void cpu_ereport_init(struct async_flt *aflt);
static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
    uint64_t nceen, ch_cpu_logout_t *clop);
static int cpu_ce_delayed_ec_logout(uint64_t);
static int cpu_matching_ecache_line(uint64_t, void *, int, int *);

#ifdef	CHEETAHPLUS_ERRATUM_25
/*
 * Sendmondo livelock recovery helpers.  mondo_recover_proc() is declared
 * static here; its definition later in the file omits the keyword and
 * therefore takes its (internal) linkage from this declaration.
 */
static int mondo_recover_proc(uint16_t, int);
static void cheetah_nudge_init(void);
static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
    cyc_time_t *when);
static void cheetah_nudge_buddy(void);
#endif	/* CHEETAHPLUS_ERRATUM_25 */

#if defined(CPU_IMP_L1_CACHE_PARITY)
static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
    ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
#endif	/* CPU_IMP_L1_CACHE_PARITY */

/*
 * Function pointer for looking up memory information for a syndrome
 * code and physical address.  It has no initializer here.
 * NOTE(review): presumably installed by platform/memory-controller code
 * elsewhere -- confirm before relying on it being non-NULL.
 */
int (*p2get_mem_info)(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);
172 
/*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
 * of this array have the following semantics:
 *
 *      00-127  The number of the bad bit, when only one bit is bad.
 *      128     ECC bit C0 is bad.
 *      129     ECC bit C1 is bad.
 *      130     ECC bit C2 is bad.
 *      131     ECC bit C3 is bad.
 *      132     ECC bit C4 is bad.
 *      133     ECC bit C5 is bad.
 *      134     ECC bit C6 is bad.
 *      135     ECC bit C7 is bad.
 *      136     ECC bit C8 is bad.
 *	137-143 reserved for Mtag Data and ECC.
 *      144(M2) Two bits are bad within a nibble.
 *      145(M3) Three bits are bad within a nibble.
 *      146(M4) Four bits are bad within a nibble.
 *      147(M)  Multiple bits (5 or more) are bad.
 *      148     NO bits are bad.
 *      149 and up are the special syndrome codes (S003, ...) defined below;
 *      which of them exist depends on JALAPENO/SERRANO.
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
 */

#define	C0	128
#define	C1	129
#define	C2	130
#define	C3	131
#define	C4	132
#define	C5	133
#define	C6	134
#define	C7	135
#define	C8	136
#define	MT0	137	/* Mtag Data bit 0 */
#define	MT1	138
#define	MT2	139
#define	MTC0	140	/* Mtag Check bit 0 */
#define	MTC1	141
#define	MTC2	142
#define	MTC3	143
#define	M2	144
#define	M3	145
#define	M4	146
#define	M	147
#define	NA	148
#if defined(JALAPENO) || defined(SERRANO)
#define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
#define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
#define	SLAST	S003MEM	/* last special syndrome */
#else /* JALAPENO || SERRANO */
#define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
#define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
#define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
#define	SLAST	S11C	/* last special syndrome */
#endif /* JALAPENO || SERRANO */
#if defined(JALAPENO) || defined(SERRANO)
#define	BPAR0	152	/* syndromes 152 through 167 for bus parity */
#define	BPAR15	167
#endif	/* JALAPENO || SERRANO */

/*
 * 32 rows of 16 entries, i.e. one entry for each of the 512 possible
 * 9-bit syndromes.  The two rows that are conditional on
 * JALAPENO/SERRANO differ only in whether the S071 and S11C special
 * codes are used.
 */
static uint8_t ecc_syndrome_tab[] =
{
NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
#if defined(JALAPENO) || defined(SERRANO)
116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
#else	/* JALAPENO || SERRANO */
116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
#endif	/* JALAPENO || SERRANO */
C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
#if defined(JALAPENO) || defined(SERRANO)
M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
#else	/* JALAPENO || SERRANO */
M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
#endif	/* JALAPENO || SERRANO */
M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
};

/* Number of entries in ecc_syndrome_tab. */
#define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
278 
#if !(defined(JALAPENO) || defined(SERRANO))
/*
 * This table is used to determine which bit(s) is(are) bad when a Mtag
 * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
 * of this array have the following semantics:
 *
 *      -1	Invalid mtag syndrome.
 *      137     Mtag Data 0 is bad.
 *      138     Mtag Data 1 is bad.
 *      139     Mtag Data 2 is bad.
 *      140     Mtag ECC 0 is bad.
 *      141     Mtag ECC 1 is bad.
 *      142     Mtag ECC 2 is bad.
 *      143     Mtag ECC 3 is bad.
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6.
 *
 * NOTE(review): the initializer below contains no -1 entries.  Index 0
 * holds NA (148) and every index that does not map to a single MT* or
 * MTC* code holds M2 (144).
 */
short mtag_syndrome_tab[] =
{
NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
};

/* Number of entries in mtag_syndrome_tab (16, one per 4-bit syndrome). */
#define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))

#else /* !(JALAPENO || SERRANO) */

/*
 * Jalapeno/Serrano builds have no Mtag table; only this size constant is
 * defined for them.
 */
#define	BSYND_TBL_SIZE	16

#endif /* !(JALAPENO || SERRANO) */
307 
/*
 * CE initial classification and subsequent action lookup table.
 * cpu_setup() fills it with CE_INITDISPTBL_POPULATE() and then sets
 * ce_disp_inited to 1.
 */
static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
static int ce_disp_inited;

/*
 * Set to disable leaky and partner check for memory correctables
 */
int ce_xdiag_off;

/*
 * The following are not incremented atomically so are indicative only
 */
static int ce_xdiag_drops;
static int ce_xdiag_lkydrops;
static int ce_xdiag_ptnrdrops;
static int ce_xdiag_bad;

/*
 * CE leaky check callback structure.  It bundles a fault structure with
 * an error queue and one of its elements.
 */
typedef struct {
	struct async_flt *lkycb_aflt;
	errorq_t *lkycb_eqp;
	errorq_elem_t *lkycb_eqep;
} ce_lkychk_cb_t;
335 
/*
 * defines for various ecache_flush_flag's
 */
#define	ECACHE_FLUSH_LINE	1
#define	ECACHE_FLUSH_ALL	2

/*
 * STICK sync
 *
 * STICK_ITERATION and MAX_TSKEW are the defaults for the stick_iter and
 * stick_tsk variables below.  EV_A_START..EV_B_END take the values 0..3
 * and EVENTS (4) sizes the timestamp[] array.
 * NOTE(review): presumably the EV_* values are the indices into
 * timestamp[]; the code using them is outside this part of the file.
 */
#define	STICK_ITERATION 10
#define	MAX_TSKEW	1
#define	EV_A_START	0
#define	EV_A_END	1
#define	EV_B_START	2
#define	EV_B_END	3
#define	EVENTS		4

static int64_t stick_iter = STICK_ITERATION;
static int64_t stick_tsk = MAX_TSKEW;

/* Commands posted through stick_sync_cmd; EVENT_NULL is the idle value. */
typedef enum {
	EVENT_NULL = 0,
	SLAVE_START,
	SLAVE_CONT,
	MASTER_START
} event_cmd_t;

static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
static int64_t timestamp[EVENTS];
static volatile int slave_done;

#ifdef DEBUG
/* DEBUG only: per-CPU record of up to DSYNC_ATTEMPTS skew values. */
#define	DSYNC_ATTEMPTS 64
typedef struct {
	int64_t	skew_val[DSYNC_ATTEMPTS];
} ss_t;

ss_t stick_sync_stats[NCPU];
#endif /* DEBUG */
375 
/*
 * Maximum number of contexts for Cheetah (8192).  cpu_setup() uses this
 * as the value of nctxs when nctxs has not been patched to non-zero.
 */
#define	MAX_NCTXS	(1 << 13)

/* Will be set !NULL for Cheetah+ and derivatives. */
uchar_t *ctx_pgsz_array = NULL;
#if defined(CPU_IMP_DUAL_PAGESIZE)
/*
 * Backing storage that cpu_setup() points ctx_pgsz_array at, unless
 * disable_dual_pgsz is non-zero.
 */
static uchar_t ctx_pgsz_arr[MAX_NCTXS];
uint_t disable_dual_pgsz = 0;
#endif	/* CPU_IMP_DUAL_PAGESIZE */

/*
 * Save the cache bootup state for use when internal
 * caches are to be re-enabled after an error occurs.
 * Set by cpu_setup() from get_dcu() masked with DCU_CACHE.
 */
uint64_t cache_boot_state;

/*
 * PA[22:0] represent Displacement in Safari configuration space.
 * The mask 0x7fffff selects exactly those 23 low-order bits.
 */
uint_t	root_phys_addr_lo_mask = 0x7fffffu;

/*
 * E-clock divisor table: JBus values on Jalapeno/Serrano, Safari values
 * otherwise.  The table is terminated by a {0, 0} entry.
 */
bus_config_eclk_t bus_config_eclk[] = {
#if defined(JALAPENO) || defined(SERRANO)
	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
#else /* JALAPENO || SERRANO */
	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
#endif /* JALAPENO || SERRANO */
	{0, 0}
};

/*
 * Interval for deferred CEEN reenable
 */
int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;

/*
 * set in /etc/system to control logging of user BERR/TO's
 */
int cpu_berr_to_verbose = 0;

/*
 * set to 0 in /etc/system to defer CEEN reenable for all CEs
 */
uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;

/*
 * Set of all offline cpus
 */
cpuset_t cpu_offline_set;
432 
/* Prototypes for the CE handling routines. */
static void cpu_delayed_check_ce_errors(void *);
static void cpu_check_ce_errors(void *);
void cpu_error_ecache_flush(ch_async_flt_t *);
static int cpu_error_ecache_flush_required(ch_async_flt_t *);
static void cpu_log_and_clear_ce(ch_async_flt_t *);
void cpu_ce_detected(ch_cpu_errors_t *, int);

/*
 * CE Leaky check timeout in microseconds.  This is chosen to be twice the
 * memory refresh interval of current DIMMs (64ms).  After initial fix that
 * gives at least one full refresh cycle in which the cell can leak
 * (whereafter further refreshes simply reinforce any incorrect bit value).
 */
clock_t cpu_ce_lkychk_timeout_usec = 128000;

/*
 * CE partner check partner caching period in seconds
 */
int cpu_ce_ptnr_cachetime_sec = 60;

/*
 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
 * (32 bytes are copied and then flushed from instruction memory).
 *
 * NOTE(review): the expansion is two separate statements and already ends
 * in a semicolon, so it is only safe as a statement of its own or inside
 * braces; as the body of an unbraced if/else only the bcopy() would be
 * conditional.
 */
#define	CH_SET_TRAP(ttentry, ttlabel)			\
		bcopy((const void *)&ttlabel, &ttentry, 32);		\
		flush_instr_mem((caddr_t)&ttentry, 32);

static int min_ecache_size;
static uint_t priv_hcl_1;
static uint_t priv_hcl_2;
static uint_t priv_hcl_4;
static uint_t priv_hcl_8;
465 
466 void
467 cpu_setup(void)
468 {
469 	extern int at_flags;
470 	extern int disable_delay_tlb_flush, delay_tlb_flush;
471 	extern int cpc_has_overflow_intr;
472 	extern int disable_text_largepages;
473 	extern int use_text_pgsz4m;
474 
475 	/*
476 	 * Setup chip-specific trap handlers.
477 	 */
478 	cpu_init_trap();
479 
480 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
481 
482 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
483 
484 	/*
485 	 * save the cache bootup state.
486 	 */
487 	cache_boot_state = get_dcu() & DCU_CACHE;
488 
489 	/*
490 	 * Use the maximum number of contexts available for Cheetah
491 	 * unless it has been tuned for debugging.
492 	 * We are checking against 0 here since this value can be patched
493 	 * while booting.  It can not be patched via /etc/system since it
494 	 * will be patched too late and thus cause the system to panic.
495 	 */
496 	if (nctxs == 0)
497 		nctxs = MAX_NCTXS;
498 
499 	/*
500 	 * Due to the number of entries in the fully-associative tlb
501 	 * this may have to be tuned lower than in spitfire.
502 	 */
503 	pp_slots = MIN(8, MAXPP_SLOTS);
504 
505 	/*
506 	 * Block stores do not invalidate all pages of the d$, pagecopy
507 	 * et. al. need virtual translations with virtual coloring taken
508 	 * into consideration.  prefetch/ldd will pollute the d$ on the
509 	 * load side.
510 	 */
511 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
512 
513 	if (use_page_coloring) {
514 		do_pg_coloring = 1;
515 		if (use_virtual_coloring)
516 			do_virtual_coloring = 1;
517 	}
518 
519 	isa_list =
520 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
521 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523 
524 	/*
525 	 * On Panther-based machines, this should
526 	 * also include AV_SPARC_POPC too
527 	 */
528 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529 
530 	/*
531 	 * On cheetah, there's no hole in the virtual address space
532 	 */
533 	hole_start = hole_end = 0;
534 
535 	/*
536 	 * The kpm mapping window.
537 	 * kpm_size:
538 	 *	The size of a single kpm range.
539 	 *	The overall size will be: kpm_size * vac_colors.
540 	 * kpm_vbase:
541 	 *	The virtual start address of the kpm range within the kernel
542 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
543 	 */
544 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545 	kpm_size_shift = 43;
546 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547 	kpm_smallpages = 1;
548 
549 	/*
550 	 * The traptrace code uses either %tick or %stick for
551 	 * timestamping.  We have %stick so we can use it.
552 	 */
553 	traptrace_use_stick = 1;
554 
555 	/*
556 	 * Cheetah has a performance counter overflow interrupt
557 	 */
558 	cpc_has_overflow_intr = 1;
559 
560 	/*
561 	 * Use cheetah flush-all support
562 	 */
563 	if (!disable_delay_tlb_flush)
564 		delay_tlb_flush = 1;
565 
566 #if defined(CPU_IMP_DUAL_PAGESIZE)
567 	/*
568 	 * Use Cheetah+ and later dual page size support.
569 	 */
570 	if (!disable_dual_pgsz) {
571 		ctx_pgsz_array = ctx_pgsz_arr;
572 	}
573 #endif	/* CPU_IMP_DUAL_PAGESIZE */
574 
575 	/*
576 	 * Declare that this architecture/cpu combination does fpRAS.
577 	 */
578 	fpras_implemented = 1;
579 
580 	/*
581 	 * Enable 4M pages to be used for mapping user text by default.  Don't
582 	 * use large pages for initialized data segments since we may not know
583 	 * at exec() time what should be the preferred large page size for DTLB
584 	 * programming.
585 	 */
586 	use_text_pgsz4m = 1;
587 	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
588 	    (1 << TTE32M) | (1 << TTE256M);
589 
590 	/*
591 	 * Setup CE lookup table
592 	 */
593 	CE_INITDISPTBL_POPULATE(ce_disp_table);
594 	ce_disp_inited = 1;
595 }
596 
597 /*
598  * Called by setcpudelay
599  */
600 void
601 cpu_init_tick_freq(void)
602 {
603 	/*
604 	 * For UltraSPARC III and beyond we want to use the
605 	 * system clock rate as the basis for low level timing,
606 	 * due to support of mixed speed CPUs and power managment.
607 	 */
608 	if (system_clock_freq == 0)
609 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
610 
611 	sys_tick_freq = system_clock_freq;
612 }
613 
614 #ifdef CHEETAHPLUS_ERRATUM_25
/*
 * Tunables
 *
 * cheetah_sendmondo_fullscan: when 0, mondo_recover() does not fall back
 * to claiming the whole of memory after the TSB scan fails.
 * cheetah_sendmondo_recover_delay: period, in seconds, programmed into the
 * nudge cyclic by cheetah_nudge_onln(), which also raises it to
 * CHEETAH_LIVELOCK_MIN_DELAY if it is set lower.
 */
int cheetah_bpe_off = 0;
int cheetah_sendmondo_recover = 1;
int cheetah_sendmondo_fullscan = 0;
int cheetah_sendmondo_recover_delay = 5;

#define	CHEETAH_LIVELOCK_MIN_DELAY	1

/*
 * Recovery Statistics
 */
typedef struct cheetah_livelock_entry	{
	int cpuid;		/* fallen cpu */
	int buddy;		/* cpu that ran recovery */
	clock_t lbolt;		/* when recovery started */
	hrtime_t recovery_time;	/* time spent in recovery */
} cheetah_livelock_entry_t;

#define	CHEETAH_LIVELOCK_NENTRY	32

/*
 * History of the most recent recoveries, used as a ring:
 * cheetah_livelock_entry_nxt is the next slot to hand out and wraps to 0
 * after CHEETAH_LIVELOCK_NENTRY entries.
 */
cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
int cheetah_livelock_entry_nxt;

/*
 * Point statp at the next history slot and advance the ring index.
 * The expansion is a braced block, so mondo_recover() invokes it without
 * a trailing semicolon.
 */
#define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
		cheetah_livelock_entry_nxt = 0;				\
	}								\
}

/* Store val into field item of the history entry statp points to. */
#define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val

struct {
	hrtime_t hrt;		/* maximum recovery time */
	int recovery;		/* recovered */
	int full_claimed;	/* maximum pages claimed in full recovery */
	int proc_entry;		/* attempted to claim TSB */
	int proc_tsb_scan;	/* tsb scanned */
	int proc_tsb_partscan;	/* tsb partially scanned */
	int proc_tsb_fullscan;	/* whole tsb scanned */
	int proc_claimed;	/* maximum pages claimed in tsb scan */
	int proc_user;		/* user thread */
	int proc_kernel;	/* kernel thread */
	int proc_onflt;		/* bad stack */
	int proc_cpu;		/* null cpu */
	int proc_thread;	/* null thread */
	int proc_proc;		/* null proc */
	int proc_as;		/* null as */
	int proc_hat;		/* null hat */
	int proc_hat_inval;	/* hat contents don't make sense */
	int proc_hat_busy;	/* hat is changing TSBs */
	int proc_tsb_reloc;	/* TSB skipped because being relocated */
	int proc_cnum_bad;	/* cnum out of range */
	int proc_cnum;		/* last cnum processed */
	tte_t proc_tte;		/* last tte processed */
} cheetah_livelock_stat;

/* Increment one counter of cheetah_livelock_stat (plain, non-atomic ++). */
#define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++

/* Overwrite one field of cheetah_livelock_stat. */
#define	CHEETAH_LIVELOCK_STATSET(item, value)		\
	cheetah_livelock_stat.item = value

/*
 * Raise one field of cheetah_livelock_stat to value if value is larger.
 * Note that value is expanded twice and is not parenthesized by the macro.
 */
#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
	if (value > cheetah_livelock_stat.item)		\
		cheetah_livelock_stat.item = value;	\
}
683 
/*
 * Attempt to recover a cpu by claiming every cache line as saved
 * in the TSB that the non-responsive cpu is using. Since we can't
 * grab any adaptive lock, this is at best an attempt to do so. Because
 * we don't grab any locks, we must operate under the protection of
 * on_fault().
 *
 * cpuid is the target cpu and bn selects the NACK/BUSY bit pair that is
 * tested in the value returned by getidsr().
 *
 * The routine follows cpu[cpuid] -> thread -> proc -> as -> hat and gives
 * up (returning 0) as soon as any link is NULL or looks inconsistent.  It
 * then walks the TSB(s) of that hat, followed once by the kernel TSB if
 * the hat was not the kernel's, and claims the lines of every valid,
 * cacheable, non-side-effect mapping it finds.
 *
 * Return 1 if cpuid could be recovered, 0 if failed.
 */
int
mondo_recover_proc(uint16_t cpuid, int bn)
{
	label_t ljb;
	cpu_t *cp;
	kthread_t *t;
	proc_t *p;
	struct as *as;
	struct hat *hat;
	short  cnum;
	struct tsb_info *tsbinfop;
	struct tsbe *tsbep;
	caddr_t tsbp;
	caddr_t end_tsbp;
	uint64_t paddr;
	uint64_t idsr;
	u_longlong_t pahi, palo;
	int pages_claimed = 0;
	tte_t tsbe_tte;
	int tried_kernel_tsb = 0;

	CHEETAH_LIVELOCK_STAT(proc_entry);

	/* Any fault taken below comes back here and abandons the attempt. */
	if (on_fault(&ljb)) {
		CHEETAH_LIVELOCK_STAT(proc_onflt);
		goto badstruct;
	}

	if ((cp = cpu[cpuid]) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_cpu);
		goto badstruct;
	}

	if ((t = cp->cpu_thread) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_thread);
		goto badstruct;
	}

	if ((p = ttoproc(t)) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_proc);
		goto badstruct;
	}

	if ((as = p->p_as) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_as);
		goto badstruct;
	}

	if ((hat = as->a_hat) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_hat);
		goto badstruct;
	}

	/*
	 * Pick the first TSB to scan: the hat's own TSB list for a user
	 * hat, or the kernel TSB when the hat is ksfmmup.
	 */
	if (hat != ksfmmup) {
		CHEETAH_LIVELOCK_STAT(proc_user);
		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
			goto badstruct;
		}
		tsbinfop = hat->sfmmu_tsb;
		if (tsbinfop == NULL) {
			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
			goto badstruct;
		}
		tsbp = tsbinfop->tsb_va;
		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
	} else {
		CHEETAH_LIVELOCK_STAT(proc_kernel);
		tsbinfop = NULL;
		tsbp = ktsb_base;
		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
	}

	/* Verify as */
	if (hat->sfmmu_as != as) {
		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
		goto badstruct;
	}

	cnum = hat->sfmmu_cnum;
	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);

	if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) {
		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
		goto badstruct;
	}

	/* One iteration per TSB. */
	do {
		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);

		/*
		 * Skip TSBs being relocated.  This is important because
		 * we want to avoid the following deadlock scenario:
		 *
		 * 1) when we came in we set ourselves to "in recover" state.
		 * 2) when we try to touch TSB being relocated the mapping
		 *    will be in the suspended state so we'll spin waiting
		 *    for it to be unlocked.
		 * 3) when the CPU that holds the TSB mapping locked tries to
		 *    unlock it it will send a xtrap which will fail to xcall
		 *    us or the CPU we're trying to recover, and will in turn
		 *    enter the mondo code.
		 * 4) since we are still spinning on the locked mapping
		 *    no further progress will be made and the system will
		 *    inevitably hard hang.
		 *
		 * A TSB not being relocated can't begin being relocated
		 * while we're accessing it because we check
		 * sendmondo_in_recover before relocating TSBs.
		 */
		if (hat != ksfmmup &&
		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
			goto next_tsbinfo;
		}

		for (tsbep = (struct tsbe *)tsbp;
		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
			tsbe_tte = tsbep->tte_data;

			if (tsbe_tte.tte_val == 0) {
				/*
				 * Invalid tte
				 */
				continue;
			}
			if (tsbe_tte.tte_se) {
				/*
				 * Don't want device registers
				 */
				continue;
			}
			if (tsbe_tte.tte_cp == 0) {
				/*
				 * Must be cached in E$
				 */
				continue;
			}
			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
			/*
			 * Stop as soon as neither the NACK nor the BUSY
			 * bit for bn is set any more.
			 */
			idsr = getidsr();
			if ((idsr & (IDSR_NACK_BIT(bn) |
			    IDSR_BUSY_BIT(bn))) == 0) {
				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
				goto done;
			}
			/* Rebuild the physical address from the tte. */
			pahi = tsbe_tte.tte_pahi;
			palo = tsbe_tte.tte_palo;
			paddr = (uint64_t)((pahi << 32) |
			    (palo << MMU_PAGESHIFT));
			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
			    CH_ECACHE_SUBBLK_SIZE);
			/*
			 * This tests the idsr value sampled before
			 * claimlines(); the register is not re-read here.
			 */
			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
				shipit(cpuid, bn);
			}
			pages_claimed++;
		}
next_tsbinfo:
		/*
		 * Advance to the hat's next TSB if there is one.  Once the
		 * list is exhausted, switch to the kernel TSB, which is
		 * scanned at most once (tracked by tried_kernel_tsb).
		 */
		if (tsbinfop != NULL)
			tsbinfop = tsbinfop->tsb_next;
		if (tsbinfop != NULL) {
			tsbp = tsbinfop->tsb_va;
			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
		} else if (tsbp == ktsb_base) {
			tried_kernel_tsb = 1;
		} else if (!tried_kernel_tsb) {
			tsbp = ktsb_base;
			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
			hat = ksfmmup;
			tsbinfop = NULL;
		}
	} while (tsbinfop != NULL ||
			((tsbp == ktsb_base) && !tried_kernel_tsb));

	/* Every TSB was scanned; the result depends on the current IDSR. */
	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
	no_fault();
	idsr = getidsr();
	if ((idsr & (IDSR_NACK_BIT(bn) |
	    IDSR_BUSY_BIT(bn))) == 0) {
		return (1);
	} else {
		return (0);
	}

done:
	/* The target's NACK/BUSY bits cleared part way through a scan. */
	no_fault();
	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
	return (1);

badstruct:
	/* A fault was taken or a structure failed its sanity check. */
	no_fault();
	return (0);
}
886 
/*
 * Attempt to claim ownership, temporarily, of every cache line that a
 * non-responsive cpu might be using.  This might kick that cpu out of
 * this state.
 *
 * Only one recovery runs at a time: the global sendmondo_in_recover flag
 * is taken with cas32() on entry and released on exit.  A caller that finds
 * the flag already set just waits for it to clear and returns 0.
 *
 * Parameters:
 *	cpuid	target CPU; passed to mondo_recover_proc() and shipit().
 *	bn	index of the IDSR BUSY/NACK bit pair to watch for the target
 *		(used with IDSR_BUSY_BIT()/IDSR_NACK_BIT()).
 *
 * The return value indicates to the caller if we have exhausted all recovery
 * techniques. If 1 is returned, it is useless to call this function again
 * even for a different target CPU.  0 is returned when another recovery was
 * already in progress, when mondo_recover_proc() returned 1, or when the
 * target's BUSY and NACK bits were both found clear during the memory scan.
 */
int
mondo_recover(uint16_t cpuid, int bn)
{
	struct memseg *seg;
	uint64_t begin_pa, end_pa, cur_pa;
	hrtime_t begin_hrt, end_hrt;
	int retval = 0;
	int pages_claimed = 0;
	cheetah_livelock_entry_t *histp;
	uint64_t idsr;

	/* Try to become the single active recoverer (flag 0 -> 1). */
	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
		/*
		 * Wait while recovery takes place
		 */
		while (sendmondo_in_recover) {
			drv_usecwait(1);
		}
		/*
		 * Assume we didn't claim the whole memory. If
		 * the target of this caller is not recovered,
		 * it will come back.
		 */
		return (retval);
	}

	/*
	 * Record who is recovering whom in the livelock history.
	 * CHEETAH_LIVELOCK_ENTRY_NEXT is a macro that sets histp; it is
	 * written without a trailing semicolon in this file.
	 */
	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);

	begin_hrt = gethrtime_waitfree();
	/*
	 * First try to claim the lines in the TSB the target
	 * may have been using.
	 */
	if (mondo_recover_proc(cpuid, bn) == 1) {
		/*
		 * Didn't claim the whole memory
		 */
		goto done;
	}

	/*
	 * We tried using the TSB. The target is still
	 * not recovered. Check if complete memory scan is
	 * enabled.
	 */
	if (cheetah_sendmondo_fullscan == 0) {
		/*
		 * Full memory scan is disabled.
		 */
		retval = 1;
		goto done;
	}

	/*
	 * Try claiming the whole memory.  Walk every memseg one MMU page
	 * at a time, re-reading the IDSR before each page so that we stop
	 * as soon as the target is neither BUSY nor NACKed.
	 */
	for (seg = memsegs; seg; seg = seg->next) {
		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
		for (cur_pa = begin_pa; cur_pa < end_pa;
		    cur_pa += MMU_PAGESIZE) {
			idsr = getidsr();
			if ((idsr & (IDSR_NACK_BIT(bn) |
			    IDSR_BUSY_BIT(bn))) == 0) {
				/*
				 * Didn't claim all memory
				 */
				goto done;
			}
			claimlines(cur_pa, MMU_PAGESIZE,
			    CH_ECACHE_SUBBLK_SIZE);
			/*
			 * Re-send only when the dispatch is no longer BUSY;
			 * this uses the IDSR value read before claimlines().
			 */
			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
				shipit(cpuid, bn);
			}
			pages_claimed++;
		}
	}

	/*
	 * We did all we could.
	 */
	retval = 1;

done:
	/*
	 * Update statistics
	 */
	end_hrt = gethrtime_waitfree();
	CHEETAH_LIVELOCK_STAT(recovery);
	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
	    (end_hrt -  begin_hrt));

	/* Release the recovery flag (1 -> 0), retrying until the CAS sees 1. */
	while (cas32(&sendmondo_in_recover, 1, 0) != 1);

	return (retval);
}
997 
998 /*
999  * This is called by the cyclic framework when this CPU becomes online
1000  */
1001 /*ARGSUSED*/
1002 static void
1003 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
1004 {
1005 
1006 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1007 	hdlr->cyh_level = CY_LOW_LEVEL;
1008 	hdlr->cyh_arg = NULL;
1009 
1010 	/*
1011 	 * Stagger the start time
1012 	 */
1013 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1014 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1015 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1016 	}
1017 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1018 }
1019 
1020 /*
1021  * Create a low level cyclic to send a xtrap to the next cpu online.
1022  * However, there's no need to have this running on a uniprocessor system.
1023  */
1024 static void
1025 cheetah_nudge_init(void)
1026 {
1027 	cyc_omni_handler_t hdlr;
1028 
1029 	if (max_ncpus == 1) {
1030 		return;
1031 	}
1032 
1033 	hdlr.cyo_online = cheetah_nudge_onln;
1034 	hdlr.cyo_offline = NULL;
1035 	hdlr.cyo_arg = NULL;
1036 
1037 	mutex_enter(&cpu_lock);
1038 	(void) cyclic_add_omni(&hdlr);
1039 	mutex_exit(&cpu_lock);
1040 }
1041 
1042 /*
1043  * Cyclic handler to wake up buddy
1044  */
1045 void
1046 cheetah_nudge_buddy(void)
1047 {
1048 	/*
1049 	 * Disable kernel preemption to protect the cpu list
1050 	 */
1051 	kpreempt_disable();
1052 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1053 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1054 		    0, 0);
1055 	}
1056 	kpreempt_enable();
1057 }
1058 
1059 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1060 
#ifdef SEND_MONDO_STATS
/*
 * Optional mondo-dispatch latency histograms, compiled in only when
 * SEND_MONDO_STATS is defined.
 *
 * x_one_stimes/x_one_ltimes are updated at the end of send_one_mondo():
 * sends that complete in fewer than 8192 ticks are counted in
 * x_one_stimes[ticks >> 7] (64 buckets of 128 ticks each); longer sends are
 * counted in x_one_ltimes[(ticks >> 13) & 0xf].
 *
 * NOTE(review): the x_set_* and x_nack_stimes arrays are not referenced in
 * the code visible here; presumably they are the equivalents for the
 * multi-target send path -- confirm against their users.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
1069 
1070 /*
1071  * Note: A version of this function is used by the debugger via the KDI,
1072  * and must be kept in sync with this version.  Any changes made to this
1073  * function to support new chips or to accomodate errata must also be included
1074  * in the KDI-specific version.  See us3_kdi.c.
1075  */
1076 void
1077 send_one_mondo(int cpuid)
1078 {
1079 	int busy, nack;
1080 	uint64_t idsr, starttick, endtick, tick, lasttick;
1081 	uint64_t busymask;
1082 #ifdef	CHEETAHPLUS_ERRATUM_25
1083 	int recovered = 0;
1084 #endif
1085 
1086 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1087 	starttick = lasttick = gettick();
1088 	shipit(cpuid, 0);
1089 	endtick = starttick + xc_tick_limit;
1090 	busy = nack = 0;
1091 #if defined(JALAPENO) || defined(SERRANO)
1092 	/*
1093 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1094 	 * will be used for dispatching interrupt. For now, assume
1095 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1096 	 * issues with respect to BUSY/NACK pair usage.
1097 	 */
1098 	busymask  = IDSR_BUSY_BIT(cpuid);
1099 #else /* JALAPENO || SERRANO */
1100 	busymask = IDSR_BUSY;
1101 #endif /* JALAPENO || SERRANO */
1102 	for (;;) {
1103 		idsr = getidsr();
1104 		if (idsr == 0)
1105 			break;
1106 
1107 		tick = gettick();
1108 		/*
1109 		 * If there is a big jump between the current tick
1110 		 * count and lasttick, we have probably hit a break
1111 		 * point.  Adjust endtick accordingly to avoid panic.
1112 		 */
1113 		if (tick > (lasttick + xc_tick_jump_limit))
1114 			endtick += (tick - lasttick);
1115 		lasttick = tick;
1116 		if (tick > endtick) {
1117 			if (panic_quiesce)
1118 				return;
1119 #ifdef	CHEETAHPLUS_ERRATUM_25
1120 			if (cheetah_sendmondo_recover && recovered == 0) {
1121 				if (mondo_recover(cpuid, 0)) {
1122 					/*
1123 					 * We claimed the whole memory or
1124 					 * full scan is disabled.
1125 					 */
1126 					recovered++;
1127 				}
1128 				tick = gettick();
1129 				endtick = tick + xc_tick_limit;
1130 				lasttick = tick;
1131 				/*
1132 				 * Recheck idsr
1133 				 */
1134 				continue;
1135 			} else
1136 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1137 			{
1138 				cmn_err(CE_PANIC, "send mondo timeout "
1139 				    "(target 0x%x) [%d NACK %d BUSY]",
1140 				    cpuid, nack, busy);
1141 			}
1142 		}
1143 
1144 		if (idsr & busymask) {
1145 			busy++;
1146 			continue;
1147 		}
1148 		drv_usecwait(1);
1149 		shipit(cpuid, 0);
1150 		nack++;
1151 		busy = 0;
1152 	}
1153 #ifdef SEND_MONDO_STATS
1154 	{
1155 		int n = gettick() - starttick;
1156 		if (n < 8192)
1157 			x_one_stimes[n >> 7]++;
1158 		else
1159 			x_one_ltimes[(n >> 13) & 0xf]++;
1160 	}
1161 #endif
1162 }
1163 
/*
 * Intentionally empty: this CPU module has no work to do in syncfpu().
 * NOTE(review): presumably provided only to satisfy the common CPU-module
 * interface -- confirm against the callers.
 */
void
syncfpu(void)
{
}
1168 
1169 /*
1170  * Return processor specific async error structure
1171  * size used.
1172  */
1173 int
1174 cpu_aflt_size(void)
1175 {
1176 	return (sizeof (ch_async_flt_t));
1177 }
1178 
1179 /*
1180  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1181  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1182  * flush the error that caused the UCU/UCC, then again here at the end to
1183  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1184  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1185  * another Fast ECC trap.
1186  *
1187  * Cheetah+ also handles: TSCE: No additional processing required.
1188  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1189  *
1190  * Note that the p_clo_flags input is only valid in cases where the
1191  * cpu_private struct is not yet initialized (since that is the only
1192  * time that information cannot be obtained from the logout struct.)
1193  */
1194 /*ARGSUSED*/
1195 void
1196 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1197 {
1198 	ch_cpu_logout_t *clop;
1199 	uint64_t ceen, nceen;
1200 
1201 	/*
1202 	 * Get the CPU log out info. If we can't find our CPU private
1203 	 * pointer, then we will have to make due without any detailed
1204 	 * logout information.
1205 	 */
1206 	if (CPU_PRIVATE(CPU) == NULL) {
1207 		clop = NULL;
1208 		ceen = p_clo_flags & EN_REG_CEEN;
1209 		nceen = p_clo_flags & EN_REG_NCEEN;
1210 	} else {
1211 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1212 		ceen = clop->clo_flags & EN_REG_CEEN;
1213 		nceen = clop->clo_flags & EN_REG_NCEEN;
1214 	}
1215 
1216 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1217 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1218 }
1219 
/*
 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
 * ECC at TL>0.  Need to supply either a error register pointer or a
 * cpu logout structure pointer.
 *
 *	tpc	trap PC recorded in the fault (may be NULL when unknown).
 *	priv	non-zero if the trap was taken in privileged mode.
 *	tl	trap level recorded in the fault.
 *	ceen	EN_REG_CEEN if CEEN should be re-enabled on the way out,
 *		else 0.
 *	nceen	EN_REG_NCEEN if NCEEN should be re-enabled on the way out,
 *		else 0.
 *	clop	cpu logout structure holding the captured AFSR/AFAR, or NULL
 *		to read the error registers directly.  When non-NULL the
 *		structure is zeroed and marked invalid before returning.
 *
 * Does not return if the fault is determined to be fatal (fm_panic()).
 */
static void
cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
    uint64_t nceen, ch_cpu_logout_t *clop)
{
	struct async_flt *aflt;
	ch_async_flt_t ch_flt;
	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
	char pr_reason[MAX_REASON_STRING];
	ch_cpu_errors_t cpu_error_regs;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	/*
	 * If no cpu logout data, then we will have to make do without
	 * any detailed logout information.
	 */
	if (clop == NULL) {
		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
		/*
		 * Read the error registers and write the same values back.
		 * NOTE(review): presumably the write-back clears the sticky
		 * bits that were just read -- confirm.
		 */
		get_cpu_error_state(&cpu_error_regs);
		set_cpu_error_state(&cpu_error_regs);
		t_afar = cpu_error_regs.afar;
		t_afsr = cpu_error_regs.afsr;
		t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
	} else {
		t_afar = clop->clo_data.chd_afar;
		t_afsr = clop->clo_data.chd_afsr;
		t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif	/* SERRANO */
	}

	/*
	 * In order to simplify code, we maintain this afsr_errs
	 * variable which holds the aggregate of AFSR and AFSR_EXT
	 * sticky bits.
	 */
	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
	    (t_afsr & C_AFSR_ALL_ERRS);
	pr_reason[0] = '\0';

	/* Setup the async fault structure */
	aflt = (struct async_flt *)&ch_flt;
	aflt->flt_id = gethrtime_waitfree();
	ch_flt.afsr_ext = t_afsr_ext;
	ch_flt.afsr_errs = t_afsr_errs;
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = tpc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = priv;
	aflt->flt_tl = tl;
	aflt->flt_status = ECC_F_TRAP;
	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

	/*
	 * XXXX - Phenomenal hack to get around Solaris not getting all the
	 * cmn_err messages out to the console.  The situation is a UCU (in
	 * priv mode) which causes a WDU which causes a UE (on the retry).
	 * The messages for the UCU and WDU are enqueued and then pulled off
	 * the async queue via softint and syslogd starts to process them
	 * but doesn't get them to the console.  The UE causes a panic, but
	 * since the UCU/WDU messages are already in transit, those aren't
	 * on the async queue.  The hack is to check if we have a matching
	 * WDU event for the UCU, and if it matches, we're more than likely
	 * going to panic with a UE, unless we're under protection.  So, we
	 * check to see if we got a matching WDU event and if we're under
	 * protection.
	 *
	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
	 * looks like this:
	 *    UCU->WDU->UE
	 * For Panther, it could look like either of these:
	 *    UCU---->WDU->L3_WDU->UE
	 *    L3_UCU->WDU->L3_WDU->UE
	 */
	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
		get_cpu_error_state(&cpu_error_regs);
		/* A WDU is now pending at the same address as the UCU. */
		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
		    (cpu_error_regs.afar == t_afar));
		/* No logout area: the WDU was in the registers we read. */
		aflt->flt_panic |= ((clop == NULL) &&
		    (t_afsr_errs & C_AFSR_WDU));
	}

	/*
	 * Queue events on the async event queue, one event per error bit.
	 * If no events are queued or no Fast ECC events are on in the AFSR,
	 * queue an event to complain.
	 */
	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
		ch_flt.flt_type = CPU_INV_AFSR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Zero out + invalidate CPU logout.
	 */
	if (clop) {
		bzero(clop, sizeof (ch_cpu_logout_t));
		clop->clo_data.chd_afar = LOGOUT_INVALID;
	}

	/*
	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
	 * or disrupting errors have happened.  We do this because if a
	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
	 * deferred or disrupting error happening between checking the AFSR and
	 * enabling NCEEN/CEEN.
	 *
	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
	 * taken.
	 */
	set_error_enable(get_error_enable() | (nceen | ceen));
	if (clear_errors(&ch_flt)) {
		/* Any async error picked up here makes the fault fatal. */
		aflt->flt_panic |= ((ch_flt.afsr_errs &
		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
		    NULL);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic)
		fm_panic("%sError(s)", pr_reason);

	/*
	 * Flushing the Ecache here gets the part of the trap handler that
	 * is run at TL=1 out of the Ecache.
	 */
	cpu_flush_ecache();
}
1371 
/*
 * This is called via sys_trap from pil15_interrupt code if the
 * corresponding entry in ch_err_tl1_pending is set.  Checks the
 * various ch_err_tl1_data structures for valid entries based on the bit
 * settings in the ch_err_tl1_flags entry of the structure.
 *
 *	rp	trap-time registers; passed through to cpu_parity_error().
 *	panic	unused in this function.
 *
 * For every logout entry with flags set, the entry is copied, the original
 * is invalidated, and then the "first error" and any "multiple events" are
 * logged through cpu_log_fast_ecc_error() and (where L1 cache parity is
 * implemented) cpu_parity_error().
 */
/*ARGSUSED*/
void
cpu_tl1_error(struct regs *rp, int panic)
{
	ch_err_tl1_data_t *cl1p, cl1;
	int i, ncl1ps;
	uint64_t me_flags;
	uint64_t ceen, nceen;

	/*
	 * Pick the logout area(s) to scan: the single global
	 * ch_err_tl1_data when this CPU has no entry in ch_err_tl1_paddrs,
	 * otherwise the CH_ERR_TL1_TLMAX entries in the CPU private area.
	 * With neither available there is nothing to scan; cl1p is left
	 * unset in that case but the loop below does not run.
	 */
	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
		cl1p = &ch_err_tl1_data;
		ncl1ps = 1;
	} else if (CPU_PRIVATE(CPU) != NULL) {
		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
		ncl1ps = CH_ERR_TL1_TLMAX;
	} else {
		ncl1ps = 0;
	}

	for (i = 0; i < ncl1ps; i++, cl1p++) {
		/* No flags set: this entry holds no logged error. */
		if (cl1p->ch_err_tl1_flags == 0)
			continue;

		/*
		 * Grab a copy of the logout data and invalidate
		 * the logout area.
		 */
		cl1 = *cl1p;
		bzero(cl1p, sizeof (ch_err_tl1_data_t));
		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);

		/*
		 * Log "first error" in ch_err_tl1_data.
		 */
		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
			/* Use the current CEEN/NCEEN enable state. */
			ceen = get_error_enable() & EN_REG_CEEN;
			nceen = get_error_enable() & EN_REG_NCEEN;
			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
		}
#if defined(CPU_IMP_L1_CACHE_PARITY)
		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
			    (caddr_t)cl1.ch_err_tl1_tpc);
		}
#endif	/* CPU_IMP_L1_CACHE_PARITY */

		/*
		 * Log "multiple events" in ch_err_tl1_data.  Note that
		 * we don't read and clear the AFSR/AFAR in the TL>0 code
		 * if the structure is busy, we just do the cache flushing
		 * we have to do and then do the retry.  So the AFSR/AFAR
		 * at this point *should* have some relevant info.  If there
		 * are no valid errors in the AFSR, we'll assume they've
		 * already been picked up and logged.  For I$/D$ parity,
		 * we just log an event with an "Unknown" (NULL) TPC.
		 */
		if (me_flags & CH_ERR_FECC) {
			ch_cpu_errors_t cpu_error_regs;
			uint64_t t_afsr_errs;

			/*
			 * Get the error registers and see if there's
			 * a pending error.  If not, don't bother
			 * generating an "Invalid AFSR" error event.
			 */
			get_cpu_error_state(&cpu_error_regs);
			t_afsr_errs = (cpu_error_regs.afsr_ext &
			    C_AFSR_EXT_ALL_ERRS) |
			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
			if (t_afsr_errs != 0) {
				ceen = get_error_enable() & EN_REG_CEEN;
				nceen = get_error_enable() & EN_REG_NCEEN;
				/*
				 * No logout structure is passed, so the
				 * callee reads the error registers itself.
				 */
				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
				    1, ceen, nceen, NULL);
			}
		}
#if defined(CPU_IMP_L1_CACHE_PARITY)
		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
		}
#endif	/* CPU_IMP_L1_CACHE_PARITY */
	}
}
1463 
1464 /*
1465  * Called from Fast ECC TL>0 handler in case of fatal error.
1466  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1467  * but if we don't, we'll panic with something reasonable.
1468  */
1469 /*ARGSUSED*/
1470 void
1471 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1472 {
1473 	cpu_tl1_error(rp, 1);
1474 	/*
1475 	 * Should never return, but just in case.
1476 	 */
1477 	fm_panic("Unsurvivable ECC Error at TL>0");
1478 }
1479 
/*
 * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
 * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
 * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
 *
 * Cheetah+ also handles (No additional processing required):
 *    DUE, DTO, DBERR	(NCEEN controlled)
 *    THCE		(CEEN and ET_ECC_en controlled)
 *    TUE		(ET_ECC_en controlled)
 *
 * Panther further adds:
 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
 *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
 *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
 *    THCE			(CEEN and L2_tag_ECC_en controlled)
 *    L3_THCE			(CEEN and ET_ECC_en controlled)
 *
 *	rp		trap-time registers; supplies the fault PC and the
 *			privileged-mode bit.
 *	p_clo_flags	not used by this function.  Per the handler
 *			convention it is only valid in cases where the
 *			cpu_private struct is not yet initialized (since that
 *			is the only time that information cannot be obtained
 *			from the logout struct).
 *
 * Does not return if the fault is determined to be fatal (fm_panic()).
 */
/*ARGSUSED*/
void
cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
{
	struct async_flt *aflt;
	ch_async_flt_t ch_flt;
	char pr_reason[MAX_REASON_STRING];
	ch_cpu_logout_t *clop;
	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
	ch_cpu_errors_t cpu_error_regs;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	/*
	 * Get the CPU log out info. If we can't find our CPU private
	 * pointer, then we will have to make do without any detailed
	 * logout information.
	 */
	if (CPU_PRIVATE(CPU) == NULL) {
		clop = NULL;
		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
		/*
		 * Read the error registers and write the same values back.
		 * NOTE(review): presumably the write-back clears the sticky
		 * bits that were just read -- confirm.
		 */
		get_cpu_error_state(&cpu_error_regs);
		set_cpu_error_state(&cpu_error_regs);
		t_afar = cpu_error_regs.afar;
		t_afsr = cpu_error_regs.afsr;
		t_afsr_ext = cpu_error_regs.afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
	} else {
		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
		t_afar = clop->clo_data.chd_afar;
		t_afsr = clop->clo_data.chd_afsr;
		t_afsr_ext = clop->clo_data.chd_afsr_ext;
#if defined(SERRANO)
		ch_flt.afar2 = clop->clo_data.chd_afar2;
#endif	/* SERRANO */
	}

	/*
	 * In order to simplify code, we maintain this afsr_errs
	 * variable which holds the aggregate of AFSR and AFSR_EXT
	 * sticky bits.
	 */
	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
	    (t_afsr & C_AFSR_ALL_ERRS);

	pr_reason[0] = '\0';
	/*
	 * Setup the async fault structure.  Only the fields below are set
	 * here; the rest are left zero from the bzero() above.
	 */
	aflt = (struct async_flt *)&ch_flt;
	ch_flt.afsr_ext = t_afsr_ext;
	ch_flt.afsr_errs = t_afsr_errs;
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
	aflt->flt_tl = 0;
	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);

	/*
	 * If this trap is a result of one of the errors not masked
	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
	 * indicate that a timeout is to be set later.
	 */
	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
	    !aflt->flt_panic)
		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
	else
		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;

	/*
	 * log the CE and clean up
	 */
	cpu_log_and_clear_ce(&ch_flt);

	/*
	 * We re-enable CEEN (if required) and check if any disrupting errors
	 * have happened.  We do this because if a disrupting error had occurred
	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
	 * we enable CEEN *before* checking the AFSR to avoid the small window
	 * of an error happening between checking the AFSR and enabling CEEN.
	 */
	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
	    set_error_enable(get_error_enable() | EN_REG_CEEN);
	if (clear_errors(&ch_flt)) {
		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
		    NULL);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic)
		fm_panic("%sError(s)", pr_reason);
}
1599 
1600 /*
1601  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1602  * L3_EDU:BLD, TO, and BERR events.
1603  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1604  *
1605  * Cheetah+: No additional errors handled.
1606  *
1607  * Note that the p_clo_flags input is only valid in cases where the
1608  * cpu_private struct is not yet initialized (since that is the only
1609  * time that information cannot be obtained from the logout struct.)
1610  */
1611 /*ARGSUSED*/
1612 void
1613 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1614 {
1615 	ushort_t ttype, tl;
1616 	ch_async_flt_t ch_flt;
1617 	struct async_flt *aflt;
1618 	int trampolined = 0;
1619 	char pr_reason[MAX_REASON_STRING];
1620 	ch_cpu_logout_t *clop;
1621 	uint64_t ceen, clo_flags;
1622 	uint64_t log_afsr;
1623 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1624 	ch_cpu_errors_t cpu_error_regs;
1625 	int expected = DDI_FM_ERR_UNEXPECTED;
1626 	ddi_acc_hdl_t *hp;
1627 
1628 	/*
1629 	 * We need to look at p_flag to determine if the thread detected an
1630 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1631 	 * because we just need a consistent snapshot and we know that everyone
1632 	 * else will store a consistent set of bits while holding p_lock.  We
1633 	 * don't have to worry about a race because SDOCORE is set once prior
1634 	 * to doing i/o from the process's address space and is never cleared.
1635 	 */
1636 	uint_t pflag = ttoproc(curthread)->p_flag;
1637 
1638 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1639 	/*
1640 	 * Get the CPU log out info. If we can't find our CPU private
1641 	 * pointer then we will have to make due without any detailed
1642 	 * logout information.
1643 	 */
1644 	if (CPU_PRIVATE(CPU) == NULL) {
1645 		clop = NULL;
1646 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1647 		get_cpu_error_state(&cpu_error_regs);
1648 		set_cpu_error_state(&cpu_error_regs);
1649 		t_afar = cpu_error_regs.afar;
1650 		t_afsr = cpu_error_regs.afsr;
1651 		t_afsr_ext = cpu_error_regs.afsr_ext;
1652 #if defined(SERRANO)
1653 		ch_flt.afar2 = cpu_error_regs.afar2;
1654 #endif	/* SERRANO */
1655 		clo_flags = p_clo_flags;
1656 	} else {
1657 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1658 		t_afar = clop->clo_data.chd_afar;
1659 		t_afsr = clop->clo_data.chd_afsr;
1660 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1661 #if defined(SERRANO)
1662 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1663 #endif	/* SERRANO */
1664 		clo_flags = clop->clo_flags;
1665 	}
1666 
1667 	/*
1668 	 * In order to simplify code, we maintain this afsr_errs
1669 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1670 	 * sticky bits.
1671 	 */
1672 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1673 	    (t_afsr & C_AFSR_ALL_ERRS);
1674 	pr_reason[0] = '\0';
1675 
1676 	/*
1677 	 * Grab information encoded into our clo_flags field.
1678 	 */
1679 	ceen = clo_flags & EN_REG_CEEN;
1680 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1681 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1682 
1683 	/*
1684 	 * handle the specific error
1685 	 */
1686 	aflt = (struct async_flt *)&ch_flt;
1687 	aflt->flt_id = gethrtime_waitfree();
1688 	aflt->flt_bus_id = getprocessorid();
1689 	aflt->flt_inst = CPU->cpu_id;
1690 	ch_flt.afsr_ext = t_afsr_ext;
1691 	ch_flt.afsr_errs = t_afsr_errs;
1692 	aflt->flt_stat = t_afsr;
1693 	aflt->flt_addr = t_afar;
1694 	aflt->flt_pc = (caddr_t)rp->r_pc;
1695 	aflt->flt_prot = AFLT_PROT_NONE;
1696 	aflt->flt_class = CPU_FAULT;
1697 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1698 	aflt->flt_tl = (uchar_t)tl;
1699 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1700 	    C_AFSR_PANIC(t_afsr_errs));
1701 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1702 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1703 
1704 	/*
1705 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1706 	 * see if we were executing in the kernel under on_trap() or t_lofault
1707 	 * protection.  If so, modify the saved registers so that we return
1708 	 * from the trap to the appropriate trampoline routine.
1709 	 */
1710 	if (aflt->flt_priv && tl == 0) {
1711 		if (curthread->t_ontrap != NULL) {
1712 			on_trap_data_t *otp = curthread->t_ontrap;
1713 
1714 			if (otp->ot_prot & OT_DATA_EC) {
1715 				aflt->flt_prot = AFLT_PROT_EC;
1716 				otp->ot_trap |= OT_DATA_EC;
1717 				rp->r_pc = otp->ot_trampoline;
1718 				rp->r_npc = rp->r_pc + 4;
1719 				trampolined = 1;
1720 			}
1721 
1722 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1723 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1724 				aflt->flt_prot = AFLT_PROT_ACCESS;
1725 				otp->ot_trap |= OT_DATA_ACCESS;
1726 				rp->r_pc = otp->ot_trampoline;
1727 				rp->r_npc = rp->r_pc + 4;
1728 				trampolined = 1;
1729 				/*
1730 				 * for peeks and caut_gets errors are expected
1731 				 */
1732 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1733 				if (!hp)
1734 					expected = DDI_FM_ERR_PEEK;
1735 				else if (hp->ah_acc.devacc_attr_access ==
1736 				    DDI_CAUTIOUS_ACC)
1737 					expected = DDI_FM_ERR_EXPECTED;
1738 			}
1739 
1740 		} else if (curthread->t_lofault) {
1741 			aflt->flt_prot = AFLT_PROT_COPY;
1742 			rp->r_g1 = EFAULT;
1743 			rp->r_pc = curthread->t_lofault;
1744 			rp->r_npc = rp->r_pc + 4;
1745 			trampolined = 1;
1746 		}
1747 	}
1748 
1749 	/*
1750 	 * If we're in user mode or we're doing a protected copy, we either
1751 	 * want the ASTON code below to send a signal to the user process
1752 	 * or we want to panic if aft_panic is set.
1753 	 *
1754 	 * If we're in privileged mode and we're not doing a copy, then we
1755 	 * need to check if we've trampolined.  If we haven't trampolined,
1756 	 * we should panic.
1757 	 */
1758 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1759 		if (t_afsr_errs &
1760 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1761 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1762 			aflt->flt_panic |= aft_panic;
1763 	} else if (!trampolined) {
1764 			aflt->flt_panic = 1;
1765 	}
1766 
1767 	/*
1768 	 * If we've trampolined due to a privileged TO or BERR, or if an
1769 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1770 	 * event for that TO or BERR.  Queue all other events (if any) besides
1771 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1772 	 * ignore the number of events queued.  If we haven't trampolined due
1773 	 * to a TO or BERR, just enqueue events normally.
1774 	 */
1775 	log_afsr = t_afsr_errs;
1776 	if (trampolined) {
1777 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1778 	} else if (!aflt->flt_priv) {
1779 		/*
1780 		 * User mode, suppress messages if
1781 		 * cpu_berr_to_verbose is not set.
1782 		 */
1783 		if (!cpu_berr_to_verbose)
1784 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1785 	}
1786 
1787 	/*
1788 	 * Log any errors that occurred
1789 	 */
1790 	if (((log_afsr &
1791 		((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1792 		cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1793 		(t_afsr_errs &
1794 		(C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1795 		ch_flt.flt_type = CPU_INV_AFSR;
1796 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1797 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1798 		    aflt->flt_panic);
1799 	}
1800 
1801 	/*
1802 	 * Zero out + invalidate CPU logout.
1803 	 */
1804 	if (clop) {
1805 		bzero(clop, sizeof (ch_cpu_logout_t));
1806 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1807 	}
1808 
1809 #if defined(JALAPENO) || defined(SERRANO)
1810 	/*
1811 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1812 	 * IO errors that may have resulted in this trap.
1813 	 */
1814 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1815 		cpu_run_bus_error_handlers(aflt, expected);
1816 	}
1817 
1818 	/*
1819 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1820 	 * line from the Ecache.  We also need to query the bus nexus for
1821 	 * fatal errors.  Attempts to do diagnostic read on caches may
1822 	 * introduce more errors (especially when the module is bad).
1823 	 */
1824 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1825 		/*
1826 		 * Ask our bus nexus friends if they have any fatal errors.  If
1827 		 * so, they will log appropriate error messages.
1828 		 */
1829 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1830 			aflt->flt_panic = 1;
1831 
1832 		/*
1833 		 * We got a UE or RUE and are panicking, save the fault PA in
1834 		 * a known location so that the platform specific panic code
1835 		 * can check for copyback errors.
1836 		 */
1837 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1838 			panic_aflt = *aflt;
1839 		}
1840 	}
1841 
1842 	/*
1843 	 * Flush Ecache line or entire Ecache
1844 	 */
1845 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1846 		cpu_error_ecache_flush(&ch_flt);
1847 #else /* JALAPENO || SERRANO */
1848 	/*
1849 	 * UE/BERR/TO: Call our bus nexus friends to check for
1850 	 * IO errors that may have resulted in this trap.
1851 	 */
1852 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1853 		cpu_run_bus_error_handlers(aflt, expected);
1854 	}
1855 
1856 	/*
1857 	 * UE: If the UE is in memory, we need to flush the bad
1858 	 * line from the Ecache.  We also need to query the bus nexus for
1859 	 * fatal errors.  Attempts to do diagnostic read on caches may
1860 	 * introduce more errors (especially when the module is bad).
1861 	 */
1862 	if (t_afsr & C_AFSR_UE) {
1863 		/*
1864 		 * Ask our legacy bus nexus friends if they have any fatal
1865 		 * errors.  If so, they will log appropriate error messages.
1866 		 */
1867 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1868 			aflt->flt_panic = 1;
1869 
1870 		/*
1871 		 * We got a UE and are panicking, save the fault PA in a known
1872 		 * location so that the platform specific panic code can check
1873 		 * for copyback errors.
1874 		 */
1875 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1876 			panic_aflt = *aflt;
1877 		}
1878 	}
1879 
1880 	/*
1881 	 * Flush Ecache line or entire Ecache
1882 	 */
1883 	if (t_afsr_errs &
1884 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1885 		cpu_error_ecache_flush(&ch_flt);
1886 #endif /* JALAPENO || SERRANO */
1887 
1888 	/*
1889 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1890 	 * or disrupting errors have happened.  We do this because if a
1891 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1892 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1893 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1894 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1895 	 * deferred or disrupting error happening between checking the AFSR and
1896 	 * enabling NCEEN/CEEN.
1897 	 *
1898 	 * Note: CEEN reenabled only if it was on when trap taken.
1899 	 */
1900 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
1901 	if (clear_errors(&ch_flt)) {
1902 		/*
1903 		 * Check for secondary errors, and avoid panicking if we
1904 		 * have them
1905 		 */
1906 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
1907 		    t_afar) == 0) {
1908 			aflt->flt_panic |= ((ch_flt.afsr_errs &
1909 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
1910 		}
1911 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1912 		    NULL);
1913 	}
1914 
1915 	/*
1916 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1917 	 * be logged as part of the panic flow.
1918 	 */
1919 	if (aflt->flt_panic)
1920 		fm_panic("%sError(s)", pr_reason);
1921 
1922 	/*
1923 	 * If we queued an error and we are going to return from the trap and
1924 	 * the error was in user mode or inside of a copy routine, set AST flag
1925 	 * so the queue will be drained before returning to user mode.  The
1926 	 * AST processing will also act on our failure policy.
1927 	 */
1928 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1929 		int pcb_flag = 0;
1930 
1931 		if (t_afsr_errs &
1932 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
1933 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1934 			pcb_flag |= ASYNC_HWERR;
1935 
1936 		if (t_afsr & C_AFSR_BERR)
1937 			pcb_flag |= ASYNC_BERR;
1938 
1939 		if (t_afsr & C_AFSR_TO)
1940 			pcb_flag |= ASYNC_BTO;
1941 
1942 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1943 		aston(curthread);
1944 	}
1945 }
1946 
1947 #if defined(CPU_IMP_L1_CACHE_PARITY)
1948 /*
1949  * Handling of data and instruction parity errors (traps 0x71, 0x72).
1950  *
1951  * For Panther, P$ data parity errors during floating point load hits
1952  * are also detected (reported as TT 0x71) and handled by this trap
1953  * handler.
1954  *
1955  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
1956  * is available.
1957  */
1958 /*ARGSUSED*/
1959 void
1960 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
1961 {
1962 	ch_async_flt_t ch_flt;
1963 	struct async_flt *aflt;
1964 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
1965 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
1966 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
1967 	char *error_class;
1968 
1969 	/*
1970 	 * Log the error.
1971 	 * For icache parity errors the fault address is the trap PC.
1972 	 * For dcache/pcache parity errors the instruction would have to
1973 	 * be decoded to determine the address and that isn't possible
1974 	 * at high PIL.
1975 	 */
1976 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1977 	aflt = (struct async_flt *)&ch_flt;
1978 	aflt->flt_id = gethrtime_waitfree();
1979 	aflt->flt_bus_id = getprocessorid();
1980 	aflt->flt_inst = CPU->cpu_id;
1981 	aflt->flt_pc = tpc;
1982 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
1983 	aflt->flt_prot = AFLT_PROT_NONE;
1984 	aflt->flt_class = CPU_FAULT;
1985 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
1986 	aflt->flt_tl = tl;
1987 	aflt->flt_panic = panic;
1988 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
1989 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
1990 
1991 	if (iparity) {
1992 		cpu_icache_parity_info(&ch_flt);
1993 		if (ch_flt.parity_data.ipe.cpl_off != -1)
1994 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
1995 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
1996 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
1997 		else
1998 			error_class = FM_EREPORT_CPU_USIII_IPE;
1999 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2000 	} else {
2001 		cpu_dcache_parity_info(&ch_flt);
2002 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2003 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2004 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2005 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2006 		else
2007 			error_class = FM_EREPORT_CPU_USIII_DPE;
2008 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2009 		/*
2010 		 * For panther we also need to check the P$ for parity errors.
2011 		 */
2012 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2013 			cpu_pcache_parity_info(&ch_flt);
2014 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2015 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2016 				aflt->flt_payload =
2017 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2018 			}
2019 		}
2020 	}
2021 
2022 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2023 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2024 
2025 	if (iparity) {
2026 		/*
2027 		 * Invalidate entire I$.
2028 		 * This is required due to the use of diagnostic ASI
2029 		 * accesses that may result in a loss of I$ coherency.
2030 		 */
2031 		if (cache_boot_state & DCU_IC) {
2032 			flush_icache();
2033 		}
2034 		/*
2035 		 * According to section P.3.1 of the Panther PRM, we
2036 		 * need to do a little more for recovery on those
2037 		 * CPUs after encountering an I$ parity error.
2038 		 */
2039 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2040 			flush_ipb();
2041 			correct_dcache_parity(dcache_size,
2042 			    dcache_linesize);
2043 			flush_pcache();
2044 		}
2045 	} else {
2046 		/*
2047 		 * Since the valid bit is ignored when checking parity the
2048 		 * D$ data and tag must also be corrected.  Set D$ data bits
2049 		 * to zero and set utag to 0, 1, 2, 3.
2050 		 */
2051 		correct_dcache_parity(dcache_size, dcache_linesize);
2052 
2053 		/*
2054 		 * According to section P.3.3 of the Panther PRM, we
2055 		 * need to do a little more for recovery on those
2056 		 * CPUs after encountering a D$ or P$ parity error.
2057 		 *
2058 		 * As far as clearing P$ parity errors, it is enough to
2059 		 * simply invalidate all entries in the P$ since P$ parity
2060 		 * error traps are only generated for floating point load
2061 		 * hits.
2062 		 */
2063 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2064 			flush_icache();
2065 			flush_ipb();
2066 			flush_pcache();
2067 		}
2068 	}
2069 
2070 	/*
2071 	 * Invalidate entire D$ if it was enabled.
2072 	 * This is done to avoid stale data in the D$ which might
2073 	 * occur with the D$ disabled and the trap handler doing
2074 	 * stores affecting lines already in the D$.
2075 	 */
2076 	if (cache_boot_state & DCU_DC) {
2077 		flush_dcache();
2078 	}
2079 
2080 	/*
2081 	 * Restore caches to their bootup state.
2082 	 */
2083 	set_dcu(get_dcu() | cache_boot_state);
2084 
2085 	/*
2086 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2087 	 * be logged as part of the panic flow.
2088 	 */
2089 	if (aflt->flt_panic)
2090 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2091 
2092 	/*
2093 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2094 	 * the chance of getting an unrecoverable Fast ECC error.  This
2095 	 * flush will evict the part of the parity trap handler that is run
2096 	 * at TL>1.
2097 	 */
2098 	if (tl) {
2099 		cpu_flush_ecache();
2100 	}
2101 }
2102 
2103 /*
2104  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2105  * to indicate which portions of the captured data should be in the ereport.
2106  */
2107 void
2108 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2109 {
2110 	int way = ch_flt->parity_data.ipe.cpl_way;
2111 	int offset = ch_flt->parity_data.ipe.cpl_off;
2112 	int tag_index;
2113 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2114 
2115 
2116 	if ((offset != -1) || (way != -1)) {
2117 		/*
2118 		 * Parity error in I$ tag or data
2119 		 */
2120 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2121 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2122 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2123 			    PN_ICIDX_TO_WAY(tag_index);
2124 		else
2125 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2126 			    CH_ICIDX_TO_WAY(tag_index);
2127 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2128 		    IC_LOGFLAG_MAGIC;
2129 	} else {
2130 		/*
2131 		 * Parity error was not identified.
2132 		 * Log tags and data for all ways.
2133 		 */
2134 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2135 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2136 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2137 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2138 				    PN_ICIDX_TO_WAY(tag_index);
2139 			else
2140 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2141 				    CH_ICIDX_TO_WAY(tag_index);
2142 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2143 			    IC_LOGFLAG_MAGIC;
2144 		}
2145 	}
2146 }
2147 
2148 /*
2149  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2150  * to indicate which portions of the captured data should be in the ereport.
2151  */
2152 void
2153 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2154 {
2155 	int way = ch_flt->parity_data.dpe.cpl_way;
2156 	int offset = ch_flt->parity_data.dpe.cpl_off;
2157 	int tag_index;
2158 
2159 	if (offset != -1) {
2160 		/*
2161 		 * Parity error in D$ or P$ data array.
2162 		 *
2163 		 * First check to see whether the parity error is in D$ or P$
2164 		 * since P$ data parity errors are reported in Panther using
2165 		 * the same trap.
2166 		 */
2167 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2168 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2169 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2170 			    CH_PCIDX_TO_WAY(tag_index);
2171 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2172 			    PC_LOGFLAG_MAGIC;
2173 		} else {
2174 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2175 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2176 			    CH_DCIDX_TO_WAY(tag_index);
2177 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2178 			    DC_LOGFLAG_MAGIC;
2179 		}
2180 	} else if (way != -1) {
2181 		/*
2182 		 * Parity error in D$ tag.
2183 		 */
2184 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2185 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2186 		    CH_DCIDX_TO_WAY(tag_index);
2187 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2188 		    DC_LOGFLAG_MAGIC;
2189 	}
2190 }
2191 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2192 
2193 /*
2194  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2195  * post-process CPU events that are dequeued.  As such, it can be invoked
2196  * from softint context, from AST processing in the trap() flow, or from the
2197  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2198  * Historically this entry point was used to log the actual cmn_err(9F) text;
2199  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2200  * With FMA this function now also returns a flag which indicates to the
2201  * caller whether the ereport should be posted (1) or suppressed (0).
2202  */
2203 static int
2204 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2205 {
2206 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2207 	struct async_flt *aflt = (struct async_flt *)flt;
2208 	page_t *pp;
2209 
2210 	switch (ch_flt->flt_type) {
2211 	case CPU_INV_AFSR:
2212 		/*
2213 		 * If it is a disrupting trap and the AFSR is zero, then
2214 		 * the event has probably already been noted. Do not post
2215 		 * an ereport.
2216 		 */
2217 		if ((aflt->flt_status & ECC_C_TRAP) &&
2218 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2219 			return (0);
2220 		else
2221 			return (1);
2222 	case CPU_TO:
2223 	case CPU_BERR:
2224 	case CPU_FATAL:
2225 	case CPU_FPUERR:
2226 		return (1);
2227 
2228 	case CPU_UE_ECACHE_RETIRE:
2229 		cpu_log_err(aflt);
2230 		cpu_page_retire(ch_flt);
2231 		return (1);
2232 
2233 	/*
2234 	 * Cases where we may want to suppress logging or perform
2235 	 * extended diagnostics.
2236 	 */
2237 	case CPU_CE:
2238 	case CPU_EMC:
2239 		pp = page_numtopp_nolock((pfn_t)
2240 		    (aflt->flt_addr >> MMU_PAGESHIFT));
2241 
2242 		/*
2243 		 * We want to skip logging and further classification
2244 		 * only if ALL the following conditions are true:
2245 		 *
2246 		 *	1. There is only one error
2247 		 *	2. That error is a correctable memory error
2248 		 *	3. The error is caused by the memory scrubber (in
2249 		 *	   which case the error will have occurred under
2250 		 *	   on_trap protection)
2251 		 *	4. The error is on a retired page
2252 		 *
2253 		 * Note: AFLT_PROT_EC is used places other than the memory
2254 		 * scrubber.  However, none of those errors should occur
2255 		 * on a retired page.
2256 		 */
2257 		if ((ch_flt->afsr_errs &
2258 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2259 		    aflt->flt_prot == AFLT_PROT_EC) {
2260 
2261 			if (pp != NULL && page_isretired(pp)) {
2262 			    if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2263 
2264 				/*
2265 				 * Since we're skipping logging, we'll need
2266 				 * to schedule the re-enabling of CEEN
2267 				 */
2268 				(void) timeout(cpu_delayed_check_ce_errors,
2269 				    (void *)aflt->flt_inst, drv_usectohz(
2270 				    (clock_t)cpu_ceen_delay_secs * MICROSEC));
2271 			    }
2272 			    return (0);
2273 			}
2274 		}
2275 
2276 		/*
2277 		 * Perform/schedule further classification actions, but
2278 		 * only if the page is healthy (we don't want bad
2279 		 * pages inducing too much diagnostic activity).  If we could
2280 		 * not find a page pointer then we also skip this.  If
2281 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2282 		 * to copy and recirculate the event (for further diagnostics)
2283 		 * and we should not proceed to log it here.
2284 		 *
2285 		 * This must be the last step here before the cpu_log_err()
2286 		 * below - if an event recirculates cpu_ce_log_err() will
2287 		 * not call the current function but just proceed directly
2288 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2289 		 *
2290 		 * Note: Check cpu_impl_async_log_err if changing this
2291 		 */
2292 		if (pp) {
2293 			if (page_isretired(pp) || page_deteriorating(pp)) {
2294 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2295 				    CE_XDIAG_SKIP_PAGEDET);
2296 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2297 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2298 				return (0);
2299 			}
2300 		} else {
2301 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2302 			    CE_XDIAG_SKIP_NOPP);
2303 		}
2304 		/*FALLTHRU*/
2305 
2306 	/*
2307 	 * Cases where we just want to report the error and continue.
2308 	 */
2309 	case CPU_CE_ECACHE:
2310 	case CPU_UE_ECACHE:
2311 	case CPU_IV:
2312 	case CPU_ORPH:
2313 		cpu_log_err(aflt);
2314 		return (1);
2315 
2316 	/*
2317 	 * Cases where we want to fall through to handle panicking.
2318 	 */
2319 	case CPU_UE:
2320 		/*
2321 		 * We want to skip logging in the same conditions as the
2322 		 * CE case.  In addition, we want to make sure we're not
2323 		 * panicking.
2324 		 */
2325 		if (!panicstr && (ch_flt->afsr_errs &
2326 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2327 		    aflt->flt_prot == AFLT_PROT_EC) {
2328 			page_t *pp = page_numtopp_nolock((pfn_t)
2329 			    (aflt->flt_addr >> MMU_PAGESHIFT));
2330 
2331 			if (pp != NULL && page_isretired(pp)) {
2332 
2333 				/* Zero the address to clear the error */
2334 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2335 				return (0);
2336 			}
2337 		}
2338 		cpu_log_err(aflt);
2339 		break;
2340 
2341 	default:
2342 		/*
2343 		 * If the us3_common.c code doesn't know the flt_type, it may
2344 		 * be an implementation-specific code.  Call into the impldep
2345 		 * backend to find out what to do: if it tells us to continue,
2346 		 * break and handle as if falling through from a UE; if not,
2347 		 * the impldep backend has handled the error and we're done.
2348 		 */
2349 		switch (cpu_impl_async_log_err(flt, eqep)) {
2350 		case CH_ASYNC_LOG_DONE:
2351 			return (1);
2352 		case CH_ASYNC_LOG_RECIRC:
2353 			return (0);
2354 		case CH_ASYNC_LOG_CONTINUE:
2355 			break; /* continue on to handle UE-like error */
2356 		default:
2357 			cmn_err(CE_WARN, "discarding error 0x%p with "
2358 			    "invalid fault type (0x%x)",
2359 			    (void *)aflt, ch_flt->flt_type);
2360 			return (0);
2361 		}
2362 	}
2363 
2364 	/* ... fall through from the UE case */
2365 
2366 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2367 		if (!panicstr) {
2368 			cpu_page_retire(ch_flt);
2369 		} else {
2370 			/*
2371 			 * Clear UEs on panic so that we don't
2372 			 * get haunted by them during panic or
2373 			 * after reboot
2374 			 */
2375 			cpu_clearphys(aflt);
2376 			(void) clear_errors(NULL);
2377 		}
2378 	}
2379 
2380 	return (1);
2381 }
2382 
2383 /*
2384  * Retire the bad page that may contain the flushed error.
2385  */
2386 void
2387 cpu_page_retire(ch_async_flt_t *ch_flt)
2388 {
2389 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2390 	page_t *pp = page_numtopp_nolock(aflt->flt_addr >> MMU_PAGESHIFT);
2391 
2392 	if (pp != NULL) {
2393 		page_settoxic(pp, PAGE_IS_FAULTY);
2394 		(void) page_retire(pp, PAGE_IS_TOXIC);
2395 	}
2396 }
2397 
2398 /*
2399  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2400  * generic event post-processing for correctable and uncorrectable memory,
2401  * E$, and MTag errors.  Historically this entry point was used to log bits of
2402  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2403  * converted into an ereport.  In addition, it transmits the error to any
2404  * platform-specific service-processor FRU logging routines, if available.
2405  */
2406 void
2407 cpu_log_err(struct async_flt *aflt)
2408 {
2409 	char unum[UNUM_NAMLEN];
2410 	int len = 0;
2411 	int synd_status, synd_code, afar_status;
2412 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2413 
2414 	/*
2415 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
2416 	 * For Panther, L2$ is not external, so we don't want to
2417 	 * generate an E$ unum for those errors.
2418 	 */
2419 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
2420 		if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS)
2421 			aflt->flt_status |= ECC_ECACHE;
2422 	} else {
2423 		if (ch_flt->flt_bit & C_AFSR_ECACHE)
2424 			aflt->flt_status |= ECC_ECACHE;
2425 	}
2426 
2427 	/*
2428 	 * Determine syndrome status.
2429 	 */
2430 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2431 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2432 
2433 	/*
2434 	 * Determine afar status.
2435 	 */
2436 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2437 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2438 				ch_flt->flt_bit);
2439 	else
2440 		afar_status = AFLT_STAT_INVALID;
2441 
2442 	/*
2443 	 * If afar status is not invalid do a unum lookup.
2444 	 */
2445 	if (afar_status != AFLT_STAT_INVALID) {
2446 		(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
2447 			UNUM_NAMLEN, &len);
2448 	} else {
2449 		unum[0] = '\0';
2450 	}
2451 
2452 	synd_code = synd_to_synd_code(synd_status,
2453 	    aflt->flt_synd, ch_flt->flt_bit);
2454 
2455 	/*
2456 	 * Do not send the fruid message (plat_ecc_error_data_t)
2457 	 * to the SC if it can handle the enhanced error information
2458 	 * (plat_ecc_error2_data_t) or when the tunable
2459 	 * ecc_log_fruid_enable is set to 0.
2460 	 */
2461 
2462 	if (&plat_ecc_capability_sc_get &&
2463 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2464 		if (&plat_log_fruid_error)
2465 			plat_log_fruid_error(synd_code, aflt, unum,
2466 			    ch_flt->flt_bit);
2467 	}
2468 
2469 	if (aflt->flt_func != NULL)
2470 		aflt->flt_func(aflt, unum);
2471 
2472 	if (afar_status != AFLT_STAT_INVALID)
2473 		cpu_log_diag_info(ch_flt);
2474 
2475 	/*
2476 	 * If we have a CEEN error , we do not reenable CEEN until after
2477 	 * we exit the trap handler. Otherwise, another error may
2478 	 * occur causing the handler to be entered recursively.
2479 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2480 	 * to try and ensure that the CPU makes progress in the face
2481 	 * of a CE storm.
2482 	 */
2483 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2484 		(void) timeout(cpu_delayed_check_ce_errors,
2485 		    (void *)aflt->flt_inst,
2486 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2487 	}
2488 }
2489 
2490 /*
2491  * Invoked by error_init() early in startup and therefore before
2492  * startup_errorq() is called to drain any error Q -
2493  *
2494  * startup()
2495  *   startup_end()
2496  *     error_init()
2497  *       cpu_error_init()
2498  * errorq_init()
2499  *   errorq_drain()
2500  * start_other_cpus()
2501  *
2502  * The purpose of this routine is to create error-related taskqs.  Taskqs
2503  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2504  * context.
2505  */
2506 void
2507 cpu_error_init(int items)
2508 {
2509 	/*
2510 	 * Create taskq(s) to reenable CE
2511 	 */
2512 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2513 	    items, items, TASKQ_PREPOPULATE);
2514 }
2515 
2516 void
2517 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2518 {
2519 	char unum[UNUM_NAMLEN];
2520 	int len;
2521 
2522 	switch (aflt->flt_class) {
2523 	case CPU_FAULT:
2524 		cpu_ereport_init(aflt);
2525 		if (cpu_async_log_err(aflt, eqep))
2526 			cpu_ereport_post(aflt);
2527 		break;
2528 
2529 	case BUS_FAULT:
2530 		if (aflt->flt_func != NULL) {
2531 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2532 			    unum, UNUM_NAMLEN, &len);
2533 			aflt->flt_func(aflt, unum);
2534 		}
2535 		break;
2536 
2537 	case RECIRC_CPU_FAULT:
2538 		aflt->flt_class = CPU_FAULT;
2539 		cpu_log_err(aflt);
2540 		cpu_ereport_post(aflt);
2541 		break;
2542 
2543 	case RECIRC_BUS_FAULT:
2544 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2545 		/*FALLTHRU*/
2546 	default:
2547 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2548 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2549 		return;
2550 	}
2551 }
2552 
2553 /*
2554  * Scrub and classify a CE.  This function must not modify the
2555  * fault structure passed to it but instead should return the classification
2556  * information.
2557  */
2558 
2559 static uchar_t
2560 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2561 {
2562 	uchar_t disp = CE_XDIAG_EXTALG;
2563 	on_trap_data_t otd;
2564 	uint64_t orig_err;
2565 	ch_cpu_logout_t *clop;
2566 
2567 	/*
2568 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2569 	 * this, but our other callers have not.  Disable preemption to
2570 	 * avoid CPU migration so that we restore CEEN on the correct
2571 	 * cpu later.
2572 	 *
2573 	 * CEEN is cleared so that further CEs that our instruction and
2574 	 * data footprint induce do not cause use to either creep down
2575 	 * kernel stack to the point of overflow, or do so much CE
2576 	 * notification as to make little real forward progress.
2577 	 *
2578 	 * NCEEN must not be cleared.  However it is possible that
2579 	 * our accesses to the flt_addr may provoke a bus error or timeout
2580 	 * if the offending address has just been unconfigured as part of
2581 	 * a DR action.  So we must operate under on_trap protection.
2582 	 */
2583 	kpreempt_disable();
2584 	orig_err = get_error_enable();
2585 	if (orig_err & EN_REG_CEEN)
2586 	    set_error_enable(orig_err & ~EN_REG_CEEN);
2587 
2588 	/*
2589 	 * Our classification algorithm includes the line state before
2590 	 * the scrub; we'd like this captured after the detection and
2591 	 * before the algorithm below - the earlier the better.
2592 	 *
2593 	 * If we've come from a cpu CE trap then this info already exists
2594 	 * in the cpu logout area.
2595 	 *
2596 	 * For a CE detected by memscrub for which there was no trap
2597 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2598 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2599 	 * marked the fault structure as incomplete as a flag to later
2600 	 * logging code.
2601 	 *
2602 	 * If called directly from an IO detected CE there has been
2603 	 * no line data capture.  In this case we logout to the cpu logout
2604 	 * area - that's appropriate since it's the cpu cache data we need
2605 	 * for classification.  We thus borrow the cpu logout area for a
2606 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2607 	 * this time (we will invalidate it again below).
2608 	 *
2609 	 * If called from the partner check xcall handler then this cpu
2610 	 * (the partner) has not necessarily experienced a CE at this
2611 	 * address.  But we want to capture line state before its scrub
2612 	 * attempt since we use that in our classification.
2613 	 */
2614 	if (logout_tried == B_FALSE) {
2615 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2616 			disp |= CE_XDIAG_NOLOGOUT;
2617 	}
2618 
2619 	/*
2620 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2621 	 * no longer be valid (if DR'd since the initial event) so we
2622 	 * perform this scrub under on_trap protection.  If this access is
2623 	 * ok then further accesses below will also be ok - DR cannot
2624 	 * proceed while this thread is active (preemption is disabled);
2625 	 * to be safe we'll nonetheless use on_trap again below.
2626 	 */
2627 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2628 		cpu_scrubphys(ecc);
2629 	} else {
2630 		no_trap();
2631 		if (orig_err & EN_REG_CEEN)
2632 		    set_error_enable(orig_err);
2633 		kpreempt_enable();
2634 		return (disp);
2635 	}
2636 	no_trap();
2637 
2638 	/*
2639 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2640 	 * Note that it's quite possible that the read sourced the data from
2641 	 * another cpu.
2642 	 */
2643 	if (clear_ecc(ecc))
2644 		disp |= CE_XDIAG_CE1;
2645 
2646 	/*
2647 	 * Read the data again.  This time the read is very likely to
2648 	 * come from memory since the scrub induced a writeback to memory.
2649 	 */
2650 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2651 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2652 	} else {
2653 		no_trap();
2654 		if (orig_err & EN_REG_CEEN)
2655 		    set_error_enable(orig_err);
2656 		kpreempt_enable();
2657 		return (disp);
2658 	}
2659 	no_trap();
2660 
2661 	/* Did that read induce a CE that matches the AFAR? */
2662 	if (clear_ecc(ecc))
2663 		disp |= CE_XDIAG_CE2;
2664 
2665 	/*
2666 	 * Look at the logout information and record whether we found the
2667 	 * line in l2/l3 cache.  For Panther we are interested in whether
2668 	 * we found it in either cache (it won't reside in both but
2669 	 * it is possible to read it that way given the moving target).
2670 	 */
2671 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2672 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2673 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2674 		int hit, level;
2675 		int state;
2676 		int totalsize;
2677 		ch_ec_data_t *ecp;
2678 
2679 		/*
2680 		 * If hit is nonzero then a match was found and hit will
2681 		 * be one greater than the index which hit.  For Panther we
2682 		 * also need to pay attention to level to see which of l2$ or
2683 		 * l3$ it hit in.
2684 		 */
2685 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2686 		    0, &level);
2687 
2688 		if (hit) {
2689 			--hit;
2690 			disp |= CE_XDIAG_AFARMATCH;
2691 
2692 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2693 				if (level == 2)
2694 					ecp = &clop->clo_data.chd_l2_data[hit];
2695 				else
2696 					ecp = &clop->clo_data.chd_ec_data[hit];
2697 			} else {
2698 				ASSERT(level == 2);
2699 				ecp = &clop->clo_data.chd_ec_data[hit];
2700 			}
2701 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2702 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2703 			    ecc->flt_addr, ecp->ec_tag);
2704 
2705 			/*
2706 			 * Cheetah variants use different state encodings -
2707 			 * the CH_ECSTATE_* defines vary depending on the
2708 			 * module we're compiled for.  Translate into our
2709 			 * one true version.  Conflate Owner-Shared state
2710 			 * of SSM mode with Owner as victimisation of such
2711 			 * lines may cause a writeback.
2712 			 */
2713 			switch (state) {
2714 			case CH_ECSTATE_MOD:
2715 				disp |= EC_STATE_M;
2716 				break;
2717 
2718 			case CH_ECSTATE_OWN:
2719 			case CH_ECSTATE_OWS:
2720 				disp |= EC_STATE_O;
2721 				break;
2722 
2723 			case CH_ECSTATE_EXL:
2724 				disp |= EC_STATE_E;
2725 				break;
2726 
2727 			case CH_ECSTATE_SHR:
2728 				disp |= EC_STATE_S;
2729 				break;
2730 
2731 			default:
2732 				disp |= EC_STATE_I;
2733 				break;
2734 			}
2735 		}
2736 
2737 		/*
2738 		 * If we initiated the delayed logout then we are responsible
2739 		 * for invalidating the logout area.
2740 		 */
2741 		if (logout_tried == B_FALSE) {
2742 			bzero(clop, sizeof (ch_cpu_logout_t));
2743 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2744 		}
2745 	}
2746 
2747 	/*
2748 	 * Re-enable CEEN if we turned it off.
2749 	 */
2750 	if (orig_err & EN_REG_CEEN)
2751 	    set_error_enable(orig_err);
2752 	kpreempt_enable();
2753 
2754 	return (disp);
2755 }
2756 
2757 /*
2758  * Scrub a correctable memory error and collect data for classification
2759  * of CE type.  This function is called in the detection path, ie tl0 handling
2760  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2761  */
2762 void
2763 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2764 {
2765 	/*
2766 	 * Cheetah CE classification does not set any bits in flt_status.
2767 	 * Instead we will record classification datapoints in flt_disp.
2768 	 */
2769 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2770 
2771 	/*
2772 	 * To check if the error detected by IO is persistent, sticky or
2773 	 * intermittent.  This is noticed by clear_ecc().
2774 	 */
2775 	if (ecc->flt_status & ECC_IOBUS)
2776 		ecc->flt_stat = C_AFSR_MEMORY;
2777 
2778 	/*
2779 	 * Record information from this first part of the algorithm in
2780 	 * flt_disp.
2781 	 */
2782 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2783 }
2784 
2785 /*
2786  * Select a partner to perform a further CE classification check from.
2787  * Must be called with kernel preemption disabled (to stop the cpu list
2788  * from changing).  The detecting cpu we are partnering has cpuid
2789  * aflt->flt_inst; we might not be running on the detecting cpu.
2790  *
2791  * Restrict choice to active cpus in the same cpu partition as ourselves in
2792  * an effort to stop bad cpus in one partition causing other partitions to
2793  * perform excessive diagnostic activity.  Actually since the errorq drain
2794  * is run from a softint most of the time and that is a global mechanism
2795  * this isolation is only partial.  Return NULL if we fail to find a
2796  * suitable partner.
2797  *
2798  * We prefer a partner that is in a different latency group to ourselves as
2799  * we will share fewer datapaths.  If such a partner is unavailable then
2800  * choose one in the same lgroup but prefer a different chip and only allow
2801  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2802  * flags includes PTNR_SELFOK then permit selection of the original detector.
2803  *
2804  * We keep a cache of the last partner selected for a cpu, and we'll try to
2805  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2806  * have passed since that selection was made.  This provides the benefit
2807  * of the point-of-view of different partners over time but without
2808  * requiring frequent cpu list traversals.
2809  */
2810 
2811 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2812 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2813 
2814 static cpu_t *
2815 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2816 {
2817 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2818 	hrtime_t lasttime, thistime;
2819 
2820 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2821 
2822 	dtcr = cpu[aflt->flt_inst];
2823 
2824 	/*
2825 	 * Short-circuit for the following cases:
2826 	 *	. the dtcr is not flagged active
2827 	 *	. there is just one cpu present
2828 	 *	. the detector has disappeared
2829 	 *	. we were given a bad flt_inst cpuid; this should not happen
2830 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2831 	 *	  reason to panic.
2832 	 *	. there is just one cpu left online in the cpu partition
2833 	 *
2834 	 * If we return NULL after this point then we do not update the
2835 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2836 	 * again next time; this is the case where the only other cpu online
2837 	 * in the detector's partition is on the same chip as the detector
2838 	 * and since CEEN re-enable is throttled even that case should not
2839 	 * hurt performance.
2840 	 */
2841 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2842 		return (NULL);
2843 	}
2844 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2845 		if (flags & PTNR_SELFOK) {
2846 			*typep = CE_XDIAG_PTNR_SELF;
2847 			return (dtcr);
2848 		} else {
2849 			return (NULL);
2850 		}
2851 	}
2852 
2853 	thistime = gethrtime();
2854 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2855 
2856 	/*
2857 	 * Select a starting point.
2858 	 */
2859 	if (!lasttime) {
2860 		/*
2861 		 * We've never selected a partner for this detector before.
2862 		 * Start the scan at the next online cpu in the same cpu
2863 		 * partition.
2864 		 */
2865 		sp = dtcr->cpu_next_part;
2866 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2867 		/*
2868 		 * Our last selection has not aged yet.  If this partner:
2869 		 *	. is still a valid cpu,
2870 		 *	. is still in the same partition as the detector
2871 		 *	. is still marked active
2872 		 *	. satisfies the 'flags' argument criteria
2873 		 * then select it again without updating the timestamp.
2874 		 */
2875 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2876 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2877 		    !cpu_flagged_active(sp->cpu_flags) ||
2878 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2879 		    (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
2880 		    !(flags & PTNR_SIBLINGOK))) {
2881 			sp = dtcr->cpu_next_part;
2882 		} else {
2883 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2884 				*typep = CE_XDIAG_PTNR_REMOTE;
2885 			} else if (sp == dtcr) {
2886 				*typep = CE_XDIAG_PTNR_SELF;
2887 			} else if (sp->cpu_chip->chip_id ==
2888 			    dtcr->cpu_chip->chip_id) {
2889 				*typep = CE_XDIAG_PTNR_SIBLING;
2890 			} else {
2891 				*typep = CE_XDIAG_PTNR_LOCAL;
2892 			}
2893 			return (sp);
2894 		}
2895 	} else {
2896 		/*
2897 		 * Our last selection has aged.  If it is nonetheless still a
2898 		 * valid cpu then start the scan at the next cpu in the
2899 		 * partition after our last partner.  If the last selection
2900 		 * is no longer a valid cpu then go with our default.  In
2901 		 * this way we slowly cycle through possible partners to
2902 		 * obtain multiple viewpoints over time.
2903 		 */
2904 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2905 		if (sp == NULL) {
2906 			sp = dtcr->cpu_next_part;
2907 		} else {
2908 			sp = sp->cpu_next_part;		/* may be dtcr */
2909 			if (sp->cpu_part != dtcr->cpu_part)
2910 				sp = dtcr;
2911 		}
2912 	}
2913 
2914 	/*
2915 	 * We have a proposed starting point for our search, but if this
2916 	 * cpu is offline then its cpu_next_part will point to itself
2917 	 * so we can't use that to iterate over cpus in this partition in
2918 	 * the loop below.  We still want to avoid iterating over cpus not
2919 	 * in our partition, so in the case that our starting point is offline
2920 	 * we will repoint it to be the detector itself;  and if the detector
2921 	 * happens to be offline we'll return NULL from the following loop.
2922 	 */
2923 	if (!cpu_flagged_active(sp->cpu_flags)) {
2924 		sp = dtcr;
2925 	}
2926 
2927 	ptnr = sp;
2928 	locptnr = NULL;
2929 	sibptnr = NULL;
2930 	do {
2931 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
2932 			continue;
2933 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2934 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
2935 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2936 			*typep = CE_XDIAG_PTNR_REMOTE;
2937 			return (ptnr);
2938 		}
2939 		if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
2940 			if (sibptnr == NULL)
2941 				sibptnr = ptnr;
2942 			continue;
2943 		}
2944 		if (locptnr == NULL)
2945 			locptnr = ptnr;
2946 	} while ((ptnr = ptnr->cpu_next_part) != sp);
2947 
2948 	/*
2949 	 * A foreign partner has already been returned if one was available.
2950 	 *
2951 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
2952 	 * detector, is active, and is not a sibling of the detector.
2953 	 *
2954 	 * If sibptnr is not NULL it is a sibling of the detector, and is
2955 	 * active.
2956 	 *
2957 	 * If we have to resort to using the detector itself we have already
2958 	 * checked that it is active.
2959 	 */
2960 	if (locptnr) {
2961 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
2962 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2963 		*typep = CE_XDIAG_PTNR_LOCAL;
2964 		return (locptnr);
2965 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
2966 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
2967 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2968 		*typep = CE_XDIAG_PTNR_SIBLING;
2969 		return (sibptnr);
2970 	} else if (flags & PTNR_SELFOK) {
2971 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
2972 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2973 		*typep = CE_XDIAG_PTNR_SELF;
2974 		return (dtcr);
2975 	}
2976 
2977 	return (NULL);
2978 }
2979 
2980 /*
2981  * Cross call handler that is requested to run on the designated partner of
2982  * a cpu that experienced a possibly sticky or possibly persistnet CE.
2983  */
2984 static void
2985 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
2986 {
2987 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
2988 }
2989 
2990 /*
2991  * The associated errorqs are never destroyed so we do not need to deal with
2992  * them disappearing before this timeout fires.  If the affected memory
2993  * has been DR'd out since the original event the scrub algrithm will catch
2994  * any errors and return null disposition info.  If the original detecting
2995  * cpu has been DR'd out then ereport detector info will not be able to
2996  * lookup CPU type;  with a small timeout this is unlikely.
2997  */
2998 static void
2999 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3000 {
3001 	struct async_flt *aflt = cbarg->lkycb_aflt;
3002 	uchar_t disp;
3003 	cpu_t *cp;
3004 	int ptnrtype;
3005 
3006 	kpreempt_disable();
3007 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3008 	    &ptnrtype)) {
3009 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3010 		    (uint64_t)&disp);
3011 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3012 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3013 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3014 	} else {
3015 		ce_xdiag_lkydrops++;
3016 		if (ncpus > 1)
3017 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3018 			    CE_XDIAG_SKIP_NOPTNR);
3019 	}
3020 	kpreempt_enable();
3021 
3022 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3023 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3024 }
3025 
3026 /*
3027  * Called from errorq drain code when processing a CE error, both from
3028  * CPU and PCI drain functions.  Decide what further classification actions,
3029  * if any, we will perform.  Perform immediate actions now, and schedule
3030  * delayed actions as required.  Note that we are no longer necessarily running
3031  * on the detecting cpu, and that the async_flt structure will not persist on
3032  * return from this function.
3033  *
3034  * Calls to this function should aim to be self-throtlling in some way.  With
3035  * the delayed re-enable of CEEN the absolute rate of calls should not
3036  * be excessive.  Callers should also avoid performing in-depth classification
3037  * for events in pages that are already known to be suspect.
3038  *
3039  * We return nonzero to indicate that the event has been copied and
3040  * recirculated for further testing.  The caller should not log the event
3041  * in this case - it will be logged when further test results are available.
3042  *
3043  * Our possible contexts are that of errorq_drain: below lock level or from
3044  * panic context.  We can assume that the cpu we are running on is online.
3045  */
3046 
3047 
3048 #ifdef DEBUG
3049 static int ce_xdiag_forceaction;
3050 #endif
3051 
3052 int
3053 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3054     errorq_elem_t *eqep, size_t afltoffset)
3055 {
3056 	ce_dispact_t dispact, action;
3057 	cpu_t *cp;
3058 	uchar_t dtcrinfo, disp;
3059 	int ptnrtype;
3060 
3061 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3062 		ce_xdiag_drops++;
3063 		return (0);
3064 	} else if (!aflt->flt_in_memory) {
3065 		ce_xdiag_drops++;
3066 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3067 		return (0);
3068 	}
3069 
3070 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3071 
3072 	/*
3073 	 * Some correctable events are not scrubbed/classified, such as those
3074 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3075 	 * initial detector classification go no further.
3076 	 */
3077 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3078 		ce_xdiag_drops++;
3079 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3080 		return (0);
3081 	}
3082 
3083 	dispact = CE_DISPACT(ce_disp_table,
3084 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3085 	    CE_XDIAG_STATE(dtcrinfo),
3086 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3087 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3088 
3089 
3090 	action = CE_ACT(dispact);	/* bad lookup caught below */
3091 #ifdef DEBUG
3092 	if (ce_xdiag_forceaction != 0)
3093 		action = ce_xdiag_forceaction;
3094 #endif
3095 
3096 	switch (action) {
3097 	case CE_ACT_LKYCHK: {
3098 		caddr_t ndata;
3099 		errorq_elem_t *neqep;
3100 		struct async_flt *ecc;
3101 		ce_lkychk_cb_t *cbargp;
3102 
3103 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3104 			ce_xdiag_lkydrops++;
3105 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3106 			    CE_XDIAG_SKIP_DUPFAIL);
3107 			break;
3108 		}
3109 		ecc = (struct async_flt *)(ndata + afltoffset);
3110 
3111 		ASSERT(ecc->flt_class == CPU_FAULT ||
3112 		    ecc->flt_class == BUS_FAULT);
3113 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3114 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3115 
3116 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3117 		cbargp->lkycb_aflt = ecc;
3118 		cbargp->lkycb_eqp = eqp;
3119 		cbargp->lkycb_eqep = neqep;
3120 
3121 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3122 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3123 		return (1);
3124 	}
3125 
3126 	case CE_ACT_PTNRCHK:
3127 		kpreempt_disable();	/* stop cpu list changing */
3128 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3129 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3130 			    (uint64_t)aflt, (uint64_t)&disp);
3131 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3132 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3133 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3134 		} else if (ncpus > 1) {
3135 			ce_xdiag_ptnrdrops++;
3136 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3137 			    CE_XDIAG_SKIP_NOPTNR);
3138 		} else {
3139 			ce_xdiag_ptnrdrops++;
3140 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3141 			    CE_XDIAG_SKIP_UNIPROC);
3142 		}
3143 		kpreempt_enable();
3144 		break;
3145 
3146 	case CE_ACT_DONE:
3147 		break;
3148 
3149 	case CE_ACT(CE_DISP_BAD):
3150 	default:
3151 #ifdef DEBUG
3152 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3153 #endif
3154 		ce_xdiag_bad++;
3155 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3156 		break;
3157 	}
3158 
3159 	return (0);
3160 }
3161 
3162 /*
3163  * We route all errors through a single switch statement.
3164  */
3165 void
3166 cpu_ue_log_err(struct async_flt *aflt)
3167 {
3168 	switch (aflt->flt_class) {
3169 	case CPU_FAULT:
3170 		cpu_ereport_init(aflt);
3171 		if (cpu_async_log_err(aflt, NULL))
3172 			cpu_ereport_post(aflt);
3173 		break;
3174 
3175 	case BUS_FAULT:
3176 		bus_async_log_err(aflt);
3177 		break;
3178 
3179 	default:
3180 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3181 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3182 		return;
3183 	}
3184 }
3185 
3186 /*
3187  * Routine for panic hook callback from panic_idle().
3188  */
3189 void
3190 cpu_async_panic_callb(void)
3191 {
3192 	ch_async_flt_t ch_flt;
3193 	struct async_flt *aflt;
3194 	ch_cpu_errors_t cpu_error_regs;
3195 	uint64_t afsr_errs;
3196 
3197 	get_cpu_error_state(&cpu_error_regs);
3198 
3199 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3200 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS);
3201 
3202 	if (afsr_errs) {
3203 
3204 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3205 		aflt = (struct async_flt *)&ch_flt;
3206 		aflt->flt_id = gethrtime_waitfree();
3207 		aflt->flt_bus_id = getprocessorid();
3208 		aflt->flt_inst = CPU->cpu_id;
3209 		aflt->flt_stat = cpu_error_regs.afsr;
3210 		aflt->flt_addr = cpu_error_regs.afar;
3211 		aflt->flt_prot = AFLT_PROT_NONE;
3212 		aflt->flt_class = CPU_FAULT;
3213 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3214 		aflt->flt_panic = 1;
3215 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3216 		ch_flt.afsr_errs = afsr_errs;
3217 #if defined(SERRANO)
3218 		ch_flt.afar2 = cpu_error_regs.afar2;
3219 #endif	/* SERRANO */
3220 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3221 	}
3222 }
3223 
3224 /*
3225  * Routine to convert a syndrome into a syndrome code.
3226  */
3227 static int
3228 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3229 {
3230 	if (synd_status == AFLT_STAT_INVALID)
3231 		return (-1);
3232 
3233 	/*
3234 	 * Use the syndrome to index the appropriate syndrome table,
3235 	 * to get the code indicating which bit(s) is(are) bad.
3236 	 */
3237 	if (afsr_bit &
3238 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3239 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3240 #if defined(JALAPENO) || defined(SERRANO)
3241 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3242 				return (-1);
3243 			else
3244 				return (BPAR0 + synd);
3245 #else /* JALAPENO || SERRANO */
3246 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3247 				return (-1);
3248 			else
3249 				return (mtag_syndrome_tab[synd]);
3250 #endif /* JALAPENO || SERRANO */
3251 		} else {
3252 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3253 				return (-1);
3254 			else
3255 				return (ecc_syndrome_tab[synd]);
3256 		}
3257 	} else {
3258 		return (-1);
3259 	}
3260 }
3261 
3262 /*
3263  * Routine to return a string identifying the physical name
3264  * associated with a memory/cache error.
3265  */
3266 int
3267 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3268     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3269     ushort_t flt_status, char *buf, int buflen, int *lenp)
3270 {
3271 	int synd_code;
3272 	int ret;
3273 
3274 	/*
3275 	 * An AFSR of -1 defaults to a memory syndrome.
3276 	 */
3277 	if (flt_stat == (uint64_t)-1)
3278 		flt_stat = C_AFSR_CE;
3279 
3280 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3281 
3282 	/*
3283 	 * Syndrome code must be either a single-bit error code
3284 	 * (0...143) or -1 for unum lookup.
3285 	 */
3286 	if (synd_code < 0 || synd_code >= M2)
3287 		synd_code = -1;
3288 	if (&plat_get_mem_unum) {
3289 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3290 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3291 			buf[0] = '\0';
3292 			*lenp = 0;
3293 		}
3294 
3295 		return (ret);
3296 	}
3297 
3298 	return (ENOTSUP);
3299 }
3300 
3301 /*
3302  * Wrapper for cpu_get_mem_unum() routine that takes an
3303  * async_flt struct rather than explicit arguments.
3304  */
3305 int
3306 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3307     char *buf, int buflen, int *lenp)
3308 {
3309 	/*
3310 	 * If we come thru here for an IO bus error aflt->flt_stat will
3311 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3312 	 * so it will interpret this as a memory error.
3313 	 */
3314 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3315 	    (aflt->flt_class == BUS_FAULT) ?
3316 	    (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs,
3317 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3318 	    aflt->flt_status, buf, buflen, lenp));
3319 }
3320 
3321 /*
3322  * This routine is a more generic interface to cpu_get_mem_unum()
3323  * that may be used by other modules (e.g. mm).
3324  */
3325 int
3326 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3327     char *buf, int buflen, int *lenp)
3328 {
3329 	int synd_status, flt_in_memory, ret;
3330 	ushort_t flt_status = 0;
3331 	char unum[UNUM_NAMLEN];
3332 
3333 	/*
3334 	 * Check for an invalid address.
3335 	 */
3336 	if (afar == (uint64_t)-1)
3337 		return (ENXIO);
3338 
3339 	if (synd == (uint64_t)-1)
3340 		synd_status = AFLT_STAT_INVALID;
3341 	else
3342 		synd_status = AFLT_STAT_VALID;
3343 
3344 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3345 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3346 
3347 	/*
3348 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
3349 	 * For Panther, L2$ is not external, so we don't want to
3350 	 * generate an E$ unum for those errors.
3351 	 */
3352 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3353 		if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS)
3354 			flt_status |= ECC_ECACHE;
3355 	} else {
3356 		if (*afsr & C_AFSR_ECACHE)
3357 			flt_status |= ECC_ECACHE;
3358 	}
3359 
3360 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
3361 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3362 	if (ret != 0)
3363 		return (ret);
3364 
3365 	if (*lenp >= buflen)
3366 		return (ENAMETOOLONG);
3367 
3368 	(void) strncpy(buf, unum, buflen);
3369 
3370 	return (0);
3371 }
3372 
3373 /*
3374  * Routine to return memory information associated
3375  * with a physical address and syndrome.
3376  */
3377 int
3378 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3379     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3380     int *segsp, int *banksp, int *mcidp)
3381 {
3382 	int synd_status, synd_code;
3383 
3384 	if (afar == (uint64_t)-1)
3385 		return (ENXIO);
3386 
3387 	if (synd == (uint64_t)-1)
3388 		synd_status = AFLT_STAT_INVALID;
3389 	else
3390 		synd_status = AFLT_STAT_VALID;
3391 
3392 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3393 
3394 	if (p2get_mem_info != NULL)
3395 		return ((p2get_mem_info)(synd_code, afar,
3396 			mem_sizep, seg_sizep, bank_sizep,
3397 			segsp, banksp, mcidp));
3398 	else
3399 		return (ENOTSUP);
3400 }
3401 
3402 /*
3403  * Routine to return a string identifying the physical
3404  * name associated with a cpuid.
3405  */
3406 int
3407 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3408 {
3409 	int ret;
3410 	char unum[UNUM_NAMLEN];
3411 
3412 	if (&plat_get_cpu_unum) {
3413 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3414 		    != 0)
3415 			return (ret);
3416 	} else {
3417 		return (ENOTSUP);
3418 	}
3419 
3420 	if (*lenp >= buflen)
3421 		return (ENAMETOOLONG);
3422 
3423 	(void) strncpy(buf, unum, buflen);
3424 
3425 	return (0);
3426 }
3427 
3428 /*
3429  * This routine exports the name buffer size.
3430  */
3431 size_t
3432 cpu_get_name_bufsize()
3433 {
3434 	return (UNUM_NAMLEN);
3435 }
3436 
/*
 * Historical function, apparently not used.  It accepts its arguments and
 * does nothing with them.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3444 
/*
 * Historical function only called for SBus errors in debugging.  It
 * accepts its arguments and does nothing with them.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3452 
3453 /*
3454  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3455  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3456  * an async fault structure argument is passed in, the captured error state
3457  * (AFSR, AFAR) info will be returned in the structure.
3458  */
3459 int
3460 clear_errors(ch_async_flt_t *ch_flt)
3461 {
3462 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3463 	ch_cpu_errors_t	cpu_error_regs;
3464 
3465 	get_cpu_error_state(&cpu_error_regs);
3466 
3467 	if (ch_flt != NULL) {
3468 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3469 		aflt->flt_addr = cpu_error_regs.afar;
3470 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3471 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3472 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3473 #if defined(SERRANO)
3474 		ch_flt->afar2 = cpu_error_regs.afar2;
3475 #endif	/* SERRANO */
3476 	}
3477 
3478 	set_cpu_error_state(&cpu_error_regs);
3479 
3480 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3481 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3482 }
3483 
3484 /*
3485  * Clear any AFSR error bits, and check for persistence.
3486  *
3487  * It would be desirable to also insist that syndrome match.  PCI handling
3488  * has already filled flt_synd.  For errors trapped by CPU we only fill
3489  * flt_synd when we queue the event, so we do not have a valid flt_synd
3490  * during initial classification (it is valid if we're called as part of
3491  * subsequent low-pil additional classification attempts).  We could try
3492  * to determine which syndrome to use: we know we're only called for
3493  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3494  * would be esynd/none and esynd/msynd, respectively.  If that is
3495  * implemented then what do we do in the case that we do experience an
3496  * error on the same afar but with different syndrome?  At the very least
3497  * we should count such occurences.  Anyway, for now, we'll leave it as
3498  * it has been for ages.
3499  */
3500 static int
3501 clear_ecc(struct async_flt *aflt)
3502 {
3503 	ch_cpu_errors_t	cpu_error_regs;
3504 
3505 	/*
3506 	 * Snapshot the AFSR and AFAR and clear any errors
3507 	 */
3508 	get_cpu_error_state(&cpu_error_regs);
3509 	set_cpu_error_state(&cpu_error_regs);
3510 
3511 	/*
3512 	 * If any of the same memory access error bits are still on and
3513 	 * the AFAR matches, return that the error is persistent.
3514 	 */
3515 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3516 	    cpu_error_regs.afar == aflt->flt_addr);
3517 }
3518 
3519 /*
3520  * Turn off all cpu error detection, normally only used for panics.
3521  */
3522 void
3523 cpu_disable_errors(void)
3524 {
3525 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3526 }
3527 
3528 /*
3529  * Enable errors.
3530  */
3531 void
3532 cpu_enable_errors(void)
3533 {
3534 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3535 }
3536 
3537 /*
3538  * Flush the entire ecache using displacement flush by reading through a
3539  * physical address range twice as large as the Ecache.
3540  */
3541 void
3542 cpu_flush_ecache(void)
3543 {
3544 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3545 	    cpunodes[CPU->cpu_id].ecache_linesize);
3546 }
3547 
3548 /*
3549  * Return CPU E$ set size - E$ size divided by the associativity.
3550  * We use this function in places where the CPU_PRIVATE ptr may not be
3551  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3552  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3553  * up before the kernel switches from OBP's to the kernel's trap table, so
3554  * we don't have to worry about cpunodes being unitialized.
3555  */
3556 int
3557 cpu_ecache_set_size(struct cpu *cp)
3558 {
3559 	if (CPU_PRIVATE(cp))
3560 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3561 
3562 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3563 }
3564 
3565 /*
3566  * Flush Ecache line.
3567  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3568  * Uses normal displacement flush for Cheetah.
3569  */
3570 static void
3571 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3572 {
3573 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3574 	int ec_set_size = cpu_ecache_set_size(CPU);
3575 
3576 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3577 }
3578 
3579 /*
3580  * Scrub physical address.
3581  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3582  * Ecache or direct-mapped Ecache.
3583  */
3584 static void
3585 cpu_scrubphys(struct async_flt *aflt)
3586 {
3587 	int ec_set_size = cpu_ecache_set_size(CPU);
3588 
3589 	scrubphys(aflt->flt_addr, ec_set_size);
3590 }
3591 
3592 /*
3593  * Clear physical address.
3594  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3595  * Ecache or direct-mapped Ecache.
3596  */
3597 void
3598 cpu_clearphys(struct async_flt *aflt)
3599 {
3600 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3601 	int ec_set_size = cpu_ecache_set_size(CPU);
3602 
3603 
3604 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3605 }
3606 
#if defined(CPU_IMP_ECACHE_ASSOC)
/*
 * Look through the Ecache tags captured in ch_flt's diagnostic data, one
 * per way, for a valid line whose tag matches the fault address.
 * If found, return set# + 1. Otherwise return 0.
 */
static int
cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
	int ec_set_size = cpu_ecache_set_size(CPU);
	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
	int nway = cpu_ecache_nway();
	int way;

	for (way = 0; way < nway; way++) {
		/* An invalid line can never be the one we are after. */
		if (cpu_ectag_line_invalid(totalsize, ecp[way].ec_tag))
			continue;

		/*
		 * Compare the tag's PA with the fault address stripped of
		 * the low order bits that are not part of the tag.
		 */
		if ((aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
		    cpu_ectag_to_pa(ec_set_size, ecp[way].ec_tag))
			return (way + 1);
	}

	return (0);
}
#endif /* CPU_IMP_ECACHE_ASSOC */
3631 
3632 /*
3633  * Check whether a line in the given logout info matches the specified
3634  * fault address.  If reqval is set then the line must not be Invalid.
3635  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3636  * set to 2 for l2$ or 3 for l3$.
3637  */
3638 static int
3639 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3640 {
3641 	ch_diag_data_t *cdp = data;
3642 	ch_ec_data_t *ecp;
3643 	int totalsize, ec_set_size;
3644 	int i, ways;
3645 	int match = 0;
3646 	int tagvalid;
3647 	uint64_t addr, tagpa;
3648 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3649 
3650 	/*
3651 	 * Check the l2$ logout data
3652 	 */
3653 	if (ispanther) {
3654 		ecp = &cdp->chd_l2_data[0];
3655 		ec_set_size = PN_L2_SET_SIZE;
3656 		ways = PN_L2_NWAYS;
3657 	} else {
3658 		ecp = &cdp->chd_ec_data[0];
3659 		ec_set_size = cpu_ecache_set_size(CPU);
3660 		ways = cpu_ecache_nway();
3661 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3662 	}
3663 	/* remove low order PA bits from fault address not used in PA tag */
3664 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3665 	for (i = 0; i < ways; i++, ecp++) {
3666 		if (ispanther) {
3667 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3668 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3669 		} else {
3670 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3671 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3672 			    ecp->ec_tag);
3673 		}
3674 		if (tagpa == addr && (!reqval || tagvalid)) {
3675 			match = i + 1;
3676 			*level = 2;
3677 			break;
3678 		}
3679 	}
3680 
3681 	if (match || !ispanther)
3682 		return (match);
3683 
3684 	/* For Panther we also check the l3$ */
3685 	ecp = &cdp->chd_ec_data[0];
3686 	ec_set_size = PN_L3_SET_SIZE;
3687 	ways = PN_L3_NWAYS;
3688 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3689 
3690 	for (i = 0; i < ways; i++, ecp++) {
3691 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3692 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3693 			match = i + 1;
3694 			*level = 3;
3695 			break;
3696 		}
3697 	}
3698 
3699 	return (match);
3700 }
3701 
3702 #if defined(CPU_IMP_L1_CACHE_PARITY)
3703 /*
3704  * Record information related to the source of an Dcache Parity Error.
3705  */
3706 static void
3707 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3708 {
3709 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3710 	int index;
3711 
3712 	/*
3713 	 * Since instruction decode cannot be done at high PIL
3714 	 * just examine the entire Dcache to locate the error.
3715 	 */
3716 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3717 		ch_flt->parity_data.dpe.cpl_way = -1;
3718 		ch_flt->parity_data.dpe.cpl_off = -1;
3719 	}
3720 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3721 		cpu_dcache_parity_check(ch_flt, index);
3722 }
3723 
3724 /*
3725  * Check all ways of the Dcache at a specified index for good parity.
3726  */
3727 static void
3728 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3729 {
3730 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3731 	uint64_t parity_bits, pbits, data_word;
3732 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3733 	int way, word, data_byte;
3734 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3735 	ch_dc_data_t tmp_dcp;
3736 
3737 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3738 		/*
3739 		 * Perform diagnostic read.
3740 		 */
3741 		get_dcache_dtag(index + way * dc_set_size,
3742 				(uint64_t *)&tmp_dcp);
3743 
3744 		/*
3745 		 * Check tag for even parity.
3746 		 * Sum of 1 bits (including parity bit) should be even.
3747 		 */
3748 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3749 			/*
3750 			 * If this is the first error log detailed information
3751 			 * about it and check the snoop tag. Otherwise just
3752 			 * record the fact that we found another error.
3753 			 */
3754 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3755 				ch_flt->parity_data.dpe.cpl_way = way;
3756 				ch_flt->parity_data.dpe.cpl_cache =
3757 				    CPU_DC_PARITY;
3758 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3759 
3760 				if (popc64(tmp_dcp.dc_sntag &
3761 						CHP_DCSNTAG_PARMASK) & 1) {
3762 					ch_flt->parity_data.dpe.cpl_tag |=
3763 								CHP_DC_SNTAG;
3764 					ch_flt->parity_data.dpe.cpl_lcnt++;
3765 				}
3766 
3767 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3768 			}
3769 
3770 			ch_flt->parity_data.dpe.cpl_lcnt++;
3771 		}
3772 
3773 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3774 			/*
3775 			 * Panther has more parity bits than the other
3776 			 * processors for covering dcache data and so each
3777 			 * byte of data in each word has its own parity bit.
3778 			 */
3779 			parity_bits = tmp_dcp.dc_pn_data_parity;
3780 			for (word = 0; word < 4; word++) {
3781 				data_word = tmp_dcp.dc_data[word];
3782 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3783 				for (data_byte = 0; data_byte < 8;
3784 				    data_byte++) {
3785 					if (((popc64(data_word &
3786 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3787 					    (pbits & 1)) {
3788 						cpu_record_dc_data_parity(
3789 						ch_flt, dcp, &tmp_dcp, way,
3790 						word);
3791 					}
3792 					pbits >>= 1;
3793 					data_word >>= 8;
3794 				}
3795 				parity_bits >>= 8;
3796 			}
3797 		} else {
3798 			/*
3799 			 * Check data array for even parity.
3800 			 * The 8 parity bits are grouped into 4 pairs each
3801 			 * of which covers a 64-bit word.  The endianness is
3802 			 * reversed -- the low-order parity bits cover the
3803 			 * high-order data words.
3804 			 */
3805 			parity_bits = tmp_dcp.dc_utag >> 8;
3806 			for (word = 0; word < 4; word++) {
3807 				pbits = (parity_bits >> (6 - word * 2)) & 3;
3808 				if ((popc64(tmp_dcp.dc_data[word]) +
3809 				    parity_bits_popc[pbits]) & 1) {
3810 					cpu_record_dc_data_parity(ch_flt, dcp,
3811 					    &tmp_dcp, way, word);
3812 				}
3813 			}
3814 		}
3815 	}
3816 }
3817 
3818 static void
3819 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
3820     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
3821 {
3822 	/*
3823 	 * If this is the first error log detailed information about it.
3824 	 * Otherwise just record the fact that we found another error.
3825 	 */
3826 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3827 		ch_flt->parity_data.dpe.cpl_way = way;
3828 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
3829 		ch_flt->parity_data.dpe.cpl_off = word * 8;
3830 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
3831 	}
3832 	ch_flt->parity_data.dpe.cpl_lcnt++;
3833 }
3834 
3835 /*
3836  * Record information related to the source of an Icache Parity Error.
3837  *
3838  * Called with the Icache disabled so any diagnostic accesses are safe.
3839  */
3840 static void
3841 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
3842 {
3843 	int	ic_set_size;
3844 	int	ic_linesize;
3845 	int	index;
3846 
3847 	if (CPU_PRIVATE(CPU)) {
3848 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3849 		    CH_ICACHE_NWAY;
3850 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3851 	} else {
3852 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3853 		ic_linesize = icache_linesize;
3854 	}
3855 
3856 	ch_flt->parity_data.ipe.cpl_way = -1;
3857 	ch_flt->parity_data.ipe.cpl_off = -1;
3858 
3859 	for (index = 0; index < ic_set_size; index += ic_linesize)
3860 		cpu_icache_parity_check(ch_flt, index);
3861 }
3862 
3863 /*
3864  * Check all ways of the Icache at a specified index for good parity.
3865  */
3866 static void
3867 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
3868 {
3869 	uint64_t parmask, pn_inst_parity;
3870 	int ic_set_size;
3871 	int ic_linesize;
3872 	int flt_index, way, instr, num_instr;
3873 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3874 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
3875 	ch_ic_data_t tmp_icp;
3876 
3877 	if (CPU_PRIVATE(CPU)) {
3878 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3879 		    CH_ICACHE_NWAY;
3880 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3881 	} else {
3882 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3883 		ic_linesize = icache_linesize;
3884 	}
3885 
3886 	/*
3887 	 * Panther has twice as many instructions per icache line and the
3888 	 * instruction parity bit is in a different location.
3889 	 */
3890 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3891 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
3892 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
3893 	} else {
3894 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
3895 		pn_inst_parity = 0;
3896 	}
3897 
3898 	/*
3899 	 * Index at which we expect to find the parity error.
3900 	 */
3901 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
3902 
3903 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
3904 		/*
3905 		 * Diagnostic reads expect address argument in ASI format.
3906 		 */
3907 		get_icache_dtag(2 * (index + way * ic_set_size),
3908 				(uint64_t *)&tmp_icp);
3909 
3910 		/*
3911 		 * If this is the index in which we expect to find the
3912 		 * error log detailed information about each of the ways.
3913 		 * This information will be displayed later if we can't
3914 		 * determine the exact way in which the error is located.
3915 		 */
3916 		if (flt_index == index)
3917 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
3918 
3919 		/*
3920 		 * Check tag for even parity.
3921 		 * Sum of 1 bits (including parity bit) should be even.
3922 		 */
3923 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
3924 			/*
3925 			 * If this way is the one in which we expected
3926 			 * to find the error record the way and check the
3927 			 * snoop tag. Otherwise just record the fact we
3928 			 * found another error.
3929 			 */
3930 			if (flt_index == index) {
3931 				ch_flt->parity_data.ipe.cpl_way = way;
3932 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
3933 
3934 				if (popc64(tmp_icp.ic_sntag &
3935 						CHP_ICSNTAG_PARMASK) & 1) {
3936 					ch_flt->parity_data.ipe.cpl_tag |=
3937 								CHP_IC_SNTAG;
3938 					ch_flt->parity_data.ipe.cpl_lcnt++;
3939 				}
3940 
3941 			}
3942 			ch_flt->parity_data.ipe.cpl_lcnt++;
3943 			continue;
3944 		}
3945 
3946 		/*
3947 		 * Check instruction data for even parity.
3948 		 * Bits participating in parity differ for PC-relative
3949 		 * versus non-PC-relative instructions.
3950 		 */
3951 		for (instr = 0; instr < num_instr; instr++) {
3952 			parmask = (tmp_icp.ic_data[instr] &
3953 					CH_ICDATA_PRED_ISPCREL) ?
3954 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
3955 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
3956 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
3957 				/*
3958 				 * If this way is the one in which we expected
3959 				 * to find the error record the way and offset.
3960 				 * Otherwise just log the fact we found another
3961 				 * error.
3962 				 */
3963 				if (flt_index == index) {
3964 					ch_flt->parity_data.ipe.cpl_way = way;
3965 					ch_flt->parity_data.ipe.cpl_off =
3966 								instr * 4;
3967 				}
3968 				ch_flt->parity_data.ipe.cpl_lcnt++;
3969 				continue;
3970 			}
3971 		}
3972 	}
3973 }
3974 
3975 /*
3976  * Record information related to the source of an Pcache Parity Error.
3977  */
3978 static void
3979 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
3980 {
3981 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3982 	int index;
3983 
3984 	/*
3985 	 * Since instruction decode cannot be done at high PIL just
3986 	 * examine the entire Pcache to check for any parity errors.
3987 	 */
3988 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3989 		ch_flt->parity_data.dpe.cpl_way = -1;
3990 		ch_flt->parity_data.dpe.cpl_off = -1;
3991 	}
3992 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
3993 		cpu_pcache_parity_check(ch_flt, index);
3994 }
3995 
3996 /*
3997  * Check all ways of the Pcache at a specified index for good parity.
3998  */
3999 static void
4000 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4001 {
4002 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4003 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4004 	int way, word, pbit, parity_bits;
4005 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4006 	ch_pc_data_t tmp_pcp;
4007 
4008 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4009 		/*
4010 		 * Perform diagnostic read.
4011 		 */
4012 		get_pcache_dtag(index + way * pc_set_size,
4013 				(uint64_t *)&tmp_pcp);
4014 		/*
4015 		 * Check data array for odd parity. There are 8 parity
4016 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4017 		 * of those bits covers exactly 8 bytes of the data
4018 		 * array:
4019 		 *
4020 		 *	parity bit	P$ data bytes covered
4021 		 *	----------	---------------------
4022 		 *	50		63:56
4023 		 *	51		55:48
4024 		 *	52		47:40
4025 		 *	53		39:32
4026 		 *	54		31:24
4027 		 *	55		23:16
4028 		 *	56		15:8
4029 		 *	57		7:0
4030 		 */
4031 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4032 		for (word = 0; word < pc_data_words; word++) {
4033 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4034 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4035 				/*
4036 				 * If this is the first error log detailed
4037 				 * information about it. Otherwise just record
4038 				 * the fact that we found another error.
4039 				 */
4040 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4041 					ch_flt->parity_data.dpe.cpl_way = way;
4042 					ch_flt->parity_data.dpe.cpl_cache =
4043 					    CPU_PC_PARITY;
4044 					ch_flt->parity_data.dpe.cpl_off =
4045 					    word * sizeof (uint64_t);
4046 					bcopy(&tmp_pcp, pcp,
4047 							sizeof (ch_pc_data_t));
4048 				}
4049 				ch_flt->parity_data.dpe.cpl_lcnt++;
4050 			}
4051 		}
4052 	}
4053 }
4054 
4055 
4056 /*
4057  * Add L1 Data cache data to the ereport payload.
4058  */
4059 static void
4060 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4061 {
4062 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4063 	ch_dc_data_t *dcp;
4064 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4065 	uint_t nelem;
4066 	int i, ways_to_check, ways_logged = 0;
4067 
4068 	/*
4069 	 * If this is an D$ fault then there may be multiple
4070 	 * ways captured in the ch_parity_log_t structure.
4071 	 * Otherwise, there will be at most one way captured
4072 	 * in the ch_diag_data_t struct.
4073 	 * Check each way to see if it should be encoded.
4074 	 */
4075 	if (ch_flt->flt_type == CPU_DC_PARITY)
4076 		ways_to_check = CH_DCACHE_NWAY;
4077 	else
4078 		ways_to_check = 1;
4079 	for (i = 0; i < ways_to_check; i++) {
4080 		if (ch_flt->flt_type == CPU_DC_PARITY)
4081 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4082 		else
4083 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4084 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4085 			bcopy(dcp, &dcdata[ways_logged],
4086 				sizeof (ch_dc_data_t));
4087 			ways_logged++;
4088 		}
4089 	}
4090 
4091 	/*
4092 	 * Add the dcache data to the payload.
4093 	 */
4094 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4095 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4096 	if (ways_logged != 0) {
4097 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4098 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4099 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4100 	}
4101 }
4102 
4103 /*
4104  * Add L1 Instruction cache data to the ereport payload.
4105  */
4106 static void
4107 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4108 {
4109 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4110 	ch_ic_data_t *icp;
4111 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4112 	uint_t nelem;
4113 	int i, ways_to_check, ways_logged = 0;
4114 
4115 	/*
4116 	 * If this is an I$ fault then there may be multiple
4117 	 * ways captured in the ch_parity_log_t structure.
4118 	 * Otherwise, there will be at most one way captured
4119 	 * in the ch_diag_data_t struct.
4120 	 * Check each way to see if it should be encoded.
4121 	 */
4122 	if (ch_flt->flt_type == CPU_IC_PARITY)
4123 		ways_to_check = CH_ICACHE_NWAY;
4124 	else
4125 		ways_to_check = 1;
4126 	for (i = 0; i < ways_to_check; i++) {
4127 		if (ch_flt->flt_type == CPU_IC_PARITY)
4128 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4129 		else
4130 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4131 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4132 			bcopy(icp, &icdata[ways_logged],
4133 				sizeof (ch_ic_data_t));
4134 			ways_logged++;
4135 		}
4136 	}
4137 
4138 	/*
4139 	 * Add the icache data to the payload.
4140 	 */
4141 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4142 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4143 	if (ways_logged != 0) {
4144 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4145 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4146 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4147 	}
4148 }
4149 
4150 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4151 
4152 /*
4153  * Add ecache data to payload.
4154  */
4155 static void
4156 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4157 {
4158 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4159 	ch_ec_data_t *ecp;
4160 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4161 	uint_t nelem;
4162 	int i, ways_logged = 0;
4163 
4164 	/*
4165 	 * Check each way to see if it should be encoded
4166 	 * and concatinate it into a temporary buffer.
4167 	 */
4168 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4169 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4170 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4171 			bcopy(ecp, &ecdata[ways_logged],
4172 				sizeof (ch_ec_data_t));
4173 			ways_logged++;
4174 		}
4175 	}
4176 
4177 	/*
4178 	 * Panther CPUs have an additional level of cache and so
4179 	 * what we just collected was the L3 (ecache) and not the
4180 	 * L2 cache.
4181 	 */
4182 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4183 		/*
4184 		 * Add the L3 (ecache) data to the payload.
4185 		 */
4186 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4187 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4188 		if (ways_logged != 0) {
4189 			nelem = sizeof (ch_ec_data_t) /
4190 			    sizeof (uint64_t) * ways_logged;
4191 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4192 			    DATA_TYPE_UINT64_ARRAY, nelem,
4193 			    (uint64_t *)ecdata, NULL);
4194 		}
4195 
4196 		/*
4197 		 * Now collect the L2 cache.
4198 		 */
4199 		ways_logged = 0;
4200 		for (i = 0; i < PN_L2_NWAYS; i++) {
4201 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4202 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4203 				bcopy(ecp, &ecdata[ways_logged],
4204 				    sizeof (ch_ec_data_t));
4205 				ways_logged++;
4206 			}
4207 		}
4208 	}
4209 
4210 	/*
4211 	 * Add the L2 cache data to the payload.
4212 	 */
4213 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4214 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4215 	if (ways_logged != 0) {
4216 		nelem = sizeof (ch_ec_data_t) /
4217 			sizeof (uint64_t) * ways_logged;
4218 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4219 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4220 	}
4221 }
4222 
/*
 * Encode the data saved in the ch_async_flt_t struct into
 * the FM ereport payload.
 *
 * Each member is added only if the corresponding flag is set in
 * aflt->flt_payload.
 *
 *	aflt		fault being reported (really a ch_async_flt_t)
 *	payload		nvlist receiving the ereport members
 *	resource	nvlist used to build the memory FMRI, if one is added
 *	afar_status	out: AFLT_STAT_INVALID unless the AFAR status member
 *			is requested, in which case the computed status
 *	synd_status	out: AFLT_STAT_INVALID unless the syndrome status
 *			member is requested, in which case the computed status
 */
static void
cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
	nvlist_t *resource, int *afar_status, int *synd_status)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	/* Both statuses default to invalid until computed below. */
	*synd_status = AFLT_STAT_INVALID;
	*afar_status = AFLT_STAT_INVALID;

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
	}

	/* The extended AFSR is only reported for Panther CPUs. */
	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
		    ch_flt->flt_bit);
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
		    DATA_TYPE_BOOLEAN_VALUE,
		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
		    DATA_TYPE_BOOLEAN_VALUE,
		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
		*synd_status = afsr_to_synd_status(aflt->flt_inst,
		    ch_flt->afsr_errs, ch_flt->flt_bit);
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
		cpu_payload_add_ecache(aflt, payload);

	/* The low byte of flt_status is reported as the copy function. */
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
	}

	/* flt_status shifted right by 8 is reported as how-detected. */
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
	}

	/* The instruction block is passed as 16 uint32 values. */
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
		    DATA_TYPE_UINT32_ARRAY, 16,
		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
	}

#if defined(CPU_IMP_L1_CACHE_PARITY)
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
		cpu_payload_add_dcache(aflt, payload);
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
		cpu_payload_add_icache(aflt, payload);
#endif	/* CPU_IMP_L1_CACHE_PARITY */

#if defined(CHEETAH_PLUS)
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
		cpu_payload_add_pcache(aflt, payload);
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
		cpu_payload_add_tlb(aflt, payload);
#endif	/* CHEETAH_PLUS */
	/*
	 * Create the FMRI that goes into the payload
	 * and contains the unum info if necessary.
	 * This is only done when the AFAR status computed above is
	 * valid, and the member is only added if the unum lookup
	 * returns 0.
	 */
	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) &&
	    (*afar_status == AFLT_STAT_VALID)) {
		char unum[UNUM_NAMLEN];
		int len;

		if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum,
		    UNUM_NAMLEN, &len) == 0) {
			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
			    NULL, unum, NULL);
			fm_payload_set(payload,
			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
			    DATA_TYPE_NVLIST, resource, NULL);
		}
	}
}
4358 
4359 /*
4360  * Initialize the way info if necessary.
4361  */
4362 void
4363 cpu_ereport_init(struct async_flt *aflt)
4364 {
4365 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4366 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4367 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4368 	int i;
4369 
4370 	/*
4371 	 * Initialize the info in the CPU logout structure.
4372 	 * The I$/D$ way information is not initialized here
4373 	 * since it is captured in the logout assembly code.
4374 	 */
4375 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4376 		(ecp + i)->ec_way = i;
4377 
4378 	for (i = 0; i < PN_L2_NWAYS; i++)
4379 		(l2p + i)->ec_way = i;
4380 }
4381 
4382 /*
4383  * Returns whether fault address is valid for this error bit and
4384  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4385  */
4386 int
4387 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4388 {
4389 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4390 
4391 	return ((aflt->flt_stat & C_AFSR_MEMORY) &&
4392 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4393 	    AFLT_STAT_VALID &&
4394 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4395 }
4396 
4397 static void
4398 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4399 {
4400 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4401 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4402 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4403 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4404 #if defined(CPU_IMP_ECACHE_ASSOC)
4405 	int i, nway;
4406 #endif /* CPU_IMP_ECACHE_ASSOC */
4407 
4408 	/*
4409 	 * Check if the CPU log out captured was valid.
4410 	 */
4411 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4412 	    ch_flt->flt_data_incomplete)
4413 		return;
4414 
4415 #if defined(CPU_IMP_ECACHE_ASSOC)
4416 	nway = cpu_ecache_nway();
4417 	i =  cpu_ecache_line_valid(ch_flt);
4418 	if (i == 0 || i > nway) {
4419 		for (i = 0; i < nway; i++)
4420 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4421 	} else
4422 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4423 #else /* CPU_IMP_ECACHE_ASSOC */
4424 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4425 #endif /* CPU_IMP_ECACHE_ASSOC */
4426 
4427 #if defined(CHEETAH_PLUS)
4428 	pn_cpu_log_diag_l2_info(ch_flt);
4429 #endif /* CHEETAH_PLUS */
4430 
4431 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4432 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4433 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4434 	}
4435 
4436 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4437 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4438 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4439 		else
4440 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4441 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4442 	}
4443 }
4444 
/*
 * Cheetah ECC calculation.
 *
 * We only need to do the calculation on the data bits and can ignore check
 * bit and Mtag bit terms in the calculation.
 *
 * The table has one row per ECC bit (9 rows).  Each row holds two masks,
 * applied by us3_gen_ecc() to the low-order and high-order 64 bits of the
 * 128-bit data chunk respectively.
 */
static uint64_t ch_ecc_table[9][2] = {
	/*
	 * low order 64-bits   high-order 64-bits
	 */
	{ 0x46bffffeccd1177f, 0x488800022100014c },
	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
	{ 0x7f5511421b113809, 0x901c88d84288aafe },
	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
	{ 0xf552181014448344, 0x7ff8f4443e411911 },
	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
	{ 0x3280008440001112, 0xfee88b337ffffd62 },
};
4465 
/*
 * 64-bit population count: returns the number of 1 bits in val.
 *
 * Each pass clears the lowest set bit (val &= val - 1), so the loop
 * runs once per set bit.  We could use the UltraSPARC V9 POPC
 * instruction, but some CPUs including Cheetahplus and Jaguar do not
 * support that instruction.
 */
int
popc64(uint64_t val)
{
	int nbits = 0;

	while (val != 0) {
		val &= val - 1;
		nbits++;
	}
	return (nbits);
}
4481 
4482 /*
4483  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4484  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4485  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4486  * instead of doing all the xor's.
4487  */
4488 uint32_t
4489 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4490 {
4491 	int bitno, s;
4492 	int synd = 0;
4493 
4494 	for (bitno = 0; bitno < 9; bitno++) {
4495 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4496 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4497 		synd |= (s << bitno);
4498 	}
4499 	return (synd);
4500 
4501 }
4502 
4503 /*
4504  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4505  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4506  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4507  */
4508 static void
4509 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4510     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4511 {
4512 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4513 
4514 	if (reason &&
4515 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4516 		(void) strcat(reason, eccp->ec_reason);
4517 	}
4518 
4519 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4520 	ch_flt->flt_type = eccp->ec_flt_type;
4521 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4522 		ch_flt->flt_diag_data = *cdp;
4523 	else
4524 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4525 	aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit);
4526 
4527 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4528 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4529 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4530 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4531 	else
4532 		aflt->flt_synd = 0;
4533 
4534 	aflt->flt_payload = eccp->ec_err_payload;
4535 
4536 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4537 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4538 		cpu_errorq_dispatch(eccp->ec_err_class,
4539 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4540 		    aflt->flt_panic);
4541 	else
4542 		cpu_errorq_dispatch(eccp->ec_err_class,
4543 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4544 		    aflt->flt_panic);
4545 }
4546 
/*
 * Queue events on async event queue one event per error bit.  First we
 * queue the events that we "expect" for the given trap, then we queue events
 * that we may not expect.  Return number of events queued.
 *
 * On CHEETAH_PLUS builds, when a logout area is supplied, events for the
 * shadow AFSR/AFAR are queued before any of the above.
 *
 *	ch_flt		fault structure; its address/status fields are
 *			rewritten for each event queued
 *	reason		passed through to cpu_queue_one_event()
 *	t_afsr_errs	error bits to consider; masked below to the known
 *			error bits with C_AFSR_ME removed
 *	clop		CPU logout area supplying diagnostic data, or NULL
 */
int
cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
    ch_cpu_logout_t *clop)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ecc_type_to_info_t *eccp;
	int nevents = 0;
	/* Primary register values, saved so they can be restored per event. */
	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
#if defined(CHEETAH_PLUS)
	uint64_t orig_t_afsr_errs;
#endif
	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
	ch_diag_data_t *cdp = NULL;

	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);

#if defined(CHEETAH_PLUS)
	orig_t_afsr_errs = t_afsr_errs;

	/*
	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
	 */
	if (clop != NULL) {
		/*
		 * Set the AFSR and AFAR fields to the shadow registers.  The
		 * flt_addr and flt_stat fields will be reset to the primaries
		 * below, but the sdw_addr and sdw_stat will stay as the
		 * secondaries.
		 */
		cdp = &clop->clo_sdw_data;
		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);

		/*
		 * If the primary and shadow AFSR differ, tag the shadow as
		 * the first fault.
		 */
		if ((primary_afar != cdp->chd_afar) ||
		    (primary_afsr_errs != ch_flt->afsr_errs)) {
			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
		}

		/*
		 * Check AFSR bits as well as AFSR_EXT bits in order of
		 * the AFAR overwrite priority. Our stored AFSR_EXT value
		 * is expected to be zero for those CPUs which do not have
		 * an AFSR_EXT register.
		 */
		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
			if ((eccp->ec_afsr_bit &
			    (ch_flt->afsr_errs & t_afsr_errs)) &&
			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
				/*
				 * Only the first event queued is handed the
				 * diagnostic data.
				 */
				cdp = NULL;
				t_afsr_errs &= ~eccp->ec_afsr_bit;
				nevents++;
			}
		}

		/*
		 * If the ME bit is on in the primary AFSR turn all the
		 * error bits on again that may set the ME bit to make
		 * sure we see the ME AFSR error logs.
		 */
		if ((primary_afsr & C_AFSR_ME) != 0)
			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
	}
#endif	/* CHEETAH_PLUS */

	/* From here on use the primary logout data, if any was supplied. */
	if (clop != NULL)
		cdp = &clop->clo_data;

	/*
	 * Queue expected errors, error bit and fault type must match
	 * in the ecc_type_to_info table.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
		    (eccp->ec_flags & aflt->flt_status) != 0) {
#if defined(SERRANO)
			/*
			 * For FRC/FRU errors on Serrano the afar2 captures
			 * the address and the associated data is
			 * in the shadow logout area.
			 */
			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
				if (clop != NULL)
					cdp = &clop->clo_sdw_data;
				aflt->flt_addr = ch_flt->afar2;
			} else {
				if (clop != NULL)
					cdp = &clop->clo_data;
				aflt->flt_addr = primary_afar;
			}
#else	/* SERRANO */
			aflt->flt_addr = primary_afar;
#endif	/* SERRANO */
			/* Restore the primary register values for this event. */
			aflt->flt_stat = primary_afsr;
			ch_flt->afsr_ext = primary_afsr_ext;
			ch_flt->afsr_errs = primary_afsr_errs;
			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
			cdp = NULL;
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}

	/*
	 * Queue unexpected errors, error bit only match.
	 * Bits already queued above have been cleared from t_afsr_errs,
	 * so only the remaining ones are considered here.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if (eccp->ec_afsr_bit & t_afsr_errs) {
#if defined(SERRANO)
			/*
			 * For FRC/FRU errors on Serrano the afar2 captures
			 * the address and the associated data is
			 * in the shadow logout area.
			 */
			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
				if (clop != NULL)
					cdp = &clop->clo_sdw_data;
				aflt->flt_addr = ch_flt->afar2;
			} else {
				if (clop != NULL)
					cdp = &clop->clo_data;
				aflt->flt_addr = primary_afar;
			}
#else	/* SERRANO */
			aflt->flt_addr = primary_afar;
#endif	/* SERRANO */
			aflt->flt_stat = primary_afsr;
			ch_flt->afsr_ext = primary_afsr_ext;
			ch_flt->afsr_errs = primary_afsr_errs;
			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
			cdp = NULL;
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}
	return (nevents);
}
4699 
4700 /*
4701  * Return trap type number.
4702  */
4703 uint8_t
4704 flt_to_trap_type(struct async_flt *aflt)
4705 {
4706 	if (aflt->flt_status & ECC_I_TRAP)
4707 		return (TRAP_TYPE_ECC_I);
4708 	if (aflt->flt_status & ECC_D_TRAP)
4709 		return (TRAP_TYPE_ECC_D);
4710 	if (aflt->flt_status & ECC_F_TRAP)
4711 		return (TRAP_TYPE_ECC_F);
4712 	if (aflt->flt_status & ECC_C_TRAP)
4713 		return (TRAP_TYPE_ECC_C);
4714 	if (aflt->flt_status & ECC_DP_TRAP)
4715 		return (TRAP_TYPE_ECC_DP);
4716 	if (aflt->flt_status & ECC_IP_TRAP)
4717 		return (TRAP_TYPE_ECC_IP);
4718 	if (aflt->flt_status & ECC_ITLB_TRAP)
4719 		return (TRAP_TYPE_ECC_ITLB);
4720 	if (aflt->flt_status & ECC_DTLB_TRAP)
4721 		return (TRAP_TYPE_ECC_DTLB);
4722 	return (TRAP_TYPE_UNKNOWN);
4723 }
4724 
4725 /*
4726  * Decide an error type based on detector and leaky/partner tests.
4727  * The following array is used for quick translation - it must
4728  * stay in sync with ce_dispact_t.
4729  */
4730 
4731 static char *cetypes[] = {
4732 	CE_DISP_DESC_U,
4733 	CE_DISP_DESC_I,
4734 	CE_DISP_DESC_PP,
4735 	CE_DISP_DESC_P,
4736 	CE_DISP_DESC_L,
4737 	CE_DISP_DESC_PS,
4738 	CE_DISP_DESC_S
4739 };
4740 
4741 char *
4742 flt_to_error_type(struct async_flt *aflt)
4743 {
4744 	ce_dispact_t dispact, disp;
4745 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
4746 
4747 	/*
4748 	 * The memory payload bundle is shared by some events that do
4749 	 * not perform any classification.  For those flt_disp will be
4750 	 * 0 and we will return "unknown".
4751 	 */
4752 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
4753 		return (cetypes[CE_DISP_UNKNOWN]);
4754 
4755 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
4756 
4757 	/*
4758 	 * It is also possible that no scrub/classification was performed
4759 	 * by the detector, for instance where a disrupting error logged
4760 	 * in the AFSR while CEEN was off in cpu_deferred_error.
4761 	 */
4762 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
4763 		return (cetypes[CE_DISP_UNKNOWN]);
4764 
4765 	/*
4766 	 * Lookup type in initial classification/action table
4767 	 */
4768 	dispact = CE_DISPACT(ce_disp_table,
4769 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
4770 	    CE_XDIAG_STATE(dtcrinfo),
4771 	    CE_XDIAG_CE1SEEN(dtcrinfo),
4772 	    CE_XDIAG_CE2SEEN(dtcrinfo));
4773 
4774 	/*
4775 	 * A bad lookup is not something to panic production systems for.
4776 	 */
4777 	ASSERT(dispact != CE_DISP_BAD);
4778 	if (dispact == CE_DISP_BAD)
4779 		return (cetypes[CE_DISP_UNKNOWN]);
4780 
4781 	disp = CE_DISP(dispact);
4782 
4783 	switch (disp) {
4784 	case CE_DISP_UNKNOWN:
4785 	case CE_DISP_INTERMITTENT:
4786 		break;
4787 
4788 	case CE_DISP_POSS_PERS:
4789 		/*
4790 		 * "Possible persistent" errors to which we have applied a valid
4791 		 * leaky test can be separated into "persistent" or "leaky".
4792 		 */
4793 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
4794 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
4795 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
4796 			    CE_XDIAG_CE2SEEN(lkyinfo))
4797 				disp = CE_DISP_LEAKY;
4798 			else
4799 				disp = CE_DISP_PERS;
4800 		}
4801 		break;
4802 
4803 	case CE_DISP_POSS_STICKY:
4804 		/*
4805 		 * Promote "possible sticky" results that have been
4806 		 * confirmed by a partner test to "sticky".  Unconfirmed
4807 		 * "possible sticky" events are left at that status - we do not
4808 		 * guess at any bad reader/writer etc status here.
4809 		 */
4810 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
4811 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
4812 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
4813 			disp = CE_DISP_STICKY;
4814 
4815 		/*
4816 		 * Promote "possible sticky" results on a uniprocessor
4817 		 * to "sticky"
4818 		 */
4819 		if (disp == CE_DISP_POSS_STICKY &&
4820 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
4821 			disp = CE_DISP_STICKY;
4822 		break;
4823 
4824 	default:
4825 		disp = CE_DISP_UNKNOWN;
4826 		break;
4827 	}
4828 
4829 	return (cetypes[disp]);
4830 }
4831 
4832 /*
4833  * Given the entire afsr, the specific bit to check and a prioritized list of
4834  * error bits, determine the validity of the various overwrite priority
4835  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
4836  * different overwrite priorities.
4837  *
4838  * Given a specific afsr error bit and the entire afsr, there are three cases:
4839  *   INVALID:	The specified bit is lower overwrite priority than some other
4840  *		error bit which is on in the afsr (or IVU/IVC).
4841  *   VALID:	The specified bit is higher priority than all other error bits
4842  *		which are on in the afsr.
4843  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
4844  *		bit is on in the afsr.
4845  */
4846 int
4847 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
4848 {
4849 	uint64_t afsr_ow;
4850 
4851 	while ((afsr_ow = *ow_bits++) != 0) {
4852 		/*
4853 		 * If bit is in the priority class, check to see if another
4854 		 * bit in the same class is on => ambiguous.  Otherwise,
4855 		 * the value is valid.  If the bit is not on at this priority
4856 		 * class, but a higher priority bit is on, then the value is
4857 		 * invalid.
4858 		 */
4859 		if (afsr_ow & afsr_bit) {
4860 			/*
4861 			 * If equal pri bit is on, ambiguous.
4862 			 */
4863 			if (afsr & (afsr_ow & ~afsr_bit))
4864 				return (AFLT_STAT_AMBIGUOUS);
4865 			return (AFLT_STAT_VALID);
4866 		} else if (afsr & afsr_ow)
4867 			break;
4868 	}
4869 
4870 	/*
4871 	 * We didn't find a match or a higher priority bit was on.  Not
4872 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
4873 	 */
4874 	return (AFLT_STAT_INVALID);
4875 }
4876 
4877 static int
4878 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
4879 {
4880 #if defined(SERRANO)
4881 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
4882 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
4883 	else
4884 #endif	/* SERRANO */
4885 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
4886 }
4887 
4888 static int
4889 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
4890 {
4891 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
4892 }
4893 
4894 static int
4895 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
4896 {
4897 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
4898 }
4899 
4900 static int
4901 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
4902 {
4903 #ifdef lint
4904 	cpuid = cpuid;
4905 #endif
4906 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
4907 		return (afsr_to_msynd_status(afsr, afsr_bit));
4908 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
4909 #if defined(CHEETAH_PLUS)
4910 		/*
4911 		 * The E_SYND overwrite policy is slightly different
4912 		 * for Panther CPUs.
4913 		 */
4914 		if (IS_PANTHER(cpunodes[cpuid].implementation))
4915 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
4916 		else
4917 			return (afsr_to_esynd_status(afsr, afsr_bit));
4918 #else /* CHEETAH_PLUS */
4919 		return (afsr_to_esynd_status(afsr, afsr_bit));
4920 #endif /* CHEETAH_PLUS */
4921 	} else {
4922 		return (AFLT_STAT_INVALID);
4923 	}
4924 }
4925 
4926 /*
4927  * Slave CPU stick synchronization.
4928  */
4929 void
4930 sticksync_slave(void)
4931 {
4932 	int 		i;
4933 	int		tries = 0;
4934 	int64_t		tskew;
4935 	int64_t		av_tskew;
4936 
4937 	kpreempt_disable();
4938 	/* wait for the master side */
4939 	while (stick_sync_cmd != SLAVE_START)
4940 		;
4941 	/*
4942 	 * Synchronization should only take a few tries at most. But in the
4943 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
4944 	 * without it's stick synchronized wouldn't be a good citizen.
4945 	 */
4946 	while (slave_done == 0) {
4947 		/*
4948 		 * Time skew calculation.
4949 		 */
4950 		av_tskew = tskew = 0;
4951 
4952 		for (i = 0; i < stick_iter; i++) {
4953 			/* make location hot */
4954 			timestamp[EV_A_START] = 0;
4955 			stick_timestamp(&timestamp[EV_A_START]);
4956 
4957 			/* tell the master we're ready */
4958 			stick_sync_cmd = MASTER_START;
4959 
4960 			/* and wait */
4961 			while (stick_sync_cmd != SLAVE_CONT)
4962 				;
4963 			/* Event B end */
4964 			stick_timestamp(&timestamp[EV_B_END]);
4965 
4966 			/* calculate time skew */
4967 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
4968 				- (timestamp[EV_A_END] -
4969 				timestamp[EV_A_START])) / 2;
4970 
4971 			/* keep running count */
4972 			av_tskew += tskew;
4973 		} /* for */
4974 
4975 		/*
4976 		 * Adjust stick for time skew if not within the max allowed;
4977 		 * otherwise we're all done.
4978 		 */
4979 		if (stick_iter != 0)
4980 			av_tskew = av_tskew/stick_iter;
4981 		if (ABS(av_tskew) > stick_tsk) {
4982 			/*
4983 			 * If the skew is 1 (the slave's STICK register
4984 			 * is 1 STICK ahead of the master's), stick_adj
4985 			 * could fail to adjust the slave's STICK register
4986 			 * if the STICK read on the slave happens to
4987 			 * align with the increment of the STICK.
4988 			 * Therefore, we increment the skew to 2.
4989 			 */
4990 			if (av_tskew == 1)
4991 				av_tskew++;
4992 			stick_adj(-av_tskew);
4993 		} else
4994 			slave_done = 1;
4995 #ifdef DEBUG
4996 		if (tries < DSYNC_ATTEMPTS)
4997 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
4998 				av_tskew;
4999 		++tries;
5000 #endif /* DEBUG */
5001 #ifdef lint
5002 		tries = tries;
5003 #endif
5004 
5005 	} /* while */
5006 
5007 	/* allow the master to finish */
5008 	stick_sync_cmd = EVENT_NULL;
5009 	kpreempt_enable();
5010 }
5011 
5012 /*
5013  * Master CPU side of stick synchronization.
5014  *  - timestamp end of Event A
5015  *  - timestamp beginning of Event B
5016  */
5017 void
5018 sticksync_master(void)
5019 {
5020 	int		i;
5021 
5022 	kpreempt_disable();
5023 	/* tell the slave we've started */
5024 	slave_done = 0;
5025 	stick_sync_cmd = SLAVE_START;
5026 
5027 	while (slave_done == 0) {
5028 		for (i = 0; i < stick_iter; i++) {
5029 			/* wait for the slave */
5030 			while (stick_sync_cmd != MASTER_START)
5031 				;
5032 			/* Event A end */
5033 			stick_timestamp(&timestamp[EV_A_END]);
5034 
5035 			/* make location hot */
5036 			timestamp[EV_B_START] = 0;
5037 			stick_timestamp(&timestamp[EV_B_START]);
5038 
5039 			/* tell the slave to continue */
5040 			stick_sync_cmd = SLAVE_CONT;
5041 		} /* for */
5042 
5043 		/* wait while slave calculates time skew */
5044 		while (stick_sync_cmd == SLAVE_CONT)
5045 			;
5046 	} /* while */
5047 	kpreempt_enable();
5048 }
5049 
/*
 * Nothing to do here.  Cheetah/Cheetah+ raise a disrupting error for
 * copybacks, so the Spitfire hack of xcall'ing every cpu to ask it to
 * check for them is not needed.  In addition, each cpu checks for CPU
 * events on its way to panic idle in cpu_async_panic_callb.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
	/* intentionally empty */
}
5060 
5061 struct kmem_cache *ch_private_cache;
5062 
5063 /*
5064  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5065  * deallocate the scrubber data structures and cpu_private data structure.
5066  */
5067 void
5068 cpu_uninit_private(struct cpu *cp)
5069 {
5070 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5071 
5072 	ASSERT(chprp);
5073 	cpu_uninit_ecache_scrub_dr(cp);
5074 	CPU_PRIVATE(cp) = NULL;
5075 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5076 	kmem_cache_free(ch_private_cache, chprp);
5077 	cmp_delete_cpu(cp->cpu_id);
5078 
5079 }
5080 
5081 /*
5082  * Cheetah Cache Scrubbing
5083  *
5084  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5085  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5086  * protected by either parity or ECC.
5087  *
 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
 * logic works with respect to the ASI used to scrub I$ lines, the entire
 * I$ is scanned at once.
5092  */
5093 
/*
 * Tuneables to enable and disable the scrubbing of the caches, and to tune
 * scrubbing behavior.  These may be changed via /etc/system or using mdb
 * on a running system.
 *
 * Each ?cache_scrub_enable flag is the global switch for one scrubber;
 * do_scrub() and do_scrub_offline() additionally consult a per-cpu enable
 * flag before requesting any work.
 */
int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */

/*
 * The following are the PIL levels that the softints/cross traps will fire at.
 */
uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */

#if defined(JALAPENO)

/*
 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
 * on Jalapeno.
 */
int ecache_scrub_enable = 0;

#else	/* JALAPENO */

/*
 * With all other cpu types, E$ scrubbing is on by default
 */
int ecache_scrub_enable = 1;

#endif	/* JALAPENO */


#if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)

/*
 * The I$ scrubber tends to cause latency problems for real-time SW, so it
 * is disabled by default on non-Cheetah systems
 */
int icache_scrub_enable = 0;

/*
 * Tuneables specifying the scrub calls per second and the scan rate
 * for each cache
 *
 * The cyclic times are set during boot based on the following values.
 * Changing these values in mdb after this time will have no effect.  If
 * a different value is desired, it must be set in /etc/system before a
 * reboot.
 */
int ecache_calls_a_sec = 1;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 2;		/* D$ scrub calls per second */
int icache_calls_a_sec = 2;		/* I$ scrub calls per second */

int ecache_scan_rate_idle = 1;		/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 1;		/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 1;		/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 1;		/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 1;		/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 1;		/* I$ scan rate (in tenths of a %) */

#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */

int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */

/*
 * As in the other branch, the calls-per-second values are copied into
 * cache_scrub_info[] by cpu_init_cache_scrub(), so later changes to them
 * do not affect the cyclic intervals.
 */
int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */

/*
 * The softint handlers pick the _idle or _busy rate for a cpu according
 * to that cpu's chsm_ecache_busy state.
 */
int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */

#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5170 
5171 /*
5172  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5173  * increment the outstanding request counter and schedule a softint to run
5174  * the scrubber.
5175  */
5176 extern xcfunc_t cache_scrubreq_tl1;
5177 
5178 /*
5179  * These are the softint functions for each cache scrubber
5180  */
5181 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5182 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5183 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5184 
/*
 * The cache scrub info table contains cache specific information
 * and allows for some of the scrub code to be table driven, reducing
 * duplication of cache similar code.
 *
 * This table keeps a copy of the value in the calls per second variable
 * (?cache_calls_a_sec).  This makes it much more difficult for someone
 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
 * mdb in a misguided attempt to disable the scrubber).
 *
 * There is one entry per cache, stored at that cache's
 * CACHE_SCRUBBER_INFO_* index.  csi_freq, csi_inum and the two cyclic ids
 * start out as 0 and are filled in by cpu_init_cache_scrub(), which also
 * forces csi_freq into the range 1..hz for enabled scrubbers.
 */
struct scrub_info {
	int		*csi_enable;	/* scrubber enable flag */
	int		csi_freq;	/* scrubber calls per second */
	int		csi_index;	/* index to chsm_outstanding[] */
	uint_t		csi_inum;	/* scrubber interrupt number */
	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
	/* two characters plus the terminating NUL, e.g. "E$" */
	char		csi_name[3];	/* cache name for this scrub entry */
} cache_scrub_info[] = {
{ &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
{ &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
{ &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
};
5208 
5209 /*
5210  * If scrubbing is enabled, increment the outstanding request counter.  If it
5211  * is 1 (meaning there were no previous requests outstanding), call
5212  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5213  * a self trap.
5214  */
5215 static void
5216 do_scrub(struct scrub_info *csi)
5217 {
5218 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5219 	int index = csi->csi_index;
5220 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5221 
5222 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5223 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5224 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5225 			    csi->csi_inum, 0);
5226 		}
5227 	}
5228 }
5229 
5230 /*
5231  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5232  * cross-trap the offline cpus.
5233  */
5234 static void
5235 do_scrub_offline(struct scrub_info *csi)
5236 {
5237 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5238 
5239 	if (CPUSET_ISNULL(cpu_offline_set)) {
5240 		/*
5241 		 * No offline cpus - nothing to do
5242 		 */
5243 		return;
5244 	}
5245 
5246 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5247 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5248 		    csi->csi_index);
5249 	}
5250 }
5251 
5252 /*
5253  * This is the initial setup for the scrubber cyclics - it sets the
5254  * interrupt level, frequency, and function to call.
5255  */
5256 /*ARGSUSED*/
5257 static void
5258 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5259     cyc_time_t *when)
5260 {
5261 	struct scrub_info *csi = (struct scrub_info *)arg;
5262 
5263 	ASSERT(csi != NULL);
5264 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5265 	hdlr->cyh_level = CY_LOW_LEVEL;
5266 	hdlr->cyh_arg = arg;
5267 
5268 	when->cyt_when = 0;	/* Start immediately */
5269 	when->cyt_interval = NANOSEC / csi->csi_freq;
5270 }
5271 
5272 /*
5273  * Initialization for cache scrubbing.
5274  * This routine is called AFTER all cpus have had cpu_init_private called
5275  * to initialize their private data areas.
5276  */
5277 void
5278 cpu_init_cache_scrub(void)
5279 {
5280 	int i;
5281 	struct scrub_info *csi;
5282 	cyc_omni_handler_t omni_hdlr;
5283 	cyc_handler_t offline_hdlr;
5284 	cyc_time_t when;
5285 
5286 	/*
5287 	 * save away the maximum number of lines for the D$
5288 	 */
5289 	dcache_nlines = dcache_size / dcache_linesize;
5290 
5291 	/*
5292 	 * register the softints for the cache scrubbing
5293 	 */
5294 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5295 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5296 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]);
5297 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5298 
5299 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5300 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5301 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]);
5302 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5303 
5304 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5305 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5306 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]);
5307 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5308 
5309 	/*
5310 	 * start the scrubbing for all the caches
5311 	 */
5312 	mutex_enter(&cpu_lock);
5313 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5314 
5315 		csi = &cache_scrub_info[i];
5316 
5317 		if (!(*csi->csi_enable))
5318 			continue;
5319 
5320 		/*
5321 		 * force the following to be true:
5322 		 *	1 <= calls_a_sec <= hz
5323 		 */
5324 		if (csi->csi_freq > hz) {
5325 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5326 				"(%d); resetting to hz (%d)", csi->csi_name,
5327 				csi->csi_freq, hz);
5328 			csi->csi_freq = hz;
5329 		} else if (csi->csi_freq < 1) {
5330 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5331 				"(%d); resetting to 1", csi->csi_name,
5332 				csi->csi_freq);
5333 			csi->csi_freq = 1;
5334 		}
5335 
5336 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5337 		omni_hdlr.cyo_offline = NULL;
5338 		omni_hdlr.cyo_arg = (void *)csi;
5339 
5340 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5341 		offline_hdlr.cyh_arg = (void *)csi;
5342 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5343 
5344 		when.cyt_when = 0;	/* Start immediately */
5345 		when.cyt_interval = NANOSEC / csi->csi_freq;
5346 
5347 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5348 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5349 	}
5350 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5351 	mutex_exit(&cpu_lock);
5352 }
5353 
5354 /*
5355  * Indicate that the specified cpu is idle.
5356  */
5357 void
5358 cpu_idle_ecache_scrub(struct cpu *cp)
5359 {
5360 	if (CPU_PRIVATE(cp) != NULL) {
5361 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5362 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5363 	}
5364 }
5365 
5366 /*
5367  * Indicate that the specified cpu is busy.
5368  */
5369 void
5370 cpu_busy_ecache_scrub(struct cpu *cp)
5371 {
5372 	if (CPU_PRIVATE(cp) != NULL) {
5373 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5374 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5375 	}
5376 }
5377 
5378 /*
5379  * Initialization for cache scrubbing for the specified cpu.
5380  */
5381 void
5382 cpu_init_ecache_scrub_dr(struct cpu *cp)
5383 {
5384 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5385 	int cpuid = cp->cpu_id;
5386 
5387 	/* initialize the number of lines in the caches */
5388 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5389 	    cpunodes[cpuid].ecache_linesize;
5390 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5391 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5392 
5393 	/*
5394 	 * do_scrub() and do_scrub_offline() check both the global
5395 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5396 	 * check this value before scrubbing.  Currently, we use it to
5397 	 * disable the E$ scrubber on multi-core cpus or while running at
5398 	 * slowed speed.  For now, just turn everything on and allow
5399 	 * cpu_init_private() to change it if necessary.
5400 	 */
5401 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5402 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5403 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5404 
5405 	cpu_busy_ecache_scrub(cp);
5406 }
5407 
5408 /*
5409  * Un-initialization for cache scrubbing for the specified cpu.
5410  */
5411 static void
5412 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5413 {
5414 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5415 
5416 	/*
5417 	 * un-initialize bookkeeping for cache scrubbing
5418 	 */
5419 	bzero(csmp, sizeof (ch_scrub_misc_t));
5420 
5421 	cpu_idle_ecache_scrub(cp);
5422 }
5423 
5424 /*
5425  * Called periodically on each CPU to scrub the D$.
5426  */
5427 static void
5428 scrub_dcache(int how_many)
5429 {
5430 	int i;
5431 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5432 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5433 
5434 	/*
5435 	 * scrub the desired number of lines
5436 	 */
5437 	for (i = 0; i < how_many; i++) {
5438 		/*
5439 		 * scrub a D$ line
5440 		 */
5441 		dcache_inval_line(index);
5442 
5443 		/*
5444 		 * calculate the next D$ line to scrub, assumes
5445 		 * that dcache_nlines is a power of 2
5446 		 */
5447 		index = (index + 1) & (dcache_nlines - 1);
5448 	}
5449 
5450 	/*
5451 	 * set the scrub index for the next visit
5452 	 */
5453 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5454 }
5455 
5456 /*
5457  * Handler for D$ scrub inum softint. Call scrub_dcache until
5458  * we decrement the outstanding request count to zero.
5459  */
5460 /*ARGSUSED*/
5461 static uint_t
5462 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5463 {
5464 	int i;
5465 	int how_many;
5466 	int outstanding;
5467 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5468 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5469 	struct scrub_info *csi = (struct scrub_info *)arg1;
5470 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5471 		dcache_scan_rate_idle : dcache_scan_rate_busy;
5472 
5473 	/*
5474 	 * The scan rates are expressed in units of tenths of a
5475 	 * percent.  A scan rate of 1000 (100%) means the whole
5476 	 * cache is scanned every second.
5477 	 */
5478 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5479 
5480 	do {
5481 		outstanding = *countp;
5482 		for (i = 0; i < outstanding; i++) {
5483 			scrub_dcache(how_many);
5484 		}
5485 	} while (atomic_add_32_nv(countp, -outstanding));
5486 
5487 	return (DDI_INTR_CLAIMED);
5488 }
5489 
5490 /*
5491  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5492  * by invalidating lines. Due to the characteristics of the ASI which
5493  * is used to invalidate an I$ line, the entire I$ must be invalidated
5494  * vs. an individual I$ line.
5495  */
5496 static void
5497 scrub_icache(int how_many)
5498 {
5499 	int i;
5500 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5501 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5502 	int icache_nlines = csmp->chsm_icache_nlines;
5503 
5504 	/*
5505 	 * scrub the desired number of lines
5506 	 */
5507 	for (i = 0; i < how_many; i++) {
5508 		/*
5509 		 * since the entire I$ must be scrubbed at once,
5510 		 * wait until the index wraps to zero to invalidate
5511 		 * the entire I$
5512 		 */
5513 		if (index == 0) {
5514 			icache_inval_all();
5515 		}
5516 
5517 		/*
5518 		 * calculate the next I$ line to scrub, assumes
5519 		 * that chsm_icache_nlines is a power of 2
5520 		 */
5521 		index = (index + 1) & (icache_nlines - 1);
5522 	}
5523 
5524 	/*
5525 	 * set the scrub index for the next visit
5526 	 */
5527 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5528 }
5529 
5530 /*
5531  * Handler for I$ scrub inum softint. Call scrub_icache until
5532  * we decrement the outstanding request count to zero.
5533  */
5534 /*ARGSUSED*/
5535 static uint_t
5536 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5537 {
5538 	int i;
5539 	int how_many;
5540 	int outstanding;
5541 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5542 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5543 	struct scrub_info *csi = (struct scrub_info *)arg1;
5544 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5545 	    icache_scan_rate_idle : icache_scan_rate_busy;
5546 	int icache_nlines = csmp->chsm_icache_nlines;
5547 
5548 	/*
5549 	 * The scan rates are expressed in units of tenths of a
5550 	 * percent.  A scan rate of 1000 (100%) means the whole
5551 	 * cache is scanned every second.
5552 	 */
5553 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5554 
5555 	do {
5556 		outstanding = *countp;
5557 		for (i = 0; i < outstanding; i++) {
5558 			scrub_icache(how_many);
5559 		}
5560 	} while (atomic_add_32_nv(countp, -outstanding));
5561 
5562 	return (DDI_INTR_CLAIMED);
5563 }
5564 
5565 /*
5566  * Called periodically on each CPU to scrub the E$.
5567  */
5568 static void
5569 scrub_ecache(int how_many)
5570 {
5571 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5572 	int i;
5573 	int cpuid = CPU->cpu_id;
5574 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5575 	int nlines = csmp->chsm_ecache_nlines;
5576 	int linesize = cpunodes[cpuid].ecache_linesize;
5577 	int ec_set_size = cpu_ecache_set_size(CPU);
5578 
5579 	/*
5580 	 * scrub the desired number of lines
5581 	 */
5582 	for (i = 0; i < how_many; i++) {
5583 		/*
5584 		 * scrub the E$ line
5585 		 */
5586 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5587 		    ec_set_size);
5588 
5589 		/*
5590 		 * calculate the next E$ line to scrub based on twice
5591 		 * the number of E$ lines (to displace lines containing
5592 		 * flush area data), assumes that the number of lines
5593 		 * is a power of 2
5594 		 */
5595 		index = (index + 1) & ((nlines << 1) - 1);
5596 	}
5597 
5598 	/*
5599 	 * set the ecache scrub index for the next visit
5600 	 */
5601 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5602 }
5603 
5604 /*
5605  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5606  * we decrement the outstanding request count to zero.
5607  *
5608  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5609  * become negative after the atomic_add_32_nv().  This is not a problem, as
5610  * the next trip around the loop won't scrub anything, and the next add will
5611  * reset the count back to zero.
5612  */
5613 /*ARGSUSED*/
5614 static uint_t
5615 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5616 {
5617 	int i;
5618 	int how_many;
5619 	int outstanding;
5620 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5621 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5622 	struct scrub_info *csi = (struct scrub_info *)arg1;
5623 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5624 		ecache_scan_rate_idle : ecache_scan_rate_busy;
5625 	int ecache_nlines = csmp->chsm_ecache_nlines;
5626 
5627 	/*
5628 	 * The scan rates are expressed in units of tenths of a
5629 	 * percent.  A scan rate of 1000 (100%) means the whole
5630 	 * cache is scanned every second.
5631 	 */
5632 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5633 
5634 	do {
5635 		outstanding = *countp;
5636 		for (i = 0; i < outstanding; i++) {
5637 			scrub_ecache(how_many);
5638 		}
5639 	} while (atomic_add_32_nv(countp, -outstanding));
5640 
5641 	return (DDI_INTR_CLAIMED);
5642 }
5643 
5644 /*
5645  * Timeout function to reenable CE
5646  */
5647 static void
5648 cpu_delayed_check_ce_errors(void *arg)
5649 {
5650 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
5651 	    TQ_NOSLEEP)) {
5652 		(void) timeout(cpu_delayed_check_ce_errors, arg,
5653 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5654 	}
5655 }
5656 
5657 /*
5658  * CE Deferred Re-enable after trap.
5659  *
5660  * When the CPU gets a disrupting trap for any of the errors
5661  * controlled by the CEEN bit, CEEN is disabled in the trap handler
5662  * immediately. To eliminate the possibility of multiple CEs causing
5663  * recursive stack overflow in the trap handler, we cannot
5664  * reenable CEEN while still running in the trap handler. Instead,
5665  * after a CE is logged on a CPU, we schedule a timeout function,
5666  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
5667  * seconds. This function will check whether any further CEs
5668  * have occurred on that CPU, and if none have, will reenable CEEN.
5669  *
5670  * If further CEs have occurred while CEEN is disabled, another
5671  * timeout will be scheduled. This is to ensure that the CPU can
5672  * make progress in the face of CE 'storms', and that it does not
5673  * spend all its time logging CE errors.
5674  */
5675 static void
5676 cpu_check_ce_errors(void *arg)
5677 {
5678 	int	cpuid = (int)arg;
5679 	cpu_t	*cp;
5680 
5681 	/*
5682 	 * We acquire cpu_lock.
5683 	 */
5684 	ASSERT(curthread->t_pil == 0);
5685 
5686 	/*
5687 	 * verify that the cpu is still around, DR
5688 	 * could have got there first ...
5689 	 */
5690 	mutex_enter(&cpu_lock);
5691 	cp = cpu_get(cpuid);
5692 	if (cp == NULL) {
5693 		mutex_exit(&cpu_lock);
5694 		return;
5695 	}
5696 	/*
5697 	 * make sure we don't migrate across CPUs
5698 	 * while checking our CE status.
5699 	 */
5700 	kpreempt_disable();
5701 
5702 	/*
5703 	 * If we are running on the CPU that got the
5704 	 * CE, we can do the checks directly.
5705 	 */
5706 	if (cp->cpu_id == CPU->cpu_id) {
5707 		mutex_exit(&cpu_lock);
5708 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
5709 		kpreempt_enable();
5710 		return;
5711 	}
5712 	kpreempt_enable();
5713 
5714 	/*
5715 	 * send an x-call to get the CPU that originally
5716 	 * got the CE to do the necessary checks. If we can't
5717 	 * send the x-call, reschedule the timeout, otherwise we
5718 	 * lose CEEN forever on that CPU.
5719 	 */
5720 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
5721 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
5722 		    TIMEOUT_CEEN_CHECK, 0);
5723 		mutex_exit(&cpu_lock);
5724 	} else {
5725 		/*
5726 		 * When the CPU is not accepting xcalls, or
5727 		 * the processor is offlined, we don't want to
5728 		 * incur the extra overhead of trying to schedule the
5729 		 * CE timeout indefinitely. However, we don't want to lose
5730 		 * CE checking forever.
5731 		 *
5732 		 * Keep rescheduling the timeout, accepting the additional
5733 		 * overhead as the cost of correctness in the case where we get
5734 		 * a CE, disable CEEN, offline the CPU during the
5735 		 * the timeout interval, and then online it at some
5736 		 * point in the future. This is unlikely given the short
5737 		 * cpu_ceen_delay_secs.
5738 		 */
5739 		mutex_exit(&cpu_lock);
5740 		(void) timeout(cpu_delayed_check_ce_errors, (void *)cp->cpu_id,
5741 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5742 	}
5743 }
5744 
5745 /*
5746  * This routine will check whether CEs have occurred while
5747  * CEEN is disabled. Any CEs detected will be logged and, if
5748  * possible, scrubbed.
5749  *
5750  * The memscrubber will also use this routine to clear any errors
5751  * caused by its scrubbing with CEEN disabled.
5752  *
5753  * flag == SCRUBBER_CEEN_CHECK
5754  *		called from memscrubber, just check/scrub, no reset
5755  *		paddr 	physical addr. for start of scrub pages
5756  *		vaddr 	virtual addr. for scrub area
5757  *		psz	page size of area to be scrubbed
5758  *
5759  * flag == TIMEOUT_CEEN_CHECK
5760  *		timeout function has triggered, reset timeout or CEEN
5761  *
5762  * Note: We must not migrate cpus during this function.  This can be
5763  * achieved by one of:
5764  *    - invoking as target of an x-call in which case we're at XCALL_PIL
5765  *	The flag value must be first xcall argument.
5766  *    - disabling kernel preemption.  This should be done for very short
5767  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
5768  *	scrub an extended area with cpu_check_block.  The call for
5769  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
5770  *	brief for this case.
5771  *    - binding to a cpu, eg with thread_affinity_set().  This is used
5772  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
5773  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
5774  */
5775 void
5776 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
5777 {
5778 	ch_cpu_errors_t	cpu_error_regs;
5779 	uint64_t	ec_err_enable;
5780 	uint64_t	page_offset;
5781 
5782 	/* Read AFSR */
5783 	get_cpu_error_state(&cpu_error_regs);
5784 
5785 	/*
5786 	 * If no CEEN errors have occurred during the timeout
5787 	 * interval, it is safe to re-enable CEEN and exit.
5788 	 */
5789 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
5790 		if (flag == TIMEOUT_CEEN_CHECK &&
5791 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
5792 			set_error_enable(ec_err_enable | EN_REG_CEEN);
5793 		return;
5794 	}
5795 
5796 	/*
5797 	 * Ensure that CEEN was not reenabled (maybe by DR) before
5798 	 * we log/clear the error.
5799 	 */
5800 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
5801 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
5802 
5803 	/*
5804 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
5805 	 * timeout will be rescheduled when the error is logged.
5806 	 */
5807 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
5808 	    cpu_ce_detected(&cpu_error_regs,
5809 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
5810 	else
5811 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
5812 
5813 	/*
5814 	 * If the memory scrubber runs while CEEN is
5815 	 * disabled, (or if CEEN is disabled during the
5816 	 * scrub as a result of a CE being triggered by
5817 	 * it), the range being scrubbed will not be
5818 	 * completely cleaned. If there are multiple CEs
5819 	 * in the range at most two of these will be dealt
5820 	 * with, (one by the trap handler and one by the
5821 	 * timeout). It is also possible that none are dealt
5822 	 * with, (CEEN disabled and another CE occurs before
5823 	 * the timeout triggers). So to ensure that the
5824 	 * memory is actually scrubbed, we have to access each
5825 	 * memory location in the range and then check whether
5826 	 * that access causes a CE.
5827 	 */
5828 	if (flag == SCRUBBER_CEEN_CHECK && va) {
5829 		if ((cpu_error_regs.afar >= pa) &&
5830 		    (cpu_error_regs.afar < (pa + psz))) {
5831 			/*
5832 			 * Force a load from physical memory for each
5833 			 * 64-byte block, then check AFSR to determine
5834 			 * whether this access caused an error.
5835 			 *
5836 			 * This is a slow way to do a scrub, but as it will
5837 			 * only be invoked when the memory scrubber actually
5838 			 * triggered a CE, it should not happen too
5839 			 * frequently.
5840 			 *
5841 			 * cut down what we need to check as the scrubber
5842 			 * has verified up to AFAR, so get it's offset
5843 			 * into the page and start there.
5844 			 */
5845 			page_offset = (uint64_t)(cpu_error_regs.afar &
5846 			    (psz - 1));
5847 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
5848 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
5849 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
5850 			    psz);
5851 		}
5852 	}
5853 
5854 	/*
5855 	 * Reset error enable if this CE is not masked.
5856 	 */
5857 	if ((flag == TIMEOUT_CEEN_CHECK) &&
5858 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
5859 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
5860 
5861 }
5862 
5863 /*
5864  * Attempt a cpu logout for an error that we did not trap for, such
5865  * as a CE noticed with CEEN off.  It is assumed that we are still running
5866  * on the cpu that took the error and that we cannot migrate.  Returns
5867  * 0 on success, otherwise nonzero.
5868  */
5869 static int
5870 cpu_ce_delayed_ec_logout(uint64_t afar)
5871 {
5872 	ch_cpu_logout_t *clop;
5873 
5874 	if (CPU_PRIVATE(CPU) == NULL)
5875 		return (0);
5876 
5877 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5878 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
5879 	    LOGOUT_INVALID)
5880 		return (0);
5881 
5882 	cpu_delayed_logout(afar, clop);
5883 	return (1);
5884 }
5885 
5886 /*
5887  * We got an error while CEEN was disabled. We
5888  * need to clean up after it and log whatever
5889  * information we have on the CE.
5890  */
5891 void
5892 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
5893 {
5894 	ch_async_flt_t 	ch_flt;
5895 	struct async_flt *aflt;
5896 	char 		pr_reason[MAX_REASON_STRING];
5897 
5898 	bzero(&ch_flt, sizeof (ch_async_flt_t));
5899 	ch_flt.flt_trapped_ce = flag;
5900 	aflt = (struct async_flt *)&ch_flt;
5901 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
5902 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
5903 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5904 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
5905 	aflt->flt_addr = cpu_error_regs->afar;
5906 #if defined(SERRANO)
5907 	ch_flt.afar2 = cpu_error_regs->afar2;
5908 #endif	/* SERRANO */
5909 	aflt->flt_pc = NULL;
5910 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
5911 	aflt->flt_tl = 0;
5912 	aflt->flt_panic = 0;
5913 	cpu_log_and_clear_ce(&ch_flt);
5914 
5915 	/*
5916 	 * check if we caused any errors during cleanup
5917 	 */
5918 	if (clear_errors(&ch_flt)) {
5919 		pr_reason[0] = '\0';
5920 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
5921 		    NULL);
5922 	}
5923 }
5924 
/*
 * Log/clear CEEN-controlled disrupting errors
 *
 * ch_flt must already carry flt_stat, afsr_errs and flt_trapped_ce; this
 * routine fills in the remaining identification fields, optionally
 * captures a logout, scrubs memory for in-memory CEs, queues one event
 * per error bit, invalidates the logout area and finally flushes the
 * E$ when the error type calls for it.
 */
static void
cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt;
	uint64_t afsr, afsr_errs;
	ch_cpu_logout_t *clop;
	char 		pr_reason[MAX_REASON_STRING];
	on_trap_data_t	*otp = curthread->t_ontrap;

	aflt = (struct async_flt *)ch_flt;
	afsr = aflt->flt_stat;
	afsr_errs = ch_flt->afsr_errs;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_status = ECC_C_TRAP;

	pr_reason[0] = '\0';
	/*
	 * Get the CPU log out info for Disrupting Trap.
	 * Without a private area there is no logout structure, so the
	 * fault's own diagnostic data is marked invalid instead.
	 */
	if (CPU_PRIVATE(CPU) == NULL) {
		clop = NULL;
		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
	} else {
		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
	}

	/*
	 * CE_CEEN_TIMEOUT means we did not arrive via the trap handler, so
	 * no logout has been captured for us; take one now from a fresh
	 * read of the error registers.
	 */
	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
		ch_cpu_errors_t cpu_error_regs;

		get_cpu_error_state(&cpu_error_regs);
		/*
		 * The result is deliberately ignored: the register
		 * snapshot is copied into the logout area below whether
		 * or not the delayed logout was performed.
		 */
		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
		clop->clo_data.chd_afar = cpu_error_regs.afar;
		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
		clop->clo_sdw_data.chd_afsr_ext =
		    cpu_error_regs.shadow_afsr_ext;
#if defined(SERRANO)
		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
		/* NOTE(review): presumably flags a non-trap-time capture. */
		ch_flt->flt_data_incomplete = 1;

		/*
		 * The logging/clear code expects AFSR/AFAR to be cleared.
		 * The trap handler does it for CEEN enabled errors
		 * so we need to do it here.
		 */
		set_cpu_error_state(&cpu_error_regs);
	}

#if defined(JALAPENO) || defined(SERRANO)
	/*
	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
	 * For Serrano, even though we do have the AFAR, we still do the
	 * scrub on the RCE side since that's where the error type can
	 * be properly classified as intermittent, persistent, etc.
	 *
	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
	 * the flt_status bits.
	 */
	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
		cpu_ce_scrub_mem_err(aflt, B_TRUE);
	}
#else /* JALAPENO || SERRANO */
	/*
	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
	 * the flt_status bits.
	 */
	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
			cpu_ce_scrub_mem_err(aflt, B_TRUE);
		}
	}

#endif /* JALAPENO || SERRANO */

	/*
	 * Update flt_prot if this error occurred under on_trap protection.
	 */
	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
		aflt->flt_prot = AFLT_PROT_EC;

	/*
	 * Queue events on the async event queue, one event per error bit.
	 * If nothing was queued, or the AFSR carries none of the CECC
	 * error bits we expected, report an invalid-AFSR event instead.
	 */
	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
		ch_flt->flt_type = CPU_INV_AFSR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Zero out + invalidate CPU logout.
	 */
	if (clop) {
		bzero(clop, sizeof (ch_cpu_logout_t));
		clop->clo_data.chd_afar = LOGOUT_INVALID;
	}

	/*
	 * If either a CPC, WDC or EDC error has occurred while CEEN
	 * was disabled, we need to flush either the entire
	 * E$ or an E$ line.
	 */
#if defined(JALAPENO) || defined(SERRANO)
	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
#else	/* JALAPENO || SERRANO */
	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
#endif	/* JALAPENO || SERRANO */
		cpu_error_ecache_flush(ch_flt);

}
6053 
6054 /*
6055  * depending on the error type, we determine whether we
6056  * need to flush the entire ecache or just a line.
6057  */
6058 static int
6059 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6060 {
6061 	struct async_flt *aflt;
6062 	uint64_t	afsr;
6063 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6064 
6065 	aflt = (struct async_flt *)ch_flt;
6066 	afsr = aflt->flt_stat;
6067 
6068 	/*
6069 	 * If we got multiple errors, no point in trying
6070 	 * the individual cases, just flush the whole cache
6071 	 */
6072 	if (afsr & C_AFSR_ME) {
6073 		return (ECACHE_FLUSH_ALL);
6074 	}
6075 
6076 	/*
6077 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6078 	 * was disabled, we need to flush entire E$. We can't just
6079 	 * flush the cache line affected as the ME bit
6080 	 * is not set when multiple correctable errors of the same
6081 	 * type occur, so we might have multiple CPC or EDC errors,
6082 	 * with only the first recorded.
6083 	 */
6084 #if defined(JALAPENO) || defined(SERRANO)
6085 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6086 #else	/* JALAPENO || SERRANO */
6087 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6088 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6089 #endif	/* JALAPENO || SERRANO */
6090 		return (ECACHE_FLUSH_ALL);
6091 	}
6092 
6093 #if defined(JALAPENO) || defined(SERRANO)
6094 	/*
6095 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6096 	 * flush the entire Ecache.
6097 	 */
6098 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6099 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6100 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6101 			return (ECACHE_FLUSH_LINE);
6102 		} else {
6103 			return (ECACHE_FLUSH_ALL);
6104 		}
6105 	}
6106 #else /* JALAPENO || SERRANO */
6107 	/*
6108 	 * If UE only is set, flush the Ecache line, otherwise
6109 	 * flush the entire Ecache.
6110 	 */
6111 	if (afsr_errs & C_AFSR_UE) {
6112 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6113 		    C_AFSR_UE) {
6114 			return (ECACHE_FLUSH_LINE);
6115 		} else {
6116 			return (ECACHE_FLUSH_ALL);
6117 		}
6118 	}
6119 #endif /* JALAPENO || SERRANO */
6120 
6121 	/*
6122 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6123 	 * flush the entire Ecache.
6124 	 */
6125 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6126 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6127 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6128 			return (ECACHE_FLUSH_LINE);
6129 		} else {
6130 			return (ECACHE_FLUSH_ALL);
6131 		}
6132 	}
6133 
6134 	/*
6135 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6136 	 * flush the entire Ecache.
6137 	 */
6138 	if (afsr_errs & C_AFSR_BERR) {
6139 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6140 			return (ECACHE_FLUSH_LINE);
6141 		} else {
6142 			return (ECACHE_FLUSH_ALL);
6143 		}
6144 	}
6145 
6146 	return (0);
6147 }
6148 
6149 void
6150 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6151 {
6152 	int	ecache_flush_flag =
6153 	    cpu_error_ecache_flush_required(ch_flt);
6154 
6155 	/*
6156 	 * Flush Ecache line or entire Ecache based on above checks.
6157 	 */
6158 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6159 		cpu_flush_ecache();
6160 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6161 		cpu_flush_ecache_line(ch_flt);
6162 	}
6163 
6164 }
6165 
6166 /*
6167  * Extract the PA portion from the E$ tag.
6168  */
6169 uint64_t
6170 cpu_ectag_to_pa(int setsize, uint64_t tag)
6171 {
6172 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6173 		return (JG_ECTAG_TO_PA(setsize, tag));
6174 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6175 		return (PN_L3TAG_TO_PA(tag));
6176 	else
6177 		return (CH_ECTAG_TO_PA(setsize, tag));
6178 }
6179 
6180 /*
6181  * Convert the E$ tag PA into an E$ subblock index.
6182  */
6183 static int
6184 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6185 {
6186 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6187 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6188 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6189 		/* Panther has only one subblock per line */
6190 		return (0);
6191 	else
6192 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6193 }
6194 
6195 /*
6196  * All subblocks in an E$ line must be invalid for
6197  * the line to be invalid.
6198  */
6199 int
6200 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6201 {
6202 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6203 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6204 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6205 		return (PN_L3_LINE_INVALID(tag));
6206 	else
6207 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6208 }
6209 
6210 /*
6211  * Extract state bits for a subblock given the tag.  Note that for Panther
6212  * this works on both l2 and l3 tags.
6213  */
6214 static int
6215 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6216 {
6217 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6218 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6219 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6220 		return (tag & CH_ECSTATE_MASK);
6221 	else
6222 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6223 }
6224 
/*
 * Cpu-module specific MP initialization hook.
 *
 * The only work done here is for builds with CHEETAHPLUS_ERRATUM_25:
 * when cheetah_sendmondo_recover is set, cheetah_nudge_init() is called.
 * Otherwise this is a no-op.
 */
void
cpu_mp_init(void)
{
#if defined(CHEETAHPLUS_ERRATUM_25)
	if (cheetah_sendmondo_recover != 0)
		cheetah_nudge_init();
#endif	/* CHEETAHPLUS_ERRATUM_25 */
}
6237 
6238 void
6239 cpu_ereport_post(struct async_flt *aflt)
6240 {
6241 	char *cpu_type, buf[FM_MAX_CLASS];
6242 	nv_alloc_t *nva = NULL;
6243 	nvlist_t *ereport, *detector, *resource;
6244 	errorq_elem_t *eqep;
6245 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6246 	char unum[UNUM_NAMLEN];
6247 	int len = 0;
6248 	uint8_t  msg_type;
6249 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6250 
6251 	if (aflt->flt_panic || panicstr) {
6252 		eqep = errorq_reserve(ereport_errorq);
6253 		if (eqep == NULL)
6254 			return;
6255 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6256 		nva = errorq_elem_nva(ereport_errorq, eqep);
6257 	} else {
6258 		ereport = fm_nvlist_create(nva);
6259 	}
6260 
6261 	/*
6262 	 * Create the scheme "cpu" FMRI.
6263 	 */
6264 	detector = fm_nvlist_create(nva);
6265 	resource = fm_nvlist_create(nva);
6266 	switch (cpunodes[aflt->flt_inst].implementation) {
6267 	case CHEETAH_IMPL:
6268 		cpu_type = FM_EREPORT_CPU_USIII;
6269 		break;
6270 	case CHEETAH_PLUS_IMPL:
6271 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6272 		break;
6273 	case JALAPENO_IMPL:
6274 		cpu_type = FM_EREPORT_CPU_USIIIi;
6275 		break;
6276 	case SERRANO_IMPL:
6277 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6278 		break;
6279 	case JAGUAR_IMPL:
6280 		cpu_type = FM_EREPORT_CPU_USIV;
6281 		break;
6282 	case PANTHER_IMPL:
6283 		cpu_type = FM_EREPORT_CPU_USIVplus;
6284 		break;
6285 	default:
6286 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6287 		break;
6288 	}
6289 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
6290 	    aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version,
6291 	    cpunodes[aflt->flt_inst].device_id);
6292 
6293 	/*
6294 	 * Encode all the common data into the ereport.
6295 	 */
6296 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6297 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6298 
6299 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6300 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6301 	    detector, NULL);
6302 
6303 	/*
6304 	 * Encode the error specific data that was saved in
6305 	 * the async_flt structure into the ereport.
6306 	 */
6307 	cpu_payload_add_aflt(aflt, ereport, resource,
6308 	    &plat_ecc_ch_flt.ecaf_afar_status,
6309 	    &plat_ecc_ch_flt.ecaf_synd_status);
6310 
6311 	if (aflt->flt_panic || panicstr) {
6312 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6313 	} else {
6314 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6315 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6316 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6317 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6318 	}
6319 	/*
6320 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6321 	 * to the SC olny if it can process it.
6322 	 */
6323 
6324 	if (&plat_ecc_capability_sc_get &&
6325 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6326 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6327 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6328 			/*
6329 			 * If afar status is not invalid do a unum lookup.
6330 			 */
6331 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6332 			    AFLT_STAT_INVALID) {
6333 				(void) cpu_get_mem_unum_aflt(
6334 				    plat_ecc_ch_flt.ecaf_synd_status, aflt,
6335 				    unum, UNUM_NAMLEN, &len);
6336 			} else {
6337 				unum[0] = '\0';
6338 			}
6339 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6340 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6341 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6342 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6343 			    ch_flt->flt_sdw_afsr_ext;
6344 
6345 			if (&plat_log_fruid_error2)
6346 				plat_log_fruid_error2(msg_type, unum, aflt,
6347 				    &plat_ecc_ch_flt);
6348 		}
6349 	}
6350 }
6351 
6352 void
6353 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6354 {
6355 	int status;
6356 	ddi_fm_error_t de;
6357 
6358 	bzero(&de, sizeof (ddi_fm_error_t));
6359 
6360 	de.fme_version = DDI_FME_VERSION;
6361 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6362 	    FM_ENA_FMT1);
6363 	de.fme_flag = expected;
6364 	de.fme_bus_specific = (void *)aflt->flt_addr;
6365 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6366 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6367 		aflt->flt_panic = 1;
6368 }
6369 
6370 void
6371 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6372     errorq_t *eqp, uint_t flag)
6373 {
6374 	struct async_flt *aflt = (struct async_flt *)payload;
6375 
6376 	aflt->flt_erpt_class = error_class;
6377 	errorq_dispatch(eqp, payload, payload_sz, flag);
6378 }
6379 
/*
 * Intentionally empty.  The IO module may call this routine, but this
 * cpu module has nothing to do here: the SERD algorithm is handled by
 * the cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
}
6389 
6390 void
6391 adjust_hw_copy_limits(int ecache_size)
6392 {
6393 	/*
6394 	 * Set hw copy limits.
6395 	 *
6396 	 * /etc/system will be parsed later and can override one or more
6397 	 * of these settings.
6398 	 *
6399 	 * At this time, ecache size seems only mildly relevant.
6400 	 * We seem to run into issues with the d-cache and stalls
6401 	 * we see on misses.
6402 	 *
6403 	 * Cycle measurement indicates that 2 byte aligned copies fare
6404 	 * little better than doing things with VIS at around 512 bytes.
6405 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6406 	 * aligned is faster whenever the source and destination data
6407 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6408 	 * limit seems to be driven by the 2K write cache.
6409 	 * When more than 2K of copies are done in non-VIS mode, stores
6410 	 * backup in the write cache.  In VIS mode, the write cache is
6411 	 * bypassed, allowing faster cache-line writes aligned on cache
6412 	 * boundaries.
6413 	 *
6414 	 * In addition, in non-VIS mode, there is no prefetching, so
6415 	 * for larger copies, the advantage of prefetching to avoid even
6416 	 * occasional cache misses is enough to justify using the VIS code.
6417 	 *
6418 	 * During testing, it was discovered that netbench ran 3% slower
6419 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6420 	 * applications, data is only used once (copied to the output
6421 	 * buffer, then copied by the network device off the system).  Using
6422 	 * the VIS copy saves more L2 cache state.  Network copies are
6423 	 * around 1.3K to 1.5K in size for historical reasons.
6424 	 *
6425 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6426 	 * aligned copy even for large caches and 8 MB ecache.  The
6427 	 * infrastructure to allow different limits for different sized
6428 	 * caches is kept to allow further tuning in later releases.
6429 	 */
6430 
6431 	if (min_ecache_size == 0 && use_hw_bcopy) {
6432 		/*
6433 		 * First time through - should be before /etc/system
6434 		 * is read.
6435 		 * Could skip the checks for zero but this lets us
6436 		 * preserve any debugger rewrites.
6437 		 */
6438 		if (hw_copy_limit_1 == 0) {
6439 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6440 			priv_hcl_1 = hw_copy_limit_1;
6441 		}
6442 		if (hw_copy_limit_2 == 0) {
6443 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6444 			priv_hcl_2 = hw_copy_limit_2;
6445 		}
6446 		if (hw_copy_limit_4 == 0) {
6447 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6448 			priv_hcl_4 = hw_copy_limit_4;
6449 		}
6450 		if (hw_copy_limit_8 == 0) {
6451 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6452 			priv_hcl_8 = hw_copy_limit_8;
6453 		}
6454 		min_ecache_size = ecache_size;
6455 	} else {
6456 		/*
6457 		 * MP initialization. Called *after* /etc/system has
6458 		 * been parsed. One CPU has already been initialized.
6459 		 * Need to cater for /etc/system having scragged one
6460 		 * of our values.
6461 		 */
6462 		if (ecache_size == min_ecache_size) {
6463 			/*
6464 			 * Same size ecache. We do nothing unless we
6465 			 * have a pessimistic ecache setting. In that
6466 			 * case we become more optimistic (if the cache is
6467 			 * large enough).
6468 			 */
6469 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6470 				/*
6471 				 * Need to adjust hw_copy_limit* from our
6472 				 * pessimistic uniprocessor value to a more
6473 				 * optimistic UP value *iff* it hasn't been
6474 				 * reset.
6475 				 */
6476 				if ((ecache_size > 1048576) &&
6477 				    (priv_hcl_8 == hw_copy_limit_8)) {
6478 					if (ecache_size <= 2097152)
6479 						hw_copy_limit_8 = 4 *
6480 						    VIS_COPY_THRESHOLD;
6481 					else if (ecache_size <= 4194304)
6482 						hw_copy_limit_8 = 4 *
6483 						    VIS_COPY_THRESHOLD;
6484 					else
6485 						hw_copy_limit_8 = 4 *
6486 						    VIS_COPY_THRESHOLD;
6487 					priv_hcl_8 = hw_copy_limit_8;
6488 				}
6489 			}
6490 		} else if (ecache_size < min_ecache_size) {
6491 			/*
6492 			 * A different ecache size. Can this even happen?
6493 			 */
6494 			if (priv_hcl_8 == hw_copy_limit_8) {
6495 				/*
6496 				 * The previous value that we set
6497 				 * is unchanged (i.e., it hasn't been
6498 				 * scragged by /etc/system). Rewrite it.
6499 				 */
6500 				if (ecache_size <= 1048576)
6501 					hw_copy_limit_8 = 8 *
6502 					    VIS_COPY_THRESHOLD;
6503 				else if (ecache_size <= 2097152)
6504 					hw_copy_limit_8 = 8 *
6505 					    VIS_COPY_THRESHOLD;
6506 				else if (ecache_size <= 4194304)
6507 					hw_copy_limit_8 = 8 *
6508 					    VIS_COPY_THRESHOLD;
6509 				else
6510 					hw_copy_limit_8 = 10 *
6511 					    VIS_COPY_THRESHOLD;
6512 				priv_hcl_8 = hw_copy_limit_8;
6513 				min_ecache_size = ecache_size;
6514 			}
6515 		}
6516 	}
6517 }
6518 
6519 /*
6520  * Called from illegal instruction trap handler to see if we can attribute
6521  * the trap to a fpras check.
6522  */
6523 int
6524 fpras_chktrap(struct regs *rp)
6525 {
6526 	int op;
6527 	struct fpras_chkfngrp *cgp;
6528 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6529 
6530 	if (fpras_chkfngrps == NULL)
6531 		return (0);
6532 
6533 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6534 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6535 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6536 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6537 			break;
6538 	}
6539 	if (op == FPRAS_NCOPYOPS)
6540 		return (0);
6541 
6542 	/*
6543 	 * This is an fpRAS failure caught through an illegal
6544 	 * instruction - trampoline.
6545 	 */
6546 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6547 	rp->r_npc = rp->r_pc + 4;
6548 	return (1);
6549 }
6550 
/*
 * fpras_failure is called when a fpras check detects a bad calculation
 * result or an illegal instruction trap is attributed to an fpras
 * check.  In all cases we are still bound to CPU.
 *
 * op	index of the copy operation whose check failed
 * how	how the failure was detected; packed with op into flt_status
 *
 * Returns 1 whenever it returns.  When the failing copy had no lofault
 * protection it calls fm_panic() instead (presumably not returning).
 */
int
fpras_failure(int op, int how)
{
	int use_hw_bcopy_orig, use_hw_bzero_orig;
	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
	ch_async_flt_t ch_flt;
	struct async_flt *aflt = (struct async_flt *)&ch_flt;
	struct fpras_chkfn *sfp, *cfp;
	uint32_t *sip, *cip;
	int i;

	/*
	 * We're running on a sick CPU.  Avoid further FPU use at least for
	 * the time in which we dispatch an ereport and (if applicable) panic.
	 * The current settings are saved first so they can be restored if
	 * we end up continuing; this must happen before the bzero below.
	 */
	use_hw_bcopy_orig = use_hw_bcopy;
	use_hw_bzero_orig = use_hw_bzero;
	hcl1_orig = hw_copy_limit_1;
	hcl2_orig = hw_copy_limit_2;
	hcl4_orig = hw_copy_limit_4;
	hcl8_orig = hw_copy_limit_8;
	use_hw_bcopy = use_hw_bzero = 0;
	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
	    hw_copy_limit_8 = 0;

	bzero(&ch_flt, sizeof (ch_async_flt_t));
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_class = CPU_FAULT;
	aflt->flt_inst = CPU->cpu_id;
	/* Detection method in bits 8 and up, operation index below it. */
	aflt->flt_status = (how << 8) | op;
	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
	ch_flt.flt_type = CPU_FPUERR;

	/*
	 * We must panic if the copy operation had no lofault protection -
	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
	 */
	aflt->flt_panic = (curthread->t_lofault == NULL);

	/*
	 * XOR the source instruction block with the copied instruction
	 * block - this will show us which bit(s) are corrupted.
	 * fpras_blk0 is compared for FPRAS_BCOPY and FPRAS_COPYOUT,
	 * fpras_blk1 for every other operation.
	 */
	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
		sip = &sfp->fpras_blk0[0];
		cip = &cfp->fpras_blk0[0];
	} else {
		sip = &sfp->fpras_blk1[0];
		cip = &cfp->fpras_blk1[0];
	}
	/* 16 32-bit words are compared; a zero word means no difference. */
	for (i = 0; i < 16; ++i, ++sip, ++cip)
		ch_flt.flt_fpdata[i] = *sip ^ *cip;

	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);

	if (aflt->flt_panic)
		fm_panic("FPU failure on CPU %d", CPU->cpu_id);

	/*
	 * We get here for copyin/copyout and kcopy or bcopy where the
	 * caller has used on_fault.  We will flag the error so that
	 * the process may be killed.  The trap_async_hwerr mechanism will
	 * take appropriate further action (such as a reboot, contract
	 * notification etc).  Since we may be continuing we will
	 * restore the global hardware copy acceleration switches.
	 *
	 * When we return from this function to the copy function we want to
	 * avoid potentially bad data being used, ie we want the affected
	 * copy function to return an error.  The caller should therefore
	 * invoke its lofault handler (which always exists for these functions)
	 * which will return the appropriate error.
	 */
	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
	aston(curthread);

	/* Put back the hardware copy settings saved on entry. */
	use_hw_bcopy = use_hw_bcopy_orig;
	use_hw_bzero = use_hw_bzero_orig;
	hw_copy_limit_1 = hcl1_orig;
	hw_copy_limit_2 = hcl2_orig;
	hw_copy_limit_4 = hcl4_orig;
	hw_copy_limit_8 = hcl8_orig;

	return (1);
}
6644 
6645 #define	VIS_BLOCKSIZE		64
6646 
6647 int
6648 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
6649 {
6650 	int ret, watched;
6651 
6652 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6653 	ret = dtrace_blksuword32(addr, data, 0);
6654 	if (watched)
6655 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6656 
6657 	return (ret);
6658 }
6659 
6660 /*
6661  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
6662  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
6663  * CEEN from the EER to disable traps for further disrupting error types
6664  * on that cpu.  We could cross-call instead, but that has a larger
6665  * instruction and data footprint than cross-trapping, and the cpu is known
6666  * to be faulted.
6667  */
6668 
6669 void
6670 cpu_faulted_enter(struct cpu *cp)
6671 {
6672 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
6673 }
6674 
6675 /*
6676  * Called when a cpu leaves the CPU_FAULTED state to return to one of
6677  * offline, spare, or online (by the cpu requesting this state change).
6678  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
6679  * disrupting error bits that have accumulated without trapping, then
6680  * we cross-trap to re-enable CEEN controlled traps.
6681  */
6682 void
6683 cpu_faulted_exit(struct cpu *cp)
6684 {
6685 	ch_cpu_errors_t cpu_error_regs;
6686 
6687 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
6688 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
6689 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
6690 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
6691 	    (uint64_t)&cpu_error_regs, 0);
6692 
6693 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
6694 }
6695 
6696 /*
6697  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
6698  * the errors in the original AFSR, 0 otherwise.
6699  *
6700  * For all procs if the initial error was a BERR or TO, then it is possible
6701  * that we may have caused a secondary BERR or TO in the process of logging the
6702  * inital error via cpu_run_bus_error_handlers().  If this is the case then
6703  * if the request was protected then a panic is still not necessary, if not
6704  * protected then aft_panic is already set - so either way there's no need
6705  * to set aft_panic for the secondary error.
6706  *
6707  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
6708  * a store merge, then the error handling code will call cpu_deferred_error().
6709  * When clear_errors() is called, it will determine that secondary errors have
6710  * occurred - in particular, the store merge also caused a EDU and WDU that
6711  * weren't discovered until this point.
6712  *
6713  * We do three checks to verify that we are in this case.  If we pass all three
6714  * checks, we return 1 to indicate that we should not panic.  If any unexpected
6715  * errors occur, we return 0.
6716  *
6717  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
6718  * handled in cpu_disrupting_errors().  Since this function is not even called
6719  * in the case we are interested in, we just return 0 for these processors.
6720  */
6721 /*ARGSUSED*/
6722 static int
6723 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
6724     uint64_t t_afar)
6725 {
6726 #if defined(CHEETAH_PLUS)
6727 #else	/* CHEETAH_PLUS */
6728 	struct async_flt *aflt = (struct async_flt *)ch_flt;
6729 #endif	/* CHEETAH_PLUS */
6730 
6731 	/*
6732 	 * Was the original error a BERR or TO and only a BERR or TO
6733 	 * (multiple errors are also OK)
6734 	 */
6735 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
6736 		/*
6737 		 * Is the new error a BERR or TO and only a BERR or TO
6738 		 * (multiple errors are also OK)
6739 		 */
6740 		if ((ch_flt->afsr_errs &
6741 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
6742 			return (1);
6743 	}
6744 
6745 #if defined(CHEETAH_PLUS)
6746 	return (0);
6747 #else	/* CHEETAH_PLUS */
6748 	/*
6749 	 * Now look for secondary effects of a UE on cheetah/jalapeno
6750 	 *
6751 	 * Check the original error was a UE, and only a UE.  Note that
6752 	 * the ME bit will cause us to fail this check.
6753 	 */
6754 	if (t_afsr_errs != C_AFSR_UE)
6755 		return (0);
6756 
6757 	/*
6758 	 * Check the secondary errors were exclusively an EDU and/or WDU.
6759 	 */
6760 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
6761 		return (0);
6762 
6763 	/*
6764 	 * Check the AFAR of the original error and secondary errors
6765 	 * match to the 64-byte boundary
6766 	 */
6767 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
6768 		return (0);
6769 
6770 	/*
6771 	 * We've passed all the checks, so it's a secondary error!
6772 	 */
6773 	return (1);
6774 #endif	/* CHEETAH_PLUS */
6775 }
6776 
6777 /*
6778  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
6779  * is checked for any valid errors.  If found, the error type is
6780  * returned. If not found, the flt_type is checked for L1$ parity errors.
6781  */
6782 /*ARGSUSED*/
6783 static uint8_t
6784 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
6785 {
6786 #if defined(JALAPENO)
6787 	/*
6788 	 * Currently, logging errors to the SC is not supported on Jalapeno
6789 	 */
6790 	return (PLAT_ECC_ERROR2_NONE);
6791 #else
6792 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6793 
6794 	switch (ch_flt->flt_bit) {
6795 	case C_AFSR_CE:
6796 		return (PLAT_ECC_ERROR2_CE);
6797 	case C_AFSR_UCC:
6798 	case C_AFSR_EDC:
6799 	case C_AFSR_WDC:
6800 	case C_AFSR_CPC:
6801 		return (PLAT_ECC_ERROR2_L2_CE);
6802 	case C_AFSR_EMC:
6803 		return (PLAT_ECC_ERROR2_EMC);
6804 	case C_AFSR_IVC:
6805 		return (PLAT_ECC_ERROR2_IVC);
6806 	case C_AFSR_UE:
6807 		return (PLAT_ECC_ERROR2_UE);
6808 	case C_AFSR_UCU:
6809 	case C_AFSR_EDU:
6810 	case C_AFSR_WDU:
6811 	case C_AFSR_CPU:
6812 		return (PLAT_ECC_ERROR2_L2_UE);
6813 	case C_AFSR_IVU:
6814 		return (PLAT_ECC_ERROR2_IVU);
6815 	case C_AFSR_TO:
6816 		return (PLAT_ECC_ERROR2_TO);
6817 	case C_AFSR_BERR:
6818 		return (PLAT_ECC_ERROR2_BERR);
6819 #if defined(CHEETAH_PLUS)
6820 	case C_AFSR_L3_EDC:
6821 	case C_AFSR_L3_UCC:
6822 	case C_AFSR_L3_CPC:
6823 	case C_AFSR_L3_WDC:
6824 		return (PLAT_ECC_ERROR2_L3_CE);
6825 	case C_AFSR_IMC:
6826 		return (PLAT_ECC_ERROR2_IMC);
6827 	case C_AFSR_TSCE:
6828 		return (PLAT_ECC_ERROR2_L2_TSCE);
6829 	case C_AFSR_THCE:
6830 		return (PLAT_ECC_ERROR2_L2_THCE);
6831 	case C_AFSR_L3_MECC:
6832 		return (PLAT_ECC_ERROR2_L3_MECC);
6833 	case C_AFSR_L3_THCE:
6834 		return (PLAT_ECC_ERROR2_L3_THCE);
6835 	case C_AFSR_L3_CPU:
6836 	case C_AFSR_L3_EDU:
6837 	case C_AFSR_L3_UCU:
6838 	case C_AFSR_L3_WDU:
6839 		return (PLAT_ECC_ERROR2_L3_UE);
6840 	case C_AFSR_DUE:
6841 		return (PLAT_ECC_ERROR2_DUE);
6842 	case C_AFSR_DTO:
6843 		return (PLAT_ECC_ERROR2_DTO);
6844 	case C_AFSR_DBERR:
6845 		return (PLAT_ECC_ERROR2_DBERR);
6846 #endif	/* CHEETAH_PLUS */
6847 	default:
6848 		switch (ch_flt->flt_type) {
6849 #if defined(CPU_IMP_L1_CACHE_PARITY)
6850 		case CPU_IC_PARITY:
6851 			return (PLAT_ECC_ERROR2_IPE);
6852 		case CPU_DC_PARITY:
6853 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
6854 				if (ch_flt->parity_data.dpe.cpl_cache ==
6855 				    CPU_PC_PARITY) {
6856 					return (PLAT_ECC_ERROR2_PCACHE);
6857 				}
6858 			}
6859 			return (PLAT_ECC_ERROR2_DPE);
6860 #endif /* CPU_IMP_L1_CACHE_PARITY */
6861 		case CPU_ITLB_PARITY:
6862 			return (PLAT_ECC_ERROR2_ITLB);
6863 		case CPU_DTLB_PARITY:
6864 			return (PLAT_ECC_ERROR2_DTLB);
6865 		default:
6866 			return (PLAT_ECC_ERROR2_NONE);
6867 		}
6868 	}
6869 #endif	/* JALAPENO */
6870 }
6871