1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/ddi.h>
32 #include <sys/sysmacros.h>
33 #include <sys/archsystm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/machparam.h>
36 #include <sys/machsystm.h>
37 #include <sys/machthread.h>
38 #include <sys/cpu.h>
39 #include <sys/cmp.h>
40 #include <sys/elf_SPARC.h>
41 #include <vm/vm_dep.h>
42 #include <vm/hat_sfmmu.h>
43 #include <vm/seg_kpm.h>
44 #include <sys/cpuvar.h>
45 #include <sys/cheetahregs.h>
46 #include <sys/us3_module.h>
47 #include <sys/async.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/dditypes.h>
51 #include <sys/prom_debug.h>
52 #include <sys/prom_plat.h>
53 #include <sys/cpu_module.h>
54 #include <sys/sysmacros.h>
55 #include <sys/intreg.h>
56 #include <sys/clock.h>
57 #include <sys/platform_module.h>
58 #include <sys/machtrap.h>
59 #include <sys/ontrap.h>
60 #include <sys/panic.h>
61 #include <sys/memlist.h>
62 #include <sys/bootconf.h>
63 #include <sys/ivintr.h>
64 #include <sys/atomic.h>
65 #include <sys/taskq.h>
66 #include <sys/note.h>
67 #include <sys/ndifm.h>
68 #include <sys/ddifm.h>
69 #include <sys/fm/protocol.h>
70 #include <sys/fm/util.h>
71 #include <sys/fm/cpu/UltraSPARC-III.h>
72 #include <sys/fpras_impl.h>
73 #include <sys/dtrace.h>
74 #include <sys/watchpoint.h>
75 #include <sys/plat_ecc_unum.h>
76 #include <sys/cyclic.h>
77 #include <sys/errorq.h>
78 #include <sys/errclassify.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 /*
85  * Note that 'Cheetah PRM' refers to:
86  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
87  */
88 
89 /*
90  * Per CPU pointers to physical address of TL>0 logout data areas.
91  * These pointers have to be in the kernel nucleus to avoid MMU
92  * misses.
93  */
94 uint64_t ch_err_tl1_paddrs[NCPU];
95 
96 /*
97  * One statically allocated structure to use during startup/DR
98  * to prevent unnecessary panics.
99  */
100 ch_err_tl1_data_t ch_err_tl1_data;
101 
102 /*
103  * Per CPU pending error at TL>0, used by level15 softint handler
104  */
105 uchar_t ch_err_tl1_pending[NCPU];
106 
107 /*
108  * For deferred CE re-enable after trap.
109  */
110 taskq_t		*ch_check_ce_tq;
111 
112 /*
113  * Internal functions.
114  */
115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
118     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
119 static int clear_ecc(struct async_flt *ecc);
120 #if defined(CPU_IMP_ECACHE_ASSOC)
121 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
122 #endif
123 static int cpu_ecache_set_size(struct cpu *cp);
124 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
125 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
126 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
127 static int cpu_ectag_pa_to_subblk_state(int cachesize,
128 				uint64_t subaddr, uint64_t tag);
129 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
130 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
131 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
132 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
133 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
134 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
135 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
136 static void cpu_scrubphys(struct async_flt *aflt);
137 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
138     int *, int *);
139 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
140 static void cpu_ereport_init(struct async_flt *aflt);
141 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
142 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
143 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
144     uint64_t nceen, ch_cpu_logout_t *clop);
145 static int cpu_ce_delayed_ec_logout(uint64_t);
146 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
147 
148 #ifdef	CHEETAHPLUS_ERRATUM_25
149 static int mondo_recover_proc(uint16_t, int);
150 static void cheetah_nudge_init(void);
151 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
152     cyc_time_t *when);
153 static void cheetah_nudge_buddy(void);
154 #endif	/* CHEETAHPLUS_ERRATUM_25 */
155 
156 #if defined(CPU_IMP_L1_CACHE_PARITY)
157 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
158 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
159 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
160     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
161 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
162 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
163 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
164 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
165 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
166 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
167 #endif	/* CPU_IMP_L1_CACHE_PARITY */
168 
169 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
170     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
171     int *segsp, int *banksp, int *mcidp);
172 
173 /*
174  * This table is used to determine which bit(s) is(are) bad when an ECC
175  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
176  * of this array have the following semantics:
177  *
178  *      00-127  The number of the bad bit, when only one bit is bad.
179  *      128     ECC bit C0 is bad.
180  *      129     ECC bit C1 is bad.
181  *      130     ECC bit C2 is bad.
182  *      131     ECC bit C3 is bad.
183  *      132     ECC bit C4 is bad.
184  *      133     ECC bit C5 is bad.
185  *      134     ECC bit C6 is bad.
186  *      135     ECC bit C7 is bad.
187  *      136     ECC bit C8 is bad.
188  *	137-143 reserved for Mtag Data and ECC.
189  *      144(M2) Two bits are bad within a nibble.
190  *      145(M3) Three bits are bad within a nibble.
191  *      146(M4) Four bits are bad within a nibble.
192  *      147(M)  Multiple bits (5 or more) are bad.
193  *      148     NO bits are bad.
194  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5;
 * an illustrative lookup sketch follows the table definitions below.
195  */
196 
197 #define	C0	128
198 #define	C1	129
199 #define	C2	130
200 #define	C3	131
201 #define	C4	132
202 #define	C5	133
203 #define	C6	134
204 #define	C7	135
205 #define	C8	136
206 #define	MT0	137	/* Mtag Data bit 0 */
207 #define	MT1	138
208 #define	MT2	139
209 #define	MTC0	140	/* Mtag Check bit 0 */
210 #define	MTC1	141
211 #define	MTC2	142
212 #define	MTC3	143
213 #define	M2	144
214 #define	M3	145
215 #define	M4	146
216 #define	M	147
217 #define	NA	148
218 #if defined(JALAPENO) || defined(SERRANO)
219 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
220 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
221 #define	SLAST	S003MEM	/* last special syndrome */
222 #else /* JALAPENO || SERRANO */
223 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
224 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
225 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
226 #define	SLAST	S11C	/* last special syndrome */
227 #endif /* JALAPENO || SERRANO */
228 #if defined(JALAPENO) || defined(SERRANO)
229 #define	BPAR0	152	/* syndromes 152 through 167 for bus parity */
230 #define	BPAR15	167
231 #endif	/* JALAPENO || SERRANO */
232 
233 static uint8_t ecc_syndrome_tab[] =
234 {
235 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
236 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
237 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
238 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
239 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
240 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
241 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
242 #if defined(JALAPENO) || defined(SERRANO)
243 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
244 #else	/* JALAPENO || SERRANO */
245 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
246 #endif	/* JALAPENO || SERRANO */
247 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
248 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
249 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
250 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
251 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
252 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
253 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
254 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
255 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
256 #if defined(JALAPENO) || defined(SERRANO)
257 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
258 #else	/* JALAPENO || SERRANO */
259 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
260 #endif	/* JALAPENO || SERRANO */
261 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
262 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
263 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
264 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
265 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
266 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
267 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
268 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
269 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
270 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
271 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
272 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
273 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
274 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
275 };
276 
277 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
278 
279 #if !(defined(JALAPENO) || defined(SERRANO))
280 /*
281  * This table is used to determine which bit(s) is(are) bad when an Mtag
282  * error occurs.  The array is indexed by a 4-bit ECC syndrome.  The entries
283  * of this array have the following semantics:
284  *
285  *      -1	Invalid mtag syndrome.
286  *      137     Mtag Data 0 is bad.
287  *      138     Mtag Data 1 is bad.
288  *      139     Mtag Data 2 is bad.
289  *      140     Mtag ECC 0 is bad.
290  *      141     Mtag ECC 1 is bad.
291  *      142     Mtag ECC 2 is bad.
292  *      143     Mtag ECC 3 is bad.
293  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
294  */
295 short mtag_syndrome_tab[] =
296 {
297 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
298 };
299 
300 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
301 
302 #else /* !(JALAPENO || SERRANO) */
303 
304 #define	BSYND_TBL_SIZE	16
305 
306 #endif /* !(JALAPENO || SERRANO) */
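
/*
 * Illustrative sketch of how a raw 9-bit E$ ECC syndrome decodes through
 * ecc_syndrome_tab[] per the semantics documented above the table.  This
 * is not part of the build and the example_esynd_class() helper is
 * hypothetical; only the table, its size, and the C0/M2/M/NA encodings
 * are real.
 */
#if 0
static const char *
example_esynd_class(ushort_t synd)
{
	uint8_t v;

	if (synd >= ESYND_TBL_SIZE)
		return ("out-of-range syndrome");
	v = ecc_syndrome_tab[synd];
	if (v <= 127)
		return ("single data bit bad");		/* bit number is v */
	if (v >= C0 && v <= C8)
		return ("ECC check bit bad");		/* check bit v - C0 */
	if (v == M2 || v == M3 || v == M4)
		return ("2, 3 or 4 bits bad");		/* v - M2 + 2 bits */
	if (v == M)
		return ("5 or more bits bad");
	if (v == NA)
		return ("no bits bad");
	return ("Mtag or special syndrome");		/* 137-143, 149+ */
}
#endif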
307 
308 /*
309  * CE initial classification and subsequent action lookup table
310  */
311 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
312 static int ce_disp_inited;
313 
314 /*
315  * Set to disable leaky and partner check for memory correctables
316  */
317 int ce_xdiag_off;
318 
319 /*
320  * The following are not incremented atomically so are indicative only
321  */
322 static int ce_xdiag_drops;
323 static int ce_xdiag_lkydrops;
324 static int ce_xdiag_ptnrdrops;
325 static int ce_xdiag_bad;
326 
327 /*
328  * CE leaky check callback structure
329  */
330 typedef struct {
331 	struct async_flt *lkycb_aflt;
332 	errorq_t *lkycb_eqp;
333 	errorq_elem_t *lkycb_eqep;
334 } ce_lkychk_cb_t;
335 
336 /*
337  * defines for various ecache_flush_flag's
338  */
339 #define	ECACHE_FLUSH_LINE	1
340 #define	ECACHE_FLUSH_ALL	2
341 
342 /*
343  * STICK sync
344  */
345 #define	STICK_ITERATION 10
346 #define	MAX_TSKEW	1
347 #define	EV_A_START	0
348 #define	EV_A_END	1
349 #define	EV_B_START	2
350 #define	EV_B_END	3
351 #define	EVENTS		4
352 
353 static int64_t stick_iter = STICK_ITERATION;
354 static int64_t stick_tsk = MAX_TSKEW;
355 
356 typedef enum {
357 	EVENT_NULL = 0,
358 	SLAVE_START,
359 	SLAVE_CONT,
360 	MASTER_START
361 } event_cmd_t;
362 
363 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
364 static int64_t timestamp[EVENTS];
365 static volatile int slave_done;
366 
367 #ifdef DEBUG
368 #define	DSYNC_ATTEMPTS 64
369 typedef struct {
370 	int64_t	skew_val[DSYNC_ATTEMPTS];
371 } ss_t;
372 
373 ss_t stick_sync_stats[NCPU];
374 #endif /* DEBUG */
375 
376 /*
377  * Maximum number of contexts for Cheetah.
378  */
379 #define	MAX_NCTXS	(1 << 13)
380 
381 /* Will be set non-NULL for Cheetah+ and derivatives. */
382 uchar_t *ctx_pgsz_array = NULL;
383 #if defined(CPU_IMP_DUAL_PAGESIZE)
384 static uchar_t ctx_pgsz_arr[MAX_NCTXS];
385 uint_t disable_dual_pgsz = 0;
386 #endif	/* CPU_IMP_DUAL_PAGESIZE */
387 
388 /*
389  * Save the cache bootup state for use when internal
390  * caches are to be re-enabled after an error occurs.
391  */
392 uint64_t cache_boot_state;
393 
394 /*
395  * PA[22:0] represents the displacement in Safari configuration space.
396  */
397 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
398 
399 bus_config_eclk_t bus_config_eclk[] = {
400 #if defined(JALAPENO) || defined(SERRANO)
401 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
402 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
403 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
404 #else /* JALAPENO || SERRANO */
405 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
406 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
407 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
408 #endif /* JALAPENO || SERRANO */
409 	{0, 0}
410 };
411 
412 /*
413  * Interval for deferred CEEN reenable
414  */
415 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
416 
417 /*
418  * set in /etc/system to control logging of user BERR/TOs
419  */
420 int cpu_berr_to_verbose = 0;
421 
422 /*
423  * set to 0 in /etc/system to defer CEEN reenable for all CEs
424  */
425 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
426 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
427 
428 /*
429  * Set of all offline cpus
430  */
431 cpuset_t cpu_offline_set;
432 
433 static void cpu_delayed_check_ce_errors(void *);
434 static void cpu_check_ce_errors(void *);
435 void cpu_error_ecache_flush(ch_async_flt_t *);
436 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
437 static void cpu_log_and_clear_ce(ch_async_flt_t *);
438 void cpu_ce_detected(ch_cpu_errors_t *, int);
439 
440 /*
441  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
442  * memory refresh interval of current DIMMs (64ms).  After the initial fix,
443  * that gives at least one full refresh cycle in which the cell can leak
444  * (whereafter further refreshes simply reinforce any incorrect bit value).
445  */
446 clock_t cpu_ce_lkychk_timeout_usec = 128000;
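
/*
 * That is, 2 x 64ms refresh = 128ms = 128000 microseconds, which is
 * where the default above comes from.
 */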
447 
448 /*
449  * CE partner check partner caching period in seconds
450  */
451 int cpu_ce_ptnr_cachetime_sec = 60;
452 
453 /*
454  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
455  */
456 #define	CH_SET_TRAP(ttentry, ttlabel)			\
457 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
458 		flush_instr_mem((caddr_t)&ttentry, 32);
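
/*
 * Minimal sketch of the patch-then-flush idiom CH_SET_TRAP relies on
 * (not part of the build): after rewriting instruction memory, the I$
 * must be flushed so stale instructions are not executed.  The
 * ex_patch_target/ex_new_code symbols are hypothetical.
 */
#if 0
extern uint32_t ex_patch_target[8];	/* eight 4-byte instruction slots */
extern const uint32_t ex_new_code[8];

static void
ex_patch_and_flush(void)
{
	bcopy(ex_new_code, ex_patch_target, 32);	/* 8 insns x 4 bytes */
	flush_instr_mem((caddr_t)ex_patch_target, 32);	/* drop stale I$ */
}
#endif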
459 
460 static int min_ecache_size;
461 static uint_t priv_hcl_1;
462 static uint_t priv_hcl_2;
463 static uint_t priv_hcl_4;
464 static uint_t priv_hcl_8;
465 
466 void
467 cpu_setup(void)
468 {
469 	extern int at_flags;
470 	extern int disable_delay_tlb_flush, delay_tlb_flush;
471 	extern int cpc_has_overflow_intr;
472 	extern int disable_text_largepages;
473 	extern int use_text_pgsz4m;
474 
475 	/*
476 	 * Setup chip-specific trap handlers.
477 	 */
478 	cpu_init_trap();
479 
480 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
481 
482 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
483 
484 	/*
485 	 * save the cache bootup state.
486 	 */
487 	cache_boot_state = get_dcu() & DCU_CACHE;
488 
489 	/*
490 	 * Use the maximum number of contexts available for Cheetah
491 	 * unless it has been tuned for debugging.
492 	 * We are checking against 0 here since this value can be patched
493  * while booting.  It cannot be patched via /etc/system since it
494 	 * will be patched too late and thus cause the system to panic.
495 	 */
496 	if (nctxs == 0)
497 		nctxs = MAX_NCTXS;
498 
499 	/*
500 	 * Due to the number of entries in the fully-associative tlb
501 	 * this may have to be tuned lower than in spitfire.
502 	 */
503 	pp_slots = MIN(8, MAXPP_SLOTS);
504 
505 	/*
506 	 * Block stores do not invalidate all pages of the d$, pagecopy
507  * et al. need virtual translations with virtual coloring taken
508 	 * into consideration.  prefetch/ldd will pollute the d$ on the
509 	 * load side.
510 	 */
511 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
512 
513 	if (use_page_coloring) {
514 		do_pg_coloring = 1;
515 		if (use_virtual_coloring)
516 			do_virtual_coloring = 1;
517 	}
518 
519 	isa_list =
520 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
521 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523 
524 	/*
525 	 * On Panther-based machines, this should
526 	 * also include AV_SPARC_POPC too
527 	 */
528 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529 
530 	/*
531 	 * On cheetah, there's no hole in the virtual address space
532 	 */
533 	hole_start = hole_end = 0;
534 
535 	/*
536 	 * The kpm mapping window.
537 	 * kpm_size:
538 	 *	The size of a single kpm range.
539 	 *	The overall size will be: kpm_size * vac_colors.
540 	 * kpm_vbase:
541 	 *	The virtual start address of the kpm range within the kernel
542 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
543 	 */
544 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545 	kpm_size_shift = 43;
546 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547 	kpm_smallpages = 1;
548 
549 	/*
550 	 * The traptrace code uses either %tick or %stick for
551 	 * timestamping.  We have %stick so we can use it.
552 	 */
553 	traptrace_use_stick = 1;
554 
555 	/*
556 	 * Cheetah has a performance counter overflow interrupt
557 	 */
558 	cpc_has_overflow_intr = 1;
559 
560 	/*
561 	 * Use cheetah flush-all support
562 	 */
563 	if (!disable_delay_tlb_flush)
564 		delay_tlb_flush = 1;
565 
566 #if defined(CPU_IMP_DUAL_PAGESIZE)
567 	/*
568 	 * Use Cheetah+ and later dual page size support.
569 	 */
570 	if (!disable_dual_pgsz) {
571 		ctx_pgsz_array = ctx_pgsz_arr;
572 	}
573 #endif	/* CPU_IMP_DUAL_PAGESIZE */
574 
575 	/*
576 	 * Declare that this architecture/cpu combination does fpRAS.
577 	 */
578 	fpras_implemented = 1;
579 
580 	/*
581 	 * Enable 4M pages to be used for mapping user text by default.  Don't
582 	 * use large pages for initialized data segments since we may not know
583 	 * at exec() time what should be the preferred large page size for DTLB
584 	 * programming.
585 	 */
586 	use_text_pgsz4m = 1;
587 	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
588 	    (1 << TTE32M) | (1 << TTE256M);
589 
590 	/*
591 	 * Setup CE lookup table
592 	 */
593 	CE_INITDISPTBL_POPULATE(ce_disp_table);
594 	ce_disp_inited = 1;
595 }
596 
597 /*
598  * Called by setcpudelay
599  */
600 void
601 cpu_init_tick_freq(void)
602 {
603 	/*
604 	 * For UltraSPARC III and beyond we want to use the
605  * system clock rate as the basis for low-level timing,
606  * due to support of mixed-speed CPUs and power management.
607 	 */
608 	if (system_clock_freq == 0)
609 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
610 
611 	sys_tick_freq = system_clock_freq;
612 }
613 
614 #ifdef CHEETAHPLUS_ERRATUM_25
615 /*
616  * Tunables
617  */
618 int cheetah_bpe_off = 0;
619 int cheetah_sendmondo_recover = 1;
620 int cheetah_sendmondo_fullscan = 0;
621 int cheetah_sendmondo_recover_delay = 5;
622 
623 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
624 
625 /*
626  * Recovery Statistics
627  */
628 typedef struct cheetah_livelock_entry	{
629 	int cpuid;		/* fallen cpu */
630 	int buddy;		/* cpu that ran recovery */
631 	clock_t lbolt;		/* when recovery started */
632 	hrtime_t recovery_time;	/* time spent in recovery */
633 } cheetah_livelock_entry_t;
634 
635 #define	CHEETAH_LIVELOCK_NENTRY	32
636 
637 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
638 int cheetah_livelock_entry_nxt;
639 
640 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
641 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
642 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
643 		cheetah_livelock_entry_nxt = 0;				\
644 	}								\
645 }
646 
647 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
648 
649 struct {
650 	hrtime_t hrt;		/* maximum recovery time */
651 	int recovery;		/* recovered */
652 	int full_claimed;	/* maximum pages claimed in full recovery */
653 	int proc_entry;		/* attempted to claim TSB */
654 	int proc_tsb_scan;	/* tsb scanned */
655 	int proc_tsb_partscan;	/* tsb partially scanned */
656 	int proc_tsb_fullscan;	/* whole tsb scanned */
657 	int proc_claimed;	/* maximum pages claimed in tsb scan */
658 	int proc_user;		/* user thread */
659 	int proc_kernel;	/* kernel thread */
660 	int proc_onflt;		/* bad stack */
661 	int proc_cpu;		/* null cpu */
662 	int proc_thread;	/* null thread */
663 	int proc_proc;		/* null proc */
664 	int proc_as;		/* null as */
665 	int proc_hat;		/* null hat */
666 	int proc_hat_inval;	/* hat contents don't make sense */
667 	int proc_hat_busy;	/* hat is changing TSBs */
668 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
669 	int proc_cnum_bad;	/* cnum out of range */
670 	int proc_cnum;		/* last cnum processed */
671 	tte_t proc_tte;		/* last tte processed */
672 } cheetah_livelock_stat;
673 
674 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
675 
676 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
677 	cheetah_livelock_stat.item = value
678 
679 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
680 	if (value > cheetah_livelock_stat.item)		\
681 		cheetah_livelock_stat.item = value;	\
682 }
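
/*
 * Minimal sketch (not part of the build) of the two bookkeeping idioms
 * the macros above implement: a fixed-size wrap-around history buffer
 * and a running maximum.  All ex_* names are hypothetical.
 */
#if 0
#define	EX_NENTRY	32
static int ex_hist[EX_NENTRY];
static int ex_nxt;
static int ex_max;

static void
ex_record(int val)
{
	ex_hist[ex_nxt] = val;		/* CHEETAH_LIVELOCK_ENTRY_NEXT */
	if (++ex_nxt >= EX_NENTRY)
		ex_nxt = 0;		/* wrap, overwriting the oldest */
	if (val > ex_max)
		ex_max = val;		/* CHEETAH_LIVELOCK_MAXSTAT */
}
#endif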
683 
684 /*
685  * Attempt to recover a cpu by claiming every cache line mapped by
686  * the TSB that the non-responsive cpu is using.  Since we can't
687  * grab any adaptive lock, this is a best-effort attempt.  Because
688  * we don't grab any locks, we must operate under the protection of
689  * on_fault().
690  *
691  * Return 1 if cpuid could be recovered, 0 if failed.
692  */
693 int
694 mondo_recover_proc(uint16_t cpuid, int bn)
695 {
696 	label_t ljb;
697 	cpu_t *cp;
698 	kthread_t *t;
699 	proc_t *p;
700 	struct as *as;
701 	struct hat *hat;
702 	short  cnum;
703 	struct tsb_info *tsbinfop;
704 	struct tsbe *tsbep;
705 	caddr_t tsbp;
706 	caddr_t end_tsbp;
707 	uint64_t paddr;
708 	uint64_t idsr;
709 	u_longlong_t pahi, palo;
710 	int pages_claimed = 0;
711 	tte_t tsbe_tte;
712 	int tried_kernel_tsb = 0;
713 
714 	CHEETAH_LIVELOCK_STAT(proc_entry);
715 
716 	if (on_fault(&ljb)) {
717 		CHEETAH_LIVELOCK_STAT(proc_onflt);
718 		goto badstruct;
719 	}
720 
721 	if ((cp = cpu[cpuid]) == NULL) {
722 		CHEETAH_LIVELOCK_STAT(proc_cpu);
723 		goto badstruct;
724 	}
725 
726 	if ((t = cp->cpu_thread) == NULL) {
727 		CHEETAH_LIVELOCK_STAT(proc_thread);
728 		goto badstruct;
729 	}
730 
731 	if ((p = ttoproc(t)) == NULL) {
732 		CHEETAH_LIVELOCK_STAT(proc_proc);
733 		goto badstruct;
734 	}
735 
736 	if ((as = p->p_as) == NULL) {
737 		CHEETAH_LIVELOCK_STAT(proc_as);
738 		goto badstruct;
739 	}
740 
741 	if ((hat = as->a_hat) == NULL) {
742 		CHEETAH_LIVELOCK_STAT(proc_hat);
743 		goto badstruct;
744 	}
745 
746 	if (hat != ksfmmup) {
747 		CHEETAH_LIVELOCK_STAT(proc_user);
748 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
749 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
750 			goto badstruct;
751 		}
752 		tsbinfop = hat->sfmmu_tsb;
753 		if (tsbinfop == NULL) {
754 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
755 			goto badstruct;
756 		}
757 		tsbp = tsbinfop->tsb_va;
758 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
759 	} else {
760 		CHEETAH_LIVELOCK_STAT(proc_kernel);
761 		tsbinfop = NULL;
762 		tsbp = ktsb_base;
763 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
764 	}
765 
766 	/* Verify as */
767 	if (hat->sfmmu_as != as) {
768 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
769 		goto badstruct;
770 	}
771 
772 	cnum = hat->sfmmu_cnum;
773 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
774 
775 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) {
776 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
777 		goto badstruct;
778 	}
779 
780 	do {
781 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
782 
783 		/*
784 		 * Skip TSBs being relocated.  This is important because
785 		 * we want to avoid the following deadlock scenario:
786 		 *
787 		 * 1) when we came in we set ourselves to "in recover" state.
788 		 * 2) when we try to touch TSB being relocated the mapping
789 		 *    will be in the suspended state so we'll spin waiting
790 		 *    for it to be unlocked.
791 		 * 3) when the CPU that holds the TSB mapping locked tries to
792  *    unlock it, it will send an xtrap which will fail to xcall
793 		 *    us or the CPU we're trying to recover, and will in turn
794 		 *    enter the mondo code.
795 		 * 4) since we are still spinning on the locked mapping
796 		 *    no further progress will be made and the system will
797 		 *    inevitably hard hang.
798 		 *
799 		 * A TSB not being relocated can't begin being relocated
800 		 * while we're accessing it because we check
801 		 * sendmondo_in_recover before relocating TSBs.
802 		 */
803 		if (hat != ksfmmup &&
804 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
805 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
806 			goto next_tsbinfo;
807 		}
808 
809 		for (tsbep = (struct tsbe *)tsbp;
810 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
811 			tsbe_tte = tsbep->tte_data;
812 
813 			if (tsbe_tte.tte_val == 0) {
814 				/*
815 				 * Invalid tte
816 				 */
817 				continue;
818 			}
819 			if (tsbe_tte.tte_se) {
820 				/*
821 				 * Don't want device registers
822 				 */
823 				continue;
824 			}
825 			if (tsbe_tte.tte_cp == 0) {
826 				/*
827 				 * Must be cached in E$
828 				 */
829 				continue;
830 			}
831 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
832 			idsr = getidsr();
833 			if ((idsr & (IDSR_NACK_BIT(bn) |
834 			    IDSR_BUSY_BIT(bn))) == 0) {
835 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
836 				goto done;
837 			}
838 			pahi = tsbe_tte.tte_pahi;
839 			palo = tsbe_tte.tte_palo;
840 			paddr = (uint64_t)((pahi << 32) |
841 			    (palo << MMU_PAGESHIFT));
842 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
843 			    CH_ECACHE_SUBBLK_SIZE);
844 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
845 				shipit(cpuid, bn);
846 			}
847 			pages_claimed++;
848 		}
849 next_tsbinfo:
850 		if (tsbinfop != NULL)
851 			tsbinfop = tsbinfop->tsb_next;
852 		if (tsbinfop != NULL) {
853 			tsbp = tsbinfop->tsb_va;
854 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
855 		} else if (tsbp == ktsb_base) {
856 			tried_kernel_tsb = 1;
857 		} else if (!tried_kernel_tsb) {
858 			tsbp = ktsb_base;
859 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
860 			hat = ksfmmup;
861 			tsbinfop = NULL;
862 		}
863 	} while (tsbinfop != NULL ||
864 	    ((tsbp == ktsb_base) && !tried_kernel_tsb));
865 
866 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
867 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
868 	no_fault();
869 	idsr = getidsr();
870 	if ((idsr & (IDSR_NACK_BIT(bn) |
871 	    IDSR_BUSY_BIT(bn))) == 0) {
872 		return (1);
873 	} else {
874 		return (0);
875 	}
876 
877 done:
878 	no_fault();
879 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
880 	return (1);
881 
882 badstruct:
883 	no_fault();
884 	return (0);
885 }
886 
887 /*
888  * Attempt to claim ownership, temporarily, of every cache line that a
889  * non-responsive cpu might be using.  This might kick that cpu out of
890  * its non-responsive state.  (The cas32 single-recoverer gate used here is sketched after this function.)
891  *
892  * The return value indicates to the caller if we have exhausted all recovery
893  * techniques. If 1 is returned, it is useless to call this function again
894  * even for a different target CPU.
895  */
896 int
897 mondo_recover(uint16_t cpuid, int bn)
898 {
899 	struct memseg *seg;
900 	uint64_t begin_pa, end_pa, cur_pa;
901 	hrtime_t begin_hrt, end_hrt;
902 	int retval = 0;
903 	int pages_claimed = 0;
904 	cheetah_livelock_entry_t *histp;
905 	uint64_t idsr;
906 
907 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
908 		/*
909 		 * Wait while recovery takes place
910 		 */
911 		while (sendmondo_in_recover) {
912 			drv_usecwait(1);
913 		}
914 		/*
915 		 * Assume we didn't claim the whole memory. If
916 		 * the target of this caller is not recovered,
917 		 * it will come back.
918 		 */
919 		return (retval);
920 	}
921 
922 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
923 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
924 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
925 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
926 
927 	begin_hrt = gethrtime_waitfree();
928 	/*
929 	 * First try to claim the lines in the TSB the target
930 	 * may have been using.
931 	 */
932 	if (mondo_recover_proc(cpuid, bn) == 1) {
933 		/*
934 		 * Didn't claim the whole memory
935 		 */
936 		goto done;
937 	}
938 
939 	/*
940 	 * We tried using the TSB. The target is still
941 	 * not recovered. Check if complete memory scan is
942 	 * enabled.
943 	 */
944 	if (cheetah_sendmondo_fullscan == 0) {
945 		/*
946 		 * Full memory scan is disabled.
947 		 */
948 		retval = 1;
949 		goto done;
950 	}
951 
952 	/*
953 	 * Try claiming the whole memory.
954 	 */
955 	for (seg = memsegs; seg; seg = seg->next) {
956 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
957 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
958 		for (cur_pa = begin_pa; cur_pa < end_pa;
959 		    cur_pa += MMU_PAGESIZE) {
960 			idsr = getidsr();
961 			if ((idsr & (IDSR_NACK_BIT(bn) |
962 			    IDSR_BUSY_BIT(bn))) == 0) {
963 				/*
964 				 * Didn't claim all memory
965 				 */
966 				goto done;
967 			}
968 			claimlines(cur_pa, MMU_PAGESIZE,
969 			    CH_ECACHE_SUBBLK_SIZE);
970 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
971 				shipit(cpuid, bn);
972 			}
973 			pages_claimed++;
974 		}
975 	}
976 
977 	/*
978 	 * We did all we could.
979 	 */
980 	retval = 1;
981 
982 done:
983 	/*
984 	 * Update statistics
985 	 */
986 	end_hrt = gethrtime_waitfree();
987 	CHEETAH_LIVELOCK_STAT(recovery);
988 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
989 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
990 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
991 	    (end_hrt -  begin_hrt));
992 
993 	while (cas32(&sendmondo_in_recover, 1, 0) != 1);
994 
995 	return (retval);
996 }
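
/*
 * Sketch (not part of the build) of the cas32() single-recoverer gate
 * used by mondo_recover() above: the first caller to swing the flag
 * from 0 to 1 runs recovery, everyone else spins until it drops back
 * to 0.  The ex_* names are hypothetical.
 */
#if 0
static uint32_t ex_in_recover;

static void
ex_recover_once(void (*do_recovery)(void))
{
	if (cas32(&ex_in_recover, 0, 1) != 0) {
		while (ex_in_recover)
			drv_usecwait(1);	/* lost: wait for the winner */
		return;
	}
	do_recovery();				/* won: run recovery once */
	while (cas32(&ex_in_recover, 1, 0) != 1)
		;				/* release the gate */
}
#endif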
997 
998 /*
999  * This is called by the cyclic framework when this CPU becomes online
1000  */
1001 /*ARGSUSED*/
1002 static void
1003 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
1004 {
1005 
1006 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1007 	hdlr->cyh_level = CY_LOW_LEVEL;
1008 	hdlr->cyh_arg = NULL;
1009 
1010 	/*
1011 	 * Stagger the start time
1012 	 */
1013 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1014 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1015 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1016 	}
1017 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1018 }
1019 
1020 /*
1021  * Create a low-level cyclic to send an xtrap to the next online cpu.
1022  * However, there's no need to have this running on a uniprocessor system.
1023  */
1024 static void
1025 cheetah_nudge_init(void)
1026 {
1027 	cyc_omni_handler_t hdlr;
1028 
1029 	if (max_ncpus == 1) {
1030 		return;
1031 	}
1032 
1033 	hdlr.cyo_online = cheetah_nudge_onln;
1034 	hdlr.cyo_offline = NULL;
1035 	hdlr.cyo_arg = NULL;
1036 
1037 	mutex_enter(&cpu_lock);
1038 	(void) cyclic_add_omni(&hdlr);
1039 	mutex_exit(&cpu_lock);
1040 }
1041 
1042 /*
1043  * Cyclic handler to wake up buddy
1044  */
1045 void
1046 cheetah_nudge_buddy(void)
1047 {
1048 	/*
1049 	 * Disable kernel preemption to protect the cpu list
1050 	 */
1051 	kpreempt_disable();
1052 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1053 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1054 		    0, 0);
1055 	}
1056 	kpreempt_enable();
1057 }
1058 
1059 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1060 
1061 #ifdef SEND_MONDO_STATS
1062 uint32_t x_one_stimes[64];
1063 uint32_t x_one_ltimes[16];
1064 uint32_t x_set_stimes[64];
1065 uint32_t x_set_ltimes[16];
1066 uint32_t x_set_cpus[NCPU];
1067 uint32_t x_nack_stimes[64];
1068 #endif
1069 
1070 /*
1071  * Note: A version of this function is used by the debugger via the KDI,
1072  * and must be kept in sync with this version.  Any changes made to this
1073  * function to support new chips or to accommodate errata must also be included
1074  * in the KDI-specific version.  See us3_kdi.c.
1075  */
1076 void
1077 send_one_mondo(int cpuid)
1078 {
1079 	int busy, nack;
1080 	uint64_t idsr, starttick, endtick, tick, lasttick;
1081 	uint64_t busymask;
1082 #ifdef	CHEETAHPLUS_ERRATUM_25
1083 	int recovered = 0;
1084 #endif
1085 
1086 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1087 	starttick = lasttick = gettick();
1088 	shipit(cpuid, 0);
1089 	endtick = starttick + xc_tick_limit;
1090 	busy = nack = 0;
1091 #if defined(JALAPENO) || defined(SERRANO)
1092 	/*
1093 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1094 	 * will be used for dispatching the interrupt.  For now, assume
1095 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1096 	 * issues with respect to BUSY/NACK pair usage.
1097 	 */
1098 	busymask  = IDSR_BUSY_BIT(cpuid);
1099 #else /* JALAPENO || SERRANO */
1100 	busymask = IDSR_BUSY;
1101 #endif /* JALAPENO || SERRANO */
1102 	for (;;) {
1103 		idsr = getidsr();
1104 		if (idsr == 0)
1105 			break;
1106 
1107 		tick = gettick();
1108 		/*
1109 		 * If there is a big jump between the current tick
1110 		 * count and lasttick, we have probably hit a break
1111 		 * point.  Adjust endtick accordingly to avoid panic.
1112 		 */
1113 		if (tick > (lasttick + xc_tick_jump_limit))
1114 			endtick += (tick - lasttick);
1115 		lasttick = tick;
1116 		if (tick > endtick) {
1117 			if (panic_quiesce)
1118 				return;
1119 #ifdef	CHEETAHPLUS_ERRATUM_25
1120 			if (cheetah_sendmondo_recover && recovered == 0) {
1121 				if (mondo_recover(cpuid, 0)) {
1122 					/*
1123 					 * We claimed the whole memory or
1124 					 * full scan is disabled.
1125 					 */
1126 					recovered++;
1127 				}
1128 				tick = gettick();
1129 				endtick = tick + xc_tick_limit;
1130 				lasttick = tick;
1131 				/*
1132 				 * Recheck idsr
1133 				 */
1134 				continue;
1135 			} else
1136 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1137 			{
1138 				cmn_err(CE_PANIC, "send mondo timeout "
1139 				    "(target 0x%x) [%d NACK %d BUSY]",
1140 				    cpuid, nack, busy);
1141 			}
1142 		}
1143 
1144 		if (idsr & busymask) {
1145 			busy++;
1146 			continue;
1147 		}
1148 		drv_usecwait(1);
1149 		shipit(cpuid, 0);
1150 		nack++;
1151 		busy = 0;
1152 	}
1153 #ifdef SEND_MONDO_STATS
1154 	{
1155 		int n = gettick() - starttick;
1156 		if (n < 8192)
1157 			x_one_stimes[n >> 7]++;
1158 		else
1159 			x_one_ltimes[(n >> 13) & 0xf]++;
1160 	}
1161 #endif
1162 }
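
/*
 * Sketch (not part of the build) of the breakpoint-tolerant deadline
 * idiom in send_one_mondo() above: if the tick counter jumps by more
 * than the jump limit between polls, the gap is assumed to be time
 * spent at a debugger breakpoint and is added back to the deadline.
 * The ex_* names are hypothetical.
 */
#if 0
static int
ex_poll_deadline(int (*done)(void), uint64_t limit, uint64_t jump_limit)
{
	uint64_t tick, lasttick, endtick;

	lasttick = gettick();
	endtick = lasttick + limit;
	while (!done()) {
		tick = gettick();
		if (tick > lasttick + jump_limit)
			endtick += tick - lasttick;	/* forgive the jump */
		lasttick = tick;
		if (tick > endtick)
			return (-1);			/* genuine timeout */
	}
	return (0);
}
#endif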
1163 
1164 void
1165 syncfpu(void)
1166 {
1167 }
1168 
1169 /*
1170  * Return processor specific async error structure
1171  * size used.
1172  */
1173 int
1174 cpu_aflt_size(void)
1175 {
1176 	return (sizeof (ch_async_flt_t));
1177 }
1178 
1179 /*
1180  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1181  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1182  * flush the error that caused the UCU/UCC, then again here at the end to
1183  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1184  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1185  * another Fast ECC trap.
1186  *
1187  * Cheetah+ also handles: TSCE: No additional processing required.
1188  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1189  *
1190  * Note that the p_clo_flags input is only valid in cases where the
1191  * cpu_private struct is not yet initialized (since that is the only
1192  * time that information cannot be obtained from the logout struct.)
1193  */
1194 /*ARGSUSED*/
1195 void
1196 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1197 {
1198 	ch_cpu_logout_t *clop;
1199 	uint64_t ceen, nceen;
1200 
1201 	/*
1202 	 * Get the CPU logout info.  If we can't find our CPU private
1203 	 * pointer, then we will have to make do without any detailed
1204 	 * logout information.
1205 	 */
1206 	if (CPU_PRIVATE(CPU) == NULL) {
1207 		clop = NULL;
1208 		ceen = p_clo_flags & EN_REG_CEEN;
1209 		nceen = p_clo_flags & EN_REG_NCEEN;
1210 	} else {
1211 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1212 		ceen = clop->clo_flags & EN_REG_CEEN;
1213 		nceen = clop->clo_flags & EN_REG_NCEEN;
1214 	}
1215 
1216 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1217 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1218 }
1219 
1220 /*
1221  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1222  * ECC at TL>0.  Need to supply either an error register pointer or a
1223  * cpu logout structure pointer.
1224  */
1225 static void
1226 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1227     uint64_t nceen, ch_cpu_logout_t *clop)
1228 {
1229 	struct async_flt *aflt;
1230 	ch_async_flt_t ch_flt;
1231 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1232 	char pr_reason[MAX_REASON_STRING];
1233 	ch_cpu_errors_t cpu_error_regs;
1234 
1235 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1236 	/*
1237 	 * If no cpu logout data, then we will have to make do without
1238 	 * any detailed logout information.
1239 	 */
1240 	if (clop == NULL) {
1241 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1242 		get_cpu_error_state(&cpu_error_regs);
1243 		set_cpu_error_state(&cpu_error_regs);
1244 		t_afar = cpu_error_regs.afar;
1245 		t_afsr = cpu_error_regs.afsr;
1246 		t_afsr_ext = cpu_error_regs.afsr_ext;
1247 #if defined(SERRANO)
1248 		ch_flt.afar2 = cpu_error_regs.afar2;
1249 #endif	/* SERRANO */
1250 	} else {
1251 		t_afar = clop->clo_data.chd_afar;
1252 		t_afsr = clop->clo_data.chd_afsr;
1253 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1254 #if defined(SERRANO)
1255 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1256 #endif	/* SERRANO */
1257 	}
1258 
1259 	/*
1260 	 * In order to simplify code, we maintain this afsr_errs
1261 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1262 	 * sticky bits.
1263 	 */
1264 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1265 	    (t_afsr & C_AFSR_ALL_ERRS);
1266 	pr_reason[0] = '\0';
1267 
1268 	/* Setup the async fault structure */
1269 	aflt = (struct async_flt *)&ch_flt;
1270 	aflt->flt_id = gethrtime_waitfree();
1271 	ch_flt.afsr_ext = t_afsr_ext;
1272 	ch_flt.afsr_errs = t_afsr_errs;
1273 	aflt->flt_stat = t_afsr;
1274 	aflt->flt_addr = t_afar;
1275 	aflt->flt_bus_id = getprocessorid();
1276 	aflt->flt_inst = CPU->cpu_id;
1277 	aflt->flt_pc = tpc;
1278 	aflt->flt_prot = AFLT_PROT_NONE;
1279 	aflt->flt_class = CPU_FAULT;
1280 	aflt->flt_priv = priv;
1281 	aflt->flt_tl = tl;
1282 	aflt->flt_status = ECC_F_TRAP;
1283 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1284 
1285 	/*
1286 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1287 	 * cmn_err messages out to the console.  The situation is a UCU (in
1288 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1289 	 * The messages for the UCU and WDU are enqueued and then pulled off
1290 	 * the async queue via softint and syslogd starts to process them
1291 	 * but doesn't get them to the console.  The UE causes a panic, but
1292 	 * since the UCU/WDU messages are already in transit, those aren't
1293 	 * on the async queue.  The hack is to check if we have a matching
1294 	 * WDU event for the UCU, and if it matches, we're more than likely
1295 	 * going to panic with a UE, unless we're under protection.  So, we
1296 	 * check to see if we got a matching WDU event and if we're under
1297 	 * protection.
1298 	 *
1299 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1300 	 * looks like this:
1301 	 *    UCU->WDU->UE
1302 	 * For Panther, it could look like either of these:
1303 	 *    UCU---->WDU->L3_WDU->UE
1304 	 *    L3_UCU->WDU->L3_WDU->UE
1305 	 */
1306 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1307 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1308 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1309 		get_cpu_error_state(&cpu_error_regs);
1310 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1311 		    (cpu_error_regs.afar == t_afar));
1312 		aflt->flt_panic |= ((clop == NULL) &&
1313 		    (t_afsr_errs & C_AFSR_WDU));
1314 	}
1315 
1316 	/*
1317 	 * Queue events on the async event queue, one event per error bit.
1318 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1319 	 * queue an event to complain.
1320 	 */
1321 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1322 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1323 		ch_flt.flt_type = CPU_INV_AFSR;
1324 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1325 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1326 		    aflt->flt_panic);
1327 	}
1328 
1329 	/*
1330 	 * Zero out + invalidate CPU logout.
1331 	 */
1332 	if (clop) {
1333 		bzero(clop, sizeof (ch_cpu_logout_t));
1334 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1335 	}
1336 
1337 	/*
1338 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1339 	 * or disrupting errors have happened.  We do this because if a
1340 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1341 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1342 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1343 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1344 	 * deferred or disrupting error happening between checking the AFSR and
1345 	 * enabling NCEEN/CEEN.
1346 	 *
1347 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1348 	 * taken.
1349 	 */
1350 	set_error_enable(get_error_enable() | (nceen | ceen));
1351 	if (clear_errors(&ch_flt)) {
1352 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1353 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1354 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1355 		    NULL);
1356 	}
1357 
1358 	/*
1359 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1360 	 * be logged as part of the panic flow.
1361 	 */
1362 	if (aflt->flt_panic)
1363 		fm_panic("%sError(s)", pr_reason);
1364 
1365 	/*
1366 	 * Flushing the Ecache here gets the part of the trap handler that
1367 	 * is run at TL=1 out of the Ecache.
1368 	 */
1369 	cpu_flush_ecache();
1370 }
1371 
1372 /*
1373  * This is called via sys_trap from pil15_interrupt code if the
1374  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1375  * various ch_err_tl1_data structures for valid entries based on the bit
1376  * settings in the ch_err_tl1_flags entry of the structure.
1377  */
1378 /*ARGSUSED*/
1379 void
1380 cpu_tl1_error(struct regs *rp, int panic)
1381 {
1382 	ch_err_tl1_data_t *cl1p, cl1;
1383 	int i, ncl1ps;
1384 	uint64_t me_flags;
1385 	uint64_t ceen, nceen;
1386 
1387 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1388 		cl1p = &ch_err_tl1_data;
1389 		ncl1ps = 1;
1390 	} else if (CPU_PRIVATE(CPU) != NULL) {
1391 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1392 		ncl1ps = CH_ERR_TL1_TLMAX;
1393 	} else {
1394 		ncl1ps = 0;
1395 	}
1396 
1397 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1398 		if (cl1p->ch_err_tl1_flags == 0)
1399 			continue;
1400 
1401 		/*
1402 		 * Grab a copy of the logout data and invalidate
1403 		 * the logout area.
1404 		 */
1405 		cl1 = *cl1p;
1406 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1407 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1408 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1409 
1410 		/*
1411 		 * Log "first error" in ch_err_tl1_data.
1412 		 */
1413 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1414 			ceen = get_error_enable() & EN_REG_CEEN;
1415 			nceen = get_error_enable() & EN_REG_NCEEN;
1416 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1417 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1418 		}
1419 #if defined(CPU_IMP_L1_CACHE_PARITY)
1420 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1421 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1422 			    (caddr_t)cl1.ch_err_tl1_tpc);
1423 		}
1424 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1425 
1426 		/*
1427 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1428 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1429 		 * if the structure is busy; we just do the cache flushing
1430 		 * we have to do and then retry.  So the AFSR/AFAR
1431 		 * at this point *should* have some relevant info.  If there
1432 		 * are no valid errors in the AFSR, we'll assume they've
1433 		 * already been picked up and logged.  For I$/D$ parity,
1434 		 * we just log an event with an "Unknown" (NULL) TPC.
1435 		 */
1436 		if (me_flags & CH_ERR_FECC) {
1437 			ch_cpu_errors_t cpu_error_regs;
1438 			uint64_t t_afsr_errs;
1439 
1440 			/*
1441 			 * Get the error registers and see if there's
1442 			 * a pending error.  If not, don't bother
1443 			 * generating an "Invalid AFSR" error event.
1444 			 */
1445 			get_cpu_error_state(&cpu_error_regs);
1446 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1447 			    C_AFSR_EXT_ALL_ERRS) |
1448 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1449 			if (t_afsr_errs != 0) {
1450 				ceen = get_error_enable() & EN_REG_CEEN;
1451 				nceen = get_error_enable() & EN_REG_NCEEN;
1452 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1453 				    1, ceen, nceen, NULL);
1454 			}
1455 		}
1456 #if defined(CPU_IMP_L1_CACHE_PARITY)
1457 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1458 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1459 		}
1460 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1461 	}
1462 }
1463 
1464 /*
1465  * Called from Fast ECC TL>0 handler in case of fatal error.
1466  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1467  * but if it doesn't, we'll panic with something reasonable.
1468  */
1469 /*ARGSUSED*/
1470 void
1471 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1472 {
1473 	cpu_tl1_error(rp, 1);
1474 	/*
1475 	 * Should never return, but just in case.
1476 	 */
1477 	fm_panic("Unsurvivable ECC Error at TL>0");
1478 }
1479 
1480 /*
1481  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1482  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1483  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1484  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1485  *
1486  * Cheetah+ also handles (No additional processing required):
1487  *    DUE, DTO, DBERR	(NCEEN controlled)
1488  *    THCE		(CEEN and ET_ECC_en controlled)
1489  *    TUE		(ET_ECC_en controlled)
1490  *
1491  * Panther further adds:
1492  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1493  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1494  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1495  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1496  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1497  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1498  *
1499  * Note that the p_clo_flags input is only valid in cases where the
1500  * cpu_private struct is not yet initialized (since that is the only
1501  * time that information cannot be obtained from the logout struct.)
1502  */
1503 /*ARGSUSED*/
1504 void
1505 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1506 {
1507 	struct async_flt *aflt;
1508 	ch_async_flt_t ch_flt;
1509 	char pr_reason[MAX_REASON_STRING];
1510 	ch_cpu_logout_t *clop;
1511 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1512 	ch_cpu_errors_t cpu_error_regs;
1513 
1514 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1515 	/*
1516 	 * Get the CPU logout info.  If we can't find our CPU private
1517 	 * pointer, then we will have to make do without any detailed
1518 	 * logout information.
1519 	 */
1520 	if (CPU_PRIVATE(CPU) == NULL) {
1521 		clop = NULL;
1522 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1523 		get_cpu_error_state(&cpu_error_regs);
1524 		set_cpu_error_state(&cpu_error_regs);
1525 		t_afar = cpu_error_regs.afar;
1526 		t_afsr = cpu_error_regs.afsr;
1527 		t_afsr_ext = cpu_error_regs.afsr_ext;
1528 #if defined(SERRANO)
1529 		ch_flt.afar2 = cpu_error_regs.afar2;
1530 #endif	/* SERRANO */
1531 	} else {
1532 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1533 		t_afar = clop->clo_data.chd_afar;
1534 		t_afsr = clop->clo_data.chd_afsr;
1535 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1536 #if defined(SERRANO)
1537 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1538 #endif	/* SERRANO */
1539 	}
1540 
1541 	/*
1542 	 * In order to simplify code, we maintain this afsr_errs
1543 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1544 	 * sticky bits.
1545 	 */
1546 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1547 	    (t_afsr & C_AFSR_ALL_ERRS);
1548 
1549 	pr_reason[0] = '\0';
1550 	/* Setup the async fault structure */
1551 	aflt = (struct async_flt *)&ch_flt;
1552 	ch_flt.afsr_ext = t_afsr_ext;
1553 	ch_flt.afsr_errs = t_afsr_errs;
1554 	aflt->flt_stat = t_afsr;
1555 	aflt->flt_addr = t_afar;
1556 	aflt->flt_pc = (caddr_t)rp->r_pc;
1557 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1558 	aflt->flt_tl = 0;
1559 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1560 
1561 	/*
1562 	 * If this trap is a result of an error not covered by the
1563 	 * cpu_ce_not_deferred masks, we don't reenable CEEN now; instead
1564 	 * we indicate that a reenable timeout is to be set later.
1565 	 */
1566 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1567 	    !aflt->flt_panic)
1568 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1569 	else
1570 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1571 
1572 	/*
1573 	 * log the CE and clean up
1574 	 */
1575 	cpu_log_and_clear_ce(&ch_flt);
1576 
1577 	/*
1578 	 * We re-enable CEEN (if required) and check if any disrupting errors
1579 	 * have happened.  We do this because if a disrupting error had occurred
1580 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1581 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1582 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1583 	 * of an error happening between checking the AFSR and enabling CEEN.
1584 	 */
1585 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1586 		set_error_enable(get_error_enable() | EN_REG_CEEN);
1587 	if (clear_errors(&ch_flt)) {
1588 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1589 		    NULL);
1590 	}
1591 
1592 	/*
1593 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1594 	 * be logged as part of the panic flow.
1595 	 */
1596 	if (aflt->flt_panic)
1597 		fm_panic("%sError(s)", pr_reason);
1598 }
1599 
1600 /*
1601  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1602  * L3_EDU:BLD, TO, and BERR events.
1603  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1604  *
1605  * Cheetah+: No additional errors handled.
1606  *
1607  * Note that the p_clo_flags input is only valid in cases where the
1608  * cpu_private struct is not yet initialized (since that is the only
1609  * time that information cannot be obtained from the logout struct.)
1610  */
1611 /*ARGSUSED*/
1612 void
1613 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1614 {
1615 	ushort_t ttype, tl;
1616 	ch_async_flt_t ch_flt;
1617 	struct async_flt *aflt;
1618 	int trampolined = 0;
1619 	char pr_reason[MAX_REASON_STRING];
1620 	ch_cpu_logout_t *clop;
1621 	uint64_t ceen, clo_flags;
1622 	uint64_t log_afsr;
1623 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1624 	ch_cpu_errors_t cpu_error_regs;
1625 	int expected = DDI_FM_ERR_UNEXPECTED;
1626 	ddi_acc_hdl_t *hp;
1627 
1628 	/*
1629 	 * We need to look at p_flag to determine if the thread detected an
1630 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1631 	 * because we just need a consistent snapshot and we know that everyone
1632 	 * else will store a consistent set of bits while holding p_lock.  We
1633 	 * don't have to worry about a race because SDOCORE is set once prior
1634 	 * to doing i/o from the process's address space and is never cleared.
1635 	 */
1636 	uint_t pflag = ttoproc(curthread)->p_flag;
1637 
1638 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1639 	/*
1640 	 * Get the CPU logout info.  If we can't find our CPU private
1641 	 * pointer, then we will have to make do without any detailed
1642 	 * logout information.
1643 	 */
1644 	if (CPU_PRIVATE(CPU) == NULL) {
1645 		clop = NULL;
1646 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1647 		get_cpu_error_state(&cpu_error_regs);
1648 		set_cpu_error_state(&cpu_error_regs);
1649 		t_afar = cpu_error_regs.afar;
1650 		t_afsr = cpu_error_regs.afsr;
1651 		t_afsr_ext = cpu_error_regs.afsr_ext;
1652 #if defined(SERRANO)
1653 		ch_flt.afar2 = cpu_error_regs.afar2;
1654 #endif	/* SERRANO */
1655 		clo_flags = p_clo_flags;
1656 	} else {
1657 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1658 		t_afar = clop->clo_data.chd_afar;
1659 		t_afsr = clop->clo_data.chd_afsr;
1660 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1661 #if defined(SERRANO)
1662 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1663 #endif	/* SERRANO */
1664 		clo_flags = clop->clo_flags;
1665 	}
1666 
1667 	/*
1668 	 * In order to simplify code, we maintain this afsr_errs
1669 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1670 	 * sticky bits.
1671 	 */
1672 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1673 	    (t_afsr & C_AFSR_ALL_ERRS);
1674 	pr_reason[0] = '\0';
1675 
1676 	/*
1677 	 * Grab information encoded into our clo_flags field.
1678 	 */
1679 	ceen = clo_flags & EN_REG_CEEN;
1680 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1681 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1682 
1683 	/*
1684 	 * Handle the specific error.
1685 	 */
1686 	aflt = (struct async_flt *)&ch_flt;
1687 	aflt->flt_id = gethrtime_waitfree();
1688 	aflt->flt_bus_id = getprocessorid();
1689 	aflt->flt_inst = CPU->cpu_id;
1690 	ch_flt.afsr_ext = t_afsr_ext;
1691 	ch_flt.afsr_errs = t_afsr_errs;
1692 	aflt->flt_stat = t_afsr;
1693 	aflt->flt_addr = t_afar;
1694 	aflt->flt_pc = (caddr_t)rp->r_pc;
1695 	aflt->flt_prot = AFLT_PROT_NONE;
1696 	aflt->flt_class = CPU_FAULT;
1697 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1698 	aflt->flt_tl = (uchar_t)tl;
1699 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1700 	    C_AFSR_PANIC(t_afsr_errs));
1701 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1702 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1703 
1704 	/*
1705 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1706 	 * see if we were executing in the kernel under on_trap() or t_lofault
1707 	 * protection.  If so, modify the saved registers so that we return
1708 	 * from the trap to the appropriate trampoline routine.
1709 	 */
1710 	if (aflt->flt_priv && tl == 0) {
1711 		if (curthread->t_ontrap != NULL) {
1712 			on_trap_data_t *otp = curthread->t_ontrap;
1713 
1714 			if (otp->ot_prot & OT_DATA_EC) {
1715 				aflt->flt_prot = AFLT_PROT_EC;
1716 				otp->ot_trap |= OT_DATA_EC;
1717 				rp->r_pc = otp->ot_trampoline;
1718 				rp->r_npc = rp->r_pc + 4;
1719 				trampolined = 1;
1720 			}
1721 
1722 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1723 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1724 				aflt->flt_prot = AFLT_PROT_ACCESS;
1725 				otp->ot_trap |= OT_DATA_ACCESS;
1726 				rp->r_pc = otp->ot_trampoline;
1727 				rp->r_npc = rp->r_pc + 4;
1728 				trampolined = 1;
1729 				/*
1730 				 * for peeks and caut_gets errors are expected
1731 				 */
1732 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1733 				if (!hp)
1734 					expected = DDI_FM_ERR_PEEK;
1735 				else if (hp->ah_acc.devacc_attr_access ==
1736 				    DDI_CAUTIOUS_ACC)
1737 					expected = DDI_FM_ERR_EXPECTED;
1738 			}
1739 
1740 		} else if (curthread->t_lofault) {
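			/*
			 * t_lofault protection (copyin/copyout and friends):
			 * return EFAULT to the copy routine in %g1 and
			 * resume at its lofault handler.
			 */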
1741 			aflt->flt_prot = AFLT_PROT_COPY;
1742 			rp->r_g1 = EFAULT;
1743 			rp->r_pc = curthread->t_lofault;
1744 			rp->r_npc = rp->r_pc + 4;
1745 			trampolined = 1;
1746 		}
1747 	}
1748 
1749 	/*
1750 	 * If we're in user mode or we're doing a protected copy, we either
1751 	 * want the ASTON code below to send a signal to the user process
1752 	 * or we want to panic if aft_panic is set.
1753 	 *
1754 	 * If we're in privileged mode and we're not doing a copy, then we
1755 	 * need to check if we've trampolined.  If we haven't trampolined,
1756 	 * we should panic.
1757 	 */
1758 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1759 		if (t_afsr_errs &
1760 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1761 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1762 			aflt->flt_panic |= aft_panic;
1763 	} else if (!trampolined) {
1764 		aflt->flt_panic = 1;
1765 	}
1766 
1767 	/*
1768 	 * If we've trampolined due to a privileged TO or BERR, or if an
1769 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1770 	 * event for that TO or BERR.  Queue all other events (if any) besides
1771 	 * the TO/BERR.  Since we may not be enqueuing any events, we need to
1772 	 * ignore the number of events queued.  If we haven't trampolined due
1773 	 * to a TO or BERR, just enqueue events normally.
1774 	 */
1775 	log_afsr = t_afsr_errs;
1776 	if (trampolined) {
1777 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1778 	} else if (!aflt->flt_priv) {
1779 		/*
1780 		 * User mode, suppress messages if
1781 		 * cpu_berr_to_verbose is not set.
1782 		 */
1783 		if (!cpu_berr_to_verbose)
1784 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1785 	}
1786 
1787 	/*
1788 	 * Log any errors that occurred
1789 	 */
1790 	if (((log_afsr &
1791 	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1792 	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1793 	    (t_afsr_errs &
1794 	    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1795 		ch_flt.flt_type = CPU_INV_AFSR;
1796 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1797 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1798 		    aflt->flt_panic);
1799 	}
1800 
1801 	/*
1802 	 * Zero out + invalidate CPU logout.
1803 	 */
1804 	if (clop) {
1805 		bzero(clop, sizeof (ch_cpu_logout_t));
1806 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1807 	}
1808 
1809 #if defined(JALAPENO) || defined(SERRANO)
1810 	/*
1811 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1812 	 * IO errors that may have resulted in this trap.
1813 	 */
1814 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1815 		cpu_run_bus_error_handlers(aflt, expected);
1816 	}
1817 
1818 	/*
1819 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1820 	 * line from the Ecache.  We also need to query the bus nexus for
1821 	 * fatal errors.  Attempts to do diagnostic read on caches may
1822 	 * introduce more errors (especially when the module is bad).
1823 	 */
1824 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1825 		/*
1826 		 * Ask our bus nexus friends if they have any fatal errors.  If
1827 		 * so, they will log appropriate error messages.
1828 		 */
1829 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1830 			aflt->flt_panic = 1;
1831 
1832 		/*
1833 		 * We got a UE or RUE and are panicking, save the fault PA in
1834 		 * a known location so that the platform specific panic code
1835 		 * can check for copyback errors.
1836 		 */
1837 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1838 			panic_aflt = *aflt;
1839 		}
1840 	}
1841 
1842 	/*
1843 	 * Flush Ecache line or entire Ecache
1844 	 */
1845 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1846 		cpu_error_ecache_flush(&ch_flt);
1847 #else /* JALAPENO || SERRANO */
1848 	/*
1849 	 * UE/BERR/TO: Call our bus nexus friends to check for
1850 	 * IO errors that may have resulted in this trap.
1851 	 */
1852 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1853 		cpu_run_bus_error_handlers(aflt, expected);
1854 	}
1855 
1856 	/*
1857 	 * UE: If the UE is in memory, we need to flush the bad
1858 	 * line from the Ecache.  We also need to query the bus nexus for
1859 	 * fatal errors.  Attempts to do diagnostic read on caches may
1860 	 * introduce more errors (especially when the module is bad).
1861 	 */
1862 	if (t_afsr & C_AFSR_UE) {
1863 		/*
1864 		 * Ask our legacy bus nexus friends if they have any fatal
1865 		 * errors.  If so, they will log appropriate error messages.
1866 		 */
1867 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1868 			aflt->flt_panic = 1;
1869 
1870 		/*
1871 		 * We got a UE and are panicking, save the fault PA in a known
1872 		 * location so that the platform specific panic code can check
1873 		 * for copyback errors.
1874 		 */
1875 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1876 			panic_aflt = *aflt;
1877 		}
1878 	}
1879 
1880 	/*
1881 	 * Flush Ecache line or entire Ecache
1882 	 */
1883 	if (t_afsr_errs &
1884 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1885 		cpu_error_ecache_flush(&ch_flt);
1886 #endif /* JALAPENO || SERRANO */
1887 
1888 	/*
1889 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1890 	 * or disrupting errors have happened.  We do this because if a
1891 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1892 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1893 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1894 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1895 	 * deferred or disrupting error happening between checking the AFSR and
1896 	 * enabling NCEEN/CEEN.
1897 	 *
1898 	 * Note: CEEN is re-enabled only if it was on when the trap was taken.
1899 	 */
1900 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
1901 	if (clear_errors(&ch_flt)) {
1902 		/*
1903 		 * Check for secondary errors, and avoid panicking if we
1904 		 * have them
1905 		 */
1906 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
1907 		    t_afar) == 0) {
1908 			aflt->flt_panic |= ((ch_flt.afsr_errs &
1909 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
1910 		}
1911 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1912 		    NULL);
1913 	}
1914 
1915 	/*
1916 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1917 	 * be logged as part of the panic flow.
1918 	 */
1919 	if (aflt->flt_panic)
1920 		fm_panic("%sError(s)", pr_reason);
1921 
1922 	/*
1923 	 * If we queued an error and we are going to return from the trap and
1924 	 * the error was in user mode or inside of a copy routine, set AST flag
1925 	 * so the queue will be drained before returning to user mode.  The
1926 	 * AST processing will also act on our failure policy.
1927 	 */
1928 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1929 		int pcb_flag = 0;
1930 
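		/*
		 * BERR and TO are masked out of this test because they are
		 * reported separately via the ASYNC_BERR and ASYNC_BTO
		 * flags below.
		 */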
1931 		if (t_afsr_errs &
1932 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1933 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1934 			pcb_flag |= ASYNC_HWERR;
1935 
1936 		if (t_afsr & C_AFSR_BERR)
1937 			pcb_flag |= ASYNC_BERR;
1938 
1939 		if (t_afsr & C_AFSR_TO)
1940 			pcb_flag |= ASYNC_BTO;
1941 
1942 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1943 		aston(curthread);
1944 	}
1945 }
1946 
1947 #if defined(CPU_IMP_L1_CACHE_PARITY)
1948 /*
1949  * Handling of data and instruction parity errors (traps 0x71, 0x72).
1950  *
1951  * For Panther, P$ data parity errors during floating point load hits
1952  * are also detected (reported as TT 0x71) and handled by this trap
1953  * handler.
1954  *
1955  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
1956  * is available.
1957  */
1958 /*ARGSUSED*/
1959 void
1960 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
1961 {
1962 	ch_async_flt_t ch_flt;
1963 	struct async_flt *aflt;
1964 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
1965 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
1966 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
1967 	char *error_class;
1968 
1969 	/*
1970 	 * Log the error.
1971 	 * For icache parity errors the fault address is the trap PC.
1972 	 * For dcache/pcache parity errors the instruction would have to
1973 	 * be decoded to determine the address and that isn't possible
1974 	 * at high PIL.
1975 	 */
1976 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1977 	aflt = (struct async_flt *)&ch_flt;
1978 	aflt->flt_id = gethrtime_waitfree();
1979 	aflt->flt_bus_id = getprocessorid();
1980 	aflt->flt_inst = CPU->cpu_id;
1981 	aflt->flt_pc = tpc;
1982 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
1983 	aflt->flt_prot = AFLT_PROT_NONE;
1984 	aflt->flt_class = CPU_FAULT;
1985 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
1986 	aflt->flt_tl = tl;
1987 	aflt->flt_panic = panic;
1988 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
1989 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
1990 
1991 	if (iparity) {
1992 		cpu_icache_parity_info(&ch_flt);
1993 		if (ch_flt.parity_data.ipe.cpl_off != -1)
1994 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
1995 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
1996 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
1997 		else
1998 			error_class = FM_EREPORT_CPU_USIII_IPE;
1999 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2000 	} else {
2001 		cpu_dcache_parity_info(&ch_flt);
2002 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2003 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2004 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2005 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2006 		else
2007 			error_class = FM_EREPORT_CPU_USIII_DPE;
2008 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2009 		/*
2010 		 * For panther we also need to check the P$ for parity errors.
2011 		 */
2012 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2013 			cpu_pcache_parity_info(&ch_flt);
2014 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2015 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2016 				aflt->flt_payload =
2017 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2018 			}
2019 		}
2020 	}
2021 
2022 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2023 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2024 
2025 	if (iparity) {
2026 		/*
2027 		 * Invalidate entire I$.
2028 		 * This is required due to the use of diagnostic ASI
2029 		 * accesses that may result in a loss of I$ coherency.
2030 		 */
2031 		if (cache_boot_state & DCU_IC) {
2032 			flush_icache();
2033 		}
2034 		/*
2035 		 * According to section P.3.1 of the Panther PRM, we
2036 		 * need to do a little more for recovery on those
2037 		 * CPUs after encountering an I$ parity error.
2038 		 */
2039 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2040 			flush_ipb();
2041 			correct_dcache_parity(dcache_size,
2042 			    dcache_linesize);
2043 			flush_pcache();
2044 		}
2045 	} else {
2046 		/*
2047 		 * Since the valid bit is ignored when checking parity the
2048 		 * D$ data and tag must also be corrected.  Set D$ data bits
2049 		 * to zero and set utag to 0, 1, 2, 3.
2050 		 */
2051 		correct_dcache_parity(dcache_size, dcache_linesize);
2052 
2053 		/*
2054 		 * According to section P.3.3 of the Panther PRM, we
2055 		 * need to do a little more for recovery on those
2056 		 * CPUs after encountering a D$ or P$ parity error.
2057 		 *
2058 		 * As for clearing P$ parity errors, it is enough to
2059 		 * simply invalidate all entries in the P$ since P$ parity
2060 		 * error traps are only generated for floating point load
2061 		 * hits.
2062 		 */
2063 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2064 			flush_icache();
2065 			flush_ipb();
2066 			flush_pcache();
2067 		}
2068 	}
2069 
2070 	/*
2071 	 * Invalidate entire D$ if it was enabled.
2072 	 * This is done to avoid stale data in the D$ which might
2073 	 * occur with the D$ disabled and the trap handler doing
2074 	 * stores affecting lines already in the D$.
2075 	 */
2076 	if (cache_boot_state & DCU_DC) {
2077 		flush_dcache();
2078 	}
2079 
2080 	/*
2081 	 * Restore caches to their bootup state.
2082 	 */
2083 	set_dcu(get_dcu() | cache_boot_state);
2084 
2085 	/*
2086 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2087 	 * be logged as part of the panic flow.
2088 	 */
2089 	if (aflt->flt_panic)
2090 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2091 
2092 	/*
2093 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2094 	 * the chance of getting an unrecoverable Fast ECC error.  This
2095 	 * flush will evict the part of the parity trap handler that is run
2096 	 * at TL>1.
2097 	 */
2098 	if (tl) {
2099 		cpu_flush_ecache();
2100 	}
2101 }
2102 
2103 /*
2104  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2105  * to indicate which portions of the captured data should be in the ereport.
2106  */
2107 void
2108 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2109 {
2110 	int way = ch_flt->parity_data.ipe.cpl_way;
2111 	int offset = ch_flt->parity_data.ipe.cpl_off;
2112 	int tag_index;
2113 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2114 
2116 	if ((offset != -1) || (way != -1)) {
2117 		/*
2118 		 * Parity error in I$ tag or data
2119 		 */
2120 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2121 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2122 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2123 			    PN_ICIDX_TO_WAY(tag_index);
2124 		else
2125 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2126 			    CH_ICIDX_TO_WAY(tag_index);
2127 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2128 		    IC_LOGFLAG_MAGIC;
2129 	} else {
2130 		/*
2131 		 * Parity error was not identified.
2132 		 * Log tags and data for all ways.
2133 		 */
2134 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2135 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2136 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2137 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2138 				    PN_ICIDX_TO_WAY(tag_index);
2139 			else
2140 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2141 				    CH_ICIDX_TO_WAY(tag_index);
2142 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2143 			    IC_LOGFLAG_MAGIC;
2144 		}
2145 	}
2146 }
2147 
2148 /*
2149  * On a D$ parity error, mark the appropriate entries in the ch_async_flt_t
2150  * to indicate which portions of the captured data should be in the ereport.
2151  */
2152 void
2153 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2154 {
2155 	int way = ch_flt->parity_data.dpe.cpl_way;
2156 	int offset = ch_flt->parity_data.dpe.cpl_off;
2157 	int tag_index;
2158 
2159 	if (offset != -1) {
2160 		/*
2161 		 * Parity error in D$ or P$ data array.
2162 		 *
2163 		 * First check to see whether the parity error is in D$ or P$
2164 		 * since P$ data parity errors are reported in Panther using
2165 		 * the same trap.
2166 		 */
2167 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2168 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2169 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2170 			    CH_PCIDX_TO_WAY(tag_index);
2171 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2172 			    PC_LOGFLAG_MAGIC;
2173 		} else {
2174 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2175 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2176 			    CH_DCIDX_TO_WAY(tag_index);
2177 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2178 			    DC_LOGFLAG_MAGIC;
2179 		}
2180 	} else if (way != -1) {
2181 		/*
2182 		 * Parity error in D$ tag.
2183 		 */
2184 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2185 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2186 		    CH_DCIDX_TO_WAY(tag_index);
2187 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2188 		    DC_LOGFLAG_MAGIC;
2189 	}
2190 }
2191 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2192 
2193 /*
2194  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2195  * post-process CPU events that are dequeued.  As such, it can be invoked
2196  * from softint context, from AST processing in the trap() flow, or from the
2197  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2198  * Historically this entry point was used to log the actual cmn_err(9F) text;
2199  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2200  * With FMA this function now also returns a flag which indicates to the
2201  * caller whether the ereport should be posted (1) or suppressed (0).
2202  */
2203 static int
2204 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2205 {
2206 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2207 	struct async_flt *aflt = (struct async_flt *)flt;
2208 	uint64_t errors;
2209 
2210 	switch (ch_flt->flt_type) {
2211 	case CPU_INV_AFSR:
2212 		/*
2213 		 * If it is a disrupting trap and the AFSR is zero, then
2214 		 * the event has probably already been noted. Do not post
2215 		 * an ereport.
2216 		 */
2217 		if ((aflt->flt_status & ECC_C_TRAP) &&
2218 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2219 			return (0);
2220 		else
2221 			return (1);
2222 	case CPU_TO:
2223 	case CPU_BERR:
2224 	case CPU_FATAL:
2225 	case CPU_FPUERR:
2226 		return (1);
2227 
2228 	case CPU_UE_ECACHE_RETIRE:
2229 		cpu_log_err(aflt);
2230 		cpu_page_retire(ch_flt);
2231 		return (1);
2232 
2233 	/*
2234 	 * Cases where we may want to suppress logging or perform
2235 	 * extended diagnostics.
2236 	 */
2237 	case CPU_CE:
2238 	case CPU_EMC:
2239 		/*
2240 		 * We want to skip logging and further classification
2241 		 * only if ALL the following conditions are true:
2242 		 *
2243 		 *	1. There is only one error
2244 		 *	2. That error is a correctable memory error
2245 		 *	3. The error is caused by the memory scrubber (in
2246 		 *	   which case the error will have occurred under
2247 		 *	   on_trap protection)
2248 		 *	4. The error is on a retired page
2249 		 *
2250 		 * Note: AFLT_PROT_EC is used in places other than the memory
2251 		 * scrubber.  However, none of those errors should occur
2252 		 * on a retired page.
2253 		 */
2254 		if ((ch_flt->afsr_errs &
2255 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2256 		    aflt->flt_prot == AFLT_PROT_EC) {
2257 
2258 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2259 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2260 
2261 					/*
2262 					 * Since we're skipping logging, we'll
2263 					 * need to schedule re-enabling CEEN.
2264 					 */
2265 					(void) timeout(cpu_delayed_check_ce_errors,
2266 					    (void *)aflt->flt_inst, drv_usectohz(
2267 					    (clock_t)cpu_ceen_delay_secs * MICROSEC));
2268 				}
2269 				return (0);
2270 			}
2271 		}
2272 
2273 		/*
2274 		 * Perform/schedule further classification actions, but
2275 		 * only if the page is healthy (we don't want bad
2276 		 * pages inducing too much diagnostic activity).  If we could
2277 		 * not find a page pointer then we also skip this.  If
2278 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2279 		 * to copy and recirculate the event (for further diagnostics)
2280 		 * and we should not proceed to log it here.
2281 		 *
2282 		 * This must be the last step here before the cpu_log_err()
2283 		 * below - if an event recirculates cpu_ce_log_err() will
2284 		 * not call the current function but will proceed directly
2285 		 * to cpu_ereport_post(), skipping the cpu_log_err() below.
2286 		 *
2287 		 * Note: Check cpu_impl_async_log_err if changing this
2288 		 */
2289 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2290 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2291 			    CE_XDIAG_SKIP_NOPP);
2292 		} else {
2293 			if (errors != PR_OK) {
2294 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2295 				    CE_XDIAG_SKIP_PAGEDET);
2296 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2297 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2298 				return (0);
2299 			}
2300 		}
2301 		/*FALLTHRU*/
2302 
2303 	/*
2304 	 * Cases where we just want to report the error and continue.
2305 	 */
2306 	case CPU_CE_ECACHE:
2307 	case CPU_UE_ECACHE:
2308 	case CPU_IV:
2309 	case CPU_ORPH:
2310 		cpu_log_err(aflt);
2311 		return (1);
2312 
2313 	/*
2314 	 * Cases where we want to fall through to handle panicking.
2315 	 */
2316 	case CPU_UE:
2317 		/*
2318 		 * We want to skip logging in the same conditions as the
2319 		 * CE case.  In addition, we want to make sure we're not
2320 		 * panicking.
2321 		 */
2322 		if (!panicstr && (ch_flt->afsr_errs &
2323 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2324 		    aflt->flt_prot == AFLT_PROT_EC) {
2325 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2326 				/* Zero the address to clear the error */
2327 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2328 				return (0);
2329 			}
2330 		}
2331 		cpu_log_err(aflt);
2332 		break;
2333 
2334 	default:
2335 		/*
2336 		 * If the us3_common.c code doesn't know the flt_type, it may
2337 		 * be an implementation-specific code.  Call into the impldep
2338 		 * backend to find out what to do: if it tells us to continue,
2339 		 * break and handle as if falling through from a UE; if not,
2340 		 * the impldep backend has handled the error and we're done.
2341 		 */
2342 		switch (cpu_impl_async_log_err(flt, eqep)) {
2343 		case CH_ASYNC_LOG_DONE:
2344 			return (1);
2345 		case CH_ASYNC_LOG_RECIRC:
2346 			return (0);
2347 		case CH_ASYNC_LOG_CONTINUE:
2348 			break; /* continue on to handle UE-like error */
2349 		default:
2350 			cmn_err(CE_WARN, "discarding error 0x%p with "
2351 			    "invalid fault type (0x%x)",
2352 			    (void *)aflt, ch_flt->flt_type);
2353 			return (0);
2354 		}
2355 	}
2356 
2357 	/* ... fall through from the UE case */
2358 
2359 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2360 		if (!panicstr) {
2361 			cpu_page_retire(ch_flt);
2362 		} else {
2363 			/*
2364 			 * Clear UEs on panic so that we don't
2365 			 * get haunted by them during panic or
2366 			 * after reboot
2367 			 */
2368 			cpu_clearphys(aflt);
2369 			(void) clear_errors(NULL);
2370 		}
2371 	}
2372 
2373 	return (1);
2374 }
2375 
2376 /*
2377  * Retire the bad page that may contain the flushed error.
2378  */
2379 void
2380 cpu_page_retire(ch_async_flt_t *ch_flt)
2381 {
2382 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2383 	(void) page_retire(aflt->flt_addr, PR_UE);
2384 }
2385 
2386 /*
2387  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2388  * generic event post-processing for correctable and uncorrectable memory,
2389  * E$, and MTag errors.  Historically this entry point was used to log bits of
2390  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2391  * converted into an ereport.  In addition, it transmits the error to any
2392  * platform-specific service-processor FRU logging routines, if available.
2393  */
2394 void
2395 cpu_log_err(struct async_flt *aflt)
2396 {
2397 	char unum[UNUM_NAMLEN];
2398 	int len = 0;
2399 	int synd_status, synd_code, afar_status;
2400 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2401 
2402 	/*
2403 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
2404 	 * For Panther, L2$ is not external, so we don't want to
2405 	 * generate an E$ unum for those errors.
2406 	 */
2407 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
2408 		if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS)
2409 			aflt->flt_status |= ECC_ECACHE;
2410 	} else {
2411 		if (ch_flt->flt_bit & C_AFSR_ECACHE)
2412 			aflt->flt_status |= ECC_ECACHE;
2413 	}
2414 
2415 	/*
2416 	 * Determine syndrome status.
2417 	 */
2418 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2419 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2420 
2421 	/*
2422 	 * Determine afar status.
2423 	 */
2424 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2425 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2426 		    ch_flt->flt_bit);
2427 	else
2428 		afar_status = AFLT_STAT_INVALID;
2429 
2430 	/*
2431 	 * If afar status is not invalid do a unum lookup.
2432 	 */
2433 	if (afar_status != AFLT_STAT_INVALID) {
2434 		(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
2435 		    UNUM_NAMLEN, &len);
2436 	} else {
2437 		unum[0] = '\0';
2438 	}
2439 
2440 	synd_code = synd_to_synd_code(synd_status,
2441 	    aflt->flt_synd, ch_flt->flt_bit);
2442 
2443 	/*
2444 	 * Do not send the fruid message (plat_ecc_error_data_t)
2445 	 * to the SC if it can handle the enhanced error information
2446 	 * (plat_ecc_error2_data_t) or when the tunable
2447 	 * ecc_log_fruid_enable is set to 0.
2448 	 */
2449 
2450 	if (&plat_ecc_capability_sc_get &&
2451 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2452 		if (&plat_log_fruid_error)
2453 			plat_log_fruid_error(synd_code, aflt, unum,
2454 			    ch_flt->flt_bit);
2455 	}
2456 
2457 	if (aflt->flt_func != NULL)
2458 		aflt->flt_func(aflt, unum);
2459 
2460 	if (afar_status != AFLT_STAT_INVALID)
2461 		cpu_log_diag_info(ch_flt);
2462 
2463 	/*
2464 	 * If we have a CEEN error, we do not re-enable CEEN until after
2465 	 * we exit the trap handler. Otherwise, another error may
2466 	 * occur causing the handler to be entered recursively.
2467 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2468 	 * to try and ensure that the CPU makes progress in the face
2469 	 * of a CE storm.
2470 	 */
2471 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2472 		(void) timeout(cpu_delayed_check_ce_errors,
2473 		    (void *)aflt->flt_inst,
2474 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2475 	}
2476 }
2477 
2478 /*
2479  * Invoked by error_init() early in startup and therefore before
2480  * startup_errorq() is called to drain any error Q -
2481  *
2482  * startup()
2483  *   startup_end()
2484  *     error_init()
2485  *       cpu_error_init()
2486  * errorq_init()
2487  *   errorq_drain()
2488  * start_other_cpus()
2489  *
2490  * The purpose of this routine is to create error-related taskqs.  Taskqs
2491  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2492  * context.
2493  */
2494 void
2495 cpu_error_init(int items)
2496 {
2497 	/*
2498 	 * Create taskq(s) to reenable CE
2499 	 */
2500 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2501 	    items, items, TASKQ_PREPOPULATE);
2502 }
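
/*
 * For reference, a minimal sketch of how this taskq is expected to be
 * used (illustrative only: the worker function and argument shown here
 * are assumptions based on the CE re-enable timeout path in this file,
 * not a definitive call site):
 *
 *	(void) taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors,
 *	    (void *)aflt->flt_inst, TQ_NOSLEEP);
 */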
2503 
2504 void
2505 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2506 {
2507 	char unum[UNUM_NAMLEN];
2508 	int len;
2509 
2510 	switch (aflt->flt_class) {
2511 	case CPU_FAULT:
2512 		cpu_ereport_init(aflt);
2513 		if (cpu_async_log_err(aflt, eqep))
2514 			cpu_ereport_post(aflt);
2515 		break;
2516 
2517 	case BUS_FAULT:
2518 		if (aflt->flt_func != NULL) {
2519 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2520 			    unum, UNUM_NAMLEN, &len);
2521 			aflt->flt_func(aflt, unum);
2522 		}
2523 		break;
2524 
2525 	case RECIRC_CPU_FAULT:
2526 		aflt->flt_class = CPU_FAULT;
2527 		cpu_log_err(aflt);
2528 		cpu_ereport_post(aflt);
2529 		break;
2530 
2531 	case RECIRC_BUS_FAULT:
2532 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2533 		/*FALLTHRU*/
2534 	default:
2535 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2536 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2537 		return;
2538 	}
2539 }
2540 
2541 /*
2542  * Scrub and classify a CE.  This function must not modify the
2543  * fault structure passed to it but instead should return the classification
2544  * information.
2545  */
2546 
2547 static uchar_t
2548 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2549 {
2550 	uchar_t disp = CE_XDIAG_EXTALG;
2551 	on_trap_data_t otd;
2552 	uint64_t orig_err;
2553 	ch_cpu_logout_t *clop;
2554 
2555 	/*
2556 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2557 	 * this, but our other callers have not.  Disable preemption to
2558 	 * avoid CPU migration so that we restore CEEN on the correct
2559 	 * cpu later.
2560 	 *
2561 	 * CEEN is cleared so that further CEs that our instruction and
2562 	 * data footprint induce do not cause us to either creep down the
2563 	 * kernel stack to the point of overflow, or do so much CE
2564 	 * notification as to make little real forward progress.
2565 	 *
2566 	 * NCEEN must not be cleared.  However it is possible that
2567 	 * our accesses to the flt_addr may provoke a bus error or timeout
2568 	 * if the offending address has just been unconfigured as part of
2569 	 * a DR action.  So we must operate under on_trap protection.
2570 	 */
2571 	kpreempt_disable();
2572 	orig_err = get_error_enable();
2573 	if (orig_err & EN_REG_CEEN)
2574 		set_error_enable(orig_err & ~EN_REG_CEEN);
2575 
2576 	/*
2577 	 * Our classification algorithm includes the line state before
2578 	 * the scrub; we'd like this captured after the detection and
2579 	 * before the algorithm below - the earlier the better.
2580 	 *
2581 	 * If we've come from a cpu CE trap then this info already exists
2582 	 * in the cpu logout area.
2583 	 *
2584 	 * For a CE detected by memscrub for which there was no trap
2585 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2586 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2587 	 * marked the fault structure as incomplete as a flag to later
2588 	 * logging code.
2589 	 *
2590 	 * If called directly from an IO detected CE there has been
2591 	 * no line data capture.  In this case we logout to the cpu logout
2592 	 * area - that's appropriate since it's the cpu cache data we need
2593 	 * for classification.  We thus borrow the cpu logout area for a
2594 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy
2595 	 * during this time (we will invalidate it again below).
2596 	 *
2597 	 * If called from the partner check xcall handler then this cpu
2598 	 * (the partner) has not necessarily experienced a CE at this
2599 	 * address.  But we want to capture line state before its scrub
2600 	 * attempt since we use that in our classification.
2601 	 */
2602 	if (logout_tried == B_FALSE) {
2603 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2604 			disp |= CE_XDIAG_NOLOGOUT;
2605 	}
2606 
2607 	/*
2608 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2609 	 * no longer be valid (if DR'd since the initial event) so we
2610 	 * perform this scrub under on_trap protection.  If this access is
2611 	 * ok then further accesses below will also be ok - DR cannot
2612 	 * proceed while this thread is active (preemption is disabled);
2613 	 * to be safe we'll nonetheless use on_trap again below.
2614 	 */
2615 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2616 		cpu_scrubphys(ecc);
2617 	} else {
2618 		no_trap();
2619 		if (orig_err & EN_REG_CEEN)
2620 			set_error_enable(orig_err);
2621 		kpreempt_enable();
2622 		return (disp);
2623 	}
2624 	no_trap();
2625 
2626 	/*
2627 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2628 	 * Note that it's quite possible that the read sourced the data from
2629 	 * another cpu.
2630 	 */
2631 	if (clear_ecc(ecc))
2632 		disp |= CE_XDIAG_CE1;
2633 
2634 	/*
2635 	 * Read the data again.  This time the read is very likely to
2636 	 * come from memory since the scrub induced a writeback to memory.
2637 	 */
2638 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2639 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2640 	} else {
2641 		no_trap();
2642 		if (orig_err & EN_REG_CEEN)
2643 			set_error_enable(orig_err);
2644 		kpreempt_enable();
2645 		return (disp);
2646 	}
2647 	no_trap();
2648 
2649 	/* Did that read induce a CE that matches the AFAR? */
2650 	if (clear_ecc(ecc))
2651 		disp |= CE_XDIAG_CE2;
2652 
2653 	/*
2654 	 * Look at the logout information and record whether we found the
2655 	 * line in l2/l3 cache.  For Panther we are interested in whether
2656 	 * we found it in either cache (it won't reside in both but
2657 	 * it is possible to read it that way given the moving target).
2658 	 */
2659 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2660 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2661 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2662 		int hit, level;
2663 		int state;
2664 		int totalsize;
2665 		ch_ec_data_t *ecp;
2666 
2667 		/*
2668 		 * If hit is nonzero then a match was found and hit will
2669 		 * be one greater than the index which hit.  For Panther we
2670 		 * also need to pay attention to level to see which of l2$ or
2671 		 * l3$ it hit in.
2672 		 */
2673 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2674 		    0, &level);
2675 
2676 		if (hit) {
2677 			--hit;
2678 			disp |= CE_XDIAG_AFARMATCH;
2679 
2680 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2681 				if (level == 2)
2682 					ecp = &clop->clo_data.chd_l2_data[hit];
2683 				else
2684 					ecp = &clop->clo_data.chd_ec_data[hit];
2685 			} else {
2686 				ASSERT(level == 2);
2687 				ecp = &clop->clo_data.chd_ec_data[hit];
2688 			}
2689 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2690 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2691 			    ecc->flt_addr, ecp->ec_tag);
2692 
2693 			/*
2694 			 * Cheetah variants use different state encodings -
2695 			 * the CH_ECSTATE_* defines vary depending on the
2696 			 * module we're compiled for.  Translate into our
2697 			 * one true version.  Conflate Owner-Shared state
2698 			 * of SSM mode with Owner as victimisation of such
2699 			 * lines may cause a writeback.
2700 			 */
2701 			switch (state) {
2702 			case CH_ECSTATE_MOD:
2703 				disp |= EC_STATE_M;
2704 				break;
2705 
2706 			case CH_ECSTATE_OWN:
2707 			case CH_ECSTATE_OWS:
2708 				disp |= EC_STATE_O;
2709 				break;
2710 
2711 			case CH_ECSTATE_EXL:
2712 				disp |= EC_STATE_E;
2713 				break;
2714 
2715 			case CH_ECSTATE_SHR:
2716 				disp |= EC_STATE_S;
2717 				break;
2718 
2719 			default:
2720 				disp |= EC_STATE_I;
2721 				break;
2722 			}
2723 		}
2724 
2725 		/*
2726 		 * If we initiated the delayed logout then we are responsible
2727 		 * for invalidating the logout area.
2728 		 */
2729 		if (logout_tried == B_FALSE) {
2730 			bzero(clop, sizeof (ch_cpu_logout_t));
2731 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2732 		}
2733 	}
2734 
2735 	/*
2736 	 * Re-enable CEEN if we turned it off.
2737 	 */
2738 	if (orig_err & EN_REG_CEEN)
2739 		set_error_enable(orig_err);
2740 	kpreempt_enable();
2741 
2742 	return (disp);
2743 }
2744 
2745 /*
2746  * Scrub a correctable memory error and collect data for classification
2747  * of CE type.  This function is called in the detection path, i.e., TL=0
2748  * handling of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2749  */
2750 void
2751 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2752 {
2753 	/*
2754 	 * Cheetah CE classification does not set any bits in flt_status.
2755 	 * Instead we will record classification datapoints in flt_disp.
2756 	 */
2757 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2758 
2759 	/*
2760 	 * To check if the error detected by IO is persistent, sticky or
2761 	 * intermittent.  This is noticed by clear_ecc().
2762 	 */
2763 	if (ecc->flt_status & ECC_IOBUS)
2764 		ecc->flt_stat = C_AFSR_MEMORY;
2765 
2766 	/*
2767 	 * Record information from this first part of the algorithm in
2768 	 * flt_disp.
2769 	 */
2770 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2771 }
2772 
2773 /*
2774  * Select a partner to perform a further CE classification check from.
2775  * Must be called with kernel preemption disabled (to stop the cpu list
2776  * from changing).  The detecting cpu we are partnering has cpuid
2777  * aflt->flt_inst; we might not be running on the detecting cpu.
2778  *
2779  * Restrict choice to active cpus in the same cpu partition as ourselves in
2780  * an effort to stop bad cpus in one partition causing other partitions to
2781  * perform excessive diagnostic activity.  Actually since the errorq drain
2782  * is run from a softint most of the time and that is a global mechanism
2783  * this isolation is only partial.  Return NULL if we fail to find a
2784  * suitable partner.
2785  *
2786  * We prefer a partner that is in a different latency group to ourselves as
2787  * we will share fewer datapaths.  If such a partner is unavailable then
2788  * choose one in the same lgroup but prefer a different chip and only allow
2789  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2790  * flags includes PTNR_SELFOK then permit selection of the original detector.
2791  *
2792  * We keep a cache of the last partner selected for a cpu, and we'll try to
2793  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2794  * have passed since that selection was made.  This provides the benefit
2795  * of the point-of-view of different partners over time but without
2796  * requiring frequent cpu list traversals.
2797  */
2798 
2799 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2800 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2801 
2802 static cpu_t *
2803 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2804 {
2805 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2806 	hrtime_t lasttime, thistime;
2807 
2808 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2809 
2810 	dtcr = cpu[aflt->flt_inst];
2811 
2812 	/*
2813 	 * Short-circuit for the following cases:
2814 	 *	. the dtcr is not flagged active
2815 	 *	. there is just one cpu present
2816 	 *	. the detector has disappeared
2817 	 *	. we were given a bad flt_inst cpuid; this should not happen
2818 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2819 	 *	  reason to panic.
2820 	 *	. there is just one cpu left online in the cpu partition
2821 	 *
2822 	 * If we return NULL after this point then we do not update the
2823 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2824 	 * again next time; this is the case where the only other cpu online
2825 	 * in the detector's partition is on the same chip as the detector
2826 	 * and since CEEN re-enable is throttled even that case should not
2827 	 * hurt performance.
2828 	 */
2829 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2830 		return (NULL);
2831 	}
2832 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2833 		if (flags & PTNR_SELFOK) {
2834 			*typep = CE_XDIAG_PTNR_SELF;
2835 			return (dtcr);
2836 		} else {
2837 			return (NULL);
2838 		}
2839 	}
2840 
2841 	thistime = gethrtime();
2842 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2843 
2844 	/*
2845 	 * Select a starting point.
2846 	 */
2847 	if (!lasttime) {
2848 		/*
2849 		 * We've never selected a partner for this detector before.
2850 		 * Start the scan at the next online cpu in the same cpu
2851 		 * partition.
2852 		 */
2853 		sp = dtcr->cpu_next_part;
2854 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2855 		/*
2856 		 * Our last selection has not aged yet.  If this partner:
2857 		 *	. is still a valid cpu,
2858 		 *	. is still in the same partition as the detector
2859 		 *	. is still marked active
2860 		 *	. satisfies the 'flags' argument criteria
2861 		 * then select it again without updating the timestamp.
2862 		 */
2863 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2864 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2865 		    !cpu_flagged_active(sp->cpu_flags) ||
2866 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2867 		    (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
2868 		    !(flags & PTNR_SIBLINGOK))) {
2869 			sp = dtcr->cpu_next_part;
2870 		} else {
2871 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2872 				*typep = CE_XDIAG_PTNR_REMOTE;
2873 			} else if (sp == dtcr) {
2874 				*typep = CE_XDIAG_PTNR_SELF;
2875 			} else if (sp->cpu_chip->chip_id ==
2876 			    dtcr->cpu_chip->chip_id) {
2877 				*typep = CE_XDIAG_PTNR_SIBLING;
2878 			} else {
2879 				*typep = CE_XDIAG_PTNR_LOCAL;
2880 			}
2881 			return (sp);
2882 		}
2883 	} else {
2884 		/*
2885 		 * Our last selection has aged.  If it is nonetheless still a
2886 		 * valid cpu then start the scan at the next cpu in the
2887 		 * partition after our last partner.  If the last selection
2888 		 * is no longer a valid cpu then go with our default.  In
2889 		 * this way we slowly cycle through possible partners to
2890 		 * obtain multiple viewpoints over time.
2891 		 */
2892 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2893 		if (sp == NULL) {
2894 			sp = dtcr->cpu_next_part;
2895 		} else {
2896 			sp = sp->cpu_next_part;		/* may be dtcr */
2897 			if (sp->cpu_part != dtcr->cpu_part)
2898 				sp = dtcr;
2899 		}
2900 	}
2901 
2902 	/*
2903 	 * We have a proposed starting point for our search, but if this
2904 	 * cpu is offline then its cpu_next_part will point to itself
2905 	 * so we can't use that to iterate over cpus in this partition in
2906 	 * the loop below.  We still want to avoid iterating over cpus not
2907 	 * in our partition, so in the case that our starting point is offline
2908 	 * we will repoint it to be the detector itself;  and if the detector
2909 	 * happens to be offline we'll return NULL from the following loop.
2910 	 */
2911 	if (!cpu_flagged_active(sp->cpu_flags)) {
2912 		sp = dtcr;
2913 	}
2914 
2915 	ptnr = sp;
2916 	locptnr = NULL;
2917 	sibptnr = NULL;
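	/*
	 * Walk the cpus in this partition: a remote-lgroup cpu is taken
	 * immediately; otherwise remember the first local-lgroup cpu and
	 * the first sibling core as fallbacks for the checks below.
	 */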
2918 	do {
2919 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
2920 			continue;
2921 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2922 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
2923 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2924 			*typep = CE_XDIAG_PTNR_REMOTE;
2925 			return (ptnr);
2926 		}
2927 		if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
2928 			if (sibptnr == NULL)
2929 				sibptnr = ptnr;
2930 			continue;
2931 		}
2932 		if (locptnr == NULL)
2933 			locptnr = ptnr;
2934 	} while ((ptnr = ptnr->cpu_next_part) != sp);
2935 
2936 	/*
2937 	 * A foreign partner has already been returned if one was available.
2938 	 *
2939 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
2940 	 * detector, is active, and is not a sibling of the detector.
2941 	 *
2942 	 * If sibptnr is not NULL it is a sibling of the detector, and is
2943 	 * active.
2944 	 *
2945 	 * If we have to resort to using the detector itself we have already
2946 	 * checked that it is active.
2947 	 */
2948 	if (locptnr) {
2949 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
2950 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2951 		*typep = CE_XDIAG_PTNR_LOCAL;
2952 		return (locptnr);
2953 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
2954 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
2955 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2956 		*typep = CE_XDIAG_PTNR_SIBLING;
2957 		return (sibptnr);
2958 	} else if (flags & PTNR_SELFOK) {
2959 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
2960 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2961 		*typep = CE_XDIAG_PTNR_SELF;
2962 		return (dtcr);
2963 	}
2964 
2965 	return (NULL);
2966 }
2967 
2968 /*
2969  * Cross call handler that is requested to run on the designated partner of
2970  * a cpu that experienced a possibly sticky or possibly persistent CE.
2971  */
2972 static void
2973 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
2974 {
2975 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
2976 }
2977 
2978 /*
2979  * The associated errorqs are never destroyed so we do not need to deal with
2980  * them disappearing before this timeout fires.  If the affected memory
2981  * has been DR'd out since the original event the scrub algorithm will catch
2982  * any errors and return null disposition info.  If the original detecting
2983  * cpu has been DR'd out then ereport detector info will not be able to
2984  * look up the CPU type; with a small timeout this is unlikely.
2985  */
2986 static void
2987 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
2988 {
2989 	struct async_flt *aflt = cbarg->lkycb_aflt;
2990 	uchar_t disp;
2991 	cpu_t *cp;
2992 	int ptnrtype;
2993 
2994 	kpreempt_disable();
2995 	if ((cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
2996 	    &ptnrtype)) != NULL) {
2997 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
2998 		    (uint64_t)&disp);
2999 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3000 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3001 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3002 	} else {
3003 		ce_xdiag_lkydrops++;
3004 		if (ncpus > 1)
3005 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3006 			    CE_XDIAG_SKIP_NOPTNR);
3007 	}
3008 	kpreempt_enable();
3009 
3010 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3011 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3012 }
3013 
3014 /*
3015  * Called from errorq drain code when processing a CE error, both from
3016  * CPU and PCI drain functions.  Decide what further classification actions,
3017  * if any, we will perform.  Perform immediate actions now, and schedule
3018  * delayed actions as required.  Note that we are no longer necessarily running
3019  * on the detecting cpu, and that the async_flt structure will not persist on
3020  * return from this function.
3021  *
3022  * Calls to this function should aim to be self-throttling in some way.  With
3023  * the delayed re-enable of CEEN the absolute rate of calls should not
3024  * be excessive.  Callers should also avoid performing in-depth classification
3025  * for events in pages that are already known to be suspect.
3026  *
3027  * We return nonzero to indicate that the event has been copied and
3028  * recirculated for further testing.  The caller should not log the event
3029  * in this case - it will be logged when further test results are available.
3030  *
3031  * Our possible contexts are that of errorq_drain: below lock level or from
3032  * panic context.  We can assume that the cpu we are running on is online.
3033  */
3034 
3036 #ifdef DEBUG
3037 static int ce_xdiag_forceaction;
3038 #endif
3039 
3040 int
3041 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3042     errorq_elem_t *eqep, size_t afltoffset)
3043 {
3044 	ce_dispact_t dispact, action;
3045 	cpu_t *cp;
3046 	uchar_t dtcrinfo, disp;
3047 	int ptnrtype;
3048 
3049 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3050 		ce_xdiag_drops++;
3051 		return (0);
3052 	} else if (!aflt->flt_in_memory) {
3053 		ce_xdiag_drops++;
3054 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3055 		return (0);
3056 	}
3057 
3058 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3059 
3060 	/*
3061 	 * Some correctable events are not scrubbed/classified, such as those
3062 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3063 	 * initial detector classification go no further.
3064 	 */
3065 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3066 		ce_xdiag_drops++;
3067 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3068 		return (0);
3069 	}
3070 
3071 	dispact = CE_DISPACT(ce_disp_table,
3072 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3073 	    CE_XDIAG_STATE(dtcrinfo),
3074 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3075 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3076 
3078 	action = CE_ACT(dispact);	/* bad lookup caught below */
3079 #ifdef DEBUG
3080 	if (ce_xdiag_forceaction != 0)
3081 		action = ce_xdiag_forceaction;
3082 #endif
3083 
3084 	switch (action) {
3085 	case CE_ACT_LKYCHK: {
3086 		caddr_t ndata;
3087 		errorq_elem_t *neqep;
3088 		struct async_flt *ecc;
3089 		ce_lkychk_cb_t *cbargp;
3090 
3091 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3092 			ce_xdiag_lkydrops++;
3093 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3094 			    CE_XDIAG_SKIP_DUPFAIL);
3095 			break;
3096 		}
3097 		ecc = (struct async_flt *)(ndata + afltoffset);
3098 
3099 		ASSERT(ecc->flt_class == CPU_FAULT ||
3100 		    ecc->flt_class == BUS_FAULT);
3101 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3102 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3103 
3104 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3105 		cbargp->lkycb_aflt = ecc;
3106 		cbargp->lkycb_eqp = eqp;
3107 		cbargp->lkycb_eqep = neqep;
3108 
3109 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3110 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3111 		return (1);
3112 	}
3113 
3114 	case CE_ACT_PTNRCHK:
3115 		kpreempt_disable();	/* stop cpu list changing */
3116 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3117 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3118 			    (uint64_t)aflt, (uint64_t)&disp);
3119 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3120 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3121 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3122 		} else if (ncpus > 1) {
3123 			ce_xdiag_ptnrdrops++;
3124 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3125 			    CE_XDIAG_SKIP_NOPTNR);
3126 		} else {
3127 			ce_xdiag_ptnrdrops++;
3128 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3129 			    CE_XDIAG_SKIP_UNIPROC);
3130 		}
3131 		kpreempt_enable();
3132 		break;
3133 
3134 	case CE_ACT_DONE:
3135 		break;
3136 
3137 	case CE_ACT(CE_DISP_BAD):
3138 	default:
3139 #ifdef DEBUG
3140 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3141 #endif
3142 		ce_xdiag_bad++;
3143 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3144 		break;
3145 	}
3146 
3147 	return (0);
3148 }
3149 
3150 /*
3151  * We route all errors through a single switch statement.
3152  */
3153 void
3154 cpu_ue_log_err(struct async_flt *aflt)
3155 {
3156 	switch (aflt->flt_class) {
3157 	case CPU_FAULT:
3158 		cpu_ereport_init(aflt);
3159 		if (cpu_async_log_err(aflt, NULL))
3160 			cpu_ereport_post(aflt);
3161 		break;
3162 
3163 	case BUS_FAULT:
3164 		bus_async_log_err(aflt);
3165 		break;
3166 
3167 	default:
3168 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3169 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3170 		return;
3171 	}
3172 }
3173 
3174 /*
3175  * Routine for panic hook callback from panic_idle().
3176  */
3177 void
3178 cpu_async_panic_callb(void)
3179 {
3180 	ch_async_flt_t ch_flt;
3181 	struct async_flt *aflt;
3182 	ch_cpu_errors_t cpu_error_regs;
3183 	uint64_t afsr_errs;
3184 
3185 	get_cpu_error_state(&cpu_error_regs);
3186 
3187 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3188 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS);
3189 
3190 	if (afsr_errs) {
3191 
3192 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3193 		aflt = (struct async_flt *)&ch_flt;
3194 		aflt->flt_id = gethrtime_waitfree();
3195 		aflt->flt_bus_id = getprocessorid();
3196 		aflt->flt_inst = CPU->cpu_id;
3197 		aflt->flt_stat = cpu_error_regs.afsr;
3198 		aflt->flt_addr = cpu_error_regs.afar;
3199 		aflt->flt_prot = AFLT_PROT_NONE;
3200 		aflt->flt_class = CPU_FAULT;
3201 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3202 		aflt->flt_panic = 1;
3203 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3204 		ch_flt.afsr_errs = afsr_errs;
3205 #if defined(SERRANO)
3206 		ch_flt.afar2 = cpu_error_regs.afar2;
3207 #endif	/* SERRANO */
3208 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3209 	}
3210 }
3211 
3212 /*
3213  * Routine to convert a syndrome into a syndrome code.
3214  */
3215 static int
3216 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3217 {
3218 	if (synd_status == AFLT_STAT_INVALID)
3219 		return (-1);
3220 
3221 	/*
3222 	 * Use the syndrome to index the appropriate syndrome table,
3223 	 * to get the code indicating which bit(s) is(are) bad.
3224 	 */
3225 	if (afsr_bit &
3226 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3227 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3228 #if defined(JALAPENO) || defined(SERRANO)
3229 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3230 				return (-1);
3231 			else
3232 				return (BPAR0 + synd);
3233 #else /* JALAPENO || SERRANO */
3234 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3235 				return (-1);
3236 			else
3237 				return (mtag_syndrome_tab[synd]);
3238 #endif /* JALAPENO || SERRANO */
3239 		} else {
3240 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3241 				return (-1);
3242 			else
3243 				return (ecc_syndrome_tab[synd]);
3244 		}
3245 	} else {
3246 		return (-1);
3247 	}
3248 }
3249 
3250 /*
3251  * Routine to return a string identifying the physical name
3252  * associated with a memory/cache error.
3253  */
3254 int
3255 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3256     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3257     ushort_t flt_status, char *buf, int buflen, int *lenp)
3258 {
3259 	int synd_code;
3260 	int ret;
3261 
3262 	/*
3263 	 * An AFSR of -1 defaults to a memory syndrome.
3264 	 */
3265 	if (flt_stat == (uint64_t)-1)
3266 		flt_stat = C_AFSR_CE;
3267 
3268 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3269 
3270 	/*
3271 	 * Syndrome code must be either a single-bit error code
3272 	 * (0...143) or -1 for unum lookup.
3273 	 */
3274 	if (synd_code < 0 || synd_code >= M2)
3275 		synd_code = -1;
3276 	if (&plat_get_mem_unum) {
3277 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3278 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3279 			buf[0] = '\0';
3280 			*lenp = 0;
3281 		}
3282 
3283 		return (ret);
3284 	}
3285 
3286 	return (ENOTSUP);
3287 }
3288 
3289 /*
3290  * Wrapper for cpu_get_mem_unum() routine that takes an
3291  * async_flt struct rather than explicit arguments.
3292  */
3293 int
3294 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3295     char *buf, int buflen, int *lenp)
3296 {
3297 	/*
3298 	 * If we come through here for an IO bus error, aflt->flt_stat will
3299 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3300 	 * so it will interpret this as a memory error.
3301 	 */
3302 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3303 	    (aflt->flt_class == BUS_FAULT) ?
3304 	    (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs,
3305 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3306 	    aflt->flt_status, buf, buflen, lenp));
3307 }
3308 
3309 /*
3310  * This routine is a more generic interface to cpu_get_mem_unum()
3311  * that may be used by other modules (e.g. mm).
3312  */
3313 int
3314 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3315     char *buf, int buflen, int *lenp)
3316 {
3317 	int synd_status, flt_in_memory, ret;
3318 	ushort_t flt_status = 0;
3319 	char unum[UNUM_NAMLEN];
3320 
3321 	/*
3322 	 * Check for an invalid address.
3323 	 */
3324 	if (afar == (uint64_t)-1)
3325 		return (ENXIO);
3326 
3327 	if (synd == (uint64_t)-1)
3328 		synd_status = AFLT_STAT_INVALID;
3329 	else
3330 		synd_status = AFLT_STAT_VALID;
3331 
3332 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3333 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3334 
3335 	/*
3336 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
3337 	 * For Panther, L2$ is not external, so we don't want to
3338 	 * generate an E$ unum for those errors.
3339 	 */
3340 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3341 		if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS)
3342 			flt_status |= ECC_ECACHE;
3343 	} else {
3344 		if (*afsr & C_AFSR_ECACHE)
3345 			flt_status |= ECC_ECACHE;
3346 	}
3347 
3348 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
3349 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3350 	if (ret != 0)
3351 		return (ret);
3352 
3353 	if (*lenp >= buflen)
3354 		return (ENAMETOOLONG);
3355 
3356 	(void) strncpy(buf, unum, buflen);
3357 
3358 	return (0);
3359 }
3360 
3361 /*
3362  * Routine to return memory information associated
3363  * with a physical address and syndrome.
3364  */
3365 int
3366 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3367     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3368     int *segsp, int *banksp, int *mcidp)
3369 {
3370 	int synd_status, synd_code;
3371 
3372 	if (afar == (uint64_t)-1)
3373 		return (ENXIO);
3374 
3375 	if (synd == (uint64_t)-1)
3376 		synd_status = AFLT_STAT_INVALID;
3377 	else
3378 		synd_status = AFLT_STAT_VALID;
3379 
3380 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3381 
3382 	if (p2get_mem_info != NULL)
3383 		return ((p2get_mem_info)(synd_code, afar,
3384 		    mem_sizep, seg_sizep, bank_sizep,
3385 		    segsp, banksp, mcidp));
3386 	else
3387 		return (ENOTSUP);
3388 }
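
/*
 * Hypothetical usage sketch for the interface above: a client module
 * (e.g. mm) that captured a syndrome and AFAR would retrieve the
 * memory topology for that address as follows.
 */
static int
example_mem_info(uint64_t synd, uint64_t afar)
{
	uint64_t mem_size, seg_size, bank_size;
	int segs, banks, mcid;

	return (cpu_get_mem_info(synd, afar, &mem_size, &seg_size,
	    &bank_size, &segs, &banks, &mcid));
}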
3389 
3390 /*
3391  * Routine to return a string identifying the physical
3392  * name associated with a cpuid.
3393  */
3394 int
3395 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3396 {
3397 	int ret;
3398 	char unum[UNUM_NAMLEN];
3399 
3400 	if (&plat_get_cpu_unum) {
3401 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3402 		    != 0)
3403 			return (ret);
3404 	} else {
3405 		return (ENOTSUP);
3406 	}
3407 
3408 	if (*lenp >= buflen)
3409 		return (ENAMETOOLONG);
3410 
3411 	(void) strncpy(buf, unum, buflen);
3412 
3413 	return (0);
3414 }
3415 
3416 /*
3417  * This routine exports the name buffer size.
3418  */
3419 size_t
3420 cpu_get_name_bufsize()
3421 {
3422 	return (UNUM_NAMLEN);
3423 }
3424 
3425 /*
3426  * Historical function, apparently not used.
3427  */
3428 /* ARGSUSED */
3429 void
3430 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
3431 {}
3432 
3433 /*
3434  * Historical function only called for SBus errors in debugging.
3435  */
3436 /*ARGSUSED*/
3437 void
3438 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
3439 {}
3440 
3441 /*
3442  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3443  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3444  * an async fault structure argument is passed in, the captured error state
3445  * (AFSR, AFAR) info will be returned in the structure.
3446  */
3447 int
3448 clear_errors(ch_async_flt_t *ch_flt)
3449 {
3450 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3451 	ch_cpu_errors_t	cpu_error_regs;
3452 
3453 	get_cpu_error_state(&cpu_error_regs);
3454 
3455 	if (ch_flt != NULL) {
3456 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3457 		aflt->flt_addr = cpu_error_regs.afar;
3458 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3459 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3460 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3461 #if defined(SERRANO)
3462 		ch_flt->afar2 = cpu_error_regs.afar2;
3463 #endif	/* SERRANO */
3464 	}
3465 
3466 	set_cpu_error_state(&cpu_error_regs);
3467 
3468 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3469 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3470 }
3471 
3472 /*
3473  * Clear any AFSR error bits, and check for persistence.
3474  *
3475  * It would be desirable to also insist that syndrome match.  PCI handling
3476  * has already filled flt_synd.  For errors trapped by CPU we only fill
3477  * flt_synd when we queue the event, so we do not have a valid flt_synd
3478  * during initial classification (it is valid if we're called as part of
3479  * subsequent low-pil additional classification attempts).  We could try
3480  * to determine which syndrome to use: we know we're only called for
3481  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3482  * would be esynd/none and esynd/msynd, respectively.  If that is
3483  * implemented then what do we do in the case that we do experience an
3484  * error on the same afar but with different syndrome?  At the very least
3485  * we should count such occurrences.  Anyway, for now, we'll leave it as
3486  * it has been for ages.
3487  */
3488 static int
3489 clear_ecc(struct async_flt *aflt)
3490 {
3491 	ch_cpu_errors_t	cpu_error_regs;
3492 
3493 	/*
3494 	 * Snapshot the AFSR and AFAR and clear any errors
3495 	 */
3496 	get_cpu_error_state(&cpu_error_regs);
3497 	set_cpu_error_state(&cpu_error_regs);
3498 
3499 	/*
3500 	 * If any of the same memory access error bits are still on and
3501 	 * the AFAR matches, return that the error is persistent.
3502 	 */
3503 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3504 	    cpu_error_regs.afar == aflt->flt_addr);
3505 }
3506 
3507 /*
3508  * Turn off all cpu error detection, normally only used for panics.
3509  */
3510 void
3511 cpu_disable_errors(void)
3512 {
3513 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3514 }
3515 
3516 /*
3517  * Enable errors.
3518  */
3519 void
3520 cpu_enable_errors(void)
3521 {
3522 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3523 }
3524 
3525 /*
3526  * Flush the entire ecache using displacement flush by reading through a
3527  * physical address range twice as large as the Ecache.
3528  */
3529 void
3530 cpu_flush_ecache(void)
3531 {
3532 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3533 	    cpunodes[CPU->cpu_id].ecache_linesize);
3534 }
3535 
3536 /*
3537  * Return CPU E$ set size - E$ size divided by the associativity.
3538  * We use this function in places where the CPU_PRIVATE ptr may not be
3539  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3540  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3541  * up before the kernel switches from OBP's to the kernel's trap table, so
3542  * we don't have to worry about cpunodes being uninitialized.
3543  */
3544 int
3545 cpu_ecache_set_size(struct cpu *cp)
3546 {
3547 	if (CPU_PRIVATE(cp))
3548 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3549 
3550 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3551 }
3552 
3553 /*
3554  * Flush Ecache line.
3555  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3556  * Uses normal displacement flush for Cheetah.
3557  */
3558 static void
3559 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3560 {
3561 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3562 	int ec_set_size = cpu_ecache_set_size(CPU);
3563 
3564 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3565 }
3566 
3567 /*
3568  * Scrub physical address.
3569  * Scrub code is different depending upon whether this is a Cheetah+ with a
3570  * 2-way Ecache or a direct-mapped Ecache.
3571  */
3572 static void
3573 cpu_scrubphys(struct async_flt *aflt)
3574 {
3575 	int ec_set_size = cpu_ecache_set_size(CPU);
3576 
3577 	scrubphys(aflt->flt_addr, ec_set_size);
3578 }
3579 
3580 /*
3581  * Clear physical address.
3582  * Scrub code is different depending upon whether this is a Cheetah+ with a
3583  * 2-way Ecache or a direct-mapped Ecache.
3584  */
3585 void
3586 cpu_clearphys(struct async_flt *aflt)
3587 {
3588 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3589 	int ec_set_size = cpu_ecache_set_size(CPU);
3591 
3592 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3593 }
3594 
3595 #if defined(CPU_IMP_ECACHE_ASSOC)
3596 /*
3597  * Check for a matching valid line in all the sets.
3598  * If found, return set# + 1. Otherwise return 0.
3599  */
3600 static int
3601 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3602 {
3603 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3604 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3605 	int ec_set_size = cpu_ecache_set_size(CPU);
3606 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3607 	int nway = cpu_ecache_nway();
3608 	int i;
3609 
3610 	for (i = 0; i < nway; i++, ecp++) {
3611 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3612 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3613 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3614 			return (i+1);
3615 	}
3616 	return (0);
3617 }
3618 #endif /* CPU_IMP_ECACHE_ASSOC */
3619 
3620 /*
3621  * Check whether a line in the given logout info matches the specified
3622  * fault address.  If reqval is set then the line must not be Invalid.
3623  * Returns 0 on failure; on success (way + 1) is returned and *level is
3624  * set to 2 for l2$ or 3 for l3$.
3625  */
3626 static int
3627 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3628 {
3629 	ch_diag_data_t *cdp = data;
3630 	ch_ec_data_t *ecp;
3631 	int totalsize, ec_set_size;
3632 	int i, ways;
3633 	int match = 0;
3634 	int tagvalid;
3635 	uint64_t addr, tagpa;
3636 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3637 
3638 	/*
3639 	 * Check the l2$ logout data
3640 	 */
3641 	if (ispanther) {
3642 		ecp = &cdp->chd_l2_data[0];
3643 		ec_set_size = PN_L2_SET_SIZE;
3644 		ways = PN_L2_NWAYS;
3645 	} else {
3646 		ecp = &cdp->chd_ec_data[0];
3647 		ec_set_size = cpu_ecache_set_size(CPU);
3648 		ways = cpu_ecache_nway();
3649 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3650 	}
3651 	/* remove low order PA bits from fault address not used in PA tag */
3652 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3653 	for (i = 0; i < ways; i++, ecp++) {
3654 		if (ispanther) {
3655 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3656 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3657 		} else {
3658 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3659 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3660 			    ecp->ec_tag);
3661 		}
3662 		if (tagpa == addr && (!reqval || tagvalid)) {
3663 			match = i + 1;
3664 			*level = 2;
3665 			break;
3666 		}
3667 	}
3668 
3669 	if (match || !ispanther)
3670 		return (match);
3671 
3672 	/* For Panther we also check the l3$ */
3673 	ecp = &cdp->chd_ec_data[0];
3674 	ec_set_size = PN_L3_SET_SIZE;
3675 	ways = PN_L3_NWAYS;
3676 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3677 
3678 	for (i = 0; i < ways; i++, ecp++) {
3679 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3680 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3681 			match = i + 1;
3682 			*level = 3;
3683 			break;
3684 		}
3685 	}
3686 
3687 	return (match);
3688 }
3689 
3690 #if defined(CPU_IMP_L1_CACHE_PARITY)
3691 /*
3692  * Record information related to the source of a Dcache Parity Error.
3693  */
3694 static void
3695 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3696 {
3697 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3698 	int index;
3699 
3700 	/*
3701 	 * Since instruction decode cannot be done at high PIL,
3702 	 * just examine the entire Dcache to locate the error.
3703 	 */
3704 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3705 		ch_flt->parity_data.dpe.cpl_way = -1;
3706 		ch_flt->parity_data.dpe.cpl_off = -1;
3707 	}
3708 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3709 		cpu_dcache_parity_check(ch_flt, index);
3710 }
3711 
3712 /*
3713  * Check all ways of the Dcache at a specified index for good parity.
3714  */
3715 static void
3716 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3717 {
3718 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3719 	uint64_t parity_bits, pbits, data_word;
3720 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3721 	int way, word, data_byte;
3722 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3723 	ch_dc_data_t tmp_dcp;
3724 
3725 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3726 		/*
3727 		 * Perform diagnostic read.
3728 		 */
3729 		get_dcache_dtag(index + way * dc_set_size,
3730 		    (uint64_t *)&tmp_dcp);
3731 
3732 		/*
3733 		 * Check tag for even parity.
3734 		 * Sum of 1 bits (including parity bit) should be even.
3735 		 */
3736 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3737 			/*
3738 			 * If this is the first error, log detailed information
3739 			 * about it and check the snoop tag.  Otherwise, just
3740 			 * record the fact that we found another error.
3741 			 */
3742 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3743 				ch_flt->parity_data.dpe.cpl_way = way;
3744 				ch_flt->parity_data.dpe.cpl_cache =
3745 				    CPU_DC_PARITY;
3746 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3747 
3748 				if (popc64(tmp_dcp.dc_sntag &
3749 				    CHP_DCSNTAG_PARMASK) & 1) {
3750 					ch_flt->parity_data.dpe.cpl_tag |=
3751 					    CHP_DC_SNTAG;
3752 					ch_flt->parity_data.dpe.cpl_lcnt++;
3753 				}
3754 
3755 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3756 			}
3757 
3758 			ch_flt->parity_data.dpe.cpl_lcnt++;
3759 		}
3760 
3761 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3762 			/*
3763 			 * Panther has more parity bits than the other
3764 			 * processors for covering dcache data and so each
3765 			 * byte of data in each word has its own parity bit.
3766 			 */
3767 			parity_bits = tmp_dcp.dc_pn_data_parity;
3768 			for (word = 0; word < 4; word++) {
3769 				data_word = tmp_dcp.dc_data[word];
3770 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3771 				for (data_byte = 0; data_byte < 8;
3772 				    data_byte++) {
3773 					if (((popc64(data_word &
3774 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3775 					    (pbits & 1)) {
3776 						cpu_record_dc_data_parity(
3777 						    ch_flt, dcp, &tmp_dcp,
3778 						    way, word);
3779 					}
3780 					pbits >>= 1;
3781 					data_word >>= 8;
3782 				}
3783 				parity_bits >>= 8;
3784 			}
3785 		} else {
3786 			/*
3787 			 * Check data array for even parity.
3788 			 * The 8 parity bits are grouped into 4 pairs each
3789 			 * of which covers a 64-bit word.  The endianness is
3790 			 * reversed -- the low-order parity bits cover the
3791 			 * high-order data words.
3792 			 */
3793 			parity_bits = tmp_dcp.dc_utag >> 8;
3794 			for (word = 0; word < 4; word++) {
3795 				pbits = (parity_bits >> (6 - word * 2)) & 3;
3796 				if ((popc64(tmp_dcp.dc_data[word]) +
3797 				    parity_bits_popc[pbits]) & 1) {
3798 					cpu_record_dc_data_parity(ch_flt, dcp,
3799 					    &tmp_dcp, way, word);
3800 				}
3801 			}
3802 		}
3803 	}
3804 }
3805 
3806 static void
3807 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
3808     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
3809 {
3810 	/*
3811 	 * If this is the first error, log detailed information about it.
3812 	 * Otherwise, just record the fact that we found another error.
3813 	 */
3814 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3815 		ch_flt->parity_data.dpe.cpl_way = way;
3816 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
3817 		ch_flt->parity_data.dpe.cpl_off = word * 8;
3818 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
3819 	}
3820 	ch_flt->parity_data.dpe.cpl_lcnt++;
3821 }
3822 
3823 /*
3824  * Record information related to the source of an Icache Parity Error.
3825  *
3826  * Called with the Icache disabled so any diagnostic accesses are safe.
3827  */
3828 static void
3829 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
3830 {
3831 	int	ic_set_size;
3832 	int	ic_linesize;
3833 	int	index;
3834 
3835 	if (CPU_PRIVATE(CPU)) {
3836 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3837 		    CH_ICACHE_NWAY;
3838 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3839 	} else {
3840 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3841 		ic_linesize = icache_linesize;
3842 	}
3843 
3844 	ch_flt->parity_data.ipe.cpl_way = -1;
3845 	ch_flt->parity_data.ipe.cpl_off = -1;
3846 
3847 	for (index = 0; index < ic_set_size; index += ic_linesize)
3848 		cpu_icache_parity_check(ch_flt, index);
3849 }
3850 
3851 /*
3852  * Check all ways of the Icache at a specified index for good parity.
3853  */
3854 static void
3855 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
3856 {
3857 	uint64_t parmask, pn_inst_parity;
3858 	int ic_set_size;
3859 	int ic_linesize;
3860 	int flt_index, way, instr, num_instr;
3861 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3862 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
3863 	ch_ic_data_t tmp_icp;
3864 
3865 	if (CPU_PRIVATE(CPU)) {
3866 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3867 		    CH_ICACHE_NWAY;
3868 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3869 	} else {
3870 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3871 		ic_linesize = icache_linesize;
3872 	}
3873 
3874 	/*
3875 	 * Panther has twice as many instructions per icache line and the
3876 	 * instruction parity bit is in a different location.
3877 	 */
3878 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3879 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
3880 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
3881 	} else {
3882 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
3883 		pn_inst_parity = 0;
3884 	}
3885 
3886 	/*
3887 	 * Index at which we expect to find the parity error.
3888 	 */
3889 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
3890 
3891 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
3892 		/*
3893 		 * Diagnostic reads expect address argument in ASI format.
3894 		 */
3895 		get_icache_dtag(2 * (index + way * ic_set_size),
3896 		    (uint64_t *)&tmp_icp);
3897 
3898 		/*
3899 		 * If this is the index at which we expect to find the
3900 		 * error, log detailed information about each of the ways.
3901 		 * This information will be displayed later if we can't
3902 		 * determine the exact way in which the error is located.
3903 		 */
3904 		if (flt_index == index)
3905 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
3906 
3907 		/*
3908 		 * Check tag for even parity.
3909 		 * Sum of 1 bits (including parity bit) should be even.
3910 		 */
3911 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
3912 			/*
3913 			 * If this way is the one in which we expected
3914 			 * to find the error, record the way and check the
3915 			 * snoop tag.  Otherwise, just record the fact that
3916 			 * we found another error.
3917 			 */
3918 			if (flt_index == index) {
3919 				ch_flt->parity_data.ipe.cpl_way = way;
3920 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
3921 
3922 				if (popc64(tmp_icp.ic_sntag &
3923 				    CHP_ICSNTAG_PARMASK) & 1) {
3924 					ch_flt->parity_data.ipe.cpl_tag |=
3925 					    CHP_IC_SNTAG;
3926 					ch_flt->parity_data.ipe.cpl_lcnt++;
3927 				}
3928 
3929 			}
3930 			ch_flt->parity_data.ipe.cpl_lcnt++;
3931 			continue;
3932 		}
3933 
3934 		/*
3935 		 * Check instruction data for even parity.
3936 		 * Bits participating in parity differ for PC-relative
3937 		 * versus non-PC-relative instructions.
3938 		 */
3939 		for (instr = 0; instr < num_instr; instr++) {
3940 			parmask = (tmp_icp.ic_data[instr] &
3941 			    CH_ICDATA_PRED_ISPCREL) ?
3942 			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
3943 			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
3944 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
3945 				/*
3946 				 * If this way is the one in which we expected
3947 				 * to find the error, record the way and
3948 				 * offset.  Otherwise, just log the fact that
3949 				 * we found another error.
3950 				 */
3951 				if (flt_index == index) {
3952 					ch_flt->parity_data.ipe.cpl_way = way;
3953 					ch_flt->parity_data.ipe.cpl_off =
3954 					    instr * 4;
3955 				}
3956 				ch_flt->parity_data.ipe.cpl_lcnt++;
3957 				continue;
3958 			}
3959 		}
3960 	}
3961 }
3962 
3963 /*
3964  * Record information related to the source of a Pcache Parity Error.
3965  */
3966 static void
3967 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
3968 {
3969 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3970 	int index;
3971 
3972 	/*
3973 	 * Since instruction decode cannot be done at high PIL, just
3974 	 * examine the entire Pcache to check for any parity errors.
3975 	 */
3976 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3977 		ch_flt->parity_data.dpe.cpl_way = -1;
3978 		ch_flt->parity_data.dpe.cpl_off = -1;
3979 	}
3980 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
3981 		cpu_pcache_parity_check(ch_flt, index);
3982 }
3983 
3984 /*
3985  * Check all ways of the Pcache at a specified index for good parity.
3986  */
3987 static void
3988 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
3989 {
3990 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3991 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
3992 	int way, word, pbit, parity_bits;
3993 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
3994 	ch_pc_data_t tmp_pcp;
3995 
3996 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
3997 		/*
3998 		 * Perform diagnostic read.
3999 		 */
4000 		get_pcache_dtag(index + way * pc_set_size,
4001 		    (uint64_t *)&tmp_pcp);
4002 		/*
4003 		 * Check data array for odd parity. There are 8 parity
4004 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4005 		 * of those bits covers exactly 8 bytes of the data
4006 		 * array:
4007 		 *
4008 		 *	parity bit	P$ data bytes covered
4009 		 *	----------	---------------------
4010 		 *	50		63:56
4011 		 *	51		55:48
4012 		 *	52		47:40
4013 		 *	53		39:32
4014 		 *	54		31:24
4015 		 *	55		23:16
4016 		 *	56		15:8
4017 		 *	57		7:0
4018 		 */
4019 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4020 		for (word = 0; word < pc_data_words; word++) {
4021 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4022 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4023 				/*
4024 				 * If this is the first error, log detailed
4025 				 * information about it.  Otherwise, just record
4026 				 * the fact that we found another error.
4027 				 */
4028 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4029 					ch_flt->parity_data.dpe.cpl_way = way;
4030 					ch_flt->parity_data.dpe.cpl_cache =
4031 					    CPU_PC_PARITY;
4032 					ch_flt->parity_data.dpe.cpl_off =
4033 					    word * sizeof (uint64_t);
4034 					bcopy(&tmp_pcp, pcp,
4035 					    sizeof (ch_pc_data_t));
4036 				}
4037 				ch_flt->parity_data.dpe.cpl_lcnt++;
4038 			}
4039 		}
4040 	}
4041 }
4042 
4043 
4044 /*
4045  * Add L1 Data cache data to the ereport payload.
4046  */
4047 static void
4048 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4049 {
4050 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4051 	ch_dc_data_t *dcp;
4052 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4053 	uint_t nelem;
4054 	int i, ways_to_check, ways_logged = 0;
4055 
4056 	/*
4057 	 * If this is a D$ fault then there may be multiple
4058 	 * ways captured in the ch_parity_log_t structure.
4059 	 * Otherwise, there will be at most one way captured
4060 	 * in the ch_diag_data_t struct.
4061 	 * Check each way to see if it should be encoded.
4062 	 */
4063 	if (ch_flt->flt_type == CPU_DC_PARITY)
4064 		ways_to_check = CH_DCACHE_NWAY;
4065 	else
4066 		ways_to_check = 1;
4067 	for (i = 0; i < ways_to_check; i++) {
4068 		if (ch_flt->flt_type == CPU_DC_PARITY)
4069 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4070 		else
4071 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4072 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4073 			bcopy(dcp, &dcdata[ways_logged],
4074 			    sizeof (ch_dc_data_t));
4075 			ways_logged++;
4076 		}
4077 	}
4078 
4079 	/*
4080 	 * Add the dcache data to the payload.
4081 	 */
4082 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4083 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4084 	if (ways_logged != 0) {
4085 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4086 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4087 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4088 	}
4089 }
4090 
4091 /*
4092  * Add L1 Instruction cache data to the ereport payload.
4093  */
4094 static void
4095 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4096 {
4097 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4098 	ch_ic_data_t *icp;
4099 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4100 	uint_t nelem;
4101 	int i, ways_to_check, ways_logged = 0;
4102 
4103 	/*
4104 	 * If this is an I$ fault then there may be multiple
4105 	 * ways captured in the ch_parity_log_t structure.
4106 	 * Otherwise, there will be at most one way captured
4107 	 * in the ch_diag_data_t struct.
4108 	 * Check each way to see if it should be encoded.
4109 	 */
4110 	if (ch_flt->flt_type == CPU_IC_PARITY)
4111 		ways_to_check = CH_ICACHE_NWAY;
4112 	else
4113 		ways_to_check = 1;
4114 	for (i = 0; i < ways_to_check; i++) {
4115 		if (ch_flt->flt_type == CPU_IC_PARITY)
4116 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4117 		else
4118 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4119 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4120 			bcopy(icp, &icdata[ways_logged],
4121 			    sizeof (ch_ic_data_t));
4122 			ways_logged++;
4123 		}
4124 	}
4125 
4126 	/*
4127 	 * Add the icache data to the payload.
4128 	 */
4129 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4130 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4131 	if (ways_logged != 0) {
4132 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4133 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4134 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4135 	}
4136 }
4137 
4138 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4139 
4140 /*
4141  * Add ecache data to payload.
4142  */
4143 static void
4144 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4145 {
4146 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4147 	ch_ec_data_t *ecp;
4148 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4149 	uint_t nelem;
4150 	int i, ways_logged = 0;
4151 
4152 	/*
4153 	 * Check each way to see if it should be encoded
4154 	 * and concatenate it into a temporary buffer.
4155 	 */
4156 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4157 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4158 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4159 			bcopy(ecp, &ecdata[ways_logged],
4160 			    sizeof (ch_ec_data_t));
4161 			ways_logged++;
4162 		}
4163 	}
4164 
4165 	/*
4166 	 * Panther CPUs have an additional level of cache and so
4167 	 * what we just collected was the L3 (ecache) and not the
4168 	 * L2 cache.
4169 	 */
4170 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4171 		/*
4172 		 * Add the L3 (ecache) data to the payload.
4173 		 */
4174 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4175 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4176 		if (ways_logged != 0) {
4177 			nelem = sizeof (ch_ec_data_t) /
4178 			    sizeof (uint64_t) * ways_logged;
4179 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4180 			    DATA_TYPE_UINT64_ARRAY, nelem,
4181 			    (uint64_t *)ecdata, NULL);
4182 		}
4183 
4184 		/*
4185 		 * Now collect the L2 cache.
4186 		 */
4187 		ways_logged = 0;
4188 		for (i = 0; i < PN_L2_NWAYS; i++) {
4189 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4190 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4191 				bcopy(ecp, &ecdata[ways_logged],
4192 				    sizeof (ch_ec_data_t));
4193 				ways_logged++;
4194 			}
4195 		}
4196 	}
4197 
4198 	/*
4199 	 * Add the L2 cache data to the payload.
4200 	 */
4201 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4202 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4203 	if (ways_logged != 0) {
4204 		nelem = sizeof (ch_ec_data_t) /
4205 		    sizeof (uint64_t) * ways_logged;
4206 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4207 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)ecdata, NULL);
4208 	}
4209 }
4210 
4211 /*
4212  * Encode the data saved in the ch_async_flt_t struct into
4213  * the FM ereport payload.
4214  */
4215 static void
4216 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4217     nvlist_t *resource, int *afar_status, int *synd_status)
4218 {
4219 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4220 	*synd_status = AFLT_STAT_INVALID;
4221 	*afar_status = AFLT_STAT_INVALID;
4222 
4223 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4224 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4225 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4226 	}
4227 
4228 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4229 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4230 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4231 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4232 	}
4233 
4234 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4235 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4236 		    ch_flt->flt_bit);
4237 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4238 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4239 	}
4240 
4241 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4242 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4243 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4244 	}
4245 
4246 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4247 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4248 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4249 	}
4250 
4251 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4252 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4253 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4254 	}
4255 
4256 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4257 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4258 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4259 	}
4260 
4261 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4262 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4263 		    DATA_TYPE_BOOLEAN_VALUE,
4264 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4265 	}
4266 
4267 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4268 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4269 		    DATA_TYPE_BOOLEAN_VALUE,
4270 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4271 	}
4272 
4273 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4274 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4275 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4276 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4277 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4278 	}
4279 
4280 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4281 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4282 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4283 	}
4284 
4285 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4286 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4287 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4288 	}
4289 
4290 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4291 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4292 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4293 	}
4294 
4295 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4296 		cpu_payload_add_ecache(aflt, payload);
4297 
4298 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4299 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4300 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4301 	}
4302 
4303 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4304 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4305 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4306 	}
4307 
4308 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4309 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4310 		    DATA_TYPE_UINT32_ARRAY, 16,
4311 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4312 	}
4313 
4314 #if defined(CPU_IMP_L1_CACHE_PARITY)
4315 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4316 		cpu_payload_add_dcache(aflt, payload);
4317 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4318 		cpu_payload_add_icache(aflt, payload);
4319 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4320 
4321 #if defined(CHEETAH_PLUS)
4322 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4323 		cpu_payload_add_pcache(aflt, payload);
4324 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4325 		cpu_payload_add_tlb(aflt, payload);
4326 #endif	/* CHEETAH_PLUS */
4327 	/*
4328 	 * Create the FMRI that goes into the payload
4329 	 * and contains the unum info if necessary.
4330 	 */
4331 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) &&
4332 	    (*afar_status == AFLT_STAT_VALID)) {
4333 		char unum[UNUM_NAMLEN];
4334 		int len;
4335 
4336 		if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum,
4337 		    UNUM_NAMLEN, &len) == 0) {
4338 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4339 			    NULL, unum, NULL);
4340 			fm_payload_set(payload,
4341 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4342 			    DATA_TYPE_NVLIST, resource, NULL);
4343 		}
4344 	}
4345 }
4346 
4347 /*
4348  * Initialize the way info if necessary.
4349  */
4350 void
4351 cpu_ereport_init(struct async_flt *aflt)
4352 {
4353 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4354 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4355 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4356 	int i;
4357 
4358 	/*
4359 	 * Initialize the info in the CPU logout structure.
4360 	 * The I$/D$ way information is not initialized here
4361 	 * since it is captured in the logout assembly code.
4362 	 */
4363 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4364 		(ecp + i)->ec_way = i;
4365 
4366 	for (i = 0; i < PN_L2_NWAYS; i++)
4367 		(l2p + i)->ec_way = i;
4368 }
4369 
4370 /*
4371  * Returns whether fault address is valid for this error bit and
4372  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4373  */
4374 int
4375 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4376 {
4377 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4378 
4379 	return ((aflt->flt_stat & C_AFSR_MEMORY) &&
4380 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4381 	    AFLT_STAT_VALID &&
4382 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4383 }
4384 
4385 static void
4386 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4387 {
4388 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4389 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4390 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4391 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4392 #if defined(CPU_IMP_ECACHE_ASSOC)
4393 	int i, nway;
4394 #endif /* CPU_IMP_ECACHE_ASSOC */
4395 
4396 	/*
4397 	 * Check if the captured CPU logout was valid.
4398 	 */
4399 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4400 	    ch_flt->flt_data_incomplete)
4401 		return;
4402 
4403 #if defined(CPU_IMP_ECACHE_ASSOC)
4404 	nway = cpu_ecache_nway();
4405 	i = cpu_ecache_line_valid(ch_flt);
4406 	if (i == 0 || i > nway) {
4407 		for (i = 0; i < nway; i++)
4408 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4409 	} else
4410 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4411 #else /* CPU_IMP_ECACHE_ASSOC */
4412 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4413 #endif /* CPU_IMP_ECACHE_ASSOC */
4414 
4415 #if defined(CHEETAH_PLUS)
4416 	pn_cpu_log_diag_l2_info(ch_flt);
4417 #endif /* CHEETAH_PLUS */
4418 
4419 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4420 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4421 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4422 	}
4423 
4424 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4425 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4426 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4427 		else
4428 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4429 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4430 	}
4431 }
4432 
4433 /*
4434  * Cheetah ECC calculation.
4435  *
4436  * We only need to do the calculation on the data bits and can ignore check
4437  * bit and Mtag bit terms in the calculation.
4438  */
4439 static uint64_t ch_ecc_table[9][2] = {
4440 	/*
4441 	 * low order 64-bits   high-order 64-bits
4442 	 */
4443 	{ 0x46bffffeccd1177f, 0x488800022100014c },
4444 	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
4445 	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
4446 	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
4447 	{ 0x7f5511421b113809, 0x901c88d84288aafe },
4448 	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
4449 	{ 0xf552181014448344, 0x7ff8f4443e411911 },
4450 	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
4451 	{ 0x3280008440001112, 0xfee88b337ffffd62 },
4452 };
4453 
4454 /*
4455  * 64-bit population count, using the well-known popcnt trick.
4456  * We could use the UltraSPARC V9 POPC instruction, but some
4457  * CPUs including Cheetahplus and Jaguar do not support that
4458  * instruction.
4459  */
4460 int
4461 popc64(uint64_t val)
4462 {
4463 	int cnt;
4464 
4465 	for (cnt = 0; val != 0; val &= val - 1)
4466 		cnt++;
4467 	return (cnt);
4468 }
4469 
4470 /*
4471  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4472  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4473  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4474  * instead of doing all the xor's.
4475  */
4476 uint32_t
4477 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4478 {
4479 	int bitno, s;
4480 	int synd = 0;
4481 
4482 	for (bitno = 0; bitno < 9; bitno++) {
4483 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4484 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4485 		synd |= (s << bitno);
4486 	}
4487 	return (synd);
4488 
4490 }
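
/*
 * A minimal sketch, not part of the handlers above: xor'ing freshly
 * generated check bits with the stored check bits yields the syndrome.
 * Zero means the data and check bits agree; a non-zero value can be
 * mapped to a bad-bit code via synd_to_synd_code().  The stored_ecc
 * argument is hypothetical and assumes the same 9-bit generator
 * produced it.
 */
static int
example_check_ecc(uint64_t data_low, uint64_t data_high, uint32_t stored_ecc)
{
	uint32_t synd = us3_gen_ecc(data_low, data_high) ^ stored_ecc;

	if (synd == 0)
		return (-1);	/* no error detected */

	return (synd_to_synd_code(AFLT_STAT_VALID, (ushort_t)synd, C_AFSR_CE));
}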
4491 /*
4492  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4493  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4494  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4495  */
4496 static void
4497 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4498     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4499 {
4500 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4501 
4502 	if (reason &&
4503 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4504 		(void) strcat(reason, eccp->ec_reason);
4505 	}
4506 
4507 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4508 	ch_flt->flt_type = eccp->ec_flt_type;
4509 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4510 		ch_flt->flt_diag_data = *cdp;
4511 	else
4512 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4513 	aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit);
4514 
4515 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4516 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4517 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4518 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4519 	else
4520 		aflt->flt_synd = 0;
4521 
4522 	aflt->flt_payload = eccp->ec_err_payload;
4523 
4524 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4525 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4526 		cpu_errorq_dispatch(eccp->ec_err_class,
4527 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4528 		    aflt->flt_panic);
4529 	else
4530 		cpu_errorq_dispatch(eccp->ec_err_class,
4531 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4532 		    aflt->flt_panic);
4533 }
4534 
4535 /*
4536  * Queue events on async event queue one event per error bit.  First we
4537  * queue the events that we "expect" for the given trap, then we queue events
4538  * that we may not expect.  Return the number of events queued.
4539  */
4540 int
4541 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4542     ch_cpu_logout_t *clop)
4543 {
4544 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4545 	ecc_type_to_info_t *eccp;
4546 	int nevents = 0;
4547 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4548 #if defined(CHEETAH_PLUS)
4549 	uint64_t orig_t_afsr_errs;
4550 #endif
4551 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4552 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4553 	ch_diag_data_t *cdp = NULL;
4554 
4555 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4556 
4557 #if defined(CHEETAH_PLUS)
4558 	orig_t_afsr_errs = t_afsr_errs;
4559 
4560 	/*
4561 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4562 	 */
4563 	if (clop != NULL) {
4564 		/*
4565 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4566 		 * flt_addr and flt_stat fields will be reset to the primaries
4567 		 * below, but the sdw_addr and sdw_stat will stay as the
4568 		 * secondaries.
4569 		 */
4570 		cdp = &clop->clo_sdw_data;
4571 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4572 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4573 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4574 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4575 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4576 
4577 		/*
4578 		 * If the primary and shadow AFSR differ, tag the shadow as
4579 		 * the first fault.
4580 		 */
4581 		if ((primary_afar != cdp->chd_afar) ||
4582 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4583 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4584 		}
4585 
4586 		/*
4587 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4588 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4589 		 * is expected to be zero for those CPUs which do not have
4590 		 * an AFSR_EXT register.
4591 		 */
4592 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4593 			if ((eccp->ec_afsr_bit &
4594 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4595 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4596 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4597 				cdp = NULL;
4598 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4599 				nevents++;
4600 			}
4601 		}
4602 
4603 		/*
4604 		 * If the ME bit is on in the primary AFSR, turn back on
4605 		 * all of the error bits that may set the ME bit to make
4606 		 * sure we see the ME AFSR error logs.
4607 		 */
4608 		if ((primary_afsr & C_AFSR_ME) != 0)
4609 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4610 	}
4611 #endif	/* CHEETAH_PLUS */
4612 
4613 	if (clop != NULL)
4614 		cdp = &clop->clo_data;
4615 
4616 	/*
4617 	 * Queue expected errors, error bit and fault type must match
4618 	 * in the ecc_type_to_info table.
4619 	 */
4620 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4621 	    eccp++) {
4622 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4623 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4624 #if defined(SERRANO)
4625 			/*
4626 			 * For FRC/FRU errors on Serrano the afar2 captures
4627 			 * the address and the associated data is
4628 			 * in the shadow logout area.
4629 			 */
4630 			if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) {
4631 				if (clop != NULL)
4632 					cdp = &clop->clo_sdw_data;
4633 				aflt->flt_addr = ch_flt->afar2;
4634 			} else {
4635 				if (clop != NULL)
4636 					cdp = &clop->clo_data;
4637 				aflt->flt_addr = primary_afar;
4638 			}
4639 #else	/* SERRANO */
4640 			aflt->flt_addr = primary_afar;
4641 #endif	/* SERRANO */
4642 			aflt->flt_stat = primary_afsr;
4643 			ch_flt->afsr_ext = primary_afsr_ext;
4644 			ch_flt->afsr_errs = primary_afsr_errs;
4645 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4646 			cdp = NULL;
4647 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4648 			nevents++;
4649 		}
4650 	}
4651 
4652 	/*
4653 	 * Queue unexpected errors, error bit only match.
4654 	 */
4655 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4656 	    eccp++) {
4657 		if (eccp->ec_afsr_bit & t_afsr_errs) {
4658 #if defined(SERRANO)
4659 			/*
4660 			 * For FRC/FRU errors on Serrano the afar2 captures
4661 			 * the address and the associated data is
4662 			 * in the shadow logout area.
4663 			 */
4664 			if (eccp->ec_afsr_bit & (C_AFSR_FRC | C_AFSR_FRU)) {
4665 				if (clop != NULL)
4666 					cdp = &clop->clo_sdw_data;
4667 				aflt->flt_addr = ch_flt->afar2;
4668 			} else {
4669 				if (clop != NULL)
4670 					cdp = &clop->clo_data;
4671 				aflt->flt_addr = primary_afar;
4672 			}
4673 #else	/* SERRANO */
4674 			aflt->flt_addr = primary_afar;
4675 #endif	/* SERRANO */
4676 			aflt->flt_stat = primary_afsr;
4677 			ch_flt->afsr_ext = primary_afsr_ext;
4678 			ch_flt->afsr_errs = primary_afsr_errs;
4679 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4680 			cdp = NULL;
4681 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4682 			nevents++;
4683 		}
4684 	}
4685 	return (nevents);
4686 }
4687 
4688 /*
4689  * Return trap type number.
4690  */
4691 uint8_t
4692 flt_to_trap_type(struct async_flt *aflt)
4693 {
4694 	if (aflt->flt_status & ECC_I_TRAP)
4695 		return (TRAP_TYPE_ECC_I);
4696 	if (aflt->flt_status & ECC_D_TRAP)
4697 		return (TRAP_TYPE_ECC_D);
4698 	if (aflt->flt_status & ECC_F_TRAP)
4699 		return (TRAP_TYPE_ECC_F);
4700 	if (aflt->flt_status & ECC_C_TRAP)
4701 		return (TRAP_TYPE_ECC_C);
4702 	if (aflt->flt_status & ECC_DP_TRAP)
4703 		return (TRAP_TYPE_ECC_DP);
4704 	if (aflt->flt_status & ECC_IP_TRAP)
4705 		return (TRAP_TYPE_ECC_IP);
4706 	if (aflt->flt_status & ECC_ITLB_TRAP)
4707 		return (TRAP_TYPE_ECC_ITLB);
4708 	if (aflt->flt_status & ECC_DTLB_TRAP)
4709 		return (TRAP_TYPE_ECC_DTLB);
4710 	return (TRAP_TYPE_UNKNOWN);
4711 }
4712 
4713 /*
4714  * Decide an error type based on detector and leaky/partner tests.
4715  * The following array is used for quick translation - it must
4716  * stay in sync with ce_dispact_t.
4717  */
4718 
4719 static char *cetypes[] = {
4720 	CE_DISP_DESC_U,
4721 	CE_DISP_DESC_I,
4722 	CE_DISP_DESC_PP,
4723 	CE_DISP_DESC_P,
4724 	CE_DISP_DESC_L,
4725 	CE_DISP_DESC_PS,
4726 	CE_DISP_DESC_S
4727 };
4728 
4729 char *
4730 flt_to_error_type(struct async_flt *aflt)
4731 {
4732 	ce_dispact_t dispact, disp;
4733 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
4734 
4735 	/*
4736 	 * The memory payload bundle is shared by some events that do
4737 	 * not perform any classification.  For those flt_disp will be
4738 	 * 0 and we will return "unknown".
4739 	 */
4740 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
4741 		return (cetypes[CE_DISP_UNKNOWN]);
4742 
4743 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
4744 
4745 	/*
4746 	 * It is also possible that no scrub/classification was performed
4747 	 * by the detector, for instance where a disrupting error was logged
4748 	 * in the AFSR while CEEN was off in cpu_deferred_error.
4749 	 */
4750 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
4751 		return (cetypes[CE_DISP_UNKNOWN]);
4752 
4753 	/*
4754 	 * Lookup type in initial classification/action table
4755 	 */
4756 	dispact = CE_DISPACT(ce_disp_table,
4757 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
4758 	    CE_XDIAG_STATE(dtcrinfo),
4759 	    CE_XDIAG_CE1SEEN(dtcrinfo),
4760 	    CE_XDIAG_CE2SEEN(dtcrinfo));
4761 
4762 	/*
4763 	 * A bad lookup is not something to panic production systems for.
4764 	 */
4765 	ASSERT(dispact != CE_DISP_BAD);
4766 	if (dispact == CE_DISP_BAD)
4767 		return (cetypes[CE_DISP_UNKNOWN]);
4768 
4769 	disp = CE_DISP(dispact);
4770 
4771 	switch (disp) {
4772 	case CE_DISP_UNKNOWN:
4773 	case CE_DISP_INTERMITTENT:
4774 		break;
4775 
4776 	case CE_DISP_POSS_PERS:
4777 		/*
4778 		 * "Possible persistent" errors to which we have applied a valid
4779 		 * leaky test can be separated into "persistent" or "leaky".
4780 		 */
4781 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
4782 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
4783 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
4784 			    CE_XDIAG_CE2SEEN(lkyinfo))
4785 				disp = CE_DISP_LEAKY;
4786 			else
4787 				disp = CE_DISP_PERS;
4788 		}
4789 		break;
4790 
4791 	case CE_DISP_POSS_STICKY:
4792 		/*
4793 		 * Promote "possible sticky" results that have been
4794 		 * confirmed by a partner test to "sticky".  Unconfirmed
4795 		 * "possible sticky" events are left at that status - we do not
4796 		 * guess at any bad reader/writer etc status here.
4797 		 */
4798 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
4799 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
4800 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
4801 			disp = CE_DISP_STICKY;
4802 
4803 		/*
4804 		 * Promote "possible sticky" results on a uniprocessor
4805 		 * to "sticky"
4806 		 */
4807 		if (disp == CE_DISP_POSS_STICKY &&
4808 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
4809 			disp = CE_DISP_STICKY;
4810 		break;
4811 
4812 	default:
4813 		disp = CE_DISP_UNKNOWN;
4814 		break;
4815 	}
4816 
4817 	return (cetypes[disp]);
4818 }
4819 
4820 /*
4821  * Given the entire afsr, the specific bit to check and a prioritized list of
4822  * error bits, determine the validity of the various overwrite priority
4823  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which has
4824  * different overwrite priorities.
4825  *
4826  * Given a specific afsr error bit and the entire afsr, there are three cases:
4827  *   INVALID:	The specified bit is lower overwrite priority than some other
4828  *		error bit which is on in the afsr (or IVU/IVC).
4829  *   VALID:	The specified bit is higher priority than all other error bits
4830  *		which are on in the afsr.
4831  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
4832  *		bit is on in the afsr.
4833  */
4834 int
4835 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
4836 {
4837 	uint64_t afsr_ow;
4838 
4839 	while ((afsr_ow = *ow_bits++) != 0) {
4840 		/*
4841 		 * If bit is in the priority class, check to see if another
4842 		 * bit in the same class is on => ambiguous.  Otherwise,
4843 		 * the value is valid.  If the bit is not on at this priority
4844 		 * class, but a higher priority bit is on, then the value is
4845 		 * invalid.
4846 		 */
4847 		if (afsr_ow & afsr_bit) {
4848 			/*
4849 			 * If equal pri bit is on, ambiguous.
4850 			 */
4851 			if (afsr & (afsr_ow & ~afsr_bit))
4852 				return (AFLT_STAT_AMBIGUOUS);
4853 			return (AFLT_STAT_VALID);
4854 		} else if (afsr & afsr_ow)
4855 			break;
4856 	}
4857 
4858 	/*
4859 	 * We didn't find a match or a higher priority bit was on.  Not
4860 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
4861 	 */
4862 	return (AFLT_STAT_INVALID);
4863 }
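
/*
 * Illustrative sketch only, using a hypothetical two-class priority
 * list in which UE outranks CE: the AFAR is trusted for a CE only if
 * no UE bit is also on, and two bits within one class are AMBIGUOUS.
 */
static int
example_afar_trusted_for_ce(uint64_t afsr)
{
	uint64_t hypothetical_ow[] = { C_AFSR_UE, C_AFSR_CE, 0 };

	return (afsr_to_overw_status(afsr, C_AFSR_CE, hypothetical_ow) ==
	    AFLT_STAT_VALID);
}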
4864 
4865 static int
4866 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
4867 {
4868 #if defined(SERRANO)
4869 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
4870 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
4871 	else
4872 #endif	/* SERRANO */
4873 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
4874 }
4875 
4876 static int
4877 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
4878 {
4879 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
4880 }
4881 
4882 static int
4883 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
4884 {
4885 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
4886 }
4887 
4888 static int
4889 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
4890 {
4891 #ifdef lint
4892 	cpuid = cpuid;
4893 #endif
4894 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
4895 		return (afsr_to_msynd_status(afsr, afsr_bit));
4896 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
4897 #if defined(CHEETAH_PLUS)
4898 		/*
4899 		 * The E_SYND overwrite policy is slightly different
4900 		 * for Panther CPUs.
4901 		 */
4902 		if (IS_PANTHER(cpunodes[cpuid].implementation))
4903 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
4904 		else
4905 			return (afsr_to_esynd_status(afsr, afsr_bit));
4906 #else /* CHEETAH_PLUS */
4907 		return (afsr_to_esynd_status(afsr, afsr_bit));
4908 #endif /* CHEETAH_PLUS */
4909 	} else {
4910 		return (AFLT_STAT_INVALID);
4911 	}
4912 }
4913 
4914 /*
4915  * Slave CPU stick synchronization.
4916  */
4917 void
4918 sticksync_slave(void)
4919 {
4920 	int 		i;
4921 	int		tries = 0;
4922 	int64_t		tskew;
4923 	int64_t		av_tskew;
4924 
4925 	kpreempt_disable();
4926 	/* wait for the master side */
4927 	while (stick_sync_cmd != SLAVE_START)
4928 		;
4929 	/*
4930 	 * Synchronization should only take a few tries at most. But in the
4931 	 * odd case where the cpu isn't cooperating, we'll keep trying. A cpu
4932 	 * without its stick synchronized wouldn't be a good citizen.
4933 	 */
4934 	while (slave_done == 0) {
4935 		/*
4936 		 * Time skew calculation.
4937 		 */
4938 		av_tskew = tskew = 0;
4939 
4940 		for (i = 0; i < stick_iter; i++) {
4941 			/* make location hot */
4942 			timestamp[EV_A_START] = 0;
4943 			stick_timestamp(&timestamp[EV_A_START]);
4944 
4945 			/* tell the master we're ready */
4946 			stick_sync_cmd = MASTER_START;
4947 
4948 			/* and wait */
4949 			while (stick_sync_cmd != SLAVE_CONT)
4950 				;
4951 			/* Event B end */
4952 			stick_timestamp(&timestamp[EV_B_END]);
4953 
4954 			/* calculate time skew */
4955 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
4956 			    - (timestamp[EV_A_END] -
4957 			    timestamp[EV_A_START])) / 2;
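			/*
			 * Why the halving above works (a sketch, assuming
			 * a symmetric one-way latency d and a slave clock
			 * that reads master + skew):
			 *	Event A: A_END - A_START = d - skew
			 *	Event B: B_END - B_START = d + skew
			 * so subtracting the two cancels d and leaves
			 * 2 * skew.
			 */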
4958 
4959 			/* keep running count */
4960 			av_tskew += tskew;
4961 		} /* for */
4962 
4963 		/*
4964 		 * Adjust stick for time skew if not within the max allowed;
4965 		 * otherwise we're all done.
4966 		 */
4967 		if (stick_iter != 0)
4968 			av_tskew = av_tskew / stick_iter;
4969 		if (ABS(av_tskew) > stick_tsk) {
4970 			/*
4971 			 * If the skew is 1 (the slave's STICK register
4972 			 * is 1 STICK ahead of the master's), stick_adj
4973 			 * could fail to adjust the slave's STICK register
4974 			 * if the STICK read on the slave happens to
4975 			 * align with the increment of the STICK.
4976 			 * Therefore, we increment the skew to 2.
4977 			 */
4978 			if (av_tskew == 1)
4979 				av_tskew++;
4980 			stick_adj(-av_tskew);
4981 		} else
4982 			slave_done = 1;
4983 #ifdef DEBUG
4984 		if (tries < DSYNC_ATTEMPTS)
4985 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
4986 			    av_tskew;
4987 		++tries;
4988 #endif /* DEBUG */
4989 #ifdef lint
4990 		tries = tries;
4991 #endif
4992 
4993 	} /* while */
4994 
4995 	/* allow the master to finish */
4996 	stick_sync_cmd = EVENT_NULL;
4997 	kpreempt_enable();
4998 }
4999 
5000 /*
5001  * Master CPU side of stick synchronization.
5002  *  - timestamp end of Event A
5003  *  - timestamp beginning of Event B
5004  */
5005 void
5006 sticksync_master(void)
5007 {
5008 	int		i;
5009 
5010 	kpreempt_disable();
5011 	/* tell the slave we've started */
5012 	slave_done = 0;
5013 	stick_sync_cmd = SLAVE_START;
5014 
5015 	while (slave_done == 0) {
5016 		for (i = 0; i < stick_iter; i++) {
5017 			/* wait for the slave */
5018 			while (stick_sync_cmd != MASTER_START)
5019 				;
5020 			/* Event A end */
5021 			stick_timestamp(&timestamp[EV_A_END]);
5022 
5023 			/* make location hot */
5024 			timestamp[EV_B_START] = 0;
5025 			stick_timestamp(&timestamp[EV_B_START]);
5026 
5027 			/* tell the slave to continue */
5028 			stick_sync_cmd = SLAVE_CONT;
5029 		} /* for */
5030 
5031 		/* wait while slave calculates time skew */
5032 		while (stick_sync_cmd == SLAVE_CONT)
5033 			;
5034 	} /* while */
5035 	kpreempt_enable();
5036 }
5037 
5038 /*
5039  * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to
5040  * Cheetah/Cheetah+ have disrupting errors for copybacks, so we don't need
5041  * the Spitfire hack of xcall'ing all the cpus to ask them to check.  Also,
5042  * panic idle.
5043  */
5044 /*ARGSUSED*/
5045 void
5046 cpu_check_allcpus(struct async_flt *aflt)
5047 {}
5048 
5049 struct kmem_cache *ch_private_cache;
5050 
5051 /*
5052  * Cpu private uninitialization.  Uninitialize the Ecache scrubber and
5053  * deallocate the scrubber data structures and cpu_private data structure.
5054  */
5055 void
5056 cpu_uninit_private(struct cpu *cp)
5057 {
5058 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5059 
5060 	ASSERT(chprp);
5061 	cpu_uninit_ecache_scrub_dr(cp);
5062 	CPU_PRIVATE(cp) = NULL;
5063 	ch_err_tl1_paddrs[cp->cpu_id] = 0;
5064 	kmem_cache_free(ch_private_cache, chprp);
5065 	cmp_delete_cpu(cp->cpu_id);
5066 
5067 }
5068 
5069 /*
5070  * Cheetah Cache Scrubbing
5071  *
5072  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5073  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5074  * protected by either parity or ECC.
5075  *
5076  * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
5077  * cache per second). Due to the specifics of how the I$ control
5078  * logic works with respect to the ASI used to scrub I$ lines, the entire
5079  * I$ is scanned at once.
5080  */
5081 
5082 /*
5083  * Tuneables to enable and disable the scrubbing of the caches, and to tune
5084  * scrubbing behavior.  These may be changed via /etc/system or using mdb
5085  * on a running system.
5086  */
5087 int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5088 
5089 /*
5090  * The following are the PIL levels that the softints/cross traps will fire at.
5091  */
5092 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5093 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5094 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5095 
5096 #if defined(JALAPENO)
5097 
5098 /*
5099  * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5100  * on Jalapeno.
5101  */
5102 int ecache_scrub_enable = 0;
5103 
5104 #else	/* JALAPENO */
5105 
5106 /*
5107  * With all other cpu types, E$ scrubbing is on by default
5108  */
5109 int ecache_scrub_enable = 1;
5110 
5111 #endif	/* JALAPENO */
5112 
5113 
5114 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5115 
5116 /*
5117  * The I$ scrubber tends to cause latency problems for real-time SW, so it
5118  * is disabled by default on non-Cheetah systems
5119  */
5120 int icache_scrub_enable = 0;
5121 
5122 /*
5123  * Tuneables specifying the scrub calls per second and the scan rate
5124  * for each cache
5125  *
5126  * The cyclic times are set during boot based on the following values.
5127  * Changing these values in mdb after this time will have no effect.  If
5128  * a different value is desired, it must be set in /etc/system before a
5129  * reboot.
5130  */
5131 int ecache_calls_a_sec = 1;
5132 int dcache_calls_a_sec = 2;
5133 int icache_calls_a_sec = 2;
5134 
5135 int ecache_scan_rate_idle = 1;
5136 int ecache_scan_rate_busy = 1;
5137 int dcache_scan_rate_idle = 1;
5138 int dcache_scan_rate_busy = 1;
5139 int icache_scan_rate_idle = 1;
5140 int icache_scan_rate_busy = 1;
5141 
5142 #else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5143 
5144 int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
5145 
5146 int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
5147 int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
5148 int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */
5149 
5150 int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
5151 int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
5152 int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
5153 int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
5154 int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
5155 int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
5156 
5157 #endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5158 
5159 /*
5160  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5161  * increment the outstanding request counter and schedule a softint to run
5162  * the scrubber.
5163  */
5164 extern xcfunc_t cache_scrubreq_tl1;
5165 
5166 /*
5167  * These are the softint functions for each cache scrubber
5168  */
5169 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5170 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5171 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5172 
5173 /*
5174  * The cache scrub info table contains cache specific information
5175  * and allows for some of the scrub code to be table driven, reducing
5176  * duplication of similar code among the caches.
5177  *
5178  * This table keeps a copy of the value in the calls per second variable
5179  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5180  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5181  * mdb in a misguided attempt to disable the scrubber).
5182  */
5183 struct scrub_info {
5184 	int		*csi_enable;	/* scrubber enable flag */
5185 	int		csi_freq;	/* scrubber calls per second */
5186 	int		csi_index;	/* index to chsm_outstanding[] */
5187 	uint_t		csi_inum;	/* scrubber interrupt number */
5188 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5189 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5190 	char		csi_name[3];	/* cache name for this scrub entry */
5191 } cache_scrub_info[] = {
5192 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5193 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5194 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5195 };
5196 
5197 /*
5198  * If scrubbing is enabled, increment the outstanding request counter.  If it
5199  * is 1 (meaning there were no previous requests outstanding), call
5200  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5201  * a self trap.
5202  */
5203 static void
5204 do_scrub(struct scrub_info *csi)
5205 {
5206 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5207 	int index = csi->csi_index;
5208 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5209 
5210 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5211 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5212 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5213 			    csi->csi_inum, 0);
5214 		}
5215 	}
5216 }
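
/*
 * Illustrative scenario (assumed, for clarity): if the cyclic fires twice
 * before the softint gets to run, the first do_scrub() takes the counter
 * 0 -> 1 and posts the softint, while the second takes it 1 -> 2 and posts
 * nothing.  The softint handler reads outstanding == 2, scrubs twice, and
 * atomically subtracts 2; if another request arrived in the meantime the
 * subtraction yields a nonzero result and the handler loops again, so no
 * request is lost.
 */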
5217 
5218 /*
5219  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5220  * cross-trap the offline cpus.
5221  */
5222 static void
5223 do_scrub_offline(struct scrub_info *csi)
5224 {
5225 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5226 
5227 	if (CPUSET_ISNULL(cpu_offline_set)) {
5228 		/*
5229 		 * No offline cpus - nothing to do
5230 		 */
5231 		return;
5232 	}
5233 
5234 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5235 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5236 		    csi->csi_index);
5237 	}
5238 }
5239 
5240 /*
5241  * This is the initial setup for the scrubber cyclics - it sets the
5242  * interrupt level, frequency, and function to call.
5243  */
5244 /*ARGSUSED*/
5245 static void
5246 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5247     cyc_time_t *when)
5248 {
5249 	struct scrub_info *csi = (struct scrub_info *)arg;
5250 
5251 	ASSERT(csi != NULL);
5252 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5253 	hdlr->cyh_level = CY_LOW_LEVEL;
5254 	hdlr->cyh_arg = arg;
5255 
5256 	when->cyt_when = 0;	/* Start immediately */
5257 	when->cyt_interval = NANOSEC / csi->csi_freq;
5258 }
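
/*
 * For example, with the plain-Cheetah default of 100 calls per second,
 * cyt_interval = NANOSEC / 100 = 10,000,000ns, i.e. the cyclic fires
 * every 10ms on each online cpu.
 */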
5259 
5260 /*
5261  * Initialization for cache scrubbing.
5262  * This routine is called AFTER all cpus have had cpu_init_private called
5263  * to initialize their private data areas.
5264  */
5265 void
5266 cpu_init_cache_scrub(void)
5267 {
5268 	int i;
5269 	struct scrub_info *csi;
5270 	cyc_omni_handler_t omni_hdlr;
5271 	cyc_handler_t offline_hdlr;
5272 	cyc_time_t when;
5273 
5274 	/*
5275 	 * save away the maximum number of lines for the D$
5276 	 */
5277 	dcache_nlines = dcache_size / dcache_linesize;
5278 
5279 	/*
5280 	 * register the softints for the cache scrubbing
5281 	 */
5282 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5283 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5284 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]);
5285 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5286 
5287 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5288 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5289 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]);
5290 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5291 
5292 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5293 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5294 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]);
5295 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5296 
5297 	/*
5298 	 * start the scrubbing for all the caches
5299 	 */
5300 	mutex_enter(&cpu_lock);
5301 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5302 
5303 		csi = &cache_scrub_info[i];
5304 
5305 		if (!(*csi->csi_enable))
5306 			continue;
5307 
5308 		/*
5309 		 * force the following to be true:
5310 		 *	1 <= calls_a_sec <= hz
5311 		 */
5312 		if (csi->csi_freq > hz) {
5313 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5314 				"(%d); resetting to hz (%d)", csi->csi_name,
5315 				csi->csi_freq, hz);
5316 			csi->csi_freq = hz;
5317 		} else if (csi->csi_freq < 1) {
5318 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5319 				"(%d); resetting to 1", csi->csi_name,
5320 				csi->csi_freq);
5321 			csi->csi_freq = 1;
5322 		}
5323 
5324 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5325 		omni_hdlr.cyo_offline = NULL;
5326 		omni_hdlr.cyo_arg = (void *)csi;
5327 
5328 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5329 		offline_hdlr.cyh_arg = (void *)csi;
5330 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5331 
5332 		when.cyt_when = 0;	/* Start immediately */
5333 		when.cyt_interval = NANOSEC / csi->csi_freq;
5334 
5335 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5336 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5337 	}
5338 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5339 	mutex_exit(&cpu_lock);
5340 }
5341 
5342 /*
5343  * Indicate that the specified cpu is idle.
5344  */
5345 void
5346 cpu_idle_ecache_scrub(struct cpu *cp)
5347 {
5348 	if (CPU_PRIVATE(cp) != NULL) {
5349 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5350 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5351 	}
5352 }
5353 
5354 /*
5355  * Indicate that the specified cpu is busy.
5356  */
5357 void
5358 cpu_busy_ecache_scrub(struct cpu *cp)
5359 {
5360 	if (CPU_PRIVATE(cp) != NULL) {
5361 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5362 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5363 	}
5364 }
5365 
5366 /*
5367  * Initialization for cache scrubbing for the specified cpu.
5368  */
5369 void
5370 cpu_init_ecache_scrub_dr(struct cpu *cp)
5371 {
5372 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5373 	int cpuid = cp->cpu_id;
5374 
5375 	/* initialize the number of lines in the caches */
5376 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5377 	    cpunodes[cpuid].ecache_linesize;
5378 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5379 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5380 
5381 	/*
5382 	 * do_scrub() and do_scrub_offline() check both the global
5383 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5384 	 * check this value before scrubbing.  Currently, we use it to
5385 	 * disable the E$ scrubber on multi-core cpus or while running at
5386 	 * slowed speed.  For now, just turn everything on and allow
5387 	 * cpu_init_private() to change it if necessary.
5388 	 */
5389 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5390 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5391 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5392 
5393 	cpu_busy_ecache_scrub(cp);
5394 }
5395 
5396 /*
5397  * Un-initialization for cache scrubbing for the specified cpu.
5398  */
5399 static void
5400 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5401 {
5402 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5403 
5404 	/*
5405 	 * un-initialize bookkeeping for cache scrubbing
5406 	 */
5407 	bzero(csmp, sizeof (ch_scrub_misc_t));
5408 
5409 	cpu_idle_ecache_scrub(cp);
5410 }
5411 
5412 /*
5413  * Called periodically on each CPU to scrub the D$.
5414  */
5415 static void
5416 scrub_dcache(int how_many)
5417 {
5418 	int i;
5419 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5420 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5421 
5422 	/*
5423 	 * scrub the desired number of lines
5424 	 */
5425 	for (i = 0; i < how_many; i++) {
5426 		/*
5427 		 * scrub a D$ line
5428 		 */
5429 		dcache_inval_line(index);
5430 
5431 		/*
5432 		 * calculate the next D$ line to scrub, assumes
5433 		 * that dcache_nlines is a power of 2
5434 		 */
5435 		index = (index + 1) & (dcache_nlines - 1);
5436 	}
5437 
5438 	/*
5439 	 * set the scrub index for the next visit
5440 	 */
5441 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5442 }
5443 
5444 /*
5445  * Handler for D$ scrub inum softint. Call scrub_dcache until
5446  * we decrement the outstanding request count to zero.
5447  */
5448 /*ARGSUSED*/
5449 static uint_t
5450 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5451 {
5452 	int i;
5453 	int how_many;
5454 	int outstanding;
5455 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5456 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5457 	struct scrub_info *csi = (struct scrub_info *)arg1;
5458 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5459 		dcache_scan_rate_idle : dcache_scan_rate_busy;
5460 
5461 	/*
5462 	 * The scan rates are expressed in units of tenths of a
5463 	 * percent.  A scan rate of 1000 (100%) means the whole
5464 	 * cache is scanned every second.
5465 	 */
5466 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
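
	/*
	 * Worked example (hypothetical cache size): a 64KB D$ with
	 * 32-byte lines has 2048 lines.  With the plain-Cheetah defaults
	 * of scan_rate = 100 (10% per second) and csi_freq = 100,
	 *	how_many = (2048 * 100) / (1000 * 100) = 2
	 * lines per call, or about 200 lines (~10% of the cache) per
	 * second.
	 */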
5467 
5468 	do {
5469 		outstanding = *countp;
5470 		for (i = 0; i < outstanding; i++) {
5471 			scrub_dcache(how_many);
5472 		}
5473 	} while (atomic_add_32_nv(countp, -outstanding));
5474 
5475 	return (DDI_INTR_CLAIMED);
5476 }
5477 
5478 /*
5479  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5480  * by invalidating lines. Due to the characteristics of the ASI which
5481  * is used to invalidate an I$ line, the entire I$ must be invalidated
5482  * rather than an individual I$ line.
5483  */
5484 static void
5485 scrub_icache(int how_many)
5486 {
5487 	int i;
5488 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5489 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5490 	int icache_nlines = csmp->chsm_icache_nlines;
5491 
5492 	/*
5493 	 * scrub the desired number of lines
5494 	 */
5495 	for (i = 0; i < how_many; i++) {
5496 		/*
5497 		 * since the entire I$ must be scrubbed at once,
5498 		 * wait until the index wraps to zero to invalidate
5499 		 * the entire I$
5500 		 */
5501 		if (index == 0) {
5502 			icache_inval_all();
5503 		}
5504 
5505 		/*
5506 		 * calculate the next I$ line to scrub, assumes
5507 		 * that chsm_icache_nlines is a power of 2
5508 		 */
5509 		index = (index + 1) & (icache_nlines - 1);
5510 	}
5511 
5512 	/*
5513 	 * set the scrub index for the next visit
5514 	 */
5515 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5516 }
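
/*
 * Illustrative arithmetic (hypothetical size): with icache_nlines = 512
 * and how_many = 1 per call, index returns to 0 once every 512 calls, so
 * icache_inval_all() runs once per nominal pass over the cache; the
 * per-line index merely paces those full-cache invalidations.
 */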
5517 
5518 /*
5519  * Handler for I$ scrub inum softint. Call scrub_icache until
5520  * we decrement the outstanding request count to zero.
5521  */
5522 /*ARGSUSED*/
5523 static uint_t
5524 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5525 {
5526 	int i;
5527 	int how_many;
5528 	int outstanding;
5529 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5530 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5531 	struct scrub_info *csi = (struct scrub_info *)arg1;
5532 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5533 	    icache_scan_rate_idle : icache_scan_rate_busy;
5534 	int icache_nlines = csmp->chsm_icache_nlines;
5535 
5536 	/*
5537 	 * The scan rates are expressed in units of tenths of a
5538 	 * percent.  A scan rate of 1000 (100%) means the whole
5539 	 * cache is scanned every second.
5540 	 */
5541 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5542 
5543 	do {
5544 		outstanding = *countp;
5545 		for (i = 0; i < outstanding; i++) {
5546 			scrub_icache(how_many);
5547 		}
5548 	} while (atomic_add_32_nv(countp, -outstanding));
5549 
5550 	return (DDI_INTR_CLAIMED);
5551 }
5552 
5553 /*
5554  * Called periodically on each CPU to scrub the E$.
5555  */
5556 static void
5557 scrub_ecache(int how_many)
5558 {
5559 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5560 	int i;
5561 	int cpuid = CPU->cpu_id;
5562 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5563 	int nlines = csmp->chsm_ecache_nlines;
5564 	int linesize = cpunodes[cpuid].ecache_linesize;
5565 	int ec_set_size = cpu_ecache_set_size(CPU);
5566 
5567 	/*
5568 	 * scrub the desired number of lines
5569 	 */
5570 	for (i = 0; i < how_many; i++) {
5571 		/*
5572 		 * scrub the E$ line
5573 		 */
5574 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5575 		    ec_set_size);
5576 
5577 		/*
5578 		 * calculate the next E$ line to scrub based on twice
5579 		 * the number of E$ lines (to displace lines containing
5580 		 * flush area data), assumes that the number of lines
5581 		 * is a power of 2
5582 		 */
5583 		index = (index + 1) & ((nlines << 1) - 1);
5584 	}
5585 
5586 	/*
5587 	 * set the ecache scrub index for the next visit
5588 	 */
5589 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5590 }
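
/*
 * Illustrative note: since nlines is a power of 2, the mask
 * ((nlines << 1) - 1) walks index through a region twice the size of the
 * E$ before wrapping.  For a hypothetical 8MB E$ with 64-byte lines
 * (nlines = 131072), successive iterations step through 262144 line-sized
 * slots of the flush area, so the lines that end up caching flush-area
 * data are themselves displaced over time.
 */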
5591 
5592 /*
5593  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5594  * we decrement the outstanding request count to zero.
5595  *
5596  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5597  * become negative after the atomic_add_32_nv().  This is not a problem, as
5598  * the next trip around the loop won't scrub anything, and the next add will
5599  * reset the count back to zero.
5600  */
5601 /*ARGSUSED*/
5602 static uint_t
5603 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5604 {
5605 	int i;
5606 	int how_many;
5607 	int outstanding;
5608 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5609 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5610 	struct scrub_info *csi = (struct scrub_info *)arg1;
5611 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5612 		ecache_scan_rate_idle : ecache_scan_rate_busy;
5613 	int ecache_nlines = csmp->chsm_ecache_nlines;
5614 
5615 	/*
5616 	 * The scan rates are expressed in units of tenths of a
5617 	 * percent.  A scan rate of 1000 (100%) means the whole
5618 	 * cache is scanned every second.
5619 	 */
5620 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5621 
5622 	do {
5623 		outstanding = *countp;
5624 		for (i = 0; i < outstanding; i++) {
5625 			scrub_ecache(how_many);
5626 		}
5627 	} while (atomic_add_32_nv(countp, -outstanding));
5628 
5629 	return (DDI_INTR_CLAIMED);
5630 }
5631 
5632 /*
5633  * Timeout function to reenable CEEN via cpu_check_ce_errors().
5634  */
5635 static void
5636 cpu_delayed_check_ce_errors(void *arg)
5637 {
5638 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
5639 	    TQ_NOSLEEP)) {
5640 		(void) timeout(cpu_delayed_check_ce_errors, arg,
5641 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5642 	}
5643 }
5644 
5645 /*
5646  * CE Deferred Re-enable after trap.
5647  *
5648  * When the CPU gets a disrupting trap for any of the errors
5649  * controlled by the CEEN bit, CEEN is disabled in the trap handler
5650  * immediately. To eliminate the possibility of multiple CEs causing
5651  * recursive stack overflow in the trap handler, we cannot
5652  * reenable CEEN while still running in the trap handler. Instead,
5653  * after a CE is logged on a CPU, we schedule a timeout function,
5654  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
5655  * seconds. This function will check whether any further CEs
5656  * have occurred on that CPU, and if none have, will reenable CEEN.
5657  *
5658  * If further CEs have occurred while CEEN is disabled, another
5659  * timeout will be scheduled. This is to ensure that the CPU can
5660  * make progress in the face of CE 'storms', and that it does not
5661  * spend all its time logging CE errors.
5662  */
5663 static void
5664 cpu_check_ce_errors(void *arg)
5665 {
5666 	int	cpuid = (int)(uintptr_t)arg;
5667 	cpu_t	*cp;
5668 
5669 	/*
5670 	 * We're about to acquire cpu_lock, so we must not be at high PIL.
5671 	 */
5672 	ASSERT(curthread->t_pil == 0);
5673 
5674 	/*
5675 	 * verify that the cpu is still around, DR
5676 	 * could have got there first ...
5677 	 */
5678 	mutex_enter(&cpu_lock);
5679 	cp = cpu_get(cpuid);
5680 	if (cp == NULL) {
5681 		mutex_exit(&cpu_lock);
5682 		return;
5683 	}
5684 	/*
5685 	 * make sure we don't migrate across CPUs
5686 	 * while checking our CE status.
5687 	 */
5688 	kpreempt_disable();
5689 
5690 	/*
5691 	 * If we are running on the CPU that got the
5692 	 * CE, we can do the checks directly.
5693 	 */
5694 	if (cp->cpu_id == CPU->cpu_id) {
5695 		mutex_exit(&cpu_lock);
5696 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
5697 		kpreempt_enable();
5698 		return;
5699 	}
5700 	kpreempt_enable();
5701 
5702 	/*
5703 	 * send an x-call to get the CPU that originally
5704 	 * got the CE to do the necessary checks. If we can't
5705 	 * send the x-call, reschedule the timeout, otherwise we
5706 	 * lose CEEN forever on that CPU.
5707 	 */
5708 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
5709 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
5710 		    TIMEOUT_CEEN_CHECK, 0);
5711 		mutex_exit(&cpu_lock);
5712 	} else {
5713 		/*
5714 		 * When the CPU is not accepting xcalls, or
5715 		 * the processor is offlined, we don't want to
5716 		 * incur the extra overhead of trying to schedule the
5717 		 * CE timeout indefinitely. However, we don't want to lose
5718 		 * CE checking forever.
5719 		 *
5720 		 * Keep rescheduling the timeout, accepting the additional
5721 		 * overhead as the cost of correctness in the case where we get
5722 		 * a CE, disable CEEN, offline the CPU during the
5723 		 * timeout interval, and then online it at some
5724 		 * point in the future. This is unlikely given the short
5725 		 * cpu_ceen_delay_secs.
5726 		 */
5727 		mutex_exit(&cpu_lock);
5728 		(void) timeout(cpu_delayed_check_ce_errors, (void *)cp->cpu_id,
5729 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5730 	}
5731 }
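
/*
 * Illustrative timeline (assumed): a CE traps at time T and the trap
 * handler clears CEEN; cpu_check_ce_errors() is scheduled for
 * T + cpu_ceen_delay_secs.  If the AFSR shows no new CE bits by then,
 * cpu_check_ce() simply sets CEEN again; otherwise the error is
 * logged/cleared and a fresh timeout is scheduled, which bounds the
 * logging rate during a CE storm.
 */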
5732 
5733 /*
5734  * This routine will check whether CEs have occurred while
5735  * CEEN is disabled. Any CEs detected will be logged and, if
5736  * possible, scrubbed.
5737  *
5738  * The memscrubber will also use this routine to clear any errors
5739  * caused by its scrubbing with CEEN disabled.
5740  *
5741  * flag == SCRUBBER_CEEN_CHECK
5742  *		called from memscrubber, just check/scrub, no reset
5743  *		paddr 	physical addr. for start of scrub pages
5744  *		vaddr 	virtual addr. for scrub area
5745  *		psz	page size of area to be scrubbed
5746  *
5747  * flag == TIMEOUT_CEEN_CHECK
5748  *		timeout function has triggered, reset timeout or CEEN
5749  *
5750  * Note: We must not migrate cpus during this function.  This can be
5751  * achieved by one of:
5752  *    - invoking as target of an x-call in which case we're at XCALL_PIL
5753  *	The flag value must be first xcall argument.
5754  *    - disabling kernel preemption.  This should be done for very short
5755  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
5756  *	scrub an extended area with cpu_check_block.  The call for
5757  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
5758  *	brief for this case.
5759  *    - binding to a cpu, eg with thread_affinity_set().  This is used
5760  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
5761  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
5762  */
5763 void
5764 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
5765 {
5766 	ch_cpu_errors_t	cpu_error_regs;
5767 	uint64_t	ec_err_enable;
5768 	uint64_t	page_offset;
5769 
5770 	/* Read AFSR */
5771 	get_cpu_error_state(&cpu_error_regs);
5772 
5773 	/*
5774 	 * If no CEEN errors have occurred during the timeout
5775 	 * interval, it is safe to re-enable CEEN and exit.
5776 	 */
5777 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
5778 		if (flag == TIMEOUT_CEEN_CHECK &&
5779 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
5780 			set_error_enable(ec_err_enable | EN_REG_CEEN);
5781 		return;
5782 	}
5783 
5784 	/*
5785 	 * Ensure that CEEN was not reenabled (maybe by DR) before
5786 	 * we log/clear the error.
5787 	 */
5788 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
5789 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
5790 
5791 	/*
5792 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
5793 	 * timeout will be rescheduled when the error is logged.
5794 	 */
5795 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
5796 	    cpu_ce_detected(&cpu_error_regs,
5797 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
5798 	else
5799 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
5800 
5801 	/*
5802 	 * If the memory scrubber runs while CEEN is
5803 	 * disabled, (or if CEEN is disabled during the
5804 	 * scrub as a result of a CE being triggered by
5805 	 * it), the range being scrubbed will not be
5806 	 * completely cleaned. If there are multiple CEs
5807 	 * in the range at most two of these will be dealt
5808 	 * with, (one by the trap handler and one by the
5809 	 * timeout). It is also possible that none are dealt
5810 	 * with, (CEEN disabled and another CE occurs before
5811 	 * the timeout triggers). So to ensure that the
5812 	 * memory is actually scrubbed, we have to access each
5813 	 * memory location in the range and then check whether
5814 	 * that access causes a CE.
5815 	 */
5816 	if (flag == SCRUBBER_CEEN_CHECK && va) {
5817 		if ((cpu_error_regs.afar >= pa) &&
5818 		    (cpu_error_regs.afar < (pa + psz))) {
5819 			/*
5820 			 * Force a load from physical memory for each
5821 			 * 64-byte block, then check AFSR to determine
5822 			 * whether this access caused an error.
5823 			 *
5824 			 * This is a slow way to do a scrub, but as it will
5825 			 * only be invoked when the memory scrubber actually
5826 			 * triggered a CE, it should not happen too
5827 			 * frequently.
5828 			 *
5829 			 * cut down what we need to check as the scrubber
5830 			 * has verified up to AFAR, so get its offset
5831 			 * into the page and start there.
5832 			 */
5833 			page_offset = (uint64_t)(cpu_error_regs.afar &
5834 			    (psz - 1));
5835 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
5836 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
5837 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
5838 			    psz);
5839 		}
5840 	}
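
	/*
	 * Worked example (hypothetical values): for an 8K page at
	 * pa = 0x1000000 with afar = 0x1000123, page_offset = 0x123 and
	 * P2ALIGN(0x123, 64) = 0x100, so va advances by 0x100 and psz
	 * shrinks to 0x1f00; cpu_check_block() then touches each
	 * remaining 64-byte block from offset 0x100 to the end of the
	 * page.
	 */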
5841 
5842 	/*
5843 	 * Reset error enable if this CE is not masked.
5844 	 */
5845 	if ((flag == TIMEOUT_CEEN_CHECK) &&
5846 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
5847 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
5848 
5849 }
5850 
5851 /*
5852  * Attempt a cpu logout for an error that we did not trap for, such
5853  * as a CE noticed with CEEN off.  It is assumed that we are still running
5854  * on the cpu that took the error and that we cannot migrate.  Returns
5855  * 0 on success, otherwise nonzero.
5856  */
5857 static int
5858 cpu_ce_delayed_ec_logout(uint64_t afar)
5859 {
5860 	ch_cpu_logout_t *clop;
5861 
5862 	if (CPU_PRIVATE(CPU) == NULL)
5863 		return (0);
5864 
5865 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5866 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
5867 	    LOGOUT_INVALID)
5868 		return (0);
5869 
5870 	cpu_delayed_logout(afar, clop);
5871 	return (1);
5872 }
5873 
5874 /*
5875  * We got an error while CEEN was disabled. We
5876  * need to clean up after it and log whatever
5877  * information we have on the CE.
5878  */
5879 void
5880 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
5881 {
5882 	ch_async_flt_t 	ch_flt;
5883 	struct async_flt *aflt;
5884 	char 		pr_reason[MAX_REASON_STRING];
5885 
5886 	bzero(&ch_flt, sizeof (ch_async_flt_t));
5887 	ch_flt.flt_trapped_ce = flag;
5888 	aflt = (struct async_flt *)&ch_flt;
5889 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
5890 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
5891 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5892 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
5893 	aflt->flt_addr = cpu_error_regs->afar;
5894 #if defined(SERRANO)
5895 	ch_flt.afar2 = cpu_error_regs->afar2;
5896 #endif	/* SERRANO */
5897 	aflt->flt_pc = NULL;
5898 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
5899 	aflt->flt_tl = 0;
5900 	aflt->flt_panic = 0;
5901 	cpu_log_and_clear_ce(&ch_flt);
5902 
5903 	/*
5904 	 * check if we caused any errors during cleanup
5905 	 */
5906 	if (clear_errors(&ch_flt)) {
5907 		pr_reason[0] = '\0';
5908 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
5909 		    NULL);
5910 	}
5911 }
5912 
5913 /*
5914  * Log/clear CEEN-controlled disrupting errors
5915  */
5916 static void
5917 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
5918 {
5919 	struct async_flt *aflt;
5920 	uint64_t afsr, afsr_errs;
5921 	ch_cpu_logout_t *clop;
5922 	char 		pr_reason[MAX_REASON_STRING];
5923 	on_trap_data_t	*otp = curthread->t_ontrap;
5924 
5925 	aflt = (struct async_flt *)ch_flt;
5926 	afsr = aflt->flt_stat;
5927 	afsr_errs = ch_flt->afsr_errs;
5928 	aflt->flt_id = gethrtime_waitfree();
5929 	aflt->flt_bus_id = getprocessorid();
5930 	aflt->flt_inst = CPU->cpu_id;
5931 	aflt->flt_prot = AFLT_PROT_NONE;
5932 	aflt->flt_class = CPU_FAULT;
5933 	aflt->flt_status = ECC_C_TRAP;
5934 
5935 	pr_reason[0] = '\0';
5936 	/*
5937 	 * Get the CPU log out info for Disrupting Trap.
5938 	 */
5939 	if (CPU_PRIVATE(CPU) == NULL) {
5940 		clop = NULL;
5941 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
5942 	} else {
5943 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5944 	}
5945 
5946 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
5947 		ch_cpu_errors_t cpu_error_regs;
5948 
5949 		get_cpu_error_state(&cpu_error_regs);
5950 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
5951 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
5952 		clop->clo_data.chd_afar = cpu_error_regs.afar;
5953 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
5954 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
5955 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
5956 		clop->clo_sdw_data.chd_afsr_ext =
5957 		    cpu_error_regs.shadow_afsr_ext;
5958 #if defined(SERRANO)
5959 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
5960 #endif	/* SERRANO */
5961 		ch_flt->flt_data_incomplete = 1;
5962 
5963 		/*
5964 		 * The logging/clear code expects AFSR/AFAR to be cleared.
5965 		 * The trap handler does it for CEEN enabled errors
5966 		 * so we need to do it here.
5967 		 */
5968 		set_cpu_error_state(&cpu_error_regs);
5969 	}
5970 
5971 #if defined(JALAPENO) || defined(SERRANO)
5972 	/*
5973 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
5974 	 * For Serrano, even though we do have the AFAR, we still do the
5975 	 * scrub on the RCE side since that's where the error type can
5976 	 * be properly classified as intermittent, persistent, etc.
5977 	 *
5978 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
5979 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
5980 	 * the flt_status bits.
5981 	 */
5982 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
5983 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
5984 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
5985 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
5986 	}
5987 #else /* JALAPENO || SERRANO */
5988 	/*
5989 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
5990 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
5991 	 * the flt_status bits.
5992 	 */
5993 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
5994 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
5995 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
5996 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
5997 		}
5998 	}
5999 
6000 #endif /* JALAPENO || SERRANO */
6001 
6002 	/*
6003 	 * Update flt_prot if this error occurred under on_trap protection.
6004 	 */
6005 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6006 		aflt->flt_prot = AFLT_PROT_EC;
6007 
6008 	/*
6009 	 * Queue events on the async event queue, one event per error bit.
6010 	 */
6011 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6012 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6013 		ch_flt->flt_type = CPU_INV_AFSR;
6014 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6015 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6016 		    aflt->flt_panic);
6017 	}
6018 
6019 	/*
6020 	 * Zero out + invalidate CPU logout.
6021 	 */
6022 	if (clop) {
6023 		bzero(clop, sizeof (ch_cpu_logout_t));
6024 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6025 	}
6026 
6027 	/*
6028 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6029 	 * was disabled, we need to flush either the entire
6030 	 * E$ or an E$ line.
6031 	 */
6032 #if defined(JALAPENO) || defined(SERRANO)
6033 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6034 #else	/* JALAPENO || SERRANO */
6035 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6036 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6037 #endif	/* JALAPENO || SERRANO */
6038 		cpu_error_ecache_flush(ch_flt);
6039 
6040 }
6041 
6042 /*
6043  * depending on the error type, we determine whether we
6044  * need to flush the entire ecache or just a line.
6045  */
6046 static int
6047 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6048 {
6049 	struct async_flt *aflt;
6050 	uint64_t	afsr;
6051 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6052 
6053 	aflt = (struct async_flt *)ch_flt;
6054 	afsr = aflt->flt_stat;
6055 
6056 	/*
6057 	 * If we got multiple errors, there's no point in trying
6058 	 * the individual cases; just flush the whole cache.
6059 	 */
6060 	if (afsr & C_AFSR_ME) {
6061 		return (ECACHE_FLUSH_ALL);
6062 	}
6063 
6064 	/*
6065 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6066 	 * was disabled, we need to flush the entire E$.  We can't just
6067 	 * flush the cache line affected as the ME bit
6068 	 * is not set when multiple correctable errors of the same
6069 	 * type occur, so we might have multiple CPC or EDC errors,
6070 	 * with only the first recorded.
6071 	 */
6072 #if defined(JALAPENO) || defined(SERRANO)
6073 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6074 #else	/* JALAPENO || SERRANO */
6075 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6076 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6077 #endif	/* JALAPENO || SERRANO */
6078 		return (ECACHE_FLUSH_ALL);
6079 	}
6080 
6081 #if defined(JALAPENO) || defined(SERRANO)
6082 	/*
6083 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6084 	 * flush the entire Ecache.
6085 	 */
6086 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6087 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6088 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6089 			return (ECACHE_FLUSH_LINE);
6090 		} else {
6091 			return (ECACHE_FLUSH_ALL);
6092 		}
6093 	}
6094 #else /* JALAPENO || SERRANO */
6095 	/*
6096 	 * If UE only is set, flush the Ecache line, otherwise
6097 	 * flush the entire Ecache.
6098 	 */
6099 	if (afsr_errs & C_AFSR_UE) {
6100 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6101 		    C_AFSR_UE) {
6102 			return (ECACHE_FLUSH_LINE);
6103 		} else {
6104 			return (ECACHE_FLUSH_ALL);
6105 		}
6106 	}
6107 #endif /* JALAPENO || SERRANO */
6108 
6109 	/*
6110 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6111 	 * flush the entire Ecache.
6112 	 */
6113 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6114 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6115 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6116 			return (ECACHE_FLUSH_LINE);
6117 		} else {
6118 			return (ECACHE_FLUSH_ALL);
6119 		}
6120 	}
6121 
6122 	/*
6123 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6124 	 * flush the entire Ecache.
6125 	 */
6126 	if (afsr_errs & C_AFSR_BERR) {
6127 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6128 			return (ECACHE_FLUSH_LINE);
6129 		} else {
6130 			return (ECACHE_FLUSH_ALL);
6131 		}
6132 	}
6133 
6134 	return (0);
6135 }
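
/*
 * For example (non-Jalapeno masks, assumed): afsr_errs == C_AFSR_UE alone
 * returns ECACHE_FLUSH_LINE; afsr_errs == (C_AFSR_UE | C_AFSR_EDC) matches
 * the CPC/EDC/WDC test first and returns ECACHE_FLUSH_ALL; a bare
 * C_AFSR_CE matches nothing and returns 0, i.e. no E$ flush is needed.
 */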
6136 
6137 void
6138 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6139 {
6140 	int	ecache_flush_flag =
6141 	    cpu_error_ecache_flush_required(ch_flt);
6142 
6143 	/*
6144 	 * Flush Ecache line or entire Ecache based on above checks.
6145 	 */
6146 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6147 		cpu_flush_ecache();
6148 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6149 		cpu_flush_ecache_line(ch_flt);
6150 	}
6151 
6152 }
6153 
6154 /*
6155  * Extract the PA portion from the E$ tag.
6156  */
6157 uint64_t
6158 cpu_ectag_to_pa(int setsize, uint64_t tag)
6159 {
6160 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6161 		return (JG_ECTAG_TO_PA(setsize, tag));
6162 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6163 		return (PN_L3TAG_TO_PA(tag));
6164 	else
6165 		return (CH_ECTAG_TO_PA(setsize, tag));
6166 }
6167 
6168 /*
6169  * Convert the E$ tag PA into an E$ subblock index.
6170  */
6171 static int
6172 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6173 {
6174 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6175 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6176 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6177 		/* Panther has only one subblock per line */
6178 		return (0);
6179 	else
6180 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6181 }
6182 
6183 /*
6184  * All subblocks in an E$ line must be invalid for
6185  * the line to be invalid.
6186  */
6187 int
6188 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6189 {
6190 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6191 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6192 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6193 		return (PN_L3_LINE_INVALID(tag));
6194 	else
6195 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6196 }
6197 
6198 /*
6199  * Extract state bits for a subblock given the tag.  Note that for Panther
6200  * this works on both l2 and l3 tags.
6201  */
6202 static int
6203 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6204 {
6205 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6206 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6207 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6208 		return (tag & CH_ECSTATE_MASK);
6209 	else
6210 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6211 }
6212 
6213 /*
6214  * Cpu specific initialization.
6215  */
6216 void
6217 cpu_mp_init(void)
6218 {
6219 #ifdef	CHEETAHPLUS_ERRATUM_25
6220 	if (cheetah_sendmondo_recover) {
6221 		cheetah_nudge_init();
6222 	}
6223 #endif
6224 }
6225 
6226 void
6227 cpu_ereport_post(struct async_flt *aflt)
6228 {
6229 	char *cpu_type, buf[FM_MAX_CLASS];
6230 	nv_alloc_t *nva = NULL;
6231 	nvlist_t *ereport, *detector, *resource;
6232 	errorq_elem_t *eqep;
6233 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6234 	char unum[UNUM_NAMLEN];
6235 	int len = 0;
6236 	uint8_t  msg_type;
6237 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6238 
6239 	if (aflt->flt_panic || panicstr) {
6240 		eqep = errorq_reserve(ereport_errorq);
6241 		if (eqep == NULL)
6242 			return;
6243 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6244 		nva = errorq_elem_nva(ereport_errorq, eqep);
6245 	} else {
6246 		ereport = fm_nvlist_create(nva);
6247 	}
6248 
6249 	/*
6250 	 * Create the scheme "cpu" FMRI.
6251 	 */
6252 	detector = fm_nvlist_create(nva);
6253 	resource = fm_nvlist_create(nva);
6254 	switch (cpunodes[aflt->flt_inst].implementation) {
6255 	case CHEETAH_IMPL:
6256 		cpu_type = FM_EREPORT_CPU_USIII;
6257 		break;
6258 	case CHEETAH_PLUS_IMPL:
6259 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6260 		break;
6261 	case JALAPENO_IMPL:
6262 		cpu_type = FM_EREPORT_CPU_USIIIi;
6263 		break;
6264 	case SERRANO_IMPL:
6265 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6266 		break;
6267 	case JAGUAR_IMPL:
6268 		cpu_type = FM_EREPORT_CPU_USIV;
6269 		break;
6270 	case PANTHER_IMPL:
6271 		cpu_type = FM_EREPORT_CPU_USIVplus;
6272 		break;
6273 	default:
6274 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6275 		break;
6276 	}
6277 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
6278 	    aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version,
6279 	    cpunodes[aflt->flt_inst].device_id);
6280 
6281 	/*
6282 	 * Encode all the common data into the ereport.
6283 	 */
6284 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6285 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6286 
6287 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6288 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6289 	    detector, NULL);
6290 
6291 	/*
6292 	 * Encode the error specific data that was saved in
6293 	 * the async_flt structure into the ereport.
6294 	 */
6295 	cpu_payload_add_aflt(aflt, ereport, resource,
6296 	    &plat_ecc_ch_flt.ecaf_afar_status,
6297 	    &plat_ecc_ch_flt.ecaf_synd_status);
6298 
6299 	if (aflt->flt_panic || panicstr) {
6300 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6301 	} else {
6302 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6303 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6304 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6305 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6306 	}
6307 	/*
6308 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6309 	 * to the SC only if it can process it.
6310 	 */
6311 
6312 	if (&plat_ecc_capability_sc_get &&
6313 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6314 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6315 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6316 			/*
6317 			 * If afar status is not invalid do a unum lookup.
6318 			 */
6319 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6320 			    AFLT_STAT_INVALID) {
6321 				(void) cpu_get_mem_unum_aflt(
6322 				    plat_ecc_ch_flt.ecaf_synd_status, aflt,
6323 				    unum, UNUM_NAMLEN, &len);
6324 			} else {
6325 				unum[0] = '\0';
6326 			}
6327 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6328 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6329 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6330 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6331 			    ch_flt->flt_sdw_afsr_ext;
6332 
6333 			if (&plat_log_fruid_error2)
6334 				plat_log_fruid_error2(msg_type, unum, aflt,
6335 				    &plat_ecc_ch_flt);
6336 		}
6337 	}
6338 }
6339 
6340 void
6341 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6342 {
6343 	int status;
6344 	ddi_fm_error_t de;
6345 
6346 	bzero(&de, sizeof (ddi_fm_error_t));
6347 
6348 	de.fme_version = DDI_FME_VERSION;
6349 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6350 	    FM_ENA_FMT1);
6351 	de.fme_flag = expected;
6352 	de.fme_bus_specific = (void *)aflt->flt_addr;
6353 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6354 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6355 		aflt->flt_panic = 1;
6356 }
6357 
6358 void
6359 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6360     errorq_t *eqp, uint_t flag)
6361 {
6362 	struct async_flt *aflt = (struct async_flt *)payload;
6363 
6364 	aflt->flt_erpt_class = error_class;
6365 	errorq_dispatch(eqp, payload, payload_sz, flag);
6366 }
6367 
6368 /*
6369  * This routine may be called by the IO module, but does not do
6370  * anything in this cpu module. The SERD algorithm is handled by
6371  * the cpumem-diagnosis engine instead.
6372  */
6373 /*ARGSUSED*/
6374 void
6375 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
6376 {}
6377 
6378 void
6379 adjust_hw_copy_limits(int ecache_size)
6380 {
6381 	/*
6382 	 * Set hw copy limits.
6383 	 *
6384 	 * /etc/system will be parsed later and can override one or more
6385 	 * of these settings.
6386 	 *
6387 	 * At this time, ecache size seems only mildly relevant; the
6388 	 * limiting factors seem to be the d-cache and the stalls we
6389 	 * see on misses.
6390 	 *
6391 	 * Cycle measurement indicates that 2 byte aligned copies fare
6392 	 * little better than doing things with VIS at around 512 bytes.
6393 	 * 4 byte aligned shows promise until around 1024 bytes.  8 byte
6394 	 * aligned is faster whenever the source and destination data are
6395 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6396 	 * limit seems to be driven by the 2K write cache.
6397 	 * When more than 2K of copies are done in non-VIS mode, stores
6398 	 * backup in the write cache.  In VIS mode, the write cache is
6399 	 * bypassed, allowing faster cache-line writes aligned on cache
6400 	 * boundaries.
6401 	 *
6402 	 * In addition, in non-VIS mode, there is no prefetching, so
6403 	 * for larger copies, the advantage of prefetching to avoid even
6404 	 * occasional cache misses is enough to justify using the VIS code.
6405 	 *
6406 	 * During testing, it was discovered that netbench ran 3% slower
6407 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6408 	 * applications, data is only used once (copied to the output
6409 	 * buffer, then copied by the network device off the system).  Using
6410 	 * the VIS copy saves more L2 cache state.  Network copies are
6411 	 * around 1.3K to 1.5K in size for historical reasons.
6412 	 *
6413 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6414 	 * aligned copy even for large caches and 8 MB ecache.  The
6415 	 * infrastructure to allow different limits for different sized
6416 	 * caches is kept to allow further tuning in later releases.
6417 	 */
6418 
6419 	if (min_ecache_size == 0 && use_hw_bcopy) {
6420 		/*
6421 		 * First time through - should be before /etc/system
6422 		 * is read.
6423 		 * Could skip the checks for zero but this lets us
6424 		 * preserve any debugger rewrites.
6425 		 */
6426 		if (hw_copy_limit_1 == 0) {
6427 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6428 			priv_hcl_1 = hw_copy_limit_1;
6429 		}
6430 		if (hw_copy_limit_2 == 0) {
6431 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6432 			priv_hcl_2 = hw_copy_limit_2;
6433 		}
6434 		if (hw_copy_limit_4 == 0) {
6435 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6436 			priv_hcl_4 = hw_copy_limit_4;
6437 		}
6438 		if (hw_copy_limit_8 == 0) {
6439 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6440 			priv_hcl_8 = hw_copy_limit_8;
6441 		}
6442 		min_ecache_size = ecache_size;
6443 	} else {
6444 		/*
6445 		 * MP initialization. Called *after* /etc/system has
6446 		 * been parsed. One CPU has already been initialized.
6447 		 * Need to cater for /etc/system having scragged one
6448 		 * of our values.
6449 		 */
6450 		if (ecache_size == min_ecache_size) {
6451 			/*
6452 			 * Same size ecache. We do nothing unless we
6453 			 * have a pessimistic ecache setting. In that
6454 			 * case we become more optimistic (if the cache is
6455 			 * large enough).
6456 			 */
6457 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6458 				/*
6459 				 * Need to adjust hw_copy_limit* from our
6460 				 * pessimistic uniprocessor value to a more
6461 				 * optimistic UP value *iff* it hasn't been
6462 				 * reset.
6463 				 */
6464 				if ((ecache_size > 1048576) &&
6465 				    (priv_hcl_8 == hw_copy_limit_8)) {
6466 					if (ecache_size <= 2097152)
6467 						hw_copy_limit_8 = 4 *
6468 						    VIS_COPY_THRESHOLD;
6469 					else if (ecache_size <= 4194304)
6470 						hw_copy_limit_8 = 4 *
6471 						    VIS_COPY_THRESHOLD;
6472 					else
6473 						hw_copy_limit_8 = 4 *
6474 						    VIS_COPY_THRESHOLD;
6475 					priv_hcl_8 = hw_copy_limit_8;
6476 				}
6477 			}
6478 		} else if (ecache_size < min_ecache_size) {
6479 			/*
6480 			 * A smaller ecache size. Can this even happen?
6481 			 */
6482 			if (priv_hcl_8 == hw_copy_limit_8) {
6483 				/*
6484 				 * The previous value that we set
6485 				 * is unchanged (i.e., it hasn't been
6486 				 * scragged by /etc/system). Rewrite it.
6487 				 */
6488 				if (ecache_size <= 1048576)
6489 					hw_copy_limit_8 = 8 *
6490 					    VIS_COPY_THRESHOLD;
6491 				else if (ecache_size <= 2097152)
6492 					hw_copy_limit_8 = 8 *
6493 					    VIS_COPY_THRESHOLD;
6494 				else if (ecache_size <= 4194304)
6495 					hw_copy_limit_8 = 8 *
6496 					    VIS_COPY_THRESHOLD;
6497 				else
6498 					hw_copy_limit_8 = 10 *
6499 					    VIS_COPY_THRESHOLD;
6500 				priv_hcl_8 = hw_copy_limit_8;
6501 				min_ecache_size = ecache_size;
6502 			}
6503 		}
6504 	}
6505 }
6506 
6507 /*
6508  * Called from illegal instruction trap handler to see if we can attribute
6509  * the trap to a fpras check.
6510  */
6511 int
6512 fpras_chktrap(struct regs *rp)
6513 {
6514 	int op;
6515 	struct fpras_chkfngrp *cgp;
6516 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6517 
6518 	if (fpras_chkfngrps == NULL)
6519 		return (0);
6520 
6521 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6522 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6523 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6524 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6525 			break;
6526 	}
6527 	if (op == FPRAS_NCOPYOPS)
6528 		return (0);
6529 
6530 	/*
6531 	 * This is an fpRAS failure caught through an illegal instruction
6532 	 * trap; redirect execution to the trampoline.
6533 	 */
6534 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6535 	rp->r_npc = rp->r_pc + 4;
6536 	return (1);
6537 }
6538 
6539 /*
6540  * fpras_failure is called when a fpras check detects a bad calculation
6541  * result or an illegal instruction trap is attributed to an fpras
6542  * check.  In all cases we are still bound to CPU.
6543  */
6544 int
6545 fpras_failure(int op, int how)
6546 {
6547 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6548 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6549 	ch_async_flt_t ch_flt;
6550 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6551 	struct fpras_chkfn *sfp, *cfp;
6552 	uint32_t *sip, *cip;
6553 	int i;
6554 
6555 	/*
6556 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6557 	 * the time in which we dispatch an ereport and (if applicable) panic.
6558 	 */
6559 	use_hw_bcopy_orig = use_hw_bcopy;
6560 	use_hw_bzero_orig = use_hw_bzero;
6561 	hcl1_orig = hw_copy_limit_1;
6562 	hcl2_orig = hw_copy_limit_2;
6563 	hcl4_orig = hw_copy_limit_4;
6564 	hcl8_orig = hw_copy_limit_8;
6565 	use_hw_bcopy = use_hw_bzero = 0;
6566 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6567 	    hw_copy_limit_8 = 0;
6568 
6569 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6570 	aflt->flt_id = gethrtime_waitfree();
6571 	aflt->flt_class = CPU_FAULT;
6572 	aflt->flt_inst = CPU->cpu_id;
6573 	aflt->flt_status = (how << 8) | op;
6574 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6575 	ch_flt.flt_type = CPU_FPUERR;
6576 
6577 	/*
6578 	 * We must panic if the copy operation had no lofault protection -
6579 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6580 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6581 	 */
6582 	aflt->flt_panic = (curthread->t_lofault == NULL);
6583 
6584 	/*
6585 	 * XOR the source instruction block with the copied instruction
6586 	 * block - this will show us which bit(s) are corrupted.
6587 	 */
6588 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6589 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6590 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6591 		sip = &sfp->fpras_blk0[0];
6592 		cip = &cfp->fpras_blk0[0];
6593 	} else {
6594 		sip = &sfp->fpras_blk1[0];
6595 		cip = &cfp->fpras_blk1[0];
6596 	}
6597 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6598 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
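
	/*
	 * Example (assumed values): if a source word is 0x12345678 and
	 * its copy reads 0x12345679, flt_fpdata records 0x00000001,
	 * i.e. only bit 0 of that word was corrupted.
	 */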
6599 
6600 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6601 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6602 
6603 	if (aflt->flt_panic)
6604 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6605 
6606 	/*
6607 	 * We get here for copyin/copyout and kcopy or bcopy where the
6608 	 * caller has used on_fault.  We will flag the error so that
6609 	 * the process may be killed.  The trap_async_hwerr mechanism will
6610 	 * take appropriate further action (such as a reboot, contract
6611 	 * notification etc).  Since we may be continuing we will
6612 	 * restore the global hardware copy acceleration switches.
6613 	 *
6614 	 * When we return from this function to the copy function we want to
6615 	 * avoid potentially bad data being used, ie we want the affected
6616 	 * copy function to return an error.  The caller should therefore
6617 	 * invoke its lofault handler (which always exists for these functions)
6618 	 * which will return the appropriate error.
6619 	 */
6620 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6621 	aston(curthread);
6622 
6623 	use_hw_bcopy = use_hw_bcopy_orig;
6624 	use_hw_bzero = use_hw_bzero_orig;
6625 	hw_copy_limit_1 = hcl1_orig;
6626 	hw_copy_limit_2 = hcl2_orig;
6627 	hw_copy_limit_4 = hcl4_orig;
6628 	hw_copy_limit_8 = hcl8_orig;
6629 
6630 	return (1);
6631 }
6632 
6633 #define	VIS_BLOCKSIZE		64
6634 
6635 int
6636 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
6637 {
6638 	int ret, watched;
6639 
6640 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6641 	ret = dtrace_blksuword32(addr, data, 0);
6642 	if (watched)
6643 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6644 
6645 	return (ret);
6646 }
6647 
6648 /*
6649  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
6650  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
6651  * CEEN from the EER to disable traps for further disrupting error types
6652  * on that cpu.  We could cross-call instead, but that has a larger
6653  * instruction and data footprint than cross-trapping, and the cpu is known
6654  * to be faulted.
6655  */
6656 
6657 void
6658 cpu_faulted_enter(struct cpu *cp)
6659 {
6660 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
6661 }
6662 
6663 /*
6664  * Called when a cpu leaves the CPU_FAULTED state to return to one of
6665  * offline, spare, or online (by the cpu requesting this state change).
6666  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
6667  * disrupting error bits that have accumulated without trapping, then
6668  * we cross-trap to re-enable CEEN controlled traps.
6669  */
6670 void
6671 cpu_faulted_exit(struct cpu *cp)
6672 {
6673 	ch_cpu_errors_t cpu_error_regs;
6674 
6675 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
6676 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
6677 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
6678 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
6679 	    (uint64_t)&cpu_error_regs, 0);
6680 
6681 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
6682 }
6683 
6684 /*
6685  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
6686  * the errors in the original AFSR, 0 otherwise.
6687  *
6688  * For all procs if the initial error was a BERR or TO, then it is possible
6689  * that we may have caused a secondary BERR or TO in the process of logging the
6690  * initial error via cpu_run_bus_error_handlers().  If this is the case then
6691  * if the request was protected then a panic is still not necessary, if not
6692  * protected then aft_panic is already set - so either way there's no need
6693  * to set aft_panic for the secondary error.
6694  *
6695  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
6696  * a store merge, then the error handling code will call cpu_deferred_error().
6697  * When clear_errors() is called, it will determine that secondary errors have
6698  * occurred - in particular, the store merge also caused a EDU and WDU that
6699  * weren't discovered until this point.
6700  *
6701  * We do three checks to verify that we are in this case.  If we pass all three
6702  * checks, we return 1 to indicate that we should not panic.  If any unexpected
6703  * errors occur, we return 0.
6704  *
6705  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
6706  * handled in cpu_disrupting_errors().  Since this function is not even called
6707  * in the case we are interested in, we just return 0 for these processors.
6708  */
6709 /*ARGSUSED*/
6710 static int
6711 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
6712     uint64_t t_afar)
6713 {
6714 #if !defined(CHEETAH_PLUS)
6715 	/* aflt is consulted only by the cheetah/jalapeno UE checks below. */
6716 	struct async_flt *aflt = (struct async_flt *)ch_flt;
6717 #endif	/* !CHEETAH_PLUS */
6718 
6719 	/*
6720 	 * Was the original error a BERR or TO, and only a BERR or TO?
6721 	 * (The ME bit, indicating multiple errors, is also OK.)
6722 	 */
6723 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
6724 		/*
6725 		 * Is the new error a BERR or TO, and only a BERR or TO?
6726 		 * (The ME bit, indicating multiple errors, is also OK.)
6727 		 */
6728 		if ((ch_flt->afsr_errs &
6729 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
6730 			return (1);
6731 	}
6732 
6733 #if defined(CHEETAH_PLUS)
6734 	return (0);
6735 #else	/* CHEETAH_PLUS */
6736 	/*
6737 	 * Now look for secondary effects of a UE on cheetah/jalapeno
6738 	 *
6739 	 * Check that the original error was a UE, and only a UE.  Note that
6740 	 * the ME bit will cause this check to fail.
6741 	 */
6742 	if (t_afsr_errs != C_AFSR_UE)
6743 		return (0);
6744 
6745 	/*
6746 	 * Check that the secondary errors were exclusively an EDU and/or a WDU.
6747 	 */
6748 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
6749 		return (0);
6750 
6751 	/*
6752 	 * Check that the AFARs of the original and secondary errors
6753 	 * match to the 64-byte boundary
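	 * (e.g. P2ALIGN(0x12345678, 64) == P2ALIGN(0x1234567f, 64) ==
	 * 0x12345640; both addresses fall within the same 64-byte line)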
6754 	 */
6755 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
6756 		return (0);
6757 
6758 	/*
6759 	 * We've passed all the checks, so it's a secondary error!
6760 	 */
6761 	return (1);
6762 #endif	/* CHEETAH_PLUS */
6763 }
6764 
6765 /*
6766  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
6767  * is checked for any valid errors.  If found, that error type is returned.
6768  * If not, flt_type is checked for L1$/P$ parity and TLB parity errors.
6769  */
6770 /*ARGSUSED*/
6771 static uint8_t
6772 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
6773 {
6774 #if defined(JALAPENO)
6775 	/*
6776 	 * Currently, logging errors to the SC is not supported on Jalapeno
6777 	 */
6778 	return (PLAT_ECC_ERROR2_NONE);
6779 #else
6780 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6781 
6782 	switch (ch_flt->flt_bit) {
6783 	case C_AFSR_CE:
6784 		return (PLAT_ECC_ERROR2_CE);
6785 	case C_AFSR_UCC:
6786 	case C_AFSR_EDC:
6787 	case C_AFSR_WDC:
6788 	case C_AFSR_CPC:
6789 		return (PLAT_ECC_ERROR2_L2_CE);
6790 	case C_AFSR_EMC:
6791 		return (PLAT_ECC_ERROR2_EMC);
6792 	case C_AFSR_IVC:
6793 		return (PLAT_ECC_ERROR2_IVC);
6794 	case C_AFSR_UE:
6795 		return (PLAT_ECC_ERROR2_UE);
6796 	case C_AFSR_UCU:
6797 	case C_AFSR_EDU:
6798 	case C_AFSR_WDU:
6799 	case C_AFSR_CPU:
6800 		return (PLAT_ECC_ERROR2_L2_UE);
6801 	case C_AFSR_IVU:
6802 		return (PLAT_ECC_ERROR2_IVU);
6803 	case C_AFSR_TO:
6804 		return (PLAT_ECC_ERROR2_TO);
6805 	case C_AFSR_BERR:
6806 		return (PLAT_ECC_ERROR2_BERR);
6807 #if defined(CHEETAH_PLUS)
6808 	case C_AFSR_L3_EDC:
6809 	case C_AFSR_L3_UCC:
6810 	case C_AFSR_L3_CPC:
6811 	case C_AFSR_L3_WDC:
6812 		return (PLAT_ECC_ERROR2_L3_CE);
6813 	case C_AFSR_IMC:
6814 		return (PLAT_ECC_ERROR2_IMC);
6815 	case C_AFSR_TSCE:
6816 		return (PLAT_ECC_ERROR2_L2_TSCE);
6817 	case C_AFSR_THCE:
6818 		return (PLAT_ECC_ERROR2_L2_THCE);
6819 	case C_AFSR_L3_MECC:
6820 		return (PLAT_ECC_ERROR2_L3_MECC);
6821 	case C_AFSR_L3_THCE:
6822 		return (PLAT_ECC_ERROR2_L3_THCE);
6823 	case C_AFSR_L3_CPU:
6824 	case C_AFSR_L3_EDU:
6825 	case C_AFSR_L3_UCU:
6826 	case C_AFSR_L3_WDU:
6827 		return (PLAT_ECC_ERROR2_L3_UE);
6828 	case C_AFSR_DUE:
6829 		return (PLAT_ECC_ERROR2_DUE);
6830 	case C_AFSR_DTO:
6831 		return (PLAT_ECC_ERROR2_DTO);
6832 	case C_AFSR_DBERR:
6833 		return (PLAT_ECC_ERROR2_DBERR);
6834 #endif	/* CHEETAH_PLUS */
6835 	default:
6836 		switch (ch_flt->flt_type) {
6837 #if defined(CPU_IMP_L1_CACHE_PARITY)
6838 		case CPU_IC_PARITY:
6839 			return (PLAT_ECC_ERROR2_IPE);
6840 		case CPU_DC_PARITY:
6841 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
6842 				if (ch_flt->parity_data.dpe.cpl_cache ==
6843 				    CPU_PC_PARITY) {
6844 					return (PLAT_ECC_ERROR2_PCACHE);
6845 				}
6846 			}
6847 			return (PLAT_ECC_ERROR2_DPE);
6848 #endif /* CPU_IMP_L1_CACHE_PARITY */
6849 		case CPU_ITLB_PARITY:
6850 			return (PLAT_ECC_ERROR2_ITLB);
6851 		case CPU_DTLB_PARITY:
6852 			return (PLAT_ECC_ERROR2_DTLB);
6853 		default:
6854 			return (PLAT_ECC_ERROR2_NONE);
6855 		}
6856 	}
6857 #endif	/* JALAPENO */
6858 }
6859
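/*
 * The switch in cpu_flt_bit_to_plat_error() could equally be expressed as a
 * table lookup.  A minimal, illustrative sketch follows; flt_bit_map_t and
 * flt_bit_lookup() are hypothetical names, and only a subset of the AFSR
 * error bits is shown.
 */
typedef struct flt_bit_map {
	uint64_t fbm_bit;	/* C_AFSR_* error bit */
	uint8_t fbm_err;	/* corresponding PLAT_ECC_ERROR2_* type */
} flt_bit_map_t;

static const flt_bit_map_t flt_bit_map[] = {
	{ C_AFSR_CE,	PLAT_ECC_ERROR2_CE },
	{ C_AFSR_UE,	PLAT_ECC_ERROR2_UE },
	{ C_AFSR_TO,	PLAT_ECC_ERROR2_TO },
	{ C_AFSR_BERR,	PLAT_ECC_ERROR2_BERR },
};

static uint8_t
flt_bit_lookup(uint64_t flt_bit)
{
	uint_t i;

	for (i = 0; i < sizeof (flt_bit_map) / sizeof (flt_bit_map[0]); i++) {
		if (flt_bit_map[i].fbm_bit == flt_bit)
			return (flt_bit_map[i].fbm_err);
	}
	return (PLAT_ECC_ERROR2_NONE);
}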