xref: /titanic_50/usr/src/uts/sun4u/cpu/us3_common.c (revision 088e9d477eee66081e407fbc5a33c4da25f66f6a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/ddi.h>
32 #include <sys/sysmacros.h>
33 #include <sys/archsystm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/machparam.h>
36 #include <sys/machsystm.h>
37 #include <sys/machthread.h>
38 #include <sys/cpu.h>
39 #include <sys/cmp.h>
40 #include <sys/elf_SPARC.h>
41 #include <vm/vm_dep.h>
42 #include <vm/hat_sfmmu.h>
43 #include <vm/seg_kpm.h>
44 #include <sys/cpuvar.h>
45 #include <sys/cheetahregs.h>
46 #include <sys/us3_module.h>
47 #include <sys/async.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/dditypes.h>
51 #include <sys/prom_debug.h>
52 #include <sys/prom_plat.h>
53 #include <sys/cpu_module.h>
54 #include <sys/sysmacros.h>
55 #include <sys/intreg.h>
56 #include <sys/clock.h>
57 #include <sys/platform_module.h>
58 #include <sys/machtrap.h>
59 #include <sys/ontrap.h>
60 #include <sys/panic.h>
61 #include <sys/memlist.h>
62 #include <sys/bootconf.h>
63 #include <sys/ivintr.h>
64 #include <sys/atomic.h>
65 #include <sys/taskq.h>
66 #include <sys/note.h>
67 #include <sys/ndifm.h>
68 #include <sys/ddifm.h>
69 #include <sys/fm/protocol.h>
70 #include <sys/fm/util.h>
71 #include <sys/fm/cpu/UltraSPARC-III.h>
72 #include <sys/fpras_impl.h>
73 #include <sys/dtrace.h>
74 #include <sys/watchpoint.h>
75 #include <sys/plat_ecc_unum.h>
76 #include <sys/cyclic.h>
77 #include <sys/errorq.h>
78 #include <sys/errclassify.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 /*
85  * Note that 'Cheetah PRM' refers to:
86  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
87  */
88 
89 /*
90  * Per CPU pointers to physical address of TL>0 logout data areas.
91  * These pointers have to be in the kernel nucleus to avoid MMU
92  * misses.
93  */
94 uint64_t ch_err_tl1_paddrs[NCPU];
95 
96 /*
97  * One statically allocated structure to use during startup/DR
98  * to prevent unnecessary panics.
99  */
100 ch_err_tl1_data_t ch_err_tl1_data;
101 
102 /*
103  * Per CPU pending error at TL>0, used by level15 softint handler
104  */
105 uchar_t ch_err_tl1_pending[NCPU];
106 
107 /*
108  * For deferred CE re-enable after trap.
109  */
110 taskq_t		*ch_check_ce_tq;
111 
112 /*
113  * Internal functions.
114  */
115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
118     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
119 static int clear_ecc(struct async_flt *ecc);
120 #if defined(CPU_IMP_ECACHE_ASSOC)
121 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
122 #endif
123 static int cpu_ecache_set_size(struct cpu *cp);
124 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
125 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
126 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
127 static int cpu_ectag_pa_to_subblk_state(int cachesize,
128 				uint64_t subaddr, uint64_t tag);
129 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
130 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
131 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
132 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
133 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
134 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
135 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
136 static void cpu_scrubphys(struct async_flt *aflt);
137 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
138     int *, int *);
139 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
140 static void cpu_ereport_init(struct async_flt *aflt);
141 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
142 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
143 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
144     uint64_t nceen, ch_cpu_logout_t *clop);
145 static int cpu_ce_delayed_ec_logout(uint64_t);
146 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
147 
148 #ifdef	CHEETAHPLUS_ERRATUM_25
149 static int mondo_recover_proc(uint16_t, int);
150 static void cheetah_nudge_init(void);
151 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
152     cyc_time_t *when);
153 static void cheetah_nudge_buddy(void);
154 #endif	/* CHEETAHPLUS_ERRATUM_25 */
155 
156 #if defined(CPU_IMP_L1_CACHE_PARITY)
157 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
158 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
159 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
160     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
161 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
162 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
163 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
164 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
165 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
166 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
167 #endif	/* CPU_IMP_L1_CACHE_PARITY */
168 
169 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
170     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
171     int *segsp, int *banksp, int *mcidp);
172 
173 /*
174  * This table is used to determine which bit(s) is(are) bad when an ECC
175  * error occurs.  The array is indexed by an 9-bit syndrome.  The entries
176  * of this array have the following semantics:
177  *
178  *      00-127  The number of the bad bit, when only one bit is bad.
179  *      128     ECC bit C0 is bad.
180  *      129     ECC bit C1 is bad.
181  *      130     ECC bit C2 is bad.
182  *      131     ECC bit C3 is bad.
183  *      132     ECC bit C4 is bad.
184  *      133     ECC bit C5 is bad.
185  *      134     ECC bit C6 is bad.
186  *      135     ECC bit C7 is bad.
187  *      136     ECC bit C8 is bad.
188  *	137-143 reserved for Mtag Data and ECC.
189  *      144(M2) Two bits are bad within a nibble.
190  *      145(M3) Three bits are bad within a nibble.
191  *      146(M3) Four bits are bad within a nibble.
192  *      147(M)  Multiple bits (5 or more) are bad.
193  *      148     NO bits are bad.
194  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
195  */
196 
197 #define	C0	128
198 #define	C1	129
199 #define	C2	130
200 #define	C3	131
201 #define	C4	132
202 #define	C5	133
203 #define	C6	134
204 #define	C7	135
205 #define	C8	136
206 #define	MT0	137	/* Mtag Data bit 0 */
207 #define	MT1	138
208 #define	MT2	139
209 #define	MTC0	140	/* Mtag Check bit 0 */
210 #define	MTC1	141
211 #define	MTC2	142
212 #define	MTC3	143
213 #define	M2	144
214 #define	M3	145
215 #define	M4	146
216 #define	M	147
217 #define	NA	148
218 #if defined(JALAPENO) || defined(SERRANO)
219 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
220 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
221 #define	SLAST	S003MEM	/* last special syndrome */
222 #else /* JALAPENO || SERRANO */
223 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
224 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
225 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
226 #define	SLAST	S11C	/* last special syndrome */
227 #endif /* JALAPENO || SERRANO */
228 #if defined(JALAPENO) || defined(SERRANO)
229 #define	BPAR0	152	/* syndrom 152 through 167 for bus parity */
230 #define	BPAR15	167
231 #endif	/* JALAPENO || SERRANO */
232 
233 static uint8_t ecc_syndrome_tab[] =
234 {
235 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
236 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
237 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
238 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
239 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
240 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
241 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
242 #if defined(JALAPENO) || defined(SERRANO)
243 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
244 #else	/* JALAPENO || SERRANO */
245 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
246 #endif	/* JALAPENO || SERRANO */
247 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
248 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
249 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
250 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
251 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
252 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
253 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
254 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
255 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
256 #if defined(JALAPENO) || defined(SERRANO)
257 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
258 #else	/* JALAPENO || SERRANO */
259 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
260 #endif	/* JALAPENO || SERRANO */
261 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
262 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
263 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
264 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
265 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
266 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
267 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
268 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
269 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
270 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
271 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
272 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
273 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
274 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
275 };
276 
277 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
278 
279 #if !(defined(JALAPENO) || defined(SERRANO))
280 /*
281  * This table is used to determine which bit(s) is(are) bad when a Mtag
282  * error occurs.  The array is indexed by an 4-bit ECC syndrome. The entries
283  * of this array have the following semantics:
284  *
285  *      -1	Invalid mtag syndrome.
286  *      137     Mtag Data 0 is bad.
287  *      138     Mtag Data 1 is bad.
288  *      139     Mtag Data 2 is bad.
289  *      140     Mtag ECC 0 is bad.
290  *      141     Mtag ECC 1 is bad.
291  *      142     Mtag ECC 2 is bad.
292  *      143     Mtag ECC 3 is bad.
293  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6.
294  */
295 short mtag_syndrome_tab[] =
296 {
297 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
298 };
299 
300 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
301 
302 #else /* !(JALAPENO || SERRANO) */
303 
304 #define	BSYND_TBL_SIZE	16
305 
306 #endif /* !(JALAPENO || SERRANO) */
307 
308 /*
309  * CE initial classification and subsequent action lookup table
310  */
311 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
312 static int ce_disp_inited;
313 
314 /*
315  * Set to disable leaky and partner check for memory correctables
316  */
317 int ce_xdiag_off;
318 
319 /*
320  * The following are not incremented atomically so are indicative only
321  */
322 static int ce_xdiag_drops;
323 static int ce_xdiag_lkydrops;
324 static int ce_xdiag_ptnrdrops;
325 static int ce_xdiag_bad;
326 
327 /*
328  * CE leaky check callback structure
329  */
330 typedef struct {
331 	struct async_flt *lkycb_aflt;
332 	errorq_t *lkycb_eqp;
333 	errorq_elem_t *lkycb_eqep;
334 } ce_lkychk_cb_t;
335 
336 /*
337  * defines for various ecache_flush_flag's
338  */
339 #define	ECACHE_FLUSH_LINE	1
340 #define	ECACHE_FLUSH_ALL	2
341 
342 /*
343  * STICK sync
344  */
345 #define	STICK_ITERATION 10
346 #define	MAX_TSKEW	1
347 #define	EV_A_START	0
348 #define	EV_A_END	1
349 #define	EV_B_START	2
350 #define	EV_B_END	3
351 #define	EVENTS		4
352 
353 static int64_t stick_iter = STICK_ITERATION;
354 static int64_t stick_tsk = MAX_TSKEW;
355 
356 typedef enum {
357 	EVENT_NULL = 0,
358 	SLAVE_START,
359 	SLAVE_CONT,
360 	MASTER_START
361 } event_cmd_t;
362 
363 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
364 static int64_t timestamp[EVENTS];
365 static volatile int slave_done;
366 
367 #ifdef DEBUG
368 #define	DSYNC_ATTEMPTS 64
369 typedef struct {
370 	int64_t	skew_val[DSYNC_ATTEMPTS];
371 } ss_t;
372 
373 ss_t stick_sync_stats[NCPU];
374 #endif /* DEBUG */
375 
376 /*
377  * Maximum number of contexts for Cheetah.
378  */
379 #define	MAX_NCTXS	(1 << 13)
380 
381 /* Will be set !NULL for Cheetah+ and derivatives. */
382 uchar_t *ctx_pgsz_array = NULL;
383 #if defined(CPU_IMP_DUAL_PAGESIZE)
384 static uchar_t ctx_pgsz_arr[MAX_NCTXS];
385 uint_t disable_dual_pgsz = 0;
386 #endif	/* CPU_IMP_DUAL_PAGESIZE */
387 
388 /*
389  * Save the cache bootup state for use when internal
390  * caches are to be re-enabled after an error occurs.
391  */
392 uint64_t cache_boot_state;
393 
394 /*
395  * PA[22:0] represent Displacement in Safari configuration space.
396  */
397 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
398 
399 bus_config_eclk_t bus_config_eclk[] = {
400 #if defined(JALAPENO) || defined(SERRANO)
401 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
402 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
403 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
404 #else /* JALAPENO || SERRANO */
405 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
406 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
407 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
408 #endif /* JALAPENO || SERRANO */
409 	{0, 0}
410 };
411 
412 /*
413  * Interval for deferred CEEN reenable
414  */
415 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
416 
417 /*
418  * set in /etc/system to control logging of user BERR/TO's
419  */
420 int cpu_berr_to_verbose = 0;
421 
422 /*
423  * set to 0 in /etc/system to defer CEEN reenable for all CEs
424  */
425 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
426 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
427 
428 /*
429  * Set of all offline cpus
430  */
431 cpuset_t cpu_offline_set;
432 
433 static void cpu_delayed_check_ce_errors(void *);
434 static void cpu_check_ce_errors(void *);
435 void cpu_error_ecache_flush(ch_async_flt_t *);
436 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
437 static void cpu_log_and_clear_ce(ch_async_flt_t *);
438 void cpu_ce_detected(ch_cpu_errors_t *, int);
439 
440 /*
441  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
442  * memory refresh interval of current DIMMs (64ms).  After initial fix that
443  * gives at least one full refresh cycle in which the cell can leak
444  * (whereafter further refreshes simply reinforce any incorrect bit value).
445  */
446 clock_t cpu_ce_lkychk_timeout_usec = 128000;
447 
448 /*
449  * CE partner check partner caching period in seconds
450  */
451 int cpu_ce_ptnr_cachetime_sec = 60;
452 
453 /*
454  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
455  */
456 #define	CH_SET_TRAP(ttentry, ttlabel)			\
457 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
458 		flush_instr_mem((caddr_t)&ttentry, 32);
459 
460 static int min_ecache_size;
461 static uint_t priv_hcl_1;
462 static uint_t priv_hcl_2;
463 static uint_t priv_hcl_4;
464 static uint_t priv_hcl_8;
465 
466 void
467 cpu_setup(void)
468 {
469 	extern int at_flags;
470 	extern int disable_delay_tlb_flush, delay_tlb_flush;
471 	extern int cpc_has_overflow_intr;
472 	extern int disable_text_largepages;
473 	extern int use_text_pgsz4m;
474 
475 	/*
476 	 * Setup chip-specific trap handlers.
477 	 */
478 	cpu_init_trap();
479 
480 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
481 
482 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
483 
484 	/*
485 	 * save the cache bootup state.
486 	 */
487 	cache_boot_state = get_dcu() & DCU_CACHE;
488 
489 	/*
490 	 * Use the maximum number of contexts available for Cheetah
491 	 * unless it has been tuned for debugging.
492 	 * We are checking against 0 here since this value can be patched
493 	 * while booting.  It can not be patched via /etc/system since it
494 	 * will be patched too late and thus cause the system to panic.
495 	 */
496 	if (nctxs == 0)
497 		nctxs = MAX_NCTXS;
498 
499 	/*
500 	 * Due to the number of entries in the fully-associative tlb
501 	 * this may have to be tuned lower than in spitfire.
502 	 */
503 	pp_slots = MIN(8, MAXPP_SLOTS);
504 
505 	/*
506 	 * Block stores do not invalidate all pages of the d$, pagecopy
507 	 * et. al. need virtual translations with virtual coloring taken
508 	 * into consideration.  prefetch/ldd will pollute the d$ on the
509 	 * load side.
510 	 */
511 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
512 
513 	if (use_page_coloring) {
514 		do_pg_coloring = 1;
515 		if (use_virtual_coloring)
516 			do_virtual_coloring = 1;
517 	}
518 
519 	isa_list =
520 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
521 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523 
524 	/*
525 	 * On Panther-based machines, this should
526 	 * also include AV_SPARC_POPC too
527 	 */
528 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529 
530 	/*
531 	 * On cheetah, there's no hole in the virtual address space
532 	 */
533 	hole_start = hole_end = 0;
534 
535 	/*
536 	 * The kpm mapping window.
537 	 * kpm_size:
538 	 *	The size of a single kpm range.
539 	 *	The overall size will be: kpm_size * vac_colors.
540 	 * kpm_vbase:
541 	 *	The virtual start address of the kpm range within the kernel
542 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
543 	 */
544 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545 	kpm_size_shift = 43;
546 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547 	kpm_smallpages = 1;
548 
549 	/*
550 	 * The traptrace code uses either %tick or %stick for
551 	 * timestamping.  We have %stick so we can use it.
552 	 */
553 	traptrace_use_stick = 1;
554 
555 	/*
556 	 * Cheetah has a performance counter overflow interrupt
557 	 */
558 	cpc_has_overflow_intr = 1;
559 
560 	/*
561 	 * Use cheetah flush-all support
562 	 */
563 	if (!disable_delay_tlb_flush)
564 		delay_tlb_flush = 1;
565 
566 #if defined(CPU_IMP_DUAL_PAGESIZE)
567 	/*
568 	 * Use Cheetah+ and later dual page size support.
569 	 */
570 	if (!disable_dual_pgsz) {
571 		ctx_pgsz_array = ctx_pgsz_arr;
572 	}
573 #endif	/* CPU_IMP_DUAL_PAGESIZE */
574 
575 	/*
576 	 * Declare that this architecture/cpu combination does fpRAS.
577 	 */
578 	fpras_implemented = 1;
579 
580 	/*
581 	 * Enable 4M pages to be used for mapping user text by default.  Don't
582 	 * use large pages for initialized data segments since we may not know
583 	 * at exec() time what should be the preferred large page size for DTLB
584 	 * programming.
585 	 */
586 	use_text_pgsz4m = 1;
587 	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
588 	    (1 << TTE32M) | (1 << TTE256M);
589 
590 	/*
591 	 * Setup CE lookup table
592 	 */
593 	CE_INITDISPTBL_POPULATE(ce_disp_table);
594 	ce_disp_inited = 1;
595 }
596 
597 /*
598  * Called by setcpudelay
599  */
600 void
601 cpu_init_tick_freq(void)
602 {
603 	/*
604 	 * For UltraSPARC III and beyond we want to use the
605 	 * system clock rate as the basis for low level timing,
606 	 * due to support of mixed speed CPUs and power managment.
607 	 */
608 	if (system_clock_freq == 0)
609 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
610 
611 	sys_tick_freq = system_clock_freq;
612 }
613 
614 #ifdef CHEETAHPLUS_ERRATUM_25
615 /*
616  * Tunables
617  */
618 int cheetah_bpe_off = 0;
619 int cheetah_sendmondo_recover = 1;
620 int cheetah_sendmondo_fullscan = 0;
621 int cheetah_sendmondo_recover_delay = 5;
622 
623 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
624 
625 /*
626  * Recovery Statistics
627  */
628 typedef struct cheetah_livelock_entry	{
629 	int cpuid;		/* fallen cpu */
630 	int buddy;		/* cpu that ran recovery */
631 	clock_t lbolt;		/* when recovery started */
632 	hrtime_t recovery_time;	/* time spent in recovery */
633 } cheetah_livelock_entry_t;
634 
635 #define	CHEETAH_LIVELOCK_NENTRY	32
636 
637 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
638 int cheetah_livelock_entry_nxt;
639 
640 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
641 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
642 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
643 		cheetah_livelock_entry_nxt = 0;				\
644 	}								\
645 }
646 
647 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
648 
649 struct {
650 	hrtime_t hrt;		/* maximum recovery time */
651 	int recovery;		/* recovered */
652 	int full_claimed;	/* maximum pages claimed in full recovery */
653 	int proc_entry;		/* attempted to claim TSB */
654 	int proc_tsb_scan;	/* tsb scanned */
655 	int proc_tsb_partscan;	/* tsb partially scanned */
656 	int proc_tsb_fullscan;	/* whole tsb scanned */
657 	int proc_claimed;	/* maximum pages claimed in tsb scan */
658 	int proc_user;		/* user thread */
659 	int proc_kernel;	/* kernel thread */
660 	int proc_onflt;		/* bad stack */
661 	int proc_cpu;		/* null cpu */
662 	int proc_thread;	/* null thread */
663 	int proc_proc;		/* null proc */
664 	int proc_as;		/* null as */
665 	int proc_hat;		/* null hat */
666 	int proc_hat_inval;	/* hat contents don't make sense */
667 	int proc_hat_busy;	/* hat is changing TSBs */
668 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
669 	int proc_cnum_bad;	/* cnum out of range */
670 	int proc_cnum;		/* last cnum processed */
671 	tte_t proc_tte;		/* last tte processed */
672 } cheetah_livelock_stat;
673 
674 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
675 
676 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
677 	cheetah_livelock_stat.item = value
678 
679 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
680 	if (value > cheetah_livelock_stat.item)		\
681 		cheetah_livelock_stat.item = value;	\
682 }
683 
684 /*
685  * Attempt to recover a cpu by claiming every cache line as saved
686  * in the TSB that the non-responsive cpu is using. Since we can't
687  * grab any adaptive lock, this is at best an attempt to do so. Because
688  * we don't grab any locks, we must operate under the protection of
689  * on_fault().
690  *
691  * Return 1 if cpuid could be recovered, 0 if failed.
692  */
693 int
694 mondo_recover_proc(uint16_t cpuid, int bn)
695 {
696 	label_t ljb;
697 	cpu_t *cp;
698 	kthread_t *t;
699 	proc_t *p;
700 	struct as *as;
701 	struct hat *hat;
702 	short  cnum;
703 	struct tsb_info *tsbinfop;
704 	struct tsbe *tsbep;
705 	caddr_t tsbp;
706 	caddr_t end_tsbp;
707 	uint64_t paddr;
708 	uint64_t idsr;
709 	u_longlong_t pahi, palo;
710 	int pages_claimed = 0;
711 	tte_t tsbe_tte;
712 	int tried_kernel_tsb = 0;
713 
714 	CHEETAH_LIVELOCK_STAT(proc_entry);
715 
716 	if (on_fault(&ljb)) {
717 		CHEETAH_LIVELOCK_STAT(proc_onflt);
718 		goto badstruct;
719 	}
720 
721 	if ((cp = cpu[cpuid]) == NULL) {
722 		CHEETAH_LIVELOCK_STAT(proc_cpu);
723 		goto badstruct;
724 	}
725 
726 	if ((t = cp->cpu_thread) == NULL) {
727 		CHEETAH_LIVELOCK_STAT(proc_thread);
728 		goto badstruct;
729 	}
730 
731 	if ((p = ttoproc(t)) == NULL) {
732 		CHEETAH_LIVELOCK_STAT(proc_proc);
733 		goto badstruct;
734 	}
735 
736 	if ((as = p->p_as) == NULL) {
737 		CHEETAH_LIVELOCK_STAT(proc_as);
738 		goto badstruct;
739 	}
740 
741 	if ((hat = as->a_hat) == NULL) {
742 		CHEETAH_LIVELOCK_STAT(proc_hat);
743 		goto badstruct;
744 	}
745 
746 	if (hat != ksfmmup) {
747 		CHEETAH_LIVELOCK_STAT(proc_user);
748 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
749 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
750 			goto badstruct;
751 		}
752 		tsbinfop = hat->sfmmu_tsb;
753 		if (tsbinfop == NULL) {
754 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
755 			goto badstruct;
756 		}
757 		tsbp = tsbinfop->tsb_va;
758 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
759 	} else {
760 		CHEETAH_LIVELOCK_STAT(proc_kernel);
761 		tsbinfop = NULL;
762 		tsbp = ktsb_base;
763 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
764 	}
765 
766 	/* Verify as */
767 	if (hat->sfmmu_as != as) {
768 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
769 		goto badstruct;
770 	}
771 
772 	cnum = hat->sfmmu_cnum;
773 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
774 
775 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) {
776 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
777 		goto badstruct;
778 	}
779 
780 	do {
781 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
782 
783 		/*
784 		 * Skip TSBs being relocated.  This is important because
785 		 * we want to avoid the following deadlock scenario:
786 		 *
787 		 * 1) when we came in we set ourselves to "in recover" state.
788 		 * 2) when we try to touch TSB being relocated the mapping
789 		 *    will be in the suspended state so we'll spin waiting
790 		 *    for it to be unlocked.
791 		 * 3) when the CPU that holds the TSB mapping locked tries to
792 		 *    unlock it it will send a xtrap which will fail to xcall
793 		 *    us or the CPU we're trying to recover, and will in turn
794 		 *    enter the mondo code.
795 		 * 4) since we are still spinning on the locked mapping
796 		 *    no further progress will be made and the system will
797 		 *    inevitably hard hang.
798 		 *
799 		 * A TSB not being relocated can't begin being relocated
800 		 * while we're accessing it because we check
801 		 * sendmondo_in_recover before relocating TSBs.
802 		 */
803 		if (hat != ksfmmup &&
804 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
805 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
806 			goto next_tsbinfo;
807 		}
808 
809 		for (tsbep = (struct tsbe *)tsbp;
810 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
811 			tsbe_tte = tsbep->tte_data;
812 
813 			if (tsbe_tte.tte_val == 0) {
814 				/*
815 				 * Invalid tte
816 				 */
817 				continue;
818 			}
819 			if (tsbe_tte.tte_se) {
820 				/*
821 				 * Don't want device registers
822 				 */
823 				continue;
824 			}
825 			if (tsbe_tte.tte_cp == 0) {
826 				/*
827 				 * Must be cached in E$
828 				 */
829 				continue;
830 			}
831 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
832 			idsr = getidsr();
833 			if ((idsr & (IDSR_NACK_BIT(bn) |
834 			    IDSR_BUSY_BIT(bn))) == 0) {
835 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
836 				goto done;
837 			}
838 			pahi = tsbe_tte.tte_pahi;
839 			palo = tsbe_tte.tte_palo;
840 			paddr = (uint64_t)((pahi << 32) |
841 			    (palo << MMU_PAGESHIFT));
842 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
843 			    CH_ECACHE_SUBBLK_SIZE);
844 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
845 				shipit(cpuid, bn);
846 			}
847 			pages_claimed++;
848 		}
849 next_tsbinfo:
850 		if (tsbinfop != NULL)
851 			tsbinfop = tsbinfop->tsb_next;
852 		if (tsbinfop != NULL) {
853 			tsbp = tsbinfop->tsb_va;
854 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
855 		} else if (tsbp == ktsb_base) {
856 			tried_kernel_tsb = 1;
857 		} else if (!tried_kernel_tsb) {
858 			tsbp = ktsb_base;
859 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
860 			hat = ksfmmup;
861 			tsbinfop = NULL;
862 		}
863 	} while (tsbinfop != NULL ||
864 			((tsbp == ktsb_base) && !tried_kernel_tsb));
865 
866 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
867 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
868 	no_fault();
869 	idsr = getidsr();
870 	if ((idsr & (IDSR_NACK_BIT(bn) |
871 	    IDSR_BUSY_BIT(bn))) == 0) {
872 		return (1);
873 	} else {
874 		return (0);
875 	}
876 
877 done:
878 	no_fault();
879 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
880 	return (1);
881 
882 badstruct:
883 	no_fault();
884 	return (0);
885 }
886 
887 /*
888  * Attempt to claim ownership, temporarily, of every cache line that a
889  * non-responsive cpu might be using.  This might kick that cpu out of
890  * this state.
891  *
892  * The return value indicates to the caller if we have exhausted all recovery
893  * techniques. If 1 is returned, it is useless to call this function again
894  * even for a different target CPU.
895  */
896 int
897 mondo_recover(uint16_t cpuid, int bn)
898 {
899 	struct memseg *seg;
900 	uint64_t begin_pa, end_pa, cur_pa;
901 	hrtime_t begin_hrt, end_hrt;
902 	int retval = 0;
903 	int pages_claimed = 0;
904 	cheetah_livelock_entry_t *histp;
905 	uint64_t idsr;
906 
907 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
908 		/*
909 		 * Wait while recovery takes place
910 		 */
911 		while (sendmondo_in_recover) {
912 			drv_usecwait(1);
913 		}
914 		/*
915 		 * Assume we didn't claim the whole memory. If
916 		 * the target of this caller is not recovered,
917 		 * it will come back.
918 		 */
919 		return (retval);
920 	}
921 
922 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
923 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
924 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
925 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
926 
927 	begin_hrt = gethrtime_waitfree();
928 	/*
929 	 * First try to claim the lines in the TSB the target
930 	 * may have been using.
931 	 */
932 	if (mondo_recover_proc(cpuid, bn) == 1) {
933 		/*
934 		 * Didn't claim the whole memory
935 		 */
936 		goto done;
937 	}
938 
939 	/*
940 	 * We tried using the TSB. The target is still
941 	 * not recovered. Check if complete memory scan is
942 	 * enabled.
943 	 */
944 	if (cheetah_sendmondo_fullscan == 0) {
945 		/*
946 		 * Full memory scan is disabled.
947 		 */
948 		retval = 1;
949 		goto done;
950 	}
951 
952 	/*
953 	 * Try claiming the whole memory.
954 	 */
955 	for (seg = memsegs; seg; seg = seg->next) {
956 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
957 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
958 		for (cur_pa = begin_pa; cur_pa < end_pa;
959 		    cur_pa += MMU_PAGESIZE) {
960 			idsr = getidsr();
961 			if ((idsr & (IDSR_NACK_BIT(bn) |
962 			    IDSR_BUSY_BIT(bn))) == 0) {
963 				/*
964 				 * Didn't claim all memory
965 				 */
966 				goto done;
967 			}
968 			claimlines(cur_pa, MMU_PAGESIZE,
969 			    CH_ECACHE_SUBBLK_SIZE);
970 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
971 				shipit(cpuid, bn);
972 			}
973 			pages_claimed++;
974 		}
975 	}
976 
977 	/*
978 	 * We did all we could.
979 	 */
980 	retval = 1;
981 
982 done:
983 	/*
984 	 * Update statistics
985 	 */
986 	end_hrt = gethrtime_waitfree();
987 	CHEETAH_LIVELOCK_STAT(recovery);
988 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
989 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
990 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
991 	    (end_hrt -  begin_hrt));
992 
993 	while (cas32(&sendmondo_in_recover, 1, 0) != 1);
994 
995 	return (retval);
996 }
997 
998 /*
999  * This is called by the cyclic framework when this CPU becomes online
1000  */
1001 /*ARGSUSED*/
1002 static void
1003 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
1004 {
1005 
1006 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1007 	hdlr->cyh_level = CY_LOW_LEVEL;
1008 	hdlr->cyh_arg = NULL;
1009 
1010 	/*
1011 	 * Stagger the start time
1012 	 */
1013 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1014 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1015 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1016 	}
1017 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1018 }
1019 
1020 /*
1021  * Create a low level cyclic to send a xtrap to the next cpu online.
1022  * However, there's no need to have this running on a uniprocessor system.
1023  */
1024 static void
1025 cheetah_nudge_init(void)
1026 {
1027 	cyc_omni_handler_t hdlr;
1028 
1029 	if (max_ncpus == 1) {
1030 		return;
1031 	}
1032 
1033 	hdlr.cyo_online = cheetah_nudge_onln;
1034 	hdlr.cyo_offline = NULL;
1035 	hdlr.cyo_arg = NULL;
1036 
1037 	mutex_enter(&cpu_lock);
1038 	(void) cyclic_add_omni(&hdlr);
1039 	mutex_exit(&cpu_lock);
1040 }
1041 
1042 /*
1043  * Cyclic handler to wake up buddy
1044  */
1045 void
1046 cheetah_nudge_buddy(void)
1047 {
1048 	/*
1049 	 * Disable kernel preemption to protect the cpu list
1050 	 */
1051 	kpreempt_disable();
1052 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1053 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1054 		    0, 0);
1055 	}
1056 	kpreempt_enable();
1057 }
1058 
1059 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1060 
1061 #ifdef SEND_MONDO_STATS
1062 uint32_t x_one_stimes[64];
1063 uint32_t x_one_ltimes[16];
1064 uint32_t x_set_stimes[64];
1065 uint32_t x_set_ltimes[16];
1066 uint32_t x_set_cpus[NCPU];
1067 uint32_t x_nack_stimes[64];
1068 #endif
1069 
1070 /*
1071  * Note: A version of this function is used by the debugger via the KDI,
1072  * and must be kept in sync with this version.  Any changes made to this
1073  * function to support new chips or to accomodate errata must also be included
1074  * in the KDI-specific version.  See us3_kdi.c.
1075  */
1076 void
1077 send_one_mondo(int cpuid)
1078 {
1079 	int busy, nack;
1080 	uint64_t idsr, starttick, endtick, tick, lasttick;
1081 	uint64_t busymask;
1082 #ifdef	CHEETAHPLUS_ERRATUM_25
1083 	int recovered = 0;
1084 #endif
1085 
1086 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1087 	starttick = lasttick = gettick();
1088 	shipit(cpuid, 0);
1089 	endtick = starttick + xc_tick_limit;
1090 	busy = nack = 0;
1091 #if defined(JALAPENO) || defined(SERRANO)
1092 	/*
1093 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1094 	 * will be used for dispatching interrupt. For now, assume
1095 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1096 	 * issues with respect to BUSY/NACK pair usage.
1097 	 */
1098 	busymask  = IDSR_BUSY_BIT(cpuid);
1099 #else /* JALAPENO || SERRANO */
1100 	busymask = IDSR_BUSY;
1101 #endif /* JALAPENO || SERRANO */
1102 	for (;;) {
1103 		idsr = getidsr();
1104 		if (idsr == 0)
1105 			break;
1106 
1107 		tick = gettick();
1108 		/*
1109 		 * If there is a big jump between the current tick
1110 		 * count and lasttick, we have probably hit a break
1111 		 * point.  Adjust endtick accordingly to avoid panic.
1112 		 */
1113 		if (tick > (lasttick + xc_tick_jump_limit))
1114 			endtick += (tick - lasttick);
1115 		lasttick = tick;
1116 		if (tick > endtick) {
1117 			if (panic_quiesce)
1118 				return;
1119 #ifdef	CHEETAHPLUS_ERRATUM_25
1120 			if (cheetah_sendmondo_recover && recovered == 0) {
1121 				if (mondo_recover(cpuid, 0)) {
1122 					/*
1123 					 * We claimed the whole memory or
1124 					 * full scan is disabled.
1125 					 */
1126 					recovered++;
1127 				}
1128 				tick = gettick();
1129 				endtick = tick + xc_tick_limit;
1130 				lasttick = tick;
1131 				/*
1132 				 * Recheck idsr
1133 				 */
1134 				continue;
1135 			} else
1136 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1137 			{
1138 				cmn_err(CE_PANIC, "send mondo timeout "
1139 				    "(target 0x%x) [%d NACK %d BUSY]",
1140 				    cpuid, nack, busy);
1141 			}
1142 		}
1143 
1144 		if (idsr & busymask) {
1145 			busy++;
1146 			continue;
1147 		}
1148 		drv_usecwait(1);
1149 		shipit(cpuid, 0);
1150 		nack++;
1151 		busy = 0;
1152 	}
1153 #ifdef SEND_MONDO_STATS
1154 	{
1155 		int n = gettick() - starttick;
1156 		if (n < 8192)
1157 			x_one_stimes[n >> 7]++;
1158 		else
1159 			x_one_ltimes[(n >> 13) & 0xf]++;
1160 	}
1161 #endif
1162 }
1163 
1164 void
1165 syncfpu(void)
1166 {
1167 }
1168 
1169 /*
1170  * Return processor specific async error structure
1171  * size used.
1172  */
1173 int
1174 cpu_aflt_size(void)
1175 {
1176 	return (sizeof (ch_async_flt_t));
1177 }
1178 
1179 /*
1180  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1181  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1182  * flush the error that caused the UCU/UCC, then again here at the end to
1183  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1184  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1185  * another Fast ECC trap.
1186  *
1187  * Cheetah+ also handles: TSCE: No additional processing required.
1188  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1189  *
1190  * Note that the p_clo_flags input is only valid in cases where the
1191  * cpu_private struct is not yet initialized (since that is the only
1192  * time that information cannot be obtained from the logout struct.)
1193  */
1194 /*ARGSUSED*/
1195 void
1196 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1197 {
1198 	ch_cpu_logout_t *clop;
1199 	uint64_t ceen, nceen;
1200 
1201 	/*
1202 	 * Get the CPU log out info. If we can't find our CPU private
1203 	 * pointer, then we will have to make due without any detailed
1204 	 * logout information.
1205 	 */
1206 	if (CPU_PRIVATE(CPU) == NULL) {
1207 		clop = NULL;
1208 		ceen = p_clo_flags & EN_REG_CEEN;
1209 		nceen = p_clo_flags & EN_REG_NCEEN;
1210 	} else {
1211 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1212 		ceen = clop->clo_flags & EN_REG_CEEN;
1213 		nceen = clop->clo_flags & EN_REG_NCEEN;
1214 	}
1215 
1216 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1217 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1218 }
1219 
1220 /*
1221  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1222  * ECC at TL>0.  Need to supply either a error register pointer or a
1223  * cpu logout structure pointer.
1224  */
1225 static void
1226 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1227     uint64_t nceen, ch_cpu_logout_t *clop)
1228 {
1229 	struct async_flt *aflt;
1230 	ch_async_flt_t ch_flt;
1231 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1232 	char pr_reason[MAX_REASON_STRING];
1233 	ch_cpu_errors_t cpu_error_regs;
1234 
1235 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1236 	/*
1237 	 * If no cpu logout data, then we will have to make due without
1238 	 * any detailed logout information.
1239 	 */
1240 	if (clop == NULL) {
1241 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1242 		get_cpu_error_state(&cpu_error_regs);
1243 		set_cpu_error_state(&cpu_error_regs);
1244 		t_afar = cpu_error_regs.afar;
1245 		t_afsr = cpu_error_regs.afsr;
1246 		t_afsr_ext = cpu_error_regs.afsr_ext;
1247 #if defined(SERRANO)
1248 		ch_flt.afar2 = cpu_error_regs.afar2;
1249 #endif	/* SERRANO */
1250 	} else {
1251 		t_afar = clop->clo_data.chd_afar;
1252 		t_afsr = clop->clo_data.chd_afsr;
1253 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1254 #if defined(SERRANO)
1255 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1256 #endif	/* SERRANO */
1257 	}
1258 
1259 	/*
1260 	 * In order to simplify code, we maintain this afsr_errs
1261 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1262 	 * sticky bits.
1263 	 */
1264 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1265 	    (t_afsr & C_AFSR_ALL_ERRS);
1266 	pr_reason[0] = '\0';
1267 
1268 	/* Setup the async fault structure */
1269 	aflt = (struct async_flt *)&ch_flt;
1270 	aflt->flt_id = gethrtime_waitfree();
1271 	ch_flt.afsr_ext = t_afsr_ext;
1272 	ch_flt.afsr_errs = t_afsr_errs;
1273 	aflt->flt_stat = t_afsr;
1274 	aflt->flt_addr = t_afar;
1275 	aflt->flt_bus_id = getprocessorid();
1276 	aflt->flt_inst = CPU->cpu_id;
1277 	aflt->flt_pc = tpc;
1278 	aflt->flt_prot = AFLT_PROT_NONE;
1279 	aflt->flt_class = CPU_FAULT;
1280 	aflt->flt_priv = priv;
1281 	aflt->flt_tl = tl;
1282 	aflt->flt_status = ECC_F_TRAP;
1283 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1284 
1285 	/*
1286 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1287 	 * cmn_err messages out to the console.  The situation is a UCU (in
1288 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1289 	 * The messages for the UCU and WDU are enqueued and then pulled off
1290 	 * the async queue via softint and syslogd starts to process them
1291 	 * but doesn't get them to the console.  The UE causes a panic, but
1292 	 * since the UCU/WDU messages are already in transit, those aren't
1293 	 * on the async queue.  The hack is to check if we have a matching
1294 	 * WDU event for the UCU, and if it matches, we're more than likely
1295 	 * going to panic with a UE, unless we're under protection.  So, we
1296 	 * check to see if we got a matching WDU event and if we're under
1297 	 * protection.
1298 	 *
1299 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1300 	 * looks like this:
1301 	 *    UCU->WDU->UE
1302 	 * For Panther, it could look like either of these:
1303 	 *    UCU---->WDU->L3_WDU->UE
1304 	 *    L3_UCU->WDU->L3_WDU->UE
1305 	 */
1306 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1307 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1308 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1309 		get_cpu_error_state(&cpu_error_regs);
1310 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1311 		    (cpu_error_regs.afar == t_afar));
1312 		aflt->flt_panic |= ((clop == NULL) &&
1313 		    (t_afsr_errs & C_AFSR_WDU));
1314 	}
1315 
1316 	/*
1317 	 * Queue events on the async event queue, one event per error bit.
1318 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1319 	 * queue an event to complain.
1320 	 */
1321 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1322 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1323 		ch_flt.flt_type = CPU_INV_AFSR;
1324 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1325 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1326 		    aflt->flt_panic);
1327 	}
1328 
1329 	/*
1330 	 * Zero out + invalidate CPU logout.
1331 	 */
1332 	if (clop) {
1333 		bzero(clop, sizeof (ch_cpu_logout_t));
1334 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1335 	}
1336 
1337 	/*
1338 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1339 	 * or disrupting errors have happened.  We do this because if a
1340 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1341 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1342 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1343 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1344 	 * deferred or disrupting error happening between checking the AFSR and
1345 	 * enabling NCEEN/CEEN.
1346 	 *
1347 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1348 	 * taken.
1349 	 */
1350 	set_error_enable(get_error_enable() | (nceen | ceen));
1351 	if (clear_errors(&ch_flt)) {
1352 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1353 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1354 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1355 		    NULL);
1356 	}
1357 
1358 	/*
1359 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1360 	 * be logged as part of the panic flow.
1361 	 */
1362 	if (aflt->flt_panic)
1363 		fm_panic("%sError(s)", pr_reason);
1364 
1365 	/*
1366 	 * Flushing the Ecache here gets the part of the trap handler that
1367 	 * is run at TL=1 out of the Ecache.
1368 	 */
1369 	cpu_flush_ecache();
1370 }
1371 
1372 /*
1373  * This is called via sys_trap from pil15_interrupt code if the
1374  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1375  * various ch_err_tl1_data structures for valid entries based on the bit
1376  * settings in the ch_err_tl1_flags entry of the structure.
1377  */
1378 /*ARGSUSED*/
1379 void
1380 cpu_tl1_error(struct regs *rp, int panic)
1381 {
1382 	ch_err_tl1_data_t *cl1p, cl1;
1383 	int i, ncl1ps;
1384 	uint64_t me_flags;
1385 	uint64_t ceen, nceen;
1386 
1387 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1388 		cl1p = &ch_err_tl1_data;
1389 		ncl1ps = 1;
1390 	} else if (CPU_PRIVATE(CPU) != NULL) {
1391 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1392 		ncl1ps = CH_ERR_TL1_TLMAX;
1393 	} else {
1394 		ncl1ps = 0;
1395 	}
1396 
1397 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1398 		if (cl1p->ch_err_tl1_flags == 0)
1399 			continue;
1400 
1401 		/*
1402 		 * Grab a copy of the logout data and invalidate
1403 		 * the logout area.
1404 		 */
1405 		cl1 = *cl1p;
1406 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1407 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1408 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1409 
1410 		/*
1411 		 * Log "first error" in ch_err_tl1_data.
1412 		 */
1413 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1414 			ceen = get_error_enable() & EN_REG_CEEN;
1415 			nceen = get_error_enable() & EN_REG_NCEEN;
1416 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1417 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1418 		}
1419 #if defined(CPU_IMP_L1_CACHE_PARITY)
1420 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1421 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1422 			    (caddr_t)cl1.ch_err_tl1_tpc);
1423 		}
1424 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1425 
1426 		/*
1427 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1428 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1429 		 * if the structure is busy, we just do the cache flushing
1430 		 * we have to do and then do the retry.  So the AFSR/AFAR
1431 		 * at this point *should* have some relevant info.  If there
1432 		 * are no valid errors in the AFSR, we'll assume they've
1433 		 * already been picked up and logged.  For I$/D$ parity,
1434 		 * we just log an event with an "Unknown" (NULL) TPC.
1435 		 */
1436 		if (me_flags & CH_ERR_FECC) {
1437 			ch_cpu_errors_t cpu_error_regs;
1438 			uint64_t t_afsr_errs;
1439 
1440 			/*
1441 			 * Get the error registers and see if there's
1442 			 * a pending error.  If not, don't bother
1443 			 * generating an "Invalid AFSR" error event.
1444 			 */
1445 			get_cpu_error_state(&cpu_error_regs);
1446 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1447 			    C_AFSR_EXT_ALL_ERRS) |
1448 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1449 			if (t_afsr_errs != 0) {
1450 				ceen = get_error_enable() & EN_REG_CEEN;
1451 				nceen = get_error_enable() & EN_REG_NCEEN;
1452 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1453 				    1, ceen, nceen, NULL);
1454 			}
1455 		}
1456 #if defined(CPU_IMP_L1_CACHE_PARITY)
1457 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1458 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1459 		}
1460 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1461 	}
1462 }
1463 
1464 /*
1465  * Called from Fast ECC TL>0 handler in case of fatal error.
1466  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1467  * but if we don't, we'll panic with something reasonable.
1468  */
1469 /*ARGSUSED*/
1470 void
1471 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1472 {
1473 	cpu_tl1_error(rp, 1);
1474 	/*
1475 	 * Should never return, but just in case.
1476 	 */
1477 	fm_panic("Unsurvivable ECC Error at TL>0");
1478 }
1479 
1480 /*
1481  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1482  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1483  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1484  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1485  *
1486  * Cheetah+ also handles (No additional processing required):
1487  *    DUE, DTO, DBERR	(NCEEN controlled)
1488  *    THCE		(CEEN and ET_ECC_en controlled)
1489  *    TUE		(ET_ECC_en controlled)
1490  *
1491  * Panther further adds:
1492  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1493  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1494  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1495  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1496  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1497  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1498  *
1499  * Note that the p_clo_flags input is only valid in cases where the
1500  * cpu_private struct is not yet initialized (since that is the only
1501  * time that information cannot be obtained from the logout struct.)
1502  */
1503 /*ARGSUSED*/
1504 void
1505 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1506 {
1507 	struct async_flt *aflt;
1508 	ch_async_flt_t ch_flt;
1509 	char pr_reason[MAX_REASON_STRING];
1510 	ch_cpu_logout_t *clop;
1511 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1512 	ch_cpu_errors_t cpu_error_regs;
1513 
1514 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1515 	/*
1516 	 * Get the CPU log out info. If we can't find our CPU private
1517 	 * pointer, then we will have to make due without any detailed
1518 	 * logout information.
1519 	 */
1520 	if (CPU_PRIVATE(CPU) == NULL) {
1521 		clop = NULL;
1522 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1523 		get_cpu_error_state(&cpu_error_regs);
1524 		set_cpu_error_state(&cpu_error_regs);
1525 		t_afar = cpu_error_regs.afar;
1526 		t_afsr = cpu_error_regs.afsr;
1527 		t_afsr_ext = cpu_error_regs.afsr_ext;
1528 #if defined(SERRANO)
1529 		ch_flt.afar2 = cpu_error_regs.afar2;
1530 #endif	/* SERRANO */
1531 	} else {
1532 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1533 		t_afar = clop->clo_data.chd_afar;
1534 		t_afsr = clop->clo_data.chd_afsr;
1535 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1536 #if defined(SERRANO)
1537 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1538 #endif	/* SERRANO */
1539 	}
1540 
1541 	/*
1542 	 * In order to simplify code, we maintain this afsr_errs
1543 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1544 	 * sticky bits.
1545 	 */
1546 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1547 	    (t_afsr & C_AFSR_ALL_ERRS);
1548 
1549 	pr_reason[0] = '\0';
1550 	/* Setup the async fault structure */
1551 	aflt = (struct async_flt *)&ch_flt;
1552 	ch_flt.afsr_ext = t_afsr_ext;
1553 	ch_flt.afsr_errs = t_afsr_errs;
1554 	aflt->flt_stat = t_afsr;
1555 	aflt->flt_addr = t_afar;
1556 	aflt->flt_pc = (caddr_t)rp->r_pc;
1557 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1558 	aflt->flt_tl = 0;
1559 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1560 
1561 	/*
1562 	 * If this trap is a result of one of the errors not masked
1563 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1564 	 * indicate that a timeout is to be set later.
1565 	 */
1566 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1567 	    !aflt->flt_panic)
1568 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1569 	else
1570 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1571 
1572 	/*
1573 	 * log the CE and clean up
1574 	 */
1575 	cpu_log_and_clear_ce(&ch_flt);
1576 
1577 	/*
1578 	 * We re-enable CEEN (if required) and check if any disrupting errors
1579 	 * have happened.  We do this because if a disrupting error had occurred
1580 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1581 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1582 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1583 	 * of a error happening between checking the AFSR and enabling CEEN.
1584 	 */
1585 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1586 	    set_error_enable(get_error_enable() | EN_REG_CEEN);
1587 	if (clear_errors(&ch_flt)) {
1588 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1589 		    NULL);
1590 	}
1591 
1592 	/*
1593 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1594 	 * be logged as part of the panic flow.
1595 	 */
1596 	if (aflt->flt_panic)
1597 		fm_panic("%sError(s)", pr_reason);
1598 }
1599 
1600 /*
1601  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1602  * L3_EDU:BLD, TO, and BERR events.
1603  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1604  *
1605  * Cheetah+: No additional errors handled.
1606  *
1607  * Note that the p_clo_flags input is only valid in cases where the
1608  * cpu_private struct is not yet initialized (since that is the only
1609  * time that information cannot be obtained from the logout struct.)
1610  */
1611 /*ARGSUSED*/
1612 void
1613 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1614 {
1615 	ushort_t ttype, tl;
1616 	ch_async_flt_t ch_flt;
1617 	struct async_flt *aflt;
1618 	int trampolined = 0;
1619 	char pr_reason[MAX_REASON_STRING];
1620 	ch_cpu_logout_t *clop;
1621 	uint64_t ceen, clo_flags;
1622 	uint64_t log_afsr;
1623 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1624 	ch_cpu_errors_t cpu_error_regs;
1625 	int expected = DDI_FM_ERR_UNEXPECTED;
1626 	ddi_acc_hdl_t *hp;
1627 
1628 	/*
1629 	 * We need to look at p_flag to determine if the thread detected an
1630 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1631 	 * because we just need a consistent snapshot and we know that everyone
1632 	 * else will store a consistent set of bits while holding p_lock.  We
1633 	 * don't have to worry about a race because SDOCORE is set once prior
1634 	 * to doing i/o from the process's address space and is never cleared.
1635 	 */
1636 	uint_t pflag = ttoproc(curthread)->p_flag;
1637 
1638 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1639 	/*
1640 	 * Get the CPU log out info. If we can't find our CPU private
1641 	 * pointer then we will have to make due without any detailed
1642 	 * logout information.
1643 	 */
1644 	if (CPU_PRIVATE(CPU) == NULL) {
1645 		clop = NULL;
1646 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1647 		get_cpu_error_state(&cpu_error_regs);
1648 		set_cpu_error_state(&cpu_error_regs);
1649 		t_afar = cpu_error_regs.afar;
1650 		t_afsr = cpu_error_regs.afsr;
1651 		t_afsr_ext = cpu_error_regs.afsr_ext;
1652 #if defined(SERRANO)
1653 		ch_flt.afar2 = cpu_error_regs.afar2;
1654 #endif	/* SERRANO */
1655 		clo_flags = p_clo_flags;
1656 	} else {
1657 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1658 		t_afar = clop->clo_data.chd_afar;
1659 		t_afsr = clop->clo_data.chd_afsr;
1660 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1661 #if defined(SERRANO)
1662 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1663 #endif	/* SERRANO */
1664 		clo_flags = clop->clo_flags;
1665 	}
1666 
1667 	/*
1668 	 * In order to simplify code, we maintain this afsr_errs
1669 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1670 	 * sticky bits.
1671 	 */
1672 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1673 	    (t_afsr & C_AFSR_ALL_ERRS);
1674 	pr_reason[0] = '\0';
1675 
1676 	/*
1677 	 * Grab information encoded into our clo_flags field.
1678 	 */
1679 	ceen = clo_flags & EN_REG_CEEN;
1680 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1681 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1682 
1683 	/*
1684 	 * handle the specific error
1685 	 */
1686 	aflt = (struct async_flt *)&ch_flt;
1687 	aflt->flt_id = gethrtime_waitfree();
1688 	aflt->flt_bus_id = getprocessorid();
1689 	aflt->flt_inst = CPU->cpu_id;
1690 	ch_flt.afsr_ext = t_afsr_ext;
1691 	ch_flt.afsr_errs = t_afsr_errs;
1692 	aflt->flt_stat = t_afsr;
1693 	aflt->flt_addr = t_afar;
1694 	aflt->flt_pc = (caddr_t)rp->r_pc;
1695 	aflt->flt_prot = AFLT_PROT_NONE;
1696 	aflt->flt_class = CPU_FAULT;
1697 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1698 	aflt->flt_tl = (uchar_t)tl;
1699 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1700 	    C_AFSR_PANIC(t_afsr_errs));
1701 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1702 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1703 
1704 	/*
1705 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1706 	 * see if we were executing in the kernel under on_trap() or t_lofault
1707 	 * protection.  If so, modify the saved registers so that we return
1708 	 * from the trap to the appropriate trampoline routine.
1709 	 */
1710 	if (aflt->flt_priv && tl == 0) {
1711 		if (curthread->t_ontrap != NULL) {
1712 			on_trap_data_t *otp = curthread->t_ontrap;
1713 
1714 			if (otp->ot_prot & OT_DATA_EC) {
1715 				aflt->flt_prot = AFLT_PROT_EC;
1716 				otp->ot_trap |= OT_DATA_EC;
1717 				rp->r_pc = otp->ot_trampoline;
1718 				rp->r_npc = rp->r_pc + 4;
1719 				trampolined = 1;
1720 			}
1721 
1722 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1723 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1724 				aflt->flt_prot = AFLT_PROT_ACCESS;
1725 				otp->ot_trap |= OT_DATA_ACCESS;
1726 				rp->r_pc = otp->ot_trampoline;
1727 				rp->r_npc = rp->r_pc + 4;
1728 				trampolined = 1;
1729 				/*
1730 				 * for peeks and caut_gets errors are expected
1731 				 */
1732 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1733 				if (!hp)
1734 					expected = DDI_FM_ERR_PEEK;
1735 				else if (hp->ah_acc.devacc_attr_access ==
1736 				    DDI_CAUTIOUS_ACC)
1737 					expected = DDI_FM_ERR_EXPECTED;
1738 			}
1739 
1740 		} else if (curthread->t_lofault) {
1741 			aflt->flt_prot = AFLT_PROT_COPY;
1742 			rp->r_g1 = EFAULT;
1743 			rp->r_pc = curthread->t_lofault;
1744 			rp->r_npc = rp->r_pc + 4;
1745 			trampolined = 1;
1746 		}
1747 	}
1748 
1749 	/*
1750 	 * If we're in user mode or we're doing a protected copy, we either
1751 	 * want the ASTON code below to send a signal to the user process
1752 	 * or we want to panic if aft_panic is set.
1753 	 *
1754 	 * If we're in privileged mode and we're not doing a copy, then we
1755 	 * need to check if we've trampolined.  If we haven't trampolined,
1756 	 * we should panic.
1757 	 */
1758 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1759 		if (t_afsr_errs &
1760 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1761 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1762 			aflt->flt_panic |= aft_panic;
1763 	} else if (!trampolined) {
1764 			aflt->flt_panic = 1;
1765 	}
1766 
1767 	/*
1768 	 * If we've trampolined due to a privileged TO or BERR, or if an
1769 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1770 	 * event for that TO or BERR.  Queue all other events (if any) besides
1771 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1772 	 * ignore the number of events queued.  If we haven't trampolined due
1773 	 * to a TO or BERR, just enqueue events normally.
1774 	 */
1775 	log_afsr = t_afsr_errs;
1776 	if (trampolined) {
1777 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1778 	} else if (!aflt->flt_priv) {
1779 		/*
1780 		 * User mode, suppress messages if
1781 		 * cpu_berr_to_verbose is not set.
1782 		 */
1783 		if (!cpu_berr_to_verbose)
1784 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1785 	}
1786 
1787 	/*
1788 	 * Log any errors that occurred
1789 	 */
1790 	if (((log_afsr &
1791 		((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1792 		cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1793 		(t_afsr_errs &
1794 		(C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1795 		ch_flt.flt_type = CPU_INV_AFSR;
1796 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1797 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1798 		    aflt->flt_panic);
1799 	}
1800 
1801 	/*
1802 	 * Zero out + invalidate CPU logout.
1803 	 */
1804 	if (clop) {
1805 		bzero(clop, sizeof (ch_cpu_logout_t));
1806 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1807 	}
1808 
1809 #if defined(JALAPENO) || defined(SERRANO)
1810 	/*
1811 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1812 	 * IO errors that may have resulted in this trap.
1813 	 */
1814 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1815 		cpu_run_bus_error_handlers(aflt, expected);
1816 	}
1817 
1818 	/*
1819 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1820 	 * line from the Ecache.  We also need to query the bus nexus for
1821 	 * fatal errors.  Attempts to do diagnostic read on caches may
1822 	 * introduce more errors (especially when the module is bad).
1823 	 */
1824 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1825 		/*
1826 		 * Ask our bus nexus friends if they have any fatal errors.  If
1827 		 * so, they will log appropriate error messages.
1828 		 */
1829 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1830 			aflt->flt_panic = 1;
1831 
1832 		/*
1833 		 * We got a UE or RUE and are panicking, save the fault PA in
1834 		 * a known location so that the platform specific panic code
1835 		 * can check for copyback errors.
1836 		 */
1837 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1838 			panic_aflt = *aflt;
1839 		}
1840 	}
1841 
1842 	/*
1843 	 * Flush Ecache line or entire Ecache
1844 	 */
1845 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1846 		cpu_error_ecache_flush(&ch_flt);
1847 #else /* JALAPENO || SERRANO */
1848 	/*
1849 	 * UE/BERR/TO: Call our bus nexus friends to check for
1850 	 * IO errors that may have resulted in this trap.
1851 	 */
1852 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1853 		cpu_run_bus_error_handlers(aflt, expected);
1854 	}
1855 
1856 	/*
1857 	 * UE: If the UE is in memory, we need to flush the bad
1858 	 * line from the Ecache.  We also need to query the bus nexus for
1859 	 * fatal errors.  Attempts to do diagnostic read on caches may
1860 	 * introduce more errors (especially when the module is bad).
1861 	 */
1862 	if (t_afsr & C_AFSR_UE) {
1863 		/*
1864 		 * Ask our legacy bus nexus friends if they have any fatal
1865 		 * errors.  If so, they will log appropriate error messages.
1866 		 */
1867 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1868 			aflt->flt_panic = 1;
1869 
1870 		/*
1871 		 * We got a UE and are panicking, save the fault PA in a known
1872 		 * location so that the platform specific panic code can check
1873 		 * for copyback errors.
1874 		 */
1875 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1876 			panic_aflt = *aflt;
1877 		}
1878 	}
1879 
1880 	/*
1881 	 * Flush Ecache line or entire Ecache
1882 	 */
1883 	if (t_afsr_errs &
1884 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1885 		cpu_error_ecache_flush(&ch_flt);
1886 #endif /* JALAPENO || SERRANO */
1887 
1888 	/*
1889 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1890 	 * or disrupting errors have happened.  We do this because if a
1891 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1892 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1893 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1894 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1895 	 * deferred or disrupting error happening between checking the AFSR and
1896 	 * enabling NCEEN/CEEN.
1897 	 *
1898 	 * Note: CEEN reenabled only if it was on when trap taken.
1899 	 */
1900 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
1901 	if (clear_errors(&ch_flt)) {
1902 		/*
1903 		 * Check for secondary errors, and avoid panicking if we
1904 		 * have them
1905 		 */
1906 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
1907 		    t_afar) == 0) {
1908 			aflt->flt_panic |= ((ch_flt.afsr_errs &
1909 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
1910 		}
1911 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1912 		    NULL);
1913 	}
1914 
1915 	/*
1916 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1917 	 * be logged as part of the panic flow.
1918 	 */
1919 	if (aflt->flt_panic)
1920 		fm_panic("%sError(s)", pr_reason);
1921 
1922 	/*
1923 	 * If we queued an error and we are going to return from the trap and
1924 	 * the error was in user mode or inside of a copy routine, set AST flag
1925 	 * so the queue will be drained before returning to user mode.  The
1926 	 * AST processing will also act on our failure policy.
1927 	 */
1928 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1929 		int pcb_flag = 0;
1930 
1931 		if (t_afsr_errs &
1932 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
1933 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1934 			pcb_flag |= ASYNC_HWERR;
1935 
1936 		if (t_afsr & C_AFSR_BERR)
1937 			pcb_flag |= ASYNC_BERR;
1938 
1939 		if (t_afsr & C_AFSR_TO)
1940 			pcb_flag |= ASYNC_BTO;
1941 
1942 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1943 		aston(curthread);
1944 	}
1945 }
1946 
1947 #if defined(CPU_IMP_L1_CACHE_PARITY)
1948 /*
1949  * Handling of data and instruction parity errors (traps 0x71, 0x72).
1950  *
1951  * For Panther, P$ data parity errors during floating point load hits
1952  * are also detected (reported as TT 0x71) and handled by this trap
1953  * handler.
1954  *
1955  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
1956  * is available.
1957  */
1958 /*ARGSUSED*/
1959 void
1960 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
1961 {
1962 	ch_async_flt_t ch_flt;
1963 	struct async_flt *aflt;
1964 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
1965 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
1966 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
1967 	char *error_class;
1968 
1969 	/*
1970 	 * Log the error.
1971 	 * For icache parity errors the fault address is the trap PC.
1972 	 * For dcache/pcache parity errors the instruction would have to
1973 	 * be decoded to determine the address and that isn't possible
1974 	 * at high PIL.
1975 	 */
1976 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1977 	aflt = (struct async_flt *)&ch_flt;
1978 	aflt->flt_id = gethrtime_waitfree();
1979 	aflt->flt_bus_id = getprocessorid();
1980 	aflt->flt_inst = CPU->cpu_id;
1981 	aflt->flt_pc = tpc;
1982 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
1983 	aflt->flt_prot = AFLT_PROT_NONE;
1984 	aflt->flt_class = CPU_FAULT;
1985 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
1986 	aflt->flt_tl = tl;
1987 	aflt->flt_panic = panic;
1988 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
1989 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
1990 
1991 	if (iparity) {
1992 		cpu_icache_parity_info(&ch_flt);
1993 		if (ch_flt.parity_data.ipe.cpl_off != -1)
1994 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
1995 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
1996 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
1997 		else
1998 			error_class = FM_EREPORT_CPU_USIII_IPE;
1999 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2000 	} else {
2001 		cpu_dcache_parity_info(&ch_flt);
2002 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2003 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2004 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2005 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2006 		else
2007 			error_class = FM_EREPORT_CPU_USIII_DPE;
2008 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2009 		/*
2010 		 * For panther we also need to check the P$ for parity errors.
2011 		 */
2012 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2013 			cpu_pcache_parity_info(&ch_flt);
2014 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2015 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2016 				aflt->flt_payload =
2017 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2018 			}
2019 		}
2020 	}
2021 
2022 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2023 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2024 
2025 	if (iparity) {
2026 		/*
2027 		 * Invalidate entire I$.
2028 		 * This is required due to the use of diagnostic ASI
2029 		 * accesses that may result in a loss of I$ coherency.
2030 		 */
2031 		if (cache_boot_state & DCU_IC) {
2032 			flush_icache();
2033 		}
2034 		/*
2035 		 * According to section P.3.1 of the Panther PRM, we
2036 		 * need to do a little more for recovery on those
2037 		 * CPUs after encountering an I$ parity error.
2038 		 */
2039 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2040 			flush_ipb();
2041 			correct_dcache_parity(dcache_size,
2042 			    dcache_linesize);
2043 			flush_pcache();
2044 		}
2045 	} else {
2046 		/*
2047 		 * Since the valid bit is ignored when checking parity the
2048 		 * D$ data and tag must also be corrected.  Set D$ data bits
2049 		 * to zero and set utag to 0, 1, 2, 3.
2050 		 */
2051 		correct_dcache_parity(dcache_size, dcache_linesize);
2052 
2053 		/*
2054 		 * According to section P.3.3 of the Panther PRM, we
2055 		 * need to do a little more for recovery on those
2056 		 * CPUs after encountering a D$ or P$ parity error.
2057 		 *
2058 		 * As far as clearing P$ parity errors, it is enough to
2059 		 * simply invalidate all entries in the P$ since P$ parity
2060 		 * error traps are only generated for floating point load
2061 		 * hits.
2062 		 */
2063 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2064 			flush_icache();
2065 			flush_ipb();
2066 			flush_pcache();
2067 		}
2068 	}
2069 
2070 	/*
2071 	 * Invalidate entire D$ if it was enabled.
2072 	 * This is done to avoid stale data in the D$ which might
2073 	 * occur with the D$ disabled and the trap handler doing
2074 	 * stores affecting lines already in the D$.
2075 	 */
2076 	if (cache_boot_state & DCU_DC) {
2077 		flush_dcache();
2078 	}
2079 
2080 	/*
2081 	 * Restore caches to their bootup state.
2082 	 */
2083 	set_dcu(get_dcu() | cache_boot_state);
2084 
2085 	/*
2086 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2087 	 * be logged as part of the panic flow.
2088 	 */
2089 	if (aflt->flt_panic)
2090 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2091 
2092 	/*
2093 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2094 	 * the chance of getting an unrecoverable Fast ECC error.  This
2095 	 * flush will evict the part of the parity trap handler that is run
2096 	 * at TL>1.
2097 	 */
2098 	if (tl) {
2099 		cpu_flush_ecache();
2100 	}
2101 }
2102 
2103 /*
2104  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2105  * to indicate which portions of the captured data should be in the ereport.
2106  */
2107 void
2108 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2109 {
2110 	int way = ch_flt->parity_data.ipe.cpl_way;
2111 	int offset = ch_flt->parity_data.ipe.cpl_off;
2112 	int tag_index;
2113 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2114 
2115 
2116 	if ((offset != -1) || (way != -1)) {
2117 		/*
2118 		 * Parity error in I$ tag or data
2119 		 */
2120 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2121 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2122 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2123 			    PN_ICIDX_TO_WAY(tag_index);
2124 		else
2125 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2126 			    CH_ICIDX_TO_WAY(tag_index);
2127 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2128 		    IC_LOGFLAG_MAGIC;
2129 	} else {
2130 		/*
2131 		 * Parity error was not identified.
2132 		 * Log tags and data for all ways.
2133 		 */
2134 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2135 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2136 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2137 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2138 				    PN_ICIDX_TO_WAY(tag_index);
2139 			else
2140 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2141 				    CH_ICIDX_TO_WAY(tag_index);
2142 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2143 			    IC_LOGFLAG_MAGIC;
2144 		}
2145 	}
2146 }
2147 
2148 /*
2149  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2150  * to indicate which portions of the captured data should be in the ereport.
2151  */
2152 void
2153 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2154 {
2155 	int way = ch_flt->parity_data.dpe.cpl_way;
2156 	int offset = ch_flt->parity_data.dpe.cpl_off;
2157 	int tag_index;
2158 
2159 	if (offset != -1) {
2160 		/*
2161 		 * Parity error in D$ or P$ data array.
2162 		 *
2163 		 * First check to see whether the parity error is in D$ or P$
2164 		 * since P$ data parity errors are reported in Panther using
2165 		 * the same trap.
2166 		 */
2167 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2168 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2169 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2170 			    CH_PCIDX_TO_WAY(tag_index);
2171 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2172 			    PC_LOGFLAG_MAGIC;
2173 		} else {
2174 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2175 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2176 			    CH_DCIDX_TO_WAY(tag_index);
2177 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2178 			    DC_LOGFLAG_MAGIC;
2179 		}
2180 	} else if (way != -1) {
2181 		/*
2182 		 * Parity error in D$ tag.
2183 		 */
2184 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2185 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2186 		    CH_DCIDX_TO_WAY(tag_index);
2187 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2188 		    DC_LOGFLAG_MAGIC;
2189 	}
2190 }
2191 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2192 
2193 /*
2194  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2195  * post-process CPU events that are dequeued.  As such, it can be invoked
2196  * from softint context, from AST processing in the trap() flow, or from the
2197  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2198  * Historically this entry point was used to log the actual cmn_err(9F) text;
2199  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2200  * With FMA this function now also returns a flag which indicates to the
2201  * caller whether the ereport should be posted (1) or suppressed (0).
2202  */
2203 static int
2204 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2205 {
2206 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2207 	struct async_flt *aflt = (struct async_flt *)flt;
2208 	uint64_t errors;
2209 
2210 	switch (ch_flt->flt_type) {
2211 	case CPU_INV_AFSR:
2212 		/*
2213 		 * If it is a disrupting trap and the AFSR is zero, then
2214 		 * the event has probably already been noted. Do not post
2215 		 * an ereport.
2216 		 */
2217 		if ((aflt->flt_status & ECC_C_TRAP) &&
2218 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2219 			return (0);
2220 		else
2221 			return (1);
2222 	case CPU_TO:
2223 	case CPU_BERR:
2224 	case CPU_FATAL:
2225 	case CPU_FPUERR:
2226 		return (1);
2227 
2228 	case CPU_UE_ECACHE_RETIRE:
2229 		cpu_log_err(aflt);
2230 		cpu_page_retire(ch_flt);
2231 		return (1);
2232 
2233 	/*
2234 	 * Cases where we may want to suppress logging or perform
2235 	 * extended diagnostics.
2236 	 */
2237 	case CPU_CE:
2238 	case CPU_EMC:
2239 		/*
2240 		 * We want to skip logging and further classification
2241 		 * only if ALL the following conditions are true:
2242 		 *
2243 		 *	1. There is only one error
2244 		 *	2. That error is a correctable memory error
2245 		 *	3. The error is caused by the memory scrubber (in
2246 		 *	   which case the error will have occurred under
2247 		 *	   on_trap protection)
2248 		 *	4. The error is on a retired page
2249 		 *
2250 		 * Note: AFLT_PROT_EC is used places other than the memory
2251 		 * scrubber.  However, none of those errors should occur
2252 		 * on a retired page.
2253 		 */
2254 		if ((ch_flt->afsr_errs &
2255 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2256 		    aflt->flt_prot == AFLT_PROT_EC) {
2257 
2258 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2259 			    if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2260 
2261 				/*
2262 				 * Since we're skipping logging, we'll need
2263 				 * to schedule the re-enabling of CEEN
2264 				 */
2265 				(void) timeout(cpu_delayed_check_ce_errors,
2266 				    (void *)(uintptr_t)aflt->flt_inst,
2267 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2268 						 * MICROSEC));
2269 			    }
2270 			    return (0);
2271 			}
2272 		}
2273 
2274 		/*
2275 		 * Perform/schedule further classification actions, but
2276 		 * only if the page is healthy (we don't want bad
2277 		 * pages inducing too much diagnostic activity).  If we could
2278 		 * not find a page pointer then we also skip this.  If
2279 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2280 		 * to copy and recirculate the event (for further diagnostics)
2281 		 * and we should not proceed to log it here.
2282 		 *
2283 		 * This must be the last step here before the cpu_log_err()
2284 		 * below - if an event recirculates cpu_ce_log_err() will
2285 		 * not call the current function but just proceed directly
2286 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2287 		 *
2288 		 * Note: Check cpu_impl_async_log_err if changing this
2289 		 */
2290 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2291 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2292 			    CE_XDIAG_SKIP_NOPP);
2293 		} else {
2294 			if (errors != PR_OK) {
2295 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2296 				    CE_XDIAG_SKIP_PAGEDET);
2297 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2298 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2299 				return (0);
2300 			}
2301 		}
2302 		/*FALLTHRU*/
2303 
2304 	/*
2305 	 * Cases where we just want to report the error and continue.
2306 	 */
2307 	case CPU_CE_ECACHE:
2308 	case CPU_UE_ECACHE:
2309 	case CPU_IV:
2310 	case CPU_ORPH:
2311 		cpu_log_err(aflt);
2312 		return (1);
2313 
2314 	/*
2315 	 * Cases where we want to fall through to handle panicking.
2316 	 */
2317 	case CPU_UE:
2318 		/*
2319 		 * We want to skip logging in the same conditions as the
2320 		 * CE case.  In addition, we want to make sure we're not
2321 		 * panicking.
2322 		 */
2323 		if (!panicstr && (ch_flt->afsr_errs &
2324 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2325 		    aflt->flt_prot == AFLT_PROT_EC) {
2326 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2327 				/* Zero the address to clear the error */
2328 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2329 				return (0);
2330 			}
2331 		}
2332 		cpu_log_err(aflt);
2333 		break;
2334 
2335 	default:
2336 		/*
2337 		 * If the us3_common.c code doesn't know the flt_type, it may
2338 		 * be an implementation-specific code.  Call into the impldep
2339 		 * backend to find out what to do: if it tells us to continue,
2340 		 * break and handle as if falling through from a UE; if not,
2341 		 * the impldep backend has handled the error and we're done.
2342 		 */
2343 		switch (cpu_impl_async_log_err(flt, eqep)) {
2344 		case CH_ASYNC_LOG_DONE:
2345 			return (1);
2346 		case CH_ASYNC_LOG_RECIRC:
2347 			return (0);
2348 		case CH_ASYNC_LOG_CONTINUE:
2349 			break; /* continue on to handle UE-like error */
2350 		default:
2351 			cmn_err(CE_WARN, "discarding error 0x%p with "
2352 			    "invalid fault type (0x%x)",
2353 			    (void *)aflt, ch_flt->flt_type);
2354 			return (0);
2355 		}
2356 	}
2357 
2358 	/* ... fall through from the UE case */
2359 
2360 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2361 		if (!panicstr) {
2362 			cpu_page_retire(ch_flt);
2363 		} else {
2364 			/*
2365 			 * Clear UEs on panic so that we don't
2366 			 * get haunted by them during panic or
2367 			 * after reboot
2368 			 */
2369 			cpu_clearphys(aflt);
2370 			(void) clear_errors(NULL);
2371 		}
2372 	}
2373 
2374 	return (1);
2375 }
2376 
2377 /*
2378  * Retire the bad page that may contain the flushed error.
2379  */
2380 void
2381 cpu_page_retire(ch_async_flt_t *ch_flt)
2382 {
2383 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2384 	(void) page_retire(aflt->flt_addr, PR_UE);
2385 }
2386 
2387 /*
2388  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2389  * generic event post-processing for correctable and uncorrectable memory,
2390  * E$, and MTag errors.  Historically this entry point was used to log bits of
2391  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2392  * converted into an ereport.  In addition, it transmits the error to any
2393  * platform-specific service-processor FRU logging routines, if available.
2394  */
2395 void
2396 cpu_log_err(struct async_flt *aflt)
2397 {
2398 	char unum[UNUM_NAMLEN];
2399 	int len = 0;
2400 	int synd_status, synd_code, afar_status;
2401 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2402 
2403 	/*
2404 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
2405 	 * For Panther, L2$ is not external, so we don't want to
2406 	 * generate an E$ unum for those errors.
2407 	 */
2408 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
2409 		if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS)
2410 			aflt->flt_status |= ECC_ECACHE;
2411 	} else {
2412 		if (ch_flt->flt_bit & C_AFSR_ECACHE)
2413 			aflt->flt_status |= ECC_ECACHE;
2414 	}
2415 
2416 	/*
2417 	 * Determine syndrome status.
2418 	 */
2419 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2420 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2421 
2422 	/*
2423 	 * Determine afar status.
2424 	 */
2425 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2426 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2427 				ch_flt->flt_bit);
2428 	else
2429 		afar_status = AFLT_STAT_INVALID;
2430 
2431 	/*
2432 	 * If afar status is not invalid do a unum lookup.
2433 	 */
2434 	if (afar_status != AFLT_STAT_INVALID) {
2435 		(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
2436 			UNUM_NAMLEN, &len);
2437 	} else {
2438 		unum[0] = '\0';
2439 	}
2440 
2441 	synd_code = synd_to_synd_code(synd_status,
2442 	    aflt->flt_synd, ch_flt->flt_bit);
2443 
2444 	/*
2445 	 * Do not send the fruid message (plat_ecc_error_data_t)
2446 	 * to the SC if it can handle the enhanced error information
2447 	 * (plat_ecc_error2_data_t) or when the tunable
2448 	 * ecc_log_fruid_enable is set to 0.
2449 	 */
2450 
2451 	if (&plat_ecc_capability_sc_get &&
2452 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2453 		if (&plat_log_fruid_error)
2454 			plat_log_fruid_error(synd_code, aflt, unum,
2455 			    ch_flt->flt_bit);
2456 	}
2457 
2458 	if (aflt->flt_func != NULL)
2459 		aflt->flt_func(aflt, unum);
2460 
2461 	if (afar_status != AFLT_STAT_INVALID)
2462 		cpu_log_diag_info(ch_flt);
2463 
2464 	/*
2465 	 * If we have a CEEN error , we do not reenable CEEN until after
2466 	 * we exit the trap handler. Otherwise, another error may
2467 	 * occur causing the handler to be entered recursively.
2468 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2469 	 * to try and ensure that the CPU makes progress in the face
2470 	 * of a CE storm.
2471 	 */
2472 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2473 		(void) timeout(cpu_delayed_check_ce_errors,
2474 		    (void *)(uintptr_t)aflt->flt_inst,
2475 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2476 	}
2477 }
2478 
2479 /*
2480  * Invoked by error_init() early in startup and therefore before
2481  * startup_errorq() is called to drain any error Q -
2482  *
2483  * startup()
2484  *   startup_end()
2485  *     error_init()
2486  *       cpu_error_init()
2487  * errorq_init()
2488  *   errorq_drain()
2489  * start_other_cpus()
2490  *
2491  * The purpose of this routine is to create error-related taskqs.  Taskqs
2492  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2493  * context.
2494  */
2495 void
2496 cpu_error_init(int items)
2497 {
2498 	/*
2499 	 * Create taskq(s) to reenable CE
2500 	 */
2501 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2502 	    items, items, TASKQ_PREPOPULATE);
2503 }
2504 
2505 void
2506 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2507 {
2508 	char unum[UNUM_NAMLEN];
2509 	int len;
2510 
2511 	switch (aflt->flt_class) {
2512 	case CPU_FAULT:
2513 		cpu_ereport_init(aflt);
2514 		if (cpu_async_log_err(aflt, eqep))
2515 			cpu_ereport_post(aflt);
2516 		break;
2517 
2518 	case BUS_FAULT:
2519 		if (aflt->flt_func != NULL) {
2520 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2521 			    unum, UNUM_NAMLEN, &len);
2522 			aflt->flt_func(aflt, unum);
2523 		}
2524 		break;
2525 
2526 	case RECIRC_CPU_FAULT:
2527 		aflt->flt_class = CPU_FAULT;
2528 		cpu_log_err(aflt);
2529 		cpu_ereport_post(aflt);
2530 		break;
2531 
2532 	case RECIRC_BUS_FAULT:
2533 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2534 		/*FALLTHRU*/
2535 	default:
2536 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2537 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2538 		return;
2539 	}
2540 }
2541 
2542 /*
2543  * Scrub and classify a CE.  This function must not modify the
2544  * fault structure passed to it but instead should return the classification
2545  * information.
2546  */
2547 
2548 static uchar_t
2549 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2550 {
2551 	uchar_t disp = CE_XDIAG_EXTALG;
2552 	on_trap_data_t otd;
2553 	uint64_t orig_err;
2554 	ch_cpu_logout_t *clop;
2555 
2556 	/*
2557 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2558 	 * this, but our other callers have not.  Disable preemption to
2559 	 * avoid CPU migration so that we restore CEEN on the correct
2560 	 * cpu later.
2561 	 *
2562 	 * CEEN is cleared so that further CEs that our instruction and
2563 	 * data footprint induce do not cause use to either creep down
2564 	 * kernel stack to the point of overflow, or do so much CE
2565 	 * notification as to make little real forward progress.
2566 	 *
2567 	 * NCEEN must not be cleared.  However it is possible that
2568 	 * our accesses to the flt_addr may provoke a bus error or timeout
2569 	 * if the offending address has just been unconfigured as part of
2570 	 * a DR action.  So we must operate under on_trap protection.
2571 	 */
2572 	kpreempt_disable();
2573 	orig_err = get_error_enable();
2574 	if (orig_err & EN_REG_CEEN)
2575 	    set_error_enable(orig_err & ~EN_REG_CEEN);
2576 
2577 	/*
2578 	 * Our classification algorithm includes the line state before
2579 	 * the scrub; we'd like this captured after the detection and
2580 	 * before the algorithm below - the earlier the better.
2581 	 *
2582 	 * If we've come from a cpu CE trap then this info already exists
2583 	 * in the cpu logout area.
2584 	 *
2585 	 * For a CE detected by memscrub for which there was no trap
2586 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2587 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2588 	 * marked the fault structure as incomplete as a flag to later
2589 	 * logging code.
2590 	 *
2591 	 * If called directly from an IO detected CE there has been
2592 	 * no line data capture.  In this case we logout to the cpu logout
2593 	 * area - that's appropriate since it's the cpu cache data we need
2594 	 * for classification.  We thus borrow the cpu logout area for a
2595 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2596 	 * this time (we will invalidate it again below).
2597 	 *
2598 	 * If called from the partner check xcall handler then this cpu
2599 	 * (the partner) has not necessarily experienced a CE at this
2600 	 * address.  But we want to capture line state before its scrub
2601 	 * attempt since we use that in our classification.
2602 	 */
2603 	if (logout_tried == B_FALSE) {
2604 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2605 			disp |= CE_XDIAG_NOLOGOUT;
2606 	}
2607 
2608 	/*
2609 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2610 	 * no longer be valid (if DR'd since the initial event) so we
2611 	 * perform this scrub under on_trap protection.  If this access is
2612 	 * ok then further accesses below will also be ok - DR cannot
2613 	 * proceed while this thread is active (preemption is disabled);
2614 	 * to be safe we'll nonetheless use on_trap again below.
2615 	 */
2616 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2617 		cpu_scrubphys(ecc);
2618 	} else {
2619 		no_trap();
2620 		if (orig_err & EN_REG_CEEN)
2621 		    set_error_enable(orig_err);
2622 		kpreempt_enable();
2623 		return (disp);
2624 	}
2625 	no_trap();
2626 
2627 	/*
2628 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2629 	 * Note that it's quite possible that the read sourced the data from
2630 	 * another cpu.
2631 	 */
2632 	if (clear_ecc(ecc))
2633 		disp |= CE_XDIAG_CE1;
2634 
2635 	/*
2636 	 * Read the data again.  This time the read is very likely to
2637 	 * come from memory since the scrub induced a writeback to memory.
2638 	 */
2639 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2640 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2641 	} else {
2642 		no_trap();
2643 		if (orig_err & EN_REG_CEEN)
2644 		    set_error_enable(orig_err);
2645 		kpreempt_enable();
2646 		return (disp);
2647 	}
2648 	no_trap();
2649 
2650 	/* Did that read induce a CE that matches the AFAR? */
2651 	if (clear_ecc(ecc))
2652 		disp |= CE_XDIAG_CE2;
2653 
2654 	/*
2655 	 * Look at the logout information and record whether we found the
2656 	 * line in l2/l3 cache.  For Panther we are interested in whether
2657 	 * we found it in either cache (it won't reside in both but
2658 	 * it is possible to read it that way given the moving target).
2659 	 */
2660 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2661 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2662 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2663 		int hit, level;
2664 		int state;
2665 		int totalsize;
2666 		ch_ec_data_t *ecp;
2667 
2668 		/*
2669 		 * If hit is nonzero then a match was found and hit will
2670 		 * be one greater than the index which hit.  For Panther we
2671 		 * also need to pay attention to level to see which of l2$ or
2672 		 * l3$ it hit in.
2673 		 */
2674 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2675 		    0, &level);
2676 
2677 		if (hit) {
2678 			--hit;
2679 			disp |= CE_XDIAG_AFARMATCH;
2680 
2681 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2682 				if (level == 2)
2683 					ecp = &clop->clo_data.chd_l2_data[hit];
2684 				else
2685 					ecp = &clop->clo_data.chd_ec_data[hit];
2686 			} else {
2687 				ASSERT(level == 2);
2688 				ecp = &clop->clo_data.chd_ec_data[hit];
2689 			}
2690 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2691 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2692 			    ecc->flt_addr, ecp->ec_tag);
2693 
2694 			/*
2695 			 * Cheetah variants use different state encodings -
2696 			 * the CH_ECSTATE_* defines vary depending on the
2697 			 * module we're compiled for.  Translate into our
2698 			 * one true version.  Conflate Owner-Shared state
2699 			 * of SSM mode with Owner as victimisation of such
2700 			 * lines may cause a writeback.
2701 			 */
2702 			switch (state) {
2703 			case CH_ECSTATE_MOD:
2704 				disp |= EC_STATE_M;
2705 				break;
2706 
2707 			case CH_ECSTATE_OWN:
2708 			case CH_ECSTATE_OWS:
2709 				disp |= EC_STATE_O;
2710 				break;
2711 
2712 			case CH_ECSTATE_EXL:
2713 				disp |= EC_STATE_E;
2714 				break;
2715 
2716 			case CH_ECSTATE_SHR:
2717 				disp |= EC_STATE_S;
2718 				break;
2719 
2720 			default:
2721 				disp |= EC_STATE_I;
2722 				break;
2723 			}
2724 		}
2725 
2726 		/*
2727 		 * If we initiated the delayed logout then we are responsible
2728 		 * for invalidating the logout area.
2729 		 */
2730 		if (logout_tried == B_FALSE) {
2731 			bzero(clop, sizeof (ch_cpu_logout_t));
2732 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2733 		}
2734 	}
2735 
2736 	/*
2737 	 * Re-enable CEEN if we turned it off.
2738 	 */
2739 	if (orig_err & EN_REG_CEEN)
2740 	    set_error_enable(orig_err);
2741 	kpreempt_enable();
2742 
2743 	return (disp);
2744 }
2745 
2746 /*
2747  * Scrub a correctable memory error and collect data for classification
2748  * of CE type.  This function is called in the detection path, ie tl0 handling
2749  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2750  */
2751 void
2752 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2753 {
2754 	/*
2755 	 * Cheetah CE classification does not set any bits in flt_status.
2756 	 * Instead we will record classification datapoints in flt_disp.
2757 	 */
2758 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2759 
2760 	/*
2761 	 * To check if the error detected by IO is persistent, sticky or
2762 	 * intermittent.  This is noticed by clear_ecc().
2763 	 */
2764 	if (ecc->flt_status & ECC_IOBUS)
2765 		ecc->flt_stat = C_AFSR_MEMORY;
2766 
2767 	/*
2768 	 * Record information from this first part of the algorithm in
2769 	 * flt_disp.
2770 	 */
2771 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2772 }
2773 
2774 /*
2775  * Select a partner to perform a further CE classification check from.
2776  * Must be called with kernel preemption disabled (to stop the cpu list
2777  * from changing).  The detecting cpu we are partnering has cpuid
2778  * aflt->flt_inst; we might not be running on the detecting cpu.
2779  *
2780  * Restrict choice to active cpus in the same cpu partition as ourselves in
2781  * an effort to stop bad cpus in one partition causing other partitions to
2782  * perform excessive diagnostic activity.  Actually since the errorq drain
2783  * is run from a softint most of the time and that is a global mechanism
2784  * this isolation is only partial.  Return NULL if we fail to find a
2785  * suitable partner.
2786  *
2787  * We prefer a partner that is in a different latency group to ourselves as
2788  * we will share fewer datapaths.  If such a partner is unavailable then
2789  * choose one in the same lgroup but prefer a different chip and only allow
2790  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2791  * flags includes PTNR_SELFOK then permit selection of the original detector.
2792  *
2793  * We keep a cache of the last partner selected for a cpu, and we'll try to
2794  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2795  * have passed since that selection was made.  This provides the benefit
2796  * of the point-of-view of different partners over time but without
2797  * requiring frequent cpu list traversals.
2798  */
2799 
2800 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2801 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2802 
2803 static cpu_t *
2804 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2805 {
2806 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2807 	hrtime_t lasttime, thistime;
2808 
2809 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2810 
2811 	dtcr = cpu[aflt->flt_inst];
2812 
2813 	/*
2814 	 * Short-circuit for the following cases:
2815 	 *	. the dtcr is not flagged active
2816 	 *	. there is just one cpu present
2817 	 *	. the detector has disappeared
2818 	 *	. we were given a bad flt_inst cpuid; this should not happen
2819 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2820 	 *	  reason to panic.
2821 	 *	. there is just one cpu left online in the cpu partition
2822 	 *
2823 	 * If we return NULL after this point then we do not update the
2824 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2825 	 * again next time; this is the case where the only other cpu online
2826 	 * in the detector's partition is on the same chip as the detector
2827 	 * and since CEEN re-enable is throttled even that case should not
2828 	 * hurt performance.
2829 	 */
2830 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2831 		return (NULL);
2832 	}
2833 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2834 		if (flags & PTNR_SELFOK) {
2835 			*typep = CE_XDIAG_PTNR_SELF;
2836 			return (dtcr);
2837 		} else {
2838 			return (NULL);
2839 		}
2840 	}
2841 
2842 	thistime = gethrtime();
2843 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2844 
2845 	/*
2846 	 * Select a starting point.
2847 	 */
2848 	if (!lasttime) {
2849 		/*
2850 		 * We've never selected a partner for this detector before.
2851 		 * Start the scan at the next online cpu in the same cpu
2852 		 * partition.
2853 		 */
2854 		sp = dtcr->cpu_next_part;
2855 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2856 		/*
2857 		 * Our last selection has not aged yet.  If this partner:
2858 		 *	. is still a valid cpu,
2859 		 *	. is still in the same partition as the detector
2860 		 *	. is still marked active
2861 		 *	. satisfies the 'flags' argument criteria
2862 		 * then select it again without updating the timestamp.
2863 		 */
2864 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2865 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2866 		    !cpu_flagged_active(sp->cpu_flags) ||
2867 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2868 		    (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
2869 		    !(flags & PTNR_SIBLINGOK))) {
2870 			sp = dtcr->cpu_next_part;
2871 		} else {
2872 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2873 				*typep = CE_XDIAG_PTNR_REMOTE;
2874 			} else if (sp == dtcr) {
2875 				*typep = CE_XDIAG_PTNR_SELF;
2876 			} else if (sp->cpu_chip->chip_id ==
2877 			    dtcr->cpu_chip->chip_id) {
2878 				*typep = CE_XDIAG_PTNR_SIBLING;
2879 			} else {
2880 				*typep = CE_XDIAG_PTNR_LOCAL;
2881 			}
2882 			return (sp);
2883 		}
2884 	} else {
2885 		/*
2886 		 * Our last selection has aged.  If it is nonetheless still a
2887 		 * valid cpu then start the scan at the next cpu in the
2888 		 * partition after our last partner.  If the last selection
2889 		 * is no longer a valid cpu then go with our default.  In
2890 		 * this way we slowly cycle through possible partners to
2891 		 * obtain multiple viewpoints over time.
2892 		 */
2893 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2894 		if (sp == NULL) {
2895 			sp = dtcr->cpu_next_part;
2896 		} else {
2897 			sp = sp->cpu_next_part;		/* may be dtcr */
2898 			if (sp->cpu_part != dtcr->cpu_part)
2899 				sp = dtcr;
2900 		}
2901 	}
2902 
2903 	/*
2904 	 * We have a proposed starting point for our search, but if this
2905 	 * cpu is offline then its cpu_next_part will point to itself
2906 	 * so we can't use that to iterate over cpus in this partition in
2907 	 * the loop below.  We still want to avoid iterating over cpus not
2908 	 * in our partition, so in the case that our starting point is offline
2909 	 * we will repoint it to be the detector itself;  and if the detector
2910 	 * happens to be offline we'll return NULL from the following loop.
2911 	 */
2912 	if (!cpu_flagged_active(sp->cpu_flags)) {
2913 		sp = dtcr;
2914 	}
2915 
2916 	ptnr = sp;
2917 	locptnr = NULL;
2918 	sibptnr = NULL;
2919 	do {
2920 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
2921 			continue;
2922 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2923 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
2924 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2925 			*typep = CE_XDIAG_PTNR_REMOTE;
2926 			return (ptnr);
2927 		}
2928 		if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
2929 			if (sibptnr == NULL)
2930 				sibptnr = ptnr;
2931 			continue;
2932 		}
2933 		if (locptnr == NULL)
2934 			locptnr = ptnr;
2935 	} while ((ptnr = ptnr->cpu_next_part) != sp);
2936 
2937 	/*
2938 	 * A foreign partner has already been returned if one was available.
2939 	 *
2940 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
2941 	 * detector, is active, and is not a sibling of the detector.
2942 	 *
2943 	 * If sibptnr is not NULL it is a sibling of the detector, and is
2944 	 * active.
2945 	 *
2946 	 * If we have to resort to using the detector itself we have already
2947 	 * checked that it is active.
2948 	 */
2949 	if (locptnr) {
2950 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
2951 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2952 		*typep = CE_XDIAG_PTNR_LOCAL;
2953 		return (locptnr);
2954 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
2955 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
2956 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2957 		*typep = CE_XDIAG_PTNR_SIBLING;
2958 		return (sibptnr);
2959 	} else if (flags & PTNR_SELFOK) {
2960 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
2961 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2962 		*typep = CE_XDIAG_PTNR_SELF;
2963 		return (dtcr);
2964 	}
2965 
2966 	return (NULL);
2967 }
2968 
2969 /*
2970  * Cross call handler that is requested to run on the designated partner of
2971  * a cpu that experienced a possibly sticky or possibly persistnet CE.
2972  */
2973 static void
2974 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
2975 {
2976 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
2977 }
2978 
2979 /*
2980  * The associated errorqs are never destroyed so we do not need to deal with
2981  * them disappearing before this timeout fires.  If the affected memory
2982  * has been DR'd out since the original event the scrub algrithm will catch
2983  * any errors and return null disposition info.  If the original detecting
2984  * cpu has been DR'd out then ereport detector info will not be able to
2985  * lookup CPU type;  with a small timeout this is unlikely.
2986  */
2987 static void
2988 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
2989 {
2990 	struct async_flt *aflt = cbarg->lkycb_aflt;
2991 	uchar_t disp;
2992 	cpu_t *cp;
2993 	int ptnrtype;
2994 
2995 	kpreempt_disable();
2996 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
2997 	    &ptnrtype)) {
2998 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
2999 		    (uint64_t)&disp);
3000 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3001 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3002 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3003 	} else {
3004 		ce_xdiag_lkydrops++;
3005 		if (ncpus > 1)
3006 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3007 			    CE_XDIAG_SKIP_NOPTNR);
3008 	}
3009 	kpreempt_enable();
3010 
3011 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3012 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3013 }
3014 
3015 /*
3016  * Called from errorq drain code when processing a CE error, both from
3017  * CPU and PCI drain functions.  Decide what further classification actions,
3018  * if any, we will perform.  Perform immediate actions now, and schedule
3019  * delayed actions as required.  Note that we are no longer necessarily running
3020  * on the detecting cpu, and that the async_flt structure will not persist on
3021  * return from this function.
3022  *
3023  * Calls to this function should aim to be self-throtlling in some way.  With
3024  * the delayed re-enable of CEEN the absolute rate of calls should not
3025  * be excessive.  Callers should also avoid performing in-depth classification
3026  * for events in pages that are already known to be suspect.
3027  *
3028  * We return nonzero to indicate that the event has been copied and
3029  * recirculated for further testing.  The caller should not log the event
3030  * in this case - it will be logged when further test results are available.
3031  *
3032  * Our possible contexts are that of errorq_drain: below lock level or from
3033  * panic context.  We can assume that the cpu we are running on is online.
3034  */
3035 
3036 
3037 #ifdef DEBUG
3038 static int ce_xdiag_forceaction;
3039 #endif
3040 
3041 int
3042 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3043     errorq_elem_t *eqep, size_t afltoffset)
3044 {
3045 	ce_dispact_t dispact, action;
3046 	cpu_t *cp;
3047 	uchar_t dtcrinfo, disp;
3048 	int ptnrtype;
3049 
3050 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3051 		ce_xdiag_drops++;
3052 		return (0);
3053 	} else if (!aflt->flt_in_memory) {
3054 		ce_xdiag_drops++;
3055 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3056 		return (0);
3057 	}
3058 
3059 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3060 
3061 	/*
3062 	 * Some correctable events are not scrubbed/classified, such as those
3063 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3064 	 * initial detector classification go no further.
3065 	 */
3066 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3067 		ce_xdiag_drops++;
3068 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3069 		return (0);
3070 	}
3071 
3072 	dispact = CE_DISPACT(ce_disp_table,
3073 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3074 	    CE_XDIAG_STATE(dtcrinfo),
3075 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3076 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3077 
3078 
3079 	action = CE_ACT(dispact);	/* bad lookup caught below */
3080 #ifdef DEBUG
3081 	if (ce_xdiag_forceaction != 0)
3082 		action = ce_xdiag_forceaction;
3083 #endif
3084 
3085 	switch (action) {
3086 	case CE_ACT_LKYCHK: {
3087 		caddr_t ndata;
3088 		errorq_elem_t *neqep;
3089 		struct async_flt *ecc;
3090 		ce_lkychk_cb_t *cbargp;
3091 
3092 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3093 			ce_xdiag_lkydrops++;
3094 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3095 			    CE_XDIAG_SKIP_DUPFAIL);
3096 			break;
3097 		}
3098 		ecc = (struct async_flt *)(ndata + afltoffset);
3099 
3100 		ASSERT(ecc->flt_class == CPU_FAULT ||
3101 		    ecc->flt_class == BUS_FAULT);
3102 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3103 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3104 
3105 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3106 		cbargp->lkycb_aflt = ecc;
3107 		cbargp->lkycb_eqp = eqp;
3108 		cbargp->lkycb_eqep = neqep;
3109 
3110 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3111 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3112 		return (1);
3113 	}
3114 
3115 	case CE_ACT_PTNRCHK:
3116 		kpreempt_disable();	/* stop cpu list changing */
3117 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3118 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3119 			    (uint64_t)aflt, (uint64_t)&disp);
3120 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3121 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3122 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3123 		} else if (ncpus > 1) {
3124 			ce_xdiag_ptnrdrops++;
3125 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3126 			    CE_XDIAG_SKIP_NOPTNR);
3127 		} else {
3128 			ce_xdiag_ptnrdrops++;
3129 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3130 			    CE_XDIAG_SKIP_UNIPROC);
3131 		}
3132 		kpreempt_enable();
3133 		break;
3134 
3135 	case CE_ACT_DONE:
3136 		break;
3137 
3138 	case CE_ACT(CE_DISP_BAD):
3139 	default:
3140 #ifdef DEBUG
3141 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3142 #endif
3143 		ce_xdiag_bad++;
3144 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3145 		break;
3146 	}
3147 
3148 	return (0);
3149 }
3150 
3151 /*
3152  * We route all errors through a single switch statement.
3153  */
3154 void
3155 cpu_ue_log_err(struct async_flt *aflt)
3156 {
3157 	switch (aflt->flt_class) {
3158 	case CPU_FAULT:
3159 		cpu_ereport_init(aflt);
3160 		if (cpu_async_log_err(aflt, NULL))
3161 			cpu_ereport_post(aflt);
3162 		break;
3163 
3164 	case BUS_FAULT:
3165 		bus_async_log_err(aflt);
3166 		break;
3167 
3168 	default:
3169 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3170 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3171 		return;
3172 	}
3173 }
3174 
3175 /*
3176  * Routine for panic hook callback from panic_idle().
3177  */
3178 void
3179 cpu_async_panic_callb(void)
3180 {
3181 	ch_async_flt_t ch_flt;
3182 	struct async_flt *aflt;
3183 	ch_cpu_errors_t cpu_error_regs;
3184 	uint64_t afsr_errs;
3185 
3186 	get_cpu_error_state(&cpu_error_regs);
3187 
3188 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3189 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS);
3190 
3191 	if (afsr_errs) {
3192 
3193 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3194 		aflt = (struct async_flt *)&ch_flt;
3195 		aflt->flt_id = gethrtime_waitfree();
3196 		aflt->flt_bus_id = getprocessorid();
3197 		aflt->flt_inst = CPU->cpu_id;
3198 		aflt->flt_stat = cpu_error_regs.afsr;
3199 		aflt->flt_addr = cpu_error_regs.afar;
3200 		aflt->flt_prot = AFLT_PROT_NONE;
3201 		aflt->flt_class = CPU_FAULT;
3202 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3203 		aflt->flt_panic = 1;
3204 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3205 		ch_flt.afsr_errs = afsr_errs;
3206 #if defined(SERRANO)
3207 		ch_flt.afar2 = cpu_error_regs.afar2;
3208 #endif	/* SERRANO */
3209 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3210 	}
3211 }
3212 
3213 /*
3214  * Routine to convert a syndrome into a syndrome code.
3215  */
3216 static int
3217 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3218 {
3219 	if (synd_status == AFLT_STAT_INVALID)
3220 		return (-1);
3221 
3222 	/*
3223 	 * Use the syndrome to index the appropriate syndrome table,
3224 	 * to get the code indicating which bit(s) is(are) bad.
3225 	 */
3226 	if (afsr_bit &
3227 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3228 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3229 #if defined(JALAPENO) || defined(SERRANO)
3230 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3231 				return (-1);
3232 			else
3233 				return (BPAR0 + synd);
3234 #else /* JALAPENO || SERRANO */
3235 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3236 				return (-1);
3237 			else
3238 				return (mtag_syndrome_tab[synd]);
3239 #endif /* JALAPENO || SERRANO */
3240 		} else {
3241 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3242 				return (-1);
3243 			else
3244 				return (ecc_syndrome_tab[synd]);
3245 		}
3246 	} else {
3247 		return (-1);
3248 	}
3249 }
3250 
3251 /*
3252  * Routine to return a string identifying the physical name
3253  * associated with a memory/cache error.
3254  */
3255 int
3256 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3257     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3258     ushort_t flt_status, char *buf, int buflen, int *lenp)
3259 {
3260 	int synd_code;
3261 	int ret;
3262 
3263 	/*
3264 	 * An AFSR of -1 defaults to a memory syndrome.
3265 	 */
3266 	if (flt_stat == (uint64_t)-1)
3267 		flt_stat = C_AFSR_CE;
3268 
3269 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3270 
3271 	/*
3272 	 * Syndrome code must be either a single-bit error code
3273 	 * (0...143) or -1 for unum lookup.
3274 	 */
3275 	if (synd_code < 0 || synd_code >= M2)
3276 		synd_code = -1;
3277 	if (&plat_get_mem_unum) {
3278 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3279 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3280 			buf[0] = '\0';
3281 			*lenp = 0;
3282 		}
3283 
3284 		return (ret);
3285 	}
3286 
3287 	return (ENOTSUP);
3288 }
3289 
3290 /*
3291  * Wrapper for cpu_get_mem_unum() routine that takes an
3292  * async_flt struct rather than explicit arguments.
3293  */
3294 int
3295 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3296     char *buf, int buflen, int *lenp)
3297 {
3298 	/*
3299 	 * If we come thru here for an IO bus error aflt->flt_stat will
3300 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3301 	 * so it will interpret this as a memory error.
3302 	 */
3303 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3304 	    (aflt->flt_class == BUS_FAULT) ?
3305 	    (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs,
3306 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3307 	    aflt->flt_status, buf, buflen, lenp));
3308 }
3309 
3310 /*
3311  * This routine is a more generic interface to cpu_get_mem_unum()
3312  * that may be used by other modules (e.g. mm).
3313  */
3314 int
3315 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3316     char *buf, int buflen, int *lenp)
3317 {
3318 	int synd_status, flt_in_memory, ret;
3319 	ushort_t flt_status = 0;
3320 	char unum[UNUM_NAMLEN];
3321 
3322 	/*
3323 	 * Check for an invalid address.
3324 	 */
3325 	if (afar == (uint64_t)-1)
3326 		return (ENXIO);
3327 
3328 	if (synd == (uint64_t)-1)
3329 		synd_status = AFLT_STAT_INVALID;
3330 	else
3331 		synd_status = AFLT_STAT_VALID;
3332 
3333 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3334 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3335 
3336 	/*
3337 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
3338 	 * For Panther, L2$ is not external, so we don't want to
3339 	 * generate an E$ unum for those errors.
3340 	 */
3341 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3342 		if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS)
3343 			flt_status |= ECC_ECACHE;
3344 	} else {
3345 		if (*afsr & C_AFSR_ECACHE)
3346 			flt_status |= ECC_ECACHE;
3347 	}
3348 
3349 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
3350 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3351 	if (ret != 0)
3352 		return (ret);
3353 
3354 	if (*lenp >= buflen)
3355 		return (ENAMETOOLONG);
3356 
3357 	(void) strncpy(buf, unum, buflen);
3358 
3359 	return (0);
3360 }
3361 
3362 /*
3363  * Routine to return memory information associated
3364  * with a physical address and syndrome.
3365  */
3366 int
3367 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3368     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3369     int *segsp, int *banksp, int *mcidp)
3370 {
3371 	int synd_status, synd_code;
3372 
3373 	if (afar == (uint64_t)-1)
3374 		return (ENXIO);
3375 
3376 	if (synd == (uint64_t)-1)
3377 		synd_status = AFLT_STAT_INVALID;
3378 	else
3379 		synd_status = AFLT_STAT_VALID;
3380 
3381 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3382 
3383 	if (p2get_mem_info != NULL)
3384 		return ((p2get_mem_info)(synd_code, afar,
3385 			mem_sizep, seg_sizep, bank_sizep,
3386 			segsp, banksp, mcidp));
3387 	else
3388 		return (ENOTSUP);
3389 }
3390 
3391 /*
3392  * Routine to return a string identifying the physical
3393  * name associated with a cpuid.
3394  */
3395 int
3396 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3397 {
3398 	int ret;
3399 	char unum[UNUM_NAMLEN];
3400 
3401 	if (&plat_get_cpu_unum) {
3402 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3403 		    != 0)
3404 			return (ret);
3405 	} else {
3406 		return (ENOTSUP);
3407 	}
3408 
3409 	if (*lenp >= buflen)
3410 		return (ENAMETOOLONG);
3411 
3412 	(void) strncpy(buf, unum, buflen);
3413 
3414 	return (0);
3415 }
3416 
3417 /*
3418  * This routine exports the name buffer size.
3419  */
3420 size_t
3421 cpu_get_name_bufsize()
3422 {
3423 	return (UNUM_NAMLEN);
3424 }
3425 
3426 /*
3427  * Historical function, apparantly not used.
3428  */
3429 /* ARGSUSED */
3430 void
3431 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
3432 {}
3433 
3434 /*
3435  * Historical function only called for SBus errors in debugging.
3436  */
3437 /*ARGSUSED*/
3438 void
3439 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
3440 {}
3441 
3442 /*
3443  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3444  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3445  * an async fault structure argument is passed in, the captured error state
3446  * (AFSR, AFAR) info will be returned in the structure.
3447  */
3448 int
3449 clear_errors(ch_async_flt_t *ch_flt)
3450 {
3451 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3452 	ch_cpu_errors_t	cpu_error_regs;
3453 
3454 	get_cpu_error_state(&cpu_error_regs);
3455 
3456 	if (ch_flt != NULL) {
3457 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3458 		aflt->flt_addr = cpu_error_regs.afar;
3459 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3460 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3461 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3462 #if defined(SERRANO)
3463 		ch_flt->afar2 = cpu_error_regs.afar2;
3464 #endif	/* SERRANO */
3465 	}
3466 
3467 	set_cpu_error_state(&cpu_error_regs);
3468 
3469 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3470 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3471 }
3472 
3473 /*
3474  * Clear any AFSR error bits, and check for persistence.
3475  *
3476  * It would be desirable to also insist that syndrome match.  PCI handling
3477  * has already filled flt_synd.  For errors trapped by CPU we only fill
3478  * flt_synd when we queue the event, so we do not have a valid flt_synd
3479  * during initial classification (it is valid if we're called as part of
3480  * subsequent low-pil additional classification attempts).  We could try
3481  * to determine which syndrome to use: we know we're only called for
3482  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3483  * would be esynd/none and esynd/msynd, respectively.  If that is
3484  * implemented then what do we do in the case that we do experience an
3485  * error on the same afar but with different syndrome?  At the very least
3486  * we should count such occurences.  Anyway, for now, we'll leave it as
3487  * it has been for ages.
3488  */
3489 static int
3490 clear_ecc(struct async_flt *aflt)
3491 {
3492 	ch_cpu_errors_t	cpu_error_regs;
3493 
3494 	/*
3495 	 * Snapshot the AFSR and AFAR and clear any errors
3496 	 */
3497 	get_cpu_error_state(&cpu_error_regs);
3498 	set_cpu_error_state(&cpu_error_regs);
3499 
3500 	/*
3501 	 * If any of the same memory access error bits are still on and
3502 	 * the AFAR matches, return that the error is persistent.
3503 	 */
3504 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3505 	    cpu_error_regs.afar == aflt->flt_addr);
3506 }
3507 
3508 /*
3509  * Turn off all cpu error detection, normally only used for panics.
3510  */
3511 void
3512 cpu_disable_errors(void)
3513 {
3514 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3515 }
3516 
3517 /*
3518  * Enable errors.
3519  */
3520 void
3521 cpu_enable_errors(void)
3522 {
3523 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3524 }
3525 
3526 /*
3527  * Flush the entire ecache using displacement flush by reading through a
3528  * physical address range twice as large as the Ecache.
3529  */
3530 void
3531 cpu_flush_ecache(void)
3532 {
3533 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3534 	    cpunodes[CPU->cpu_id].ecache_linesize);
3535 }
3536 
3537 /*
3538  * Return CPU E$ set size - E$ size divided by the associativity.
3539  * We use this function in places where the CPU_PRIVATE ptr may not be
3540  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3541  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3542  * up before the kernel switches from OBP's to the kernel's trap table, so
3543  * we don't have to worry about cpunodes being unitialized.
3544  */
3545 int
3546 cpu_ecache_set_size(struct cpu *cp)
3547 {
3548 	if (CPU_PRIVATE(cp))
3549 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3550 
3551 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3552 }
3553 
3554 /*
3555  * Flush Ecache line.
3556  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3557  * Uses normal displacement flush for Cheetah.
3558  */
3559 static void
3560 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3561 {
3562 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3563 	int ec_set_size = cpu_ecache_set_size(CPU);
3564 
3565 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3566 }
3567 
3568 /*
3569  * Scrub physical address.
3570  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3571  * Ecache or direct-mapped Ecache.
3572  */
3573 static void
3574 cpu_scrubphys(struct async_flt *aflt)
3575 {
3576 	int ec_set_size = cpu_ecache_set_size(CPU);
3577 
3578 	scrubphys(aflt->flt_addr, ec_set_size);
3579 }
3580 
3581 /*
3582  * Clear physical address.
3583  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3584  * Ecache or direct-mapped Ecache.
3585  */
3586 void
3587 cpu_clearphys(struct async_flt *aflt)
3588 {
3589 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3590 	int ec_set_size = cpu_ecache_set_size(CPU);
3591 
3592 
3593 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3594 }
3595 
3596 #if defined(CPU_IMP_ECACHE_ASSOC)
3597 /*
3598  * Check for a matching valid line in all the sets.
3599  * If found, return set# + 1. Otherwise return 0.
3600  */
3601 static int
3602 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3603 {
3604 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3605 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3606 	int ec_set_size = cpu_ecache_set_size(CPU);
3607 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3608 	int nway = cpu_ecache_nway();
3609 	int i;
3610 
3611 	for (i = 0; i < nway; i++, ecp++) {
3612 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3613 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3614 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3615 			return (i+1);
3616 	}
3617 	return (0);
3618 }
3619 #endif /* CPU_IMP_ECACHE_ASSOC */
3620 
3621 /*
3622  * Check whether a line in the given logout info matches the specified
3623  * fault address.  If reqval is set then the line must not be Invalid.
3624  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3625  * set to 2 for l2$ or 3 for l3$.
3626  */
3627 static int
3628 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3629 {
3630 	ch_diag_data_t *cdp = data;
3631 	ch_ec_data_t *ecp;
3632 	int totalsize, ec_set_size;
3633 	int i, ways;
3634 	int match = 0;
3635 	int tagvalid;
3636 	uint64_t addr, tagpa;
3637 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3638 
3639 	/*
3640 	 * Check the l2$ logout data
3641 	 */
3642 	if (ispanther) {
3643 		ecp = &cdp->chd_l2_data[0];
3644 		ec_set_size = PN_L2_SET_SIZE;
3645 		ways = PN_L2_NWAYS;
3646 	} else {
3647 		ecp = &cdp->chd_ec_data[0];
3648 		ec_set_size = cpu_ecache_set_size(CPU);
3649 		ways = cpu_ecache_nway();
3650 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3651 	}
3652 	/* remove low order PA bits from fault address not used in PA tag */
3653 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3654 	for (i = 0; i < ways; i++, ecp++) {
3655 		if (ispanther) {
3656 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3657 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3658 		} else {
3659 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3660 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3661 			    ecp->ec_tag);
3662 		}
3663 		if (tagpa == addr && (!reqval || tagvalid)) {
3664 			match = i + 1;
3665 			*level = 2;
3666 			break;
3667 		}
3668 	}
3669 
3670 	if (match || !ispanther)
3671 		return (match);
3672 
3673 	/* For Panther we also check the l3$ */
3674 	ecp = &cdp->chd_ec_data[0];
3675 	ec_set_size = PN_L3_SET_SIZE;
3676 	ways = PN_L3_NWAYS;
3677 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3678 
3679 	for (i = 0; i < ways; i++, ecp++) {
3680 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3681 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3682 			match = i + 1;
3683 			*level = 3;
3684 			break;
3685 		}
3686 	}
3687 
3688 	return (match);
3689 }
3690 
3691 #if defined(CPU_IMP_L1_CACHE_PARITY)
3692 /*
3693  * Record information related to the source of an Dcache Parity Error.
3694  */
3695 static void
3696 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3697 {
3698 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3699 	int index;
3700 
3701 	/*
3702 	 * Since instruction decode cannot be done at high PIL
3703 	 * just examine the entire Dcache to locate the error.
3704 	 */
3705 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3706 		ch_flt->parity_data.dpe.cpl_way = -1;
3707 		ch_flt->parity_data.dpe.cpl_off = -1;
3708 	}
3709 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3710 		cpu_dcache_parity_check(ch_flt, index);
3711 }
3712 
3713 /*
3714  * Check all ways of the Dcache at a specified index for good parity.
3715  */
3716 static void
3717 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3718 {
3719 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3720 	uint64_t parity_bits, pbits, data_word;
3721 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3722 	int way, word, data_byte;
3723 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3724 	ch_dc_data_t tmp_dcp;
3725 
3726 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3727 		/*
3728 		 * Perform diagnostic read.
3729 		 */
3730 		get_dcache_dtag(index + way * dc_set_size,
3731 				(uint64_t *)&tmp_dcp);
3732 
3733 		/*
3734 		 * Check tag for even parity.
3735 		 * Sum of 1 bits (including parity bit) should be even.
3736 		 */
3737 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3738 			/*
3739 			 * If this is the first error log detailed information
3740 			 * about it and check the snoop tag. Otherwise just
3741 			 * record the fact that we found another error.
3742 			 */
3743 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3744 				ch_flt->parity_data.dpe.cpl_way = way;
3745 				ch_flt->parity_data.dpe.cpl_cache =
3746 				    CPU_DC_PARITY;
3747 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3748 
3749 				if (popc64(tmp_dcp.dc_sntag &
3750 						CHP_DCSNTAG_PARMASK) & 1) {
3751 					ch_flt->parity_data.dpe.cpl_tag |=
3752 								CHP_DC_SNTAG;
3753 					ch_flt->parity_data.dpe.cpl_lcnt++;
3754 				}
3755 
3756 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3757 			}
3758 
3759 			ch_flt->parity_data.dpe.cpl_lcnt++;
3760 		}
3761 
3762 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3763 			/*
3764 			 * Panther has more parity bits than the other
3765 			 * processors for covering dcache data and so each
3766 			 * byte of data in each word has its own parity bit.
3767 			 */
3768 			parity_bits = tmp_dcp.dc_pn_data_parity;
3769 			for (word = 0; word < 4; word++) {
3770 				data_word = tmp_dcp.dc_data[word];
3771 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3772 				for (data_byte = 0; data_byte < 8;
3773 				    data_byte++) {
3774 					if (((popc64(data_word &
3775 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3776 					    (pbits & 1)) {
3777 						cpu_record_dc_data_parity(
3778 						ch_flt, dcp, &tmp_dcp, way,
3779 						word);
3780 					}
3781 					pbits >>= 1;
3782 					data_word >>= 8;
3783 				}
3784 				parity_bits >>= 8;
3785 			}
3786 		} else {
3787 			/*
3788 			 * Check data array for even parity.
3789 			 * The 8 parity bits are grouped into 4 pairs each
3790 			 * of which covers a 64-bit word.  The endianness is
3791 			 * reversed -- the low-order parity bits cover the
3792 			 * high-order data words.
3793 			 */
3794 			parity_bits = tmp_dcp.dc_utag >> 8;
3795 			for (word = 0; word < 4; word++) {
3796 				pbits = (parity_bits >> (6 - word * 2)) & 3;
3797 				if ((popc64(tmp_dcp.dc_data[word]) +
3798 				    parity_bits_popc[pbits]) & 1) {
3799 					cpu_record_dc_data_parity(ch_flt, dcp,
3800 					    &tmp_dcp, way, word);
3801 				}
3802 			}
3803 		}
3804 	}
3805 }
3806 
3807 static void
3808 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
3809     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
3810 {
3811 	/*
3812 	 * If this is the first error log detailed information about it.
3813 	 * Otherwise just record the fact that we found another error.
3814 	 */
3815 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3816 		ch_flt->parity_data.dpe.cpl_way = way;
3817 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
3818 		ch_flt->parity_data.dpe.cpl_off = word * 8;
3819 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
3820 	}
3821 	ch_flt->parity_data.dpe.cpl_lcnt++;
3822 }
3823 
3824 /*
3825  * Record information related to the source of an Icache Parity Error.
3826  *
3827  * Called with the Icache disabled so any diagnostic accesses are safe.
3828  */
3829 static void
3830 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
3831 {
3832 	int	ic_set_size;
3833 	int	ic_linesize;
3834 	int	index;
3835 
3836 	if (CPU_PRIVATE(CPU)) {
3837 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3838 		    CH_ICACHE_NWAY;
3839 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3840 	} else {
3841 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3842 		ic_linesize = icache_linesize;
3843 	}
3844 
3845 	ch_flt->parity_data.ipe.cpl_way = -1;
3846 	ch_flt->parity_data.ipe.cpl_off = -1;
3847 
3848 	for (index = 0; index < ic_set_size; index += ic_linesize)
3849 		cpu_icache_parity_check(ch_flt, index);
3850 }
3851 
3852 /*
3853  * Check all ways of the Icache at a specified index for good parity.
3854  */
3855 static void
3856 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
3857 {
3858 	uint64_t parmask, pn_inst_parity;
3859 	int ic_set_size;
3860 	int ic_linesize;
3861 	int flt_index, way, instr, num_instr;
3862 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3863 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
3864 	ch_ic_data_t tmp_icp;
3865 
3866 	if (CPU_PRIVATE(CPU)) {
3867 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3868 		    CH_ICACHE_NWAY;
3869 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3870 	} else {
3871 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3872 		ic_linesize = icache_linesize;
3873 	}
3874 
3875 	/*
3876 	 * Panther has twice as many instructions per icache line and the
3877 	 * instruction parity bit is in a different location.
3878 	 */
3879 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3880 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
3881 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
3882 	} else {
3883 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
3884 		pn_inst_parity = 0;
3885 	}
3886 
3887 	/*
3888 	 * Index at which we expect to find the parity error.
3889 	 */
3890 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
3891 
3892 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
3893 		/*
3894 		 * Diagnostic reads expect address argument in ASI format.
3895 		 */
3896 		get_icache_dtag(2 * (index + way * ic_set_size),
3897 				(uint64_t *)&tmp_icp);
3898 
3899 		/*
3900 		 * If this is the index in which we expect to find the
3901 		 * error log detailed information about each of the ways.
3902 		 * This information will be displayed later if we can't
3903 		 * determine the exact way in which the error is located.
3904 		 */
3905 		if (flt_index == index)
3906 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
3907 
3908 		/*
3909 		 * Check tag for even parity.
3910 		 * Sum of 1 bits (including parity bit) should be even.
3911 		 */
3912 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
3913 			/*
3914 			 * If this way is the one in which we expected
3915 			 * to find the error record the way and check the
3916 			 * snoop tag. Otherwise just record the fact we
3917 			 * found another error.
3918 			 */
3919 			if (flt_index == index) {
3920 				ch_flt->parity_data.ipe.cpl_way = way;
3921 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
3922 
3923 				if (popc64(tmp_icp.ic_sntag &
3924 						CHP_ICSNTAG_PARMASK) & 1) {
3925 					ch_flt->parity_data.ipe.cpl_tag |=
3926 								CHP_IC_SNTAG;
3927 					ch_flt->parity_data.ipe.cpl_lcnt++;
3928 				}
3929 
3930 			}
3931 			ch_flt->parity_data.ipe.cpl_lcnt++;
3932 			continue;
3933 		}
3934 
3935 		/*
3936 		 * Check instruction data for even parity.
3937 		 * Bits participating in parity differ for PC-relative
3938 		 * versus non-PC-relative instructions.
3939 		 */
3940 		for (instr = 0; instr < num_instr; instr++) {
3941 			parmask = (tmp_icp.ic_data[instr] &
3942 					CH_ICDATA_PRED_ISPCREL) ?
3943 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
3944 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
3945 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
3946 				/*
3947 				 * If this way is the one in which we expected
3948 				 * to find the error record the way and offset.
3949 				 * Otherwise just log the fact we found another
3950 				 * error.
3951 				 */
3952 				if (flt_index == index) {
3953 					ch_flt->parity_data.ipe.cpl_way = way;
3954 					ch_flt->parity_data.ipe.cpl_off =
3955 								instr * 4;
3956 				}
3957 				ch_flt->parity_data.ipe.cpl_lcnt++;
3958 				continue;
3959 			}
3960 		}
3961 	}
3962 }
3963 
3964 /*
3965  * Record information related to the source of an Pcache Parity Error.
3966  */
3967 static void
3968 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
3969 {
3970 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3971 	int index;
3972 
3973 	/*
3974 	 * Since instruction decode cannot be done at high PIL just
3975 	 * examine the entire Pcache to check for any parity errors.
3976 	 */
3977 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3978 		ch_flt->parity_data.dpe.cpl_way = -1;
3979 		ch_flt->parity_data.dpe.cpl_off = -1;
3980 	}
3981 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
3982 		cpu_pcache_parity_check(ch_flt, index);
3983 }
3984 
3985 /*
3986  * Check all ways of the Pcache at a specified index for good parity.
3987  */
3988 static void
3989 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
3990 {
3991 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3992 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
3993 	int way, word, pbit, parity_bits;
3994 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
3995 	ch_pc_data_t tmp_pcp;
3996 
3997 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
3998 		/*
3999 		 * Perform diagnostic read.
4000 		 */
4001 		get_pcache_dtag(index + way * pc_set_size,
4002 				(uint64_t *)&tmp_pcp);
4003 		/*
4004 		 * Check data array for odd parity. There are 8 parity
4005 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4006 		 * of those bits covers exactly 8 bytes of the data
4007 		 * array:
4008 		 *
4009 		 *	parity bit	P$ data bytes covered
4010 		 *	----------	---------------------
4011 		 *	50		63:56
4012 		 *	51		55:48
4013 		 *	52		47:40
4014 		 *	53		39:32
4015 		 *	54		31:24
4016 		 *	55		23:16
4017 		 *	56		15:8
4018 		 *	57		7:0
4019 		 */
4020 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4021 		for (word = 0; word < pc_data_words; word++) {
4022 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4023 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4024 				/*
4025 				 * If this is the first error log detailed
4026 				 * information about it. Otherwise just record
4027 				 * the fact that we found another error.
4028 				 */
4029 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4030 					ch_flt->parity_data.dpe.cpl_way = way;
4031 					ch_flt->parity_data.dpe.cpl_cache =
4032 					    CPU_PC_PARITY;
4033 					ch_flt->parity_data.dpe.cpl_off =
4034 					    word * sizeof (uint64_t);
4035 					bcopy(&tmp_pcp, pcp,
4036 							sizeof (ch_pc_data_t));
4037 				}
4038 				ch_flt->parity_data.dpe.cpl_lcnt++;
4039 			}
4040 		}
4041 	}
4042 }
4043 
4044 
4045 /*
4046  * Add L1 Data cache data to the ereport payload.
4047  */
4048 static void
4049 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4050 {
4051 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4052 	ch_dc_data_t *dcp;
4053 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4054 	uint_t nelem;
4055 	int i, ways_to_check, ways_logged = 0;
4056 
4057 	/*
4058 	 * If this is an D$ fault then there may be multiple
4059 	 * ways captured in the ch_parity_log_t structure.
4060 	 * Otherwise, there will be at most one way captured
4061 	 * in the ch_diag_data_t struct.
4062 	 * Check each way to see if it should be encoded.
4063 	 */
4064 	if (ch_flt->flt_type == CPU_DC_PARITY)
4065 		ways_to_check = CH_DCACHE_NWAY;
4066 	else
4067 		ways_to_check = 1;
4068 	for (i = 0; i < ways_to_check; i++) {
4069 		if (ch_flt->flt_type == CPU_DC_PARITY)
4070 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4071 		else
4072 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4073 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4074 			bcopy(dcp, &dcdata[ways_logged],
4075 				sizeof (ch_dc_data_t));
4076 			ways_logged++;
4077 		}
4078 	}
4079 
4080 	/*
4081 	 * Add the dcache data to the payload.
4082 	 */
4083 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4084 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4085 	if (ways_logged != 0) {
4086 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4087 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4088 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4089 	}
4090 }
4091 
4092 /*
4093  * Add L1 Instruction cache data to the ereport payload.
4094  */
4095 static void
4096 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4097 {
4098 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4099 	ch_ic_data_t *icp;
4100 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4101 	uint_t nelem;
4102 	int i, ways_to_check, ways_logged = 0;
4103 
4104 	/*
4105 	 * If this is an I$ fault then there may be multiple
4106 	 * ways captured in the ch_parity_log_t structure.
4107 	 * Otherwise, there will be at most one way captured
4108 	 * in the ch_diag_data_t struct.
4109 	 * Check each way to see if it should be encoded.
4110 	 */
4111 	if (ch_flt->flt_type == CPU_IC_PARITY)
4112 		ways_to_check = CH_ICACHE_NWAY;
4113 	else
4114 		ways_to_check = 1;
4115 	for (i = 0; i < ways_to_check; i++) {
4116 		if (ch_flt->flt_type == CPU_IC_PARITY)
4117 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4118 		else
4119 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4120 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4121 			bcopy(icp, &icdata[ways_logged],
4122 				sizeof (ch_ic_data_t));
4123 			ways_logged++;
4124 		}
4125 	}
4126 
4127 	/*
4128 	 * Add the icache data to the payload.
4129 	 */
4130 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4131 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4132 	if (ways_logged != 0) {
4133 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4134 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4135 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4136 	}
4137 }
4138 
4139 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4140 
4141 /*
4142  * Add ecache data to payload.
4143  */
4144 static void
4145 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4146 {
4147 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4148 	ch_ec_data_t *ecp;
4149 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4150 	uint_t nelem;
4151 	int i, ways_logged = 0;
4152 
4153 	/*
4154 	 * Check each way to see if it should be encoded
4155 	 * and concatinate it into a temporary buffer.
4156 	 */
4157 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4158 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4159 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4160 			bcopy(ecp, &ecdata[ways_logged],
4161 				sizeof (ch_ec_data_t));
4162 			ways_logged++;
4163 		}
4164 	}
4165 
4166 	/*
4167 	 * Panther CPUs have an additional level of cache and so
4168 	 * what we just collected was the L3 (ecache) and not the
4169 	 * L2 cache.
4170 	 */
4171 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4172 		/*
4173 		 * Add the L3 (ecache) data to the payload.
4174 		 */
4175 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4176 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4177 		if (ways_logged != 0) {
4178 			nelem = sizeof (ch_ec_data_t) /
4179 			    sizeof (uint64_t) * ways_logged;
4180 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4181 			    DATA_TYPE_UINT64_ARRAY, nelem,
4182 			    (uint64_t *)ecdata, NULL);
4183 		}
4184 
4185 		/*
4186 		 * Now collect the L2 cache.
4187 		 */
4188 		ways_logged = 0;
4189 		for (i = 0; i < PN_L2_NWAYS; i++) {
4190 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4191 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4192 				bcopy(ecp, &ecdata[ways_logged],
4193 				    sizeof (ch_ec_data_t));
4194 				ways_logged++;
4195 			}
4196 		}
4197 	}
4198 
4199 	/*
4200 	 * Add the L2 cache data to the payload.
4201 	 */
4202 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4203 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4204 	if (ways_logged != 0) {
4205 		nelem = sizeof (ch_ec_data_t) /
4206 			sizeof (uint64_t) * ways_logged;
4207 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4208 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4209 	}
4210 }
4211 
4212 /*
4213  * Encode the data saved in the ch_async_flt_t struct into
4214  * the FM ereport payload.
4215  */
4216 static void
4217 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4218 	nvlist_t *resource, int *afar_status, int *synd_status)
4219 {
4220 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4221 	*synd_status = AFLT_STAT_INVALID;
4222 	*afar_status = AFLT_STAT_INVALID;
4223 
4224 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4225 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4226 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4227 	}
4228 
4229 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4230 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4231 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4232 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4233 	}
4234 
4235 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4236 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4237 		    ch_flt->flt_bit);
4238 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4239 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4240 	}
4241 
4242 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4243 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4244 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4245 	}
4246 
4247 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4248 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4249 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4250 	}
4251 
4252 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4253 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4254 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4255 	}
4256 
4257 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4258 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4259 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4260 	}
4261 
4262 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4263 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4264 		    DATA_TYPE_BOOLEAN_VALUE,
4265 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4266 	}
4267 
4268 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4269 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4270 		    DATA_TYPE_BOOLEAN_VALUE,
4271 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4272 	}
4273 
4274 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4275 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4276 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4277 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4278 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4279 	}
4280 
4281 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4282 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4283 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4284 	}
4285 
4286 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4287 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4288 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4289 	}
4290 
4291 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4292 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4293 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4294 	}
4295 
4296 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4297 		cpu_payload_add_ecache(aflt, payload);
4298 
4299 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4300 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4301 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4302 	}
4303 
4304 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4305 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4306 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4307 	}
4308 
4309 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4310 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4311 		    DATA_TYPE_UINT32_ARRAY, 16,
4312 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4313 	}
4314 
4315 #if defined(CPU_IMP_L1_CACHE_PARITY)
4316 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4317 		cpu_payload_add_dcache(aflt, payload);
4318 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4319 		cpu_payload_add_icache(aflt, payload);
4320 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4321 
4322 #if defined(CHEETAH_PLUS)
4323 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4324 		cpu_payload_add_pcache(aflt, payload);
4325 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4326 		cpu_payload_add_tlb(aflt, payload);
4327 #endif	/* CHEETAH_PLUS */
4328 	/*
4329 	 * Create the FMRI that goes into the payload
4330 	 * and contains the unum info if necessary.
4331 	 */
4332 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) &&
4333 	    (*afar_status == AFLT_STAT_VALID)) {
4334 		char unum[UNUM_NAMLEN];
4335 		int len;
4336 
4337 		if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum,
4338 		    UNUM_NAMLEN, &len) == 0) {
4339 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4340 			    NULL, unum, NULL);
4341 			fm_payload_set(payload,
4342 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4343 			    DATA_TYPE_NVLIST, resource, NULL);
4344 		}
4345 	}
4346 }
4347 
4348 /*
4349  * Initialize the way info if necessary.
4350  */
4351 void
4352 cpu_ereport_init(struct async_flt *aflt)
4353 {
4354 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4355 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4356 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4357 	int i;
4358 
4359 	/*
4360 	 * Initialize the info in the CPU logout structure.
4361 	 * The I$/D$ way information is not initialized here
4362 	 * since it is captured in the logout assembly code.
4363 	 */
4364 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4365 		(ecp + i)->ec_way = i;
4366 
4367 	for (i = 0; i < PN_L2_NWAYS; i++)
4368 		(l2p + i)->ec_way = i;
4369 }
4370 
4371 /*
4372  * Returns whether fault address is valid for this error bit and
4373  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4374  */
4375 int
4376 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4377 {
4378 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4379 
4380 	return ((aflt->flt_stat & C_AFSR_MEMORY) &&
4381 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4382 	    AFLT_STAT_VALID &&
4383 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4384 }
4385 
4386 static void
4387 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4388 {
4389 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4390 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4391 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4392 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4393 #if defined(CPU_IMP_ECACHE_ASSOC)
4394 	int i, nway;
4395 #endif /* CPU_IMP_ECACHE_ASSOC */
4396 
4397 	/*
4398 	 * Check if the CPU log out captured was valid.
4399 	 */
4400 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4401 	    ch_flt->flt_data_incomplete)
4402 		return;
4403 
4404 #if defined(CPU_IMP_ECACHE_ASSOC)
4405 	nway = cpu_ecache_nway();
4406 	i =  cpu_ecache_line_valid(ch_flt);
4407 	if (i == 0 || i > nway) {
4408 		for (i = 0; i < nway; i++)
4409 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4410 	} else
4411 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4412 #else /* CPU_IMP_ECACHE_ASSOC */
4413 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4414 #endif /* CPU_IMP_ECACHE_ASSOC */
4415 
4416 #if defined(CHEETAH_PLUS)
4417 	pn_cpu_log_diag_l2_info(ch_flt);
4418 #endif /* CHEETAH_PLUS */
4419 
4420 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4421 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4422 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4423 	}
4424 
4425 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4426 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4427 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4428 		else
4429 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4430 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4431 	}
4432 }
4433 
4434 /*
4435  * Cheetah ECC calculation.
4436  *
4437  * We only need to do the calculation on the data bits and can ignore check
4438  * bit and Mtag bit terms in the calculation.
4439  */
4440 static uint64_t ch_ecc_table[9][2] = {
4441 	/*
4442 	 * low order 64-bits   high-order 64-bits
4443 	 */
4444 	{ 0x46bffffeccd1177f, 0x488800022100014c },
4445 	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
4446 	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
4447 	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
4448 	{ 0x7f5511421b113809, 0x901c88d84288aafe },
4449 	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
4450 	{ 0xf552181014448344, 0x7ff8f4443e411911 },
4451 	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
4452 	{ 0x3280008440001112, 0xfee88b337ffffd62 },
4453 };
4454 
4455 /*
4456  * 64-bit population count, use well-known popcnt trick.
4457  * We could use the UltraSPARC V9 POPC instruction, but some
4458  * CPUs including Cheetahplus and Jaguar do not support that
4459  * instruction.
4460  */
4461 int
4462 popc64(uint64_t val)
4463 {
4464 	int cnt;
4465 
4466 	for (cnt = 0; val != 0; val &= val - 1)
4467 		cnt++;
4468 	return (cnt);
4469 }
4470 
4471 /*
4472  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4473  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4474  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4475  * instead of doing all the xor's.
4476  */
4477 uint32_t
4478 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4479 {
4480 	int bitno, s;
4481 	int synd = 0;
4482 
4483 	for (bitno = 0; bitno < 9; bitno++) {
4484 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4485 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4486 		synd |= (s << bitno);
4487 	}
4488 	return (synd);
4489 
4490 }
4491 
4492 /*
4493  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4494  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4495  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4496  */
4497 static void
4498 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4499     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4500 {
4501 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4502 
4503 	if (reason &&
4504 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4505 		(void) strcat(reason, eccp->ec_reason);
4506 	}
4507 
4508 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4509 	ch_flt->flt_type = eccp->ec_flt_type;
4510 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4511 		ch_flt->flt_diag_data = *cdp;
4512 	else
4513 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4514 	aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit);
4515 
4516 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4517 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4518 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4519 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4520 	else
4521 		aflt->flt_synd = 0;
4522 
4523 	aflt->flt_payload = eccp->ec_err_payload;
4524 
4525 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4526 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4527 		cpu_errorq_dispatch(eccp->ec_err_class,
4528 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4529 		    aflt->flt_panic);
4530 	else
4531 		cpu_errorq_dispatch(eccp->ec_err_class,
4532 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4533 		    aflt->flt_panic);
4534 }
4535 
4536 /*
4537  * Queue events on async event queue one event per error bit.  First we
4538  * queue the events that we "expect" for the given trap, then we queue events
4539  * that we may not expect.  Return number of events queued.
4540  */
4541 int
4542 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4543     ch_cpu_logout_t *clop)
4544 {
4545 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4546 	ecc_type_to_info_t *eccp;
4547 	int nevents = 0;
4548 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4549 #if defined(CHEETAH_PLUS)
4550 	uint64_t orig_t_afsr_errs;
4551 #endif
4552 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4553 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4554 	ch_diag_data_t *cdp = NULL;
4555 
4556 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4557 
4558 #if defined(CHEETAH_PLUS)
4559 	orig_t_afsr_errs = t_afsr_errs;
4560 
4561 	/*
4562 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4563 	 */
4564 	if (clop != NULL) {
4565 		/*
4566 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4567 		 * flt_addr and flt_stat fields will be reset to the primaries
4568 		 * below, but the sdw_addr and sdw_stat will stay as the
4569 		 * secondaries.
4570 		 */
4571 		cdp = &clop->clo_sdw_data;
4572 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4573 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4574 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4575 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4576 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4577 
4578 		/*
4579 		 * If the primary and shadow AFSR differ, tag the shadow as
4580 		 * the first fault.
4581 		 */
4582 		if ((primary_afar != cdp->chd_afar) ||
4583 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4584 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4585 		}
4586 
4587 		/*
4588 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4589 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4590 		 * is expected to be zero for those CPUs which do not have
4591 		 * an AFSR_EXT register.
4592 		 */
4593 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4594 			if ((eccp->ec_afsr_bit &
4595 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4596 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4597 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4598 				cdp = NULL;
4599 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4600 				nevents++;
4601 			}
4602 		}
4603 
4604 		/*
4605 		 * If the ME bit is on in the primary AFSR turn all the
4606 		 * error bits on again that may set the ME bit to make
4607 		 * sure we see the ME AFSR error logs.
4608 		 */
4609 		if ((primary_afsr & C_AFSR_ME) != 0)
4610 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4611 	}
4612 #endif	/* CHEETAH_PLUS */
4613 
4614 	if (clop != NULL)
4615 		cdp = &clop->clo_data;
4616 
4617 	/*
4618 	 * Queue expected errors, error bit and fault type must match
4619 	 * in the ecc_type_to_info table.
4620 	 */
4621 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4622 	    eccp++) {
4623 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4624 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4625 #if defined(SERRANO)
4626 			/*
4627 			 * For FRC/FRU errors on Serrano the afar2 captures
4628 			 * the address and the associated data is
4629 			 * in the shadow logout area.
4630 			 */
4631 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4632 				if (clop != NULL)
4633 					cdp = &clop->clo_sdw_data;
4634 				aflt->flt_addr = ch_flt->afar2;
4635 			} else {
4636 				if (clop != NULL)
4637 					cdp = &clop->clo_data;
4638 				aflt->flt_addr = primary_afar;
4639 			}
4640 #else	/* SERRANO */
4641 			aflt->flt_addr = primary_afar;
4642 #endif	/* SERRANO */
4643 			aflt->flt_stat = primary_afsr;
4644 			ch_flt->afsr_ext = primary_afsr_ext;
4645 			ch_flt->afsr_errs = primary_afsr_errs;
4646 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4647 			cdp = NULL;
4648 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4649 			nevents++;
4650 		}
4651 	}
4652 
4653 	/*
4654 	 * Queue unexpected errors, error bit only match.
4655 	 */
4656 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4657 	    eccp++) {
4658 		if (eccp->ec_afsr_bit & t_afsr_errs) {
4659 #if defined(SERRANO)
4660 			/*
4661 			 * For FRC/FRU errors on Serrano the afar2 captures
4662 			 * the address and the associated data is
4663 			 * in the shadow logout area.
4664 			 */
4665 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4666 				if (clop != NULL)
4667 					cdp = &clop->clo_sdw_data;
4668 				aflt->flt_addr = ch_flt->afar2;
4669 			} else {
4670 				if (clop != NULL)
4671 					cdp = &clop->clo_data;
4672 				aflt->flt_addr = primary_afar;
4673 			}
4674 #else	/* SERRANO */
4675 			aflt->flt_addr = primary_afar;
4676 #endif	/* SERRANO */
4677 			aflt->flt_stat = primary_afsr;
4678 			ch_flt->afsr_ext = primary_afsr_ext;
4679 			ch_flt->afsr_errs = primary_afsr_errs;
4680 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4681 			cdp = NULL;
4682 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4683 			nevents++;
4684 		}
4685 	}
4686 	return (nevents);
4687 }
4688 
4689 /*
4690  * Return trap type number.
4691  */
4692 uint8_t
4693 flt_to_trap_type(struct async_flt *aflt)
4694 {
4695 	if (aflt->flt_status & ECC_I_TRAP)
4696 		return (TRAP_TYPE_ECC_I);
4697 	if (aflt->flt_status & ECC_D_TRAP)
4698 		return (TRAP_TYPE_ECC_D);
4699 	if (aflt->flt_status & ECC_F_TRAP)
4700 		return (TRAP_TYPE_ECC_F);
4701 	if (aflt->flt_status & ECC_C_TRAP)
4702 		return (TRAP_TYPE_ECC_C);
4703 	if (aflt->flt_status & ECC_DP_TRAP)
4704 		return (TRAP_TYPE_ECC_DP);
4705 	if (aflt->flt_status & ECC_IP_TRAP)
4706 		return (TRAP_TYPE_ECC_IP);
4707 	if (aflt->flt_status & ECC_ITLB_TRAP)
4708 		return (TRAP_TYPE_ECC_ITLB);
4709 	if (aflt->flt_status & ECC_DTLB_TRAP)
4710 		return (TRAP_TYPE_ECC_DTLB);
4711 	return (TRAP_TYPE_UNKNOWN);
4712 }
4713 
4714 /*
4715  * Decide an error type based on detector and leaky/partner tests.
4716  * The following array is used for quick translation - it must
4717  * stay in sync with ce_dispact_t.
4718  */
4719 
4720 static char *cetypes[] = {
4721 	CE_DISP_DESC_U,
4722 	CE_DISP_DESC_I,
4723 	CE_DISP_DESC_PP,
4724 	CE_DISP_DESC_P,
4725 	CE_DISP_DESC_L,
4726 	CE_DISP_DESC_PS,
4727 	CE_DISP_DESC_S
4728 };
4729 
4730 char *
4731 flt_to_error_type(struct async_flt *aflt)
4732 {
4733 	ce_dispact_t dispact, disp;
4734 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
4735 
4736 	/*
4737 	 * The memory payload bundle is shared by some events that do
4738 	 * not perform any classification.  For those flt_disp will be
4739 	 * 0 and we will return "unknown".
4740 	 */
4741 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
4742 		return (cetypes[CE_DISP_UNKNOWN]);
4743 
4744 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
4745 
4746 	/*
4747 	 * It is also possible that no scrub/classification was performed
4748 	 * by the detector, for instance where a disrupting error logged
4749 	 * in the AFSR while CEEN was off in cpu_deferred_error.
4750 	 */
4751 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
4752 		return (cetypes[CE_DISP_UNKNOWN]);
4753 
4754 	/*
4755 	 * Lookup type in initial classification/action table
4756 	 */
4757 	dispact = CE_DISPACT(ce_disp_table,
4758 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
4759 	    CE_XDIAG_STATE(dtcrinfo),
4760 	    CE_XDIAG_CE1SEEN(dtcrinfo),
4761 	    CE_XDIAG_CE2SEEN(dtcrinfo));
4762 
4763 	/*
4764 	 * A bad lookup is not something to panic production systems for.
4765 	 */
4766 	ASSERT(dispact != CE_DISP_BAD);
4767 	if (dispact == CE_DISP_BAD)
4768 		return (cetypes[CE_DISP_UNKNOWN]);
4769 
4770 	disp = CE_DISP(dispact);
4771 
4772 	switch (disp) {
4773 	case CE_DISP_UNKNOWN:
4774 	case CE_DISP_INTERMITTENT:
4775 		break;
4776 
4777 	case CE_DISP_POSS_PERS:
4778 		/*
4779 		 * "Possible persistent" errors to which we have applied a valid
4780 		 * leaky test can be separated into "persistent" or "leaky".
4781 		 */
4782 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
4783 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
4784 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
4785 			    CE_XDIAG_CE2SEEN(lkyinfo))
4786 				disp = CE_DISP_LEAKY;
4787 			else
4788 				disp = CE_DISP_PERS;
4789 		}
4790 		break;
4791 
4792 	case CE_DISP_POSS_STICKY:
4793 		/*
4794 		 * Promote "possible sticky" results that have been
4795 		 * confirmed by a partner test to "sticky".  Unconfirmed
4796 		 * "possible sticky" events are left at that status - we do not
4797 		 * guess at any bad reader/writer etc status here.
4798 		 */
4799 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
4800 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
4801 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
4802 			disp = CE_DISP_STICKY;
4803 
4804 		/*
4805 		 * Promote "possible sticky" results on a uniprocessor
4806 		 * to "sticky"
4807 		 */
4808 		if (disp == CE_DISP_POSS_STICKY &&
4809 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
4810 			disp = CE_DISP_STICKY;
4811 		break;
4812 
4813 	default:
4814 		disp = CE_DISP_UNKNOWN;
4815 		break;
4816 	}
4817 
4818 	return (cetypes[disp]);
4819 }
4820 
4821 /*
4822  * Given the entire afsr, the specific bit to check and a prioritized list of
4823  * error bits, determine the validity of the various overwrite priority
4824  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
4825  * different overwrite priorities.
4826  *
4827  * Given a specific afsr error bit and the entire afsr, there are three cases:
4828  *   INVALID:	The specified bit is lower overwrite priority than some other
4829  *		error bit which is on in the afsr (or IVU/IVC).
4830  *   VALID:	The specified bit is higher priority than all other error bits
4831  *		which are on in the afsr.
4832  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
4833  *		bit is on in the afsr.
4834  */
4835 int
4836 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
4837 {
4838 	uint64_t afsr_ow;
4839 
4840 	while ((afsr_ow = *ow_bits++) != 0) {
4841 		/*
4842 		 * If bit is in the priority class, check to see if another
4843 		 * bit in the same class is on => ambiguous.  Otherwise,
4844 		 * the value is valid.  If the bit is not on at this priority
4845 		 * class, but a higher priority bit is on, then the value is
4846 		 * invalid.
4847 		 */
4848 		if (afsr_ow & afsr_bit) {
4849 			/*
4850 			 * If equal pri bit is on, ambiguous.
4851 			 */
4852 			if (afsr & (afsr_ow & ~afsr_bit))
4853 				return (AFLT_STAT_AMBIGUOUS);
4854 			return (AFLT_STAT_VALID);
4855 		} else if (afsr & afsr_ow)
4856 			break;
4857 	}
4858 
4859 	/*
4860 	 * We didn't find a match or a higher priority bit was on.  Not
4861 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
4862 	 */
4863 	return (AFLT_STAT_INVALID);
4864 }
4865 
4866 static int
4867 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
4868 {
4869 #if defined(SERRANO)
4870 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
4871 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
4872 	else
4873 #endif	/* SERRANO */
4874 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
4875 }
4876 
4877 static int
4878 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
4879 {
4880 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
4881 }
4882 
4883 static int
4884 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
4885 {
4886 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
4887 }
4888 
4889 static int
4890 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
4891 {
4892 #ifdef lint
4893 	cpuid = cpuid;
4894 #endif
4895 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
4896 		return (afsr_to_msynd_status(afsr, afsr_bit));
4897 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
4898 #if defined(CHEETAH_PLUS)
4899 		/*
4900 		 * The E_SYND overwrite policy is slightly different
4901 		 * for Panther CPUs.
4902 		 */
4903 		if (IS_PANTHER(cpunodes[cpuid].implementation))
4904 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
4905 		else
4906 			return (afsr_to_esynd_status(afsr, afsr_bit));
4907 #else /* CHEETAH_PLUS */
4908 		return (afsr_to_esynd_status(afsr, afsr_bit));
4909 #endif /* CHEETAH_PLUS */
4910 	} else {
4911 		return (AFLT_STAT_INVALID);
4912 	}
4913 }
4914 
4915 /*
4916  * Slave CPU stick synchronization.
4917  */
4918 void
4919 sticksync_slave(void)
4920 {
4921 	int 		i;
4922 	int		tries = 0;
4923 	int64_t		tskew;
4924 	int64_t		av_tskew;
4925 
4926 	kpreempt_disable();
4927 	/* wait for the master side */
4928 	while (stick_sync_cmd != SLAVE_START)
4929 		;
4930 	/*
4931 	 * Synchronization should only take a few tries at most. But in the
4932 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
4933 	 * without it's stick synchronized wouldn't be a good citizen.
4934 	 */
4935 	while (slave_done == 0) {
4936 		/*
4937 		 * Time skew calculation.
4938 		 */
4939 		av_tskew = tskew = 0;
4940 
4941 		for (i = 0; i < stick_iter; i++) {
4942 			/* make location hot */
4943 			timestamp[EV_A_START] = 0;
4944 			stick_timestamp(&timestamp[EV_A_START]);
4945 
4946 			/* tell the master we're ready */
4947 			stick_sync_cmd = MASTER_START;
4948 
4949 			/* and wait */
4950 			while (stick_sync_cmd != SLAVE_CONT)
4951 				;
4952 			/* Event B end */
4953 			stick_timestamp(&timestamp[EV_B_END]);
4954 
4955 			/* calculate time skew */
4956 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
4957 				- (timestamp[EV_A_END] -
4958 				timestamp[EV_A_START])) / 2;
4959 
4960 			/* keep running count */
4961 			av_tskew += tskew;
4962 		} /* for */
4963 
4964 		/*
4965 		 * Adjust stick for time skew if not within the max allowed;
4966 		 * otherwise we're all done.
4967 		 */
4968 		if (stick_iter != 0)
4969 			av_tskew = av_tskew/stick_iter;
4970 		if (ABS(av_tskew) > stick_tsk) {
4971 			/*
4972 			 * If the skew is 1 (the slave's STICK register
4973 			 * is 1 STICK ahead of the master's), stick_adj
4974 			 * could fail to adjust the slave's STICK register
4975 			 * if the STICK read on the slave happens to
4976 			 * align with the increment of the STICK.
4977 			 * Therefore, we increment the skew to 2.
4978 			 */
4979 			if (av_tskew == 1)
4980 				av_tskew++;
4981 			stick_adj(-av_tskew);
4982 		} else
4983 			slave_done = 1;
4984 #ifdef DEBUG
4985 		if (tries < DSYNC_ATTEMPTS)
4986 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
4987 				av_tskew;
4988 		++tries;
4989 #endif /* DEBUG */
4990 #ifdef lint
4991 		tries = tries;
4992 #endif
4993 
4994 	} /* while */
4995 
4996 	/* allow the master to finish */
4997 	stick_sync_cmd = EVENT_NULL;
4998 	kpreempt_enable();
4999 }
5000 
5001 /*
5002  * Master CPU side of stick synchronization.
5003  *  - timestamp end of Event A
5004  *  - timestamp beginning of Event B
5005  */
5006 void
5007 sticksync_master(void)
5008 {
5009 	int		i;
5010 
5011 	kpreempt_disable();
5012 	/* tell the slave we've started */
5013 	slave_done = 0;
5014 	stick_sync_cmd = SLAVE_START;
5015 
5016 	while (slave_done == 0) {
5017 		for (i = 0; i < stick_iter; i++) {
5018 			/* wait for the slave */
5019 			while (stick_sync_cmd != MASTER_START)
5020 				;
5021 			/* Event A end */
5022 			stick_timestamp(&timestamp[EV_A_END]);
5023 
5024 			/* make location hot */
5025 			timestamp[EV_B_START] = 0;
5026 			stick_timestamp(&timestamp[EV_B_START]);
5027 
5028 			/* tell the slave to continue */
5029 			stick_sync_cmd = SLAVE_CONT;
5030 		} /* for */
5031 
5032 		/* wait while slave calculates time skew */
5033 		while (stick_sync_cmd == SLAVE_CONT)
5034 			;
5035 	} /* while */
5036 	kpreempt_enable();
5037 }
5038 
5039 /*
5040  * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to
5041  * do Spitfire hack of xcall'ing all the cpus to ask to check for them.  Also,
5042  * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
5043  * panic idle.
5044  */
5045 /*ARGSUSED*/
5046 void
5047 cpu_check_allcpus(struct async_flt *aflt)
5048 {}
5049 
5050 struct kmem_cache *ch_private_cache;
5051 
5052 /*
5053  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5054  * deallocate the scrubber data structures and cpu_private data structure.
5055  */
5056 void
5057 cpu_uninit_private(struct cpu *cp)
5058 {
5059 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5060 
5061 	ASSERT(chprp);
5062 	cpu_uninit_ecache_scrub_dr(cp);
5063 	CPU_PRIVATE(cp) = NULL;
5064 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5065 	kmem_cache_free(ch_private_cache, chprp);
5066 	cmp_delete_cpu(cp->cpu_id);
5067 
5068 }
5069 
5070 /*
5071  * Cheetah Cache Scrubbing
5072  *
5073  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5074  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5075  * protected by either parity or ECC.
5076  *
5077  * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
5078  * cache per second). Due to the the specifics of how the I$ control
5079  * logic works with respect to the ASI used to scrub I$ lines, the entire
5080  * I$ is scanned at once.
5081  */
5082 
5083 /*
5084  * Tuneables to enable and disable the scrubbing of the caches, and to tune
5085  * scrubbing behavior.  These may be changed via /etc/system or using mdb
5086  * on a running system.
5087  */
5088 int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5089 
5090 /*
5091  * The following are the PIL levels that the softints/cross traps will fire at.
5092  */
5093 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5094 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5095 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5096 
5097 #if defined(JALAPENO)
5098 
5099 /*
5100  * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5101  * on Jalapeno.
5102  */
5103 int ecache_scrub_enable = 0;
5104 
5105 #else	/* JALAPENO */
5106 
5107 /*
5108  * With all other cpu types, E$ scrubbing is on by default
5109  */
5110 int ecache_scrub_enable = 1;
5111 
5112 #endif	/* JALAPENO */
5113 
5114 
5115 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5116 
5117 /*
5118  * The I$ scrubber tends to cause latency problems for real-time SW, so it
5119  * is disabled by default on non-Cheetah systems
5120  */
5121 int icache_scrub_enable = 0;
5122 
5123 /*
5124  * Tuneables specifying the scrub calls per second and the scan rate
5125  * for each cache
5126  *
5127  * The cyclic times are set during boot based on the following values.
5128  * Changing these values in mdb after this time will have no effect.  If
5129  * a different value is desired, it must be set in /etc/system before a
5130  * reboot.
5131  */
5132 int ecache_calls_a_sec = 1;
5133 int dcache_calls_a_sec = 2;
5134 int icache_calls_a_sec = 2;
5135 
5136 int ecache_scan_rate_idle = 1;
5137 int ecache_scan_rate_busy = 1;
5138 int dcache_scan_rate_idle = 1;
5139 int dcache_scan_rate_busy = 1;
5140 int icache_scan_rate_idle = 1;
5141 int icache_scan_rate_busy = 1;
5142 
5143 #else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5144 
5145 int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
5146 
5147 int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
5148 int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
5149 int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */
5150 
5151 int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
5152 int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
5153 int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
5154 int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
5155 int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
5156 int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
5157 
5158 #endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5159 
5160 /*
5161  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5162  * increment the outstanding request counter and schedule a softint to run
5163  * the scrubber.
5164  */
5165 extern xcfunc_t cache_scrubreq_tl1;
5166 
5167 /*
5168  * These are the softint functions for each cache scrubber
5169  */
5170 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5171 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5172 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5173 
5174 /*
5175  * The cache scrub info table contains cache specific information
5176  * and allows for some of the scrub code to be table driven, reducing
5177  * duplication of cache similar code.
5178  *
5179  * This table keeps a copy of the value in the calls per second variable
5180  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5181  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5182  * mdb in a misguided attempt to disable the scrubber).
5183  */
5184 struct scrub_info {
5185 	int		*csi_enable;	/* scrubber enable flag */
5186 	int		csi_freq;	/* scrubber calls per second */
5187 	int		csi_index;	/* index to chsm_outstanding[] */
5188 	uint_t		csi_inum;	/* scrubber interrupt number */
5189 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5190 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5191 	char		csi_name[3];	/* cache name for this scrub entry */
5192 } cache_scrub_info[] = {
5193 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5194 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5195 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5196 };
5197 
5198 /*
5199  * If scrubbing is enabled, increment the outstanding request counter.  If it
5200  * is 1 (meaning there were no previous requests outstanding), call
5201  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5202  * a self trap.
5203  */
5204 static void
5205 do_scrub(struct scrub_info *csi)
5206 {
5207 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5208 	int index = csi->csi_index;
5209 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5210 
5211 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5212 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5213 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5214 			    csi->csi_inum, 0);
5215 		}
5216 	}
5217 }
5218 
5219 /*
5220  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5221  * cross-trap the offline cpus.
5222  */
5223 static void
5224 do_scrub_offline(struct scrub_info *csi)
5225 {
5226 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5227 
5228 	if (CPUSET_ISNULL(cpu_offline_set)) {
5229 		/*
5230 		 * No offline cpus - nothing to do
5231 		 */
5232 		return;
5233 	}
5234 
5235 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5236 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5237 		    csi->csi_index);
5238 	}
5239 }
5240 
5241 /*
5242  * This is the initial setup for the scrubber cyclics - it sets the
5243  * interrupt level, frequency, and function to call.
5244  */
5245 /*ARGSUSED*/
5246 static void
5247 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5248     cyc_time_t *when)
5249 {
5250 	struct scrub_info *csi = (struct scrub_info *)arg;
5251 
5252 	ASSERT(csi != NULL);
5253 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5254 	hdlr->cyh_level = CY_LOW_LEVEL;
5255 	hdlr->cyh_arg = arg;
5256 
5257 	when->cyt_when = 0;	/* Start immediately */
5258 	when->cyt_interval = NANOSEC / csi->csi_freq;
5259 }
5260 
5261 /*
5262  * Initialization for cache scrubbing.
5263  * This routine is called AFTER all cpus have had cpu_init_private called
5264  * to initialize their private data areas.
5265  */
5266 void
5267 cpu_init_cache_scrub(void)
5268 {
5269 	int i;
5270 	struct scrub_info *csi;
5271 	cyc_omni_handler_t omni_hdlr;
5272 	cyc_handler_t offline_hdlr;
5273 	cyc_time_t when;
5274 
5275 	/*
5276 	 * save away the maximum number of lines for the D$
5277 	 */
5278 	dcache_nlines = dcache_size / dcache_linesize;
5279 
5280 	/*
5281 	 * register the softints for the cache scrubbing
5282 	 */
5283 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5284 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5285 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]);
5286 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5287 
5288 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5289 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5290 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]);
5291 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5292 
5293 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5294 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5295 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]);
5296 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5297 
5298 	/*
5299 	 * start the scrubbing for all the caches
5300 	 */
5301 	mutex_enter(&cpu_lock);
5302 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5303 
5304 		csi = &cache_scrub_info[i];
5305 
5306 		if (!(*csi->csi_enable))
5307 			continue;
5308 
5309 		/*
5310 		 * force the following to be true:
5311 		 *	1 <= calls_a_sec <= hz
5312 		 */
5313 		if (csi->csi_freq > hz) {
5314 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5315 				"(%d); resetting to hz (%d)", csi->csi_name,
5316 				csi->csi_freq, hz);
5317 			csi->csi_freq = hz;
5318 		} else if (csi->csi_freq < 1) {
5319 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5320 				"(%d); resetting to 1", csi->csi_name,
5321 				csi->csi_freq);
5322 			csi->csi_freq = 1;
5323 		}
5324 
5325 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5326 		omni_hdlr.cyo_offline = NULL;
5327 		omni_hdlr.cyo_arg = (void *)csi;
5328 
5329 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5330 		offline_hdlr.cyh_arg = (void *)csi;
5331 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5332 
5333 		when.cyt_when = 0;	/* Start immediately */
5334 		when.cyt_interval = NANOSEC / csi->csi_freq;
5335 
5336 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5337 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5338 	}
5339 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5340 	mutex_exit(&cpu_lock);
5341 }
5342 
5343 /*
5344  * Indicate that the specified cpu is idle.
5345  */
5346 void
5347 cpu_idle_ecache_scrub(struct cpu *cp)
5348 {
5349 	if (CPU_PRIVATE(cp) != NULL) {
5350 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5351 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5352 	}
5353 }
5354 
5355 /*
5356  * Indicate that the specified cpu is busy.
5357  */
5358 void
5359 cpu_busy_ecache_scrub(struct cpu *cp)
5360 {
5361 	if (CPU_PRIVATE(cp) != NULL) {
5362 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5363 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5364 	}
5365 }
5366 
5367 /*
5368  * Initialization for cache scrubbing for the specified cpu.
5369  */
5370 void
5371 cpu_init_ecache_scrub_dr(struct cpu *cp)
5372 {
5373 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5374 	int cpuid = cp->cpu_id;
5375 
5376 	/* initialize the number of lines in the caches */
5377 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5378 	    cpunodes[cpuid].ecache_linesize;
5379 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5380 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5381 
5382 	/*
5383 	 * do_scrub() and do_scrub_offline() check both the global
5384 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5385 	 * check this value before scrubbing.  Currently, we use it to
5386 	 * disable the E$ scrubber on multi-core cpus or while running at
5387 	 * slowed speed.  For now, just turn everything on and allow
5388 	 * cpu_init_private() to change it if necessary.
5389 	 */
5390 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5391 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5392 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5393 
5394 	cpu_busy_ecache_scrub(cp);
5395 }
5396 
5397 /*
5398  * Un-initialization for cache scrubbing for the specified cpu.
5399  */
5400 static void
5401 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5402 {
5403 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5404 
5405 	/*
5406 	 * un-initialize bookkeeping for cache scrubbing
5407 	 */
5408 	bzero(csmp, sizeof (ch_scrub_misc_t));
5409 
5410 	cpu_idle_ecache_scrub(cp);
5411 }
5412 
5413 /*
5414  * Called periodically on each CPU to scrub the D$.
5415  */
5416 static void
5417 scrub_dcache(int how_many)
5418 {
5419 	int i;
5420 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5421 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5422 
5423 	/*
5424 	 * scrub the desired number of lines
5425 	 */
5426 	for (i = 0; i < how_many; i++) {
5427 		/*
5428 		 * scrub a D$ line
5429 		 */
5430 		dcache_inval_line(index);
5431 
5432 		/*
5433 		 * calculate the next D$ line to scrub, assumes
5434 		 * that dcache_nlines is a power of 2
5435 		 */
5436 		index = (index + 1) & (dcache_nlines - 1);
5437 	}
5438 
5439 	/*
5440 	 * set the scrub index for the next visit
5441 	 */
5442 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5443 }
5444 
5445 /*
5446  * Handler for D$ scrub inum softint. Call scrub_dcache until
5447  * we decrement the outstanding request count to zero.
5448  */
5449 /*ARGSUSED*/
5450 static uint_t
5451 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5452 {
5453 	int i;
5454 	int how_many;
5455 	int outstanding;
5456 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5457 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5458 	struct scrub_info *csi = (struct scrub_info *)arg1;
5459 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5460 		dcache_scan_rate_idle : dcache_scan_rate_busy;
5461 
5462 	/*
5463 	 * The scan rates are expressed in units of tenths of a
5464 	 * percent.  A scan rate of 1000 (100%) means the whole
5465 	 * cache is scanned every second.
5466 	 */
5467 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5468 
5469 	do {
5470 		outstanding = *countp;
5471 		for (i = 0; i < outstanding; i++) {
5472 			scrub_dcache(how_many);
5473 		}
5474 	} while (atomic_add_32_nv(countp, -outstanding));
5475 
5476 	return (DDI_INTR_CLAIMED);
5477 }
5478 
5479 /*
5480  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5481  * by invalidating lines. Due to the characteristics of the ASI which
5482  * is used to invalidate an I$ line, the entire I$ must be invalidated
5483  * vs. an individual I$ line.
5484  */
5485 static void
5486 scrub_icache(int how_many)
5487 {
5488 	int i;
5489 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5490 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5491 	int icache_nlines = csmp->chsm_icache_nlines;
5492 
5493 	/*
5494 	 * scrub the desired number of lines
5495 	 */
5496 	for (i = 0; i < how_many; i++) {
5497 		/*
5498 		 * since the entire I$ must be scrubbed at once,
5499 		 * wait until the index wraps to zero to invalidate
5500 		 * the entire I$
5501 		 */
5502 		if (index == 0) {
5503 			icache_inval_all();
5504 		}
5505 
5506 		/*
5507 		 * calculate the next I$ line to scrub, assumes
5508 		 * that chsm_icache_nlines is a power of 2
5509 		 */
5510 		index = (index + 1) & (icache_nlines - 1);
5511 	}
5512 
5513 	/*
5514 	 * set the scrub index for the next visit
5515 	 */
5516 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5517 }
5518 
5519 /*
5520  * Handler for I$ scrub inum softint. Call scrub_icache until
5521  * we decrement the outstanding request count to zero.
5522  */
5523 /*ARGSUSED*/
5524 static uint_t
5525 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5526 {
5527 	int i;
5528 	int how_many;
5529 	int outstanding;
5530 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5531 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5532 	struct scrub_info *csi = (struct scrub_info *)arg1;
5533 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5534 	    icache_scan_rate_idle : icache_scan_rate_busy;
5535 	int icache_nlines = csmp->chsm_icache_nlines;
5536 
5537 	/*
5538 	 * The scan rates are expressed in units of tenths of a
5539 	 * percent.  A scan rate of 1000 (100%) means the whole
5540 	 * cache is scanned every second.
5541 	 */
5542 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5543 
5544 	do {
5545 		outstanding = *countp;
5546 		for (i = 0; i < outstanding; i++) {
5547 			scrub_icache(how_many);
5548 		}
5549 	} while (atomic_add_32_nv(countp, -outstanding));
5550 
5551 	return (DDI_INTR_CLAIMED);
5552 }
5553 
5554 /*
5555  * Called periodically on each CPU to scrub the E$.
5556  */
5557 static void
5558 scrub_ecache(int how_many)
5559 {
5560 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5561 	int i;
5562 	int cpuid = CPU->cpu_id;
5563 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5564 	int nlines = csmp->chsm_ecache_nlines;
5565 	int linesize = cpunodes[cpuid].ecache_linesize;
5566 	int ec_set_size = cpu_ecache_set_size(CPU);
5567 
5568 	/*
5569 	 * scrub the desired number of lines
5570 	 */
5571 	for (i = 0; i < how_many; i++) {
5572 		/*
5573 		 * scrub the E$ line
5574 		 */
5575 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5576 		    ec_set_size);
5577 
5578 		/*
5579 		 * calculate the next E$ line to scrub based on twice
5580 		 * the number of E$ lines (to displace lines containing
5581 		 * flush area data), assumes that the number of lines
5582 		 * is a power of 2
5583 		 */
5584 		index = (index + 1) & ((nlines << 1) - 1);
5585 	}
5586 
5587 	/*
5588 	 * set the ecache scrub index for the next visit
5589 	 */
5590 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5591 }
5592 
5593 /*
5594  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5595  * we decrement the outstanding request count to zero.
5596  *
5597  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5598  * become negative after the atomic_add_32_nv().  This is not a problem, as
5599  * the next trip around the loop won't scrub anything, and the next add will
5600  * reset the count back to zero.
5601  */
5602 /*ARGSUSED*/
5603 static uint_t
5604 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5605 {
5606 	int i;
5607 	int how_many;
5608 	int outstanding;
5609 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5610 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5611 	struct scrub_info *csi = (struct scrub_info *)arg1;
5612 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5613 		ecache_scan_rate_idle : ecache_scan_rate_busy;
5614 	int ecache_nlines = csmp->chsm_ecache_nlines;
5615 
5616 	/*
5617 	 * The scan rates are expressed in units of tenths of a
5618 	 * percent.  A scan rate of 1000 (100%) means the whole
5619 	 * cache is scanned every second.
5620 	 */
5621 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5622 
5623 	do {
5624 		outstanding = *countp;
5625 		for (i = 0; i < outstanding; i++) {
5626 			scrub_ecache(how_many);
5627 		}
5628 	} while (atomic_add_32_nv(countp, -outstanding));
5629 
5630 	return (DDI_INTR_CLAIMED);
5631 }
5632 
5633 /*
5634  * Timeout function to reenable CE
5635  */
5636 static void
5637 cpu_delayed_check_ce_errors(void *arg)
5638 {
5639 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
5640 	    TQ_NOSLEEP)) {
5641 		(void) timeout(cpu_delayed_check_ce_errors, arg,
5642 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5643 	}
5644 }
5645 
5646 /*
5647  * CE Deferred Re-enable after trap.
5648  *
5649  * When the CPU gets a disrupting trap for any of the errors
5650  * controlled by the CEEN bit, CEEN is disabled in the trap handler
5651  * immediately. To eliminate the possibility of multiple CEs causing
5652  * recursive stack overflow in the trap handler, we cannot
5653  * reenable CEEN while still running in the trap handler. Instead,
5654  * after a CE is logged on a CPU, we schedule a timeout function,
5655  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
5656  * seconds. This function will check whether any further CEs
5657  * have occurred on that CPU, and if none have, will reenable CEEN.
5658  *
5659  * If further CEs have occurred while CEEN is disabled, another
5660  * timeout will be scheduled. This is to ensure that the CPU can
5661  * make progress in the face of CE 'storms', and that it does not
5662  * spend all its time logging CE errors.
5663  */
5664 static void
5665 cpu_check_ce_errors(void *arg)
5666 {
5667 	int	cpuid = (int)(uintptr_t)arg;
5668 	cpu_t	*cp;
5669 
5670 	/*
5671 	 * We acquire cpu_lock.
5672 	 */
5673 	ASSERT(curthread->t_pil == 0);
5674 
5675 	/*
5676 	 * verify that the cpu is still around, DR
5677 	 * could have got there first ...
5678 	 */
5679 	mutex_enter(&cpu_lock);
5680 	cp = cpu_get(cpuid);
5681 	if (cp == NULL) {
5682 		mutex_exit(&cpu_lock);
5683 		return;
5684 	}
5685 	/*
5686 	 * make sure we don't migrate across CPUs
5687 	 * while checking our CE status.
5688 	 */
5689 	kpreempt_disable();
5690 
5691 	/*
5692 	 * If we are running on the CPU that got the
5693 	 * CE, we can do the checks directly.
5694 	 */
5695 	if (cp->cpu_id == CPU->cpu_id) {
5696 		mutex_exit(&cpu_lock);
5697 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
5698 		kpreempt_enable();
5699 		return;
5700 	}
5701 	kpreempt_enable();
5702 
5703 	/*
5704 	 * send an x-call to get the CPU that originally
5705 	 * got the CE to do the necessary checks. If we can't
5706 	 * send the x-call, reschedule the timeout, otherwise we
5707 	 * lose CEEN forever on that CPU.
5708 	 */
5709 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
5710 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
5711 		    TIMEOUT_CEEN_CHECK, 0);
5712 		mutex_exit(&cpu_lock);
5713 	} else {
5714 		/*
5715 		 * When the CPU is not accepting xcalls, or
5716 		 * the processor is offlined, we don't want to
5717 		 * incur the extra overhead of trying to schedule the
5718 		 * CE timeout indefinitely. However, we don't want to lose
5719 		 * CE checking forever.
5720 		 *
5721 		 * Keep rescheduling the timeout, accepting the additional
5722 		 * overhead as the cost of correctness in the case where we get
5723 		 * a CE, disable CEEN, offline the CPU during the
5724 		 * the timeout interval, and then online it at some
5725 		 * point in the future. This is unlikely given the short
5726 		 * cpu_ceen_delay_secs.
5727 		 */
5728 		mutex_exit(&cpu_lock);
5729 		(void) timeout(cpu_delayed_check_ce_errors,
5730 		    (void *)(uintptr_t)cp->cpu_id,
5731 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5732 	}
5733 }
5734 
5735 /*
5736  * This routine will check whether CEs have occurred while
5737  * CEEN is disabled. Any CEs detected will be logged and, if
5738  * possible, scrubbed.
5739  *
5740  * The memscrubber will also use this routine to clear any errors
5741  * caused by its scrubbing with CEEN disabled.
5742  *
5743  * flag == SCRUBBER_CEEN_CHECK
5744  *		called from memscrubber, just check/scrub, no reset
5745  *		paddr 	physical addr. for start of scrub pages
5746  *		vaddr 	virtual addr. for scrub area
5747  *		psz	page size of area to be scrubbed
5748  *
5749  * flag == TIMEOUT_CEEN_CHECK
5750  *		timeout function has triggered, reset timeout or CEEN
5751  *
5752  * Note: We must not migrate cpus during this function.  This can be
5753  * achieved by one of:
5754  *    - invoking as target of an x-call in which case we're at XCALL_PIL
5755  *	The flag value must be first xcall argument.
5756  *    - disabling kernel preemption.  This should be done for very short
5757  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
5758  *	scrub an extended area with cpu_check_block.  The call for
5759  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
5760  *	brief for this case.
5761  *    - binding to a cpu, eg with thread_affinity_set().  This is used
5762  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
5763  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
5764  */
5765 void
5766 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
5767 {
5768 	ch_cpu_errors_t	cpu_error_regs;
5769 	uint64_t	ec_err_enable;
5770 	uint64_t	page_offset;
5771 
5772 	/* Read AFSR */
5773 	get_cpu_error_state(&cpu_error_regs);
5774 
5775 	/*
5776 	 * If no CEEN errors have occurred during the timeout
5777 	 * interval, it is safe to re-enable CEEN and exit.
5778 	 */
5779 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
5780 		if (flag == TIMEOUT_CEEN_CHECK &&
5781 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
5782 			set_error_enable(ec_err_enable | EN_REG_CEEN);
5783 		return;
5784 	}
5785 
5786 	/*
5787 	 * Ensure that CEEN was not reenabled (maybe by DR) before
5788 	 * we log/clear the error.
5789 	 */
5790 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
5791 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
5792 
5793 	/*
5794 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
5795 	 * timeout will be rescheduled when the error is logged.
5796 	 */
5797 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
5798 	    cpu_ce_detected(&cpu_error_regs,
5799 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
5800 	else
5801 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
5802 
5803 	/*
5804 	 * If the memory scrubber runs while CEEN is
5805 	 * disabled, (or if CEEN is disabled during the
5806 	 * scrub as a result of a CE being triggered by
5807 	 * it), the range being scrubbed will not be
5808 	 * completely cleaned. If there are multiple CEs
5809 	 * in the range at most two of these will be dealt
5810 	 * with, (one by the trap handler and one by the
5811 	 * timeout). It is also possible that none are dealt
5812 	 * with, (CEEN disabled and another CE occurs before
5813 	 * the timeout triggers). So to ensure that the
5814 	 * memory is actually scrubbed, we have to access each
5815 	 * memory location in the range and then check whether
5816 	 * that access causes a CE.
5817 	 */
5818 	if (flag == SCRUBBER_CEEN_CHECK && va) {
5819 		if ((cpu_error_regs.afar >= pa) &&
5820 		    (cpu_error_regs.afar < (pa + psz))) {
5821 			/*
5822 			 * Force a load from physical memory for each
5823 			 * 64-byte block, then check AFSR to determine
5824 			 * whether this access caused an error.
5825 			 *
5826 			 * This is a slow way to do a scrub, but as it will
5827 			 * only be invoked when the memory scrubber actually
5828 			 * triggered a CE, it should not happen too
5829 			 * frequently.
5830 			 *
5831 			 * cut down what we need to check as the scrubber
5832 			 * has verified up to AFAR, so get it's offset
5833 			 * into the page and start there.
5834 			 */
5835 			page_offset = (uint64_t)(cpu_error_regs.afar &
5836 			    (psz - 1));
5837 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
5838 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
5839 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
5840 			    psz);
5841 		}
5842 	}
5843 
5844 	/*
5845 	 * Reset error enable if this CE is not masked.
5846 	 */
5847 	if ((flag == TIMEOUT_CEEN_CHECK) &&
5848 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
5849 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
5850 
5851 }
5852 
5853 /*
5854  * Attempt a cpu logout for an error that we did not trap for, such
5855  * as a CE noticed with CEEN off.  It is assumed that we are still running
5856  * on the cpu that took the error and that we cannot migrate.  Returns
5857  * 0 on success, otherwise nonzero.
5858  */
5859 static int
5860 cpu_ce_delayed_ec_logout(uint64_t afar)
5861 {
5862 	ch_cpu_logout_t *clop;
5863 
5864 	if (CPU_PRIVATE(CPU) == NULL)
5865 		return (0);
5866 
5867 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5868 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
5869 	    LOGOUT_INVALID)
5870 		return (0);
5871 
5872 	cpu_delayed_logout(afar, clop);
5873 	return (1);
5874 }
5875 
5876 /*
5877  * We got an error while CEEN was disabled. We
5878  * need to clean up after it and log whatever
5879  * information we have on the CE.
5880  */
5881 void
5882 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
5883 {
5884 	ch_async_flt_t 	ch_flt;
5885 	struct async_flt *aflt;
5886 	char 		pr_reason[MAX_REASON_STRING];
5887 
5888 	bzero(&ch_flt, sizeof (ch_async_flt_t));
5889 	ch_flt.flt_trapped_ce = flag;
5890 	aflt = (struct async_flt *)&ch_flt;
5891 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
5892 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
5893 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5894 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
5895 	aflt->flt_addr = cpu_error_regs->afar;
5896 #if defined(SERRANO)
5897 	ch_flt.afar2 = cpu_error_regs->afar2;
5898 #endif	/* SERRANO */
5899 	aflt->flt_pc = NULL;
5900 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
5901 	aflt->flt_tl = 0;
5902 	aflt->flt_panic = 0;
5903 	cpu_log_and_clear_ce(&ch_flt);
5904 
5905 	/*
5906 	 * check if we caused any errors during cleanup
5907 	 */
5908 	if (clear_errors(&ch_flt)) {
5909 		pr_reason[0] = '\0';
5910 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
5911 		    NULL);
5912 	}
5913 }
5914 
5915 /*
5916  * Log/clear CEEN-controlled disrupting errors
5917  */
5918 static void
5919 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
5920 {
5921 	struct async_flt *aflt;
5922 	uint64_t afsr, afsr_errs;
5923 	ch_cpu_logout_t *clop;
5924 	char 		pr_reason[MAX_REASON_STRING];
5925 	on_trap_data_t	*otp = curthread->t_ontrap;
5926 
5927 	aflt = (struct async_flt *)ch_flt;
5928 	afsr = aflt->flt_stat;
5929 	afsr_errs = ch_flt->afsr_errs;
5930 	aflt->flt_id = gethrtime_waitfree();
5931 	aflt->flt_bus_id = getprocessorid();
5932 	aflt->flt_inst = CPU->cpu_id;
5933 	aflt->flt_prot = AFLT_PROT_NONE;
5934 	aflt->flt_class = CPU_FAULT;
5935 	aflt->flt_status = ECC_C_TRAP;
5936 
5937 	pr_reason[0] = '\0';
5938 	/*
5939 	 * Get the CPU log out info for Disrupting Trap.
5940 	 */
5941 	if (CPU_PRIVATE(CPU) == NULL) {
5942 		clop = NULL;
5943 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
5944 	} else {
5945 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5946 	}
5947 
5948 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
5949 		ch_cpu_errors_t cpu_error_regs;
5950 
5951 		get_cpu_error_state(&cpu_error_regs);
5952 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
5953 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
5954 		clop->clo_data.chd_afar = cpu_error_regs.afar;
5955 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
5956 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
5957 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
5958 		clop->clo_sdw_data.chd_afsr_ext =
5959 		    cpu_error_regs.shadow_afsr_ext;
5960 #if defined(SERRANO)
5961 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
5962 #endif	/* SERRANO */
5963 		ch_flt->flt_data_incomplete = 1;
5964 
5965 		/*
5966 		 * The logging/clear code expects AFSR/AFAR to be cleared.
5967 		 * The trap handler does it for CEEN enabled errors
5968 		 * so we need to do it here.
5969 		 */
5970 		set_cpu_error_state(&cpu_error_regs);
5971 	}
5972 
5973 #if defined(JALAPENO) || defined(SERRANO)
5974 	/*
5975 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
5976 	 * For Serrano, even thou we do have the AFAR, we still do the
5977 	 * scrub on the RCE side since that's where the error type can
5978 	 * be properly classified as intermittent, persistent, etc.
5979 	 *
5980 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
5981 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
5982 	 * the flt_status bits.
5983 	 */
5984 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
5985 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
5986 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
5987 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
5988 	}
5989 #else /* JALAPENO || SERRANO */
5990 	/*
5991 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
5992 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
5993 	 * the flt_status bits.
5994 	 */
5995 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
5996 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
5997 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
5998 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
5999 		}
6000 	}
6001 
6002 #endif /* JALAPENO || SERRANO */
6003 
6004 	/*
6005 	 * Update flt_prot if this error occurred under on_trap protection.
6006 	 */
6007 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6008 		aflt->flt_prot = AFLT_PROT_EC;
6009 
6010 	/*
6011 	 * Queue events on the async event queue, one event per error bit.
6012 	 */
6013 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6014 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6015 		ch_flt->flt_type = CPU_INV_AFSR;
6016 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6017 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6018 		    aflt->flt_panic);
6019 	}
6020 
6021 	/*
6022 	 * Zero out + invalidate CPU logout.
6023 	 */
6024 	if (clop) {
6025 		bzero(clop, sizeof (ch_cpu_logout_t));
6026 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6027 	}
6028 
6029 	/*
6030 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6031 	 * was disabled, we need to flush either the entire
6032 	 * E$ or an E$ line.
6033 	 */
6034 #if defined(JALAPENO) || defined(SERRANO)
6035 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6036 #else	/* JALAPENO || SERRANO */
6037 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6038 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6039 #endif	/* JALAPENO || SERRANO */
6040 		cpu_error_ecache_flush(ch_flt);
6041 
6042 }
6043 
6044 /*
6045  * depending on the error type, we determine whether we
6046  * need to flush the entire ecache or just a line.
6047  */
6048 static int
6049 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6050 {
6051 	struct async_flt *aflt;
6052 	uint64_t	afsr;
6053 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6054 
6055 	aflt = (struct async_flt *)ch_flt;
6056 	afsr = aflt->flt_stat;
6057 
6058 	/*
6059 	 * If we got multiple errors, no point in trying
6060 	 * the individual cases, just flush the whole cache
6061 	 */
6062 	if (afsr & C_AFSR_ME) {
6063 		return (ECACHE_FLUSH_ALL);
6064 	}
6065 
6066 	/*
6067 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6068 	 * was disabled, we need to flush entire E$. We can't just
6069 	 * flush the cache line affected as the ME bit
6070 	 * is not set when multiple correctable errors of the same
6071 	 * type occur, so we might have multiple CPC or EDC errors,
6072 	 * with only the first recorded.
6073 	 */
6074 #if defined(JALAPENO) || defined(SERRANO)
6075 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6076 #else	/* JALAPENO || SERRANO */
6077 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6078 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6079 #endif	/* JALAPENO || SERRANO */
6080 		return (ECACHE_FLUSH_ALL);
6081 	}
6082 
6083 #if defined(JALAPENO) || defined(SERRANO)
6084 	/*
6085 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6086 	 * flush the entire Ecache.
6087 	 */
6088 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6089 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6090 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6091 			return (ECACHE_FLUSH_LINE);
6092 		} else {
6093 			return (ECACHE_FLUSH_ALL);
6094 		}
6095 	}
6096 #else /* JALAPENO || SERRANO */
6097 	/*
6098 	 * If UE only is set, flush the Ecache line, otherwise
6099 	 * flush the entire Ecache.
6100 	 */
6101 	if (afsr_errs & C_AFSR_UE) {
6102 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6103 		    C_AFSR_UE) {
6104 			return (ECACHE_FLUSH_LINE);
6105 		} else {
6106 			return (ECACHE_FLUSH_ALL);
6107 		}
6108 	}
6109 #endif /* JALAPENO || SERRANO */
6110 
6111 	/*
6112 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6113 	 * flush the entire Ecache.
6114 	 */
6115 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6116 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6117 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6118 			return (ECACHE_FLUSH_LINE);
6119 		} else {
6120 			return (ECACHE_FLUSH_ALL);
6121 		}
6122 	}
6123 
6124 	/*
6125 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6126 	 * flush the entire Ecache.
6127 	 */
6128 	if (afsr_errs & C_AFSR_BERR) {
6129 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6130 			return (ECACHE_FLUSH_LINE);
6131 		} else {
6132 			return (ECACHE_FLUSH_ALL);
6133 		}
6134 	}
6135 
6136 	return (0);
6137 }
6138 
6139 void
6140 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6141 {
6142 	int	ecache_flush_flag =
6143 	    cpu_error_ecache_flush_required(ch_flt);
6144 
6145 	/*
6146 	 * Flush Ecache line or entire Ecache based on above checks.
6147 	 */
6148 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6149 		cpu_flush_ecache();
6150 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6151 		cpu_flush_ecache_line(ch_flt);
6152 	}
6153 
6154 }
6155 
6156 /*
6157  * Extract the PA portion from the E$ tag.
6158  */
6159 uint64_t
6160 cpu_ectag_to_pa(int setsize, uint64_t tag)
6161 {
6162 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6163 		return (JG_ECTAG_TO_PA(setsize, tag));
6164 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6165 		return (PN_L3TAG_TO_PA(tag));
6166 	else
6167 		return (CH_ECTAG_TO_PA(setsize, tag));
6168 }
6169 
6170 /*
6171  * Convert the E$ tag PA into an E$ subblock index.
6172  */
6173 static int
6174 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6175 {
6176 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6177 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6178 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6179 		/* Panther has only one subblock per line */
6180 		return (0);
6181 	else
6182 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6183 }
6184 
6185 /*
6186  * All subblocks in an E$ line must be invalid for
6187  * the line to be invalid.
6188  */
6189 int
6190 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6191 {
6192 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6193 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6194 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6195 		return (PN_L3_LINE_INVALID(tag));
6196 	else
6197 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6198 }
6199 
6200 /*
6201  * Extract state bits for a subblock given the tag.  Note that for Panther
6202  * this works on both l2 and l3 tags.
6203  */
6204 static int
6205 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6206 {
6207 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6208 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6209 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6210 		return (tag & CH_ECSTATE_MASK);
6211 	else
6212 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6213 }
6214 
6215 /*
6216  * Cpu specific initialization.
6217  */
6218 void
6219 cpu_mp_init(void)
6220 {
6221 #ifdef	CHEETAHPLUS_ERRATUM_25
6222 	if (cheetah_sendmondo_recover) {
6223 		cheetah_nudge_init();
6224 	}
6225 #endif
6226 }
6227 
6228 void
6229 cpu_ereport_post(struct async_flt *aflt)
6230 {
6231 	char *cpu_type, buf[FM_MAX_CLASS];
6232 	nv_alloc_t *nva = NULL;
6233 	nvlist_t *ereport, *detector, *resource;
6234 	errorq_elem_t *eqep;
6235 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6236 	char unum[UNUM_NAMLEN];
6237 	int len = 0;
6238 	uint8_t  msg_type;
6239 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6240 
6241 	if (aflt->flt_panic || panicstr) {
6242 		eqep = errorq_reserve(ereport_errorq);
6243 		if (eqep == NULL)
6244 			return;
6245 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6246 		nva = errorq_elem_nva(ereport_errorq, eqep);
6247 	} else {
6248 		ereport = fm_nvlist_create(nva);
6249 	}
6250 
6251 	/*
6252 	 * Create the scheme "cpu" FMRI.
6253 	 */
6254 	detector = fm_nvlist_create(nva);
6255 	resource = fm_nvlist_create(nva);
6256 	switch (cpunodes[aflt->flt_inst].implementation) {
6257 	case CHEETAH_IMPL:
6258 		cpu_type = FM_EREPORT_CPU_USIII;
6259 		break;
6260 	case CHEETAH_PLUS_IMPL:
6261 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6262 		break;
6263 	case JALAPENO_IMPL:
6264 		cpu_type = FM_EREPORT_CPU_USIIIi;
6265 		break;
6266 	case SERRANO_IMPL:
6267 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6268 		break;
6269 	case JAGUAR_IMPL:
6270 		cpu_type = FM_EREPORT_CPU_USIV;
6271 		break;
6272 	case PANTHER_IMPL:
6273 		cpu_type = FM_EREPORT_CPU_USIVplus;
6274 		break;
6275 	default:
6276 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6277 		break;
6278 	}
6279 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
6280 	    aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version,
6281 	    cpunodes[aflt->flt_inst].device_id);
6282 
6283 	/*
6284 	 * Encode all the common data into the ereport.
6285 	 */
6286 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6287 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6288 
6289 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6290 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6291 	    detector, NULL);
6292 
6293 	/*
6294 	 * Encode the error specific data that was saved in
6295 	 * the async_flt structure into the ereport.
6296 	 */
6297 	cpu_payload_add_aflt(aflt, ereport, resource,
6298 	    &plat_ecc_ch_flt.ecaf_afar_status,
6299 	    &plat_ecc_ch_flt.ecaf_synd_status);
6300 
6301 	if (aflt->flt_panic || panicstr) {
6302 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6303 	} else {
6304 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6305 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6306 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6307 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6308 	}
6309 	/*
6310 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6311 	 * to the SC olny if it can process it.
6312 	 */
6313 
6314 	if (&plat_ecc_capability_sc_get &&
6315 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6316 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6317 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6318 			/*
6319 			 * If afar status is not invalid do a unum lookup.
6320 			 */
6321 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6322 			    AFLT_STAT_INVALID) {
6323 				(void) cpu_get_mem_unum_aflt(
6324 				    plat_ecc_ch_flt.ecaf_synd_status, aflt,
6325 				    unum, UNUM_NAMLEN, &len);
6326 			} else {
6327 				unum[0] = '\0';
6328 			}
6329 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6330 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6331 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6332 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6333 			    ch_flt->flt_sdw_afsr_ext;
6334 
6335 			if (&plat_log_fruid_error2)
6336 				plat_log_fruid_error2(msg_type, unum, aflt,
6337 				    &plat_ecc_ch_flt);
6338 		}
6339 	}
6340 }
6341 
6342 void
6343 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6344 {
6345 	int status;
6346 	ddi_fm_error_t de;
6347 
6348 	bzero(&de, sizeof (ddi_fm_error_t));
6349 
6350 	de.fme_version = DDI_FME_VERSION;
6351 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6352 	    FM_ENA_FMT1);
6353 	de.fme_flag = expected;
6354 	de.fme_bus_specific = (void *)aflt->flt_addr;
6355 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6356 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6357 		aflt->flt_panic = 1;
6358 }
6359 
6360 void
6361 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6362     errorq_t *eqp, uint_t flag)
6363 {
6364 	struct async_flt *aflt = (struct async_flt *)payload;
6365 
6366 	aflt->flt_erpt_class = error_class;
6367 	errorq_dispatch(eqp, payload, payload_sz, flag);
6368 }
6369 
6370 /*
6371  * This routine may be called by the IO module, but does not do
6372  * anything in this cpu module. The SERD algorithm is handled by
6373  * cpumem-diagnosis engine instead.
6374  */
6375 /*ARGSUSED*/
6376 void
6377 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
6378 {}
6379 
6380 void
6381 adjust_hw_copy_limits(int ecache_size)
6382 {
6383 	/*
6384 	 * Set hw copy limits.
6385 	 *
6386 	 * /etc/system will be parsed later and can override one or more
6387 	 * of these settings.
6388 	 *
6389 	 * At this time, ecache size seems only mildly relevant.
6390 	 * We seem to run into issues with the d-cache and stalls
6391 	 * we see on misses.
6392 	 *
6393 	 * Cycle measurement indicates that 2 byte aligned copies fare
6394 	 * little better than doing things with VIS at around 512 bytes.
6395 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6396 	 * aligned is faster whenever the source and destination data
6397 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6398 	 * limit seems to be driven by the 2K write cache.
6399 	 * When more than 2K of copies are done in non-VIS mode, stores
6400 	 * backup in the write cache.  In VIS mode, the write cache is
6401 	 * bypassed, allowing faster cache-line writes aligned on cache
6402 	 * boundaries.
6403 	 *
6404 	 * In addition, in non-VIS mode, there is no prefetching, so
6405 	 * for larger copies, the advantage of prefetching to avoid even
6406 	 * occasional cache misses is enough to justify using the VIS code.
6407 	 *
6408 	 * During testing, it was discovered that netbench ran 3% slower
6409 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6410 	 * applications, data is only used once (copied to the output
6411 	 * buffer, then copied by the network device off the system).  Using
6412 	 * the VIS copy saves more L2 cache state.  Network copies are
6413 	 * around 1.3K to 1.5K in size for historical reasons.
6414 	 *
6415 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6416 	 * aligned copy even for large caches and 8 MB ecache.  The
6417 	 * infrastructure to allow different limits for different sized
6418 	 * caches is kept to allow further tuning in later releases.
6419 	 */
6420 
6421 	if (min_ecache_size == 0 && use_hw_bcopy) {
6422 		/*
6423 		 * First time through - should be before /etc/system
6424 		 * is read.
6425 		 * Could skip the checks for zero but this lets us
6426 		 * preserve any debugger rewrites.
6427 		 */
6428 		if (hw_copy_limit_1 == 0) {
6429 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6430 			priv_hcl_1 = hw_copy_limit_1;
6431 		}
6432 		if (hw_copy_limit_2 == 0) {
6433 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6434 			priv_hcl_2 = hw_copy_limit_2;
6435 		}
6436 		if (hw_copy_limit_4 == 0) {
6437 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6438 			priv_hcl_4 = hw_copy_limit_4;
6439 		}
6440 		if (hw_copy_limit_8 == 0) {
6441 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6442 			priv_hcl_8 = hw_copy_limit_8;
6443 		}
6444 		min_ecache_size = ecache_size;
6445 	} else {
6446 		/*
6447 		 * MP initialization. Called *after* /etc/system has
6448 		 * been parsed. One CPU has already been initialized.
6449 		 * Need to cater for /etc/system having scragged one
6450 		 * of our values.
6451 		 */
6452 		if (ecache_size == min_ecache_size) {
6453 			/*
6454 			 * Same size ecache. We do nothing unless we
6455 			 * have a pessimistic ecache setting. In that
6456 			 * case we become more optimistic (if the cache is
6457 			 * large enough).
6458 			 */
6459 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6460 				/*
6461 				 * Need to adjust hw_copy_limit* from our
6462 				 * pessimistic uniprocessor value to a more
6463 				 * optimistic UP value *iff* it hasn't been
6464 				 * reset.
6465 				 */
6466 				if ((ecache_size > 1048576) &&
6467 				    (priv_hcl_8 == hw_copy_limit_8)) {
6468 					if (ecache_size <= 2097152)
6469 						hw_copy_limit_8 = 4 *
6470 						    VIS_COPY_THRESHOLD;
6471 					else if (ecache_size <= 4194304)
6472 						hw_copy_limit_8 = 4 *
6473 						    VIS_COPY_THRESHOLD;
6474 					else
6475 						hw_copy_limit_8 = 4 *
6476 						    VIS_COPY_THRESHOLD;
6477 					priv_hcl_8 = hw_copy_limit_8;
6478 				}
6479 			}
6480 		} else if (ecache_size < min_ecache_size) {
6481 			/*
6482 			 * A different ecache size. Can this even happen?
6483 			 */
6484 			if (priv_hcl_8 == hw_copy_limit_8) {
6485 				/*
6486 				 * The previous value that we set
6487 				 * is unchanged (i.e., it hasn't been
6488 				 * scragged by /etc/system). Rewrite it.
6489 				 */
6490 				if (ecache_size <= 1048576)
6491 					hw_copy_limit_8 = 8 *
6492 					    VIS_COPY_THRESHOLD;
6493 				else if (ecache_size <= 2097152)
6494 					hw_copy_limit_8 = 8 *
6495 					    VIS_COPY_THRESHOLD;
6496 				else if (ecache_size <= 4194304)
6497 					hw_copy_limit_8 = 8 *
6498 					    VIS_COPY_THRESHOLD;
6499 				else
6500 					hw_copy_limit_8 = 10 *
6501 					    VIS_COPY_THRESHOLD;
6502 				priv_hcl_8 = hw_copy_limit_8;
6503 				min_ecache_size = ecache_size;
6504 			}
6505 		}
6506 	}
6507 }
6508 
6509 /*
6510  * Called from illegal instruction trap handler to see if we can attribute
6511  * the trap to a fpras check.
6512  */
6513 int
6514 fpras_chktrap(struct regs *rp)
6515 {
6516 	int op;
6517 	struct fpras_chkfngrp *cgp;
6518 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6519 
6520 	if (fpras_chkfngrps == NULL)
6521 		return (0);
6522 
6523 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6524 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6525 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6526 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6527 			break;
6528 	}
6529 	if (op == FPRAS_NCOPYOPS)
6530 		return (0);
6531 
6532 	/*
6533 	 * This is an fpRAS failure caught through an illegal
6534 	 * instruction - trampoline.
6535 	 */
6536 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6537 	rp->r_npc = rp->r_pc + 4;
6538 	return (1);
6539 }
6540 
6541 /*
6542  * fpras_failure is called when a fpras check detects a bad calculation
6543  * result or an illegal instruction trap is attributed to an fpras
6544  * check.  In all cases we are still bound to CPU.
6545  */
6546 int
6547 fpras_failure(int op, int how)
6548 {
6549 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6550 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6551 	ch_async_flt_t ch_flt;
6552 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6553 	struct fpras_chkfn *sfp, *cfp;
6554 	uint32_t *sip, *cip;
6555 	int i;
6556 
6557 	/*
6558 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6559 	 * the time in which we dispatch an ereport and (if applicable) panic.
6560 	 */
6561 	use_hw_bcopy_orig = use_hw_bcopy;
6562 	use_hw_bzero_orig = use_hw_bzero;
6563 	hcl1_orig = hw_copy_limit_1;
6564 	hcl2_orig = hw_copy_limit_2;
6565 	hcl4_orig = hw_copy_limit_4;
6566 	hcl8_orig = hw_copy_limit_8;
6567 	use_hw_bcopy = use_hw_bzero = 0;
6568 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6569 	    hw_copy_limit_8 = 0;
6570 
6571 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6572 	aflt->flt_id = gethrtime_waitfree();
6573 	aflt->flt_class = CPU_FAULT;
6574 	aflt->flt_inst = CPU->cpu_id;
6575 	aflt->flt_status = (how << 8) | op;
6576 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6577 	ch_flt.flt_type = CPU_FPUERR;
6578 
6579 	/*
6580 	 * We must panic if the copy operation had no lofault protection -
6581 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6582 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6583 	 */
6584 	aflt->flt_panic = (curthread->t_lofault == NULL);
6585 
6586 	/*
6587 	 * XOR the source instruction block with the copied instruction
6588 	 * block - this will show us which bit(s) are corrupted.
6589 	 */
6590 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6591 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6592 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6593 		sip = &sfp->fpras_blk0[0];
6594 		cip = &cfp->fpras_blk0[0];
6595 	} else {
6596 		sip = &sfp->fpras_blk1[0];
6597 		cip = &cfp->fpras_blk1[0];
6598 	}
6599 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6600 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6601 
6602 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6603 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6604 
6605 	if (aflt->flt_panic)
6606 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6607 
6608 	/*
6609 	 * We get here for copyin/copyout and kcopy or bcopy where the
6610 	 * caller has used on_fault.  We will flag the error so that
6611 	 * the process may be killed  The trap_async_hwerr mechanism will
6612 	 * take appropriate further action (such as a reboot, contract
6613 	 * notification etc).  Since we may be continuing we will
6614 	 * restore the global hardware copy acceleration switches.
6615 	 *
6616 	 * When we return from this function to the copy function we want to
6617 	 * avoid potentially bad data being used, ie we want the affected
6618 	 * copy function to return an error.  The caller should therefore
6619 	 * invoke its lofault handler (which always exists for these functions)
6620 	 * which will return the appropriate error.
6621 	 */
6622 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6623 	aston(curthread);
6624 
6625 	use_hw_bcopy = use_hw_bcopy_orig;
6626 	use_hw_bzero = use_hw_bzero_orig;
6627 	hw_copy_limit_1 = hcl1_orig;
6628 	hw_copy_limit_2 = hcl2_orig;
6629 	hw_copy_limit_4 = hcl4_orig;
6630 	hw_copy_limit_8 = hcl8_orig;
6631 
6632 	return (1);
6633 }
6634 
6635 #define	VIS_BLOCKSIZE		64
6636 
6637 int
6638 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
6639 {
6640 	int ret, watched;
6641 
6642 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6643 	ret = dtrace_blksuword32(addr, data, 0);
6644 	if (watched)
6645 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6646 
6647 	return (ret);
6648 }
6649 
6650 /*
6651  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
6652  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
6653  * CEEN from the EER to disable traps for further disrupting error types
6654  * on that cpu.  We could cross-call instead, but that has a larger
6655  * instruction and data footprint than cross-trapping, and the cpu is known
6656  * to be faulted.
6657  */
6658 
6659 void
6660 cpu_faulted_enter(struct cpu *cp)
6661 {
6662 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
6663 }
6664 
6665 /*
6666  * Called when a cpu leaves the CPU_FAULTED state to return to one of
6667  * offline, spare, or online (by the cpu requesting this state change).
6668  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
6669  * disrupting error bits that have accumulated without trapping, then
6670  * we cross-trap to re-enable CEEN controlled traps.
6671  */
6672 void
6673 cpu_faulted_exit(struct cpu *cp)
6674 {
6675 	ch_cpu_errors_t cpu_error_regs;
6676 
6677 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
6678 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
6679 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
6680 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
6681 	    (uint64_t)&cpu_error_regs, 0);
6682 
6683 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
6684 }
6685 
6686 /*
6687  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
6688  * the errors in the original AFSR, 0 otherwise.
6689  *
6690  * For all procs if the initial error was a BERR or TO, then it is possible
6691  * that we may have caused a secondary BERR or TO in the process of logging the
6692  * inital error via cpu_run_bus_error_handlers().  If this is the case then
6693  * if the request was protected then a panic is still not necessary, if not
6694  * protected then aft_panic is already set - so either way there's no need
6695  * to set aft_panic for the secondary error.
6696  *
6697  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
6698  * a store merge, then the error handling code will call cpu_deferred_error().
6699  * When clear_errors() is called, it will determine that secondary errors have
6700  * occurred - in particular, the store merge also caused a EDU and WDU that
6701  * weren't discovered until this point.
6702  *
6703  * We do three checks to verify that we are in this case.  If we pass all three
6704  * checks, we return 1 to indicate that we should not panic.  If any unexpected
6705  * errors occur, we return 0.
6706  *
6707  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
6708  * handled in cpu_disrupting_errors().  Since this function is not even called
6709  * in the case we are interested in, we just return 0 for these processors.
6710  */
6711 /*ARGSUSED*/
6712 static int
6713 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
6714     uint64_t t_afar)
6715 {
6716 #if defined(CHEETAH_PLUS)
6717 #else	/* CHEETAH_PLUS */
6718 	struct async_flt *aflt = (struct async_flt *)ch_flt;
6719 #endif	/* CHEETAH_PLUS */
6720 
6721 	/*
6722 	 * Was the original error a BERR or TO and only a BERR or TO
6723 	 * (multiple errors are also OK)
6724 	 */
6725 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
6726 		/*
6727 		 * Is the new error a BERR or TO and only a BERR or TO
6728 		 * (multiple errors are also OK)
6729 		 */
6730 		if ((ch_flt->afsr_errs &
6731 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
6732 			return (1);
6733 	}
6734 
6735 #if defined(CHEETAH_PLUS)
6736 	return (0);
6737 #else	/* CHEETAH_PLUS */
6738 	/*
6739 	 * Now look for secondary effects of a UE on cheetah/jalapeno
6740 	 *
6741 	 * Check the original error was a UE, and only a UE.  Note that
6742 	 * the ME bit will cause us to fail this check.
6743 	 */
6744 	if (t_afsr_errs != C_AFSR_UE)
6745 		return (0);
6746 
6747 	/*
6748 	 * Check the secondary errors were exclusively an EDU and/or WDU.
6749 	 */
6750 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
6751 		return (0);
6752 
6753 	/*
6754 	 * Check the AFAR of the original error and secondary errors
6755 	 * match to the 64-byte boundary
6756 	 */
6757 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
6758 		return (0);
6759 
6760 	/*
6761 	 * We've passed all the checks, so it's a secondary error!
6762 	 */
6763 	return (1);
6764 #endif	/* CHEETAH_PLUS */
6765 }
6766 
6767 /*
6768  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
6769  * is checked for any valid errors.  If found, the error type is
6770  * returned. If not found, the flt_type is checked for L1$ parity errors.
6771  */
6772 /*ARGSUSED*/
6773 static uint8_t
6774 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
6775 {
6776 #if defined(JALAPENO)
6777 	/*
6778 	 * Currently, logging errors to the SC is not supported on Jalapeno
6779 	 */
6780 	return (PLAT_ECC_ERROR2_NONE);
6781 #else
6782 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6783 
6784 	switch (ch_flt->flt_bit) {
6785 	case C_AFSR_CE:
6786 		return (PLAT_ECC_ERROR2_CE);
6787 	case C_AFSR_UCC:
6788 	case C_AFSR_EDC:
6789 	case C_AFSR_WDC:
6790 	case C_AFSR_CPC:
6791 		return (PLAT_ECC_ERROR2_L2_CE);
6792 	case C_AFSR_EMC:
6793 		return (PLAT_ECC_ERROR2_EMC);
6794 	case C_AFSR_IVC:
6795 		return (PLAT_ECC_ERROR2_IVC);
6796 	case C_AFSR_UE:
6797 		return (PLAT_ECC_ERROR2_UE);
6798 	case C_AFSR_UCU:
6799 	case C_AFSR_EDU:
6800 	case C_AFSR_WDU:
6801 	case C_AFSR_CPU:
6802 		return (PLAT_ECC_ERROR2_L2_UE);
6803 	case C_AFSR_IVU:
6804 		return (PLAT_ECC_ERROR2_IVU);
6805 	case C_AFSR_TO:
6806 		return (PLAT_ECC_ERROR2_TO);
6807 	case C_AFSR_BERR:
6808 		return (PLAT_ECC_ERROR2_BERR);
6809 #if defined(CHEETAH_PLUS)
6810 	case C_AFSR_L3_EDC:
6811 	case C_AFSR_L3_UCC:
6812 	case C_AFSR_L3_CPC:
6813 	case C_AFSR_L3_WDC:
6814 		return (PLAT_ECC_ERROR2_L3_CE);
6815 	case C_AFSR_IMC:
6816 		return (PLAT_ECC_ERROR2_IMC);
6817 	case C_AFSR_TSCE:
6818 		return (PLAT_ECC_ERROR2_L2_TSCE);
6819 	case C_AFSR_THCE:
6820 		return (PLAT_ECC_ERROR2_L2_THCE);
6821 	case C_AFSR_L3_MECC:
6822 		return (PLAT_ECC_ERROR2_L3_MECC);
6823 	case C_AFSR_L3_THCE:
6824 		return (PLAT_ECC_ERROR2_L3_THCE);
6825 	case C_AFSR_L3_CPU:
6826 	case C_AFSR_L3_EDU:
6827 	case C_AFSR_L3_UCU:
6828 	case C_AFSR_L3_WDU:
6829 		return (PLAT_ECC_ERROR2_L3_UE);
6830 	case C_AFSR_DUE:
6831 		return (PLAT_ECC_ERROR2_DUE);
6832 	case C_AFSR_DTO:
6833 		return (PLAT_ECC_ERROR2_DTO);
6834 	case C_AFSR_DBERR:
6835 		return (PLAT_ECC_ERROR2_DBERR);
6836 #endif	/* CHEETAH_PLUS */
6837 	default:
6838 		switch (ch_flt->flt_type) {
6839 #if defined(CPU_IMP_L1_CACHE_PARITY)
6840 		case CPU_IC_PARITY:
6841 			return (PLAT_ECC_ERROR2_IPE);
6842 		case CPU_DC_PARITY:
6843 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
6844 				if (ch_flt->parity_data.dpe.cpl_cache ==
6845 				    CPU_PC_PARITY) {
6846 					return (PLAT_ECC_ERROR2_PCACHE);
6847 				}
6848 			}
6849 			return (PLAT_ECC_ERROR2_DPE);
6850 #endif /* CPU_IMP_L1_CACHE_PARITY */
6851 		case CPU_ITLB_PARITY:
6852 			return (PLAT_ECC_ERROR2_ITLB);
6853 		case CPU_DTLB_PARITY:
6854 			return (PLAT_ECC_ERROR2_DTLB);
6855 		default:
6856 			return (PLAT_ECC_ERROR2_NONE);
6857 		}
6858 	}
6859 #endif	/* JALAPENO */
6860 }
6861