xref: /titanic_50/usr/src/uts/sun4u/cpu/us3_common.c (revision ae347574c7f17d33bb822cb146d7f67c88ab1f68)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/ddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/archsystm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/machthread.h>
37 #include <sys/cpu.h>
38 #include <sys/cmp.h>
39 #include <sys/elf_SPARC.h>
40 #include <vm/vm_dep.h>
41 #include <vm/hat_sfmmu.h>
42 #include <vm/seg_kpm.h>
43 #include <sys/cpuvar.h>
44 #include <sys/cheetahregs.h>
45 #include <sys/us3_module.h>
46 #include <sys/async.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/dditypes.h>
50 #include <sys/prom_debug.h>
51 #include <sys/prom_plat.h>
52 #include <sys/cpu_module.h>
53 #include <sys/sysmacros.h>
54 #include <sys/intreg.h>
55 #include <sys/clock.h>
56 #include <sys/platform_module.h>
57 #include <sys/machtrap.h>
58 #include <sys/ontrap.h>
59 #include <sys/panic.h>
60 #include <sys/memlist.h>
61 #include <sys/bootconf.h>
62 #include <sys/ivintr.h>
63 #include <sys/atomic.h>
64 #include <sys/taskq.h>
65 #include <sys/note.h>
66 #include <sys/ndifm.h>
67 #include <sys/ddifm.h>
68 #include <sys/fm/protocol.h>
69 #include <sys/fm/util.h>
70 #include <sys/fm/cpu/UltraSPARC-III.h>
71 #include <sys/fpras_impl.h>
72 #include <sys/dtrace.h>
73 #include <sys/watchpoint.h>
74 #include <sys/plat_ecc_unum.h>
75 #include <sys/cyclic.h>
76 #include <sys/errorq.h>
77 #include <sys/errclassify.h>
78 
79 #ifdef	CHEETAHPLUS_ERRATUM_25
80 #include <sys/xc_impl.h>
81 #endif	/* CHEETAHPLUS_ERRATUM_25 */
82 
83 /*
84  * Note that 'Cheetah PRM' refers to:
85  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
86  */
87 
88 /*
89  * Per CPU pointers to physical address of TL>0 logout data areas.
90  * These pointers have to be in the kernel nucleus to avoid MMU
91  * misses.
92  */
93 uint64_t ch_err_tl1_paddrs[NCPU];
94 
95 /*
96  * One statically allocated structure to use during startup/DR
97  * to prevent unnecessary panics.
98  */
99 ch_err_tl1_data_t ch_err_tl1_data;
100 
101 /*
102  * Per CPU pending error at TL>0, used by level15 softint handler
103  */
104 uchar_t ch_err_tl1_pending[NCPU];
105 
106 /*
107  * For deferred CE re-enable after trap.
108  */
109 taskq_t		*ch_check_ce_tq;
110 
111 /*
112  * Internal functions.
113  */
114 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
115 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
116 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
117     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
118 static int clear_ecc(struct async_flt *ecc);
119 #if defined(CPU_IMP_ECACHE_ASSOC)
120 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
121 #endif
122 static int cpu_ecache_set_size(struct cpu *cp);
123 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
124 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
125 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
126 static int cpu_ectag_pa_to_subblk_state(int cachesize,
127 				uint64_t subaddr, uint64_t tag);
128 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
129 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
130 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
131 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
132 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
133 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
134 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
135 static void cpu_scrubphys(struct async_flt *aflt);
136 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
137     int *, int *);
138 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
139 static void cpu_ereport_init(struct async_flt *aflt);
140 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
141 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
142 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
143     uint64_t nceen, ch_cpu_logout_t *clop);
144 static int cpu_ce_delayed_ec_logout(uint64_t);
145 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
146 static int cpu_error_is_ecache_data(int, uint64_t);
147 static void cpu_fmri_cpu_set(nvlist_t *, int);
148 static int cpu_error_to_resource_type(struct async_flt *aflt);
149 
150 #ifdef	CHEETAHPLUS_ERRATUM_25
151 static int mondo_recover_proc(uint16_t, int);
152 static void cheetah_nudge_init(void);
153 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
154     cyc_time_t *when);
155 static void cheetah_nudge_buddy(void);
156 #endif	/* CHEETAHPLUS_ERRATUM_25 */
157 
158 #if defined(CPU_IMP_L1_CACHE_PARITY)
159 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
160 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
161 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
162     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
163 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
164 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
165 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
166 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
167 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
168 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
169 #endif	/* CPU_IMP_L1_CACHE_PARITY */
170 
171 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
172     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
173     int *segsp, int *banksp, int *mcidp);
174 
175 /*
176  * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
178  * of this array have the following semantics:
179  *
180  *      00-127  The number of the bad bit, when only one bit is bad.
181  *      128     ECC bit C0 is bad.
182  *      129     ECC bit C1 is bad.
183  *      130     ECC bit C2 is bad.
184  *      131     ECC bit C3 is bad.
185  *      132     ECC bit C4 is bad.
186  *      133     ECC bit C5 is bad.
187  *      134     ECC bit C6 is bad.
188  *      135     ECC bit C7 is bad.
189  *      136     ECC bit C8 is bad.
190  *	137-143 reserved for Mtag Data and ECC.
191  *      144(M2) Two bits are bad within a nibble.
192  *      145(M3) Three bits are bad within a nibble.
 *      146(M4) Four bits are bad within a nibble.
194  *      147(M)  Multiple bits (5 or more) are bad.
195  *      148     NO bits are bad.
196  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
197  */
198 
199 #define	C0	128
200 #define	C1	129
201 #define	C2	130
202 #define	C3	131
203 #define	C4	132
204 #define	C5	133
205 #define	C6	134
206 #define	C7	135
207 #define	C8	136
208 #define	MT0	137	/* Mtag Data bit 0 */
209 #define	MT1	138
210 #define	MT2	139
211 #define	MTC0	140	/* Mtag Check bit 0 */
212 #define	MTC1	141
213 #define	MTC2	142
214 #define	MTC3	143
215 #define	M2	144
216 #define	M3	145
217 #define	M4	146
218 #define	M	147
219 #define	NA	148
220 #if defined(JALAPENO) || defined(SERRANO)
221 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
222 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
223 #define	SLAST	S003MEM	/* last special syndrome */
224 #else /* JALAPENO || SERRANO */
225 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
226 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
227 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
228 #define	SLAST	S11C	/* last special syndrome */
229 #endif /* JALAPENO || SERRANO */
#if defined(JALAPENO) || defined(SERRANO)
#define	BPAR0	152	/* syndromes 152 through 167 are for bus parity */
#define	BPAR15	167
#endif	/* JALAPENO || SERRANO */
234 
235 static uint8_t ecc_syndrome_tab[] =
236 {
237 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
238 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
239 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
240 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
241 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
242 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
243 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
244 #if defined(JALAPENO) || defined(SERRANO)
245 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
246 #else	/* JALAPENO || SERRANO */
247 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
248 #endif	/* JALAPENO || SERRANO */
249 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
250 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
251 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
252 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
253 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
254 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
255 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
256 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
257 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
258 #if defined(JALAPENO) || defined(SERRANO)
259 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
260 #else	/* JALAPENO || SERRANO */
261 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
262 #endif	/* JALAPENO || SERRANO */
263 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
264 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
265 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
266 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
267 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
268 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
269 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
270 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
271 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
272 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
273 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
274 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
275 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
276 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
277 };
278 
279 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
280 
281 #if !(defined(JALAPENO) || defined(SERRANO))
282 /*
283  * This table is used to determine which bit(s) is(are) bad when a Mtag
 * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
285  * of this array have the following semantics:
286  *
287  *      -1	Invalid mtag syndrome.
288  *      137     Mtag Data 0 is bad.
289  *      138     Mtag Data 1 is bad.
290  *      139     Mtag Data 2 is bad.
291  *      140     Mtag ECC 0 is bad.
292  *      141     Mtag ECC 1 is bad.
293  *      142     Mtag ECC 2 is bad.
294  *      143     Mtag ECC 3 is bad.
295  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6.
296  */
297 short mtag_syndrome_tab[] =
298 {
299 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
300 };
301 
302 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
303 
304 #else /* !(JALAPENO || SERRANO) */
305 
306 #define	BSYND_TBL_SIZE	16
307 
308 #endif /* !(JALAPENO || SERRANO) */
309 
310 /*
311  * Types returned from cpu_error_to_resource_type()
312  */
313 #define	ERRTYPE_UNKNOWN		0
314 #define	ERRTYPE_CPU		1
315 #define	ERRTYPE_MEMORY		2
316 #define	ERRTYPE_ECACHE_DATA	3
317 
318 /*
319  * CE initial classification and subsequent action lookup table
320  */
321 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
322 static int ce_disp_inited;
323 
324 /*
325  * Set to disable leaky and partner check for memory correctables
326  */
327 int ce_xdiag_off;
328 
329 /*
330  * The following are not incremented atomically so are indicative only
331  */
332 static int ce_xdiag_drops;
333 static int ce_xdiag_lkydrops;
334 static int ce_xdiag_ptnrdrops;
335 static int ce_xdiag_bad;
336 
337 /*
338  * CE leaky check callback structure
339  */
340 typedef struct {
341 	struct async_flt *lkycb_aflt;
342 	errorq_t *lkycb_eqp;
343 	errorq_elem_t *lkycb_eqep;
344 } ce_lkychk_cb_t;
345 
346 /*
347  * defines for various ecache_flush_flag's
348  */
349 #define	ECACHE_FLUSH_LINE	1
350 #define	ECACHE_FLUSH_ALL	2
351 
352 /*
353  * STICK sync
354  */
355 #define	STICK_ITERATION 10
356 #define	MAX_TSKEW	1
357 #define	EV_A_START	0
358 #define	EV_A_END	1
359 #define	EV_B_START	2
360 #define	EV_B_END	3
361 #define	EVENTS		4
362 
363 static int64_t stick_iter = STICK_ITERATION;
364 static int64_t stick_tsk = MAX_TSKEW;
365 
366 typedef enum {
367 	EVENT_NULL = 0,
368 	SLAVE_START,
369 	SLAVE_CONT,
370 	MASTER_START
371 } event_cmd_t;
372 
373 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
374 static int64_t timestamp[EVENTS];
375 static volatile int slave_done;
376 
377 #ifdef DEBUG
378 #define	DSYNC_ATTEMPTS 64
379 typedef struct {
380 	int64_t	skew_val[DSYNC_ATTEMPTS];
381 } ss_t;
382 
383 ss_t stick_sync_stats[NCPU];
384 #endif /* DEBUG */
385 
386 uint_t cpu_impl_dual_pgsz = 0;
387 #if defined(CPU_IMP_DUAL_PAGESIZE)
388 uint_t disable_dual_pgsz = 0;
389 #endif	/* CPU_IMP_DUAL_PAGESIZE */
390 
391 /*
392  * Save the cache bootup state for use when internal
393  * caches are to be re-enabled after an error occurs.
394  */
395 uint64_t cache_boot_state;
396 
397 /*
398  * PA[22:0] represent Displacement in Safari configuration space.
399  */
400 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
401 
402 bus_config_eclk_t bus_config_eclk[] = {
403 #if defined(JALAPENO) || defined(SERRANO)
404 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
405 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
406 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
407 #else /* JALAPENO || SERRANO */
408 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
409 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
410 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
411 #endif /* JALAPENO || SERRANO */
412 	{0, 0}
413 };
414 
415 /*
416  * Interval for deferred CEEN reenable
417  */
418 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
419 
420 /*
421  * set in /etc/system to control logging of user BERR/TO's
422  */
423 int cpu_berr_to_verbose = 0;
424 
425 /*
426  * set to 0 in /etc/system to defer CEEN reenable for all CEs
427  */
428 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
429 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
430 
431 /*
432  * Set of all offline cpus
433  */
434 cpuset_t cpu_offline_set;
435 
436 static void cpu_delayed_check_ce_errors(void *);
437 static void cpu_check_ce_errors(void *);
438 void cpu_error_ecache_flush(ch_async_flt_t *);
439 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
440 static void cpu_log_and_clear_ce(ch_async_flt_t *);
441 void cpu_ce_detected(ch_cpu_errors_t *, int);
442 
443 /*
444  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
445  * memory refresh interval of current DIMMs (64ms).  After initial fix that
446  * gives at least one full refresh cycle in which the cell can leak
447  * (whereafter further refreshes simply reinforce any incorrect bit value).
448  */
449 clock_t cpu_ce_lkychk_timeout_usec = 128000;
450 
451 /*
452  * CE partner check partner caching period in seconds
453  */
454 int cpu_ce_ptnr_cachetime_sec = 60;
455 
/*
 * Sets trap table entry ttentry by overwriting eight instructions from
 * ttlabel: 32 bytes are copied from ttlabel to ttentry and
 * flush_instr_mem() is then called on the 32 destination bytes.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts as
 * a single statement (for example as the unbraced body of an if/else).
 * Invoke it with a trailing semicolon.
 */
#define	CH_SET_TRAP(ttentry, ttlabel)					\
	do {								\
		bcopy((const void *)&(ttlabel), &(ttentry), 32);	\
		flush_instr_mem((caddr_t)&(ttentry), 32);		\
		_NOTE(CONSTCOND)					\
	} while (0)
462 
463 static int min_ecache_size;
464 static uint_t priv_hcl_1;
465 static uint_t priv_hcl_2;
466 static uint_t priv_hcl_4;
467 static uint_t priv_hcl_8;
468 
469 void
470 cpu_setup(void)
471 {
472 	extern int at_flags;
473 	extern int disable_delay_tlb_flush, delay_tlb_flush;
474 	extern int cpc_has_overflow_intr;
475 	extern int disable_text_largepages;
476 	extern int use_text_pgsz4m;
477 
478 	/*
479 	 * Setup chip-specific trap handlers.
480 	 */
481 	cpu_init_trap();
482 
483 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
484 
485 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
486 
487 	/*
488 	 * save the cache bootup state.
489 	 */
490 	cache_boot_state = get_dcu() & DCU_CACHE;
491 
492 	/*
493 	 * Due to the number of entries in the fully-associative tlb
494 	 * this may have to be tuned lower than in spitfire.
495 	 */
496 	pp_slots = MIN(8, MAXPP_SLOTS);
497 
498 	/*
499 	 * Block stores do not invalidate all pages of the d$, pagecopy
500 	 * et. al. need virtual translations with virtual coloring taken
501 	 * into consideration.  prefetch/ldd will pollute the d$ on the
502 	 * load side.
503 	 */
504 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
505 
506 	if (use_page_coloring) {
507 		do_pg_coloring = 1;
508 		if (use_virtual_coloring)
509 			do_virtual_coloring = 1;
510 	}
511 
512 	isa_list =
513 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
514 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
515 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
516 
517 	/*
518 	 * On Panther-based machines, this should
519 	 * also include AV_SPARC_POPC too
520 	 */
521 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
522 
523 	/*
524 	 * On cheetah, there's no hole in the virtual address space
525 	 */
526 	hole_start = hole_end = 0;
527 
528 	/*
529 	 * The kpm mapping window.
530 	 * kpm_size:
531 	 *	The size of a single kpm range.
532 	 *	The overall size will be: kpm_size * vac_colors.
533 	 * kpm_vbase:
534 	 *	The virtual start address of the kpm range within the kernel
535 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
536 	 */
537 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
538 	kpm_size_shift = 43;
539 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
540 	kpm_smallpages = 1;
541 
542 	/*
543 	 * The traptrace code uses either %tick or %stick for
544 	 * timestamping.  We have %stick so we can use it.
545 	 */
546 	traptrace_use_stick = 1;
547 
548 	/*
549 	 * Cheetah has a performance counter overflow interrupt
550 	 */
551 	cpc_has_overflow_intr = 1;
552 
553 	/*
554 	 * Use cheetah flush-all support
555 	 */
556 	if (!disable_delay_tlb_flush)
557 		delay_tlb_flush = 1;
558 
559 #if defined(CPU_IMP_DUAL_PAGESIZE)
560 	/*
561 	 * Use Cheetah+ and later dual page size support.
562 	 */
563 	if (!disable_dual_pgsz) {
564 		cpu_impl_dual_pgsz = 1;
565 	}
566 #endif	/* CPU_IMP_DUAL_PAGESIZE */
567 
568 	/*
569 	 * Declare that this architecture/cpu combination does fpRAS.
570 	 */
571 	fpras_implemented = 1;
572 
573 	/*
574 	 * Enable 4M pages to be used for mapping user text by default.  Don't
575 	 * use large pages for initialized data segments since we may not know
576 	 * at exec() time what should be the preferred large page size for DTLB
577 	 * programming.
578 	 */
579 	use_text_pgsz4m = 1;
580 	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
581 	    (1 << TTE32M) | (1 << TTE256M);
582 
583 	/*
584 	 * Setup CE lookup table
585 	 */
586 	CE_INITDISPTBL_POPULATE(ce_disp_table);
587 	ce_disp_inited = 1;
588 }
589 
590 /*
591  * Called by setcpudelay
592  */
593 void
594 cpu_init_tick_freq(void)
595 {
596 	/*
597 	 * For UltraSPARC III and beyond we want to use the
598 	 * system clock rate as the basis for low level timing,
599 	 * due to support of mixed speed CPUs and power managment.
600 	 */
601 	if (system_clock_freq == 0)
602 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
603 
604 	sys_tick_freq = system_clock_freq;
605 }
606 
607 #ifdef CHEETAHPLUS_ERRATUM_25
608 /*
609  * Tunables
610  */
611 int cheetah_bpe_off = 0;
612 int cheetah_sendmondo_recover = 1;
613 int cheetah_sendmondo_fullscan = 0;
614 int cheetah_sendmondo_recover_delay = 5;
615 
616 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
617 
618 /*
619  * Recovery Statistics
620  */
621 typedef struct cheetah_livelock_entry	{
622 	int cpuid;		/* fallen cpu */
623 	int buddy;		/* cpu that ran recovery */
624 	clock_t lbolt;		/* when recovery started */
625 	hrtime_t recovery_time;	/* time spent in recovery */
626 } cheetah_livelock_entry_t;
627 
628 #define	CHEETAH_LIVELOCK_NENTRY	32
629 
630 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
631 int cheetah_livelock_entry_nxt;
632 
/*
 * CHEETAH_LIVELOCK_ENTRY_NEXT points statp at the next slot of
 * cheetah_livelock_hist[] and advances cheetah_livelock_entry_nxt,
 * wrapping to 0 once CHEETAH_LIVELOCK_NENTRY is reached.  statp must be
 * a modifiable pointer.  The expansion is a braced block, so it may be
 * invoked with or without a trailing semicolon.
 */
#define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
	(statp) = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
		cheetah_livelock_entry_nxt = 0;				\
	}								\
}

/*
 * CHEETAH_LIVELOCK_ENTRY_SET stores val in member "item" of the entry
 * that statp points to.  The pointer and value arguments are
 * parenthesized so that arbitrary expressions may be passed.
 */
#define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	\
	(statp)->item = (val)
641 
642 struct {
643 	hrtime_t hrt;		/* maximum recovery time */
644 	int recovery;		/* recovered */
645 	int full_claimed;	/* maximum pages claimed in full recovery */
646 	int proc_entry;		/* attempted to claim TSB */
647 	int proc_tsb_scan;	/* tsb scanned */
648 	int proc_tsb_partscan;	/* tsb partially scanned */
649 	int proc_tsb_fullscan;	/* whole tsb scanned */
650 	int proc_claimed;	/* maximum pages claimed in tsb scan */
651 	int proc_user;		/* user thread */
652 	int proc_kernel;	/* kernel thread */
653 	int proc_onflt;		/* bad stack */
654 	int proc_cpu;		/* null cpu */
655 	int proc_thread;	/* null thread */
656 	int proc_proc;		/* null proc */
657 	int proc_as;		/* null as */
658 	int proc_hat;		/* null hat */
659 	int proc_hat_inval;	/* hat contents don't make sense */
660 	int proc_hat_busy;	/* hat is changing TSBs */
661 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
662 	int proc_cnum_bad;	/* cnum out of range */
663 	int proc_cnum;		/* last cnum processed */
664 	tte_t proc_tte;		/* last tte processed */
665 } cheetah_livelock_stat;
666 
/*
 * Helpers that update cheetah_livelock_stat; "item" names a member of
 * that structure.
 *
 *	CHEETAH_LIVELOCK_STAT		post-increments the member
 *	CHEETAH_LIVELOCK_STATSET	stores value in the member
 *	CHEETAH_LIVELOCK_MAXSTAT	stores value in the member only when
 *					value is greater than its current
 *					contents
 *
 * The value argument is parenthesized so that expressions containing
 * operators of lower precedence than '>' compare as intended.
 * CHEETAH_LIVELOCK_MAXSTAT may evaluate value twice, so pass an
 * expression that has no side effects.
 */
#define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++

#define	CHEETAH_LIVELOCK_STATSET(item, value)		\
	cheetah_livelock_stat.item = (value)

#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
	if ((value) > cheetah_livelock_stat.item)	\
		cheetah_livelock_stat.item = (value);	\
}
676 
677 /*
678  * Attempt to recover a cpu by claiming every cache line as saved
679  * in the TSB that the non-responsive cpu is using. Since we can't
680  * grab any adaptive lock, this is at best an attempt to do so. Because
681  * we don't grab any locks, we must operate under the protection of
682  * on_fault().
683  *
684  * Return 1 if cpuid could be recovered, 0 if failed.
685  */
686 int
687 mondo_recover_proc(uint16_t cpuid, int bn)
688 {
689 	label_t ljb;
690 	cpu_t *cp;
691 	kthread_t *t;
692 	proc_t *p;
693 	struct as *as;
694 	struct hat *hat;
695 	uint_t  cnum;
696 	struct tsb_info *tsbinfop;
697 	struct tsbe *tsbep;
698 	caddr_t tsbp;
699 	caddr_t end_tsbp;
700 	uint64_t paddr;
701 	uint64_t idsr;
702 	u_longlong_t pahi, palo;
703 	int pages_claimed = 0;
704 	tte_t tsbe_tte;
705 	int tried_kernel_tsb = 0;
706 	mmu_ctx_t *mmu_ctxp;
707 
708 	CHEETAH_LIVELOCK_STAT(proc_entry);
709 
710 	if (on_fault(&ljb)) {
711 		CHEETAH_LIVELOCK_STAT(proc_onflt);
712 		goto badstruct;
713 	}
714 
715 	if ((cp = cpu[cpuid]) == NULL) {
716 		CHEETAH_LIVELOCK_STAT(proc_cpu);
717 		goto badstruct;
718 	}
719 
720 	if ((t = cp->cpu_thread) == NULL) {
721 		CHEETAH_LIVELOCK_STAT(proc_thread);
722 		goto badstruct;
723 	}
724 
725 	if ((p = ttoproc(t)) == NULL) {
726 		CHEETAH_LIVELOCK_STAT(proc_proc);
727 		goto badstruct;
728 	}
729 
730 	if ((as = p->p_as) == NULL) {
731 		CHEETAH_LIVELOCK_STAT(proc_as);
732 		goto badstruct;
733 	}
734 
735 	if ((hat = as->a_hat) == NULL) {
736 		CHEETAH_LIVELOCK_STAT(proc_hat);
737 		goto badstruct;
738 	}
739 
740 	if (hat != ksfmmup) {
741 		CHEETAH_LIVELOCK_STAT(proc_user);
742 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
743 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
744 			goto badstruct;
745 		}
746 		tsbinfop = hat->sfmmu_tsb;
747 		if (tsbinfop == NULL) {
748 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
749 			goto badstruct;
750 		}
751 		tsbp = tsbinfop->tsb_va;
752 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
753 	} else {
754 		CHEETAH_LIVELOCK_STAT(proc_kernel);
755 		tsbinfop = NULL;
756 		tsbp = ktsb_base;
757 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
758 	}
759 
760 	/* Verify as */
761 	if (hat->sfmmu_as != as) {
762 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
763 		goto badstruct;
764 	}
765 
766 	mmu_ctxp = CPU_MMU_CTXP(cp);
767 	ASSERT(mmu_ctxp);
768 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
769 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
770 
771 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
772 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
773 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
774 		goto badstruct;
775 	}
776 
777 	do {
778 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
779 
780 		/*
781 		 * Skip TSBs being relocated.  This is important because
782 		 * we want to avoid the following deadlock scenario:
783 		 *
784 		 * 1) when we came in we set ourselves to "in recover" state.
785 		 * 2) when we try to touch TSB being relocated the mapping
786 		 *    will be in the suspended state so we'll spin waiting
787 		 *    for it to be unlocked.
788 		 * 3) when the CPU that holds the TSB mapping locked tries to
789 		 *    unlock it it will send a xtrap which will fail to xcall
790 		 *    us or the CPU we're trying to recover, and will in turn
791 		 *    enter the mondo code.
792 		 * 4) since we are still spinning on the locked mapping
793 		 *    no further progress will be made and the system will
794 		 *    inevitably hard hang.
795 		 *
796 		 * A TSB not being relocated can't begin being relocated
797 		 * while we're accessing it because we check
798 		 * sendmondo_in_recover before relocating TSBs.
799 		 */
800 		if (hat != ksfmmup &&
801 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
802 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
803 			goto next_tsbinfo;
804 		}
805 
806 		for (tsbep = (struct tsbe *)tsbp;
807 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
808 			tsbe_tte = tsbep->tte_data;
809 
810 			if (tsbe_tte.tte_val == 0) {
811 				/*
812 				 * Invalid tte
813 				 */
814 				continue;
815 			}
816 			if (tsbe_tte.tte_se) {
817 				/*
818 				 * Don't want device registers
819 				 */
820 				continue;
821 			}
822 			if (tsbe_tte.tte_cp == 0) {
823 				/*
824 				 * Must be cached in E$
825 				 */
826 				continue;
827 			}
828 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
829 			idsr = getidsr();
830 			if ((idsr & (IDSR_NACK_BIT(bn) |
831 			    IDSR_BUSY_BIT(bn))) == 0) {
832 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
833 				goto done;
834 			}
835 			pahi = tsbe_tte.tte_pahi;
836 			palo = tsbe_tte.tte_palo;
837 			paddr = (uint64_t)((pahi << 32) |
838 			    (palo << MMU_PAGESHIFT));
839 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
840 			    CH_ECACHE_SUBBLK_SIZE);
841 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
842 				shipit(cpuid, bn);
843 			}
844 			pages_claimed++;
845 		}
846 next_tsbinfo:
847 		if (tsbinfop != NULL)
848 			tsbinfop = tsbinfop->tsb_next;
849 		if (tsbinfop != NULL) {
850 			tsbp = tsbinfop->tsb_va;
851 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
852 		} else if (tsbp == ktsb_base) {
853 			tried_kernel_tsb = 1;
854 		} else if (!tried_kernel_tsb) {
855 			tsbp = ktsb_base;
856 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
857 			hat = ksfmmup;
858 			tsbinfop = NULL;
859 		}
860 	} while (tsbinfop != NULL ||
861 			((tsbp == ktsb_base) && !tried_kernel_tsb));
862 
863 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
864 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
865 	no_fault();
866 	idsr = getidsr();
867 	if ((idsr & (IDSR_NACK_BIT(bn) |
868 	    IDSR_BUSY_BIT(bn))) == 0) {
869 		return (1);
870 	} else {
871 		return (0);
872 	}
873 
874 done:
875 	no_fault();
876 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
877 	return (1);
878 
879 badstruct:
880 	no_fault();
881 	return (0);
882 }
883 
884 /*
885  * Attempt to claim ownership, temporarily, of every cache line that a
886  * non-responsive cpu might be using.  This might kick that cpu out of
887  * this state.
888  *
889  * The return value indicates to the caller if we have exhausted all recovery
890  * techniques. If 1 is returned, it is useless to call this function again
891  * even for a different target CPU.
892  */
893 int
894 mondo_recover(uint16_t cpuid, int bn)
895 {
896 	struct memseg *seg;
897 	uint64_t begin_pa, end_pa, cur_pa;
898 	hrtime_t begin_hrt, end_hrt;
899 	int retval = 0;
900 	int pages_claimed = 0;
901 	cheetah_livelock_entry_t *histp;
902 	uint64_t idsr;
903 
904 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
905 		/*
906 		 * Wait while recovery takes place
907 		 */
908 		while (sendmondo_in_recover) {
909 			drv_usecwait(1);
910 		}
911 		/*
912 		 * Assume we didn't claim the whole memory. If
913 		 * the target of this caller is not recovered,
914 		 * it will come back.
915 		 */
916 		return (retval);
917 	}
918 
919 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
920 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
921 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
922 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
923 
924 	begin_hrt = gethrtime_waitfree();
925 	/*
926 	 * First try to claim the lines in the TSB the target
927 	 * may have been using.
928 	 */
929 	if (mondo_recover_proc(cpuid, bn) == 1) {
930 		/*
931 		 * Didn't claim the whole memory
932 		 */
933 		goto done;
934 	}
935 
936 	/*
937 	 * We tried using the TSB. The target is still
938 	 * not recovered. Check if complete memory scan is
939 	 * enabled.
940 	 */
941 	if (cheetah_sendmondo_fullscan == 0) {
942 		/*
943 		 * Full memory scan is disabled.
944 		 */
945 		retval = 1;
946 		goto done;
947 	}
948 
949 	/*
950 	 * Try claiming the whole memory.
951 	 */
952 	for (seg = memsegs; seg; seg = seg->next) {
953 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
954 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
955 		for (cur_pa = begin_pa; cur_pa < end_pa;
956 		    cur_pa += MMU_PAGESIZE) {
957 			idsr = getidsr();
958 			if ((idsr & (IDSR_NACK_BIT(bn) |
959 			    IDSR_BUSY_BIT(bn))) == 0) {
960 				/*
961 				 * Didn't claim all memory
962 				 */
963 				goto done;
964 			}
965 			claimlines(cur_pa, MMU_PAGESIZE,
966 			    CH_ECACHE_SUBBLK_SIZE);
967 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
968 				shipit(cpuid, bn);
969 			}
970 			pages_claimed++;
971 		}
972 	}
973 
974 	/*
975 	 * We did all we could.
976 	 */
977 	retval = 1;
978 
979 done:
980 	/*
981 	 * Update statistics
982 	 */
983 	end_hrt = gethrtime_waitfree();
984 	CHEETAH_LIVELOCK_STAT(recovery);
985 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
986 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
987 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
988 	    (end_hrt -  begin_hrt));
989 
990 	while (cas32(&sendmondo_in_recover, 1, 0) != 1);
991 
992 	return (retval);
993 }
994 
995 /*
996  * This is called by the cyclic framework when this CPU becomes online
997  */
998 /*ARGSUSED*/
999 static void
1000 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
1001 {
1002 
1003 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1004 	hdlr->cyh_level = CY_LOW_LEVEL;
1005 	hdlr->cyh_arg = NULL;
1006 
1007 	/*
1008 	 * Stagger the start time
1009 	 */
1010 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1011 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1012 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1013 	}
1014 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1015 }
1016 
1017 /*
1018  * Create a low level cyclic to send a xtrap to the next cpu online.
1019  * However, there's no need to have this running on a uniprocessor system.
1020  */
1021 static void
1022 cheetah_nudge_init(void)
1023 {
1024 	cyc_omni_handler_t hdlr;
1025 
1026 	if (max_ncpus == 1) {
1027 		return;
1028 	}
1029 
1030 	hdlr.cyo_online = cheetah_nudge_onln;
1031 	hdlr.cyo_offline = NULL;
1032 	hdlr.cyo_arg = NULL;
1033 
1034 	mutex_enter(&cpu_lock);
1035 	(void) cyclic_add_omni(&hdlr);
1036 	mutex_exit(&cpu_lock);
1037 }
1038 
1039 /*
1040  * Cyclic handler to wake up buddy
1041  */
1042 void
1043 cheetah_nudge_buddy(void)
1044 {
1045 	/*
1046 	 * Disable kernel preemption to protect the cpu list
1047 	 */
1048 	kpreempt_disable();
1049 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1050 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1051 		    0, 0);
1052 	}
1053 	kpreempt_enable();
1054 }
1055 
1056 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1057 
#ifdef SEND_MONDO_STATS
/*
 * Optional send-mondo statistics, compiled in only when SEND_MONDO_STATS
 * is defined.
 *
 * x_one_stimes/x_one_ltimes are bumped by send_one_mondo(): elapsed tick
 * counts below 8192 are bucketed by (ticks >> 7) into x_one_stimes, and
 * longer ones by ((ticks >> 13) & 0xf) into x_one_ltimes.
 *
 * NOTE(review): the x_set_* and x_nack_stimes counters are presumably the
 * equivalents for the cpu-set dispatch path; their users are not in this
 * part of the file -- confirm against the set-dispatch code.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
1066 
1067 /*
1068  * Note: A version of this function is used by the debugger via the KDI,
1069  * and must be kept in sync with this version.  Any changes made to this
1070  * function to support new chips or to accomodate errata must also be included
1071  * in the KDI-specific version.  See us3_kdi.c.
1072  */
1073 void
1074 send_one_mondo(int cpuid)
1075 {
1076 	int busy, nack;
1077 	uint64_t idsr, starttick, endtick, tick, lasttick;
1078 	uint64_t busymask;
1079 #ifdef	CHEETAHPLUS_ERRATUM_25
1080 	int recovered = 0;
1081 #endif
1082 
1083 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1084 	starttick = lasttick = gettick();
1085 	shipit(cpuid, 0);
1086 	endtick = starttick + xc_tick_limit;
1087 	busy = nack = 0;
1088 #if defined(JALAPENO) || defined(SERRANO)
1089 	/*
1090 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1091 	 * will be used for dispatching interrupt. For now, assume
1092 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1093 	 * issues with respect to BUSY/NACK pair usage.
1094 	 */
1095 	busymask  = IDSR_BUSY_BIT(cpuid);
1096 #else /* JALAPENO || SERRANO */
1097 	busymask = IDSR_BUSY;
1098 #endif /* JALAPENO || SERRANO */
1099 	for (;;) {
1100 		idsr = getidsr();
1101 		if (idsr == 0)
1102 			break;
1103 
1104 		tick = gettick();
1105 		/*
1106 		 * If there is a big jump between the current tick
1107 		 * count and lasttick, we have probably hit a break
1108 		 * point.  Adjust endtick accordingly to avoid panic.
1109 		 */
1110 		if (tick > (lasttick + xc_tick_jump_limit))
1111 			endtick += (tick - lasttick);
1112 		lasttick = tick;
1113 		if (tick > endtick) {
1114 			if (panic_quiesce)
1115 				return;
1116 #ifdef	CHEETAHPLUS_ERRATUM_25
1117 			if (cheetah_sendmondo_recover && recovered == 0) {
1118 				if (mondo_recover(cpuid, 0)) {
1119 					/*
1120 					 * We claimed the whole memory or
1121 					 * full scan is disabled.
1122 					 */
1123 					recovered++;
1124 				}
1125 				tick = gettick();
1126 				endtick = tick + xc_tick_limit;
1127 				lasttick = tick;
1128 				/*
1129 				 * Recheck idsr
1130 				 */
1131 				continue;
1132 			} else
1133 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1134 			{
1135 				cmn_err(CE_PANIC, "send mondo timeout "
1136 				    "(target 0x%x) [%d NACK %d BUSY]",
1137 				    cpuid, nack, busy);
1138 			}
1139 		}
1140 
1141 		if (idsr & busymask) {
1142 			busy++;
1143 			continue;
1144 		}
1145 		drv_usecwait(1);
1146 		shipit(cpuid, 0);
1147 		nack++;
1148 		busy = 0;
1149 	}
1150 #ifdef SEND_MONDO_STATS
1151 	{
1152 		int n = gettick() - starttick;
1153 		if (n < 8192)
1154 			x_one_stimes[n >> 7]++;
1155 		else
1156 			x_one_ltimes[(n >> 13) & 0xf]++;
1157 	}
1158 #endif
1159 }
1160 
/*
 * Nothing to do in this cpu module; the function body is intentionally
 * empty.
 */
void
syncfpu(void)
{
	/* deliberately empty */
}
1165 
1166 /*
1167  * Return processor specific async error structure
1168  * size used.
1169  */
1170 int
1171 cpu_aflt_size(void)
1172 {
1173 	return (sizeof (ch_async_flt_t));
1174 }
1175 
/*
 * Tunable to disable the checking of other cpu logout areas during panic
 * for potential syndrome 71 generating errors.  The check is enabled by
 * default (1).
 *
 * NOTE(review): the code that tests this tunable is not in this part of
 * the file; presumably a value of 0 skips cpu_check_other_cpus_logout()
 * -- confirm at the point of use.
 */
int enable_check_other_cpus_logout = 1;
1181 
1182 /*
1183  * Check other cpus logout area for potential synd 71 generating
1184  * errors.
1185  */
1186 static void
1187 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1188     ch_cpu_logout_t *clop)
1189 {
1190 	struct async_flt *aflt;
1191 	ch_async_flt_t ch_flt;
1192 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1193 
1194 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1195 		return;
1196 	}
1197 
1198 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1199 
1200 	t_afar = clop->clo_data.chd_afar;
1201 	t_afsr = clop->clo_data.chd_afsr;
1202 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1203 #if defined(SERRANO)
1204 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1205 #endif	/* SERRANO */
1206 
1207 	/*
1208 	 * In order to simplify code, we maintain this afsr_errs
1209 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1210 	 * sticky bits.
1211 	 */
1212 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1213 	    (t_afsr & C_AFSR_ALL_ERRS);
1214 
1215 	/* Setup the async fault structure */
1216 	aflt = (struct async_flt *)&ch_flt;
1217 	aflt->flt_id = gethrtime_waitfree();
1218 	ch_flt.afsr_ext = t_afsr_ext;
1219 	ch_flt.afsr_errs = t_afsr_errs;
1220 	aflt->flt_stat = t_afsr;
1221 	aflt->flt_addr = t_afar;
1222 	aflt->flt_bus_id = cpuid;
1223 	aflt->flt_inst = cpuid;
1224 	aflt->flt_pc = tpc;
1225 	aflt->flt_prot = AFLT_PROT_NONE;
1226 	aflt->flt_class = CPU_FAULT;
1227 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1228 	aflt->flt_tl = tl;
1229 	aflt->flt_status = ecc_type;
1230 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1231 
1232 	/*
1233 	 * Queue events on the async event queue, one event per error bit.
1234 	 * If no events are queued, queue an event to complain.
1235 	 */
1236 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1237 		ch_flt.flt_type = CPU_INV_AFSR;
1238 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1239 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1240 		    aflt->flt_panic);
1241 	}
1242 
1243 	/*
1244 	 * Zero out + invalidate CPU logout.
1245 	 */
1246 	bzero(clop, sizeof (ch_cpu_logout_t));
1247 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1248 }
1249 
1250 /*
1251  * Check the logout areas of all other cpus for unlogged errors.
1252  */
1253 static void
1254 cpu_check_other_cpus_logout(void)
1255 {
1256 	int i, j;
1257 	processorid_t myid;
1258 	struct cpu *cp;
1259 	ch_err_tl1_data_t *cl1p;
1260 
1261 	myid = CPU->cpu_id;
1262 	for (i = 0; i < NCPU; i++) {
1263 		cp = cpu[i];
1264 
1265 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1266 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1267 			continue;
1268 		}
1269 
1270 		/*
1271 		 * Check each of the tl>0 logout areas
1272 		 */
1273 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1274 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1275 			if (cl1p->ch_err_tl1_flags == 0)
1276 				continue;
1277 
1278 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1279 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1280 		}
1281 
1282 		/*
1283 		 * Check each of the remaining logout areas
1284 		 */
1285 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1286 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1287 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1288 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1289 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1290 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1291 	}
1292 }
1293 
1294 /*
1295  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1296  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1297  * flush the error that caused the UCU/UCC, then again here at the end to
1298  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1299  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1300  * another Fast ECC trap.
1301  *
1302  * Cheetah+ also handles: TSCE: No additional processing required.
1303  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1304  *
1305  * Note that the p_clo_flags input is only valid in cases where the
1306  * cpu_private struct is not yet initialized (since that is the only
1307  * time that information cannot be obtained from the logout struct.)
1308  */
1309 /*ARGSUSED*/
1310 void
1311 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1312 {
1313 	ch_cpu_logout_t *clop;
1314 	uint64_t ceen, nceen;
1315 
1316 	/*
1317 	 * Get the CPU log out info. If we can't find our CPU private
1318 	 * pointer, then we will have to make due without any detailed
1319 	 * logout information.
1320 	 */
1321 	if (CPU_PRIVATE(CPU) == NULL) {
1322 		clop = NULL;
1323 		ceen = p_clo_flags & EN_REG_CEEN;
1324 		nceen = p_clo_flags & EN_REG_NCEEN;
1325 	} else {
1326 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1327 		ceen = clop->clo_flags & EN_REG_CEEN;
1328 		nceen = clop->clo_flags & EN_REG_NCEEN;
1329 	}
1330 
1331 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1332 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1333 }
1334 
1335 /*
1336  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1337  * ECC at TL>0.  Need to supply either a error register pointer or a
1338  * cpu logout structure pointer.
1339  */
1340 static void
1341 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1342     uint64_t nceen, ch_cpu_logout_t *clop)
1343 {
1344 	struct async_flt *aflt;
1345 	ch_async_flt_t ch_flt;
1346 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1347 	char pr_reason[MAX_REASON_STRING];
1348 	ch_cpu_errors_t cpu_error_regs;
1349 
1350 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1351 	/*
1352 	 * If no cpu logout data, then we will have to make due without
1353 	 * any detailed logout information.
1354 	 */
1355 	if (clop == NULL) {
1356 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1357 		get_cpu_error_state(&cpu_error_regs);
1358 		set_cpu_error_state(&cpu_error_regs);
1359 		t_afar = cpu_error_regs.afar;
1360 		t_afsr = cpu_error_regs.afsr;
1361 		t_afsr_ext = cpu_error_regs.afsr_ext;
1362 #if defined(SERRANO)
1363 		ch_flt.afar2 = cpu_error_regs.afar2;
1364 #endif	/* SERRANO */
1365 	} else {
1366 		t_afar = clop->clo_data.chd_afar;
1367 		t_afsr = clop->clo_data.chd_afsr;
1368 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1369 #if defined(SERRANO)
1370 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1371 #endif	/* SERRANO */
1372 	}
1373 
1374 	/*
1375 	 * In order to simplify code, we maintain this afsr_errs
1376 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1377 	 * sticky bits.
1378 	 */
1379 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1380 	    (t_afsr & C_AFSR_ALL_ERRS);
1381 	pr_reason[0] = '\0';
1382 
1383 	/* Setup the async fault structure */
1384 	aflt = (struct async_flt *)&ch_flt;
1385 	aflt->flt_id = gethrtime_waitfree();
1386 	ch_flt.afsr_ext = t_afsr_ext;
1387 	ch_flt.afsr_errs = t_afsr_errs;
1388 	aflt->flt_stat = t_afsr;
1389 	aflt->flt_addr = t_afar;
1390 	aflt->flt_bus_id = getprocessorid();
1391 	aflt->flt_inst = CPU->cpu_id;
1392 	aflt->flt_pc = tpc;
1393 	aflt->flt_prot = AFLT_PROT_NONE;
1394 	aflt->flt_class = CPU_FAULT;
1395 	aflt->flt_priv = priv;
1396 	aflt->flt_tl = tl;
1397 	aflt->flt_status = ECC_F_TRAP;
1398 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1399 
1400 	/*
1401 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1402 	 * cmn_err messages out to the console.  The situation is a UCU (in
1403 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1404 	 * The messages for the UCU and WDU are enqueued and then pulled off
1405 	 * the async queue via softint and syslogd starts to process them
1406 	 * but doesn't get them to the console.  The UE causes a panic, but
1407 	 * since the UCU/WDU messages are already in transit, those aren't
1408 	 * on the async queue.  The hack is to check if we have a matching
1409 	 * WDU event for the UCU, and if it matches, we're more than likely
1410 	 * going to panic with a UE, unless we're under protection.  So, we
1411 	 * check to see if we got a matching WDU event and if we're under
1412 	 * protection.
1413 	 *
1414 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1415 	 * looks like this:
1416 	 *    UCU->WDU->UE
1417 	 * For Panther, it could look like either of these:
1418 	 *    UCU---->WDU->L3_WDU->UE
1419 	 *    L3_UCU->WDU->L3_WDU->UE
1420 	 */
1421 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1422 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1423 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1424 		get_cpu_error_state(&cpu_error_regs);
1425 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1426 		    (cpu_error_regs.afar == t_afar));
1427 		aflt->flt_panic |= ((clop == NULL) &&
1428 		    (t_afsr_errs & C_AFSR_WDU));
1429 	}
1430 
1431 	/*
1432 	 * Queue events on the async event queue, one event per error bit.
1433 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1434 	 * queue an event to complain.
1435 	 */
1436 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1437 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1438 		ch_flt.flt_type = CPU_INV_AFSR;
1439 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1440 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1441 		    aflt->flt_panic);
1442 	}
1443 
1444 	/*
1445 	 * Zero out + invalidate CPU logout.
1446 	 */
1447 	if (clop) {
1448 		bzero(clop, sizeof (ch_cpu_logout_t));
1449 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1450 	}
1451 
1452 	/*
1453 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1454 	 * or disrupting errors have happened.  We do this because if a
1455 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1456 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1457 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1458 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1459 	 * deferred or disrupting error happening between checking the AFSR and
1460 	 * enabling NCEEN/CEEN.
1461 	 *
1462 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1463 	 * taken.
1464 	 */
1465 	set_error_enable(get_error_enable() | (nceen | ceen));
1466 	if (clear_errors(&ch_flt)) {
1467 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1468 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1469 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1470 		    NULL);
1471 	}
1472 
1473 	/*
1474 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1475 	 * be logged as part of the panic flow.
1476 	 */
1477 	if (aflt->flt_panic)
1478 		fm_panic("%sError(s)", pr_reason);
1479 
1480 	/*
1481 	 * Flushing the Ecache here gets the part of the trap handler that
1482 	 * is run at TL=1 out of the Ecache.
1483 	 */
1484 	cpu_flush_ecache();
1485 }
1486 
1487 /*
1488  * This is called via sys_trap from pil15_interrupt code if the
1489  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1490  * various ch_err_tl1_data structures for valid entries based on the bit
1491  * settings in the ch_err_tl1_flags entry of the structure.
1492  */
1493 /*ARGSUSED*/
1494 void
1495 cpu_tl1_error(struct regs *rp, int panic)
1496 {
1497 	ch_err_tl1_data_t *cl1p, cl1;
1498 	int i, ncl1ps;
1499 	uint64_t me_flags;
1500 	uint64_t ceen, nceen;
1501 
1502 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1503 		cl1p = &ch_err_tl1_data;
1504 		ncl1ps = 1;
1505 	} else if (CPU_PRIVATE(CPU) != NULL) {
1506 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1507 		ncl1ps = CH_ERR_TL1_TLMAX;
1508 	} else {
1509 		ncl1ps = 0;
1510 	}
1511 
1512 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1513 		if (cl1p->ch_err_tl1_flags == 0)
1514 			continue;
1515 
1516 		/*
1517 		 * Grab a copy of the logout data and invalidate
1518 		 * the logout area.
1519 		 */
1520 		cl1 = *cl1p;
1521 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1522 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1523 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1524 
1525 		/*
1526 		 * Log "first error" in ch_err_tl1_data.
1527 		 */
1528 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1529 			ceen = get_error_enable() & EN_REG_CEEN;
1530 			nceen = get_error_enable() & EN_REG_NCEEN;
1531 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1532 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1533 		}
1534 #if defined(CPU_IMP_L1_CACHE_PARITY)
1535 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1536 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1537 			    (caddr_t)cl1.ch_err_tl1_tpc);
1538 		}
1539 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1540 
1541 		/*
1542 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1543 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1544 		 * if the structure is busy, we just do the cache flushing
1545 		 * we have to do and then do the retry.  So the AFSR/AFAR
1546 		 * at this point *should* have some relevant info.  If there
1547 		 * are no valid errors in the AFSR, we'll assume they've
1548 		 * already been picked up and logged.  For I$/D$ parity,
1549 		 * we just log an event with an "Unknown" (NULL) TPC.
1550 		 */
1551 		if (me_flags & CH_ERR_FECC) {
1552 			ch_cpu_errors_t cpu_error_regs;
1553 			uint64_t t_afsr_errs;
1554 
1555 			/*
1556 			 * Get the error registers and see if there's
1557 			 * a pending error.  If not, don't bother
1558 			 * generating an "Invalid AFSR" error event.
1559 			 */
1560 			get_cpu_error_state(&cpu_error_regs);
1561 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1562 			    C_AFSR_EXT_ALL_ERRS) |
1563 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1564 			if (t_afsr_errs != 0) {
1565 				ceen = get_error_enable() & EN_REG_CEEN;
1566 				nceen = get_error_enable() & EN_REG_NCEEN;
1567 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1568 				    1, ceen, nceen, NULL);
1569 			}
1570 		}
1571 #if defined(CPU_IMP_L1_CACHE_PARITY)
1572 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1573 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1574 		}
1575 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1576 	}
1577 }
1578 
1579 /*
1580  * Called from Fast ECC TL>0 handler in case of fatal error.
1581  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1582  * but if we don't, we'll panic with something reasonable.
1583  */
1584 /*ARGSUSED*/
1585 void
1586 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1587 {
1588 	cpu_tl1_error(rp, 1);
1589 	/*
1590 	 * Should never return, but just in case.
1591 	 */
1592 	fm_panic("Unsurvivable ECC Error at TL>0");
1593 }
1594 
1595 /*
1596  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1597  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1598  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1599  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1600  *
1601  * Cheetah+ also handles (No additional processing required):
1602  *    DUE, DTO, DBERR	(NCEEN controlled)
1603  *    THCE		(CEEN and ET_ECC_en controlled)
1604  *    TUE		(ET_ECC_en controlled)
1605  *
1606  * Panther further adds:
1607  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1608  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1609  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1610  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1611  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1612  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1613  *
1614  * Note that the p_clo_flags input is only valid in cases where the
1615  * cpu_private struct is not yet initialized (since that is the only
1616  * time that information cannot be obtained from the logout struct.)
1617  */
1618 /*ARGSUSED*/
1619 void
1620 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1621 {
1622 	struct async_flt *aflt;
1623 	ch_async_flt_t ch_flt;
1624 	char pr_reason[MAX_REASON_STRING];
1625 	ch_cpu_logout_t *clop;
1626 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1627 	ch_cpu_errors_t cpu_error_regs;
1628 
1629 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1630 	/*
1631 	 * Get the CPU log out info. If we can't find our CPU private
1632 	 * pointer, then we will have to make due without any detailed
1633 	 * logout information.
1634 	 */
1635 	if (CPU_PRIVATE(CPU) == NULL) {
1636 		clop = NULL;
1637 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1638 		get_cpu_error_state(&cpu_error_regs);
1639 		set_cpu_error_state(&cpu_error_regs);
1640 		t_afar = cpu_error_regs.afar;
1641 		t_afsr = cpu_error_regs.afsr;
1642 		t_afsr_ext = cpu_error_regs.afsr_ext;
1643 #if defined(SERRANO)
1644 		ch_flt.afar2 = cpu_error_regs.afar2;
1645 #endif	/* SERRANO */
1646 	} else {
1647 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1648 		t_afar = clop->clo_data.chd_afar;
1649 		t_afsr = clop->clo_data.chd_afsr;
1650 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1651 #if defined(SERRANO)
1652 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1653 #endif	/* SERRANO */
1654 	}
1655 
1656 	/*
1657 	 * In order to simplify code, we maintain this afsr_errs
1658 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1659 	 * sticky bits.
1660 	 */
1661 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1662 	    (t_afsr & C_AFSR_ALL_ERRS);
1663 
1664 	pr_reason[0] = '\0';
1665 	/* Setup the async fault structure */
1666 	aflt = (struct async_flt *)&ch_flt;
1667 	ch_flt.afsr_ext = t_afsr_ext;
1668 	ch_flt.afsr_errs = t_afsr_errs;
1669 	aflt->flt_stat = t_afsr;
1670 	aflt->flt_addr = t_afar;
1671 	aflt->flt_pc = (caddr_t)rp->r_pc;
1672 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1673 	aflt->flt_tl = 0;
1674 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1675 
1676 	/*
1677 	 * If this trap is a result of one of the errors not masked
1678 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1679 	 * indicate that a timeout is to be set later.
1680 	 */
1681 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1682 	    !aflt->flt_panic)
1683 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1684 	else
1685 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1686 
1687 	/*
1688 	 * log the CE and clean up
1689 	 */
1690 	cpu_log_and_clear_ce(&ch_flt);
1691 
1692 	/*
1693 	 * We re-enable CEEN (if required) and check if any disrupting errors
1694 	 * have happened.  We do this because if a disrupting error had occurred
1695 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1696 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1697 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1698 	 * of a error happening between checking the AFSR and enabling CEEN.
1699 	 */
1700 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1701 	    set_error_enable(get_error_enable() | EN_REG_CEEN);
1702 	if (clear_errors(&ch_flt)) {
1703 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1704 		    NULL);
1705 	}
1706 
1707 	/*
1708 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1709 	 * be logged as part of the panic flow.
1710 	 */
1711 	if (aflt->flt_panic)
1712 		fm_panic("%sError(s)", pr_reason);
1713 }
1714 
1715 /*
1716  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1717  * L3_EDU:BLD, TO, and BERR events.
1718  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1719  *
1720  * Cheetah+: No additional errors handled.
1721  *
1722  * Note that the p_clo_flags input is only valid in cases where the
1723  * cpu_private struct is not yet initialized (since that is the only
1724  * time that information cannot be obtained from the logout struct.)
1725  */
1726 /*ARGSUSED*/
1727 void
1728 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1729 {
1730 	ushort_t ttype, tl;
1731 	ch_async_flt_t ch_flt;
1732 	struct async_flt *aflt;
1733 	int trampolined = 0;
1734 	char pr_reason[MAX_REASON_STRING];
1735 	ch_cpu_logout_t *clop;
1736 	uint64_t ceen, clo_flags;
1737 	uint64_t log_afsr;
1738 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1739 	ch_cpu_errors_t cpu_error_regs;
1740 	int expected = DDI_FM_ERR_UNEXPECTED;
1741 	ddi_acc_hdl_t *hp;
1742 
1743 	/*
1744 	 * We need to look at p_flag to determine if the thread detected an
1745 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1746 	 * because we just need a consistent snapshot and we know that everyone
1747 	 * else will store a consistent set of bits while holding p_lock.  We
1748 	 * don't have to worry about a race because SDOCORE is set once prior
1749 	 * to doing i/o from the process's address space and is never cleared.
1750 	 */
1751 	uint_t pflag = ttoproc(curthread)->p_flag;
1752 
1753 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1754 	/*
1755 	 * Get the CPU log out info. If we can't find our CPU private
1756 	 * pointer then we will have to make due without any detailed
1757 	 * logout information.
1758 	 */
1759 	if (CPU_PRIVATE(CPU) == NULL) {
1760 		clop = NULL;
1761 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1762 		get_cpu_error_state(&cpu_error_regs);
1763 		set_cpu_error_state(&cpu_error_regs);
1764 		t_afar = cpu_error_regs.afar;
1765 		t_afsr = cpu_error_regs.afsr;
1766 		t_afsr_ext = cpu_error_regs.afsr_ext;
1767 #if defined(SERRANO)
1768 		ch_flt.afar2 = cpu_error_regs.afar2;
1769 #endif	/* SERRANO */
1770 		clo_flags = p_clo_flags;
1771 	} else {
1772 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1773 		t_afar = clop->clo_data.chd_afar;
1774 		t_afsr = clop->clo_data.chd_afsr;
1775 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1776 #if defined(SERRANO)
1777 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1778 #endif	/* SERRANO */
1779 		clo_flags = clop->clo_flags;
1780 	}
1781 
1782 	/*
1783 	 * In order to simplify code, we maintain this afsr_errs
1784 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1785 	 * sticky bits.
1786 	 */
1787 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1788 	    (t_afsr & C_AFSR_ALL_ERRS);
1789 	pr_reason[0] = '\0';
1790 
1791 	/*
1792 	 * Grab information encoded into our clo_flags field.
1793 	 */
1794 	ceen = clo_flags & EN_REG_CEEN;
1795 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1796 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1797 
1798 	/*
1799 	 * handle the specific error
1800 	 */
1801 	aflt = (struct async_flt *)&ch_flt;
1802 	aflt->flt_id = gethrtime_waitfree();
1803 	aflt->flt_bus_id = getprocessorid();
1804 	aflt->flt_inst = CPU->cpu_id;
1805 	ch_flt.afsr_ext = t_afsr_ext;
1806 	ch_flt.afsr_errs = t_afsr_errs;
1807 	aflt->flt_stat = t_afsr;
1808 	aflt->flt_addr = t_afar;
1809 	aflt->flt_pc = (caddr_t)rp->r_pc;
1810 	aflt->flt_prot = AFLT_PROT_NONE;
1811 	aflt->flt_class = CPU_FAULT;
1812 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1813 	aflt->flt_tl = (uchar_t)tl;
1814 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1815 	    C_AFSR_PANIC(t_afsr_errs));
1816 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1817 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1818 
1819 	/*
1820 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1821 	 * see if we were executing in the kernel under on_trap() or t_lofault
1822 	 * protection.  If so, modify the saved registers so that we return
1823 	 * from the trap to the appropriate trampoline routine.
1824 	 */
1825 	if (aflt->flt_priv && tl == 0) {
1826 		if (curthread->t_ontrap != NULL) {
1827 			on_trap_data_t *otp = curthread->t_ontrap;
1828 
1829 			if (otp->ot_prot & OT_DATA_EC) {
1830 				aflt->flt_prot = AFLT_PROT_EC;
1831 				otp->ot_trap |= OT_DATA_EC;
1832 				rp->r_pc = otp->ot_trampoline;
1833 				rp->r_npc = rp->r_pc + 4;
1834 				trampolined = 1;
1835 			}
1836 
1837 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1838 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1839 				aflt->flt_prot = AFLT_PROT_ACCESS;
1840 				otp->ot_trap |= OT_DATA_ACCESS;
1841 				rp->r_pc = otp->ot_trampoline;
1842 				rp->r_npc = rp->r_pc + 4;
1843 				trampolined = 1;
1844 				/*
1845 				 * for peeks and caut_gets errors are expected
1846 				 */
1847 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1848 				if (!hp)
1849 					expected = DDI_FM_ERR_PEEK;
1850 				else if (hp->ah_acc.devacc_attr_access ==
1851 				    DDI_CAUTIOUS_ACC)
1852 					expected = DDI_FM_ERR_EXPECTED;
1853 			}
1854 
1855 		} else if (curthread->t_lofault) {
1856 			aflt->flt_prot = AFLT_PROT_COPY;
1857 			rp->r_g1 = EFAULT;
1858 			rp->r_pc = curthread->t_lofault;
1859 			rp->r_npc = rp->r_pc + 4;
1860 			trampolined = 1;
1861 		}
1862 	}
1863 
1864 	/*
1865 	 * If we're in user mode or we're doing a protected copy, we either
1866 	 * want the ASTON code below to send a signal to the user process
1867 	 * or we want to panic if aft_panic is set.
1868 	 *
1869 	 * If we're in privileged mode and we're not doing a copy, then we
1870 	 * need to check if we've trampolined.  If we haven't trampolined,
1871 	 * we should panic.
1872 	 */
1873 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1874 		if (t_afsr_errs &
1875 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1876 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1877 			aflt->flt_panic |= aft_panic;
1878 	} else if (!trampolined) {
1879 			aflt->flt_panic = 1;
1880 	}
1881 
1882 	/*
1883 	 * If we've trampolined due to a privileged TO or BERR, or if an
1884 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1885 	 * event for that TO or BERR.  Queue all other events (if any) besides
1886 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1887 	 * ignore the number of events queued.  If we haven't trampolined due
1888 	 * to a TO or BERR, just enqueue events normally.
1889 	 */
1890 	log_afsr = t_afsr_errs;
1891 	if (trampolined) {
1892 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1893 	} else if (!aflt->flt_priv) {
1894 		/*
1895 		 * User mode, suppress messages if
1896 		 * cpu_berr_to_verbose is not set.
1897 		 */
1898 		if (!cpu_berr_to_verbose)
1899 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1900 	}
1901 
1902 	/*
1903 	 * Log any errors that occurred
1904 	 */
1905 	if (((log_afsr &
1906 		((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1907 		cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1908 		(t_afsr_errs &
1909 		(C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1910 		ch_flt.flt_type = CPU_INV_AFSR;
1911 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1912 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1913 		    aflt->flt_panic);
1914 	}
1915 
1916 	/*
1917 	 * Zero out + invalidate CPU logout.
1918 	 */
1919 	if (clop) {
1920 		bzero(clop, sizeof (ch_cpu_logout_t));
1921 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1922 	}
1923 
1924 #if defined(JALAPENO) || defined(SERRANO)
1925 	/*
1926 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1927 	 * IO errors that may have resulted in this trap.
1928 	 */
1929 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1930 		cpu_run_bus_error_handlers(aflt, expected);
1931 	}
1932 
1933 	/*
1934 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1935 	 * line from the Ecache.  We also need to query the bus nexus for
1936 	 * fatal errors.  Attempts to do diagnostic read on caches may
1937 	 * introduce more errors (especially when the module is bad).
1938 	 */
1939 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1940 		/*
1941 		 * Ask our bus nexus friends if they have any fatal errors.  If
1942 		 * so, they will log appropriate error messages.
1943 		 */
1944 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1945 			aflt->flt_panic = 1;
1946 
1947 		/*
1948 		 * We got a UE or RUE and are panicking, save the fault PA in
1949 		 * a known location so that the platform specific panic code
1950 		 * can check for copyback errors.
1951 		 */
1952 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1953 			panic_aflt = *aflt;
1954 		}
1955 	}
1956 
1957 	/*
1958 	 * Flush Ecache line or entire Ecache
1959 	 */
1960 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1961 		cpu_error_ecache_flush(&ch_flt);
1962 #else /* JALAPENO || SERRANO */
1963 	/*
1964 	 * UE/BERR/TO: Call our bus nexus friends to check for
1965 	 * IO errors that may have resulted in this trap.
1966 	 */
1967 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1968 		cpu_run_bus_error_handlers(aflt, expected);
1969 	}
1970 
1971 	/*
1972 	 * UE: If the UE is in memory, we need to flush the bad
1973 	 * line from the Ecache.  We also need to query the bus nexus for
1974 	 * fatal errors.  Attempts to do diagnostic read on caches may
1975 	 * introduce more errors (especially when the module is bad).
1976 	 */
1977 	if (t_afsr & C_AFSR_UE) {
1978 		/*
1979 		 * Ask our legacy bus nexus friends if they have any fatal
1980 		 * errors.  If so, they will log appropriate error messages.
1981 		 */
1982 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1983 			aflt->flt_panic = 1;
1984 
1985 		/*
1986 		 * We got a UE and are panicking, save the fault PA in a known
1987 		 * location so that the platform specific panic code can check
1988 		 * for copyback errors.
1989 		 */
1990 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1991 			panic_aflt = *aflt;
1992 		}
1993 	}
1994 
1995 	/*
1996 	 * Flush Ecache line or entire Ecache
1997 	 */
1998 	if (t_afsr_errs &
1999 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
2000 		cpu_error_ecache_flush(&ch_flt);
2001 #endif /* JALAPENO || SERRANO */
2002 
2003 	/*
2004 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2005 	 * or disrupting errors have happened.  We do this because if a
2006 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2007 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2008 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2009 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2010 	 * deferred or disrupting error happening between checking the AFSR and
2011 	 * enabling NCEEN/CEEN.
2012 	 *
2013 	 * Note: CEEN reenabled only if it was on when trap taken.
2014 	 */
2015 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2016 	if (clear_errors(&ch_flt)) {
2017 		/*
2018 		 * Check for secondary errors, and avoid panicking if we
2019 		 * have them
2020 		 */
2021 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2022 		    t_afar) == 0) {
2023 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2024 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2025 		}
2026 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2027 		    NULL);
2028 	}
2029 
2030 	/*
2031 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2032 	 * be logged as part of the panic flow.
2033 	 */
2034 	if (aflt->flt_panic)
2035 		fm_panic("%sError(s)", pr_reason);
2036 
2037 	/*
2038 	 * If we queued an error and we are going to return from the trap and
2039 	 * the error was in user mode or inside of a copy routine, set AST flag
2040 	 * so the queue will be drained before returning to user mode.  The
2041 	 * AST processing will also act on our failure policy.
2042 	 */
2043 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2044 		int pcb_flag = 0;
2045 
2046 		if (t_afsr_errs &
2047 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2048 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2049 			pcb_flag |= ASYNC_HWERR;
2050 
2051 		if (t_afsr & C_AFSR_BERR)
2052 			pcb_flag |= ASYNC_BERR;
2053 
2054 		if (t_afsr & C_AFSR_TO)
2055 			pcb_flag |= ASYNC_BTO;
2056 
2057 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2058 		aston(curthread);
2059 	}
2060 }
2061 
2062 #if defined(CPU_IMP_L1_CACHE_PARITY)
2063 /*
2064  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2065  *
2066  * For Panther, P$ data parity errors during floating point load hits
2067  * are also detected (reported as TT 0x71) and handled by this trap
2068  * handler.
2069  *
2070  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2071  * is available.
2072  */
2073 /*ARGSUSED*/
2074 void
2075 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2076 {
2077 	ch_async_flt_t ch_flt;
2078 	struct async_flt *aflt;
2079 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2080 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2081 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2082 	char *error_class;
2083 
2084 	/*
2085 	 * Log the error.
2086 	 * For icache parity errors the fault address is the trap PC.
2087 	 * For dcache/pcache parity errors the instruction would have to
2088 	 * be decoded to determine the address and that isn't possible
2089 	 * at high PIL.
2090 	 */
2091 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2092 	aflt = (struct async_flt *)&ch_flt;
2093 	aflt->flt_id = gethrtime_waitfree();
2094 	aflt->flt_bus_id = getprocessorid();
2095 	aflt->flt_inst = CPU->cpu_id;
2096 	aflt->flt_pc = tpc;
2097 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2098 	aflt->flt_prot = AFLT_PROT_NONE;
2099 	aflt->flt_class = CPU_FAULT;
2100 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2101 	aflt->flt_tl = tl;
2102 	aflt->flt_panic = panic;
2103 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2104 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2105 
2106 	if (iparity) {
2107 		cpu_icache_parity_info(&ch_flt);
2108 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2109 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2110 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2111 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2112 		else
2113 			error_class = FM_EREPORT_CPU_USIII_IPE;
2114 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2115 	} else {
2116 		cpu_dcache_parity_info(&ch_flt);
2117 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2118 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2119 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2120 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2121 		else
2122 			error_class = FM_EREPORT_CPU_USIII_DPE;
2123 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2124 		/*
2125 		 * For panther we also need to check the P$ for parity errors.
2126 		 */
2127 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2128 			cpu_pcache_parity_info(&ch_flt);
2129 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2130 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2131 				aflt->flt_payload =
2132 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2133 			}
2134 		}
2135 	}
2136 
2137 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2138 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2139 
2140 	if (iparity) {
2141 		/*
2142 		 * Invalidate entire I$.
2143 		 * This is required due to the use of diagnostic ASI
2144 		 * accesses that may result in a loss of I$ coherency.
2145 		 */
2146 		if (cache_boot_state & DCU_IC) {
2147 			flush_icache();
2148 		}
2149 		/*
2150 		 * According to section P.3.1 of the Panther PRM, we
2151 		 * need to do a little more for recovery on those
2152 		 * CPUs after encountering an I$ parity error.
2153 		 */
2154 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2155 			flush_ipb();
2156 			correct_dcache_parity(dcache_size,
2157 			    dcache_linesize);
2158 			flush_pcache();
2159 		}
2160 	} else {
2161 		/*
2162 		 * Since the valid bit is ignored when checking parity the
2163 		 * D$ data and tag must also be corrected.  Set D$ data bits
2164 		 * to zero and set utag to 0, 1, 2, 3.
2165 		 */
2166 		correct_dcache_parity(dcache_size, dcache_linesize);
2167 
2168 		/*
2169 		 * According to section P.3.3 of the Panther PRM, we
2170 		 * need to do a little more for recovery on those
2171 		 * CPUs after encountering a D$ or P$ parity error.
2172 		 *
2173 		 * As far as clearing P$ parity errors, it is enough to
2174 		 * simply invalidate all entries in the P$ since P$ parity
2175 		 * error traps are only generated for floating point load
2176 		 * hits.
2177 		 */
2178 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2179 			flush_icache();
2180 			flush_ipb();
2181 			flush_pcache();
2182 		}
2183 	}
2184 
2185 	/*
2186 	 * Invalidate entire D$ if it was enabled.
2187 	 * This is done to avoid stale data in the D$ which might
2188 	 * occur with the D$ disabled and the trap handler doing
2189 	 * stores affecting lines already in the D$.
2190 	 */
2191 	if (cache_boot_state & DCU_DC) {
2192 		flush_dcache();
2193 	}
2194 
2195 	/*
2196 	 * Restore caches to their bootup state.
2197 	 */
2198 	set_dcu(get_dcu() | cache_boot_state);
2199 
2200 	/*
2201 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2202 	 * be logged as part of the panic flow.
2203 	 */
2204 	if (aflt->flt_panic)
2205 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2206 
2207 	/*
2208 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2209 	 * the chance of getting an unrecoverable Fast ECC error.  This
2210 	 * flush will evict the part of the parity trap handler that is run
2211 	 * at TL>1.
2212 	 */
2213 	if (tl) {
2214 		cpu_flush_ecache();
2215 	}
2216 }
2217 
2218 /*
2219  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2220  * to indicate which portions of the captured data should be in the ereport.
2221  */
2222 void
2223 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2224 {
2225 	int way = ch_flt->parity_data.ipe.cpl_way;
2226 	int offset = ch_flt->parity_data.ipe.cpl_off;
2227 	int tag_index;
2228 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2229 
2230 
2231 	if ((offset != -1) || (way != -1)) {
2232 		/*
2233 		 * Parity error in I$ tag or data
2234 		 */
2235 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2236 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2237 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2238 			    PN_ICIDX_TO_WAY(tag_index);
2239 		else
2240 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2241 			    CH_ICIDX_TO_WAY(tag_index);
2242 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2243 		    IC_LOGFLAG_MAGIC;
2244 	} else {
2245 		/*
2246 		 * Parity error was not identified.
2247 		 * Log tags and data for all ways.
2248 		 */
2249 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2250 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2251 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2252 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2253 				    PN_ICIDX_TO_WAY(tag_index);
2254 			else
2255 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2256 				    CH_ICIDX_TO_WAY(tag_index);
2257 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2258 			    IC_LOGFLAG_MAGIC;
2259 		}
2260 	}
2261 }
2262 
2263 /*
2264  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2265  * to indicate which portions of the captured data should be in the ereport.
2266  */
2267 void
2268 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2269 {
2270 	int way = ch_flt->parity_data.dpe.cpl_way;
2271 	int offset = ch_flt->parity_data.dpe.cpl_off;
2272 	int tag_index;
2273 
2274 	if (offset != -1) {
2275 		/*
2276 		 * Parity error in D$ or P$ data array.
2277 		 *
2278 		 * First check to see whether the parity error is in D$ or P$
2279 		 * since P$ data parity errors are reported in Panther using
2280 		 * the same trap.
2281 		 */
2282 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2283 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2284 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2285 			    CH_PCIDX_TO_WAY(tag_index);
2286 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2287 			    PC_LOGFLAG_MAGIC;
2288 		} else {
2289 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2290 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2291 			    CH_DCIDX_TO_WAY(tag_index);
2292 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2293 			    DC_LOGFLAG_MAGIC;
2294 		}
2295 	} else if (way != -1) {
2296 		/*
2297 		 * Parity error in D$ tag.
2298 		 */
2299 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2300 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2301 		    CH_DCIDX_TO_WAY(tag_index);
2302 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2303 		    DC_LOGFLAG_MAGIC;
2304 	}
2305 }
2306 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2307 
2308 /*
2309  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2310  * post-process CPU events that are dequeued.  As such, it can be invoked
2311  * from softint context, from AST processing in the trap() flow, or from the
2312  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2313  * Historically this entry point was used to log the actual cmn_err(9F) text;
2314  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2315  * With FMA this function now also returns a flag which indicates to the
2316  * caller whether the ereport should be posted (1) or suppressed (0).
2317  */
2318 static int
2319 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2320 {
2321 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2322 	struct async_flt *aflt = (struct async_flt *)flt;
2323 	uint64_t errors;
2324 
2325 	switch (ch_flt->flt_type) {
2326 	case CPU_INV_AFSR:
2327 		/*
2328 		 * If it is a disrupting trap and the AFSR is zero, then
2329 		 * the event has probably already been noted. Do not post
2330 		 * an ereport.
2331 		 */
2332 		if ((aflt->flt_status & ECC_C_TRAP) &&
2333 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2334 			return (0);
2335 		else
2336 			return (1);
2337 	case CPU_TO:
2338 	case CPU_BERR:
2339 	case CPU_FATAL:
2340 	case CPU_FPUERR:
2341 		return (1);
2342 
2343 	case CPU_UE_ECACHE_RETIRE:
2344 		cpu_log_err(aflt);
2345 		cpu_page_retire(ch_flt);
2346 		return (1);
2347 
2348 	/*
2349 	 * Cases where we may want to suppress logging or perform
2350 	 * extended diagnostics.
2351 	 */
2352 	case CPU_CE:
2353 	case CPU_EMC:
2354 		/*
2355 		 * We want to skip logging and further classification
2356 		 * only if ALL the following conditions are true:
2357 		 *
2358 		 *	1. There is only one error
2359 		 *	2. That error is a correctable memory error
2360 		 *	3. The error is caused by the memory scrubber (in
2361 		 *	   which case the error will have occurred under
2362 		 *	   on_trap protection)
2363 		 *	4. The error is on a retired page
2364 		 *
2365 		 * Note: AFLT_PROT_EC is used places other than the memory
2366 		 * scrubber.  However, none of those errors should occur
2367 		 * on a retired page.
2368 		 */
2369 		if ((ch_flt->afsr_errs &
2370 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2371 		    aflt->flt_prot == AFLT_PROT_EC) {
2372 
2373 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2374 			    if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2375 
2376 				/*
2377 				 * Since we're skipping logging, we'll need
2378 				 * to schedule the re-enabling of CEEN
2379 				 */
2380 				(void) timeout(cpu_delayed_check_ce_errors,
2381 				    (void *)(uintptr_t)aflt->flt_inst,
2382 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2383 						 * MICROSEC));
2384 			    }
2385 			    return (0);
2386 			}
2387 		}
2388 
2389 		/*
2390 		 * Perform/schedule further classification actions, but
2391 		 * only if the page is healthy (we don't want bad
2392 		 * pages inducing too much diagnostic activity).  If we could
2393 		 * not find a page pointer then we also skip this.  If
2394 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2395 		 * to copy and recirculate the event (for further diagnostics)
2396 		 * and we should not proceed to log it here.
2397 		 *
2398 		 * This must be the last step here before the cpu_log_err()
2399 		 * below - if an event recirculates cpu_ce_log_err() will
2400 		 * not call the current function but just proceed directly
2401 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2402 		 *
2403 		 * Note: Check cpu_impl_async_log_err if changing this
2404 		 */
2405 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2406 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2407 			    CE_XDIAG_SKIP_NOPP);
2408 		} else {
2409 			if (errors != PR_OK) {
2410 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2411 				    CE_XDIAG_SKIP_PAGEDET);
2412 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2413 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2414 				return (0);
2415 			}
2416 		}
2417 		/*FALLTHRU*/
2418 
2419 	/*
2420 	 * Cases where we just want to report the error and continue.
2421 	 */
2422 	case CPU_CE_ECACHE:
2423 	case CPU_UE_ECACHE:
2424 	case CPU_IV:
2425 	case CPU_ORPH:
2426 		cpu_log_err(aflt);
2427 		return (1);
2428 
2429 	/*
2430 	 * Cases where we want to fall through to handle panicking.
2431 	 */
2432 	case CPU_UE:
2433 		/*
2434 		 * We want to skip logging in the same conditions as the
2435 		 * CE case.  In addition, we want to make sure we're not
2436 		 * panicking.
2437 		 */
2438 		if (!panicstr && (ch_flt->afsr_errs &
2439 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2440 		    aflt->flt_prot == AFLT_PROT_EC) {
2441 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2442 				/* Zero the address to clear the error */
2443 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2444 				return (0);
2445 			}
2446 		}
2447 		cpu_log_err(aflt);
2448 		break;
2449 
2450 	default:
2451 		/*
2452 		 * If the us3_common.c code doesn't know the flt_type, it may
2453 		 * be an implementation-specific code.  Call into the impldep
2454 		 * backend to find out what to do: if it tells us to continue,
2455 		 * break and handle as if falling through from a UE; if not,
2456 		 * the impldep backend has handled the error and we're done.
2457 		 */
2458 		switch (cpu_impl_async_log_err(flt, eqep)) {
2459 		case CH_ASYNC_LOG_DONE:
2460 			return (1);
2461 		case CH_ASYNC_LOG_RECIRC:
2462 			return (0);
2463 		case CH_ASYNC_LOG_CONTINUE:
2464 			break; /* continue on to handle UE-like error */
2465 		default:
2466 			cmn_err(CE_WARN, "discarding error 0x%p with "
2467 			    "invalid fault type (0x%x)",
2468 			    (void *)aflt, ch_flt->flt_type);
2469 			return (0);
2470 		}
2471 	}
2472 
2473 	/* ... fall through from the UE case */
2474 
2475 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2476 		if (!panicstr) {
2477 			cpu_page_retire(ch_flt);
2478 		} else {
2479 			/*
2480 			 * Clear UEs on panic so that we don't
2481 			 * get haunted by them during panic or
2482 			 * after reboot
2483 			 */
2484 			cpu_clearphys(aflt);
2485 			(void) clear_errors(NULL);
2486 		}
2487 	}
2488 
2489 	return (1);
2490 }
2491 
2492 /*
2493  * Retire the bad page that may contain the flushed error.
2494  */
2495 void
2496 cpu_page_retire(ch_async_flt_t *ch_flt)
2497 {
2498 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2499 	(void) page_retire(aflt->flt_addr, PR_UE);
2500 }
2501 
2502 /*
2503  * Return true if the error specified in the AFSR indicates
2504  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2505  * for Panther, none for Jalapeno/Serrano).
2506  */
2507 /* ARGSUSED */
2508 static int
2509 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2510 {
2511 #if defined(JALAPENO) || defined(SERRANO)
2512 	return (0);
2513 #elif defined(CHEETAH_PLUS)
2514 	if (IS_PANTHER(cpunodes[cpuid].implementation))
2515 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2516 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2517 #else	/* CHEETAH_PLUS */
2518 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2519 #endif
2520 }
2521 
2522 /*
2523  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2524  * generic event post-processing for correctable and uncorrectable memory,
2525  * E$, and MTag errors.  Historically this entry point was used to log bits of
2526  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2527  * converted into an ereport.  In addition, it transmits the error to any
2528  * platform-specific service-processor FRU logging routines, if available.
2529  */
2530 void
2531 cpu_log_err(struct async_flt *aflt)
2532 {
2533 	char unum[UNUM_NAMLEN];
2534 	int len = 0;
2535 	int synd_status, synd_code, afar_status;
2536 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2537 
2538 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2539 		aflt->flt_status |= ECC_ECACHE;
2540 	else
2541 		aflt->flt_status &= ~ECC_ECACHE;
2542 	/*
2543 	 * Determine syndrome status.
2544 	 */
2545 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2546 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2547 
2548 	/*
2549 	 * Determine afar status.
2550 	 */
2551 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2552 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2553 				ch_flt->flt_bit);
2554 	else
2555 		afar_status = AFLT_STAT_INVALID;
2556 
2557 	/*
2558 	 * If afar status is not invalid do a unum lookup.
2559 	 */
2560 	if (afar_status != AFLT_STAT_INVALID) {
2561 		(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
2562 			UNUM_NAMLEN, &len);
2563 	} else {
2564 		unum[0] = '\0';
2565 	}
2566 
2567 	synd_code = synd_to_synd_code(synd_status,
2568 	    aflt->flt_synd, ch_flt->flt_bit);
2569 
2570 	/*
2571 	 * Do not send the fruid message (plat_ecc_error_data_t)
2572 	 * to the SC if it can handle the enhanced error information
2573 	 * (plat_ecc_error2_data_t) or when the tunable
2574 	 * ecc_log_fruid_enable is set to 0.
2575 	 */
2576 
2577 	if (&plat_ecc_capability_sc_get &&
2578 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2579 		if (&plat_log_fruid_error)
2580 			plat_log_fruid_error(synd_code, aflt, unum,
2581 			    ch_flt->flt_bit);
2582 	}
2583 
2584 	if (aflt->flt_func != NULL)
2585 		aflt->flt_func(aflt, unum);
2586 
2587 	if (afar_status != AFLT_STAT_INVALID)
2588 		cpu_log_diag_info(ch_flt);
2589 
2590 	/*
2591 	 * If we have a CEEN error , we do not reenable CEEN until after
2592 	 * we exit the trap handler. Otherwise, another error may
2593 	 * occur causing the handler to be entered recursively.
2594 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2595 	 * to try and ensure that the CPU makes progress in the face
2596 	 * of a CE storm.
2597 	 */
2598 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2599 		(void) timeout(cpu_delayed_check_ce_errors,
2600 		    (void *)(uintptr_t)aflt->flt_inst,
2601 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2602 	}
2603 }
2604 
2605 /*
2606  * Invoked by error_init() early in startup and therefore before
2607  * startup_errorq() is called to drain any error Q -
2608  *
2609  * startup()
2610  *   startup_end()
2611  *     error_init()
2612  *       cpu_error_init()
2613  * errorq_init()
2614  *   errorq_drain()
2615  * start_other_cpus()
2616  *
2617  * The purpose of this routine is to create error-related taskqs.  Taskqs
2618  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2619  * context.
2620  */
2621 void
2622 cpu_error_init(int items)
2623 {
2624 	/*
2625 	 * Create taskq(s) to reenable CE
2626 	 */
2627 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2628 	    items, items, TASKQ_PREPOPULATE);
2629 }
2630 
2631 void
2632 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2633 {
2634 	char unum[UNUM_NAMLEN];
2635 	int len;
2636 
2637 	switch (aflt->flt_class) {
2638 	case CPU_FAULT:
2639 		cpu_ereport_init(aflt);
2640 		if (cpu_async_log_err(aflt, eqep))
2641 			cpu_ereport_post(aflt);
2642 		break;
2643 
2644 	case BUS_FAULT:
2645 		if (aflt->flt_func != NULL) {
2646 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2647 			    unum, UNUM_NAMLEN, &len);
2648 			aflt->flt_func(aflt, unum);
2649 		}
2650 		break;
2651 
2652 	case RECIRC_CPU_FAULT:
2653 		aflt->flt_class = CPU_FAULT;
2654 		cpu_log_err(aflt);
2655 		cpu_ereport_post(aflt);
2656 		break;
2657 
2658 	case RECIRC_BUS_FAULT:
2659 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2660 		/*FALLTHRU*/
2661 	default:
2662 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2663 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2664 		return;
2665 	}
2666 }
2667 
2668 /*
2669  * Scrub and classify a CE.  This function must not modify the
2670  * fault structure passed to it but instead should return the classification
2671  * information.
2672  */
2673 
2674 static uchar_t
2675 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2676 {
2677 	uchar_t disp = CE_XDIAG_EXTALG;
2678 	on_trap_data_t otd;
2679 	uint64_t orig_err;
2680 	ch_cpu_logout_t *clop;
2681 
2682 	/*
2683 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2684 	 * this, but our other callers have not.  Disable preemption to
2685 	 * avoid CPU migration so that we restore CEEN on the correct
2686 	 * cpu later.
2687 	 *
2688 	 * CEEN is cleared so that further CEs that our instruction and
2689 	 * data footprint induce do not cause use to either creep down
2690 	 * kernel stack to the point of overflow, or do so much CE
2691 	 * notification as to make little real forward progress.
2692 	 *
2693 	 * NCEEN must not be cleared.  However it is possible that
2694 	 * our accesses to the flt_addr may provoke a bus error or timeout
2695 	 * if the offending address has just been unconfigured as part of
2696 	 * a DR action.  So we must operate under on_trap protection.
2697 	 */
2698 	kpreempt_disable();
2699 	orig_err = get_error_enable();
2700 	if (orig_err & EN_REG_CEEN)
2701 	    set_error_enable(orig_err & ~EN_REG_CEEN);
2702 
2703 	/*
2704 	 * Our classification algorithm includes the line state before
2705 	 * the scrub; we'd like this captured after the detection and
2706 	 * before the algorithm below - the earlier the better.
2707 	 *
2708 	 * If we've come from a cpu CE trap then this info already exists
2709 	 * in the cpu logout area.
2710 	 *
2711 	 * For a CE detected by memscrub for which there was no trap
2712 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2713 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2714 	 * marked the fault structure as incomplete as a flag to later
2715 	 * logging code.
2716 	 *
2717 	 * If called directly from an IO detected CE there has been
2718 	 * no line data capture.  In this case we logout to the cpu logout
2719 	 * area - that's appropriate since it's the cpu cache data we need
2720 	 * for classification.  We thus borrow the cpu logout area for a
2721 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2722 	 * this time (we will invalidate it again below).
2723 	 *
2724 	 * If called from the partner check xcall handler then this cpu
2725 	 * (the partner) has not necessarily experienced a CE at this
2726 	 * address.  But we want to capture line state before its scrub
2727 	 * attempt since we use that in our classification.
2728 	 */
2729 	if (logout_tried == B_FALSE) {
2730 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2731 			disp |= CE_XDIAG_NOLOGOUT;
2732 	}
2733 
2734 	/*
2735 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2736 	 * no longer be valid (if DR'd since the initial event) so we
2737 	 * perform this scrub under on_trap protection.  If this access is
2738 	 * ok then further accesses below will also be ok - DR cannot
2739 	 * proceed while this thread is active (preemption is disabled);
2740 	 * to be safe we'll nonetheless use on_trap again below.
2741 	 */
2742 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2743 		cpu_scrubphys(ecc);
2744 	} else {
2745 		no_trap();
2746 		if (orig_err & EN_REG_CEEN)
2747 		    set_error_enable(orig_err);
2748 		kpreempt_enable();
2749 		return (disp);
2750 	}
2751 	no_trap();
2752 
2753 	/*
2754 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2755 	 * Note that it's quite possible that the read sourced the data from
2756 	 * another cpu.
2757 	 */
2758 	if (clear_ecc(ecc))
2759 		disp |= CE_XDIAG_CE1;
2760 
2761 	/*
2762 	 * Read the data again.  This time the read is very likely to
2763 	 * come from memory since the scrub induced a writeback to memory.
2764 	 */
2765 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2766 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2767 	} else {
2768 		no_trap();
2769 		if (orig_err & EN_REG_CEEN)
2770 		    set_error_enable(orig_err);
2771 		kpreempt_enable();
2772 		return (disp);
2773 	}
2774 	no_trap();
2775 
2776 	/* Did that read induce a CE that matches the AFAR? */
2777 	if (clear_ecc(ecc))
2778 		disp |= CE_XDIAG_CE2;
2779 
2780 	/*
2781 	 * Look at the logout information and record whether we found the
2782 	 * line in l2/l3 cache.  For Panther we are interested in whether
2783 	 * we found it in either cache (it won't reside in both but
2784 	 * it is possible to read it that way given the moving target).
2785 	 */
2786 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2787 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2788 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2789 		int hit, level;
2790 		int state;
2791 		int totalsize;
2792 		ch_ec_data_t *ecp;
2793 
2794 		/*
2795 		 * If hit is nonzero then a match was found and hit will
2796 		 * be one greater than the index which hit.  For Panther we
2797 		 * also need to pay attention to level to see which of l2$ or
2798 		 * l3$ it hit in.
2799 		 */
2800 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2801 		    0, &level);
2802 
2803 		if (hit) {
2804 			--hit;
2805 			disp |= CE_XDIAG_AFARMATCH;
2806 
2807 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2808 				if (level == 2)
2809 					ecp = &clop->clo_data.chd_l2_data[hit];
2810 				else
2811 					ecp = &clop->clo_data.chd_ec_data[hit];
2812 			} else {
2813 				ASSERT(level == 2);
2814 				ecp = &clop->clo_data.chd_ec_data[hit];
2815 			}
2816 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2817 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2818 			    ecc->flt_addr, ecp->ec_tag);
2819 
2820 			/*
2821 			 * Cheetah variants use different state encodings -
2822 			 * the CH_ECSTATE_* defines vary depending on the
2823 			 * module we're compiled for.  Translate into our
2824 			 * one true version.  Conflate Owner-Shared state
2825 			 * of SSM mode with Owner as victimisation of such
2826 			 * lines may cause a writeback.
2827 			 */
2828 			switch (state) {
2829 			case CH_ECSTATE_MOD:
2830 				disp |= EC_STATE_M;
2831 				break;
2832 
2833 			case CH_ECSTATE_OWN:
2834 			case CH_ECSTATE_OWS:
2835 				disp |= EC_STATE_O;
2836 				break;
2837 
2838 			case CH_ECSTATE_EXL:
2839 				disp |= EC_STATE_E;
2840 				break;
2841 
2842 			case CH_ECSTATE_SHR:
2843 				disp |= EC_STATE_S;
2844 				break;
2845 
2846 			default:
2847 				disp |= EC_STATE_I;
2848 				break;
2849 			}
2850 		}
2851 
2852 		/*
2853 		 * If we initiated the delayed logout then we are responsible
2854 		 * for invalidating the logout area.
2855 		 */
2856 		if (logout_tried == B_FALSE) {
2857 			bzero(clop, sizeof (ch_cpu_logout_t));
2858 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2859 		}
2860 	}
2861 
2862 	/*
2863 	 * Re-enable CEEN if we turned it off.
2864 	 */
2865 	if (orig_err & EN_REG_CEEN)
2866 	    set_error_enable(orig_err);
2867 	kpreempt_enable();
2868 
2869 	return (disp);
2870 }
2871 
2872 /*
2873  * Scrub a correctable memory error and collect data for classification
2874  * of CE type.  This function is called in the detection path, ie tl0 handling
2875  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2876  */
2877 void
2878 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2879 {
2880 	/*
2881 	 * Cheetah CE classification does not set any bits in flt_status.
2882 	 * Instead we will record classification datapoints in flt_disp.
2883 	 */
2884 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2885 
2886 	/*
2887 	 * To check if the error detected by IO is persistent, sticky or
2888 	 * intermittent.  This is noticed by clear_ecc().
2889 	 */
2890 	if (ecc->flt_status & ECC_IOBUS)
2891 		ecc->flt_stat = C_AFSR_MEMORY;
2892 
2893 	/*
2894 	 * Record information from this first part of the algorithm in
2895 	 * flt_disp.
2896 	 */
2897 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2898 }
2899 
2900 /*
2901  * Select a partner to perform a further CE classification check from.
2902  * Must be called with kernel preemption disabled (to stop the cpu list
2903  * from changing).  The detecting cpu we are partnering has cpuid
2904  * aflt->flt_inst; we might not be running on the detecting cpu.
2905  *
2906  * Restrict choice to active cpus in the same cpu partition as ourselves in
2907  * an effort to stop bad cpus in one partition causing other partitions to
2908  * perform excessive diagnostic activity.  Actually since the errorq drain
2909  * is run from a softint most of the time and that is a global mechanism
2910  * this isolation is only partial.  Return NULL if we fail to find a
2911  * suitable partner.
2912  *
2913  * We prefer a partner that is in a different latency group to ourselves as
2914  * we will share fewer datapaths.  If such a partner is unavailable then
2915  * choose one in the same lgroup but prefer a different chip and only allow
2916  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2917  * flags includes PTNR_SELFOK then permit selection of the original detector.
2918  *
2919  * We keep a cache of the last partner selected for a cpu, and we'll try to
2920  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2921  * have passed since that selection was made.  This provides the benefit
2922  * of the point-of-view of different partners over time but without
2923  * requiring frequent cpu list traversals.
2924  */
2925 
2926 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2927 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2928 
2929 static cpu_t *
2930 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2931 {
2932 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2933 	hrtime_t lasttime, thistime;
2934 
2935 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2936 
2937 	dtcr = cpu[aflt->flt_inst];
2938 
2939 	/*
2940 	 * Short-circuit for the following cases:
2941 	 *	. the dtcr is not flagged active
2942 	 *	. there is just one cpu present
2943 	 *	. the detector has disappeared
2944 	 *	. we were given a bad flt_inst cpuid; this should not happen
2945 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2946 	 *	  reason to panic.
2947 	 *	. there is just one cpu left online in the cpu partition
2948 	 *
2949 	 * If we return NULL after this point then we do not update the
2950 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2951 	 * again next time; this is the case where the only other cpu online
2952 	 * in the detector's partition is on the same chip as the detector
2953 	 * and since CEEN re-enable is throttled even that case should not
2954 	 * hurt performance.
2955 	 */
2956 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2957 		return (NULL);
2958 	}
2959 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2960 		if (flags & PTNR_SELFOK) {
2961 			*typep = CE_XDIAG_PTNR_SELF;
2962 			return (dtcr);
2963 		} else {
2964 			return (NULL);
2965 		}
2966 	}
2967 
2968 	thistime = gethrtime();
2969 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2970 
2971 	/*
2972 	 * Select a starting point.
2973 	 */
2974 	if (!lasttime) {
2975 		/*
2976 		 * We've never selected a partner for this detector before.
2977 		 * Start the scan at the next online cpu in the same cpu
2978 		 * partition.
2979 		 */
2980 		sp = dtcr->cpu_next_part;
2981 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2982 		/*
2983 		 * Our last selection has not aged yet.  If this partner:
2984 		 *	. is still a valid cpu,
2985 		 *	. is still in the same partition as the detector
2986 		 *	. is still marked active
2987 		 *	. satisfies the 'flags' argument criteria
2988 		 * then select it again without updating the timestamp.
2989 		 */
2990 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2991 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2992 		    !cpu_flagged_active(sp->cpu_flags) ||
2993 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2994 		    (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
2995 		    !(flags & PTNR_SIBLINGOK))) {
2996 			sp = dtcr->cpu_next_part;
2997 		} else {
2998 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2999 				*typep = CE_XDIAG_PTNR_REMOTE;
3000 			} else if (sp == dtcr) {
3001 				*typep = CE_XDIAG_PTNR_SELF;
3002 			} else if (sp->cpu_chip->chip_id ==
3003 			    dtcr->cpu_chip->chip_id) {
3004 				*typep = CE_XDIAG_PTNR_SIBLING;
3005 			} else {
3006 				*typep = CE_XDIAG_PTNR_LOCAL;
3007 			}
3008 			return (sp);
3009 		}
3010 	} else {
3011 		/*
3012 		 * Our last selection has aged.  If it is nonetheless still a
3013 		 * valid cpu then start the scan at the next cpu in the
3014 		 * partition after our last partner.  If the last selection
3015 		 * is no longer a valid cpu then go with our default.  In
3016 		 * this way we slowly cycle through possible partners to
3017 		 * obtain multiple viewpoints over time.
3018 		 */
3019 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3020 		if (sp == NULL) {
3021 			sp = dtcr->cpu_next_part;
3022 		} else {
3023 			sp = sp->cpu_next_part;		/* may be dtcr */
3024 			if (sp->cpu_part != dtcr->cpu_part)
3025 				sp = dtcr;
3026 		}
3027 	}
3028 
3029 	/*
3030 	 * We have a proposed starting point for our search, but if this
3031 	 * cpu is offline then its cpu_next_part will point to itself
3032 	 * so we can't use that to iterate over cpus in this partition in
3033 	 * the loop below.  We still want to avoid iterating over cpus not
3034 	 * in our partition, so in the case that our starting point is offline
3035 	 * we will repoint it to be the detector itself;  and if the detector
3036 	 * happens to be offline we'll return NULL from the following loop.
3037 	 */
3038 	if (!cpu_flagged_active(sp->cpu_flags)) {
3039 		sp = dtcr;
3040 	}
3041 
3042 	ptnr = sp;
3043 	locptnr = NULL;
3044 	sibptnr = NULL;
3045 	do {
3046 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3047 			continue;
3048 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3049 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3050 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3051 			*typep = CE_XDIAG_PTNR_REMOTE;
3052 			return (ptnr);
3053 		}
3054 		if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
3055 			if (sibptnr == NULL)
3056 				sibptnr = ptnr;
3057 			continue;
3058 		}
3059 		if (locptnr == NULL)
3060 			locptnr = ptnr;
3061 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3062 
3063 	/*
3064 	 * A foreign partner has already been returned if one was available.
3065 	 *
3066 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3067 	 * detector, is active, and is not a sibling of the detector.
3068 	 *
3069 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3070 	 * active.
3071 	 *
3072 	 * If we have to resort to using the detector itself we have already
3073 	 * checked that it is active.
3074 	 */
3075 	if (locptnr) {
3076 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3077 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3078 		*typep = CE_XDIAG_PTNR_LOCAL;
3079 		return (locptnr);
3080 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3081 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3082 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3083 		*typep = CE_XDIAG_PTNR_SIBLING;
3084 		return (sibptnr);
3085 	} else if (flags & PTNR_SELFOK) {
3086 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3087 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3088 		*typep = CE_XDIAG_PTNR_SELF;
3089 		return (dtcr);
3090 	}
3091 
3092 	return (NULL);
3093 }
3094 
3095 /*
3096  * Cross call handler that is requested to run on the designated partner of
3097  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3098  */
3099 static void
3100 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3101 {
3102 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3103 }
3104 
3105 /*
3106  * The associated errorqs are never destroyed so we do not need to deal with
3107  * them disappearing before this timeout fires.  If the affected memory
3108  * has been DR'd out since the original event the scrub algrithm will catch
3109  * any errors and return null disposition info.  If the original detecting
3110  * cpu has been DR'd out then ereport detector info will not be able to
3111  * lookup CPU type;  with a small timeout this is unlikely.
3112  */
3113 static void
3114 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3115 {
3116 	struct async_flt *aflt = cbarg->lkycb_aflt;
3117 	uchar_t disp;
3118 	cpu_t *cp;
3119 	int ptnrtype;
3120 
3121 	kpreempt_disable();
3122 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3123 	    &ptnrtype)) {
3124 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3125 		    (uint64_t)&disp);
3126 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3127 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3128 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3129 	} else {
3130 		ce_xdiag_lkydrops++;
3131 		if (ncpus > 1)
3132 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3133 			    CE_XDIAG_SKIP_NOPTNR);
3134 	}
3135 	kpreempt_enable();
3136 
3137 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3138 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3139 }
3140 
3141 /*
3142  * Called from errorq drain code when processing a CE error, both from
3143  * CPU and PCI drain functions.  Decide what further classification actions,
3144  * if any, we will perform.  Perform immediate actions now, and schedule
3145  * delayed actions as required.  Note that we are no longer necessarily running
3146  * on the detecting cpu, and that the async_flt structure will not persist on
3147  * return from this function.
3148  *
3149  * Calls to this function should aim to be self-throtlling in some way.  With
3150  * the delayed re-enable of CEEN the absolute rate of calls should not
3151  * be excessive.  Callers should also avoid performing in-depth classification
3152  * for events in pages that are already known to be suspect.
3153  *
3154  * We return nonzero to indicate that the event has been copied and
3155  * recirculated for further testing.  The caller should not log the event
3156  * in this case - it will be logged when further test results are available.
3157  *
3158  * Our possible contexts are that of errorq_drain: below lock level or from
3159  * panic context.  We can assume that the cpu we are running on is online.
3160  */
3161 
3162 
3163 #ifdef DEBUG
3164 static int ce_xdiag_forceaction;
3165 #endif
3166 
3167 int
3168 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3169     errorq_elem_t *eqep, size_t afltoffset)
3170 {
3171 	ce_dispact_t dispact, action;
3172 	cpu_t *cp;
3173 	uchar_t dtcrinfo, disp;
3174 	int ptnrtype;
3175 
3176 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3177 		ce_xdiag_drops++;
3178 		return (0);
3179 	} else if (!aflt->flt_in_memory) {
3180 		ce_xdiag_drops++;
3181 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3182 		return (0);
3183 	}
3184 
3185 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3186 
3187 	/*
3188 	 * Some correctable events are not scrubbed/classified, such as those
3189 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3190 	 * initial detector classification go no further.
3191 	 */
3192 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3193 		ce_xdiag_drops++;
3194 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3195 		return (0);
3196 	}
3197 
3198 	dispact = CE_DISPACT(ce_disp_table,
3199 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3200 	    CE_XDIAG_STATE(dtcrinfo),
3201 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3202 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3203 
3204 
3205 	action = CE_ACT(dispact);	/* bad lookup caught below */
3206 #ifdef DEBUG
3207 	if (ce_xdiag_forceaction != 0)
3208 		action = ce_xdiag_forceaction;
3209 #endif
3210 
3211 	switch (action) {
3212 	case CE_ACT_LKYCHK: {
3213 		caddr_t ndata;
3214 		errorq_elem_t *neqep;
3215 		struct async_flt *ecc;
3216 		ce_lkychk_cb_t *cbargp;
3217 
3218 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3219 			ce_xdiag_lkydrops++;
3220 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3221 			    CE_XDIAG_SKIP_DUPFAIL);
3222 			break;
3223 		}
3224 		ecc = (struct async_flt *)(ndata + afltoffset);
3225 
3226 		ASSERT(ecc->flt_class == CPU_FAULT ||
3227 		    ecc->flt_class == BUS_FAULT);
3228 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3229 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3230 
3231 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3232 		cbargp->lkycb_aflt = ecc;
3233 		cbargp->lkycb_eqp = eqp;
3234 		cbargp->lkycb_eqep = neqep;
3235 
3236 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3237 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3238 		return (1);
3239 	}
3240 
3241 	case CE_ACT_PTNRCHK:
3242 		kpreempt_disable();	/* stop cpu list changing */
3243 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3244 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3245 			    (uint64_t)aflt, (uint64_t)&disp);
3246 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3247 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3248 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3249 		} else if (ncpus > 1) {
3250 			ce_xdiag_ptnrdrops++;
3251 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3252 			    CE_XDIAG_SKIP_NOPTNR);
3253 		} else {
3254 			ce_xdiag_ptnrdrops++;
3255 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3256 			    CE_XDIAG_SKIP_UNIPROC);
3257 		}
3258 		kpreempt_enable();
3259 		break;
3260 
3261 	case CE_ACT_DONE:
3262 		break;
3263 
3264 	case CE_ACT(CE_DISP_BAD):
3265 	default:
3266 #ifdef DEBUG
3267 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3268 #endif
3269 		ce_xdiag_bad++;
3270 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3271 		break;
3272 	}
3273 
3274 	return (0);
3275 }
3276 
3277 /*
3278  * We route all errors through a single switch statement.
3279  */
3280 void
3281 cpu_ue_log_err(struct async_flt *aflt)
3282 {
3283 	switch (aflt->flt_class) {
3284 	case CPU_FAULT:
3285 		cpu_ereport_init(aflt);
3286 		if (cpu_async_log_err(aflt, NULL))
3287 			cpu_ereport_post(aflt);
3288 		break;
3289 
3290 	case BUS_FAULT:
3291 		bus_async_log_err(aflt);
3292 		break;
3293 
3294 	default:
3295 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3296 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3297 		return;
3298 	}
3299 }
3300 
3301 /*
3302  * Routine for panic hook callback from panic_idle().
3303  */
3304 void
3305 cpu_async_panic_callb(void)
3306 {
3307 	ch_async_flt_t ch_flt;
3308 	struct async_flt *aflt;
3309 	ch_cpu_errors_t cpu_error_regs;
3310 	uint64_t afsr_errs;
3311 
3312 	get_cpu_error_state(&cpu_error_regs);
3313 
3314 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3315 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3316 
3317 	if (afsr_errs) {
3318 
3319 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3320 		aflt = (struct async_flt *)&ch_flt;
3321 		aflt->flt_id = gethrtime_waitfree();
3322 		aflt->flt_bus_id = getprocessorid();
3323 		aflt->flt_inst = CPU->cpu_id;
3324 		aflt->flt_stat = cpu_error_regs.afsr;
3325 		aflt->flt_addr = cpu_error_regs.afar;
3326 		aflt->flt_prot = AFLT_PROT_NONE;
3327 		aflt->flt_class = CPU_FAULT;
3328 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3329 		aflt->flt_panic = 1;
3330 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3331 		ch_flt.afsr_errs = afsr_errs;
3332 #if defined(SERRANO)
3333 		ch_flt.afar2 = cpu_error_regs.afar2;
3334 #endif	/* SERRANO */
3335 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3336 	}
3337 }
3338 
3339 /*
3340  * Routine to convert a syndrome into a syndrome code.
3341  */
3342 static int
3343 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3344 {
3345 	if (synd_status == AFLT_STAT_INVALID)
3346 		return (-1);
3347 
3348 	/*
3349 	 * Use the syndrome to index the appropriate syndrome table,
3350 	 * to get the code indicating which bit(s) is(are) bad.
3351 	 */
3352 	if (afsr_bit &
3353 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3354 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3355 #if defined(JALAPENO) || defined(SERRANO)
3356 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3357 				return (-1);
3358 			else
3359 				return (BPAR0 + synd);
3360 #else /* JALAPENO || SERRANO */
3361 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3362 				return (-1);
3363 			else
3364 				return (mtag_syndrome_tab[synd]);
3365 #endif /* JALAPENO || SERRANO */
3366 		} else {
3367 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3368 				return (-1);
3369 			else
3370 				return (ecc_syndrome_tab[synd]);
3371 		}
3372 	} else {
3373 		return (-1);
3374 	}
3375 }
3376 
3377 int
3378 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3379 {
3380 	if (&plat_get_mem_sid)
3381 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3382 	else
3383 		return (ENOTSUP);
3384 }
3385 
3386 int
3387 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3388 {
3389 	if (&plat_get_mem_offset)
3390 		return (plat_get_mem_offset(flt_addr, offp));
3391 	else
3392 		return (ENOTSUP);
3393 }
3394 
3395 int
3396 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3397 {
3398 	if (&plat_get_mem_addr)
3399 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3400 	else
3401 		return (ENOTSUP);
3402 }
3403 
3404 /*
3405  * Routine to return a string identifying the physical name
3406  * associated with a memory/cache error.
3407  */
3408 int
3409 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3410     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3411     ushort_t flt_status, char *buf, int buflen, int *lenp)
3412 {
3413 	int synd_code;
3414 	int ret;
3415 
3416 	/*
3417 	 * An AFSR of -1 defaults to a memory syndrome.
3418 	 */
3419 	if (flt_stat == (uint64_t)-1)
3420 		flt_stat = C_AFSR_CE;
3421 
3422 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3423 
3424 	/*
3425 	 * Syndrome code must be either a single-bit error code
3426 	 * (0...143) or -1 for unum lookup.
3427 	 */
3428 	if (synd_code < 0 || synd_code >= M2)
3429 		synd_code = -1;
3430 	if (&plat_get_mem_unum) {
3431 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3432 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3433 			buf[0] = '\0';
3434 			*lenp = 0;
3435 		}
3436 
3437 		return (ret);
3438 	}
3439 
3440 	return (ENOTSUP);
3441 }
3442 
3443 /*
3444  * Wrapper for cpu_get_mem_unum() routine that takes an
3445  * async_flt struct rather than explicit arguments.
3446  */
3447 int
3448 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3449     char *buf, int buflen, int *lenp)
3450 {
3451 	/*
3452 	 * If we come thru here for an IO bus error aflt->flt_stat will
3453 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3454 	 * so it will interpret this as a memory error.
3455 	 */
3456 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3457 	    (aflt->flt_class == BUS_FAULT) ?
3458 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3459 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3460 	    aflt->flt_status, buf, buflen, lenp));
3461 }
3462 
3463 /*
3464  * This routine is a more generic interface to cpu_get_mem_unum()
3465  * that may be used by other modules (e.g. the 'mm' driver, through
3466  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3467  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3468  */
3469 int
3470 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3471     char *buf, int buflen, int *lenp)
3472 {
3473 	int synd_status, flt_in_memory, ret;
3474 	ushort_t flt_status = 0;
3475 	char unum[UNUM_NAMLEN];
3476 	uint64_t t_afsr_errs;
3477 
3478 	/*
3479 	 * Check for an invalid address.
3480 	 */
3481 	if (afar == (uint64_t)-1)
3482 		return (ENXIO);
3483 
3484 	if (synd == (uint64_t)-1)
3485 		synd_status = AFLT_STAT_INVALID;
3486 	else
3487 		synd_status = AFLT_STAT_VALID;
3488 
3489 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3490 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3491 
3492 	/*
3493 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3494 	 */
3495 	if (*afsr == (uint64_t)-1)
3496 		t_afsr_errs = C_AFSR_CE;
3497 	else {
3498 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3499 #if defined(CHEETAH_PLUS)
3500 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3501 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3502 #endif	/* CHEETAH_PLUS */
3503 	}
3504 
3505 	/*
3506 	 * Turn on ECC_ECACHE if error type is E$ Data.
3507 	 */
3508 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3509 		flt_status |= ECC_ECACHE;
3510 
3511 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3512 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3513 	if (ret != 0)
3514 		return (ret);
3515 
3516 	if (*lenp >= buflen)
3517 		return (ENAMETOOLONG);
3518 
3519 	(void) strncpy(buf, unum, buflen);
3520 
3521 	return (0);
3522 }
3523 
3524 /*
3525  * Routine to return memory information associated
3526  * with a physical address and syndrome.
3527  */
3528 int
3529 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3530     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3531     int *segsp, int *banksp, int *mcidp)
3532 {
3533 	int synd_status, synd_code;
3534 
3535 	if (afar == (uint64_t)-1)
3536 		return (ENXIO);
3537 
3538 	if (synd == (uint64_t)-1)
3539 		synd_status = AFLT_STAT_INVALID;
3540 	else
3541 		synd_status = AFLT_STAT_VALID;
3542 
3543 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3544 
3545 	if (p2get_mem_info != NULL)
3546 		return ((p2get_mem_info)(synd_code, afar,
3547 			mem_sizep, seg_sizep, bank_sizep,
3548 			segsp, banksp, mcidp));
3549 	else
3550 		return (ENOTSUP);
3551 }
3552 
3553 /*
3554  * Routine to return a string identifying the physical
3555  * name associated with a cpuid.
3556  */
3557 int
3558 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3559 {
3560 	int ret;
3561 	char unum[UNUM_NAMLEN];
3562 
3563 	if (&plat_get_cpu_unum) {
3564 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3565 		    != 0)
3566 			return (ret);
3567 	} else {
3568 		return (ENOTSUP);
3569 	}
3570 
3571 	if (*lenp >= buflen)
3572 		return (ENAMETOOLONG);
3573 
3574 	(void) strncpy(buf, unum, buflen);
3575 
3576 	return (0);
3577 }
3578 
3579 /*
3580  * This routine exports the name buffer size.
3581  */
3582 size_t
3583 cpu_get_name_bufsize()
3584 {
3585 	return (UNUM_NAMLEN);
3586 }
3587 
/*
 * Historical function, apparently not used.  Intentionally does nothing.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3595 
/*
 * Historical function only called for SBus errors in debugging.
 * Intentionally does nothing.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3603 
3604 /*
3605  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3606  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3607  * an async fault structure argument is passed in, the captured error state
3608  * (AFSR, AFAR) info will be returned in the structure.
3609  */
3610 int
3611 clear_errors(ch_async_flt_t *ch_flt)
3612 {
3613 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3614 	ch_cpu_errors_t	cpu_error_regs;
3615 
3616 	get_cpu_error_state(&cpu_error_regs);
3617 
3618 	if (ch_flt != NULL) {
3619 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3620 		aflt->flt_addr = cpu_error_regs.afar;
3621 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3622 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3623 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3624 #if defined(SERRANO)
3625 		ch_flt->afar2 = cpu_error_regs.afar2;
3626 #endif	/* SERRANO */
3627 	}
3628 
3629 	set_cpu_error_state(&cpu_error_regs);
3630 
3631 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3632 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3633 }
3634 
3635 /*
3636  * Clear any AFSR error bits, and check for persistence.
3637  *
3638  * It would be desirable to also insist that syndrome match.  PCI handling
3639  * has already filled flt_synd.  For errors trapped by CPU we only fill
3640  * flt_synd when we queue the event, so we do not have a valid flt_synd
3641  * during initial classification (it is valid if we're called as part of
3642  * subsequent low-pil additional classification attempts).  We could try
3643  * to determine which syndrome to use: we know we're only called for
3644  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3645  * would be esynd/none and esynd/msynd, respectively.  If that is
3646  * implemented then what do we do in the case that we do experience an
3647  * error on the same afar but with different syndrome?  At the very least
3648  * we should count such occurences.  Anyway, for now, we'll leave it as
3649  * it has been for ages.
3650  */
3651 static int
3652 clear_ecc(struct async_flt *aflt)
3653 {
3654 	ch_cpu_errors_t	cpu_error_regs;
3655 
3656 	/*
3657 	 * Snapshot the AFSR and AFAR and clear any errors
3658 	 */
3659 	get_cpu_error_state(&cpu_error_regs);
3660 	set_cpu_error_state(&cpu_error_regs);
3661 
3662 	/*
3663 	 * If any of the same memory access error bits are still on and
3664 	 * the AFAR matches, return that the error is persistent.
3665 	 */
3666 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3667 	    cpu_error_regs.afar == aflt->flt_addr);
3668 }
3669 
3670 /*
3671  * Turn off all cpu error detection, normally only used for panics.
3672  */
3673 void
3674 cpu_disable_errors(void)
3675 {
3676 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3677 
3678 	/*
3679 	 * With error detection now turned off, check the other cpus
3680 	 * logout areas for any unlogged errors.
3681 	 */
3682 	if (enable_check_other_cpus_logout) {
3683 		cpu_check_other_cpus_logout();
3684 		/*
3685 		 * Make a second pass over the logout areas, in case
3686 		 * there is a failing CPU in an error-trap loop which
3687 		 * will write to the logout area once it is emptied.
3688 		 */
3689 		cpu_check_other_cpus_logout();
3690 	}
3691 }
3692 
3693 /*
3694  * Enable errors.
3695  */
3696 void
3697 cpu_enable_errors(void)
3698 {
3699 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3700 }
3701 
3702 /*
3703  * Flush the entire ecache using displacement flush by reading through a
3704  * physical address range twice as large as the Ecache.
3705  */
3706 void
3707 cpu_flush_ecache(void)
3708 {
3709 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3710 	    cpunodes[CPU->cpu_id].ecache_linesize);
3711 }
3712 
3713 /*
3714  * Return CPU E$ set size - E$ size divided by the associativity.
3715  * We use this function in places where the CPU_PRIVATE ptr may not be
3716  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3717  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3718  * up before the kernel switches from OBP's to the kernel's trap table, so
3719  * we don't have to worry about cpunodes being unitialized.
3720  */
3721 int
3722 cpu_ecache_set_size(struct cpu *cp)
3723 {
3724 	if (CPU_PRIVATE(cp))
3725 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3726 
3727 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3728 }
3729 
3730 /*
3731  * Flush Ecache line.
3732  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3733  * Uses normal displacement flush for Cheetah.
3734  */
3735 static void
3736 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3737 {
3738 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3739 	int ec_set_size = cpu_ecache_set_size(CPU);
3740 
3741 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3742 }
3743 
3744 /*
3745  * Scrub physical address.
3746  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3747  * Ecache or direct-mapped Ecache.
3748  */
3749 static void
3750 cpu_scrubphys(struct async_flt *aflt)
3751 {
3752 	int ec_set_size = cpu_ecache_set_size(CPU);
3753 
3754 	scrubphys(aflt->flt_addr, ec_set_size);
3755 }
3756 
3757 /*
3758  * Clear physical address.
3759  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3760  * Ecache or direct-mapped Ecache.
3761  */
3762 void
3763 cpu_clearphys(struct async_flt *aflt)
3764 {
3765 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3766 	int ec_set_size = cpu_ecache_set_size(CPU);
3767 
3768 
3769 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3770 }
3771 
#if defined(CPU_IMP_ECACHE_ASSOC)
/*
 * Look through the E$ diagnostic data captured in ch_flt for a way whose
 * tag is not invalid and whose tag PA matches the fault address.
 * If found, return set# + 1. Otherwise return 0.
 */
static int
cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
	int ec_set_size = cpu_ecache_set_size(CPU);
	ch_ec_data_t *ways = ch_flt->flt_diag_data.chd_ec_data;
	int nway = cpu_ecache_nway();
	uint64_t fault_pa;
	int way;

	/* Fault address with the bits below the set size masked off. */
	fault_pa = aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size);

	for (way = 0; way < nway; way++) {
		if (cpu_ectag_line_invalid(totalsize, ways[way].ec_tag))
			continue;
		if (fault_pa == cpu_ectag_to_pa(ec_set_size, ways[way].ec_tag))
			return (way + 1);
	}

	return (0);
}
#endif /* CPU_IMP_ECACHE_ASSOC */
3796 
3797 /*
3798  * Check whether a line in the given logout info matches the specified
3799  * fault address.  If reqval is set then the line must not be Invalid.
3800  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3801  * set to 2 for l2$ or 3 for l3$.
3802  */
3803 static int
3804 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3805 {
3806 	ch_diag_data_t *cdp = data;
3807 	ch_ec_data_t *ecp;
3808 	int totalsize, ec_set_size;
3809 	int i, ways;
3810 	int match = 0;
3811 	int tagvalid;
3812 	uint64_t addr, tagpa;
3813 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3814 
3815 	/*
3816 	 * Check the l2$ logout data
3817 	 */
3818 	if (ispanther) {
3819 		ecp = &cdp->chd_l2_data[0];
3820 		ec_set_size = PN_L2_SET_SIZE;
3821 		ways = PN_L2_NWAYS;
3822 	} else {
3823 		ecp = &cdp->chd_ec_data[0];
3824 		ec_set_size = cpu_ecache_set_size(CPU);
3825 		ways = cpu_ecache_nway();
3826 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3827 	}
3828 	/* remove low order PA bits from fault address not used in PA tag */
3829 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3830 	for (i = 0; i < ways; i++, ecp++) {
3831 		if (ispanther) {
3832 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3833 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3834 		} else {
3835 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3836 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3837 			    ecp->ec_tag);
3838 		}
3839 		if (tagpa == addr && (!reqval || tagvalid)) {
3840 			match = i + 1;
3841 			*level = 2;
3842 			break;
3843 		}
3844 	}
3845 
3846 	if (match || !ispanther)
3847 		return (match);
3848 
3849 	/* For Panther we also check the l3$ */
3850 	ecp = &cdp->chd_ec_data[0];
3851 	ec_set_size = PN_L3_SET_SIZE;
3852 	ways = PN_L3_NWAYS;
3853 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3854 
3855 	for (i = 0; i < ways; i++, ecp++) {
3856 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3857 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3858 			match = i + 1;
3859 			*level = 3;
3860 			break;
3861 		}
3862 	}
3863 
3864 	return (match);
3865 }
3866 
3867 #if defined(CPU_IMP_L1_CACHE_PARITY)
3868 /*
3869  * Record information related to the source of an Dcache Parity Error.
3870  */
3871 static void
3872 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3873 {
3874 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3875 	int index;
3876 
3877 	/*
3878 	 * Since instruction decode cannot be done at high PIL
3879 	 * just examine the entire Dcache to locate the error.
3880 	 */
3881 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3882 		ch_flt->parity_data.dpe.cpl_way = -1;
3883 		ch_flt->parity_data.dpe.cpl_off = -1;
3884 	}
3885 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3886 		cpu_dcache_parity_check(ch_flt, index);
3887 }
3888 
3889 /*
3890  * Check all ways of the Dcache at a specified index for good parity.
3891  */
3892 static void
3893 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3894 {
3895 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3896 	uint64_t parity_bits, pbits, data_word;
3897 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3898 	int way, word, data_byte;
3899 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3900 	ch_dc_data_t tmp_dcp;
3901 
3902 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3903 		/*
3904 		 * Perform diagnostic read.
3905 		 */
3906 		get_dcache_dtag(index + way * dc_set_size,
3907 				(uint64_t *)&tmp_dcp);
3908 
3909 		/*
3910 		 * Check tag for even parity.
3911 		 * Sum of 1 bits (including parity bit) should be even.
3912 		 */
3913 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3914 			/*
3915 			 * If this is the first error log detailed information
3916 			 * about it and check the snoop tag. Otherwise just
3917 			 * record the fact that we found another error.
3918 			 */
3919 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3920 				ch_flt->parity_data.dpe.cpl_way = way;
3921 				ch_flt->parity_data.dpe.cpl_cache =
3922 				    CPU_DC_PARITY;
3923 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3924 
3925 				if (popc64(tmp_dcp.dc_sntag &
3926 						CHP_DCSNTAG_PARMASK) & 1) {
3927 					ch_flt->parity_data.dpe.cpl_tag |=
3928 								CHP_DC_SNTAG;
3929 					ch_flt->parity_data.dpe.cpl_lcnt++;
3930 				}
3931 
3932 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3933 			}
3934 
3935 			ch_flt->parity_data.dpe.cpl_lcnt++;
3936 		}
3937 
3938 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3939 			/*
3940 			 * Panther has more parity bits than the other
3941 			 * processors for covering dcache data and so each
3942 			 * byte of data in each word has its own parity bit.
3943 			 */
3944 			parity_bits = tmp_dcp.dc_pn_data_parity;
3945 			for (word = 0; word < 4; word++) {
3946 				data_word = tmp_dcp.dc_data[word];
3947 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3948 				for (data_byte = 0; data_byte < 8;
3949 				    data_byte++) {
3950 					if (((popc64(data_word &
3951 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3952 					    (pbits & 1)) {
3953 						cpu_record_dc_data_parity(
3954 						ch_flt, dcp, &tmp_dcp, way,
3955 						word);
3956 					}
3957 					pbits >>= 1;
3958 					data_word >>= 8;
3959 				}
3960 				parity_bits >>= 8;
3961 			}
3962 		} else {
3963 			/*
3964 			 * Check data array for even parity.
3965 			 * The 8 parity bits are grouped into 4 pairs each
3966 			 * of which covers a 64-bit word.  The endianness is
3967 			 * reversed -- the low-order parity bits cover the
3968 			 * high-order data words.
3969 			 */
3970 			parity_bits = tmp_dcp.dc_utag >> 8;
3971 			for (word = 0; word < 4; word++) {
3972 				pbits = (parity_bits >> (6 - word * 2)) & 3;
3973 				if ((popc64(tmp_dcp.dc_data[word]) +
3974 				    parity_bits_popc[pbits]) & 1) {
3975 					cpu_record_dc_data_parity(ch_flt, dcp,
3976 					    &tmp_dcp, way, word);
3977 				}
3978 			}
3979 		}
3980 	}
3981 }
3982 
3983 static void
3984 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
3985     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
3986 {
3987 	/*
3988 	 * If this is the first error log detailed information about it.
3989 	 * Otherwise just record the fact that we found another error.
3990 	 */
3991 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3992 		ch_flt->parity_data.dpe.cpl_way = way;
3993 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
3994 		ch_flt->parity_data.dpe.cpl_off = word * 8;
3995 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
3996 	}
3997 	ch_flt->parity_data.dpe.cpl_lcnt++;
3998 }
3999 
4000 /*
4001  * Record information related to the source of an Icache Parity Error.
4002  *
4003  * Called with the Icache disabled so any diagnostic accesses are safe.
4004  */
4005 static void
4006 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4007 {
4008 	int	ic_set_size;
4009 	int	ic_linesize;
4010 	int	index;
4011 
4012 	if (CPU_PRIVATE(CPU)) {
4013 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4014 		    CH_ICACHE_NWAY;
4015 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4016 	} else {
4017 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4018 		ic_linesize = icache_linesize;
4019 	}
4020 
4021 	ch_flt->parity_data.ipe.cpl_way = -1;
4022 	ch_flt->parity_data.ipe.cpl_off = -1;
4023 
4024 	for (index = 0; index < ic_set_size; index += ic_linesize)
4025 		cpu_icache_parity_check(ch_flt, index);
4026 }
4027 
4028 /*
4029  * Check all ways of the Icache at a specified index for good parity.
4030  */
4031 static void
4032 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4033 {
4034 	uint64_t parmask, pn_inst_parity;
4035 	int ic_set_size;
4036 	int ic_linesize;
4037 	int flt_index, way, instr, num_instr;
4038 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4039 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4040 	ch_ic_data_t tmp_icp;
4041 
4042 	if (CPU_PRIVATE(CPU)) {
4043 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4044 		    CH_ICACHE_NWAY;
4045 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4046 	} else {
4047 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4048 		ic_linesize = icache_linesize;
4049 	}
4050 
4051 	/*
4052 	 * Panther has twice as many instructions per icache line and the
4053 	 * instruction parity bit is in a different location.
4054 	 */
4055 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4056 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4057 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4058 	} else {
4059 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4060 		pn_inst_parity = 0;
4061 	}
4062 
4063 	/*
4064 	 * Index at which we expect to find the parity error.
4065 	 */
4066 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4067 
4068 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4069 		/*
4070 		 * Diagnostic reads expect address argument in ASI format.
4071 		 */
4072 		get_icache_dtag(2 * (index + way * ic_set_size),
4073 				(uint64_t *)&tmp_icp);
4074 
4075 		/*
4076 		 * If this is the index in which we expect to find the
4077 		 * error log detailed information about each of the ways.
4078 		 * This information will be displayed later if we can't
4079 		 * determine the exact way in which the error is located.
4080 		 */
4081 		if (flt_index == index)
4082 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4083 
4084 		/*
4085 		 * Check tag for even parity.
4086 		 * Sum of 1 bits (including parity bit) should be even.
4087 		 */
4088 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4089 			/*
4090 			 * If this way is the one in which we expected
4091 			 * to find the error record the way and check the
4092 			 * snoop tag. Otherwise just record the fact we
4093 			 * found another error.
4094 			 */
4095 			if (flt_index == index) {
4096 				ch_flt->parity_data.ipe.cpl_way = way;
4097 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4098 
4099 				if (popc64(tmp_icp.ic_sntag &
4100 						CHP_ICSNTAG_PARMASK) & 1) {
4101 					ch_flt->parity_data.ipe.cpl_tag |=
4102 								CHP_IC_SNTAG;
4103 					ch_flt->parity_data.ipe.cpl_lcnt++;
4104 				}
4105 
4106 			}
4107 			ch_flt->parity_data.ipe.cpl_lcnt++;
4108 			continue;
4109 		}
4110 
4111 		/*
4112 		 * Check instruction data for even parity.
4113 		 * Bits participating in parity differ for PC-relative
4114 		 * versus non-PC-relative instructions.
4115 		 */
4116 		for (instr = 0; instr < num_instr; instr++) {
4117 			parmask = (tmp_icp.ic_data[instr] &
4118 					CH_ICDATA_PRED_ISPCREL) ?
4119 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4120 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4121 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4122 				/*
4123 				 * If this way is the one in which we expected
4124 				 * to find the error record the way and offset.
4125 				 * Otherwise just log the fact we found another
4126 				 * error.
4127 				 */
4128 				if (flt_index == index) {
4129 					ch_flt->parity_data.ipe.cpl_way = way;
4130 					ch_flt->parity_data.ipe.cpl_off =
4131 								instr * 4;
4132 				}
4133 				ch_flt->parity_data.ipe.cpl_lcnt++;
4134 				continue;
4135 			}
4136 		}
4137 	}
4138 }
4139 
4140 /*
4141  * Record information related to the source of an Pcache Parity Error.
4142  */
4143 static void
4144 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4145 {
4146 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4147 	int index;
4148 
4149 	/*
4150 	 * Since instruction decode cannot be done at high PIL just
4151 	 * examine the entire Pcache to check for any parity errors.
4152 	 */
4153 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4154 		ch_flt->parity_data.dpe.cpl_way = -1;
4155 		ch_flt->parity_data.dpe.cpl_off = -1;
4156 	}
4157 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4158 		cpu_pcache_parity_check(ch_flt, index);
4159 }
4160 
4161 /*
4162  * Check all ways of the Pcache at a specified index for good parity.
4163  */
4164 static void
4165 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4166 {
4167 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4168 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4169 	int way, word, pbit, parity_bits;
4170 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4171 	ch_pc_data_t tmp_pcp;
4172 
4173 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4174 		/*
4175 		 * Perform diagnostic read.
4176 		 */
4177 		get_pcache_dtag(index + way * pc_set_size,
4178 				(uint64_t *)&tmp_pcp);
4179 		/*
4180 		 * Check data array for odd parity. There are 8 parity
4181 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4182 		 * of those bits covers exactly 8 bytes of the data
4183 		 * array:
4184 		 *
4185 		 *	parity bit	P$ data bytes covered
4186 		 *	----------	---------------------
4187 		 *	50		63:56
4188 		 *	51		55:48
4189 		 *	52		47:40
4190 		 *	53		39:32
4191 		 *	54		31:24
4192 		 *	55		23:16
4193 		 *	56		15:8
4194 		 *	57		7:0
4195 		 */
4196 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4197 		for (word = 0; word < pc_data_words; word++) {
4198 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4199 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4200 				/*
4201 				 * If this is the first error log detailed
4202 				 * information about it. Otherwise just record
4203 				 * the fact that we found another error.
4204 				 */
4205 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4206 					ch_flt->parity_data.dpe.cpl_way = way;
4207 					ch_flt->parity_data.dpe.cpl_cache =
4208 					    CPU_PC_PARITY;
4209 					ch_flt->parity_data.dpe.cpl_off =
4210 					    word * sizeof (uint64_t);
4211 					bcopy(&tmp_pcp, pcp,
4212 							sizeof (ch_pc_data_t));
4213 				}
4214 				ch_flt->parity_data.dpe.cpl_lcnt++;
4215 			}
4216 		}
4217 	}
4218 }
4219 
4220 
4221 /*
4222  * Add L1 Data cache data to the ereport payload.
4223  */
4224 static void
4225 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4226 {
4227 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4228 	ch_dc_data_t *dcp;
4229 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4230 	uint_t nelem;
4231 	int i, ways_to_check, ways_logged = 0;
4232 
4233 	/*
4234 	 * If this is an D$ fault then there may be multiple
4235 	 * ways captured in the ch_parity_log_t structure.
4236 	 * Otherwise, there will be at most one way captured
4237 	 * in the ch_diag_data_t struct.
4238 	 * Check each way to see if it should be encoded.
4239 	 */
4240 	if (ch_flt->flt_type == CPU_DC_PARITY)
4241 		ways_to_check = CH_DCACHE_NWAY;
4242 	else
4243 		ways_to_check = 1;
4244 	for (i = 0; i < ways_to_check; i++) {
4245 		if (ch_flt->flt_type == CPU_DC_PARITY)
4246 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4247 		else
4248 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4249 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4250 			bcopy(dcp, &dcdata[ways_logged],
4251 				sizeof (ch_dc_data_t));
4252 			ways_logged++;
4253 		}
4254 	}
4255 
4256 	/*
4257 	 * Add the dcache data to the payload.
4258 	 */
4259 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4260 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4261 	if (ways_logged != 0) {
4262 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4263 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4264 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4265 	}
4266 }
4267 
4268 /*
4269  * Add L1 Instruction cache data to the ereport payload.
4270  */
4271 static void
4272 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4273 {
4274 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4275 	ch_ic_data_t *icp;
4276 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4277 	uint_t nelem;
4278 	int i, ways_to_check, ways_logged = 0;
4279 
4280 	/*
4281 	 * If this is an I$ fault then there may be multiple
4282 	 * ways captured in the ch_parity_log_t structure.
4283 	 * Otherwise, there will be at most one way captured
4284 	 * in the ch_diag_data_t struct.
4285 	 * Check each way to see if it should be encoded.
4286 	 */
4287 	if (ch_flt->flt_type == CPU_IC_PARITY)
4288 		ways_to_check = CH_ICACHE_NWAY;
4289 	else
4290 		ways_to_check = 1;
4291 	for (i = 0; i < ways_to_check; i++) {
4292 		if (ch_flt->flt_type == CPU_IC_PARITY)
4293 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4294 		else
4295 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4296 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4297 			bcopy(icp, &icdata[ways_logged],
4298 				sizeof (ch_ic_data_t));
4299 			ways_logged++;
4300 		}
4301 	}
4302 
4303 	/*
4304 	 * Add the icache data to the payload.
4305 	 */
4306 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4307 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4308 	if (ways_logged != 0) {
4309 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4310 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4311 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4312 	}
4313 }
4314 
4315 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4316 
4317 /*
4318  * Add ecache data to payload.
4319  */
4320 static void
4321 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4322 {
4323 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4324 	ch_ec_data_t *ecp;
4325 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4326 	uint_t nelem;
4327 	int i, ways_logged = 0;
4328 
4329 	/*
4330 	 * Check each way to see if it should be encoded
4331 	 * and concatinate it into a temporary buffer.
4332 	 */
4333 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4334 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4335 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4336 			bcopy(ecp, &ecdata[ways_logged],
4337 				sizeof (ch_ec_data_t));
4338 			ways_logged++;
4339 		}
4340 	}
4341 
4342 	/*
4343 	 * Panther CPUs have an additional level of cache and so
4344 	 * what we just collected was the L3 (ecache) and not the
4345 	 * L2 cache.
4346 	 */
4347 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4348 		/*
4349 		 * Add the L3 (ecache) data to the payload.
4350 		 */
4351 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4352 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4353 		if (ways_logged != 0) {
4354 			nelem = sizeof (ch_ec_data_t) /
4355 			    sizeof (uint64_t) * ways_logged;
4356 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4357 			    DATA_TYPE_UINT64_ARRAY, nelem,
4358 			    (uint64_t *)ecdata, NULL);
4359 		}
4360 
4361 		/*
4362 		 * Now collect the L2 cache.
4363 		 */
4364 		ways_logged = 0;
4365 		for (i = 0; i < PN_L2_NWAYS; i++) {
4366 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4367 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4368 				bcopy(ecp, &ecdata[ways_logged],
4369 				    sizeof (ch_ec_data_t));
4370 				ways_logged++;
4371 			}
4372 		}
4373 	}
4374 
4375 	/*
4376 	 * Add the L2 cache data to the payload.
4377 	 */
4378 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4379 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4380 	if (ways_logged != 0) {
4381 		nelem = sizeof (ch_ec_data_t) /
4382 			sizeof (uint64_t) * ways_logged;
4383 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4384 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4385 	}
4386 }
4387 
4388 /*
4389  * Initialize cpu scheme for specified cpu.
4390  */
4391 static void
4392 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4393 {
4394 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4395 	uint8_t mask;
4396 
4397 	mask = cpunodes[cpuid].version;
4398 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4399 	    (u_longlong_t)cpunodes[cpuid].device_id);
4400 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4401 	    cpuid, &mask, (const char *)sbuf);
4402 }
4403 
4404 /*
4405  * Returns ereport resource type.
4406  */
4407 static int
4408 cpu_error_to_resource_type(struct async_flt *aflt)
4409 {
4410 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4411 
4412 	switch (ch_flt->flt_type) {
4413 
4414 	case CPU_CE_ECACHE:
4415 	case CPU_UE_ECACHE:
4416 	case CPU_UE_ECACHE_RETIRE:
4417 	case CPU_ORPH:
4418 		/*
4419 		 * If AFSR error bit indicates L2$ Data for Cheetah,
4420 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4421 		 * E$ Data type, otherwise, return CPU type.
4422 		 */
4423 		if (cpu_error_is_ecache_data(aflt->flt_inst,
4424 		    ch_flt->flt_bit))
4425 			return (ERRTYPE_ECACHE_DATA);
4426 		return (ERRTYPE_CPU);
4427 
4428 	case CPU_CE:
4429 	case CPU_UE:
4430 	case CPU_EMC:
4431 	case CPU_DUE:
4432 	case CPU_RCE:
4433 	case CPU_RUE:
4434 	case CPU_FRC:
4435 	case CPU_FRU:
4436 		return (ERRTYPE_MEMORY);
4437 
4438 	case CPU_IC_PARITY:
4439 	case CPU_DC_PARITY:
4440 	case CPU_FPUERR:
4441 	case CPU_PC_PARITY:
4442 	case CPU_ITLB_PARITY:
4443 	case CPU_DTLB_PARITY:
4444 		return (ERRTYPE_CPU);
4445 	}
4446 	return (ERRTYPE_UNKNOWN);
4447 }
4448 
4449 /*
4450  * Encode the data saved in the ch_async_flt_t struct into
4451  * the FM ereport payload.
4452  */
4453 static void
4454 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4455 	nvlist_t *resource, int *afar_status, int *synd_status)
4456 {
4457 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4458 	*synd_status = AFLT_STAT_INVALID;
4459 	*afar_status = AFLT_STAT_INVALID;
4460 
4461 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4462 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4463 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4464 	}
4465 
4466 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4467 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4468 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4469 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4470 	}
4471 
4472 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4473 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4474 		    ch_flt->flt_bit);
4475 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4476 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4477 	}
4478 
4479 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4480 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4481 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4482 	}
4483 
4484 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4485 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4486 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4487 	}
4488 
4489 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4490 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4491 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4492 	}
4493 
4494 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4495 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4496 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4497 	}
4498 
4499 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4500 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4501 		    DATA_TYPE_BOOLEAN_VALUE,
4502 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4503 	}
4504 
4505 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4506 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4507 		    DATA_TYPE_BOOLEAN_VALUE,
4508 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4509 	}
4510 
4511 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4512 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4513 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4514 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4515 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4516 	}
4517 
4518 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4519 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4520 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4521 	}
4522 
4523 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4524 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4525 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4526 	}
4527 
4528 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4529 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4530 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4531 	}
4532 
4533 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4534 		cpu_payload_add_ecache(aflt, payload);
4535 
4536 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4537 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4538 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4539 	}
4540 
4541 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4542 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4543 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4544 	}
4545 
4546 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4547 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4548 		    DATA_TYPE_UINT32_ARRAY, 16,
4549 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4550 	}
4551 
4552 #if defined(CPU_IMP_L1_CACHE_PARITY)
4553 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4554 		cpu_payload_add_dcache(aflt, payload);
4555 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4556 		cpu_payload_add_icache(aflt, payload);
4557 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4558 
4559 #if defined(CHEETAH_PLUS)
4560 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4561 		cpu_payload_add_pcache(aflt, payload);
4562 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4563 		cpu_payload_add_tlb(aflt, payload);
4564 #endif	/* CHEETAH_PLUS */
4565 	/*
4566 	 * Create the FMRI that goes into the payload
4567 	 * and contains the unum info if necessary.
4568 	 */
4569 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4570 		char unum[UNUM_NAMLEN] = "";
4571 		char sid[DIMM_SERIAL_ID_LEN] = "";
4572 		int len, ret, rtype;
4573 		uint64_t offset = (uint64_t)-1;
4574 
4575 		rtype = cpu_error_to_resource_type(aflt);
4576 		switch (rtype) {
4577 
4578 		case ERRTYPE_MEMORY:
4579 		case ERRTYPE_ECACHE_DATA:
4580 
4581 			/*
4582 			 * Memory errors, do unum lookup
4583 			 */
4584 			if (*afar_status == AFLT_STAT_INVALID)
4585 				break;
4586 
4587 			if (rtype == ERRTYPE_ECACHE_DATA)
4588 				aflt->flt_status |= ECC_ECACHE;
4589 			else
4590 				aflt->flt_status &= ~ECC_ECACHE;
4591 
4592 			if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum,
4593 			    UNUM_NAMLEN, &len) != 0)
4594 				break;
4595 
4596 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4597 			    &len);
4598 
4599 			if (ret == 0) {
4600 				(void) cpu_get_mem_offset(aflt->flt_addr,
4601 				    &offset);
4602 			}
4603 
4604 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4605 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4606 			fm_payload_set(payload,
4607 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4608 			    DATA_TYPE_NVLIST, resource, NULL);
4609 			break;
4610 
4611 		case ERRTYPE_CPU:
4612 			/*
4613 			 * On-board processor array error, add cpu resource.
4614 			 */
4615 			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4616 			fm_payload_set(payload,
4617 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4618 			    DATA_TYPE_NVLIST, resource, NULL);
4619 			break;
4620 		}
4621 	}
4622 }
4623 
4624 /*
4625  * Initialize the way info if necessary.
4626  */
4627 void
4628 cpu_ereport_init(struct async_flt *aflt)
4629 {
4630 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4631 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4632 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4633 	int i;
4634 
4635 	/*
4636 	 * Initialize the info in the CPU logout structure.
4637 	 * The I$/D$ way information is not initialized here
4638 	 * since it is captured in the logout assembly code.
4639 	 */
4640 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4641 		(ecp + i)->ec_way = i;
4642 
4643 	for (i = 0; i < PN_L2_NWAYS; i++)
4644 		(l2p + i)->ec_way = i;
4645 }
4646 
4647 /*
4648  * Returns whether fault address is valid for this error bit and
4649  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4650  */
4651 int
4652 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4653 {
4654 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4655 
4656 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4657 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4658 	    AFLT_STAT_VALID &&
4659 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4660 }
4661 
4662 static void
4663 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4664 {
4665 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4666 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4667 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4668 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4669 #if defined(CPU_IMP_ECACHE_ASSOC)
4670 	int i, nway;
4671 #endif /* CPU_IMP_ECACHE_ASSOC */
4672 
4673 	/*
4674 	 * Check if the CPU log out captured was valid.
4675 	 */
4676 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4677 	    ch_flt->flt_data_incomplete)
4678 		return;
4679 
4680 #if defined(CPU_IMP_ECACHE_ASSOC)
4681 	nway = cpu_ecache_nway();
4682 	i =  cpu_ecache_line_valid(ch_flt);
4683 	if (i == 0 || i > nway) {
4684 		for (i = 0; i < nway; i++)
4685 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4686 	} else
4687 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4688 #else /* CPU_IMP_ECACHE_ASSOC */
4689 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4690 #endif /* CPU_IMP_ECACHE_ASSOC */
4691 
4692 #if defined(CHEETAH_PLUS)
4693 	pn_cpu_log_diag_l2_info(ch_flt);
4694 #endif /* CHEETAH_PLUS */
4695 
4696 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4697 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4698 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4699 	}
4700 
4701 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4702 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4703 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4704 		else
4705 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4706 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4707 	}
4708 }
4709 
/*
 * Cheetah ECC calculation.
 *
 * We only need to do the calculation on the data bits and can ignore check
 * bit and Mtag bit terms in the calculation.
 *
 * Row N holds the data-bit masks for ECC bit N: column 0 is applied to
 * the low-order 64 bits of the 128-bit chunk, column 1 to the high-order
 * 64 bits.
 */
static uint64_t ch_ecc_table[9][2] = {
	/*   low order 64-bits    high-order 64-bits */
	/* ECC bit 0 */
	{ 0x46bffffeccd1177f, 0x488800022100014c },
	/* ECC bit 1 */
	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
	/* ECC bit 2 */
	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
	/* ECC bit 3 */
	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
	/* ECC bit 4 */
	{ 0x7f5511421b113809, 0x901c88d84288aafe },
	/* ECC bit 5 */
	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
	/* ECC bit 6 */
	{ 0xf552181014448344, 0x7ff8f4443e411911 },
	/* ECC bit 7 */
	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
	/* ECC bit 8 */
	{ 0x3280008440001112, 0xfee88b337ffffd62 },
};
4730 
/*
 * 64-bit population count: returns the number of 1 bits in val.
 *
 * Uses the well-known trick of clearing the lowest set bit on each pass.
 * We could use the UltraSPARC V9 POPC instruction, but some
 * CPUs including Cheetahplus and Jaguar do not support that
 * instruction.
 */
int
popc64(uint64_t val)
{
	int ones = 0;

	while (val != 0) {
		/* val & (val - 1) drops the least significant 1 bit */
		val &= val - 1;
		ones++;
	}

	return (ones);
}
4746 
4747 /*
4748  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4749  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4750  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4751  * instead of doing all the xor's.
4752  */
4753 uint32_t
4754 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4755 {
4756 	int bitno, s;
4757 	int synd = 0;
4758 
4759 	for (bitno = 0; bitno < 9; bitno++) {
4760 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4761 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4762 		synd |= (s << bitno);
4763 	}
4764 	return (synd);
4765 
4766 }
4767 
4768 /*
4769  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4770  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4771  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4772  */
4773 static void
4774 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4775     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4776 {
4777 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4778 
4779 	if (reason &&
4780 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4781 		(void) strcat(reason, eccp->ec_reason);
4782 	}
4783 
4784 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4785 	ch_flt->flt_type = eccp->ec_flt_type;
4786 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4787 		ch_flt->flt_diag_data = *cdp;
4788 	else
4789 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4790 	aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit);
4791 
4792 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4793 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4794 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4795 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4796 	else
4797 		aflt->flt_synd = 0;
4798 
4799 	aflt->flt_payload = eccp->ec_err_payload;
4800 
4801 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4802 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4803 		cpu_errorq_dispatch(eccp->ec_err_class,
4804 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4805 		    aflt->flt_panic);
4806 	else
4807 		cpu_errorq_dispatch(eccp->ec_err_class,
4808 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4809 		    aflt->flt_panic);
4810 }
4811 
4812 /*
4813  * Queue events on async event queue one event per error bit.  First we
4814  * queue the events that we "expect" for the given trap, then we queue events
4815  * that we may not expect.  Return number of events queued.
4816  */
4817 int
4818 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4819     ch_cpu_logout_t *clop)
4820 {
4821 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4822 	ecc_type_to_info_t *eccp;
4823 	int nevents = 0;
4824 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4825 #if defined(CHEETAH_PLUS)
4826 	uint64_t orig_t_afsr_errs;
4827 #endif
4828 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4829 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4830 	ch_diag_data_t *cdp = NULL;
4831 
4832 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4833 
4834 #if defined(CHEETAH_PLUS)
4835 	orig_t_afsr_errs = t_afsr_errs;
4836 
4837 	/*
4838 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4839 	 */
4840 	if (clop != NULL) {
4841 		/*
4842 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4843 		 * flt_addr and flt_stat fields will be reset to the primaries
4844 		 * below, but the sdw_addr and sdw_stat will stay as the
4845 		 * secondaries.
4846 		 */
4847 		cdp = &clop->clo_sdw_data;
4848 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4849 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4850 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4851 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4852 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4853 
4854 		/*
4855 		 * If the primary and shadow AFSR differ, tag the shadow as
4856 		 * the first fault.
4857 		 */
4858 		if ((primary_afar != cdp->chd_afar) ||
4859 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4860 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4861 		}
4862 
4863 		/*
4864 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4865 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4866 		 * is expected to be zero for those CPUs which do not have
4867 		 * an AFSR_EXT register.
4868 		 */
4869 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4870 			if ((eccp->ec_afsr_bit &
4871 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4872 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4873 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4874 				cdp = NULL;
4875 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4876 				nevents++;
4877 			}
4878 		}
4879 
4880 		/*
4881 		 * If the ME bit is on in the primary AFSR turn all the
4882 		 * error bits on again that may set the ME bit to make
4883 		 * sure we see the ME AFSR error logs.
4884 		 */
4885 		if ((primary_afsr & C_AFSR_ME) != 0)
4886 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4887 	}
4888 #endif	/* CHEETAH_PLUS */
4889 
4890 	if (clop != NULL)
4891 		cdp = &clop->clo_data;
4892 
4893 	/*
4894 	 * Queue expected errors, error bit and fault type must match
4895 	 * in the ecc_type_to_info table.
4896 	 */
4897 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4898 	    eccp++) {
4899 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4900 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4901 #if defined(SERRANO)
4902 			/*
4903 			 * For FRC/FRU errors on Serrano the afar2 captures
4904 			 * the address and the associated data is
4905 			 * in the shadow logout area.
4906 			 */
4907 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4908 				if (clop != NULL)
4909 					cdp = &clop->clo_sdw_data;
4910 				aflt->flt_addr = ch_flt->afar2;
4911 			} else {
4912 				if (clop != NULL)
4913 					cdp = &clop->clo_data;
4914 				aflt->flt_addr = primary_afar;
4915 			}
4916 #else	/* SERRANO */
4917 			aflt->flt_addr = primary_afar;
4918 #endif	/* SERRANO */
4919 			aflt->flt_stat = primary_afsr;
4920 			ch_flt->afsr_ext = primary_afsr_ext;
4921 			ch_flt->afsr_errs = primary_afsr_errs;
4922 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4923 			cdp = NULL;
4924 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4925 			nevents++;
4926 		}
4927 	}
4928 
4929 	/*
4930 	 * Queue unexpected errors, error bit only match.
4931 	 */
4932 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4933 	    eccp++) {
4934 		if (eccp->ec_afsr_bit & t_afsr_errs) {
4935 #if defined(SERRANO)
4936 			/*
4937 			 * For FRC/FRU errors on Serrano the afar2 captures
4938 			 * the address and the associated data is
4939 			 * in the shadow logout area.
4940 			 */
4941 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4942 				if (clop != NULL)
4943 					cdp = &clop->clo_sdw_data;
4944 				aflt->flt_addr = ch_flt->afar2;
4945 			} else {
4946 				if (clop != NULL)
4947 					cdp = &clop->clo_data;
4948 				aflt->flt_addr = primary_afar;
4949 			}
4950 #else	/* SERRANO */
4951 			aflt->flt_addr = primary_afar;
4952 #endif	/* SERRANO */
4953 			aflt->flt_stat = primary_afsr;
4954 			ch_flt->afsr_ext = primary_afsr_ext;
4955 			ch_flt->afsr_errs = primary_afsr_errs;
4956 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4957 			cdp = NULL;
4958 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4959 			nevents++;
4960 		}
4961 	}
4962 	return (nevents);
4963 }
4964 
4965 /*
4966  * Return trap type number.
4967  */
4968 uint8_t
4969 flt_to_trap_type(struct async_flt *aflt)
4970 {
4971 	if (aflt->flt_status & ECC_I_TRAP)
4972 		return (TRAP_TYPE_ECC_I);
4973 	if (aflt->flt_status & ECC_D_TRAP)
4974 		return (TRAP_TYPE_ECC_D);
4975 	if (aflt->flt_status & ECC_F_TRAP)
4976 		return (TRAP_TYPE_ECC_F);
4977 	if (aflt->flt_status & ECC_C_TRAP)
4978 		return (TRAP_TYPE_ECC_C);
4979 	if (aflt->flt_status & ECC_DP_TRAP)
4980 		return (TRAP_TYPE_ECC_DP);
4981 	if (aflt->flt_status & ECC_IP_TRAP)
4982 		return (TRAP_TYPE_ECC_IP);
4983 	if (aflt->flt_status & ECC_ITLB_TRAP)
4984 		return (TRAP_TYPE_ECC_ITLB);
4985 	if (aflt->flt_status & ECC_DTLB_TRAP)
4986 		return (TRAP_TYPE_ECC_DTLB);
4987 	return (TRAP_TYPE_UNKNOWN);
4988 }
4989 
4990 /*
4991  * Decide an error type based on detector and leaky/partner tests.
4992  * The following array is used for quick translation - it must
4993  * stay in sync with ce_dispact_t.
4994  */
4995 
4996 static char *cetypes[] = {
4997 	CE_DISP_DESC_U,
4998 	CE_DISP_DESC_I,
4999 	CE_DISP_DESC_PP,
5000 	CE_DISP_DESC_P,
5001 	CE_DISP_DESC_L,
5002 	CE_DISP_DESC_PS,
5003 	CE_DISP_DESC_S
5004 };
5005 
5006 char *
5007 flt_to_error_type(struct async_flt *aflt)
5008 {
5009 	ce_dispact_t dispact, disp;
5010 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5011 
5012 	/*
5013 	 * The memory payload bundle is shared by some events that do
5014 	 * not perform any classification.  For those flt_disp will be
5015 	 * 0 and we will return "unknown".
5016 	 */
5017 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5018 		return (cetypes[CE_DISP_UNKNOWN]);
5019 
5020 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5021 
5022 	/*
5023 	 * It is also possible that no scrub/classification was performed
5024 	 * by the detector, for instance where a disrupting error logged
5025 	 * in the AFSR while CEEN was off in cpu_deferred_error.
5026 	 */
5027 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5028 		return (cetypes[CE_DISP_UNKNOWN]);
5029 
5030 	/*
5031 	 * Lookup type in initial classification/action table
5032 	 */
5033 	dispact = CE_DISPACT(ce_disp_table,
5034 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
5035 	    CE_XDIAG_STATE(dtcrinfo),
5036 	    CE_XDIAG_CE1SEEN(dtcrinfo),
5037 	    CE_XDIAG_CE2SEEN(dtcrinfo));
5038 
5039 	/*
5040 	 * A bad lookup is not something to panic production systems for.
5041 	 */
5042 	ASSERT(dispact != CE_DISP_BAD);
5043 	if (dispact == CE_DISP_BAD)
5044 		return (cetypes[CE_DISP_UNKNOWN]);
5045 
5046 	disp = CE_DISP(dispact);
5047 
5048 	switch (disp) {
5049 	case CE_DISP_UNKNOWN:
5050 	case CE_DISP_INTERMITTENT:
5051 		break;
5052 
5053 	case CE_DISP_POSS_PERS:
5054 		/*
5055 		 * "Possible persistent" errors to which we have applied a valid
5056 		 * leaky test can be separated into "persistent" or "leaky".
5057 		 */
5058 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5059 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
5060 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5061 			    CE_XDIAG_CE2SEEN(lkyinfo))
5062 				disp = CE_DISP_LEAKY;
5063 			else
5064 				disp = CE_DISP_PERS;
5065 		}
5066 		break;
5067 
5068 	case CE_DISP_POSS_STICKY:
5069 		/*
5070 		 * Promote "possible sticky" results that have been
5071 		 * confirmed by a partner test to "sticky".  Unconfirmed
5072 		 * "possible sticky" events are left at that status - we do not
5073 		 * guess at any bad reader/writer etc status here.
5074 		 */
5075 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5076 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5077 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5078 			disp = CE_DISP_STICKY;
5079 
5080 		/*
5081 		 * Promote "possible sticky" results on a uniprocessor
5082 		 * to "sticky"
5083 		 */
5084 		if (disp == CE_DISP_POSS_STICKY &&
5085 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5086 			disp = CE_DISP_STICKY;
5087 		break;
5088 
5089 	default:
5090 		disp = CE_DISP_UNKNOWN;
5091 		break;
5092 	}
5093 
5094 	return (cetypes[disp]);
5095 }
5096 
5097 /*
5098  * Given the entire afsr, the specific bit to check and a prioritized list of
5099  * error bits, determine the validity of the various overwrite priority
5100  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5101  * different overwrite priorities.
5102  *
5103  * Given a specific afsr error bit and the entire afsr, there are three cases:
5104  *   INVALID:	The specified bit is lower overwrite priority than some other
5105  *		error bit which is on in the afsr (or IVU/IVC).
5106  *   VALID:	The specified bit is higher priority than all other error bits
5107  *		which are on in the afsr.
5108  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5109  *		bit is on in the afsr.
5110  */
5111 int
5112 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5113 {
5114 	uint64_t afsr_ow;
5115 
5116 	while ((afsr_ow = *ow_bits++) != 0) {
5117 		/*
5118 		 * If bit is in the priority class, check to see if another
5119 		 * bit in the same class is on => ambiguous.  Otherwise,
5120 		 * the value is valid.  If the bit is not on at this priority
5121 		 * class, but a higher priority bit is on, then the value is
5122 		 * invalid.
5123 		 */
5124 		if (afsr_ow & afsr_bit) {
5125 			/*
5126 			 * If equal pri bit is on, ambiguous.
5127 			 */
5128 			if (afsr & (afsr_ow & ~afsr_bit))
5129 				return (AFLT_STAT_AMBIGUOUS);
5130 			return (AFLT_STAT_VALID);
5131 		} else if (afsr & afsr_ow)
5132 			break;
5133 	}
5134 
5135 	/*
5136 	 * We didn't find a match or a higher priority bit was on.  Not
5137 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5138 	 */
5139 	return (AFLT_STAT_INVALID);
5140 }
5141 
5142 static int
5143 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5144 {
5145 #if defined(SERRANO)
5146 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5147 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5148 	else
5149 #endif	/* SERRANO */
5150 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5151 }
5152 
5153 static int
5154 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5155 {
5156 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5157 }
5158 
5159 static int
5160 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5161 {
5162 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5163 }
5164 
5165 static int
5166 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5167 {
5168 #ifdef lint
5169 	cpuid = cpuid;
5170 #endif
5171 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5172 		return (afsr_to_msynd_status(afsr, afsr_bit));
5173 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5174 #if defined(CHEETAH_PLUS)
5175 		/*
5176 		 * The E_SYND overwrite policy is slightly different
5177 		 * for Panther CPUs.
5178 		 */
5179 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5180 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5181 		else
5182 			return (afsr_to_esynd_status(afsr, afsr_bit));
5183 #else /* CHEETAH_PLUS */
5184 		return (afsr_to_esynd_status(afsr, afsr_bit));
5185 #endif /* CHEETAH_PLUS */
5186 	} else {
5187 		return (AFLT_STAT_INVALID);
5188 	}
5189 }
5190 
5191 /*
5192  * Slave CPU stick synchronization.
5193  */
5194 void
5195 sticksync_slave(void)
5196 {
5197 	int 		i;
5198 	int		tries = 0;
5199 	int64_t		tskew;
5200 	int64_t		av_tskew;
5201 
5202 	kpreempt_disable();
5203 	/* wait for the master side */
5204 	while (stick_sync_cmd != SLAVE_START)
5205 		;
5206 	/*
5207 	 * Synchronization should only take a few tries at most. But in the
5208 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5209 	 * without it's stick synchronized wouldn't be a good citizen.
5210 	 */
5211 	while (slave_done == 0) {
5212 		/*
5213 		 * Time skew calculation.
5214 		 */
5215 		av_tskew = tskew = 0;
5216 
5217 		for (i = 0; i < stick_iter; i++) {
5218 			/* make location hot */
5219 			timestamp[EV_A_START] = 0;
5220 			stick_timestamp(&timestamp[EV_A_START]);
5221 
5222 			/* tell the master we're ready */
5223 			stick_sync_cmd = MASTER_START;
5224 
5225 			/* and wait */
5226 			while (stick_sync_cmd != SLAVE_CONT)
5227 				;
5228 			/* Event B end */
5229 			stick_timestamp(&timestamp[EV_B_END]);
5230 
5231 			/* calculate time skew */
5232 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5233 				- (timestamp[EV_A_END] -
5234 				timestamp[EV_A_START])) / 2;
5235 
5236 			/* keep running count */
5237 			av_tskew += tskew;
5238 		} /* for */
5239 
5240 		/*
5241 		 * Adjust stick for time skew if not within the max allowed;
5242 		 * otherwise we're all done.
5243 		 */
5244 		if (stick_iter != 0)
5245 			av_tskew = av_tskew/stick_iter;
5246 		if (ABS(av_tskew) > stick_tsk) {
5247 			/*
5248 			 * If the skew is 1 (the slave's STICK register
5249 			 * is 1 STICK ahead of the master's), stick_adj
5250 			 * could fail to adjust the slave's STICK register
5251 			 * if the STICK read on the slave happens to
5252 			 * align with the increment of the STICK.
5253 			 * Therefore, we increment the skew to 2.
5254 			 */
5255 			if (av_tskew == 1)
5256 				av_tskew++;
5257 			stick_adj(-av_tskew);
5258 		} else
5259 			slave_done = 1;
5260 #ifdef DEBUG
5261 		if (tries < DSYNC_ATTEMPTS)
5262 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5263 				av_tskew;
5264 		++tries;
5265 #endif /* DEBUG */
5266 #ifdef lint
5267 		tries = tries;
5268 #endif
5269 
5270 	} /* while */
5271 
5272 	/* allow the master to finish */
5273 	stick_sync_cmd = EVENT_NULL;
5274 	kpreempt_enable();
5275 }
5276 
5277 /*
5278  * Master CPU side of stick synchronization.
5279  *  - timestamp end of Event A
5280  *  - timestamp beginning of Event B
5281  */
5282 void
5283 sticksync_master(void)
5284 {
5285 	int		i;
5286 
5287 	kpreempt_disable();
5288 	/* tell the slave we've started */
5289 	slave_done = 0;
5290 	stick_sync_cmd = SLAVE_START;
5291 
5292 	while (slave_done == 0) {
5293 		for (i = 0; i < stick_iter; i++) {
5294 			/* wait for the slave */
5295 			while (stick_sync_cmd != MASTER_START)
5296 				;
5297 			/* Event A end */
5298 			stick_timestamp(&timestamp[EV_A_END]);
5299 
5300 			/* make location hot */
5301 			timestamp[EV_B_START] = 0;
5302 			stick_timestamp(&timestamp[EV_B_START]);
5303 
5304 			/* tell the slave to continue */
5305 			stick_sync_cmd = SLAVE_CONT;
5306 		} /* for */
5307 
5308 		/* wait while slave calculates time skew */
5309 		while (stick_sync_cmd == SLAVE_CONT)
5310 			;
5311 	} /* while */
5312 	kpreempt_enable();
5313 }
5314 
/*
 * Intentionally empty.  Cheetah/Cheetah+ raise a disrupting error for
 * copyback's, so the Spitfire hack of xcall'ing all the cpus to ask them
 * to check for such errors is not needed.  In addition, each cpu checks
 * for CPU events on its way to panic idle in cpu_async_panic_callb.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
}
5325 
5326 struct kmem_cache *ch_private_cache;
5327 
5328 /*
5329  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5330  * deallocate the scrubber data structures and cpu_private data structure.
5331  */
5332 void
5333 cpu_uninit_private(struct cpu *cp)
5334 {
5335 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5336 
5337 	ASSERT(chprp);
5338 	cpu_uninit_ecache_scrub_dr(cp);
5339 	CPU_PRIVATE(cp) = NULL;
5340 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5341 	kmem_cache_free(ch_private_cache, chprp);
5342 	cmp_delete_cpu(cp->cpu_id);
5343 
5344 }
5345 
/*
 * Cheetah Cache Scrubbing
 *
 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
 * protected by either parity or ECC.
 *
 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
 * logic works with respect to the ASI used to scrub I$ lines, the entire
 * I$ is scanned at once.
 */
5358 
/*
 * Cache scrubbing tuneables.  These enable and disable the scrubbing of
 * the caches and tune the scrubbing behavior.  They may be changed via
 * /etc/system or using mdb on a running system.
 */

/* D$ scrubbing is on by default */
int dcache_scrub_enable = 1;
5365 
5366 /*
5367  * The following are the PIL levels that the softints/cross traps will fire at.
5368  */
5369 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5370 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5371 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5372 
#if !defined(JALAPENO)

/*
 * E$ scrubbing is on by default for every cpu type other than Jalapeno.
 */
int ecache_scrub_enable = 1;

#else	/* !JALAPENO */

/*
 * The L2$ scrubber is not enabled on Jalapeno due to several errata
 * (82, 85, 86).
 */
int ecache_scrub_enable = 0;

#endif	/* !JALAPENO */
5389 
5390 
/*
 * I$ scrubber enable flag, plus the tuneables specifying the scrub calls
 * per second and the scan rate for each cache.
 *
 * The cyclic times are set during boot based on the calls per second
 * values.  Changing these values in mdb after this time will have no
 * effect.  If a different value is desired, it must be set in /etc/system
 * before a reboot.
 */
#if !defined(CHEETAH_PLUS) && !defined(JALAPENO) && !defined(SERRANO)

int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */

int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */

int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */

#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */

/*
 * The I$ scrubber tends to cause latency problems for real-time SW, so it
 * is disabled by default on non-Cheetah systems
 */
int icache_scrub_enable = 0;

int ecache_calls_a_sec = 1;		/* E$ scrub calls per seconds */
int dcache_calls_a_sec = 2;		/* D$ scrub calls per seconds */
int icache_calls_a_sec = 2;		/* I$ scrub calls per seconds */

int ecache_scan_rate_idle = 1;		/* E$ scan rate when cpu is idle */
int ecache_scan_rate_busy = 1;		/* E$ scan rate when cpu is busy */
int dcache_scan_rate_idle = 1;		/* D$ scan rate when cpu is idle */
int dcache_scan_rate_busy = 1;		/* D$ scan rate when cpu is busy */
int icache_scan_rate_idle = 1;		/* I$ scan rate when cpu is idle */
int icache_scan_rate_busy = 1;		/* I$ scan rate when cpu is busy */

#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5435 
5436 /*
5437  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5438  * increment the outstanding request counter and schedule a softint to run
5439  * the scrubber.
5440  */
5441 extern xcfunc_t cache_scrubreq_tl1;
5442 
5443 /*
5444  * These are the softint functions for each cache scrubber
5445  */
5446 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5447 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5448 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5449 
5450 /*
5451  * The cache scrub info table contains cache specific information
5452  * and allows for some of the scrub code to be table driven, reducing
5453  * duplication of cache similar code.
5454  *
5455  * This table keeps a copy of the value in the calls per second variable
5456  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5457  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5458  * mdb in a misguided attempt to disable the scrubber).
5459  */
5460 struct scrub_info {
5461 	int		*csi_enable;	/* scrubber enable flag */
5462 	int		csi_freq;	/* scrubber calls per second */
5463 	int		csi_index;	/* index to chsm_outstanding[] */
5464 	uint_t		csi_inum;	/* scrubber interrupt number */
5465 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5466 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5467 	char		csi_name[3];	/* cache name for this scrub entry */
5468 } cache_scrub_info[] = {
5469 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5470 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5471 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5472 };
5473 
5474 /*
5475  * If scrubbing is enabled, increment the outstanding request counter.  If it
5476  * is 1 (meaning there were no previous requests outstanding), call
5477  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5478  * a self trap.
5479  */
5480 static void
5481 do_scrub(struct scrub_info *csi)
5482 {
5483 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5484 	int index = csi->csi_index;
5485 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5486 
5487 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5488 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5489 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5490 			    csi->csi_inum, 0);
5491 		}
5492 	}
5493 }
5494 
5495 /*
5496  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5497  * cross-trap the offline cpus.
5498  */
5499 static void
5500 do_scrub_offline(struct scrub_info *csi)
5501 {
5502 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5503 
5504 	if (CPUSET_ISNULL(cpu_offline_set)) {
5505 		/*
5506 		 * No offline cpus - nothing to do
5507 		 */
5508 		return;
5509 	}
5510 
5511 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5512 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5513 		    csi->csi_index);
5514 	}
5515 }
5516 
5517 /*
5518  * This is the initial setup for the scrubber cyclics - it sets the
5519  * interrupt level, frequency, and function to call.
5520  */
5521 /*ARGSUSED*/
5522 static void
5523 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5524     cyc_time_t *when)
5525 {
5526 	struct scrub_info *csi = (struct scrub_info *)arg;
5527 
5528 	ASSERT(csi != NULL);
5529 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5530 	hdlr->cyh_level = CY_LOW_LEVEL;
5531 	hdlr->cyh_arg = arg;
5532 
5533 	when->cyt_when = 0;	/* Start immediately */
5534 	when->cyt_interval = NANOSEC / csi->csi_freq;
5535 }
5536 
5537 /*
5538  * Initialization for cache scrubbing.
5539  * This routine is called AFTER all cpus have had cpu_init_private called
5540  * to initialize their private data areas.
5541  */
5542 void
5543 cpu_init_cache_scrub(void)
5544 {
5545 	int i;
5546 	struct scrub_info *csi;
5547 	cyc_omni_handler_t omni_hdlr;
5548 	cyc_handler_t offline_hdlr;
5549 	cyc_time_t when;
5550 
5551 	/*
5552 	 * save away the maximum number of lines for the D$
5553 	 */
5554 	dcache_nlines = dcache_size / dcache_linesize;
5555 
5556 	/*
5557 	 * register the softints for the cache scrubbing
5558 	 */
5559 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5560 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5561 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]);
5562 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5563 
5564 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5565 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5566 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]);
5567 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5568 
5569 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5570 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5571 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]);
5572 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5573 
5574 	/*
5575 	 * start the scrubbing for all the caches
5576 	 */
5577 	mutex_enter(&cpu_lock);
5578 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5579 
5580 		csi = &cache_scrub_info[i];
5581 
5582 		if (!(*csi->csi_enable))
5583 			continue;
5584 
5585 		/*
5586 		 * force the following to be true:
5587 		 *	1 <= calls_a_sec <= hz
5588 		 */
5589 		if (csi->csi_freq > hz) {
5590 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5591 				"(%d); resetting to hz (%d)", csi->csi_name,
5592 				csi->csi_freq, hz);
5593 			csi->csi_freq = hz;
5594 		} else if (csi->csi_freq < 1) {
5595 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5596 				"(%d); resetting to 1", csi->csi_name,
5597 				csi->csi_freq);
5598 			csi->csi_freq = 1;
5599 		}
5600 
5601 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5602 		omni_hdlr.cyo_offline = NULL;
5603 		omni_hdlr.cyo_arg = (void *)csi;
5604 
5605 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5606 		offline_hdlr.cyh_arg = (void *)csi;
5607 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5608 
5609 		when.cyt_when = 0;	/* Start immediately */
5610 		when.cyt_interval = NANOSEC / csi->csi_freq;
5611 
5612 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5613 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5614 	}
5615 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5616 	mutex_exit(&cpu_lock);
5617 }
5618 
5619 /*
5620  * Indicate that the specified cpu is idle.
5621  */
5622 void
5623 cpu_idle_ecache_scrub(struct cpu *cp)
5624 {
5625 	if (CPU_PRIVATE(cp) != NULL) {
5626 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5627 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5628 	}
5629 }
5630 
5631 /*
5632  * Indicate that the specified cpu is busy.
5633  */
5634 void
5635 cpu_busy_ecache_scrub(struct cpu *cp)
5636 {
5637 	if (CPU_PRIVATE(cp) != NULL) {
5638 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5639 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5640 	}
5641 }
5642 
5643 /*
5644  * Initialization for cache scrubbing for the specified cpu.
5645  */
5646 void
5647 cpu_init_ecache_scrub_dr(struct cpu *cp)
5648 {
5649 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5650 	int cpuid = cp->cpu_id;
5651 
5652 	/* initialize the number of lines in the caches */
5653 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5654 	    cpunodes[cpuid].ecache_linesize;
5655 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5656 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5657 
5658 	/*
5659 	 * do_scrub() and do_scrub_offline() check both the global
5660 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5661 	 * check this value before scrubbing.  Currently, we use it to
5662 	 * disable the E$ scrubber on multi-core cpus or while running at
5663 	 * slowed speed.  For now, just turn everything on and allow
5664 	 * cpu_init_private() to change it if necessary.
5665 	 */
5666 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5667 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5668 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5669 
5670 	cpu_busy_ecache_scrub(cp);
5671 }
5672 
5673 /*
5674  * Un-initialization for cache scrubbing for the specified cpu.
5675  */
5676 static void
5677 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5678 {
5679 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5680 
5681 	/*
5682 	 * un-initialize bookkeeping for cache scrubbing
5683 	 */
5684 	bzero(csmp, sizeof (ch_scrub_misc_t));
5685 
5686 	cpu_idle_ecache_scrub(cp);
5687 }
5688 
5689 /*
5690  * Called periodically on each CPU to scrub the D$.
5691  */
5692 static void
5693 scrub_dcache(int how_many)
5694 {
5695 	int i;
5696 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5697 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5698 
5699 	/*
5700 	 * scrub the desired number of lines
5701 	 */
5702 	for (i = 0; i < how_many; i++) {
5703 		/*
5704 		 * scrub a D$ line
5705 		 */
5706 		dcache_inval_line(index);
5707 
5708 		/*
5709 		 * calculate the next D$ line to scrub, assumes
5710 		 * that dcache_nlines is a power of 2
5711 		 */
5712 		index = (index + 1) & (dcache_nlines - 1);
5713 	}
5714 
5715 	/*
5716 	 * set the scrub index for the next visit
5717 	 */
5718 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5719 }
5720 
5721 /*
5722  * Handler for D$ scrub inum softint. Call scrub_dcache until
5723  * we decrement the outstanding request count to zero.
5724  */
5725 /*ARGSUSED*/
5726 static uint_t
5727 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5728 {
5729 	int i;
5730 	int how_many;
5731 	int outstanding;
5732 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5733 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5734 	struct scrub_info *csi = (struct scrub_info *)arg1;
5735 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5736 		dcache_scan_rate_idle : dcache_scan_rate_busy;
5737 
5738 	/*
5739 	 * The scan rates are expressed in units of tenths of a
5740 	 * percent.  A scan rate of 1000 (100%) means the whole
5741 	 * cache is scanned every second.
5742 	 */
5743 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5744 
5745 	do {
5746 		outstanding = *countp;
5747 		for (i = 0; i < outstanding; i++) {
5748 			scrub_dcache(how_many);
5749 		}
5750 	} while (atomic_add_32_nv(countp, -outstanding));
5751 
5752 	return (DDI_INTR_CLAIMED);
5753 }
5754 
5755 /*
5756  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5757  * by invalidating lines. Due to the characteristics of the ASI which
5758  * is used to invalidate an I$ line, the entire I$ must be invalidated
5759  * vs. an individual I$ line.
5760  */
5761 static void
5762 scrub_icache(int how_many)
5763 {
5764 	int i;
5765 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5766 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5767 	int icache_nlines = csmp->chsm_icache_nlines;
5768 
5769 	/*
5770 	 * scrub the desired number of lines
5771 	 */
5772 	for (i = 0; i < how_many; i++) {
5773 		/*
5774 		 * since the entire I$ must be scrubbed at once,
5775 		 * wait until the index wraps to zero to invalidate
5776 		 * the entire I$
5777 		 */
5778 		if (index == 0) {
5779 			icache_inval_all();
5780 		}
5781 
5782 		/*
5783 		 * calculate the next I$ line to scrub, assumes
5784 		 * that chsm_icache_nlines is a power of 2
5785 		 */
5786 		index = (index + 1) & (icache_nlines - 1);
5787 	}
5788 
5789 	/*
5790 	 * set the scrub index for the next visit
5791 	 */
5792 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5793 }
5794 
5795 /*
5796  * Handler for I$ scrub inum softint. Call scrub_icache until
5797  * we decrement the outstanding request count to zero.
5798  */
5799 /*ARGSUSED*/
5800 static uint_t
5801 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5802 {
5803 	int i;
5804 	int how_many;
5805 	int outstanding;
5806 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5807 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5808 	struct scrub_info *csi = (struct scrub_info *)arg1;
5809 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5810 	    icache_scan_rate_idle : icache_scan_rate_busy;
5811 	int icache_nlines = csmp->chsm_icache_nlines;
5812 
5813 	/*
5814 	 * The scan rates are expressed in units of tenths of a
5815 	 * percent.  A scan rate of 1000 (100%) means the whole
5816 	 * cache is scanned every second.
5817 	 */
5818 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5819 
5820 	do {
5821 		outstanding = *countp;
5822 		for (i = 0; i < outstanding; i++) {
5823 			scrub_icache(how_many);
5824 		}
5825 	} while (atomic_add_32_nv(countp, -outstanding));
5826 
5827 	return (DDI_INTR_CLAIMED);
5828 }
5829 
5830 /*
5831  * Called periodically on each CPU to scrub the E$.
5832  */
5833 static void
5834 scrub_ecache(int how_many)
5835 {
5836 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5837 	int i;
5838 	int cpuid = CPU->cpu_id;
5839 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5840 	int nlines = csmp->chsm_ecache_nlines;
5841 	int linesize = cpunodes[cpuid].ecache_linesize;
5842 	int ec_set_size = cpu_ecache_set_size(CPU);
5843 
5844 	/*
5845 	 * scrub the desired number of lines
5846 	 */
5847 	for (i = 0; i < how_many; i++) {
5848 		/*
5849 		 * scrub the E$ line
5850 		 */
5851 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5852 		    ec_set_size);
5853 
5854 		/*
5855 		 * calculate the next E$ line to scrub based on twice
5856 		 * the number of E$ lines (to displace lines containing
5857 		 * flush area data), assumes that the number of lines
5858 		 * is a power of 2
5859 		 */
5860 		index = (index + 1) & ((nlines << 1) - 1);
5861 	}
5862 
5863 	/*
5864 	 * set the ecache scrub index for the next visit
5865 	 */
5866 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5867 }
5868 
5869 /*
5870  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5871  * we decrement the outstanding request count to zero.
5872  *
5873  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5874  * become negative after the atomic_add_32_nv().  This is not a problem, as
5875  * the next trip around the loop won't scrub anything, and the next add will
5876  * reset the count back to zero.
5877  */
5878 /*ARGSUSED*/
5879 static uint_t
5880 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5881 {
5882 	int i;
5883 	int how_many;
5884 	int outstanding;
5885 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5886 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5887 	struct scrub_info *csi = (struct scrub_info *)arg1;
5888 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5889 		ecache_scan_rate_idle : ecache_scan_rate_busy;
5890 	int ecache_nlines = csmp->chsm_ecache_nlines;
5891 
5892 	/*
5893 	 * The scan rates are expressed in units of tenths of a
5894 	 * percent.  A scan rate of 1000 (100%) means the whole
5895 	 * cache is scanned every second.
5896 	 */
5897 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5898 
5899 	do {
5900 		outstanding = *countp;
5901 		for (i = 0; i < outstanding; i++) {
5902 			scrub_ecache(how_many);
5903 		}
5904 	} while (atomic_add_32_nv(countp, -outstanding));
5905 
5906 	return (DDI_INTR_CLAIMED);
5907 }
5908 
5909 /*
5910  * Timeout function to reenable CE
5911  */
5912 static void
5913 cpu_delayed_check_ce_errors(void *arg)
5914 {
5915 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
5916 	    TQ_NOSLEEP)) {
5917 		(void) timeout(cpu_delayed_check_ce_errors, arg,
5918 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5919 	}
5920 }
5921 
5922 /*
5923  * CE Deferred Re-enable after trap.
5924  *
5925  * When the CPU gets a disrupting trap for any of the errors
5926  * controlled by the CEEN bit, CEEN is disabled in the trap handler
5927  * immediately. To eliminate the possibility of multiple CEs causing
5928  * recursive stack overflow in the trap handler, we cannot
5929  * reenable CEEN while still running in the trap handler. Instead,
5930  * after a CE is logged on a CPU, we schedule a timeout function,
5931  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
5932  * seconds. This function will check whether any further CEs
5933  * have occurred on that CPU, and if none have, will reenable CEEN.
5934  *
5935  * If further CEs have occurred while CEEN is disabled, another
5936  * timeout will be scheduled. This is to ensure that the CPU can
5937  * make progress in the face of CE 'storms', and that it does not
5938  * spend all its time logging CE errors.
5939  */
5940 static void
5941 cpu_check_ce_errors(void *arg)
5942 {
5943 	int	cpuid = (int)(uintptr_t)arg;
5944 	cpu_t	*cp;
5945 
5946 	/*
5947 	 * We acquire cpu_lock.
5948 	 */
5949 	ASSERT(curthread->t_pil == 0);
5950 
5951 	/*
5952 	 * verify that the cpu is still around, DR
5953 	 * could have got there first ...
5954 	 */
5955 	mutex_enter(&cpu_lock);
5956 	cp = cpu_get(cpuid);
5957 	if (cp == NULL) {
5958 		mutex_exit(&cpu_lock);
5959 		return;
5960 	}
5961 	/*
5962 	 * make sure we don't migrate across CPUs
5963 	 * while checking our CE status.
5964 	 */
5965 	kpreempt_disable();
5966 
5967 	/*
5968 	 * If we are running on the CPU that got the
5969 	 * CE, we can do the checks directly.
5970 	 */
5971 	if (cp->cpu_id == CPU->cpu_id) {
5972 		mutex_exit(&cpu_lock);
5973 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
5974 		kpreempt_enable();
5975 		return;
5976 	}
5977 	kpreempt_enable();
5978 
5979 	/*
5980 	 * send an x-call to get the CPU that originally
5981 	 * got the CE to do the necessary checks. If we can't
5982 	 * send the x-call, reschedule the timeout, otherwise we
5983 	 * lose CEEN forever on that CPU.
5984 	 */
5985 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
5986 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
5987 		    TIMEOUT_CEEN_CHECK, 0);
5988 		mutex_exit(&cpu_lock);
5989 	} else {
5990 		/*
5991 		 * When the CPU is not accepting xcalls, or
5992 		 * the processor is offlined, we don't want to
5993 		 * incur the extra overhead of trying to schedule the
5994 		 * CE timeout indefinitely. However, we don't want to lose
5995 		 * CE checking forever.
5996 		 *
5997 		 * Keep rescheduling the timeout, accepting the additional
5998 		 * overhead as the cost of correctness in the case where we get
5999 		 * a CE, disable CEEN, offline the CPU during the
6000 		 * the timeout interval, and then online it at some
6001 		 * point in the future. This is unlikely given the short
6002 		 * cpu_ceen_delay_secs.
6003 		 */
6004 		mutex_exit(&cpu_lock);
6005 		(void) timeout(cpu_delayed_check_ce_errors,
6006 		    (void *)(uintptr_t)cp->cpu_id,
6007 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6008 	}
6009 }
6010 
6011 /*
6012  * This routine will check whether CEs have occurred while
6013  * CEEN is disabled. Any CEs detected will be logged and, if
6014  * possible, scrubbed.
6015  *
6016  * The memscrubber will also use this routine to clear any errors
6017  * caused by its scrubbing with CEEN disabled.
6018  *
6019  * flag == SCRUBBER_CEEN_CHECK
6020  *		called from memscrubber, just check/scrub, no reset
6021  *		paddr 	physical addr. for start of scrub pages
6022  *		vaddr 	virtual addr. for scrub area
6023  *		psz	page size of area to be scrubbed
6024  *
6025  * flag == TIMEOUT_CEEN_CHECK
6026  *		timeout function has triggered, reset timeout or CEEN
6027  *
6028  * Note: We must not migrate cpus during this function.  This can be
6029  * achieved by one of:
6030  *    - invoking as target of an x-call in which case we're at XCALL_PIL
6031  *	The flag value must be first xcall argument.
6032  *    - disabling kernel preemption.  This should be done for very short
6033  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6034  *	scrub an extended area with cpu_check_block.  The call for
6035  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6036  *	brief for this case.
6037  *    - binding to a cpu, eg with thread_affinity_set().  This is used
6038  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6039  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6040  */
6041 void
6042 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6043 {
6044 	ch_cpu_errors_t	cpu_error_regs;
6045 	uint64_t	ec_err_enable;
6046 	uint64_t	page_offset;
6047 
6048 	/* Read AFSR */
6049 	get_cpu_error_state(&cpu_error_regs);
6050 
6051 	/*
6052 	 * If no CEEN errors have occurred during the timeout
6053 	 * interval, it is safe to re-enable CEEN and exit.
6054 	 */
6055 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
6056 		if (flag == TIMEOUT_CEEN_CHECK &&
6057 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6058 			set_error_enable(ec_err_enable | EN_REG_CEEN);
6059 		return;
6060 	}
6061 
6062 	/*
6063 	 * Ensure that CEEN was not reenabled (maybe by DR) before
6064 	 * we log/clear the error.
6065 	 */
6066 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6067 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6068 
6069 	/*
6070 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6071 	 * timeout will be rescheduled when the error is logged.
6072 	 */
6073 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
6074 	    cpu_ce_detected(&cpu_error_regs,
6075 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6076 	else
6077 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6078 
6079 	/*
6080 	 * If the memory scrubber runs while CEEN is
6081 	 * disabled, (or if CEEN is disabled during the
6082 	 * scrub as a result of a CE being triggered by
6083 	 * it), the range being scrubbed will not be
6084 	 * completely cleaned. If there are multiple CEs
6085 	 * in the range at most two of these will be dealt
6086 	 * with, (one by the trap handler and one by the
6087 	 * timeout). It is also possible that none are dealt
6088 	 * with, (CEEN disabled and another CE occurs before
6089 	 * the timeout triggers). So to ensure that the
6090 	 * memory is actually scrubbed, we have to access each
6091 	 * memory location in the range and then check whether
6092 	 * that access causes a CE.
6093 	 */
6094 	if (flag == SCRUBBER_CEEN_CHECK && va) {
6095 		if ((cpu_error_regs.afar >= pa) &&
6096 		    (cpu_error_regs.afar < (pa + psz))) {
6097 			/*
6098 			 * Force a load from physical memory for each
6099 			 * 64-byte block, then check AFSR to determine
6100 			 * whether this access caused an error.
6101 			 *
6102 			 * This is a slow way to do a scrub, but as it will
6103 			 * only be invoked when the memory scrubber actually
6104 			 * triggered a CE, it should not happen too
6105 			 * frequently.
6106 			 *
6107 			 * cut down what we need to check as the scrubber
6108 			 * has verified up to AFAR, so get it's offset
6109 			 * into the page and start there.
6110 			 */
6111 			page_offset = (uint64_t)(cpu_error_regs.afar &
6112 			    (psz - 1));
6113 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6114 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6115 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6116 			    psz);
6117 		}
6118 	}
6119 
6120 	/*
6121 	 * Reset error enable if this CE is not masked.
6122 	 */
6123 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6124 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6125 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
6126 
6127 }
6128 
6129 /*
6130  * Attempt a cpu logout for an error that we did not trap for, such
6131  * as a CE noticed with CEEN off.  It is assumed that we are still running
6132  * on the cpu that took the error and that we cannot migrate.  Returns
6133  * 0 on success, otherwise nonzero.
6134  */
6135 static int
6136 cpu_ce_delayed_ec_logout(uint64_t afar)
6137 {
6138 	ch_cpu_logout_t *clop;
6139 
6140 	if (CPU_PRIVATE(CPU) == NULL)
6141 		return (0);
6142 
6143 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6144 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6145 	    LOGOUT_INVALID)
6146 		return (0);
6147 
6148 	cpu_delayed_logout(afar, clop);
6149 	return (1);
6150 }
6151 
6152 /*
6153  * We got an error while CEEN was disabled. We
6154  * need to clean up after it and log whatever
6155  * information we have on the CE.
6156  */
6157 void
6158 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6159 {
6160 	ch_async_flt_t 	ch_flt;
6161 	struct async_flt *aflt;
6162 	char 		pr_reason[MAX_REASON_STRING];
6163 
6164 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6165 	ch_flt.flt_trapped_ce = flag;
6166 	aflt = (struct async_flt *)&ch_flt;
6167 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6168 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6169 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6170 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6171 	aflt->flt_addr = cpu_error_regs->afar;
6172 #if defined(SERRANO)
6173 	ch_flt.afar2 = cpu_error_regs->afar2;
6174 #endif	/* SERRANO */
6175 	aflt->flt_pc = NULL;
6176 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6177 	aflt->flt_tl = 0;
6178 	aflt->flt_panic = 0;
6179 	cpu_log_and_clear_ce(&ch_flt);
6180 
6181 	/*
6182 	 * check if we caused any errors during cleanup
6183 	 */
6184 	if (clear_errors(&ch_flt)) {
6185 		pr_reason[0] = '\0';
6186 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6187 		    NULL);
6188 	}
6189 }
6190 
/*
 * Log/clear CEEN-controlled disrupting errors
 *
 * ch_flt is a fault record whose flt_stat, afsr_errs and flt_trapped_ce
 * have already been filled in by the caller.  This routine completes the
 * record, captures logout data when the error was found by the CEEN
 * timeout path, scrubs memory for memory CEs, queues one event per error
 * bit, invalidates the cpu logout area and finally flushes the E$ when
 * the error type calls for it.
 */
static void
cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt;
	uint64_t afsr, afsr_errs;
	ch_cpu_logout_t *clop;
	char 		pr_reason[MAX_REASON_STRING];
	on_trap_data_t	*otp = curthread->t_ontrap;

	/*
	 * Complete the generic part of the fault record.
	 */
	aflt = (struct async_flt *)ch_flt;
	afsr = aflt->flt_stat;
	afsr_errs = ch_flt->afsr_errs;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_status = ECC_C_TRAP;

	pr_reason[0] = '\0';
	/*
	 * Get the CPU log out info for Disrupting Trap.
	 * Without cpu private data there is no logout area to use, so
	 * mark the fault's diagnostic data invalid instead.
	 */
	if (CPU_PRIVATE(CPU) == NULL) {
		clop = NULL;
		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
	} else {
		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
	}

	/*
	 * This error was found by the CEEN timeout path rather than by a
	 * trap, so the logout data has to be gathered here.  Note that &
	 * binds tighter than &&, so the test below is
	 * clop && (flt_trapped_ce & CE_CEEN_TIMEOUT).
	 */
	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
		ch_cpu_errors_t cpu_error_regs;

		/*
		 * Re-read the error registers, attempt the delayed logout
		 * for the current AFAR (its result is deliberately ignored)
		 * and then record the register values in the logout area.
		 */
		get_cpu_error_state(&cpu_error_regs);
		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
		clop->clo_data.chd_afar = cpu_error_regs.afar;
		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
		clop->clo_sdw_data.chd_afsr_ext =
		    cpu_error_regs.shadow_afsr_ext;
#if defined(SERRANO)
		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
#endif	/* SERRANO */
		ch_flt->flt_data_incomplete = 1;

		/*
		 * The logging/clear code expects AFSR/AFAR to be cleared.
		 * The trap handler does it for CEEN enabled errors
		 * so we need to do it here.
		 */
		set_cpu_error_state(&cpu_error_regs);
	}

#if defined(JALAPENO) || defined(SERRANO)
	/*
	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
	 * For Serrano, even though we do have the AFAR, we still do the
	 * scrub on the RCE side since that's where the error type can
	 * be properly classified as intermittent, persistent, etc.
	 *
	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
	 * the flt_status bits.
	 */
	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
		cpu_ce_scrub_mem_err(aflt, B_TRUE);
	}
#else /* JALAPENO || SERRANO */
	/*
	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
	 * the flt_status bits.
	 */
	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
			cpu_ce_scrub_mem_err(aflt, B_TRUE);
		}
	}

#endif /* JALAPENO || SERRANO */

	/*
	 * Update flt_prot if this error occurred under on_trap protection.
	 */
	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
		aflt->flt_prot = AFLT_PROT_EC;

	/*
	 * Queue events on the async event queue, one event per error bit.
	 * If nothing was queued, or if no correctable error bit is set in
	 * afsr_errs at all, report the fault as an invalid AFSR instead.
	 */
	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
		ch_flt->flt_type = CPU_INV_AFSR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Zero out + invalidate CPU logout.
	 */
	if (clop) {
		bzero(clop, sizeof (ch_cpu_logout_t));
		clop->clo_data.chd_afar = LOGOUT_INVALID;
	}

	/*
	 * If either a CPC, WDC or EDC error has occurred while CEEN
	 * was disabled, we need to flush either the entire
	 * E$ or an E$ line.
	 */
#if defined(JALAPENO) || defined(SERRANO)
	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
#else	/* JALAPENO || SERRANO */
	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
#endif	/* JALAPENO || SERRANO */
		cpu_error_ecache_flush(ch_flt);

}
6319 
6320 /*
6321  * depending on the error type, we determine whether we
6322  * need to flush the entire ecache or just a line.
6323  */
6324 static int
6325 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6326 {
6327 	struct async_flt *aflt;
6328 	uint64_t	afsr;
6329 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6330 
6331 	aflt = (struct async_flt *)ch_flt;
6332 	afsr = aflt->flt_stat;
6333 
6334 	/*
6335 	 * If we got multiple errors, no point in trying
6336 	 * the individual cases, just flush the whole cache
6337 	 */
6338 	if (afsr & C_AFSR_ME) {
6339 		return (ECACHE_FLUSH_ALL);
6340 	}
6341 
6342 	/*
6343 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6344 	 * was disabled, we need to flush entire E$. We can't just
6345 	 * flush the cache line affected as the ME bit
6346 	 * is not set when multiple correctable errors of the same
6347 	 * type occur, so we might have multiple CPC or EDC errors,
6348 	 * with only the first recorded.
6349 	 */
6350 #if defined(JALAPENO) || defined(SERRANO)
6351 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6352 #else	/* JALAPENO || SERRANO */
6353 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6354 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6355 #endif	/* JALAPENO || SERRANO */
6356 		return (ECACHE_FLUSH_ALL);
6357 	}
6358 
6359 #if defined(JALAPENO) || defined(SERRANO)
6360 	/*
6361 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6362 	 * flush the entire Ecache.
6363 	 */
6364 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6365 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6366 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6367 			return (ECACHE_FLUSH_LINE);
6368 		} else {
6369 			return (ECACHE_FLUSH_ALL);
6370 		}
6371 	}
6372 #else /* JALAPENO || SERRANO */
6373 	/*
6374 	 * If UE only is set, flush the Ecache line, otherwise
6375 	 * flush the entire Ecache.
6376 	 */
6377 	if (afsr_errs & C_AFSR_UE) {
6378 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6379 		    C_AFSR_UE) {
6380 			return (ECACHE_FLUSH_LINE);
6381 		} else {
6382 			return (ECACHE_FLUSH_ALL);
6383 		}
6384 	}
6385 #endif /* JALAPENO || SERRANO */
6386 
6387 	/*
6388 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6389 	 * flush the entire Ecache.
6390 	 */
6391 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6392 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6393 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6394 			return (ECACHE_FLUSH_LINE);
6395 		} else {
6396 			return (ECACHE_FLUSH_ALL);
6397 		}
6398 	}
6399 
6400 	/*
6401 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6402 	 * flush the entire Ecache.
6403 	 */
6404 	if (afsr_errs & C_AFSR_BERR) {
6405 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6406 			return (ECACHE_FLUSH_LINE);
6407 		} else {
6408 			return (ECACHE_FLUSH_ALL);
6409 		}
6410 	}
6411 
6412 	return (0);
6413 }
6414 
6415 void
6416 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6417 {
6418 	int	ecache_flush_flag =
6419 	    cpu_error_ecache_flush_required(ch_flt);
6420 
6421 	/*
6422 	 * Flush Ecache line or entire Ecache based on above checks.
6423 	 */
6424 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6425 		cpu_flush_ecache();
6426 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6427 		cpu_flush_ecache_line(ch_flt);
6428 	}
6429 
6430 }
6431 
6432 /*
6433  * Extract the PA portion from the E$ tag.
6434  */
6435 uint64_t
6436 cpu_ectag_to_pa(int setsize, uint64_t tag)
6437 {
6438 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6439 		return (JG_ECTAG_TO_PA(setsize, tag));
6440 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6441 		return (PN_L3TAG_TO_PA(tag));
6442 	else
6443 		return (CH_ECTAG_TO_PA(setsize, tag));
6444 }
6445 
6446 /*
6447  * Convert the E$ tag PA into an E$ subblock index.
6448  */
6449 static int
6450 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6451 {
6452 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6453 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6454 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6455 		/* Panther has only one subblock per line */
6456 		return (0);
6457 	else
6458 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6459 }
6460 
6461 /*
6462  * All subblocks in an E$ line must be invalid for
6463  * the line to be invalid.
6464  */
6465 int
6466 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6467 {
6468 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6469 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6470 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6471 		return (PN_L3_LINE_INVALID(tag));
6472 	else
6473 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6474 }
6475 
6476 /*
6477  * Extract state bits for a subblock given the tag.  Note that for Panther
6478  * this works on both l2 and l3 tags.
6479  */
6480 static int
6481 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6482 {
6483 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6484 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6485 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6486 		return (tag & CH_ECSTATE_MASK);
6487 	else
6488 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6489 }
6490 
/*
 * Cpu module specific MP initialization.  The only work done here is
 * for Cheetah+ erratum 25: when sendmondo recovery is enabled, set up
 * the nudge mechanism.  Without that erratum workaround compiled in
 * this is a no-op.
 */
void
cpu_mp_init(void)
{
#if defined(CHEETAHPLUS_ERRATUM_25)
	if (cheetah_sendmondo_recover != 0)
		cheetah_nudge_init();
#endif	/* CHEETAHPLUS_ERRATUM_25 */
}
6503 
6504 void
6505 cpu_ereport_post(struct async_flt *aflt)
6506 {
6507 	char *cpu_type, buf[FM_MAX_CLASS];
6508 	nv_alloc_t *nva = NULL;
6509 	nvlist_t *ereport, *detector, *resource;
6510 	errorq_elem_t *eqep;
6511 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6512 	char unum[UNUM_NAMLEN];
6513 	int len = 0;
6514 	uint8_t msg_type;
6515 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6516 
6517 	if (aflt->flt_panic || panicstr) {
6518 		eqep = errorq_reserve(ereport_errorq);
6519 		if (eqep == NULL)
6520 			return;
6521 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6522 		nva = errorq_elem_nva(ereport_errorq, eqep);
6523 	} else {
6524 		ereport = fm_nvlist_create(nva);
6525 	}
6526 
6527 	/*
6528 	 * Create the scheme "cpu" FMRI.
6529 	 */
6530 	detector = fm_nvlist_create(nva);
6531 	resource = fm_nvlist_create(nva);
6532 	switch (cpunodes[aflt->flt_inst].implementation) {
6533 	case CHEETAH_IMPL:
6534 		cpu_type = FM_EREPORT_CPU_USIII;
6535 		break;
6536 	case CHEETAH_PLUS_IMPL:
6537 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6538 		break;
6539 	case JALAPENO_IMPL:
6540 		cpu_type = FM_EREPORT_CPU_USIIIi;
6541 		break;
6542 	case SERRANO_IMPL:
6543 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6544 		break;
6545 	case JAGUAR_IMPL:
6546 		cpu_type = FM_EREPORT_CPU_USIV;
6547 		break;
6548 	case PANTHER_IMPL:
6549 		cpu_type = FM_EREPORT_CPU_USIVplus;
6550 		break;
6551 	default:
6552 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6553 		break;
6554 	}
6555 
6556 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
6557 
6558 	/*
6559 	 * Encode all the common data into the ereport.
6560 	 */
6561 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6562 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6563 
6564 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6565 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6566 	    detector, NULL);
6567 
6568 	/*
6569 	 * Encode the error specific data that was saved in
6570 	 * the async_flt structure into the ereport.
6571 	 */
6572 	cpu_payload_add_aflt(aflt, ereport, resource,
6573 	    &plat_ecc_ch_flt.ecaf_afar_status,
6574 	    &plat_ecc_ch_flt.ecaf_synd_status);
6575 
6576 	if (aflt->flt_panic || panicstr) {
6577 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6578 	} else {
6579 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6580 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6581 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6582 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6583 	}
6584 	/*
6585 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6586 	 * to the SC olny if it can process it.
6587 	 */
6588 
6589 	if (&plat_ecc_capability_sc_get &&
6590 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6591 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6592 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6593 			/*
6594 			 * If afar status is not invalid do a unum lookup.
6595 			 */
6596 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6597 			    AFLT_STAT_INVALID) {
6598 				(void) cpu_get_mem_unum_aflt(
6599 				    plat_ecc_ch_flt.ecaf_synd_status, aflt,
6600 				    unum, UNUM_NAMLEN, &len);
6601 			} else {
6602 				unum[0] = '\0';
6603 			}
6604 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6605 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6606 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6607 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6608 			    ch_flt->flt_sdw_afsr_ext;
6609 
6610 			if (&plat_log_fruid_error2)
6611 				plat_log_fruid_error2(msg_type, unum, aflt,
6612 				    &plat_ecc_ch_flt);
6613 		}
6614 	}
6615 }
6616 
6617 void
6618 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6619 {
6620 	int status;
6621 	ddi_fm_error_t de;
6622 
6623 	bzero(&de, sizeof (ddi_fm_error_t));
6624 
6625 	de.fme_version = DDI_FME_VERSION;
6626 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6627 	    FM_ENA_FMT1);
6628 	de.fme_flag = expected;
6629 	de.fme_bus_specific = (void *)aflt->flt_addr;
6630 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6631 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6632 		aflt->flt_panic = 1;
6633 }
6634 
6635 void
6636 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6637     errorq_t *eqp, uint_t flag)
6638 {
6639 	struct async_flt *aflt = (struct async_flt *)payload;
6640 
6641 	aflt->flt_erpt_class = error_class;
6642 	errorq_dispatch(eqp, payload, payload_sz, flag);
6643 }
6644 
/*
 * The IO module may call this routine, but in this cpu module it is a
 * deliberate no-op: the SERD algorithm is handled by the
 * cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	/* intentionally empty */
}
6654 
6655 void
6656 adjust_hw_copy_limits(int ecache_size)
6657 {
6658 	/*
6659 	 * Set hw copy limits.
6660 	 *
6661 	 * /etc/system will be parsed later and can override one or more
6662 	 * of these settings.
6663 	 *
6664 	 * At this time, ecache size seems only mildly relevant.
6665 	 * We seem to run into issues with the d-cache and stalls
6666 	 * we see on misses.
6667 	 *
6668 	 * Cycle measurement indicates that 2 byte aligned copies fare
6669 	 * little better than doing things with VIS at around 512 bytes.
6670 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6671 	 * aligned is faster whenever the source and destination data
6672 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6673 	 * limit seems to be driven by the 2K write cache.
6674 	 * When more than 2K of copies are done in non-VIS mode, stores
6675 	 * backup in the write cache.  In VIS mode, the write cache is
6676 	 * bypassed, allowing faster cache-line writes aligned on cache
6677 	 * boundaries.
6678 	 *
6679 	 * In addition, in non-VIS mode, there is no prefetching, so
6680 	 * for larger copies, the advantage of prefetching to avoid even
6681 	 * occasional cache misses is enough to justify using the VIS code.
6682 	 *
6683 	 * During testing, it was discovered that netbench ran 3% slower
6684 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6685 	 * applications, data is only used once (copied to the output
6686 	 * buffer, then copied by the network device off the system).  Using
6687 	 * the VIS copy saves more L2 cache state.  Network copies are
6688 	 * around 1.3K to 1.5K in size for historical reasons.
6689 	 *
6690 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6691 	 * aligned copy even for large caches and 8 MB ecache.  The
6692 	 * infrastructure to allow different limits for different sized
6693 	 * caches is kept to allow further tuning in later releases.
6694 	 */
6695 
6696 	if (min_ecache_size == 0 && use_hw_bcopy) {
6697 		/*
6698 		 * First time through - should be before /etc/system
6699 		 * is read.
6700 		 * Could skip the checks for zero but this lets us
6701 		 * preserve any debugger rewrites.
6702 		 */
6703 		if (hw_copy_limit_1 == 0) {
6704 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6705 			priv_hcl_1 = hw_copy_limit_1;
6706 		}
6707 		if (hw_copy_limit_2 == 0) {
6708 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6709 			priv_hcl_2 = hw_copy_limit_2;
6710 		}
6711 		if (hw_copy_limit_4 == 0) {
6712 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6713 			priv_hcl_4 = hw_copy_limit_4;
6714 		}
6715 		if (hw_copy_limit_8 == 0) {
6716 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6717 			priv_hcl_8 = hw_copy_limit_8;
6718 		}
6719 		min_ecache_size = ecache_size;
6720 	} else {
6721 		/*
6722 		 * MP initialization. Called *after* /etc/system has
6723 		 * been parsed. One CPU has already been initialized.
6724 		 * Need to cater for /etc/system having scragged one
6725 		 * of our values.
6726 		 */
6727 		if (ecache_size == min_ecache_size) {
6728 			/*
6729 			 * Same size ecache. We do nothing unless we
6730 			 * have a pessimistic ecache setting. In that
6731 			 * case we become more optimistic (if the cache is
6732 			 * large enough).
6733 			 */
6734 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6735 				/*
6736 				 * Need to adjust hw_copy_limit* from our
6737 				 * pessimistic uniprocessor value to a more
6738 				 * optimistic UP value *iff* it hasn't been
6739 				 * reset.
6740 				 */
6741 				if ((ecache_size > 1048576) &&
6742 				    (priv_hcl_8 == hw_copy_limit_8)) {
6743 					if (ecache_size <= 2097152)
6744 						hw_copy_limit_8 = 4 *
6745 						    VIS_COPY_THRESHOLD;
6746 					else if (ecache_size <= 4194304)
6747 						hw_copy_limit_8 = 4 *
6748 						    VIS_COPY_THRESHOLD;
6749 					else
6750 						hw_copy_limit_8 = 4 *
6751 						    VIS_COPY_THRESHOLD;
6752 					priv_hcl_8 = hw_copy_limit_8;
6753 				}
6754 			}
6755 		} else if (ecache_size < min_ecache_size) {
6756 			/*
6757 			 * A different ecache size. Can this even happen?
6758 			 */
6759 			if (priv_hcl_8 == hw_copy_limit_8) {
6760 				/*
6761 				 * The previous value that we set
6762 				 * is unchanged (i.e., it hasn't been
6763 				 * scragged by /etc/system). Rewrite it.
6764 				 */
6765 				if (ecache_size <= 1048576)
6766 					hw_copy_limit_8 = 8 *
6767 					    VIS_COPY_THRESHOLD;
6768 				else if (ecache_size <= 2097152)
6769 					hw_copy_limit_8 = 8 *
6770 					    VIS_COPY_THRESHOLD;
6771 				else if (ecache_size <= 4194304)
6772 					hw_copy_limit_8 = 8 *
6773 					    VIS_COPY_THRESHOLD;
6774 				else
6775 					hw_copy_limit_8 = 10 *
6776 					    VIS_COPY_THRESHOLD;
6777 				priv_hcl_8 = hw_copy_limit_8;
6778 				min_ecache_size = ecache_size;
6779 			}
6780 		}
6781 	}
6782 }
6783 
6784 /*
6785  * Called from illegal instruction trap handler to see if we can attribute
6786  * the trap to a fpras check.
6787  */
6788 int
6789 fpras_chktrap(struct regs *rp)
6790 {
6791 	int op;
6792 	struct fpras_chkfngrp *cgp;
6793 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6794 
6795 	if (fpras_chkfngrps == NULL)
6796 		return (0);
6797 
6798 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6799 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6800 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6801 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6802 			break;
6803 	}
6804 	if (op == FPRAS_NCOPYOPS)
6805 		return (0);
6806 
6807 	/*
6808 	 * This is an fpRAS failure caught through an illegal
6809 	 * instruction - trampoline.
6810 	 */
6811 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6812 	rp->r_npc = rp->r_pc + 4;
6813 	return (1);
6814 }
6815 
6816 /*
6817  * fpras_failure is called when a fpras check detects a bad calculation
6818  * result or an illegal instruction trap is attributed to an fpras
6819  * check.  In all cases we are still bound to CPU.
6820  */
6821 int
6822 fpras_failure(int op, int how)
6823 {
6824 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6825 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6826 	ch_async_flt_t ch_flt;
6827 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6828 	struct fpras_chkfn *sfp, *cfp;
6829 	uint32_t *sip, *cip;
6830 	int i;
6831 
6832 	/*
6833 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6834 	 * the time in which we dispatch an ereport and (if applicable) panic.
6835 	 */
6836 	use_hw_bcopy_orig = use_hw_bcopy;
6837 	use_hw_bzero_orig = use_hw_bzero;
6838 	hcl1_orig = hw_copy_limit_1;
6839 	hcl2_orig = hw_copy_limit_2;
6840 	hcl4_orig = hw_copy_limit_4;
6841 	hcl8_orig = hw_copy_limit_8;
6842 	use_hw_bcopy = use_hw_bzero = 0;
6843 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6844 	    hw_copy_limit_8 = 0;
6845 
6846 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6847 	aflt->flt_id = gethrtime_waitfree();
6848 	aflt->flt_class = CPU_FAULT;
6849 	aflt->flt_inst = CPU->cpu_id;
6850 	aflt->flt_status = (how << 8) | op;
6851 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6852 	ch_flt.flt_type = CPU_FPUERR;
6853 
6854 	/*
6855 	 * We must panic if the copy operation had no lofault protection -
6856 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6857 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6858 	 */
6859 	aflt->flt_panic = (curthread->t_lofault == NULL);
6860 
6861 	/*
6862 	 * XOR the source instruction block with the copied instruction
6863 	 * block - this will show us which bit(s) are corrupted.
6864 	 */
6865 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6866 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6867 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6868 		sip = &sfp->fpras_blk0[0];
6869 		cip = &cfp->fpras_blk0[0];
6870 	} else {
6871 		sip = &sfp->fpras_blk1[0];
6872 		cip = &cfp->fpras_blk1[0];
6873 	}
6874 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6875 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6876 
6877 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6878 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6879 
6880 	if (aflt->flt_panic)
6881 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6882 
6883 	/*
6884 	 * We get here for copyin/copyout and kcopy or bcopy where the
6885 	 * caller has used on_fault.  We will flag the error so that
6886 	 * the process may be killed  The trap_async_hwerr mechanism will
6887 	 * take appropriate further action (such as a reboot, contract
6888 	 * notification etc).  Since we may be continuing we will
6889 	 * restore the global hardware copy acceleration switches.
6890 	 *
6891 	 * When we return from this function to the copy function we want to
6892 	 * avoid potentially bad data being used, ie we want the affected
6893 	 * copy function to return an error.  The caller should therefore
6894 	 * invoke its lofault handler (which always exists for these functions)
6895 	 * which will return the appropriate error.
6896 	 */
6897 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6898 	aston(curthread);
6899 
6900 	use_hw_bcopy = use_hw_bcopy_orig;
6901 	use_hw_bzero = use_hw_bzero_orig;
6902 	hw_copy_limit_1 = hcl1_orig;
6903 	hw_copy_limit_2 = hcl2_orig;
6904 	hw_copy_limit_4 = hcl4_orig;
6905 	hw_copy_limit_8 = hcl8_orig;
6906 
6907 	return (1);
6908 }
6909 
6910 #define	VIS_BLOCKSIZE		64
6911 
6912 int
6913 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
6914 {
6915 	int ret, watched;
6916 
6917 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6918 	ret = dtrace_blksuword32(addr, data, 0);
6919 	if (watched)
6920 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6921 
6922 	return (ret);
6923 }
6924 
6925 /*
6926  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
6927  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
6928  * CEEN from the EER to disable traps for further disrupting error types
6929  * on that cpu.  We could cross-call instead, but that has a larger
6930  * instruction and data footprint than cross-trapping, and the cpu is known
6931  * to be faulted.
6932  */
6933 
6934 void
6935 cpu_faulted_enter(struct cpu *cp)
6936 {
6937 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
6938 }
6939 
6940 /*
6941  * Called when a cpu leaves the CPU_FAULTED state to return to one of
6942  * offline, spare, or online (by the cpu requesting this state change).
6943  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
6944  * disrupting error bits that have accumulated without trapping, then
6945  * we cross-trap to re-enable CEEN controlled traps.
6946  */
6947 void
6948 cpu_faulted_exit(struct cpu *cp)
6949 {
6950 	ch_cpu_errors_t cpu_error_regs;
6951 
6952 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
6953 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
6954 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
6955 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
6956 	    (uint64_t)&cpu_error_regs, 0);
6957 
6958 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
6959 }
6960 
6961 /*
6962  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
6963  * the errors in the original AFSR, 0 otherwise.
6964  *
6965  * For all procs if the initial error was a BERR or TO, then it is possible
6966  * that we may have caused a secondary BERR or TO in the process of logging the
6967  * inital error via cpu_run_bus_error_handlers().  If this is the case then
6968  * if the request was protected then a panic is still not necessary, if not
6969  * protected then aft_panic is already set - so either way there's no need
6970  * to set aft_panic for the secondary error.
6971  *
6972  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
6973  * a store merge, then the error handling code will call cpu_deferred_error().
6974  * When clear_errors() is called, it will determine that secondary errors have
6975  * occurred - in particular, the store merge also caused a EDU and WDU that
6976  * weren't discovered until this point.
6977  *
6978  * We do three checks to verify that we are in this case.  If we pass all three
6979  * checks, we return 1 to indicate that we should not panic.  If any unexpected
6980  * errors occur, we return 0.
6981  *
6982  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
6983  * handled in cpu_disrupting_errors().  Since this function is not even called
6984  * in the case we are interested in, we just return 0 for these processors.
6985  */
6986 /*ARGSUSED*/
6987 static int
6988 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
6989     uint64_t t_afar)
6990 {
6991 #if defined(CHEETAH_PLUS)
6992 #else	/* CHEETAH_PLUS */
6993 	struct async_flt *aflt = (struct async_flt *)ch_flt;
6994 #endif	/* CHEETAH_PLUS */
6995 
6996 	/*
6997 	 * Was the original error a BERR or TO and only a BERR or TO
6998 	 * (multiple errors are also OK)
6999 	 */
7000 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7001 		/*
7002 		 * Is the new error a BERR or TO and only a BERR or TO
7003 		 * (multiple errors are also OK)
7004 		 */
7005 		if ((ch_flt->afsr_errs &
7006 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7007 			return (1);
7008 	}
7009 
7010 #if defined(CHEETAH_PLUS)
7011 	return (0);
7012 #else	/* CHEETAH_PLUS */
7013 	/*
7014 	 * Now look for secondary effects of a UE on cheetah/jalapeno
7015 	 *
7016 	 * Check the original error was a UE, and only a UE.  Note that
7017 	 * the ME bit will cause us to fail this check.
7018 	 */
7019 	if (t_afsr_errs != C_AFSR_UE)
7020 		return (0);
7021 
7022 	/*
7023 	 * Check the secondary errors were exclusively an EDU and/or WDU.
7024 	 */
7025 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7026 		return (0);
7027 
7028 	/*
7029 	 * Check the AFAR of the original error and secondary errors
7030 	 * match to the 64-byte boundary
7031 	 */
7032 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7033 		return (0);
7034 
7035 	/*
7036 	 * We've passed all the checks, so it's a secondary error!
7037 	 */
7038 	return (1);
7039 #endif	/* CHEETAH_PLUS */
7040 }
7041 
7042 /*
7043  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
7044  * is checked for any valid errors.  If found, the error type is
7045  * returned. If not found, the flt_type is checked for L1$ parity errors.
7046  */
7047 /*ARGSUSED*/
7048 static uint8_t
7049 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7050 {
7051 #if defined(JALAPENO)
7052 	/*
7053 	 * Currently, logging errors to the SC is not supported on Jalapeno
7054 	 */
7055 	return (PLAT_ECC_ERROR2_NONE);
7056 #else
7057 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7058 
7059 	switch (ch_flt->flt_bit) {
7060 	case C_AFSR_CE:
7061 		return (PLAT_ECC_ERROR2_CE);
7062 	case C_AFSR_UCC:
7063 	case C_AFSR_EDC:
7064 	case C_AFSR_WDC:
7065 	case C_AFSR_CPC:
7066 		return (PLAT_ECC_ERROR2_L2_CE);
7067 	case C_AFSR_EMC:
7068 		return (PLAT_ECC_ERROR2_EMC);
7069 	case C_AFSR_IVC:
7070 		return (PLAT_ECC_ERROR2_IVC);
7071 	case C_AFSR_UE:
7072 		return (PLAT_ECC_ERROR2_UE);
7073 	case C_AFSR_UCU:
7074 	case C_AFSR_EDU:
7075 	case C_AFSR_WDU:
7076 	case C_AFSR_CPU:
7077 		return (PLAT_ECC_ERROR2_L2_UE);
7078 	case C_AFSR_IVU:
7079 		return (PLAT_ECC_ERROR2_IVU);
7080 	case C_AFSR_TO:
7081 		return (PLAT_ECC_ERROR2_TO);
7082 	case C_AFSR_BERR:
7083 		return (PLAT_ECC_ERROR2_BERR);
7084 #if defined(CHEETAH_PLUS)
7085 	case C_AFSR_L3_EDC:
7086 	case C_AFSR_L3_UCC:
7087 	case C_AFSR_L3_CPC:
7088 	case C_AFSR_L3_WDC:
7089 		return (PLAT_ECC_ERROR2_L3_CE);
7090 	case C_AFSR_IMC:
7091 		return (PLAT_ECC_ERROR2_IMC);
7092 	case C_AFSR_TSCE:
7093 		return (PLAT_ECC_ERROR2_L2_TSCE);
7094 	case C_AFSR_THCE:
7095 		return (PLAT_ECC_ERROR2_L2_THCE);
7096 	case C_AFSR_L3_MECC:
7097 		return (PLAT_ECC_ERROR2_L3_MECC);
7098 	case C_AFSR_L3_THCE:
7099 		return (PLAT_ECC_ERROR2_L3_THCE);
7100 	case C_AFSR_L3_CPU:
7101 	case C_AFSR_L3_EDU:
7102 	case C_AFSR_L3_UCU:
7103 	case C_AFSR_L3_WDU:
7104 		return (PLAT_ECC_ERROR2_L3_UE);
7105 	case C_AFSR_DUE:
7106 		return (PLAT_ECC_ERROR2_DUE);
7107 	case C_AFSR_DTO:
7108 		return (PLAT_ECC_ERROR2_DTO);
7109 	case C_AFSR_DBERR:
7110 		return (PLAT_ECC_ERROR2_DBERR);
7111 #endif	/* CHEETAH_PLUS */
7112 	default:
7113 		switch (ch_flt->flt_type) {
7114 #if defined(CPU_IMP_L1_CACHE_PARITY)
7115 		case CPU_IC_PARITY:
7116 			return (PLAT_ECC_ERROR2_IPE);
7117 		case CPU_DC_PARITY:
7118 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7119 				if (ch_flt->parity_data.dpe.cpl_cache ==
7120 				    CPU_PC_PARITY) {
7121 					return (PLAT_ECC_ERROR2_PCACHE);
7122 				}
7123 			}
7124 			return (PLAT_ECC_ERROR2_DPE);
7125 #endif /* CPU_IMP_L1_CACHE_PARITY */
7126 		case CPU_ITLB_PARITY:
7127 			return (PLAT_ECC_ERROR2_ITLB);
7128 		case CPU_DTLB_PARITY:
7129 			return (PLAT_ECC_ERROR2_DTLB);
7130 		default:
7131 			return (PLAT_ECC_ERROR2_NONE);
7132 		}
7133 	}
7134 #endif	/* JALAPENO */
7135 }
7136