xref: /illumos-gate/usr/src/uts/sun4u/cpu/us3_common.c (revision 56870e8c76c2675bcef1fcee5d519585ce9c768e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/ddi.h>
29 #include <sys/sysmacros.h>
30 #include <sys/archsystm.h>
31 #include <sys/vmsystm.h>
32 #include <sys/machparam.h>
33 #include <sys/machsystm.h>
34 #include <sys/machthread.h>
35 #include <sys/cpu.h>
36 #include <sys/cmp.h>
37 #include <sys/elf_SPARC.h>
38 #include <vm/vm_dep.h>
39 #include <vm/hat_sfmmu.h>
40 #include <vm/seg_kpm.h>
41 #include <sys/cpuvar.h>
42 #include <sys/cheetahregs.h>
43 #include <sys/us3_module.h>
44 #include <sys/async.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/dditypes.h>
48 #include <sys/prom_debug.h>
49 #include <sys/prom_plat.h>
50 #include <sys/cpu_module.h>
51 #include <sys/sysmacros.h>
52 #include <sys/intreg.h>
53 #include <sys/clock.h>
54 #include <sys/platform_module.h>
55 #include <sys/machtrap.h>
56 #include <sys/ontrap.h>
57 #include <sys/panic.h>
58 #include <sys/memlist.h>
59 #include <sys/bootconf.h>
60 #include <sys/ivintr.h>
61 #include <sys/atomic.h>
62 #include <sys/taskq.h>
63 #include <sys/note.h>
64 #include <sys/ndifm.h>
65 #include <sys/ddifm.h>
66 #include <sys/fm/protocol.h>
67 #include <sys/fm/util.h>
68 #include <sys/fm/cpu/UltraSPARC-III.h>
69 #include <sys/fpras_impl.h>
70 #include <sys/dtrace.h>
71 #include <sys/watchpoint.h>
72 #include <sys/plat_ecc_unum.h>
73 #include <sys/cyclic.h>
74 #include <sys/errorq.h>
75 #include <sys/errclassify.h>
76 #include <sys/pghw.h>
77 #include <sys/clock_impl.h>
78 
79 #ifdef	CHEETAHPLUS_ERRATUM_25
80 #include <sys/xc_impl.h>
81 #endif	/* CHEETAHPLUS_ERRATUM_25 */
82 
83 ch_cpu_logout_t	clop_before_flush;
84 ch_cpu_logout_t	clop_after_flush;
85 uint_t	flush_retries_done = 0;
86 /*
87  * Note that 'Cheetah PRM' refers to:
88  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
89  */
90 
91 /*
92  * Per CPU pointers to physical address of TL>0 logout data areas.
93  * These pointers have to be in the kernel nucleus to avoid MMU
94  * misses.
95  */
96 uint64_t ch_err_tl1_paddrs[NCPU];
97 
98 /*
99  * One statically allocated structure to use during startup/DR
100  * to prevent unnecessary panics.
101  */
102 ch_err_tl1_data_t ch_err_tl1_data;
103 
104 /*
105  * Per CPU pending error at TL>0, used by level15 softint handler
106  */
107 uchar_t ch_err_tl1_pending[NCPU];
108 
109 /*
110  * For deferred CE re-enable after trap.
111  */
112 taskq_t		*ch_check_ce_tq;
113 
114 /*
115  * Internal functions.
116  */
117 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
118 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
119 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
120     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
121 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
122     uint64_t t_afsr_bit);
123 static int clear_ecc(struct async_flt *ecc);
124 #if defined(CPU_IMP_ECACHE_ASSOC)
125 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
126 #endif
127 int cpu_ecache_set_size(struct cpu *cp);
128 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
129 int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
130 uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
131 int cpu_ectag_pa_to_subblk_state(int cachesize,
132 				uint64_t subaddr, uint64_t tag);
133 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
134 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
135 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
136 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
137 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
138 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
139 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
140 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
141 static void cpu_scrubphys(struct async_flt *aflt);
142 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
143     int *, int *);
144 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
145 static void cpu_ereport_init(struct async_flt *aflt);
146 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
147 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
148 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
149     uint64_t nceen, ch_cpu_logout_t *clop);
150 static int cpu_ce_delayed_ec_logout(uint64_t);
151 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
152 static int cpu_error_is_ecache_data(int, uint64_t);
153 static void cpu_fmri_cpu_set(nvlist_t *, int);
154 static int cpu_error_to_resource_type(struct async_flt *aflt);
155 
156 #ifdef	CHEETAHPLUS_ERRATUM_25
157 static int mondo_recover_proc(uint16_t, int);
158 static void cheetah_nudge_init(void);
159 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
160     cyc_time_t *when);
161 static void cheetah_nudge_buddy(void);
162 #endif	/* CHEETAHPLUS_ERRATUM_25 */
163 
164 #if defined(CPU_IMP_L1_CACHE_PARITY)
165 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
166 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
167 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
168     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
169 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
170 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
171 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
172 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
173 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
174 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
175 #endif	/* CPU_IMP_L1_CACHE_PARITY */
176 
177 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
178     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
179     int *segsp, int *banksp, int *mcidp);
180 
181 /*
182  * This table is used to determine which bit(s) is(are) bad when an ECC
182  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
184  * of this array have the following semantics:
185  *
186  *      00-127  The number of the bad bit, when only one bit is bad.
187  *      128     ECC bit C0 is bad.
188  *      129     ECC bit C1 is bad.
189  *      130     ECC bit C2 is bad.
190  *      131     ECC bit C3 is bad.
191  *      132     ECC bit C4 is bad.
192  *      133     ECC bit C5 is bad.
193  *      134     ECC bit C6 is bad.
194  *      135     ECC bit C7 is bad.
195  *      136     ECC bit C8 is bad.
196  *	137-143 reserved for Mtag Data and ECC.
197  *      144(M2) Two bits are bad within a nibble.
198  *      145(M3) Three bits are bad within a nibble.
199  *      146(M4) Four bits are bad within a nibble.
200  *      147(M)  Multiple bits (5 or more) are bad.
201  *      148     NO bits are bad.
202  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
203  */
204 
205 #define	C0	128
206 #define	C1	129
207 #define	C2	130
208 #define	C3	131
209 #define	C4	132
210 #define	C5	133
211 #define	C6	134
212 #define	C7	135
213 #define	C8	136
214 #define	MT0	137	/* Mtag Data bit 0 */
215 #define	MT1	138
216 #define	MT2	139
217 #define	MTC0	140	/* Mtag Check bit 0 */
218 #define	MTC1	141
219 #define	MTC2	142
220 #define	MTC3	143
221 #define	M2	144
222 #define	M3	145
223 #define	M4	146
224 #define	M	147
225 #define	NA	148
226 #if defined(JALAPENO) || defined(SERRANO)
227 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
228 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
229 #define	SLAST	S003MEM	/* last special syndrome */
230 #else /* JALAPENO || SERRANO */
231 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
232 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
233 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
234 #define	SLAST	S11C	/* last special syndrome */
235 #endif /* JALAPENO || SERRANO */
236 #if defined(JALAPENO) || defined(SERRANO)
237 #define	BPAR0	152	/* syndromes 152 through 167 for bus parity */
238 #define	BPAR15	167
239 #endif	/* JALAPENO || SERRANO */
240 
241 static uint8_t ecc_syndrome_tab[] =
242 {
243 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
244 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
245 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
246 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
247 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
248 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
249 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
250 #if defined(JALAPENO) || defined(SERRANO)
251 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
252 #else	/* JALAPENO || SERRANO */
253 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
254 #endif	/* JALAPENO || SERRANO */
255 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
256 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
257 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
258 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
259 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
260 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
261 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
262 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
263 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
264 #if defined(JALAPENO) || defined(SERRANO)
265 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
266 #else	/* JALAPENO || SERRANO */
267 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
268 #endif	/* JALAPENO || SERRANO */
269 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
270 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
271 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
272 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
273 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
274 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
275 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
276 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
277 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
278 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
279 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
280 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
281 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
282 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
283 };
284 
285 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
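
/*
 * A sketch of how the table is meant to be consumed (illustrative only,
 * not code taken from this file): the 32 rows of 16 entries above give
 * ESYND_TBL_SIZE == 512, so a full 9-bit E$ syndrome can index it
 * directly, e.g.
 *
 *	uint8_t code = ecc_syndrome_tab[synd & 0x1ff];
 *
 * where a code of 0-127 names the single bad data bit, C0-C8 name a bad
 * check bit, M2/M3/M4/M flag multi-bit errors and NA means no bits are
 * bad (see the legend above the table).
 */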
286 
287 #if !(defined(JALAPENO) || defined(SERRANO))
288 /*
289  * This table is used to determine which bit(s) is(are) bad when an Mtag
290  * error occurs.  The array is indexed by a 4-bit ECC syndrome.  The entries
291  * of this array have the following semantics:
292  *
293  *      -1	Invalid mtag syndrome.
294  *      137     Mtag Data 0 is bad.
295  *      138     Mtag Data 1 is bad.
296  *      139     Mtag Data 2 is bad.
297  *      140     Mtag ECC 0 is bad.
298  *      141     Mtag ECC 1 is bad.
299  *      142     Mtag ECC 2 is bad.
300  *      143     Mtag ECC 3 is bad.
301  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
302  */
303 short mtag_syndrome_tab[] =
304 {
305 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
306 };
307 
308 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
309 
310 #else /* !(JALAPENO || SERRANO) */
311 
312 #define	BSYND_TBL_SIZE	16
313 
314 #endif /* !(JALAPENO || SERRANO) */
315 
316 /*
317  * Virtual Address bit flag in the data cache. This is actually bit 2 in the
318  * dcache data tag.
319  */
320 #define	VA13	INT64_C(0x0000000000000002)
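
/*
 * That is, bit 2 of the dcache data tag presumably latches VA<13> of the
 * cached line's virtual address, which is what the VA13 name refers to.
 */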
321 
322 /*
323  * Types returned from cpu_error_to_resource_type()
324  */
325 #define	ERRTYPE_UNKNOWN		0
326 #define	ERRTYPE_CPU		1
327 #define	ERRTYPE_MEMORY		2
328 #define	ERRTYPE_ECACHE_DATA	3
329 
330 /*
331  * CE initial classification and subsequent action lookup table
332  */
333 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
334 static int ce_disp_inited;
335 
336 /*
337  * Set to disable leaky and partner check for memory correctables
338  */
339 int ce_xdiag_off;
340 
341 /*
342  * The following are not incremented atomically so are indicative only
343  */
344 static int ce_xdiag_drops;
345 static int ce_xdiag_lkydrops;
346 static int ce_xdiag_ptnrdrops;
347 static int ce_xdiag_bad;
348 
349 /*
350  * CE leaky check callback structure
351  */
352 typedef struct {
353 	struct async_flt *lkycb_aflt;
354 	errorq_t *lkycb_eqp;
355 	errorq_elem_t *lkycb_eqep;
356 } ce_lkychk_cb_t;
357 
358 /*
359  * defines for various ecache_flush_flag's
360  */
361 #define	ECACHE_FLUSH_LINE	1
362 #define	ECACHE_FLUSH_ALL	2
363 
364 /*
365  * STICK sync
366  */
367 #define	STICK_ITERATION 10
368 #define	MAX_TSKEW	1
369 #define	EV_A_START	0
370 #define	EV_A_END	1
371 #define	EV_B_START	2
372 #define	EV_B_END	3
373 #define	EVENTS		4
374 
375 static int64_t stick_iter = STICK_ITERATION;
376 static int64_t stick_tsk = MAX_TSKEW;
377 
378 typedef enum {
379 	EVENT_NULL = 0,
380 	SLAVE_START,
381 	SLAVE_CONT,
382 	MASTER_START
383 } event_cmd_t;
384 
385 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
386 static int64_t timestamp[EVENTS];
387 static volatile int slave_done;
388 
389 #ifdef DEBUG
390 #define	DSYNC_ATTEMPTS 64
391 typedef struct {
392 	int64_t	skew_val[DSYNC_ATTEMPTS];
393 } ss_t;
394 
395 ss_t stick_sync_stats[NCPU];
396 #endif /* DEBUG */
397 
398 uint_t cpu_impl_dual_pgsz = 0;
399 #if defined(CPU_IMP_DUAL_PAGESIZE)
400 uint_t disable_dual_pgsz = 0;
401 #endif	/* CPU_IMP_DUAL_PAGESIZE */
402 
403 /*
404  * Save the cache bootup state for use when internal
405  * caches are to be re-enabled after an error occurs.
406  */
407 uint64_t cache_boot_state;
408 
409 /*
410  * PA[22:0] represent Displacement in Safari configuration space.
411  */
412 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
413 
414 bus_config_eclk_t bus_config_eclk[] = {
415 #if defined(JALAPENO) || defined(SERRANO)
416 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
417 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
418 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
419 #else /* JALAPENO || SERRANO */
420 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
421 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
422 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
423 #endif /* JALAPENO || SERRANO */
424 	{0, 0}
425 };
426 
427 /*
428  * Interval for deferred CEEN reenable
429  */
430 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
431 
432 /*
433  * set in /etc/system to control logging of user BERR/TO's
434  */
435 int cpu_berr_to_verbose = 0;
436 
437 /*
438  * set to 0 in /etc/system to defer CEEN reenable for all CEs
439  */
440 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
441 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
442 
443 /*
444  * Set of all offline cpus
445  */
446 cpuset_t cpu_offline_set;
447 
448 static void cpu_delayed_check_ce_errors(void *);
449 static void cpu_check_ce_errors(void *);
450 void cpu_error_ecache_flush(ch_async_flt_t *);
451 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
452 static void cpu_log_and_clear_ce(ch_async_flt_t *);
453 void cpu_ce_detected(ch_cpu_errors_t *, int);
454 
455 /*
456  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
457  * memory refresh interval of current DIMMs (64ms).  After the initial fix, that
458  * gives at least one full refresh cycle in which the cell can leak
459  * (whereafter further refreshes simply reinforce any incorrect bit value).
460  */
461 clock_t cpu_ce_lkychk_timeout_usec = 128000;
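/* 128000 us == 128 ms, i.e. twice the 64 ms refresh interval cited above */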
462 
463 /*
464  * CE partner check partner caching period in seconds
465  */
466 int cpu_ce_ptnr_cachetime_sec = 60;
467 
468 /*
469  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
470  */
471 #define	CH_SET_TRAP(ttentry, ttlabel)			\
472 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
473 		flush_instr_mem((caddr_t)&ttentry, 32);
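
/*
 * The 32 bytes copied by CH_SET_TRAP are the eight 4-byte SPARC
 * instructions mentioned above; flush_instr_mem() then keeps the I$
 * consistent with the freshly patched trap table entry.
 */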
474 
475 static int min_ecache_size;
476 static uint_t priv_hcl_1;
477 static uint_t priv_hcl_2;
478 static uint_t priv_hcl_4;
479 static uint_t priv_hcl_8;
480 
481 void
482 cpu_setup(void)
483 {
484 	extern int at_flags;
485 	extern int cpc_has_overflow_intr;
486 
487 	/*
488 	 * Setup chip-specific trap handlers.
489 	 */
490 	cpu_init_trap();
491 
492 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
493 
494 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
495 
496 	/*
497 	 * save the cache bootup state.
498 	 */
499 	cache_boot_state = get_dcu() & DCU_CACHE;
500 
501 	/*
502 	 * Due to the number of entries in the fully-associative tlb
503 	 * this may have to be tuned lower than in spitfire.
504 	 */
505 	pp_slots = MIN(8, MAXPP_SLOTS);
506 
507 	/*
508 	 * Block stores do not invalidate all pages of the d$, pagecopy
509 	 * et. al. need virtual translations with virtual coloring taken
510 	 * et al. need virtual translations with virtual coloring taken
511 	 * load side.
512 	 */
513 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
514 
515 	if (use_page_coloring) {
516 		do_pg_coloring = 1;
517 	}
518 
519 	isa_list =
520 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
521 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523 
524 	/*
525 	 * On Panther-based machines, this should
526 	 * also include AV_SPARC_POPC too
527 	 * also include AV_SPARC_POPC
528 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529 
530 	/*
531 	 * On cheetah, there's no hole in the virtual address space
532 	 */
533 	hole_start = hole_end = 0;
534 
535 	/*
536 	 * The kpm mapping window.
537 	 * kpm_size:
538 	 *	The size of a single kpm range.
539 	 *	The overall size will be: kpm_size * vac_colors.
540 	 * kpm_vbase:
541 	 *	The virtual start address of the kpm range within the kernel
542 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
543 	 */
544 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545 	kpm_size_shift = 43;
546 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547 	kpm_smallpages = 1;
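	/*
	 * As a check on the constants above: 1ULL << 43 is the 8 TB
	 * kpm_size, and kpm_vbase at 0x8000000000000000 marks the upper
	 * half of the 64-bit address space (the "8EB" point).
	 */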
548 
549 	/*
550 	 * The traptrace code uses either %tick or %stick for
551 	 * timestamping.  We have %stick so we can use it.
552 	 */
553 	traptrace_use_stick = 1;
554 
555 	/*
556 	 * Cheetah has a performance counter overflow interrupt
557 	 */
558 	cpc_has_overflow_intr = 1;
559 
560 #if defined(CPU_IMP_DUAL_PAGESIZE)
561 	/*
562 	 * Use Cheetah+ and later dual page size support.
563 	 */
564 	if (!disable_dual_pgsz) {
565 		cpu_impl_dual_pgsz = 1;
566 	}
567 #endif	/* CPU_IMP_DUAL_PAGESIZE */
568 
569 	/*
570 	 * Declare that this architecture/cpu combination does fpRAS.
571 	 */
572 	fpras_implemented = 1;
573 
574 	/*
575 	 * Setup CE lookup table
576 	 */
577 	CE_INITDISPTBL_POPULATE(ce_disp_table);
578 	ce_disp_inited = 1;
579 }
580 
581 /*
582  * Called by setcpudelay
583  */
584 void
585 cpu_init_tick_freq(void)
586 {
587 	/*
588 	 * For UltraSPARC III and beyond we want to use the
589 	 * system clock rate as the basis for low level timing,
590 	 * due to support of mixed-speed CPUs and power management.
591 	 */
592 	if (system_clock_freq == 0)
593 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
594 
595 	sys_tick_freq = system_clock_freq;
596 }
597 
598 #ifdef CHEETAHPLUS_ERRATUM_25
599 /*
600  * Tunables
601  */
602 int cheetah_bpe_off = 0;
603 int cheetah_sendmondo_recover = 1;
604 int cheetah_sendmondo_fullscan = 0;
605 int cheetah_sendmondo_recover_delay = 5;
606 
607 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
608 
609 /*
610  * Recovery Statistics
611  */
612 typedef struct cheetah_livelock_entry	{
613 	int cpuid;		/* fallen cpu */
614 	int buddy;		/* cpu that ran recovery */
615 	clock_t lbolt;		/* when recovery started */
616 	hrtime_t recovery_time;	/* time spent in recovery */
617 } cheetah_livelock_entry_t;
618 
619 #define	CHEETAH_LIVELOCK_NENTRY	32
620 
621 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
622 int cheetah_livelock_entry_nxt;
623 
624 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
625 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
626 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
627 		cheetah_livelock_entry_nxt = 0;				\
628 	}								\
629 }
630 
631 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
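
/*
 * Together these macros treat cheetah_livelock_hist[] as a small ring
 * buffer: ENTRY_NEXT hands back the next of the CHEETAH_LIVELOCK_NENTRY
 * slots (wrapping the index), and ENTRY_SET fills in one field of that
 * slot.
 */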
632 
633 struct {
634 	hrtime_t hrt;		/* maximum recovery time */
635 	int recovery;		/* recovered */
636 	int full_claimed;	/* maximum pages claimed in full recovery */
637 	int proc_entry;		/* attempted to claim TSB */
638 	int proc_tsb_scan;	/* tsb scanned */
639 	int proc_tsb_partscan;	/* tsb partially scanned */
640 	int proc_tsb_fullscan;	/* whole tsb scanned */
641 	int proc_claimed;	/* maximum pages claimed in tsb scan */
642 	int proc_user;		/* user thread */
643 	int proc_kernel;	/* kernel thread */
644 	int proc_onflt;		/* bad stack */
645 	int proc_cpu;		/* null cpu */
646 	int proc_thread;	/* null thread */
647 	int proc_proc;		/* null proc */
648 	int proc_as;		/* null as */
649 	int proc_hat;		/* null hat */
650 	int proc_hat_inval;	/* hat contents don't make sense */
651 	int proc_hat_busy;	/* hat is changing TSBs */
652 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
653 	int proc_cnum_bad;	/* cnum out of range */
654 	int proc_cnum;		/* last cnum processed */
655 	tte_t proc_tte;		/* last tte processed */
656 } cheetah_livelock_stat;
657 
658 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
659 
660 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
661 	cheetah_livelock_stat.item = value
662 
663 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
664 	if (value > cheetah_livelock_stat.item)		\
665 		cheetah_livelock_stat.item = value;	\
666 }
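
/*
 * STAT bumps a counter, STATSET records the most recent value, and
 * MAXSTAT keeps a high-water mark in cheetah_livelock_stat.
 */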
667 
668 /*
669  * Attempt to recover a cpu by claiming every cache line as saved
670  * in the TSB that the non-responsive cpu is using. Since we can't
671  * grab any adaptive lock, this is at best an attempt to do so. Because
672  * we don't grab any locks, we must operate under the protection of
673  * on_fault().
674  *
675  * Return 1 if cpuid could be recovered, 0 if failed.
676  */
677 int
678 mondo_recover_proc(uint16_t cpuid, int bn)
679 {
680 	label_t ljb;
681 	cpu_t *cp;
682 	kthread_t *t;
683 	proc_t *p;
684 	struct as *as;
685 	struct hat *hat;
686 	uint_t  cnum;
687 	struct tsb_info *tsbinfop;
688 	struct tsbe *tsbep;
689 	caddr_t tsbp;
690 	caddr_t end_tsbp;
691 	uint64_t paddr;
692 	uint64_t idsr;
693 	u_longlong_t pahi, palo;
694 	int pages_claimed = 0;
695 	tte_t tsbe_tte;
696 	int tried_kernel_tsb = 0;
697 	mmu_ctx_t *mmu_ctxp;
698 
699 	CHEETAH_LIVELOCK_STAT(proc_entry);
700 
701 	if (on_fault(&ljb)) {
702 		CHEETAH_LIVELOCK_STAT(proc_onflt);
703 		goto badstruct;
704 	}
705 
706 	if ((cp = cpu[cpuid]) == NULL) {
707 		CHEETAH_LIVELOCK_STAT(proc_cpu);
708 		goto badstruct;
709 	}
710 
711 	if ((t = cp->cpu_thread) == NULL) {
712 		CHEETAH_LIVELOCK_STAT(proc_thread);
713 		goto badstruct;
714 	}
715 
716 	if ((p = ttoproc(t)) == NULL) {
717 		CHEETAH_LIVELOCK_STAT(proc_proc);
718 		goto badstruct;
719 	}
720 
721 	if ((as = p->p_as) == NULL) {
722 		CHEETAH_LIVELOCK_STAT(proc_as);
723 		goto badstruct;
724 	}
725 
726 	if ((hat = as->a_hat) == NULL) {
727 		CHEETAH_LIVELOCK_STAT(proc_hat);
728 		goto badstruct;
729 	}
730 
731 	if (hat != ksfmmup) {
732 		CHEETAH_LIVELOCK_STAT(proc_user);
733 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
734 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
735 			goto badstruct;
736 		}
737 		tsbinfop = hat->sfmmu_tsb;
738 		if (tsbinfop == NULL) {
739 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
740 			goto badstruct;
741 		}
742 		tsbp = tsbinfop->tsb_va;
743 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
744 	} else {
745 		CHEETAH_LIVELOCK_STAT(proc_kernel);
746 		tsbinfop = NULL;
747 		tsbp = ktsb_base;
748 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
749 	}
750 
751 	/* Verify as */
752 	if (hat->sfmmu_as != as) {
753 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
754 		goto badstruct;
755 	}
756 
757 	mmu_ctxp = CPU_MMU_CTXP(cp);
758 	ASSERT(mmu_ctxp);
759 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
760 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
761 
762 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
763 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
764 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
765 		goto badstruct;
766 	}
767 
768 	do {
769 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
770 
771 		/*
772 		 * Skip TSBs being relocated.  This is important because
773 		 * we want to avoid the following deadlock scenario:
774 		 *
775 		 * 1) when we came in we set ourselves to "in recover" state.
776 		 * 2) when we try to touch TSB being relocated the mapping
777 		 *    will be in the suspended state so we'll spin waiting
778 		 *    for it to be unlocked.
779 		 * 3) when the CPU that holds the TSB mapping locked tries to
780 		 *    unlock it, it will send an xtrap which will fail to xcall
781 		 *    us or the CPU we're trying to recover, and will in turn
782 		 *    enter the mondo code.
783 		 * 4) since we are still spinning on the locked mapping
784 		 *    no further progress will be made and the system will
785 		 *    inevitably hard hang.
786 		 *
787 		 * A TSB not being relocated can't begin being relocated
788 		 * while we're accessing it because we check
789 		 * sendmondo_in_recover before relocating TSBs.
790 		 */
791 		if (hat != ksfmmup &&
792 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
793 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
794 			goto next_tsbinfo;
795 		}
796 
797 		for (tsbep = (struct tsbe *)tsbp;
798 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
799 			tsbe_tte = tsbep->tte_data;
800 
801 			if (tsbe_tte.tte_val == 0) {
802 				/*
803 				 * Invalid tte
804 				 */
805 				continue;
806 			}
807 			if (tsbe_tte.tte_se) {
808 				/*
809 				 * Don't want device registers
810 				 */
811 				continue;
812 			}
813 			if (tsbe_tte.tte_cp == 0) {
814 				/*
815 				 * Must be cached in E$
816 				 */
817 				continue;
818 			}
819 			if (tsbep->tte_tag.tag_invalid != 0) {
820 				/*
821 				 * Invalid tag, ignore this entry.
822 				 */
823 				continue;
824 			}
825 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
826 			idsr = getidsr();
827 			if ((idsr & (IDSR_NACK_BIT(bn) |
828 			    IDSR_BUSY_BIT(bn))) == 0) {
829 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
830 				goto done;
831 			}
832 			pahi = tsbe_tte.tte_pahi;
833 			palo = tsbe_tte.tte_palo;
834 			paddr = (uint64_t)((pahi << 32) |
835 			    (palo << MMU_PAGESHIFT));
836 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
837 			    CH_ECACHE_SUBBLK_SIZE);
838 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
839 				shipit(cpuid, bn);
840 			}
841 			pages_claimed++;
842 		}
843 next_tsbinfo:
844 		if (tsbinfop != NULL)
845 			tsbinfop = tsbinfop->tsb_next;
846 		if (tsbinfop != NULL) {
847 			tsbp = tsbinfop->tsb_va;
848 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
849 		} else if (tsbp == ktsb_base) {
850 			tried_kernel_tsb = 1;
851 		} else if (!tried_kernel_tsb) {
852 			tsbp = ktsb_base;
853 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
854 			hat = ksfmmup;
855 			tsbinfop = NULL;
856 		}
857 	} while (tsbinfop != NULL ||
858 	    ((tsbp == ktsb_base) && !tried_kernel_tsb));
859 
860 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
861 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
862 	no_fault();
863 	idsr = getidsr();
864 	if ((idsr & (IDSR_NACK_BIT(bn) |
865 	    IDSR_BUSY_BIT(bn))) == 0) {
866 		return (1);
867 	} else {
868 		return (0);
869 	}
870 
871 done:
872 	no_fault();
873 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
874 	return (1);
875 
876 badstruct:
877 	no_fault();
878 	return (0);
879 }
880 
881 /*
882  * Attempt to claim ownership, temporarily, of every cache line that a
883  * non-responsive cpu might be using.  This might kick that cpu out of
884  * this state.
885  *
886  * The return value indicates to the caller if we have exhausted all recovery
887  * techniques. If 1 is returned, it is useless to call this function again
888  * even for a different target CPU.
889  */
890 int
891 mondo_recover(uint16_t cpuid, int bn)
892 {
893 	struct memseg *seg;
894 	uint64_t begin_pa, end_pa, cur_pa;
895 	hrtime_t begin_hrt, end_hrt;
896 	int retval = 0;
897 	int pages_claimed = 0;
898 	cheetah_livelock_entry_t *histp;
899 	uint64_t idsr;
900 
901 	if (atomic_cas_32(&sendmondo_in_recover, 0, 1) != 0) {
902 		/*
903 		 * Wait while recovery takes place
904 		 */
905 		while (sendmondo_in_recover) {
906 			drv_usecwait(1);
907 		}
908 		/*
909 		 * Assume we didn't claim the whole memory. If
910 		 * the target of this caller is not recovered,
911 		 * it will come back.
912 		 */
913 		return (retval);
914 	}
915 
916 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp);
917 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, LBOLT_WAITFREE);
918 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
919 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
920 
921 	begin_hrt = gethrtime_waitfree();
922 	/*
923 	 * First try to claim the lines in the TSB the target
924 	 * may have been using.
925 	 */
926 	if (mondo_recover_proc(cpuid, bn) == 1) {
927 		/*
928 		 * Didn't claim the whole memory
929 		 */
930 		goto done;
931 	}
932 
933 	/*
934 	 * We tried using the TSB. The target is still
935 	 * not recovered. Check if complete memory scan is
936 	 * enabled.
937 	 */
938 	if (cheetah_sendmondo_fullscan == 0) {
939 		/*
940 		 * Full memory scan is disabled.
941 		 */
942 		retval = 1;
943 		goto done;
944 	}
945 
946 	/*
947 	 * Try claiming the whole memory.
948 	 */
949 	for (seg = memsegs; seg; seg = seg->next) {
950 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
951 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
952 		for (cur_pa = begin_pa; cur_pa < end_pa;
953 		    cur_pa += MMU_PAGESIZE) {
954 			idsr = getidsr();
955 			if ((idsr & (IDSR_NACK_BIT(bn) |
956 			    IDSR_BUSY_BIT(bn))) == 0) {
957 				/*
958 				 * Didn't claim all memory
959 				 */
960 				goto done;
961 			}
962 			claimlines(cur_pa, MMU_PAGESIZE,
963 			    CH_ECACHE_SUBBLK_SIZE);
964 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
965 				shipit(cpuid, bn);
966 			}
967 			pages_claimed++;
968 		}
969 	}
970 
971 	/*
972 	 * We did all we could.
973 	 */
974 	retval = 1;
975 
976 done:
977 	/*
978 	 * Update statistics
979 	 */
980 	end_hrt = gethrtime_waitfree();
981 	CHEETAH_LIVELOCK_STAT(recovery);
982 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
983 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
984 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
985 	    (end_hrt -  begin_hrt));
986 
987 	while (atomic_cas_32(&sendmondo_in_recover, 1, 0) != 1)
988 		;
989 
990 	return (retval);
991 }
992 
993 /*
994  * This is called by the cyclic framework when this CPU becomes online
995  */
996 /*ARGSUSED*/
997 static void
998 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
999 {
1000 
1001 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1002 	hdlr->cyh_level = CY_LOW_LEVEL;
1003 	hdlr->cyh_arg = NULL;
1004 
1005 	/*
1006 	 * Stagger the start time
1007 	 */
1008 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
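	/*
	 * i.e. each CPU's cyclic is phased cpu_id/NCPU seconds apart, so
	 * the nudges are staggered rather than all firing together.
	 */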
1009 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1010 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1011 	}
1012 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1013 }
1014 
1015 /*
1016  * Create a low level cyclic to send a xtrap to the next cpu online.
1017  * However, there's no need to have this running on a uniprocessor system.
1018  */
1019 static void
1020 cheetah_nudge_init(void)
1021 {
1022 	cyc_omni_handler_t hdlr;
1023 
1024 	if (max_ncpus == 1) {
1025 		return;
1026 	}
1027 
1028 	hdlr.cyo_online = cheetah_nudge_onln;
1029 	hdlr.cyo_offline = NULL;
1030 	hdlr.cyo_arg = NULL;
1031 
1032 	mutex_enter(&cpu_lock);
1033 	(void) cyclic_add_omni(&hdlr);
1034 	mutex_exit(&cpu_lock);
1035 }
1036 
1037 /*
1038  * Cyclic handler to wake up buddy
1039  */
1040 void
1041 cheetah_nudge_buddy(void)
1042 {
1043 	/*
1044 	 * Disable kernel preemption to protect the cpu list
1045 	 */
1046 	kpreempt_disable();
1047 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1048 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1049 		    0, 0);
1050 	}
1051 	kpreempt_enable();
1052 }
1053 
1054 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1055 
1056 #ifdef SEND_MONDO_STATS
1057 uint32_t x_one_stimes[64];
1058 uint32_t x_one_ltimes[16];
1059 uint32_t x_set_stimes[64];
1060 uint32_t x_set_ltimes[16];
1061 uint32_t x_set_cpus[NCPU];
1062 uint32_t x_nack_stimes[64];
1063 #endif
1064 
1065 /*
1066  * Note: A version of this function is used by the debugger via the KDI,
1067  * and must be kept in sync with this version.  Any changes made to this
1068  * function to support new chips or to accommodate errata must also be included
1069  * in the KDI-specific version.  See us3_kdi.c.
1070  */
1071 void
1072 send_one_mondo(int cpuid)
1073 {
1074 	int busy, nack;
1075 	uint64_t idsr, starttick, endtick, tick, lasttick;
1076 	uint64_t busymask;
1077 #ifdef	CHEETAHPLUS_ERRATUM_25
1078 	int recovered = 0;
1079 #endif
1080 
1081 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1082 	starttick = lasttick = gettick();
1083 	shipit(cpuid, 0);
1084 	endtick = starttick + xc_tick_limit;
1085 	busy = nack = 0;
1086 #if defined(JALAPENO) || defined(SERRANO)
1087 	/*
1088 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1089 	 * will be used for dispatching the interrupt. For now, assume
1090 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1091 	 * issues with respect to BUSY/NACK pair usage.
1092 	 */
1093 	busymask  = IDSR_BUSY_BIT(cpuid);
1094 #else /* JALAPENO || SERRANO */
1095 	busymask = IDSR_BUSY;
1096 #endif /* JALAPENO || SERRANO */
1097 	for (;;) {
1098 		idsr = getidsr();
1099 		if (idsr == 0)
1100 			break;
1101 
1102 		tick = gettick();
1103 		/*
1104 		 * If there is a big jump between the current tick
1105 		 * count and lasttick, we have probably hit a break
1106 		 * point.  Adjust endtick accordingly to avoid panic.
1107 		 */
1108 		if (tick > (lasttick + xc_tick_jump_limit))
1109 			endtick += (tick - lasttick);
1110 		lasttick = tick;
1111 		if (tick > endtick) {
1112 			if (panic_quiesce)
1113 				return;
1114 #ifdef	CHEETAHPLUS_ERRATUM_25
1115 			if (cheetah_sendmondo_recover && recovered == 0) {
1116 				if (mondo_recover(cpuid, 0)) {
1117 					/*
1118 					 * We claimed the whole memory or
1119 					 * full scan is disabled.
1120 					 */
1121 					recovered++;
1122 				}
1123 				tick = gettick();
1124 				endtick = tick + xc_tick_limit;
1125 				lasttick = tick;
1126 				/*
1127 				 * Recheck idsr
1128 				 */
1129 				continue;
1130 			} else
1131 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1132 			{
1133 				cmn_err(CE_PANIC, "send mondo timeout "
1134 				    "(target 0x%x) [%d NACK %d BUSY]",
1135 				    cpuid, nack, busy);
1136 			}
1137 		}
1138 
1139 		if (idsr & busymask) {
1140 			busy++;
1141 			continue;
1142 		}
1143 		drv_usecwait(1);
1144 		shipit(cpuid, 0);
1145 		nack++;
1146 		busy = 0;
1147 	}
1148 #ifdef SEND_MONDO_STATS
1149 	{
1150 		int n = gettick() - starttick;
1151 		if (n < 8192)
1152 			x_one_stimes[n >> 7]++;
1153 		else
1154 			x_one_ltimes[(n >> 13) & 0xf]++;
1155 	}
1156 #endif
1157 }
1158 
1159 void
1160 syncfpu(void)
1161 {
1162 }
1163 
1164 /*
1165  * Return processor specific async error structure
1166  * size used.
1167  */
1168 int
1169 cpu_aflt_size(void)
1170 {
1171 	return (sizeof (ch_async_flt_t));
1172 }
1173 
1174 /*
1175  * Tunable to disable the checking of other cpu logout areas during panic for
1176  * potential syndrome 71 generating errors.
1177  */
1178 int enable_check_other_cpus_logout = 1;
1179 
1180 /*
1181  * Check other cpus logout area for potential synd 71 generating
1182  * errors.
1183  */
1184 static void
1185 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1186     ch_cpu_logout_t *clop)
1187 {
1188 	struct async_flt *aflt;
1189 	ch_async_flt_t ch_flt;
1190 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1191 
1192 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1193 		return;
1194 	}
1195 
1196 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1197 
1198 	t_afar = clop->clo_data.chd_afar;
1199 	t_afsr = clop->clo_data.chd_afsr;
1200 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1201 #if defined(SERRANO)
1202 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1203 #endif	/* SERRANO */
1204 
1205 	/*
1206 	 * In order to simplify code, we maintain this afsr_errs
1207 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1208 	 * sticky bits.
1209 	 */
1210 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1211 	    (t_afsr & C_AFSR_ALL_ERRS);
1212 
1213 	/* Setup the async fault structure */
1214 	aflt = (struct async_flt *)&ch_flt;
1215 	aflt->flt_id = gethrtime_waitfree();
1216 	ch_flt.afsr_ext = t_afsr_ext;
1217 	ch_flt.afsr_errs = t_afsr_errs;
1218 	aflt->flt_stat = t_afsr;
1219 	aflt->flt_addr = t_afar;
1220 	aflt->flt_bus_id = cpuid;
1221 	aflt->flt_inst = cpuid;
1222 	aflt->flt_pc = tpc;
1223 	aflt->flt_prot = AFLT_PROT_NONE;
1224 	aflt->flt_class = CPU_FAULT;
1225 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1226 	aflt->flt_tl = tl;
1227 	aflt->flt_status = ecc_type;
1228 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1229 
1230 	/*
1231 	 * Queue events on the async event queue, one event per error bit.
1232 	 * If no events are queued, queue an event to complain.
1233 	 */
1234 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1235 		ch_flt.flt_type = CPU_INV_AFSR;
1236 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1237 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1238 		    aflt->flt_panic);
1239 	}
1240 
1241 	/*
1242 	 * Zero out + invalidate CPU logout.
1243 	 */
1244 	bzero(clop, sizeof (ch_cpu_logout_t));
1245 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1246 }
1247 
1248 /*
1249  * Check the logout areas of all other cpus for unlogged errors.
1250  */
1251 static void
1252 cpu_check_other_cpus_logout(void)
1253 {
1254 	int i, j;
1255 	processorid_t myid;
1256 	struct cpu *cp;
1257 	ch_err_tl1_data_t *cl1p;
1258 
1259 	myid = CPU->cpu_id;
1260 	for (i = 0; i < NCPU; i++) {
1261 		cp = cpu[i];
1262 
1263 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1264 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1265 			continue;
1266 		}
1267 
1268 		/*
1269 		 * Check each of the tl>0 logout areas
1270 		 */
1271 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1272 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1273 			if (cl1p->ch_err_tl1_flags == 0)
1274 				continue;
1275 
1276 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1277 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1278 		}
1279 
1280 		/*
1281 		 * Check each of the remaining logout areas
1282 		 */
1283 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1284 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1285 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1286 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1287 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1288 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1289 	}
1290 }
1291 
1292 /*
1293  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1294  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1295  * flush the error that caused the UCU/UCC, then again here at the end to
1296  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1297  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1298  * another Fast ECC trap.
1299  *
1300  * Cheetah+ also handles: TSCE: No additional processing required.
1301  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1302  *
1303  * Note that the p_clo_flags input is only valid in cases where the
1304  * cpu_private struct is not yet initialized (since that is the only
1305  * time that information cannot be obtained from the logout struct.)
1306  */
1307 /*ARGSUSED*/
1308 void
1309 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1310 {
1311 	ch_cpu_logout_t *clop;
1312 	uint64_t ceen, nceen;
1313 
1314 	/*
1315 	 * Get the CPU log out info. If we can't find our CPU private
1316 	 * pointer, then we will have to make do without any detailed
1317 	 * logout information.
1318 	 */
1319 	if (CPU_PRIVATE(CPU) == NULL) {
1320 		clop = NULL;
1321 		ceen = p_clo_flags & EN_REG_CEEN;
1322 		nceen = p_clo_flags & EN_REG_NCEEN;
1323 	} else {
1324 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1325 		ceen = clop->clo_flags & EN_REG_CEEN;
1326 		nceen = clop->clo_flags & EN_REG_NCEEN;
1327 	}
1328 
1329 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1330 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1331 }
1332 
1333 /*
1334  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1335  * ECC at TL>0.  Need to supply either a error register pointer or a
1336  * cpu logout structure pointer.
1337  */
1338 static void
1339 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1340     uint64_t nceen, ch_cpu_logout_t *clop)
1341 {
1342 	struct async_flt *aflt;
1343 	ch_async_flt_t ch_flt;
1344 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1345 	char pr_reason[MAX_REASON_STRING];
1346 	ch_cpu_errors_t cpu_error_regs;
1347 
1348 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1349 	/*
1350 	 * If no cpu logout data, then we will have to make do without
1351 	 * any detailed logout information.
1352 	 */
1353 	if (clop == NULL) {
1354 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1355 		get_cpu_error_state(&cpu_error_regs);
1356 		set_cpu_error_state(&cpu_error_regs);
1357 		t_afar = cpu_error_regs.afar;
1358 		t_afsr = cpu_error_regs.afsr;
1359 		t_afsr_ext = cpu_error_regs.afsr_ext;
1360 #if defined(SERRANO)
1361 		ch_flt.afar2 = cpu_error_regs.afar2;
1362 #endif	/* SERRANO */
1363 	} else {
1364 		t_afar = clop->clo_data.chd_afar;
1365 		t_afsr = clop->clo_data.chd_afsr;
1366 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1367 #if defined(SERRANO)
1368 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1369 #endif	/* SERRANO */
1370 	}
1371 
1372 	/*
1373 	 * In order to simplify code, we maintain this afsr_errs
1374 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1375 	 * sticky bits.
1376 	 */
1377 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1378 	    (t_afsr & C_AFSR_ALL_ERRS);
1379 	pr_reason[0] = '\0';
1380 
1381 	/* Setup the async fault structure */
1382 	aflt = (struct async_flt *)&ch_flt;
1383 	aflt->flt_id = gethrtime_waitfree();
1384 	ch_flt.afsr_ext = t_afsr_ext;
1385 	ch_flt.afsr_errs = t_afsr_errs;
1386 	aflt->flt_stat = t_afsr;
1387 	aflt->flt_addr = t_afar;
1388 	aflt->flt_bus_id = getprocessorid();
1389 	aflt->flt_inst = CPU->cpu_id;
1390 	aflt->flt_pc = tpc;
1391 	aflt->flt_prot = AFLT_PROT_NONE;
1392 	aflt->flt_class = CPU_FAULT;
1393 	aflt->flt_priv = priv;
1394 	aflt->flt_tl = tl;
1395 	aflt->flt_status = ECC_F_TRAP;
1396 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1397 
1398 	/*
1399 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1400 	 * cmn_err messages out to the console.  The situation is a UCU (in
1401 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1402 	 * The messages for the UCU and WDU are enqueued and then pulled off
1403 	 * the async queue via softint and syslogd starts to process them
1404 	 * but doesn't get them to the console.  The UE causes a panic, but
1405 	 * since the UCU/WDU messages are already in transit, those aren't
1406 	 * on the async queue.  The hack is to check if we have a matching
1407 	 * WDU event for the UCU, and if it matches, we're more than likely
1408 	 * going to panic with a UE, unless we're under protection.  So, we
1409 	 * check to see if we got a matching WDU event and if we're under
1410 	 * protection.
1411 	 *
1412 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1413 	 * looks like this:
1414 	 *    UCU->WDU->UE
1415 	 * For Panther, it could look like either of these:
1416 	 *    UCU---->WDU->L3_WDU->UE
1417 	 *    L3_UCU->WDU->L3_WDU->UE
1418 	 */
1419 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1420 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1421 	    curthread->t_ontrap == NULL &&
1422 	    curthread->t_lofault == (uintptr_t)NULL) {
1423 		get_cpu_error_state(&cpu_error_regs);
1424 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
1425 			aflt->flt_panic |=
1426 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1427 			    (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
1428 			    (cpu_error_regs.afar == t_afar));
1429 			aflt->flt_panic |= ((clop == NULL) &&
1430 			    (t_afsr_errs & C_AFSR_WDU) &&
1431 			    (t_afsr_errs & C_AFSR_L3_WDU));
1432 		} else {
1433 			aflt->flt_panic |=
1434 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1435 			    (cpu_error_regs.afar == t_afar));
1436 			aflt->flt_panic |= ((clop == NULL) &&
1437 			    (t_afsr_errs & C_AFSR_WDU));
1438 		}
1439 	}
1440 
1441 	/*
1442 	 * Queue events on the async event queue, one event per error bit.
1443 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1444 	 * queue an event to complain.
1445 	 */
1446 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1447 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1448 		ch_flt.flt_type = CPU_INV_AFSR;
1449 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1450 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1451 		    aflt->flt_panic);
1452 	}
1453 
1454 	/*
1455 	 * Zero out + invalidate CPU logout.
1456 	 */
1457 	if (clop) {
1458 		bzero(clop, sizeof (ch_cpu_logout_t));
1459 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1460 	}
1461 
1462 	/*
1463 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1464 	 * or disrupting errors have happened.  We do this because if a
1465 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1466 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1467 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1468 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1469 	 * deferred or disrupting error happening between checking the AFSR and
1470 	 * enabling NCEEN/CEEN.
1471 	 *
1472 	 * Note: CEEN and NCEEN are only reenabled if they were on when the trap
1473 	 * was taken.
1474 	 */
1475 	set_error_enable(get_error_enable() | (nceen | ceen));
1476 	if (clear_errors(&ch_flt)) {
1477 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1478 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1479 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1480 		    NULL);
1481 	}
1482 
1483 	/*
1484 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1485 	 * be logged as part of the panic flow.
1486 	 */
1487 	if (aflt->flt_panic)
1488 		fm_panic("%sError(s)", pr_reason);
1489 
1490 	/*
1491 	 * Flushing the Ecache here gets the part of the trap handler that
1492 	 * is run at TL=1 out of the Ecache.
1493 	 */
1494 	cpu_flush_ecache();
1495 }
1496 
1497 /*
1498  * This is called via sys_trap from pil15_interrupt code if the
1499  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1500  * various ch_err_tl1_data structures for valid entries based on the bit
1501  * settings in the ch_err_tl1_flags entry of the structure.
1502  */
1503 /*ARGSUSED*/
1504 void
1505 cpu_tl1_error(struct regs *rp, int panic)
1506 {
1507 	ch_err_tl1_data_t *cl1p, cl1;
1508 	int i, ncl1ps;
1509 	uint64_t me_flags;
1510 	uint64_t ceen, nceen;
1511 
1512 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1513 		cl1p = &ch_err_tl1_data;
1514 		ncl1ps = 1;
1515 	} else if (CPU_PRIVATE(CPU) != NULL) {
1516 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1517 		ncl1ps = CH_ERR_TL1_TLMAX;
1518 	} else {
1519 		ncl1ps = 0;
1520 	}
1521 
1522 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1523 		if (cl1p->ch_err_tl1_flags == 0)
1524 			continue;
1525 
1526 		/*
1527 		 * Grab a copy of the logout data and invalidate
1528 		 * the logout area.
1529 		 */
1530 		cl1 = *cl1p;
1531 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1532 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1533 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1534 
1535 		/*
1536 		 * Log "first error" in ch_err_tl1_data.
1537 		 */
1538 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1539 			ceen = get_error_enable() & EN_REG_CEEN;
1540 			nceen = get_error_enable() & EN_REG_NCEEN;
1541 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1542 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1543 		}
1544 #if defined(CPU_IMP_L1_CACHE_PARITY)
1545 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1546 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1547 			    (caddr_t)cl1.ch_err_tl1_tpc);
1548 		}
1549 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1550 
1551 		/*
1552 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1553 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1554 		 * if the structure is busy; we just do the cache flushing
1555 		 * we have to do and then do the retry.  So the AFSR/AFAR
1556 		 * at this point *should* have some relevant info.  If there
1557 		 * are no valid errors in the AFSR, we'll assume they've
1558 		 * already been picked up and logged.  For I$/D$ parity,
1559 		 * we just log an event with an "Unknown" (NULL) TPC.
1560 		 */
1561 		if (me_flags & CH_ERR_FECC) {
1562 			ch_cpu_errors_t cpu_error_regs;
1563 			uint64_t t_afsr_errs;
1564 
1565 			/*
1566 			 * Get the error registers and see if there's
1567 			 * a pending error.  If not, don't bother
1568 			 * generating an "Invalid AFSR" error event.
1569 			 */
1570 			get_cpu_error_state(&cpu_error_regs);
1571 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1572 			    C_AFSR_EXT_ALL_ERRS) |
1573 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1574 			if (t_afsr_errs != 0) {
1575 				ceen = get_error_enable() & EN_REG_CEEN;
1576 				nceen = get_error_enable() & EN_REG_NCEEN;
1577 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1578 				    1, ceen, nceen, NULL);
1579 			}
1580 		}
1581 #if defined(CPU_IMP_L1_CACHE_PARITY)
1582 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1583 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1584 		}
1585 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1586 	}
1587 }
1588 
1589 /*
1590  * Called from Fast ECC TL>0 handler in case of fatal error.
1591  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1592  * but if we don't, we'll panic with something reasonable.
1593  */
1594 /*ARGSUSED*/
1595 void
1596 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1597 {
1598 	cpu_tl1_error(rp, 1);
1599 	/*
1600 	 * Should never return, but just in case.
1601 	 */
1602 	fm_panic("Unsurvivable ECC Error at TL>0");
1603 }
1604 
1605 /*
1606  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1607  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1608  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1609  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1610  *
1611  * Cheetah+ also handles (No additional processing required):
1612  *    DUE, DTO, DBERR	(NCEEN controlled)
1613  *    THCE		(CEEN and ET_ECC_en controlled)
1614  *    TUE		(ET_ECC_en controlled)
1615  *
1616  * Panther further adds:
1617  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1618  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1619  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1620  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1621  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1622  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1623  *
1624  * Note that the p_clo_flags input is only valid in cases where the
1625  * cpu_private struct is not yet initialized (since that is the only
1626  * time that information cannot be obtained from the logout struct.)
1627  */
1628 /*ARGSUSED*/
1629 void
1630 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1631 {
1632 	struct async_flt *aflt;
1633 	ch_async_flt_t ch_flt;
1634 	char pr_reason[MAX_REASON_STRING];
1635 	ch_cpu_logout_t *clop;
1636 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1637 	ch_cpu_errors_t cpu_error_regs;
1638 
1639 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1640 	/*
1641 	 * Get the CPU log out info. If we can't find our CPU private
1642 	 * pointer, then we will have to make do without any detailed
1643 	 * logout information.
1644 	 */
1645 	if (CPU_PRIVATE(CPU) == NULL) {
1646 		clop = NULL;
1647 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1648 		get_cpu_error_state(&cpu_error_regs);
1649 		set_cpu_error_state(&cpu_error_regs);
1650 		t_afar = cpu_error_regs.afar;
1651 		t_afsr = cpu_error_regs.afsr;
1652 		t_afsr_ext = cpu_error_regs.afsr_ext;
1653 #if defined(SERRANO)
1654 		ch_flt.afar2 = cpu_error_regs.afar2;
1655 #endif	/* SERRANO */
1656 	} else {
1657 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1658 		t_afar = clop->clo_data.chd_afar;
1659 		t_afsr = clop->clo_data.chd_afsr;
1660 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1661 #if defined(SERRANO)
1662 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1663 #endif	/* SERRANO */
1664 	}
1665 
1666 	/*
1667 	 * In order to simplify code, we maintain this afsr_errs
1668 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1669 	 * sticky bits.
1670 	 */
1671 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1672 	    (t_afsr & C_AFSR_ALL_ERRS);
1673 
1674 	pr_reason[0] = '\0';
1675 	/* Setup the async fault structure */
1676 	aflt = (struct async_flt *)&ch_flt;
1677 	ch_flt.afsr_ext = t_afsr_ext;
1678 	ch_flt.afsr_errs = t_afsr_errs;
1679 	aflt->flt_stat = t_afsr;
1680 	aflt->flt_addr = t_afar;
1681 	aflt->flt_pc = (caddr_t)rp->r_pc;
1682 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1683 	aflt->flt_tl = 0;
1684 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1685 
1686 	/*
1687 	 * If this trap is a result of one of the errors not masked
1688 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead we
1689 	 * indicate that a timeout is to be set later.
1690 	 */
1691 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1692 	    !aflt->flt_panic)
1693 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1694 	else
1695 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1696 
1697 	/*
1698 	 * log the CE and clean up
1699 	 */
1700 	cpu_log_and_clear_ce(&ch_flt);
1701 
1702 	/*
1703 	 * We re-enable CEEN (if required) and check if any disrupting errors
1704 	 * have happened.  We do this because if a disrupting error had occurred
1705 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1706 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1707 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1708 	 * of an error happening between checking the AFSR and enabling CEEN.
1709 	 */
1710 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1711 		set_error_enable(get_error_enable() | EN_REG_CEEN);
1712 	if (clear_errors(&ch_flt)) {
1713 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1714 		    NULL);
1715 	}
1716 
1717 	/*
1718 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1719 	 * be logged as part of the panic flow.
1720 	 */
1721 	if (aflt->flt_panic)
1722 		fm_panic("%sError(s)", pr_reason);
1723 }
1724 
1725 /*
1726  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1727  * L3_EDU:BLD, TO, and BERR events.
1728  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1729  *
1730  * Cheetah+: No additional errors handled.
1731  *
1732  * Note that the p_clo_flags input is only valid in cases where the
1733  * cpu_private struct is not yet initialized (since that is the only
1734  * time that information cannot be obtained from the logout struct.)
1735  */
1736 /*ARGSUSED*/
1737 void
1738 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1739 {
1740 	ushort_t ttype, tl;
1741 	ch_async_flt_t ch_flt;
1742 	struct async_flt *aflt;
1743 	int trampolined = 0;
1744 	char pr_reason[MAX_REASON_STRING];
1745 	ch_cpu_logout_t *clop;
1746 	uint64_t ceen, clo_flags;
1747 	uint64_t log_afsr;
1748 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1749 	ch_cpu_errors_t cpu_error_regs;
1750 	int expected = DDI_FM_ERR_UNEXPECTED;
1751 	ddi_acc_hdl_t *hp;
1752 
1753 	/*
1754 	 * We need to look at p_flag to determine if the thread detected an
1755 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1756 	 * because we just need a consistent snapshot and we know that everyone
1757 	 * else will store a consistent set of bits while holding p_lock.  We
1758 	 * don't have to worry about a race because SDOCORE is set once prior
1759 	 * to doing i/o from the process's address space and is never cleared.
1760 	 */
1761 	uint_t pflag = ttoproc(curthread)->p_flag;
1762 
1763 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1764 	/*
1765 	 * Get the CPU log out info. If we can't find our CPU private
1766 	 * pointer then we will have to make do without any detailed
1767 	 * logout information.
1768 	 */
1769 	if (CPU_PRIVATE(CPU) == NULL) {
1770 		clop = NULL;
1771 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1772 		get_cpu_error_state(&cpu_error_regs);
1773 		set_cpu_error_state(&cpu_error_regs);
1774 		t_afar = cpu_error_regs.afar;
1775 		t_afsr = cpu_error_regs.afsr;
1776 		t_afsr_ext = cpu_error_regs.afsr_ext;
1777 #if defined(SERRANO)
1778 		ch_flt.afar2 = cpu_error_regs.afar2;
1779 #endif	/* SERRANO */
1780 		clo_flags = p_clo_flags;
1781 	} else {
1782 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1783 		t_afar = clop->clo_data.chd_afar;
1784 		t_afsr = clop->clo_data.chd_afsr;
1785 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1786 #if defined(SERRANO)
1787 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1788 #endif	/* SERRANO */
1789 		clo_flags = clop->clo_flags;
1790 	}
1791 
1792 	/*
1793 	 * In order to simplify code, we maintain this afsr_errs
1794 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1795 	 * sticky bits.
1796 	 */
1797 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1798 	    (t_afsr & C_AFSR_ALL_ERRS);
1799 	pr_reason[0] = '\0';
1800 
1801 	/*
1802 	 * Grab information encoded into our clo_flags field.
1803 	 */
1804 	ceen = clo_flags & EN_REG_CEEN;
1805 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1806 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1807 
1808 	/*
1809 	 * handle the specific error
1810 	 */
1811 	aflt = (struct async_flt *)&ch_flt;
1812 	aflt->flt_id = gethrtime_waitfree();
1813 	aflt->flt_bus_id = getprocessorid();
1814 	aflt->flt_inst = CPU->cpu_id;
1815 	ch_flt.afsr_ext = t_afsr_ext;
1816 	ch_flt.afsr_errs = t_afsr_errs;
1817 	aflt->flt_stat = t_afsr;
1818 	aflt->flt_addr = t_afar;
1819 	aflt->flt_pc = (caddr_t)rp->r_pc;
1820 	aflt->flt_prot = AFLT_PROT_NONE;
1821 	aflt->flt_class = CPU_FAULT;
1822 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1823 	aflt->flt_tl = (uchar_t)tl;
1824 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1825 	    C_AFSR_PANIC(t_afsr_errs));
1826 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1827 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1828 
1829 	/*
1830 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1831 	 * see if we were executing in the kernel under on_trap() or t_lofault
1832 	 * protection.  If so, modify the saved registers so that we return
1833 	 * from the trap to the appropriate trampoline routine.
1834 	 */
1835 	if (aflt->flt_priv && tl == 0) {
1836 		if (curthread->t_ontrap != NULL) {
1837 			on_trap_data_t *otp = curthread->t_ontrap;
1838 
1839 			if (otp->ot_prot & OT_DATA_EC) {
1840 				aflt->flt_prot = AFLT_PROT_EC;
1841 				otp->ot_trap |= OT_DATA_EC;
1842 				rp->r_pc = otp->ot_trampoline;
1843 				rp->r_npc = rp->r_pc + 4;
1844 				trampolined = 1;
1845 			}
1846 
1847 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1848 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1849 				aflt->flt_prot = AFLT_PROT_ACCESS;
1850 				otp->ot_trap |= OT_DATA_ACCESS;
1851 				rp->r_pc = otp->ot_trampoline;
1852 				rp->r_npc = rp->r_pc + 4;
1853 				trampolined = 1;
1854 				/*
1855 				 * for peeks and caut_gets, errors are expected
1856 				 */
1857 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1858 				if (!hp)
1859 					expected = DDI_FM_ERR_PEEK;
1860 				else if (hp->ah_acc.devacc_attr_access ==
1861 				    DDI_CAUTIOUS_ACC)
1862 					expected = DDI_FM_ERR_EXPECTED;
1863 			}
1864 
1865 		} else if (curthread->t_lofault) {
1866 			aflt->flt_prot = AFLT_PROT_COPY;
1867 			rp->r_g1 = EFAULT;
1868 			rp->r_pc = curthread->t_lofault;
1869 			rp->r_npc = rp->r_pc + 4;
1870 			trampolined = 1;
1871 		}
1872 	}
1873 
1874 	/*
1875 	 * If we're in user mode or we're doing a protected copy, we either
1876 	 * want the ASTON code below to send a signal to the user process
1877 	 * or we want to panic if aft_panic is set.
1878 	 *
1879 	 * If we're in privileged mode and we're not doing a copy, then we
1880 	 * need to check if we've trampolined.  If we haven't trampolined,
1881 	 * we should panic.
1882 	 */
1883 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1884 		if (t_afsr_errs &
1885 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1886 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1887 			aflt->flt_panic |= aft_panic;
1888 	} else if (!trampolined) {
1889 		aflt->flt_panic = 1;
1890 	}
1891 
1892 	/*
1893 	 * If we've trampolined due to a privileged TO or BERR, or if an
1894 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1895 	 * event for that TO or BERR.  Queue all other events (if any) besides
1896  * the TO/BERR.  Since we may not be enqueuing any events, we need to
1897 	 * ignore the number of events queued.  If we haven't trampolined due
1898 	 * to a TO or BERR, just enqueue events normally.
1899 	 */
1900 	log_afsr = t_afsr_errs;
1901 	if (trampolined) {
1902 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1903 	} else if (!aflt->flt_priv) {
1904 		/*
1905 		 * User mode, suppress messages if
1906 		 * cpu_berr_to_verbose is not set.
1907 		 */
1908 		if (!cpu_berr_to_verbose)
1909 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1910 	}
1911 
1912 	/*
1913 	 * Log any errors that occurred
1914 	 */
1915 	if (((log_afsr &
1916 	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1917 	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1918 	    (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1919 		ch_flt.flt_type = CPU_INV_AFSR;
1920 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1921 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1922 		    aflt->flt_panic);
1923 	}
1924 
1925 	/*
1926 	 * Zero out + invalidate CPU logout.
1927 	 */
1928 	if (clop) {
1929 		bzero(clop, sizeof (ch_cpu_logout_t));
1930 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1931 	}
1932 
1933 #if defined(JALAPENO) || defined(SERRANO)
1934 	/*
1935 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1936 	 * IO errors that may have resulted in this trap.
1937 	 */
1938 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1939 		cpu_run_bus_error_handlers(aflt, expected);
1940 	}
1941 
1942 	/*
1943 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1944 	 * line from the Ecache.  We also need to query the bus nexus for
1945 	 * fatal errors.  Attempts to do diagnostic read on caches may
1946 	 * introduce more errors (especially when the module is bad).
1947 	 */
1948 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1949 		/*
1950 		 * Ask our bus nexus friends if they have any fatal errors.  If
1951 		 * so, they will log appropriate error messages.
1952 		 */
1953 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1954 			aflt->flt_panic = 1;
1955 
1956 		/*
1957 		 * We got a UE or RUE and are panicking, save the fault PA in
1958 		 * a known location so that the platform specific panic code
1959 		 * can check for copyback errors.
1960 		 */
1961 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1962 			panic_aflt = *aflt;
1963 		}
1964 	}
1965 
1966 	/*
1967 	 * Flush Ecache line or entire Ecache
1968 	 */
1969 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1970 		cpu_error_ecache_flush(&ch_flt);
1971 #else /* JALAPENO || SERRANO */
1972 	/*
1973 	 * UE/BERR/TO: Call our bus nexus friends to check for
1974 	 * IO errors that may have resulted in this trap.
1975 	 */
1976 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1977 		cpu_run_bus_error_handlers(aflt, expected);
1978 	}
1979 
1980 	/*
1981 	 * UE: If the UE is in memory, we need to flush the bad
1982 	 * line from the Ecache.  We also need to query the bus nexus for
1983 	 * fatal errors.  Attempts to do diagnostic read on caches may
1984 	 * introduce more errors (especially when the module is bad).
1985 	 */
1986 	if (t_afsr & C_AFSR_UE) {
1987 		/*
1988 		 * Ask our legacy bus nexus friends if they have any fatal
1989 		 * errors.  If so, they will log appropriate error messages.
1990 		 */
1991 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1992 			aflt->flt_panic = 1;
1993 
1994 		/*
1995 		 * We got a UE and are panicking, save the fault PA in a known
1996 		 * location so that the platform specific panic code can check
1997 		 * for copyback errors.
1998 		 */
1999 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
2000 			panic_aflt = *aflt;
2001 		}
2002 	}
2003 
2004 	/*
2005 	 * Flush Ecache line or entire Ecache
2006 	 */
2007 	if (t_afsr_errs &
2008 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
2009 		cpu_error_ecache_flush(&ch_flt);
2010 #endif /* JALAPENO || SERRANO */
2011 
2012 	/*
2013 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2014 	 * or disrupting errors have happened.  We do this because if a
2015 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2016 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2017 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2018 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2019 	 * deferred or disrupting error happening between checking the AFSR and
2020 	 * enabling NCEEN/CEEN.
2021 	 *
2022 	 * Note: CEEN reenabled only if it was on when trap taken.
2023 	 */
2024 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2025 	if (clear_errors(&ch_flt)) {
2026 		/*
2027 		 * Check for secondary errors, and avoid panicking if we
2028 		 * have them
2029 		 */
2030 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2031 		    t_afar) == 0) {
2032 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2033 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2034 		}
2035 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2036 		    NULL);
2037 	}
2038 
2039 	/*
2040 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2041 	 * be logged as part of the panic flow.
2042 	 */
2043 	if (aflt->flt_panic)
2044 		fm_panic("%sError(s)", pr_reason);
2045 
2046 	/*
2047 	 * If we queued an error and we are going to return from the trap and
2048 	 * the error was in user mode or inside of a copy routine, set AST flag
2049 	 * so the queue will be drained before returning to user mode.  The
2050 	 * AST processing will also act on our failure policy.
2051 	 */
2052 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2053 		int pcb_flag = 0;
2054 
2055 		if (t_afsr_errs &
2056 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
2057 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2058 			pcb_flag |= ASYNC_HWERR;
2059 
2060 		if (t_afsr & C_AFSR_BERR)
2061 			pcb_flag |= ASYNC_BERR;
2062 
2063 		if (t_afsr & C_AFSR_TO)
2064 			pcb_flag |= ASYNC_BTO;
2065 
2066 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2067 		aston(curthread);
2068 	}
2069 }
2070 
2071 #if defined(CPU_IMP_L1_CACHE_PARITY)
2072 /*
2073  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2074  *
2075  * For Panther, P$ data parity errors during floating point load hits
2076  * are also detected (reported as TT 0x71) and handled by this trap
2077  * handler.
2078  *
2079  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2080  * is available.
2081  */
2082 /*ARGSUSED*/
2083 void
2084 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2085 {
2086 	ch_async_flt_t ch_flt;
2087 	struct async_flt *aflt;
2088 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2089 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2090 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2091 	char *error_class;
2092 	int index, way, word;
2093 	ch_dc_data_t tmp_dcp;
2094 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
2095 	uint64_t parity_bits, pbits;
2096 	/* parity_bits_popc[x] is the parity (sum mod 2) of the 2-bit value x */
2097 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
2098 
2099 	/*
2100 	 * Log the error.
2101 	 * For icache parity errors the fault address is the trap PC.
2102 	 * For dcache/pcache parity errors the instruction would have to
2103 	 * be decoded to determine the address and that isn't possible
2104 	 * at high PIL.
2105 	 */
2106 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2107 	aflt = (struct async_flt *)&ch_flt;
2108 	aflt->flt_id = gethrtime_waitfree();
2109 	aflt->flt_bus_id = getprocessorid();
2110 	aflt->flt_inst = CPU->cpu_id;
2111 	aflt->flt_pc = tpc;
2112 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2113 	aflt->flt_prot = AFLT_PROT_NONE;
2114 	aflt->flt_class = CPU_FAULT;
2115 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2116 	aflt->flt_tl = tl;
2117 	aflt->flt_panic = panic;
2118 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2119 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2120 
2121 	if (iparity) {
2122 		cpu_icache_parity_info(&ch_flt);
2123 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2124 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2125 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2126 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2127 		else
2128 			error_class = FM_EREPORT_CPU_USIII_IPE;
2129 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2130 	} else {
2131 		cpu_dcache_parity_info(&ch_flt);
2132 		if (ch_flt.parity_data.dpe.cpl_off != -1) {
2133 			/*
2134 			 * If not at TL 0 and running on a Jalapeno processor,
2135 			 * then process as a true ddspe.  A true
2136 			 * ddspe error can only occur if the way == 0
2137 			 */
2138 			way = ch_flt.parity_data.dpe.cpl_way;
2139 			if ((tl == 0) && (way != 0) &&
2140 			    IS_JALAPENO(cpunodes[CPU->cpu_id].implementation)) {
2141 				for (index = 0; index < dc_set_size;
2142 				    index += dcache_linesize) {
2143 					get_dcache_dtag(index + way *
2144 					    dc_set_size,
2145 					    (uint64_t *)&tmp_dcp);
2146 					/*
2147 					 * Check data array for even parity.
2148 					 * The 8 parity bits are grouped into
2149 					 * 4 pairs each of which covers a 64-bit
2150 					 * word.  The endianness is reversed
2151 					 * -- the low-order parity bits cover
2152 					 * the high-order data words.
2153 					 */
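					/*
					 * Concretely: word 0 pairs with
					 * parity bits [7:6] of the shifted
					 * utag, word 1 with [5:4], word 2
					 * with [3:2] and word 3 with [1:0].
					 */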
2154 					parity_bits = tmp_dcp.dc_utag >> 8;
2155 					for (word = 0; word < 4; word++) {
2156 						pbits = (parity_bits >>
2157 						    (6 - word * 2)) & 3;
2158 						if (((popc64(
2159 						    tmp_dcp.dc_data[word]) +
2160 						    parity_bits_popc[pbits]) &
2161 						    1) && (tmp_dcp.dc_tag &
2162 						    VA13)) {
2163 							/* cleanup */
2164 							correct_dcache_parity(
2165 							    dcache_size,
2166 							    dcache_linesize);
2167 							if (cache_boot_state &
2168 							    DCU_DC) {
2169 								flush_dcache();
2170 							}
2171 
2172 							set_dcu(get_dcu() |
2173 							    cache_boot_state);
2174 							return;
2175 						}
2176 					}
2177 				}
2178 			} /* (tl == 0) && (way != 0) && IS JALAPENO */
2179 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2180 		} else if (ch_flt.parity_data.dpe.cpl_way != -1)
2181 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2182 		else
2183 			error_class = FM_EREPORT_CPU_USIII_DPE;
2184 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2185 		/*
2186 		 * For panther we also need to check the P$ for parity errors.
2187 		 */
2188 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2189 			cpu_pcache_parity_info(&ch_flt);
2190 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2191 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2192 				aflt->flt_payload =
2193 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2194 			}
2195 		}
2196 	}
2197 
2198 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2199 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2200 
2201 	if (iparity) {
2202 		/*
2203 		 * Invalidate entire I$.
2204 		 * This is required due to the use of diagnostic ASI
2205 		 * accesses that may result in a loss of I$ coherency.
2206 		 */
2207 		if (cache_boot_state & DCU_IC) {
2208 			flush_icache();
2209 		}
2210 		/*
2211 		 * According to section P.3.1 of the Panther PRM, we
2212 		 * need to do a little more for recovery on those
2213 		 * CPUs after encountering an I$ parity error.
2214 		 */
2215 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2216 			flush_ipb();
2217 			correct_dcache_parity(dcache_size,
2218 			    dcache_linesize);
2219 			flush_pcache();
2220 		}
2221 	} else {
2222 		/*
2223 		 * Since the valid bit is ignored when checking parity, the
2224 		 * D$ data and tag must also be corrected.  Set D$ data bits
2225 		 * to zero and set utag to 0, 1, 2, 3.
2226 		 */
2227 		correct_dcache_parity(dcache_size, dcache_linesize);
2228 
2229 		/*
2230 		 * According to section P.3.3 of the Panther PRM, we
2231 		 * need to do a little more for recovery on those
2232 		 * CPUs after encountering a D$ or P$ parity error.
2233 		 *
2234 		 * As far as clearing P$ parity errors, it is enough to
2235 		 * simply invalidate all entries in the P$ since P$ parity
2236 		 * error traps are only generated for floating point load
2237 		 * hits.
2238 		 */
2239 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2240 			flush_icache();
2241 			flush_ipb();
2242 			flush_pcache();
2243 		}
2244 	}
2245 
2246 	/*
2247 	 * Invalidate entire D$ if it was enabled.
2248 	 * This is done to avoid stale data in the D$ which might
2249 	 * occur with the D$ disabled and the trap handler doing
2250 	 * stores affecting lines already in the D$.
2251 	 */
2252 	if (cache_boot_state & DCU_DC) {
2253 		flush_dcache();
2254 	}
2255 
2256 	/*
2257 	 * Restore caches to their bootup state.
2258 	 */
2259 	set_dcu(get_dcu() | cache_boot_state);
2260 
2261 	/*
2262 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2263 	 * be logged as part of the panic flow.
2264 	 */
2265 	if (aflt->flt_panic)
2266 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2267 
2268 	/*
2269 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2270 	 * the chance of getting an unrecoverable Fast ECC error.  This
2271 	 * flush will evict the part of the parity trap handler that is run
2272 	 * at TL>1.
2273 	 */
2274 	if (tl) {
2275 		cpu_flush_ecache();
2276 	}
2277 }
2278 
2279 /*
2280  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2281  * to indicate which portions of the captured data should be in the ereport.
2282  */
2283 void
2284 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2285 {
2286 	int way = ch_flt->parity_data.ipe.cpl_way;
2287 	int offset = ch_flt->parity_data.ipe.cpl_off;
2288 	int tag_index;
2289 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2290 
2291 
2292 	if ((offset != -1) || (way != -1)) {
2293 		/*
2294 		 * Parity error in I$ tag or data
2295 		 */
2296 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2297 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2298 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2299 			    PN_ICIDX_TO_WAY(tag_index);
2300 		else
2301 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2302 			    CH_ICIDX_TO_WAY(tag_index);
2303 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2304 		    IC_LOGFLAG_MAGIC;
2305 	} else {
2306 		/*
2307 		 * Parity error was not identified.
2308 		 * Log tags and data for all ways.
2309 		 */
2310 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2311 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2312 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2313 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2314 				    PN_ICIDX_TO_WAY(tag_index);
2315 			else
2316 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2317 				    CH_ICIDX_TO_WAY(tag_index);
2318 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2319 			    IC_LOGFLAG_MAGIC;
2320 		}
2321 	}
2322 }
2323 
2324 /*
2325  * On a D$ parity error, mark the appropriate entries in the ch_async_flt_t
2326  * to indicate which portions of the captured data should be in the ereport.
2327  */
2328 void
2329 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2330 {
2331 	int way = ch_flt->parity_data.dpe.cpl_way;
2332 	int offset = ch_flt->parity_data.dpe.cpl_off;
2333 	int tag_index;
2334 
2335 	if (offset != -1) {
2336 		/*
2337 		 * Parity error in D$ or P$ data array.
2338 		 *
2339 		 * First check to see whether the parity error is in D$ or P$
2340 		 * since P$ data parity errors are reported in Panther using
2341 		 * the same trap.
2342 		 */
2343 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2344 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2345 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2346 			    CH_PCIDX_TO_WAY(tag_index);
2347 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2348 			    PC_LOGFLAG_MAGIC;
2349 		} else {
2350 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2351 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2352 			    CH_DCIDX_TO_WAY(tag_index);
2353 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2354 			    DC_LOGFLAG_MAGIC;
2355 		}
2356 	} else if (way != -1) {
2357 		/*
2358 		 * Parity error in D$ tag.
2359 		 */
2360 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2361 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2362 		    CH_DCIDX_TO_WAY(tag_index);
2363 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2364 		    DC_LOGFLAG_MAGIC;
2365 	}
2366 }
2367 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2368 
2369 /*
2370  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2371  * post-process CPU events that are dequeued.  As such, it can be invoked
2372  * from softint context, from AST processing in the trap() flow, or from the
2373  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2374  * Historically this entry point was used to log the actual cmn_err(9F) text;
2375  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2376  * With FMA this function now also returns a flag which indicates to the
2377  * caller whether the ereport should be posted (1) or suppressed (0).
2378  */
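/*
 * For example, cpu_ce_log_err() below posts an ereport for a CPU_FAULT only
 * when this function returns 1.
 */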
2379 static int
2380 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2381 {
2382 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2383 	struct async_flt *aflt = (struct async_flt *)flt;
2384 	uint64_t errors;
2385 	extern void memscrub_induced_error(void);
2386 
2387 	switch (ch_flt->flt_type) {
2388 	case CPU_INV_AFSR:
2389 		/*
2390 		 * If it is a disrupting trap and the AFSR is zero, then
2391 		 * the event has probably already been noted. Do not post
2392 		 * an ereport.
2393 		 */
2394 		if ((aflt->flt_status & ECC_C_TRAP) &&
2395 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2396 			return (0);
2397 		else
2398 			return (1);
2399 	case CPU_TO:
2400 	case CPU_BERR:
2401 	case CPU_FATAL:
2402 	case CPU_FPUERR:
2403 		return (1);
2404 
2405 	case CPU_UE_ECACHE_RETIRE:
2406 		cpu_log_err(aflt);
2407 		cpu_page_retire(ch_flt);
2408 		return (1);
2409 
2410 	/*
2411 	 * Cases where we may want to suppress logging or perform
2412 	 * extended diagnostics.
2413 	 */
2414 	case CPU_CE:
2415 	case CPU_EMC:
2416 		/*
2417 		 * We want to skip logging and further classification
2418 		 * only if ALL the following conditions are true:
2419 		 *
2420 		 *	1. There is only one error
2421 		 *	2. That error is a correctable memory error
2422 		 *	3. The error is caused by the memory scrubber (in
2423 		 *	   which case the error will have occurred under
2424 		 *	   on_trap protection)
2425 		 *	4. The error is on a retired page
2426 		 *
2427 		 * Note: AFLT_PROT_EC is used in places other than the memory
2428 		 * scrubber.  However, none of those errors should occur
2429 		 * on a retired page.
2430 		 */
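		/*
		 * In the test below, the afsr_errs == C_AFSR_CE comparison
		 * covers conditions 1 and 2, AFLT_PROT_EC covers condition 3
		 * and page_retire_check() covers condition 4.
		 */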
2431 		if ((ch_flt->afsr_errs &
2432 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2433 		    aflt->flt_prot == AFLT_PROT_EC) {
2434 
2435 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2436 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2437 
2438 					/*
2439 					 * Since we're skipping logging, we'll need
2440 					 * to schedule the re-enabling of CEEN
2441 					 */
2442 					(void) timeout(cpu_delayed_check_ce_errors,
2443 					    (void *)(uintptr_t)aflt->flt_inst,
2444 					    drv_usectohz((clock_t)cpu_ceen_delay_secs
2445 					    * MICROSEC));
2446 				}
2447 
2448 				/*
2449 				 * Inform memscrubber - scrubbing induced
2450 				 * CE on a retired page.
2451 				 */
2452 				memscrub_induced_error();
2453 				return (0);
2454 			}
2455 		}
2456 
2457 		/*
2458 		 * Perform/schedule further classification actions, but
2459 		 * only if the page is healthy (we don't want bad
2460 		 * pages inducing too much diagnostic activity).  If we could
2461 		 * not find a page pointer then we also skip this.  If
2462 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2463 		 * to copy and recirculate the event (for further diagnostics)
2464 		 * and we should not proceed to log it here.
2465 		 *
2466 		 * This must be the last step here before the cpu_log_err()
2467 		 * below - if an event recirculates, cpu_ce_log_err() will not
2468 		 * call the current function again, but will call cpu_log_err()
2469 		 * (avoided here by returning 0) and then cpu_ereport_post().
2470 		 *
2471 		 * Note: Check cpu_impl_async_log_err if changing this
2472 		 */
2473 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2474 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2475 			    CE_XDIAG_SKIP_NOPP);
2476 		} else {
2477 			if (errors != PR_OK) {
2478 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2479 				    CE_XDIAG_SKIP_PAGEDET);
2480 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2481 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2482 				return (0);
2483 			}
2484 		}
2485 		/*FALLTHRU*/
2486 
2487 	/*
2488 	 * Cases where we just want to report the error and continue.
2489 	 */
2490 	case CPU_CE_ECACHE:
2491 	case CPU_UE_ECACHE:
2492 	case CPU_IV:
2493 	case CPU_ORPH:
2494 		cpu_log_err(aflt);
2495 		return (1);
2496 
2497 	/*
2498 	 * Cases where we want to fall through to handle panicking.
2499 	 */
2500 	case CPU_UE:
2501 		/*
2502 		 * We want to skip logging in the same conditions as the
2503 		 * CE case.  In addition, we want to make sure we're not
2504 		 * panicking.
2505 		 */
2506 		if (!panicstr && (ch_flt->afsr_errs &
2507 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2508 		    aflt->flt_prot == AFLT_PROT_EC) {
2509 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2510 				/* Zero the address to clear the error */
2511 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2512 				/*
2513 				 * Inform memscrubber - scrubbing induced
2514 				 * UE on a retired page.
2515 				 */
2516 				memscrub_induced_error();
2517 				return (0);
2518 			}
2519 		}
2520 		cpu_log_err(aflt);
2521 		break;
2522 
2523 	default:
2524 		/*
2525 		 * If the us3_common.c code doesn't know the flt_type, it may
2526 		 * be an implementation-specific code.  Call into the impldep
2527 		 * backend to find out what to do: if it tells us to continue,
2528 		 * break and handle as if falling through from a UE; if not,
2529 		 * the impldep backend has handled the error and we're done.
2530 		 */
2531 		switch (cpu_impl_async_log_err(flt, eqep)) {
2532 		case CH_ASYNC_LOG_DONE:
2533 			return (1);
2534 		case CH_ASYNC_LOG_RECIRC:
2535 			return (0);
2536 		case CH_ASYNC_LOG_CONTINUE:
2537 			break; /* continue on to handle UE-like error */
2538 		default:
2539 			cmn_err(CE_WARN, "discarding error 0x%p with "
2540 			    "invalid fault type (0x%x)",
2541 			    (void *)aflt, ch_flt->flt_type);
2542 			return (0);
2543 		}
2544 	}
2545 
2546 	/* ... fall through from the UE case */
2547 
2548 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2549 		if (!panicstr) {
2550 			cpu_page_retire(ch_flt);
2551 		} else {
2552 			/*
2553 			 * Clear UEs on panic so that we don't
2554 			 * get haunted by them during panic or
2555 			 * after reboot
2556 			 */
2557 			cpu_clearphys(aflt);
2558 			(void) clear_errors(NULL);
2559 		}
2560 	}
2561 
2562 	return (1);
2563 }
2564 
2565 /*
2566  * Retire the bad page that may contain the flushed error.
2567  */
2568 void
2569 cpu_page_retire(ch_async_flt_t *ch_flt)
2570 {
2571 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2572 	(void) page_retire(aflt->flt_addr, PR_UE);
2573 }
2574 
2575 /*
2576  * Return true if the error specified in the AFSR indicates
2577  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2578  * for Panther, none for Jalapeno/Serrano).
2579  */
2580 /* ARGSUSED */
2581 static int
2582 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2583 {
2584 #if defined(JALAPENO) || defined(SERRANO)
2585 	return (0);
2586 #elif defined(CHEETAH_PLUS)
2587 	if (IS_PANTHER(cpunodes[cpuid].implementation))
2588 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2589 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2590 #else	/* CHEETAH_PLUS */
2591 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2592 #endif
2593 }
2594 
2595 /*
2596  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2597  * generic event post-processing for correctable and uncorrectable memory,
2598  * E$, and MTag errors.  Historically this entry point was used to log bits of
2599  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2600  * converted into an ereport.  In addition, it transmits the error to any
2601  * platform-specific service-processor FRU logging routines, if available.
2602  */
2603 void
2604 cpu_log_err(struct async_flt *aflt)
2605 {
2606 	char unum[UNUM_NAMLEN];
2607 	int synd_status, synd_code, afar_status;
2608 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2609 
2610 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2611 		aflt->flt_status |= ECC_ECACHE;
2612 	else
2613 		aflt->flt_status &= ~ECC_ECACHE;
2614 	/*
2615 	 * Determine syndrome status.
2616 	 */
2617 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2618 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2619 
2620 	/*
2621 	 * Determine afar status.
2622 	 */
2623 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2624 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2625 		    ch_flt->flt_bit);
2626 	else
2627 		afar_status = AFLT_STAT_INVALID;
2628 
2629 	synd_code = synd_to_synd_code(synd_status,
2630 	    aflt->flt_synd, ch_flt->flt_bit);
2631 
2632 	/*
2633 	 * If afar status is not invalid do a unum lookup.
2634 	 */
2635 	if (afar_status != AFLT_STAT_INVALID) {
2636 		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2637 	} else {
2638 		unum[0] = '\0';
2639 	}
2640 
2641 	/*
2642 	 * Do not send the fruid message (plat_ecc_error_data_t)
2643 	 * to the SC if it can handle the enhanced error information
2644 	 * (plat_ecc_error2_data_t) or when the tunable
2645 	 * ecc_log_fruid_enable is set to 0.
2646 	 */
2647 
2648 	if (&plat_ecc_capability_sc_get &&
2649 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2650 		if (&plat_log_fruid_error)
2651 			plat_log_fruid_error(synd_code, aflt, unum,
2652 			    ch_flt->flt_bit);
2653 	}
2654 
2655 	if (aflt->flt_func != NULL)
2656 		aflt->flt_func(aflt, unum);
2657 
2658 	if (afar_status != AFLT_STAT_INVALID)
2659 		cpu_log_diag_info(ch_flt);
2660 
2661 	/*
2662 	 * If we have a CEEN error, we do not reenable CEEN until after
2663 	 * we exit the trap handler. Otherwise, another error may
2664 	 * occur causing the handler to be entered recursively.
2665 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2666 	 * to try and ensure that the CPU makes progress in the face
2667 	 * of a CE storm.
2668 	 */
2669 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2670 		(void) timeout(cpu_delayed_check_ce_errors,
2671 		    (void *)(uintptr_t)aflt->flt_inst,
2672 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2673 	}
2674 }
2675 
2676 /*
2677  * Invoked by error_init() early in startup and therefore before
2678  * startup_errorq() is called to drain any error Q -
2679  *
2680  * startup()
2681  *   startup_end()
2682  *     error_init()
2683  *       cpu_error_init()
2684  * errorq_init()
2685  *   errorq_drain()
2686  * start_other_cpus()
2687  *
2688  * The purpose of this routine is to create error-related taskqs.  Taskqs
2689  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2690  * context.
2691  */
2692 void
2693 cpu_error_init(int items)
2694 {
2695 	/*
2696 	 * Create taskq(s) to reenable CE
2697 	 */
2698 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2699 	    items, items, TASKQ_PREPOPULATE);
2700 }
2701 
2702 void
2703 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2704 {
2705 	char unum[UNUM_NAMLEN];
2706 	int len;
2707 
2708 	switch (aflt->flt_class) {
2709 	case CPU_FAULT:
2710 		cpu_ereport_init(aflt);
2711 		if (cpu_async_log_err(aflt, eqep))
2712 			cpu_ereport_post(aflt);
2713 		break;
2714 
2715 	case BUS_FAULT:
2716 		if (aflt->flt_func != NULL) {
2717 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2718 			    unum, UNUM_NAMLEN, &len);
2719 			aflt->flt_func(aflt, unum);
2720 		}
2721 		break;
2722 
2723 	case RECIRC_CPU_FAULT:
2724 		aflt->flt_class = CPU_FAULT;
2725 		cpu_log_err(aflt);
2726 		cpu_ereport_post(aflt);
2727 		break;
2728 
2729 	case RECIRC_BUS_FAULT:
2730 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2731 		/*FALLTHRU*/
2732 	default:
2733 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2734 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2735 		return;
2736 	}
2737 }
2738 
2739 /*
2740  * Scrub and classify a CE.  This function must not modify the
2741  * fault structure passed to it but instead should return the classification
2742  * information.
2743  */
2744 
2745 static uchar_t
2746 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2747 {
2748 	uchar_t disp = CE_XDIAG_EXTALG;
2749 	on_trap_data_t otd;
2750 	uint64_t orig_err;
2751 	ch_cpu_logout_t *clop;
2752 
2753 	/*
2754 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2755 	 * this, but our other callers have not.  Disable preemption to
2756 	 * avoid CPU migration so that we restore CEEN on the correct
2757 	 * cpu later.
2758 	 *
2759 	 * CEEN is cleared so that further CEs that our instruction and
2760 	 * data footprint induce do not cause us to either creep down
2761 	 * kernel stack to the point of overflow, or do so much CE
2762 	 * notification as to make little real forward progress.
2763 	 *
2764 	 * NCEEN must not be cleared.  However it is possible that
2765 	 * our accesses to the flt_addr may provoke a bus error or timeout
2766 	 * if the offending address has just been unconfigured as part of
2767 	 * a DR action.  So we must operate under on_trap protection.
2768 	 */
2769 	kpreempt_disable();
2770 	orig_err = get_error_enable();
2771 	if (orig_err & EN_REG_CEEN)
2772 		set_error_enable(orig_err & ~EN_REG_CEEN);
2773 
2774 	/*
2775 	 * Our classification algorithm includes the line state before
2776 	 * the scrub; we'd like this captured after the detection and
2777 	 * before the algorithm below - the earlier the better.
2778 	 *
2779 	 * If we've come from a cpu CE trap then this info already exists
2780 	 * in the cpu logout area.
2781 	 *
2782 	 * For a CE detected by memscrub for which there was no trap
2783 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2784 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2785 	 * marked the fault structure as incomplete as a flag to later
2786 	 * logging code.
2787 	 *
2788 	 * If called directly from an IO detected CE there has been
2789 	 * no line data capture.  In this case we logout to the cpu logout
2790 	 * area - that's appropriate since it's the cpu cache data we need
2791 	 * for classification.  We thus borrow the cpu logout area for a
2792 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2793 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy
2794 	 * during this time (we will invalidate it again below).
2795 	 * If called from the partner check xcall handler then this cpu
2796 	 * (the partner) has not necessarily experienced a CE at this
2797 	 * address.  But we want to capture line state before its scrub
2798 	 * attempt since we use that in our classification.
2799 	 */
2800 	if (logout_tried == B_FALSE) {
2801 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2802 			disp |= CE_XDIAG_NOLOGOUT;
2803 	}
2804 
2805 	/*
2806 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2807 	 * no longer be valid (if DR'd since the initial event) so we
2808 	 * perform this scrub under on_trap protection.  If this access is
2809 	 * ok then further accesses below will also be ok - DR cannot
2810 	 * proceed while this thread is active (preemption is disabled);
2811 	 * to be safe we'll nonetheless use on_trap again below.
2812 	 */
2813 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2814 		cpu_scrubphys(ecc);
2815 	} else {
2816 		no_trap();
2817 		if (orig_err & EN_REG_CEEN)
2818 			set_error_enable(orig_err);
2819 		kpreempt_enable();
2820 		return (disp);
2821 	}
2822 	no_trap();
2823 
2824 	/*
2825 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2826 	 * Note that it's quite possible that the read sourced the data from
2827 	 * another cpu.
2828 	 */
2829 	if (clear_ecc(ecc))
2830 		disp |= CE_XDIAG_CE1;
2831 
2832 	/*
2833 	 * Read the data again.  This time the read is very likely to
2834 	 * come from memory since the scrub induced a writeback to memory.
2835 	 */
2836 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2837 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2838 	} else {
2839 		no_trap();
2840 		if (orig_err & EN_REG_CEEN)
2841 			set_error_enable(orig_err);
2842 		kpreempt_enable();
2843 		return (disp);
2844 	}
2845 	no_trap();
2846 
2847 	/* Did that read induce a CE that matches the AFAR? */
2848 	if (clear_ecc(ecc))
2849 		disp |= CE_XDIAG_CE2;
2850 
2851 	/*
2852 	 * Look at the logout information and record whether we found the
2853 	 * line in l2/l3 cache.  For Panther we are interested in whether
2854 	 * we found it in either cache (it won't reside in both but
2855 	 * it is possible to read it that way given the moving target).
2856 	 */
2857 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2858 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2859 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2860 		int hit, level;
2861 		int state;
2862 		int totalsize;
2863 		ch_ec_data_t *ecp;
2864 
2865 		/*
2866 		 * If hit is nonzero then a match was found and hit will
2867 		 * be one greater than the index which hit.  For Panther we
2868 		 * also need to pay attention to level to see which of l2$ or
2869 		 * l3$ it hit in.
2870 		 */
2871 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2872 		    0, &level);
2873 
2874 		if (hit) {
2875 			--hit;
2876 			disp |= CE_XDIAG_AFARMATCH;
2877 
2878 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2879 				if (level == 2)
2880 					ecp = &clop->clo_data.chd_l2_data[hit];
2881 				else
2882 					ecp = &clop->clo_data.chd_ec_data[hit];
2883 			} else {
2884 				ASSERT(level == 2);
2885 				ecp = &clop->clo_data.chd_ec_data[hit];
2886 			}
2887 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2888 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2889 			    ecc->flt_addr, ecp->ec_tag);
2890 
2891 			/*
2892 			 * Cheetah variants use different state encodings -
2893 			 * the CH_ECSTATE_* defines vary depending on the
2894 			 * module we're compiled for.  Translate into our
2895 			 * one true version.  Conflate Owner-Shared state
2896 			 * of SSM mode with Owner as victimisation of such
2897 			 * lines may cause a writeback.
2898 			 */
2899 			switch (state) {
2900 			case CH_ECSTATE_MOD:
2901 				disp |= EC_STATE_M;
2902 				break;
2903 
2904 			case CH_ECSTATE_OWN:
2905 			case CH_ECSTATE_OWS:
2906 				disp |= EC_STATE_O;
2907 				break;
2908 
2909 			case CH_ECSTATE_EXL:
2910 				disp |= EC_STATE_E;
2911 				break;
2912 
2913 			case CH_ECSTATE_SHR:
2914 				disp |= EC_STATE_S;
2915 				break;
2916 
2917 			default:
2918 				disp |= EC_STATE_I;
2919 				break;
2920 			}
2921 		}
2922 
2923 		/*
2924 		 * If we initiated the delayed logout then we are responsible
2925 		 * for invalidating the logout area.
2926 		 */
2927 		if (logout_tried == B_FALSE) {
2928 			bzero(clop, sizeof (ch_cpu_logout_t));
2929 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2930 		}
2931 	}
2932 
2933 	/*
2934 	 * Re-enable CEEN if we turned it off.
2935 	 */
2936 	if (orig_err & EN_REG_CEEN)
2937 		set_error_enable(orig_err);
2938 	kpreempt_enable();
2939 
2940 	return (disp);
2941 }
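
/*
 * The disposition bits assembled above (CE_XDIAG_CE1/CE2, CE_XDIAG_AFARMATCH
 * and the EC_STATE_* line state) are recorded in flt_disp by
 * cpu_ce_scrub_mem_err() below and later drive the CE_DISPACT() lookup in
 * ce_scrub_xdiag_recirc().
 */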
2942 
2943 /*
2944  * Scrub a correctable memory error and collect data for classification
2945  * of CE type.  This function is called in the detection path, ie tl0 handling
2946  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2947  */
2948 void
2949 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2950 {
2951 	/*
2952 	 * Cheetah CE classification does not set any bits in flt_status.
2953 	 * Instead we will record classification datapoints in flt_disp.
2954 	 */
2955 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2956 
2957 	/*
2958 	 * To check if the error detected by IO is persistent, sticky or
2959 	 * intermittent.  This is noticed by clear_ecc().
2960 	 */
2961 	if (ecc->flt_status & ECC_IOBUS)
2962 		ecc->flt_stat = C_AFSR_MEMORY;
2963 
2964 	/*
2965 	 * Record information from this first part of the algorithm in
2966 	 * flt_disp.
2967 	 */
2968 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2969 }
2970 
2971 /*
2972  * Select a partner to perform a further CE classification check from.
2973  * Must be called with kernel preemption disabled (to stop the cpu list
2974  * from changing).  The detecting cpu we are partnering has cpuid
2975  * aflt->flt_inst; we might not be running on the detecting cpu.
2976  *
2977  * Restrict choice to active cpus in the same cpu partition as ourselves in
2978  * an effort to stop bad cpus in one partition causing other partitions to
2979  * perform excessive diagnostic activity.  Actually, since the errorq drain
2980  * is run from a softint most of the time and that is a global mechanism,
2981  * this isolation is only partial.  Return NULL if we fail to find a
2982  * suitable partner.
2983  *
2984  * We prefer a partner that is in a different latency group to ourselves as
2985  * we will share fewer datapaths.  If such a partner is unavailable then
2986  * choose one in the same lgroup but prefer a different chip and only allow
2987  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2988  * flags includes PTNR_SELFOK then permit selection of the original detector.
2989  *
2990  * We keep a cache of the last partner selected for a cpu, and we'll try to
2991  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2992  * have passed since that selection was made.  This provides the benefit
2993  * of the point-of-view of different partners over time but without
2994  * requiring frequent cpu list traversals.
2995  */
2996 
2997 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2998 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
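
/*
 * Typical usage (a sketch drawn from the callers below): with kernel
 * preemption disabled,
 *
 *	cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK, &ptnrtype);
 *
 * as in ce_lkychk_cb(), or with flags == 0 as in the CE_ACT_PTNRCHK case of
 * ce_scrub_xdiag_recirc(); the chosen partner is then cross-called with
 * ce_ptnrchk_xc().
 */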
2999 
3000 static cpu_t *
3001 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
3002 {
3003 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
3004 	hrtime_t lasttime, thistime;
3005 
3006 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
3007 
3008 	dtcr = cpu[aflt->flt_inst];
3009 
3010 	/*
3011 	 * Short-circuit for the following cases:
3012 	 *	. the dtcr is not flagged active
3013 	 *	. there is just one cpu present
3014 	 *	. the detector has disappeared
3015 	 *	. we were given a bad flt_inst cpuid; this should not happen
3016 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
3017 	 *	  reason to panic.
3018 	 *	. there is just one cpu left online in the cpu partition
3019 	 *
3020 	 * If we return NULL after this point then we do not update the
3021 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
3022 	 * again next time; this is the case where the only other cpu online
3023 	 * in the detector's partition is on the same chip as the detector
3024 	 * and since CEEN re-enable is throttled even that case should not
3025 	 * hurt performance.
3026 	 */
3027 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
3028 		return (NULL);
3029 	}
3030 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
3031 		if (flags & PTNR_SELFOK) {
3032 			*typep = CE_XDIAG_PTNR_SELF;
3033 			return (dtcr);
3034 		} else {
3035 			return (NULL);
3036 		}
3037 	}
3038 
3039 	thistime = gethrtime();
3040 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
3041 
3042 	/*
3043 	 * Select a starting point.
3044 	 */
3045 	if (!lasttime) {
3046 		/*
3047 		 * We've never selected a partner for this detector before.
3048 		 * Start the scan at the next online cpu in the same cpu
3049 		 * partition.
3050 		 */
3051 		sp = dtcr->cpu_next_part;
3052 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
3053 		/*
3054 		 * Our last selection has not aged yet.  If this partner:
3055 		 *	. is still a valid cpu,
3056 		 *	. is still in the same partition as the detector
3057 		 *	. is still marked active
3058 		 *	. satisfies the 'flags' argument criteria
3059 		 * then select it again without updating the timestamp.
3060 		 */
3061 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3062 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
3063 		    !cpu_flagged_active(sp->cpu_flags) ||
3064 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
3065 		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
3066 		    !(flags & PTNR_SIBLINGOK))) {
3067 			sp = dtcr->cpu_next_part;
3068 		} else {
3069 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3070 				*typep = CE_XDIAG_PTNR_REMOTE;
3071 			} else if (sp == dtcr) {
3072 				*typep = CE_XDIAG_PTNR_SELF;
3073 			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3074 				*typep = CE_XDIAG_PTNR_SIBLING;
3075 			} else {
3076 				*typep = CE_XDIAG_PTNR_LOCAL;
3077 			}
3078 			return (sp);
3079 		}
3080 	} else {
3081 		/*
3082 		 * Our last selection has aged.  If it is nonetheless still a
3083 		 * valid cpu then start the scan at the next cpu in the
3084 		 * partition after our last partner.  If the last selection
3085 		 * is no longer a valid cpu then go with our default.  In
3086 		 * this way we slowly cycle through possible partners to
3087 		 * obtain multiple viewpoints over time.
3088 		 */
3089 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3090 		if (sp == NULL) {
3091 			sp = dtcr->cpu_next_part;
3092 		} else {
3093 			sp = sp->cpu_next_part;		/* may be dtcr */
3094 			if (sp->cpu_part != dtcr->cpu_part)
3095 				sp = dtcr;
3096 		}
3097 	}
3098 
3099 	/*
3100 	 * We have a proposed starting point for our search, but if this
3101 	 * cpu is offline then its cpu_next_part will point to itself
3102 	 * so we can't use that to iterate over cpus in this partition in
3103 	 * the loop below.  We still want to avoid iterating over cpus not
3104 	 * in our partition, so in the case that our starting point is offline
3105 	 * we will repoint it to be the detector itself;  and if the detector
3106 	 * happens to be offline we'll return NULL from the following loop.
3107 	 */
3108 	if (!cpu_flagged_active(sp->cpu_flags)) {
3109 		sp = dtcr;
3110 	}
3111 
3112 	ptnr = sp;
3113 	locptnr = NULL;
3114 	sibptnr = NULL;
3115 	do {
3116 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3117 			continue;
3118 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3119 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3120 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3121 			*typep = CE_XDIAG_PTNR_REMOTE;
3122 			return (ptnr);
3123 		}
3124 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3125 			if (sibptnr == NULL)
3126 				sibptnr = ptnr;
3127 			continue;
3128 		}
3129 		if (locptnr == NULL)
3130 			locptnr = ptnr;
3131 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3132 
3133 	/*
3134 	 * A foreign partner has already been returned if one was available.
3135 	 *
3136 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3137 	 * detector, is active, and is not a sibling of the detector.
3138 	 *
3139 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3140 	 * active.
3141 	 *
3142 	 * If we have to resort to using the detector itself we have already
3143 	 * checked that it is active.
3144 	 */
3145 	if (locptnr) {
3146 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3147 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3148 		*typep = CE_XDIAG_PTNR_LOCAL;
3149 		return (locptnr);
3150 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3151 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3152 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3153 		*typep = CE_XDIAG_PTNR_SIBLING;
3154 		return (sibptnr);
3155 	} else if (flags & PTNR_SELFOK) {
3156 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3157 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3158 		*typep = CE_XDIAG_PTNR_SELF;
3159 		return (dtcr);
3160 	}
3161 
3162 	return (NULL);
3163 }
3164 
3165 /*
3166  * Cross call handler that is requested to run on the designated partner of
3167  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3168  * a cpu that experienced a possibly sticky or possibly persistent CE.
3169 static void
3170 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3171 {
3172 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3173 }
3174 
3175 /*
3176  * The associated errorqs are never destroyed so we do not need to deal with
3177  * them disappearing before this timeout fires.  If the affected memory
3178  * has been DR'd out since the original event the scrub algrithm will catch
3179  * has been DR'd out since the original event, the scrub algorithm will catch
3180  * cpu has been DR'd out then ereport detector info will not be able to
3181  * look up the CPU type; with a small timeout this is unlikely.
3182  */
3183 static void
3184 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3185 {
3186 	struct async_flt *aflt = cbarg->lkycb_aflt;
3187 	uchar_t disp;
3188 	cpu_t *cp;
3189 	int ptnrtype;
3190 
3191 	kpreempt_disable();
3192 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3193 	    &ptnrtype)) {
3194 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3195 		    (uint64_t)&disp);
3196 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3197 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3198 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3199 	} else {
3200 		ce_xdiag_lkydrops++;
3201 		if (ncpus > 1)
3202 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3203 			    CE_XDIAG_SKIP_NOPTNR);
3204 	}
3205 	kpreempt_enable();
3206 
3207 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3208 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3209 }
3210 
3211 /*
3212  * Called from errorq drain code when processing a CE error, both from
3213  * CPU and PCI drain functions.  Decide what further classification actions,
3214  * if any, we will perform.  Perform immediate actions now, and schedule
3215  * delayed actions as required.  Note that we are no longer necessarily running
3216  * on the detecting cpu, and that the async_flt structure will not persist on
3217  * return from this function.
3218  *
3219  * Calls to this function should aim to be self-throttling in some way.  With
3220  * the delayed re-enable of CEEN, the absolute rate of calls should not
3221  * be excessive.  Callers should also avoid performing in-depth classification
3222  * for events in pages that are already known to be suspect.
3223  *
3224  * We return nonzero to indicate that the event has been copied and
3225  * recirculated for further testing.  The caller should not log the event
3226  * in this case - it will be logged when further test results are available.
3227  *
3228  * Our possible contexts are that of errorq_drain: below lock level or from
3229  * panic context.  We can assume that the cpu we are running on is online.
3230  */
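/*
 * Note: cpu_async_log_err() above invokes this with afltoffset ==
 * offsetof(ch_async_flt_t, cmn_asyncflt), which is how the duplicated
 * errorq element is mapped back to its embedded async_flt in the
 * CE_ACT_LKYCHK case below.
 */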
3231 
3232 
3233 #ifdef DEBUG
3234 static int ce_xdiag_forceaction;
3235 #endif
3236 
3237 int
3238 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3239     errorq_elem_t *eqep, size_t afltoffset)
3240 {
3241 	ce_dispact_t dispact, action;
3242 	cpu_t *cp;
3243 	uchar_t dtcrinfo, disp;
3244 	int ptnrtype;
3245 
3246 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3247 		ce_xdiag_drops++;
3248 		return (0);
3249 	} else if (!aflt->flt_in_memory) {
3250 		ce_xdiag_drops++;
3251 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3252 		return (0);
3253 	}
3254 
3255 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3256 
3257 	/*
3258 	 * Some correctable events are not scrubbed/classified, such as those
3259 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3260 	 * initial detector classification, go no further.
3261 	 */
3262 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3263 		ce_xdiag_drops++;
3264 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3265 		return (0);
3266 	}
3267 
3268 	dispact = CE_DISPACT(ce_disp_table,
3269 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3270 	    CE_XDIAG_STATE(dtcrinfo),
3271 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3272 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3273 
3274 
3275 	action = CE_ACT(dispact);	/* bad lookup caught below */
3276 #ifdef DEBUG
3277 	if (ce_xdiag_forceaction != 0)
3278 		action = ce_xdiag_forceaction;
3279 #endif
3280 
3281 	switch (action) {
3282 	case CE_ACT_LKYCHK: {
3283 		caddr_t ndata;
3284 		errorq_elem_t *neqep;
3285 		struct async_flt *ecc;
3286 		ce_lkychk_cb_t *cbargp;
3287 
3288 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3289 			ce_xdiag_lkydrops++;
3290 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3291 			    CE_XDIAG_SKIP_DUPFAIL);
3292 			break;
3293 		}
3294 		ecc = (struct async_flt *)(ndata + afltoffset);
3295 
3296 		ASSERT(ecc->flt_class == CPU_FAULT ||
3297 		    ecc->flt_class == BUS_FAULT);
3298 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3299 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3300 
3301 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3302 		cbargp->lkycb_aflt = ecc;
3303 		cbargp->lkycb_eqp = eqp;
3304 		cbargp->lkycb_eqep = neqep;
3305 
3306 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3307 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3308 		return (1);
3309 	}
3310 
3311 	case CE_ACT_PTNRCHK:
3312 		kpreempt_disable();	/* stop cpu list changing */
3313 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3314 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3315 			    (uint64_t)aflt, (uint64_t)&disp);
3316 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3317 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3318 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3319 		} else if (ncpus > 1) {
3320 			ce_xdiag_ptnrdrops++;
3321 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3322 			    CE_XDIAG_SKIP_NOPTNR);
3323 		} else {
3324 			ce_xdiag_ptnrdrops++;
3325 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3326 			    CE_XDIAG_SKIP_UNIPROC);
3327 		}
3328 		kpreempt_enable();
3329 		break;
3330 
3331 	case CE_ACT_DONE:
3332 		break;
3333 
3334 	case CE_DISP_BAD:
3335 	default:
3336 #ifdef DEBUG
3337 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3338 #endif
3339 		ce_xdiag_bad++;
3340 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3341 		break;
3342 	}
3343 
3344 	return (0);
3345 }
3346 
3347 /*
3348  * We route all errors through a single switch statement.
3349  */
3350 void
3351 cpu_ue_log_err(struct async_flt *aflt)
3352 {
3353 	switch (aflt->flt_class) {
3354 	case CPU_FAULT:
3355 		cpu_ereport_init(aflt);
3356 		if (cpu_async_log_err(aflt, NULL))
3357 			cpu_ereport_post(aflt);
3358 		break;
3359 
3360 	case BUS_FAULT:
3361 		bus_async_log_err(aflt);
3362 		break;
3363 
3364 	default:
3365 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3366 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3367 		return;
3368 	}
3369 }
3370 
3371 /*
3372  * Routine for panic hook callback from panic_idle().
3373  */
3374 void
3375 cpu_async_panic_callb(void)
3376 {
3377 	ch_async_flt_t ch_flt;
3378 	struct async_flt *aflt;
3379 	ch_cpu_errors_t cpu_error_regs;
3380 	uint64_t afsr_errs;
3381 
3382 	get_cpu_error_state(&cpu_error_regs);
3383 
3384 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3385 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3386 
3387 	if (afsr_errs) {
3388 
3389 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3390 		aflt = (struct async_flt *)&ch_flt;
3391 		aflt->flt_id = gethrtime_waitfree();
3392 		aflt->flt_bus_id = getprocessorid();
3393 		aflt->flt_inst = CPU->cpu_id;
3394 		aflt->flt_stat = cpu_error_regs.afsr;
3395 		aflt->flt_addr = cpu_error_regs.afar;
3396 		aflt->flt_prot = AFLT_PROT_NONE;
3397 		aflt->flt_class = CPU_FAULT;
3398 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3399 		aflt->flt_panic = 1;
3400 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3401 		ch_flt.afsr_errs = afsr_errs;
3402 #if defined(SERRANO)
3403 		ch_flt.afar2 = cpu_error_regs.afar2;
3404 #endif	/* SERRANO */
3405 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3406 	}
3407 }
3408 
3409 /*
3410  * Routine to convert a syndrome into a syndrome code.
3411  */
3412 static int
3413 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3414 {
3415 	if (synd_status == AFLT_STAT_INVALID)
3416 		return (-1);
3417 
3418 	/*
3419 	 * Use the syndrome to index the appropriate syndrome table,
3420 	 * to get the code indicating which bit(s) is(are) bad.
3421 	 */
3422 	if (afsr_bit &
3423 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3424 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3425 #if defined(JALAPENO) || defined(SERRANO)
3426 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3427 				return (-1);
3428 			else
3429 				return (BPAR0 + synd);
3430 #else /* JALAPENO || SERRANO */
3431 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3432 				return (-1);
3433 			else
3434 				return (mtag_syndrome_tab[synd]);
3435 #endif /* JALAPENO || SERRANO */
3436 		} else {
3437 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3438 				return (-1);
3439 			else
3440 				return (ecc_syndrome_tab[synd]);
3441 		}
3442 	} else {
3443 		return (-1);
3444 	}
3445 }
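
/*
 * Every branch above is the same bounds-checked table lookup: reject
 * syndrome 0 and anything beyond the table, otherwise index the table
 * for the "which bit is bad" code.  A minimal sketch of that pattern
 * with a hypothetical 4-entry table:
 */
#if 0	/* example only; never compiled */
#define	EX_SYND_TBL_SIZE	4
static const int ex_synd_tab[EX_SYND_TBL_SIZE] = { -1, 10, 11, 12 };

static int
example_synd_lookup(uint_t synd)
{
	if (synd == 0 || synd >= EX_SYND_TBL_SIZE)
		return (-1);
	return (ex_synd_tab[synd]);
}
#endif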
3446 
3447 int
3448 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3449 {
3450 	if (&plat_get_mem_sid)
3451 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3452 	else
3453 		return (ENOTSUP);
3454 }
3455 
3456 int
3457 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3458 {
3459 	if (&plat_get_mem_offset)
3460 		return (plat_get_mem_offset(flt_addr, offp));
3461 	else
3462 		return (ENOTSUP);
3463 }
3464 
3465 int
3466 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3467 {
3468 	if (&plat_get_mem_addr)
3469 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3470 	else
3471 		return (ENOTSUP);
3472 }
3473 
3474 /*
3475  * Routine to return a string identifying the physical name
3476  * associated with a memory/cache error.
3477  */
3478 int
3479 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3480     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3481     ushort_t flt_status, char *buf, int buflen, int *lenp)
3482 {
3483 	int synd_code;
3484 	int ret;
3485 
3486 	/*
3487 	 * An AFSR of -1 defaults to a memory syndrome.
3488 	 */
3489 	if (flt_stat == (uint64_t)-1)
3490 		flt_stat = C_AFSR_CE;
3491 
3492 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3493 
3494 	/*
3495 	 * Syndrome code must be either a single-bit error code
3496 	 * (0...143) or -1 for unum lookup.
3497 	 */
3498 	if (synd_code < 0 || synd_code >= M2)
3499 		synd_code = -1;
3500 	if (&plat_get_mem_unum) {
3501 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3502 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3503 			buf[0] = '\0';
3504 			*lenp = 0;
3505 		}
3506 
3507 		return (ret);
3508 	}
3509 
3510 	return (ENOTSUP);
3511 }
3512 
3513 /*
3514  * Wrapper for cpu_get_mem_unum() routine that takes an
3515  * async_flt struct rather than explicit arguments.
3516  */
3517 int
3518 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3519     char *buf, int buflen, int *lenp)
3520 {
3521 	/*
3522 	 * If we come through here for an IO bus error, aflt->flt_stat will
3523 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3524 	 * so it will interpret this as a memory error.
3525 	 */
3526 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3527 	    (aflt->flt_class == BUS_FAULT) ?
3528 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3529 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3530 	    aflt->flt_status, buf, buflen, lenp));
3531 }
3532 
3533 /*
3534  * Return the unum string for the given synd_code and async_flt in
3535  * buf, which must be UNUM_NAMLEN bytes in size.
3536  */
3537 static int
3538 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3539 {
3540 	int ret, len;
3541 
3542 	/*
3543 	 * Syndrome code must be either a single-bit error code
3544 	 * (0...143) or -1 for unum lookup.
3545 	 */
3546 	if (synd_code < 0 || synd_code >= M2)
3547 		synd_code = -1;
3548 	if (&plat_get_mem_unum) {
3549 		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3550 		    aflt->flt_bus_id, aflt->flt_in_memory,
3551 		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3552 			buf[0] = '\0';
3553 		}
3554 		return (ret);
3555 	}
3556 
3557 	buf[0] = '\0';
3558 	return (ENOTSUP);
3559 }
3560 
3561 /*
3562  * This routine is a more generic interface to cpu_get_mem_unum()
3563  * that may be used by other modules (e.g. the 'mm' driver, through
3564  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3565  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3566  */
3567 int
3568 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3569     char *buf, int buflen, int *lenp)
3570 {
3571 	int synd_status, flt_in_memory, ret;
3572 	ushort_t flt_status = 0;
3573 	char unum[UNUM_NAMLEN];
3574 	uint64_t t_afsr_errs;
3575 
3576 	/*
3577 	 * Check for an invalid address.
3578 	 */
3579 	if (afar == (uint64_t)-1)
3580 		return (ENXIO);
3581 
3582 	if (synd == (uint64_t)-1)
3583 		synd_status = AFLT_STAT_INVALID;
3584 	else
3585 		synd_status = AFLT_STAT_VALID;
3586 
3587 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3588 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3589 
3590 	/*
3591 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3592 	 */
3593 	if (*afsr == (uint64_t)-1)
3594 		t_afsr_errs = C_AFSR_CE;
3595 	else {
3596 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3597 #if defined(CHEETAH_PLUS)
3598 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3599 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3600 #endif	/* CHEETAH_PLUS */
3601 	}
3602 
3603 	/*
3604 	 * Turn on ECC_ECACHE if error type is E$ Data.
3605 	 */
3606 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3607 		flt_status |= ECC_ECACHE;
3608 
3609 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3610 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3611 	if (ret != 0)
3612 		return (ret);
3613 
3614 	if (*lenp >= buflen)
3615 		return (ENAMETOOLONG);
3616 
3617 	(void) strncpy(buf, unum, buflen);
3618 
3619 	return (0);
3620 }
3621 
3622 /*
3623  * Routine to return memory information associated
3624  * with a physical address and syndrome.
3625  */
3626 int
3627 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3628     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3629     int *segsp, int *banksp, int *mcidp)
3630 {
3631 	int synd_status, synd_code;
3632 
3633 	if (afar == (uint64_t)-1)
3634 		return (ENXIO);
3635 
3636 	if (synd == (uint64_t)-1)
3637 		synd_status = AFLT_STAT_INVALID;
3638 	else
3639 		synd_status = AFLT_STAT_VALID;
3640 
3641 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3642 
3643 	if (p2get_mem_info != NULL)
3644 		return ((p2get_mem_info)(synd_code, afar,
3645 		    mem_sizep, seg_sizep, bank_sizep,
3646 		    segsp, banksp, mcidp));
3647 	else
3648 		return (ENOTSUP);
3649 }
3650 
3651 /*
3652  * Routine to return a string identifying the physical
3653  * name associated with a cpuid.
3654  */
3655 int
3656 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3657 {
3658 	int ret;
3659 	char unum[UNUM_NAMLEN];
3660 
3661 	if (&plat_get_cpu_unum) {
3662 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3663 		    != 0)
3664 			return (ret);
3665 	} else {
3666 		return (ENOTSUP);
3667 	}
3668 
3669 	if (*lenp >= buflen)
3670 		return (ENAMETOOLONG);
3671 
3672 	(void) strncpy(buf, unum, buflen);
3673 
3674 	return (0);
3675 }
3676 
3677 /*
3678  * This routine exports the name buffer size.
3679  */
3680 size_t
3681 cpu_get_name_bufsize()
3682 {
3683 	return (UNUM_NAMLEN);
3684 }
3685 
3686 /*
3687  * Historical function, apparently not used.
3688  */
3689 /* ARGSUSED */
3690 void
3691 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
3692 {}
3693 
3694 /*
3695  * Historical function only called for SBus errors in debugging.
3696  */
3697 /*ARGSUSED*/
3698 void
3699 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
3700 {}
3701 
3702 /*
3703  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3704  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3705  * an async fault structure argument is passed in, the captured error state
3706  * (AFSR, AFAR) info will be returned in the structure.
3707  */
3708 int
3709 clear_errors(ch_async_flt_t *ch_flt)
3710 {
3711 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3712 	ch_cpu_errors_t	cpu_error_regs;
3713 
3714 	get_cpu_error_state(&cpu_error_regs);
3715 
3716 	if (ch_flt != NULL) {
3717 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3718 		aflt->flt_addr = cpu_error_regs.afar;
3719 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3720 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3721 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3722 #if defined(SERRANO)
3723 		ch_flt->afar2 = cpu_error_regs.afar2;
3724 #endif	/* SERRANO */
3725 	}
3726 
3727 	set_cpu_error_state(&cpu_error_regs);
3728 
3729 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3730 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3731 }
3732 
3733 /*
3734  * Clear any AFSR error bits, and check for persistence.
3735  *
3736  * It would be desirable to also insist that syndrome match.  PCI handling
3737  * has already filled flt_synd.  For errors trapped by CPU we only fill
3738  * flt_synd when we queue the event, so we do not have a valid flt_synd
3739  * during initial classification (it is valid if we're called as part of
3740  * subsequent low-pil additional classification attempts).  We could try
3741  * to determine which syndrome to use: we know we're only called for
3742  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3743  * would be esynd/none and esynd/msynd, respectively.  If that is
3744  * implemented then what do we do in the case that we do experience an
3745  * error on the same afar but with different syndrome?  At the very least
3746  * we should count such occurrences.  Anyway, for now, we'll leave it as
3747  * it has been for ages.
3748  */
3749 static int
3750 clear_ecc(struct async_flt *aflt)
3751 {
3752 	ch_cpu_errors_t	cpu_error_regs;
3753 
3754 	/*
3755 	 * Snapshot the AFSR and AFAR and clear any errors
3756 	 */
3757 	get_cpu_error_state(&cpu_error_regs);
3758 	set_cpu_error_state(&cpu_error_regs);
3759 
3760 	/*
3761 	 * If any of the same memory access error bits are still on and
3762 	 * the AFAR matches, return that the error is persistent.
3763 	 */
3764 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3765 	    cpu_error_regs.afar == aflt->flt_addr);
3766 }
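
/*
 * The shape of clear_ecc() is snapshot, clear, compare: snapshot what
 * has accumulated in the AFSR/AFAR since the last clear, write the
 * snapshot back to clear those sticky bits, and call the error
 * persistent if the original memory-error bit recurred at the same
 * AFAR.  A schematic sketch with hypothetical register accessors:
 */
#if 0	/* example only; never compiled; accessor names are hypothetical */
static int
example_error_is_persistent(uint64_t orig_bits, uint64_t orig_addr)
{
	uint64_t afsr = example_read_afsr();
	uint64_t afar = example_read_afar();

	example_write_afsr(afsr);	/* clears the captured sticky bits */

	return ((afsr & orig_bits) != 0 && afar == orig_addr);
}
#endif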
3767 
3768 /*
3769  * Turn off all cpu error detection, normally only used for panics.
3770  */
3771 void
3772 cpu_disable_errors(void)
3773 {
3774 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3775 
3776 	/*
3777 	 * With error detection now turned off, check the other cpus
3778 	 * logout areas for any unlogged errors.
3779 	 */
3780 	if (enable_check_other_cpus_logout) {
3781 		cpu_check_other_cpus_logout();
3782 		/*
3783 		 * Make a second pass over the logout areas, in case
3784 		 * there is a failing CPU in an error-trap loop which
3785 		 * will write to the logout area once it is emptied.
3786 		 */
3787 		cpu_check_other_cpus_logout();
3788 	}
3789 }
3790 
3791 /*
3792  * Enable errors.
3793  */
3794 void
3795 cpu_enable_errors(void)
3796 {
3797 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3798 }
3799 
3800 /*
3801  * Flush the entire ecache using displacement flush by reading through a
3802  * physical address range twice as large as the Ecache.
3803  */
3804 void
3805 cpu_flush_ecache(void)
3806 {
3807 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3808 	    cpunodes[CPU->cpu_id].ecache_linesize);
3809 }
3810 
3811 /*
3812  * Return CPU E$ set size - E$ size divided by the associativity.
3813  * We use this function in places where the CPU_PRIVATE ptr may not be
3814  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3815  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3816  * up before the kernel switches from OBP's to the kernel's trap table, so
3817  * we don't have to worry about cpunodes being uninitialized.
3818  */
3819 int
3820 cpu_ecache_set_size(struct cpu *cp)
3821 {
3822 	if (CPU_PRIVATE(cp))
3823 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3824 
3825 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3826 }
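
/*
 * The set size is simply the total E$ size divided by the associativity
 * (e.g. an 8 MB 2-way E$ has a 4 MB set size), and that is the value
 * cpu_flush_ecache_line() below passes down to ecache_flush_line().
 * A trivial sketch:
 */
#if 0	/* example only; never compiled */
static int
example_ec_set_size(int ecache_size, int nway)
{
	/* example_ec_set_size(8 * 1024 * 1024, 2) == 4 * 1024 * 1024 */
	return (ecache_size / nway);
}
#endif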
3827 
3828 /*
3829  * Flush Ecache line.
3830  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3831  * Uses normal displacement flush for Cheetah.
3832  */
3833 static void
3834 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3835 {
3836 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3837 	int ec_set_size = cpu_ecache_set_size(CPU);
3838 
3839 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3840 }
3841 
3842 /*
3843  * Scrub physical address.
3844  * Scrub code is different depending upon whether this is a Cheetah+ with 2-way
3845  * Ecache or direct-mapped Ecache.
3846  */
3847 static void
3848 cpu_scrubphys(struct async_flt *aflt)
3849 {
3850 	int ec_set_size = cpu_ecache_set_size(CPU);
3851 
3852 	scrubphys(aflt->flt_addr, ec_set_size);
3853 }
3854 
3855 /*
3856  * Clear physical address.
3857  * Scrub code is different depending upon whether this is a Cheetah+ with 2-way
3858  * Ecache or direct-mapped Ecache.
3859  */
3860 void
3861 cpu_clearphys(struct async_flt *aflt)
3862 {
3863 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3864 	int ec_set_size = cpu_ecache_set_size(CPU);
3865 
3866 
3867 	clearphys(aflt->flt_addr, ec_set_size, lsize);
3868 }
3869 
3870 #if defined(CPU_IMP_ECACHE_ASSOC)
3871 /*
3872  * Check for a matching valid line in all the sets.
3873  * If found, return set# + 1. Otherwise return 0.
3874  */
3875 static int
3876 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3877 {
3878 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3879 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3880 	int ec_set_size = cpu_ecache_set_size(CPU);
3881 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3882 	int nway = cpu_ecache_nway();
3883 	int i;
3884 
3885 	for (i = 0; i < nway; i++, ecp++) {
3886 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3887 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3888 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3889 			return (i+1);
3890 	}
3891 	return (0);
3892 }
3893 #endif /* CPU_IMP_ECACHE_ASSOC */
3894 
3895 /*
3896  * Check whether a line in the given logout info matches the specified
3897  * fault address.  If reqval is set then the line must not be Invalid.
3898  * Returns 0 on failure; on success (way + 1) is returned and *level is
3899  * set to 2 for l2$ or 3 for l3$.
3900  */
3901 static int
3902 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3903 {
3904 	ch_diag_data_t *cdp = data;
3905 	ch_ec_data_t *ecp;
3906 	int totalsize, ec_set_size;
3907 	int i, ways;
3908 	int match = 0;
3909 	int tagvalid;
3910 	uint64_t addr, tagpa;
3911 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3912 
3913 	/*
3914 	 * Check the l2$ logout data
3915 	 */
3916 	if (ispanther) {
3917 		ecp = &cdp->chd_l2_data[0];
3918 		ec_set_size = PN_L2_SET_SIZE;
3919 		ways = PN_L2_NWAYS;
3920 	} else {
3921 		ecp = &cdp->chd_ec_data[0];
3922 		ec_set_size = cpu_ecache_set_size(CPU);
3923 		ways = cpu_ecache_nway();
3924 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3925 	}
3926 	/* remove low order PA bits from fault address not used in PA tag */
3927 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3928 	for (i = 0; i < ways; i++, ecp++) {
3929 		if (ispanther) {
3930 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3931 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3932 		} else {
3933 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3934 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3935 			    ecp->ec_tag);
3936 		}
3937 		if (tagpa == addr && (!reqval || tagvalid)) {
3938 			match = i + 1;
3939 			*level = 2;
3940 			break;
3941 		}
3942 	}
3943 
3944 	if (match || !ispanther)
3945 		return (match);
3946 
3947 	/* For Panther we also check the l3$ */
3948 	ecp = &cdp->chd_ec_data[0];
3949 	ec_set_size = PN_L3_SET_SIZE;
3950 	ways = PN_L3_NWAYS;
3951 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3952 
3953 	for (i = 0; i < ways; i++, ecp++) {
3954 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3955 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3956 			match = i + 1;
3957 			*level = 3;
3958 			break;
3959 		}
3960 	}
3961 
3962 	return (match);
3963 }
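
/*
 * The tag comparisons above mask the fault address with
 * P2ALIGN(C_AFAR_PA, ec_set_size) so that bits which merely index
 * within a set are dropped before comparing against the PA held in
 * the tag.  Ignoring the C_AFAR_PA mask for brevity, and assuming the
 * usual power-of-two alignment macro, the idea reduces to:
 */
#if 0	/* example only; never compiled */
#include <stdint.h>

#define	EX_P2ALIGN(x, align)	((x) & -(align))	/* align: power of 2 */

static int
example_tag_match(uint64_t faddr, uint64_t tagpa, uint64_t ec_set_size)
{
	/*
	 * With a 1 MB set (0x100000), the low 20 bits of faddr are
	 * cleared before the comparison.
	 */
	return (EX_P2ALIGN(faddr, ec_set_size) == tagpa);
}
#endif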
3964 
3965 #if defined(CPU_IMP_L1_CACHE_PARITY)
3966 /*
3967  * Record information related to the source of a Dcache Parity Error.
3968  */
3969 static void
3970 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3971 {
3972 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3973 	int index;
3974 
3975 	/*
3976 	 * Since instruction decode cannot be done at high PIL
3977 	 * just examine the entire Dcache to locate the error.
3978 	 */
3979 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3980 		ch_flt->parity_data.dpe.cpl_way = -1;
3981 		ch_flt->parity_data.dpe.cpl_off = -1;
3982 	}
3983 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3984 		cpu_dcache_parity_check(ch_flt, index);
3985 }
3986 
3987 /*
3988  * Check all ways of the Dcache at a specified index for good parity.
3989  */
3990 static void
3991 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3992 {
3993 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3994 	uint64_t parity_bits, pbits, data_word;
3995 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3996 	int way, word, data_byte;
3997 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3998 	ch_dc_data_t tmp_dcp;
3999 
4000 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
4001 		/*
4002 		 * Perform diagnostic read.
4003 		 */
4004 		get_dcache_dtag(index + way * dc_set_size,
4005 		    (uint64_t *)&tmp_dcp);
4006 
4007 		/*
4008 		 * Check tag for even parity.
4009 		 * Sum of 1 bits (including parity bit) should be even.
4010 		 */
4011 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
4012 			/*
4013 			 * If this is the first error, log detailed information
4014 			 * about it and check the snoop tag. Otherwise just
4015 			 * record the fact that we found another error.
4016 			 */
4017 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4018 				ch_flt->parity_data.dpe.cpl_way = way;
4019 				ch_flt->parity_data.dpe.cpl_cache =
4020 				    CPU_DC_PARITY;
4021 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
4022 
4023 				if (popc64(tmp_dcp.dc_sntag &
4024 				    CHP_DCSNTAG_PARMASK) & 1) {
4025 					ch_flt->parity_data.dpe.cpl_tag |=
4026 					    CHP_DC_SNTAG;
4027 					ch_flt->parity_data.dpe.cpl_lcnt++;
4028 				}
4029 
4030 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
4031 			}
4032 
4033 			ch_flt->parity_data.dpe.cpl_lcnt++;
4034 		}
4035 
4036 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4037 			/*
4038 			 * Panther has more parity bits than the other
4039 			 * processors for covering dcache data and so each
4040 			 * byte of data in each word has its own parity bit.
4041 			 */
4042 			parity_bits = tmp_dcp.dc_pn_data_parity;
4043 			for (word = 0; word < 4; word++) {
4044 				data_word = tmp_dcp.dc_data[word];
4045 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
4046 				for (data_byte = 0; data_byte < 8;
4047 				    data_byte++) {
4048 					if (((popc64(data_word &
4049 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
4050 					    (pbits & 1)) {
4051 						cpu_record_dc_data_parity(
4052 						    ch_flt, dcp, &tmp_dcp, way,
4053 						    word);
4054 					}
4055 					pbits >>= 1;
4056 					data_word >>= 8;
4057 				}
4058 				parity_bits >>= 8;
4059 			}
4060 		} else {
4061 			/*
4062 			 * Check data array for even parity.
4063 			 * The 8 parity bits are grouped into 4 pairs each
4064 			 * of which covers a 64-bit word.  The endianness is
4065 			 * reversed -- the low-order parity bits cover the
4066 			 * high-order data words.
4067 			 */
4068 			parity_bits = tmp_dcp.dc_utag >> 8;
4069 			for (word = 0; word < 4; word++) {
4070 				pbits = (parity_bits >> (6 - word * 2)) & 3;
4071 				if ((popc64(tmp_dcp.dc_data[word]) +
4072 				    parity_bits_popc[pbits]) & 1) {
4073 					cpu_record_dc_data_parity(ch_flt, dcp,
4074 					    &tmp_dcp, way, word);
4075 				}
4076 			}
4077 		}
4078 	}
4079 }
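
/*
 * The tag checks above are even-parity checks: across the protected
 * field, including the parity bit itself, the number of 1 bits must
 * be even, so an odd popcount means the tag or its parity bit is bad.
 * A minimal stand-alone sketch:
 */
#if 0	/* example only; never compiled */
#include <stdint.h>

static int
example_even_parity_bad(uint64_t protected_bits)
{
	int cnt = 0;

	/* Same "clear the lowest set bit" popcount trick as popc64(). */
	for (; protected_bits != 0; protected_bits &= protected_bits - 1)
		cnt++;
	return (cnt & 1);	/* nonzero == parity error */
}
#endif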
4080 
4081 static void
4082 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4083     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4084 {
4085 	/*
4086 	 * If this is the first error, log detailed information about it.
4087 	 * Otherwise just record the fact that we found another error.
4088 	 */
4089 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4090 		ch_flt->parity_data.dpe.cpl_way = way;
4091 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4092 		ch_flt->parity_data.dpe.cpl_off = word * 8;
4093 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4094 	}
4095 	ch_flt->parity_data.dpe.cpl_lcnt++;
4096 }
4097 
4098 /*
4099  * Record information related to the source of an Icache Parity Error.
4100  *
4101  * Called with the Icache disabled so any diagnostic accesses are safe.
4102  */
4103 static void
4104 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4105 {
4106 	int	ic_set_size;
4107 	int	ic_linesize;
4108 	int	index;
4109 
4110 	if (CPU_PRIVATE(CPU)) {
4111 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4112 		    CH_ICACHE_NWAY;
4113 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4114 	} else {
4115 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4116 		ic_linesize = icache_linesize;
4117 	}
4118 
4119 	ch_flt->parity_data.ipe.cpl_way = -1;
4120 	ch_flt->parity_data.ipe.cpl_off = -1;
4121 
4122 	for (index = 0; index < ic_set_size; index += ic_linesize)
4123 		cpu_icache_parity_check(ch_flt, index);
4124 }
4125 
4126 /*
4127  * Check all ways of the Icache at a specified index for good parity.
4128  */
4129 static void
4130 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4131 {
4132 	uint64_t parmask, pn_inst_parity;
4133 	int ic_set_size;
4134 	int ic_linesize;
4135 	int flt_index, way, instr, num_instr;
4136 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4137 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4138 	ch_ic_data_t tmp_icp;
4139 
4140 	if (CPU_PRIVATE(CPU)) {
4141 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4142 		    CH_ICACHE_NWAY;
4143 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4144 	} else {
4145 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4146 		ic_linesize = icache_linesize;
4147 	}
4148 
4149 	/*
4150 	 * Panther has twice as many instructions per icache line and the
4151 	 * instruction parity bit is in a different location.
4152 	 */
4153 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4154 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4155 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4156 	} else {
4157 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4158 		pn_inst_parity = 0;
4159 	}
4160 
4161 	/*
4162 	 * Index at which we expect to find the parity error.
4163 	 */
4164 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4165 
4166 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4167 		/*
4168 		 * Diagnostic reads expect address argument in ASI format.
4169 		 */
4170 		get_icache_dtag(2 * (index + way * ic_set_size),
4171 		    (uint64_t *)&tmp_icp);
4172 
4173 		/*
4174 		 * If this is the index in which we expect to find the
4175 		 * error, log detailed information about each of the ways.
4176 		 * This information will be displayed later if we can't
4177 		 * determine the exact way in which the error is located.
4178 		 */
4179 		if (flt_index == index)
4180 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4181 
4182 		/*
4183 		 * Check tag for even parity.
4184 		 * Sum of 1 bits (including parity bit) should be even.
4185 		 */
4186 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4187 			/*
4188 			 * If this way is the one in which we expected
4189 			 * to find the error, record the way and check the
4190 			 * snoop tag. Otherwise just record the fact we
4191 			 * found another error.
4192 			 */
4193 			if (flt_index == index) {
4194 				ch_flt->parity_data.ipe.cpl_way = way;
4195 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4196 
4197 				if (popc64(tmp_icp.ic_sntag &
4198 				    CHP_ICSNTAG_PARMASK) & 1) {
4199 					ch_flt->parity_data.ipe.cpl_tag |=
4200 					    CHP_IC_SNTAG;
4201 					ch_flt->parity_data.ipe.cpl_lcnt++;
4202 				}
4203 
4204 			}
4205 			ch_flt->parity_data.ipe.cpl_lcnt++;
4206 			continue;
4207 		}
4208 
4209 		/*
4210 		 * Check instruction data for even parity.
4211 		 * Bits participating in parity differ for PC-relative
4212 		 * versus non-PC-relative instructions.
4213 		 */
4214 		for (instr = 0; instr < num_instr; instr++) {
4215 			parmask = (tmp_icp.ic_data[instr] &
4216 			    CH_ICDATA_PRED_ISPCREL) ?
4217 			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4218 			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4219 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4220 				/*
4221 				 * If this way is the one in which we expected
4222 				 * to find the error, record the way and offset.
4223 				 * Otherwise just log the fact we found another
4224 				 * error.
4225 				 */
4226 				if (flt_index == index) {
4227 					ch_flt->parity_data.ipe.cpl_way = way;
4228 					ch_flt->parity_data.ipe.cpl_off =
4229 					    instr * 4;
4230 				}
4231 				ch_flt->parity_data.ipe.cpl_lcnt++;
4232 				continue;
4233 			}
4234 		}
4235 	}
4236 }
4237 
4238 /*
4239  * Record information related to the source of a Pcache Parity Error.
4240  */
4241 static void
4242 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4243 {
4244 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4245 	int index;
4246 
4247 	/*
4248 	 * Since instruction decode cannot be done at high PIL just
4249 	 * examine the entire Pcache to check for any parity errors.
4250 	 */
4251 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4252 		ch_flt->parity_data.dpe.cpl_way = -1;
4253 		ch_flt->parity_data.dpe.cpl_off = -1;
4254 	}
4255 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4256 		cpu_pcache_parity_check(ch_flt, index);
4257 }
4258 
4259 /*
4260  * Check all ways of the Pcache at a specified index for good parity.
4261  */
4262 static void
4263 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4264 {
4265 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4266 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4267 	int way, word, pbit, parity_bits;
4268 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4269 	ch_pc_data_t tmp_pcp;
4270 
4271 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4272 		/*
4273 		 * Perform diagnostic read.
4274 		 */
4275 		get_pcache_dtag(index + way * pc_set_size,
4276 		    (uint64_t *)&tmp_pcp);
4277 		/*
4278 		 * Check data array for odd parity. There are 8 parity
4279 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4280 		 * of those bits covers exactly 8 bytes of the data
4281 		 * array:
4282 		 *
4283 		 *	parity bit	P$ data bytes covered
4284 		 *	----------	---------------------
4285 		 *	50		63:56
4286 		 *	51		55:48
4287 		 *	52		47:40
4288 		 *	53		39:32
4289 		 *	54		31:24
4290 		 *	55		23:16
4291 		 *	56		15:8
4292 		 *	57		7:0
4293 		 */
4294 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4295 		for (word = 0; word < pc_data_words; word++) {
4296 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4297 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4298 				/*
4299 				 * If this is the first error, log detailed
4300 				 * information about it. Otherwise just record
4301 				 * the fact that we found another error.
4302 				 */
4303 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4304 					ch_flt->parity_data.dpe.cpl_way = way;
4305 					ch_flt->parity_data.dpe.cpl_cache =
4306 					    CPU_PC_PARITY;
4307 					ch_flt->parity_data.dpe.cpl_off =
4308 					    word * sizeof (uint64_t);
4309 					bcopy(&tmp_pcp, pcp,
4310 					    sizeof (ch_pc_data_t));
4311 				}
4312 				ch_flt->parity_data.dpe.cpl_lcnt++;
4313 			}
4314 		}
4315 	}
4316 }
4317 
4318 
4319 /*
4320  * Add L1 Data cache data to the ereport payload.
4321  */
4322 static void
4323 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4324 {
4325 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4326 	ch_dc_data_t *dcp;
4327 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4328 	uint_t nelem;
4329 	int i, ways_to_check, ways_logged = 0;
4330 
4331 	/*
4332 	 * If this is an D$ fault then there may be multiple
4333 	 * ways captured in the ch_parity_log_t structure.
4334 	 * Otherwise, there will be at most one way captured
4335 	 * in the ch_diag_data_t struct.
4336 	 * Check each way to see if it should be encoded.
4337 	 */
4338 	if (ch_flt->flt_type == CPU_DC_PARITY)
4339 		ways_to_check = CH_DCACHE_NWAY;
4340 	else
4341 		ways_to_check = 1;
4342 	for (i = 0; i < ways_to_check; i++) {
4343 		if (ch_flt->flt_type == CPU_DC_PARITY)
4344 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4345 		else
4346 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4347 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4348 			bcopy(dcp, &dcdata[ways_logged],
4349 			    sizeof (ch_dc_data_t));
4350 			ways_logged++;
4351 		}
4352 	}
4353 
4354 	/*
4355 	 * Add the dcache data to the payload.
4356 	 */
4357 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4358 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4359 	if (ways_logged != 0) {
4360 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4361 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4362 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4363 	}
4364 }
4365 
4366 /*
4367  * Add L1 Instruction cache data to the ereport payload.
4368  */
4369 static void
4370 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4371 {
4372 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4373 	ch_ic_data_t *icp;
4374 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4375 	uint_t nelem;
4376 	int i, ways_to_check, ways_logged = 0;
4377 
4378 	/*
4379 	 * If this is an I$ fault then there may be multiple
4380 	 * ways captured in the ch_parity_log_t structure.
4381 	 * Otherwise, there will be at most one way captured
4382 	 * in the ch_diag_data_t struct.
4383 	 * Check each way to see if it should be encoded.
4384 	 */
4385 	if (ch_flt->flt_type == CPU_IC_PARITY)
4386 		ways_to_check = CH_ICACHE_NWAY;
4387 	else
4388 		ways_to_check = 1;
4389 	for (i = 0; i < ways_to_check; i++) {
4390 		if (ch_flt->flt_type == CPU_IC_PARITY)
4391 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4392 		else
4393 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4394 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4395 			bcopy(icp, &icdata[ways_logged],
4396 			    sizeof (ch_ic_data_t));
4397 			ways_logged++;
4398 		}
4399 	}
4400 
4401 	/*
4402 	 * Add the icache data to the payload.
4403 	 */
4404 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4405 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4406 	if (ways_logged != 0) {
4407 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4408 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4409 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4410 	}
4411 }
4412 
4413 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4414 
4415 /*
4416  * Add ecache data to payload.
4417  */
4418 static void
4419 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4420 {
4421 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4422 	ch_ec_data_t *ecp;
4423 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4424 	uint_t nelem;
4425 	int i, ways_logged = 0;
4426 
4427 	/*
4428 	 * Check each way to see if it should be encoded
4429 	 * and concatinate it into a temporary buffer.
4430 	 */
4431 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4432 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4433 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4434 			bcopy(ecp, &ecdata[ways_logged],
4435 			    sizeof (ch_ec_data_t));
4436 			ways_logged++;
4437 		}
4438 	}
4439 
4440 	/*
4441 	 * Panther CPUs have an additional level of cache and so
4442 	 * what we just collected was the L3 (ecache) and not the
4443 	 * L2 cache.
4444 	 */
4445 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4446 		/*
4447 		 * Add the L3 (ecache) data to the payload.
4448 		 */
4449 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4450 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4451 		if (ways_logged != 0) {
4452 			nelem = sizeof (ch_ec_data_t) /
4453 			    sizeof (uint64_t) * ways_logged;
4454 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4455 			    DATA_TYPE_UINT64_ARRAY, nelem,
4456 			    (uint64_t *)ecdata, NULL);
4457 		}
4458 
4459 		/*
4460 		 * Now collect the L2 cache.
4461 		 */
4462 		ways_logged = 0;
4463 		for (i = 0; i < PN_L2_NWAYS; i++) {
4464 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4465 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4466 				bcopy(ecp, &ecdata[ways_logged],
4467 				    sizeof (ch_ec_data_t));
4468 				ways_logged++;
4469 			}
4470 		}
4471 	}
4472 
4473 	/*
4474 	 * Add the L2 cache data to the payload.
4475 	 */
4476 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4477 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4478 	if (ways_logged != 0) {
4479 		nelem = sizeof (ch_ec_data_t) /
4480 		    sizeof (uint64_t) * ways_logged;
4481 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4482 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4483 	}
4484 }
4485 
4486 /*
4487  * Initialize cpu scheme for specified cpu.
4488  */
4489 static void
4490 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4491 {
4492 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4493 	uint8_t mask;
4494 
4495 	mask = cpunodes[cpuid].version;
4496 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4497 	    (u_longlong_t)cpunodes[cpuid].device_id);
4498 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4499 	    cpuid, &mask, (const char *)sbuf);
4500 }
4501 
4502 /*
4503  * Returns ereport resource type.
4504  */
4505 static int
4506 cpu_error_to_resource_type(struct async_flt *aflt)
4507 {
4508 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4509 
4510 	switch (ch_flt->flt_type) {
4511 
4512 	case CPU_CE_ECACHE:
4513 	case CPU_UE_ECACHE:
4514 	case CPU_UE_ECACHE_RETIRE:
4515 	case CPU_ORPH:
4516 		/*
4517 		 * If AFSR error bit indicates L2$ Data for Cheetah,
4518 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4519 		 * E$ Data type, otherwise, return CPU type.
4520 		 */
4521 		if (cpu_error_is_ecache_data(aflt->flt_inst,
4522 		    ch_flt->flt_bit))
4523 			return (ERRTYPE_ECACHE_DATA);
4524 		return (ERRTYPE_CPU);
4525 
4526 	case CPU_CE:
4527 	case CPU_UE:
4528 	case CPU_EMC:
4529 	case CPU_DUE:
4530 	case CPU_RCE:
4531 	case CPU_RUE:
4532 	case CPU_FRC:
4533 	case CPU_FRU:
4534 		return (ERRTYPE_MEMORY);
4535 
4536 	case CPU_IC_PARITY:
4537 	case CPU_DC_PARITY:
4538 	case CPU_FPUERR:
4539 	case CPU_PC_PARITY:
4540 	case CPU_ITLB_PARITY:
4541 	case CPU_DTLB_PARITY:
4542 		return (ERRTYPE_CPU);
4543 	}
4544 	return (ERRTYPE_UNKNOWN);
4545 }
4546 
4547 /*
4548  * Encode the data saved in the ch_async_flt_t struct into
4549  * the FM ereport payload.
4550  */
4551 static void
4552 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4553     nvlist_t *resource, int *afar_status, int *synd_status)
4554 {
4555 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4556 	*synd_status = AFLT_STAT_INVALID;
4557 	*afar_status = AFLT_STAT_INVALID;
4558 
4559 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4560 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4561 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4562 	}
4563 
4564 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4565 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4566 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4567 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4568 	}
4569 
4570 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4571 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4572 		    ch_flt->flt_bit);
4573 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4574 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4575 	}
4576 
4577 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4578 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4579 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4580 	}
4581 
4582 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4583 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4584 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4585 	}
4586 
4587 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4588 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4589 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4590 	}
4591 
4592 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4593 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4594 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4595 	}
4596 
4597 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4598 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4599 		    DATA_TYPE_BOOLEAN_VALUE,
4600 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4601 	}
4602 
4603 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4604 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4605 		    DATA_TYPE_BOOLEAN_VALUE,
4606 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4607 	}
4608 
4609 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4610 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4611 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4612 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4613 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4614 	}
4615 
4616 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4617 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4618 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4619 	}
4620 
4621 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4622 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4623 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4624 	}
4625 
4626 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4627 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4628 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4629 	}
4630 
4631 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4632 		cpu_payload_add_ecache(aflt, payload);
4633 
4634 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4635 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4636 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4637 	}
4638 
4639 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4640 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4641 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4642 	}
4643 
4644 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4645 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4646 		    DATA_TYPE_UINT32_ARRAY, 16,
4647 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4648 	}
4649 
4650 #if defined(CPU_IMP_L1_CACHE_PARITY)
4651 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4652 		cpu_payload_add_dcache(aflt, payload);
4653 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4654 		cpu_payload_add_icache(aflt, payload);
4655 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4656 
4657 #if defined(CHEETAH_PLUS)
4658 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4659 		cpu_payload_add_pcache(aflt, payload);
4660 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4661 		cpu_payload_add_tlb(aflt, payload);
4662 #endif	/* CHEETAH_PLUS */
4663 	/*
4664 	 * Create the FMRI that goes into the payload
4665 	 * and contains the unum info if necessary.
4666 	 */
4667 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4668 		char unum[UNUM_NAMLEN] = "";
4669 		char sid[DIMM_SERIAL_ID_LEN] = "";
4670 		int len, ret, rtype, synd_code;
4671 		uint64_t offset = (uint64_t)-1;
4672 
4673 		rtype = cpu_error_to_resource_type(aflt);
4674 		switch (rtype) {
4675 
4676 		case ERRTYPE_MEMORY:
4677 		case ERRTYPE_ECACHE_DATA:
4678 
4679 			/*
4680 			 * Memory errors, do unum lookup
4681 			 */
4682 			if (*afar_status == AFLT_STAT_INVALID)
4683 				break;
4684 
4685 			if (rtype == ERRTYPE_ECACHE_DATA)
4686 				aflt->flt_status |= ECC_ECACHE;
4687 			else
4688 				aflt->flt_status &= ~ECC_ECACHE;
4689 
4690 			synd_code = synd_to_synd_code(*synd_status,
4691 			    aflt->flt_synd, ch_flt->flt_bit);
4692 
4693 			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4694 				break;
4695 
4696 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4697 			    &len);
4698 
4699 			if (ret == 0) {
4700 				(void) cpu_get_mem_offset(aflt->flt_addr,
4701 				    &offset);
4702 			}
4703 
4704 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4705 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4706 			fm_payload_set(payload,
4707 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4708 			    DATA_TYPE_NVLIST, resource, NULL);
4709 			break;
4710 
4711 		case ERRTYPE_CPU:
4712 			/*
4713 			 * On-board processor array error, add cpu resource.
4714 			 */
4715 			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4716 			fm_payload_set(payload,
4717 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4718 			    DATA_TYPE_NVLIST, resource, NULL);
4719 			break;
4720 		}
4721 	}
4722 }
4723 
4724 /*
4725  * Initialize the way info if necessary.
4726  */
4727 void
4728 cpu_ereport_init(struct async_flt *aflt)
4729 {
4730 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4731 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4732 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4733 	int i;
4734 
4735 	/*
4736 	 * Initialize the info in the CPU logout structure.
4737 	 * The I$/D$ way information is not initialized here
4738 	 * since it is captured in the logout assembly code.
4739 	 */
4740 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4741 		(ecp + i)->ec_way = i;
4742 
4743 	for (i = 0; i < PN_L2_NWAYS; i++)
4744 		(l2p + i)->ec_way = i;
4745 }
4746 
4747 /*
4748  * Returns whether fault address is valid for this error bit and
4749  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4750  */
4751 int
4752 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4753 {
4754 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4755 
4756 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4757 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4758 	    AFLT_STAT_VALID &&
4759 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4760 }
4761 
4762 /*
4763  * Returns whether fault address is valid based on the error bit for the
4764  * one event being queued and whether the address is "in memory".
4765  */
4766 static int
4767 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4768 {
4769 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4770 	int afar_status;
4771 	uint64_t afsr_errs, afsr_ow, *ow_bits;
4772 
4773 	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4774 	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4775 		return (0);
4776 
4777 	afsr_errs = ch_flt->afsr_errs;
4778 	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4779 
4780 	switch (afar_status) {
4781 	case AFLT_STAT_VALID:
4782 		return (1);
4783 
4784 	case AFLT_STAT_AMBIGUOUS:
4785 		/*
4786 		 * Status is ambiguous since another error bit (or bits)
4787 		 * of equal priority to the specified bit is on in the AFSR,
4788 		 * so check those bits.  Return 1 only if the bits that are on
4789 		 * in the same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4790 		 * Otherwise not all the equal priority bits are for memory
4791 		 * errors, so return 0.
4792 		 */
4793 		ow_bits = afar_overwrite;
4794 		while ((afsr_ow = *ow_bits++) != 0) {
4795 			/*
4796 			 * Get other bits that are on in t_afsr_bit's priority
4797 			 * class to check for Memory Error bits only.
4798 			 */
4799 			if (afsr_ow & t_afsr_bit) {
4800 				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4801 					return (0);
4802 				else
4803 					return (1);
4804 			}
4805 		}
4806 		/*FALLTHRU*/
4807 
4808 	default:
4809 		return (0);
4810 	}
4811 }
4812 
4813 static void
4814 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4815 {
4816 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4817 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4818 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4819 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4820 #if defined(CPU_IMP_ECACHE_ASSOC)
4821 	int i, nway;
4822 #endif /* CPU_IMP_ECACHE_ASSOC */
4823 
4824 	/*
4825 	 * Check if the CPU log out captured was valid.
4826 	 */
4827 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4828 	    ch_flt->flt_data_incomplete)
4829 		return;
4830 
4831 #if defined(CPU_IMP_ECACHE_ASSOC)
4832 	nway = cpu_ecache_nway();
4833 	i =  cpu_ecache_line_valid(ch_flt);
4834 	if (i == 0 || i > nway) {
4835 		for (i = 0; i < nway; i++)
4836 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4837 	} else
4838 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4839 #else /* CPU_IMP_ECACHE_ASSOC */
4840 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4841 #endif /* CPU_IMP_ECACHE_ASSOC */
4842 
4843 #if defined(CHEETAH_PLUS)
4844 	pn_cpu_log_diag_l2_info(ch_flt);
4845 #endif /* CHEETAH_PLUS */
4846 
4847 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4848 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4849 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4850 	}
4851 
4852 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4853 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4854 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4855 		else
4856 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4857 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4858 	}
4859 }
4860 
4861 /*
4862  * Cheetah ECC calculation.
4863  *
4864  * We only need to do the calculation on the data bits and can ignore check
4865  * bit and Mtag bit terms in the calculation.
4866  */
4867 static uint64_t ch_ecc_table[9][2] = {
4868 	/*
4869 	 * low order 64-bits   high-order 64-bits
4870 	 */
4871 	{ 0x46bffffeccd1177f, 0x488800022100014c },
4872 	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
4873 	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
4874 	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
4875 	{ 0x7f5511421b113809, 0x901c88d84288aafe },
4876 	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
4877 	{ 0xf552181014448344, 0x7ff8f4443e411911 },
4878 	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
4879 	{ 0x3280008440001112, 0xfee88b337ffffd62 },
4880 };
4881 
4882 /*
4883  * 64-bit population count, use well-known popcnt trick.
4884  * We could use the UltraSPARC V9 POPC instruction, but some
4885  * CPUs including Cheetahplus and Jaguar do not support that
4886  * instruction.
4887  */
4888 int
4889 popc64(uint64_t val)
4890 {
4891 	int cnt;
4892 
4893 	for (cnt = 0; val != 0; val &= val - 1)
4894 		cnt++;
4895 	return (cnt);
4896 }
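
/*
 * Each pass of the loop above clears the lowest set bit, so the loop
 * runs exactly once per 1 bit, e.g. 0xf0 -> 0xe0 -> 0xc0 -> 0x80 -> 0.
 * A few illustrative checks:
 */
#if 0	/* example only; never compiled */
static void
example_popc64_usage(void)
{
	ASSERT(popc64(0) == 0);
	ASSERT(popc64(0xf0) == 4);			/* four iterations */
	ASSERT(popc64(0xffffffffffffffffULL) == 64);	/* all bits set */
}
#endif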
4897 
4898 /*
4899  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4900  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4901  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4902  * instead of doing all the xor's.
4903  */
4904 uint32_t
4905 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4906 {
4907 	int bitno, s;
4908 	int synd = 0;
4909 
4910 	for (bitno = 0; bitno < 9; bitno++) {
4911 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4912 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4913 		synd |= (s << bitno);
4914 	}
4915 	return (synd);
4916 
4917 }
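
/*
 * Since each check bit is the XOR (popcount parity) of the data bits
 * selected by its row of ch_ecc_table, a couple of properties follow
 * directly and make handy sanity checks:
 */
#if 0	/* example only; never compiled */
static void
example_us3_gen_ecc_usage(void)
{
	/* All-zero data selects no 1 bits in any row, so the ECC is 0. */
	ASSERT(us3_gen_ecc(0, 0) == 0);

	/*
	 * Flipping one data bit flips exactly the check bits whose rows
	 * cover that bit, so the syndrome must change.
	 */
	ASSERT(us3_gen_ecc(1, 0) != us3_gen_ecc(0, 0));
}
#endif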
4918 
4919 /*
4920  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4921  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4922  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4923  */
4924 static void
4925 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4926     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4927 {
4928 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4929 
4930 	if (reason &&
4931 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4932 		(void) strcat(reason, eccp->ec_reason);
4933 	}
4934 
4935 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4936 	ch_flt->flt_type = eccp->ec_flt_type;
4937 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4938 		ch_flt->flt_diag_data = *cdp;
4939 	else
4940 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4941 	aflt->flt_in_memory =
4942 	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4943 
4944 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4945 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4946 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4947 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4948 	else
4949 		aflt->flt_synd = 0;
4950 
4951 	aflt->flt_payload = eccp->ec_err_payload;
4952 
4953 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4954 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4955 		cpu_errorq_dispatch(eccp->ec_err_class,
4956 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4957 		    aflt->flt_panic);
4958 	else
4959 		cpu_errorq_dispatch(eccp->ec_err_class,
4960 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4961 		    aflt->flt_panic);
4962 }
4963 
4964 /*
4965  * Queue events on the async event queue, one event per error bit.  First we
4966  * queue the events that we "expect" for the given trap, then we queue events
4967  * that we may not expect.  Return number of events queued.
4968  */
4969 int
4970 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4971     ch_cpu_logout_t *clop)
4972 {
4973 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4974 	ecc_type_to_info_t *eccp;
4975 	int nevents = 0;
4976 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4977 #if defined(CHEETAH_PLUS)
4978 	uint64_t orig_t_afsr_errs;
4979 #endif
4980 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4981 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4982 	ch_diag_data_t *cdp = NULL;
4983 
4984 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4985 
4986 #if defined(CHEETAH_PLUS)
4987 	orig_t_afsr_errs = t_afsr_errs;
4988 
4989 	/*
4990 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4991 	 */
4992 	if (clop != NULL) {
4993 		/*
4994 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4995 		 * flt_addr and flt_stat fields will be reset to the primaries
4996 		 * below, but the sdw_addr and sdw_stat will stay as the
4997 		 * secondaries.
4998 		 */
4999 		cdp = &clop->clo_sdw_data;
5000 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
5001 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
5002 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
5003 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5004 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
5005 
5006 		/*
5007 		 * If the primary and shadow AFSR differ, tag the shadow as
5008 		 * the first fault.
5009 		 */
5010 		if ((primary_afar != cdp->chd_afar) ||
5011 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
5012 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
5013 		}
5014 
5015 		/*
5016 		 * Check AFSR bits as well as AFSR_EXT bits in order of
5017 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
5018 		 * is expected to be zero for those CPUs which do not have
5019 		 * an AFSR_EXT register.
5020 		 */
5021 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
5022 			if ((eccp->ec_afsr_bit &
5023 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
5024 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
5025 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5026 				cdp = NULL;
5027 				t_afsr_errs &= ~eccp->ec_afsr_bit;
5028 				nevents++;
5029 			}
5030 		}
5031 
5032 		/*
5033 		 * If the ME bit is on in the primary AFSR, turn back on all
5034 		 * the error bits that may set the ME bit, to make sure we
5035 		 * see the ME AFSR error logs.
5036 		 */
5037 		if ((primary_afsr & C_AFSR_ME) != 0)
5038 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
5039 	}
5040 #endif	/* CHEETAH_PLUS */
5041 
5042 	if (clop != NULL)
5043 		cdp = &clop->clo_data;
5044 
5045 	/*
5046 	 * Queue expected errors; both the error bit and the fault type must
5047 	 * match in the ecc_type_to_info table.
5048 	 */
5049 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5050 	    eccp++) {
5051 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
5052 		    (eccp->ec_flags & aflt->flt_status) != 0) {
5053 #if defined(SERRANO)
5054 			/*
5055 			 * For FRC/FRU errors on Serrano the afar2 captures
5056 			 * the address and the associated data is
5057 			 * in the shadow logout area.
5058 			 */
5059 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5060 				if (clop != NULL)
5061 					cdp = &clop->clo_sdw_data;
5062 				aflt->flt_addr = ch_flt->afar2;
5063 			} else {
5064 				if (clop != NULL)
5065 					cdp = &clop->clo_data;
5066 				aflt->flt_addr = primary_afar;
5067 			}
5068 #else	/* SERRANO */
5069 			aflt->flt_addr = primary_afar;
5070 #endif	/* SERRANO */
5071 			aflt->flt_stat = primary_afsr;
5072 			ch_flt->afsr_ext = primary_afsr_ext;
5073 			ch_flt->afsr_errs = primary_afsr_errs;
5074 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5075 			cdp = NULL;
5076 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5077 			nevents++;
5078 		}
5079 	}
5080 
5081 	/*
5082 	 * Queue unexpected errors, error bit only match.
5083 	 * Queue unexpected errors; only the error bit needs to match.
5084 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5085 	    eccp++) {
5086 		if (eccp->ec_afsr_bit & t_afsr_errs) {
5087 #if defined(SERRANO)
5088 			/*
5089 			 * For FRC/FRU errors on Serrano the afar2 captures
5090 			 * the address and the associated data is
5091 			 * in the shadow logout area.
5092 			 */
5093 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5094 				if (clop != NULL)
5095 					cdp = &clop->clo_sdw_data;
5096 				aflt->flt_addr = ch_flt->afar2;
5097 			} else {
5098 				if (clop != NULL)
5099 					cdp = &clop->clo_data;
5100 				aflt->flt_addr = primary_afar;
5101 			}
5102 #else	/* SERRANO */
5103 			aflt->flt_addr = primary_afar;
5104 #endif	/* SERRANO */
5105 			aflt->flt_stat = primary_afsr;
5106 			ch_flt->afsr_ext = primary_afsr_ext;
5107 			ch_flt->afsr_errs = primary_afsr_errs;
5108 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5109 			cdp = NULL;
5110 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5111 			nevents++;
5112 		}
5113 	}
5114 	return (nevents);
5115 }
5116 
5117 /*
5118  * Return trap type number.
5119  */
5120 uint8_t
5121 flt_to_trap_type(struct async_flt *aflt)
5122 {
5123 	if (aflt->flt_status & ECC_I_TRAP)
5124 		return (TRAP_TYPE_ECC_I);
5125 	if (aflt->flt_status & ECC_D_TRAP)
5126 		return (TRAP_TYPE_ECC_D);
5127 	if (aflt->flt_status & ECC_F_TRAP)
5128 		return (TRAP_TYPE_ECC_F);
5129 	if (aflt->flt_status & ECC_C_TRAP)
5130 		return (TRAP_TYPE_ECC_C);
5131 	if (aflt->flt_status & ECC_DP_TRAP)
5132 		return (TRAP_TYPE_ECC_DP);
5133 	if (aflt->flt_status & ECC_IP_TRAP)
5134 		return (TRAP_TYPE_ECC_IP);
5135 	if (aflt->flt_status & ECC_ITLB_TRAP)
5136 		return (TRAP_TYPE_ECC_ITLB);
5137 	if (aflt->flt_status & ECC_DTLB_TRAP)
5138 		return (TRAP_TYPE_ECC_DTLB);
5139 	return (TRAP_TYPE_UNKNOWN);
5140 }
5141 
5142 /*
5143  * Decide an error type based on detector and leaky/partner tests.
5144  * The following array is used for quick translation - it must
5145  * stay in sync with ce_dispact_t.
5146  */
5147 
5148 static char *cetypes[] = {
5149 	CE_DISP_DESC_U,
5150 	CE_DISP_DESC_I,
5151 	CE_DISP_DESC_PP,
5152 	CE_DISP_DESC_P,
5153 	CE_DISP_DESC_L,
5154 	CE_DISP_DESC_PS,
5155 	CE_DISP_DESC_S
5156 };
5157 
5158 char *
5159 flt_to_error_type(struct async_flt *aflt)
5160 {
5161 	ce_dispact_t dispact, disp;
5162 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5163 
5164 	/*
5165 	 * The memory payload bundle is shared by some events that do
5166 	 * not perform any classification.  For those flt_disp will be
5167 	 * 0 and we will return "unknown".
5168 	 */
5169 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5170 		return (cetypes[CE_DISP_UNKNOWN]);
5171 
5172 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5173 
5174 	/*
5175 	 * It is also possible that no scrub/classification was performed
5176 	 * by the detector, for instance where a disrupting error was logged
5177 	 * in the AFSR while CEEN was off in cpu_deferred_error.
5178 	 */
5179 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5180 		return (cetypes[CE_DISP_UNKNOWN]);
5181 
5182 	/*
5183 	 * Lookup type in initial classification/action table
5184 	 */
5185 	dispact = CE_DISPACT(ce_disp_table,
5186 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
5187 	    CE_XDIAG_STATE(dtcrinfo),
5188 	    CE_XDIAG_CE1SEEN(dtcrinfo),
5189 	    CE_XDIAG_CE2SEEN(dtcrinfo));
5190 
5191 	/*
5192 	 * A bad lookup is not something to panic production systems for.
5193 	 */
5194 	ASSERT(dispact != CE_DISP_BAD);
5195 	if (dispact == CE_DISP_BAD)
5196 		return (cetypes[CE_DISP_UNKNOWN]);
5197 
5198 	disp = CE_DISP(dispact);
5199 
5200 	switch (disp) {
5201 	case CE_DISP_UNKNOWN:
5202 	case CE_DISP_INTERMITTENT:
5203 		break;
5204 
5205 	case CE_DISP_POSS_PERS:
5206 		/*
5207 		 * "Possible persistent" errors to which we have applied a valid
5208 		 * leaky test can be separated into "persistent" or "leaky".
5209 		 */
5210 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5211 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
5212 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5213 			    CE_XDIAG_CE2SEEN(lkyinfo))
5214 				disp = CE_DISP_LEAKY;
5215 			else
5216 				disp = CE_DISP_PERS;
5217 		}
5218 		break;
5219 
5220 	case CE_DISP_POSS_STICKY:
5221 		/*
5222 		 * Promote "possible sticky" results that have been
5223 		 * confirmed by a partner test to "sticky".  Unconfirmed
5224 		 * "possible sticky" events are left at that status - we do not
5225 		 * guess at any bad reader/writer etc status here.
5226 		 */
5227 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5228 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5229 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5230 			disp = CE_DISP_STICKY;
5231 
5232 		/*
5233 		 * Promote "possible sticky" results on a uniprocessor
5234 		 * to "sticky"
5235 		 */
5236 		if (disp == CE_DISP_POSS_STICKY &&
5237 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5238 			disp = CE_DISP_STICKY;
5239 		break;
5240 
5241 	default:
5242 		disp = CE_DISP_UNKNOWN;
5243 		break;
5244 	}
5245 
5246 	return (cetypes[disp]);
5247 }
5248 
5249 /*
5250  * Given the entire afsr, the specific bit to check and a prioritized list of
5251  * error bits, determine the validity of the various overwrite priority
5252  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5253  * different overwrite priorities.
5254  *
5255  * Given a specific afsr error bit and the entire afsr, there are three cases:
5256  *   INVALID:	The specified bit is lower overwrite priority than some other
5257  *		error bit which is on in the afsr (or IVU/IVC).
5258  *   VALID:	The specified bit is higher priority than all other error bits
5259  *		which are on in the afsr.
5260  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5261  *		bit is on in the afsr.
5262  */
5263 int
5264 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5265 {
5266 	uint64_t afsr_ow;
5267 
5268 	while ((afsr_ow = *ow_bits++) != 0) {
5269 		/*
5270 		 * If bit is in the priority class, check to see if another
5271 		 * bit in the same class is on => ambiguous.  Otherwise,
5272 		 * the value is valid.  If the bit is not on at this priority
5273 		 * class, but a higher priority bit is on, then the value is
5274 		 * invalid.
5275 		 */
5276 		if (afsr_ow & afsr_bit) {
5277 			/*
5278 			 * If equal pri bit is on, ambiguous.
5279 			 */
5280 			if (afsr & (afsr_ow & ~afsr_bit))
5281 				return (AFLT_STAT_AMBIGUOUS);
5282 			return (AFLT_STAT_VALID);
5283 		} else if (afsr & afsr_ow)
5284 			break;
5285 	}
5286 
5287 	/*
5288 	 * We didn't find a match or a higher priority bit was on.  Not
5289 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5290 	 */
5291 	return (AFLT_STAT_INVALID);
5292 }
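/*
 * Illustrative sketch only (never compiled): how the priority walk above
 * behaves for a hypothetical two-class priority list { 0x3, 0xc, 0 } and
 * afsr_bit == 0x4.  The masks are made up for the example and are not a
 * real overwrite table.
 */
#ifdef notdef
static void
example_overw_status(void)
{
	static uint64_t example_ow[] = { 0x3, 0xc, 0 };

	/* 0x4 is the only bit on in its class => VALID */
	ASSERT(afsr_to_overw_status(0x4, 0x4, example_ow) == AFLT_STAT_VALID);
	/* the equal-priority 0x8 bit is also on => AMBIGUOUS */
	ASSERT(afsr_to_overw_status(0xc, 0x4, example_ow) ==
	    AFLT_STAT_AMBIGUOUS);
	/* the higher-priority 0x1 bit is on => INVALID */
	ASSERT(afsr_to_overw_status(0x5, 0x4, example_ow) ==
	    AFLT_STAT_INVALID);
}
#endif	/* notdef */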
5293 
5294 static int
5295 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5296 {
5297 #if defined(SERRANO)
5298 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5299 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5300 	else
5301 #endif	/* SERRANO */
5302 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5303 }
5304 
5305 static int
5306 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5307 {
5308 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5309 }
5310 
5311 static int
5312 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5313 {
5314 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5315 }
5316 
5317 static int
5318 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5319 {
5320 #ifdef lint
5321 	cpuid = cpuid;
5322 #endif
5323 #if defined(CHEETAH_PLUS)
5324 	/*
5325 	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5326 	 * policy for Cheetah+ and separate for Panther CPUs.
5327 	 */
5328 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5329 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5330 			return (afsr_to_msynd_status(afsr, afsr_bit));
5331 		else
5332 			return (afsr_to_esynd_status(afsr, afsr_bit));
5333 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5334 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5335 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5336 		else
5337 			return (afsr_to_esynd_status(afsr, afsr_bit));
5338 #else /* CHEETAH_PLUS */
5339 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5340 		return (afsr_to_msynd_status(afsr, afsr_bit));
5341 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5342 		return (afsr_to_esynd_status(afsr, afsr_bit));
5343 #endif /* CHEETAH_PLUS */
5344 	} else {
5345 		return (AFLT_STAT_INVALID);
5346 	}
5347 }
5348 
5349 /*
5350  * Slave CPU stick synchronization.
5351  */
5352 void
5353 sticksync_slave(void)
5354 {
5355 	int		i;
5356 	int		tries = 0;
5357 	int64_t		tskew;
5358 	int64_t		av_tskew;
5359 
5360 	kpreempt_disable();
5361 	/* wait for the master side */
5362 	while (stick_sync_cmd != SLAVE_START)
5363 		;
5364 	/*
5365 	 * Synchronization should only take a few tries at most. But in the
5366 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5367 	 * without its stick synchronized wouldn't be a good citizen.
5368 	 */
5369 	while (slave_done == 0) {
5370 		/*
5371 		 * Time skew calculation.
5372 		 */
5373 		av_tskew = tskew = 0;
5374 
5375 		for (i = 0; i < stick_iter; i++) {
5376 			/* make location hot */
5377 			timestamp[EV_A_START] = 0;
5378 			stick_timestamp(&timestamp[EV_A_START]);
5379 
5380 			/* tell the master we're ready */
5381 			stick_sync_cmd = MASTER_START;
5382 
5383 			/* and wait */
5384 			while (stick_sync_cmd != SLAVE_CONT)
5385 				;
5386 			/* Event B end */
5387 			stick_timestamp(&timestamp[EV_B_END]);
5388 
5389 			/* calculate time skew */
5390 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5391 			    - (timestamp[EV_A_END] - timestamp[EV_A_START]))
5392 			    / 2;
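			/*
			 * Worked example: if (EV_B_END - EV_B_START) is 120
			 * ticks and (EV_A_END - EV_A_START) is 100 ticks,
			 * tskew = (120 - 100) / 2 = 10, i.e. the slave's
			 * STICK is running 10 ticks ahead of the master's.
			 */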
5393 
5394 			/* keep running count */
5395 			av_tskew += tskew;
5396 		} /* for */
5397 
5398 		/*
5399 		 * Adjust stick for time skew if not within the max allowed;
5400 		 * otherwise we're all done.
5401 		 */
5402 		if (stick_iter != 0)
5403 			av_tskew = av_tskew/stick_iter;
5404 		if (ABS(av_tskew) > stick_tsk) {
5405 			/*
5406 			 * If the skew is 1 (the slave's STICK register
5407 			 * is 1 STICK ahead of the master's), stick_adj
5408 			 * could fail to adjust the slave's STICK register
5409 			 * if the STICK read on the slave happens to
5410 			 * align with the increment of the STICK.
5411 			 * Therefore, we increment the skew to 2.
5412 			 */
5413 			if (av_tskew == 1)
5414 				av_tskew++;
5415 			stick_adj(-av_tskew);
5416 		} else
5417 			slave_done = 1;
5418 #ifdef DEBUG
5419 		if (tries < DSYNC_ATTEMPTS)
5420 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5421 			    av_tskew;
5422 		++tries;
5423 #endif /* DEBUG */
5424 #ifdef lint
5425 		tries = tries;
5426 #endif
5427 
5428 	} /* while */
5429 
5430 	/* allow the master to finish */
5431 	stick_sync_cmd = EVENT_NULL;
5432 	kpreempt_enable();
5433 }
5434 
5435 /*
5436  * Master CPU side of stick synchronization.
5437  *  - timestamp end of Event A
5438  *  - timestamp beginning of Event B
5439  */
5440 void
5441 sticksync_master(void)
5442 {
5443 	int		i;
5444 
5445 	kpreempt_disable();
5446 	/* tell the slave we've started */
5447 	slave_done = 0;
5448 	stick_sync_cmd = SLAVE_START;
5449 
5450 	while (slave_done == 0) {
5451 		for (i = 0; i < stick_iter; i++) {
5452 			/* wait for the slave */
5453 			while (stick_sync_cmd != MASTER_START)
5454 				;
5455 			/* Event A end */
5456 			stick_timestamp(&timestamp[EV_A_END]);
5457 
5458 			/* make location hot */
5459 			timestamp[EV_B_START] = 0;
5460 			stick_timestamp(&timestamp[EV_B_START]);
5461 
5462 			/* tell the slave to continue */
5463 			stick_sync_cmd = SLAVE_CONT;
5464 		} /* for */
5465 
5466 		/* wait while slave calculates time skew */
5467 		while (stick_sync_cmd == SLAVE_CONT)
5468 			;
5469 	} /* while */
5470 	kpreempt_enable();
5471 }
5472 
5473 /*
5474  * Cheetah/Cheetah+ have a disrupting error for copybacks, so we don't need to
5475  * do the Spitfire hack of xcall'ing all the cpus to ask them to check.  Also,
5476  * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
5477  * panic idle.
5478  */
5479 /*ARGSUSED*/
5480 void
5481 cpu_check_allcpus(struct async_flt *aflt)
5482 {}
5483 
5484 struct kmem_cache *ch_private_cache;
5485 
5486 /*
5487  * Cpu private uninitialization.  Uninitialize the Ecache scrubber and
5488  * deallocate the scrubber data structures and cpu_private data structure.
5489  */
5490 void
5491 cpu_uninit_private(struct cpu *cp)
5492 {
5493 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5494 
5495 	ASSERT(chprp);
5496 	cpu_uninit_ecache_scrub_dr(cp);
5497 	CPU_PRIVATE(cp) = NULL;
5498 	ch_err_tl1_paddrs[cp->cpu_id] = 0;
5499 	kmem_cache_free(ch_private_cache, chprp);
5500 	cmp_delete_cpu(cp->cpu_id);
5501 
5502 }
5503 
5504 /*
5505  * Cheetah Cache Scrubbing
5506  *
5507  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5508  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5509  * protected by either parity or ECC.
5510  *
5511  * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
5512  * cache per second). Due to the specifics of how the I$ control
5513  * logic works with respect to the ASI used to scrub I$ lines, the entire
5514  * I$ is scanned at once.
5515  */
5516 
5517 /*
5518  * Tuneables to enable and disable the scrubbing of the caches, and to tune
5519  * scrubbing behavior.  These may be changed via /etc/system or using mdb
5520  * on a running system.
5521  */
5522 int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5523 
5524 /*
5525  * The following are the PIL levels that the softints/cross traps will fire at.
5526  */
5527 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5528 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5529 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5530 
5531 #if defined(JALAPENO)
5532 
5533 /*
5534  * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5535  * on Jalapeno.
5536  */
5537 int ecache_scrub_enable = 0;
5538 
5539 #else	/* JALAPENO */
5540 
5541 /*
5542  * With all other cpu types, E$ scrubbing is on by default
5543  */
5544 int ecache_scrub_enable = 1;
5545 
5546 #endif	/* JALAPENO */
5547 
5548 
5549 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5550 
5551 /*
5552  * The I$ scrubber tends to cause latency problems for real-time SW, so it
5553  * is disabled by default on non-Cheetah systems
5554  */
5555 int icache_scrub_enable = 0;
5556 
5557 /*
5558  * Tuneables specifying the scrub calls per second and the scan rate
5559  * for each cache
5560  *
5561  * The cyclic times are set during boot based on the following values.
5562  * Changing these values in mdb after this time will have no effect.  If
5563  * a different value is desired, it must be set in /etc/system before a
5564  * reboot.
5565  */
5566 int ecache_calls_a_sec = 1;
5567 int dcache_calls_a_sec = 2;
5568 int icache_calls_a_sec = 2;
5569 
5570 int ecache_scan_rate_idle = 1;
5571 int ecache_scan_rate_busy = 1;
5572 int dcache_scan_rate_idle = 1;
5573 int dcache_scan_rate_busy = 1;
5574 int icache_scan_rate_idle = 1;
5575 int icache_scan_rate_busy = 1;
5576 
5577 #else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5578 
5579 int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
5580 
5581 int ecache_calls_a_sec = 100;		/* E$ scrub calls per second */
5582 int dcache_calls_a_sec = 100;		/* D$ scrub calls per second */
5583 int icache_calls_a_sec = 100;		/* I$ scrub calls per second */
5584 
5585 int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
5586 int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
5587 int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
5588 int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
5589 int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
5590 int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
5591 
5592 #endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5593 
5594 /*
5595  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5596  * increment the outstanding request counter and schedule a softint to run
5597  * the scrubber.
5598  */
5599 extern xcfunc_t cache_scrubreq_tl1;
5600 
5601 /*
5602  * These are the softint functions for each cache scrubber
5603  */
5604 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5605 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5606 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5607 
5608 /*
5609  * The cache scrub info table contains cache specific information
5610  * and allows for some of the scrub code to be table driven, reducing
5611  * duplication of similar code across the caches.
5612  *
5613  * This table keeps a copy of the value in the calls per second variable
5614  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5615  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5616  * mdb in a misguided attempt to disable the scrubber).
5617  */
5618 struct scrub_info {
5619 	int		*csi_enable;	/* scrubber enable flag */
5620 	int		csi_freq;	/* scrubber calls per second */
5621 	int		csi_index;	/* index to chsm_outstanding[] */
5622 	uint64_t	csi_inum;	/* scrubber interrupt number */
5623 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5624 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5625 	char		csi_name[3];	/* cache name for this scrub entry */
5626 } cache_scrub_info[] = {
5627 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5628 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5629 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5630 };
5631 
5632 /*
5633  * If scrubbing is enabled, increment the outstanding request counter.  If it
5634  * is 1 (meaning there were no previous requests outstanding), call
5635  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5636  * a self trap.
5637  */
5638 static void
5639 do_scrub(struct scrub_info *csi)
5640 {
5641 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5642 	int index = csi->csi_index;
5643 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5644 
5645 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5646 		if (atomic_inc_32_nv(outstanding) == 1) {
5647 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5648 			    csi->csi_inum, 0);
5649 		}
5650 	}
5651 }
5652 
5653 /*
5654  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5655  * cross-trap the offline cpus.
5656  */
5657 static void
5658 do_scrub_offline(struct scrub_info *csi)
5659 {
5660 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5661 
5662 	if (CPUSET_ISNULL(cpu_offline_set)) {
5663 		/*
5664 		 * No offline cpus - nothing to do
5665 		 */
5666 		return;
5667 	}
5668 
5669 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5670 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5671 		    csi->csi_index);
5672 	}
5673 }
5674 
5675 /*
5676  * This is the initial setup for the scrubber cyclics - it sets the
5677  * interrupt level, frequency, and function to call.
5678  */
5679 /*ARGSUSED*/
5680 static void
5681 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5682     cyc_time_t *when)
5683 {
5684 	struct scrub_info *csi = (struct scrub_info *)arg;
5685 
5686 	ASSERT(csi != NULL);
5687 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5688 	hdlr->cyh_level = CY_LOW_LEVEL;
5689 	hdlr->cyh_arg = arg;
5690 
5691 	when->cyt_when = 0;	/* Start immediately */
5692 	when->cyt_interval = NANOSEC / csi->csi_freq;
5693 }
5694 
5695 /*
5696  * Initialization for cache scrubbing.
5697  * This routine is called AFTER all cpus have had cpu_init_private called
5698  * to initialize their private data areas.
5699  */
5700 void
5701 cpu_init_cache_scrub(void)
5702 {
5703 	int i;
5704 	struct scrub_info *csi;
5705 	cyc_omni_handler_t omni_hdlr;
5706 	cyc_handler_t offline_hdlr;
5707 	cyc_time_t when;
5708 
5709 	/*
5710 	 * save away the maximum number of lines for the D$
5711 	 */
5712 	dcache_nlines = dcache_size / dcache_linesize;
5713 
5714 	/*
5715 	 * register the softints for the cache scrubbing
5716 	 */
5717 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5718 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5719 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5720 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5721 
5722 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5723 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5724 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5725 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5726 
5727 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5728 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5729 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5730 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5731 
5732 	/*
5733 	 * start the scrubbing for all the caches
5734 	 */
5735 	mutex_enter(&cpu_lock);
5736 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5737 
5738 		csi = &cache_scrub_info[i];
5739 
5740 		if (!(*csi->csi_enable))
5741 			continue;
5742 
5743 		/*
5744 		 * force the following to be true:
5745 		 *	1 <= calls_a_sec <= hz
5746 		 */
5747 		if (csi->csi_freq > hz) {
5748 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5749 			    "(%d); resetting to hz (%d)", csi->csi_name,
5750 			    csi->csi_freq, hz);
5751 			csi->csi_freq = hz;
5752 		} else if (csi->csi_freq < 1) {
5753 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5754 			    "(%d); resetting to 1", csi->csi_name,
5755 			    csi->csi_freq);
5756 			csi->csi_freq = 1;
5757 		}
5758 
5759 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5760 		omni_hdlr.cyo_offline = NULL;
5761 		omni_hdlr.cyo_arg = (void *)csi;
5762 
5763 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5764 		offline_hdlr.cyh_arg = (void *)csi;
5765 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5766 
5767 		when.cyt_when = 0;	/* Start immediately */
5768 		when.cyt_interval = NANOSEC / csi->csi_freq;
5769 
5770 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5771 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5772 	}
5773 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5774 	mutex_exit(&cpu_lock);
5775 }
5776 
5777 /*
5778  * Indicate that the specified cpu is idle.
5779  */
5780 void
5781 cpu_idle_ecache_scrub(struct cpu *cp)
5782 {
5783 	if (CPU_PRIVATE(cp) != NULL) {
5784 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5785 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5786 	}
5787 }
5788 
5789 /*
5790  * Indicate that the specified cpu is busy.
5791  */
5792 void
5793 cpu_busy_ecache_scrub(struct cpu *cp)
5794 {
5795 	if (CPU_PRIVATE(cp) != NULL) {
5796 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5797 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5798 	}
5799 }
5800 
5801 /*
5802  * Initialization for cache scrubbing for the specified cpu.
5803  */
5804 void
5805 cpu_init_ecache_scrub_dr(struct cpu *cp)
5806 {
5807 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5808 	int cpuid = cp->cpu_id;
5809 
5810 	/* initialize the number of lines in the caches */
5811 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5812 	    cpunodes[cpuid].ecache_linesize;
5813 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5814 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5815 
5816 	/*
5817 	 * do_scrub() and do_scrub_offline() check both the global
5818 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5819 	 * check this value before scrubbing.  Currently, we use it to
5820 	 * disable the E$ scrubber on multi-core cpus or while running at
5821 	 * slowed speed.  For now, just turn everything on and allow
5822 	 * cpu_init_private() to change it if necessary.
5823 	 */
5824 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5825 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5826 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5827 
5828 	cpu_busy_ecache_scrub(cp);
5829 }
5830 
5831 /*
5832  * Un-initialization for cache scrubbing for the specified cpu.
5833  */
5834 static void
5835 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5836 {
5837 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5838 
5839 	/*
5840 	 * un-initialize bookkeeping for cache scrubbing
5841 	 */
5842 	bzero(csmp, sizeof (ch_scrub_misc_t));
5843 
5844 	cpu_idle_ecache_scrub(cp);
5845 }
5846 
5847 /*
5848  * Called periodically on each CPU to scrub the D$.
5849  */
5850 static void
5851 scrub_dcache(int how_many)
5852 {
5853 	int i;
5854 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5855 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5856 
5857 	/*
5858 	 * scrub the desired number of lines
5859 	 */
5860 	for (i = 0; i < how_many; i++) {
5861 		/*
5862 		 * scrub a D$ line
5863 		 */
5864 		dcache_inval_line(index);
5865 
5866 		/*
5867 		 * calculate the next D$ line to scrub, assumes
5868 		 * that dcache_nlines is a power of 2
5869 		 */
5870 		index = (index + 1) & (dcache_nlines - 1);
5871 	}
5872 
5873 	/*
5874 	 * set the scrub index for the next visit
5875 	 */
5876 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5877 }
5878 
5879 /*
5880  * Handler for D$ scrub inum softint. Call scrub_dcache until
5881  * we decrement the outstanding request count to zero.
5882  */
5883 /*ARGSUSED*/
5884 static uint_t
5885 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5886 {
5887 	int i;
5888 	int how_many;
5889 	int outstanding;
5890 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5891 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5892 	struct scrub_info *csi = (struct scrub_info *)arg1;
5893 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5894 	    dcache_scan_rate_idle : dcache_scan_rate_busy;
5895 
5896 	/*
5897 	 * The scan rates are expressed in units of tenths of a
5898 	 * percent.  A scan rate of 1000 (100%) means the whole
5899 	 * cache is scanned every second.
5900 	 */
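	/*
	 * Worked example (hypothetical geometry): a 64KB D$ with 32-byte
	 * lines has 2048 lines; with scan_rate == 100 (10%/sec) and
	 * csi_freq == 100 calls/sec, how_many = (2048 * 100) /
	 * (1000 * 100) = 2 lines per call, i.e. about 200 lines/sec.
	 */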
5901 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5902 
5903 	do {
5904 		outstanding = *countp;
5905 		for (i = 0; i < outstanding; i++) {
5906 			scrub_dcache(how_many);
5907 		}
5908 	} while (atomic_add_32_nv(countp, -outstanding));
5909 
5910 	return (DDI_INTR_CLAIMED);
5911 }
5912 
5913 /*
5914  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5915  * by invalidating lines. Due to the characteristics of the ASI which
5916  * is used to invalidate an I$ line, the entire I$ must be invalidated
5917  * rather than just an individual I$ line.
5918  */
5919 static void
5920 scrub_icache(int how_many)
5921 {
5922 	int i;
5923 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5924 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5925 	int icache_nlines = csmp->chsm_icache_nlines;
5926 
5927 	/*
5928 	 * scrub the desired number of lines
5929 	 */
5930 	for (i = 0; i < how_many; i++) {
5931 		/*
5932 		 * since the entire I$ must be scrubbed at once,
5933 		 * wait until the index wraps to zero to invalidate
5934 		 * the entire I$
5935 		 */
5936 		if (index == 0) {
5937 			icache_inval_all();
5938 		}
5939 
5940 		/*
5941 		 * calculate the next I$ line to scrub, assumes
5942 		 * that chsm_icache_nlines is a power of 2
5943 		 */
5944 		index = (index + 1) & (icache_nlines - 1);
5945 	}
5946 
5947 	/*
5948 	 * set the scrub index for the next visit
5949 	 */
5950 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5951 }
5952 
5953 /*
5954  * Handler for I$ scrub inum softint. Call scrub_icache until
5955  * we decrement the outstanding request count to zero.
5956  */
5957 /*ARGSUSED*/
5958 static uint_t
5959 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5960 {
5961 	int i;
5962 	int how_many;
5963 	int outstanding;
5964 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5965 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5966 	struct scrub_info *csi = (struct scrub_info *)arg1;
5967 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5968 	    icache_scan_rate_idle : icache_scan_rate_busy;
5969 	int icache_nlines = csmp->chsm_icache_nlines;
5970 
5971 	/*
5972 	 * The scan rates are expressed in units of tenths of a
5973 	 * percent.  A scan rate of 1000 (100%) means the whole
5974 	 * cache is scanned every second.
5975 	 */
5976 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5977 
5978 	do {
5979 		outstanding = *countp;
5980 		for (i = 0; i < outstanding; i++) {
5981 			scrub_icache(how_many);
5982 		}
5983 	} while (atomic_add_32_nv(countp, -outstanding));
5984 
5985 	return (DDI_INTR_CLAIMED);
5986 }
5987 
5988 /*
5989  * Called periodically on each CPU to scrub the E$.
5990  */
5991 static void
5992 scrub_ecache(int how_many)
5993 {
5994 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5995 	int i;
5996 	int cpuid = CPU->cpu_id;
5997 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5998 	int nlines = csmp->chsm_ecache_nlines;
5999 	int linesize = cpunodes[cpuid].ecache_linesize;
6000 	int ec_set_size = cpu_ecache_set_size(CPU);
6001 
6002 	/*
6003 	 * scrub the desired number of lines
6004 	 */
6005 	for (i = 0; i < how_many; i++) {
6006 		/*
6007 		 * scrub the E$ line
6008 		 */
6009 		ecache_flush_line(ecache_flushaddr + (index * linesize),
6010 		    ec_set_size);
6011 
6012 		/*
6013 		 * calculate the next E$ line to scrub based on twice
6014 		 * the number of E$ lines (to displace lines containing
6015 		 * flush area data), assumes that the number of lines
6016 		 * is a power of 2
6017 		 */
6018 		index = (index + 1) & ((nlines << 1) - 1);
6019 	}
6020 
6021 	/*
6022 	 * set the ecache scrub index for the next visit
6023 	 */
6024 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
6025 }
6026 
6027 /*
6028  * Handler for E$ scrub inum softint. Call the E$ scrubber until
6029  * we decrement the outstanding request count to zero.
6030  *
6031  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
6032  * become negative after the atomic_add_32_nv().  This is not a problem, as
6033  * the next trip around the loop won't scrub anything, and the next add will
6034  * reset the count back to zero.
6035  */
6036 /*ARGSUSED*/
6037 static uint_t
6038 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
6039 {
6040 	int i;
6041 	int how_many;
6042 	int outstanding;
6043 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
6044 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
6045 	struct scrub_info *csi = (struct scrub_info *)arg1;
6046 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
6047 	    ecache_scan_rate_idle : ecache_scan_rate_busy;
6048 	int ecache_nlines = csmp->chsm_ecache_nlines;
6049 
6050 	/*
6051 	 * The scan rates are expressed in units of tenths of a
6052 	 * percent.  A scan rate of 1000 (100%) means the whole
6053 	 * cache is scanned every second.
6054 	 */
6055 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
6056 
6057 	do {
6058 		outstanding = *countp;
6059 		for (i = 0; i < outstanding; i++) {
6060 			scrub_ecache(how_many);
6061 		}
6062 	} while (atomic_add_32_nv(countp, -outstanding));
6063 
6064 	return (DDI_INTR_CLAIMED);
6065 }
6066 
6067 /*
6068  * Timeout function to reenable CEEN
6069  */
6070 static void
6071 cpu_delayed_check_ce_errors(void *arg)
6072 {
6073 	if (taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6074 	    TQ_NOSLEEP) == TASKQID_INVALID) {
6075 		(void) timeout(cpu_delayed_check_ce_errors, arg,
6076 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6077 	}
6078 }
6079 
6080 /*
6081  * CE Deferred Re-enable after trap.
6082  *
6083  * When the CPU gets a disrupting trap for any of the errors
6084  * controlled by the CEEN bit, CEEN is disabled in the trap handler
6085  * immediately. To eliminate the possibility of multiple CEs causing
6086  * recursive stack overflow in the trap handler, we cannot
6087  * reenable CEEN while still running in the trap handler. Instead,
6088  * after a CE is logged on a CPU, we schedule a timeout function,
6089  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6090  * seconds. This function will check whether any further CEs
6091  * have occurred on that CPU, and if none have, will reenable CEEN.
6092  *
6093  * If further CEs have occurred while CEEN is disabled, another
6094  * timeout will be scheduled. This is to ensure that the CPU can
6095  * make progress in the face of CE 'storms', and that it does not
6096  * spend all its time logging CE errors.
6097  */
6098 static void
6099 cpu_check_ce_errors(void *arg)
6100 {
6101 	int	cpuid = (int)(uintptr_t)arg;
6102 	cpu_t	*cp;
6103 
6104 	/*
6105 	 * We acquire cpu_lock.
6106 	 */
6107 	ASSERT(curthread->t_pil == 0);
6108 
6109 	/*
6110 	 * verify that the cpu is still around, DR
6111 	 * could have got there first ...
6112 	 */
6113 	mutex_enter(&cpu_lock);
6114 	cp = cpu_get(cpuid);
6115 	if (cp == NULL) {
6116 		mutex_exit(&cpu_lock);
6117 		return;
6118 	}
6119 	/*
6120 	 * make sure we don't migrate across CPUs
6121 	 * while checking our CE status.
6122 	 */
6123 	kpreempt_disable();
6124 
6125 	/*
6126 	 * If we are running on the CPU that got the
6127 	 * CE, we can do the checks directly.
6128 	 */
6129 	if (cp->cpu_id == CPU->cpu_id) {
6130 		mutex_exit(&cpu_lock);
6131 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6132 		kpreempt_enable();
6133 		return;
6134 	}
6135 	kpreempt_enable();
6136 
6137 	/*
6138 	 * send an x-call to get the CPU that originally
6139 	 * got the CE to do the necessary checks. If we can't
6140 	 * send the x-call, reschedule the timeout, otherwise we
6141 	 * lose CEEN forever on that CPU.
6142 	 */
6143 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6144 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6145 		    TIMEOUT_CEEN_CHECK, 0);
6146 		mutex_exit(&cpu_lock);
6147 	} else {
6148 		/*
6149 		 * When the CPU is not accepting xcalls, or
6150 		 * the processor is offlined, we don't want to
6151 		 * incur the extra overhead of trying to schedule the
6152 		 * CE timeout indefinitely. However, we don't want to lose
6153 		 * CE checking forever.
6154 		 *
6155 		 * Keep rescheduling the timeout, accepting the additional
6156 		 * overhead as the cost of correctness in the case where we get
6157 		 * a CE, disable CEEN, offline the CPU during the
6158 		 * timeout interval, and then online it at some
6159 		 * point in the future. This is unlikely given the short
6160 		 * cpu_ceen_delay_secs.
6161 		 */
6162 		mutex_exit(&cpu_lock);
6163 		(void) timeout(cpu_delayed_check_ce_errors,
6164 		    (void *)(uintptr_t)cp->cpu_id,
6165 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6166 	}
6167 }
6168 
6169 /*
6170  * This routine will check whether CEs have occurred while
6171  * CEEN is disabled. Any CEs detected will be logged and, if
6172  * possible, scrubbed.
6173  *
6174  * The memscrubber will also use this routine to clear any errors
6175  * caused by its scrubbing with CEEN disabled.
6176  *
6177  * flag == SCRUBBER_CEEN_CHECK
6178  *		called from memscrubber, just check/scrub, no reset
6179  *		paddr	physical addr. for start of scrub pages
6180  *		vaddr	virtual addr. for scrub area
6181  *		psz	page size of area to be scrubbed
6182  *
6183  * flag == TIMEOUT_CEEN_CHECK
6184  *		timeout function has triggered, reset timeout or CEEN
6185  *
6186  * Note: We must not migrate cpus during this function.  This can be
6187  * achieved by one of:
6188  *    - invoking as target of an x-call in which case we're at XCALL_PIL
6189  *	The flag value must be first xcall argument.
6190  *    - disabling kernel preemption.  This should be done for very short
6191  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6192  *	scrub an extended area with cpu_check_block.  The call for
6193  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6194  *	brief for this case.
6195  *    - binding to a cpu, eg with thread_affinity_set().  This is used
6196  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6197  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6198  */
6199 void
6200 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6201 {
6202 	ch_cpu_errors_t	cpu_error_regs;
6203 	uint64_t	ec_err_enable;
6204 	uint64_t	page_offset;
6205 
6206 	/* Read AFSR */
6207 	get_cpu_error_state(&cpu_error_regs);
6208 
6209 	/*
6210 	 * If no CEEN errors have occurred during the timeout
6211 	 * interval, it is safe to re-enable CEEN and exit.
6212 	 */
6213 	if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
6214 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
6215 		if (flag == TIMEOUT_CEEN_CHECK &&
6216 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6217 			set_error_enable(ec_err_enable | EN_REG_CEEN);
6218 		return;
6219 	}
6220 
6221 	/*
6222 	 * Ensure that CEEN was not reenabled (maybe by DR) before
6223 	 * we log/clear the error.
6224 	 */
6225 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6226 		set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6227 
6228 	/*
6229 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6230 	 * timeout will be rescheduled when the error is logged.
6231 	 */
6232 	if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
6233 	    (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
6234 		cpu_ce_detected(&cpu_error_regs,
6235 		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6236 	else
6237 		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6238 
6239 	/*
6240 	 * If the memory scrubber runs while CEEN is
6241 	 * disabled, (or if CEEN is disabled during the
6242 	 * scrub as a result of a CE being triggered by
6243 	 * it), the range being scrubbed will not be
6244 	 * completely cleaned. If there are multiple CEs
6245 	 * in the range at most two of these will be dealt
6246 	 * with, (one by the trap handler and one by the
6247 	 * timeout). It is also possible that none are dealt
6248 	 * with, (CEEN disabled and another CE occurs before
6249 	 * the timeout triggers). So to ensure that the
6250 	 * memory is actually scrubbed, we have to access each
6251 	 * memory location in the range and then check whether
6252 	 * that access causes a CE.
6253 	 */
6254 	if (flag == SCRUBBER_CEEN_CHECK && va) {
6255 		if ((cpu_error_regs.afar >= pa) &&
6256 		    (cpu_error_regs.afar < (pa + psz))) {
6257 			/*
6258 			 * Force a load from physical memory for each
6259 			 * 64-byte block, then check AFSR to determine
6260 			 * whether this access caused an error.
6261 			 *
6262 			 * This is a slow way to do a scrub, but as it will
6263 			 * only be invoked when the memory scrubber actually
6264 			 * triggered a CE, it should not happen too
6265 			 * frequently.
6266 			 *
6267 			 * cut down what we need to check as the scrubber
6268 			 * has verified up to AFAR, so get its offset
6269 			 * into the page and start there.
6270 			 */
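			/*
			 * Worked example: for an 8K page and an AFAR at
			 * page offset 0x1234, P2ALIGN(0x1234, 64) == 0x1200,
			 * so va advances by 0x1200 and psz shrinks from
			 * 0x2000 to 0xe00 before the block check below.
			 */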
6271 			page_offset = (uint64_t)(cpu_error_regs.afar &
6272 			    (psz - 1));
6273 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6274 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6275 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6276 			    psz);
6277 		}
6278 	}
6279 
6280 	/*
6281 	 * Reset error enable if this CE is not masked.
6282 	 */
6283 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6284 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6285 		set_error_enable(ec_err_enable | EN_REG_CEEN);
6286 
6287 }
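/*
 * Illustrative sketch only (never compiled): how a memscrubber-style caller
 * might use cpu_check_ce() for the SCRUBBER_CEEN_CHECK case, binding to the
 * cpu that did the scrub as suggested in the note above.  "scrub_cpuid",
 * "pa", "va" and "psz" are hypothetical arguments.
 */
#ifdef notdef
static void
example_scrubber_ce_check(int scrub_cpuid, uint64_t pa, caddr_t va, uint_t psz)
{
	thread_affinity_set(curthread, scrub_cpuid);
	cpu_check_ce(SCRUBBER_CEEN_CHECK, pa, va, psz);
	thread_affinity_clear(curthread);
}
#endif	/* notdef */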
6288 
6289 /*
6290  * Attempt a cpu logout for an error that we did not trap for, such
6291  * as a CE noticed with CEEN off.  It is assumed that we are still running
6292  * on the cpu that took the error and that we cannot migrate.  Returns
6293  * 0 on success, otherwise nonzero.
6294  */
6295 static int
6296 cpu_ce_delayed_ec_logout(uint64_t afar)
6297 {
6298 	ch_cpu_logout_t *clop;
6299 
6300 	if (CPU_PRIVATE(CPU) == NULL)
6301 		return (0);
6302 
6303 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6304 	if (atomic_cas_64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6305 	    LOGOUT_INVALID)
6306 		return (0);
6307 
6308 	cpu_delayed_logout(afar, clop);
6309 	return (1);
6310 }
6311 
6312 /*
6313  * We got an error while CEEN was disabled. We
6314  * need to clean up after it and log whatever
6315  * information we have on the CE.
6316  */
6317 void
6318 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6319 {
6320 	ch_async_flt_t ch_flt;
6321 	struct async_flt *aflt;
6322 	char pr_reason[MAX_REASON_STRING];
6323 
6324 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6325 	ch_flt.flt_trapped_ce = flag;
6326 	aflt = (struct async_flt *)&ch_flt;
6327 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6328 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6329 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6330 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6331 	aflt->flt_addr = cpu_error_regs->afar;
6332 #if defined(SERRANO)
6333 	ch_flt.afar2 = cpu_error_regs->afar2;
6334 #endif	/* SERRANO */
6335 	aflt->flt_pc = NULL;
6336 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6337 	aflt->flt_tl = 0;
6338 	aflt->flt_panic = 0;
6339 	cpu_log_and_clear_ce(&ch_flt);
6340 
6341 	/*
6342 	 * check if we caused any errors during cleanup
6343 	 */
6344 	if (clear_errors(&ch_flt)) {
6345 		pr_reason[0] = '\0';
6346 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6347 		    NULL);
6348 	}
6349 }
6350 
6351 /*
6352  * Log/clear CEEN-controlled disrupting errors
6353  */
6354 static void
6355 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6356 {
6357 	struct async_flt *aflt;
6358 	uint64_t afsr, afsr_errs;
6359 	ch_cpu_logout_t *clop;
6360 	char pr_reason[MAX_REASON_STRING];
6361 	on_trap_data_t *otp = curthread->t_ontrap;
6362 
6363 	aflt = (struct async_flt *)ch_flt;
6364 	afsr = aflt->flt_stat;
6365 	afsr_errs = ch_flt->afsr_errs;
6366 	aflt->flt_id = gethrtime_waitfree();
6367 	aflt->flt_bus_id = getprocessorid();
6368 	aflt->flt_inst = CPU->cpu_id;
6369 	aflt->flt_prot = AFLT_PROT_NONE;
6370 	aflt->flt_class = CPU_FAULT;
6371 	aflt->flt_status = ECC_C_TRAP;
6372 
6373 	pr_reason[0] = '\0';
6374 	/*
6375 	 * Get the CPU log out info for Disrupting Trap.
6376 	 */
6377 	if (CPU_PRIVATE(CPU) == NULL) {
6378 		clop = NULL;
6379 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6380 	} else {
6381 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6382 	}
6383 
6384 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6385 		ch_cpu_errors_t cpu_error_regs;
6386 
6387 		get_cpu_error_state(&cpu_error_regs);
6388 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6389 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6390 		clop->clo_data.chd_afar = cpu_error_regs.afar;
6391 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6392 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6393 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6394 		clop->clo_sdw_data.chd_afsr_ext =
6395 		    cpu_error_regs.shadow_afsr_ext;
6396 #if defined(SERRANO)
6397 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6398 #endif	/* SERRANO */
6399 		ch_flt->flt_data_incomplete = 1;
6400 
6401 		/*
6402 		 * The logging/clear code expects AFSR/AFAR to be cleared.
6403 		 * The trap handler does it for CEEN enabled errors
6404 		 * so we need to do it here.
6405 		 */
6406 		set_cpu_error_state(&cpu_error_regs);
6407 	}
6408 
6409 #if defined(JALAPENO) || defined(SERRANO)
6410 	/*
6411 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6412 	 * For Serrano, even though we do have the AFAR, we still do the
6413 	 * scrub on the RCE side since that's where the error type can
6414 	 * be properly classified as intermittent, persistent, etc.
6415 	 *
6416 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
6417 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6418 	 * the flt_status bits.
6419 	 */
6420 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6421 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6422 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6423 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
6424 	}
6425 #else /* JALAPENO || SERRANO */
6426 	/*
6427 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
6428 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6429 	 * the flt_status bits.
6430 	 */
6431 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6432 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6433 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6434 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6435 		}
6436 	}
6437 
6438 #endif /* JALAPENO || SERRANO */
6439 
6440 	/*
6441 	 * Update flt_prot if this error occurred under on_trap protection.
6442 	 */
6443 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6444 		aflt->flt_prot = AFLT_PROT_EC;
6445 
6446 	/*
6447 	 * Queue events on the async event queue, one event per error bit.
6448 	 */
6449 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6450 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6451 		ch_flt->flt_type = CPU_INV_AFSR;
6452 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6453 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6454 		    aflt->flt_panic);
6455 	}
6456 
6457 	/*
6458 	 * Zero out + invalidate CPU logout.
6459 	 */
6460 	if (clop) {
6461 		bzero(clop, sizeof (ch_cpu_logout_t));
6462 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6463 	}
6464 
6465 	/*
6466 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6467 	 * was disabled, we need to flush either the entire
6468 	 * E$ or an E$ line.
6469 	 */
6470 #if defined(JALAPENO) || defined(SERRANO)
6471 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6472 #else	/* JALAPENO || SERRANO */
6473 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6474 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6475 #endif	/* JALAPENO || SERRANO */
6476 		cpu_error_ecache_flush(ch_flt);
6477 
6478 }
6479 
6480 /*
6481  * depending on the error type, we determine whether we
6482  * need to flush the entire ecache or just a line.
6483  */
6484 static int
6485 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6486 {
6487 	struct async_flt *aflt;
6488 	uint64_t	afsr;
6489 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6490 
6491 	aflt = (struct async_flt *)ch_flt;
6492 	afsr = aflt->flt_stat;
6493 
6494 	/*
6495 	 * If we got multiple errors, no point in trying
6496 	 * the individual cases, just flush the whole cache
6497 	 */
6498 	if (afsr & C_AFSR_ME) {
6499 		return (ECACHE_FLUSH_ALL);
6500 	}
6501 
6502 	/*
6503 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6504 	 * was disabled, we need to flush the entire E$. We can't just
6505 	 * flush the cache line affected as the ME bit
6506 	 * is not set when multiple correctable errors of the same
6507 	 * type occur, so we might have multiple CPC or EDC errors,
6508 	 * with only the first recorded.
6509 	 */
6510 #if defined(JALAPENO) || defined(SERRANO)
6511 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6512 #else	/* JALAPENO || SERRANO */
6513 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6514 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6515 #endif	/* JALAPENO || SERRANO */
6516 		return (ECACHE_FLUSH_ALL);
6517 	}
6518 
6519 #if defined(JALAPENO) || defined(SERRANO)
6520 	/*
6521 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6522 	 * flush the entire Ecache.
6523 	 */
6524 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6525 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6526 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6527 			return (ECACHE_FLUSH_LINE);
6528 		} else {
6529 			return (ECACHE_FLUSH_ALL);
6530 		}
6531 	}
6532 #else /* JALAPENO || SERRANO */
6533 	/*
6534 	 * If UE only is set, flush the Ecache line, otherwise
6535 	 * flush the entire Ecache.
6536 	 */
6537 	if (afsr_errs & C_AFSR_UE) {
6538 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6539 		    C_AFSR_UE) {
6540 			return (ECACHE_FLUSH_LINE);
6541 		} else {
6542 			return (ECACHE_FLUSH_ALL);
6543 		}
6544 	}
6545 #endif /* JALAPENO || SERRANO */
6546 
6547 	/*
6548 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6549 	 * flush the entire Ecache.
6550 	 */
6551 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6552 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6553 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6554 			return (ECACHE_FLUSH_LINE);
6555 		} else {
6556 			return (ECACHE_FLUSH_ALL);
6557 		}
6558 	}
6559 
6560 	/*
6561 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6562 	 * flush the entire Ecache.
6563 	 */
6564 	if (afsr_errs & C_AFSR_BERR) {
6565 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6566 			return (ECACHE_FLUSH_LINE);
6567 		} else {
6568 			return (ECACHE_FLUSH_ALL);
6569 		}
6570 	}
6571 
6572 	return (0);
6573 }
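/*
 * For example (non-Jalapeno/Serrano build): afsr_errs == C_AFSR_UE alone
 * yields ECACHE_FLUSH_LINE, while C_AFSR_UE | C_AFSR_EDC yields
 * ECACHE_FLUSH_ALL, since any EDC forces a full flush above.
 */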
6574 
6575 void
6576 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6577 {
6578 	int	ecache_flush_flag =
6579 	    cpu_error_ecache_flush_required(ch_flt);
6580 
6581 	/*
6582 	 * Flush Ecache line or entire Ecache based on above checks.
6583 	 */
6584 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6585 		cpu_flush_ecache();
6586 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6587 		cpu_flush_ecache_line(ch_flt);
6588 	}
6589 
6590 }
6591 
6592 /*
6593  * Extract the PA portion from the E$ tag.
6594  */
6595 uint64_t
6596 cpu_ectag_to_pa(int setsize, uint64_t tag)
6597 {
6598 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6599 		return (JG_ECTAG_TO_PA(setsize, tag));
6600 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6601 		return (PN_L3TAG_TO_PA(tag));
6602 	else
6603 		return (CH_ECTAG_TO_PA(setsize, tag));
6604 }
6605 
6606 /*
6607  * Convert the E$ tag PA into an E$ subblock index.
6608  */
6609 int
6610 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6611 {
6612 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6613 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6614 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6615 		/* Panther has only one subblock per line */
6616 		return (0);
6617 	else
6618 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6619 }
6620 
6621 /*
6622  * All subblocks in an E$ line must be invalid for
6623  * the line to be invalid.
6624  */
6625 int
6626 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6627 {
6628 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6629 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6630 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6631 		return (PN_L3_LINE_INVALID(tag));
6632 	else
6633 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6634 }
6635 
6636 /*
6637  * Extract state bits for a subblock given the tag.  Note that for Panther
6638  * this works on both l2 and l3 tags.
6639  */
6640 int
6641 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6642 {
6643 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6644 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6645 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6646 		return (tag & CH_ECSTATE_MASK);
6647 	else
6648 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6649 }
6650 
6651 /*
6652  * Cpu specific initialization.
6653  */
6654 void
6655 cpu_mp_init(void)
6656 {
6657 #ifdef	CHEETAHPLUS_ERRATUM_25
6658 	if (cheetah_sendmondo_recover) {
6659 		cheetah_nudge_init();
6660 	}
6661 #endif
6662 }
6663 
6664 void
6665 cpu_ereport_post(struct async_flt *aflt)
6666 {
6667 	char *cpu_type, buf[FM_MAX_CLASS];
6668 	nv_alloc_t *nva = NULL;
6669 	nvlist_t *ereport, *detector, *resource;
6670 	errorq_elem_t *eqep;
6671 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6672 	char unum[UNUM_NAMLEN];
6673 	int synd_code;
6674 	uint8_t msg_type;
6675 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6676 
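	/*
	 * If this fault will panic (or a panic is already in progress),
	 * reserve an element from the ereport error queue and use its
	 * nvlist and allocator; otherwise build a fresh ereport nvlist.
	 */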
6677 	if (aflt->flt_panic || panicstr) {
6678 		eqep = errorq_reserve(ereport_errorq);
6679 		if (eqep == NULL)
6680 			return;
6681 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6682 		nva = errorq_elem_nva(ereport_errorq, eqep);
6683 	} else {
6684 		ereport = fm_nvlist_create(nva);
6685 	}
6686 
6687 	/*
6688 	 * Create the scheme "cpu" FMRI.
6689 	 */
6690 	detector = fm_nvlist_create(nva);
6691 	resource = fm_nvlist_create(nva);
6692 	switch (cpunodes[aflt->flt_inst].implementation) {
6693 	case CHEETAH_IMPL:
6694 		cpu_type = FM_EREPORT_CPU_USIII;
6695 		break;
6696 	case CHEETAH_PLUS_IMPL:
6697 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6698 		break;
6699 	case JALAPENO_IMPL:
6700 		cpu_type = FM_EREPORT_CPU_USIIIi;
6701 		break;
6702 	case SERRANO_IMPL:
6703 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6704 		break;
6705 	case JAGUAR_IMPL:
6706 		cpu_type = FM_EREPORT_CPU_USIV;
6707 		break;
6708 	case PANTHER_IMPL:
6709 		cpu_type = FM_EREPORT_CPU_USIVplus;
6710 		break;
6711 	default:
6712 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6713 		break;
6714 	}
6715 
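	/* Fill in the detector FMRI for the faulting CPU instance. */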
6716 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
6717 
6718 	/*
6719 	 * Encode all the common data into the ereport.
6720 	 */
6721 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6722 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6723 
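	/*
	 * buf now holds "cpu.<cputype>.<subclass>"; fm_ereport_set() is
	 * expected to prepend the ereport class prefix, so the posted
	 * class takes the usual "ereport.cpu.<cputype>.<subclass>" form.
	 */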
6724 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6725 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6726 	    detector, NULL);
6727 
6728 	/*
6729 	 * Encode the error specific data that was saved in
6730 	 * the async_flt structure into the ereport.
6731 	 */
6732 	cpu_payload_add_aflt(aflt, ereport, resource,
6733 	    &plat_ecc_ch_flt.ecaf_afar_status,
6734 	    &plat_ecc_ch_flt.ecaf_synd_status);
6735 
6736 	if (aflt->flt_panic || panicstr) {
6737 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6738 	} else {
6739 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6740 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6741 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6742 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6743 	}
6744 	/*
6745 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6746 	 * to the SC only if it can process it.
6747 	 */
6748 
6749 	if (&plat_ecc_capability_sc_get &&
6750 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6751 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6752 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6753 			/*
6754 			 * If afar status is not invalid do a unum lookup.
6755 			 */
6756 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6757 			    AFLT_STAT_INVALID) {
6758 				synd_code = synd_to_synd_code(
6759 				    plat_ecc_ch_flt.ecaf_synd_status,
6760 				    aflt->flt_synd, ch_flt->flt_bit);
6761 				(void) cpu_get_mem_unum_synd(synd_code,
6762 				    aflt, unum);
6763 			} else {
6764 				unum[0] = '\0';
6765 			}
6766 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6767 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6768 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6769 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6770 			    ch_flt->flt_sdw_afsr_ext;
6771 
6772 			if (&plat_log_fruid_error2)
6773 				plat_log_fruid_error2(msg_type, unum, aflt,
6774 				    &plat_ecc_ch_flt);
6775 		}
6776 	}
6777 }
6778 
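/*
 * Build a DDI FM error payload for this fault and dispatch it to the
 * error handlers registered below the root nexus.  If the access had no
 * error protection and a handler reports a fatal condition, escalate to
 * a panic.
 */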
6779 void
6780 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6781 {
6782 	int status;
6783 	ddi_fm_error_t de;
6784 
6785 	bzero(&de, sizeof (ddi_fm_error_t));
6786 
6787 	de.fme_version = DDI_FME_VERSION;
6788 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6789 	    FM_ENA_FMT1);
6790 	de.fme_flag = expected;
6791 	de.fme_bus_specific = (void *)aflt->flt_addr;
6792 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6793 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6794 		aflt->flt_panic = 1;
6795 }
6796 
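/*
 * Tag the fault payload with its ereport class and queue it on the
 * given error queue for later processing.
 */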
6797 void
6798 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6799     errorq_t *eqp, uint_t flag)
6800 {
6801 	struct async_flt *aflt = (struct async_flt *)payload;
6802 
6803 	aflt->flt_erpt_class = error_class;
6804 	errorq_dispatch(eqp, payload, payload_sz, flag);
6805 }
6806 
6807 /*
6808  * This routine may be called by the IO module, but does not do
6809  * anything in this cpu module. The SERD algorithm is handled by
6810  * cpumem-diagnosis engine instead.
6811  */
6812 /*ARGSUSED*/
6813 void
6814 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
6815 {}
6816 
6817 void
6818 adjust_hw_copy_limits(int ecache_size)
6819 {
6820 	/*
6821 	 * Set hw copy limits.
6822 	 *
6823 	 * /etc/system will be parsed later and can override one or more
6824 	 * of these settings.
6825 	 *
6826 	 * At this time, ecache size seems only mildly relevant.
6827 	 * We seem to run into issues with the d-cache and stalls
6828 	 * we see on misses.
6829 	 *
6830 	 * Cycle measurement indicates that 2 byte aligned copies fare
6831 	 * little better than doing things with VIS at around 512 bytes.
6832 	 * 4 byte aligned shows promise until around 1024 bytes. 8 byte
6833 	 * aligned is faster whenever the source and destination data are
6834 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6835 	 * limit seems to be driven by the 2K write cache.
6836 	 * When more than 2K of copies are done in non-VIS mode, stores
6837 	 * back up in the write cache.  In VIS mode, the write cache is
6838 	 * bypassed, allowing faster cache-line writes aligned on cache
6839 	 * boundaries.
6840 	 *
6841 	 * In addition, in non-VIS mode, there is no prefetching, so
6842 	 * for larger copies, the advantage of prefetching to avoid even
6843 	 * occasional cache misses is enough to justify using the VIS code.
6844 	 *
6845 	 * During testing, it was discovered that netbench ran 3% slower
6846 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6847 	 * applications, data is only used once (copied to the output
6848 	 * buffer, then copied by the network device off the system).  Using
6849 	 * the VIS copy saves more L2 cache state.  Network copies are
6850 	 * around 1.3K to 1.5K in size for historical reasons.
6851 	 *
6852 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6853 	 * aligned copy even for large caches and 8 MB ecache.  The
6854 	 * infrastructure to allow different limits for different sized
6855 	 * caches is kept to allow further tuning in later releases.
6856 	 */
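	/*
	 * Illustrative only: these limits are ordinary kernel tunables, so
	 * an administrator could pin one from /etc/system, for example:
	 *
	 *	set hw_copy_limit_8 = 1024
	 *
	 * (value in bytes).  Such a setting is parsed after the first-time
	 * path below and is deliberately preserved by the MP path.
	 */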
6857 
6858 	if (min_ecache_size == 0 && use_hw_bcopy) {
6859 		/*
6860 		 * First time through - should be before /etc/system
6861 		 * is read.
6862 		 * Could skip the checks for zero but this lets us
6863 		 * preserve any debugger rewrites.
6864 		 */
6865 		if (hw_copy_limit_1 == 0) {
6866 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6867 			priv_hcl_1 = hw_copy_limit_1;
6868 		}
6869 		if (hw_copy_limit_2 == 0) {
6870 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6871 			priv_hcl_2 = hw_copy_limit_2;
6872 		}
6873 		if (hw_copy_limit_4 == 0) {
6874 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6875 			priv_hcl_4 = hw_copy_limit_4;
6876 		}
6877 		if (hw_copy_limit_8 == 0) {
6878 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6879 			priv_hcl_8 = hw_copy_limit_8;
6880 		}
6881 		min_ecache_size = ecache_size;
6882 	} else {
6883 		/*
6884 		 * MP initialization. Called *after* /etc/system has
6885 		 * been parsed. One CPU has already been initialized.
6886 		 * Need to cater for /etc/system having scragged one
6887 		 * of our values.
6888 		 */
6889 		if (ecache_size == min_ecache_size) {
6890 			/*
6891 			 * Same size ecache. We do nothing unless we
6892 			 * have a pessimistic ecache setting. In that
6893 			 * case we become more optimistic (if the cache is
6894 			 * large enough).
6895 			 */
6896 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6897 				/*
6898 				 * Need to adjust hw_copy_limit* from our
6899 				 * pessimistic uniprocessor value to a more
6900 				 * optimistic UP value *iff* it hasn't been
6901 				 * reset.
6902 				 */
6903 				if ((ecache_size > 1048576) &&
6904 				    (priv_hcl_8 == hw_copy_limit_8)) {
6905 					if (ecache_size <= 2097152)
6906 						hw_copy_limit_8 = 4 *
6907 						    VIS_COPY_THRESHOLD;
6908 					else if (ecache_size <= 4194304)
6909 						hw_copy_limit_8 = 4 *
6910 						    VIS_COPY_THRESHOLD;
6911 					else
6912 						hw_copy_limit_8 = 4 *
6913 						    VIS_COPY_THRESHOLD;
6914 					priv_hcl_8 = hw_copy_limit_8;
6915 				}
6916 			}
6917 		} else if (ecache_size < min_ecache_size) {
6918 			/*
6919 			 * A different ecache size. Can this even happen?
6920 			 */
6921 			if (priv_hcl_8 == hw_copy_limit_8) {
6922 				/*
6923 				 * The previous value that we set
6924 				 * is unchanged (i.e., it hasn't been
6925 				 * scragged by /etc/system). Rewrite it.
6926 				 */
6927 				if (ecache_size <= 1048576)
6928 					hw_copy_limit_8 = 8 *
6929 					    VIS_COPY_THRESHOLD;
6930 				else if (ecache_size <= 2097152)
6931 					hw_copy_limit_8 = 8 *
6932 					    VIS_COPY_THRESHOLD;
6933 				else if (ecache_size <= 4194304)
6934 					hw_copy_limit_8 = 8 *
6935 					    VIS_COPY_THRESHOLD;
6936 				else
6937 					hw_copy_limit_8 = 10 *
6938 					    VIS_COPY_THRESHOLD;
6939 				priv_hcl_8 = hw_copy_limit_8;
6940 				min_ecache_size = ecache_size;
6941 			}
6942 		}
6943 	}
6944 }
6945 
6946 /*
6947  * Called from illegal instruction trap handler to see if we can attribute
6948  * the trap to an fpras check.
6949  */
6950 int
6951 fpras_chktrap(struct regs *rp)
6952 {
6953 	int op;
6954 	struct fpras_chkfngrp *cgp;
6955 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6956 
6957 	if (fpras_chkfngrps == NULL)
6958 		return (0);
6959 
6960 	cgp = &fpras_chkfngrps[CPU->cpu_id];
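	/*
	 * Find the copy operation whose check-function instruction block
	 * contains the trapping PC.
	 */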
6961 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6962 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6963 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6964 			break;
6965 	}
6966 	if (op == FPRAS_NCOPYOPS)
6967 		return (0);
6968 
6969 	/*
6970 	 * This is an fpRAS failure caught through an illegal instruction
6971 	 * trap; redirect to the check function's trampoline.
6972 	 */
6973 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6974 	rp->r_npc = rp->r_pc + 4;
6975 	return (1);
6976 }
6977 
6978 /*
6979  * fpras_failure is called when an fpras check detects a bad calculation
6980  * result or an illegal instruction trap is attributed to an fpras
6981  * check.  In all cases we are still bound to CPU.
6982  */
6983 int
6984 fpras_failure(int op, int how)
6985 {
6986 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6987 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6988 	ch_async_flt_t ch_flt;
6989 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6990 	struct fpras_chkfn *sfp, *cfp;
6991 	uint32_t *sip, *cip;
6992 	int i;
6993 
6994 	/*
6995 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6996 	 * the time in which we dispatch an ereport and (if applicable) panic.
6997 	 */
6998 	use_hw_bcopy_orig = use_hw_bcopy;
6999 	use_hw_bzero_orig = use_hw_bzero;
7000 	hcl1_orig = hw_copy_limit_1;
7001 	hcl2_orig = hw_copy_limit_2;
7002 	hcl4_orig = hw_copy_limit_4;
7003 	hcl8_orig = hw_copy_limit_8;
7004 	use_hw_bcopy = use_hw_bzero = 0;
7005 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
7006 	    hw_copy_limit_8 = 0;
7007 
7008 	bzero(&ch_flt, sizeof (ch_async_flt_t));
7009 	aflt->flt_id = gethrtime_waitfree();
7010 	aflt->flt_class = CPU_FAULT;
7011 	aflt->flt_inst = CPU->cpu_id;
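	/* Pack how the failure was detected (how) with the copy op (op). */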
7012 	aflt->flt_status = (how << 8) | op;
7013 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
7014 	ch_flt.flt_type = CPU_FPUERR;
7015 
7016 	/*
7017 	 * We must panic if the copy operation had no lofault protection -
7018 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
7019 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
7020 	 */
7021 	aflt->flt_panic = (curthread->t_lofault == (uintptr_t)NULL);
7022 
7023 	/*
7024 	 * XOR the source instruction block with the copied instruction
7025 	 * block - this will show us which bit(s) are corrupted.
7026 	 */
7027 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
7028 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
7029 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
7030 		sip = &sfp->fpras_blk0[0];
7031 		cip = &cfp->fpras_blk0[0];
7032 	} else {
7033 		sip = &sfp->fpras_blk1[0];
7034 		cip = &cfp->fpras_blk1[0];
7035 	}
7036 	for (i = 0; i < 16; ++i, ++sip, ++cip)
7037 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
7038 
7039 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
7040 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
7041 
7042 	if (aflt->flt_panic)
7043 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
7044 
7045 	/*
7046 	 * We get here for copyin/copyout and kcopy or bcopy where the
7047 	 * caller has used on_fault.  We will flag the error so that
7048 	 * the process may be killed.  The trap_async_hwerr mechanism will
7049 	 * take appropriate further action (such as a reboot, contract
7050 	 * notification etc).  Since we may be continuing we will
7051 	 * restore the global hardware copy acceleration switches.
7052 	 *
7053 	 * When we return from this function to the copy function we want to
7054 	 * avoid potentially bad data being used, ie we want the affected
7055 	 * copy function to return an error.  The caller should therefore
7056 	 * invoke its lofault handler (which always exists for these functions)
7057 	 * which will return the appropriate error.
7058 	 */
7059 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
7060 	aston(curthread);
7061 
7062 	use_hw_bcopy = use_hw_bcopy_orig;
7063 	use_hw_bzero = use_hw_bzero_orig;
7064 	hw_copy_limit_1 = hcl1_orig;
7065 	hw_copy_limit_2 = hcl2_orig;
7066 	hw_copy_limit_4 = hcl4_orig;
7067 	hw_copy_limit_8 = hcl8_orig;
7068 
7069 	return (1);
7070 }
7071 
7072 #define	VIS_BLOCKSIZE		64
7073 
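/*
 * Perform a DTrace 32-bit block store with any watchpoints covering the
 * target VIS block temporarily disabled, and return the result.
 */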
7074 int
7075 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7076 {
7077 	int ret, watched;
7078 
7079 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7080 	ret = dtrace_blksuword32(addr, data, 0);
7081 	if (watched)
7082 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7083 
7084 	return (ret);
7085 }
7086 
7087 /*
7088  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7089  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
7090  * CEEN from the EER to disable traps for further disrupting error types
7091  * on that cpu.  We could cross-call instead, but that has a larger
7092  * instruction and data footprint than cross-trapping, and the cpu is known
7093  * to be faulted.
7094  */
7095 
7096 void
7097 cpu_faulted_enter(struct cpu *cp)
7098 {
7099 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7100 }
7101 
7102 /*
7103  * Called when a cpu leaves the CPU_FAULTED state to return to one of
7104  * offline, spare, or online (by the cpu requesting this state change).
7105  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7106  * disrupting error bits that have accumulated without trapping, then
7107  * we cross-trap to re-enable CEEN controlled traps.
7108  */
7109 void
7110 cpu_faulted_exit(struct cpu *cp)
7111 {
7112 	ch_cpu_errors_t cpu_error_regs;
7113 
7114 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7115 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7116 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7117 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7118 	    (uint64_t)&cpu_error_regs, 0);
7119 
7120 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7121 }
7122 
7123 /*
7124  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7125  * the errors in the original AFSR, 0 otherwise.
7126  *
7127  * For all procs if the initial error was a BERR or TO, then it is possible
7128  * that we may have caused a secondary BERR or TO in the process of logging the
7129  * initial error via cpu_run_bus_error_handlers().  If this is the case and
7130  * the request was protected, then a panic is still not necessary; if it was
7131  * not protected, then aft_panic is already set - so either way there's no need
7132  * to set aft_panic for the secondary error.
7133  *
7134  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7135  * a store merge, then the error handling code will call cpu_deferred_error().
7136  * When clear_errors() is called, it will determine that secondary errors have
7137  * occurred - in particular, the store merge also caused an EDU and WDU that
7138  * weren't discovered until this point.
7139  *
7140  * We do three checks to verify that we are in this case.  If we pass all three
7141  * checks, we return 1 to indicate that we should not panic.  If any unexpected
7142  * errors occur, we return 0.
7143  *
7144  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7145  * handled in cpu_disrupting_errors().  Since this function is not even called
7146  * in the case we are interested in, we just return 0 for these processors.
7147  */
7148 /*ARGSUSED*/
7149 static int
7150 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7151     uint64_t t_afar)
7152 {
7153 #if defined(CHEETAH_PLUS)
7154 #else	/* CHEETAH_PLUS */
7155 	struct async_flt *aflt = (struct async_flt *)ch_flt;
7156 #endif	/* CHEETAH_PLUS */
7157 
7158 	/*
7159 	 * Was the original error a BERR or TO, and only a BERR or TO
7160 	 * (multiple errors are also OK)?
7161 	 */
7162 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7163 		/*
7164 		 * Is the new error a BERR or TO, and only a BERR or TO
7165 		 * (multiple errors are also OK)?
7166 		 */
7167 		if ((ch_flt->afsr_errs &
7168 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7169 			return (1);
7170 	}
7171 
7172 #if defined(CHEETAH_PLUS)
7173 	return (0);
7174 #else	/* CHEETAH_PLUS */
7175 	/*
7176 	 * Now look for secondary effects of a UE on cheetah/jalapeno
7177 	 *
7178 	 * Check the original error was a UE, and only a UE.  Note that
7179 	 * the ME bit will cause us to fail this check.
7180 	 */
7181 	if (t_afsr_errs != C_AFSR_UE)
7182 		return (0);
7183 
7184 	/*
7185 	 * Check the secondary errors were exclusively an EDU and/or WDU.
7186 	 */
7187 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7188 		return (0);
7189 
7190 	/*
7191 	 * Check the AFAR of the original error and secondary errors
7192 	 * match to the 64-byte boundary
7193 	 */
7194 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7195 		return (0);
7196 
7197 	/*
7198 	 * We've passed all the checks, so it's a secondary error!
7199 	 */
7200 	return (1);
7201 #endif	/* CHEETAH_PLUS */
7202 }
7203 
7204 /*
7205  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
7206  * is checked for any valid errors.  If found, the error type is
7207  * returned. If not found, the flt_type is checked for L1$ parity errors.
7208  */
7209 /*ARGSUSED*/
7210 static uint8_t
7211 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7212 {
7213 #if defined(JALAPENO)
7214 	/*
7215 	 * Currently, logging errors to the SC is not supported on Jalapeno
7216 	 */
7217 	return (PLAT_ECC_ERROR2_NONE);
7218 #else
7219 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7220 
7221 	switch (ch_flt->flt_bit) {
7222 	case C_AFSR_CE:
7223 		return (PLAT_ECC_ERROR2_CE);
7224 	case C_AFSR_UCC:
7225 	case C_AFSR_EDC:
7226 	case C_AFSR_WDC:
7227 	case C_AFSR_CPC:
7228 		return (PLAT_ECC_ERROR2_L2_CE);
7229 	case C_AFSR_EMC:
7230 		return (PLAT_ECC_ERROR2_EMC);
7231 	case C_AFSR_IVC:
7232 		return (PLAT_ECC_ERROR2_IVC);
7233 	case C_AFSR_UE:
7234 		return (PLAT_ECC_ERROR2_UE);
7235 	case C_AFSR_UCU:
7236 	case C_AFSR_EDU:
7237 	case C_AFSR_WDU:
7238 	case C_AFSR_CPU:
7239 		return (PLAT_ECC_ERROR2_L2_UE);
7240 	case C_AFSR_IVU:
7241 		return (PLAT_ECC_ERROR2_IVU);
7242 	case C_AFSR_TO:
7243 		return (PLAT_ECC_ERROR2_TO);
7244 	case C_AFSR_BERR:
7245 		return (PLAT_ECC_ERROR2_BERR);
7246 #if defined(CHEETAH_PLUS)
7247 	case C_AFSR_L3_EDC:
7248 	case C_AFSR_L3_UCC:
7249 	case C_AFSR_L3_CPC:
7250 	case C_AFSR_L3_WDC:
7251 		return (PLAT_ECC_ERROR2_L3_CE);
7252 	case C_AFSR_IMC:
7253 		return (PLAT_ECC_ERROR2_IMC);
7254 	case C_AFSR_TSCE:
7255 		return (PLAT_ECC_ERROR2_L2_TSCE);
7256 	case C_AFSR_THCE:
7257 		return (PLAT_ECC_ERROR2_L2_THCE);
7258 	case C_AFSR_L3_MECC:
7259 		return (PLAT_ECC_ERROR2_L3_MECC);
7260 	case C_AFSR_L3_THCE:
7261 		return (PLAT_ECC_ERROR2_L3_THCE);
7262 	case C_AFSR_L3_CPU:
7263 	case C_AFSR_L3_EDU:
7264 	case C_AFSR_L3_UCU:
7265 	case C_AFSR_L3_WDU:
7266 		return (PLAT_ECC_ERROR2_L3_UE);
7267 	case C_AFSR_DUE:
7268 		return (PLAT_ECC_ERROR2_DUE);
7269 	case C_AFSR_DTO:
7270 		return (PLAT_ECC_ERROR2_DTO);
7271 	case C_AFSR_DBERR:
7272 		return (PLAT_ECC_ERROR2_DBERR);
7273 #endif	/* CHEETAH_PLUS */
7274 	default:
7275 		switch (ch_flt->flt_type) {
7276 #if defined(CPU_IMP_L1_CACHE_PARITY)
7277 		case CPU_IC_PARITY:
7278 			return (PLAT_ECC_ERROR2_IPE);
7279 		case CPU_DC_PARITY:
7280 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7281 				if (ch_flt->parity_data.dpe.cpl_cache ==
7282 				    CPU_PC_PARITY) {
7283 					return (PLAT_ECC_ERROR2_PCACHE);
7284 				}
7285 			}
7286 			return (PLAT_ECC_ERROR2_DPE);
7287 #endif /* CPU_IMP_L1_CACHE_PARITY */
7288 		case CPU_ITLB_PARITY:
7289 			return (PLAT_ECC_ERROR2_ITLB);
7290 		case CPU_DTLB_PARITY:
7291 			return (PLAT_ECC_ERROR2_DTLB);
7292 		default:
7293 			return (PLAT_ECC_ERROR2_NONE);
7294 		}
7295 	}
7296 #endif	/* JALAPENO */
7297 }
7298