xref: /titanic_41/usr/src/uts/sun4u/cpu/us3_common.c (revision 0d6bb4c6728fd20087fe25f4028a3838250e6e9c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/ddi.h>
29 #include <sys/sysmacros.h>
30 #include <sys/archsystm.h>
31 #include <sys/vmsystm.h>
32 #include <sys/machparam.h>
33 #include <sys/machsystm.h>
34 #include <sys/machthread.h>
35 #include <sys/cpu.h>
36 #include <sys/cmp.h>
37 #include <sys/elf_SPARC.h>
38 #include <vm/vm_dep.h>
39 #include <vm/hat_sfmmu.h>
40 #include <vm/seg_kpm.h>
41 #include <sys/cpuvar.h>
42 #include <sys/cheetahregs.h>
43 #include <sys/us3_module.h>
44 #include <sys/async.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/dditypes.h>
48 #include <sys/prom_debug.h>
49 #include <sys/prom_plat.h>
50 #include <sys/cpu_module.h>
51 #include <sys/sysmacros.h>
52 #include <sys/intreg.h>
53 #include <sys/clock.h>
54 #include <sys/platform_module.h>
55 #include <sys/machtrap.h>
56 #include <sys/ontrap.h>
57 #include <sys/panic.h>
58 #include <sys/memlist.h>
59 #include <sys/bootconf.h>
60 #include <sys/ivintr.h>
61 #include <sys/atomic.h>
62 #include <sys/taskq.h>
63 #include <sys/note.h>
64 #include <sys/ndifm.h>
65 #include <sys/ddifm.h>
66 #include <sys/fm/protocol.h>
67 #include <sys/fm/util.h>
68 #include <sys/fm/cpu/UltraSPARC-III.h>
69 #include <sys/fpras_impl.h>
70 #include <sys/dtrace.h>
71 #include <sys/watchpoint.h>
72 #include <sys/plat_ecc_unum.h>
73 #include <sys/cyclic.h>
74 #include <sys/errorq.h>
75 #include <sys/errclassify.h>
76 #include <sys/pghw.h>
77 #include <sys/clock_impl.h>
78 
79 #ifdef	CHEETAHPLUS_ERRATUM_25
80 #include <sys/xc_impl.h>
81 #endif	/* CHEETAHPLUS_ERRATUM_25 */
82 
83 ch_cpu_logout_t	clop_before_flush;
84 ch_cpu_logout_t	clop_after_flush;
85 uint_t	flush_retries_done = 0;
86 /*
87  * Note that 'Cheetah PRM' refers to:
88  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
89  */
90 
91 /*
92  * Per CPU pointers to physical address of TL>0 logout data areas.
93  * These pointers have to be in the kernel nucleus to avoid MMU
94  * misses.
95  */
96 uint64_t ch_err_tl1_paddrs[NCPU];
97 
98 /*
99  * One statically allocated structure to use during startup/DR
100  * to prevent unnecessary panics.
101  */
102 ch_err_tl1_data_t ch_err_tl1_data;
103 
104 /*
105  * Per CPU pending error at TL>0, used by level15 softint handler
106  */
107 uchar_t ch_err_tl1_pending[NCPU];
108 
109 /*
110  * For deferred CE re-enable after trap.
111  */
112 taskq_t		*ch_check_ce_tq;
113 
114 /*
115  * Internal functions.
116  */
117 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
118 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
119 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
120     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
121 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
122     uint64_t t_afsr_bit);
123 static int clear_ecc(struct async_flt *ecc);
124 #if defined(CPU_IMP_ECACHE_ASSOC)
125 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
126 #endif
127 int cpu_ecache_set_size(struct cpu *cp);
128 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
129 int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
130 uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
131 int cpu_ectag_pa_to_subblk_state(int cachesize,
132 				uint64_t subaddr, uint64_t tag);
133 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
134 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
135 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
136 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
137 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
138 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
139 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
140 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
141 static void cpu_scrubphys(struct async_flt *aflt);
142 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
143     int *, int *);
144 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
145 static void cpu_ereport_init(struct async_flt *aflt);
146 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
147 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
148 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
149     uint64_t nceen, ch_cpu_logout_t *clop);
150 static int cpu_ce_delayed_ec_logout(uint64_t);
151 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
152 static int cpu_error_is_ecache_data(int, uint64_t);
153 static void cpu_fmri_cpu_set(nvlist_t *, int);
154 static int cpu_error_to_resource_type(struct async_flt *aflt);
155 
156 #ifdef	CHEETAHPLUS_ERRATUM_25
157 static int mondo_recover_proc(uint16_t, int);
158 static void cheetah_nudge_init(void);
159 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
160     cyc_time_t *when);
161 static void cheetah_nudge_buddy(void);
162 #endif	/* CHEETAHPLUS_ERRATUM_25 */
163 
164 #if defined(CPU_IMP_L1_CACHE_PARITY)
165 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
166 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
167 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
168     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
169 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
170 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
171 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
172 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
173 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
174 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
175 #endif	/* CPU_IMP_L1_CACHE_PARITY */
176 
177 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
178     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
179     int *segsp, int *banksp, int *mcidp);
180 
181 /*
182  * This table is used to determine which bit(s) is(are) bad when an ECC
183  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
184  * of this array have the following semantics:
185  *
186  *      00-127  The number of the bad bit, when only one bit is bad.
187  *      128     ECC bit C0 is bad.
188  *      129     ECC bit C1 is bad.
189  *      130     ECC bit C2 is bad.
190  *      131     ECC bit C3 is bad.
191  *      132     ECC bit C4 is bad.
192  *      133     ECC bit C5 is bad.
193  *      134     ECC bit C6 is bad.
194  *      135     ECC bit C7 is bad.
195  *      136     ECC bit C8 is bad.
196  *	137-143 reserved for Mtag Data and ECC.
197  *      144(M2) Two bits are bad within a nibble.
198  *      145(M3) Three bits are bad within a nibble.
199  *      146(M4) Four bits are bad within a nibble.
200  *      147(M)  Multiple bits (5 or more) are bad.
201  *      148     NO bits are bad.
202  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
203  */
204 
205 #define	C0	128
206 #define	C1	129
207 #define	C2	130
208 #define	C3	131
209 #define	C4	132
210 #define	C5	133
211 #define	C6	134
212 #define	C7	135
213 #define	C8	136
214 #define	MT0	137	/* Mtag Data bit 0 */
215 #define	MT1	138
216 #define	MT2	139
217 #define	MTC0	140	/* Mtag Check bit 0 */
218 #define	MTC1	141
219 #define	MTC2	142
220 #define	MTC3	143
221 #define	M2	144
222 #define	M3	145
223 #define	M4	146
224 #define	M	147
225 #define	NA	148
226 #if defined(JALAPENO) || defined(SERRANO)
227 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
228 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
229 #define	SLAST	S003MEM	/* last special syndrome */
230 #else /* JALAPENO || SERRANO */
231 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
232 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
233 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
234 #define	SLAST	S11C	/* last special syndrome */
235 #endif /* JALAPENO || SERRANO */
236 #if defined(JALAPENO) || defined(SERRANO)
237 #define	BPAR0	152	/* syndrom 152 through 167 for bus parity */
238 #define	BPAR15	167
239 #endif	/* JALAPENO || SERRANO */
240 
241 static uint8_t ecc_syndrome_tab[] =
242 {
243 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
244 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
245 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
246 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
247 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
248 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
249 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
250 #if defined(JALAPENO) || defined(SERRANO)
251 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
252 #else	/* JALAPENO || SERRANO */
253 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
254 #endif	/* JALAPENO || SERRANO */
255 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
256 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
257 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
258 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
259 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
260 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
261 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
262 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
263 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
264 #if defined(JALAPENO) || defined(SERRANO)
265 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
266 #else	/* JALAPENO || SERRANO */
267 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
268 #endif	/* JALAPENO || SERRANO */
269 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
270 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
271 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
272 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
273 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
274 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
275 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
276 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
277 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
278 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
279 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
280 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
281 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
282 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
283 };
284 
285 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
286 
287 #if !(defined(JALAPENO) || defined(SERRANO))
288 /*
289  * This table is used to determine which bit(s) is(are) bad when a Mtag
290  * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
291  * of this array have the following semantics:
292  *
293  *      -1	Invalid mtag syndrome.
294  *      137     Mtag Data 0 is bad.
295  *      138     Mtag Data 1 is bad.
296  *      139     Mtag Data 2 is bad.
297  *      140     Mtag ECC 0 is bad.
298  *      141     Mtag ECC 1 is bad.
299  *      142     Mtag ECC 2 is bad.
300  *      143     Mtag ECC 3 is bad.
301  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
302  */
303 short mtag_syndrome_tab[] =
304 {
305 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
306 };
307 
308 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
309 
310 #else /* !(JALAPENO || SERRANO) */
311 
312 #define	BSYND_TBL_SIZE	16
313 
314 #endif /* !(JALAPENO || SERRANO) */
315 
316 /*
317  * Virtual Address bit flag in the data cache. This is actually bit 2 in the
318  * dcache data tag.
319  */
320 #define	VA13	INT64_C(0x0000000000000002)
321 
322 /*
323  * Types returned from cpu_error_to_resource_type()
324  */
325 #define	ERRTYPE_UNKNOWN		0
326 #define	ERRTYPE_CPU		1
327 #define	ERRTYPE_MEMORY		2
328 #define	ERRTYPE_ECACHE_DATA	3
329 
330 /*
331  * CE initial classification and subsequent action lookup table
332  */
333 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
334 static int ce_disp_inited;
335 
336 /*
337  * Set to disable leaky and partner check for memory correctables
338  */
339 int ce_xdiag_off;
340 
341 /*
342  * The following are not incremented atomically so are indicative only
343  */
344 static int ce_xdiag_drops;
345 static int ce_xdiag_lkydrops;
346 static int ce_xdiag_ptnrdrops;
347 static int ce_xdiag_bad;
348 
349 /*
350  * CE leaky check callback structure
351  */
352 typedef struct {
353 	struct async_flt *lkycb_aflt;
354 	errorq_t *lkycb_eqp;
355 	errorq_elem_t *lkycb_eqep;
356 } ce_lkychk_cb_t;
357 
358 /*
359  * defines for various ecache_flush_flag's
360  */
361 #define	ECACHE_FLUSH_LINE	1
362 #define	ECACHE_FLUSH_ALL	2
363 
364 /*
365  * STICK sync
366  */
367 #define	STICK_ITERATION 10
368 #define	MAX_TSKEW	1
369 #define	EV_A_START	0
370 #define	EV_A_END	1
371 #define	EV_B_START	2
372 #define	EV_B_END	3
373 #define	EVENTS		4
374 
375 static int64_t stick_iter = STICK_ITERATION;
376 static int64_t stick_tsk = MAX_TSKEW;
377 
378 typedef enum {
379 	EVENT_NULL = 0,
380 	SLAVE_START,
381 	SLAVE_CONT,
382 	MASTER_START
383 } event_cmd_t;
384 
385 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
386 static int64_t timestamp[EVENTS];
387 static volatile int slave_done;
388 
389 #ifdef DEBUG
390 #define	DSYNC_ATTEMPTS 64
391 typedef struct {
392 	int64_t	skew_val[DSYNC_ATTEMPTS];
393 } ss_t;
394 
395 ss_t stick_sync_stats[NCPU];
396 #endif /* DEBUG */
397 
398 uint_t cpu_impl_dual_pgsz = 0;
399 #if defined(CPU_IMP_DUAL_PAGESIZE)
400 uint_t disable_dual_pgsz = 0;
401 #endif	/* CPU_IMP_DUAL_PAGESIZE */
402 
403 /*
404  * Save the cache bootup state for use when internal
405  * caches are to be re-enabled after an error occurs.
406  */
407 uint64_t cache_boot_state;
408 
409 /*
410  * PA[22:0] represent Displacement in Safari configuration space.
411  */
412 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
413 
414 bus_config_eclk_t bus_config_eclk[] = {
415 #if defined(JALAPENO) || defined(SERRANO)
416 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
417 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
418 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
419 #else /* JALAPENO || SERRANO */
420 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
421 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
422 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
423 #endif /* JALAPENO || SERRANO */
424 	{0, 0}
425 };
426 
427 /*
428  * Interval for deferred CEEN reenable
429  */
430 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
431 
432 /*
433  * set in /etc/system to control logging of user BERR/TO's
434  */
435 int cpu_berr_to_verbose = 0;
436 
437 /*
438  * set to 0 in /etc/system to defer CEEN reenable for all CEs
439  */
440 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
441 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
442 
443 /*
444  * Set of all offline cpus
445  */
446 cpuset_t cpu_offline_set;
447 
448 static void cpu_delayed_check_ce_errors(void *);
449 static void cpu_check_ce_errors(void *);
450 void cpu_error_ecache_flush(ch_async_flt_t *);
451 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
452 static void cpu_log_and_clear_ce(ch_async_flt_t *);
453 void cpu_ce_detected(ch_cpu_errors_t *, int);
454 
455 /*
456  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
457  * memory refresh interval of current DIMMs (64ms).  After initial fix that
458  * gives at least one full refresh cycle in which the cell can leak
459  * (whereafter further refreshes simply reinforce any incorrect bit value).
460  */
461 clock_t cpu_ce_lkychk_timeout_usec = 128000;
462 
463 /*
464  * CE partner check partner caching period in seconds
465  */
466 int cpu_ce_ptnr_cachetime_sec = 60;
467 
468 /*
469  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
470  */
471 #define	CH_SET_TRAP(ttentry, ttlabel)			\
472 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
473 		flush_instr_mem((caddr_t)&ttentry, 32);
474 
475 static int min_ecache_size;
476 static uint_t priv_hcl_1;
477 static uint_t priv_hcl_2;
478 static uint_t priv_hcl_4;
479 static uint_t priv_hcl_8;
480 
481 void
cpu_setup(void)482 cpu_setup(void)
483 {
484 	extern int at_flags;
485 	extern int cpc_has_overflow_intr;
486 
487 	/*
488 	 * Setup chip-specific trap handlers.
489 	 */
490 	cpu_init_trap();
491 
492 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
493 
494 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
495 
496 	/*
497 	 * save the cache bootup state.
498 	 */
499 	cache_boot_state = get_dcu() & DCU_CACHE;
500 
501 	/*
502 	 * Due to the number of entries in the fully-associative tlb
503 	 * this may have to be tuned lower than in spitfire.
504 	 */
505 	pp_slots = MIN(8, MAXPP_SLOTS);
506 
507 	/*
508 	 * Block stores do not invalidate all pages of the d$, pagecopy
509 	 * et. al. need virtual translations with virtual coloring taken
510 	 * into consideration.  prefetch/ldd will pollute the d$ on the
511 	 * load side.
512 	 */
513 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
514 
515 	if (use_page_coloring) {
516 		do_pg_coloring = 1;
517 	}
518 
519 	isa_list =
520 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
521 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523 
524 	/*
525 	 * On Panther-based machines, this should
526 	 * also include AV_SPARC_POPC too
527 	 */
528 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529 
530 	/*
531 	 * On cheetah, there's no hole in the virtual address space
532 	 */
533 	hole_start = hole_end = 0;
534 
535 	/*
536 	 * The kpm mapping window.
537 	 * kpm_size:
538 	 *	The size of a single kpm range.
539 	 *	The overall size will be: kpm_size * vac_colors.
540 	 * kpm_vbase:
541 	 *	The virtual start address of the kpm range within the kernel
542 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
543 	 */
544 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545 	kpm_size_shift = 43;
546 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547 	kpm_smallpages = 1;
548 
549 	/*
550 	 * The traptrace code uses either %tick or %stick for
551 	 * timestamping.  We have %stick so we can use it.
552 	 */
553 	traptrace_use_stick = 1;
554 
555 	/*
556 	 * Cheetah has a performance counter overflow interrupt
557 	 */
558 	cpc_has_overflow_intr = 1;
559 
560 #if defined(CPU_IMP_DUAL_PAGESIZE)
561 	/*
562 	 * Use Cheetah+ and later dual page size support.
563 	 */
564 	if (!disable_dual_pgsz) {
565 		cpu_impl_dual_pgsz = 1;
566 	}
567 #endif	/* CPU_IMP_DUAL_PAGESIZE */
568 
569 	/*
570 	 * Declare that this architecture/cpu combination does fpRAS.
571 	 */
572 	fpras_implemented = 1;
573 
574 	/*
575 	 * Setup CE lookup table
576 	 */
577 	CE_INITDISPTBL_POPULATE(ce_disp_table);
578 	ce_disp_inited = 1;
579 }
580 
581 /*
582  * Called by setcpudelay
583  */
584 void
cpu_init_tick_freq(void)585 cpu_init_tick_freq(void)
586 {
587 	/*
588 	 * For UltraSPARC III and beyond we want to use the
589 	 * system clock rate as the basis for low level timing,
590 	 * due to support of mixed speed CPUs and power managment.
591 	 */
592 	if (system_clock_freq == 0)
593 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
594 
595 	sys_tick_freq = system_clock_freq;
596 }
597 
598 #ifdef CHEETAHPLUS_ERRATUM_25
599 /*
600  * Tunables
601  */
602 int cheetah_bpe_off = 0;
603 int cheetah_sendmondo_recover = 1;
604 int cheetah_sendmondo_fullscan = 0;
605 int cheetah_sendmondo_recover_delay = 5;
606 
607 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
608 
609 /*
610  * Recovery Statistics
611  */
612 typedef struct cheetah_livelock_entry	{
613 	int cpuid;		/* fallen cpu */
614 	int buddy;		/* cpu that ran recovery */
615 	clock_t lbolt;		/* when recovery started */
616 	hrtime_t recovery_time;	/* time spent in recovery */
617 } cheetah_livelock_entry_t;
618 
619 #define	CHEETAH_LIVELOCK_NENTRY	32
620 
621 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
622 int cheetah_livelock_entry_nxt;
623 
624 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
625 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
626 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
627 		cheetah_livelock_entry_nxt = 0;				\
628 	}								\
629 }
630 
631 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
632 
633 struct {
634 	hrtime_t hrt;		/* maximum recovery time */
635 	int recovery;		/* recovered */
636 	int full_claimed;	/* maximum pages claimed in full recovery */
637 	int proc_entry;		/* attempted to claim TSB */
638 	int proc_tsb_scan;	/* tsb scanned */
639 	int proc_tsb_partscan;	/* tsb partially scanned */
640 	int proc_tsb_fullscan;	/* whole tsb scanned */
641 	int proc_claimed;	/* maximum pages claimed in tsb scan */
642 	int proc_user;		/* user thread */
643 	int proc_kernel;	/* kernel thread */
644 	int proc_onflt;		/* bad stack */
645 	int proc_cpu;		/* null cpu */
646 	int proc_thread;	/* null thread */
647 	int proc_proc;		/* null proc */
648 	int proc_as;		/* null as */
649 	int proc_hat;		/* null hat */
650 	int proc_hat_inval;	/* hat contents don't make sense */
651 	int proc_hat_busy;	/* hat is changing TSBs */
652 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
653 	int proc_cnum_bad;	/* cnum out of range */
654 	int proc_cnum;		/* last cnum processed */
655 	tte_t proc_tte;		/* last tte processed */
656 } cheetah_livelock_stat;
657 
658 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
659 
660 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
661 	cheetah_livelock_stat.item = value
662 
663 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
664 	if (value > cheetah_livelock_stat.item)		\
665 		cheetah_livelock_stat.item = value;	\
666 }
667 
668 /*
669  * Attempt to recover a cpu by claiming every cache line as saved
670  * in the TSB that the non-responsive cpu is using. Since we can't
671  * grab any adaptive lock, this is at best an attempt to do so. Because
672  * we don't grab any locks, we must operate under the protection of
673  * on_fault().
674  *
675  * Return 1 if cpuid could be recovered, 0 if failed.
676  */
677 int
mondo_recover_proc(uint16_t cpuid,int bn)678 mondo_recover_proc(uint16_t cpuid, int bn)
679 {
680 	label_t ljb;
681 	cpu_t *cp;
682 	kthread_t *t;
683 	proc_t *p;
684 	struct as *as;
685 	struct hat *hat;
686 	uint_t  cnum;
687 	struct tsb_info *tsbinfop;
688 	struct tsbe *tsbep;
689 	caddr_t tsbp;
690 	caddr_t end_tsbp;
691 	uint64_t paddr;
692 	uint64_t idsr;
693 	u_longlong_t pahi, palo;
694 	int pages_claimed = 0;
695 	tte_t tsbe_tte;
696 	int tried_kernel_tsb = 0;
697 	mmu_ctx_t *mmu_ctxp;
698 
699 	CHEETAH_LIVELOCK_STAT(proc_entry);
700 
701 	if (on_fault(&ljb)) {
702 		CHEETAH_LIVELOCK_STAT(proc_onflt);
703 		goto badstruct;
704 	}
705 
706 	if ((cp = cpu[cpuid]) == NULL) {
707 		CHEETAH_LIVELOCK_STAT(proc_cpu);
708 		goto badstruct;
709 	}
710 
711 	if ((t = cp->cpu_thread) == NULL) {
712 		CHEETAH_LIVELOCK_STAT(proc_thread);
713 		goto badstruct;
714 	}
715 
716 	if ((p = ttoproc(t)) == NULL) {
717 		CHEETAH_LIVELOCK_STAT(proc_proc);
718 		goto badstruct;
719 	}
720 
721 	if ((as = p->p_as) == NULL) {
722 		CHEETAH_LIVELOCK_STAT(proc_as);
723 		goto badstruct;
724 	}
725 
726 	if ((hat = as->a_hat) == NULL) {
727 		CHEETAH_LIVELOCK_STAT(proc_hat);
728 		goto badstruct;
729 	}
730 
731 	if (hat != ksfmmup) {
732 		CHEETAH_LIVELOCK_STAT(proc_user);
733 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
734 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
735 			goto badstruct;
736 		}
737 		tsbinfop = hat->sfmmu_tsb;
738 		if (tsbinfop == NULL) {
739 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
740 			goto badstruct;
741 		}
742 		tsbp = tsbinfop->tsb_va;
743 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
744 	} else {
745 		CHEETAH_LIVELOCK_STAT(proc_kernel);
746 		tsbinfop = NULL;
747 		tsbp = ktsb_base;
748 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
749 	}
750 
751 	/* Verify as */
752 	if (hat->sfmmu_as != as) {
753 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
754 		goto badstruct;
755 	}
756 
757 	mmu_ctxp = CPU_MMU_CTXP(cp);
758 	ASSERT(mmu_ctxp);
759 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
760 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
761 
762 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
763 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
764 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
765 		goto badstruct;
766 	}
767 
768 	do {
769 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
770 
771 		/*
772 		 * Skip TSBs being relocated.  This is important because
773 		 * we want to avoid the following deadlock scenario:
774 		 *
775 		 * 1) when we came in we set ourselves to "in recover" state.
776 		 * 2) when we try to touch TSB being relocated the mapping
777 		 *    will be in the suspended state so we'll spin waiting
778 		 *    for it to be unlocked.
779 		 * 3) when the CPU that holds the TSB mapping locked tries to
780 		 *    unlock it it will send a xtrap which will fail to xcall
781 		 *    us or the CPU we're trying to recover, and will in turn
782 		 *    enter the mondo code.
783 		 * 4) since we are still spinning on the locked mapping
784 		 *    no further progress will be made and the system will
785 		 *    inevitably hard hang.
786 		 *
787 		 * A TSB not being relocated can't begin being relocated
788 		 * while we're accessing it because we check
789 		 * sendmondo_in_recover before relocating TSBs.
790 		 */
791 		if (hat != ksfmmup &&
792 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
793 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
794 			goto next_tsbinfo;
795 		}
796 
797 		for (tsbep = (struct tsbe *)tsbp;
798 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
799 			tsbe_tte = tsbep->tte_data;
800 
801 			if (tsbe_tte.tte_val == 0) {
802 				/*
803 				 * Invalid tte
804 				 */
805 				continue;
806 			}
807 			if (tsbe_tte.tte_se) {
808 				/*
809 				 * Don't want device registers
810 				 */
811 				continue;
812 			}
813 			if (tsbe_tte.tte_cp == 0) {
814 				/*
815 				 * Must be cached in E$
816 				 */
817 				continue;
818 			}
819 			if (tsbep->tte_tag.tag_invalid != 0) {
820 				/*
821 				 * Invalid tag, ingnore this entry.
822 				 */
823 				continue;
824 			}
825 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
826 			idsr = getidsr();
827 			if ((idsr & (IDSR_NACK_BIT(bn) |
828 			    IDSR_BUSY_BIT(bn))) == 0) {
829 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
830 				goto done;
831 			}
832 			pahi = tsbe_tte.tte_pahi;
833 			palo = tsbe_tte.tte_palo;
834 			paddr = (uint64_t)((pahi << 32) |
835 			    (palo << MMU_PAGESHIFT));
836 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
837 			    CH_ECACHE_SUBBLK_SIZE);
838 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
839 				shipit(cpuid, bn);
840 			}
841 			pages_claimed++;
842 		}
843 next_tsbinfo:
844 		if (tsbinfop != NULL)
845 			tsbinfop = tsbinfop->tsb_next;
846 		if (tsbinfop != NULL) {
847 			tsbp = tsbinfop->tsb_va;
848 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
849 		} else if (tsbp == ktsb_base) {
850 			tried_kernel_tsb = 1;
851 		} else if (!tried_kernel_tsb) {
852 			tsbp = ktsb_base;
853 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
854 			hat = ksfmmup;
855 			tsbinfop = NULL;
856 		}
857 	} while (tsbinfop != NULL ||
858 	    ((tsbp == ktsb_base) && !tried_kernel_tsb));
859 
860 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
861 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
862 	no_fault();
863 	idsr = getidsr();
864 	if ((idsr & (IDSR_NACK_BIT(bn) |
865 	    IDSR_BUSY_BIT(bn))) == 0) {
866 		return (1);
867 	} else {
868 		return (0);
869 	}
870 
871 done:
872 	no_fault();
873 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
874 	return (1);
875 
876 badstruct:
877 	no_fault();
878 	return (0);
879 }
880 
881 /*
882  * Attempt to claim ownership, temporarily, of every cache line that a
883  * non-responsive cpu might be using.  This might kick that cpu out of
884  * this state.
885  *
886  * The return value indicates to the caller if we have exhausted all recovery
887  * techniques. If 1 is returned, it is useless to call this function again
888  * even for a different target CPU.
889  */
890 int
mondo_recover(uint16_t cpuid,int bn)891 mondo_recover(uint16_t cpuid, int bn)
892 {
893 	struct memseg *seg;
894 	uint64_t begin_pa, end_pa, cur_pa;
895 	hrtime_t begin_hrt, end_hrt;
896 	int retval = 0;
897 	int pages_claimed = 0;
898 	cheetah_livelock_entry_t *histp;
899 	uint64_t idsr;
900 
901 	if (atomic_cas_32(&sendmondo_in_recover, 0, 1) != 0) {
902 		/*
903 		 * Wait while recovery takes place
904 		 */
905 		while (sendmondo_in_recover) {
906 			drv_usecwait(1);
907 		}
908 		/*
909 		 * Assume we didn't claim the whole memory. If
910 		 * the target of this caller is not recovered,
911 		 * it will come back.
912 		 */
913 		return (retval);
914 	}
915 
916 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp);
917 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, LBOLT_WAITFREE);
918 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
919 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
920 
921 	begin_hrt = gethrtime_waitfree();
922 	/*
923 	 * First try to claim the lines in the TSB the target
924 	 * may have been using.
925 	 */
926 	if (mondo_recover_proc(cpuid, bn) == 1) {
927 		/*
928 		 * Didn't claim the whole memory
929 		 */
930 		goto done;
931 	}
932 
933 	/*
934 	 * We tried using the TSB. The target is still
935 	 * not recovered. Check if complete memory scan is
936 	 * enabled.
937 	 */
938 	if (cheetah_sendmondo_fullscan == 0) {
939 		/*
940 		 * Full memory scan is disabled.
941 		 */
942 		retval = 1;
943 		goto done;
944 	}
945 
946 	/*
947 	 * Try claiming the whole memory.
948 	 */
949 	for (seg = memsegs; seg; seg = seg->next) {
950 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
951 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
952 		for (cur_pa = begin_pa; cur_pa < end_pa;
953 		    cur_pa += MMU_PAGESIZE) {
954 			idsr = getidsr();
955 			if ((idsr & (IDSR_NACK_BIT(bn) |
956 			    IDSR_BUSY_BIT(bn))) == 0) {
957 				/*
958 				 * Didn't claim all memory
959 				 */
960 				goto done;
961 			}
962 			claimlines(cur_pa, MMU_PAGESIZE,
963 			    CH_ECACHE_SUBBLK_SIZE);
964 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
965 				shipit(cpuid, bn);
966 			}
967 			pages_claimed++;
968 		}
969 	}
970 
971 	/*
972 	 * We did all we could.
973 	 */
974 	retval = 1;
975 
976 done:
977 	/*
978 	 * Update statistics
979 	 */
980 	end_hrt = gethrtime_waitfree();
981 	CHEETAH_LIVELOCK_STAT(recovery);
982 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
983 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
984 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
985 	    (end_hrt -  begin_hrt));
986 
987 	while (atomic_cas_32(&sendmondo_in_recover, 1, 0) != 1)
988 		;
989 
990 	return (retval);
991 }
992 
993 /*
994  * This is called by the cyclic framework when this CPU becomes online
995  */
996 /*ARGSUSED*/
997 static void
cheetah_nudge_onln(void * arg,cpu_t * cpu,cyc_handler_t * hdlr,cyc_time_t * when)998 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
999 {
1000 
1001 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1002 	hdlr->cyh_level = CY_LOW_LEVEL;
1003 	hdlr->cyh_arg = NULL;
1004 
1005 	/*
1006 	 * Stagger the start time
1007 	 */
1008 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1009 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1010 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1011 	}
1012 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1013 }
1014 
1015 /*
1016  * Create a low level cyclic to send a xtrap to the next cpu online.
1017  * However, there's no need to have this running on a uniprocessor system.
1018  */
1019 static void
cheetah_nudge_init(void)1020 cheetah_nudge_init(void)
1021 {
1022 	cyc_omni_handler_t hdlr;
1023 
1024 	if (max_ncpus == 1) {
1025 		return;
1026 	}
1027 
1028 	hdlr.cyo_online = cheetah_nudge_onln;
1029 	hdlr.cyo_offline = NULL;
1030 	hdlr.cyo_arg = NULL;
1031 
1032 	mutex_enter(&cpu_lock);
1033 	(void) cyclic_add_omni(&hdlr);
1034 	mutex_exit(&cpu_lock);
1035 }
1036 
1037 /*
1038  * Cyclic handler to wake up buddy
1039  */
1040 void
cheetah_nudge_buddy(void)1041 cheetah_nudge_buddy(void)
1042 {
1043 	/*
1044 	 * Disable kernel preemption to protect the cpu list
1045 	 */
1046 	kpreempt_disable();
1047 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1048 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1049 		    0, 0);
1050 	}
1051 	kpreempt_enable();
1052 }
1053 
1054 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1055 
#ifdef SEND_MONDO_STATS
/*
 * Cross-call timing histograms, compiled in only with SEND_MONDO_STATS.
 *
 * send_one_mondo() buckets its elapsed tick count n as follows:
 *	n < 8192:	x_one_stimes[n >> 7]	(64 buckets of 128 ticks)
 *	otherwise:	x_one_ltimes[(n >> 13) & 0xf]
 *
 * NOTE(review): the x_set_* and x_nack_stimes counters are not updated
 * in this part of the file; presumably the set-oriented send path uses
 * them -- confirm against that code.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
1064 
1065 /*
1066  * Note: A version of this function is used by the debugger via the KDI,
1067  * and must be kept in sync with this version.  Any changes made to this
1068  * function to support new chips or to accomodate errata must also be included
1069  * in the KDI-specific version.  See us3_kdi.c.
1070  */
1071 void
send_one_mondo(int cpuid)1072 send_one_mondo(int cpuid)
1073 {
1074 	int busy, nack;
1075 	uint64_t idsr, starttick, endtick, tick, lasttick;
1076 	uint64_t busymask;
1077 #ifdef	CHEETAHPLUS_ERRATUM_25
1078 	int recovered = 0;
1079 #endif
1080 
1081 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1082 	starttick = lasttick = gettick();
1083 	shipit(cpuid, 0);
1084 	endtick = starttick + xc_tick_limit;
1085 	busy = nack = 0;
1086 #if defined(JALAPENO) || defined(SERRANO)
1087 	/*
1088 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1089 	 * will be used for dispatching interrupt. For now, assume
1090 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1091 	 * issues with respect to BUSY/NACK pair usage.
1092 	 */
1093 	busymask  = IDSR_BUSY_BIT(cpuid);
1094 #else /* JALAPENO || SERRANO */
1095 	busymask = IDSR_BUSY;
1096 #endif /* JALAPENO || SERRANO */
1097 	for (;;) {
1098 		idsr = getidsr();
1099 		if (idsr == 0)
1100 			break;
1101 
1102 		tick = gettick();
1103 		/*
1104 		 * If there is a big jump between the current tick
1105 		 * count and lasttick, we have probably hit a break
1106 		 * point.  Adjust endtick accordingly to avoid panic.
1107 		 */
1108 		if (tick > (lasttick + xc_tick_jump_limit))
1109 			endtick += (tick - lasttick);
1110 		lasttick = tick;
1111 		if (tick > endtick) {
1112 			if (panic_quiesce)
1113 				return;
1114 #ifdef	CHEETAHPLUS_ERRATUM_25
1115 			if (cheetah_sendmondo_recover && recovered == 0) {
1116 				if (mondo_recover(cpuid, 0)) {
1117 					/*
1118 					 * We claimed the whole memory or
1119 					 * full scan is disabled.
1120 					 */
1121 					recovered++;
1122 				}
1123 				tick = gettick();
1124 				endtick = tick + xc_tick_limit;
1125 				lasttick = tick;
1126 				/*
1127 				 * Recheck idsr
1128 				 */
1129 				continue;
1130 			} else
1131 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1132 			{
1133 				cmn_err(CE_PANIC, "send mondo timeout "
1134 				    "(target 0x%x) [%d NACK %d BUSY]",
1135 				    cpuid, nack, busy);
1136 			}
1137 		}
1138 
1139 		if (idsr & busymask) {
1140 			busy++;
1141 			continue;
1142 		}
1143 		drv_usecwait(1);
1144 		shipit(cpuid, 0);
1145 		nack++;
1146 		busy = 0;
1147 	}
1148 #ifdef SEND_MONDO_STATS
1149 	{
1150 		int n = gettick() - starttick;
1151 		if (n < 8192)
1152 			x_one_stimes[n >> 7]++;
1153 		else
1154 			x_one_ltimes[(n >> 13) & 0xf]++;
1155 	}
1156 #endif
1157 }
1158 
/*
 * Intentionally empty: this cpu module has no work to do in syncfpu().
 */
void
syncfpu(void)
{
}
1163 
1164 /*
1165  * Return processor specific async error structure
1166  * size used.
1167  */
1168 int
cpu_aflt_size(void)1169 cpu_aflt_size(void)
1170 {
1171 	return (sizeof (ch_async_flt_t));
1172 }
1173 
/*
 * Tunable to disable the checking of other cpu logout areas during panic for
 * potential syndrome 71 generating errors.  Enabled (1) by default.
 */
int enable_check_other_cpus_logout = 1;
1179 
1180 /*
1181  * Check other cpus logout area for potential synd 71 generating
1182  * errors.
1183  */
1184 static void
cpu_check_cpu_logout(int cpuid,caddr_t tpc,int tl,int ecc_type,ch_cpu_logout_t * clop)1185 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1186     ch_cpu_logout_t *clop)
1187 {
1188 	struct async_flt *aflt;
1189 	ch_async_flt_t ch_flt;
1190 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1191 
1192 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1193 		return;
1194 	}
1195 
1196 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1197 
1198 	t_afar = clop->clo_data.chd_afar;
1199 	t_afsr = clop->clo_data.chd_afsr;
1200 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1201 #if defined(SERRANO)
1202 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1203 #endif	/* SERRANO */
1204 
1205 	/*
1206 	 * In order to simplify code, we maintain this afsr_errs
1207 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1208 	 * sticky bits.
1209 	 */
1210 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1211 	    (t_afsr & C_AFSR_ALL_ERRS);
1212 
1213 	/* Setup the async fault structure */
1214 	aflt = (struct async_flt *)&ch_flt;
1215 	aflt->flt_id = gethrtime_waitfree();
1216 	ch_flt.afsr_ext = t_afsr_ext;
1217 	ch_flt.afsr_errs = t_afsr_errs;
1218 	aflt->flt_stat = t_afsr;
1219 	aflt->flt_addr = t_afar;
1220 	aflt->flt_bus_id = cpuid;
1221 	aflt->flt_inst = cpuid;
1222 	aflt->flt_pc = tpc;
1223 	aflt->flt_prot = AFLT_PROT_NONE;
1224 	aflt->flt_class = CPU_FAULT;
1225 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1226 	aflt->flt_tl = tl;
1227 	aflt->flt_status = ecc_type;
1228 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1229 
1230 	/*
1231 	 * Queue events on the async event queue, one event per error bit.
1232 	 * If no events are queued, queue an event to complain.
1233 	 */
1234 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1235 		ch_flt.flt_type = CPU_INV_AFSR;
1236 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1237 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1238 		    aflt->flt_panic);
1239 	}
1240 
1241 	/*
1242 	 * Zero out + invalidate CPU logout.
1243 	 */
1244 	bzero(clop, sizeof (ch_cpu_logout_t));
1245 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1246 }
1247 
1248 /*
1249  * Check the logout areas of all other cpus for unlogged errors.
1250  */
1251 static void
cpu_check_other_cpus_logout(void)1252 cpu_check_other_cpus_logout(void)
1253 {
1254 	int i, j;
1255 	processorid_t myid;
1256 	struct cpu *cp;
1257 	ch_err_tl1_data_t *cl1p;
1258 
1259 	myid = CPU->cpu_id;
1260 	for (i = 0; i < NCPU; i++) {
1261 		cp = cpu[i];
1262 
1263 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1264 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1265 			continue;
1266 		}
1267 
1268 		/*
1269 		 * Check each of the tl>0 logout areas
1270 		 */
1271 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1272 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1273 			if (cl1p->ch_err_tl1_flags == 0)
1274 				continue;
1275 
1276 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1277 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1278 		}
1279 
1280 		/*
1281 		 * Check each of the remaining logout areas
1282 		 */
1283 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1284 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1285 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1286 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1287 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1288 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1289 	}
1290 }
1291 
1292 /*
1293  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1294  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1295  * flush the error that caused the UCU/UCC, then again here at the end to
1296  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1297  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1298  * another Fast ECC trap.
1299  *
1300  * Cheetah+ also handles: TSCE: No additional processing required.
1301  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1302  *
1303  * Note that the p_clo_flags input is only valid in cases where the
1304  * cpu_private struct is not yet initialized (since that is the only
1305  * time that information cannot be obtained from the logout struct.)
1306  */
1307 /*ARGSUSED*/
1308 void
cpu_fast_ecc_error(struct regs * rp,ulong_t p_clo_flags)1309 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1310 {
1311 	ch_cpu_logout_t *clop;
1312 	uint64_t ceen, nceen;
1313 
1314 	/*
1315 	 * Get the CPU log out info. If we can't find our CPU private
1316 	 * pointer, then we will have to make due without any detailed
1317 	 * logout information.
1318 	 */
1319 	if (CPU_PRIVATE(CPU) == NULL) {
1320 		clop = NULL;
1321 		ceen = p_clo_flags & EN_REG_CEEN;
1322 		nceen = p_clo_flags & EN_REG_NCEEN;
1323 	} else {
1324 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1325 		ceen = clop->clo_flags & EN_REG_CEEN;
1326 		nceen = clop->clo_flags & EN_REG_NCEEN;
1327 	}
1328 
1329 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1330 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1331 }
1332 
1333 /*
1334  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1335  * ECC at TL>0.  Need to supply either a error register pointer or a
1336  * cpu logout structure pointer.
1337  */
1338 static void
cpu_log_fast_ecc_error(caddr_t tpc,int priv,int tl,uint64_t ceen,uint64_t nceen,ch_cpu_logout_t * clop)1339 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1340     uint64_t nceen, ch_cpu_logout_t *clop)
1341 {
1342 	struct async_flt *aflt;
1343 	ch_async_flt_t ch_flt;
1344 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1345 	char pr_reason[MAX_REASON_STRING];
1346 	ch_cpu_errors_t cpu_error_regs;
1347 
1348 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1349 	/*
1350 	 * If no cpu logout data, then we will have to make due without
1351 	 * any detailed logout information.
1352 	 */
1353 	if (clop == NULL) {
1354 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1355 		get_cpu_error_state(&cpu_error_regs);
1356 		set_cpu_error_state(&cpu_error_regs);
1357 		t_afar = cpu_error_regs.afar;
1358 		t_afsr = cpu_error_regs.afsr;
1359 		t_afsr_ext = cpu_error_regs.afsr_ext;
1360 #if defined(SERRANO)
1361 		ch_flt.afar2 = cpu_error_regs.afar2;
1362 #endif	/* SERRANO */
1363 	} else {
1364 		t_afar = clop->clo_data.chd_afar;
1365 		t_afsr = clop->clo_data.chd_afsr;
1366 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1367 #if defined(SERRANO)
1368 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1369 #endif	/* SERRANO */
1370 	}
1371 
1372 	/*
1373 	 * In order to simplify code, we maintain this afsr_errs
1374 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1375 	 * sticky bits.
1376 	 */
1377 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1378 	    (t_afsr & C_AFSR_ALL_ERRS);
1379 	pr_reason[0] = '\0';
1380 
1381 	/* Setup the async fault structure */
1382 	aflt = (struct async_flt *)&ch_flt;
1383 	aflt->flt_id = gethrtime_waitfree();
1384 	ch_flt.afsr_ext = t_afsr_ext;
1385 	ch_flt.afsr_errs = t_afsr_errs;
1386 	aflt->flt_stat = t_afsr;
1387 	aflt->flt_addr = t_afar;
1388 	aflt->flt_bus_id = getprocessorid();
1389 	aflt->flt_inst = CPU->cpu_id;
1390 	aflt->flt_pc = tpc;
1391 	aflt->flt_prot = AFLT_PROT_NONE;
1392 	aflt->flt_class = CPU_FAULT;
1393 	aflt->flt_priv = priv;
1394 	aflt->flt_tl = tl;
1395 	aflt->flt_status = ECC_F_TRAP;
1396 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1397 
1398 	/*
1399 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1400 	 * cmn_err messages out to the console.  The situation is a UCU (in
1401 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1402 	 * The messages for the UCU and WDU are enqueued and then pulled off
1403 	 * the async queue via softint and syslogd starts to process them
1404 	 * but doesn't get them to the console.  The UE causes a panic, but
1405 	 * since the UCU/WDU messages are already in transit, those aren't
1406 	 * on the async queue.  The hack is to check if we have a matching
1407 	 * WDU event for the UCU, and if it matches, we're more than likely
1408 	 * going to panic with a UE, unless we're under protection.  So, we
1409 	 * check to see if we got a matching WDU event and if we're under
1410 	 * protection.
1411 	 *
1412 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1413 	 * looks like this:
1414 	 *    UCU->WDU->UE
1415 	 * For Panther, it could look like either of these:
1416 	 *    UCU---->WDU->L3_WDU->UE
1417 	 *    L3_UCU->WDU->L3_WDU->UE
1418 	 */
1419 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1420 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1421 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1422 		get_cpu_error_state(&cpu_error_regs);
1423 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
1424 			aflt->flt_panic |=
1425 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1426 			    (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
1427 			    (cpu_error_regs.afar == t_afar));
1428 			aflt->flt_panic |= ((clop == NULL) &&
1429 			    (t_afsr_errs & C_AFSR_WDU) &&
1430 			    (t_afsr_errs & C_AFSR_L3_WDU));
1431 		} else {
1432 			aflt->flt_panic |=
1433 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1434 			    (cpu_error_regs.afar == t_afar));
1435 			aflt->flt_panic |= ((clop == NULL) &&
1436 			    (t_afsr_errs & C_AFSR_WDU));
1437 		}
1438 	}
1439 
1440 	/*
1441 	 * Queue events on the async event queue, one event per error bit.
1442 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1443 	 * queue an event to complain.
1444 	 */
1445 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1446 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1447 		ch_flt.flt_type = CPU_INV_AFSR;
1448 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1449 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1450 		    aflt->flt_panic);
1451 	}
1452 
1453 	/*
1454 	 * Zero out + invalidate CPU logout.
1455 	 */
1456 	if (clop) {
1457 		bzero(clop, sizeof (ch_cpu_logout_t));
1458 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1459 	}
1460 
1461 	/*
1462 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1463 	 * or disrupting errors have happened.  We do this because if a
1464 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1465 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1466 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1467 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1468 	 * deferred or disrupting error happening between checking the AFSR and
1469 	 * enabling NCEEN/CEEN.
1470 	 *
1471 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1472 	 * taken.
1473 	 */
1474 	set_error_enable(get_error_enable() | (nceen | ceen));
1475 	if (clear_errors(&ch_flt)) {
1476 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1477 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1478 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1479 		    NULL);
1480 	}
1481 
1482 	/*
1483 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1484 	 * be logged as part of the panic flow.
1485 	 */
1486 	if (aflt->flt_panic)
1487 		fm_panic("%sError(s)", pr_reason);
1488 
1489 	/*
1490 	 * Flushing the Ecache here gets the part of the trap handler that
1491 	 * is run at TL=1 out of the Ecache.
1492 	 */
1493 	cpu_flush_ecache();
1494 }
1495 
1496 /*
1497  * This is called via sys_trap from pil15_interrupt code if the
1498  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1499  * various ch_err_tl1_data structures for valid entries based on the bit
1500  * settings in the ch_err_tl1_flags entry of the structure.
1501  */
1502 /*ARGSUSED*/
1503 void
cpu_tl1_error(struct regs * rp,int panic)1504 cpu_tl1_error(struct regs *rp, int panic)
1505 {
1506 	ch_err_tl1_data_t *cl1p, cl1;
1507 	int i, ncl1ps;
1508 	uint64_t me_flags;
1509 	uint64_t ceen, nceen;
1510 
1511 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1512 		cl1p = &ch_err_tl1_data;
1513 		ncl1ps = 1;
1514 	} else if (CPU_PRIVATE(CPU) != NULL) {
1515 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1516 		ncl1ps = CH_ERR_TL1_TLMAX;
1517 	} else {
1518 		ncl1ps = 0;
1519 	}
1520 
1521 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1522 		if (cl1p->ch_err_tl1_flags == 0)
1523 			continue;
1524 
1525 		/*
1526 		 * Grab a copy of the logout data and invalidate
1527 		 * the logout area.
1528 		 */
1529 		cl1 = *cl1p;
1530 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1531 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1532 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1533 
1534 		/*
1535 		 * Log "first error" in ch_err_tl1_data.
1536 		 */
1537 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1538 			ceen = get_error_enable() & EN_REG_CEEN;
1539 			nceen = get_error_enable() & EN_REG_NCEEN;
1540 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1541 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1542 		}
1543 #if defined(CPU_IMP_L1_CACHE_PARITY)
1544 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1545 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1546 			    (caddr_t)cl1.ch_err_tl1_tpc);
1547 		}
1548 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1549 
1550 		/*
1551 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1552 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1553 		 * if the structure is busy, we just do the cache flushing
1554 		 * we have to do and then do the retry.  So the AFSR/AFAR
1555 		 * at this point *should* have some relevant info.  If there
1556 		 * are no valid errors in the AFSR, we'll assume they've
1557 		 * already been picked up and logged.  For I$/D$ parity,
1558 		 * we just log an event with an "Unknown" (NULL) TPC.
1559 		 */
1560 		if (me_flags & CH_ERR_FECC) {
1561 			ch_cpu_errors_t cpu_error_regs;
1562 			uint64_t t_afsr_errs;
1563 
1564 			/*
1565 			 * Get the error registers and see if there's
1566 			 * a pending error.  If not, don't bother
1567 			 * generating an "Invalid AFSR" error event.
1568 			 */
1569 			get_cpu_error_state(&cpu_error_regs);
1570 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1571 			    C_AFSR_EXT_ALL_ERRS) |
1572 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1573 			if (t_afsr_errs != 0) {
1574 				ceen = get_error_enable() & EN_REG_CEEN;
1575 				nceen = get_error_enable() & EN_REG_NCEEN;
1576 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1577 				    1, ceen, nceen, NULL);
1578 			}
1579 		}
1580 #if defined(CPU_IMP_L1_CACHE_PARITY)
1581 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1582 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1583 		}
1584 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1585 	}
1586 }
1587 
1588 /*
1589  * Called from Fast ECC TL>0 handler in case of fatal error.
1590  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1591  * but if we don't, we'll panic with something reasonable.
1592  */
1593 /*ARGSUSED*/
1594 void
cpu_tl1_err_panic(struct regs * rp,ulong_t flags)1595 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1596 {
1597 	cpu_tl1_error(rp, 1);
1598 	/*
1599 	 * Should never return, but just in case.
1600 	 */
1601 	fm_panic("Unsurvivable ECC Error at TL>0");
1602 }
1603 
1604 /*
1605  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1606  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1607  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1608  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1609  *
1610  * Cheetah+ also handles (No additional processing required):
1611  *    DUE, DTO, DBERR	(NCEEN controlled)
1612  *    THCE		(CEEN and ET_ECC_en controlled)
1613  *    TUE		(ET_ECC_en controlled)
1614  *
1615  * Panther further adds:
1616  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1617  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1618  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1619  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1620  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1621  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1622  *
1623  * Note that the p_clo_flags input is only valid in cases where the
1624  * cpu_private struct is not yet initialized (since that is the only
1625  * time that information cannot be obtained from the logout struct.)
1626  */
1627 /*ARGSUSED*/
1628 void
cpu_disrupting_error(struct regs * rp,ulong_t p_clo_flags)1629 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1630 {
1631 	struct async_flt *aflt;
1632 	ch_async_flt_t ch_flt;
1633 	char pr_reason[MAX_REASON_STRING];
1634 	ch_cpu_logout_t *clop;
1635 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1636 	ch_cpu_errors_t cpu_error_regs;
1637 
1638 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1639 	/*
1640 	 * Get the CPU log out info. If we can't find our CPU private
1641 	 * pointer, then we will have to make due without any detailed
1642 	 * logout information.
1643 	 */
1644 	if (CPU_PRIVATE(CPU) == NULL) {
1645 		clop = NULL;
1646 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1647 		get_cpu_error_state(&cpu_error_regs);
1648 		set_cpu_error_state(&cpu_error_regs);
1649 		t_afar = cpu_error_regs.afar;
1650 		t_afsr = cpu_error_regs.afsr;
1651 		t_afsr_ext = cpu_error_regs.afsr_ext;
1652 #if defined(SERRANO)
1653 		ch_flt.afar2 = cpu_error_regs.afar2;
1654 #endif	/* SERRANO */
1655 	} else {
1656 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1657 		t_afar = clop->clo_data.chd_afar;
1658 		t_afsr = clop->clo_data.chd_afsr;
1659 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1660 #if defined(SERRANO)
1661 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1662 #endif	/* SERRANO */
1663 	}
1664 
1665 	/*
1666 	 * In order to simplify code, we maintain this afsr_errs
1667 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1668 	 * sticky bits.
1669 	 */
1670 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1671 	    (t_afsr & C_AFSR_ALL_ERRS);
1672 
1673 	pr_reason[0] = '\0';
1674 	/* Setup the async fault structure */
1675 	aflt = (struct async_flt *)&ch_flt;
1676 	ch_flt.afsr_ext = t_afsr_ext;
1677 	ch_flt.afsr_errs = t_afsr_errs;
1678 	aflt->flt_stat = t_afsr;
1679 	aflt->flt_addr = t_afar;
1680 	aflt->flt_pc = (caddr_t)rp->r_pc;
1681 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1682 	aflt->flt_tl = 0;
1683 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1684 
1685 	/*
1686 	 * If this trap is a result of one of the errors not masked
1687 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1688 	 * indicate that a timeout is to be set later.
1689 	 */
1690 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1691 	    !aflt->flt_panic)
1692 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1693 	else
1694 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1695 
1696 	/*
1697 	 * log the CE and clean up
1698 	 */
1699 	cpu_log_and_clear_ce(&ch_flt);
1700 
1701 	/*
1702 	 * We re-enable CEEN (if required) and check if any disrupting errors
1703 	 * have happened.  We do this because if a disrupting error had occurred
1704 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1705 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1706 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1707 	 * of a error happening between checking the AFSR and enabling CEEN.
1708 	 */
1709 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1710 		set_error_enable(get_error_enable() | EN_REG_CEEN);
1711 	if (clear_errors(&ch_flt)) {
1712 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1713 		    NULL);
1714 	}
1715 
1716 	/*
1717 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1718 	 * be logged as part of the panic flow.
1719 	 */
1720 	if (aflt->flt_panic)
1721 		fm_panic("%sError(s)", pr_reason);
1722 }
1723 
1724 /*
1725  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1726  * L3_EDU:BLD, TO, and BERR events.
1727  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1728  *
1729  * Cheetah+: No additional errors handled.
1730  *
1731  * Note that the p_clo_flags input is only valid in cases where the
1732  * cpu_private struct is not yet initialized (since that is the only
1733  * time that information cannot be obtained from the logout struct.)
1734  */
1735 /*ARGSUSED*/
1736 void
cpu_deferred_error(struct regs * rp,ulong_t p_clo_flags)1737 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1738 {
1739 	ushort_t ttype, tl;
1740 	ch_async_flt_t ch_flt;
1741 	struct async_flt *aflt;
1742 	int trampolined = 0;
1743 	char pr_reason[MAX_REASON_STRING];
1744 	ch_cpu_logout_t *clop;
1745 	uint64_t ceen, clo_flags;
1746 	uint64_t log_afsr;
1747 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1748 	ch_cpu_errors_t cpu_error_regs;
1749 	int expected = DDI_FM_ERR_UNEXPECTED;
1750 	ddi_acc_hdl_t *hp;
1751 
1752 	/*
1753 	 * We need to look at p_flag to determine if the thread detected an
1754 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1755 	 * because we just need a consistent snapshot and we know that everyone
1756 	 * else will store a consistent set of bits while holding p_lock.  We
1757 	 * don't have to worry about a race because SDOCORE is set once prior
1758 	 * to doing i/o from the process's address space and is never cleared.
1759 	 */
1760 	uint_t pflag = ttoproc(curthread)->p_flag;
1761 
1762 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1763 	/*
1764 	 * Get the CPU log out info. If we can't find our CPU private
1765 	 * pointer then we will have to make due without any detailed
1766 	 * logout information.
1767 	 */
1768 	if (CPU_PRIVATE(CPU) == NULL) {
1769 		clop = NULL;
1770 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1771 		get_cpu_error_state(&cpu_error_regs);
1772 		set_cpu_error_state(&cpu_error_regs);
1773 		t_afar = cpu_error_regs.afar;
1774 		t_afsr = cpu_error_regs.afsr;
1775 		t_afsr_ext = cpu_error_regs.afsr_ext;
1776 #if defined(SERRANO)
1777 		ch_flt.afar2 = cpu_error_regs.afar2;
1778 #endif	/* SERRANO */
1779 		clo_flags = p_clo_flags;
1780 	} else {
1781 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1782 		t_afar = clop->clo_data.chd_afar;
1783 		t_afsr = clop->clo_data.chd_afsr;
1784 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1785 #if defined(SERRANO)
1786 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1787 #endif	/* SERRANO */
1788 		clo_flags = clop->clo_flags;
1789 	}
1790 
1791 	/*
1792 	 * In order to simplify code, we maintain this afsr_errs
1793 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1794 	 * sticky bits.
1795 	 */
1796 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1797 	    (t_afsr & C_AFSR_ALL_ERRS);
1798 	pr_reason[0] = '\0';
1799 
1800 	/*
1801 	 * Grab information encoded into our clo_flags field.
1802 	 */
1803 	ceen = clo_flags & EN_REG_CEEN;
1804 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1805 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1806 
1807 	/*
1808 	 * handle the specific error
1809 	 */
1810 	aflt = (struct async_flt *)&ch_flt;
1811 	aflt->flt_id = gethrtime_waitfree();
1812 	aflt->flt_bus_id = getprocessorid();
1813 	aflt->flt_inst = CPU->cpu_id;
1814 	ch_flt.afsr_ext = t_afsr_ext;
1815 	ch_flt.afsr_errs = t_afsr_errs;
1816 	aflt->flt_stat = t_afsr;
1817 	aflt->flt_addr = t_afar;
1818 	aflt->flt_pc = (caddr_t)rp->r_pc;
1819 	aflt->flt_prot = AFLT_PROT_NONE;
1820 	aflt->flt_class = CPU_FAULT;
1821 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1822 	aflt->flt_tl = (uchar_t)tl;
1823 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1824 	    C_AFSR_PANIC(t_afsr_errs));
1825 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1826 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1827 
1828 	/*
1829 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1830 	 * see if we were executing in the kernel under on_trap() or t_lofault
1831 	 * protection.  If so, modify the saved registers so that we return
1832 	 * from the trap to the appropriate trampoline routine.
1833 	 */
1834 	if (aflt->flt_priv && tl == 0) {
1835 		if (curthread->t_ontrap != NULL) {
1836 			on_trap_data_t *otp = curthread->t_ontrap;
1837 
1838 			if (otp->ot_prot & OT_DATA_EC) {
1839 				aflt->flt_prot = AFLT_PROT_EC;
1840 				otp->ot_trap |= OT_DATA_EC;
1841 				rp->r_pc = otp->ot_trampoline;
1842 				rp->r_npc = rp->r_pc + 4;
1843 				trampolined = 1;
1844 			}
1845 
1846 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1847 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1848 				aflt->flt_prot = AFLT_PROT_ACCESS;
1849 				otp->ot_trap |= OT_DATA_ACCESS;
1850 				rp->r_pc = otp->ot_trampoline;
1851 				rp->r_npc = rp->r_pc + 4;
1852 				trampolined = 1;
1853 				/*
1854 				 * for peeks and caut_gets errors are expected
1855 				 */
1856 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1857 				if (!hp)
1858 					expected = DDI_FM_ERR_PEEK;
1859 				else if (hp->ah_acc.devacc_attr_access ==
1860 				    DDI_CAUTIOUS_ACC)
1861 					expected = DDI_FM_ERR_EXPECTED;
1862 			}
1863 
1864 		} else if (curthread->t_lofault) {
1865 			aflt->flt_prot = AFLT_PROT_COPY;
1866 			rp->r_g1 = EFAULT;
1867 			rp->r_pc = curthread->t_lofault;
1868 			rp->r_npc = rp->r_pc + 4;
1869 			trampolined = 1;
1870 		}
1871 	}
1872 
1873 	/*
1874 	 * If we're in user mode or we're doing a protected copy, we either
1875 	 * want the ASTON code below to send a signal to the user process
1876 	 * or we want to panic if aft_panic is set.
1877 	 *
1878 	 * If we're in privileged mode and we're not doing a copy, then we
1879 	 * need to check if we've trampolined.  If we haven't trampolined,
1880 	 * we should panic.
1881 	 */
1882 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1883 		if (t_afsr_errs &
1884 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1885 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1886 			aflt->flt_panic |= aft_panic;
1887 	} else if (!trampolined) {
1888 			aflt->flt_panic = 1;
1889 	}
1890 
1891 	/*
1892 	 * If we've trampolined due to a privileged TO or BERR, or if an
1893 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1894 	 * event for that TO or BERR.  Queue all other events (if any) besides
1895 	 * the TO/BERR.  Since we may not be enqueuing any events, we need to
1896 	 * ignore the number of events queued.  If we haven't trampolined due
1897 	 * to a TO or BERR, just enqueue events normally.
1898 	 */
1899 	log_afsr = t_afsr_errs;
1900 	if (trampolined) {
1901 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1902 	} else if (!aflt->flt_priv) {
1903 		/*
1904 		 * User mode, suppress messages if
1905 		 * cpu_berr_to_verbose is not set.
1906 		 */
1907 		if (!cpu_berr_to_verbose)
1908 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1909 	}
1910 
1911 	/*
1912 	 * Log any errors that occurred
1913 	 */
1914 	if (((log_afsr &
1915 	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1916 	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1917 	    (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1918 		ch_flt.flt_type = CPU_INV_AFSR;
1919 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1920 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1921 		    aflt->flt_panic);
1922 	}
1923 
1924 	/*
1925 	 * Zero out + invalidate CPU logout.
1926 	 */
1927 	if (clop) {
1928 		bzero(clop, sizeof (ch_cpu_logout_t));
1929 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1930 	}
1931 
1932 #if defined(JALAPENO) || defined(SERRANO)
1933 	/*
1934 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1935 	 * IO errors that may have resulted in this trap.
1936 	 */
1937 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1938 		cpu_run_bus_error_handlers(aflt, expected);
1939 	}
1940 
1941 	/*
1942 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1943 	 * line from the Ecache.  We also need to query the bus nexus for
1944 	 * fatal errors.  Attempts to do diagnostic read on caches may
1945 	 * introduce more errors (especially when the module is bad).
1946 	 */
1947 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1948 		/*
1949 		 * Ask our bus nexus friends if they have any fatal errors.  If
1950 		 * so, they will log appropriate error messages.
1951 		 */
1952 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1953 			aflt->flt_panic = 1;
1954 
1955 		/*
1956 		 * We got a UE or RUE and are panicking, save the fault PA in
1957 		 * a known location so that the platform specific panic code
1958 		 * can check for copyback errors.
1959 		 */
1960 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1961 			panic_aflt = *aflt;
1962 		}
1963 	}
1964 
1965 	/*
1966 	 * Flush Ecache line or entire Ecache
1967 	 */
1968 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1969 		cpu_error_ecache_flush(&ch_flt);
1970 #else /* JALAPENO || SERRANO */
1971 	/*
1972 	 * UE/BERR/TO: Call our bus nexus friends to check for
1973 	 * IO errors that may have resulted in this trap.
1974 	 */
1975 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1976 		cpu_run_bus_error_handlers(aflt, expected);
1977 	}
1978 
1979 	/*
1980 	 * UE: If the UE is in memory, we need to flush the bad
1981 	 * line from the Ecache.  We also need to query the bus nexus for
1982 	 * fatal errors.  Attempts to do diagnostic read on caches may
1983 	 * introduce more errors (especially when the module is bad).
1984 	 */
1985 	if (t_afsr & C_AFSR_UE) {
1986 		/*
1987 		 * Ask our legacy bus nexus friends if they have any fatal
1988 		 * errors.  If so, they will log appropriate error messages.
1989 		 */
1990 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1991 			aflt->flt_panic = 1;
1992 
1993 		/*
1994 		 * We got a UE and are panicking, save the fault PA in a known
1995 		 * location so that the platform specific panic code can check
1996 		 * for copyback errors.
1997 		 */
1998 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1999 			panic_aflt = *aflt;
2000 		}
2001 	}
2002 
2003 	/*
2004 	 * Flush Ecache line or entire Ecache
2005 	 */
2006 	if (t_afsr_errs &
2007 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
2008 		cpu_error_ecache_flush(&ch_flt);
2009 #endif /* JALAPENO || SERRANO */
2010 
2011 	/*
2012 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2013 	 * or disrupting errors have happened.  We do this because if a
2014 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2015 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2016 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2017 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2018 	 * deferred or disrupting error happening between checking the AFSR and
2019 	 * enabling NCEEN/CEEN.
2020 	 *
2021 	 * Note: CEEN reenabled only if it was on when trap taken.
2022 	 */
2023 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2024 	if (clear_errors(&ch_flt)) {
2025 		/*
2026 		 * Check for secondary errors, and avoid panicking if we
2027 		 * have them
2028 		 */
2029 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2030 		    t_afar) == 0) {
2031 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2032 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2033 		}
2034 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2035 		    NULL);
2036 	}
2037 
2038 	/*
2039 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2040 	 * be logged as part of the panic flow.
2041 	 */
2042 	if (aflt->flt_panic)
2043 		fm_panic("%sError(s)", pr_reason);
2044 
2045 	/*
2046 	 * If we queued an error and we are going to return from the trap and
2047 	 * the error was in user mode or inside of a copy routine, set AST flag
2048 	 * so the queue will be drained before returning to user mode.  The
2049 	 * AST processing will also act on our failure policy.
2050 	 */
2051 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2052 		int pcb_flag = 0;
2053 
2054 		if (t_afsr_errs &
2055 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2056 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2057 			pcb_flag |= ASYNC_HWERR;
2058 
2059 		if (t_afsr & C_AFSR_BERR)
2060 			pcb_flag |= ASYNC_BERR;
2061 
2062 		if (t_afsr & C_AFSR_TO)
2063 			pcb_flag |= ASYNC_BTO;
2064 
2065 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2066 		aston(curthread);
2067 	}
2068 }
2069 
2070 #if defined(CPU_IMP_L1_CACHE_PARITY)
2071 /*
2072  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2073  *
2074  * For Panther, P$ data parity errors during floating point load hits
2075  * are also detected (reported as TT 0x71) and handled by this trap
2076  * handler.
2077  *
2078  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2079  * is available.
2080  */
2081 /*ARGSUSED*/
2082 void
cpu_parity_error(struct regs * rp,uint_t flags,caddr_t tpc)2083 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2084 {
2085 	ch_async_flt_t ch_flt;
2086 	struct async_flt *aflt;
2087 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2088 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2089 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2090 	char *error_class;
2091 	int index, way, word;
2092 	ch_dc_data_t tmp_dcp;
2093 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
2094 	uint64_t parity_bits, pbits;
2095 	/* The parity bit array corresponds to the result of summing two bits */
2096 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
2097 
2098 	/*
2099 	 * Log the error.
2100 	 * For icache parity errors the fault address is the trap PC.
2101 	 * For dcache/pcache parity errors the instruction would have to
2102 	 * be decoded to determine the address and that isn't possible
2103 	 * at high PIL.
2104 	 */
2105 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2106 	aflt = (struct async_flt *)&ch_flt;
2107 	aflt->flt_id = gethrtime_waitfree();
2108 	aflt->flt_bus_id = getprocessorid();
2109 	aflt->flt_inst = CPU->cpu_id;
2110 	aflt->flt_pc = tpc;
2111 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2112 	aflt->flt_prot = AFLT_PROT_NONE;
2113 	aflt->flt_class = CPU_FAULT;
2114 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2115 	aflt->flt_tl = tl;
2116 	aflt->flt_panic = panic;
2117 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2118 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2119 
2120 	if (iparity) {
2121 		cpu_icache_parity_info(&ch_flt);
2122 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2123 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2124 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2125 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2126 		else
2127 			error_class = FM_EREPORT_CPU_USIII_IPE;
2128 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2129 	} else {
2130 		cpu_dcache_parity_info(&ch_flt);
2131 		if (ch_flt.parity_data.dpe.cpl_off != -1) {
2132 			/*
2133 			 * If not at TL 0 and running on a Jalapeno processor,
2134 			 * then process as a true ddspe.  A true
2135 			 * ddspe error can only occur if the way == 0
2136 			 */
2137 			way = ch_flt.parity_data.dpe.cpl_way;
2138 			if ((tl == 0) && (way != 0) &&
2139 			    IS_JALAPENO(cpunodes[CPU->cpu_id].implementation)) {
2140 				for (index = 0; index < dc_set_size;
2141 				    index += dcache_linesize) {
2142 					get_dcache_dtag(index + way *
2143 					    dc_set_size,
2144 					    (uint64_t *)&tmp_dcp);
2145 					/*
2146 					 * Check data array for even parity.
2147 					 * The 8 parity bits are grouped into
2148 					 * 4 pairs each of which covers a 64-bit
2149 					 * word.  The endianness is reversed
2150 					 * -- the low-order parity bits cover
2151 					 *  the high-order data words.
2152 					 */
2153 					parity_bits = tmp_dcp.dc_utag >> 8;
2154 					for (word = 0; word < 4; word++) {
2155 						pbits = (parity_bits >>
2156 						    (6 - word * 2)) & 3;
2157 						if (((popc64(
2158 						    tmp_dcp.dc_data[word]) +
2159 						    parity_bits_popc[pbits]) &
2160 						    1) && (tmp_dcp.dc_tag &
2161 						    VA13)) {
2162 							/* cleanup */
2163 							correct_dcache_parity(
2164 							    dcache_size,
2165 							    dcache_linesize);
2166 							if (cache_boot_state &
2167 							    DCU_DC) {
2168 								flush_dcache();
2169 							}
2170 
2171 							set_dcu(get_dcu() |
2172 							    cache_boot_state);
2173 							return;
2174 						}
2175 					}
2176 				}
2177 			} /* (tl == 0) && (way != 0) && IS JALAPENO */
2178 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2179 		} else if (ch_flt.parity_data.dpe.cpl_way != -1)
2180 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2181 		else
2182 			error_class = FM_EREPORT_CPU_USIII_DPE;
2183 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2184 		/*
2185 		 * For panther we also need to check the P$ for parity errors.
2186 		 */
2187 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2188 			cpu_pcache_parity_info(&ch_flt);
2189 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2190 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2191 				aflt->flt_payload =
2192 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2193 			}
2194 		}
2195 	}
2196 
2197 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2198 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2199 
2200 	if (iparity) {
2201 		/*
2202 		 * Invalidate entire I$.
2203 		 * This is required due to the use of diagnostic ASI
2204 		 * accesses that may result in a loss of I$ coherency.
2205 		 */
2206 		if (cache_boot_state & DCU_IC) {
2207 			flush_icache();
2208 		}
2209 		/*
2210 		 * According to section P.3.1 of the Panther PRM, we
2211 		 * need to do a little more for recovery on those
2212 		 * CPUs after encountering an I$ parity error.
2213 		 */
2214 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2215 			flush_ipb();
2216 			correct_dcache_parity(dcache_size,
2217 			    dcache_linesize);
2218 			flush_pcache();
2219 		}
2220 	} else {
2221 		/*
2222 		 * Since the valid bit is ignored when checking parity the
2223 		 * D$ data and tag must also be corrected.  Set D$ data bits
2224 		 * to zero and set utag to 0, 1, 2, 3.
2225 		 */
2226 		correct_dcache_parity(dcache_size, dcache_linesize);
2227 
2228 		/*
2229 		 * According to section P.3.3 of the Panther PRM, we
2230 		 * need to do a little more for recovery on those
2231 		 * CPUs after encountering a D$ or P$ parity error.
2232 		 *
2233 		 * As far as clearing P$ parity errors, it is enough to
2234 		 * simply invalidate all entries in the P$ since P$ parity
2235 		 * error traps are only generated for floating point load
2236 		 * hits.
2237 		 */
2238 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2239 			flush_icache();
2240 			flush_ipb();
2241 			flush_pcache();
2242 		}
2243 	}
2244 
2245 	/*
2246 	 * Invalidate entire D$ if it was enabled.
2247 	 * This is done to avoid stale data in the D$ which might
2248 	 * occur with the D$ disabled and the trap handler doing
2249 	 * stores affecting lines already in the D$.
2250 	 */
2251 	if (cache_boot_state & DCU_DC) {
2252 		flush_dcache();
2253 	}
2254 
2255 	/*
2256 	 * Restore caches to their bootup state.
2257 	 */
2258 	set_dcu(get_dcu() | cache_boot_state);
2259 
2260 	/*
2261 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2262 	 * be logged as part of the panic flow.
2263 	 */
2264 	if (aflt->flt_panic)
2265 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2266 
2267 	/*
2268 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2269 	 * the chance of getting an unrecoverable Fast ECC error.  This
2270 	 * flush will evict the part of the parity trap handler that is run
2271 	 * at TL>1.
2272 	 */
2273 	if (tl) {
2274 		cpu_flush_ecache();
2275 	}
2276 }
2277 
2278 /*
2279  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2280  * to indicate which portions of the captured data should be in the ereport.
2281  */
2282 void
cpu_async_log_ic_parity_err(ch_async_flt_t * ch_flt)2283 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2284 {
2285 	int way = ch_flt->parity_data.ipe.cpl_way;
2286 	int offset = ch_flt->parity_data.ipe.cpl_off;
2287 	int tag_index;
2288 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2289 
2290 
2291 	if ((offset != -1) || (way != -1)) {
2292 		/*
2293 		 * Parity error in I$ tag or data
2294 		 */
2295 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2296 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2297 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2298 			    PN_ICIDX_TO_WAY(tag_index);
2299 		else
2300 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2301 			    CH_ICIDX_TO_WAY(tag_index);
2302 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2303 		    IC_LOGFLAG_MAGIC;
2304 	} else {
2305 		/*
2306 		 * Parity error was not identified.
2307 		 * Log tags and data for all ways.
2308 		 */
2309 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2310 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2311 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2312 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2313 				    PN_ICIDX_TO_WAY(tag_index);
2314 			else
2315 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2316 				    CH_ICIDX_TO_WAY(tag_index);
2317 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2318 			    IC_LOGFLAG_MAGIC;
2319 		}
2320 	}
2321 }
2322 
2323 /*
2324  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2325  * to indicate which portions of the captured data should be in the ereport.
2326  */
2327 void
cpu_async_log_dc_parity_err(ch_async_flt_t * ch_flt)2328 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2329 {
2330 	int way = ch_flt->parity_data.dpe.cpl_way;
2331 	int offset = ch_flt->parity_data.dpe.cpl_off;
2332 	int tag_index;
2333 
2334 	if (offset != -1) {
2335 		/*
2336 		 * Parity error in D$ or P$ data array.
2337 		 *
2338 		 * First check to see whether the parity error is in D$ or P$
2339 		 * since P$ data parity errors are reported in Panther using
2340 		 * the same trap.
2341 		 */
2342 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2343 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2344 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2345 			    CH_PCIDX_TO_WAY(tag_index);
2346 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2347 			    PC_LOGFLAG_MAGIC;
2348 		} else {
2349 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2350 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2351 			    CH_DCIDX_TO_WAY(tag_index);
2352 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2353 			    DC_LOGFLAG_MAGIC;
2354 		}
2355 	} else if (way != -1) {
2356 		/*
2357 		 * Parity error in D$ tag.
2358 		 */
2359 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2360 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2361 		    CH_DCIDX_TO_WAY(tag_index);
2362 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2363 		    DC_LOGFLAG_MAGIC;
2364 	}
2365 }
2366 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2367 
2368 /*
2369  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2370  * post-process CPU events that are dequeued.  As such, it can be invoked
2371  * from softint context, from AST processing in the trap() flow, or from the
2372  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2373  * Historically this entry point was used to log the actual cmn_err(9F) text;
2374  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2375  * With FMA this function now also returns a flag which indicates to the
2376  * caller whether the ereport should be posted (1) or suppressed (0).
2377  */
2378 static int
cpu_async_log_err(void * flt,errorq_elem_t * eqep)2379 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2380 {
2381 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2382 	struct async_flt *aflt = (struct async_flt *)flt;
2383 	uint64_t errors;
2384 	extern void memscrub_induced_error(void);
2385 
2386 	switch (ch_flt->flt_type) {
2387 	case CPU_INV_AFSR:
2388 		/*
2389 		 * If it is a disrupting trap and the AFSR is zero, then
2390 		 * the event has probably already been noted. Do not post
2391 		 * an ereport.
2392 		 */
2393 		if ((aflt->flt_status & ECC_C_TRAP) &&
2394 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2395 			return (0);
2396 		else
2397 			return (1);
2398 	case CPU_TO:
2399 	case CPU_BERR:
2400 	case CPU_FATAL:
2401 	case CPU_FPUERR:
2402 		return (1);
2403 
2404 	case CPU_UE_ECACHE_RETIRE:
2405 		cpu_log_err(aflt);
2406 		cpu_page_retire(ch_flt);
2407 		return (1);
2408 
2409 	/*
2410 	 * Cases where we may want to suppress logging or perform
2411 	 * extended diagnostics.
2412 	 */
2413 	case CPU_CE:
2414 	case CPU_EMC:
2415 		/*
2416 		 * We want to skip logging and further classification
2417 		 * only if ALL the following conditions are true:
2418 		 *
2419 		 *	1. There is only one error
2420 		 *	2. That error is a correctable memory error
2421 		 *	3. The error is caused by the memory scrubber (in
2422 		 *	   which case the error will have occurred under
2423 		 *	   on_trap protection)
2424 		 *	4. The error is on a retired page
2425 		 *
2426 		 * Note: AFLT_PROT_EC is used places other than the memory
2427 		 * scrubber.  However, none of those errors should occur
2428 		 * on a retired page.
2429 		 */
2430 		if ((ch_flt->afsr_errs &
2431 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2432 		    aflt->flt_prot == AFLT_PROT_EC) {
2433 
2434 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2435 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2436 
2437 				/*
2438 				 * Since we're skipping logging, we'll need
2439 				 * to schedule the re-enabling of CEEN
2440 				 */
2441 				(void) timeout(cpu_delayed_check_ce_errors,
2442 				    (void *)(uintptr_t)aflt->flt_inst,
2443 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2444 				    * MICROSEC));
2445 				}
2446 
2447 				/*
2448 				 * Inform memscrubber - scrubbing induced
2449 				 * CE on a retired page.
2450 				 */
2451 				memscrub_induced_error();
2452 				return (0);
2453 			}
2454 		}
2455 
2456 		/*
2457 		 * Perform/schedule further classification actions, but
2458 		 * only if the page is healthy (we don't want bad
2459 		 * pages inducing too much diagnostic activity).  If we could
2460 		 * not find a page pointer then we also skip this.  If
2461 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2462 		 * to copy and recirculate the event (for further diagnostics)
2463 		 * and we should not proceed to log it here.
2464 		 *
2465 		 * This must be the last step here before the cpu_log_err()
2466 		 * below - if an event recirculates cpu_ce_log_err() will
2467 		 * not call the current function but just proceed directly
2468 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2469 		 *
2470 		 * Note: Check cpu_impl_async_log_err if changing this
2471 		 */
2472 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2473 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2474 			    CE_XDIAG_SKIP_NOPP);
2475 		} else {
2476 			if (errors != PR_OK) {
2477 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2478 				    CE_XDIAG_SKIP_PAGEDET);
2479 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2480 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2481 				return (0);
2482 			}
2483 		}
2484 		/*FALLTHRU*/
2485 
2486 	/*
2487 	 * Cases where we just want to report the error and continue.
2488 	 */
2489 	case CPU_CE_ECACHE:
2490 	case CPU_UE_ECACHE:
2491 	case CPU_IV:
2492 	case CPU_ORPH:
2493 		cpu_log_err(aflt);
2494 		return (1);
2495 
2496 	/*
2497 	 * Cases where we want to fall through to handle panicking.
2498 	 */
2499 	case CPU_UE:
2500 		/*
2501 		 * We want to skip logging in the same conditions as the
2502 		 * CE case.  In addition, we want to make sure we're not
2503 		 * panicking.
2504 		 */
2505 		if (!panicstr && (ch_flt->afsr_errs &
2506 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2507 		    aflt->flt_prot == AFLT_PROT_EC) {
2508 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2509 				/* Zero the address to clear the error */
2510 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2511 				/*
2512 				 * Inform memscrubber - scrubbing induced
2513 				 * UE on a retired page.
2514 				 */
2515 				memscrub_induced_error();
2516 				return (0);
2517 			}
2518 		}
2519 		cpu_log_err(aflt);
2520 		break;
2521 
2522 	default:
2523 		/*
2524 		 * If the us3_common.c code doesn't know the flt_type, it may
2525 		 * be an implementation-specific code.  Call into the impldep
2526 		 * backend to find out what to do: if it tells us to continue,
2527 		 * break and handle as if falling through from a UE; if not,
2528 		 * the impldep backend has handled the error and we're done.
2529 		 */
2530 		switch (cpu_impl_async_log_err(flt, eqep)) {
2531 		case CH_ASYNC_LOG_DONE:
2532 			return (1);
2533 		case CH_ASYNC_LOG_RECIRC:
2534 			return (0);
2535 		case CH_ASYNC_LOG_CONTINUE:
2536 			break; /* continue on to handle UE-like error */
2537 		default:
2538 			cmn_err(CE_WARN, "discarding error 0x%p with "
2539 			    "invalid fault type (0x%x)",
2540 			    (void *)aflt, ch_flt->flt_type);
2541 			return (0);
2542 		}
2543 	}
2544 
2545 	/* ... fall through from the UE case */
2546 
2547 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2548 		if (!panicstr) {
2549 			cpu_page_retire(ch_flt);
2550 		} else {
2551 			/*
2552 			 * Clear UEs on panic so that we don't
2553 			 * get haunted by them during panic or
2554 			 * after reboot
2555 			 */
2556 			cpu_clearphys(aflt);
2557 			(void) clear_errors(NULL);
2558 		}
2559 	}
2560 
2561 	return (1);
2562 }
2563 
2564 /*
2565  * Retire the bad page that may contain the flushed error.
2566  */
2567 void
cpu_page_retire(ch_async_flt_t * ch_flt)2568 cpu_page_retire(ch_async_flt_t *ch_flt)
2569 {
2570 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2571 	(void) page_retire(aflt->flt_addr, PR_UE);
2572 }
2573 
2574 /*
2575  * Return true if the error specified in the AFSR indicates
2576  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2577  * for Panther, none for Jalapeno/Serrano).
2578  */
2579 /* ARGSUSED */
2580 static int
cpu_error_is_ecache_data(int cpuid,uint64_t t_afsr)2581 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2582 {
2583 #if defined(JALAPENO) || defined(SERRANO)
2584 	return (0);
2585 #elif defined(CHEETAH_PLUS)
2586 	if (IS_PANTHER(cpunodes[cpuid].implementation))
2587 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2588 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2589 #else	/* CHEETAH_PLUS */
2590 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2591 #endif
2592 }
2593 
2594 /*
2595  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2596  * generic event post-processing for correctable and uncorrectable memory,
2597  * E$, and MTag errors.  Historically this entry point was used to log bits of
2598  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2599  * converted into an ereport.  In addition, it transmits the error to any
2600  * platform-specific service-processor FRU logging routines, if available.
2601  */
2602 void
cpu_log_err(struct async_flt * aflt)2603 cpu_log_err(struct async_flt *aflt)
2604 {
2605 	char unum[UNUM_NAMLEN];
2606 	int synd_status, synd_code, afar_status;
2607 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2608 
2609 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2610 		aflt->flt_status |= ECC_ECACHE;
2611 	else
2612 		aflt->flt_status &= ~ECC_ECACHE;
2613 	/*
2614 	 * Determine syndrome status.
2615 	 */
2616 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2617 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2618 
2619 	/*
2620 	 * Determine afar status.
2621 	 */
2622 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2623 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2624 		    ch_flt->flt_bit);
2625 	else
2626 		afar_status = AFLT_STAT_INVALID;
2627 
2628 	synd_code = synd_to_synd_code(synd_status,
2629 	    aflt->flt_synd, ch_flt->flt_bit);
2630 
2631 	/*
2632 	 * If afar status is not invalid do a unum lookup.
2633 	 */
2634 	if (afar_status != AFLT_STAT_INVALID) {
2635 		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2636 	} else {
2637 		unum[0] = '\0';
2638 	}
2639 
2640 	/*
2641 	 * Do not send the fruid message (plat_ecc_error_data_t)
2642 	 * to the SC if it can handle the enhanced error information
2643 	 * (plat_ecc_error2_data_t) or when the tunable
2644 	 * ecc_log_fruid_enable is set to 0.
2645 	 */
2646 
2647 	if (&plat_ecc_capability_sc_get &&
2648 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2649 		if (&plat_log_fruid_error)
2650 			plat_log_fruid_error(synd_code, aflt, unum,
2651 			    ch_flt->flt_bit);
2652 	}
2653 
2654 	if (aflt->flt_func != NULL)
2655 		aflt->flt_func(aflt, unum);
2656 
2657 	if (afar_status != AFLT_STAT_INVALID)
2658 		cpu_log_diag_info(ch_flt);
2659 
2660 	/*
2661 	 * If we have a CEEN error , we do not reenable CEEN until after
2662 	 * we exit the trap handler. Otherwise, another error may
2663 	 * occur causing the handler to be entered recursively.
2664 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2665 	 * to try and ensure that the CPU makes progress in the face
2666 	 * of a CE storm.
2667 	 */
2668 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2669 		(void) timeout(cpu_delayed_check_ce_errors,
2670 		    (void *)(uintptr_t)aflt->flt_inst,
2671 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2672 	}
2673 }
2674 
2675 /*
2676  * Invoked by error_init() early in startup and therefore before
2677  * startup_errorq() is called to drain any error Q -
2678  *
2679  * startup()
2680  *   startup_end()
2681  *     error_init()
2682  *       cpu_error_init()
2683  * errorq_init()
2684  *   errorq_drain()
2685  * start_other_cpus()
2686  *
2687  * The purpose of this routine is to create error-related taskqs.  Taskqs
2688  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2689  * context.
2690  */
2691 void
cpu_error_init(int items)2692 cpu_error_init(int items)
2693 {
2694 	/*
2695 	 * Create taskq(s) to reenable CE
2696 	 */
2697 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2698 	    items, items, TASKQ_PREPOPULATE);
2699 }
2700 
2701 void
cpu_ce_log_err(struct async_flt * aflt,errorq_elem_t * eqep)2702 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2703 {
2704 	char unum[UNUM_NAMLEN];
2705 	int len;
2706 
2707 	switch (aflt->flt_class) {
2708 	case CPU_FAULT:
2709 		cpu_ereport_init(aflt);
2710 		if (cpu_async_log_err(aflt, eqep))
2711 			cpu_ereport_post(aflt);
2712 		break;
2713 
2714 	case BUS_FAULT:
2715 		if (aflt->flt_func != NULL) {
2716 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2717 			    unum, UNUM_NAMLEN, &len);
2718 			aflt->flt_func(aflt, unum);
2719 		}
2720 		break;
2721 
2722 	case RECIRC_CPU_FAULT:
2723 		aflt->flt_class = CPU_FAULT;
2724 		cpu_log_err(aflt);
2725 		cpu_ereport_post(aflt);
2726 		break;
2727 
2728 	case RECIRC_BUS_FAULT:
2729 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2730 		/*FALLTHRU*/
2731 	default:
2732 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2733 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2734 		return;
2735 	}
2736 }
2737 
2738 /*
2739  * Scrub and classify a CE.  This function must not modify the
2740  * fault structure passed to it but instead should return the classification
2741  * information.
2742  */
2743 
2744 static uchar_t
cpu_ce_scrub_mem_err_common(struct async_flt * ecc,boolean_t logout_tried)2745 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2746 {
2747 	uchar_t disp = CE_XDIAG_EXTALG;
2748 	on_trap_data_t otd;
2749 	uint64_t orig_err;
2750 	ch_cpu_logout_t *clop;
2751 
2752 	/*
2753 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2754 	 * this, but our other callers have not.  Disable preemption to
2755 	 * avoid CPU migration so that we restore CEEN on the correct
2756 	 * cpu later.
2757 	 *
2758 	 * CEEN is cleared so that further CEs that our instruction and
2759 	 * data footprint induce do not cause us to either creep down
2760 	 * kernel stack to the point of overflow, or do so much CE
2761 	 * notification as to make little real forward progress.
2762 	 *
2763 	 * NCEEN must not be cleared.  However it is possible that
2764 	 * our accesses to the flt_addr may provoke a bus error or timeout
2765 	 * if the offending address has just been unconfigured as part of
2766 	 * a DR action.  So we must operate under on_trap protection.
2767 	 */
2768 	kpreempt_disable();
2769 	orig_err = get_error_enable();
2770 	if (orig_err & EN_REG_CEEN)
2771 		set_error_enable(orig_err & ~EN_REG_CEEN);
2772 
2773 	/*
2774 	 * Our classification algorithm includes the line state before
2775 	 * the scrub; we'd like this captured after the detection and
2776 	 * before the algorithm below - the earlier the better.
2777 	 *
2778 	 * If we've come from a cpu CE trap then this info already exists
2779 	 * in the cpu logout area.
2780 	 *
2781 	 * For a CE detected by memscrub for which there was no trap
2782 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2783 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2784 	 * marked the fault structure as incomplete as a flag to later
2785 	 * logging code.
2786 	 *
2787 	 * If called directly from an IO detected CE there has been
2788 	 * no line data capture.  In this case we logout to the cpu logout
2789 	 * area - that's appropriate since it's the cpu cache data we need
2790 	 * for classification.  We thus borrow the cpu logout area for a
2791 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2792 	 * this time (we will invalidate it again below).
2793 	 *
2794 	 * If called from the partner check xcall handler then this cpu
2795 	 * (the partner) has not necessarily experienced a CE at this
2796 	 * address.  But we want to capture line state before its scrub
2797 	 * attempt since we use that in our classification.
2798 	 */
2799 	if (logout_tried == B_FALSE) {
2800 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2801 			disp |= CE_XDIAG_NOLOGOUT;
2802 	}
2803 
2804 	/*
2805 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2806 	 * no longer be valid (if DR'd since the initial event) so we
2807 	 * perform this scrub under on_trap protection.  If this access is
2808 	 * ok then further accesses below will also be ok - DR cannot
2809 	 * proceed while this thread is active (preemption is disabled);
2810 	 * to be safe we'll nonetheless use on_trap again below.
2811 	 */
2812 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2813 		cpu_scrubphys(ecc);
2814 	} else {
2815 		no_trap();
2816 		if (orig_err & EN_REG_CEEN)
2817 			set_error_enable(orig_err);
2818 		kpreempt_enable();
2819 		return (disp);
2820 	}
2821 	no_trap();
2822 
2823 	/*
2824 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2825 	 * Note that it's quite possible that the read sourced the data from
2826 	 * another cpu.
2827 	 */
2828 	if (clear_ecc(ecc))
2829 		disp |= CE_XDIAG_CE1;
2830 
2831 	/*
2832 	 * Read the data again.  This time the read is very likely to
2833 	 * come from memory since the scrub induced a writeback to memory.
2834 	 */
2835 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2836 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2837 	} else {
2838 		no_trap();
2839 		if (orig_err & EN_REG_CEEN)
2840 			set_error_enable(orig_err);
2841 		kpreempt_enable();
2842 		return (disp);
2843 	}
2844 	no_trap();
2845 
2846 	/* Did that read induce a CE that matches the AFAR? */
2847 	if (clear_ecc(ecc))
2848 		disp |= CE_XDIAG_CE2;
2849 
2850 	/*
2851 	 * Look at the logout information and record whether we found the
2852 	 * line in l2/l3 cache.  For Panther we are interested in whether
2853 	 * we found it in either cache (it won't reside in both but
2854 	 * it is possible to read it that way given the moving target).
2855 	 */
2856 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2857 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2858 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2859 		int hit, level;
2860 		int state;
2861 		int totalsize;
2862 		ch_ec_data_t *ecp;
2863 
2864 		/*
2865 		 * If hit is nonzero then a match was found and hit will
2866 		 * be one greater than the index which hit.  For Panther we
2867 		 * also need to pay attention to level to see which of l2$ or
2868 		 * l3$ it hit in.
2869 		 */
2870 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2871 		    0, &level);
2872 
2873 		if (hit) {
2874 			--hit;
2875 			disp |= CE_XDIAG_AFARMATCH;
2876 
2877 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2878 				if (level == 2)
2879 					ecp = &clop->clo_data.chd_l2_data[hit];
2880 				else
2881 					ecp = &clop->clo_data.chd_ec_data[hit];
2882 			} else {
2883 				ASSERT(level == 2);
2884 				ecp = &clop->clo_data.chd_ec_data[hit];
2885 			}
2886 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2887 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2888 			    ecc->flt_addr, ecp->ec_tag);
2889 
2890 			/*
2891 			 * Cheetah variants use different state encodings -
2892 			 * the CH_ECSTATE_* defines vary depending on the
2893 			 * module we're compiled for.  Translate into our
2894 			 * one true version.  Conflate Owner-Shared state
2895 			 * of SSM mode with Owner as victimisation of such
2896 			 * lines may cause a writeback.
2897 			 */
2898 			switch (state) {
2899 			case CH_ECSTATE_MOD:
2900 				disp |= EC_STATE_M;
2901 				break;
2902 
2903 			case CH_ECSTATE_OWN:
2904 			case CH_ECSTATE_OWS:
2905 				disp |= EC_STATE_O;
2906 				break;
2907 
2908 			case CH_ECSTATE_EXL:
2909 				disp |= EC_STATE_E;
2910 				break;
2911 
2912 			case CH_ECSTATE_SHR:
2913 				disp |= EC_STATE_S;
2914 				break;
2915 
2916 			default:
2917 				disp |= EC_STATE_I;
2918 				break;
2919 			}
2920 		}
2921 
2922 		/*
2923 		 * If we initiated the delayed logout then we are responsible
2924 		 * for invalidating the logout area.
2925 		 */
2926 		if (logout_tried == B_FALSE) {
2927 			bzero(clop, sizeof (ch_cpu_logout_t));
2928 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2929 		}
2930 	}
2931 
2932 	/*
2933 	 * Re-enable CEEN if we turned it off.
2934 	 */
2935 	if (orig_err & EN_REG_CEEN)
2936 		set_error_enable(orig_err);
2937 	kpreempt_enable();
2938 
2939 	return (disp);
2940 }
2941 
2942 /*
2943  * Scrub a correctable memory error and collect data for classification
2944  * of CE type.  This function is called in the detection path, ie tl0 handling
2945  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2946  */
2947 void
cpu_ce_scrub_mem_err(struct async_flt * ecc,boolean_t logout_tried)2948 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2949 {
2950 	/*
2951 	 * Cheetah CE classification does not set any bits in flt_status.
2952 	 * Instead we will record classification datapoints in flt_disp.
2953 	 */
2954 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2955 
2956 	/*
2957 	 * To check if the error detected by IO is persistent, sticky or
2958 	 * intermittent.  This is noticed by clear_ecc().
2959 	 */
2960 	if (ecc->flt_status & ECC_IOBUS)
2961 		ecc->flt_stat = C_AFSR_MEMORY;
2962 
2963 	/*
2964 	 * Record information from this first part of the algorithm in
2965 	 * flt_disp.
2966 	 */
2967 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2968 }
2969 
2970 /*
2971  * Select a partner to perform a further CE classification check from.
2972  * Must be called with kernel preemption disabled (to stop the cpu list
2973  * from changing).  The detecting cpu we are partnering has cpuid
2974  * aflt->flt_inst; we might not be running on the detecting cpu.
2975  *
2976  * Restrict choice to active cpus in the same cpu partition as ourselves in
2977  * an effort to stop bad cpus in one partition causing other partitions to
2978  * perform excessive diagnostic activity.  Actually since the errorq drain
2979  * is run from a softint most of the time and that is a global mechanism
2980  * this isolation is only partial.  Return NULL if we fail to find a
2981  * suitable partner.
2982  *
2983  * We prefer a partner that is in a different latency group to ourselves as
2984  * we will share fewer datapaths.  If such a partner is unavailable then
2985  * choose one in the same lgroup but prefer a different chip and only allow
2986  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2987  * flags includes PTNR_SELFOK then permit selection of the original detector.
2988  *
2989  * We keep a cache of the last partner selected for a cpu, and we'll try to
2990  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2991  * have passed since that selection was made.  This provides the benefit
2992  * of the point-of-view of different partners over time but without
2993  * requiring frequent cpu list traversals.
2994  */
2995 
2996 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2997 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2998 
2999 static cpu_t *
ce_ptnr_select(struct async_flt * aflt,int flags,int * typep)3000 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
3001 {
3002 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
3003 	hrtime_t lasttime, thistime;
3004 
3005 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
3006 
3007 	dtcr = cpu[aflt->flt_inst];
3008 
3009 	/*
3010 	 * Short-circuit for the following cases:
3011 	 *	. the dtcr is not flagged active
3012 	 *	. there is just one cpu present
3013 	 *	. the detector has disappeared
3014 	 *	. we were given a bad flt_inst cpuid; this should not happen
3015 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
3016 	 *	  reason to panic.
3017 	 *	. there is just one cpu left online in the cpu partition
3018 	 *
3019 	 * If we return NULL after this point then we do not update the
3020 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
3021 	 * again next time; this is the case where the only other cpu online
3022 	 * in the detector's partition is on the same chip as the detector
3023 	 * and since CEEN re-enable is throttled even that case should not
3024 	 * hurt performance.
3025 	 */
3026 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
3027 		return (NULL);
3028 	}
3029 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
3030 		if (flags & PTNR_SELFOK) {
3031 			*typep = CE_XDIAG_PTNR_SELF;
3032 			return (dtcr);
3033 		} else {
3034 			return (NULL);
3035 		}
3036 	}
3037 
3038 	thistime = gethrtime();
3039 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
3040 
3041 	/*
3042 	 * Select a starting point.
3043 	 */
3044 	if (!lasttime) {
3045 		/*
3046 		 * We've never selected a partner for this detector before.
3047 		 * Start the scan at the next online cpu in the same cpu
3048 		 * partition.
3049 		 */
3050 		sp = dtcr->cpu_next_part;
3051 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
3052 		/*
3053 		 * Our last selection has not aged yet.  If this partner:
3054 		 *	. is still a valid cpu,
3055 		 *	. is still in the same partition as the detector
3056 		 *	. is still marked active
3057 		 *	. satisfies the 'flags' argument criteria
3058 		 * then select it again without updating the timestamp.
3059 		 */
3060 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3061 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
3062 		    !cpu_flagged_active(sp->cpu_flags) ||
3063 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
3064 		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
3065 		    !(flags & PTNR_SIBLINGOK))) {
3066 			sp = dtcr->cpu_next_part;
3067 		} else {
3068 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3069 				*typep = CE_XDIAG_PTNR_REMOTE;
3070 			} else if (sp == dtcr) {
3071 				*typep = CE_XDIAG_PTNR_SELF;
3072 			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3073 				*typep = CE_XDIAG_PTNR_SIBLING;
3074 			} else {
3075 				*typep = CE_XDIAG_PTNR_LOCAL;
3076 			}
3077 			return (sp);
3078 		}
3079 	} else {
3080 		/*
3081 		 * Our last selection has aged.  If it is nonetheless still a
3082 		 * valid cpu then start the scan at the next cpu in the
3083 		 * partition after our last partner.  If the last selection
3084 		 * is no longer a valid cpu then go with our default.  In
3085 		 * this way we slowly cycle through possible partners to
3086 		 * obtain multiple viewpoints over time.
3087 		 */
3088 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3089 		if (sp == NULL) {
3090 			sp = dtcr->cpu_next_part;
3091 		} else {
3092 			sp = sp->cpu_next_part;		/* may be dtcr */
3093 			if (sp->cpu_part != dtcr->cpu_part)
3094 				sp = dtcr;
3095 		}
3096 	}
3097 
3098 	/*
3099 	 * We have a proposed starting point for our search, but if this
3100 	 * cpu is offline then its cpu_next_part will point to itself
3101 	 * so we can't use that to iterate over cpus in this partition in
3102 	 * the loop below.  We still want to avoid iterating over cpus not
3103 	 * in our partition, so in the case that our starting point is offline
3104 	 * we will repoint it to be the detector itself;  and if the detector
3105 	 * happens to be offline we'll return NULL from the following loop.
3106 	 */
3107 	if (!cpu_flagged_active(sp->cpu_flags)) {
3108 		sp = dtcr;
3109 	}
3110 
3111 	ptnr = sp;
3112 	locptnr = NULL;
3113 	sibptnr = NULL;
3114 	do {
3115 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3116 			continue;
3117 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3118 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3119 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3120 			*typep = CE_XDIAG_PTNR_REMOTE;
3121 			return (ptnr);
3122 		}
3123 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3124 			if (sibptnr == NULL)
3125 				sibptnr = ptnr;
3126 			continue;
3127 		}
3128 		if (locptnr == NULL)
3129 			locptnr = ptnr;
3130 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3131 
3132 	/*
3133 	 * A foreign partner has already been returned if one was available.
3134 	 *
3135 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3136 	 * detector, is active, and is not a sibling of the detector.
3137 	 *
3138 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3139 	 * active.
3140 	 *
3141 	 * If we have to resort to using the detector itself we have already
3142 	 * checked that it is active.
3143 	 */
3144 	if (locptnr) {
3145 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3146 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3147 		*typep = CE_XDIAG_PTNR_LOCAL;
3148 		return (locptnr);
3149 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3150 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3151 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3152 		*typep = CE_XDIAG_PTNR_SIBLING;
3153 		return (sibptnr);
3154 	} else if (flags & PTNR_SELFOK) {
3155 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3156 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3157 		*typep = CE_XDIAG_PTNR_SELF;
3158 		return (dtcr);
3159 	}
3160 
3161 	return (NULL);
3162 }
3163 
3164 /*
3165  * Cross call handler that is requested to run on the designated partner of
3166  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3167  */
3168 static void
ce_ptnrchk_xc(struct async_flt * aflt,uchar_t * dispp)3169 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3170 {
3171 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3172 }
3173 
3174 /*
3175  * The associated errorqs are never destroyed so we do not need to deal with
3176  * them disappearing before this timeout fires.  If the affected memory
3177  * has been DR'd out since the original event the scrub algrithm will catch
3178  * any errors and return null disposition info.  If the original detecting
3179  * cpu has been DR'd out then ereport detector info will not be able to
3180  * lookup CPU type;  with a small timeout this is unlikely.
3181  */
3182 static void
ce_lkychk_cb(ce_lkychk_cb_t * cbarg)3183 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3184 {
3185 	struct async_flt *aflt = cbarg->lkycb_aflt;
3186 	uchar_t disp;
3187 	cpu_t *cp;
3188 	int ptnrtype;
3189 
3190 	kpreempt_disable();
3191 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3192 	    &ptnrtype)) {
3193 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3194 		    (uint64_t)&disp);
3195 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3196 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3197 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3198 	} else {
3199 		ce_xdiag_lkydrops++;
3200 		if (ncpus > 1)
3201 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3202 			    CE_XDIAG_SKIP_NOPTNR);
3203 	}
3204 	kpreempt_enable();
3205 
3206 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3207 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3208 }
3209 
3210 /*
3211  * Called from errorq drain code when processing a CE error, both from
3212  * CPU and PCI drain functions.  Decide what further classification actions,
3213  * if any, we will perform.  Perform immediate actions now, and schedule
3214  * delayed actions as required.  Note that we are no longer necessarily running
3215  * on the detecting cpu, and that the async_flt structure will not persist on
3216  * return from this function.
3217  *
3218  * Calls to this function should aim to be self-throtlling in some way.  With
3219  * the delayed re-enable of CEEN the absolute rate of calls should not
3220  * be excessive.  Callers should also avoid performing in-depth classification
3221  * for events in pages that are already known to be suspect.
3222  *
3223  * We return nonzero to indicate that the event has been copied and
3224  * recirculated for further testing.  The caller should not log the event
3225  * in this case - it will be logged when further test results are available.
3226  *
3227  * Our possible contexts are that of errorq_drain: below lock level or from
3228  * panic context.  We can assume that the cpu we are running on is online.
3229  */
3230 
3231 
3232 #ifdef DEBUG
3233 static int ce_xdiag_forceaction;
3234 #endif
3235 
3236 int
ce_scrub_xdiag_recirc(struct async_flt * aflt,errorq_t * eqp,errorq_elem_t * eqep,size_t afltoffset)3237 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3238     errorq_elem_t *eqep, size_t afltoffset)
3239 {
3240 	ce_dispact_t dispact, action;
3241 	cpu_t *cp;
3242 	uchar_t dtcrinfo, disp;
3243 	int ptnrtype;
3244 
3245 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3246 		ce_xdiag_drops++;
3247 		return (0);
3248 	} else if (!aflt->flt_in_memory) {
3249 		ce_xdiag_drops++;
3250 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3251 		return (0);
3252 	}
3253 
3254 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3255 
3256 	/*
3257 	 * Some correctable events are not scrubbed/classified, such as those
3258 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3259 	 * initial detector classification go no further.
3260 	 */
3261 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3262 		ce_xdiag_drops++;
3263 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3264 		return (0);
3265 	}
3266 
3267 	dispact = CE_DISPACT(ce_disp_table,
3268 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3269 	    CE_XDIAG_STATE(dtcrinfo),
3270 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3271 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3272 
3273 
3274 	action = CE_ACT(dispact);	/* bad lookup caught below */
3275 #ifdef DEBUG
3276 	if (ce_xdiag_forceaction != 0)
3277 		action = ce_xdiag_forceaction;
3278 #endif
3279 
3280 	switch (action) {
3281 	case CE_ACT_LKYCHK: {
3282 		caddr_t ndata;
3283 		errorq_elem_t *neqep;
3284 		struct async_flt *ecc;
3285 		ce_lkychk_cb_t *cbargp;
3286 
3287 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3288 			ce_xdiag_lkydrops++;
3289 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3290 			    CE_XDIAG_SKIP_DUPFAIL);
3291 			break;
3292 		}
3293 		ecc = (struct async_flt *)(ndata + afltoffset);
3294 
3295 		ASSERT(ecc->flt_class == CPU_FAULT ||
3296 		    ecc->flt_class == BUS_FAULT);
3297 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3298 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3299 
3300 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3301 		cbargp->lkycb_aflt = ecc;
3302 		cbargp->lkycb_eqp = eqp;
3303 		cbargp->lkycb_eqep = neqep;
3304 
3305 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3306 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3307 		return (1);
3308 	}
3309 
3310 	case CE_ACT_PTNRCHK:
3311 		kpreempt_disable();	/* stop cpu list changing */
3312 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3313 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3314 			    (uint64_t)aflt, (uint64_t)&disp);
3315 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3316 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3317 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3318 		} else if (ncpus > 1) {
3319 			ce_xdiag_ptnrdrops++;
3320 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3321 			    CE_XDIAG_SKIP_NOPTNR);
3322 		} else {
3323 			ce_xdiag_ptnrdrops++;
3324 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3325 			    CE_XDIAG_SKIP_UNIPROC);
3326 		}
3327 		kpreempt_enable();
3328 		break;
3329 
3330 	case CE_ACT_DONE:
3331 		break;
3332 
3333 	case CE_ACT(CE_DISP_BAD):
3334 	default:
3335 #ifdef DEBUG
3336 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3337 #endif
3338 		ce_xdiag_bad++;
3339 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3340 		break;
3341 	}
3342 
3343 	return (0);
3344 }
3345 
3346 /*
3347  * We route all errors through a single switch statement.
3348  */
3349 void
cpu_ue_log_err(struct async_flt * aflt)3350 cpu_ue_log_err(struct async_flt *aflt)
3351 {
3352 	switch (aflt->flt_class) {
3353 	case CPU_FAULT:
3354 		cpu_ereport_init(aflt);
3355 		if (cpu_async_log_err(aflt, NULL))
3356 			cpu_ereport_post(aflt);
3357 		break;
3358 
3359 	case BUS_FAULT:
3360 		bus_async_log_err(aflt);
3361 		break;
3362 
3363 	default:
3364 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3365 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3366 		return;
3367 	}
3368 }
3369 
3370 /*
3371  * Routine for panic hook callback from panic_idle().
3372  */
3373 void
cpu_async_panic_callb(void)3374 cpu_async_panic_callb(void)
3375 {
3376 	ch_async_flt_t ch_flt;
3377 	struct async_flt *aflt;
3378 	ch_cpu_errors_t cpu_error_regs;
3379 	uint64_t afsr_errs;
3380 
3381 	get_cpu_error_state(&cpu_error_regs);
3382 
3383 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3384 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3385 
3386 	if (afsr_errs) {
3387 
3388 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3389 		aflt = (struct async_flt *)&ch_flt;
3390 		aflt->flt_id = gethrtime_waitfree();
3391 		aflt->flt_bus_id = getprocessorid();
3392 		aflt->flt_inst = CPU->cpu_id;
3393 		aflt->flt_stat = cpu_error_regs.afsr;
3394 		aflt->flt_addr = cpu_error_regs.afar;
3395 		aflt->flt_prot = AFLT_PROT_NONE;
3396 		aflt->flt_class = CPU_FAULT;
3397 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3398 		aflt->flt_panic = 1;
3399 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3400 		ch_flt.afsr_errs = afsr_errs;
3401 #if defined(SERRANO)
3402 		ch_flt.afar2 = cpu_error_regs.afar2;
3403 #endif	/* SERRANO */
3404 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3405 	}
3406 }
3407 
3408 /*
3409  * Routine to convert a syndrome into a syndrome code.
3410  */
3411 static int
synd_to_synd_code(int synd_status,ushort_t synd,uint64_t afsr_bit)3412 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3413 {
3414 	if (synd_status == AFLT_STAT_INVALID)
3415 		return (-1);
3416 
3417 	/*
3418 	 * Use the syndrome to index the appropriate syndrome table,
3419 	 * to get the code indicating which bit(s) is(are) bad.
3420 	 */
3421 	if (afsr_bit &
3422 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3423 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3424 #if defined(JALAPENO) || defined(SERRANO)
3425 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3426 				return (-1);
3427 			else
3428 				return (BPAR0 + synd);
3429 #else /* JALAPENO || SERRANO */
3430 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3431 				return (-1);
3432 			else
3433 				return (mtag_syndrome_tab[synd]);
3434 #endif /* JALAPENO || SERRANO */
3435 		} else {
3436 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3437 				return (-1);
3438 			else
3439 				return (ecc_syndrome_tab[synd]);
3440 		}
3441 	} else {
3442 		return (-1);
3443 	}
3444 }
3445 
3446 int
cpu_get_mem_sid(char * unum,char * buf,int buflen,int * lenp)3447 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3448 {
3449 	if (&plat_get_mem_sid)
3450 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3451 	else
3452 		return (ENOTSUP);
3453 }
3454 
3455 int
cpu_get_mem_offset(uint64_t flt_addr,uint64_t * offp)3456 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3457 {
3458 	if (&plat_get_mem_offset)
3459 		return (plat_get_mem_offset(flt_addr, offp));
3460 	else
3461 		return (ENOTSUP);
3462 }
3463 
3464 int
cpu_get_mem_addr(char * unum,char * sid,uint64_t offset,uint64_t * addrp)3465 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3466 {
3467 	if (&plat_get_mem_addr)
3468 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3469 	else
3470 		return (ENOTSUP);
3471 }
3472 
3473 /*
3474  * Routine to return a string identifying the physical name
3475  * associated with a memory/cache error.
3476  */
3477 int
cpu_get_mem_unum(int synd_status,ushort_t flt_synd,uint64_t flt_stat,uint64_t flt_addr,int flt_bus_id,int flt_in_memory,ushort_t flt_status,char * buf,int buflen,int * lenp)3478 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3479     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3480     ushort_t flt_status, char *buf, int buflen, int *lenp)
3481 {
3482 	int synd_code;
3483 	int ret;
3484 
3485 	/*
3486 	 * An AFSR of -1 defaults to a memory syndrome.
3487 	 */
3488 	if (flt_stat == (uint64_t)-1)
3489 		flt_stat = C_AFSR_CE;
3490 
3491 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3492 
3493 	/*
3494 	 * Syndrome code must be either a single-bit error code
3495 	 * (0...143) or -1 for unum lookup.
3496 	 */
3497 	if (synd_code < 0 || synd_code >= M2)
3498 		synd_code = -1;
3499 	if (&plat_get_mem_unum) {
3500 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3501 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3502 			buf[0] = '\0';
3503 			*lenp = 0;
3504 		}
3505 
3506 		return (ret);
3507 	}
3508 
3509 	return (ENOTSUP);
3510 }
3511 
3512 /*
3513  * Wrapper for cpu_get_mem_unum() routine that takes an
3514  * async_flt struct rather than explicit arguments.
3515  */
3516 int
cpu_get_mem_unum_aflt(int synd_status,struct async_flt * aflt,char * buf,int buflen,int * lenp)3517 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3518     char *buf, int buflen, int *lenp)
3519 {
3520 	/*
3521 	 * If we come thru here for an IO bus error aflt->flt_stat will
3522 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3523 	 * so it will interpret this as a memory error.
3524 	 */
3525 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3526 	    (aflt->flt_class == BUS_FAULT) ?
3527 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3528 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3529 	    aflt->flt_status, buf, buflen, lenp));
3530 }
3531 
3532 /*
3533  * Return unum string given synd_code and async_flt into
3534  * the buf with size UNUM_NAMLEN
3535  */
3536 static int
cpu_get_mem_unum_synd(int synd_code,struct async_flt * aflt,char * buf)3537 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3538 {
3539 	int ret, len;
3540 
3541 	/*
3542 	 * Syndrome code must be either a single-bit error code
3543 	 * (0...143) or -1 for unum lookup.
3544 	 */
3545 	if (synd_code < 0 || synd_code >= M2)
3546 		synd_code = -1;
3547 	if (&plat_get_mem_unum) {
3548 		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3549 		    aflt->flt_bus_id, aflt->flt_in_memory,
3550 		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3551 			buf[0] = '\0';
3552 		}
3553 		return (ret);
3554 	}
3555 
3556 	buf[0] = '\0';
3557 	return (ENOTSUP);
3558 }
3559 
3560 /*
3561  * This routine is a more generic interface to cpu_get_mem_unum()
3562  * that may be used by other modules (e.g. the 'mm' driver, through
3563  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3564  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3565  */
3566 int
cpu_get_mem_name(uint64_t synd,uint64_t * afsr,uint64_t afar,char * buf,int buflen,int * lenp)3567 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3568     char *buf, int buflen, int *lenp)
3569 {
3570 	int synd_status, flt_in_memory, ret;
3571 	ushort_t flt_status = 0;
3572 	char unum[UNUM_NAMLEN];
3573 	uint64_t t_afsr_errs;
3574 
3575 	/*
3576 	 * Check for an invalid address.
3577 	 */
3578 	if (afar == (uint64_t)-1)
3579 		return (ENXIO);
3580 
3581 	if (synd == (uint64_t)-1)
3582 		synd_status = AFLT_STAT_INVALID;
3583 	else
3584 		synd_status = AFLT_STAT_VALID;
3585 
3586 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3587 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3588 
3589 	/*
3590 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3591 	 */
3592 	if (*afsr == (uint64_t)-1)
3593 		t_afsr_errs = C_AFSR_CE;
3594 	else {
3595 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3596 #if defined(CHEETAH_PLUS)
3597 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3598 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3599 #endif	/* CHEETAH_PLUS */
3600 	}
3601 
3602 	/*
3603 	 * Turn on ECC_ECACHE if error type is E$ Data.
3604 	 */
3605 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3606 		flt_status |= ECC_ECACHE;
3607 
3608 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3609 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3610 	if (ret != 0)
3611 		return (ret);
3612 
3613 	if (*lenp >= buflen)
3614 		return (ENAMETOOLONG);
3615 
3616 	(void) strncpy(buf, unum, buflen);
3617 
3618 	return (0);
3619 }
3620 
3621 /*
3622  * Routine to return memory information associated
3623  * with a physical address and syndrome.
3624  */
3625 int
cpu_get_mem_info(uint64_t synd,uint64_t afar,uint64_t * mem_sizep,uint64_t * seg_sizep,uint64_t * bank_sizep,int * segsp,int * banksp,int * mcidp)3626 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3627     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3628     int *segsp, int *banksp, int *mcidp)
3629 {
3630 	int synd_status, synd_code;
3631 
3632 	if (afar == (uint64_t)-1)
3633 		return (ENXIO);
3634 
3635 	if (synd == (uint64_t)-1)
3636 		synd_status = AFLT_STAT_INVALID;
3637 	else
3638 		synd_status = AFLT_STAT_VALID;
3639 
3640 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3641 
3642 	if (p2get_mem_info != NULL)
3643 		return ((p2get_mem_info)(synd_code, afar,
3644 		    mem_sizep, seg_sizep, bank_sizep,
3645 		    segsp, banksp, mcidp));
3646 	else
3647 		return (ENOTSUP);
3648 }
3649 
3650 /*
3651  * Routine to return a string identifying the physical
3652  * name associated with a cpuid.
3653  */
3654 int
cpu_get_cpu_unum(int cpuid,char * buf,int buflen,int * lenp)3655 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3656 {
3657 	int ret;
3658 	char unum[UNUM_NAMLEN];
3659 
3660 	if (&plat_get_cpu_unum) {
3661 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3662 		    != 0)
3663 			return (ret);
3664 	} else {
3665 		return (ENOTSUP);
3666 	}
3667 
3668 	if (*lenp >= buflen)
3669 		return (ENAMETOOLONG);
3670 
3671 	(void) strncpy(buf, unum, buflen);
3672 
3673 	return (0);
3674 }
3675 
3676 /*
3677  * This routine exports the name buffer size.
3678  */
3679 size_t
cpu_get_name_bufsize()3680 cpu_get_name_bufsize()
3681 {
3682 	return (UNUM_NAMLEN);
3683 }
3684 
/*
 * Historical function, apparently not used.  It ignores its arguments
 * and does nothing.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3692 
/*
 * Historical function only called for SBus errors in debugging.  It
 * ignores its arguments and does nothing.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3700 
3701 /*
3702  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3703  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3704  * an async fault structure argument is passed in, the captured error state
3705  * (AFSR, AFAR) info will be returned in the structure.
3706  */
3707 int
clear_errors(ch_async_flt_t * ch_flt)3708 clear_errors(ch_async_flt_t *ch_flt)
3709 {
3710 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3711 	ch_cpu_errors_t	cpu_error_regs;
3712 
3713 	get_cpu_error_state(&cpu_error_regs);
3714 
3715 	if (ch_flt != NULL) {
3716 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3717 		aflt->flt_addr = cpu_error_regs.afar;
3718 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3719 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3720 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3721 #if defined(SERRANO)
3722 		ch_flt->afar2 = cpu_error_regs.afar2;
3723 #endif	/* SERRANO */
3724 	}
3725 
3726 	set_cpu_error_state(&cpu_error_regs);
3727 
3728 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3729 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3730 }
3731 
3732 /*
3733  * Clear any AFSR error bits, and check for persistence.
3734  *
3735  * It would be desirable to also insist that syndrome match.  PCI handling
3736  * has already filled flt_synd.  For errors trapped by CPU we only fill
3737  * flt_synd when we queue the event, so we do not have a valid flt_synd
3738  * during initial classification (it is valid if we're called as part of
3739  * subsequent low-pil additional classification attempts).  We could try
3740  * to determine which syndrome to use: we know we're only called for
3741  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3742  * would be esynd/none and esynd/msynd, respectively.  If that is
3743  * implemented then what do we do in the case that we do experience an
3744  * error on the same afar but with different syndrome?  At the very least
3745  * we should count such occurences.  Anyway, for now, we'll leave it as
3746  * it has been for ages.
3747  */
3748 static int
clear_ecc(struct async_flt * aflt)3749 clear_ecc(struct async_flt *aflt)
3750 {
3751 	ch_cpu_errors_t	cpu_error_regs;
3752 
3753 	/*
3754 	 * Snapshot the AFSR and AFAR and clear any errors
3755 	 */
3756 	get_cpu_error_state(&cpu_error_regs);
3757 	set_cpu_error_state(&cpu_error_regs);
3758 
3759 	/*
3760 	 * If any of the same memory access error bits are still on and
3761 	 * the AFAR matches, return that the error is persistent.
3762 	 */
3763 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3764 	    cpu_error_regs.afar == aflt->flt_addr);
3765 }
3766 
3767 /*
3768  * Turn off all cpu error detection, normally only used for panics.
3769  */
3770 void
cpu_disable_errors(void)3771 cpu_disable_errors(void)
3772 {
3773 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3774 
3775 	/*
3776 	 * With error detection now turned off, check the other cpus
3777 	 * logout areas for any unlogged errors.
3778 	 */
3779 	if (enable_check_other_cpus_logout) {
3780 		cpu_check_other_cpus_logout();
3781 		/*
3782 		 * Make a second pass over the logout areas, in case
3783 		 * there is a failing CPU in an error-trap loop which
3784 		 * will write to the logout area once it is emptied.
3785 		 */
3786 		cpu_check_other_cpus_logout();
3787 	}
3788 }
3789 
3790 /*
3791  * Enable errors.
3792  */
3793 void
cpu_enable_errors(void)3794 cpu_enable_errors(void)
3795 {
3796 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3797 }
3798 
3799 /*
3800  * Flush the entire ecache using displacement flush by reading through a
3801  * physical address range twice as large as the Ecache.
3802  */
3803 void
cpu_flush_ecache(void)3804 cpu_flush_ecache(void)
3805 {
3806 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3807 	    cpunodes[CPU->cpu_id].ecache_linesize);
3808 }
3809 
3810 /*
3811  * Return CPU E$ set size - E$ size divided by the associativity.
3812  * We use this function in places where the CPU_PRIVATE ptr may not be
3813  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3814  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3815  * up before the kernel switches from OBP's to the kernel's trap table, so
3816  * we don't have to worry about cpunodes being unitialized.
3817  */
3818 int
cpu_ecache_set_size(struct cpu * cp)3819 cpu_ecache_set_size(struct cpu *cp)
3820 {
3821 	if (CPU_PRIVATE(cp))
3822 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3823 
3824 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3825 }
3826 
3827 /*
3828  * Flush Ecache line.
3829  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3830  * Uses normal displacement flush for Cheetah.
3831  */
3832 static void
cpu_flush_ecache_line(ch_async_flt_t * ch_flt)3833 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3834 {
3835 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3836 	int ec_set_size = cpu_ecache_set_size(CPU);
3837 
3838 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3839 }
3840 
3841 /*
3842  * Scrub physical address.
3843  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3844  * Ecache or direct-mapped Ecache.
3845  */
3846 static void
cpu_scrubphys(struct async_flt * aflt)3847 cpu_scrubphys(struct async_flt *aflt)
3848 {
3849 	int ec_set_size = cpu_ecache_set_size(CPU);
3850 
3851 	scrubphys(aflt->flt_addr, ec_set_size);
3852 }
3853 
3854 /*
3855  * Clear physical address.
3856  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3857  * Ecache or direct-mapped Ecache.
3858  */
3859 void
cpu_clearphys(struct async_flt * aflt)3860 cpu_clearphys(struct async_flt *aflt)
3861 {
3862 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3863 	int ec_set_size = cpu_ecache_set_size(CPU);
3864 
3865 
3866 	clearphys(aflt->flt_addr, ec_set_size, lsize);
3867 }
3868 
#if defined(CPU_IMP_ECACHE_ASSOC)
/*
 * Search every way of the captured E$ diagnostic data for a line that
 * is valid and whose tag matches the fault address.
 * Returns set# + 1 for the first match, or 0 if there is none.
 */
static int
cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ch_ec_data_t *ways = &ch_flt->flt_diag_data.chd_ec_data[0];
	int ec_size = cpunodes[CPU->cpu_id].ecache_size;
	int set_size = cpu_ecache_set_size(CPU);
	int nways = cpu_ecache_nway();
	int way;

	for (way = 0; way < nways; way++) {
		/* Invalid lines can never match. */
		if (cpu_ectag_line_invalid(ec_size, ways[way].ec_tag))
			continue;

		/* Compare only the fault address bits kept in the tag. */
		if ((aflt->flt_addr & P2ALIGN(C_AFAR_PA, set_size)) ==
		    cpu_ectag_to_pa(set_size, ways[way].ec_tag))
			return (way + 1);
	}

	return (0);
}
#endif /* CPU_IMP_ECACHE_ASSOC */
3893 
3894 /*
3895  * Check whether a line in the given logout info matches the specified
3896  * fault address.  If reqval is set then the line must not be Invalid.
3897  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3898  * set to 2 for l2$ or 3 for l3$.
3899  */
3900 static int
cpu_matching_ecache_line(uint64_t faddr,void * data,int reqval,int * level)3901 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3902 {
3903 	ch_diag_data_t *cdp = data;
3904 	ch_ec_data_t *ecp;
3905 	int totalsize, ec_set_size;
3906 	int i, ways;
3907 	int match = 0;
3908 	int tagvalid;
3909 	uint64_t addr, tagpa;
3910 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3911 
3912 	/*
3913 	 * Check the l2$ logout data
3914 	 */
3915 	if (ispanther) {
3916 		ecp = &cdp->chd_l2_data[0];
3917 		ec_set_size = PN_L2_SET_SIZE;
3918 		ways = PN_L2_NWAYS;
3919 	} else {
3920 		ecp = &cdp->chd_ec_data[0];
3921 		ec_set_size = cpu_ecache_set_size(CPU);
3922 		ways = cpu_ecache_nway();
3923 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3924 	}
3925 	/* remove low order PA bits from fault address not used in PA tag */
3926 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3927 	for (i = 0; i < ways; i++, ecp++) {
3928 		if (ispanther) {
3929 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3930 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3931 		} else {
3932 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3933 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3934 			    ecp->ec_tag);
3935 		}
3936 		if (tagpa == addr && (!reqval || tagvalid)) {
3937 			match = i + 1;
3938 			*level = 2;
3939 			break;
3940 		}
3941 	}
3942 
3943 	if (match || !ispanther)
3944 		return (match);
3945 
3946 	/* For Panther we also check the l3$ */
3947 	ecp = &cdp->chd_ec_data[0];
3948 	ec_set_size = PN_L3_SET_SIZE;
3949 	ways = PN_L3_NWAYS;
3950 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3951 
3952 	for (i = 0; i < ways; i++, ecp++) {
3953 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3954 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3955 			match = i + 1;
3956 			*level = 3;
3957 			break;
3958 		}
3959 	}
3960 
3961 	return (match);
3962 }
3963 
3964 #if defined(CPU_IMP_L1_CACHE_PARITY)
3965 /*
3966  * Record information related to the source of an Dcache Parity Error.
3967  */
3968 static void
cpu_dcache_parity_info(ch_async_flt_t * ch_flt)3969 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3970 {
3971 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3972 	int index;
3973 
3974 	/*
3975 	 * Since instruction decode cannot be done at high PIL
3976 	 * just examine the entire Dcache to locate the error.
3977 	 */
3978 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3979 		ch_flt->parity_data.dpe.cpl_way = -1;
3980 		ch_flt->parity_data.dpe.cpl_off = -1;
3981 	}
3982 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3983 		cpu_dcache_parity_check(ch_flt, index);
3984 }
3985 
3986 /*
3987  * Check all ways of the Dcache at a specified index for good parity.
3988  */
3989 static void
cpu_dcache_parity_check(ch_async_flt_t * ch_flt,int index)3990 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3991 {
3992 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3993 	uint64_t parity_bits, pbits, data_word;
3994 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3995 	int way, word, data_byte;
3996 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3997 	ch_dc_data_t tmp_dcp;
3998 
3999 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
4000 		/*
4001 		 * Perform diagnostic read.
4002 		 */
4003 		get_dcache_dtag(index + way * dc_set_size,
4004 		    (uint64_t *)&tmp_dcp);
4005 
4006 		/*
4007 		 * Check tag for even parity.
4008 		 * Sum of 1 bits (including parity bit) should be even.
4009 		 */
4010 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
4011 			/*
4012 			 * If this is the first error log detailed information
4013 			 * about it and check the snoop tag. Otherwise just
4014 			 * record the fact that we found another error.
4015 			 */
4016 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4017 				ch_flt->parity_data.dpe.cpl_way = way;
4018 				ch_flt->parity_data.dpe.cpl_cache =
4019 				    CPU_DC_PARITY;
4020 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
4021 
4022 				if (popc64(tmp_dcp.dc_sntag &
4023 				    CHP_DCSNTAG_PARMASK) & 1) {
4024 					ch_flt->parity_data.dpe.cpl_tag |=
4025 					    CHP_DC_SNTAG;
4026 					ch_flt->parity_data.dpe.cpl_lcnt++;
4027 				}
4028 
4029 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
4030 			}
4031 
4032 			ch_flt->parity_data.dpe.cpl_lcnt++;
4033 		}
4034 
4035 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4036 			/*
4037 			 * Panther has more parity bits than the other
4038 			 * processors for covering dcache data and so each
4039 			 * byte of data in each word has its own parity bit.
4040 			 */
4041 			parity_bits = tmp_dcp.dc_pn_data_parity;
4042 			for (word = 0; word < 4; word++) {
4043 				data_word = tmp_dcp.dc_data[word];
4044 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
4045 				for (data_byte = 0; data_byte < 8;
4046 				    data_byte++) {
4047 					if (((popc64(data_word &
4048 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
4049 					    (pbits & 1)) {
4050 						cpu_record_dc_data_parity(
4051 						    ch_flt, dcp, &tmp_dcp, way,
4052 						    word);
4053 					}
4054 					pbits >>= 1;
4055 					data_word >>= 8;
4056 				}
4057 				parity_bits >>= 8;
4058 			}
4059 		} else {
4060 			/*
4061 			 * Check data array for even parity.
4062 			 * The 8 parity bits are grouped into 4 pairs each
4063 			 * of which covers a 64-bit word.  The endianness is
4064 			 * reversed -- the low-order parity bits cover the
4065 			 * high-order data words.
4066 			 */
4067 			parity_bits = tmp_dcp.dc_utag >> 8;
4068 			for (word = 0; word < 4; word++) {
4069 				pbits = (parity_bits >> (6 - word * 2)) & 3;
4070 				if ((popc64(tmp_dcp.dc_data[word]) +
4071 				    parity_bits_popc[pbits]) & 1) {
4072 					cpu_record_dc_data_parity(ch_flt, dcp,
4073 					    &tmp_dcp, way, word);
4074 				}
4075 			}
4076 		}
4077 	}
4078 }
4079 
4080 static void
cpu_record_dc_data_parity(ch_async_flt_t * ch_flt,ch_dc_data_t * dest_dcp,ch_dc_data_t * src_dcp,int way,int word)4081 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4082     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4083 {
4084 	/*
4085 	 * If this is the first error log detailed information about it.
4086 	 * Otherwise just record the fact that we found another error.
4087 	 */
4088 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4089 		ch_flt->parity_data.dpe.cpl_way = way;
4090 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4091 		ch_flt->parity_data.dpe.cpl_off = word * 8;
4092 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4093 	}
4094 	ch_flt->parity_data.dpe.cpl_lcnt++;
4095 }
4096 
4097 /*
4098  * Record information related to the source of an Icache Parity Error.
4099  *
4100  * Called with the Icache disabled so any diagnostic accesses are safe.
4101  */
4102 static void
cpu_icache_parity_info(ch_async_flt_t * ch_flt)4103 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4104 {
4105 	int	ic_set_size;
4106 	int	ic_linesize;
4107 	int	index;
4108 
4109 	if (CPU_PRIVATE(CPU)) {
4110 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4111 		    CH_ICACHE_NWAY;
4112 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4113 	} else {
4114 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4115 		ic_linesize = icache_linesize;
4116 	}
4117 
4118 	ch_flt->parity_data.ipe.cpl_way = -1;
4119 	ch_flt->parity_data.ipe.cpl_off = -1;
4120 
4121 	for (index = 0; index < ic_set_size; index += ic_linesize)
4122 		cpu_icache_parity_check(ch_flt, index);
4123 }
4124 
4125 /*
4126  * Check all ways of the Icache at a specified index for good parity.
4127  */
4128 static void
cpu_icache_parity_check(ch_async_flt_t * ch_flt,int index)4129 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4130 {
4131 	uint64_t parmask, pn_inst_parity;
4132 	int ic_set_size;
4133 	int ic_linesize;
4134 	int flt_index, way, instr, num_instr;
4135 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4136 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4137 	ch_ic_data_t tmp_icp;
4138 
4139 	if (CPU_PRIVATE(CPU)) {
4140 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4141 		    CH_ICACHE_NWAY;
4142 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4143 	} else {
4144 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4145 		ic_linesize = icache_linesize;
4146 	}
4147 
4148 	/*
4149 	 * Panther has twice as many instructions per icache line and the
4150 	 * instruction parity bit is in a different location.
4151 	 */
4152 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4153 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4154 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4155 	} else {
4156 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4157 		pn_inst_parity = 0;
4158 	}
4159 
4160 	/*
4161 	 * Index at which we expect to find the parity error.
4162 	 */
4163 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4164 
4165 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4166 		/*
4167 		 * Diagnostic reads expect address argument in ASI format.
4168 		 */
4169 		get_icache_dtag(2 * (index + way * ic_set_size),
4170 		    (uint64_t *)&tmp_icp);
4171 
4172 		/*
4173 		 * If this is the index in which we expect to find the
4174 		 * error log detailed information about each of the ways.
4175 		 * This information will be displayed later if we can't
4176 		 * determine the exact way in which the error is located.
4177 		 */
4178 		if (flt_index == index)
4179 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4180 
4181 		/*
4182 		 * Check tag for even parity.
4183 		 * Sum of 1 bits (including parity bit) should be even.
4184 		 */
4185 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4186 			/*
4187 			 * If this way is the one in which we expected
4188 			 * to find the error record the way and check the
4189 			 * snoop tag. Otherwise just record the fact we
4190 			 * found another error.
4191 			 */
4192 			if (flt_index == index) {
4193 				ch_flt->parity_data.ipe.cpl_way = way;
4194 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4195 
4196 				if (popc64(tmp_icp.ic_sntag &
4197 				    CHP_ICSNTAG_PARMASK) & 1) {
4198 					ch_flt->parity_data.ipe.cpl_tag |=
4199 					    CHP_IC_SNTAG;
4200 					ch_flt->parity_data.ipe.cpl_lcnt++;
4201 				}
4202 
4203 			}
4204 			ch_flt->parity_data.ipe.cpl_lcnt++;
4205 			continue;
4206 		}
4207 
4208 		/*
4209 		 * Check instruction data for even parity.
4210 		 * Bits participating in parity differ for PC-relative
4211 		 * versus non-PC-relative instructions.
4212 		 */
4213 		for (instr = 0; instr < num_instr; instr++) {
4214 			parmask = (tmp_icp.ic_data[instr] &
4215 			    CH_ICDATA_PRED_ISPCREL) ?
4216 			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4217 			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4218 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4219 				/*
4220 				 * If this way is the one in which we expected
4221 				 * to find the error record the way and offset.
4222 				 * Otherwise just log the fact we found another
4223 				 * error.
4224 				 */
4225 				if (flt_index == index) {
4226 					ch_flt->parity_data.ipe.cpl_way = way;
4227 					ch_flt->parity_data.ipe.cpl_off =
4228 					    instr * 4;
4229 				}
4230 				ch_flt->parity_data.ipe.cpl_lcnt++;
4231 				continue;
4232 			}
4233 		}
4234 	}
4235 }
4236 
4237 /*
4238  * Record information related to the source of an Pcache Parity Error.
4239  */
4240 static void
cpu_pcache_parity_info(ch_async_flt_t * ch_flt)4241 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4242 {
4243 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4244 	int index;
4245 
4246 	/*
4247 	 * Since instruction decode cannot be done at high PIL just
4248 	 * examine the entire Pcache to check for any parity errors.
4249 	 */
4250 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4251 		ch_flt->parity_data.dpe.cpl_way = -1;
4252 		ch_flt->parity_data.dpe.cpl_off = -1;
4253 	}
4254 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4255 		cpu_pcache_parity_check(ch_flt, index);
4256 }
4257 
4258 /*
4259  * Check all ways of the Pcache at a specified index for good parity.
4260  */
4261 static void
cpu_pcache_parity_check(ch_async_flt_t * ch_flt,int index)4262 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4263 {
4264 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4265 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4266 	int way, word, pbit, parity_bits;
4267 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4268 	ch_pc_data_t tmp_pcp;
4269 
4270 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4271 		/*
4272 		 * Perform diagnostic read.
4273 		 */
4274 		get_pcache_dtag(index + way * pc_set_size,
4275 		    (uint64_t *)&tmp_pcp);
4276 		/*
4277 		 * Check data array for odd parity. There are 8 parity
4278 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4279 		 * of those bits covers exactly 8 bytes of the data
4280 		 * array:
4281 		 *
4282 		 *	parity bit	P$ data bytes covered
4283 		 *	----------	---------------------
4284 		 *	50		63:56
4285 		 *	51		55:48
4286 		 *	52		47:40
4287 		 *	53		39:32
4288 		 *	54		31:24
4289 		 *	55		23:16
4290 		 *	56		15:8
4291 		 *	57		7:0
4292 		 */
4293 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4294 		for (word = 0; word < pc_data_words; word++) {
4295 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4296 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4297 				/*
4298 				 * If this is the first error log detailed
4299 				 * information about it. Otherwise just record
4300 				 * the fact that we found another error.
4301 				 */
4302 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4303 					ch_flt->parity_data.dpe.cpl_way = way;
4304 					ch_flt->parity_data.dpe.cpl_cache =
4305 					    CPU_PC_PARITY;
4306 					ch_flt->parity_data.dpe.cpl_off =
4307 					    word * sizeof (uint64_t);
4308 					bcopy(&tmp_pcp, pcp,
4309 					    sizeof (ch_pc_data_t));
4310 				}
4311 				ch_flt->parity_data.dpe.cpl_lcnt++;
4312 			}
4313 		}
4314 	}
4315 }
4316 
4317 
4318 /*
4319  * Add L1 Data cache data to the ereport payload.
4320  */
4321 static void
cpu_payload_add_dcache(struct async_flt * aflt,nvlist_t * nvl)4322 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4323 {
4324 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4325 	ch_dc_data_t *dcp;
4326 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4327 	uint_t nelem;
4328 	int i, ways_to_check, ways_logged = 0;
4329 
4330 	/*
4331 	 * If this is an D$ fault then there may be multiple
4332 	 * ways captured in the ch_parity_log_t structure.
4333 	 * Otherwise, there will be at most one way captured
4334 	 * in the ch_diag_data_t struct.
4335 	 * Check each way to see if it should be encoded.
4336 	 */
4337 	if (ch_flt->flt_type == CPU_DC_PARITY)
4338 		ways_to_check = CH_DCACHE_NWAY;
4339 	else
4340 		ways_to_check = 1;
4341 	for (i = 0; i < ways_to_check; i++) {
4342 		if (ch_flt->flt_type == CPU_DC_PARITY)
4343 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4344 		else
4345 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4346 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4347 			bcopy(dcp, &dcdata[ways_logged],
4348 			    sizeof (ch_dc_data_t));
4349 			ways_logged++;
4350 		}
4351 	}
4352 
4353 	/*
4354 	 * Add the dcache data to the payload.
4355 	 */
4356 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4357 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4358 	if (ways_logged != 0) {
4359 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4360 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4361 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4362 	}
4363 }
4364 
4365 /*
4366  * Add L1 Instruction cache data to the ereport payload.
4367  */
4368 static void
cpu_payload_add_icache(struct async_flt * aflt,nvlist_t * nvl)4369 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4370 {
4371 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4372 	ch_ic_data_t *icp;
4373 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4374 	uint_t nelem;
4375 	int i, ways_to_check, ways_logged = 0;
4376 
4377 	/*
4378 	 * If this is an I$ fault then there may be multiple
4379 	 * ways captured in the ch_parity_log_t structure.
4380 	 * Otherwise, there will be at most one way captured
4381 	 * in the ch_diag_data_t struct.
4382 	 * Check each way to see if it should be encoded.
4383 	 */
4384 	if (ch_flt->flt_type == CPU_IC_PARITY)
4385 		ways_to_check = CH_ICACHE_NWAY;
4386 	else
4387 		ways_to_check = 1;
4388 	for (i = 0; i < ways_to_check; i++) {
4389 		if (ch_flt->flt_type == CPU_IC_PARITY)
4390 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4391 		else
4392 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4393 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4394 			bcopy(icp, &icdata[ways_logged],
4395 			    sizeof (ch_ic_data_t));
4396 			ways_logged++;
4397 		}
4398 	}
4399 
4400 	/*
4401 	 * Add the icache data to the payload.
4402 	 */
4403 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4404 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4405 	if (ways_logged != 0) {
4406 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4407 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4408 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4409 	}
4410 }
4411 
4412 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4413 
4414 /*
4415  * Add ecache data to payload.
4416  */
4417 static void
cpu_payload_add_ecache(struct async_flt * aflt,nvlist_t * nvl)4418 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4419 {
4420 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4421 	ch_ec_data_t *ecp;
4422 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4423 	uint_t nelem;
4424 	int i, ways_logged = 0;
4425 
4426 	/*
4427 	 * Check each way to see if it should be encoded
4428 	 * and concatinate it into a temporary buffer.
4429 	 */
4430 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4431 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4432 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4433 			bcopy(ecp, &ecdata[ways_logged],
4434 			    sizeof (ch_ec_data_t));
4435 			ways_logged++;
4436 		}
4437 	}
4438 
4439 	/*
4440 	 * Panther CPUs have an additional level of cache and so
4441 	 * what we just collected was the L3 (ecache) and not the
4442 	 * L2 cache.
4443 	 */
4444 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4445 		/*
4446 		 * Add the L3 (ecache) data to the payload.
4447 		 */
4448 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4449 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4450 		if (ways_logged != 0) {
4451 			nelem = sizeof (ch_ec_data_t) /
4452 			    sizeof (uint64_t) * ways_logged;
4453 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4454 			    DATA_TYPE_UINT64_ARRAY, nelem,
4455 			    (uint64_t *)ecdata, NULL);
4456 		}
4457 
4458 		/*
4459 		 * Now collect the L2 cache.
4460 		 */
4461 		ways_logged = 0;
4462 		for (i = 0; i < PN_L2_NWAYS; i++) {
4463 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4464 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4465 				bcopy(ecp, &ecdata[ways_logged],
4466 				    sizeof (ch_ec_data_t));
4467 				ways_logged++;
4468 			}
4469 		}
4470 	}
4471 
4472 	/*
4473 	 * Add the L2 cache data to the payload.
4474 	 */
4475 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4476 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4477 	if (ways_logged != 0) {
4478 		nelem = sizeof (ch_ec_data_t) /
4479 		    sizeof (uint64_t) * ways_logged;
4480 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4481 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4482 	}
4483 }
4484 
4485 /*
4486  * Initialize cpu scheme for specified cpu.
4487  */
4488 static void
cpu_fmri_cpu_set(nvlist_t * cpu_fmri,int cpuid)4489 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4490 {
4491 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4492 	uint8_t mask;
4493 
4494 	mask = cpunodes[cpuid].version;
4495 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4496 	    (u_longlong_t)cpunodes[cpuid].device_id);
4497 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4498 	    cpuid, &mask, (const char *)sbuf);
4499 }
4500 
4501 /*
4502  * Returns ereport resource type.
4503  */
4504 static int
cpu_error_to_resource_type(struct async_flt * aflt)4505 cpu_error_to_resource_type(struct async_flt *aflt)
4506 {
4507 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4508 
4509 	switch (ch_flt->flt_type) {
4510 
4511 	case CPU_CE_ECACHE:
4512 	case CPU_UE_ECACHE:
4513 	case CPU_UE_ECACHE_RETIRE:
4514 	case CPU_ORPH:
4515 		/*
4516 		 * If AFSR error bit indicates L2$ Data for Cheetah,
4517 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4518 		 * E$ Data type, otherwise, return CPU type.
4519 		 */
4520 		if (cpu_error_is_ecache_data(aflt->flt_inst,
4521 		    ch_flt->flt_bit))
4522 			return (ERRTYPE_ECACHE_DATA);
4523 		return (ERRTYPE_CPU);
4524 
4525 	case CPU_CE:
4526 	case CPU_UE:
4527 	case CPU_EMC:
4528 	case CPU_DUE:
4529 	case CPU_RCE:
4530 	case CPU_RUE:
4531 	case CPU_FRC:
4532 	case CPU_FRU:
4533 		return (ERRTYPE_MEMORY);
4534 
4535 	case CPU_IC_PARITY:
4536 	case CPU_DC_PARITY:
4537 	case CPU_FPUERR:
4538 	case CPU_PC_PARITY:
4539 	case CPU_ITLB_PARITY:
4540 	case CPU_DTLB_PARITY:
4541 		return (ERRTYPE_CPU);
4542 	}
4543 	return (ERRTYPE_UNKNOWN);
4544 }
4545 
4546 /*
4547  * Encode the data saved in the ch_async_flt_t struct into
4548  * the FM ereport payload.
4549  */
4550 static void
cpu_payload_add_aflt(struct async_flt * aflt,nvlist_t * payload,nvlist_t * resource,int * afar_status,int * synd_status)4551 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4552 	nvlist_t *resource, int *afar_status, int *synd_status)
4553 {
4554 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4555 	*synd_status = AFLT_STAT_INVALID;
4556 	*afar_status = AFLT_STAT_INVALID;
4557 
4558 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4559 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4560 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4561 	}
4562 
4563 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4564 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4565 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4566 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4567 	}
4568 
4569 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4570 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4571 		    ch_flt->flt_bit);
4572 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4573 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4574 	}
4575 
4576 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4577 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4578 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4579 	}
4580 
4581 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4582 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4583 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4584 	}
4585 
4586 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4587 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4588 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4589 	}
4590 
4591 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4592 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4593 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4594 	}
4595 
4596 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4597 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4598 		    DATA_TYPE_BOOLEAN_VALUE,
4599 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4600 	}
4601 
4602 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4603 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4604 		    DATA_TYPE_BOOLEAN_VALUE,
4605 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4606 	}
4607 
4608 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4609 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4610 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4611 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4612 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4613 	}
4614 
4615 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4616 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4617 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4618 	}
4619 
4620 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4621 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4622 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4623 	}
4624 
4625 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4626 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4627 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4628 	}
4629 
4630 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4631 		cpu_payload_add_ecache(aflt, payload);
4632 
4633 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4634 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4635 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4636 	}
4637 
4638 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4639 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4640 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4641 	}
4642 
4643 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4644 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4645 		    DATA_TYPE_UINT32_ARRAY, 16,
4646 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4647 	}
4648 
4649 #if defined(CPU_IMP_L1_CACHE_PARITY)
4650 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4651 		cpu_payload_add_dcache(aflt, payload);
4652 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4653 		cpu_payload_add_icache(aflt, payload);
4654 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4655 
4656 #if defined(CHEETAH_PLUS)
4657 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4658 		cpu_payload_add_pcache(aflt, payload);
4659 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4660 		cpu_payload_add_tlb(aflt, payload);
4661 #endif	/* CHEETAH_PLUS */
4662 	/*
4663 	 * Create the FMRI that goes into the payload
4664 	 * and contains the unum info if necessary.
4665 	 */
4666 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4667 		char unum[UNUM_NAMLEN] = "";
4668 		char sid[DIMM_SERIAL_ID_LEN] = "";
4669 		int len, ret, rtype, synd_code;
4670 		uint64_t offset = (uint64_t)-1;
4671 
4672 		rtype = cpu_error_to_resource_type(aflt);
4673 		switch (rtype) {
4674 
4675 		case ERRTYPE_MEMORY:
4676 		case ERRTYPE_ECACHE_DATA:
4677 
4678 			/*
4679 			 * Memory errors, do unum lookup
4680 			 */
4681 			if (*afar_status == AFLT_STAT_INVALID)
4682 				break;
4683 
4684 			if (rtype == ERRTYPE_ECACHE_DATA)
4685 				aflt->flt_status |= ECC_ECACHE;
4686 			else
4687 				aflt->flt_status &= ~ECC_ECACHE;
4688 
4689 			synd_code = synd_to_synd_code(*synd_status,
4690 			    aflt->flt_synd, ch_flt->flt_bit);
4691 
4692 			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4693 				break;
4694 
4695 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4696 			    &len);
4697 
4698 			if (ret == 0) {
4699 				(void) cpu_get_mem_offset(aflt->flt_addr,
4700 				    &offset);
4701 			}
4702 
4703 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4704 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4705 			fm_payload_set(payload,
4706 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4707 			    DATA_TYPE_NVLIST, resource, NULL);
4708 			break;
4709 
4710 		case ERRTYPE_CPU:
4711 			/*
4712 			 * On-board processor array error, add cpu resource.
4713 			 */
4714 			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4715 			fm_payload_set(payload,
4716 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4717 			    DATA_TYPE_NVLIST, resource, NULL);
4718 			break;
4719 		}
4720 	}
4721 }
4722 
4723 /*
4724  * Initialize the way info if necessary.
4725  */
4726 void
cpu_ereport_init(struct async_flt * aflt)4727 cpu_ereport_init(struct async_flt *aflt)
4728 {
4729 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4730 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4731 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4732 	int i;
4733 
4734 	/*
4735 	 * Initialize the info in the CPU logout structure.
4736 	 * The I$/D$ way information is not initialized here
4737 	 * since it is captured in the logout assembly code.
4738 	 */
4739 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4740 		(ecp + i)->ec_way = i;
4741 
4742 	for (i = 0; i < PN_L2_NWAYS; i++)
4743 		(l2p + i)->ec_way = i;
4744 }
4745 
4746 /*
4747  * Returns whether fault address is valid for this error bit and
4748  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4749  */
4750 int
cpu_flt_in_memory(ch_async_flt_t * ch_flt,uint64_t t_afsr_bit)4751 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4752 {
4753 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4754 
4755 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4756 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4757 	    AFLT_STAT_VALID &&
4758 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4759 }
4760 
4761 /*
4762  * Returns whether fault address is valid based on the error bit for the
4763  * one event being queued and whether the address is "in memory".
4764  */
4765 static int
cpu_flt_in_memory_one_event(ch_async_flt_t * ch_flt,uint64_t t_afsr_bit)4766 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4767 {
4768 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4769 	int afar_status;
4770 	uint64_t afsr_errs, afsr_ow, *ow_bits;
4771 
4772 	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4773 	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4774 		return (0);
4775 
4776 	afsr_errs = ch_flt->afsr_errs;
4777 	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4778 
4779 	switch (afar_status) {
4780 	case AFLT_STAT_VALID:
4781 		return (1);
4782 
4783 	case AFLT_STAT_AMBIGUOUS:
4784 		/*
4785 		 * Status is ambiguous since another error bit (or bits)
4786 		 * of equal priority to the specified bit on in the afsr,
4787 		 * so check those bits. Return 1 only if the bits on in the
4788 		 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4789 		 * Otherwise not all the equal priority bits are for memory
4790 		 * errors, so return 0.
4791 		 */
4792 		ow_bits = afar_overwrite;
4793 		while ((afsr_ow = *ow_bits++) != 0) {
4794 			/*
4795 			 * Get other bits that are on in t_afsr_bit's priority
4796 			 * class to check for Memory Error bits only.
4797 			 */
4798 			if (afsr_ow & t_afsr_bit) {
4799 				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4800 					return (0);
4801 				else
4802 					return (1);
4803 			}
4804 		}
4805 		/*FALLTHRU*/
4806 
4807 	default:
4808 		return (0);
4809 	}
4810 }
4811 
4812 static void
cpu_log_diag_info(ch_async_flt_t * ch_flt)4813 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4814 {
4815 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4816 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4817 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4818 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4819 #if defined(CPU_IMP_ECACHE_ASSOC)
4820 	int i, nway;
4821 #endif /* CPU_IMP_ECACHE_ASSOC */
4822 
4823 	/*
4824 	 * Check if the CPU log out captured was valid.
4825 	 */
4826 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4827 	    ch_flt->flt_data_incomplete)
4828 		return;
4829 
4830 #if defined(CPU_IMP_ECACHE_ASSOC)
4831 	nway = cpu_ecache_nway();
4832 	i =  cpu_ecache_line_valid(ch_flt);
4833 	if (i == 0 || i > nway) {
4834 		for (i = 0; i < nway; i++)
4835 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4836 	} else
4837 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4838 #else /* CPU_IMP_ECACHE_ASSOC */
4839 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4840 #endif /* CPU_IMP_ECACHE_ASSOC */
4841 
4842 #if defined(CHEETAH_PLUS)
4843 	pn_cpu_log_diag_l2_info(ch_flt);
4844 #endif /* CHEETAH_PLUS */
4845 
4846 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4847 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4848 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4849 	}
4850 
4851 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4852 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4853 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4854 		else
4855 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4856 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4857 	}
4858 }
4859 
/*
 * Cheetah ECC calculation.
 *
 * We only need to do the calculation on the data bits and can ignore check
 * bit and Mtag bit terms in the calculation.
 *
 * One row per generated ECC bit (9 rows).  Each row holds the mask applied
 * to the low-order 64 data bits followed by the mask applied to the
 * high-order 64 data bits.  The table is never written, so it is const.
 */
static const uint64_t ch_ecc_table[9][2] = {
	/*
	 * low order 64-bits   high-order 64-bits
	 */
	{ 0x46bffffeccd1177f, 0x488800022100014c },
	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
	{ 0x7f5511421b113809, 0x901c88d84288aafe },
	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
	{ 0xf552181014448344, 0x7ff8f4443e411911 },
	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
	{ 0x3280008440001112, 0xfee88b337ffffd62 },
};
4880 
/*
 * 64-bit population count: returns the number of 1 bits in val.
 *
 * The UltraSPARC V9 POPC instruction is not used because some CPUs,
 * including Cheetahplus and Jaguar, do not support it.  Instead the
 * well-known trick of repeatedly clearing the lowest set bit is used.
 */
int
popc64(uint64_t val)
{
	int nbits = 0;

	/* val & (val - 1) clears the lowest 1 bit still set in val. */
	while (val != 0) {
		val &= val - 1;
		nbits++;
	}
	return (nbits);
}
4896 
4897 /*
4898  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4899  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4900  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4901  * instead of doing all the xor's.
4902  */
4903 uint32_t
us3_gen_ecc(uint64_t data_low,uint64_t data_high)4904 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4905 {
4906 	int bitno, s;
4907 	int synd = 0;
4908 
4909 	for (bitno = 0; bitno < 9; bitno++) {
4910 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4911 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4912 		synd |= (s << bitno);
4913 	}
4914 	return (synd);
4915 
4916 }
4917 
4918 /*
4919  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4920  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4921  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4922  */
4923 static void
cpu_queue_one_event(ch_async_flt_t * ch_flt,char * reason,ecc_type_to_info_t * eccp,ch_diag_data_t * cdp)4924 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4925     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4926 {
4927 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4928 
4929 	if (reason &&
4930 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4931 		(void) strcat(reason, eccp->ec_reason);
4932 	}
4933 
4934 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4935 	ch_flt->flt_type = eccp->ec_flt_type;
4936 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4937 		ch_flt->flt_diag_data = *cdp;
4938 	else
4939 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4940 	aflt->flt_in_memory =
4941 	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4942 
4943 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4944 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4945 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4946 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4947 	else
4948 		aflt->flt_synd = 0;
4949 
4950 	aflt->flt_payload = eccp->ec_err_payload;
4951 
4952 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4953 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4954 		cpu_errorq_dispatch(eccp->ec_err_class,
4955 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4956 		    aflt->flt_panic);
4957 	else
4958 		cpu_errorq_dispatch(eccp->ec_err_class,
4959 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4960 		    aflt->flt_panic);
4961 }
4962 
4963 /*
4964  * Queue events on async event queue one event per error bit.  First we
4965  * queue the events that we "expect" for the given trap, then we queue events
4966  * that we may not expect.  Return number of events queued.
4967  */
4968 int
cpu_queue_events(ch_async_flt_t * ch_flt,char * reason,uint64_t t_afsr_errs,ch_cpu_logout_t * clop)4969 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4970     ch_cpu_logout_t *clop)
4971 {
4972 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4973 	ecc_type_to_info_t *eccp;
4974 	int nevents = 0;
4975 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4976 #if defined(CHEETAH_PLUS)
4977 	uint64_t orig_t_afsr_errs;
4978 #endif
4979 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4980 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4981 	ch_diag_data_t *cdp = NULL;
4982 
4983 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4984 
4985 #if defined(CHEETAH_PLUS)
4986 	orig_t_afsr_errs = t_afsr_errs;
4987 
4988 	/*
4989 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4990 	 */
4991 	if (clop != NULL) {
4992 		/*
4993 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4994 		 * flt_addr and flt_stat fields will be reset to the primaries
4995 		 * below, but the sdw_addr and sdw_stat will stay as the
4996 		 * secondaries.
4997 		 */
4998 		cdp = &clop->clo_sdw_data;
4999 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
5000 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
5001 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
5002 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5003 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
5004 
5005 		/*
5006 		 * If the primary and shadow AFSR differ, tag the shadow as
5007 		 * the first fault.
5008 		 */
5009 		if ((primary_afar != cdp->chd_afar) ||
5010 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
5011 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
5012 		}
5013 
5014 		/*
5015 		 * Check AFSR bits as well as AFSR_EXT bits in order of
5016 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
5017 		 * is expected to be zero for those CPUs which do not have
5018 		 * an AFSR_EXT register.
5019 		 */
5020 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
5021 			if ((eccp->ec_afsr_bit &
5022 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
5023 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
5024 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5025 				cdp = NULL;
5026 				t_afsr_errs &= ~eccp->ec_afsr_bit;
5027 				nevents++;
5028 			}
5029 		}
5030 
5031 		/*
5032 		 * If the ME bit is on in the primary AFSR turn all the
5033 		 * error bits on again that may set the ME bit to make
5034 		 * sure we see the ME AFSR error logs.
5035 		 */
5036 		if ((primary_afsr & C_AFSR_ME) != 0)
5037 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
5038 	}
5039 #endif	/* CHEETAH_PLUS */
5040 
5041 	if (clop != NULL)
5042 		cdp = &clop->clo_data;
5043 
5044 	/*
5045 	 * Queue expected errors, error bit and fault type must match
5046 	 * in the ecc_type_to_info table.
5047 	 */
5048 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5049 	    eccp++) {
5050 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
5051 		    (eccp->ec_flags & aflt->flt_status) != 0) {
5052 #if defined(SERRANO)
5053 			/*
5054 			 * For FRC/FRU errors on Serrano the afar2 captures
5055 			 * the address and the associated data is
5056 			 * in the shadow logout area.
5057 			 */
5058 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5059 				if (clop != NULL)
5060 					cdp = &clop->clo_sdw_data;
5061 				aflt->flt_addr = ch_flt->afar2;
5062 			} else {
5063 				if (clop != NULL)
5064 					cdp = &clop->clo_data;
5065 				aflt->flt_addr = primary_afar;
5066 			}
5067 #else	/* SERRANO */
5068 			aflt->flt_addr = primary_afar;
5069 #endif	/* SERRANO */
5070 			aflt->flt_stat = primary_afsr;
5071 			ch_flt->afsr_ext = primary_afsr_ext;
5072 			ch_flt->afsr_errs = primary_afsr_errs;
5073 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5074 			cdp = NULL;
5075 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5076 			nevents++;
5077 		}
5078 	}
5079 
5080 	/*
5081 	 * Queue unexpected errors, error bit only match.
5082 	 */
5083 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5084 	    eccp++) {
5085 		if (eccp->ec_afsr_bit & t_afsr_errs) {
5086 #if defined(SERRANO)
5087 			/*
5088 			 * For FRC/FRU errors on Serrano the afar2 captures
5089 			 * the address and the associated data is
5090 			 * in the shadow logout area.
5091 			 */
5092 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5093 				if (clop != NULL)
5094 					cdp = &clop->clo_sdw_data;
5095 				aflt->flt_addr = ch_flt->afar2;
5096 			} else {
5097 				if (clop != NULL)
5098 					cdp = &clop->clo_data;
5099 				aflt->flt_addr = primary_afar;
5100 			}
5101 #else	/* SERRANO */
5102 			aflt->flt_addr = primary_afar;
5103 #endif	/* SERRANO */
5104 			aflt->flt_stat = primary_afsr;
5105 			ch_flt->afsr_ext = primary_afsr_ext;
5106 			ch_flt->afsr_errs = primary_afsr_errs;
5107 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5108 			cdp = NULL;
5109 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5110 			nevents++;
5111 		}
5112 	}
5113 	return (nevents);
5114 }
5115 
5116 /*
5117  * Return trap type number.
5118  */
5119 uint8_t
flt_to_trap_type(struct async_flt * aflt)5120 flt_to_trap_type(struct async_flt *aflt)
5121 {
5122 	if (aflt->flt_status & ECC_I_TRAP)
5123 		return (TRAP_TYPE_ECC_I);
5124 	if (aflt->flt_status & ECC_D_TRAP)
5125 		return (TRAP_TYPE_ECC_D);
5126 	if (aflt->flt_status & ECC_F_TRAP)
5127 		return (TRAP_TYPE_ECC_F);
5128 	if (aflt->flt_status & ECC_C_TRAP)
5129 		return (TRAP_TYPE_ECC_C);
5130 	if (aflt->flt_status & ECC_DP_TRAP)
5131 		return (TRAP_TYPE_ECC_DP);
5132 	if (aflt->flt_status & ECC_IP_TRAP)
5133 		return (TRAP_TYPE_ECC_IP);
5134 	if (aflt->flt_status & ECC_ITLB_TRAP)
5135 		return (TRAP_TYPE_ECC_ITLB);
5136 	if (aflt->flt_status & ECC_DTLB_TRAP)
5137 		return (TRAP_TYPE_ECC_DTLB);
5138 	return (TRAP_TYPE_UNKNOWN);
5139 }
5140 
5141 /*
5142  * Decide an error type based on detector and leaky/partner tests.
5143  * The following array is used for quick translation - it must
5144  * stay in sync with ce_dispact_t.
5145  */
5146 
5147 static char *cetypes[] = {
5148 	CE_DISP_DESC_U,
5149 	CE_DISP_DESC_I,
5150 	CE_DISP_DESC_PP,
5151 	CE_DISP_DESC_P,
5152 	CE_DISP_DESC_L,
5153 	CE_DISP_DESC_PS,
5154 	CE_DISP_DESC_S
5155 };
5156 
5157 char *
flt_to_error_type(struct async_flt * aflt)5158 flt_to_error_type(struct async_flt *aflt)
5159 {
5160 	ce_dispact_t dispact, disp;
5161 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5162 
5163 	/*
5164 	 * The memory payload bundle is shared by some events that do
5165 	 * not perform any classification.  For those flt_disp will be
5166 	 * 0 and we will return "unknown".
5167 	 */
5168 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5169 		return (cetypes[CE_DISP_UNKNOWN]);
5170 
5171 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5172 
5173 	/*
5174 	 * It is also possible that no scrub/classification was performed
5175 	 * by the detector, for instance where a disrupting error logged
5176 	 * in the AFSR while CEEN was off in cpu_deferred_error.
5177 	 */
5178 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5179 		return (cetypes[CE_DISP_UNKNOWN]);
5180 
5181 	/*
5182 	 * Lookup type in initial classification/action table
5183 	 */
5184 	dispact = CE_DISPACT(ce_disp_table,
5185 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
5186 	    CE_XDIAG_STATE(dtcrinfo),
5187 	    CE_XDIAG_CE1SEEN(dtcrinfo),
5188 	    CE_XDIAG_CE2SEEN(dtcrinfo));
5189 
5190 	/*
5191 	 * A bad lookup is not something to panic production systems for.
5192 	 */
5193 	ASSERT(dispact != CE_DISP_BAD);
5194 	if (dispact == CE_DISP_BAD)
5195 		return (cetypes[CE_DISP_UNKNOWN]);
5196 
5197 	disp = CE_DISP(dispact);
5198 
5199 	switch (disp) {
5200 	case CE_DISP_UNKNOWN:
5201 	case CE_DISP_INTERMITTENT:
5202 		break;
5203 
5204 	case CE_DISP_POSS_PERS:
5205 		/*
5206 		 * "Possible persistent" errors to which we have applied a valid
5207 		 * leaky test can be separated into "persistent" or "leaky".
5208 		 */
5209 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5210 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
5211 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5212 			    CE_XDIAG_CE2SEEN(lkyinfo))
5213 				disp = CE_DISP_LEAKY;
5214 			else
5215 				disp = CE_DISP_PERS;
5216 		}
5217 		break;
5218 
5219 	case CE_DISP_POSS_STICKY:
5220 		/*
5221 		 * Promote "possible sticky" results that have been
5222 		 * confirmed by a partner test to "sticky".  Unconfirmed
5223 		 * "possible sticky" events are left at that status - we do not
5224 		 * guess at any bad reader/writer etc status here.
5225 		 */
5226 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5227 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5228 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5229 			disp = CE_DISP_STICKY;
5230 
5231 		/*
5232 		 * Promote "possible sticky" results on a uniprocessor
5233 		 * to "sticky"
5234 		 */
5235 		if (disp == CE_DISP_POSS_STICKY &&
5236 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5237 			disp = CE_DISP_STICKY;
5238 		break;
5239 
5240 	default:
5241 		disp = CE_DISP_UNKNOWN;
5242 		break;
5243 	}
5244 
5245 	return (cetypes[disp]);
5246 }
5247 
5248 /*
5249  * Given the entire afsr, the specific bit to check and a prioritized list of
5250  * error bits, determine the validity of the various overwrite priority
5251  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5252  * different overwrite priorities.
5253  *
5254  * Given a specific afsr error bit and the entire afsr, there are three cases:
5255  *   INVALID:	The specified bit is lower overwrite priority than some other
5256  *		error bit which is on in the afsr (or IVU/IVC).
5257  *   VALID:	The specified bit is higher priority than all other error bits
5258  *		which are on in the afsr.
5259  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5260  *		bit is on in the afsr.
5261  */
5262 int
afsr_to_overw_status(uint64_t afsr,uint64_t afsr_bit,uint64_t * ow_bits)5263 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5264 {
5265 	uint64_t afsr_ow;
5266 
5267 	while ((afsr_ow = *ow_bits++) != 0) {
5268 		/*
5269 		 * If bit is in the priority class, check to see if another
5270 		 * bit in the same class is on => ambiguous.  Otherwise,
5271 		 * the value is valid.  If the bit is not on at this priority
5272 		 * class, but a higher priority bit is on, then the value is
5273 		 * invalid.
5274 		 */
5275 		if (afsr_ow & afsr_bit) {
5276 			/*
5277 			 * If equal pri bit is on, ambiguous.
5278 			 */
5279 			if (afsr & (afsr_ow & ~afsr_bit))
5280 				return (AFLT_STAT_AMBIGUOUS);
5281 			return (AFLT_STAT_VALID);
5282 		} else if (afsr & afsr_ow)
5283 			break;
5284 	}
5285 
5286 	/*
5287 	 * We didn't find a match or a higher priority bit was on.  Not
5288 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5289 	 */
5290 	return (AFLT_STAT_INVALID);
5291 }
5292 
5293 static int
afsr_to_afar_status(uint64_t afsr,uint64_t afsr_bit)5294 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5295 {
5296 #if defined(SERRANO)
5297 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5298 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5299 	else
5300 #endif	/* SERRANO */
5301 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5302 }
5303 
5304 static int
afsr_to_esynd_status(uint64_t afsr,uint64_t afsr_bit)5305 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5306 {
5307 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5308 }
5309 
5310 static int
afsr_to_msynd_status(uint64_t afsr,uint64_t afsr_bit)5311 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5312 {
5313 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5314 }
5315 
5316 static int
afsr_to_synd_status(uint_t cpuid,uint64_t afsr,uint64_t afsr_bit)5317 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5318 {
5319 #ifdef lint
5320 	cpuid = cpuid;
5321 #endif
5322 #if defined(CHEETAH_PLUS)
5323 	/*
5324 	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5325 	 * policy for Cheetah+ and separate for Panther CPUs.
5326 	 */
5327 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5328 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5329 			return (afsr_to_msynd_status(afsr, afsr_bit));
5330 		else
5331 			return (afsr_to_esynd_status(afsr, afsr_bit));
5332 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5333 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5334 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5335 		else
5336 			return (afsr_to_esynd_status(afsr, afsr_bit));
5337 #else /* CHEETAH_PLUS */
5338 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5339 		return (afsr_to_msynd_status(afsr, afsr_bit));
5340 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5341 		return (afsr_to_esynd_status(afsr, afsr_bit));
5342 #endif /* CHEETAH_PLUS */
5343 	} else {
5344 		return (AFLT_STAT_INVALID);
5345 	}
5346 }
5347 
5348 /*
5349  * Slave CPU stick synchronization.
5350  */
5351 void
5352 sticksync_slave(void)
5353 {
5354 	int 		i;
5355 	int		tries = 0;
5356 	int64_t		tskew;
5357 	int64_t		av_tskew;
5358 
5359 	kpreempt_disable();
5360 	/* wait for the master side */
5361 	while (stick_sync_cmd != SLAVE_START)
5362 		;
5363 	/*
5364 	 * Synchronization should only take a few tries at most. But in the
5365 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5366 	 * without it's stick synchronized wouldn't be a good citizen.
5367 	 */
5368 	while (slave_done == 0) {
5369 		/*
5370 		 * Time skew calculation.
5371 		 */
5372 		av_tskew = tskew = 0;
5373 
5374 		for (i = 0; i < stick_iter; i++) {
5375 			/* make location hot */
5376 			timestamp[EV_A_START] = 0;
5377 			stick_timestamp(&timestamp[EV_A_START]);
5378 
5379 			/* tell the master we're ready */
5380 			stick_sync_cmd = MASTER_START;
5381 
5382 			/* and wait */
5383 			while (stick_sync_cmd != SLAVE_CONT)
5384 				;
5385 			/* Event B end */
5386 			stick_timestamp(&timestamp[EV_B_END]);
5387 
5388 			/* calculate time skew */
5389 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5390 			    - (timestamp[EV_A_END] - timestamp[EV_A_START]))
5391 			    / 2;
5392 
5393 			/* keep running count */
5394 			av_tskew += tskew;
5395 		} /* for */
5396 
5397 		/*
5398 		 * Adjust stick for time skew if not within the max allowed;
5399 		 * otherwise we're all done.
5400 		 */
5401 		if (stick_iter != 0)
5402 			av_tskew = av_tskew/stick_iter;
5403 		if (ABS(av_tskew) > stick_tsk) {
5404 			/*
5405 			 * If the skew is 1 (the slave's STICK register
5406 			 * is 1 STICK ahead of the master's), stick_adj
5407 			 * could fail to adjust the slave's STICK register
5408 			 * if the STICK read on the slave happens to
5409 			 * align with the increment of the STICK.
5410 			 * Therefore, we increment the skew to 2.
5411 			 */
5412 			if (av_tskew == 1)
5413 				av_tskew++;
5414 			stick_adj(-av_tskew);
5415 		} else
5416 			slave_done = 1;
5417 #ifdef DEBUG
5418 		if (tries < DSYNC_ATTEMPTS)
5419 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5420 			    av_tskew;
5421 		++tries;
5422 #endif /* DEBUG */
5423 #ifdef lint
5424 		tries = tries;
5425 #endif
5426 
5427 	} /* while */
5428 
5429 	/* allow the master to finish */
5430 	stick_sync_cmd = EVENT_NULL;
5431 	kpreempt_enable();
5432 }
5433 
5434 /*
5435  * Master CPU side of stick synchronization.
5436  *  - timestamp end of Event A
5437  *  - timestamp beginning of Event B
5438  */
5439 void
5440 sticksync_master(void)
5441 {
5442 	int		i;
5443 
5444 	kpreempt_disable();
5445 	/* tell the slave we've started */
5446 	slave_done = 0;
5447 	stick_sync_cmd = SLAVE_START;
5448 
5449 	while (slave_done == 0) {
5450 		for (i = 0; i < stick_iter; i++) {
5451 			/* wait for the slave */
5452 			while (stick_sync_cmd != MASTER_START)
5453 				;
5454 			/* Event A end */
5455 			stick_timestamp(&timestamp[EV_A_END]);
5456 
5457 			/* make location hot */
5458 			timestamp[EV_B_START] = 0;
5459 			stick_timestamp(&timestamp[EV_B_START]);
5460 
5461 			/* tell the slave to continue */
5462 			stick_sync_cmd = SLAVE_CONT;
5463 		} /* for */
5464 
5465 		/* wait while slave calculates time skew */
5466 		while (stick_sync_cmd == SLAVE_CONT)
5467 			;
5468 	} /* while */
5469 	kpreempt_enable();
5470 }
5471 
/*
 * Intentionally empty.  Cheetah/Cheetah+ have disrupting error for
 * copyback's, so we don't need to do Spitfire hack of xcall'ing all the
 * cpus to ask to check for them.  Also, in cpu_async_panic_callb, each cpu
 * checks for CPU events on its way to panic idle.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
}
5482 
5483 struct kmem_cache *ch_private_cache;
5484 
5485 /*
5486  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5487  * deallocate the scrubber data structures and cpu_private data structure.
5488  */
5489 void
5490 cpu_uninit_private(struct cpu *cp)
5491 {
5492 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5493 
5494 	ASSERT(chprp);
5495 	cpu_uninit_ecache_scrub_dr(cp);
5496 	CPU_PRIVATE(cp) = NULL;
5497 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5498 	kmem_cache_free(ch_private_cache, chprp);
5499 	cmp_delete_cpu(cp->cpu_id);
5500 
5501 }
5502 
/*
 * Cheetah Cache Scrubbing
 *
 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
 * protected by either parity or ECC.
 *
 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
 * logic works with respect to the ASI used to scrub I$ lines, the entire
 * I$ is scanned at once.
 */
5515 
/*
 * Tuneables to enable and disable the scrubbing of the caches, and to tune
 * scrubbing behavior.  These may be changed via /etc/system or using mdb
 * on a running system.
 */

/* D$ scrubbing is on by default */
int dcache_scrub_enable = 1;
5522 
5523 /*
5524  * The following are the PIL levels that the softints/cross traps will fire at.
5525  */
5526 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5527 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5528 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5529 
#if !defined(JALAPENO)

/*
 * With all cpu types other than Jalapeno, E$ scrubbing is on by default
 */
int ecache_scrub_enable = 1;

#else	/* !JALAPENO */

/*
 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
 * on Jalapeno.
 */
int ecache_scrub_enable = 0;

#endif	/* !JALAPENO */
5546 
5547 
#if !defined(CHEETAH_PLUS) && !defined(JALAPENO) && !defined(SERRANO)

/* I$ scrubbing is on by default */
int icache_scrub_enable = 1;

/* Scrub calls per second for each cache */
int ecache_calls_a_sec = 100;
int dcache_calls_a_sec = 100;
int icache_calls_a_sec = 100;

/* Scan rates for each cache, in tenths of a percent */
int ecache_scan_rate_idle = 100;
int ecache_scan_rate_busy = 100;
int dcache_scan_rate_idle = 100;
int dcache_scan_rate_busy = 100;
int icache_scan_rate_idle = 100;
int icache_scan_rate_busy = 100;

#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */

/*
 * The I$ scrubber tends to cause latency problems for real-time SW, so it
 * is disabled by default on non-Cheetah systems
 */
int icache_scrub_enable = 0;

/*
 * Tuneables specifying the scrub calls per second and the scan rate
 * for each cache
 *
 * The cyclic times are set during boot based on the following values.
 * Changing these values in mdb after this time will have no effect.  If
 * a different value is desired, it must be set in /etc/system before a
 * reboot.
 */
int ecache_calls_a_sec = 1;
int dcache_calls_a_sec = 2;
int icache_calls_a_sec = 2;

int ecache_scan_rate_idle = 1;
int ecache_scan_rate_busy = 1;
int dcache_scan_rate_idle = 1;
int dcache_scan_rate_busy = 1;
int icache_scan_rate_idle = 1;
int icache_scan_rate_busy = 1;

#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5592 
5593 /*
5594  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5595  * increment the outstanding request counter and schedule a softint to run
5596  * the scrubber.
5597  */
5598 extern xcfunc_t cache_scrubreq_tl1;
5599 
5600 /*
5601  * These are the softint functions for each cache scrubber
5602  */
5603 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5604 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5605 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5606 
5607 /*
5608  * The cache scrub info table contains cache specific information
5609  * and allows for some of the scrub code to be table driven, reducing
5610  * duplication of cache similar code.
5611  *
5612  * This table keeps a copy of the value in the calls per second variable
5613  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5614  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5615  * mdb in a misguided attempt to disable the scrubber).
5616  */
5617 struct scrub_info {
5618 	int		*csi_enable;	/* scrubber enable flag */
5619 	int		csi_freq;	/* scrubber calls per second */
5620 	int		csi_index;	/* index to chsm_outstanding[] */
5621 	uint64_t	csi_inum;	/* scrubber interrupt number */
5622 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5623 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5624 	char		csi_name[3];	/* cache name for this scrub entry */
5625 } cache_scrub_info[] = {
5626 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5627 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5628 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5629 };
5630 
5631 /*
5632  * If scrubbing is enabled, increment the outstanding request counter.  If it
5633  * is 1 (meaning there were no previous requests outstanding), call
5634  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5635  * a self trap.
5636  */
5637 static void
5638 do_scrub(struct scrub_info *csi)
5639 {
5640 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5641 	int index = csi->csi_index;
5642 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5643 
5644 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5645 		if (atomic_inc_32_nv(outstanding) == 1) {
5646 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5647 			    csi->csi_inum, 0);
5648 		}
5649 	}
5650 }
5651 
5652 /*
5653  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5654  * cross-trap the offline cpus.
5655  */
5656 static void
5657 do_scrub_offline(struct scrub_info *csi)
5658 {
5659 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5660 
5661 	if (CPUSET_ISNULL(cpu_offline_set)) {
5662 		/*
5663 		 * No offline cpus - nothing to do
5664 		 */
5665 		return;
5666 	}
5667 
5668 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5669 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5670 		    csi->csi_index);
5671 	}
5672 }
5673 
5674 /*
5675  * This is the initial setup for the scrubber cyclics - it sets the
5676  * interrupt level, frequency, and function to call.
5677  */
5678 /*ARGSUSED*/
5679 static void
5680 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5681     cyc_time_t *when)
5682 {
5683 	struct scrub_info *csi = (struct scrub_info *)arg;
5684 
5685 	ASSERT(csi != NULL);
5686 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5687 	hdlr->cyh_level = CY_LOW_LEVEL;
5688 	hdlr->cyh_arg = arg;
5689 
5690 	when->cyt_when = 0;	/* Start immediately */
5691 	when->cyt_interval = NANOSEC / csi->csi_freq;
5692 }
5693 
5694 /*
5695  * Initialization for cache scrubbing.
5696  * This routine is called AFTER all cpus have had cpu_init_private called
5697  * to initialize their private data areas.
5698  */
5699 void
5700 cpu_init_cache_scrub(void)
5701 {
5702 	int i;
5703 	struct scrub_info *csi;
5704 	cyc_omni_handler_t omni_hdlr;
5705 	cyc_handler_t offline_hdlr;
5706 	cyc_time_t when;
5707 
5708 	/*
5709 	 * save away the maximum number of lines for the D$
5710 	 */
5711 	dcache_nlines = dcache_size / dcache_linesize;
5712 
5713 	/*
5714 	 * register the softints for the cache scrubbing
5715 	 */
5716 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5717 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5718 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5719 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5720 
5721 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5722 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5723 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5724 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5725 
5726 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5727 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5728 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5729 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5730 
5731 	/*
5732 	 * start the scrubbing for all the caches
5733 	 */
5734 	mutex_enter(&cpu_lock);
5735 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5736 
5737 		csi = &cache_scrub_info[i];
5738 
5739 		if (!(*csi->csi_enable))
5740 			continue;
5741 
5742 		/*
5743 		 * force the following to be true:
5744 		 *	1 <= calls_a_sec <= hz
5745 		 */
5746 		if (csi->csi_freq > hz) {
5747 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5748 			    "(%d); resetting to hz (%d)", csi->csi_name,
5749 			    csi->csi_freq, hz);
5750 			csi->csi_freq = hz;
5751 		} else if (csi->csi_freq < 1) {
5752 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5753 			    "(%d); resetting to 1", csi->csi_name,
5754 			    csi->csi_freq);
5755 			csi->csi_freq = 1;
5756 		}
5757 
5758 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5759 		omni_hdlr.cyo_offline = NULL;
5760 		omni_hdlr.cyo_arg = (void *)csi;
5761 
5762 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5763 		offline_hdlr.cyh_arg = (void *)csi;
5764 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5765 
5766 		when.cyt_when = 0;	/* Start immediately */
5767 		when.cyt_interval = NANOSEC / csi->csi_freq;
5768 
5769 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5770 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5771 	}
5772 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5773 	mutex_exit(&cpu_lock);
5774 }
5775 
5776 /*
5777  * Indicate that the specified cpu is idle.
5778  */
5779 void
5780 cpu_idle_ecache_scrub(struct cpu *cp)
5781 {
5782 	if (CPU_PRIVATE(cp) != NULL) {
5783 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5784 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5785 	}
5786 }
5787 
5788 /*
5789  * Indicate that the specified cpu is busy.
5790  */
5791 void
5792 cpu_busy_ecache_scrub(struct cpu *cp)
5793 {
5794 	if (CPU_PRIVATE(cp) != NULL) {
5795 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5796 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5797 	}
5798 }
5799 
5800 /*
5801  * Initialization for cache scrubbing for the specified cpu.
5802  */
5803 void
5804 cpu_init_ecache_scrub_dr(struct cpu *cp)
5805 {
5806 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5807 	int cpuid = cp->cpu_id;
5808 
5809 	/* initialize the number of lines in the caches */
5810 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5811 	    cpunodes[cpuid].ecache_linesize;
5812 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5813 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5814 
5815 	/*
5816 	 * do_scrub() and do_scrub_offline() check both the global
5817 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5818 	 * check this value before scrubbing.  Currently, we use it to
5819 	 * disable the E$ scrubber on multi-core cpus or while running at
5820 	 * slowed speed.  For now, just turn everything on and allow
5821 	 * cpu_init_private() to change it if necessary.
5822 	 */
5823 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5824 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5825 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5826 
5827 	cpu_busy_ecache_scrub(cp);
5828 }
5829 
5830 /*
5831  * Un-initialization for cache scrubbing for the specified cpu.
5832  */
5833 static void
5834 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5835 {
5836 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5837 
5838 	/*
5839 	 * un-initialize bookkeeping for cache scrubbing
5840 	 */
5841 	bzero(csmp, sizeof (ch_scrub_misc_t));
5842 
5843 	cpu_idle_ecache_scrub(cp);
5844 }
5845 
5846 /*
5847  * Called periodically on each CPU to scrub the D$.
5848  */
5849 static void
5850 scrub_dcache(int how_many)
5851 {
5852 	int i;
5853 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5854 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5855 
5856 	/*
5857 	 * scrub the desired number of lines
5858 	 */
5859 	for (i = 0; i < how_many; i++) {
5860 		/*
5861 		 * scrub a D$ line
5862 		 */
5863 		dcache_inval_line(index);
5864 
5865 		/*
5866 		 * calculate the next D$ line to scrub, assumes
5867 		 * that dcache_nlines is a power of 2
5868 		 */
5869 		index = (index + 1) & (dcache_nlines - 1);
5870 	}
5871 
5872 	/*
5873 	 * set the scrub index for the next visit
5874 	 */
5875 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5876 }
5877 
5878 /*
5879  * Handler for D$ scrub inum softint. Call scrub_dcache until
5880  * we decrement the outstanding request count to zero.
5881  */
5882 /*ARGSUSED*/
5883 static uint_t
5884 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5885 {
5886 	int i;
5887 	int how_many;
5888 	int outstanding;
5889 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5890 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5891 	struct scrub_info *csi = (struct scrub_info *)arg1;
5892 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5893 	    dcache_scan_rate_idle : dcache_scan_rate_busy;
5894 
5895 	/*
5896 	 * The scan rates are expressed in units of tenths of a
5897 	 * percent.  A scan rate of 1000 (100%) means the whole
5898 	 * cache is scanned every second.
5899 	 */
5900 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5901 
5902 	do {
5903 		outstanding = *countp;
5904 		for (i = 0; i < outstanding; i++) {
5905 			scrub_dcache(how_many);
5906 		}
5907 	} while (atomic_add_32_nv(countp, -outstanding));
5908 
5909 	return (DDI_INTR_CLAIMED);
5910 }
5911 
5912 /*
5913  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5914  * by invalidating lines. Due to the characteristics of the ASI which
5915  * is used to invalidate an I$ line, the entire I$ must be invalidated
5916  * vs. an individual I$ line.
5917  */
5918 static void
5919 scrub_icache(int how_many)
5920 {
5921 	int i;
5922 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5923 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5924 	int icache_nlines = csmp->chsm_icache_nlines;
5925 
5926 	/*
5927 	 * scrub the desired number of lines
5928 	 */
5929 	for (i = 0; i < how_many; i++) {
5930 		/*
5931 		 * since the entire I$ must be scrubbed at once,
5932 		 * wait until the index wraps to zero to invalidate
5933 		 * the entire I$
5934 		 */
5935 		if (index == 0) {
5936 			icache_inval_all();
5937 		}
5938 
5939 		/*
5940 		 * calculate the next I$ line to scrub, assumes
5941 		 * that chsm_icache_nlines is a power of 2
5942 		 */
5943 		index = (index + 1) & (icache_nlines - 1);
5944 	}
5945 
5946 	/*
5947 	 * set the scrub index for the next visit
5948 	 */
5949 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5950 }
5951 
5952 /*
5953  * Handler for I$ scrub inum softint. Call scrub_icache until
5954  * we decrement the outstanding request count to zero.
5955  */
5956 /*ARGSUSED*/
5957 static uint_t
5958 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5959 {
5960 	int i;
5961 	int how_many;
5962 	int outstanding;
5963 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5964 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5965 	struct scrub_info *csi = (struct scrub_info *)arg1;
5966 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5967 	    icache_scan_rate_idle : icache_scan_rate_busy;
5968 	int icache_nlines = csmp->chsm_icache_nlines;
5969 
5970 	/*
5971 	 * The scan rates are expressed in units of tenths of a
5972 	 * percent.  A scan rate of 1000 (100%) means the whole
5973 	 * cache is scanned every second.
5974 	 */
5975 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5976 
5977 	do {
5978 		outstanding = *countp;
5979 		for (i = 0; i < outstanding; i++) {
5980 			scrub_icache(how_many);
5981 		}
5982 	} while (atomic_add_32_nv(countp, -outstanding));
5983 
5984 	return (DDI_INTR_CLAIMED);
5985 }
5986 
5987 /*
5988  * Called periodically on each CPU to scrub the E$.
5989  */
5990 static void
5991 scrub_ecache(int how_many)
5992 {
5993 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5994 	int i;
5995 	int cpuid = CPU->cpu_id;
5996 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5997 	int nlines = csmp->chsm_ecache_nlines;
5998 	int linesize = cpunodes[cpuid].ecache_linesize;
5999 	int ec_set_size = cpu_ecache_set_size(CPU);
6000 
6001 	/*
6002 	 * scrub the desired number of lines
6003 	 */
6004 	for (i = 0; i < how_many; i++) {
6005 		/*
6006 		 * scrub the E$ line
6007 		 */
6008 		ecache_flush_line(ecache_flushaddr + (index * linesize),
6009 		    ec_set_size);
6010 
6011 		/*
6012 		 * calculate the next E$ line to scrub based on twice
6013 		 * the number of E$ lines (to displace lines containing
6014 		 * flush area data), assumes that the number of lines
6015 		 * is a power of 2
6016 		 */
6017 		index = (index + 1) & ((nlines << 1) - 1);
6018 	}
6019 
6020 	/*
6021 	 * set the ecache scrub index for the next visit
6022 	 */
6023 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
6024 }
6025 
6026 /*
6027  * Handler for E$ scrub inum softint. Call the E$ scrubber until
6028  * we decrement the outstanding request count to zero.
6029  *
6030  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
6031  * become negative after the atomic_add_32_nv().  This is not a problem, as
6032  * the next trip around the loop won't scrub anything, and the next add will
6033  * reset the count back to zero.
6034  */
6035 /*ARGSUSED*/
6036 static uint_t
6037 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
6038 {
6039 	int i;
6040 	int how_many;
6041 	int outstanding;
6042 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
6043 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
6044 	struct scrub_info *csi = (struct scrub_info *)arg1;
6045 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
6046 	    ecache_scan_rate_idle : ecache_scan_rate_busy;
6047 	int ecache_nlines = csmp->chsm_ecache_nlines;
6048 
6049 	/*
6050 	 * The scan rates are expressed in units of tenths of a
6051 	 * percent.  A scan rate of 1000 (100%) means the whole
6052 	 * cache is scanned every second.
6053 	 */
6054 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
6055 
6056 	do {
6057 		outstanding = *countp;
6058 		for (i = 0; i < outstanding; i++) {
6059 			scrub_ecache(how_many);
6060 		}
6061 	} while (atomic_add_32_nv(countp, -outstanding));
6062 
6063 	return (DDI_INTR_CLAIMED);
6064 }
6065 
6066 /*
6067  * Timeout function to reenable CE
6068  */
6069 static void
6070 cpu_delayed_check_ce_errors(void *arg)
6071 {
6072 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6073 	    TQ_NOSLEEP)) {
6074 		(void) timeout(cpu_delayed_check_ce_errors, arg,
6075 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6076 	}
6077 }
6078 
6079 /*
6080  * CE Deferred Re-enable after trap.
6081  *
6082  * When the CPU gets a disrupting trap for any of the errors
6083  * controlled by the CEEN bit, CEEN is disabled in the trap handler
6084  * immediately. To eliminate the possibility of multiple CEs causing
6085  * recursive stack overflow in the trap handler, we cannot
6086  * reenable CEEN while still running in the trap handler. Instead,
6087  * after a CE is logged on a CPU, we schedule a timeout function,
6088  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6089  * seconds. This function will check whether any further CEs
6090  * have occurred on that CPU, and if none have, will reenable CEEN.
6091  *
6092  * If further CEs have occurred while CEEN is disabled, another
6093  * timeout will be scheduled. This is to ensure that the CPU can
6094  * make progress in the face of CE 'storms', and that it does not
6095  * spend all its time logging CE errors.
6096  */
6097 static void
6098 cpu_check_ce_errors(void *arg)
6099 {
6100 	int	cpuid = (int)(uintptr_t)arg;
6101 	cpu_t	*cp;
6102 
6103 	/*
6104 	 * We acquire cpu_lock.
6105 	 */
6106 	ASSERT(curthread->t_pil == 0);
6107 
6108 	/*
6109 	 * verify that the cpu is still around, DR
6110 	 * could have got there first ...
6111 	 */
6112 	mutex_enter(&cpu_lock);
6113 	cp = cpu_get(cpuid);
6114 	if (cp == NULL) {
6115 		mutex_exit(&cpu_lock);
6116 		return;
6117 	}
6118 	/*
6119 	 * make sure we don't migrate across CPUs
6120 	 * while checking our CE status.
6121 	 */
6122 	kpreempt_disable();
6123 
6124 	/*
6125 	 * If we are running on the CPU that got the
6126 	 * CE, we can do the checks directly.
6127 	 */
6128 	if (cp->cpu_id == CPU->cpu_id) {
6129 		mutex_exit(&cpu_lock);
6130 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6131 		kpreempt_enable();
6132 		return;
6133 	}
6134 	kpreempt_enable();
6135 
6136 	/*
6137 	 * send an x-call to get the CPU that originally
6138 	 * got the CE to do the necessary checks. If we can't
6139 	 * send the x-call, reschedule the timeout, otherwise we
6140 	 * lose CEEN forever on that CPU.
6141 	 */
6142 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6143 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6144 		    TIMEOUT_CEEN_CHECK, 0);
6145 		mutex_exit(&cpu_lock);
6146 	} else {
6147 		/*
6148 		 * When the CPU is not accepting xcalls, or
6149 		 * the processor is offlined, we don't want to
6150 		 * incur the extra overhead of trying to schedule the
6151 		 * CE timeout indefinitely. However, we don't want to lose
6152 		 * CE checking forever.
6153 		 *
6154 		 * Keep rescheduling the timeout, accepting the additional
6155 		 * overhead as the cost of correctness in the case where we get
6156 		 * a CE, disable CEEN, offline the CPU during the
6157 		 * the timeout interval, and then online it at some
6158 		 * point in the future. This is unlikely given the short
6159 		 * cpu_ceen_delay_secs.
6160 		 */
6161 		mutex_exit(&cpu_lock);
6162 		(void) timeout(cpu_delayed_check_ce_errors,
6163 		    (void *)(uintptr_t)cp->cpu_id,
6164 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6165 	}
6166 }
6167 
6168 /*
6169  * This routine will check whether CEs have occurred while
6170  * CEEN is disabled. Any CEs detected will be logged and, if
6171  * possible, scrubbed.
6172  *
6173  * The memscrubber will also use this routine to clear any errors
6174  * caused by its scrubbing with CEEN disabled.
6175  *
6176  * flag == SCRUBBER_CEEN_CHECK
6177  *		called from memscrubber, just check/scrub, no reset
6178  *		paddr 	physical addr. for start of scrub pages
6179  *		vaddr 	virtual addr. for scrub area
6180  *		psz	page size of area to be scrubbed
6181  *
6182  * flag == TIMEOUT_CEEN_CHECK
6183  *		timeout function has triggered, reset timeout or CEEN
6184  *
6185  * Note: We must not migrate cpus during this function.  This can be
6186  * achieved by one of:
6187  *    - invoking as target of an x-call in which case we're at XCALL_PIL
6188  *	The flag value must be first xcall argument.
6189  *    - disabling kernel preemption.  This should be done for very short
6190  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6191  *	scrub an extended area with cpu_check_block.  The call for
6192  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6193  *	brief for this case.
6194  *    - binding to a cpu, eg with thread_affinity_set().  This is used
6195  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6196  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6197  */
6198 void
6199 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6200 {
6201 	ch_cpu_errors_t	cpu_error_regs;
6202 	uint64_t	ec_err_enable;
6203 	uint64_t	page_offset;
6204 
6205 	/* Read AFSR */
6206 	get_cpu_error_state(&cpu_error_regs);
6207 
6208 	/*
6209 	 * If no CEEN errors have occurred during the timeout
6210 	 * interval, it is safe to re-enable CEEN and exit.
6211 	 */
6212 	if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
6213 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
6214 		if (flag == TIMEOUT_CEEN_CHECK &&
6215 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6216 			set_error_enable(ec_err_enable | EN_REG_CEEN);
6217 		return;
6218 	}
6219 
6220 	/*
6221 	 * Ensure that CEEN was not reenabled (maybe by DR) before
6222 	 * we log/clear the error.
6223 	 */
6224 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6225 		set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6226 
6227 	/*
6228 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6229 	 * timeout will be rescheduled when the error is logged.
6230 	 */
6231 	if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
6232 	    (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
6233 		cpu_ce_detected(&cpu_error_regs,
6234 		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6235 	else
6236 		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6237 
6238 	/*
6239 	 * If the memory scrubber runs while CEEN is
6240 	 * disabled, (or if CEEN is disabled during the
6241 	 * scrub as a result of a CE being triggered by
6242 	 * it), the range being scrubbed will not be
6243 	 * completely cleaned. If there are multiple CEs
6244 	 * in the range at most two of these will be dealt
6245 	 * with, (one by the trap handler and one by the
6246 	 * timeout). It is also possible that none are dealt
6247 	 * with, (CEEN disabled and another CE occurs before
6248 	 * the timeout triggers). So to ensure that the
6249 	 * memory is actually scrubbed, we have to access each
6250 	 * memory location in the range and then check whether
6251 	 * that access causes a CE.
6252 	 */
6253 	if (flag == SCRUBBER_CEEN_CHECK && va) {
6254 		if ((cpu_error_regs.afar >= pa) &&
6255 		    (cpu_error_regs.afar < (pa + psz))) {
6256 			/*
6257 			 * Force a load from physical memory for each
6258 			 * 64-byte block, then check AFSR to determine
6259 			 * whether this access caused an error.
6260 			 *
6261 			 * This is a slow way to do a scrub, but as it will
6262 			 * only be invoked when the memory scrubber actually
6263 			 * triggered a CE, it should not happen too
6264 			 * frequently.
6265 			 *
6266 			 * cut down what we need to check as the scrubber
6267 			 * has verified up to AFAR, so get it's offset
6268 			 * into the page and start there.
6269 			 */
6270 			page_offset = (uint64_t)(cpu_error_regs.afar &
6271 			    (psz - 1));
6272 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6273 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6274 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6275 			    psz);
6276 		}
6277 	}
6278 
6279 	/*
6280 	 * Reset error enable if this CE is not masked.
6281 	 */
6282 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6283 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6284 		set_error_enable(ec_err_enable | EN_REG_CEEN);
6285 
6286 }
6287 
6288 /*
6289  * Attempt a cpu logout for an error that we did not trap for, such
6290  * as a CE noticed with CEEN off.  It is assumed that we are still running
6291  * on the cpu that took the error and that we cannot migrate.  Returns
6292  * 0 on success, otherwise nonzero.
6293  */
6294 static int
6295 cpu_ce_delayed_ec_logout(uint64_t afar)
6296 {
6297 	ch_cpu_logout_t *clop;
6298 
6299 	if (CPU_PRIVATE(CPU) == NULL)
6300 		return (0);
6301 
6302 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6303 	if (atomic_cas_64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6304 	    LOGOUT_INVALID)
6305 		return (0);
6306 
6307 	cpu_delayed_logout(afar, clop);
6308 	return (1);
6309 }
6310 
6311 /*
6312  * We got an error while CEEN was disabled. We
6313  * need to clean up after it and log whatever
6314  * information we have on the CE.
6315  */
6316 void
6317 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6318 {
6319 	ch_async_flt_t 	ch_flt;
6320 	struct async_flt *aflt;
6321 	char 		pr_reason[MAX_REASON_STRING];
6322 
6323 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6324 	ch_flt.flt_trapped_ce = flag;
6325 	aflt = (struct async_flt *)&ch_flt;
6326 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6327 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6328 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6329 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6330 	aflt->flt_addr = cpu_error_regs->afar;
6331 #if defined(SERRANO)
6332 	ch_flt.afar2 = cpu_error_regs->afar2;
6333 #endif	/* SERRANO */
6334 	aflt->flt_pc = NULL;
6335 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6336 	aflt->flt_tl = 0;
6337 	aflt->flt_panic = 0;
6338 	cpu_log_and_clear_ce(&ch_flt);
6339 
6340 	/*
6341 	 * check if we caused any errors during cleanup
6342 	 */
6343 	if (clear_errors(&ch_flt)) {
6344 		pr_reason[0] = '\0';
6345 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6346 		    NULL);
6347 	}
6348 }
6349 
6350 /*
6351  * Log/clear CEEN-controlled disrupting errors
6352  */
6353 static void
6354 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6355 {
6356 	struct async_flt *aflt;
6357 	uint64_t afsr, afsr_errs;
6358 	ch_cpu_logout_t *clop;
6359 	char 		pr_reason[MAX_REASON_STRING];
6360 	on_trap_data_t	*otp = curthread->t_ontrap;
6361 
6362 	aflt = (struct async_flt *)ch_flt;
6363 	afsr = aflt->flt_stat;
6364 	afsr_errs = ch_flt->afsr_errs;
6365 	aflt->flt_id = gethrtime_waitfree();
6366 	aflt->flt_bus_id = getprocessorid();
6367 	aflt->flt_inst = CPU->cpu_id;
6368 	aflt->flt_prot = AFLT_PROT_NONE;
6369 	aflt->flt_class = CPU_FAULT;
6370 	aflt->flt_status = ECC_C_TRAP;
6371 
6372 	pr_reason[0] = '\0';
6373 	/*
6374 	 * Get the CPU log out info for Disrupting Trap.
6375 	 */
6376 	if (CPU_PRIVATE(CPU) == NULL) {
6377 		clop = NULL;
6378 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6379 	} else {
6380 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6381 	}
6382 
6383 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6384 		ch_cpu_errors_t cpu_error_regs;
6385 
6386 		get_cpu_error_state(&cpu_error_regs);
6387 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6388 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6389 		clop->clo_data.chd_afar = cpu_error_regs.afar;
6390 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6391 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6392 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6393 		clop->clo_sdw_data.chd_afsr_ext =
6394 		    cpu_error_regs.shadow_afsr_ext;
6395 #if defined(SERRANO)
6396 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6397 #endif	/* SERRANO */
6398 		ch_flt->flt_data_incomplete = 1;
6399 
6400 		/*
6401 		 * The logging/clear code expects AFSR/AFAR to be cleared.
6402 		 * The trap handler does it for CEEN enabled errors
6403 		 * so we need to do it here.
6404 		 */
6405 		set_cpu_error_state(&cpu_error_regs);
6406 	}
6407 
6408 #if defined(JALAPENO) || defined(SERRANO)
6409 	/*
6410 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6411 	 * For Serrano, even thou we do have the AFAR, we still do the
6412 	 * scrub on the RCE side since that's where the error type can
6413 	 * be properly classified as intermittent, persistent, etc.
6414 	 *
6415 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
6416 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6417 	 * the flt_status bits.
6418 	 */
6419 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6420 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6421 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6422 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
6423 	}
6424 #else /* JALAPENO || SERRANO */
6425 	/*
6426 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
6427 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6428 	 * the flt_status bits.
6429 	 */
6430 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6431 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6432 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6433 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6434 		}
6435 	}
6436 
6437 #endif /* JALAPENO || SERRANO */
6438 
6439 	/*
6440 	 * Update flt_prot if this error occurred under on_trap protection.
6441 	 */
6442 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6443 		aflt->flt_prot = AFLT_PROT_EC;
6444 
6445 	/*
6446 	 * Queue events on the async event queue, one event per error bit.
6447 	 */
6448 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6449 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6450 		ch_flt->flt_type = CPU_INV_AFSR;
6451 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6452 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6453 		    aflt->flt_panic);
6454 	}
6455 
6456 	/*
6457 	 * Zero out + invalidate CPU logout.
6458 	 */
6459 	if (clop) {
6460 		bzero(clop, sizeof (ch_cpu_logout_t));
6461 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6462 	}
6463 
6464 	/*
6465 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6466 	 * was disabled, we need to flush either the entire
6467 	 * E$ or an E$ line.
6468 	 */
6469 #if defined(JALAPENO) || defined(SERRANO)
6470 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6471 #else	/* JALAPENO || SERRANO */
6472 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6473 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6474 #endif	/* JALAPENO || SERRANO */
6475 		cpu_error_ecache_flush(ch_flt);
6476 
6477 }
6478 
6479 /*
6480  * depending on the error type, we determine whether we
6481  * need to flush the entire ecache or just a line.
6482  */
6483 static int
6484 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6485 {
6486 	struct async_flt *aflt;
6487 	uint64_t	afsr;
6488 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6489 
6490 	aflt = (struct async_flt *)ch_flt;
6491 	afsr = aflt->flt_stat;
6492 
6493 	/*
6494 	 * If we got multiple errors, no point in trying
6495 	 * the individual cases, just flush the whole cache
6496 	 */
6497 	if (afsr & C_AFSR_ME) {
6498 		return (ECACHE_FLUSH_ALL);
6499 	}
6500 
6501 	/*
6502 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6503 	 * was disabled, we need to flush entire E$. We can't just
6504 	 * flush the cache line affected as the ME bit
6505 	 * is not set when multiple correctable errors of the same
6506 	 * type occur, so we might have multiple CPC or EDC errors,
6507 	 * with only the first recorded.
6508 	 */
6509 #if defined(JALAPENO) || defined(SERRANO)
6510 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6511 #else	/* JALAPENO || SERRANO */
6512 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6513 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6514 #endif	/* JALAPENO || SERRANO */
6515 		return (ECACHE_FLUSH_ALL);
6516 	}
6517 
6518 #if defined(JALAPENO) || defined(SERRANO)
6519 	/*
6520 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6521 	 * flush the entire Ecache.
6522 	 */
6523 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6524 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6525 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6526 			return (ECACHE_FLUSH_LINE);
6527 		} else {
6528 			return (ECACHE_FLUSH_ALL);
6529 		}
6530 	}
6531 #else /* JALAPENO || SERRANO */
6532 	/*
6533 	 * If UE only is set, flush the Ecache line, otherwise
6534 	 * flush the entire Ecache.
6535 	 */
6536 	if (afsr_errs & C_AFSR_UE) {
6537 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6538 		    C_AFSR_UE) {
6539 			return (ECACHE_FLUSH_LINE);
6540 		} else {
6541 			return (ECACHE_FLUSH_ALL);
6542 		}
6543 	}
6544 #endif /* JALAPENO || SERRANO */
6545 
6546 	/*
6547 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6548 	 * flush the entire Ecache.
6549 	 */
6550 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6551 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6552 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6553 			return (ECACHE_FLUSH_LINE);
6554 		} else {
6555 			return (ECACHE_FLUSH_ALL);
6556 		}
6557 	}
6558 
6559 	/*
6560 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6561 	 * flush the entire Ecache.
6562 	 */
6563 	if (afsr_errs & C_AFSR_BERR) {
6564 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6565 			return (ECACHE_FLUSH_LINE);
6566 		} else {
6567 			return (ECACHE_FLUSH_ALL);
6568 		}
6569 	}
6570 
6571 	return (0);
6572 }
6573 
6574 void
6575 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6576 {
6577 	int	ecache_flush_flag =
6578 	    cpu_error_ecache_flush_required(ch_flt);
6579 
6580 	/*
6581 	 * Flush Ecache line or entire Ecache based on above checks.
6582 	 */
6583 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6584 		cpu_flush_ecache();
6585 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6586 		cpu_flush_ecache_line(ch_flt);
6587 	}
6588 
6589 }
6590 
6591 /*
6592  * Extract the PA portion from the E$ tag.
6593  */
6594 uint64_t
6595 cpu_ectag_to_pa(int setsize, uint64_t tag)
6596 {
6597 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6598 		return (JG_ECTAG_TO_PA(setsize, tag));
6599 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6600 		return (PN_L3TAG_TO_PA(tag));
6601 	else
6602 		return (CH_ECTAG_TO_PA(setsize, tag));
6603 }
6604 
6605 /*
6606  * Convert the E$ tag PA into an E$ subblock index.
6607  */
6608 int
6609 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6610 {
6611 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6612 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6613 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6614 		/* Panther has only one subblock per line */
6615 		return (0);
6616 	else
6617 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6618 }
6619 
6620 /*
6621  * All subblocks in an E$ line must be invalid for
6622  * the line to be invalid.
6623  */
6624 int
6625 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6626 {
6627 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6628 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6629 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6630 		return (PN_L3_LINE_INVALID(tag));
6631 	else
6632 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6633 }
6634 
6635 /*
6636  * Extract state bits for a subblock given the tag.  Note that for Panther
6637  * this works on both l2 and l3 tags.
6638  */
6639 int
6640 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6641 {
6642 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6643 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6644 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6645 		return (tag & CH_ECSTATE_MASK);
6646 	else
6647 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6648 }
6649 
/*
 * CPU-module specific MP initialization hook.  The only work done here
 * is for builds with CHEETAHPLUS_ERRATUM_25, where cheetah_nudge_init()
 * is run if cheetah_sendmondo_recover is enabled; otherwise this is a
 * no-op.
 */
void
cpu_mp_init(void)
{
#ifdef	CHEETAHPLUS_ERRATUM_25
	if (cheetah_sendmondo_recover != 0)
		cheetah_nudge_init();
#endif	/* CHEETAHPLUS_ERRATUM_25 */
}
6662 
6663 void
6664 cpu_ereport_post(struct async_flt *aflt)
6665 {
6666 	char *cpu_type, buf[FM_MAX_CLASS];
6667 	nv_alloc_t *nva = NULL;
6668 	nvlist_t *ereport, *detector, *resource;
6669 	errorq_elem_t *eqep;
6670 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6671 	char unum[UNUM_NAMLEN];
6672 	int synd_code;
6673 	uint8_t msg_type;
6674 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6675 
6676 	if (aflt->flt_panic || panicstr) {
6677 		eqep = errorq_reserve(ereport_errorq);
6678 		if (eqep == NULL)
6679 			return;
6680 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6681 		nva = errorq_elem_nva(ereport_errorq, eqep);
6682 	} else {
6683 		ereport = fm_nvlist_create(nva);
6684 	}
6685 
6686 	/*
6687 	 * Create the scheme "cpu" FMRI.
6688 	 */
6689 	detector = fm_nvlist_create(nva);
6690 	resource = fm_nvlist_create(nva);
6691 	switch (cpunodes[aflt->flt_inst].implementation) {
6692 	case CHEETAH_IMPL:
6693 		cpu_type = FM_EREPORT_CPU_USIII;
6694 		break;
6695 	case CHEETAH_PLUS_IMPL:
6696 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6697 		break;
6698 	case JALAPENO_IMPL:
6699 		cpu_type = FM_EREPORT_CPU_USIIIi;
6700 		break;
6701 	case SERRANO_IMPL:
6702 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6703 		break;
6704 	case JAGUAR_IMPL:
6705 		cpu_type = FM_EREPORT_CPU_USIV;
6706 		break;
6707 	case PANTHER_IMPL:
6708 		cpu_type = FM_EREPORT_CPU_USIVplus;
6709 		break;
6710 	default:
6711 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6712 		break;
6713 	}
6714 
6715 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
6716 
6717 	/*
6718 	 * Encode all the common data into the ereport.
6719 	 */
6720 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6721 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6722 
6723 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6724 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6725 	    detector, NULL);
6726 
6727 	/*
6728 	 * Encode the error specific data that was saved in
6729 	 * the async_flt structure into the ereport.
6730 	 */
6731 	cpu_payload_add_aflt(aflt, ereport, resource,
6732 	    &plat_ecc_ch_flt.ecaf_afar_status,
6733 	    &plat_ecc_ch_flt.ecaf_synd_status);
6734 
6735 	if (aflt->flt_panic || panicstr) {
6736 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6737 	} else {
6738 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6739 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6740 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6741 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6742 	}
6743 	/*
6744 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6745 	 * to the SC olny if it can process it.
6746 	 */
6747 
6748 	if (&plat_ecc_capability_sc_get &&
6749 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6750 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6751 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6752 			/*
6753 			 * If afar status is not invalid do a unum lookup.
6754 			 */
6755 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6756 			    AFLT_STAT_INVALID) {
6757 				synd_code = synd_to_synd_code(
6758 				    plat_ecc_ch_flt.ecaf_synd_status,
6759 				    aflt->flt_synd, ch_flt->flt_bit);
6760 				(void) cpu_get_mem_unum_synd(synd_code,
6761 				    aflt, unum);
6762 			} else {
6763 				unum[0] = '\0';
6764 			}
6765 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6766 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6767 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6768 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6769 			    ch_flt->flt_sdw_afsr_ext;
6770 
6771 			if (&plat_log_fruid_error2)
6772 				plat_log_fruid_error2(msg_type, unum, aflt,
6773 				    &plat_ecc_ch_flt);
6774 		}
6775 	}
6776 }
6777 
6778 void
6779 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6780 {
6781 	int status;
6782 	ddi_fm_error_t de;
6783 
6784 	bzero(&de, sizeof (ddi_fm_error_t));
6785 
6786 	de.fme_version = DDI_FME_VERSION;
6787 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6788 	    FM_ENA_FMT1);
6789 	de.fme_flag = expected;
6790 	de.fme_bus_specific = (void *)aflt->flt_addr;
6791 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6792 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6793 		aflt->flt_panic = 1;
6794 }
6795 
6796 void
6797 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6798     errorq_t *eqp, uint_t flag)
6799 {
6800 	struct async_flt *aflt = (struct async_flt *)payload;
6801 
6802 	aflt->flt_erpt_class = error_class;
6803 	errorq_dispatch(eqp, payload, payload_sz, flag);
6804 }
6805 
/*
 * Intentionally empty.  The IO module may call this routine, but this
 * cpu module has nothing to do here: the SERD algorithm is handled by
 * the cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
}
6815 
6816 void
6817 adjust_hw_copy_limits(int ecache_size)
6818 {
6819 	/*
6820 	 * Set hw copy limits.
6821 	 *
6822 	 * /etc/system will be parsed later and can override one or more
6823 	 * of these settings.
6824 	 *
6825 	 * At this time, ecache size seems only mildly relevant.
6826 	 * We seem to run into issues with the d-cache and stalls
6827 	 * we see on misses.
6828 	 *
6829 	 * Cycle measurement indicates that 2 byte aligned copies fare
6830 	 * little better than doing things with VIS at around 512 bytes.
6831 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6832 	 * aligned is faster whenever the source and destination data
6833 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6834 	 * limit seems to be driven by the 2K write cache.
6835 	 * When more than 2K of copies are done in non-VIS mode, stores
6836 	 * backup in the write cache.  In VIS mode, the write cache is
6837 	 * bypassed, allowing faster cache-line writes aligned on cache
6838 	 * boundaries.
6839 	 *
6840 	 * In addition, in non-VIS mode, there is no prefetching, so
6841 	 * for larger copies, the advantage of prefetching to avoid even
6842 	 * occasional cache misses is enough to justify using the VIS code.
6843 	 *
6844 	 * During testing, it was discovered that netbench ran 3% slower
6845 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6846 	 * applications, data is only used once (copied to the output
6847 	 * buffer, then copied by the network device off the system).  Using
6848 	 * the VIS copy saves more L2 cache state.  Network copies are
6849 	 * around 1.3K to 1.5K in size for historical reasons.
6850 	 *
6851 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6852 	 * aligned copy even for large caches and 8 MB ecache.  The
6853 	 * infrastructure to allow different limits for different sized
6854 	 * caches is kept to allow further tuning in later releases.
6855 	 */
6856 
6857 	if (min_ecache_size == 0 && use_hw_bcopy) {
6858 		/*
6859 		 * First time through - should be before /etc/system
6860 		 * is read.
6861 		 * Could skip the checks for zero but this lets us
6862 		 * preserve any debugger rewrites.
6863 		 */
6864 		if (hw_copy_limit_1 == 0) {
6865 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6866 			priv_hcl_1 = hw_copy_limit_1;
6867 		}
6868 		if (hw_copy_limit_2 == 0) {
6869 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6870 			priv_hcl_2 = hw_copy_limit_2;
6871 		}
6872 		if (hw_copy_limit_4 == 0) {
6873 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6874 			priv_hcl_4 = hw_copy_limit_4;
6875 		}
6876 		if (hw_copy_limit_8 == 0) {
6877 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6878 			priv_hcl_8 = hw_copy_limit_8;
6879 		}
6880 		min_ecache_size = ecache_size;
6881 	} else {
6882 		/*
6883 		 * MP initialization. Called *after* /etc/system has
6884 		 * been parsed. One CPU has already been initialized.
6885 		 * Need to cater for /etc/system having scragged one
6886 		 * of our values.
6887 		 */
6888 		if (ecache_size == min_ecache_size) {
6889 			/*
6890 			 * Same size ecache. We do nothing unless we
6891 			 * have a pessimistic ecache setting. In that
6892 			 * case we become more optimistic (if the cache is
6893 			 * large enough).
6894 			 */
6895 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6896 				/*
6897 				 * Need to adjust hw_copy_limit* from our
6898 				 * pessimistic uniprocessor value to a more
6899 				 * optimistic UP value *iff* it hasn't been
6900 				 * reset.
6901 				 */
6902 				if ((ecache_size > 1048576) &&
6903 				    (priv_hcl_8 == hw_copy_limit_8)) {
6904 					if (ecache_size <= 2097152)
6905 						hw_copy_limit_8 = 4 *
6906 						    VIS_COPY_THRESHOLD;
6907 					else if (ecache_size <= 4194304)
6908 						hw_copy_limit_8 = 4 *
6909 						    VIS_COPY_THRESHOLD;
6910 					else
6911 						hw_copy_limit_8 = 4 *
6912 						    VIS_COPY_THRESHOLD;
6913 					priv_hcl_8 = hw_copy_limit_8;
6914 				}
6915 			}
6916 		} else if (ecache_size < min_ecache_size) {
6917 			/*
6918 			 * A different ecache size. Can this even happen?
6919 			 */
6920 			if (priv_hcl_8 == hw_copy_limit_8) {
6921 				/*
6922 				 * The previous value that we set
6923 				 * is unchanged (i.e., it hasn't been
6924 				 * scragged by /etc/system). Rewrite it.
6925 				 */
6926 				if (ecache_size <= 1048576)
6927 					hw_copy_limit_8 = 8 *
6928 					    VIS_COPY_THRESHOLD;
6929 				else if (ecache_size <= 2097152)
6930 					hw_copy_limit_8 = 8 *
6931 					    VIS_COPY_THRESHOLD;
6932 				else if (ecache_size <= 4194304)
6933 					hw_copy_limit_8 = 8 *
6934 					    VIS_COPY_THRESHOLD;
6935 				else
6936 					hw_copy_limit_8 = 10 *
6937 					    VIS_COPY_THRESHOLD;
6938 				priv_hcl_8 = hw_copy_limit_8;
6939 				min_ecache_size = ecache_size;
6940 			}
6941 		}
6942 	}
6943 }
6944 
6945 /*
6946  * Called from illegal instruction trap handler to see if we can attribute
6947  * the trap to a fpras check.
6948  */
6949 int
6950 fpras_chktrap(struct regs *rp)
6951 {
6952 	int op;
6953 	struct fpras_chkfngrp *cgp;
6954 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6955 
6956 	if (fpras_chkfngrps == NULL)
6957 		return (0);
6958 
6959 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6960 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6961 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6962 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6963 			break;
6964 	}
6965 	if (op == FPRAS_NCOPYOPS)
6966 		return (0);
6967 
6968 	/*
6969 	 * This is an fpRAS failure caught through an illegal
6970 	 * instruction - trampoline.
6971 	 */
6972 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6973 	rp->r_npc = rp->r_pc + 4;
6974 	return (1);
6975 }
6976 
6977 /*
6978  * fpras_failure is called when a fpras check detects a bad calculation
6979  * result or an illegal instruction trap is attributed to an fpras
6980  * check.  In all cases we are still bound to CPU.
6981  */
6982 int
6983 fpras_failure(int op, int how)
6984 {
6985 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6986 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6987 	ch_async_flt_t ch_flt;
6988 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6989 	struct fpras_chkfn *sfp, *cfp;
6990 	uint32_t *sip, *cip;
6991 	int i;
6992 
6993 	/*
6994 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6995 	 * the time in which we dispatch an ereport and (if applicable) panic.
6996 	 */
6997 	use_hw_bcopy_orig = use_hw_bcopy;
6998 	use_hw_bzero_orig = use_hw_bzero;
6999 	hcl1_orig = hw_copy_limit_1;
7000 	hcl2_orig = hw_copy_limit_2;
7001 	hcl4_orig = hw_copy_limit_4;
7002 	hcl8_orig = hw_copy_limit_8;
7003 	use_hw_bcopy = use_hw_bzero = 0;
7004 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
7005 	    hw_copy_limit_8 = 0;
7006 
7007 	bzero(&ch_flt, sizeof (ch_async_flt_t));
7008 	aflt->flt_id = gethrtime_waitfree();
7009 	aflt->flt_class = CPU_FAULT;
7010 	aflt->flt_inst = CPU->cpu_id;
7011 	aflt->flt_status = (how << 8) | op;
7012 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
7013 	ch_flt.flt_type = CPU_FPUERR;
7014 
7015 	/*
7016 	 * We must panic if the copy operation had no lofault protection -
7017 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
7018 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
7019 	 */
7020 	aflt->flt_panic = (curthread->t_lofault == NULL);
7021 
7022 	/*
7023 	 * XOR the source instruction block with the copied instruction
7024 	 * block - this will show us which bit(s) are corrupted.
7025 	 */
7026 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
7027 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
7028 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
7029 		sip = &sfp->fpras_blk0[0];
7030 		cip = &cfp->fpras_blk0[0];
7031 	} else {
7032 		sip = &sfp->fpras_blk1[0];
7033 		cip = &cfp->fpras_blk1[0];
7034 	}
7035 	for (i = 0; i < 16; ++i, ++sip, ++cip)
7036 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
7037 
7038 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
7039 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
7040 
7041 	if (aflt->flt_panic)
7042 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
7043 
7044 	/*
7045 	 * We get here for copyin/copyout and kcopy or bcopy where the
7046 	 * caller has used on_fault.  We will flag the error so that
7047 	 * the process may be killed  The trap_async_hwerr mechanism will
7048 	 * take appropriate further action (such as a reboot, contract
7049 	 * notification etc).  Since we may be continuing we will
7050 	 * restore the global hardware copy acceleration switches.
7051 	 *
7052 	 * When we return from this function to the copy function we want to
7053 	 * avoid potentially bad data being used, ie we want the affected
7054 	 * copy function to return an error.  The caller should therefore
7055 	 * invoke its lofault handler (which always exists for these functions)
7056 	 * which will return the appropriate error.
7057 	 */
7058 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
7059 	aston(curthread);
7060 
7061 	use_hw_bcopy = use_hw_bcopy_orig;
7062 	use_hw_bzero = use_hw_bzero_orig;
7063 	hw_copy_limit_1 = hcl1_orig;
7064 	hw_copy_limit_2 = hcl2_orig;
7065 	hw_copy_limit_4 = hcl4_orig;
7066 	hw_copy_limit_8 = hcl8_orig;
7067 
7068 	return (1);
7069 }
7070 
7071 #define	VIS_BLOCKSIZE		64
7072 
7073 int
7074 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7075 {
7076 	int ret, watched;
7077 
7078 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7079 	ret = dtrace_blksuword32(addr, data, 0);
7080 	if (watched)
7081 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7082 
7083 	return (ret);
7084 }
7085 
7086 /*
7087  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7088  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
7089  * CEEN from the EER to disable traps for further disrupting error types
7090  * on that cpu.  We could cross-call instead, but that has a larger
7091  * instruction and data footprint than cross-trapping, and the cpu is known
7092  * to be faulted.
7093  */
7094 
7095 void
7096 cpu_faulted_enter(struct cpu *cp)
7097 {
7098 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7099 }
7100 
7101 /*
7102  * Called when a cpu leaves the CPU_FAULTED state to return to one of
7103  * offline, spare, or online (by the cpu requesting this state change).
7104  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7105  * disrupting error bits that have accumulated without trapping, then
7106  * we cross-trap to re-enable CEEN controlled traps.
7107  */
7108 void
7109 cpu_faulted_exit(struct cpu *cp)
7110 {
7111 	ch_cpu_errors_t cpu_error_regs;
7112 
7113 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7114 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7115 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7116 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7117 	    (uint64_t)&cpu_error_regs, 0);
7118 
7119 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7120 }
7121 
7122 /*
7123  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7124  * the errors in the original AFSR, 0 otherwise.
7125  *
7126  * For all procs if the initial error was a BERR or TO, then it is possible
7127  * that we may have caused a secondary BERR or TO in the process of logging the
7128  * inital error via cpu_run_bus_error_handlers().  If this is the case then
7129  * if the request was protected then a panic is still not necessary, if not
7130  * protected then aft_panic is already set - so either way there's no need
7131  * to set aft_panic for the secondary error.
7132  *
7133  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7134  * a store merge, then the error handling code will call cpu_deferred_error().
7135  * When clear_errors() is called, it will determine that secondary errors have
7136  * occurred - in particular, the store merge also caused a EDU and WDU that
7137  * weren't discovered until this point.
7138  *
7139  * We do three checks to verify that we are in this case.  If we pass all three
7140  * checks, we return 1 to indicate that we should not panic.  If any unexpected
7141  * errors occur, we return 0.
7142  *
7143  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7144  * handled in cpu_disrupting_errors().  Since this function is not even called
7145  * in the case we are interested in, we just return 0 for these processors.
7146  */
7147 /*ARGSUSED*/
7148 static int
7149 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7150     uint64_t t_afar)
7151 {
7152 #if defined(CHEETAH_PLUS)
7153 #else	/* CHEETAH_PLUS */
7154 	struct async_flt *aflt = (struct async_flt *)ch_flt;
7155 #endif	/* CHEETAH_PLUS */
7156 
7157 	/*
7158 	 * Was the original error a BERR or TO and only a BERR or TO
7159 	 * (multiple errors are also OK)
7160 	 */
7161 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7162 		/*
7163 		 * Is the new error a BERR or TO and only a BERR or TO
7164 		 * (multiple errors are also OK)
7165 		 */
7166 		if ((ch_flt->afsr_errs &
7167 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7168 			return (1);
7169 	}
7170 
7171 #if defined(CHEETAH_PLUS)
7172 	return (0);
7173 #else	/* CHEETAH_PLUS */
7174 	/*
7175 	 * Now look for secondary effects of a UE on cheetah/jalapeno
7176 	 *
7177 	 * Check the original error was a UE, and only a UE.  Note that
7178 	 * the ME bit will cause us to fail this check.
7179 	 */
7180 	if (t_afsr_errs != C_AFSR_UE)
7181 		return (0);
7182 
7183 	/*
7184 	 * Check the secondary errors were exclusively an EDU and/or WDU.
7185 	 */
7186 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7187 		return (0);
7188 
7189 	/*
7190 	 * Check the AFAR of the original error and secondary errors
7191 	 * match to the 64-byte boundary
7192 	 */
7193 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7194 		return (0);
7195 
7196 	/*
7197 	 * We've passed all the checks, so it's a secondary error!
7198 	 */
7199 	return (1);
7200 #endif	/* CHEETAH_PLUS */
7201 }
7202 
7203 /*
7204  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
7205  * is checked for any valid errors.  If found, the error type is
7206  * returned. If not found, the flt_type is checked for L1$ parity errors.
7207  */
7208 /*ARGSUSED*/
7209 static uint8_t
7210 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7211 {
7212 #if defined(JALAPENO)
7213 	/*
7214 	 * Currently, logging errors to the SC is not supported on Jalapeno
7215 	 */
7216 	return (PLAT_ECC_ERROR2_NONE);
7217 #else
7218 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7219 
7220 	switch (ch_flt->flt_bit) {
7221 	case C_AFSR_CE:
7222 		return (PLAT_ECC_ERROR2_CE);
7223 	case C_AFSR_UCC:
7224 	case C_AFSR_EDC:
7225 	case C_AFSR_WDC:
7226 	case C_AFSR_CPC:
7227 		return (PLAT_ECC_ERROR2_L2_CE);
7228 	case C_AFSR_EMC:
7229 		return (PLAT_ECC_ERROR2_EMC);
7230 	case C_AFSR_IVC:
7231 		return (PLAT_ECC_ERROR2_IVC);
7232 	case C_AFSR_UE:
7233 		return (PLAT_ECC_ERROR2_UE);
7234 	case C_AFSR_UCU:
7235 	case C_AFSR_EDU:
7236 	case C_AFSR_WDU:
7237 	case C_AFSR_CPU:
7238 		return (PLAT_ECC_ERROR2_L2_UE);
7239 	case C_AFSR_IVU:
7240 		return (PLAT_ECC_ERROR2_IVU);
7241 	case C_AFSR_TO:
7242 		return (PLAT_ECC_ERROR2_TO);
7243 	case C_AFSR_BERR:
7244 		return (PLAT_ECC_ERROR2_BERR);
7245 #if defined(CHEETAH_PLUS)
7246 	case C_AFSR_L3_EDC:
7247 	case C_AFSR_L3_UCC:
7248 	case C_AFSR_L3_CPC:
7249 	case C_AFSR_L3_WDC:
7250 		return (PLAT_ECC_ERROR2_L3_CE);
7251 	case C_AFSR_IMC:
7252 		return (PLAT_ECC_ERROR2_IMC);
7253 	case C_AFSR_TSCE:
7254 		return (PLAT_ECC_ERROR2_L2_TSCE);
7255 	case C_AFSR_THCE:
7256 		return (PLAT_ECC_ERROR2_L2_THCE);
7257 	case C_AFSR_L3_MECC:
7258 		return (PLAT_ECC_ERROR2_L3_MECC);
7259 	case C_AFSR_L3_THCE:
7260 		return (PLAT_ECC_ERROR2_L3_THCE);
7261 	case C_AFSR_L3_CPU:
7262 	case C_AFSR_L3_EDU:
7263 	case C_AFSR_L3_UCU:
7264 	case C_AFSR_L3_WDU:
7265 		return (PLAT_ECC_ERROR2_L3_UE);
7266 	case C_AFSR_DUE:
7267 		return (PLAT_ECC_ERROR2_DUE);
7268 	case C_AFSR_DTO:
7269 		return (PLAT_ECC_ERROR2_DTO);
7270 	case C_AFSR_DBERR:
7271 		return (PLAT_ECC_ERROR2_DBERR);
7272 #endif	/* CHEETAH_PLUS */
7273 	default:
7274 		switch (ch_flt->flt_type) {
7275 #if defined(CPU_IMP_L1_CACHE_PARITY)
7276 		case CPU_IC_PARITY:
7277 			return (PLAT_ECC_ERROR2_IPE);
7278 		case CPU_DC_PARITY:
7279 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7280 				if (ch_flt->parity_data.dpe.cpl_cache ==
7281 				    CPU_PC_PARITY) {
7282 					return (PLAT_ECC_ERROR2_PCACHE);
7283 				}
7284 			}
7285 			return (PLAT_ECC_ERROR2_DPE);
7286 #endif /* CPU_IMP_L1_CACHE_PARITY */
7287 		case CPU_ITLB_PARITY:
7288 			return (PLAT_ECC_ERROR2_ITLB);
7289 		case CPU_DTLB_PARITY:
7290 			return (PLAT_ECC_ERROR2_DTLB);
7291 		default:
7292 			return (PLAT_ECC_ERROR2_NONE);
7293 		}
7294 	}
7295 #endif	/* JALAPENO */
7296 }
7297