1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 1990-2002 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <sys/types.h>
30 #include <sys/conf.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/ddi_impldefs.h>
34 #include <sys/cmn_err.h>
35 #include <sys/async.h>
36 #include <sys/sysiosbus.h>
37 #include <sys/sysioerr.h>
38 #include <sys/x_call.h>
39 #include <sys/machsystm.h>
40 #include <sys/sysmacros.h>
41 #include <sys/vmsystm.h>
42 #include <sys/cpu_module.h>
43
44 /*
45 * Set the following variable in /etc/system to tell the kernel
46 * not to shutdown the machine if the temperature reaches
47 * the Thermal Warning limit.
48 */
49 int oven_test = 0;
50
51 /*
52 * To indicate if the prom has the property of "thermal-interrupt".
53 */
54 static int thermal_interrupt_enabled = 0;
55
56 #ifdef _STARFIRE
57 #include <sys/starfire.h>
58
59 int
60 pc_translate_tgtid(caddr_t, int, volatile uint64_t *);
61
62 void
63 pc_ittrans_cleanup(caddr_t, volatile uint64_t *);
64 #endif /* _STARFIRE */
65
66 /*
67 * adb debug_sysio_errs to 1 if you don't want your system to panic on
68 * sbus ue errors. adb sysio_err_flag to 0 if you don't want your system
69 * to check for sysio errors at all.
70 */
71 int sysio_err_flag = 1;
72 uint_t debug_sysio_errs = 0;
73
74 /*
75 * bto_cnt = number of bus errors and timeouts allowed within bto_secs
76 * use /etc/system to change the bto_cnt to a very large number if
77 * it's a problem!
78 */
79 int bto_secs = 10;
80 int bto_cnt = 10;
81
82 static uint_t
83 sysio_ue_intr(struct sbus_soft_state *softsp);
84
85 static uint_t
86 sysio_ce_intr(struct sbus_soft_state *softsp);
87
88 static uint_t
89 sbus_err_intr(struct sbus_soft_state *softsp);
90
91 static void
92 sysio_log_ce_err(struct async_flt *ecc, char *unum);
93
94 static void
95 sysio_log_ue_err(struct async_flt *ecc, char *unum);
96
97 static void
98 sbus_clear_intr(struct sbus_soft_state *softsp, uint64_t *pafsr);
99
100 static void
101 sbus_log_error(struct sbus_soft_state *softsp, uint64_t *pafsr, uint64_t *pafar,
102 ushort_t id, ushort_t inst, int cleared,
103 on_trap_data_t *ontrap_data);
104
105 static int
106 sbus_check_bto(struct sbus_soft_state *softsp);
107
108 static void
109 sbus_log_csr_error(struct async_flt *aflt, char *unum);
110
111 static uint_t
112 sbus_ctrl_ecc_err(struct sbus_soft_state *softsp);
113
114 static uint_t
115 sysio_dis_err(struct sbus_soft_state *softsp);
116
117 static uint_t
118 sysio_init_err(struct sbus_soft_state *softsp);
119
120 static uint_t
121 sysio_thermal_warn_intr(struct sbus_soft_state *softsp);
122
123 static int sbus_pil[] = {SBUS_UE_PIL, SBUS_CE_PIL, SBUS_ERR_PIL, SBUS_PF_PIL,
124 SBUS_THERMAL_PIL, SBUS_PM_PIL};
125 int
sysio_err_init(struct sbus_soft_state * softsp,caddr_t address)126 sysio_err_init(struct sbus_soft_state *softsp, caddr_t address)
127 {
128 if (sysio_err_flag == 0) {
129 cmn_err(CE_CONT, "Warning: sysio errors not initialized\n");
130 return (DDI_SUCCESS);
131 }
132
133 /*
134 * Get the address of the already mapped-in sysio/sbus error registers.
135 * Simply add each registers offset to the already mapped in address
136 * that was retrieved from the device node's "address" property,
137 * and passed as an argument to this function.
138 *
139 * Define a macro for the pointer arithmetic ...
140 */
141
142 #define REG_ADDR(b, o) (uint64_t *)((caddr_t)(b) + (o))
143
144 softsp->sysio_ecc_reg = REG_ADDR(address, OFF_SYSIO_ECC_REGS);
145 softsp->sysio_ue_reg = REG_ADDR(address, OFF_SYSIO_UE_REGS);
146 softsp->sysio_ce_reg = REG_ADDR(address, OFF_SYSIO_CE_REGS);
147 softsp->sbus_err_reg = REG_ADDR(address, OFF_SBUS_ERR_REGS);
148
149 #undef REG_ADDR
150
151 /*
152 * create the interrupt-priorities property if it doesn't
153 * already exist to provide a hint as to the PIL level for
154 * our interrupt.
155 */
156 {
157 int len;
158
159 if (ddi_getproplen(DDI_DEV_T_ANY, softsp->dip,
160 DDI_PROP_DONTPASS, "interrupt-priorities",
161 &len) != DDI_PROP_SUCCESS) {
162 /* Create the interrupt-priorities property. */
163 (void) ddi_prop_update_int_array(DDI_DEV_T_NONE,
164 softsp->dip, "interrupt-priorities",
165 (int *)sbus_pil, sizeof (sbus_pil) / sizeof (int));
166 }
167 }
168
169 (void) ddi_add_intr(softsp->dip, 0, NULL, NULL,
170 (uint_t (*)())sysio_ue_intr, (caddr_t)softsp);
171 (void) ddi_add_intr(softsp->dip, 1, NULL, NULL,
172 (uint_t (*)())sysio_ce_intr, (caddr_t)softsp);
173 (void) ddi_add_intr(softsp->dip, 2, NULL, NULL,
174 (uint_t (*)())sbus_err_intr, (caddr_t)softsp);
175 /*
176 * If the thermal-interrupt property is in place,
177 * then register the thermal warning interrupt handler and
178 * program its mapping register
179 */
180 thermal_interrupt_enabled = ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
181 DDI_PROP_DONTPASS, "thermal-interrupt", -1);
182
183 if (thermal_interrupt_enabled == 1) {
184 (void) ddi_add_intr(softsp->dip, 4, NULL, NULL,
185 (uint_t (*)())sysio_thermal_warn_intr, (caddr_t)softsp);
186 }
187
188 bus_func_register(BF_TYPE_UE, (busfunc_t)sbus_ctrl_ecc_err, softsp);
189 bus_func_register(BF_TYPE_ERRDIS, (busfunc_t)sysio_dis_err, softsp);
190
191 (void) sysio_init_err(softsp);
192
193 return (DDI_SUCCESS);
194 }
195
196 int
sysio_err_resume_init(struct sbus_soft_state * softsp)197 sysio_err_resume_init(struct sbus_soft_state *softsp)
198 {
199 (void) sysio_init_err(softsp);
200 return (DDI_SUCCESS);
201 }
202
203 int
sysio_err_uninit(struct sbus_soft_state * softsp)204 sysio_err_uninit(struct sbus_soft_state *softsp)
205 {
206 /* remove the interrupts from the interrupt list */
207 (void) sysio_dis_err(softsp);
208
209 ddi_remove_intr(softsp->dip, 0, NULL);
210 ddi_remove_intr(softsp->dip, 1, NULL);
211 ddi_remove_intr(softsp->dip, 2, NULL);
212
213 if (thermal_interrupt_enabled == 1) {
214 ddi_remove_intr(softsp->dip, 4, NULL);
215 }
216
217 bus_func_unregister(BF_TYPE_UE, (busfunc_t)sbus_ctrl_ecc_err, softsp);
218 bus_func_unregister(BF_TYPE_ERRDIS, (busfunc_t)sysio_dis_err, softsp);
219
220 return (DDI_SUCCESS);
221 }
222
223 static uint_t
sysio_init_err(struct sbus_soft_state * softsp)224 sysio_init_err(struct sbus_soft_state *softsp)
225 {
226 volatile uint64_t tmp_mondo_vec, tmpreg;
227 volatile uint64_t *mondo_vec_reg;
228 uint_t cpu_id, acpu_id;
229
230 acpu_id = intr_dist_cpuid();
231 /*
232 * Program the mondo vector accordingly. This MUST be the
233 * last thing we do. Once we program the mondo, the device
234 * may begin to interrupt. Store it in the hardware reg.
235 */
236 mondo_vec_reg = (uint64_t *)(softsp->intr_mapping_reg + UE_ECC_MAPREG);
237 cpu_id = acpu_id;
238 #ifdef _STARFIRE
239 cpu_id = pc_translate_tgtid(softsp->ittrans_cookie, cpu_id,
240 mondo_vec_reg);
241 #endif /* _STARFIRE */
242 tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) | INTERRUPT_VALID;
243 *mondo_vec_reg = tmp_mondo_vec;
244
245 mondo_vec_reg = (uint64_t *)(softsp->intr_mapping_reg + CE_ECC_MAPREG);
246 cpu_id = acpu_id;
247 #ifdef _STARFIRE
248 cpu_id = pc_translate_tgtid(softsp->ittrans_cookie, cpu_id,
249 mondo_vec_reg);
250 #endif /* _STARFIRE */
251 tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) | INTERRUPT_VALID;
252 *mondo_vec_reg = tmp_mondo_vec;
253
254 mondo_vec_reg =
255 (uint64_t *)(softsp->intr_mapping_reg + SBUS_ERR_MAPREG);
256 cpu_id = acpu_id;
257 #ifdef _STARFIRE
258 cpu_id = pc_translate_tgtid(softsp->ittrans_cookie, cpu_id,
259 mondo_vec_reg);
260 #endif /* _STARFIRE */
261
262 tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) | INTERRUPT_VALID;
263 *mondo_vec_reg = tmp_mondo_vec;
264
265 if (thermal_interrupt_enabled == 1) {
266 mondo_vec_reg = (softsp->intr_mapping_reg + THERMAL_MAPREG);
267 cpu_id = acpu_id;
268 tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) |
269 INTERRUPT_VALID;
270 *mondo_vec_reg = tmp_mondo_vec;
271 }
272
273 /* Flush store buffers */
274 tmpreg = *softsp->sbus_ctrl_reg;
275
276 /*
277 * XXX - This may already be set by the OBP.
278 */
279 tmpreg = SYSIO_APCKEN;
280 *softsp->sysio_ctrl_reg |= tmpreg;
281 tmpreg = (SECR_ECC_EN | SECR_UE_INTEN | SECR_CE_INTEN);
282 *softsp->sysio_ecc_reg = tmpreg;
283 tmpreg = SB_CSR_ERRINT_EN;
284 *softsp->sbus_err_reg |= tmpreg;
285
286 /* Initialize timeout/bus error counter */
287 softsp->bto_timestamp = 0;
288 softsp->bto_ctr = 0;
289
290 return (0);
291 }
292
293 static uint_t
sysio_dis_err(struct sbus_soft_state * softsp)294 sysio_dis_err(struct sbus_soft_state *softsp)
295 {
296 volatile uint64_t tmpreg;
297 volatile uint64_t *mondo_vec_reg, *clear_vec_reg;
298
299 *softsp->sysio_ctrl_reg &= ~SYSIO_APCKEN;
300 *softsp->sysio_ecc_reg = 0;
301 *softsp->sbus_err_reg &= ~SB_CSR_ERRINT_EN;
302
303 /* Flush store buffers */
304 tmpreg = *softsp->sbus_ctrl_reg;
305 #ifdef lint
306 tmpreg = tmpreg;
307 #endif
308
309 /* Unmap mapping registers */
310 mondo_vec_reg = (softsp->intr_mapping_reg + UE_ECC_MAPREG);
311 clear_vec_reg = (softsp->clr_intr_reg + UE_ECC_CLEAR);
312
313 *mondo_vec_reg = 0;
314
315 #ifdef _STARFIRE
316 /* do cleanup for starfire interrupt target translation */
317 pc_ittrans_cleanup(softsp->ittrans_cookie, mondo_vec_reg);
318 #endif /* _STARFIRE */
319
320 *clear_vec_reg = 0;
321
322 mondo_vec_reg = (softsp->intr_mapping_reg + CE_ECC_MAPREG);
323 clear_vec_reg = (softsp->clr_intr_reg + CE_ECC_CLEAR);
324
325 *mondo_vec_reg = 0;
326
327 #ifdef _STARFIRE
328 /* Do cleanup for starfire interrupt target translation */
329 pc_ittrans_cleanup(softsp->ittrans_cookie, mondo_vec_reg);
330 #endif /* _STARFIRE */
331
332 *clear_vec_reg = 0;
333
334 mondo_vec_reg = (softsp->intr_mapping_reg + SBUS_ERR_MAPREG);
335 clear_vec_reg = (softsp->clr_intr_reg + SBUS_ERR_CLEAR);
336
337 *mondo_vec_reg = 0;
338
339 #ifdef _STARFIRE
340 /* Do cleanup for starfire interrupt target translation */
341 pc_ittrans_cleanup(softsp->ittrans_cookie, mondo_vec_reg);
342 #endif /* _STARFIRE */
343
344 *clear_vec_reg = 0;
345
346 /* Flush store buffers */
347 tmpreg = *softsp->sbus_ctrl_reg;
348
349 return (BF_NONE);
350 }
351
352 /*
353 * Gather information about the error into an async_flt structure, and then
354 * enqueue the error for reporting and processing and panic.
355 */
356 static uint_t
sysio_ue_intr(struct sbus_soft_state * softsp)357 sysio_ue_intr(struct sbus_soft_state *softsp)
358 {
359 volatile uint64_t t_afsr;
360 volatile uint64_t t_afar;
361 volatile uint64_t *ue_reg, *afar_reg, *clear_reg;
362 struct async_flt ecc;
363 uint64_t offset;
364
365 /*
366 * Disable all further sbus errors, for this sbus instance, for
367 * what is guaranteed to be a fatal error. And grab any other cpus.
368 */
369 (void) sysio_dis_err(softsp); /* disabled sysio errors */
370
371 /*
372 * Then read and clear the afsr/afar and clear interrupt regs.
373 */
374 ue_reg = (uint64_t *)softsp->sysio_ue_reg;
375 t_afsr = *ue_reg;
376 afar_reg = (uint64_t *)ue_reg + 1;
377 t_afar = *afar_reg;
378 *ue_reg = t_afsr;
379
380 clear_reg = (softsp->clr_intr_reg + UE_ECC_CLEAR);
381 *clear_reg = 0;
382
383 /*
384 * The AFSR DW_OFFSET field contains the offset of the doubleword with
385 * the ECC error relative to the 64-byte aligned PA. We multiply by 8
386 * to convert to a byte offset, and then add this to flt_addr.
387 */
388 offset = ((t_afsr & SB_UE_AFSR_OFF) >> SB_UE_DW_SHIFT) * 8;
389
390 bzero(&ecc, sizeof (ecc));
391 ecc.flt_id = gethrtime();
392 ecc.flt_stat = t_afsr;
393 ecc.flt_addr = P2ALIGN(t_afar, 64) + offset;
394 ecc.flt_func = sysio_log_ue_err;
395 ecc.flt_bus_id = softsp->upa_id;
396 ecc.flt_inst = ddi_get_instance(softsp->dip);
397 ecc.flt_status = ECC_IOBUS;
398 ecc.flt_in_memory = (pf_is_memory(t_afar >> MMU_PAGESHIFT)) ? 1: 0;
399 ecc.flt_class = BUS_FAULT;
400 ecc.flt_panic = (debug_sysio_errs == 0);
401
402 errorq_dispatch(ue_queue, &ecc, sizeof (ecc), ecc.flt_panic);
403
404 /*
405 * If the UE is in memory and fatal, save the fault info so the
406 * panic code will know to check for copyback errors.
407 */
408 if (ecc.flt_panic && ecc.flt_in_memory)
409 panic_aflt = ecc;
410
411 /*
412 * We must also check for other bus UE errors, and panic if
413 * any fatal ones are detected at this point.
414 */
415 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
416 ecc.flt_panic = 1;
417
418 if (ecc.flt_panic)
419 cmn_err(CE_PANIC, "Fatal Sbus%d UE Error", ecc.flt_inst);
420
421 return (DDI_INTR_CLAIMED);
422 }
423
424 /*
425 * callback logging function from the common error handling code
426 */
427 static void
sysio_log_ue_err(struct async_flt * ecc,char * unum)428 sysio_log_ue_err(struct async_flt *ecc, char *unum)
429 {
430 uint64_t t_afsr = ecc->flt_stat;
431 uint64_t t_afar = ecc->flt_addr;
432
433 ushort_t id = ecc->flt_bus_id;
434 ushort_t inst = ecc->flt_inst;
435
436 if (t_afsr & SB_UE_AFSR_P_PIO) {
437 cmn_err(CE_WARN, "SBus%d UE Primary Error from PIO: "
438 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
439 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
440 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
441 }
442 if (t_afsr & SB_UE_AFSR_P_DRD) {
443 cmn_err(CE_WARN, "SBus%d UE Primary Error DMA read: "
444 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d",
445 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
446 (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id);
447 }
448 if (t_afsr & SB_UE_AFSR_P_DWR) {
449 cmn_err(CE_WARN, "SBus%d UE Primary Error DVMA write: "
450 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d",
451 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
452 (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id);
453 }
454 /*
455 * We should never hit the secondary error panics.
456 */
457 if (t_afsr & SB_UE_AFSR_S_PIO) {
458 cmn_err(CE_WARN, "SBus%d UE Secondary Error from PIO: "
459 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
460 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
461 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
462 }
463 if (t_afsr & SB_UE_AFSR_S_DRD) {
464 cmn_err(CE_WARN, "SBus%d UE Secondary Error DMA read: "
465 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d",
466 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
467 (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id);
468 }
469 if (t_afsr & SB_UE_AFSR_S_DWR) {
470 cmn_err(CE_WARN, "SBus%d UE Secondary Error DMA write: "
471 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d",
472 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
473 (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id);
474 }
475
476 if ((debug_sysio_errs) || (aft_verbose)) {
477 (void) read_ecc_data(ecc, 1, 0);
478 cmn_err(CE_CONT, "\tOffset 0x%x, Size %d, UPA MID 0x%x\n",
479 (uint32_t)((t_afsr & SB_UE_AFSR_OFF) >> SB_UE_DW_SHIFT),
480 (uint32_t)((t_afsr & SB_UE_AFSR_SIZE) >> SB_UE_SIZE_SHIFT),
481 (uint32_t)((t_afsr & SB_UE_AFSR_MID) >> SB_UE_MID_SHIFT));
482 }
483 }
484
485 /*
486 * gather the information about the error, plus a pointer to
487 * the callback logging function, and call the generic ce_error handler.
488 */
489 static uint_t
sysio_ce_intr(struct sbus_soft_state * softsp)490 sysio_ce_intr(struct sbus_soft_state *softsp)
491 {
492 volatile uint64_t t_afsr;
493 volatile uint64_t t_afar;
494 volatile uint64_t *afar_reg, *clear_reg, *ce_reg;
495 struct async_flt ecc;
496 uint64_t offset;
497
498 ce_reg = (uint64_t *)softsp->sysio_ce_reg;
499 t_afsr = *ce_reg;
500 afar_reg = (uint64_t *)ce_reg + 1;
501 t_afar = *afar_reg;
502 *ce_reg = t_afsr;
503
504 clear_reg = (softsp->clr_intr_reg + CE_ECC_CLEAR);
505 *clear_reg = 0;
506
507 /*
508 * The AFSR DW_OFFSET field contains the offset of the doubleword with
509 * the ECC error relative to the 64-byte aligned PA. We multiply by 8
510 * to convert to a byte offset, and then add this to flt_addr.
511 */
512 offset = ((t_afsr & SB_UE_AFSR_OFF) >> SB_UE_DW_SHIFT) * 8;
513
514 bzero(&ecc, sizeof (ecc));
515 ecc.flt_id = gethrtime();
516 ecc.flt_stat = t_afsr;
517 ecc.flt_addr = P2ALIGN(t_afar, 64) + offset;
518 ecc.flt_func = sysio_log_ce_err;
519 ecc.flt_bus_id = softsp->upa_id;
520 ecc.flt_inst = ddi_get_instance(softsp->dip);
521 ecc.flt_status = ECC_IOBUS;
522
523 ecc.flt_synd = (ushort_t)((t_afsr & SB_CE_AFSR_SYND) >>
524 SB_CE_SYND_SHIFT);
525
526 ecc.flt_in_memory = (pf_is_memory(t_afar >> MMU_PAGESHIFT)) ? 1: 0;
527 ecc.flt_class = BUS_FAULT;
528
529 ce_scrub(&ecc);
530 errorq_dispatch(ce_queue, &ecc, sizeof (ecc), ERRORQ_ASYNC);
531
532 return (DDI_INTR_CLAIMED);
533 }
534
535 /*
536 * callback logging function from the common error handling code
537 */
538 static void
sysio_log_ce_err(struct async_flt * ecc,char * unum)539 sysio_log_ce_err(struct async_flt *ecc, char *unum)
540 {
541 uint64_t t_afsr = ecc->flt_stat;
542 uint64_t t_afar = ecc->flt_addr;
543 ushort_t id = ecc->flt_bus_id;
544 ushort_t inst = ecc->flt_inst;
545 int ce_verbose = ce_verbose_memory;
546 char *syndrome_str = "!\tSyndrome 0x%x, Offset 0x%x, Size %d, "
547 "UPA MID 0x%x\n";
548
549 if ((!ce_verbose_memory) && (!debug_sysio_errs))
550 return;
551
552 if (t_afsr & SB_CE_AFSR_P_PIO) {
553 char *fmtstr = "!SBus%d CE Primary Error from PIO: "
554 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d\n";
555
556 if ((debug_sysio_errs) || (ce_verbose > 1))
557 fmtstr++;
558
559 cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32),
560 (uint32_t)t_afsr, (uint32_t)(t_afar>>32),
561 (uint32_t)t_afar, id);
562 }
563 if (t_afsr & SB_CE_AFSR_P_DRD) {
564 char *fmtstr = "!SBus%d CE Primary Error DMA read: "
565 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s "
566 "Id %d\n";
567
568 if ((debug_sysio_errs) || (ce_verbose > 1))
569 fmtstr++;
570
571 cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32),
572 (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar,
573 unum, id);
574 }
575 if (t_afsr & SB_CE_AFSR_P_DWR) {
576 char *fmtstr = "!SBus%d CE Primary Error DMA write: "
577 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d\n";
578
579 if ((debug_sysio_errs) || (ce_verbose > 1))
580 fmtstr++;
581
582 cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32),
583 (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar,
584 unum, id);
585 }
586
587 if (t_afsr & SB_CE_AFSR_S_PIO) {
588 char *fmtstr = "!SBus%d CE Secondary Error from PIO: "
589 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d\n";
590
591 if ((debug_sysio_errs) || (ce_verbose > 1))
592 fmtstr++;
593
594 cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32),
595 (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar,
596 id);
597 }
598 if (t_afsr & SB_CE_AFSR_S_DRD) {
599 char *fmtstr = "!SBus%d CE Secondary Error DMA read: "
600 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s "
601 "Id %d\n";
602
603 if ((debug_sysio_errs) || (ce_verbose > 1))
604 fmtstr++;
605
606 cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32),
607 (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar,
608 unum, id);
609 }
610 if (t_afsr & SB_CE_AFSR_S_DWR) {
611 char *fmtstr = "!SBus%d CE Secondary Error DMA write: "
612 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s "
613 "Id %d\n";
614
615 if ((debug_sysio_errs) || (ce_verbose > 1))
616 fmtstr++;
617
618 cmn_err(CE_CONT, fmtstr,
619 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
620 (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id);
621 }
622
623 if ((debug_sysio_errs) || (ce_verbose > 1))
624 syndrome_str++;
625
626 cmn_err(CE_CONT, syndrome_str,
627 (uint32_t)((t_afsr & SB_CE_AFSR_SYND) >> SB_CE_SYND_SHIFT),
628 (uint32_t)((t_afsr & SB_CE_AFSR_OFF) >> SB_CE_OFFSET_SHIFT),
629 (uint32_t)((t_afsr & SB_CE_AFSR_SIZE) >> SB_CE_SIZE_SHIFT),
630 (uint32_t)((t_afsr & SB_CE_AFSR_MID) >> SB_CE_MID_SHIFT));
631 }
632
633 static uint_t
sbus_err_intr(struct sbus_soft_state * softsp)634 sbus_err_intr(struct sbus_soft_state *softsp)
635 {
636 volatile uint64_t t_afsr;
637 volatile uint64_t t_afar;
638 ushort_t id, inst;
639 int cleared = 0;
640 volatile uint64_t *afar_reg;
641 on_trap_data_t *otp = softsp->ontrap_data;
642
643 t_afsr = *softsp->sbus_err_reg;
644 afar_reg = (uint64_t *)softsp->sbus_err_reg + 1;
645 t_afar = *afar_reg;
646
647 if (otp == NULL || !(otp->ot_prot & OT_DATA_ACCESS)) {
648 sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
649 cleared = 1;
650 }
651
652 id = (ushort_t)softsp->upa_id;
653 inst = (ushort_t)ddi_get_instance(softsp->dip);
654
655 if (debug_sysio_errs) {
656 if (otp != NULL && (otp->ot_prot & OT_DATA_ACCESS))
657 otp->ot_trap |= OT_DATA_ACCESS;
658 if (!cleared)
659 sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
660
661 cmn_err(CE_CONT, "SBus%d Error: AFSR 0x%08x.%08x "
662 "AFAR 0x%08x.%08x Id %d\n",
663 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
664 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
665
666 debug_enter("sbus_err_intr");
667 } else {
668 sbus_log_error(softsp, (uint64_t *)&t_afsr,
669 (uint64_t *)&t_afar, id, inst, cleared, otp);
670 }
671 if (!cleared) {
672 sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
673 }
674
675 return (DDI_INTR_CLAIMED);
676 }
677
678 static void
sbus_clear_intr(struct sbus_soft_state * softsp,uint64_t * pafsr)679 sbus_clear_intr(struct sbus_soft_state *softsp, uint64_t *pafsr)
680 {
681 volatile uint64_t *clear_reg;
682
683 *softsp->sbus_err_reg = *pafsr;
684 clear_reg = (softsp->clr_intr_reg + SBUS_ERR_CLEAR);
685 *clear_reg = 0;
686 }
687
688 static void
sbus_log_error(struct sbus_soft_state * softsp,uint64_t * pafsr,uint64_t * pafar,ushort_t id,ushort_t inst,int cleared,on_trap_data_t * otp)689 sbus_log_error(struct sbus_soft_state *softsp, uint64_t *pafsr, uint64_t *pafar,
690 ushort_t id, ushort_t inst, int cleared, on_trap_data_t *otp)
691 {
692 uint64_t t_afsr;
693 uint64_t t_afar;
694 int level = CE_WARN;
695
696 t_afsr = *pafsr;
697 t_afar = *pafar;
698 if (t_afsr & SB_AFSR_P_LE) {
699 if (!cleared)
700 sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
701 cmn_err(CE_PANIC, "SBus%d Primary Error Late PIO: "
702 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
703 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
704 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
705 }
706 if (t_afsr & SB_AFSR_P_TO) {
707 if (otp != NULL && (otp->ot_prot & OT_DATA_ACCESS)) {
708 otp->ot_trap |= OT_DATA_ACCESS;
709 return;
710 }
711 if (sbus_check_bto(softsp)) {
712 if (!cleared)
713 sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
714 level = CE_PANIC;
715 }
716 cmn_err(level, "SBus%d Primary Error Timeout: "
717 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
718 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
719 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
720 }
721 if (t_afsr & SB_AFSR_P_BERR) {
722 if (otp != NULL && (otp->ot_prot & OT_DATA_ACCESS)) {
723 otp->ot_trap |= OT_DATA_ACCESS;
724 return;
725 }
726 if (sbus_check_bto(softsp)) {
727 if (!cleared)
728 sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
729 level = CE_PANIC;
730 }
731 cmn_err(level, "SBus%d Primary Error Bus Error: "
732 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d\n",
733 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
734 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
735 }
736
737 if (t_afsr & SB_AFSR_S_LE) {
738 if (!cleared)
739 sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
740 cmn_err(CE_PANIC, "SBus%d Secondary Late PIO Error: "
741 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
742 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
743 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
744 }
745 if (t_afsr & SB_AFSR_S_TO) {
746 if (sbus_check_bto(softsp)) {
747 if (!cleared)
748 sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
749 level = CE_PANIC;
750 }
751 cmn_err(level, "SBus%d Secondary Timeout Error: "
752 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
753 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
754 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
755 }
756 if (t_afsr & SB_AFSR_S_BERR) {
757 if (sbus_check_bto(softsp)) {
758 if (!cleared)
759 sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
760 level = CE_PANIC;
761 }
762 cmn_err(level, "SBus%d Secondary Bus Error: "
763 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
764 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
765 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
766 }
767 }
768
769
770 static int
sbus_check_bto(struct sbus_soft_state * softsp)771 sbus_check_bto(struct sbus_soft_state *softsp)
772 {
773 hrtime_t now = gethrtime(); /* high PIL safe */
774 hrtime_t diff = now - softsp->bto_timestamp;
775
776 if (diff > ((hrtime_t)bto_secs * NANOSEC) || diff < 0LL) {
777 /*
778 * Reset error counter as this bus error has occurred
779 * after more than bto_secs duration.
780 */
781 softsp->bto_timestamp = now;
782 softsp->bto_ctr = 0;
783 }
784 if (softsp->bto_ctr++ >= bto_cnt)
785 return (1);
786 return (0);
787 }
788
789 static uint_t
sbus_ctrl_ecc_err(struct sbus_soft_state * softsp)790 sbus_ctrl_ecc_err(struct sbus_soft_state *softsp)
791 {
792 uint64_t t_sb_csr;
793 ushort_t id, inst;
794
795 t_sb_csr = *softsp->sbus_ctrl_reg;
796 id = (ushort_t)softsp->upa_id;
797 inst = (ushort_t)ddi_get_instance(softsp->dip);
798
799 if (debug_sysio_errs) {
800 cmn_err(CE_CONT, "sbus_ctrl_ecc_error: SBus%d Control Reg "
801 "0x%016llx Id %d\n", inst, (u_longlong_t)t_sb_csr, id);
802 }
803
804 if (t_sb_csr & (SB_CSR_DPERR_S14|SB_CSR_DPERR_S13|SB_CSR_DPERR_S3|
805 SB_CSR_DPERR_S2|SB_CSR_DPERR_S1|SB_CSR_DPERR_S0|SB_CSR_PIO_PERRS)) {
806 struct async_flt aflt;
807
808 *softsp->sbus_ctrl_reg = t_sb_csr; /* clear error bits */
809
810 bzero(&aflt, sizeof (aflt));
811 aflt.flt_id = gethrtime();
812 aflt.flt_stat = t_sb_csr;
813 aflt.flt_func = sbus_log_csr_error;
814 aflt.flt_bus_id = id;
815 aflt.flt_inst = inst;
816 aflt.flt_status = ECC_IOBUS;
817 aflt.flt_class = BUS_FAULT;
818 aflt.flt_panic = 1;
819
820 errorq_dispatch(ue_queue, &aflt, sizeof (aflt), aflt.flt_panic);
821 return (BF_FATAL);
822 }
823
824 return (BF_NONE);
825 }
826
827 /*ARGSUSED*/
828 static void
sbus_log_csr_error(struct async_flt * aflt,char * unum)829 sbus_log_csr_error(struct async_flt *aflt, char *unum)
830 {
831 uint64_t t_sb_csr = aflt->flt_stat;
832 uint_t id = aflt->flt_bus_id;
833 uint_t inst = aflt->flt_inst;
834
835 /*
836 * Print out SBus error information.
837 */
838 if (t_sb_csr & SB_CSR_DPERR_S14) {
839 cmn_err(CE_WARN,
840 "SBus%d Slot 14 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
841 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
842 }
843 if (t_sb_csr & SB_CSR_DPERR_S13) {
844 cmn_err(CE_WARN,
845 "SBus%d Slot 13 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
846 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
847 }
848 if (t_sb_csr & SB_CSR_DPERR_S3) {
849 cmn_err(CE_WARN,
850 "SBus%d Slot 3 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
851 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
852 }
853 if (t_sb_csr & SB_CSR_DPERR_S2) {
854 cmn_err(CE_WARN,
855 "SBus%d Slot 2 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
856 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
857 }
858 if (t_sb_csr & SB_CSR_DPERR_S1) {
859 cmn_err(CE_WARN,
860 "SBus%d Slot 1 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
861 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
862 }
863 if (t_sb_csr & SB_CSR_DPERR_S0) {
864 cmn_err(CE_WARN,
865 "SBus%d Slot 0 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
866 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
867 }
868 if (t_sb_csr & SB_CSR_PPERR_S15) {
869 cmn_err(CE_WARN,
870 "SBus%d Slot 15 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
871 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
872 }
873 if (t_sb_csr & SB_CSR_PPERR_S14) {
874 cmn_err(CE_WARN,
875 "SBus%d Slot 14 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
876 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
877 }
878 if (t_sb_csr & SB_CSR_PPERR_S13) {
879 cmn_err(CE_WARN,
880 "SBus%d Slot 13 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
881 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
882 }
883 if (t_sb_csr & SB_CSR_PPERR_S3) {
884 cmn_err(CE_WARN,
885 "SBus%d Slot 3 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
886 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
887 }
888 if (t_sb_csr & SB_CSR_PPERR_S2) {
889 cmn_err(CE_WARN,
890 "SBus%d Slot 2 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
891 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
892 }
893 if (t_sb_csr & SB_CSR_PPERR_S1) {
894 cmn_err(CE_WARN,
895 "SBus%d Slot 1 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
896 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
897 }
898 if (t_sb_csr & SB_CSR_PPERR_S0) {
899 cmn_err(CE_WARN,
900 "SBus%d Slot 0 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
901 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
902 }
903 }
904
905 /*
906 * Sysio Thermal Warning interrupt handler
907 */
908 static uint_t
sysio_thermal_warn_intr(struct sbus_soft_state * softsp)909 sysio_thermal_warn_intr(struct sbus_soft_state *softsp)
910 {
911 volatile uint64_t *clear_reg;
912 volatile uint64_t tmp_mondo_vec;
913 volatile uint64_t *mondo_vec_reg;
914 const char thermal_warn_msg[] =
915 "Severe over-temperature condition detected!";
916
917 /*
918 * Take off the Thermal Warning interrupt and
919 * remove its interrupt handler.
920 */
921 mondo_vec_reg = (softsp->intr_mapping_reg + THERMAL_MAPREG);
922 tmp_mondo_vec = *mondo_vec_reg;
923 tmp_mondo_vec &= ~INTERRUPT_VALID;
924 *mondo_vec_reg = tmp_mondo_vec;
925
926 ddi_remove_intr(softsp->dip, 4, NULL);
927
928 clear_reg = (softsp->clr_intr_reg + THERMAL_CLEAR);
929 *clear_reg = 0;
930
931 if (oven_test) {
932 cmn_err(CE_NOTE, "OVEN TEST: %s", thermal_warn_msg);
933 return (DDI_INTR_CLAIMED);
934 }
935
936 cmn_err(CE_WARN, "%s", thermal_warn_msg);
937 cmn_err(CE_WARN, "Powering down...");
938
939 do_shutdown();
940
941 /*
942 * just in case do_shutdown() fails
943 */
944 (void) timeout((void(*)(void *))power_down, NULL,
945 thermal_powerdown_delay * hz);
946
947 return (DDI_INTR_CLAIMED);
948 }
949