1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 1990-2002 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/conf.h> 31 #include <sys/ddi.h> 32 #include <sys/sunddi.h> 33 #include <sys/ddi_impldefs.h> 34 #include <sys/cmn_err.h> 35 #include <sys/async.h> 36 #include <sys/sysiosbus.h> 37 #include <sys/sysioerr.h> 38 #include <sys/x_call.h> 39 #include <sys/machsystm.h> 40 #include <sys/sysmacros.h> 41 #include <sys/vmsystm.h> 42 #include <sys/cpu_module.h> 43 44 /* 45 * Set the following variable in /etc/system to tell the kernel 46 * not to shutdown the machine if the temperature reaches 47 * the Thermal Warning limit. 48 */ 49 int oven_test = 0; 50 51 /* 52 * To indicate if the prom has the property of "thermal-interrupt". 53 */ 54 static int thermal_interrupt_enabled = 0; 55 56 #ifdef _STARFIRE 57 #include <sys/starfire.h> 58 59 int 60 pc_translate_tgtid(caddr_t, int, volatile uint64_t *); 61 62 void 63 pc_ittrans_cleanup(caddr_t, volatile uint64_t *); 64 #endif /* _STARFIRE */ 65 66 /* 67 * adb debug_sysio_errs to 1 if you don't want your system to panic on 68 * sbus ue errors. adb sysio_err_flag to 0 if you don't want your system 69 * to check for sysio errors at all. 70 */ 71 int sysio_err_flag = 1; 72 uint_t debug_sysio_errs = 0; 73 74 /* 75 * bto_cnt = number of bus errors and timeouts allowed within bto_secs 76 * use /etc/system to change the bto_cnt to a very large number if 77 * it's a problem! 78 */ 79 int bto_secs = 10; 80 int bto_cnt = 10; 81 82 static uint_t 83 sysio_ue_intr(struct sbus_soft_state *softsp); 84 85 static uint_t 86 sysio_ce_intr(struct sbus_soft_state *softsp); 87 88 static uint_t 89 sbus_err_intr(struct sbus_soft_state *softsp); 90 91 static void 92 sysio_log_ce_err(struct async_flt *ecc, char *unum); 93 94 static void 95 sysio_log_ue_err(struct async_flt *ecc, char *unum); 96 97 static void 98 sbus_clear_intr(struct sbus_soft_state *softsp, uint64_t *pafsr); 99 100 static void 101 sbus_log_error(struct sbus_soft_state *softsp, uint64_t *pafsr, uint64_t *pafar, 102 ushort_t id, ushort_t inst, int cleared, 103 on_trap_data_t *ontrap_data); 104 105 static int 106 sbus_check_bto(struct sbus_soft_state *softsp); 107 108 static void 109 sbus_log_csr_error(struct async_flt *aflt, char *unum); 110 111 static uint_t 112 sbus_ctrl_ecc_err(struct sbus_soft_state *softsp); 113 114 static uint_t 115 sysio_dis_err(struct sbus_soft_state *softsp); 116 117 static uint_t 118 sysio_init_err(struct sbus_soft_state *softsp); 119 120 static uint_t 121 sysio_thermal_warn_intr(struct sbus_soft_state *softsp); 122 123 static int sbus_pil[] = {SBUS_UE_PIL, SBUS_CE_PIL, SBUS_ERR_PIL, SBUS_PF_PIL, 124 SBUS_THERMAL_PIL, SBUS_PM_PIL}; 125 int 126 sysio_err_init(struct sbus_soft_state *softsp, caddr_t address) 127 { 128 if (sysio_err_flag == 0) { 129 cmn_err(CE_CONT, "Warning: sysio errors not initialized\n"); 130 return (DDI_SUCCESS); 131 } 132 133 /* 134 * Get the address of the already mapped-in sysio/sbus error registers. 135 * Simply add each registers offset to the already mapped in address 136 * that was retrieved from the device node's "address" property, 137 * and passed as an argument to this function. 138 * 139 * Define a macro for the pointer arithmetic ... 140 */ 141 142 #define REG_ADDR(b, o) (uint64_t *)((caddr_t)(b) + (o)) 143 144 softsp->sysio_ecc_reg = REG_ADDR(address, OFF_SYSIO_ECC_REGS); 145 softsp->sysio_ue_reg = REG_ADDR(address, OFF_SYSIO_UE_REGS); 146 softsp->sysio_ce_reg = REG_ADDR(address, OFF_SYSIO_CE_REGS); 147 softsp->sbus_err_reg = REG_ADDR(address, OFF_SBUS_ERR_REGS); 148 149 #undef REG_ADDR 150 151 /* 152 * create the interrupt-priorities property if it doesn't 153 * already exist to provide a hint as to the PIL level for 154 * our interrupt. 155 */ 156 { 157 int len; 158 159 if (ddi_getproplen(DDI_DEV_T_ANY, softsp->dip, 160 DDI_PROP_DONTPASS, "interrupt-priorities", 161 &len) != DDI_PROP_SUCCESS) { 162 /* Create the interrupt-priorities property. */ 163 (void) ddi_prop_update_int_array(DDI_DEV_T_NONE, 164 softsp->dip, "interrupt-priorities", 165 (int *)sbus_pil, sizeof (sbus_pil) / sizeof (int)); 166 } 167 } 168 169 (void) ddi_add_intr(softsp->dip, 0, NULL, NULL, 170 (uint_t (*)())sysio_ue_intr, (caddr_t)softsp); 171 (void) ddi_add_intr(softsp->dip, 1, NULL, NULL, 172 (uint_t (*)())sysio_ce_intr, (caddr_t)softsp); 173 (void) ddi_add_intr(softsp->dip, 2, NULL, NULL, 174 (uint_t (*)())sbus_err_intr, (caddr_t)softsp); 175 /* 176 * If the thermal-interrupt property is in place, 177 * then register the thermal warning interrupt handler and 178 * program its mapping register 179 */ 180 thermal_interrupt_enabled = ddi_getprop(DDI_DEV_T_ANY, softsp->dip, 181 DDI_PROP_DONTPASS, "thermal-interrupt", -1); 182 183 if (thermal_interrupt_enabled == 1) { 184 (void) ddi_add_intr(softsp->dip, 4, NULL, NULL, 185 (uint_t (*)())sysio_thermal_warn_intr, (caddr_t)softsp); 186 } 187 188 bus_func_register(BF_TYPE_UE, (busfunc_t)sbus_ctrl_ecc_err, softsp); 189 bus_func_register(BF_TYPE_ERRDIS, (busfunc_t)sysio_dis_err, softsp); 190 191 (void) sysio_init_err(softsp); 192 193 return (DDI_SUCCESS); 194 } 195 196 int 197 sysio_err_resume_init(struct sbus_soft_state *softsp) 198 { 199 (void) sysio_init_err(softsp); 200 return (DDI_SUCCESS); 201 } 202 203 int 204 sysio_err_uninit(struct sbus_soft_state *softsp) 205 { 206 /* remove the interrupts from the interrupt list */ 207 (void) sysio_dis_err(softsp); 208 209 ddi_remove_intr(softsp->dip, 0, NULL); 210 ddi_remove_intr(softsp->dip, 1, NULL); 211 ddi_remove_intr(softsp->dip, 2, NULL); 212 213 if (thermal_interrupt_enabled == 1) { 214 ddi_remove_intr(softsp->dip, 4, NULL); 215 } 216 217 bus_func_unregister(BF_TYPE_UE, (busfunc_t)sbus_ctrl_ecc_err, softsp); 218 bus_func_unregister(BF_TYPE_ERRDIS, (busfunc_t)sysio_dis_err, softsp); 219 220 return (DDI_SUCCESS); 221 } 222 223 static uint_t 224 sysio_init_err(struct sbus_soft_state *softsp) 225 { 226 volatile uint64_t tmp_mondo_vec, tmpreg; 227 volatile uint64_t *mondo_vec_reg; 228 uint_t cpu_id, acpu_id; 229 230 acpu_id = intr_dist_cpuid(); 231 /* 232 * Program the mondo vector accordingly. This MUST be the 233 * last thing we do. Once we program the mondo, the device 234 * may begin to interrupt. Store it in the hardware reg. 235 */ 236 mondo_vec_reg = (uint64_t *)(softsp->intr_mapping_reg + UE_ECC_MAPREG); 237 cpu_id = acpu_id; 238 #ifdef _STARFIRE 239 cpu_id = pc_translate_tgtid(softsp->ittrans_cookie, cpu_id, 240 mondo_vec_reg); 241 #endif /* _STARFIRE */ 242 tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) | INTERRUPT_VALID; 243 *mondo_vec_reg = tmp_mondo_vec; 244 245 mondo_vec_reg = (uint64_t *)(softsp->intr_mapping_reg + CE_ECC_MAPREG); 246 cpu_id = acpu_id; 247 #ifdef _STARFIRE 248 cpu_id = pc_translate_tgtid(softsp->ittrans_cookie, cpu_id, 249 mondo_vec_reg); 250 #endif /* _STARFIRE */ 251 tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) | INTERRUPT_VALID; 252 *mondo_vec_reg = tmp_mondo_vec; 253 254 mondo_vec_reg = 255 (uint64_t *)(softsp->intr_mapping_reg + SBUS_ERR_MAPREG); 256 cpu_id = acpu_id; 257 #ifdef _STARFIRE 258 cpu_id = pc_translate_tgtid(softsp->ittrans_cookie, cpu_id, 259 mondo_vec_reg); 260 #endif /* _STARFIRE */ 261 262 tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) | INTERRUPT_VALID; 263 *mondo_vec_reg = tmp_mondo_vec; 264 265 if (thermal_interrupt_enabled == 1) { 266 mondo_vec_reg = (softsp->intr_mapping_reg + THERMAL_MAPREG); 267 cpu_id = acpu_id; 268 tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) | 269 INTERRUPT_VALID; 270 *mondo_vec_reg = tmp_mondo_vec; 271 } 272 273 /* Flush store buffers */ 274 tmpreg = *softsp->sbus_ctrl_reg; 275 276 /* 277 * XXX - This may already be set by the OBP. 278 */ 279 tmpreg = SYSIO_APCKEN; 280 *softsp->sysio_ctrl_reg |= tmpreg; 281 tmpreg = (SECR_ECC_EN | SECR_UE_INTEN | SECR_CE_INTEN); 282 *softsp->sysio_ecc_reg = tmpreg; 283 tmpreg = SB_CSR_ERRINT_EN; 284 *softsp->sbus_err_reg |= tmpreg; 285 286 /* Initialize timeout/bus error counter */ 287 softsp->bto_timestamp = 0; 288 softsp->bto_ctr = 0; 289 290 return (0); 291 } 292 293 static uint_t 294 sysio_dis_err(struct sbus_soft_state *softsp) 295 { 296 volatile uint64_t tmpreg; 297 volatile uint64_t *mondo_vec_reg, *clear_vec_reg; 298 299 *softsp->sysio_ctrl_reg &= ~SYSIO_APCKEN; 300 *softsp->sysio_ecc_reg = 0; 301 *softsp->sbus_err_reg &= ~SB_CSR_ERRINT_EN; 302 303 /* Flush store buffers */ 304 tmpreg = *softsp->sbus_ctrl_reg; 305 #ifdef lint 306 tmpreg = tmpreg; 307 #endif 308 309 /* Unmap mapping registers */ 310 mondo_vec_reg = (softsp->intr_mapping_reg + UE_ECC_MAPREG); 311 clear_vec_reg = (softsp->clr_intr_reg + UE_ECC_CLEAR); 312 313 *mondo_vec_reg = 0; 314 315 #ifdef _STARFIRE 316 /* do cleanup for starfire interrupt target translation */ 317 pc_ittrans_cleanup(softsp->ittrans_cookie, mondo_vec_reg); 318 #endif /* _STARFIRE */ 319 320 *clear_vec_reg = 0; 321 322 mondo_vec_reg = (softsp->intr_mapping_reg + CE_ECC_MAPREG); 323 clear_vec_reg = (softsp->clr_intr_reg + CE_ECC_CLEAR); 324 325 *mondo_vec_reg = 0; 326 327 #ifdef _STARFIRE 328 /* Do cleanup for starfire interrupt target translation */ 329 pc_ittrans_cleanup(softsp->ittrans_cookie, mondo_vec_reg); 330 #endif /* _STARFIRE */ 331 332 *clear_vec_reg = 0; 333 334 mondo_vec_reg = (softsp->intr_mapping_reg + SBUS_ERR_MAPREG); 335 clear_vec_reg = (softsp->clr_intr_reg + SBUS_ERR_CLEAR); 336 337 *mondo_vec_reg = 0; 338 339 #ifdef _STARFIRE 340 /* Do cleanup for starfire interrupt target translation */ 341 pc_ittrans_cleanup(softsp->ittrans_cookie, mondo_vec_reg); 342 #endif /* _STARFIRE */ 343 344 *clear_vec_reg = 0; 345 346 /* Flush store buffers */ 347 tmpreg = *softsp->sbus_ctrl_reg; 348 349 return (BF_NONE); 350 } 351 352 /* 353 * Gather information about the error into an async_flt structure, and then 354 * enqueue the error for reporting and processing and panic. 355 */ 356 static uint_t 357 sysio_ue_intr(struct sbus_soft_state *softsp) 358 { 359 volatile uint64_t t_afsr; 360 volatile uint64_t t_afar; 361 volatile uint64_t *ue_reg, *afar_reg, *clear_reg; 362 struct async_flt ecc; 363 uint64_t offset; 364 365 /* 366 * Disable all further sbus errors, for this sbus instance, for 367 * what is guaranteed to be a fatal error. And grab any other cpus. 368 */ 369 (void) sysio_dis_err(softsp); /* disabled sysio errors */ 370 371 /* 372 * Then read and clear the afsr/afar and clear interrupt regs. 373 */ 374 ue_reg = (uint64_t *)softsp->sysio_ue_reg; 375 t_afsr = *ue_reg; 376 afar_reg = (uint64_t *)ue_reg + 1; 377 t_afar = *afar_reg; 378 *ue_reg = t_afsr; 379 380 clear_reg = (softsp->clr_intr_reg + UE_ECC_CLEAR); 381 *clear_reg = 0; 382 383 /* 384 * The AFSR DW_OFFSET field contains the offset of the doubleword with 385 * the ECC error relative to the 64-byte aligned PA. We multiply by 8 386 * to convert to a byte offset, and then add this to flt_addr. 387 */ 388 offset = ((t_afsr & SB_UE_AFSR_OFF) >> SB_UE_DW_SHIFT) * 8; 389 390 bzero(&ecc, sizeof (ecc)); 391 ecc.flt_id = gethrtime(); 392 ecc.flt_stat = t_afsr; 393 ecc.flt_addr = P2ALIGN(t_afar, 64) + offset; 394 ecc.flt_func = sysio_log_ue_err; 395 ecc.flt_bus_id = softsp->upa_id; 396 ecc.flt_inst = ddi_get_instance(softsp->dip); 397 ecc.flt_status = ECC_IOBUS; 398 ecc.flt_in_memory = (pf_is_memory(t_afar >> MMU_PAGESHIFT)) ? 1: 0; 399 ecc.flt_class = BUS_FAULT; 400 ecc.flt_panic = (debug_sysio_errs == 0); 401 402 errorq_dispatch(ue_queue, &ecc, sizeof (ecc), ecc.flt_panic); 403 404 /* 405 * If the UE is in memory and fatal, save the fault info so the 406 * panic code will know to check for copyback errors. 407 */ 408 if (ecc.flt_panic && ecc.flt_in_memory) 409 panic_aflt = ecc; 410 411 /* 412 * We must also check for other bus UE errors, and panic if 413 * any fatal ones are detected at this point. 414 */ 415 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL) 416 ecc.flt_panic = 1; 417 418 if (ecc.flt_panic) 419 cmn_err(CE_PANIC, "Fatal Sbus%d UE Error", ecc.flt_inst); 420 421 return (DDI_INTR_CLAIMED); 422 } 423 424 /* 425 * callback logging function from the common error handling code 426 */ 427 static void 428 sysio_log_ue_err(struct async_flt *ecc, char *unum) 429 { 430 uint64_t t_afsr = ecc->flt_stat; 431 uint64_t t_afar = ecc->flt_addr; 432 433 ushort_t id = ecc->flt_bus_id; 434 ushort_t inst = ecc->flt_inst; 435 436 if (t_afsr & SB_UE_AFSR_P_PIO) { 437 cmn_err(CE_WARN, "SBus%d UE Primary Error from PIO: " 438 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d", 439 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 440 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id); 441 } 442 if (t_afsr & SB_UE_AFSR_P_DRD) { 443 cmn_err(CE_WARN, "SBus%d UE Primary Error DMA read: " 444 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d", 445 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 446 (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id); 447 } 448 if (t_afsr & SB_UE_AFSR_P_DWR) { 449 cmn_err(CE_WARN, "SBus%d UE Primary Error DVMA write: " 450 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d", 451 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 452 (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id); 453 } 454 /* 455 * We should never hit the secondary error panics. 456 */ 457 if (t_afsr & SB_UE_AFSR_S_PIO) { 458 cmn_err(CE_WARN, "SBus%d UE Secondary Error from PIO: " 459 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d", 460 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 461 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id); 462 } 463 if (t_afsr & SB_UE_AFSR_S_DRD) { 464 cmn_err(CE_WARN, "SBus%d UE Secondary Error DMA read: " 465 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d", 466 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 467 (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id); 468 } 469 if (t_afsr & SB_UE_AFSR_S_DWR) { 470 cmn_err(CE_WARN, "SBus%d UE Secondary Error DMA write: " 471 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d", 472 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 473 (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id); 474 } 475 476 if ((debug_sysio_errs) || (aft_verbose)) { 477 (void) read_ecc_data(ecc, 1, 0); 478 cmn_err(CE_CONT, "\tOffset 0x%x, Size %d, UPA MID 0x%x\n", 479 (uint32_t)((t_afsr & SB_UE_AFSR_OFF) >> SB_UE_DW_SHIFT), 480 (uint32_t)((t_afsr & SB_UE_AFSR_SIZE) >> SB_UE_SIZE_SHIFT), 481 (uint32_t)((t_afsr & SB_UE_AFSR_MID) >> SB_UE_MID_SHIFT)); 482 } 483 } 484 485 /* 486 * gather the information about the error, plus a pointer to 487 * the callback logging function, and call the generic ce_error handler. 488 */ 489 static uint_t 490 sysio_ce_intr(struct sbus_soft_state *softsp) 491 { 492 volatile uint64_t t_afsr; 493 volatile uint64_t t_afar; 494 volatile uint64_t *afar_reg, *clear_reg, *ce_reg; 495 struct async_flt ecc; 496 uint64_t offset; 497 498 ce_reg = (uint64_t *)softsp->sysio_ce_reg; 499 t_afsr = *ce_reg; 500 afar_reg = (uint64_t *)ce_reg + 1; 501 t_afar = *afar_reg; 502 *ce_reg = t_afsr; 503 504 clear_reg = (softsp->clr_intr_reg + CE_ECC_CLEAR); 505 *clear_reg = 0; 506 507 /* 508 * The AFSR DW_OFFSET field contains the offset of the doubleword with 509 * the ECC error relative to the 64-byte aligned PA. We multiply by 8 510 * to convert to a byte offset, and then add this to flt_addr. 511 */ 512 offset = ((t_afsr & SB_UE_AFSR_OFF) >> SB_UE_DW_SHIFT) * 8; 513 514 bzero(&ecc, sizeof (ecc)); 515 ecc.flt_id = gethrtime(); 516 ecc.flt_stat = t_afsr; 517 ecc.flt_addr = P2ALIGN(t_afar, 64) + offset; 518 ecc.flt_func = sysio_log_ce_err; 519 ecc.flt_bus_id = softsp->upa_id; 520 ecc.flt_inst = ddi_get_instance(softsp->dip); 521 ecc.flt_status = ECC_IOBUS; 522 523 ecc.flt_synd = (ushort_t)((t_afsr & SB_CE_AFSR_SYND) >> 524 SB_CE_SYND_SHIFT); 525 526 ecc.flt_in_memory = (pf_is_memory(t_afar >> MMU_PAGESHIFT)) ? 1: 0; 527 ecc.flt_class = BUS_FAULT; 528 529 ce_scrub(&ecc); 530 errorq_dispatch(ce_queue, &ecc, sizeof (ecc), ERRORQ_ASYNC); 531 532 return (DDI_INTR_CLAIMED); 533 } 534 535 /* 536 * callback logging function from the common error handling code 537 */ 538 static void 539 sysio_log_ce_err(struct async_flt *ecc, char *unum) 540 { 541 uint64_t t_afsr = ecc->flt_stat; 542 uint64_t t_afar = ecc->flt_addr; 543 ushort_t id = ecc->flt_bus_id; 544 ushort_t inst = ecc->flt_inst; 545 int ce_verbose = ce_verbose_memory; 546 char *syndrome_str = "!\tSyndrome 0x%x, Offset 0x%x, Size %d, " 547 "UPA MID 0x%x\n"; 548 549 if ((!ce_verbose_memory) && (!debug_sysio_errs)) 550 return; 551 552 if (t_afsr & SB_CE_AFSR_P_PIO) { 553 char *fmtstr = "!SBus%d CE Primary Error from PIO: " 554 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d\n"; 555 556 if ((debug_sysio_errs) || (ce_verbose > 1)) 557 fmtstr++; 558 559 cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32), 560 (uint32_t)t_afsr, (uint32_t)(t_afar>>32), 561 (uint32_t)t_afar, id); 562 } 563 if (t_afsr & SB_CE_AFSR_P_DRD) { 564 char *fmtstr = "!SBus%d CE Primary Error DMA read: " 565 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s " 566 "Id %d\n"; 567 568 if ((debug_sysio_errs) || (ce_verbose > 1)) 569 fmtstr++; 570 571 cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32), 572 (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar, 573 unum, id); 574 } 575 if (t_afsr & SB_CE_AFSR_P_DWR) { 576 char *fmtstr = "!SBus%d CE Primary Error DMA write: " 577 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d\n"; 578 579 if ((debug_sysio_errs) || (ce_verbose > 1)) 580 fmtstr++; 581 582 cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32), 583 (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar, 584 unum, id); 585 } 586 587 if (t_afsr & SB_CE_AFSR_S_PIO) { 588 char *fmtstr = "!SBus%d CE Secondary Error from PIO: " 589 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d\n"; 590 591 if ((debug_sysio_errs) || (ce_verbose > 1)) 592 fmtstr++; 593 594 cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32), 595 (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar, 596 id); 597 } 598 if (t_afsr & SB_CE_AFSR_S_DRD) { 599 char *fmtstr = "!SBus%d CE Secondary Error DMA read: " 600 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s " 601 "Id %d\n"; 602 603 if ((debug_sysio_errs) || (ce_verbose > 1)) 604 fmtstr++; 605 606 cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32), 607 (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar, 608 unum, id); 609 } 610 if (t_afsr & SB_CE_AFSR_S_DWR) { 611 char *fmtstr = "!SBus%d CE Secondary Error DMA write: " 612 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s " 613 "Id %d\n"; 614 615 if ((debug_sysio_errs) || (ce_verbose > 1)) 616 fmtstr++; 617 618 cmn_err(CE_CONT, fmtstr, 619 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 620 (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id); 621 } 622 623 if ((debug_sysio_errs) || (ce_verbose > 1)) 624 syndrome_str++; 625 626 cmn_err(CE_CONT, syndrome_str, 627 (uint32_t)((t_afsr & SB_CE_AFSR_SYND) >> SB_CE_SYND_SHIFT), 628 (uint32_t)((t_afsr & SB_CE_AFSR_OFF) >> SB_CE_OFFSET_SHIFT), 629 (uint32_t)((t_afsr & SB_CE_AFSR_SIZE) >> SB_CE_SIZE_SHIFT), 630 (uint32_t)((t_afsr & SB_CE_AFSR_MID) >> SB_CE_MID_SHIFT)); 631 } 632 633 static uint_t 634 sbus_err_intr(struct sbus_soft_state *softsp) 635 { 636 volatile uint64_t t_afsr; 637 volatile uint64_t t_afar; 638 ushort_t id, inst; 639 int cleared = 0; 640 volatile uint64_t *afar_reg; 641 on_trap_data_t *otp = softsp->ontrap_data; 642 643 t_afsr = *softsp->sbus_err_reg; 644 afar_reg = (uint64_t *)softsp->sbus_err_reg + 1; 645 t_afar = *afar_reg; 646 647 if (otp == NULL || !(otp->ot_prot & OT_DATA_ACCESS)) { 648 sbus_clear_intr(softsp, (uint64_t *)&t_afsr); 649 cleared = 1; 650 } 651 652 id = (ushort_t)softsp->upa_id; 653 inst = (ushort_t)ddi_get_instance(softsp->dip); 654 655 if (debug_sysio_errs) { 656 if (otp != NULL && (otp->ot_prot & OT_DATA_ACCESS)) 657 otp->ot_trap |= OT_DATA_ACCESS; 658 if (!cleared) 659 sbus_clear_intr(softsp, (uint64_t *)&t_afsr); 660 661 cmn_err(CE_CONT, "SBus%d Error: AFSR 0x%08x.%08x " 662 "AFAR 0x%08x.%08x Id %d\n", 663 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 664 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id); 665 666 debug_enter("sbus_err_intr"); 667 } else { 668 sbus_log_error(softsp, (uint64_t *)&t_afsr, 669 (uint64_t *)&t_afar, id, inst, cleared, otp); 670 } 671 if (!cleared) { 672 sbus_clear_intr(softsp, (uint64_t *)&t_afsr); 673 } 674 675 return (DDI_INTR_CLAIMED); 676 } 677 678 static void 679 sbus_clear_intr(struct sbus_soft_state *softsp, uint64_t *pafsr) 680 { 681 volatile uint64_t *clear_reg; 682 683 *softsp->sbus_err_reg = *pafsr; 684 clear_reg = (softsp->clr_intr_reg + SBUS_ERR_CLEAR); 685 *clear_reg = 0; 686 } 687 688 static void 689 sbus_log_error(struct sbus_soft_state *softsp, uint64_t *pafsr, uint64_t *pafar, 690 ushort_t id, ushort_t inst, int cleared, on_trap_data_t *otp) 691 { 692 uint64_t t_afsr; 693 uint64_t t_afar; 694 int level = CE_WARN; 695 696 t_afsr = *pafsr; 697 t_afar = *pafar; 698 if (t_afsr & SB_AFSR_P_LE) { 699 if (!cleared) 700 sbus_clear_intr(softsp, (uint64_t *)&t_afsr); 701 cmn_err(CE_PANIC, "SBus%d Primary Error Late PIO: " 702 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d", 703 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 704 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id); 705 } 706 if (t_afsr & SB_AFSR_P_TO) { 707 if (otp != NULL && (otp->ot_prot & OT_DATA_ACCESS)) { 708 otp->ot_trap |= OT_DATA_ACCESS; 709 return; 710 } 711 if (sbus_check_bto(softsp)) { 712 if (!cleared) 713 sbus_clear_intr(softsp, (uint64_t *)&t_afsr); 714 level = CE_PANIC; 715 } 716 cmn_err(level, "SBus%d Primary Error Timeout: " 717 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d", 718 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 719 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id); 720 } 721 if (t_afsr & SB_AFSR_P_BERR) { 722 if (otp != NULL && (otp->ot_prot & OT_DATA_ACCESS)) { 723 otp->ot_trap |= OT_DATA_ACCESS; 724 return; 725 } 726 if (sbus_check_bto(softsp)) { 727 if (!cleared) 728 sbus_clear_intr(softsp, (uint64_t *)&t_afsr); 729 level = CE_PANIC; 730 } 731 cmn_err(level, "SBus%d Primary Error Bus Error: " 732 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d\n", 733 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 734 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id); 735 } 736 737 if (t_afsr & SB_AFSR_S_LE) { 738 if (!cleared) 739 sbus_clear_intr(softsp, (uint64_t *)&t_afsr); 740 cmn_err(CE_PANIC, "SBus%d Secondary Late PIO Error: " 741 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d", 742 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 743 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id); 744 } 745 if (t_afsr & SB_AFSR_S_TO) { 746 if (sbus_check_bto(softsp)) { 747 if (!cleared) 748 sbus_clear_intr(softsp, (uint64_t *)&t_afsr); 749 level = CE_PANIC; 750 } 751 cmn_err(level, "SBus%d Secondary Timeout Error: " 752 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d", 753 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 754 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id); 755 } 756 if (t_afsr & SB_AFSR_S_BERR) { 757 if (sbus_check_bto(softsp)) { 758 if (!cleared) 759 sbus_clear_intr(softsp, (uint64_t *)&t_afsr); 760 level = CE_PANIC; 761 } 762 cmn_err(level, "SBus%d Secondary Bus Error: " 763 "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d", 764 inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr, 765 (uint32_t)(t_afar>>32), (uint32_t)t_afar, id); 766 } 767 } 768 769 770 static int 771 sbus_check_bto(struct sbus_soft_state *softsp) 772 { 773 hrtime_t now = gethrtime(); /* high PIL safe */ 774 hrtime_t diff = now - softsp->bto_timestamp; 775 776 if (diff > ((hrtime_t)bto_secs * NANOSEC) || diff < 0LL) { 777 /* 778 * Reset error counter as this bus error has occurred 779 * after more than bto_secs duration. 780 */ 781 softsp->bto_timestamp = now; 782 softsp->bto_ctr = 0; 783 } 784 if (softsp->bto_ctr++ >= bto_cnt) 785 return (1); 786 return (0); 787 } 788 789 static uint_t 790 sbus_ctrl_ecc_err(struct sbus_soft_state *softsp) 791 { 792 uint64_t t_sb_csr; 793 ushort_t id, inst; 794 795 t_sb_csr = *softsp->sbus_ctrl_reg; 796 id = (ushort_t)softsp->upa_id; 797 inst = (ushort_t)ddi_get_instance(softsp->dip); 798 799 if (debug_sysio_errs) { 800 cmn_err(CE_CONT, "sbus_ctrl_ecc_error: SBus%d Control Reg " 801 "0x%016llx Id %d\n", inst, (u_longlong_t)t_sb_csr, id); 802 } 803 804 if (t_sb_csr & (SB_CSR_DPERR_S14|SB_CSR_DPERR_S13|SB_CSR_DPERR_S3| 805 SB_CSR_DPERR_S2|SB_CSR_DPERR_S1|SB_CSR_DPERR_S0|SB_CSR_PIO_PERRS)) { 806 struct async_flt aflt; 807 808 *softsp->sbus_ctrl_reg = t_sb_csr; /* clear error bits */ 809 810 bzero(&aflt, sizeof (aflt)); 811 aflt.flt_id = gethrtime(); 812 aflt.flt_stat = t_sb_csr; 813 aflt.flt_func = sbus_log_csr_error; 814 aflt.flt_bus_id = id; 815 aflt.flt_inst = inst; 816 aflt.flt_status = ECC_IOBUS; 817 aflt.flt_class = BUS_FAULT; 818 aflt.flt_panic = 1; 819 820 errorq_dispatch(ue_queue, &aflt, sizeof (aflt), aflt.flt_panic); 821 return (BF_FATAL); 822 } 823 824 return (BF_NONE); 825 } 826 827 /*ARGSUSED*/ 828 static void 829 sbus_log_csr_error(struct async_flt *aflt, char *unum) 830 { 831 uint64_t t_sb_csr = aflt->flt_stat; 832 uint_t id = aflt->flt_bus_id; 833 uint_t inst = aflt->flt_inst; 834 835 /* 836 * Print out SBus error information. 837 */ 838 if (t_sb_csr & SB_CSR_DPERR_S14) { 839 cmn_err(CE_WARN, 840 "SBus%d Slot 14 DVMA Parity Error: AFSR 0x%08x.%08x Id %d", 841 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 842 } 843 if (t_sb_csr & SB_CSR_DPERR_S13) { 844 cmn_err(CE_WARN, 845 "SBus%d Slot 13 DVMA Parity Error: AFSR 0x%08x.%08x Id %d", 846 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 847 } 848 if (t_sb_csr & SB_CSR_DPERR_S3) { 849 cmn_err(CE_WARN, 850 "SBus%d Slot 3 DVMA Parity Error: AFSR 0x%08x.%08x Id %d", 851 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 852 } 853 if (t_sb_csr & SB_CSR_DPERR_S2) { 854 cmn_err(CE_WARN, 855 "SBus%d Slot 2 DVMA Parity Error: AFSR 0x%08x.%08x Id %d", 856 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 857 } 858 if (t_sb_csr & SB_CSR_DPERR_S1) { 859 cmn_err(CE_WARN, 860 "SBus%d Slot 1 DVMA Parity Error: AFSR 0x%08x.%08x Id %d", 861 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 862 } 863 if (t_sb_csr & SB_CSR_DPERR_S0) { 864 cmn_err(CE_WARN, 865 "SBus%d Slot 0 DVMA Parity Error: AFSR 0x%08x.%08x Id %d", 866 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 867 } 868 if (t_sb_csr & SB_CSR_PPERR_S15) { 869 cmn_err(CE_WARN, 870 "SBus%d Slot 15 PIO Parity Error: AFSR 0x%08x.%08x Id %d", 871 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 872 } 873 if (t_sb_csr & SB_CSR_PPERR_S14) { 874 cmn_err(CE_WARN, 875 "SBus%d Slot 14 PIO Parity Error: AFSR 0x%08x.%08x Id %d", 876 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 877 } 878 if (t_sb_csr & SB_CSR_PPERR_S13) { 879 cmn_err(CE_WARN, 880 "SBus%d Slot 13 PIO Parity Error: AFSR 0x%08x.%08x Id %d", 881 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 882 } 883 if (t_sb_csr & SB_CSR_PPERR_S3) { 884 cmn_err(CE_WARN, 885 "SBus%d Slot 3 PIO Parity Error: AFSR 0x%08x.%08x Id %d", 886 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 887 } 888 if (t_sb_csr & SB_CSR_PPERR_S2) { 889 cmn_err(CE_WARN, 890 "SBus%d Slot 2 PIO Parity Error: AFSR 0x%08x.%08x Id %d", 891 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 892 } 893 if (t_sb_csr & SB_CSR_PPERR_S1) { 894 cmn_err(CE_WARN, 895 "SBus%d Slot 1 PIO Parity Error: AFSR 0x%08x.%08x Id %d", 896 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 897 } 898 if (t_sb_csr & SB_CSR_PPERR_S0) { 899 cmn_err(CE_WARN, 900 "SBus%d Slot 0 PIO Parity Error: AFSR 0x%08x.%08x Id %d", 901 inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id); 902 } 903 } 904 905 /* 906 * Sysio Thermal Warning interrupt handler 907 */ 908 static uint_t 909 sysio_thermal_warn_intr(struct sbus_soft_state *softsp) 910 { 911 volatile uint64_t *clear_reg; 912 volatile uint64_t tmp_mondo_vec; 913 volatile uint64_t *mondo_vec_reg; 914 const char thermal_warn_msg[] = 915 "Severe over-temperature condition detected!"; 916 917 /* 918 * Take off the Thermal Warning interrupt and 919 * remove its interrupt handler. 920 */ 921 mondo_vec_reg = (softsp->intr_mapping_reg + THERMAL_MAPREG); 922 tmp_mondo_vec = *mondo_vec_reg; 923 tmp_mondo_vec &= ~INTERRUPT_VALID; 924 *mondo_vec_reg = tmp_mondo_vec; 925 926 ddi_remove_intr(softsp->dip, 4, NULL); 927 928 clear_reg = (softsp->clr_intr_reg + THERMAL_CLEAR); 929 *clear_reg = 0; 930 931 if (oven_test) { 932 cmn_err(CE_NOTE, "OVEN TEST: %s", thermal_warn_msg); 933 return (DDI_INTR_CLAIMED); 934 } 935 936 cmn_err(CE_WARN, "%s", thermal_warn_msg); 937 cmn_err(CE_WARN, "Powering down..."); 938 939 do_shutdown(); 940 941 /* 942 * just in case do_shutdown() fails 943 */ 944 (void) timeout((void(*)(void *))power_down, NULL, 945 thermal_powerdown_delay * hz); 946 947 return (DDI_INTR_CLAIMED); 948 } 949