/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/machsystm.h>
#include <sys/cpuvar.h>
#include <sys/async.h>
#include <sys/ontrap.h>
#include <sys/ddifm.h>
#include <sys/hypervisor_api.h>
#include <sys/errorq.h>
#include <sys/promif.h>
#include <sys/prom_plat.h>
#include <sys/x_call.h>
#include <sys/error.h>
#include <sys/fm/util.h>
#include <sys/ivintr.h>
#include <sys/archsystm.h>

#define MAX_CE_FLTS     10
#define MAX_ASYNC_FLTS  6

errorq_t *ue_queue;             /* queue of uncorrectable errors */
errorq_t *ce_queue;             /* queue of correctable errors */

/*
 * Being used by memory test driver.
 * ce_verbose_memory - covers CEs in DIMMs
 * ce_verbose_other - covers "others" (ecache, IO, etc.)
 *
 * If the value is 0, nothing is logged.
 * If the value is 1, the error is logged to the log file, but not console.
 * If the value is 2, the error is logged to the log file and console.
 */
int ce_verbose_memory = 1;
int ce_verbose_other = 1;

int ce_show_data = 0;
int ce_debug = 0;
int ue_debug = 0;
int reset_debug = 0;

/*
 * Tunables for controlling the handling of asynchronous faults (AFTs).
 * Setting these to non-default values on a non-DEBUG kernel is NOT supported.
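 *
 * aft_panic and aft_testfatal are consulted by the non-resumable error
 * handler below: aft_panic forces a panic for fatal user-mode AFTs, and
 * aft_testfatal marks every AFT as fatal so that it panics immediately.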
 */
int aft_verbose = 0;            /* log AFT messages > 1 to log only */
int aft_panic = 0;              /* panic (not reboot) on fatal usermode AFLT */
int aft_testfatal = 0;          /* force all AFTs to panic immediately */

/*
 * Used for vbsc hostshutdown (power-off button)
 */
int err_shutdown_triggered = 0;         /* only once */
uint64_t err_shutdown_inum = 0;         /* used to pull the trigger */

/*
 * Defined in bus_func.c but initialized in error_init
 */
extern kmutex_t bfd_lock;

static uint32_t rq_overflow_count = 0;  /* counter for rq overflow */

static void cpu_queue_one_event(errh_async_flt_t *);
static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t);
static void errh_page_retire(errh_async_flt_t *, uchar_t);
static int errh_error_protected(struct regs *, struct async_flt *, int *);
static void errh_rq_full(struct async_flt *);
static void ue_drain(void *, struct async_flt *, errorq_elem_t *);
static void ce_drain(void *, struct async_flt *, errorq_elem_t *);
static void errh_handle_attr(errh_async_flt_t *);
static void errh_handle_asr(errh_async_flt_t *);

/*ARGSUSED*/
void
process_resumable_error(struct regs *rp, uint32_t head_offset,
    uint32_t tail_offset)
{
        struct machcpu *mcpup;
        struct async_flt *aflt;
        errh_async_flt_t errh_flt;
        errh_er_t *head_va;

        mcpup = &(CPU->cpu_m);

        while (head_offset != tail_offset) {
                /* kernel buffer starts right after the resumable queue */
                head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset +
                    CPU_RQ_SIZE);
                /* Copy the error report to local buffer */
                bzero(&errh_flt, sizeof (errh_async_flt_t));
                bcopy((char *)head_va, &(errh_flt.errh_er),
                    sizeof (errh_er_t));

                /* Increment the queue head */
                head_offset += Q_ENTRY_SIZE;
                /* Wrap around */
                head_offset &= (CPU_RQ_SIZE - 1);

                /* set error handle to zero so it can hold a new error report */
                head_va->ehdl = 0;

                switch (errh_flt.errh_er.desc) {
                case ERRH_DESC_UCOR_RE:
                        /*
                         * Check the error attribute, handle the individual
                         * error if it is needed.
                         */
                        errh_handle_attr(&errh_flt);
                        break;

                case ERRH_DESC_WARN_RE:
                        /*
                         * Power-off requested, but handle it one time only.
                         */
                        if (!err_shutdown_triggered) {
                                setsoftint(err_shutdown_inum);
                                ++err_shutdown_triggered;
                        }
                        continue;

                default:
                        cmn_err(CE_WARN, "Error Descriptor 0x%llx "
                            " invalid in resumable error handler",
                            (long long) errh_flt.errh_er.desc);
                        continue;
                }

                aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
                aflt->flt_id = gethrtime();
                aflt->flt_bus_id = getprocessorid();
                aflt->flt_class = CPU_FAULT;
                aflt->flt_prot = AFLT_PROT_NONE;
                aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK)
                    >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV);

                if (errh_flt.errh_er.attr & ERRH_ATTR_CPU)
                        /* If it is an error on another cpu */
                        aflt->flt_panic = 1;
                else
                        aflt->flt_panic = 0;

                /*
                 * Handle the resumable queue full case.
                 */
                if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) {
                        (void) errh_rq_full(aflt);
                }

                /*
                 * Queue the error on the ce or ue queue depending on
                 * flt_panic.  Even if flt_panic is set, the code keeps
                 * processing the remaining elements on the rq until the
                 * panic starts.
                 */
                (void) cpu_queue_one_event(&errh_flt);

                /*
                 * Panic here if aflt->flt_panic has been set.
                 * Enqueued errors will be logged as part of the panic flow.
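                 * In this path flt_panic is only set when the report
                 * describes an error on another CPU (ERRH_ATTR_CPU) or when
                 * errh_rq_full() has requested a panic, hence the message
                 * below.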
                 */
                if (aflt->flt_panic) {
                        fm_panic("Unrecoverable error on another CPU");
                }
        }
}

void
process_nonresumable_error(struct regs *rp, uint64_t flags,
    uint32_t head_offset, uint32_t tail_offset)
{
        struct machcpu *mcpup;
        struct async_flt *aflt;
        errh_async_flt_t errh_flt;
        errh_er_t *head_va;
        int trampolined = 0;
        int expected = DDI_FM_ERR_UNEXPECTED;
        uint64_t exec_mode;
        uint8_t u_spill_fill;

        mcpup = &(CPU->cpu_m);

        while (head_offset != tail_offset) {
                /* kernel buffer starts right after the nonresumable queue */
                head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset +
                    CPU_NRQ_SIZE);

                /* Copy the error report to local buffer */
                bzero(&errh_flt, sizeof (errh_async_flt_t));

                bcopy((char *)head_va, &(errh_flt.errh_er),
                    sizeof (errh_er_t));

                /* Increment the queue head */
                head_offset += Q_ENTRY_SIZE;
                /* Wrap around */
                head_offset &= (CPU_NRQ_SIZE - 1);

                /* set error handle to zero so it can hold a new error report */
                head_va->ehdl = 0;

                aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);

                trampolined = 0;

                if (errh_flt.errh_er.attr & ERRH_ATTR_PIO)
                        aflt->flt_class = BUS_FAULT;
                else
                        aflt->flt_class = CPU_FAULT;

                aflt->flt_id = gethrtime();
                aflt->flt_bus_id = getprocessorid();
                aflt->flt_pc = (caddr_t)rp->r_pc;
                exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK)
                    >> ERRH_MODE_SHIFT;
                aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV ||
                    exec_mode == ERRH_MODE_UNKNOWN);
                aflt->flt_prot = AFLT_PROT_NONE;
                aflt->flt_tl = (uchar_t)(flags & ERRH_TL_MASK);
                aflt->flt_panic = ((aflt->flt_tl != 0) ||
                    (aft_testfatal != 0));

                /*
                 * For the first error packet on the queue, check if it
                 * happened in a user fill/spill trap.
                 */
                if (flags & ERRH_U_SPILL_FILL) {
                        u_spill_fill = 1;
                        /* clear the user fill/spill flag in flags */
                        flags = (uint64_t)aflt->flt_tl;
                } else
                        u_spill_fill = 0;

                switch (errh_flt.errh_er.desc) {
                case ERRH_DESC_PR_NRE:
                        if (u_spill_fill) {
                                aflt->flt_panic = 0;
                                break;
                        }
                        /*
                         * Fall through; a precise fault also needs to be
                         * checked to see if it was protected.
                         */
                        /*FALLTHRU*/

                case ERRH_DESC_DEF_NRE:
                        /*
                         * If the trap occurred in privileged mode at TL=0,
                         * we need to check to see if we were executing
                         * in the kernel under on_trap() or t_lofault
                         * protection.  If so, and if it was a PIO or MEM
                         * error, then modify the saved registers so that
                         * we return from the trap to the appropriate
                         * trampoline routine.
                         */
                        if (aflt->flt_priv == 1 && aflt->flt_tl == 0 &&
                            ((errh_flt.errh_er.attr & ERRH_ATTR_PIO) ||
                            (errh_flt.errh_er.attr & ERRH_ATTR_MEM))) {
                                trampolined =
                                    errh_error_protected(rp, aflt, &expected);
                        }

                        if (!aflt->flt_priv || aflt->flt_prot ==
                            AFLT_PROT_COPY) {
                                aflt->flt_panic |= aft_panic;
                        } else if (!trampolined &&
                            (aflt->flt_class != BUS_FAULT)) {
                                aflt->flt_panic = 1;
                        }

                        /*
                         * Check the error attribute, handle the individual
                         * error if it is needed.
                         */
                        errh_handle_attr(&errh_flt);

                        /*
                         * If it is a PIO error, we need to query the bus
                         * nexus for fatal errors.
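                         * errh_cpu_run_bus_error_handlers() dispatches the
                         * fault to the nexus drivers' FMA callbacks and turns
                         * on flt_panic if an unprotected privileged access is
                         * reported as fatal.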
                         */
                        if (aflt->flt_class == BUS_FAULT) {
                                aflt->flt_addr = errh_flt.errh_er.ra;
                                errh_cpu_run_bus_error_handlers(aflt,
                                    expected);
                        }

                        break;

                default:
                        cmn_err(CE_WARN, "Panic - Error Descriptor 0x%llx "
                            " invalid in non-resumable error handler",
                            (long long) errh_flt.errh_er.desc);
                        aflt->flt_panic = 1;
                        break;
                }

                /*
                 * Queue the error report for further processing.  Even if
                 * flt_panic is set, the code keeps processing other errors
                 * in the queue until the panic routine stops the kernel.
                 */
                (void) cpu_queue_one_event(&errh_flt);

                /*
                 * Panic here if aflt->flt_panic has been set.
                 * Enqueued errors will be logged as part of the panic flow.
                 */
                if (aflt->flt_panic) {
                        fm_panic("Unrecoverable hardware error");
                }

                /*
                 * Call page_retire() to handle memory errors.
                 */
                if (errh_flt.errh_er.attr & ERRH_ATTR_MEM)
                        errh_page_retire(&errh_flt, PR_UE);

                /*
                 * If we queued an error and it was in user mode, or was
                 * protected by t_lofault, or u_spill_fill is set, we set
                 * the AST flag so the queue will be drained before
                 * returning to user mode.
                 */
                if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY ||
                    u_spill_fill) {
                        int pcb_flag = 0;

                        if (aflt->flt_class == CPU_FAULT)
                                pcb_flag |= ASYNC_HWERR;
                        else if (aflt->flt_class == BUS_FAULT)
                                pcb_flag |= ASYNC_BERR;

                        ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
                        aston(curthread);
                }
        }
}

/*
 * For PIO errors, this routine calls the nexus drivers' error
 * callback routines.  If a callback routine returns fatal, and
 * we are in kernel or unknown mode without any error protection,
 * we need to turn on the panic flag.
 */
void
errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
        int status;
        ddi_fm_error_t de;

        bzero(&de, sizeof (ddi_fm_error_t));

        de.fme_version = DDI_FME_VERSION;
        de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
        de.fme_flag = expected;
        de.fme_bus_specific = (void *)aflt->flt_addr;
        status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);

        /*
         * If the error is protected, we will jump to the proper routine
         * to handle it; if it occurred at user level, we just kill the
         * user process; if the driver thinks the error is not fatal, we
         * can drive on.  If none of the above is true, we panic.
         */
        if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) &&
            (status == DDI_FM_FATAL))
                aflt->flt_panic = 1;
}

/*
 * This routine checks to see if we are under any error protection when
 * the error happens.  If we are under error protection, we unwind to
 * the protection and indicate the fault.
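 * Two forms of protection are recognized: on_trap() protection (OT_DATA_EC
 * and OT_DATA_ACCESS) and the t_lofault protection used by the copy
 * routines.  In either case the saved %pc/%npc in the trap regs are
 * redirected to the trampoline so that the trap returns into the protected
 * code's error path.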
 */
static int
errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected)
{
        int trampolined = 0;
        ddi_acc_hdl_t *hp;

        if (curthread->t_ontrap != NULL) {
                on_trap_data_t *otp = curthread->t_ontrap;

                if (otp->ot_prot & OT_DATA_EC) {
                        aflt->flt_prot = AFLT_PROT_EC;
                        otp->ot_trap |= OT_DATA_EC;
                        rp->r_pc = otp->ot_trampoline;
                        rp->r_npc = rp->r_pc + 4;
                        trampolined = 1;
                }

                if (otp->ot_prot & OT_DATA_ACCESS) {
                        aflt->flt_prot = AFLT_PROT_ACCESS;
                        otp->ot_trap |= OT_DATA_ACCESS;
                        rp->r_pc = otp->ot_trampoline;
                        rp->r_npc = rp->r_pc + 4;
                        trampolined = 1;
                        /*
                         * for peeks and caut_gets,
                         * errors are expected
                         */
                        hp = (ddi_acc_hdl_t *)otp->ot_handle;
                        if (!hp)
                                *expected = DDI_FM_ERR_PEEK;
                        else if (hp->ah_acc.devacc_attr_access ==
                            DDI_CAUTIOUS_ACC)
                                *expected = DDI_FM_ERR_EXPECTED;
                }
        } else if (curthread->t_lofault) {
                aflt->flt_prot = AFLT_PROT_COPY;
                rp->r_g1 = EFAULT;
                rp->r_pc = curthread->t_lofault;
                rp->r_npc = rp->r_pc + 4;
                trampolined = 1;
        }

        return (trampolined);
}

/*
 * Queue one event.
 */
static void
cpu_queue_one_event(errh_async_flt_t *errh_fltp)
{
        struct async_flt *aflt = (struct async_flt *)errh_fltp;
        errorq_t *eqp;

        if (aflt->flt_panic)
                eqp = ue_queue;
        else
                eqp = ce_queue;

        errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t),
            aflt->flt_panic);
}

/*
 * The cpu_async_log_err() function is called by the ce/ue_drain() function to
 * handle logging for CPU events that are dequeued.  As such, it can be invoked
 * from softint context, from AST processing in the trap() flow, or from the
 * panic flow.  We decode the CPU-specific data, and log appropriate messages.
 */
void
cpu_async_log_err(void *flt)
{
        errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt;
        errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er;

        switch (errh_erp->desc) {
        case ERRH_DESC_UCOR_RE:
                if (errh_erp->attr & ERRH_ATTR_MEM) {
                        /*
                         * Turn on the PR_UE flag.  The page will be
                         * scrubbed when it is freed.
                         */
                        errh_page_retire(errh_fltp, PR_UE);
                }

                break;

        case ERRH_DESC_PR_NRE:
        case ERRH_DESC_DEF_NRE:
                if (errh_erp->attr & ERRH_ATTR_MEM) {
                        /*
                         * For a non-resumable memory error, retire
                         * the page here.
                         */
                        errh_page_retire(errh_fltp, PR_UE);

                        /*
                         * If we are going to panic, scrub the page first.
                         */
                        if (errh_fltp->cmn_asyncflt.flt_panic)
                                mem_scrub(errh_fltp->errh_er.ra,
                                    errh_fltp->errh_er.sz);
                }
                break;

        default:
                break;
        }
}

/*
 * Called from ce_drain().
 */
void
cpu_ce_log_err(struct async_flt *aflt)
{
        switch (aflt->flt_class) {
        case CPU_FAULT:
                cpu_async_log_err(aflt);
                break;

        case BUS_FAULT:
                cpu_async_log_err(aflt);
                break;

        default:
                break;
        }
}

/*
 * Called from ue_drain().
 */
void
cpu_ue_log_err(struct async_flt *aflt)
{
        switch (aflt->flt_class) {
        case CPU_FAULT:
                cpu_async_log_err(aflt);
                break;

        case BUS_FAULT:
                cpu_async_log_err(aflt);
                break;

        default:
                break;
        }
}

/*
 * Turn on the given flag for the error memory region.
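 * The region described by the error report is walked one MMU page at a
 * time and each page is handed to page_retire() with that flag.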
 */
static void
errh_page_retire(errh_async_flt_t *errh_fltp, uchar_t flag)
{
        uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
        uint64_t flt_real_addr_end = flt_real_addr_start +
            errh_fltp->errh_er.sz - 1;
        int64_t current_addr;

        if (errh_fltp->errh_er.sz == 0)
                return;

        for (current_addr = flt_real_addr_start;
            current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
                (void) page_retire(current_addr, flag);
        }
}

void
mem_scrub(uint64_t paddr, uint64_t len)
{
        uint64_t pa, length, scrubbed_len;

        pa = paddr;
        length = len;
        scrubbed_len = 0;

        while (length > 0) {
                if (hv_mem_scrub(pa, length, &scrubbed_len) != H_EOK)
                        break;

                pa += scrubbed_len;
                length -= scrubbed_len;
        }
}

/*
 * Call the hypervisor to flush the memory region.  The memory region
 * must be within the same page frame.
 */
void
mem_sync(caddr_t va, size_t len)
{
        uint64_t pa, length, flushed;

        pa = va_to_pa((caddr_t)va);

        if (pa == (uint64_t)-1)
                return;

        ASSERT((pa >> MMU_PAGESHIFT) == ((pa + len - 1) >> MMU_PAGESHIFT));

        length = len;
        flushed = 0;

        while (length > 0) {
                if (hv_mem_sync(pa, length, &flushed) != H_EOK)
                        break;

                pa += flushed;
                length -= flushed;
        }
}

/*
 * If the resumable queue is full, we need to check whether any cpu is in
 * the error state.  If not, we drive on.  If so, we need to panic.  The
 * hypervisor call hv_cpu_state() is used to check the cpu state.
 */
static void
errh_rq_full(struct async_flt *afltp)
{
        processorid_t who;
        uint64_t cpu_state;
        uint64_t retval;

        for (who = 0; who < NCPU; who++)
                if (CPU_IN_SET(cpu_ready_set, who)) {
                        retval = hv_cpu_state(who, &cpu_state);
                        if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) {
                                afltp->flt_panic = 1;
                                break;
                        }
                }
}

/*
 * Return the processor-specific async error structure size used.
 */
int
cpu_aflt_size(void)
{
        return (sizeof (errh_async_flt_t));
}

#define SZ_TO_ETRS_SHIFT        6

/*
 * Message printed out when the resumable queue has overflowed.
 */
/*ARGSUSED*/
void
rq_overflow(struct regs *rp, uint64_t head_offset,
    uint64_t tail_offset)
{
        rq_overflow_count++;
}

/*
 * Handler to process a fatal error.  This routine can be called from a
 * softint, called from trap()'s AST handling, or called from the panic flow.
 */
/*ARGSUSED*/
static void
ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
{
        cpu_ue_log_err(aflt);
}

/*
 * Handler to process a correctable error.  This routine can be called from a
 * softint.  We just call the CPU module's logging routine.
 */
/*ARGSUSED*/
static void
ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
{
        cpu_ce_log_err(aflt);
}

/*
 * Handler to process vbsc hostshutdown (power-off button).
 */
static int
err_shutdown_softintr()
{
        cmn_err(CE_WARN, "Power-off requested, system will now shutdown.");
        do_shutdown();

        /*
         * just in case do_shutdown() fails
         */
        (void) timeout((void(*)(void *))power_down, NULL, 100 * hz);
        return (DDI_INTR_CLAIMED);
}

/*
 * Allocate error queue sizes based on max_ncpus.  max_ncpus is set just
 * after ncpunode has been determined.
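 * Using max_ncpus therefore gives a stable upper bound for the queue sizes.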
 * ncpus is set in start_other_cpus, which is called after error_init()
 * but may change dynamically.
 */
void
error_init(void)
{
        char tmp_name[MAXSYSNAME];
        pnode_t node;
        size_t size = cpu_aflt_size();

        /*
         * Initialize the correctable and uncorrectable error queues.
         */
        ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL,
            MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL);

        ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL,
            MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0);

        if (ue_queue == NULL || ce_queue == NULL)
                panic("failed to create required system error queue");

        /*
         * Set up the interrupt handler for the power-off button.
         */
        err_shutdown_inum = add_softintr(PIL_9,
            (softintrfunc)err_shutdown_softintr, NULL, SOFTINT_ST);

        /*
         * Initialize the busfunc list mutex.  This must be a PIL_15 spin lock
         * because we will need to acquire it from cpu_async_error().
         */
        mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15);

        node = prom_rootnode();
        if ((node == OBP_NONODE) || (node == OBP_BADNODE)) {
                cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node);
                return;
        }

        if (((size = prom_getproplen(node, "reset-reason")) != -1) &&
            (size <= MAXSYSNAME) &&
            (prom_getprop(node, "reset-reason", tmp_name) != -1)) {
                if (reset_debug) {
                        cmn_err(CE_CONT, "System booting after %s\n",
                            tmp_name);
                } else if (strncmp(tmp_name, "FATAL", 5) == 0) {
                        cmn_err(CE_CONT,
                            "System booting after fatal error %s\n", tmp_name);
                }
        }
}

/*
 * The nonresumable queue is full; panic here.
 */
/*ARGSUSED*/
void
nrq_overflow(struct regs *rp)
{
        fm_panic("Nonresumable queue full");
}

/*
 * This is the place for special error handling for individual errors.
 */
static void
errh_handle_attr(errh_async_flt_t *errh_fltp)
{
        switch (errh_fltp->errh_er.attr & ~ERRH_MODE_MASK) {
        case ERRH_ATTR_CPU:
        case ERRH_ATTR_MEM:
        case ERRH_ATTR_PIO:
        case ERRH_ATTR_IRF:
        case ERRH_ATTR_FRF:
        case ERRH_ATTR_SHUT:
                break;

        case ERRH_ATTR_ASR:
                errh_handle_asr(errh_fltp);
                break;

        case ERRH_ATTR_ASI:
        case ERRH_ATTR_PREG:
        case ERRH_ATTR_RQF:
                break;

        default:
                break;
        }
}

/*
 * Handle the ASR bit set in ATTR.
 */
static void
errh_handle_asr(errh_async_flt_t *errh_fltp)
{
        uint64_t current_tick;

        switch (errh_fltp->errh_er.reg) {
        case ASR_REG_VALID | ASR_REG_TICK:
                /*
                 * A Tick Compare Register error only happens when the
                 * register is being read or compared with the %tick
                 * register.  Since we have lost the contents of the
                 * register, we set %tick_compr to a time in the future.
                 * An interrupt will happen when %tick matches the value
                 * field of %tick_compr.
                 */
                current_tick = (uint64_t)gettick();
                tickcmpr_set(current_tick);
                /* Do not panic */
                errh_fltp->cmn_asyncflt.flt_panic = 0;
                break;

        default:
                break;
        }
}