/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/machsystm.h>
#include <sys/cpuvar.h>
#include <sys/async.h>
#include <sys/ontrap.h>
#include <sys/ddifm.h>
#include <sys/hypervisor_api.h>
#include <sys/errorq.h>
#include <sys/promif.h>
#include <sys/prom_plat.h>
#include <sys/x_call.h>
#include <sys/error.h>
#include <sys/fm/util.h>
#include <sys/ivintr.h>

#define	MAX_CE_FLTS		10
#define	MAX_ASYNC_FLTS		6

errorq_t *ue_queue;		/* queue of uncorrectable errors */
errorq_t *ce_queue;		/* queue of correctable errors */

/*
 * Used by the memory test driver.
 * ce_verbose_memory - covers CEs in DIMMs
 * ce_verbose_other - covers "others" (ecache, IO, etc.)
 *
 * If the value is 0, nothing is logged.
 * If the value is 1, the error is logged to the log file, but not the console.
 * If the value is 2, the error is logged to the log file and the console.
 */
int	ce_verbose_memory = 1;
int	ce_verbose_other = 1;

int	ce_show_data = 0;
int	ce_debug = 0;
int	ue_debug = 0;
int	reset_debug = 0;

/*
 * Tunables for controlling the handling of asynchronous faults (AFTs).
 * Setting these to non-default values on a non-DEBUG kernel is NOT
 * supported.
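 * (aft_testfatal, which forces every AFT to panic immediately, is
 * presumably of use only when exercising the error-handling path.)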
 */
int aft_verbose = 0;	/* log AFT messages > 1 to log only */
int aft_panic = 0;	/* panic (not reboot) on fatal usermode AFLT */
int aft_testfatal = 0;	/* force all AFTs to panic immediately */

/*
 * Used for vbsc hostshutdown (power-off button)
 */
int err_shutdown_triggered = 0;	/* only once */
uint_t err_shutdown_inum = 0;	/* used to pull the trigger */

/*
 * Defined in bus_func.c but initialized in error_init
 */
extern kmutex_t bfd_lock;

static uint32_t rq_overflow_count = 0;	/* counter for rq overflow */

static void cpu_queue_one_event(errh_async_flt_t *);
static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t);
static void errh_page_retire(errh_async_flt_t *, uchar_t);
static int errh_error_protected(struct regs *, struct async_flt *, int *);
static void errh_rq_full(struct async_flt *);
static void ue_drain(void *, struct async_flt *, errorq_elem_t *);
static void ce_drain(void *, struct async_flt *, errorq_elem_t *);

/*ARGSUSED*/
void
process_resumable_error(struct regs *rp, uint32_t head_offset,
    uint32_t tail_offset)
{
	struct machcpu *mcpup;
	struct async_flt *aflt;
	errh_async_flt_t errh_flt;
	errh_er_t *head_va;

	mcpup = &(CPU->cpu_m);

	while (head_offset != tail_offset) {
		/* kernel buffer starts right after the resumable queue */
		head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset +
		    CPU_RQ_SIZE);
		/* Copy the error report to a local buffer */
		bzero(&errh_flt, sizeof (errh_async_flt_t));
		bcopy((char *)head_va, &(errh_flt.errh_er),
		    sizeof (errh_er_t));

		/* Increment the queue head */
		head_offset += Q_ENTRY_SIZE;
		/* Wrap around */
		head_offset &= (CPU_RQ_SIZE - 1);

		/* Zero the error handle so the slot can hold a new report */
		head_va->ehdl = 0;

		switch (errh_flt.errh_er.desc) {
		case ERRH_DESC_UCOR_RE:
			break;

		case ERRH_DESC_WARN_RE:
			/*
			 * Power-off requested, but handle it one time only.
			 */
			if (!err_shutdown_triggered) {
				setsoftint(err_shutdown_inum);
				++err_shutdown_triggered;
			}
			continue;

		default:
			cmn_err(CE_WARN, "Error Descriptor 0x%llx "
			    "invalid in resumable error handler",
			    (long long)errh_flt.errh_er.desc);
			continue;
		}

		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
		aflt->flt_id = gethrtime();
		aflt->flt_bus_id = getprocessorid();
		aflt->flt_class = CPU_FAULT;
		aflt->flt_prot = AFLT_PROT_NONE;
		aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK)
		    >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV);

		if (errh_flt.errh_er.attr & ERRH_ATTR_CPU)
			/* The error is on another cpu */
			aflt->flt_panic = 1;
		else
			aflt->flt_panic = 0;

		/*
		 * Handle the resumable-queue-full case.
		 */
		if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) {
			errh_rq_full(aflt);
		}

		/*
		 * Queue the error on the ce or ue queue depending on
		 * flt_panic.  Even if flt_panic is set, keep processing
		 * the remaining elements on the rq until the panic starts.
		 */
		cpu_queue_one_event(&errh_flt);

		/*
		 * Panic here if aflt->flt_panic has been set.
		 * Enqueued errors will be logged as part of the panic flow.
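		 * (ue_queue is created with the ERRORQ_VITAL flag in
		 * error_init(), so the reports queued above are preserved
		 * and logged while the system panics.)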
		 */
		if (aflt->flt_panic) {
			fm_panic("Unrecoverable error on another CPU");
		}
	}
}

void
process_nonresumable_error(struct regs *rp, uint64_t tl,
    uint32_t head_offset, uint32_t tail_offset)
{
	struct machcpu *mcpup;
	struct async_flt *aflt;
	errh_async_flt_t errh_flt;
	errh_er_t *head_va;
	int trampolined = 0;
	int expected = DDI_FM_ERR_UNEXPECTED;
	uint64_t exec_mode;

	mcpup = &(CPU->cpu_m);

	while (head_offset != tail_offset) {
		/* kernel buffer starts right after the nonresumable queue */
		head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset +
		    CPU_NRQ_SIZE);

		/* Copy the error report to a local buffer */
		bzero(&errh_flt, sizeof (errh_async_flt_t));

		bcopy((char *)head_va, &(errh_flt.errh_er),
		    sizeof (errh_er_t));

		/* Increment the queue head */
		head_offset += Q_ENTRY_SIZE;
		/* Wrap around */
		head_offset &= (CPU_NRQ_SIZE - 1);

		/* Zero the error handle so the slot can hold a new report */
		head_va->ehdl = 0;

		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);

		trampolined = 0;

		if (errh_flt.errh_er.attr & ERRH_ATTR_PIO)
			aflt->flt_class = BUS_FAULT;
		else
			aflt->flt_class = CPU_FAULT;

		aflt->flt_id = gethrtime();
		aflt->flt_bus_id = getprocessorid();
		aflt->flt_pc = (caddr_t)rp->r_pc;
		exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK)
		    >> ERRH_MODE_SHIFT;
		aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV ||
		    exec_mode == ERRH_MODE_UNKNOWN);
		aflt->flt_tl = (uchar_t)tl;
		aflt->flt_prot = AFLT_PROT_NONE;
		aflt->flt_panic = ((aflt->flt_tl != 0) ||
		    (aft_testfatal != 0));

		switch (errh_flt.errh_er.desc) {
		case ERRH_DESC_PR_NRE:
			/*
			 * Fall through: a precise fault also needs to be
			 * checked to see if it was protected.
			 */
			/*FALLTHROUGH*/

		case ERRH_DESC_DEF_NRE:
			/*
			 * If the trap occurred in privileged mode at TL=0,
			 * we need to check to see if we were executing
			 * in the kernel under on_trap() or t_lofault
			 * protection.  If so, and if it was a PIO or MEM
			 * error, then modify the saved registers so that
			 * we return from the trap to the appropriate
			 * trampoline routine.
			 */
			if (aflt->flt_priv == 1 && aflt->flt_tl == 0 &&
			    ((errh_flt.errh_er.attr & ERRH_ATTR_PIO) ||
			    (errh_flt.errh_er.attr & ERRH_ATTR_MEM))) {
				trampolined =
				    errh_error_protected(rp, aflt, &expected);
			}

			if (!aflt->flt_priv || aflt->flt_prot ==
			    AFLT_PROT_COPY) {
				aflt->flt_panic |= aft_panic;
			} else if (!trampolined &&
			    (errh_flt.errh_er.attr & ERRH_ATTR_MEM)) {
				aflt->flt_panic = 1;
			}

			/*
			 * If it is a PIO error, we need to query the bus
			 * nexus for fatal errors.
			 */
			if (aflt->flt_class == BUS_FAULT) {
				aflt->flt_addr = errh_flt.errh_er.ra;
				errh_cpu_run_bus_error_handlers(aflt,
				    expected);
			}

			break;

		default:
			cmn_err(CE_WARN, "Error Descriptor 0x%llx "
			    "invalid in nonresumable error handler",
			    (long long)errh_flt.errh_er.desc);
			continue;
		}

		/*
		 * Queue the error report for further processing.  Even if
		 * flt_panic is set, keep processing the other errors in
		 * the queue until the panic routine stops the kernel.
		 */
		cpu_queue_one_event(&errh_flt);

		/*
		 * Panic here if aflt->flt_panic has been set.
		 * Enqueued errors will be logged as part of the panic flow.
		 */
		if (aflt->flt_panic) {
			fm_panic("Unrecoverable hardware error");
		}

		/*
		 * Call page_retire() to handle memory errors.
		 */
		if (errh_flt.errh_er.attr & ERRH_ATTR_MEM)
			errh_page_retire(&errh_flt, PR_UE);

		/*
		 * If we queued an error and it was in user mode or
		 * protected by t_lofault, set the AST flag so the queue
		 * will be drained before returning to user mode.
		 */
		if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
			int pcb_flag = 0;

			if (aflt->flt_class == CPU_FAULT)
				pcb_flag |= ASYNC_HWERR;
			else if (aflt->flt_class == BUS_FAULT)
				pcb_flag |= ASYNC_BERR;

			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
			aston(curthread);
		}
	}
}

/*
 * For PIO errors, this routine calls the nexus driver's error
 * callback routines.  If a callback routine returns fatal, and we
 * are in kernel or unknown mode without any error protection, we
 * need to turn on the panic flag.
 */
void
errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
	int status;
	ddi_fm_error_t de;

	bzero(&de, sizeof (ddi_fm_error_t));

	de.fme_version = DDI_FME_VERSION;
	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
	de.fme_flag = expected;
	de.fme_bus_specific = (void *)aflt->flt_addr;
	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);

	/*
	 * If the error is protected, we will jump to the proper routine
	 * to handle it; if it happened at user level, we just kill the
	 * user process; if the driver thinks the error is not fatal, we
	 * can drive on.  If none of the above is true, we panic.
	 */
	if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) &&
	    (status == DDI_FM_FATAL))
		aflt->flt_panic = 1;
}

/*
 * This routine checks to see if we are under any error protection when
 * the error happens.  If we are under error protection, we unwind to
 * the protection and indicate fault.
 */
static int
errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected)
{
	int trampolined = 0;
	ddi_acc_hdl_t *hp;

	if (curthread->t_ontrap != NULL) {
		on_trap_data_t *otp = curthread->t_ontrap;

		if (otp->ot_prot & OT_DATA_EC) {
			aflt->flt_prot = AFLT_PROT_EC;
			otp->ot_trap |= OT_DATA_EC;
			rp->r_pc = otp->ot_trampoline;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
		}

		if (otp->ot_prot & OT_DATA_ACCESS) {
			aflt->flt_prot = AFLT_PROT_ACCESS;
			otp->ot_trap |= OT_DATA_ACCESS;
			rp->r_pc = otp->ot_trampoline;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
			/*
			 * For peeks and caut_get accesses,
			 * errors are expected.
			 */
			hp = (ddi_acc_hdl_t *)otp->ot_handle;
			if (!hp)
				*expected = DDI_FM_ERR_PEEK;
			else if (hp->ah_acc.devacc_attr_access ==
			    DDI_CAUTIOUS_ACC)
				*expected = DDI_FM_ERR_EXPECTED;
		}
	} else if (curthread->t_lofault) {
		aflt->flt_prot = AFLT_PROT_COPY;
		rp->r_g1 = EFAULT;
		rp->r_pc = curthread->t_lofault;
		rp->r_npc = rp->r_pc + 4;
		trampolined = 1;
	}

	return (trampolined);
}

/*
 * Queue one event.
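 * Panic-bound faults are dispatched to ue_queue, which is drained as
 * part of the panic flow; all other faults go to ce_queue.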
 */
static void
cpu_queue_one_event(errh_async_flt_t *errh_fltp)
{
	struct async_flt *aflt = (struct async_flt *)errh_fltp;
	errorq_t *eqp;

	if (aflt->flt_panic)
		eqp = ue_queue;
	else
		eqp = ce_queue;

	errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t),
	    aflt->flt_panic);
}

/*
 * The cpu_async_log_err() function is called by the ce/ue_drain() function
 * to handle logging for CPU events that are dequeued.  As such, it can be
 * invoked from softint context, from AST processing in the trap() flow, or
 * from the panic flow.  We decode the CPU-specific data, and log
 * appropriate messages.
 */
void
cpu_async_log_err(void *flt)
{
	errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt;
	errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er;

	switch (errh_erp->desc) {
	case ERRH_DESC_UCOR_RE:
		if (errh_erp->attr & ERRH_ATTR_MEM) {
			/*
			 * Turn on the PR_UE flag.  The page will be
			 * scrubbed when it is freed.
			 */
			errh_page_retire(errh_fltp, PR_UE);
		}

		break;

	case ERRH_DESC_PR_NRE:
	case ERRH_DESC_DEF_NRE:
		if (errh_erp->attr & ERRH_ATTR_MEM) {
			/*
			 * For a non-resumable memory error, retire
			 * the page here.
			 */
			errh_page_retire(errh_fltp, PR_UE);

			/*
			 * If we are going to panic, scrub the page first.
			 */
			if (errh_fltp->cmn_asyncflt.flt_panic)
				mem_scrub(errh_fltp->errh_er.ra,
				    errh_fltp->errh_er.sz);
		}
		break;

	default:
		break;
	}
}

/*
 * Called from ce_drain().
 */
void
cpu_ce_log_err(struct async_flt *aflt)
{
	switch (aflt->flt_class) {
	case CPU_FAULT:
		cpu_async_log_err(aflt);
		break;

	case BUS_FAULT:
		cpu_async_log_err(aflt);
		break;

	default:
		break;
	}
}

/*
 * Called from ue_drain().
 */
void
cpu_ue_log_err(struct async_flt *aflt)
{
	switch (aflt->flt_class) {
	case CPU_FAULT:
		cpu_async_log_err(aflt);
		break;

	case BUS_FAULT:
		cpu_async_log_err(aflt);
		break;

	default:
		break;
	}
}

/*
 * Turn on the given flag on the faulted memory region.
 */
static void
errh_page_retire(errh_async_flt_t *errh_fltp, uchar_t flag)
{
	uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
	uint64_t flt_real_addr_end = flt_real_addr_start +
	    errh_fltp->errh_er.sz - 1;
	int64_t current_addr;

	if (errh_fltp->errh_er.sz == 0)
		return;

	for (current_addr = flt_real_addr_start;
	    current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
		(void) page_retire(current_addr, flag);
	}
}

/*
 * Scrub the given physical address range via the hypervisor, advancing
 * by however much each hv_mem_scrub() call reports scrubbed, until the
 * full length is done or the call fails.
 */
void
mem_scrub(uint64_t paddr, uint64_t len)
{
	uint64_t pa, length, scrubbed_len;

	pa = paddr;
	length = len;
	scrubbed_len = 0;

	while (length > 0) {
		if (hv_mem_scrub(pa, length, &scrubbed_len) != H_EOK)
			break;

		pa += scrubbed_len;
		length -= scrubbed_len;
	}
}

/*
 * Flush the contents of the given virtual address range out to physical
 * memory via the hypervisor.
 */
void
mem_sync(caddr_t va, size_t len)
{
	uint64_t pa, length, flushed;

	pa = va_to_pa((caddr_t)va);

	if (pa == (uint64_t)-1)
		return;

	length = len;
	flushed = 0;

	while (length > 0) {
		if (hv_mem_sync(pa, length, &flushed) != H_EOK)
			break;

		pa += flushed;
		length -= flushed;
	}
}

/*
 * If the resumable queue is full, we need to check whether any cpu is in
 * the error state.  If not, we drive on.  If yes, we need to panic.
 * The hypervisor call hv_cpu_state() is used to check the cpu state.
 */
static void
errh_rq_full(struct async_flt *afltp)
{
	processorid_t who;
	uint64_t cpu_state;
	uint64_t retval;

	for (who = 0; who < NCPU; who++)
		if (CPU_IN_SET(cpu_ready_set, who)) {
			retval = hv_cpu_state(who, &cpu_state);
			if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) {
				afltp->flt_panic = 1;
				break;
			}
		}
}

/*
 * Return the size of the processor-specific async error structure.
 */
int
cpu_aflt_size(void)
{
	return (sizeof (errh_async_flt_t));
}

#define	SZ_TO_ETRS_SHIFT	6

/*
 * Called when the resumable queue overflows; just count the overflows.
 */
/*ARGSUSED*/
void
rq_overflow(struct regs *rp, uint64_t head_offset,
    uint64_t tail_offset)
{
	rq_overflow_count++;
}

/*
 * Handler to process a fatal error.  This routine can be called from a
 * softint, from trap()'s AST handling, or from the panic flow.
 */
/*ARGSUSED*/
static void
ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
{
	cpu_ue_log_err(aflt);
}

/*
 * Handler to process a correctable error.  This routine can be called
 * from a softint.  We just call the CPU module's logging routine.
 */
/*ARGSUSED*/
static void
ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
{
	cpu_ce_log_err(aflt);
}

/*
 * Handler to process a vbsc hostshutdown (power-off button) request.
 */
static int
err_shutdown_softintr(void)
{
	cmn_err(CE_WARN, "Power-off requested, system will now shut down.");
	do_shutdown();

	/*
	 * Just in case do_shutdown() fails.
	 */
	(void) timeout((void(*)(void *))power_down, NULL, 100 * hz);
	return (DDI_INTR_CLAIMED);
}

/*
 * Allocate error queue sizes based on max_ncpus.  max_ncpus is set just
 * after ncpunode has been determined.  ncpus is set in start_other_cpus
 * which is called after error_init() but may change dynamically.
 */
void
error_init(void)
{
	char tmp_name[MAXSYSNAME];
	pnode_t node;
	size_t size = cpu_aflt_size();

	/*
	 * Initialize the correctable and uncorrectable error queues.
	 */
	ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL,
	    MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL);

	ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL,
	    MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0);

	if (ue_queue == NULL || ce_queue == NULL)
		panic("failed to create required system error queue");

	/*
	 * Set up the interrupt handler for the power-off button.
	 */
	err_shutdown_inum = add_softintr(PIL_9,
	    (softintrfunc)err_shutdown_softintr, NULL);

	/*
	 * Initialize the busfunc list mutex.  This must be a PIL_15 spin
	 * lock because we will need to acquire it from cpu_async_error().
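	 * (For a MUTEX_SPIN lock, the last mutex_init() argument is the
	 * interrupt block cookie; PIL_15 keeps the lock safe to take
	 * from high-level interrupt context.)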
	 */
	mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15);

	node = prom_rootnode();
	if ((node == OBP_NONODE) || (node == OBP_BADNODE)) {
		cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node);
		return;
	}

	if (((size = prom_getproplen(node, "reset-reason")) != -1) &&
	    (size <= MAXSYSNAME) &&
	    (prom_getprop(node, "reset-reason", tmp_name) != -1)) {
		if (reset_debug) {
			cmn_err(CE_CONT, "System booting after %s\n",
			    tmp_name);
		} else if (strncmp(tmp_name, "FATAL", 5) == 0) {
			cmn_err(CE_CONT,
			    "System booting after fatal error %s\n", tmp_name);
		}
	}
}

/*
 * The nonresumable queue is full; panic here.
 */
/*ARGSUSED*/
void
nrq_overflow(struct regs *rp)
{
	fm_panic("Nonresumable queue full");
}