1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/machsystm.h> 31 #include <sys/cpuvar.h> 32 #include <sys/async.h> 33 #include <sys/ontrap.h> 34 #include <sys/ddifm.h> 35 #include <sys/hypervisor_api.h> 36 #include <sys/errorq.h> 37 #include <sys/promif.h> 38 #include <sys/prom_plat.h> 39 #include <sys/x_call.h> 40 #include <sys/error.h> 41 #include <sys/fm/util.h> 42 #include <sys/ivintr.h> 43 44 #define MAX_CE_FLTS 10 45 #define MAX_ASYNC_FLTS 6 46 47 errorq_t *ue_queue; /* queue of uncorrectable errors */ 48 errorq_t *ce_queue; /* queue of correctable errors */ 49 50 /* 51 * Being used by memory test driver. 52 * ce_verbose_memory - covers CEs in DIMMs 53 * ce_verbose_other - covers "others" (ecache, IO, etc.) 54 * 55 * If the value is 0, nothing is logged. 56 * If the value is 1, the error is logged to the log file, but not console. 57 * If the value is 2, the error is logged to the log file and console. 58 */ 59 int ce_verbose_memory = 1; 60 int ce_verbose_other = 1; 61 62 int ce_show_data = 0; 63 int ce_debug = 0; 64 int ue_debug = 0; 65 int reset_debug = 0; 66 67 /* 68 * Tunables for controlling the handling of asynchronous faults (AFTs). Setting 69 * these to non-default values on a non-DEBUG kernel is NOT supported. 70 */ 71 int aft_verbose = 0; /* log AFT messages > 1 to log only */ 72 int aft_panic = 0; /* panic (not reboot) on fatal usermode AFLT */ 73 int aft_testfatal = 0; /* force all AFTs to panic immediately */ 74 75 /* 76 * Used for vbsc hostshutdown (power-off buton) 77 */ 78 int err_shutdown_triggered = 0; /* only once */ 79 uint_t err_shutdown_inum = 0; /* used to pull the trigger */ 80 81 /* 82 * Defined in bus_func.c but initialised in error_init 83 */ 84 extern kmutex_t bfd_lock; 85 86 static uint32_t rq_overflow_count = 0; /* counter for rq overflow */ 87 88 static void cpu_queue_one_event(errh_async_flt_t *); 89 static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t); 90 static void errh_page_retire(errh_async_flt_t *, uchar_t); 91 static int errh_error_protected(struct regs *, struct async_flt *, int *); 92 static void errh_rq_full(struct async_flt *); 93 static void ue_drain(void *, struct async_flt *, errorq_elem_t *); 94 static void ce_drain(void *, struct async_flt *, errorq_elem_t *); 95 96 /*ARGSUSED*/ 97 void 98 process_resumable_error(struct regs *rp, uint32_t head_offset, 99 uint32_t tail_offset) 100 { 101 struct machcpu *mcpup; 102 struct async_flt *aflt; 103 errh_async_flt_t errh_flt; 104 errh_er_t *head_va; 105 106 mcpup = &(CPU->cpu_m); 107 108 while (head_offset != tail_offset) { 109 /* kernel buffer starts right after the resumable queue */ 110 head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset + 111 CPU_RQ_SIZE); 112 /* Copy the error report to local buffer */ 113 bzero(&errh_flt, sizeof (errh_async_flt_t)); 114 bcopy((char *)head_va, &(errh_flt.errh_er), 115 sizeof (errh_er_t)); 116 117 /* Increment the queue head */ 118 head_offset += Q_ENTRY_SIZE; 119 /* Wrap around */ 120 head_offset &= (CPU_RQ_SIZE - 1); 121 122 /* set error handle to zero so it can hold new error report */ 123 head_va->ehdl = 0; 124 125 switch (errh_flt.errh_er.desc) { 126 case ERRH_DESC_UCOR_RE: 127 break; 128 129 case ERRH_DESC_WARN_RE: 130 /* 131 * Power-off requested, but handle it one time only. 132 */ 133 if (!err_shutdown_triggered) { 134 setsoftint(err_shutdown_inum); 135 ++err_shutdown_triggered; 136 } 137 continue; 138 139 default: 140 cmn_err(CE_WARN, "Error Descriptor 0x%llx " 141 " invalid in resumable error handler", 142 (long long) errh_flt.errh_er.desc); 143 continue; 144 } 145 146 aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt); 147 aflt->flt_id = gethrtime(); 148 aflt->flt_bus_id = getprocessorid(); 149 aflt->flt_class = CPU_FAULT; 150 aflt->flt_prot = AFLT_PROT_NONE; 151 aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK) 152 >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV); 153 154 if (errh_flt.errh_er.attr & ERRH_ATTR_CPU) 155 /* If it is an error on other cpu */ 156 aflt->flt_panic = 1; 157 else 158 aflt->flt_panic = 0; 159 160 /* 161 * Handle resumable queue full case. 162 */ 163 if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) { 164 (void) errh_rq_full(aflt); 165 } 166 167 /* 168 * Queue the error on ce or ue queue depend on flt_panic. 169 * Even if flt_panic is set, the code still keep processing 170 * the rest element on rq until the panic starts. 171 */ 172 (void) cpu_queue_one_event(&errh_flt); 173 174 /* 175 * Panic here if aflt->flt_panic has been set. 176 * Enqueued errors will be logged as part of the panic flow. 177 */ 178 if (aflt->flt_panic) { 179 fm_panic("Unrecoverable error on another CPU"); 180 } 181 } 182 } 183 184 void 185 process_nonresumable_error(struct regs *rp, uint64_t tl, 186 uint32_t head_offset, uint32_t tail_offset) 187 { 188 struct machcpu *mcpup; 189 struct async_flt *aflt; 190 errh_async_flt_t errh_flt; 191 errh_er_t *head_va; 192 int trampolined = 0; 193 int expected = DDI_FM_ERR_UNEXPECTED; 194 uint64_t exec_mode; 195 196 mcpup = &(CPU->cpu_m); 197 198 while (head_offset != tail_offset) { 199 /* kernel buffer starts right after the nonresumable queue */ 200 head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset + 201 CPU_NRQ_SIZE); 202 203 /* Copy the error report to local buffer */ 204 bzero(&errh_flt, sizeof (errh_async_flt_t)); 205 206 bcopy((char *)head_va, &(errh_flt.errh_er), 207 sizeof (errh_er_t)); 208 209 /* Increment the queue head */ 210 head_offset += Q_ENTRY_SIZE; 211 /* Wrap around */ 212 head_offset &= (CPU_NRQ_SIZE - 1); 213 214 /* set error handle to zero so it can hold new error report */ 215 head_va->ehdl = 0; 216 217 aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt); 218 219 trampolined = 0; 220 221 if (errh_flt.errh_er.attr & ERRH_ATTR_PIO) 222 aflt->flt_class = BUS_FAULT; 223 else 224 aflt->flt_class = CPU_FAULT; 225 226 aflt->flt_id = gethrtime(); 227 aflt->flt_bus_id = getprocessorid(); 228 aflt->flt_pc = (caddr_t)rp->r_pc; 229 exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK) 230 >> ERRH_MODE_SHIFT; 231 aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV || 232 exec_mode == ERRH_MODE_UNKNOWN); 233 aflt->flt_tl = (uchar_t)tl; 234 aflt->flt_prot = AFLT_PROT_NONE; 235 aflt->flt_panic = ((aflt->flt_tl != 0) || 236 (aft_testfatal != 0)); 237 238 switch (errh_flt.errh_er.desc) { 239 case ERRH_DESC_PR_NRE: 240 /* 241 * Fall through, precise fault also need to check 242 * to see if it was protected. 243 */ 244 245 case ERRH_DESC_DEF_NRE: 246 /* 247 * If the trap occurred in privileged mode at TL=0, 248 * we need to check to see if we were executing 249 * in kernel under on_trap() or t_lofault 250 * protection. If so, modify the saved registers 251 * so that we return from the trap to the 252 * appropriate trampoline routine. 253 */ 254 if (aflt->flt_priv == 1 && aflt->flt_tl == 0) 255 trampolined = 256 errh_error_protected(rp, aflt, &expected); 257 258 if (!aflt->flt_priv || aflt->flt_prot == 259 AFLT_PROT_COPY) { 260 aflt->flt_panic |= aft_panic; 261 } else if (!trampolined && 262 aflt->flt_class != BUS_FAULT) { 263 aflt->flt_panic = 1; 264 } 265 266 /* 267 * If PIO error, we need to query the bus nexus 268 * for fatal errors. 269 */ 270 if (aflt->flt_class == BUS_FAULT) { 271 aflt->flt_addr = errh_flt.errh_er.ra; 272 errh_cpu_run_bus_error_handlers(aflt, 273 expected); 274 } 275 276 break; 277 278 default: 279 cmn_err(CE_WARN, "Error Descriptor 0x%llx " 280 " invalid in nonresumable error handler", 281 (long long) errh_flt.errh_er.desc); 282 continue; 283 } 284 285 /* 286 * Queue the error report for further processing. If 287 * flt_panic is set, code still process other errors 288 * in the queue until the panic routine stops the 289 * kernel. 290 */ 291 (void) cpu_queue_one_event(&errh_flt); 292 293 /* 294 * Panic here if aflt->flt_panic has been set. 295 * Enqueued errors will be logged as part of the panic flow. 296 */ 297 if (aflt->flt_panic) { 298 fm_panic("Unrecoverable hardware error"); 299 } 300 301 /* 302 * Call page_retire() to handle memory errors. 303 */ 304 if (errh_flt.errh_er.attr & ERRH_ATTR_MEM) 305 errh_page_retire(&errh_flt, PR_UE); 306 307 /* 308 * If we queued an error and the it was in user mode or 309 * protected by t_lofault, 310 * set AST flag so the queue will be drained before 311 * returning to user mode. 312 */ 313 if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) { 314 int pcb_flag = 0; 315 316 if (aflt->flt_class == CPU_FAULT) 317 pcb_flag |= ASYNC_HWERR; 318 else if (aflt->flt_class == BUS_FAULT) 319 pcb_flag |= ASYNC_BERR; 320 321 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 322 aston(curthread); 323 } 324 } 325 } 326 327 /* 328 * For PIO errors, this routine calls nexus driver's error 329 * callback routines. If the callback routine returns fatal, and 330 * we are in kernel or unknow mode without any error protection, 331 * we need to turn on the panic flag. 332 */ 333 void 334 errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 335 { 336 int status; 337 ddi_fm_error_t de; 338 339 bzero(&de, sizeof (ddi_fm_error_t)); 340 341 de.fme_version = DDI_FME_VERSION; 342 de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1); 343 de.fme_flag = expected; 344 de.fme_bus_specific = (void *)aflt->flt_addr; 345 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 346 347 /* 348 * If error is protected, it will jump to proper routine 349 * to handle the handle; if it is in user level, we just 350 * kill the user process; if the driver thinks the error is 351 * not fatal, we can drive on. If none of above are true, 352 * we panic 353 */ 354 if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) && 355 (status == DDI_FM_FATAL)) 356 aflt->flt_panic = 1; 357 } 358 359 /* 360 * This routine checks to see if we are under any error protection when 361 * the error happens. If we are under error protection, we unwind to 362 * the protection and indicate fault. 363 */ 364 static int 365 errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected) 366 { 367 int trampolined = 0; 368 ddi_acc_hdl_t *hp; 369 370 if (curthread->t_ontrap != NULL) { 371 on_trap_data_t *otp = curthread->t_ontrap; 372 373 if (otp->ot_prot & OT_DATA_EC) { 374 aflt->flt_prot = AFLT_PROT_EC; 375 otp->ot_trap |= OT_DATA_EC; 376 rp->r_pc = otp->ot_trampoline; 377 rp->r_npc = rp->r_pc +4; 378 trampolined = 1; 379 } 380 381 if (otp->ot_prot & OT_DATA_ACCESS) { 382 aflt->flt_prot = AFLT_PROT_ACCESS; 383 otp->ot_trap |= OT_DATA_ACCESS; 384 rp->r_pc = otp->ot_trampoline; 385 rp->r_npc = rp->r_pc + 4; 386 trampolined = 1; 387 /* 388 * for peek and caut_gets 389 * errors are expected 390 */ 391 hp = (ddi_acc_hdl_t *)otp->ot_handle; 392 if (!hp) 393 *expected = DDI_FM_ERR_PEEK; 394 else if (hp->ah_acc.devacc_attr_access == 395 DDI_CAUTIOUS_ACC) 396 *expected = DDI_FM_ERR_EXPECTED; 397 } 398 } else if (curthread->t_lofault) { 399 aflt->flt_prot = AFLT_PROT_COPY; 400 rp->r_g1 = EFAULT; 401 rp->r_pc = curthread->t_lofault; 402 rp->r_npc = rp->r_pc + 4; 403 trampolined = 1; 404 } 405 406 return (trampolined); 407 } 408 409 /* 410 * Queue one event. 411 */ 412 static void 413 cpu_queue_one_event(errh_async_flt_t *errh_fltp) 414 { 415 struct async_flt *aflt = (struct async_flt *)errh_fltp; 416 errorq_t *eqp; 417 418 if (aflt->flt_panic) 419 eqp = ue_queue; 420 else 421 eqp = ce_queue; 422 423 errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t), 424 aflt->flt_panic); 425 } 426 427 /* 428 * The cpu_async_log_err() function is called by the ce/ue_drain() function to 429 * handle logging for CPU events that are dequeued. As such, it can be invoked 430 * from softint context, from AST processing in the trap() flow, or from the 431 * panic flow. We decode the CPU-specific data, and log appropriate messages. 432 */ 433 void 434 cpu_async_log_err(void *flt) 435 { 436 errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt; 437 errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er; 438 439 switch (errh_erp->desc) { 440 case ERRH_DESC_UCOR_RE: 441 if (errh_erp->attr & ERRH_ATTR_MEM) { 442 /* 443 * Turn on the PR_UE flag. The page will be 444 * scrubbed when it is freed. 445 */ 446 errh_page_retire(errh_fltp, PR_UE); 447 } 448 449 break; 450 451 case ERRH_DESC_PR_NRE: 452 case ERRH_DESC_DEF_NRE: 453 if (errh_erp->attr & ERRH_ATTR_MEM) { 454 /* 455 * For non-resumable memory error, retire 456 * the page here. 457 */ 458 errh_page_retire(errh_fltp, PR_UE); 459 460 /* 461 * If we are going to panic, scrub the page first 462 */ 463 if (errh_fltp->cmn_asyncflt.flt_panic) 464 mem_scrub(errh_fltp->errh_er.ra, 465 errh_fltp->errh_er.sz); 466 } 467 break; 468 469 default: 470 break; 471 } 472 } 473 474 /* 475 * Called from ce_drain(). 476 */ 477 void 478 cpu_ce_log_err(struct async_flt *aflt) 479 { 480 switch (aflt->flt_class) { 481 case CPU_FAULT: 482 cpu_async_log_err(aflt); 483 break; 484 485 case BUS_FAULT: 486 cpu_async_log_err(aflt); 487 break; 488 489 default: 490 break; 491 } 492 } 493 494 /* 495 * Called from ue_drain(). 496 */ 497 void 498 cpu_ue_log_err(struct async_flt *aflt) 499 { 500 switch (aflt->flt_class) { 501 case CPU_FAULT: 502 cpu_async_log_err(aflt); 503 break; 504 505 case BUS_FAULT: 506 cpu_async_log_err(aflt); 507 break; 508 509 default: 510 break; 511 } 512 } 513 514 /* 515 * Turn on flag on the error memory region. 516 */ 517 static void 518 errh_page_retire(errh_async_flt_t *errh_fltp, uchar_t flag) 519 { 520 uint64_t flt_real_addr_start = errh_fltp->errh_er.ra; 521 uint64_t flt_real_addr_end = flt_real_addr_start + 522 errh_fltp->errh_er.sz - 1; 523 int64_t current_addr; 524 525 if (errh_fltp->errh_er.sz == 0) 526 return; 527 528 for (current_addr = flt_real_addr_start; 529 current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) { 530 (void) page_retire(current_addr, flag); 531 } 532 } 533 534 void 535 mem_scrub(uint64_t paddr, uint64_t len) 536 { 537 uint64_t pa, length, scrubbed_len; 538 539 pa = paddr; 540 length = len; 541 scrubbed_len = 0; 542 543 while (length > 0) { 544 if (hv_mem_scrub(pa, length, &scrubbed_len) != H_EOK) 545 break; 546 547 pa += scrubbed_len; 548 length -= scrubbed_len; 549 } 550 } 551 552 void 553 mem_sync(caddr_t va, size_t len) 554 { 555 uint64_t pa, length, flushed; 556 557 pa = va_to_pa((caddr_t)va); 558 559 if (pa == (uint64_t)-1) 560 return; 561 562 length = len; 563 flushed = 0; 564 565 while (length > 0) { 566 if (hv_mem_sync(pa, length, &flushed) != H_EOK) 567 break; 568 569 pa += flushed; 570 length -= flushed; 571 } 572 } 573 574 /* 575 * If resumable queue is full, we need to check if any cpu is in 576 * error state. If not, we drive on. If yes, we need to panic. The 577 * hypervisor call hv_cpu_state() is being used for checking the 578 * cpu state. 579 */ 580 static void 581 errh_rq_full(struct async_flt *afltp) 582 { 583 processorid_t who; 584 uint64_t cpu_state; 585 uint64_t retval; 586 587 for (who = 0; who < NCPU; who++) 588 if (CPU_IN_SET(cpu_ready_set, who)) { 589 retval = hv_cpu_state(who, &cpu_state); 590 if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) { 591 afltp->flt_panic = 1; 592 break; 593 } 594 } 595 } 596 597 /* 598 * Return processor specific async error structure 599 * size used. 600 */ 601 int 602 cpu_aflt_size(void) 603 { 604 return (sizeof (errh_async_flt_t)); 605 } 606 607 #define SZ_TO_ETRS_SHIFT 6 608 609 /* 610 * Message print out when resumable queue is overflown 611 */ 612 /*ARGSUSED*/ 613 void 614 rq_overflow(struct regs *rp, uint64_t head_offset, 615 uint64_t tail_offset) 616 { 617 rq_overflow_count++; 618 } 619 620 /* 621 * Handler to process a fatal error. This routine can be called from a 622 * softint, called from trap()'s AST handling, or called from the panic flow. 623 */ 624 /*ARGSUSED*/ 625 static void 626 ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep) 627 { 628 cpu_ue_log_err(aflt); 629 } 630 631 /* 632 * Handler to process a correctable error. This routine can be called from a 633 * softint. We just call the CPU module's logging routine. 634 */ 635 /*ARGSUSED*/ 636 static void 637 ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep) 638 { 639 cpu_ce_log_err(aflt); 640 } 641 642 /* 643 * Handler to process vbsc hostshutdown (power-off button). 644 */ 645 static int 646 err_shutdown_softintr() 647 { 648 cmn_err(CE_WARN, "Power-off requested, system will now shutdown."); 649 do_shutdown(); 650 651 /* 652 * just in case do_shutdown() fails 653 */ 654 (void) timeout((void(*)(void *))power_down, NULL, 100 * hz); 655 return (DDI_INTR_CLAIMED); 656 } 657 658 /* 659 * Allocate error queue sizes based on max_ncpus. max_ncpus is set just 660 * after ncpunode has been determined. ncpus is set in start_other_cpus 661 * which is called after error_init() but may change dynamically. 662 */ 663 void 664 error_init(void) 665 { 666 char tmp_name[MAXSYSNAME]; 667 pnode_t node; 668 size_t size = cpu_aflt_size(); 669 670 /* 671 * Initialize the correctable and uncorrectable error queues. 672 */ 673 ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL, 674 MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL); 675 676 ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL, 677 MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0); 678 679 if (ue_queue == NULL || ce_queue == NULL) 680 panic("failed to create required system error queue"); 681 682 /* 683 * Setup interrupt handler for power-off button. 684 */ 685 err_shutdown_inum = add_softintr(PIL_9, 686 (softintrfunc)err_shutdown_softintr, NULL); 687 688 /* 689 * Initialize the busfunc list mutex. This must be a PIL_15 spin lock 690 * because we will need to acquire it from cpu_async_error(). 691 */ 692 mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15); 693 694 node = prom_rootnode(); 695 if ((node == OBP_NONODE) || (node == OBP_BADNODE)) { 696 cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node); 697 return; 698 } 699 700 if (((size = prom_getproplen(node, "reset-reason")) != -1) && 701 (size <= MAXSYSNAME) && 702 (prom_getprop(node, "reset-reason", tmp_name) != -1)) { 703 if (reset_debug) { 704 cmn_err(CE_CONT, "System booting after %s\n", tmp_name); 705 } else if (strncmp(tmp_name, "FATAL", 5) == 0) { 706 cmn_err(CE_CONT, 707 "System booting after fatal error %s\n", tmp_name); 708 } 709 } 710 } 711 712 /* 713 * Nonresumable queue is full, panic here 714 */ 715 /*ARGSUSED*/ 716 void 717 nrq_overflow(struct regs *rp) 718 { 719 fm_panic("Nonresumable queue full"); 720 } 721