/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/machsystm.h>
#include <sys/cpuvar.h>
#include <sys/async.h>
#include <sys/ontrap.h>
#include <sys/ddifm.h>
#include <sys/hypervisor_api.h>
#include <sys/errorq.h>
#include <sys/promif.h>
#include <sys/prom_plat.h>
#include <sys/x_call.h>
#include <sys/error.h>
#include <sys/fm/util.h>

#define	MAX_CE_FLTS		10
#define	MAX_ASYNC_FLTS		6

errorq_t *ue_queue;			/* queue of uncorrectable errors */
errorq_t *ce_queue;			/* queue of correctable errors */

/*
 * Used by the memory test driver.
 * ce_verbose_memory - covers CEs in DIMMs
 * ce_verbose_other - covers "others" (ecache, IO, etc.)
 *
 * If the value is 0, nothing is logged.
 * If the value is 1, the error is logged to the log file, but not console.
 * If the value is 2, the error is logged to the log file and console.
 */
int	ce_verbose_memory = 1;
int	ce_verbose_other = 1;

int	ce_show_data = 0;
int	ce_debug = 0;
int	ue_debug = 0;
int	reset_debug = 0;

/*
 * Tunables for controlling the handling of asynchronous faults (AFTs).
 * Setting these to non-default values on a non-DEBUG kernel is NOT supported.
 */
int aft_verbose = 0;	/* log AFT messages > 1 to log only */
int aft_panic = 0;	/* panic (not reboot) on fatal usermode AFLT */
int aft_testfatal = 0;	/* force all AFTs to panic immediately */

/*
 * Defined in bus_func.c but initialised in error_init
 */
extern kmutex_t bfd_lock;

static uint32_t rq_overflow_count = 0;	/* counter for rq overflow */

static void cpu_queue_one_event(errh_async_flt_t *);
static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t);
static void errh_page_settoxic(errh_async_flt_t *, uchar_t);
static void errh_page_retire(errh_async_flt_t *);
static int errh_error_protected(struct regs *, struct async_flt *, int *);
static void errh_rq_full(struct async_flt *);
static void ue_drain(void *, struct async_flt *, errorq_elem_t *);
static void ce_drain(void *, struct async_flt *, errorq_elem_t *);

/*
 * Process all error reports currently pending on the resumable error
 * queue and dispatch each one for logging.
 */
/*ARGSUSED*/
void
process_resumable_error(struct regs *rp, uint32_t head_offset,
    uint32_t tail_offset)
{
	struct machcpu *mcpup;
	struct async_flt *aflt;
	errh_async_flt_t errh_flt;
	errh_er_t *head_va;

	mcpup = &(CPU->cpu_m);

	while (head_offset != tail_offset) {
		/* kernel buffer starts right after the resumable queue */
		head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset +
		    CPU_RQ_SIZE);
		/* Copy the error report to local buffer */
		bzero(&errh_flt, sizeof (errh_async_flt_t));
		bcopy((char *)head_va, &(errh_flt.errh_er),
		    sizeof (errh_er_t));

		/* Increment the queue head */
		head_offset += Q_ENTRY_SIZE;
		/* Wrap around */
		head_offset &= (CPU_RQ_SIZE - 1);

		/* set error handle to zero so it can hold new error report */
		head_va->ehdl = 0;

		switch (errh_flt.errh_er.desc) {
		case ERRH_DESC_UCOR_RE:
			break;

		default:
			cmn_err(CE_WARN, "Error Descriptor 0x%llx "
			    " invalid in resumable error handler",
			    (long long) errh_flt.errh_er.desc);
			continue;
		}

		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
		aflt->flt_id = gethrtime();
		aflt->flt_bus_id = getprocessorid();
		aflt->flt_class = CPU_FAULT;
		aflt->flt_prot = AFLT_PROT_NONE;
		aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK)
		    >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV);

		if (errh_flt.errh_er.attr & ERRH_ATTR_CPU)
			/* If it is an error on another cpu */
			aflt->flt_panic = 1;
		else
			aflt->flt_panic = 0;

		/*
		 * Handle the resumable queue full case.
		 */
		if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) {
			(void) errh_rq_full(aflt);
		}

		/*
		 * Queue the error on the ce or ue queue depending on
		 * flt_panic.  Even if flt_panic is set, keep processing
		 * the remaining elements on the resumable queue until
		 * the panic starts.
		 */
		(void) cpu_queue_one_event(&errh_flt);

		/*
		 * Panic here if aflt->flt_panic has been set.
		 * Enqueued errors will be logged as part of the panic flow.
		 */
		if (aflt->flt_panic) {
			fm_panic("Unrecoverable error on another CPU");
		}
	}
}
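
/*
 * Process the reports currently pending on the non-resumable error queue:
 * each report is copied out of the queue, classified, checked against any
 * active on_trap()/t_lofault protection, and queued for logging; fatal
 * conditions set flt_panic and are handled by panicking here.
 */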
void
process_nonresumable_error(struct regs *rp, uint64_t tl,
    uint32_t head_offset, uint32_t tail_offset)
{
	struct machcpu *mcpup;
	struct async_flt *aflt;
	errh_async_flt_t errh_flt;
	errh_er_t *head_va;
	int trampolined = 0;
	int expected = DDI_FM_ERR_UNEXPECTED;
	uint64_t exec_mode;

	mcpup = &(CPU->cpu_m);

	while (head_offset != tail_offset) {
		/* kernel buffer starts right after the nonresumable queue */
		head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset +
		    CPU_NRQ_SIZE);

		/* Copy the error report to local buffer */
		bzero(&errh_flt, sizeof (errh_async_flt_t));

		bcopy((char *)head_va, &(errh_flt.errh_er),
		    sizeof (errh_er_t));

		/* Increment the queue head */
		head_offset += Q_ENTRY_SIZE;
		/* Wrap around */
		head_offset &= (CPU_NRQ_SIZE - 1);

		/* set error handle to zero so it can hold new error report */
		head_va->ehdl = 0;

		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);

		trampolined = 0;

		if (errh_flt.errh_er.attr & ERRH_ATTR_PIO)
			aflt->flt_class = BUS_FAULT;
		else
			aflt->flt_class = CPU_FAULT;

		aflt->flt_id = gethrtime();
		aflt->flt_bus_id = getprocessorid();
		aflt->flt_pc = (caddr_t)rp->r_pc;
		exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK)
		    >> ERRH_MODE_SHIFT;
		aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV ||
		    exec_mode == ERRH_MODE_UNKNOWN);
		aflt->flt_tl = (uchar_t)tl;
		aflt->flt_prot = AFLT_PROT_NONE;
		aflt->flt_panic = ((aflt->flt_tl != 0) ||
		    (aft_testfatal != 0));

		switch (errh_flt.errh_er.desc) {
		case ERRH_DESC_PR_NRE:
			/*
			 * Fall through: a precise fault also needs to be
			 * checked to see if it was protected.
			 */

		case ERRH_DESC_DEF_NRE:
			/*
			 * If the trap occurred in privileged mode at TL=0,
			 * we need to check to see if we were executing
			 * in kernel under on_trap() or t_lofault
			 * protection.  If so, modify the saved registers
			 * so that we return from the trap to the
			 * appropriate trampoline routine.
			 */
			if (aflt->flt_priv == 1 && aflt->flt_tl == 0)
				trampolined =
				    errh_error_protected(rp, aflt, &expected);

			if (!aflt->flt_priv || aflt->flt_prot ==
			    AFLT_PROT_COPY) {
				aflt->flt_panic |= aft_panic;
			} else if (!trampolined &&
			    aflt->flt_class != BUS_FAULT) {
				aflt->flt_panic = 1;
			}

			/*
			 * If PIO error, we need to query the bus nexus
			 * for fatal errors.
			 */
			if (aflt->flt_class == BUS_FAULT) {
				aflt->flt_addr = errh_flt.errh_er.ra;
				errh_cpu_run_bus_error_handlers(aflt,
				    expected);
			}

			break;

		default:
			cmn_err(CE_WARN, "Error Descriptor 0x%llx "
			    " invalid in nonresumable error handler",
			    (long long) errh_flt.errh_er.desc);
			continue;
		}

		/*
		 * Queue the error report for further processing.  Even if
		 * flt_panic is set, the code still processes the other
		 * errors in the queue until the panic routine stops the
		 * kernel.
		 */
		(void) cpu_queue_one_event(&errh_flt);

		/*
		 * Panic here if aflt->flt_panic has been set.
		 * Enqueued errors will be logged as part of the panic flow.
		 */
		if (aflt->flt_panic) {
			fm_panic("Unrecoverable hardware error");
		}

		/*
		 * If it is a memory error, we turn on the PAGE_IS_TOXIC
		 * flag.  The page will be retired later and scrubbed when
		 * it is freed.
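		 * errh_page_retire() performs the actual retirement when
		 * the queued report is drained through cpu_async_log_err().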
		 */
		if (errh_flt.errh_er.attr & ERRH_ATTR_MEM)
			(void) errh_page_settoxic(&errh_flt, PAGE_IS_TOXIC);

		/*
		 * If we queued an error and it was in user mode or
		 * protected by t_lofault, set the AST flag so the queue
		 * will be drained before returning to user mode.
		 */
		if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
			int pcb_flag = 0;

			if (aflt->flt_class == CPU_FAULT)
				pcb_flag |= ASYNC_HWERR;
			else if (aflt->flt_class == BUS_FAULT)
				pcb_flag |= ASYNC_BERR;

			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
			aston(curthread);
		}
	}
}

/*
 * For PIO errors, this routine calls the nexus driver's error
 * callback routines.  If the callback routine returns fatal, and
 * we are in kernel or unknown mode without any error protection,
 * we need to turn on the panic flag.
 */
void
errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
	int status;
	ddi_fm_error_t de;

	bzero(&de, sizeof (ddi_fm_error_t));

	de.fme_version = DDI_FME_VERSION;
	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
	de.fme_flag = expected;
	de.fme_bus_specific = (void *)aflt->flt_addr;
	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);

	/*
	 * If the error is protected, we will jump to the proper routine
	 * to handle it; if it happened at user level, we just kill the
	 * user process; if the driver thinks the error is not fatal,
	 * we can drive on.  If none of the above is true, we panic.
	 */
	if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) &&
	    (status == DDI_FM_FATAL))
		aflt->flt_panic = 1;
}

/*
 * This routine checks to see if we are under any error protection when
 * the error happens.  If we are under error protection, we unwind to
 * the protection and indicate fault.
 */
static int
errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected)
{
	int trampolined = 0;
	ddi_acc_hdl_t *hp;

	if (curthread->t_ontrap != NULL) {
		on_trap_data_t *otp = curthread->t_ontrap;

		if (otp->ot_prot & OT_DATA_EC) {
			aflt->flt_prot = AFLT_PROT_EC;
			otp->ot_trap |= OT_DATA_EC;
			rp->r_pc = otp->ot_trampoline;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
		}

		if (otp->ot_prot & OT_DATA_ACCESS) {
			aflt->flt_prot = AFLT_PROT_ACCESS;
			otp->ot_trap |= OT_DATA_ACCESS;
			rp->r_pc = otp->ot_trampoline;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
			/*
			 * For peeks and cautious gets, errors are expected.
			 */
			hp = (ddi_acc_hdl_t *)otp->ot_handle;
			if (!hp)
				*expected = DDI_FM_ERR_PEEK;
			else if (hp->ah_acc.devacc_attr_access ==
			    DDI_CAUTIOUS_ACC)
				*expected = DDI_FM_ERR_EXPECTED;
		}
	} else if (curthread->t_lofault) {
		aflt->flt_prot = AFLT_PROT_COPY;
		rp->r_g1 = EFAULT;
		rp->r_pc = curthread->t_lofault;
		rp->r_npc = rp->r_pc + 4;
		trampolined = 1;
	}

	return (trampolined);
}

/*
 * Queue one event.
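 * Reports flagged for panic go to the UE queue, which is drained as part
 * of the panic flow; everything else goes to the CE queue and is drained
 * from a soft interrupt.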
 */
static void
cpu_queue_one_event(errh_async_flt_t *errh_fltp)
{
	struct async_flt *aflt = (struct async_flt *)errh_fltp;
	errorq_t *eqp;

	if (aflt->flt_panic)
		eqp = ue_queue;
	else
		eqp = ce_queue;

	errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t),
	    aflt->flt_panic);
}

/*
 * The cpu_async_log_err() function is called by the ce/ue_drain() function to
 * handle logging for CPU events that are dequeued.  As such, it can be invoked
 * from softint context, from AST processing in the trap() flow, or from the
 * panic flow.  We decode the CPU-specific data, and log appropriate messages.
 */
void
cpu_async_log_err(void *flt)
{
	errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt;
	errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er;

	switch (errh_erp->desc) {
	case ERRH_DESC_UCOR_RE:
		if (errh_erp->attr & ERRH_ATTR_MEM) {
			/*
			 * Turn on the PAGE_IS_TOXIC flag.  The page will be
			 * scrubbed when it is freed.
			 */
			(void) errh_page_settoxic(errh_fltp, PAGE_IS_TOXIC);
		}

		break;

	case ERRH_DESC_PR_NRE:
	case ERRH_DESC_DEF_NRE:
		if (errh_erp->attr & ERRH_ATTR_MEM) {
			/*
			 * For a non-resumable memory error, retire
			 * the page here.
			 */
			errh_page_retire(errh_fltp);
		}
		break;

	default:
		break;
	}
}

/*
 * Called from ce_drain().
 */
void
cpu_ce_log_err(struct async_flt *aflt)
{
	switch (aflt->flt_class) {
	case CPU_FAULT:
		cpu_async_log_err(aflt);
		break;

	case BUS_FAULT:
		cpu_async_log_err(aflt);
		break;

	default:
		break;
	}
}

/*
 * Called from ue_drain().
 */
void
cpu_ue_log_err(struct async_flt *aflt)
{
	switch (aflt->flt_class) {
	case CPU_FAULT:
		cpu_async_log_err(aflt);
		break;

	case BUS_FAULT:
		cpu_async_log_err(aflt);
		break;

	default:
		break;
	}
}

/*
 * Turn the given toxic flag on for the pages of the error memory region.
 */
static void
errh_page_settoxic(errh_async_flt_t *errh_fltp, uchar_t flag)
{
	page_t *pp;
	uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
	uint64_t flt_real_addr_end = flt_real_addr_start +
	    errh_fltp->errh_er.sz - 1;
	int64_t current_addr;

	if (errh_fltp->errh_er.sz == 0)
		return;

	for (current_addr = flt_real_addr_start;
	    current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
		pp = page_numtopp_nolock((pfn_t)
		    (current_addr >> MMU_PAGESHIFT));

		if (pp != NULL) {
			page_settoxic(pp, flag);
		}
	}
}

/*
 * Retire the page(s) indicated in the error report.
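 * The report supplies the real address (ra) and size (sz) of the faulty
 * region; every page spanning that range is handed to page_retire().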
 */
static void
errh_page_retire(errh_async_flt_t *errh_fltp)
{
	page_t *pp;
	uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
	uint64_t flt_real_addr_end = flt_real_addr_start +
	    errh_fltp->errh_er.sz - 1;
	int64_t current_addr;

	if (errh_fltp->errh_er.sz == 0)
		return;

	for (current_addr = flt_real_addr_start;
	    current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
		pp = page_numtopp_nolock((pfn_t)
		    (current_addr >> MMU_PAGESHIFT));

		if (pp != NULL) {
			(void) page_retire(pp, PAGE_IS_TOXIC);
		}
	}
}

/*
 * Scrub a region of physical memory via the hv_mem_scrub() hypervisor call.
 */
void
mem_scrub(uint64_t paddr, uint64_t len)
{
	uint64_t pa, length, scrubbed_len;
	uint64_t ret = H_EOK;

	pa = paddr;
	length = len;
	scrubbed_len = 0;

	while (ret == H_EOK) {
		ret = hv_mem_scrub(pa, length, &scrubbed_len);

		if (ret == H_EOK || scrubbed_len >= length) {
			break;
		}

		pa += scrubbed_len;
		length -= scrubbed_len;
	}
}

/*
 * Sync a region of memory to RAM via the hv_mem_sync() hypervisor call.
 */
void
mem_sync(caddr_t va, size_t len)
{
	uint64_t pa, length, flushed;
	uint64_t ret = H_EOK;

	pa = va_to_pa((caddr_t)va);

	if (pa == (uint64_t)-1)
		return;

	length = len;
	flushed = 0;

	while (ret == H_EOK) {
		ret = hv_mem_sync(pa, length, &flushed);

		if (ret == H_EOK || flushed >= length) {
			break;
		}

		pa += flushed;
		length -= flushed;
	}
}

/*
 * If the resumable queue is full, we need to check whether any cpu is in
 * the error state.  If not, we drive on.  If yes, we need to panic.  The
 * hypervisor call hv_cpu_state() is used to check the cpu state.
 */
static void
errh_rq_full(struct async_flt *afltp)
{
	processorid_t who;
	uint64_t cpu_state;
	uint64_t retval;

	for (who = 0; who < NCPU; who++)
		if (CPU_IN_SET(cpu_ready_set, who)) {
			retval = hv_cpu_state(who, &cpu_state);
			if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) {
				afltp->flt_panic = 1;
				break;
			}
		}
}

/*
 * Return the size of the processor-specific async error structure.
 */
int
cpu_aflt_size(void)
{
	return (sizeof (errh_async_flt_t));
}

#define	SZ_TO_ETRS_SHIFT	6

/*
 * Called when the resumable error queue overflows; just count the
 * occurrences.
 */
/*ARGSUSED*/
void
rq_overflow(struct regs *rp, uint64_t head_offset,
    uint64_t tail_offset)
{
	rq_overflow_count++;
}

/*
 * Handler to process a fatal error.  This routine can be called from a
 * softint, called from trap()'s AST handling, or called from the panic flow.
 */
/*ARGSUSED*/
static void
ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
{
	cpu_ue_log_err(aflt);
}

/*
 * Handler to process a correctable error.  This routine can be called from
 * a softint.  We just call the CPU module's logging routine.
 */
/*ARGSUSED*/
static void
ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
{
	cpu_ce_log_err(aflt);
}

/*
 * Allocate error queue sizes based on max_ncpus.  max_ncpus is set just
 * after ncpunode has been determined.  ncpus is set in start_other_cpus
 * which is called after error_init() but may change dynamically.
 */
void
error_init(void)
{
	char tmp_name[MAXSYSNAME];
	dnode_t node;
	size_t size = cpu_aflt_size();

	/*
	 * Initialize the correctable and uncorrectable error queues.
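	 * The UE queue is sized for MAX_ASYNC_FLTS entries per CPU and
	 * drained at PIL_2; the CE queue is sized for MAX_CE_FLTS entries
	 * per CPU and drained at PIL_1.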
	 */
	ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL,
	    MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL);

	ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL,
	    MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0);

	if (ue_queue == NULL || ce_queue == NULL)
		panic("failed to create required system error queue");

	/*
	 * Initialize the busfunc list mutex.  This must be a PIL_15 spin lock
	 * because we will need to acquire it from cpu_async_error().
	 */
	mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15);

	node = prom_rootnode();
	if ((node == OBP_NONODE) || (node == OBP_BADNODE)) {
		cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node);
		return;
	}

	if (((size = prom_getproplen(node, "reset-reason")) != -1) &&
	    (size <= MAXSYSNAME) &&
	    (prom_getprop(node, "reset-reason", tmp_name) != -1)) {
		if (reset_debug) {
			cmn_err(CE_CONT, "System booting after %s\n", tmp_name);
		} else if (strncmp(tmp_name, "FATAL", 5) == 0) {
			cmn_err(CE_CONT,
			    "System booting after fatal error %s\n", tmp_name);
		}
	}
}