1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Machine check exception handling. 4 * 5 * Copyright 2013 IBM Corporation 6 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 7 */ 8 9 #undef DEBUG 10 #define pr_fmt(fmt) "mce: " fmt 11 12 #include <linux/hardirq.h> 13 #include <linux/types.h> 14 #include <linux/ptrace.h> 15 #include <linux/percpu.h> 16 #include <linux/export.h> 17 #include <linux/irq_work.h> 18 19 #include <asm/machdep.h> 20 #include <asm/mce.h> 21 #include <asm/nmi.h> 22 23 static DEFINE_PER_CPU(int, mce_nest_count); 24 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); 25 26 /* Queue for delayed MCE events. */ 27 static DEFINE_PER_CPU(int, mce_queue_count); 28 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); 29 30 /* Queue for delayed MCE UE events. */ 31 static DEFINE_PER_CPU(int, mce_ue_count); 32 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], 33 mce_ue_event_queue); 34 35 static void machine_check_process_queued_event(struct irq_work *work); 36 static void machine_check_ue_irq_work(struct irq_work *work); 37 static void machine_check_ue_event(struct machine_check_event *evt); 38 static void machine_process_ue_event(struct work_struct *work); 39 40 static struct irq_work mce_event_process_work = { 41 .func = machine_check_process_queued_event, 42 }; 43 44 static struct irq_work mce_ue_event_irq_work = { 45 .func = machine_check_ue_irq_work, 46 }; 47 48 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); 49 50 static void mce_set_error_info(struct machine_check_event *mce, 51 struct mce_error_info *mce_err) 52 { 53 mce->error_type = mce_err->error_type; 54 switch (mce_err->error_type) { 55 case MCE_ERROR_TYPE_UE: 56 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; 57 break; 58 case MCE_ERROR_TYPE_SLB: 59 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; 60 break; 61 case MCE_ERROR_TYPE_ERAT: 62 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; 63 break; 64 case MCE_ERROR_TYPE_TLB: 65 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; 66 break; 67 case MCE_ERROR_TYPE_USER: 68 mce->u.user_error.user_error_type = mce_err->u.user_error_type; 69 break; 70 case MCE_ERROR_TYPE_RA: 71 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type; 72 break; 73 case MCE_ERROR_TYPE_LINK: 74 mce->u.link_error.link_error_type = mce_err->u.link_error_type; 75 break; 76 case MCE_ERROR_TYPE_UNKNOWN: 77 default: 78 break; 79 } 80 } 81 82 /* 83 * Decode and save high level MCE information into per cpu buffer which 84 * is an array of machine_check_event structure. 85 */ 86 void save_mce_event(struct pt_regs *regs, long handled, 87 struct mce_error_info *mce_err, 88 uint64_t nip, uint64_t addr, uint64_t phys_addr) 89 { 90 int index = __this_cpu_inc_return(mce_nest_count) - 1; 91 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); 92 93 /* 94 * Return if we don't have enough space to log mce event. 95 * mce_nest_count may go beyond MAX_MC_EVT but that's ok, 96 * the check below will stop buffer overrun. 97 */ 98 if (index >= MAX_MC_EVT) 99 return; 100 101 /* Populate generic machine check info */ 102 mce->version = MCE_V1; 103 mce->srr0 = nip; 104 mce->srr1 = regs->msr; 105 mce->gpr3 = regs->gpr[3]; 106 mce->in_use = 1; 107 mce->cpu = get_paca()->paca_index; 108 109 /* Mark it recovered if we have handled it and MSR(RI=1). */ 110 if (handled && (regs->msr & MSR_RI)) 111 mce->disposition = MCE_DISPOSITION_RECOVERED; 112 else 113 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; 114 115 mce->initiator = mce_err->initiator; 116 mce->severity = mce_err->severity; 117 mce->sync_error = mce_err->sync_error; 118 mce->error_class = mce_err->error_class; 119 120 /* 121 * Populate the mce error_type and type-specific error_type. 122 */ 123 mce_set_error_info(mce, mce_err); 124 125 if (!addr) 126 return; 127 128 if (mce->error_type == MCE_ERROR_TYPE_TLB) { 129 mce->u.tlb_error.effective_address_provided = true; 130 mce->u.tlb_error.effective_address = addr; 131 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { 132 mce->u.slb_error.effective_address_provided = true; 133 mce->u.slb_error.effective_address = addr; 134 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { 135 mce->u.erat_error.effective_address_provided = true; 136 mce->u.erat_error.effective_address = addr; 137 } else if (mce->error_type == MCE_ERROR_TYPE_USER) { 138 mce->u.user_error.effective_address_provided = true; 139 mce->u.user_error.effective_address = addr; 140 } else if (mce->error_type == MCE_ERROR_TYPE_RA) { 141 mce->u.ra_error.effective_address_provided = true; 142 mce->u.ra_error.effective_address = addr; 143 } else if (mce->error_type == MCE_ERROR_TYPE_LINK) { 144 mce->u.link_error.effective_address_provided = true; 145 mce->u.link_error.effective_address = addr; 146 } else if (mce->error_type == MCE_ERROR_TYPE_UE) { 147 mce->u.ue_error.effective_address_provided = true; 148 mce->u.ue_error.effective_address = addr; 149 if (phys_addr != ULONG_MAX) { 150 mce->u.ue_error.physical_address_provided = true; 151 mce->u.ue_error.physical_address = phys_addr; 152 mce->u.ue_error.ignore_event = mce_err->ignore_event; 153 machine_check_ue_event(mce); 154 } 155 } 156 return; 157 } 158 159 /* 160 * get_mce_event: 161 * mce Pointer to machine_check_event structure to be filled. 162 * release Flag to indicate whether to free the event slot or not. 163 * 0 <= do not release the mce event. Caller will invoke 164 * release_mce_event() once event has been consumed. 165 * 1 <= release the slot. 166 * 167 * return 1 = success 168 * 0 = failure 169 * 170 * get_mce_event() will be called by platform specific machine check 171 * handle routine and in KVM. 172 * When we call get_mce_event(), we are still in interrupt context and 173 * preemption will not be scheduled until ret_from_expect() routine 174 * is called. 175 */ 176 int get_mce_event(struct machine_check_event *mce, bool release) 177 { 178 int index = __this_cpu_read(mce_nest_count) - 1; 179 struct machine_check_event *mc_evt; 180 int ret = 0; 181 182 /* Sanity check */ 183 if (index < 0) 184 return ret; 185 186 /* Check if we have MCE info to process. */ 187 if (index < MAX_MC_EVT) { 188 mc_evt = this_cpu_ptr(&mce_event[index]); 189 /* Copy the event structure and release the original */ 190 if (mce) 191 *mce = *mc_evt; 192 if (release) 193 mc_evt->in_use = 0; 194 ret = 1; 195 } 196 /* Decrement the count to free the slot. */ 197 if (release) 198 __this_cpu_dec(mce_nest_count); 199 200 return ret; 201 } 202 203 void release_mce_event(void) 204 { 205 get_mce_event(NULL, true); 206 } 207 208 static void machine_check_ue_irq_work(struct irq_work *work) 209 { 210 schedule_work(&mce_ue_event_work); 211 } 212 213 /* 214 * Queue up the MCE event which then can be handled later. 215 */ 216 static void machine_check_ue_event(struct machine_check_event *evt) 217 { 218 int index; 219 220 index = __this_cpu_inc_return(mce_ue_count) - 1; 221 /* If queue is full, just return for now. */ 222 if (index >= MAX_MC_EVT) { 223 __this_cpu_dec(mce_ue_count); 224 return; 225 } 226 memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt)); 227 228 /* Queue work to process this event later. */ 229 irq_work_queue(&mce_ue_event_irq_work); 230 } 231 232 /* 233 * Queue up the MCE event which then can be handled later. 234 */ 235 void machine_check_queue_event(void) 236 { 237 int index; 238 struct machine_check_event evt; 239 240 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 241 return; 242 243 index = __this_cpu_inc_return(mce_queue_count) - 1; 244 /* If queue is full, just return for now. */ 245 if (index >= MAX_MC_EVT) { 246 __this_cpu_dec(mce_queue_count); 247 return; 248 } 249 memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); 250 251 /* Queue irq work to process this event later. */ 252 irq_work_queue(&mce_event_process_work); 253 } 254 /* 255 * process pending MCE event from the mce event queue. This function will be 256 * called during syscall exit. 257 */ 258 static void machine_process_ue_event(struct work_struct *work) 259 { 260 int index; 261 struct machine_check_event *evt; 262 263 while (__this_cpu_read(mce_ue_count) > 0) { 264 index = __this_cpu_read(mce_ue_count) - 1; 265 evt = this_cpu_ptr(&mce_ue_event_queue[index]); 266 #ifdef CONFIG_MEMORY_FAILURE 267 /* 268 * This should probably queued elsewhere, but 269 * oh! well 270 * 271 * Don't report this machine check because the caller has a 272 * asked us to ignore the event, it has a fixup handler which 273 * will do the appropriate error handling and reporting. 274 */ 275 if (evt->error_type == MCE_ERROR_TYPE_UE) { 276 if (evt->u.ue_error.ignore_event) { 277 __this_cpu_dec(mce_ue_count); 278 continue; 279 } 280 281 if (evt->u.ue_error.physical_address_provided) { 282 unsigned long pfn; 283 284 pfn = evt->u.ue_error.physical_address >> 285 PAGE_SHIFT; 286 memory_failure(pfn, 0); 287 } else 288 pr_warn("Failed to identify bad address from " 289 "where the uncorrectable error (UE) " 290 "was generated\n"); 291 } 292 #endif 293 __this_cpu_dec(mce_ue_count); 294 } 295 } 296 /* 297 * process pending MCE event from the mce event queue. This function will be 298 * called during syscall exit. 299 */ 300 static void machine_check_process_queued_event(struct irq_work *work) 301 { 302 int index; 303 struct machine_check_event *evt; 304 305 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 306 307 /* 308 * For now just print it to console. 309 * TODO: log this error event to FSP or nvram. 310 */ 311 while (__this_cpu_read(mce_queue_count) > 0) { 312 index = __this_cpu_read(mce_queue_count) - 1; 313 evt = this_cpu_ptr(&mce_event_queue[index]); 314 315 if (evt->error_type == MCE_ERROR_TYPE_UE && 316 evt->u.ue_error.ignore_event) { 317 __this_cpu_dec(mce_queue_count); 318 continue; 319 } 320 machine_check_print_event_info(evt, false, false); 321 __this_cpu_dec(mce_queue_count); 322 } 323 } 324 325 void machine_check_print_event_info(struct machine_check_event *evt, 326 bool user_mode, bool in_guest) 327 { 328 const char *level, *sevstr, *subtype, *err_type; 329 uint64_t ea = 0, pa = 0; 330 int n = 0; 331 char dar_str[50]; 332 char pa_str[50]; 333 static const char *mc_ue_types[] = { 334 "Indeterminate", 335 "Instruction fetch", 336 "Page table walk ifetch", 337 "Load/Store", 338 "Page table walk Load/Store", 339 }; 340 static const char *mc_slb_types[] = { 341 "Indeterminate", 342 "Parity", 343 "Multihit", 344 }; 345 static const char *mc_erat_types[] = { 346 "Indeterminate", 347 "Parity", 348 "Multihit", 349 }; 350 static const char *mc_tlb_types[] = { 351 "Indeterminate", 352 "Parity", 353 "Multihit", 354 }; 355 static const char *mc_user_types[] = { 356 "Indeterminate", 357 "tlbie(l) invalid", 358 }; 359 static const char *mc_ra_types[] = { 360 "Indeterminate", 361 "Instruction fetch (bad)", 362 "Instruction fetch (foreign)", 363 "Page table walk ifetch (bad)", 364 "Page table walk ifetch (foreign)", 365 "Load (bad)", 366 "Store (bad)", 367 "Page table walk Load/Store (bad)", 368 "Page table walk Load/Store (foreign)", 369 "Load/Store (foreign)", 370 }; 371 static const char *mc_link_types[] = { 372 "Indeterminate", 373 "Instruction fetch (timeout)", 374 "Page table walk ifetch (timeout)", 375 "Load (timeout)", 376 "Store (timeout)", 377 "Page table walk Load/Store (timeout)", 378 }; 379 static const char *mc_error_class[] = { 380 "Unknown", 381 "Hardware error", 382 "Probable Hardware error (some chance of software cause)", 383 "Software error", 384 "Probable Software error (some chance of hardware cause)", 385 }; 386 387 /* Print things out */ 388 if (evt->version != MCE_V1) { 389 pr_err("Machine Check Exception, Unknown event version %d !\n", 390 evt->version); 391 return; 392 } 393 switch (evt->severity) { 394 case MCE_SEV_NO_ERROR: 395 level = KERN_INFO; 396 sevstr = "Harmless"; 397 break; 398 case MCE_SEV_WARNING: 399 level = KERN_WARNING; 400 sevstr = "Warning"; 401 break; 402 case MCE_SEV_SEVERE: 403 level = KERN_ERR; 404 sevstr = "Severe"; 405 break; 406 case MCE_SEV_FATAL: 407 default: 408 level = KERN_ERR; 409 sevstr = "Fatal"; 410 break; 411 } 412 413 switch (evt->error_type) { 414 case MCE_ERROR_TYPE_UE: 415 err_type = "UE"; 416 subtype = evt->u.ue_error.ue_error_type < 417 ARRAY_SIZE(mc_ue_types) ? 418 mc_ue_types[evt->u.ue_error.ue_error_type] 419 : "Unknown"; 420 if (evt->u.ue_error.effective_address_provided) 421 ea = evt->u.ue_error.effective_address; 422 if (evt->u.ue_error.physical_address_provided) 423 pa = evt->u.ue_error.physical_address; 424 break; 425 case MCE_ERROR_TYPE_SLB: 426 err_type = "SLB"; 427 subtype = evt->u.slb_error.slb_error_type < 428 ARRAY_SIZE(mc_slb_types) ? 429 mc_slb_types[evt->u.slb_error.slb_error_type] 430 : "Unknown"; 431 if (evt->u.slb_error.effective_address_provided) 432 ea = evt->u.slb_error.effective_address; 433 break; 434 case MCE_ERROR_TYPE_ERAT: 435 err_type = "ERAT"; 436 subtype = evt->u.erat_error.erat_error_type < 437 ARRAY_SIZE(mc_erat_types) ? 438 mc_erat_types[evt->u.erat_error.erat_error_type] 439 : "Unknown"; 440 if (evt->u.erat_error.effective_address_provided) 441 ea = evt->u.erat_error.effective_address; 442 break; 443 case MCE_ERROR_TYPE_TLB: 444 err_type = "TLB"; 445 subtype = evt->u.tlb_error.tlb_error_type < 446 ARRAY_SIZE(mc_tlb_types) ? 447 mc_tlb_types[evt->u.tlb_error.tlb_error_type] 448 : "Unknown"; 449 if (evt->u.tlb_error.effective_address_provided) 450 ea = evt->u.tlb_error.effective_address; 451 break; 452 case MCE_ERROR_TYPE_USER: 453 err_type = "User"; 454 subtype = evt->u.user_error.user_error_type < 455 ARRAY_SIZE(mc_user_types) ? 456 mc_user_types[evt->u.user_error.user_error_type] 457 : "Unknown"; 458 if (evt->u.user_error.effective_address_provided) 459 ea = evt->u.user_error.effective_address; 460 break; 461 case MCE_ERROR_TYPE_RA: 462 err_type = "Real address"; 463 subtype = evt->u.ra_error.ra_error_type < 464 ARRAY_SIZE(mc_ra_types) ? 465 mc_ra_types[evt->u.ra_error.ra_error_type] 466 : "Unknown"; 467 if (evt->u.ra_error.effective_address_provided) 468 ea = evt->u.ra_error.effective_address; 469 break; 470 case MCE_ERROR_TYPE_LINK: 471 err_type = "Link"; 472 subtype = evt->u.link_error.link_error_type < 473 ARRAY_SIZE(mc_link_types) ? 474 mc_link_types[evt->u.link_error.link_error_type] 475 : "Unknown"; 476 if (evt->u.link_error.effective_address_provided) 477 ea = evt->u.link_error.effective_address; 478 break; 479 default: 480 case MCE_ERROR_TYPE_UNKNOWN: 481 err_type = "Unknown"; 482 subtype = ""; 483 break; 484 } 485 486 dar_str[0] = pa_str[0] = '\0'; 487 if (ea && evt->srr0 != ea) { 488 /* Load/Store address */ 489 n = sprintf(dar_str, "DAR: %016llx ", ea); 490 if (pa) 491 sprintf(dar_str + n, "paddr: %016llx ", pa); 492 } else if (pa) { 493 sprintf(pa_str, " paddr: %016llx", pa); 494 } 495 496 printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n", 497 level, evt->cpu, sevstr, in_guest ? "Guest" : "Host", 498 err_type, subtype, dar_str, 499 evt->disposition == MCE_DISPOSITION_RECOVERED ? 500 "Recovered" : "Not recovered"); 501 502 if (in_guest || user_mode) { 503 printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n", 504 level, evt->cpu, current->pid, current->comm, 505 in_guest ? "Guest " : "", evt->srr0, pa_str); 506 } else { 507 printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n", 508 level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str); 509 } 510 511 subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ? 512 mc_error_class[evt->error_class] : "Unknown"; 513 printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype); 514 } 515 EXPORT_SYMBOL_GPL(machine_check_print_event_info); 516 517 /* 518 * This function is called in real mode. Strictly no printk's please. 519 * 520 * regs->nip and regs->msr contains srr0 and ssr1. 521 */ 522 long machine_check_early(struct pt_regs *regs) 523 { 524 long handled = 0; 525 526 hv_nmi_check_nonrecoverable(regs); 527 528 /* 529 * See if platform is capable of handling machine check. 530 */ 531 if (ppc_md.machine_check_early) 532 handled = ppc_md.machine_check_early(regs); 533 return handled; 534 } 535 536 /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */ 537 static enum { 538 DTRIG_UNKNOWN, 539 DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */ 540 DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */ 541 } hmer_debug_trig_function; 542 543 static int init_debug_trig_function(void) 544 { 545 int pvr; 546 struct device_node *cpun; 547 struct property *prop = NULL; 548 const char *str; 549 550 /* First look in the device tree */ 551 preempt_disable(); 552 cpun = of_get_cpu_node(smp_processor_id(), NULL); 553 if (cpun) { 554 of_property_for_each_string(cpun, "ibm,hmi-special-triggers", 555 prop, str) { 556 if (strcmp(str, "bit17-vector-ci-load") == 0) 557 hmer_debug_trig_function = DTRIG_VECTOR_CI; 558 else if (strcmp(str, "bit17-tm-suspend-escape") == 0) 559 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; 560 } 561 of_node_put(cpun); 562 } 563 preempt_enable(); 564 565 /* If we found the property, don't look at PVR */ 566 if (prop) 567 goto out; 568 569 pvr = mfspr(SPRN_PVR); 570 /* Check for POWER9 Nimbus (scale-out) */ 571 if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { 572 /* DD2.2 and later */ 573 if ((pvr & 0xfff) >= 0x202) 574 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; 575 /* DD2.0 and DD2.1 - used for vector CI load emulation */ 576 else if ((pvr & 0xfff) >= 0x200) 577 hmer_debug_trig_function = DTRIG_VECTOR_CI; 578 } 579 580 out: 581 switch (hmer_debug_trig_function) { 582 case DTRIG_VECTOR_CI: 583 pr_debug("HMI debug trigger used for vector CI load\n"); 584 break; 585 case DTRIG_SUSPEND_ESCAPE: 586 pr_debug("HMI debug trigger used for TM suspend escape\n"); 587 break; 588 default: 589 break; 590 } 591 return 0; 592 } 593 __initcall(init_debug_trig_function); 594 595 /* 596 * Handle HMIs that occur as a result of a debug trigger. 597 * Return values: 598 * -1 means this is not a HMI cause that we know about 599 * 0 means no further handling is required 600 * 1 means further handling is required 601 */ 602 long hmi_handle_debugtrig(struct pt_regs *regs) 603 { 604 unsigned long hmer = mfspr(SPRN_HMER); 605 long ret = 0; 606 607 /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */ 608 if (!((hmer & HMER_DEBUG_TRIG) 609 && hmer_debug_trig_function != DTRIG_UNKNOWN)) 610 return -1; 611 612 hmer &= ~HMER_DEBUG_TRIG; 613 /* HMER is a write-AND register */ 614 mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG); 615 616 switch (hmer_debug_trig_function) { 617 case DTRIG_VECTOR_CI: 618 /* 619 * Now to avoid problems with soft-disable we 620 * only do the emulation if we are coming from 621 * host user space 622 */ 623 if (regs && user_mode(regs)) 624 ret = local_paca->hmi_p9_special_emu = 1; 625 626 break; 627 628 default: 629 break; 630 } 631 632 /* 633 * See if any other HMI causes remain to be handled 634 */ 635 if (hmer & mfspr(SPRN_HMEER)) 636 return -1; 637 638 return ret; 639 } 640 641 /* 642 * Return values: 643 */ 644 long hmi_exception_realmode(struct pt_regs *regs) 645 { 646 int ret; 647 648 __this_cpu_inc(irq_stat.hmi_exceptions); 649 650 ret = hmi_handle_debugtrig(regs); 651 if (ret >= 0) 652 return ret; 653 654 wait_for_subcore_guest_exit(); 655 656 if (ppc_md.hmi_exception_early) 657 ppc_md.hmi_exception_early(regs); 658 659 wait_for_tb_resync(); 660 661 return 1; 662 } 663