// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;
	mce->cpu = get_paca()->paca_index;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
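	/*
	 * Note: MSR_RI (Recoverable Interrupt) being set means SRR0/SRR1
	 * still held valid state when the machine check was taken, so the
	 * interrupted context can safely be resumed.
	 */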
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;
	mce->sync_error = mce_err->sync_error;
	mce->error_class = mce_err->error_class;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
	return;
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handler routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}


/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = __this_cpu_inc_return(mce_ue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_ue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

	/* Queue work to process this event later. */
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}

/*
 * Process pending MCE UE events from the MCE UE event queue. This function
 * runs from the workqueue scheduled above.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh well.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		__this_cpu_dec(mce_ue_count);
	}
}

/*
 * Process pending MCE events from the MCE event queue. This function is
 * invoked via irq_work once we are back out of the machine check handler.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
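	 * (This runs from irq_work, i.e. regular interrupt context rather
	 *  than the real-mode early handler, so printk is safe here.)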
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		evt = this_cpu_ptr(&mce_event_queue[index]);
		machine_check_print_event_info(evt, false, false);
		__this_cpu_dec(mce_queue_count);
	}
}

void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};
	static const char *mc_error_class[] = {
		"Unknown",
		"Hardware error",
		"Probable Hardware error (some chance of software cause)",
		"Software error",
		"Probable Software error (some chance of hardware cause)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
	       level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
	       err_type, subtype, dar_str,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
		       level, evt->cpu, current->pid, current->comm,
		       in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
		       level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}

	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
		mc_error_class[evt->error_class] : "Unknown";
	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
long machine_check_early(struct pt_regs *regs)
{
	long handled = 0;

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if platform is capable of handling machine check.
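	 * The platform hook, when present, is also invoked while still in
	 * real mode, so it must obey the same no-printk restriction.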
505 */ 506 if (ppc_md.machine_check_early) 507 handled = ppc_md.machine_check_early(regs); 508 return handled; 509 } 510 511 /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */ 512 static enum { 513 DTRIG_UNKNOWN, 514 DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */ 515 DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */ 516 } hmer_debug_trig_function; 517 518 static int init_debug_trig_function(void) 519 { 520 int pvr; 521 struct device_node *cpun; 522 struct property *prop = NULL; 523 const char *str; 524 525 /* First look in the device tree */ 526 preempt_disable(); 527 cpun = of_get_cpu_node(smp_processor_id(), NULL); 528 if (cpun) { 529 of_property_for_each_string(cpun, "ibm,hmi-special-triggers", 530 prop, str) { 531 if (strcmp(str, "bit17-vector-ci-load") == 0) 532 hmer_debug_trig_function = DTRIG_VECTOR_CI; 533 else if (strcmp(str, "bit17-tm-suspend-escape") == 0) 534 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; 535 } 536 of_node_put(cpun); 537 } 538 preempt_enable(); 539 540 /* If we found the property, don't look at PVR */ 541 if (prop) 542 goto out; 543 544 pvr = mfspr(SPRN_PVR); 545 /* Check for POWER9 Nimbus (scale-out) */ 546 if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { 547 /* DD2.2 and later */ 548 if ((pvr & 0xfff) >= 0x202) 549 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; 550 /* DD2.0 and DD2.1 - used for vector CI load emulation */ 551 else if ((pvr & 0xfff) >= 0x200) 552 hmer_debug_trig_function = DTRIG_VECTOR_CI; 553 } 554 555 out: 556 switch (hmer_debug_trig_function) { 557 case DTRIG_VECTOR_CI: 558 pr_debug("HMI debug trigger used for vector CI load\n"); 559 break; 560 case DTRIG_SUSPEND_ESCAPE: 561 pr_debug("HMI debug trigger used for TM suspend escape\n"); 562 break; 563 default: 564 break; 565 } 566 return 0; 567 } 568 __initcall(init_debug_trig_function); 569 570 /* 571 * Handle HMIs that occur as a result of a debug trigger. 572 * Return values: 573 * -1 means this is not a HMI cause that we know about 574 * 0 means no further handling is required 575 * 1 means further handling is required 576 */ 577 long hmi_handle_debugtrig(struct pt_regs *regs) 578 { 579 unsigned long hmer = mfspr(SPRN_HMER); 580 long ret = 0; 581 582 /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */ 583 if (!((hmer & HMER_DEBUG_TRIG) 584 && hmer_debug_trig_function != DTRIG_UNKNOWN)) 585 return -1; 586 587 hmer &= ~HMER_DEBUG_TRIG; 588 /* HMER is a write-AND register */ 589 mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG); 590 591 switch (hmer_debug_trig_function) { 592 case DTRIG_VECTOR_CI: 593 /* 594 * Now to avoid problems with soft-disable we 595 * only do the emulation if we are coming from 596 * host user space 597 */ 598 if (regs && user_mode(regs)) 599 ret = local_paca->hmi_p9_special_emu = 1; 600 601 break; 602 603 default: 604 break; 605 } 606 607 /* 608 * See if any other HMI causes remain to be handled 609 */ 610 if (hmer & mfspr(SPRN_HMEER)) 611 return -1; 612 613 return ret; 614 } 615 616 /* 617 * Return values: 618 */ 619 long hmi_exception_realmode(struct pt_regs *regs) 620 { 621 int ret; 622 623 __this_cpu_inc(irq_stat.hmi_exceptions); 624 625 ret = hmi_handle_debugtrig(regs); 626 if (ret >= 0) 627 return ret; 628 629 wait_for_subcore_guest_exit(); 630 631 if (ppc_md.hmi_exception_early) 632 ppc_md.hmi_exception_early(regs); 633 634 wait_for_tb_resync(); 635 636 return 1; 637 } 638