/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
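
/*
 * Overview of the flow implemented below: machine checks are first seen
 * in real mode via machine_check_early(), and save_mce_event() records
 * the details in the per-cpu mce_event array. The event is then either
 * consumed right away through get_mce_event()/release_mce_event(), or
 * deferred: regular events go to mce_event_queue and are printed from
 * irq_work context, while UE events go to mce_ue_event_queue and are
 * handed to memory_failure() from a work queue.
 */
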
static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high level MCE information into the per-cpu buffer,
 * which is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;
	mce->cpu = get_paca()->paca_index;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;
	mce->sync_error = mce_err->sync_error;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
	return;
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by the platform specific machine check
 * handling routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}
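
/*
 * Illustrative use of the interface above (not a real caller): a platform
 * machine check handler can consume and free the event in one go,
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_RELEASE))
 *		... inspect evt ...
 *
 * or peek at it with MCE_EVENT_DONTRELEASE and call release_mce_event()
 * once it is done with the slot.
 */
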
void release_mce_event(void)
{
	get_mce_event(NULL, true);
}

/*
 * Queue up the MCE UE event which can then be handled later.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = __this_cpu_inc_return(mce_ue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_ue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

	/* Queue work to process this event later. */
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which can then be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}
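
/*
 * UE events take the schedule_work() path above rather than irq_work
 * because memory_failure() may sleep and so needs process context,
 * whereas the other queued events only need to be printed and can be
 * handled from irq_work context.
 */
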
/*
 * Process pending UE events from the MCE UE event queue.
 * This function runs from work queue context.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh well!
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		__this_cpu_dec(mce_ue_count);
	}
}

/*
 * Process pending MCE events from the MCE event queue.
 * This function runs from irq_work context.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		evt = this_cpu_ptr(&mce_event_queue[index]);
		machine_check_print_event_info(evt, false, false);
		__this_cpu_dec(mce_queue_count);
	}
}

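/*
 * The lines printed below look roughly like (illustrative values only):
 *
 *	MCE: CPU0: machine check (Severe) Host SLB Multihit DAR: c00000000123abcd [Recovered]
 *	MCE: CPU0: NIP: [c000000000123456] some_kernel_function+0x26/0x80
 */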
void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
	       level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
	       err_type, subtype, dar_str,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
		       level, evt->cpu, current->pid, current->comm,
		       in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
		       level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain srr0 and srr1.
 */
long machine_check_early(struct pt_regs *regs)
{
	long handled = 0;

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if the platform is capable of handling the machine check.
	 */
	if (ppc_md.machine_check_early)
		handled = ppc_md.machine_check_early(regs);
	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

/*
 * Return values:
 * 0 means no further handling is required
 * 1 means further handling is required
 */
long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	__this_cpu_inc(irq_stat.hmi_exceptions);

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}