/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
	return;
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 = do not release the mce event. Caller will invoke
 *		    release_mce_event() once the event has been consumed.
 *		1 = release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform-specific machine check
 * handler routines and by KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}


/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = __this_cpu_inc_return(mce_ue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_ue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

	/* Queue work to process this event later. */
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}
/*
 * Process pending MCE UE events from the per-CPU UE event queue. This
 * work function runs from a workqueue, i.e. in process context.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh well!
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		__this_cpu_dec(mce_ue_count);
	}
}
/*
 * Process pending MCE events from the per-CPU MCE event queue. This
 * function runs from irq_work context, after the machine check handler
 * has queued an event.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
300 */ 301 while (__this_cpu_read(mce_queue_count) > 0) { 302 index = __this_cpu_read(mce_queue_count) - 1; 303 evt = this_cpu_ptr(&mce_event_queue[index]); 304 machine_check_print_event_info(evt, false, false); 305 __this_cpu_dec(mce_queue_count); 306 } 307 } 308 309 void machine_check_print_event_info(struct machine_check_event *evt, 310 bool user_mode, bool in_guest) 311 { 312 const char *level, *sevstr, *subtype; 313 static const char *mc_ue_types[] = { 314 "Indeterminate", 315 "Instruction fetch", 316 "Page table walk ifetch", 317 "Load/Store", 318 "Page table walk Load/Store", 319 }; 320 static const char *mc_slb_types[] = { 321 "Indeterminate", 322 "Parity", 323 "Multihit", 324 }; 325 static const char *mc_erat_types[] = { 326 "Indeterminate", 327 "Parity", 328 "Multihit", 329 }; 330 static const char *mc_tlb_types[] = { 331 "Indeterminate", 332 "Parity", 333 "Multihit", 334 }; 335 static const char *mc_user_types[] = { 336 "Indeterminate", 337 "tlbie(l) invalid", 338 }; 339 static const char *mc_ra_types[] = { 340 "Indeterminate", 341 "Instruction fetch (bad)", 342 "Instruction fetch (foreign)", 343 "Page table walk ifetch (bad)", 344 "Page table walk ifetch (foreign)", 345 "Load (bad)", 346 "Store (bad)", 347 "Page table walk Load/Store (bad)", 348 "Page table walk Load/Store (foreign)", 349 "Load/Store (foreign)", 350 }; 351 static const char *mc_link_types[] = { 352 "Indeterminate", 353 "Instruction fetch (timeout)", 354 "Page table walk ifetch (timeout)", 355 "Load (timeout)", 356 "Store (timeout)", 357 "Page table walk Load/Store (timeout)", 358 }; 359 360 /* Print things out */ 361 if (evt->version != MCE_V1) { 362 pr_err("Machine Check Exception, Unknown event version %d !\n", 363 evt->version); 364 return; 365 } 366 switch (evt->severity) { 367 case MCE_SEV_NO_ERROR: 368 level = KERN_INFO; 369 sevstr = "Harmless"; 370 break; 371 case MCE_SEV_WARNING: 372 level = KERN_WARNING; 373 sevstr = ""; 374 break; 375 case MCE_SEV_ERROR_SYNC: 376 level = KERN_ERR; 377 sevstr = "Severe"; 378 break; 379 case MCE_SEV_FATAL: 380 default: 381 level = KERN_ERR; 382 sevstr = "Fatal"; 383 break; 384 } 385 386 printk("%s%s Machine check interrupt [%s]\n", level, sevstr, 387 evt->disposition == MCE_DISPOSITION_RECOVERED ? 388 "Recovered" : "Not recovered"); 389 390 if (in_guest) { 391 printk("%s Guest NIP: %016llx\n", level, evt->srr0); 392 } else if (user_mode) { 393 printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, 394 evt->srr0, current->pid, current->comm); 395 } else { 396 printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, 397 (void *)evt->srr0); 398 } 399 400 printk("%s Initiator: %s\n", level, 401 evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); 402 switch (evt->error_type) { 403 case MCE_ERROR_TYPE_UE: 404 subtype = evt->u.ue_error.ue_error_type < 405 ARRAY_SIZE(mc_ue_types) ? 406 mc_ue_types[evt->u.ue_error.ue_error_type] 407 : "Unknown"; 408 printk("%s Error type: UE [%s]\n", level, subtype); 409 if (evt->u.ue_error.effective_address_provided) 410 printk("%s Effective address: %016llx\n", 411 level, evt->u.ue_error.effective_address); 412 if (evt->u.ue_error.physical_address_provided) 413 printk("%s Physical address: %016llx\n", 414 level, evt->u.ue_error.physical_address); 415 break; 416 case MCE_ERROR_TYPE_SLB: 417 subtype = evt->u.slb_error.slb_error_type < 418 ARRAY_SIZE(mc_slb_types) ? 
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		printk("%s Error type: SLB [%s]\n", level, subtype);
		if (evt->u.slb_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.slb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_ERAT:
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		printk("%s Error type: ERAT [%s]\n", level, subtype);
		if (evt->u.erat_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.erat_error.effective_address);
		break;
	case MCE_ERROR_TYPE_TLB:
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		printk("%s Error type: TLB [%s]\n", level, subtype);
		if (evt->u.tlb_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.tlb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_USER:
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		printk("%s Error type: User [%s]\n", level, subtype);
		if (evt->u.user_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.user_error.effective_address);
		break;
	case MCE_ERROR_TYPE_RA:
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		printk("%s Error type: Real address [%s]\n", level, subtype);
		if (evt->u.ra_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.ra_error.effective_address);
		break;
	case MCE_ERROR_TYPE_LINK:
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		printk("%s Error type: Link [%s]\n", level, subtype);
		if (evt->u.link_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.link_error.effective_address);
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		printk("%s Error type: Unknown\n", level);
		break;
	}
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
long machine_check_early(struct pt_regs *regs)
{
	long handled = 0;

	/*
	 * See if platform is capable of handling machine check.
495 */ 496 if (ppc_md.machine_check_early) 497 handled = ppc_md.machine_check_early(regs); 498 return handled; 499 } 500 501 /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */ 502 static enum { 503 DTRIG_UNKNOWN, 504 DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */ 505 DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */ 506 } hmer_debug_trig_function; 507 508 static int init_debug_trig_function(void) 509 { 510 int pvr; 511 struct device_node *cpun; 512 struct property *prop = NULL; 513 const char *str; 514 515 /* First look in the device tree */ 516 preempt_disable(); 517 cpun = of_get_cpu_node(smp_processor_id(), NULL); 518 if (cpun) { 519 of_property_for_each_string(cpun, "ibm,hmi-special-triggers", 520 prop, str) { 521 if (strcmp(str, "bit17-vector-ci-load") == 0) 522 hmer_debug_trig_function = DTRIG_VECTOR_CI; 523 else if (strcmp(str, "bit17-tm-suspend-escape") == 0) 524 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; 525 } 526 of_node_put(cpun); 527 } 528 preempt_enable(); 529 530 /* If we found the property, don't look at PVR */ 531 if (prop) 532 goto out; 533 534 pvr = mfspr(SPRN_PVR); 535 /* Check for POWER9 Nimbus (scale-out) */ 536 if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { 537 /* DD2.2 and later */ 538 if ((pvr & 0xfff) >= 0x202) 539 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; 540 /* DD2.0 and DD2.1 - used for vector CI load emulation */ 541 else if ((pvr & 0xfff) >= 0x200) 542 hmer_debug_trig_function = DTRIG_VECTOR_CI; 543 } 544 545 out: 546 switch (hmer_debug_trig_function) { 547 case DTRIG_VECTOR_CI: 548 pr_debug("HMI debug trigger used for vector CI load\n"); 549 break; 550 case DTRIG_SUSPEND_ESCAPE: 551 pr_debug("HMI debug trigger used for TM suspend escape\n"); 552 break; 553 default: 554 break; 555 } 556 return 0; 557 } 558 __initcall(init_debug_trig_function); 559 560 /* 561 * Handle HMIs that occur as a result of a debug trigger. 562 * Return values: 563 * -1 means this is not a HMI cause that we know about 564 * 0 means no further handling is required 565 * 1 means further handling is required 566 */ 567 long hmi_handle_debugtrig(struct pt_regs *regs) 568 { 569 unsigned long hmer = mfspr(SPRN_HMER); 570 long ret = 0; 571 572 /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */ 573 if (!((hmer & HMER_DEBUG_TRIG) 574 && hmer_debug_trig_function != DTRIG_UNKNOWN)) 575 return -1; 576 577 hmer &= ~HMER_DEBUG_TRIG; 578 /* HMER is a write-AND register */ 579 mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG); 580 581 switch (hmer_debug_trig_function) { 582 case DTRIG_VECTOR_CI: 583 /* 584 * Now to avoid problems with soft-disable we 585 * only do the emulation if we are coming from 586 * host user space 587 */ 588 if (regs && user_mode(regs)) 589 ret = local_paca->hmi_p9_special_emu = 1; 590 591 break; 592 593 default: 594 break; 595 } 596 597 /* 598 * See if any other HMI causes remain to be handled 599 */ 600 if (hmer & mfspr(SPRN_HMEER)) 601 return -1; 602 603 return ret; 604 } 605 606 /* 607 * Return values: 608 */ 609 long hmi_exception_realmode(struct pt_regs *regs) 610 { 611 int ret; 612 613 __this_cpu_inc(irq_stat.hmi_exceptions); 614 615 ret = hmi_handle_debugtrig(regs); 616 if (ret >= 0) 617 return ret; 618 619 wait_for_subcore_guest_exit(); 620 621 if (ppc_md.hmi_exception_early) 622 ppc_md.hmi_exception_early(regs); 623 624 wait_for_tb_resync(); 625 626 return 1; 627 } 628