mce_power.c (58f2c391cc0560231d7636c39d31b1b26c9396b7) | mce_power.c (ba41e1e1ccb9771ce41a3b8e2121f95486e76ac9) |
---|---|
1/* 2 * Machine check exception handling CPU-side for power7 and power8 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * --- 13 unchanged lines hidden (view full) --- 22#undef DEBUG 23#define pr_fmt(fmt) "mce_power: " fmt 24 25#include <linux/types.h> 26#include <linux/ptrace.h> 27#include <asm/mmu.h> 28#include <asm/mce.h> 29#include <asm/machdep.h> | 1/* 2 * Machine check exception handling CPU-side for power7 and power8 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * --- 13 unchanged lines hidden (view full) --- 22#undef DEBUG 23#define pr_fmt(fmt) "mce_power: " fmt 24 25#include <linux/types.h> 26#include <linux/ptrace.h> 27#include <asm/mmu.h> 28#include <asm/mce.h> 29#include <asm/machdep.h> |
30#include <asm/pgtable.h> 31#include <asm/pte-walk.h> 32#include <asm/sstep.h> 33#include <asm/exception-64s.h> |
|
30 | 34 |
35/* 36 * Convert an address related to an mm to a PFN. NOTE: we are in real 37 * mode, we could potentially race with page table updates. 38 */ 39static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) 40{ 41 pte_t *ptep; 42 unsigned long flags; 43 struct mm_struct *mm; 44 45 if (user_mode(regs)) 46 mm = current->mm; 47 else 48 mm = &init_mm; 49 50 local_irq_save(flags); 51 if (mm == current->mm) 52 ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL); 53 else 54 ptep = find_init_mm_pte(addr, NULL); 55 local_irq_restore(flags); 56 if (!ptep || pte_special(*ptep)) 57 return ULONG_MAX; 58 return pte_pfn(*ptep); 59} 60 |
|
31static void flush_tlb_206(unsigned int num_sets, unsigned int action) 32{ 33 unsigned long rb; 34 unsigned int i; 35 36 switch (action) { 37 case TLB_INVAL_SCOPE_GLOBAL: 38 rb = TLBIEL_INVAL_SET; --- 84 unchanged lines hidden (view full) --- 123{ 124 flush_tlb_206(POWER8_TLB_SETS, action); 125} 126 127void __flush_tlb_power9(unsigned int action) 128{ 129 unsigned int num_sets; 130 | 61static void flush_tlb_206(unsigned int num_sets, unsigned int action) 62{ 63 unsigned long rb; 64 unsigned int i; 65 66 switch (action) { 67 case TLB_INVAL_SCOPE_GLOBAL: 68 rb = TLBIEL_INVAL_SET; --- 84 unchanged lines hidden (view full) --- 153{ 154 flush_tlb_206(POWER8_TLB_SETS, action); 155} 156 157void __flush_tlb_power9(unsigned int action) 158{ 159 unsigned int num_sets; 160 |
131 if (radix_enabled()) | 161 if (early_radix_enabled()) |
132 num_sets = POWER9_TLB_SETS_RADIX; 133 else 134 num_sets = POWER9_TLB_SETS_HASH; 135 136 flush_tlb_300(num_sets, action); 137} 138 139 --- 276 unchanged lines hidden (view full) --- 416{ 0x00000010, false, 417 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, 418 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 419{ 0x00000008, false, 420 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, 421 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 422{ 0, false, 0, 0, 0, 0 } }; 423 | 162 num_sets = POWER9_TLB_SETS_RADIX; 163 else 164 num_sets = POWER9_TLB_SETS_HASH; 165 166 flush_tlb_300(num_sets, action); 167} 168 169 --- 276 unchanged lines hidden (view full) --- 446{ 0x00000010, false, 447 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, 448 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 449{ 0x00000008, false, 450 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, 451 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 452{ 0, false, 0, 0, 0, 0 } }; 453 |
454static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr, 455 uint64_t *phys_addr) 456{ 457 /* 458 * Carefully look at the NIP to determine 459 * the instruction to analyse. Reading the NIP 460 * in real-mode is tricky and can lead to recursive 461 * faults 462 */ 463 int instr; 464 unsigned long pfn, instr_addr; 465 struct instruction_op op; 466 struct pt_regs tmp = *regs; 467 468 pfn = addr_to_pfn(regs, regs->nip); 469 if (pfn != ULONG_MAX) { 470 instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK); 471 instr = *(unsigned int *)(instr_addr); 472 if (!analyse_instr(&op, &tmp, instr)) { 473 pfn = addr_to_pfn(regs, op.ea); 474 *addr = op.ea; 475 *phys_addr = (pfn << PAGE_SHIFT); 476 return 0; 477 } 478 /* 479 * analyse_instr() might fail if the instruction 480 * is not a load/store, although this is unexpected 481 * for load/store errors or if we got the NIP 482 * wrong 483 */ 484 } 485 *addr = 0; 486 return -1; 487} 488 |
|
424static int mce_handle_ierror(struct pt_regs *regs, 425 const struct mce_ierror_table table[], 426 struct mce_error_info *mce_err, uint64_t *addr) 427{ 428 uint64_t srr1 = regs->msr; 429 int handled = 0; 430 int i; 431 --- 52 unchanged lines hidden (view full) --- 484 mce_err->severity = MCE_SEV_ERROR_SYNC; 485 mce_err->initiator = MCE_INITIATOR_CPU; 486 487 return 0; 488} 489 490static int mce_handle_derror(struct pt_regs *regs, 491 const struct mce_derror_table table[], | 489static int mce_handle_ierror(struct pt_regs *regs, 490 const struct mce_ierror_table table[], 491 struct mce_error_info *mce_err, uint64_t *addr) 492{ 493 uint64_t srr1 = regs->msr; 494 int handled = 0; 495 int i; 496 --- 52 unchanged lines hidden (view full) --- 549 mce_err->severity = MCE_SEV_ERROR_SYNC; 550 mce_err->initiator = MCE_INITIATOR_CPU; 551 552 return 0; 553} 554 555static int mce_handle_derror(struct pt_regs *regs, 556 const struct mce_derror_table table[], |
492 struct mce_error_info *mce_err, uint64_t *addr) | 557 struct mce_error_info *mce_err, uint64_t *addr, 558 uint64_t *phys_addr) |
493{ 494 uint64_t dsisr = regs->dsisr; 495 int handled = 0; 496 int found = 0; 497 int i; 498 499 *addr = 0; 500 --- 49 unchanged lines hidden (view full) --- 550 case MCE_ERROR_TYPE_LINK: 551 mce_err->u.link_error_type = table[i].error_subtype; 552 break; 553 } 554 mce_err->severity = table[i].severity; 555 mce_err->initiator = table[i].initiator; 556 if (table[i].dar_valid) 557 *addr = regs->dar; | 559{ 560 uint64_t dsisr = regs->dsisr; 561 int handled = 0; 562 int found = 0; 563 int i; 564 565 *addr = 0; 566 --- 49 unchanged lines hidden (view full) --- 616 case MCE_ERROR_TYPE_LINK: 617 mce_err->u.link_error_type = table[i].error_subtype; 618 break; 619 } 620 mce_err->severity = table[i].severity; 621 mce_err->initiator = table[i].initiator; 622 if (table[i].dar_valid) 623 *addr = regs->dar; |
558 | 624 else if (mce_err->severity == MCE_SEV_ERROR_SYNC && 625 table[i].error_type == MCE_ERROR_TYPE_UE) { 626 /* 627 * We do a maximum of 4 nested MCE calls, see 628 * kernel/exception-64s.h 629 */ 630 if (get_paca()->in_mce < MAX_MCE_DEPTH) 631 if (!mce_find_instr_ea_and_pfn(regs, addr, 632 phys_addr)) 633 handled = 1; 634 } |
559 found = 1; 560 } 561 562 if (found) 563 return handled; 564 565 mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; 566 mce_err->severity = MCE_SEV_ERROR_SYNC; --- 20 unchanged lines hidden (view full) --- 587 return handled; 588} 589 590static long mce_handle_error(struct pt_regs *regs, 591 const struct mce_derror_table dtable[], 592 const struct mce_ierror_table itable[]) 593{ 594 struct mce_error_info mce_err = { 0 }; | 635 found = 1; 636 } 637 638 if (found) 639 return handled; 640 641 mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; 642 mce_err->severity = MCE_SEV_ERROR_SYNC; --- 20 unchanged lines hidden (view full) --- 663 return handled; 664} 665 666static long mce_handle_error(struct pt_regs *regs, 667 const struct mce_derror_table dtable[], 668 const struct mce_ierror_table itable[]) 669{ 670 struct mce_error_info mce_err = { 0 }; |
595 uint64_t addr; | 671 uint64_t addr, phys_addr; |
596 uint64_t srr1 = regs->msr; 597 long handled; 598 599 if (SRR1_MC_LOADSTORE(srr1)) | 672 uint64_t srr1 = regs->msr; 673 long handled; 674 675 if (SRR1_MC_LOADSTORE(srr1)) |
600 handled = mce_handle_derror(regs, dtable, &mce_err, &addr); | 676 handled = mce_handle_derror(regs, dtable, &mce_err, &addr, 677 &phys_addr); |
601 else 602 handled = mce_handle_ierror(regs, itable, &mce_err, &addr); 603 604 if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) 605 handled = mce_handle_ue_error(regs); 606 | 678 else 679 handled = mce_handle_ierror(regs, itable, &mce_err, &addr); 680 681 if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) 682 handled = mce_handle_ue_error(regs); 683 |
607 save_mce_event(regs, handled, &mce_err, regs->nip, addr); | 684 save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr); |
608 609 return handled; 610} 611 612long __machine_check_early_realmode_p7(struct pt_regs *regs) 613{ 614 /* P7 DD1 leaves top bits of DSISR undefined */ 615 regs->dsisr &= 0x0000ffff; 616 617 return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); 618} 619 620long __machine_check_early_realmode_p8(struct pt_regs *regs) 621{ 622 return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); 623} 624 625long __machine_check_early_realmode_p9(struct pt_regs *regs) 626{ | 685 686 return handled; 687} 688 689long __machine_check_early_realmode_p7(struct pt_regs *regs) 690{ 691 /* P7 DD1 leaves top bits of DSISR undefined */ 692 regs->dsisr &= 0x0000ffff; 693 694 return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); 695} 696 697long __machine_check_early_realmode_p8(struct pt_regs *regs) 698{ 699 return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); 700} 701 702long __machine_check_early_realmode_p9(struct pt_regs *regs) 703{ |
627 /* 628 * On POWER9 DD2.1 and below, it's possible to get a machine check 629 * caused by a paste instruction where only DSISR bit 25 is set. This 630 * will result in the MCE handler seeing an unknown event and the kernel 631 * crashing. An MCE that occurs like this is spurious, so we don't need 632 * to do anything in terms of servicing it. If there is something that 633 * needs to be serviced, the CPU will raise the MCE again with the 634 * correct DSISR so that it can be serviced properly. So detect this 635 * case and mark it as handled. 636 */ 637 if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000) 638 return 1; 639 | |
640 return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); 641} | 704 return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); 705} |