1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Common corrected MCE threshold handler code: 4 */ 5 #include <linux/interrupt.h> 6 #include <linux/kernel.h> 7 8 #include <asm/irq_vectors.h> 9 #include <asm/traps.h> 10 #include <asm/apic.h> 11 #include <asm/mce.h> 12 #include <asm/trace/irq_vectors.h> 13 14 #include "internal.h" 15 16 static void default_threshold_interrupt(void) 17 { 18 pr_err("Unexpected threshold interrupt at vector %x\n", 19 THRESHOLD_APIC_VECTOR); 20 } 21 22 void (*mce_threshold_vector)(void) = default_threshold_interrupt; 23 24 DEFINE_IDTENTRY_SYSVEC(sysvec_threshold) 25 { 26 trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); 27 inc_irq_stat(irq_threshold_count); 28 mce_threshold_vector(); 29 trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR); 30 apic_eoi(); 31 } 32 33 DEFINE_PER_CPU(struct mca_storm_desc, storm_desc); 34 35 void mce_inherit_storm(unsigned int bank) 36 { 37 struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); 38 39 /* 40 * Previous CPU owning this bank had put it into storm mode, 41 * but the precise history of that storm is unknown. Assume 42 * the worst (all recent polls of the bank found a valid error 43 * logged). This will avoid the new owner prematurely declaring 44 * the storm has ended. 45 */ 46 storm->banks[bank].history = ~0ull; 47 storm->banks[bank].timestamp = jiffies; 48 } 49 50 bool mce_get_storm_mode(void) 51 { 52 return __this_cpu_read(storm_desc.poll_mode); 53 } 54 55 void mce_set_storm_mode(bool storm) 56 { 57 __this_cpu_write(storm_desc.poll_mode, storm); 58 } 59 60 static void mce_handle_storm(unsigned int bank, bool on) 61 { 62 switch (boot_cpu_data.x86_vendor) { 63 case X86_VENDOR_INTEL: 64 mce_intel_handle_storm(bank, on); 65 break; 66 } 67 } 68 69 void cmci_storm_begin(unsigned int bank) 70 { 71 struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); 72 73 __set_bit(bank, this_cpu_ptr(mce_poll_banks)); 74 storm->banks[bank].in_storm_mode = true; 75 76 /* 77 * If this is the first bank on this CPU to enter storm mode 78 * start polling. 79 */ 80 if (++storm->stormy_bank_count == 1) 81 mce_timer_kick(true); 82 } 83 84 void cmci_storm_end(unsigned int bank) 85 { 86 struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); 87 88 __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); 89 storm->banks[bank].history = 0; 90 storm->banks[bank].in_storm_mode = false; 91 92 /* If no banks left in storm mode, stop polling. */ 93 if (!this_cpu_dec_return(storm_desc.stormy_bank_count)) 94 mce_timer_kick(false); 95 } 96 97 void mce_track_storm(struct mce *mce) 98 { 99 struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); 100 unsigned long now = jiffies, delta; 101 unsigned int shift = 1; 102 u64 history = 0; 103 104 /* No tracking needed for banks that do not support CMCI */ 105 if (storm->banks[mce->bank].poll_only) 106 return; 107 108 /* 109 * When a bank is in storm mode it is polled once per second and 110 * the history mask will record about the last minute of poll results. 111 * If it is not in storm mode, then the bank is only checked when 112 * there is a CMCI interrupt. Check how long it has been since 113 * this bank was last checked, and adjust the amount of "shift" 114 * to apply to history. 115 */ 116 if (!storm->banks[mce->bank].in_storm_mode) { 117 delta = now - storm->banks[mce->bank].timestamp; 118 shift = (delta + HZ) / HZ; 119 } 120 121 /* If it has been a long time since the last poll, clear history. */ 122 if (shift < NUM_HISTORY_BITS) 123 history = storm->banks[mce->bank].history << shift; 124 125 storm->banks[mce->bank].timestamp = now; 126 127 /* History keeps track of corrected errors. VAL=1 && UC=0 */ 128 if ((mce->status & MCI_STATUS_VAL) && mce_is_correctable(mce)) 129 history |= 1; 130 131 storm->banks[mce->bank].history = history; 132 133 if (storm->banks[mce->bank].in_storm_mode) { 134 if (history & GENMASK_ULL(STORM_END_POLL_THRESHOLD, 0)) 135 return; 136 printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), mce->bank); 137 mce_handle_storm(mce->bank, false); 138 cmci_storm_end(mce->bank); 139 } else { 140 if (hweight64(history) < STORM_BEGIN_THRESHOLD) 141 return; 142 printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), mce->bank); 143 mce_handle_storm(mce->bank, true); 144 cmci_storm_begin(mce->bank); 145 } 146 } 147