1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * (c) 2005-2016 Advanced Micro Devices, Inc.
4 *
5 * Written by Jacob Shin - AMD, Inc.
6 * Maintained by: Borislav Petkov <bp@alien8.de>
7 */
8 #include <linux/interrupt.h>
9 #include <linux/notifier.h>
10 #include <linux/kobject.h>
11 #include <linux/percpu.h>
12 #include <linux/errno.h>
13 #include <linux/sched.h>
14 #include <linux/sysfs.h>
15 #include <linux/slab.h>
16 #include <linux/init.h>
17 #include <linux/cpu.h>
18 #include <linux/smp.h>
19 #include <linux/string.h>
20
21 #include <asm/traps.h>
22 #include <asm/apic.h>
23 #include <asm/mce.h>
24 #include <asm/msr.h>
25 #include <asm/trace/irq_vectors.h>
26
27 #include "internal.h"
28
29 #define NR_BLOCKS 5
30 #define THRESHOLD_MAX 0xFFF
31 #define INT_TYPE_APIC 0x00020000
32 #define MASK_VALID_HI 0x80000000
33 #define MASK_CNTP_HI 0x40000000
34 #define MASK_LOCKED_HI 0x20000000
35 #define MASK_LVTOFF_HI 0x00F00000
36 #define MASK_COUNT_EN_HI 0x00080000
37 #define MASK_INT_TYPE_HI 0x00060000
38 #define MASK_OVERFLOW_HI 0x00010000
39 #define MASK_ERR_COUNT_HI 0x00000FFF
40 #define MASK_BLKPTR_LO 0xFF000000
41 #define MCG_XBLK_ADDR 0xC0000400
42
43 /* Deferred error settings */
44 #define MSR_CU_DEF_ERR 0xC0000410
45 #define MASK_DEF_LVTOFF 0x000000F0
46
47 /* Scalable MCA: */
48
49 /* Threshold LVT offset is at MSR0xC0000410[15:12] */
50 #define SMCA_THR_LVT_OFF 0xF000
51
52 static bool thresholding_irq_en;
53
54 struct mce_amd_cpu_data {
55 mce_banks_t thr_intr_banks;
56 mce_banks_t dfr_intr_banks;
57
58 u32 thr_intr_en: 1,
59 dfr_intr_en: 1,
60 __resv: 30;
61 };
62
63 static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data);
64
/*
 * Names used on non-SMCA systems; get_name() indexes this table directly
 * with the bank number.
 */
static const char * const th_names[] = {
	[0] = "load_store",
	[1] = "insn_fetch",
	[2] = "combined_unit",
	[3] = "decode_unit",
	[4] = "northbridge",
	[5] = "execution_unit",
};
73
/*
 * Names for the threshold blocks of SMCA UMC banks; get_name() indexes this
 * table with the block number.
 */
static const char * const smca_umc_block_names[] = {
	[0] = "dram_ecc",
	[1] = "misc_umc",
};
78
79 #define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
80
81 struct smca_hwid {
82 unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
83 u32 hwid_mcatype; /* (hwid,mcatype) tuple */
84 };
85
86 struct smca_bank {
87 const struct smca_hwid *hwid;
88 u32 id; /* Value of MCA_IPID[InstanceId]. */
89 u8 sysfs_id; /* Value used for sysfs name. */
90 u64 paddrv :1, /* Physical Address Valid bit in MCA_CONFIG */
91 __reserved :63;
92 };
93
94 static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
95 static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);
96
/*
 * Base sysfs names of the SMCA bank types, indexed by enum smca_bank_types.
 * Read through smca_get_name(), which bounds-checks the index. Versioned
 * variants of a type that share a name are covered by a range designator.
 */
static const char * const smca_names[] = {
	[SMCA_LS ... SMCA_LS_V2] = "load_store",
	[SMCA_IF] = "insn_fetch",
	[SMCA_L2_CACHE] = "l2_cache",
	[SMCA_DE] = "decode_unit",
	[SMCA_RESERVED] = "reserved",
	[SMCA_EX] = "execution_unit",
	[SMCA_FP] = "floating_point",
	[SMCA_L3_CACHE] = "l3_cache",
	[SMCA_CS ... SMCA_CS_V2] = "coherent_slave",
	[SMCA_PIE] = "pie",

	/* UMC v2 is separate because both of them can exist in a single system. */
	[SMCA_UMC] = "umc",
	[SMCA_UMC_V2] = "umc_v2",
	[SMCA_MA_LLC] = "ma_llc",
	[SMCA_PB] = "param_block",
	[SMCA_PSP ... SMCA_PSP_V2] = "psp",
	[SMCA_SMU ... SMCA_SMU_V2] = "smu",
	[SMCA_MP5] = "mp5",
	[SMCA_MPDMA] = "mpdma",
	[SMCA_NBIO] = "nbio",
	[SMCA_PCIE ... SMCA_PCIE_V2] = "pcie",
	[SMCA_XGMI_PCS] = "xgmi_pcs",
	[SMCA_NBIF] = "nbif",
	[SMCA_SHUB] = "shub",
	[SMCA_SATA] = "sata",
	[SMCA_USB] = "usb",
	[SMCA_USR_DP] = "usr_dp",
	[SMCA_USR_CP] = "usr_cp",
	[SMCA_GMI_PCS] = "gmi_pcs",
	[SMCA_XGMI_PHY] = "xgmi_phy",
	[SMCA_WAFL_PHY] = "wafl_phy",
	[SMCA_GMI_PHY] = "gmi_phy",
};
132
smca_get_name(enum smca_bank_types t)133 static const char *smca_get_name(enum smca_bank_types t)
134 {
135 if (t >= N_SMCA_BANK_TYPES)
136 return NULL;
137
138 return smca_names[t];
139 }
140
smca_get_bank_type(unsigned int cpu,unsigned int bank)141 enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
142 {
143 struct smca_bank *b;
144
145 if (bank >= MAX_NR_BANKS)
146 return N_SMCA_BANK_TYPES;
147
148 b = &per_cpu(smca_banks, cpu)[bank];
149 if (!b->hwid)
150 return N_SMCA_BANK_TYPES;
151
152 return b->hwid->bank_type;
153 }
154 EXPORT_SYMBOL_GPL(smca_get_bank_type);
155
156 static const struct smca_hwid smca_hwid_mcatypes[] = {
157 /* { bank_type, hwid_mcatype } */
158
159 /* Reserved type */
160 { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },
161
162 /* ZN Core (HWID=0xB0) MCA types */
163 { SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
164 { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
165 { SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
166 { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
167 { SMCA_DE, HWID_MCATYPE(0xB0, 0x3) },
168 /* HWID 0xB0 MCATYPE 0x4 is Reserved */
169 { SMCA_EX, HWID_MCATYPE(0xB0, 0x5) },
170 { SMCA_FP, HWID_MCATYPE(0xB0, 0x6) },
171 { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) },
172
173 /* Data Fabric MCA types */
174 { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
175 { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
176 { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
177 { SMCA_MA_LLC, HWID_MCATYPE(0x2E, 0x4) },
178
179 /* Unified Memory Controller MCA type */
180 { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
181 { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) },
182
183 /* Parameter Block MCA type */
184 { SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
185
186 /* Platform Security Processor MCA type */
187 { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
188 { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },
189
190 /* System Management Unit MCA type */
191 { SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
192 { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },
193
194 /* Microprocessor 5 Unit MCA type */
195 { SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },
196
197 /* MPDMA MCA type */
198 { SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) },
199
200 /* Northbridge IO Unit MCA type */
201 { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },
202
203 /* PCI Express Unit MCA type */
204 { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
205 { SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) },
206
207 { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) },
208 { SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) },
209 { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
210 { SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) },
211 { SMCA_USB, HWID_MCATYPE(0xAA, 0x0) },
212 { SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) },
213 { SMCA_USR_CP, HWID_MCATYPE(0x180, 0x0) },
214 { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
215 { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
216 { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) },
217 { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
218 };
219
/*
 * In SMCA enabled processors, we can have multiple banks for a given IP type.
 * So to define a unique name for each bank, get_name() formats the name into
 * a temp c-string: the type's name with the bank's sysfs_id appended, or a
 * generic "th_bank_<bank>" / "th_block_<block>" name.
 *
 * The appended number is printed as an unsigned decimal. Make sure
 * MAX_MCATYPE_NAME_LEN is greater than the length of the longest type name
 * plus 1 (for underscore) plus the digits of that number.
 */
228 #define MAX_MCATYPE_NAME_LEN 30
229 static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
230
231 struct threshold_block {
232 /* This block's number within its bank. */
233 unsigned int block;
234 /* MCA bank number that contains this block. */
235 unsigned int bank;
236 /* CPU which controls this block's MCA bank. */
237 unsigned int cpu;
238 /* MCA_MISC MSR address for this block. */
239 u32 address;
240 /* Enable/Disable APIC interrupt. */
241 bool interrupt_enable;
242 /* Bank can generate an interrupt. */
243 bool interrupt_capable;
244 /* Value upon which threshold interrupt is generated. */
245 u16 threshold_limit;
246 /* sysfs object */
247 struct kobject kobj;
248 /* List of threshold blocks within this block's MCA bank. */
249 struct list_head miscj;
250 };
251
252 struct threshold_bank {
253 struct kobject *kobj;
254 /* List of threshold blocks within this MCA bank. */
255 struct list_head miscj;
256 };
257
258 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
259
260 /*
261 * A list of the banks enabled on each logical CPU. Controls which respective
262 * descriptors to initialize later in mce_threshold_create_device().
263 */
264 static DEFINE_PER_CPU(u64, bank_map);
265
266 static void amd_threshold_interrupt(void);
267 static void amd_deferred_error_interrupt(void);
268
/*
 * Placeholder deferred error handler: only logs that the vector fired.
 * It stays installed in deferred_error_int_vector until smca_bsp_init()
 * replaces it with amd_deferred_error_interrupt().
 */
static void default_deferred_error_interrupt(void)
{
	pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
274
smca_configure(unsigned int bank,unsigned int cpu)275 static void smca_configure(unsigned int bank, unsigned int cpu)
276 {
277 struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data);
278 u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
279 const struct smca_hwid *s_hwid;
280 unsigned int i, hwid_mcatype;
281 u32 high, low;
282 u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank);
283
284 /* Set appropriate bits in MCA_CONFIG */
285 if (!rdmsr_safe(smca_config, &low, &high)) {
286 /*
287 * OS is required to set the MCAX bit to acknowledge that it is
288 * now using the new MSR ranges and new registers under each
289 * bank. It also means that the OS will configure deferred
290 * errors in the new MCx_CONFIG register. If the bit is not set,
291 * uncorrectable errors will cause a system panic.
292 *
293 * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
294 */
295 high |= BIT(0);
296
297 /*
298 * SMCA sets the Deferred Error Interrupt type per bank.
299 *
300 * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
301 * if the DeferredIntType bit field is available.
302 *
303 * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
304 * high portion of the MSR). OS should set this to 0x1 to enable
305 * APIC based interrupt. First, check that no interrupt has been
306 * set.
307 */
308 if ((low & BIT(5)) && !((high >> 5) & 0x3) && data->dfr_intr_en) {
309 __set_bit(bank, data->dfr_intr_banks);
310 high |= BIT(5);
311 }
312
313 /*
314 * SMCA Corrected Error Interrupt
315 *
316 * MCA_CONFIG[IntPresent] is bit 10, and tells us if the bank can
317 * send an MCA Thresholding interrupt without the OS initializing
318 * this feature. This can be used if the threshold limit is managed
319 * by the platform.
320 *
321 * MCA_CONFIG[IntEn] is bit 40 (8 in the high portion of the MSR).
322 * The OS should set this to inform the platform that the OS is ready
323 * to handle the MCA Thresholding interrupt.
324 */
325 if ((low & BIT(10)) && data->thr_intr_en) {
326 __set_bit(bank, data->thr_intr_banks);
327 high |= BIT(8);
328 }
329
330 this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));
331
332 if (low & MCI_CONFIG_PADDRV)
333 this_cpu_ptr(smca_banks)[bank].paddrv = 1;
334
335 wrmsr(smca_config, low, high);
336 }
337
338 if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
339 pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
340 return;
341 }
342
343 hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID,
344 (high & MCI_IPID_MCATYPE) >> 16);
345
346 for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
347 s_hwid = &smca_hwid_mcatypes[i];
348
349 if (hwid_mcatype == s_hwid->hwid_mcatype) {
350 this_cpu_ptr(smca_banks)[bank].hwid = s_hwid;
351 this_cpu_ptr(smca_banks)[bank].id = low;
352 this_cpu_ptr(smca_banks)[bank].sysfs_id = bank_counts[s_hwid->bank_type]++;
353 break;
354 }
355 }
356 }
357
358 struct thresh_restart {
359 struct threshold_block *b;
360 int set_lvt_off;
361 int lvt_off;
362 u16 old_limit;
363 };
364
bank4_names(const struct threshold_block * b)365 static const char *bank4_names(const struct threshold_block *b)
366 {
367 switch (b->address) {
368 /* MSR4_MISC0 */
369 case 0x00000413:
370 return "dram";
371
372 case 0xc0000408:
373 return "ht_links";
374
375 case 0xc0000409:
376 return "l3_cache";
377
378 default:
379 WARN(1, "Funny MSR: 0x%08x\n", b->address);
380 return "";
381 }
382 };
383
384
lvt_interrupt_supported(unsigned int bank,u32 msr_high_bits)385 static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
386 {
387 /*
388 * bank 4 supports APIC LVT interrupts implicitly since forever.
389 */
390 if (bank == 4)
391 return true;
392
393 /*
394 * IntP: interrupt present; if this bit is set, the thresholding
395 * bank can generate APIC LVT interrupts
396 */
397 return msr_high_bits & BIT(28);
398 }
399
lvt_off_valid(struct threshold_block * b,int apic,u32 lo,u32 hi)400 static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
401 {
402 int msr = (hi & MASK_LVTOFF_HI) >> 20;
403
404 /*
405 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
406 * the BIOS provides the value. The original field where LVT offset
407 * was set is reserved. Return early here:
408 */
409 if (mce_flags.smca)
410 return false;
411
412 if (apic < 0) {
413 pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
414 "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
415 b->bank, b->block, b->address, hi, lo);
416 return false;
417 }
418
419 if (apic != msr) {
420 pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
421 "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
422 b->cpu, apic, b->bank, b->block, b->address, hi, lo);
423 return false;
424 }
425
426 return true;
427 };
428
429 /* Reprogram MCx_MISC MSR behind this threshold block. */
threshold_restart_block(void * _tr)430 static void threshold_restart_block(void *_tr)
431 {
432 struct thresh_restart *tr = _tr;
433 u32 hi, lo;
434
435 /* sysfs write might race against an offline operation */
436 if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off)
437 return;
438
439 rdmsr(tr->b->address, lo, hi);
440
441 /*
442 * Reset error count and overflow bit.
443 * This is done during init or after handling an interrupt.
444 */
445 if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) {
446 hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI);
447 hi |= THRESHOLD_MAX - tr->b->threshold_limit;
448 } else if (tr->old_limit) { /* change limit w/o reset */
449 int new_count = (hi & THRESHOLD_MAX) +
450 (tr->old_limit - tr->b->threshold_limit);
451
452 hi = (hi & ~MASK_ERR_COUNT_HI) |
453 (new_count & THRESHOLD_MAX);
454 }
455
456 /* clear IntType */
457 hi &= ~MASK_INT_TYPE_HI;
458
459 if (!tr->b->interrupt_capable)
460 goto done;
461
462 if (tr->set_lvt_off) {
463 if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
464 /* set new lvt offset */
465 hi &= ~MASK_LVTOFF_HI;
466 hi |= tr->lvt_off << 20;
467 }
468 }
469
470 if (tr->b->interrupt_enable)
471 hi |= INT_TYPE_APIC;
472
473 done:
474
475 hi |= MASK_COUNT_EN_HI;
476 wrmsr(tr->b->address, lo, hi);
477 }
478
/*
 * Restart every threshold block of @bank on the current CPU, setting each
 * block's interrupt_enable to @intr_en first.
 *
 * Does nothing when this CPU has no threshold bank array or no entry for
 * @bank.
 */
static void threshold_restart_bank(unsigned int bank, bool intr_en)
{
	struct threshold_bank **banks = this_cpu_read(threshold_banks);
	struct thresh_restart tr = { .b = NULL };
	struct threshold_block *blk, *next;

	if (!banks)
		return;

	if (!banks[bank])
		return;

	list_for_each_entry_safe(blk, next, &banks[bank]->miscj, miscj) {
		blk->interrupt_enable = intr_en;
		tr.b = blk;
		threshold_restart_block(&tr);
	}
}
496
497 /* Try to use the threshold limit reported through APEI. */
get_thr_limit(void)498 static u16 get_thr_limit(void)
499 {
500 u32 thr_limit = mce_get_apei_thr_limit();
501
502 /* Fallback to old default if APEI limit is not available. */
503 if (!thr_limit)
504 return THRESHOLD_MAX;
505
506 return min(thr_limit, THRESHOLD_MAX);
507 }
508
mce_threshold_block_init(struct threshold_block * b,int offset)509 static void mce_threshold_block_init(struct threshold_block *b, int offset)
510 {
511 struct thresh_restart tr = {
512 .b = b,
513 .set_lvt_off = 1,
514 .lvt_off = offset,
515 };
516
517 b->threshold_limit = get_thr_limit();
518 threshold_restart_block(&tr);
519 };
520
setup_APIC_mce_threshold(int reserved,int new)521 static int setup_APIC_mce_threshold(int reserved, int new)
522 {
523 if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
524 APIC_EILVT_MSG_FIX, 0))
525 return new;
526
527 return reserved;
528 }
529
/*
 * Compute the MISC MSR address of threshold block @block in @bank.
 *
 * @current_addr:	address of the previous block (used for blocks >= 2 on
 *			non-SMCA systems, where addresses are consecutive).
 * @low:		low half of the previously read MISC MSR; carries the
 *			block pointer field.
 * @high:		unused.
 * @cpu:		CPU whose bank count bounds @bank.
 *
 * Returns 0 when there is no such block.
 */
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
			     unsigned int bank, unsigned int block,
			     unsigned int cpu)
{
	u32 blkptr;

	if (bank >= per_cpu(mce_num_banks, cpu) || block >= NR_BLOCKS)
		return 0;

	if (mce_flags.smca) {
		if (block == 0)
			return MSR_AMD64_SMCA_MCx_MISC(bank);

		if (low & MASK_BLKPTR_LO)
			return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);

		return 0;
	}

	/* Fall back to method we used for older processors: */
	if (block == 0)
		return mca_msr_reg(bank, MCA_MISC);

	if (block == 1) {
		blkptr = (low & MASK_BLKPTR_LO) >> 21;
		return blkptr ? MCG_XBLK_ADDR + blkptr : 0;
	}

	return current_addr + 1;
}
564
/*
 * Early (per-CPU init) setup of one threshold block, using a temporary
 * on-stack descriptor.
 *
 * Marks the bank in bank_map when block 0 is seen. For interrupt capable
 * blocks it adds the bank to thr_intr_banks and, on non-SMCA systems, sets up
 * the thresholding LVT entry. Finally the block's MSR is programmed.
 *
 * Returns the LVT offset to carry over to the next block.
 */
static int prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
				   int offset, u32 misc_high)
{
	unsigned int cpu = smp_processor_id();
	struct threshold_block b = {
		.cpu			= cpu,
		.bank			= bank,
		.block			= block,
		.address		= addr,
		.interrupt_capable	= lvt_interrupt_supported(bank, misc_high),
	};
	int lvt;

	if (!block)
		per_cpu(bank_map, cpu) |= BIT_ULL(bank);

	if (b.interrupt_capable) {
		__set_bit(bank, this_cpu_ptr(&mce_amd_data)->thr_intr_banks);
		b.interrupt_enable = 1;

		if (!mce_flags.smca) {
			lvt = (misc_high & MASK_LVTOFF_HI) >> 20;
			offset = setup_APIC_mce_threshold(offset, lvt);
			if (offset == lvt)
				thresholding_irq_en = true;
		}
	}

	mce_threshold_block_init(&b, offset);

	return offset;
}
601
amd_filter_mce(struct mce * m)602 bool amd_filter_mce(struct mce *m)
603 {
604 enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
605 struct cpuinfo_x86 *c = &boot_cpu_data;
606
607 /* See Family 17h Models 10h-2Fh Erratum #1114. */
608 if (c->x86 == 0x17 &&
609 c->x86_model >= 0x10 && c->x86_model <= 0x2F &&
610 bank_type == SMCA_IF && XEC(m->status, 0x3f) == 10)
611 return true;
612
613 /* NB GART TLB error reporting is disabled by default. */
614 if (c->x86 < 0x17) {
615 if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5)
616 return true;
617 }
618
619 return false;
620 }
621
622 /*
623 * Turn off thresholding banks for the following conditions:
624 * - MC4_MISC thresholding is not supported on Family 0x15.
625 * - Prevent possible spurious interrupts from the IF bank on Family 0x17
626 * Models 0x10-0x2F due to Erratum #1114.
627 */
disable_err_thresholding(struct cpuinfo_x86 * c,unsigned int bank)628 static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
629 {
630 int i, num_msrs;
631 u64 hwcr;
632 bool need_toggle;
633 u32 msrs[NR_BLOCKS];
634
635 if (c->x86 == 0x15 && bank == 4) {
636 msrs[0] = 0x00000413; /* MC4_MISC0 */
637 msrs[1] = 0xc0000408; /* MC4_MISC1 */
638 num_msrs = 2;
639 } else if (c->x86 == 0x17 &&
640 (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) {
641
642 if (smca_get_bank_type(smp_processor_id(), bank) != SMCA_IF)
643 return;
644
645 msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank);
646 num_msrs = 1;
647 } else {
648 return;
649 }
650
651 rdmsrq(MSR_K7_HWCR, hwcr);
652
653 /* McStatusWrEn has to be set */
654 need_toggle = !(hwcr & BIT(18));
655 if (need_toggle)
656 wrmsrq(MSR_K7_HWCR, hwcr | BIT(18));
657
658 /* Clear CntP bit safely */
659 for (i = 0; i < num_msrs; i++)
660 msr_clear_bit(msrs[i], 62);
661
662 /* restore old settings */
663 if (need_toggle)
664 wrmsrq(MSR_K7_HWCR, hwcr);
665 }
666
amd_apply_cpu_quirks(struct cpuinfo_x86 * c)667 static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c)
668 {
669 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
670
671 /* This should be disabled by the BIOS, but isn't always */
672 if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
673 /*
674 * disable GART TBL walk error reporting, which
675 * trips off incorrectly with the IOMMU & 3ware
676 * & Cerberus:
677 */
678 clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
679 }
680
681 /*
682 * Various K7s with broken bank 0 around. Always disable
683 * by default.
684 */
685 if (c->x86 == 6 && this_cpu_read(mce_num_banks))
686 mce_banks[0].ctl = 0;
687 }
688
689 /*
690 * Enable the APIC LVT interrupt vectors once per-CPU. This should be done before hardware is
691 * ready to send interrupts.
692 *
693 * Individual error sources are enabled later during per-bank init.
694 */
smca_enable_interrupt_vectors(void)695 static void smca_enable_interrupt_vectors(void)
696 {
697 struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data);
698 u64 mca_intr_cfg, offset;
699
700 if (!mce_flags.smca || !mce_flags.succor)
701 return;
702
703 if (rdmsrq_safe(MSR_CU_DEF_ERR, &mca_intr_cfg))
704 return;
705
706 offset = (mca_intr_cfg & SMCA_THR_LVT_OFF) >> 12;
707 if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0))
708 data->thr_intr_en = 1;
709
710 offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4;
711 if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_EILVT_MSG_FIX, 0))
712 data->dfr_intr_en = 1;
713 }
714
715 /* cpu init entry point, called from mce.c with preempt off */
mce_amd_feature_init(struct cpuinfo_x86 * c)716 void mce_amd_feature_init(struct cpuinfo_x86 *c)
717 {
718 unsigned int bank, block, cpu = smp_processor_id();
719 u32 low = 0, high = 0, address = 0;
720 int offset = -1;
721
722 amd_apply_cpu_quirks(c);
723
724 mce_flags.amd_threshold = 1;
725
726 smca_enable_interrupt_vectors();
727
728 for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
729 if (mce_flags.smca) {
730 smca_configure(bank, cpu);
731
732 if (!this_cpu_ptr(&mce_amd_data)->thr_intr_en)
733 continue;
734 }
735
736 disable_err_thresholding(c, bank);
737
738 for (block = 0; block < NR_BLOCKS; ++block) {
739 address = get_block_address(address, low, high, bank, block, cpu);
740 if (!address)
741 break;
742
743 if (rdmsr_safe(address, &low, &high))
744 break;
745
746 if (!(high & MASK_VALID_HI))
747 continue;
748
749 if (!(high & MASK_CNTP_HI) ||
750 (high & MASK_LOCKED_HI))
751 continue;
752
753 offset = prepare_threshold_block(bank, block, address, offset, high);
754 }
755 }
756 }
757
/*
 * Install the AMD handlers for the thresholding and deferred error
 * interrupt vectors.
 */
void smca_bsp_init(void)
{
	mce_threshold_vector = amd_threshold_interrupt;
	deferred_error_int_vector = amd_deferred_error_interrupt;
}
763
764 /*
765 * DRAM ECC errors are reported in the Northbridge (bank 4) with
766 * Extended Error Code 8.
767 */
legacy_mce_is_memory_error(struct mce * m)768 static bool legacy_mce_is_memory_error(struct mce *m)
769 {
770 return m->bank == 4 && XEC(m->status, 0x1f) == 8;
771 }
772
773 /*
774 * DRAM ECC errors are reported in Unified Memory Controllers with
775 * Extended Error Code 0.
776 */
smca_mce_is_memory_error(struct mce * m)777 static bool smca_mce_is_memory_error(struct mce *m)
778 {
779 enum smca_bank_types bank_type;
780
781 if (XEC(m->status, 0x3f))
782 return false;
783
784 bank_type = smca_get_bank_type(m->extcpu, m->bank);
785
786 return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
787 }
788
amd_mce_is_memory_error(struct mce * m)789 bool amd_mce_is_memory_error(struct mce *m)
790 {
791 if (mce_flags.smca)
792 return smca_mce_is_memory_error(m);
793 else
794 return legacy_mce_is_memory_error(m);
795 }
796
797 /*
798 * Some AMD systems have an explicit indicator that the value in MCA_ADDR is a
799 * system physical address. Individual cases though, need to be detected for
800 * other systems. Future cases will be added as needed.
801 *
802 * 1) General case
803 * a) Assume address is not usable.
804 * 2) Poison errors
805 * a) Indicated by MCA_STATUS[43]: poison. Defined for all banks except legacy
806 * northbridge (bank 4).
807 * b) Refers to poison consumption in the core. Does not include "no action",
808 * "action optional", or "deferred" error severities.
809 * c) Will include a usable address so that immediate action can be taken.
810 * 3) Northbridge DRAM ECC errors
811 * a) Reported in legacy bank 4 with extended error code (XEC) 8.
812 * b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore,
813 * this bit should not be checked.
814 * 4) MCI_STATUS_PADDRVAL is set
815 * a) Will provide a valid system physical address.
816 *
817 * NOTE: SMCA UMC memory errors fall into case #1.
818 */
amd_mce_usable_address(struct mce * m)819 bool amd_mce_usable_address(struct mce *m)
820 {
821 /* Check special northbridge case 3) first. */
822 if (!mce_flags.smca) {
823 if (legacy_mce_is_memory_error(m))
824 return true;
825 else if (m->bank == 4)
826 return false;
827 }
828
829 if (this_cpu_ptr(smca_banks)[m->bank].paddrv)
830 return m->status & MCI_STATUS_PADDRV;
831
832 /* Check poison bit for all other bank types. */
833 if (m->status & MCI_STATUS_POISON)
834 return true;
835
836 /* Assume address is not usable for all others. */
837 return false;
838 }
839
/*
 * Entry point for DEFERRED_ERROR_VECTOR: traces entry, bumps the per-CPU
 * deferred error interrupt count, calls the installed handler through
 * deferred_error_int_vector, traces exit, and finally signals EOI to the APIC.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
{
	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
	inc_irq_stat(irq_deferred_error_count);
	deferred_error_int_vector();
	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
	apic_eoi();
}
848
/*
 * APIC interrupt handler for deferred errors
 *
 * Polls the banks recorded in this CPU's dfr_intr_banks mask.
 */
static void amd_deferred_error_interrupt(void)
{
	machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
}
854
/*
 * Restart the threshold blocks of @bank on the current CPU; @on is forwarded
 * to threshold_restart_bank() as the interrupt-enable setting of each block.
 */
void mce_amd_handle_storm(unsigned int bank, bool on)
{
	threshold_restart_bank(bank, on);
}
859
/* Restart the threshold blocks of @bank on the current CPU with interrupts enabled. */
static void amd_reset_thr_limit(unsigned int bank)
{
	threshold_restart_bank(bank, true);
}
864
/*
 * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
 * goes off when error_count reaches threshold_limit.
 *
 * Polls the banks recorded in this CPU's thr_intr_banks mask.
 */
static void amd_threshold_interrupt(void)
{
	machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->thr_intr_banks);
}
873
amd_clear_bank(struct mce * m)874 void amd_clear_bank(struct mce *m)
875 {
876 amd_reset_thr_limit(m->bank);
877
878 /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */
879 if (m->status & MCI_STATUS_DEFERRED)
880 mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
881
882 /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */
883 if (m->kflags & MCE_CHECK_DFR_REGS)
884 return;
885
886 mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
887 }
888
889 /*
890 * Sysfs Interface
891 */
892
893 struct threshold_attr {
894 struct attribute attr;
895 ssize_t (*show) (struct threshold_block *, char *);
896 ssize_t (*store) (struct threshold_block *, const char *, size_t count);
897 };
898
899 #define SHOW_FIELDS(name) \
900 static ssize_t show_ ## name(struct threshold_block *b, char *buf) \
901 { \
902 return sprintf(buf, "%lu\n", (unsigned long) b->name); \
903 }
904 SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)905 SHOW_FIELDS(threshold_limit)
906
907 static ssize_t
908 store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
909 {
910 struct thresh_restart tr;
911 unsigned long new;
912
913 if (!b->interrupt_capable)
914 return -EINVAL;
915
916 if (kstrtoul(buf, 0, &new) < 0)
917 return -EINVAL;
918
919 b->interrupt_enable = !!new;
920
921 memset(&tr, 0, sizeof(tr));
922 tr.b = b;
923
924 if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
925 return -ENODEV;
926
927 return size;
928 }
929
930 static ssize_t
store_threshold_limit(struct threshold_block * b,const char * buf,size_t size)931 store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
932 {
933 struct thresh_restart tr;
934 unsigned long new;
935
936 if (kstrtoul(buf, 0, &new) < 0)
937 return -EINVAL;
938
939 if (new > THRESHOLD_MAX)
940 new = THRESHOLD_MAX;
941 if (new < 1)
942 new = 1;
943
944 memset(&tr, 0, sizeof(tr));
945 tr.old_limit = b->threshold_limit;
946 b->threshold_limit = new;
947 tr.b = b;
948
949 if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
950 return -ENODEV;
951
952 return size;
953 }
954
show_error_count(struct threshold_block * b,char * buf)955 static ssize_t show_error_count(struct threshold_block *b, char *buf)
956 {
957 u32 lo, hi;
958
959 /* CPU might be offline by now */
960 if (rdmsr_on_cpu(b->cpu, b->address, &lo, &hi))
961 return -ENODEV;
962
963 return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
964 (THRESHOLD_MAX - b->threshold_limit)));
965 }
966
/* Read-only (0444) "error_count" attribute; it has no store handler. */
static struct threshold_attr error_count = {
	.attr = {.name = __stringify(error_count), .mode = 0444 },
	.show = show_error_count,
};
971
/*
 * Define a read-write (0644) threshold attribute named @val, wired to the
 * show_<val>() and store_<val>() handlers.
 */
#define RW_ATTR(val)							\
static struct threshold_attr val = {					\
	.attr = {.name = __stringify(val), .mode = 0644 },		\
	.show = show_## val,						\
	.store = store_## val,						\
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);
981
/*
 * Attributes created for every threshold block. Slot [2] is rewritten at
 * runtime by allocate_threshold_blocks(): it points to interrupt_enable for
 * interrupt capable blocks and is NULL otherwise.
 */
static struct attribute *default_attrs[] = {
	&threshold_limit.attr,
	&error_count.attr,
	NULL,	/* possibly interrupt_enable if supported, see below */
	NULL,
};
ATTRIBUTE_GROUPS(default);
989
990 #define to_block(k) container_of(k, struct threshold_block, kobj)
991 #define to_attr(a) container_of(a, struct threshold_attr, attr)
992
show(struct kobject * kobj,struct attribute * attr,char * buf)993 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
994 {
995 struct threshold_block *b = to_block(kobj);
996 struct threshold_attr *a = to_attr(attr);
997 ssize_t ret;
998
999 ret = a->show ? a->show(b, buf) : -EIO;
1000
1001 return ret;
1002 }
1003
store(struct kobject * kobj,struct attribute * attr,const char * buf,size_t count)1004 static ssize_t store(struct kobject *kobj, struct attribute *attr,
1005 const char *buf, size_t count)
1006 {
1007 struct threshold_block *b = to_block(kobj);
1008 struct threshold_attr *a = to_attr(attr);
1009 ssize_t ret;
1010
1011 ret = a->store ? a->store(b, buf, count) : -EIO;
1012
1013 return ret;
1014 }
1015
/* sysfs ops shared by all threshold block attributes. */
static const struct sysfs_ops threshold_ops = {
	.show			= show,
	.store			= store,
};
1020
static void threshold_block_release(struct kobject *kobj);

/*
 * kobject type of a threshold block: ties together the sysfs ops, the default
 * attribute group and the release callback invoked on the final kobject_put().
 */
static const struct kobj_type threshold_ktype = {
	.sysfs_ops		= &threshold_ops,
	.default_groups		= default_groups,
	.release		= threshold_block_release,
};
1028
get_name(unsigned int cpu,unsigned int bank,struct threshold_block * b)1029 static const char *get_name(unsigned int cpu, unsigned int bank, struct threshold_block *b)
1030 {
1031 enum smca_bank_types bank_type;
1032
1033 if (!mce_flags.smca) {
1034 if (b && bank == 4)
1035 return bank4_names(b);
1036
1037 return th_names[bank];
1038 }
1039
1040 bank_type = smca_get_bank_type(cpu, bank);
1041
1042 if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
1043 if (b->block < ARRAY_SIZE(smca_umc_block_names))
1044 return smca_umc_block_names[b->block];
1045 }
1046
1047 if (b && b->block) {
1048 snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block);
1049 return buf_mcatype;
1050 }
1051
1052 if (bank_type >= N_SMCA_BANK_TYPES) {
1053 snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank);
1054 return buf_mcatype;
1055 }
1056
1057 if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
1058 return smca_get_name(bank_type);
1059
1060 snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
1061 "%s_%u", smca_get_name(bank_type),
1062 per_cpu(smca_banks, cpu)[bank].sysfs_id);
1063 return buf_mcatype;
1064 }
1065
/*
 * Create the threshold_block for @block of @bank (MSR at @address) and then
 * recurse into the following block, whose address get_block_address()
 * provides.
 *
 * A block is set up only if its MSR is readable, MASK_VALID_HI and
 * MASK_CNTP_HI are set and MASK_LOCKED_HI is clear. Blocks failing the
 * latter checks are skipped, but the walk continues with the next block;
 * only an invalid block 0 stops the walk right away.
 *
 * Each block created here is linked into @tb->miscj and registered as a
 * child kobject of @tb->kobj.
 *
 * Returns 0 on success or when there is nothing (more) to set up, otherwise
 * a negative error code. On error, the block created by this invocation is
 * unlinked and its kobject reference dropped.
 */
static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb,
				     unsigned int bank, unsigned int block,
				     u32 address)
{
	struct threshold_block *blk = NULL;
	u32 lo, hi;
	int ret;

	if ((bank >= this_cpu_read(mce_num_banks)) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe(address, &lo, &hi))
		return 0;

	if (!(hi & MASK_VALID_HI)) {
		/* An invalid first block means there is nothing to walk. */
		if (!block)
			return 0;

		goto next_block;
	}

	if ((hi & MASK_LOCKED_HI) || !(hi & MASK_CNTP_HI))
		goto next_block;

	blk = kzalloc_obj(struct threshold_block);
	if (!blk)
		return -ENOMEM;

	blk->block = block;
	blk->bank = bank;
	blk->cpu = cpu;
	blk->address = address;
	blk->interrupt_enable = 0;
	blk->interrupt_capable = lvt_interrupt_supported(bank, hi);
	blk->threshold_limit = get_thr_limit();

	/* Slot 2 of default_attrs holds interrupt_enable only for capable blocks. */
	default_attrs[2] = blk->interrupt_capable ? &interrupt_enable.attr : NULL;
	if (blk->interrupt_capable)
		blk->interrupt_enable = 1;

	list_add(&blk->miscj, &tb->miscj);

	mce_threshold_block_init(blk, (hi & MASK_LVTOFF_HI) >> 20);

	ret = kobject_init_and_add(&blk->kobj, &threshold_ktype, tb->kobj,
				   get_name(cpu, bank, blk));
	if (ret)
		goto err_unlink;

next_block:
	block++;
	address = get_block_address(address, lo, hi, bank, block, cpu);
	if (!address)
		return 0;

	ret = allocate_threshold_blocks(cpu, tb, bank, block, address);
	if (ret)
		goto err_unlink;

	/* blk stays NULL when this invocation only skipped over its block. */
	if (blk)
		kobject_uevent(&blk->kobj, KOBJ_ADD);

	return 0;

err_unlink:
	if (blk) {
		list_del(&blk->miscj);
		kobject_put(&blk->kobj);
	}
	return ret;
}
1138
threshold_create_bank(struct threshold_bank ** bp,unsigned int cpu,unsigned int bank)1139 static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
1140 unsigned int bank)
1141 {
1142 struct device *dev = this_cpu_read(mce_device);
1143 struct threshold_bank *b = NULL;
1144 const char *name = get_name(cpu, bank, NULL);
1145 int err = 0;
1146
1147 if (!dev)
1148 return -ENODEV;
1149
1150 b = kzalloc_obj(struct threshold_bank);
1151 if (!b) {
1152 err = -ENOMEM;
1153 goto out;
1154 }
1155
1156 /* Associate the bank with the per-CPU MCE device */
1157 b->kobj = kobject_create_and_add(name, &dev->kobj);
1158 if (!b->kobj) {
1159 err = -EINVAL;
1160 goto out_free;
1161 }
1162
1163 INIT_LIST_HEAD(&b->miscj);
1164
1165 err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
1166 if (err)
1167 goto out_kobj;
1168
1169 bp[bank] = b;
1170 return 0;
1171
1172 out_kobj:
1173 kobject_put(b->kobj);
1174 out_free:
1175 kfree(b);
1176 out:
1177 return err;
1178 }
1179
/* ->release() hook of threshold_ktype: free the block that @kobj maps to. */
static void threshold_block_release(struct kobject *kobj)
{
	struct threshold_block *blk = to_block(kobj);

	kfree(blk);
}
1184
threshold_remove_bank(struct threshold_bank * bank)1185 static void threshold_remove_bank(struct threshold_bank *bank)
1186 {
1187 struct threshold_block *pos, *tmp;
1188
1189 list_for_each_entry_safe(pos, tmp, &bank->miscj, miscj) {
1190 list_del(&pos->miscj);
1191 kobject_put(&pos->kobj);
1192 }
1193
1194 kobject_put(bank->kobj);
1195 kfree(bank);
1196 }
1197
__threshold_remove_device(struct threshold_bank ** bp)1198 static void __threshold_remove_device(struct threshold_bank **bp)
1199 {
1200 unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
1201
1202 for (bank = 0; bank < numbanks; bank++) {
1203 if (!bp[bank])
1204 continue;
1205
1206 threshold_remove_bank(bp[bank]);
1207 bp[bank] = NULL;
1208 }
1209 kfree(bp);
1210 }
1211
mce_threshold_remove_device(unsigned int cpu)1212 void mce_threshold_remove_device(unsigned int cpu)
1213 {
1214 struct threshold_bank **bp = this_cpu_read(threshold_banks);
1215
1216 if (!bp)
1217 return;
1218
1219 /*
1220 * Clear the pointer before cleaning up, so that the interrupt won't
1221 * touch anything of this.
1222 */
1223 this_cpu_write(threshold_banks, NULL);
1224
1225 __threshold_remove_device(bp);
1226 return;
1227 }
1228
1229 /**
1230 * mce_threshold_create_device - Create the per-CPU MCE threshold device
1231 * @cpu: The plugged in CPU
1232 *
1233 * Create directories and files for all valid threshold banks.
1234 *
1235 * This is invoked from the CPU hotplug callback which was installed in
1236 * mcheck_init_device(). The invocation happens in context of the hotplug
1237 * thread running on @cpu. The callback is invoked on all CPUs which are
1238 * online when the callback is installed or during a real hotplug event.
1239 */
mce_threshold_create_device(unsigned int cpu)1240 void mce_threshold_create_device(unsigned int cpu)
1241 {
1242 unsigned int numbanks, bank;
1243 struct threshold_bank **bp;
1244
1245 if (!mce_flags.amd_threshold)
1246 return;
1247
1248 bp = this_cpu_read(threshold_banks);
1249 if (bp)
1250 return;
1251
1252 numbanks = this_cpu_read(mce_num_banks);
1253 bp = kzalloc_objs(*bp, numbanks);
1254 if (!bp)
1255 return;
1256
1257 for (bank = 0; bank < numbanks; ++bank) {
1258 if (!(this_cpu_read(bank_map) & BIT_ULL(bank)))
1259 continue;
1260 if (threshold_create_bank(bp, cpu, bank)) {
1261 __threshold_remove_device(bp);
1262 return;
1263 }
1264 }
1265 this_cpu_write(threshold_banks, bp);
1266
1267 if (thresholding_irq_en)
1268 mce_threshold_vector = amd_threshold_interrupt;
1269 return;
1270 }
1271