1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Driver for Intel client SoC with integrated memory controller using IBECC
4 *
5 * Copyright (C) 2020 Intel Corporation
6 *
7 * The In-Band ECC (IBECC) IP provides ECC protection to all or specific
8 * regions of the physical memory space. It's used for memory controllers
9 * that don't support the out-of-band ECC which often needs an additional
10 * storage device to each channel for storing ECC data.
11 */
12
13 #include <linux/module.h>
14 #include <linux/init.h>
15 #include <linux/pci.h>
16 #include <linux/slab.h>
17 #include <linux/irq_work.h>
18 #include <linux/llist.h>
19 #include <linux/genalloc.h>
20 #include <linux/edac.h>
21 #include <linux/bits.h>
22 #include <linux/io.h>
23 #include <asm/mach_traps.h>
24 #include <asm/nmi.h>
25 #include <asm/mce.h>
26
27 #include "edac_mc.h"
28 #include "edac_module.h"
29
30 #define IGEN6_REVISION "v2.5.1"
31
32 #define EDAC_MOD_STR "igen6_edac"
33 #define IGEN6_NMI_NAME "igen6_ibecc"
34
35 /* Debug macros */
36 #define igen6_printk(level, fmt, arg...) \
37 edac_printk(level, "igen6", fmt, ##arg)
38
39 #define igen6_mc_printk(mci, level, fmt, arg...) \
40 edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)
41
/* Extract bits [hi:lo] of v, shifted down so that bit lo lands at bit 0 */
#define GET_BITFIELD(v, lo, hi)	(((v) & GENMASK_ULL(hi, lo)) >> (lo))
43
44 #define NUM_IMC 2 /* Max memory controllers */
45 #define NUM_CHANNELS 2 /* Max channels */
46 #define NUM_DIMMS 2 /* Max DIMMs per channel */
47
48 #define _4GB BIT_ULL(32)
49
50 /* Size of physical memory */
51 #define TOM_OFFSET 0xa0
52 /* Top of low usable DRAM */
53 #define TOLUD_OFFSET 0xbc
54 /* Capability register C */
55 #define CAPID_C_OFFSET 0xec
56 #define CAPID_C_IBECC BIT(15)
57
58 /* Capability register E */
59 #define CAPID_E_OFFSET 0xf0
60 #define CAPID_E_IBECC BIT(12)
61 #define CAPID_E_IBECC_BIT18 BIT(18)
62
63 /* Error Status */
64 #define ERRSTS_OFFSET 0xc8
65 #define ERRSTS_CE BIT_ULL(6)
66 #define ERRSTS_UE BIT_ULL(7)
67
68 /* Error Command */
69 #define ERRCMD_OFFSET 0xca
70 #define ERRCMD_CE BIT_ULL(6)
71 #define ERRCMD_UE BIT_ULL(7)
72
73 /* IBECC MMIO base address */
74 #define IBECC_BASE (res_cfg->ibecc_base)
75 #define IBECC_ACTIVATE_OFFSET IBECC_BASE
76 #define IBECC_ACTIVATE_EN BIT(0)
77
/* IBECC error log */
#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE		BIT_ULL(62)
#define ECC_ERROR_LOG_UE		BIT_ULL(63)
/* Left shift applied to an extracted address field to rebuild the address */
#define ECC_ERROR_LOG_ADDR_SHIFT	5
/*
 * Both address extractors return the field right-justified, i.e. bit 5 of
 * the log register ends up at bit 0 of the result.
 */
#define ECC_ERROR_LOG_ADDR(v)		GET_BITFIELD(v, 5, 38)
#define ECC_ERROR_LOG_ADDR45(v)		GET_BITFIELD(v, 5, 45)
#define ECC_ERROR_LOG_SYND(v)		GET_BITFIELD(v, 46, 61)
86
87 /* Host MMIO base address */
88 #define MCHBAR_OFFSET 0x48
89 #define MCHBAR_EN BIT_ULL(0)
90 #define MCHBAR_BASE(v) (GET_BITFIELD(v, 16, 38) << 16)
91 #define MCHBAR_SIZE 0x10000
92
93 /* Parameters for the channel decode stage */
94 #define IMC_BASE (res_cfg->imc_base)
95 #define MAD_INTER_CHANNEL_OFFSET IMC_BASE
96 #define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2)
97 #define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3)
98 #define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4)
99 #define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29)
100
101 /* Parameters for DRAM decode stage */
102 #define MAD_INTRA_CH0_OFFSET (IMC_BASE + 4)
103 #define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0)
104
105 /* DIMM characteristics */
106 #define MAD_DIMM_CH0_OFFSET (IMC_BASE + 0xc)
107 #define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29)
108 #define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8)
109 #define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29)
110 #define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25)
111
112 /* Hash for memory controller selection */
113 #define MAD_MC_HASH_OFFSET (IMC_BASE + 0x1b8)
114 #define MAC_MC_HASH_LSB(v) GET_BITFIELD(v, 1, 3)
115
116 /* Hash for channel selection */
117 #define CHANNEL_HASH_OFFSET (IMC_BASE + 0x24)
118 /* Hash for enhanced channel selection */
119 #define CHANNEL_EHASH_OFFSET (IMC_BASE + 0x28)
120 #define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
121 #define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
122 #define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28)
123
124 /* Parameters for memory slice decode stage */
125 #define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
126 #define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
127
/*
 * Per-SoC resource description: register offsets plus the hooks used to
 * probe IBECC and to translate logged error addresses. res_cfg points at
 * the description in use.
 */
static struct res_config {
	/* NOTE(review): presumably selects MCE (vs. NMI) notification - confirm */
	bool machine_check;
	/* The number of present memory controllers. */
	int num_imc;
	/* Offset of the memory controller registers (see IMC_BASE) */
	u32 imc_base;
	/* Only set by tgl_cfg; NOTE(review): users are outside this view */
	u32 cmf_base;
	u32 cmf_size;
	u32 ms_hash_offset;
	/* Offset of the IBECC registers (see IBECC_BASE) */
	u32 ibecc_base;
	/* Offset of ECC_ERROR_LOG relative to ibecc_base */
	u32 ibecc_error_log_offset;
	/* Report whether IBECC is usable on this device */
	bool (*ibecc_available)(struct pci_dev *pdev);
	/* Extract error address logged in IBECC; optional, may be NULL */
	u64 (*err_addr)(u64 ecclog);
	/* Convert error address logged in IBECC to system physical address */
	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
	/* Convert error address logged in IBECC to integrated memory controller address */
	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;
146
/* State kept for one integrated memory controller (IMC) */
struct igen6_imc {
	/* Index of this memory controller */
	int mc;
	/* EDAC memory controller object that errors are reported against */
	struct mem_ctl_info *mci;
	/* PCI device used for the ERRSTS/ERRCMD config-space accesses */
	struct pci_dev *pdev;
	struct device dev;
	/* MMIO mapping through which the IMC and IBECC registers are read */
	void __iomem *window;
	/* Running sum of the DIMM sizes found on this controller */
	u64 size;
	/* Channel decode parameters taken from MAD_INTER_CHANNEL */
	u64 ch_s_size;
	int ch_l_map;
	/* Per-channel DIMM decode parameters (MAD_DIMM / MAD_INTRA registers) */
	u64 dimm_s_size[NUM_CHANNELS];
	u64 dimm_l_size[NUM_CHANNELS];
	int dimm_l_map[NUM_CHANNELS];
};
160
/* Driver-wide private data: all memory controllers plus slice parameters */
static struct igen6_pvt {
	struct igen6_imc imc[NUM_IMC];
	/* Memory slice hash value consumed by tgl_err_addr_to_mem_addr() */
	u64 ms_hash;
	/* Slice size used as the interleave boundary in address conversion */
	u64 ms_s_size;
	/* NOTE(review): not referenced in the visible part of the file */
	int ms_l_map;
} *igen6_pvt;
167
168 /* The top of low usable DRAM */
169 static u32 igen6_tolud;
170 /* The size of physical memory */
171 static u64 igen6_tom;
172
/* Input and result of decoding one logged error address */
struct decoded_addr {
	/* Memory controller the error was logged on */
	int mc;
	/* Address as seen by the memory controller; input to igen6_decode() */
	u64 imc_addr;
	/* System physical address reported to the EDAC core */
	u64 sys_addr;
	/* Filled by igen6_decode(): channel stage */
	int channel_idx;
	u64 channel_addr;
	/* Filled by igen6_decode(): sub-channel/DIMM stage */
	int sub_channel_idx;
	u64 sub_channel_addr;
};
182
/* One captured IBECC error log, queued on ecclog_llist for later reporting */
struct ecclog_node {
	struct llist_node llnode;
	/* Memory controller index the log was read from */
	int mc;
	/* Raw ECC_ERROR_LOG register value */
	u64 ecclog;
};
188
/*
 * In the NMI handler, the driver uses the lock-less memory allocator
 * to allocate memory to store the IBECC error logs and links the logs
 * to the lock-less list. Delay printk() and the work of error reporting
 * to EDAC core in a worker.
 */
#define ECCLOG_POOL_SIZE	PAGE_SIZE
/* Captured logs waiting to be reported by ecclog_work_cb() */
static LLIST_HEAD(ecclog_llist);
/* Allocator for struct ecclog_node, backed by ecclog_buf */
static struct gen_pool *ecclog_pool;
static char ecclog_buf[ECCLOG_POOL_SIZE];
/* irq_work queued by ecclog_handler() after a log has been captured */
static struct irq_work ecclog_irq_work;
/* Work item scheduled by ecclog_irq_work_cb() to drain ecclog_llist */
static struct work_struct ecclog_work;
201
202 /* Compute die IDs for Elkhart Lake with IBECC */
203 #define DID_EHL_SKU5 0x4514
204 #define DID_EHL_SKU6 0x4528
205 #define DID_EHL_SKU7 0x452a
206 #define DID_EHL_SKU8 0x4516
207 #define DID_EHL_SKU9 0x452c
208 #define DID_EHL_SKU10 0x452e
209 #define DID_EHL_SKU11 0x4532
210 #define DID_EHL_SKU12 0x4518
211 #define DID_EHL_SKU13 0x451a
212 #define DID_EHL_SKU14 0x4534
213 #define DID_EHL_SKU15 0x4536
214
215 /* Compute die IDs for ICL-NNPI with IBECC */
216 #define DID_ICL_SKU8 0x4581
217 #define DID_ICL_SKU10 0x4585
218 #define DID_ICL_SKU11 0x4589
219 #define DID_ICL_SKU12 0x458d
220
221 /* Compute die IDs for Tiger Lake with IBECC */
222 #define DID_TGL_SKU 0x9a14
223
224 /* Compute die IDs for Alder Lake with IBECC */
225 #define DID_ADL_SKU1 0x4601
226 #define DID_ADL_SKU2 0x4602
227 #define DID_ADL_SKU3 0x4621
228 #define DID_ADL_SKU4 0x4641
229
230 /* Compute die IDs for Alder Lake-N with IBECC */
231 #define DID_ADL_N_SKU1 0x4614
232 #define DID_ADL_N_SKU2 0x4617
233 #define DID_ADL_N_SKU3 0x461b
234 #define DID_ADL_N_SKU4 0x461c
235 #define DID_ADL_N_SKU5 0x4673
236 #define DID_ADL_N_SKU6 0x4674
237 #define DID_ADL_N_SKU7 0x4675
238 #define DID_ADL_N_SKU8 0x4677
239 #define DID_ADL_N_SKU9 0x4678
240 #define DID_ADL_N_SKU10 0x4679
241 #define DID_ADL_N_SKU11 0x467c
242 #define DID_ADL_N_SKU12 0x4632
243
244 /* Compute die IDs for Arizona Beach with IBECC */
245 #define DID_AZB_SKU1 0x4676
246
/* Compute die IDs for Amston Lake with IBECC */
248 #define DID_ASL_SKU1 0x464a
249
250 /* Compute die IDs for Raptor Lake-P with IBECC */
251 #define DID_RPL_P_SKU1 0xa706
252 #define DID_RPL_P_SKU2 0xa707
253 #define DID_RPL_P_SKU3 0xa708
254 #define DID_RPL_P_SKU4 0xa716
255 #define DID_RPL_P_SKU5 0xa718
256
257 /* Compute die IDs for Meteor Lake-PS with IBECC */
258 #define DID_MTL_PS_SKU1 0x7d21
259 #define DID_MTL_PS_SKU2 0x7d22
260 #define DID_MTL_PS_SKU3 0x7d23
261 #define DID_MTL_PS_SKU4 0x7d24
262
263 /* Compute die IDs for Meteor Lake-P with IBECC */
264 #define DID_MTL_P_SKU1 0x7d01
265 #define DID_MTL_P_SKU2 0x7d02
266 #define DID_MTL_P_SKU3 0x7d14
267
268 /* Compute die IDs for Arrow Lake-UH with IBECC */
269 #define DID_ARL_UH_SKU1 0x7d06
270 #define DID_ARL_UH_SKU2 0x7d20
271 #define DID_ARL_UH_SKU3 0x7d30
272
273 /* Compute die IDs for Panther Lake-H with IBECC */
274 #define DID_PTL_H_SKU1 0xb000
275 #define DID_PTL_H_SKU2 0xb001
276 #define DID_PTL_H_SKU3 0xb002
277
278 /* Compute die IDs for Wildcat Lake with IBECC */
279 #define DID_WCL_SKU1 0xfd00
280
/*
 * Read the 64-bit MCHBAR register from PCI config space and hand back the
 * host MMIO base address it contains.
 *
 * @pdev:   device whose config space holds MCHBAR
 * @mchbar: receives the base address on success
 *
 * Returns 0 on success, -ENODEV if a config read fails or MCHBAR is not
 * enabled.
 */
static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
{
	u32 lo, hi;
	u64 val;

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
		return -ENODEV;
	}

	/* Low dword supplies bits 31:0, high dword bits 63:32 */
	val = ((u64)hi << 32) | lo;

	if (!(val & MCHBAR_EN)) {
		igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
		return -ENODEV;
	}

	*mchbar = MCHBAR_BASE(val);

	return 0;
}
310
ehl_ibecc_available(struct pci_dev * pdev)311 static bool ehl_ibecc_available(struct pci_dev *pdev)
312 {
313 u32 v;
314
315 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
316 return false;
317
318 return !!(CAPID_C_IBECC & v);
319 }
320
/*
 * err_addr_to_sys_addr hook used by ehl_cfg and icl_cfg: the logged error
 * address is returned unchanged; @mc is unused.
 */
static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return eaddr;
}
325
/*
 * err_addr_to_imc_addr hook used by ehl_cfg and icl_cfg; @mc is unused.
 *
 * Addresses below igen6_tolud pass through. Above that, the address is
 * rebased by igen6_tolud against either 4GB (when igen6_tom <= 4GB) or
 * igen6_tom (when the address is at or above igen6_tom); anything else
 * passes through unchanged.
 */
static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 imc_addr = eaddr;

	if (eaddr >= igen6_tolud) {
		if (igen6_tom <= _4GB)
			imc_addr = eaddr + igen6_tolud - _4GB;
		else if (eaddr >= igen6_tom)
			imc_addr = eaddr + igen6_tolud - igen6_tom;
	}

	return imc_addr;
}
339
icl_ibecc_available(struct pci_dev * pdev)340 static bool icl_ibecc_available(struct pci_dev *pdev)
341 {
342 u32 v;
343
344 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
345 return false;
346
347 return !(CAPID_C_IBECC & v) &&
348 (boot_cpu_data.x86_stepping >= 1);
349 }
350
tgl_ibecc_available(struct pci_dev * pdev)351 static bool tgl_ibecc_available(struct pci_dev *pdev)
352 {
353 u32 v;
354
355 if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
356 return false;
357
358 return !(CAPID_E_IBECC & v);
359 }
360
mtl_p_ibecc_available(struct pci_dev * pdev)361 static bool mtl_p_ibecc_available(struct pci_dev *pdev)
362 {
363 u32 v;
364
365 if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
366 return false;
367
368 return !(CAPID_E_IBECC_BIT18 & v);
369 }
370
mtl_ps_ibecc_available(struct pci_dev * pdev)371 static bool mtl_ps_ibecc_available(struct pci_dev *pdev)
372 {
373 #define MCHBAR_MEMSS_IBECCDIS 0x13c00
374 void __iomem *window;
375 u64 mchbar;
376 u32 val;
377
378 if (get_mchbar(pdev, &mchbar))
379 return false;
380
381 window = ioremap(mchbar, MCHBAR_SIZE * 2);
382 if (!window) {
383 igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
384 return false;
385 }
386
387 val = readl(window + MCHBAR_MEMSS_IBECCDIS);
388 iounmap(window);
389
390 /* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */
391 return !GET_BITFIELD(val, 6, 6);
392 }
393
/*
 * Convert a memory address to a system physical address.
 *
 * Addresses below igen6_tolud pass through. Above that, the offset from
 * igen6_tolud is rebased onto 4GB (when igen6_tom <= 4GB) or onto
 * igen6_tom (when the address is below 4GB); anything else passes through.
 */
static u64 mem_addr_to_sys_addr(u64 maddr)
{
	u64 sys_addr = maddr;

	if (maddr >= igen6_tolud) {
		if (igen6_tom <= _4GB)
			sys_addr = maddr - igen6_tolud + _4GB;
		else if (maddr < _4GB)
			sys_addr = maddr - igen6_tolud + igen6_tom;
	}

	return sys_addr;
}
407
/*
 * XOR together @hash_init, bits 6..19 of (@addr & @mask) and bit
 * @intlv_bit of @addr, and return the result.
 */
static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
{
	u64 masked = addr & mask;
	u64 result = hash_init;
	int bit = 6;

	/* Fold the selected address bits into the running hash */
	while (bit < 20) {
		result ^= (masked >> bit) & 1;
		bit++;
	}

	/* The interleave bit comes from the unmasked address */
	result ^= (addr >> intlv_bit) & 1;

	return result;
}
419
/*
 * Convert an IBECC error address logged on memory controller @mc into a
 * memory address.
 *
 * At or above the slice size the address is simply offset by that size.
 * Below it, a one-bit gap is opened at the interleave bit position and
 * filled with the slice hash computed by mem_slice_hash() seeded with @mc.
 */
static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
{
	u64 slice_s_size = igen6_pvt->ms_s_size;
	u64 upper, lower, maddr, hash_mask, hash_bit;
	int intlv_bit;
	u32 ms_hash;

	if (eaddr >= slice_s_size)
		return eaddr + slice_s_size;

	ms_hash = igen6_pvt->ms_hash;
	hash_mask = MEM_SLICE_HASH_MASK(ms_hash);
	intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;

	/* Everything from the interleave bit upwards moves up by one bit */
	upper = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1);
	lower = GET_BITFIELD(eaddr, 0, intlv_bit - 1);
	maddr = upper | lower;

	hash_bit = mem_slice_hash(maddr, hash_mask, mc, intlv_bit);

	return maddr | (hash_bit << intlv_bit);
}
442
/*
 * err_addr_to_sys_addr hook for tgl_cfg: error address -> memory address
 * -> system physical address.
 */
static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(tgl_err_addr_to_mem_addr(eaddr, mc));
}
449
/*
 * err_addr_to_imc_addr hook for tgl_cfg: the logged error address is
 * returned unchanged; @mc is unused.
 */
static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	return eaddr;
}
454
/*
 * err_addr_to_sys_addr hook shared by the adl/adl_n/rpl_p/mtl/wcl configs:
 * the logged error address is passed straight to mem_addr_to_sys_addr();
 * @mc is unused.
 */
static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(eaddr);
}
459
/*
 * Convert an IBECC error address logged on memory controller @mc into the
 * address seen by that controller.
 *
 * At or above twice the slice size the address is reduced by the slice
 * size. Below that, the interleave bit (position read from the
 * controller's MAD_MC_HASH register) is removed and the upper part is
 * shifted down to close the gap.
 */
static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 slice_s_size = igen6_pvt->ms_s_size;
	u64 above, below;
	int intlv_bit;
	u32 mc_hash;

	if (eaddr >= 2 * slice_s_size)
		return eaddr - slice_s_size;

	mc_hash = readl(igen6_pvt->imc[mc].window + MAD_MC_HASH_OFFSET);
	intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;

	above = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit;
	below = GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	return above | below;
}
479
/*
 * err_addr hook for rpl_p_cfg: extract the wider (bits 5..45) error
 * address from the IBECC error log.
 *
 * ECC_ERROR_LOG_ADDR45() returns the field right-justified, so it must be
 * shifted back up by ECC_ERROR_LOG_ADDR_SHIFT, exactly as the default
 * path in ecclog_work_cb() does for ECC_ERROR_LOG_ADDR(). Without the
 * shift the returned address would be 32 times too small.
 */
static u64 rpl_p_err_addr(u64 ecclog)
{
	return ECC_ERROR_LOG_ADDR45(ecclog) << ECC_ERROR_LOG_ADDR_SHIFT;
}
484
/* Elkhart Lake (DID_EHL_*): one memory controller, default address field */
static struct res_config ehl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xdc00,
	.ibecc_available	= ehl_ibecc_available,
	.ibecc_error_log_offset	= 0x170,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};
494
/* ICL-NNPI (DID_ICL_*): reuses the Elkhart Lake address conversion hooks */
static struct res_config icl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xd800,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= icl_ibecc_available,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};
504
/*
 * Tiger Lake (DID_TGL_SKU): two memory controllers; the only config that
 * sets the cmf_base, cmf_size and ms_hash_offset fields.
 */
static struct res_config tgl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0x5000,
	.cmf_base		= 0x11000,
	.cmf_size		= 0x800,
	.ms_hash_offset		= 0xac,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= tgl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= tgl_err_addr_to_imc_addr,
};
518
/* Alder Lake (DID_ADL_SKU*): two memory controllers */
static struct res_config adl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
529
/*
 * Alder Lake-N (DID_ADL_N_*), also used for Arizona Beach and Amston Lake:
 * same as adl_cfg but with a single memory controller.
 */
static struct res_config adl_n_cfg = {
	.machine_check		= true,
	.num_imc		= 1,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
540
/*
 * Raptor Lake-P (DID_RPL_P_*): same as adl_cfg plus an err_addr hook; the
 * only config that overrides the default error address extraction.
 */
static struct res_config rpl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr		= rpl_p_err_addr,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
552
/* Meteor Lake-PS (DID_MTL_PS_*): IBECC presence is probed through MMIO */
static struct res_config mtl_ps_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_ps_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
563
/*
 * Meteor Lake-P (DID_MTL_P_*), also used for Arrow Lake-UH and
 * Panther Lake-H: two memory controllers.
 */
static struct res_config mtl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_p_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
574
/* Wildcat Lake (DID_WCL_SKU1): same as mtl_p_cfg but one memory controller */
static struct res_config wcl_cfg = {
	.machine_check		= true,
	.num_imc		= 1,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_p_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
585
/*
 * Supported compute-die device IDs. Each entry carries a pointer to the
 * matching res_config in its driver_data slot.
 */
static struct pci_device_id igen6_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_WCL_SKU1), (kernel_ulong_t)&wcl_cfg },
	{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
643
/*
 * Translate the device-width field of a MAD_DIMM register value into an
 * EDAC dev_type.
 *
 * @dimm_l:   non-zero selects the DLW field, zero selects the DSW field
 * @mad_dimm: raw MAD_DIMM register value
 *
 * Field values 0, 1 and 2 map to x8, x16 and x32; anything else yields
 * DEV_UNKNOWN.
 */
static enum dev_type get_width(int dimm_l, u32 mad_dimm)
{
	u32 width_code;

	if (dimm_l)
		width_code = MAD_DIMM_CH_DLW(mad_dimm);
	else
		width_code = MAD_DIMM_CH_DSW(mad_dimm);

	if (width_code == 0)
		return DEV_X8;
	if (width_code == 1)
		return DEV_X16;
	if (width_code == 2)
		return DEV_X32;

	return DEV_UNKNOWN;
}
660
/*
 * Translate the DDR type field of a MAD_INTER_CHANNEL register value into
 * an EDAC mem_type. Field values outside 0..4 yield MEM_UNKNOWN.
 */
static enum mem_type get_memory_type(u32 mad_inter)
{
	enum mem_type mtype;

	switch (MAD_INTER_CHANNEL_DDR_TYPE(mad_inter)) {
	case 0:
		mtype = MEM_DDR4;
		break;
	case 1:
		mtype = MEM_DDR3;
		break;
	case 2:
		mtype = MEM_LPDDR3;
		break;
	case 3:
		mtype = MEM_LPDDR4;
		break;
	case 4:
		mtype = MEM_WIO2;
		break;
	default:
		mtype = MEM_UNKNOWN;
		break;
	}

	return mtype;
}
680
/*
 * Compute a one-bit index: the XOR of bits 6..19 of (@addr & @mask),
 * XORed with bit @intlv_bit of @addr.
 */
static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit)
{
	u64 masked = addr & mask;
	u64 parity = 0;
	int bit;

	/* XOR is order-independent, so the walk direction does not matter */
	for (bit = 19; bit >= 6; bit--)
		parity ^= (masked >> bit) & 1;

	return (int)parity ^ ((addr >> intlv_bit) & 1);
}
692
/*
 * Drop bit @intlv_bit from @addr: the bits above it move down by one
 * position and the bits below it stay where they are.
 */
static u64 decode_channel_addr(u64 addr, int intlv_bit)
{
	u64 high = GET_BITFIELD(addr, intlv_bit + 1, 63);
	u64 low = GET_BITFIELD(addr, 0, intlv_bit - 1);

	return (high << intlv_bit) | low;
}
703
/*
 * Decode one interleave stage (channel, or sub-channel/DIMM).
 *
 * @addr:     address entering this stage
 * @hash:     raw hash register value (interleave bit, mask and mode)
 * @s_size:   size of the smaller ("S") of the two targets
 * @l_map:    index of the larger ("L") target
 * @idx:      receives the selected target index
 * @sub_addr: receives the address within the selected target
 */
static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map,
			int *idx, u64 *sub_addr)
{
	int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6;

	/*
	 * Only [0, 2 * s_size) is interleaved across both targets. The
	 * first address past that range, addr == 2 * s_size, already
	 * belongs to the L target alone (this includes address 0 when
	 * s_size is 0), so the boundary itself must take this branch.
	 */
	if (addr >= 2 * s_size) {
		*sub_addr = addr - s_size;
		*idx = l_map;
		return;
	}

	if (CHANNEL_HASH_MODE(hash)) {
		*sub_addr = decode_channel_addr(addr, intlv_bit);
		*idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit);
	} else {
		/* Hash mode off: plain interleave on address bit 6 */
		*sub_addr = decode_channel_addr(addr, 6);
		*idx = GET_BITFIELD(addr, 6, 6);
	}
}
723
igen6_decode(struct decoded_addr * res)724 static int igen6_decode(struct decoded_addr *res)
725 {
726 struct igen6_imc *imc = &igen6_pvt->imc[res->mc];
727 u64 addr = res->imc_addr, sub_addr, s_size;
728 int idx, l_map;
729 u32 hash;
730
731 if (addr >= igen6_tom) {
732 edac_dbg(0, "Address 0x%llx out of range\n", addr);
733 return -EINVAL;
734 }
735
736 /* Decode channel */
737 hash = readl(imc->window + CHANNEL_HASH_OFFSET);
738 s_size = imc->ch_s_size;
739 l_map = imc->ch_l_map;
740 decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr);
741 res->channel_idx = idx;
742 res->channel_addr = sub_addr;
743
744 /* Decode sub-channel/DIMM */
745 hash = readl(imc->window + CHANNEL_EHASH_OFFSET);
746 s_size = imc->dimm_s_size[idx];
747 l_map = imc->dimm_l_map[idx];
748 decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr);
749 res->sub_channel_idx = idx;
750 res->sub_channel_addr = sub_addr;
751
752 return 0;
753 }
754
/*
 * Report one decoded error to the EDAC core. The error is flagged as
 * uncorrected when ECC_ERROR_LOG_UE is set in @ecclog, corrected otherwise.
 */
static void igen6_output_error(struct decoded_addr *res,
			       struct mem_ctl_info *mci, u64 ecclog)
{
	enum hw_event_mc_err_type type = HW_EVENT_ERR_CORRECTED;
	u64 sys_addr = res->sys_addr;

	if (ecclog & ECC_ERROR_LOG_UE)
		type = HW_EVENT_ERR_UNCORRECTED;

	/* Page frame number and in-page offset are passed separately */
	edac_mc_handle_error(type, mci, 1,
			     sys_addr >> PAGE_SHIFT,
			     sys_addr & ~PAGE_MASK,
			     ECC_ERROR_LOG_SYND(ecclog),
			     res->channel_idx, res->sub_channel_idx,
			     -1, "", "");
}
769
ecclog_gen_pool_create(void)770 static struct gen_pool *ecclog_gen_pool_create(void)
771 {
772 struct gen_pool *pool;
773
774 pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
775 if (!pool)
776 return NULL;
777
778 if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
779 gen_pool_destroy(pool);
780 return NULL;
781 }
782
783 return pool;
784 }
785
/*
 * Allocate a node from ecclog_pool, record @mc and @ecclog in it and push
 * it onto ecclog_llist.
 *
 * Returns 0 on success, -ENOMEM if the pool has no room left.
 */
static int ecclog_gen_pool_add(int mc, u64 ecclog)
{
	unsigned long mem = gen_pool_alloc(ecclog_pool, sizeof(struct ecclog_node));
	struct ecclog_node *entry = (struct ecclog_node *)mem;

	if (!entry)
		return -ENOMEM;

	entry->ecclog = ecclog;
	entry->mc = mc;
	llist_add(&entry->llnode, &ecclog_llist);

	return 0;
}
800
801 /*
802 * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
803 * configuration space status register ERRSTS can indicate whether a
804 * correctable error or an uncorrectable error occurred. We only use the
805 * ECC_ERROR_LOG register to check error type, but need to clear both
806 * registers to enable future error events.
807 */
ecclog_read_and_clear(struct igen6_imc * imc)808 static u64 ecclog_read_and_clear(struct igen6_imc *imc)
809 {
810 u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
811
812 /*
813 * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
814 * the invalid value ~0. This will result in a flood of invalid
815 * error reports in polling mode. Skip it.
816 */
817 if (ecclog == ~0)
818 return 0;
819
820 /* Neither a CE nor a UE. Skip it.*/
821 if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
822 return 0;
823
824 /* Clear CE/UE bits by writing 1s */
825 writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
826
827 return ecclog;
828 }
829
errsts_clear(struct igen6_imc * imc)830 static void errsts_clear(struct igen6_imc *imc)
831 {
832 u16 errsts;
833
834 if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) {
835 igen6_printk(KERN_ERR, "Failed to read ERRSTS\n");
836 return;
837 }
838
839 /* Clear CE/UE bits by writing 1s */
840 if (errsts & (ERRSTS_CE | ERRSTS_UE))
841 pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts);
842 }
843
/*
 * Set or clear the CE and UE bits in the ERRCMD config-space register of
 * the first memory controller's PCI device.
 *
 * @enable: true to set both bits, false to clear them
 *
 * Returns 0 on success or a negative errno translated from the PCI access
 * error.
 */
static int errcmd_enable_error_reporting(bool enable)
{
	struct igen6_imc *imc = &igen6_pvt->imc[0];
	u16 errcmd;
	int rc;

	rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	/* Use the ERRCMD_* bit names here; ERRSTS_* belong to the status register */
	if (enable)
		errcmd |= ERRCMD_CE | ERRCMD_UE;
	else
		errcmd &= ~(ERRCMD_CE | ERRCMD_UE);

	rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	return 0;
}
865
ecclog_handler(void)866 static int ecclog_handler(void)
867 {
868 struct igen6_imc *imc;
869 int i, n = 0;
870 u64 ecclog;
871
872 for (i = 0; i < res_cfg->num_imc; i++) {
873 imc = &igen6_pvt->imc[i];
874
875 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
876
877 ecclog = ecclog_read_and_clear(imc);
878 if (!ecclog)
879 continue;
880
881 if (!ecclog_gen_pool_add(i, ecclog))
882 irq_work_queue(&ecclog_irq_work);
883
884 n++;
885 }
886
887 return n;
888 }
889
ecclog_work_cb(struct work_struct * work)890 static void ecclog_work_cb(struct work_struct *work)
891 {
892 struct ecclog_node *node, *tmp;
893 struct mem_ctl_info *mci;
894 struct llist_node *head;
895 struct decoded_addr res;
896 u64 eaddr;
897
898 head = llist_del_all(&ecclog_llist);
899 if (!head)
900 return;
901
902 llist_for_each_entry_safe(node, tmp, head, llnode) {
903 memset(&res, 0, sizeof(res));
904 if (res_cfg->err_addr)
905 eaddr = res_cfg->err_addr(node->ecclog);
906 else
907 eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
908 ECC_ERROR_LOG_ADDR_SHIFT;
909 res.mc = node->mc;
910 res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
911 res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
912
913 mci = igen6_pvt->imc[res.mc].mci;
914
915 edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog);
916 igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n");
917 igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr);
918
919 if (!igen6_decode(&res))
920 igen6_output_error(&res, mci, node->ecclog);
921
922 gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node));
923 }
924 }
925
ecclog_irq_work_cb(struct irq_work * irq_work)926 static void ecclog_irq_work_cb(struct irq_work *irq_work)
927 {
928 int i;
929
930 for (i = 0; i < res_cfg->num_imc; i++)
931 errsts_clear(&igen6_pvt->imc[i]);
932
933 if (!llist_empty(&ecclog_llist))
934 schedule_work(&ecclog_work);
935 }
936
/*
 * NMI handler: capture pending IBECC error logs and re-arm the SERR# NMI.
 *
 * Returns NMI_DONE when no controller had an error logged, so the NMI is
 * left for other handlers, and NMI_HANDLED otherwise.
 */
static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	unsigned char reason;

	if (!ecclog_handler())
		return NMI_DONE;

	/*
	 * Both In-Band ECC correctable error and uncorrectable error are
	 * reported by SERR# NMI. The NMI generic code (see pci_serr_error())
	 * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to
	 * re-enable the SERR# NMI after NMI handling. So clear this bit here
	 * to re-enable SERR# NMI for receiving future In-Band ECC errors.
	 */
	reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK;
	/* Write the reason back with the SERR clear bit set, then cleared */
	reason |= NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
	reason &= ~NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);

	return NMI_HANDLED;
}
959
ecclog_mce_handler(struct notifier_block * nb,unsigned long val,void * data)960 static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
961 void *data)
962 {
963 struct mce *mce = (struct mce *)data;
964 char *type;
965
966 if (mce->kflags & MCE_HANDLED_CEC)
967 return NOTIFY_DONE;
968
969 /*
970 * Ignore unless this is a memory related error.
971 * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
972 * since this bit isn't set on some CPU (e.g., Tiger Lake UP3).
973 */
974 if ((mce->status & 0xefff) >> 7 != 1)
975 return NOTIFY_DONE;
976
977 if (mce->mcgstatus & MCG_STATUS_MCIP)
978 type = "Exception";
979 else
980 type = "Event";
981
982 edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
983 mce->extcpu, type, mce->mcgstatus,
984 mce->bank, mce->status);
985 edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
986 edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
987 edac_dbg(0, "MISC 0x%llx\n", mce->misc);
988 edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
989 mce->cpuvendor, mce->cpuid, mce->time,
990 mce->socketid, mce->apicid);
991 /*
992 * We just use the Machine Check for the memory error notification.
993 * Each memory controller is associated with an IBECC instance.
994 * Directly read and clear the error information(error address and
995 * error type) on all the IBECC instances so that we know on which
996 * memory controller the memory error(s) occurred.
997 */
998 if (!ecclog_handler())
999 return NOTIFY_DONE;
1000
1001 mce->kflags |= MCE_HANDLED_EDAC;
1002
1003 return NOTIFY_DONE;
1004 }
1005
/* Notifier block that hooks ecclog_mce_handler() in at EDAC priority */
static struct notifier_block ecclog_mce_dec = {
	.notifier_call	= ecclog_mce_handler,
	.priority	= MCE_PRIO_EDAC,
};
1010
igen6_check_ecc(struct igen6_imc * imc)1011 static bool igen6_check_ecc(struct igen6_imc *imc)
1012 {
1013 u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
1014
1015 return !!(activate & IBECC_ACTIVATE_EN);
1016 }
1017
igen6_get_dimm_config(struct mem_ctl_info * mci)1018 static int igen6_get_dimm_config(struct mem_ctl_info *mci)
1019 {
1020 struct igen6_imc *imc = mci->pvt_info;
1021 u32 mad_inter, mad_intra, mad_dimm;
1022 int i, j, ndimms, mc = imc->mc;
1023 struct dimm_info *dimm;
1024 enum mem_type mtype;
1025 enum dev_type dtype;
1026 u64 dsize;
1027 bool ecc;
1028
1029 edac_dbg(2, "\n");
1030
1031 mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET);
1032 mtype = get_memory_type(mad_inter);
1033 ecc = igen6_check_ecc(imc);
1034 imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter);
1035 imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter);
1036
1037 for (i = 0; i < NUM_CHANNELS; i++) {
1038 mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4);
1039 mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4);
1040
1041 imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
1042 imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
1043 imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
1044 imc->size += imc->dimm_s_size[i];
1045 imc->size += imc->dimm_l_size[i];
1046 ndimms = 0;
1047
1048 for (j = 0; j < NUM_DIMMS; j++) {
1049 dimm = edac_get_dimm(mci, i, j, 0);
1050
1051 if (j ^ imc->dimm_l_map[i]) {
1052 dtype = get_width(0, mad_dimm);
1053 dsize = imc->dimm_s_size[i];
1054 } else {
1055 dtype = get_width(1, mad_dimm);
1056 dsize = imc->dimm_l_size[i];
1057 }
1058
1059 if (!dsize)
1060 continue;
1061
1062 dimm->grain = 64;
1063 dimm->mtype = mtype;
1064 dimm->dtype = dtype;
1065 dimm->nr_pages = MiB_TO_PAGES(dsize >> 20);
1066 dimm->edac_mode = EDAC_SECDED;
1067 snprintf(dimm->label, sizeof(dimm->label),
1068 "MC#%d_Chan#%d_DIMM#%d", mc, i, j);
1069 edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n",
1070 mc, i, j, dsize >> 20, dimm->nr_pages);
1071
1072 ndimms++;
1073 }
1074
1075 if (ndimms && !ecc) {
1076 igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc);
1077 return -ENODEV;
1078 }
1079 }
1080
1081 edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);
1082
1083 return 0;
1084 }
1085
1086 #ifdef CONFIG_EDAC_DEBUG
1087 /* Top of upper usable DRAM */
1088 static u64 igen6_touud;
1089 #define TOUUD_OFFSET 0xa8
1090
igen6_reg_dump(struct igen6_imc * imc)1091 static void igen6_reg_dump(struct igen6_imc *imc)
1092 {
1093 int i;
1094
1095 edac_dbg(2, "CHANNEL_HASH : 0x%x\n",
1096 readl(imc->window + CHANNEL_HASH_OFFSET));
1097 edac_dbg(2, "CHANNEL_EHASH : 0x%x\n",
1098 readl(imc->window + CHANNEL_EHASH_OFFSET));
1099 edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n",
1100 readl(imc->window + MAD_INTER_CHANNEL_OFFSET));
1101 edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n",
1102 readq(imc->window + ECC_ERROR_LOG_OFFSET));
1103
1104 for (i = 0; i < NUM_CHANNELS; i++) {
1105 edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i,
1106 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4));
1107 edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i,
1108 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4));
1109 }
1110 edac_dbg(2, "TOLUD : 0x%x", igen6_tolud);
1111 edac_dbg(2, "TOUUD : 0x%llx", igen6_touud);
1112 edac_dbg(2, "TOM : 0x%llx", igen6_tom);
1113 }
1114
/* The "igen6_test" debugfs directory; NULL when it has not been set up. */
static struct dentry *igen6_test;
1116
debugfs_u64_set(void * data,u64 val)1117 static int debugfs_u64_set(void *data, u64 val)
1118 {
1119 u64 ecclog;
1120
1121 if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) {
1122 edac_dbg(0, "Address 0x%llx out of range\n", val);
1123 return 0;
1124 }
1125
1126 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
1127
1128 val >>= ECC_ERROR_LOG_ADDR_SHIFT;
1129 ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE;
1130
1131 if (!ecclog_gen_pool_add(0, ecclog))
1132 irq_work_queue(&ecclog_irq_work);
1133
1134 return 0;
1135 }
1136 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
1137
igen6_debug_setup(void)1138 static void igen6_debug_setup(void)
1139 {
1140 igen6_test = edac_debugfs_create_dir("igen6_test");
1141 if (!igen6_test)
1142 return;
1143
1144 if (!edac_debugfs_create_file("addr", 0200, igen6_test,
1145 NULL, &fops_u64_wo)) {
1146 debugfs_remove(igen6_test);
1147 igen6_test = NULL;
1148 }
1149 }
1150
igen6_debug_teardown(void)1151 static void igen6_debug_teardown(void)
1152 {
1153 debugfs_remove_recursive(igen6_test);
1154 }
1155 #else
/* Without CONFIG_EDAC_DEBUG the debug helpers do nothing. */
static void igen6_reg_dump(struct igen6_imc *imc)
{
}

static void igen6_debug_setup(void)
{
}

static void igen6_debug_teardown(void)
{
}
1159 #endif
1160
/*
 * Read the memory map registers from PCI configuration space.
 *
 * @pdev:   PCI device to read from
 * @mchbar: output, filled in by get_mchbar()
 *
 * Stores the masked TOLUD and TOM values in igen6_tolud and igen6_tom.
 * With CONFIG_EDAC_DEBUG, TOUUD is read into igen6_touud as well; a failure
 * to read it is only reported at debug level.
 *
 * Returns 0 on success, or -ENODEV if the In-Band ECC IP is not available
 * or any of the required reads fails.
 */
static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
{
	/* Assembles one 64-bit value from two 32-bit config reads. */
	union {
		u64 v;
		struct {
			u32 v_lo;
			u32 v_hi;
		};
	} reg;

	edac_dbg(2, "\n");

	if (!res_cfg->ibecc_available(pdev)) {
		edac_dbg(2, "No In-Band ECC IP\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) {
		igen6_printk(KERN_ERR, "Failed to read TOLUD\n");
		return -ENODEV;
	}

	igen6_tolud &= GENMASK(31, 20);

	if (pci_read_config_dword(pdev, TOM_OFFSET, &reg.v_lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower TOM\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &reg.v_hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper TOM\n");
		return -ENODEV;
	}

	igen6_tom = reg.v & GENMASK_ULL(38, 20);

	if (get_mchbar(pdev, mchbar))
		return -ENODEV;

#ifdef CONFIG_EDAC_DEBUG
	if (pci_read_config_dword(pdev, TOUUD_OFFSET, &reg.v_lo)) {
		edac_dbg(2, "Failed to read lower TOUUD\n");
	} else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &reg.v_hi)) {
		edac_dbg(2, "Failed to read upper TOUUD\n");
	} else {
		igen6_touud = reg.v & GENMASK_ULL(38, 20);
	}
#endif

	return 0;
}
1213
igen6_check(struct mem_ctl_info * mci)1214 static void igen6_check(struct mem_ctl_info *mci)
1215 {
1216 struct igen6_imc *imc = mci->pvt_info;
1217 u64 ecclog;
1218
1219 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
1220 ecclog = ecclog_read_and_clear(imc);
1221 if (!ecclog)
1222 return;
1223
1224 if (!ecclog_gen_pool_add(imc->mc, ecclog))
1225 irq_work_queue(&ecclog_irq_work);
1226 }
1227
1228 /* Check whether the memory controller is absent. */
igen6_imc_absent(void __iomem * window)1229 static bool igen6_imc_absent(void __iomem *window)
1230 {
1231 return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
1232 }
1233
/*
 * Allocate, fill in and register the EDAC memory controller instance for
 * one memory controller.
 *
 * @mc:     index of the controller; selects igen6_pvt->imc[mc]
 * @window: MMIO window of the controller, already mapped by the caller
 * @pdev:   PCI device the controller belongs to
 *
 * On success imc->mci points to the registered instance. @window is not
 * unmapped here on failure.
 *
 * Returns 0 on success or a negative errno.
 */
static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
{
	struct edac_mc_layer layers[] = {
		{
			.type		= EDAC_MC_LAYER_CHANNEL,
			.size		= NUM_CHANNELS,
			.is_virt_csrow	= false,
		},
		{
			.type		= EDAC_MC_LAYER_SLOT,
			.size		= NUM_DIMMS,
			.is_virt_csrow	= true,
		},
	};
	struct igen6_imc *imc = &igen6_pvt->imc[mc];
	struct mem_ctl_info *mci;
	int rc;

	edac_dbg(2, "\n");

	mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
	if (!mci)
		return -ENOMEM;

	mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc);
	if (!mci->ctl_name) {
		rc = -ENOMEM;
		goto err_free_mci;
	}

	mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->dev_name = pci_name(pdev);
	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = igen6_check;
	mci->pvt_info = imc;

	device_initialize(&imc->dev);
	/*
	 * The EDAC core identifies a memory controller by mci->pdev (a
	 * struct device pointer). On the client SoCs one or more memory
	 * controllers hang off a single pci_dev, so pci_dev->dev alone
	 * cannot identify them all.
	 *
	 * Keep mci->pdev unique: the first memory controller uses
	 * pci_dev->dev, every further one uses its own imc->dev.
	 */
	mci->pdev = mc ? &imc->dev : &pdev->dev;
	imc->mc = mc;
	imc->pdev = pdev;
	imc->window = window;

	igen6_reg_dump(imc);

	rc = igen6_get_dimm_config(mci);
	if (rc)
		goto err_detach_pvt;

	rc = edac_mc_add_mc(mci);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc);
		goto err_detach_pvt;
	}

	imc->mci = mci;
	return 0;

err_detach_pvt:
	/*
	 * pvt_info points into igen6_pvt, not at memory allocated together
	 * with the mci, so detach it before edac_mc_free() (presumably so
	 * the EDAC core does not try to free it).
	 */
	mci->pvt_info = NULL;
	kfree(mci->ctl_name);
err_free_mci:
	edac_mc_free(mci);
	return rc;
}
1310
igen6_unregister_mcis(void)1311 static void igen6_unregister_mcis(void)
1312 {
1313 struct mem_ctl_info *mci;
1314 struct igen6_imc *imc;
1315 int i;
1316
1317 edac_dbg(2, "\n");
1318
1319 for (i = 0; i < res_cfg->num_imc; i++) {
1320 imc = &igen6_pvt->imc[i];
1321 mci = imc->mci;
1322 if (!mci)
1323 continue;
1324
1325 edac_mc_del_mc(mci->pdev);
1326 kfree(mci->ctl_name);
1327 mci->pvt_info = NULL;
1328 edac_mc_free(mci);
1329 iounmap(imc->window);
1330 }
1331 }
1332
/*
 * Probe up to NUM_IMC memory controller windows and register every
 * controller that is present.
 *
 * @pdev:   PCI device the controllers belong to
 * @mchbar: base address of the first controller's window; window N is
 *          at @mchbar + N * MCHBAR_SIZE
 *
 * Two indices are used: pmc walks the physical windows, lmc counts the
 * controllers actually registered and is the index they are registered
 * under. Absent controllers are skipped and their window is unmapped.
 * If fewer controllers than res_cfg->num_imc are found, res_cfg->num_imc
 * is lowered to the number found.
 *
 * Returns 0 if at least one controller was registered, -ENODEV if none was
 * found, or a negative errno if mapping or registration failed; in the
 * latter case the controllers registered so far are unregistered again.
 */
static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
{
	void __iomem *window;
	int lmc, pmc, rc;
	u64 base;

	for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
		base = mchbar + pmc * MCHBAR_SIZE;
		window = ioremap(base, MCHBAR_SIZE);
		if (!window) {
			igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
			rc = -ENOMEM;
			goto out_unregister_mcis;
		}

		if (igen6_imc_absent(window)) {
			iounmap(window);
			edac_dbg(2, "Skip absent mc%d\n", pmc);
			continue;
		}

		rc = igen6_register_mci(lmc, window, pdev);
		if (rc)
			goto out_iounmap;

		/* Done, if all present MCs are detected and registered. */
		if (++lmc >= res_cfg->num_imc)
			break;
	}

	if (!lmc) {
		igen6_printk(KERN_ERR, "No mc found.\n");
		return -ENODEV;
	}

	if (lmc < res_cfg->num_imc) {
		/* The message must end in a newline like every other one. */
		igen6_printk(KERN_DEBUG, "Expected %d mcs, but only %d detected.\n",
			     res_cfg->num_imc, lmc);
		res_cfg->num_imc = lmc;
	}

	return 0;

out_iounmap:
	/* The window of the controller that failed to register. */
	iounmap(window);

out_unregister_mcis:
	igen6_unregister_mcis();

	return rc;
}
1384
/*
 * Record the memory slice parameters in igen6_pvt.
 *
 * @mchbar: base address that res_cfg->cmf_base is relative to
 *
 * ms_s_size is the smaller of the first two controllers' sizes. ms_l_map is
 * 1 when controller 0 is the smaller one and 0 otherwise (including when
 * both are equal). If res_cfg->cmf_size is non-zero, the region at
 * @mchbar + res_cfg->cmf_base is mapped temporarily and the 64-bit value at
 * res_cfg->ms_hash_offset is stored in igen6_pvt->ms_hash.
 *
 * Returns 0 on success or -ENODEV if the region cannot be mapped.
 */
static int igen6_mem_slice_setup(u64 mchbar)
{
	struct igen6_imc *imcs = igen6_pvt->imc;
	u64 cmf_base = mchbar + res_cfg->cmf_base;
	u32 hash_off = res_cfg->ms_hash_offset;
	u32 cmf_len = res_cfg->cmf_size;
	void __iomem *cmf;
	u64 small, hash;
	int l_map;

	edac_dbg(2, "\n");

	l_map = (imcs[0].size < imcs[1].size) ? 1 : 0;
	small = l_map ? imcs[0].size : imcs[1].size;

	igen6_pvt->ms_s_size = small;
	igen6_pvt->ms_l_map = l_map;

	edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
		 small >> 20, l_map);

	/* No region configured: there is no hash register to read. */
	if (!cmf_len)
		return 0;

	cmf = ioremap(cmf_base, cmf_len);
	if (!cmf) {
		igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", cmf_base);
		return -ENODEV;
	}

	hash = readq(cmf + hash_off);
	igen6_pvt->ms_hash = hash;

	edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", hash);

	iounmap(cmf);

	return 0;
}
1429
register_err_handler(void)1430 static int register_err_handler(void)
1431 {
1432 int rc;
1433
1434 if (res_cfg->machine_check) {
1435 mce_register_decode_chain(&ecclog_mce_dec);
1436 return 0;
1437 }
1438
1439 rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
1440 0, IGEN6_NMI_NAME);
1441 if (rc) {
1442 igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
1443 return rc;
1444 }
1445
1446 return 0;
1447 }
1448
unregister_err_handler(void)1449 static void unregister_err_handler(void)
1450 {
1451 if (res_cfg->machine_check) {
1452 mce_unregister_decode_chain(&ecclog_mce_dec);
1453 return;
1454 }
1455
1456 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
1457 }
1458
opstate_set(const struct res_config * cfg,const struct pci_device_id * ent)1459 static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent)
1460 {
1461 /*
1462 * Quirk: Certain SoCs' error reporting interrupts don't work.
1463 * Force polling mode for them to ensure that memory error
1464 * events can be handled.
1465 */
1466 if (ent->device == DID_ADL_N_SKU4) {
1467 edac_op_state = EDAC_OPSTATE_POLL;
1468 return;
1469 }
1470
1471 /* Set the mode according to the configuration data. */
1472 if (cfg->machine_check)
1473 edac_op_state = EDAC_OPSTATE_INT;
1474 else
1475 edac_op_state = EDAC_OPSTATE_NMI;
1476 }
1477
igen6_probe(struct pci_dev * pdev,const struct pci_device_id * ent)1478 static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1479 {
1480 u64 mchbar;
1481 int rc;
1482
1483 edac_dbg(2, "\n");
1484
1485 igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL);
1486 if (!igen6_pvt)
1487 return -ENOMEM;
1488
1489 res_cfg = (struct res_config *)ent->driver_data;
1490
1491 rc = igen6_pci_setup(pdev, &mchbar);
1492 if (rc)
1493 goto fail;
1494
1495 opstate_set(res_cfg, ent);
1496
1497 rc = igen6_register_mcis(pdev, mchbar);
1498 if (rc)
1499 goto fail;
1500
1501 if (res_cfg->num_imc > 1) {
1502 rc = igen6_mem_slice_setup(mchbar);
1503 if (rc)
1504 goto fail2;
1505 }
1506
1507 ecclog_pool = ecclog_gen_pool_create();
1508 if (!ecclog_pool) {
1509 rc = -ENOMEM;
1510 goto fail2;
1511 }
1512
1513 INIT_WORK(&ecclog_work, ecclog_work_cb);
1514 init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);
1515
1516 rc = register_err_handler();
1517 if (rc)
1518 goto fail3;
1519
1520 /* Enable error reporting */
1521 rc = errcmd_enable_error_reporting(true);
1522 if (rc) {
1523 igen6_printk(KERN_ERR, "Failed to enable error reporting\n");
1524 goto fail4;
1525 }
1526
1527 /* Check if any pending errors before/during the registration of the error handler */
1528 ecclog_handler();
1529
1530 igen6_debug_setup();
1531 return 0;
1532 fail4:
1533 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
1534 fail3:
1535 gen_pool_destroy(ecclog_pool);
1536 fail2:
1537 igen6_unregister_mcis();
1538 fail:
1539 kfree(igen6_pvt);
1540 return rc;
1541 }
1542
igen6_remove(struct pci_dev * pdev)1543 static void igen6_remove(struct pci_dev *pdev)
1544 {
1545 edac_dbg(2, "\n");
1546
1547 igen6_debug_teardown();
1548 errcmd_enable_error_reporting(false);
1549 unregister_err_handler();
1550 irq_work_sync(&ecclog_irq_work);
1551 flush_work(&ecclog_work);
1552 gen_pool_destroy(ecclog_pool);
1553 igen6_unregister_mcis();
1554 kfree(igen6_pvt);
1555 }
1556
1557 static struct pci_driver igen6_driver = {
1558 .name = EDAC_MOD_STR,
1559 .probe = igen6_probe,
1560 .remove = igen6_remove,
1561 .id_table = igen6_pci_tbl,
1562 };
1563
igen6_init(void)1564 static int __init igen6_init(void)
1565 {
1566 const char *owner;
1567 int rc;
1568
1569 edac_dbg(2, "\n");
1570
1571 if (ghes_get_devices())
1572 return -EBUSY;
1573
1574 owner = edac_get_owner();
1575 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
1576 return -EBUSY;
1577
1578 rc = pci_register_driver(&igen6_driver);
1579 if (rc)
1580 return rc;
1581
1582 igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION);
1583
1584 return 0;
1585 }
1586
igen6_exit(void)1587 static void __exit igen6_exit(void)
1588 {
1589 edac_dbg(2, "\n");
1590
1591 pci_unregister_driver(&igen6_driver);
1592 }
1593
1594 module_init(igen6_init);
1595 module_exit(igen6_exit);
1596
1597 MODULE_LICENSE("GPL v2");
1598 MODULE_AUTHOR("Qiuxu Zhuo");
1599 MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC");
1600