xref: /linux/drivers/edac/igen6_edac.c (revision 73543bad766486c3cdbf6fa9d1faf7d0c4bcc7af)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Driver for Intel client SoC with integrated memory controller using IBECC
4  *
5  * Copyright (C) 2020 Intel Corporation
6  *
7  * The In-Band ECC (IBECC) IP provides ECC protection to all or specific
8  * regions of the physical memory space. It's used for memory controllers
9  * that don't support the out-of-band ECC which often needs an additional
10  * storage device to each channel for storing ECC data.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/init.h>
15 #include <linux/pci.h>
16 #include <linux/slab.h>
17 #include <linux/irq_work.h>
18 #include <linux/llist.h>
19 #include <linux/genalloc.h>
20 #include <linux/edac.h>
21 #include <linux/bits.h>
22 #include <linux/io.h>
23 #include <asm/mach_traps.h>
24 #include <asm/nmi.h>
25 #include <asm/mce.h>
26 
27 #include "edac_mc.h"
28 #include "edac_module.h"
29 
30 #define IGEN6_REVISION	"v2.5.1"
31 
32 #define EDAC_MOD_STR	"igen6_edac"
33 #define IGEN6_NMI_NAME	"igen6_ibecc"
34 
/* Debug macros */

/* Log a driver-wide message through edac_printk(), tagged "igen6". */
#define igen6_printk(level, fmt, arg...)		\
	edac_printk(level, "igen6", fmt, ##arg)

/* Log a message for one memory controller (mci), tagged "igen6". */
#define igen6_mc_printk(mci, level, fmt, arg...)	\
	edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)

/*
 * Extract bits lo..hi (both inclusive) of v and shift them down so that
 * bit lo lands at bit 0. The mask comes from GENMASK_ULL(), so the
 * expression is evaluated in 64 bits.
 */
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))
43 
/*
 * Array bounds: igen6_pvt::imc[] is sized by NUM_IMC and the per-channel
 * arrays in struct igen6_imc by NUM_CHANNELS.
 */
#define NUM_IMC				2 /* Max memory controllers */
#define NUM_CHANNELS			2 /* Max channels */
#define NUM_DIMMS			2 /* Max DIMMs per channel */

/* 1 << 32; used when remapping addresses around TOLUD/TOM */
#define _4GB				BIT_ULL(32)

/* Size of physical memory */
#define TOM_OFFSET			0xa0
/* Top of low usable DRAM */
#define TOLUD_OFFSET			0xbc
/* Capability register C (read as a PCI config dword) */
#define CAPID_C_OFFSET			0xec
#define CAPID_C_IBECC			BIT(15)

/* Capability register E (read as a PCI config dword) */
#define CAPID_E_OFFSET			0xf0
#define CAPID_E_IBECC			BIT(12)
#define CAPID_E_IBECC_BIT18		BIT(18)

/* Error Status (accessed as a 16-bit PCI config word) */
#define ERRSTS_OFFSET			0xc8
#define ERRSTS_CE			BIT_ULL(6)
#define ERRSTS_UE			BIT_ULL(7)

/* Error Command (accessed as a 16-bit PCI config word) */
#define ERRCMD_OFFSET			0xca
#define ERRCMD_CE			BIT_ULL(6)
#define ERRCMD_UE			BIT_ULL(7)
72 
/* IBECC MMIO base address (per-SoC value taken from the active res_cfg) */
#define IBECC_BASE			(res_cfg->ibecc_base)
#define IBECC_ACTIVATE_OFFSET		IBECC_BASE
#define IBECC_ACTIVATE_EN		BIT(0)

/* IBECC error log (read and written as a 64-bit MMIO register) */
#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE		BIT_ULL(62)
#define ECC_ERROR_LOG_UE		BIT_ULL(63)
#define ECC_ERROR_LOG_ADDR_SHIFT	5
/* Error address field: bits 5..38, or bits 5..45 in the ADDR45 variant */
#define ECC_ERROR_LOG_ADDR(v)		GET_BITFIELD(v, 5, 38)
#define ECC_ERROR_LOG_ADDR45(v)		GET_BITFIELD(v, 5, 45)
/* Syndrome field: bits 46..61 */
#define ECC_ERROR_LOG_SYND(v)		GET_BITFIELD(v, 46, 61)

/* Host MMIO base address (64-bit register in PCI config space) */
#define MCHBAR_OFFSET			0x48
#define MCHBAR_EN			BIT_ULL(0)
/* Base address is held in bits 16..38 of the register */
#define MCHBAR_BASE(v)			(GET_BITFIELD(v, 16, 38) << 16)
#define MCHBAR_SIZE			0x10000
92 
/*
 * Parameters for the channel decode stage.
 * All offsets below are relative to the per-SoC IMC MMIO base taken from
 * the active res_cfg. Size fields are scaled by 1 << 29 into bytes.
 */
#define IMC_BASE			(res_cfg->imc_base)
#define MAD_INTER_CHANNEL_OFFSET	IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v)	GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v)	GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v)	GET_BITFIELD(v, 4, 4)
#define MAD_INTER_CHANNEL_CH_S_SIZE(v)	((u64)GET_BITFIELD(v, 12, 19) << 29)

/* Parameters for DRAM decode stage (register of channel 0) */
#define MAD_INTRA_CH0_OFFSET		(IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v)	GET_BITFIELD(v, 0, 0)

/* DIMM characteristics (register of channel 0) */
#define MAD_DIMM_CH0_OFFSET		(IMC_BASE + 0xc)
#define MAD_DIMM_CH_DIMM_L_SIZE(v)	((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v)		GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v)	((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v)		GET_BITFIELD(v, 24, 25)

/* Hash for memory controller selection */
#define MAD_MC_HASH_OFFSET		(IMC_BASE + 0x1b8)
#define MAC_MC_HASH_LSB(v)		GET_BITFIELD(v, 1, 3)

/* Hash for channel selection */
#define CHANNEL_HASH_OFFSET		(IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET		(IMC_BASE + 0x28)
/* Hash mask covers address bits 6..19; it is returned already in place */
#define CHANNEL_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
/* Callers add 6 to this field to obtain the interleave bit position */
#define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)

/* Parameters for memory slice decode stage (same layout as CHANNEL_HASH) */
#define MEM_SLICE_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
127 
/*
 * Per-SoC resource configuration. One static instance exists for each
 * supported SoC family; res_cfg points at the instance in use and is
 * dereferenced by the IBECC_BASE/IMC_BASE offset macros and by the error
 * handling paths.
 */
static struct res_config {
	/*
	 * NOTE(review): presumably selects machine-check based error
	 * notification instead of NMI; the consumer is not in this part of
	 * the file - confirm.
	 */
	bool machine_check;
	/* The number of present memory controllers. */
	int num_imc;
	/* MMIO offset of the memory controller registers (see IMC_BASE) */
	u32 imc_base;
	/*
	 * cmf_base, cmf_size and ms_hash_offset are only set by tgl_cfg;
	 * NOTE(review): presumably they locate the register holding the
	 * memory slice hash - confirm against the code that reads them.
	 */
	u32 cmf_base;
	u32 cmf_size;
	u32 ms_hash_offset;
	/* MMIO offset of the IBECC registers (see IBECC_BASE) */
	u32 ibecc_base;
	/* Offset of the error log register relative to ibecc_base */
	u32 ibecc_error_log_offset;
	/* Report whether IBECC is available on this device */
	bool (*ibecc_available)(struct pci_dev *pdev);
	/*
	 * Extract error address logged in IBECC. Optional: when NULL, the
	 * caller uses ECC_ERROR_LOG_ADDR() shifted by ECC_ERROR_LOG_ADDR_SHIFT.
	 */
	u64 (*err_addr)(u64 ecclog);
	/* Convert error address logged in IBECC to system physical address */
	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
	/* Convert error address logged in IBECC to integrated memory controller address */
	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;
146 
/* State kept for one integrated memory controller (IMC). */
struct igen6_imc {
	/* Index of this controller; also its slot in igen6_pvt::imc[] */
	int mc;
	/* EDAC memory controller object used when reporting errors */
	struct mem_ctl_info *mci;
	/* PCI device used for config space accesses (ERRSTS/ERRCMD) */
	struct pci_dev *pdev;
	struct device dev;
	/* MMIO mapping; register offsets above are added to this base */
	void __iomem *window;
	/* Sum of the large and small DIMM sizes over all channels */
	u64 size;
	/* Decoded from MAD_INTER_CHANNEL: CH_S_SIZE and CH_L_MAP fields */
	u64 ch_s_size;
	int ch_l_map;
	/* Per channel: DIMM_S_SIZE, DIMM_L_SIZE and DIMM_L_MAP fields */
	u64 dimm_s_size[NUM_CHANNELS];
	u64 dimm_l_size[NUM_CHANNELS];
	int dimm_l_map[NUM_CHANNELS];
};
160 
/* Driver-wide private data, reached through the igen6_pvt pointer. */
static struct igen6_pvt {
	/* One entry per memory controller; res_cfg->num_imc are in use */
	struct igen6_imc imc[NUM_IMC];
	/* Raw memory slice hash value, decoded with the MEM_SLICE_HASH_* macros */
	u64 ms_hash;
	/* Size threshold used by the memory slice address conversions */
	u64 ms_s_size;
	/* NOTE(review): not referenced in this part of the file - confirm use */
	int ms_l_map;
} *igen6_pvt;
167 
/*
 * The top of low usable DRAM. Addresses below this value are passed through
 * unchanged by the address conversion helpers.
 */
static u32 igen6_tolud;
/*
 * The size of physical memory. Also the upper bound checked by
 * igen6_decode() before decoding an address.
 */
static u64 igen6_tom;
172 
/*
 * Result of decoding one logged error. mc, imc_addr and sys_addr are filled
 * in by the caller; igen6_decode() fills in the channel and sub-channel
 * fields from imc_addr.
 */
struct decoded_addr {
	/* Index of the memory controller that logged the error */
	int mc;
	/* Address as seen by the memory controller */
	u64 imc_addr;
	/* System physical address */
	u64 sys_addr;
	int channel_idx;
	u64 channel_addr;
	int sub_channel_idx;
	u64 sub_channel_addr;
};
182 
/* One captured error log, queued on ecclog_llist until the worker runs. */
struct ecclog_node {
	struct llist_node llnode;
	/* Index of the memory controller the log was read from */
	int mc;
	/* Raw value of the ECC_ERROR_LOG register */
	u64 ecclog;
};
188 
/*
 * In the NMI handler, the driver uses the lock-less memory allocator
 * to allocate memory to store the IBECC error logs and links the logs
 * to the lock-less list. Delay printk() and the work of error reporting
 * to EDAC core in a worker.
 */
#define ECCLOG_POOL_SIZE	PAGE_SIZE
/* Pending error logs, drained by ecclog_work_cb() */
static LLIST_HEAD(ecclog_llist);
/* Allocator for struct ecclog_node, backed by ecclog_buf */
static struct gen_pool *ecclog_pool;
/* Static backing storage handed to the pool */
static char ecclog_buf[ECCLOG_POOL_SIZE];
/* First deferral step: clears ERRSTS and schedules ecclog_work */
static struct irq_work ecclog_irq_work;
/* Second deferral step: decodes and reports the queued logs */
static struct work_struct ecclog_work;
201 
/*
 * PCI device IDs of the supported compute dies. Each ID is paired with its
 * res_config in igen6_pci_tbl[].
 */

/* Compute die IDs for Elkhart Lake with IBECC */
#define DID_EHL_SKU5	0x4514
#define DID_EHL_SKU6	0x4528
#define DID_EHL_SKU7	0x452a
#define DID_EHL_SKU8	0x4516
#define DID_EHL_SKU9	0x452c
#define DID_EHL_SKU10	0x452e
#define DID_EHL_SKU11	0x4532
#define DID_EHL_SKU12	0x4518
#define DID_EHL_SKU13	0x451a
#define DID_EHL_SKU14	0x4534
#define DID_EHL_SKU15	0x4536

/* Compute die IDs for ICL-NNPI with IBECC */
#define DID_ICL_SKU8	0x4581
#define DID_ICL_SKU10	0x4585
#define DID_ICL_SKU11	0x4589
#define DID_ICL_SKU12	0x458d

/* Compute die IDs for Tiger Lake with IBECC */
#define DID_TGL_SKU	0x9a14

/* Compute die IDs for Alder Lake with IBECC */
#define DID_ADL_SKU1	0x4601
#define DID_ADL_SKU2	0x4602
#define DID_ADL_SKU3	0x4621
#define DID_ADL_SKU4	0x4641

/* Compute die IDs for Alder Lake-N with IBECC */
#define DID_ADL_N_SKU1	0x4614
#define DID_ADL_N_SKU2	0x4617
#define DID_ADL_N_SKU3	0x461b
#define DID_ADL_N_SKU4	0x461c
#define DID_ADL_N_SKU5	0x4673
#define DID_ADL_N_SKU6	0x4674
#define DID_ADL_N_SKU7	0x4675
#define DID_ADL_N_SKU8	0x4677
#define DID_ADL_N_SKU9	0x4678
#define DID_ADL_N_SKU10	0x4679
#define DID_ADL_N_SKU11	0x467c
#define DID_ADL_N_SKU12	0x4632

/* Compute die IDs for Arizona Beach with IBECC */
#define DID_AZB_SKU1	0x4676

/* Compute die IDs for Amston Lake with IBECC */
#define DID_ASL_SKU1	0x464a

/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1	0xa706
#define DID_RPL_P_SKU2	0xa707
#define DID_RPL_P_SKU3	0xa708
#define DID_RPL_P_SKU4	0xa716
#define DID_RPL_P_SKU5	0xa718

/* Compute die IDs for Meteor Lake-PS with IBECC */
#define DID_MTL_PS_SKU1	0x7d21
#define DID_MTL_PS_SKU2	0x7d22
#define DID_MTL_PS_SKU3	0x7d23
#define DID_MTL_PS_SKU4	0x7d24

/* Compute die IDs for Meteor Lake-P with IBECC */
#define DID_MTL_P_SKU1	0x7d01
#define DID_MTL_P_SKU2	0x7d02
#define DID_MTL_P_SKU3	0x7d14

/* Compute die IDs for Arrow Lake-UH with IBECC */
#define DID_ARL_UH_SKU1	0x7d06
#define DID_ARL_UH_SKU2	0x7d20
#define DID_ARL_UH_SKU3	0x7d30

/* Compute die IDs for Panther Lake-H with IBECC */
#define DID_PTL_H_SKU1	0xb000
#define DID_PTL_H_SKU2	0xb001
#define DID_PTL_H_SKU3	0xb002
277 
/*
 * Read the 64-bit MCHBAR register from PCI config space and hand back the
 * host MMIO base address stored in it.
 *
 * @pdev:   device whose config space holds MCHBAR
 * @mchbar: out parameter; written only on success
 *
 * Returns 0 on success, -ENODEV if either half of the register cannot be
 * read or if the enable bit is clear.
 */
static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
{
	u32 lo, hi;
	u64 reg;

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
		return -ENODEV;
	}

	/* Low dword sits at the lower config offset */
	reg = ((u64)hi << 32) | lo;

	if ((reg & MCHBAR_EN) == 0) {
		igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
		return -ENODEV;
	}

	*mchbar = MCHBAR_BASE(reg);

	return 0;
}
307 
ehl_ibecc_available(struct pci_dev * pdev)308 static bool ehl_ibecc_available(struct pci_dev *pdev)
309 {
310 	u32 v;
311 
312 	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
313 		return false;
314 
315 	return !!(CAPID_C_IBECC & v);
316 }
317 
/*
 * Elkhart Lake: the address logged by IBECC is returned unchanged as the
 * system physical address. @mc is unused.
 */
static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return eaddr;
}
322 
/*
 * Elkhart Lake: convert the address logged by IBECC into a memory
 * controller address. @mc is unused.
 *
 *  - below TOLUD the address is returned unchanged;
 *  - if TOM is at most 4GB, addresses at or above TOLUD are moved down by
 *    (4GB - TOLUD);
 *  - otherwise addresses at or above TOM are moved down by (TOM - TOLUD),
 *    and everything else is returned unchanged.
 */
static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 imc_addr = eaddr;

	if (eaddr >= igen6_tolud) {
		if (igen6_tom <= _4GB)
			imc_addr = eaddr + igen6_tolud - _4GB;
		else if (eaddr >= igen6_tom)
			imc_addr = eaddr + igen6_tolud - igen6_tom;
	}

	return imc_addr;
}
336 
icl_ibecc_available(struct pci_dev * pdev)337 static bool icl_ibecc_available(struct pci_dev *pdev)
338 {
339 	u32 v;
340 
341 	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
342 		return false;
343 
344 	return !(CAPID_C_IBECC & v) &&
345 		(boot_cpu_data.x86_stepping >= 1);
346 }
347 
tgl_ibecc_available(struct pci_dev * pdev)348 static bool tgl_ibecc_available(struct pci_dev *pdev)
349 {
350 	u32 v;
351 
352 	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
353 		return false;
354 
355 	return !(CAPID_E_IBECC & v);
356 }
357 
mtl_p_ibecc_available(struct pci_dev * pdev)358 static bool mtl_p_ibecc_available(struct pci_dev *pdev)
359 {
360 	u32 v;
361 
362 	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
363 		return false;
364 
365 	return !(CAPID_E_IBECC_BIT18 & v);
366 }
367 
mtl_ps_ibecc_available(struct pci_dev * pdev)368 static bool mtl_ps_ibecc_available(struct pci_dev *pdev)
369 {
370 #define MCHBAR_MEMSS_IBECCDIS	0x13c00
371 	void __iomem *window;
372 	u64 mchbar;
373 	u32 val;
374 
375 	if (get_mchbar(pdev, &mchbar))
376 		return false;
377 
378 	window = ioremap(mchbar, MCHBAR_SIZE * 2);
379 	if (!window) {
380 		igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
381 		return false;
382 	}
383 
384 	val = readl(window + MCHBAR_MEMSS_IBECCDIS);
385 	iounmap(window);
386 
387 	/* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */
388 	return !GET_BITFIELD(val, 6, 6);
389 }
390 
/*
 * Convert a memory address into a system physical address.
 *
 *  - below TOLUD the address is returned unchanged;
 *  - if TOM is at most 4GB, addresses at or above TOLUD are moved up by
 *    (4GB - TOLUD);
 *  - otherwise addresses in [TOLUD, 4GB) are moved up by (TOM - TOLUD),
 *    and addresses at or above 4GB are returned unchanged.
 */
static u64 mem_addr_to_sys_addr(u64 maddr)
{
	u64 sys_addr = maddr;

	if (maddr >= igen6_tolud) {
		if (igen6_tom <= _4GB)
			sys_addr = maddr - igen6_tolud + _4GB;
		else if (maddr < _4GB)
			sys_addr = maddr - igen6_tolud + igen6_tom;
	}

	return sys_addr;
}
404 
/*
 * Compute the memory slice hash of an address.
 *
 * Starting from @hash_init, XOR in each of bits 6..19 of (@addr & @mask),
 * then XOR in bit @intlv_bit of @addr.
 */
static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
{
	/* Bring bit 6 down to bit 0 so the loop can consume one bit per step */
	u64 bits = (addr & mask) >> 6;
	u64 parity = hash_init;
	int n;

	/* 14 bits: address bits 6 through 19 */
	for (n = 0; n < 14; n++) {
		parity ^= bits & 1;
		bits >>= 1;
	}

	return parity ^ ((addr >> intlv_bit) & 1);
}
416 
/*
 * Tiger Lake: convert the address logged by IBECC on controller @mc into a
 * memory address.
 *
 * Addresses at or above igen6_pvt->ms_s_size are simply moved up by that
 * size. Below it, a zero bit is inserted at the interleave bit position
 * (field MEM_SLICE_HASH_LSB_MASK_BIT of the slice hash, plus 6) and then
 * replaced by the slice hash computed with @mc as the initial value.
 */
static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
{
	u64 slice_size = igen6_pvt->ms_s_size;
	u64 upper, lower, maddr, hash;
	u32 ms_hash;
	int bit;

	if (eaddr >= slice_size)
		return eaddr + slice_size;

	ms_hash = igen6_pvt->ms_hash;
	bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;

	/* Open a one-bit gap at the interleave position */
	upper = GET_BITFIELD(eaddr, bit, 63) << (bit + 1);
	lower = GET_BITFIELD(eaddr, 0, bit - 1);
	maddr = upper | lower;

	hash = mem_slice_hash(maddr, MEM_SLICE_HASH_MASK(ms_hash), mc, bit);

	return maddr | (hash << bit);
}
439 
/*
 * Tiger Lake: convert the address logged by IBECC on controller @mc into a
 * system physical address, going through the memory address first.
 */
static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(tgl_err_addr_to_mem_addr(eaddr, mc));
}
446 
/*
 * Tiger Lake: the address logged by IBECC is returned unchanged as the
 * memory controller address. @mc is unused.
 */
static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	return eaddr;
}
451 
/*
 * Alder Lake: the address logged by IBECC is passed straight to
 * mem_addr_to_sys_addr(); no memory slice conversion is applied first.
 * @mc is unused.
 */
static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(eaddr);
}
456 
/*
 * Alder Lake: convert the address logged by IBECC into the address seen by
 * memory controller @mc.
 *
 * Addresses at or above twice igen6_pvt->ms_s_size are moved down by that
 * size. Below it, the interleave bit (field MAC_MC_HASH_LSB of the
 * MAD_MC_HASH register, plus 6) is removed and the bits above it are
 * shifted down by one to close the gap.
 */
static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	struct igen6_imc *imc = &igen6_pvt->imc[mc];
	u64 slice_size = igen6_pvt->ms_s_size;
	u64 upper, lower;
	u32 mc_hash;
	int bit;

	if (eaddr >= 2 * slice_size)
		return eaddr - slice_size;

	/* The hash register is only read on the interleaved path */
	mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET);
	bit = MAC_MC_HASH_LSB(mc_hash) + 6;

	upper = GET_BITFIELD(eaddr, bit + 1, 63) << bit;
	lower = GET_BITFIELD(eaddr, 0, bit - 1);

	return upper | lower;
}
476 
/*
 * Raptor Lake-P: extract the error address from the raw error log using
 * the wider field (bits 5..45).
 *
 * NOTE(review): the default path in ecclog_work_cb() shifts the extracted
 * field left by ECC_ERROR_LOG_ADDR_SHIFT, whereas this helper returns the
 * field unshifted - confirm this is intentional for this SoC.
 */
static u64 rpl_p_err_addr(u64 ecclog)
{
	return ECC_ERROR_LOG_ADDR45(ecclog);
}
481 
/*
 * Elkhart Lake: one memory controller; machine_check is left unset and the
 * default error address extraction is used.
 */
static struct res_config ehl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xdc00,
	.ibecc_available	= ehl_ibecc_available,
	.ibecc_error_log_offset	= 0x170,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};
491 
/*
 * ICL-NNPI: same layout and address conversions as Elkhart Lake, but a
 * different IBECC base and availability check.
 */
static struct res_config icl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xd800,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= icl_ibecc_available,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};
501 
/*
 * Tiger Lake: two memory controllers. This is the only configuration that
 * sets cmf_base, cmf_size and ms_hash_offset.
 */
static struct res_config tgl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0x5000,
	.cmf_base		= 0x11000,
	.cmf_size		= 0x800,
	.ms_hash_offset		= 0xac,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= tgl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= tgl_err_addr_to_imc_addr,
};
515 
/*
 * Alder Lake: two memory controllers; reuses the Tiger Lake availability
 * check with its own address conversions.
 */
static struct res_config adl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
526 
/*
 * Alder Lake-N: as adl_cfg but with a single memory controller. Also used
 * for the Arizona Beach and Amston Lake IDs in igen6_pci_tbl[].
 */
static struct res_config adl_n_cfg = {
	.machine_check		= true,
	.num_imc		= 1,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
537 
/*
 * Raptor Lake-P: as adl_cfg, plus an err_addr override that extracts the
 * wider error address field.
 */
static struct res_config rpl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr		= rpl_p_err_addr,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
549 
/*
 * Meteor Lake-PS: Alder Lake address conversions; IBECC availability is
 * read from an MCHBAR MMIO register.
 */
static struct res_config mtl_ps_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_ps_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
560 
/*
 * Meteor Lake-P: as mtl_ps_cfg but with the CAPID_E bit 18 availability
 * check. Also used for the Arrow Lake-UH and Panther Lake-H IDs in
 * igen6_pci_tbl[].
 */
static struct res_config mtl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_p_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
571 
/*
 * Supported devices. Each entry pairs an Intel device ID with a pointer to
 * the matching res_config, stored in the entry's driver data slot as a
 * kernel_ulong_t. The empty entry terminates the table.
 */
static struct pci_device_id igen6_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
628 
/*
 * Translate the device width field of a MAD_DIMM register value into an
 * EDAC device type.
 *
 * @dimm_l:   non-zero selects the DLW field, zero selects the DSW field
 * @mad_dimm: raw MAD_DIMM register value
 *
 * Field values 0, 1 and 2 map to x8, x16 and x32; anything else yields
 * DEV_UNKNOWN.
 */
static enum dev_type get_width(int dimm_l, u32 mad_dimm)
{
	u32 code;

	if (dimm_l)
		code = MAD_DIMM_CH_DLW(mad_dimm);
	else
		code = MAD_DIMM_CH_DSW(mad_dimm);

	if (code == 0)
		return DEV_X8;
	if (code == 1)
		return DEV_X16;
	if (code == 2)
		return DEV_X32;

	return DEV_UNKNOWN;
}
645 
/*
 * Translate the DDR type field (bits 0..2) of a MAD_INTER_CHANNEL register
 * value into an EDAC memory type. Values 0..4 map to DDR4, DDR3, LPDDR3,
 * LPDDR4 and WIO2; anything else yields MEM_UNKNOWN.
 */
static enum mem_type get_memory_type(u32 mad_inter)
{
	u32 ddr_type = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter);
	enum mem_type mtype;

	if (ddr_type == 0)
		mtype = MEM_DDR4;
	else if (ddr_type == 1)
		mtype = MEM_DDR3;
	else if (ddr_type == 2)
		mtype = MEM_LPDDR3;
	else if (ddr_type == 3)
		mtype = MEM_LPDDR4;
	else if (ddr_type == 4)
		mtype = MEM_WIO2;
	else
		mtype = MEM_UNKNOWN;

	return mtype;
}
665 
/*
 * Compute the hashed interleave index of an address: the XOR of bits 6..19
 * of (@addr & @mask), XORed with bit @intlv_bit of @addr.
 */
static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit)
{
	/* Bring bit 6 down to bit 0 so the loop can consume one bit per step */
	u64 bits = (addr & mask) >> 6;
	u64 parity = 0;
	int n;

	/* 14 bits: address bits 6 through 19 */
	for (n = 0; n < 14; n++) {
		parity ^= bits & 1;
		bits >>= 1;
	}

	return (int)(parity ^ ((addr >> intlv_bit) & 1));
}
677 
/*
 * Remove bit @intlv_bit from @addr: the bits below it are kept in place
 * and the bits above it are moved down by one position to close the gap.
 */
static u64 decode_channel_addr(u64 addr, int intlv_bit)
{
	u64 above = GET_BITFIELD(addr, intlv_bit + 1, 63);
	u64 below = GET_BITFIELD(addr, 0, intlv_bit - 1);

	return (above << intlv_bit) | below;
}
688 
/*
 * Split an address into an interleave index and the address inside the
 * selected unit. Used once for the channel stage and once for the
 * sub-channel/DIMM stage.
 *
 * @addr:     address to decode
 * @hash:     raw hash register value (mode, mask and interleave bit fields)
 * @s_size:   S-size of the stage; the first 2 * @s_size bytes are
 *            interleaved between both units
 * @l_map:    index of the unit that owns everything past the interleaved
 *            region
 * @idx:      out: index of the selected unit
 * @sub_addr: out: address inside the selected unit
 */
static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map,
			int *idx, u64 *sub_addr)
{
	int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6;

	/*
	 * The interleaved region is [0, 2 * s_size). The first address past
	 * it (addr == 2 * s_size, which includes address 0 when s_size is 0)
	 * already belongs to the l_map unit, so the comparison must be
	 * inclusive. This matches the boundary tests in
	 * adl_err_addr_to_imc_addr() and tgl_err_addr_to_mem_addr().
	 */
	if (addr >= 2 * s_size) {
		*sub_addr = addr - s_size;
		*idx = l_map;
		return;
	}

	if (CHANNEL_HASH_MODE(hash)) {
		/* Hash mode: index from the XOR hash, interleave bit from the register */
		*sub_addr = decode_channel_addr(addr, intlv_bit);
		*idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit);
	} else {
		/* Plain mode: address bit 6 is the index */
		*sub_addr = decode_channel_addr(addr, 6);
		*idx = GET_BITFIELD(addr, 6, 6);
	}
}
708 
igen6_decode(struct decoded_addr * res)709 static int igen6_decode(struct decoded_addr *res)
710 {
711 	struct igen6_imc *imc = &igen6_pvt->imc[res->mc];
712 	u64 addr = res->imc_addr, sub_addr, s_size;
713 	int idx, l_map;
714 	u32 hash;
715 
716 	if (addr >= igen6_tom) {
717 		edac_dbg(0, "Address 0x%llx out of range\n", addr);
718 		return -EINVAL;
719 	}
720 
721 	/* Decode channel */
722 	hash   = readl(imc->window + CHANNEL_HASH_OFFSET);
723 	s_size = imc->ch_s_size;
724 	l_map  = imc->ch_l_map;
725 	decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr);
726 	res->channel_idx  = idx;
727 	res->channel_addr = sub_addr;
728 
729 	/* Decode sub-channel/DIMM */
730 	hash   = readl(imc->window + CHANNEL_EHASH_OFFSET);
731 	s_size = imc->dimm_s_size[idx];
732 	l_map  = imc->dimm_l_map[idx];
733 	decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr);
734 	res->sub_channel_idx  = idx;
735 	res->sub_channel_addr = sub_addr;
736 
737 	return 0;
738 }
739 
/*
 * Report one decoded error to the EDAC core.
 *
 * The error is reported as uncorrected when the UE bit of @ecclog is set
 * and as corrected otherwise. The page frame number, the offset within the
 * page and the syndrome are derived from res->sys_addr and @ecclog; the
 * channel and sub-channel indices are passed as the two location layers.
 */
static void igen6_output_error(struct decoded_addr *res,
			       struct mem_ctl_info *mci, u64 ecclog)
{
	enum hw_event_mc_err_type type;
	unsigned long pfn = res->sys_addr >> PAGE_SHIFT;
	unsigned long offset = res->sys_addr & ~PAGE_MASK;

	if (ecclog & ECC_ERROR_LOG_UE)
		type = HW_EVENT_ERR_UNCORRECTED;
	else
		type = HW_EVENT_ERR_CORRECTED;

	edac_mc_handle_error(type, mci, 1, pfn, offset,
			     ECC_ERROR_LOG_SYND(ecclog),
			     res->channel_idx, res->sub_channel_idx,
			     -1, "", "");
}
754 
ecclog_gen_pool_create(void)755 static struct gen_pool *ecclog_gen_pool_create(void)
756 {
757 	struct gen_pool *pool;
758 
759 	pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
760 	if (!pool)
761 		return NULL;
762 
763 	if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
764 		gen_pool_destroy(pool);
765 		return NULL;
766 	}
767 
768 	return pool;
769 }
770 
/*
 * Allocate a node from ecclog_pool, record the controller index and raw
 * error log in it and push it onto ecclog_llist.
 *
 * Returns 0 on success, -ENOMEM if the pool has no room left.
 */
static int ecclog_gen_pool_add(int mc, u64 ecclog)
{
	unsigned long mem = gen_pool_alloc(ecclog_pool,
					   sizeof(struct ecclog_node));
	struct ecclog_node *entry = (struct ecclog_node *)mem;

	if (!entry)
		return -ENOMEM;

	entry->mc = mc;
	entry->ecclog = ecclog;
	llist_add(&entry->llnode, &ecclog_llist);

	return 0;
}
785 
786 /*
787  * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
788  * configuration space status register ERRSTS can indicate whether a
789  * correctable error or an uncorrectable error occurred. We only use the
790  * ECC_ERROR_LOG register to check error type, but need to clear both
791  * registers to enable future error events.
792  */
ecclog_read_and_clear(struct igen6_imc * imc)793 static u64 ecclog_read_and_clear(struct igen6_imc *imc)
794 {
795 	u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
796 
797 	/*
798 	 * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
799 	 *        the invalid value ~0. This will result in a flood of invalid
800 	 *        error reports in polling mode. Skip it.
801 	 */
802 	if (ecclog == ~0)
803 		return 0;
804 
805 	/* Neither a CE nor a UE. Skip it.*/
806 	if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
807 		return 0;
808 
809 	/* Clear CE/UE bits by writing 1s */
810 	writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
811 
812 	return ecclog;
813 }
814 
errsts_clear(struct igen6_imc * imc)815 static void errsts_clear(struct igen6_imc *imc)
816 {
817 	u16 errsts;
818 
819 	if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) {
820 		igen6_printk(KERN_ERR, "Failed to read ERRSTS\n");
821 		return;
822 	}
823 
824 	/* Clear CE/UE bits by writing 1s */
825 	if (errsts & (ERRSTS_CE | ERRSTS_UE))
826 		pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts);
827 }
828 
/*
 * Enable or disable CE and UE reporting in the ERRCMD config register.
 * The register is accessed through the PCI device of memory controller 0.
 *
 * @enable: true sets the CE and UE bits, false clears them; all other bits
 *          are preserved.
 *
 * Returns 0 on success or a negative errno converted from the PCI access
 * error.
 */
static int errcmd_enable_error_reporting(bool enable)
{
	struct igen6_imc *imc = &igen6_pvt->imc[0];
	/* Use the ERRCMD masks here; ERRSTS_* belong to the status register */
	const u16 mask = ERRCMD_CE | ERRCMD_UE;
	u16 errcmd;
	int rc;

	rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	if (enable)
		errcmd |= mask;
	else
		errcmd &= ~mask;

	rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	return 0;
}
850 
ecclog_handler(void)851 static int ecclog_handler(void)
852 {
853 	struct igen6_imc *imc;
854 	int i, n = 0;
855 	u64 ecclog;
856 
857 	for (i = 0; i < res_cfg->num_imc; i++) {
858 		imc = &igen6_pvt->imc[i];
859 
860 		/* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
861 
862 		ecclog = ecclog_read_and_clear(imc);
863 		if (!ecclog)
864 			continue;
865 
866 		if (!ecclog_gen_pool_add(i, ecclog))
867 			irq_work_queue(&ecclog_irq_work);
868 
869 		n++;
870 	}
871 
872 	return n;
873 }
874 
ecclog_work_cb(struct work_struct * work)875 static void ecclog_work_cb(struct work_struct *work)
876 {
877 	struct ecclog_node *node, *tmp;
878 	struct mem_ctl_info *mci;
879 	struct llist_node *head;
880 	struct decoded_addr res;
881 	u64 eaddr;
882 
883 	head = llist_del_all(&ecclog_llist);
884 	if (!head)
885 		return;
886 
887 	llist_for_each_entry_safe(node, tmp, head, llnode) {
888 		memset(&res, 0, sizeof(res));
889 		if (res_cfg->err_addr)
890 			eaddr = res_cfg->err_addr(node->ecclog);
891 		else
892 			eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
893 				ECC_ERROR_LOG_ADDR_SHIFT;
894 		res.mc	     = node->mc;
895 		res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
896 		res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
897 
898 		mci = igen6_pvt->imc[res.mc].mci;
899 
900 		edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog);
901 		igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n");
902 		igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr);
903 
904 		if (!igen6_decode(&res))
905 			igen6_output_error(&res, mci, node->ecclog);
906 
907 		gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node));
908 	}
909 }
910 
ecclog_irq_work_cb(struct irq_work * irq_work)911 static void ecclog_irq_work_cb(struct irq_work *irq_work)
912 {
913 	int i;
914 
915 	for (i = 0; i < res_cfg->num_imc; i++)
916 		errsts_clear(&igen6_pvt->imc[i]);
917 
918 	if (!llist_empty(&ecclog_llist))
919 		schedule_work(&ecclog_work);
920 }
921 
ecclog_nmi_handler(unsigned int cmd,struct pt_regs * regs)922 static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
923 {
924 	unsigned char reason;
925 
926 	if (!ecclog_handler())
927 		return NMI_DONE;
928 
929 	/*
930 	 * Both In-Band ECC correctable error and uncorrectable error are
931 	 * reported by SERR# NMI. The NMI generic code (see pci_serr_error())
932 	 * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to
933 	 * re-enable the SERR# NMI after NMI handling. So clear this bit here
934 	 * to re-enable SERR# NMI for receiving future In-Band ECC errors.
935 	 */
936 	reason  = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK;
937 	reason |= NMI_REASON_CLEAR_SERR;
938 	outb(reason, NMI_REASON_PORT);
939 	reason &= ~NMI_REASON_CLEAR_SERR;
940 	outb(reason, NMI_REASON_PORT);
941 
942 	return NMI_HANDLED;
943 }
944 
ecclog_mce_handler(struct notifier_block * nb,unsigned long val,void * data)945 static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
946 			      void *data)
947 {
948 	struct mce *mce = (struct mce *)data;
949 	char *type;
950 
951 	if (mce->kflags & MCE_HANDLED_CEC)
952 		return NOTIFY_DONE;
953 
954 	/*
955 	 * Ignore unless this is a memory related error.
956 	 * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
957 	 * since this bit isn't set on some CPU (e.g., Tiger Lake UP3).
958 	 */
959 	if ((mce->status & 0xefff) >> 7 != 1)
960 		return NOTIFY_DONE;
961 
962 	if (mce->mcgstatus & MCG_STATUS_MCIP)
963 		type = "Exception";
964 	else
965 		type = "Event";
966 
967 	edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
968 		 mce->extcpu, type, mce->mcgstatus,
969 		 mce->bank, mce->status);
970 	edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
971 	edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
972 	edac_dbg(0, "MISC 0x%llx\n", mce->misc);
973 	edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
974 		 mce->cpuvendor, mce->cpuid, mce->time,
975 		 mce->socketid, mce->apicid);
976 	/*
977 	 * We just use the Machine Check for the memory error notification.
978 	 * Each memory controller is associated with an IBECC instance.
979 	 * Directly read and clear the error information(error address and
980 	 * error type) on all the IBECC instances so that we know on which
981 	 * memory controller the memory error(s) occurred.
982 	 */
983 	if (!ecclog_handler())
984 		return NOTIFY_DONE;
985 
986 	mce->kflags |= MCE_HANDLED_EDAC;
987 
988 	return NOTIFY_DONE;
989 }
990 
991 static struct notifier_block ecclog_mce_dec = {
992 	.notifier_call	= ecclog_mce_handler,
993 	.priority	= MCE_PRIO_EDAC,
994 };
995 
igen6_check_ecc(struct igen6_imc * imc)996 static bool igen6_check_ecc(struct igen6_imc *imc)
997 {
998 	u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
999 
1000 	return !!(activate & IBECC_ACTIVATE_EN);
1001 }
1002 
igen6_get_dimm_config(struct mem_ctl_info * mci)1003 static int igen6_get_dimm_config(struct mem_ctl_info *mci)
1004 {
1005 	struct igen6_imc *imc = mci->pvt_info;
1006 	u32 mad_inter, mad_intra, mad_dimm;
1007 	int i, j, ndimms, mc = imc->mc;
1008 	struct dimm_info *dimm;
1009 	enum mem_type mtype;
1010 	enum dev_type dtype;
1011 	u64 dsize;
1012 	bool ecc;
1013 
1014 	edac_dbg(2, "\n");
1015 
1016 	mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET);
1017 	mtype = get_memory_type(mad_inter);
1018 	ecc = igen6_check_ecc(imc);
1019 	imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter);
1020 	imc->ch_l_map  = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter);
1021 
1022 	for (i = 0; i < NUM_CHANNELS; i++) {
1023 		mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4);
1024 		mad_dimm  = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4);
1025 
1026 		imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
1027 		imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
1028 		imc->dimm_l_map[i]  = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
1029 		imc->size += imc->dimm_s_size[i];
1030 		imc->size += imc->dimm_l_size[i];
1031 		ndimms = 0;
1032 
1033 		for (j = 0; j < NUM_DIMMS; j++) {
1034 			dimm = edac_get_dimm(mci, i, j, 0);
1035 
1036 			if (j ^ imc->dimm_l_map[i]) {
1037 				dtype = get_width(0, mad_dimm);
1038 				dsize = imc->dimm_s_size[i];
1039 			} else {
1040 				dtype = get_width(1, mad_dimm);
1041 				dsize = imc->dimm_l_size[i];
1042 			}
1043 
1044 			if (!dsize)
1045 				continue;
1046 
1047 			dimm->grain = 64;
1048 			dimm->mtype = mtype;
1049 			dimm->dtype = dtype;
1050 			dimm->nr_pages  = MiB_TO_PAGES(dsize >> 20);
1051 			dimm->edac_mode = EDAC_SECDED;
1052 			snprintf(dimm->label, sizeof(dimm->label),
1053 				 "MC#%d_Chan#%d_DIMM#%d", mc, i, j);
1054 			edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n",
1055 				 mc, i, j, dsize >> 20, dimm->nr_pages);
1056 
1057 			ndimms++;
1058 		}
1059 
1060 		if (ndimms && !ecc) {
1061 			igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc);
1062 			return -ENODEV;
1063 		}
1064 	}
1065 
1066 	edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);
1067 
1068 	return 0;
1069 }
1070 
1071 #ifdef CONFIG_EDAC_DEBUG
1072 /* Top of upper usable DRAM */
1073 static u64 igen6_touud;
1074 #define TOUUD_OFFSET	0xa8
1075 
igen6_reg_dump(struct igen6_imc * imc)1076 static void igen6_reg_dump(struct igen6_imc *imc)
1077 {
1078 	int i;
1079 
1080 	edac_dbg(2, "CHANNEL_HASH     : 0x%x\n",
1081 		 readl(imc->window + CHANNEL_HASH_OFFSET));
1082 	edac_dbg(2, "CHANNEL_EHASH    : 0x%x\n",
1083 		 readl(imc->window + CHANNEL_EHASH_OFFSET));
1084 	edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n",
1085 		 readl(imc->window + MAD_INTER_CHANNEL_OFFSET));
1086 	edac_dbg(2, "ECC_ERROR_LOG    : 0x%llx\n",
1087 		 readq(imc->window + ECC_ERROR_LOG_OFFSET));
1088 
1089 	for (i = 0; i < NUM_CHANNELS; i++) {
1090 		edac_dbg(2, "MAD_INTRA_CH%d    : 0x%x\n", i,
1091 			 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4));
1092 		edac_dbg(2, "MAD_DIMM_CH%d     : 0x%x\n", i,
1093 			 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4));
1094 	}
1095 	edac_dbg(2, "TOLUD            : 0x%x", igen6_tolud);
1096 	edac_dbg(2, "TOUUD            : 0x%llx", igen6_touud);
1097 	edac_dbg(2, "TOM              : 0x%llx", igen6_tom);
1098 }
1099 
/* debugfs directory "igen6_test"; NULL when igen6_debug_setup() failed. */
static struct dentry *igen6_test;
1101 
debugfs_u64_set(void * data,u64 val)1102 static int debugfs_u64_set(void *data, u64 val)
1103 {
1104 	u64 ecclog;
1105 
1106 	if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) {
1107 		edac_dbg(0, "Address 0x%llx out of range\n", val);
1108 		return 0;
1109 	}
1110 
1111 	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
1112 
1113 	val  >>= ECC_ERROR_LOG_ADDR_SHIFT;
1114 	ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE;
1115 
1116 	if (!ecclog_gen_pool_add(0, ecclog))
1117 		irq_work_queue(&ecclog_irq_work);
1118 
1119 	return 0;
1120 }
1121 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
1122 
igen6_debug_setup(void)1123 static void igen6_debug_setup(void)
1124 {
1125 	igen6_test = edac_debugfs_create_dir("igen6_test");
1126 	if (!igen6_test)
1127 		return;
1128 
1129 	if (!edac_debugfs_create_file("addr", 0200, igen6_test,
1130 				      NULL, &fops_u64_wo)) {
1131 		debugfs_remove(igen6_test);
1132 		igen6_test = NULL;
1133 	}
1134 }
1135 
igen6_debug_teardown(void)1136 static void igen6_debug_teardown(void)
1137 {
1138 	debugfs_remove_recursive(igen6_test);
1139 }
1140 #else
/* No-op stand-ins used when CONFIG_EDAC_DEBUG is not set. */
static void igen6_reg_dump(struct igen6_imc *imc)
{
}

static void igen6_debug_setup(void)
{
}

static void igen6_debug_teardown(void)
{
}
1144 #endif
1145 
/*
 * Read the platform memory-map registers from PCI config space.
 *
 * Checks that the In-Band ECC IP is available, caches TOLUD and TOM in
 * igen6_tolud / igen6_tom, obtains the MCHBAR base into @mchbar and, with
 * CONFIG_EDAC_DEBUG, caches TOUUD in igen6_touud (a TOUUD read failure is
 * only logged).
 *
 * Returns 0 on success, -ENODEV on any other failure.
 */
static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
{
	/* Assemble a 64-bit register from two 32-bit config reads. */
	union  {
		u64 v;
		struct {
			u32 v_lo;
			u32 v_hi;
		};
	} reg;

	edac_dbg(2, "\n");

	if (!res_cfg->ibecc_available(pdev)) {
		edac_dbg(2, "No In-Band ECC IP\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) {
		igen6_printk(KERN_ERR, "Failed to read TOLUD\n");
		return -ENODEV;
	}

	igen6_tolud &= GENMASK(31, 20);

	if (pci_read_config_dword(pdev, TOM_OFFSET, &reg.v_lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower TOM\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &reg.v_hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper TOM\n");
		return -ENODEV;
	}

	igen6_tom = reg.v & GENMASK_ULL(38, 20);

	if (get_mchbar(pdev, mchbar))
		return -ENODEV;

#ifdef CONFIG_EDAC_DEBUG
	if (pci_read_config_dword(pdev, TOUUD_OFFSET, &reg.v_lo))
		edac_dbg(2, "Failed to read lower TOUUD\n");
	else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &reg.v_hi))
		edac_dbg(2, "Failed to read upper TOUUD\n");
	else
		igen6_touud = reg.v & GENMASK_ULL(38, 20);
#endif

	return 0;
}
1198 
igen6_check(struct mem_ctl_info * mci)1199 static void igen6_check(struct mem_ctl_info *mci)
1200 {
1201 	struct igen6_imc *imc = mci->pvt_info;
1202 	u64 ecclog;
1203 
1204 	/* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
1205 	ecclog = ecclog_read_and_clear(imc);
1206 	if (!ecclog)
1207 		return;
1208 
1209 	if (!ecclog_gen_pool_add(imc->mc, ecclog))
1210 		irq_work_queue(&ecclog_irq_work);
1211 }
1212 
1213 /* Check whether the memory controller is absent. */
igen6_imc_absent(void __iomem * window)1214 static bool igen6_imc_absent(void __iomem *window)
1215 {
1216 	return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
1217 }
1218 
/*
 * Allocate, fill in and register the EDAC memory controller with logical
 * index @mc, whose registers are mapped at @window.
 *
 * On success the mci is recorded in igen6_pvt->imc[mc].mci and 0 is
 * returned. On failure everything allocated here is released and a negative
 * errno is returned; @window is not unmapped by this function.
 */
static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
{
	struct edac_mc_layer layers[2] = {
		{
			.type		= EDAC_MC_LAYER_CHANNEL,
			.size		= NUM_CHANNELS,
			.is_virt_csrow	= false,
		},
		{
			.type		= EDAC_MC_LAYER_SLOT,
			.size		= NUM_DIMMS,
			.is_virt_csrow	= true,
		},
	};
	struct igen6_imc *imc = &igen6_pvt->imc[mc];
	struct mem_ctl_info *mci;
	int rc;

	edac_dbg(2, "\n");

	mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
	if (!mci)
		return -ENOMEM;

	mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc);
	if (!mci->ctl_name) {
		rc = -ENOMEM;
		goto free_mci;
	}

	mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->dev_name = pci_name(pdev);
	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = igen6_check;
	mci->pvt_info = imc;

	device_initialize(&imc->dev);
	/*
	 * The EDAC core identifies a memory controller by mci->pdev (a
	 * struct device pointer). On client SoCs a single pci_dev may back
	 * several memory controllers, so pci_dev->dev alone is not unique.
	 *
	 * Use pci_dev->dev for the first controller and the per-controller
	 * imc->dev for every other one.
	 */
	mci->pdev = mc ? &imc->dev : &pdev->dev;
	imc->mc	= mc;
	imc->pdev = pdev;
	imc->window = window;

	igen6_reg_dump(imc);

	rc = igen6_get_dimm_config(mci);
	if (rc)
		goto free_name;

	rc = edac_mc_add_mc(mci);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc);
		goto free_name;
	}

	imc->mci = mci;
	return 0;

free_name:
	mci->pvt_info = NULL;
	kfree(mci->ctl_name);
free_mci:
	edac_mc_free(mci);
	return rc;
}
1295 
igen6_unregister_mcis(void)1296 static void igen6_unregister_mcis(void)
1297 {
1298 	struct mem_ctl_info *mci;
1299 	struct igen6_imc *imc;
1300 	int i;
1301 
1302 	edac_dbg(2, "\n");
1303 
1304 	for (i = 0; i < res_cfg->num_imc; i++) {
1305 		imc = &igen6_pvt->imc[i];
1306 		mci = imc->mci;
1307 		if (!mci)
1308 			continue;
1309 
1310 		edac_mc_del_mc(mci->pdev);
1311 		kfree(mci->ctl_name);
1312 		mci->pvt_info = NULL;
1313 		edac_mc_free(mci);
1314 		iounmap(imc->window);
1315 	}
1316 }
1317 
/*
 * Probe up to NUM_IMC physical memory controllers located at consecutive
 * MCHBAR_SIZE windows starting at @mchbar and register each one found.
 *
 * Physical controllers (pmc) that read back as absent are skipped; present
 * ones are given consecutive logical indexes (lmc). If fewer controllers
 * than res_cfg->num_imc are found, res_cfg->num_imc is lowered to match.
 *
 * Returns 0 on success, -ENODEV if no controller is present, or a negative
 * errno from ioremap/registration failure, in which case all controllers
 * registered so far are unregistered again.
 */
static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
{
	void __iomem *window;
	int lmc, pmc, rc;
	u64 base;

	for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
		base   = mchbar + pmc * MCHBAR_SIZE;
		window = ioremap(base, MCHBAR_SIZE);
		if (!window) {
			igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
			rc = -ENOMEM;
			goto out_unregister_mcis;
		}

		if (igen6_imc_absent(window)) {
			iounmap(window);
			edac_dbg(2, "Skip absent mc%d\n", pmc);
			continue;
		}

		rc = igen6_register_mci(lmc, window, pdev);
		if (rc)
			goto out_iounmap;

		/* Done, if all present MCs are detected and registered. */
		if (++lmc >= res_cfg->num_imc)
			break;
	}

	if (!lmc) {
		igen6_printk(KERN_ERR, "No mc found.\n");
		return -ENODEV;
	}

	if (lmc < res_cfg->num_imc) {
		/* Newline-terminated so the record is not left open in the log. */
		igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.\n",
			     res_cfg->num_imc, lmc);
		res_cfg->num_imc = lmc;
	}

	return 0;

out_iounmap:
	/* The window of the controller that failed to register. */
	iounmap(window);

out_unregister_mcis:
	igen6_unregister_mcis();

	return rc;
}
1369 
/*
 * Record the memory-slice parameters for a two-controller configuration.
 *
 * ms_s_size is the smaller of the two controller sizes and ms_l_map is the
 * index of the other controller (0 when the sizes are equal). If the
 * resource config has a non-zero cmf_size, the MEM_SLICE_HASH register is
 * read through a temporary mapping and stored in igen6_pvt->ms_hash.
 *
 * Returns 0 on success, -ENODEV if the mapping fails.
 */
static int igen6_mem_slice_setup(u64 mchbar)
{
	struct igen6_imc *mc0 = &igen6_pvt->imc[0];
	struct igen6_imc *mc1 = &igen6_pvt->imc[1];
	u64 cmf_base = mchbar + res_cfg->cmf_base;
	u32 hash_off = res_cfg->ms_hash_offset;
	u32 cmf_size = res_cfg->cmf_size;
	int ms_l_map = (mc0->size < mc1->size) ? 1 : 0;
	u64 ms_s_size = ms_l_map ? mc0->size : mc1->size;
	void __iomem *cmf;
	u64 ms_hash;

	edac_dbg(2, "\n");

	igen6_pvt->ms_s_size = ms_s_size;
	igen6_pvt->ms_l_map  = ms_l_map;

	edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
		 ms_s_size >> 20, ms_l_map);

	/* Nothing to map when the config declares no CMF region. */
	if (!cmf_size)
		return 0;

	cmf = ioremap(cmf_base, cmf_size);
	if (!cmf) {
		igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", cmf_base);
		return -ENODEV;
	}

	ms_hash = readq(cmf + hash_off);
	igen6_pvt->ms_hash = ms_hash;

	edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash);

	iounmap(cmf);

	return 0;
}
1414 
register_err_handler(void)1415 static int register_err_handler(void)
1416 {
1417 	int rc;
1418 
1419 	if (res_cfg->machine_check) {
1420 		mce_register_decode_chain(&ecclog_mce_dec);
1421 		return 0;
1422 	}
1423 
1424 	rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
1425 				  0, IGEN6_NMI_NAME);
1426 	if (rc) {
1427 		igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
1428 		return rc;
1429 	}
1430 
1431 	return 0;
1432 }
1433 
unregister_err_handler(void)1434 static void unregister_err_handler(void)
1435 {
1436 	if (res_cfg->machine_check) {
1437 		mce_unregister_decode_chain(&ecclog_mce_dec);
1438 		return;
1439 	}
1440 
1441 	unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
1442 }
1443 
opstate_set(const struct res_config * cfg,const struct pci_device_id * ent)1444 static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent)
1445 {
1446 	/*
1447 	 * Quirk: Certain SoCs' error reporting interrupts don't work.
1448 	 *        Force polling mode for them to ensure that memory error
1449 	 *        events can be handled.
1450 	 */
1451 	if (ent->device == DID_ADL_N_SKU4) {
1452 		edac_op_state = EDAC_OPSTATE_POLL;
1453 		return;
1454 	}
1455 
1456 	/* Set the mode according to the configuration data. */
1457 	if (cfg->machine_check)
1458 		edac_op_state = EDAC_OPSTATE_INT;
1459 	else
1460 		edac_op_state = EDAC_OPSTATE_NMI;
1461 }
1462 
igen6_probe(struct pci_dev * pdev,const struct pci_device_id * ent)1463 static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1464 {
1465 	u64 mchbar;
1466 	int rc;
1467 
1468 	edac_dbg(2, "\n");
1469 
1470 	igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL);
1471 	if (!igen6_pvt)
1472 		return -ENOMEM;
1473 
1474 	res_cfg = (struct res_config *)ent->driver_data;
1475 
1476 	rc = igen6_pci_setup(pdev, &mchbar);
1477 	if (rc)
1478 		goto fail;
1479 
1480 	opstate_set(res_cfg, ent);
1481 
1482 	rc = igen6_register_mcis(pdev, mchbar);
1483 	if (rc)
1484 		goto fail;
1485 
1486 	if (res_cfg->num_imc > 1) {
1487 		rc = igen6_mem_slice_setup(mchbar);
1488 		if (rc)
1489 			goto fail2;
1490 	}
1491 
1492 	ecclog_pool = ecclog_gen_pool_create();
1493 	if (!ecclog_pool) {
1494 		rc = -ENOMEM;
1495 		goto fail2;
1496 	}
1497 
1498 	INIT_WORK(&ecclog_work, ecclog_work_cb);
1499 	init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);
1500 
1501 	rc = register_err_handler();
1502 	if (rc)
1503 		goto fail3;
1504 
1505 	/* Enable error reporting */
1506 	rc = errcmd_enable_error_reporting(true);
1507 	if (rc) {
1508 		igen6_printk(KERN_ERR, "Failed to enable error reporting\n");
1509 		goto fail4;
1510 	}
1511 
1512 	/* Check if any pending errors before/during the registration of the error handler */
1513 	ecclog_handler();
1514 
1515 	igen6_debug_setup();
1516 	return 0;
1517 fail4:
1518 	unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
1519 fail3:
1520 	gen_pool_destroy(ecclog_pool);
1521 fail2:
1522 	igen6_unregister_mcis();
1523 fail:
1524 	kfree(igen6_pvt);
1525 	return rc;
1526 }
1527 
igen6_remove(struct pci_dev * pdev)1528 static void igen6_remove(struct pci_dev *pdev)
1529 {
1530 	edac_dbg(2, "\n");
1531 
1532 	igen6_debug_teardown();
1533 	errcmd_enable_error_reporting(false);
1534 	unregister_err_handler();
1535 	irq_work_sync(&ecclog_irq_work);
1536 	flush_work(&ecclog_work);
1537 	gen_pool_destroy(ecclog_pool);
1538 	igen6_unregister_mcis();
1539 	kfree(igen6_pvt);
1540 }
1541 
1542 static struct pci_driver igen6_driver = {
1543 	.name     = EDAC_MOD_STR,
1544 	.probe    = igen6_probe,
1545 	.remove   = igen6_remove,
1546 	.id_table = igen6_pci_tbl,
1547 };
1548 
igen6_init(void)1549 static int __init igen6_init(void)
1550 {
1551 	const char *owner;
1552 	int rc;
1553 
1554 	edac_dbg(2, "\n");
1555 
1556 	if (ghes_get_devices())
1557 		return -EBUSY;
1558 
1559 	owner = edac_get_owner();
1560 	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
1561 		return -EBUSY;
1562 
1563 	rc = pci_register_driver(&igen6_driver);
1564 	if (rc)
1565 		return rc;
1566 
1567 	igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION);
1568 
1569 	return 0;
1570 }
1571 
igen6_exit(void)1572 static void __exit igen6_exit(void)
1573 {
1574 	edac_dbg(2, "\n");
1575 
1576 	pci_unregister_driver(&igen6_driver);
1577 }
1578 
1579 module_init(igen6_init);
1580 module_exit(igen6_exit);
1581 
1582 MODULE_LICENSE("GPL v2");
1583 MODULE_AUTHOR("Qiuxu Zhuo");
1584 MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC");
1585