xref: /linux/drivers/edac/imh_base.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Driver for Intel(R) servers with Integrated Memory/IO Hub-based memory controller.
4  * Copyright (c) 2025, Intel Corporation.
5  */
6 
7 #include <linux/kernel.h>
8 #include <linux/io.h>
9 #include <asm/cpu_device_id.h>
10 #include <asm/intel-family.h>
11 #include <asm/mce.h>
12 #include <asm/cpu.h>
13 #include "edac_module.h"
14 #include "skx_common.h"
15 
16 #define IMH_REVISION	"v0.0.1"
17 #define EDAC_MOD_STR	"imh_edac"
18 
19 /* Debug macros */
20 #define imh_printk(level, fmt, arg...)	\
21 	edac_printk(level, "imh", fmt, ##arg)
22 
23 /* Configuration Agent(Ubox) */
24 #define MMIO_BASE_H(reg)		(((u64)GET_BITFIELD(reg, 0, 29)) << 23)
25 #define SOCKET_ID(reg)			GET_BITFIELD(reg, 0, 3)
26 
27 /* PUNIT */
28 #define DDR_IMC_BITMAP(reg)		GET_BITFIELD(reg, 23, 30)
29 
30 /* Memory Controller */
31 #define ECC_ENABLED(reg)		GET_BITFIELD(reg, 2, 2)
32 #define DIMM_POPULATED(reg)		GET_BITFIELD(reg, 15, 15)
33 
34 /* System Cache Agent(SCA) */
35 #define TOLM(reg)			(((u64)GET_BITFIELD(reg, 16, 31)) << 16)
36 #define TOHM(reg)			(((u64)GET_BITFIELD(reg, 16, 51)) << 16)
37 
38 /* Home Agent (HA) */
39 #define NMCACHING(reg)			GET_BITFIELD(reg, 8, 8)
40 
41 /**
42  * struct local_reg - A register as described in the local package view.
43  *
44  * @pkg: (input)	The package where the register is located.
45  * @pbase: (input)	The IP MMIO base physical address in the local package view.
46  * @size: (input)	The IP MMIO size.
47  * @offset: (input)	The register offset from the IP MMIO base @pbase.
48  * @width: (input)	The register width in byte.
49  * @vbase: (internal)	The IP MMIO base virtual address.
50  * @val: (output)	The register value.
51  */
52 struct local_reg {
53 	int pkg;
54 	u64 pbase;
55 	u32 size;
56 	u32 offset;
57 	u8  width;
58 	void __iomem *vbase;
59 	u64 val;
60 };
61 
62 #define DEFINE_LOCAL_REG(name, cfg, package, north, ip_name, ip_idx, reg_name)	\
63 	struct local_reg name = {						\
64 		.pkg	= package,						\
65 		.pbase	= (north ? (cfg)->mmio_base_l_north :			\
66 			  (cfg)->mmio_base_l_south) +				\
67 			  (cfg)->ip_name##_base +				\
68 			  (cfg)->ip_name##_size * (ip_idx),			\
69 		.size	= (cfg)->ip_name##_size,				\
70 		.offset	= (cfg)->ip_name##_reg_##reg_name##_offset,		\
71 		.width	= (cfg)->ip_name##_reg_##reg_name##_width,		\
72 	}
73 
74 static u64 readx(void __iomem *addr, u8 width)
75 {
76 	switch (width) {
77 	case 1:
78 		return readb(addr);
79 	case 2:
80 		return readw(addr);
81 	case 4:
82 		return readl(addr);
83 	case 8:
84 		return readq(addr);
85 	default:
86 		imh_printk(KERN_ERR, "Invalid reg 0x%p width %d\n", addr, width);
87 		return 0;
88 	}
89 }
90 
91 static void __read_local_reg(void *reg)
92 {
93 	struct local_reg *r = (struct local_reg *)reg;
94 
95 	r->val = readx(r->vbase + r->offset, r->width);
96 }
97 
98 /* Read a local-view register. */
99 static bool read_local_reg(struct local_reg *reg)
100 {
101 	int cpu;
102 
103 	/* Get the target CPU in the package @reg->pkg. */
104 	for_each_online_cpu(cpu) {
105 		if (reg->pkg == topology_physical_package_id(cpu))
106 			break;
107 	}
108 
109 	if (cpu >= nr_cpu_ids)
110 		return false;
111 
112 	reg->vbase = ioremap(reg->pbase, reg->size);
113 	if (!reg->vbase) {
114 		imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", reg->pbase);
115 		return false;
116 	}
117 
118 	/* Get the target CPU to read the register. */
119 	smp_call_function_single(cpu, __read_local_reg, reg, 1);
120 	iounmap(reg->vbase);
121 
122 	return true;
123 }
124 
125 /* Get the bitmap of memory controller instances in package @pkg. */
126 static u32 get_imc_bitmap(struct res_config *cfg, int pkg, bool north)
127 {
128 	DEFINE_LOCAL_REG(reg, cfg, pkg, north, pcu, 0, capid3);
129 
130 	if (!read_local_reg(&reg))
131 		return 0;
132 
133 	edac_dbg(2, "Pkg%d %s mc instances bitmap 0x%llx (reg 0x%llx)\n",
134 		 pkg, north ? "north" : "south",
135 		 DDR_IMC_BITMAP(reg.val), reg.val);
136 
137 	return DDR_IMC_BITMAP(reg.val);
138 }
139 
140 static void imc_release(struct device *dev)
141 {
142 	edac_dbg(2, "imc device %s released\n", dev_name(dev));
143 	kfree(dev);
144 }
145 
146 static int __get_ddr_munits(struct res_config *cfg, struct skx_dev *d,
147 			    bool north, int lmc)
148 {
149 	unsigned long size = cfg->ddr_chan_mmio_sz * cfg->ddr_chan_num;
150 	unsigned long bitmap = get_imc_bitmap(cfg, d->pkg, north);
151 	void __iomem *mbase;
152 	struct device *dev;
153 	int i, rc, pmc;
154 	u64 base;
155 
156 	for_each_set_bit(i, &bitmap, sizeof(bitmap) * 8) {
157 		base  = north ? d->mmio_base_h_north : d->mmio_base_h_south;
158 		base += cfg->ddr_imc_base + size * i;
159 
160 		edac_dbg(2, "Pkg%d mc%d mmio base 0x%llx size 0x%lx\n",
161 			 d->pkg, lmc, base, size);
162 
163 		/* Set up the imc MMIO. */
164 		mbase = ioremap(base, size);
165 		if (!mbase) {
166 			imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", base);
167 			return -ENOMEM;
168 		}
169 
170 		d->imc[lmc].mbase = mbase;
171 		d->imc[lmc].lmc = lmc;
172 
173 		/* Create the imc device instance. */
174 		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
175 		if (!dev)
176 			return -ENOMEM;
177 
178 		dev->release = imc_release;
179 		device_initialize(dev);
180 		rc = dev_set_name(dev, "0x%llx", base);
181 		if (rc) {
182 			imh_printk(KERN_ERR, "Failed to set dev name\n");
183 			put_device(dev);
184 			return rc;
185 		}
186 
187 		d->imc[lmc].dev = dev;
188 
189 		/* Set up the imc index mapping. */
190 		pmc = north ? i : 8 + i;
191 		skx_set_mc_mapping(d, pmc, lmc);
192 
193 		lmc++;
194 	}
195 
196 	return lmc;
197 }
198 
199 static bool get_ddr_munits(struct res_config *cfg, struct skx_dev *d)
200 {
201 	int lmc = __get_ddr_munits(cfg, d, true, 0);
202 
203 	if (lmc < 0)
204 		return false;
205 
206 	lmc = __get_ddr_munits(cfg, d, false, lmc);
207 	if (lmc <= 0)
208 		return false;
209 
210 	return true;
211 }
212 
213 static bool get_socket_id(struct res_config *cfg, struct skx_dev *d)
214 {
215 	DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ubox, 0, socket_id);
216 	u8 src_id;
217 	int i;
218 
219 	if (!read_local_reg(&reg))
220 		return false;
221 
222 	src_id = SOCKET_ID(reg.val);
223 	edac_dbg(2, "socket id 0x%x (reg 0x%llx)\n", src_id, reg.val);
224 
225 	for (i = 0; i < cfg->ddr_imc_num; i++)
226 		d->imc[i].src_id   = src_id;
227 
228 	return true;
229 }
230 
231 /* Get TOLM (Top Of Low Memory) and TOHM (Top Of High Memory) parameters. */
232 static bool imh_get_tolm_tohm(struct res_config *cfg, u64 *tolm, u64 *tohm)
233 {
234 	DEFINE_LOCAL_REG(reg, cfg, 0, true, sca, 0, tolm);
235 
236 	if (!read_local_reg(&reg))
237 		return false;
238 
239 	*tolm = TOLM(reg.val);
240 	edac_dbg(2, "tolm 0x%llx (reg 0x%llx)\n", *tolm, reg.val);
241 
242 	DEFINE_LOCAL_REG(reg2, cfg, 0, true, sca, 0, tohm);
243 
244 	if (!read_local_reg(&reg2))
245 		return false;
246 
247 	*tohm = TOHM(reg2.val);
248 	edac_dbg(2, "tohm 0x%llx (reg 0x%llx)\n", *tohm, reg2.val);
249 
250 	return true;
251 }
252 
253 /* Get the system-view MMIO_BASE_H for {north,south}-IMH. */
254 static int imh_get_all_mmio_base_h(struct res_config *cfg, struct list_head *edac_list)
255 {
256 	int i, n = topology_max_packages(), imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num;
257 	struct skx_dev *d;
258 
259 	for (i = 0; i < n; i++) {
260 		d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL);
261 		if (!d)
262 			return -ENOMEM;
263 
264 		DEFINE_LOCAL_REG(reg, cfg, i, true, ubox, 0, mmio_base);
265 
266 		/* Get MMIO_BASE_H for the north-IMH. */
267 		if (!read_local_reg(&reg) || !reg.val) {
268 			kfree(d);
269 			imh_printk(KERN_ERR, "Pkg%d has no north mmio_base_h\n", i);
270 			return -ENODEV;
271 		}
272 
273 		d->mmio_base_h_north = MMIO_BASE_H(reg.val);
274 		edac_dbg(2, "Pkg%d north mmio_base_h 0x%llx (reg 0x%llx)\n",
275 			 i, d->mmio_base_h_north, reg.val);
276 
277 		/* Get MMIO_BASE_H for the south-IMH (optional). */
278 		DEFINE_LOCAL_REG(reg2, cfg, i, false, ubox, 0, mmio_base);
279 
280 		if (read_local_reg(&reg2)) {
281 			d->mmio_base_h_south = MMIO_BASE_H(reg2.val);
282 			edac_dbg(2, "Pkg%d south mmio_base_h 0x%llx (reg 0x%llx)\n",
283 				 i, d->mmio_base_h_south, reg2.val);
284 		}
285 
286 		d->pkg = i;
287 		d->num_imc = imc_num;
288 		skx_init_mc_mapping(d);
289 		list_add_tail(&d->list, edac_list);
290 	}
291 
292 	return 0;
293 }
294 
295 /* Get the number of per-package memory controllers. */
296 static int imh_get_imc_num(struct res_config *cfg)
297 {
298 	int imc_num = hweight32(get_imc_bitmap(cfg, 0, true)) +
299 		      hweight32(get_imc_bitmap(cfg, 0, false));
300 
301 	if (!imc_num) {
302 		imh_printk(KERN_ERR, "Invalid mc number\n");
303 		return -ENODEV;
304 	}
305 
306 	if (cfg->ddr_imc_num != imc_num) {
307 		/*
308 		 * Update the configuration data to reflect the number of
309 		 * present DDR memory controllers.
310 		 */
311 		cfg->ddr_imc_num = imc_num;
312 		edac_dbg(2, "Set ddr mc number %d\n", imc_num);
313 	}
314 
315 	return 0;
316 }
317 
318 /* Get all memory controllers' parameters. */
319 static int imh_get_munits(struct res_config *cfg, struct list_head *edac_list)
320 {
321 	struct skx_imc *imc;
322 	struct skx_dev *d;
323 	u8 mc = 0;
324 	int i;
325 
326 	list_for_each_entry(d, edac_list, list) {
327 		if (!get_ddr_munits(cfg, d)) {
328 			imh_printk(KERN_ERR, "No mc found\n");
329 			return -ENODEV;
330 		}
331 
332 		if (!get_socket_id(cfg, d)) {
333 			imh_printk(KERN_ERR, "Failed to get socket id\n");
334 			return -ENODEV;
335 		}
336 
337 		for (i = 0; i < cfg->ddr_imc_num; i++) {
338 			imc = &d->imc[i];
339 			if (!imc->mbase)
340 				continue;
341 
342 			imc->chan_mmio_sz = cfg->ddr_chan_mmio_sz;
343 			imc->num_channels = cfg->ddr_chan_num;
344 			imc->num_dimms    = cfg->ddr_dimm_num;
345 			imc->mc		  = mc++;
346 		}
347 	}
348 
349 	return 0;
350 }
351 
352 static bool check_2lm_enabled(struct res_config *cfg, struct skx_dev *d, int ha_idx)
353 {
354 	DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ha, ha_idx, mode);
355 
356 	if (!read_local_reg(&reg))
357 		return false;
358 
359 	if (!NMCACHING(reg.val))
360 		return false;
361 
362 	edac_dbg(2, "2-level memory configuration (reg 0x%llx, ha idx %d)\n", reg.val, ha_idx);
363 	return true;
364 }
365 
366 /* Check whether the system has a 2-level memory configuration. */
367 static bool imh_2lm_enabled(struct res_config *cfg, struct list_head *head)
368 {
369 	struct skx_dev *d;
370 	int i;
371 
372 	list_for_each_entry(d, head, list) {
373 		for (i = 0; i < cfg->ddr_imc_num; i++)
374 			if (check_2lm_enabled(cfg, d, i))
375 				return true;
376 	}
377 
378 	return false;
379 }
380 
381 /* Helpers to read memory controller registers */
382 static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width)
383 {
384 	return readx(imc->mbase + imc->chan_mmio_sz * chan + offset, width);
385 }
386 
387 static u32 read_imc_mcmtr(struct res_config *cfg, struct skx_imc *imc, int chan)
388 {
389 	return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_mcmtr_offset, cfg->ddr_reg_mcmtr_width);
390 }
391 
392 static u32 read_imc_dimmmtr(struct res_config *cfg, struct skx_imc *imc, int chan, int dimm)
393 {
394 	return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_dimmmtr_offset +
395 				 cfg->ddr_reg_dimmmtr_width * dimm,
396 				 cfg->ddr_reg_dimmmtr_width);
397 }
398 
399 static bool ecc_enabled(u32 mcmtr)
400 {
401 	return (bool)ECC_ENABLED(mcmtr);
402 }
403 
404 static bool dimm_populated(u32 dimmmtr)
405 {
406 	return (bool)DIMM_POPULATED(dimmmtr);
407 }
408 
409 /* Get each DIMM's configurations of the memory controller @mci. */
410 static int imh_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg)
411 {
412 	struct skx_pvt *pvt = mci->pvt_info;
413 	struct skx_imc *imc = pvt->imc;
414 	struct dimm_info *dimm;
415 	u32 mcmtr, dimmmtr;
416 	int i, j, ndimms;
417 
418 	for (i = 0; i < imc->num_channels; i++) {
419 		if (!imc->mbase)
420 			continue;
421 
422 		mcmtr = read_imc_mcmtr(cfg, imc, i);
423 
424 		for (ndimms = 0, j = 0; j < imc->num_dimms; j++) {
425 			dimmmtr = read_imc_dimmmtr(cfg, imc, i, j);
426 			edac_dbg(1, "mcmtr 0x%x dimmmtr 0x%x (mc%d ch%d dimm%d)\n",
427 				 mcmtr, dimmmtr, imc->mc, i, j);
428 
429 			if (!dimm_populated(dimmmtr))
430 				continue;
431 
432 			dimm = edac_get_dimm(mci, i, j, 0);
433 			ndimms += skx_get_dimm_info(dimmmtr, 0, 0, dimm,
434 						    imc, i, j, cfg);
435 		}
436 
437 		if (ndimms && !ecc_enabled(mcmtr)) {
438 			imh_printk(KERN_ERR, "ECC is disabled on mc%d ch%d\n",
439 				   imc->mc, i);
440 			return -ENODEV;
441 		}
442 	}
443 
444 	return 0;
445 }
446 
447 /* Register all memory controllers to the EDAC core. */
448 static int imh_register_mci(struct res_config *cfg, struct list_head *edac_list)
449 {
450 	struct skx_imc *imc;
451 	struct skx_dev *d;
452 	int i, rc;
453 
454 	list_for_each_entry(d, edac_list, list) {
455 		for (i = 0; i < cfg->ddr_imc_num; i++) {
456 			imc = &d->imc[i];
457 			if (!imc->mbase)
458 				continue;
459 
460 			rc = skx_register_mci(imc, imc->dev,
461 					      dev_name(imc->dev),
462 					      "Intel IMH-based Socket",
463 					      EDAC_MOD_STR,
464 					      imh_get_dimm_config, cfg);
465 			if (rc)
466 				return rc;
467 		}
468 	}
469 
470 	return 0;
471 }
472 
473 static struct res_config dmr_cfg = {
474 	.type				= DMR,
475 	.support_ddr5			= true,
476 	.mmio_base_l_north		= 0xf6800000,
477 	.mmio_base_l_south		= 0xf6000000,
478 	.ddr_chan_num			= 1,
479 	.ddr_dimm_num			= 2,
480 	.ddr_imc_base			= 0x39b000,
481 	.ddr_chan_mmio_sz		= 0x8000,
482 	.ddr_reg_mcmtr_offset		= 0x360,
483 	.ddr_reg_mcmtr_width		= 4,
484 	.ddr_reg_dimmmtr_offset		= 0x370,
485 	.ddr_reg_dimmmtr_width		= 4,
486 	.ubox_base			= 0x0,
487 	.ubox_size			= 0x2000,
488 	.ubox_reg_mmio_base_offset	= 0x580,
489 	.ubox_reg_mmio_base_width	= 4,
490 	.ubox_reg_socket_id_offset	= 0x1080,
491 	.ubox_reg_socket_id_width	= 4,
492 	.pcu_base			= 0x3000,
493 	.pcu_size			= 0x10000,
494 	.pcu_reg_capid3_offset		= 0x290,
495 	.pcu_reg_capid3_width		= 4,
496 	.sca_base			= 0x24c000,
497 	.sca_size			= 0x2500,
498 	.sca_reg_tolm_offset		= 0x2100,
499 	.sca_reg_tolm_width		= 8,
500 	.sca_reg_tohm_offset		= 0x2108,
501 	.sca_reg_tohm_width		= 8,
502 	.ha_base			= 0x3eb000,
503 	.ha_size			= 0x1000,
504 	.ha_reg_mode_offset		= 0x4a0,
505 	.ha_reg_mode_width		= 4,
506 };
507 
508 static const struct x86_cpu_id imh_cpuids[] = {
509 	X86_MATCH_VFM(INTEL_DIAMONDRAPIDS_X, &dmr_cfg),
510 	{}
511 };
512 MODULE_DEVICE_TABLE(x86cpu, imh_cpuids);
513 
514 static struct notifier_block imh_mce_dec = {
515 	.notifier_call	= skx_mce_check_error,
516 	.priority	= MCE_PRIO_EDAC,
517 };
518 
519 static int __init imh_init(void)
520 {
521 	const struct x86_cpu_id *id;
522 	struct list_head *edac_list;
523 	struct res_config *cfg;
524 	const char *owner;
525 	u64 tolm, tohm;
526 	int rc;
527 
528 	edac_dbg(2, "\n");
529 
530 	if (ghes_get_devices())
531 		return -EBUSY;
532 
533 	owner = edac_get_owner();
534 	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
535 		return -EBUSY;
536 
537 	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
538 		return -ENODEV;
539 
540 	id = x86_match_cpu(imh_cpuids);
541 	if (!id)
542 		return -ENODEV;
543 	cfg = (struct res_config *)id->driver_data;
544 	skx_set_res_cfg(cfg);
545 
546 	if (!imh_get_tolm_tohm(cfg, &tolm, &tohm))
547 		return -ENODEV;
548 
549 	skx_set_hi_lo(tolm, tohm);
550 
551 	rc = imh_get_imc_num(cfg);
552 	if (rc < 0)
553 		goto fail;
554 
555 	edac_list = skx_get_edac_list();
556 
557 	rc = imh_get_all_mmio_base_h(cfg, edac_list);
558 	if (rc)
559 		goto fail;
560 
561 	rc = imh_get_munits(cfg, edac_list);
562 	if (rc)
563 		goto fail;
564 
565 	skx_set_mem_cfg(imh_2lm_enabled(cfg, edac_list));
566 
567 	rc = imh_register_mci(cfg, edac_list);
568 	if (rc)
569 		goto fail;
570 
571 	rc = skx_adxl_get();
572 	if (rc)
573 		goto fail;
574 
575 	opstate_init();
576 	mce_register_decode_chain(&imh_mce_dec);
577 	skx_setup_debug("imh_test");
578 
579 	imh_printk(KERN_INFO, "%s\n", IMH_REVISION);
580 
581 	return 0;
582 fail:
583 	skx_remove();
584 	return rc;
585 }
586 
587 static void __exit imh_exit(void)
588 {
589 	edac_dbg(2, "\n");
590 
591 	skx_teardown_debug();
592 	mce_unregister_decode_chain(&imh_mce_dec);
593 	skx_adxl_put();
594 	skx_remove();
595 }
596 
597 module_init(imh_init);
598 module_exit(imh_exit);
599 
600 MODULE_LICENSE("GPL");
601 MODULE_AUTHOR("Qiuxu Zhuo");
602 MODULE_DESCRIPTION("MC Driver for Intel servers using IMH-based memory controller");
603