xref: /linux/drivers/edac/a72_edac.c (revision 03f76ddff5b04a808ae16c06418460151e2fdd4b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Cortex A72 EDAC L1 and L2 cache error detection
4  *
5  * Copyright (c) 2020 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
6  * Copyright (c) 2025 Microsoft Corporation, <vijayb@linux.microsoft.com>
7  *
8  * Based on Code from:
9  * Copyright (c) 2018, NXP Semiconductor
10  * Author: York Sun <york.sun@nxp.com>
11  */
12 
13 #include <linux/module.h>
14 #include <linux/of.h>
15 #include <linux/bitfield.h>
16 #include <asm/smp_plat.h>
17 
18 #include "edac_module.h"
19 
#define DRVNAME			"a72-edac"

/* Size of the on-stack buffer used to format EDAC report messages. */
#define MESSAGE_SIZE		64

/*
 * CPUMERRSR_EL1: per-CPU memory error syndrome register, used by this
 * driver for L1 cache and TLB RAM errors.
 */
#define SYS_CPUMERRSR_EL1	sys_reg(3, 1, 15, 2, 2)
#define CPUMERRSR_EL1_FATAL	BIT(63)
#define CPUMERRSR_EL1_VALID	BIT(31)
#define CPUMERRSR_EL1_RAMID	GENMASK(30, 24)

/* Values of the CPUMERRSR_EL1 RAMID field decoded by report_errors(). */
#define L1_I_TAG_RAM		0x00
#define L1_I_DATA_RAM		0x01
#define L1_D_TAG_RAM		0x08
#define L1_D_DATA_RAM		0x09
#define TLB_RAM			0x18

/* L2MERRSR_EL1: memory error syndrome register used for L2 cache errors. */
#define SYS_L2MERRSR_EL1	sys_reg(3, 1, 15, 2, 3)
#define L2MERRSR_EL1_FATAL	BIT(63)
#define L2MERRSR_EL1_VALID	BIT(31)
#define L2MERRSR_EL1_CPUID_WAY	GENMASK(21, 18)
40 
41 struct mem_err_synd_reg {
42 	u64 cpu_mesr;
43 	u64 l2_mesr;
44 };
45 
46 static struct cpumask compat_mask;
47 
report_errors(struct edac_device_ctl_info * edac_ctl,int cpu,struct mem_err_synd_reg * mesr)48 static void report_errors(struct edac_device_ctl_info *edac_ctl, int cpu,
49 			  struct mem_err_synd_reg *mesr)
50 {
51 	u64 cpu_mesr = mesr->cpu_mesr;
52 	u64 l2_mesr = mesr->l2_mesr;
53 	char msg[MESSAGE_SIZE];
54 
55 	if (cpu_mesr & CPUMERRSR_EL1_VALID) {
56 		const char *str;
57 		bool fatal = cpu_mesr & CPUMERRSR_EL1_FATAL;
58 
59 		switch (FIELD_GET(CPUMERRSR_EL1_RAMID, cpu_mesr)) {
60 		case L1_I_TAG_RAM:
61 			str = "L1-I Tag RAM";
62 			break;
63 		case L1_I_DATA_RAM:
64 			str = "L1-I Data RAM";
65 			break;
66 		case L1_D_TAG_RAM:
67 			str = "L1-D Tag RAM";
68 			break;
69 		case L1_D_DATA_RAM:
70 			str = "L1-D Data RAM";
71 			break;
72 		case TLB_RAM:
73 			str = "TLB RAM";
74 			break;
75 		default:
76 			str = "Unspecified";
77 			break;
78 		}
79 
80 		snprintf(msg, MESSAGE_SIZE, "%s %s error(s) on CPU %d",
81 			 str, fatal ? "fatal" : "correctable", cpu);
82 
83 		if (fatal)
84 			edac_device_handle_ue(edac_ctl, cpu, 0, msg);
85 		else
86 			edac_device_handle_ce(edac_ctl, cpu, 0, msg);
87 	}
88 
89 	if (l2_mesr & L2MERRSR_EL1_VALID) {
90 		bool fatal = l2_mesr & L2MERRSR_EL1_FATAL;
91 
92 		snprintf(msg, MESSAGE_SIZE, "L2 %s error(s) on CPU %d CPUID/WAY 0x%lx",
93 			 fatal ? "fatal" : "correctable", cpu,
94 			 FIELD_GET(L2MERRSR_EL1_CPUID_WAY, l2_mesr));
95 		if (fatal)
96 			edac_device_handle_ue(edac_ctl, cpu, 1, msg);
97 		else
98 			edac_device_handle_ce(edac_ctl, cpu, 1, msg);
99 	}
100 }
101 
read_errors(void * data)102 static void read_errors(void *data)
103 {
104 	struct mem_err_synd_reg *mesr = data;
105 
106 	mesr->cpu_mesr = read_sysreg_s(SYS_CPUMERRSR_EL1);
107 	if (mesr->cpu_mesr & CPUMERRSR_EL1_VALID) {
108 		write_sysreg_s(0, SYS_CPUMERRSR_EL1);
109 		isb();
110 	}
111 	mesr->l2_mesr = read_sysreg_s(SYS_L2MERRSR_EL1);
112 	if (mesr->l2_mesr & L2MERRSR_EL1_VALID) {
113 		write_sysreg_s(0, SYS_L2MERRSR_EL1);
114 		isb();
115 	}
116 }
117 
a72_edac_check(struct edac_device_ctl_info * edac_ctl)118 static void a72_edac_check(struct edac_device_ctl_info *edac_ctl)
119 {
120 	struct mem_err_synd_reg mesr;
121 	int cpu;
122 
123 	cpus_read_lock();
124 	for_each_cpu_and(cpu, cpu_online_mask, &compat_mask) {
125 		smp_call_function_single(cpu, read_errors, &mesr, true);
126 		report_errors(edac_ctl, cpu, &mesr);
127 	}
128 	cpus_read_unlock();
129 }
130 
a72_edac_probe(struct platform_device * pdev)131 static int a72_edac_probe(struct platform_device *pdev)
132 {
133 	struct edac_device_ctl_info *edac_ctl;
134 	struct device *dev = &pdev->dev;
135 	int rc;
136 
137 	edac_ctl = edac_device_alloc_ctl_info(0, "cpu",
138 					      num_possible_cpus(), "L", 2, 1,
139 					      edac_device_alloc_index());
140 	if (!edac_ctl)
141 		return -ENOMEM;
142 
143 	edac_ctl->edac_check = a72_edac_check;
144 	edac_ctl->dev = dev;
145 	edac_ctl->mod_name = dev_name(dev);
146 	edac_ctl->dev_name = dev_name(dev);
147 	edac_ctl->ctl_name = DRVNAME;
148 	dev_set_drvdata(dev, edac_ctl);
149 
150 	rc = edac_device_add_device(edac_ctl);
151 	if (rc)
152 		goto out_dev;
153 
154 	return 0;
155 
156 out_dev:
157 	edac_device_free_ctl_info(edac_ctl);
158 
159 	return rc;
160 }
161 
a72_edac_remove(struct platform_device * pdev)162 static void a72_edac_remove(struct platform_device *pdev)
163 {
164 	struct edac_device_ctl_info *edac_ctl = dev_get_drvdata(&pdev->dev);
165 
166 	edac_device_del_device(edac_ctl->dev);
167 	edac_device_free_ctl_info(edac_ctl);
168 }
169 
170 static const struct of_device_id cortex_arm64_edac_of_match[] = {
171 	{ .compatible = "arm,cortex-a72" },
172 	{}
173 };
174 MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match);
175 
176 static struct platform_driver a72_edac_driver = {
177 	.probe = a72_edac_probe,
178 	.remove = a72_edac_remove,
179 	.driver = {
180 		.name = DRVNAME,
181 	},
182 };
183 
184 static struct platform_device *a72_pdev;
185 
a72_edac_driver_init(void)186 static int __init a72_edac_driver_init(void)
187 {
188 	int cpu;
189 
190 	for_each_possible_cpu(cpu) {
191 		struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu);
192 		if (np) {
193 			if (of_match_node(cortex_arm64_edac_of_match, np) &&
194 			    of_property_read_bool(np, "edac-enabled")) {
195 				cpumask_set_cpu(cpu, &compat_mask);
196 			}
197 		} else {
198 			pr_warn("failed to find device node for CPU %d\n", cpu);
199 		}
200 	}
201 
202 	if (cpumask_empty(&compat_mask))
203 		return 0;
204 
205 	a72_pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0);
206 	if (IS_ERR(a72_pdev)) {
207 		pr_err("failed to register A72 EDAC device\n");
208 		return PTR_ERR(a72_pdev);
209 	}
210 
211 	return platform_driver_register(&a72_edac_driver);
212 }
213 
a72_edac_driver_exit(void)214 static void __exit a72_edac_driver_exit(void)
215 {
216 	platform_device_unregister(a72_pdev);
217 	platform_driver_unregister(&a72_edac_driver);
218 }
219 
220 module_init(a72_edac_driver_init);
221 module_exit(a72_edac_driver_exit);
222 
223 MODULE_LICENSE("GPL");
224 MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
225 MODULE_DESCRIPTION("Cortex A72 L1 and L2 cache EDAC driver");
226