1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Cortex A72 EDAC L1 and L2 cache error detection 4 * 5 * Copyright (c) 2020 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de> 6 * Copyright (c) 2025 Microsoft Corporation, <vijayb@linux.microsoft.com> 7 * 8 * Based on Code from: 9 * Copyright (c) 2018, NXP Semiconductor 10 * Author: York Sun <york.sun@nxp.com> 11 */ 12 13 #include <linux/module.h> 14 #include <linux/of.h> 15 #include <linux/bitfield.h> 16 #include <asm/smp_plat.h> 17 18 #include "edac_module.h" 19 20 #define DRVNAME "a72-edac" 21 22 #define SYS_CPUMERRSR_EL1 sys_reg(3, 1, 15, 2, 2) 23 #define SYS_L2MERRSR_EL1 sys_reg(3, 1, 15, 2, 3) 24 25 #define CPUMERRSR_EL1_RAMID GENMASK(30, 24) 26 #define L2MERRSR_EL1_CPUID_WAY GENMASK(21, 18) 27 28 #define CPUMERRSR_EL1_VALID BIT(31) 29 #define CPUMERRSR_EL1_FATAL BIT(63) 30 #define L2MERRSR_EL1_VALID BIT(31) 31 #define L2MERRSR_EL1_FATAL BIT(63) 32 33 #define L1_I_TAG_RAM 0x00 34 #define L1_I_DATA_RAM 0x01 35 #define L1_D_TAG_RAM 0x08 36 #define L1_D_DATA_RAM 0x09 37 #define TLB_RAM 0x18 38 39 #define MESSAGE_SIZE 64 40 41 struct mem_err_synd_reg { 42 u64 cpu_mesr; 43 u64 l2_mesr; 44 }; 45 46 static struct cpumask compat_mask; 47 48 static void report_errors(struct edac_device_ctl_info *edac_ctl, int cpu, 49 struct mem_err_synd_reg *mesr) 50 { 51 u64 cpu_mesr = mesr->cpu_mesr; 52 u64 l2_mesr = mesr->l2_mesr; 53 char msg[MESSAGE_SIZE]; 54 55 if (cpu_mesr & CPUMERRSR_EL1_VALID) { 56 const char *str; 57 bool fatal = cpu_mesr & CPUMERRSR_EL1_FATAL; 58 59 switch (FIELD_GET(CPUMERRSR_EL1_RAMID, cpu_mesr)) { 60 case L1_I_TAG_RAM: 61 str = "L1-I Tag RAM"; 62 break; 63 case L1_I_DATA_RAM: 64 str = "L1-I Data RAM"; 65 break; 66 case L1_D_TAG_RAM: 67 str = "L1-D Tag RAM"; 68 break; 69 case L1_D_DATA_RAM: 70 str = "L1-D Data RAM"; 71 break; 72 case TLB_RAM: 73 str = "TLB RAM"; 74 break; 75 default: 76 str = "Unspecified"; 77 break; 78 } 79 80 snprintf(msg, MESSAGE_SIZE, "%s %s error(s) on CPU %d", 81 str, fatal ? "fatal" : "correctable", cpu); 82 83 if (fatal) 84 edac_device_handle_ue(edac_ctl, cpu, 0, msg); 85 else 86 edac_device_handle_ce(edac_ctl, cpu, 0, msg); 87 } 88 89 if (l2_mesr & L2MERRSR_EL1_VALID) { 90 bool fatal = l2_mesr & L2MERRSR_EL1_FATAL; 91 92 snprintf(msg, MESSAGE_SIZE, "L2 %s error(s) on CPU %d CPUID/WAY 0x%lx", 93 fatal ? "fatal" : "correctable", cpu, 94 FIELD_GET(L2MERRSR_EL1_CPUID_WAY, l2_mesr)); 95 if (fatal) 96 edac_device_handle_ue(edac_ctl, cpu, 1, msg); 97 else 98 edac_device_handle_ce(edac_ctl, cpu, 1, msg); 99 } 100 } 101 102 static void read_errors(void *data) 103 { 104 struct mem_err_synd_reg *mesr = data; 105 106 mesr->cpu_mesr = read_sysreg_s(SYS_CPUMERRSR_EL1); 107 if (mesr->cpu_mesr & CPUMERRSR_EL1_VALID) { 108 write_sysreg_s(0, SYS_CPUMERRSR_EL1); 109 isb(); 110 } 111 mesr->l2_mesr = read_sysreg_s(SYS_L2MERRSR_EL1); 112 if (mesr->l2_mesr & L2MERRSR_EL1_VALID) { 113 write_sysreg_s(0, SYS_L2MERRSR_EL1); 114 isb(); 115 } 116 } 117 118 static void a72_edac_check(struct edac_device_ctl_info *edac_ctl) 119 { 120 struct mem_err_synd_reg mesr; 121 int cpu; 122 123 cpus_read_lock(); 124 for_each_cpu_and(cpu, cpu_online_mask, &compat_mask) { 125 smp_call_function_single(cpu, read_errors, &mesr, true); 126 report_errors(edac_ctl, cpu, &mesr); 127 } 128 cpus_read_unlock(); 129 } 130 131 static int a72_edac_probe(struct platform_device *pdev) 132 { 133 struct edac_device_ctl_info *edac_ctl; 134 struct device *dev = &pdev->dev; 135 int rc; 136 137 edac_ctl = edac_device_alloc_ctl_info(0, "cpu", 138 num_possible_cpus(), "L", 2, 1, 139 edac_device_alloc_index()); 140 if (!edac_ctl) 141 return -ENOMEM; 142 143 edac_ctl->edac_check = a72_edac_check; 144 edac_ctl->dev = dev; 145 edac_ctl->mod_name = dev_name(dev); 146 edac_ctl->dev_name = dev_name(dev); 147 edac_ctl->ctl_name = DRVNAME; 148 dev_set_drvdata(dev, edac_ctl); 149 150 rc = edac_device_add_device(edac_ctl); 151 if (rc) 152 goto out_dev; 153 154 return 0; 155 156 out_dev: 157 edac_device_free_ctl_info(edac_ctl); 158 159 return rc; 160 } 161 162 static void a72_edac_remove(struct platform_device *pdev) 163 { 164 struct edac_device_ctl_info *edac_ctl = dev_get_drvdata(&pdev->dev); 165 166 edac_device_del_device(edac_ctl->dev); 167 edac_device_free_ctl_info(edac_ctl); 168 } 169 170 static const struct of_device_id cortex_arm64_edac_of_match[] = { 171 { .compatible = "arm,cortex-a72" }, 172 {} 173 }; 174 MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match); 175 176 static struct platform_driver a72_edac_driver = { 177 .probe = a72_edac_probe, 178 .remove = a72_edac_remove, 179 .driver = { 180 .name = DRVNAME, 181 }, 182 }; 183 184 static struct platform_device *a72_pdev; 185 186 static int __init a72_edac_driver_init(void) 187 { 188 int cpu; 189 190 for_each_possible_cpu(cpu) { 191 struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu); 192 if (np) { 193 if (of_match_node(cortex_arm64_edac_of_match, np) && 194 of_property_read_bool(np, "edac-enabled")) { 195 cpumask_set_cpu(cpu, &compat_mask); 196 } 197 } else { 198 pr_warn("failed to find device node for CPU %d\n", cpu); 199 } 200 } 201 202 if (cpumask_empty(&compat_mask)) 203 return 0; 204 205 a72_pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0); 206 if (IS_ERR(a72_pdev)) { 207 pr_err("failed to register A72 EDAC device\n"); 208 return PTR_ERR(a72_pdev); 209 } 210 211 return platform_driver_register(&a72_edac_driver); 212 } 213 214 static void __exit a72_edac_driver_exit(void) 215 { 216 platform_device_unregister(a72_pdev); 217 platform_driver_unregister(&a72_edac_driver); 218 } 219 220 module_init(a72_edac_driver_init); 221 module_exit(a72_edac_driver_exit); 222 223 MODULE_LICENSE("GPL"); 224 MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>"); 225 MODULE_DESCRIPTION("Cortex A72 L1 and L2 cache EDAC driver"); 226