1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Bluefield-specific EDAC driver. 4 * 5 * Copyright (c) 2019 Mellanox Technologies. 6 */ 7 8 #include <linux/acpi.h> 9 #include <linux/arm-smccc.h> 10 #include <linux/bitfield.h> 11 #include <linux/edac.h> 12 #include <linux/io.h> 13 #include <linux/module.h> 14 #include <linux/platform_device.h> 15 16 #include "edac_module.h" 17 18 #define DRIVER_NAME "bluefield-edac" 19 20 /* 21 * Mellanox BlueField EMI (External Memory Interface) register definitions. 22 */ 23 24 #define MLXBF_ECC_CNT 0x340 25 #define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0) 26 #define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16) 27 28 #define MLXBF_ECC_ERR 0x348 29 #define MLXBF_ECC_ERR__SECC BIT(0) 30 #define MLXBF_ECC_ERR__DECC BIT(16) 31 32 #define MLXBF_ECC_LATCH_SEL 0x354 33 #define MLXBF_ECC_LATCH_SEL__START BIT(24) 34 35 #define MLXBF_ERR_ADDR_0 0x358 36 37 #define MLXBF_ERR_ADDR_1 0x37c 38 39 #define MLXBF_SYNDROM 0x35c 40 #define MLXBF_SYNDROM__DERR BIT(0) 41 #define MLXBF_SYNDROM__SERR BIT(1) 42 #define MLXBF_SYNDROM__SYN GENMASK(25, 16) 43 44 #define MLXBF_ADD_INFO 0x364 45 #define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8) 46 47 #define MLXBF_EDAC_MAX_DIMM_PER_MC 2 48 #define MLXBF_EDAC_ERROR_GRAIN 8 49 50 #define MLXBF_WRITE_REG_32 (0x82000009) 51 #define MLXBF_READ_REG_32 (0x8200000A) 52 #define MLXBF_SIP_SVC_VERSION (0x8200ff03) 53 54 #define MLXBF_SMCCC_ACCESS_VIOLATION (-4) 55 56 #define MLXBF_SVC_REQ_MAJOR 0 57 #define MLXBF_SVC_REQ_MINOR 3 58 59 /* 60 * Request MLXBF_SIP_GET_DIMM_INFO 61 * 62 * Retrieve information about DIMM on a certain slot. 63 * 64 * Call register usage: 65 * a0: MLXBF_SIP_GET_DIMM_INFO 66 * a1: (Memory controller index) << 16 | (Dimm index in memory controller) 67 * a2-7: not used. 68 * 69 * Return status: 70 * a0: MLXBF_DIMM_INFO defined below describing the DIMM. 71 * a1-3: not used. 72 */ 73 #define MLXBF_SIP_GET_DIMM_INFO 0x82000008 74 75 /* Format for the SMC response about the memory information */ 76 #define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0) 77 #define MLXBF_DIMM_INFO__IS_RDIMM BIT(16) 78 #define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17) 79 #define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18) 80 #define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21) 81 #define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24) 82 83 struct bluefield_edac_priv { 84 /* pointer to device structure */ 85 struct device *dev; 86 int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; 87 void __iomem *emi_base; 88 int dimm_per_mc; 89 /* access to secure regs supported */ 90 bool svc_sreg_support; 91 /* SMC table# for secure regs access */ 92 u32 sreg_tbl; 93 }; 94 95 static u64 smc_call1(u64 smc_op, u64 smc_arg) 96 { 97 struct arm_smccc_res res; 98 99 arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res); 100 101 return res.a0; 102 } 103 104 static int secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl) 105 { 106 struct arm_smccc_res res; 107 int status; 108 109 arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr, 110 0, 0, 0, 0, 0, &res); 111 112 status = res.a0; 113 114 if (status == SMCCC_RET_NOT_SUPPORTED || 115 status == MLXBF_SMCCC_ACCESS_VIOLATION) 116 return -1; 117 118 *result = (u32)res.a1; 119 return 0; 120 } 121 122 static int secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl) 123 { 124 struct arm_smccc_res res; 125 int status; 126 127 arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr, 128 0, 0, 0, 0, &res); 129 130 status = res.a0; 131 132 if (status == SMCCC_RET_NOT_SUPPORTED || 133 status == MLXBF_SMCCC_ACCESS_VIOLATION) 134 return -1; 135 else 136 return 0; 137 } 138 139 static int bluefield_edac_readl(struct bluefield_edac_priv *priv, u32 offset, u32 *result) 140 { 141 void __iomem *addr; 142 int err = 0; 143 144 addr = priv->emi_base + offset; 145 146 if (priv->svc_sreg_support) 147 err = secure_readl(addr, result, priv->sreg_tbl); 148 else 149 *result = readl(addr); 150 151 return err; 152 } 153 154 static int bluefield_edac_writel(struct bluefield_edac_priv *priv, u32 offset, u32 data) 155 { 156 void __iomem *addr; 157 int err = 0; 158 159 addr = priv->emi_base + offset; 160 161 if (priv->svc_sreg_support) 162 err = secure_writel(addr, data, priv->sreg_tbl); 163 else 164 writel(data, addr); 165 166 return err; 167 } 168 169 /* 170 * Gather the ECC information from the External Memory Interface registers 171 * and report it to the edac handler. 172 */ 173 static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, 174 int error_cnt, 175 int is_single_ecc) 176 { 177 struct bluefield_edac_priv *priv = mci->pvt_info; 178 u32 dram_additional_info, err_prank, edea0, edea1; 179 u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; 180 enum hw_event_mc_err_type ecc_type; 181 u64 ecc_dimm_addr; 182 int ecc_dimm, err; 183 184 ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : 185 HW_EVENT_ERR_UNCORRECTED; 186 187 /* 188 * Tell the External Memory Interface to populate the relevant 189 * registers with information about the last ECC error occurrence. 190 */ 191 ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; 192 err = bluefield_edac_writel(priv, MLXBF_ECC_LATCH_SEL, ecc_latch_select); 193 if (err) 194 dev_err(priv->dev, "ECC latch select write failed.\n"); 195 196 /* 197 * Verify that the ECC reported info in the registers is of the 198 * same type as the one asked to report. If not, just report the 199 * error without the detailed information. 200 */ 201 err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom); 202 if (err) 203 dev_err(priv->dev, "DRAM syndrom read failed.\n"); 204 205 serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); 206 derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); 207 syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); 208 209 if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) { 210 edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0, 211 0, 0, -1, mci->ctl_name, ""); 212 return; 213 } 214 215 err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info); 216 if (err) 217 dev_err(priv->dev, "DRAM additional info read failed.\n"); 218 219 err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); 220 221 ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; 222 223 err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0); 224 if (err) 225 dev_err(priv->dev, "Error addr 0 read failed.\n"); 226 227 err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1); 228 if (err) 229 dev_err(priv->dev, "Error addr 1 read failed.\n"); 230 231 ecc_dimm_addr = ((u64)edea1 << 32) | edea0; 232 233 edac_mc_handle_error(ecc_type, mci, error_cnt, 234 PFN_DOWN(ecc_dimm_addr), 235 offset_in_page(ecc_dimm_addr), 236 syndrom, ecc_dimm, 0, 0, mci->ctl_name, ""); 237 } 238 239 static void bluefield_edac_check(struct mem_ctl_info *mci) 240 { 241 struct bluefield_edac_priv *priv = mci->pvt_info; 242 u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; 243 int err; 244 245 /* 246 * The memory controller might not be initialized by the firmware 247 * when there isn't memory, which may lead to bad register readings. 248 */ 249 if (mci->edac_cap == EDAC_FLAG_NONE) 250 return; 251 252 err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count); 253 if (err) 254 dev_err(priv->dev, "ECC count read failed.\n"); 255 256 single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); 257 double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); 258 259 if (single_error_count) { 260 ecc_error |= MLXBF_ECC_ERR__SECC; 261 262 bluefield_gather_report_ecc(mci, single_error_count, 1); 263 } 264 265 if (double_error_count) { 266 ecc_error |= MLXBF_ECC_ERR__DECC; 267 268 bluefield_gather_report_ecc(mci, double_error_count, 0); 269 } 270 271 /* Write to clear reported errors. */ 272 if (ecc_count) { 273 err = bluefield_edac_writel(priv, MLXBF_ECC_ERR, ecc_error); 274 if (err) 275 dev_err(priv->dev, "ECC Error write failed.\n"); 276 } 277 } 278 279 /* Initialize the DIMMs information for the given memory controller. */ 280 static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) 281 { 282 struct bluefield_edac_priv *priv = mci->pvt_info; 283 u64 mem_ctrl_idx = mci->mc_idx; 284 struct dimm_info *dimm; 285 u64 smc_info, smc_arg; 286 int is_empty = 1, i; 287 288 for (i = 0; i < priv->dimm_per_mc; i++) { 289 dimm = mci->dimms[i]; 290 291 smc_arg = mem_ctrl_idx << 16 | i; 292 smc_info = smc_call1(MLXBF_SIP_GET_DIMM_INFO, smc_arg); 293 294 if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { 295 dimm->mtype = MEM_EMPTY; 296 continue; 297 } 298 299 is_empty = 0; 300 301 dimm->edac_mode = EDAC_SECDED; 302 303 if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info)) 304 dimm->mtype = MEM_NVDIMM; 305 else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info)) 306 dimm->mtype = MEM_LRDDR4; 307 else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info)) 308 dimm->mtype = MEM_RDDR4; 309 else 310 dimm->mtype = MEM_DDR4; 311 312 dimm->nr_pages = 313 FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) * 314 (SZ_1G / PAGE_SIZE); 315 dimm->grain = MLXBF_EDAC_ERROR_GRAIN; 316 317 /* Mem controller for BlueField only supports x4, x8 and x16 */ 318 switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) { 319 case 4: 320 dimm->dtype = DEV_X4; 321 break; 322 case 8: 323 dimm->dtype = DEV_X8; 324 break; 325 case 16: 326 dimm->dtype = DEV_X16; 327 break; 328 default: 329 dimm->dtype = DEV_UNKNOWN; 330 } 331 332 priv->dimm_ranks[i] = 333 FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info); 334 } 335 336 if (is_empty) 337 mci->edac_cap = EDAC_FLAG_NONE; 338 else 339 mci->edac_cap = EDAC_FLAG_SECDED; 340 } 341 342 static int bluefield_edac_mc_probe(struct platform_device *pdev) 343 { 344 struct bluefield_edac_priv *priv; 345 struct device *dev = &pdev->dev; 346 struct edac_mc_layer layers[1]; 347 struct arm_smccc_res res; 348 struct mem_ctl_info *mci; 349 struct resource *emi_res; 350 unsigned int mc_idx, dimm_count; 351 int rc, ret; 352 353 /* Read the MSS (Memory SubSystem) index from ACPI table. */ 354 if (device_property_read_u32(dev, "mss_number", &mc_idx)) { 355 dev_warn(dev, "bf_edac: MSS number unknown\n"); 356 return -EINVAL; 357 } 358 359 /* Read the DIMMs per MC from ACPI table. */ 360 if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) { 361 dev_warn(dev, "bf_edac: DIMMs per MC unknown\n"); 362 return -EINVAL; 363 } 364 365 if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) { 366 dev_warn(dev, "bf_edac: DIMMs per MC not valid\n"); 367 return -EINVAL; 368 } 369 370 emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 371 if (!emi_res) 372 return -EINVAL; 373 374 layers[0].type = EDAC_MC_LAYER_SLOT; 375 layers[0].size = dimm_count; 376 layers[0].is_virt_csrow = true; 377 378 mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv)); 379 if (!mci) 380 return -ENOMEM; 381 382 priv = mci->pvt_info; 383 priv->dev = dev; 384 385 /* 386 * The "sec_reg_block" property in the ACPI table determines the method 387 * the driver uses to access the EMI registers: 388 * a) property is not present - directly access registers via readl/writel 389 * b) property is present - indirectly access registers via SMC calls 390 * (assuming required Silicon Provider service version found) 391 */ 392 if (device_property_read_u32(dev, "sec_reg_block", &priv->sreg_tbl)) { 393 priv->svc_sreg_support = false; 394 } else { 395 /* 396 * Check for minimum required Arm Silicon Provider (SiP) service 397 * version, ensuring support of required SMC function IDs. 398 */ 399 arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res); 400 if (res.a0 == MLXBF_SVC_REQ_MAJOR && 401 res.a1 >= MLXBF_SVC_REQ_MINOR) { 402 priv->svc_sreg_support = true; 403 } else { 404 dev_err(dev, "Required SMCs are not supported.\n"); 405 ret = -EINVAL; 406 goto err; 407 } 408 } 409 410 priv->dimm_per_mc = dimm_count; 411 if (!priv->svc_sreg_support) { 412 priv->emi_base = devm_ioremap_resource(dev, emi_res); 413 if (IS_ERR(priv->emi_base)) { 414 dev_err(dev, "failed to map EMI IO resource\n"); 415 ret = PTR_ERR(priv->emi_base); 416 goto err; 417 } 418 } else { 419 priv->emi_base = (void __iomem *)emi_res->start; 420 } 421 422 mci->pdev = dev; 423 mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | 424 MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM; 425 mci->edac_ctl_cap = EDAC_FLAG_SECDED; 426 427 mci->mod_name = DRIVER_NAME; 428 mci->ctl_name = "BlueField_Memory_Controller"; 429 mci->dev_name = dev_name(dev); 430 mci->edac_check = bluefield_edac_check; 431 432 /* Initialize mci with the actual populated DIMM information. */ 433 bluefield_edac_init_dimms(mci); 434 435 platform_set_drvdata(pdev, mci); 436 437 /* Register with EDAC core */ 438 rc = edac_mc_add_mc(mci); 439 if (rc) { 440 dev_err(dev, "failed to register with EDAC core\n"); 441 ret = rc; 442 goto err; 443 } 444 445 /* Only POLL mode supported so far. */ 446 edac_op_state = EDAC_OPSTATE_POLL; 447 448 return 0; 449 450 err: 451 edac_mc_free(mci); 452 453 return ret; 454 } 455 456 static void bluefield_edac_mc_remove(struct platform_device *pdev) 457 { 458 struct mem_ctl_info *mci = platform_get_drvdata(pdev); 459 460 edac_mc_del_mc(&pdev->dev); 461 edac_mc_free(mci); 462 } 463 464 static const struct acpi_device_id bluefield_mc_acpi_ids[] = { 465 {"MLNXBF08", 0}, 466 {} 467 }; 468 469 MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids); 470 471 static struct platform_driver bluefield_edac_mc_driver = { 472 .driver = { 473 .name = DRIVER_NAME, 474 .acpi_match_table = bluefield_mc_acpi_ids, 475 }, 476 .probe = bluefield_edac_mc_probe, 477 .remove_new = bluefield_edac_mc_remove, 478 }; 479 480 module_platform_driver(bluefield_edac_mc_driver); 481 482 MODULE_DESCRIPTION("Mellanox BlueField memory edac driver"); 483 MODULE_AUTHOR("Mellanox Technologies"); 484 MODULE_LICENSE("GPL v2"); 485