1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * 4 * Shared code by both skx_edac and i10nm_edac. Originally split out 5 * from the skx_edac driver. 6 * 7 * This file is linked into both skx_edac and i10nm_edac drivers. In 8 * order to avoid link errors, this file must be like a pure library 9 * without including symbols and defines which would otherwise conflict, 10 * when linked once into a module and into a built-in object, at the 11 * same time. For example, __this_module symbol references when that 12 * file is being linked into a built-in object. 13 * 14 * Copyright (c) 2018, Intel Corporation. 15 */ 16 17 #include <linux/topology.h> 18 #include <linux/acpi.h> 19 #include <linux/dmi.h> 20 #include <linux/adxl.h> 21 #include <linux/overflow.h> 22 #include <acpi/nfit.h> 23 #include <asm/mce.h> 24 #include <asm/uv/uv.h> 25 #include "edac_module.h" 26 #include "skx_common.h" 27 28 static const char * const component_names[] = { 29 [INDEX_SOCKET] = "ProcessorSocketId", 30 [INDEX_MEMCTRL] = "MemoryControllerId", 31 [INDEX_CHANNEL] = "ChannelId", 32 [INDEX_DIMM] = "DimmSlotId", 33 [INDEX_CS] = "ChipSelect", 34 [INDEX_NM_MEMCTRL] = "NmMemoryControllerId", 35 [INDEX_NM_CHANNEL] = "NmChannelId", 36 [INDEX_NM_DIMM] = "NmDimmSlotId", 37 [INDEX_NM_CS] = "NmChipSelect", 38 }; 39 40 static int component_indices[ARRAY_SIZE(component_names)]; 41 static int adxl_component_count; 42 static const char * const *adxl_component_names; 43 static u64 *adxl_values; 44 static char *adxl_msg; 45 static unsigned long adxl_nm_bitmap; 46 47 static char skx_msg[MSG_SIZE]; 48 static skx_decode_f driver_decode; 49 static skx_show_retry_log_f skx_show_retry_rd_err_log; 50 static u64 skx_tolm, skx_tohm; 51 static LIST_HEAD(dev_edac_list); 52 static bool skx_mem_cfg_2lm; 53 static struct res_config *skx_res_cfg; 54 55 int skx_adxl_get(void) 56 { 57 const char * const *names; 58 int i, j; 59 60 names = adxl_get_component_names(); 61 if (!names) { 62 skx_printk(KERN_NOTICE, "No firmware support for 
address translation.\n"); 63 return -ENODEV; 64 } 65 66 for (i = 0; i < INDEX_MAX; i++) { 67 for (j = 0; names[j]; j++) { 68 if (!strcmp(component_names[i], names[j])) { 69 component_indices[i] = j; 70 71 if (i >= INDEX_NM_FIRST) 72 adxl_nm_bitmap |= 1 << i; 73 74 break; 75 } 76 } 77 78 if (!names[j] && i < INDEX_NM_FIRST) 79 goto err; 80 } 81 82 if (skx_mem_cfg_2lm) { 83 if (!adxl_nm_bitmap) 84 skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n"); 85 else 86 edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap); 87 } 88 89 adxl_component_names = names; 90 while (*names++) 91 adxl_component_count++; 92 93 adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values), 94 GFP_KERNEL); 95 if (!adxl_values) { 96 adxl_component_count = 0; 97 return -ENOMEM; 98 } 99 100 adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL); 101 if (!adxl_msg) { 102 adxl_component_count = 0; 103 kfree(adxl_values); 104 return -ENOMEM; 105 } 106 107 return 0; 108 err: 109 skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ", 110 component_names[i]); 111 for (j = 0; names[j]; j++) 112 skx_printk(KERN_CONT, "%s ", names[j]); 113 skx_printk(KERN_CONT, "\n"); 114 115 return -ENODEV; 116 } 117 EXPORT_SYMBOL_GPL(skx_adxl_get); 118 119 void skx_adxl_put(void) 120 { 121 adxl_component_count = 0; 122 kfree(adxl_values); 123 kfree(adxl_msg); 124 } 125 EXPORT_SYMBOL_GPL(skx_adxl_put); 126 127 void skx_init_mc_mapping(struct skx_dev *d) 128 { 129 /* 130 * By default, the BIOS presents all memory controllers within each 131 * socket to the EDAC driver. The physical indices are the same as 132 * the logical indices of the memory controllers enumerated by the 133 * EDAC driver. 
134 */ 135 for (int i = 0; i < d->num_imc; i++) 136 d->imc[i].mc_mapping = i; 137 } 138 EXPORT_SYMBOL_GPL(skx_init_mc_mapping); 139 140 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) 141 { 142 edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n", 143 pmc, lmc); 144 145 d->imc[lmc].mc_mapping = pmc; 146 } 147 EXPORT_SYMBOL_GPL(skx_set_mc_mapping); 148 149 static int skx_get_mc_mapping(struct skx_dev *d, u8 pmc) 150 { 151 for (int lmc = 0; lmc < d->num_imc; lmc++) { 152 if (d->imc[lmc].mc_mapping == pmc) { 153 edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n", 154 pmc, lmc); 155 156 return lmc; 157 } 158 } 159 160 return -1; 161 } 162 163 static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) 164 { 165 int i, lmc, len = 0; 166 struct skx_dev *d; 167 168 if (res->addr >= skx_tohm || (res->addr >= skx_tolm && 169 res->addr < BIT_ULL(32))) { 170 edac_dbg(0, "Address 0x%llx out of range\n", res->addr); 171 return false; 172 } 173 174 if (adxl_decode(res->addr, adxl_values)) { 175 edac_dbg(0, "Failed to decode 0x%llx\n", res->addr); 176 return false; 177 } 178 179 /* 180 * GNR with a Flat2LM memory configuration may mistakenly classify 181 * a near-memory error(DDR5) as a far-memory error(CXL), resulting 182 * in the incorrect selection of decoded ADXL components. 183 * To address this, prefetch the decoded far-memory controller ID 184 * and adjust the error source to near-memory if the far-memory 185 * controller ID is invalid. 186 */ 187 if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) { 188 res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; 189 if (res->imc == -1) { 190 err_src = ERR_SRC_2LM_NM; 191 edac_dbg(0, "Adjust the error source to near-memory.\n"); 192 } 193 } 194 195 res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; 196 if (err_src == ERR_SRC_2LM_NM) { 197 res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? 
198 (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; 199 res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? 200 (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1; 201 res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ? 202 (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1; 203 res->cs = (adxl_nm_bitmap & BIT_NM_CS) ? 204 (int)adxl_values[component_indices[INDEX_NM_CS]] : -1; 205 } else { 206 res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; 207 res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; 208 res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; 209 res->cs = (int)adxl_values[component_indices[INDEX_CS]]; 210 } 211 212 if (res->imc < 0) { 213 skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); 214 return false; 215 } 216 217 list_for_each_entry(d, &dev_edac_list, list) { 218 if (d->imc[0].src_id == res->socket) { 219 res->dev = d; 220 break; 221 } 222 } 223 224 if (!res->dev) { 225 skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", 226 res->socket, res->imc); 227 return false; 228 } 229 230 lmc = skx_get_mc_mapping(d, res->imc); 231 if (lmc < 0) { 232 skx_printk(KERN_ERR, "No lmc for imc %d\n", res->imc); 233 return false; 234 } 235 236 res->imc = lmc; 237 238 for (i = 0; i < adxl_component_count; i++) { 239 if (adxl_values[i] == ~0x0ull) 240 continue; 241 242 len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx", 243 adxl_component_names[i], adxl_values[i]); 244 if (MSG_SIZE - len <= 0) 245 break; 246 } 247 248 res->decoded_by_adxl = true; 249 250 return true; 251 } 252 253 void skx_set_mem_cfg(bool mem_cfg_2lm) 254 { 255 skx_mem_cfg_2lm = mem_cfg_2lm; 256 } 257 EXPORT_SYMBOL_GPL(skx_set_mem_cfg); 258 259 void skx_set_res_cfg(struct res_config *cfg) 260 { 261 skx_res_cfg = cfg; 262 } 263 EXPORT_SYMBOL_GPL(skx_set_res_cfg); 264 265 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) 266 { 267 driver_decode = decode; 268 skx_show_retry_rd_err_log = show_retry_log; 269 } 
EXPORT_SYMBOL_GPL(skx_set_decode);

/*
 * Get the package (socket) ID of @d from NUMA topology: find an
 * initialized CPU on the same NUMA node as the device's "util_all" PCI
 * bus and report that CPU's physical package ID.
 */
static int skx_get_pkg_id(struct skx_dev *d, u8 *id)
{
	int node;
	int cpu;

	node = pcibus_to_node(d->util_all->bus);
	if (numa_valid_node(node)) {
		for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) {
			struct cpuinfo_x86 *c = &cpu_data(cpu);

			if (c->initialized && cpu_to_node(cpu) == node) {
				*id = topology_physical_package_id(cpu);
				return 0;
			}
		}
	}

	skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n");
	return -ENODEV;
}

/*
 * Read the source (socket) ID for @d into @id, either from NUMA
 * information (UV systems) or from bits 14:12 of the config register at
 * offset @off of the "util_all" device.
 */
int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
{
	u32 reg;

	/*
	 * The 3-bit source IDs in PCI configuration space registers are limited
	 * to 8 unique IDs, and each ID is local to a UPI/QPI domain.
	 *
	 * Source IDs cannot be used to map devices to sockets on UV systems
	 * because they can exceed 8 sockets and have multiple UPI/QPI domains
	 * with identical, repeating source IDs.
	 */
	if (is_uv_system())
		return skx_get_pkg_id(d, id);

	if (pci_read_config_dword(d->util_all, off, &reg)) {
		skx_printk(KERN_ERR, "Failed to read src id\n");
		return -ENODEV;
	}

	*id = GET_BITFIELD(reg, 12, 14);
	return 0;
}
EXPORT_SYMBOL_GPL(skx_get_src_id);

/* Decode the DRAM device width from bits 9:8 of the MTR register. */
static int get_width(u32 mtr)
{
	switch (GET_BITFIELD(mtr, 8, 9)) {
	case 0:
		return DEV_X4;
	case 1:
		return DEV_X8;
	case 2:
		return DEV_X16;
	}
	return DEV_UNKNOWN;
}

/*
 * We use the per-socket device @cfg->did to count how many sockets are present,
 * and to determine which PCI buses are associated with each socket. Allocate
 * and build the full list of all the skx_dev structures that we need here.
 */
int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
{
	int ndev = 0, imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num;
	struct pci_dev *pdev, *prev;
	struct skx_dev *d;
	u32 reg;

	prev = NULL;
	for (;;) {
		/* Iterate over every instance of the per-socket device. */
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, cfg->decs_did, prev);
		if (!pdev)
			break;
		ndev++;
		d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL);
		if (!d) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}

		if (pci_read_config_dword(pdev, cfg->busno_cfg_offset, &reg)) {
			kfree(d);
			pci_dev_put(pdev);
			skx_printk(KERN_ERR, "Failed to read bus idx\n");
			return -ENODEV;
		}

		/* Bus numbers are packed one per byte in @reg. */
		d->bus[0] = GET_BITFIELD(reg, 0, 7);
		d->bus[1] = GET_BITFIELD(reg, 8, 15);
		if (cfg->type == SKX) {
			d->seg = pci_domain_nr(pdev->bus);
			d->bus[2] = GET_BITFIELD(reg, 16, 23);
			d->bus[3] = GET_BITFIELD(reg, 24, 31);
		} else {
			/* Non-SKX parts carry the segment in the register itself. */
			d->seg = GET_BITFIELD(reg, 16, 23);
		}

		d->num_imc = imc_num;

		edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x, imcs %d\n",
			 d->bus[0], d->bus[1], d->bus[2], d->bus[3], imc_num);
		list_add_tail(&d->list, &dev_edac_list);
		prev = pdev;

		/* Start with the default identity MC mapping. */
		skx_init_mc_mapping(d);
	}

	if (list)
		*list = &dev_edac_list;
	return ndev;
}
EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings);

struct list_head *skx_get_edac_list(void)
{
	return &dev_edac_list;
}
EXPORT_SYMBOL_GPL(skx_get_edac_list);

/*
 * Read TOLM (top of low memory) and the two halves of TOHM (top of high
 * memory) from the config space of the device matching @did, cache them
 * in skx_tolm/skx_tohm and return them via @tolm/@tohm.
 */
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
{
	struct pci_dev *pdev;
	u32 reg;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL);
	if (!pdev) {
		edac_dbg(2, "Can't get tolm/tohm\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, off[0], &reg)) {
		skx_printk(KERN_ERR, "Failed to read tolm\n");
		goto fail;
	}
	skx_tolm = reg;

	if (pci_read_config_dword(pdev, off[1], &reg)) {
		skx_printk(KERN_ERR, "Failed to read lower tohm\n");
		goto fail;
	}
	skx_tohm = reg;

	if (pci_read_config_dword(pdev, off[2], &reg)) {
		skx_printk(KERN_ERR, "Failed to read upper tohm\n");
		goto fail;
	}
	skx_tohm |= (u64)reg << 32;

	pci_dev_put(pdev);
	*tolm = skx_tolm;
	*tohm = skx_tohm;
	edac_dbg(2, "tolm = 0x%llx tohm = 0x%llx\n", skx_tolm, skx_tohm);
	return 0;
fail:
	pci_dev_put(pdev);
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(skx_get_hi_lo);

void skx_set_hi_lo(u64 tolm, u64 tohm)
{
	skx_tolm = tolm;
	skx_tohm = tohm;
}
EXPORT_SYMBOL_GPL(skx_set_hi_lo);

/*
 * Extract bits [hibit:lobit] of @reg, check the value lies in
 * [minval, maxval], and return it with @add applied (or -EINVAL).
 */
static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
			     int minval, int maxval, const char *name)
{
	u32 val = GET_BITFIELD(reg, lobit, hibit);

	if (val < minval || val > maxval) {
		edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name, val, reg);
		return -EINVAL;
	}
	return val + add;
}

/* Decode rank/row/column counts from the MTR register fields. */
#define numrank(reg)	skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks")
#define numrow(reg)	skx_get_dimm_attr(reg, 2, 4, 12, 1, 7, "rows")
#define numcol(reg)	skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols")

/*
 * Decode geometry/type of one DIMM from the MTR/MCMTR/AMAP registers and
 * populate both the EDAC @dimm and the driver's per-channel bookkeeping.
 * Return: 1 (DIMM present).
 */
int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
		      struct skx_imc *imc, int chan, int dimmno,
		      struct res_config *cfg)
{
	int banks, ranks, rows, cols, npages;
	enum mem_type mtype;
	u64 size;

	ranks = numrank(mtr);
	rows = numrow(mtr);
	cols = imc->hbm_mc ? 6 : numcol(mtr);

	if (imc->hbm_mc) {
		banks = 32;
		mtype = MEM_HBM2;
	} else if (cfg->support_ddr5) {
		banks = 32;
		mtype = MEM_DDR5;
	} else {
		banks = 16;
		mtype = MEM_DDR4;
	}

	/*
	 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
	 */
	size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
	npages = MiB_TO_PAGES(size);

	edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n",
		 imc->mc, chan, dimmno, size, npages,
		 banks, 1 << ranks, rows, cols);

	/* Cache decode parameters used later for address translation. */
	imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mcmtr, 0, 0);
	imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mcmtr, 9, 9);
	imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
	imc->chan[chan].dimms[dimmno].rowbits = rows;
	imc->chan[chan].dimms[dimmno].colbits = cols;

	dimm->nr_pages = npages;
	dimm->grain = 32;
	dimm->dtype = get_width(mtr);
	dimm->mtype = mtype;
	dimm->edac_mode = EDAC_SECDED; /* likely better than this */

	if (imc->hbm_mc)
		snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u",
			 imc->src_id, imc->lmc, chan);
	else
		snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
			 imc->src_id, imc->lmc, chan, dimmno);

	return 1;
}
EXPORT_SYMBOL_GPL(skx_get_dimm_info);

/*
 * Fill in @dimm for an NVDIMM, getting its size via the ACPI NFIT
 * SMBIOS handle and DMI. Return: 1 if a usable size was found, 0 if the
 * size is unknown or zero.
 */
int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
			int chan, int dimmno, const char *mod_str)
{
	int smbios_handle;
	u32 dev_handle;
	u16 flags;
	u64 size = 0;

	dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
						   imc->src_id, 0);

	smbios_handle = nfit_get_smbios_id(dev_handle, &flags);
	if (smbios_handle == -EOPNOTSUPP) {
		pr_warn_once("%s: Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n", mod_str);
		goto unknown_size;
	}

	if (smbios_handle < 0) {
		skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle);
		goto unknown_size;
	}

	if (flags & ACPI_NFIT_MEM_MAP_FAILED) {
		skx_printk(KERN_ERR, "NVDIMM ADR=0x%x is not mapped\n", dev_handle);
		goto unknown_size;
	}

	size = dmi_memdev_size(smbios_handle);
	if (size == ~0ull)
		skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n",
			   dev_handle, smbios_handle);

unknown_size:
	dimm->nr_pages = size >> PAGE_SHIFT;
	dimm->grain = 32;
	dimm->dtype = DEV_UNKNOWN;
	dimm->mtype = MEM_NVDIMM;
	dimm->edac_mode = EDAC_SECDED; /* likely better than this */

	edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
		 imc->mc, chan, dimmno, size >> 20, dimm->nr_pages);

	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
		 imc->src_id, imc->lmc, chan, dimmno);

	return (size == 0 || size == ~0ull) ? 0 : 1;
}
EXPORT_SYMBOL_GPL(skx_get_nvdimm_info);

/*
 * Allocate, populate and register an EDAC memory controller for @imc.
 * @get_dimm_config enumerates the DIMMs behind the controller.
 * Return: 0 on success, negative errno on failure (nothing registered).
 */
int skx_register_mci(struct skx_imc *imc, struct device *dev,
		     const char *dev_name, const char *ctl_name,
		     const char *mod_str, get_dimm_config_f get_dimm_config,
		     struct res_config *cfg)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct skx_pvt *pvt;
	int rc;

	/* Allocate a new MC control structure */
	layers[0].type = EDAC_MC_LAYER_CHANNEL;
	layers[0].size = imc->num_channels;
	layers[0].is_virt_csrow = false;
	layers[1].type = EDAC_MC_LAYER_SLOT;
	layers[1].size = imc->num_dimms;
	layers[1].is_virt_csrow = true;
	mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
			    sizeof(struct skx_pvt));

	if (unlikely(!mci))
		return -ENOMEM;

	edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);

	/* Associate skx_dev and mci for future usage */
	imc->mci = mci;
	pvt = mci->pvt_info;
	pvt->imc = imc;

	mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name,
				  imc->src_id, imc->lmc);
	if (!mci->ctl_name) {
		rc = -ENOMEM;
		goto fail0;
	}

	mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM;
	if (cfg->support_ddr5)
		mci->mtype_cap |= MEM_FLAG_DDR5;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = mod_str;
	mci->dev_name = dev_name;
	mci->ctl_page_to_phys = NULL;

	rc = get_dimm_config(mci, cfg);
	if (rc < 0)
		goto fail;

	/* Record ptr to the generic device */
	mci->pdev = dev;

	/* Add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
		rc = -EINVAL;
		goto fail;
	}

	return 0;

fail:
	kfree(mci->ctl_name);
fail0:
	edac_mc_free(mci);
	imc->mci = NULL;
	return rc;
}
EXPORT_SYMBOL_GPL(skx_register_mci);

/* Tear down the EDAC registration created by skx_register_mci(). */
static void skx_unregister_mci(struct skx_imc *imc)
{
	struct mem_ctl_info *mci = imc->mci;

	if (!mci)
		return;

	edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);

	/* Remove MC sysfs nodes */
	edac_mc_del_mc(mci->pdev);

	edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
	kfree(mci->ctl_name);
	edac_mc_free(mci);
}

/*
 * Format the decoded memory error @res and report it to the EDAC core.
 * Pulls severity, error code and operation type out of the machine
 * check status in @m.
 */
static void skx_mce_output_error(struct mem_ctl_info *mci,
				 const struct mce *m,
				 struct decoded_addr *res)
{
	enum hw_event_mc_err_type tp_event;
	char *optype;
	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
	bool overflow = GET_BITFIELD(m->status, 62, 62);
	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
	bool scrub_err = false;
	bool recoverable;
	int len;
	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
	u32 mscod = GET_BITFIELD(m->status, 16, 31);
	u32 errcode = GET_BITFIELD(m->status, 0, 15);
	u32 optypenum = GET_BITFIELD(m->status, 4, 6);

	recoverable = GET_BITFIELD(m->status, 56, 56);

	if (uncorrected_error) {
		core_err_cnt = 1;
		/* Without a valid return IP the error is fatal. */
		if (ripv) {
			tp_event = HW_EVENT_ERR_UNCORRECTED;
		} else {
			tp_event = HW_EVENT_ERR_FATAL;
		}
	} else {
		tp_event = HW_EVENT_ERR_CORRECTED;
	}

	switch (optypenum) {
	case 0:
		optype = "generic undef request error";
		break;
	case 1:
		optype = "memory read error";
		break;
	case 2:
		optype = "memory write error";
		break;
	case 3:
		optype = "addr/cmd error";
		break;
	case 4:
		optype = "memory scrubbing error";
		scrub_err = true;
		break;
	default:
		optype = "reserved";
		break;
	}

	if (res->decoded_by_adxl) {
		/* Firmware decode: report the raw ADXL component string. */
		len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
				overflow ? " OVERFLOW" : "",
				(uncorrected_error && recoverable) ? " recoverable" : "",
				mscod, errcode, adxl_msg);
	} else {
		len = scnprintf(skx_msg, MSG_SIZE,
				"%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x",
				overflow ? " OVERFLOW" : "",
				(uncorrected_error && recoverable) ? " recoverable" : "",
				mscod, errcode,
				res->socket, res->imc, res->rank,
				res->row, res->column, res->bank_address, res->bank_group);
	}

	if (skx_show_retry_rd_err_log)
		skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err);

	edac_dbg(0, "%s\n", skx_msg);

	/* Call the helper to output message */
	edac_mc_handle_error(tp_event, mci, core_err_cnt,
			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
			     res->channel, res->dimm, -1,
			     optype, skx_msg);
}

/*
 * Classify the machine check in @m: not a memory error, a 1LM error, or
 * a near-/far-memory error in a 2-level memory configuration.
 */
static enum error_source skx_error_source(const struct mce *m)
{
	u32 errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK;

	if (errcode != MCACOD_MEM_CTL_ERR && errcode != MCACOD_EXT_MEM_ERR)
		return ERR_SRC_NOT_MEMORY;

	if (!skx_mem_cfg_2lm)
		return ERR_SRC_1LM;

	if (errcode == MCACOD_EXT_MEM_ERR)
		return ERR_SRC_2LM_NM;

	return ERR_SRC_2LM_FM;
}

/*
 * MCE notifier callback: decode memory errors (driver decoder first,
 * then the firmware ADXL decoder) and report them through EDAC.
 * Always returns NOTIFY_DONE; sets MCE_HANDLED_EDAC when handled.
 */
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
			void *data)
{
	struct mce *mce = (struct mce *)data;
	enum error_source err_src;
	struct decoded_addr res;
	struct mem_ctl_info *mci;
	char *type;

	if (mce->kflags & MCE_HANDLED_CEC)
		return NOTIFY_DONE;

	err_src = skx_error_source(mce);

	/* Ignore unless this is memory related with an address */
	if (err_src == ERR_SRC_NOT_MEMORY || !(mce->status & MCI_STATUS_ADDRV))
		return NOTIFY_DONE;

	memset(&res, 0, sizeof(res));
	res.mce  = mce;
	res.addr = mce->addr & MCI_ADDR_PHYSADDR;
	if (!pfn_to_online_page(res.addr >> PAGE_SHIFT) && !arch_is_platform_page(res.addr)) {
		pr_err("Invalid address 0x%llx in IA32_MC%d_ADDR\n", mce->addr, mce->bank);
		return NOTIFY_DONE;
	}

	/* Try driver decoder first */
	if (!(driver_decode && driver_decode(&res))) {
		/* Then try firmware decoder (ACPI DSM methods) */
		if (!(adxl_component_count && skx_adxl_decode(&res, err_src)))
			return NOTIFY_DONE;
	}

	mci = res.dev->imc[res.imc].mci;

	if (!mci)
		return NOTIFY_DONE;

	if (mce->mcgstatus & MCG_STATUS_MCIP)
		type = "Exception";
	else
		type = "Event";

	skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");

	skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: 0x%llx "
		      "Bank %d: 0x%llx\n", mce->extcpu, type,
		      mce->mcgstatus, mce->bank, mce->status);
	skx_mc_printk(mci, KERN_DEBUG, "TSC 0x%llx ", mce->tsc);
	skx_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", mce->addr);
	skx_mc_printk(mci, KERN_DEBUG, "MISC 0x%llx ", mce->misc);

	skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:0x%x TIME %llu SOCKET "
		      "%u APIC 0x%x\n", mce->cpuvendor, mce->cpuid,
		      mce->time, mce->socketid, mce->apicid);

	skx_mce_output_error(mci, mce, &res);

	mce->kflags |= MCE_HANDLED_EDAC;
	return NOTIFY_DONE;
}
EXPORT_SYMBOL_GPL(skx_mce_check_error);

/*
 * Unwind everything built by the probe path: unregister all memory
 * controllers, drop PCI device references, unmap MMIO, and free every
 * skx_dev on the global list.
 */
void skx_remove(void)
{
	int i, j;
	struct skx_dev *d, *tmp;

	edac_dbg(0, "\n");

	list_for_each_entry_safe(d, tmp, &dev_edac_list, list) {
		list_del(&d->list);
		for (i = 0; i < d->num_imc; i++) {
			if (d->imc[i].mci)
				skx_unregister_mci(&d->imc[i]);

			if (d->imc[i].mdev)
				pci_dev_put(d->imc[i].mdev);

			if (d->imc[i].mbase)
				iounmap(d->imc[i].mbase);

			if (d->imc[i].dev)
				put_device(d->imc[i].dev);

			for (j = 0; j < d->imc[i].num_channels; j++) {
				if (d->imc[i].chan[j].cdev)
					pci_dev_put(d->imc[i].chan[j].cdev);
			}
		}
		if (d->util_all)
			pci_dev_put(d->util_all);
		if (d->pcu_cr3)
			pci_dev_put(d->pcu_cr3);
		if (d->sad_all)
			pci_dev_put(d->sad_all);
		if (d->uracu)
			pci_dev_put(d->uracu);

		kfree(d);
	}
}
EXPORT_SYMBOL_GPL(skx_remove);

#ifdef CONFIG_EDAC_DEBUG
/*
 * Debug feature.
 * Exercise the address decode logic by writing an address to
 * /sys/kernel/debug/edac/{skx,i10nm,imh}_test/addr.
 */
static struct dentry *skx_test;

/* debugfs write handler: inject a fake corrected error at address @val. */
static int debugfs_u64_set(void *data, u64 val)
{
	struct mce m;

	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);

	memset(&m, 0, sizeof(m));
	/* ADDRV + MemRd + Unknown channel */
	m.status = MCI_STATUS_ADDRV + 0x90;
	/* One corrected error */
	m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
	m.addr = val;
	skx_mce_check_error(NULL, 0, &m);

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");

/* Create the write-only debugfs "addr" injection file under @name. */
void skx_setup_debug(const char *name)
{
	skx_test = edac_debugfs_create_dir(name);
	if (!skx_test)
		return;

	if (!edac_debugfs_create_file("addr", 0200, skx_test,
				      NULL, &fops_u64_wo)) {
		debugfs_remove(skx_test);
		skx_test = NULL;
	}
}
EXPORT_SYMBOL_GPL(skx_setup_debug);

void skx_teardown_debug(void)
{
	debugfs_remove_recursive(skx_test);
}
EXPORT_SYMBOL_GPL(skx_teardown_debug);
#endif /*CONFIG_EDAC_DEBUG*/

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tony Luck");
MODULE_DESCRIPTION("MC Driver for Intel server processors");