1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * 4 * Shared code by both skx_edac and i10nm_edac. Originally split out 5 * from the skx_edac driver. 6 * 7 * This file is linked into both skx_edac and i10nm_edac drivers. In 8 * order to avoid link errors, this file must be like a pure library 9 * without including symbols and defines which would otherwise conflict, 10 * when linked once into a module and into a built-in object, at the 11 * same time. For example, __this_module symbol references when that 12 * file is being linked into a built-in object. 13 * 14 * Copyright (c) 2018, Intel Corporation. 15 */ 16 17 #include <linux/acpi.h> 18 #include <linux/dmi.h> 19 #include <linux/adxl.h> 20 #include <acpi/nfit.h> 21 #include <asm/mce.h> 22 #include <asm/uv/uv.h> 23 #include "edac_module.h" 24 #include "skx_common.h" 25 26 static const char * const component_names[] = { 27 [INDEX_SOCKET] = "ProcessorSocketId", 28 [INDEX_MEMCTRL] = "MemoryControllerId", 29 [INDEX_CHANNEL] = "ChannelId", 30 [INDEX_DIMM] = "DimmSlotId", 31 [INDEX_CS] = "ChipSelect", 32 [INDEX_NM_MEMCTRL] = "NmMemoryControllerId", 33 [INDEX_NM_CHANNEL] = "NmChannelId", 34 [INDEX_NM_DIMM] = "NmDimmSlotId", 35 [INDEX_NM_CS] = "NmChipSelect", 36 }; 37 38 static int component_indices[ARRAY_SIZE(component_names)]; 39 static int adxl_component_count; 40 static const char * const *adxl_component_names; 41 static u64 *adxl_values; 42 static char *adxl_msg; 43 static unsigned long adxl_nm_bitmap; 44 45 static char skx_msg[MSG_SIZE]; 46 static skx_decode_f driver_decode; 47 static skx_show_retry_log_f skx_show_retry_rd_err_log; 48 static u64 skx_tolm, skx_tohm; 49 static LIST_HEAD(dev_edac_list); 50 static bool skx_mem_cfg_2lm; 51 static struct res_config *skx_res_cfg; 52 53 int skx_adxl_get(void) 54 { 55 const char * const *names; 56 int i, j; 57 58 names = adxl_get_component_names(); 59 if (!names) { 60 skx_printk(KERN_NOTICE, "No firmware support for address translation.\n"); 61 return -ENODEV; 62 } 63 64 for (i = 0; i < INDEX_MAX; i++) { 65 for (j = 0; names[j]; j++) { 66 if (!strcmp(component_names[i], names[j])) { 67 component_indices[i] = j; 68 69 if (i >= INDEX_NM_FIRST) 70 adxl_nm_bitmap |= 1 << i; 71 72 break; 73 } 74 } 75 76 if (!names[j] && i < INDEX_NM_FIRST) 77 goto err; 78 } 79 80 if (skx_mem_cfg_2lm) { 81 if (!adxl_nm_bitmap) 82 skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n"); 83 else 84 edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap); 85 } 86 87 adxl_component_names = names; 88 while (*names++) 89 adxl_component_count++; 90 91 adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values), 92 GFP_KERNEL); 93 if (!adxl_values) { 94 adxl_component_count = 0; 95 return -ENOMEM; 96 } 97 98 adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL); 99 if (!adxl_msg) { 100 adxl_component_count = 0; 101 kfree(adxl_values); 102 return -ENOMEM; 103 } 104 105 return 0; 106 err: 107 skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ", 108 component_names[i]); 109 for (j = 0; names[j]; j++) 110 skx_printk(KERN_CONT, "%s ", names[j]); 111 skx_printk(KERN_CONT, "\n"); 112 113 return -ENODEV; 114 } 115 EXPORT_SYMBOL_GPL(skx_adxl_get); 116 117 void skx_adxl_put(void) 118 { 119 kfree(adxl_values); 120 kfree(adxl_msg); 121 } 122 EXPORT_SYMBOL_GPL(skx_adxl_put); 123 124 static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) 125 { 126 struct skx_dev *d; 127 int i, len = 0; 128 129 if (res->addr >= skx_tohm || (res->addr >= skx_tolm && 130 res->addr < BIT_ULL(32))) { 131 edac_dbg(0, "Address 0x%llx out of range\n", res->addr); 132 return false; 133 } 134 135 if (adxl_decode(res->addr, adxl_values)) { 136 edac_dbg(0, "Failed to decode 0x%llx\n", res->addr); 137 return false; 138 } 139 140 /* 141 * GNR with a Flat2LM memory configuration may mistakenly classify 142 * a near-memory error(DDR5) as a far-memory error(CXL), resulting 143 * in the incorrect selection of decoded ADXL components. 144 * To address this, prefetch the decoded far-memory controller ID 145 * and adjust the error source to near-memory if the far-memory 146 * controller ID is invalid. 147 */ 148 if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) { 149 res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; 150 if (res->imc == -1) { 151 err_src = ERR_SRC_2LM_NM; 152 edac_dbg(0, "Adjust the error source to near-memory.\n"); 153 } 154 } 155 156 res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; 157 if (err_src == ERR_SRC_2LM_NM) { 158 res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? 159 (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; 160 res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? 161 (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1; 162 res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ? 163 (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1; 164 res->cs = (adxl_nm_bitmap & BIT_NM_CS) ? 165 (int)adxl_values[component_indices[INDEX_NM_CS]] : -1; 166 } else { 167 res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; 168 res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; 169 res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; 170 res->cs = (int)adxl_values[component_indices[INDEX_CS]]; 171 } 172 173 if (res->imc > NUM_IMC - 1 || res->imc < 0) { 174 skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); 175 return false; 176 } 177 178 list_for_each_entry(d, &dev_edac_list, list) { 179 if (d->imc[0].src_id == res->socket) { 180 res->dev = d; 181 break; 182 } 183 } 184 185 if (!res->dev) { 186 skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", 187 res->socket, res->imc); 188 return false; 189 } 190 191 for (i = 0; i < adxl_component_count; i++) { 192 if (adxl_values[i] == ~0x0ull) 193 continue; 194 195 len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx", 196 adxl_component_names[i], adxl_values[i]); 197 if (MSG_SIZE - len <= 0) 198 break; 199 } 200 201 res->decoded_by_adxl = true; 202 203 return true; 204 } 205 206 void skx_set_mem_cfg(bool mem_cfg_2lm) 207 { 208 skx_mem_cfg_2lm = mem_cfg_2lm; 209 } 210 EXPORT_SYMBOL_GPL(skx_set_mem_cfg); 211 212 void skx_set_res_cfg(struct res_config *cfg) 213 { 214 skx_res_cfg = cfg; 215 } 216 EXPORT_SYMBOL_GPL(skx_set_res_cfg); 217 218 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) 219 { 220 driver_decode = decode; 221 skx_show_retry_rd_err_log = show_retry_log; 222 } 223 EXPORT_SYMBOL_GPL(skx_set_decode); 224 225 static int skx_get_pkg_id(struct skx_dev *d, u8 *id) 226 { 227 int node; 228 int cpu; 229 230 node = pcibus_to_node(d->util_all->bus); 231 if (numa_valid_node(node)) { 232 for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) { 233 struct cpuinfo_x86 *c = &cpu_data(cpu); 234 235 if (c->initialized && cpu_to_node(cpu) == node) { 236 *id = c->topo.pkg_id; 237 return 0; 238 } 239 } 240 } 241 242 skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n"); 243 return -ENODEV; 244 } 245 246 int skx_get_src_id(struct skx_dev *d, int off, u8 *id) 247 { 248 u32 reg; 249 250 /* 251 * The 3-bit source IDs in PCI configuration space registers are limited 252 * to 8 unique IDs, and each ID is local to a UPI/QPI domain. 253 * 254 * Source IDs cannot be used to map devices to sockets on UV systems 255 * because they can exceed 8 sockets and have multiple UPI/QPI domains 256 * with identical, repeating source IDs. 257 */ 258 if (is_uv_system()) 259 return skx_get_pkg_id(d, id); 260 261 if (pci_read_config_dword(d->util_all, off, ®)) { 262 skx_printk(KERN_ERR, "Failed to read src id\n"); 263 return -ENODEV; 264 } 265 266 *id = GET_BITFIELD(reg, 12, 14); 267 return 0; 268 } 269 EXPORT_SYMBOL_GPL(skx_get_src_id); 270 271 static int get_width(u32 mtr) 272 { 273 switch (GET_BITFIELD(mtr, 8, 9)) { 274 case 0: 275 return DEV_X4; 276 case 1: 277 return DEV_X8; 278 case 2: 279 return DEV_X16; 280 } 281 return DEV_UNKNOWN; 282 } 283 284 /* 285 * We use the per-socket device @cfg->did to count how many sockets are present, 286 * and to detemine which PCI buses are associated with each socket. Allocate 287 * and build the full list of all the skx_dev structures that we need here. 288 */ 289 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) 290 { 291 struct pci_dev *pdev, *prev; 292 struct skx_dev *d; 293 u32 reg; 294 int ndev = 0; 295 296 prev = NULL; 297 for (;;) { 298 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, cfg->decs_did, prev); 299 if (!pdev) 300 break; 301 ndev++; 302 d = kzalloc(sizeof(*d), GFP_KERNEL); 303 if (!d) { 304 pci_dev_put(pdev); 305 return -ENOMEM; 306 } 307 308 if (pci_read_config_dword(pdev, cfg->busno_cfg_offset, ®)) { 309 kfree(d); 310 pci_dev_put(pdev); 311 skx_printk(KERN_ERR, "Failed to read bus idx\n"); 312 return -ENODEV; 313 } 314 315 d->bus[0] = GET_BITFIELD(reg, 0, 7); 316 d->bus[1] = GET_BITFIELD(reg, 8, 15); 317 if (cfg->type == SKX) { 318 d->seg = pci_domain_nr(pdev->bus); 319 d->bus[2] = GET_BITFIELD(reg, 16, 23); 320 d->bus[3] = GET_BITFIELD(reg, 24, 31); 321 } else { 322 d->seg = GET_BITFIELD(reg, 16, 23); 323 } 324 325 edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n", 326 d->bus[0], d->bus[1], d->bus[2], d->bus[3]); 327 list_add_tail(&d->list, &dev_edac_list); 328 prev = pdev; 329 } 330 331 if (list) 332 *list = &dev_edac_list; 333 return ndev; 334 } 335 EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings); 336 337 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm) 338 { 339 struct pci_dev *pdev; 340 u32 reg; 341 342 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL); 343 if (!pdev) { 344 edac_dbg(2, "Can't get tolm/tohm\n"); 345 return -ENODEV; 346 } 347 348 if (pci_read_config_dword(pdev, off[0], ®)) { 349 skx_printk(KERN_ERR, "Failed to read tolm\n"); 350 goto fail; 351 } 352 skx_tolm = reg; 353 354 if (pci_read_config_dword(pdev, off[1], ®)) { 355 skx_printk(KERN_ERR, "Failed to read lower tohm\n"); 356 goto fail; 357 } 358 skx_tohm = reg; 359 360 if (pci_read_config_dword(pdev, off[2], ®)) { 361 skx_printk(KERN_ERR, "Failed to read upper tohm\n"); 362 goto fail; 363 } 364 skx_tohm |= (u64)reg << 32; 365 366 pci_dev_put(pdev); 367 *tolm = skx_tolm; 368 *tohm = skx_tohm; 369 edac_dbg(2, "tolm = 0x%llx tohm = 0x%llx\n", skx_tolm, skx_tohm); 370 return 0; 371 fail: 372 pci_dev_put(pdev); 373 return -ENODEV; 374 } 375 EXPORT_SYMBOL_GPL(skx_get_hi_lo); 376 377 static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, 378 int minval, int maxval, const char *name) 379 { 380 u32 val = GET_BITFIELD(reg, lobit, hibit); 381 382 if (val < minval || val > maxval) { 383 edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name, val, reg); 384 return -EINVAL; 385 } 386 return val + add; 387 } 388 389 #define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks") 390 #define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows") 391 #define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols") 392 393 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, 394 struct skx_imc *imc, int chan, int dimmno, 395 struct res_config *cfg) 396 { 397 int banks, ranks, rows, cols, npages; 398 enum mem_type mtype; 399 u64 size; 400 401 ranks = numrank(mtr); 402 rows = numrow(mtr); 403 cols = imc->hbm_mc ? 6 : numcol(mtr); 404 405 if (imc->hbm_mc) { 406 banks = 32; 407 mtype = MEM_HBM2; 408 } else if (cfg->support_ddr5) { 409 banks = 32; 410 mtype = MEM_DDR5; 411 } else { 412 banks = 16; 413 mtype = MEM_DDR4; 414 } 415 416 /* 417 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20) 418 */ 419 size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3); 420 npages = MiB_TO_PAGES(size); 421 422 edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n", 423 imc->mc, chan, dimmno, size, npages, 424 banks, 1 << ranks, rows, cols); 425 426 imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mcmtr, 0, 0); 427 imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mcmtr, 9, 9); 428 imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0); 429 imc->chan[chan].dimms[dimmno].rowbits = rows; 430 imc->chan[chan].dimms[dimmno].colbits = cols; 431 432 dimm->nr_pages = npages; 433 dimm->grain = 32; 434 dimm->dtype = get_width(mtr); 435 dimm->mtype = mtype; 436 dimm->edac_mode = EDAC_SECDED; /* likely better than this */ 437 438 if (imc->hbm_mc) 439 snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u", 440 imc->src_id, imc->lmc, chan); 441 else 442 snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", 443 imc->src_id, imc->lmc, chan, dimmno); 444 445 return 1; 446 } 447 EXPORT_SYMBOL_GPL(skx_get_dimm_info); 448 449 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, 450 int chan, int dimmno, const char *mod_str) 451 { 452 int smbios_handle; 453 u32 dev_handle; 454 u16 flags; 455 u64 size = 0; 456 457 dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc, 458 imc->src_id, 0); 459 460 smbios_handle = nfit_get_smbios_id(dev_handle, &flags); 461 if (smbios_handle == -EOPNOTSUPP) { 462 pr_warn_once("%s: Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n", mod_str); 463 goto unknown_size; 464 } 465 466 if (smbios_handle < 0) { 467 skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle); 468 goto unknown_size; 469 } 470 471 if (flags & ACPI_NFIT_MEM_MAP_FAILED) { 472 skx_printk(KERN_ERR, "NVDIMM ADR=0x%x is not mapped\n", dev_handle); 473 goto unknown_size; 474 } 475 476 size = dmi_memdev_size(smbios_handle); 477 if (size == ~0ull) 478 skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n", 479 dev_handle, smbios_handle); 480 481 unknown_size: 482 dimm->nr_pages = size >> PAGE_SHIFT; 483 dimm->grain = 32; 484 dimm->dtype = DEV_UNKNOWN; 485 dimm->mtype = MEM_NVDIMM; 486 dimm->edac_mode = EDAC_SECDED; /* likely better than this */ 487 488 edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n", 489 imc->mc, chan, dimmno, size >> 20, dimm->nr_pages); 490 491 snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", 492 imc->src_id, imc->lmc, chan, dimmno); 493 494 return (size == 0 || size == ~0ull) ? 0 : 1; 495 } 496 EXPORT_SYMBOL_GPL(skx_get_nvdimm_info); 497 498 int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, 499 const char *ctl_name, const char *mod_str, 500 get_dimm_config_f get_dimm_config, 501 struct res_config *cfg) 502 { 503 struct mem_ctl_info *mci; 504 struct edac_mc_layer layers[2]; 505 struct skx_pvt *pvt; 506 int rc; 507 508 /* Allocate a new MC control structure */ 509 layers[0].type = EDAC_MC_LAYER_CHANNEL; 510 layers[0].size = NUM_CHANNELS; 511 layers[0].is_virt_csrow = false; 512 layers[1].type = EDAC_MC_LAYER_SLOT; 513 layers[1].size = NUM_DIMMS; 514 layers[1].is_virt_csrow = true; 515 mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers, 516 sizeof(struct skx_pvt)); 517 518 if (unlikely(!mci)) 519 return -ENOMEM; 520 521 edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci); 522 523 /* Associate skx_dev and mci for future usage */ 524 imc->mci = mci; 525 pvt = mci->pvt_info; 526 pvt->imc = imc; 527 528 mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name, 529 imc->src_id, imc->lmc); 530 if (!mci->ctl_name) { 531 rc = -ENOMEM; 532 goto fail0; 533 } 534 535 mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM; 536 if (cfg->support_ddr5) 537 mci->mtype_cap |= MEM_FLAG_DDR5; 538 mci->edac_ctl_cap = EDAC_FLAG_NONE; 539 mci->edac_cap = EDAC_FLAG_NONE; 540 mci->mod_name = mod_str; 541 mci->dev_name = pci_name(pdev); 542 mci->ctl_page_to_phys = NULL; 543 544 rc = get_dimm_config(mci, cfg); 545 if (rc < 0) 546 goto fail; 547 548 /* Record ptr to the generic device */ 549 mci->pdev = &pdev->dev; 550 551 /* Add this new MC control structure to EDAC's list of MCs */ 552 if (unlikely(edac_mc_add_mc(mci))) { 553 edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); 554 rc = -EINVAL; 555 goto fail; 556 } 557 558 return 0; 559 560 fail: 561 kfree(mci->ctl_name); 562 fail0: 563 edac_mc_free(mci); 564 imc->mci = NULL; 565 return rc; 566 } 567 EXPORT_SYMBOL_GPL(skx_register_mci); 568 569 static void skx_unregister_mci(struct skx_imc *imc) 570 { 571 struct mem_ctl_info *mci = imc->mci; 572 573 if (!mci) 574 return; 575 576 edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci); 577 578 /* Remove MC sysfs nodes */ 579 edac_mc_del_mc(mci->pdev); 580 581 edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); 582 kfree(mci->ctl_name); 583 edac_mc_free(mci); 584 } 585 586 static void skx_mce_output_error(struct mem_ctl_info *mci, 587 const struct mce *m, 588 struct decoded_addr *res) 589 { 590 enum hw_event_mc_err_type tp_event; 591 char *optype; 592 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); 593 bool overflow = GET_BITFIELD(m->status, 62, 62); 594 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); 595 bool scrub_err = false; 596 bool recoverable; 597 int len; 598 u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); 599 u32 mscod = GET_BITFIELD(m->status, 16, 31); 600 u32 errcode = GET_BITFIELD(m->status, 0, 15); 601 u32 optypenum = GET_BITFIELD(m->status, 4, 6); 602 603 recoverable = GET_BITFIELD(m->status, 56, 56); 604 605 if (uncorrected_error) { 606 core_err_cnt = 1; 607 if (ripv) { 608 tp_event = HW_EVENT_ERR_UNCORRECTED; 609 } else { 610 tp_event = HW_EVENT_ERR_FATAL; 611 } 612 } else { 613 tp_event = HW_EVENT_ERR_CORRECTED; 614 } 615 616 switch (optypenum) { 617 case 0: 618 optype = "generic undef request error"; 619 break; 620 case 1: 621 optype = "memory read error"; 622 break; 623 case 2: 624 optype = "memory write error"; 625 break; 626 case 3: 627 optype = "addr/cmd error"; 628 break; 629 case 4: 630 optype = "memory scrubbing error"; 631 scrub_err = true; 632 break; 633 default: 634 optype = "reserved"; 635 break; 636 } 637 638 if (res->decoded_by_adxl) { 639 len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", 640 overflow ? " OVERFLOW" : "", 641 (uncorrected_error && recoverable) ? " recoverable" : "", 642 mscod, errcode, adxl_msg); 643 } else { 644 len = snprintf(skx_msg, MSG_SIZE, 645 "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x", 646 overflow ? " OVERFLOW" : "", 647 (uncorrected_error && recoverable) ? " recoverable" : "", 648 mscod, errcode, 649 res->socket, res->imc, res->rank, 650 res->row, res->column, res->bank_address, res->bank_group); 651 } 652 653 if (skx_show_retry_rd_err_log) 654 skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err); 655 656 edac_dbg(0, "%s\n", skx_msg); 657 658 /* Call the helper to output message */ 659 edac_mc_handle_error(tp_event, mci, core_err_cnt, 660 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, 661 res->channel, res->dimm, -1, 662 optype, skx_msg); 663 } 664 665 static enum error_source skx_error_source(const struct mce *m) 666 { 667 u32 errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; 668 669 if (errcode != MCACOD_MEM_CTL_ERR && errcode != MCACOD_EXT_MEM_ERR) 670 return ERR_SRC_NOT_MEMORY; 671 672 if (!skx_mem_cfg_2lm) 673 return ERR_SRC_1LM; 674 675 if (errcode == MCACOD_EXT_MEM_ERR) 676 return ERR_SRC_2LM_NM; 677 678 return ERR_SRC_2LM_FM; 679 } 680 681 int skx_mce_check_error(struct notifier_block *nb, unsigned long val, 682 void *data) 683 { 684 struct mce *mce = (struct mce *)data; 685 enum error_source err_src; 686 struct decoded_addr res; 687 struct mem_ctl_info *mci; 688 char *type; 689 690 if (mce->kflags & MCE_HANDLED_CEC) 691 return NOTIFY_DONE; 692 693 err_src = skx_error_source(mce); 694 695 /* Ignore unless this is memory related with an address */ 696 if (err_src == ERR_SRC_NOT_MEMORY || !(mce->status & MCI_STATUS_ADDRV)) 697 return NOTIFY_DONE; 698 699 memset(&res, 0, sizeof(res)); 700 res.mce = mce; 701 res.addr = mce->addr & MCI_ADDR_PHYSADDR; 702 if (!pfn_to_online_page(res.addr >> PAGE_SHIFT) && !arch_is_platform_page(res.addr)) { 703 pr_err("Invalid address 0x%llx in IA32_MC%d_ADDR\n", mce->addr, mce->bank); 704 return NOTIFY_DONE; 705 } 706 707 /* Try driver decoder first */ 708 if (!(driver_decode && driver_decode(&res))) { 709 /* Then try firmware decoder (ACPI DSM methods) */ 710 if (!(adxl_component_count && skx_adxl_decode(&res, err_src))) 711 return NOTIFY_DONE; 712 } 713 714 mci = res.dev->imc[res.imc].mci; 715 716 if (!mci) 717 return NOTIFY_DONE; 718 719 if (mce->mcgstatus & MCG_STATUS_MCIP) 720 type = "Exception"; 721 else 722 type = "Event"; 723 724 skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); 725 726 skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: 0x%llx " 727 "Bank %d: 0x%llx\n", mce->extcpu, type, 728 mce->mcgstatus, mce->bank, mce->status); 729 skx_mc_printk(mci, KERN_DEBUG, "TSC 0x%llx ", mce->tsc); 730 skx_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", mce->addr); 731 skx_mc_printk(mci, KERN_DEBUG, "MISC 0x%llx ", mce->misc); 732 733 skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:0x%x TIME %llu SOCKET " 734 "%u APIC 0x%x\n", mce->cpuvendor, mce->cpuid, 735 mce->time, mce->socketid, mce->apicid); 736 737 skx_mce_output_error(mci, mce, &res); 738 739 mce->kflags |= MCE_HANDLED_EDAC; 740 return NOTIFY_DONE; 741 } 742 EXPORT_SYMBOL_GPL(skx_mce_check_error); 743 744 void skx_remove(void) 745 { 746 int i, j; 747 struct skx_dev *d, *tmp; 748 749 edac_dbg(0, "\n"); 750 751 list_for_each_entry_safe(d, tmp, &dev_edac_list, list) { 752 list_del(&d->list); 753 for (i = 0; i < NUM_IMC; i++) { 754 if (d->imc[i].mci) 755 skx_unregister_mci(&d->imc[i]); 756 757 if (d->imc[i].mdev) 758 pci_dev_put(d->imc[i].mdev); 759 760 if (d->imc[i].mbase) 761 iounmap(d->imc[i].mbase); 762 763 for (j = 0; j < NUM_CHANNELS; j++) { 764 if (d->imc[i].chan[j].cdev) 765 pci_dev_put(d->imc[i].chan[j].cdev); 766 } 767 } 768 if (d->util_all) 769 pci_dev_put(d->util_all); 770 if (d->pcu_cr3) 771 pci_dev_put(d->pcu_cr3); 772 if (d->sad_all) 773 pci_dev_put(d->sad_all); 774 if (d->uracu) 775 pci_dev_put(d->uracu); 776 777 kfree(d); 778 } 779 } 780 EXPORT_SYMBOL_GPL(skx_remove); 781 782 #ifdef CONFIG_EDAC_DEBUG 783 /* 784 * Debug feature. 785 * Exercise the address decode logic by writing an address to 786 * /sys/kernel/debug/edac/{skx,i10nm}_test/addr. 787 */ 788 static struct dentry *skx_test; 789 790 static int debugfs_u64_set(void *data, u64 val) 791 { 792 struct mce m; 793 794 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); 795 796 memset(&m, 0, sizeof(m)); 797 /* ADDRV + MemRd + Unknown channel */ 798 m.status = MCI_STATUS_ADDRV + 0x90; 799 /* One corrected error */ 800 m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); 801 m.addr = val; 802 skx_mce_check_error(NULL, 0, &m); 803 804 return 0; 805 } 806 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); 807 808 void skx_setup_debug(const char *name) 809 { 810 skx_test = edac_debugfs_create_dir(name); 811 if (!skx_test) 812 return; 813 814 if (!edac_debugfs_create_file("addr", 0200, skx_test, 815 NULL, &fops_u64_wo)) { 816 debugfs_remove(skx_test); 817 skx_test = NULL; 818 } 819 } 820 EXPORT_SYMBOL_GPL(skx_setup_debug); 821 822 void skx_teardown_debug(void) 823 { 824 debugfs_remove_recursive(skx_test); 825 } 826 EXPORT_SYMBOL_GPL(skx_teardown_debug); 827 #endif /*CONFIG_EDAC_DEBUG*/ 828 829 MODULE_LICENSE("GPL v2"); 830 MODULE_AUTHOR("Tony Luck"); 831 MODULE_DESCRIPTION("MC Driver for Intel server processors"); 832