1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * 4 * Shared code by both skx_edac and i10nm_edac. Originally split out 5 * from the skx_edac driver. 6 * 7 * This file is linked into both skx_edac and i10nm_edac drivers. In 8 * order to avoid link errors, this file must be like a pure library 9 * without including symbols and defines which would otherwise conflict, 10 * when linked once into a module and into a built-in object, at the 11 * same time. For example, __this_module symbol references when that 12 * file is being linked into a built-in object. 13 * 14 * Copyright (c) 2018, Intel Corporation. 15 */ 16 17 #include <linux/topology.h> 18 #include <linux/acpi.h> 19 #include <linux/dmi.h> 20 #include <linux/adxl.h> 21 #include <linux/overflow.h> 22 #include <acpi/nfit.h> 23 #include <asm/mce.h> 24 #include <asm/uv/uv.h> 25 #include "edac_module.h" 26 #include "skx_common.h" 27 28 static const char * const component_names[] = { 29 [INDEX_SOCKET] = "ProcessorSocketId", 30 [INDEX_MEMCTRL] = "MemoryControllerId", 31 [INDEX_CHANNEL] = "ChannelId", 32 [INDEX_DIMM] = "DimmSlotId", 33 [INDEX_CS] = "ChipSelect", 34 [INDEX_SUBCH] = "SubChId", 35 [INDEX_NM_MEMCTRL] = "NmMemoryControllerId", 36 [INDEX_NM_CHANNEL] = "NmChannelId", 37 [INDEX_NM_DIMM] = "NmDimmSlotId", 38 [INDEX_NM_CS] = "NmChipSelect", 39 [INDEX_NM_SUBCH] = "NmSubChId", 40 }; 41 42 static int component_indices[ARRAY_SIZE(component_names)]; 43 static int adxl_component_count; 44 static const char * const *adxl_component_names; 45 static u64 *adxl_values; 46 static char *adxl_msg; 47 static unsigned long adxl_nm_bitmap; 48 static unsigned long adxl_bitmap; 49 50 static char skx_msg[MSG_SIZE]; 51 static skx_decode_f driver_decode; 52 static skx_show_rrl_f show_rrl; 53 static u64 skx_tolm, skx_tohm; 54 static LIST_HEAD(dev_edac_list); 55 static bool skx_mem_cfg_2lm; 56 static struct res_config *skx_res_cfg; 57 58 u64 skx_readx(void __iomem *addr, u8 width) 59 { 60 switch (width) { 61 case 1: 62 return readb(addr); 63 case 2: 64 return readw(addr); 65 case 4: 66 return readl(addr); 67 case 8: 68 return readq(addr); 69 default: 70 skx_printk(KERN_ERR, "Invalid reg 0x%p width %u to read.\n", addr, width); 71 return 0; 72 } 73 } 74 EXPORT_SYMBOL_GPL(skx_readx); 75 76 static void skx_writex(void __iomem *addr, u8 width, u64 val) 77 { 78 switch (width) { 79 case 1: 80 writeb((u8)val, addr); 81 return; 82 case 2: 83 writew((u16)val, addr); 84 return; 85 case 4: 86 writel((u32)val, addr); 87 return; 88 case 8: 89 writeq(val, addr); 90 return; 91 default: 92 skx_printk(KERN_ERR, "Invalid reg 0x%p width %u to write 0x%llx.\n", addr, width, val); 93 } 94 } 95 96 u64 skx_read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) 97 { 98 return skx_readx(imc->mbase + imc->chan_mmio_sz * chan + offset, width); 99 } 100 EXPORT_SYMBOL_GPL(skx_read_imc_reg); 101 102 void skx_write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val) 103 { 104 skx_writex(imc->mbase + imc->chan_mmio_sz * chan + offset, width, val); 105 } 106 EXPORT_SYMBOL_GPL(skx_write_imc_reg); 107 108 static void enable_rrl(struct skx_imc *imc, int chan, struct reg_rrl *rrl, 109 int rrl_set, bool enable, u32 *rrl_ctl) 110 { 111 enum rrl_source_type source = rrl->sources[rrl_set]; 112 u32 offset = rrl->offsets[rrl_set][0], v; 113 u8 width = rrl->widths[0]; 114 bool first, scrub; 115 116 /* First or last read error. */ 117 first = (source == RRL_SRC_FRE_SCRUB || source == RRL_SRC_FRE_DEMAND); 118 /* Patrol scrub or on-demand read error. */ 119 scrub = (source == RRL_SRC_FRE_SCRUB || source == RRL_SRC_LRE_SCRUB); 120 121 v = skx_read_imc_reg(imc, chan, offset, width); 122 123 if (enable) { 124 /* Save default configurations. */ 125 *rrl_ctl = v; 126 v &= ~rrl->uc_mask; 127 128 if (first) 129 v |= rrl->noover_mask; 130 else 131 v &= ~rrl->noover_mask; 132 133 if (scrub) 134 v |= rrl->en_patspr_mask; 135 else 136 v &= ~rrl->en_patspr_mask; 137 138 v |= rrl->en_mask; 139 } else { 140 /* Restore default configurations. */ 141 if (*rrl_ctl & rrl->uc_mask) 142 v |= rrl->uc_mask; 143 144 if (first) { 145 if (!(*rrl_ctl & rrl->noover_mask)) 146 v &= ~rrl->noover_mask; 147 } else { 148 if (*rrl_ctl & rrl->noover_mask) 149 v |= rrl->noover_mask; 150 } 151 152 if (scrub) { 153 if (!(*rrl_ctl & rrl->en_patspr_mask)) 154 v &= ~rrl->en_patspr_mask; 155 } else { 156 if (*rrl_ctl & rrl->en_patspr_mask) 157 v |= rrl->en_patspr_mask; 158 } 159 160 if (!(*rrl_ctl & rrl->en_mask)) 161 v &= ~rrl->en_mask; 162 } 163 164 skx_write_imc_reg(imc, chan, offset, width, v); 165 } 166 167 static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl, 168 bool enable, u32 *rrl_ctl) 169 { 170 for (int i = 0; i < rrl->set_num; i++) 171 enable_rrl(imc, chan, rrl, i, enable, rrl_ctl + i); 172 } 173 174 static void enable_rrls_ddr(struct skx_imc *imc, bool enable) 175 { 176 struct reg_rrl **rrl_ddr = skx_res_cfg->reg_rrl_ddr; 177 int i, chan_num = skx_res_cfg->ddr_chan_num; 178 struct skx_channel *chan = imc->chan; 179 180 if (!imc->mbase) 181 return; 182 183 for (i = 0; i < chan_num; i++) { 184 enable_rrls(imc, i, rrl_ddr[0], enable, chan[i].rrl_ctl[0]); 185 if (rrl_ddr[1]) 186 enable_rrls(imc, i, rrl_ddr[1], enable, chan[i].rrl_ctl[1]); 187 } 188 } 189 190 static void enable_rrls_hbm(struct skx_imc *imc, bool enable) 191 { 192 struct reg_rrl **rrl_hbm = skx_res_cfg->reg_rrl_hbm; 193 int i, chan_num = skx_res_cfg->hbm_chan_num; 194 struct skx_channel *chan = imc->chan; 195 196 if (!imc->mbase || !imc->hbm_mc || !rrl_hbm[0] || !rrl_hbm[1]) 197 return; 198 199 for (i = 0; i < chan_num; i++) { 200 enable_rrls(imc, i, rrl_hbm[0], enable, chan[i].rrl_ctl[0]); 201 enable_rrls(imc, i, rrl_hbm[1], enable, chan[i].rrl_ctl[1]); 202 } 203 } 204 205 void skx_enable_rrl(bool enable) 206 { 207 struct skx_dev *d; 208 int i, imc_num; 209 210 edac_dbg(2, "\n"); 211 212 list_for_each_entry(d, &dev_edac_list, list) { 213 imc_num = skx_res_cfg->ddr_imc_num; 214 for (i = 0; i < imc_num; i++) 215 enable_rrls_ddr(&d->imc[i], enable); 216 217 imc_num += skx_res_cfg->hbm_imc_num; 218 for (; i < imc_num; i++) 219 enable_rrls_hbm(&d->imc[i], enable); 220 } 221 } 222 EXPORT_SYMBOL_GPL(skx_enable_rrl); 223 224 static struct reg_rrl *get_rrl_reg(struct decoded_addr *res, struct res_config *cfg) 225 { 226 struct skx_imc *imc = &res->dev->imc[res->imc]; 227 228 /* HBM has two groups of RRL sets, one per pseudo-channel. */ 229 if (imc->hbm_mc) 230 return cfg->reg_rrl_hbm[res->cs & 1]; 231 232 /* One group of RRL sets per DDR channel. */ 233 if (!cfg->reg_rrl_ddr[1]) 234 return cfg->reg_rrl_ddr[0]; 235 236 if (res->subch == -1) { 237 skx_printk(KERN_ERR, "Invalid sub-channel id (-1), possibly missing %s ADXL component.\n", component_names[INDEX_SUBCH]); 238 return NULL; 239 } 240 241 /* Two groups of RRL sets per DDR channel (e.g., DMR: one group per sub-channel). */ 242 return cfg->reg_rrl_ddr[res->subch & 1]; 243 } 244 245 void skx_show_rrl(struct decoded_addr *res, char *msg, int len, bool scrub_err) 246 { 247 struct skx_imc *imc = &res->dev->imc[res->imc]; 248 int i, j, n, ch = res->channel; 249 u64 log, corr, status_mask; 250 struct reg_rrl *rrl; 251 bool scrub; 252 u32 offset; 253 u8 width; 254 255 if (!imc->mbase) 256 return; 257 258 rrl = get_rrl_reg(res, skx_res_cfg); 259 if (!rrl) 260 return; 261 262 status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask; 263 264 n = scnprintf(msg, len, " retry_rd_err_log["); 265 for (i = 0; i < rrl->set_num; i++) { 266 scrub = (rrl->sources[i] == RRL_SRC_FRE_SCRUB || rrl->sources[i] == RRL_SRC_LRE_SCRUB); 267 if (scrub_err != scrub) 268 continue; 269 270 for (j = 0; j < rrl->reg_num && len - n > 0; j++) { 271 offset = rrl->offsets[i][j]; 272 width = rrl->widths[j]; 273 log = skx_read_imc_reg(imc, ch, offset, width); 274 275 if (width == 4) 276 n += scnprintf(msg + n, len - n, "%.8llx ", log); 277 else 278 n += scnprintf(msg + n, len - n, "%.16llx ", log); 279 280 /* Clear RRL status if RRL in Linux control mode. */ 281 if (skx_res_cfg->rrl_ctrl_mode == RRL_CTRL_LINUX && !j && (log & status_mask)) 282 skx_write_imc_reg(imc, ch, offset, width, log & ~status_mask); 283 } 284 } 285 286 /* Move back one space. */ 287 n--; 288 n += scnprintf(msg + n, len - n, "]"); 289 290 if (len - n > 0) { 291 n += scnprintf(msg + n, len - n, " correrrcnt["); 292 for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) { 293 offset = rrl->cecnt_offsets[i]; 294 width = rrl->cecnt_widths[i]; 295 corr = skx_read_imc_reg(imc, ch, offset, width); 296 297 /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */ 298 if (skx_res_cfg->type <= SPR) { 299 n += scnprintf(msg + n, len - n, "%.4llx %.4llx ", 300 corr & 0xffff, corr >> 16); 301 } else { 302 /* CPUs {GNR} encode one counter per CORRERRCNT register. */ 303 if (width == 4) 304 n += scnprintf(msg + n, len - n, "%.8llx ", corr); 305 else 306 n += scnprintf(msg + n, len - n, "%.16llx ", corr); 307 } 308 } 309 310 /* Move back one space. */ 311 n--; 312 n += scnprintf(msg + n, len - n, "]"); 313 } 314 } 315 EXPORT_SYMBOL_GPL(skx_show_rrl); 316 317 static bool adxl_component_required(int idx) 318 { 319 return idx == INDEX_SOCKET || 320 idx == INDEX_MEMCTRL || 321 idx == INDEX_CHANNEL || 322 idx == INDEX_DIMM || 323 idx == INDEX_CS; 324 } 325 326 int skx_adxl_get(void) 327 { 328 const char * const *names; 329 int i, j; 330 331 names = adxl_get_component_names(); 332 if (!names) { 333 skx_printk(KERN_NOTICE, "No firmware support for address translation.\n"); 334 return -ENODEV; 335 } 336 337 for (i = 0; i < INDEX_MAX; i++) { 338 for (j = 0; names[j]; j++) { 339 if (!strcmp(component_names[i], names[j])) { 340 component_indices[i] = j; 341 342 if (i >= INDEX_NM_FIRST) 343 adxl_nm_bitmap |= 1 << i; 344 else 345 adxl_bitmap |= 1 << i; 346 347 break; 348 } 349 } 350 351 if (!names[j] && adxl_component_required(i)) 352 goto err; 353 } 354 355 if (skx_mem_cfg_2lm) { 356 if (!adxl_nm_bitmap) 357 skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n"); 358 else 359 edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap); 360 } 361 362 adxl_component_names = names; 363 while (*names++) 364 adxl_component_count++; 365 366 adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values), 367 GFP_KERNEL); 368 if (!adxl_values) { 369 adxl_component_count = 0; 370 return -ENOMEM; 371 } 372 373 adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL); 374 if (!adxl_msg) { 375 adxl_component_count = 0; 376 kfree(adxl_values); 377 return -ENOMEM; 378 } 379 380 return 0; 381 err: 382 skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ", 383 component_names[i]); 384 for (j = 0; names[j]; j++) 385 skx_printk(KERN_CONT, "%s ", names[j]); 386 skx_printk(KERN_CONT, "\n"); 387 388 return -ENODEV; 389 } 390 EXPORT_SYMBOL_GPL(skx_adxl_get); 391 392 void skx_adxl_put(void) 393 { 394 adxl_component_count = 0; 395 kfree(adxl_values); 396 kfree(adxl_msg); 397 } 398 EXPORT_SYMBOL_GPL(skx_adxl_put); 399 400 void skx_init_mc_mapping(struct skx_dev *d) 401 { 402 /* 403 * By default, the BIOS presents all memory controllers within each 404 * socket to the EDAC driver. The physical indices are the same as 405 * the logical indices of the memory controllers enumerated by the 406 * EDAC driver. 407 */ 408 for (int i = 0; i < d->num_imc; i++) 409 d->imc[i].mc_mapping = i; 410 } 411 EXPORT_SYMBOL_GPL(skx_init_mc_mapping); 412 413 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc) 414 { 415 edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n", 416 pmc, lmc); 417 418 d->imc[lmc].mc_mapping = pmc; 419 } 420 EXPORT_SYMBOL_GPL(skx_set_mc_mapping); 421 422 static int skx_get_mc_mapping(struct skx_dev *d, u8 pmc) 423 { 424 for (int lmc = 0; lmc < d->num_imc; lmc++) { 425 if (d->imc[lmc].mc_mapping == pmc) { 426 edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n", 427 pmc, lmc); 428 429 return lmc; 430 } 431 } 432 433 return -1; 434 } 435 436 static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) 437 { 438 int i, lmc, len = 0; 439 struct skx_dev *d; 440 441 if (res->addr >= skx_tohm || (res->addr >= skx_tolm && 442 res->addr < BIT_ULL(32))) { 443 edac_dbg(0, "Address 0x%llx out of range\n", res->addr); 444 return false; 445 } 446 447 if (adxl_decode(res->addr, adxl_values)) { 448 edac_dbg(0, "Failed to decode 0x%llx\n", res->addr); 449 return false; 450 } 451 452 /* 453 * GNR with a Flat2LM memory configuration may mistakenly classify 454 * a near-memory error(DDR5) as a far-memory error(CXL), resulting 455 * in the incorrect selection of decoded ADXL components. 456 * To address this, prefetch the decoded far-memory controller ID 457 * and adjust the error source to near-memory if the far-memory 458 * controller ID is invalid. 459 */ 460 if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) { 461 res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; 462 if (res->imc == -1) { 463 err_src = ERR_SRC_2LM_NM; 464 edac_dbg(0, "Adjust the error source to near-memory.\n"); 465 } 466 } 467 468 res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; 469 if (err_src == ERR_SRC_2LM_NM) { 470 res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? 471 (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; 472 res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? 473 (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1; 474 res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ? 475 (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1; 476 res->cs = (adxl_nm_bitmap & BIT_NM_CS) ? 477 (int)adxl_values[component_indices[INDEX_NM_CS]] : -1; 478 res->subch = (adxl_nm_bitmap & BIT_NM_SUBCH) ? 479 (int)adxl_values[component_indices[INDEX_NM_SUBCH]] : -1; 480 } else { 481 res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; 482 res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; 483 res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; 484 res->cs = (int)adxl_values[component_indices[INDEX_CS]]; 485 res->subch = (adxl_bitmap & BIT_SUBCH) ? 486 (int)adxl_values[component_indices[INDEX_SUBCH]] : -1; 487 } 488 489 if (res->imc < 0) { 490 skx_printk(KERN_ERR, "Bad imc %d\n", res->imc); 491 return false; 492 } 493 494 list_for_each_entry(d, &dev_edac_list, list) { 495 if (d->imc[0].src_id == res->socket) { 496 res->dev = d; 497 break; 498 } 499 } 500 501 if (!res->dev) { 502 skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", 503 res->socket, res->imc); 504 return false; 505 } 506 507 lmc = skx_get_mc_mapping(d, res->imc); 508 if (lmc < 0) { 509 skx_printk(KERN_ERR, "No lmc for imc %d\n", res->imc); 510 return false; 511 } 512 513 res->imc = lmc; 514 515 for (i = 0; i < adxl_component_count; i++) { 516 if (adxl_values[i] == ~0x0ull) 517 continue; 518 519 len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx", 520 adxl_component_names[i], adxl_values[i]); 521 if (MSG_SIZE - len <= 0) 522 break; 523 } 524 525 res->decoded_by_adxl = true; 526 527 return true; 528 } 529 530 void skx_set_mem_cfg(bool mem_cfg_2lm) 531 { 532 skx_mem_cfg_2lm = mem_cfg_2lm; 533 } 534 EXPORT_SYMBOL_GPL(skx_set_mem_cfg); 535 536 void skx_set_res_cfg(struct res_config *cfg) 537 { 538 skx_res_cfg = cfg; 539 } 540 EXPORT_SYMBOL_GPL(skx_set_res_cfg); 541 542 void skx_set_decode(skx_decode_f decode) 543 { 544 driver_decode = decode; 545 } 546 EXPORT_SYMBOL_GPL(skx_set_decode); 547 548 void skx_set_show_rrl(skx_show_rrl_f rrl) 549 { 550 show_rrl = rrl; 551 } 552 EXPORT_SYMBOL_GPL(skx_set_show_rrl); 553 554 static int skx_get_pkg_id(struct skx_dev *d, u8 *id) 555 { 556 int node; 557 int cpu; 558 559 node = pcibus_to_node(d->util_all->bus); 560 if (numa_valid_node(node)) { 561 for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) { 562 struct cpuinfo_x86 *c = &cpu_data(cpu); 563 564 if (c->initialized && cpu_to_node(cpu) == node) { 565 *id = topology_physical_package_id(cpu); 566 return 0; 567 } 568 } 569 } 570 571 skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n"); 572 return -ENODEV; 573 } 574 575 int skx_get_src_id(struct skx_dev *d, int off, u8 *id) 576 { 577 u32 reg; 578 579 /* 580 * The 3-bit source IDs in PCI configuration space registers are limited 581 * to 8 unique IDs, and each ID is local to a UPI/QPI domain. 582 * 583 * Source IDs cannot be used to map devices to sockets on UV systems 584 * because they can exceed 8 sockets and have multiple UPI/QPI domains 585 * with identical, repeating source IDs. 586 */ 587 if (is_uv_system()) 588 return skx_get_pkg_id(d, id); 589 590 if (pci_read_config_dword(d->util_all, off, ®)) { 591 skx_printk(KERN_ERR, "Failed to read src id\n"); 592 return -ENODEV; 593 } 594 595 *id = GET_BITFIELD(reg, 12, 14); 596 return 0; 597 } 598 EXPORT_SYMBOL_GPL(skx_get_src_id); 599 600 static int get_width(u32 mtr) 601 { 602 switch (GET_BITFIELD(mtr, 8, 9)) { 603 case 0: 604 return DEV_X4; 605 case 1: 606 return DEV_X8; 607 case 2: 608 return DEV_X16; 609 } 610 return DEV_UNKNOWN; 611 } 612 613 /* 614 * We use the per-socket device @cfg->did to count how many sockets are present, 615 * and to detemine which PCI buses are associated with each socket. Allocate 616 * and build the full list of all the skx_dev structures that we need here. 617 */ 618 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) 619 { 620 int ndev = 0, imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num; 621 struct pci_dev *pdev, *prev; 622 struct skx_dev *d; 623 u32 reg; 624 625 prev = NULL; 626 for (;;) { 627 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, cfg->decs_did, prev); 628 if (!pdev) 629 break; 630 ndev++; 631 d = kzalloc_flex(*d, imc, imc_num); 632 if (!d) { 633 pci_dev_put(pdev); 634 return -ENOMEM; 635 } 636 637 if (pci_read_config_dword(pdev, cfg->busno_cfg_offset, ®)) { 638 kfree(d); 639 pci_dev_put(pdev); 640 skx_printk(KERN_ERR, "Failed to read bus idx\n"); 641 return -ENODEV; 642 } 643 644 d->bus[0] = GET_BITFIELD(reg, 0, 7); 645 d->bus[1] = GET_BITFIELD(reg, 8, 15); 646 if (cfg->type == SKX) { 647 d->seg = pci_domain_nr(pdev->bus); 648 d->bus[2] = GET_BITFIELD(reg, 16, 23); 649 d->bus[3] = GET_BITFIELD(reg, 24, 31); 650 } else { 651 d->seg = GET_BITFIELD(reg, 16, 23); 652 } 653 654 d->num_imc = imc_num; 655 656 edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x, imcs %d\n", 657 d->bus[0], d->bus[1], d->bus[2], d->bus[3], imc_num); 658 list_add_tail(&d->list, &dev_edac_list); 659 prev = pdev; 660 661 skx_init_mc_mapping(d); 662 } 663 664 if (list) 665 *list = &dev_edac_list; 666 return ndev; 667 } 668 EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings); 669 670 struct list_head *skx_get_edac_list(void) 671 { 672 return &dev_edac_list; 673 } 674 EXPORT_SYMBOL_GPL(skx_get_edac_list); 675 676 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm) 677 { 678 struct pci_dev *pdev; 679 u32 reg; 680 681 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL); 682 if (!pdev) { 683 edac_dbg(2, "Can't get tolm/tohm\n"); 684 return -ENODEV; 685 } 686 687 if (pci_read_config_dword(pdev, off[0], ®)) { 688 skx_printk(KERN_ERR, "Failed to read tolm\n"); 689 goto fail; 690 } 691 skx_tolm = reg; 692 693 if (pci_read_config_dword(pdev, off[1], ®)) { 694 skx_printk(KERN_ERR, "Failed to read lower tohm\n"); 695 goto fail; 696 } 697 skx_tohm = reg; 698 699 if (pci_read_config_dword(pdev, off[2], ®)) { 700 skx_printk(KERN_ERR, "Failed to read upper tohm\n"); 701 goto fail; 702 } 703 skx_tohm |= (u64)reg << 32; 704 705 pci_dev_put(pdev); 706 *tolm = skx_tolm; 707 *tohm = skx_tohm; 708 edac_dbg(2, "tolm = 0x%llx tohm = 0x%llx\n", skx_tolm, skx_tohm); 709 return 0; 710 fail: 711 pci_dev_put(pdev); 712 return -ENODEV; 713 } 714 EXPORT_SYMBOL_GPL(skx_get_hi_lo); 715 716 void skx_set_hi_lo(u64 tolm, u64 tohm) 717 { 718 skx_tolm = tolm; 719 skx_tohm = tohm; 720 } 721 EXPORT_SYMBOL_GPL(skx_set_hi_lo); 722 723 static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, 724 int minval, int maxval, const char *name) 725 { 726 u32 val = GET_BITFIELD(reg, lobit, hibit); 727 728 if (val < minval || val > maxval) { 729 edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name, val, reg); 730 return -EINVAL; 731 } 732 return val + add; 733 } 734 735 #define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks") 736 #define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 7, "rows") 737 #define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols") 738 739 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, 740 struct skx_imc *imc, int chan, int dimmno, 741 struct res_config *cfg) 742 { 743 int banks, ranks, rows, cols, npages; 744 enum mem_type mtype; 745 u64 size; 746 747 ranks = numrank(mtr); 748 rows = numrow(mtr); 749 cols = imc->hbm_mc ? 6 : numcol(mtr); 750 751 if (ranks < 0 || rows < 0 || cols < 0) 752 return 0; 753 754 if (imc->hbm_mc) { 755 banks = 32; 756 mtype = MEM_HBM2; 757 } else if (cfg->support_ddr5) { 758 banks = 32; 759 mtype = MEM_DDR5; 760 } else { 761 banks = 16; 762 mtype = MEM_DDR4; 763 } 764 765 /* 766 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20) 767 */ 768 size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3); 769 npages = MiB_TO_PAGES(size); 770 771 edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n", 772 imc->mc, chan, dimmno, size, npages, 773 banks, 1 << ranks, rows, cols); 774 775 imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mcmtr, 0, 0); 776 imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mcmtr, 9, 9); 777 imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0); 778 imc->chan[chan].dimms[dimmno].rowbits = rows; 779 imc->chan[chan].dimms[dimmno].colbits = cols; 780 781 dimm->nr_pages = npages; 782 dimm->grain = 32; 783 dimm->dtype = get_width(mtr); 784 dimm->mtype = mtype; 785 dimm->edac_mode = EDAC_SECDED; /* likely better than this */ 786 787 if (imc->hbm_mc) 788 snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u", 789 imc->src_id, imc->lmc, chan); 790 else 791 snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", 792 imc->src_id, imc->lmc, chan, dimmno); 793 794 return 1; 795 } 796 EXPORT_SYMBOL_GPL(skx_get_dimm_info); 797 798 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, 799 int chan, int dimmno, const char *mod_str) 800 { 801 int smbios_handle; 802 u32 dev_handle; 803 u16 flags; 804 u64 size = 0; 805 806 dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc, 807 imc->src_id, 0); 808 809 smbios_handle = nfit_get_smbios_id(dev_handle, &flags); 810 if (smbios_handle == -EOPNOTSUPP) { 811 pr_warn_once("%s: Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n", mod_str); 812 goto unknown_size; 813 } 814 815 if (smbios_handle < 0) { 816 skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle); 817 goto unknown_size; 818 } 819 820 if (flags & ACPI_NFIT_MEM_MAP_FAILED) { 821 skx_printk(KERN_ERR, "NVDIMM ADR=0x%x is not mapped\n", dev_handle); 822 goto unknown_size; 823 } 824 825 size = dmi_memdev_size(smbios_handle); 826 if (size == ~0ull) 827 skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n", 828 dev_handle, smbios_handle); 829 830 unknown_size: 831 dimm->nr_pages = size >> PAGE_SHIFT; 832 dimm->grain = 32; 833 dimm->dtype = DEV_UNKNOWN; 834 dimm->mtype = MEM_NVDIMM; 835 dimm->edac_mode = EDAC_SECDED; /* likely better than this */ 836 837 edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n", 838 imc->mc, chan, dimmno, size >> 20, dimm->nr_pages); 839 840 snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", 841 imc->src_id, imc->lmc, chan, dimmno); 842 843 return (size == 0 || size == ~0ull) ? 0 : 1; 844 } 845 EXPORT_SYMBOL_GPL(skx_get_nvdimm_info); 846 847 int skx_register_mci(struct skx_imc *imc, struct device *dev, 848 const char *dev_name, const char *ctl_name, 849 const char *mod_str, get_dimm_config_f get_dimm_config, 850 struct res_config *cfg) 851 { 852 struct mem_ctl_info *mci; 853 struct edac_mc_layer layers[2]; 854 struct skx_pvt *pvt; 855 int rc; 856 857 /* Allocate a new MC control structure */ 858 layers[0].type = EDAC_MC_LAYER_CHANNEL; 859 layers[0].size = imc->num_channels; 860 layers[0].is_virt_csrow = false; 861 layers[1].type = EDAC_MC_LAYER_SLOT; 862 layers[1].size = imc->num_dimms; 863 layers[1].is_virt_csrow = true; 864 mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers, 865 sizeof(struct skx_pvt)); 866 867 if (unlikely(!mci)) 868 return -ENOMEM; 869 870 edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci); 871 872 /* Associate skx_dev and mci for future usage */ 873 imc->mci = mci; 874 pvt = mci->pvt_info; 875 pvt->imc = imc; 876 877 mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name, 878 imc->src_id, imc->lmc); 879 if (!mci->ctl_name) { 880 rc = -ENOMEM; 881 goto fail0; 882 } 883 884 mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM; 885 if (cfg->support_ddr5) 886 mci->mtype_cap |= MEM_FLAG_DDR5; 887 mci->edac_ctl_cap = EDAC_FLAG_NONE; 888 mci->edac_cap = EDAC_FLAG_NONE; 889 mci->mod_name = mod_str; 890 mci->dev_name = dev_name; 891 mci->ctl_page_to_phys = NULL; 892 893 rc = get_dimm_config(mci, cfg); 894 if (rc < 0) 895 goto fail; 896 897 /* Record ptr to the generic device */ 898 mci->pdev = dev; 899 900 /* Add this new MC control structure to EDAC's list of MCs */ 901 if (unlikely(edac_mc_add_mc(mci))) { 902 edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); 903 rc = -EINVAL; 904 goto fail; 905 } 906 907 return 0; 908 909 fail: 910 kfree(mci->ctl_name); 911 fail0: 912 edac_mc_free(mci); 913 imc->mci = NULL; 914 return rc; 915 } 916 EXPORT_SYMBOL_GPL(skx_register_mci); 917 918 static void skx_unregister_mci(struct skx_imc *imc) 919 { 920 struct mem_ctl_info *mci = imc->mci; 921 922 if (!mci) 923 return; 924 925 edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci); 926 927 /* Remove MC sysfs nodes */ 928 edac_mc_del_mc(mci->pdev); 929 930 edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); 931 kfree(mci->ctl_name); 932 edac_mc_free(mci); 933 } 934 935 static void skx_mce_output_error(struct mem_ctl_info *mci, 936 const struct mce *m, 937 struct decoded_addr *res) 938 { 939 enum hw_event_mc_err_type tp_event; 940 char *optype; 941 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); 942 bool overflow = GET_BITFIELD(m->status, 62, 62); 943 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); 944 bool scrub_err = false; 945 bool recoverable; 946 int len; 947 u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); 948 u32 mscod = GET_BITFIELD(m->status, 16, 31); 949 u32 errcode = GET_BITFIELD(m->status, 0, 15); 950 u32 optypenum = GET_BITFIELD(m->status, 4, 6); 951 952 recoverable = GET_BITFIELD(m->status, 56, 56); 953 954 if (uncorrected_error) { 955 core_err_cnt = 1; 956 if (ripv) { 957 tp_event = HW_EVENT_ERR_UNCORRECTED; 958 } else { 959 tp_event = HW_EVENT_ERR_FATAL; 960 } 961 } else { 962 tp_event = HW_EVENT_ERR_CORRECTED; 963 } 964 965 switch (optypenum) { 966 case 0: 967 optype = "generic undef request error"; 968 break; 969 case 1: 970 optype = "memory read error"; 971 break; 972 case 2: 973 optype = "memory write error"; 974 break; 975 case 3: 976 optype = "addr/cmd error"; 977 break; 978 case 4: 979 optype = "memory scrubbing error"; 980 scrub_err = true; 981 break; 982 default: 983 optype = "reserved"; 984 break; 985 } 986 987 if (res->decoded_by_adxl) { 988 len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", 989 overflow ? " OVERFLOW" : "", 990 (uncorrected_error && recoverable) ? " recoverable" : "", 991 mscod, errcode, adxl_msg); 992 } else { 993 len = scnprintf(skx_msg, MSG_SIZE, 994 "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x", 995 overflow ? " OVERFLOW" : "", 996 (uncorrected_error && recoverable) ? " recoverable" : "", 997 mscod, errcode, 998 res->socket, res->imc, res->rank, 999 res->row, res->column, res->bank_address, res->bank_group); 1000 } 1001 1002 if (show_rrl) 1003 show_rrl(res, skx_msg + len, MSG_SIZE - len, scrub_err); 1004 1005 edac_dbg(0, "%s\n", skx_msg); 1006 1007 /* Call the helper to output message */ 1008 edac_mc_handle_error(tp_event, mci, core_err_cnt, 1009 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, 1010 res->channel, res->dimm, -1, 1011 optype, skx_msg); 1012 } 1013 1014 static enum error_source skx_error_source(const struct mce *m) 1015 { 1016 u32 errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; 1017 1018 if (errcode != MCACOD_MEM_CTL_ERR && errcode != MCACOD_EXT_MEM_ERR) 1019 return ERR_SRC_NOT_MEMORY; 1020 1021 if (!skx_mem_cfg_2lm) 1022 return ERR_SRC_1LM; 1023 1024 if (errcode == MCACOD_EXT_MEM_ERR) 1025 return ERR_SRC_2LM_NM; 1026 1027 return ERR_SRC_2LM_FM; 1028 } 1029 1030 int skx_mce_check_error(struct notifier_block *nb, unsigned long val, 1031 void *data) 1032 { 1033 struct mce *mce = (struct mce *)data; 1034 enum error_source err_src; 1035 struct decoded_addr res; 1036 struct mem_ctl_info *mci; 1037 char *type; 1038 1039 if (mce->kflags & MCE_HANDLED_CEC) 1040 return NOTIFY_DONE; 1041 1042 err_src = skx_error_source(mce); 1043 1044 /* Ignore unless this is memory related with an address */ 1045 if (err_src == ERR_SRC_NOT_MEMORY || !(mce->status & MCI_STATUS_ADDRV)) 1046 return NOTIFY_DONE; 1047 1048 memset(&res, 0, sizeof(res)); 1049 res.mce = mce; 1050 res.addr = mce->addr & MCI_ADDR_PHYSADDR; 1051 if (!pfn_to_online_page(res.addr >> PAGE_SHIFT) && !arch_is_platform_page(res.addr)) { 1052 pr_err("Invalid address 0x%llx in IA32_MC%d_ADDR\n", mce->addr, mce->bank); 1053 return NOTIFY_DONE; 1054 } 1055 1056 /* Try driver decoder first */ 1057 if (!(driver_decode && driver_decode(&res))) { 1058 /* Then try firmware decoder (ACPI DSM methods) */ 1059 if (!(adxl_component_count && skx_adxl_decode(&res, err_src))) 1060 return NOTIFY_DONE; 1061 } 1062 1063 mci = res.dev->imc[res.imc].mci; 1064 1065 if (!mci) 1066 return NOTIFY_DONE; 1067 1068 if (mce->mcgstatus & MCG_STATUS_MCIP) 1069 type = "Exception"; 1070 else 1071 type = "Event"; 1072 1073 skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); 1074 1075 skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: 0x%llx " 1076 "Bank %d: 0x%llx\n", mce->extcpu, type, 1077 mce->mcgstatus, mce->bank, mce->status); 1078 skx_mc_printk(mci, KERN_DEBUG, "TSC 0x%llx ", mce->tsc); 1079 skx_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", mce->addr); 1080 skx_mc_printk(mci, KERN_DEBUG, "MISC 0x%llx ", mce->misc); 1081 1082 skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:0x%x TIME %llu SOCKET " 1083 "%u APIC 0x%x\n", mce->cpuvendor, mce->cpuid, 1084 mce->time, mce->socketid, mce->apicid); 1085 1086 skx_mce_output_error(mci, mce, &res); 1087 1088 mce->kflags |= MCE_HANDLED_EDAC; 1089 return NOTIFY_DONE; 1090 } 1091 EXPORT_SYMBOL_GPL(skx_mce_check_error); 1092 1093 void skx_remove(void) 1094 { 1095 int i, j; 1096 struct skx_dev *d, *tmp; 1097 1098 edac_dbg(0, "\n"); 1099 1100 list_for_each_entry_safe(d, tmp, &dev_edac_list, list) { 1101 list_del(&d->list); 1102 for (i = 0; i < d->num_imc; i++) { 1103 if (d->imc[i].mci) 1104 skx_unregister_mci(&d->imc[i]); 1105 1106 if (d->imc[i].mdev) 1107 pci_dev_put(d->imc[i].mdev); 1108 1109 if (d->imc[i].mbase) 1110 iounmap(d->imc[i].mbase); 1111 1112 if (d->imc[i].dev) 1113 put_device(d->imc[i].dev); 1114 1115 for (j = 0; j < d->imc[i].num_channels; j++) { 1116 if (d->imc[i].chan[j].cdev) 1117 pci_dev_put(d->imc[i].chan[j].cdev); 1118 } 1119 } 1120 if (d->util_all) 1121 pci_dev_put(d->util_all); 1122 if (d->pcu_cr3) 1123 pci_dev_put(d->pcu_cr3); 1124 if (d->sad_all) 1125 pci_dev_put(d->sad_all); 1126 if (d->uracu) 1127 pci_dev_put(d->uracu); 1128 1129 kfree(d); 1130 } 1131 } 1132 EXPORT_SYMBOL_GPL(skx_remove); 1133 1134 #ifdef CONFIG_EDAC_DEBUG 1135 /* 1136 * Debug feature. 1137 * Exercise the address decode logic by writing an address to 1138 * /sys/kernel/debug/edac/{skx,i10nm,imh}_test/addr. 1139 */ 1140 static struct dentry *skx_test; 1141 1142 static int debugfs_u64_set(void *data, u64 val) 1143 { 1144 struct mce m; 1145 1146 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); 1147 1148 memset(&m, 0, sizeof(m)); 1149 /* ADDRV + MemRd + Unknown channel */ 1150 m.status = MCI_STATUS_ADDRV + 0x90; 1151 /* One corrected error */ 1152 m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); 1153 m.addr = val; 1154 skx_mce_check_error(NULL, 0, &m); 1155 1156 return 0; 1157 } 1158 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); 1159 1160 void skx_setup_debug(const char *name) 1161 { 1162 skx_test = edac_debugfs_create_dir(name); 1163 if (!skx_test) 1164 return; 1165 1166 if (!edac_debugfs_create_file("addr", 0200, skx_test, 1167 NULL, &fops_u64_wo)) { 1168 debugfs_remove(skx_test); 1169 skx_test = NULL; 1170 } 1171 } 1172 EXPORT_SYMBOL_GPL(skx_setup_debug); 1173 1174 void skx_teardown_debug(void) 1175 { 1176 debugfs_remove_recursive(skx_test); 1177 } 1178 EXPORT_SYMBOL_GPL(skx_teardown_debug); 1179 #endif /*CONFIG_EDAC_DEBUG*/ 1180 1181 MODULE_LICENSE("GPL v2"); 1182 MODULE_AUTHOR("Tony Luck"); 1183 MODULE_DESCRIPTION("MC Driver for Intel server processors"); 1184