1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Driver for Intel(R) 10nm server memory controller. 4 * Copyright (c) 2019, Intel Corporation. 5 * 6 */ 7 8 #include <linux/kernel.h> 9 #include <linux/io.h> 10 #include <asm/cpu_device_id.h> 11 #include <asm/intel-family.h> 12 #include <asm/mce.h> 13 #include "edac_module.h" 14 #include "skx_common.h" 15 16 #define I10NM_REVISION "v0.0.6" 17 #define EDAC_MOD_STR "i10nm_edac" 18 19 /* Debug macros */ 20 #define i10nm_printk(level, fmt, arg...) \ 21 edac_printk(level, "i10nm", fmt, ##arg) 22 23 #define I10NM_GET_SCK_BAR(d, reg) \ 24 pci_read_config_dword((d)->uracu, 0xd0, &(reg)) 25 #define I10NM_GET_IMC_BAR(d, i, reg) \ 26 pci_read_config_dword((d)->uracu, \ 27 (res_cfg->type == GNR ? 0xd4 : 0xd8) + (i) * 4, &(reg)) 28 #define I10NM_GET_SAD(d, offset, i, reg)\ 29 pci_read_config_dword((d)->sad_all, (offset) + (i) * \ 30 (res_cfg->type == GNR ? 12 : 8), &(reg)) 31 #define I10NM_GET_HBM_IMC_BAR(d, reg) \ 32 pci_read_config_dword((d)->uracu, 0xd4, &(reg)) 33 #define I10NM_GET_CAPID3_CFG(d, reg) \ 34 pci_read_config_dword((d)->pcu_cr3, \ 35 res_cfg->type == GNR ? 0x290 : 0x90, &(reg)) 36 #define I10NM_GET_CAPID5_CFG(d, reg) \ 37 pci_read_config_dword((d)->pcu_cr3, \ 38 res_cfg->type == GNR ? 0x298 : 0x98, &(reg)) 39 #define I10NM_GET_DIMMMTR(m, i, j) \ 40 readl((m)->mbase + ((m)->hbm_mc ? 0x80c : \ 41 (res_cfg->type == GNR ? 0xc0c : 0x2080c)) + \ 42 (i) * (m)->chan_mmio_sz + (j) * 4) 43 #define I10NM_GET_MCDDRTCFG(m, i) \ 44 readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \ 45 (i) * (m)->chan_mmio_sz) 46 #define I10NM_GET_MCMTR(m, i) \ 47 readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \ 48 (res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \ 49 (i) * (m)->chan_mmio_sz) 50 51 #define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23) 52 #define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12) 53 #define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \ 54 GET_BITFIELD(reg, 0, 10) + 1) << 12) 55 #define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \ 56 ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000) 57 58 #define I10NM_GNR_IMC_MMIO_OFFSET 0x24c000 59 #define I10NM_GNR_D_IMC_MMIO_OFFSET 0x206000 60 #define I10NM_GNR_IMC_MMIO_SIZE 0x4000 61 #define I10NM_HBM_IMC_MMIO_SIZE 0x9000 62 #define I10NM_DDR_IMC_CH_CNT(reg) GET_BITFIELD(reg, 21, 24) 63 #define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30) 64 #define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29) 65 66 #define I10NM_MAX_SAD 16 67 #define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0) 68 #define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5) 69 70 static struct list_head *i10nm_edac_list; 71 72 static struct res_config *res_cfg; 73 static int retry_rd_err_log; 74 static int decoding_via_mca; 75 static bool mem_cfg_2lm; 76 static bool no_adxl; 77 78 static struct reg_rrl icx_reg_rrl_ddr = { 79 .set_num = 2, 80 .reg_num = 6, 81 .sources = {RRL_SRC_LRE_SCRUB, RRL_SRC_LRE_DEMAND}, 82 .offsets = { 83 {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}, 84 {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}, 85 }, 86 .widths = {4, 4, 4, 4, 4, 8}, 87 .v_mask = BIT(0), 88 .uc_mask = BIT(1), 89 .over_mask = BIT(2), 90 .en_patspr_mask = BIT(13), 91 .noover_mask = BIT(14), 92 .en_mask = BIT(15), 93 94 .cecnt_num = 4, 95 .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, 96 .cecnt_widths = {4, 4, 4, 4}, 97 }; 98 99 static struct reg_rrl spr_reg_rrl_ddr = { 100 .set_num = 3, 101 .reg_num = 6, 102 .sources = {RRL_SRC_LRE_SCRUB, RRL_SRC_LRE_DEMAND, RRL_SRC_FRE_DEMAND}, 103 .offsets = { 104 {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}, 105 {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}, 106 {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}, 107 }, 108 .widths = {4, 4, 8, 4, 4, 8}, 109 .v_mask = BIT(0), 110 .uc_mask = BIT(1), 111 .over_mask = BIT(2), 112 .en_patspr_mask = BIT(13), 113 .noover_mask = BIT(14), 114 .en_mask = BIT(15), 115 116 .cecnt_num = 4, 117 .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, 118 .cecnt_widths = {4, 4, 4, 4}, 119 }; 120 121 static struct reg_rrl spr_reg_rrl_hbm_pch0 = { 122 .set_num = 2, 123 .reg_num = 6, 124 .sources = {RRL_SRC_LRE_SCRUB, RRL_SRC_LRE_DEMAND}, 125 .offsets = { 126 {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}, 127 {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}, 128 }, 129 .widths = {4, 4, 8, 4, 4, 8}, 130 .v_mask = BIT(0), 131 .uc_mask = BIT(1), 132 .over_mask = BIT(2), 133 .en_patspr_mask = BIT(13), 134 .noover_mask = BIT(14), 135 .en_mask = BIT(15), 136 137 .cecnt_num = 4, 138 .cecnt_offsets = {0x2818, 0x281c, 0x2820, 0x2824}, 139 .cecnt_widths = {4, 4, 4, 4}, 140 }; 141 142 static struct reg_rrl spr_reg_rrl_hbm_pch1 = { 143 .set_num = 2, 144 .reg_num = 6, 145 .sources = {RRL_SRC_LRE_SCRUB, RRL_SRC_LRE_DEMAND}, 146 .offsets = { 147 {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}, 148 {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}, 149 }, 150 .widths = {4, 4, 8, 4, 4, 8}, 151 .v_mask = BIT(0), 152 .uc_mask = BIT(1), 153 .over_mask = BIT(2), 154 .en_patspr_mask = BIT(13), 155 .noover_mask = BIT(14), 156 .en_mask = BIT(15), 157 158 .cecnt_num = 4, 159 .cecnt_offsets = {0x2c18, 0x2c1c, 0x2c20, 0x2c24}, 160 .cecnt_widths = {4, 4, 4, 4}, 161 }; 162 163 static struct reg_rrl gnr_reg_rrl_ddr = { 164 .set_num = 4, 165 .reg_num = 6, 166 .sources = {RRL_SRC_FRE_SCRUB, RRL_SRC_FRE_DEMAND, RRL_SRC_LRE_SCRUB, RRL_SRC_LRE_DEMAND}, 167 .offsets = { 168 {0x2f10, 0x2f20, 0x2f30, 0x2f50, 0x2f60, 0xba0}, 169 {0x2f14, 0x2f24, 0x2f38, 0x2f54, 0x2f64, 0xba8}, 170 {0x2f18, 0x2f28, 0x2f40, 0x2f58, 0x2f68, 0xbb0}, 171 {0x2f1c, 0x2f2c, 0x2f48, 0x2f5c, 0x2f6c, 0xbb8}, 172 }, 173 .widths = {4, 4, 8, 4, 4, 8}, 174 .v_mask = BIT(0), 175 .uc_mask = BIT(1), 176 .over_mask = BIT(2), 177 .en_patspr_mask = BIT(14), 178 .noover_mask = BIT(15), 179 .en_mask = BIT(12), 180 181 .cecnt_num = 8, 182 .cecnt_offsets = {0x2c10, 0x2c14, 0x2c18, 0x2c1c, 0x2c20, 0x2c24, 0x2c28, 0x2c2c}, 183 .cecnt_widths = {4, 4, 4, 4, 4, 4, 4, 4}, 184 }; 185 186 static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, 187 unsigned int dev, unsigned int fun) 188 { 189 struct pci_dev *pdev; 190 191 pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun)); 192 if (!pdev) { 193 edac_dbg(2, "No device %02x:%02x.%x\n", 194 bus, dev, fun); 195 return NULL; 196 } 197 198 if (unlikely(pci_enable_device(pdev) < 0)) { 199 edac_dbg(2, "Failed to enable device %02x:%02x.%x\n", 200 bus, dev, fun); 201 pci_dev_put(pdev); 202 return NULL; 203 } 204 205 return pdev; 206 } 207 208 /** 209 * i10nm_get_imc_num() - Get the number of present DDR memory controllers. 210 * 211 * @cfg : The pointer to the structure of EDAC resource configurations. 212 * 213 * For Granite Rapids CPUs, the number of present DDR memory controllers read 214 * at runtime overwrites the value statically configured in @cfg->ddr_imc_num. 215 * For other CPUs, the number of present DDR memory controllers is statically 216 * configured in @cfg->ddr_imc_num. 217 * 218 * RETURNS : 0 on success, < 0 on failure. 219 */ 220 static int i10nm_get_imc_num(struct res_config *cfg) 221 { 222 int n, imc_num, chan_num = 0; 223 struct skx_dev *d; 224 u32 reg; 225 226 list_for_each_entry(d, i10nm_edac_list, list) { 227 d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus], 228 res_cfg->pcu_cr3_bdf.dev, 229 res_cfg->pcu_cr3_bdf.fun); 230 if (!d->pcu_cr3) 231 continue; 232 233 if (I10NM_GET_CAPID5_CFG(d, reg)) 234 continue; 235 236 n = I10NM_DDR_IMC_CH_CNT(reg); 237 238 if (!chan_num) { 239 chan_num = n; 240 edac_dbg(2, "Get DDR CH number: %d\n", chan_num); 241 } else if (chan_num != n) { 242 i10nm_printk(KERN_NOTICE, "Get DDR CH numbers: %d, %d\n", chan_num, n); 243 } 244 } 245 246 switch (cfg->type) { 247 case GNR: 248 /* 249 * One channel per DDR memory controller for Granite Rapids CPUs. 250 */ 251 imc_num = chan_num; 252 253 if (!imc_num) { 254 i10nm_printk(KERN_ERR, "Invalid DDR MC number\n"); 255 return -ENODEV; 256 } 257 258 if (cfg->ddr_imc_num != imc_num) { 259 /* 260 * Update the configuration data to reflect the number of 261 * present DDR memory controllers. 262 */ 263 cfg->ddr_imc_num = imc_num; 264 edac_dbg(2, "Set DDR MC number: %d", imc_num); 265 266 /* Release and reallocate skx_dev list with the updated number. */ 267 skx_remove(); 268 if (skx_get_all_bus_mappings(cfg, &i10nm_edac_list) <= 0) 269 return -ENODEV; 270 } 271 272 return 0; 273 default: 274 /* 275 * For other CPUs, the number of present DDR memory controllers 276 * is statically pre-configured in cfg->ddr_imc_num. 277 */ 278 return 0; 279 } 280 } 281 282 static bool i10nm_check_2lm(struct res_config *cfg) 283 { 284 struct skx_dev *d; 285 u32 reg; 286 int i; 287 288 list_for_each_entry(d, i10nm_edac_list, list) { 289 d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->sad_all_bdf.bus], 290 res_cfg->sad_all_bdf.dev, 291 res_cfg->sad_all_bdf.fun); 292 if (!d->sad_all) 293 continue; 294 295 for (i = 0; i < I10NM_MAX_SAD; i++) { 296 I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg); 297 if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) { 298 edac_dbg(2, "2-level memory configuration.\n"); 299 return true; 300 } 301 } 302 } 303 304 return false; 305 } 306 307 /* 308 * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code. 309 * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS. 310 */ 311 static bool i10nm_mscod_is_ddrt(u32 mscod) 312 { 313 switch (res_cfg->type) { 314 case I10NM: 315 switch (mscod) { 316 case 0x0106: case 0x0107: 317 case 0x0800: case 0x0804: 318 case 0x0806 ... 0x0808: 319 case 0x080a ... 0x080e: 320 case 0x0810: case 0x0811: 321 case 0x0816: case 0x081e: 322 case 0x081f: 323 return true; 324 } 325 326 break; 327 case SPR: 328 switch (mscod) { 329 case 0x0800: case 0x0804: 330 case 0x0806 ... 0x0808: 331 case 0x080a ... 0x080e: 332 case 0x0810: case 0x0811: 333 case 0x0816: case 0x081e: 334 case 0x081f: 335 return true; 336 } 337 338 break; 339 default: 340 return false; 341 } 342 343 return false; 344 } 345 346 static bool i10nm_mc_decode_available(struct mce *mce) 347 { 348 #define ICX_IMCx_CHy 0x06666000 349 u8 bank; 350 351 if (!decoding_via_mca || mem_cfg_2lm) 352 return false; 353 354 if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) 355 != (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) 356 return false; 357 358 bank = mce->bank; 359 360 switch (res_cfg->type) { 361 case I10NM: 362 /* Check whether the bank is one of {13,14,17,18,21,22,25,26} */ 363 if (!(ICX_IMCx_CHy & (1 << bank))) 364 return false; 365 break; 366 case SPR: 367 if (bank < 13 || bank > 20) 368 return false; 369 break; 370 case GNR: 371 if (bank < 13 || bank > 24) 372 return false; 373 break; 374 default: 375 return false; 376 } 377 378 /* DDRT errors can't be decoded from MCA bank registers */ 379 if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) 380 return false; 381 382 if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) 383 return false; 384 385 return true; 386 } 387 388 static bool i10nm_mc_decode(struct decoded_addr *res) 389 { 390 struct mce *m = res->mce; 391 struct skx_dev *d; 392 u8 bank; 393 394 if (!i10nm_mc_decode_available(m)) 395 return false; 396 397 list_for_each_entry(d, i10nm_edac_list, list) { 398 if (d->imc[0].src_id == m->socketid) { 399 res->socket = m->socketid; 400 res->dev = d; 401 break; 402 } 403 } 404 405 switch (res_cfg->type) { 406 case I10NM: 407 bank = m->bank - 13; 408 res->imc = bank / 4; 409 res->channel = bank % 2; 410 res->column = GET_BITFIELD(m->misc, 9, 18) << 2; 411 res->row = GET_BITFIELD(m->misc, 19, 39); 412 res->bank_group = GET_BITFIELD(m->misc, 40, 41); 413 res->bank_address = GET_BITFIELD(m->misc, 42, 43); 414 res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; 415 res->rank = GET_BITFIELD(m->misc, 56, 58); 416 res->dimm = res->rank >> 2; 417 res->rank = res->rank % 4; 418 break; 419 case SPR: 420 bank = m->bank - 13; 421 res->imc = bank / 2; 422 res->channel = bank % 2; 423 res->column = GET_BITFIELD(m->misc, 9, 18) << 2; 424 res->row = GET_BITFIELD(m->misc, 19, 36); 425 res->bank_group = GET_BITFIELD(m->misc, 37, 38); 426 res->bank_address = GET_BITFIELD(m->misc, 39, 40); 427 res->bank_group |= GET_BITFIELD(m->misc, 41, 41) << 2; 428 res->rank = GET_BITFIELD(m->misc, 57, 57); 429 res->dimm = GET_BITFIELD(m->misc, 58, 58); 430 break; 431 case GNR: 432 res->imc = m->bank - 13; 433 res->channel = 0; 434 res->column = GET_BITFIELD(m->misc, 9, 18) << 2; 435 res->row = GET_BITFIELD(m->misc, 19, 36); 436 res->bank_group = GET_BITFIELD(m->misc, 39, 41); 437 res->bank_address = GET_BITFIELD(m->misc, 37, 38); 438 res->rank = GET_BITFIELD(m->misc, 55, 56); 439 res->dimm = GET_BITFIELD(m->misc, 57, 57); 440 break; 441 default: 442 return false; 443 } 444 445 if (!res->dev) { 446 skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", 447 m->socketid, res->imc); 448 return false; 449 } 450 451 return true; 452 } 453 454 /** 455 * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller. 456 * 457 * @d : The pointer to the structure of CPU socket EDAC device. 458 * @logical_idx : The logical index of the present memory controller (0 ~ max present MC# - 1). 459 * @physical_idx : To store the corresponding physical index of @logical_idx. 460 * 461 * RETURNS : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure. 462 */ 463 static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx) 464 { 465 #define GNR_MAX_IMC_PCI_CNT 28 466 467 struct pci_dev *mdev; 468 int i, logical = 0; 469 470 /* 471 * Detect present memory controllers from { PCI device: 8-5, function 7-1 } 472 */ 473 for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) { 474 mdev = pci_get_dev_wrapper(d->seg, 475 d->bus[res_cfg->ddr_mdev_bdf.bus], 476 res_cfg->ddr_mdev_bdf.dev + i / 7, 477 res_cfg->ddr_mdev_bdf.fun + i % 7); 478 479 if (mdev) { 480 if (logical == logical_idx) { 481 *physical_idx = i; 482 return mdev; 483 } 484 485 pci_dev_put(mdev); 486 logical++; 487 } 488 } 489 490 return NULL; 491 } 492 493 static u32 get_gnr_imc_mmio_offset(void) 494 { 495 if (boot_cpu_data.x86_vfm == INTEL_GRANITERAPIDS_D) 496 return I10NM_GNR_D_IMC_MMIO_OFFSET; 497 498 return I10NM_GNR_IMC_MMIO_OFFSET; 499 } 500 501 /** 502 * get_ddr_munit() - Get the resource of the i-th DDR memory controller. 503 * 504 * @d : The pointer to the structure of CPU socket EDAC device. 505 * @i : The index of the CPU socket relative DDR memory controller. 506 * @offset : To store the MMIO offset of the i-th DDR memory controller. 507 * @size : To store the MMIO size of the i-th DDR memory controller. 508 * 509 * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure. 510 */ 511 static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsigned long *size) 512 { 513 struct pci_dev *mdev; 514 int physical_idx; 515 u32 reg; 516 517 switch (res_cfg->type) { 518 case GNR: 519 if (I10NM_GET_IMC_BAR(d, 0, reg)) { 520 i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n"); 521 return NULL; 522 } 523 524 mdev = get_gnr_mdev(d, i, &physical_idx); 525 if (!mdev) 526 return NULL; 527 528 *offset = I10NM_GET_IMC_MMIO_OFFSET(reg) + 529 get_gnr_imc_mmio_offset() + 530 physical_idx * I10NM_GNR_IMC_MMIO_SIZE; 531 *size = I10NM_GNR_IMC_MMIO_SIZE; 532 533 break; 534 default: 535 if (I10NM_GET_IMC_BAR(d, i, reg)) { 536 i10nm_printk(KERN_ERR, "Failed to get mc%d bar\n", i); 537 return NULL; 538 } 539 540 mdev = pci_get_dev_wrapper(d->seg, 541 d->bus[res_cfg->ddr_mdev_bdf.bus], 542 res_cfg->ddr_mdev_bdf.dev + i, 543 res_cfg->ddr_mdev_bdf.fun); 544 if (!mdev) 545 return NULL; 546 547 *offset = I10NM_GET_IMC_MMIO_OFFSET(reg); 548 *size = I10NM_GET_IMC_MMIO_SIZE(reg); 549 } 550 551 return mdev; 552 } 553 554 /** 555 * i10nm_imc_absent() - Check whether the memory controller @imc is absent 556 * 557 * @imc : The pointer to the structure of memory controller EDAC device. 558 * 559 * RETURNS : true if the memory controller EDAC device is absent, false otherwise. 560 */ 561 static bool i10nm_imc_absent(struct skx_imc *imc) 562 { 563 u32 mcmtr; 564 int i; 565 566 switch (res_cfg->type) { 567 case SPR: 568 for (i = 0; i < res_cfg->ddr_chan_num; i++) { 569 mcmtr = I10NM_GET_MCMTR(imc, i); 570 edac_dbg(1, "ch%d mcmtr reg %x\n", i, mcmtr); 571 if (mcmtr != ~0) 572 return false; 573 } 574 575 /* 576 * Some workstations' absent memory controllers still 577 * appear as PCIe devices, misleading the EDAC driver. 578 * By observing that the MMIO registers of these absent 579 * memory controllers consistently hold the value of ~0. 580 * 581 * We identify a memory controller as absent by checking 582 * if its MMIO register "mcmtr" == ~0 in all its channels. 583 */ 584 return true; 585 default: 586 return false; 587 } 588 } 589 590 static int i10nm_get_ddr_munits(void) 591 { 592 struct pci_dev *mdev; 593 void __iomem *mbase; 594 unsigned long size; 595 struct skx_dev *d; 596 int i, lmc, j = 0; 597 u32 reg, off; 598 u64 base; 599 600 list_for_each_entry(d, i10nm_edac_list, list) { 601 d->util_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->util_all_bdf.bus], 602 res_cfg->util_all_bdf.dev, 603 res_cfg->util_all_bdf.fun); 604 if (!d->util_all) 605 return -ENODEV; 606 607 d->uracu = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->uracu_bdf.bus], 608 res_cfg->uracu_bdf.dev, 609 res_cfg->uracu_bdf.fun); 610 if (!d->uracu) 611 return -ENODEV; 612 613 if (I10NM_GET_SCK_BAR(d, reg)) { 614 i10nm_printk(KERN_ERR, "Failed to socket bar\n"); 615 return -ENODEV; 616 } 617 618 base = I10NM_GET_SCK_MMIO_BASE(reg); 619 edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n", 620 j++, base, reg); 621 622 for (lmc = 0, i = 0; i < res_cfg->ddr_imc_num; i++) { 623 mdev = get_ddr_munit(d, i, &off, &size); 624 625 if (i == 0 && !mdev) { 626 i10nm_printk(KERN_ERR, "No IMC found\n"); 627 return -ENODEV; 628 } 629 if (!mdev) 630 continue; 631 632 edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n", 633 i, base + off, size, reg); 634 635 mbase = ioremap(base + off, size); 636 if (!mbase) { 637 i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", 638 base + off); 639 return -ENODEV; 640 } 641 642 d->imc[lmc].mbase = mbase; 643 if (i10nm_imc_absent(&d->imc[lmc])) { 644 pci_dev_put(mdev); 645 iounmap(mbase); 646 d->imc[lmc].mbase = NULL; 647 edac_dbg(2, "Skip absent mc%d\n", i); 648 continue; 649 } else { 650 d->imc[lmc].mdev = mdev; 651 if (res_cfg->type == SPR) 652 skx_set_mc_mapping(d, i, lmc); 653 lmc++; 654 } 655 } 656 } 657 658 return 0; 659 } 660 661 static bool i10nm_check_hbm_imc(struct skx_dev *d) 662 { 663 u32 reg; 664 665 if (I10NM_GET_CAPID3_CFG(d, reg)) { 666 i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n"); 667 return false; 668 } 669 670 return I10NM_IS_HBM_PRESENT(reg) != 0; 671 } 672 673 static int i10nm_get_hbm_munits(void) 674 { 675 struct pci_dev *mdev; 676 void __iomem *mbase; 677 u32 reg, off, mcmtr; 678 struct skx_dev *d; 679 int i, lmc; 680 u64 base; 681 682 list_for_each_entry(d, i10nm_edac_list, list) { 683 if (!d->pcu_cr3) 684 return -ENODEV; 685 686 if (!i10nm_check_hbm_imc(d)) { 687 i10nm_printk(KERN_DEBUG, "No hbm memory\n"); 688 return -ENODEV; 689 } 690 691 if (I10NM_GET_SCK_BAR(d, reg)) { 692 i10nm_printk(KERN_ERR, "Failed to get socket bar\n"); 693 return -ENODEV; 694 } 695 base = I10NM_GET_SCK_MMIO_BASE(reg); 696 697 if (I10NM_GET_HBM_IMC_BAR(d, reg)) { 698 i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n"); 699 return -ENODEV; 700 } 701 base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg); 702 703 lmc = res_cfg->ddr_imc_num; 704 705 for (i = 0; i < res_cfg->hbm_imc_num; i++) { 706 mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->hbm_mdev_bdf.bus], 707 res_cfg->hbm_mdev_bdf.dev + i / 4, 708 res_cfg->hbm_mdev_bdf.fun + i % 4); 709 710 if (i == 0 && !mdev) { 711 i10nm_printk(KERN_ERR, "No hbm mc found\n"); 712 return -ENODEV; 713 } 714 if (!mdev) 715 continue; 716 717 d->imc[lmc].mdev = mdev; 718 off = i * I10NM_HBM_IMC_MMIO_SIZE; 719 720 edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n", 721 lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE); 722 723 mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE); 724 if (!mbase) { 725 pci_dev_put(d->imc[lmc].mdev); 726 d->imc[lmc].mdev = NULL; 727 728 i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", 729 base + off); 730 return -ENOMEM; 731 } 732 733 d->imc[lmc].mbase = mbase; 734 d->imc[lmc].hbm_mc = true; 735 736 mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); 737 if (!I10NM_IS_HBM_IMC(mcmtr)) { 738 iounmap(d->imc[lmc].mbase); 739 d->imc[lmc].mbase = NULL; 740 d->imc[lmc].hbm_mc = false; 741 pci_dev_put(d->imc[lmc].mdev); 742 d->imc[lmc].mdev = NULL; 743 744 i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); 745 return -ENODEV; 746 } 747 748 lmc++; 749 } 750 } 751 752 return 0; 753 } 754 755 static struct res_config i10nm_cfg0 = { 756 .type = I10NM, 757 .decs_did = 0x3452, 758 .busno_cfg_offset = 0xcc, 759 .ddr_imc_num = 4, 760 .ddr_chan_num = 2, 761 .ddr_dimm_num = 2, 762 .ddr_chan_mmio_sz = 0x4000, 763 .sad_all_bdf = {1, 29, 0}, 764 .pcu_cr3_bdf = {1, 30, 3}, 765 .util_all_bdf = {1, 29, 1}, 766 .uracu_bdf = {0, 0, 1}, 767 .ddr_mdev_bdf = {0, 12, 0}, 768 .hbm_mdev_bdf = {0, 12, 1}, 769 .sad_all_offset = 0x108, 770 .reg_rrl_ddr[0] = &icx_reg_rrl_ddr, 771 }; 772 773 static struct res_config i10nm_cfg1 = { 774 .type = I10NM, 775 .decs_did = 0x3452, 776 .busno_cfg_offset = 0xd0, 777 .ddr_imc_num = 4, 778 .ddr_chan_num = 2, 779 .ddr_dimm_num = 2, 780 .ddr_chan_mmio_sz = 0x4000, 781 .sad_all_bdf = {1, 29, 0}, 782 .pcu_cr3_bdf = {1, 30, 3}, 783 .util_all_bdf = {1, 29, 1}, 784 .uracu_bdf = {0, 0, 1}, 785 .ddr_mdev_bdf = {0, 12, 0}, 786 .hbm_mdev_bdf = {0, 12, 1}, 787 .sad_all_offset = 0x108, 788 .reg_rrl_ddr[0] = &icx_reg_rrl_ddr, 789 }; 790 791 static struct res_config spr_cfg = { 792 .type = SPR, 793 .decs_did = 0x3252, 794 .busno_cfg_offset = 0xd0, 795 .ddr_imc_num = 4, 796 .ddr_chan_num = 2, 797 .ddr_dimm_num = 2, 798 .hbm_imc_num = 16, 799 .hbm_chan_num = 2, 800 .hbm_dimm_num = 1, 801 .ddr_chan_mmio_sz = 0x8000, 802 .hbm_chan_mmio_sz = 0x4000, 803 .support_ddr5 = true, 804 .sad_all_bdf = {1, 10, 0}, 805 .pcu_cr3_bdf = {1, 30, 3}, 806 .util_all_bdf = {1, 29, 1}, 807 .uracu_bdf = {0, 0, 1}, 808 .ddr_mdev_bdf = {0, 12, 0}, 809 .hbm_mdev_bdf = {0, 12, 1}, 810 .sad_all_offset = 0x300, 811 .reg_rrl_ddr[0] = &spr_reg_rrl_ddr, 812 .reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0, 813 .reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1, 814 }; 815 816 static struct res_config gnr_cfg = { 817 .type = GNR, 818 .decs_did = 0x3252, 819 .busno_cfg_offset = 0xd0, 820 .ddr_imc_num = 12, 821 .ddr_chan_num = 1, 822 .ddr_dimm_num = 2, 823 .ddr_chan_mmio_sz = 0x4000, 824 .support_ddr5 = true, 825 .sad_all_bdf = {0, 13, 0}, 826 .pcu_cr3_bdf = {0, 5, 0}, 827 .util_all_bdf = {0, 13, 1}, 828 .uracu_bdf = {0, 0, 1}, 829 .ddr_mdev_bdf = {0, 5, 1}, 830 .sad_all_offset = 0x300, 831 .reg_rrl_ddr[0] = &gnr_reg_rrl_ddr, 832 }; 833 834 static const struct x86_cpu_id i10nm_cpuids[] = { 835 X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MIN, 0x3, &i10nm_cfg0), 836 X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, 0x4, X86_STEP_MAX, &i10nm_cfg1), 837 X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, X86_STEP_MIN, 0x3, &i10nm_cfg0), 838 X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, 0x4, X86_STEP_MAX, &i10nm_cfg1), 839 X86_MATCH_VFM( INTEL_ICELAKE_D, &i10nm_cfg1), 840 841 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_cfg), 842 X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_cfg), 843 X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_cfg), 844 X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_cfg), 845 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_cfg), 846 X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_cfg), 847 X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_cfg), 848 {} 849 }; 850 MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); 851 852 static bool i10nm_check_ecc(struct skx_imc *imc, int chan) 853 { 854 u32 mcmtr; 855 856 mcmtr = I10NM_GET_MCMTR(imc, chan); 857 edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr); 858 859 return !!GET_BITFIELD(mcmtr, 2, 2); 860 } 861 862 static bool i10nm_channel_disabled(struct skx_imc *imc, int chan) 863 { 864 u32 mcmtr = I10NM_GET_MCMTR(imc, chan); 865 866 edac_dbg(1, "mc%d ch%d mcmtr reg %x\n", imc->mc, chan, mcmtr); 867 868 return (mcmtr == ~0 || GET_BITFIELD(mcmtr, 18, 18)); 869 } 870 871 static int i10nm_get_dimm_config(struct mem_ctl_info *mci, 872 struct res_config *cfg) 873 { 874 struct skx_pvt *pvt = mci->pvt_info; 875 struct skx_imc *imc = pvt->imc; 876 u32 mtr, mcddrtcfg = 0; 877 struct dimm_info *dimm; 878 int i, j, ndimms; 879 880 for (i = 0; i < imc->num_channels; i++) { 881 if (!imc->mbase) 882 continue; 883 884 if (i10nm_channel_disabled(imc, i)) { 885 edac_dbg(1, "mc%d ch%d is disabled.\n", imc->mc, i); 886 continue; 887 } 888 889 ndimms = 0; 890 891 if (res_cfg->type != GNR) 892 mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); 893 894 for (j = 0; j < imc->num_dimms; j++) { 895 dimm = edac_get_dimm(mci, i, j, 0); 896 mtr = I10NM_GET_DIMMMTR(imc, i, j); 897 edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n", 898 mtr, mcddrtcfg, imc->mc, i, j); 899 900 if (IS_DIMM_PRESENT(mtr)) 901 ndimms += skx_get_dimm_info(mtr, 0, 0, dimm, 902 imc, i, j, cfg); 903 else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) 904 ndimms += skx_get_nvdimm_info(dimm, imc, i, j, 905 EDAC_MOD_STR); 906 } 907 if (ndimms && !i10nm_check_ecc(imc, i)) { 908 i10nm_printk(KERN_ERR, "ECC is disabled on imc %d channel %d\n", 909 imc->mc, i); 910 return -ENODEV; 911 } 912 } 913 914 return 0; 915 } 916 917 static struct notifier_block i10nm_mce_dec = { 918 .notifier_call = skx_mce_check_error, 919 .priority = MCE_PRIO_EDAC, 920 }; 921 922 static int __init i10nm_init(void) 923 { 924 u8 mc = 0, src_id = 0; 925 const struct x86_cpu_id *id; 926 struct res_config *cfg; 927 const char *owner; 928 struct skx_dev *d; 929 int rc, i, off[3] = {0xd0, 0xc8, 0xcc}; 930 u64 tolm, tohm; 931 int imc_num; 932 933 edac_dbg(2, "\n"); 934 935 if (ghes_get_devices()) 936 return -EBUSY; 937 938 owner = edac_get_owner(); 939 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) 940 return -EBUSY; 941 942 if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) 943 return -ENODEV; 944 945 id = x86_match_cpu(i10nm_cpuids); 946 if (!id) 947 return -ENODEV; 948 949 cfg = (struct res_config *)id->driver_data; 950 skx_set_res_cfg(cfg); 951 res_cfg = cfg; 952 953 rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); 954 if (rc) 955 return rc; 956 957 rc = skx_get_all_bus_mappings(cfg, &i10nm_edac_list); 958 if (rc < 0) 959 goto fail; 960 if (rc == 0) { 961 i10nm_printk(KERN_ERR, "No memory controllers found\n"); 962 return -ENODEV; 963 } 964 965 rc = i10nm_get_imc_num(cfg); 966 if (rc < 0) 967 goto fail; 968 969 mem_cfg_2lm = i10nm_check_2lm(cfg); 970 skx_set_mem_cfg(mem_cfg_2lm); 971 972 rc = i10nm_get_ddr_munits(); 973 974 if (i10nm_get_hbm_munits() && rc) 975 goto fail; 976 977 imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num; 978 979 list_for_each_entry(d, i10nm_edac_list, list) { 980 rc = skx_get_src_id(d, 0xf8, &src_id); 981 if (rc < 0) 982 goto fail; 983 984 edac_dbg(2, "src_id = %d\n", src_id); 985 for (i = 0; i < imc_num; i++) { 986 if (!d->imc[i].mdev) 987 continue; 988 989 d->imc[i].mc = mc++; 990 d->imc[i].lmc = i; 991 d->imc[i].src_id = src_id; 992 if (d->imc[i].hbm_mc) { 993 d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; 994 d->imc[i].num_channels = cfg->hbm_chan_num; 995 d->imc[i].num_dimms = cfg->hbm_dimm_num; 996 } else { 997 d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; 998 d->imc[i].num_channels = cfg->ddr_chan_num; 999 d->imc[i].num_dimms = cfg->ddr_dimm_num; 1000 } 1001 1002 rc = skx_register_mci(&d->imc[i], &d->imc[i].mdev->dev, 1003 pci_name(d->imc[i].mdev), 1004 "Intel_10nm Socket", EDAC_MOD_STR, 1005 i10nm_get_dimm_config, cfg); 1006 if (rc < 0) 1007 goto fail; 1008 } 1009 } 1010 1011 rc = skx_adxl_get(); 1012 if (rc) { 1013 /* Decoding errors via MCA banks for 2LM isn't supported yet */ 1014 if (rc != -ENODEV || mem_cfg_2lm) 1015 goto fail; 1016 i10nm_printk(KERN_INFO, "ADXL not found, falling back to MCA-based decoding.\n"); 1017 no_adxl = true; 1018 decoding_via_mca = true; 1019 } 1020 1021 opstate_init(); 1022 mce_register_decode_chain(&i10nm_mce_dec); 1023 skx_setup_debug("i10nm_test"); 1024 1025 res_cfg->rrl_ctrl_mode = retry_rd_err_log; 1026 if (retry_rd_err_log && res_cfg->reg_rrl_ddr[0]) { 1027 skx_set_show_rrl(skx_show_rrl); 1028 if (retry_rd_err_log == RRL_CTRL_LINUX) 1029 skx_enable_rrl(true); 1030 } 1031 1032 skx_set_decode(i10nm_mc_decode); 1033 1034 i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION); 1035 1036 return 0; 1037 fail: 1038 skx_remove(); 1039 return rc; 1040 } 1041 1042 static void __exit i10nm_exit(void) 1043 { 1044 edac_dbg(2, "\n"); 1045 1046 skx_set_decode(NULL); 1047 1048 if (retry_rd_err_log && res_cfg->reg_rrl_ddr[0]) { 1049 if (retry_rd_err_log == RRL_CTRL_LINUX) 1050 skx_enable_rrl(false); 1051 skx_set_show_rrl(NULL); 1052 } 1053 1054 skx_teardown_debug(); 1055 mce_unregister_decode_chain(&i10nm_mce_dec); 1056 if (!no_adxl) 1057 skx_adxl_put(); 1058 skx_remove(); 1059 } 1060 1061 module_init(i10nm_init); 1062 module_exit(i10nm_exit); 1063 1064 static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp) 1065 { 1066 unsigned long val; 1067 int ret; 1068 1069 ret = kstrtoul(buf, 0, &val); 1070 1071 if (ret || val > 1) 1072 return -EINVAL; 1073 1074 if (val && mem_cfg_2lm) { 1075 i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n"); 1076 return -EIO; 1077 } 1078 1079 ret = param_set_int(buf, kp); 1080 1081 return ret; 1082 } 1083 1084 static const struct kernel_param_ops decoding_via_mca_param_ops = { 1085 .set = set_decoding_via_mca, 1086 .get = param_get_int, 1087 }; 1088 1089 module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644); 1090 MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable"); 1091 1092 module_param(retry_rd_err_log, int, 0444); 1093 MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)"); 1094 1095 MODULE_LICENSE("GPL v2"); 1096 MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors"); 1097