1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Driver for Intel client SoC with integrated memory controller using IBECC 4 * 5 * Copyright (C) 2020 Intel Corporation 6 * 7 * The In-Band ECC (IBECC) IP provides ECC protection to all or specific 8 * regions of the physical memory space. It's used for memory controllers 9 * that don't support the out-of-band ECC which often needs an additional 10 * storage device to each channel for storing ECC data. 11 */ 12 13 #include <linux/module.h> 14 #include <linux/init.h> 15 #include <linux/pci.h> 16 #include <linux/slab.h> 17 #include <linux/irq_work.h> 18 #include <linux/llist.h> 19 #include <linux/genalloc.h> 20 #include <linux/edac.h> 21 #include <linux/bits.h> 22 #include <linux/io.h> 23 #include <asm/mach_traps.h> 24 #include <asm/nmi.h> 25 26 #include "edac_mc.h" 27 #include "edac_module.h" 28 29 #define IGEN6_REVISION "v2.4" 30 31 #define EDAC_MOD_STR "igen6_edac" 32 #define IGEN6_NMI_NAME "igen6_ibecc" 33 34 /* Debug macros */ 35 #define igen6_printk(level, fmt, arg...) \ 36 edac_printk(level, "igen6", fmt, ##arg) 37 38 #define igen6_mc_printk(mci, level, fmt, arg...) \ 39 edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg) 40 41 #define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo)) 42 43 #define NUM_IMC 1 /* Max memory controllers */ 44 #define NUM_CHANNELS 2 /* Max channels */ 45 #define NUM_DIMMS 2 /* Max DIMMs per channel */ 46 47 #define _4GB BIT_ULL(32) 48 49 /* Size of physical memory */ 50 #define TOM_OFFSET 0xa0 51 /* Top of low usable DRAM */ 52 #define TOLUD_OFFSET 0xbc 53 /* Capability register C */ 54 #define CAPID_C_OFFSET 0xec 55 #define CAPID_C_IBECC BIT(15) 56 57 /* Error Status */ 58 #define ERRSTS_OFFSET 0xc8 59 #define ERRSTS_CE BIT_ULL(6) 60 #define ERRSTS_UE BIT_ULL(7) 61 62 /* Error Command */ 63 #define ERRCMD_OFFSET 0xca 64 #define ERRCMD_CE BIT_ULL(6) 65 #define ERRCMD_UE BIT_ULL(7) 66 67 /* IBECC MMIO base address */ 68 #define IBECC_BASE (res_cfg->ibecc_base) 69 #define IBECC_ACTIVATE_OFFSET IBECC_BASE 70 #define IBECC_ACTIVATE_EN BIT(0) 71 72 /* IBECC error log */ 73 #define ECC_ERROR_LOG_OFFSET (IBECC_BASE + 0x170) 74 #define ECC_ERROR_LOG_CE BIT_ULL(62) 75 #define ECC_ERROR_LOG_UE BIT_ULL(63) 76 #define ECC_ERROR_LOG_ADDR_SHIFT 5 77 #define ECC_ERROR_LOG_ADDR(v) GET_BITFIELD(v, 5, 38) 78 #define ECC_ERROR_LOG_SYND(v) GET_BITFIELD(v, 46, 61) 79 80 /* Host MMIO base address */ 81 #define MCHBAR_OFFSET 0x48 82 #define MCHBAR_EN BIT_ULL(0) 83 #define MCHBAR_BASE(v) (GET_BITFIELD(v, 16, 38) << 16) 84 #define MCHBAR_SIZE 0x10000 85 86 /* Parameters for the channel decode stage */ 87 #define MAD_INTER_CHANNEL_OFFSET 0x5000 88 #define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2) 89 #define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3) 90 #define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4) 91 #define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29) 92 93 /* Parameters for DRAM decode stage */ 94 #define MAD_INTRA_CH0_OFFSET 0x5004 95 #define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0) 96 97 /* DIMM characteristics */ 98 #define MAD_DIMM_CH0_OFFSET 0x500c 99 #define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29) 100 #define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8) 101 #define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29) 102 #define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25) 103 104 /* Hash for channel selection */ 105 #define CHANNEL_HASH_OFFSET 0X5024 106 /* Hash for enhanced channel selection */ 107 #define CHANNEL_EHASH_OFFSET 0X5028 108 #define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) 109 #define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) 110 #define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28) 111 112 static struct res_config { 113 int num_imc; 114 u32 ibecc_base; 115 bool (*ibecc_available)(struct pci_dev *pdev); 116 /* Convert error address logged in IBECC to system physical address */ 117 u64 (*err_addr_to_sys_addr)(u64 eaddr); 118 /* Convert error address logged in IBECC to integrated memory controller address */ 119 u64 (*err_addr_to_imc_addr)(u64 eaddr); 120 } *res_cfg; 121 122 struct igen6_imc { 123 int mc; 124 struct mem_ctl_info *mci; 125 struct pci_dev *pdev; 126 struct device dev; 127 void __iomem *window; 128 u64 ch_s_size; 129 int ch_l_map; 130 u64 dimm_s_size[NUM_CHANNELS]; 131 u64 dimm_l_size[NUM_CHANNELS]; 132 int dimm_l_map[NUM_CHANNELS]; 133 }; 134 135 static struct igen6_pvt { 136 struct igen6_imc imc[NUM_IMC]; 137 } *igen6_pvt; 138 139 /* The top of low usable DRAM */ 140 static u32 igen6_tolud; 141 /* The size of physical memory */ 142 static u64 igen6_tom; 143 144 struct decoded_addr { 145 int mc; 146 u64 imc_addr; 147 u64 sys_addr; 148 int channel_idx; 149 u64 channel_addr; 150 int sub_channel_idx; 151 u64 sub_channel_addr; 152 }; 153 154 struct ecclog_node { 155 struct llist_node llnode; 156 int mc; 157 u64 ecclog; 158 }; 159 160 /* 161 * In the NMI handler, the driver uses the lock-less memory allocator 162 * to allocate memory to store the IBECC error logs and links the logs 163 * to the lock-less list. Delay printk() and the work of error reporting 164 * to EDAC core in a worker. 165 */ 166 #define ECCLOG_POOL_SIZE PAGE_SIZE 167 static LLIST_HEAD(ecclog_llist); 168 static struct gen_pool *ecclog_pool; 169 static char ecclog_buf[ECCLOG_POOL_SIZE]; 170 static struct irq_work ecclog_irq_work; 171 static struct work_struct ecclog_work; 172 173 /* Compute die IDs for Elkhart Lake with IBECC */ 174 #define DID_EHL_SKU5 0x4514 175 #define DID_EHL_SKU6 0x4528 176 #define DID_EHL_SKU7 0x452a 177 #define DID_EHL_SKU8 0x4516 178 #define DID_EHL_SKU9 0x452c 179 #define DID_EHL_SKU10 0x452e 180 #define DID_EHL_SKU11 0x4532 181 #define DID_EHL_SKU12 0x4518 182 #define DID_EHL_SKU13 0x451a 183 #define DID_EHL_SKU14 0x4534 184 #define DID_EHL_SKU15 0x4536 185 186 static bool ehl_ibecc_available(struct pci_dev *pdev) 187 { 188 u32 v; 189 190 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v)) 191 return false; 192 193 return !!(CAPID_C_IBECC & v); 194 } 195 196 static u64 ehl_err_addr_to_sys_addr(u64 eaddr) 197 { 198 return eaddr; 199 } 200 201 static u64 ehl_err_addr_to_imc_addr(u64 eaddr) 202 { 203 if (eaddr < igen6_tolud) 204 return eaddr; 205 206 if (igen6_tom <= _4GB) 207 return eaddr + igen6_tolud - _4GB; 208 209 if (eaddr < _4GB) 210 return eaddr + igen6_tolud - igen6_tom; 211 212 return eaddr; 213 } 214 215 static struct res_config ehl_cfg = { 216 .num_imc = 1, 217 .ibecc_base = 0xdc00, 218 .ibecc_available = ehl_ibecc_available, 219 .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr, 220 .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, 221 }; 222 223 static const struct pci_device_id igen6_pci_tbl[] = { 224 { PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg }, 225 { PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg }, 226 { PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg }, 227 { PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg }, 228 { PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg }, 229 { PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg }, 230 { PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg }, 231 { PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg }, 232 { PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg }, 233 { PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg }, 234 { PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg }, 235 { }, 236 }; 237 MODULE_DEVICE_TABLE(pci, igen6_pci_tbl); 238 239 static enum dev_type get_width(int dimm_l, u32 mad_dimm) 240 { 241 u32 w = dimm_l ? MAD_DIMM_CH_DLW(mad_dimm) : 242 MAD_DIMM_CH_DSW(mad_dimm); 243 244 switch (w) { 245 case 0: 246 return DEV_X8; 247 case 1: 248 return DEV_X16; 249 case 2: 250 return DEV_X32; 251 default: 252 return DEV_UNKNOWN; 253 } 254 } 255 256 static enum mem_type get_memory_type(u32 mad_inter) 257 { 258 u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter); 259 260 switch (t) { 261 case 0: 262 return MEM_DDR4; 263 case 1: 264 return MEM_DDR3; 265 case 2: 266 return MEM_LPDDR3; 267 case 3: 268 return MEM_LPDDR4; 269 case 4: 270 return MEM_WIO2; 271 default: 272 return MEM_UNKNOWN; 273 } 274 } 275 276 static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit) 277 { 278 u64 hash_addr = addr & mask, hash = 0; 279 u64 intlv = (addr >> intlv_bit) & 1; 280 int i; 281 282 for (i = 6; i < 20; i++) 283 hash ^= (hash_addr >> i) & 1; 284 285 return (int)hash ^ intlv; 286 } 287 288 static u64 decode_channel_addr(u64 addr, int intlv_bit) 289 { 290 u64 channel_addr; 291 292 /* Remove the interleave bit and shift upper part down to fill gap */ 293 channel_addr = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit; 294 channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1); 295 296 return channel_addr; 297 } 298 299 static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map, 300 int *idx, u64 *sub_addr) 301 { 302 int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6; 303 304 if (addr > 2 * s_size) { 305 *sub_addr = addr - s_size; 306 *idx = l_map; 307 return; 308 } 309 310 if (CHANNEL_HASH_MODE(hash)) { 311 *sub_addr = decode_channel_addr(addr, intlv_bit); 312 *idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit); 313 } else { 314 *sub_addr = decode_channel_addr(addr, 6); 315 *idx = GET_BITFIELD(addr, 6, 6); 316 } 317 } 318 319 static int igen6_decode(struct decoded_addr *res) 320 { 321 struct igen6_imc *imc = &igen6_pvt->imc[res->mc]; 322 u64 addr = res->imc_addr, sub_addr, s_size; 323 int idx, l_map; 324 u32 hash; 325 326 if (addr >= igen6_tom) { 327 edac_dbg(0, "Address 0x%llx out of range\n", addr); 328 return -EINVAL; 329 } 330 331 /* Decode channel */ 332 hash = readl(imc->window + CHANNEL_HASH_OFFSET); 333 s_size = imc->ch_s_size; 334 l_map = imc->ch_l_map; 335 decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr); 336 res->channel_idx = idx; 337 res->channel_addr = sub_addr; 338 339 /* Decode sub-channel/DIMM */ 340 hash = readl(imc->window + CHANNEL_EHASH_OFFSET); 341 s_size = imc->dimm_s_size[idx]; 342 l_map = imc->dimm_l_map[idx]; 343 decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr); 344 res->sub_channel_idx = idx; 345 res->sub_channel_addr = sub_addr; 346 347 return 0; 348 } 349 350 static void igen6_output_error(struct decoded_addr *res, 351 struct mem_ctl_info *mci, u64 ecclog) 352 { 353 enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ? 354 HW_EVENT_ERR_UNCORRECTED : 355 HW_EVENT_ERR_CORRECTED; 356 357 edac_mc_handle_error(type, mci, 1, 358 res->sys_addr >> PAGE_SHIFT, 359 res->sys_addr & ~PAGE_MASK, 360 ECC_ERROR_LOG_SYND(ecclog), 361 res->channel_idx, res->sub_channel_idx, 362 -1, "", ""); 363 } 364 365 static struct gen_pool *ecclog_gen_pool_create(void) 366 { 367 struct gen_pool *pool; 368 369 pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1); 370 if (!pool) 371 return NULL; 372 373 if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) { 374 gen_pool_destroy(pool); 375 return NULL; 376 } 377 378 return pool; 379 } 380 381 static int ecclog_gen_pool_add(int mc, u64 ecclog) 382 { 383 struct ecclog_node *node; 384 385 node = (void *)gen_pool_alloc(ecclog_pool, sizeof(*node)); 386 if (!node) 387 return -ENOMEM; 388 389 node->mc = mc; 390 node->ecclog = ecclog; 391 llist_add(&node->llnode, &ecclog_llist); 392 393 return 0; 394 } 395 396 /* 397 * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI 398 * configuration space status register ERRSTS can indicate whether a 399 * correctable error or an uncorrectable error occurred. We only use the 400 * ECC_ERROR_LOG register to check error type, but need to clear both 401 * registers to enable future error events. 402 */ 403 static u64 ecclog_read_and_clear(struct igen6_imc *imc) 404 { 405 u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET); 406 407 if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) { 408 /* Clear CE/UE bits by writing 1s */ 409 writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); 410 return ecclog; 411 } 412 413 return 0; 414 } 415 416 static void errsts_clear(struct igen6_imc *imc) 417 { 418 u16 errsts; 419 420 if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) { 421 igen6_printk(KERN_ERR, "Failed to read ERRSTS\n"); 422 return; 423 } 424 425 /* Clear CE/UE bits by writing 1s */ 426 if (errsts & (ERRSTS_CE | ERRSTS_UE)) 427 pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts); 428 } 429 430 static int errcmd_enable_error_reporting(bool enable) 431 { 432 struct igen6_imc *imc = &igen6_pvt->imc[0]; 433 u16 errcmd; 434 int rc; 435 436 rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd); 437 if (rc) 438 return rc; 439 440 if (enable) 441 errcmd |= ERRCMD_CE | ERRSTS_UE; 442 else 443 errcmd &= ~(ERRCMD_CE | ERRSTS_UE); 444 445 rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd); 446 if (rc) 447 return rc; 448 449 return 0; 450 } 451 452 static int ecclog_handler(void) 453 { 454 struct igen6_imc *imc; 455 int i, n = 0; 456 u64 ecclog; 457 458 for (i = 0; i < res_cfg->num_imc; i++) { 459 imc = &igen6_pvt->imc[i]; 460 461 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ 462 463 ecclog = ecclog_read_and_clear(imc); 464 if (!ecclog) 465 continue; 466 467 if (!ecclog_gen_pool_add(i, ecclog)) 468 irq_work_queue(&ecclog_irq_work); 469 470 n++; 471 } 472 473 return n; 474 } 475 476 static void ecclog_work_cb(struct work_struct *work) 477 { 478 struct ecclog_node *node, *tmp; 479 struct mem_ctl_info *mci; 480 struct llist_node *head; 481 struct decoded_addr res; 482 u64 eaddr; 483 484 head = llist_del_all(&ecclog_llist); 485 if (!head) 486 return; 487 488 llist_for_each_entry_safe(node, tmp, head, llnode) { 489 memset(&res, 0, sizeof(res)); 490 eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) << 491 ECC_ERROR_LOG_ADDR_SHIFT; 492 res.mc = node->mc; 493 res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr); 494 res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr); 495 496 mci = igen6_pvt->imc[res.mc].mci; 497 498 edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog); 499 igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n"); 500 igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr); 501 502 if (!igen6_decode(&res)) 503 igen6_output_error(&res, mci, node->ecclog); 504 505 gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node)); 506 } 507 } 508 509 static void ecclog_irq_work_cb(struct irq_work *irq_work) 510 { 511 int i; 512 513 for (i = 0; i < res_cfg->num_imc; i++) 514 errsts_clear(&igen6_pvt->imc[i]); 515 516 if (!llist_empty(&ecclog_llist)) 517 schedule_work(&ecclog_work); 518 } 519 520 static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs) 521 { 522 unsigned char reason; 523 524 if (!ecclog_handler()) 525 return NMI_DONE; 526 527 /* 528 * Both In-Band ECC correctable error and uncorrectable error are 529 * reported by SERR# NMI. The NMI generic code (see pci_serr_error()) 530 * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to 531 * re-enable the SERR# NMI after NMI handling. So clear this bit here 532 * to re-enable SERR# NMI for receiving future In-Band ECC errors. 533 */ 534 reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK; 535 reason |= NMI_REASON_CLEAR_SERR; 536 outb(reason, NMI_REASON_PORT); 537 reason &= ~NMI_REASON_CLEAR_SERR; 538 outb(reason, NMI_REASON_PORT); 539 540 return NMI_HANDLED; 541 } 542 543 static bool igen6_check_ecc(struct igen6_imc *imc) 544 { 545 u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET); 546 547 return !!(activate & IBECC_ACTIVATE_EN); 548 } 549 550 static int igen6_get_dimm_config(struct mem_ctl_info *mci) 551 { 552 struct igen6_imc *imc = mci->pvt_info; 553 u32 mad_inter, mad_intra, mad_dimm; 554 int i, j, ndimms, mc = imc->mc; 555 struct dimm_info *dimm; 556 enum mem_type mtype; 557 enum dev_type dtype; 558 u64 dsize; 559 bool ecc; 560 561 edac_dbg(2, "\n"); 562 563 mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET); 564 mtype = get_memory_type(mad_inter); 565 ecc = igen6_check_ecc(imc); 566 imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter); 567 imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter); 568 569 for (i = 0; i < NUM_CHANNELS; i++) { 570 mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4); 571 mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4); 572 573 imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm); 574 imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm); 575 imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra); 576 ndimms = 0; 577 578 for (j = 0; j < NUM_DIMMS; j++) { 579 dimm = edac_get_dimm(mci, i, j, 0); 580 581 if (j ^ imc->dimm_l_map[i]) { 582 dtype = get_width(0, mad_dimm); 583 dsize = imc->dimm_s_size[i]; 584 } else { 585 dtype = get_width(1, mad_dimm); 586 dsize = imc->dimm_l_size[i]; 587 } 588 589 if (!dsize) 590 continue; 591 592 dimm->grain = 64; 593 dimm->mtype = mtype; 594 dimm->dtype = dtype; 595 dimm->nr_pages = MiB_TO_PAGES(dsize >> 20); 596 dimm->edac_mode = EDAC_SECDED; 597 snprintf(dimm->label, sizeof(dimm->label), 598 "MC#%d_Chan#%d_DIMM#%d", mc, i, j); 599 edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n", 600 mc, i, j, dsize >> 20, dimm->nr_pages); 601 602 ndimms++; 603 } 604 605 if (ndimms && !ecc) { 606 igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc); 607 return -ENODEV; 608 } 609 } 610 611 return 0; 612 } 613 614 #ifdef CONFIG_EDAC_DEBUG 615 /* Top of upper usable DRAM */ 616 static u64 igen6_touud; 617 #define TOUUD_OFFSET 0xa8 618 619 static void igen6_reg_dump(struct igen6_imc *imc) 620 { 621 int i; 622 623 edac_dbg(2, "CHANNEL_HASH : 0x%x\n", 624 readl(imc->window + CHANNEL_HASH_OFFSET)); 625 edac_dbg(2, "CHANNEL_EHASH : 0x%x\n", 626 readl(imc->window + CHANNEL_EHASH_OFFSET)); 627 edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n", 628 readl(imc->window + MAD_INTER_CHANNEL_OFFSET)); 629 edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n", 630 readq(imc->window + ECC_ERROR_LOG_OFFSET)); 631 632 for (i = 0; i < NUM_CHANNELS; i++) { 633 edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i, 634 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4)); 635 edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i, 636 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4)); 637 } 638 edac_dbg(2, "TOLUD : 0x%x", igen6_tolud); 639 edac_dbg(2, "TOUUD : 0x%llx", igen6_touud); 640 edac_dbg(2, "TOM : 0x%llx", igen6_tom); 641 } 642 643 static struct dentry *igen6_test; 644 645 static int debugfs_u64_set(void *data, u64 val) 646 { 647 u64 ecclog; 648 649 if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) { 650 edac_dbg(0, "Address 0x%llx out of range\n", val); 651 return 0; 652 } 653 654 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); 655 656 val >>= ECC_ERROR_LOG_ADDR_SHIFT; 657 ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE; 658 659 if (!ecclog_gen_pool_add(0, ecclog)) 660 irq_work_queue(&ecclog_irq_work); 661 662 return 0; 663 } 664 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); 665 666 static void igen6_debug_setup(void) 667 { 668 igen6_test = edac_debugfs_create_dir("igen6_test"); 669 if (!igen6_test) 670 return; 671 672 if (!edac_debugfs_create_file("addr", 0200, igen6_test, 673 NULL, &fops_u64_wo)) { 674 debugfs_remove(igen6_test); 675 igen6_test = NULL; 676 } 677 } 678 679 static void igen6_debug_teardown(void) 680 { 681 debugfs_remove_recursive(igen6_test); 682 } 683 #else 684 static void igen6_reg_dump(struct igen6_imc *imc) {} 685 static void igen6_debug_setup(void) {} 686 static void igen6_debug_teardown(void) {} 687 #endif 688 689 static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar) 690 { 691 union { 692 u64 v; 693 struct { 694 u32 v_lo; 695 u32 v_hi; 696 }; 697 } u; 698 699 edac_dbg(2, "\n"); 700 701 if (!res_cfg->ibecc_available(pdev)) { 702 edac_dbg(2, "No In-Band ECC IP\n"); 703 goto fail; 704 } 705 706 if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) { 707 igen6_printk(KERN_ERR, "Failed to read TOLUD\n"); 708 goto fail; 709 } 710 711 igen6_tolud &= GENMASK(31, 20); 712 713 if (pci_read_config_dword(pdev, TOM_OFFSET, &u.v_lo)) { 714 igen6_printk(KERN_ERR, "Failed to read lower TOM\n"); 715 goto fail; 716 } 717 718 if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &u.v_hi)) { 719 igen6_printk(KERN_ERR, "Failed to read upper TOM\n"); 720 goto fail; 721 } 722 723 igen6_tom = u.v & GENMASK_ULL(38, 20); 724 725 if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) { 726 igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n"); 727 goto fail; 728 } 729 730 if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) { 731 igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n"); 732 goto fail; 733 } 734 735 if (!(u.v & MCHBAR_EN)) { 736 igen6_printk(KERN_ERR, "MCHBAR is disabled\n"); 737 goto fail; 738 } 739 740 *mchbar = MCHBAR_BASE(u.v); 741 742 #ifdef CONFIG_EDAC_DEBUG 743 if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo)) 744 edac_dbg(2, "Failed to read lower TOUUD\n"); 745 else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &u.v_hi)) 746 edac_dbg(2, "Failed to read upper TOUUD\n"); 747 else 748 igen6_touud = u.v & GENMASK_ULL(38, 20); 749 #endif 750 751 return 0; 752 fail: 753 return -ENODEV; 754 } 755 756 static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) 757 { 758 struct edac_mc_layer layers[2]; 759 struct mem_ctl_info *mci; 760 struct igen6_imc *imc; 761 void __iomem *window; 762 int rc; 763 764 edac_dbg(2, "\n"); 765 766 mchbar += mc * MCHBAR_SIZE; 767 window = ioremap(mchbar, MCHBAR_SIZE); 768 if (!window) { 769 igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar); 770 return -ENODEV; 771 } 772 773 layers[0].type = EDAC_MC_LAYER_CHANNEL; 774 layers[0].size = NUM_CHANNELS; 775 layers[0].is_virt_csrow = false; 776 layers[1].type = EDAC_MC_LAYER_SLOT; 777 layers[1].size = NUM_DIMMS; 778 layers[1].is_virt_csrow = true; 779 780 mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); 781 if (!mci) { 782 rc = -ENOMEM; 783 goto fail; 784 } 785 786 mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc); 787 if (!mci->ctl_name) { 788 rc = -ENOMEM; 789 goto fail2; 790 } 791 792 mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4; 793 mci->edac_ctl_cap = EDAC_FLAG_SECDED; 794 mci->edac_cap = EDAC_FLAG_SECDED; 795 mci->mod_name = EDAC_MOD_STR; 796 mci->dev_name = pci_name(pdev); 797 mci->pvt_info = &igen6_pvt->imc[mc]; 798 799 imc = mci->pvt_info; 800 device_initialize(&imc->dev); 801 /* 802 * EDAC core uses mci->pdev(pointer of structure device) as 803 * memory controller ID. The client SoCs attach one or more 804 * memory controllers to single pci_dev (single pci_dev->dev 805 * can be for multiple memory controllers). 806 * 807 * To make mci->pdev unique, assign pci_dev->dev to mci->pdev 808 * for the first memory controller and assign a unique imc->dev 809 * to mci->pdev for each non-first memory controller. 810 */ 811 mci->pdev = mc ? &imc->dev : &pdev->dev; 812 imc->mc = mc; 813 imc->pdev = pdev; 814 imc->window = window; 815 816 igen6_reg_dump(imc); 817 818 rc = igen6_get_dimm_config(mci); 819 if (rc) 820 goto fail3; 821 822 rc = edac_mc_add_mc(mci); 823 if (rc) { 824 igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc); 825 goto fail3; 826 } 827 828 imc->mci = mci; 829 return 0; 830 fail3: 831 kfree(mci->ctl_name); 832 fail2: 833 edac_mc_free(mci); 834 fail: 835 iounmap(window); 836 return rc; 837 } 838 839 static void igen6_unregister_mcis(void) 840 { 841 struct mem_ctl_info *mci; 842 struct igen6_imc *imc; 843 int i; 844 845 edac_dbg(2, "\n"); 846 847 for (i = 0; i < res_cfg->num_imc; i++) { 848 imc = &igen6_pvt->imc[i]; 849 mci = imc->mci; 850 if (!mci) 851 continue; 852 853 edac_mc_del_mc(mci->pdev); 854 kfree(mci->ctl_name); 855 edac_mc_free(mci); 856 iounmap(imc->window); 857 } 858 } 859 860 static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 861 { 862 u64 mchbar; 863 int i, rc; 864 865 edac_dbg(2, "\n"); 866 867 igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL); 868 if (!igen6_pvt) 869 return -ENOMEM; 870 871 res_cfg = (struct res_config *)ent->driver_data; 872 873 rc = igen6_pci_setup(pdev, &mchbar); 874 if (rc) 875 goto fail; 876 877 for (i = 0; i < res_cfg->num_imc; i++) { 878 rc = igen6_register_mci(i, mchbar, pdev); 879 if (rc) 880 goto fail2; 881 } 882 883 ecclog_pool = ecclog_gen_pool_create(); 884 if (!ecclog_pool) { 885 rc = -ENOMEM; 886 goto fail2; 887 } 888 889 INIT_WORK(&ecclog_work, ecclog_work_cb); 890 init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb); 891 892 /* Check if any pending errors before registering the NMI handler */ 893 ecclog_handler(); 894 895 rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler, 896 0, IGEN6_NMI_NAME); 897 if (rc) { 898 igen6_printk(KERN_ERR, "Failed to register NMI handler\n"); 899 goto fail3; 900 } 901 902 /* Enable error reporting */ 903 rc = errcmd_enable_error_reporting(true); 904 if (rc) { 905 igen6_printk(KERN_ERR, "Failed to enable error reporting\n"); 906 goto fail4; 907 } 908 909 igen6_debug_setup(); 910 return 0; 911 fail4: 912 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); 913 fail3: 914 gen_pool_destroy(ecclog_pool); 915 fail2: 916 igen6_unregister_mcis(); 917 fail: 918 kfree(igen6_pvt); 919 return rc; 920 } 921 922 static void igen6_remove(struct pci_dev *pdev) 923 { 924 edac_dbg(2, "\n"); 925 926 igen6_debug_teardown(); 927 errcmd_enable_error_reporting(false); 928 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); 929 irq_work_sync(&ecclog_irq_work); 930 flush_work(&ecclog_work); 931 gen_pool_destroy(ecclog_pool); 932 igen6_unregister_mcis(); 933 kfree(igen6_pvt); 934 } 935 936 static struct pci_driver igen6_driver = { 937 .name = EDAC_MOD_STR, 938 .probe = igen6_probe, 939 .remove = igen6_remove, 940 .id_table = igen6_pci_tbl, 941 }; 942 943 static int __init igen6_init(void) 944 { 945 const char *owner; 946 int rc; 947 948 edac_dbg(2, "\n"); 949 950 owner = edac_get_owner(); 951 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) 952 return -ENODEV; 953 954 edac_op_state = EDAC_OPSTATE_NMI; 955 956 rc = pci_register_driver(&igen6_driver); 957 if (rc) 958 return rc; 959 960 igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION); 961 962 return 0; 963 } 964 965 static void __exit igen6_exit(void) 966 { 967 edac_dbg(2, "\n"); 968 969 pci_unregister_driver(&igen6_driver); 970 } 971 972 module_init(igen6_init); 973 module_exit(igen6_exit); 974 975 MODULE_LICENSE("GPL v2"); 976 MODULE_AUTHOR("Qiuxu Zhuo"); 977 MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC"); 978