1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Driver for Intel client SoC with integrated memory controller using IBECC 4 * 5 * Copyright (C) 2020 Intel Corporation 6 * 7 * The In-Band ECC (IBECC) IP provides ECC protection to all or specific 8 * regions of the physical memory space. It's used for memory controllers 9 * that don't support the out-of-band ECC which often needs an additional 10 * storage device to each channel for storing ECC data. 11 */ 12 13 #include <linux/module.h> 14 #include <linux/init.h> 15 #include <linux/pci.h> 16 #include <linux/slab.h> 17 #include <linux/irq_work.h> 18 #include <linux/llist.h> 19 #include <linux/genalloc.h> 20 #include <linux/edac.h> 21 #include <linux/bits.h> 22 #include <linux/io.h> 23 #include <asm/mach_traps.h> 24 #include <asm/nmi.h> 25 #include <asm/mce.h> 26 27 #include "edac_mc.h" 28 #include "edac_module.h" 29 30 #define IGEN6_REVISION "v2.5.1" 31 32 #define EDAC_MOD_STR "igen6_edac" 33 #define IGEN6_NMI_NAME "igen6_ibecc" 34 35 /* Debug macros */ 36 #define igen6_printk(level, fmt, arg...) \ 37 edac_printk(level, "igen6", fmt, ##arg) 38 39 #define igen6_mc_printk(mci, level, fmt, arg...) \ 40 edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg) 41 42 #define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo)) 43 44 #define NUM_IMC 2 /* Max memory controllers */ 45 #define NUM_CHANNELS 2 /* Max channels */ 46 #define NUM_DIMMS 2 /* Max DIMMs per channel */ 47 48 #define _4GB BIT_ULL(32) 49 50 /* Size of physical memory */ 51 #define TOM_OFFSET 0xa0 52 /* Top of low usable DRAM */ 53 #define TOLUD_OFFSET 0xbc 54 /* Capability register C */ 55 #define CAPID_C_OFFSET 0xec 56 #define CAPID_C_IBECC BIT(15) 57 58 /* Capability register E */ 59 #define CAPID_E_OFFSET 0xf0 60 #define CAPID_E_IBECC BIT(12) 61 #define CAPID_E_IBECC_BIT18 BIT(18) 62 63 /* Error Status */ 64 #define ERRSTS_OFFSET 0xc8 65 #define ERRSTS_CE BIT_ULL(6) 66 #define ERRSTS_UE BIT_ULL(7) 67 68 /* Error Command */ 69 #define ERRCMD_OFFSET 0xca 70 #define ERRCMD_CE BIT_ULL(6) 71 #define ERRCMD_UE BIT_ULL(7) 72 73 /* IBECC MMIO base address */ 74 #define IBECC_BASE (res_cfg->ibecc_base) 75 #define IBECC_ACTIVATE_OFFSET IBECC_BASE 76 #define IBECC_ACTIVATE_EN BIT(0) 77 78 /* IBECC error log */ 79 #define ECC_ERROR_LOG_OFFSET (IBECC_BASE + res_cfg->ibecc_error_log_offset) 80 #define ECC_ERROR_LOG_CE BIT_ULL(62) 81 #define ECC_ERROR_LOG_UE BIT_ULL(63) 82 #define ECC_ERROR_LOG_ADDR_SHIFT 5 83 #define ECC_ERROR_LOG_ADDR(v) GET_BITFIELD(v, 5, 38) 84 #define ECC_ERROR_LOG_ADDR45(v) GET_BITFIELD(v, 5, 45) 85 #define ECC_ERROR_LOG_SYND(v) GET_BITFIELD(v, 46, 61) 86 87 /* Host MMIO base address */ 88 #define MCHBAR_OFFSET 0x48 89 #define MCHBAR_EN BIT_ULL(0) 90 #define MCHBAR_BASE(v) (GET_BITFIELD(v, 16, 38) << 16) 91 #define MCHBAR_SIZE 0x10000 92 93 /* Parameters for the channel decode stage */ 94 #define IMC_BASE (res_cfg->imc_base) 95 #define MAD_INTER_CHANNEL_OFFSET IMC_BASE 96 #define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2) 97 #define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3) 98 #define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4) 99 #define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29) 100 101 /* Parameters for DRAM decode stage */ 102 #define MAD_INTRA_CH0_OFFSET (IMC_BASE + 4) 103 #define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0) 104 105 /* DIMM characteristics */ 106 #define MAD_DIMM_CH0_OFFSET (IMC_BASE + 0xc) 107 #define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29) 108 #define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8) 109 #define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29) 110 #define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25) 111 112 /* Hash for memory controller selection */ 113 #define MAD_MC_HASH_OFFSET (IMC_BASE + 0x1b8) 114 #define MAC_MC_HASH_LSB(v) GET_BITFIELD(v, 1, 3) 115 116 /* Hash for channel selection */ 117 #define CHANNEL_HASH_OFFSET (IMC_BASE + 0x24) 118 /* Hash for enhanced channel selection */ 119 #define CHANNEL_EHASH_OFFSET (IMC_BASE + 0x28) 120 #define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) 121 #define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) 122 #define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28) 123 124 /* Parameters for memory slice decode stage */ 125 #define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) 126 #define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) 127 128 static struct res_config { 129 bool machine_check; 130 int num_imc; 131 u32 imc_base; 132 u32 cmf_base; 133 u32 cmf_size; 134 u32 ms_hash_offset; 135 u32 ibecc_base; 136 u32 ibecc_error_log_offset; 137 bool (*ibecc_available)(struct pci_dev *pdev); 138 /* Extract error address logged in IBECC */ 139 u64 (*err_addr)(u64 ecclog); 140 /* Convert error address logged in IBECC to system physical address */ 141 u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc); 142 /* Convert error address logged in IBECC to integrated memory controller address */ 143 u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc); 144 } *res_cfg; 145 146 struct igen6_imc { 147 int mc; 148 struct mem_ctl_info *mci; 149 struct pci_dev *pdev; 150 struct device dev; 151 void __iomem *window; 152 u64 size; 153 u64 ch_s_size; 154 int ch_l_map; 155 u64 dimm_s_size[NUM_CHANNELS]; 156 u64 dimm_l_size[NUM_CHANNELS]; 157 int dimm_l_map[NUM_CHANNELS]; 158 }; 159 160 static struct igen6_pvt { 161 struct igen6_imc imc[NUM_IMC]; 162 u64 ms_hash; 163 u64 ms_s_size; 164 int ms_l_map; 165 } *igen6_pvt; 166 167 /* The top of low usable DRAM */ 168 static u32 igen6_tolud; 169 /* The size of physical memory */ 170 static u64 igen6_tom; 171 172 struct decoded_addr { 173 int mc; 174 u64 imc_addr; 175 u64 sys_addr; 176 int channel_idx; 177 u64 channel_addr; 178 int sub_channel_idx; 179 u64 sub_channel_addr; 180 }; 181 182 struct ecclog_node { 183 struct llist_node llnode; 184 int mc; 185 u64 ecclog; 186 }; 187 188 /* 189 * In the NMI handler, the driver uses the lock-less memory allocator 190 * to allocate memory to store the IBECC error logs and links the logs 191 * to the lock-less list. Delay printk() and the work of error reporting 192 * to EDAC core in a worker. 193 */ 194 #define ECCLOG_POOL_SIZE PAGE_SIZE 195 static LLIST_HEAD(ecclog_llist); 196 static struct gen_pool *ecclog_pool; 197 static char ecclog_buf[ECCLOG_POOL_SIZE]; 198 static struct irq_work ecclog_irq_work; 199 static struct work_struct ecclog_work; 200 201 /* Compute die IDs for Elkhart Lake with IBECC */ 202 #define DID_EHL_SKU5 0x4514 203 #define DID_EHL_SKU6 0x4528 204 #define DID_EHL_SKU7 0x452a 205 #define DID_EHL_SKU8 0x4516 206 #define DID_EHL_SKU9 0x452c 207 #define DID_EHL_SKU10 0x452e 208 #define DID_EHL_SKU11 0x4532 209 #define DID_EHL_SKU12 0x4518 210 #define DID_EHL_SKU13 0x451a 211 #define DID_EHL_SKU14 0x4534 212 #define DID_EHL_SKU15 0x4536 213 214 /* Compute die IDs for ICL-NNPI with IBECC */ 215 #define DID_ICL_SKU8 0x4581 216 #define DID_ICL_SKU10 0x4585 217 #define DID_ICL_SKU11 0x4589 218 #define DID_ICL_SKU12 0x458d 219 220 /* Compute die IDs for Tiger Lake with IBECC */ 221 #define DID_TGL_SKU 0x9a14 222 223 /* Compute die IDs for Alder Lake with IBECC */ 224 #define DID_ADL_SKU1 0x4601 225 #define DID_ADL_SKU2 0x4602 226 #define DID_ADL_SKU3 0x4621 227 #define DID_ADL_SKU4 0x4641 228 229 /* Compute die IDs for Alder Lake-N with IBECC */ 230 #define DID_ADL_N_SKU1 0x4614 231 #define DID_ADL_N_SKU2 0x4617 232 #define DID_ADL_N_SKU3 0x461b 233 #define DID_ADL_N_SKU4 0x461c 234 #define DID_ADL_N_SKU5 0x4673 235 #define DID_ADL_N_SKU6 0x4674 236 #define DID_ADL_N_SKU7 0x4675 237 #define DID_ADL_N_SKU8 0x4677 238 #define DID_ADL_N_SKU9 0x4678 239 #define DID_ADL_N_SKU10 0x4679 240 #define DID_ADL_N_SKU11 0x467c 241 #define DID_ADL_N_SKU12 0x4632 242 243 /* Compute die IDs for Raptor Lake-P with IBECC */ 244 #define DID_RPL_P_SKU1 0xa706 245 #define DID_RPL_P_SKU2 0xa707 246 #define DID_RPL_P_SKU3 0xa708 247 #define DID_RPL_P_SKU4 0xa716 248 #define DID_RPL_P_SKU5 0xa718 249 250 /* Compute die IDs for Meteor Lake-PS with IBECC */ 251 #define DID_MTL_PS_SKU1 0x7d21 252 #define DID_MTL_PS_SKU2 0x7d22 253 #define DID_MTL_PS_SKU3 0x7d23 254 #define DID_MTL_PS_SKU4 0x7d24 255 256 /* Compute die IDs for Meteor Lake-P with IBECC */ 257 #define DID_MTL_P_SKU1 0x7d01 258 #define DID_MTL_P_SKU2 0x7d02 259 #define DID_MTL_P_SKU3 0x7d14 260 261 /* Compute die IDs for Arrow Lake-UH with IBECC */ 262 #define DID_ARL_UH_SKU1 0x7d06 263 #define DID_ARL_UH_SKU2 0x7d20 264 #define DID_ARL_UH_SKU3 0x7d30 265 266 /* Compute die IDs for Panther Lake-H with IBECC */ 267 #define DID_PTL_H_SKU1 0xb000 268 #define DID_PTL_H_SKU2 0xb001 269 #define DID_PTL_H_SKU3 0xb002 270 271 static int get_mchbar(struct pci_dev *pdev, u64 *mchbar) 272 { 273 union { 274 u64 v; 275 struct { 276 u32 v_lo; 277 u32 v_hi; 278 }; 279 } u; 280 281 if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) { 282 igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n"); 283 return -ENODEV; 284 } 285 286 if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) { 287 igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n"); 288 return -ENODEV; 289 } 290 291 if (!(u.v & MCHBAR_EN)) { 292 igen6_printk(KERN_ERR, "MCHBAR is disabled\n"); 293 return -ENODEV; 294 } 295 296 *mchbar = MCHBAR_BASE(u.v); 297 298 return 0; 299 } 300 301 static bool ehl_ibecc_available(struct pci_dev *pdev) 302 { 303 u32 v; 304 305 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v)) 306 return false; 307 308 return !!(CAPID_C_IBECC & v); 309 } 310 311 static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc) 312 { 313 return eaddr; 314 } 315 316 static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc) 317 { 318 if (eaddr < igen6_tolud) 319 return eaddr; 320 321 if (igen6_tom <= _4GB) 322 return eaddr + igen6_tolud - _4GB; 323 324 if (eaddr >= igen6_tom) 325 return eaddr + igen6_tolud - igen6_tom; 326 327 return eaddr; 328 } 329 330 static bool icl_ibecc_available(struct pci_dev *pdev) 331 { 332 u32 v; 333 334 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v)) 335 return false; 336 337 return !(CAPID_C_IBECC & v) && 338 (boot_cpu_data.x86_stepping >= 1); 339 } 340 341 static bool tgl_ibecc_available(struct pci_dev *pdev) 342 { 343 u32 v; 344 345 if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v)) 346 return false; 347 348 return !(CAPID_E_IBECC & v); 349 } 350 351 static bool mtl_p_ibecc_available(struct pci_dev *pdev) 352 { 353 u32 v; 354 355 if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v)) 356 return false; 357 358 return !(CAPID_E_IBECC_BIT18 & v); 359 } 360 361 static bool mtl_ps_ibecc_available(struct pci_dev *pdev) 362 { 363 #define MCHBAR_MEMSS_IBECCDIS 0x13c00 364 void __iomem *window; 365 u64 mchbar; 366 u32 val; 367 368 if (get_mchbar(pdev, &mchbar)) 369 return false; 370 371 window = ioremap(mchbar, MCHBAR_SIZE * 2); 372 if (!window) { 373 igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar); 374 return false; 375 } 376 377 val = readl(window + MCHBAR_MEMSS_IBECCDIS); 378 iounmap(window); 379 380 /* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */ 381 return !GET_BITFIELD(val, 6, 6); 382 } 383 384 static u64 mem_addr_to_sys_addr(u64 maddr) 385 { 386 if (maddr < igen6_tolud) 387 return maddr; 388 389 if (igen6_tom <= _4GB) 390 return maddr - igen6_tolud + _4GB; 391 392 if (maddr < _4GB) 393 return maddr - igen6_tolud + igen6_tom; 394 395 return maddr; 396 } 397 398 static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit) 399 { 400 u64 hash_addr = addr & mask, hash = hash_init; 401 u64 intlv = (addr >> intlv_bit) & 1; 402 int i; 403 404 for (i = 6; i < 20; i++) 405 hash ^= (hash_addr >> i) & 1; 406 407 return hash ^ intlv; 408 } 409 410 static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc) 411 { 412 u64 maddr, hash, mask, ms_s_size; 413 int intlv_bit; 414 u32 ms_hash; 415 416 ms_s_size = igen6_pvt->ms_s_size; 417 if (eaddr >= ms_s_size) 418 return eaddr + ms_s_size; 419 420 ms_hash = igen6_pvt->ms_hash; 421 422 mask = MEM_SLICE_HASH_MASK(ms_hash); 423 intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6; 424 425 maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) | 426 GET_BITFIELD(eaddr, 0, intlv_bit - 1); 427 428 hash = mem_slice_hash(maddr, mask, mc, intlv_bit); 429 430 return maddr | (hash << intlv_bit); 431 } 432 433 static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc) 434 { 435 u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc); 436 437 return mem_addr_to_sys_addr(maddr); 438 } 439 440 static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc) 441 { 442 return eaddr; 443 } 444 445 static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc) 446 { 447 return mem_addr_to_sys_addr(eaddr); 448 } 449 450 static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc) 451 { 452 u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size; 453 struct igen6_imc *imc = &igen6_pvt->imc[mc]; 454 int intlv_bit; 455 u32 mc_hash; 456 457 if (eaddr >= 2 * ms_s_size) 458 return eaddr - ms_s_size; 459 460 mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET); 461 462 intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6; 463 464 imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit | 465 GET_BITFIELD(eaddr, 0, intlv_bit - 1); 466 467 return imc_addr; 468 } 469 470 static u64 rpl_p_err_addr(u64 ecclog) 471 { 472 return ECC_ERROR_LOG_ADDR45(ecclog); 473 } 474 475 static struct res_config ehl_cfg = { 476 .num_imc = 1, 477 .imc_base = 0x5000, 478 .ibecc_base = 0xdc00, 479 .ibecc_available = ehl_ibecc_available, 480 .ibecc_error_log_offset = 0x170, 481 .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr, 482 .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, 483 }; 484 485 static struct res_config icl_cfg = { 486 .num_imc = 1, 487 .imc_base = 0x5000, 488 .ibecc_base = 0xd800, 489 .ibecc_error_log_offset = 0x170, 490 .ibecc_available = icl_ibecc_available, 491 .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr, 492 .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, 493 }; 494 495 static struct res_config tgl_cfg = { 496 .machine_check = true, 497 .num_imc = 2, 498 .imc_base = 0x5000, 499 .cmf_base = 0x11000, 500 .cmf_size = 0x800, 501 .ms_hash_offset = 0xac, 502 .ibecc_base = 0xd400, 503 .ibecc_error_log_offset = 0x170, 504 .ibecc_available = tgl_ibecc_available, 505 .err_addr_to_sys_addr = tgl_err_addr_to_sys_addr, 506 .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr, 507 }; 508 509 static struct res_config adl_cfg = { 510 .machine_check = true, 511 .num_imc = 2, 512 .imc_base = 0xd800, 513 .ibecc_base = 0xd400, 514 .ibecc_error_log_offset = 0x68, 515 .ibecc_available = tgl_ibecc_available, 516 .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, 517 .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, 518 }; 519 520 static struct res_config adl_n_cfg = { 521 .machine_check = true, 522 .num_imc = 1, 523 .imc_base = 0xd800, 524 .ibecc_base = 0xd400, 525 .ibecc_error_log_offset = 0x68, 526 .ibecc_available = tgl_ibecc_available, 527 .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, 528 .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, 529 }; 530 531 static struct res_config rpl_p_cfg = { 532 .machine_check = true, 533 .num_imc = 2, 534 .imc_base = 0xd800, 535 .ibecc_base = 0xd400, 536 .ibecc_error_log_offset = 0x68, 537 .ibecc_available = tgl_ibecc_available, 538 .err_addr = rpl_p_err_addr, 539 .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, 540 .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, 541 }; 542 543 static struct res_config mtl_ps_cfg = { 544 .machine_check = true, 545 .num_imc = 2, 546 .imc_base = 0xd800, 547 .ibecc_base = 0xd400, 548 .ibecc_error_log_offset = 0x170, 549 .ibecc_available = mtl_ps_ibecc_available, 550 .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, 551 .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, 552 }; 553 554 static struct res_config mtl_p_cfg = { 555 .machine_check = true, 556 .num_imc = 2, 557 .imc_base = 0xd800, 558 .ibecc_base = 0xd400, 559 .ibecc_error_log_offset = 0x170, 560 .ibecc_available = mtl_p_ibecc_available, 561 .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, 562 .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, 563 }; 564 565 static const struct pci_device_id igen6_pci_tbl[] = { 566 { PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg }, 567 { PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg }, 568 { PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg }, 569 { PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg }, 570 { PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg }, 571 { PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg }, 572 { PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg }, 573 { PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg }, 574 { PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg }, 575 { PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg }, 576 { PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg }, 577 { PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg }, 578 { PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg }, 579 { PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg }, 580 { PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg }, 581 { PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg }, 582 { PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg }, 583 { PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg }, 584 { PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg }, 585 { PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg }, 586 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg }, 587 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg }, 588 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg }, 589 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg }, 590 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg }, 591 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg }, 592 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg }, 593 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg }, 594 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg }, 595 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg }, 596 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg }, 597 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg }, 598 { PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg }, 599 { PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg }, 600 { PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg }, 601 { PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg }, 602 { PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg }, 603 { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg }, 604 { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg }, 605 { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg }, 606 { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg }, 607 { PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg }, 608 { PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg }, 609 { PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg }, 610 { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg }, 611 { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg }, 612 { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg }, 613 { PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg }, 614 { PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg }, 615 { PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg }, 616 { }, 617 }; 618 MODULE_DEVICE_TABLE(pci, igen6_pci_tbl); 619 620 static enum dev_type get_width(int dimm_l, u32 mad_dimm) 621 { 622 u32 w = dimm_l ? MAD_DIMM_CH_DLW(mad_dimm) : 623 MAD_DIMM_CH_DSW(mad_dimm); 624 625 switch (w) { 626 case 0: 627 return DEV_X8; 628 case 1: 629 return DEV_X16; 630 case 2: 631 return DEV_X32; 632 default: 633 return DEV_UNKNOWN; 634 } 635 } 636 637 static enum mem_type get_memory_type(u32 mad_inter) 638 { 639 u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter); 640 641 switch (t) { 642 case 0: 643 return MEM_DDR4; 644 case 1: 645 return MEM_DDR3; 646 case 2: 647 return MEM_LPDDR3; 648 case 3: 649 return MEM_LPDDR4; 650 case 4: 651 return MEM_WIO2; 652 default: 653 return MEM_UNKNOWN; 654 } 655 } 656 657 static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit) 658 { 659 u64 hash_addr = addr & mask, hash = 0; 660 u64 intlv = (addr >> intlv_bit) & 1; 661 int i; 662 663 for (i = 6; i < 20; i++) 664 hash ^= (hash_addr >> i) & 1; 665 666 return (int)hash ^ intlv; 667 } 668 669 static u64 decode_channel_addr(u64 addr, int intlv_bit) 670 { 671 u64 channel_addr; 672 673 /* Remove the interleave bit and shift upper part down to fill gap */ 674 channel_addr = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit; 675 channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1); 676 677 return channel_addr; 678 } 679 680 static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map, 681 int *idx, u64 *sub_addr) 682 { 683 int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6; 684 685 if (addr > 2 * s_size) { 686 *sub_addr = addr - s_size; 687 *idx = l_map; 688 return; 689 } 690 691 if (CHANNEL_HASH_MODE(hash)) { 692 *sub_addr = decode_channel_addr(addr, intlv_bit); 693 *idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit); 694 } else { 695 *sub_addr = decode_channel_addr(addr, 6); 696 *idx = GET_BITFIELD(addr, 6, 6); 697 } 698 } 699 700 static int igen6_decode(struct decoded_addr *res) 701 { 702 struct igen6_imc *imc = &igen6_pvt->imc[res->mc]; 703 u64 addr = res->imc_addr, sub_addr, s_size; 704 int idx, l_map; 705 u32 hash; 706 707 if (addr >= igen6_tom) { 708 edac_dbg(0, "Address 0x%llx out of range\n", addr); 709 return -EINVAL; 710 } 711 712 /* Decode channel */ 713 hash = readl(imc->window + CHANNEL_HASH_OFFSET); 714 s_size = imc->ch_s_size; 715 l_map = imc->ch_l_map; 716 decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr); 717 res->channel_idx = idx; 718 res->channel_addr = sub_addr; 719 720 /* Decode sub-channel/DIMM */ 721 hash = readl(imc->window + CHANNEL_EHASH_OFFSET); 722 s_size = imc->dimm_s_size[idx]; 723 l_map = imc->dimm_l_map[idx]; 724 decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr); 725 res->sub_channel_idx = idx; 726 res->sub_channel_addr = sub_addr; 727 728 return 0; 729 } 730 731 static void igen6_output_error(struct decoded_addr *res, 732 struct mem_ctl_info *mci, u64 ecclog) 733 { 734 enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ? 735 HW_EVENT_ERR_UNCORRECTED : 736 HW_EVENT_ERR_CORRECTED; 737 738 edac_mc_handle_error(type, mci, 1, 739 res->sys_addr >> PAGE_SHIFT, 740 res->sys_addr & ~PAGE_MASK, 741 ECC_ERROR_LOG_SYND(ecclog), 742 res->channel_idx, res->sub_channel_idx, 743 -1, "", ""); 744 } 745 746 static struct gen_pool *ecclog_gen_pool_create(void) 747 { 748 struct gen_pool *pool; 749 750 pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1); 751 if (!pool) 752 return NULL; 753 754 if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) { 755 gen_pool_destroy(pool); 756 return NULL; 757 } 758 759 return pool; 760 } 761 762 static int ecclog_gen_pool_add(int mc, u64 ecclog) 763 { 764 struct ecclog_node *node; 765 766 node = (void *)gen_pool_alloc(ecclog_pool, sizeof(*node)); 767 if (!node) 768 return -ENOMEM; 769 770 node->mc = mc; 771 node->ecclog = ecclog; 772 llist_add(&node->llnode, &ecclog_llist); 773 774 return 0; 775 } 776 777 /* 778 * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI 779 * configuration space status register ERRSTS can indicate whether a 780 * correctable error or an uncorrectable error occurred. We only use the 781 * ECC_ERROR_LOG register to check error type, but need to clear both 782 * registers to enable future error events. 783 */ 784 static u64 ecclog_read_and_clear(struct igen6_imc *imc) 785 { 786 u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET); 787 788 if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) { 789 /* Clear CE/UE bits by writing 1s */ 790 writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); 791 return ecclog; 792 } 793 794 return 0; 795 } 796 797 static void errsts_clear(struct igen6_imc *imc) 798 { 799 u16 errsts; 800 801 if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) { 802 igen6_printk(KERN_ERR, "Failed to read ERRSTS\n"); 803 return; 804 } 805 806 /* Clear CE/UE bits by writing 1s */ 807 if (errsts & (ERRSTS_CE | ERRSTS_UE)) 808 pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts); 809 } 810 811 static int errcmd_enable_error_reporting(bool enable) 812 { 813 struct igen6_imc *imc = &igen6_pvt->imc[0]; 814 u16 errcmd; 815 int rc; 816 817 rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd); 818 if (rc) 819 return pcibios_err_to_errno(rc); 820 821 if (enable) 822 errcmd |= ERRCMD_CE | ERRSTS_UE; 823 else 824 errcmd &= ~(ERRCMD_CE | ERRSTS_UE); 825 826 rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd); 827 if (rc) 828 return pcibios_err_to_errno(rc); 829 830 return 0; 831 } 832 833 static int ecclog_handler(void) 834 { 835 struct igen6_imc *imc; 836 int i, n = 0; 837 u64 ecclog; 838 839 for (i = 0; i < res_cfg->num_imc; i++) { 840 imc = &igen6_pvt->imc[i]; 841 842 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ 843 844 ecclog = ecclog_read_and_clear(imc); 845 if (!ecclog) 846 continue; 847 848 if (!ecclog_gen_pool_add(i, ecclog)) 849 irq_work_queue(&ecclog_irq_work); 850 851 n++; 852 } 853 854 return n; 855 } 856 857 static void ecclog_work_cb(struct work_struct *work) 858 { 859 struct ecclog_node *node, *tmp; 860 struct mem_ctl_info *mci; 861 struct llist_node *head; 862 struct decoded_addr res; 863 u64 eaddr; 864 865 head = llist_del_all(&ecclog_llist); 866 if (!head) 867 return; 868 869 llist_for_each_entry_safe(node, tmp, head, llnode) { 870 memset(&res, 0, sizeof(res)); 871 if (res_cfg->err_addr) 872 eaddr = res_cfg->err_addr(node->ecclog); 873 else 874 eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) << 875 ECC_ERROR_LOG_ADDR_SHIFT; 876 res.mc = node->mc; 877 res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc); 878 res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc); 879 880 mci = igen6_pvt->imc[res.mc].mci; 881 882 edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog); 883 igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n"); 884 igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr); 885 886 if (!igen6_decode(&res)) 887 igen6_output_error(&res, mci, node->ecclog); 888 889 gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node)); 890 } 891 } 892 893 static void ecclog_irq_work_cb(struct irq_work *irq_work) 894 { 895 int i; 896 897 for (i = 0; i < res_cfg->num_imc; i++) 898 errsts_clear(&igen6_pvt->imc[i]); 899 900 if (!llist_empty(&ecclog_llist)) 901 schedule_work(&ecclog_work); 902 } 903 904 static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs) 905 { 906 unsigned char reason; 907 908 if (!ecclog_handler()) 909 return NMI_DONE; 910 911 /* 912 * Both In-Band ECC correctable error and uncorrectable error are 913 * reported by SERR# NMI. The NMI generic code (see pci_serr_error()) 914 * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to 915 * re-enable the SERR# NMI after NMI handling. So clear this bit here 916 * to re-enable SERR# NMI for receiving future In-Band ECC errors. 917 */ 918 reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK; 919 reason |= NMI_REASON_CLEAR_SERR; 920 outb(reason, NMI_REASON_PORT); 921 reason &= ~NMI_REASON_CLEAR_SERR; 922 outb(reason, NMI_REASON_PORT); 923 924 return NMI_HANDLED; 925 } 926 927 static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val, 928 void *data) 929 { 930 struct mce *mce = (struct mce *)data; 931 char *type; 932 933 if (mce->kflags & MCE_HANDLED_CEC) 934 return NOTIFY_DONE; 935 936 /* 937 * Ignore unless this is a memory related error. 938 * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here, 939 * since this bit isn't set on some CPU (e.g., Tiger Lake UP3). 940 */ 941 if ((mce->status & 0xefff) >> 7 != 1) 942 return NOTIFY_DONE; 943 944 if (mce->mcgstatus & MCG_STATUS_MCIP) 945 type = "Exception"; 946 else 947 type = "Event"; 948 949 edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n", 950 mce->extcpu, type, mce->mcgstatus, 951 mce->bank, mce->status); 952 edac_dbg(0, "TSC 0x%llx\n", mce->tsc); 953 edac_dbg(0, "ADDR 0x%llx\n", mce->addr); 954 edac_dbg(0, "MISC 0x%llx\n", mce->misc); 955 edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n", 956 mce->cpuvendor, mce->cpuid, mce->time, 957 mce->socketid, mce->apicid); 958 /* 959 * We just use the Machine Check for the memory error notification. 960 * Each memory controller is associated with an IBECC instance. 961 * Directly read and clear the error information(error address and 962 * error type) on all the IBECC instances so that we know on which 963 * memory controller the memory error(s) occurred. 964 */ 965 if (!ecclog_handler()) 966 return NOTIFY_DONE; 967 968 mce->kflags |= MCE_HANDLED_EDAC; 969 970 return NOTIFY_DONE; 971 } 972 973 static struct notifier_block ecclog_mce_dec = { 974 .notifier_call = ecclog_mce_handler, 975 .priority = MCE_PRIO_EDAC, 976 }; 977 978 static bool igen6_check_ecc(struct igen6_imc *imc) 979 { 980 u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET); 981 982 return !!(activate & IBECC_ACTIVATE_EN); 983 } 984 985 static int igen6_get_dimm_config(struct mem_ctl_info *mci) 986 { 987 struct igen6_imc *imc = mci->pvt_info; 988 u32 mad_inter, mad_intra, mad_dimm; 989 int i, j, ndimms, mc = imc->mc; 990 struct dimm_info *dimm; 991 enum mem_type mtype; 992 enum dev_type dtype; 993 u64 dsize; 994 bool ecc; 995 996 edac_dbg(2, "\n"); 997 998 mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET); 999 mtype = get_memory_type(mad_inter); 1000 ecc = igen6_check_ecc(imc); 1001 imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter); 1002 imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter); 1003 1004 for (i = 0; i < NUM_CHANNELS; i++) { 1005 mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4); 1006 mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4); 1007 1008 imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm); 1009 imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm); 1010 imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra); 1011 imc->size += imc->dimm_s_size[i]; 1012 imc->size += imc->dimm_l_size[i]; 1013 ndimms = 0; 1014 1015 for (j = 0; j < NUM_DIMMS; j++) { 1016 dimm = edac_get_dimm(mci, i, j, 0); 1017 1018 if (j ^ imc->dimm_l_map[i]) { 1019 dtype = get_width(0, mad_dimm); 1020 dsize = imc->dimm_s_size[i]; 1021 } else { 1022 dtype = get_width(1, mad_dimm); 1023 dsize = imc->dimm_l_size[i]; 1024 } 1025 1026 if (!dsize) 1027 continue; 1028 1029 dimm->grain = 64; 1030 dimm->mtype = mtype; 1031 dimm->dtype = dtype; 1032 dimm->nr_pages = MiB_TO_PAGES(dsize >> 20); 1033 dimm->edac_mode = EDAC_SECDED; 1034 snprintf(dimm->label, sizeof(dimm->label), 1035 "MC#%d_Chan#%d_DIMM#%d", mc, i, j); 1036 edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n", 1037 mc, i, j, dsize >> 20, dimm->nr_pages); 1038 1039 ndimms++; 1040 } 1041 1042 if (ndimms && !ecc) { 1043 igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc); 1044 return -ENODEV; 1045 } 1046 } 1047 1048 edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20); 1049 1050 return 0; 1051 } 1052 1053 #ifdef CONFIG_EDAC_DEBUG 1054 /* Top of upper usable DRAM */ 1055 static u64 igen6_touud; 1056 #define TOUUD_OFFSET 0xa8 1057 1058 static void igen6_reg_dump(struct igen6_imc *imc) 1059 { 1060 int i; 1061 1062 edac_dbg(2, "CHANNEL_HASH : 0x%x\n", 1063 readl(imc->window + CHANNEL_HASH_OFFSET)); 1064 edac_dbg(2, "CHANNEL_EHASH : 0x%x\n", 1065 readl(imc->window + CHANNEL_EHASH_OFFSET)); 1066 edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n", 1067 readl(imc->window + MAD_INTER_CHANNEL_OFFSET)); 1068 edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n", 1069 readq(imc->window + ECC_ERROR_LOG_OFFSET)); 1070 1071 for (i = 0; i < NUM_CHANNELS; i++) { 1072 edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i, 1073 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4)); 1074 edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i, 1075 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4)); 1076 } 1077 edac_dbg(2, "TOLUD : 0x%x", igen6_tolud); 1078 edac_dbg(2, "TOUUD : 0x%llx", igen6_touud); 1079 edac_dbg(2, "TOM : 0x%llx", igen6_tom); 1080 } 1081 1082 static struct dentry *igen6_test; 1083 1084 static int debugfs_u64_set(void *data, u64 val) 1085 { 1086 u64 ecclog; 1087 1088 if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) { 1089 edac_dbg(0, "Address 0x%llx out of range\n", val); 1090 return 0; 1091 } 1092 1093 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); 1094 1095 val >>= ECC_ERROR_LOG_ADDR_SHIFT; 1096 ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE; 1097 1098 if (!ecclog_gen_pool_add(0, ecclog)) 1099 irq_work_queue(&ecclog_irq_work); 1100 1101 return 0; 1102 } 1103 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); 1104 1105 static void igen6_debug_setup(void) 1106 { 1107 igen6_test = edac_debugfs_create_dir("igen6_test"); 1108 if (!igen6_test) 1109 return; 1110 1111 if (!edac_debugfs_create_file("addr", 0200, igen6_test, 1112 NULL, &fops_u64_wo)) { 1113 debugfs_remove(igen6_test); 1114 igen6_test = NULL; 1115 } 1116 } 1117 1118 static void igen6_debug_teardown(void) 1119 { 1120 debugfs_remove_recursive(igen6_test); 1121 } 1122 #else 1123 static void igen6_reg_dump(struct igen6_imc *imc) {} 1124 static void igen6_debug_setup(void) {} 1125 static void igen6_debug_teardown(void) {} 1126 #endif 1127 1128 static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar) 1129 { 1130 union { 1131 u64 v; 1132 struct { 1133 u32 v_lo; 1134 u32 v_hi; 1135 }; 1136 } u; 1137 1138 edac_dbg(2, "\n"); 1139 1140 if (!res_cfg->ibecc_available(pdev)) { 1141 edac_dbg(2, "No In-Band ECC IP\n"); 1142 goto fail; 1143 } 1144 1145 if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) { 1146 igen6_printk(KERN_ERR, "Failed to read TOLUD\n"); 1147 goto fail; 1148 } 1149 1150 igen6_tolud &= GENMASK(31, 20); 1151 1152 if (pci_read_config_dword(pdev, TOM_OFFSET, &u.v_lo)) { 1153 igen6_printk(KERN_ERR, "Failed to read lower TOM\n"); 1154 goto fail; 1155 } 1156 1157 if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &u.v_hi)) { 1158 igen6_printk(KERN_ERR, "Failed to read upper TOM\n"); 1159 goto fail; 1160 } 1161 1162 igen6_tom = u.v & GENMASK_ULL(38, 20); 1163 1164 if (get_mchbar(pdev, mchbar)) 1165 goto fail; 1166 1167 #ifdef CONFIG_EDAC_DEBUG 1168 if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo)) 1169 edac_dbg(2, "Failed to read lower TOUUD\n"); 1170 else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &u.v_hi)) 1171 edac_dbg(2, "Failed to read upper TOUUD\n"); 1172 else 1173 igen6_touud = u.v & GENMASK_ULL(38, 20); 1174 #endif 1175 1176 return 0; 1177 fail: 1178 return -ENODEV; 1179 } 1180 1181 static void igen6_check(struct mem_ctl_info *mci) 1182 { 1183 struct igen6_imc *imc = mci->pvt_info; 1184 u64 ecclog; 1185 1186 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ 1187 ecclog = ecclog_read_and_clear(imc); 1188 if (!ecclog) 1189 return; 1190 1191 if (!ecclog_gen_pool_add(imc->mc, ecclog)) 1192 irq_work_queue(&ecclog_irq_work); 1193 } 1194 1195 static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) 1196 { 1197 struct edac_mc_layer layers[2]; 1198 struct mem_ctl_info *mci; 1199 struct igen6_imc *imc; 1200 void __iomem *window; 1201 int rc; 1202 1203 edac_dbg(2, "\n"); 1204 1205 mchbar += mc * MCHBAR_SIZE; 1206 window = ioremap(mchbar, MCHBAR_SIZE); 1207 if (!window) { 1208 igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar); 1209 return -ENODEV; 1210 } 1211 1212 layers[0].type = EDAC_MC_LAYER_CHANNEL; 1213 layers[0].size = NUM_CHANNELS; 1214 layers[0].is_virt_csrow = false; 1215 layers[1].type = EDAC_MC_LAYER_SLOT; 1216 layers[1].size = NUM_DIMMS; 1217 layers[1].is_virt_csrow = true; 1218 1219 mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); 1220 if (!mci) { 1221 rc = -ENOMEM; 1222 goto fail; 1223 } 1224 1225 mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc); 1226 if (!mci->ctl_name) { 1227 rc = -ENOMEM; 1228 goto fail2; 1229 } 1230 1231 mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4; 1232 mci->edac_ctl_cap = EDAC_FLAG_SECDED; 1233 mci->edac_cap = EDAC_FLAG_SECDED; 1234 mci->mod_name = EDAC_MOD_STR; 1235 mci->dev_name = pci_name(pdev); 1236 if (edac_op_state == EDAC_OPSTATE_POLL) 1237 mci->edac_check = igen6_check; 1238 mci->pvt_info = &igen6_pvt->imc[mc]; 1239 1240 imc = mci->pvt_info; 1241 device_initialize(&imc->dev); 1242 /* 1243 * EDAC core uses mci->pdev(pointer of structure device) as 1244 * memory controller ID. The client SoCs attach one or more 1245 * memory controllers to single pci_dev (single pci_dev->dev 1246 * can be for multiple memory controllers). 1247 * 1248 * To make mci->pdev unique, assign pci_dev->dev to mci->pdev 1249 * for the first memory controller and assign a unique imc->dev 1250 * to mci->pdev for each non-first memory controller. 1251 */ 1252 mci->pdev = mc ? &imc->dev : &pdev->dev; 1253 imc->mc = mc; 1254 imc->pdev = pdev; 1255 imc->window = window; 1256 1257 igen6_reg_dump(imc); 1258 1259 rc = igen6_get_dimm_config(mci); 1260 if (rc) 1261 goto fail3; 1262 1263 rc = edac_mc_add_mc(mci); 1264 if (rc) { 1265 igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc); 1266 goto fail3; 1267 } 1268 1269 imc->mci = mci; 1270 return 0; 1271 fail3: 1272 mci->pvt_info = NULL; 1273 kfree(mci->ctl_name); 1274 fail2: 1275 edac_mc_free(mci); 1276 fail: 1277 iounmap(window); 1278 return rc; 1279 } 1280 1281 static void igen6_unregister_mcis(void) 1282 { 1283 struct mem_ctl_info *mci; 1284 struct igen6_imc *imc; 1285 int i; 1286 1287 edac_dbg(2, "\n"); 1288 1289 for (i = 0; i < res_cfg->num_imc; i++) { 1290 imc = &igen6_pvt->imc[i]; 1291 mci = imc->mci; 1292 if (!mci) 1293 continue; 1294 1295 edac_mc_del_mc(mci->pdev); 1296 kfree(mci->ctl_name); 1297 mci->pvt_info = NULL; 1298 edac_mc_free(mci); 1299 iounmap(imc->window); 1300 } 1301 } 1302 1303 static int igen6_mem_slice_setup(u64 mchbar) 1304 { 1305 struct igen6_imc *imc = &igen6_pvt->imc[0]; 1306 u64 base = mchbar + res_cfg->cmf_base; 1307 u32 offset = res_cfg->ms_hash_offset; 1308 u32 size = res_cfg->cmf_size; 1309 u64 ms_s_size, ms_hash; 1310 void __iomem *cmf; 1311 int ms_l_map; 1312 1313 edac_dbg(2, "\n"); 1314 1315 if (imc[0].size < imc[1].size) { 1316 ms_s_size = imc[0].size; 1317 ms_l_map = 1; 1318 } else { 1319 ms_s_size = imc[1].size; 1320 ms_l_map = 0; 1321 } 1322 1323 igen6_pvt->ms_s_size = ms_s_size; 1324 igen6_pvt->ms_l_map = ms_l_map; 1325 1326 edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n", 1327 ms_s_size >> 20, ms_l_map); 1328 1329 if (!size) 1330 return 0; 1331 1332 cmf = ioremap(base, size); 1333 if (!cmf) { 1334 igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base); 1335 return -ENODEV; 1336 } 1337 1338 ms_hash = readq(cmf + offset); 1339 igen6_pvt->ms_hash = ms_hash; 1340 1341 edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash); 1342 1343 iounmap(cmf); 1344 1345 return 0; 1346 } 1347 1348 static int register_err_handler(void) 1349 { 1350 int rc; 1351 1352 if (res_cfg->machine_check) { 1353 mce_register_decode_chain(&ecclog_mce_dec); 1354 return 0; 1355 } 1356 1357 rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler, 1358 0, IGEN6_NMI_NAME); 1359 if (rc) { 1360 igen6_printk(KERN_ERR, "Failed to register NMI handler\n"); 1361 return rc; 1362 } 1363 1364 return 0; 1365 } 1366 1367 static void unregister_err_handler(void) 1368 { 1369 if (res_cfg->machine_check) { 1370 mce_unregister_decode_chain(&ecclog_mce_dec); 1371 return; 1372 } 1373 1374 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); 1375 } 1376 1377 static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent) 1378 { 1379 /* 1380 * Quirk: Certain SoCs' error reporting interrupts don't work. 1381 * Force polling mode for them to ensure that memory error 1382 * events can be handled. 1383 */ 1384 if (ent->device == DID_ADL_N_SKU4) { 1385 edac_op_state = EDAC_OPSTATE_POLL; 1386 return; 1387 } 1388 1389 /* Set the mode according to the configuration data. */ 1390 if (cfg->machine_check) 1391 edac_op_state = EDAC_OPSTATE_INT; 1392 else 1393 edac_op_state = EDAC_OPSTATE_NMI; 1394 } 1395 1396 static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 1397 { 1398 u64 mchbar; 1399 int i, rc; 1400 1401 edac_dbg(2, "\n"); 1402 1403 igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL); 1404 if (!igen6_pvt) 1405 return -ENOMEM; 1406 1407 res_cfg = (struct res_config *)ent->driver_data; 1408 1409 rc = igen6_pci_setup(pdev, &mchbar); 1410 if (rc) 1411 goto fail; 1412 1413 opstate_set(res_cfg, ent); 1414 1415 for (i = 0; i < res_cfg->num_imc; i++) { 1416 rc = igen6_register_mci(i, mchbar, pdev); 1417 if (rc) 1418 goto fail2; 1419 } 1420 1421 if (res_cfg->num_imc > 1) { 1422 rc = igen6_mem_slice_setup(mchbar); 1423 if (rc) 1424 goto fail2; 1425 } 1426 1427 ecclog_pool = ecclog_gen_pool_create(); 1428 if (!ecclog_pool) { 1429 rc = -ENOMEM; 1430 goto fail2; 1431 } 1432 1433 INIT_WORK(&ecclog_work, ecclog_work_cb); 1434 init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb); 1435 1436 rc = register_err_handler(); 1437 if (rc) 1438 goto fail3; 1439 1440 /* Enable error reporting */ 1441 rc = errcmd_enable_error_reporting(true); 1442 if (rc) { 1443 igen6_printk(KERN_ERR, "Failed to enable error reporting\n"); 1444 goto fail4; 1445 } 1446 1447 /* Check if any pending errors before/during the registration of the error handler */ 1448 ecclog_handler(); 1449 1450 igen6_debug_setup(); 1451 return 0; 1452 fail4: 1453 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); 1454 fail3: 1455 gen_pool_destroy(ecclog_pool); 1456 fail2: 1457 igen6_unregister_mcis(); 1458 fail: 1459 kfree(igen6_pvt); 1460 return rc; 1461 } 1462 1463 static void igen6_remove(struct pci_dev *pdev) 1464 { 1465 edac_dbg(2, "\n"); 1466 1467 igen6_debug_teardown(); 1468 errcmd_enable_error_reporting(false); 1469 unregister_err_handler(); 1470 irq_work_sync(&ecclog_irq_work); 1471 flush_work(&ecclog_work); 1472 gen_pool_destroy(ecclog_pool); 1473 igen6_unregister_mcis(); 1474 kfree(igen6_pvt); 1475 } 1476 1477 static struct pci_driver igen6_driver = { 1478 .name = EDAC_MOD_STR, 1479 .probe = igen6_probe, 1480 .remove = igen6_remove, 1481 .id_table = igen6_pci_tbl, 1482 }; 1483 1484 static int __init igen6_init(void) 1485 { 1486 const char *owner; 1487 int rc; 1488 1489 edac_dbg(2, "\n"); 1490 1491 if (ghes_get_devices()) 1492 return -EBUSY; 1493 1494 owner = edac_get_owner(); 1495 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) 1496 return -EBUSY; 1497 1498 rc = pci_register_driver(&igen6_driver); 1499 if (rc) 1500 return rc; 1501 1502 igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION); 1503 1504 return 0; 1505 } 1506 1507 static void __exit igen6_exit(void) 1508 { 1509 edac_dbg(2, "\n"); 1510 1511 pci_unregister_driver(&igen6_driver); 1512 } 1513 1514 module_init(igen6_init); 1515 module_exit(igen6_exit); 1516 1517 MODULE_LICENSE("GPL v2"); 1518 MODULE_AUTHOR("Qiuxu Zhuo"); 1519 MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC"); 1520