1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Driver for Intel client SoC with integrated memory controller using IBECC 4 * 5 * Copyright (C) 2020 Intel Corporation 6 * 7 * The In-Band ECC (IBECC) IP provides ECC protection to all or specific 8 * regions of the physical memory space. It's used for memory controllers 9 * that don't support the out-of-band ECC which often needs an additional 10 * storage device to each channel for storing ECC data. 11 */ 12 13 #include <linux/module.h> 14 #include <linux/init.h> 15 #include <linux/pci.h> 16 #include <linux/slab.h> 17 #include <linux/irq_work.h> 18 #include <linux/llist.h> 19 #include <linux/genalloc.h> 20 #include <linux/edac.h> 21 #include <linux/bits.h> 22 #include <linux/io.h> 23 #include <asm/mach_traps.h> 24 #include <asm/nmi.h> 25 #include <asm/mce.h> 26 27 #include "edac_mc.h" 28 #include "edac_module.h" 29 30 #define IGEN6_REVISION "v2.5.1" 31 32 #define EDAC_MOD_STR "igen6_edac" 33 #define IGEN6_NMI_NAME "igen6_ibecc" 34 35 /* Debug macros */ 36 #define igen6_printk(level, fmt, arg...) \ 37 edac_printk(level, "igen6", fmt, ##arg) 38 39 #define igen6_mc_printk(mci, level, fmt, arg...) \ 40 edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg) 41 42 #define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo)) 43 44 #define NUM_IMC 2 /* Max memory controllers */ 45 #define NUM_CHANNELS 2 /* Max channels */ 46 #define NUM_DIMMS 2 /* Max DIMMs per channel */ 47 48 #define _4GB BIT_ULL(32) 49 50 /* Size of physical memory */ 51 #define TOM_OFFSET 0xa0 52 /* Top of low usable DRAM */ 53 #define TOLUD_OFFSET 0xbc 54 /* Capability register C */ 55 #define CAPID_C_OFFSET 0xec 56 #define CAPID_C_IBECC BIT(15) 57 58 /* Capability register E */ 59 #define CAPID_E_OFFSET 0xf0 60 #define CAPID_E_IBECC BIT(12) 61 #define CAPID_E_IBECC_BIT18 BIT(18) 62 63 /* Error Status */ 64 #define ERRSTS_OFFSET 0xc8 65 #define ERRSTS_CE BIT_ULL(6) 66 #define ERRSTS_UE BIT_ULL(7) 67 68 /* Error Command */ 69 #define ERRCMD_OFFSET 0xca 70 #define ERRCMD_CE BIT_ULL(6) 71 #define ERRCMD_UE BIT_ULL(7) 72 73 /* IBECC MMIO base address */ 74 #define IBECC_BASE (res_cfg->ibecc_base) 75 #define IBECC_ACTIVATE_OFFSET IBECC_BASE 76 #define IBECC_ACTIVATE_EN BIT(0) 77 78 /* IBECC error log */ 79 #define ECC_ERROR_LOG_OFFSET (IBECC_BASE + res_cfg->ibecc_error_log_offset) 80 #define ECC_ERROR_LOG_CE BIT_ULL(62) 81 #define ECC_ERROR_LOG_UE BIT_ULL(63) 82 #define ECC_ERROR_LOG_ADDR_SHIFT 5 83 #define ECC_ERROR_LOG_ADDR(v) GET_BITFIELD(v, 5, 38) 84 #define ECC_ERROR_LOG_ADDR45(v) GET_BITFIELD(v, 5, 45) 85 #define ECC_ERROR_LOG_SYND(v) GET_BITFIELD(v, 46, 61) 86 87 /* Host MMIO base address */ 88 #define MCHBAR_OFFSET 0x48 89 #define MCHBAR_EN BIT_ULL(0) 90 #define MCHBAR_BASE(v) (GET_BITFIELD(v, 16, 38) << 16) 91 #define MCHBAR_SIZE 0x10000 92 93 /* Parameters for the channel decode stage */ 94 #define IMC_BASE (res_cfg->imc_base) 95 #define MAD_INTER_CHANNEL_OFFSET IMC_BASE 96 #define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2) 97 #define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3) 98 #define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4) 99 #define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29) 100 101 /* Parameters for DRAM decode stage */ 102 #define MAD_INTRA_CH0_OFFSET (IMC_BASE + 4) 103 #define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0) 104 105 /* DIMM characteristics */ 106 #define MAD_DIMM_CH0_OFFSET (IMC_BASE + 0xc) 107 #define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29) 108 #define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8) 109 #define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29) 110 #define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25) 111 112 /* Hash for memory controller selection */ 113 #define MAD_MC_HASH_OFFSET (IMC_BASE + 0x1b8) 114 #define MAC_MC_HASH_LSB(v) GET_BITFIELD(v, 1, 3) 115 116 /* Hash for channel selection */ 117 #define CHANNEL_HASH_OFFSET (IMC_BASE + 0x24) 118 /* Hash for enhanced channel selection */ 119 #define CHANNEL_EHASH_OFFSET (IMC_BASE + 0x28) 120 #define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) 121 #define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) 122 #define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28) 123 124 /* Parameters for memory slice decode stage */ 125 #define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) 126 #define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) 127 128 static const struct res_config { 129 bool machine_check; 130 int num_imc; 131 u32 imc_base; 132 u32 cmf_base; 133 u32 cmf_size; 134 u32 ms_hash_offset; 135 u32 ibecc_base; 136 u32 ibecc_error_log_offset; 137 bool (*ibecc_available)(struct pci_dev *pdev); 138 /* Extract error address logged in IBECC */ 139 u64 (*err_addr)(u64 ecclog); 140 /* Convert error address logged in IBECC to system physical address */ 141 u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc); 142 /* Convert error address logged in IBECC to integrated memory controller address */ 143 u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc); 144 } *res_cfg; 145 146 struct igen6_imc { 147 int mc; 148 struct mem_ctl_info *mci; 149 struct pci_dev *pdev; 150 struct device dev; 151 void __iomem *window; 152 u64 size; 153 u64 ch_s_size; 154 int ch_l_map; 155 u64 dimm_s_size[NUM_CHANNELS]; 156 u64 dimm_l_size[NUM_CHANNELS]; 157 int dimm_l_map[NUM_CHANNELS]; 158 }; 159 160 static struct igen6_pvt { 161 struct igen6_imc imc[NUM_IMC]; 162 u64 ms_hash; 163 u64 ms_s_size; 164 int ms_l_map; 165 } *igen6_pvt; 166 167 /* The top of low usable DRAM */ 168 static u32 igen6_tolud; 169 /* The size of physical memory */ 170 static u64 igen6_tom; 171 172 struct decoded_addr { 173 int mc; 174 u64 imc_addr; 175 u64 sys_addr; 176 int channel_idx; 177 u64 channel_addr; 178 int sub_channel_idx; 179 u64 sub_channel_addr; 180 }; 181 182 struct ecclog_node { 183 struct llist_node llnode; 184 int mc; 185 u64 ecclog; 186 }; 187 188 /* 189 * In the NMI handler, the driver uses the lock-less memory allocator 190 * to allocate memory to store the IBECC error logs and links the logs 191 * to the lock-less list. Delay printk() and the work of error reporting 192 * to EDAC core in a worker. 193 */ 194 #define ECCLOG_POOL_SIZE PAGE_SIZE 195 static LLIST_HEAD(ecclog_llist); 196 static struct gen_pool *ecclog_pool; 197 static char ecclog_buf[ECCLOG_POOL_SIZE]; 198 static struct irq_work ecclog_irq_work; 199 static struct work_struct ecclog_work; 200 201 /* Compute die IDs for Elkhart Lake with IBECC */ 202 #define DID_EHL_SKU5 0x4514 203 #define DID_EHL_SKU6 0x4528 204 #define DID_EHL_SKU7 0x452a 205 #define DID_EHL_SKU8 0x4516 206 #define DID_EHL_SKU9 0x452c 207 #define DID_EHL_SKU10 0x452e 208 #define DID_EHL_SKU11 0x4532 209 #define DID_EHL_SKU12 0x4518 210 #define DID_EHL_SKU13 0x451a 211 #define DID_EHL_SKU14 0x4534 212 #define DID_EHL_SKU15 0x4536 213 214 /* Compute die IDs for ICL-NNPI with IBECC */ 215 #define DID_ICL_SKU8 0x4581 216 #define DID_ICL_SKU10 0x4585 217 #define DID_ICL_SKU11 0x4589 218 #define DID_ICL_SKU12 0x458d 219 220 /* Compute die IDs for Tiger Lake with IBECC */ 221 #define DID_TGL_SKU 0x9a14 222 223 /* Compute die IDs for Alder Lake with IBECC */ 224 #define DID_ADL_SKU1 0x4601 225 #define DID_ADL_SKU2 0x4602 226 #define DID_ADL_SKU3 0x4621 227 #define DID_ADL_SKU4 0x4641 228 229 /* Compute die IDs for Alder Lake-N with IBECC */ 230 #define DID_ADL_N_SKU1 0x4614 231 #define DID_ADL_N_SKU2 0x4617 232 #define DID_ADL_N_SKU3 0x461b 233 #define DID_ADL_N_SKU4 0x461c 234 #define DID_ADL_N_SKU5 0x4673 235 #define DID_ADL_N_SKU6 0x4674 236 #define DID_ADL_N_SKU7 0x4675 237 #define DID_ADL_N_SKU8 0x4677 238 #define DID_ADL_N_SKU9 0x4678 239 #define DID_ADL_N_SKU10 0x4679 240 #define DID_ADL_N_SKU11 0x467c 241 #define DID_ADL_N_SKU12 0x4632 242 243 /* Compute die IDs for Raptor Lake-P with IBECC */ 244 #define DID_RPL_P_SKU1 0xa706 245 #define DID_RPL_P_SKU2 0xa707 246 #define DID_RPL_P_SKU3 0xa708 247 #define DID_RPL_P_SKU4 0xa716 248 #define DID_RPL_P_SKU5 0xa718 249 250 /* Compute die IDs for Meteor Lake-PS with IBECC */ 251 #define DID_MTL_PS_SKU1 0x7d21 252 #define DID_MTL_PS_SKU2 0x7d22 253 #define DID_MTL_PS_SKU3 0x7d23 254 #define DID_MTL_PS_SKU4 0x7d24 255 256 /* Compute die IDs for Meteor Lake-P with IBECC */ 257 #define DID_MTL_P_SKU1 0x7d01 258 #define DID_MTL_P_SKU2 0x7d02 259 #define DID_MTL_P_SKU3 0x7d14 260 261 /* Compute die IDs for Arrow Lake-UH with IBECC */ 262 #define DID_ARL_UH_SKU1 0x7d06 263 #define DID_ARL_UH_SKU2 0x7d20 264 #define DID_ARL_UH_SKU3 0x7d30 265 266 /* Compute die IDs for Panther Lake-H with IBECC */ 267 #define DID_PTL_H_SKU1 0xb000 268 #define DID_PTL_H_SKU2 0xb001 269 #define DID_PTL_H_SKU3 0xb002 270 271 static int get_mchbar(struct pci_dev *pdev, u64 *mchbar) 272 { 273 union { 274 u64 v; 275 struct { 276 u32 v_lo; 277 u32 v_hi; 278 }; 279 } u; 280 281 if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) { 282 igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n"); 283 return -ENODEV; 284 } 285 286 if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) { 287 igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n"); 288 return -ENODEV; 289 } 290 291 if (!(u.v & MCHBAR_EN)) { 292 igen6_printk(KERN_ERR, "MCHBAR is disabled\n"); 293 return -ENODEV; 294 } 295 296 *mchbar = MCHBAR_BASE(u.v); 297 298 return 0; 299 } 300 301 static bool ehl_ibecc_available(struct pci_dev *pdev) 302 { 303 u32 v; 304 305 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v)) 306 return false; 307 308 return !!(CAPID_C_IBECC & v); 309 } 310 311 static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc) 312 { 313 return eaddr; 314 } 315 316 static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc) 317 { 318 if (eaddr < igen6_tolud) 319 return eaddr; 320 321 if (igen6_tom <= _4GB) 322 return eaddr + igen6_tolud - _4GB; 323 324 if (eaddr >= igen6_tom) 325 return eaddr + igen6_tolud - igen6_tom; 326 327 return eaddr; 328 } 329 330 static bool icl_ibecc_available(struct pci_dev *pdev) 331 { 332 u32 v; 333 334 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v)) 335 return false; 336 337 return !(CAPID_C_IBECC & v) && 338 (boot_cpu_data.x86_stepping >= 1); 339 } 340 341 static bool tgl_ibecc_available(struct pci_dev *pdev) 342 { 343 u32 v; 344 345 if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v)) 346 return false; 347 348 return !(CAPID_E_IBECC & v); 349 } 350 351 static bool mtl_p_ibecc_available(struct pci_dev *pdev) 352 { 353 u32 v; 354 355 if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v)) 356 return false; 357 358 return !(CAPID_E_IBECC_BIT18 & v); 359 } 360 361 static bool mtl_ps_ibecc_available(struct pci_dev *pdev) 362 { 363 #define MCHBAR_MEMSS_IBECCDIS 0x13c00 364 void __iomem *window; 365 u64 mchbar; 366 u32 val; 367 368 if (get_mchbar(pdev, &mchbar)) 369 return false; 370 371 window = ioremap(mchbar, MCHBAR_SIZE * 2); 372 if (!window) { 373 igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar); 374 return false; 375 } 376 377 val = readl(window + MCHBAR_MEMSS_IBECCDIS); 378 iounmap(window); 379 380 /* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */ 381 return !GET_BITFIELD(val, 6, 6); 382 } 383 384 static u64 mem_addr_to_sys_addr(u64 maddr) 385 { 386 if (maddr < igen6_tolud) 387 return maddr; 388 389 if (igen6_tom <= _4GB) 390 return maddr - igen6_tolud + _4GB; 391 392 if (maddr < _4GB) 393 return maddr - igen6_tolud + igen6_tom; 394 395 return maddr; 396 } 397 398 static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit) 399 { 400 u64 hash_addr = addr & mask, hash = hash_init; 401 u64 intlv = (addr >> intlv_bit) & 1; 402 int i; 403 404 for (i = 6; i < 20; i++) 405 hash ^= (hash_addr >> i) & 1; 406 407 return hash ^ intlv; 408 } 409 410 static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc) 411 { 412 u64 maddr, hash, mask, ms_s_size; 413 int intlv_bit; 414 u32 ms_hash; 415 416 ms_s_size = igen6_pvt->ms_s_size; 417 if (eaddr >= ms_s_size) 418 return eaddr + ms_s_size; 419 420 ms_hash = igen6_pvt->ms_hash; 421 422 mask = MEM_SLICE_HASH_MASK(ms_hash); 423 intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6; 424 425 maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) | 426 GET_BITFIELD(eaddr, 0, intlv_bit - 1); 427 428 hash = mem_slice_hash(maddr, mask, mc, intlv_bit); 429 430 return maddr | (hash << intlv_bit); 431 } 432 433 static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc) 434 { 435 u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc); 436 437 return mem_addr_to_sys_addr(maddr); 438 } 439 440 static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc) 441 { 442 return eaddr; 443 } 444 445 static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc) 446 { 447 return mem_addr_to_sys_addr(eaddr); 448 } 449 450 static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc) 451 { 452 u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size; 453 struct igen6_imc *imc = &igen6_pvt->imc[mc]; 454 int intlv_bit; 455 u32 mc_hash; 456 457 if (eaddr >= 2 * ms_s_size) 458 return eaddr - ms_s_size; 459 460 mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET); 461 462 intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6; 463 464 imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit | 465 GET_BITFIELD(eaddr, 0, intlv_bit - 1); 466 467 return imc_addr; 468 } 469 470 static u64 rpl_p_err_addr(u64 ecclog) 471 { 472 return ECC_ERROR_LOG_ADDR45(ecclog); 473 } 474 475 static const struct res_config ehl_cfg = { 476 .num_imc = 1, 477 .imc_base = 0x5000, 478 .ibecc_base = 0xdc00, 479 .ibecc_available = ehl_ibecc_available, 480 .ibecc_error_log_offset = 0x170, 481 .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr, 482 .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, 483 }; 484 485 static const struct res_config icl_cfg = { 486 .num_imc = 1, 487 .imc_base = 0x5000, 488 .ibecc_base = 0xd800, 489 .ibecc_error_log_offset = 0x170, 490 .ibecc_available = icl_ibecc_available, 491 .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr, 492 .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, 493 }; 494 495 static const struct res_config tgl_cfg = { 496 .machine_check = true, 497 .num_imc = 2, 498 .imc_base = 0x5000, 499 .cmf_base = 0x11000, 500 .cmf_size = 0x800, 501 .ms_hash_offset = 0xac, 502 .ibecc_base = 0xd400, 503 .ibecc_error_log_offset = 0x170, 504 .ibecc_available = tgl_ibecc_available, 505 .err_addr_to_sys_addr = tgl_err_addr_to_sys_addr, 506 .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr, 507 }; 508 509 static const struct res_config adl_cfg = { 510 .machine_check = true, 511 .num_imc = 2, 512 .imc_base = 0xd800, 513 .ibecc_base = 0xd400, 514 .ibecc_error_log_offset = 0x68, 515 .ibecc_available = tgl_ibecc_available, 516 .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, 517 .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, 518 }; 519 520 static const struct res_config adl_n_cfg = { 521 .machine_check = true, 522 .num_imc = 1, 523 .imc_base = 0xd800, 524 .ibecc_base = 0xd400, 525 .ibecc_error_log_offset = 0x68, 526 .ibecc_available = tgl_ibecc_available, 527 .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, 528 .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, 529 }; 530 531 static const struct res_config rpl_p_cfg = { 532 .machine_check = true, 533 .num_imc = 2, 534 .imc_base = 0xd800, 535 .ibecc_base = 0xd400, 536 .ibecc_error_log_offset = 0x68, 537 .ibecc_available = tgl_ibecc_available, 538 .err_addr = rpl_p_err_addr, 539 .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, 540 .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, 541 }; 542 543 static const struct res_config mtl_ps_cfg = { 544 .machine_check = true, 545 .num_imc = 2, 546 .imc_base = 0xd800, 547 .ibecc_base = 0xd400, 548 .ibecc_error_log_offset = 0x170, 549 .ibecc_available = mtl_ps_ibecc_available, 550 .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, 551 .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, 552 }; 553 554 static const struct res_config mtl_p_cfg = { 555 .machine_check = true, 556 .num_imc = 2, 557 .imc_base = 0xd800, 558 .ibecc_base = 0xd400, 559 .ibecc_error_log_offset = 0x170, 560 .ibecc_available = mtl_p_ibecc_available, 561 .err_addr_to_sys_addr = adl_err_addr_to_sys_addr, 562 .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, 563 }; 564 565 static const struct pci_device_id igen6_pci_tbl[] = { 566 { PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg }, 567 { PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg }, 568 { PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg }, 569 { PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg }, 570 { PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg }, 571 { PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg }, 572 { PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg }, 573 { PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg }, 574 { PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg }, 575 { PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg }, 576 { PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg }, 577 { PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg }, 578 { PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg }, 579 { PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg }, 580 { PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg }, 581 { PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg }, 582 { PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg }, 583 { PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg }, 584 { PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg }, 585 { PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg }, 586 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg }, 587 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg }, 588 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg }, 589 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg }, 590 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg }, 591 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg }, 592 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg }, 593 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg }, 594 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg }, 595 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg }, 596 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg }, 597 { PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg }, 598 { PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg }, 599 { PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg }, 600 { PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg }, 601 { PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg }, 602 { PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg }, 603 { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg }, 604 { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg }, 605 { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg }, 606 { PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg }, 607 { PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg }, 608 { PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg }, 609 { PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg }, 610 { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg }, 611 { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg }, 612 { PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg }, 613 { PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg }, 614 { PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg }, 615 { PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg }, 616 { }, 617 }; 618 MODULE_DEVICE_TABLE(pci, igen6_pci_tbl); 619 620 static enum dev_type get_width(int dimm_l, u32 mad_dimm) 621 { 622 u32 w = dimm_l ? MAD_DIMM_CH_DLW(mad_dimm) : 623 MAD_DIMM_CH_DSW(mad_dimm); 624 625 switch (w) { 626 case 0: 627 return DEV_X8; 628 case 1: 629 return DEV_X16; 630 case 2: 631 return DEV_X32; 632 default: 633 return DEV_UNKNOWN; 634 } 635 } 636 637 static enum mem_type get_memory_type(u32 mad_inter) 638 { 639 u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter); 640 641 switch (t) { 642 case 0: 643 return MEM_DDR4; 644 case 1: 645 return MEM_DDR3; 646 case 2: 647 return MEM_LPDDR3; 648 case 3: 649 return MEM_LPDDR4; 650 case 4: 651 return MEM_WIO2; 652 default: 653 return MEM_UNKNOWN; 654 } 655 } 656 657 static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit) 658 { 659 u64 hash_addr = addr & mask, hash = 0; 660 u64 intlv = (addr >> intlv_bit) & 1; 661 int i; 662 663 for (i = 6; i < 20; i++) 664 hash ^= (hash_addr >> i) & 1; 665 666 return (int)hash ^ intlv; 667 } 668 669 static u64 decode_channel_addr(u64 addr, int intlv_bit) 670 { 671 u64 channel_addr; 672 673 /* Remove the interleave bit and shift upper part down to fill gap */ 674 channel_addr = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit; 675 channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1); 676 677 return channel_addr; 678 } 679 680 static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map, 681 int *idx, u64 *sub_addr) 682 { 683 int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6; 684 685 if (addr > 2 * s_size) { 686 *sub_addr = addr - s_size; 687 *idx = l_map; 688 return; 689 } 690 691 if (CHANNEL_HASH_MODE(hash)) { 692 *sub_addr = decode_channel_addr(addr, intlv_bit); 693 *idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit); 694 } else { 695 *sub_addr = decode_channel_addr(addr, 6); 696 *idx = GET_BITFIELD(addr, 6, 6); 697 } 698 } 699 700 static int igen6_decode(struct decoded_addr *res) 701 { 702 struct igen6_imc *imc = &igen6_pvt->imc[res->mc]; 703 u64 addr = res->imc_addr, sub_addr, s_size; 704 int idx, l_map; 705 u32 hash; 706 707 if (addr >= igen6_tom) { 708 edac_dbg(0, "Address 0x%llx out of range\n", addr); 709 return -EINVAL; 710 } 711 712 /* Decode channel */ 713 hash = readl(imc->window + CHANNEL_HASH_OFFSET); 714 s_size = imc->ch_s_size; 715 l_map = imc->ch_l_map; 716 decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr); 717 res->channel_idx = idx; 718 res->channel_addr = sub_addr; 719 720 /* Decode sub-channel/DIMM */ 721 hash = readl(imc->window + CHANNEL_EHASH_OFFSET); 722 s_size = imc->dimm_s_size[idx]; 723 l_map = imc->dimm_l_map[idx]; 724 decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr); 725 res->sub_channel_idx = idx; 726 res->sub_channel_addr = sub_addr; 727 728 return 0; 729 } 730 731 static void igen6_output_error(struct decoded_addr *res, 732 struct mem_ctl_info *mci, u64 ecclog) 733 { 734 enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ? 735 HW_EVENT_ERR_UNCORRECTED : 736 HW_EVENT_ERR_CORRECTED; 737 738 edac_mc_handle_error(type, mci, 1, 739 res->sys_addr >> PAGE_SHIFT, 740 res->sys_addr & ~PAGE_MASK, 741 ECC_ERROR_LOG_SYND(ecclog), 742 res->channel_idx, res->sub_channel_idx, 743 -1, "", ""); 744 } 745 746 static struct gen_pool *ecclog_gen_pool_create(void) 747 { 748 struct gen_pool *pool; 749 750 pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1); 751 if (!pool) 752 return NULL; 753 754 if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) { 755 gen_pool_destroy(pool); 756 return NULL; 757 } 758 759 return pool; 760 } 761 762 static int ecclog_gen_pool_add(int mc, u64 ecclog) 763 { 764 struct ecclog_node *node; 765 766 node = (void *)gen_pool_alloc(ecclog_pool, sizeof(*node)); 767 if (!node) 768 return -ENOMEM; 769 770 node->mc = mc; 771 node->ecclog = ecclog; 772 llist_add(&node->llnode, &ecclog_llist); 773 774 return 0; 775 } 776 777 /* 778 * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI 779 * configuration space status register ERRSTS can indicate whether a 780 * correctable error or an uncorrectable error occurred. We only use the 781 * ECC_ERROR_LOG register to check error type, but need to clear both 782 * registers to enable future error events. 783 */ 784 static u64 ecclog_read_and_clear(struct igen6_imc *imc) 785 { 786 u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET); 787 788 /* 789 * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain 790 * the invalid value ~0. This will result in a flood of invalid 791 * error reports in polling mode. Skip it. 792 */ 793 if (ecclog == ~0) 794 return 0; 795 796 /* Neither a CE nor a UE. Skip it.*/ 797 if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE))) 798 return 0; 799 800 /* Clear CE/UE bits by writing 1s */ 801 writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); 802 803 return ecclog; 804 } 805 806 static void errsts_clear(struct igen6_imc *imc) 807 { 808 u16 errsts; 809 810 if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) { 811 igen6_printk(KERN_ERR, "Failed to read ERRSTS\n"); 812 return; 813 } 814 815 /* Clear CE/UE bits by writing 1s */ 816 if (errsts & (ERRSTS_CE | ERRSTS_UE)) 817 pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts); 818 } 819 820 static int errcmd_enable_error_reporting(bool enable) 821 { 822 struct igen6_imc *imc = &igen6_pvt->imc[0]; 823 u16 errcmd; 824 int rc; 825 826 rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd); 827 if (rc) 828 return pcibios_err_to_errno(rc); 829 830 if (enable) 831 errcmd |= ERRCMD_CE | ERRSTS_UE; 832 else 833 errcmd &= ~(ERRCMD_CE | ERRSTS_UE); 834 835 rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd); 836 if (rc) 837 return pcibios_err_to_errno(rc); 838 839 return 0; 840 } 841 842 static int ecclog_handler(void) 843 { 844 struct igen6_imc *imc; 845 int i, n = 0; 846 u64 ecclog; 847 848 for (i = 0; i < res_cfg->num_imc; i++) { 849 imc = &igen6_pvt->imc[i]; 850 851 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ 852 853 ecclog = ecclog_read_and_clear(imc); 854 if (!ecclog) 855 continue; 856 857 if (!ecclog_gen_pool_add(i, ecclog)) 858 irq_work_queue(&ecclog_irq_work); 859 860 n++; 861 } 862 863 return n; 864 } 865 866 static void ecclog_work_cb(struct work_struct *work) 867 { 868 struct ecclog_node *node, *tmp; 869 struct mem_ctl_info *mci; 870 struct llist_node *head; 871 struct decoded_addr res; 872 u64 eaddr; 873 874 head = llist_del_all(&ecclog_llist); 875 if (!head) 876 return; 877 878 llist_for_each_entry_safe(node, tmp, head, llnode) { 879 memset(&res, 0, sizeof(res)); 880 if (res_cfg->err_addr) 881 eaddr = res_cfg->err_addr(node->ecclog); 882 else 883 eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) << 884 ECC_ERROR_LOG_ADDR_SHIFT; 885 res.mc = node->mc; 886 res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc); 887 res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc); 888 889 mci = igen6_pvt->imc[res.mc].mci; 890 891 edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog); 892 igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n"); 893 igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr); 894 895 if (!igen6_decode(&res)) 896 igen6_output_error(&res, mci, node->ecclog); 897 898 gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node)); 899 } 900 } 901 902 static void ecclog_irq_work_cb(struct irq_work *irq_work) 903 { 904 int i; 905 906 for (i = 0; i < res_cfg->num_imc; i++) 907 errsts_clear(&igen6_pvt->imc[i]); 908 909 if (!llist_empty(&ecclog_llist)) 910 schedule_work(&ecclog_work); 911 } 912 913 static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs) 914 { 915 unsigned char reason; 916 917 if (!ecclog_handler()) 918 return NMI_DONE; 919 920 /* 921 * Both In-Band ECC correctable error and uncorrectable error are 922 * reported by SERR# NMI. The NMI generic code (see pci_serr_error()) 923 * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to 924 * re-enable the SERR# NMI after NMI handling. So clear this bit here 925 * to re-enable SERR# NMI for receiving future In-Band ECC errors. 926 */ 927 reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK; 928 reason |= NMI_REASON_CLEAR_SERR; 929 outb(reason, NMI_REASON_PORT); 930 reason &= ~NMI_REASON_CLEAR_SERR; 931 outb(reason, NMI_REASON_PORT); 932 933 return NMI_HANDLED; 934 } 935 936 static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val, 937 void *data) 938 { 939 struct mce *mce = (struct mce *)data; 940 char *type; 941 942 if (mce->kflags & MCE_HANDLED_CEC) 943 return NOTIFY_DONE; 944 945 /* 946 * Ignore unless this is a memory related error. 947 * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here, 948 * since this bit isn't set on some CPU (e.g., Tiger Lake UP3). 949 */ 950 if ((mce->status & 0xefff) >> 7 != 1) 951 return NOTIFY_DONE; 952 953 if (mce->mcgstatus & MCG_STATUS_MCIP) 954 type = "Exception"; 955 else 956 type = "Event"; 957 958 edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n", 959 mce->extcpu, type, mce->mcgstatus, 960 mce->bank, mce->status); 961 edac_dbg(0, "TSC 0x%llx\n", mce->tsc); 962 edac_dbg(0, "ADDR 0x%llx\n", mce->addr); 963 edac_dbg(0, "MISC 0x%llx\n", mce->misc); 964 edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n", 965 mce->cpuvendor, mce->cpuid, mce->time, 966 mce->socketid, mce->apicid); 967 /* 968 * We just use the Machine Check for the memory error notification. 969 * Each memory controller is associated with an IBECC instance. 970 * Directly read and clear the error information(error address and 971 * error type) on all the IBECC instances so that we know on which 972 * memory controller the memory error(s) occurred. 973 */ 974 if (!ecclog_handler()) 975 return NOTIFY_DONE; 976 977 mce->kflags |= MCE_HANDLED_EDAC; 978 979 return NOTIFY_DONE; 980 } 981 982 static struct notifier_block ecclog_mce_dec = { 983 .notifier_call = ecclog_mce_handler, 984 .priority = MCE_PRIO_EDAC, 985 }; 986 987 static bool igen6_check_ecc(struct igen6_imc *imc) 988 { 989 u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET); 990 991 return !!(activate & IBECC_ACTIVATE_EN); 992 } 993 994 static int igen6_get_dimm_config(struct mem_ctl_info *mci) 995 { 996 struct igen6_imc *imc = mci->pvt_info; 997 u32 mad_inter, mad_intra, mad_dimm; 998 int i, j, ndimms, mc = imc->mc; 999 struct dimm_info *dimm; 1000 enum mem_type mtype; 1001 enum dev_type dtype; 1002 u64 dsize; 1003 bool ecc; 1004 1005 edac_dbg(2, "\n"); 1006 1007 mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET); 1008 mtype = get_memory_type(mad_inter); 1009 ecc = igen6_check_ecc(imc); 1010 imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter); 1011 imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter); 1012 1013 for (i = 0; i < NUM_CHANNELS; i++) { 1014 mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4); 1015 mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4); 1016 1017 imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm); 1018 imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm); 1019 imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra); 1020 imc->size += imc->dimm_s_size[i]; 1021 imc->size += imc->dimm_l_size[i]; 1022 ndimms = 0; 1023 1024 for (j = 0; j < NUM_DIMMS; j++) { 1025 dimm = edac_get_dimm(mci, i, j, 0); 1026 1027 if (j ^ imc->dimm_l_map[i]) { 1028 dtype = get_width(0, mad_dimm); 1029 dsize = imc->dimm_s_size[i]; 1030 } else { 1031 dtype = get_width(1, mad_dimm); 1032 dsize = imc->dimm_l_size[i]; 1033 } 1034 1035 if (!dsize) 1036 continue; 1037 1038 dimm->grain = 64; 1039 dimm->mtype = mtype; 1040 dimm->dtype = dtype; 1041 dimm->nr_pages = MiB_TO_PAGES(dsize >> 20); 1042 dimm->edac_mode = EDAC_SECDED; 1043 snprintf(dimm->label, sizeof(dimm->label), 1044 "MC#%d_Chan#%d_DIMM#%d", mc, i, j); 1045 edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n", 1046 mc, i, j, dsize >> 20, dimm->nr_pages); 1047 1048 ndimms++; 1049 } 1050 1051 if (ndimms && !ecc) { 1052 igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc); 1053 return -ENODEV; 1054 } 1055 } 1056 1057 edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20); 1058 1059 return 0; 1060 } 1061 1062 #ifdef CONFIG_EDAC_DEBUG 1063 /* Top of upper usable DRAM */ 1064 static u64 igen6_touud; 1065 #define TOUUD_OFFSET 0xa8 1066 1067 static void igen6_reg_dump(struct igen6_imc *imc) 1068 { 1069 int i; 1070 1071 edac_dbg(2, "CHANNEL_HASH : 0x%x\n", 1072 readl(imc->window + CHANNEL_HASH_OFFSET)); 1073 edac_dbg(2, "CHANNEL_EHASH : 0x%x\n", 1074 readl(imc->window + CHANNEL_EHASH_OFFSET)); 1075 edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n", 1076 readl(imc->window + MAD_INTER_CHANNEL_OFFSET)); 1077 edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n", 1078 readq(imc->window + ECC_ERROR_LOG_OFFSET)); 1079 1080 for (i = 0; i < NUM_CHANNELS; i++) { 1081 edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i, 1082 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4)); 1083 edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i, 1084 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4)); 1085 } 1086 edac_dbg(2, "TOLUD : 0x%x", igen6_tolud); 1087 edac_dbg(2, "TOUUD : 0x%llx", igen6_touud); 1088 edac_dbg(2, "TOM : 0x%llx", igen6_tom); 1089 } 1090 1091 static struct dentry *igen6_test; 1092 1093 static int debugfs_u64_set(void *data, u64 val) 1094 { 1095 u64 ecclog; 1096 1097 if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) { 1098 edac_dbg(0, "Address 0x%llx out of range\n", val); 1099 return 0; 1100 } 1101 1102 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); 1103 1104 val >>= ECC_ERROR_LOG_ADDR_SHIFT; 1105 ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE; 1106 1107 if (!ecclog_gen_pool_add(0, ecclog)) 1108 irq_work_queue(&ecclog_irq_work); 1109 1110 return 0; 1111 } 1112 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); 1113 1114 static void igen6_debug_setup(void) 1115 { 1116 igen6_test = edac_debugfs_create_dir("igen6_test"); 1117 if (!igen6_test) 1118 return; 1119 1120 if (!edac_debugfs_create_file("addr", 0200, igen6_test, 1121 NULL, &fops_u64_wo)) { 1122 debugfs_remove(igen6_test); 1123 igen6_test = NULL; 1124 } 1125 } 1126 1127 static void igen6_debug_teardown(void) 1128 { 1129 debugfs_remove_recursive(igen6_test); 1130 } 1131 #else 1132 static void igen6_reg_dump(struct igen6_imc *imc) {} 1133 static void igen6_debug_setup(void) {} 1134 static void igen6_debug_teardown(void) {} 1135 #endif 1136 1137 static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar) 1138 { 1139 union { 1140 u64 v; 1141 struct { 1142 u32 v_lo; 1143 u32 v_hi; 1144 }; 1145 } u; 1146 1147 edac_dbg(2, "\n"); 1148 1149 if (!res_cfg->ibecc_available(pdev)) { 1150 edac_dbg(2, "No In-Band ECC IP\n"); 1151 goto fail; 1152 } 1153 1154 if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) { 1155 igen6_printk(KERN_ERR, "Failed to read TOLUD\n"); 1156 goto fail; 1157 } 1158 1159 igen6_tolud &= GENMASK(31, 20); 1160 1161 if (pci_read_config_dword(pdev, TOM_OFFSET, &u.v_lo)) { 1162 igen6_printk(KERN_ERR, "Failed to read lower TOM\n"); 1163 goto fail; 1164 } 1165 1166 if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &u.v_hi)) { 1167 igen6_printk(KERN_ERR, "Failed to read upper TOM\n"); 1168 goto fail; 1169 } 1170 1171 igen6_tom = u.v & GENMASK_ULL(38, 20); 1172 1173 if (get_mchbar(pdev, mchbar)) 1174 goto fail; 1175 1176 #ifdef CONFIG_EDAC_DEBUG 1177 if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo)) 1178 edac_dbg(2, "Failed to read lower TOUUD\n"); 1179 else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &u.v_hi)) 1180 edac_dbg(2, "Failed to read upper TOUUD\n"); 1181 else 1182 igen6_touud = u.v & GENMASK_ULL(38, 20); 1183 #endif 1184 1185 return 0; 1186 fail: 1187 return -ENODEV; 1188 } 1189 1190 static void igen6_check(struct mem_ctl_info *mci) 1191 { 1192 struct igen6_imc *imc = mci->pvt_info; 1193 u64 ecclog; 1194 1195 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ 1196 ecclog = ecclog_read_and_clear(imc); 1197 if (!ecclog) 1198 return; 1199 1200 if (!ecclog_gen_pool_add(imc->mc, ecclog)) 1201 irq_work_queue(&ecclog_irq_work); 1202 } 1203 1204 static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) 1205 { 1206 struct edac_mc_layer layers[2]; 1207 struct mem_ctl_info *mci; 1208 struct igen6_imc *imc; 1209 void __iomem *window; 1210 int rc; 1211 1212 edac_dbg(2, "\n"); 1213 1214 mchbar += mc * MCHBAR_SIZE; 1215 window = ioremap(mchbar, MCHBAR_SIZE); 1216 if (!window) { 1217 igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar); 1218 return -ENODEV; 1219 } 1220 1221 layers[0].type = EDAC_MC_LAYER_CHANNEL; 1222 layers[0].size = NUM_CHANNELS; 1223 layers[0].is_virt_csrow = false; 1224 layers[1].type = EDAC_MC_LAYER_SLOT; 1225 layers[1].size = NUM_DIMMS; 1226 layers[1].is_virt_csrow = true; 1227 1228 mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); 1229 if (!mci) { 1230 rc = -ENOMEM; 1231 goto fail; 1232 } 1233 1234 mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc); 1235 if (!mci->ctl_name) { 1236 rc = -ENOMEM; 1237 goto fail2; 1238 } 1239 1240 mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4; 1241 mci->edac_ctl_cap = EDAC_FLAG_SECDED; 1242 mci->edac_cap = EDAC_FLAG_SECDED; 1243 mci->mod_name = EDAC_MOD_STR; 1244 mci->dev_name = pci_name(pdev); 1245 if (edac_op_state == EDAC_OPSTATE_POLL) 1246 mci->edac_check = igen6_check; 1247 mci->pvt_info = &igen6_pvt->imc[mc]; 1248 1249 imc = mci->pvt_info; 1250 device_initialize(&imc->dev); 1251 /* 1252 * EDAC core uses mci->pdev(pointer of structure device) as 1253 * memory controller ID. The client SoCs attach one or more 1254 * memory controllers to single pci_dev (single pci_dev->dev 1255 * can be for multiple memory controllers). 1256 * 1257 * To make mci->pdev unique, assign pci_dev->dev to mci->pdev 1258 * for the first memory controller and assign a unique imc->dev 1259 * to mci->pdev for each non-first memory controller. 1260 */ 1261 mci->pdev = mc ? &imc->dev : &pdev->dev; 1262 imc->mc = mc; 1263 imc->pdev = pdev; 1264 imc->window = window; 1265 1266 igen6_reg_dump(imc); 1267 1268 rc = igen6_get_dimm_config(mci); 1269 if (rc) 1270 goto fail3; 1271 1272 rc = edac_mc_add_mc(mci); 1273 if (rc) { 1274 igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc); 1275 goto fail3; 1276 } 1277 1278 imc->mci = mci; 1279 return 0; 1280 fail3: 1281 mci->pvt_info = NULL; 1282 kfree(mci->ctl_name); 1283 fail2: 1284 edac_mc_free(mci); 1285 fail: 1286 iounmap(window); 1287 return rc; 1288 } 1289 1290 static void igen6_unregister_mcis(void) 1291 { 1292 struct mem_ctl_info *mci; 1293 struct igen6_imc *imc; 1294 int i; 1295 1296 edac_dbg(2, "\n"); 1297 1298 for (i = 0; i < res_cfg->num_imc; i++) { 1299 imc = &igen6_pvt->imc[i]; 1300 mci = imc->mci; 1301 if (!mci) 1302 continue; 1303 1304 edac_mc_del_mc(mci->pdev); 1305 kfree(mci->ctl_name); 1306 mci->pvt_info = NULL; 1307 edac_mc_free(mci); 1308 iounmap(imc->window); 1309 } 1310 } 1311 1312 static int igen6_mem_slice_setup(u64 mchbar) 1313 { 1314 struct igen6_imc *imc = &igen6_pvt->imc[0]; 1315 u64 base = mchbar + res_cfg->cmf_base; 1316 u32 offset = res_cfg->ms_hash_offset; 1317 u32 size = res_cfg->cmf_size; 1318 u64 ms_s_size, ms_hash; 1319 void __iomem *cmf; 1320 int ms_l_map; 1321 1322 edac_dbg(2, "\n"); 1323 1324 if (imc[0].size < imc[1].size) { 1325 ms_s_size = imc[0].size; 1326 ms_l_map = 1; 1327 } else { 1328 ms_s_size = imc[1].size; 1329 ms_l_map = 0; 1330 } 1331 1332 igen6_pvt->ms_s_size = ms_s_size; 1333 igen6_pvt->ms_l_map = ms_l_map; 1334 1335 edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n", 1336 ms_s_size >> 20, ms_l_map); 1337 1338 if (!size) 1339 return 0; 1340 1341 cmf = ioremap(base, size); 1342 if (!cmf) { 1343 igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base); 1344 return -ENODEV; 1345 } 1346 1347 ms_hash = readq(cmf + offset); 1348 igen6_pvt->ms_hash = ms_hash; 1349 1350 edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash); 1351 1352 iounmap(cmf); 1353 1354 return 0; 1355 } 1356 1357 static int register_err_handler(void) 1358 { 1359 int rc; 1360 1361 if (res_cfg->machine_check) { 1362 mce_register_decode_chain(&ecclog_mce_dec); 1363 return 0; 1364 } 1365 1366 rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler, 1367 0, IGEN6_NMI_NAME); 1368 if (rc) { 1369 igen6_printk(KERN_ERR, "Failed to register NMI handler\n"); 1370 return rc; 1371 } 1372 1373 return 0; 1374 } 1375 1376 static void unregister_err_handler(void) 1377 { 1378 if (res_cfg->machine_check) { 1379 mce_unregister_decode_chain(&ecclog_mce_dec); 1380 return; 1381 } 1382 1383 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); 1384 } 1385 1386 static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent) 1387 { 1388 /* 1389 * Quirk: Certain SoCs' error reporting interrupts don't work. 1390 * Force polling mode for them to ensure that memory error 1391 * events can be handled. 1392 */ 1393 if (ent->device == DID_ADL_N_SKU4) { 1394 edac_op_state = EDAC_OPSTATE_POLL; 1395 return; 1396 } 1397 1398 /* Set the mode according to the configuration data. */ 1399 if (cfg->machine_check) 1400 edac_op_state = EDAC_OPSTATE_INT; 1401 else 1402 edac_op_state = EDAC_OPSTATE_NMI; 1403 } 1404 1405 static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 1406 { 1407 u64 mchbar; 1408 int i, rc; 1409 1410 edac_dbg(2, "\n"); 1411 1412 igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL); 1413 if (!igen6_pvt) 1414 return -ENOMEM; 1415 1416 res_cfg = (struct res_config *)ent->driver_data; 1417 1418 rc = igen6_pci_setup(pdev, &mchbar); 1419 if (rc) 1420 goto fail; 1421 1422 opstate_set(res_cfg, ent); 1423 1424 for (i = 0; i < res_cfg->num_imc; i++) { 1425 rc = igen6_register_mci(i, mchbar, pdev); 1426 if (rc) 1427 goto fail2; 1428 } 1429 1430 if (res_cfg->num_imc > 1) { 1431 rc = igen6_mem_slice_setup(mchbar); 1432 if (rc) 1433 goto fail2; 1434 } 1435 1436 ecclog_pool = ecclog_gen_pool_create(); 1437 if (!ecclog_pool) { 1438 rc = -ENOMEM; 1439 goto fail2; 1440 } 1441 1442 INIT_WORK(&ecclog_work, ecclog_work_cb); 1443 init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb); 1444 1445 rc = register_err_handler(); 1446 if (rc) 1447 goto fail3; 1448 1449 /* Enable error reporting */ 1450 rc = errcmd_enable_error_reporting(true); 1451 if (rc) { 1452 igen6_printk(KERN_ERR, "Failed to enable error reporting\n"); 1453 goto fail4; 1454 } 1455 1456 /* Check if any pending errors before/during the registration of the error handler */ 1457 ecclog_handler(); 1458 1459 igen6_debug_setup(); 1460 return 0; 1461 fail4: 1462 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); 1463 fail3: 1464 gen_pool_destroy(ecclog_pool); 1465 fail2: 1466 igen6_unregister_mcis(); 1467 fail: 1468 kfree(igen6_pvt); 1469 return rc; 1470 } 1471 1472 static void igen6_remove(struct pci_dev *pdev) 1473 { 1474 edac_dbg(2, "\n"); 1475 1476 igen6_debug_teardown(); 1477 errcmd_enable_error_reporting(false); 1478 unregister_err_handler(); 1479 irq_work_sync(&ecclog_irq_work); 1480 flush_work(&ecclog_work); 1481 gen_pool_destroy(ecclog_pool); 1482 igen6_unregister_mcis(); 1483 kfree(igen6_pvt); 1484 } 1485 1486 static struct pci_driver igen6_driver = { 1487 .name = EDAC_MOD_STR, 1488 .probe = igen6_probe, 1489 .remove = igen6_remove, 1490 .id_table = igen6_pci_tbl, 1491 }; 1492 1493 static int __init igen6_init(void) 1494 { 1495 const char *owner; 1496 int rc; 1497 1498 edac_dbg(2, "\n"); 1499 1500 if (ghes_get_devices()) 1501 return -EBUSY; 1502 1503 owner = edac_get_owner(); 1504 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) 1505 return -EBUSY; 1506 1507 rc = pci_register_driver(&igen6_driver); 1508 if (rc) 1509 return rc; 1510 1511 igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION); 1512 1513 return 0; 1514 } 1515 1516 static void __exit igen6_exit(void) 1517 { 1518 edac_dbg(2, "\n"); 1519 1520 pci_unregister_driver(&igen6_driver); 1521 } 1522 1523 module_init(igen6_init); 1524 module_exit(igen6_exit); 1525 1526 MODULE_LICENSE("GPL v2"); 1527 MODULE_AUTHOR("Qiuxu Zhuo"); 1528 MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC"); 1529