// SPDX-License-Identifier: GPL-2.0
/*
 * Driver for Intel client SoCs with an integrated memory controller using IBECC
 *
 * Copyright (C) 2020 Intel Corporation
 *
 * The In-Band ECC (IBECC) IP provides ECC protection to all or specific
 * regions of the physical memory space. It's used for memory controllers
 * that don't support out-of-band ECC, which often requires an additional
 * storage device on each channel for storing the ECC data.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/edac.h>
#include <linux/bits.h>
#include <linux/io.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/mce.h>

#include "edac_mc.h"
#include "edac_module.h"

#define IGEN6_REVISION	"v2.5.1"

#define EDAC_MOD_STR	"igen6_edac"
#define IGEN6_NMI_NAME	"igen6_ibecc"

/* Debug macros */
#define igen6_printk(level, fmt, arg...)		\
	edac_printk(level, "igen6", fmt, ##arg)

#define igen6_mc_printk(mci, level, fmt, arg...)	\
	edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)

#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))

#define NUM_IMC		2 /* Max memory controllers */
#define NUM_CHANNELS	2 /* Max channels */
#define NUM_DIMMS	2 /* Max DIMMs per channel */

#define _4GB		BIT_ULL(32)

/* Size of physical memory */
#define TOM_OFFSET		0xa0
/* Top of low usable DRAM */
#define TOLUD_OFFSET		0xbc
/* Capability register C */
#define CAPID_C_OFFSET		0xec
#define CAPID_C_IBECC		BIT(15)

/* Capability register E */
#define CAPID_E_OFFSET		0xf0
#define CAPID_E_IBECC		BIT(12)
#define CAPID_E_IBECC_BIT18	BIT(18)

/* Error Status */
#define ERRSTS_OFFSET		0xc8
#define ERRSTS_CE		BIT_ULL(6)
#define ERRSTS_UE		BIT_ULL(7)

/* Error Command */
#define ERRCMD_OFFSET		0xca
#define ERRCMD_CE		BIT_ULL(6)
#define ERRCMD_UE		BIT_ULL(7)

/* IBECC MMIO base address */
#define IBECC_BASE		(res_cfg->ibecc_base)
#define IBECC_ACTIVATE_OFFSET	IBECC_BASE
#define IBECC_ACTIVATE_EN	BIT(0)

/* IBECC error log */
#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE		BIT_ULL(62)
#define ECC_ERROR_LOG_UE		BIT_ULL(63)
#define ECC_ERROR_LOG_ADDR_SHIFT	5
#define ECC_ERROR_LOG_ADDR(v)		GET_BITFIELD(v, 5, 38)
#define ECC_ERROR_LOG_ADDR45(v)		GET_BITFIELD(v, 5, 45)
#define ECC_ERROR_LOG_SYND(v)		GET_BITFIELD(v, 46, 61)

/* Host MMIO base address */
#define MCHBAR_OFFSET		0x48
#define MCHBAR_EN		BIT_ULL(0)
#define MCHBAR_BASE(v)		(GET_BITFIELD(v, 16, 38) << 16)
#define MCHBAR_SIZE		0x10000

/* Parameters for the channel decode stage */
#define IMC_BASE			(res_cfg->imc_base)
#define MAD_INTER_CHANNEL_OFFSET	IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v)	GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v)	GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v)	GET_BITFIELD(v, 4, 4)
#define MAD_INTER_CHANNEL_CH_S_SIZE(v)	((u64)GET_BITFIELD(v, 12, 19) << 29)
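/*
 * Editor's note: the *_SIZE bit-fields in these MAD registers are encoded
 * in 0.5 GiB (1 << 29) granules, hence the "<< 29" conversions to bytes.
 * Illustrative example: a raw CH_S_SIZE field value of 16 decodes to
 * 16 << 29 = 8 GiB for the smaller channel.
 */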
/* Parameters for the DRAM decode stage */
#define MAD_INTRA_CH0_OFFSET		(IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v)	GET_BITFIELD(v, 0, 0)

/* DIMM characteristics */
#define MAD_DIMM_CH0_OFFSET		(IMC_BASE + 0xc)
#define MAD_DIMM_CH_DIMM_L_SIZE(v)	((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v)		GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v)	((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v)		GET_BITFIELD(v, 24, 25)

/* Hash for memory controller selection */
#define MAD_MC_HASH_OFFSET		(IMC_BASE + 0x1b8)
#define MAC_MC_HASH_LSB(v)		GET_BITFIELD(v, 1, 3)

/* Hash for channel selection */
#define CHANNEL_HASH_OFFSET		(IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET		(IMC_BASE + 0x28)
#define CHANNEL_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)

/* Parameters for the memory slice decode stage */
#define MEM_SLICE_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)

static const struct res_config {
	bool machine_check;
	/* The number of present memory controllers. */
	int num_imc;
	u32 imc_base;
	u32 cmf_base;
	u32 cmf_size;
	u32 ms_hash_offset;
	u32 ibecc_base;
	u32 ibecc_error_log_offset;
	bool (*ibecc_available)(struct pci_dev *pdev);
	/* Extract the error address logged in IBECC */
	u64 (*err_addr)(u64 ecclog);
	/* Convert the error address logged in IBECC to a system physical address */
	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
	/* Convert the error address logged in IBECC to an integrated memory controller address */
	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;

struct igen6_imc {
	int mc;
	struct mem_ctl_info *mci;
	struct pci_dev *pdev;
	struct device dev;
	void __iomem *window;
	u64 size;
	u64 ch_s_size;
	int ch_l_map;
	u64 dimm_s_size[NUM_CHANNELS];
	u64 dimm_l_size[NUM_CHANNELS];
	int dimm_l_map[NUM_CHANNELS];
};

static struct igen6_pvt {
	struct igen6_imc imc[NUM_IMC];
	u64 ms_hash;
	u64 ms_s_size;
	int ms_l_map;
} *igen6_pvt;

/* The top of low usable DRAM */
static u32 igen6_tolud;
/* The size of physical memory */
static u64 igen6_tom;

struct decoded_addr {
	int mc;
	u64 imc_addr;
	u64 sys_addr;
	int channel_idx;
	u64 channel_addr;
	int sub_channel_idx;
	u64 sub_channel_addr;
};

struct ecclog_node {
	struct llist_node llnode;
	int mc;
	u64 ecclog;
};

/*
 * In the NMI handler, the driver uses the lock-less memory allocator
 * to allocate memory for the IBECC error logs and links the logs onto
 * the lock-less list. printk() and the actual error reporting to the
 * EDAC core are delayed to a worker.
 */
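/*
 * Editor's sketch of the reporting pipeline implemented below:
 *
 *   NMI/MCE/poll -> ecclog_read_and_clear()
 *                -> ecclog_gen_pool_add() + llist_add()
 *                -> irq_work_queue()
 *   IRQ work     -> errsts_clear() (not NMI-safe)
 *                -> schedule_work()
 *   worker       -> igen6_decode() -> edac_mc_handle_error()
 *                -> gen_pool_free()
 */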
194 */ 195 #define ECCLOG_POOL_SIZE PAGE_SIZE 196 static LLIST_HEAD(ecclog_llist); 197 static struct gen_pool *ecclog_pool; 198 static char ecclog_buf[ECCLOG_POOL_SIZE]; 199 static struct irq_work ecclog_irq_work; 200 static struct work_struct ecclog_work; 201 202 /* Compute die IDs for Elkhart Lake with IBECC */ 203 #define DID_EHL_SKU5 0x4514 204 #define DID_EHL_SKU6 0x4528 205 #define DID_EHL_SKU7 0x452a 206 #define DID_EHL_SKU8 0x4516 207 #define DID_EHL_SKU9 0x452c 208 #define DID_EHL_SKU10 0x452e 209 #define DID_EHL_SKU11 0x4532 210 #define DID_EHL_SKU12 0x4518 211 #define DID_EHL_SKU13 0x451a 212 #define DID_EHL_SKU14 0x4534 213 #define DID_EHL_SKU15 0x4536 214 215 /* Compute die IDs for ICL-NNPI with IBECC */ 216 #define DID_ICL_SKU8 0x4581 217 #define DID_ICL_SKU10 0x4585 218 #define DID_ICL_SKU11 0x4589 219 #define DID_ICL_SKU12 0x458d 220 221 /* Compute die IDs for Tiger Lake with IBECC */ 222 #define DID_TGL_SKU 0x9a14 223 224 /* Compute die IDs for Alder Lake with IBECC */ 225 #define DID_ADL_SKU1 0x4601 226 #define DID_ADL_SKU2 0x4602 227 #define DID_ADL_SKU3 0x4621 228 #define DID_ADL_SKU4 0x4641 229 230 /* Compute die IDs for Alder Lake-N with IBECC */ 231 #define DID_ADL_N_SKU1 0x4614 232 #define DID_ADL_N_SKU2 0x4617 233 #define DID_ADL_N_SKU3 0x461b 234 #define DID_ADL_N_SKU4 0x461c 235 #define DID_ADL_N_SKU5 0x4673 236 #define DID_ADL_N_SKU6 0x4674 237 #define DID_ADL_N_SKU7 0x4675 238 #define DID_ADL_N_SKU8 0x4677 239 #define DID_ADL_N_SKU9 0x4678 240 #define DID_ADL_N_SKU10 0x4679 241 #define DID_ADL_N_SKU11 0x467c 242 #define DID_ADL_N_SKU12 0x4632 243 244 /* Compute die IDs for Arizona Beach with IBECC */ 245 #define DID_AZB_SKU1 0x4676 246 247 /* Compute did IDs for Amston Lake with IBECC */ 248 #define DID_ASL_SKU1 0x464a 249 250 /* Compute die IDs for Raptor Lake-P with IBECC */ 251 #define DID_RPL_P_SKU1 0xa706 252 #define DID_RPL_P_SKU2 0xa707 253 #define DID_RPL_P_SKU3 0xa708 254 #define DID_RPL_P_SKU4 0xa716 255 #define DID_RPL_P_SKU5 0xa718 256 257 /* Compute die IDs for Meteor Lake-PS with IBECC */ 258 #define DID_MTL_PS_SKU1 0x7d21 259 #define DID_MTL_PS_SKU2 0x7d22 260 #define DID_MTL_PS_SKU3 0x7d23 261 #define DID_MTL_PS_SKU4 0x7d24 262 263 /* Compute die IDs for Meteor Lake-P with IBECC */ 264 #define DID_MTL_P_SKU1 0x7d01 265 #define DID_MTL_P_SKU2 0x7d02 266 #define DID_MTL_P_SKU3 0x7d14 267 268 /* Compute die IDs for Arrow Lake-UH with IBECC */ 269 #define DID_ARL_UH_SKU1 0x7d06 270 #define DID_ARL_UH_SKU2 0x7d20 271 #define DID_ARL_UH_SKU3 0x7d30 272 273 /* Compute die IDs for Panther Lake-H with IBECC */ 274 #define DID_PTL_H_SKU1 0xb000 275 #define DID_PTL_H_SKU2 0xb001 276 #define DID_PTL_H_SKU3 0xb002 277 278 static int get_mchbar(struct pci_dev *pdev, u64 *mchbar) 279 { 280 union { 281 u64 v; 282 struct { 283 u32 v_lo; 284 u32 v_hi; 285 }; 286 } u; 287 288 if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) { 289 igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n"); 290 return -ENODEV; 291 } 292 293 if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) { 294 igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n"); 295 return -ENODEV; 296 } 297 298 if (!(u.v & MCHBAR_EN)) { 299 igen6_printk(KERN_ERR, "MCHBAR is disabled\n"); 300 return -ENODEV; 301 } 302 303 *mchbar = MCHBAR_BASE(u.v); 304 305 return 0; 306 } 307 308 static bool ehl_ibecc_available(struct pci_dev *pdev) 309 { 310 u32 v; 311 312 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v)) 313 return false; 314 315 return !!(CAPID_C_IBECC & v); 316 } 317 318 static u64 
static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return eaddr;
}

static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	if (eaddr < igen6_tolud)
		return eaddr;

	if (igen6_tom <= _4GB)
		return eaddr + igen6_tolud - _4GB;

	if (eaddr >= igen6_tom)
		return eaddr + igen6_tolud - igen6_tom;

	return eaddr;
}

static bool icl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
		return false;

	return !(CAPID_C_IBECC & v) &&
		(boot_cpu_data.x86_stepping >= 1);
}

static bool tgl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
		return false;

	return !(CAPID_E_IBECC & v);
}

static bool mtl_p_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
		return false;

	return !(CAPID_E_IBECC_BIT18 & v);
}

static bool mtl_ps_ibecc_available(struct pci_dev *pdev)
{
#define MCHBAR_MEMSS_IBECCDIS	0x13c00
	void __iomem *window;
	u64 mchbar;
	u32 val;

	if (get_mchbar(pdev, &mchbar))
		return false;

	window = ioremap(mchbar, MCHBAR_SIZE * 2);
	if (!window) {
		igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
		return false;
	}

	val = readl(window + MCHBAR_MEMSS_IBECCDIS);
	iounmap(window);

	/* Bit 6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */
	return !GET_BITFIELD(val, 6, 6);
}

static u64 mem_addr_to_sys_addr(u64 maddr)
{
	if (maddr < igen6_tolud)
		return maddr;

	if (igen6_tom <= _4GB)
		return maddr - igen6_tolud + _4GB;

	if (maddr < _4GB)
		return maddr - igen6_tolud + igen6_tom;

	return maddr;
}

static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
{
	u64 hash_addr = addr & mask, hash = hash_init;
	u64 intlv = (addr >> intlv_bit) & 1;
	int i;

	for (i = 6; i < 20; i++)
		hash ^= (hash_addr >> i) & 1;

	return hash ^ intlv;
}

static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
{
	u64 maddr, hash, mask, ms_s_size;
	int intlv_bit;
	u32 ms_hash;

	ms_s_size = igen6_pvt->ms_s_size;
	if (eaddr >= ms_s_size)
		return eaddr + ms_s_size;

	ms_hash = igen6_pvt->ms_hash;

	mask = MEM_SLICE_HASH_MASK(ms_hash);
	intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;

	maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) |
		GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	hash = mem_slice_hash(maddr, mask, mc, intlv_bit);

	return maddr | (hash << intlv_bit);
}

static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc);

	return mem_addr_to_sys_addr(maddr);
}

static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	return eaddr;
}

static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(eaddr);
}

static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size;
	struct igen6_imc *imc = &igen6_pvt->imc[mc];
	int intlv_bit;
	u32 mc_hash;

	if (eaddr >= 2 * ms_s_size)
		return eaddr - ms_s_size;

	mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET);

	intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;
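	/*
	 * Editor's note: as in decode_channel_addr(), remove the
	 * MC-interleave bit and shift the upper part down to fill the
	 * gap, yielding the per-controller (IMC) address.
	 */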
	imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit |
		   GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	return imc_addr;
}

static u64 rpl_p_err_addr(u64 ecclog)
{
	return ECC_ERROR_LOG_ADDR45(ecclog);
}

static const struct res_config ehl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xdc00,
	.ibecc_available	= ehl_ibecc_available,
	.ibecc_error_log_offset	= 0x170,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};

static const struct res_config icl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xd800,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= icl_ibecc_available,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};

static const struct res_config tgl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0x5000,
	.cmf_base		= 0x11000,
	.cmf_size		= 0x800,
	.ms_hash_offset		= 0xac,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= tgl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= tgl_err_addr_to_imc_addr,
};

static const struct res_config adl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static const struct res_config adl_n_cfg = {
	.machine_check		= true,
	.num_imc		= 1,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static const struct res_config rpl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr		= rpl_p_err_addr,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static const struct res_config mtl_ps_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_ps_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static const struct res_config mtl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_p_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
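/*
 * Editor's note: several parts reuse an earlier generation's res_config
 * in the device table below, e.g. Arizona Beach and Amston Lake bind to
 * adl_n_cfg, and Arrow Lake-UH/Panther Lake-H bind to mtl_p_cfg.
 */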
static const struct pci_device_id igen6_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
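/*
 * Editor's note: MAD_DIMM_CH_DLW()/MAD_DIMM_CH_DSW() carry the device
 * width of the larger/smaller DIMM in a channel; get_width() below maps
 * the raw two-bit field to x8/x16/x32.
 */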
static enum dev_type get_width(int dimm_l, u32 mad_dimm)
{
	u32 w = dimm_l ? MAD_DIMM_CH_DLW(mad_dimm) :
			 MAD_DIMM_CH_DSW(mad_dimm);

	switch (w) {
	case 0:
		return DEV_X8;
	case 1:
		return DEV_X16;
	case 2:
		return DEV_X32;
	default:
		return DEV_UNKNOWN;
	}
}

static enum mem_type get_memory_type(u32 mad_inter)
{
	u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter);

	switch (t) {
	case 0:
		return MEM_DDR4;
	case 1:
		return MEM_DDR3;
	case 2:
		return MEM_LPDDR3;
	case 3:
		return MEM_LPDDR4;
	case 4:
		return MEM_WIO2;
	default:
		return MEM_UNKNOWN;
	}
}

static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit)
{
	u64 hash_addr = addr & mask, hash = 0;
	u64 intlv = (addr >> intlv_bit) & 1;
	int i;

	for (i = 6; i < 20; i++)
		hash ^= (hash_addr >> i) & 1;

	return (int)hash ^ intlv;
}

static u64 decode_channel_addr(u64 addr, int intlv_bit)
{
	u64 channel_addr;

	/* Remove the interleave bit and shift upper part down to fill gap */
	channel_addr = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit;
	channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1);

	return channel_addr;
}

static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map,
			int *idx, u64 *sub_addr)
{
	int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6;

	if (addr > 2 * s_size) {
		*sub_addr = addr - s_size;
		*idx = l_map;
		return;
	}

	if (CHANNEL_HASH_MODE(hash)) {
		*sub_addr = decode_channel_addr(addr, intlv_bit);
		*idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit);
	} else {
		*sub_addr = decode_channel_addr(addr, 6);
		*idx = GET_BITFIELD(addr, 6, 6);
	}
}

static int igen6_decode(struct decoded_addr *res)
{
	struct igen6_imc *imc = &igen6_pvt->imc[res->mc];
	u64 addr = res->imc_addr, sub_addr, s_size;
	int idx, l_map;
	u32 hash;

	if (addr >= igen6_tom) {
		edac_dbg(0, "Address 0x%llx out of range\n", addr);
		return -EINVAL;
	}

	/* Decode channel */
	hash = readl(imc->window + CHANNEL_HASH_OFFSET);
	s_size = imc->ch_s_size;
	l_map = imc->ch_l_map;
	decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr);
	res->channel_idx = idx;
	res->channel_addr = sub_addr;

	/* Decode sub-channel/DIMM */
	hash = readl(imc->window + CHANNEL_EHASH_OFFSET);
	s_size = imc->dimm_s_size[idx];
	l_map = imc->dimm_l_map[idx];
	decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr);
	res->sub_channel_idx = idx;
	res->sub_channel_addr = sub_addr;

	return 0;
}
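/*
 * Editor's note: report a decoded error to the EDAC core. The severity
 * comes from the UE bit of ECC_ERROR_LOG and the syndrome from its bits
 * 46:61; the position is the channel/sub-channel pair from igen6_decode().
 */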
static void igen6_output_error(struct decoded_addr *res,
			       struct mem_ctl_info *mci, u64 ecclog)
{
	enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ?
					 HW_EVENT_ERR_UNCORRECTED :
					 HW_EVENT_ERR_CORRECTED;

	edac_mc_handle_error(type, mci, 1,
			     res->sys_addr >> PAGE_SHIFT,
			     res->sys_addr & ~PAGE_MASK,
			     ECC_ERROR_LOG_SYND(ecclog),
			     res->channel_idx, res->sub_channel_idx,
			     -1, "", "");
}

static struct gen_pool *ecclog_gen_pool_create(void)
{
	struct gen_pool *pool;

	pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
	if (!pool)
		return NULL;

	if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
		gen_pool_destroy(pool);
		return NULL;
	}

	return pool;
}

static int ecclog_gen_pool_add(int mc, u64 ecclog)
{
	struct ecclog_node *node;

	node = (void *)gen_pool_alloc(ecclog_pool, sizeof(*node));
	if (!node)
		return -ENOMEM;

	node->mc = mc;
	node->ecclog = ecclog;
	llist_add(&node->llnode, &ecclog_llist);

	return 0;
}

/*
 * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
 * configuration space status register ERRSTS can indicate whether a
 * correctable error or an uncorrectable error occurred. We only use the
 * ECC_ERROR_LOG register to check the error type, but need to clear both
 * registers to enable future error events.
 */
static u64 ecclog_read_and_clear(struct igen6_imc *imc)
{
	u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);

	/*
	 * Quirk: the ECC_ERROR_LOG register of certain SoCs may contain
	 * the invalid value ~0. This would result in a flood of invalid
	 * error reports in polling mode. Skip it.
	 */
	if (ecclog == ~0)
		return 0;

	/* Neither a CE nor a UE. Skip it. */
	if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
		return 0;

	/* Clear CE/UE bits by writing 1s */
	writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);

	return ecclog;
}

static void errsts_clear(struct igen6_imc *imc)
{
	u16 errsts;

	if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) {
		igen6_printk(KERN_ERR, "Failed to read ERRSTS\n");
		return;
	}

	/* Clear CE/UE bits by writing 1s */
	if (errsts & (ERRSTS_CE | ERRSTS_UE))
		pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts);
}

static int errcmd_enable_error_reporting(bool enable)
{
	struct igen6_imc *imc = &igen6_pvt->imc[0];
	u16 errcmd;
	int rc;

	rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	if (enable)
		errcmd |= ERRCMD_CE | ERRCMD_UE;
	else
		errcmd &= ~(ERRCMD_CE | ERRCMD_UE);

	rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	return 0;
}
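/*
 * Editor's note: ecclog_handler() runs in NMI/MCE context, so it only
 * performs NMI-safe work (lock-less allocation, llist_add() and
 * irq_work_queue()) and returns the number of error logs found.
 */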
static int ecclog_handler(void)
{
	struct igen6_imc *imc;
	int i, n = 0;
	u64 ecclog;

	for (i = 0; i < res_cfg->num_imc; i++) {
		imc = &igen6_pvt->imc[i];

		/* errsts_clear() isn't NMI-safe. Delay it to the IRQ context */

		ecclog = ecclog_read_and_clear(imc);
		if (!ecclog)
			continue;

		if (!ecclog_gen_pool_add(i, ecclog))
			irq_work_queue(&ecclog_irq_work);

		n++;
	}

	return n;
}

static void ecclog_work_cb(struct work_struct *work)
{
	struct ecclog_node *node, *tmp;
	struct mem_ctl_info *mci;
	struct llist_node *head;
	struct decoded_addr res;
	u64 eaddr;

	head = llist_del_all(&ecclog_llist);
	if (!head)
		return;

	llist_for_each_entry_safe(node, tmp, head, llnode) {
		memset(&res, 0, sizeof(res));
		if (res_cfg->err_addr)
			eaddr = res_cfg->err_addr(node->ecclog);
		else
			eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
				ECC_ERROR_LOG_ADDR_SHIFT;
		res.mc = node->mc;
		res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
		res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);

		mci = igen6_pvt->imc[res.mc].mci;

		edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog);
		igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n");
		igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr);

		if (!igen6_decode(&res))
			igen6_output_error(&res, mci, node->ecclog);

		gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node));
	}
}

static void ecclog_irq_work_cb(struct irq_work *irq_work)
{
	int i;

	for (i = 0; i < res_cfg->num_imc; i++)
		errsts_clear(&igen6_pvt->imc[i]);

	if (!llist_empty(&ecclog_llist))
		schedule_work(&ecclog_work);
}

static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	unsigned char reason;

	if (!ecclog_handler())
		return NMI_DONE;

	/*
	 * Both In-Band ECC correctable errors and uncorrectable errors are
	 * reported by the SERR# NMI. The NMI generic code (see
	 * pci_serr_error()) doesn't clear the NMI_REASON_CLEAR_SERR bit
	 * (in port 0x61) to re-enable the SERR# NMI after NMI handling.
	 * So clear this bit here to re-enable the SERR# NMI for receiving
	 * future In-Band ECC errors.
	 */
	reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK;
	reason |= NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
	reason &= ~NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);

	return NMI_HANDLED;
}
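/*
 * Editor's note (assumption): on platforms with res_cfg->machine_check
 * set, IBECC errors are delivered through a machine-check bank rather
 * than the SERR# NMI, so the notifier below takes the place of
 * ecclog_nmi_handler() on those parts (see register_err_handler()).
 */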
static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
			      void *data)
{
	struct mce *mce = (struct mce *)data;
	char *type;

	if (mce->kflags & MCE_HANDLED_CEC)
		return NOTIFY_DONE;

	/*
	 * Ignore unless this is a memory related error.
	 * Don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
	 * since it isn't set on some CPUs (e.g., Tiger Lake UP3).
	 */
	if ((mce->status & 0xefff) >> 7 != 1)
		return NOTIFY_DONE;

	if (mce->mcgstatus & MCG_STATUS_MCIP)
		type = "Exception";
	else
		type = "Event";

	edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
		 mce->extcpu, type, mce->mcgstatus,
		 mce->bank, mce->status);
	edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
	edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
	edac_dbg(0, "MISC 0x%llx\n", mce->misc);
	edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
		 mce->cpuvendor, mce->cpuid, mce->time,
		 mce->socketid, mce->apicid);
	/*
	 * We just use the Machine Check for the memory error notification.
	 * Each memory controller is associated with an IBECC instance.
	 * Directly read and clear the error information (error address and
	 * error type) on all the IBECC instances so that we know on which
	 * memory controller the memory error(s) occurred.
	 */
	if (!ecclog_handler())
		return NOTIFY_DONE;

	mce->kflags |= MCE_HANDLED_EDAC;

	return NOTIFY_DONE;
}

static struct notifier_block ecclog_mce_dec = {
	.notifier_call	= ecclog_mce_handler,
	.priority	= MCE_PRIO_EDAC,
};

static bool igen6_check_ecc(struct igen6_imc *imc)
{
	u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);

	return !!(activate & IBECC_ACTIVATE_EN);
}

static int igen6_get_dimm_config(struct mem_ctl_info *mci)
{
	struct igen6_imc *imc = mci->pvt_info;
	u32 mad_inter, mad_intra, mad_dimm;
	int i, j, ndimms, mc = imc->mc;
	struct dimm_info *dimm;
	enum mem_type mtype;
	enum dev_type dtype;
	u64 dsize;
	bool ecc;

	edac_dbg(2, "\n");

	mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET);
	mtype = get_memory_type(mad_inter);
	ecc = igen6_check_ecc(imc);
	imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter);
	imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter);

	for (i = 0; i < NUM_CHANNELS; i++) {
		mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4);
		mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4);

		imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
		imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
		imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
		imc->size += imc->dimm_s_size[i];
		imc->size += imc->dimm_l_size[i];
		ndimms = 0;

		for (j = 0; j < NUM_DIMMS; j++) {
			dimm = edac_get_dimm(mci, i, j, 0);

			if (j ^ imc->dimm_l_map[i]) {
				dtype = get_width(0, mad_dimm);
				dsize = imc->dimm_s_size[i];
			} else {
				dtype = get_width(1, mad_dimm);
				dsize = imc->dimm_l_size[i];
			}

			if (!dsize)
				continue;

			dimm->grain = 64;
			dimm->mtype = mtype;
			dimm->dtype = dtype;
			dimm->nr_pages = MiB_TO_PAGES(dsize >> 20);
			dimm->edac_mode = EDAC_SECDED;
			snprintf(dimm->label, sizeof(dimm->label),
				 "MC#%d_Chan#%d_DIMM#%d", mc, i, j);
			edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n",
				 mc, i, j, dsize >> 20, dimm->nr_pages);

			ndimms++;
		}

		if (ndimms && !ecc) {
			igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc);
			return -ENODEV;
		}
	}

	edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);

	return 0;
}
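/*
 * Editor's note: with CONFIG_EDAC_DEBUG, debugfs_u64_set() below lets a
 * fake correctable-error log be injected to exercise the reporting path,
 * e.g. (assuming the default debugfs mount point):
 *
 *   echo 0x87654320 > /sys/kernel/debug/edac/igen6_test/addr
 */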
#ifdef CONFIG_EDAC_DEBUG
/* Top of upper usable DRAM */
static u64 igen6_touud;
#define TOUUD_OFFSET	0xa8

static void igen6_reg_dump(struct igen6_imc *imc)
{
	int i;

	edac_dbg(2, "CHANNEL_HASH     : 0x%x\n",
		 readl(imc->window + CHANNEL_HASH_OFFSET));
	edac_dbg(2, "CHANNEL_EHASH    : 0x%x\n",
		 readl(imc->window + CHANNEL_EHASH_OFFSET));
	edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n",
		 readl(imc->window + MAD_INTER_CHANNEL_OFFSET));
	edac_dbg(2, "ECC_ERROR_LOG    : 0x%llx\n",
		 readq(imc->window + ECC_ERROR_LOG_OFFSET));

	for (i = 0; i < NUM_CHANNELS; i++) {
		edac_dbg(2, "MAD_INTRA_CH%d    : 0x%x\n", i,
			 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4));
		edac_dbg(2, "MAD_DIMM_CH%d     : 0x%x\n", i,
			 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4));
	}
	edac_dbg(2, "TOLUD            : 0x%x\n", igen6_tolud);
	edac_dbg(2, "TOUUD            : 0x%llx\n", igen6_touud);
	edac_dbg(2, "TOM              : 0x%llx\n", igen6_tom);
}

static struct dentry *igen6_test;

static int debugfs_u64_set(void *data, u64 val)
{
	u64 ecclog;

	if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) {
		edac_dbg(0, "Address 0x%llx out of range\n", val);
		return 0;
	}

	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);

	val >>= ECC_ERROR_LOG_ADDR_SHIFT;
	ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE;

	if (!ecclog_gen_pool_add(0, ecclog))
		irq_work_queue(&ecclog_irq_work);

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");

static void igen6_debug_setup(void)
{
	igen6_test = edac_debugfs_create_dir("igen6_test");
	if (!igen6_test)
		return;

	if (!edac_debugfs_create_file("addr", 0200, igen6_test,
				      NULL, &fops_u64_wo)) {
		debugfs_remove(igen6_test);
		igen6_test = NULL;
	}
}

static void igen6_debug_teardown(void)
{
	debugfs_remove_recursive(igen6_test);
}
#else
static void igen6_reg_dump(struct igen6_imc *imc) {}
static void igen6_debug_setup(void) {}
static void igen6_debug_teardown(void) {}
#endif

static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
{
	union {
		u64 v;
		struct {
			u32 v_lo;
			u32 v_hi;
		};
	} u;

	edac_dbg(2, "\n");

	if (!res_cfg->ibecc_available(pdev)) {
		edac_dbg(2, "No In-Band ECC IP\n");
		goto fail;
	}

	if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) {
		igen6_printk(KERN_ERR, "Failed to read TOLUD\n");
		goto fail;
	}

	igen6_tolud &= GENMASK(31, 20);

	if (pci_read_config_dword(pdev, TOM_OFFSET, &u.v_lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower TOM\n");
		goto fail;
	}

	if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &u.v_hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper TOM\n");
		goto fail;
	}

	igen6_tom = u.v & GENMASK_ULL(38, 20);

	if (get_mchbar(pdev, mchbar))
		goto fail;

#ifdef CONFIG_EDAC_DEBUG
	if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo))
		edac_dbg(2, "Failed to read lower TOUUD\n");
	else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &u.v_hi))
		edac_dbg(2, "Failed to read upper TOUUD\n");
	else
		igen6_touud = u.v & GENMASK_ULL(38, 20);
#endif

	return 0;
fail:
	return -ENODEV;
}

static void igen6_check(struct mem_ctl_info *mci)
{
	struct igen6_imc *imc = mci->pvt_info;
	u64 ecclog;

	/* errsts_clear() isn't NMI-safe. Delay it to the IRQ context */
	ecclog = ecclog_read_and_clear(imc);
	if (!ecclog)
		return;

	if (!ecclog_gen_pool_add(imc->mc, ecclog))
		irq_work_queue(&ecclog_irq_work);
}
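/*
 * Editor's note: igen6_check() above is the polling callback hooked up in
 * igen6_register_mci() when edac_op_state == EDAC_OPSTATE_POLL (see the
 * DID_ADL_N_SKU4 quirk in opstate_set()); it feeds pending error logs
 * into the same irq_work/worker pipeline as the NMI/MCE paths.
 */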
/* Check whether the memory controller is absent. */
static bool igen6_imc_absent(void __iomem *window)
{
	return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
}

static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
{
	struct edac_mc_layer layers[2];
	struct mem_ctl_info *mci;
	struct igen6_imc *imc;
	int rc;

	edac_dbg(2, "\n");

	layers[0].type = EDAC_MC_LAYER_CHANNEL;
	layers[0].size = NUM_CHANNELS;
	layers[0].is_virt_csrow = false;
	layers[1].type = EDAC_MC_LAYER_SLOT;
	layers[1].size = NUM_DIMMS;
	layers[1].is_virt_csrow = true;

	mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
	if (!mci) {
		rc = -ENOMEM;
		goto fail;
	}

	mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc);
	if (!mci->ctl_name) {
		rc = -ENOMEM;
		goto fail2;
	}

	mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->dev_name = pci_name(pdev);
	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = igen6_check;
	mci->pvt_info = &igen6_pvt->imc[mc];

	imc = mci->pvt_info;
	device_initialize(&imc->dev);
	/*
	 * The EDAC core uses mci->pdev (a pointer to struct device) as
	 * the memory controller ID. The client SoCs attach one or more
	 * memory controllers to a single pci_dev (so a single
	 * pci_dev->dev may serve multiple memory controllers).
	 *
	 * To make mci->pdev unique, assign pci_dev->dev to mci->pdev
	 * for the first memory controller and assign a unique imc->dev
	 * to mci->pdev for each non-first memory controller.
	 */
	mci->pdev = mc ? &imc->dev : &pdev->dev;
	imc->mc = mc;
	imc->pdev = pdev;
	imc->window = window;

	igen6_reg_dump(imc);

	rc = igen6_get_dimm_config(mci);
	if (rc)
		goto fail3;

	rc = edac_mc_add_mc(mci);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc);
		goto fail3;
	}

	imc->mci = mci;
	return 0;
fail3:
	mci->pvt_info = NULL;
	kfree(mci->ctl_name);
fail2:
	edac_mc_free(mci);
fail:
	return rc;
}
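/*
 * Editor's note: igen6_register_mcis() below walks the per-controller
 * MCHBAR windows by physical index (pmc) and registers the controllers
 * actually present under a dense logical index (lmc); an absent
 * controller reads back ~0 from its MAD_INTER_CHANNEL register.
 */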
static void igen6_unregister_mcis(void)
{
	struct mem_ctl_info *mci;
	struct igen6_imc *imc;
	int i;

	edac_dbg(2, "\n");

	for (i = 0; i < res_cfg->num_imc; i++) {
		imc = &igen6_pvt->imc[i];
		mci = imc->mci;
		if (!mci)
			continue;

		edac_mc_del_mc(mci->pdev);
		kfree(mci->ctl_name);
		mci->pvt_info = NULL;
		edac_mc_free(mci);
		iounmap(imc->window);
	}
}

static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
{
	void __iomem *window;
	int lmc, pmc, rc;
	u64 base;

	for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
		base = mchbar + pmc * MCHBAR_SIZE;
		window = ioremap(base, MCHBAR_SIZE);
		if (!window) {
			igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
			rc = -ENOMEM;
			goto out_unregister_mcis;
		}

		if (igen6_imc_absent(window)) {
			iounmap(window);
			edac_dbg(2, "Skip absent mc%d\n", pmc);
			continue;
		}

		rc = igen6_register_mci(lmc, window, pdev);
		if (rc)
			goto out_iounmap;

		/* Done, if all present MCs are detected and registered. */
		if (++lmc >= res_cfg->num_imc)
			break;
	}

	if (!lmc) {
		igen6_printk(KERN_ERR, "No mc found.\n");
		return -ENODEV;
	}

	if (lmc < res_cfg->num_imc)
		igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.\n",
			     res_cfg->num_imc, lmc);

	return 0;

out_iounmap:
	iounmap(window);

out_unregister_mcis:
	igen6_unregister_mcis();

	return rc;
}

static int igen6_mem_slice_setup(u64 mchbar)
{
	struct igen6_imc *imc = &igen6_pvt->imc[0];
	u64 base = mchbar + res_cfg->cmf_base;
	u32 offset = res_cfg->ms_hash_offset;
	u32 size = res_cfg->cmf_size;
	u64 ms_s_size, ms_hash;
	void __iomem *cmf;
	int ms_l_map;

	edac_dbg(2, "\n");

	if (imc[0].size < imc[1].size) {
		ms_s_size = imc[0].size;
		ms_l_map = 1;
	} else {
		ms_s_size = imc[1].size;
		ms_l_map = 0;
	}

	igen6_pvt->ms_s_size = ms_s_size;
	igen6_pvt->ms_l_map = ms_l_map;

	edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
		 ms_s_size >> 20, ms_l_map);

	if (!size)
		return 0;

	cmf = ioremap(base, size);
	if (!cmf) {
		igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base);
		return -ENODEV;
	}

	ms_hash = readq(cmf + offset);
	igen6_pvt->ms_hash = ms_hash;

	edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash);

	iounmap(cmf);

	return 0;
}

static int register_err_handler(void)
{
	int rc;

	if (res_cfg->machine_check) {
		mce_register_decode_chain(&ecclog_mce_dec);
		return 0;
	}

	rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
				  0, IGEN6_NMI_NAME);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
		return rc;
	}

	return 0;
}

static void unregister_err_handler(void)
{
	if (res_cfg->machine_check) {
		mce_unregister_decode_chain(&ecclog_mce_dec);
		return;
	}

	unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
}
static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent)
{
	/*
	 * Quirk: Certain SoCs' error reporting interrupts don't work.
	 * Force polling mode for them to ensure that memory error
	 * events can be handled.
	 */
	if (ent->device == DID_ADL_N_SKU4) {
		edac_op_state = EDAC_OPSTATE_POLL;
		return;
	}

	/* Set the mode according to the configuration data. */
	if (cfg->machine_check)
		edac_op_state = EDAC_OPSTATE_INT;
	else
		edac_op_state = EDAC_OPSTATE_NMI;
}

static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	u64 mchbar;
	int rc;

	edac_dbg(2, "\n");

	igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL);
	if (!igen6_pvt)
		return -ENOMEM;

	res_cfg = (const struct res_config *)ent->driver_data;

	rc = igen6_pci_setup(pdev, &mchbar);
	if (rc)
		goto fail;

	opstate_set(res_cfg, ent);

	rc = igen6_register_mcis(pdev, mchbar);
	if (rc)
		goto fail;

	if (res_cfg->num_imc > 1) {
		rc = igen6_mem_slice_setup(mchbar);
		if (rc)
			goto fail2;
	}

	ecclog_pool = ecclog_gen_pool_create();
	if (!ecclog_pool) {
		rc = -ENOMEM;
		goto fail2;
	}

	INIT_WORK(&ecclog_work, ecclog_work_cb);
	init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);

	rc = register_err_handler();
	if (rc)
		goto fail3;

	/* Enable error reporting */
	rc = errcmd_enable_error_reporting(true);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to enable error reporting\n");
		goto fail4;
	}

	/* Check for errors that arrived before/during the registration of the error handler */
	ecclog_handler();

	igen6_debug_setup();
	return 0;
fail4:
	unregister_err_handler();
fail3:
	gen_pool_destroy(ecclog_pool);
fail2:
	igen6_unregister_mcis();
fail:
	kfree(igen6_pvt);
	return rc;
}

static void igen6_remove(struct pci_dev *pdev)
{
	edac_dbg(2, "\n");

	igen6_debug_teardown();
	errcmd_enable_error_reporting(false);
	unregister_err_handler();
	irq_work_sync(&ecclog_irq_work);
	flush_work(&ecclog_work);
	gen_pool_destroy(ecclog_pool);
	igen6_unregister_mcis();
	kfree(igen6_pvt);
}

static struct pci_driver igen6_driver = {
	.name     = EDAC_MOD_STR,
	.probe    = igen6_probe,
	.remove   = igen6_remove,
	.id_table = igen6_pci_tbl,
};

static int __init igen6_init(void)
{
	const char *owner;
	int rc;

	edac_dbg(2, "\n");

	if (ghes_get_devices())
		return -EBUSY;

	owner = edac_get_owner();
	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
		return -EBUSY;

	rc = pci_register_driver(&igen6_driver);
	if (rc)
		return rc;

	igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION);

	return 0;
}

static void __exit igen6_exit(void)
{
	edac_dbg(2, "\n");

	pci_unregister_driver(&igen6_driver);
}

module_init(igen6_init);
module_exit(igen6_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Qiuxu Zhuo");
MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC");