// SPDX-License-Identifier: GPL-2.0
/*
 * Driver for Intel client SoC with integrated memory controller using IBECC
 *
 * Copyright (C) 2020 Intel Corporation
 *
 * The In-Band ECC (IBECC) IP provides ECC protection to all or specific
 * regions of the physical memory space. It's used for memory controllers
 * that don't support the out-of-band ECC which often needs an additional
 * storage device to each channel for storing ECC data.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/edac.h>
#include <linux/bits.h>
#include <linux/bitfield.h>
#include <linux/io.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/mce.h>

#include "edac_mc.h"
#include "edac_module.h"

#define IGEN6_REVISION	"v2.5.1"

#define EDAC_MOD_STR	"igen6_edac"
#define IGEN6_NMI_NAME	"igen6_ibecc"

/* Debug macros: thin wrappers that tag every message with "igen6" */
#define igen6_printk(level, fmt, arg...)		\
	edac_printk(level, "igen6", fmt, ##arg)

#define igen6_mc_printk(mci, level, fmt, arg...)	\
	edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)

/*
 * Extract bits lo..hi (both inclusive) of v and shift them down to bit 0.
 * Note the argument order: low bit first, high bit second.
 */
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))

#define NUM_IMC		2 /* Max memory controllers */
#define NUM_CHANNELS	2 /* Max channels */
#define NUM_DIMMS	2 /* Max DIMMs per channel */

#define _4GB		BIT_ULL(32)

/* Size of physical memory */
#define TOM_OFFSET		0xa0
/* Top of low usable DRAM */
#define TOLUD_OFFSET		0xbc
/* Capability register C */
#define CAPID_C_OFFSET		0xec
#define CAPID_C_IBECC		BIT(15)

/* Capability register E */
#define CAPID_E_OFFSET		0xf0
#define CAPID_E_IBECC		BIT(12)
#define CAPID_E_IBECC_BIT18	BIT(18)

/* Error Status */
#define ERRSTS_OFFSET		0xc8
#define ERRSTS_CE		BIT_ULL(6)
#define ERRSTS_UE		BIT_ULL(7)

/* Error Command (same CE/UE bit positions as Error Status) */
#define ERRCMD_OFFSET		0xca
#define ERRCMD_CE		BIT_ULL(6)
#define ERRCMD_UE		BIT_ULL(7)

/*
 * IBECC MMIO base address.
 * Expands to a dereference of the global res_cfg pointer, as do all the
 * offsets below that are built on IBECC_BASE or IMC_BASE.
 */
#define IBECC_BASE		(res_cfg->ibecc_base)
#define IBECC_ACTIVATE_OFFSET	IBECC_BASE
#define IBECC_ACTIVATE_EN	BIT(0)

/* IBECC error log: bit 63 = UE, bit 62 = CE, bits 61:46 = syndrome */
#define ECC_ERROR_LOG_OFFSET	(IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE	BIT_ULL(62)
#define ECC_ERROR_LOG_UE	BIT_ULL(63)
#define ECC_ERROR_LOG_SYND(v)	GET_BITFIELD(v, 46, 61)

/* Host MMIO base address */
#define MCHBAR_OFFSET		0x48
#define MCHBAR_EN		BIT_ULL(0)
#define MCHBAR_SIZE		0x10000

/* Parameters for the channel decode stage */
#define IMC_BASE			(res_cfg->imc_base)
#define MAD_INTER_CHANNEL_OFFSET	IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v)	GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v)	GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v)	GET_BITFIELD(v, 4, 4)
/* Bits 19:12 scaled by 2^29 to give a size in bytes */
#define MAD_INTER_CHANNEL_CH_S_SIZE(v)	((u64)GET_BITFIELD(v, 12, 19) << 29)

/* Parameters for DRAM decode stage */
#define MAD_INTRA_CH0_OFFSET		(IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v)	GET_BITFIELD(v, 0, 0)

/* DIMM characteristics (sizes are register fields scaled by 2^29 bytes) */
#define MAD_DIMM_CH0_OFFSET		(IMC_BASE + 0xc)
#define MAD_DIMM_CH_DIMM_L_SIZE(v)	((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v)		GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v)	((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v)		GET_BITFIELD(v, 24, 25)

/* Hash for memory controller selection */
#define MAD_MC_HASH_OFFSET		(IMC_BASE + 0x1b8)
#define MAC_MC_HASH_LSB(v)		GET_BITFIELD(v, 1, 3)

/* Hash for channel selection */
#define CHANNEL_HASH_OFFSET		(IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET		(IMC_BASE + 0x28)
/* Bits 19:6 of the register, kept at their original bit positions */
#define CHANNEL_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)

/* Parameters for memory slice decode stage */
#define MEM_SLICE_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)

/*
 * Per-SoC resource description. One static instance exists per supported
 * SoC family; res_cfg points at the one in use and is dereferenced by the
 * IMC_BASE/IBECC_BASE based register macros.
 */
static struct res_config {
	/* NOTE(review): consumer not visible in this part of the file */
	bool machine_check;
	/* The number of present memory controllers. */
	int num_imc;
	/* Host MMIO configuration */
	u64 reg_mchbar_mask;
	/* Top of memory */
	u64 reg_tom_mask;
	/* Top of upper usable DRAM */
	u64 reg_touud_mask;
	/* IBECC error log */
	u64 reg_eccerrlog_addr_mask;
	/* Offset of the memory controller registers (see IMC_BASE) */
	u32 imc_base;
	/* NOTE(review): cmf_* and ms_hash_offset users are outside this view */
	u32 cmf_base;
	u32 cmf_size;
	u32 ms_hash_offset;
	/* Offset of the IBECC registers (see IBECC_BASE) */
	u32 ibecc_base;
	/* Offset of ECC_ERROR_LOG relative to ibecc_base */
	u32 ibecc_error_log_offset;
	bool (*ibecc_available)(struct pci_dev *pdev);
	/* Extract error address logged in IBECC */
	u64 (*err_addr)(u64 ecclog);
	/* Convert error address logged in IBECC to system physical address */
	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
	/* Convert error address logged in IBECC to integrated memory controller address */
	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;

/* State of one integrated memory controller (IMC) instance */
struct igen6_imc {
	/* Index of this memory controller */
	int mc;
	struct mem_ctl_info *mci;
	struct pci_dev *pdev;
	struct device dev;
	/* MMIO mapping; registers are accessed as window + <offset macro> */
	void __iomem *window;
	/* Sum of all DIMM sizes found on this controller, in bytes */
	u64 size;
	/* CH_S_SIZE / CH_L_MAP fields of the MAD_INTER_CHANNEL register */
	u64 ch_s_size;
	int ch_l_map;
	/* Per-channel DIMM_S / DIMM_L sizes and DIMM_L_MAP field */
	u64 dimm_s_size[NUM_CHANNELS];
	u64 dimm_l_size[NUM_CHANNELS];
	int dimm_l_map[NUM_CHANNELS];
};

/* Driver-wide private data */
static struct igen6_pvt {
	struct igen6_imc imc[NUM_IMC];
	/* Memory slice hash/size/map; they are populated outside this view */
	u64 ms_hash;
	u64 ms_s_size;
	int ms_l_map;
} *igen6_pvt;

/* The top of low usable DRAM */
static u32 igen6_tolud;
/* The size of physical memory */
static u64 igen6_tom;

/* Result of decoding one logged error address */
struct decoded_addr {
	int mc;
	u64 imc_addr;
	u64 sys_addr;
	int channel_idx;
	u64 channel_addr;
	int sub_channel_idx;
	u64 sub_channel_addr;
};

/* One queued IBECC error log entry, linked on the lock-less list */
struct ecclog_node {
	struct llist_node llnode;
	int mc;
	u64 ecclog;
};

/*
 * In the NMI handler, the driver uses the lock-less memory allocator
 * to allocate memory to store the IBECC error logs and links the logs
 * to the lock-less list. Delay printk() and the work of error reporting
 * to EDAC core in a worker.
 */
#define ECCLOG_POOL_SIZE	PAGE_SIZE
static LLIST_HEAD(ecclog_llist);
static struct gen_pool *ecclog_pool;
/* Static backing storage handed to the gen_pool allocator */
static char ecclog_buf[ECCLOG_POOL_SIZE];
static struct irq_work ecclog_irq_work;
static struct work_struct ecclog_work;

/* Compute die IDs for Elkhart Lake with IBECC */
#define DID_EHL_SKU5	0x4514
#define DID_EHL_SKU6	0x4528
#define DID_EHL_SKU7	0x452a
#define DID_EHL_SKU8	0x4516
#define DID_EHL_SKU9	0x452c
#define DID_EHL_SKU10	0x452e
#define DID_EHL_SKU11	0x4532
#define DID_EHL_SKU12	0x4518
#define DID_EHL_SKU13	0x451a
#define DID_EHL_SKU14	0x4534
#define DID_EHL_SKU15	0x4536

/* Compute die IDs for ICL-NNPI with IBECC */
#define DID_ICL_SKU8	0x4581
#define DID_ICL_SKU10	0x4585
#define DID_ICL_SKU11	0x4589
#define DID_ICL_SKU12	0x458d

/* Compute die IDs for Tiger Lake with IBECC */
#define DID_TGL_SKU	0x9a14

/* Compute die IDs for Alder Lake with IBECC */
#define DID_ADL_SKU1	0x4601
#define DID_ADL_SKU2	0x4602
#define DID_ADL_SKU3	0x4621
#define DID_ADL_SKU4	0x4641

/* Compute die IDs for Alder Lake-N with IBECC */
#define DID_ADL_N_SKU1	0x4614
#define DID_ADL_N_SKU2	0x4617
#define DID_ADL_N_SKU3	0x461b
#define DID_ADL_N_SKU4	0x461c
#define DID_ADL_N_SKU5	0x4673
#define DID_ADL_N_SKU6	0x4674
#define DID_ADL_N_SKU7	0x4675
#define DID_ADL_N_SKU8	0x4677
#define DID_ADL_N_SKU9	0x4678
#define DID_ADL_N_SKU10	0x4679
#define DID_ADL_N_SKU11	0x467c
#define DID_ADL_N_SKU12	0x4632

/* Compute die IDs for Arizona Beach with IBECC */
#define DID_AZB_SKU1	0x4676

/* Compute die IDs for Amston Lake with IBECC */
#define DID_ASL_SKU1	0x464a
#define DID_ASL_SKU2	0x4646
#define DID_ASL_SKU3	0x4652

/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1	0xa706
#define DID_RPL_P_SKU2	0xa707
#define DID_RPL_P_SKU3	0xa708
#define DID_RPL_P_SKU4	0xa716
#define DID_RPL_P_SKU5	0xa718

/* Compute die IDs for Meteor Lake-PS with IBECC */
#define DID_MTL_PS_SKU1	0x7d21
#define DID_MTL_PS_SKU2	0x7d22
#define DID_MTL_PS_SKU3	0x7d23
#define DID_MTL_PS_SKU4	0x7d24

/* Compute die IDs for Meteor Lake-P with IBECC */
#define DID_MTL_P_SKU1	0x7d01
#define DID_MTL_P_SKU2	0x7d02
#define DID_MTL_P_SKU3	0x7d14

/* Compute die IDs for Arrow Lake-UH with IBECC */
#define DID_ARL_UH_SKU1	0x7d06
#define DID_ARL_UH_SKU2	0x7d20
#define DID_ARL_UH_SKU3	0x7d30

/* Compute die IDs for Panther Lake-H with IBECC */
#define DID_PTL_H_SKU1	0xb000
#define DID_PTL_H_SKU2	0xb001
#define DID_PTL_H_SKU3	0xb002
#define DID_PTL_H_SKU4	0xb003
#define DID_PTL_H_SKU5	0xb004
#define DID_PTL_H_SKU6	0xb005
#define DID_PTL_H_SKU7	0xb008
#define DID_PTL_H_SKU8	0xb011
#define DID_PTL_H_SKU9	0xb014
#define DID_PTL_H_SKU10	0xb015
#define DID_PTL_H_SKU11	0xb028
#define DID_PTL_H_SKU12	0xb029
#define DID_PTL_H_SKU13	0xb02a

/* Compute die IDs for Wildcat Lake with IBECC */
#define DID_WCL_SKU1	0xfd00

/*
 * Read the 64-bit MCHBAR register from PCI configuration space.
 *
 * The register is read as two dwords (low half at MCHBAR_OFFSET, high half
 * at MCHBAR_OFFSET + 4) into the halves of a union.
 *
 * @pdev:   host bridge device whose config space holds MCHBAR
 * @mchbar: on success, receives the register value masked with
 *          res_cfg->reg_mchbar_mask
 *
 * Return: 0 on success; -ENODEV if either config read fails or the
 * MCHBAR_EN bit is clear.
 */
static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
{
	/* NOTE(review): v_lo/v_hi overlay assumes little-endian layout */
	union {
		u64 v;
		struct {
			u32 v_lo;
			u32 v_hi;
		};
	} u;

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
		return -ENODEV;
	}

	if (!(u.v & MCHBAR_EN)) {
		igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
		return -ENODEV;
	}

	*mchbar = u.v & res_cfg->reg_mchbar_mask;
	edac_dbg(2, "MCHBAR 0x%llx (reg 0x%llx)\n", *mchbar, u.v);

	return 0;
}

/*
 * Elkhart Lake: IBECC is reported available when CAPID_C_IBECC is SET.
 * A failed config read is treated as "not available".
 */
static bool ehl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
		return false;

	return !!(CAPID_C_IBECC & v);
}

/* Elkhart Lake: the logged error address is used as the system address */
static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return eaddr;
}

/*
 * Elkhart Lake: convert a logged error address to a memory controller
 * address, based on the TOLUD and TOM values.
 *
 * Addresses below TOLUD are returned unchanged. Otherwise, when TOM is at
 * most 4GB the address is shifted by (TOLUD - 4GB); when TOM is above 4GB
 * only addresses at or above TOM are shifted, by (TOLUD - TOM).
 */
static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	if (eaddr < igen6_tolud)
		return eaddr;

	if (igen6_tom <= _4GB)
		return eaddr + igen6_tolud - _4GB;

	if (eaddr >= igen6_tom)
		return eaddr + igen6_tolud - igen6_tom;

	return eaddr;
}

/*
 * ICL-NNPI: IBECC is reported available when CAPID_C_IBECC is CLEAR
 * (opposite polarity to ehl_ibecc_available()) and the CPU stepping is
 * at least 1.
 */
static bool icl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
		return false;

	return !(CAPID_C_IBECC & v) &&
		(boot_cpu_data.x86_stepping >= 1);
}

/* Tiger Lake: IBECC is available when CAPID_E_IBECC (bit 12) is clear */
static bool tgl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
		return false;

	return !(CAPID_E_IBECC & v);
}

/* Meteor Lake-P: IBECC is available when CAPID_E bit 18 is clear */
static bool mtl_p_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
		return false;

	return !(CAPID_E_IBECC_BIT18 & v);
}

/*
 * Meteor Lake-PS: the IBECC-disable flag is read from MMIO rather than
 * from PCI config space. A temporary mapping of 2 * MCHBAR_SIZE bytes is
 * created at the MCHBAR address, since the register offset (0x13c00) lies
 * beyond a single MCHBAR_SIZE. The mapping is released before returning.
 *
 * Return: false if MCHBAR cannot be read or mapped, or if bit 6 is set.
 */
static bool mtl_ps_ibecc_available(struct pci_dev *pdev)
{
#define MCHBAR_MEMSS_IBECCDIS	0x13c00
	void __iomem *window;
	u64 mchbar;
	u32 val;

	if (get_mchbar(pdev, &mchbar))
		return false;

	window = ioremap(mchbar, MCHBAR_SIZE * 2);
	if (!window) {
		igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
		return false;
	}

	val = readl(window + MCHBAR_MEMSS_IBECCDIS);
	iounmap(window);

	/* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */
	return !GET_BITFIELD(val, 6, 6);
}

/*
 * Convert a memory address to a system physical address; this applies the
 * opposite offsets to ehl_err_addr_to_imc_addr().
 *
 * Addresses below TOLUD are returned unchanged. Otherwise, when TOM is at
 * most 4GB the address is shifted by (4GB - TOLUD); when TOM is above 4GB
 * only addresses below 4GB are shifted, by (TOM - TOLUD).
 */
static u64 mem_addr_to_sys_addr(u64 maddr)
{
	if (maddr < igen6_tolud)
		return maddr;

	if (igen6_tom <= _4GB)
		return maddr - igen6_tolud + _4GB;

	if (maddr < _4GB)
		return maddr - igen6_tolud + igen6_tom;

	return maddr;
}

/*
 * Compute the one-bit memory slice hash of an address.
 *
 * XORs together hash_init, bits 6..19 of (addr & mask), and the bit of
 * addr at position intlv_bit.
 */
static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
{
	u64 hash_addr = addr & mask, hash = hash_init;
	u64 intlv = (addr >> intlv_bit) & 1;
	int i;

	for (i = 6; i < 20; i++)
		hash ^= (hash_addr >> i) & 1;

	return hash ^ intlv;
}

/*
 * Tiger Lake: convert a logged error address to a memory address.
 *
 * Addresses at or above ms_s_size are offset by ms_s_size. Below that, a
 * bit is re-inserted at the interleave position: the bits from intlv_bit
 * upwards move up by one, the bits below stay in place, and the vacated
 * bit is filled with the slice hash seeded with the controller index mc.
 */
static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
{
	u64 maddr, hash, mask, ms_s_size;
	int intlv_bit;
	u32 ms_hash;

	ms_s_size = igen6_pvt->ms_s_size;
	if (eaddr >= ms_s_size)
		return eaddr + ms_s_size;

	/* Only the low 32 bits of the stored u64 hash register are used */
	ms_hash = igen6_pvt->ms_hash;

	mask = MEM_SLICE_HASH_MASK(ms_hash);
	intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;

	maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) |
		GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	hash = mem_slice_hash(maddr, mask, mc, intlv_bit);

	return maddr | (hash << intlv_bit);
}

/* Tiger Lake: error address -> memory address -> system address */
static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc);

	return mem_addr_to_sys_addr(maddr);
}

/* Tiger Lake: the logged error address is used as the IMC address */
static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	return eaddr;
}

/* Alder Lake: the logged error address is treated as a memory address */
static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(eaddr);
}

/*
 * Alder Lake: convert a logged error address to a memory controller
 * address.
 *
 * Addresses at or above 2 * ms_s_size have ms_s_size subtracted. Below
 * that, the bit at the interleave position (read from the controller's
 * MAD_MC_HASH register) is removed and the upper bits move down by one.
 */
static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size;
	struct igen6_imc *imc = &igen6_pvt->imc[mc];
	int intlv_bit;
	u32 mc_hash;

	if (eaddr >= 2 * ms_s_size)
		return eaddr - ms_s_size;

	mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET);

	intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;

	imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit |
		   GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	return imc_addr;
}

/*
 * Raptor Lake-P: extract the error address with field_get(), which takes
 * a run-time mask. NOTE(review): presumably this shifts the field down to
 * bit 0, unlike the plain masking used when no err_addr hook is set -
 * confirm against <linux/bitfield.h>.
 */
static u64 rpl_p_err_addr(u64 ecclog)
{
	return field_get(res_cfg->reg_eccerrlog_addr_mask, ecclog);
}

/*
 * Per-SoC resource tables. Fields left out of an initializer are zero or
 * NULL; in particular a NULL .err_addr makes the error worker fall back to
 * masking the log with reg_eccerrlog_addr_mask.
 */

/* Elkhart Lake */
static struct res_config ehl_cfg = {
	.num_imc		= 1,
	.reg_mchbar_mask	= GENMASK_ULL(38, 16),
	.reg_tom_mask		= GENMASK_ULL(38, 20),
	.reg_touud_mask		= GENMASK_ULL(38, 20),
	.reg_eccerrlog_addr_mask	= GENMASK_ULL(38, 5),
	.imc_base		= 0x5000,
	.ibecc_base		= 0xdc00,
	.ibecc_available	= ehl_ibecc_available,
	.ibecc_error_log_offset	= 0x170,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};

/* ICL-NNPI: shares the Elkhart Lake address conversions */
static struct res_config icl_cfg = {
	.num_imc		= 1,
	.reg_mchbar_mask	= GENMASK_ULL(38, 16),
	.reg_tom_mask		= GENMASK_ULL(38, 20),
	.reg_touud_mask		= GENMASK_ULL(38, 20),
	.reg_eccerrlog_addr_mask	= GENMASK_ULL(38, 5),
	.imc_base		= 0x5000,
	.ibecc_base		= 0xd800,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= icl_ibecc_available,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};

/* Tiger Lake: the only table that sets the cmf_* and ms_hash_offset fields */
static struct res_config tgl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.reg_mchbar_mask	= GENMASK_ULL(38, 17),
	.reg_tom_mask		= GENMASK_ULL(38, 20),
	.reg_touud_mask		= GENMASK_ULL(38, 20),
	.reg_eccerrlog_addr_mask	= GENMASK_ULL(38, 5),
	.imc_base		= 0x5000,
	.cmf_base		= 0x11000,
	.cmf_size		= 0x800,
	.ms_hash_offset		= 0xac,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= tgl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= tgl_err_addr_to_imc_addr,
};

/* Alder Lake */
static struct res_config adl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.reg_mchbar_mask	= GENMASK_ULL(41, 17),
	.reg_tom_mask		= GENMASK_ULL(41, 20),
	.reg_touud_mask		= GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask	= GENMASK_ULL(45, 5),
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

/* Alder Lake-N: same as adl_cfg but with a single memory controller */
static struct res_config adl_n_cfg = {
	.machine_check		= true,
	.num_imc		= 1,
	.reg_mchbar_mask	= GENMASK_ULL(41, 17),
	.reg_tom_mask		= GENMASK_ULL(41, 20),
	.reg_touud_mask		= GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask	= GENMASK_ULL(45, 5),
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

/* Raptor Lake-P: the only table with an .err_addr hook */
static struct res_config rpl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.reg_mchbar_mask	= GENMASK_ULL(41, 17),
	.reg_tom_mask		= GENMASK_ULL(41, 20),
	.reg_touud_mask		= GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask	= GENMASK_ULL(45, 5),
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr		= rpl_p_err_addr,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

/* Meteor Lake-PS */
static struct res_config mtl_ps_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.reg_mchbar_mask	= GENMASK_ULL(41, 17),
	.reg_tom_mask		= GENMASK_ULL(41, 20),
	.reg_touud_mask		= GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask	= GENMASK_ULL(38, 5),
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_ps_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

/* Meteor Lake-P; also used for the Arrow Lake-UH and Panther Lake-H IDs */
static struct res_config mtl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.reg_mchbar_mask	= GENMASK_ULL(41, 17),
	.reg_tom_mask		= GENMASK_ULL(41, 20),
	.reg_touud_mask		= GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask	= GENMASK_ULL(38, 5),
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_p_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

/* Wildcat Lake: same as mtl_p_cfg but with a single memory controller */
static struct res_config wcl_cfg = {
	.machine_check		= true,
	.num_imc		= 1,
	.reg_mchbar_mask	= GENMASK_ULL(41, 17),
	.reg_tom_mask		= GENMASK_ULL(41, 20),
	.reg_touud_mask		= GENMASK_ULL(41, 20),
	.reg_eccerrlog_addr_mask	= GENMASK_ULL(38, 5),
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_p_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

/*
 * Supported devices. The second initializer of each entry carries the
 * address of the matching res_config table, cast to kernel_ulong_t.
 */
static struct pci_device_id igen6_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ASL_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ASL_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU4), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU5), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU6), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU7), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU8), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU9), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU10), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU11), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU12), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU13), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_WCL_SKU1), (kernel_ulong_t)&wcl_cfg },
	{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);

/*
 * Translate the device-width field of a MAD_DIMM register to an EDAC
 * device type.
 *
 * @dimm_l:   non-zero selects the DIMM_L width field (DLW), zero the
 *            DIMM_S width field (DSW)
 * @mad_dimm: raw MAD_DIMM register value
 *
 * Return: DEV_X8, DEV_X16 or DEV_X32 for field values 0, 1, 2;
 * DEV_UNKNOWN otherwise.
 */
static enum dev_type get_width(int dimm_l, u32 mad_dimm)
{
	u32 w = dimm_l ? MAD_DIMM_CH_DLW(mad_dimm) :
			 MAD_DIMM_CH_DSW(mad_dimm);

	switch (w) {
	case 0:
		return DEV_X8;
	case 1:
		return DEV_X16;
	case 2:
		return DEV_X32;
	default:
		return DEV_UNKNOWN;
	}
}

/*
 * Translate the DDR_TYPE field (bits 2:0) of a MAD_INTER_CHANNEL register
 * value to an EDAC memory type; unlisted encodings give MEM_UNKNOWN.
 */
static enum mem_type get_memory_type(u32 mad_inter)
{
	u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter);

	switch (t) {
	case 0:
		return MEM_DDR4;
	case 1:
		return MEM_DDR3;
	case 2:
		return MEM_LPDDR3;
	case 3:
		return MEM_LPDDR4;
	case 4:
		return MEM_WIO2;
	default:
		return MEM_UNKNOWN;
	}
}

/*
 * Compute the channel (or sub-channel) index in hash mode: the XOR of
 * bits 6..19 of (addr & mask) and the bit of addr at intlv_bit. Same
 * computation as mem_slice_hash() with an initial hash of 0.
 */
static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit)
{
	u64 hash_addr = addr & mask, hash = 0;
	u64 intlv = (addr >> intlv_bit) & 1;
	int i;

	for (i = 6; i < 20; i++)
		hash ^= (hash_addr >> i) & 1;

	return (int)hash ^ intlv;
}

/* Drop the bit at intlv_bit from addr and close the gap */
static u64 decode_channel_addr(u64 addr, int intlv_bit)
{
	u64 channel_addr;

	/* Remove the interleave bit and shift upper part down to fill gap */
	channel_addr  = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit;
	channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1);

	return channel_addr;
}

/*
 * Decode one interleaving level (channel, or sub-channel/DIMM).
 *
 * @addr:     address to decode at this level
 * @hash:     raw hash register value for this level
 * @s_size:   size used as the interleave limit (addresses above
 *            2 * s_size are not interleaved)
 * @l_map:    index returned for non-interleaved addresses
 * @idx:      receives the decoded index
 * @sub_addr: receives the address within the selected index
 */
static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map,
			int *idx, u64 *sub_addr)
{
	int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6;

	/* Beyond the interleaved region: everything maps to l_map */
	if (addr > 2 * s_size) {
		*sub_addr = addr - s_size;
		*idx = l_map;
		return;
	}

	if (CHANNEL_HASH_MODE(hash)) {
		*sub_addr = decode_channel_addr(addr, intlv_bit);
		*idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit);
	} else {
		/* Hash mode off: plain interleave on address bit 6 */
		*sub_addr = decode_channel_addr(addr, 6);
		*idx = GET_BITFIELD(addr, 6, 6);
	}
}

/*
 * Decode res->imc_addr into channel and sub-channel index/address.
 *
 * Reads: res->mc, res->imc_addr.
 * Writes: res->channel_idx, res->channel_addr, res->sub_channel_idx,
 * res->sub_channel_addr.
 *
 * Return: 0 on success, -EINVAL if the address is at or above igen6_tom.
 */
static int igen6_decode(struct decoded_addr *res)
{
	struct igen6_imc *imc = &igen6_pvt->imc[res->mc];
	u64 addr = res->imc_addr, sub_addr, s_size;
	int idx, l_map;
	u32 hash;

	if (addr >= igen6_tom) {
		edac_dbg(0, "Address 0x%llx out of range\n", addr);
		return -EINVAL;
	}

	/* Decode channel */
	hash   = readl(imc->window + CHANNEL_HASH_OFFSET);
	s_size = imc->ch_s_size;
	l_map  = imc->ch_l_map;
	decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr);
	res->channel_idx  = idx;
	res->channel_addr = sub_addr;

	/* Decode sub-channel/DIMM (idx still holds the channel index here) */
	hash   = readl(imc->window + CHANNEL_EHASH_OFFSET);
	s_size = imc->dimm_s_size[idx];
	l_map  = imc->dimm_l_map[idx];
	decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr);
	res->sub_channel_idx  = idx;
	res->sub_channel_addr = sub_addr;

	return 0;
}

/*
 * Report one decoded error to the EDAC core.
 *
 * The error is reported as uncorrected if ECC_ERROR_LOG_UE is set in
 * ecclog, otherwise as corrected. The error count is 1, the syndrome is
 * taken from the log, and the channel and sub-channel indices are passed
 * as the two location arguments that precede the trailing -1.
 */
static void igen6_output_error(struct decoded_addr *res,
			       struct mem_ctl_info *mci, u64 ecclog)
{
	enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ?
					 HW_EVENT_ERR_UNCORRECTED :
					 HW_EVENT_ERR_CORRECTED;

	edac_mc_handle_error(type, mci, 1,
			     res->sys_addr >> PAGE_SHIFT,
			     res->sys_addr & ~PAGE_MASK,
			     ECC_ERROR_LOG_SYND(ecclog),
			     res->channel_idx, res->sub_channel_idx,
			     -1, "", "");
}

/*
 * Create the allocator for ecclog nodes on top of the static ecclog_buf.
 * The first gen_pool_create() argument is derived from the node size.
 *
 * Return: the pool, or NULL if creation or adding the buffer fails (the
 * pool is destroyed in the latter case).
 */
static struct gen_pool *ecclog_gen_pool_create(void)
{
	struct gen_pool *pool;

	pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
	if (!pool)
		return NULL;

	if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
		gen_pool_destroy(pool);
		return NULL;
	}

	return pool;
}

/*
 * Allocate a node from ecclog_pool, fill it in and push it on the
 * lock-less ecclog_llist.
 *
 * Return: 0 on success, -ENOMEM if the pool is exhausted.
 */
static int ecclog_gen_pool_add(int mc, u64 ecclog)
{
	struct ecclog_node *node;

	node = (void *)gen_pool_alloc(ecclog_pool, sizeof(*node));
	if (!node)
		return -ENOMEM;

	node->mc = mc;
	node->ecclog = ecclog;
	llist_add(&node->llnode, &ecclog_llist);

	return 0;
}

/*
 * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
 * configuration space status register ERRSTS can indicate whether a
 * correctable error or an uncorrectable error occurred.
We only use the 871 * ECC_ERROR_LOG register to check error type, but need to clear both 872 * registers to enable future error events. 873 */ 874 static u64 ecclog_read_and_clear(struct igen6_imc *imc) 875 { 876 u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET); 877 878 /* 879 * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain 880 * the invalid value ~0. This will result in a flood of invalid 881 * error reports in polling mode. Skip it. 882 */ 883 if (ecclog == ~0) 884 return 0; 885 886 /* Neither a CE nor a UE. Skip it.*/ 887 if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE))) 888 return 0; 889 890 /* Clear CE/UE bits by writing 1s */ 891 writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); 892 893 return ecclog; 894 } 895 896 static void errsts_clear(struct igen6_imc *imc) 897 { 898 u16 errsts; 899 900 if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) { 901 igen6_printk(KERN_ERR, "Failed to read ERRSTS\n"); 902 return; 903 } 904 905 /* Clear CE/UE bits by writing 1s */ 906 if (errsts & (ERRSTS_CE | ERRSTS_UE)) 907 pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts); 908 } 909 910 static int errcmd_enable_error_reporting(bool enable) 911 { 912 struct igen6_imc *imc = &igen6_pvt->imc[0]; 913 u16 errcmd; 914 int rc; 915 916 rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd); 917 if (rc) 918 return pcibios_err_to_errno(rc); 919 920 if (enable) 921 errcmd |= ERRCMD_CE | ERRSTS_UE; 922 else 923 errcmd &= ~(ERRCMD_CE | ERRSTS_UE); 924 925 rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd); 926 if (rc) 927 return pcibios_err_to_errno(rc); 928 929 return 0; 930 } 931 932 static int ecclog_handler(void) 933 { 934 struct igen6_imc *imc; 935 int i, n = 0; 936 u64 ecclog; 937 938 for (i = 0; i < res_cfg->num_imc; i++) { 939 imc = &igen6_pvt->imc[i]; 940 941 /* errsts_clear() isn't NMI-safe. 
Delay it in the IRQ context */ 942 943 ecclog = ecclog_read_and_clear(imc); 944 if (!ecclog) 945 continue; 946 947 if (!ecclog_gen_pool_add(i, ecclog)) 948 irq_work_queue(&ecclog_irq_work); 949 950 n++; 951 } 952 953 return n; 954 } 955 956 static void ecclog_work_cb(struct work_struct *work) 957 { 958 struct ecclog_node *node, *tmp; 959 struct mem_ctl_info *mci; 960 struct llist_node *head; 961 struct decoded_addr res; 962 u64 eaddr; 963 964 head = llist_del_all(&ecclog_llist); 965 if (!head) 966 return; 967 968 llist_for_each_entry_safe(node, tmp, head, llnode) { 969 memset(&res, 0, sizeof(res)); 970 if (res_cfg->err_addr) 971 eaddr = res_cfg->err_addr(node->ecclog); 972 else 973 eaddr = node->ecclog & res_cfg->reg_eccerrlog_addr_mask; 974 975 res.mc = node->mc; 976 res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc); 977 res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc); 978 979 mci = igen6_pvt->imc[res.mc].mci; 980 981 edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog); 982 igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n"); 983 igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr); 984 985 if (!igen6_decode(&res)) 986 igen6_output_error(&res, mci, node->ecclog); 987 988 gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node)); 989 } 990 } 991 992 static void ecclog_irq_work_cb(struct irq_work *irq_work) 993 { 994 int i; 995 996 for (i = 0; i < res_cfg->num_imc; i++) 997 errsts_clear(&igen6_pvt->imc[i]); 998 999 if (!llist_empty(&ecclog_llist)) 1000 schedule_work(&ecclog_work); 1001 } 1002 1003 static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs) 1004 { 1005 unsigned char reason; 1006 1007 if (!ecclog_handler()) 1008 return NMI_DONE; 1009 1010 /* 1011 * Both In-Band ECC correctable error and uncorrectable error are 1012 * reported by SERR# NMI. 
The NMI generic code (see pci_serr_error()) 1013 * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to 1014 * re-enable the SERR# NMI after NMI handling. So clear this bit here 1015 * to re-enable SERR# NMI for receiving future In-Band ECC errors. 1016 */ 1017 reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK; 1018 reason |= NMI_REASON_CLEAR_SERR; 1019 outb(reason, NMI_REASON_PORT); 1020 reason &= ~NMI_REASON_CLEAR_SERR; 1021 outb(reason, NMI_REASON_PORT); 1022 1023 return NMI_HANDLED; 1024 } 1025 1026 static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val, 1027 void *data) 1028 { 1029 struct mce *mce = (struct mce *)data; 1030 char *type; 1031 1032 if (mce->kflags & MCE_HANDLED_CEC) 1033 return NOTIFY_DONE; 1034 1035 /* 1036 * Ignore unless this is a memory related error. 1037 * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here, 1038 * since this bit isn't set on some CPU (e.g., Tiger Lake UP3). 1039 */ 1040 if ((mce->status & 0xefff) >> 7 != 1) 1041 return NOTIFY_DONE; 1042 1043 if (mce->mcgstatus & MCG_STATUS_MCIP) 1044 type = "Exception"; 1045 else 1046 type = "Event"; 1047 1048 edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n", 1049 mce->extcpu, type, mce->mcgstatus, 1050 mce->bank, mce->status); 1051 edac_dbg(0, "TSC 0x%llx\n", mce->tsc); 1052 edac_dbg(0, "ADDR 0x%llx\n", mce->addr); 1053 edac_dbg(0, "MISC 0x%llx\n", mce->misc); 1054 edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n", 1055 mce->cpuvendor, mce->cpuid, mce->time, 1056 mce->socketid, mce->apicid); 1057 /* 1058 * We just use the Machine Check for the memory error notification. 1059 * Each memory controller is associated with an IBECC instance. 1060 * Directly read and clear the error information(error address and 1061 * error type) on all the IBECC instances so that we know on which 1062 * memory controller the memory error(s) occurred. 
1063 */ 1064 if (!ecclog_handler()) 1065 return NOTIFY_DONE; 1066 1067 mce->kflags |= MCE_HANDLED_EDAC; 1068 1069 return NOTIFY_DONE; 1070 } 1071 1072 static struct notifier_block ecclog_mce_dec = { 1073 .notifier_call = ecclog_mce_handler, 1074 .priority = MCE_PRIO_EDAC, 1075 }; 1076 1077 static bool igen6_check_ecc(struct igen6_imc *imc) 1078 { 1079 u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET); 1080 1081 return !!(activate & IBECC_ACTIVATE_EN); 1082 } 1083 1084 static int igen6_get_dimm_config(struct mem_ctl_info *mci) 1085 { 1086 struct igen6_imc *imc = mci->pvt_info; 1087 u32 mad_inter, mad_intra, mad_dimm; 1088 int i, j, ndimms, mc = imc->mc; 1089 struct dimm_info *dimm; 1090 enum mem_type mtype; 1091 enum dev_type dtype; 1092 u64 dsize; 1093 bool ecc; 1094 1095 edac_dbg(2, "\n"); 1096 1097 mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET); 1098 mtype = get_memory_type(mad_inter); 1099 ecc = igen6_check_ecc(imc); 1100 imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter); 1101 imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter); 1102 1103 for (i = 0; i < NUM_CHANNELS; i++) { 1104 mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4); 1105 mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4); 1106 1107 imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm); 1108 imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm); 1109 imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra); 1110 imc->size += imc->dimm_s_size[i]; 1111 imc->size += imc->dimm_l_size[i]; 1112 ndimms = 0; 1113 1114 for (j = 0; j < NUM_DIMMS; j++) { 1115 dimm = edac_get_dimm(mci, i, j, 0); 1116 1117 if (j ^ imc->dimm_l_map[i]) { 1118 dtype = get_width(0, mad_dimm); 1119 dsize = imc->dimm_s_size[i]; 1120 } else { 1121 dtype = get_width(1, mad_dimm); 1122 dsize = imc->dimm_l_size[i]; 1123 } 1124 1125 if (!dsize) 1126 continue; 1127 1128 dimm->grain = 64; 1129 dimm->mtype = mtype; 1130 dimm->dtype = dtype; 1131 dimm->nr_pages = MiB_TO_PAGES(dsize >> 20); 
1132 dimm->edac_mode = EDAC_SECDED; 1133 snprintf(dimm->label, sizeof(dimm->label), 1134 "MC#%d_Chan#%d_DIMM#%d", mc, i, j); 1135 edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n", 1136 mc, i, j, dsize >> 20, dimm->nr_pages); 1137 1138 ndimms++; 1139 } 1140 1141 if (ndimms && !ecc) { 1142 igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc); 1143 return -ENODEV; 1144 } 1145 } 1146 1147 edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20); 1148 1149 return 0; 1150 } 1151 1152 #ifdef CONFIG_EDAC_DEBUG 1153 /* Top of upper usable DRAM */ 1154 static u64 igen6_touud; 1155 #define TOUUD_OFFSET 0xa8 1156 1157 static void igen6_reg_dump(struct igen6_imc *imc) 1158 { 1159 int i; 1160 1161 edac_dbg(2, "CHANNEL_HASH : 0x%x\n", 1162 readl(imc->window + CHANNEL_HASH_OFFSET)); 1163 edac_dbg(2, "CHANNEL_EHASH : 0x%x\n", 1164 readl(imc->window + CHANNEL_EHASH_OFFSET)); 1165 edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n", 1166 readl(imc->window + MAD_INTER_CHANNEL_OFFSET)); 1167 edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n", 1168 readq(imc->window + ECC_ERROR_LOG_OFFSET)); 1169 1170 for (i = 0; i < NUM_CHANNELS; i++) { 1171 edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i, 1172 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4)); 1173 edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i, 1174 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4)); 1175 } 1176 edac_dbg(2, "TOLUD : 0x%x", igen6_tolud); 1177 edac_dbg(2, "TOUUD : 0x%llx", igen6_touud); 1178 edac_dbg(2, "TOM : 0x%llx", igen6_tom); 1179 } 1180 1181 static struct dentry *igen6_test; 1182 1183 static int debugfs_u64_set(void *data, u64 val) 1184 { 1185 u64 ecclog; 1186 1187 if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) { 1188 edac_dbg(0, "Address 0x%llx out of range\n", val); 1189 return 0; 1190 } 1191 1192 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); 1193 1194 ecclog = (val & res_cfg->reg_eccerrlog_addr_mask) | ECC_ERROR_LOG_CE; 1195 1196 if (!ecclog_gen_pool_add(0, ecclog)) 1197 
irq_work_queue(&ecclog_irq_work); 1198 1199 return 0; 1200 } 1201 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); 1202 1203 static void igen6_debug_setup(void) 1204 { 1205 igen6_test = edac_debugfs_create_dir("igen6_test"); 1206 if (!igen6_test) 1207 return; 1208 1209 if (!edac_debugfs_create_file("addr", 0200, igen6_test, 1210 NULL, &fops_u64_wo)) { 1211 debugfs_remove(igen6_test); 1212 igen6_test = NULL; 1213 } 1214 } 1215 1216 static void igen6_debug_teardown(void) 1217 { 1218 debugfs_remove_recursive(igen6_test); 1219 } 1220 #else 1221 static void igen6_reg_dump(struct igen6_imc *imc) {} 1222 static void igen6_debug_setup(void) {} 1223 static void igen6_debug_teardown(void) {} 1224 #endif 1225 1226 static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar) 1227 { 1228 union { 1229 u64 v; 1230 struct { 1231 u32 v_lo; 1232 u32 v_hi; 1233 }; 1234 } u; 1235 1236 edac_dbg(2, "\n"); 1237 1238 if (!res_cfg->ibecc_available(pdev)) { 1239 edac_dbg(2, "No In-Band ECC IP\n"); 1240 goto fail; 1241 } 1242 1243 if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) { 1244 igen6_printk(KERN_ERR, "Failed to read TOLUD\n"); 1245 goto fail; 1246 } 1247 1248 igen6_tolud &= GENMASK(31, 20); 1249 1250 if (pci_read_config_dword(pdev, TOM_OFFSET, &u.v_lo)) { 1251 igen6_printk(KERN_ERR, "Failed to read lower TOM\n"); 1252 goto fail; 1253 } 1254 1255 if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &u.v_hi)) { 1256 igen6_printk(KERN_ERR, "Failed to read upper TOM\n"); 1257 goto fail; 1258 } 1259 1260 igen6_tom = u.v & res_cfg->reg_tom_mask; 1261 1262 if (get_mchbar(pdev, mchbar)) 1263 goto fail; 1264 1265 #ifdef CONFIG_EDAC_DEBUG 1266 if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo)) 1267 edac_dbg(2, "Failed to read lower TOUUD\n"); 1268 else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &u.v_hi)) 1269 edac_dbg(2, "Failed to read upper TOUUD\n"); 1270 else 1271 igen6_touud = u.v & res_cfg->reg_touud_mask; 1272 #endif 1273 1274 return 0; 1275 
fail: 1276 return -ENODEV; 1277 } 1278 1279 static void igen6_check(struct mem_ctl_info *mci) 1280 { 1281 struct igen6_imc *imc = mci->pvt_info; 1282 u64 ecclog; 1283 1284 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ 1285 ecclog = ecclog_read_and_clear(imc); 1286 if (!ecclog) 1287 return; 1288 1289 if (!ecclog_gen_pool_add(imc->mc, ecclog)) 1290 irq_work_queue(&ecclog_irq_work); 1291 } 1292 1293 /* Check whether the memory controller is absent. */ 1294 static bool igen6_imc_absent(void __iomem *window) 1295 { 1296 return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0; 1297 } 1298 1299 static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev) 1300 { 1301 struct edac_mc_layer layers[2]; 1302 struct mem_ctl_info *mci; 1303 struct igen6_imc *imc; 1304 int rc; 1305 1306 edac_dbg(2, "\n"); 1307 1308 layers[0].type = EDAC_MC_LAYER_CHANNEL; 1309 layers[0].size = NUM_CHANNELS; 1310 layers[0].is_virt_csrow = false; 1311 layers[1].type = EDAC_MC_LAYER_SLOT; 1312 layers[1].size = NUM_DIMMS; 1313 layers[1].is_virt_csrow = true; 1314 1315 mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); 1316 if (!mci) { 1317 rc = -ENOMEM; 1318 goto fail; 1319 } 1320 1321 mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc); 1322 if (!mci->ctl_name) { 1323 rc = -ENOMEM; 1324 goto fail2; 1325 } 1326 1327 mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4; 1328 mci->edac_ctl_cap = EDAC_FLAG_SECDED; 1329 mci->edac_cap = EDAC_FLAG_SECDED; 1330 mci->mod_name = EDAC_MOD_STR; 1331 mci->dev_name = pci_name(pdev); 1332 if (edac_op_state == EDAC_OPSTATE_POLL) 1333 mci->edac_check = igen6_check; 1334 mci->pvt_info = &igen6_pvt->imc[mc]; 1335 1336 imc = mci->pvt_info; 1337 device_initialize(&imc->dev); 1338 /* 1339 * EDAC core uses mci->pdev(pointer of structure device) as 1340 * memory controller ID. 
The client SoCs attach one or more 1341 * memory controllers to single pci_dev (single pci_dev->dev 1342 * can be for multiple memory controllers). 1343 * 1344 * To make mci->pdev unique, assign pci_dev->dev to mci->pdev 1345 * for the first memory controller and assign a unique imc->dev 1346 * to mci->pdev for each non-first memory controller. 1347 */ 1348 mci->pdev = mc ? &imc->dev : &pdev->dev; 1349 imc->mc = mc; 1350 imc->pdev = pdev; 1351 imc->window = window; 1352 1353 igen6_reg_dump(imc); 1354 1355 rc = igen6_get_dimm_config(mci); 1356 if (rc) 1357 goto fail3; 1358 1359 rc = edac_mc_add_mc(mci); 1360 if (rc) { 1361 igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc); 1362 goto fail3; 1363 } 1364 1365 imc->mci = mci; 1366 return 0; 1367 fail3: 1368 put_device(&imc->dev); 1369 mci->pvt_info = NULL; 1370 kfree(mci->ctl_name); 1371 fail2: 1372 edac_mc_free(mci); 1373 fail: 1374 return rc; 1375 } 1376 1377 static void igen6_unregister_mcis(void) 1378 { 1379 struct mem_ctl_info *mci; 1380 struct igen6_imc *imc; 1381 int i; 1382 1383 edac_dbg(2, "\n"); 1384 1385 for (i = 0; i < res_cfg->num_imc; i++) { 1386 imc = &igen6_pvt->imc[i]; 1387 mci = imc->mci; 1388 if (!mci) 1389 continue; 1390 1391 edac_mc_del_mc(mci->pdev); 1392 kfree(mci->ctl_name); 1393 mci->pvt_info = NULL; 1394 edac_mc_free(mci); 1395 put_device(&imc->dev); 1396 iounmap(imc->window); 1397 } 1398 } 1399 1400 static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar) 1401 { 1402 void __iomem *window; 1403 int lmc, pmc, rc; 1404 u64 base; 1405 1406 for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) { 1407 base = mchbar + pmc * MCHBAR_SIZE; 1408 window = ioremap(base, MCHBAR_SIZE); 1409 if (!window) { 1410 igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc); 1411 rc = -ENOMEM; 1412 goto out_unregister_mcis; 1413 } 1414 1415 if (igen6_imc_absent(window)) { 1416 iounmap(window); 1417 edac_dbg(2, "Skip absent mc%d\n", pmc); 1418 continue; 1419 } 1420 1421 rc = 
igen6_register_mci(lmc, window, pdev); 1422 if (rc) 1423 goto out_iounmap; 1424 1425 /* Done, if all present MCs are detected and registered. */ 1426 if (++lmc >= res_cfg->num_imc) 1427 break; 1428 } 1429 1430 if (!lmc) { 1431 igen6_printk(KERN_ERR, "No mc found.\n"); 1432 return -ENODEV; 1433 } 1434 1435 if (lmc < res_cfg->num_imc) { 1436 igen6_printk(KERN_DEBUG, "Expected %d mcs, but only %d detected.", 1437 res_cfg->num_imc, lmc); 1438 res_cfg->num_imc = lmc; 1439 } 1440 1441 return 0; 1442 1443 out_iounmap: 1444 iounmap(window); 1445 1446 out_unregister_mcis: 1447 igen6_unregister_mcis(); 1448 1449 return rc; 1450 } 1451 1452 static int igen6_mem_slice_setup(u64 mchbar) 1453 { 1454 struct igen6_imc *imc = &igen6_pvt->imc[0]; 1455 u64 base = mchbar + res_cfg->cmf_base; 1456 u32 offset = res_cfg->ms_hash_offset; 1457 u32 size = res_cfg->cmf_size; 1458 u64 ms_s_size, ms_hash; 1459 void __iomem *cmf; 1460 int ms_l_map; 1461 1462 edac_dbg(2, "\n"); 1463 1464 if (imc[0].size < imc[1].size) { 1465 ms_s_size = imc[0].size; 1466 ms_l_map = 1; 1467 } else { 1468 ms_s_size = imc[1].size; 1469 ms_l_map = 0; 1470 } 1471 1472 igen6_pvt->ms_s_size = ms_s_size; 1473 igen6_pvt->ms_l_map = ms_l_map; 1474 1475 edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n", 1476 ms_s_size >> 20, ms_l_map); 1477 1478 if (!size) 1479 return 0; 1480 1481 cmf = ioremap(base, size); 1482 if (!cmf) { 1483 igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base); 1484 return -ENODEV; 1485 } 1486 1487 ms_hash = readq(cmf + offset); 1488 igen6_pvt->ms_hash = ms_hash; 1489 1490 edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash); 1491 1492 iounmap(cmf); 1493 1494 return 0; 1495 } 1496 1497 static int register_err_handler(void) 1498 { 1499 int rc; 1500 1501 if (res_cfg->machine_check) { 1502 mce_register_decode_chain(&ecclog_mce_dec); 1503 return 0; 1504 } 1505 1506 rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler, 1507 0, IGEN6_NMI_NAME); 1508 if (rc) { 1509 igen6_printk(KERN_ERR, "Failed to 
register NMI handler\n"); 1510 return rc; 1511 } 1512 1513 return 0; 1514 } 1515 1516 static void unregister_err_handler(void) 1517 { 1518 if (res_cfg->machine_check) { 1519 mce_unregister_decode_chain(&ecclog_mce_dec); 1520 return; 1521 } 1522 1523 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); 1524 } 1525 1526 static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent) 1527 { 1528 /* 1529 * Quirk: Certain SoCs' error reporting interrupts don't work. 1530 * Force polling mode for them to ensure that memory error 1531 * events can be handled. 1532 */ 1533 if (ent->device == DID_ADL_N_SKU4) { 1534 edac_op_state = EDAC_OPSTATE_POLL; 1535 return; 1536 } 1537 1538 /* Set the mode according to the configuration data. */ 1539 if (cfg->machine_check) 1540 edac_op_state = EDAC_OPSTATE_INT; 1541 else 1542 edac_op_state = EDAC_OPSTATE_NMI; 1543 } 1544 1545 static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 1546 { 1547 u64 mchbar; 1548 int rc; 1549 1550 edac_dbg(2, "\n"); 1551 1552 igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL); 1553 if (!igen6_pvt) 1554 return -ENOMEM; 1555 1556 res_cfg = (struct res_config *)ent->driver_data; 1557 1558 rc = igen6_pci_setup(pdev, &mchbar); 1559 if (rc) 1560 goto fail; 1561 1562 opstate_set(res_cfg, ent); 1563 1564 rc = igen6_register_mcis(pdev, mchbar); 1565 if (rc) 1566 goto fail; 1567 1568 if (res_cfg->num_imc > 1) { 1569 rc = igen6_mem_slice_setup(mchbar); 1570 if (rc) 1571 goto fail2; 1572 } 1573 1574 ecclog_pool = ecclog_gen_pool_create(); 1575 if (!ecclog_pool) { 1576 rc = -ENOMEM; 1577 goto fail2; 1578 } 1579 1580 INIT_WORK(&ecclog_work, ecclog_work_cb); 1581 init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb); 1582 1583 rc = register_err_handler(); 1584 if (rc) 1585 goto fail3; 1586 1587 /* Enable error reporting */ 1588 rc = errcmd_enable_error_reporting(true); 1589 if (rc) { 1590 igen6_printk(KERN_ERR, "Failed to enable error reporting\n"); 1591 goto fail4; 1592 } 1593 
1594 /* Check if any pending errors before/during the registration of the error handler */ 1595 ecclog_handler(); 1596 1597 igen6_debug_setup(); 1598 return 0; 1599 fail4: 1600 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); 1601 fail3: 1602 gen_pool_destroy(ecclog_pool); 1603 fail2: 1604 igen6_unregister_mcis(); 1605 fail: 1606 kfree(igen6_pvt); 1607 return rc; 1608 } 1609 1610 static void igen6_remove(struct pci_dev *pdev) 1611 { 1612 edac_dbg(2, "\n"); 1613 1614 igen6_debug_teardown(); 1615 errcmd_enable_error_reporting(false); 1616 unregister_err_handler(); 1617 irq_work_sync(&ecclog_irq_work); 1618 flush_work(&ecclog_work); 1619 gen_pool_destroy(ecclog_pool); 1620 igen6_unregister_mcis(); 1621 kfree(igen6_pvt); 1622 } 1623 1624 static struct pci_driver igen6_driver = { 1625 .name = EDAC_MOD_STR, 1626 .probe = igen6_probe, 1627 .remove = igen6_remove, 1628 .id_table = igen6_pci_tbl, 1629 }; 1630 1631 static int __init igen6_init(void) 1632 { 1633 const char *owner; 1634 int rc; 1635 1636 edac_dbg(2, "\n"); 1637 1638 if (ghes_get_devices()) 1639 return -EBUSY; 1640 1641 owner = edac_get_owner(); 1642 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) 1643 return -EBUSY; 1644 1645 rc = pci_register_driver(&igen6_driver); 1646 if (rc) 1647 return rc; 1648 1649 igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION); 1650 1651 return 0; 1652 } 1653 1654 static void __exit igen6_exit(void) 1655 { 1656 edac_dbg(2, "\n"); 1657 1658 pci_unregister_driver(&igen6_driver); 1659 } 1660 1661 module_init(igen6_init); 1662 module_exit(igen6_exit); 1663 1664 MODULE_LICENSE("GPL v2"); 1665 MODULE_AUTHOR("Qiuxu Zhuo"); 1666 MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC"); 1667