// SPDX-License-Identifier: GPL-2.0
/*
 * Driver for Intel client SoC with integrated memory controller using IBECC
 *
 * Copyright (C) 2020 Intel Corporation
 *
 * The In-Band ECC (IBECC) IP provides ECC protection to all or specific
 * regions of the physical memory space. It's used for memory controllers
 * that don't support the out-of-band ECC which often needs an additional
 * storage device to each channel for storing ECC data.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/edac.h>
#include <linux/bits.h>
#include <linux/io.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/mce.h>

#include "edac_mc.h"
#include "edac_module.h"

#define IGEN6_REVISION	"v2.5.1"

#define EDAC_MOD_STR	"igen6_edac"
#define IGEN6_NMI_NAME	"igen6_ibecc"

/* Debug macros */
#define igen6_printk(level, fmt, arg...)		\
	edac_printk(level, "igen6", fmt, ##arg)

#define igen6_mc_printk(mci, level, fmt, arg...)	\
	edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)

/* Extract bits [lo, hi] of v, right-justified */
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))

#define NUM_IMC				2 /* Max memory controllers */
#define NUM_CHANNELS			2 /* Max channels */
#define NUM_DIMMS			2 /* Max DIMMs per channel */

#define _4GB				BIT_ULL(32)

/* Size of physical memory */
#define TOM_OFFSET			0xa0
/* Top of low usable DRAM */
#define TOLUD_OFFSET			0xbc
/* Capability register C */
#define CAPID_C_OFFSET			0xec
#define CAPID_C_IBECC			BIT(15)

/* Capability register E */
#define CAPID_E_OFFSET			0xf0
#define CAPID_E_IBECC			BIT(12)
#define CAPID_E_IBECC_BIT18		BIT(18)

/* Error Status */
#define ERRSTS_OFFSET			0xc8
#define ERRSTS_CE			BIT_ULL(6)
#define ERRSTS_UE			BIT_ULL(7)

/* Error Command */
#define ERRCMD_OFFSET			0xca
#define ERRCMD_CE			BIT_ULL(6)
#define ERRCMD_UE			BIT_ULL(7)

/* IBECC MMIO base address (per-SoC, from the active res_config) */
#define IBECC_BASE			(res_cfg->ibecc_base)
#define IBECC_ACTIVATE_OFFSET		IBECC_BASE
#define IBECC_ACTIVATE_EN		BIT(0)

/* IBECC error log */
#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE		BIT_ULL(62)
#define ECC_ERROR_LOG_UE		BIT_ULL(63)
#define ECC_ERROR_LOG_ADDR_SHIFT	5
#define ECC_ERROR_LOG_ADDR(v)		GET_BITFIELD(v, 5, 38)
#define ECC_ERROR_LOG_ADDR45(v)		GET_BITFIELD(v, 5, 45)
#define ECC_ERROR_LOG_SYND(v)		GET_BITFIELD(v, 46, 61)

/* Host MMIO base address */
#define MCHBAR_OFFSET			0x48
#define MCHBAR_EN			BIT_ULL(0)
#define MCHBAR_BASE(v)			(GET_BITFIELD(v, 16, 38) << 16)
#define MCHBAR_SIZE			0x10000

/* Parameters for the channel decode stage */
#define IMC_BASE			(res_cfg->imc_base)
#define MAD_INTER_CHANNEL_OFFSET	IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v)	GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v)	GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v)	GET_BITFIELD(v, 4, 4)
#define MAD_INTER_CHANNEL_CH_S_SIZE(v)	((u64)GET_BITFIELD(v, 12, 19) << 29)

/* Parameters for DRAM decode stage */
#define MAD_INTRA_CH0_OFFSET		(IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v)	GET_BITFIELD(v, 0, 0)

/* DIMM characteristics */
#define MAD_DIMM_CH0_OFFSET		(IMC_BASE + 0xc)
#define MAD_DIMM_CH_DIMM_L_SIZE(v)	((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v)		GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v)	((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v)		GET_BITFIELD(v, 24, 25)

/* Hash for memory controller selection */
#define MAD_MC_HASH_OFFSET		(IMC_BASE + 0x1b8)
#define MAC_MC_HASH_LSB(v)		GET_BITFIELD(v, 1, 3)

/* Hash for channel selection */
#define CHANNEL_HASH_OFFSET		(IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET		(IMC_BASE + 0x28)
#define CHANNEL_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)

/* Parameters for memory slice decode stage */
#define MEM_SLICE_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)

/* Per-SoC resources/callbacks; selected via the PCI device ID table below */
static struct res_config {
	/*
	 * NOTE(review): presumably selects machine-check (ecclog_mce_handler)
	 * vs. SERR# NMI (ecclog_nmi_handler) reporting — the handler
	 * registration code is outside this chunk; confirm there.
	 */
	bool machine_check;
	/* The number of present memory controllers. */
	int num_imc;
	u32 imc_base;			/* MCHBAR offset of IMC registers */
	u32 cmf_base;
	u32 cmf_size;
	u32 ms_hash_offset;		/* Memory slice hash register offset */
	u32 ibecc_base;			/* MCHBAR offset of IBECC registers */
	u32 ibecc_error_log_offset;	/* Error log offset from ibecc_base */
	bool (*ibecc_available)(struct pci_dev *pdev);
	/* Extract error address logged in IBECC */
	u64 (*err_addr)(u64 ecclog);
	/* Convert error address logged in IBECC to system physical address */
	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
	/* Convert error address logged in IBECC to integrated memory controller address */
	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;

/* Per memory controller state */
struct igen6_imc {
	int mc;				/* Memory controller index */
	struct mem_ctl_info *mci;
	struct pci_dev *pdev;
	struct device dev;
	void __iomem *window;		/* Mapped MCHBAR register window */
	u64 size;			/* Sum of all DIMM sizes, in bytes */
	u64 ch_s_size;			/* Channel "S" size (MAD_INTER_CHANNEL) */
	int ch_l_map;			/* Channel "L" mapping bit */
	u64 dimm_s_size[NUM_CHANNELS];
	u64 dimm_l_size[NUM_CHANNELS];
	int dimm_l_map[NUM_CHANNELS];
};

static struct igen6_pvt {
	struct igen6_imc imc[NUM_IMC];
	u64 ms_hash;			/* Memory slice hash register value */
	u64 ms_s_size;			/* Memory slice "S" size */
	int ms_l_map;
} *igen6_pvt;

/* The top of low usable DRAM */
static u32 igen6_tolud;
/* The size of physical memory */
static u64 igen6_tom;

/* Result of decoding one error address through the address-translation stages */
struct decoded_addr {
	int mc;				/* Memory controller index */
	u64 imc_addr;			/* Address local to the controller */
	u64 sys_addr;			/* System physical address */
	int channel_idx;
	u64 channel_addr;
	int sub_channel_idx;
	u64 sub_channel_addr;
};

/* One queued IBECC error log entry (allocated from ecclog_pool) */
struct ecclog_node {
	struct llist_node llnode;
	int mc;
	u64 ecclog;			/* Raw ECC_ERROR_LOG register value */
};

/*
 * In the NMI handler, the driver uses the lock-less memory allocator
 * to allocate memory to store the IBECC error logs and links the logs
 * to the lock-less list. Delay printk() and the work of error reporting
 * to EDAC core in a worker.
 */
#define ECCLOG_POOL_SIZE	PAGE_SIZE
static LLIST_HEAD(ecclog_llist);
static struct gen_pool *ecclog_pool;
static char ecclog_buf[ECCLOG_POOL_SIZE];
static struct irq_work ecclog_irq_work;
static struct work_struct ecclog_work;

/* Compute die IDs for Elkhart Lake with IBECC */
#define DID_EHL_SKU5	0x4514
#define DID_EHL_SKU6	0x4528
#define DID_EHL_SKU7	0x452a
#define DID_EHL_SKU8	0x4516
#define DID_EHL_SKU9	0x452c
#define DID_EHL_SKU10	0x452e
#define DID_EHL_SKU11	0x4532
#define DID_EHL_SKU12	0x4518
#define DID_EHL_SKU13	0x451a
#define DID_EHL_SKU14	0x4534
#define DID_EHL_SKU15	0x4536

/* Compute die IDs for ICL-NNPI with IBECC */
#define DID_ICL_SKU8	0x4581
#define DID_ICL_SKU10	0x4585
#define DID_ICL_SKU11	0x4589
#define DID_ICL_SKU12	0x458d

/* Compute die IDs for Tiger Lake with IBECC */
#define DID_TGL_SKU	0x9a14

/* Compute die IDs for Alder Lake with IBECC */
#define DID_ADL_SKU1	0x4601
#define DID_ADL_SKU2	0x4602
#define DID_ADL_SKU3	0x4621
#define DID_ADL_SKU4	0x4641

/* Compute die IDs for Alder Lake-N with IBECC */
#define DID_ADL_N_SKU1	0x4614
#define DID_ADL_N_SKU2	0x4617
#define DID_ADL_N_SKU3	0x461b
#define DID_ADL_N_SKU4	0x461c
#define DID_ADL_N_SKU5	0x4673
#define DID_ADL_N_SKU6	0x4674
#define DID_ADL_N_SKU7	0x4675
#define DID_ADL_N_SKU8	0x4677
#define DID_ADL_N_SKU9	0x4678
#define DID_ADL_N_SKU10	0x4679
#define DID_ADL_N_SKU11	0x467c
#define DID_ADL_N_SKU12	0x4632

/* Compute die IDs for Arizona Beach with IBECC */
#define DID_AZB_SKU1	0x4676

/* Compute did IDs for Amston Lake with IBECC */
#define DID_ASL_SKU1	0x464a

/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1	0xa706
#define DID_RPL_P_SKU2	0xa707
#define DID_RPL_P_SKU3	0xa708
#define DID_RPL_P_SKU4	0xa716
#define DID_RPL_P_SKU5	\
0xa718

/* Compute die IDs for Meteor Lake-PS with IBECC */
#define DID_MTL_PS_SKU1	0x7d21
#define DID_MTL_PS_SKU2	0x7d22
#define DID_MTL_PS_SKU3	0x7d23
#define DID_MTL_PS_SKU4	0x7d24

/* Compute die IDs for Meteor Lake-P with IBECC */
#define DID_MTL_P_SKU1	0x7d01
#define DID_MTL_P_SKU2	0x7d02
#define DID_MTL_P_SKU3	0x7d14

/* Compute die IDs for Arrow Lake-UH with IBECC */
#define DID_ARL_UH_SKU1	0x7d06
#define DID_ARL_UH_SKU2	0x7d20
#define DID_ARL_UH_SKU3	0x7d30

/* Compute die IDs for Panther Lake-H with IBECC */
#define DID_PTL_H_SKU1	0xb000
#define DID_PTL_H_SKU2	0xb001
#define DID_PTL_H_SKU3	0xb002

/* Compute die IDs for Wildcat Lake with IBECC */
#define DID_WCL_SKU1	0xfd00

/*
 * Read the 64-bit MCHBAR register from PCI config space and return the
 * decoded MMIO base via @mchbar. Returns 0 on success, -ENODEV if either
 * config read fails or MCHBAR is disabled.
 */
static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
{
	/* Assemble the 64-bit value from two 32-bit config reads */
	union {
		u64 v;
		struct {
			u32 v_lo;
			u32 v_hi;
		};
	} u;

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
		return -ENODEV;
	}

	if (!(u.v & MCHBAR_EN)) {
		igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
		return -ENODEV;
	}

	*mchbar = MCHBAR_BASE(u.v);

	return 0;
}

/* Elkhart Lake: IBECC is present when CAPID_C bit 15 is set */
static bool ehl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
		return false;

	return !!(CAPID_C_IBECC & v);
}

/* Elkhart Lake: the logged error address already is the system address */
static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return eaddr;
}

/*
 * Elkhart Lake: translate an error (system) address to a controller-local
 * address by subtracting the MMIO hole between TOLUD and 4GB (or TOM when
 * memory extends above 4GB).
 */
static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	if (eaddr < igen6_tolud)
		return eaddr;

	if (igen6_tom <= _4GB)
		return eaddr + igen6_tolud - _4GB;

	if (eaddr >= igen6_tom)
		return eaddr + igen6_tolud - igen6_tom;

	return eaddr;
}

/* ICL-NNPI: IBECC present when CAPID_C bit 15 is CLEAR, on stepping >= 1 */
static bool icl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
		return false;

	return !(CAPID_C_IBECC & v) &&
		(boot_cpu_data.x86_stepping >= 1);
}

/* Tiger Lake family: CAPID_E bit 12 set means IBECC is NOT available */
static bool tgl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
		return false;

	return !(CAPID_E_IBECC & v);
}

/* Meteor Lake-P: CAPID_E bit 18 set means IBECC is NOT available */
static bool mtl_p_ibecc_available(struct pci_dev *pdev)
{
	u32 v;

	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
		return false;

	return !(CAPID_E_IBECC_BIT18 & v);
}

/* Meteor Lake-PS: probe the IBECC-disable bit through a temporary MCHBAR map */
static bool mtl_ps_ibecc_available(struct pci_dev *pdev)
{
#define MCHBAR_MEMSS_IBECCDIS	0x13c00
	void __iomem *window;
	u64 mchbar;
	u32 val;

	if (get_mchbar(pdev, &mchbar))
		return false;

	/* Register lies beyond the first 64KiB window, so map twice the size */
	window = ioremap(mchbar, MCHBAR_SIZE * 2);
	if (!window) {
		igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
		return false;
	}

	val = readl(window + MCHBAR_MEMSS_IBECCDIS);
	iounmap(window);

	/* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */
	return !GET_BITFIELD(val, 6, 6);
}

/*
 * Translate a memory (controller) address back to a system physical
 * address, re-inserting the MMIO hole between TOLUD and 4GB.
 */
static u64 mem_addr_to_sys_addr(u64 maddr)
{
	if (maddr < igen6_tolud)
		return maddr;

	if (igen6_tom <= _4GB)
		return maddr - igen6_tolud + _4GB;

	if (maddr < _4GB)
		return maddr - igen6_tolud + igen6_tom;

	return maddr;
}

/*
 * XOR-fold bits [6, 19] of the masked address into @hash_init and XOR in
 * the interleave bit; used to recompute the memory slice selection hash.
 */
static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
{
	u64 hash_addr = addr & mask, hash = hash_init;
	u64 intlv = (addr >> intlv_bit) & 1;
	int i;

	for (i = 6; i < 20; i++)
		hash ^= (hash_addr >> i) & 1;

	return hash ^ intlv;
}

/*
 * Tiger Lake: rebuild the memory address from an error address by
 * re-inserting the memory slice interleave bit computed from the hash.
 */
static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
{
	u64 maddr, hash, mask, ms_s_size;
	int intlv_bit;
	u32 ms_hash;

	ms_s_size = igen6_pvt->ms_s_size;
	if (eaddr >=
ms_s_size)
		/* Above the interleaved region: single slice, offset by S size */
		return eaddr + ms_s_size;

	ms_hash = igen6_pvt->ms_hash;

	mask = MEM_SLICE_HASH_MASK(ms_hash);
	intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;

	/* Open a gap at intlv_bit for the re-inserted interleave bit */
	maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) |
		GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	hash = mem_slice_hash(maddr, mask, mc, intlv_bit);

	return maddr | (hash << intlv_bit);
}

static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc);

	return mem_addr_to_sys_addr(maddr);
}

/* Tiger Lake: the logged error address is already controller-local */
static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	return eaddr;
}

static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(eaddr);
}

/*
 * Alder Lake: remove the memory controller interleave bit (position from
 * the MC hash register) to obtain the controller-local address.
 */
static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size;
	struct igen6_imc *imc = &igen6_pvt->imc[mc];
	int intlv_bit;
	u32 mc_hash;

	if (eaddr >= 2 * ms_s_size)
		return eaddr - ms_s_size;

	mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET);

	intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;

	/* Drop the interleave bit and close the gap */
	imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit |
		   GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	return imc_addr;
}

/* Raptor Lake-P logs a wider error address (up to bit 45) */
static u64 rpl_p_err_addr(u64 ecclog)
{
	return ECC_ERROR_LOG_ADDR45(ecclog);
}

static struct res_config ehl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xdc00,
	.ibecc_available	= ehl_ibecc_available,
	.ibecc_error_log_offset	= 0x170,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};

static struct res_config icl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xd800,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= icl_ibecc_available,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};

static struct res_config tgl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0x5000,
	.cmf_base		= 0x11000,
	.cmf_size		= 0x800,
	.ms_hash_offset		= 0xac,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= tgl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= tgl_err_addr_to_imc_addr,
};

static struct res_config adl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static struct res_config adl_n_cfg = {
	.machine_check		= true,
	.num_imc		= 1,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static struct res_config rpl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr		= rpl_p_err_addr,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static struct res_config mtl_ps_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_ps_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static struct res_config mtl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_p_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static struct res_config wcl_cfg = {
	.machine_check		= true,
	.num_imc		= 1,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_p_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static struct pci_device_id igen6_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3),
(kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_WCL_SKU1), (kernel_ulong_t)&wcl_cfg },
	{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);

/* Decode the DRAM device data width from the MAD_DIMM register fields */
static enum dev_type get_width(int dimm_l, u32 mad_dimm)
{
	u32 w = dimm_l ? MAD_DIMM_CH_DLW(mad_dimm) :
			 MAD_DIMM_CH_DSW(mad_dimm);

	switch (w) {
	case 0:
		return DEV_X8;
	case 1:
		return DEV_X16;
	case 2:
		return DEV_X32;
	default:
		return DEV_UNKNOWN;
	}
}

/* Decode the DDR type from the MAD_INTER_CHANNEL register */
static enum mem_type get_memory_type(u32 mad_inter)
{
	u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter);

	switch (t) {
	case 0:
		return MEM_DDR4;
	case 1:
		return MEM_DDR3;
	case 2:
		return MEM_LPDDR3;
	case 3:
		return MEM_LPDDR4;
	case 4:
		return MEM_WIO2;
	default:
		return MEM_UNKNOWN;
	}
}

/* XOR-fold the masked address bits [6, 19] to pick the channel index */
static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit)
{
	u64 hash_addr = addr & mask, hash = 0;
	u64 intlv = (addr >> intlv_bit) & 1;
	int i;

	for (i = 6; i < 20; i++)
		hash ^= (hash_addr >> i) & 1;

	return (int)hash ^ intlv;
}

/* Strip the interleave bit out of @addr to get the channel-local address */
static u64 decode_channel_addr(u64 addr, int intlv_bit)
{
	u64 channel_addr;

	/* Remove the interleave bit and shift upper part down to fill gap */
	channel_addr = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit;
	channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1);

	return channel_addr;
}

/*
 * One interleave decode step: given the address, the hash register value
 * and the S-size/L-map of this level, return the selected unit index and
 * the address local to it.
 */
static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map,
			int *idx, u64 *sub_addr)
{
	int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6;

	if (addr > 2 * s_size) {
		/* Above the interleaved region: only the larger unit remains */
		*sub_addr = addr - s_size;
		*idx = l_map;
		return;
	}

	if (CHANNEL_HASH_MODE(hash)) {
		/* Hash-based interleave */
		*sub_addr = decode_channel_addr(addr, intlv_bit);
		*idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit);
	} else {
		/* Plain interleave on address bit 6 */
		*sub_addr = decode_channel_addr(addr, 6);
		*idx = GET_BITFIELD(addr, 6, 6);
	}
}

/*
 * Decode res->imc_addr into channel and sub-channel/DIMM coordinates,
 * filling in the remaining fields of @res. Returns 0 on success or
 * -EINVAL when the address is beyond the top of memory.
 */
static int igen6_decode(struct decoded_addr *res)
{
	struct igen6_imc *imc = &igen6_pvt->imc[res->mc];
	u64 addr = res->imc_addr, sub_addr, s_size;
	int idx, l_map;
	u32 hash;

	if (addr >= igen6_tom) {
		edac_dbg(0, "Address 0x%llx out of range\n", addr);
		return -EINVAL;
	}

	/* Decode channel */
	hash = readl(imc->window + CHANNEL_HASH_OFFSET);
	s_size = imc->ch_s_size;
	l_map = imc->ch_l_map;
	decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr);
	res->channel_idx = idx;
	res->channel_addr = sub_addr;

	/* Decode sub-channel/DIMM */
	hash = readl(imc->window + CHANNEL_EHASH_OFFSET);
	s_size = imc->dimm_s_size[idx];
	l_map = imc->dimm_l_map[idx];
	decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr);
	res->sub_channel_idx = idx;
	res->sub_channel_addr = sub_addr;

	return 0;
}

/* Report one decoded error to the EDAC core, typed from the log's UE bit */
static void igen6_output_error(struct decoded_addr *res,
			       struct mem_ctl_info *mci, u64 ecclog)
{
	enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ?
					 HW_EVENT_ERR_UNCORRECTED :
					 HW_EVENT_ERR_CORRECTED;

	edac_mc_handle_error(type, mci, 1,
			     res->sys_addr >> PAGE_SHIFT,
			     res->sys_addr & ~PAGE_MASK,
			     ECC_ERROR_LOG_SYND(ecclog),
			     res->channel_idx, res->sub_channel_idx,
			     -1, "", "");
}

/* Create the lock-less allocator backed by the static ecclog_buf */
static struct gen_pool *ecclog_gen_pool_create(void)
{
	struct gen_pool *pool;

	pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
	if (!pool)
		return NULL;

	if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
		gen_pool_destroy(pool);
		return NULL;
	}

	return pool;
}

/* Queue one raw error log entry on the lock-less list (NMI-safe path) */
static int ecclog_gen_pool_add(int mc, u64 ecclog)
{
	struct ecclog_node *node;

	node = (void *)gen_pool_alloc(ecclog_pool, sizeof(*node));
	if (!node)
		return -ENOMEM;

	node->mc = mc;
	node->ecclog = ecclog;
	llist_add(&node->llnode, &ecclog_llist);

	return 0;
}

/*
 * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
 * configuration space status register ERRSTS can indicate whether a
 * correctable error or an uncorrectable error occurred. We only use the
 * ECC_ERROR_LOG register to check error type, but need to clear both
 * registers to enable future error events.
 */
static u64 ecclog_read_and_clear(struct igen6_imc *imc)
{
	u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);

	/*
	 * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
	 * the invalid value ~0. This will result in a flood of invalid
	 * error reports in polling mode. Skip it.
	 */
	if (ecclog == ~0)
		return 0;

	/* Neither a CE nor a UE.
Skip it.*/ 821 if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE))) 822 return 0; 823 824 /* Clear CE/UE bits by writing 1s */ 825 writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET); 826 827 return ecclog; 828 } 829 830 static void errsts_clear(struct igen6_imc *imc) 831 { 832 u16 errsts; 833 834 if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) { 835 igen6_printk(KERN_ERR, "Failed to read ERRSTS\n"); 836 return; 837 } 838 839 /* Clear CE/UE bits by writing 1s */ 840 if (errsts & (ERRSTS_CE | ERRSTS_UE)) 841 pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts); 842 } 843 844 static int errcmd_enable_error_reporting(bool enable) 845 { 846 struct igen6_imc *imc = &igen6_pvt->imc[0]; 847 u16 errcmd; 848 int rc; 849 850 rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd); 851 if (rc) 852 return pcibios_err_to_errno(rc); 853 854 if (enable) 855 errcmd |= ERRCMD_CE | ERRSTS_UE; 856 else 857 errcmd &= ~(ERRCMD_CE | ERRSTS_UE); 858 859 rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd); 860 if (rc) 861 return pcibios_err_to_errno(rc); 862 863 return 0; 864 } 865 866 static int ecclog_handler(void) 867 { 868 struct igen6_imc *imc; 869 int i, n = 0; 870 u64 ecclog; 871 872 for (i = 0; i < res_cfg->num_imc; i++) { 873 imc = &igen6_pvt->imc[i]; 874 875 /* errsts_clear() isn't NMI-safe. 
Delay it in the IRQ context */

		ecclog = ecclog_read_and_clear(imc);
		if (!ecclog)
			continue;

		/* Only kick the IRQ work if the log was queued successfully */
		if (!ecclog_gen_pool_add(i, ecclog))
			irq_work_queue(&ecclog_irq_work);

		n++;
	}

	return n;
}

/* Worker: drain the lock-less list, decode and report each logged error */
static void ecclog_work_cb(struct work_struct *work)
{
	struct ecclog_node *node, *tmp;
	struct mem_ctl_info *mci;
	struct llist_node *head;
	struct decoded_addr res;
	u64 eaddr;

	head = llist_del_all(&ecclog_llist);
	if (!head)
		return;

	llist_for_each_entry_safe(node, tmp, head, llnode) {
		memset(&res, 0, sizeof(res));
		/* Use the per-SoC address extractor when one is provided */
		if (res_cfg->err_addr)
			eaddr = res_cfg->err_addr(node->ecclog);
		else
			eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
				ECC_ERROR_LOG_ADDR_SHIFT;
		res.mc	     = node->mc;
		res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
		res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);

		mci = igen6_pvt->imc[res.mc].mci;

		edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog);
		igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n");
		igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr);

		if (!igen6_decode(&res))
			igen6_output_error(&res, mci, node->ecclog);

		gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node));
	}
}

/* IRQ work: clear ERRSTS (not NMI-safe) and schedule the reporting worker */
static void ecclog_irq_work_cb(struct irq_work *irq_work)
{
	int i;

	for (i = 0; i < res_cfg->num_imc; i++)
		errsts_clear(&igen6_pvt->imc[i]);

	if (!llist_empty(&ecclog_llist))
		schedule_work(&ecclog_work);
}

/* NMI handler for SERR#-signaled In-Band ECC errors */
static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	unsigned char reason;

	if (!ecclog_handler())
		return NMI_DONE;

	/*
	 * Both In-Band ECC correctable error and uncorrectable error are
	 * reported by SERR# NMI. The NMI generic code (see pci_serr_error())
	 * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to
	 * re-enable the SERR# NMI after NMI handling. So clear this bit here
	 * to re-enable SERR# NMI for receiving future In-Band ECC errors.
	 */
	reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK;
	reason |= NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
	reason &= ~NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);

	return NMI_HANDLED;
}

/* MCE notifier callback for machine-check-signaled In-Band ECC errors */
static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
			      void *data)
{
	struct mce *mce = (struct mce *)data;
	char *type;

	if (mce->kflags & MCE_HANDLED_CEC)
		return NOTIFY_DONE;

	/*
	 * Ignore unless this is a memory related error.
	 * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
	 * since this bit isn't set on some CPU (e.g., Tiger Lake UP3).
	 */
	if ((mce->status & 0xefff) >> 7 != 1)
		return NOTIFY_DONE;

	if (mce->mcgstatus & MCG_STATUS_MCIP)
		type = "Exception";
	else
		type = "Event";

	edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
		 mce->extcpu, type, mce->mcgstatus,
		 mce->bank, mce->status);
	edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
	edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
	edac_dbg(0, "MISC 0x%llx\n", mce->misc);
	edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
		 mce->cpuvendor, mce->cpuid, mce->time,
		 mce->socketid, mce->apicid);
	/*
	 * We just use the Machine Check for the memory error notification.
	 * Each memory controller is associated with an IBECC instance.
	 * Directly read and clear the error information(error address and
	 * error type) on all the IBECC instances so that we know on which
	 * memory controller the memory error(s) occurred.
	 */
	if (!ecclog_handler())
		return NOTIFY_DONE;

	mce->kflags |= MCE_HANDLED_EDAC;

	return NOTIFY_DONE;
}

static struct notifier_block ecclog_mce_dec = {
	.notifier_call	= ecclog_mce_handler,
	.priority	= MCE_PRIO_EDAC,
};

/* Return true if IBECC is activated on this memory controller */
static bool igen6_check_ecc(struct igen6_imc *imc)
{
	u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);

	return !!(activate & IBECC_ACTIVATE_EN);
}

/*
 * Read the MAD registers and populate the EDAC DIMM layer for this
 * controller. Returns 0 on success, -ENODEV when DIMMs are present but
 * In-Band ECC is disabled.
 */
static int igen6_get_dimm_config(struct mem_ctl_info *mci)
{
	struct igen6_imc *imc = mci->pvt_info;
	u32 mad_inter, mad_intra, mad_dimm;
	int i, j, ndimms, mc = imc->mc;
	struct dimm_info *dimm;
	enum mem_type mtype;
	enum dev_type dtype;
	u64 dsize;
	bool ecc;

	edac_dbg(2, "\n");

	mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET);
	mtype = get_memory_type(mad_inter);
	ecc = igen6_check_ecc(imc);
	imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter);
	imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter);

	for (i = 0; i < NUM_CHANNELS; i++) {
		mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4);
		mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4);

		imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
		imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
		imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
		imc->size += imc->dimm_s_size[i];
		imc->size += imc->dimm_l_size[i];
		ndimms = 0;

		for (j = 0; j < NUM_DIMMS; j++) {
			dimm = edac_get_dimm(mci, i, j, 0);

			/* dimm_l_map selects which slot is the "L" DIMM */
			if (j ^ imc->dimm_l_map[i]) {
				dtype = get_width(0, mad_dimm);
				dsize = imc->dimm_s_size[i];
			} else {
				dtype = get_width(1, mad_dimm);
				dsize = imc->dimm_l_size[i];
			}

			if (!dsize)
				continue;

			dimm->grain = 64;
			dimm->mtype = mtype;
			dimm->dtype = dtype;
			dimm->nr_pages = MiB_TO_PAGES(dsize >> 20);
dimm->edac_mode = EDAC_SECDED; 1067 snprintf(dimm->label, sizeof(dimm->label), 1068 "MC#%d_Chan#%d_DIMM#%d", mc, i, j); 1069 edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n", 1070 mc, i, j, dsize >> 20, dimm->nr_pages); 1071 1072 ndimms++; 1073 } 1074 1075 if (ndimms && !ecc) { 1076 igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc); 1077 return -ENODEV; 1078 } 1079 } 1080 1081 edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20); 1082 1083 return 0; 1084 } 1085 1086 #ifdef CONFIG_EDAC_DEBUG 1087 /* Top of upper usable DRAM */ 1088 static u64 igen6_touud; 1089 #define TOUUD_OFFSET 0xa8 1090 1091 static void igen6_reg_dump(struct igen6_imc *imc) 1092 { 1093 int i; 1094 1095 edac_dbg(2, "CHANNEL_HASH : 0x%x\n", 1096 readl(imc->window + CHANNEL_HASH_OFFSET)); 1097 edac_dbg(2, "CHANNEL_EHASH : 0x%x\n", 1098 readl(imc->window + CHANNEL_EHASH_OFFSET)); 1099 edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n", 1100 readl(imc->window + MAD_INTER_CHANNEL_OFFSET)); 1101 edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n", 1102 readq(imc->window + ECC_ERROR_LOG_OFFSET)); 1103 1104 for (i = 0; i < NUM_CHANNELS; i++) { 1105 edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i, 1106 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4)); 1107 edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i, 1108 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4)); 1109 } 1110 edac_dbg(2, "TOLUD : 0x%x", igen6_tolud); 1111 edac_dbg(2, "TOUUD : 0x%llx", igen6_touud); 1112 edac_dbg(2, "TOM : 0x%llx", igen6_tom); 1113 } 1114 1115 static struct dentry *igen6_test; 1116 1117 static int debugfs_u64_set(void *data, u64 val) 1118 { 1119 u64 ecclog; 1120 1121 if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) { 1122 edac_dbg(0, "Address 0x%llx out of range\n", val); 1123 return 0; 1124 } 1125 1126 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); 1127 1128 val >>= ECC_ERROR_LOG_ADDR_SHIFT; 1129 ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE; 1130 1131 if 
(!ecclog_gen_pool_add(0, ecclog)) 1132 irq_work_queue(&ecclog_irq_work); 1133 1134 return 0; 1135 } 1136 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); 1137 1138 static void igen6_debug_setup(void) 1139 { 1140 igen6_test = edac_debugfs_create_dir("igen6_test"); 1141 if (!igen6_test) 1142 return; 1143 1144 if (!edac_debugfs_create_file("addr", 0200, igen6_test, 1145 NULL, &fops_u64_wo)) { 1146 debugfs_remove(igen6_test); 1147 igen6_test = NULL; 1148 } 1149 } 1150 1151 static void igen6_debug_teardown(void) 1152 { 1153 debugfs_remove_recursive(igen6_test); 1154 } 1155 #else 1156 static void igen6_reg_dump(struct igen6_imc *imc) {} 1157 static void igen6_debug_setup(void) {} 1158 static void igen6_debug_teardown(void) {} 1159 #endif 1160 1161 static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar) 1162 { 1163 union { 1164 u64 v; 1165 struct { 1166 u32 v_lo; 1167 u32 v_hi; 1168 }; 1169 } u; 1170 1171 edac_dbg(2, "\n"); 1172 1173 if (!res_cfg->ibecc_available(pdev)) { 1174 edac_dbg(2, "No In-Band ECC IP\n"); 1175 goto fail; 1176 } 1177 1178 if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) { 1179 igen6_printk(KERN_ERR, "Failed to read TOLUD\n"); 1180 goto fail; 1181 } 1182 1183 igen6_tolud &= GENMASK(31, 20); 1184 1185 if (pci_read_config_dword(pdev, TOM_OFFSET, &u.v_lo)) { 1186 igen6_printk(KERN_ERR, "Failed to read lower TOM\n"); 1187 goto fail; 1188 } 1189 1190 if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &u.v_hi)) { 1191 igen6_printk(KERN_ERR, "Failed to read upper TOM\n"); 1192 goto fail; 1193 } 1194 1195 igen6_tom = u.v & GENMASK_ULL(38, 20); 1196 1197 if (get_mchbar(pdev, mchbar)) 1198 goto fail; 1199 1200 #ifdef CONFIG_EDAC_DEBUG 1201 if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo)) 1202 edac_dbg(2, "Failed to read lower TOUUD\n"); 1203 else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &u.v_hi)) 1204 edac_dbg(2, "Failed to read upper TOUUD\n"); 1205 else 1206 igen6_touud = u.v & GENMASK_ULL(38, 20); 1207 
#endif 1208 1209 return 0; 1210 fail: 1211 return -ENODEV; 1212 } 1213 1214 static void igen6_check(struct mem_ctl_info *mci) 1215 { 1216 struct igen6_imc *imc = mci->pvt_info; 1217 u64 ecclog; 1218 1219 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ 1220 ecclog = ecclog_read_and_clear(imc); 1221 if (!ecclog) 1222 return; 1223 1224 if (!ecclog_gen_pool_add(imc->mc, ecclog)) 1225 irq_work_queue(&ecclog_irq_work); 1226 } 1227 1228 /* Check whether the memory controller is absent. */ 1229 static bool igen6_imc_absent(void __iomem *window) 1230 { 1231 return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0; 1232 } 1233 1234 static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev) 1235 { 1236 struct edac_mc_layer layers[2]; 1237 struct mem_ctl_info *mci; 1238 struct igen6_imc *imc; 1239 int rc; 1240 1241 edac_dbg(2, "\n"); 1242 1243 layers[0].type = EDAC_MC_LAYER_CHANNEL; 1244 layers[0].size = NUM_CHANNELS; 1245 layers[0].is_virt_csrow = false; 1246 layers[1].type = EDAC_MC_LAYER_SLOT; 1247 layers[1].size = NUM_DIMMS; 1248 layers[1].is_virt_csrow = true; 1249 1250 mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); 1251 if (!mci) { 1252 rc = -ENOMEM; 1253 goto fail; 1254 } 1255 1256 mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc); 1257 if (!mci->ctl_name) { 1258 rc = -ENOMEM; 1259 goto fail2; 1260 } 1261 1262 mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4; 1263 mci->edac_ctl_cap = EDAC_FLAG_SECDED; 1264 mci->edac_cap = EDAC_FLAG_SECDED; 1265 mci->mod_name = EDAC_MOD_STR; 1266 mci->dev_name = pci_name(pdev); 1267 if (edac_op_state == EDAC_OPSTATE_POLL) 1268 mci->edac_check = igen6_check; 1269 mci->pvt_info = &igen6_pvt->imc[mc]; 1270 1271 imc = mci->pvt_info; 1272 device_initialize(&imc->dev); 1273 /* 1274 * EDAC core uses mci->pdev(pointer of structure device) as 1275 * memory controller ID. 
The client SoCs attach one or more 1276 * memory controllers to single pci_dev (single pci_dev->dev 1277 * can be for multiple memory controllers). 1278 * 1279 * To make mci->pdev unique, assign pci_dev->dev to mci->pdev 1280 * for the first memory controller and assign a unique imc->dev 1281 * to mci->pdev for each non-first memory controller. 1282 */ 1283 mci->pdev = mc ? &imc->dev : &pdev->dev; 1284 imc->mc = mc; 1285 imc->pdev = pdev; 1286 imc->window = window; 1287 1288 igen6_reg_dump(imc); 1289 1290 rc = igen6_get_dimm_config(mci); 1291 if (rc) 1292 goto fail3; 1293 1294 rc = edac_mc_add_mc(mci); 1295 if (rc) { 1296 igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc); 1297 goto fail3; 1298 } 1299 1300 imc->mci = mci; 1301 return 0; 1302 fail3: 1303 mci->pvt_info = NULL; 1304 kfree(mci->ctl_name); 1305 fail2: 1306 edac_mc_free(mci); 1307 fail: 1308 return rc; 1309 } 1310 1311 static void igen6_unregister_mcis(void) 1312 { 1313 struct mem_ctl_info *mci; 1314 struct igen6_imc *imc; 1315 int i; 1316 1317 edac_dbg(2, "\n"); 1318 1319 for (i = 0; i < res_cfg->num_imc; i++) { 1320 imc = &igen6_pvt->imc[i]; 1321 mci = imc->mci; 1322 if (!mci) 1323 continue; 1324 1325 edac_mc_del_mc(mci->pdev); 1326 kfree(mci->ctl_name); 1327 mci->pvt_info = NULL; 1328 edac_mc_free(mci); 1329 iounmap(imc->window); 1330 } 1331 } 1332 1333 static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar) 1334 { 1335 void __iomem *window; 1336 int lmc, pmc, rc; 1337 u64 base; 1338 1339 for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) { 1340 base = mchbar + pmc * MCHBAR_SIZE; 1341 window = ioremap(base, MCHBAR_SIZE); 1342 if (!window) { 1343 igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc); 1344 rc = -ENOMEM; 1345 goto out_unregister_mcis; 1346 } 1347 1348 if (igen6_imc_absent(window)) { 1349 iounmap(window); 1350 edac_dbg(2, "Skip absent mc%d\n", pmc); 1351 continue; 1352 } 1353 1354 rc = igen6_register_mci(lmc, window, pdev); 1355 if (rc) 1356 goto out_iounmap; 
1357 1358 /* Done, if all present MCs are detected and registered. */ 1359 if (++lmc >= res_cfg->num_imc) 1360 break; 1361 } 1362 1363 if (!lmc) { 1364 igen6_printk(KERN_ERR, "No mc found.\n"); 1365 return -ENODEV; 1366 } 1367 1368 if (lmc < res_cfg->num_imc) { 1369 igen6_printk(KERN_DEBUG, "Expected %d mcs, but only %d detected.", 1370 res_cfg->num_imc, lmc); 1371 res_cfg->num_imc = lmc; 1372 } 1373 1374 return 0; 1375 1376 out_iounmap: 1377 iounmap(window); 1378 1379 out_unregister_mcis: 1380 igen6_unregister_mcis(); 1381 1382 return rc; 1383 } 1384 1385 static int igen6_mem_slice_setup(u64 mchbar) 1386 { 1387 struct igen6_imc *imc = &igen6_pvt->imc[0]; 1388 u64 base = mchbar + res_cfg->cmf_base; 1389 u32 offset = res_cfg->ms_hash_offset; 1390 u32 size = res_cfg->cmf_size; 1391 u64 ms_s_size, ms_hash; 1392 void __iomem *cmf; 1393 int ms_l_map; 1394 1395 edac_dbg(2, "\n"); 1396 1397 if (imc[0].size < imc[1].size) { 1398 ms_s_size = imc[0].size; 1399 ms_l_map = 1; 1400 } else { 1401 ms_s_size = imc[1].size; 1402 ms_l_map = 0; 1403 } 1404 1405 igen6_pvt->ms_s_size = ms_s_size; 1406 igen6_pvt->ms_l_map = ms_l_map; 1407 1408 edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n", 1409 ms_s_size >> 20, ms_l_map); 1410 1411 if (!size) 1412 return 0; 1413 1414 cmf = ioremap(base, size); 1415 if (!cmf) { 1416 igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base); 1417 return -ENODEV; 1418 } 1419 1420 ms_hash = readq(cmf + offset); 1421 igen6_pvt->ms_hash = ms_hash; 1422 1423 edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash); 1424 1425 iounmap(cmf); 1426 1427 return 0; 1428 } 1429 1430 static int register_err_handler(void) 1431 { 1432 int rc; 1433 1434 if (res_cfg->machine_check) { 1435 mce_register_decode_chain(&ecclog_mce_dec); 1436 return 0; 1437 } 1438 1439 rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler, 1440 0, IGEN6_NMI_NAME); 1441 if (rc) { 1442 igen6_printk(KERN_ERR, "Failed to register NMI handler\n"); 1443 return rc; 1444 } 1445 1446 return 0; 1447 } 
1448 1449 static void unregister_err_handler(void) 1450 { 1451 if (res_cfg->machine_check) { 1452 mce_unregister_decode_chain(&ecclog_mce_dec); 1453 return; 1454 } 1455 1456 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); 1457 } 1458 1459 static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent) 1460 { 1461 /* 1462 * Quirk: Certain SoCs' error reporting interrupts don't work. 1463 * Force polling mode for them to ensure that memory error 1464 * events can be handled. 1465 */ 1466 if (ent->device == DID_ADL_N_SKU4) { 1467 edac_op_state = EDAC_OPSTATE_POLL; 1468 return; 1469 } 1470 1471 /* Set the mode according to the configuration data. */ 1472 if (cfg->machine_check) 1473 edac_op_state = EDAC_OPSTATE_INT; 1474 else 1475 edac_op_state = EDAC_OPSTATE_NMI; 1476 } 1477 1478 static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 1479 { 1480 u64 mchbar; 1481 int rc; 1482 1483 edac_dbg(2, "\n"); 1484 1485 igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL); 1486 if (!igen6_pvt) 1487 return -ENOMEM; 1488 1489 res_cfg = (struct res_config *)ent->driver_data; 1490 1491 rc = igen6_pci_setup(pdev, &mchbar); 1492 if (rc) 1493 goto fail; 1494 1495 opstate_set(res_cfg, ent); 1496 1497 rc = igen6_register_mcis(pdev, mchbar); 1498 if (rc) 1499 goto fail; 1500 1501 if (res_cfg->num_imc > 1) { 1502 rc = igen6_mem_slice_setup(mchbar); 1503 if (rc) 1504 goto fail2; 1505 } 1506 1507 ecclog_pool = ecclog_gen_pool_create(); 1508 if (!ecclog_pool) { 1509 rc = -ENOMEM; 1510 goto fail2; 1511 } 1512 1513 INIT_WORK(&ecclog_work, ecclog_work_cb); 1514 init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb); 1515 1516 rc = register_err_handler(); 1517 if (rc) 1518 goto fail3; 1519 1520 /* Enable error reporting */ 1521 rc = errcmd_enable_error_reporting(true); 1522 if (rc) { 1523 igen6_printk(KERN_ERR, "Failed to enable error reporting\n"); 1524 goto fail4; 1525 } 1526 1527 /* Check if any pending errors before/during the registration of the 
error handler */ 1528 ecclog_handler(); 1529 1530 igen6_debug_setup(); 1531 return 0; 1532 fail4: 1533 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); 1534 fail3: 1535 gen_pool_destroy(ecclog_pool); 1536 fail2: 1537 igen6_unregister_mcis(); 1538 fail: 1539 kfree(igen6_pvt); 1540 return rc; 1541 } 1542 1543 static void igen6_remove(struct pci_dev *pdev) 1544 { 1545 edac_dbg(2, "\n"); 1546 1547 igen6_debug_teardown(); 1548 errcmd_enable_error_reporting(false); 1549 unregister_err_handler(); 1550 irq_work_sync(&ecclog_irq_work); 1551 flush_work(&ecclog_work); 1552 gen_pool_destroy(ecclog_pool); 1553 igen6_unregister_mcis(); 1554 kfree(igen6_pvt); 1555 } 1556 1557 static struct pci_driver igen6_driver = { 1558 .name = EDAC_MOD_STR, 1559 .probe = igen6_probe, 1560 .remove = igen6_remove, 1561 .id_table = igen6_pci_tbl, 1562 }; 1563 1564 static int __init igen6_init(void) 1565 { 1566 const char *owner; 1567 int rc; 1568 1569 edac_dbg(2, "\n"); 1570 1571 if (ghes_get_devices()) 1572 return -EBUSY; 1573 1574 owner = edac_get_owner(); 1575 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) 1576 return -EBUSY; 1577 1578 rc = pci_register_driver(&igen6_driver); 1579 if (rc) 1580 return rc; 1581 1582 igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION); 1583 1584 return 0; 1585 } 1586 1587 static void __exit igen6_exit(void) 1588 { 1589 edac_dbg(2, "\n"); 1590 1591 pci_unregister_driver(&igen6_driver); 1592 } 1593 1594 module_init(igen6_init); 1595 module_exit(igen6_exit); 1596 1597 MODULE_LICENSE("GPL v2"); 1598 MODULE_AUTHOR("Qiuxu Zhuo"); 1599 MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC"); 1600