1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver. 4 * Originally split out from the skx_edac driver. 5 * 6 * Copyright (c) 2018, Intel Corporation. 7 */ 8 9 #ifndef _SKX_COMM_EDAC_H 10 #define _SKX_COMM_EDAC_H 11 12 #include <linux/bits.h> 13 #include <asm/mce.h> 14 15 #define MSG_SIZE 1024 16 17 /* 18 * Debug macros 19 */ 20 #define skx_printk(level, fmt, arg...) \ 21 edac_printk(level, "skx", fmt, ##arg) 22 23 #define skx_mc_printk(mci, level, fmt, arg...) \ 24 edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) 25 26 /* 27 * Get a bit field at register value <v>, from bit <lo> to bit <hi> 28 */ 29 #define GET_BITFIELD(v, lo, hi) \ 30 (((v) & GENMASK_ULL((hi), (lo))) >> (lo)) 31 32 #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ 33 #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ 34 35 #define I10NM_NUM_DDR_CHANNELS 2 36 #define I10NM_NUM_DDR_DIMMS 2 37 38 #define I10NM_NUM_HBM_CHANNELS 2 39 #define I10NM_NUM_HBM_DIMMS 1 40 41 #define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) 42 #define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) 43 44 #define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS) 45 #define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS) 46 47 #define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15) 48 #define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i) 49 50 #define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) 51 #define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ 52 53 /* 54 * According to Intel Architecture spec vol 3B, 55 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" 56 * memory errors should fit one of these masks: 57 * 000f 0000 1mmm cccc (binary) 58 * 000f 0010 1mmm cccc (binary) [RAM used as cache] 59 * where: 60 * f = Correction Report Filtering Bit. If 1, subsequent errors 61 * won't be shown 62 * mmm = error type 63 * cccc = channel 64 */ 65 #define MCACOD_MEM_ERR_MASK 0xef80 66 /* 67 * Errors from either the memory of the 1-level memory system or the 68 * 2nd level memory (the slow "far" memory) of the 2-level memory system. 69 */ 70 #define MCACOD_MEM_CTL_ERR 0x80 71 /* 72 * Errors from the 1st level memory (the fast "near" memory as cache) 73 * of the 2-level memory system. 74 */ 75 #define MCACOD_EXT_MEM_ERR 0x280 76 77 /* Max RRL register sets per {,sub-,pseudo-}channel. */ 78 #define NUM_RRL_SET 4 79 /* Max RRL registers per set. */ 80 #define NUM_RRL_REG 7 81 /* Max correctable error count registers. */ 82 #define NUM_CECNT_REG 8 83 84 /* Error source from which the RRL registers log errors. */ 85 enum rrl_source_type { 86 /* Last read error from patrol scrub. */ 87 RRL_SRC_LRE_SCRUB, 88 /* Last read error from demand. */ 89 RRL_SRC_LRE_DEMAND, 90 /* First read error from patrol scrub. */ 91 RRL_SRC_FRE_SCRUB, 92 /* First read error from demand. */ 93 RRL_SRC_FRE_DEMAND, 94 }; 95 96 enum rrl_ctrl_mode { 97 /* Linux does not control RRL or reports values. */ 98 RRL_CTRL_NONE, 99 /* Firmware retains control. Linux only reports values. */ 100 RRL_CTRL_BIOS, 101 /* Linux takes control, resets mode bits, and clears valid/UC bits; reports values. */ 102 RRL_CTRL_LINUX, 103 }; 104 105 /* RRL registers per {,sub-,pseudo-}channel. */ 106 struct reg_rrl { 107 /* RRL register parts. */ 108 int set_num, reg_num; 109 enum rrl_source_type sources[NUM_RRL_SET]; 110 u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; 111 /* RRL register widths in byte per set. */ 112 u8 widths[NUM_RRL_REG]; 113 /* RRL control bits of the first register per set. */ 114 u32 v_mask; 115 u32 uc_mask; 116 u32 over_mask; 117 u32 en_patspr_mask; 118 u32 noover_mask; 119 u32 en_mask; 120 121 /* CORRERRCNT register parts. */ 122 int cecnt_num; 123 u32 cecnt_offsets[NUM_CECNT_REG]; 124 u8 cecnt_widths[NUM_CECNT_REG]; 125 }; 126 127 /* 128 * Each cpu socket contains some pci devices that provide global 129 * information, and also some that are local to each of the two 130 * memory controllers on the die. 131 */ 132 struct skx_dev { 133 /* {skx,i10nm}_edac */ 134 u8 bus[4]; 135 int seg; 136 struct pci_dev *sad_all; 137 struct pci_dev *util_all; 138 struct pci_dev *uracu; 139 struct pci_dev *pcu_cr3; 140 u32 mcroute; 141 142 /* imh_edac */ 143 /* System-view MMIO base physical addresses. */ 144 u64 mmio_base_h_north; 145 u64 mmio_base_h_south; 146 int pkg; 147 148 int num_imc; 149 struct list_head list; 150 struct skx_imc { 151 /* i10nm_edac */ 152 struct pci_dev *mdev; 153 154 /* imh_edac */ 155 struct device *dev; 156 157 struct mem_ctl_info *mci; 158 void __iomem *mbase; 159 int chan_mmio_sz; 160 int num_channels; /* channels per memory controller */ 161 int num_dimms; /* dimms per channel */ 162 bool hbm_mc; 163 u8 mc; /* system wide mc# */ 164 u8 lmc; /* socket relative mc# */ 165 u8 src_id; 166 /* 167 * Some server BIOS may hide certain memory controllers, and the 168 * EDAC driver skips those hidden memory controllers. However, the 169 * ADXL still decodes memory error address using physical memory 170 * controller indices. The mapping table is used to convert the 171 * physical indices (reported by ADXL) to the logical indices 172 * (used the EDAC driver) of present memory controllers during the 173 * error handling process. 174 */ 175 u8 mc_mapping; 176 struct skx_channel { 177 struct pci_dev *cdev; 178 struct pci_dev *edev; 179 /* 180 * Two groups of RRL control registers per channel to save default RRL 181 * settings of two {sub-,pseudo-}channels in Linux RRL control mode. 182 */ 183 u32 rrl_ctl[2][NUM_RRL_SET]; 184 struct skx_dimm { 185 u8 close_pg; 186 u8 bank_xor_enable; 187 u8 fine_grain_bank; 188 u8 rowbits; 189 u8 colbits; 190 } dimms[NUM_DIMMS]; 191 } chan[NUM_CHANNELS]; 192 } imc[]; 193 }; 194 195 struct skx_pvt { 196 struct skx_imc *imc; 197 }; 198 199 enum type { 200 SKX, 201 I10NM, 202 SPR, 203 GNR, 204 DMR, 205 }; 206 207 enum { 208 INDEX_SOCKET, 209 INDEX_MEMCTRL, 210 INDEX_CHANNEL, 211 INDEX_DIMM, 212 INDEX_CS, 213 INDEX_SUBCH, 214 INDEX_NM_FIRST, 215 INDEX_NM_MEMCTRL = INDEX_NM_FIRST, 216 INDEX_NM_CHANNEL, 217 INDEX_NM_DIMM, 218 INDEX_NM_CS, 219 INDEX_NM_SUBCH, 220 INDEX_MAX 221 }; 222 223 enum error_source { 224 ERR_SRC_1LM, 225 ERR_SRC_2LM_NM, 226 ERR_SRC_2LM_FM, 227 ERR_SRC_NOT_MEMORY, 228 }; 229 230 #define BIT_SUBCH BIT_ULL(INDEX_SUBCH) 231 #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) 232 #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) 233 #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) 234 #define BIT_NM_CS BIT_ULL(INDEX_NM_CS) 235 #define BIT_NM_SUBCH BIT_ULL(INDEX_NM_SUBCH) 236 237 struct decoded_addr { 238 struct mce *mce; 239 struct skx_dev *dev; 240 u64 addr; 241 int socket; 242 int imc; 243 int channel; 244 u64 chan_addr; 245 int sktways; 246 int chanways; 247 int dimm; 248 int cs; 249 int subch; 250 int rank; 251 int channel_rank; 252 u64 rank_address; 253 int row; 254 int column; 255 int bank_address; 256 int bank_group; 257 bool decoded_by_adxl; 258 }; 259 260 struct pci_bdf { 261 u32 bus : 8; 262 u32 dev : 5; 263 u32 fun : 3; 264 }; 265 266 struct res_config { 267 enum type type; 268 /* DDR memory controllers per socket */ 269 int ddr_imc_num; 270 /* DDR channels per DDR memory controller */ 271 int ddr_chan_num; 272 /* DDR DIMMs per DDR memory channel */ 273 int ddr_dimm_num; 274 /* Per DDR channel memory-mapped I/O size */ 275 int ddr_chan_mmio_sz; 276 /* HBM memory controllers per socket */ 277 int hbm_imc_num; 278 /* HBM channels per HBM memory controller */ 279 int hbm_chan_num; 280 /* HBM DIMMs per HBM memory channel */ 281 int hbm_dimm_num; 282 /* Per HBM channel memory-mapped I/O size */ 283 int hbm_chan_mmio_sz; 284 bool support_ddr5; 285 /* RRL register sets per DDR channel */ 286 struct reg_rrl *reg_rrl_ddr[2]; 287 /* RRL register sets per HBM channel */ 288 struct reg_rrl *reg_rrl_hbm[2]; 289 /* RRL control mode */ 290 enum rrl_ctrl_mode rrl_ctrl_mode; 291 union { 292 /* {skx,i10nm}_edac */ 293 struct { 294 /* Configuration agent device ID */ 295 unsigned int decs_did; 296 /* Default bus number configuration register offset */ 297 int busno_cfg_offset; 298 struct pci_bdf sad_all_bdf; 299 struct pci_bdf pcu_cr3_bdf; 300 struct pci_bdf util_all_bdf; 301 struct pci_bdf uracu_bdf; 302 struct pci_bdf ddr_mdev_bdf; 303 struct pci_bdf hbm_mdev_bdf; 304 int sad_all_offset; 305 }; 306 /* imh_edac */ 307 struct { 308 /* MMIO base physical address in local package view */ 309 u64 mmio_base_l_north; 310 u64 mmio_base_l_south; 311 u64 ddr_imc_base; 312 u64 ddr_reg_mcmtr_offset; 313 u8 ddr_reg_mcmtr_width; 314 u64 ddr_reg_dimmmtr_offset; 315 u8 ddr_reg_dimmmtr_width; 316 u64 ubox_base; 317 u32 ubox_size; 318 u32 ubox_reg_mmio_base_offset; 319 u8 ubox_reg_mmio_base_width; 320 u32 ubox_reg_socket_id_offset; 321 u8 ubox_reg_socket_id_width; 322 u64 pcu_base; 323 u32 pcu_size; 324 u32 pcu_reg_capid3_offset; 325 u8 pcu_reg_capid3_width; 326 u64 sca_base; 327 u32 sca_size; 328 u32 sca_reg_tolm_offset; 329 u8 sca_reg_tolm_width; 330 u32 sca_reg_tohm_offset; 331 u8 sca_reg_tohm_width; 332 u64 ha_base; 333 u32 ha_size; 334 u32 ha_reg_mode_offset; 335 u8 ha_reg_mode_width; 336 }; 337 }; 338 }; 339 340 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, 341 struct res_config *cfg); 342 typedef bool (*skx_decode_f)(struct decoded_addr *res); 343 typedef void (*skx_show_rrl_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err); 344 345 u64 skx_readx(void __iomem *addr, u8 width); 346 u64 skx_read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width); 347 void skx_write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val); 348 int skx_adxl_get(void); 349 void skx_adxl_put(void); 350 void skx_set_decode(skx_decode_f decode); 351 void skx_set_show_rrl(skx_show_rrl_f rrl); 352 void skx_show_rrl(struct decoded_addr *res, char *msg, int len, bool scrub_err); 353 void skx_enable_rrl(bool enable); 354 void skx_set_mem_cfg(bool mem_cfg_2lm); 355 void skx_set_res_cfg(struct res_config *cfg); 356 void skx_init_mc_mapping(struct skx_dev *d); 357 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); 358 359 int skx_get_src_id(struct skx_dev *d, int off, u8 *id); 360 361 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); 362 363 struct list_head *skx_get_edac_list(void); 364 365 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); 366 void skx_set_hi_lo(u64 tolm, u64 tohm); 367 368 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, 369 struct skx_imc *imc, int chan, int dimmno, 370 struct res_config *cfg); 371 372 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, 373 int chan, int dimmno, const char *mod_str); 374 375 int skx_register_mci(struct skx_imc *imc, struct device *dev, const char *dev_name, 376 const char *ctl_name, const char *mod_str, 377 get_dimm_config_f get_dimm_config, 378 struct res_config *cfg); 379 380 int skx_mce_check_error(struct notifier_block *nb, unsigned long val, 381 void *data); 382 383 void skx_remove(void); 384 385 #ifdef CONFIG_EDAC_DEBUG 386 void skx_setup_debug(const char *name); 387 void skx_teardown_debug(void); 388 #else 389 static inline void skx_setup_debug(const char *name) {} 390 static inline void skx_teardown_debug(void) {} 391 #endif 392 393 #endif /* _SKX_COMM_EDAC_H */ 394