1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver. 4 * Originally split out from the skx_edac driver. 5 * 6 * Copyright (c) 2018, Intel Corporation. 7 */ 8 9 #ifndef _SKX_COMM_EDAC_H 10 #define _SKX_COMM_EDAC_H 11 12 #include <linux/bits.h> 13 #include <asm/mce.h> 14 15 #define MSG_SIZE 1024 16 17 /* 18 * Debug macros 19 */ 20 #define skx_printk(level, fmt, arg...) \ 21 edac_printk(level, "skx", fmt, ##arg) 22 23 #define skx_mc_printk(mci, level, fmt, arg...) \ 24 edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) 25 26 /* 27 * Get a bit field at register value <v>, from bit <lo> to bit <hi> 28 */ 29 #define GET_BITFIELD(v, lo, hi) \ 30 (((v) & GENMASK_ULL((hi), (lo))) >> (lo)) 31 32 #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ 33 #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ 34 35 #define I10NM_NUM_DDR_CHANNELS 2 36 #define I10NM_NUM_DDR_DIMMS 2 37 38 #define I10NM_NUM_HBM_CHANNELS 2 39 #define I10NM_NUM_HBM_DIMMS 1 40 41 #define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) 42 #define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) 43 44 #define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS) 45 #define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS) 46 47 #define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15) 48 #define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i) 49 50 #define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) 51 #define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ 52 53 /* 54 * According to Intel Architecture spec vol 3B, 55 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" 56 * memory errors should fit one of these masks: 57 * 000f 0000 1mmm cccc (binary) 58 * 000f 0010 1mmm cccc (binary) [RAM used as cache] 59 * where: 60 * f = Correction Report Filtering Bit. If 1, subsequent errors 61 * won't be shown 62 * mmm = error type 63 * cccc = channel 64 */ 65 #define MCACOD_MEM_ERR_MASK 0xef80 66 /* 67 * Errors from either the memory of the 1-level memory system or the 68 * 2nd level memory (the slow "far" memory) of the 2-level memory system. 69 */ 70 #define MCACOD_MEM_CTL_ERR 0x80 71 /* 72 * Errors from the 1st level memory (the fast "near" memory as cache) 73 * of the 2-level memory system. 74 */ 75 #define MCACOD_EXT_MEM_ERR 0x280 76 77 /* Max RRL register sets per {,sub-,pseudo-}channel. */ 78 #define NUM_RRL_SET 4 79 /* Max RRL registers per set. */ 80 #define NUM_RRL_REG 6 81 /* Max correctable error count registers. */ 82 #define NUM_CECNT_REG 8 83 84 /* Modes of RRL register set. */ 85 enum rrl_mode { 86 /* Last read error from patrol scrub. */ 87 LRE_SCRUB, 88 /* Last read error from demand. */ 89 LRE_DEMAND, 90 /* First read error from patrol scrub. */ 91 FRE_SCRUB, 92 /* First read error from demand. */ 93 FRE_DEMAND, 94 }; 95 96 /* RRL registers per {,sub-,pseudo-}channel. */ 97 struct reg_rrl { 98 /* RRL register parts. */ 99 int set_num, reg_num; 100 enum rrl_mode modes[NUM_RRL_SET]; 101 u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; 102 /* RRL register widths in byte per set. */ 103 u8 widths[NUM_RRL_REG]; 104 /* RRL control bits of the first register per set. */ 105 u32 v_mask; 106 u32 uc_mask; 107 u32 over_mask; 108 u32 en_patspr_mask; 109 u32 noover_mask; 110 u32 en_mask; 111 112 /* CORRERRCNT register parts. */ 113 int cecnt_num; 114 u32 cecnt_offsets[NUM_CECNT_REG]; 115 u8 cecnt_widths[NUM_CECNT_REG]; 116 }; 117 118 /* 119 * Each cpu socket contains some pci devices that provide global 120 * information, and also some that are local to each of the two 121 * memory controllers on the die. 122 */ 123 struct skx_dev { 124 /* {skx,i10nm}_edac */ 125 u8 bus[4]; 126 int seg; 127 struct pci_dev *sad_all; 128 struct pci_dev *util_all; 129 struct pci_dev *uracu; 130 struct pci_dev *pcu_cr3; 131 u32 mcroute; 132 133 /* imh_edac */ 134 /* System-view MMIO base physical addresses. */ 135 u64 mmio_base_h_north; 136 u64 mmio_base_h_south; 137 int pkg; 138 139 int num_imc; 140 struct list_head list; 141 struct skx_imc { 142 /* i10nm_edac */ 143 struct pci_dev *mdev; 144 145 /* imh_edac */ 146 struct device *dev; 147 148 struct mem_ctl_info *mci; 149 void __iomem *mbase; 150 int chan_mmio_sz; 151 int num_channels; /* channels per memory controller */ 152 int num_dimms; /* dimms per channel */ 153 bool hbm_mc; 154 u8 mc; /* system wide mc# */ 155 u8 lmc; /* socket relative mc# */ 156 u8 src_id; 157 /* 158 * Some server BIOS may hide certain memory controllers, and the 159 * EDAC driver skips those hidden memory controllers. However, the 160 * ADXL still decodes memory error address using physical memory 161 * controller indices. The mapping table is used to convert the 162 * physical indices (reported by ADXL) to the logical indices 163 * (used the EDAC driver) of present memory controllers during the 164 * error handling process. 165 */ 166 u8 mc_mapping; 167 struct skx_channel { 168 struct pci_dev *cdev; 169 struct pci_dev *edev; 170 /* 171 * Two groups of RRL control registers per channel to save default RRL 172 * settings of two {sub-,pseudo-}channels in Linux RRL control mode. 173 */ 174 u32 rrl_ctl[2][NUM_RRL_SET]; 175 struct skx_dimm { 176 u8 close_pg; 177 u8 bank_xor_enable; 178 u8 fine_grain_bank; 179 u8 rowbits; 180 u8 colbits; 181 } dimms[NUM_DIMMS]; 182 } chan[NUM_CHANNELS]; 183 } imc[]; 184 }; 185 186 struct skx_pvt { 187 struct skx_imc *imc; 188 }; 189 190 enum type { 191 SKX, 192 I10NM, 193 SPR, 194 GNR, 195 DMR, 196 }; 197 198 enum { 199 INDEX_SOCKET, 200 INDEX_MEMCTRL, 201 INDEX_CHANNEL, 202 INDEX_DIMM, 203 INDEX_CS, 204 INDEX_NM_FIRST, 205 INDEX_NM_MEMCTRL = INDEX_NM_FIRST, 206 INDEX_NM_CHANNEL, 207 INDEX_NM_DIMM, 208 INDEX_NM_CS, 209 INDEX_MAX 210 }; 211 212 enum error_source { 213 ERR_SRC_1LM, 214 ERR_SRC_2LM_NM, 215 ERR_SRC_2LM_FM, 216 ERR_SRC_NOT_MEMORY, 217 }; 218 219 #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) 220 #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) 221 #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) 222 #define BIT_NM_CS BIT_ULL(INDEX_NM_CS) 223 224 struct decoded_addr { 225 struct mce *mce; 226 struct skx_dev *dev; 227 u64 addr; 228 int socket; 229 int imc; 230 int channel; 231 u64 chan_addr; 232 int sktways; 233 int chanways; 234 int dimm; 235 int cs; 236 int rank; 237 int channel_rank; 238 u64 rank_address; 239 int row; 240 int column; 241 int bank_address; 242 int bank_group; 243 bool decoded_by_adxl; 244 }; 245 246 struct pci_bdf { 247 u32 bus : 8; 248 u32 dev : 5; 249 u32 fun : 3; 250 }; 251 252 struct res_config { 253 enum type type; 254 /* DDR memory controllers per socket */ 255 int ddr_imc_num; 256 /* DDR channels per DDR memory controller */ 257 int ddr_chan_num; 258 /* DDR DIMMs per DDR memory channel */ 259 int ddr_dimm_num; 260 /* Per DDR channel memory-mapped I/O size */ 261 int ddr_chan_mmio_sz; 262 /* HBM memory controllers per socket */ 263 int hbm_imc_num; 264 /* HBM channels per HBM memory controller */ 265 int hbm_chan_num; 266 /* HBM DIMMs per HBM memory channel */ 267 int hbm_dimm_num; 268 /* Per HBM channel memory-mapped I/O size */ 269 int hbm_chan_mmio_sz; 270 bool support_ddr5; 271 /* RRL register sets per DDR channel */ 272 struct reg_rrl *reg_rrl_ddr; 273 /* RRL register sets per HBM channel */ 274 struct reg_rrl *reg_rrl_hbm[2]; 275 union { 276 /* {skx,i10nm}_edac */ 277 struct { 278 /* Configuration agent device ID */ 279 unsigned int decs_did; 280 /* Default bus number configuration register offset */ 281 int busno_cfg_offset; 282 struct pci_bdf sad_all_bdf; 283 struct pci_bdf pcu_cr3_bdf; 284 struct pci_bdf util_all_bdf; 285 struct pci_bdf uracu_bdf; 286 struct pci_bdf ddr_mdev_bdf; 287 struct pci_bdf hbm_mdev_bdf; 288 int sad_all_offset; 289 }; 290 /* imh_edac */ 291 struct { 292 /* MMIO base physical address in local package view */ 293 u64 mmio_base_l_north; 294 u64 mmio_base_l_south; 295 u64 ddr_imc_base; 296 u64 ddr_reg_mcmtr_offset; 297 u8 ddr_reg_mcmtr_width; 298 u64 ddr_reg_dimmmtr_offset; 299 u8 ddr_reg_dimmmtr_width; 300 u64 ubox_base; 301 u32 ubox_size; 302 u32 ubox_reg_mmio_base_offset; 303 u8 ubox_reg_mmio_base_width; 304 u32 ubox_reg_socket_id_offset; 305 u8 ubox_reg_socket_id_width; 306 u64 pcu_base; 307 u32 pcu_size; 308 u32 pcu_reg_capid3_offset; 309 u8 pcu_reg_capid3_width; 310 u64 sca_base; 311 u32 sca_size; 312 u32 sca_reg_tolm_offset; 313 u8 sca_reg_tolm_width; 314 u32 sca_reg_tohm_offset; 315 u8 sca_reg_tohm_width; 316 u64 ha_base; 317 u32 ha_size; 318 u32 ha_reg_mode_offset; 319 u8 ha_reg_mode_width; 320 }; 321 }; 322 }; 323 324 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, 325 struct res_config *cfg); 326 typedef bool (*skx_decode_f)(struct decoded_addr *res); 327 typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err); 328 329 int skx_adxl_get(void); 330 void skx_adxl_put(void); 331 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); 332 void skx_set_mem_cfg(bool mem_cfg_2lm); 333 void skx_set_res_cfg(struct res_config *cfg); 334 void skx_init_mc_mapping(struct skx_dev *d); 335 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); 336 337 int skx_get_src_id(struct skx_dev *d, int off, u8 *id); 338 339 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); 340 341 struct list_head *skx_get_edac_list(void); 342 343 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); 344 void skx_set_hi_lo(u64 tolm, u64 tohm); 345 346 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, 347 struct skx_imc *imc, int chan, int dimmno, 348 struct res_config *cfg); 349 350 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, 351 int chan, int dimmno, const char *mod_str); 352 353 int skx_register_mci(struct skx_imc *imc, struct device *dev, const char *dev_name, 354 const char *ctl_name, const char *mod_str, 355 get_dimm_config_f get_dimm_config, 356 struct res_config *cfg); 357 358 int skx_mce_check_error(struct notifier_block *nb, unsigned long val, 359 void *data); 360 361 void skx_remove(void); 362 363 #ifdef CONFIG_EDAC_DEBUG 364 void skx_setup_debug(const char *name); 365 void skx_teardown_debug(void); 366 #else 367 static inline void skx_setup_debug(const char *name) {} 368 static inline void skx_teardown_debug(void) {} 369 #endif 370 371 #endif /* _SKX_COMM_EDAC_H */ 372