1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver. 4 * Originally split out from the skx_edac driver. 5 * 6 * Copyright (c) 2018, Intel Corporation. 7 */ 8 9 #ifndef _SKX_COMM_EDAC_H 10 #define _SKX_COMM_EDAC_H 11 12 #include <linux/bits.h> 13 #include <asm/mce.h> 14 15 #define MSG_SIZE 1024 16 17 /* 18 * Debug macros 19 */ 20 #define skx_printk(level, fmt, arg...) \ 21 edac_printk(level, "skx", fmt, ##arg) 22 23 #define skx_mc_printk(mci, level, fmt, arg...) \ 24 edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) 25 26 /* 27 * Get a bit field at register value <v>, from bit <lo> to bit <hi> 28 */ 29 #define GET_BITFIELD(v, lo, hi) \ 30 (((v) & GENMASK_ULL((hi), (lo))) >> (lo)) 31 32 #define SKX_NUM_IMC 2 /* Memory controllers per socket */ 33 #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ 34 #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ 35 36 #define I10NM_NUM_DDR_IMC 12 37 #define I10NM_NUM_DDR_CHANNELS 2 38 #define I10NM_NUM_DDR_DIMMS 2 39 40 #define I10NM_NUM_HBM_IMC 16 41 #define I10NM_NUM_HBM_CHANNELS 2 42 #define I10NM_NUM_HBM_DIMMS 1 43 44 #define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC) 45 #define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) 46 #define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) 47 48 #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) 49 #define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS) 50 #define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS) 51 52 #define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15) 53 #define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i) 54 55 #define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) 56 #define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ 57 58 /* 59 * According to Intel Architecture spec vol 3B, 60 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" 61 * memory errors should fit one of these masks: 62 * 000f 0000 1mmm cccc (binary) 63 * 000f 0010 1mmm cccc (binary) [RAM used as cache] 64 * where: 65 * f = Correction Report Filtering Bit. If 1, subsequent errors 66 * won't be shown 67 * mmm = error type 68 * cccc = channel 69 */ 70 #define MCACOD_MEM_ERR_MASK 0xef80 71 /* 72 * Errors from either the memory of the 1-level memory system or the 73 * 2nd level memory (the slow "far" memory) of the 2-level memory system. 74 */ 75 #define MCACOD_MEM_CTL_ERR 0x80 76 /* 77 * Errors from the 1st level memory (the fast "near" memory as cache) 78 * of the 2-level memory system. 79 */ 80 #define MCACOD_EXT_MEM_ERR 0x280 81 82 /* Max RRL register sets per {,sub-,pseudo-}channel. */ 83 #define NUM_RRL_SET 4 84 /* Max RRL registers per set. */ 85 #define NUM_RRL_REG 6 86 /* Max correctable error count registers. */ 87 #define NUM_CECNT_REG 8 88 89 /* Modes of RRL register set. */ 90 enum rrl_mode { 91 /* Last read error from patrol scrub. */ 92 LRE_SCRUB, 93 /* Last read error from demand. */ 94 LRE_DEMAND, 95 /* First read error from patrol scrub. */ 96 FRE_SCRUB, 97 /* First read error from demand. */ 98 FRE_DEMAND, 99 }; 100 101 /* RRL registers per {,sub-,pseudo-}channel. */ 102 struct reg_rrl { 103 /* RRL register parts. */ 104 int set_num, reg_num; 105 enum rrl_mode modes[NUM_RRL_SET]; 106 u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; 107 /* RRL register widths in byte per set. */ 108 u8 widths[NUM_RRL_REG]; 109 /* RRL control bits of the first register per set. */ 110 u32 v_mask; 111 u32 uc_mask; 112 u32 over_mask; 113 u32 en_patspr_mask; 114 u32 noover_mask; 115 u32 en_mask; 116 117 /* CORRERRCNT register parts. */ 118 int cecnt_num; 119 u32 cecnt_offsets[NUM_CECNT_REG]; 120 u8 cecnt_widths[NUM_CECNT_REG]; 121 }; 122 123 /* 124 * Each cpu socket contains some pci devices that provide global 125 * information, and also some that are local to each of the two 126 * memory controllers on the die. 127 */ 128 struct skx_dev { 129 struct list_head list; 130 u8 bus[4]; 131 int seg; 132 struct pci_dev *sad_all; 133 struct pci_dev *util_all; 134 struct pci_dev *uracu; /* for i10nm CPU */ 135 struct pci_dev *pcu_cr3; /* for HBM memory detection */ 136 u32 mcroute; 137 /* 138 * Some server BIOS may hide certain memory controllers, and the 139 * EDAC driver skips those hidden memory controllers. However, the 140 * ADXL still decodes memory error address using physical memory 141 * controller indices. The mapping table is used to convert the 142 * physical indices (reported by ADXL) to the logical indices 143 * (used the EDAC driver) of present memory controllers during the 144 * error handling process. 145 */ 146 u8 mc_mapping[NUM_IMC]; 147 struct skx_imc { 148 struct mem_ctl_info *mci; 149 struct pci_dev *mdev; /* for i10nm CPU */ 150 void __iomem *mbase; /* for i10nm CPU */ 151 int chan_mmio_sz; /* for i10nm CPU */ 152 int num_channels; /* channels per memory controller */ 153 int num_dimms; /* dimms per channel */ 154 bool hbm_mc; 155 u8 mc; /* system wide mc# */ 156 u8 lmc; /* socket relative mc# */ 157 u8 src_id; 158 struct skx_channel { 159 struct pci_dev *cdev; 160 struct pci_dev *edev; 161 /* 162 * Two groups of RRL control registers per channel to save default RRL 163 * settings of two {sub-,pseudo-}channels in Linux RRL control mode. 164 */ 165 u32 rrl_ctl[2][NUM_RRL_SET]; 166 struct skx_dimm { 167 u8 close_pg; 168 u8 bank_xor_enable; 169 u8 fine_grain_bank; 170 u8 rowbits; 171 u8 colbits; 172 } dimms[NUM_DIMMS]; 173 } chan[NUM_CHANNELS]; 174 } imc[NUM_IMC]; 175 }; 176 177 struct skx_pvt { 178 struct skx_imc *imc; 179 }; 180 181 enum type { 182 SKX, 183 I10NM, 184 SPR, 185 GNR 186 }; 187 188 enum { 189 INDEX_SOCKET, 190 INDEX_MEMCTRL, 191 INDEX_CHANNEL, 192 INDEX_DIMM, 193 INDEX_CS, 194 INDEX_NM_FIRST, 195 INDEX_NM_MEMCTRL = INDEX_NM_FIRST, 196 INDEX_NM_CHANNEL, 197 INDEX_NM_DIMM, 198 INDEX_NM_CS, 199 INDEX_MAX 200 }; 201 202 enum error_source { 203 ERR_SRC_1LM, 204 ERR_SRC_2LM_NM, 205 ERR_SRC_2LM_FM, 206 ERR_SRC_NOT_MEMORY, 207 }; 208 209 #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) 210 #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) 211 #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) 212 #define BIT_NM_CS BIT_ULL(INDEX_NM_CS) 213 214 struct decoded_addr { 215 struct mce *mce; 216 struct skx_dev *dev; 217 u64 addr; 218 int socket; 219 int imc; 220 int channel; 221 u64 chan_addr; 222 int sktways; 223 int chanways; 224 int dimm; 225 int cs; 226 int rank; 227 int channel_rank; 228 u64 rank_address; 229 int row; 230 int column; 231 int bank_address; 232 int bank_group; 233 bool decoded_by_adxl; 234 }; 235 236 struct pci_bdf { 237 u32 bus : 8; 238 u32 dev : 5; 239 u32 fun : 3; 240 }; 241 242 struct res_config { 243 enum type type; 244 /* Configuration agent device ID */ 245 unsigned int decs_did; 246 /* Default bus number configuration register offset */ 247 int busno_cfg_offset; 248 /* DDR memory controllers per socket */ 249 int ddr_imc_num; 250 /* DDR channels per DDR memory controller */ 251 int ddr_chan_num; 252 /* DDR DIMMs per DDR memory channel */ 253 int ddr_dimm_num; 254 /* Per DDR channel memory-mapped I/O size */ 255 int ddr_chan_mmio_sz; 256 /* HBM memory controllers per socket */ 257 int hbm_imc_num; 258 /* HBM channels per HBM memory controller */ 259 int hbm_chan_num; 260 /* HBM DIMMs per HBM memory channel */ 261 int hbm_dimm_num; 262 /* Per HBM channel memory-mapped I/O size */ 263 int hbm_chan_mmio_sz; 264 bool support_ddr5; 265 /* SAD device BDF */ 266 struct pci_bdf sad_all_bdf; 267 /* PCU device BDF */ 268 struct pci_bdf pcu_cr3_bdf; 269 /* UTIL device BDF */ 270 struct pci_bdf util_all_bdf; 271 /* URACU device BDF */ 272 struct pci_bdf uracu_bdf; 273 /* DDR mdev device BDF */ 274 struct pci_bdf ddr_mdev_bdf; 275 /* HBM mdev device BDF */ 276 struct pci_bdf hbm_mdev_bdf; 277 int sad_all_offset; 278 /* RRL register sets per DDR channel */ 279 struct reg_rrl *reg_rrl_ddr; 280 /* RRL register sets per HBM channel */ 281 struct reg_rrl *reg_rrl_hbm[2]; 282 }; 283 284 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, 285 struct res_config *cfg); 286 typedef bool (*skx_decode_f)(struct decoded_addr *res); 287 typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err); 288 289 int skx_adxl_get(void); 290 void skx_adxl_put(void); 291 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); 292 void skx_set_mem_cfg(bool mem_cfg_2lm); 293 void skx_set_res_cfg(struct res_config *cfg); 294 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); 295 296 int skx_get_src_id(struct skx_dev *d, int off, u8 *id); 297 298 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); 299 300 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); 301 302 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, 303 struct skx_imc *imc, int chan, int dimmno, 304 struct res_config *cfg); 305 306 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, 307 int chan, int dimmno, const char *mod_str); 308 309 int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, 310 const char *ctl_name, const char *mod_str, 311 get_dimm_config_f get_dimm_config, 312 struct res_config *cfg); 313 314 int skx_mce_check_error(struct notifier_block *nb, unsigned long val, 315 void *data); 316 317 void skx_remove(void); 318 319 #ifdef CONFIG_EDAC_DEBUG 320 void skx_setup_debug(const char *name); 321 void skx_teardown_debug(void); 322 #else 323 static inline void skx_setup_debug(const char *name) {} 324 static inline void skx_teardown_debug(void) {} 325 #endif 326 327 #endif /* _SKX_COMM_EDAC_H */ 328