1 /* SPDX-License-Identifier: GPL-2.0 */
/*
 * Common code for both the skx_edac driver and Intel 10nm server EDAC driver.
 * Originally split out from the skx_edac driver.
 *
 * Copyright (c) 2018, Intel Corporation.
 */
8
9 #ifndef _SKX_COMM_EDAC_H
10 #define _SKX_COMM_EDAC_H
11
12 #include <linux/bits.h>
13 #include <asm/mce.h>
14
/*
 * Message buffer size.
 * NOTE(review): the users of this constant are outside this header;
 * presumably it is a byte count for formatted error text - confirm.
 */
#define MSG_SIZE	1024
16
/*
 * Logging wrappers.
 *
 * Both forward to the corresponding EDAC print helper and insert the
 * literal "skx" ahead of the caller's format string and arguments.
 */
#define skx_printk(level, fmt, ...)					\
	edac_printk(level, "skx", fmt, ##__VA_ARGS__)

#define skx_mc_printk(mci, level, fmt, ...)				\
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##__VA_ARGS__)
25
/*
 * Extract bits <lo>..<hi> (both inclusive) of register value <v> and
 * shift them down so that bit <lo> ends up in bit 0 of the result.
 *
 * The mask comes from GENMASK_ULL(). <lo> is expanded twice, so do not
 * pass an argument with side effects.
 */
#define GET_BITFIELD(v, lo, hi)	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
31
/* Limits for the skx_edac driver. */
#define SKX_NUM_IMC		2	/* Memory controllers per socket */
#define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
#define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */

/* Limits for the i10nm driver: DDR memory. */
#define I10NM_NUM_DDR_IMC	12
#define I10NM_NUM_DDR_CHANNELS	2
#define I10NM_NUM_DDR_DIMMS	2

/* Limits for the i10nm driver: HBM memory. */
#define I10NM_NUM_HBM_IMC	16
#define I10NM_NUM_HBM_CHANNELS	2
#define I10NM_NUM_HBM_DIMMS	1

/*
 * Combined i10nm limits: the controller count is the DDR and HBM counts
 * added together, the per-controller and per-channel limits are the
 * larger of the two. MAX() is not defined in this header and has to be
 * in scope wherever these macros are expanded.
 */
#define I10NM_NUM_IMC							\
	(I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
#define I10NM_NUM_CHANNELS						\
	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS							\
	MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)

/*
 * Worst case across both drivers. These values size the arrays embedded
 * in struct skx_dev.
 */
#define NUM_IMC			MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
#define NUM_CHANNELS		MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS		MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
51
/* Bit 15 of register value <r>; nonzero when the DIMM is present. */
#define IS_DIMM_PRESENT(r)		GET_BITFIELD((r), 15, 15)
/* Bit <i> of register value <r>; nonzero when the NVDIMM is present. */
#define IS_NVDIMM_PRESENT(r, i)		GET_BITFIELD((r), (i), (i))
54
/* 4-bit ECC mode field held in bits 62:59 of the MISC value <m>. */
#define MCI_MISC_ECC_MODE(m)	(((m) >> 59) & 0xf)
/* ECC mode value: read from DDRT */
#define MCI_MISC_ECC_DDRT	8
57
/*
 * According to Intel Architecture spec vol 3B,
 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
 * memory errors should fit one of these masks:
 *	000f 0000 1mmm cccc (binary)
 *	000f 0010 1mmm cccc (binary)	[RAM used as cache]
 * where:
 *	f    = Correction Report Filtering Bit. If 1, subsequent errors
 *	       won't be shown
 *	mmm  = error type
 *	cccc = channel
 *
 * The mask below therefore keeps every bit except the filtering bit
 * (bit 12) and the mmm/cccc fields (bits 6:0).
 */
#define MCACOD_MEM_ERR_MASK	0xef80

/*
 * Masked code for errors from either the memory of the 1-level memory
 * system or the 2nd level memory (the slow "far" memory) of the 2-level
 * memory system.
 */
#define MCACOD_MEM_CTL_ERR	0x80

/*
 * Masked code for errors from the 1st level memory (the fast "near"
 * memory as cache) of the 2-level memory system.
 */
#define MCACOD_EXT_MEM_ERR	0x280
81
/*
 * Upper bounds used to size the arrays in struct reg_rrl:
 *   NUM_RRL_SET   - max RRL register sets per {,sub-,pseudo-}channel
 *   NUM_RRL_REG   - max RRL registers per set
 *   NUM_CECNT_REG - max correctable error count registers
 */
#define NUM_RRL_SET	4
#define NUM_RRL_REG	6
#define NUM_CECNT_REG	8
88
/* Modes of RRL register set. */
enum rrl_mode {
	LRE_SCRUB,	/* Last read error from patrol scrub. */
	LRE_DEMAND,	/* Last read error from demand. */
	FRE_SCRUB,	/* First read error from patrol scrub. */
	FRE_DEMAND,	/* First read error from demand. */
};
100
101 /* RRL registers per {,sub-,pseudo-}channel. */
102 struct reg_rrl {
103 /* RRL register parts. */
104 int set_num, reg_num;
105 enum rrl_mode modes[NUM_RRL_SET];
106 u32 offsets[NUM_RRL_SET][NUM_RRL_REG];
107 /* RRL register widths in byte per set. */
108 u8 widths[NUM_RRL_REG];
109 /* RRL control bits of the first register per set. */
110 u32 v_mask;
111 u32 uc_mask;
112 u32 over_mask;
113 u32 en_patspr_mask;
114 u32 noover_mask;
115 u32 en_mask;
116
117 /* CORRERRCNT register parts. */
118 int cecnt_num;
119 u32 cecnt_offsets[NUM_CECNT_REG];
120 u8 cecnt_widths[NUM_CECNT_REG];
121 };
122
123 /*
124 * Each cpu socket contains some pci devices that provide global
125 * information, and also some that are local to each of the two
126 * memory controllers on the die.
127 */
128 struct skx_dev {
129 struct list_head list;
130 u8 bus[4];
131 int seg;
132 struct pci_dev *sad_all;
133 struct pci_dev *util_all;
134 struct pci_dev *uracu; /* for i10nm CPU */
135 struct pci_dev *pcu_cr3; /* for HBM memory detection */
136 u32 mcroute;
137 /*
138 * Some server BIOS may hide certain memory controllers, and the
139 * EDAC driver skips those hidden memory controllers. However, the
140 * ADXL still decodes memory error address using physical memory
141 * controller indices. The mapping table is used to convert the
142 * physical indices (reported by ADXL) to the logical indices
143 * (used the EDAC driver) of present memory controllers during the
144 * error handling process.
145 */
146 u8 mc_mapping[NUM_IMC];
147 struct skx_imc {
148 struct mem_ctl_info *mci;
149 struct pci_dev *mdev; /* for i10nm CPU */
150 void __iomem *mbase; /* for i10nm CPU */
151 int chan_mmio_sz; /* for i10nm CPU */
152 int num_channels; /* channels per memory controller */
153 int num_dimms; /* dimms per channel */
154 bool hbm_mc;
155 u8 mc; /* system wide mc# */
156 u8 lmc; /* socket relative mc# */
157 u8 src_id;
158 struct skx_channel {
159 struct pci_dev *cdev;
160 struct pci_dev *edev;
161 /*
162 * Two groups of RRL control registers per channel to save default RRL
163 * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
164 */
165 u32 rrl_ctl[2][NUM_RRL_SET];
166 struct skx_dimm {
167 u8 close_pg;
168 u8 bank_xor_enable;
169 u8 fine_grain_bank;
170 u8 rowbits;
171 u8 colbits;
172 } dimms[NUM_DIMMS];
173 } chan[NUM_CHANNELS];
174 } imc[NUM_IMC];
175 };
176
/*
 * Private data wrapper holding a pointer to one memory controller.
 * NOTE(review): presumably attached to a mem_ctl_info as its driver
 * private data - confirm where it is allocated.
 */
struct skx_pvt {
	struct skx_imc	*imc;
};
180
/*
 * Platform kind recorded in struct res_config.
 * NOTE(review): the names presumably stand for CPU generations; the
 * code that switches on them is outside this header.
 */
enum type {
	SKX, I10NM, SPR, GNR
};
187
/*
 * Component indices. The first five are socket, memory controller,
 * channel, DIMM and chip select. The entries from INDEX_NM_FIRST onwards
 * repeat memory controller / channel / DIMM / chip select with an NM
 * prefix. INDEX_MAX is the total number of indices.
 *
 * NOTE(review): "NM" presumably means the near memory of a 2-level
 * memory system - confirm against the decoder code.
 */
enum {
	INDEX_SOCKET,
	INDEX_MEMCTRL,
	INDEX_CHANNEL,
	INDEX_DIMM,
	INDEX_CS,

	INDEX_NM_FIRST,
	INDEX_NM_MEMCTRL = INDEX_NM_FIRST,	/* alias of the first NM slot */
	INDEX_NM_CHANNEL,
	INDEX_NM_DIMM,
	INDEX_NM_CS,

	INDEX_MAX
};
201
/*
 * Where a reported error came from.
 * NOTE(review): the per-value notes are read off the names (1LM/2LM =
 * 1-/2-level memory, NM/FM = near/far memory) - confirm with the code
 * that produces these values.
 */
enum error_source {
	ERR_SRC_1LM,		/* 1-level memory */
	ERR_SRC_2LM_NM,		/* 2-level memory, near memory */
	ERR_SRC_2LM_FM,		/* 2-level memory, far memory */
	ERR_SRC_NOT_MEMORY,	/* not a memory error */
};
208
/* Single-bit masks, one per INDEX_NM_* component index. */
#define BIT_NM_MEMCTRL		BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL		BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM		BIT_ULL(INDEX_NM_DIMM)
#define BIT_NM_CS		BIT_ULL(INDEX_NM_CS)
213
214 struct decoded_addr {
215 struct mce *mce;
216 struct skx_dev *dev;
217 u64 addr;
218 int socket;
219 int imc;
220 int channel;
221 u64 chan_addr;
222 int sktways;
223 int chanways;
224 int dimm;
225 int cs;
226 int rank;
227 int channel_rank;
228 u64 rank_address;
229 int row;
230 int column;
231 int bank_address;
232 int bank_group;
233 bool decoded_by_adxl;
234 };
235
236 struct pci_bdf {
237 u32 bus : 8;
238 u32 dev : 5;
239 u32 fun : 3;
240 };
241
242 struct res_config {
243 enum type type;
244 /* Configuration agent device ID */
245 unsigned int decs_did;
246 /* Default bus number configuration register offset */
247 int busno_cfg_offset;
248 /* DDR memory controllers per socket */
249 int ddr_imc_num;
250 /* DDR channels per DDR memory controller */
251 int ddr_chan_num;
252 /* DDR DIMMs per DDR memory channel */
253 int ddr_dimm_num;
254 /* Per DDR channel memory-mapped I/O size */
255 int ddr_chan_mmio_sz;
256 /* HBM memory controllers per socket */
257 int hbm_imc_num;
258 /* HBM channels per HBM memory controller */
259 int hbm_chan_num;
260 /* HBM DIMMs per HBM memory channel */
261 int hbm_dimm_num;
262 /* Per HBM channel memory-mapped I/O size */
263 int hbm_chan_mmio_sz;
264 bool support_ddr5;
265 /* SAD device BDF */
266 struct pci_bdf sad_all_bdf;
267 /* PCU device BDF */
268 struct pci_bdf pcu_cr3_bdf;
269 /* UTIL device BDF */
270 struct pci_bdf util_all_bdf;
271 /* URACU device BDF */
272 struct pci_bdf uracu_bdf;
273 /* DDR mdev device BDF */
274 struct pci_bdf ddr_mdev_bdf;
275 /* HBM mdev device BDF */
276 struct pci_bdf hbm_mdev_bdf;
277 int sad_all_offset;
278 /* RRL register sets per DDR channel */
279 struct reg_rrl *reg_rrl_ddr;
280 /* RRL register sets per HBM channel */
281 struct reg_rrl *reg_rrl_hbm[2];
282 };
283
/*
 * Callback types.
 *
 * get_dimm_config_f    - passed to skx_register_mci(); takes the
 *                        mem_ctl_info and the platform res_config and
 *                        returns an int.
 * skx_decode_f         - address decoder working on a decoded_addr;
 *                        returns a bool (presumably true on success -
 *                        confirm with the implementations).
 * skx_show_retry_log_f - receives a decoded_addr, a message buffer
 *                        <msg> with its length <len>, and a flag
 *                        <scrub_err>.
 */
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
				 struct res_config *cfg);
typedef bool (*skx_decode_f)(struct decoded_addr *res);
typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg,
				     int len, bool scrub_err);
288
289 int skx_adxl_get(void);
290 void skx_adxl_put(void);
291 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
292 void skx_set_mem_cfg(bool mem_cfg_2lm);
293 void skx_set_res_cfg(struct res_config *cfg);
294 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc);
295
296 int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
297
298 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
299
300 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
301
302 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
303 struct skx_imc *imc, int chan, int dimmno,
304 struct res_config *cfg);
305
306 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
307 int chan, int dimmno, const char *mod_str);
308
309 int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
310 const char *ctl_name, const char *mod_str,
311 get_dimm_config_f get_dimm_config,
312 struct res_config *cfg);
313
314 int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
315 void *data);
316
317 void skx_remove(void);
318
/*
 * Debug hooks.
 *
 * With CONFIG_EDAC_DEBUG defined, the two functions are only declared
 * here and must be provided elsewhere. Without it, they collapse to
 * empty inline stubs so callers need no #ifdef of their own.
 */
#ifdef CONFIG_EDAC_DEBUG
void skx_setup_debug(const char *name);
void skx_teardown_debug(void);
#else
static inline void skx_setup_debug(const char *name) {}
static inline void skx_teardown_debug(void) {}
#endif
326
327 #endif /* _SKX_COMM_EDAC_H */
328