/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Common code for both the skx_edac driver and Intel 10nm server EDAC driver.
 * Originally split out from the skx_edac driver.
 *
 * Copyright (c) 2018, Intel Corporation.
 *
 * Source: linux/drivers/edac/skx_common.h
 * (revision ada1b0436b5a290923b072b2eb0368a7869bf680)
 */

9 #ifndef _SKX_COMM_EDAC_H
10 #define _SKX_COMM_EDAC_H
11 
12 #include <linux/bits.h>
13 #include <asm/mce.h>
14 
15 #define MSG_SIZE		1024
16 
/*
 * Debug macros
 *
 * Both forward to the corresponding EDAC print helper
 * (edac_printk() / edac_mc_chipset_printk()) and always pass the
 * fixed prefix string "skx".
 */
#define skx_printk(level, fmt, arg...)			\
	edac_printk(level, "skx", fmt, ##arg)

#define skx_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)

/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>
 * (both inclusive). The field is returned right-aligned: bit <lo> of
 * <v> ends up as bit 0 of the result.
 *
 * <lo> is expanded twice, so it must not have side effects.
 */
#define GET_BITFIELD(v, lo, hi) \
	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))

/* SKX_* limits (skx_edac driver). */
#define SKX_NUM_IMC		2	/* Memory controllers per socket */
#define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
#define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */

/* I10NM_* limits (Intel 10nm server EDAC driver), DDR part. */
#define I10NM_NUM_DDR_IMC	12
#define I10NM_NUM_DDR_CHANNELS	2
#define I10NM_NUM_DDR_DIMMS	2

/* I10NM_* limits, HBM part. */
#define I10NM_NUM_HBM_IMC	16
#define I10NM_NUM_HBM_CHANNELS	2
#define I10NM_NUM_HBM_DIMMS	1

/*
 * Combined I10NM limits: DDR and HBM memory controllers are added up,
 * while the channel and DIMM limits take the larger of the two.
 */
#define I10NM_NUM_IMC		(I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
#define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)

/* Largest value across SKX and I10NM; sizes the arrays in struct skx_dev. */
#define NUM_IMC		MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
#define NUM_CHANNELS	MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS	MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)

/* Bit 15 of register value <r>: 1 when a DIMM is present, else 0. */
#define IS_DIMM_PRESENT(r)		GET_BITFIELD(r, 15, 15)
/* Bit <i> of register value <r>: 1 when an NVDIMM is present, else 0. */
#define IS_NVDIMM_PRESENT(r, i)		GET_BITFIELD(r, i, i)

/* ECC mode: the 4-bit field at bits 62:59 of <m>. */
#define MCI_MISC_ECC_MODE(m)	(((m) >> 59) & 15)
#define MCI_MISC_ECC_DDRT	8	/* read from DDRT */

/*
 * According to Intel Architecture spec vol 3B,
 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
 * memory errors should fit one of these masks:
 *	000f 0000 1mmm cccc (binary)
 *	000f 0010 1mmm cccc (binary)	[RAM used as cache]
 * where:
 *	f = Correction Report Filtering Bit. If 1, subsequent errors
 *	    won't be shown
 *	mmm = error type
 *	cccc = channel
 *
 * The mask below drops the f, mmm and cccc bits, so that the masked
 * code can be compared directly against the two values that follow.
 */
#define MCACOD_MEM_ERR_MASK	0xef80
/*
 * Errors from either the memory of the 1-level memory system or the
 * 2nd level memory (the slow "far" memory) of the 2-level memory system.
 */
#define MCACOD_MEM_CTL_ERR	0x80
/*
 * Errors from the 1st level memory (the fast "near" memory as cache)
 * of the 2-level memory system.
 */
#define MCACOD_EXT_MEM_ERR	0x280

/* Max RRL register sets per {,sub-,pseudo-}channel. */
#define NUM_RRL_SET		4
/* Max RRL registers per set. */
#define NUM_RRL_REG		6
/* Max correctable error count registers. */
#define NUM_CECNT_REG		8

/*
 * Modes of RRL register set: which read error a set records (last or
 * first) and which kind of access produced it (patrol scrub or demand).
 */
enum rrl_mode {
	/* Last read error from patrol scrub. */
	LRE_SCRUB,
	/* Last read error from demand. */
	LRE_DEMAND,
	/* First read error from patrol scrub. */
	FRE_SCRUB,
	/* First read error from demand. */
	FRE_DEMAND,
};

101 /* RRL registers per {,sub-,pseudo-}channel. */
102 struct reg_rrl {
103 	/* RRL register parts. */
104 	int set_num, reg_num;
105 	enum rrl_mode modes[NUM_RRL_SET];
106 	u32 offsets[NUM_RRL_SET][NUM_RRL_REG];
107 	/* RRL register widths in byte per set. */
108 	u8 widths[NUM_RRL_REG];
109 	/* RRL control bits of the first register per set. */
110 	u32 v_mask;
111 	u32 uc_mask;
112 	u32 over_mask;
113 	u32 en_patspr_mask;
114 	u32 noover_mask;
115 	u32 en_mask;
116 
117 	/* CORRERRCNT register parts. */
118 	int cecnt_num;
119 	u32 cecnt_offsets[NUM_CECNT_REG];
120 	u8 cecnt_widths[NUM_CECNT_REG];
121 };
122 
123 /*
124  * Each cpu socket contains some pci devices that provide global
125  * information, and also some that are local to each of the two
126  * memory controllers on the die.
127  */
128 struct skx_dev {
129 	struct list_head list;
130 	u8 bus[4];
131 	int seg;
132 	struct pci_dev *sad_all;
133 	struct pci_dev *util_all;
134 	struct pci_dev *uracu; /* for i10nm CPU */
135 	struct pci_dev *pcu_cr3; /* for HBM memory detection */
136 	u32 mcroute;
137 	/*
138 	 * Some server BIOS may hide certain memory controllers, and the
139 	 * EDAC driver skips those hidden memory controllers. However, the
140 	 * ADXL still decodes memory error address using physical memory
141 	 * controller indices. The mapping table is used to convert the
142 	 * physical indices (reported by ADXL) to the logical indices
143 	 * (used the EDAC driver) of present memory controllers during the
144 	 * error handling process.
145 	 */
146 	u8 mc_mapping[NUM_IMC];
147 	struct skx_imc {
148 		struct mem_ctl_info *mci;
149 		struct pci_dev *mdev; /* for i10nm CPU */
150 		void __iomem *mbase;  /* for i10nm CPU */
151 		int chan_mmio_sz;     /* for i10nm CPU */
152 		int num_channels; /* channels per memory controller */
153 		int num_dimms; /* dimms per channel */
154 		bool hbm_mc;
155 		u8 mc;	/* system wide mc# */
156 		u8 lmc;	/* socket relative mc# */
157 		u8 src_id;
158 		struct skx_channel {
159 			struct pci_dev	*cdev;
160 			struct pci_dev	*edev;
161 			/*
162 			 * Two groups of RRL control registers per channel to save default RRL
163 			 * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
164 			 */
165 			u32 rrl_ctl[2][NUM_RRL_SET];
166 			struct skx_dimm {
167 				u8 close_pg;
168 				u8 bank_xor_enable;
169 				u8 fine_grain_bank;
170 				u8 rowbits;
171 				u8 colbits;
172 			} dimms[NUM_DIMMS];
173 		} chan[NUM_CHANNELS];
174 	} imc[NUM_IMC];
175 };
176 
/*
 * Wrapper holding a pointer to one memory controller.
 * NOTE(review): presumably stored as the private data of a
 * struct mem_ctl_info - confirm at the users.
 */
struct skx_pvt {
	struct skx_imc	*imc;
};

/* Platform variants; the value is carried in struct res_config::type. */
enum type {
	SKX,
	I10NM,
	SPR,
	GNR
};

/*
 * Indices of the address components. The INDEX_NM_* entries form a
 * second group that starts at INDEX_NM_FIRST; INDEX_MAX is the total
 * number of indices.
 * NOTE(review): "NM" presumably stands for near memory - confirm.
 */
enum {
	INDEX_SOCKET,
	INDEX_MEMCTRL,
	INDEX_CHANNEL,
	INDEX_DIMM,
	INDEX_CS,
	INDEX_NM_FIRST,
	INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
	INDEX_NM_CHANNEL,
	INDEX_NM_DIMM,
	INDEX_NM_CS,
	INDEX_MAX
};

/*
 * Where an error came from.
 * NOTE(review): 1LM/2LM presumably refer to the 1-level and 2-level
 * memory systems, and NM/FM to the near/far memory of the latter.
 */
enum error_source {
	ERR_SRC_1LM,
	ERR_SRC_2LM_NM,
	ERR_SRC_2LM_FM,
	ERR_SRC_NOT_MEMORY,
};

/* Single-bit masks: bit number is the matching INDEX_NM_* value. */
#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
#define BIT_NM_CS	BIT_ULL(INDEX_NM_CS)

214 struct decoded_addr {
215 	struct mce *mce;
216 	struct skx_dev *dev;
217 	u64	addr;
218 	int	socket;
219 	int	imc;
220 	int	channel;
221 	u64	chan_addr;
222 	int	sktways;
223 	int	chanways;
224 	int	dimm;
225 	int	cs;
226 	int	rank;
227 	int	channel_rank;
228 	u64	rank_address;
229 	int	row;
230 	int	column;
231 	int	bank_address;
232 	int	bank_group;
233 	bool	decoded_by_adxl;
234 };
235 
236 struct pci_bdf {
237 	u32 bus : 8;
238 	u32 dev : 5;
239 	u32 fun : 3;
240 };
241 
242 struct res_config {
243 	enum type type;
244 	/* Configuration agent device ID */
245 	unsigned int decs_did;
246 	/* Default bus number configuration register offset */
247 	int busno_cfg_offset;
248 	/* DDR memory controllers per socket */
249 	int ddr_imc_num;
250 	/* DDR channels per DDR memory controller */
251 	int ddr_chan_num;
252 	/* DDR DIMMs per DDR memory channel */
253 	int ddr_dimm_num;
254 	/* Per DDR channel memory-mapped I/O size */
255 	int ddr_chan_mmio_sz;
256 	/* HBM memory controllers per socket */
257 	int hbm_imc_num;
258 	/* HBM channels per HBM memory controller */
259 	int hbm_chan_num;
260 	/* HBM DIMMs per HBM memory channel */
261 	int hbm_dimm_num;
262 	/* Per HBM channel memory-mapped I/O size */
263 	int hbm_chan_mmio_sz;
264 	bool support_ddr5;
265 	/* SAD device BDF */
266 	struct pci_bdf sad_all_bdf;
267 	/* PCU device BDF */
268 	struct pci_bdf pcu_cr3_bdf;
269 	/* UTIL device BDF */
270 	struct pci_bdf util_all_bdf;
271 	/* URACU device BDF */
272 	struct pci_bdf uracu_bdf;
273 	/* DDR mdev device BDF */
274 	struct pci_bdf ddr_mdev_bdf;
275 	/* HBM mdev device BDF */
276 	struct pci_bdf hbm_mdev_bdf;
277 	int sad_all_offset;
278 	/* RRL register sets per DDR channel */
279 	struct reg_rrl *reg_rrl_ddr;
280 	/* RRL register sets per HBM channel */
281 	struct reg_rrl *reg_rrl_hbm[2];
282 };
283 
284 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
285 				 struct res_config *cfg);
286 typedef bool (*skx_decode_f)(struct decoded_addr *res);
287 typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
288 
289 int skx_adxl_get(void);
290 void skx_adxl_put(void);
291 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
292 void skx_set_mem_cfg(bool mem_cfg_2lm);
293 void skx_set_res_cfg(struct res_config *cfg);
294 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc);
295 
296 int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
297 
298 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
299 
300 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
301 
302 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
303 		      struct skx_imc *imc, int chan, int dimmno,
304 		      struct res_config *cfg);
305 
306 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
307 			int chan, int dimmno, const char *mod_str);
308 
309 int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
310 		     const char *ctl_name, const char *mod_str,
311 		     get_dimm_config_f get_dimm_config,
312 		     struct res_config *cfg);
313 
314 int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
315 			void *data);
316 
317 void skx_remove(void);
318 
/*
 * Debug hooks. With CONFIG_EDAC_DEBUG they are only declared here and
 * implemented elsewhere; without it they collapse to empty inline
 * stubs, so callers need no #ifdef of their own.
 */
#ifdef CONFIG_EDAC_DEBUG
void skx_setup_debug(const char *name);
void skx_teardown_debug(void);
#else
static inline void skx_setup_debug(const char *name) {}
static inline void skx_teardown_debug(void) {}
#endif

327 #endif /* _SKX_COMM_EDAC_H */
328