xref: /linux/drivers/edac/skx_common.h (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Common code for both the skx_edac driver and Intel 10nm server EDAC driver.
 * Originally split out from the skx_edac driver.
 *
 * Copyright (c) 2018, Intel Corporation.
 */
8 
9 #ifndef _SKX_COMM_EDAC_H
10 #define _SKX_COMM_EDAC_H
11 
12 #include <linux/bits.h>
13 #include <asm/mce.h>
14 
/*
 * Size constant shared by the users of this header.
 * NOTE(review): presumably the byte size of the buffer that holds a formatted
 * error message - confirm against the users in the .c files.
 */
#define MSG_SIZE		1024
16 
/*
 * Debug macros
 *
 * skx_printk() forwards to edac_printk() and skx_mc_printk() forwards to
 * edac_mc_chipset_printk(). Both insert the fixed "skx" tag in front of the
 * caller's format string.
 *
 * @mci:   passed through unchanged as the first argument (skx_mc_printk only)
 * @level: passed through unchanged
 * @fmt:   printf-style format string
 * @arg:   optional format arguments; "##arg" drops the preceding comma when
 *         the caller supplies none
 */
#define skx_printk(level, fmt, arg...)			\
	edac_printk(level, "skx", fmt, ##arg)

#define skx_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
25 
/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>.
 *
 * The field is masked with GENMASK_ULL(hi, lo) and shifted right by <lo>, so
 * bit <lo> of <v> ends up at bit 0 of the result.
 *
 * <lo> is expanded twice: do not pass an expression with side effects.
 */
#define GET_BITFIELD(v, lo, hi) \
	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
31 
/* Limits for the skx_edac driver. */
#define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
#define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */

/* Limits for the i10nm DDR memory controllers. */
#define I10NM_NUM_DDR_CHANNELS	2
#define I10NM_NUM_DDR_DIMMS	2

/* Limits for the i10nm HBM memory controllers. */
#define I10NM_NUM_HBM_CHANNELS	2
#define I10NM_NUM_HBM_DIMMS	1

/* Larger of the DDR and HBM limits. */
#define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)

/*
 * Larger of the SKX and I10NM limits. These size the chan[] and dimms[]
 * arrays embedded in struct skx_dev.
 *
 * NOTE(review): MAX() is not defined by this header's own includes; it has to
 * be visible at the point of use (presumably via <linux/minmax.h>) - confirm.
 */
#define NUM_CHANNELS	MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS	MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
46 
/* Bit 15 of register value <r>. */
#define IS_DIMM_PRESENT(r)		GET_BITFIELD(r, 15, 15)
/* Bit <i> of register value <r>. */
#define IS_NVDIMM_PRESENT(r, i)		GET_BITFIELD(r, i, i)

/*
 * 4-bit field at bits 62:59 of <m>.
 * NOTE(review): <m> is presumably the MCi_MISC value of a machine check
 * record - confirm against the callers.
 */
#define MCI_MISC_ECC_MODE(m)	(((m) >> 59) & 15)
#define MCI_MISC_ECC_DDRT	8	/* read from DDRT */
52 
/*
 * According to Intel Architecture spec vol 3B,
 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
 * memory errors should fit one of these masks:
 *	000f 0000 1mmm cccc (binary)
 *	000f 0010 1mmm cccc (binary)	[RAM used as cache]
 * where:
 *	f = Correction Report Filtering Bit. If 1, subsequent errors
 *	    won't be shown
 *	mmm = error type
 *	cccc = channel
 *
 * The mask below clears the f bit (bit 12) and the mmm/cccc bits (bits 6:0),
 * so a masked code can be compared directly with MCACOD_MEM_CTL_ERR or
 * MCACOD_EXT_MEM_ERR.
 */
#define MCACOD_MEM_ERR_MASK	0xef80
/*
 * Errors from either the memory of the 1-level memory system or the
 * 2nd level memory (the slow "far" memory) of the 2-level memory system.
 */
#define MCACOD_MEM_CTL_ERR	0x80
/*
 * Errors from the 1st level memory (the fast "near" memory as cache)
 * of the 2-level memory system.
 */
#define MCACOD_EXT_MEM_ERR	0x280
76 
/*
 * Upper bounds used to size the per-channel register tables in
 * struct reg_rrl.
 */
/* Max RRL register sets per {,sub-,pseudo-}channel. */
#define NUM_RRL_SET		4
/* Max RRL registers per set. */
#define NUM_RRL_REG		7
/* Max correctable error count registers. */
#define NUM_CECNT_REG		8
83 
/*
 * Error source from which the RRL registers log errors.
 *
 * One value is stored per register set in reg_rrl.sources[].
 */
enum rrl_source_type {
	/* Last read error from patrol scrub. */
	RRL_SRC_LRE_SCRUB,
	/* Last read error from demand. */
	RRL_SRC_LRE_DEMAND,
	/* First read error from patrol scrub. */
	RRL_SRC_FRE_SCRUB,
	/* First read error from demand. */
	RRL_SRC_FRE_DEMAND,
};
95 
/*
 * Who controls the RRL registers and whether Linux reports their values.
 * Selected per platform through res_config.rrl_ctrl_mode.
 */
enum rrl_ctrl_mode {
	/* Linux does not control RRL or report values. */
	RRL_CTRL_NONE,
	/* Firmware retains control. Linux only reports values. */
	RRL_CTRL_BIOS,
	/* Linux takes control, resets mode bits, and clears valid/UC bits; reports values. */
	RRL_CTRL_LINUX,
};
104 
105 /* RRL registers per {,sub-,pseudo-}channel. */
106 struct reg_rrl {
107 	/* RRL register parts. */
108 	int set_num, reg_num;
109 	enum rrl_source_type sources[NUM_RRL_SET];
110 	u32 offsets[NUM_RRL_SET][NUM_RRL_REG];
111 	/* RRL register widths in byte per set. */
112 	u8 widths[NUM_RRL_REG];
113 	/* RRL control bits of the first register per set. */
114 	u32 v_mask;
115 	u32 uc_mask;
116 	u32 over_mask;
117 	u32 en_patspr_mask;
118 	u32 noover_mask;
119 	u32 en_mask;
120 
121 	/* CORRERRCNT register parts. */
122 	int cecnt_num;
123 	u32 cecnt_offsets[NUM_CECNT_REG];
124 	u8 cecnt_widths[NUM_CECNT_REG];
125 };
126 
127 /*
128  * Each cpu socket contains some pci devices that provide global
129  * information, and also some that are local to each of the two
130  * memory controllers on the die.
131  */
132 struct skx_dev {
133 	/* {skx,i10nm}_edac */
134 	u8 bus[4];
135 	int seg;
136 	struct pci_dev *sad_all;
137 	struct pci_dev *util_all;
138 	struct pci_dev *uracu;
139 	struct pci_dev *pcu_cr3;
140 	u32 mcroute;
141 
142 	/* imh_edac */
143 	/* System-view MMIO base physical addresses. */
144 	u64 mmio_base_h_north;
145 	u64 mmio_base_h_south;
146 	int pkg;
147 
148 	int num_imc;
149 	struct list_head list;
150 	struct skx_imc {
151 		/* i10nm_edac */
152 		struct pci_dev *mdev;
153 
154 		/* imh_edac */
155 		struct device *dev;
156 
157 		struct mem_ctl_info *mci;
158 		void __iomem *mbase;
159 		int chan_mmio_sz;
160 		int num_channels; /* channels per memory controller */
161 		int num_dimms; /* dimms per channel */
162 		bool hbm_mc;
163 		u8 mc;	/* system wide mc# */
164 		u8 lmc;	/* socket relative mc# */
165 		u8 src_id;
166 		/*
167 		 * Some server BIOS may hide certain memory controllers, and the
168 		 * EDAC driver skips those hidden memory controllers. However, the
169 		 * ADXL still decodes memory error address using physical memory
170 		 * controller indices. The mapping table is used to convert the
171 		 * physical indices (reported by ADXL) to the logical indices
172 		 * (used the EDAC driver) of present memory controllers during the
173 		 * error handling process.
174 		 */
175 		u8 mc_mapping;
176 		struct skx_channel {
177 			struct pci_dev	*cdev;
178 			struct pci_dev	*edev;
179 			/*
180 			 * Two groups of RRL control registers per channel to save default RRL
181 			 * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
182 			 */
183 			u32 rrl_ctl[2][NUM_RRL_SET];
184 			struct skx_dimm {
185 				u8 close_pg;
186 				u8 bank_xor_enable;
187 				u8 fine_grain_bank;
188 				u8 rowbits;
189 				u8 colbits;
190 			} dimms[NUM_DIMMS];
191 		} chan[NUM_CHANNELS];
192 	} imc[];
193 };
194 
/*
 * Wrapper holding a pointer to one memory controller (struct skx_imc).
 * NOTE(review): presumably used as the private data attached to a
 * mem_ctl_info - confirm against skx_register_mci().
 */
struct skx_pvt {
	struct skx_imc	*imc;
};
198 
/*
 * Platform identifier stored in res_config.type.
 * NOTE(review): the names look like Intel server platform code names -
 * confirm the exact mapping against the drivers that fill in res_config.
 */
enum type {
	SKX,
	I10NM,
	SPR,
	GNR,
	DMR,
};
206 
/*
 * Indices of the components of a decoded address.
 *
 * The INDEX_NM_* group starts at INDEX_NM_FIRST (INDEX_NM_MEMCTRL aliases
 * it) and INDEX_MAX is the total number of indices. The BIT_* macros in this
 * header turn some of these indices into single-bit masks.
 *
 * NOTE(review): "NM" presumably stands for the near memory of a 2-level
 * memory system - confirm.
 */
enum {
	INDEX_SOCKET,
	INDEX_MEMCTRL,
	INDEX_CHANNEL,
	INDEX_DIMM,
	INDEX_CS,
	INDEX_SUBCH,
	INDEX_NM_FIRST,
	INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
	INDEX_NM_CHANNEL,
	INDEX_NM_DIMM,
	INDEX_NM_CS,
	INDEX_NM_SUBCH,
	INDEX_MAX
};
222 
/*
 * Classification of where a reported error came from.
 *
 * NOTE(review): going by the names and the MCACOD_* comments in this header,
 * 1LM is the 1-level memory system and 2LM_NM / 2LM_FM are the near and far
 * memory of the 2-level memory system - confirm against the code that
 * returns these values.
 */
enum error_source {
	ERR_SRC_1LM,
	ERR_SRC_2LM_NM,
	ERR_SRC_2LM_FM,
	ERR_SRC_NOT_MEMORY,
};
229 
/*
 * Single-bit 64-bit masks whose bit position is the INDEX_* constant of the
 * same name.
 */
#define BIT_SUBCH	BIT_ULL(INDEX_SUBCH)
#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
#define BIT_NM_CS	BIT_ULL(INDEX_NM_CS)
#define BIT_NM_SUBCH	BIT_ULL(INDEX_NM_SUBCH)
236 
237 struct decoded_addr {
238 	struct mce *mce;
239 	struct skx_dev *dev;
240 	u64	addr;
241 	int	socket;
242 	int	imc;
243 	int	channel;
244 	u64	chan_addr;
245 	int	sktways;
246 	int	chanways;
247 	int	dimm;
248 	int	cs;
249 	int	subch;
250 	int	rank;
251 	int	channel_rank;
252 	u64	rank_address;
253 	int	row;
254 	int	column;
255 	int	bank_address;
256 	int	bank_group;
257 	bool	decoded_by_adxl;
258 };
259 
260 struct pci_bdf {
261 	u32 bus : 8;
262 	u32 dev : 5;
263 	u32 fun : 3;
264 };
265 
266 struct res_config {
267 	enum type type;
268 	/* DDR memory controllers per socket */
269 	int ddr_imc_num;
270 	/* DDR channels per DDR memory controller */
271 	int ddr_chan_num;
272 	/* DDR DIMMs per DDR memory channel */
273 	int ddr_dimm_num;
274 	/* Per DDR channel memory-mapped I/O size */
275 	int ddr_chan_mmio_sz;
276 	/* HBM memory controllers per socket */
277 	int hbm_imc_num;
278 	/* HBM channels per HBM memory controller */
279 	int hbm_chan_num;
280 	/* HBM DIMMs per HBM memory channel */
281 	int hbm_dimm_num;
282 	/* Per HBM channel memory-mapped I/O size */
283 	int hbm_chan_mmio_sz;
284 	bool support_ddr5;
285 	/* RRL register sets per DDR channel */
286 	struct reg_rrl *reg_rrl_ddr[2];
287 	/* RRL register sets per HBM channel */
288 	struct reg_rrl *reg_rrl_hbm[2];
289 	/* RRL control mode */
290 	enum rrl_ctrl_mode rrl_ctrl_mode;
291 	union {
292 		/* {skx,i10nm}_edac */
293 		struct {
294 			/* Configuration agent device ID */
295 			unsigned int decs_did;
296 			/* Default bus number configuration register offset */
297 			int busno_cfg_offset;
298 			struct pci_bdf sad_all_bdf;
299 			struct pci_bdf pcu_cr3_bdf;
300 			struct pci_bdf util_all_bdf;
301 			struct pci_bdf uracu_bdf;
302 			struct pci_bdf ddr_mdev_bdf;
303 			struct pci_bdf hbm_mdev_bdf;
304 			int sad_all_offset;
305 		};
306 		/* imh_edac */
307 		struct {
308 			/* MMIO base physical address in local package view */
309 			u64 mmio_base_l_north;
310 			u64 mmio_base_l_south;
311 			u64 ddr_imc_base;
312 			u64 ddr_reg_mcmtr_offset;
313 			u8  ddr_reg_mcmtr_width;
314 			u64 ddr_reg_dimmmtr_offset;
315 			u8  ddr_reg_dimmmtr_width;
316 			u64 ubox_base;
317 			u32 ubox_size;
318 			u32 ubox_reg_mmio_base_offset;
319 			u8  ubox_reg_mmio_base_width;
320 			u32 ubox_reg_socket_id_offset;
321 			u8  ubox_reg_socket_id_width;
322 			u64 pcu_base;
323 			u32 pcu_size;
324 			u32 pcu_reg_capid3_offset;
325 			u8  pcu_reg_capid3_width;
326 			u64 sca_base;
327 			u32 sca_size;
328 			u32 sca_reg_tolm_offset;
329 			u8  sca_reg_tolm_width;
330 			u32 sca_reg_tohm_offset;
331 			u8  sca_reg_tohm_width;
332 			u64 ha_base;
333 			u32 ha_size;
334 			u32 ha_reg_mode_offset;
335 			u8  ha_reg_mode_width;
336 		};
337 	};
338 };
339 
/*
 * Callback types supplied by the individual EDAC drivers.
 *
 * get_dimm_config_f: takes a memory controller and the platform
 *	configuration and returns an int; passed to skx_register_mci().
 * skx_decode_f: operates on a decoded_addr and returns a bool; installed
 *	with skx_set_decode().
 * skx_show_rrl_f: takes a decoded_addr, a message buffer with its length and
 *	a scrub-error flag; installed with skx_set_show_rrl().
 *
 * NOTE(review): the meaning of the return values is not visible in this
 * header - confirm against the implementations.
 */
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
				 struct res_config *cfg);
typedef bool (*skx_decode_f)(struct decoded_addr *res);
typedef void (*skx_show_rrl_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
344 
345 u64 skx_readx(void __iomem *addr, u8 width);
346 u64 skx_read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width);
347 void skx_write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val);
348 int skx_adxl_get(void);
349 void skx_adxl_put(void);
350 void skx_set_decode(skx_decode_f decode);
351 void skx_set_show_rrl(skx_show_rrl_f rrl);
352 void skx_show_rrl(struct decoded_addr *res, char *msg, int len, bool scrub_err);
353 void skx_enable_rrl(bool enable);
354 void skx_set_mem_cfg(bool mem_cfg_2lm);
355 void skx_set_res_cfg(struct res_config *cfg);
356 void skx_init_mc_mapping(struct skx_dev *d);
357 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc);
358 
359 int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
360 
361 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
362 
363 struct list_head *skx_get_edac_list(void);
364 
365 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
366 void skx_set_hi_lo(u64 tolm, u64 tohm);
367 
368 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
369 		      struct skx_imc *imc, int chan, int dimmno,
370 		      struct res_config *cfg);
371 
372 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
373 			int chan, int dimmno, const char *mod_str);
374 
375 int skx_register_mci(struct skx_imc *imc, struct device *dev, const char *dev_name,
376 		     const char *ctl_name, const char *mod_str,
377 		     get_dimm_config_f get_dimm_config,
378 		     struct res_config *cfg);
379 
380 int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
381 			void *data);
382 
383 void skx_remove(void);
384 
/*
 * Debug setup/teardown hooks.
 *
 * With CONFIG_EDAC_DEBUG the real functions are declared here and defined
 * elsewhere. Without it, empty inline stubs are provided so callers do not
 * need their own #ifdef.
 */
#ifdef CONFIG_EDAC_DEBUG
void skx_setup_debug(const char *name);
void skx_teardown_debug(void);
#else
static inline void skx_setup_debug(const char *name) {}
static inline void skx_teardown_debug(void) {}
#endif
392 
393 #endif /* _SKX_COMM_EDAC_H */
394