xref: /linux/drivers/edac/i7core_edac.c (revision 5d4a2e29fba5b2bef95b96a46b338ec4d76fa4fd)
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *	 Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *	http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27 
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39 
40 #include "edac_core.h"
41 
42 /*
43  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
44  * registers start at bus 255, and are not reported by BIOS.
45  * We currently find devices with only 2 sockets. In order to support more QPI
46  * (Quick Path Interconnect) sockets, just increment this number.
47  */
48 #define MAX_SOCKET_BUSES	2
49 
50 
51 /*
52  * Alter this version for the module when modifications are made
53  */
54 #define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
55 #define EDAC_MOD_STR      "i7core_edac"
56 
57 /*
58  * Debug macros
59  */
/* Log through the EDAC core, tagging every message with "i7core" */
#define i7core_printk(level, fmt, ...)				\
	edac_printk(level, "i7core", fmt, ##__VA_ARGS__)

/* Same, but through the per-memory-controller (mci) EDAC print helper */
#define i7core_mc_printk(mci, level, fmt, ...)			\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##__VA_ARGS__)
65 
66 /*
67  * i7core Memory Controller Registers
68  */
69 
70 	/* OFFSETS for Device 0 Function 0 */
71 
72 #define MC_CFG_CONTROL	0x90
73 
74 	/* OFFSETS for Device 3 Function 0 */
75 
76 #define MC_CONTROL	0x48
77 #define MC_STATUS	0x4c
78 #define MC_MAX_DOD	0x64
79 
80 /*
81  * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
82  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
83  */
84 
85 #define MC_TEST_ERR_RCV1	0x60
86   #define DIMM2_COR_ERR(r)			((r) & 0x7fff)
87 
88 #define MC_TEST_ERR_RCV0	0x64
89   #define DIMM1_COR_ERR(r)			(((r) >> 16) & 0x7fff)
90   #define DIMM0_COR_ERR(r)			((r) & 0x7fff)
91 
92 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
93 #define MC_COR_ECC_CNT_0	0x80
94 #define MC_COR_ECC_CNT_1	0x84
95 #define MC_COR_ECC_CNT_2	0x88
96 #define MC_COR_ECC_CNT_3	0x8c
97 #define MC_COR_ECC_CNT_4	0x90
98 #define MC_COR_ECC_CNT_5	0x94
99 
100 #define DIMM_TOP_COR_ERR(r)			(((r) >> 16) & 0x7fff)
101 #define DIMM_BOT_COR_ERR(r)			((r) & 0x7fff)
102 
103 
104 	/* OFFSETS for Devices 4,5 and 6 Function 0 */
105 
106 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
107   #define THREE_DIMMS_PRESENT		(1 << 24)
108   #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
109   #define QUAD_RANK_PRESENT		(1 << 22)
110   #define REGISTERED_DIMM		(1 << 15)
111 
#define MC_CHANNEL_MAPPER	0x60
  /*
   * Each channel uses a 6-bit slot of the register value (r): the low 3 bits
   * are decoded by WRLCH(), the next 3 bits by RDLCH(). Both return the raw
   * 3-bit field minus one. Every macro argument is parenthesized so that an
   * expression may safely be passed as the channel number.
   */
  #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)
115 
116 #define MC_CHANNEL_RANK_PRESENT 0x7c
117   #define RANK_PRESENT_MASK		0xffff
118 
119 #define MC_CHANNEL_ADDR_MATCH	0xf0
120 #define MC_CHANNEL_ERROR_MASK	0xf8
121 #define MC_CHANNEL_ERROR_INJECT	0xfc
122   #define INJECT_ADDR_PARITY	0x10
123   #define INJECT_ECC		0x08
124   #define MASK_CACHELINE	0x06
125   #define MASK_FULL_CACHELINE	0x06
126   #define MASK_MSB32_CACHELINE	0x04
127   #define MASK_LSB32_CACHELINE	0x02
128   #define NO_MASK_CACHELINE	0x00
129   #define REPEAT_EN		0x01
130 
131 	/* OFFSETS for Devices 4,5 and 6 Function 1 */
132 
/* Per-DIMM "DOD" registers; the macros below pick their bit fields apart */
#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  /* bits 12:10 */
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
  /* bit 9 */
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  /* bits 8:7, decoded by numbank() */
  #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  /* bits 6:5, decoded by numrank() */
  #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  /* bits 4:2, decoded by numrow() */
  #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
  /* bits 1:0, decoded by numcol() */
  #define MC_DOD_NUMCOL_MASK		3
  #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)
148 
149 #define MC_RANK_PRESENT		0x7c
150 
151 #define MC_SAG_CH_0	0x80
152 #define MC_SAG_CH_1	0x84
153 #define MC_SAG_CH_2	0x88
154 #define MC_SAG_CH_3	0x8c
155 #define MC_SAG_CH_4	0x90
156 #define MC_SAG_CH_5	0x94
157 #define MC_SAG_CH_6	0x98
158 #define MC_SAG_CH_7	0x9c
159 
160 #define MC_RIR_LIMIT_CH_0	0x40
161 #define MC_RIR_LIMIT_CH_1	0x44
162 #define MC_RIR_LIMIT_CH_2	0x48
163 #define MC_RIR_LIMIT_CH_3	0x4C
164 #define MC_RIR_LIMIT_CH_4	0x50
165 #define MC_RIR_LIMIT_CH_5	0x54
166 #define MC_RIR_LIMIT_CH_6	0x58
167 #define MC_RIR_LIMIT_CH_7	0x5C
168 #define MC_RIR_LIMIT_MASK	((1 << 10) - 1)
169 
170 #define MC_RIR_WAY_CH		0x80
171   #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
172   #define MC_RIR_WAY_RANK_MASK		0x7
173 
174 /*
175  * i7core structs
176  */
177 
178 #define NUM_CHANS 3
179 #define MAX_DIMMS 3		/* Max DIMMS per channel */
180 #define MAX_MCR_FUNC  4
181 #define MAX_CHAN_FUNC 3
182 
183 struct i7core_info {
184 	u32	mc_control;
185 	u32	mc_status;
186 	u32	max_dod;
187 	u32	ch_map;
188 };
189 
190 
191 struct i7core_inject {
192 	int	enable;
193 
194 	u32	section;
195 	u32	type;
196 	u32	eccmask;
197 
198 	/* Error address mask */
199 	int channel, dimm, rank, bank, page, col;
200 };
201 
202 struct i7core_channel {
203 	u32		ranks;
204 	u32		dimms;
205 };
206 
/* One PCI function the driver looks for; see the PCI_DESCR() initializer */
struct pci_id_descr {
	int			dev;		/* PCI device (slot) number */
	int			func;		/* PCI function number */
	int			dev_id;		/* PCI device ID */
	int			optional;	/* set for devices that may be absent */
};
213 
/* A set of PCI device descriptors together with its length */
struct pci_id_table {
	struct pci_id_descr	*descr;		/* first descriptor */
	int			n_devs;		/* number of descriptors */
};
218 
219 struct i7core_dev {
220 	struct list_head	list;
221 	u8			socket;
222 	struct pci_dev		**pdev;
223 	int			n_devs;
224 	struct mem_ctl_info	*mci;
225 };
226 
227 struct i7core_pvt {
228 	struct pci_dev	*pci_noncore;
229 	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
230 	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
231 
232 	struct i7core_dev *i7core_dev;
233 
234 	struct i7core_info	info;
235 	struct i7core_inject	inject;
236 	struct i7core_channel	channel[NUM_CHANS];
237 
238 	int		channels; /* Number of active channels */
239 
240 	int		ce_count_available;
241 	int 		csrow_map[NUM_CHANS][MAX_DIMMS];
242 
243 			/* ECC corrected errors counts per udimm */
244 	unsigned long	udimm_ce_count[MAX_DIMMS];
245 	int		udimm_last_ce_count[MAX_DIMMS];
246 			/* ECC corrected errors counts per rdimm */
247 	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
248 	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
249 
250 	unsigned int	is_registered;
251 
252 	/* mcelog glue */
253 	struct edac_mce		edac_mce;
254 
255 	/* Fifo double buffers */
256 	struct mce		mce_entry[MCE_LOG_LEN];
257 	struct mce		mce_outentry[MCE_LOG_LEN];
258 
259 	/* Fifo in/out counters */
260 	unsigned		mce_in, mce_out;
261 
262 	/* Count indicator to show errors not got */
263 	unsigned		mce_overrun;
264 };
265 
266 /* Static vars */
267 static LIST_HEAD(i7core_edac_list);
268 static DEFINE_MUTEX(i7core_edac_lock);
269 
270 #define PCI_DESCR(device, function, device_id)	\
271 	.dev = (device),			\
272 	.func = (function),			\
273 	.dev_id = (device_id)
274 
275 struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
276 		/* Memory controller */
277 	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
278 	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
279 			/* Exists only for RDIMM */
280 	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
281 	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
282 
283 		/* Channel 0 */
284 	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
285 	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
286 	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
287 	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
288 
289 		/* Channel 1 */
290 	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
291 	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
292 	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
293 	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
294 
295 		/* Channel 2 */
296 	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
297 	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
298 	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
299 	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
300 
301 		/* Generic Non-core registers */
302 	/*
303 	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
304 	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
305 	 * the probing code needs to test for the other address in case of
306 	 * failure of this one
307 	 */
308 	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
309 
310 };
311 
312 struct pci_id_descr pci_dev_descr_lynnfield[] = {
313 	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
314 	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
315 	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
316 
317 	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
318 	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
319 	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
320 	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
321 
322 	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
323 	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
324 	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
325 	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
326 
327 	/*
328 	 * This is the PCI device has an alternate address on some
329 	 * processors like Core i7 860
330 	 */
331 	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
332 };
333 
334 struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
335 		/* Memory controller */
336 	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
337 	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
338 			/* Exists only for RDIMM */
339 	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
340 	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
341 
342 		/* Channel 0 */
343 	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
344 	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
345 	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
346 	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
347 
348 		/* Channel 1 */
349 	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
350 	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
351 	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
352 	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
353 
354 		/* Channel 2 */
355 	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
356 	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
357 	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
358 	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
359 
360 		/* Generic Non-core registers */
361 	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
362 
363 };
364 
365 #define PCI_ID_TABLE_ENTRY(A) { A, ARRAY_SIZE(A) }
366 struct pci_id_table pci_dev_table[] = {
367 	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
368 	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
369 	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
370 };
371 
372 /*
373  *	pci_device_id	table for which devices we are looking for
374  */
375 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
376 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
377 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
378 	{0,}			/* 0 terminated list. */
379 };
380 
381 static struct edac_pci_ctl_info *i7core_pci;
382 
383 /****************************************************************************
384 			Ancillary status routines
385  ****************************************************************************/
386 
387 	/* MC_CONTROL bits */
/* Channel 'ch' is active when bit (8 + ch) of the cached MC_CONTROL is set */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + (ch))))
/* Bit 1 of MC_CONTROL: selects the x8 (set) or x4 (clear) ECC mode */
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

	/* MC_STATUS bits */
/* Bit 4 of the cached MC_STATUS: ECC is enabled */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
/* Channel 'ch' is disabled when bit 'ch' of the cached MC_STATUS is set */
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << (ch)))
394 
395 	/* MC_MAX_DOD read functions */
/* Decode a 2-bit DIMM count field: the encoded value plus one (1..4) */
static inline int numdimms(u32 dimms)
{
	const u32 field = dimms & 0x3;

	return field + 1;
}
400 
/*
 * Decode a 2-bit rank field into a number of ranks (1, 2 or 4).
 * The encoding 3 has no meaning here and yields -EINVAL.
 */
static inline int numrank(u32 rank)
{
	switch (rank & 0x3) {
	case 0:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	default:
		return -EINVAL;
	}
}
407 
/*
 * Decode a 2-bit bank field into a number of banks (4, 8 or 16).
 * The encoding 3 has no meaning here and yields -EINVAL.
 */
static inline int numbank(u32 bank)
{
	const u32 code = bank & 0x3;

	if (code == 3)
		return -EINVAL;

	return 4 << code;
}
414 
/*
 * Decode a 3-bit row field into a number of rows: 2^12 for code 0 up to
 * 2^16 for code 4. Codes 5-7 yield -EINVAL.
 */
static inline int numrow(u32 row)
{
	const u32 code = row & 0x7;

	if (code > 4)
		return -EINVAL;

	return 1 << (12 + code);
}
424 
/*
 * Decode a 2-bit column field into a number of columns (2^10, 2^11 or 2^12).
 * The encoding 3 has no meaning here and yields -EINVAL.
 */
static inline int numcol(u32 col)
{
	/* Only four encodings exist, so the lookup table has four entries */
	static const int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};

	return cols[col & 0x3];
}
432 
433 static struct i7core_dev *get_i7core_dev(u8 socket)
434 {
435 	struct i7core_dev *i7core_dev;
436 
437 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
438 		if (i7core_dev->socket == socket)
439 			return i7core_dev;
440 	}
441 
442 	return NULL;
443 }
444 
445 /****************************************************************************
446 			Memory check routines
447  ****************************************************************************/
448 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
449 					  unsigned func)
450 {
451 	struct i7core_dev *i7core_dev = get_i7core_dev(socket);
452 	int i;
453 
454 	if (!i7core_dev)
455 		return NULL;
456 
457 	for (i = 0; i < i7core_dev->n_devs; i++) {
458 		if (!i7core_dev->pdev[i])
459 			continue;
460 
461 		if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
462 		    PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
463 			return i7core_dev->pdev[i];
464 		}
465 	}
466 
467 	return NULL;
468 }
469 
470 /**
471  * i7core_get_active_channels() - gets the number of channels and csrows
472  * @socket:	Quick Path Interconnect socket
473  * @channels:	Number of channels that will be returned
474  * @csrows:	Number of csrows found
475  *
476  * Since EDAC core needs to know in advance the number of available channels
477  * and csrows, in order to allocate memory for csrows/channels, it is needed
478  * to run two similar steps. At the first step, implemented on this function,
479  * it checks the number of csrows/channels present at one socket.
480  * this is used in order to properly allocate the size of mci components.
481  *
482  * It should be noticed that none of the current available datasheets explain
483  * or even mention how csrows are seen by the memory controller. So, we need
484  * to add a fake description for csrows.
485  * So, this driver is attributing one DIMM memory for one csrow.
486  */
487 static int i7core_get_active_channels(u8 socket, unsigned *channels,
488 				      unsigned *csrows)
489 {
490 	struct pci_dev *pdev = NULL;
491 	int i, j;
492 	u32 status, control;
493 
494 	*channels = 0;
495 	*csrows = 0;
496 
497 	pdev = get_pdev_slot_func(socket, 3, 0);
498 	if (!pdev) {
499 		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
500 			      socket);
501 		return -ENODEV;
502 	}
503 
504 	/* Device 3 function 0 reads */
505 	pci_read_config_dword(pdev, MC_STATUS, &status);
506 	pci_read_config_dword(pdev, MC_CONTROL, &control);
507 
508 	for (i = 0; i < NUM_CHANS; i++) {
509 		u32 dimm_dod[3];
510 		/* Check if the channel is active */
511 		if (!(control & (1 << (8 + i))))
512 			continue;
513 
514 		/* Check if the channel is disabled */
515 		if (status & (1 << i))
516 			continue;
517 
518 		pdev = get_pdev_slot_func(socket, i + 4, 1);
519 		if (!pdev) {
520 			i7core_printk(KERN_ERR, "Couldn't find socket %d "
521 						"fn %d.%d!!!\n",
522 						socket, i + 4, 1);
523 			return -ENODEV;
524 		}
525 		/* Devices 4-6 function 1 */
526 		pci_read_config_dword(pdev,
527 				MC_DOD_CH_DIMM0, &dimm_dod[0]);
528 		pci_read_config_dword(pdev,
529 				MC_DOD_CH_DIMM1, &dimm_dod[1]);
530 		pci_read_config_dword(pdev,
531 				MC_DOD_CH_DIMM2, &dimm_dod[2]);
532 
533 		(*channels)++;
534 
535 		for (j = 0; j < 3; j++) {
536 			if (!DIMM_PRESENT(dimm_dod[j]))
537 				continue;
538 			(*csrows)++;
539 		}
540 	}
541 
542 	debugf0("Number of active channels on socket %d: %d\n",
543 		socket, *channels);
544 
545 	return 0;
546 }
547 
548 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
549 {
550 	struct i7core_pvt *pvt = mci->pvt_info;
551 	struct csrow_info *csr;
552 	struct pci_dev *pdev;
553 	int i, j;
554 	unsigned long last_page = 0;
555 	enum edac_type mode;
556 	enum mem_type mtype;
557 
558 	/* Get data from the MC register, function 0 */
559 	pdev = pvt->pci_mcr[0];
560 	if (!pdev)
561 		return -ENODEV;
562 
563 	/* Device 3 function 0 reads */
564 	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
565 	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
566 	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
567 	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
568 
569 	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
570 		pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
571 		pvt->info.max_dod, pvt->info.ch_map);
572 
573 	if (ECC_ENABLED(pvt)) {
574 		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
575 		if (ECCx8(pvt))
576 			mode = EDAC_S8ECD8ED;
577 		else
578 			mode = EDAC_S4ECD4ED;
579 	} else {
580 		debugf0("ECC disabled\n");
581 		mode = EDAC_NONE;
582 	}
583 
584 	/* FIXME: need to handle the error codes */
585 	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
586 		"x%x x 0x%x\n",
587 		numdimms(pvt->info.max_dod),
588 		numrank(pvt->info.max_dod >> 2),
589 		numbank(pvt->info.max_dod >> 4),
590 		numrow(pvt->info.max_dod >> 6),
591 		numcol(pvt->info.max_dod >> 9));
592 
593 	for (i = 0; i < NUM_CHANS; i++) {
594 		u32 data, dimm_dod[3], value[8];
595 
596 		if (!pvt->pci_ch[i][0])
597 			continue;
598 
599 		if (!CH_ACTIVE(pvt, i)) {
600 			debugf0("Channel %i is not active\n", i);
601 			continue;
602 		}
603 		if (CH_DISABLED(pvt, i)) {
604 			debugf0("Channel %i is disabled\n", i);
605 			continue;
606 		}
607 
608 		/* Devices 4-6 function 0 */
609 		pci_read_config_dword(pvt->pci_ch[i][0],
610 				MC_CHANNEL_DIMM_INIT_PARAMS, &data);
611 
612 		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
613 						4 : 2;
614 
615 		if (data & REGISTERED_DIMM)
616 			mtype = MEM_RDDR3;
617 		else
618 			mtype = MEM_DDR3;
619 #if 0
620 		if (data & THREE_DIMMS_PRESENT)
621 			pvt->channel[i].dimms = 3;
622 		else if (data & SINGLE_QUAD_RANK_PRESENT)
623 			pvt->channel[i].dimms = 1;
624 		else
625 			pvt->channel[i].dimms = 2;
626 #endif
627 
628 		/* Devices 4-6 function 1 */
629 		pci_read_config_dword(pvt->pci_ch[i][1],
630 				MC_DOD_CH_DIMM0, &dimm_dod[0]);
631 		pci_read_config_dword(pvt->pci_ch[i][1],
632 				MC_DOD_CH_DIMM1, &dimm_dod[1]);
633 		pci_read_config_dword(pvt->pci_ch[i][1],
634 				MC_DOD_CH_DIMM2, &dimm_dod[2]);
635 
636 		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
637 			"%d ranks, %cDIMMs\n",
638 			i,
639 			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
640 			data,
641 			pvt->channel[i].ranks,
642 			(data & REGISTERED_DIMM) ? 'R' : 'U');
643 
644 		for (j = 0; j < 3; j++) {
645 			u32 banks, ranks, rows, cols;
646 			u32 size, npages;
647 
648 			if (!DIMM_PRESENT(dimm_dod[j]))
649 				continue;
650 
651 			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
652 			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
653 			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
654 			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
655 
656 			/* DDR3 has 8 I/O banks */
657 			size = (rows * cols * banks * ranks) >> (20 - 3);
658 
659 			pvt->channel[i].dimms++;
660 
661 			debugf0("\tdimm %d %d Mb offset: %x, "
662 				"bank: %d, rank: %d, row: %#x, col: %#x\n",
663 				j, size,
664 				RANKOFFSET(dimm_dod[j]),
665 				banks, ranks, rows, cols);
666 
667 #if PAGE_SHIFT > 20
668 			npages = size >> (PAGE_SHIFT - 20);
669 #else
670 			npages = size << (20 - PAGE_SHIFT);
671 #endif
672 
673 			csr = &mci->csrows[*csrow];
674 			csr->first_page = last_page + 1;
675 			last_page += npages;
676 			csr->last_page = last_page;
677 			csr->nr_pages = npages;
678 
679 			csr->page_mask = 0;
680 			csr->grain = 8;
681 			csr->csrow_idx = *csrow;
682 			csr->nr_channels = 1;
683 
684 			csr->channels[0].chan_idx = i;
685 			csr->channels[0].ce_count = 0;
686 
687 			pvt->csrow_map[i][j] = *csrow;
688 
689 			switch (banks) {
690 			case 4:
691 				csr->dtype = DEV_X4;
692 				break;
693 			case 8:
694 				csr->dtype = DEV_X8;
695 				break;
696 			case 16:
697 				csr->dtype = DEV_X16;
698 				break;
699 			default:
700 				csr->dtype = DEV_UNKNOWN;
701 			}
702 
703 			csr->edac_mode = mode;
704 			csr->mtype = mtype;
705 
706 			(*csrow)++;
707 		}
708 
709 		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
710 		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
711 		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
712 		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
713 		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
714 		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
715 		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
716 		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
717 		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
718 		for (j = 0; j < 8; j++)
719 			debugf1("\t\t%#x\t%#x\t%#x\n",
720 				(value[j] >> 27) & 0x1,
721 				(value[j] >> 24) & 0x7,
722 				(value[j] && ((1 << 24) - 1)));
723 	}
724 
725 	return 0;
726 }
727 
728 /****************************************************************************
729 			Error insertion routines
730  ****************************************************************************/
731 
732 /* The i7core has independent error injection features per channel.
733    However, to have a simpler code, we don't allow enabling error injection
734    on more than one channel.
735    Also, since a change at an inject parameter will be applied only at enable,
736    we're disabling error injection on all write calls to the sysfs nodes that
737    controls the error code injection.
738  */
739 static int disable_inject(struct mem_ctl_info *mci)
740 {
741 	struct i7core_pvt *pvt = mci->pvt_info;
742 
743 	pvt->inject.enable = 0;
744 
745 	if (!pvt->pci_ch[pvt->inject.channel][0])
746 		return -ENODEV;
747 
748 	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
749 				MC_CHANNEL_ERROR_INJECT, 0);
750 
751 	return 0;
752 }
753 
754 /*
755  * i7core inject inject.section
756  *
757  *	accept and store error injection inject.section value
758  *	bit 0 - refers to the lower 32-byte half cacheline
759  *	bit 1 - refers to the upper 32-byte half cacheline
760  */
761 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
762 					   const char *data, size_t count)
763 {
764 	struct i7core_pvt *pvt = mci->pvt_info;
765 	unsigned long value;
766 	int rc;
767 
768 	if (pvt->inject.enable)
769 		disable_inject(mci);
770 
771 	rc = strict_strtoul(data, 10, &value);
772 	if ((rc < 0) || (value > 3))
773 		return -EIO;
774 
775 	pvt->inject.section = (u32) value;
776 	return count;
777 }
778 
779 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
780 					      char *data)
781 {
782 	struct i7core_pvt *pvt = mci->pvt_info;
783 	return sprintf(data, "0x%08x\n", pvt->inject.section);
784 }
785 
786 /*
787  * i7core inject.type
788  *
789  *	accept and store error injection inject.section value
790  *	bit 0 - repeat enable - Enable error repetition
791  *	bit 1 - inject ECC error
792  *	bit 2 - inject parity error
793  */
794 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
795 					const char *data, size_t count)
796 {
797 	struct i7core_pvt *pvt = mci->pvt_info;
798 	unsigned long value;
799 	int rc;
800 
801 	if (pvt->inject.enable)
802 		disable_inject(mci);
803 
804 	rc = strict_strtoul(data, 10, &value);
805 	if ((rc < 0) || (value > 7))
806 		return -EIO;
807 
808 	pvt->inject.type = (u32) value;
809 	return count;
810 }
811 
812 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
813 					      char *data)
814 {
815 	struct i7core_pvt *pvt = mci->pvt_info;
816 	return sprintf(data, "0x%08x\n", pvt->inject.type);
817 }
818 
819 /*
820  * i7core_inject_inject.eccmask_store
821  *
822  * The type of error (UE/CE) will depend on the inject.eccmask value:
823  *   Any bits set to a 1 will flip the corresponding ECC bit
824  *   Correctable errors can be injected by flipping 1 bit or the bits within
825  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
826  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
827  *   uncorrectable error to be injected.
828  */
829 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
830 					const char *data, size_t count)
831 {
832 	struct i7core_pvt *pvt = mci->pvt_info;
833 	unsigned long value;
834 	int rc;
835 
836 	if (pvt->inject.enable)
837 		disable_inject(mci);
838 
839 	rc = strict_strtoul(data, 10, &value);
840 	if (rc < 0)
841 		return -EIO;
842 
843 	pvt->inject.eccmask = (u32) value;
844 	return count;
845 }
846 
847 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
848 					      char *data)
849 {
850 	struct i7core_pvt *pvt = mci->pvt_info;
851 	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
852 }
853 
/*
 * i7core_addrmatch
 *
 * DECLARE_ADDR_MATCH(param, limit) generates the sysfs store/show pair for
 * one address-match criterion, kept in pvt->inject.<param>:
 *
 *   store: accepts "any" (stored as -1) or a decimal number smaller than
 *	    @limit; everything else is rejected with -EIO. An armed error
 *	    injection is disabled before the input is looked at.
 *   show:  prints "any" for a negative value, the decimal value otherwise.
 *
 * The number is parsed into an unsigned long, as strict_strtoul() requires,
 * so that huge inputs are rejected by the limit check instead of wrapping
 * to a negative value that would read back as "any".
 */

#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
		struct mem_ctl_info *mci,			\
		const char *data, size_t count)			\
{								\
	struct i7core_pvt *pvt;					\
	unsigned long parsed;					\
	long value;						\
	int rc;							\
								\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		rc = strict_strtoul(data, 10, &parsed);		\
		if ((rc < 0) || (parsed >= (limit)))		\
			return -EIO;				\
		value = parsed;					\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}
906 
907 #define ATTR_ADDR_MATCH(param)					\
908 	{							\
909 		.attr = {					\
910 			.name = #param,				\
911 			.mode = (S_IRUGO | S_IWUSR)		\
912 		},						\
913 		.show  = i7core_inject_show_##param,		\
914 		.store = i7core_inject_store_##param,		\
915 	}
916 
917 DECLARE_ADDR_MATCH(channel, 3);
918 DECLARE_ADDR_MATCH(dimm, 3);
919 DECLARE_ADDR_MATCH(rank, 4);
920 DECLARE_ADDR_MATCH(bank, 32);
921 DECLARE_ADDR_MATCH(page, 0x10000);
922 DECLARE_ADDR_MATCH(col, 0x4000);
923 
924 static int write_and_test(struct pci_dev *dev, int where, u32 val)
925 {
926 	u32 read;
927 	int count;
928 
929 	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
930 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
931 		where, val);
932 
933 	for (count = 0; count < 10; count++) {
934 		if (count)
935 			msleep(100);
936 		pci_write_config_dword(dev, where, val);
937 		pci_read_config_dword(dev, where, &read);
938 
939 		if (read == val)
940 			return 0;
941 	}
942 
943 	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
944 		"write=%08x. Read=%08x\n",
945 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
946 		where, val, read);
947 
948 	return -EINVAL;
949 }
950 
951 /*
952  * This routine prepares the Memory Controller for error injection.
953  * The error will be injected when some process tries to write to the
954  * memory that matches the given criteria.
955  * The criteria can be set in terms of a mask where dimm, rank, bank, page
956  * and col can be specified.
957  * A -1 value for any of the mask items will make the MCU to ignore
958  * that matching criteria for error injection.
959  *
960  * It should be noticed that the error will only happen after a write operation
961  * on a memory that matches the condition. if REPEAT_EN is not enabled at
962  * inject mask, then it will produce just one error. Otherwise, it will repeat
963  * until the injectmask would be cleaned.
964  *
965  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
966  *    is reliable enough to check if the MC is using the
967  *    three channels. However, this is not clear at the datasheet.
968  */
969 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
970 				       const char *data, size_t count)
971 {
972 	struct i7core_pvt *pvt = mci->pvt_info;
973 	u32 injectmask;
974 	u64 mask = 0;
975 	int  rc;
976 	long enable;
977 
978 	if (!pvt->pci_ch[pvt->inject.channel][0])
979 		return 0;
980 
981 	rc = strict_strtoul(data, 10, &enable);
982 	if ((rc < 0))
983 		return 0;
984 
985 	if (enable) {
986 		pvt->inject.enable = 1;
987 	} else {
988 		disable_inject(mci);
989 		return count;
990 	}
991 
992 	/* Sets pvt->inject.dimm mask */
993 	if (pvt->inject.dimm < 0)
994 		mask |= 1LL << 41;
995 	else {
996 		if (pvt->channel[pvt->inject.channel].dimms > 2)
997 			mask |= (pvt->inject.dimm & 0x3LL) << 35;
998 		else
999 			mask |= (pvt->inject.dimm & 0x1LL) << 36;
1000 	}
1001 
1002 	/* Sets pvt->inject.rank mask */
1003 	if (pvt->inject.rank < 0)
1004 		mask |= 1LL << 40;
1005 	else {
1006 		if (pvt->channel[pvt->inject.channel].dimms > 2)
1007 			mask |= (pvt->inject.rank & 0x1LL) << 34;
1008 		else
1009 			mask |= (pvt->inject.rank & 0x3LL) << 34;
1010 	}
1011 
1012 	/* Sets pvt->inject.bank mask */
1013 	if (pvt->inject.bank < 0)
1014 		mask |= 1LL << 39;
1015 	else
1016 		mask |= (pvt->inject.bank & 0x15LL) << 30;
1017 
1018 	/* Sets pvt->inject.page mask */
1019 	if (pvt->inject.page < 0)
1020 		mask |= 1LL << 38;
1021 	else
1022 		mask |= (pvt->inject.page & 0xffff) << 14;
1023 
1024 	/* Sets pvt->inject.column mask */
1025 	if (pvt->inject.col < 0)
1026 		mask |= 1LL << 37;
1027 	else
1028 		mask |= (pvt->inject.col & 0x3fff);
1029 
1030 	/*
1031 	 * bit    0: REPEAT_EN
1032 	 * bits 1-2: MASK_HALF_CACHELINE
1033 	 * bit    3: INJECT_ECC
1034 	 * bit    4: INJECT_ADDR_PARITY
1035 	 */
1036 
1037 	injectmask = (pvt->inject.type & 1) |
1038 		     (pvt->inject.section & 0x3) << 1 |
1039 		     (pvt->inject.type & 0x6) << (3 - 1);
1040 
1041 	/* Unlock writes to registers - this register is write only */
1042 	pci_write_config_dword(pvt->pci_noncore,
1043 			       MC_CFG_CONTROL, 0x2);
1044 
1045 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1046 			       MC_CHANNEL_ADDR_MATCH, mask);
1047 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1048 			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1049 
1050 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1051 			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1052 
1053 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1054 			       MC_CHANNEL_ERROR_INJECT, injectmask);
1055 
1056 	/*
1057 	 * This is something undocumented, based on my tests
1058 	 * Without writing 8 to this register, errors aren't injected. Not sure
1059 	 * why.
1060 	 */
1061 	pci_write_config_dword(pvt->pci_noncore,
1062 			       MC_CFG_CONTROL, 8);
1063 
1064 	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1065 		" inject 0x%08x\n",
1066 		mask, pvt->inject.eccmask, injectmask);
1067 
1068 
1069 	return count;
1070 }
1071 
1072 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1073 					char *data)
1074 {
1075 	struct i7core_pvt *pvt = mci->pvt_info;
1076 	u32 injectmask;
1077 
1078 	if (!pvt->pci_ch[pvt->inject.channel][0])
1079 		return 0;
1080 
1081 	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1082 			       MC_CHANNEL_ERROR_INJECT, &injectmask);
1083 
1084 	debugf0("Inject error read: 0x%018x\n", injectmask);
1085 
1086 	if (injectmask & 0x0c)
1087 		pvt->inject.enable = 1;
1088 
1089 	return sprintf(data, "%d\n", pvt->inject.enable);
1090 }
1091 
1092 #define DECLARE_COUNTER(param)					\
1093 static ssize_t i7core_show_counter_##param(			\
1094 		struct mem_ctl_info *mci,			\
1095 		char *data)					\
1096 {								\
1097 	struct i7core_pvt *pvt = mci->pvt_info;			\
1098 								\
1099 	debugf1("%s() \n", __func__);				\
1100 	if (!pvt->ce_count_available || (pvt->is_registered))	\
1101 		return sprintf(data, "data unavailable\n");	\
1102 	return sprintf(data, "%lu\n",				\
1103 			pvt->udimm_ce_count[param]);		\
1104 }
1105 
1106 #define ATTR_COUNTER(param)					\
1107 	{							\
1108 		.attr = {					\
1109 			.name = __stringify(udimm##param),	\
1110 			.mode = (S_IRUGO | S_IWUSR)		\
1111 		},						\
1112 		.show  = i7core_show_counter_##param		\
1113 	}
1114 
1115 DECLARE_COUNTER(0);
1116 DECLARE_COUNTER(1);
1117 DECLARE_COUNTER(2);
1118 
1119 /*
1120  * Sysfs struct
1121  */
1122 
1123 
1124 static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1125 	ATTR_ADDR_MATCH(channel),
1126 	ATTR_ADDR_MATCH(dimm),
1127 	ATTR_ADDR_MATCH(rank),
1128 	ATTR_ADDR_MATCH(bank),
1129 	ATTR_ADDR_MATCH(page),
1130 	ATTR_ADDR_MATCH(col),
1131 	{ .attr = { .name = NULL } }
1132 };
1133 
1134 static struct mcidev_sysfs_group i7core_inject_addrmatch = {
1135 	.name  = "inject_addrmatch",
1136 	.mcidev_attr = i7core_addrmatch_attrs,
1137 };
1138 
1139 static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1140 	ATTR_COUNTER(0),
1141 	ATTR_COUNTER(1),
1142 	ATTR_COUNTER(2),
1143 };
1144 
1145 static struct mcidev_sysfs_group i7core_udimm_counters = {
1146 	.name  = "all_channel_counts",
1147 	.mcidev_attr = i7core_udimm_counters_attrs,
1148 };
1149 
1150 static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
1151 	{
1152 		.attr = {
1153 			.name = "inject_section",
1154 			.mode = (S_IRUGO | S_IWUSR)
1155 		},
1156 		.show  = i7core_inject_section_show,
1157 		.store = i7core_inject_section_store,
1158 	}, {
1159 		.attr = {
1160 			.name = "inject_type",
1161 			.mode = (S_IRUGO | S_IWUSR)
1162 		},
1163 		.show  = i7core_inject_type_show,
1164 		.store = i7core_inject_type_store,
1165 	}, {
1166 		.attr = {
1167 			.name = "inject_eccmask",
1168 			.mode = (S_IRUGO | S_IWUSR)
1169 		},
1170 		.show  = i7core_inject_eccmask_show,
1171 		.store = i7core_inject_eccmask_store,
1172 	}, {
1173 		.grp = &i7core_inject_addrmatch,
1174 	}, {
1175 		.attr = {
1176 			.name = "inject_enable",
1177 			.mode = (S_IRUGO | S_IWUSR)
1178 		},
1179 		.show  = i7core_inject_enable_show,
1180 		.store = i7core_inject_enable_store,
1181 	},
1182 	{ .attr = { .name = NULL } },	/* Reserved for udimm counters */
1183 	{ .attr = { .name = NULL } }
1184 };
1185 
1186 /****************************************************************************
1187 	Device initialization routines: put/get, init/exit
1188  ****************************************************************************/
1189 
1190 /*
1191  *	i7core_put_devices	'put' all the devices that we have
1192  *				reserved via 'get'
1193  */
1194 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1195 {
1196 	int i;
1197 
1198 	debugf0(__FILE__ ": %s()\n", __func__);
1199 	for (i = 0; i < i7core_dev->n_devs; i++) {
1200 		struct pci_dev *pdev = i7core_dev->pdev[i];
1201 		if (!pdev)
1202 			continue;
1203 		debugf0("Removing dev %02x:%02x.%d\n",
1204 			pdev->bus->number,
1205 			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1206 		pci_dev_put(pdev);
1207 	}
1208 	kfree(i7core_dev->pdev);
1209 	list_del(&i7core_dev->list);
1210 	kfree(i7core_dev);
1211 }
1212 
1213 static void i7core_put_all_devices(void)
1214 {
1215 	struct i7core_dev *i7core_dev, *tmp;
1216 
1217 	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
1218 		i7core_put_devices(i7core_dev);
1219 }
1220 
1221 static void __init i7core_xeon_pci_fixup(struct pci_id_table *table)
1222 {
1223 	struct pci_dev *pdev = NULL;
1224 	int i;
1225 	/*
1226 	 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1227 	 * aren't announced by acpi. So, we need to use a legacy scan probing
1228 	 * to detect them
1229 	 */
1230 	while (table && table->descr) {
1231 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1232 		if (unlikely(!pdev)) {
1233 			for (i = 0; i < MAX_SOCKET_BUSES; i++)
1234 				pcibios_scan_specific_bus(255-i);
1235 		}
1236 		table++;
1237 	}
1238 }
1239 
1240 /*
1241  *	i7core_get_devices	Find and perform 'get' operation on the MCH's
1242  *			device/functions we want to reference for this driver
1243  *
1244  *			Need to 'get' device 16 func 1 and func 2
1245  */
1246 int i7core_get_onedevice(struct pci_dev **prev, int devno,
1247 			 struct pci_id_descr *dev_descr, unsigned n_devs)
1248 {
1249 	struct i7core_dev *i7core_dev;
1250 
1251 	struct pci_dev *pdev = NULL;
1252 	u8 bus = 0;
1253 	u8 socket = 0;
1254 
1255 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1256 			      dev_descr->dev_id, *prev);
1257 
1258 	/*
1259 	 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1260 	 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1261 	 * to probe for the alternate address in case of failure
1262 	 */
1263 	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1264 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1265 				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1266 
1267 	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1268 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1269 				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1270 				      *prev);
1271 
1272 	if (!pdev) {
1273 		if (*prev) {
1274 			*prev = pdev;
1275 			return 0;
1276 		}
1277 
1278 		if (dev_descr->optional)
1279 			return 0;
1280 
1281 		if (devno == 0)
1282 			return -ENODEV;
1283 
1284 		i7core_printk(KERN_ERR,
1285 			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1286 			dev_descr->dev, dev_descr->func,
1287 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1288 
1289 		/* End of list, leave */
1290 		return -ENODEV;
1291 	}
1292 	bus = pdev->bus->number;
1293 
1294 	if (bus == 0x3f)
1295 		socket = 0;
1296 	else
1297 		socket = 255 - bus;
1298 
1299 	i7core_dev = get_i7core_dev(socket);
1300 	if (!i7core_dev) {
1301 		i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1302 		if (!i7core_dev)
1303 			return -ENOMEM;
1304 		i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
1305 					   GFP_KERNEL);
1306 		if (!i7core_dev->pdev) {
1307 			kfree(i7core_dev);
1308 			return -ENOMEM;
1309 		}
1310 		i7core_dev->socket = socket;
1311 		i7core_dev->n_devs = n_devs;
1312 		list_add_tail(&i7core_dev->list, &i7core_edac_list);
1313 	}
1314 
1315 	if (i7core_dev->pdev[devno]) {
1316 		i7core_printk(KERN_ERR,
1317 			"Duplicated device for "
1318 			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
1319 			bus, dev_descr->dev, dev_descr->func,
1320 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1321 		pci_dev_put(pdev);
1322 		return -ENODEV;
1323 	}
1324 
1325 	i7core_dev->pdev[devno] = pdev;
1326 
1327 	/* Sanity check */
1328 	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1329 			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1330 		i7core_printk(KERN_ERR,
1331 			"Device PCI ID %04x:%04x "
1332 			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1333 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1334 			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1335 			bus, dev_descr->dev, dev_descr->func);
1336 		return -ENODEV;
1337 	}
1338 
1339 	/* Be sure that the device is enabled */
1340 	if (unlikely(pci_enable_device(pdev) < 0)) {
1341 		i7core_printk(KERN_ERR,
1342 			"Couldn't enable "
1343 			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
1344 			bus, dev_descr->dev, dev_descr->func,
1345 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1346 		return -ENODEV;
1347 	}
1348 
1349 	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1350 		socket, bus, dev_descr->dev,
1351 		dev_descr->func,
1352 		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1353 
1354 	*prev = pdev;
1355 
1356 	return 0;
1357 }
1358 
1359 static int i7core_get_devices(struct pci_id_table *table)
1360 {
1361 	int i, rc;
1362 	struct pci_dev *pdev = NULL;
1363 	struct pci_id_descr *dev_descr;
1364 
1365 	while (table && table->descr) {
1366 		dev_descr = table->descr;
1367 		for (i = 0; i < table->n_devs; i++) {
1368 			pdev = NULL;
1369 			do {
1370 				rc = i7core_get_onedevice(&pdev, i, &dev_descr[i],
1371 							  table->n_devs);
1372 				if (rc < 0) {
1373 					if (i == 0) {
1374 						i = table->n_devs;
1375 						break;
1376 					}
1377 					i7core_put_all_devices();
1378 					return -ENODEV;
1379 				}
1380 			} while (pdev);
1381 		}
1382 		table++;
1383 	}
1384 
1385 	return 0;
1386 	return 0;
1387 }
1388 
1389 static int mci_bind_devs(struct mem_ctl_info *mci,
1390 			 struct i7core_dev *i7core_dev)
1391 {
1392 	struct i7core_pvt *pvt = mci->pvt_info;
1393 	struct pci_dev *pdev;
1394 	int i, func, slot;
1395 
1396 	/* Associates i7core_dev and mci for future usage */
1397 	pvt->i7core_dev = i7core_dev;
1398 	i7core_dev->mci = mci;
1399 
1400 	pvt->is_registered = 0;
1401 	for (i = 0; i < i7core_dev->n_devs; i++) {
1402 		pdev = i7core_dev->pdev[i];
1403 		if (!pdev)
1404 			continue;
1405 
1406 		func = PCI_FUNC(pdev->devfn);
1407 		slot = PCI_SLOT(pdev->devfn);
1408 		if (slot == 3) {
1409 			if (unlikely(func > MAX_MCR_FUNC))
1410 				goto error;
1411 			pvt->pci_mcr[func] = pdev;
1412 		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1413 			if (unlikely(func > MAX_CHAN_FUNC))
1414 				goto error;
1415 			pvt->pci_ch[slot - 4][func] = pdev;
1416 		} else if (!slot && !func)
1417 			pvt->pci_noncore = pdev;
1418 		else
1419 			goto error;
1420 
1421 		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1422 			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1423 			pdev, i7core_dev->socket);
1424 
1425 		if (PCI_SLOT(pdev->devfn) == 3 &&
1426 			PCI_FUNC(pdev->devfn) == 2)
1427 			pvt->is_registered = 1;
1428 	}
1429 
1430 	/*
1431 	 * Add extra nodes to count errors on udimm
1432 	 * For registered memory, this is not needed, since the counters
1433 	 * are already displayed at the standard locations
1434 	 */
1435 	if (!pvt->is_registered)
1436 		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
1437 			&i7core_udimm_counters;
1438 
1439 	return 0;
1440 
1441 error:
1442 	i7core_printk(KERN_ERR, "Device %d, function %d "
1443 		      "is out of the expected range\n",
1444 		      slot, func);
1445 	return -EINVAL;
1446 }
1447 
1448 /****************************************************************************
1449 			Error check routines
1450  ****************************************************************************/
1451 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1452 					 int chan, int dimm, int add)
1453 {
1454 	char *msg;
1455 	struct i7core_pvt *pvt = mci->pvt_info;
1456 	int row = pvt->csrow_map[chan][dimm], i;
1457 
1458 	for (i = 0; i < add; i++) {
1459 		msg = kasprintf(GFP_KERNEL, "Corrected error "
1460 				"(Socket=%d channel=%d dimm=%d)",
1461 				pvt->i7core_dev->socket, chan, dimm);
1462 
1463 		edac_mc_handle_fbd_ce(mci, row, 0, msg);
1464 		kfree (msg);
1465 	}
1466 }
1467 
1468 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1469 			int chan, int new0, int new1, int new2)
1470 {
1471 	struct i7core_pvt *pvt = mci->pvt_info;
1472 	int add0 = 0, add1 = 0, add2 = 0;
1473 	/* Updates CE counters if it is not the first time here */
1474 	if (pvt->ce_count_available) {
1475 		/* Updates CE counters */
1476 
1477 		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1478 		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1479 		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1480 
1481 		if (add2 < 0)
1482 			add2 += 0x7fff;
1483 		pvt->rdimm_ce_count[chan][2] += add2;
1484 
1485 		if (add1 < 0)
1486 			add1 += 0x7fff;
1487 		pvt->rdimm_ce_count[chan][1] += add1;
1488 
1489 		if (add0 < 0)
1490 			add0 += 0x7fff;
1491 		pvt->rdimm_ce_count[chan][0] += add0;
1492 	} else
1493 		pvt->ce_count_available = 1;
1494 
1495 	/* Store the new values */
1496 	pvt->rdimm_last_ce_count[chan][2] = new2;
1497 	pvt->rdimm_last_ce_count[chan][1] = new1;
1498 	pvt->rdimm_last_ce_count[chan][0] = new0;
1499 
1500 	/*updated the edac core */
1501 	if (add0 != 0)
1502 		i7core_rdimm_update_csrow(mci, chan, 0, add0);
1503 	if (add1 != 0)
1504 		i7core_rdimm_update_csrow(mci, chan, 1, add1);
1505 	if (add2 != 0)
1506 		i7core_rdimm_update_csrow(mci, chan, 2, add2);
1507 
1508 }
1509 
1510 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1511 {
1512 	struct i7core_pvt *pvt = mci->pvt_info;
1513 	u32 rcv[3][2];
1514 	int i, new0, new1, new2;
1515 
1516 	/*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
1517 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1518 								&rcv[0][0]);
1519 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1520 								&rcv[0][1]);
1521 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1522 								&rcv[1][0]);
1523 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1524 								&rcv[1][1]);
1525 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1526 								&rcv[2][0]);
1527 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1528 								&rcv[2][1]);
1529 	for (i = 0 ; i < 3; i++) {
1530 		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1531 			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1532 		/*if the channel has 3 dimms*/
1533 		if (pvt->channel[i].dimms > 2) {
1534 			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1535 			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1536 			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1537 		} else {
1538 			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1539 					DIMM_BOT_COR_ERR(rcv[i][0]);
1540 			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1541 					DIMM_BOT_COR_ERR(rcv[i][1]);
1542 			new2 = 0;
1543 		}
1544 
1545 		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1546 	}
1547 }
1548 
1549 /* This function is based on the device 3 function 4 registers as described on:
1550  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1551  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1552  * also available at:
1553  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1554  */
1555 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1556 {
1557 	struct i7core_pvt *pvt = mci->pvt_info;
1558 	u32 rcv1, rcv0;
1559 	int new0, new1, new2;
1560 
1561 	if (!pvt->pci_mcr[4]) {
1562 		debugf0("%s MCR registers not found\n", __func__);
1563 		return;
1564 	}
1565 
1566 	/* Corrected test errors */
1567 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1568 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1569 
1570 	/* Store the new values */
1571 	new2 = DIMM2_COR_ERR(rcv1);
1572 	new1 = DIMM1_COR_ERR(rcv0);
1573 	new0 = DIMM0_COR_ERR(rcv0);
1574 
1575 	/* Updates CE counters if it is not the first time here */
1576 	if (pvt->ce_count_available) {
1577 		/* Updates CE counters */
1578 		int add0, add1, add2;
1579 
1580 		add2 = new2 - pvt->udimm_last_ce_count[2];
1581 		add1 = new1 - pvt->udimm_last_ce_count[1];
1582 		add0 = new0 - pvt->udimm_last_ce_count[0];
1583 
1584 		if (add2 < 0)
1585 			add2 += 0x7fff;
1586 		pvt->udimm_ce_count[2] += add2;
1587 
1588 		if (add1 < 0)
1589 			add1 += 0x7fff;
1590 		pvt->udimm_ce_count[1] += add1;
1591 
1592 		if (add0 < 0)
1593 			add0 += 0x7fff;
1594 		pvt->udimm_ce_count[0] += add0;
1595 
1596 		if (add0 | add1 | add2)
1597 			i7core_printk(KERN_ERR, "New Corrected error(s): "
1598 				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1599 				      add0, add1, add2);
1600 	} else
1601 		pvt->ce_count_available = 1;
1602 
1603 	/* Store the new values */
1604 	pvt->udimm_last_ce_count[2] = new2;
1605 	pvt->udimm_last_ce_count[1] = new1;
1606 	pvt->udimm_last_ce_count[0] = new0;
1607 }
1608 
1609 /*
1610  * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1611  * Architectures Software Developer’s Manual Volume 3B.
1612  * Nehalem are defined as family 0x06, model 0x1a
1613  *
1614  * The MCA registers used here are the following ones:
1615  *     struct mce field	MCA Register
1616  *     m->status	MSR_IA32_MC8_STATUS
1617  *     m->addr		MSR_IA32_MC8_ADDR
1618  *     m->misc		MSR_IA32_MC8_MISC
1619  * In the case of Nehalem, the error information is masked at .status and .misc
1620  * fields
1621  */
1622 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1623 				    struct mce *m)
1624 {
1625 	struct i7core_pvt *pvt = mci->pvt_info;
1626 	char *type, *optype, *err, *msg;
1627 	unsigned long error = m->status & 0x1ff0000l;
1628 	u32 optypenum = (m->status >> 4) & 0x07;
1629 	u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1630 	u32 dimm = (m->misc >> 16) & 0x3;
1631 	u32 channel = (m->misc >> 18) & 0x3;
1632 	u32 syndrome = m->misc >> 32;
1633 	u32 errnum = find_first_bit(&error, 32);
1634 	int csrow;
1635 
1636 	if (m->mcgstatus & 1)
1637 		type = "FATAL";
1638 	else
1639 		type = "NON_FATAL";
1640 
1641 	switch (optypenum) {
1642 	case 0:
1643 		optype = "generic undef request";
1644 		break;
1645 	case 1:
1646 		optype = "read error";
1647 		break;
1648 	case 2:
1649 		optype = "write error";
1650 		break;
1651 	case 3:
1652 		optype = "addr/cmd error";
1653 		break;
1654 	case 4:
1655 		optype = "scrubbing error";
1656 		break;
1657 	default:
1658 		optype = "reserved";
1659 		break;
1660 	}
1661 
1662 	switch (errnum) {
1663 	case 16:
1664 		err = "read ECC error";
1665 		break;
1666 	case 17:
1667 		err = "RAS ECC error";
1668 		break;
1669 	case 18:
1670 		err = "write parity error";
1671 		break;
1672 	case 19:
1673 		err = "redundacy loss";
1674 		break;
1675 	case 20:
1676 		err = "reserved";
1677 		break;
1678 	case 21:
1679 		err = "memory range error";
1680 		break;
1681 	case 22:
1682 		err = "RTID out of range";
1683 		break;
1684 	case 23:
1685 		err = "address parity error";
1686 		break;
1687 	case 24:
1688 		err = "byte enable parity error";
1689 		break;
1690 	default:
1691 		err = "unknown";
1692 	}
1693 
1694 	/* FIXME: should convert addr into bank and rank information */
1695 	msg = kasprintf(GFP_ATOMIC,
1696 		"%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1697 		"syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1698 		type, (long long) m->addr, m->cpu, dimm, channel,
1699 		syndrome, core_err_cnt, (long long)m->status,
1700 		(long long)m->misc, optype, err);
1701 
1702 	debugf0("%s", msg);
1703 
1704 	csrow = pvt->csrow_map[channel][dimm];
1705 
1706 	/* Call the helper to output message */
1707 	if (m->mcgstatus & 1)
1708 		edac_mc_handle_fbd_ue(mci, csrow, 0,
1709 				0 /* FIXME: should be channel here */, msg);
1710 	else if (!pvt->is_registered)
1711 		edac_mc_handle_fbd_ce(mci, csrow,
1712 				0 /* FIXME: should be channel here */, msg);
1713 
1714 	kfree(msg);
1715 }
1716 
1717 /*
1718  *	i7core_check_error	Retrieve and process errors reported by the
1719  *				hardware. Called by the Core module.
1720  */
1721 static void i7core_check_error(struct mem_ctl_info *mci)
1722 {
1723 	struct i7core_pvt *pvt = mci->pvt_info;
1724 	int i;
1725 	unsigned count = 0;
1726 	struct mce *m;
1727 
1728 	/*
1729 	 * MCE first step: Copy all mce errors into a temporary buffer
1730 	 * We use a double buffering here, to reduce the risk of
1731 	 * loosing an error.
1732 	 */
1733 	smp_rmb();
1734 	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1735 		% MCE_LOG_LEN;
1736 	if (!count)
1737 		goto check_ce_error;
1738 
1739 	m = pvt->mce_outentry;
1740 	if (pvt->mce_in + count > MCE_LOG_LEN) {
1741 		unsigned l = MCE_LOG_LEN - pvt->mce_in;
1742 
1743 		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1744 		smp_wmb();
1745 		pvt->mce_in = 0;
1746 		count -= l;
1747 		m += l;
1748 	}
1749 	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1750 	smp_wmb();
1751 	pvt->mce_in += count;
1752 
1753 	smp_rmb();
1754 	if (pvt->mce_overrun) {
1755 		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1756 			      pvt->mce_overrun);
1757 		smp_wmb();
1758 		pvt->mce_overrun = 0;
1759 	}
1760 
1761 	/*
1762 	 * MCE second step: parse errors and display
1763 	 */
1764 	for (i = 0; i < count; i++)
1765 		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1766 
1767 	/*
1768 	 * Now, let's increment CE error counts
1769 	 */
1770 check_ce_error:
1771 	if (!pvt->is_registered)
1772 		i7core_udimm_check_mc_ecc_err(mci);
1773 	else
1774 		i7core_rdimm_check_mc_ecc_err(mci);
1775 }
1776 
1777 /*
1778  * i7core_mce_check_error	Replicates mcelog routine to get errors
1779  *				This routine simply queues mcelog errors, and
1780  *				return. The error itself should be handled later
1781  *				by i7core_check_error.
1782  * WARNING: As this routine should be called at NMI time, extra care should
1783  * be taken to avoid deadlocks, and to be as fast as possible.
1784  */
1785 static int i7core_mce_check_error(void *priv, struct mce *mce)
1786 {
1787 	struct mem_ctl_info *mci = priv;
1788 	struct i7core_pvt *pvt = mci->pvt_info;
1789 
1790 	/*
1791 	 * Just let mcelog handle it if the error is
1792 	 * outside the memory controller
1793 	 */
1794 	if (((mce->status & 0xffff) >> 7) != 1)
1795 		return 0;
1796 
1797 	/* Bank 8 registers are the only ones that we know how to handle */
1798 	if (mce->bank != 8)
1799 		return 0;
1800 
1801 #ifdef CONFIG_SMP
1802 	/* Only handle if it is the right mc controller */
1803 	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
1804 		return 0;
1805 #endif
1806 
1807 	smp_rmb();
1808 	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1809 		smp_wmb();
1810 		pvt->mce_overrun++;
1811 		return 0;
1812 	}
1813 
1814 	/* Copy memory error at the ringbuffer */
1815 	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1816 	smp_wmb();
1817 	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1818 
1819 	/* Handle fatal errors immediately */
1820 	if (mce->mcgstatus & 1)
1821 		i7core_check_error(mci);
1822 
1823 	/* Advice mcelog that the error were handled */
1824 	return 1;
1825 }
1826 
1827 static int i7core_register_mci(struct i7core_dev *i7core_dev,
1828 			       int num_channels, int num_csrows)
1829 {
1830 	struct mem_ctl_info *mci;
1831 	struct i7core_pvt *pvt;
1832 	int csrow = 0;
1833 	int rc;
1834 
1835 	/* allocate a new MC control structure */
1836 	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1837 			    i7core_dev->socket);
1838 	if (unlikely(!mci))
1839 		return -ENOMEM;
1840 
1841 	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1842 
1843 	/* record ptr to the generic device */
1844 	mci->dev = &i7core_dev->pdev[0]->dev;
1845 
1846 	pvt = mci->pvt_info;
1847 	memset(pvt, 0, sizeof(*pvt));
1848 
1849 	/*
1850 	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1851 	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1852 	 * memory channels
1853 	 */
1854 	mci->mtype_cap = MEM_FLAG_DDR3;
1855 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
1856 	mci->edac_cap = EDAC_FLAG_NONE;
1857 	mci->mod_name = "i7core_edac.c";
1858 	mci->mod_ver = I7CORE_REVISION;
1859 	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1860 				  i7core_dev->socket);
1861 	mci->dev_name = pci_name(i7core_dev->pdev[0]);
1862 	mci->ctl_page_to_phys = NULL;
1863 	mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
1864 	/* Set the function pointer to an actual operation function */
1865 	mci->edac_check = i7core_check_error;
1866 
1867 	/* Store pci devices at mci for faster access */
1868 	rc = mci_bind_devs(mci, i7core_dev);
1869 	if (unlikely(rc < 0))
1870 		goto fail;
1871 
1872 	/* Get dimm basic config */
1873 	get_dimm_config(mci, &csrow);
1874 
1875 	/* add this new MC control structure to EDAC's list of MCs */
1876 	if (unlikely(edac_mc_add_mc(mci))) {
1877 		debugf0("MC: " __FILE__
1878 			": %s(): failed edac_mc_add_mc()\n", __func__);
1879 		/* FIXME: perhaps some code should go here that disables error
1880 		 * reporting if we just enabled it
1881 		 */
1882 
1883 		rc = -EINVAL;
1884 		goto fail;
1885 	}
1886 
1887 	/* allocating generic PCI control info */
1888 	i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1889 						 EDAC_MOD_STR);
1890 	if (unlikely(!i7core_pci)) {
1891 		printk(KERN_WARNING
1892 			"%s(): Unable to create PCI control\n",
1893 			__func__);
1894 		printk(KERN_WARNING
1895 			"%s(): PCI error report via EDAC not setup\n",
1896 			__func__);
1897 	}
1898 
1899 	/* Default error mask is any memory */
1900 	pvt->inject.channel = 0;
1901 	pvt->inject.dimm = -1;
1902 	pvt->inject.rank = -1;
1903 	pvt->inject.bank = -1;
1904 	pvt->inject.page = -1;
1905 	pvt->inject.col = -1;
1906 
1907 	/* Registers on edac_mce in order to receive memory errors */
1908 	pvt->edac_mce.priv = mci;
1909 	pvt->edac_mce.check_error = i7core_mce_check_error;
1910 
1911 	rc = edac_mce_register(&pvt->edac_mce);
1912 	if (unlikely(rc < 0)) {
1913 		debugf0("MC: " __FILE__
1914 			": %s(): failed edac_mce_register()\n", __func__);
1915 	}
1916 
1917 fail:
1918 	if (rc < 0)
1919 		edac_mc_free(mci);
1920 	return rc;
1921 }
1922 
1923 /*
1924  *	i7core_probe	Probe for ONE instance of device to see if it is
1925  *			present.
1926  *	return:
1927  *		0 for FOUND a device
1928  *		< 0 for error code
1929  */
1930 static int __devinit i7core_probe(struct pci_dev *pdev,
1931 				  const struct pci_device_id *id)
1932 {
1933 	int dev_idx = id->driver_data;
1934 	int rc;
1935 	struct i7core_dev *i7core_dev;
1936 
1937 	/*
1938 	 * All memory controllers are allocated at the first pass.
1939 	 */
1940 	if (unlikely(dev_idx >= 1))
1941 		return -EINVAL;
1942 
1943 	/* get the pci devices we want to reserve for our use */
1944 	mutex_lock(&i7core_edac_lock);
1945 
1946 	rc = i7core_get_devices(pci_dev_table);
1947 	if (unlikely(rc < 0))
1948 		goto fail0;
1949 
1950 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
1951 		int channels;
1952 		int csrows;
1953 
1954 		/* Check the number of active and not disabled channels */
1955 		rc = i7core_get_active_channels(i7core_dev->socket,
1956 						&channels, &csrows);
1957 		if (unlikely(rc < 0))
1958 			goto fail1;
1959 
1960 		rc = i7core_register_mci(i7core_dev, channels, csrows);
1961 		if (unlikely(rc < 0))
1962 			goto fail1;
1963 	}
1964 
1965 	i7core_printk(KERN_INFO, "Driver loaded.\n");
1966 
1967 	mutex_unlock(&i7core_edac_lock);
1968 	return 0;
1969 
1970 fail1:
1971 	i7core_put_all_devices();
1972 fail0:
1973 	mutex_unlock(&i7core_edac_lock);
1974 	return rc;
1975 }
1976 
1977 /*
1978  *	i7core_remove	destructor for one instance of device
1979  *
1980  */
1981 static void __devexit i7core_remove(struct pci_dev *pdev)
1982 {
1983 	struct mem_ctl_info *mci;
1984 	struct i7core_dev *i7core_dev, *tmp;
1985 
1986 	debugf0(__FILE__ ": %s()\n", __func__);
1987 
1988 	if (i7core_pci)
1989 		edac_pci_release_generic_ctl(i7core_pci);
1990 
1991 	/*
1992 	 * we have a trouble here: pdev value for removal will be wrong, since
1993 	 * it will point to the X58 register used to detect that the machine
1994 	 * is a Nehalem or upper design. However, due to the way several PCI
1995 	 * devices are grouped together to provide MC functionality, we need
1996 	 * to use a different method for releasing the devices
1997 	 */
1998 
1999 	mutex_lock(&i7core_edac_lock);
2000 	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
2001 		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
2002 		if (mci) {
2003 			struct i7core_pvt *pvt = mci->pvt_info;
2004 
2005 			i7core_dev = pvt->i7core_dev;
2006 			edac_mce_unregister(&pvt->edac_mce);
2007 			kfree(mci->ctl_name);
2008 			edac_mc_free(mci);
2009 			i7core_put_devices(i7core_dev);
2010 		} else {
2011 			i7core_printk(KERN_ERR,
2012 				      "Couldn't find mci for socket %d\n",
2013 				      i7core_dev->socket);
2014 		}
2015 	}
2016 	mutex_unlock(&i7core_edac_lock);
2017 }
2018 
2019 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2020 
2021 /*
2022  *	i7core_driver	pci_driver structure for this module
2023  *
2024  */
2025 static struct pci_driver i7core_driver = {
2026 	.name     = "i7core_edac",
2027 	.probe    = i7core_probe,
2028 	.remove   = __devexit_p(i7core_remove),
2029 	.id_table = i7core_pci_tbl,
2030 };
2031 
2032 /*
2033  *	i7core_init		Module entry function
2034  *			Try to initialize this module for its devices
2035  */
2036 static int __init i7core_init(void)
2037 {
2038 	int pci_rc;
2039 
2040 	debugf2("MC: " __FILE__ ": %s()\n", __func__);
2041 
2042 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
2043 	opstate_init();
2044 
2045 	i7core_xeon_pci_fixup(pci_dev_table);
2046 
2047 	pci_rc = pci_register_driver(&i7core_driver);
2048 
2049 	if (pci_rc >= 0)
2050 		return 0;
2051 
2052 	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2053 		      pci_rc);
2054 
2055 	return pci_rc;
2056 }
2057 
2058 /*
2059  *	i7core_exit()	Module exit function
2060  *			Unregister the driver
2061  */
2062 static void __exit i7core_exit(void)
2063 {
2064 	debugf2("MC: " __FILE__ ": %s()\n", __func__);
2065 	pci_unregister_driver(&i7core_driver);
2066 }
2067 
2068 module_init(i7core_init);
2069 module_exit(i7core_exit);
2070 
2071 MODULE_LICENSE("GPL");
2072 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2073 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2074 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2075 		   I7CORE_REVISION);
2076 
2077 module_param(edac_op_state, int, 0444);
2078 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2079