xref: /linux/drivers/edac/i7core_edac.c (revision d39d0ed196aa1685bb24771e92f78633c66ac9cb)
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx, also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *	 Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *	http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27 
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39 
40 #include "edac_core.h"
41 
42 /*
43  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
44  * registers start at bus 255, and are not reported by BIOS.
45  * We currently find devices with only 2 sockets. In order to support more
46  * QPI (Quick Path Interconnect) sockets, just increment this number.
47  */
48 #define MAX_SOCKET_BUSES	2
49 
50 
51 /*
52  * Alter this version for the module when modifications are made
53  */
54 #define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
55 #define EDAC_MOD_STR      "i7core_edac"
56 
57 /*
58  * Debug macros
59  */
60 #define i7core_printk(level, fmt, arg...)			\
61 	edac_printk(level, "i7core", fmt, ##arg)
62 
63 #define i7core_mc_printk(mci, level, fmt, arg...)		\
64 	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
65 
66 /*
67  * i7core Memory Controller Registers
68  */
69 
70 	/* OFFSETS for Device 0 Function 0 */
71 
72 #define MC_CFG_CONTROL	0x90
73 
74 	/* OFFSETS for Device 3 Function 0 */
75 
76 #define MC_CONTROL	0x48
77 #define MC_STATUS	0x4c
78 #define MC_MAX_DOD	0x64
79 
80 /*
81  * OFFSETS for Device 3 Function 4, as indicated in the Xeon 5500 datasheet:
82  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
83  */
84 
85 #define MC_TEST_ERR_RCV1	0x60
86   #define DIMM2_COR_ERR(r)			((r) & 0x7fff)
87 
88 #define MC_TEST_ERR_RCV0	0x64
89   #define DIMM1_COR_ERR(r)			(((r) >> 16) & 0x7fff)
90   #define DIMM0_COR_ERR(r)			((r) & 0x7fff)
91 
92 /* OFFSETS for Device 3 Function 2, as indicated in the Xeon 5500 datasheet */
93 #define MC_COR_ECC_CNT_0	0x80
94 #define MC_COR_ECC_CNT_1	0x84
95 #define MC_COR_ECC_CNT_2	0x88
96 #define MC_COR_ECC_CNT_3	0x8c
97 #define MC_COR_ECC_CNT_4	0x90
98 #define MC_COR_ECC_CNT_5	0x94
99 
100 #define DIMM_TOP_COR_ERR(r)			(((r) >> 16) & 0x7fff)
101 #define DIMM_BOT_COR_ERR(r)			((r) & 0x7fff)
102 
103 
104 	/* OFFSETS for Devices 4,5 and 6 Function 0 */
105 
106 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
107   #define THREE_DIMMS_PRESENT		(1 << 24)
108   #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
109   #define QUAD_RANK_PRESENT		(1 << 22)
110   #define REGISTERED_DIMM		(1 << 15)
111 
112 #define MC_CHANNEL_MAPPER	0x60
113   #define RDLCH(r, ch)		((((r) >> (3 + (ch * 6))) & 0x07) - 1)
114   #define WRLCH(r, ch)		((((r) >> (ch * 6)) & 0x07) - 1)
115 
116 #define MC_CHANNEL_RANK_PRESENT 0x7c
117   #define RANK_PRESENT_MASK		0xffff
118 
119 #define MC_CHANNEL_ADDR_MATCH	0xf0
120 #define MC_CHANNEL_ERROR_MASK	0xf8
121 #define MC_CHANNEL_ERROR_INJECT	0xfc
122   #define INJECT_ADDR_PARITY	0x10
123   #define INJECT_ECC		0x08
124   #define MASK_CACHELINE	0x06
125   #define MASK_FULL_CACHELINE	0x06
126   #define MASK_MSB32_CACHELINE	0x04
127   #define MASK_LSB32_CACHELINE	0x02
128   #define NO_MASK_CACHELINE	0x00
129   #define REPEAT_EN		0x01
130 
131 	/* OFFSETS for Devices 4,5 and 6 Function 1 */
132 
133 #define MC_DOD_CH_DIMM0		0x48
134 #define MC_DOD_CH_DIMM1		0x4c
135 #define MC_DOD_CH_DIMM2		0x50
136   #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
137   #define RANKOFFSET(x)		((x & RANKOFFSET_MASK) >> 10)
138   #define DIMM_PRESENT_MASK	(1 << 9)
139   #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
140   #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
141   #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
142   #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
143   #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
144   #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
145   #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
146   #define MC_DOD_NUMCOL_MASK		3
147   #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)
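  /*
   * A worked decode, for illustration (0x2a8 is a hypothetical register
   * value): dimm_dod = 0x2a8 gives DIMM_PRESENT = 1, MC_DOD_NUMBANK = 1
   * (8 banks), MC_DOD_NUMRANK = 1 (2 ranks), MC_DOD_NUMROW = 2 (16K rows)
   * and MC_DOD_NUMCOL = 0 (1K cols): a 2 GB DIMM, per the size formula
   * used by get_dimm_config().
   */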
148 
149 #define MC_RANK_PRESENT		0x7c
150 
151 #define MC_SAG_CH_0	0x80
152 #define MC_SAG_CH_1	0x84
153 #define MC_SAG_CH_2	0x88
154 #define MC_SAG_CH_3	0x8c
155 #define MC_SAG_CH_4	0x90
156 #define MC_SAG_CH_5	0x94
157 #define MC_SAG_CH_6	0x98
158 #define MC_SAG_CH_7	0x9c
159 
160 #define MC_RIR_LIMIT_CH_0	0x40
161 #define MC_RIR_LIMIT_CH_1	0x44
162 #define MC_RIR_LIMIT_CH_2	0x48
163 #define MC_RIR_LIMIT_CH_3	0x4C
164 #define MC_RIR_LIMIT_CH_4	0x50
165 #define MC_RIR_LIMIT_CH_5	0x54
166 #define MC_RIR_LIMIT_CH_6	0x58
167 #define MC_RIR_LIMIT_CH_7	0x5C
168 #define MC_RIR_LIMIT_MASK	((1 << 10) - 1)
169 
170 #define MC_RIR_WAY_CH		0x80
171   #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
172   #define MC_RIR_WAY_RANK_MASK		0x7
173 
174 /*
175  * i7core structs
176  */
177 
178 #define NUM_CHANS 3
179 #define MAX_DIMMS 3		/* Max DIMMS per channel */
180 #define MAX_MCR_FUNC  4
181 #define MAX_CHAN_FUNC 3
182 
183 struct i7core_info {
184 	u32	mc_control;
185 	u32	mc_status;
186 	u32	max_dod;
187 	u32	ch_map;
188 };
189 
190 
191 struct i7core_inject {
192 	int	enable;
193 
194 	u32	section;
195 	u32	type;
196 	u32	eccmask;
197 
198 	/* Error address mask */
199 	int channel, dimm, rank, bank, page, col;
200 };
201 
202 struct i7core_channel {
203 	u32		ranks;
204 	u32		dimms;
205 };
206 
207 struct pci_id_descr {
208 	int			dev;
209 	int			func;
210 	int 			dev_id;
211 	int			optional;
212 };
213 
214 struct pci_id_table {
215 	struct pci_id_descr	*descr;
216 	int			n_devs;
217 };
218 
219 struct i7core_dev {
220 	struct list_head	list;
221 	u8			socket;
222 	struct pci_dev		**pdev;
223 	int			n_devs;
224 	struct mem_ctl_info	*mci;
225 };
226 
227 struct i7core_pvt {
228 	struct pci_dev	*pci_noncore;
229 	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
230 	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
231 
232 	struct i7core_dev *i7core_dev;
233 
234 	struct i7core_info	info;
235 	struct i7core_inject	inject;
236 	struct i7core_channel	channel[NUM_CHANS];
237 
238 	int		channels; /* Number of active channels */
239 
240 	int		ce_count_available;
241 	int 		csrow_map[NUM_CHANS][MAX_DIMMS];
242 
243 			/* ECC corrected errors counts per udimm */
244 	unsigned long	udimm_ce_count[MAX_DIMMS];
245 	int		udimm_last_ce_count[MAX_DIMMS];
246 			/* ECC corrected errors counts per rdimm */
247 	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
248 	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
249 
250 	unsigned int	is_registered;
251 
252 	/* mcelog glue */
253 	struct edac_mce		edac_mce;
254 
255 	/* Fifo double buffers */
256 	struct mce		mce_entry[MCE_LOG_LEN];
257 	struct mce		mce_outentry[MCE_LOG_LEN];
258 
259 	/* Fifo in/out counters */
260 	unsigned		mce_in, mce_out;
261 
262 	/* Count of errors lost because the ring buffer overran */
263 	unsigned		mce_overrun;
264 };
265 
266 /* Static vars */
267 static LIST_HEAD(i7core_edac_list);
268 static DEFINE_MUTEX(i7core_edac_lock);
269 
270 #define PCI_DESCR(device, function, device_id)	\
271 	.dev = (device),			\
272 	.func = (function),			\
273 	.dev_id = (device_id)
274 
275 struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
276 		/* Memory controller */
277 	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
278 	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
279 			/* Exists only for RDIMM */
280 	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
281 	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
282 
283 		/* Channel 0 */
284 	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
285 	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
286 	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
287 	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
288 
289 		/* Channel 1 */
290 	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
291 	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
292 	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
293 	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
294 
295 		/* Channel 2 */
296 	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
297 	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
298 	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
299 	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
300 
301 		/* Generic Non-core registers */
302 	/*
303 	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
304 	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
305 	 * the probing code needs to test for the other address in case of
306 	 * failure of this one
307 	 */
308 	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
309 
310 };
311 
312 struct pci_id_descr pci_dev_descr_lynnfield[] = {
313 	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
314 	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
315 	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
316 
317 	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
318 	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
319 	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
320 	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
321 
322 	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
323 	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
324 	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
325 	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
326 
327 	/*
328 	 * This PCI device has an alternate address on some
329 	 * processors, like the Core i7 860
330 	 */
331 	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
332 };
333 
334 struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
335 		/* Memory controller */
336 	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
337 	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
338 			/* Exists only for RDIMM */
339 	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
340 	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
341 
342 		/* Channel 0 */
343 	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
344 	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
345 	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
346 	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
347 
348 		/* Channel 1 */
349 	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
350 	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
351 	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
352 	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
353 
354 		/* Channel 2 */
355 	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
356 	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
357 	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
358 	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
359 
360 		/* Generic Non-core registers */
361 	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
362 
363 };
364 
365 #define PCI_ID_TABLE_ENTRY(A) { A, ARRAY_SIZE(A) }
366 struct pci_id_table pci_dev_table[] = {
367 	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
368 	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
369 	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
370 };
371 
372 /*
373  *	pci_device_id	table for which devices we are looking for
374  */
375 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
376 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
377 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
378 	{0,}			/* 0 terminated list. */
379 };
380 
381 static struct edac_pci_ctl_info *i7core_pci;
382 
383 /****************************************************************************
384 			Ancillary status routines
385  ****************************************************************************/
386 
387 	/* MC_CONTROL bits */
388 #define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + ch)))
389 #define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))
390 
391 	/* MC_STATUS bits */
392 #define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
393 #define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << ch))
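	/*
	 * E.g. CH_ACTIVE(pvt, 2) tests bit 10 of MC_CONTROL, while
	 * CH_DISABLED(pvt, 2) tests bit 2 of MC_STATUS.
	 */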
394 
395 	/* MC_MAX_DOD read functions */
396 static inline int numdimms(u32 dimms)
397 {
398 	return (dimms & 0x3) + 1;
399 }
400 
401 static inline int numrank(u32 rank)
402 {
403 	static int ranks[4] = { 1, 2, 4, -EINVAL };
404 
405 	return ranks[rank & 0x3];
406 }
407 
408 static inline int numbank(u32 bank)
409 {
410 	static int banks[4] = { 4, 8, 16, -EINVAL };
411 
412 	return banks[bank & 0x3];
413 }
414 
415 static inline int numrow(u32 row)
416 {
417 	static int rows[8] = {
418 		1 << 12, 1 << 13, 1 << 14, 1 << 15,
419 		1 << 16, -EINVAL, -EINVAL, -EINVAL,
420 	};
421 
422 	return rows[row & 0x7];
423 }
424 
425 static inline int numcol(u32 col)
426 {
427 	static int cols[4] = {
428 		1 << 10, 1 << 11, 1 << 12, -EINVAL,
429 	};
430 	return cols[col & 0x3];
431 }
432 
433 static struct i7core_dev *get_i7core_dev(u8 socket)
434 {
435 	struct i7core_dev *i7core_dev;
436 
437 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
438 		if (i7core_dev->socket == socket)
439 			return i7core_dev;
440 	}
441 
442 	return NULL;
443 }
444 
445 /****************************************************************************
446 			Memory check routines
447  ****************************************************************************/
448 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
449 					  unsigned func)
450 {
451 	struct i7core_dev *i7core_dev = get_i7core_dev(socket);
452 	int i;
453 
454 	if (!i7core_dev)
455 		return NULL;
456 
457 	for (i = 0; i < i7core_dev->n_devs; i++) {
458 		if (!i7core_dev->pdev[i])
459 			continue;
460 
461 		if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
462 		    PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
463 			return i7core_dev->pdev[i];
464 		}
465 	}
466 
467 	return NULL;
468 }
469 
470 /**
471  * i7core_get_active_channels() - gets the number of channels and csrows
472  * @socket:	Quick Path Interconnect socket
473  * @channels:	Number of channels that will be returned
474  * @csrows:	Number of csrows found
475  *
476  * Since the EDAC core needs to know in advance the number of available
477  * channels and csrows, in order to allocate memory for csrows/channels,
478  * two similar steps are needed. The first step, implemented by this
479  * function, counts the number of csrows/channels present on one socket,
480  * so that the mci components can be properly sized.
481  *
482  * It should be noted that none of the currently available datasheets
483  * explain, or even mention, how csrows are seen by the memory controller.
484  * So, we need a fake description for csrows: this driver maps one DIMM
485  * to one csrow.
486  */
487 static int i7core_get_active_channels(u8 socket, unsigned *channels,
488 				      unsigned *csrows)
489 {
490 	struct pci_dev *pdev = NULL;
491 	int i, j;
492 	u32 status, control;
493 
494 	*channels = 0;
495 	*csrows = 0;
496 
497 	pdev = get_pdev_slot_func(socket, 3, 0);
498 	if (!pdev) {
499 		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
500 			      socket);
501 		return -ENODEV;
502 	}
503 
504 	/* Device 3 function 0 reads */
505 	pci_read_config_dword(pdev, MC_STATUS, &status);
506 	pci_read_config_dword(pdev, MC_CONTROL, &control);
507 
508 	for (i = 0; i < NUM_CHANS; i++) {
509 		u32 dimm_dod[3];
510 		/* Check if the channel is active */
511 		if (!(control & (1 << (8 + i))))
512 			continue;
513 
514 		/* Check if the channel is disabled */
515 		if (status & (1 << i))
516 			continue;
517 
518 		pdev = get_pdev_slot_func(socket, i + 4, 1);
519 		if (!pdev) {
520 			i7core_printk(KERN_ERR, "Couldn't find socket %d "
521 						"fn %d.%d!!!\n",
522 						socket, i + 4, 1);
523 			return -ENODEV;
524 		}
525 		/* Devices 4-6 function 1 */
526 		pci_read_config_dword(pdev,
527 				MC_DOD_CH_DIMM0, &dimm_dod[0]);
528 		pci_read_config_dword(pdev,
529 				MC_DOD_CH_DIMM1, &dimm_dod[1]);
530 		pci_read_config_dword(pdev,
531 				MC_DOD_CH_DIMM2, &dimm_dod[2]);
532 
533 		(*channels)++;
534 
535 		for (j = 0; j < 3; j++) {
536 			if (!DIMM_PRESENT(dimm_dod[j]))
537 				continue;
538 			(*csrows)++;
539 		}
540 	}
541 
542 	debugf0("Number of active channels on socket %d: %d\n",
543 		socket, *channels);
544 
545 	return 0;
546 }
547 
548 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
549 {
550 	struct i7core_pvt *pvt = mci->pvt_info;
551 	struct csrow_info *csr;
552 	struct pci_dev *pdev;
553 	int i, j;
554 	unsigned long last_page = 0;
555 	enum edac_type mode;
556 	enum mem_type mtype;
557 
558 	/* Get data from the MC register, function 0 */
559 	pdev = pvt->pci_mcr[0];
560 	if (!pdev)
561 		return -ENODEV;
562 
563 	/* Device 3 function 0 reads */
564 	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
565 	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
566 	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
567 	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
568 
569 	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
570 		pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
571 		pvt->info.max_dod, pvt->info.ch_map);
572 
573 	if (ECC_ENABLED(pvt)) {
574 		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
575 		if (ECCx8(pvt))
576 			mode = EDAC_S8ECD8ED;
577 		else
578 			mode = EDAC_S4ECD4ED;
579 	} else {
580 		debugf0("ECC disabled\n");
581 		mode = EDAC_NONE;
582 	}
583 
584 	/* FIXME: need to handle the error codes */
585 	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
586 		"x%x x 0x%x\n",
587 		numdimms(pvt->info.max_dod),
588 		numrank(pvt->info.max_dod >> 2),
589 		numbank(pvt->info.max_dod >> 4),
590 		numrow(pvt->info.max_dod >> 6),
591 		numcol(pvt->info.max_dod >> 9));
592 
593 	for (i = 0; i < NUM_CHANS; i++) {
594 		u32 data, dimm_dod[3], value[8];
595 
596 		if (!pvt->pci_ch[i][0])
597 			continue;
598 
599 		if (!CH_ACTIVE(pvt, i)) {
600 			debugf0("Channel %i is not active\n", i);
601 			continue;
602 		}
603 		if (CH_DISABLED(pvt, i)) {
604 			debugf0("Channel %i is disabled\n", i);
605 			continue;
606 		}
607 
608 		/* Devices 4-6 function 0 */
609 		pci_read_config_dword(pvt->pci_ch[i][0],
610 				MC_CHANNEL_DIMM_INIT_PARAMS, &data);
611 
612 		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
613 						4 : 2;
614 
615 		if (data & REGISTERED_DIMM)
616 			mtype = MEM_RDDR3;
617 		else
618 			mtype = MEM_DDR3;
619 #if 0
620 		if (data & THREE_DIMMS_PRESENT)
621 			pvt->channel[i].dimms = 3;
622 		else if (data & SINGLE_QUAD_RANK_PRESENT)
623 			pvt->channel[i].dimms = 1;
624 		else
625 			pvt->channel[i].dimms = 2;
626 #endif
627 
628 		/* Devices 4-6 function 1 */
629 		pci_read_config_dword(pvt->pci_ch[i][1],
630 				MC_DOD_CH_DIMM0, &dimm_dod[0]);
631 		pci_read_config_dword(pvt->pci_ch[i][1],
632 				MC_DOD_CH_DIMM1, &dimm_dod[1]);
633 		pci_read_config_dword(pvt->pci_ch[i][1],
634 				MC_DOD_CH_DIMM2, &dimm_dod[2]);
635 
636 		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
637 			"%d ranks, %cDIMMs\n",
638 			i,
639 			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
640 			data,
641 			pvt->channel[i].ranks,
642 			(data & REGISTERED_DIMM) ? 'R' : 'U');
643 
644 		for (j = 0; j < 3; j++) {
645 			u32 banks, ranks, rows, cols;
646 			u32 size, npages;
647 
648 			if (!DIMM_PRESENT(dimm_dod[j]))
649 				continue;
650 
651 			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
652 			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
653 			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
654 			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
655 
656 			/* DDR3 has 8 I/O banks */
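			/*
			 * The shift by (20 - 3) below converts cells to
			 * MB: each rows x cols x banks x ranks cell is
			 * 64 bits (2^3 bytes) wide, and 2^20 bytes make
			 * one MB. E.g. 2^14 rows x 2^10 cols x 8 banks x
			 * 2 ranks is a 2048 MB DIMM.
			 */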
657 			size = (rows * cols * banks * ranks) >> (20 - 3);
658 
659 			pvt->channel[i].dimms++;
660 
661 			debugf0("\tdimm %d %d MB offset: %x, "
662 				"bank: %d, rank: %d, row: %#x, col: %#x\n",
663 				j, size,
664 				RANKOFFSET(dimm_dod[j]),
665 				banks, ranks, rows, cols);
666 
667 #if PAGE_SHIFT > 20
668 			npages = size >> (PAGE_SHIFT - 20);
669 #else
670 			npages = size << (20 - PAGE_SHIFT);
671 #endif
672 
673 			csr = &mci->csrows[*csrow];
674 			csr->first_page = last_page + 1;
675 			last_page += npages;
676 			csr->last_page = last_page;
677 			csr->nr_pages = npages;
678 
679 			csr->page_mask = 0;
680 			csr->grain = 8;
681 			csr->csrow_idx = *csrow;
682 			csr->nr_channels = 1;
683 
684 			csr->channels[0].chan_idx = i;
685 			csr->channels[0].ce_count = 0;
686 
687 			pvt->csrow_map[i][j] = *csrow;
688 
689 			switch (banks) {
690 			case 4:
691 				csr->dtype = DEV_X4;
692 				break;
693 			case 8:
694 				csr->dtype = DEV_X8;
695 				break;
696 			case 16:
697 				csr->dtype = DEV_X16;
698 				break;
699 			default:
700 				csr->dtype = DEV_UNKNOWN;
701 			}
702 
703 			csr->edac_mode = mode;
704 			csr->mtype = mtype;
705 
706 			(*csrow)++;
707 		}
708 
709 		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
710 		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
711 		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
712 		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
713 		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
714 		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
715 		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
716 		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
717 		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
718 		for (j = 0; j < 8; j++)
719 			debugf1("\t\t%#x\t%#x\t%#x\n",
720 				(value[j] >> 27) & 0x1,
721 				(value[j] >> 24) & 0x7,
722 				(value[j] & ((1 << 24) - 1)));
723 	}
724 
725 	return 0;
726 }
727 
728 /****************************************************************************
729 			Error insertion routines
730  ****************************************************************************/
731 
732 /* The i7core has independent error injection features per channel.
733    However, to keep the code simpler, we don't allow enabling error injection
734    on more than one channel.
735    Also, since a change to an inject parameter is only applied at enable time,
736    we disable error injection on all write calls to the sysfs nodes that
737    control the error code injection.
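
   A hypothetical usage sketch (the "..." prefix stands for this memory
   controller's EDAC sysfs directory; the node names match the sysfs
   attributes declared below):

	echo 2 > .../inject_section		(upper half cacheline)
	echo 2 > .../inject_type		(inject an ECC error)
	echo 1 > .../inject_eccmask		(flip one ECC bit: a CE)
	echo 0 > .../inject_addrmatch/channel	(match channel 0 only)
	echo 1 > .../inject_enable		(arm the injection)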
738  */
739 static int disable_inject(struct mem_ctl_info *mci)
740 {
741 	struct i7core_pvt *pvt = mci->pvt_info;
742 
743 	pvt->inject.enable = 0;
744 
745 	if (!pvt->pci_ch[pvt->inject.channel][0])
746 		return -ENODEV;
747 
748 	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
749 				MC_CHANNEL_ERROR_INJECT, 0);
750 
751 	return 0;
752 }
753 
754 /*
755  * i7core inject.section
756  *
757  *	accept and store error injection inject.section value
758  *	bit 0 - refers to the lower 32-byte half cacheline
759  *	bit 1 - refers to the upper 32-byte half cacheline
760  */
761 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
762 					   const char *data, size_t count)
763 {
764 	struct i7core_pvt *pvt = mci->pvt_info;
765 	unsigned long value;
766 	int rc;
767 
768 	if (pvt->inject.enable)
769 		disable_inject(mci);
770 
771 	rc = strict_strtoul(data, 10, &value);
772 	if ((rc < 0) || (value > 3))
773 		return -EIO;
774 
775 	pvt->inject.section = (u32) value;
776 	return count;
777 }
778 
779 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
780 					      char *data)
781 {
782 	struct i7core_pvt *pvt = mci->pvt_info;
783 	return sprintf(data, "0x%08x\n", pvt->inject.section);
784 }
785 
786 /*
787  * i7core inject.type
788  *
789  *	accept and store error injection inject.type value
790  *	bit 0 - repeat enable - Enable error repetition
791  *	bit 1 - inject ECC error
792  *	bit 2 - inject parity error
793  */
794 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
795 					const char *data, size_t count)
796 {
797 	struct i7core_pvt *pvt = mci->pvt_info;
798 	unsigned long value;
799 	int rc;
800 
801 	if (pvt->inject.enable)
802 		disable_inject(mci);
803 
804 	rc = strict_strtoul(data, 10, &value);
805 	if ((rc < 0) || (value > 7))
806 		return -EIO;
807 
808 	pvt->inject.type = (u32) value;
809 	return count;
810 }
811 
812 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
813 					      char *data)
814 {
815 	struct i7core_pvt *pvt = mci->pvt_info;
816 	return sprintf(data, "0x%08x\n", pvt->inject.type);
817 }
818 
819 /*
820  * i7core inject.eccmask
821  *
822  * The type of error (UE/CE) will depend on the inject.eccmask value:
823  *   Any bits set to a 1 will flip the corresponding ECC bit
824  *   Correctable errors can be injected by flipping 1 bit or the bits within
825  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
826  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
827  *   uncorrectable error to be injected.
828  */
829 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
830 					const char *data, size_t count)
831 {
832 	struct i7core_pvt *pvt = mci->pvt_info;
833 	unsigned long value;
834 	int rc;
835 
836 	if (pvt->inject.enable)
837 		disable_inject(mci);
838 
839 	rc = strict_strtoul(data, 10, &value);
840 	if (rc < 0)
841 		return -EIO;
842 
843 	pvt->inject.eccmask = (u32) value;
844 	return count;
845 }
846 
847 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
848 					      char *data)
849 {
850 	struct i7core_pvt *pvt = mci->pvt_info;
851 	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
852 }
853 
854 /*
855  * i7core_addrmatch
856  *
857  * The sysfs nodes below set the address matching criteria used for
858  * error injection: channel, dimm, rank, bank, page and col.
859  * Writing "any" (stored internally as -1) to a node makes the memory
860  * controller ignore that field when matching the address for error
861  * injection, as also described before i7core_inject_enable_store().
862  *
863  */
864 
865 #define DECLARE_ADDR_MATCH(param, limit)			\
866 static ssize_t i7core_inject_store_##param(			\
867 		struct mem_ctl_info *mci,			\
868 		const char *data, size_t count)			\
869 {								\
870 	struct i7core_pvt *pvt;					\
871 	long value;						\
872 	int rc;							\
873 								\
874 	debugf1("%s()\n", __func__);				\
875 	pvt = mci->pvt_info;					\
876 								\
877 	if (pvt->inject.enable)					\
878 		disable_inject(mci);				\
879 								\
880 	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
881 		value = -1;					\
882 	else {							\
883 		rc = strict_strtoul(data, 10, &value);		\
884 		if ((rc < 0) || (value >= limit))		\
885 			return -EIO;				\
886 	}							\
887 								\
888 	pvt->inject.param = value;				\
889 								\
890 	return count;						\
891 }								\
892 								\
893 static ssize_t i7core_inject_show_##param(			\
894 		struct mem_ctl_info *mci,			\
895 		char *data)					\
896 {								\
897 	struct i7core_pvt *pvt;					\
898 								\
899 	pvt = mci->pvt_info;					\
900 	debugf1("%s() pvt=%p\n", __func__, pvt);		\
901 	if (pvt->inject.param < 0)				\
902 		return sprintf(data, "any\n");			\
903 	else							\
904 		return sprintf(data, "%d\n", pvt->inject.param);\
905 }
906 
907 #define ATTR_ADDR_MATCH(param)					\
908 	{							\
909 		.attr = {					\
910 			.name = #param,				\
911 			.mode = (S_IRUGO | S_IWUSR)		\
912 		},						\
913 		.show  = i7core_inject_show_##param,		\
914 		.store = i7core_inject_store_##param,		\
915 	}
916 
917 DECLARE_ADDR_MATCH(channel, 3);
918 DECLARE_ADDR_MATCH(dimm, 3);
919 DECLARE_ADDR_MATCH(rank, 4);
920 DECLARE_ADDR_MATCH(bank, 32);
921 DECLARE_ADDR_MATCH(page, 0x10000);
922 DECLARE_ADDR_MATCH(col, 0x4000);
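/*
 * Each DECLARE_ADDR_MATCH(param, limit) above expands into an
 * i7core_inject_store_##param()/i7core_inject_show_##param() pair that
 * accepts either "any" (stored as -1) or a number below the given limit;
 * ATTR_ADDR_MATCH(param) then wires such a pair into a sysfs attribute
 * named after the parameter.
 */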
923 
924 static int write_and_test(struct pci_dev *dev, int where, u32 val)
925 {
926 	u32 read;
927 	int count;
928 
929 	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
930 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
931 		where, val);
932 
933 	for (count = 0; count < 10; count++) {
934 		if (count)
935 			msleep(100);
936 		pci_write_config_dword(dev, where, val);
937 		pci_read_config_dword(dev, where, &read);
938 
939 		if (read == val)
940 			return 0;
941 	}
942 
943 	i7core_printk(KERN_ERR, "Error while setting pci %02x:%02x.%x reg=%02x "
944 		"write=%08x. Read=%08x\n",
945 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
946 		where, val, read);
947 
948 	return -EINVAL;
949 }
950 
951 /*
952  * This routine prepares the Memory Controller for error injection.
953  * The error will be injected when some process tries to write to the
954  * memory that matches the given criteria.
955  * The criteria can be set in terms of a mask where dimm, rank, bank, page
956  * and col can be specified.
957  * A -1 value for any of the mask items will make the MCU ignore
958  * that matching criterion for error injection.
959  *
960  * It should be noted that the error will only happen after a write operation
961  * on memory that matches the condition. If REPEAT_EN is not enabled in the
962  * inject mask, just one error will be produced. Otherwise, errors will
963  * repeat until the inject mask is cleared.
964  *
965  * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
966  *    is reliable enough to check if the MC is using the
967  *    three channels. However, this is not clear in the datasheet.
968  */
969 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
970 				       const char *data, size_t count)
971 {
972 	struct i7core_pvt *pvt = mci->pvt_info;
973 	u32 injectmask;
974 	u64 mask = 0;
975 	int  rc;
976 	long enable;
977 
978 	if (!pvt->pci_ch[pvt->inject.channel][0])
979 		return 0;
980 
981 	rc = strict_strtoul(data, 10, &enable);
982 	if ((rc < 0))
983 		return 0;
984 
985 	if (enable) {
986 		pvt->inject.enable = 1;
987 	} else {
988 		disable_inject(mci);
989 		return count;
990 	}
991 
992 	/* Sets pvt->inject.dimm mask */
993 	if (pvt->inject.dimm < 0)
994 		mask |= 1LL << 41;
995 	else {
996 		if (pvt->channel[pvt->inject.channel].dimms > 2)
997 			mask |= (pvt->inject.dimm & 0x3LL) << 35;
998 		else
999 			mask |= (pvt->inject.dimm & 0x1LL) << 36;
1000 	}
1001 
1002 	/* Sets pvt->inject.rank mask */
1003 	if (pvt->inject.rank < 0)
1004 		mask |= 1LL << 40;
1005 	else {
1006 		if (pvt->channel[pvt->inject.channel].dimms > 2)
1007 			mask |= (pvt->inject.rank & 0x1LL) << 34;
1008 		else
1009 			mask |= (pvt->inject.rank & 0x3LL) << 34;
1010 	}
1011 
1012 	/* Sets pvt->inject.bank mask */
1013 	if (pvt->inject.bank < 0)
1014 		mask |= 1LL << 39;
1015 	else
1016 		mask |= (pvt->inject.bank & 0x15LL) << 30;
1017 
1018 	/* Sets pvt->inject.page mask */
1019 	if (pvt->inject.page < 0)
1020 		mask |= 1LL << 38;
1021 	else
1022 		mask |= (pvt->inject.page & 0xffff) << 14;
1023 
1024 	/* Sets pvt->inject.column mask */
1025 	if (pvt->inject.col < 0)
1026 		mask |= 1LL << 37;
1027 	else
1028 		mask |= (pvt->inject.col & 0x3fff);
1029 
1030 	/*
1031 	 * bit    0: REPEAT_EN
1032 	 * bits 1-2: MASK_HALF_CACHELINE
1033 	 * bit    3: INJECT_ECC
1034 	 * bit    4: INJECT_ADDR_PARITY
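	 *
	 * For example (hypothetical values): inject.type = 3 (repeat + ECC)
	 * and inject.section = 2 (upper half) yield
	 * injectmask = 1 | (2 << 1) | (2 << 2) = 0x0d.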
1035 	 */
1036 
1037 	injectmask = (pvt->inject.type & 1) |
1038 		     (pvt->inject.section & 0x3) << 1 |
1039 		     (pvt->inject.type & 0x6) << (3 - 1);
1040 
1041 	/* Unlock writes to registers - this register is write only */
1042 	pci_write_config_dword(pvt->pci_noncore,
1043 			       MC_CFG_CONTROL, 0x2);
1044 
1045 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1046 			       MC_CHANNEL_ADDR_MATCH, mask);
1047 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1048 			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1049 
1050 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1051 			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1052 
1053 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1054 			       MC_CHANNEL_ERROR_INJECT, injectmask);
1055 
1056 	/*
1057 	 * This is something undocumented, based on my tests
1058 	 * Without writing 8 to this register, errors aren't injected. Not sure
1059 	 * why.
1060 	 */
1061 	pci_write_config_dword(pvt->pci_noncore,
1062 			       MC_CFG_CONTROL, 8);
1063 
1064 	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1065 		" inject 0x%08x\n",
1066 		mask, pvt->inject.eccmask, injectmask);
1067 
1068 
1069 	return count;
1070 }
1071 
1072 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1073 					char *data)
1074 {
1075 	struct i7core_pvt *pvt = mci->pvt_info;
1076 	u32 injectmask;
1077 
1078 	if (!pvt->pci_ch[pvt->inject.channel][0])
1079 		return 0;
1080 
1081 	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1082 			       MC_CHANNEL_ERROR_INJECT, &injectmask);
1083 
1084 	debugf0("Inject error read: 0x%018x\n", injectmask);
1085 
1086 	if (injectmask & 0x0c)
1087 		pvt->inject.enable = 1;
1088 
1089 	return sprintf(data, "%d\n", pvt->inject.enable);
1090 }
1091 
1092 #define DECLARE_COUNTER(param)					\
1093 static ssize_t i7core_show_counter_##param(			\
1094 		struct mem_ctl_info *mci,			\
1095 		char *data)					\
1096 {								\
1097 	struct i7core_pvt *pvt = mci->pvt_info;			\
1098 								\
1099 	debugf1("%s() \n", __func__);				\
1100 	if (!pvt->ce_count_available || (pvt->is_registered))	\
1101 		return sprintf(data, "data unavailable\n");	\
1102 	return sprintf(data, "%lu\n",				\
1103 			pvt->udimm_ce_count[param]);		\
1104 }
1105 
1106 #define ATTR_COUNTER(param)					\
1107 	{							\
1108 		.attr = {					\
1109 			.name = __stringify(udimm##param),	\
1110 			.mode = (S_IRUGO | S_IWUSR)		\
1111 		},						\
1112 		.show  = i7core_show_counter_##param		\
1113 	}
1114 
1115 DECLARE_COUNTER(0);
1116 DECLARE_COUNTER(1);
1117 DECLARE_COUNTER(2);
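/*
 * Each DECLARE_COUNTER(N) above expands into i7core_show_counter_N(),
 * which reports pvt->udimm_ce_count[N]; ATTR_COUNTER(N) exposes it as
 * the "udimmN" sysfs node used by the all_channel_counts group below.
 */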
1118 
1119 /*
1120  * Sysfs struct
1121  */
1122 
1123 
1124 static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1125 	ATTR_ADDR_MATCH(channel),
1126 	ATTR_ADDR_MATCH(dimm),
1127 	ATTR_ADDR_MATCH(rank),
1128 	ATTR_ADDR_MATCH(bank),
1129 	ATTR_ADDR_MATCH(page),
1130 	ATTR_ADDR_MATCH(col),
1131 	{ .attr = { .name = NULL } }
1132 };
1133 
1134 static struct mcidev_sysfs_group i7core_inject_addrmatch = {
1135 	.name  = "inject_addrmatch",
1136 	.mcidev_attr = i7core_addrmatch_attrs,
1137 };
1138 
1139 static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1140 	ATTR_COUNTER(0),
1141 	ATTR_COUNTER(1),
1142 	ATTR_COUNTER(2),
1143 };
1144 
1145 static struct mcidev_sysfs_group i7core_udimm_counters = {
1146 	.name  = "all_channel_counts",
1147 	.mcidev_attr = i7core_udimm_counters_attrs,
1148 };
1149 
1150 static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
1151 	{
1152 		.attr = {
1153 			.name = "inject_section",
1154 			.mode = (S_IRUGO | S_IWUSR)
1155 		},
1156 		.show  = i7core_inject_section_show,
1157 		.store = i7core_inject_section_store,
1158 	}, {
1159 		.attr = {
1160 			.name = "inject_type",
1161 			.mode = (S_IRUGO | S_IWUSR)
1162 		},
1163 		.show  = i7core_inject_type_show,
1164 		.store = i7core_inject_type_store,
1165 	}, {
1166 		.attr = {
1167 			.name = "inject_eccmask",
1168 			.mode = (S_IRUGO | S_IWUSR)
1169 		},
1170 		.show  = i7core_inject_eccmask_show,
1171 		.store = i7core_inject_eccmask_store,
1172 	}, {
1173 		.grp = &i7core_inject_addrmatch,
1174 	}, {
1175 		.attr = {
1176 			.name = "inject_enable",
1177 			.mode = (S_IRUGO | S_IWUSR)
1178 		},
1179 		.show  = i7core_inject_enable_show,
1180 		.store = i7core_inject_enable_store,
1181 	},
1182 	{ .attr = { .name = NULL } },	/* Reserved for udimm counters */
1183 	{ .attr = { .name = NULL } }
1184 };
1185 
1186 /****************************************************************************
1187 	Device initialization routines: put/get, init/exit
1188  ****************************************************************************/
1189 
1190 /*
1191  *	i7core_put_devices	'put' all the devices that we have
1192  *				reserved via 'get'
1193  */
1194 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1195 {
1196 	int i;
1197 
1198 	debugf0(__FILE__ ": %s()\n", __func__);
1199 	for (i = 0; i < i7core_dev->n_devs; i++) {
1200 		struct pci_dev *pdev = i7core_dev->pdev[i];
1201 		if (!pdev)
1202 			continue;
1203 		debugf0("Removing dev %02x:%02x.%d\n",
1204 			pdev->bus->number,
1205 			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1206 		pci_dev_put(pdev);
1207 	}
1208 	kfree(i7core_dev->pdev);
1209 	list_del(&i7core_dev->list);
1210 	kfree(i7core_dev);
1211 }
1212 
1213 static void i7core_put_all_devices(void)
1214 {
1215 	struct i7core_dev *i7core_dev, *tmp;
1216 
1217 	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
1218 		i7core_put_devices(i7core_dev);
1219 }
1220 
1221 static void __init i7core_xeon_pci_fixup(struct pci_id_table *table)
1222 {
1223 	struct pci_dev *pdev = NULL;
1224 	int i;
1225 	/*
1226 	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
1227 	 * aren't announced by ACPI. So, we need to use a legacy scan probe
1228 	 * to detect them
1229 	 */
1230 	while (table && table->descr) {
1231 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1232 		if (unlikely(!pdev)) {
1233 			for (i = 0; i < MAX_SOCKET_BUSES; i++)
1234 				pcibios_scan_specific_bus(255-i);
1235 		}
1236 		pci_dev_put(pdev);
1237 		table++;
1238 	}
1239 }
1240 
1241 static unsigned i7core_pci_lastbus(void)
1242 {
1243 	int last_bus = 0, bus;
1244 	struct pci_bus *b = NULL;
1245 
1246 	while ((b = pci_find_next_bus(b)) != NULL) {
1247 		bus = b->number;
1248 		debugf0("Found bus %d\n", bus);
1249 		if (bus > last_bus)
1250 			last_bus = bus;
1251 	}
1252 
1253 	debugf0("Last bus %d\n", last_bus);
1254 
1255 	return last_bus;
1256 }
1257 
1258 /*
1259  *	i7core_get_devices	Find and perform 'get' operation on the MCH's
1260  *			device/functions we want to reference for this driver
1261  *
1262  *			Need to 'get' device 16 func 1 and func 2
1263  */
1264 int i7core_get_onedevice(struct pci_dev **prev, int devno,
1265 			 struct pci_id_descr *dev_descr, unsigned n_devs,
1266 			 unsigned last_bus)
1267 {
1268 	struct i7core_dev *i7core_dev;
1269 
1270 	struct pci_dev *pdev = NULL;
1271 	u8 bus = 0;
1272 	u8 socket = 0;
1273 
1274 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1275 			      dev_descr->dev_id, *prev);
1276 
1277 	/*
1278 	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1279 	 * are at addr 8086:2c40, instead of 8086:2c41. So, we need
1280 	 * to probe for the alternate address in case of failure
1281 	 */
1282 	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1283 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1284 				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1285 
1286 	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1287 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1288 				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1289 				      *prev);
1290 
1291 	if (!pdev) {
1292 		if (*prev) {
1293 			*prev = pdev;
1294 			return 0;
1295 		}
1296 
1297 		if (dev_descr->optional)
1298 			return 0;
1299 
1300 		if (devno == 0)
1301 			return -ENODEV;
1302 
1303 		i7core_printk(KERN_INFO,
1304 			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1305 			dev_descr->dev, dev_descr->func,
1306 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1307 
1308 		/* End of list, leave */
1309 		return -ENODEV;
1310 	}
1311 	bus = pdev->bus->number;
1312 
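	/*
	 * The non-core devices of each socket live on their own bus,
	 * counting down from the last PCI bus (e.g. 0xff for socket 0,
	 * 0xfe for socket 1, ...), so the socket index is the distance
	 * between the device's bus and the last bus.
	 */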
1313 	socket = last_bus - bus;
1314 
1315 	i7core_dev = get_i7core_dev(socket);
1316 	if (!i7core_dev) {
1317 		i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1318 		if (!i7core_dev)
1319 			return -ENOMEM;
1320 		i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
1321 					   GFP_KERNEL);
1322 		if (!i7core_dev->pdev) {
1323 			kfree(i7core_dev);
1324 			return -ENOMEM;
1325 		}
1326 		i7core_dev->socket = socket;
1327 		i7core_dev->n_devs = n_devs;
1328 		list_add_tail(&i7core_dev->list, &i7core_edac_list);
1329 	}
1330 
1331 	if (i7core_dev->pdev[devno]) {
1332 		i7core_printk(KERN_ERR,
1333 			"Duplicated device for "
1334 			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
1335 			bus, dev_descr->dev, dev_descr->func,
1336 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1337 		pci_dev_put(pdev);
1338 		return -ENODEV;
1339 	}
1340 
1341 	i7core_dev->pdev[devno] = pdev;
1342 
1343 	/* Sanity check */
1344 	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1345 			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1346 		i7core_printk(KERN_ERR,
1347 			"Device PCI ID %04x:%04x "
1348 			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1349 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1350 			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1351 			bus, dev_descr->dev, dev_descr->func);
1352 		return -ENODEV;
1353 	}
1354 
1355 	/* Be sure that the device is enabled */
1356 	if (unlikely(pci_enable_device(pdev) < 0)) {
1357 		i7core_printk(KERN_ERR,
1358 			"Couldn't enable "
1359 			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
1360 			bus, dev_descr->dev, dev_descr->func,
1361 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1362 		return -ENODEV;
1363 	}
1364 
1365 	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1366 		socket, bus, dev_descr->dev,
1367 		dev_descr->func,
1368 		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1369 
1370 	*prev = pdev;
1371 
1372 	return 0;
1373 }
1374 
1375 static int i7core_get_devices(struct pci_id_table *table)
1376 {
1377 	int i, rc, last_bus;
1378 	struct pci_dev *pdev = NULL;
1379 	struct pci_id_descr *dev_descr;
1380 
1381 	last_bus = i7core_pci_lastbus();
1382 
1383 	while (table && table->descr) {
1384 		dev_descr = table->descr;
1385 		for (i = 0; i < table->n_devs; i++) {
1386 			pdev = NULL;
1387 			do {
1388 				rc = i7core_get_onedevice(&pdev, i,
1389 							  &dev_descr[i],
1390 							  table->n_devs,
1391 							  last_bus);
1392 				if (rc < 0) {
1393 					if (i == 0) {
1394 						i = table->n_devs;
1395 						break;
1396 					}
1397 					i7core_put_all_devices();
1398 					return -ENODEV;
1399 				}
1400 			} while (pdev);
1401 		}
1402 		table++;
1403 	}
1404 
1405 	return 0;
1407 }
1408 
1409 static int mci_bind_devs(struct mem_ctl_info *mci,
1410 			 struct i7core_dev *i7core_dev)
1411 {
1412 	struct i7core_pvt *pvt = mci->pvt_info;
1413 	struct pci_dev *pdev;
1414 	int i, func, slot;
1415 
1416 	/* Associates i7core_dev and mci for future usage */
1417 	pvt->i7core_dev = i7core_dev;
1418 	i7core_dev->mci = mci;
1419 
1420 	pvt->is_registered = 0;
1421 	for (i = 0; i < i7core_dev->n_devs; i++) {
1422 		pdev = i7core_dev->pdev[i];
1423 		if (!pdev)
1424 			continue;
1425 
1426 		func = PCI_FUNC(pdev->devfn);
1427 		slot = PCI_SLOT(pdev->devfn);
1428 		if (slot == 3) {
1429 			if (unlikely(func > MAX_MCR_FUNC))
1430 				goto error;
1431 			pvt->pci_mcr[func] = pdev;
1432 		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1433 			if (unlikely(func > MAX_CHAN_FUNC))
1434 				goto error;
1435 			pvt->pci_ch[slot - 4][func] = pdev;
1436 		} else if (!slot && !func)
1437 			pvt->pci_noncore = pdev;
1438 		else
1439 			goto error;
1440 
1441 		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1442 			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1443 			pdev, i7core_dev->socket);
1444 
1445 		if (PCI_SLOT(pdev->devfn) == 3 &&
1446 			PCI_FUNC(pdev->devfn) == 2)
1447 			pvt->is_registered = 1;
1448 	}
1449 
1450 	/*
1451 	 * Add extra nodes to count errors on udimm
1452 	 * For registered memory, this is not needed, since the counters
1453 	 * are already displayed at the standard locations
1454 	 */
1455 	if (!pvt->is_registered)
1456 		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
1457 			&i7core_udimm_counters;
1458 
1459 	return 0;
1460 
1461 error:
1462 	i7core_printk(KERN_ERR, "Device %d, function %d "
1463 		      "is out of the expected range\n",
1464 		      slot, func);
1465 	return -EINVAL;
1466 }
1467 
1468 /****************************************************************************
1469 			Error check routines
1470  ****************************************************************************/
1471 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1472 					 int chan, int dimm, int add)
1473 {
1474 	char *msg;
1475 	struct i7core_pvt *pvt = mci->pvt_info;
1476 	int row = pvt->csrow_map[chan][dimm], i;
1477 
1478 	for (i = 0; i < add; i++) {
1479 		msg = kasprintf(GFP_KERNEL, "Corrected error "
1480 				"(Socket=%d channel=%d dimm=%d)",
1481 				pvt->i7core_dev->socket, chan, dimm);
1482 
1483 		edac_mc_handle_fbd_ce(mci, row, 0, msg);
1484 		kfree (msg);
1485 	}
1486 }
1487 
1488 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1489 			int chan, int new0, int new1, int new2)
1490 {
1491 	struct i7core_pvt *pvt = mci->pvt_info;
1492 	int add0 = 0, add1 = 0, add2 = 0;
1493 	/* Updates CE counters if it is not the first time here */
1494 	if (pvt->ce_count_available) {
1495 		/* Updates CE counters */
1496 
1497 		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1498 		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1499 		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1500 
1501 		if (add2 < 0)
1502 			add2 += 0x7fff;
1503 		pvt->rdimm_ce_count[chan][2] += add2;
1504 
1505 		if (add1 < 0)
1506 			add1 += 0x7fff;
1507 		pvt->rdimm_ce_count[chan][1] += add1;
1508 
1509 		if (add0 < 0)
1510 			add0 += 0x7fff;
1511 		pvt->rdimm_ce_count[chan][0] += add0;
1512 	} else
1513 		pvt->ce_count_available = 1;
1514 
1515 	/* Store the new values */
1516 	pvt->rdimm_last_ce_count[chan][2] = new2;
1517 	pvt->rdimm_last_ce_count[chan][1] = new1;
1518 	pvt->rdimm_last_ce_count[chan][0] = new0;
1519 
1520 	/* Update the EDAC core */
1521 	if (add0 != 0)
1522 		i7core_rdimm_update_csrow(mci, chan, 0, add0);
1523 	if (add1 != 0)
1524 		i7core_rdimm_update_csrow(mci, chan, 1, add1);
1525 	if (add2 != 0)
1526 		i7core_rdimm_update_csrow(mci, chan, 2, add2);
1527 
1528 }
1529 
1530 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1531 {
1532 	struct i7core_pvt *pvt = mci->pvt_info;
1533 	u32 rcv[3][2];
1534 	int i, new0, new1, new2;
1535 
1536 	/* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
1537 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1538 								&rcv[0][0]);
1539 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1540 								&rcv[0][1]);
1541 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1542 								&rcv[1][0]);
1543 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1544 								&rcv[1][1]);
1545 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1546 								&rcv[2][0]);
1547 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1548 								&rcv[2][1]);
1549 	for (i = 0 ; i < 3; i++) {
1550 		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1551 			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1552 		/* If the channel has 3 dimms */
1553 		if (pvt->channel[i].dimms > 2) {
1554 			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1555 			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1556 			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1557 		} else {
1558 			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1559 					DIMM_BOT_COR_ERR(rcv[i][0]);
1560 			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1561 					DIMM_BOT_COR_ERR(rcv[i][1]);
1562 			new2 = 0;
1563 		}
1564 
1565 		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1566 	}
1567 }
1568 
1569 /* This function is based on the device 3 function 4 registers as described on:
1570  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1571  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1572  * also available at:
1573  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1574  */
1575 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1576 {
1577 	struct i7core_pvt *pvt = mci->pvt_info;
1578 	u32 rcv1, rcv0;
1579 	int new0, new1, new2;
1580 
1581 	if (!pvt->pci_mcr[4]) {
1582 		debugf0("%s MCR registers not found\n", __func__);
1583 		return;
1584 	}
1585 
1586 	/* Corrected test errors */
1587 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1588 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1589 
1590 	/* Store the new values */
1591 	new2 = DIMM2_COR_ERR(rcv1);
1592 	new1 = DIMM1_COR_ERR(rcv0);
1593 	new0 = DIMM0_COR_ERR(rcv0);
1594 
1595 	/* Updates CE counters if it is not the first time here */
1596 	if (pvt->ce_count_available) {
1597 		/* Updates CE counters */
1598 		int add0, add1, add2;
1599 
1600 		add2 = new2 - pvt->udimm_last_ce_count[2];
1601 		add1 = new1 - pvt->udimm_last_ce_count[1];
1602 		add0 = new0 - pvt->udimm_last_ce_count[0];
1603 
1604 		if (add2 < 0)
1605 			add2 += 0x7fff;
1606 		pvt->udimm_ce_count[2] += add2;
1607 
1608 		if (add1 < 0)
1609 			add1 += 0x7fff;
1610 		pvt->udimm_ce_count[1] += add1;
1611 
1612 		if (add0 < 0)
1613 			add0 += 0x7fff;
1614 		pvt->udimm_ce_count[0] += add0;
1615 
1616 		if (add0 | add1 | add2)
1617 			i7core_printk(KERN_ERR, "New Corrected error(s): "
1618 				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1619 				      add0, add1, add2);
1620 	} else
1621 		pvt->ce_count_available = 1;
1622 
1623 	/* Store the new values */
1624 	pvt->udimm_last_ce_count[2] = new2;
1625 	pvt->udimm_last_ce_count[1] = new1;
1626 	pvt->udimm_last_ce_count[0] = new0;
1627 }
1628 
1629 /*
1630  * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
1631  * IA-32 Architectures Software Developer’s Manual Volume 3B,
1632  * Nehalem is defined as family 0x06, model 0x1a.
1633  *
1634  * The MCA registers used here are the following ones:
1635  *     struct mce field	MCA Register
1636  *     m->status	MSR_IA32_MC8_STATUS
1637  *     m->addr		MSR_IA32_MC8_ADDR
1638  *     m->misc		MSR_IA32_MC8_MISC
1639  * In the case of Nehalem, the error information is encoded in the .status
1640  * and .misc fields.
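 *
 * As an example (hypothetical status value): a status word with bit 16 set
 * decodes below as errnum = 16, i.e. a "read ECC error", while
 * (m->status >> 4) & 0x07 selects the operation type (read, write, ...).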
1641  */
1642 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1643 				    struct mce *m)
1644 {
1645 	struct i7core_pvt *pvt = mci->pvt_info;
1646 	char *type, *optype, *err, *msg;
1647 	unsigned long error = m->status & 0x1ff0000l;
1648 	u32 optypenum = (m->status >> 4) & 0x07;
1649 	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1650 	u32 dimm = (m->misc >> 16) & 0x3;
1651 	u32 channel = (m->misc >> 18) & 0x3;
1652 	u32 syndrome = m->misc >> 32;
1653 	u32 errnum = find_first_bit(&error, 32);
1654 	int csrow;
1655 
1656 	if (m->mcgstatus & 1)
1657 		type = "FATAL";
1658 	else
1659 		type = "NON_FATAL";
1660 
1661 	switch (optypenum) {
1662 	case 0:
1663 		optype = "generic undef request";
1664 		break;
1665 	case 1:
1666 		optype = "read error";
1667 		break;
1668 	case 2:
1669 		optype = "write error";
1670 		break;
1671 	case 3:
1672 		optype = "addr/cmd error";
1673 		break;
1674 	case 4:
1675 		optype = "scrubbing error";
1676 		break;
1677 	default:
1678 		optype = "reserved";
1679 		break;
1680 	}
1681 
1682 	switch (errnum) {
1683 	case 16:
1684 		err = "read ECC error";
1685 		break;
1686 	case 17:
1687 		err = "RAS ECC error";
1688 		break;
1689 	case 18:
1690 		err = "write parity error";
1691 		break;
1692 	case 19:
1693 		err = "redundancy loss";
1694 		break;
1695 	case 20:
1696 		err = "reserved";
1697 		break;
1698 	case 21:
1699 		err = "memory range error";
1700 		break;
1701 	case 22:
1702 		err = "RTID out of range";
1703 		break;
1704 	case 23:
1705 		err = "address parity error";
1706 		break;
1707 	case 24:
1708 		err = "byte enable parity error";
1709 		break;
1710 	default:
1711 		err = "unknown";
1712 	}
1713 
1714 	/* FIXME: should convert addr into bank and rank information */
1715 	msg = kasprintf(GFP_ATOMIC,
1716 		"%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1717 		"syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1718 		type, (long long) m->addr, m->cpu, dimm, channel,
1719 		syndrome, core_err_cnt, (long long)m->status,
1720 		(long long)m->misc, optype, err);
1721 
1722 	debugf0("%s", msg);
1723 
1724 	csrow = pvt->csrow_map[channel][dimm];
1725 
1726 	/* Call the helper to output message */
1727 	if (m->mcgstatus & 1)
1728 		edac_mc_handle_fbd_ue(mci, csrow, 0,
1729 				0 /* FIXME: should be channel here */, msg);
1730 	else if (!pvt->is_registered)
1731 		edac_mc_handle_fbd_ce(mci, csrow,
1732 				0 /* FIXME: should be channel here */, msg);
1733 
1734 	kfree(msg);
1735 }
1736 
1737 /*
1738  *	i7core_check_error	Retrieve and process errors reported by the
1739  *				hardware. Called by the Core module.
1740  */
1741 static void i7core_check_error(struct mem_ctl_info *mci)
1742 {
1743 	struct i7core_pvt *pvt = mci->pvt_info;
1744 	int i;
1745 	unsigned count = 0;
1746 	struct mce *m;
1747 
1748 	/*
1749 	 * MCE first step: Copy all mce errors into a temporary buffer
1750 	 * We use double buffering here, to reduce the risk of
1751 	 * losing an error.
1752 	 */
1753 	smp_rmb();
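	/*
	 * mce_in/mce_out are the consumer/producer indexes of a circular
	 * buffer; the modular difference below is the number of entries
	 * queued by the NMI handler that have not been consumed yet.
	 */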
1754 	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1755 		% MCE_LOG_LEN;
1756 	if (!count)
1757 		goto check_ce_error;
1758 
1759 	m = pvt->mce_outentry;
1760 	if (pvt->mce_in + count > MCE_LOG_LEN) {
1761 		unsigned l = MCE_LOG_LEN - pvt->mce_in;
1762 
1763 		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1764 		smp_wmb();
1765 		pvt->mce_in = 0;
1766 		count -= l;
1767 		m += l;
1768 	}
1769 	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1770 	smp_wmb();
1771 	pvt->mce_in += count;
1772 
1773 	smp_rmb();
1774 	if (pvt->mce_overrun) {
1775 		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1776 			      pvt->mce_overrun);
1777 		smp_wmb();
1778 		pvt->mce_overrun = 0;
1779 	}
1780 
1781 	/*
1782 	 * MCE second step: parse errors and display
1783 	 */
1784 	for (i = 0; i < count; i++)
1785 		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1786 
1787 	/*
1788 	 * Now, let's increment CE error counts
1789 	 */
1790 check_ce_error:
1791 	if (!pvt->is_registered)
1792 		i7core_udimm_check_mc_ecc_err(mci);
1793 	else
1794 		i7core_rdimm_check_mc_ecc_err(mci);
1795 }
1796 
1797 /*
1798  * i7core_mce_check_error	Replicates mcelog routine to get errors
1799  *				This routine simply queues mcelog errors, and
1800  *				returns. The error itself should be handled later
1801  *				by i7core_check_error.
1802  * WARNING: As this routine should be called at NMI time, extra care should
1803  * be taken to avoid deadlocks, and to be as fast as possible.
1804  */
1805 static int i7core_mce_check_error(void *priv, struct mce *mce)
1806 {
1807 	struct mem_ctl_info *mci = priv;
1808 	struct i7core_pvt *pvt = mci->pvt_info;
1809 
1810 	/*
1811 	 * Just let mcelog handle it if the error is
1812 	 * outside the memory controller
1813 	 */
1814 	if (((mce->status & 0xffff) >> 7) != 1)
1815 		return 0;
1816 
1817 	/* Bank 8 registers are the only ones that we know how to handle */
1818 	if (mce->bank != 8)
1819 		return 0;
1820 
1821 #ifdef CONFIG_SMP
1822 	/* Only handle if it is the right mc controller */
1823 	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
1824 		return 0;
1825 #endif
1826 
1827 	smp_rmb();
1828 	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1829 		smp_wmb();
1830 		pvt->mce_overrun++;
1831 		return 0;
1832 	}
1833 
1834 	/* Copy memory error at the ringbuffer */
1835 	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1836 	smp_wmb();
1837 	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1838 
1839 	/* Handle fatal errors immediately */
1840 	if (mce->mcgstatus & 1)
1841 		i7core_check_error(mci);
1842 
	/* Advise mcelog that the error has been handled */
1844 	return 1;
1845 }
1846 
1847 static int i7core_register_mci(struct i7core_dev *i7core_dev,
1848 			       int num_channels, int num_csrows)
1849 {
1850 	struct mem_ctl_info *mci;
1851 	struct i7core_pvt *pvt;
1852 	int csrow = 0;
1853 	int rc;
1854 
1855 	/* allocate a new MC control structure */
1856 	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1857 			    i7core_dev->socket);
1858 	if (unlikely(!mci))
1859 		return -ENOMEM;
1860 
1861 	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1862 
1863 	/* record ptr to the generic device */
1864 	mci->dev = &i7core_dev->pdev[0]->dev;
1865 
1866 	pvt = mci->pvt_info;
1867 	memset(pvt, 0, sizeof(*pvt));
1868 
1869 	/*
1870 	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * memory channels.
1873 	 */
1874 	mci->mtype_cap = MEM_FLAG_DDR3;
1875 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
1876 	mci->edac_cap = EDAC_FLAG_NONE;
1877 	mci->mod_name = "i7core_edac.c";
1878 	mci->mod_ver = I7CORE_REVISION;
1879 	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1880 				  i7core_dev->socket);
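	/* Note: ctl_name is kfree()d in i7core_remove() and on the fail path */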
1881 	mci->dev_name = pci_name(i7core_dev->pdev[0]);
1882 	mci->ctl_page_to_phys = NULL;
1883 	mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
	/* Set the function that the EDAC core calls to check for errors */
1885 	mci->edac_check = i7core_check_error;
1886 
	/* Store the PCI devices in mci for faster access */
1888 	rc = mci_bind_devs(mci, i7core_dev);
1889 	if (unlikely(rc < 0))
1890 		goto fail;
1891 
1892 	/* Get dimm basic config */
1893 	get_dimm_config(mci, &csrow);
1894 
1895 	/* add this new MC control structure to EDAC's list of MCs */
1896 	if (unlikely(edac_mc_add_mc(mci))) {
1897 		debugf0("MC: " __FILE__
1898 			": %s(): failed edac_mc_add_mc()\n", __func__);
1899 		/* FIXME: perhaps some code should go here that disables error
1900 		 * reporting if we just enabled it
1901 		 */
1902 
1903 		rc = -EINVAL;
1904 		goto fail;
1905 	}
1906 
	/* allocate the generic PCI control info */
1908 	i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1909 						 EDAC_MOD_STR);
1910 	if (unlikely(!i7core_pci)) {
1911 		printk(KERN_WARNING
1912 			"%s(): Unable to create PCI control\n",
1913 			__func__);
1914 		printk(KERN_WARNING
1915 			"%s(): PCI error report via EDAC not setup\n",
1916 			__func__);
1917 	}
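	/* Non-fatal: memory error reporting still works without the PCI control */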
1918 
	/* Default injection mask: channel 0; -1 on the other fields means "any" */
1920 	pvt->inject.channel = 0;
1921 	pvt->inject.dimm = -1;
1922 	pvt->inject.rank = -1;
1923 	pvt->inject.bank = -1;
1924 	pvt->inject.page = -1;
1925 	pvt->inject.col = -1;
1926 
	/* Register with edac_mce in order to receive memory errors */
1928 	pvt->edac_mce.priv = mci;
1929 	pvt->edac_mce.check_error = i7core_mce_check_error;
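	/*
	 * Note that, once edac_mce_register() below succeeds, the
	 * check_error callback may run from the machine-check path at
	 * any time.
	 */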
1930 
	rc = edac_mce_register(&pvt->edac_mce);
	if (unlikely(rc < 0)) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mce_register()\n", __func__);
		/* The mci is already live: undo edac_mc_add_mc() first */
		edac_mc_del_mc(mci->dev);
	}

fail:
	if (rc < 0) {
		kfree(mci->ctl_name);
		edac_mc_free(mci);
	}
	return rc;
1941 }
1942 
1943 /*
 *	i7core_probe	Probe for ONE instance of the device to see if it
 *			is present.
 *	return:
 *		0 if a device was found
 *		< 0 on error
1949  */
1950 
static int probed;
1952 
1953 static int __devinit i7core_probe(struct pci_dev *pdev,
1954 				  const struct pci_device_id *id)
1955 {
1956 	int rc;
1957 	struct i7core_dev *i7core_dev;
1958 
1959 	/* get the pci devices we want to reserve for our use */
1960 	mutex_lock(&i7core_edac_lock);
1961 
	/*
	 * All memory controllers are allocated on the first pass; reject
	 * any further probe calls (one is made per matching PCI device).
	 */
1965 	if (unlikely(probed >= 1)) {
1966 		mutex_unlock(&i7core_edac_lock);
1967 		return -EINVAL;
1968 	}
1969 	probed++;
1970 
1971 	rc = i7core_get_devices(pci_dev_table);
1972 	if (unlikely(rc < 0))
1973 		goto fail0;
1974 
1975 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
1976 		int channels;
1977 		int csrows;
1978 
		/* Count the channels that are active and not disabled */
1980 		rc = i7core_get_active_channels(i7core_dev->socket,
1981 						&channels, &csrows);
1982 		if (unlikely(rc < 0))
1983 			goto fail1;
1984 
1985 		rc = i7core_register_mci(i7core_dev, channels, csrows);
1986 		if (unlikely(rc < 0))
1987 			goto fail1;
1988 	}
1989 
1990 	i7core_printk(KERN_INFO, "Driver loaded.\n");
1991 
1992 	mutex_unlock(&i7core_edac_lock);
1993 	return 0;
1994 
1995 fail1:
1996 	i7core_put_all_devices();
1997 fail0:
1998 	mutex_unlock(&i7core_edac_lock);
1999 	return rc;
2000 }
2001 
2002 /*
 *	i7core_remove	destructor for one instance of the device
2004  *
2005  */
2006 static void __devexit i7core_remove(struct pci_dev *pdev)
2007 {
2008 	struct mem_ctl_info *mci;
2009 	struct i7core_dev *i7core_dev, *tmp;
2010 
2011 	debugf0(__FILE__ ": %s()\n", __func__);
2012 
2013 	if (i7core_pci)
2014 		edac_pci_release_generic_ctl(i7core_pci);
2015 
2016 	/*
	 * There is a problem here: the pdev value passed in for removal will
	 * be wrong, since it points to the X58 device used to detect that
	 * the machine is a Nehalem or later design. Because several PCI
	 * devices are grouped together to provide the MC functionality, we
	 * need to use a different method for releasing the devices.
2022 	 */
2023 
2024 	mutex_lock(&i7core_edac_lock);
2025 	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
2026 		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
2027 		if (mci) {
2028 			struct i7core_pvt *pvt = mci->pvt_info;
2029 
2030 			i7core_dev = pvt->i7core_dev;
2031 			edac_mce_unregister(&pvt->edac_mce);
2032 			kfree(mci->ctl_name);
2033 			edac_mc_free(mci);
2034 			i7core_put_devices(i7core_dev);
2035 		} else {
2036 			i7core_printk(KERN_ERR,
2037 				      "Couldn't find mci for socket %d\n",
2038 				      i7core_dev->socket);
2039 		}
2040 	}
2041 	probed--;
2042 
2043 	mutex_unlock(&i7core_edac_lock);
2044 }
2045 
2046 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2047 
2048 /*
2049  *	i7core_driver	pci_driver structure for this module
2050  *
2051  */
2052 static struct pci_driver i7core_driver = {
2053 	.name     = "i7core_edac",
2054 	.probe    = i7core_probe,
2055 	.remove   = __devexit_p(i7core_remove),
2056 	.id_table = i7core_pci_tbl,
2057 };
2058 
2059 /*
2060  *	i7core_init		Module entry function
2061  *			Try to initialize this module for its devices
2062  */
2063 static int __init i7core_init(void)
2064 {
2065 	int pci_rc;
2066 
2067 	debugf2("MC: " __FILE__ ": %s()\n", __func__);
2068 
2069 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
2070 	opstate_init();
2071 
2072 	i7core_xeon_pci_fixup(pci_dev_table);
2073 
2074 	pci_rc = pci_register_driver(&i7core_driver);
2075 
2076 	if (pci_rc >= 0)
2077 		return 0;
2078 
2079 	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2080 		      pci_rc);
2081 
2082 	return pci_rc;
2083 }
2084 
2085 /*
2086  *	i7core_exit()	Module exit function
2087  *			Unregister the driver
2088  */
2089 static void __exit i7core_exit(void)
2090 {
2091 	debugf2("MC: " __FILE__ ": %s()\n", __func__);
2092 	pci_unregister_driver(&i7core_driver);
2093 }
2094 
2095 module_init(i7core_init);
2096 module_exit(i7core_exit);
2097 
2098 MODULE_LICENSE("GPL");
2099 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2100 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2101 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2102 		   I7CORE_REVISION);
2103 
2104 module_param(edac_op_state, int, 0444);
2105 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2106