1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *	 Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *	http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27 
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39 
40 #include "edac_core.h"
41 
42 /*
43  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
44  * registers start at bus 255, and are not reported by BIOS.
45  * We currently find devices with only 2 sockets. In order to support more
46  * QPI (Quick Path Interconnect) sockets, just increment this number.
47  */
48 #define MAX_SOCKET_BUSES	2
49 
50 
51 /*
52  * Alter this version for the module when modifications are made
53  */
54 #define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
55 #define EDAC_MOD_STR      "i7core_edac"
56 
57 /*
58  * Debug macros
59  */
60 #define i7core_printk(level, fmt, arg...)			\
61 	edac_printk(level, "i7core", fmt, ##arg)
62 
63 #define i7core_mc_printk(mci, level, fmt, arg...)		\
64 	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
65 
66 /*
67  * i7core Memory Controller Registers
68  */
69 
70 	/* OFFSETS for Device 0 Function 0 */
71 
72 #define MC_CFG_CONTROL	0x90
73 
74 	/* OFFSETS for Device 3 Function 0 */
75 
76 #define MC_CONTROL	0x48
77 #define MC_STATUS	0x4c
78 #define MC_MAX_DOD	0x64
79 
80 /*
81  * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
82  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
83  */
84 
85 #define MC_TEST_ERR_RCV1	0x60
86   #define DIMM2_COR_ERR(r)			((r) & 0x7fff)
87 
88 #define MC_TEST_ERR_RCV0	0x64
89   #define DIMM1_COR_ERR(r)			(((r) >> 16) & 0x7fff)
90   #define DIMM0_COR_ERR(r)			((r) & 0x7fff)
91 
92 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
93 #define MC_COR_ECC_CNT_0	0x80
94 #define MC_COR_ECC_CNT_1	0x84
95 #define MC_COR_ECC_CNT_2	0x88
96 #define MC_COR_ECC_CNT_3	0x8c
97 #define MC_COR_ECC_CNT_4	0x90
98 #define MC_COR_ECC_CNT_5	0x94
99 
100 #define DIMM_TOP_COR_ERR(r)			(((r) >> 16) & 0x7fff)
101 #define DIMM_BOT_COR_ERR(r)			((r) & 0x7fff)
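
/*
 * Illustrative sketch (kept disabled, names are hypothetical): each
 * MC_COR_ECC_CNT register packs two 15-bit corrected-error counters,
 * one per DIMM. For instance, a raw value of 0x00030002 decodes as
 * 3 errors on the "top" DIMM and 2 errors on the "bottom" one.
 */
#if 0
static void example_decode_cor_ecc_cnt(u32 reg)
{
	u16 top = DIMM_TOP_COR_ERR(reg);	/* bits 30:16 */
	u16 bot = DIMM_BOT_COR_ERR(reg);	/* bits 14:0  */

	pr_info("top dimm: +%u, bottom dimm: +%u\n", top, bot);
}
#endif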
102 
103 
104 	/* OFFSETS for Devices 4,5 and 6 Function 0 */
105 
106 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
107   #define THREE_DIMMS_PRESENT		(1 << 24)
108   #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
109   #define QUAD_RANK_PRESENT		(1 << 22)
110   #define REGISTERED_DIMM		(1 << 15)
111 
112 #define MC_CHANNEL_MAPPER	0x60
113   #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
114   #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)
115 
116 #define MC_CHANNEL_RANK_PRESENT 0x7c
117   #define RANK_PRESENT_MASK		0xffff
118 
119 #define MC_CHANNEL_ADDR_MATCH	0xf0
120 #define MC_CHANNEL_ERROR_MASK	0xf8
121 #define MC_CHANNEL_ERROR_INJECT	0xfc
122   #define INJECT_ADDR_PARITY	0x10
123   #define INJECT_ECC		0x08
124   #define MASK_CACHELINE	0x06
125   #define MASK_FULL_CACHELINE	0x06
126   #define MASK_MSB32_CACHELINE	0x04
127   #define MASK_LSB32_CACHELINE	0x02
128   #define NO_MASK_CACHELINE	0x00
129   #define REPEAT_EN		0x01
130 
131 	/* OFFSETS for Devices 4,5 and 6 Function 1 */
132 
133 #define MC_DOD_CH_DIMM0		0x48
134 #define MC_DOD_CH_DIMM1		0x4c
135 #define MC_DOD_CH_DIMM2		0x50
136   #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
137   #define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
138   #define DIMM_PRESENT_MASK	(1 << 9)
139   #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
140   #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
141   #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
142   #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
143   #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
144   #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
145   #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
146   #define MC_DOD_NUMCOL_MASK		3
147   #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)
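
/*
 * Illustrative sketch (kept disabled, helper name is hypothetical):
 * decoding one MC_DOD_CH_DIMMx value into the DIMM geometry, using the
 * numbank()/numrank()/numrow()/numcol() helpers defined later in this
 * file. The size formula is the one used by get_dimm_config(): DDR3 has
 * 8 I/O banks, hence the >> (20 - 3) conversion to MB.
 */
#if 0
static void example_decode_dod(u32 dod)
{
	if (DIMM_PRESENT(dod)) {
		int banks = numbank(MC_DOD_NUMBANK(dod));
		int ranks = numrank(MC_DOD_NUMRANK(dod));
		int rows  = numrow(MC_DOD_NUMROW(dod));
		int cols  = numcol(MC_DOD_NUMCOL(dod));
		u32 size  = (rows * cols * banks * ranks) >> (20 - 3);

		pr_info("%d banks, %d ranks: %u MB\n", banks, ranks, size);
	}
}
#endif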
148 
149 #define MC_RANK_PRESENT		0x7c
150 
151 #define MC_SAG_CH_0	0x80
152 #define MC_SAG_CH_1	0x84
153 #define MC_SAG_CH_2	0x88
154 #define MC_SAG_CH_3	0x8c
155 #define MC_SAG_CH_4	0x90
156 #define MC_SAG_CH_5	0x94
157 #define MC_SAG_CH_6	0x98
158 #define MC_SAG_CH_7	0x9c
159 
160 #define MC_RIR_LIMIT_CH_0	0x40
161 #define MC_RIR_LIMIT_CH_1	0x44
162 #define MC_RIR_LIMIT_CH_2	0x48
163 #define MC_RIR_LIMIT_CH_3	0x4C
164 #define MC_RIR_LIMIT_CH_4	0x50
165 #define MC_RIR_LIMIT_CH_5	0x54
166 #define MC_RIR_LIMIT_CH_6	0x58
167 #define MC_RIR_LIMIT_CH_7	0x5C
168 #define MC_RIR_LIMIT_MASK	((1 << 10) - 1)
169 
170 #define MC_RIR_WAY_CH		0x80
171   #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
172   #define MC_RIR_WAY_RANK_MASK		0x7
173 
174 /*
175  * i7core structs
176  */
177 
178 #define NUM_CHANS 3
179 #define MAX_DIMMS 3		/* Max DIMMS per channel */
180 #define MAX_MCR_FUNC  4
181 #define MAX_CHAN_FUNC 3
182 
183 struct i7core_info {
184 	u32	mc_control;
185 	u32	mc_status;
186 	u32	max_dod;
187 	u32	ch_map;
188 };
189 
190 
191 struct i7core_inject {
192 	int	enable;
193 
194 	u32	section;
195 	u32	type;
196 	u32	eccmask;
197 
198 	/* Error address mask */
199 	int channel, dimm, rank, bank, page, col;
200 };
201 
202 struct i7core_channel {
203 	u32		ranks;
204 	u32		dimms;
205 };
206 
207 struct pci_id_descr {
208 	int			dev;
209 	int			func;
210 	int 			dev_id;
211 	int			optional;
212 };
213 
214 struct pci_id_table {
215 	struct pci_id_descr	*descr;
216 	int			n_devs;
217 };
218 
219 struct i7core_dev {
220 	struct list_head	list;
221 	u8			socket;
222 	struct pci_dev		**pdev;
223 	int			n_devs;
224 	struct mem_ctl_info	*mci;
225 };
226 
227 struct i7core_pvt {
228 	struct pci_dev	*pci_noncore;
229 	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
230 	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
231 
232 	struct i7core_dev *i7core_dev;
233 
234 	struct i7core_info	info;
235 	struct i7core_inject	inject;
236 	struct i7core_channel	channel[NUM_CHANS];
237 
238 	int		channels; /* Number of active channels */
239 
240 	int		ce_count_available;
241 	int 		csrow_map[NUM_CHANS][MAX_DIMMS];
242 
243 			/* ECC corrected error counts per udimm */
244 	unsigned long	udimm_ce_count[MAX_DIMMS];
245 	int		udimm_last_ce_count[MAX_DIMMS];
246 			/* ECC corrected error counts per rdimm */
247 	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
248 	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
249 
250 	unsigned int	is_registered;
251 
252 	/* mcelog glue */
253 	struct edac_mce		edac_mce;
254 
255 	/* Fifo double buffers */
256 	struct mce		mce_entry[MCE_LOG_LEN];
257 	struct mce		mce_outentry[MCE_LOG_LEN];
258 
259 	/* Fifo in/out counters */
260 	unsigned		mce_in, mce_out;
261 
262 	/* Count of errors that could not be queued (lost) */
263 	unsigned		mce_overrun;
264 };
265 
266 /* Static vars */
267 static LIST_HEAD(i7core_edac_list);
268 static DEFINE_MUTEX(i7core_edac_lock);
269 
270 #define PCI_DESCR(device, function, device_id)	\
271 	.dev = (device),			\
272 	.func = (function),			\
273 	.dev_id = (device_id)
274 
275 static struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
276 		/* Memory controller */
277 	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
278 	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
279 			/* Exists only for RDIMM */
280 	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
281 	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
282 
283 		/* Channel 0 */
284 	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
285 	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
286 	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
287 	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
288 
289 		/* Channel 1 */
290 	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
291 	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
292 	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
293 	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
294 
295 		/* Channel 2 */
296 	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
297 	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
298 	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
299 	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
300 
301 		/* Generic Non-core registers */
302 	/*
303 	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
304 	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
305 	 * the probing code needs to test for the other address in case this
306 	 * one fails
307 	 */
308 	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
309 
310 };
311 
312 static struct pci_id_descr pci_dev_descr_lynnfield[] = {
313 	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
314 	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
315 	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
316 
317 	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
318 	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
319 	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
320 	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
321 
322 	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
323 	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
324 	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
325 	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
326 
327 	/*
328 	 * This PCI device has an alternate address on some
329 	 * processors, like the Core i7 860
330 	 */
331 	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
332 };
333 
334 static struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
335 		/* Memory controller */
336 	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
337 	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
338 			/* Exists only for RDIMM */
339 	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
340 	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
341 
342 		/* Channel 0 */
343 	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
344 	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
345 	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
346 	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
347 
348 		/* Channel 1 */
349 	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
350 	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
351 	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
352 	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
353 
354 		/* Channel 2 */
355 	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
356 	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
357 	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
358 	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
359 
360 		/* Generic Non-core registers */
361 	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
362 
363 };
364 
365 #define PCI_ID_TABLE_ENTRY(A) { A, ARRAY_SIZE(A) }
366 static struct pci_id_table pci_dev_table[] = {
367 	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
368 	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
369 	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
370 };
371 
372 /*
373  *	pci_device_id	table for which devices we are looking for
374  */
375 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
376 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
377 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
378 	{0,}			/* 0 terminated list. */
379 };
380 
381 static struct edac_pci_ctl_info *i7core_pci;
382 
383 /****************************************************************************
384 			Ancillary status routines
385  ****************************************************************************/
386 
387 	/* MC_CONTROL bits */
388 #define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + (ch))))
389 #define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))
390 
391 	/* MC_STATUS bits */
392 #define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
393 #define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << (ch)))
394 
395 	/* MC_MAX_DOD read functions */
396 static inline int numdimms(u32 dimms)
397 {
398 	return (dimms & 0x3) + 1;
399 }
400 
401 static inline int numrank(u32 rank)
402 {
403 	static int ranks[4] = { 1, 2, 4, -EINVAL };
404 
405 	return ranks[rank & 0x3];
406 }
407 
408 static inline int numbank(u32 bank)
409 {
410 	static int banks[4] = { 4, 8, 16, -EINVAL };
411 
412 	return banks[bank & 0x3];
413 }
414 
415 static inline int numrow(u32 row)
416 {
417 	static int rows[8] = {
418 		1 << 12, 1 << 13, 1 << 14, 1 << 15,
419 		1 << 16, -EINVAL, -EINVAL, -EINVAL,
420 	};
421 
422 	return rows[row & 0x7];
423 }
424 
425 static inline int numcol(u32 col)
426 {
427 	static int cols[4] = {
428 		1 << 10, 1 << 11, 1 << 12, -EINVAL,
429 	};
430 	return cols[col & 0x3];
431 }
432 
433 static struct i7core_dev *get_i7core_dev(u8 socket)
434 {
435 	struct i7core_dev *i7core_dev;
436 
437 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
438 		if (i7core_dev->socket == socket)
439 			return i7core_dev;
440 	}
441 
442 	return NULL;
443 }
444 
445 /****************************************************************************
446 			Memory check routines
447  ****************************************************************************/
448 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
449 					  unsigned func)
450 {
451 	struct i7core_dev *i7core_dev = get_i7core_dev(socket);
452 	int i;
453 
454 	if (!i7core_dev)
455 		return NULL;
456 
457 	for (i = 0; i < i7core_dev->n_devs; i++) {
458 		if (!i7core_dev->pdev[i])
459 			continue;
460 
461 		if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
462 		    PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
463 			return i7core_dev->pdev[i];
464 		}
465 	}
466 
467 	return NULL;
468 }
469 
470 /**
471  * i7core_get_active_channels() - gets the number of channels and csrows
472  * @socket:	Quick Path Interconnect socket
473  * @channels:	Number of channels that will be returned
474  * @csrows:	Number of csrows found
475  *
476  * Since the EDAC core needs to know in advance the number of available
477  * channels and csrows, in order to allocate memory for them, we need to
478  * run two similar steps. The first step, implemented by this function,
479  * counts the csrows/channels present on one socket.
480  * This is used to properly size the mci components.
481  *
482  * It should be noted that none of the currently available datasheets
483  * explain, or even mention, how csrows are seen by the memory controller.
484  * So, we need to add a fake description for csrows:
485  * this driver attributes one DIMM of memory to one csrow.
486  */
487 static int i7core_get_active_channels(u8 socket, unsigned *channels,
488 				      unsigned *csrows)
489 {
490 	struct pci_dev *pdev = NULL;
491 	int i, j;
492 	u32 status, control;
493 
494 	*channels = 0;
495 	*csrows = 0;
496 
497 	pdev = get_pdev_slot_func(socket, 3, 0);
498 	if (!pdev) {
499 		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
500 			      socket);
501 		return -ENODEV;
502 	}
503 
504 	/* Device 3 function 0 reads */
505 	pci_read_config_dword(pdev, MC_STATUS, &status);
506 	pci_read_config_dword(pdev, MC_CONTROL, &control);
507 
508 	for (i = 0; i < NUM_CHANS; i++) {
509 		u32 dimm_dod[3];
510 		/* Check if the channel is active */
511 		if (!(control & (1 << (8 + i))))
512 			continue;
513 
514 		/* Check if the channel is disabled */
515 		if (status & (1 << i))
516 			continue;
517 
518 		pdev = get_pdev_slot_func(socket, i + 4, 1);
519 		if (!pdev) {
520 			i7core_printk(KERN_ERR, "Couldn't find socket %d "
521 						"fn %d.%d!!!\n",
522 						socket, i + 4, 1);
523 			return -ENODEV;
524 		}
525 		/* Devices 4-6 function 1 */
526 		pci_read_config_dword(pdev,
527 				MC_DOD_CH_DIMM0, &dimm_dod[0]);
528 		pci_read_config_dword(pdev,
529 				MC_DOD_CH_DIMM1, &dimm_dod[1]);
530 		pci_read_config_dword(pdev,
531 				MC_DOD_CH_DIMM2, &dimm_dod[2]);
532 
533 		(*channels)++;
534 
535 		for (j = 0; j < 3; j++) {
536 			if (!DIMM_PRESENT(dimm_dod[j]))
537 				continue;
538 			(*csrows)++;
539 		}
540 	}
541 
542 	debugf0("Number of active channels on socket %d: %d\n",
543 		socket, *channels);
544 
545 	return 0;
546 }
547 
548 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
549 {
550 	struct i7core_pvt *pvt = mci->pvt_info;
551 	struct csrow_info *csr;
552 	struct pci_dev *pdev;
553 	int i, j;
554 	unsigned long last_page = 0;
555 	enum edac_type mode;
556 	enum mem_type mtype;
557 
558 	/* Get data from the MC register, function 0 */
559 	pdev = pvt->pci_mcr[0];
560 	if (!pdev)
561 		return -ENODEV;
562 
563 	/* Device 3 function 0 reads */
564 	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
565 	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
566 	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
567 	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
568 
569 	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
570 		pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
571 		pvt->info.max_dod, pvt->info.ch_map);
572 
573 	if (ECC_ENABLED(pvt)) {
574 		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
575 		if (ECCx8(pvt))
576 			mode = EDAC_S8ECD8ED;
577 		else
578 			mode = EDAC_S4ECD4ED;
579 	} else {
580 		debugf0("ECC disabled\n");
581 		mode = EDAC_NONE;
582 	}
583 
584 	/* FIXME: need to handle the error codes */
585 	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
586 		"x%x x 0x%x\n",
587 		numdimms(pvt->info.max_dod),
588 		numrank(pvt->info.max_dod >> 2),
589 		numbank(pvt->info.max_dod >> 4),
590 		numrow(pvt->info.max_dod >> 6),
591 		numcol(pvt->info.max_dod >> 9));
592 
593 	for (i = 0; i < NUM_CHANS; i++) {
594 		u32 data, dimm_dod[3], value[8];
595 
596 		if (!pvt->pci_ch[i][0])
597 			continue;
598 
599 		if (!CH_ACTIVE(pvt, i)) {
600 			debugf0("Channel %i is not active\n", i);
601 			continue;
602 		}
603 		if (CH_DISABLED(pvt, i)) {
604 			debugf0("Channel %i is disabled\n", i);
605 			continue;
606 		}
607 
608 		/* Devices 4-6 function 0 */
609 		pci_read_config_dword(pvt->pci_ch[i][0],
610 				MC_CHANNEL_DIMM_INIT_PARAMS, &data);
611 
612 		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
613 						4 : 2;
614 
615 		if (data & REGISTERED_DIMM)
616 			mtype = MEM_RDDR3;
617 		else
618 			mtype = MEM_DDR3;
619 #if 0
620 		if (data & THREE_DIMMS_PRESENT)
621 			pvt->channel[i].dimms = 3;
622 		else if (data & SINGLE_QUAD_RANK_PRESENT)
623 			pvt->channel[i].dimms = 1;
624 		else
625 			pvt->channel[i].dimms = 2;
626 #endif
627 
628 		/* Devices 4-6 function 1 */
629 		pci_read_config_dword(pvt->pci_ch[i][1],
630 				MC_DOD_CH_DIMM0, &dimm_dod[0]);
631 		pci_read_config_dword(pvt->pci_ch[i][1],
632 				MC_DOD_CH_DIMM1, &dimm_dod[1]);
633 		pci_read_config_dword(pvt->pci_ch[i][1],
634 				MC_DOD_CH_DIMM2, &dimm_dod[2]);
635 
636 		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
637 			"%d ranks, %cDIMMs\n",
638 			i,
639 			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
640 			data,
641 			pvt->channel[i].ranks,
642 			(data & REGISTERED_DIMM) ? 'R' : 'U');
643 
644 		for (j = 0; j < 3; j++) {
645 			u32 banks, ranks, rows, cols;
646 			u32 size, npages;
647 
648 			if (!DIMM_PRESENT(dimm_dod[j]))
649 				continue;
650 
651 			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
652 			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
653 			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
654 			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
655 
656 			/* DDR3 has 8 I/O banks */
657 			size = (rows * cols * banks * ranks) >> (20 - 3);
658 
659 			pvt->channel[i].dimms++;
660 
661 			debugf0("\tdimm %d %d MB offset: %x, "
662 				"bank: %d, rank: %d, row: %#x, col: %#x\n",
663 				j, size,
664 				RANKOFFSET(dimm_dod[j]),
665 				banks, ranks, rows, cols);
666 
667 #if PAGE_SHIFT > 20
668 			npages = size >> (PAGE_SHIFT - 20);
669 #else
670 			npages = size << (20 - PAGE_SHIFT);
671 #endif
672 
673 			csr = &mci->csrows[*csrow];
674 			csr->first_page = last_page + 1;
675 			last_page += npages;
676 			csr->last_page = last_page;
677 			csr->nr_pages = npages;
678 
679 			csr->page_mask = 0;
680 			csr->grain = 8;
681 			csr->csrow_idx = *csrow;
682 			csr->nr_channels = 1;
683 
684 			csr->channels[0].chan_idx = i;
685 			csr->channels[0].ce_count = 0;
686 
687 			pvt->csrow_map[i][j] = *csrow;
688 
689 			switch (banks) {
690 			case 4:
691 				csr->dtype = DEV_X4;
692 				break;
693 			case 8:
694 				csr->dtype = DEV_X8;
695 				break;
696 			case 16:
697 				csr->dtype = DEV_X16;
698 				break;
699 			default:
700 				csr->dtype = DEV_UNKNOWN;
701 			}
702 
703 			csr->edac_mode = mode;
704 			csr->mtype = mtype;
705 
706 			(*csrow)++;
707 		}
708 
709 		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
710 		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
711 		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
712 		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
713 		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
714 		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
715 		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
716 		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
717 		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
718 		for (j = 0; j < 8; j++)
719 			debugf1("\t\t%#x\t%#x\t%#x\n",
720 				(value[j] >> 27) & 0x1,
721 				(value[j] >> 24) & 0x7,
722 				(value[j] & ((1 << 24) - 1)));
723 	}
724 
725 	return 0;
726 }
727 
728 /****************************************************************************
729 			Error insertion routines
730  ****************************************************************************/
731 
732 /* The i7core has independent error injection features per channel.
733    However, to keep the code simpler, we don't allow enabling error
734    injection on more than one channel at a time.
735    Also, since a change to an inject parameter is only applied at enable
736    time, we disable error injection on every write to the sysfs nodes
737    that control error injection.
738  */
739 static int disable_inject(struct mem_ctl_info *mci)
740 {
741 	struct i7core_pvt *pvt = mci->pvt_info;
742 
743 	pvt->inject.enable = 0;
744 
745 	if (!pvt->pci_ch[pvt->inject.channel][0])
746 		return -ENODEV;
747 
748 	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
749 				MC_CHANNEL_ERROR_INJECT, 0);
750 
751 	return 0;
752 }
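
/*
 * Illustrative usage sketch: error injection is driven from sysfs.
 * Assuming the standard EDAC sysfs layout (the mc number and exact paths
 * may vary per system), injecting repeated ECC errors on any address of
 * channel 0 could look like:
 *
 *	echo 0 > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *	echo 2 > /sys/devices/system/edac/mc/mc0/inject_eccmask
 *	echo 3 > /sys/devices/system/edac/mc/mc0/inject_type
 *	echo 1 > /sys/devices/system/edac/mc/mc0/inject_enable
 *
 * These paths and values are assumptions for illustration; see the store
 * handlers below for the accepted ranges and bit meanings.
 */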
753 
754 /*
755  * i7core inject.section
756  *
757  *	accept and store error injection inject.section value
758  *	bit 0 - refers to the lower 32-byte half cacheline
759  *	bit 1 - refers to the upper 32-byte half cacheline
760  */
761 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
762 					   const char *data, size_t count)
763 {
764 	struct i7core_pvt *pvt = mci->pvt_info;
765 	unsigned long value;
766 	int rc;
767 
768 	if (pvt->inject.enable)
769 		disable_inject(mci);
770 
771 	rc = strict_strtoul(data, 10, &value);
772 	if ((rc < 0) || (value > 3))
773 		return -EIO;
774 
775 	pvt->inject.section = (u32) value;
776 	return count;
777 }
778 
779 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
780 					      char *data)
781 {
782 	struct i7core_pvt *pvt = mci->pvt_info;
783 	return sprintf(data, "0x%08x\n", pvt->inject.section);
784 }
785 
786 /*
787  * i7core inject.type
788  *
789  *	accept and store error injection inject.type value
790  *	bit 0 - repeat enable - Enable error repetition
791  *	bit 1 - inject ECC error
792  *	bit 2 - inject parity error
793  */
794 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
795 					const char *data, size_t count)
796 {
797 	struct i7core_pvt *pvt = mci->pvt_info;
798 	unsigned long value;
799 	int rc;
800 
801 	if (pvt->inject.enable)
802 		disable_inject(mci);
803 
804 	rc = strict_strtoul(data, 10, &value);
805 	if ((rc < 0) || (value > 7))
806 		return -EIO;
807 
808 	pvt->inject.type = (u32) value;
809 	return count;
810 }
811 
812 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
813 					      char *data)
814 {
815 	struct i7core_pvt *pvt = mci->pvt_info;
816 	return sprintf(data, "0x%08x\n", pvt->inject.type);
817 }
818 
819 /*
820  * i7core_inject_inject.eccmask_store
821  *
822  * The type of error (UE/CE) will depend on the inject.eccmask value:
823  *   Any bits set to a 1 will flip the corresponding ECC bit
824  *   Correctable errors can be injected by flipping 1 bit or the bits within
825  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
826  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
827  *   uncorrectable error to be injected.
828  */
829 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
830 					const char *data, size_t count)
831 {
832 	struct i7core_pvt *pvt = mci->pvt_info;
833 	unsigned long value;
834 	int rc;
835 
836 	if (pvt->inject.enable)
837 		disable_inject(mci);
838 
839 	rc = strict_strtoul(data, 10, &value);
840 	if (rc < 0)
841 		return -EIO;
842 
843 	pvt->inject.eccmask = (u32) value;
844 	return count;
845 }
846 
847 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
848 					      char *data)
849 {
850 	struct i7core_pvt *pvt = mci->pvt_info;
851 	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
852 }
853 
854 /*
855  * i7core_addrmatch
856  *
857  * Accept and store the address-match criteria used to select where the
858  * error will be injected: channel, dimm, rank, bank, page and col can
859  * each be specified.
860  * Writing "any" (stored internally as -1) to one of these nodes makes
861  * the memory controller ignore that field when matching the address
862  * for error injection.
863  */
864 
865 #define DECLARE_ADDR_MATCH(param, limit)			\
866 static ssize_t i7core_inject_store_##param(			\
867 		struct mem_ctl_info *mci,			\
868 		const char *data, size_t count)			\
869 {								\
870 	struct i7core_pvt *pvt;					\
871 	long value;						\
872 	int rc;							\
873 								\
874 	debugf1("%s()\n", __func__);				\
875 	pvt = mci->pvt_info;					\
876 								\
877 	if (pvt->inject.enable)					\
878 		disable_inject(mci);				\
879 								\
880 	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
881 		value = -1;					\
882 	else {							\
883 		rc = strict_strtol(data, 10, &value);		\
884 		if ((rc < 0) || (value >= limit))		\
885 			return -EIO;				\
886 	}							\
887 								\
888 	pvt->inject.param = value;				\
889 								\
890 	return count;						\
891 }								\
892 								\
893 static ssize_t i7core_inject_show_##param(			\
894 		struct mem_ctl_info *mci,			\
895 		char *data)					\
896 {								\
897 	struct i7core_pvt *pvt;					\
898 								\
899 	pvt = mci->pvt_info;					\
900 	debugf1("%s() pvt=%p\n", __func__, pvt);		\
901 	if (pvt->inject.param < 0)				\
902 		return sprintf(data, "any\n");			\
903 	else							\
904 		return sprintf(data, "%d\n", pvt->inject.param);\
905 }
906 
907 #define ATTR_ADDR_MATCH(param)					\
908 	{							\
909 		.attr = {					\
910 			.name = #param,				\
911 			.mode = (S_IRUGO | S_IWUSR)		\
912 		},						\
913 		.show  = i7core_inject_show_##param,		\
914 		.store = i7core_inject_store_##param,		\
915 	}
916 
917 DECLARE_ADDR_MATCH(channel, 3);
918 DECLARE_ADDR_MATCH(dimm, 3);
919 DECLARE_ADDR_MATCH(rank, 4);
920 DECLARE_ADDR_MATCH(bank, 32);
921 DECLARE_ADDR_MATCH(page, 0x10000);
922 DECLARE_ADDR_MATCH(col, 0x4000);
923 
924 static int write_and_test(struct pci_dev *dev, int where, u32 val)
925 {
926 	u32 read;
927 	int count;
928 
929 	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
930 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
931 		where, val);
932 
933 	for (count = 0; count < 10; count++) {
934 		if (count)
935 			msleep(100);
936 		pci_write_config_dword(dev, where, val);
937 		pci_read_config_dword(dev, where, &read);
938 
939 		if (read == val)
940 			return 0;
941 	}
942 
943 	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
944 		"write=%08x. Read=%08x\n",
945 		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
946 		where, val, read);
947 
948 	return -EINVAL;
949 }
950 
951 /*
952  * This routine prepares the Memory Controller for error injection.
953  * The error will be injected when some process tries to write to the
954  * memory that matches the given criteria.
955  * The criteria can be set in terms of a mask where dimm, rank, bank, page
956  * and col can be specified.
957  * A -1 value for any of the mask items makes the MCU ignore that
958  * matching criterion for error injection.
959  *
960  * It should be noted that the error will only happen after a write to
961  * memory that matches the condition. If REPEAT_EN is not set in the
962  * inject mask, just one error is produced. Otherwise, it repeats until
963  * the inject mask is cleared (see the mask-composition sketch below).
964  *
965  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
966  *    is reliable enough to check if the MC is using the
967  *    three channels. However, this is not clear in the datasheet.
968  */
969 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
970 				       const char *data, size_t count)
971 {
972 	struct i7core_pvt *pvt = mci->pvt_info;
973 	u32 injectmask;
974 	u64 mask = 0;
975 	int  rc;
976 	long enable;
977 
978 	if (!pvt->pci_ch[pvt->inject.channel][0])
979 		return 0;
980 
981 	rc = strict_strtoul(data, 10, &enable);
982 	if ((rc < 0))
983 		return 0;
984 
985 	if (enable) {
986 		pvt->inject.enable = 1;
987 	} else {
988 		disable_inject(mci);
989 		return count;
990 	}
991 
992 	/* Sets pvt->inject.dimm mask */
993 	if (pvt->inject.dimm < 0)
994 		mask |= 1LL << 41;
995 	else {
996 		if (pvt->channel[pvt->inject.channel].dimms > 2)
997 			mask |= (pvt->inject.dimm & 0x3LL) << 35;
998 		else
999 			mask |= (pvt->inject.dimm & 0x1LL) << 36;
1000 	}
1001 
1002 	/* Sets pvt->inject.rank mask */
1003 	if (pvt->inject.rank < 0)
1004 		mask |= 1LL << 40;
1005 	else {
1006 		if (pvt->channel[pvt->inject.channel].dimms > 2)
1007 			mask |= (pvt->inject.rank & 0x1LL) << 34;
1008 		else
1009 			mask |= (pvt->inject.rank & 0x3LL) << 34;
1010 	}
1011 
1012 	/* Sets pvt->inject.bank mask */
1013 	if (pvt->inject.bank < 0)
1014 		mask |= 1LL << 39;
1015 	else
1016 		mask |= (pvt->inject.bank & 0x15LL) << 30;
1017 
1018 	/* Sets pvt->inject.page mask */
1019 	if (pvt->inject.page < 0)
1020 		mask |= 1LL << 38;
1021 	else
1022 		mask |= (pvt->inject.page & 0xffff) << 14;
1023 
1024 	/* Sets pvt->inject.column mask */
1025 	if (pvt->inject.col < 0)
1026 		mask |= 1LL << 37;
1027 	else
1028 		mask |= (pvt->inject.col & 0x3fff);
1029 
1030 	/*
1031 	 * bit    0: REPEAT_EN
1032 	 * bits 1-2: MASK_HALF_CACHELINE
1033 	 * bit    3: INJECT_ECC
1034 	 * bit    4: INJECT_ADDR_PARITY
1035 	 */
1036 
1037 	injectmask = (pvt->inject.type & 1) |
1038 		     (pvt->inject.section & 0x3) << 1 |
1039 		     (pvt->inject.type & 0x6) << (3 - 1);
1040 
1041 	/* Unlock writes to registers - this register is write only */
1042 	pci_write_config_dword(pvt->pci_noncore,
1043 			       MC_CFG_CONTROL, 0x2);
1044 
1045 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1046 			       MC_CHANNEL_ADDR_MATCH, mask);
1047 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1048 			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1049 
1050 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1051 			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1052 
1053 	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1054 			       MC_CHANNEL_ERROR_INJECT, injectmask);
1055 
1056 	/*
1057 	 * This is something undocumented, based on my tests
1058 	 * Without writing 8 to this register, errors aren't injected. Not sure
1059 	 * why.
1060 	 */
1061 	pci_write_config_dword(pvt->pci_noncore,
1062 			       MC_CFG_CONTROL, 8);
1063 
1064 	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1065 		" inject 0x%08x\n",
1066 		mask, pvt->inject.eccmask, injectmask);
1067 
1068 
1069 	return count;
1070 }
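
/*
 * Illustrative sketch (kept disabled, helper name is hypothetical) of the
 * MC_CHANNEL_ADDR_MATCH mask composition performed above. Assuming a
 * channel with up to 2 DIMMs, matching dimm 1 at any rank, bank, page and
 * column would build:
 */
#if 0
static u64 example_addr_match_mask(void)
{
	u64 mask = 0;

	mask |= 0x1LL << 36;	/* match dimm 1 (2-DIMM encoding) */
	mask |= 1LL << 40;	/* ignore rank                    */
	mask |= 1LL << 39;	/* ignore bank                    */
	mask |= 1LL << 38;	/* ignore page                    */
	mask |= 1LL << 37;	/* ignore column                  */

	return mask;		/* 0x1f000000000                  */
}
#endif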
1071 
1072 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1073 					char *data)
1074 {
1075 	struct i7core_pvt *pvt = mci->pvt_info;
1076 	u32 injectmask;
1077 
1078 	if (!pvt->pci_ch[pvt->inject.channel][0])
1079 		return 0;
1080 
1081 	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1082 			       MC_CHANNEL_ERROR_INJECT, &injectmask);
1083 
1084 	debugf0("Inject error read: 0x%018x\n", injectmask);
1085 
1086 	if (injectmask & 0x0c)
1087 		pvt->inject.enable = 1;
1088 
1089 	return sprintf(data, "%d\n", pvt->inject.enable);
1090 }
1091 
1092 #define DECLARE_COUNTER(param)					\
1093 static ssize_t i7core_show_counter_##param(			\
1094 		struct mem_ctl_info *mci,			\
1095 		char *data)					\
1096 {								\
1097 	struct i7core_pvt *pvt = mci->pvt_info;			\
1098 								\
1099 	debugf1("%s() \n", __func__);				\
1100 	if (!pvt->ce_count_available || (pvt->is_registered))	\
1101 		return sprintf(data, "data unavailable\n");	\
1102 	return sprintf(data, "%lu\n",				\
1103 			pvt->udimm_ce_count[param]);		\
1104 }
1105 
1106 #define ATTR_COUNTER(param)					\
1107 	{							\
1108 		.attr = {					\
1109 			.name = __stringify(udimm##param),	\
1110 			.mode = (S_IRUGO | S_IWUSR)		\
1111 		},						\
1112 		.show  = i7core_show_counter_##param		\
1113 	}
1114 
1115 DECLARE_COUNTER(0);
1116 DECLARE_COUNTER(1);
1117 DECLARE_COUNTER(2);
1118 
1119 /*
1120  * Sysfs struct
1121  */
1122 
1123 
1124 static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1125 	ATTR_ADDR_MATCH(channel),
1126 	ATTR_ADDR_MATCH(dimm),
1127 	ATTR_ADDR_MATCH(rank),
1128 	ATTR_ADDR_MATCH(bank),
1129 	ATTR_ADDR_MATCH(page),
1130 	ATTR_ADDR_MATCH(col),
1131 	{ .attr = { .name = NULL } }
1132 };
1133 
1134 static struct mcidev_sysfs_group i7core_inject_addrmatch = {
1135 	.name  = "inject_addrmatch",
1136 	.mcidev_attr = i7core_addrmatch_attrs,
1137 };
1138 
1139 static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1140 	ATTR_COUNTER(0),
1141 	ATTR_COUNTER(1),
1142 	ATTR_COUNTER(2),
1143 	{ .attr = { .name = NULL } }
1144 };
1145 
1146 static struct mcidev_sysfs_group i7core_udimm_counters = {
1147 	.name  = "all_channel_counts",
1148 	.mcidev_attr = i7core_udimm_counters_attrs,
1149 };
1150 
1151 static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
1152 	{
1153 		.attr = {
1154 			.name = "inject_section",
1155 			.mode = (S_IRUGO | S_IWUSR)
1156 		},
1157 		.show  = i7core_inject_section_show,
1158 		.store = i7core_inject_section_store,
1159 	}, {
1160 		.attr = {
1161 			.name = "inject_type",
1162 			.mode = (S_IRUGO | S_IWUSR)
1163 		},
1164 		.show  = i7core_inject_type_show,
1165 		.store = i7core_inject_type_store,
1166 	}, {
1167 		.attr = {
1168 			.name = "inject_eccmask",
1169 			.mode = (S_IRUGO | S_IWUSR)
1170 		},
1171 		.show  = i7core_inject_eccmask_show,
1172 		.store = i7core_inject_eccmask_store,
1173 	}, {
1174 		.grp = &i7core_inject_addrmatch,
1175 	}, {
1176 		.attr = {
1177 			.name = "inject_enable",
1178 			.mode = (S_IRUGO | S_IWUSR)
1179 		},
1180 		.show  = i7core_inject_enable_show,
1181 		.store = i7core_inject_enable_store,
1182 	},
1183 	{ .attr = { .name = NULL } },	/* Reserved for udimm counters */
1184 	{ .attr = { .name = NULL } }
1185 };
1186 
1187 /****************************************************************************
1188 	Device initialization routines: put/get, init/exit
1189  ****************************************************************************/
1190 
1191 /*
1192  *	i7core_put_devices	'put' all the devices that we have
1193  *				reserved via 'get'
1194  */
1195 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1196 {
1197 	int i;
1198 
1199 	debugf0(__FILE__ ": %s()\n", __func__);
1200 	for (i = 0; i < i7core_dev->n_devs; i++) {
1201 		struct pci_dev *pdev = i7core_dev->pdev[i];
1202 		if (!pdev)
1203 			continue;
1204 		debugf0("Removing dev %02x:%02x.%d\n",
1205 			pdev->bus->number,
1206 			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1207 		pci_dev_put(pdev);
1208 	}
1209 	kfree(i7core_dev->pdev);
1210 	list_del(&i7core_dev->list);
1211 	kfree(i7core_dev);
1212 }
1213 
1214 static void i7core_put_all_devices(void)
1215 {
1216 	struct i7core_dev *i7core_dev, *tmp;
1217 
1218 	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
1219 		i7core_put_devices(i7core_dev);
1220 }
1221 
1222 static void __init i7core_xeon_pci_fixup(struct pci_id_table *table)
1223 {
1224 	struct pci_dev *pdev = NULL;
1225 	int i;
1226 	/*
1227 	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
1228 	 * aren't announced by ACPI, so we need to use a legacy bus scan
1229 	 * probing to detect them.
1230 	 */
1231 	while (table && table->descr) {
1232 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1233 		if (unlikely(!pdev)) {
1234 			for (i = 0; i < MAX_SOCKET_BUSES; i++)
1235 				pcibios_scan_specific_bus(255-i);
1236 		}
1237 		pci_dev_put(pdev);
1238 		table++;
1239 	}
1240 }
1241 
1242 static unsigned i7core_pci_lastbus(void)
1243 {
1244 	int last_bus = 0, bus;
1245 	struct pci_bus *b = NULL;
1246 
1247 	while ((b = pci_find_next_bus(b)) != NULL) {
1248 		bus = b->number;
1249 		debugf0("Found bus %d\n", bus);
1250 		if (bus > last_bus)
1251 			last_bus = bus;
1252 	}
1253 
1254 	debugf0("Last bus %d\n", last_bus);
1255 
1256 	return last_bus;
1257 }
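
/*
 * Illustrative sketch (kept disabled, helper name is hypothetical): on
 * these systems the non-core devices of socket N sit N buses below the
 * last PCI bus, so the socket number can be recovered from the bus
 * number, as i7core_get_onedevice() does below.
 */
#if 0
static u8 example_socket_from_bus(u8 bus, unsigned last_bus)
{
	return last_bus - bus;	/* e.g. last_bus=255, bus=254 -> socket 1 */
}
#endif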
1258 
1259 /*
1260  *	i7core_get_devices	Find and perform 'get' operation on the MCH's
1261  *			device/functions we want to reference for this driver
1262  *
1263  *			Need to 'get' all the devices listed in pci_dev_table
1264  */
1265 static int i7core_get_onedevice(struct pci_dev **prev, int devno,
1266 			 struct pci_id_descr *dev_descr, unsigned n_devs,
1267 			 unsigned last_bus)
1268 {
1269 	struct i7core_dev *i7core_dev;
1270 
1271 	struct pci_dev *pdev = NULL;
1272 	u8 bus = 0;
1273 	u8 socket = 0;
1274 
1275 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1276 			      dev_descr->dev_id, *prev);
1277 
1278 	/*
1279 	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core registers
1280 	 * are at PCI ID 8086:2c40, instead of 8086:2c41. So, we need
1281 	 * to probe for the alternate address in case of failure
1282 	 */
1283 	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1284 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1285 				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1286 
1287 	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1288 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1289 				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1290 				      *prev);
1291 
1292 	if (!pdev) {
1293 		if (*prev) {
1294 			*prev = pdev;
1295 			return 0;
1296 		}
1297 
1298 		if (dev_descr->optional)
1299 			return 0;
1300 
1301 		if (devno == 0)
1302 			return -ENODEV;
1303 
1304 		i7core_printk(KERN_INFO,
1305 			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1306 			dev_descr->dev, dev_descr->func,
1307 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1308 
1309 		/* End of list, leave */
1310 		return -ENODEV;
1311 	}
1312 	bus = pdev->bus->number;
1313 
1314 	socket = last_bus - bus;
1315 
1316 	i7core_dev = get_i7core_dev(socket);
1317 	if (!i7core_dev) {
1318 		i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1319 		if (!i7core_dev)
1320 			return -ENOMEM;
1321 		i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
1322 					   GFP_KERNEL);
1323 		if (!i7core_dev->pdev) {
1324 			kfree(i7core_dev);
1325 			return -ENOMEM;
1326 		}
1327 		i7core_dev->socket = socket;
1328 		i7core_dev->n_devs = n_devs;
1329 		list_add_tail(&i7core_dev->list, &i7core_edac_list);
1330 	}
1331 
1332 	if (i7core_dev->pdev[devno]) {
1333 		i7core_printk(KERN_ERR,
1334 			"Duplicated device for "
1335 			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
1336 			bus, dev_descr->dev, dev_descr->func,
1337 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1338 		pci_dev_put(pdev);
1339 		return -ENODEV;
1340 	}
1341 
1342 	i7core_dev->pdev[devno] = pdev;
1343 
1344 	/* Sanity check */
1345 	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1346 			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1347 		i7core_printk(KERN_ERR,
1348 			"Device PCI ID %04x:%04x "
1349 			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1350 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1351 			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1352 			bus, dev_descr->dev, dev_descr->func);
1353 		return -ENODEV;
1354 	}
1355 
1356 	/* Be sure that the device is enabled */
1357 	if (unlikely(pci_enable_device(pdev) < 0)) {
1358 		i7core_printk(KERN_ERR,
1359 			"Couldn't enable "
1360 			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
1361 			bus, dev_descr->dev, dev_descr->func,
1362 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1363 		return -ENODEV;
1364 	}
1365 
1366 	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1367 		socket, bus, dev_descr->dev,
1368 		dev_descr->func,
1369 		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1370 
1371 	*prev = pdev;
1372 
1373 	return 0;
1374 }
1375 
1376 static int i7core_get_devices(struct pci_id_table *table)
1377 {
1378 	int i, rc, last_bus;
1379 	struct pci_dev *pdev = NULL;
1380 	struct pci_id_descr *dev_descr;
1381 
1382 	last_bus = i7core_pci_lastbus();
1383 
1384 	while (table && table->descr) {
1385 		dev_descr = table->descr;
1386 		for (i = 0; i < table->n_devs; i++) {
1387 			pdev = NULL;
1388 			do {
1389 				rc = i7core_get_onedevice(&pdev, i,
1390 							  &dev_descr[i],
1391 							  table->n_devs,
1392 							  last_bus);
1393 				if (rc < 0) {
1394 					if (i == 0) {
1395 						i = table->n_devs;
1396 						break;
1397 					}
1398 					i7core_put_all_devices();
1399 					return -ENODEV;
1400 				}
1401 			} while (pdev);
1402 		}
1403 		table++;
1404 	}
1405 
1406 	return 0;
1408 }
1409 
1410 static int mci_bind_devs(struct mem_ctl_info *mci,
1411 			 struct i7core_dev *i7core_dev)
1412 {
1413 	struct i7core_pvt *pvt = mci->pvt_info;
1414 	struct pci_dev *pdev;
1415 	int i, func, slot;
1416 
1417 	/* Associates i7core_dev and mci for future usage */
1418 	pvt->i7core_dev = i7core_dev;
1419 	i7core_dev->mci = mci;
1420 
1421 	pvt->is_registered = 0;
1422 	for (i = 0; i < i7core_dev->n_devs; i++) {
1423 		pdev = i7core_dev->pdev[i];
1424 		if (!pdev)
1425 			continue;
1426 
1427 		func = PCI_FUNC(pdev->devfn);
1428 		slot = PCI_SLOT(pdev->devfn);
1429 		if (slot == 3) {
1430 			if (unlikely(func > MAX_MCR_FUNC))
1431 				goto error;
1432 			pvt->pci_mcr[func] = pdev;
1433 		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1434 			if (unlikely(func > MAX_CHAN_FUNC))
1435 				goto error;
1436 			pvt->pci_ch[slot - 4][func] = pdev;
1437 		} else if (!slot && !func)
1438 			pvt->pci_noncore = pdev;
1439 		else
1440 			goto error;
1441 
1442 		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1443 			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1444 			pdev, i7core_dev->socket);
1445 
1446 		if (PCI_SLOT(pdev->devfn) == 3 &&
1447 			PCI_FUNC(pdev->devfn) == 2)
1448 			pvt->is_registered = 1;
1449 	}
1450 
1451 	/*
1452 	 * Add extra nodes to count errors on udimm
1453 	 * For registered memory, this is not needed, since the counters
1454 	 * are already displayed at the standard locations
1455 	 */
1456 	if (!pvt->is_registered)
1457 		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
1458 			&i7core_udimm_counters;
1459 
1460 	return 0;
1461 
1462 error:
1463 	i7core_printk(KERN_ERR, "Device %d, function %d "
1464 		      "is out of the expected range\n",
1465 		      slot, func);
1466 	return -EINVAL;
1467 }
1468 
1469 /****************************************************************************
1470 			Error check routines
1471  ****************************************************************************/
1472 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1473 					 int chan, int dimm, int add)
1474 {
1475 	char *msg;
1476 	struct i7core_pvt *pvt = mci->pvt_info;
1477 	int row = pvt->csrow_map[chan][dimm], i;
1478 
1479 	for (i = 0; i < add; i++) {
1480 		msg = kasprintf(GFP_KERNEL, "Corrected error "
1481 				"(Socket=%d channel=%d dimm=%d)",
1482 				pvt->i7core_dev->socket, chan, dimm);
1483 
1484 		edac_mc_handle_fbd_ce(mci, row, 0, msg);
1485 		kfree(msg);
1486 	}
1487 }
1488 
1489 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1490 			int chan, int new0, int new1, int new2)
1491 {
1492 	struct i7core_pvt *pvt = mci->pvt_info;
1493 	int add0 = 0, add1 = 0, add2 = 0;
1494 	/* Updates CE counters if it is not the first time here */
1495 	if (pvt->ce_count_available) {
1496 		/* Updates CE counters */
1497 
1498 		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1499 		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1500 		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1501 
1502 		if (add2 < 0)
1503 			add2 += 0x7fff;
1504 		pvt->rdimm_ce_count[chan][2] += add2;
1505 
1506 		if (add1 < 0)
1507 			add1 += 0x7fff;
1508 		pvt->rdimm_ce_count[chan][1] += add1;
1509 
1510 		if (add0 < 0)
1511 			add0 += 0x7fff;
1512 		pvt->rdimm_ce_count[chan][0] += add0;
1513 	} else
1514 		pvt->ce_count_available = 1;
1515 
1516 	/* Store the new values */
1517 	pvt->rdimm_last_ce_count[chan][2] = new2;
1518 	pvt->rdimm_last_ce_count[chan][1] = new1;
1519 	pvt->rdimm_last_ce_count[chan][0] = new0;
1520 
1521 	/* Update the EDAC core */
1522 	if (add0 != 0)
1523 		i7core_rdimm_update_csrow(mci, chan, 0, add0);
1524 	if (add1 != 0)
1525 		i7core_rdimm_update_csrow(mci, chan, 1, add1);
1526 	if (add2 != 0)
1527 		i7core_rdimm_update_csrow(mci, chan, 2, add2);
1528 
1529 }
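
/*
 * Illustrative sketch (kept disabled, names and sample values are assumed
 * for illustration): the hardware CE counters are 15 bits wide, so a
 * computed delta goes negative when a counter wraps; adding back 0x7fff
 * recovers the real increment, as done above.
 */
#if 0
static int example_ce_delta(int new_count, int last_count)
{
	int add = new_count - last_count;

	if (add < 0)		/* 15-bit counter wrapped */
		add += 0x7fff;

	return add;		/* e.g. new=2, last=0x7ffd -> 4 */
}
#endif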
1530 
1531 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1532 {
1533 	struct i7core_pvt *pvt = mci->pvt_info;
1534 	u32 rcv[3][2];
1535 	int i, new0, new1, new2;
1536 
1537 	/* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
1538 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1539 								&rcv[0][0]);
1540 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1541 								&rcv[0][1]);
1542 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1543 								&rcv[1][0]);
1544 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1545 								&rcv[1][1]);
1546 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1547 								&rcv[2][0]);
1548 	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1549 								&rcv[2][1]);
1550 	for (i = 0 ; i < 3; i++) {
1551 		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1552 			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1553 		/* If the channel has 3 dimms */
1554 		if (pvt->channel[i].dimms > 2) {
1555 			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1556 			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1557 			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1558 		} else {
1559 			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1560 					DIMM_BOT_COR_ERR(rcv[i][0]);
1561 			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1562 					DIMM_BOT_COR_ERR(rcv[i][1]);
1563 			new2 = 0;
1564 		}
1565 
1566 		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1567 	}
1568 }
1569 
1570 /* This function is based on the device 3 function 4 registers as described on:
1571  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1572  *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1573  * also available at:
1574  * 	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1575  */
1576 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1577 {
1578 	struct i7core_pvt *pvt = mci->pvt_info;
1579 	u32 rcv1, rcv0;
1580 	int new0, new1, new2;
1581 
1582 	if (!pvt->pci_mcr[4]) {
1583 		debugf0("%s MCR registers not found\n", __func__);
1584 		return;
1585 	}
1586 
1587 	/* Corrected test errors */
1588 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1589 	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1590 
1591 	/* Store the new values */
1592 	new2 = DIMM2_COR_ERR(rcv1);
1593 	new1 = DIMM1_COR_ERR(rcv0);
1594 	new0 = DIMM0_COR_ERR(rcv0);
1595 
1596 	/* Updates CE counters if it is not the first time here */
1597 	if (pvt->ce_count_available) {
1598 		/* Updates CE counters */
1599 		int add0, add1, add2;
1600 
1601 		add2 = new2 - pvt->udimm_last_ce_count[2];
1602 		add1 = new1 - pvt->udimm_last_ce_count[1];
1603 		add0 = new0 - pvt->udimm_last_ce_count[0];
1604 
1605 		if (add2 < 0)
1606 			add2 += 0x7fff;
1607 		pvt->udimm_ce_count[2] += add2;
1608 
1609 		if (add1 < 0)
1610 			add1 += 0x7fff;
1611 		pvt->udimm_ce_count[1] += add1;
1612 
1613 		if (add0 < 0)
1614 			add0 += 0x7fff;
1615 		pvt->udimm_ce_count[0] += add0;
1616 
1617 		if (add0 | add1 | add2)
1618 			i7core_printk(KERN_ERR, "New Corrected error(s): "
1619 				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1620 				      add0, add1, add2);
1621 	} else
1622 		pvt->ce_count_available = 1;
1623 
1624 	/* Store the new values */
1625 	pvt->udimm_last_ce_count[2] = new2;
1626 	pvt->udimm_last_ce_count[1] = new1;
1627 	pvt->udimm_last_ce_count[0] = new0;
1628 }
1629 
1630 /*
1631  * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
1632  * IA-32 Architectures Software Developer’s Manual Volume 3B,
1633  * Nehalem is defined as family 0x06, model 0x1a.
1634  *
1635  * The MCA registers used here are the following ones:
1636  *     struct mce field	MCA Register
1637  *     m->status	MSR_IA32_MC8_STATUS
1638  *     m->addr		MSR_IA32_MC8_ADDR
1639  *     m->misc		MSR_IA32_MC8_MISC
1640  * In the case of Nehalem, the error information is encoded in the .status
1641  * and .misc fields.
1642  */
1643 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1644 				    struct mce *m)
1645 {
1646 	struct i7core_pvt *pvt = mci->pvt_info;
1647 	char *type, *optype, *err, *msg;
1648 	unsigned long error = m->status & 0x1ff0000l;
1649 	u32 optypenum = (m->status >> 4) & 0x07;
1650 	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1651 	u32 dimm = (m->misc >> 16) & 0x3;
1652 	u32 channel = (m->misc >> 18) & 0x3;
1653 	u32 syndrome = m->misc >> 32;
1654 	u32 errnum = find_first_bit(&error, 32);
1655 	int csrow;
1656 
1657 	if (m->mcgstatus & 1)
1658 		type = "FATAL";
1659 	else
1660 		type = "NON_FATAL";
1661 
1662 	switch (optypenum) {
1663 	case 0:
1664 		optype = "generic undef request";
1665 		break;
1666 	case 1:
1667 		optype = "read error";
1668 		break;
1669 	case 2:
1670 		optype = "write error";
1671 		break;
1672 	case 3:
1673 		optype = "addr/cmd error";
1674 		break;
1675 	case 4:
1676 		optype = "scrubbing error";
1677 		break;
1678 	default:
1679 		optype = "reserved";
1680 		break;
1681 	}
1682 
1683 	switch (errnum) {
1684 	case 16:
1685 		err = "read ECC error";
1686 		break;
1687 	case 17:
1688 		err = "RAS ECC error";
1689 		break;
1690 	case 18:
1691 		err = "write parity error";
1692 		break;
1693 	case 19:
1694 		err = "redundancy loss";
1695 		break;
1696 	case 20:
1697 		err = "reserved";
1698 		break;
1699 	case 21:
1700 		err = "memory range error";
1701 		break;
1702 	case 22:
1703 		err = "RTID out of range";
1704 		break;
1705 	case 23:
1706 		err = "address parity error";
1707 		break;
1708 	case 24:
1709 		err = "byte enable parity error";
1710 		break;
1711 	default:
1712 		err = "unknown";
1713 	}
1714 
1715 	/* FIXME: should convert addr into bank and rank information */
1716 	msg = kasprintf(GFP_ATOMIC,
1717 		"%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1718 		"syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1719 		type, (long long) m->addr, m->cpu, dimm, channel,
1720 		syndrome, core_err_cnt, (long long)m->status,
1721 		(long long)m->misc, optype, err);
1722 
1723 	debugf0("%s", msg);
1724 
1725 	csrow = pvt->csrow_map[channel][dimm];
1726 
1727 	/* Call the helper to output message */
1728 	if (m->mcgstatus & 1)
1729 		edac_mc_handle_fbd_ue(mci, csrow, 0,
1730 				0 /* FIXME: should be channel here */, msg);
1731 	else if (!pvt->is_registered)
1732 		edac_mc_handle_fbd_ce(mci, csrow,
1733 				0 /* FIXME: should be channel here */, msg);
1734 
1735 	kfree(msg);
1736 }
1737 
1738 /*
1739  *	i7core_check_error	Retrieve and process errors reported by the
1740  *				hardware. Called by the Core module.
1741  */
1742 static void i7core_check_error(struct mem_ctl_info *mci)
1743 {
1744 	struct i7core_pvt *pvt = mci->pvt_info;
1745 	int i;
1746 	unsigned count = 0;
1747 	struct mce *m;
1748 
1749 	/*
1750 	 * MCE first step: Copy all mce errors into a temporary buffer
1751 	 * We use double buffering here, to reduce the risk of
1752 	 * losing an error.
1753 	 */
1754 	smp_rmb();
1755 	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1756 		% MCE_LOG_LEN;
1757 	if (!count)
1758 		goto check_ce_error;
1759 
1760 	m = pvt->mce_outentry;
1761 	if (pvt->mce_in + count > MCE_LOG_LEN) {
1762 		unsigned l = MCE_LOG_LEN - pvt->mce_in;
1763 
1764 		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1765 		smp_wmb();
1766 		pvt->mce_in = 0;
1767 		count -= l;
1768 		m += l;
1769 	}
1770 	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1771 	smp_wmb();
1772 	pvt->mce_in += count;
1773 
1774 	smp_rmb();
1775 	if (pvt->mce_overrun) {
1776 		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1777 			      pvt->mce_overrun);
1778 		smp_wmb();
1779 		pvt->mce_overrun = 0;
1780 	}
1781 
1782 	/*
1783 	 * MCE second step: parse errors and display
1784 	 */
1785 	for (i = 0; i < count; i++)
1786 		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1787 
1788 	/*
1789 	 * Now, let's increment CE error counts
1790 	 */
1791 check_ce_error:
1792 	if (!pvt->is_registered)
1793 		i7core_udimm_check_mc_ecc_err(mci);
1794 	else
1795 		i7core_rdimm_check_mc_ecc_err(mci);
1796 }
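
/*
 * Illustrative sketch (kept disabled, helper name is hypothetical) of the
 * ring-buffer index arithmetic used above: with MCE_LOG_LEN slots, the
 * number of entries pending between the consumer index (mce_in) and the
 * producer index (mce_out) is:
 */
#if 0
static unsigned example_fifo_count(unsigned mce_in, unsigned mce_out)
{
	/* e.g. mce_in=30, mce_out=2, MCE_LOG_LEN=32 -> 4 pending entries */
	return (mce_out + MCE_LOG_LEN - mce_in) % MCE_LOG_LEN;
}
#endif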
1797 
1798 /*
1799  * i7core_mce_check_error	Replicates mcelog routine to get errors
1800  *				This routine simply queues mcelog errors, and
1801  *				returns. The error itself should be handled later
1802  *				by i7core_check_error.
1803  * WARNING: As this routine should be called at NMI time, extra care should
1804  * be taken to avoid deadlocks, and to be as fast as possible.
1805  */
1806 static int i7core_mce_check_error(void *priv, struct mce *mce)
1807 {
1808 	struct mem_ctl_info *mci = priv;
1809 	struct i7core_pvt *pvt = mci->pvt_info;
1810 
1811 	/*
1812 	 * Just let mcelog handle it if the error is
1813 	 * outside the memory controller
1814 	 */
1815 	if (((mce->status & 0xffff) >> 7) != 1)
1816 		return 0;
1817 
1818 	/* Bank 8 registers are the only ones that we know how to handle */
1819 	if (mce->bank != 8)
1820 		return 0;
1821 
1822 #ifdef CONFIG_SMP
1823 	/* Only handle it if it comes from the right memory controller */
1824 	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
1825 		return 0;
1826 #endif
1827 
1828 	smp_rmb();
1829 	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1830 		smp_wmb();
1831 		pvt->mce_overrun++;
1832 		return 0;
1833 	}
1834 
1835 	/* Copy the memory error into the ring buffer */
1836 	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1837 	smp_wmb();
1838 	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1839 
1840 	/* Handle fatal errors immediately */
1841 	if (mce->mcgstatus & 1)
1842 		i7core_check_error(mci);
1843 
	/* Advise mcelog that the error was handled */
1845 	return 1;
1846 }
1847 
1848 static int i7core_register_mci(struct i7core_dev *i7core_dev,
1849 			       int num_channels, int num_csrows)
1850 {
1851 	struct mem_ctl_info *mci;
1852 	struct i7core_pvt *pvt;
1853 	int csrow = 0;
1854 	int rc;
1855 
1856 	/* allocate a new MC control structure */
1857 	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1858 			    i7core_dev->socket);
1859 	if (unlikely(!mci))
1860 		return -ENOMEM;
1861 
1862 	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1863 
1864 	/* record ptr to the generic device */
1865 	mci->dev = &i7core_dev->pdev[0]->dev;
1866 
1867 	pvt = mci->pvt_info;
1868 	memset(pvt, 0, sizeof(*pvt));
1869 
	/*
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * memory channels.
	 */
1875 	mci->mtype_cap = MEM_FLAG_DDR3;
1876 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
1877 	mci->edac_cap = EDAC_FLAG_NONE;
1878 	mci->mod_name = "i7core_edac.c";
1879 	mci->mod_ver = I7CORE_REVISION;
1880 	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1881 				  i7core_dev->socket);
1882 	mci->dev_name = pci_name(i7core_dev->pdev[0]);
1883 	mci->ctl_page_to_phys = NULL;
1884 	mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
1885 	/* Set the function pointer to an actual operation function */
1886 	mci->edac_check = i7core_check_error;
1887 
1888 	/* Store pci devices at mci for faster access */
1889 	rc = mci_bind_devs(mci, i7core_dev);
1890 	if (unlikely(rc < 0))
1891 		goto fail;
1892 
1893 	/* Get dimm basic config */
1894 	get_dimm_config(mci, &csrow);
1895 
1896 	/* add this new MC control structure to EDAC's list of MCs */
1897 	if (unlikely(edac_mc_add_mc(mci))) {
1898 		debugf0("MC: " __FILE__
1899 			": %s(): failed edac_mc_add_mc()\n", __func__);
1900 		/* FIXME: perhaps some code should go here that disables error
1901 		 * reporting if we just enabled it
1902 		 */
1903 
1904 		rc = -EINVAL;
1905 		goto fail;
1906 	}
1907 
1908 	/* allocating generic PCI control info */
1909 	i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1910 						 EDAC_MOD_STR);
1911 	if (unlikely(!i7core_pci)) {
1912 		printk(KERN_WARNING
1913 			"%s(): Unable to create PCI control\n",
1914 			__func__);
1915 		printk(KERN_WARNING
1916 			"%s(): PCI error report via EDAC not setup\n",
1917 			__func__);
1918 	}
1919 
	/* Default error injection mask: -1 means "match any" for each field */
1921 	pvt->inject.channel = 0;
1922 	pvt->inject.dimm = -1;
1923 	pvt->inject.rank = -1;
1924 	pvt->inject.bank = -1;
1925 	pvt->inject.page = -1;
1926 	pvt->inject.col = -1;
1927 
	/* Register with edac_mce in order to receive memory errors */
1929 	pvt->edac_mce.priv = mci;
1930 	pvt->edac_mce.check_error = i7core_mce_check_error;
1931 
	rc = edac_mce_register(&pvt->edac_mce);
	if (unlikely(rc < 0)) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mce_register()\n", __func__);
		/* The mc was already added to EDAC; remove it before freeing */
		edac_mc_del_mc(mci->dev);
		goto fail;
	}

	return 0;

fail:
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	return rc;
1942 }
1943 
/*
 *	i7core_probe	Probe for ONE instance of the device to see if it is
 *			present.
 *	return:
 *		0 if a device was found
 *		< 0 for an error code
 */
1951 
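/* Nonzero once the first probe pass has claimed all memory controllers */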
static int probed;
1953 
1954 static int __devinit i7core_probe(struct pci_dev *pdev,
1955 				  const struct pci_device_id *id)
1956 {
1957 	int rc;
1958 	struct i7core_dev *i7core_dev;
1959 
1960 	/* get the pci devices we want to reserve for our use */
1961 	mutex_lock(&i7core_edac_lock);
1962 
1963 	/*
1964 	 * All memory controllers are allocated at the first pass.
1965 	 */
1966 	if (unlikely(probed >= 1)) {
1967 		mutex_unlock(&i7core_edac_lock);
1968 		return -EINVAL;
1969 	}
1970 	probed++;
1971 
1972 	rc = i7core_get_devices(pci_dev_table);
1973 	if (unlikely(rc < 0))
1974 		goto fail0;
1975 
1976 	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
1977 		int channels;
1978 		int csrows;
1979 
		/* Check the number of active, non-disabled channels */
1981 		rc = i7core_get_active_channels(i7core_dev->socket,
1982 						&channels, &csrows);
1983 		if (unlikely(rc < 0))
1984 			goto fail1;
1985 
1986 		rc = i7core_register_mci(i7core_dev, channels, csrows);
1987 		if (unlikely(rc < 0))
1988 			goto fail1;
1989 	}
1990 
1991 	i7core_printk(KERN_INFO, "Driver loaded.\n");
1992 
1993 	mutex_unlock(&i7core_edac_lock);
1994 	return 0;
1995 
1996 fail1:
1997 	i7core_put_all_devices();
1998 fail0:
1999 	mutex_unlock(&i7core_edac_lock);
2000 	return rc;
2001 }
2002 
2003 /*
2004  *	i7core_remove	destructor for one instance of device
2005  *
2006  */
2007 static void __devexit i7core_remove(struct pci_dev *pdev)
2008 {
2009 	struct mem_ctl_info *mci;
2010 	struct i7core_dev *i7core_dev, *tmp;
2011 
2012 	debugf0(__FILE__ ": %s()\n", __func__);
2013 
2014 	if (i7core_pci)
2015 		edac_pci_release_generic_ctl(i7core_pci);
2016 
	/*
	 * There is a problem here: the pdev value for removal will be wrong,
	 * since it points to the X58 device used to detect that the machine
	 * is a Nehalem or newer design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices.
	 */
2024 
2025 	mutex_lock(&i7core_edac_lock);
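	/* For each socket: remove the MC from EDAC, unregister from edac_mce and release its PCI devices */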
2026 	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
2027 		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
2028 		if (mci) {
2029 			struct i7core_pvt *pvt = mci->pvt_info;
2030 
2031 			i7core_dev = pvt->i7core_dev;
2032 			edac_mce_unregister(&pvt->edac_mce);
2033 			kfree(mci->ctl_name);
2034 			edac_mc_free(mci);
2035 			i7core_put_devices(i7core_dev);
2036 		} else {
2037 			i7core_printk(KERN_ERR,
2038 				      "Couldn't find mci for socket %d\n",
2039 				      i7core_dev->socket);
2040 		}
2041 	}
2042 	probed--;
2043 
2044 	mutex_unlock(&i7core_edac_lock);
2045 }
2046 
2047 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2048 
2049 /*
2050  *	i7core_driver	pci_driver structure for this module
2051  *
2052  */
2053 static struct pci_driver i7core_driver = {
2054 	.name     = "i7core_edac",
2055 	.probe    = i7core_probe,
2056 	.remove   = __devexit_p(i7core_remove),
2057 	.id_table = i7core_pci_tbl,
2058 };
2059 
/*
 *	i7core_init		Module entry function
 *			Tries to initialize this module for its devices
 */
2064 static int __init i7core_init(void)
2065 {
2066 	int pci_rc;
2067 
2068 	debugf2("MC: " __FILE__ ": %s()\n", __func__);
2069 
2070 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
2071 	opstate_init();
2072 
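	/* Make the hidden uncore PCI devices visible before registering the driver */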
2073 	i7core_xeon_pci_fixup(pci_dev_table);
2074 
2075 	pci_rc = pci_register_driver(&i7core_driver);
2076 
2077 	if (pci_rc >= 0)
2078 		return 0;
2079 
2080 	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2081 		      pci_rc);
2082 
2083 	return pci_rc;
2084 }
2085 
2086 /*
2087  *	i7core_exit()	Module exit function
2088  *			Unregister the driver
2089  */
2090 static void __exit i7core_exit(void)
2091 {
2092 	debugf2("MC: " __FILE__ ": %s()\n", __func__);
2093 	pci_unregister_driver(&i7core_driver);
2094 }
2095 
2096 module_init(i7core_init);
2097 module_exit(i7core_exit);
2098 
2099 MODULE_LICENSE("GPL");
2100 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2101 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2102 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2103 		   I7CORE_REVISION);
2104 
2105 module_param(edac_op_state, int, 0444);
2106 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2107