xref: /linux/drivers/edac/amd64_edac.c (revision 394d83c17fac2b7bcf05cb99d1e945135767bb6b)
1 #include "amd64_edac.h"
2 #include <asm/amd_nb.h>
3 
4 static struct edac_pci_ctl_info *amd64_ctl_pci;
5 
6 static int report_gart_errors;
7 module_param(report_gart_errors, int, 0644);
8 
9 /*
10  * Set by command line parameter. If BIOS has enabled ECC, this override is
11  * cleared to prevent the driver from re-enabling the hardware.
12  */
13 static int ecc_enable_override;
14 module_param(ecc_enable_override, int, 0644);
15 
16 static struct msr __percpu *msrs;
17 
18 /*
19  * count successfully initialized driver instances for setup_pci_device()
20  */
21 static atomic_t drv_instances = ATOMIC_INIT(0);
22 
23 /* Per-node driver instances */
24 static struct mem_ctl_info **mcis;
25 static struct ecc_settings **ecc_stngs;
26 
27 /*
28  * Address to DRAM bank mapping: see F2x80 for K8 and F2x[1,0]80 for Fam10 and
29  * later.
30  */
31 static int ddr2_dbam_revCG[] = {
32 			   [0]		= 32,
33 			   [1]		= 64,
34 			   [2]		= 128,
35 			   [3]		= 256,
36 			   [4]		= 512,
37 			   [5]		= 1024,
38 			   [6]		= 2048,
39 };
40 
41 static int ddr2_dbam_revD[] = {
42 			   [0]		= 32,
43 			   [1]		= 64,
44 			   [2 ... 3]	= 128,
45 			   [4]		= 256,
46 			   [5]		= 512,
47 			   [6]		= 256,
48 			   [7]		= 512,
49 			   [8 ... 9]	= 1024,
50 			   [10]		= 2048,
51 };
52 
53 static int ddr2_dbam[] = { [0]		= 128,
54 			   [1]		= 256,
55 			   [2 ... 4]	= 512,
56 			   [5 ... 6]	= 1024,
57 			   [7 ... 8]	= 2048,
58 			   [9 ... 10]	= 4096,
59 			   [11]		= 8192,
60 };
61 
62 static int ddr3_dbam[] = { [0]		= -1,
63 			   [1]		= 256,
64 			   [2]		= 512,
65 			   [3 ... 4]	= -1,
66 			   [5 ... 6]	= 1024,
67 			   [7 ... 8]	= 2048,
68 			   [9 ... 10]	= 4096,
69 			   [11]		= 8192,
70 };
71 
72 /*
73  * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
74  * bandwidth to a valid bit pattern. The 'set' operation finds the exact or
75  * next-lower bandwidth value.
76  *
77  * FIXME: Produce a better mapping/linearisation.
78  */
79 
80 
81 static struct scrubrate {
82        u32 scrubval;           /* bit pattern for scrub rate */
83        u32 bandwidth;          /* bandwidth consumed (bytes/sec) */
84 } scrubrates[] = {
85 	{ 0x01, 1600000000UL},
86 	{ 0x02, 800000000UL},
87 	{ 0x03, 400000000UL},
88 	{ 0x04, 200000000UL},
89 	{ 0x05, 100000000UL},
90 	{ 0x06, 50000000UL},
91 	{ 0x07, 25000000UL},
92 	{ 0x08, 12284069UL},
93 	{ 0x09, 6274509UL},
94 	{ 0x0A, 3121951UL},
95 	{ 0x0B, 1560975UL},
96 	{ 0x0C, 781440UL},
97 	{ 0x0D, 390720UL},
98 	{ 0x0E, 195300UL},
99 	{ 0x0F, 97650UL},
100 	{ 0x10, 48854UL},
101 	{ 0x11, 24427UL},
102 	{ 0x12, 12213UL},
103 	{ 0x13, 6101UL},
104 	{ 0x14, 3051UL},
105 	{ 0x15, 1523UL},
106 	{ 0x16, 761UL},
107 	{ 0x00, 0UL},        /* scrubbing off */
108 };
109 
110 /*
111  * Memory scrubber control interface. For K8, memory scrubbing is handled by
112  * hardware and can involve L2 cache, dcache as well as the main memory. With
113  * F10, this is extended to L3 cache scrubbing on CPU models sporting that
114  * functionality.
115  *
116  * This causes the "units" for the scrubbing speed to vary from 64 byte blocks
117  * (DRAM) to cache lines. This is nasty, so we will use bandwidth in
118  * bytes/sec for the setting.
119  *
120  * Currently, we only do dram scrubbing. If the scrubbing is done in software on
121  * other archs, we might not have access to the caches directly.
122  */
123 
124 /*
125  * Scan the scrub rate mapping table for a close or matching bandwidth value to
126  * issue. If the requested rate is higher than any valid rate, use the maximum.
127  */
128 static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate)
129 {
130 	u32 scrubval;
131 	int i;
132 
133 	/*
134 	 * map the configured rate (new_bw) to a value specific to the AMD64
135 	 * memory controller and apply it to the register. Program the first entry
136 	 * whose bandwidth does not exceed the requested setting (the table is in
137 	 * decreasing bandwidth order). If at the last entry, turn off DRAM scrubbing.
138 	 */
139 	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
140 		/*
141 		 * skip scrub rates which aren't recommended
142 		 * (see F10 BKDG, F3x58)
143 		 */
144 		if (scrubrates[i].scrubval < min_rate)
145 			continue;
146 
147 		if (scrubrates[i].bandwidth <= new_bw)
148 			break;
149 
150 		/*
151 		 * if no suitable bandwidth found, turn off DRAM scrubbing
152 		 * entirely by falling back to the last element in the
153 		 * scrubrates array.
154 		 */
155 	}
156 
157 	scrubval = scrubrates[i].scrubval;
158 
159 	pci_write_bits32(ctl, K8_SCRCTRL, scrubval, 0x001F);
160 
161 	if (scrubval)
162 		return scrubrates[i].bandwidth;
163 
164 	return 0;
165 }
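
/*
 * Illustrative example (values picked for this comment, not from the BKDG
 * text): a request of new_bw = 30000000 bytes/sec with min_rate = 0 skips
 * nothing and stops at the first entry whose bandwidth does not exceed the
 * request, { 0x07, 25000000UL }, so scrubval 0x07 is programmed and
 * 25000000 is returned to the caller.
 */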
166 
167 static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
168 {
169 	struct amd64_pvt *pvt = mci->pvt_info;
170 
171 	return __amd64_set_scrub_rate(pvt->F3, bw, pvt->min_scrubrate);
172 }
173 
174 static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
175 {
176 	struct amd64_pvt *pvt = mci->pvt_info;
177 	u32 scrubval = 0;
178 	int i, retval = -EINVAL;
179 
180 	amd64_read_pci_cfg(pvt->F3, K8_SCRCTRL, &scrubval);
181 
182 	scrubval = scrubval & 0x001F;
183 
184 	amd64_debug("pci-read, sdram scrub control value: %d\n", scrubval);
185 
186 	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
187 		if (scrubrates[i].scrubval == scrubval) {
188 			retval = scrubrates[i].bandwidth;
189 			break;
190 		}
191 	}
192 	return retval;
193 }
194 
195 /* Map from a CSROW entry to the mask entry that operates on it */
196 static inline u32 amd64_map_to_dcs_mask(struct amd64_pvt *pvt, int csrow)
197 {
198 	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F)
199 		return csrow;
200 	else
201 		return csrow >> 1;
202 }
203 
204 /* return the 'base' address of the i'th CS entry of the 'dct' DRAM controller */
205 static u32 amd64_get_dct_base(struct amd64_pvt *pvt, int dct, int csrow)
206 {
207 	if (dct == 0)
208 		return pvt->dcsb0[csrow];
209 	else
210 		return pvt->dcsb1[csrow];
211 }
212 
213 /*
214  * Return the 'mask' address of the i'th CS entry. This function is needed
215  * because the number of DCSM registers on Rev E and prior vs Rev F and later
216  * is different.
217  */
218 static u32 amd64_get_dct_mask(struct amd64_pvt *pvt, int dct, int csrow)
219 {
220 	if (dct == 0)
221 		return pvt->dcsm0[amd64_map_to_dcs_mask(pvt, csrow)];
222 	else
223 		return pvt->dcsm1[amd64_map_to_dcs_mask(pvt, csrow)];
224 }
225 
226 
227 /*
228  * In *base and *limit, pass back the full 40-bit base and limit physical
229  * addresses for the node given by node_id.  This information is obtained from
230  * DRAM Base (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers. The
231  * base and limit addresses are of type SysAddr, as defined at the start of
232  * section 3.4.4 (p. 70).  They are the lowest and highest physical addresses
233  * in the address range they represent.
234  */
235 static void amd64_get_base_and_limit(struct amd64_pvt *pvt, int node_id,
236 			       u64 *base, u64 *limit)
237 {
238 	*base = pvt->dram_base[node_id];
239 	*limit = pvt->dram_limit[node_id];
240 }
241 
242 /*
243  * Return 1 if the SysAddr given by sys_addr matches the base/limit associated
244  * with node_id
245  */
246 static int amd64_base_limit_match(struct amd64_pvt *pvt,
247 					u64 sys_addr, int node_id)
248 {
249 	u64 base, limit, addr;
250 
251 	amd64_get_base_and_limit(pvt, node_id, &base, &limit);
252 
253 	/* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
254 	 * all ones if the most significant implemented address bit is 1.
255 	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
256 	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
257 	 * Application Programming.
258 	 */
259 	addr = sys_addr & 0x000000ffffffffffull;
260 
261 	return (addr >= base) && (addr <= limit);
262 }
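
/*
 * Worked example (illustrative): a sign-extended SysAddr of
 * 0xffffff8012345678 becomes addr = 0x8012345678 after the truncation
 * above, and that 40-bit value is what gets compared against the node's
 * base and limit.
 */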
263 
264 /*
265  * Attempt to map a SysAddr to a node. On success, return a pointer to the
266  * mem_ctl_info structure for the node that the SysAddr maps to.
267  *
268  * On failure, return NULL.
269  */
270 static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
271 						u64 sys_addr)
272 {
273 	struct amd64_pvt *pvt;
274 	int node_id;
275 	u32 intlv_en, bits;
276 
277 	/*
278 	 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
279 	 * 3.4.4.2) registers to map the SysAddr to a node ID.
280 	 */
281 	pvt = mci->pvt_info;
282 
283 	/*
284 	 * The value of this field should be the same for all DRAM Base
285 	 * registers.  Therefore we arbitrarily choose to read it from the
286 	 * register for node 0.
287 	 */
288 	intlv_en = pvt->dram_IntlvEn[0];
289 
290 	if (intlv_en == 0) {
291 		for (node_id = 0; node_id < DRAM_REG_COUNT; node_id++) {
292 			if (amd64_base_limit_match(pvt, sys_addr, node_id))
293 				goto found;
294 		}
295 		goto err_no_match;
296 	}
297 
298 	if (unlikely((intlv_en != 0x01) &&
299 		     (intlv_en != 0x03) &&
300 		     (intlv_en != 0x07))) {
301 		amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
302 		return NULL;
303 	}
304 
305 	bits = (((u32) sys_addr) >> 12) & intlv_en;
306 
307 	for (node_id = 0; ; ) {
308 		if ((pvt->dram_IntlvSel[node_id] & intlv_en) == bits)
309 			break;	/* intlv_sel field matches */
310 
311 		if (++node_id >= DRAM_REG_COUNT)
312 			goto err_no_match;
313 	}
314 
315 	/* sanity test for sys_addr */
316 	if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
317 		amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address "
318 			   "range for node %d with node interleaving enabled.\n",
319 			   __func__, sys_addr, node_id);
320 		return NULL;
321 	}
322 
323 found:
324 	return edac_mc_find(node_id);
325 
326 err_no_match:
327 	debugf2("sys_addr 0x%lx doesn't match any node\n",
328 		(unsigned long)sys_addr);
329 
330 	return NULL;
331 }
332 
333 /*
334  * Extract the DRAM CS base address from selected csrow register.
335  */
336 static u64 base_from_dct_base(struct amd64_pvt *pvt, int csrow)
337 {
338 	return ((u64) (amd64_get_dct_base(pvt, 0, csrow) & pvt->dcsb_base)) <<
339 				pvt->dcs_shift;
340 }
341 
342 /*
343  * Extract the mask from the dcsm0[csrow] entry in a CPU revision-specific way.
344  */
345 static u64 mask_from_dct_mask(struct amd64_pvt *pvt, int csrow)
346 {
347 	u64 dcsm_bits, other_bits;
348 	u64 mask;
349 
350 	/* Extract bits from DRAM CS Mask. */
351 	dcsm_bits = amd64_get_dct_mask(pvt, 0, csrow) & pvt->dcsm_mask;
352 
353 	other_bits = pvt->dcsm_mask;
354 	other_bits = ~(other_bits << pvt->dcs_shift);
355 
356 	/*
357 	 * The extracted bits from DCSM belong in the spaces represented by
358 	 * the cleared bits in other_bits.
359 	 */
360 	mask = (dcsm_bits << pvt->dcs_shift) | other_bits;
361 
362 	return mask;
363 }
364 
365 /*
366  * @input_addr is an InputAddr associated with the node given by mci. Return the
367  * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
368  */
369 static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
370 {
371 	struct amd64_pvt *pvt;
372 	int csrow;
373 	u64 base, mask;
374 
375 	pvt = mci->pvt_info;
376 
377 	/*
378 	 * Here we use the DRAM CS Base and DRAM CS Mask registers. For each CS
379 	 * base/mask register pair, test the condition shown near the start of
380 	 * section 3.5.4 (p. 84, BKDG #26094, K8, revA-E).
381 	 */
382 	for (csrow = 0; csrow < pvt->cs_count; csrow++) {
383 
384 		/* This DRAM chip select is disabled on this node */
385 		if ((pvt->dcsb0[csrow] & K8_DCSB_CS_ENABLE) == 0)
386 			continue;
387 
388 		base = base_from_dct_base(pvt, csrow);
389 		mask = ~mask_from_dct_mask(pvt, csrow);
390 
391 		if ((input_addr & mask) == (base & mask)) {
392 			debugf2("InputAddr 0x%lx matches csrow %d (node %d)\n",
393 				(unsigned long)input_addr, csrow,
394 				pvt->mc_node_id);
395 
396 			return csrow;
397 		}
398 	}
399 
400 	debugf2("no matching csrow for InputAddr 0x%lx (MC node %d)\n",
401 		(unsigned long)input_addr, pvt->mc_node_id);
402 
403 	return -1;
404 }
405 
406 /*
407  * Return the base value defined by the DRAM Base register for the node
408  * represented by mci.  This function returns the full 40-bit value despite the
409  * fact that the register only stores bits 39-24 of the value. See section
410  * 3.4.4.1 (BKDG #26094, K8, revA-E)
411  */
412 static inline u64 get_dram_base(struct mem_ctl_info *mci)
413 {
414 	struct amd64_pvt *pvt = mci->pvt_info;
415 
416 	return pvt->dram_base[pvt->mc_node_id];
417 }
418 
419 /*
420  * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
421  * for the node represented by mci. Info is passed back in *hole_base,
422  * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
423  * info is invalid. Info may be invalid for either of the following reasons:
424  *
425  * - The revision of the node is not E or greater.  In this case, the DRAM Hole
426  *   Address Register does not exist.
427  *
428  * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
429  *   indicating that its contents are not valid.
430  *
431  * The values passed back in *hole_base, *hole_offset, and *hole_size are
432  * complete 32-bit values despite the fact that the bitfields in the DHAR
433  * only represent bits 31-24 of the base and offset values.
434  */
435 int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
436 			     u64 *hole_offset, u64 *hole_size)
437 {
438 	struct amd64_pvt *pvt = mci->pvt_info;
439 	u64 base;
440 
441 	/* only revE and later have the DRAM Hole Address Register */
442 	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) {
443 		debugf1("  revision %d for node %d does not support DHAR\n",
444 			pvt->ext_model, pvt->mc_node_id);
445 		return 1;
446 	}
447 
448 	/* only valid for Fam10h */
449 	if (boot_cpu_data.x86 == 0x10 &&
450 	    (pvt->dhar & F10_DRAM_MEM_HOIST_VALID) == 0) {
451 		debugf1("  Dram Memory Hoisting is DISABLED on this system\n");
452 		return 1;
453 	}
454 
455 	if ((pvt->dhar & DHAR_VALID) == 0) {
456 		debugf1("  Dram Memory Hoisting is DISABLED on this node %d\n",
457 			pvt->mc_node_id);
458 		return 1;
459 	}
460 
461 	/* This node has Memory Hoisting */
462 
463 	/* +------------------+--------------------+--------------------+-----
464 	 * | memory           | DRAM hole          | relocated          |
465 	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
466 	 * |                  |                    | DRAM hole          |
467 	 * |                  |                    | [0x100000000,      |
468 	 * |                  |                    |  (0x100000000+     |
469 	 * |                  |                    |   (0xffffffff-x))] |
470 	 * +------------------+--------------------+--------------------+-----
471 	 *
472 	 * Above is a diagram of physical memory showing the DRAM hole and the
473 	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
474 	 * starts at address x (the base address) and extends through address
475 	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
476 	 * addresses in the hole so that they start at 0x100000000.
477 	 */
478 
479 	base = dhar_base(pvt->dhar);
480 
481 	*hole_base = base;
482 	*hole_size = (0x1ull << 32) - base;
483 
484 	if (boot_cpu_data.x86 > 0xf)
485 		*hole_offset = f10_dhar_offset(pvt->dhar);
486 	else
487 		*hole_offset = k8_dhar_offset(pvt->dhar);
488 
489 	debugf1("  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
490 		pvt->mc_node_id, (unsigned long)*hole_base,
491 		(unsigned long)*hole_offset, (unsigned long)*hole_size);
492 
493 	return 0;
494 }
495 EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
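
/*
 * Illustrative example (hypothetical register contents): if dhar_base()
 * yields 0xc0000000, the hole covers [0xc0000000, 0xffffffff], so
 * *hole_base = 0xc0000000 and *hole_size = 0x40000000 (1 GB). The DRAM that
 * would have sat in the hole is instead reachable at SysAddrs starting at
 * 0x100000000, as the diagram in the function above shows.
 */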
496 
497 /*
498  * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
499  * assumed that sys_addr maps to the node given by mci.
500  *
501  * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
502  * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
503  * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
504  * then it is also involved in translating a SysAddr to a DramAddr. Sections
505  * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
506  * These parts of the documentation are unclear. I interpret them as follows:
507  *
508  * When node n receives a SysAddr, it processes the SysAddr as follows:
509  *
510  * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
511  *    Limit registers for node n. If the SysAddr is not within the range
512  *    specified by the base and limit values, then node n ignores the SysAddr
513  *    (since it does not map to node n). Otherwise continue to step 2 below.
514  *
515  * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
516  *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
517  *    the range of relocated addresses (starting at 0x100000000) from the DRAM
518  *    hole. If not, skip to step 3 below. Else get the value of the
519  *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
520  *    offset defined by this value from the SysAddr.
521  *
522  * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
523  *    Base register for node n. To obtain the DramAddr, subtract the base
524  *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
525  */
526 static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
527 {
528 	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
529 	int ret = 0;
530 
531 	dram_base = get_dram_base(mci);
532 
533 	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
534 				      &hole_size);
535 	if (!ret) {
536 		if ((sys_addr >= (1ull << 32)) &&
537 		    (sys_addr < ((1ull << 32) + hole_size))) {
538 			/* use DHAR to translate SysAddr to DramAddr */
539 			dram_addr = sys_addr - hole_offset;
540 
541 			debugf2("using DHAR to translate SysAddr 0x%lx to "
542 				"DramAddr 0x%lx\n",
543 				(unsigned long)sys_addr,
544 				(unsigned long)dram_addr);
545 
546 			return dram_addr;
547 		}
548 	}
549 
550 	/*
551 	 * Translate the SysAddr to a DramAddr as shown near the start of
552 	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
553 	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
554 	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
555 	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
556 	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
557 	 * Programmer's Manual Volume 1 Application Programming.
558 	 */
559 	dram_addr = (sys_addr & 0xffffffffffull) - dram_base;
560 
561 	debugf2("using DRAM Base register to translate SysAddr 0x%lx to "
562 		"DramAddr 0x%lx\n", (unsigned long)sys_addr,
563 		(unsigned long)dram_addr);
564 	return dram_addr;
565 }
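
/*
 * Illustrative example of the non-hoisted path above: with dram_base =
 * 0x4000000000, a sys_addr of 0x4012345678 (already within 40 bits) yields
 * dram_addr = 0x12345678.
 */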
566 
567 /*
568  * @intlv_en is the value of the IntlvEn field from a DRAM Base register
569  * (section 3.4.4.1).  Return the number of bits from a SysAddr that are used
570  * for node interleaving.
571  */
572 static int num_node_interleave_bits(unsigned intlv_en)
573 {
574 	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
575 	int n;
576 
577 	BUG_ON(intlv_en > 7);
578 	n = intlv_shift_table[intlv_en];
579 	return n;
580 }
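
/*
 * For example, intlv_en = 3 selects a four-node interleave, so two SysAddr
 * bits (bits 13:12) pick the node and the table above returns 2.
 */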
581 
582 /* Translate the DramAddr given by @dram_addr to an InputAddr. */
583 static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
584 {
585 	struct amd64_pvt *pvt;
586 	int intlv_shift;
587 	u64 input_addr;
588 
589 	pvt = mci->pvt_info;
590 
591 	/*
592 	 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
593 	 * concerning translating a DramAddr to an InputAddr.
594 	 */
595 	intlv_shift = num_node_interleave_bits(pvt->dram_IntlvEn[0]);
596 	input_addr = ((dram_addr >> intlv_shift) & 0xffffff000ull) +
597 	    (dram_addr & 0xfff);
598 
599 	debugf2("  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
600 		intlv_shift, (unsigned long)dram_addr,
601 		(unsigned long)input_addr);
602 
603 	return input_addr;
604 }
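
/*
 * Illustrative example: with a two-node interleave (intlv_shift = 1), a
 * DramAddr of 0x12345678 keeps its low 12 bits (0x678) and drops one
 * interleave bit from the rest: ((0x12345678 >> 1) & 0xffffff000) + 0x678
 * = 0x91a2678.
 */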
605 
606 /*
607  * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
608  * assumed that @sys_addr maps to the node given by mci.
609  */
610 static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
611 {
612 	u64 input_addr;
613 
614 	input_addr =
615 	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
616 
617 	debugf2("SysAddr 0x%lx translates to InputAddr 0x%lx\n",
618 		(unsigned long)sys_addr, (unsigned long)input_addr);
619 
620 	return input_addr;
621 }
622 
623 
624 /*
625  * @input_addr is an InputAddr associated with the node represented by mci.
626  * Translate @input_addr to a DramAddr and return the result.
627  */
628 static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
629 {
630 	struct amd64_pvt *pvt;
631 	int node_id, intlv_shift;
632 	u64 bits, dram_addr;
633 	u32 intlv_sel;
634 
635 	/*
636 	 * The start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
637 	 * shows how to translate a DramAddr to an InputAddr. Here we reverse
638 	 * this procedure. When translating from a DramAddr to an InputAddr, the
639 	 * bits used for node interleaving are discarded.  Here we recover these
640 	 * bits from the IntlvSel field of the DRAM Limit register (section
641 	 * 3.4.4.2) for the node that input_addr is associated with.
642 	 */
643 	pvt = mci->pvt_info;
644 	node_id = pvt->mc_node_id;
645 	BUG_ON((node_id < 0) || (node_id > 7));
646 
647 	intlv_shift = num_node_interleave_bits(pvt->dram_IntlvEn[0]);
648 
649 	if (intlv_shift == 0) {
650 		debugf1("    InputAddr 0x%lx translates to DramAddr of "
651 			"same value\n",	(unsigned long)input_addr);
652 
653 		return input_addr;
654 	}
655 
656 	bits = ((input_addr & 0xffffff000ull) << intlv_shift) +
657 	    (input_addr & 0xfff);
658 
659 	intlv_sel = pvt->dram_IntlvSel[node_id] & ((1 << intlv_shift) - 1);
660 	dram_addr = bits + (intlv_sel << 12);
661 
662 	debugf1("InputAddr 0x%lx translates to DramAddr 0x%lx "
663 		"(%d node interleave bits)\n", (unsigned long)input_addr,
664 		(unsigned long)dram_addr, intlv_shift);
665 
666 	return dram_addr;
667 }
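
/*
 * Reversing the example given after dram_addr_to_input_addr(): InputAddr
 * 0x91a2678 with intlv_shift = 1 and an IntlvSel bit of 1 gives
 * bits = 0x12344678 and dram_addr = 0x12344678 + (1 << 12) = 0x12345678,
 * i.e. the original DramAddr is recovered.
 */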
668 
669 /*
670  * @dram_addr is a DramAddr that maps to the node represented by mci. Convert
671  * @dram_addr to a SysAddr.
672  */
673 static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr)
674 {
675 	struct amd64_pvt *pvt = mci->pvt_info;
676 	u64 hole_base, hole_offset, hole_size, base, limit, sys_addr;
677 	int ret = 0;
678 
679 	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
680 				      &hole_size);
681 	if (!ret) {
682 		if ((dram_addr >= hole_base) &&
683 		    (dram_addr < (hole_base + hole_size))) {
684 			sys_addr = dram_addr + hole_offset;
685 
686 			debugf1("using DHAR to translate DramAddr 0x%lx to "
687 				"SysAddr 0x%lx\n", (unsigned long)dram_addr,
688 				(unsigned long)sys_addr);
689 
690 			return sys_addr;
691 		}
692 	}
693 
694 	amd64_get_base_and_limit(pvt, pvt->mc_node_id, &base, &limit);
695 	sys_addr = dram_addr + base;
696 
697 	/*
698 	 * The sys_addr we have computed up to this point is a 40-bit value
699 	 * because the k8 deals with 40-bit values.  However, the value we are
700 	 * supposed to return is a full 64-bit physical address.  The AMD
701 	 * x86-64 architecture specifies that the most significant implemented
702 	 * address bit through bit 63 of a physical address must be either all
703 	 * 0s or all 1s.  Therefore we sign-extend the 40-bit sys_addr to a
704 	 * 64-bit value below.  See section 3.4.2 of AMD publication 24592:
705 	 * AMD x86-64 Architecture Programmer's Manual Volume 1 Application
706 	 * Programming.
707 	 */
708 	sys_addr |= ~((sys_addr & (1ull << 39)) - 1);
709 
710 	debugf1("    Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n",
711 		pvt->mc_node_id, (unsigned long)dram_addr,
712 		(unsigned long)sys_addr);
713 
714 	return sys_addr;
715 }
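
/*
 * Example of the sign extension above: a 40-bit result of 0x8000001000
 * (bit 39 set) is returned as 0xffffff8000001000, while a value with
 * bit 39 clear is returned unchanged.
 */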
716 
717 /*
718  * @input_addr is an InputAddr associated with the node given by mci. Translate
719  * @input_addr to a SysAddr.
720  */
721 static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
722 					 u64 input_addr)
723 {
724 	return dram_addr_to_sys_addr(mci,
725 				     input_addr_to_dram_addr(mci, input_addr));
726 }
727 
728 /*
729  * Find the minimum and maximum InputAddr values that map to the given @csrow.
730  * Pass back these values in *input_addr_min and *input_addr_max.
731  */
732 static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
733 			      u64 *input_addr_min, u64 *input_addr_max)
734 {
735 	struct amd64_pvt *pvt;
736 	u64 base, mask;
737 
738 	pvt = mci->pvt_info;
739 	BUG_ON((csrow < 0) || (csrow >= pvt->cs_count));
740 
741 	base = base_from_dct_base(pvt, csrow);
742 	mask = mask_from_dct_mask(pvt, csrow);
743 
744 	*input_addr_min = base & ~mask;
745 	*input_addr_max = base | mask | pvt->dcs_mask_notused;
746 }
747 
748 /* Map the Error address to a PAGE and PAGE OFFSET. */
749 static inline void error_address_to_page_and_offset(u64 error_address,
750 						    u32 *page, u32 *offset)
751 {
752 	*page = (u32) (error_address >> PAGE_SHIFT);
753 	*offset = ((u32) error_address) & ~PAGE_MASK;
754 }
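
/*
 * With 4 KB pages (PAGE_SHIFT == 12), an error address of 0x12345678 maps
 * to page 0x12345 and offset 0x678.
 */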
755 
756 /*
757  * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
758  * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
759  * of a node that detected an ECC memory error.  mci represents the node that
760  * the error address maps to (possibly different from the node that detected
761  * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
762  * error.
763  */
764 static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
765 {
766 	int csrow;
767 
768 	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
769 
770 	if (csrow == -1)
771 		amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
772 				  "address 0x%lx\n", (unsigned long)sys_addr);
773 	return csrow;
774 }
775 
776 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
777 
778 static u16 extract_syndrome(struct err_regs *err)
779 {
780 	return ((err->nbsh >> 15) & 0xff) | ((err->nbsl >> 16) & 0xff00);
781 }
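
/*
 * That is, the low syndrome byte is taken from NB status high bits [22:15]
 * and the high byte from NB status low bits [31:24]; see the MCA NB status
 * register descriptions in the BKDG for the field layout.
 */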
782 
783 /*
784  * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
785  * are ECC capable.
786  */
787 static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt)
788 {
789 	int bit;
790 	enum edac_type edac_cap = EDAC_FLAG_NONE;
791 
792 	bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F)
793 		? 19
794 		: 17;
795 
796 	if (pvt->dclr0 & BIT(bit))
797 		edac_cap = EDAC_FLAG_SECDED;
798 
799 	return edac_cap;
800 }
801 
802 
803 static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt);
804 
805 static void amd64_dump_dramcfg_low(u32 dclr, int chan)
806 {
807 	debugf1("F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);
808 
809 	debugf1("  DIMM type: %sbuffered; all DIMMs support ECC: %s\n",
810 		(dclr & BIT(16)) ?  "un" : "",
811 		(dclr & BIT(19)) ? "yes" : "no");
812 
813 	debugf1("  PAR/ERR parity: %s\n",
814 		(dclr & BIT(8)) ?  "enabled" : "disabled");
815 
816 	debugf1("  DCT 128bit mode width: %s\n",
817 		(dclr & BIT(11)) ?  "128b" : "64b");
818 
819 	debugf1("  x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
820 		(dclr & BIT(12)) ?  "yes" : "no",
821 		(dclr & BIT(13)) ?  "yes" : "no",
822 		(dclr & BIT(14)) ?  "yes" : "no",
823 		(dclr & BIT(15)) ?  "yes" : "no");
824 }
825 
826 /* Display and decode various NB registers for debug purposes. */
827 static void amd64_dump_misc_regs(struct amd64_pvt *pvt)
828 {
829 	int ganged;
830 
831 	debugf1("F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
832 
833 	debugf1("  NB two channel DRAM capable: %s\n",
834 		(pvt->nbcap & K8_NBCAP_DCT_DUAL) ? "yes" : "no");
835 
836 	debugf1("  ECC capable: %s, ChipKill ECC capable: %s\n",
837 		(pvt->nbcap & K8_NBCAP_SECDED) ? "yes" : "no",
838 		(pvt->nbcap & K8_NBCAP_CHIPKILL) ? "yes" : "no");
839 
840 	amd64_dump_dramcfg_low(pvt->dclr0, 0);
841 
842 	debugf1("F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);
843 
844 	debugf1("F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, "
845 			"offset: 0x%08x\n",
846 			pvt->dhar,
847 			dhar_base(pvt->dhar),
848 			(boot_cpu_data.x86 == 0xf) ? k8_dhar_offset(pvt->dhar)
849 						   : f10_dhar_offset(pvt->dhar));
850 
851 	debugf1("  DramHoleValid: %s\n",
852 		(pvt->dhar & DHAR_VALID) ? "yes" : "no");
853 
854 	/* everything below this point is Fam10h and above */
855 	if (boot_cpu_data.x86 == 0xf) {
856 		amd64_debug_display_dimm_sizes(0, pvt);
857 		return;
858 	}
859 
860 	amd64_info("using %s syndromes.\n", ((pvt->syn_type == 8) ? "x8" : "x4"));
861 
862 	/* Only if NOT ganged does dclr1 have valid info */
863 	if (!dct_ganging_enabled(pvt))
864 		amd64_dump_dramcfg_low(pvt->dclr1, 1);
865 
866 	/*
867 	 * Determine if ganged and then dump memory sizes for first controller,
868 	 * and if NOT ganged dump info for 2nd controller.
869 	 */
870 	ganged = dct_ganging_enabled(pvt);
871 
872 	amd64_debug_display_dimm_sizes(0, pvt);
873 
874 	if (!ganged)
875 		amd64_debug_display_dimm_sizes(1, pvt);
876 }
877 
878 /* Read in both DBAM registers */
879 static void amd64_read_dbam_reg(struct amd64_pvt *pvt)
880 {
881 	amd64_read_pci_cfg(pvt->F2, DBAM0, &pvt->dbam0);
882 
883 	if (boot_cpu_data.x86 >= 0x10)
884 		amd64_read_pci_cfg(pvt->F2, DBAM1, &pvt->dbam1);
885 }
886 
887 /*
888  * NOTE: CPU Revision Dependent code: Rev E and Rev F
889  *
890  * Set the DCSB and DCSM mask values depending on the CPU revision value. Also
891  * set the shift factor for the DCSB and DCSM values.
892  *
893  * ->dcs_mask_notused, RevE:
894  *
895  * To find the max InputAddr for the csrow, start with the base address and set
896  * all bits that are "don't care" bits in the test at the start of section
897  * 3.5.4 (p. 84).
898  *
899  * The "don't care" bits are all set bits in the mask and all bits in the gaps
900  * between bit ranges [35:25] and [19:13]. The value REV_E_DCS_NOTUSED_BITS
901  * represents bits [24:20] and [12:0], which are all bits in the above-mentioned
902  * gaps.
903  *
904  * ->dcs_mask_notused, RevF and later:
905  *
906  * To find the max InputAddr for the csrow, start with the base address and set
907  * all bits that are "don't care" bits in the test at the start of NPT section
908  * 4.5.4 (p. 87).
909  *
910  * The "don't care" bits are all set bits in the mask and all bits in the gaps
911  * between bit ranges [36:27] and [21:13].
912  *
913  * The value REV_F_F1Xh_DCS_NOTUSED_BITS represents bits [26:22] and [12:0],
914  * which are all bits in the above-mentioned gaps.
915  */
916 static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt)
917 {
918 
919 	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
920 		pvt->dcsb_base		= REV_E_DCSB_BASE_BITS;
921 		pvt->dcsm_mask		= REV_E_DCSM_MASK_BITS;
922 		pvt->dcs_mask_notused	= REV_E_DCS_NOTUSED_BITS;
923 		pvt->dcs_shift		= REV_E_DCS_SHIFT;
924 		pvt->cs_count		= 8;
925 		pvt->num_dcsm		= 8;
926 	} else {
927 		pvt->dcsb_base		= REV_F_F1Xh_DCSB_BASE_BITS;
928 		pvt->dcsm_mask		= REV_F_F1Xh_DCSM_MASK_BITS;
929 		pvt->dcs_mask_notused	= REV_F_F1Xh_DCS_NOTUSED_BITS;
930 		pvt->dcs_shift		= REV_F_F1Xh_DCS_SHIFT;
931 		pvt->cs_count		= 8;
932 		pvt->num_dcsm		= 4;
933 	}
934 }
935 
936 /*
937  * Function 2, Offset K8_DCSB0; read in the DCS Base and DCS Mask hw registers
938  */
939 static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
940 {
941 	int cs, reg;
942 
943 	amd64_set_dct_base_and_mask(pvt);
944 
945 	for (cs = 0; cs < pvt->cs_count; cs++) {
946 		reg = K8_DCSB0 + (cs * 4);
947 		if (!amd64_read_pci_cfg(pvt->F2, reg, &pvt->dcsb0[cs]))
948 			debugf0("  DCSB0[%d]=0x%08x reg: F2x%x\n",
949 				cs, pvt->dcsb0[cs], reg);
950 
951 		/* If DCTs are NOT ganged, then read in DCT1's base */
952 		if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
953 			reg = F10_DCSB1 + (cs * 4);
954 			if (!amd64_read_pci_cfg(pvt->F2, reg,
955 						&pvt->dcsb1[cs]))
956 				debugf0("  DCSB1[%d]=0x%08x reg: F2x%x\n",
957 					cs, pvt->dcsb1[cs], reg);
958 		} else {
959 			pvt->dcsb1[cs] = 0;
960 		}
961 	}
962 
963 	for (cs = 0; cs < pvt->num_dcsm; cs++) {
964 		reg = K8_DCSM0 + (cs * 4);
965 		if (!amd64_read_pci_cfg(pvt->F2, reg, &pvt->dcsm0[cs]))
966 			debugf0("    DCSM0[%d]=0x%08x reg: F2x%x\n",
967 				cs, pvt->dcsm0[cs], reg);
968 
969 		/* If DCTs are NOT ganged, then read in DCT1's mask */
970 		if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
971 			reg = F10_DCSM1 + (cs * 4);
972 			if (!amd64_read_pci_cfg(pvt->F2, reg,
973 						&pvt->dcsm1[cs]))
974 				debugf0("    DCSM1[%d]=0x%08x reg: F2x%x\n",
975 					cs, pvt->dcsm1[cs], reg);
976 		} else {
977 			pvt->dcsm1[cs] = 0;
978 		}
979 	}
980 }
981 
982 static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt, int cs)
983 {
984 	enum mem_type type;
985 
986 	if (boot_cpu_data.x86 >= 0x10 || pvt->ext_model >= K8_REV_F) {
987 		if (pvt->dchr0 & DDR3_MODE)
988 			type = (pvt->dclr0 & BIT(16)) ?	MEM_DDR3 : MEM_RDDR3;
989 		else
990 			type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
991 	} else {
992 		type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
993 	}
994 
995 	amd64_info("CS%d: %s\n", cs, edac_mem_types[type]);
996 
997 	return type;
998 }
999 
1000 /*
1001  * Read the DRAM Configuration Low register. It differs between CG, D & E revs
1002  * and the later RevF memory controllers (DDR vs DDR2)
1003  *
1004  * Return:
1005  *      number of memory channels in operation
1006  * Pass back:
1007  *      contents of the DCL0_LOW register
1008  */
1009 static int k8_early_channel_count(struct amd64_pvt *pvt)
1010 {
1011 	int flag, err = 0;
1012 
1013 	err = amd64_read_pci_cfg(pvt->F2, F10_DCLR_0, &pvt->dclr0);
1014 	if (err)
1015 		return err;
1016 
1017 	if (pvt->ext_model >= K8_REV_F)
1018 		/* RevF (NPT) and later */
1019 		flag = pvt->dclr0 & F10_WIDTH_128;
1020 	else
1021 		/* RevE and earlier */
1022 		flag = pvt->dclr0 & REVE_WIDTH_128;
1023 
1024 	/* not used */
1025 	pvt->dclr1 = 0;
1026 
1027 	return (flag) ? 2 : 1;
1028 }
1029 
1030 /* extract the ERROR ADDRESS for the K8 CPUs */
1031 static u64 k8_get_error_address(struct mem_ctl_info *mci,
1032 				struct err_regs *info)
1033 {
1034 	return (((u64) (info->nbeah & 0xff)) << 32) +
1035 			(info->nbeal & ~0x03);
1036 }
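
/*
 * Illustrative example: nbeah = 0x000000ab and nbeal = 0x12345677 yield an
 * error address of 0xab12345674 (the low two bits are always cleared).
 */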
1037 
1038 /*
1039  * Read the Base and Limit registers for K8 based Memory controllers; extract
1040  * fields from the 'raw' reg into separate data fields
1041  *
1042  * Isolates: BASE, LIMIT, IntlvEn, IntlvSel, RW_EN
1043  */
1044 static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
1045 {
1046 	u32 low;
1047 	u32 off = dram << 3;	/* 8 bytes between DRAM entries */
1048 
1049 	amd64_read_pci_cfg(pvt->F1, K8_DRAM_BASE_LOW + off, &low);
1050 
1051 	/* Extract parts into separate data entries */
1052 	pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8;
1053 	pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7;
1054 	pvt->dram_rw_en[dram] = (low & 0x3);
1055 
1056 	amd64_read_pci_cfg(pvt->F1, K8_DRAM_LIMIT_LOW + off, &low);
1057 
1058 	/*
1059 	 * Extract parts into separate data entries. Limit is the HIGHEST memory
1060 	 * location of the region, so lower 24 bits need to be all ones
1061 	 */
1062 	pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 8) | 0x00FFFFFF;
1063 	pvt->dram_IntlvSel[dram] = (low >> 8) & 0x7;
1064 	pvt->dram_DstNode[dram] = (low & 0x7);
1065 }
1066 
1067 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
1068 				    struct err_regs *err_info, u64 sys_addr)
1069 {
1070 	struct mem_ctl_info *src_mci;
1071 	int channel, csrow;
1072 	u32 page, offset;
1073 	u16 syndrome;
1074 
1075 	syndrome = extract_syndrome(err_info);
1076 
1077 	/* CHIPKILL enabled */
1078 	if (err_info->nbcfg & K8_NBCFG_CHIPKILL) {
1079 		channel = get_channel_from_ecc_syndrome(mci, syndrome);
1080 		if (channel < 0) {
1081 			/*
1082 			 * Syndrome didn't map, so we don't know which of the
1083 			 * 2 DIMMs is in error. So we need to ID 'both' of them
1084 			 * as suspect.
1085 			 */
1086 			amd64_mc_warn(mci, "unknown syndrome 0x%04x - possible "
1087 					   "error reporting race\n", syndrome);
1088 			edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
1089 			return;
1090 		}
1091 	} else {
1092 		/*
1093 		 * non-chipkill ecc mode
1094 		 *
1095 		 * The k8 documentation is unclear about how to determine the
1096 		 * channel number when using non-chipkill memory.  This method
1097 		 * was obtained from email communication with someone at AMD.
1098 		 * (Wish the email was placed in this comment - norsk)
1099 		 */
1100 		channel = ((sys_addr & BIT(3)) != 0);
1101 	}
1102 
1103 	/*
1104 	 * Find out which node the error address belongs to. This may be
1105 	 * different from the node that detected the error.
1106 	 */
1107 	src_mci = find_mc_by_sys_addr(mci, sys_addr);
1108 	if (!src_mci) {
1109 		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
1110 			     (unsigned long)sys_addr);
1111 		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
1112 		return;
1113 	}
1114 
1115 	/* Now map the sys_addr to a CSROW */
1116 	csrow = sys_addr_to_csrow(src_mci, sys_addr);
1117 	if (csrow < 0) {
1118 		edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR);
1119 	} else {
1120 		error_address_to_page_and_offset(sys_addr, &page, &offset);
1121 
1122 		edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow,
1123 				  channel, EDAC_MOD_STR);
1124 	}
1125 }
1126 
1127 static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode)
1128 {
1129 	int *dbam_map;
1130 
1131 	if (pvt->ext_model >= K8_REV_F)
1132 		dbam_map = ddr2_dbam;
1133 	else if (pvt->ext_model >= K8_REV_D)
1134 		dbam_map = ddr2_dbam_revD;
1135 	else
1136 		dbam_map = ddr2_dbam_revCG;
1137 
1138 	return dbam_map[cs_mode];
1139 }
1140 
1141 /*
1142  * Get the number of DCT channels in use.
1143  *
1144  * Return:
1145  *	number of Memory Channels in operation
1146  * Pass back:
1147  *	contents of the DCL0_LOW register
1148  */
1149 static int f10_early_channel_count(struct amd64_pvt *pvt)
1150 {
1151 	int dbams[] = { DBAM0, DBAM1 };
1152 	int i, j, channels = 0;
1153 	u32 dbam;
1154 
1155 	/* If we are in 128 bit mode, then we are using 2 channels */
1156 	if (pvt->dclr0 & F10_WIDTH_128) {
1157 		channels = 2;
1158 		return channels;
1159 	}
1160 
1161 	/*
1162 	 * Need to check if we are in unganged mode: if so, there are 2 channels,
1163 	 * but they are not in 128 bit mode and thus the above 'dclr0' status
1164 	 * bit will be OFF.
1165 	 *
1166 	 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
1167 	 * its CSEnable bit on. If so, this is the SINGLE DIMM case.
1168 	 */
1169 	debugf0("Data width is not 128 bits - need more decoding\n");
1170 
1171 	/*
1172 	 * Check DRAM Bank Address Mapping values for each DIMM to see if there
1173 	 * is more than just one DIMM present in unganged mode. Need to check
1174 	 * both controllers since DIMMs can be placed in either one.
1175 	 */
1176 	for (i = 0; i < ARRAY_SIZE(dbams); i++) {
1177 		if (amd64_read_pci_cfg(pvt->F2, dbams[i], &dbam))
1178 			goto err_reg;
1179 
1180 		for (j = 0; j < 4; j++) {
1181 			if (DBAM_DIMM(j, dbam) > 0) {
1182 				channels++;
1183 				break;
1184 			}
1185 		}
1186 	}
1187 
1188 	if (channels > 2)
1189 		channels = 2;
1190 
1191 	amd64_info("MCT channel count: %d\n", channels);
1192 
1193 	return channels;
1194 
1195 err_reg:
1196 	return -1;
1197 
1198 }
1199 
1200 static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode)
1201 {
1202 	int *dbam_map;
1203 
1204 	if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE)
1205 		dbam_map = ddr3_dbam;
1206 	else
1207 		dbam_map = ddr2_dbam;
1208 
1209 	return dbam_map[cs_mode];
1210 }
1211 
1212 static u64 f10_get_error_address(struct mem_ctl_info *mci,
1213 			struct err_regs *info)
1214 {
1215 	return (((u64) (info->nbeah & 0xffff)) << 32) +
1216 			(info->nbeal & ~0x01);
1217 }
1218 
1219 /*
1220  * Read the Base and Limit registers for F10 based Memory controllers. Extract
1221  * fields from the 'raw' reg into separate data fields.
1222  *
1223  * Isolates: BASE, LIMIT, IntlvEn, IntlvSel, RW_EN.
1224  */
1225 static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
1226 {
1227 	u32 high_offset, low_offset, high_base, low_base, high_limit, low_limit;
1228 
1229 	low_offset = K8_DRAM_BASE_LOW + (dram << 3);
1230 	high_offset = F10_DRAM_BASE_HIGH + (dram << 3);
1231 
1232 	/* read the 'raw' DRAM BASE Address register */
1233 	amd64_read_pci_cfg(pvt->F1, low_offset, &low_base);
1234 	amd64_read_pci_cfg(pvt->F1, high_offset, &high_base);
1235 
1236 	/* Extract parts into separate data entries */
1237 	pvt->dram_rw_en[dram] = (low_base & 0x3);
1238 
1239 	if (pvt->dram_rw_en[dram] == 0)
1240 		return;
1241 
1242 	pvt->dram_IntlvEn[dram] = (low_base >> 8) & 0x7;
1243 
1244 	pvt->dram_base[dram] = (((u64)high_base & 0x000000FF) << 40) |
1245 			       (((u64)low_base  & 0xFFFF0000) << 8);
1246 
1247 	low_offset = K8_DRAM_LIMIT_LOW + (dram << 3);
1248 	high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3);
1249 
1250 	/* read the 'raw' LIMIT registers */
1251 	amd64_read_pci_cfg(pvt->F1, low_offset, &low_limit);
1252 	amd64_read_pci_cfg(pvt->F1, high_offset, &high_limit);
1253 
1254 	pvt->dram_DstNode[dram] = (low_limit & 0x7);
1255 	pvt->dram_IntlvSel[dram] = (low_limit >> 8) & 0x7;
1256 
1257 	/*
1258 	 * Extract address values and form a LIMIT address. Limit is the HIGHEST
1259 	 * memory location of the region, so low 24 bits need to be all ones.
1260 	 */
1261 	pvt->dram_limit[dram] = (((u64)high_limit & 0x000000FF) << 40) |
1262 				(((u64) low_limit & 0xFFFF0000) << 8) |
1263 				0x00FFFFFF;
1264 }
1265 
1266 static void f10_read_dram_ctl_register(struct amd64_pvt *pvt)
1267 {
1268 
1269 	if (!amd64_read_pci_cfg(pvt->F2, F10_DCTL_SEL_LOW,
1270 				&pvt->dram_ctl_select_low)) {
1271 		debugf0("F2x110 (DCTL Sel. Low): 0x%08x, "
1272 			"High range addresses at: 0x%x\n",
1273 			pvt->dram_ctl_select_low,
1274 			dct_sel_baseaddr(pvt));
1275 
1276 		debugf0("  DCT mode: %s, All DCTs on: %s\n",
1277 			(dct_ganging_enabled(pvt) ? "ganged" : "unganged"),
1278 			(dct_dram_enabled(pvt) ? "yes"   : "no"));
1279 
1280 		if (!dct_ganging_enabled(pvt))
1281 			debugf0("  Address range split per DCT: %s\n",
1282 				(dct_high_range_enabled(pvt) ? "yes" : "no"));
1283 
1284 		debugf0("  DCT data interleave for ECC: %s, "
1285 			"DRAM cleared since last warm reset: %s\n",
1286 			(dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
1287 			(dct_memory_cleared(pvt) ? "yes" : "no"));
1288 
1289 		debugf0("  DCT channel interleave: %s, "
1290 			"DCT interleave bits selector: 0x%x\n",
1291 			(dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
1292 			dct_sel_interleave_addr(pvt));
1293 	}
1294 
1295 	amd64_read_pci_cfg(pvt->F2, F10_DCTL_SEL_HIGH,
1296 			   &pvt->dram_ctl_select_high);
1297 }
1298 
1299 /*
1300  * determine channel based on the interleaving mode: F10h BKDG, 2.8.9 Memory
1301  * Interleaving Modes.
1302  */
1303 static u32 f10_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1304 				int hi_range_sel, u32 intlv_en)
1305 {
1306 	u32 cs, temp, dct_sel_high = (pvt->dram_ctl_select_low >> 1) & 1;
1307 
1308 	if (dct_ganging_enabled(pvt))
1309 		cs = 0;
1310 	else if (hi_range_sel)
1311 		cs = dct_sel_high;
1312 	else if (dct_interleave_enabled(pvt)) {
1313 		/*
1314 		 * see F2x110[DctSelIntLvAddr] - channel interleave mode
1315 		 */
1316 		if (dct_sel_interleave_addr(pvt) == 0)
1317 			cs = sys_addr >> 6 & 1;
1318 		else if ((dct_sel_interleave_addr(pvt) >> 1) & 1) {
1319 			temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
1320 
1321 			if (dct_sel_interleave_addr(pvt) & 1)
1322 				cs = (sys_addr >> 9 & 1) ^ temp;
1323 			else
1324 				cs = (sys_addr >> 6 & 1) ^ temp;
1325 		} else if (intlv_en & 4)
1326 			cs = sys_addr >> 15 & 1;
1327 		else if (intlv_en & 2)
1328 			cs = sys_addr >> 14 & 1;
1329 		else if (intlv_en & 1)
1330 			cs = sys_addr >> 13 & 1;
1331 		else
1332 			cs = sys_addr >> 12 & 1;
1333 	} else if (dct_high_range_enabled(pvt) && !dct_ganging_enabled(pvt))
1334 		cs = ~dct_sel_high & 1;
1335 	else
1336 		cs = 0;
1337 
1338 	return cs;
1339 }
1340 
1341 static inline u32 f10_map_intlv_en_to_shift(u32 intlv_en)
1342 {
1343 	if (intlv_en == 1)
1344 		return 1;
1345 	else if (intlv_en == 3)
1346 		return 2;
1347 	else if (intlv_en == 7)
1348 		return 3;
1349 
1350 	return 0;
1351 }
1352 
1353 /* See F10h BKDG, 2.8.10.2 DctSelBaseOffset Programming */
1354 static inline u64 f10_get_base_addr_offset(u64 sys_addr, int hi_range_sel,
1355 						 u32 dct_sel_base_addr,
1356 						 u64 dct_sel_base_off,
1357 						 u32 hole_valid, u32 hole_off,
1358 						 u64 dram_base)
1359 {
1360 	u64 chan_off;
1361 
1362 	if (hi_range_sel) {
1363 		if (!(dct_sel_base_addr & 0xFFFF0000) &&
1364 		   hole_valid && (sys_addr >= 0x100000000ULL))
1365 			chan_off = hole_off << 16;
1366 		else
1367 			chan_off = dct_sel_base_off;
1368 	} else {
1369 		if (hole_valid && (sys_addr >= 0x100000000ULL))
1370 			chan_off = hole_off << 16;
1371 		else
1372 			chan_off = dram_base & 0xFFFFF8000000ULL;
1373 	}
1374 
1375 	return (sys_addr & 0x0000FFFFFFFFFFC0ULL) -
1376 			(chan_off & 0x0000FFFFFF800000ULL);
1377 }
1378 
1379 /* Hack for the time being - Can we get this from BIOS?? */
1380 #define	CH0SPARE_RANK	0
1381 #define	CH1SPARE_RANK	1
1382 
1383 /*
1384  * Check if the csrow passed in is marked as SPARED; if so, return the new
1385  * spare row.
1386  */
1387 static inline int f10_process_possible_spare(int csrow,
1388 				u32 cs, struct amd64_pvt *pvt)
1389 {
1390 	u32 swap_done;
1391 	u32 bad_dram_cs;
1392 
1393 	/* Depending on channel, isolate respective SPARING info */
1394 	if (cs) {
1395 		swap_done = F10_ONLINE_SPARE_SWAPDONE1(pvt->online_spare);
1396 		bad_dram_cs = F10_ONLINE_SPARE_BADDRAM_CS1(pvt->online_spare);
1397 		if (swap_done && (csrow == bad_dram_cs))
1398 			csrow = CH1SPARE_RANK;
1399 	} else {
1400 		swap_done = F10_ONLINE_SPARE_SWAPDONE0(pvt->online_spare);
1401 		bad_dram_cs = F10_ONLINE_SPARE_BADDRAM_CS0(pvt->online_spare);
1402 		if (swap_done && (csrow == bad_dram_cs))
1403 			csrow = CH0SPARE_RANK;
1404 	}
1405 	return csrow;
1406 }
1407 
1408 /*
1409  * Iterate over the DRAM DCT "base" and "mask" registers looking for a
1410  * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
1411  *
1412  * Return:
1413  *	-EINVAL:  NOT FOUND
1414  *	0..csrow = Chip-Select Row
1415  */
1416 static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs)
1417 {
1418 	struct mem_ctl_info *mci;
1419 	struct amd64_pvt *pvt;
1420 	u32 cs_base, cs_mask;
1421 	int cs_found = -EINVAL;
1422 	int csrow;
1423 
1424 	mci = mcis[nid];
1425 	if (!mci)
1426 		return cs_found;
1427 
1428 	pvt = mci->pvt_info;
1429 
1430 	debugf1("InputAddr=0x%x  channelselect=%d\n", in_addr, cs);
1431 
1432 	for (csrow = 0; csrow < pvt->cs_count; csrow++) {
1433 
1434 		cs_base = amd64_get_dct_base(pvt, cs, csrow);
1435 		if (!(cs_base & K8_DCSB_CS_ENABLE))
1436 			continue;
1437 
1438 		/*
1439 		 * We have an ENABLED CSROW, Isolate just the MASK bits of the
1440 		 * target: [28:19] and [13:5], which map to [36:27] and [21:13]
1441 		 * of the actual address.
1442 		 */
1443 		cs_base &= REV_F_F1Xh_DCSB_BASE_BITS;
1444 
1445 		/*
1446 		 * Get the DCT Mask, and ENABLE the reserved bits: [18:14] and
1447 		 * [4:0] to become ON. Then mask off bits [28:0] ([36:8]).
1448 		 */
1449 		cs_mask = amd64_get_dct_mask(pvt, cs, csrow);
1450 
1451 		debugf1("    CSROW=%d CSBase=0x%x RAW CSMask=0x%x\n",
1452 				csrow, cs_base, cs_mask);
1453 
1454 		cs_mask = (cs_mask | 0x0007C01F) & 0x1FFFFFFF;
1455 
1456 		debugf1("              Final CSMask=0x%x\n", cs_mask);
1457 		debugf1("    (InputAddr & ~CSMask)=0x%x "
1458 				"(CSBase & ~CSMask)=0x%x\n",
1459 				(in_addr & ~cs_mask), (cs_base & ~cs_mask));
1460 
1461 		if ((in_addr & ~cs_mask) == (cs_base & ~cs_mask)) {
1462 			cs_found = f10_process_possible_spare(csrow, cs, pvt);
1463 
1464 			debugf1(" MATCH csrow=%d\n", cs_found);
1465 			break;
1466 		}
1467 	}
1468 	return cs_found;
1469 }
1470 
1471 /* For a given @dram_range, check if @sys_addr falls within it. */
1472 static int f10_match_to_this_node(struct amd64_pvt *pvt, int dram_range,
1473 				  u64 sys_addr, int *nid, int *chan_sel)
1474 {
1475 	int node_id, cs_found = -EINVAL, high_range = 0;
1476 	u32 intlv_en, intlv_sel, intlv_shift, hole_off;
1477 	u32 hole_valid, tmp, dct_sel_base, channel;
1478 	u64 dram_base, chan_addr, dct_sel_base_off;
1479 
1480 	dram_base = pvt->dram_base[dram_range];
1481 	intlv_en = pvt->dram_IntlvEn[dram_range];
1482 
1483 	node_id = pvt->dram_DstNode[dram_range];
1484 	intlv_sel = pvt->dram_IntlvSel[dram_range];
1485 
1486 	debugf1("(dram=%d) Base=0x%llx SystemAddr= 0x%llx Limit=0x%llx\n",
1487 		dram_range, dram_base, sys_addr, pvt->dram_limit[dram_range]);
1488 
1489 	/*
1490 	 * This assumes that one node's DHAR is the same as all the other
1491 	 * nodes' DHAR.
1492 	 */
1493 	hole_off = (pvt->dhar & 0x0000FF80);
1494 	hole_valid = (pvt->dhar & 0x1);
1495 	dct_sel_base_off = (pvt->dram_ctl_select_high & 0xFFFFFC00) << 16;
1496 
1497 	debugf1("   HoleOffset=0x%x  HoleValid=0x%x IntlvSel=0x%x\n",
1498 			hole_off, hole_valid, intlv_sel);
1499 
1500 	if (intlv_en &&
1501 	    (intlv_sel != ((sys_addr >> 12) & intlv_en)))
1502 		return -EINVAL;
1503 
1504 	dct_sel_base = dct_sel_baseaddr(pvt);
1505 
1506 	/*
1507 	 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
1508 	 * select between DCT0 and DCT1.
1509 	 */
1510 	if (dct_high_range_enabled(pvt) &&
1511 	   !dct_ganging_enabled(pvt) &&
1512 	   ((sys_addr >> 27) >= (dct_sel_base >> 11)))
1513 		high_range = 1;
1514 
1515 	channel = f10_determine_channel(pvt, sys_addr, high_range, intlv_en);
1516 
1517 	chan_addr = f10_get_base_addr_offset(sys_addr, high_range, dct_sel_base,
1518 					     dct_sel_base_off, hole_valid,
1519 					     hole_off, dram_base);
1520 
1521 	intlv_shift = f10_map_intlv_en_to_shift(intlv_en);
1522 
1523 	/* remove Node ID (in case of memory interleaving) */
1524 	tmp = chan_addr & 0xFC0;
1525 
1526 	chan_addr = ((chan_addr >> intlv_shift) & 0xFFFFFFFFF000ULL) | tmp;
1527 
1528 	/* remove channel interleave and hash */
1529 	if (dct_interleave_enabled(pvt) &&
1530 	   !dct_high_range_enabled(pvt) &&
1531 	   !dct_ganging_enabled(pvt)) {
1532 		if (dct_sel_interleave_addr(pvt) != 1)
1533 			chan_addr = (chan_addr >> 1) & 0xFFFFFFFFFFFFFFC0ULL;
1534 		else {
1535 			tmp = chan_addr & 0xFC0;
1536 			chan_addr = ((chan_addr & 0xFFFFFFFFFFFFC000ULL) >> 1)
1537 					| tmp;
1538 		}
1539 	}
1540 
1541 	debugf1("   (ChannelAddrLong=0x%llx) >> 8 becomes InputAddr=0x%x\n",
1542 		chan_addr, (u32)(chan_addr >> 8));
1543 
1544 	cs_found = f10_lookup_addr_in_dct(chan_addr >> 8, node_id, channel);
1545 
1546 	if (cs_found >= 0) {
1547 		*nid = node_id;
1548 		*chan_sel = channel;
1549 	}
1550 	return cs_found;
1551 }
1552 
1553 static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
1554 				       int *node, int *chan_sel)
1555 {
1556 	int dram_range, cs_found = -EINVAL;
1557 	u64 dram_base, dram_limit;
1558 
1559 	for (dram_range = 0; dram_range < DRAM_REG_COUNT; dram_range++) {
1560 
1561 		if (!pvt->dram_rw_en[dram_range])
1562 			continue;
1563 
1564 		dram_base = pvt->dram_base[dram_range];
1565 		dram_limit = pvt->dram_limit[dram_range];
1566 
1567 		if ((dram_base <= sys_addr) && (sys_addr <= dram_limit)) {
1568 
1569 			cs_found = f10_match_to_this_node(pvt, dram_range,
1570 							  sys_addr, node,
1571 							  chan_sel);
1572 			if (cs_found >= 0)
1573 				break;
1574 		}
1575 	}
1576 	return cs_found;
1577 }
1578 
1579 /*
1580  * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
1581  * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
1582  *
1583  * The @sys_addr is usually an error address received from the hardware
1584  * (MCX_ADDR).
1585  */
1586 static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
1587 				     struct err_regs *err_info,
1588 				     u64 sys_addr)
1589 {
1590 	struct amd64_pvt *pvt = mci->pvt_info;
1591 	u32 page, offset;
1592 	int nid, csrow, chan = 0;
1593 	u16 syndrome;
1594 
1595 	csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
1596 
1597 	if (csrow < 0) {
1598 		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
1599 		return;
1600 	}
1601 
1602 	error_address_to_page_and_offset(sys_addr, &page, &offset);
1603 
1604 	syndrome = extract_syndrome(err_info);
1605 
1606 	/*
1607 	 * We need the syndromes for channel detection only when we're
1608 	 * ganged. Otherwise @chan should already contain the channel at
1609 	 * this point.
1610 	 */
1611 	if (dct_ganging_enabled(pvt) && (pvt->nbcfg & K8_NBCFG_CHIPKILL))
1612 		chan = get_channel_from_ecc_syndrome(mci, syndrome);
1613 
1614 	if (chan >= 0)
1615 		edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan,
1616 				  EDAC_MOD_STR);
1617 	else
1618 		/*
1619 		 * Channel unknown, report all channels on this CSROW as failed.
1620 		 */
1621 		for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++)
1622 			edac_mc_handle_ce(mci, page, offset, syndrome,
1623 					  csrow, chan, EDAC_MOD_STR);
1624 }
1625 
1626 /*
1627  * debug routine to display the memory sizes of all logical DIMMs and their
1628  * CSROWs as well
1629  */
1630 static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt)
1631 {
1632 	int dimm, size0, size1, factor = 0;
1633 	u32 dbam;
1634 	u32 *dcsb;
1635 
1636 	if (boot_cpu_data.x86 == 0xf) {
1637 		if (pvt->dclr0 & F10_WIDTH_128)
1638 			factor = 1;
1639 
1640 		/* K8 families < revF not supported yet */
1641 		if (pvt->ext_model < K8_REV_F)
1642 			return;
1643 		else
1644 			WARN_ON(ctrl != 0);
1645 	}
1646 
1647 	debugf1("F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
1648 		ctrl, ctrl ? pvt->dbam1 : pvt->dbam0);
1649 
1650 	dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
1651 	dcsb = ctrl ? pvt->dcsb1 : pvt->dcsb0;
1652 
1653 	edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);
1654 
1655 	/* Dump memory sizes for DIMM and its CSROWs */
1656 	for (dimm = 0; dimm < 4; dimm++) {
1657 
1658 		size0 = 0;
1659 		if (dcsb[dimm*2] & K8_DCSB_CS_ENABLE)
1660 			size0 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam));
1661 
1662 		size1 = 0;
1663 		if (dcsb[dimm*2 + 1] & K8_DCSB_CS_ENABLE)
1664 			size1 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam));
1665 
1666 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
1667 				dimm * 2,     size0 << factor,
1668 				dimm * 2 + 1, size1 << factor);
1669 	}
1670 }
1671 
1672 static struct amd64_family_type amd64_family_types[] = {
1673 	[K8_CPUS] = {
1674 		.ctl_name = "K8",
1675 		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
1676 		.f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
1677 		.ops = {
1678 			.early_channel_count	= k8_early_channel_count,
1679 			.get_error_address	= k8_get_error_address,
1680 			.read_dram_base_limit	= k8_read_dram_base_limit,
1681 			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
1682 			.dbam_to_cs		= k8_dbam_to_chip_select,
1683 		}
1684 	},
1685 	[F10_CPUS] = {
1686 		.ctl_name = "F10h",
1687 		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
1688 		.f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
1689 		.ops = {
1690 			.early_channel_count	= f10_early_channel_count,
1691 			.get_error_address	= f10_get_error_address,
1692 			.read_dram_base_limit	= f10_read_dram_base_limit,
1693 			.read_dram_ctl_register	= f10_read_dram_ctl_register,
1694 			.map_sysaddr_to_csrow	= f10_map_sysaddr_to_csrow,
1695 			.dbam_to_cs		= f10_dbam_to_chip_select,
1696 		}
1697 	},
1698 };
1699 
1700 static struct pci_dev *pci_get_related_function(unsigned int vendor,
1701 						unsigned int device,
1702 						struct pci_dev *related)
1703 {
1704 	struct pci_dev *dev = NULL;
1705 
1706 	dev = pci_get_device(vendor, device, dev);
1707 	while (dev) {
1708 		if ((dev->bus->number == related->bus->number) &&
1709 		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
1710 			break;
1711 		dev = pci_get_device(vendor, device, dev);
1712 	}
1713 
1714 	return dev;
1715 }
1716 
1717 /*
1718  * These are tables of eigenvectors (one per line) which can be used for the
1719  * construction of the syndrome tables. The modified syndrome search algorithm
1720  * uses those to find the symbol in error and thus the DIMM.
1721  *
1722  * Algorithm courtesy of Ross LaFetra from AMD.
1723  */
1724 static u16 x4_vectors[] = {
1725 	0x2f57, 0x1afe, 0x66cc, 0xdd88,
1726 	0x11eb, 0x3396, 0x7f4c, 0xeac8,
1727 	0x0001, 0x0002, 0x0004, 0x0008,
1728 	0x1013, 0x3032, 0x4044, 0x8088,
1729 	0x106b, 0x30d6, 0x70fc, 0xe0a8,
1730 	0x4857, 0xc4fe, 0x13cc, 0x3288,
1731 	0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
1732 	0x1f39, 0x251e, 0xbd6c, 0x6bd8,
1733 	0x15c1, 0x2a42, 0x89ac, 0x4758,
1734 	0x2b03, 0x1602, 0x4f0c, 0xca08,
1735 	0x1f07, 0x3a0e, 0x6b04, 0xbd08,
1736 	0x8ba7, 0x465e, 0x244c, 0x1cc8,
1737 	0x2b87, 0x164e, 0x642c, 0xdc18,
1738 	0x40b9, 0x80de, 0x1094, 0x20e8,
1739 	0x27db, 0x1eb6, 0x9dac, 0x7b58,
1740 	0x11c1, 0x2242, 0x84ac, 0x4c58,
1741 	0x1be5, 0x2d7a, 0x5e34, 0xa718,
1742 	0x4b39, 0x8d1e, 0x14b4, 0x28d8,
1743 	0x4c97, 0xc87e, 0x11fc, 0x33a8,
1744 	0x8e97, 0x497e, 0x2ffc, 0x1aa8,
1745 	0x16b3, 0x3d62, 0x4f34, 0x8518,
1746 	0x1e2f, 0x391a, 0x5cac, 0xf858,
1747 	0x1d9f, 0x3b7a, 0x572c, 0xfe18,
1748 	0x15f5, 0x2a5a, 0x5264, 0xa3b8,
1749 	0x1dbb, 0x3b66, 0x715c, 0xe3f8,
1750 	0x4397, 0xc27e, 0x17fc, 0x3ea8,
1751 	0x1617, 0x3d3e, 0x6464, 0xb8b8,
1752 	0x23ff, 0x12aa, 0xab6c, 0x56d8,
1753 	0x2dfb, 0x1ba6, 0x913c, 0x7328,
1754 	0x185d, 0x2ca6, 0x7914, 0x9e28,
1755 	0x171b, 0x3e36, 0x7d7c, 0xebe8,
1756 	0x4199, 0x82ee, 0x19f4, 0x2e58,
1757 	0x4807, 0xc40e, 0x130c, 0x3208,
1758 	0x1905, 0x2e0a, 0x5804, 0xac08,
1759 	0x213f, 0x132a, 0xadfc, 0x5ba8,
1760 	0x19a9, 0x2efe, 0xb5cc, 0x6f88,
1761 };
1762 
1763 static u16 x8_vectors[] = {
1764 	0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
1765 	0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
1766 	0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
1767 	0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80,
1768 	0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780,
1769 	0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080,
1770 	0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080,
1771 	0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080,
1772 	0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80,
1773 	0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580,
1774 	0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880,
1775 	0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280,
1776 	0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180,
1777 	0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580,
1778 	0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280,
1779 	0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180,
1780 	0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080,
1781 	0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
1782 	0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
1783 };
1784 
1785 static int decode_syndrome(u16 syndrome, u16 *vectors, int num_vecs,
1786 			   int v_dim)
1787 {
1788 	unsigned int i, err_sym;
1789 
1790 	for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
1791 		u16 s = syndrome;
1792 		int v_idx =  err_sym * v_dim;
1793 		int v_end = (err_sym + 1) * v_dim;
1794 
1795 		/* walk over all 16 bits of the syndrome */
1796 		for (i = 1; i < (1U << 16); i <<= 1) {
1797 
1798 			/* if bit is set in that eigenvector... */
1799 			if (v_idx < v_end && vectors[v_idx] & i) {
1800 				u16 ev_comp = vectors[v_idx++];
1801 
1802 				/* ... and bit set in the modified syndrome, */
1803 				if (s & i) {
1804 					/* remove it. */
1805 					s ^= ev_comp;
1806 
1807 					if (!s)
1808 						return err_sym;
1809 				}
1810 
1811 			} else if (s & i)
1812 				/* can't get to zero, move to next symbol */
1813 				break;
1814 		}
1815 	}
1816 
1817 	debugf0("syndrome(%x) not found\n", syndrome);
1818 	return -1;
1819 }
1820 
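/*
 * Illustrative trace: with x4 symbols, a syndrome of 0x0001 cannot be
 * cancelled by the vectors of symbols 0 or 1, but equals the first vector of
 * the row { 0x0001, 0x0002, 0x0004, 0x0008 }, so decode_syndrome() returns
 * err_sym 2; map_err_sym_to_channel(2, 4) below then yields channel 0
 * (2 >> 4 == 0).
 */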
1821 static int map_err_sym_to_channel(int err_sym, int sym_size)
1822 {
1823 	if (sym_size == 4)
1824 		switch (err_sym) {
1825 		case 0x20:
1826 		case 0x21:
1827 			return 0;
1828 			break;
1829 		case 0x22:
1830 		case 0x23:
1831 			return 1;
1832 			break;
1833 		default:
1834 			return err_sym >> 4;
1835 			break;
1836 		}
1837 	/* x8 symbols */
1838 	else
1839 		switch (err_sym) {
1840 		/* imaginary bits not in a DIMM */
1841 		case 0x10:
1842 			WARN(1, KERN_ERR "Invalid error symbol: 0x%x\n",
1843 					  err_sym);
1844 			return -1;
1845 			break;
1846 
1847 		case 0x11:
1848 			return 0;
1849 			break;
1850 		case 0x12:
1851 			return 1;
1852 			break;
1853 		default:
1854 			return err_sym >> 3;
1855 			break;
1856 		}
1857 	return -1;
1858 }
1859 
1860 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
1861 {
1862 	struct amd64_pvt *pvt = mci->pvt_info;
1863 	int err_sym = -1;
1864 
1865 	if (pvt->syn_type == 8)
1866 		err_sym = decode_syndrome(syndrome, x8_vectors,
1867 					  ARRAY_SIZE(x8_vectors),
1868 					  pvt->syn_type);
1869 	else if (pvt->syn_type == 4)
1870 		err_sym = decode_syndrome(syndrome, x4_vectors,
1871 					  ARRAY_SIZE(x4_vectors),
1872 					  pvt->syn_type);
1873 	else {
1874 		amd64_warn("Illegal syndrome type: %u\n", pvt->syn_type);
1875 		return err_sym;
1876 	}
1877 
1878 	return map_err_sym_to_channel(err_sym, pvt->syn_type);
1879 }
1880 
1881 /*
1882  * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
1883  * ADDRESS and process.
1884  */
1885 static void amd64_handle_ce(struct mem_ctl_info *mci,
1886 			    struct err_regs *info)
1887 {
1888 	struct amd64_pvt *pvt = mci->pvt_info;
1889 	u64 sys_addr;
1890 
1891 	/* Ensure that the Error Address is VALID */
1892 	if (!(info->nbsh & K8_NBSH_VALID_ERROR_ADDR)) {
1893 		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
1894 		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
1895 		return;
1896 	}
1897 
1898 	sys_addr = pvt->ops->get_error_address(mci, info);
1899 
1900 	amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
1901 
1902 	pvt->ops->map_sysaddr_to_csrow(mci, info, sys_addr);
1903 }
1904 
1905 /* Handle any Un-correctable Errors (UEs) */
1906 static void amd64_handle_ue(struct mem_ctl_info *mci,
1907 			    struct err_regs *info)
1908 {
1909 	struct amd64_pvt *pvt = mci->pvt_info;
1910 	struct mem_ctl_info *log_mci, *src_mci = NULL;
1911 	int csrow;
1912 	u64 sys_addr;
1913 	u32 page, offset;
1914 
1915 	log_mci = mci;
1916 
1917 	if (!(info->nbsh & K8_NBSH_VALID_ERROR_ADDR)) {
1918 		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
1919 		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
1920 		return;
1921 	}
1922 
1923 	sys_addr = pvt->ops->get_error_address(mci, info);
1924 
1925 	/*
1926 	 * Find out which node the error address belongs to. This may be
1927 	 * different from the node that detected the error.
1928 	 */
1929 	src_mci = find_mc_by_sys_addr(mci, sys_addr);
1930 	if (!src_mci) {
1931 		amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
1932 				  (unsigned long)sys_addr);
1933 		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
1934 		return;
1935 	}
1936 
1937 	log_mci = src_mci;
1938 
1939 	csrow = sys_addr_to_csrow(log_mci, sys_addr);
1940 	if (csrow < 0) {
1941 		amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
1942 				  (unsigned long)sys_addr);
1943 		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
1944 	} else {
1945 		error_address_to_page_and_offset(sys_addr, &page, &offset);
1946 		edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR);
1947 	}
1948 }
1949 
1950 static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
1951 					    struct err_regs *info)
1952 {
1953 	u16 ec = EC(info->nbsl);
1954 	u8 xec = XEC(info->nbsl, 0x1f);
1955 	int ecc_type = (info->nbsh >> 13) & 0x3;
1956 
1957 	/* Bail out early if this was an 'observed' error */
1958 	if (PP(ec) == K8_NBSL_PP_OBS)
1959 		return;
1960 
1961 	/* Do only ECC errors */
1962 	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
1963 		return;
1964 
1965 	if (ecc_type == 2)
1966 		amd64_handle_ce(mci, info);
1967 	else if (ecc_type == 1)
1968 		amd64_handle_ue(mci, info);
1969 }
1970 
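/*
 * Entry point registered via amd_register_ecc_decoder() (see
 * amd64_init_one_instance() below): repackage the raw MCi_STATUS/MCi_ADDR
 * values into an err_regs structure and hand it to the per-node instance.
 */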
1971 void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg)
1972 {
1973 	struct mem_ctl_info *mci = mcis[node_id];
1974 	struct err_regs regs;
1975 
1976 	regs.nbsl  = (u32) m->status;
1977 	regs.nbsh  = (u32)(m->status >> 32);
1978 	regs.nbeal = (u32) m->addr;
1979 	regs.nbeah = (u32)(m->addr >> 32);
1980 	regs.nbcfg = nbcfg;
1981 
1982 	__amd64_decode_bus_error(mci, &regs);
1983 
1984 	/*
1985 	 * Check the UE bit of the NB status high register; if set, generate
1986 	 * some logs. If NOT a GART error, then process the event as a NO-INFO
1987 	 * event. If it was a GART error, skip that process.
1988 	 *
1989 	 * FIXME: this should go somewhere else, if at all.
1990 	 */
1991 	if (regs.nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
1992 		edac_mc_handle_ue_no_info(mci, "UE bit is set");
1993 
1994 }
1995 
1996 /*
1997  * Use pvt->F2 which contains the F2 CPU PCI device to get the related
1998  * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
1999  */
2000 static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
2001 {
2002 	/* Reserve the ADDRESS MAP Device */
2003 	pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
2004 	if (!pvt->F1) {
2005 		amd64_err("error address map device not found: "
2006 			  "vendor %x device 0x%x (broken BIOS?)\n",
2007 			  PCI_VENDOR_ID_AMD, f1_id);
2008 		return -ENODEV;
2009 	}
2010 
2011 	/* Reserve the MISC Device */
2012 	pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
2013 	if (!pvt->F3) {
2014 		pci_dev_put(pvt->F1);
2015 		pvt->F1 = NULL;
2016 
2017 		amd64_err("error F3 device not found: "
2018 			  "vendor %x device 0x%x (broken BIOS?)\n",
2019 			  PCI_VENDOR_ID_AMD, f3_id);
2020 
2021 		return -ENODEV;
2022 	}
2023 	debugf1("F1: %s\n", pci_name(pvt->F1));
2024 	debugf1("F2: %s\n", pci_name(pvt->F2));
2025 	debugf1("F3: %s\n", pci_name(pvt->F3));
2026 
2027 	return 0;
2028 }
2029 
2030 static void free_mc_sibling_devs(struct amd64_pvt *pvt)
2031 {
2032 	pci_dev_put(pvt->F1);
2033 	pci_dev_put(pvt->F3);
2034 }
2035 
2036 /*
2037  * Retrieve the hardware registers of the memory controller (this includes the
2038  * 'Address Map' and 'Misc' device regs)
2039  */
2040 static void read_mc_regs(struct amd64_pvt *pvt)
2041 {
2042 	u64 msr_val;
2043 	u32 tmp;
2044 	int dram;
2045 
2046 	/*
2047 	 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
2048 	 * those are Read-As-Zero
2049 	 */
2050 	rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
2051 	debugf0("  TOP_MEM:  0x%016llx\n", pvt->top_mem);
2052 
2053 	/* check first whether TOP_MEM2 is enabled */
2054 	rdmsrl(MSR_K8_SYSCFG, msr_val);
2055 	if (msr_val & (1U << 21)) {
2056 		rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
2057 		debugf0("  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
2058 	} else
2059 		debugf0("  TOP_MEM2 disabled.\n");
2060 
2061 	amd64_read_pci_cfg(pvt->F3, K8_NBCAP, &pvt->nbcap);
2062 
2063 	if (pvt->ops->read_dram_ctl_register)
2064 		pvt->ops->read_dram_ctl_register(pvt);
2065 
2066 	for (dram = 0; dram < DRAM_REG_COUNT; dram++) {
2067 		/*
2068 		 * Call CPU specific READ function to get the DRAM Base and
2069 		 * Limit values from the DCT.
2070 		 */
2071 		pvt->ops->read_dram_base_limit(pvt, dram);
2072 
2073 		/*
2074 		 * Only print out debug info on rows with R or W enabled.
2075 		 * In normal processing, the compiler should optimize this
2076 		 * whole 'if' debug output block away.
2077 		 */
2078 		if (pvt->dram_rw_en[dram] != 0) {
2079 			debugf1("  DRAM-BASE[%d]: 0x%016llx "
2080 				"DRAM-LIMIT:  0x%016llx\n",
2081 				dram,
2082 				pvt->dram_base[dram],
2083 				pvt->dram_limit[dram]);
2084 
2085 			debugf1("        IntlvEn=%s %s %s "
2086 				"IntlvSel=%d DstNode=%d\n",
2087 				pvt->dram_IntlvEn[dram] ?
2088 					"Enabled" : "Disabled",
2089 				(pvt->dram_rw_en[dram] & 0x2) ? "W" : "!W",
2090 				(pvt->dram_rw_en[dram] & 0x1) ? "R" : "!R",
2091 				pvt->dram_IntlvSel[dram],
2092 				pvt->dram_DstNode[dram]);
2093 		}
2094 	}
2095 
2096 	amd64_read_dct_base_mask(pvt);
2097 
2098 	amd64_read_pci_cfg(pvt->F1, K8_DHAR, &pvt->dhar);
2099 	amd64_read_dbam_reg(pvt);
2100 
2101 	amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);
2102 
2103 	amd64_read_pci_cfg(pvt->F2, F10_DCLR_0, &pvt->dclr0);
2104 	amd64_read_pci_cfg(pvt->F2, F10_DCHR_0, &pvt->dchr0);
2105 
2106 	if (boot_cpu_data.x86 >= 0x10) {
2107 		if (!dct_ganging_enabled(pvt)) {
2108 			amd64_read_pci_cfg(pvt->F2, F10_DCLR_1, &pvt->dclr1);
2109 			amd64_read_pci_cfg(pvt->F2, F10_DCHR_1, &pvt->dchr1);
2110 		}
2111 		amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
2112 	}
2113 
2114 	if (boot_cpu_data.x86 == 0x10 &&
2115 	    boot_cpu_data.x86_model > 7 &&
2116 	    /* F3x180[EccSymbolSize]=1 => x8 symbols */
2117 	    tmp & BIT(25))
2118 		pvt->syn_type = 8;
2119 	else
2120 		pvt->syn_type = 4;
2121 
2122 	amd64_dump_misc_regs(pvt);
2123 }
2124 
2125 /*
2126  * NOTE: CPU Revision Dependent code
2127  *
2128  * Input:
2129  *	@csrow_nr ChipSelect Row Number (0..pvt->cs_count-1)
2130  *	k8 private pointer to -->
2131  *			DRAM Bank Address mapping register
2132  *			node_id
2133  *			DCL register where dual_channel_active is
2134  *
2135  * The DBAM register consists of four 4-bit fields, defined as follows:
2136  *
2137  * Bits:	CSROWs
2138  * 0-3		CSROWs 0 and 1
2139  * 4-7		CSROWs 2 and 3
2140  * 8-11		CSROWs 4 and 5
2141  * 12-15	CSROWs 6 and 7
2142  *
2143  * Values range from 0 to 15.
2144  * The meaning of the values depends on CPU revision and dual-channel state;
2145  * see the relevant BKDG for more info.
2146  *
2147  * The memory controller provides for a total of only 8 CSROWs in its current
2148  * architecture. Each "pair" of CSROWs normally represents just one DIMM in
2149  * single channel or two (2) DIMMs in dual channel mode.
2150  *
2151  * The following code logic collapses the various tables for CSROW based on CPU
2152  * revision.
2153  *
2154  * Returns:
2155  *	The number of PAGE_SIZE pages that the specified CSROW number
2156  *	encompasses
2157  *
2158  */
2159 static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
2160 {
2161 	u32 cs_mode, nr_pages;
2162 
2163 	/*
2164 	 * The math on this doesn't look right on the surface because x/2*4 can
2165 	 * be simplified to x*2 but this expression makes use of the fact that
2166 	 * it is integral math where 1/2=0. This intermediate value becomes the
2167 	 * number of bits to shift the DBAM register to extract the proper CSROW
2168 	 * field.
2169 	 */
2170 	cs_mode = (pvt->dbam0 >> ((csrow_nr / 2) * 4)) & 0xF;
2171 
2172 	nr_pages = pvt->ops->dbam_to_cs(pvt, cs_mode) << (20 - PAGE_SHIFT);
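	/*
	 * Example: csrow_nr == 5 -> (5/2) * 4 == 8, i.e. cs_mode comes from
	 * DBAM[11:8], the field covering CSROWs 4 and 5 (see the table above).
	 * dbam_to_cs() returns the chip select size in MB, so e.g. a 512MB
	 * chip select with 4K pages yields 512 << (20 - 12) = 131072 pages.
	 */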
2173 
2174 	/*
2175 	 * If dual channel then double the memory size of single channel.
2176 	 * Channel count is 1 or 2
2177 	 */
2178 	nr_pages <<= (pvt->channel_count - 1);
2179 
2180 	debugf0("  (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode);
2181 	debugf0("    nr_pages= %u  channel-count = %d\n",
2182 		nr_pages, pvt->channel_count);
2183 
2184 	return nr_pages;
2185 }
2186 
2187 /*
2188  * Initialize the array of csrow attribute instances, based on the values
2189  * from pci config hardware registers.
2190  */
2191 static int init_csrows(struct mem_ctl_info *mci)
2192 {
2193 	struct csrow_info *csrow;
2194 	struct amd64_pvt *pvt = mci->pvt_info;
2195 	u64 input_addr_min, input_addr_max, sys_addr;
2196 	u32 val;
2197 	int i, empty = 1;
2198 
2199 	amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &val);
2200 
2201 	pvt->nbcfg = val;
2202 	pvt->ctl_error_info.nbcfg = val;
2203 
2204 	debugf0("node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2205 		pvt->mc_node_id, val,
2206 		!!(val & K8_NBCFG_CHIPKILL), !!(val & K8_NBCFG_ECC_ENABLE));
2207 
2208 	for (i = 0; i < pvt->cs_count; i++) {
2209 		csrow = &mci->csrows[i];
2210 
2211 		if ((pvt->dcsb0[i] & K8_DCSB_CS_ENABLE) == 0) {
2212 			debugf1("----CSROW %d EMPTY for node %d\n", i,
2213 				pvt->mc_node_id);
2214 			continue;
2215 		}
2216 
2217 		debugf1("----CSROW %d VALID for MC node %d\n",
2218 			i, pvt->mc_node_id);
2219 
2220 		empty = 0;
2221 		csrow->nr_pages = amd64_csrow_nr_pages(i, pvt);
2222 		find_csrow_limits(mci, i, &input_addr_min, &input_addr_max);
2223 		sys_addr = input_addr_to_sys_addr(mci, input_addr_min);
2224 		csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT);
2225 		sys_addr = input_addr_to_sys_addr(mci, input_addr_max);
2226 		csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT);
2227 		csrow->page_mask = ~mask_from_dct_mask(pvt, i);
2228 		/* 8 bytes of resolution */
2229 
2230 		csrow->mtype = amd64_determine_memory_type(pvt, i);
2231 
2232 		debugf1("  for MC node %d csrow %d:\n", pvt->mc_node_id, i);
2233 		debugf1("    input_addr_min: 0x%lx input_addr_max: 0x%lx\n",
2234 			(unsigned long)input_addr_min,
2235 			(unsigned long)input_addr_max);
2236 		debugf1("    sys_addr: 0x%lx  page_mask: 0x%lx\n",
2237 			(unsigned long)sys_addr, csrow->page_mask);
2238 		debugf1("    nr_pages: %u  first_page: 0x%lx "
2239 			"last_page: 0x%lx\n",
2240 			(unsigned)csrow->nr_pages,
2241 			csrow->first_page, csrow->last_page);
2242 
2243 		/*
2244 		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
2245 		 */
2246 		if (pvt->nbcfg & K8_NBCFG_ECC_ENABLE)
2247 			csrow->edac_mode =
2248 			    (pvt->nbcfg & K8_NBCFG_CHIPKILL) ?
2249 			    EDAC_S4ECD4ED : EDAC_SECDED;
2250 		else
2251 			csrow->edac_mode = EDAC_NONE;
2252 	}
2253 
2254 	return empty;
2255 }
2256 
2257 /* get all cores on this DCT */
2258 static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, int nid)
2259 {
2260 	int cpu;
2261 
2262 	for_each_online_cpu(cpu)
2263 		if (amd_get_nb_id(cpu) == nid)
2264 			cpumask_set_cpu(cpu, mask);
2265 }
2266 
2267 /* check MCG_CTL on all the cpus on this node */
2268 static bool amd64_nb_mce_bank_enabled_on_node(int nid)
2269 {
2270 	cpumask_var_t mask;
2271 	int cpu, nbe;
2272 	bool ret = false;
2273 
2274 	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
2275 		amd64_warn("%s: Error allocating mask\n", __func__);
2276 		return false;
2277 	}
2278 
2279 	get_cpus_on_this_dct_cpumask(mask, nid);
2280 
2281 	rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);
2282 
2283 	for_each_cpu(cpu, mask) {
2284 		struct msr *reg = per_cpu_ptr(msrs, cpu);
2285 		nbe = reg->l & K8_MSR_MCGCTL_NBE;
2286 
2287 		debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2288 			cpu, reg->q,
2289 			(nbe ? "enabled" : "disabled"));
2290 
2291 		if (!nbe)
2292 			goto out;
2293 	}
2294 	ret = true;
2295 
2296 out:
2297 	free_cpumask_var(mask);
2298 	return ret;
2299 }
2300 
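/*
 * Set (@on == true) or clear the NB machine check enable bit
 * (K8_MSR_MCGCTL_NBE) in MCG_CTL on all cores of node @nid, remembering or
 * restoring the pre-existing BIOS setting via @s->flags.nb_mce_enable.
 */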
2301 static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
2302 {
2303 	cpumask_var_t cmask;
2304 	int cpu;
2305 
2306 	if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
2307 		amd64_warn("%s: error allocating mask\n", __func__);
2308 		return -ENOMEM;
2309 	}
2310 
2311 	get_cpus_on_this_dct_cpumask(cmask, nid);
2312 
2313 	rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2314 
2315 	for_each_cpu(cpu, cmask) {
2316 
2317 		struct msr *reg = per_cpu_ptr(msrs, cpu);
2318 
2319 		if (on) {
2320 			if (reg->l & K8_MSR_MCGCTL_NBE)
2321 				s->flags.nb_mce_enable = 1;
2322 
2323 			reg->l |= K8_MSR_MCGCTL_NBE;
2324 		} else {
2325 			/*
2326 			 * Turn off NB MCE reporting only when it was off before
2327 			 */
2328 			if (!s->flags.nb_mce_enable)
2329 				reg->l &= ~K8_MSR_MCGCTL_NBE;
2330 		}
2331 	}
2332 	wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2333 
2334 	free_cpumask_var(cmask);
2335 
2336 	return 0;
2337 }
2338 
2339 static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
2340 				       struct pci_dev *F3)
2341 {
2342 	bool ret = true;
2343 	u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
2344 
2345 	if (toggle_ecc_err_reporting(s, nid, ON)) {
2346 		amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
2347 		return false;
2348 	}
2349 
2350 	amd64_read_pci_cfg(F3, K8_NBCTL, &value);
2351 
2352 	/* turn on UECCEn and CECCEn bits */
2353 	s->old_nbctl   = value & mask;
2354 	s->nbctl_valid = true;
2355 
2356 	value |= mask;
2357 	pci_write_config_dword(F3, K8_NBCTL, value);
2358 
2359 	amd64_read_pci_cfg(F3, K8_NBCFG, &value);
2360 
2361 	debugf0("1: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2362 		nid, value,
2363 		!!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE));
2364 
2365 	if (!(value & K8_NBCFG_ECC_ENABLE)) {
2366 		amd64_warn("DRAM ECC disabled on this node, enabling...\n");
2367 
2368 		s->flags.nb_ecc_prev = 0;
2369 
2370 		/* Attempt to turn on DRAM ECC Enable */
2371 		value |= K8_NBCFG_ECC_ENABLE;
2372 		pci_write_config_dword(F3, K8_NBCFG, value);
2373 
2374 		amd64_read_pci_cfg(F3, K8_NBCFG, &value);
2375 
2376 		if (!(value & K8_NBCFG_ECC_ENABLE)) {
2377 			amd64_warn("Hardware rejected DRAM ECC enable, "
2378 				   "check memory DIMM configuration.\n");
2379 			ret = false;
2380 		} else {
2381 			amd64_info("Hardware accepted DRAM ECC Enable\n");
2382 		}
2383 	} else {
2384 		s->flags.nb_ecc_prev = 1;
2385 	}
2386 
2387 	debugf0("2: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2388 		nid, value,
2389 		!!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE));
2390 
2391 	return ret;
2392 }
2393 
2394 static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
2395 					struct pci_dev *F3)
2396 {
2397 	u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
2398 
2399 	if (!s->nbctl_valid)
2400 		return;
2401 
2402 	amd64_read_pci_cfg(F3, K8_NBCTL, &value);
2403 	value &= ~mask;
2404 	value |= s->old_nbctl;
2405 
2406 	pci_write_config_dword(F3, K8_NBCTL, value);
2407 
2408 	/* restore previous BIOS DRAM ECC "off" setting we force-enabled */
2409 	if (!s->flags.nb_ecc_prev) {
2410 		amd64_read_pci_cfg(F3, K8_NBCFG, &value);
2411 		value &= ~K8_NBCFG_ECC_ENABLE;
2412 		pci_write_config_dword(F3, K8_NBCFG, value);
2413 	}
2414 
2415 	/* restore the NB Enable MCGCTL bit */
2416 	if (toggle_ecc_err_reporting(s, nid, OFF))
2417 		amd64_warn("Error restoring NB MCGCTL settings!\n");
2418 }
2419 
2420 /*
2421  * EDAC requires that the BIOS have ECC enabled before
2422  * taking over the processing of ECC errors. A command line
2423  * option allows force-enabling the hardware ECC later in
2424  * enable_ecc_error_reporting().
2425  */
2426 static const char *ecc_msg =
2427 	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
2428 	" Either enable ECC checking or force module loading by setting "
2429 	"'ecc_enable_override'.\n"
2430 	" (Note that use of the override may cause unknown side effects.)\n";
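
/*
 * ecc_enable_override is a module parameter, so it can be given at load time
 * (assuming the module is built as amd64_edac), e.g.:
 *
 *	modprobe amd64_edac ecc_enable_override=1
 */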
2431 
2432 static bool ecc_enabled(struct pci_dev *F3, u8 nid)
2433 {
2434 	u32 value;
2435 	u8 ecc_en = 0;
2436 	bool nb_mce_en = false;
2437 
2438 	amd64_read_pci_cfg(F3, K8_NBCFG, &value);
2439 
2440 	ecc_en = !!(value & K8_NBCFG_ECC_ENABLE);
2441 	amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));
2442 
2443 	nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid);
2444 	if (!nb_mce_en)
2445 		amd64_notice("NB MCE bank disabled, set MSR "
2446 			     "0x%08x[4] on node %d to enable.\n",
2447 			     MSR_IA32_MCG_CTL, nid);
2448 
2449 	if (!ecc_en || !nb_mce_en) {
2450 		amd64_notice("%s", ecc_msg);
2451 		return false;
2452 	}
2453 	return true;
2454 }
2455 
2456 struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
2457 					  ARRAY_SIZE(amd64_inj_attrs) +
2458 					  1];
2459 
2460 struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } };
2461 
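/*
 * Build the driver's sysfs attribute list: the debug attributes, plus the
 * error injection attributes on F10h and later, terminated by an empty entry.
 */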
2462 static void set_mc_sysfs_attrs(struct mem_ctl_info *mci)
2463 {
2464 	unsigned int i = 0, j = 0;
2465 
2466 	for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++)
2467 		sysfs_attrs[i] = amd64_dbg_attrs[i];
2468 
2469 	if (boot_cpu_data.x86 >= 0x10)
2470 		for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++)
2471 			sysfs_attrs[i] = amd64_inj_attrs[j];
2472 
2473 	sysfs_attrs[i] = terminator;
2474 
2475 	mci->mc_driver_sysfs_attributes = sysfs_attrs;
2476 }
2477 
2478 static void setup_mci_misc_attrs(struct mem_ctl_info *mci)
2479 {
2480 	struct amd64_pvt *pvt = mci->pvt_info;
2481 
2482 	mci->mtype_cap		= MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
2483 	mci->edac_ctl_cap	= EDAC_FLAG_NONE;
2484 
2485 	if (pvt->nbcap & K8_NBCAP_SECDED)
2486 		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
2487 
2488 	if (pvt->nbcap & K8_NBCAP_CHIPKILL)
2489 		mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
2490 
2491 	mci->edac_cap		= amd64_determine_edac_cap(pvt);
2492 	mci->mod_name		= EDAC_MOD_STR;
2493 	mci->mod_ver		= EDAC_AMD64_VERSION;
2494 	mci->ctl_name		= pvt->ctl_name;
2495 	mci->dev_name		= pci_name(pvt->F2);
2496 	mci->ctl_page_to_phys	= NULL;
2497 
2498 	/* memory scrubber interface */
2499 	mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
2500 	mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
2501 }
2502 
2503 /*
2504  * returns a pointer to the family descriptor on success, NULL otherwise.
2505  */
2506 static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
2507 {
2508 	u8 fam = boot_cpu_data.x86;
2509 	struct amd64_family_type *fam_type = NULL;
2510 
2511 	switch (fam) {
2512 	case 0xf:
2513 		fam_type		= &amd64_family_types[K8_CPUS];
2514 		pvt->ops		= &amd64_family_types[K8_CPUS].ops;
2515 		pvt->ctl_name		= fam_type->ctl_name;
2516 		pvt->min_scrubrate	= K8_MIN_SCRUB_RATE_BITS;
2517 		break;
2518 	case 0x10:
2519 		fam_type		= &amd64_family_types[F10_CPUS];
2520 		pvt->ops		= &amd64_family_types[F10_CPUS].ops;
2521 		pvt->ctl_name		= fam_type->ctl_name;
2522 		pvt->min_scrubrate	= F10_MIN_SCRUB_RATE_BITS;
2523 		break;
2524 
2525 	default:
2526 		amd64_err("Unsupported family!\n");
2527 		return NULL;
2528 	}
2529 
2530 	pvt->ext_model = boot_cpu_data.x86_model >> 4;
2531 
2532 	amd64_info("%s %sdetected (node %d).\n", pvt->ctl_name,
2533 		     (fam == 0xf ?
2534 				(pvt->ext_model >= K8_REV_F  ? "revF or later "
2535 							     : "revE or earlier ")
2536 				 : ""), pvt->mc_node_id);
2537 	return fam_type;
2538 }
2539 
2540 static int amd64_init_one_instance(struct pci_dev *F2)
2541 {
2542 	struct amd64_pvt *pvt = NULL;
2543 	struct amd64_family_type *fam_type = NULL;
2544 	struct mem_ctl_info *mci = NULL;
2545 	int err = 0, ret;
2546 	u8 nid = get_node_id(F2);
2547 
2548 	ret = -ENOMEM;
2549 	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
2550 	if (!pvt)
2551 		goto err_ret;
2552 
2553 	pvt->mc_node_id	= nid;
2554 	pvt->F2 = F2;
2555 
2556 	ret = -EINVAL;
2557 	fam_type = amd64_per_family_init(pvt);
2558 	if (!fam_type)
2559 		goto err_free;
2560 
2561 	ret = -ENODEV;
2562 	err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
2563 	if (err)
2564 		goto err_free;
2565 
2566 	read_mc_regs(pvt);
2567 
2568 	/*
2569 	 * We need to determine how many memory channels there are. Then use
2570 	 * that information for calculating the size of the dynamic instance
2571 	 * tables in the 'mci' structure.
2572 	 */
2573 	ret = -EINVAL;
2574 	pvt->channel_count = pvt->ops->early_channel_count(pvt);
2575 	if (pvt->channel_count < 0)
2576 		goto err_siblings;
2577 
2578 	ret = -ENOMEM;
2579 	mci = edac_mc_alloc(0, pvt->cs_count, pvt->channel_count, nid);
2580 	if (!mci)
2581 		goto err_siblings;
2582 
2583 	mci->pvt_info = pvt;
2584 	mci->dev = &pvt->F2->dev;
2585 
2586 	setup_mci_misc_attrs(mci);
2587 
2588 	if (init_csrows(mci))
2589 		mci->edac_cap = EDAC_FLAG_NONE;
2590 
2591 	set_mc_sysfs_attrs(mci);
2592 
2593 	ret = -ENODEV;
2594 	if (edac_mc_add_mc(mci)) {
2595 		debugf1("failed edac_mc_add_mc()\n");
2596 		goto err_add_mc;
2597 	}
2598 
2599 	/* register stuff with EDAC MCE */
2600 	if (report_gart_errors)
2601 		amd_report_gart_errors(true);
2602 
2603 	amd_register_ecc_decoder(amd64_decode_bus_error);
2604 
2605 	mcis[nid] = mci;
2606 
2607 	atomic_inc(&drv_instances);
2608 
2609 	return 0;
2610 
2611 err_add_mc:
2612 	edac_mc_free(mci);
2613 
2614 err_siblings:
2615 	free_mc_sibling_devs(pvt);
2616 
2617 err_free:
2618 	kfree(pvt);
2619 
2620 err_ret:
2621 	return ret;
2622 }
2623 
2624 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
2625 					     const struct pci_device_id *mc_type)
2626 {
2627 	u8 nid = get_node_id(pdev);
2628 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2629 	struct ecc_settings *s;
2630 	int ret = 0;
2631 
2632 	ret = pci_enable_device(pdev);
2633 	if (ret < 0) {
2634 		debugf0("ret=%d\n", ret);
2635 		return -EIO;
2636 	}
2637 
2638 	ret = -ENOMEM;
2639 	s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
2640 	if (!s)
2641 		goto err_out;
2642 
2643 	ecc_stngs[nid] = s;
2644 
2645 	if (!ecc_enabled(F3, nid)) {
2646 		ret = -ENODEV;
2647 
2648 		if (!ecc_enable_override)
2649 			goto err_enable;
2650 
2651 		amd64_warn("Forcing ECC on!\n");
2652 
2653 		if (!enable_ecc_error_reporting(s, nid, F3))
2654 			goto err_enable;
2655 	}
2656 
2657 	ret = amd64_init_one_instance(pdev);
2658 	if (ret < 0) {
2659 		amd64_err("Error probing instance: %d\n", nid);
2660 		restore_ecc_error_reporting(s, nid, F3);
2661 	}
2662 
2663 	return ret;
2664 
2665 err_enable:
2666 	kfree(s);
2667 	ecc_stngs[nid] = NULL;
2668 
2669 err_out:
2670 	return ret;
2671 }
2672 
2673 static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
2674 {
2675 	struct mem_ctl_info *mci;
2676 	struct amd64_pvt *pvt;
2677 	u8 nid = get_node_id(pdev);
2678 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2679 	struct ecc_settings *s = ecc_stngs[nid];
2680 
2681 	/* Remove from EDAC CORE tracking list */
2682 	mci = edac_mc_del_mc(&pdev->dev);
2683 	if (!mci)
2684 		return;
2685 
2686 	pvt = mci->pvt_info;
2687 
2688 	restore_ecc_error_reporting(s, nid, F3);
2689 
2690 	free_mc_sibling_devs(pvt);
2691 
2692 	/* unregister from EDAC MCE */
2693 	amd_report_gart_errors(false);
2694 	amd_unregister_ecc_decoder(amd64_decode_bus_error);
2695 
2696 	kfree(ecc_stngs[nid]);
2697 	ecc_stngs[nid] = NULL;
2698 
2699 	/* Free the EDAC CORE resources */
2700 	mci->pvt_info = NULL;
2701 	mcis[nid] = NULL;
2702 
2703 	kfree(pvt);
2704 	edac_mc_free(mci);
2705 }
2706 
2707 /*
2708  * This table is part of the interface for loading drivers for PCI devices. The
2709  * PCI core identifies what devices are on a system during boot, and then
2710  * queries this table to see whether this driver handles a given device.
2711  */
2712 static const struct pci_device_id amd64_pci_table[] __devinitdata = {
2713 	{
2714 		.vendor		= PCI_VENDOR_ID_AMD,
2715 		.device		= PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
2716 		.subvendor	= PCI_ANY_ID,
2717 		.subdevice	= PCI_ANY_ID,
2718 		.class		= 0,
2719 		.class_mask	= 0,
2720 	},
2721 	{
2722 		.vendor		= PCI_VENDOR_ID_AMD,
2723 		.device		= PCI_DEVICE_ID_AMD_10H_NB_DRAM,
2724 		.subvendor	= PCI_ANY_ID,
2725 		.subdevice	= PCI_ANY_ID,
2726 		.class		= 0,
2727 		.class_mask	= 0,
2728 	},
2729 	{0, }
2730 };
2731 MODULE_DEVICE_TABLE(pci, amd64_pci_table);
2732 
2733 static struct pci_driver amd64_pci_driver = {
2734 	.name		= EDAC_MOD_STR,
2735 	.probe		= amd64_probe_one_instance,
2736 	.remove		= __devexit_p(amd64_remove_one_instance),
2737 	.id_table	= amd64_pci_table,
2738 };
2739 
2740 static void setup_pci_device(void)
2741 {
2742 	struct mem_ctl_info *mci;
2743 	struct amd64_pvt *pvt;
2744 
2745 	if (amd64_ctl_pci)
2746 		return;
2747 
2748 	mci = mcis[0];
2749 	if (mci) {
2750 
2751 		pvt = mci->pvt_info;
2752 		amd64_ctl_pci =
2753 			edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
2754 
2755 		if (!amd64_ctl_pci) {
2756 			pr_warning("%s(): Unable to create PCI control\n",
2757 				   __func__);
2758 
2759 			pr_warning("%s(): PCI error report via EDAC not set\n",
2760 				   __func__);
2761 			}
2762 	}
2763 }
2764 
2765 static int __init amd64_edac_init(void)
2766 {
2767 	int err = -ENODEV;
2768 
2769 	edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");
2770 
2771 	opstate_init();
2772 
2773 	if (amd_cache_northbridges() < 0)
2774 		goto err_ret;
2775 
2776 	err = -ENOMEM;
2777 	mcis	  = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
2778 	ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
2779 	if (!(mcis && ecc_stngs))
2780 		goto err_ret;
2781 
2782 	msrs = msrs_alloc();
2783 	if (!msrs)
2784 		goto err_free;
2785 
2786 	err = pci_register_driver(&amd64_pci_driver);
2787 	if (err)
2788 		goto err_pci;
2789 
2790 	err = -ENODEV;
2791 	if (!atomic_read(&drv_instances))
2792 		goto err_no_instances;
2793 
2794 	setup_pci_device();
2795 	return 0;
2796 
2797 err_no_instances:
2798 	pci_unregister_driver(&amd64_pci_driver);
2799 
2800 err_pci:
2801 	msrs_free(msrs);
2802 	msrs = NULL;
2803 
2804 err_free:
2805 	kfree(mcis);
2806 	mcis = NULL;
2807 
2808 	kfree(ecc_stngs);
2809 	ecc_stngs = NULL;
2810 
2811 err_ret:
2812 	return err;
2813 }
2814 
2815 static void __exit amd64_edac_exit(void)
2816 {
2817 	if (amd64_ctl_pci)
2818 		edac_pci_release_generic_ctl(amd64_ctl_pci);
2819 
2820 	pci_unregister_driver(&amd64_pci_driver);
2821 
2822 	kfree(ecc_stngs);
2823 	ecc_stngs = NULL;
2824 
2825 	kfree(mcis);
2826 	mcis = NULL;
2827 
2828 	msrs_free(msrs);
2829 	msrs = NULL;
2830 }
2831 
2832 module_init(amd64_edac_init);
2833 module_exit(amd64_edac_exit);
2834 
2835 MODULE_LICENSE("GPL");
2836 MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
2837 		"Dave Peterson, Thayne Harbaugh");
2838 MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
2839 		EDAC_AMD64_VERSION);
2840 
2841 module_param(edac_op_state, int, 0444);
2842 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2843