xref: /linux/drivers/edac/amd64_edac.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 #include "amd64_edac.h"
2 #include <asm/amd_nb.h>
3 
/* Driver-wide EDAC PCI control handle (not initialized in this part of the file). */
static struct edac_pci_ctl_info *pci_ctl;

/* Integer module parameter, exposed with permissions 0644. */
static int report_gart_errors;
module_param(report_gart_errors, int, 0644);

/*
 * Set by command line parameter. If BIOS has enabled the ECC, this override is
 * cleared to prevent re-enabling the hardware by this driver.
 */
static int ecc_enable_override;
module_param(ecc_enable_override, int, 0644);

/* Per-CPU struct msr storage. */
static struct msr __percpu *msrs;

/* Per-node stuff */
static struct ecc_settings **ecc_stngs;
20 
21 /*
22  * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
23  * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
24  * or higher value'.
25  *
26  *FIXME: Produce a better mapping/linearisation.
27  */
28 static const struct scrubrate {
29        u32 scrubval;           /* bit pattern for scrub rate */
30        u32 bandwidth;          /* bandwidth consumed (bytes/sec) */
31 } scrubrates[] = {
32 	{ 0x01, 1600000000UL},
33 	{ 0x02, 800000000UL},
34 	{ 0x03, 400000000UL},
35 	{ 0x04, 200000000UL},
36 	{ 0x05, 100000000UL},
37 	{ 0x06, 50000000UL},
38 	{ 0x07, 25000000UL},
39 	{ 0x08, 12284069UL},
40 	{ 0x09, 6274509UL},
41 	{ 0x0A, 3121951UL},
42 	{ 0x0B, 1560975UL},
43 	{ 0x0C, 781440UL},
44 	{ 0x0D, 390720UL},
45 	{ 0x0E, 195300UL},
46 	{ 0x0F, 97650UL},
47 	{ 0x10, 48854UL},
48 	{ 0x11, 24427UL},
49 	{ 0x12, 12213UL},
50 	{ 0x13, 6101UL},
51 	{ 0x14, 3051UL},
52 	{ 0x15, 1523UL},
53 	{ 0x16, 761UL},
54 	{ 0x00, 0UL},        /* scrubbing off */
55 };
56 
57 int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
58 			       u32 *val, const char *func)
59 {
60 	int err = 0;
61 
62 	err = pci_read_config_dword(pdev, offset, val);
63 	if (err)
64 		amd64_warn("%s: error reading F%dx%03x.\n",
65 			   func, PCI_FUNC(pdev->devfn), offset);
66 
67 	return err;
68 }
69 
70 int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
71 				u32 val, const char *func)
72 {
73 	int err = 0;
74 
75 	err = pci_write_config_dword(pdev, offset, val);
76 	if (err)
77 		amd64_warn("%s: error writing to F%dx%03x.\n",
78 			   func, PCI_FUNC(pdev->devfn), offset);
79 
80 	return err;
81 }
82 
83 /*
84  * Select DCT to which PCI cfg accesses are routed
85  */
86 static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
87 {
88 	u32 reg = 0;
89 
90 	amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
91 	reg &= (pvt->model == 0x30) ? ~3 : ~1;
92 	reg |= dct;
93 	amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
94 }
95 
96 /*
97  *
98  * Depending on the family, F2 DCT reads need special handling:
99  *
100  * K8: has a single DCT only and no address offsets >= 0x100
101  *
102  * F10h: each DCT has its own set of regs
103  *	DCT0 -> F2x040..
104  *	DCT1 -> F2x140..
105  *
106  * F16h: has only 1 DCT
107  *
108  * F15h: we select which DCT we access using F1x10C[DctCfgSel]
109  */
110 static inline int amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct,
111 					 int offset, u32 *val)
112 {
113 	switch (pvt->fam) {
114 	case 0xf:
115 		if (dct || offset >= 0x100)
116 			return -EINVAL;
117 		break;
118 
119 	case 0x10:
120 		if (dct) {
121 			/*
122 			 * Note: If ganging is enabled, barring the regs
123 			 * F2x[1,0]98 and F2x[1,0]9C; reads reads to F2x1xx
124 			 * return 0. (cf. Section 2.8.1 F10h BKDG)
125 			 */
126 			if (dct_ganging_enabled(pvt))
127 				return 0;
128 
129 			offset += 0x100;
130 		}
131 		break;
132 
133 	case 0x15:
134 		/*
135 		 * F15h: F2x1xx addresses do not map explicitly to DCT1.
136 		 * We should select which DCT we access using F1x10C[DctCfgSel]
137 		 */
138 		dct = (dct && pvt->model == 0x30) ? 3 : dct;
139 		f15h_select_dct(pvt, dct);
140 		break;
141 
142 	case 0x16:
143 		if (dct)
144 			return -EINVAL;
145 		break;
146 
147 	default:
148 		break;
149 	}
150 	return amd64_read_pci_cfg(pvt->F2, offset, val);
151 }
152 
153 /*
154  * Memory scrubber control interface. For K8, memory scrubbing is handled by
155  * hardware and can involve L2 cache, dcache as well as the main memory. With
156  * F10, this is extended to L3 cache scrubbing on CPU models sporting that
157  * functionality.
158  *
159  * This causes the "units" for the scrubbing speed to vary from 64 byte blocks
160  * (dram) over to cache lines. This is nasty, so we will use bandwidth in
161  * bytes/sec for the setting.
162  *
163  * Currently, we only do dram scrubbing. If the scrubbing is done in software on
164  * other archs, we might not have access to the caches directly.
165  */
166 
167 /*
168  * scan the scrub rate mapping table for a close or matching bandwidth value to
169  * issue. If requested is too big, then use last maximum value found.
170  */
171 static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
172 {
173 	u32 scrubval;
174 	int i;
175 
176 	/*
177 	 * map the configured rate (new_bw) to a value specific to the AMD64
178 	 * memory controller and apply to register. Search for the first
179 	 * bandwidth entry that is greater or equal than the setting requested
180 	 * and program that. If at last entry, turn off DRAM scrubbing.
181 	 *
182 	 * If no suitable bandwidth is found, turn off DRAM scrubbing entirely
183 	 * by falling back to the last element in scrubrates[].
184 	 */
185 	for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) {
186 		/*
187 		 * skip scrub rates which aren't recommended
188 		 * (see F10 BKDG, F3x58)
189 		 */
190 		if (scrubrates[i].scrubval < min_rate)
191 			continue;
192 
193 		if (scrubrates[i].bandwidth <= new_bw)
194 			break;
195 	}
196 
197 	scrubval = scrubrates[i].scrubval;
198 
199 	if (pvt->fam == 0x15 && pvt->model == 0x60) {
200 		f15h_select_dct(pvt, 0);
201 		pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F);
202 		f15h_select_dct(pvt, 1);
203 		pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F);
204 	} else {
205 		pci_write_bits32(pvt->F3, SCRCTRL, scrubval, 0x001F);
206 	}
207 
208 	if (scrubval)
209 		return scrubrates[i].bandwidth;
210 
211 	return 0;
212 }
213 
214 static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
215 {
216 	struct amd64_pvt *pvt = mci->pvt_info;
217 	u32 min_scrubrate = 0x5;
218 
219 	if (pvt->fam == 0xf)
220 		min_scrubrate = 0x0;
221 
222 	if (pvt->fam == 0x15) {
223 		/* Erratum #505 */
224 		if (pvt->model < 0x10)
225 			f15h_select_dct(pvt, 0);
226 
227 		if (pvt->model == 0x60)
228 			min_scrubrate = 0x6;
229 	}
230 	return __set_scrub_rate(pvt, bw, min_scrubrate);
231 }
232 
233 static int get_scrub_rate(struct mem_ctl_info *mci)
234 {
235 	struct amd64_pvt *pvt = mci->pvt_info;
236 	u32 scrubval = 0;
237 	int i, retval = -EINVAL;
238 
239 	if (pvt->fam == 0x15) {
240 		/* Erratum #505 */
241 		if (pvt->model < 0x10)
242 			f15h_select_dct(pvt, 0);
243 
244 		if (pvt->model == 0x60)
245 			amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
246 	} else
247 		amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
248 
249 	scrubval = scrubval & 0x001F;
250 
251 	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
252 		if (scrubrates[i].scrubval == scrubval) {
253 			retval = scrubrates[i].bandwidth;
254 			break;
255 		}
256 	}
257 	return retval;
258 }
259 
260 /*
261  * returns true if the SysAddr given by sys_addr matches the
262  * DRAM base/limit associated with node_id
263  */
264 static bool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid)
265 {
266 	u64 addr;
267 
268 	/* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
269 	 * all ones if the most significant implemented address bit is 1.
270 	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
271 	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
272 	 * Application Programming.
273 	 */
274 	addr = sys_addr & 0x000000ffffffffffull;
275 
276 	return ((addr >= get_dram_base(pvt, nid)) &&
277 		(addr <= get_dram_limit(pvt, nid)));
278 }
279 
280 /*
281  * Attempt to map a SysAddr to a node. On success, return a pointer to the
282  * mem_ctl_info structure for the node that the SysAddr maps to.
283  *
284  * On failure, return NULL.
285  */
286 static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
287 						u64 sys_addr)
288 {
289 	struct amd64_pvt *pvt;
290 	u8 node_id;
291 	u32 intlv_en, bits;
292 
293 	/*
294 	 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
295 	 * 3.4.4.2) registers to map the SysAddr to a node ID.
296 	 */
297 	pvt = mci->pvt_info;
298 
299 	/*
300 	 * The value of this field should be the same for all DRAM Base
301 	 * registers.  Therefore we arbitrarily choose to read it from the
302 	 * register for node 0.
303 	 */
304 	intlv_en = dram_intlv_en(pvt, 0);
305 
306 	if (intlv_en == 0) {
307 		for (node_id = 0; node_id < DRAM_RANGES; node_id++) {
308 			if (base_limit_match(pvt, sys_addr, node_id))
309 				goto found;
310 		}
311 		goto err_no_match;
312 	}
313 
314 	if (unlikely((intlv_en != 0x01) &&
315 		     (intlv_en != 0x03) &&
316 		     (intlv_en != 0x07))) {
317 		amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
318 		return NULL;
319 	}
320 
321 	bits = (((u32) sys_addr) >> 12) & intlv_en;
322 
323 	for (node_id = 0; ; ) {
324 		if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits)
325 			break;	/* intlv_sel field matches */
326 
327 		if (++node_id >= DRAM_RANGES)
328 			goto err_no_match;
329 	}
330 
331 	/* sanity test for sys_addr */
332 	if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) {
333 		amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address"
334 			   "range for node %d with node interleaving enabled.\n",
335 			   __func__, sys_addr, node_id);
336 		return NULL;
337 	}
338 
339 found:
340 	return edac_mc_find((int)node_id);
341 
342 err_no_match:
343 	edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
344 		 (unsigned long)sys_addr);
345 
346 	return NULL;
347 }
348 
349 /*
350  * compute the CS base address of the @csrow on the DRAM controller @dct.
351  * For details see F2x[5C:40] in the processor's BKDG
352  */
353 static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
354 				 u64 *base, u64 *mask)
355 {
356 	u64 csbase, csmask, base_bits, mask_bits;
357 	u8 addr_shift;
358 
359 	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
360 		csbase		= pvt->csels[dct].csbases[csrow];
361 		csmask		= pvt->csels[dct].csmasks[csrow];
362 		base_bits	= GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9);
363 		mask_bits	= GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9);
364 		addr_shift	= 4;
365 
366 	/*
367 	 * F16h and F15h, models 30h and later need two addr_shift values:
368 	 * 8 for high and 6 for low (cf. F16h BKDG).
369 	 */
370 	} else if (pvt->fam == 0x16 ||
371 		  (pvt->fam == 0x15 && pvt->model >= 0x30)) {
372 		csbase          = pvt->csels[dct].csbases[csrow];
373 		csmask          = pvt->csels[dct].csmasks[csrow >> 1];
374 
375 		*base  = (csbase & GENMASK_ULL(15,  5)) << 6;
376 		*base |= (csbase & GENMASK_ULL(30, 19)) << 8;
377 
378 		*mask = ~0ULL;
379 		/* poke holes for the csmask */
380 		*mask &= ~((GENMASK_ULL(15, 5)  << 6) |
381 			   (GENMASK_ULL(30, 19) << 8));
382 
383 		*mask |= (csmask & GENMASK_ULL(15, 5))  << 6;
384 		*mask |= (csmask & GENMASK_ULL(30, 19)) << 8;
385 
386 		return;
387 	} else {
388 		csbase		= pvt->csels[dct].csbases[csrow];
389 		csmask		= pvt->csels[dct].csmasks[csrow >> 1];
390 		addr_shift	= 8;
391 
392 		if (pvt->fam == 0x15)
393 			base_bits = mask_bits =
394 				GENMASK_ULL(30,19) | GENMASK_ULL(13,5);
395 		else
396 			base_bits = mask_bits =
397 				GENMASK_ULL(28,19) | GENMASK_ULL(13,5);
398 	}
399 
400 	*base  = (csbase & base_bits) << addr_shift;
401 
402 	*mask  = ~0ULL;
403 	/* poke holes for the csmask */
404 	*mask &= ~(mask_bits << addr_shift);
405 	/* OR them in */
406 	*mask |= (csmask & mask_bits) << addr_shift;
407 }
408 
/*
 * Chip-select iteration helpers. Every macro argument is parenthesized so
 * that expression arguments (e.g. "p + 1" for pvt) expand correctly.
 */

/* Iterate @i over all chip-select base registers of DCT @dct. */
#define for_each_chip_select(i, dct, pvt) \
	for ((i) = 0; (i) < (pvt)->csels[(dct)].b_cnt; (i)++)

/* The chip-select base register value @i of DCT @dct. */
#define chip_select_base(i, dct, pvt) \
	((pvt)->csels[(dct)].csbases[(i)])

/* Iterate @i over all chip-select mask registers of DCT @dct. */
#define for_each_chip_select_mask(i, dct, pvt) \
	for ((i) = 0; (i) < (pvt)->csels[(dct)].m_cnt; (i)++)
417 
418 /*
419  * @input_addr is an InputAddr associated with the node given by mci. Return the
420  * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
421  */
422 static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
423 {
424 	struct amd64_pvt *pvt;
425 	int csrow;
426 	u64 base, mask;
427 
428 	pvt = mci->pvt_info;
429 
430 	for_each_chip_select(csrow, 0, pvt) {
431 		if (!csrow_enabled(csrow, 0, pvt))
432 			continue;
433 
434 		get_cs_base_and_mask(pvt, csrow, 0, &base, &mask);
435 
436 		mask = ~mask;
437 
438 		if ((input_addr & mask) == (base & mask)) {
439 			edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n",
440 				 (unsigned long)input_addr, csrow,
441 				 pvt->mc_node_id);
442 
443 			return csrow;
444 		}
445 	}
446 	edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n",
447 		 (unsigned long)input_addr, pvt->mc_node_id);
448 
449 	return -1;
450 }
451 
452 /*
453  * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
454  * for the node represented by mci. Info is passed back in *hole_base,
455  * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
456  * info is invalid. Info may be invalid for either of the following reasons:
457  *
458  * - The revision of the node is not E or greater.  In this case, the DRAM Hole
459  *   Address Register does not exist.
460  *
461  * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
462  *   indicating that its contents are not valid.
463  *
464  * The values passed back in *hole_base, *hole_offset, and *hole_size are
465  * complete 32-bit values despite the fact that the bitfields in the DHAR
466  * only represent bits 31-24 of the base and offset values.
467  */
468 int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
469 			     u64 *hole_offset, u64 *hole_size)
470 {
471 	struct amd64_pvt *pvt = mci->pvt_info;
472 
473 	/* only revE and later have the DRAM Hole Address Register */
474 	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) {
475 		edac_dbg(1, "  revision %d for node %d does not support DHAR\n",
476 			 pvt->ext_model, pvt->mc_node_id);
477 		return 1;
478 	}
479 
480 	/* valid for Fam10h and above */
481 	if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) {
482 		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this system\n");
483 		return 1;
484 	}
485 
486 	if (!dhar_valid(pvt)) {
487 		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this node %d\n",
488 			 pvt->mc_node_id);
489 		return 1;
490 	}
491 
492 	/* This node has Memory Hoisting */
493 
494 	/* +------------------+--------------------+--------------------+-----
495 	 * | memory           | DRAM hole          | relocated          |
496 	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
497 	 * |                  |                    | DRAM hole          |
498 	 * |                  |                    | [0x100000000,      |
499 	 * |                  |                    |  (0x100000000+     |
500 	 * |                  |                    |   (0xffffffff-x))] |
501 	 * +------------------+--------------------+--------------------+-----
502 	 *
503 	 * Above is a diagram of physical memory showing the DRAM hole and the
504 	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
505 	 * starts at address x (the base address) and extends through address
506 	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
507 	 * addresses in the hole so that they start at 0x100000000.
508 	 */
509 
510 	*hole_base = dhar_base(pvt);
511 	*hole_size = (1ULL << 32) - *hole_base;
512 
513 	*hole_offset = (pvt->fam > 0xf) ? f10_dhar_offset(pvt)
514 					: k8_dhar_offset(pvt);
515 
516 	edac_dbg(1, "  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
517 		 pvt->mc_node_id, (unsigned long)*hole_base,
518 		 (unsigned long)*hole_offset, (unsigned long)*hole_size);
519 
520 	return 0;
521 }
522 EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
523 
524 /*
525  * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
526  * assumed that sys_addr maps to the node given by mci.
527  *
528  * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
529  * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
530  * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
531  * then it is also involved in translating a SysAddr to a DramAddr. Sections
532  * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
533  * These parts of the documentation are unclear. I interpret them as follows:
534  *
535  * When node n receives a SysAddr, it processes the SysAddr as follows:
536  *
537  * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
538  *    Limit registers for node n. If the SysAddr is not within the range
539  *    specified by the base and limit values, then node n ignores the Sysaddr
540  *    (since it does not map to node n). Otherwise continue to step 2 below.
541  *
542  * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
543  *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
544  *    the range of relocated addresses (starting at 0x100000000) from the DRAM
545  *    hole. If not, skip to step 3 below. Else get the value of the
546  *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
547  *    offset defined by this value from the SysAddr.
548  *
549  * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
550  *    Base register for node n. To obtain the DramAddr, subtract the base
551  *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
552  */
553 static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
554 {
555 	struct amd64_pvt *pvt = mci->pvt_info;
556 	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
557 	int ret;
558 
559 	dram_base = get_dram_base(pvt, pvt->mc_node_id);
560 
561 	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
562 				      &hole_size);
563 	if (!ret) {
564 		if ((sys_addr >= (1ULL << 32)) &&
565 		    (sys_addr < ((1ULL << 32) + hole_size))) {
566 			/* use DHAR to translate SysAddr to DramAddr */
567 			dram_addr = sys_addr - hole_offset;
568 
569 			edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
570 				 (unsigned long)sys_addr,
571 				 (unsigned long)dram_addr);
572 
573 			return dram_addr;
574 		}
575 	}
576 
577 	/*
578 	 * Translate the SysAddr to a DramAddr as shown near the start of
579 	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
580 	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
581 	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
582 	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
583 	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
584 	 * Programmer's Manual Volume 1 Application Programming.
585 	 */
586 	dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base;
587 
588 	edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
589 		 (unsigned long)sys_addr, (unsigned long)dram_addr);
590 	return dram_addr;
591 }
592 
/*
 * Map the IntlvEn field of a DRAM Base register (section 3.4.4.1), passed in
 * @intlv_en, to the number of SysAddr bits used for node interleaving:
 * 1 -> 1 bit, 3 -> 2 bits, 7 -> 3 bits, any other value in 0..7 -> 0 bits.
 * Values above 7 trigger BUG_ON().
 */
static int num_node_interleave_bits(unsigned intlv_en)
{
	BUG_ON(intlv_en > 7);

	switch (intlv_en) {
	case 1:
		return 1;
	case 3:
		return 2;
	case 7:
		return 3;
	default:
		return 0;
	}
}
607 
608 /* Translate the DramAddr given by @dram_addr to an InputAddr. */
609 static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
610 {
611 	struct amd64_pvt *pvt;
612 	int intlv_shift;
613 	u64 input_addr;
614 
615 	pvt = mci->pvt_info;
616 
617 	/*
618 	 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
619 	 * concerning translating a DramAddr to an InputAddr.
620 	 */
621 	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
622 	input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) +
623 		      (dram_addr & 0xfff);
624 
625 	edac_dbg(2, "  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
626 		 intlv_shift, (unsigned long)dram_addr,
627 		 (unsigned long)input_addr);
628 
629 	return input_addr;
630 }
631 
632 /*
633  * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
634  * assumed that @sys_addr maps to the node given by mci.
635  */
636 static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
637 {
638 	u64 input_addr;
639 
640 	input_addr =
641 	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
642 
643 	edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n",
644 		 (unsigned long)sys_addr, (unsigned long)input_addr);
645 
646 	return input_addr;
647 }
648 
649 /* Map the Error address to a PAGE and PAGE OFFSET. */
650 static inline void error_address_to_page_and_offset(u64 error_address,
651 						    struct err_info *err)
652 {
653 	err->page = (u32) (error_address >> PAGE_SHIFT);
654 	err->offset = ((u32) error_address) & ~PAGE_MASK;
655 }
656 
657 /*
658  * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
659  * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
660  * of a node that detected an ECC memory error.  mci represents the node that
661  * the error address maps to (possibly different from the node that detected
662  * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
663  * error.
664  */
665 static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
666 {
667 	int csrow;
668 
669 	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
670 
671 	if (csrow == -1)
672 		amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
673 				  "address 0x%lx\n", (unsigned long)sys_addr);
674 	return csrow;
675 }
676 
677 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
678 
679 /*
680  * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
681  * are ECC capable.
682  */
683 static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
684 {
685 	u8 bit;
686 	unsigned long edac_cap = EDAC_FLAG_NONE;
687 
688 	bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F)
689 		? 19
690 		: 17;
691 
692 	if (pvt->dclr0 & BIT(bit))
693 		edac_cap = EDAC_FLAG_SECDED;
694 
695 	return edac_cap;
696 }
697 
698 static void debug_display_dimm_sizes(struct amd64_pvt *, u8);
699 
700 static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
701 {
702 	edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);
703 
704 	if (pvt->dram_type == MEM_LRDDR3) {
705 		u32 dcsm = pvt->csels[chan].csmasks[0];
706 		/*
707 		 * It's assumed all LRDIMMs in a DCT are going to be of
708 		 * same 'type' until proven otherwise. So, use a cs
709 		 * value of '0' here to get dcsm value.
710 		 */
711 		edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3));
712 	}
713 
714 	edac_dbg(1, "All DIMMs support ECC:%s\n",
715 		    (dclr & BIT(19)) ? "yes" : "no");
716 
717 
718 	edac_dbg(1, "  PAR/ERR parity: %s\n",
719 		 (dclr & BIT(8)) ?  "enabled" : "disabled");
720 
721 	if (pvt->fam == 0x10)
722 		edac_dbg(1, "  DCT 128bit mode width: %s\n",
723 			 (dclr & BIT(11)) ?  "128b" : "64b");
724 
725 	edac_dbg(1, "  x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
726 		 (dclr & BIT(12)) ?  "yes" : "no",
727 		 (dclr & BIT(13)) ?  "yes" : "no",
728 		 (dclr & BIT(14)) ?  "yes" : "no",
729 		 (dclr & BIT(15)) ?  "yes" : "no");
730 }
731 
732 /* Display and decode various NB registers for debug purposes. */
733 static void dump_misc_regs(struct amd64_pvt *pvt)
734 {
735 	edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
736 
737 	edac_dbg(1, "  NB two channel DRAM capable: %s\n",
738 		 (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");
739 
740 	edac_dbg(1, "  ECC capable: %s, ChipKill ECC capable: %s\n",
741 		 (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
742 		 (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");
743 
744 	debug_dump_dramcfg_low(pvt, pvt->dclr0, 0);
745 
746 	edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);
747 
748 	edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n",
749 		 pvt->dhar, dhar_base(pvt),
750 		 (pvt->fam == 0xf) ? k8_dhar_offset(pvt)
751 				   : f10_dhar_offset(pvt));
752 
753 	edac_dbg(1, "  DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
754 
755 	debug_display_dimm_sizes(pvt, 0);
756 
757 	/* everything below this point is Fam10h and above */
758 	if (pvt->fam == 0xf)
759 		return;
760 
761 	debug_display_dimm_sizes(pvt, 1);
762 
763 	amd64_info("using %s syndromes.\n", ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
764 
765 	/* Only if NOT ganged does dclr1 have valid info */
766 	if (!dct_ganging_enabled(pvt))
767 		debug_dump_dramcfg_low(pvt, pvt->dclr1, 1);
768 }
769 
770 /*
771  * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
772  */
773 static void prep_chip_selects(struct amd64_pvt *pvt)
774 {
775 	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
776 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
777 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
778 	} else if (pvt->fam == 0x15 && pvt->model == 0x30) {
779 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
780 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
781 	} else {
782 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
783 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
784 	}
785 }
786 
787 /*
788  * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
789  */
790 static void read_dct_base_mask(struct amd64_pvt *pvt)
791 {
792 	int cs;
793 
794 	prep_chip_selects(pvt);
795 
796 	for_each_chip_select(cs, 0, pvt) {
797 		int reg0   = DCSB0 + (cs * 4);
798 		int reg1   = DCSB1 + (cs * 4);
799 		u32 *base0 = &pvt->csels[0].csbases[cs];
800 		u32 *base1 = &pvt->csels[1].csbases[cs];
801 
802 		if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
803 			edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
804 				 cs, *base0, reg0);
805 
806 		if (pvt->fam == 0xf)
807 			continue;
808 
809 		if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
810 			edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
811 				 cs, *base1, (pvt->fam == 0x10) ? reg1
812 								: reg0);
813 	}
814 
815 	for_each_chip_select_mask(cs, 0, pvt) {
816 		int reg0   = DCSM0 + (cs * 4);
817 		int reg1   = DCSM1 + (cs * 4);
818 		u32 *mask0 = &pvt->csels[0].csmasks[cs];
819 		u32 *mask1 = &pvt->csels[1].csmasks[cs];
820 
821 		if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
822 			edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
823 				 cs, *mask0, reg0);
824 
825 		if (pvt->fam == 0xf)
826 			continue;
827 
828 		if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
829 			edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
830 				 cs, *mask1, (pvt->fam == 0x10) ? reg1
831 								: reg0);
832 	}
833 }
834 
835 static void determine_memory_type(struct amd64_pvt *pvt)
836 {
837 	u32 dram_ctrl, dcsm;
838 
839 	switch (pvt->fam) {
840 	case 0xf:
841 		if (pvt->ext_model >= K8_REV_F)
842 			goto ddr3;
843 
844 		pvt->dram_type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
845 		return;
846 
847 	case 0x10:
848 		if (pvt->dchr0 & DDR3_MODE)
849 			goto ddr3;
850 
851 		pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
852 		return;
853 
854 	case 0x15:
855 		if (pvt->model < 0x60)
856 			goto ddr3;
857 
858 		/*
859 		 * Model 0x60h needs special handling:
860 		 *
861 		 * We use a Chip Select value of '0' to obtain dcsm.
862 		 * Theoretically, it is possible to populate LRDIMMs of different
863 		 * 'Rank' value on a DCT. But this is not the common case. So,
864 		 * it's reasonable to assume all DIMMs are going to be of same
865 		 * 'type' until proven otherwise.
866 		 */
867 		amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl);
868 		dcsm = pvt->csels[0].csmasks[0];
869 
870 		if (((dram_ctrl >> 8) & 0x7) == 0x2)
871 			pvt->dram_type = MEM_DDR4;
872 		else if (pvt->dclr0 & BIT(16))
873 			pvt->dram_type = MEM_DDR3;
874 		else if (dcsm & 0x3)
875 			pvt->dram_type = MEM_LRDDR3;
876 		else
877 			pvt->dram_type = MEM_RDDR3;
878 
879 		return;
880 
881 	case 0x16:
882 		goto ddr3;
883 
884 	default:
885 		WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
886 		pvt->dram_type = MEM_EMPTY;
887 	}
888 	return;
889 
890 ddr3:
891 	pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
892 }
893 
894 /* Get the number of DCT channels the memory controller is using. */
895 static int k8_early_channel_count(struct amd64_pvt *pvt)
896 {
897 	int flag;
898 
899 	if (pvt->ext_model >= K8_REV_F)
900 		/* RevF (NPT) and later */
901 		flag = pvt->dclr0 & WIDTH_128;
902 	else
903 		/* RevE and earlier */
904 		flag = pvt->dclr0 & REVE_WIDTH_128;
905 
906 	/* not used */
907 	pvt->dclr1 = 0;
908 
909 	return (flag) ? 2 : 1;
910 }
911 
912 /* On F10h and later ErrAddr is MC4_ADDR[47:1] */
913 static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
914 {
915 	u16 mce_nid = amd_get_nb_id(m->extcpu);
916 	struct mem_ctl_info *mci;
917 	u8 start_bit = 1;
918 	u8 end_bit   = 47;
919 	u64 addr;
920 
921 	mci = edac_mc_find(mce_nid);
922 	if (!mci)
923 		return 0;
924 
925 	pvt = mci->pvt_info;
926 
927 	if (pvt->fam == 0xf) {
928 		start_bit = 3;
929 		end_bit   = 39;
930 	}
931 
932 	addr = m->addr & GENMASK_ULL(end_bit, start_bit);
933 
934 	/*
935 	 * Erratum 637 workaround
936 	 */
937 	if (pvt->fam == 0x15) {
938 		u64 cc6_base, tmp_addr;
939 		u32 tmp;
940 		u8 intlv_en;
941 
942 		if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7)
943 			return addr;
944 
945 
946 		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
947 		intlv_en = tmp >> 21 & 0x7;
948 
949 		/* add [47:27] + 3 trailing bits */
950 		cc6_base  = (tmp & GENMASK_ULL(20, 0)) << 3;
951 
952 		/* reverse and add DramIntlvEn */
953 		cc6_base |= intlv_en ^ 0x7;
954 
955 		/* pin at [47:24] */
956 		cc6_base <<= 24;
957 
958 		if (!intlv_en)
959 			return cc6_base | (addr & GENMASK_ULL(23, 0));
960 
961 		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);
962 
963 							/* faster log2 */
964 		tmp_addr  = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1);
965 
966 		/* OR DramIntlvSel into bits [14:12] */
967 		tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9;
968 
969 		/* add remaining [11:0] bits from original MC4_ADDR */
970 		tmp_addr |= addr & GENMASK_ULL(11, 0);
971 
972 		return cc6_base | tmp_addr;
973 	}
974 
975 	return addr;
976 }
977 
978 static struct pci_dev *pci_get_related_function(unsigned int vendor,
979 						unsigned int device,
980 						struct pci_dev *related)
981 {
982 	struct pci_dev *dev = NULL;
983 
984 	while ((dev = pci_get_device(vendor, device, dev))) {
985 		if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
986 		    (dev->bus->number == related->bus->number) &&
987 		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
988 			break;
989 	}
990 
991 	return dev;
992 }
993 
994 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
995 {
996 	struct amd_northbridge *nb;
997 	struct pci_dev *f1 = NULL;
998 	unsigned int pci_func;
999 	int off = range << 3;
1000 	u32 llim;
1001 
1002 	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off,  &pvt->ranges[range].base.lo);
1003 	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo);
1004 
1005 	if (pvt->fam == 0xf)
1006 		return;
1007 
1008 	if (!dram_rw(pvt, range))
1009 		return;
1010 
1011 	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off,  &pvt->ranges[range].base.hi);
1012 	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi);
1013 
1014 	/* F15h: factor in CC6 save area by reading dst node's limit reg */
1015 	if (pvt->fam != 0x15)
1016 		return;
1017 
1018 	nb = node_to_amd_nb(dram_dst_node(pvt, range));
1019 	if (WARN_ON(!nb))
1020 		return;
1021 
1022 	if (pvt->model == 0x60)
1023 		pci_func = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1;
1024 	else if (pvt->model == 0x30)
1025 		pci_func = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1;
1026 	else
1027 		pci_func = PCI_DEVICE_ID_AMD_15H_NB_F1;
1028 
1029 	f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc);
1030 	if (WARN_ON(!f1))
1031 		return;
1032 
1033 	amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
1034 
1035 	pvt->ranges[range].lim.lo &= GENMASK_ULL(15, 0);
1036 
1037 				    /* {[39:27],111b} */
1038 	pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
1039 
1040 	pvt->ranges[range].lim.hi &= GENMASK_ULL(7, 0);
1041 
1042 				    /* [47:40] */
1043 	pvt->ranges[range].lim.hi |= llim >> 13;
1044 
1045 	pci_dev_put(f1);
1046 }
1047 
1048 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
1049 				    struct err_info *err)
1050 {
1051 	struct amd64_pvt *pvt = mci->pvt_info;
1052 
1053 	error_address_to_page_and_offset(sys_addr, err);
1054 
1055 	/*
1056 	 * Find out which node the error address belongs to. This may be
1057 	 * different from the node that detected the error.
1058 	 */
1059 	err->src_mci = find_mc_by_sys_addr(mci, sys_addr);
1060 	if (!err->src_mci) {
1061 		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
1062 			     (unsigned long)sys_addr);
1063 		err->err_code = ERR_NODE;
1064 		return;
1065 	}
1066 
1067 	/* Now map the sys_addr to a CSROW */
1068 	err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr);
1069 	if (err->csrow < 0) {
1070 		err->err_code = ERR_CSROW;
1071 		return;
1072 	}
1073 
1074 	/* CHIPKILL enabled */
1075 	if (pvt->nbcfg & NBCFG_CHIPKILL) {
1076 		err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
1077 		if (err->channel < 0) {
1078 			/*
1079 			 * Syndrome didn't map, so we don't know which of the
1080 			 * 2 DIMMs is in error. So we need to ID 'both' of them
1081 			 * as suspect.
1082 			 */
1083 			amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - "
1084 				      "possible error reporting race\n",
1085 				      err->syndrome);
1086 			err->err_code = ERR_CHANNEL;
1087 			return;
1088 		}
1089 	} else {
1090 		/*
1091 		 * non-chipkill ecc mode
1092 		 *
1093 		 * The k8 documentation is unclear about how to determine the
1094 		 * channel number when using non-chipkill memory.  This method
1095 		 * was obtained from email communication with someone at AMD.
1096 		 * (Wish the email was placed in this comment - norsk)
1097 		 */
1098 		err->channel = ((sys_addr & BIT(3)) != 0);
1099 	}
1100 }
1101 
/*
 * Map cs_mode index @i to a chip select size: 128 shifted left by a
 * mode-dependent amount, plus one more bit when @dct_width is set.
 */
static int ddr2_cs_size(unsigned i, bool dct_width)
{
	unsigned width_bit = dct_width ? 1 : 0;
	unsigned shift;

	/*
	 * Modes 0..2 shift by the mode itself; beyond that the shift is half
	 * the mode, rounded up (for even @i this equals i >> 1).
	 */
	if (i > 2)
		shift = (i + 1) >> 1;
	else
		shift = i;

	return 128 << (shift + width_bit);
}
1115 
1116 static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1117 				  unsigned cs_mode, int cs_mask_nr)
1118 {
1119 	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1120 
1121 	if (pvt->ext_model >= K8_REV_F) {
1122 		WARN_ON(cs_mode > 11);
1123 		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1124 	}
1125 	else if (pvt->ext_model >= K8_REV_D) {
1126 		unsigned diff;
1127 		WARN_ON(cs_mode > 10);
1128 
1129 		/*
1130 		 * the below calculation, besides trying to win an obfuscated C
1131 		 * contest, maps cs_mode values to DIMM chip select sizes. The
1132 		 * mappings are:
1133 		 *
1134 		 * cs_mode	CS size (mb)
1135 		 * =======	============
1136 		 * 0		32
1137 		 * 1		64
1138 		 * 2		128
1139 		 * 3		128
1140 		 * 4		256
1141 		 * 5		512
1142 		 * 6		256
1143 		 * 7		512
1144 		 * 8		1024
1145 		 * 9		1024
1146 		 * 10		2048
1147 		 *
1148 		 * Basically, it calculates a value with which to shift the
1149 		 * smallest CS size of 32MB.
1150 		 *
1151 		 * ddr[23]_cs_size have a similar purpose.
1152 		 */
1153 		diff = cs_mode/3 + (unsigned)(cs_mode > 5);
1154 
1155 		return 32 << (cs_mode - diff);
1156 	}
1157 	else {
1158 		WARN_ON(cs_mode > 6);
1159 		return 32 << cs_mode;
1160 	}
1161 }
1162 
1163 /*
1164  * Get the number of DCT channels in use.
1165  *
1166  * Return:
1167  *	number of Memory Channels in operation
1168  * Pass back:
1169  *	contents of the DCL0_LOW register
1170  */
1171 static int f1x_early_channel_count(struct amd64_pvt *pvt)
1172 {
1173 	int i, j, channels = 0;
1174 
1175 	/* On F10h, if we are in 128 bit mode, then we are using 2 channels */
1176 	if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128))
1177 		return 2;
1178 
1179 	/*
1180 	 * Need to check if in unganged mode: In such, there are 2 channels,
1181 	 * but they are not in 128 bit mode and thus the above 'dclr0' status
1182 	 * bit will be OFF.
1183 	 *
1184 	 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
1185 	 * their CSEnable bit on. If so, then SINGLE DIMM case.
1186 	 */
1187 	edac_dbg(0, "Data width is not 128 bits - need more decoding\n");
1188 
1189 	/*
1190 	 * Check DRAM Bank Address Mapping values for each DIMM to see if there
1191 	 * is more than just one DIMM present in unganged mode. Need to check
1192 	 * both controllers since DIMMs can be placed in either one.
1193 	 */
1194 	for (i = 0; i < 2; i++) {
1195 		u32 dbam = (i ? pvt->dbam1 : pvt->dbam0);
1196 
1197 		for (j = 0; j < 4; j++) {
1198 			if (DBAM_DIMM(j, dbam) > 0) {
1199 				channels++;
1200 				break;
1201 			}
1202 		}
1203 	}
1204 
1205 	if (channels > 2)
1206 		channels = 2;
1207 
1208 	amd64_info("MCT channel count: %d\n", channels);
1209 
1210 	return channels;
1211 }
1212 
/*
 * Map cs_mode index @i to a chip select size. Modes 0, 3 and 4 yield -1.
 * All other modes yield 128 (256 when @dct_width is set) shifted left by a
 * mode-dependent amount.
 */
static int ddr3_cs_size(unsigned i, bool dct_width)
{
	unsigned shift;

	switch (i) {
	case 0:
	case 3:
	case 4:
		return -1;
	case 1:
	case 2:
		shift = i;
		break;
	case 12:
		shift = 7;
		break;
	default:
		/* half the mode, rounded up (same as i >> 1 for even i) */
		shift = (i + 1) >> 1;
		break;
	}

	return (128 * (1 << !!dct_width)) << shift;
}
1234 
/*
 * Map cs_mode index @i to a chip select size, scaled by @rank_multiply.
 * Modes below 4 and mode 6 yield -1.
 */
static int ddr3_lrdimm_cs_size(unsigned i, unsigned rank_multiply)
{
	unsigned shift;

	if (i < 4 || i == 6)
		return -1;

	/* mode 12 is special-cased; otherwise half the mode, rounded up */
	shift = (i == 12) ? 7 : (i + 1) >> 1;

	return rank_multiply * (128 << shift);
}
1254 
/*
 * Map cs_mode index @i to a chip select size: -1 for mode 0, 1024 for
 * mode 1, and 1024 scaled by 2^(i / 2) for every other mode.
 */
static int ddr4_cs_size(unsigned i)
{
	switch (i) {
	case 0:
		return -1;
	case 1:
		/* Min cs_size = 1G */
		return 1024;
	default:
		return 1024 * (1 << (i >> 1));
	}
}
1269 
1270 static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1271 				   unsigned cs_mode, int cs_mask_nr)
1272 {
1273 	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1274 
1275 	WARN_ON(cs_mode > 11);
1276 
1277 	if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE)
1278 		return ddr3_cs_size(cs_mode, dclr & WIDTH_128);
1279 	else
1280 		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1281 }
1282 
1283 /*
1284  * F15h supports only 64bit DCT interfaces
1285  */
1286 static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1287 				   unsigned cs_mode, int cs_mask_nr)
1288 {
1289 	WARN_ON(cs_mode > 12);
1290 
1291 	return ddr3_cs_size(cs_mode, false);
1292 }
1293 
1294 /* F15h M60h supports DDR4 mapping as well.. */
1295 static int f15_m60h_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1296 					unsigned cs_mode, int cs_mask_nr)
1297 {
1298 	int cs_size;
1299 	u32 dcsm = pvt->csels[dct].csmasks[cs_mask_nr];
1300 
1301 	WARN_ON(cs_mode > 12);
1302 
1303 	if (pvt->dram_type == MEM_DDR4) {
1304 		if (cs_mode > 9)
1305 			return -1;
1306 
1307 		cs_size = ddr4_cs_size(cs_mode);
1308 	} else if (pvt->dram_type == MEM_LRDDR3) {
1309 		unsigned rank_multiply = dcsm & 0xf;
1310 
1311 		if (rank_multiply == 3)
1312 			rank_multiply = 4;
1313 		cs_size = ddr3_lrdimm_cs_size(cs_mode, rank_multiply);
1314 	} else {
1315 		/* Minimum cs size is 512mb for F15hM60h*/
1316 		if (cs_mode == 0x1)
1317 			return -1;
1318 
1319 		cs_size = ddr3_cs_size(cs_mode, false);
1320 	}
1321 
1322 	return cs_size;
1323 }
1324 
1325 /*
1326  * F16h and F15h model 30h have only limited cs_modes.
1327  */
1328 static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1329 				unsigned cs_mode, int cs_mask_nr)
1330 {
1331 	WARN_ON(cs_mode > 12);
1332 
1333 	if (cs_mode == 6 || cs_mode == 8 ||
1334 	    cs_mode == 9 || cs_mode == 12)
1335 		return -1;
1336 	else
1337 		return ddr3_cs_size(cs_mode, false);
1338 }
1339 
1340 static void read_dram_ctl_register(struct amd64_pvt *pvt)
1341 {
1342 
1343 	if (pvt->fam == 0xf)
1344 		return;
1345 
1346 	if (!amd64_read_pci_cfg(pvt->F2, DCT_SEL_LO, &pvt->dct_sel_lo)) {
1347 		edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n",
1348 			 pvt->dct_sel_lo, dct_sel_baseaddr(pvt));
1349 
1350 		edac_dbg(0, "  DCTs operate in %s mode\n",
1351 			 (dct_ganging_enabled(pvt) ? "ganged" : "unganged"));
1352 
1353 		if (!dct_ganging_enabled(pvt))
1354 			edac_dbg(0, "  Address range split per DCT: %s\n",
1355 				 (dct_high_range_enabled(pvt) ? "yes" : "no"));
1356 
1357 		edac_dbg(0, "  data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
1358 			 (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
1359 			 (dct_memory_cleared(pvt) ? "yes" : "no"));
1360 
1361 		edac_dbg(0, "  channel interleave: %s, "
1362 			 "interleave bits selector: 0x%x\n",
1363 			 (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
1364 			 dct_sel_interleave_addr(pvt));
1365 	}
1366 
1367 	amd64_read_pci_cfg(pvt->F2, DCT_SEL_HI, &pvt->dct_sel_hi);
1368 }
1369 
1370 /*
1371  * Determine channel (DCT) based on the interleaving mode (see F15h M30h BKDG,
1372  * 2.10.12 Memory Interleaving Modes).
1373  */
1374 static u8 f15_m30h_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1375 				     u8 intlv_en, int num_dcts_intlv,
1376 				     u32 dct_sel)
1377 {
1378 	u8 channel = 0;
1379 	u8 select;
1380 
1381 	if (!(intlv_en))
1382 		return (u8)(dct_sel);
1383 
1384 	if (num_dcts_intlv == 2) {
1385 		select = (sys_addr >> 8) & 0x3;
1386 		channel = select ? 0x3 : 0;
1387 	} else if (num_dcts_intlv == 4) {
1388 		u8 intlv_addr = dct_sel_interleave_addr(pvt);
1389 		switch (intlv_addr) {
1390 		case 0x4:
1391 			channel = (sys_addr >> 8) & 0x3;
1392 			break;
1393 		case 0x5:
1394 			channel = (sys_addr >> 9) & 0x3;
1395 			break;
1396 		}
1397 	}
1398 	return channel;
1399 }
1400 
1401 /*
1402  * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory
1403  * Interleaving Modes.
1404  */
1405 static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1406 				bool hi_range_sel, u8 intlv_en)
1407 {
1408 	u8 dct_sel_high = (pvt->dct_sel_lo >> 1) & 1;
1409 
1410 	if (dct_ganging_enabled(pvt))
1411 		return 0;
1412 
1413 	if (hi_range_sel)
1414 		return dct_sel_high;
1415 
1416 	/*
1417 	 * see F2x110[DctSelIntLvAddr] - channel interleave mode
1418 	 */
1419 	if (dct_interleave_enabled(pvt)) {
1420 		u8 intlv_addr = dct_sel_interleave_addr(pvt);
1421 
1422 		/* return DCT select function: 0=DCT0, 1=DCT1 */
1423 		if (!intlv_addr)
1424 			return sys_addr >> 6 & 1;
1425 
1426 		if (intlv_addr & 0x2) {
1427 			u8 shift = intlv_addr & 0x1 ? 9 : 6;
1428 			u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
1429 
1430 			return ((sys_addr >> shift) & 1) ^ temp;
1431 		}
1432 
1433 		return (sys_addr >> (12 + hweight8(intlv_en))) & 1;
1434 	}
1435 
1436 	if (dct_high_range_enabled(pvt))
1437 		return ~dct_sel_high & 1;
1438 
1439 	return 0;
1440 }
1441 
1442 /* Convert the sys_addr to the normalized DCT address */
1443 static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
1444 				 u64 sys_addr, bool hi_rng,
1445 				 u32 dct_sel_base_addr)
1446 {
1447 	u64 chan_off;
1448 	u64 dram_base		= get_dram_base(pvt, range);
1449 	u64 hole_off		= f10_dhar_offset(pvt);
1450 	u64 dct_sel_base_off	= (u64)(pvt->dct_sel_hi & 0xFFFFFC00) << 16;
1451 
1452 	if (hi_rng) {
1453 		/*
1454 		 * if
1455 		 * base address of high range is below 4Gb
1456 		 * (bits [47:27] at [31:11])
1457 		 * DRAM address space on this DCT is hoisted above 4Gb	&&
1458 		 * sys_addr > 4Gb
1459 		 *
1460 		 *	remove hole offset from sys_addr
1461 		 * else
1462 		 *	remove high range offset from sys_addr
1463 		 */
1464 		if ((!(dct_sel_base_addr >> 16) ||
1465 		     dct_sel_base_addr < dhar_base(pvt)) &&
1466 		    dhar_valid(pvt) &&
1467 		    (sys_addr >= BIT_64(32)))
1468 			chan_off = hole_off;
1469 		else
1470 			chan_off = dct_sel_base_off;
1471 	} else {
1472 		/*
1473 		 * if
1474 		 * we have a valid hole		&&
1475 		 * sys_addr > 4Gb
1476 		 *
1477 		 *	remove hole
1478 		 * else
1479 		 *	remove dram base to normalize to DCT address
1480 		 */
1481 		if (dhar_valid(pvt) && (sys_addr >= BIT_64(32)))
1482 			chan_off = hole_off;
1483 		else
1484 			chan_off = dram_base;
1485 	}
1486 
1487 	return (sys_addr & GENMASK_ULL(47,6)) - (chan_off & GENMASK_ULL(47,23));
1488 }
1489 
1490 /*
1491  * checks if the csrow passed in is marked as SPARED, if so returns the new
1492  * spare row
1493  */
1494 static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
1495 {
1496 	int tmp_cs;
1497 
1498 	if (online_spare_swap_done(pvt, dct) &&
1499 	    csrow == online_spare_bad_dramcs(pvt, dct)) {
1500 
1501 		for_each_chip_select(tmp_cs, dct, pvt) {
1502 			if (chip_select_base(tmp_cs, dct, pvt) & 0x2) {
1503 				csrow = tmp_cs;
1504 				break;
1505 			}
1506 		}
1507 	}
1508 	return csrow;
1509 }
1510 
1511 /*
1512  * Iterate over the DRAM DCT "base" and "mask" registers looking for a
1513  * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
1514  *
1515  * Return:
1516  *	-EINVAL:  NOT FOUND
1517  *	0..csrow = Chip-Select Row
1518  */
1519 static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
1520 {
1521 	struct mem_ctl_info *mci;
1522 	struct amd64_pvt *pvt;
1523 	u64 cs_base, cs_mask;
1524 	int cs_found = -EINVAL;
1525 	int csrow;
1526 
1527 	mci = edac_mc_find(nid);
1528 	if (!mci)
1529 		return cs_found;
1530 
1531 	pvt = mci->pvt_info;
1532 
1533 	edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr, dct);
1534 
1535 	for_each_chip_select(csrow, dct, pvt) {
1536 		if (!csrow_enabled(csrow, dct, pvt))
1537 			continue;
1538 
1539 		get_cs_base_and_mask(pvt, csrow, dct, &cs_base, &cs_mask);
1540 
1541 		edac_dbg(1, "    CSROW=%d CSBase=0x%llx CSMask=0x%llx\n",
1542 			 csrow, cs_base, cs_mask);
1543 
1544 		cs_mask = ~cs_mask;
1545 
1546 		edac_dbg(1, "    (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n",
1547 			 (in_addr & cs_mask), (cs_base & cs_mask));
1548 
1549 		if ((in_addr & cs_mask) == (cs_base & cs_mask)) {
1550 			if (pvt->fam == 0x15 && pvt->model >= 0x30) {
1551 				cs_found =  csrow;
1552 				break;
1553 			}
1554 			cs_found = f10_process_possible_spare(pvt, dct, csrow);
1555 
1556 			edac_dbg(1, " MATCH csrow=%d\n", cs_found);
1557 			break;
1558 		}
1559 	}
1560 	return cs_found;
1561 }
1562 
1563 /*
1564  * See F2x10C. Non-interleaved graphics framebuffer memory under the 16G is
1565  * swapped with a region located at the bottom of memory so that the GPU can use
1566  * the interleaved region and thus two channels.
1567  */
1568 static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
1569 {
1570 	u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr;
1571 
1572 	if (pvt->fam == 0x10) {
1573 		/* only revC3 and revE have that feature */
1574 		if (pvt->model < 4 || (pvt->model < 0xa && pvt->stepping < 3))
1575 			return sys_addr;
1576 	}
1577 
1578 	amd64_read_pci_cfg(pvt->F2, SWAP_INTLV_REG, &swap_reg);
1579 
1580 	if (!(swap_reg & 0x1))
1581 		return sys_addr;
1582 
1583 	swap_base	= (swap_reg >> 3) & 0x7f;
1584 	swap_limit	= (swap_reg >> 11) & 0x7f;
1585 	rgn_size	= (swap_reg >> 20) & 0x7f;
1586 	tmp_addr	= sys_addr >> 27;
1587 
1588 	if (!(sys_addr >> 34) &&
1589 	    (((tmp_addr >= swap_base) &&
1590 	     (tmp_addr <= swap_limit)) ||
1591 	     (tmp_addr < rgn_size)))
1592 		return sys_addr ^ (u64)swap_base << 27;
1593 
1594 	return sys_addr;
1595 }
1596 
1597 /* For a given @dram_range, check if @sys_addr falls within it. */
1598 static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
1599 				  u64 sys_addr, int *chan_sel)
1600 {
1601 	int cs_found = -EINVAL;
1602 	u64 chan_addr;
1603 	u32 dct_sel_base;
1604 	u8 channel;
1605 	bool high_range = false;
1606 
1607 	u8 node_id    = dram_dst_node(pvt, range);
1608 	u8 intlv_en   = dram_intlv_en(pvt, range);
1609 	u32 intlv_sel = dram_intlv_sel(pvt, range);
1610 
1611 	edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1612 		 range, sys_addr, get_dram_limit(pvt, range));
1613 
1614 	if (dhar_valid(pvt) &&
1615 	    dhar_base(pvt) <= sys_addr &&
1616 	    sys_addr < BIT_64(32)) {
1617 		amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1618 			    sys_addr);
1619 		return -EINVAL;
1620 	}
1621 
1622 	if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en)))
1623 		return -EINVAL;
1624 
1625 	sys_addr = f1x_swap_interleaved_region(pvt, sys_addr);
1626 
1627 	dct_sel_base = dct_sel_baseaddr(pvt);
1628 
1629 	/*
1630 	 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
1631 	 * select between DCT0 and DCT1.
1632 	 */
1633 	if (dct_high_range_enabled(pvt) &&
1634 	   !dct_ganging_enabled(pvt) &&
1635 	   ((sys_addr >> 27) >= (dct_sel_base >> 11)))
1636 		high_range = true;
1637 
1638 	channel = f1x_determine_channel(pvt, sys_addr, high_range, intlv_en);
1639 
1640 	chan_addr = f1x_get_norm_dct_addr(pvt, range, sys_addr,
1641 					  high_range, dct_sel_base);
1642 
1643 	/* Remove node interleaving, see F1x120 */
1644 	if (intlv_en)
1645 		chan_addr = ((chan_addr >> (12 + hweight8(intlv_en))) << 12) |
1646 			    (chan_addr & 0xfff);
1647 
1648 	/* remove channel interleave */
1649 	if (dct_interleave_enabled(pvt) &&
1650 	   !dct_high_range_enabled(pvt) &&
1651 	   !dct_ganging_enabled(pvt)) {
1652 
1653 		if (dct_sel_interleave_addr(pvt) != 1) {
1654 			if (dct_sel_interleave_addr(pvt) == 0x3)
1655 				/* hash 9 */
1656 				chan_addr = ((chan_addr >> 10) << 9) |
1657 					     (chan_addr & 0x1ff);
1658 			else
1659 				/* A[6] or hash 6 */
1660 				chan_addr = ((chan_addr >> 7) << 6) |
1661 					     (chan_addr & 0x3f);
1662 		} else
1663 			/* A[12] */
1664 			chan_addr = ((chan_addr >> 13) << 12) |
1665 				     (chan_addr & 0xfff);
1666 	}
1667 
1668 	edac_dbg(1, "   Normalized DCT addr: 0x%llx\n", chan_addr);
1669 
1670 	cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
1671 
1672 	if (cs_found >= 0)
1673 		*chan_sel = channel;
1674 
1675 	return cs_found;
1676 }
1677 
1678 static int f15_m30h_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
1679 					u64 sys_addr, int *chan_sel)
1680 {
1681 	int cs_found = -EINVAL;
1682 	int num_dcts_intlv = 0;
1683 	u64 chan_addr, chan_offset;
1684 	u64 dct_base, dct_limit;
1685 	u32 dct_cont_base_reg, dct_cont_limit_reg, tmp;
1686 	u8 channel, alias_channel, leg_mmio_hole, dct_sel, dct_offset_en;
1687 
1688 	u64 dhar_offset		= f10_dhar_offset(pvt);
1689 	u8 intlv_addr		= dct_sel_interleave_addr(pvt);
1690 	u8 node_id		= dram_dst_node(pvt, range);
1691 	u8 intlv_en		= dram_intlv_en(pvt, range);
1692 
1693 	amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &dct_cont_base_reg);
1694 	amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &dct_cont_limit_reg);
1695 
1696 	dct_offset_en		= (u8) ((dct_cont_base_reg >> 3) & BIT(0));
1697 	dct_sel			= (u8) ((dct_cont_base_reg >> 4) & 0x7);
1698 
1699 	edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1700 		 range, sys_addr, get_dram_limit(pvt, range));
1701 
1702 	if (!(get_dram_base(pvt, range)  <= sys_addr) &&
1703 	    !(get_dram_limit(pvt, range) >= sys_addr))
1704 		return -EINVAL;
1705 
1706 	if (dhar_valid(pvt) &&
1707 	    dhar_base(pvt) <= sys_addr &&
1708 	    sys_addr < BIT_64(32)) {
1709 		amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1710 			    sys_addr);
1711 		return -EINVAL;
1712 	}
1713 
1714 	/* Verify sys_addr is within DCT Range. */
1715 	dct_base = (u64) dct_sel_baseaddr(pvt);
1716 	dct_limit = (dct_cont_limit_reg >> 11) & 0x1FFF;
1717 
1718 	if (!(dct_cont_base_reg & BIT(0)) &&
1719 	    !(dct_base <= (sys_addr >> 27) &&
1720 	      dct_limit >= (sys_addr >> 27)))
1721 		return -EINVAL;
1722 
1723 	/* Verify number of dct's that participate in channel interleaving. */
1724 	num_dcts_intlv = (int) hweight8(intlv_en);
1725 
1726 	if (!(num_dcts_intlv % 2 == 0) || (num_dcts_intlv > 4))
1727 		return -EINVAL;
1728 
1729 	channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en,
1730 					     num_dcts_intlv, dct_sel);
1731 
1732 	/* Verify we stay within the MAX number of channels allowed */
1733 	if (channel > 3)
1734 		return -EINVAL;
1735 
1736 	leg_mmio_hole = (u8) (dct_cont_base_reg >> 1 & BIT(0));
1737 
1738 	/* Get normalized DCT addr */
1739 	if (leg_mmio_hole && (sys_addr >= BIT_64(32)))
1740 		chan_offset = dhar_offset;
1741 	else
1742 		chan_offset = dct_base << 27;
1743 
1744 	chan_addr = sys_addr - chan_offset;
1745 
1746 	/* remove channel interleave */
1747 	if (num_dcts_intlv == 2) {
1748 		if (intlv_addr == 0x4)
1749 			chan_addr = ((chan_addr >> 9) << 8) |
1750 						(chan_addr & 0xff);
1751 		else if (intlv_addr == 0x5)
1752 			chan_addr = ((chan_addr >> 10) << 9) |
1753 						(chan_addr & 0x1ff);
1754 		else
1755 			return -EINVAL;
1756 
1757 	} else if (num_dcts_intlv == 4) {
1758 		if (intlv_addr == 0x4)
1759 			chan_addr = ((chan_addr >> 10) << 8) |
1760 							(chan_addr & 0xff);
1761 		else if (intlv_addr == 0x5)
1762 			chan_addr = ((chan_addr >> 11) << 9) |
1763 							(chan_addr & 0x1ff);
1764 		else
1765 			return -EINVAL;
1766 	}
1767 
1768 	if (dct_offset_en) {
1769 		amd64_read_pci_cfg(pvt->F1,
1770 				   DRAM_CONT_HIGH_OFF + (int) channel * 4,
1771 				   &tmp);
1772 		chan_addr +=  (u64) ((tmp >> 11) & 0xfff) << 27;
1773 	}
1774 
1775 	f15h_select_dct(pvt, channel);
1776 
1777 	edac_dbg(1, "   Normalized DCT addr: 0x%llx\n", chan_addr);
1778 
1779 	/*
1780 	 * Find Chip select:
1781 	 * if channel = 3, then alias it to 1. This is because, in F15 M30h,
1782 	 * there is support for 4 DCT's, but only 2 are currently functional.
1783 	 * They are DCT0 and DCT3. But we have read all registers of DCT3 into
1784 	 * pvt->csels[1]. So we need to use '1' here to get correct info.
1785 	 * Refer F15 M30h BKDG Section 2.10 and 2.10.3 for clarifications.
1786 	 */
1787 	alias_channel =  (channel == 3) ? 1 : channel;
1788 
1789 	cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, alias_channel);
1790 
1791 	if (cs_found >= 0)
1792 		*chan_sel = alias_channel;
1793 
1794 	return cs_found;
1795 }
1796 
1797 static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt,
1798 					u64 sys_addr,
1799 					int *chan_sel)
1800 {
1801 	int cs_found = -EINVAL;
1802 	unsigned range;
1803 
1804 	for (range = 0; range < DRAM_RANGES; range++) {
1805 		if (!dram_rw(pvt, range))
1806 			continue;
1807 
1808 		if (pvt->fam == 0x15 && pvt->model >= 0x30)
1809 			cs_found = f15_m30h_match_to_this_node(pvt, range,
1810 							       sys_addr,
1811 							       chan_sel);
1812 
1813 		else if ((get_dram_base(pvt, range)  <= sys_addr) &&
1814 			 (get_dram_limit(pvt, range) >= sys_addr)) {
1815 			cs_found = f1x_match_to_this_node(pvt, range,
1816 							  sys_addr, chan_sel);
1817 			if (cs_found >= 0)
1818 				break;
1819 		}
1820 	}
1821 	return cs_found;
1822 }
1823 
1824 /*
1825  * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
1826  * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
1827  *
1828  * The @sys_addr is usually an error address received from the hardware
1829  * (MCX_ADDR).
1830  */
1831 static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
1832 				     struct err_info *err)
1833 {
1834 	struct amd64_pvt *pvt = mci->pvt_info;
1835 
1836 	error_address_to_page_and_offset(sys_addr, err);
1837 
1838 	err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel);
1839 	if (err->csrow < 0) {
1840 		err->err_code = ERR_CSROW;
1841 		return;
1842 	}
1843 
1844 	/*
1845 	 * We need the syndromes for channel detection only when we're
1846 	 * ganged. Otherwise @chan should already contain the channel at
1847 	 * this point.
1848 	 */
1849 	if (dct_ganging_enabled(pvt))
1850 		err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
1851 }
1852 
1853 /*
1854  * debug routine to display the memory sizes of all logical DIMMs and its
1855  * CSROWs
1856  */
1857 static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
1858 {
1859 	int dimm, size0, size1;
1860 	u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
1861 	u32 dbam  = ctrl ? pvt->dbam1 : pvt->dbam0;
1862 
1863 	if (pvt->fam == 0xf) {
1864 		/* K8 families < revF not supported yet */
1865 	       if (pvt->ext_model < K8_REV_F)
1866 			return;
1867 	       else
1868 		       WARN_ON(ctrl != 0);
1869 	}
1870 
1871 	if (pvt->fam == 0x10) {
1872 		dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1
1873 							   : pvt->dbam0;
1874 		dcsb = (ctrl && !dct_ganging_enabled(pvt)) ?
1875 				 pvt->csels[1].csbases :
1876 				 pvt->csels[0].csbases;
1877 	} else if (ctrl) {
1878 		dbam = pvt->dbam0;
1879 		dcsb = pvt->csels[1].csbases;
1880 	}
1881 	edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
1882 		 ctrl, dbam);
1883 
1884 	edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);
1885 
1886 	/* Dump memory sizes for DIMM and its CSROWs */
1887 	for (dimm = 0; dimm < 4; dimm++) {
1888 
1889 		size0 = 0;
1890 		if (dcsb[dimm*2] & DCSB_CS_ENABLE)
1891 			/* For f15m60h, need multiplier for LRDIMM cs_size
1892 			 * calculation. We pass 'dimm' value to the dbam_to_cs
1893 			 * mapper so we can find the multiplier from the
1894 			 * corresponding DCSM.
1895 			 */
1896 			size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
1897 						     DBAM_DIMM(dimm, dbam),
1898 						     dimm);
1899 
1900 		size1 = 0;
1901 		if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
1902 			size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
1903 						     DBAM_DIMM(dimm, dbam),
1904 						     dimm);
1905 
1906 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
1907 				dimm * 2,     size0,
1908 				dimm * 2 + 1, size1);
1909 	}
1910 }
1911 
/*
 * Per-family descriptors, indexed by the *_CPUS enumerators. Each entry gives
 * the controller name, the two PCI device IDs stored in f1_id/f2_id and the
 * family-specific callbacks: early channel count, sys_addr -> csrow mapping
 * and DBAM cs_mode -> chip select size.
 *
 * Only K8 uses its own channel-count and mapping routines; every other entry
 * shares the f1x_* ones and differs in the IDs and the dbam_to_cs mapper.
 */
1912 static struct amd64_family_type family_types[] = {
1913 	[K8_CPUS] = {
1914 		.ctl_name = "K8",
1915 		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
1916 		.f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
1917 		.ops = {
1918 			.early_channel_count	= k8_early_channel_count,
1919 			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
1920 			.dbam_to_cs		= k8_dbam_to_chip_select,
1921 		}
1922 	},
1923 	[F10_CPUS] = {
1924 		.ctl_name = "F10h",
1925 		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
1926 		.f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
1927 		.ops = {
1928 			.early_channel_count	= f1x_early_channel_count,
1929 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1930 			.dbam_to_cs		= f10_dbam_to_chip_select,
1931 		}
1932 	},
1933 	[F15_CPUS] = {
1934 		.ctl_name = "F15h",
1935 		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
1936 		.f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
1937 		.ops = {
1938 			.early_channel_count	= f1x_early_channel_count,
1939 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1940 			.dbam_to_cs		= f15_dbam_to_chip_select,
1941 		}
1942 	},
1943 	[F15_M30H_CPUS] = {
1944 		.ctl_name = "F15h_M30h",
1945 		.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
1946 		.f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
1947 		.ops = {
1948 			.early_channel_count	= f1x_early_channel_count,
1949 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1950 			.dbam_to_cs		= f16_dbam_to_chip_select,
1951 		}
1952 	},
1953 	[F15_M60H_CPUS] = {
1954 		.ctl_name = "F15h_M60h",
1955 		.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
1956 		.f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
1957 		.ops = {
1958 			.early_channel_count	= f1x_early_channel_count,
1959 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1960 			.dbam_to_cs		= f15_m60h_dbam_to_chip_select,
1961 		}
1962 	},
1963 	[F16_CPUS] = {
1964 		.ctl_name = "F16h",
1965 		.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
1966 		.f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
1967 		.ops = {
1968 			.early_channel_count	= f1x_early_channel_count,
1969 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1970 			.dbam_to_cs		= f16_dbam_to_chip_select,
1971 		}
1972 	},
1973 	[F16_M30H_CPUS] = {
1974 		.ctl_name = "F16h_M30h",
1975 		.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
1976 		.f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
1977 		.ops = {
1978 			.early_channel_count	= f1x_early_channel_count,
1979 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1980 			.dbam_to_cs		= f16_dbam_to_chip_select,
1981 		}
1982 	},
1983 };
1984 
1985 /*
1986  * These are tables of eigenvectors (one per line) which can be used for the
1987  * construction of the syndrome tables. The modified syndrome search algorithm
1988  * uses those to find the symbol in error and thus the DIMM.
1989  *
1990  * Algorithm courtesy of Ross LaFetra from AMD.
1991  */
/*
 * Eigenvector table with 36 rows of four 16-bit entries, one row per line.
 * Presumably consumed by decode_syndrome() with v_dim == 4, so that the row
 * index is the error symbol it returns - confirm at the call site.
 */
1992 static const u16 x4_vectors[] = {
1993 	0x2f57, 0x1afe, 0x66cc, 0xdd88,
1994 	0x11eb, 0x3396, 0x7f4c, 0xeac8,
1995 	0x0001, 0x0002, 0x0004, 0x0008,
1996 	0x1013, 0x3032, 0x4044, 0x8088,
1997 	0x106b, 0x30d6, 0x70fc, 0xe0a8,
1998 	0x4857, 0xc4fe, 0x13cc, 0x3288,
1999 	0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
2000 	0x1f39, 0x251e, 0xbd6c, 0x6bd8,
2001 	0x15c1, 0x2a42, 0x89ac, 0x4758,
2002 	0x2b03, 0x1602, 0x4f0c, 0xca08,
2003 	0x1f07, 0x3a0e, 0x6b04, 0xbd08,
2004 	0x8ba7, 0x465e, 0x244c, 0x1cc8,
2005 	0x2b87, 0x164e, 0x642c, 0xdc18,
2006 	0x40b9, 0x80de, 0x1094, 0x20e8,
2007 	0x27db, 0x1eb6, 0x9dac, 0x7b58,
2008 	0x11c1, 0x2242, 0x84ac, 0x4c58,
2009 	0x1be5, 0x2d7a, 0x5e34, 0xa718,
2010 	0x4b39, 0x8d1e, 0x14b4, 0x28d8,
2011 	0x4c97, 0xc87e, 0x11fc, 0x33a8,
2012 	0x8e97, 0x497e, 0x2ffc, 0x1aa8,
2013 	0x16b3, 0x3d62, 0x4f34, 0x8518,
2014 	0x1e2f, 0x391a, 0x5cac, 0xf858,
2015 	0x1d9f, 0x3b7a, 0x572c, 0xfe18,
2016 	0x15f5, 0x2a5a, 0x5264, 0xa3b8,
2017 	0x1dbb, 0x3b66, 0x715c, 0xe3f8,
2018 	0x4397, 0xc27e, 0x17fc, 0x3ea8,
2019 	0x1617, 0x3d3e, 0x6464, 0xb8b8,
2020 	0x23ff, 0x12aa, 0xab6c, 0x56d8,
2021 	0x2dfb, 0x1ba6, 0x913c, 0x7328,
2022 	0x185d, 0x2ca6, 0x7914, 0x9e28,
2023 	0x171b, 0x3e36, 0x7d7c, 0xebe8,
2024 	0x4199, 0x82ee, 0x19f4, 0x2e58,
2025 	0x4807, 0xc40e, 0x130c, 0x3208,
2026 	0x1905, 0x2e0a, 0x5804, 0xac08,
2027 	0x213f, 0x132a, 0xadfc, 0x5ba8,
2028 	0x19a9, 0x2efe, 0xb5cc, 0x6f88,
2029 };
2030 
/*
 * Eigenvector table with 19 rows of eight 16-bit entries, one row per line.
 * Presumably consumed by decode_syndrome() with v_dim == 8, so that the row
 * index is the error symbol it returns - confirm at the call site.
 */
2031 static const u16 x8_vectors[] = {
2032 	0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
2033 	0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
2034 	0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
2035 	0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80,
2036 	0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780,
2037 	0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080,
2038 	0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080,
2039 	0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080,
2040 	0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80,
2041 	0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580,
2042 	0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880,
2043 	0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280,
2044 	0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180,
2045 	0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580,
2046 	0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280,
2047 	0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180,
2048 	0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080,
2049 	0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
2050 	0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
2051 };
2052 
2053 static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs,
2054 			   unsigned v_dim)
2055 {
2056 	unsigned int i, err_sym;
2057 
2058 	for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
2059 		u16 s = syndrome;
2060 		unsigned v_idx =  err_sym * v_dim;
2061 		unsigned v_end = (err_sym + 1) * v_dim;
2062 
2063 		/* walk over all 16 bits of the syndrome */
2064 		for (i = 1; i < (1U << 16); i <<= 1) {
2065 
2066 			/* if bit is set in that eigenvector... */
2067 			if (v_idx < v_end && vectors[v_idx] & i) {
2068 				u16 ev_comp = vectors[v_idx++];
2069 
2070 				/* ... and bit set in the modified syndrome, */
2071 				if (s & i) {
2072 					/* remove it. */
2073 					s ^= ev_comp;
2074 
2075 					if (!s)
2076 						return err_sym;
2077 				}
2078 
2079 			} else if (s & i)
2080 				/* can't get to zero, move to next symbol */
2081 				break;
2082 		}
2083 	}
2084 
2085 	edac_dbg(0, "syndrome(%x) not found\n", syndrome);
2086 	return -1;
2087 }
2088 
2089 static int map_err_sym_to_channel(int err_sym, int sym_size)
2090 {
2091 	if (sym_size == 4)
2092 		switch (err_sym) {
2093 		case 0x20:
2094 		case 0x21:
2095 			return 0;
2096 			break;
2097 		case 0x22:
2098 		case 0x23:
2099 			return 1;
2100 			break;
2101 		default:
2102 			return err_sym >> 4;
2103 			break;
2104 		}
2105 	/* x8 symbols */
2106 	else
2107 		switch (err_sym) {
2108 		/* imaginary bits not in a DIMM */
2109 		case 0x10:
2110 			WARN(1, KERN_ERR "Invalid error symbol: 0x%x\n",
2111 					  err_sym);
2112 			return -1;
2113 			break;
2114 
2115 		case 0x11:
2116 			return 0;
2117 			break;
2118 		case 0x12:
2119 			return 1;
2120 			break;
2121 		default:
2122 			return err_sym >> 3;
2123 			break;
2124 		}
2125 	return -1;
2126 }
2127 
2128 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
2129 {
2130 	struct amd64_pvt *pvt = mci->pvt_info;
2131 	int err_sym = -1;
2132 
2133 	if (pvt->ecc_sym_sz == 8)
2134 		err_sym = decode_syndrome(syndrome, x8_vectors,
2135 					  ARRAY_SIZE(x8_vectors),
2136 					  pvt->ecc_sym_sz);
2137 	else if (pvt->ecc_sym_sz == 4)
2138 		err_sym = decode_syndrome(syndrome, x4_vectors,
2139 					  ARRAY_SIZE(x4_vectors),
2140 					  pvt->ecc_sym_sz);
2141 	else {
2142 		amd64_warn("Illegal syndrome type: %u\n", pvt->ecc_sym_sz);
2143 		return err_sym;
2144 	}
2145 
2146 	return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
2147 }
2148 
2149 static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
2150 			    u8 ecc_type)
2151 {
2152 	enum hw_event_mc_err_type err_type;
2153 	const char *string;
2154 
2155 	if (ecc_type == 2)
2156 		err_type = HW_EVENT_ERR_CORRECTED;
2157 	else if (ecc_type == 1)
2158 		err_type = HW_EVENT_ERR_UNCORRECTED;
2159 	else {
2160 		WARN(1, "Something is rotten in the state of Denmark.\n");
2161 		return;
2162 	}
2163 
2164 	switch (err->err_code) {
2165 	case DECODE_OK:
2166 		string = "";
2167 		break;
2168 	case ERR_NODE:
2169 		string = "Failed to map error addr to a node";
2170 		break;
2171 	case ERR_CSROW:
2172 		string = "Failed to map error addr to a csrow";
2173 		break;
2174 	case ERR_CHANNEL:
2175 		string = "unknown syndrome - possible error reporting race";
2176 		break;
2177 	default:
2178 		string = "WTF error";
2179 		break;
2180 	}
2181 
2182 	edac_mc_handle_error(err_type, mci, 1,
2183 			     err->page, err->offset, err->syndrome,
2184 			     err->csrow, err->channel, -1,
2185 			     string, "");
2186 }
2187 
2188 static inline void decode_bus_error(int node_id, struct mce *m)
2189 {
2190 	struct mem_ctl_info *mci;
2191 	struct amd64_pvt *pvt;
2192 	u8 ecc_type = (m->status >> 45) & 0x3;
2193 	u8 xec = XEC(m->status, 0x1f);
2194 	u16 ec = EC(m->status);
2195 	u64 sys_addr;
2196 	struct err_info err;
2197 
2198 	mci = edac_mc_find(node_id);
2199 	if (!mci)
2200 		return;
2201 
2202 	pvt = mci->pvt_info;
2203 
2204 	/* Bail out early if this was an 'observed' error */
2205 	if (PP(ec) == NBSL_PP_OBS)
2206 		return;
2207 
2208 	/* Do only ECC errors */
2209 	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
2210 		return;
2211 
2212 	memset(&err, 0, sizeof(err));
2213 
2214 	sys_addr = get_error_address(pvt, m);
2215 
2216 	if (ecc_type == 2)
2217 		err.syndrome = extract_syndrome(m->status);
2218 
2219 	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
2220 
2221 	__log_bus_error(mci, &err, ecc_type);
2222 }
2223 
2224 /*
2225  * Use pvt->F3 which contains the F3 CPU PCI device to get the related
2226  * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
2227  */
2228 static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f2_id)
2229 {
2230 	/* Reserve the ADDRESS MAP Device */
2231 	pvt->F1 = pci_get_related_function(pvt->F3->vendor, f1_id, pvt->F3);
2232 	if (!pvt->F1) {
2233 		amd64_err("error address map device not found: "
2234 			  "vendor %x device 0x%x (broken BIOS?)\n",
2235 			  PCI_VENDOR_ID_AMD, f1_id);
2236 		return -ENODEV;
2237 	}
2238 
2239 	/* Reserve the DCT Device */
2240 	pvt->F2 = pci_get_related_function(pvt->F3->vendor, f2_id, pvt->F3);
2241 	if (!pvt->F2) {
2242 		pci_dev_put(pvt->F1);
2243 		pvt->F1 = NULL;
2244 
2245 		amd64_err("error F2 device not found: "
2246 			  "vendor %x device 0x%x (broken BIOS?)\n",
2247 			  PCI_VENDOR_ID_AMD, f2_id);
2248 
2249 		return -ENODEV;
2250 	}
2251 	edac_dbg(1, "F1: %s\n", pci_name(pvt->F1));
2252 	edac_dbg(1, "F2: %s\n", pci_name(pvt->F2));
2253 	edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
2254 
2255 	return 0;
2256 }
2257 
2258 static void free_mc_sibling_devs(struct amd64_pvt *pvt)
2259 {
2260 	pci_dev_put(pvt->F1);
2261 	pci_dev_put(pvt->F2);
2262 }
2263 
2264 /*
2265  * Retrieve the hardware registers of the memory controller (this includes the
2266  * 'Address Map' and 'Misc' device regs)
2267  */
2268 static void read_mc_regs(struct amd64_pvt *pvt)
2269 {
2270 	unsigned range;
2271 	u64 msr_val;
2272 	u32 tmp;
2273 
2274 	/*
2275 	 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
2276 	 * those are Read-As-Zero
2277 	 */
2278 	rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
2279 	edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);
2280 
2281 	/* check first whether TOP_MEM2 is enabled */
2282 	rdmsrl(MSR_K8_SYSCFG, msr_val);
2283 	if (msr_val & (1U << 21)) {
2284 		rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
2285 		edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
2286 	} else
2287 		edac_dbg(0, "  TOP_MEM2 disabled\n");
2288 
2289 	amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);
2290 
2291 	read_dram_ctl_register(pvt);
2292 
2293 	for (range = 0; range < DRAM_RANGES; range++) {
2294 		u8 rw;
2295 
2296 		/* read settings for this DRAM range */
2297 		read_dram_base_limit_regs(pvt, range);
2298 
2299 		rw = dram_rw(pvt, range);
2300 		if (!rw)
2301 			continue;
2302 
2303 		edac_dbg(1, "  DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n",
2304 			 range,
2305 			 get_dram_base(pvt, range),
2306 			 get_dram_limit(pvt, range));
2307 
2308 		edac_dbg(1, "   IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n",
2309 			 dram_intlv_en(pvt, range) ? "Enabled" : "Disabled",
2310 			 (rw & 0x1) ? "R" : "-",
2311 			 (rw & 0x2) ? "W" : "-",
2312 			 dram_intlv_sel(pvt, range),
2313 			 dram_dst_node(pvt, range));
2314 	}
2315 
2316 	read_dct_base_mask(pvt);
2317 
2318 	amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar);
2319 	amd64_read_dct_pci_cfg(pvt, 0, DBAM0, &pvt->dbam0);
2320 
2321 	amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);
2322 
2323 	amd64_read_dct_pci_cfg(pvt, 0, DCLR0, &pvt->dclr0);
2324 	amd64_read_dct_pci_cfg(pvt, 0, DCHR0, &pvt->dchr0);
2325 
2326 	if (!dct_ganging_enabled(pvt)) {
2327 		amd64_read_dct_pci_cfg(pvt, 1, DCLR0, &pvt->dclr1);
2328 		amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1);
2329 	}
2330 
2331 	pvt->ecc_sym_sz = 4;
2332 	determine_memory_type(pvt);
2333 	edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
2334 
2335 	if (pvt->fam >= 0x10) {
2336 		amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
2337 		/* F16h has only DCT0, so no need to read dbam1 */
2338 		if (pvt->fam != 0x16)
2339 			amd64_read_dct_pci_cfg(pvt, 1, DBAM0, &pvt->dbam1);
2340 
2341 		/* F10h, revD and later can do x8 ECC too */
2342 		if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25))
2343 			pvt->ecc_sym_sz = 8;
2344 	}
2345 	dump_misc_regs(pvt);
2346 }
2347 
2348 /*
2349  * NOTE: CPU Revision Dependent code
2350  *
2351  * Input:
2352  *	@csrow_nr ChipSelect Row Number (0..NUM_CHIPSELECTS-1)
2353  *	k8 private pointer to -->
2354  *			DRAM Bank Address mapping register
2355  *			node_id
2356  *			DCL register where dual_channel_active is
2357  *
2358  * The DBAM register consists of 4 sets of 4 bits each definitions:
2359  *
2360  * Bits:	CSROWs
2361  * 0-3		CSROWs 0 and 1
2362  * 4-7		CSROWs 2 and 3
2363  * 8-11		CSROWs 4 and 5
2364  * 12-15	CSROWs 6 and 7
2365  *
2366  * Values range from: 0 to 15
2367  * The meaning of the values depends on CPU revision and dual-channel state,
2368  * see relevant BKDG more info.
2369  *
2370  * The memory controller provides for total of only 8 CSROWs in its current
2371  * architecture. Each "pair" of CSROWs normally represents just one DIMM in
2372  * single channel or two (2) DIMMs in dual channel mode.
2373  *
2374  * The following code logic collapses the various tables for CSROW based on CPU
2375  * revision.
2376  *
2377  * Returns:
2378  *	The number of PAGE_SIZE pages on the specified CSROW number it
2379  *	encompasses
2380  *
2381  */
2382 static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
2383 {
2384 	u32 cs_mode, nr_pages;
2385 	u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
2386 
2387 
2388 	/*
2389 	 * The math on this doesn't look right on the surface because x/2*4 can
2390 	 * be simplified to x*2 but this expression makes use of the fact that
2391 	 * it is integral math where 1/2=0. This intermediate value becomes the
2392 	 * number of bits to shift the DBAM register to extract the proper CSROW
2393 	 * field.
2394 	 */
2395 	cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
2396 
2397 	nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, (csrow_nr / 2))
2398 							   << (20 - PAGE_SHIFT);
2399 
2400 	edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
2401 		    csrow_nr, dct,  cs_mode);
2402 	edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
2403 
2404 	return nr_pages;
2405 }
2406 
2407 /*
2408  * Initialize the array of csrow attribute instances, based on the values
2409  * from pci config hardware registers.
2410  */
2411 static int init_csrows(struct mem_ctl_info *mci)
2412 {
2413 	struct amd64_pvt *pvt = mci->pvt_info;
2414 	struct csrow_info *csrow;
2415 	struct dimm_info *dimm;
2416 	enum edac_type edac_mode;
2417 	int i, j, empty = 1;
2418 	int nr_pages = 0;
2419 	u32 val;
2420 
2421 	amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
2422 
2423 	pvt->nbcfg = val;
2424 
2425 	edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2426 		 pvt->mc_node_id, val,
2427 		 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
2428 
2429 	/*
2430 	 * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
2431 	 */
2432 	for_each_chip_select(i, 0, pvt) {
2433 		bool row_dct0 = !!csrow_enabled(i, 0, pvt);
2434 		bool row_dct1 = false;
2435 
2436 		if (pvt->fam != 0xf)
2437 			row_dct1 = !!csrow_enabled(i, 1, pvt);
2438 
2439 		if (!row_dct0 && !row_dct1)
2440 			continue;
2441 
2442 		csrow = mci->csrows[i];
2443 		empty = 0;
2444 
2445 		edac_dbg(1, "MC node: %d, csrow: %d\n",
2446 			    pvt->mc_node_id, i);
2447 
2448 		if (row_dct0) {
2449 			nr_pages = get_csrow_nr_pages(pvt, 0, i);
2450 			csrow->channels[0]->dimm->nr_pages = nr_pages;
2451 		}
2452 
2453 		/* K8 has only one DCT */
2454 		if (pvt->fam != 0xf && row_dct1) {
2455 			int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i);
2456 
2457 			csrow->channels[1]->dimm->nr_pages = row_dct1_pages;
2458 			nr_pages += row_dct1_pages;
2459 		}
2460 
2461 		edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
2462 
2463 		/*
2464 		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
2465 		 */
2466 		if (pvt->nbcfg & NBCFG_ECC_ENABLE)
2467 			edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ?
2468 				    EDAC_S4ECD4ED : EDAC_SECDED;
2469 		else
2470 			edac_mode = EDAC_NONE;
2471 
2472 		for (j = 0; j < pvt->channel_count; j++) {
2473 			dimm = csrow->channels[j]->dimm;
2474 			dimm->mtype = pvt->dram_type;
2475 			dimm->edac_mode = edac_mode;
2476 		}
2477 	}
2478 
2479 	return empty;
2480 }
2481 
2482 /* get all cores on this DCT */
2483 static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
2484 {
2485 	int cpu;
2486 
2487 	for_each_online_cpu(cpu)
2488 		if (amd_get_nb_id(cpu) == nid)
2489 			cpumask_set_cpu(cpu, mask);
2490 }
2491 
2492 /* check MCG_CTL on all the cpus on this node */
2493 static bool nb_mce_bank_enabled_on_node(u16 nid)
2494 {
2495 	cpumask_var_t mask;
2496 	int cpu, nbe;
2497 	bool ret = false;
2498 
2499 	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
2500 		amd64_warn("%s: Error allocating mask\n", __func__);
2501 		return false;
2502 	}
2503 
2504 	get_cpus_on_this_dct_cpumask(mask, nid);
2505 
2506 	rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);
2507 
2508 	for_each_cpu(cpu, mask) {
2509 		struct msr *reg = per_cpu_ptr(msrs, cpu);
2510 		nbe = reg->l & MSR_MCGCTL_NBE;
2511 
2512 		edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2513 			 cpu, reg->q,
2514 			 (nbe ? "enabled" : "disabled"));
2515 
2516 		if (!nbe)
2517 			goto out;
2518 	}
2519 	ret = true;
2520 
2521 out:
2522 	free_cpumask_var(mask);
2523 	return ret;
2524 }
2525 
2526 static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
2527 {
2528 	cpumask_var_t cmask;
2529 	int cpu;
2530 
2531 	if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
2532 		amd64_warn("%s: error allocating mask\n", __func__);
2533 		return false;
2534 	}
2535 
2536 	get_cpus_on_this_dct_cpumask(cmask, nid);
2537 
2538 	rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2539 
2540 	for_each_cpu(cpu, cmask) {
2541 
2542 		struct msr *reg = per_cpu_ptr(msrs, cpu);
2543 
2544 		if (on) {
2545 			if (reg->l & MSR_MCGCTL_NBE)
2546 				s->flags.nb_mce_enable = 1;
2547 
2548 			reg->l |= MSR_MCGCTL_NBE;
2549 		} else {
2550 			/*
2551 			 * Turn off NB MCE reporting only when it was off before
2552 			 */
2553 			if (!s->flags.nb_mce_enable)
2554 				reg->l &= ~MSR_MCGCTL_NBE;
2555 		}
2556 	}
2557 	wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2558 
2559 	free_cpumask_var(cmask);
2560 
2561 	return 0;
2562 }
2563 
2564 static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
2565 				       struct pci_dev *F3)
2566 {
2567 	bool ret = true;
2568 	u32 value, mask = 0x3;		/* UECC/CECC enable */
2569 
2570 	if (toggle_ecc_err_reporting(s, nid, ON)) {
2571 		amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
2572 		return false;
2573 	}
2574 
2575 	amd64_read_pci_cfg(F3, NBCTL, &value);
2576 
2577 	s->old_nbctl   = value & mask;
2578 	s->nbctl_valid = true;
2579 
2580 	value |= mask;
2581 	amd64_write_pci_cfg(F3, NBCTL, value);
2582 
2583 	amd64_read_pci_cfg(F3, NBCFG, &value);
2584 
2585 	edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
2586 		 nid, value, !!(value & NBCFG_ECC_ENABLE));
2587 
2588 	if (!(value & NBCFG_ECC_ENABLE)) {
2589 		amd64_warn("DRAM ECC disabled on this node, enabling...\n");
2590 
2591 		s->flags.nb_ecc_prev = 0;
2592 
2593 		/* Attempt to turn on DRAM ECC Enable */
2594 		value |= NBCFG_ECC_ENABLE;
2595 		amd64_write_pci_cfg(F3, NBCFG, value);
2596 
2597 		amd64_read_pci_cfg(F3, NBCFG, &value);
2598 
2599 		if (!(value & NBCFG_ECC_ENABLE)) {
2600 			amd64_warn("Hardware rejected DRAM ECC enable,"
2601 				   "check memory DIMM configuration.\n");
2602 			ret = false;
2603 		} else {
2604 			amd64_info("Hardware accepted DRAM ECC Enable\n");
2605 		}
2606 	} else {
2607 		s->flags.nb_ecc_prev = 1;
2608 	}
2609 
2610 	edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
2611 		 nid, value, !!(value & NBCFG_ECC_ENABLE));
2612 
2613 	return ret;
2614 }
2615 
2616 static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
2617 					struct pci_dev *F3)
2618 {
2619 	u32 value, mask = 0x3;		/* UECC/CECC enable */
2620 
2621 
2622 	if (!s->nbctl_valid)
2623 		return;
2624 
2625 	amd64_read_pci_cfg(F3, NBCTL, &value);
2626 	value &= ~mask;
2627 	value |= s->old_nbctl;
2628 
2629 	amd64_write_pci_cfg(F3, NBCTL, value);
2630 
2631 	/* restore previous BIOS DRAM ECC "off" setting we force-enabled */
2632 	if (!s->flags.nb_ecc_prev) {
2633 		amd64_read_pci_cfg(F3, NBCFG, &value);
2634 		value &= ~NBCFG_ECC_ENABLE;
2635 		amd64_write_pci_cfg(F3, NBCFG, value);
2636 	}
2637 
2638 	/* restore the NB Enable MCGCTL bit */
2639 	if (toggle_ecc_err_reporting(s, nid, OFF))
2640 		amd64_warn("Error restoring NB MCGCTL settings!\n");
2641 }
2642 
2643 /*
2644  * EDAC requires that the BIOS have ECC enabled before
2645  * taking over the processing of ECC errors. A command line
2646  * option allows to force-enable hardware ECC later in
2647  * enable_ecc_error_reporting().
2648  */
2649 static const char *ecc_msg =
2650 	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
2651 	" Either enable ECC checking or force module loading by setting "
2652 	"'ecc_enable_override'.\n"
2653 	" (Note that use of the override may cause unknown side effects.)\n";
2654 
2655 static bool ecc_enabled(struct pci_dev *F3, u16 nid)
2656 {
2657 	u32 value;
2658 	u8 ecc_en = 0;
2659 	bool nb_mce_en = false;
2660 
2661 	amd64_read_pci_cfg(F3, NBCFG, &value);
2662 
2663 	ecc_en = !!(value & NBCFG_ECC_ENABLE);
2664 	amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));
2665 
2666 	nb_mce_en = nb_mce_bank_enabled_on_node(nid);
2667 	if (!nb_mce_en)
2668 		amd64_notice("NB MCE bank disabled, set MSR "
2669 			     "0x%08x[4] on node %d to enable.\n",
2670 			     MSR_IA32_MCG_CTL, nid);
2671 
2672 	if (!ecc_en || !nb_mce_en) {
2673 		amd64_notice("%s", ecc_msg);
2674 		return false;
2675 	}
2676 	return true;
2677 }
2678 
2679 static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
2680 				 struct amd64_family_type *fam)
2681 {
2682 	struct amd64_pvt *pvt = mci->pvt_info;
2683 
2684 	mci->mtype_cap		= MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
2685 	mci->edac_ctl_cap	= EDAC_FLAG_NONE;
2686 
2687 	if (pvt->nbcap & NBCAP_SECDED)
2688 		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
2689 
2690 	if (pvt->nbcap & NBCAP_CHIPKILL)
2691 		mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
2692 
2693 	mci->edac_cap		= determine_edac_cap(pvt);
2694 	mci->mod_name		= EDAC_MOD_STR;
2695 	mci->mod_ver		= EDAC_AMD64_VERSION;
2696 	mci->ctl_name		= fam->ctl_name;
2697 	mci->dev_name		= pci_name(pvt->F2);
2698 	mci->ctl_page_to_phys	= NULL;
2699 
2700 	/* memory scrubber interface */
2701 	mci->set_sdram_scrub_rate = set_scrub_rate;
2702 	mci->get_sdram_scrub_rate = get_scrub_rate;
2703 }
2704 
2705 /*
2706  * returns a pointer to the family descriptor on success, NULL otherwise.
2707  */
2708 static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
2709 {
2710 	struct amd64_family_type *fam_type = NULL;
2711 
2712 	pvt->ext_model  = boot_cpu_data.x86_model >> 4;
2713 	pvt->stepping	= boot_cpu_data.x86_mask;
2714 	pvt->model	= boot_cpu_data.x86_model;
2715 	pvt->fam	= boot_cpu_data.x86;
2716 
2717 	switch (pvt->fam) {
2718 	case 0xf:
2719 		fam_type	= &family_types[K8_CPUS];
2720 		pvt->ops	= &family_types[K8_CPUS].ops;
2721 		break;
2722 
2723 	case 0x10:
2724 		fam_type	= &family_types[F10_CPUS];
2725 		pvt->ops	= &family_types[F10_CPUS].ops;
2726 		break;
2727 
2728 	case 0x15:
2729 		if (pvt->model == 0x30) {
2730 			fam_type = &family_types[F15_M30H_CPUS];
2731 			pvt->ops = &family_types[F15_M30H_CPUS].ops;
2732 			break;
2733 		} else if (pvt->model == 0x60) {
2734 			fam_type = &family_types[F15_M60H_CPUS];
2735 			pvt->ops = &family_types[F15_M60H_CPUS].ops;
2736 			break;
2737 		}
2738 
2739 		fam_type	= &family_types[F15_CPUS];
2740 		pvt->ops	= &family_types[F15_CPUS].ops;
2741 		break;
2742 
2743 	case 0x16:
2744 		if (pvt->model == 0x30) {
2745 			fam_type = &family_types[F16_M30H_CPUS];
2746 			pvt->ops = &family_types[F16_M30H_CPUS].ops;
2747 			break;
2748 		}
2749 		fam_type	= &family_types[F16_CPUS];
2750 		pvt->ops	= &family_types[F16_CPUS].ops;
2751 		break;
2752 
2753 	default:
2754 		amd64_err("Unsupported family!\n");
2755 		return NULL;
2756 	}
2757 
2758 	amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
2759 		     (pvt->fam == 0xf ?
2760 				(pvt->ext_model >= K8_REV_F  ? "revF or later "
2761 							     : "revE or earlier ")
2762 				 : ""), pvt->mc_node_id);
2763 	return fam_type;
2764 }
2765 
2766 static const struct attribute_group *amd64_edac_attr_groups[] = {
2767 #ifdef CONFIG_EDAC_DEBUG
2768 	&amd64_edac_dbg_group,
2769 #endif
2770 #ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
2771 	&amd64_edac_inj_group,
2772 #endif
2773 	NULL
2774 };
2775 
2776 static int init_one_instance(unsigned int nid)
2777 {
2778 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2779 	struct amd64_family_type *fam_type = NULL;
2780 	struct mem_ctl_info *mci = NULL;
2781 	struct edac_mc_layer layers[2];
2782 	struct amd64_pvt *pvt = NULL;
2783 	int err = 0, ret;
2784 
2785 	ret = -ENOMEM;
2786 	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
2787 	if (!pvt)
2788 		goto err_ret;
2789 
2790 	pvt->mc_node_id	= nid;
2791 	pvt->F3 = F3;
2792 
2793 	ret = -EINVAL;
2794 	fam_type = per_family_init(pvt);
2795 	if (!fam_type)
2796 		goto err_free;
2797 
2798 	ret = -ENODEV;
2799 	err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f2_id);
2800 	if (err)
2801 		goto err_free;
2802 
2803 	read_mc_regs(pvt);
2804 
2805 	/*
2806 	 * We need to determine how many memory channels there are. Then use
2807 	 * that information for calculating the size of the dynamic instance
2808 	 * tables in the 'mci' structure.
2809 	 */
2810 	ret = -EINVAL;
2811 	pvt->channel_count = pvt->ops->early_channel_count(pvt);
2812 	if (pvt->channel_count < 0)
2813 		goto err_siblings;
2814 
2815 	ret = -ENOMEM;
2816 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
2817 	layers[0].size = pvt->csels[0].b_cnt;
2818 	layers[0].is_virt_csrow = true;
2819 	layers[1].type = EDAC_MC_LAYER_CHANNEL;
2820 
2821 	/*
2822 	 * Always allocate two channels since we can have setups with DIMMs on
2823 	 * only one channel. Also, this simplifies handling later for the price
2824 	 * of a couple of KBs tops.
2825 	 */
2826 	layers[1].size = 2;
2827 	layers[1].is_virt_csrow = false;
2828 
2829 	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
2830 	if (!mci)
2831 		goto err_siblings;
2832 
2833 	mci->pvt_info = pvt;
2834 	mci->pdev = &pvt->F3->dev;
2835 
2836 	setup_mci_misc_attrs(mci, fam_type);
2837 
2838 	if (init_csrows(mci))
2839 		mci->edac_cap = EDAC_FLAG_NONE;
2840 
2841 	ret = -ENODEV;
2842 	if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
2843 		edac_dbg(1, "failed edac_mc_add_mc()\n");
2844 		goto err_add_mc;
2845 	}
2846 
2847 	/* register stuff with EDAC MCE */
2848 	if (report_gart_errors)
2849 		amd_report_gart_errors(true);
2850 
2851 	amd_register_ecc_decoder(decode_bus_error);
2852 
2853 	return 0;
2854 
2855 err_add_mc:
2856 	edac_mc_free(mci);
2857 
2858 err_siblings:
2859 	free_mc_sibling_devs(pvt);
2860 
2861 err_free:
2862 	kfree(pvt);
2863 
2864 err_ret:
2865 	return ret;
2866 }
2867 
2868 static int probe_one_instance(unsigned int nid)
2869 {
2870 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2871 	struct ecc_settings *s;
2872 	int ret;
2873 
2874 	ret = -ENOMEM;
2875 	s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
2876 	if (!s)
2877 		goto err_out;
2878 
2879 	ecc_stngs[nid] = s;
2880 
2881 	if (!ecc_enabled(F3, nid)) {
2882 		ret = -ENODEV;
2883 
2884 		if (!ecc_enable_override)
2885 			goto err_enable;
2886 
2887 		amd64_warn("Forcing ECC on!\n");
2888 
2889 		if (!enable_ecc_error_reporting(s, nid, F3))
2890 			goto err_enable;
2891 	}
2892 
2893 	ret = init_one_instance(nid);
2894 	if (ret < 0) {
2895 		amd64_err("Error probing instance: %d\n", nid);
2896 		restore_ecc_error_reporting(s, nid, F3);
2897 	}
2898 
2899 	return ret;
2900 
2901 err_enable:
2902 	kfree(s);
2903 	ecc_stngs[nid] = NULL;
2904 
2905 err_out:
2906 	return ret;
2907 }
2908 
2909 static void remove_one_instance(unsigned int nid)
2910 {
2911 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2912 	struct ecc_settings *s = ecc_stngs[nid];
2913 	struct mem_ctl_info *mci;
2914 	struct amd64_pvt *pvt;
2915 
2916 	mci = find_mci_by_dev(&F3->dev);
2917 	WARN_ON(!mci);
2918 
2919 	/* Remove from EDAC CORE tracking list */
2920 	mci = edac_mc_del_mc(&F3->dev);
2921 	if (!mci)
2922 		return;
2923 
2924 	pvt = mci->pvt_info;
2925 
2926 	restore_ecc_error_reporting(s, nid, F3);
2927 
2928 	free_mc_sibling_devs(pvt);
2929 
2930 	/* unregister from EDAC MCE */
2931 	amd_report_gart_errors(false);
2932 	amd_unregister_ecc_decoder(decode_bus_error);
2933 
2934 	kfree(ecc_stngs[nid]);
2935 	ecc_stngs[nid] = NULL;
2936 
2937 	/* Free the EDAC CORE resources */
2938 	mci->pvt_info = NULL;
2939 
2940 	kfree(pvt);
2941 	edac_mc_free(mci);
2942 }
2943 
2944 static void setup_pci_device(void)
2945 {
2946 	struct mem_ctl_info *mci;
2947 	struct amd64_pvt *pvt;
2948 
2949 	if (pci_ctl)
2950 		return;
2951 
2952 	mci = edac_mc_find(0);
2953 	if (!mci)
2954 		return;
2955 
2956 	pvt = mci->pvt_info;
2957 	pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
2958 	if (!pci_ctl) {
2959 		pr_warn("%s(): Unable to create PCI control\n", __func__);
2960 		pr_warn("%s(): PCI error report via EDAC not set\n", __func__);
2961 	}
2962 }
2963 
2964 static int __init amd64_edac_init(void)
2965 {
2966 	int err = -ENODEV;
2967 	int i;
2968 
2969 	opstate_init();
2970 
2971 	if (amd_cache_northbridges() < 0)
2972 		goto err_ret;
2973 
2974 	err = -ENOMEM;
2975 	ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
2976 	if (!ecc_stngs)
2977 		goto err_free;
2978 
2979 	msrs = msrs_alloc();
2980 	if (!msrs)
2981 		goto err_free;
2982 
2983 	for (i = 0; i < amd_nb_num(); i++)
2984 		if (probe_one_instance(i)) {
2985 			/* unwind properly */
2986 			while (--i >= 0)
2987 				remove_one_instance(i);
2988 
2989 			goto err_pci;
2990 		}
2991 
2992 	setup_pci_device();
2993 
2994 #ifdef CONFIG_X86_32
2995 	amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
2996 #endif
2997 
2998 	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
2999 
3000 	return 0;
3001 
3002 err_pci:
3003 	msrs_free(msrs);
3004 	msrs = NULL;
3005 
3006 err_free:
3007 	kfree(ecc_stngs);
3008 	ecc_stngs = NULL;
3009 
3010 err_ret:
3011 	return err;
3012 }
3013 
3014 static void __exit amd64_edac_exit(void)
3015 {
3016 	int i;
3017 
3018 	if (pci_ctl)
3019 		edac_pci_release_generic_ctl(pci_ctl);
3020 
3021 	for (i = 0; i < amd_nb_num(); i++)
3022 		remove_one_instance(i);
3023 
3024 	kfree(ecc_stngs);
3025 	ecc_stngs = NULL;
3026 
3027 	msrs_free(msrs);
3028 	msrs = NULL;
3029 }
3030 
3031 module_init(amd64_edac_init);
3032 module_exit(amd64_edac_exit);
3033 
3034 MODULE_LICENSE("GPL");
3035 MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
3036 		"Dave Peterson, Thayne Harbaugh");
3037 MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
3038 		EDAC_AMD64_VERSION);
3039 
3040 module_param(edac_op_state, int, 0444);
3041 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
3042