xref: /linux/drivers/edac/amd76x_edac.c (revision d91517839e5d95adc0cf4b28caa7af62a71de526)
1 /*
2  * AMD 76x Memory Controller kernel module
3  * (C) 2003 Linux Networx (http://lnxi.com)
4  * This file may be distributed under the terms of the
5  * GNU General Public License.
6  *
7  * Written by Thayne Harbaugh
8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
9  *	http://www.anime.net/~goemon/linux-ecc/
10  *
11  * $Id: edac_amd76x.c,v 1.4.2.5 2005/10/05 00:43:44 dsp_llnl Exp $
12  *
13  */
14 
15 #include <linux/module.h>
16 #include <linux/init.h>
17 #include <linux/pci.h>
18 #include <linux/pci_ids.h>
19 #include <linux/edac.h>
20 #include "edac_core.h"
21 
22 #define AMD76X_REVISION	" Ver: 2.0.2"
23 #define EDAC_MOD_STR	"amd76x_edac"
24 
/* Emit a driver-level message via edac_printk(), tagged with "amd76x". */
#define amd76x_printk(level, fmt, ...) \
	edac_printk(level, "amd76x", fmt, ##__VA_ARGS__)
27 
/*
 * Emit a message for a specific memory controller via
 * edac_mc_chipset_printk(), tagged with "amd76x".
 */
#define amd76x_mc_printk(mci, level, fmt, ...) \
	edac_mc_chipset_printk(mci, level, "amd76x", fmt, ##__VA_ARGS__)
30 
/* Number of chip-select rows; amd76x_probe1() uses it as the size of the
 * chip-select layer passed to edac_mc_alloc().
 */
#define AMD76X_NR_CSROWS 8
/* DIMM count; NOTE(review): not referenced by any code visible in this file. */
#define AMD76X_NR_DIMMS  4
33 
34 /* AMD 76x register addresses - device 0 function 0 - PCI bridge */
35 
/*
 * Mode and status of ECC (32b)
 *
 * 31:16 reserved
 * 15:14 SERR enabled: x1=ue 1x=ce
 * 13    reserved
 * 12    diag: disabled, enabled
 * 11:10 mode: dis, EC, ECC, ECC+scrub
 *  9:8  status: x1=ue 1x=ce
 *  7:4  UE cs row
 *  3:0  CE cs row
 *
 * "ue" is an uncorrectable error, "ce" a correctable error.  The driver
 * tests bit 8 for UE and bit 9 for CE, reads the failing chip-select row
 * from bits 7:4 (UE) or 3:0 (CE), and decodes the mode from bits 11:10
 * at probe time.
 */
#define AMD76X_ECC_MODE_STATUS	0x48
47 
/*
 * DRAM Mode and status (32b)
 *
 * 31:26 clock disable 5 - 0
 * 25    SDRAM init
 * 24    reserved
 * 23    mode register service
 * 22:21 suspend to RAM
 * 20    burst refresh enable
 * 19    refresh disable
 * 18    reserved
 * 17:16 cycles-per-refresh
 * 15:8  reserved
 *  7:0  x4 mode enable 7 - 0
 *
 * The driver only consumes bits 7:0: amd76x_init_csrows() tests bit
 * <row index> to decide between DEV_X4 and DEV_UNKNOWN for that row.
 */
#define AMD76X_DRAM_MODE_STATUS	0x58
62 
/*
 * Memory base address (8 x 32b)
 *
 * 31:23 chip-select base
 * 22:16 reserved
 * 15:7  chip-select mask
 *  6:3  reserved
 *  2:1  address mode
 *  0    chip-select enable
 *
 * This is the offset of the first register; amd76x_init_csrows() reads
 * the register for chip-select row i at AMD76X_MEM_BASE_ADDR + i * 4.
 */
#define AMD76X_MEM_BASE_ADDR	0xC0
72 
/* Snapshot of the ECC state, filled in by amd76x_get_error_info(). */
struct amd76x_error_info {
	u32 ecc_mode_status;	/* raw value read from AMD76X_ECC_MODE_STATUS */
};
76 
/* Supported chip variants; the values are used as indices into amd76x_devs[]. */
enum amd76x_chips {
	AMD761 = 0,
	AMD762 = 1,
};
81 
/* Static description of one supported chip variant. */
struct amd76x_dev_info {
	const char *ctl_name;	/* name amd76x_probe1() stores in mci->ctl_name */
};
85 
86 static const struct amd76x_dev_info amd76x_devs[] = {
87 	[AMD761] = {
88 		.ctl_name = "AMD761"},
89 	[AMD762] = {
90 		.ctl_name = "AMD762"},
91 };
92 
/*
 * Generic EDAC PCI control, assigned in amd76x_probe1() from
 * edac_pci_create_generic_ctl() and released in amd76x_remove_one().
 * amd76x_probe1() treats a NULL result as "creation failed" and carries on.
 */
static struct edac_pci_ctl_info *amd76x_pci;
94 
95 /**
96  *	amd76x_get_error_info	-	fetch error information
97  *	@mci: Memory controller
98  *	@info: Info to fill in
99  *
100  *	Fetch and store the AMD76x ECC status. Clear pending status
101  *	on the chip so that further errors will be reported
102  */
103 static void amd76x_get_error_info(struct mem_ctl_info *mci,
104 				struct amd76x_error_info *info)
105 {
106 	struct pci_dev *pdev;
107 
108 	pdev = to_pci_dev(mci->pdev);
109 	pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS,
110 			&info->ecc_mode_status);
111 
112 	if (info->ecc_mode_status & BIT(8))
113 		pci_write_bits32(pdev, AMD76X_ECC_MODE_STATUS,
114 				 (u32) BIT(8), (u32) BIT(8));
115 
116 	if (info->ecc_mode_status & BIT(9))
117 		pci_write_bits32(pdev, AMD76X_ECC_MODE_STATUS,
118 				 (u32) BIT(9), (u32) BIT(9));
119 }
120 
121 /**
122  *	amd76x_process_error_info	-	Error check
123  *	@mci: Memory controller
124  *	@info: Previously fetched information from chip
125  *	@handle_errors: 1 if we should do recovery
126  *
127  *	Process the chip state and decide if an error has occurred.
128  *	A return of 1 indicates an error. Also if handle_errors is true
129  *	then attempt to handle and clean up after the error
130  */
131 static int amd76x_process_error_info(struct mem_ctl_info *mci,
132 				struct amd76x_error_info *info,
133 				int handle_errors)
134 {
135 	int error_found;
136 	u32 row;
137 
138 	error_found = 0;
139 
140 	/*
141 	 *      Check for an uncorrectable error
142 	 */
143 	if (info->ecc_mode_status & BIT(8)) {
144 		error_found = 1;
145 
146 		if (handle_errors) {
147 			row = (info->ecc_mode_status >> 4) & 0xf;
148 			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
149 					     mci->csrows[row]->first_page, 0, 0,
150 					     row, 0, -1,
151 					     mci->ctl_name, "");
152 		}
153 	}
154 
155 	/*
156 	 *      Check for a correctable error
157 	 */
158 	if (info->ecc_mode_status & BIT(9)) {
159 		error_found = 1;
160 
161 		if (handle_errors) {
162 			row = info->ecc_mode_status & 0xf;
163 			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
164 					     mci->csrows[row]->first_page, 0, 0,
165 					     row, 0, -1,
166 					     mci->ctl_name, "");
167 		}
168 	}
169 
170 	return error_found;
171 }
172 
173 /**
174  *	amd76x_check	-	Poll the controller
175  *	@mci: Memory controller
176  *
177  *	Called by the poll handlers this function reads the status
178  *	from the controller and checks for errors.
179  */
180 static void amd76x_check(struct mem_ctl_info *mci)
181 {
182 	struct amd76x_error_info info;
183 	edac_dbg(3, "\n");
184 	amd76x_get_error_info(mci, &info);
185 	amd76x_process_error_info(mci, &info, 1);
186 }
187 
188 static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
189 			enum edac_type edac_mode)
190 {
191 	struct csrow_info *csrow;
192 	struct dimm_info *dimm;
193 	u32 mba, mba_base, mba_mask, dms;
194 	int index;
195 
196 	for (index = 0; index < mci->nr_csrows; index++) {
197 		csrow = mci->csrows[index];
198 		dimm = csrow->channels[0]->dimm;
199 
200 		/* find the DRAM Chip Select Base address and mask */
201 		pci_read_config_dword(pdev,
202 				AMD76X_MEM_BASE_ADDR + (index * 4), &mba);
203 
204 		if (!(mba & BIT(0)))
205 			continue;
206 
207 		mba_base = mba & 0xff800000UL;
208 		mba_mask = ((mba & 0xff80) << 16) | 0x7fffffUL;
209 		pci_read_config_dword(pdev, AMD76X_DRAM_MODE_STATUS, &dms);
210 		csrow->first_page = mba_base >> PAGE_SHIFT;
211 		dimm->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
212 		csrow->last_page = csrow->first_page + dimm->nr_pages - 1;
213 		csrow->page_mask = mba_mask >> PAGE_SHIFT;
214 		dimm->grain = dimm->nr_pages << PAGE_SHIFT;
215 		dimm->mtype = MEM_RDDR;
216 		dimm->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
217 		dimm->edac_mode = edac_mode;
218 	}
219 }
220 
221 /**
222  *	amd76x_probe1	-	Perform set up for detected device
223  *	@pdev; PCI device detected
224  *	@dev_idx: Device type index
225  *
226  *	We have found an AMD76x and now need to set up the memory
227  *	controller status reporting. We configure and set up the
228  *	memory controller reporting and claim the device.
229  */
230 static int amd76x_probe1(struct pci_dev *pdev, int dev_idx)
231 {
232 	static const enum edac_type ems_modes[] = {
233 		EDAC_NONE,
234 		EDAC_EC,
235 		EDAC_SECDED,
236 		EDAC_SECDED
237 	};
238 	struct mem_ctl_info *mci;
239 	struct edac_mc_layer layers[2];
240 	u32 ems;
241 	u32 ems_mode;
242 	struct amd76x_error_info discard;
243 
244 	edac_dbg(0, "\n");
245 	pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &ems);
246 	ems_mode = (ems >> 10) & 0x3;
247 
248 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
249 	layers[0].size = AMD76X_NR_CSROWS;
250 	layers[0].is_virt_csrow = true;
251 	layers[1].type = EDAC_MC_LAYER_CHANNEL;
252 	layers[1].size = 1;
253 	layers[1].is_virt_csrow = false;
254 	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
255 
256 	if (mci == NULL)
257 		return -ENOMEM;
258 
259 	edac_dbg(0, "mci = %p\n", mci);
260 	mci->pdev = &pdev->dev;
261 	mci->mtype_cap = MEM_FLAG_RDDR;
262 	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
263 	mci->edac_cap = ems_mode ?
264 		(EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_NONE;
265 	mci->mod_name = EDAC_MOD_STR;
266 	mci->mod_ver = AMD76X_REVISION;
267 	mci->ctl_name = amd76x_devs[dev_idx].ctl_name;
268 	mci->dev_name = pci_name(pdev);
269 	mci->edac_check = amd76x_check;
270 	mci->ctl_page_to_phys = NULL;
271 
272 	amd76x_init_csrows(mci, pdev, ems_modes[ems_mode]);
273 	amd76x_get_error_info(mci, &discard);	/* clear counters */
274 
275 	/* Here we assume that we will never see multiple instances of this
276 	 * type of memory controller.  The ID is therefore hardcoded to 0.
277 	 */
278 	if (edac_mc_add_mc(mci)) {
279 		edac_dbg(3, "failed edac_mc_add_mc()\n");
280 		goto fail;
281 	}
282 
283 	/* allocating generic PCI control info */
284 	amd76x_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
285 	if (!amd76x_pci) {
286 		printk(KERN_WARNING
287 			"%s(): Unable to create PCI control\n",
288 			__func__);
289 		printk(KERN_WARNING
290 			"%s(): PCI error report via EDAC not setup\n",
291 			__func__);
292 	}
293 
294 	/* get this far and it's successful */
295 	edac_dbg(3, "success\n");
296 	return 0;
297 
298 fail:
299 	edac_mc_free(mci);
300 	return -ENODEV;
301 }
302 
303 /* returns count (>= 0), or negative on error */
304 static int amd76x_init_one(struct pci_dev *pdev,
305 			   const struct pci_device_id *ent)
306 {
307 	edac_dbg(0, "\n");
308 
309 	/* don't need to call pci_enable_device() */
310 	return amd76x_probe1(pdev, ent->driver_data);
311 }
312 
313 /**
314  *	amd76x_remove_one	-	driver shutdown
315  *	@pdev: PCI device being handed back
316  *
317  *	Called when the driver is unloaded. Find the matching mci
318  *	structure for the device then delete the mci and free the
319  *	resources.
320  */
321 static void amd76x_remove_one(struct pci_dev *pdev)
322 {
323 	struct mem_ctl_info *mci;
324 
325 	edac_dbg(0, "\n");
326 
327 	if (amd76x_pci)
328 		edac_pci_release_generic_ctl(amd76x_pci);
329 
330 	if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
331 		return;
332 
333 	edac_mc_free(mci);
334 }
335 
336 static const struct pci_device_id amd76x_pci_tbl[] = {
337 	{
338 	 PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
339 	 AMD762},
340 	{
341 	 PCI_VEND_DEV(AMD, FE_GATE_700E), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
342 	 AMD761},
343 	{
344 	 0,
345 	 }			/* 0 terminated list. */
346 };
347 
348 MODULE_DEVICE_TABLE(pci, amd76x_pci_tbl);
349 
350 static struct pci_driver amd76x_driver = {
351 	.name = EDAC_MOD_STR,
352 	.probe = amd76x_init_one,
353 	.remove = amd76x_remove_one,
354 	.id_table = amd76x_pci_tbl,
355 };
356 
357 static int __init amd76x_init(void)
358 {
359        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
360        opstate_init();
361 
362 	return pci_register_driver(&amd76x_driver);
363 }
364 
/* Module exit point: unregister the PCI driver registered by amd76x_init(). */
static void __exit amd76x_exit(void)
{
	pci_unregister_driver(&amd76x_driver);
}
369 
/* Hook the init/exit functions into module load and unload. */
module_init(amd76x_init);
module_exit(amd76x_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh");
MODULE_DESCRIPTION("MC support for AMD 76x memory controllers");

/*
 * edac_op_state is not defined in this file; it is exported as an int
 * module parameter with permissions 0444.
 */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
379