xref: /linux/drivers/edac/edac_mc.c (revision f2ee442115c9b6219083c019939a9cc0c9abb2f8)
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct channel_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
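
/*
 * Editorial sketch (not part of the original file): the table above is
 * indexed by enum mem_type, so a driver that has filled in a csrow's
 * 'mtype' field (assumed to be set by the low-level driver) can log the
 * memory type in human-readable form.
 */
static void example_log_mem_type(struct csrow_info *csrow)
{
	edac_mc_printk(csrow->mci, KERN_INFO, "csrow %d is %s\n",
		csrow->csrow_idx, edac_mem_types[csrow->mtype]);
}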

/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of 'ptr'.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = size % align;

	if (r == 0)
		return (char *)ptr;

	return (void *)(((unsigned long)ptr) + align - r);
}
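
/*
 * Worked example (editorial note): on a typical 64-bit build a 'size' of 40
 * exceeds sizeof(long), so align = sizeof(long long) = 8 and r = 40 % 8 == 0;
 * the pointer passes through untouched.  A hypothetical private struct of
 * size 12 gives r = 4, advancing the pointer by align - r = 4 bytes.  Note
 * the pad depends only on 'size', which is why a constant 'size' lets the
 * compiler reduce the whole call to a constant addition, as the comment
 * above describes.
 */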

/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt:	size of private storage needed
 * @nr_csrows:	number of csrows needed for this MC
 * @nr_chans:	number of channels for the MC
 * @edac_index:	unique index used to identify this mc structure
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL	allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				unsigned nr_chans, int edac_index)
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct channel_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
	int err;

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = (struct mem_ctl_info *)0;
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
	size = ((unsigned long)pvt) + sz_pvt;

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* At this point the root kobj is valid.  To 'free' the object,
	 * edac_mc_unregister_sysfs_main_kobj() must be called; it performs
	 * the kobj unregistration, and the actual free happens in the
	 * kobject release callback.
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
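
/*
 * Editorial sketch (not part of the original file): how a low-level driver
 * might carve out its private state through edac_mc_alloc().  The struct,
 * the 4-csrow x 2-channel geometry, and the index are hypothetical.
 */
struct example_pvt {
	u32 err_inject_reg;	/* hypothetical controller register cache */
};

static struct mem_ctl_info *example_alloc(int mc_idx)
{
	struct mem_ctl_info *mci;
	struct example_pvt *pvt;

	mci = edac_mc_alloc(sizeof(*pvt), 4, 2, mc_idx);
	if (mci == NULL)
		return NULL;

	/* pvt_info points inside the same kzalloc'ed chunk as mci */
	pvt = mci->pvt_info;
	pvt->err_inject_reg = 0;

	return mci;
}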

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, so we need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);

	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}
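
/*
 * Editorial note: in this tree the function above is reached from the
 * edac_core module-parameter hook, so (path assumed, not verified here)
 * something like
 *	echo 2000 > /sys/module/edac_core/parameters/edac_mc_poll_msec
 * re-arms every polled controller's delayed work with the new period.
 */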

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list; the caller
 *	must have assigned a unique value to mci->mc_idx beforehand
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
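
/*
 * Editorial sketch (not part of the original file): the usual registration
 * flow in a low-level driver's probe routine.  Field setup is abbreviated
 * and the geometry is hypothetical; the key points are that mci->dev must
 * be set (edac_mc_del_mc() later looks the mci up by it) and that a failed
 * edac_mc_add_mc() is followed by edac_mc_free().
 */
static int example_probe(struct device *dev)
{
	struct mem_ctl_info *mci;

	mci = edac_mc_alloc(0, 4, 2, 0);
	if (mci == NULL)
		return -ENOMEM;

	mci->dev = dev;
	mci->mod_name = "example_edac";
	mci->ctl_name = "Example MC";
	/* ... fill mtype_cap, edac_ctl_cap, csrow geometry, etc. ... */

	if (edac_mc_add_mc(mci)) {
		edac_mc_free(mci);
		return -ENODEV;
	}

	return 0;
}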

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to the 'struct device' representing the mci structure
 *	to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr, KM_BOUNCE_READ);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];

		if (csrow->nr_pages == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
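
/*
 * Worked example (editorial note): a csrow with first_page 0x1000,
 * last_page 0x1fff and page_mask 0 matches any page in [0x1000, 0x1fff].
 * A nonzero mask lets interleaved layouts reject pages whose masked bits
 * differ from first_page's: with page_mask 0x1 and an even first_page,
 * only even-numbered pages in the range land on this csrow.
 */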

/* FIXME - settable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
{
	unsigned long remapped_page;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, syndrome, row, channel,
			mci->csrows[row].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependent and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page, offset_in_page,
				mci->csrows[row].grain);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
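
/*
 * Editorial sketch (not part of the original file): how a polled driver's
 * edac_check callback might feed decoded hardware state into the handler
 * above.  example_read_ce_log() is a hypothetical stand-in for reading the
 * controller's error-log registers; here it is a stub that reports nothing
 * pending.
 */
static int example_read_ce_log(struct mem_ctl_info *mci, unsigned long *page,
			unsigned long *offset, unsigned long *syndrome,
			int *row, int *channel)
{
	return 0;	/* stub: pretend no error is pending */
}

static void example_edac_check(struct mem_ctl_info *mci)
{
	unsigned long page, offset, syndrome;
	int row, channel;

	if (!example_read_ce_log(mci, &page, &offset, &syndrome,
				&row, &channel))
		return;	/* nothing logged since the last poll */

	edac_mc_handle_ce(mci, page, offset, syndrome, row, channel,
			"example CE");
}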

void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_log_ce())
		edac_mc_printk(mci, KERN_WARNING,
			"CE - no information available: %s\n", msg);

	mci->ce_noinfo_count++;
	mci->ce_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);

void edac_mc_handle_ue(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[row].channels[0].label);
	len -= chars;
	pos += chars;

	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
		chan++) {
		chars = snprintf(pos, len + 1, ":%s",
				 mci->csrows[row].channels[chan].label);
		len -= chars;
		pos += chars;
	}

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, mci->csrows[row].grain, row,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, row, labels, msg);

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);

void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_WARNING,
			"UE - no information available: %s\n", msg);
	mci->ue_noinfo_count++;
	mci->ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[csrow].channels[channela].label);
	len -= chars;
	pos += chars;
	chars = snprintf(pos, len + 1, "-%s",
			 mci->csrows[csrow].channels[channelb].label);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
			unsigned int csrow, unsigned int channel, char *msg)
{
	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel,
			mci->csrows[csrow].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
920