xref: /linux/drivers/edac/edac_mc.c (revision 26b0d14106954ae46d2f4f7eec3481828a210f7d)
1 /*
2  * edac_mc kernel module
3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4  * This file may be distributed under the terms of the
5  * GNU General Public License.
6  *
7  * Written by Thayne Harbaugh
8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
9  *	http://www.anime.net/~goemon/linux-ecc/
10  *
11  * Modified by Dave Peterson and Doug Thompson
12  *
13  */
14 
15 #include <linux/module.h>
16 #include <linux/proc_fs.h>
17 #include <linux/kernel.h>
18 #include <linux/types.h>
19 #include <linux/smp.h>
20 #include <linux/init.h>
21 #include <linux/sysctl.h>
22 #include <linux/highmem.h>
23 #include <linux/timer.h>
24 #include <linux/slab.h>
25 #include <linux/jiffies.h>
26 #include <linux/spinlock.h>
27 #include <linux/list.h>
28 #include <linux/ctype.h>
29 #include <linux/edac.h>
30 #include <asm/uaccess.h>
31 #include <asm/page.h>
32 #include <asm/edac.h>
33 #include "edac_core.h"
34 #include "edac_module.h"
35 
36 /* lock to memory controller's control array */
37 static DEFINE_MUTEX(mem_ctls_mutex);
38 static LIST_HEAD(mc_devices);
39 
40 #ifdef CONFIG_EDAC_DEBUG
41 
42 static void edac_mc_dump_channel(struct rank_info *chan)
43 {
44 	debugf4("\tchannel = %p\n", chan);
45 	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
46 	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
47 	debugf4("\tchannel->dimm = %p\n", chan->dimm);
48 }
49 
50 static void edac_mc_dump_dimm(struct dimm_info *dimm)
51 {
52 	int i;
53 
54 	debugf4("\tdimm = %p\n", dimm);
55 	debugf4("\tdimm->label = '%s'\n", dimm->label);
56 	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
57 	debugf4("\tdimm location ");
58 	for (i = 0; i < dimm->mci->n_layers; i++) {
59 		printk(KERN_CONT "%d", dimm->location[i]);
60 		if (i < dimm->mci->n_layers - 1)
61 			printk(KERN_CONT ".");
62 	}
63 	printk(KERN_CONT "\n");
64 	debugf4("\tdimm->grain = %d\n", dimm->grain);
65 	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
66 }
67 
68 static void edac_mc_dump_csrow(struct csrow_info *csrow)
69 {
70 	debugf4("\tcsrow = %p\n", csrow);
71 	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
72 	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
73 	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
74 	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
75 	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
76 	debugf4("\tcsrow->channels = %p\n", csrow->channels);
77 	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
78 }
79 
80 static void edac_mc_dump_mci(struct mem_ctl_info *mci)
81 {
82 	debugf3("\tmci = %p\n", mci);
83 	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
84 	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
85 	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
86 	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
87 	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
88 		mci->nr_csrows, mci->csrows);
89 	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
90 		mci->tot_dimms, mci->dimms);
91 	debugf3("\tdev = %p\n", mci->dev);
92 	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
93 	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
94 }
95 
96 #endif				/* CONFIG_EDAC_DEBUG */
97 
98 /*
99  * Keep this table in sync with enum mem_type (include/linux/edac.h)
100  */
101 const char *edac_mem_types[] = {
102 	"Empty csrow",
103 	"Reserved csrow type",
104 	"Unknown csrow type",
105 	"Fast page mode RAM",
106 	"Extended data out RAM",
107 	"Burst Extended data out RAM",
108 	"Single data rate SDRAM",
109 	"Registered single data rate SDRAM",
110 	"Double data rate SDRAM",
111 	"Registered Double data rate SDRAM",
112 	"Rambus DRAM",
113 	"Unbuffered DDR2 RAM",
114 	"Fully buffered DDR2",
115 	"Registered DDR2 RAM",
116 	"Rambus XDR",
117 	"Unbuffered DDR3 RAM",
118 	"Registered DDR3 RAM",
119 };
120 EXPORT_SYMBOL_GPL(edac_mem_types);
121 
122 /**
123  * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
124  * @p:		pointer to a pointer with the memory offset to be used. At
125  *		return, this will be incremented to point to the next offset
126  * @size:	Size of the data structure to be reserved
127  * @n_elems:	Number of elements that should be reserved
128  *
129  * If 'size' is a constant, the compiler will optimize this whole function
130  * down to either a no-op or the addition of a constant to the value of '*p'.
131  *
132  * The 'p' pointer is needed so that successive calls keep advancing to the
133  * proper offsets when a structure is allocated together with its embedded
134  * structs, as edac_device_alloc_ctl_info() and edac_mc_alloc() do, for
135  * example.
136  *
137  * At return, the pointer 'p' will be incremented to be used on a next call
138  * to this function.
139  */
140 void *edac_align_ptr(void **p, unsigned size, int n_elems)
141 {
142 	unsigned align, r;
143 	void *ptr = *p;
144 
145 	*p += size * n_elems;
146 
147 	/*
148 	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
149 	 * 'size'.  Adjust 'p' so that its alignment is at least as
150 	 * stringent as what the compiler would provide for X and return
151 	 * the aligned result.
152 	 * Here we assume that the alignment of a "long long" is the most
153 	 * stringent alignment that the compiler will ever provide by default.
154 	 * As far as I know, this is a reasonable assumption.
155 	 */
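	/*
	 * Worked example (illustrative) on a build where sizeof(long) == 8:
	 * a 12-byte element gets sizeof(long long) (8-byte) alignment, while
	 * a 2-byte element only needs sizeof(short) (2-byte) alignment.
	 */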
156 	if (size > sizeof(long))
157 		align = sizeof(long long);
158 	else if (size > sizeof(int))
159 		align = sizeof(long);
160 	else if (size > sizeof(short))
161 		align = sizeof(int);
162 	else if (size > sizeof(char))
163 		align = sizeof(short);
164 	else
165 		return (char *)ptr;
166 
167 	r = (unsigned long)ptr % align;
168 
169 	if (r == 0)
170 		return (char *)ptr;
171 
172 	*p += align - r;
173 
174 	return (void *)(((unsigned long)ptr) + align - r);
175 }
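
/*
 * Illustrative use of edac_align_ptr() (a sketch, not code from this file;
 * "struct foo", "counters" and "n" are hypothetical). Callers start with a
 * NULL cursor so the returned values are really offsets, compute the total
 * size, allocate once, and then rebase every offset:
 *
 *	void *ptr = NULL, *base;
 *	struct foo *f  = edac_align_ptr(&ptr, sizeof(*f), 1);
 *	u32 *counters  = edac_align_ptr(&ptr, sizeof(u32), n);
 *	size_t size    = (unsigned long)counters + n * sizeof(u32);
 *
 *	base     = kzalloc(size, GFP_KERNEL);
 *	f        = (struct foo *)((char *)base + (unsigned long)f);
 *	counters = (u32 *)((char *)base + (unsigned long)counters);
 *
 * edac_mc_alloc() below does the same dance for its real structures.
 */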
176 
177 /**
178  * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
179  * @mc_num:		Memory controller number
180  * @n_layers:		Number of MC hierarchy layers
181  * @layers:		Describes each layer as seen by the Memory Controller
182  * @sz_pvt:		size of private storage needed
183  *
184  *
185  * Everything is kmalloc'ed as one big chunk - more efficient.
186  * It can only be used if all structures have the same lifetime - otherwise
187  * you have to allocate and initialize your own structures.
188  *
189  * Use edac_mc_free() to free mc structures allocated by this function.
190  *
191  * NOTE: drivers handle multi-rank memories in different ways: in some
192  * drivers, one multi-rank memory stick is mapped as one entry, while, in
193  * others, a single multi-rank memory stick would be mapped into several
194  * entries. Currently, this function will allocate multiple struct dimm_info
195  * in such scenarios, as grouping the multiple ranks would require driver changes.
196  *
197  * Returns:
198  *	On failure: NULL
199  *	On success: struct mem_ctl_info pointer
200  */
201 struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
202 				   unsigned n_layers,
203 				   struct edac_mc_layer *layers,
204 				   unsigned sz_pvt)
205 {
206 	struct mem_ctl_info *mci;
207 	struct edac_mc_layer *layer;
208 	struct csrow_info *csi, *csr;
209 	struct rank_info *chi, *chp, *chan;
210 	struct dimm_info *dimm;
211 	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
212 	unsigned pos[EDAC_MAX_LAYERS];
213 	unsigned size, tot_dimms = 1, count = 1;
214 	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
215 	void *pvt, *p, *ptr = NULL;
216 	int i, j, err, row, chn, n, len;
217 	bool per_rank = false;
218 
219 	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
220 	/*
221 	 * Calculate the total number of dimms and csrows/cschannels while
222 	 * in the old API emulation mode
223 	 */
224 	for (i = 0; i < n_layers; i++) {
225 		tot_dimms *= layers[i].size;
226 		if (layers[i].is_virt_csrow)
227 			tot_csrows *= layers[i].size;
228 		else
229 			tot_channels *= layers[i].size;
230 
231 		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
232 			per_rank = true;
233 	}
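	/*
	 * Illustrative example: two layers, EDAC_MC_LAYER_CHANNEL of size 2
	 * (is_virt_csrow = false) and EDAC_MC_LAYER_SLOT of size 4
	 * (is_virt_csrow = true), give tot_dimms = 8, tot_csrows = 4 and
	 * tot_channels = 2, with per_rank left false.
	 */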
234 
235 	/* Figure out the offsets of the various items from the start of an mc
236 	 * structure.  We want the alignment of each item to be at least as
237 	 * stringent as what the compiler would provide if we could simply
238 	 * hardcode everything into a single struct.
239 	 */
240 	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
241 	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
242 	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
243 	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
244 	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
245 	for (i = 0; i < n_layers; i++) {
246 		count *= layers[i].size;
247 		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
248 		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
249 		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
250 		tot_errcount += 2 * count;
251 	}
252 
253 	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
254 	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
255 	size = ((unsigned long)pvt) + sz_pvt;
256 
257 	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
258 		__func__, size,
259 		tot_dimms,
260 		per_rank ? "ranks" : "dimms",
261 		tot_csrows * tot_channels);
262 	mci = kzalloc(size, GFP_KERNEL);
263 	if (mci == NULL)
264 		return NULL;
265 
266 	/* Adjust pointers so they point within the memory we just allocated
267 	 * rather than an imaginary chunk of memory located at address 0.
268 	 */
269 	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
270 	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
271 	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
272 	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
273 	for (i = 0; i < n_layers; i++) {
274 		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
275 		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
276 	}
277 	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
278 
279 	/* setup index and various internal pointers */
280 	mci->mc_idx = mc_num;
281 	mci->csrows = csi;
282 	mci->dimms  = dimm;
283 	mci->tot_dimms = tot_dimms;
284 	mci->pvt_info = pvt;
285 	mci->n_layers = n_layers;
286 	mci->layers = layer;
287 	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
288 	mci->nr_csrows = tot_csrows;
289 	mci->num_cschannel = tot_channels;
290 	mci->mem_is_per_rank = per_rank;
291 
292 	/*
293 	 * Fill the csrow struct
294 	 */
295 	for (row = 0; row < tot_csrows; row++) {
296 		csr = &csi[row];
297 		csr->csrow_idx = row;
298 		csr->mci = mci;
299 		csr->nr_channels = tot_channels;
300 		chp = &chi[row * tot_channels];
301 		csr->channels = chp;
302 
303 		for (chn = 0; chn < tot_channels; chn++) {
304 			chan = &chp[chn];
305 			chan->chan_idx = chn;
306 			chan->csrow = csr;
307 		}
308 	}
309 
310 	/*
311 	 * Fill the dimm struct
312 	 */
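	/*
	 * pos[] steps through every layer combination like an odometer (the
	 * last layer advancing fastest), while row/chn cycle through the
	 * legacy csrow/channel pairs so that each DIMM also gets an old-API
	 * csrow/channel mapping.
	 */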
313 	memset(&pos, 0, sizeof(pos));
314 	row = 0;
315 	chn = 0;
316 	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
317 		per_rank ? "ranks" : "dimms");
318 	for (i = 0; i < tot_dimms; i++) {
319 		chan = &csi[row].channels[chn];
320 		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
321 			       pos[0], pos[1], pos[2]);
322 		dimm->mci = mci;
323 
324 		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
325 			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
326 			pos[0], pos[1], pos[2], row, chn);
327 
328 		/*
329 		 * Copy DIMM location and initialize it.
330 		 */
331 		len = sizeof(dimm->label);
332 		p = dimm->label;
333 		n = snprintf(p, len, "mc#%u", mc_num);
334 		p += n;
335 		len -= n;
336 		for (j = 0; j < n_layers; j++) {
337 			n = snprintf(p, len, "%s#%u",
338 				     edac_layer_name[layers[j].type],
339 				     pos[j]);
340 			p += n;
341 			len -= n;
342 			dimm->location[j] = pos[j];
343 
344 			if (len <= 0)
345 				break;
346 		}
347 
348 		/* Link it to the csrows old API data */
349 		chan->dimm = dimm;
350 		dimm->csrow = row;
351 		dimm->cschannel = chn;
352 
353 		/* Increment csrow location */
354 		row++;
355 		if (row == tot_csrows) {
356 			row = 0;
357 			chn++;
358 		}
359 
360 		/* Increment dimm location */
361 		for (j = n_layers - 1; j >= 0; j--) {
362 			pos[j]++;
363 			if (pos[j] < layers[j].size)
364 				break;
365 			pos[j] = 0;
366 		}
367 	}
368 
369 	mci->op_state = OP_ALLOC;
370 	INIT_LIST_HEAD(&mci->grp_kobj_list);
371 
372 	/*
373 	 * Initialize the 'root' kobj for the edac_mc controller
374 	 */
375 	err = edac_mc_register_sysfs_main_kobj(mci);
376 	if (err) {
377 		kfree(mci);
378 		return NULL;
379 	}
380 
381 	/* at this point, the root kobj is valid, and in order to
382 	 * 'free' the object, then the function:
383 	 *      edac_mc_unregister_sysfs_main_kobj() must be called
384 	 * which will perform kobj unregistration and the actual free
385 	 * will occur during the kobject callback operation
386 	 */
387 	return mci;
388 }
389 EXPORT_SYMBOL_GPL(edac_mc_alloc);
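
/*
 * Illustrative driver-side use of edac_mc_alloc() (a sketch, not code from
 * this file; "struct my_pvt" is a hypothetical private struct): a controller
 * with 2 channels and 4 DIMM slots per channel could be described as
 *
 *	struct edac_mc_layer layers[2];
 *
 *	layers[0].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[0].size = 2;
 *	layers[0].is_virt_csrow = false;
 *	layers[1].type = EDAC_MC_LAYER_SLOT;
 *	layers[1].size = 4;
 *	layers[1].is_virt_csrow = true;
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_pvt));
 *
 * followed by filling the dimm/csrow information and registering the
 * controller with edac_mc_add_mc().
 */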
390 
391 /**
392  * edac_mc_free
393  *	'Free' a previously allocated 'mci' structure
394  * @mci: pointer to a struct mem_ctl_info structure
395  */
396 void edac_mc_free(struct mem_ctl_info *mci)
397 {
398 	debugf1("%s()\n", __func__);
399 
400 	edac_mc_unregister_sysfs_main_kobj(mci);
401 
402 	/* free the mci instance memory here */
403 	kfree(mci);
404 }
405 EXPORT_SYMBOL_GPL(edac_mc_free);
406 
407 
408 /**
409  * find_mci_by_dev
410  *
411  *	scan list of controllers looking for the one that manages
412  *	the 'dev' device
413  * @dev: pointer to the struct device associated with the MCI
414  */
415 struct mem_ctl_info *find_mci_by_dev(struct device *dev)
416 {
417 	struct mem_ctl_info *mci;
418 	struct list_head *item;
419 
420 	debugf3("%s()\n", __func__);
421 
422 	list_for_each(item, &mc_devices) {
423 		mci = list_entry(item, struct mem_ctl_info, link);
424 
425 		if (mci->dev == dev)
426 			return mci;
427 	}
428 
429 	return NULL;
430 }
431 EXPORT_SYMBOL_GPL(find_mci_by_dev);
432 
433 /*
434  * Check and clear whether an NMI-type handler has flagged an error; always true in poll mode
435  */
436 static int edac_mc_assert_error_check_and_clear(void)
437 {
438 	int old_state;
439 
440 	if (edac_op_state == EDAC_OPSTATE_POLL)
441 		return 1;
442 
443 	old_state = edac_err_assert;
444 	edac_err_assert = 0;
445 
446 	return old_state;
447 }
448 
449 /*
450  * edac_mc_workq_function
451  *	performs the operation scheduled by a workq request
452  */
453 static void edac_mc_workq_function(struct work_struct *work_req)
454 {
455 	struct delayed_work *d_work = to_delayed_work(work_req);
456 	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
457 
458 	mutex_lock(&mem_ctls_mutex);
459 
460 	/* if this control struct has moved to offline state, we are done */
461 	if (mci->op_state == OP_OFFLINE) {
462 		mutex_unlock(&mem_ctls_mutex);
463 		return;
464 	}
465 
466 	/* Only poll controllers that are running polled and have a check */
467 	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
468 		mci->edac_check(mci);
469 
470 	mutex_unlock(&mem_ctls_mutex);
471 
472 	/* Reschedule */
473 	queue_delayed_work(edac_workqueue, &mci->work,
474 			msecs_to_jiffies(edac_mc_get_poll_msec()));
475 }
476 
477 /*
478  * edac_mc_workq_setup
479  *	initialize a workq item for this mci
480  *	passing in the new delay period in msec
481  *
482  *	locking model:
483  *
484  *		called with the mem_ctls_mutex held
485  */
486 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
487 {
488 	debugf0("%s()\n", __func__);
489 
490 	/* if this instance is not in the POLL state, then simply return */
491 	if (mci->op_state != OP_RUNNING_POLL)
492 		return;
493 
494 	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
495 	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
496 }
497 
498 /*
499  * edac_mc_workq_teardown
500  *	stop the workq processing on this mci
501  *
502  *	locking model:
503  *
504  *		called WITHOUT lock held
505  */
506 static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
507 {
508 	int status;
509 
510 	if (mci->op_state != OP_RUNNING_POLL)
511 		return;
512 
513 	status = cancel_delayed_work(&mci->work);
514 	if (status == 0) {
515 		debugf0("%s() not canceled, flush the queue\n",
516 			__func__);
517 
518 		/* workq instance might be running, wait for it */
519 		flush_workqueue(edac_workqueue);
520 	}
521 }
522 
523 /*
524  * edac_mc_reset_delay_period(int value)
525  *
526  *	user space has updated our poll period value, need to
527  *	reset our workq delays
528  */
529 void edac_mc_reset_delay_period(int value)
530 {
531 	struct mem_ctl_info *mci;
532 	struct list_head *item;
533 
534 	mutex_lock(&mem_ctls_mutex);
535 
536 	/* scan the list and turn off all workq timers, doing so under lock
537 	 */
538 	list_for_each(item, &mc_devices) {
539 		mci = list_entry(item, struct mem_ctl_info, link);
540 
541 		if (mci->op_state == OP_RUNNING_POLL)
542 			cancel_delayed_work(&mci->work);
543 	}
544 
545 	mutex_unlock(&mem_ctls_mutex);
546 
547 
548 	/* re-walk the list, and reset the poll delay */
549 	mutex_lock(&mem_ctls_mutex);
550 
551 	list_for_each(item, &mc_devices) {
552 		mci = list_entry(item, struct mem_ctl_info, link);
553 
554 		edac_mc_workq_setup(mci, (unsigned long) value);
555 	}
556 
557 	mutex_unlock(&mem_ctls_mutex);
558 }
559 
560 
561 
562 /* Return 0 on success, 1 on failure.
563  * Before calling this function, caller must
564  * assign a unique value to mci->mc_idx.
565  *
566  *	locking model:
567  *
568  *		called with the mem_ctls_mutex lock held
569  */
570 static int add_mc_to_global_list(struct mem_ctl_info *mci)
571 {
572 	struct list_head *item, *insert_before;
573 	struct mem_ctl_info *p;
574 
575 	insert_before = &mc_devices;
576 
577 	p = find_mci_by_dev(mci->dev);
578 	if (unlikely(p != NULL))
579 		goto fail0;
580 
581 	list_for_each(item, &mc_devices) {
582 		p = list_entry(item, struct mem_ctl_info, link);
583 
584 		if (p->mc_idx >= mci->mc_idx) {
585 			if (unlikely(p->mc_idx == mci->mc_idx))
586 				goto fail1;
587 
588 			insert_before = item;
589 			break;
590 		}
591 	}
592 
593 	list_add_tail_rcu(&mci->link, insert_before);
594 	atomic_inc(&edac_handlers);
595 	return 0;
596 
597 fail0:
598 	edac_printk(KERN_WARNING, EDAC_MC,
599 		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
600 		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
601 	return 1;
602 
603 fail1:
604 	edac_printk(KERN_WARNING, EDAC_MC,
605 		"bug in low-level driver: attempt to assign\n"
606 		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
607 	return 1;
608 }
609 
610 static void del_mc_from_global_list(struct mem_ctl_info *mci)
611 {
612 	atomic_dec(&edac_handlers);
613 	list_del_rcu(&mci->link);
614 
615 	/* these are for safe removal of devices from global list while
616 	 * NMI handlers may be traversing list
617 	 */
618 	synchronize_rcu();
619 	INIT_LIST_HEAD(&mci->link);
620 }
621 
622 /**
623  * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
624  *
625  * If found, return a pointer to the structure.
626  * Else return NULL.
627  *
628  * Caller must hold mem_ctls_mutex.
629  */
630 struct mem_ctl_info *edac_mc_find(int idx)
631 {
632 	struct list_head *item;
633 	struct mem_ctl_info *mci;
634 
635 	list_for_each(item, &mc_devices) {
636 		mci = list_entry(item, struct mem_ctl_info, link);
637 
638 		if (mci->mc_idx >= idx) {
639 			if (mci->mc_idx == idx)
640 				return mci;
641 
642 			break;
643 		}
644 	}
645 
646 	return NULL;
647 }
648 EXPORT_SYMBOL(edac_mc_find);
649 
650 /**
651  * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
652  *                 create sysfs entries associated with mci structure
653  * @mci: pointer to the mci structure to be added to the list
654  *
655  * Return:
656  *	0	Success
657  *	!0	Failure
658  */
659 
660 /* FIXME - should a warning be printed if no error detection? correction? */
661 int edac_mc_add_mc(struct mem_ctl_info *mci)
662 {
663 	debugf0("%s()\n", __func__);
664 
665 #ifdef CONFIG_EDAC_DEBUG
666 	if (edac_debug_level >= 3)
667 		edac_mc_dump_mci(mci);
668 
669 	if (edac_debug_level >= 4) {
670 		int i;
671 
672 		for (i = 0; i < mci->nr_csrows; i++) {
673 			int j;
674 
675 			edac_mc_dump_csrow(&mci->csrows[i]);
676 			for (j = 0; j < mci->csrows[i].nr_channels; j++)
677 				edac_mc_dump_channel(&mci->csrows[i].
678 						channels[j]);
679 		}
680 		for (i = 0; i < mci->tot_dimms; i++)
681 			edac_mc_dump_dimm(&mci->dimms[i]);
682 	}
683 #endif
684 	mutex_lock(&mem_ctls_mutex);
685 
686 	if (add_mc_to_global_list(mci))
687 		goto fail0;
688 
689 	/* set load time so that error rate can be tracked */
690 	mci->start_time = jiffies;
691 
692 	if (edac_create_sysfs_mci_device(mci)) {
693 		edac_mc_printk(mci, KERN_WARNING,
694 			"failed to create sysfs device\n");
695 		goto fail1;
696 	}
697 
698 	/* If there IS a check routine, then we are running POLLED */
699 	if (mci->edac_check != NULL) {
700 		/* This instance is NOW RUNNING */
701 		mci->op_state = OP_RUNNING_POLL;
702 
703 		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
704 	} else {
705 		mci->op_state = OP_RUNNING_INTERRUPT;
706 	}
707 
708 	/* Report action taken */
709 	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
710 		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
711 
712 	mutex_unlock(&mem_ctls_mutex);
713 	return 0;
714 
715 fail1:
716 	del_mc_from_global_list(mci);
717 
718 fail0:
719 	mutex_unlock(&mem_ctls_mutex);
720 	return 1;
721 }
722 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
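
/*
 * Typical driver probe flow (an illustrative sketch, not code from this
 * file; my_check(), struct my_pvt and pdev are hypothetical driver symbols):
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 *	mci->dev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR3;
 *	mci->mod_name = "my_edac";
 *	mci->ctl_name = "my_controller";
 *	mci->edac_check = my_check;	(optional: enables polled mode)
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */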
723 
724 /**
725  * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
726  *                 remove mci structure from global list
727  * @dev: Pointer to 'struct device' representing mci structure to remove.
728  *
729  * Return pointer to removed mci structure, or NULL if device not found.
730  */
731 struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
732 {
733 	struct mem_ctl_info *mci;
734 
735 	debugf0("%s()\n", __func__);
736 
737 	mutex_lock(&mem_ctls_mutex);
738 
739 	/* find the requested mci struct in the global list */
740 	mci = find_mci_by_dev(dev);
741 	if (mci == NULL) {
742 		mutex_unlock(&mem_ctls_mutex);
743 		return NULL;
744 	}
745 
746 	del_mc_from_global_list(mci);
747 	mutex_unlock(&mem_ctls_mutex);
748 
749 	/* flush workq processes */
750 	edac_mc_workq_teardown(mci);
751 
752 	/* marking MCI offline */
753 	mci->op_state = OP_OFFLINE;
754 
755 	/* remove from sysfs */
756 	edac_remove_sysfs_mci_device(mci);
757 
758 	edac_printk(KERN_INFO, EDAC_MC,
759 		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
760 		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
761 
762 	return mci;
763 }
764 EXPORT_SYMBOL_GPL(edac_mc_del_mc);
765 
766 static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
767 				u32 size)
768 {
769 	struct page *pg;
770 	void *virt_addr;
771 	unsigned long flags = 0;
772 
773 	debugf3("%s()\n", __func__);
774 
775 	/* ECC error page was not in our memory. Ignore it. */
776 	if (!pfn_valid(page))
777 		return;
778 
779 	/* Find the actual page structure then map it and fix */
780 	pg = pfn_to_page(page);
781 
782 	if (PageHighMem(pg))
783 		local_irq_save(flags);
784 
785 	virt_addr = kmap_atomic(pg);
786 
787 	/* Perform architecture specific atomic scrub operation */
788 	atomic_scrub(virt_addr + offset, size);
789 
790 	/* Unmap and complete */
791 	kunmap_atomic(virt_addr);
792 
793 	if (PageHighMem(pg))
794 		local_irq_restore(flags);
795 }
796 
797 /* FIXME - should return -1 */
798 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
799 {
800 	struct csrow_info *csrows = mci->csrows;
801 	int row, i, j, n;
802 
803 	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
804 	row = -1;
805 
806 	for (i = 0; i < mci->nr_csrows; i++) {
807 		struct csrow_info *csrow = &csrows[i];
808 		n = 0;
809 		for (j = 0; j < csrow->nr_channels; j++) {
810 			struct dimm_info *dimm = csrow->channels[j].dimm;
811 			n += dimm->nr_pages;
812 		}
813 		if (n == 0)
814 			continue;
815 
816 		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
817 			"mask(0x%lx)\n", mci->mc_idx, __func__,
818 			csrow->first_page, page, csrow->last_page,
819 			csrow->page_mask);
820 
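		/*
		 * A csrow may map an interleaved range: the page must fall
		 * between first_page and last_page and also match first_page
		 * under page_mask.
		 */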
821 		if ((page >= csrow->first_page) &&
822 		    (page <= csrow->last_page) &&
823 		    ((page & csrow->page_mask) ==
824 		     (csrow->first_page & csrow->page_mask))) {
825 			row = i;
826 			break;
827 		}
828 	}
829 
830 	if (row == -1)
831 		edac_mc_printk(mci, KERN_ERR,
832 			"could not look up page error address %lx\n",
833 			(unsigned long)page);
834 
835 	return row;
836 }
837 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
838 
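/* Indexed by enum edac_mc_layer_type; keep the two in sync */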
839 const char *edac_layer_name[] = {
840 	[EDAC_MC_LAYER_BRANCH] = "branch",
841 	[EDAC_MC_LAYER_CHANNEL] = "channel",
842 	[EDAC_MC_LAYER_SLOT] = "slot",
843 	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
844 };
845 EXPORT_SYMBOL_GPL(edac_layer_name);
846 
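/*
 * edac_inc_ce_error()/edac_inc_ue_error() bump the MC-wide counter and then
 * either the "no info" counter (when the error location is unknown) or one
 * counter per known layer. The per-layer arrays are indexed by the row-major
 * position within the first i + 1 layers: e.g. with layer sizes {2, 4} and
 * pos = {1, 3}, layer 0 uses index 1 and layer 1 uses index 1 * 4 + 3 = 7.
 * Counting stops at the first unknown (negative) position.
 */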
847 static void edac_inc_ce_error(struct mem_ctl_info *mci,
848 				    bool enable_per_layer_report,
849 				    const int pos[EDAC_MAX_LAYERS])
850 {
851 	int i, index = 0;
852 
853 	mci->ce_mc++;
854 
855 	if (!enable_per_layer_report) {
856 		mci->ce_noinfo_count++;
857 		return;
858 	}
859 
860 	for (i = 0; i < mci->n_layers; i++) {
861 		if (pos[i] < 0)
862 			break;
863 		index += pos[i];
864 		mci->ce_per_layer[i][index]++;
865 
866 		if (i < mci->n_layers - 1)
867 			index *= mci->layers[i + 1].size;
868 	}
869 }
870 
871 static void edac_inc_ue_error(struct mem_ctl_info *mci,
872 				    bool enable_per_layer_report,
873 				    const int pos[EDAC_MAX_LAYERS])
874 {
875 	int i, index = 0;
876 
877 	mci->ue_mc++;
878 
879 	if (!enable_per_layer_report) {
880 		mci->ue_noinfo_count++;
881 		return;
882 	}
883 
884 	for (i = 0; i < mci->n_layers; i++) {
885 		if (pos[i] < 0)
886 			break;
887 		index += pos[i];
888 		mci->ue_per_layer[i][index]++;
889 
890 		if (i < mci->n_layers - 1)
891 			index *= mci->layers[i + 1].size;
892 	}
893 }
894 
895 static void edac_ce_error(struct mem_ctl_info *mci,
896 			  const int pos[EDAC_MAX_LAYERS],
897 			  const char *msg,
898 			  const char *location,
899 			  const char *label,
900 			  const char *detail,
901 			  const char *other_detail,
902 			  const bool enable_per_layer_report,
903 			  const unsigned long page_frame_number,
904 			  const unsigned long offset_in_page,
905 			  u32 grain)
906 {
907 	unsigned long remapped_page;
908 
909 	if (edac_mc_get_log_ce()) {
910 		if (other_detail && *other_detail)
911 			edac_mc_printk(mci, KERN_WARNING,
912 				       "CE %s on %s (%s%s - %s)\n",
913 				       msg, label, location,
914 				       detail, other_detail);
915 		else
916 			edac_mc_printk(mci, KERN_WARNING,
917 				       "CE %s on %s (%s%s)\n",
918 				       msg, label, location,
919 				       detail);
920 	}
921 	edac_inc_ce_error(mci, enable_per_layer_report, pos);
922 
923 	if (mci->scrub_mode & SCRUB_SW_SRC) {
924 		/*
925 		 * Some memory controllers (called MCs below) can remap
926 		 * memory so that it is still available at a different
927 		 * address when PCI devices map into memory.
928 		 * MCs that can't do this lose the memory where PCI
929 		 * devices are mapped. This mapping is MC-dependent
930 		 * and so we call back into the MC driver for it to
931 		 * map the MC page to a physical (CPU) page which can
932 		 * then be mapped to a virtual page - which can then
933 		 * be scrubbed.
934 		 */
935 		remapped_page = mci->ctl_page_to_phys ?
936 			mci->ctl_page_to_phys(mci, page_frame_number) :
937 			page_frame_number;
938 
939 		edac_mc_scrub_block(remapped_page,
940 					offset_in_page, grain);
941 	}
942 }
943 
944 static void edac_ue_error(struct mem_ctl_info *mci,
945 			  const int pos[EDAC_MAX_LAYERS],
946 			  const char *msg,
947 			  const char *location,
948 			  const char *label,
949 			  const char *detail,
950 			  const char *other_detail,
951 			  const bool enable_per_layer_report)
952 {
953 	if (edac_mc_get_log_ue()) {
954 		if (other_detail && *other_detail)
955 			edac_mc_printk(mci, KERN_WARNING,
956 				       "UE %s on %s (%s%s - %s)\n",
957 			               msg, label, location, detail,
958 				       other_detail);
959 		else
960 			edac_mc_printk(mci, KERN_WARNING,
961 				       "UE %s on %s (%s%s)\n",
962 			               msg, label, location, detail);
963 	}
964 
965 	if (edac_mc_get_panic_on_ue()) {
966 		if (other_detail && *other_detail)
967 			panic("UE %s on %s (%s%s - %s)\n",
968 			      msg, label, location, detail, other_detail);
969 		else
970 			panic("UE %s on %s (%s%s)\n",
971 			      msg, label, location, detail);
972 	}
973 
974 	edac_inc_ue_error(mci, enable_per_layer_report, pos);
975 }
976 
977 #define OTHER_LABEL " or "
978 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
979 			  struct mem_ctl_info *mci,
980 			  const unsigned long page_frame_number,
981 			  const unsigned long offset_in_page,
982 			  const unsigned long syndrome,
983 			  const int layer0,
984 			  const int layer1,
985 			  const int layer2,
986 			  const char *msg,
987 			  const char *other_detail,
988 			  const void *mcelog)
989 {
990 	/* FIXME: too much for stack: move it to some pre-allocated area */
991 	char detail[80], location[80];
992 	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
993 	char *p;
994 	int row = -1, chan = -1;
995 	int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
996 	int i;
997 	u32 grain;
998 	bool enable_per_layer_report = false;
999 
1000 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1001 
1002 	/*
1003 	 * Check if the event report is consistent and if the memory
1004 	 * location is known. If it is known, enable_per_layer_report will be
1005 	 * true, the DIMM(s) label info will be filled and the per-layer
1006 	 * error counters will be incremented.
1007 	 */
1008 	for (i = 0; i < mci->n_layers; i++) {
1009 		if (pos[i] >= (int)mci->layers[i].size) {
1010 			if (type == HW_EVENT_ERR_CORRECTED)
1011 				p = "CE";
1012 			else
1013 				p = "UE";
1014 
1015 			edac_mc_printk(mci, KERN_ERR,
1016 				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1017 				       edac_layer_name[mci->layers[i].type],
1018 				       pos[i], mci->layers[i].size);
1019 			/*
1020 			 * Instead of just returning, let's use what's known
1021 			 * about the error. The increment routines and the
1022 			 * DIMM filter logic will do the right thing by
1023 			 * pointing to the likely damaged DIMMs.
1024 			 */
1025 			pos[i] = -1;
1026 		}
1027 		if (pos[i] >= 0)
1028 			enable_per_layer_report = true;
1029 	}
1030 
1031 	/*
1032 	 * Get the dimm label/grain that applies to the match criteria.
1033 	 * As the error algorithm may not be able to point to just one memory
1034 	 * stick, the logic here will get all possible labels that could
1035 	 * potentially be affected by the error.
1036 	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1037 	 * to know only the MC channel pair (also called "branch") and the MC
1038 	 * dimm, but not the exact channel, as the memory is arranged in pairs
1039 	 * where each memory stick belongs to a separate channel within the
1040 	 * same branch.
1041 	 */
1042 	grain = 0;
1043 	p = label;
1044 	*p = '\0';
1045 	for (i = 0; i < mci->tot_dimms; i++) {
1046 		struct dimm_info *dimm = &mci->dimms[i];
1047 
1048 		if (layer0 >= 0 && layer0 != dimm->location[0])
1049 			continue;
1050 		if (layer1 >= 0 && layer1 != dimm->location[1])
1051 			continue;
1052 		if (layer2 >= 0 && layer2 != dimm->location[2])
1053 			continue;
1054 
1055 		/* get the max grain, over the error match range */
1056 		if (dimm->grain > grain)
1057 			grain = dimm->grain;
1058 
1059 		/*
1060 		 * If the error is memory-controller wide, there's no need to
1061 		 * seek for the affected DIMMs because the whole
1062 		 * channel/memory controller/...  may be affected.
1063 		 * Also, don't show errors for empty DIMM slots.
1064 		 */
1065 		if (enable_per_layer_report && dimm->nr_pages) {
1066 			if (p != label) {
1067 				strcpy(p, OTHER_LABEL);
1068 				p += strlen(OTHER_LABEL);
1069 			}
1070 			strcpy(p, dimm->label);
1071 			p += strlen(p);
1072 			*p = '\0';
1073 
1074 			/*
1075 			 * get csrow/channel of the DIMM, in order to allow
1076 			 * incrementing the compat API counters
1077 			 */
1078 			debugf4("%s: %s csrows map: (%d,%d)\n",
1079 				__func__,
1080 				mci->mem_is_per_rank ? "rank" : "dimm",
1081 				dimm->csrow, dimm->cschannel);
1082 
1083 			if (row == -1)
1084 				row = dimm->csrow;
1085 			else if (row >= 0 && row != dimm->csrow)
1086 				row = -2;
1087 
1088 			if (chan == -1)
1089 				chan = dimm->cschannel;
1090 			else if (chan >= 0 && chan != dimm->cschannel)
1091 				chan = -2;
1092 		}
1093 	}
1094 
1095 	if (!enable_per_layer_report) {
1096 		strcpy(label, "any memory");
1097 	} else {
1098 		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
1099 			__func__, row, chan);
1100 		if (p == label)
1101 			strcpy(label, "unknown memory");
1102 		if (type == HW_EVENT_ERR_CORRECTED) {
1103 			if (row >= 0) {
1104 				mci->csrows[row].ce_count++;
1105 				if (chan >= 0)
1106 					mci->csrows[row].channels[chan].ce_count++;
1107 			}
1108 		} else
1109 			if (row >= 0)
1110 				mci->csrows[row].ue_count++;
1111 	}
1112 
1113 	/* Fill the RAM location data */
1114 	p = location;
1115 	for (i = 0; i < mci->n_layers; i++) {
1116 		if (pos[i] < 0)
1117 			continue;
1118 
1119 		p += sprintf(p, "%s:%d ",
1120 			     edac_layer_name[mci->layers[i].type],
1121 			     pos[i]);
1122 	}
1123 
1124 	/* Memory type dependent details about the error */
1125 	if (type == HW_EVENT_ERR_CORRECTED) {
1126 		snprintf(detail, sizeof(detail),
1127 			"page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
1128 			page_frame_number, offset_in_page,
1129 			grain, syndrome);
1130 		edac_ce_error(mci, pos, msg, location, label, detail,
1131 			      other_detail, enable_per_layer_report,
1132 			      page_frame_number, offset_in_page, grain);
1133 	} else {
1134 		snprintf(detail, sizeof(detail),
1135 			"page:0x%lx offset:0x%lx grain:%d",
1136 			page_frame_number, offset_in_page, grain);
1137 
1138 		edac_ue_error(mci, pos, msg, location, label, detail,
1139 			      other_detail, enable_per_layer_report);
1140 	}
1141 }
1142 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
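
/*
 * Illustrative call from a driver's check routine or interrupt handler (a
 * sketch, not code from this file). The three layer positions follow the
 * layer order the driver passed to edac_mc_alloc(); -1 means "unknown":
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     pfn, offset, syndrome,
 *			     chan, slot, -1,
 *			     "ECC read error", "", NULL);
 */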
1143