xref: /linux/drivers/edac/edac_mc.c (revision c4c11dd160a8cc98f402c4e12f94b1572e822ffd)
1 /*
2  * edac_mc kernel module
3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4  * This file may be distributed under the terms of the
5  * GNU General Public License.
6  *
7  * Written by Thayne Harbaugh
8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
9  *	http://www.anime.net/~goemon/linux-ecc/
10  *
11  * Modified by Dave Peterson and Doug Thompson
12  *
13  */
14 
15 #include <linux/module.h>
16 #include <linux/proc_fs.h>
17 #include <linux/kernel.h>
18 #include <linux/types.h>
19 #include <linux/smp.h>
20 #include <linux/init.h>
21 #include <linux/sysctl.h>
22 #include <linux/highmem.h>
23 #include <linux/timer.h>
24 #include <linux/slab.h>
25 #include <linux/jiffies.h>
26 #include <linux/spinlock.h>
27 #include <linux/list.h>
28 #include <linux/ctype.h>
29 #include <linux/edac.h>
30 #include <linux/bitops.h>
31 #include <asm/uaccess.h>
32 #include <asm/page.h>
33 #include <asm/edac.h>
34 #include "edac_core.h"
35 #include "edac_module.h"
36 
37 #define CREATE_TRACE_POINTS
38 #define TRACE_INCLUDE_PATH ../../include/ras
39 #include <ras/ras_event.h>
40 
41 /* lock to memory controller's control array */
42 static DEFINE_MUTEX(mem_ctls_mutex);
43 static LIST_HEAD(mc_devices);
44 
45 /*
46  * Used to lock EDAC MC to just one module, avoiding two drivers e. g.
47  *	apei/ghes and i7core_edac to be used at the same time.
48  */
49 static void const *edac_mc_owner;
50 
51 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
52 			         unsigned len)
53 {
54 	struct mem_ctl_info *mci = dimm->mci;
55 	int i, n, count = 0;
56 	char *p = buf;
57 
58 	for (i = 0; i < mci->n_layers; i++) {
59 		n = snprintf(p, len, "%s %d ",
60 			      edac_layer_name[mci->layers[i].type],
61 			      dimm->location[i]);
62 		p += n;
63 		len -= n;
64 		count += n;
65 		if (!len)
66 			break;
67 	}
68 
69 	return count;
70 }
71 
72 #ifdef CONFIG_EDAC_DEBUG
73 
/* Dump one channel's (rank's) bookkeeping pointers at debug level 4 */
static void edac_mc_dump_channel(struct rank_info *chan)
{
	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "    channel = %p\n", chan);
	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}
81 
82 static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
83 {
84 	char location[80];
85 
86 	edac_dimm_info_location(dimm, location, sizeof(location));
87 
88 	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
89 		 dimm->mci->csbased ? "rank" : "dimm",
90 		 number, location, dimm->csrow, dimm->cschannel);
91 	edac_dbg(4, "  dimm = %p\n", dimm);
92 	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
93 	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
94 	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
95 	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
96 }
97 
/* Dump one chip-select row's bookkeeping at debug level 4 */
static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}
109 
/* Dump the controller-wide fields of @mci at debug levels 3 and 4 */
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}
126 
127 #endif				/* CONFIG_EDAC_DEBUG */
128 
129 /*
130  * keep those in sync with the enum mem_type
131  */
const char *edac_mem_types[] = {	/* indexed by enum mem_type; keep order in sync */
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
151 EXPORT_SYMBOL_GPL(edac_mem_types);
152 
153 /**
154  * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
155  * @p:		pointer to a pointer with the memory offset to be used. At
156  *		return, this will be incremented to point to the next offset
157  * @size:	Size of the data structure to be reserved
158  * @n_elems:	Number of elements that should be reserved
159  *
160  * If 'size' is a constant, the compiler will optimize this whole function
161  * down to either a no-op or the addition of a constant to the value of '*p'.
162  *
163  * The 'p' pointer is absolutely needed to keep the proper advancing
164  * further in memory to the proper offsets when allocating the struct along
165  * with its embedded structs, as edac_device_alloc_ctl_info() does it
166  * above, for example.
167  *
168  * At return, the pointer 'p' will be incremented to be used on a next call
169  * to this function.
170  */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	/*
	 * Align the *offset* being handed out ('ptr'), not the address of
	 * the caller's cursor variable ('p'): using 'p' here made the
	 * result depend on where that local pointer happened to live.
	 */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}
207 
208 static void _edac_mc_free(struct mem_ctl_info *mci)
209 {
210 	int i, chn, row;
211 	struct csrow_info *csr;
212 	const unsigned int tot_dimms = mci->tot_dimms;
213 	const unsigned int tot_channels = mci->num_cschannel;
214 	const unsigned int tot_csrows = mci->nr_csrows;
215 
216 	if (mci->dimms) {
217 		for (i = 0; i < tot_dimms; i++)
218 			kfree(mci->dimms[i]);
219 		kfree(mci->dimms);
220 	}
221 	if (mci->csrows) {
222 		for (row = 0; row < tot_csrows; row++) {
223 			csr = mci->csrows[row];
224 			if (csr) {
225 				if (csr->channels) {
226 					for (chn = 0; chn < tot_channels; chn++)
227 						kfree(csr->channels[chn]);
228 					kfree(csr->channels);
229 				}
230 				kfree(csr);
231 			}
232 		}
233 		kfree(mci->csrows);
234 	}
235 	kfree(mci);
236 }
237 
238 /**
239  * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
240  * @mc_num:		Memory controller number
241  * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
243  * @size_pvt:		size of private storage needed
244  *
245  *
246  * Everything is kmalloc'ed as one big chunk - more efficient.
247  * Only can be used if all structures have the same lifetime - otherwise
248  * you have to allocate and initialize your own structures.
249  *
250  * Use edac_mc_free() to free mc structures allocated by this function.
251  *
252  * NOTE: drivers handle multi-rank memories in different ways: in some
253  * drivers, one multi-rank memory stick is mapped as one entry, while, in
254  * others, a single multi-rank memory stick would be mapped into several
255  * entries. Currently, this function will allocate multiple struct dimm_info
256  * on such scenarios, as grouping the multiple ranks require drivers change.
257  *
258  * Returns:
259  *	On failure: NULL
260  *	On success: struct mem_ctl_info pointer
261  */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csr;
	struct rank_info *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, row, chn, n, len, off;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 *
	 * The first pass below runs edac_align_ptr() against a NULL base,
	 * so every "pointer" computed here is really an offset; the total
	 * size of the single-shot allocation falls out at the end.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		edac_dbg(4, "errcount layer %d size %d\n", i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		 size,
		 tot_dimms,
		 per_rank ? "ranks" : "dimms",
		 tot_csrows * tot_channels);

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->csbased = per_rank;

	/*
	 * Allocate and fill the csrow/channels structs
	 */
	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
	if (!mci->csrows)
		goto error;
	for (row = 0; row < tot_csrows; row++) {
		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
		if (!csr)
			goto error;
		mci->csrows[row] = csr;
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
					GFP_KERNEL);
		if (!csr->channels)
			goto error;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
			if (!chan)
				goto error;
			csr->channels[chn] = chan;
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Allocate and fill the dimm structs
	 */
	mci->dimms  = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
	if (!mci->dimms)
		goto error;

	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	for (i = 0; i < tot_dimms; i++) {
		chan = mci->csrows[row]->channels[chn];
		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
		if (off < 0 || off >= tot_dimms) {
			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
			goto error;
		}

		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
		if (!dimm)
			goto error;
		mci->dimms[off] = dimm;
		dimm->mci = mci;

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location: iterate channel-major when
		 * layer 0 is a virtual csrow, row-major otherwise */
		if (layers[0].is_virt_csrow) {
			chn++;
			if (chn == tot_channels) {
				chn = 0;
				row++;
			}
		} else {
			row++;
			if (row == tot_csrows) {
				row = 0;
				chn++;
			}
		}

		/* Increment dimm location (odometer-style, last layer fastest) */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;

	return mci;

error:
	/* _edac_mc_free() copes with the partially built object */
	_edac_mc_free(mci);

	return NULL;
}
457 EXPORT_SYMBOL_GPL(edac_mc_alloc);
458 
459 /**
460  * edac_mc_free
461  *	'Free' a previously allocated 'mci' structure
462  * @mci: pointer to a struct mem_ctl_info structure
463  */
464 void edac_mc_free(struct mem_ctl_info *mci)
465 {
466 	edac_dbg(1, "\n");
467 
468 	/* If we're not yet registered with sysfs free only what was allocated
469 	 * in edac_mc_alloc().
470 	 */
471 	if (!device_is_registered(&mci->dev)) {
472 		_edac_mc_free(mci);
473 		return;
474 	}
475 
476 	/* the mci instance is freed here, when the sysfs object is dropped */
477 	edac_unregister_sysfs(mci);
478 }
479 EXPORT_SYMBOL_GPL(edac_mc_free);
480 
481 
482 /**
483  * find_mci_by_dev
484  *
485  *	scan list of controllers looking for the one that manages
486  *	the 'dev' device
487  * @dev: pointer to a struct device related with the MCI
488  */
489 struct mem_ctl_info *find_mci_by_dev(struct device *dev)
490 {
491 	struct mem_ctl_info *mci;
492 	struct list_head *item;
493 
494 	edac_dbg(3, "\n");
495 
496 	list_for_each(item, &mc_devices) {
497 		mci = list_entry(item, struct mem_ctl_info, link);
498 
499 		if (mci->pdev == dev)
500 			return mci;
501 	}
502 
503 	return NULL;
504 }
505 EXPORT_SYMBOL_GPL(find_mci_by_dev);
506 
507 /*
508  * handler for EDAC to check if NMI type handler has asserted interrupt
509  */
510 static int edac_mc_assert_error_check_and_clear(void)
511 {
512 	int old_state;
513 
514 	if (edac_op_state == EDAC_OPSTATE_POLL)
515 		return 1;
516 
517 	old_state = edac_err_assert;
518 	edac_err_assert = 0;
519 
520 	return old_state;
521 }
522 
523 /*
524  * edac_mc_workq_function
525  *	performs the operation scheduled by a workq request
526  */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule; note this is done after dropping the mutex */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}
550 
551 /*
552  * edac_mc_workq_setup
553  *	initialize a workq item for this mci
554  *	passing in the new delay period in msec
555  *
556  *	locking model:
557  *
558  *		called with the mem_ctls_mutex held
559  */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	edac_dbg(0, "\n");

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	/* (re)arm the periodic poll; mod_delayed_work() also covers the
	 * case where the work is already queued with a different delay */
	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}
571 
572 /*
573  * edac_mc_workq_teardown
574  *	stop the workq processing on this mci
575  *
576  *	locking model:
577  *
578  *		called WITHOUT lock held
579  */
580 static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
581 {
582 	int status;
583 
584 	if (mci->op_state != OP_RUNNING_POLL)
585 		return;
586 
587 	status = cancel_delayed_work(&mci->work);
588 	if (status == 0) {
589 		edac_dbg(0, "not canceled, flush the queue\n");
590 
591 		/* workq instance might be running, wait for it */
592 		flush_workqueue(edac_workqueue);
593 	}
594 }
595 
596 /*
597  * edac_mc_reset_delay_period(unsigned long value)
598  *
599  *	user space has updated our poll period value, need to
600  *	reset our workq delays
601  */
602 void edac_mc_reset_delay_period(int value)
603 {
604 	struct mem_ctl_info *mci;
605 	struct list_head *item;
606 
607 	mutex_lock(&mem_ctls_mutex);
608 
609 	list_for_each(item, &mc_devices) {
610 		mci = list_entry(item, struct mem_ctl_info, link);
611 
612 		edac_mc_workq_setup(mci, (unsigned long) value);
613 	}
614 
615 	mutex_unlock(&mem_ctls_mutex);
616 }
617 
618 
619 
620 /* Return 0 on success, 1 on failure.
621  * Before calling this function, caller must
622  * assign a unique value to mci->mc_idx.
623  *
624  *	locking model:
625  *
626  *		called with the mem_ctls_mutex lock held
627  */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	/* each device may be managed by at most one mci */
	p = find_mci_by_dev(mci->pdev);
	if (unlikely(p != NULL))
		goto fail0;

	/* keep mc_devices sorted by ascending mc_idx; stop at the first
	 * entry with an index >= ours and insert in front of it */
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	/* device already claimed by another controller instance */
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	/* duplicate mc_idx: low-level driver bug */
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}
667 
668 static int del_mc_from_global_list(struct mem_ctl_info *mci)
669 {
670 	int handlers = atomic_dec_return(&edac_handlers);
671 	list_del_rcu(&mci->link);
672 
673 	/* these are for safe removal of devices from global list while
674 	 * NMI handlers may be traversing list
675 	 */
676 	synchronize_rcu();
677 	INIT_LIST_HEAD(&mci->link);
678 
679 	return handlers;
680 }
681 
682 /**
683  * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
684  *
685  * If found, return a pointer to the structure.
686  * Else return NULL.
687  *
688  * Caller must hold mem_ctls_mutex.
689  */
690 struct mem_ctl_info *edac_mc_find(int idx)
691 {
692 	struct list_head *item;
693 	struct mem_ctl_info *mci;
694 
695 	list_for_each(item, &mc_devices) {
696 		mci = list_entry(item, struct mem_ctl_info, link);
697 
698 		if (mci->mc_idx >= idx) {
699 			if (mci->mc_idx == idx)
700 				return mci;
701 
702 			break;
703 		}
704 	}
705 
706 	return NULL;
707 }
708 EXPORT_SYMBOL(edac_mc_find);
709 
710 /**
711  * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
712  *                 create sysfs entries associated with mci structure
713  * @mci: pointer to the mci structure to be added to the list
714  *
715  * Return:
716  *	0	Success
717  *	!0	Failure
718  */
719 
720 /* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	int ret = -EINVAL;
	edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		/* dump only csrows/channels/dimms that actually have pages */
		for (i = 0; i < mci->nr_csrows; i++) {
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
			int j;

			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			if (mci->dimms[i]->nr_pages)
				edac_mc_dump_dimm(mci->dimms[i], i);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	/* only one EDAC MC module may own the MC at a time */
	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
		ret = -EPERM;
		goto fail0;
	}

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	edac_mc_owner = mci->mod_name;

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return ret;
}
797 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
798 
799 /**
800  * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
801  *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
803  *
804  * Return pointer to removed mci structure, or NULL if device not found.
805  */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	edac_dbg(0, "\n");

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	/* if this was the last registered handler, release the
	 * single-module ownership of the EDAC MC */
	if (!del_mc_from_global_list(mci))
		edac_mc_owner = NULL;
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
840 EXPORT_SYMBOL_GPL(edac_mc_del_mc);
841 
/*
 * edac_mc_scrub_block - rewrite @size bytes at page/offset via the
 * arch-specific atomic_scrub() so the hardware recomputes the ECC.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	edac_dbg(3, "\n");

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	/* NOTE(review): IRQs are disabled only around the highmem case of
	 * the atomic kmap — presumably to protect the per-CPU mapping
	 * slot; confirm against kmap_atomic() constraints */
	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}
872 
873 /* FIXME - should return -1 */
/* Return the index of the csrow containing @page, or -1 if none matches */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info **csrows = mci->csrows;
	int row, i, j, n;

	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = csrows[i];
		n = 0;
		/* skip rows whose dimms hold no pages at all */
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j]->dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
			 mci->mc_idx,
			 csrow->first_page, page, csrow->last_page,
			 csrow->page_mask);

		/* page must be in range and agree with the row's mask */
		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
913 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
914 
/* Human-readable layer names, indexed by the EDAC_MC_LAYER_* constants */
const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
	[EDAC_MC_LAYER_ALL_MEM] = "memory",
};
922 EXPORT_SYMBOL_GPL(edac_layer_name);
923 
924 static void edac_inc_ce_error(struct mem_ctl_info *mci,
925 			      bool enable_per_layer_report,
926 			      const int pos[EDAC_MAX_LAYERS],
927 			      const u16 count)
928 {
929 	int i, index = 0;
930 
931 	mci->ce_mc += count;
932 
933 	if (!enable_per_layer_report) {
934 		mci->ce_noinfo_count += count;
935 		return;
936 	}
937 
938 	for (i = 0; i < mci->n_layers; i++) {
939 		if (pos[i] < 0)
940 			break;
941 		index += pos[i];
942 		mci->ce_per_layer[i][index] += count;
943 
944 		if (i < mci->n_layers - 1)
945 			index *= mci->layers[i + 1].size;
946 	}
947 }
948 
949 static void edac_inc_ue_error(struct mem_ctl_info *mci,
950 				    bool enable_per_layer_report,
951 				    const int pos[EDAC_MAX_LAYERS],
952 				    const u16 count)
953 {
954 	int i, index = 0;
955 
956 	mci->ue_mc += count;
957 
958 	if (!enable_per_layer_report) {
959 		mci->ce_noinfo_count += count;
960 		return;
961 	}
962 
963 	for (i = 0; i < mci->n_layers; i++) {
964 		if (pos[i] < 0)
965 			break;
966 		index += pos[i];
967 		mci->ue_per_layer[i][index] += count;
968 
969 		if (i < mci->n_layers - 1)
970 			index *= mci->layers[i + 1].size;
971 	}
972 }
973 
/*
 * edac_ce_error - log a corrected error, bump the CE counters and,
 * when software source scrubbing is enabled, rewrite the failing block.
 */
static void edac_ce_error(struct mem_ctl_info *mci,
			  const u16 error_count,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  long grain)
{
	unsigned long remapped_page;
	char *msg_aux = "";

	/* separate a non-empty driver message from the rest of the line */
	if (*msg)
		msg_aux = " ";

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "%d CE %s%son %s (%s %s - %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "%d CE %s%son %s (%s %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
			* Some memory controllers (called MCs below) can remap
			* memory so that it is still available at a different
			* address when PCI devices map into memory.
			* MC's that can't do this, lose the memory where PCI
			* devices are mapped. This mapping is MC-dependent
			* and so we call back into the MC driver for it to
			* map the MC page to a physical (CPU) page which can
			* then be mapped to a virtual page - which can then
			* be scrubbed.
			*/
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
					offset_in_page, grain);
	}
}
1027 
/*
 * edac_ue_error - log an uncorrected error, honor the panic-on-UE
 * policy, and bump the UE counters.
 */
static void edac_ue_error(struct mem_ctl_info *mci,
			  const u16 error_count,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	char *msg_aux = "";

	/* separate a non-empty driver message from the rest of the line */
	if (*msg)
		msg_aux = " ";

	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "%d UE %s%son %s (%s %s - %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "%d UE %s%son %s (%s %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail);
	}

	/* optionally take the whole machine down on an uncorrected error */
	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s%son %s (%s%s - %s)\n",
			      msg, msg_aux, label, location, detail, other_detail);
		else
			panic("UE %s%son %s (%s%s)\n",
			      msg, msg_aux, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}
1067 
1068 /**
1069  * edac_raw_mc_handle_error - reports a memory event to userspace without doing
1070  *			      anything to discover the error location
1071  *
1072  * @type:		severity of the error (CE/UE/Fatal)
1073  * @mci:		a struct mem_ctl_info pointer
1074  * @e:			error description
1075  *
1076  * This raw function is used internally by edac_mc_handle_error(). It should
1077  * only be called directly when the hardware error come directly from BIOS,
1078  * like in the case of APEI GHES driver.
1079  */
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
			      struct mem_ctl_info *mci,
			      struct edac_raw_error_desc *e)
{
	char detail[80];
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
			e->page_frame_number, e->offset_in_page,
			e->grain, e->syndrome);
		edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
			      detail, e->other_detail, e->enable_per_layer_report,
			      e->page_frame_number, e->offset_in_page, e->grain);
	} else {
		/* anything that is not a CE is reported via the UE path */
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld",
			e->page_frame_number, e->offset_in_page, e->grain);

		edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
			      detail, e->other_detail, e->enable_per_layer_report);
	}


}
1107 EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
1108 
1109 /**
1110  * edac_mc_handle_error - reports a memory event to userspace
1111  *
1112  * @type:		severity of the error (CE/UE/Fatal)
1113  * @mci:		a struct mem_ctl_info pointer
1114  * @error_count:	Number of errors of the same type
1115  * @page_frame_number:	mem page where the error occurred
1116  * @offset_in_page:	offset of the error inside the page
1117  * @syndrome:		ECC syndrome
1118  * @top_layer:		Memory layer[0] position
1119  * @mid_layer:		Memory layer[1] position
1120  * @low_layer:		Memory layer[2] position
1121  * @msg:		Message meaningful to the end users that
1122  *			explains the event
1123  * @other_detail:	Technical details about the event that
1124  *			may help hardware manufacturers and
1125  *			EDAC developers to analyse the event
1126  */
1127 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1128 			  struct mem_ctl_info *mci,
1129 			  const u16 error_count,
1130 			  const unsigned long page_frame_number,
1131 			  const unsigned long offset_in_page,
1132 			  const unsigned long syndrome,
1133 			  const int top_layer,
1134 			  const int mid_layer,
1135 			  const int low_layer,
1136 			  const char *msg,
1137 			  const char *other_detail)
1138 {
1139 	char *p;
1140 	int row = -1, chan = -1;
1141 	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1142 	int i, n_labels = 0;
1143 	u8 grain_bits;
1144 	struct edac_raw_error_desc *e = &mci->error_desc;
1145 
1146 	edac_dbg(3, "MC%d\n", mci->mc_idx);
1147 
1148 	/* Fills the error report buffer */
1149 	memset(e, 0, sizeof (*e));
1150 	e->error_count = error_count;
1151 	e->top_layer = top_layer;
1152 	e->mid_layer = mid_layer;
1153 	e->low_layer = low_layer;
1154 	e->page_frame_number = page_frame_number;
1155 	e->offset_in_page = offset_in_page;
1156 	e->syndrome = syndrome;
1157 	e->msg = msg;
1158 	e->other_detail = other_detail;
1159 
1160 	/*
1161 	 * Check if the event report is consistent and if the memory
1162 	 * location is known. If it is known, enable_per_layer_report will be
1163 	 * true, the DIMM(s) label info will be filled and the per-layer
1164 	 * error counters will be incremented.
1165 	 */
1166 	for (i = 0; i < mci->n_layers; i++) {
1167 		if (pos[i] >= (int)mci->layers[i].size) {
1168 
1169 			edac_mc_printk(mci, KERN_ERR,
1170 				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1171 				       edac_layer_name[mci->layers[i].type],
1172 				       pos[i], mci->layers[i].size);
1173 			/*
1174 			 * Instead of just returning it, let's use what's
1175 			 * known about the error. The increment routines and
1176 			 * the DIMM filter logic will do the right thing by
1177 			 * pointing the likely damaged DIMMs.
1178 			 */
1179 			pos[i] = -1;
1180 		}
1181 		if (pos[i] >= 0)
1182 			e->enable_per_layer_report = true;
1183 	}
1184 
1185 	/*
1186 	 * Get the dimm label/grain that applies to the match criteria.
1187 	 * As the error algorithm may not be able to point to just one memory
1188 	 * stick, the logic here will get all possible labels that could
1189 	 * pottentially be affected by the error.
1190 	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1191 	 * to have only the MC channel and the MC dimm (also called "branch")
1192 	 * but the channel is not known, as the memory is arranged in pairs,
1193 	 * where each memory belongs to a separate channel within the same
1194 	 * branch.
1195 	 */
1196 	p = e->label;
1197 	*p = '\0';
1198 
1199 	for (i = 0; i < mci->tot_dimms; i++) {
1200 		struct dimm_info *dimm = mci->dimms[i];
1201 
1202 		if (top_layer >= 0 && top_layer != dimm->location[0])
1203 			continue;
1204 		if (mid_layer >= 0 && mid_layer != dimm->location[1])
1205 			continue;
1206 		if (low_layer >= 0 && low_layer != dimm->location[2])
1207 			continue;
1208 
1209 		/* get the max grain, over the error match range */
1210 		if (dimm->grain > e->grain)
1211 			e->grain = dimm->grain;
1212 
1213 		/*
1214 		 * If the error is memory-controller wide, there's no need to
1215 		 * seek for the affected DIMMs because the whole
1216 		 * channel/memory controller/...  may be affected.
1217 		 * Also, don't show errors for empty DIMM slots.
1218 		 */
1219 		if (e->enable_per_layer_report && dimm->nr_pages) {
1220 			if (n_labels >= EDAC_MAX_LABELS) {
1221 				e->enable_per_layer_report = false;
1222 				break;
1223 			}
1224 			n_labels++;
1225 			if (p != e->label) {
1226 				strcpy(p, OTHER_LABEL);
1227 				p += strlen(OTHER_LABEL);
1228 			}
1229 			strcpy(p, dimm->label);
1230 			p += strlen(p);
1231 			*p = '\0';
1232 
1233 			/*
1234 			 * get csrow/channel of the DIMM, in order to allow
1235 			 * incrementing the compat API counters
1236 			 */
1237 			edac_dbg(4, "%s csrows map: (%d,%d)\n",
1238 				 mci->csbased ? "rank" : "dimm",
1239 				 dimm->csrow, dimm->cschannel);
1240 			if (row == -1)
1241 				row = dimm->csrow;
1242 			else if (row >= 0 && row != dimm->csrow)
1243 				row = -2;
1244 
1245 			if (chan == -1)
1246 				chan = dimm->cschannel;
1247 			else if (chan >= 0 && chan != dimm->cschannel)
1248 				chan = -2;
1249 		}
1250 	}
1251 
1252 	if (!e->enable_per_layer_report) {
1253 		strcpy(e->label, "any memory");
1254 	} else {
1255 		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1256 		if (p == e->label)
1257 			strcpy(e->label, "unknown memory");
1258 		if (type == HW_EVENT_ERR_CORRECTED) {
1259 			if (row >= 0) {
1260 				mci->csrows[row]->ce_count += error_count;
1261 				if (chan >= 0)
1262 					mci->csrows[row]->channels[chan]->ce_count += error_count;
1263 			}
1264 		} else
1265 			if (row >= 0)
1266 				mci->csrows[row]->ue_count += error_count;
1267 	}
1268 
1269 	/* Fill the RAM location data */
1270 	p = e->location;
1271 
1272 	for (i = 0; i < mci->n_layers; i++) {
1273 		if (pos[i] < 0)
1274 			continue;
1275 
1276 		p += sprintf(p, "%s:%d ",
1277 			     edac_layer_name[mci->layers[i].type],
1278 			     pos[i]);
1279 	}
1280 	if (p > e->location)
1281 		*(p - 1) = '\0';
1282 
1283 	/* Report the error via the trace interface */
1284 	grain_bits = fls_long(e->grain) + 1;
1285 	trace_mc_event(type, e->msg, e->label, e->error_count,
1286 		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
1287 		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
1288 		       grain_bits, e->syndrome, e->other_detail);
1289 
1290 	edac_raw_mc_handle_error(type, mci, e);
1291 }
1292 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
1293