/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
	debugf4("\tchannel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm)
{
	int i;

	debugf4("\tdimm = %p\n", dimm);
	debugf4("\tdimm->label = '%s'\n", dimm->label);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
	debugf4("\tdimm location ");
	for (i = 0; i < dimm->mci->n_layers; i++) {
		printk(KERN_CONT "%d", dimm->location[i]);
		if (i < dimm->mci->n_layers - 1)
			printk(KERN_CONT ".");
	}
	printk(KERN_CONT "\n");
	debugf4("\tdimm->grain = %d\n", dimm->grain);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
		mci->tot_dimms, mci->dimms);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
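
/*
 * Example (illustrative only): a driver that has decoded the DRAM type into
 * an enum mem_type value can print the human readable name with something
 * like:
 *
 *	enum mem_type mtype = MEM_DDR3;
 *
 *	edac_printk(KERN_INFO, EDAC_MC, "memory type: %s\n",
 *		    edac_mem_types[mtype]);
 */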
112 "Rambus DRAM", 113 "Unbuffered DDR2 RAM", 114 "Fully buffered DDR2", 115 "Registered DDR2 RAM", 116 "Rambus XDR", 117 "Unbuffered DDR3 RAM", 118 "Registered DDR3 RAM", 119 }; 120 EXPORT_SYMBOL_GPL(edac_mem_types); 121 122 /** 123 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation 124 * @p: pointer to a pointer with the memory offset to be used. At 125 * return, this will be incremented to point to the next offset 126 * @size: Size of the data structure to be reserved 127 * @n_elems: Number of elements that should be reserved 128 * 129 * If 'size' is a constant, the compiler will optimize this whole function 130 * down to either a no-op or the addition of a constant to the value of '*p'. 131 * 132 * The 'p' pointer is absolutely needed to keep the proper advancing 133 * further in memory to the proper offsets when allocating the struct along 134 * with its embedded structs, as edac_device_alloc_ctl_info() does it 135 * above, for example. 136 * 137 * At return, the pointer 'p' will be incremented to be used on a next call 138 * to this function. 139 */ 140 void *edac_align_ptr(void **p, unsigned size, int n_elems) 141 { 142 unsigned align, r; 143 void *ptr = *p; 144 145 *p += size * n_elems; 146 147 /* 148 * 'p' can possibly be an unaligned item X such that sizeof(X) is 149 * 'size'. Adjust 'p' so that its alignment is at least as 150 * stringent as what the compiler would provide for X and return 151 * the aligned result. 152 * Here we assume that the alignment of a "long long" is the most 153 * stringent alignment that the compiler will ever provide by default. 154 * As far as I know, this is a reasonable assumption. 155 */ 156 if (size > sizeof(long)) 157 align = sizeof(long long); 158 else if (size > sizeof(int)) 159 align = sizeof(long); 160 else if (size > sizeof(short)) 161 align = sizeof(int); 162 else if (size > sizeof(char)) 163 align = sizeof(short); 164 else 165 return (char *)ptr; 166 167 r = (unsigned long)p % align; 168 169 if (r == 0) 170 return (char *)ptr; 171 172 *p += align - r; 173 174 return (void *)(((unsigned long)ptr) + align - r); 175 } 176 177 /** 178 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure 179 * @mc_num: Memory controller number 180 * @n_layers: Number of MC hierarchy layers 181 * layers: Describes each layer as seen by the Memory Controller 182 * @size_pvt: size of private storage needed 183 * 184 * 185 * Everything is kmalloc'ed as one big chunk - more efficient. 186 * Only can be used if all structures have the same lifetime - otherwise 187 * you have to allocate and initialize your own structures. 188 * 189 * Use edac_mc_free() to free mc structures allocated by this function. 190 * 191 * NOTE: drivers handle multi-rank memories in different ways: in some 192 * drivers, one multi-rank memory stick is mapped as one entry, while, in 193 * others, a single multi-rank memory stick would be mapped into several 194 * entries. Currently, this function will allocate multiple struct dimm_info 195 * on such scenarios, as grouping the multiple ranks require drivers change. 

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:	Memory controller number
 * @n_layers:	Number of MC hierarchy layers
 * @layers:	Describes each layer as seen by the Memory Controller
 * @sz_pvt:	size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * in such scenarios, as grouping the multiple ranks would require changes
 * in the drivers.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csi, *csr;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, err, row, chn, n, len;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		__func__, size,
		tot_dimms,
		per_rank ? "ranks" : "dimms",
		tot_csrows * tot_channels);
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->csrows = csi;
	mci->dimms = dimm;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;

	/*
	 * Fill the csrow struct
	 */
	for (row = 0; row < tot_csrows; row++) {
		csr = &csi[row];
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		chp = &chi[row * tot_channels];
		csr->channels = chp;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Fill the dimm struct
	 */
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
		per_rank ? "ranks" : "dimms");
	for (i = 0; i < tot_dimms; i++) {
		chan = &csi[row].channels[chn];
		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
				     pos[0], pos[1], pos[2]);
		dimm->mci = mci;

		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
			pos[0], pos[1], pos[2], row, chn);

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* At this point, the root kobj is valid; in order to free the
	 * object, edac_mc_unregister_sysfs_main_kobj() must be called,
	 * which unregisters the kobj.  The actual free then happens in
	 * the kobject release callback.
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
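
/*
 * Example (illustrative sketch, not a real driver): a hypothetical memory
 * controller with 2 channels and 4 DIMM slots per channel could describe
 * itself with two layers and allocate its mci as below.  "struct foo_pvt"
 * and the chosen sizes are made up.
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[0].size = 2;
 *	layers[0].is_virt_csrow = false;
 *	layers[1].type = EDAC_MC_LAYER_SLOT;
 *	layers[1].size = 4;
 *	layers[1].is_virt_csrow = true;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct foo_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 */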

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			   msecs_to_jiffies(edac_mc_get_poll_msec()));
}
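
/*
 * Example (illustrative sketch): the ->edac_check callback invoked above is
 * provided by the low-level driver when it registers a polled controller.
 * A hypothetical implementation would read the controller's error registers
 * and report whatever it finds; foo_read_error_status() and
 * foo_process_error() below are made-up stand-ins for that hardware-specific
 * code.
 *
 *	static void foo_edac_check(struct mem_ctl_info *mci)
 *	{
 *		struct foo_pvt *pvt = mci->pvt_info;
 *
 *		if (foo_read_error_status(pvt))
 *			foo_process_error(mci, pvt);
 *	}
 */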

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);

	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			edac_mc_dump_dimm(&mci->dimms[i]);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
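
/*
 * Example (illustrative sketch): the probe path of the hypothetical driver
 * from the edac_mc_alloc() example, registering a polled controller.
 * Everything prefixed "foo_" is made up.
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct foo_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 *
 *	mci->dev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR3;
 *	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *	mci->mod_name = "foo_edac";
 *	mci->ctl_name = "foo_memory_controller";
 *	mci->edac_check = foo_edac_check;
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */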

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
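
/*
 * Example (illustrative sketch): the matching remove path of the
 * hypothetical driver above.
 *
 *	static void foo_remove_one(struct pci_dev *pdev)
 *	{
 *		struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *		if (mci)
 *			edac_mc_free(mci);
 *	}
 */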

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i, j, n;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j].dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ce_mc++;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ue_mc++;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}
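
/*
 * Worked example (illustrative): with three layers of sizes {2, 4, 8} and an
 * error at pos = {1, 2, 3}, the loops above increment
 *
 *	ce_per_layer[0][1]			(index = 1)
 *	ce_per_layer[1][1 * 4 + 2]		(index = 6)
 *	ce_per_layer[2][(1 * 4 + 2) * 8 + 3]	(index = 51)
 *
 * i.e. each layer's counter array is indexed by the error position flattened
 * across that layer and all the layers above it.
 */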

static void edac_ce_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  u32 grain)
{
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s - %s)\n",
				       msg, label, location,
				       detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s)\n",
				       msg, label, location,
				       detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MCs that can't do this lose the memory where PCI
		 * devices are mapped.  This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
				    offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s - %s)\n",
				       msg, label, location, detail,
				       other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s)\n",
				       msg, label, location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s on %s (%s%s - %s)\n",
			      msg, label, location, detail, other_detail);
		else
			panic("UE %s on %s (%s%s)\n",
			      msg, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos);
}

#define OTHER_LABEL " or "
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int layer0,
			  const int layer1,
			  const int layer2,
			  const char *msg,
			  const char *other_detail,
			  const void *mcelog)
{
	/* FIXME: too much for stack: move it to some pre-allocated area */
	char detail[80], location[80];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
	int i;
	u32 grain;
	bool enable_per_layer_report = false;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {
			if (type == HW_EVENT_ERR_CORRECTED)
				p = "CE";
			else
				p = "UE";

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing to the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC dimm and the MC branch, but not the channel:
	 * the memory is arranged in pairs, where each memory belongs to a
	 * separate channel within the same branch.
	 */
	grain = 0;
	p = label;
	*p = '\0';
	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = &mci->dimms[i];

		if (layer0 >= 0 && layer0 != dimm->location[0])
			continue;
		if (layer1 >= 0 && layer1 != dimm->location[1])
			continue;
		if (layer2 >= 0 && layer2 != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > grain)
			grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * search for the affected DIMMs because the whole
		 * channel/memory controller/... may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (enable_per_layer_report && dimm->nr_pages) {
			if (p != label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			debugf4("%s: %s csrows map: (%d,%d)\n",
				__func__,
				mci->mem_is_per_rank ? "rank" : "dimm",
				dimm->csrow, dimm->cschannel);

			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!enable_per_layer_report) {
		strcpy(label, "any memory");
	} else {
		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
			__func__, row, chan);
		if (p == label)
			strcpy(label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row].ce_count++;
				if (chan >= 0)
					mci->csrows[row].channels[chan].ce_count++;
			}
		} else
			if (row >= 0)
				mci->csrows[row].ue_count++;
	}

	/* Fill the RAM location data */
	p = location;
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			 "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
			 page_frame_number, offset_in_page,
			 grain, syndrome);
		edac_ce_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report,
			      page_frame_number, offset_in_page, grain);
	} else {
		snprintf(detail, sizeof(detail),
			 "page:0x%lx offset:0x%lx grain:%d",
			 page_frame_number, offset_in_page, grain);

		edac_ue_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
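
/*
 * Example (illustrative sketch): a hypothetical driver reporting a corrected
 * error that it decoded down to channel 1, slot 2 of the two-layer controller
 * from the earlier examples.  The layer0/layer1/layer2 arguments follow the
 * order of the layers passed to edac_mc_alloc(); a negative value means the
 * position within that layer is not known.
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     page_frame_number, offset_in_page, syndrome,
 *			     1, 2, -1,
 *			     "read error", "", NULL);
 */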