1 /* 2 * edac_mc kernel module 3 * (C) 2005, 2006 Linux Networx (http://lnxi.com) 4 * This file may be distributed under the terms of the 5 * GNU General Public License. 6 * 7 * Written by Thayne Harbaugh 8 * Based on work by Dan Hollis <goemon at anime dot net> and others. 9 * http://www.anime.net/~goemon/linux-ecc/ 10 * 11 * Modified by Dave Peterson and Doug Thompson 12 * 13 */ 14 15 #include <linux/module.h> 16 #include <linux/proc_fs.h> 17 #include <linux/kernel.h> 18 #include <linux/types.h> 19 #include <linux/smp.h> 20 #include <linux/init.h> 21 #include <linux/sysctl.h> 22 #include <linux/highmem.h> 23 #include <linux/timer.h> 24 #include <linux/slab.h> 25 #include <linux/jiffies.h> 26 #include <linux/spinlock.h> 27 #include <linux/list.h> 28 #include <linux/sysdev.h> 29 #include <linux/ctype.h> 30 #include <linux/edac.h> 31 #include <asm/uaccess.h> 32 #include <asm/page.h> 33 #include <asm/edac.h> 34 #include "edac_core.h" 35 #include "edac_module.h" 36 37 /* lock to memory controller's control array */ 38 static DEFINE_MUTEX(mem_ctls_mutex); 39 static LIST_HEAD(mc_devices); 40 41 #ifdef CONFIG_EDAC_DEBUG 42 43 static void edac_mc_dump_channel(struct channel_info *chan) 44 { 45 debugf4("\tchannel = %p\n", chan); 46 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); 47 debugf4("\tchannel->ce_count = %d\n", chan->ce_count); 48 debugf4("\tchannel->label = '%s'\n", chan->label); 49 debugf4("\tchannel->csrow = %p\n\n", chan->csrow); 50 } 51 52 static void edac_mc_dump_csrow(struct csrow_info *csrow) 53 { 54 debugf4("\tcsrow = %p\n", csrow); 55 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); 56 debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page); 57 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page); 58 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); 59 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages); 60 debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels); 61 debugf4("\tcsrow->channels = %p\n", csrow->channels); 62 debugf4("\tcsrow->mci = %p\n\n", csrow->mci); 63 } 64 65 static void edac_mc_dump_mci(struct mem_ctl_info *mci) 66 { 67 debugf3("\tmci = %p\n", mci); 68 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap); 69 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); 70 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap); 71 debugf4("\tmci->edac_check = %p\n", mci->edac_check); 72 debugf3("\tmci->nr_csrows = %d, csrows = %p\n", 73 mci->nr_csrows, mci->csrows); 74 debugf3("\tdev = %p\n", mci->dev); 75 debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name); 76 debugf3("\tpvt_info = %p\n\n", mci->pvt_info); 77 } 78 79 #endif /* CONFIG_EDAC_DEBUG */ 80 81 /* 82 * keep those in sync with the enum mem_type 83 */ 84 const char *edac_mem_types[] = { 85 "Empty csrow", 86 "Reserved csrow type", 87 "Unknown csrow type", 88 "Fast page mode RAM", 89 "Extended data out RAM", 90 "Burst Extended data out RAM", 91 "Single data rate SDRAM", 92 "Registered single data rate SDRAM", 93 "Double data rate SDRAM", 94 "Registered Double data rate SDRAM", 95 "Rambus DRAM", 96 "Unbuffered DDR2 RAM", 97 "Fully buffered DDR2", 98 "Registered DDR2 RAM", 99 "Rambus XDR", 100 "Unbuffered DDR3 RAM", 101 "Registered DDR3 RAM", 102 }; 103 EXPORT_SYMBOL_GPL(edac_mem_types); 104 105 /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'. 106 * Adjust 'ptr' so that its alignment is at least as stringent as what the 107 * compiler would provide for X and return the aligned result. 108 * 109 * If 'size' is a constant, the compiler will optimize this whole function 110 * down to either a no-op or the addition of a constant to the value of 'ptr'. 111 */ 112 void *edac_align_ptr(void *ptr, unsigned size) 113 { 114 unsigned align, r; 115 116 /* Here we assume that the alignment of a "long long" is the most 117 * stringent alignment that the compiler will ever provide by default. 118 * As far as I know, this is a reasonable assumption. 119 */ 120 if (size > sizeof(long)) 121 align = sizeof(long long); 122 else if (size > sizeof(int)) 123 align = sizeof(long); 124 else if (size > sizeof(short)) 125 align = sizeof(int); 126 else if (size > sizeof(char)) 127 align = sizeof(short); 128 else 129 return (char *)ptr; 130 131 r = size % align; 132 133 if (r == 0) 134 return (char *)ptr; 135 136 return (void *)(((unsigned long)ptr) + align - r); 137 } 138 139 /** 140 * edac_mc_alloc: Allocate a struct mem_ctl_info structure 141 * @size_pvt: size of private storage needed 142 * @nr_csrows: Number of CWROWS needed for this MC 143 * @nr_chans: Number of channels for the MC 144 * 145 * Everything is kmalloc'ed as one big chunk - more efficient. 146 * Only can be used if all structures have the same lifetime - otherwise 147 * you have to allocate and initialize your own structures. 148 * 149 * Use edac_mc_free() to free mc structures allocated by this function. 150 * 151 * Returns: 152 * NULL allocation failed 153 * struct mem_ctl_info pointer 154 */ 155 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, 156 unsigned nr_chans, int edac_index) 157 { 158 struct mem_ctl_info *mci; 159 struct csrow_info *csi, *csrow; 160 struct channel_info *chi, *chp, *chan; 161 void *pvt; 162 unsigned size; 163 int row, chn; 164 int err; 165 166 /* Figure out the offsets of the various items from the start of an mc 167 * structure. We want the alignment of each item to be at least as 168 * stringent as what the compiler would provide if we could simply 169 * hardcode everything into a single struct. 170 */ 171 mci = (struct mem_ctl_info *)0; 172 csi = edac_align_ptr(&mci[1], sizeof(*csi)); 173 chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi)); 174 pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt); 175 size = ((unsigned long)pvt) + sz_pvt; 176 177 mci = kzalloc(size, GFP_KERNEL); 178 if (mci == NULL) 179 return NULL; 180 181 /* Adjust pointers so they point within the memory we just allocated 182 * rather than an imaginary chunk of memory located at address 0. 183 */ 184 csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi)); 185 chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi)); 186 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; 187 188 /* setup index and various internal pointers */ 189 mci->mc_idx = edac_index; 190 mci->csrows = csi; 191 mci->pvt_info = pvt; 192 mci->nr_csrows = nr_csrows; 193 194 for (row = 0; row < nr_csrows; row++) { 195 csrow = &csi[row]; 196 csrow->csrow_idx = row; 197 csrow->mci = mci; 198 csrow->nr_channels = nr_chans; 199 chp = &chi[row * nr_chans]; 200 csrow->channels = chp; 201 202 for (chn = 0; chn < nr_chans; chn++) { 203 chan = &chp[chn]; 204 chan->chan_idx = chn; 205 chan->csrow = csrow; 206 } 207 } 208 209 mci->op_state = OP_ALLOC; 210 INIT_LIST_HEAD(&mci->grp_kobj_list); 211 212 /* 213 * Initialize the 'root' kobj for the edac_mc controller 214 */ 215 err = edac_mc_register_sysfs_main_kobj(mci); 216 if (err) { 217 kfree(mci); 218 return NULL; 219 } 220 221 /* at this point, the root kobj is valid, and in order to 222 * 'free' the object, then the function: 223 * edac_mc_unregister_sysfs_main_kobj() must be called 224 * which will perform kobj unregistration and the actual free 225 * will occur during the kobject callback operation 226 */ 227 return mci; 228 } 229 EXPORT_SYMBOL_GPL(edac_mc_alloc); 230 231 /** 232 * edac_mc_free 233 * 'Free' a previously allocated 'mci' structure 234 * @mci: pointer to a struct mem_ctl_info structure 235 */ 236 void edac_mc_free(struct mem_ctl_info *mci) 237 { 238 debugf1("%s()\n", __func__); 239 240 edac_mc_unregister_sysfs_main_kobj(mci); 241 242 /* free the mci instance memory here */ 243 kfree(mci); 244 } 245 EXPORT_SYMBOL_GPL(edac_mc_free); 246 247 248 /** 249 * find_mci_by_dev 250 * 251 * scan list of controllers looking for the one that manages 252 * the 'dev' device 253 * @dev: pointer to a struct device related with the MCI 254 */ 255 struct mem_ctl_info *find_mci_by_dev(struct device *dev) 256 { 257 struct mem_ctl_info *mci; 258 struct list_head *item; 259 260 debugf3("%s()\n", __func__); 261 262 list_for_each(item, &mc_devices) { 263 mci = list_entry(item, struct mem_ctl_info, link); 264 265 if (mci->dev == dev) 266 return mci; 267 } 268 269 return NULL; 270 } 271 EXPORT_SYMBOL_GPL(find_mci_by_dev); 272 273 /* 274 * handler for EDAC to check if NMI type handler has asserted interrupt 275 */ 276 static int edac_mc_assert_error_check_and_clear(void) 277 { 278 int old_state; 279 280 if (edac_op_state == EDAC_OPSTATE_POLL) 281 return 1; 282 283 old_state = edac_err_assert; 284 edac_err_assert = 0; 285 286 return old_state; 287 } 288 289 /* 290 * edac_mc_workq_function 291 * performs the operation scheduled by a workq request 292 */ 293 static void edac_mc_workq_function(struct work_struct *work_req) 294 { 295 struct delayed_work *d_work = to_delayed_work(work_req); 296 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); 297 298 mutex_lock(&mem_ctls_mutex); 299 300 /* if this control struct has movd to offline state, we are done */ 301 if (mci->op_state == OP_OFFLINE) { 302 mutex_unlock(&mem_ctls_mutex); 303 return; 304 } 305 306 /* Only poll controllers that are running polled and have a check */ 307 if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) 308 mci->edac_check(mci); 309 310 mutex_unlock(&mem_ctls_mutex); 311 312 /* Reschedule */ 313 queue_delayed_work(edac_workqueue, &mci->work, 314 msecs_to_jiffies(edac_mc_get_poll_msec())); 315 } 316 317 /* 318 * edac_mc_workq_setup 319 * initialize a workq item for this mci 320 * passing in the new delay period in msec 321 * 322 * locking model: 323 * 324 * called with the mem_ctls_mutex held 325 */ 326 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) 327 { 328 debugf0("%s()\n", __func__); 329 330 /* if this instance is not in the POLL state, then simply return */ 331 if (mci->op_state != OP_RUNNING_POLL) 332 return; 333 334 INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); 335 queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); 336 } 337 338 /* 339 * edac_mc_workq_teardown 340 * stop the workq processing on this mci 341 * 342 * locking model: 343 * 344 * called WITHOUT lock held 345 */ 346 static void edac_mc_workq_teardown(struct mem_ctl_info *mci) 347 { 348 int status; 349 350 if (mci->op_state != OP_RUNNING_POLL) 351 return; 352 353 status = cancel_delayed_work(&mci->work); 354 if (status == 0) { 355 debugf0("%s() not canceled, flush the queue\n", 356 __func__); 357 358 /* workq instance might be running, wait for it */ 359 flush_workqueue(edac_workqueue); 360 } 361 } 362 363 /* 364 * edac_mc_reset_delay_period(unsigned long value) 365 * 366 * user space has updated our poll period value, need to 367 * reset our workq delays 368 */ 369 void edac_mc_reset_delay_period(int value) 370 { 371 struct mem_ctl_info *mci; 372 struct list_head *item; 373 374 mutex_lock(&mem_ctls_mutex); 375 376 /* scan the list and turn off all workq timers, doing so under lock 377 */ 378 list_for_each(item, &mc_devices) { 379 mci = list_entry(item, struct mem_ctl_info, link); 380 381 if (mci->op_state == OP_RUNNING_POLL) 382 cancel_delayed_work(&mci->work); 383 } 384 385 mutex_unlock(&mem_ctls_mutex); 386 387 388 /* re-walk the list, and reset the poll delay */ 389 mutex_lock(&mem_ctls_mutex); 390 391 list_for_each(item, &mc_devices) { 392 mci = list_entry(item, struct mem_ctl_info, link); 393 394 edac_mc_workq_setup(mci, (unsigned long) value); 395 } 396 397 mutex_unlock(&mem_ctls_mutex); 398 } 399 400 401 402 /* Return 0 on success, 1 on failure. 403 * Before calling this function, caller must 404 * assign a unique value to mci->mc_idx. 405 * 406 * locking model: 407 * 408 * called with the mem_ctls_mutex lock held 409 */ 410 static int add_mc_to_global_list(struct mem_ctl_info *mci) 411 { 412 struct list_head *item, *insert_before; 413 struct mem_ctl_info *p; 414 415 insert_before = &mc_devices; 416 417 p = find_mci_by_dev(mci->dev); 418 if (unlikely(p != NULL)) 419 goto fail0; 420 421 list_for_each(item, &mc_devices) { 422 p = list_entry(item, struct mem_ctl_info, link); 423 424 if (p->mc_idx >= mci->mc_idx) { 425 if (unlikely(p->mc_idx == mci->mc_idx)) 426 goto fail1; 427 428 insert_before = item; 429 break; 430 } 431 } 432 433 list_add_tail_rcu(&mci->link, insert_before); 434 atomic_inc(&edac_handlers); 435 return 0; 436 437 fail0: 438 edac_printk(KERN_WARNING, EDAC_MC, 439 "%s (%s) %s %s already assigned %d\n", dev_name(p->dev), 440 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); 441 return 1; 442 443 fail1: 444 edac_printk(KERN_WARNING, EDAC_MC, 445 "bug in low-level driver: attempt to assign\n" 446 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); 447 return 1; 448 } 449 450 static void del_mc_from_global_list(struct mem_ctl_info *mci) 451 { 452 atomic_dec(&edac_handlers); 453 list_del_rcu(&mci->link); 454 455 /* these are for safe removal of devices from global list while 456 * NMI handlers may be traversing list 457 */ 458 synchronize_rcu(); 459 INIT_LIST_HEAD(&mci->link); 460 } 461 462 /** 463 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'. 464 * 465 * If found, return a pointer to the structure. 466 * Else return NULL. 467 * 468 * Caller must hold mem_ctls_mutex. 469 */ 470 struct mem_ctl_info *edac_mc_find(int idx) 471 { 472 struct list_head *item; 473 struct mem_ctl_info *mci; 474 475 list_for_each(item, &mc_devices) { 476 mci = list_entry(item, struct mem_ctl_info, link); 477 478 if (mci->mc_idx >= idx) { 479 if (mci->mc_idx == idx) 480 return mci; 481 482 break; 483 } 484 } 485 486 return NULL; 487 } 488 EXPORT_SYMBOL(edac_mc_find); 489 490 /** 491 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and 492 * create sysfs entries associated with mci structure 493 * @mci: pointer to the mci structure to be added to the list 494 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure. 495 * 496 * Return: 497 * 0 Success 498 * !0 Failure 499 */ 500 501 /* FIXME - should a warning be printed if no error detection? correction? */ 502 int edac_mc_add_mc(struct mem_ctl_info *mci) 503 { 504 debugf0("%s()\n", __func__); 505 506 #ifdef CONFIG_EDAC_DEBUG 507 if (edac_debug_level >= 3) 508 edac_mc_dump_mci(mci); 509 510 if (edac_debug_level >= 4) { 511 int i; 512 513 for (i = 0; i < mci->nr_csrows; i++) { 514 int j; 515 516 edac_mc_dump_csrow(&mci->csrows[i]); 517 for (j = 0; j < mci->csrows[i].nr_channels; j++) 518 edac_mc_dump_channel(&mci->csrows[i]. 519 channels[j]); 520 } 521 } 522 #endif 523 mutex_lock(&mem_ctls_mutex); 524 525 if (add_mc_to_global_list(mci)) 526 goto fail0; 527 528 /* set load time so that error rate can be tracked */ 529 mci->start_time = jiffies; 530 531 if (edac_create_sysfs_mci_device(mci)) { 532 edac_mc_printk(mci, KERN_WARNING, 533 "failed to create sysfs device\n"); 534 goto fail1; 535 } 536 537 /* If there IS a check routine, then we are running POLLED */ 538 if (mci->edac_check != NULL) { 539 /* This instance is NOW RUNNING */ 540 mci->op_state = OP_RUNNING_POLL; 541 542 edac_mc_workq_setup(mci, edac_mc_get_poll_msec()); 543 } else { 544 mci->op_state = OP_RUNNING_INTERRUPT; 545 } 546 547 /* Report action taken */ 548 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" 549 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); 550 551 mutex_unlock(&mem_ctls_mutex); 552 return 0; 553 554 fail1: 555 del_mc_from_global_list(mci); 556 557 fail0: 558 mutex_unlock(&mem_ctls_mutex); 559 return 1; 560 } 561 EXPORT_SYMBOL_GPL(edac_mc_add_mc); 562 563 /** 564 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and 565 * remove mci structure from global list 566 * @pdev: Pointer to 'struct device' representing mci structure to remove. 567 * 568 * Return pointer to removed mci structure, or NULL if device not found. 569 */ 570 struct mem_ctl_info *edac_mc_del_mc(struct device *dev) 571 { 572 struct mem_ctl_info *mci; 573 574 debugf0("%s()\n", __func__); 575 576 mutex_lock(&mem_ctls_mutex); 577 578 /* find the requested mci struct in the global list */ 579 mci = find_mci_by_dev(dev); 580 if (mci == NULL) { 581 mutex_unlock(&mem_ctls_mutex); 582 return NULL; 583 } 584 585 del_mc_from_global_list(mci); 586 mutex_unlock(&mem_ctls_mutex); 587 588 /* flush workq processes */ 589 edac_mc_workq_teardown(mci); 590 591 /* marking MCI offline */ 592 mci->op_state = OP_OFFLINE; 593 594 /* remove from sysfs */ 595 edac_remove_sysfs_mci_device(mci); 596 597 edac_printk(KERN_INFO, EDAC_MC, 598 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, 599 mci->mod_name, mci->ctl_name, edac_dev_name(mci)); 600 601 return mci; 602 } 603 EXPORT_SYMBOL_GPL(edac_mc_del_mc); 604 605 static void edac_mc_scrub_block(unsigned long page, unsigned long offset, 606 u32 size) 607 { 608 struct page *pg; 609 void *virt_addr; 610 unsigned long flags = 0; 611 612 debugf3("%s()\n", __func__); 613 614 /* ECC error page was not in our memory. Ignore it. */ 615 if (!pfn_valid(page)) 616 return; 617 618 /* Find the actual page structure then map it and fix */ 619 pg = pfn_to_page(page); 620 621 if (PageHighMem(pg)) 622 local_irq_save(flags); 623 624 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ); 625 626 /* Perform architecture specific atomic scrub operation */ 627 atomic_scrub(virt_addr + offset, size); 628 629 /* Unmap and complete */ 630 kunmap_atomic(virt_addr, KM_BOUNCE_READ); 631 632 if (PageHighMem(pg)) 633 local_irq_restore(flags); 634 } 635 636 /* FIXME - should return -1 */ 637 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) 638 { 639 struct csrow_info *csrows = mci->csrows; 640 int row, i; 641 642 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page); 643 row = -1; 644 645 for (i = 0; i < mci->nr_csrows; i++) { 646 struct csrow_info *csrow = &csrows[i]; 647 648 if (csrow->nr_pages == 0) 649 continue; 650 651 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) " 652 "mask(0x%lx)\n", mci->mc_idx, __func__, 653 csrow->first_page, page, csrow->last_page, 654 csrow->page_mask); 655 656 if ((page >= csrow->first_page) && 657 (page <= csrow->last_page) && 658 ((page & csrow->page_mask) == 659 (csrow->first_page & csrow->page_mask))) { 660 row = i; 661 break; 662 } 663 } 664 665 if (row == -1) 666 edac_mc_printk(mci, KERN_ERR, 667 "could not look up page error address %lx\n", 668 (unsigned long)page); 669 670 return row; 671 } 672 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page); 673 674 /* FIXME - setable log (warning/emerg) levels */ 675 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */ 676 void edac_mc_handle_ce(struct mem_ctl_info *mci, 677 unsigned long page_frame_number, 678 unsigned long offset_in_page, unsigned long syndrome, 679 int row, int channel, const char *msg) 680 { 681 unsigned long remapped_page; 682 683 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 684 685 /* FIXME - maybe make panic on INTERNAL ERROR an option */ 686 if (row >= mci->nr_csrows || row < 0) { 687 /* something is wrong */ 688 edac_mc_printk(mci, KERN_ERR, 689 "INTERNAL ERROR: row out of range " 690 "(%d >= %d)\n", row, mci->nr_csrows); 691 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 692 return; 693 } 694 695 if (channel >= mci->csrows[row].nr_channels || channel < 0) { 696 /* something is wrong */ 697 edac_mc_printk(mci, KERN_ERR, 698 "INTERNAL ERROR: channel out of range " 699 "(%d >= %d)\n", channel, 700 mci->csrows[row].nr_channels); 701 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 702 return; 703 } 704 705 if (edac_mc_get_log_ce()) 706 /* FIXME - put in DIMM location */ 707 edac_mc_printk(mci, KERN_WARNING, 708 "CE page 0x%lx, offset 0x%lx, grain %d, syndrome " 709 "0x%lx, row %d, channel %d, label \"%s\": %s\n", 710 page_frame_number, offset_in_page, 711 mci->csrows[row].grain, syndrome, row, channel, 712 mci->csrows[row].channels[channel].label, msg); 713 714 mci->ce_count++; 715 mci->csrows[row].ce_count++; 716 mci->csrows[row].channels[channel].ce_count++; 717 718 if (mci->scrub_mode & SCRUB_SW_SRC) { 719 /* 720 * Some MC's can remap memory so that it is still available 721 * at a different address when PCI devices map into memory. 722 * MC's that can't do this lose the memory where PCI devices 723 * are mapped. This mapping is MC dependent and so we call 724 * back into the MC driver for it to map the MC page to 725 * a physical (CPU) page which can then be mapped to a virtual 726 * page - which can then be scrubbed. 727 */ 728 remapped_page = mci->ctl_page_to_phys ? 729 mci->ctl_page_to_phys(mci, page_frame_number) : 730 page_frame_number; 731 732 edac_mc_scrub_block(remapped_page, offset_in_page, 733 mci->csrows[row].grain); 734 } 735 } 736 EXPORT_SYMBOL_GPL(edac_mc_handle_ce); 737 738 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) 739 { 740 if (edac_mc_get_log_ce()) 741 edac_mc_printk(mci, KERN_WARNING, 742 "CE - no information available: %s\n", msg); 743 744 mci->ce_noinfo_count++; 745 mci->ce_count++; 746 } 747 EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info); 748 749 void edac_mc_handle_ue(struct mem_ctl_info *mci, 750 unsigned long page_frame_number, 751 unsigned long offset_in_page, int row, const char *msg) 752 { 753 int len = EDAC_MC_LABEL_LEN * 4; 754 char labels[len + 1]; 755 char *pos = labels; 756 int chan; 757 int chars; 758 759 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 760 761 /* FIXME - maybe make panic on INTERNAL ERROR an option */ 762 if (row >= mci->nr_csrows || row < 0) { 763 /* something is wrong */ 764 edac_mc_printk(mci, KERN_ERR, 765 "INTERNAL ERROR: row out of range " 766 "(%d >= %d)\n", row, mci->nr_csrows); 767 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 768 return; 769 } 770 771 chars = snprintf(pos, len + 1, "%s", 772 mci->csrows[row].channels[0].label); 773 len -= chars; 774 pos += chars; 775 776 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0); 777 chan++) { 778 chars = snprintf(pos, len + 1, ":%s", 779 mci->csrows[row].channels[chan].label); 780 len -= chars; 781 pos += chars; 782 } 783 784 if (edac_mc_get_log_ue()) 785 edac_mc_printk(mci, KERN_EMERG, 786 "UE page 0x%lx, offset 0x%lx, grain %d, row %d, " 787 "labels \"%s\": %s\n", page_frame_number, 788 offset_in_page, mci->csrows[row].grain, row, 789 labels, msg); 790 791 if (edac_mc_get_panic_on_ue()) 792 panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, " 793 "row %d, labels \"%s\": %s\n", mci->mc_idx, 794 page_frame_number, offset_in_page, 795 mci->csrows[row].grain, row, labels, msg); 796 797 mci->ue_count++; 798 mci->csrows[row].ue_count++; 799 } 800 EXPORT_SYMBOL_GPL(edac_mc_handle_ue); 801 802 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) 803 { 804 if (edac_mc_get_panic_on_ue()) 805 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx); 806 807 if (edac_mc_get_log_ue()) 808 edac_mc_printk(mci, KERN_WARNING, 809 "UE - no information available: %s\n", msg); 810 mci->ue_noinfo_count++; 811 mci->ue_count++; 812 } 813 EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); 814 815 /************************************************************* 816 * On Fully Buffered DIMM modules, this help function is 817 * called to process UE events 818 */ 819 void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, 820 unsigned int csrow, 821 unsigned int channela, 822 unsigned int channelb, char *msg) 823 { 824 int len = EDAC_MC_LABEL_LEN * 4; 825 char labels[len + 1]; 826 char *pos = labels; 827 int chars; 828 829 if (csrow >= mci->nr_csrows) { 830 /* something is wrong */ 831 edac_mc_printk(mci, KERN_ERR, 832 "INTERNAL ERROR: row out of range (%d >= %d)\n", 833 csrow, mci->nr_csrows); 834 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 835 return; 836 } 837 838 if (channela >= mci->csrows[csrow].nr_channels) { 839 /* something is wrong */ 840 edac_mc_printk(mci, KERN_ERR, 841 "INTERNAL ERROR: channel-a out of range " 842 "(%d >= %d)\n", 843 channela, mci->csrows[csrow].nr_channels); 844 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 845 return; 846 } 847 848 if (channelb >= mci->csrows[csrow].nr_channels) { 849 /* something is wrong */ 850 edac_mc_printk(mci, KERN_ERR, 851 "INTERNAL ERROR: channel-b out of range " 852 "(%d >= %d)\n", 853 channelb, mci->csrows[csrow].nr_channels); 854 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 855 return; 856 } 857 858 mci->ue_count++; 859 mci->csrows[csrow].ue_count++; 860 861 /* Generate the DIMM labels from the specified channels */ 862 chars = snprintf(pos, len + 1, "%s", 863 mci->csrows[csrow].channels[channela].label); 864 len -= chars; 865 pos += chars; 866 chars = snprintf(pos, len + 1, "-%s", 867 mci->csrows[csrow].channels[channelb].label); 868 869 if (edac_mc_get_log_ue()) 870 edac_mc_printk(mci, KERN_EMERG, 871 "UE row %d, channel-a= %d channel-b= %d " 872 "labels \"%s\": %s\n", csrow, channela, channelb, 873 labels, msg); 874 875 if (edac_mc_get_panic_on_ue()) 876 panic("UE row %d, channel-a= %d channel-b= %d " 877 "labels \"%s\": %s\n", csrow, channela, 878 channelb, labels, msg); 879 } 880 EXPORT_SYMBOL(edac_mc_handle_fbd_ue); 881 882 /************************************************************* 883 * On Fully Buffered DIMM modules, this help function is 884 * called to process CE events 885 */ 886 void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, 887 unsigned int csrow, unsigned int channel, char *msg) 888 { 889 890 /* Ensure boundary values */ 891 if (csrow >= mci->nr_csrows) { 892 /* something is wrong */ 893 edac_mc_printk(mci, KERN_ERR, 894 "INTERNAL ERROR: row out of range (%d >= %d)\n", 895 csrow, mci->nr_csrows); 896 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 897 return; 898 } 899 if (channel >= mci->csrows[csrow].nr_channels) { 900 /* something is wrong */ 901 edac_mc_printk(mci, KERN_ERR, 902 "INTERNAL ERROR: channel out of range (%d >= %d)\n", 903 channel, mci->csrows[csrow].nr_channels); 904 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 905 return; 906 } 907 908 if (edac_mc_get_log_ce()) 909 /* FIXME - put in DIMM location */ 910 edac_mc_printk(mci, KERN_WARNING, 911 "CE row %d, channel %d, label \"%s\": %s\n", 912 csrow, channel, 913 mci->csrows[csrow].channels[channel].label, msg); 914 915 mci->ce_count++; 916 mci->csrows[csrow].ce_count++; 917 mci->csrows[csrow].channels[channel].ce_count++; 918 } 919 EXPORT_SYMBOL(edac_mc_handle_fbd_ce); 920