/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

/*
 * Used to lock EDAC MC to just one module, preventing two drivers,
 * e.g. apei/ghes and i7core_edac, from being used at the same time.
 */
static void const *edac_mc_owner;

unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
				 unsigned len)
{
	struct mem_ctl_info *mci = dimm->mci;
	int i, n, count = 0;
	char *p = buf;

	for (i = 0; i < mci->n_layers; i++) {
		n = snprintf(p, len, "%s %d ",
			     edac_layer_name[mci->layers[i].type],
			     dimm->location[i]);
		p += n;
		len -= n;
		count += n;
		if (!len)
			break;
	}

	return count;
}

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	edac_dbg(4, " channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "   channel = %p\n", chan);
	edac_dbg(4, "   channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "   channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
{
	char location[80];

	edac_dimm_info_location(dimm, location, sizeof(location));

	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
		 dimm->mci->csbased ? "rank" : "dimm",
		 number, location, dimm->csrow, dimm->cschannel);
	edac_dbg(4, "  dimm = %p\n", dimm);
	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is absolutely needed to keep the proper advancing
 * further in memory to the proper offsets when allocating the struct along
 * with its embedded structs, as edac_device_alloc_ctl_info() does, for
 * example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by
	 * default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = (unsigned long)p % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}

static void _edac_mc_free(struct mem_ctl_info *mci)
{
	int i, chn, row;
	struct csrow_info *csr;
	const unsigned int tot_dimms = mci->tot_dimms;
	const unsigned int tot_channels = mci->num_cschannel;
	const unsigned int tot_csrows = mci->nr_csrows;

	if (mci->dimms) {
		for (i = 0; i < tot_dimms; i++)
			kfree(mci->dimms[i]);
		kfree(mci->dimms);
	}
	if (mci->csrows) {
		for (row = 0; row < tot_csrows; row++) {
			csr = mci->csrows[row];
			if (csr) {
				if (csr->channels) {
					for (chn = 0; chn < tot_channels; chn++)
						kfree(csr->channels[chn]);
					kfree(csr->channels);
				}
				kfree(csr);
			}
		}
		kfree(mci->csrows);
	}
	kfree(mci);
}

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:	Memory controller number
 * @n_layers:	Number of MC hierarchy layers
 * @layers:	Describes each layer as seen by the Memory Controller
 * @sz_pvt:	size of private storage needed
 *
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks requires a change in
 * the drivers.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csr;
	struct rank_info *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, row, chn, n, len, off;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		edac_dbg(4, "errcount layer %d size %d\n", i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		 size,
		 tot_dimms,
		 per_rank ? "ranks" : "dimms",
		 tot_csrows * tot_channels);

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->csbased = per_rank;

	/*
	 * Allocate and fill the csrow/channels structs
	 */
	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
	if (!mci->csrows)
		goto error;
	for (row = 0; row < tot_csrows; row++) {
		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
		if (!csr)
			goto error;
		mci->csrows[row] = csr;
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
					GFP_KERNEL);
		if (!csr->channels)
			goto error;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
			if (!chan)
				goto error;
			csr->channels[chn] = chan;
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Allocate and fill the dimm structs
	 */
	mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
	if (!mci->dimms)
		goto error;

	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	for (i = 0; i < tot_dimms; i++) {
		chan = mci->csrows[row]->channels[chn];
		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
		if (off < 0 || off >= tot_dimms) {
			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
			goto error;
		}

		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
		if (!dimm)
			goto error;
		mci->dimms[off] = dimm;
		dimm->mci = mci;

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		if (layers[0].is_virt_csrow) {
			chn++;
			if (chn == tot_channels) {
				chn = 0;
				row++;
			}
		} else {
			row++;
			if (row == tot_csrows) {
				row = 0;
				chn++;
			}
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;

	return mci;

error:
	_edac_mc_free(mci);

	return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
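
/*
 * Illustrative use of edac_mc_alloc() (hypothetical driver code, not part
 * of this file).  A driver describes the memory hierarchy with an array
 * of struct edac_mc_layer, then allocates the mci together with its own
 * private data ('struct my_priv' below is an assumed, made-up type):
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = 4;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = 2;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_priv));
 *	if (!mci)
 *		return -ENOMEM;
 *
 * mci->pvt_info then points at the private area carved out of the same
 * chunk, and edac_mc_free() releases everything in one go.
 */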

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_dbg(1, "\n");

	/* If we're not yet registered with sysfs free only what was allocated
	 * in edac_mc_alloc().
	 */
	if (!device_is_registered(&mci->dev)) {
		_edac_mc_free(mci);
		return;
	}

	/* the mci instance is freed here, when the sysfs object is dropped */
	edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);


/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	edac_dbg(3, "\n");

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->pdev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			   msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	edac_dbg(0, "\n");

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		edac_dbg(0, "not canceled, flush the queue\n");

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}


/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->pdev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
	int handlers = atomic_dec_return(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);

	return handlers;
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);
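
/*
 * Illustrative lookup (hypothetical caller, not part of this file),
 * following the locking rule documented above:
 *
 *	struct mem_ctl_info *mci = edac_mc_find(0);
 *
 *	if (mci)
 *		edac_mc_printk(mci, KERN_INFO, "found %s\n", mci->ctl_name);
 */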

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	int ret = -EINVAL;
	edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
			int j;

			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			if (mci->dimms[i]->nr_pages)
				edac_mc_dump_dimm(mci->dimms[i], i);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
		ret = -EPERM;
		goto fail0;
	}

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	edac_mc_owner = mci->mod_name;

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	edac_dbg(0, "\n");

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	if (!del_mc_from_global_list(mci))
		edac_mc_owner = NULL;
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
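
/*
 * Illustrative life cycle (hypothetical driver code, not part of this
 * file): a typical probe path allocates and registers the controller,
 * and the matching remove path unregisters it before freeing:
 *
 *	probe:
 *		mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sz_pvt);
 *		mci->pdev = &pdev->dev;
 *		... fill mci->mod_name, mci->ctl_name, dimm/csrow info ...
 *		if (edac_mc_add_mc(mci))
 *			goto err_free;
 *
 *	remove:
 *		mci = edac_mc_del_mc(&pdev->dev);
 *		if (mci)
 *			edac_mc_free(mci);
 */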

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	edac_dbg(3, "\n");

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info **csrows = mci->csrows;
	int row, i, j, n;

	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j]->dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
			 mci->mc_idx,
			 csrow->first_page, page, csrow->last_page,
			 csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
	[EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS],
			      const u16 count)
{
	int i, index = 0;

	mci->ce_mc += count;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count += count;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index] += count;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS],
			      const u16 count)
{
	int i, index = 0;

	mci->ue_mc += count;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count += count;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index] += count;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}
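
/*
 * Worked example of the per-layer index math above (illustrative only):
 * with two layers of sizes {2, 4}, an error at pos = {1, 2} increments
 * layer 0 counter 1, then layer 1 counter 1 * 4 + 2 = 6.  In other words,
 * each layer's counter array is indexed by the error position flattened
 * over all deeper layers.
 */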

static void edac_ce_error(struct mem_ctl_info *mci,
			  const u16 error_count,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  long grain)
{
	unsigned long remapped_page;
	char *msg_aux = "";

	if (*msg)
		msg_aux = " ";

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "%d CE %s%son %s (%s %s - %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "%d CE %s%son %s (%s %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MCs that can't do this lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
				    offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const u16 error_count,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	char *msg_aux = "";

	if (*msg)
		msg_aux = " ";

	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "%d UE %s%son %s (%s %s - %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "%d UE %s%son %s (%s %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s%son %s (%s%s - %s)\n",
			      msg, msg_aux, label, location, detail, other_detail);
		else
			panic("UE %s%son %s (%s%s)\n",
			      msg, msg_aux, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}

/**
 * edac_raw_mc_handle_error - reports a memory event to userspace without doing
 *			      anything to discover the error location
 *
 * @type:		severity of the error (CE/UE/Fatal)
 * @mci:		a struct mem_ctl_info pointer
 * @e:			error description
 *
 * This raw function is used internally by edac_mc_handle_error(). It should
 * only be called directly when the hardware error comes directly from BIOS,
 * like in the case of the APEI GHES driver.
 */
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
			      struct mem_ctl_info *mci,
			      struct edac_raw_error_desc *e)
{
	char detail[80];
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
			e->page_frame_number, e->offset_in_page,
			e->grain, e->syndrome);
		edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
			      detail, e->other_detail, e->enable_per_layer_report,
			      e->page_frame_number, e->offset_in_page, e->grain);
	} else {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld",
			e->page_frame_number, e->offset_in_page, e->grain);

		edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
			      detail, e->other_detail, e->enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
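
/*
 * Minimal sketch of a caller that already knows everything about the
 * error from firmware (hypothetical, in the spirit of the ghes_edac
 * path; not part of this file).  Only fields dereferenced above are
 * filled before handing the descriptor to edac_raw_mc_handle_error():
 *
 *	struct edac_raw_error_desc *e = &mci->error_desc;
 *
 *	memset(e, 0, sizeof(*e));
 *	e->error_count = 1;
 *	e->grain = 1;
 *	e->msg = "memory read error";
 *	e->other_detail = "";
 *	e->top_layer = e->mid_layer = e->low_layer = -1;
 *	strcpy(e->label, "unknown memory");
 *	edac_raw_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, e);
 */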

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:		severity of the error (CE/UE/Fatal)
 * @mci:		a struct mem_ctl_info pointer
 * @error_count:	Number of errors of the same type
 * @page_frame_number:	mem page where the error occurred
 * @offset_in_page:	offset of the error inside the page
 * @syndrome:		ECC syndrome
 * @top_layer:		Memory layer[0] position
 * @mid_layer:		Memory layer[1] position
 * @low_layer:		Memory layer[2] position
 * @msg:		Message meaningful to the end users that
 *			explains the event
 * @other_detail:	Technical details about the event that
 *			may help hardware manufacturers and
 *			EDAC developers to analyse the event
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const u16 error_count,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
			  const char *msg,
			  const char *other_detail)
{
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
	int i, n_labels = 0;
	u8 grain_bits;
	struct edac_raw_error_desc *e = &mci->error_desc;

	edac_dbg(3, "MC%d\n", mci->mc_idx);

	/* Fills the error report buffer */
	memset(e, 0, sizeof (*e));
	e->error_count = error_count;
	e->top_layer = top_layer;
	e->mid_layer = mid_layer;
	e->low_layer = low_layer;
	e->page_frame_number = page_frame_number;
	e->offset_in_page = offset_in_page;
	e->syndrome = syndrome;
	e->msg = msg;
	e->other_detail = other_detail;

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing to the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			e->enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	p = e->label;
	*p = '\0';

	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = mci->dimms[i];

		if (top_layer >= 0 && top_layer != dimm->location[0])
			continue;
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
			continue;
		if (low_layer >= 0 && low_layer != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > e->grain)
			e->grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/... may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (e->enable_per_layer_report && dimm->nr_pages) {
			if (n_labels >= EDAC_MAX_LABELS) {
				e->enable_per_layer_report = false;
				break;
			}
			n_labels++;
			if (p != e->label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			edac_dbg(4, "%s csrows map: (%d,%d)\n",
				 mci->csbased ? "rank" : "dimm",
				 dimm->csrow, dimm->cschannel);

			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!e->enable_per_layer_report) {
		strcpy(e->label, "any memory");
	} else {
		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
		if (p == e->label)
			strcpy(e->label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row]->ce_count += error_count;
				if (chan >= 0)
					mci->csrows[row]->channels[chan]->ce_count += error_count;
			}
		} else
			if (row >= 0)
				mci->csrows[row]->ue_count += error_count;
	}

	/* Fill the RAM location data */
	p = e->location;

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}
	if (p > e->location)
		*(p - 1) = '\0';

	/* Report the error via the trace interface */
	grain_bits = fls_long(e->grain) + 1;
	trace_mc_event(type, e->msg, e->label, e->error_count,
		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
		       grain_bits, e->syndrome, e->other_detail);

	edac_raw_mc_handle_error(type, mci, e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);