1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * sun4u Memory Scrubbing 31 * 32 * On detection of a correctable memory ECC error, the sun4u kernel 33 * returns the corrected data to the requester and re-writes it 34 * to memory (DRAM). So if the correctable error was transient, 35 * the read has effectively been cleaned (scrubbed) from memory. 36 * 37 * Scrubbing thus reduces the likelyhood that multiple transient errors 38 * will occur in the same memory word, making uncorrectable errors due 39 * to transients less likely. 40 * 41 * Thus is born the desire that every memory location be periodically 42 * accessed. 43 * 44 * This file implements a memory scrubbing thread. This scrubber 45 * guarantees that all of physical memory is accessed periodically 46 * (memscrub_period_sec -- 12 hours). 47 * 48 * It attempts to do this as unobtrusively as possible. The thread 49 * schedules itself to wake up at an interval such that if it reads 50 * memscrub_span_pages (8MB) on each wakeup, it will read all of physical 51 * memory in in memscrub_period_sec (12 hours). 52 * 53 * The scrubber uses the block load hardware to read memory @ 268MB/s, 54 * so it reads spans of 8MB in 0.03 seconds. Unlike the original sun4d 55 * scrubber the sun4u scrubber does not read ahead if the system is idle 56 * because we can read memory very efficently. 57 * 58 * The scrubber maintains a private copy of the phys_install memory list 59 * to keep track of what memory should be scrubbed. 60 * 61 * The global routines memscrub_add_span() and memscrub_delete_span() are 62 * used to add and delete from this list. If hotplug memory is later 63 * supported these two routines can be used to notify the scrubber of 64 * memory configuration changes. 65 * 66 * The following parameters can be set via /etc/system 67 * 68 * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (8MB) 69 * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours) 70 * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) 71 * memscrub_delay_start_sec = (5 minutes) 72 * memscrub_verbose = (0) 73 * memscrub_override_ticks = (1 tick) 74 * disable_memscrub = (0) 75 * pause_memscrub = (0) 76 * read_all_memscrub = (0) 77 * 78 * The scrubber will print NOTICE messages of what it is doing if 79 * "memscrub_verbose" is set. 80 * 81 * If the scrubber's sleep time calculation drops to zero ticks, 82 * memscrub_override_ticks will be used as the sleep time instead. The 83 * sleep time should only drop to zero on a system with over 32.95 84 * terabytes of memory, or where the default scrubber parameters have 85 * been adjusted. For example, reducing memscrub_span_pages or 86 * memscrub_period_sec causes the sleep time to drop to zero with less 87 * memory. Note that since the sleep time is calculated in clock ticks, 88 * using hires clock ticks allows for more memory before the sleep time 89 * becomes zero. 90 * 91 * The scrubber will exit (or never be started) if it finds the variable 92 * "disable_memscrub" set. 93 * 94 * The scrubber will pause (not read memory) when "pause_memscrub" 95 * is set. It will check the state of pause_memscrub at each wakeup 96 * period. The scrubber will not make up for lost time. If you 97 * pause the scrubber for a prolonged period of time you can use 98 * the "read_all_memscrub" switch (see below) to catch up. In addition, 99 * pause_memscrub is used internally by the post memory DR callbacks. 100 * It is set for the small period of time during which the callbacks 101 * are executing. This ensures "memscrub_lock" will be released, 102 * allowing the callbacks to finish. 103 * 104 * The scrubber will read all memory if "read_all_memscrub" is set. 105 * The normal span read will also occur during the wakeup. 106 * 107 * MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system 108 * must have before we'll start the scrubber. 109 * 110 * MEMSCRUB_DFL_SPAN_PAGES (8MB) is based on the guess that 0.03 sec 111 * is a "good" amount of minimum time for the thread to run at a time. 112 * 113 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess -- 114 * twice the frequency the hardware folk estimated would be necessary. 115 * 116 * MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption 117 * that the scurbber should get its fair share of time (since it 118 * is short). At a priority of 0 the scrubber will be starved. 119 */ 120 121 #include <sys/systm.h> /* timeout, types, t_lock */ 122 #include <sys/cmn_err.h> 123 #include <sys/sysmacros.h> /* MIN */ 124 #include <sys/memlist.h> /* memlist */ 125 #include <sys/mem_config.h> /* memory add/delete */ 126 #include <sys/kmem.h> /* KMEM_NOSLEEP */ 127 #include <sys/cpuvar.h> /* ncpus_online */ 128 #include <sys/debug.h> /* ASSERTs */ 129 #include <sys/machsystm.h> /* lddphys */ 130 #include <sys/cpu_module.h> /* vtag_flushpage */ 131 #include <sys/kstat.h> 132 #include <sys/atomic.h> /* atomic_add_32 */ 133 134 #include <vm/hat.h> 135 #include <vm/seg_kmem.h> 136 #include <vm/hat_sfmmu.h> /* XXX FIXME - delete */ 137 138 #include <sys/time.h> 139 #include <sys/callb.h> /* CPR callback */ 140 #include <sys/ontrap.h> 141 142 /* 143 * Should really have paddr_t defined, but it is broken. Use 144 * ms_paddr_t in the meantime to make the code cleaner 145 */ 146 typedef uint64_t ms_paddr_t; 147 148 /* 149 * Global Routines: 150 */ 151 int memscrub_add_span(pfn_t pfn, pgcnt_t pages); 152 int memscrub_delete_span(pfn_t pfn, pgcnt_t pages); 153 int memscrub_init(void); 154 155 /* 156 * Global Data: 157 */ 158 159 /* 160 * scrub if we have at least this many pages 161 */ 162 #define MEMSCRUB_MIN_PAGES (32 * 1024 * 1024 / PAGESIZE) 163 164 /* 165 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC 166 */ 167 #define MEMSCRUB_DFL_PERIOD_SEC (12 * 60 * 60) /* 12 hours */ 168 169 /* 170 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration 171 */ 172 #define MEMSCRUB_DFL_SPAN_PAGES ((8 * 1024 * 1024) / PAGESIZE) 173 174 /* 175 * almost anything is higher priority than scrubbing 176 */ 177 #define MEMSCRUB_DFL_THREAD_PRI MINCLSYSPRI 178 179 /* 180 * size used when scanning memory 181 */ 182 #define MEMSCRUB_BLOCK_SIZE 256 183 #define MEMSCRUB_BLOCK_SIZE_SHIFT 8 /* log2(MEMSCRUB_BLOCK_SIZE) */ 184 #define MEMSCRUB_BLOCKS_PER_PAGE (PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT) 185 186 #define MEMSCRUB_BPP4M MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT 187 #define MEMSCRUB_BPP512K MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT 188 #define MEMSCRUB_BPP64K MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT 189 #define MEMSCRUB_BPP MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT 190 191 /* 192 * This message indicates that we have exceeded the limitations of 193 * the memscrubber. See the comments above regarding what would 194 * cause the sleep time to become zero. In DEBUG mode, this message 195 * is logged on the console and in the messages file. In non-DEBUG 196 * mode, it is only logged in the messages file. 197 */ 198 #ifdef DEBUG 199 #define MEMSCRUB_OVERRIDE_MSG "Memory scrubber sleep time is zero " \ 200 "seconds, consuming entire CPU." 201 #else 202 #define MEMSCRUB_OVERRIDE_MSG "!Memory scrubber sleep time is zero " \ 203 "seconds, consuming entire CPU." 204 #endif /* DEBUG */ 205 206 /* 207 * we can patch these defaults in /etc/system if necessary 208 */ 209 uint_t disable_memscrub = 0; 210 uint_t pause_memscrub = 0; 211 uint_t read_all_memscrub = 0; 212 uint_t memscrub_verbose = 0; 213 uint_t memscrub_all_idle = 0; 214 uint_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES; 215 uint_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC; 216 uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI; 217 uint_t memscrub_delay_start_sec = 5 * 60; 218 uint_t memscrub_override_ticks = 1; 219 220 /* 221 * Static Routines 222 */ 223 static void memscrubber(void); 224 static void memscrub_cleanup(void); 225 static int memscrub_add_span_gen(pfn_t, pgcnt_t, struct memlist **, uint_t *); 226 static int memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp); 227 static void memscrub_scan(uint_t blks, ms_paddr_t src); 228 229 /* 230 * Static Data 231 */ 232 233 static struct memlist *memscrub_memlist; 234 static uint_t memscrub_phys_pages; 235 236 static kcondvar_t memscrub_cv; 237 static kmutex_t memscrub_lock; 238 /* 239 * memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ... 240 */ 241 static void memscrub_init_mem_config(void); 242 static void memscrub_uninit_mem_config(void); 243 244 /* 245 * Keep track of some interesting statistics 246 */ 247 static struct memscrub_kstats { 248 kstat_named_t done_early; /* ahead of schedule */ 249 kstat_named_t early_sec; /* by cumulative num secs */ 250 kstat_named_t done_late; /* behind schedule */ 251 kstat_named_t late_sec; /* by cumulative num secs */ 252 kstat_named_t interval_ticks; /* num ticks between intervals */ 253 kstat_named_t force_run; /* forced to run, non-timeout */ 254 kstat_named_t errors_found; /* num errors found by memscrub */ 255 } memscrub_counts = { 256 { "done_early", KSTAT_DATA_UINT32 }, 257 { "early_sec", KSTAT_DATA_UINT32 }, 258 { "done_late", KSTAT_DATA_UINT32 }, 259 { "late_sec", KSTAT_DATA_UINT32 }, 260 { "interval_ticks", KSTAT_DATA_UINT32 }, 261 { "force_run", KSTAT_DATA_UINT32 }, 262 { "errors_found", KSTAT_DATA_UINT32 }, 263 }; 264 static struct kstat *memscrub_ksp = (struct kstat *)NULL; 265 266 static timeout_id_t memscrub_tid = 0; /* keep track of timeout id */ 267 268 /* 269 * create memscrub_memlist from phys_install list 270 * initialize locks, set memscrub_phys_pages. 271 */ 272 int 273 memscrub_init(void) 274 { 275 struct memlist *src; 276 277 /* 278 * only startup the scrubber if we have a minimum 279 * number of pages 280 */ 281 if (physinstalled >= MEMSCRUB_MIN_PAGES) { 282 283 /* 284 * initialize locks 285 */ 286 mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL); 287 cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL); 288 289 /* 290 * copy phys_install to memscrub_memlist 291 */ 292 for (src = phys_install; src; src = src->next) { 293 if (memscrub_add_span( 294 (pfn_t)(src->address >> PAGESHIFT), 295 (pgcnt_t)(src->size >> PAGESHIFT))) { 296 memscrub_cleanup(); 297 return (-1); 298 } 299 } 300 301 /* 302 * initialize kstats 303 */ 304 memscrub_ksp = kstat_create("unix", 0, "memscrub_kstat", 305 "misc", KSTAT_TYPE_NAMED, 306 sizeof (memscrub_counts) / sizeof (kstat_named_t), 307 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE); 308 309 if (memscrub_ksp) { 310 memscrub_ksp->ks_data = (void *)&memscrub_counts; 311 kstat_install(memscrub_ksp); 312 } else { 313 cmn_err(CE_NOTE, "Memscrubber cannot create kstats\n"); 314 } 315 316 /* 317 * create memscrubber thread 318 */ 319 (void) thread_create(NULL, 0, (void (*)())memscrubber, 320 NULL, 0, &p0, TS_RUN, memscrub_thread_pri); 321 322 /* 323 * We don't want call backs changing the list 324 * if there is no thread running. We do not 325 * attempt to deal with stopping/starting scrubbing 326 * on memory size changes. 327 */ 328 memscrub_init_mem_config(); 329 } 330 331 return (0); 332 } 333 334 static void 335 memscrub_cleanup(void) 336 { 337 memscrub_uninit_mem_config(); 338 while (memscrub_memlist) { 339 (void) memscrub_delete_span( 340 (pfn_t)(memscrub_memlist->address >> PAGESHIFT), 341 (pgcnt_t)(memscrub_memlist->size >> PAGESHIFT)); 342 } 343 if (memscrub_ksp) 344 kstat_delete(memscrub_ksp); 345 cv_destroy(&memscrub_cv); 346 mutex_destroy(&memscrub_lock); 347 } 348 349 #ifdef MEMSCRUB_DEBUG 350 static void 351 memscrub_printmemlist(char *title, struct memlist *listp) 352 { 353 struct memlist *list; 354 355 cmn_err(CE_CONT, "%s:\n", title); 356 357 for (list = listp; list; list = list->next) { 358 cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n", 359 list->address, list->size); 360 } 361 } 362 #endif /* MEMSCRUB_DEBUG */ 363 364 /* ARGSUSED */ 365 static void 366 memscrub_wakeup(void *c) 367 { 368 /* 369 * grab mutex to guarantee that our wakeup call 370 * arrives after we go to sleep -- so we can't sleep forever. 371 */ 372 mutex_enter(&memscrub_lock); 373 cv_signal(&memscrub_cv); 374 mutex_exit(&memscrub_lock); 375 } 376 377 /* 378 * provide an interface external to the memscrubber 379 * which will force the memscrub thread to run vs. 380 * waiting for the timeout, if one is set 381 */ 382 void 383 memscrub_run(void) 384 { 385 memscrub_counts.force_run.value.ui32++; 386 if (memscrub_tid) { 387 (void) untimeout(memscrub_tid); 388 memscrub_wakeup((void *)NULL); 389 } 390 } 391 392 /* 393 * this calculation doesn't account for the time 394 * that the actual scan consumes -- so we'd fall 395 * slightly behind schedule with this interval. 396 * It's very small. 397 */ 398 399 static uint_t 400 compute_interval_ticks(void) 401 { 402 /* 403 * We use msp_safe mpp_safe below to insure somebody 404 * doesn't set memscrub_span_pages or memscrub_phys_pages 405 * to 0 on us. 406 */ 407 static uint_t msp_safe, mpp_safe; 408 static uint_t interval_ticks, period_ticks; 409 msp_safe = memscrub_span_pages; 410 mpp_safe = memscrub_phys_pages; 411 412 period_ticks = memscrub_period_sec * hz; 413 interval_ticks = period_ticks; 414 415 ASSERT(mutex_owned(&memscrub_lock)); 416 417 if ((msp_safe != 0) && (mpp_safe != 0)) { 418 if (memscrub_phys_pages <= msp_safe) { 419 interval_ticks = period_ticks; 420 } else { 421 interval_ticks = (period_ticks / 422 (mpp_safe / msp_safe)); 423 } 424 } 425 return (interval_ticks); 426 } 427 428 void 429 memscrubber(void) 430 { 431 ms_paddr_t address, addr; 432 time_t deadline; 433 pgcnt_t pages; 434 uint_t reached_end = 1; 435 uint_t paused_message = 0; 436 uint_t interval_ticks = 0; 437 uint_t sleep_warn_printed = 0; 438 callb_cpr_t cprinfo; 439 440 /* 441 * notify CPR of our existence 442 */ 443 CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub"); 444 445 mutex_enter(&memscrub_lock); 446 447 if (memscrub_memlist == NULL) { 448 cmn_err(CE_WARN, "memscrub_memlist not initialized."); 449 goto memscrub_exit; 450 } 451 452 address = memscrub_memlist->address; 453 454 deadline = gethrestime_sec() + memscrub_delay_start_sec; 455 456 for (;;) { 457 if (disable_memscrub) 458 break; 459 460 /* 461 * compute interval_ticks 462 */ 463 interval_ticks = compute_interval_ticks(); 464 465 /* 466 * If the calculated sleep time is zero, and pause_memscrub 467 * has been set, make sure we sleep so that another thread 468 * can acquire memscrub_lock. 469 */ 470 if (interval_ticks == 0 && pause_memscrub) { 471 interval_ticks = hz; 472 } 473 474 /* 475 * And as a fail safe, under normal non-paused operation, do 476 * not allow the sleep time to be zero. 477 */ 478 if (interval_ticks == 0) { 479 interval_ticks = memscrub_override_ticks; 480 if (!sleep_warn_printed) { 481 cmn_err(CE_NOTE, MEMSCRUB_OVERRIDE_MSG); 482 sleep_warn_printed = 1; 483 } 484 } 485 486 memscrub_counts.interval_ticks.value.ui32 = interval_ticks; 487 488 /* 489 * Did we just reach the end of memory? If we are at the 490 * end of memory, delay end of memory processing until 491 * pause_memscrub is not set. 492 */ 493 if (reached_end && !pause_memscrub) { 494 time_t now = gethrestime_sec(); 495 496 if (now >= deadline) { 497 memscrub_counts.done_late.value.ui32++; 498 memscrub_counts.late_sec.value.ui32 += 499 (now - deadline); 500 /* 501 * past deadline, start right away 502 */ 503 interval_ticks = 0; 504 505 deadline = now + memscrub_period_sec; 506 } else { 507 /* 508 * we finished ahead of schedule. 509 * wait till previous deadline before re-start. 510 */ 511 interval_ticks = (deadline - now) * hz; 512 memscrub_counts.done_early.value.ui32++; 513 memscrub_counts.early_sec.value.ui32 += 514 (deadline - now); 515 deadline += memscrub_period_sec; 516 } 517 reached_end = 0; 518 sleep_warn_printed = 0; 519 } 520 521 if (interval_ticks != 0) { 522 /* 523 * it is safe from our standpoint for CPR to 524 * suspend the system 525 */ 526 CALLB_CPR_SAFE_BEGIN(&cprinfo); 527 528 /* 529 * hit the snooze bar 530 */ 531 memscrub_tid = timeout(memscrub_wakeup, NULL, 532 interval_ticks); 533 534 /* 535 * go to sleep 536 */ 537 cv_wait(&memscrub_cv, &memscrub_lock); 538 539 /* 540 * at this point, no timeout should be set 541 */ 542 memscrub_tid = 0; 543 544 /* 545 * we need to goto work and will be modifying 546 * our internal state and mapping/unmapping 547 * TTEs 548 */ 549 CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock); 550 } 551 552 553 if (memscrub_phys_pages == 0) { 554 cmn_err(CE_WARN, "Memory scrubber has 0 pages to read"); 555 goto memscrub_exit; 556 } 557 558 if (!pause_memscrub) { 559 if (paused_message) { 560 paused_message = 0; 561 if (memscrub_verbose) 562 cmn_err(CE_NOTE, "Memory scrubber " 563 "resuming"); 564 } 565 566 if (read_all_memscrub) { 567 if (memscrub_verbose) 568 cmn_err(CE_NOTE, "Memory scrubber " 569 "reading all memory per request"); 570 571 addr = memscrub_memlist->address; 572 reached_end = 0; 573 while (!reached_end) { 574 if (disable_memscrub) 575 break; 576 pages = memscrub_phys_pages; 577 reached_end = memscrub_verify_span( 578 &addr, &pages); 579 memscrub_scan(pages * 580 MEMSCRUB_BLOCKS_PER_PAGE, addr); 581 addr += ((uint64_t)pages * PAGESIZE); 582 } 583 read_all_memscrub = 0; 584 } 585 586 /* 587 * read 1 span 588 */ 589 pages = memscrub_span_pages; 590 591 if (disable_memscrub) 592 break; 593 594 /* 595 * determine physical address range 596 */ 597 reached_end = memscrub_verify_span(&address, 598 &pages); 599 600 memscrub_scan(pages * MEMSCRUB_BLOCKS_PER_PAGE, 601 address); 602 603 address += ((uint64_t)pages * PAGESIZE); 604 } 605 606 if (pause_memscrub && !paused_message) { 607 paused_message = 1; 608 if (memscrub_verbose) 609 cmn_err(CE_NOTE, "Memory scrubber paused"); 610 } 611 } 612 613 memscrub_exit: 614 cmn_err(CE_NOTE, "Memory scrubber exiting"); 615 CALLB_CPR_EXIT(&cprinfo); 616 memscrub_cleanup(); 617 thread_exit(); 618 /* NOTREACHED */ 619 } 620 621 /* 622 * condition address and size 623 * such that they span legal physical addresses. 624 * 625 * when appropriate, address will be rounded up to start of next 626 * struct memlist, and pages will be rounded down to the end of the 627 * memlist size. 628 * 629 * returns 1 if reached end of list, else returns 0. 630 */ 631 static int 632 memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp) 633 { 634 struct memlist *mlp; 635 ms_paddr_t address = *addrp; 636 uint64_t bytes = (uint64_t)*pagesp * PAGESIZE; 637 uint64_t bytes_remaining; 638 int reached_end = 0; 639 640 ASSERT(mutex_owned(&memscrub_lock)); 641 642 /* 643 * find memlist struct that contains addrp 644 * assumes memlist is sorted by ascending address. 645 */ 646 for (mlp = memscrub_memlist; mlp != NULL; mlp = mlp->next) { 647 /* 648 * if before this chunk, round up to beginning 649 */ 650 if (address < mlp->address) { 651 address = mlp->address; 652 break; 653 } 654 /* 655 * if before end of chunk, then we found it 656 */ 657 if (address < (mlp->address + mlp->size)) 658 break; 659 660 /* else go to next struct memlist */ 661 } 662 /* 663 * if we hit end of list, start at beginning 664 */ 665 if (mlp == NULL) { 666 mlp = memscrub_memlist; 667 address = mlp->address; 668 } 669 670 /* 671 * now we have legal address, and its mlp, condition bytes 672 */ 673 bytes_remaining = (mlp->address + mlp->size) - address; 674 675 if (bytes > bytes_remaining) 676 bytes = bytes_remaining; 677 678 /* 679 * will this span take us to end of list? 680 */ 681 if ((mlp->next == NULL) && 682 ((mlp->address + mlp->size) == (address + bytes))) 683 reached_end = 1; 684 685 /* return values */ 686 *addrp = address; 687 *pagesp = bytes / PAGESIZE; 688 689 return (reached_end); 690 } 691 692 /* 693 * add a span to the memscrub list 694 * add to memscrub_phys_pages 695 */ 696 int 697 memscrub_add_span(pfn_t pfn, pgcnt_t pages) 698 { 699 #ifdef MEMSCRUB_DEBUG 700 ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT; 701 uint64_t bytes = (uint64_t)pages << PAGESHIFT; 702 #endif /* MEMSCRUB_DEBUG */ 703 704 int retval; 705 706 mutex_enter(&memscrub_lock); 707 708 #ifdef MEMSCRUB_DEBUG 709 memscrub_printmemlist("memscrub_memlist before", memscrub_memlist); 710 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages); 711 cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx" 712 " size: 0x%llx\n", address, bytes); 713 #endif /* MEMSCRUB_DEBUG */ 714 715 retval = memscrub_add_span_gen(pfn, pages, &memscrub_memlist, 716 &memscrub_phys_pages); 717 718 #ifdef MEMSCRUB_DEBUG 719 memscrub_printmemlist("memscrub_memlist after", memscrub_memlist); 720 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages); 721 #endif /* MEMSCRUB_DEBUG */ 722 723 mutex_exit(&memscrub_lock); 724 725 return (retval); 726 } 727 728 static int 729 memscrub_add_span_gen( 730 pfn_t pfn, 731 pgcnt_t pages, 732 struct memlist **list, 733 uint_t *npgs) 734 { 735 ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT; 736 uint64_t bytes = (uint64_t)pages << PAGESHIFT; 737 struct memlist *dst; 738 struct memlist *prev, *next; 739 int retval = 0; 740 741 /* 742 * allocate a new struct memlist 743 */ 744 745 dst = (struct memlist *) 746 kmem_alloc(sizeof (struct memlist), KM_NOSLEEP); 747 748 if (dst == NULL) { 749 retval = -1; 750 goto add_done; 751 } 752 753 dst->address = address; 754 dst->size = bytes; 755 756 /* 757 * first insert 758 */ 759 if (*list == NULL) { 760 dst->prev = NULL; 761 dst->next = NULL; 762 *list = dst; 763 764 goto add_done; 765 } 766 767 /* 768 * insert into sorted list 769 */ 770 for (prev = NULL, next = *list; 771 next != NULL; 772 prev = next, next = next->next) { 773 if (address > (next->address + next->size)) 774 continue; 775 776 /* 777 * else insert here 778 */ 779 780 /* 781 * prepend to next 782 */ 783 if ((address + bytes) == next->address) { 784 kmem_free(dst, sizeof (struct memlist)); 785 786 next->address = address; 787 next->size += bytes; 788 789 goto add_done; 790 } 791 792 /* 793 * append to next 794 */ 795 if (address == (next->address + next->size)) { 796 kmem_free(dst, sizeof (struct memlist)); 797 798 if (next->next) { 799 /* 800 * don't overlap with next->next 801 */ 802 if ((address + bytes) > next->next->address) { 803 retval = -1; 804 goto add_done; 805 } 806 /* 807 * concatenate next and next->next 808 */ 809 if ((address + bytes) == next->next->address) { 810 struct memlist *mlp = next->next; 811 812 if (next == *list) 813 *list = next->next; 814 815 mlp->address = next->address; 816 mlp->size += next->size; 817 mlp->size += bytes; 818 819 if (next->prev) 820 next->prev->next = mlp; 821 mlp->prev = next->prev; 822 823 kmem_free(next, 824 sizeof (struct memlist)); 825 goto add_done; 826 } 827 } 828 829 next->size += bytes; 830 831 goto add_done; 832 } 833 834 /* don't overlap with next */ 835 if ((address + bytes) > next->address) { 836 retval = -1; 837 kmem_free(dst, sizeof (struct memlist)); 838 goto add_done; 839 } 840 841 /* 842 * insert before next 843 */ 844 dst->prev = prev; 845 dst->next = next; 846 next->prev = dst; 847 if (prev == NULL) { 848 *list = dst; 849 } else { 850 prev->next = dst; 851 } 852 goto add_done; 853 } /* end for */ 854 855 /* 856 * end of list, prev is valid and next is NULL 857 */ 858 prev->next = dst; 859 dst->prev = prev; 860 dst->next = NULL; 861 862 add_done: 863 864 if (retval != -1) 865 *npgs += pages; 866 867 return (retval); 868 } 869 870 /* 871 * delete a span from the memscrub list 872 * subtract from memscrub_phys_pages 873 */ 874 int 875 memscrub_delete_span(pfn_t pfn, pgcnt_t pages) 876 { 877 ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT; 878 uint64_t bytes = (uint64_t)pages << PAGESHIFT; 879 struct memlist *dst, *next; 880 int retval = 0; 881 882 mutex_enter(&memscrub_lock); 883 884 #ifdef MEMSCRUB_DEBUG 885 memscrub_printmemlist("memscrub_memlist Before", memscrub_memlist); 886 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages); 887 cmn_err(CE_CONT, "memscrub_delete_span: 0x%llx 0x%llx\n", 888 address, bytes); 889 #endif /* MEMSCRUB_DEBUG */ 890 891 /* 892 * find struct memlist containing page 893 */ 894 for (next = memscrub_memlist; next != NULL; next = next->next) { 895 if ((address >= next->address) && 896 (address < next->address + next->size)) 897 break; 898 } 899 900 /* 901 * if start address not in list 902 */ 903 if (next == NULL) { 904 retval = -1; 905 goto delete_done; 906 } 907 908 /* 909 * error if size goes off end of this struct memlist 910 */ 911 if (address + bytes > next->address + next->size) { 912 retval = -1; 913 goto delete_done; 914 } 915 916 /* 917 * pages at beginning of struct memlist 918 */ 919 if (address == next->address) { 920 /* 921 * if start & size match, delete from list 922 */ 923 if (bytes == next->size) { 924 if (next == memscrub_memlist) 925 memscrub_memlist = next->next; 926 if (next->prev != NULL) 927 next->prev->next = next->next; 928 if (next->next != NULL) 929 next->next->prev = next->prev; 930 931 kmem_free(next, sizeof (struct memlist)); 932 } else { 933 /* 934 * increment start address by bytes 935 */ 936 next->address += bytes; 937 next->size -= bytes; 938 } 939 goto delete_done; 940 } 941 942 /* 943 * pages at end of struct memlist 944 */ 945 if (address + bytes == next->address + next->size) { 946 /* 947 * decrement size by bytes 948 */ 949 next->size -= bytes; 950 goto delete_done; 951 } 952 953 /* 954 * delete a span in the middle of the struct memlist 955 */ 956 { 957 /* 958 * create a new struct memlist 959 */ 960 dst = (struct memlist *) 961 kmem_alloc(sizeof (struct memlist), KM_NOSLEEP); 962 963 if (dst == NULL) { 964 retval = -1; 965 goto delete_done; 966 } 967 968 /* 969 * existing struct memlist gets address 970 * and size up to pfn 971 */ 972 dst->address = address + bytes; 973 dst->size = (next->address + next->size) - dst->address; 974 next->size = address - next->address; 975 976 /* 977 * new struct memlist gets address starting 978 * after pfn, until end 979 */ 980 981 /* 982 * link in new memlist after old 983 */ 984 dst->next = next->next; 985 dst->prev = next; 986 987 if (next->next != NULL) 988 next->next->prev = dst; 989 next->next = dst; 990 } 991 992 delete_done: 993 if (retval != -1) { 994 memscrub_phys_pages -= pages; 995 if (memscrub_phys_pages == 0) 996 disable_memscrub = 1; 997 } 998 999 #ifdef MEMSCRUB_DEBUG 1000 memscrub_printmemlist("memscrub_memlist After", memscrub_memlist); 1001 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages); 1002 #endif /* MEMSCRUB_DEBUG */ 1003 1004 mutex_exit(&memscrub_lock); 1005 return (retval); 1006 } 1007 1008 static void 1009 memscrub_scan(uint_t blks, ms_paddr_t src) 1010 { 1011 uint_t psz, bpp, pgsread; 1012 pfn_t pfn; 1013 ms_paddr_t pa; 1014 caddr_t va; 1015 on_trap_data_t otd; 1016 1017 extern void memscrub_read(caddr_t src, uint_t blks); 1018 1019 ASSERT(mutex_owned(&memscrub_lock)); 1020 1021 pgsread = 0; 1022 pa = src; 1023 1024 while (blks != 0) { 1025 /* Ensure the PA is properly aligned */ 1026 if (((pa & MMU_PAGEMASK4M) == pa) && 1027 (blks >= MEMSCRUB_BPP4M)) { 1028 psz = MMU_PAGESIZE4M; 1029 bpp = MEMSCRUB_BPP4M; 1030 } else if (((pa & MMU_PAGEMASK512K) == pa) && 1031 (blks >= MEMSCRUB_BPP512K)) { 1032 psz = MMU_PAGESIZE512K; 1033 bpp = MEMSCRUB_BPP512K; 1034 } else if (((pa & MMU_PAGEMASK64K) == pa) && 1035 (blks >= MEMSCRUB_BPP64K)) { 1036 psz = MMU_PAGESIZE64K; 1037 bpp = MEMSCRUB_BPP64K; 1038 } else if ((pa & MMU_PAGEMASK) == pa) { 1039 psz = MMU_PAGESIZE; 1040 bpp = MEMSCRUB_BPP; 1041 } else { 1042 if (memscrub_verbose) { 1043 cmn_err(CE_NOTE, "Memory scrubber ignoring " 1044 "non-page aligned block starting at 0x%" 1045 PRIx64, src); 1046 } 1047 return; 1048 } 1049 if (blks < bpp) bpp = blks; 1050 1051 #ifdef MEMSCRUB_DEBUG 1052 cmn_err(CE_NOTE, "Going to run psz=%x, " 1053 "bpp=%x pa=%llx\n", psz, bpp, pa); 1054 #endif /* MEMSCRUB_DEBUG */ 1055 1056 /* 1057 * MEMSCRUBBASE is a 4MB aligned page in the 1058 * kernel so that we can quickly map the PA 1059 * to a VA for the block loads performed in 1060 * memscrub_read. 1061 */ 1062 pfn = mmu_btop(pa); 1063 va = (caddr_t)MEMSCRUBBASE; 1064 hat_devload(kas.a_hat, va, psz, pfn, PROT_READ, 1065 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK); 1066 1067 /* 1068 * Can't allow the memscrubber to migrate across CPUs as 1069 * we need to know whether CEEN is enabled for the current 1070 * CPU to enable us to scrub the memory. Don't use 1071 * kpreempt_disable as the time we take to scan a span (even 1072 * without cpu_check_ce having to manually cpu_check_block) 1073 * is too long to hold a higher priority thread (eg, RT) 1074 * off cpu. 1075 */ 1076 thread_affinity_set(curthread, CPU_CURRENT); 1077 1078 /* 1079 * Protect read scrub from async faults. For now, we simply 1080 * maintain a count of such faults caught. 1081 */ 1082 1083 if (!on_trap(&otd, OT_DATA_EC)) { 1084 memscrub_read(va, bpp); 1085 /* 1086 * Check if CEs require logging 1087 */ 1088 cpu_check_ce(SCRUBBER_CEEN_CHECK, 1089 (uint64_t)pa, va, psz); 1090 no_trap(); 1091 thread_affinity_clear(curthread); 1092 } else { 1093 no_trap(); 1094 thread_affinity_clear(curthread); 1095 1096 /* 1097 * Got an async error.. 1098 * Try rescanning it at MMU_PAGESIZE 1099 * granularity if we were trying to 1100 * read at a larger page size. 1101 * This is to ensure we continue to 1102 * scan the rest of the span. 1103 */ 1104 if (psz > MMU_PAGESIZE) { 1105 caddr_t vaddr = va; 1106 ms_paddr_t paddr = pa; 1107 int tmp = 0; 1108 for (; tmp < bpp; tmp += MEMSCRUB_BPP) { 1109 thread_affinity_set(curthread, CPU_CURRENT); 1110 if (!on_trap(&otd, OT_DATA_EC)) { 1111 memscrub_read(vaddr, MEMSCRUB_BPP); 1112 cpu_check_ce(SCRUBBER_CEEN_CHECK, 1113 (uint64_t)paddr, vaddr, MMU_PAGESIZE); 1114 no_trap(); 1115 } else { 1116 no_trap(); 1117 memscrub_counts.errors_found.value.ui32++; 1118 } 1119 thread_affinity_clear(curthread); 1120 vaddr += MMU_PAGESIZE; 1121 paddr += MMU_PAGESIZE; 1122 } 1123 } 1124 } 1125 hat_unload(kas.a_hat, va, psz, HAT_UNLOAD_UNLOCK); 1126 1127 blks -= bpp; 1128 pa += psz; 1129 pgsread++; 1130 } 1131 if (memscrub_verbose) { 1132 cmn_err(CE_NOTE, "Memory scrubber read 0x%x pages starting " 1133 "at 0x%" PRIx64, pgsread, src); 1134 } 1135 } 1136 1137 /* 1138 * The memory add/delete callback mechanism does not pass in the 1139 * page ranges. The phys_install list has been updated though, so 1140 * create a new scrub list from it. 1141 */ 1142 1143 static int 1144 new_memscrub() 1145 { 1146 struct memlist *src, *list, *old_list; 1147 uint_t npgs; 1148 1149 /* 1150 * copy phys_install to memscrub_memlist 1151 */ 1152 list = NULL; 1153 npgs = 0; 1154 memlist_read_lock(); 1155 for (src = phys_install; src; src = src->next) { 1156 if (memscrub_add_span_gen((pfn_t)(src->address >> PAGESHIFT), 1157 (pgcnt_t)(src->size >> PAGESHIFT), &list, &npgs)) { 1158 memlist_read_unlock(); 1159 while (list) { 1160 struct memlist *el; 1161 1162 el = list; 1163 list = list->next; 1164 kmem_free(el, sizeof (struct memlist)); 1165 } 1166 return (-1); 1167 } 1168 } 1169 memlist_read_unlock(); 1170 1171 mutex_enter(&memscrub_lock); 1172 memscrub_phys_pages = npgs; 1173 old_list = memscrub_memlist; 1174 memscrub_memlist = list; 1175 mutex_exit(&memscrub_lock); 1176 1177 while (old_list) { 1178 struct memlist *el; 1179 1180 el = old_list; 1181 old_list = old_list->next; 1182 kmem_free(el, sizeof (struct memlist)); 1183 } 1184 return (0); 1185 } 1186 1187 /*ARGSUSED*/ 1188 static void 1189 memscrub_mem_config_post_add( 1190 void *arg, 1191 pgcnt_t delta_pages) 1192 { 1193 /* 1194 * We increment pause_memscrub before entering new_memscrub(). This 1195 * will force the memscrubber to sleep, allowing the DR callback 1196 * thread to acquire memscrub_lock in new_memscrub(). The use of 1197 * atomic_add_32() allows concurrent memory DR operations to use the 1198 * callbacks safely. 1199 */ 1200 atomic_add_32(&pause_memscrub, 1); 1201 ASSERT(pause_memscrub != 0); 1202 1203 /* 1204 * "Don't care" if we are not scrubbing new memory. 1205 */ 1206 (void) new_memscrub(); 1207 1208 /* Restore the pause setting. */ 1209 atomic_add_32(&pause_memscrub, -1); 1210 } 1211 1212 /*ARGSUSED*/ 1213 static int 1214 memscrub_mem_config_pre_del( 1215 void *arg, 1216 pgcnt_t delta_pages) 1217 { 1218 /* Nothing to do. */ 1219 return (0); 1220 } 1221 1222 /*ARGSUSED*/ 1223 static void 1224 memscrub_mem_config_post_del( 1225 void *arg, 1226 pgcnt_t delta_pages, 1227 int cancelled) 1228 { 1229 /* 1230 * We increment pause_memscrub before entering new_memscrub(). This 1231 * will force the memscrubber to sleep, allowing the DR callback 1232 * thread to acquire memscrub_lock in new_memscrub(). The use of 1233 * atomic_add_32() allows concurrent memory DR operations to use the 1234 * callbacks safely. 1235 */ 1236 atomic_add_32(&pause_memscrub, 1); 1237 ASSERT(pause_memscrub != 0); 1238 1239 /* 1240 * Must stop scrubbing deleted memory as it may be disconnected. 1241 */ 1242 if (new_memscrub()) { 1243 disable_memscrub = 1; 1244 } 1245 1246 /* Restore the pause setting. */ 1247 atomic_add_32(&pause_memscrub, -1); 1248 } 1249 1250 static kphysm_setup_vector_t memscrub_mem_config_vec = { 1251 KPHYSM_SETUP_VECTOR_VERSION, 1252 memscrub_mem_config_post_add, 1253 memscrub_mem_config_pre_del, 1254 memscrub_mem_config_post_del, 1255 }; 1256 1257 static void 1258 memscrub_init_mem_config() 1259 { 1260 int ret; 1261 1262 ret = kphysm_setup_func_register(&memscrub_mem_config_vec, 1263 (void *)NULL); 1264 ASSERT(ret == 0); 1265 } 1266 1267 static void 1268 memscrub_uninit_mem_config() 1269 { 1270 /* This call is OK if the register call was not done. */ 1271 kphysm_setup_func_unregister(&memscrub_mem_config_vec, (void *)NULL); 1272 } 1273