/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * sun4u Memory Scrubbing
 *
 * On detection of a correctable memory ECC error, the sun4u kernel
 * returns the corrected data to the requester and re-writes it
 * to memory (DRAM).  So if the correctable error was transient,
 * the error has effectively been cleaned (scrubbed) from memory.
 *
 * Scrubbing thus reduces the likelihood that multiple transient errors
 * will occur in the same memory word, making uncorrectable errors due
 * to transients less likely.
 *
 * Thus is born the desire that every memory location be periodically
 * accessed.
 *
 * This file implements a memory scrubbing thread.  This scrubber
 * guarantees that all of physical memory is accessed periodically
 * (memscrub_period_sec -- 12 hours).
 *
 * It attempts to do this as unobtrusively as possible.  The thread
 * schedules itself to wake up at an interval such that if it reads
 * memscrub_span_pages (32MB) on each wakeup, it will read all of physical
 * memory in memscrub_period_sec (12 hours).
 *
 * The scrubber uses the block load and prefetch hardware to read memory
 * @ 1300MB/s, so it reads spans of 32MB in 0.025 seconds.  Unlike the
 * original sun4d scrubber, the sun4u scrubber does not read ahead if the
 * system is idle because we can read memory very efficiently.
 *
 * The scrubber maintains a private copy of the phys_install memory list
 * to keep track of what memory should be scrubbed.
 *
 * The global routines memscrub_add_span() and memscrub_delete_span() are
 * used to add and delete from this list.  If hotplug memory is later
 * supported these two routines can be used to notify the scrubber of
 * memory configuration changes.
 *
 * The following parameters can be set via /etc/system
 *
 * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (32MB)
 * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
 * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI)
 * memscrub_delay_start_sec = (5 minutes)
 * memscrub_verbose = (0)
 * memscrub_override_ticks = (1 tick)
 * disable_memscrub = (0)
 * pause_memscrub = (0)
 * read_all_memscrub = (0)
 *
 * The scrubber will print NOTICE messages of what it is doing if
 * "memscrub_verbose" is set.
 *
 * If the scrubber's sleep time calculation drops to zero ticks,
 * memscrub_override_ticks will be used as the sleep time instead.  The
 * sleep time should only drop to zero on a system with over 131.84
 * terabytes of memory, or where the default scrubber parameters have
 * been adjusted.
 * For example, reducing memscrub_span_pages or
 * memscrub_period_sec causes the sleep time to drop to zero with less
 * memory.  Note that since the sleep time is calculated in clock ticks,
 * using hires clock ticks allows for more memory before the sleep time
 * becomes zero.
 *
 * The scrubber will exit (or never be started) if it finds the variable
 * "disable_memscrub" set.
 *
 * The scrubber will pause (not read memory) when "pause_memscrub"
 * is set.  It will check the state of pause_memscrub at each wakeup
 * period.  The scrubber will not make up for lost time.  If you
 * pause the scrubber for a prolonged period of time you can use
 * the "read_all_memscrub" switch (see below) to catch up.  In addition,
 * pause_memscrub is used internally by the post memory DR callbacks.
 * It is set for the small period of time during which the callbacks
 * are executing.  This ensures "memscrub_lock" will be released,
 * allowing the callbacks to finish.
 *
 * The scrubber will read all memory if "read_all_memscrub" is set.
 * The normal span read will also occur during the wakeup.
 *
 * MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system
 * must have before we'll start the scrubber.
 *
 * MEMSCRUB_DFL_SPAN_PAGES (32MB) is based on the guess that 0.025 sec
 * is a reasonable minimum amount of time for the thread to run on each
 * wakeup.
 *
 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
 * twice the frequency the hardware folk estimated would be necessary.
 *
 * MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption
 * that the scrubber should get its fair share of time (since it
 * is short).  At a priority of 0 the scrubber will be starved.
 */
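
/*
 * Illustrative /etc/system fragment (hypothetical values, not a
 * recommendation) showing how the tunables above would be adjusted;
 * for example, to scan 64MB per wakeup over a 24 hour period with
 * verbose logging on an 8KB-page system:
 *
 *     set memscrub_span_pages = 0x2000
 *     set memscrub_period_sec = 86400
 *     set memscrub_verbose = 1
 */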

#include <sys/systm.h>		/* timeout, types, t_lock */
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>	/* MIN */
#include <sys/memlist.h>	/* memlist */
#include <sys/mem_config.h>	/* memory add/delete */
#include <sys/kmem.h>		/* KMEM_NOSLEEP */
#include <sys/cpuvar.h>		/* ncpus_online */
#include <sys/debug.h>		/* ASSERTs */
#include <sys/machsystm.h>	/* lddphys */
#include <sys/cpu_module.h>	/* vtag_flushpage */
#include <sys/kstat.h>
#include <sys/atomic.h>		/* atomic_add_32 */

#include <vm/hat.h>
#include <vm/seg_kmem.h>
#include <vm/hat_sfmmu.h>	/* XXX FIXME - delete */

#include <sys/time.h>
#include <sys/callb.h>		/* CPR callback */
#include <sys/ontrap.h>

/*
 * Should really have paddr_t defined, but it is broken.  Use
 * ms_paddr_t in the meantime to make the code cleaner
 */
typedef uint64_t ms_paddr_t;

/*
 * Global Routines:
 */
int memscrub_add_span(pfn_t pfn, pgcnt_t pages);
int memscrub_delete_span(pfn_t pfn, pgcnt_t pages);
int memscrub_init(void);
void memscrub_induced_error(void);

/*
 * Global Data:
 */

/*
 * scrub if we have at least this many pages
 */
#define MEMSCRUB_MIN_PAGES	(32 * 1024 * 1024 / PAGESIZE)

/*
 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
 */
#define MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */

/*
 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
 */
#define MEMSCRUB_DFL_SPAN_PAGES	((32 * 1024 * 1024) / PAGESIZE)

/*
 * almost anything is higher priority than scrubbing
 */
#define MEMSCRUB_DFL_THREAD_PRI	MINCLSYSPRI

/*
 * size used when scanning memory
 */
#define MEMSCRUB_BLOCK_SIZE	256
#define MEMSCRUB_BLOCK_SIZE_SHIFT 8	/* log2(MEMSCRUB_BLOCK_SIZE) */
#define MEMSCRUB_BLOCKS_PER_PAGE (PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)

#define MEMSCRUB_BPP4M		(MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define MEMSCRUB_BPP512K	(MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define MEMSCRUB_BPP64K		(MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define MEMSCRUB_BPP		(MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)

/*
 * This message indicates that we have exceeded the limitations of
 * the memscrubber.  See the comments above regarding what would
 * cause the sleep time to become zero.  In DEBUG mode, this message
 * is logged on the console and in the messages file.  In non-DEBUG
 * mode, it is only logged in the messages file.
 */
#ifdef DEBUG
#define MEMSCRUB_OVERRIDE_MSG	"Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#else
#define MEMSCRUB_OVERRIDE_MSG	"!Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#endif /* DEBUG */

/*
 * we can patch these defaults in /etc/system if necessary
 */
uint_t disable_memscrub = 0;
uint_t pause_memscrub = 0;
uint_t read_all_memscrub = 0;
uint_t memscrub_verbose = 0;
uint_t memscrub_all_idle = 0;
uint_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
uint_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
uint_t memscrub_delay_start_sec = 5 * 60;
uint_t memscrub_override_ticks = 1;

/*
 * Static Routines
 */
static void memscrubber(void);
static void memscrub_cleanup(void);
static int memscrub_add_span_gen(pfn_t, pgcnt_t, struct memlist **, uint_t *);
static int memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp);
static void memscrub_scan(uint_t blks, ms_paddr_t src);

/*
 * Static Data
 */

static struct memlist *memscrub_memlist;
static uint_t memscrub_phys_pages;

static kcondvar_t memscrub_cv;
static kmutex_t memscrub_lock;
/*
 * memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ...
 */
static void memscrub_init_mem_config(void);
static void memscrub_uninit_mem_config(void);

/*
 * Linked list of memscrub aware spans having retired pages.
 * Currently enabled only on sun4u USIII-based platforms.
 */
typedef struct memscrub_page_retire_span {
    ms_paddr_t address;
    struct memscrub_page_retire_span *next;
} memscrub_page_retire_span_t;

static memscrub_page_retire_span_t *memscrub_page_retire_span_list = NULL;

static void memscrub_page_retire_span_add(ms_paddr_t);
static void memscrub_page_retire_span_delete(ms_paddr_t);
static int memscrub_page_retire_span_search(ms_paddr_t);
static void memscrub_page_retire_span_list_update(void);

/*
 * add_to_page_retire_list: Set by the cpu_async_log_err() routine
 * by calling memscrub_induced_error() when a CE/UE occurs on a retired
 * page due to a memscrub read.  Cleared by memscrub after updating the
 * global page retire span list.  Piggybacks on the protection of
 * memscrub_lock, which is held during set and clear.
 * Note: When cpu_async_log_err() calls memscrub_induced_error(), it is
 * running in softint context, which is fired on the cpu on which the
 * memscrub thread is currently running.  The memscrub thread has affinity
 * set during memscrub_read(), so migration to a new cpu is not expected.
 */
static int add_to_page_retire_list = 0;

/*
 * Keep track of some interesting statistics
 */
static struct memscrub_kstats {
    kstat_named_t done_early;	/* ahead of schedule */
    kstat_named_t early_sec;	/* by cumulative num secs */
    kstat_named_t done_late;	/* behind schedule */
    kstat_named_t late_sec;	/* by cumulative num secs */
    kstat_named_t interval_ticks;	/* num ticks between intervals */
    kstat_named_t force_run;	/* forced to run, non-timeout */
    kstat_named_t errors_found;	/* num errors found by memscrub */
} memscrub_counts = {
    { "done_early",	KSTAT_DATA_UINT32 },
    { "early_sec",	KSTAT_DATA_UINT32 },
    { "done_late",	KSTAT_DATA_UINT32 },
    { "late_sec",	KSTAT_DATA_UINT32 },
    { "interval_ticks",	KSTAT_DATA_UINT32 },
    { "force_run",	KSTAT_DATA_UINT32 },
    { "errors_found",	KSTAT_DATA_UINT32 },
};

#define MEMSCRUB_STAT_INC(stat)	memscrub_counts.stat.value.ui32++
#define MEMSCRUB_STAT_SET(stat, val) memscrub_counts.stat.value.ui32 = (val)
#define MEMSCRUB_STAT_NINC(stat, val) memscrub_counts.stat.value.ui32 += (val)

static struct kstat *memscrub_ksp = (struct kstat *)NULL;

static timeout_id_t memscrub_tid = 0;	/* keep track of timeout id */
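
/*
 * The counters above are exported as a named kstat (module "unix", name
 * "memscrub_kstat"), so they can be examined from userland, for example
 * with the kstat(1M) command (illustrative; exact output format depends
 * on the utility):
 *
 *     # kstat -m unix -n memscrub_kstat
 */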

/*
 * create memscrub_memlist from phys_install list
 * initialize locks, set memscrub_phys_pages.
 */
int
memscrub_init(void)
{
    struct memlist *src;

    /*
     * only start up the scrubber if we have a minimum
     * number of pages
     */
    if (physinstalled >= MEMSCRUB_MIN_PAGES) {

        /*
         * initialize locks
         */
        mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
        cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);

        /*
         * copy phys_install to memscrub_memlist
         */
        for (src = phys_install; src; src = src->ml_next) {
            if (memscrub_add_span(
                (pfn_t)(src->ml_address >> PAGESHIFT),
                (pgcnt_t)(src->ml_size >> PAGESHIFT))) {
                memscrub_cleanup();
                return (-1);
            }
        }

        /*
         * initialize kstats
         */
        memscrub_ksp = kstat_create("unix", 0, "memscrub_kstat",
            "misc", KSTAT_TYPE_NAMED,
            sizeof (memscrub_counts) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

        if (memscrub_ksp) {
            memscrub_ksp->ks_data = (void *)&memscrub_counts;
            kstat_install(memscrub_ksp);
        } else {
            cmn_err(CE_NOTE, "Memscrubber cannot create kstats\n");
        }

        /*
         * create memscrubber thread
         */
        (void) thread_create(NULL, 0, (void (*)())memscrubber,
            NULL, 0, &p0, TS_RUN, memscrub_thread_pri);

        /*
         * We don't want callbacks changing the list
         * if there is no thread running.  We do not
         * attempt to deal with stopping/starting scrubbing
         * on memory size changes.
         */
        memscrub_init_mem_config();
    }

    return (0);
}

static void
memscrub_cleanup(void)
{
    memscrub_uninit_mem_config();
    while (memscrub_memlist) {
        (void) memscrub_delete_span(
            (pfn_t)(memscrub_memlist->ml_address >> PAGESHIFT),
            (pgcnt_t)(memscrub_memlist->ml_size >> PAGESHIFT));
    }
    if (memscrub_ksp)
        kstat_delete(memscrub_ksp);
    cv_destroy(&memscrub_cv);
    mutex_destroy(&memscrub_lock);
}

#ifdef MEMSCRUB_DEBUG
static void
memscrub_printmemlist(char *title, struct memlist *listp)
{
    struct memlist *list;

    cmn_err(CE_CONT, "%s:\n", title);

    for (list = listp; list; list = list->ml_next) {
        cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
            list->ml_address, list->ml_size);
    }
}
#endif /* MEMSCRUB_DEBUG */

/* ARGSUSED */
static void
memscrub_wakeup(void *c)
{
    /*
     * grab mutex to guarantee that our wakeup call
     * arrives after we go to sleep -- so we can't sleep forever.
     */
    mutex_enter(&memscrub_lock);
    cv_signal(&memscrub_cv);
    mutex_exit(&memscrub_lock);
}

/*
 * provide an interface external to the memscrubber
 * which will force the memscrub thread to run vs.
 * waiting for the timeout, if one is set
 */
void
memscrub_run(void)
{
    MEMSCRUB_STAT_INC(force_run);
    if (memscrub_tid) {
        (void) untimeout(memscrub_tid);
        memscrub_wakeup((void *)NULL);
    }
}

/*
 * this calculation doesn't account for the time
 * that the actual scan consumes -- so we'd fall
 * slightly behind schedule with this interval.
 * It's very small.
 */
static uint_t
compute_interval_ticks(void)
{
    /*
     * We use msp_safe and mpp_safe below to ensure that nobody
     * sets memscrub_span_pages or memscrub_phys_pages to 0 out
     * from under us.
     */
    static uint_t msp_safe, mpp_safe;
    static uint_t interval_ticks, period_ticks;
    msp_safe = memscrub_span_pages;
    mpp_safe = memscrub_phys_pages;

    period_ticks = memscrub_period_sec * hz;
    interval_ticks = period_ticks;

    ASSERT(mutex_owned(&memscrub_lock));

    if ((msp_safe != 0) && (mpp_safe != 0)) {
        if (memscrub_phys_pages <= msp_safe) {
            interval_ticks = period_ticks;
        } else {
            interval_ticks = (period_ticks /
                (mpp_safe / msp_safe));
        }
    }
    return (interval_ticks);
}
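
/*
 * Worked example (hypothetical configuration): with 32GB of physical
 * memory and 8KB pages, memscrub_phys_pages is 0x400000.  With the
 * defaults of memscrub_span_pages = 0x1000 (32MB) and
 * memscrub_period_sec = 43200, and hz = 100:
 *
 *     period_ticks   = 43200 * 100 = 4320000
 *     interval_ticks = 4320000 / (0x400000 / 0x1000) = ~4218
 *
 * so the scrubber wakes up roughly every 42 seconds to read one 32MB span.
 */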

void
memscrubber(void)
{
    ms_paddr_t address, addr;
    time_t deadline;
    pgcnt_t pages;
    uint_t reached_end = 1;
    uint_t paused_message = 0;
    uint_t interval_ticks = 0;
    uint_t sleep_warn_printed = 0;
    callb_cpr_t cprinfo;

    /*
     * notify CPR of our existence
     */
    CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");

    mutex_enter(&memscrub_lock);

    if (memscrub_memlist == NULL) {
        cmn_err(CE_WARN, "memscrub_memlist not initialized.");
        goto memscrub_exit;
    }

    address = memscrub_memlist->ml_address;

    deadline = gethrestime_sec() + memscrub_delay_start_sec;

    for (;;) {
        if (disable_memscrub)
            break;

        /*
         * compute interval_ticks
         */
        interval_ticks = compute_interval_ticks();

        /*
         * If the calculated sleep time is zero, and pause_memscrub
         * has been set, make sure we sleep so that another thread
         * can acquire memscrub_lock.
         */
        if (interval_ticks == 0 && pause_memscrub) {
            interval_ticks = hz;
        }

        /*
         * And as a fail safe, under normal non-paused operation, do
         * not allow the sleep time to be zero.
         */
        if (interval_ticks == 0) {
            interval_ticks = memscrub_override_ticks;
            if (!sleep_warn_printed) {
                cmn_err(CE_NOTE, MEMSCRUB_OVERRIDE_MSG);
                sleep_warn_printed = 1;
            }
        }

        MEMSCRUB_STAT_SET(interval_ticks, interval_ticks);

        /*
         * Did we just reach the end of memory?  If we are at the
         * end of memory, delay end of memory processing until
         * pause_memscrub is not set.
         */
        if (reached_end && !pause_memscrub) {
            time_t now = gethrestime_sec();

            if (now >= deadline) {
                MEMSCRUB_STAT_INC(done_late);
                MEMSCRUB_STAT_NINC(late_sec, now - deadline);
                /*
                 * past deadline, start right away
                 */
                interval_ticks = 0;

                deadline = now + memscrub_period_sec;
            } else {
                /*
                 * we finished ahead of schedule.
                 * wait till previous deadline before re-start.
                 */
                interval_ticks = (deadline - now) * hz;
                MEMSCRUB_STAT_INC(done_early);
                MEMSCRUB_STAT_NINC(early_sec, deadline - now);
                deadline += memscrub_period_sec;
            }
            reached_end = 0;
            sleep_warn_printed = 0;
        }

        if (interval_ticks != 0) {
            /*
             * it is safe from our standpoint for CPR to
             * suspend the system
             */
            CALLB_CPR_SAFE_BEGIN(&cprinfo);

            /*
             * hit the snooze bar
             */
            memscrub_tid = timeout(memscrub_wakeup, NULL,
                interval_ticks);

            /*
             * go to sleep
             */
            cv_wait(&memscrub_cv, &memscrub_lock);

            /*
             * at this point, no timeout should be set
             */
            memscrub_tid = 0;

            /*
             * we need to go to work and will be modifying
             * our internal state and mapping/unmapping
             * TTEs
             */
            CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);
        }

        if (memscrub_phys_pages == 0) {
            cmn_err(CE_WARN, "Memory scrubber has 0 pages to read");
            goto memscrub_exit;
        }

        if (!pause_memscrub) {
            if (paused_message) {
                paused_message = 0;
                if (memscrub_verbose)
                    cmn_err(CE_NOTE, "Memory scrubber "
                        "resuming");
            }

            if (read_all_memscrub) {
                if (memscrub_verbose)
                    cmn_err(CE_NOTE, "Memory scrubber "
                        "reading all memory per request");

                addr = memscrub_memlist->ml_address;
                reached_end = 0;
                while (!reached_end) {
                    if (disable_memscrub)
                        break;
                    pages = memscrub_phys_pages;
                    reached_end = memscrub_verify_span(
                        &addr, &pages);
                    memscrub_scan(pages *
                        MEMSCRUB_BLOCKS_PER_PAGE, addr);
                    addr += ((uint64_t)pages * PAGESIZE);
                }
                read_all_memscrub = 0;
            }

            /*
             * read 1 span
             */
            pages = memscrub_span_pages;

            if (disable_memscrub)
                break;

            /*
             * determine physical address range
             */
            reached_end = memscrub_verify_span(&address,
                &pages);

            memscrub_scan(pages * MEMSCRUB_BLOCKS_PER_PAGE,
                address);

            address += ((uint64_t)pages * PAGESIZE);
        }

        if (pause_memscrub && !paused_message) {
            paused_message = 1;
            if (memscrub_verbose)
                cmn_err(CE_NOTE, "Memory scrubber paused");
        }
    }

memscrub_exit:
    cmn_err(CE_NOTE, "Memory scrubber exiting");
    CALLB_CPR_EXIT(&cprinfo);
    memscrub_cleanup();
    thread_exit();
    /* NOTREACHED */
}

/*
 * condition address and size
 * such that they span legal physical addresses.
 *
 * when appropriate, address will be rounded up to start of next
 * struct memlist, and pages will be rounded down to the end of the
 * memlist size.
 *
 * returns 1 if reached end of list, else returns 0.
 */
static int
memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp)
{
    struct memlist *mlp;
    ms_paddr_t address = *addrp;
    uint64_t bytes = (uint64_t)*pagesp * PAGESIZE;
    uint64_t bytes_remaining;
    int reached_end = 0;

    ASSERT(mutex_owned(&memscrub_lock));

    /*
     * find memlist struct that contains addrp
     * assumes memlist is sorted by ascending address.
     */
    for (mlp = memscrub_memlist; mlp != NULL; mlp = mlp->ml_next) {
        /*
         * if before this chunk, round up to beginning
         */
        if (address < mlp->ml_address) {
            address = mlp->ml_address;
            break;
        }
        /*
         * if before end of chunk, then we found it
         */
        if (address < (mlp->ml_address + mlp->ml_size))
            break;

        /* else go to next struct memlist */
    }
    /*
     * if we hit end of list, start at beginning
     */
    if (mlp == NULL) {
        mlp = memscrub_memlist;
        address = mlp->ml_address;
    }

    /*
     * now we have legal address, and its mlp, condition bytes
     */
    bytes_remaining = (mlp->ml_address + mlp->ml_size) - address;

    if (bytes > bytes_remaining)
        bytes = bytes_remaining;

    /*
     * will this span take us to end of list?
     */
    if ((mlp->ml_next == NULL) &&
        ((mlp->ml_address + mlp->ml_size) == (address + bytes)))
        reached_end = 1;

    /* return values */
    *addrp = address;
    *pagesp = bytes / PAGESIZE;

    return (reached_end);
}

/*
 * add a span to the memscrub list
 * add to memscrub_phys_pages
 */
int
memscrub_add_span(pfn_t pfn, pgcnt_t pages)
{
#ifdef MEMSCRUB_DEBUG
    ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
    uint64_t bytes = (uint64_t)pages << PAGESHIFT;
#endif /* MEMSCRUB_DEBUG */

    int retval;

    mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
    memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
    cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
    cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
        " size: 0x%llx\n", address, bytes);
#endif /* MEMSCRUB_DEBUG */

    retval = memscrub_add_span_gen(pfn, pages, &memscrub_memlist,
        &memscrub_phys_pages);

#ifdef MEMSCRUB_DEBUG
    memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
    cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

    mutex_exit(&memscrub_lock);

    return (retval);
}

static int
memscrub_add_span_gen(
    pfn_t pfn,
    pgcnt_t pages,
    struct memlist **list,
    uint_t *npgs)
{
    ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
    uint64_t bytes = (uint64_t)pages << PAGESHIFT;
    struct memlist *dst;
    struct memlist *prev, *next;
    int retval = 0;

    /*
     * allocate a new struct memlist
     */

    dst = (struct memlist *)
        kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);

    if (dst == NULL) {
        retval = -1;
        goto add_done;
    }

    dst->ml_address = address;
    dst->ml_size = bytes;

    /*
     * first insert
     */
    if (*list == NULL) {
        dst->ml_prev = NULL;
        dst->ml_next = NULL;
        *list = dst;

        goto add_done;
    }

    /*
     * insert into sorted list
     */
    for (prev = NULL, next = *list;
        next != NULL;
        prev = next, next = next->ml_next) {
        if (address > (next->ml_address + next->ml_size))
            continue;

        /*
         * else insert here
         */

        /*
         * prepend to next
         */
        if ((address + bytes) == next->ml_address) {
            kmem_free(dst, sizeof (struct memlist));

            next->ml_address = address;
            next->ml_size += bytes;

            goto add_done;
        }

        /*
         * append to next
         */
        if (address == (next->ml_address + next->ml_size)) {
            kmem_free(dst, sizeof (struct memlist));

            if (next->ml_next) {
                /*
                 * don't overlap with next->ml_next
                 */
                if ((address + bytes) >
                    next->ml_next->ml_address) {
                    retval = -1;
                    goto add_done;
                }
                /*
                 * concatenate next and next->ml_next
                 */
                if ((address + bytes) ==
                    next->ml_next->ml_address) {
                    struct memlist *mlp = next->ml_next;

                    if (next == *list)
                        *list = next->ml_next;

                    mlp->ml_address = next->ml_address;
                    mlp->ml_size += next->ml_size;
                    mlp->ml_size += bytes;

                    if (next->ml_prev)
                        next->ml_prev->ml_next = mlp;
                    mlp->ml_prev = next->ml_prev;

                    kmem_free(next,
                        sizeof (struct memlist));
                    goto add_done;
                }
            }

            next->ml_size += bytes;

            goto add_done;
        }

        /* don't overlap with next */
        if ((address + bytes) > next->ml_address) {
            retval = -1;
            kmem_free(dst, sizeof (struct memlist));
            goto add_done;
        }

        /*
         * insert before next
         */
        dst->ml_prev = prev;
        dst->ml_next = next;
        next->ml_prev = dst;
        if (prev == NULL) {
            *list = dst;
        } else {
            prev->ml_next = dst;
        }
        goto add_done;
    }	/* end for */

    /*
     * end of list, prev is valid and next is NULL
     */
    prev->ml_next = dst;
    dst->ml_prev = prev;
    dst->ml_next = NULL;

add_done:

    if (retval != -1)
        *npgs += pages;

    return (retval);
}

/*
 * delete a span from the memscrub list
 * subtract from memscrub_phys_pages
 */
int
memscrub_delete_span(pfn_t pfn, pgcnt_t pages)
{
    ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
    uint64_t bytes = (uint64_t)pages << PAGESHIFT;
    struct memlist *dst, *next;
    int retval = 0;

    mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
    memscrub_printmemlist("memscrub_memlist Before", memscrub_memlist);
    cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
    cmn_err(CE_CONT, "memscrub_delete_span: 0x%llx 0x%llx\n",
        address, bytes);
#endif /* MEMSCRUB_DEBUG */

    /*
     * find struct memlist containing page
     */
    for (next = memscrub_memlist; next != NULL; next = next->ml_next) {
        if ((address >= next->ml_address) &&
            (address < next->ml_address + next->ml_size))
            break;
    }

    /*
     * if start address not in list
     */
    if (next == NULL) {
        retval = -1;
        goto delete_done;
    }

    /*
     * error if size goes off end of this struct memlist
     */
    if (address + bytes > next->ml_address + next->ml_size) {
        retval = -1;
        goto delete_done;
    }

    /*
     * pages at beginning of struct memlist
     */
    if (address == next->ml_address) {
        /*
         * if start & size match, delete from list
         */
        if (bytes == next->ml_size) {
            if (next == memscrub_memlist)
                memscrub_memlist = next->ml_next;
            if (next->ml_prev != NULL)
                next->ml_prev->ml_next = next->ml_next;
            if (next->ml_next != NULL)
                next->ml_next->ml_prev = next->ml_prev;

            kmem_free(next, sizeof (struct memlist));
        } else {
            /*
             * increment start address by bytes
             */
            next->ml_address += bytes;
            next->ml_size -= bytes;
        }
        goto delete_done;
    }

    /*
     * pages at end of struct memlist
     */
    if (address + bytes == next->ml_address + next->ml_size) {
        /*
         * decrement size by bytes
         */
        next->ml_size -= bytes;
        goto delete_done;
    }

    /*
     * delete a span in the middle of the struct memlist
     */
    {
        /*
         * create a new struct memlist
         */
        dst = (struct memlist *)
            kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);

        if (dst == NULL) {
            retval = -1;
            goto delete_done;
        }

        /*
         * existing struct memlist keeps the address and size up to
         * the start of the deleted span; the new struct memlist gets
         * the address just after the deleted span, until the end.
         */
        dst->ml_address = address + bytes;
        dst->ml_size =
            (next->ml_address + next->ml_size) - dst->ml_address;
        next->ml_size = address - next->ml_address;

        /*
         * link in new memlist after old
         */
        dst->ml_next = next->ml_next;
        dst->ml_prev = next;

        if (next->ml_next != NULL)
            next->ml_next->ml_prev = dst;
        next->ml_next = dst;
    }

delete_done:
    if (retval != -1) {
        memscrub_phys_pages -= pages;
        if (memscrub_phys_pages == 0)
            disable_memscrub = 1;
    }

#ifdef MEMSCRUB_DEBUG
    memscrub_printmemlist("memscrub_memlist After", memscrub_memlist);
    cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

    mutex_exit(&memscrub_lock);
    return (retval);
}
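
/*
 * Illustrative sketch (not part of this file's build): if hotplug memory
 * support were added, a hypothetical DR attach handler could notify the
 * scrubber of a newly added range, described here by the made-up values
 * new_base_pfn/new_npgs, roughly as follows:
 *
 *     if (memscrub_add_span(new_base_pfn, new_npgs) != 0)
 *         cmn_err(CE_WARN, "memscrub: unable to track added span");
 *
 * and the matching detach path would call
 * memscrub_delete_span(new_base_pfn, new_npgs).
 */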

static void
memscrub_scan(uint_t blks, ms_paddr_t src)
{
    uint_t psz, bpp, pgsread;
    pfn_t pfn;
    ms_paddr_t pa;
    caddr_t va;
    on_trap_data_t otd;
    int scan_mmu_pagesize = 0;
    int retired_pages = 0;

    extern void memscrub_read(caddr_t src, uint_t blks);

    ASSERT(mutex_owned(&memscrub_lock));

    pgsread = 0;
    pa = src;

    if (memscrub_page_retire_span_list != NULL) {
        if (memscrub_page_retire_span_search(src)) {
            /* retired pages in current span */
            scan_mmu_pagesize = 1;
        }
    }

#ifdef MEMSCRUB_DEBUG
    cmn_err(CE_NOTE, "scan_mmu_pagesize = %d\n", scan_mmu_pagesize);
#endif /* MEMSCRUB_DEBUG */

    while (blks != 0) {
        /* Ensure the PA is properly aligned */
        if (((pa & MMU_PAGEMASK4M) == pa) &&
            (blks >= MEMSCRUB_BPP4M)) {
            psz = MMU_PAGESIZE4M;
            bpp = MEMSCRUB_BPP4M;
        } else if (((pa & MMU_PAGEMASK512K) == pa) &&
            (blks >= MEMSCRUB_BPP512K)) {
            psz = MMU_PAGESIZE512K;
            bpp = MEMSCRUB_BPP512K;
        } else if (((pa & MMU_PAGEMASK64K) == pa) &&
            (blks >= MEMSCRUB_BPP64K)) {
            psz = MMU_PAGESIZE64K;
            bpp = MEMSCRUB_BPP64K;
        } else if ((pa & MMU_PAGEMASK) == pa) {
            psz = MMU_PAGESIZE;
            bpp = MEMSCRUB_BPP;
        } else {
            if (memscrub_verbose) {
                cmn_err(CE_NOTE, "Memory scrubber ignoring "
                    "non-page aligned block starting at 0x%"
                    PRIx64, src);
            }
            return;
        }
        if (blks < bpp)
            bpp = blks;

#ifdef MEMSCRUB_DEBUG
        cmn_err(CE_NOTE, "Going to run psz=%x, "
            "bpp=%x pa=%llx\n", psz, bpp, pa);
#endif /* MEMSCRUB_DEBUG */

        /*
         * MEMSCRUBBASE is a 4MB aligned page in the
         * kernel so that we can quickly map the PA
         * to a VA for the block loads performed in
         * memscrub_read.
         */
        pfn = mmu_btop(pa);
        va = (caddr_t)MEMSCRUBBASE;
        hat_devload(kas.a_hat, va, psz, pfn, PROT_READ,
            HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

        /*
         * Can't allow the memscrubber to migrate across CPUs as
         * we need to know whether CEEN is enabled for the current
         * CPU to enable us to scrub the memory.  Don't use
         * kpreempt_disable as the time we take to scan a span (even
         * without cpu_check_ce having to manually cpu_check_block)
         * is too long to hold a higher priority thread (eg, RT)
         * off cpu.
         */
        thread_affinity_set(curthread, CPU_CURRENT);

        /*
         * Protect read scrub from async faults.  For now, we simply
         * maintain a count of such faults caught.
         */

        if (!on_trap(&otd, OT_DATA_EC) && !scan_mmu_pagesize) {
            memscrub_read(va, bpp);
            /*
             * Check if CEs require logging
             */
            cpu_check_ce(SCRUBBER_CEEN_CHECK,
                (uint64_t)pa, va, psz);
            no_trap();
            thread_affinity_clear(curthread);
        } else {
            no_trap();
            thread_affinity_clear(curthread);

            /*
             * Got an async error.
             * Try rescanning it at MMU_PAGESIZE
             * granularity if we were trying to
             * read at a larger page size.
             * This is to ensure we continue to
             * scan the rest of the span.
             * OR scan at MMU_PAGESIZE granularity to avoid
             * reading retired pages' memory when
             * scan_mmu_pagesize is set.
             */
            if (psz > MMU_PAGESIZE || scan_mmu_pagesize) {
                caddr_t vaddr = va;
                ms_paddr_t paddr = pa;
                int tmp = 0;
                for (; tmp < bpp; tmp += MEMSCRUB_BPP) {
                    /* Don't scrub retired pages */
                    if (page_retire_check(paddr, NULL)
                        == 0) {
                        vaddr += MMU_PAGESIZE;
                        paddr += MMU_PAGESIZE;
                        retired_pages++;
                        continue;
                    }
                    thread_affinity_set(curthread,
                        CPU_CURRENT);
                    if (!on_trap(&otd, OT_DATA_EC)) {
                        memscrub_read(vaddr,
                            MEMSCRUB_BPP);
                        cpu_check_ce(
                            SCRUBBER_CEEN_CHECK,
                            (uint64_t)paddr, vaddr,
                            MMU_PAGESIZE);
                        no_trap();
                    } else {
                        no_trap();
                        MEMSCRUB_STAT_INC(errors_found);
                    }
                    thread_affinity_clear(curthread);
                    vaddr += MMU_PAGESIZE;
                    paddr += MMU_PAGESIZE;
                }
            }
        }
        hat_unload(kas.a_hat, va, psz, HAT_UNLOAD_UNLOCK);

        blks -= bpp;
        pa += psz;
        pgsread++;
    }

    /*
     * If we just finished scrubbing MMU_PAGESIZE at a time but found no
     * retired pages, delete the span from the global list.
     */
    if (scan_mmu_pagesize && retired_pages == 0)
        memscrub_page_retire_span_delete(src);

    /*
     * We encountered a CE/UE on a retired page during the memscrub read
     * of the current span.  Add the span to the global list so we avoid
     * reading it again.
     */
    if (add_to_page_retire_list) {
        if (!memscrub_page_retire_span_search(src))
            memscrub_page_retire_span_add(src);
        add_to_page_retire_list = 0;
    }

    if (memscrub_verbose) {
        cmn_err(CE_NOTE, "Memory scrubber read 0x%x pages starting "
            "at 0x%" PRIx64, pgsread, src);
    }
}
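
/*
 * Scan granularity example (hypothetical span): a span whose PA is 4MB
 * aligned and which still has at least MEMSCRUB_BPP4M (16384) 256-byte
 * blocks remaining is mapped and read 4MB at a time; once the remainder
 * no longer satisfies the alignment or block-count test, memscrub_scan()
 * falls back to 512K, 64K and finally MMU_PAGESIZE mappings.
 */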

/*
 * Called by cpu_async_log_err() when a memscrub read causes
 * a CE/UE on a retired page.
 */
void
memscrub_induced_error(void)
{
    add_to_page_retire_list = 1;
}

/*
 * Called by page_retire() when toxic pages cannot be retired
 * immediately and are scheduled for retire.  Memscrubber stops
 * scrubbing them to avoid further CE/UEs.
 */
void
memscrub_notify(ms_paddr_t pa)
{
    mutex_enter(&memscrub_lock);
    if (!memscrub_page_retire_span_search(pa))
        memscrub_page_retire_span_add(pa);
    mutex_exit(&memscrub_lock);
}

/*
 * Called by memscrub_scan() and memscrub_notify().
 * pa: physical address of span with CE/UE, add to global list.
 */
static void
memscrub_page_retire_span_add(ms_paddr_t pa)
{
    memscrub_page_retire_span_t *new_span;

    new_span = (memscrub_page_retire_span_t *)
        kmem_zalloc(sizeof (memscrub_page_retire_span_t), KM_NOSLEEP);

    if (new_span == NULL) {
#ifdef MEMSCRUB_DEBUG
        cmn_err(CE_NOTE, "failed to allocate new span - span with"
            " retired page/s not tracked.\n");
#endif /* MEMSCRUB_DEBUG */
        return;
    }

    new_span->address = pa;
    new_span->next = memscrub_page_retire_span_list;
    memscrub_page_retire_span_list = new_span;
}

/*
 * Called by memscrub_scan().
 * pa: physical address of span to be removed from global list.
 */
static void
memscrub_page_retire_span_delete(ms_paddr_t pa)
{
    memscrub_page_retire_span_t *prev_span, *next_span;

    prev_span = memscrub_page_retire_span_list;
    next_span = memscrub_page_retire_span_list->next;

    if (pa == prev_span->address) {
        memscrub_page_retire_span_list = next_span;
        kmem_free(prev_span, sizeof (memscrub_page_retire_span_t));
        return;
    }

    while (next_span) {
        if (pa == next_span->address) {
            prev_span->next = next_span->next;
            kmem_free(next_span,
                sizeof (memscrub_page_retire_span_t));
            return;
        }
        prev_span = next_span;
        next_span = next_span->next;
    }
}

/*
 * Called by memscrub_scan() and memscrub_notify().
 * pa: physical address of span to be searched in global list.
 */
static int
memscrub_page_retire_span_search(ms_paddr_t pa)
{
    memscrub_page_retire_span_t *next_span = memscrub_page_retire_span_list;

    while (next_span) {
        if (pa == next_span->address)
            return (1);
        next_span = next_span->next;
    }
    return (0);
}

/*
 * Called from new_memscrub() as a result of memory delete.
 * Uses page_numtopp_nolock() to determine if we have a valid PA.
 */
static void
memscrub_page_retire_span_list_update(void)
{
    memscrub_page_retire_span_t *prev, *cur, *next;

    if (memscrub_page_retire_span_list == NULL)
        return;

    prev = cur = memscrub_page_retire_span_list;
    next = cur->next;

    while (cur) {
        if (page_numtopp_nolock(mmu_btop(cur->address)) == NULL) {
            if (cur == memscrub_page_retire_span_list) {
                memscrub_page_retire_span_list = next;
                kmem_free(cur,
                    sizeof (memscrub_page_retire_span_t));
                prev = cur = memscrub_page_retire_span_list;
            } else {
                prev->next = cur->next;
                kmem_free(cur,
                    sizeof (memscrub_page_retire_span_t));
                cur = next;
            }
        } else {
            prev = cur;
            cur = next;
        }
        if (cur != NULL)
            next = cur->next;
    }
}

/*
 * The memory add/delete callback mechanism does not pass in the
 * page ranges.  The phys_install list has been updated though, so
 * create a new scrub list from it.
 */

static int
new_memscrub(int update_page_retire_list)
{
    struct memlist *src, *list, *old_list;
    uint_t npgs;

    /*
     * copy phys_install to memscrub_memlist
     */
    list = NULL;
    npgs = 0;
    memlist_read_lock();
    for (src = phys_install; src; src = src->ml_next) {
        if (memscrub_add_span_gen((pfn_t)(src->ml_address >> PAGESHIFT),
            (pgcnt_t)(src->ml_size >> PAGESHIFT), &list, &npgs)) {
            memlist_read_unlock();
            while (list) {
                struct memlist *el;

                el = list;
                list = list->ml_next;
                kmem_free(el, sizeof (struct memlist));
            }
            return (-1);
        }
    }
    memlist_read_unlock();

    mutex_enter(&memscrub_lock);
    memscrub_phys_pages = npgs;
    old_list = memscrub_memlist;
    memscrub_memlist = list;

    if (update_page_retire_list)
        memscrub_page_retire_span_list_update();

    mutex_exit(&memscrub_lock);

    while (old_list) {
        struct memlist *el;

        el = old_list;
        old_list = old_list->ml_next;
        kmem_free(el, sizeof (struct memlist));
    }

    return (0);
}

/*ARGSUSED*/
static void
memscrub_mem_config_post_add(
    void *arg,
    pgcnt_t delta_pages)
{
    /*
     * We increment pause_memscrub before entering new_memscrub().  This
     * will force the memscrubber to sleep, allowing the DR callback
     * thread to acquire memscrub_lock in new_memscrub().  The use of
     * atomic_inc_32()/atomic_dec_32() allows concurrent memory DR
     * operations to use the callbacks safely.
     */
    atomic_inc_32(&pause_memscrub);
    ASSERT(pause_memscrub != 0);

    /*
     * "Don't care" if we are not scrubbing new memory.
     */
    (void) new_memscrub(0);		/* retain page retire list */

    /* Restore the pause setting. */
    atomic_dec_32(&pause_memscrub);
}

/*ARGSUSED*/
static int
memscrub_mem_config_pre_del(
    void *arg,
    pgcnt_t delta_pages)
{
    /* Nothing to do. */
    return (0);
}

/*ARGSUSED*/
static void
memscrub_mem_config_post_del(
    void *arg,
    pgcnt_t delta_pages,
    int cancelled)
{
    /*
     * We increment pause_memscrub before entering new_memscrub().  This
     * will force the memscrubber to sleep, allowing the DR callback
     * thread to acquire memscrub_lock in new_memscrub().  The use of
     * atomic_inc_32()/atomic_dec_32() allows concurrent memory DR
     * operations to use the callbacks safely.
     */
    atomic_inc_32(&pause_memscrub);
    ASSERT(pause_memscrub != 0);

    /*
     * Must stop scrubbing deleted memory as it may be disconnected.
     */
    if (new_memscrub(1)) {		/* update page retire list */
        disable_memscrub = 1;
    }

    /* Restore the pause setting. */
    atomic_dec_32(&pause_memscrub);
}

static kphysm_setup_vector_t memscrub_mem_config_vec = {
    KPHYSM_SETUP_VECTOR_VERSION,
    memscrub_mem_config_post_add,
    memscrub_mem_config_pre_del,
    memscrub_mem_config_post_del,
};

static void
memscrub_init_mem_config(void)
{
    int ret;

    ret = kphysm_setup_func_register(&memscrub_mem_config_vec,
        (void *)NULL);
    ASSERT(ret == 0);
}

static void
memscrub_uninit_mem_config(void)
{
    /* This call is OK if the register call was not done. */
    kphysm_setup_func_unregister(&memscrub_mem_config_vec, (void *)NULL);
}