/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * i86pc Memory Scrubbing
 *
 * On detection of a correctable memory ECC error, the i86pc hardware
 * returns the corrected data to the requester and may re-write it
 * to memory (DRAM or NVRAM).  Machines which do not re-write this to
 * memory should add an NMI handler to correct and rewrite.
 *
 * Scrubbing thus reduces the likelihood that multiple transient errors
 * will occur in the same memory word, making uncorrectable errors due
 * to transients less likely.
 *
 * Thus is born the desire that every memory location be periodically
 * accessed.
 *
 * This file implements a memory scrubbing thread.  This scrubber
 * guarantees that all of physical memory is accessed periodically
 * (memscrub_period_sec -- 12 hours).
 *
 * It attempts to do this as unobtrusively as possible.  The thread
 * schedules itself to wake up at an interval such that if it reads
 * memscrub_span_pages (4MB) on each wakeup, it will read all of physical
 * memory in memscrub_period_sec (12 hours).
 *
 * The scrubber uses the REP LODS instruction, so it reads 4MB in about
 * 0.15 seconds (on a P5-200).  When it completes a span, if all the CPUs
 * are idle, it reads another span.  Typically it soaks up idle time this
 * way to reach its deadline early -- and then sleeps until the next
 * period begins.
 *
 * Maximal Cost Estimate:  8GB @ xxMB/s = xxx seconds spent in 640 wakeups
 * that run for 0.15 seconds at intervals of 67 seconds.
 *
 * In practice, the scrubber finds enough idle time to finish in a few
 * minutes, and sleeps until its 12 hour deadline.
 *
 * The scrubber maintains a private copy of the phys_install memory list
 * to keep track of what memory should be scrubbed.
 *
 * The following parameters can be set via /etc/system:
 *
 * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB)
 * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
 * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0)
 * memscrub_delay_start_sec = (10 seconds)
 * disable_memscrub = (0)
 *
 * The scrubber will exit (or never be started) if it finds the variable
 * "disable_memscrub" set.
 *
 * MEMSCRUB_DFL_SPAN_PAGES is based on the guess that 0.15 sec
 * is a "good" minimum amount of time for the thread to run at a stretch.
 *
 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
 * twice the frequency the hardware folk estimated would be necessary.
 *
 * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly
 * any other use of the system should be higher priority than scrubbing.
 */
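/*
 * For illustration only, a hypothetical /etc/system fragment using the
 * tunables listed above; the values shown are examples, not
 * recommendations:
 *
 *	set memscrub_span_pages = 512		(2MB of 4K pages per wakeup)
 *	set memscrub_period_sec = 21600		(full pass every 6 hours)
 *	set disable_memscrub = 1		(no software scrubbing at all)
 */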
#include <sys/types.h>
#include <sys/systm.h>		/* timeout, types, t_lock */
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>	/* MIN */
#include <sys/memlist.h>	/* memlist */
#include <sys/kmem.h>		/* KMEM_NOSLEEP */
#include <sys/cpuvar.h>		/* ncpus_online */
#include <sys/debug.h>		/* ASSERTs */
#include <sys/vmem.h>
#include <sys/mman.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat_i86.h>
#include <sys/callb.h>		/* CPR callback */

static caddr_t memscrub_window;
static hat_mempte_t memscrub_pte;

/*
 * Global Data:
 */
/*
 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
 */
#define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */

/*
 * start only if at least MEMSCRUB_MIN_PAGES in system
 */
#define	MEMSCRUB_MIN_PAGES	((32 * 1024 * 1024) / PAGESIZE)

/*
 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
 */
#define	MEMSCRUB_DFL_SPAN_PAGES	((4 * 1024 * 1024) / PAGESIZE)

/*
 * almost anything is higher priority than scrubbing
 */
#define	MEMSCRUB_DFL_THREAD_PRI	0

/*
 * we can patch these defaults in /etc/system if necessary
 */
uint_t disable_memscrub = 0;
static uint_t disable_memscrub_quietly = 0;
pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES;
pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
time_t memscrub_delay_start_sec = 10;

/*
 * Static Routines
 */
static void memscrubber(void);
static int system_is_idle(void);
static int memscrub_add_span(uint64_t, uint64_t);

/*
 * Static Data
 */
static struct memlist *memscrub_memlist;
static uint_t memscrub_phys_pages;

static kcondvar_t memscrub_cv;
static kmutex_t memscrub_lock;

/*
 * memscrub_lock protects memscrub_memlist
 */
uint_t memscrub_scans_done;

uint_t memscrub_done_early;
uint_t memscrub_early_sec;

uint_t memscrub_done_late;
time_t memscrub_late_sec;

/*
 * create memscrub_memlist from phys_install list
 * initialize locks, set memscrub_phys_pages.
 */
void
memscrub_init()
{
	struct memlist *src;

	if (physmem < memscrub_min_pages)
		return;

	if (!kpm_enable) {
		memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
		memscrub_pte = hat_mempte_setup(memscrub_window);
	}

	/*
	 * copy phys_install to memscrub_memlist
	 */
	for (src = phys_install; src; src = src->next) {
		if (memscrub_add_span(src->address, src->size)) {
			cmn_err(CE_WARN,
			    "Software memory scrubber failed to initialize\n");
			return;
		}
	}

	mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);

	/*
	 * create memscrubber thread
	 */
	(void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0,
	    TS_RUN, memscrub_thread_pri);
}

/*
 * Function to cause the software memscrubber to exit quietly if the
 * platform support has located a hardware scrubber and enabled it.
 */
void
memscrub_disable(void)
{
	disable_memscrub_quietly = 1;
}

#ifdef MEMSCRUB_DEBUG
static void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *list;

	cmn_err(CE_CONT, "%s:\n", title);

	for (list = listp; list; list = list->next) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    list->address, list->size);
	}
}
#endif /* MEMSCRUB_DEBUG */

/* ARGSUSED */
static void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}

/*
 * This calculation doesn't account for the time the actual scan
 * consumes -- so we'd fall slightly behind schedule with this
 * interval_sec alone.  But the idle loop optimization below usually
 * makes us come in well ahead of schedule.
 */
static int
compute_interval_sec()
{
	if (memscrub_phys_pages <= memscrub_span_pages)
		return (memscrub_period_sec);
	else
		return (memscrub_period_sec /
		    (memscrub_phys_pages / memscrub_span_pages));
}
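/*
 * Worked example of the interval computation above, assuming the defaults
 * on a hypothetical 4GB machine: memscrub_phys_pages is 4GB / 4K =
 * 0x100000 pages, and memscrub_span_pages is 1024 pages (4MB), so there
 * are 1024 spans to cover.  compute_interval_sec() then returns
 * 43200 / 1024, i.e. the scrubber wakes roughly every 42 seconds when it
 * is merely keeping pace rather than running ahead in idle time.
 */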
static void
memscrubber()
{
	time_t deadline;
	uint64_t mlp_last_addr;
	uint64_t mlp_next_addr;
	int reached_end = 1;
	time_t interval_sec = 0;
	struct memlist *mlp;

	extern void scan_memory(caddr_t, size_t);
	callb_cpr_t cprinfo;

	/*
	 * notify CPR of our existence
	 */
	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");

	if (memscrub_memlist == NULL) {
		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
		goto memscrub_exit;
	}

	mlp = memscrub_memlist;
	mlp_next_addr = mlp->address;
	mlp_last_addr = mlp->address + mlp->size;

	deadline = gethrestime_sec() + memscrub_delay_start_sec;

	for (;;) {
		if (disable_memscrub || disable_memscrub_quietly)
			break;

		mutex_enter(&memscrub_lock);

		/*
		 * did we just reach the end of memory?
		 */
		if (reached_end) {
			time_t now = gethrestime_sec();

			if (now >= deadline) {
				memscrub_done_late++;
				memscrub_late_sec += (now - deadline);
				/*
				 * past deadline, start right away
				 */
				interval_sec = 0;

				deadline = now + memscrub_period_sec;
			} else {
				/*
				 * We finished ahead of schedule.
				 * Wait until the previous deadline
				 * before restarting.
				 */
				interval_sec = deadline - now;
				memscrub_done_early++;
				memscrub_early_sec += interval_sec;
				deadline += memscrub_period_sec;
			}
		} else {
			interval_sec = compute_interval_sec();
		}

		/*
		 * it is safe from our standpoint for CPR to
		 * suspend the system
		 */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);

		/*
		 * hit the snooze bar
		 */
		(void) timeout(memscrub_wakeup, NULL, interval_sec * hz);

		/*
		 * go to sleep
		 */
		cv_wait(&memscrub_cv, &memscrub_lock);

		/* we need to get back to work */
		CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);

		mutex_exit(&memscrub_lock);

		do {
			pgcnt_t pages = memscrub_span_pages;
			uint64_t address = mlp_next_addr;

			if (disable_memscrub || disable_memscrub_quietly)
				break;

			mutex_enter(&memscrub_lock);

			/*
			 * Make sure we don't try to scan beyond the end of
			 * the current memlist.  If we would, then resize
			 * our scan target for this iteration, and prepare
			 * to read the next memlist entry on the next
			 * iteration.
			 */
			reached_end = 0;
			if (address + mmu_ptob(pages) >= mlp_last_addr) {
				pages = mmu_btop(mlp_last_addr - address);
				mlp = mlp->next;
				if (mlp == NULL) {
					reached_end = 1;
					mlp = memscrub_memlist;
				}
				mlp_next_addr = mlp->address;
				mlp_last_addr = mlp->address + mlp->size;
			} else {
				mlp_next_addr += mmu_ptob(pages);
			}

			mutex_exit(&memscrub_lock);

			while (pages--) {
				pfn_t pfn = btop(address);

				/*
				 * Without segkpm, the memscrubber cannot
				 * be allowed to migrate across CPUs, as
				 * the CPU-specific mapping of
				 * memscrub_window would be incorrect.
				 * With segkpm, switching CPUs is legal, but
				 * inefficient.  We don't use
				 * kpreempt_disable as it might hold a
				 * higher priority thread (e.g., RT)
				 * too long off CPU.
				 */
				thread_affinity_set(curthread, CPU_CURRENT);
				if (kpm_enable)
					memscrub_window = hat_kpm_pfn2va(pfn);
				else
					hat_mempte_remap(pfn, memscrub_window,
					    memscrub_pte,
					    PROT_READ, HAT_LOAD_NOCONSIST);

				scan_memory(memscrub_window, PAGESIZE);

				thread_affinity_clear(curthread);
				address += MMU_PAGESIZE;
			}

			memscrub_scans_done++;
		} while (!reached_end && system_is_idle());
	}

memscrub_exit:

	if (!disable_memscrub_quietly)
		cmn_err(CE_NOTE, "Software memory scrubber exiting.");
	/*
	 * We are about to bail, but don't have the memscrub_lock,
	 * and it is needed for CALLB_CPR_EXIT.
	 */
	mutex_enter(&memscrub_lock);
	CALLB_CPR_EXIT(&cprinfo);

	cv_destroy(&memscrub_cv);

	thread_exit();
}
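/*
 * Worked example of the deadline bookkeeping above (hypothetical numbers):
 * if a pass over memory completes 3000 seconds before its deadline,
 * memscrub_done_early is incremented, memscrub_early_sec grows by 3000,
 * and the thread sleeps those 3000 seconds before starting the next pass.
 * If instead it finishes 60 seconds late, memscrub_done_late and
 * memscrub_late_sec record that, and the next pass starts immediately.
 */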
/*
 * return 1 if we're MP and all the other CPUs are idle
 */
static int
system_is_idle()
{
	int cpu_id;
	int found = 0;

	if (1 == ncpus_online)
		return (0);

	for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) {
		if (!cpu[cpu_id])
			continue;

		found++;

		if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) {
			if (CPU->cpu_id == cpu_id &&
			    CPU->cpu_disp->disp_nrunnable == 0)
				continue;
			return (0);
		}

		if (found == ncpus)
			break;
	}
	return (1);
}
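/*
 * For example (hypothetical 2-CPU configuration): if the other CPU is
 * running its idle thread and the scrubber's own CPU has nothing else
 * runnable, system_is_idle() returns 1 and the scrubber immediately scans
 * another span instead of sleeping.  A uniprocessor always gets 0 here,
 * so it never soaks up idle time this way.
 */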
/*
 * add a span to the memscrub list
 */
static int
memscrub_add_span(uint64_t start, uint64_t bytes)
{
	struct memlist *dst;
	struct memlist *prev, *next;
	uint64_t end = start + bytes - 1;
	int retval = 0;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
	    " size: 0x%llx\n", start, bytes);
#endif /* MEMSCRUB_DEBUG */

	/*
	 * Scan through the list to find the proper place to install it.
	 */
	prev = NULL;
	next = memscrub_memlist;
	while (next) {
		uint64_t ns = next->address;
		uint64_t ne = next->address + next->size - 1;

		/*
		 * If this span overlaps with an existing span, then
		 * something has gone horribly wrong with the phys_install
		 * list.  In fact, I'm surprised we made it this far.
		 */
		if ((start >= ns && start <= ne) || (end >= ns && end <= ne) ||
		    (start < ns && end > ne))
			panic("memscrub found overlapping memory ranges "
			    "(0x%p-0x%p) and (0x%p-0x%p)",
			    (void *)(uintptr_t)start, (void *)(uintptr_t)end,
			    (void *)(uintptr_t)ns, (void *)(uintptr_t)ne);

		/*
		 * New span can be appended to an existing one.
		 */
		if (start == ne + 1) {
			next->size += bytes;
			goto add_done;
		}

		/*
		 * New span can be prepended to an existing one.
		 */
		if (end + 1 == ns) {
			next->size += bytes;
			next->address = start;
			goto add_done;
		}

		/*
		 * If the next span has a higher start address than the new
		 * one, then we have found the right spot for our
		 * insertion.
		 */
		if (ns > start)
			break;

		prev = next;
		next = next->next;
	}

	/*
	 * allocate a new struct memlist
	 */
	dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
	if (dst == NULL) {
		retval = -1;
		goto add_done;
	}
	dst->address = start;
	dst->size = bytes;
	dst->prev = prev;
	dst->next = next;

	if (prev)
		prev->next = dst;
	else
		memscrub_memlist = dst;

	if (next)
		next->prev = dst;

add_done:

	if (retval != -1)
		memscrub_phys_pages += mmu_btop(bytes);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);
	return (retval);
}
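/*
 * Worked example of the coalescing above (hypothetical addresses):
 * memscrub_add_span(0x1000, 0x1000) followed by
 * memscrub_add_span(0x2000, 0x1000) yields a single list entry with
 * address = 0x1000 and size = 0x2000, because the second span begins at
 * the first span's end + 1 and is appended to it rather than inserted
 * as a new struct memlist.
 */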