/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * i86pc Memory Scrubbing
 *
 * On detection of a correctable memory ECC error, the i86pc hardware
 * returns the corrected data to the requester and may re-write it
 * to memory (DRAM or NVRAM).  Machines that do not re-write this to
 * memory should add an NMI handler to correct and rewrite.
 *
 * Scrubbing thus reduces the likelihood that multiple transient errors
 * will occur in the same memory word, making uncorrectable errors due
 * to transients less likely.
 *
 * Thus is born the desire that every memory location be periodically
 * accessed.
 *
 * This file implements a memory scrubbing thread.  This scrubber
 * guarantees that all of physical memory is accessed periodically
 * (memscrub_period_sec -- 12 hours).
 *
 * It attempts to do this as unobtrusively as possible.  The thread
 * schedules itself to wake up at an interval such that if it reads
 * memscrub_span_pages (4MB) on each wakeup, it will read all of physical
 * memory in memscrub_period_sec (12 hours).
 *
 * The scrubber uses the REP LODS instruction, so it reads 4MB in
 * 0.15 secs (on a P5-200).  When it completes a span, if all the CPUs
 * are idle, it reads another span.  Typically it soaks up idle time
 * this way to reach its deadline early -- and then sleeps until the
 * next period begins.
 *
 * Maximal Cost Estimate:  8GB @ xxMB/s = xxx seconds spent in 640 wakeups
 * that run for 0.15 seconds at intervals of 67 seconds.
 *
 * In practice, the scrubber finds enough idle time to finish in a few
 * minutes, and sleeps until its 12 hour deadline.
 *
 * The scrubber maintains a private copy of the phys_install memory list
 * to keep track of what memory should be scrubbed.
 *
 * The following parameters can be set via /etc/system
 *
 *	memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB)
 *	memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
 *	memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0)
 *	memscrub_delay_start_sec = (10 seconds)
 *	disable_memscrub = (0)
 *
 * the scrubber will exit (or never be started) if it finds the variable
 * "disable_memscrub" set.
 *
 * MEMSCRUB_DFL_SPAN_PAGES is based on the guess that 0.15 sec
 * is a "good" amount of minimum time for the thread to run at a time.
 *
 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
 * twice the frequency the hardware folk estimated would be necessary.
 *
 * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly
 * any other use of the system should be higher priority than scrubbing.
 */
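
/*
 * Worked example of the scheduling arithmetic above.  These figures
 * are computed from the defaults, not taken from the original cost
 * estimate, and assume 4K pages: 8GB of physical memory is 2,097,152
 * pages; at 1024 pages (4MB) per span that is 2048 spans per period,
 * so the nominal wakeup interval is 43200 sec / 2048 = ~21 seconds
 * when the scrubber finds no extra idle time.
 */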

#include <sys/types.h>
#include <sys/systm.h>		/* timeout, types, t_lock */
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>	/* MIN */
#include <sys/memlist.h>	/* memlist */
#include <sys/kmem.h>		/* KMEM_NOSLEEP */
#include <sys/cpuvar.h>		/* ncpus_online */
#include <sys/debug.h>		/* ASSERTs */
#include <sys/vmem.h>
#include <sys/mman.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat_i86.h>

static caddr_t	memscrub_window;
static void	*memscrub_pte;

/*
 * Global Data:
 */

/*
 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
 */
#define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */

/*
 * start only if at least MEMSCRUB_MIN_PAGES in system
 */
#define	MEMSCRUB_MIN_PAGES	((32 * 1024 * 1024) / PAGESIZE)

/*
 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
 */
#define	MEMSCRUB_DFL_SPAN_PAGES	((4 * 1024 * 1024) / PAGESIZE)

/*
 * almost anything is higher priority than scrubbing
 */
#define	MEMSCRUB_DFL_THREAD_PRI	0

/*
 * we can patch these defaults in /etc/system if necessary
 */
uint_t disable_memscrub = 0;
static uint_t disable_memscrub_quietly = 0;
pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES;
pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
time_t memscrub_delay_start_sec = 10;
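
/*
 * Example /etc/system settings for the tunables above.  The values
 * are illustrative, not recommendations: this would scrub 8MB per
 * wakeup (2048 4K pages) and cover all of memory once a day:
 *
 *	set memscrub_span_pages = 2048
 *	set memscrub_period_sec = 86400
 */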

/*
 * Static Routines
 */
static void memscrubber(void);
static int system_is_idle(void);
static int memscrub_add_span(uint64_t, uint64_t);

/*
 * Static Data
 */
static struct memlist *memscrub_memlist;
static uint_t memscrub_phys_pages;

static kcondvar_t memscrub_cv;
static kmutex_t memscrub_lock;

/*
 * memscrub_lock protects memscrub_memlist
 */
uint_t memscrub_scans_done;

uint_t memscrub_done_early;
uint_t memscrub_early_sec;

uint_t memscrub_done_late;
time_t memscrub_late_sec;

/*
 * create memscrub_memlist from phys_install list
 * initialize locks, set memscrub_phys_pages.
 */
void
memscrub_init()
{
	struct memlist *src;

	if (physmem < memscrub_min_pages)
		return;

	if (!kpm_enable) {
		memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
		memscrub_pte = hat_mempte_setup(memscrub_window);
	}

	/*
	 * copy phys_install to memscrub_memlist
	 */
	for (src = phys_install; src; src = src->next) {
		if (memscrub_add_span(src->address, src->size)) {
			cmn_err(CE_WARN,
			    "Memory scrubber failed to initialize\n");
			return;
		}
	}

	mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);

	/*
	 * create memscrubber thread
	 */
	(void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0,
	    TS_RUN, memscrub_thread_pri);
}

/*
 * Function to cause the software memscrubber to exit quietly if the
 * platform support has located a hardware scrubber and enabled it.
 */
void
memscrub_disable(void)
{
	disable_memscrub_quietly = 1;
}
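
/*
 * Hypothetical caller sketch for memscrub_disable() above;
 * hw_scrubber_enable() is not a real routine, it stands in for
 * whatever platform-specific code enables a hardware scrubber:
 *
 *	if (hw_scrubber_enable() == 0)
 *		memscrub_disable();
 */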

#ifdef MEMSCRUB_DEBUG
void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *list;

	cmn_err(CE_CONT, "%s:\n", title);

	for (list = listp; list; list = list->next) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    list->address, list->size);
	}
}
#endif	/* MEMSCRUB_DEBUG */

/* ARGSUSED */
void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}

/*
 * this calculation doesn't account for the time that the actual scan
 * consumes -- so we'd fall slightly behind schedule with this
 * interval_sec.  but the idle loop optimization below usually makes us
 * come in way ahead of schedule.
 */
static int
compute_interval_sec()
{
	if (memscrub_phys_pages <= memscrub_span_pages)
		return (memscrub_period_sec);
	else
		return (memscrub_period_sec /
		    (memscrub_phys_pages / memscrub_span_pages));
}

void
memscrubber()
{
	time_t deadline;
	uint64_t mlp_last_addr;
	uint64_t mlp_next_addr;
	int reached_end = 1;
	time_t interval_sec = 0;
	struct memlist *mlp;

	extern void scan_memory(caddr_t, size_t);

	if (memscrub_memlist == NULL) {
		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
		goto memscrub_exit;
	}

	mlp = memscrub_memlist;
	mlp_next_addr = mlp->address;
	mlp_last_addr = mlp->address + mlp->size;

	deadline = gethrestime_sec() + memscrub_delay_start_sec;

	for (;;) {
		if (disable_memscrub || disable_memscrub_quietly)
			break;

		mutex_enter(&memscrub_lock);

		/*
		 * did we just reach the end of memory?
		 */
		if (reached_end) {
			time_t now = gethrestime_sec();

			if (now >= deadline) {
				memscrub_done_late++;
				memscrub_late_sec += (now - deadline);
				/*
				 * past deadline, start right away
				 */
				interval_sec = 0;

				deadline = now + memscrub_period_sec;
			} else {
				/*
				 * we finished ahead of schedule.
				 * wait until the previous deadline
				 * before restarting.
				 */
				interval_sec = deadline - now;
				memscrub_done_early++;
				memscrub_early_sec += interval_sec;
				deadline += memscrub_period_sec;
			}
		} else {
			interval_sec = compute_interval_sec();
		}

		/*
		 * hit the snooze bar
		 */
		(void) timeout(memscrub_wakeup, NULL, interval_sec * hz);

		/*
		 * go to sleep
		 */
		cv_wait(&memscrub_cv, &memscrub_lock);

		mutex_exit(&memscrub_lock);

		do {
			pgcnt_t pages = memscrub_span_pages;
			uint64_t address = mlp_next_addr;

			if (disable_memscrub || disable_memscrub_quietly)
				break;

			mutex_enter(&memscrub_lock);

			/*
			 * Make sure we don't try to scan beyond the end of
			 * the current memlist.  If we would, then resize
			 * our scan target for this iteration, and prepare
			 * to read the next memlist entry on the next
			 * iteration.
			 */
			reached_end = 0;
			if (address + mmu_ptob(pages) >= mlp_last_addr) {
				pages = mmu_btop(mlp_last_addr - address);
				mlp = mlp->next;
				if (mlp == NULL) {
					reached_end = 1;
					mlp = memscrub_memlist;
				}
				mlp_next_addr = mlp->address;
				mlp_last_addr = mlp->address + mlp->size;
			} else {
				mlp_next_addr += mmu_ptob(pages);
			}

			mutex_exit(&memscrub_lock);

			while (pages--) {
				pfn_t pfn = btop(address);

				/*
				 * Without segkpm, the memscrubber cannot
				 * be allowed to migrate across CPUs, as
				 * the CPU-specific mapping of
				 * memscrub_window would be incorrect.
				 * With segkpm, switching CPUs is legal, but
				 * inefficient.  We don't use
				 * kpreempt_disable as it might hold a
				 * higher priority thread (e.g., RT)
				 * too long off CPU.
				 */
				thread_affinity_set(curthread, CPU_CURRENT);
				if (kpm_enable)
					memscrub_window = hat_kpm_pfn2va(pfn);
				else
					hat_mempte_remap(pfn, memscrub_window,
					    memscrub_pte,
					    PROT_READ, HAT_LOAD_NOCONSIST);

				scan_memory(memscrub_window, PAGESIZE);

				thread_affinity_clear(curthread);
				address += MMU_PAGESIZE;
			}

			memscrub_scans_done++;
		} while (!reached_end && system_is_idle());
	}

memscrub_exit:

	if (!disable_memscrub_quietly)
		cmn_err(CE_NOTE, "memory scrubber exiting.");

	cv_destroy(&memscrub_cv);

	thread_exit();
}


/*
 * return 1 if we're MP and all the other CPUs are idle
 */
static int
system_is_idle()
{
	int cpu_id;
	int found = 0;

	if (1 == ncpus_online)
		return (0);

	for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) {
		if (!cpu[cpu_id])
			continue;

		found++;

		if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) {
			/*
			 * The CPU running the scrubber itself counts as
			 * idle if nothing else is runnable on it.
			 */
			if (CPU->cpu_id == cpu_id &&
			    CPU->cpu_disp->disp_nrunnable == 0)
				continue;

			return (0);
		}

		if (found == ncpus)
			break;
	}
	return (1);
}
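
/*
 * Illustrative example for memscrub_add_span() below (the addresses
 * are made up): given an existing entry covering [0x100000, 0x1fffff],
 * adding a span starting at 0x200000 satisfies start == ne + 1, so the
 * existing entry is simply extended.  Only a span that touches no
 * neighbor allocates a new memlist entry.
 */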

/*
 * add a span to the memscrub list
 */
static int
memscrub_add_span(uint64_t start, uint64_t bytes)
{
	struct memlist *dst;
	struct memlist *prev, *next;
	uint64_t end = start + bytes - 1;
	int retval = 0;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
	    " size: 0x%llx\n", start, bytes);
#endif	/* MEMSCRUB_DEBUG */

	/*
	 * Scan through the list to find the proper place to install it.
	 */
	prev = NULL;
	next = memscrub_memlist;
	while (next) {
		uint64_t ns = next->address;
		uint64_t ne = next->address + next->size - 1;

		/*
		 * If this span overlaps with an existing span, then
		 * something has gone horribly wrong with the phys_install
		 * list.  In fact, I'm surprised we made it this far.
		 */
		if ((start >= ns && start <= ne) || (end >= ns && end <= ne) ||
		    (start < ns && end > ne))
			panic("memscrub found overlapping memory ranges "
			    "(0x%p-0x%p) and (0x%p-0x%p)",
			    (void *)(uintptr_t)start, (void *)(uintptr_t)end,
			    (void *)(uintptr_t)ns, (void *)(uintptr_t)ne);

		/*
		 * New span can be appended to an existing one.
		 */
		if (start == ne + 1) {
			next->size += bytes;
			goto add_done;
		}

		/*
		 * New span can be prepended to an existing one.
		 */
		if (end + 1 == ns) {
			next->size += bytes;
			next->address = start;
			goto add_done;
		}

		/*
		 * If the next span has a higher start address than the new
		 * one, then we have found the right spot for our
		 * insertion.
		 */
		if (ns > start)
			break;

		prev = next;
		next = next->next;
	}

	/*
	 * allocate a new struct memlist
	 */
	dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
	if (dst == NULL) {
		retval = -1;
		goto add_done;
	}
	dst->address = start;
	dst->size = bytes;
	dst->prev = prev;
	dst->next = next;

	if (prev)
		prev->next = dst;
	else
		memscrub_memlist = dst;

	if (next)
		next->prev = dst;

add_done:

	if (retval != -1)
		memscrub_phys_pages += mmu_btop(bytes);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif	/* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);
	return (retval);
}