1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * i86pc Memory Scrubbing 31 * 32 * On detection of a correctable memory ECC error, the i86pc hardware 33 * returns the corrected data to the requester and may re-write it 34 * to memory (DRAM or NVRAM). Machines which do not re-write this to 35 * memory should add an NMI handler to correct and rewrite. 36 * 37 * Scrubbing thus reduces the likelyhood that multiple transient errors 38 * will occur in the same memory word, making uncorrectable errors due 39 * to transients less likely. 40 * 41 * Thus is born the desire that every memory location be periodically 42 * accessed. 43 * 44 * This file implements a memory scrubbing thread. This scrubber 45 * guarantees that all of physical memory is accessed periodically 46 * (memscrub_period_sec -- 12 hours). 47 * 48 * It attempts to do this as unobtrusively as possible. The thread 49 * schedules itself to wake up at an interval such that if it reads 50 * memscrub_span_pages (4MB) on each wakeup, it will read all of physical 51 * memory in in memscrub_period_sec (12 hours). 52 * 53 * The scrubber uses the REP LODS so it reads 4MB in 0.15 secs (on P5-200). 54 * When it completes a span, if all the CPUs are idle, it reads another span. 55 * Typically it soaks up idle time this way to reach its deadline early 56 * -- and sleeps until the next period begins. 57 * 58 * Maximal Cost Estimate: 8GB @ xxMB/s = xxx seconds spent in 640 wakeups 59 * that run for 0.15 seconds at intervals of 67 seconds. 60 * 61 * In practice, the scrubber finds enough idle time to finish in a few 62 * minutes, and sleeps until its 12 hour deadline. 63 * 64 * The scrubber maintains a private copy of the phys_install memory list 65 * to keep track of what memory should be scrubbed. 66 * 67 * The following parameters can be set via /etc/system 68 * 69 * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB) 70 * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours) 71 * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0) 72 * memscrub_delay_start_sec = (10 seconds) 73 * disable_memscrub = (0) 74 * 75 * the scrubber will exit (or never be started) if it finds the variable 76 * "disable_memscrub" set. 77 * 78 * MEMSCRUB_DFL_SPAN_PAGES is based on the guess that 0.15 sec 79 * is a "good" amount of minimum time for the thread to run at a time. 80 * 81 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess -- 82 * twice the frequency the hardware folk estimated would be necessary. 83 * 84 * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly 85 * any other use of the system should be higher priority than scrubbing. 86 */ 87 88 #include <sys/types.h> 89 #include <sys/systm.h> /* timeout, types, t_lock */ 90 #include <sys/cmn_err.h> 91 #include <sys/sysmacros.h> /* MIN */ 92 #include <sys/memlist.h> /* memlist */ 93 #include <sys/kmem.h> /* KMEM_NOSLEEP */ 94 #include <sys/cpuvar.h> /* ncpus_online */ 95 #include <sys/debug.h> /* ASSERTs */ 96 #include <sys/vmem.h> 97 #include <sys/mman.h> 98 #include <vm/seg_kmem.h> 99 #include <vm/seg_kpm.h> 100 #include <vm/hat_i86.h> 101 102 static caddr_t memscrub_window; 103 static void *memscrub_pte; 104 105 /* 106 * Global Data: 107 */ 108 /* 109 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC 110 */ 111 #define MEMSCRUB_DFL_PERIOD_SEC (12 * 60 * 60) /* 12 hours */ 112 113 /* 114 * start only if at least MEMSCRUB_MIN_PAGES in system 115 */ 116 #define MEMSCRUB_MIN_PAGES ((32 * 1024 * 1024) / PAGESIZE) 117 118 /* 119 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration 120 */ 121 #define MEMSCRUB_DFL_SPAN_PAGES ((4 * 1024 * 1024) / PAGESIZE) 122 123 /* 124 * almost anything is higher priority than scrubbing 125 */ 126 #define MEMSCRUB_DFL_THREAD_PRI 0 127 128 /* 129 * we can patch these defaults in /etc/system if necessary 130 */ 131 uint_t disable_memscrub = 0; 132 pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES; 133 pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES; 134 time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC; 135 uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI; 136 time_t memscrub_delay_start_sec = 10; 137 138 /* 139 * Static Routines 140 */ 141 static void memscrubber(void); 142 static int system_is_idle(void); 143 static int memscrub_add_span(uint64_t, uint64_t); 144 145 /* 146 * Static Data 147 */ 148 static struct memlist *memscrub_memlist; 149 static uint_t memscrub_phys_pages; 150 151 static kcondvar_t memscrub_cv; 152 static kmutex_t memscrub_lock; 153 /* 154 * memscrub_lock protects memscrub_memlist 155 */ 156 uint_t memscrub_scans_done; 157 158 uint_t memscrub_done_early; 159 uint_t memscrub_early_sec; 160 161 uint_t memscrub_done_late; 162 time_t memscrub_late_sec; 163 164 /* 165 * create memscrub_memlist from phys_install list 166 * initialize locks, set memscrub_phys_pages. 167 */ 168 void 169 memscrub_init() 170 { 171 struct memlist *src; 172 173 if (physmem < memscrub_min_pages) 174 return; 175 176 if (!kpm_enable) { 177 memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 178 memscrub_pte = hat_mempte_setup(memscrub_window); 179 } 180 181 /* 182 * copy phys_install to memscrub_memlist 183 */ 184 for (src = phys_install; src; src = src->next) { 185 if (memscrub_add_span(src->address, src->size)) { 186 cmn_err(CE_WARN, 187 "Memory scrubber failed to initialize\n"); 188 return; 189 } 190 } 191 192 mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL); 193 cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL); 194 195 /* 196 * create memscrubber thread 197 */ 198 (void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0, 199 TS_RUN, memscrub_thread_pri); 200 } 201 202 #ifdef MEMSCRUB_DEBUG 203 void 204 memscrub_printmemlist(char *title, struct memlist *listp) 205 { 206 struct memlist *list; 207 208 cmn_err(CE_CONT, "%s:\n", title); 209 210 for (list = listp; list; list = list->next) { 211 cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n", 212 list->address, list->size); 213 } 214 } 215 #endif /* MEMSCRUB_DEBUG */ 216 217 /* ARGSUSED */ 218 void 219 memscrub_wakeup(void *c) 220 { 221 /* 222 * grab mutex to guarantee that our wakeup call 223 * arrives after we go to sleep -- so we can't sleep forever. 224 */ 225 mutex_enter(&memscrub_lock); 226 cv_signal(&memscrub_cv); 227 mutex_exit(&memscrub_lock); 228 } 229 230 /* 231 * this calculation doesn't account for the time that the actual scan 232 * consumes -- so we'd fall slightly behind schedule with this 233 * interval_sec. but the idle loop optimization below usually makes us 234 * come in way ahead of schedule. 235 */ 236 static int 237 compute_interval_sec() 238 { 239 if (memscrub_phys_pages <= memscrub_span_pages) 240 return (memscrub_period_sec); 241 else 242 return (memscrub_period_sec/ 243 (memscrub_phys_pages/memscrub_span_pages)); 244 } 245 246 void 247 memscrubber() 248 { 249 time_t deadline; 250 uint64_t mlp_last_addr; 251 uint64_t mlp_next_addr; 252 int reached_end = 1; 253 time_t interval_sec = 0; 254 struct memlist *mlp; 255 256 extern void scan_memory(caddr_t, size_t); 257 258 if (memscrub_memlist == NULL) { 259 cmn_err(CE_WARN, "memscrub_memlist not initialized."); 260 goto memscrub_exit; 261 } 262 263 mlp = memscrub_memlist; 264 mlp_next_addr = mlp->address; 265 mlp_last_addr = mlp->address + mlp->size; 266 267 deadline = gethrestime_sec() + memscrub_delay_start_sec; 268 269 for (;;) { 270 if (disable_memscrub) 271 break; 272 273 mutex_enter(&memscrub_lock); 274 275 /* 276 * did we just reach the end of memory? 277 */ 278 if (reached_end) { 279 time_t now = gethrestime_sec(); 280 281 if (now >= deadline) { 282 memscrub_done_late++; 283 memscrub_late_sec += (now - deadline); 284 /* 285 * past deadline, start right away 286 */ 287 interval_sec = 0; 288 289 deadline = now + memscrub_period_sec; 290 } else { 291 /* 292 * we finished ahead of schedule. 293 * wait till previous dealine before re-start. 294 */ 295 interval_sec = deadline - now; 296 memscrub_done_early++; 297 memscrub_early_sec += interval_sec; 298 deadline += memscrub_period_sec; 299 } 300 } else { 301 interval_sec = compute_interval_sec(); 302 } 303 304 /* 305 * hit the snooze bar 306 */ 307 (void) timeout(memscrub_wakeup, NULL, interval_sec * hz); 308 309 /* 310 * go to sleep 311 */ 312 cv_wait(&memscrub_cv, &memscrub_lock); 313 314 mutex_exit(&memscrub_lock); 315 316 do { 317 pgcnt_t pages = memscrub_span_pages; 318 uint64_t address = mlp_next_addr; 319 320 if (disable_memscrub) 321 break; 322 323 mutex_enter(&memscrub_lock); 324 325 /* 326 * Make sure we don't try to scan beyond the end of 327 * the current memlist. If we would, then resize 328 * our scan target for this iteration, and prepare 329 * to read the next memlist entry on the next 330 * iteration. 331 */ 332 reached_end = 0; 333 if (address + mmu_ptob(pages) >= mlp_last_addr) { 334 pages = mmu_btop(mlp_last_addr - address); 335 mlp = mlp->next; 336 if (mlp == NULL) { 337 reached_end = 1; 338 mlp = memscrub_memlist; 339 } 340 mlp_next_addr = mlp->address; 341 mlp_last_addr = mlp->address + mlp->size; 342 } else { 343 mlp_next_addr += mmu_ptob(pages); 344 } 345 346 mutex_exit(&memscrub_lock); 347 348 while (pages--) { 349 pfn_t pfn = btop(address); 350 351 /* 352 * Without segkpm, the memscrubber cannot 353 * be allowed to migrate across CPUs, as 354 * the CPU-specific mapping of 355 * memscrub_window would be incorrect. 356 * With segkpm, switching CPUs is legal, but 357 * inefficient. We don't use 358 * kpreempt_disable as it might hold a 359 * higher priority thread (eg, RT) too long 360 * off CPU. 361 */ 362 thread_affinity_set(curthread, CPU_CURRENT); 363 if (kpm_enable) 364 memscrub_window = hat_kpm_pfn2va(pfn); 365 else 366 hat_mempte_remap(pfn, memscrub_window, 367 memscrub_pte, 368 PROT_READ, HAT_LOAD_NOCONSIST); 369 370 scan_memory(memscrub_window, PAGESIZE); 371 372 thread_affinity_clear(curthread); 373 address += MMU_PAGESIZE; 374 } 375 376 memscrub_scans_done++; 377 } while (!reached_end && system_is_idle()); 378 } 379 380 memscrub_exit: 381 382 cmn_err(CE_NOTE, "memory scrubber exiting."); 383 384 cv_destroy(&memscrub_cv); 385 386 thread_exit(); 387 } 388 389 390 /* 391 * return 1 if we're MP and all the other CPUs are idle 392 */ 393 static int 394 system_is_idle() 395 { 396 int cpu_id; 397 int found = 0; 398 399 if (1 == ncpus_online) 400 return (0); 401 402 for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) { 403 if (!cpu[cpu_id]) 404 continue; 405 406 found++; 407 408 if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) { 409 if (CPU->cpu_id == cpu_id && 410 CPU->cpu_disp->disp_nrunnable == 0) 411 continue; 412 return (0); 413 } 414 415 if (found == ncpus) 416 break; 417 } 418 return (1); 419 } 420 421 /* 422 * add a span to the memscrub list 423 */ 424 static int 425 memscrub_add_span(uint64_t start, uint64_t bytes) 426 { 427 struct memlist *dst; 428 struct memlist *prev, *next; 429 uint64_t end = start + bytes - 1; 430 int retval = 0; 431 432 mutex_enter(&memscrub_lock); 433 434 #ifdef MEMSCRUB_DEBUG 435 memscrub_printmemlist("memscrub_memlist before", memscrub_memlist); 436 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages); 437 cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx" 438 " size: 0x%llx\n", start, bytes); 439 #endif /* MEMSCRUB_DEBUG */ 440 441 /* 442 * Scan through the list to find the proper place to install it. 443 */ 444 prev = NULL; 445 next = memscrub_memlist; 446 while (next) { 447 uint64_t ns = next->address; 448 uint64_t ne = next->address + next->size - 1; 449 450 /* 451 * If this span overlaps with an existing span, then 452 * something has gone horribly wrong with the phys_install 453 * list. In fact, I'm surprised we made it this far. 454 */ 455 if ((start >= ns && start <= ne) || (end >= ns && end <= ne) || 456 (start < ns && end > ne)) 457 panic("memscrub found overlapping memory ranges " 458 "(0x%p-0x%p) and (0x%p-0x%p)", 459 (void *)start, (void *)end, (void *)ns, (void *)ne); 460 461 /* 462 * New span can be appended to an existing one. 463 */ 464 if (start == ne + 1) { 465 next->size += bytes; 466 goto add_done; 467 } 468 469 /* 470 * New span can be prepended to an existing one. 471 */ 472 if (end + 1 == ns) { 473 next->size += bytes; 474 next->address = start; 475 goto add_done; 476 } 477 478 /* 479 * If the next span has a higher start address than the new 480 * one, then we have found the right spot for our 481 * insertion. 482 */ 483 if (ns > start) 484 break; 485 486 prev = next; 487 next = next->next; 488 } 489 490 /* 491 * allocate a new struct memlist 492 */ 493 dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP); 494 if (dst == NULL) { 495 retval = -1; 496 goto add_done; 497 } 498 dst->address = start; 499 dst->size = bytes; 500 dst->prev = prev; 501 dst->next = next; 502 503 if (prev) 504 prev->next = dst; 505 else 506 memscrub_memlist = dst; 507 508 if (next) 509 next->prev = dst; 510 511 add_done: 512 513 if (retval != -1) 514 memscrub_phys_pages += mmu_btop(bytes); 515 516 #ifdef MEMSCRUB_DEBUG 517 memscrub_printmemlist("memscrub_memlist after", memscrub_memlist); 518 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages); 519 #endif /* MEMSCRUB_DEBUG */ 520 521 mutex_exit(&memscrub_lock); 522 return (retval); 523 } 524