1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*7c478bd9Sstevel@tonic-gate 29*7c478bd9Sstevel@tonic-gate /* 30*7c478bd9Sstevel@tonic-gate * i86pc Memory Scrubbing 31*7c478bd9Sstevel@tonic-gate * 32*7c478bd9Sstevel@tonic-gate * On detection of a correctable memory ECC error, the i86pc hardware 33*7c478bd9Sstevel@tonic-gate * returns the corrected data to the requester and may re-write it 34*7c478bd9Sstevel@tonic-gate * to memory (DRAM or NVRAM). Machines which do not re-write this to 35*7c478bd9Sstevel@tonic-gate * memory should add an NMI handler to correct and rewrite. 36*7c478bd9Sstevel@tonic-gate * 37*7c478bd9Sstevel@tonic-gate * Scrubbing thus reduces the likelihood that multiple transient errors 38*7c478bd9Sstevel@tonic-gate * will occur in the same memory word, making uncorrectable errors due 39*7c478bd9Sstevel@tonic-gate * to transients less likely. 40*7c478bd9Sstevel@tonic-gate * 41*7c478bd9Sstevel@tonic-gate * Thus is born the desire that every memory location be periodically 42*7c478bd9Sstevel@tonic-gate * accessed. 43*7c478bd9Sstevel@tonic-gate * 44*7c478bd9Sstevel@tonic-gate * This file implements a memory scrubbing thread. This scrubber 45*7c478bd9Sstevel@tonic-gate * guarantees that all of physical memory is accessed periodically 46*7c478bd9Sstevel@tonic-gate * (memscrub_period_sec -- 12 hours). 47*7c478bd9Sstevel@tonic-gate * 48*7c478bd9Sstevel@tonic-gate * It attempts to do this as unobtrusively as possible. The thread 49*7c478bd9Sstevel@tonic-gate * schedules itself to wake up at an interval such that if it reads 50*7c478bd9Sstevel@tonic-gate * memscrub_span_pages (4MB) on each wakeup, it will read all of physical 51*7c478bd9Sstevel@tonic-gate * memory in memscrub_period_sec (12 hours). 52*7c478bd9Sstevel@tonic-gate * 53*7c478bd9Sstevel@tonic-gate * The scrubber uses the REP LODS so it reads 4MB in 0.15 secs (on P5-200). 
54*7c478bd9Sstevel@tonic-gate * When it completes a span, if all the CPUs are idle, it reads another span. 55*7c478bd9Sstevel@tonic-gate * Typically it soaks up idle time this way to reach its deadline early 56*7c478bd9Sstevel@tonic-gate * -- and sleeps until the next period begins. 57*7c478bd9Sstevel@tonic-gate * 58*7c478bd9Sstevel@tonic-gate * Maximal Cost Estimate: 8GB @ xxMB/s = xxx seconds spent in 640 wakeups 59*7c478bd9Sstevel@tonic-gate * that run for 0.15 seconds at intervals of 67 seconds. 60*7c478bd9Sstevel@tonic-gate * 61*7c478bd9Sstevel@tonic-gate * In practice, the scrubber finds enough idle time to finish in a few 62*7c478bd9Sstevel@tonic-gate * minutes, and sleeps until its 12 hour deadline. 63*7c478bd9Sstevel@tonic-gate * 64*7c478bd9Sstevel@tonic-gate * The scrubber maintains a private copy of the phys_install memory list 65*7c478bd9Sstevel@tonic-gate * to keep track of what memory should be scrubbed. 66*7c478bd9Sstevel@tonic-gate * 67*7c478bd9Sstevel@tonic-gate * The following parameters can be set via /etc/system 68*7c478bd9Sstevel@tonic-gate * 69*7c478bd9Sstevel@tonic-gate * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB) 70*7c478bd9Sstevel@tonic-gate * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours) 71*7c478bd9Sstevel@tonic-gate * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0) 72*7c478bd9Sstevel@tonic-gate * memscrub_delay_start_sec = (10 seconds) 73*7c478bd9Sstevel@tonic-gate * disable_memscrub = (0) 74*7c478bd9Sstevel@tonic-gate * 75*7c478bd9Sstevel@tonic-gate * the scrubber will exit (or never be started) if it finds the variable 76*7c478bd9Sstevel@tonic-gate * "disable_memscrub" set. 77*7c478bd9Sstevel@tonic-gate * 78*7c478bd9Sstevel@tonic-gate * MEMSCRUB_DFL_SPAN_PAGES is based on the guess that 0.15 sec 79*7c478bd9Sstevel@tonic-gate * is a "good" amount of minimum time for the thread to run at a time. 
80*7c478bd9Sstevel@tonic-gate * 81*7c478bd9Sstevel@tonic-gate * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess -- 82*7c478bd9Sstevel@tonic-gate * twice the frequency the hardware folk estimated would be necessary. 83*7c478bd9Sstevel@tonic-gate * 84*7c478bd9Sstevel@tonic-gate * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly 85*7c478bd9Sstevel@tonic-gate * any other use of the system should be higher priority than scrubbing. 86*7c478bd9Sstevel@tonic-gate */ 87*7c478bd9Sstevel@tonic-gate 88*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 89*7c478bd9Sstevel@tonic-gate #include <sys/systm.h> /* timeout, types, t_lock */ 90*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 91*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> /* MIN */ 92*7c478bd9Sstevel@tonic-gate #include <sys/memlist.h> /* memlist */ 93*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h> /* KMEM_NOSLEEP */ 94*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> /* ncpus_online */ 95*7c478bd9Sstevel@tonic-gate #include <sys/debug.h> /* ASSERTs */ 96*7c478bd9Sstevel@tonic-gate #include <sys/vmem.h> 97*7c478bd9Sstevel@tonic-gate #include <sys/mman.h> 98*7c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 99*7c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h> 100*7c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h> 101*7c478bd9Sstevel@tonic-gate 102*7c478bd9Sstevel@tonic-gate static caddr_t memscrub_window; 103*7c478bd9Sstevel@tonic-gate static void *memscrub_pte; 104*7c478bd9Sstevel@tonic-gate 105*7c478bd9Sstevel@tonic-gate /* 106*7c478bd9Sstevel@tonic-gate * Global Data: 107*7c478bd9Sstevel@tonic-gate */ 108*7c478bd9Sstevel@tonic-gate /* 109*7c478bd9Sstevel@tonic-gate * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC 110*7c478bd9Sstevel@tonic-gate */ 111*7c478bd9Sstevel@tonic-gate #define MEMSCRUB_DFL_PERIOD_SEC (12 * 60 * 60) /* 12 hours */ 112*7c478bd9Sstevel@tonic-gate 113*7c478bd9Sstevel@tonic-gate /* 114*7c478bd9Sstevel@tonic-gate * start only if 
at least MEMSCRUB_MIN_PAGES in system 115*7c478bd9Sstevel@tonic-gate */ 116*7c478bd9Sstevel@tonic-gate #define MEMSCRUB_MIN_PAGES ((32 * 1024 * 1024) / PAGESIZE) 117*7c478bd9Sstevel@tonic-gate 118*7c478bd9Sstevel@tonic-gate /* 119*7c478bd9Sstevel@tonic-gate * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration 120*7c478bd9Sstevel@tonic-gate */ 121*7c478bd9Sstevel@tonic-gate #define MEMSCRUB_DFL_SPAN_PAGES ((4 * 1024 * 1024) / PAGESIZE) 122*7c478bd9Sstevel@tonic-gate 123*7c478bd9Sstevel@tonic-gate /* 124*7c478bd9Sstevel@tonic-gate * almost anything is higher priority than scrubbing 125*7c478bd9Sstevel@tonic-gate */ 126*7c478bd9Sstevel@tonic-gate #define MEMSCRUB_DFL_THREAD_PRI 0 127*7c478bd9Sstevel@tonic-gate 128*7c478bd9Sstevel@tonic-gate /* 129*7c478bd9Sstevel@tonic-gate * we can patch these defaults in /etc/system if necessary 130*7c478bd9Sstevel@tonic-gate */ 131*7c478bd9Sstevel@tonic-gate uint_t disable_memscrub = 0; 132*7c478bd9Sstevel@tonic-gate pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES; 133*7c478bd9Sstevel@tonic-gate pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES; 134*7c478bd9Sstevel@tonic-gate time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC; 135*7c478bd9Sstevel@tonic-gate uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI; 136*7c478bd9Sstevel@tonic-gate time_t memscrub_delay_start_sec = 10; 137*7c478bd9Sstevel@tonic-gate 138*7c478bd9Sstevel@tonic-gate /* 139*7c478bd9Sstevel@tonic-gate * Static Routines 140*7c478bd9Sstevel@tonic-gate */ 141*7c478bd9Sstevel@tonic-gate static void memscrubber(void); 142*7c478bd9Sstevel@tonic-gate static int system_is_idle(void); 143*7c478bd9Sstevel@tonic-gate static int memscrub_add_span(uint64_t, uint64_t); 144*7c478bd9Sstevel@tonic-gate 145*7c478bd9Sstevel@tonic-gate /* 146*7c478bd9Sstevel@tonic-gate * Static Data 147*7c478bd9Sstevel@tonic-gate */ 148*7c478bd9Sstevel@tonic-gate static struct memlist *memscrub_memlist; 149*7c478bd9Sstevel@tonic-gate static uint_t memscrub_phys_pages; 
150*7c478bd9Sstevel@tonic-gate 151*7c478bd9Sstevel@tonic-gate static kcondvar_t memscrub_cv; 152*7c478bd9Sstevel@tonic-gate static kmutex_t memscrub_lock; 153*7c478bd9Sstevel@tonic-gate /* 154*7c478bd9Sstevel@tonic-gate * memscrub_lock protects memscrub_memlist 155*7c478bd9Sstevel@tonic-gate */ 156*7c478bd9Sstevel@tonic-gate uint_t memscrub_scans_done; 157*7c478bd9Sstevel@tonic-gate 158*7c478bd9Sstevel@tonic-gate uint_t memscrub_done_early; 159*7c478bd9Sstevel@tonic-gate uint_t memscrub_early_sec; 160*7c478bd9Sstevel@tonic-gate 161*7c478bd9Sstevel@tonic-gate uint_t memscrub_done_late; 162*7c478bd9Sstevel@tonic-gate time_t memscrub_late_sec; 163*7c478bd9Sstevel@tonic-gate 164*7c478bd9Sstevel@tonic-gate /* 165*7c478bd9Sstevel@tonic-gate * create memscrub_memlist from phys_install list 166*7c478bd9Sstevel@tonic-gate * initialize locks, set memscrub_phys_pages. 167*7c478bd9Sstevel@tonic-gate */ 168*7c478bd9Sstevel@tonic-gate void 169*7c478bd9Sstevel@tonic-gate memscrub_init() 170*7c478bd9Sstevel@tonic-gate { 171*7c478bd9Sstevel@tonic-gate struct memlist *src; 172*7c478bd9Sstevel@tonic-gate 173*7c478bd9Sstevel@tonic-gate if (physmem < memscrub_min_pages) 174*7c478bd9Sstevel@tonic-gate return; 175*7c478bd9Sstevel@tonic-gate 176*7c478bd9Sstevel@tonic-gate if (!kpm_enable) { 177*7c478bd9Sstevel@tonic-gate memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 178*7c478bd9Sstevel@tonic-gate memscrub_pte = hat_mempte_setup(memscrub_window); 179*7c478bd9Sstevel@tonic-gate } 180*7c478bd9Sstevel@tonic-gate 181*7c478bd9Sstevel@tonic-gate /* 182*7c478bd9Sstevel@tonic-gate * copy phys_install to memscrub_memlist 183*7c478bd9Sstevel@tonic-gate */ 184*7c478bd9Sstevel@tonic-gate for (src = phys_install; src; src = src->next) { 185*7c478bd9Sstevel@tonic-gate if (memscrub_add_span(src->address, src->size)) { 186*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 187*7c478bd9Sstevel@tonic-gate "Memory scrubber failed to initialize\n"); 188*7c478bd9Sstevel@tonic-gate return; 
189*7c478bd9Sstevel@tonic-gate } 190*7c478bd9Sstevel@tonic-gate } 191*7c478bd9Sstevel@tonic-gate 192*7c478bd9Sstevel@tonic-gate mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL); 193*7c478bd9Sstevel@tonic-gate cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL); 194*7c478bd9Sstevel@tonic-gate 195*7c478bd9Sstevel@tonic-gate /* 196*7c478bd9Sstevel@tonic-gate * create memscrubber thread 197*7c478bd9Sstevel@tonic-gate */ 198*7c478bd9Sstevel@tonic-gate (void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0, 199*7c478bd9Sstevel@tonic-gate TS_RUN, memscrub_thread_pri); 200*7c478bd9Sstevel@tonic-gate } 201*7c478bd9Sstevel@tonic-gate 202*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG 203*7c478bd9Sstevel@tonic-gate void 204*7c478bd9Sstevel@tonic-gate memscrub_printmemlist(char *title, struct memlist *listp) 205*7c478bd9Sstevel@tonic-gate { 206*7c478bd9Sstevel@tonic-gate struct memlist *list; 207*7c478bd9Sstevel@tonic-gate 208*7c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "%s:\n", title); 209*7c478bd9Sstevel@tonic-gate 210*7c478bd9Sstevel@tonic-gate for (list = listp; list; list = list->next) { 211*7c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n", 212*7c478bd9Sstevel@tonic-gate list->address, list->size); 213*7c478bd9Sstevel@tonic-gate } 214*7c478bd9Sstevel@tonic-gate } 215*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */ 216*7c478bd9Sstevel@tonic-gate 217*7c478bd9Sstevel@tonic-gate /* ARGSUSED */ 218*7c478bd9Sstevel@tonic-gate void 219*7c478bd9Sstevel@tonic-gate memscrub_wakeup(void *c) 220*7c478bd9Sstevel@tonic-gate { 221*7c478bd9Sstevel@tonic-gate /* 222*7c478bd9Sstevel@tonic-gate * grab mutex to guarantee that our wakeup call 223*7c478bd9Sstevel@tonic-gate * arrives after we go to sleep -- so we can't sleep forever. 
224*7c478bd9Sstevel@tonic-gate */ 225*7c478bd9Sstevel@tonic-gate mutex_enter(&memscrub_lock); 226*7c478bd9Sstevel@tonic-gate cv_signal(&memscrub_cv); 227*7c478bd9Sstevel@tonic-gate mutex_exit(&memscrub_lock); 228*7c478bd9Sstevel@tonic-gate } 229*7c478bd9Sstevel@tonic-gate 230*7c478bd9Sstevel@tonic-gate /* 231*7c478bd9Sstevel@tonic-gate * this calculation doesn't account for the time that the actual scan 232*7c478bd9Sstevel@tonic-gate * consumes -- so we'd fall slightly behind schedule with this 233*7c478bd9Sstevel@tonic-gate * interval_sec. but the idle loop optimization below usually makes us 234*7c478bd9Sstevel@tonic-gate * come in way ahead of schedule. 235*7c478bd9Sstevel@tonic-gate */ 236*7c478bd9Sstevel@tonic-gate static int 237*7c478bd9Sstevel@tonic-gate compute_interval_sec() 238*7c478bd9Sstevel@tonic-gate { 239*7c478bd9Sstevel@tonic-gate if (memscrub_phys_pages <= memscrub_span_pages) 240*7c478bd9Sstevel@tonic-gate return (memscrub_period_sec); 241*7c478bd9Sstevel@tonic-gate else 242*7c478bd9Sstevel@tonic-gate return (memscrub_period_sec/ 243*7c478bd9Sstevel@tonic-gate (memscrub_phys_pages/memscrub_span_pages)); 244*7c478bd9Sstevel@tonic-gate } 245*7c478bd9Sstevel@tonic-gate 246*7c478bd9Sstevel@tonic-gate void 247*7c478bd9Sstevel@tonic-gate memscrubber() 248*7c478bd9Sstevel@tonic-gate { 249*7c478bd9Sstevel@tonic-gate time_t deadline; 250*7c478bd9Sstevel@tonic-gate uint64_t mlp_last_addr; 251*7c478bd9Sstevel@tonic-gate uint64_t mlp_next_addr; 252*7c478bd9Sstevel@tonic-gate int reached_end = 1; 253*7c478bd9Sstevel@tonic-gate time_t interval_sec = 0; 254*7c478bd9Sstevel@tonic-gate struct memlist *mlp; 255*7c478bd9Sstevel@tonic-gate 256*7c478bd9Sstevel@tonic-gate extern void scan_memory(caddr_t, size_t); 257*7c478bd9Sstevel@tonic-gate 258*7c478bd9Sstevel@tonic-gate if (memscrub_memlist == NULL) { 259*7c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "memscrub_memlist not initialized."); 260*7c478bd9Sstevel@tonic-gate goto memscrub_exit; 
261*7c478bd9Sstevel@tonic-gate } 262*7c478bd9Sstevel@tonic-gate 263*7c478bd9Sstevel@tonic-gate mlp = memscrub_memlist; 264*7c478bd9Sstevel@tonic-gate mlp_next_addr = mlp->address; 265*7c478bd9Sstevel@tonic-gate mlp_last_addr = mlp->address + mlp->size; 266*7c478bd9Sstevel@tonic-gate 267*7c478bd9Sstevel@tonic-gate deadline = gethrestime_sec() + memscrub_delay_start_sec; 268*7c478bd9Sstevel@tonic-gate 269*7c478bd9Sstevel@tonic-gate for (;;) { 270*7c478bd9Sstevel@tonic-gate if (disable_memscrub) 271*7c478bd9Sstevel@tonic-gate break; 272*7c478bd9Sstevel@tonic-gate 273*7c478bd9Sstevel@tonic-gate mutex_enter(&memscrub_lock); 274*7c478bd9Sstevel@tonic-gate 275*7c478bd9Sstevel@tonic-gate /* 276*7c478bd9Sstevel@tonic-gate * did we just reach the end of memory? 277*7c478bd9Sstevel@tonic-gate */ 278*7c478bd9Sstevel@tonic-gate if (reached_end) { 279*7c478bd9Sstevel@tonic-gate time_t now = gethrestime_sec(); 280*7c478bd9Sstevel@tonic-gate 281*7c478bd9Sstevel@tonic-gate if (now >= deadline) { 282*7c478bd9Sstevel@tonic-gate memscrub_done_late++; 283*7c478bd9Sstevel@tonic-gate memscrub_late_sec += (now - deadline); 284*7c478bd9Sstevel@tonic-gate /* 285*7c478bd9Sstevel@tonic-gate * past deadline, start right away 286*7c478bd9Sstevel@tonic-gate */ 287*7c478bd9Sstevel@tonic-gate interval_sec = 0; 288*7c478bd9Sstevel@tonic-gate 289*7c478bd9Sstevel@tonic-gate deadline = now + memscrub_period_sec; 290*7c478bd9Sstevel@tonic-gate } else { 291*7c478bd9Sstevel@tonic-gate /* 292*7c478bd9Sstevel@tonic-gate * we finished ahead of schedule. 293*7c478bd9Sstevel@tonic-gate * wait till previous dealine before re-start. 
294*7c478bd9Sstevel@tonic-gate */ 295*7c478bd9Sstevel@tonic-gate interval_sec = deadline - now; 296*7c478bd9Sstevel@tonic-gate memscrub_done_early++; 297*7c478bd9Sstevel@tonic-gate memscrub_early_sec += interval_sec; 298*7c478bd9Sstevel@tonic-gate deadline += memscrub_period_sec; 299*7c478bd9Sstevel@tonic-gate } 300*7c478bd9Sstevel@tonic-gate } else { 301*7c478bd9Sstevel@tonic-gate interval_sec = compute_interval_sec(); 302*7c478bd9Sstevel@tonic-gate } 303*7c478bd9Sstevel@tonic-gate 304*7c478bd9Sstevel@tonic-gate /* 305*7c478bd9Sstevel@tonic-gate * hit the snooze bar 306*7c478bd9Sstevel@tonic-gate */ 307*7c478bd9Sstevel@tonic-gate (void) timeout(memscrub_wakeup, NULL, interval_sec * hz); 308*7c478bd9Sstevel@tonic-gate 309*7c478bd9Sstevel@tonic-gate /* 310*7c478bd9Sstevel@tonic-gate * go to sleep 311*7c478bd9Sstevel@tonic-gate */ 312*7c478bd9Sstevel@tonic-gate cv_wait(&memscrub_cv, &memscrub_lock); 313*7c478bd9Sstevel@tonic-gate 314*7c478bd9Sstevel@tonic-gate mutex_exit(&memscrub_lock); 315*7c478bd9Sstevel@tonic-gate 316*7c478bd9Sstevel@tonic-gate do { 317*7c478bd9Sstevel@tonic-gate pgcnt_t pages = memscrub_span_pages; 318*7c478bd9Sstevel@tonic-gate uint64_t address = mlp_next_addr; 319*7c478bd9Sstevel@tonic-gate 320*7c478bd9Sstevel@tonic-gate if (disable_memscrub) 321*7c478bd9Sstevel@tonic-gate break; 322*7c478bd9Sstevel@tonic-gate 323*7c478bd9Sstevel@tonic-gate mutex_enter(&memscrub_lock); 324*7c478bd9Sstevel@tonic-gate 325*7c478bd9Sstevel@tonic-gate /* 326*7c478bd9Sstevel@tonic-gate * Make sure we don't try to scan beyond the end of 327*7c478bd9Sstevel@tonic-gate * the current memlist. If we would, then resize 328*7c478bd9Sstevel@tonic-gate * our scan target for this iteration, and prepare 329*7c478bd9Sstevel@tonic-gate * to read the next memlist entry on the next 330*7c478bd9Sstevel@tonic-gate * iteration. 
331*7c478bd9Sstevel@tonic-gate */ 332*7c478bd9Sstevel@tonic-gate reached_end = 0; 333*7c478bd9Sstevel@tonic-gate if (address + mmu_ptob(pages) >= mlp_last_addr) { 334*7c478bd9Sstevel@tonic-gate pages = mmu_btop(mlp_last_addr - address); 335*7c478bd9Sstevel@tonic-gate mlp = mlp->next; 336*7c478bd9Sstevel@tonic-gate if (mlp == NULL) { 337*7c478bd9Sstevel@tonic-gate reached_end = 1; 338*7c478bd9Sstevel@tonic-gate mlp = memscrub_memlist; 339*7c478bd9Sstevel@tonic-gate } 340*7c478bd9Sstevel@tonic-gate mlp_next_addr = mlp->address; 341*7c478bd9Sstevel@tonic-gate mlp_last_addr = mlp->address + mlp->size; 342*7c478bd9Sstevel@tonic-gate } else { 343*7c478bd9Sstevel@tonic-gate mlp_next_addr += mmu_ptob(pages); 344*7c478bd9Sstevel@tonic-gate } 345*7c478bd9Sstevel@tonic-gate 346*7c478bd9Sstevel@tonic-gate mutex_exit(&memscrub_lock); 347*7c478bd9Sstevel@tonic-gate 348*7c478bd9Sstevel@tonic-gate while (pages--) { 349*7c478bd9Sstevel@tonic-gate pfn_t pfn = btop(address); 350*7c478bd9Sstevel@tonic-gate 351*7c478bd9Sstevel@tonic-gate /* 352*7c478bd9Sstevel@tonic-gate * Without segkpm, the memscrubber cannot 353*7c478bd9Sstevel@tonic-gate * be allowed to migrate across CPUs, as 354*7c478bd9Sstevel@tonic-gate * the CPU-specific mapping of 355*7c478bd9Sstevel@tonic-gate * memscrub_window would be incorrect. 356*7c478bd9Sstevel@tonic-gate * With segkpm, switching CPUs is legal, but 357*7c478bd9Sstevel@tonic-gate * inefficient. We don't use 358*7c478bd9Sstevel@tonic-gate * kpreempt_disable as it might hold a 359*7c478bd9Sstevel@tonic-gate * higher priority thread (eg, RT) too long 360*7c478bd9Sstevel@tonic-gate * off CPU. 
361*7c478bd9Sstevel@tonic-gate */ 362*7c478bd9Sstevel@tonic-gate thread_affinity_set(curthread, CPU_CURRENT); 363*7c478bd9Sstevel@tonic-gate if (kpm_enable) 364*7c478bd9Sstevel@tonic-gate memscrub_window = hat_kpm_pfn2va(pfn); 365*7c478bd9Sstevel@tonic-gate else 366*7c478bd9Sstevel@tonic-gate hat_mempte_remap(pfn, memscrub_window, 367*7c478bd9Sstevel@tonic-gate memscrub_pte, 368*7c478bd9Sstevel@tonic-gate PROT_READ, HAT_LOAD_NOCONSIST); 369*7c478bd9Sstevel@tonic-gate 370*7c478bd9Sstevel@tonic-gate scan_memory(memscrub_window, PAGESIZE); 371*7c478bd9Sstevel@tonic-gate 372*7c478bd9Sstevel@tonic-gate thread_affinity_clear(curthread); 373*7c478bd9Sstevel@tonic-gate address += MMU_PAGESIZE; 374*7c478bd9Sstevel@tonic-gate } 375*7c478bd9Sstevel@tonic-gate 376*7c478bd9Sstevel@tonic-gate memscrub_scans_done++; 377*7c478bd9Sstevel@tonic-gate } while (!reached_end && system_is_idle()); 378*7c478bd9Sstevel@tonic-gate } 379*7c478bd9Sstevel@tonic-gate 380*7c478bd9Sstevel@tonic-gate memscrub_exit: 381*7c478bd9Sstevel@tonic-gate 382*7c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "memory scrubber exiting."); 383*7c478bd9Sstevel@tonic-gate 384*7c478bd9Sstevel@tonic-gate cv_destroy(&memscrub_cv); 385*7c478bd9Sstevel@tonic-gate 386*7c478bd9Sstevel@tonic-gate thread_exit(); 387*7c478bd9Sstevel@tonic-gate } 388*7c478bd9Sstevel@tonic-gate 389*7c478bd9Sstevel@tonic-gate 390*7c478bd9Sstevel@tonic-gate /* 391*7c478bd9Sstevel@tonic-gate * return 1 if we're MP and all the other CPUs are idle 392*7c478bd9Sstevel@tonic-gate */ 393*7c478bd9Sstevel@tonic-gate static int 394*7c478bd9Sstevel@tonic-gate system_is_idle() 395*7c478bd9Sstevel@tonic-gate { 396*7c478bd9Sstevel@tonic-gate int cpu_id; 397*7c478bd9Sstevel@tonic-gate int found = 0; 398*7c478bd9Sstevel@tonic-gate 399*7c478bd9Sstevel@tonic-gate if (1 == ncpus_online) 400*7c478bd9Sstevel@tonic-gate return (0); 401*7c478bd9Sstevel@tonic-gate 402*7c478bd9Sstevel@tonic-gate for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) { 403*7c478bd9Sstevel@tonic-gate if 
(!cpu[cpu_id]) 404*7c478bd9Sstevel@tonic-gate continue; 405*7c478bd9Sstevel@tonic-gate 406*7c478bd9Sstevel@tonic-gate found++; 407*7c478bd9Sstevel@tonic-gate 408*7c478bd9Sstevel@tonic-gate if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) { 409*7c478bd9Sstevel@tonic-gate if (CPU->cpu_id == cpu_id && 410*7c478bd9Sstevel@tonic-gate CPU->cpu_disp->disp_nrunnable == 0) 411*7c478bd9Sstevel@tonic-gate continue; 412*7c478bd9Sstevel@tonic-gate return (0); 413*7c478bd9Sstevel@tonic-gate } 414*7c478bd9Sstevel@tonic-gate 415*7c478bd9Sstevel@tonic-gate if (found == ncpus) 416*7c478bd9Sstevel@tonic-gate break; 417*7c478bd9Sstevel@tonic-gate } 418*7c478bd9Sstevel@tonic-gate return (1); 419*7c478bd9Sstevel@tonic-gate } 420*7c478bd9Sstevel@tonic-gate 421*7c478bd9Sstevel@tonic-gate /* 422*7c478bd9Sstevel@tonic-gate * add a span to the memscrub list 423*7c478bd9Sstevel@tonic-gate */ 424*7c478bd9Sstevel@tonic-gate static int 425*7c478bd9Sstevel@tonic-gate memscrub_add_span(uint64_t start, uint64_t bytes) 426*7c478bd9Sstevel@tonic-gate { 427*7c478bd9Sstevel@tonic-gate struct memlist *dst; 428*7c478bd9Sstevel@tonic-gate struct memlist *prev, *next; 429*7c478bd9Sstevel@tonic-gate uint64_t end = start + bytes - 1; 430*7c478bd9Sstevel@tonic-gate int retval = 0; 431*7c478bd9Sstevel@tonic-gate 432*7c478bd9Sstevel@tonic-gate mutex_enter(&memscrub_lock); 433*7c478bd9Sstevel@tonic-gate 434*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG 435*7c478bd9Sstevel@tonic-gate memscrub_printmemlist("memscrub_memlist before", memscrub_memlist); 436*7c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages); 437*7c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx" 438*7c478bd9Sstevel@tonic-gate " size: 0x%llx\n", start, bytes); 439*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */ 440*7c478bd9Sstevel@tonic-gate 441*7c478bd9Sstevel@tonic-gate /* 442*7c478bd9Sstevel@tonic-gate * Scan through the list to find the proper place 
to install it. 443*7c478bd9Sstevel@tonic-gate */ 444*7c478bd9Sstevel@tonic-gate prev = NULL; 445*7c478bd9Sstevel@tonic-gate next = memscrub_memlist; 446*7c478bd9Sstevel@tonic-gate while (next) { 447*7c478bd9Sstevel@tonic-gate uint64_t ns = next->address; 448*7c478bd9Sstevel@tonic-gate uint64_t ne = next->address + next->size - 1; 449*7c478bd9Sstevel@tonic-gate 450*7c478bd9Sstevel@tonic-gate /* 451*7c478bd9Sstevel@tonic-gate * If this span overlaps with an existing span, then 452*7c478bd9Sstevel@tonic-gate * something has gone horribly wrong with the phys_install 453*7c478bd9Sstevel@tonic-gate * list. In fact, I'm surprised we made it this far. 454*7c478bd9Sstevel@tonic-gate */ 455*7c478bd9Sstevel@tonic-gate if ((start >= ns && start <= ne) || (end >= ns && end <= ne) || 456*7c478bd9Sstevel@tonic-gate (start < ns && end > ne)) 457*7c478bd9Sstevel@tonic-gate panic("memscrub found overlapping memory ranges " 458*7c478bd9Sstevel@tonic-gate "(0x%p-0x%p) and (0x%p-0x%p)", 459*7c478bd9Sstevel@tonic-gate (void *)start, (void *)end, (void *)ns, (void *)ne); 460*7c478bd9Sstevel@tonic-gate 461*7c478bd9Sstevel@tonic-gate /* 462*7c478bd9Sstevel@tonic-gate * New span can be appended to an existing one. 463*7c478bd9Sstevel@tonic-gate */ 464*7c478bd9Sstevel@tonic-gate if (start == ne + 1) { 465*7c478bd9Sstevel@tonic-gate next->size += bytes; 466*7c478bd9Sstevel@tonic-gate goto add_done; 467*7c478bd9Sstevel@tonic-gate } 468*7c478bd9Sstevel@tonic-gate 469*7c478bd9Sstevel@tonic-gate /* 470*7c478bd9Sstevel@tonic-gate * New span can be prepended to an existing one. 
471*7c478bd9Sstevel@tonic-gate */ 472*7c478bd9Sstevel@tonic-gate if (end + 1 == ns) { 473*7c478bd9Sstevel@tonic-gate next->size += bytes; 474*7c478bd9Sstevel@tonic-gate next->address = start; 475*7c478bd9Sstevel@tonic-gate goto add_done; 476*7c478bd9Sstevel@tonic-gate } 477*7c478bd9Sstevel@tonic-gate 478*7c478bd9Sstevel@tonic-gate /* 479*7c478bd9Sstevel@tonic-gate * If the next span has a higher start address than the new 480*7c478bd9Sstevel@tonic-gate * one, then we have found the right spot for our 481*7c478bd9Sstevel@tonic-gate * insertion. 482*7c478bd9Sstevel@tonic-gate */ 483*7c478bd9Sstevel@tonic-gate if (ns > start) 484*7c478bd9Sstevel@tonic-gate break; 485*7c478bd9Sstevel@tonic-gate 486*7c478bd9Sstevel@tonic-gate prev = next; 487*7c478bd9Sstevel@tonic-gate next = next->next; 488*7c478bd9Sstevel@tonic-gate } 489*7c478bd9Sstevel@tonic-gate 490*7c478bd9Sstevel@tonic-gate /* 491*7c478bd9Sstevel@tonic-gate * allocate a new struct memlist 492*7c478bd9Sstevel@tonic-gate */ 493*7c478bd9Sstevel@tonic-gate dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP); 494*7c478bd9Sstevel@tonic-gate if (dst == NULL) { 495*7c478bd9Sstevel@tonic-gate retval = -1; 496*7c478bd9Sstevel@tonic-gate goto add_done; 497*7c478bd9Sstevel@tonic-gate } 498*7c478bd9Sstevel@tonic-gate dst->address = start; 499*7c478bd9Sstevel@tonic-gate dst->size = bytes; 500*7c478bd9Sstevel@tonic-gate dst->prev = prev; 501*7c478bd9Sstevel@tonic-gate dst->next = next; 502*7c478bd9Sstevel@tonic-gate 503*7c478bd9Sstevel@tonic-gate if (prev) 504*7c478bd9Sstevel@tonic-gate prev->next = dst; 505*7c478bd9Sstevel@tonic-gate else 506*7c478bd9Sstevel@tonic-gate memscrub_memlist = dst; 507*7c478bd9Sstevel@tonic-gate 508*7c478bd9Sstevel@tonic-gate if (next) 509*7c478bd9Sstevel@tonic-gate next->prev = dst; 510*7c478bd9Sstevel@tonic-gate 511*7c478bd9Sstevel@tonic-gate add_done: 512*7c478bd9Sstevel@tonic-gate 513*7c478bd9Sstevel@tonic-gate if (retval != -1) 514*7c478bd9Sstevel@tonic-gate memscrub_phys_pages += 
mmu_btop(bytes); 515*7c478bd9Sstevel@tonic-gate 516*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG 517*7c478bd9Sstevel@tonic-gate memscrub_printmemlist("memscrub_memlist after", memscrub_memlist); 518*7c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages); 519*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */ 520*7c478bd9Sstevel@tonic-gate 521*7c478bd9Sstevel@tonic-gate mutex_exit(&memscrub_lock); 522*7c478bd9Sstevel@tonic-gate return (retval); 523*7c478bd9Sstevel@tonic-gate } 524