xref: /titanic_53/usr/src/uts/i86pc/os/memscrub.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate /*
30*7c478bd9Sstevel@tonic-gate  * i86pc Memory Scrubbing
31*7c478bd9Sstevel@tonic-gate  *
32*7c478bd9Sstevel@tonic-gate  * On detection of a correctable memory ECC error, the i86pc hardware
33*7c478bd9Sstevel@tonic-gate  * returns the corrected data to the requester and may re-write it
34*7c478bd9Sstevel@tonic-gate  * to memory (DRAM or NVRAM). Machines which do not re-write this to
35*7c478bd9Sstevel@tonic-gate  * memory should add an NMI handler to correct and rewrite.
36*7c478bd9Sstevel@tonic-gate  *
37*7c478bd9Sstevel@tonic-gate  * Scrubbing thus reduces the likelihood that multiple transient errors
38*7c478bd9Sstevel@tonic-gate  * will occur in the same memory word, making uncorrectable errors due
39*7c478bd9Sstevel@tonic-gate  * to transients less likely.
40*7c478bd9Sstevel@tonic-gate  *
41*7c478bd9Sstevel@tonic-gate  * Thus is born the desire that every memory location be periodically
42*7c478bd9Sstevel@tonic-gate  * accessed.
43*7c478bd9Sstevel@tonic-gate  *
44*7c478bd9Sstevel@tonic-gate  * This file implements a memory scrubbing thread.  This scrubber
45*7c478bd9Sstevel@tonic-gate  * guarantees that all of physical memory is accessed periodically
46*7c478bd9Sstevel@tonic-gate  * (memscrub_period_sec -- 12 hours).
47*7c478bd9Sstevel@tonic-gate  *
48*7c478bd9Sstevel@tonic-gate  * It attempts to do this as unobtrusively as possible.  The thread
49*7c478bd9Sstevel@tonic-gate  * schedules itself to wake up at an interval such that if it reads
50*7c478bd9Sstevel@tonic-gate  * memscrub_span_pages (4MB) on each wakeup, it will read all of physical
51*7c478bd9Sstevel@tonic-gate  * memory in memscrub_period_sec (12 hours).
52*7c478bd9Sstevel@tonic-gate  *
53*7c478bd9Sstevel@tonic-gate  * The scrubber uses the REP LODS so it reads 4MB in 0.15 secs (on P5-200).
54*7c478bd9Sstevel@tonic-gate  * When it completes a span, if all the CPUs are idle, it reads another span.
55*7c478bd9Sstevel@tonic-gate  * Typically it soaks up idle time this way to reach its deadline early
56*7c478bd9Sstevel@tonic-gate  * -- and sleeps until the next period begins.
57*7c478bd9Sstevel@tonic-gate  *
58*7c478bd9Sstevel@tonic-gate  * Maximal Cost Estimate:  8GB @ xxMB/s = xxx seconds spent in 640 wakeups
59*7c478bd9Sstevel@tonic-gate  * that run for 0.15 seconds at intervals of 67 seconds.
60*7c478bd9Sstevel@tonic-gate  *
61*7c478bd9Sstevel@tonic-gate  * In practice, the scrubber finds enough idle time to finish in a few
62*7c478bd9Sstevel@tonic-gate  * minutes, and sleeps until its 12 hour deadline.
63*7c478bd9Sstevel@tonic-gate  *
64*7c478bd9Sstevel@tonic-gate  * The scrubber maintains a private copy of the phys_install memory list
65*7c478bd9Sstevel@tonic-gate  * to keep track of what memory should be scrubbed.
66*7c478bd9Sstevel@tonic-gate  *
67*7c478bd9Sstevel@tonic-gate  * The following parameters can be set via /etc/system
68*7c478bd9Sstevel@tonic-gate  *
69*7c478bd9Sstevel@tonic-gate  * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB)
70*7c478bd9Sstevel@tonic-gate  * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
71*7c478bd9Sstevel@tonic-gate  * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0)
72*7c478bd9Sstevel@tonic-gate  * memscrub_delay_start_sec = (10 seconds)
73*7c478bd9Sstevel@tonic-gate  * disable_memscrub = (0)
74*7c478bd9Sstevel@tonic-gate  *
75*7c478bd9Sstevel@tonic-gate  * the scrubber will exit (or never be started) if it finds the variable
76*7c478bd9Sstevel@tonic-gate  * "disable_memscrub" set.
77*7c478bd9Sstevel@tonic-gate  *
78*7c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_SPAN_PAGES  is based on the guess that 0.15 sec
79*7c478bd9Sstevel@tonic-gate  * is a "good" amount of minimum time for the thread to run at a time.
80*7c478bd9Sstevel@tonic-gate  *
81*7c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
82*7c478bd9Sstevel@tonic-gate  * twice the frequency the hardware folk estimated would be necessary.
83*7c478bd9Sstevel@tonic-gate  *
84*7c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly
85*7c478bd9Sstevel@tonic-gate  * any other use of the system should be higher priority than scrubbing.
86*7c478bd9Sstevel@tonic-gate  */
87*7c478bd9Sstevel@tonic-gate 
88*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
89*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>		/* timeout, types, t_lock */
90*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
91*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>	/* MIN */
92*7c478bd9Sstevel@tonic-gate #include <sys/memlist.h>	/* memlist */
93*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h>		/* KMEM_NOSLEEP */
94*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>		/* ncpus_online */
95*7c478bd9Sstevel@tonic-gate #include <sys/debug.h>		/* ASSERTs */
96*7c478bd9Sstevel@tonic-gate #include <sys/vmem.h>
97*7c478bd9Sstevel@tonic-gate #include <sys/mman.h>
98*7c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
99*7c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h>
100*7c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h>
101*7c478bd9Sstevel@tonic-gate 
102*7c478bd9Sstevel@tonic-gate static caddr_t	memscrub_window;
103*7c478bd9Sstevel@tonic-gate static void	*memscrub_pte;
104*7c478bd9Sstevel@tonic-gate 
105*7c478bd9Sstevel@tonic-gate /*
106*7c478bd9Sstevel@tonic-gate  * Global Data:
107*7c478bd9Sstevel@tonic-gate  */
108*7c478bd9Sstevel@tonic-gate /*
109*7c478bd9Sstevel@tonic-gate  * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
110*7c478bd9Sstevel@tonic-gate  */
111*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */
112*7c478bd9Sstevel@tonic-gate 
113*7c478bd9Sstevel@tonic-gate /*
114*7c478bd9Sstevel@tonic-gate  * start only if at least MEMSCRUB_MIN_PAGES in system
115*7c478bd9Sstevel@tonic-gate  */
116*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_MIN_PAGES	((32 * 1024 * 1024) / PAGESIZE)
117*7c478bd9Sstevel@tonic-gate 
118*7c478bd9Sstevel@tonic-gate /*
119*7c478bd9Sstevel@tonic-gate  * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
120*7c478bd9Sstevel@tonic-gate  */
121*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_DFL_SPAN_PAGES	((4 * 1024 * 1024) / PAGESIZE)
122*7c478bd9Sstevel@tonic-gate 
123*7c478bd9Sstevel@tonic-gate /*
124*7c478bd9Sstevel@tonic-gate  * almost anything is higher priority than scrubbing
125*7c478bd9Sstevel@tonic-gate  */
126*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_DFL_THREAD_PRI	0
127*7c478bd9Sstevel@tonic-gate 
128*7c478bd9Sstevel@tonic-gate /*
129*7c478bd9Sstevel@tonic-gate  * we can patch these defaults in /etc/system if necessary
130*7c478bd9Sstevel@tonic-gate  */
131*7c478bd9Sstevel@tonic-gate uint_t disable_memscrub = 0;
132*7c478bd9Sstevel@tonic-gate pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES;
133*7c478bd9Sstevel@tonic-gate pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
134*7c478bd9Sstevel@tonic-gate time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
135*7c478bd9Sstevel@tonic-gate uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
136*7c478bd9Sstevel@tonic-gate time_t memscrub_delay_start_sec = 10;
137*7c478bd9Sstevel@tonic-gate 
138*7c478bd9Sstevel@tonic-gate /*
139*7c478bd9Sstevel@tonic-gate  * Static Routines
140*7c478bd9Sstevel@tonic-gate  */
141*7c478bd9Sstevel@tonic-gate static void memscrubber(void);
142*7c478bd9Sstevel@tonic-gate static int system_is_idle(void);
143*7c478bd9Sstevel@tonic-gate static int memscrub_add_span(uint64_t, uint64_t);
144*7c478bd9Sstevel@tonic-gate 
145*7c478bd9Sstevel@tonic-gate /*
146*7c478bd9Sstevel@tonic-gate  * Static Data
147*7c478bd9Sstevel@tonic-gate  */
148*7c478bd9Sstevel@tonic-gate static struct memlist *memscrub_memlist;
149*7c478bd9Sstevel@tonic-gate static uint_t memscrub_phys_pages;
150*7c478bd9Sstevel@tonic-gate 
151*7c478bd9Sstevel@tonic-gate static kcondvar_t memscrub_cv;
152*7c478bd9Sstevel@tonic-gate static kmutex_t memscrub_lock;
153*7c478bd9Sstevel@tonic-gate /*
154*7c478bd9Sstevel@tonic-gate  * memscrub_lock protects memscrub_memlist
155*7c478bd9Sstevel@tonic-gate  */
156*7c478bd9Sstevel@tonic-gate uint_t memscrub_scans_done;
157*7c478bd9Sstevel@tonic-gate 
158*7c478bd9Sstevel@tonic-gate uint_t memscrub_done_early;
159*7c478bd9Sstevel@tonic-gate uint_t memscrub_early_sec;
160*7c478bd9Sstevel@tonic-gate 
161*7c478bd9Sstevel@tonic-gate uint_t memscrub_done_late;
162*7c478bd9Sstevel@tonic-gate time_t memscrub_late_sec;
163*7c478bd9Sstevel@tonic-gate 
164*7c478bd9Sstevel@tonic-gate /*
165*7c478bd9Sstevel@tonic-gate  * create memscrub_memlist from phys_install list
166*7c478bd9Sstevel@tonic-gate  * initialize locks, set memscrub_phys_pages.
167*7c478bd9Sstevel@tonic-gate  */
168*7c478bd9Sstevel@tonic-gate void
169*7c478bd9Sstevel@tonic-gate memscrub_init()
170*7c478bd9Sstevel@tonic-gate {
171*7c478bd9Sstevel@tonic-gate 	struct memlist *src;
172*7c478bd9Sstevel@tonic-gate 
173*7c478bd9Sstevel@tonic-gate 	if (physmem < memscrub_min_pages)
174*7c478bd9Sstevel@tonic-gate 		return;
175*7c478bd9Sstevel@tonic-gate 
176*7c478bd9Sstevel@tonic-gate 	if (!kpm_enable) {
177*7c478bd9Sstevel@tonic-gate 		memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
178*7c478bd9Sstevel@tonic-gate 		memscrub_pte = hat_mempte_setup(memscrub_window);
179*7c478bd9Sstevel@tonic-gate 	}
180*7c478bd9Sstevel@tonic-gate 
181*7c478bd9Sstevel@tonic-gate 	/*
182*7c478bd9Sstevel@tonic-gate 	 * copy phys_install to memscrub_memlist
183*7c478bd9Sstevel@tonic-gate 	 */
184*7c478bd9Sstevel@tonic-gate 	for (src = phys_install; src; src = src->next) {
185*7c478bd9Sstevel@tonic-gate 		if (memscrub_add_span(src->address, src->size)) {
186*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
187*7c478bd9Sstevel@tonic-gate 			    "Memory scrubber failed to initialize\n");
188*7c478bd9Sstevel@tonic-gate 			return;
189*7c478bd9Sstevel@tonic-gate 		}
190*7c478bd9Sstevel@tonic-gate 	}
191*7c478bd9Sstevel@tonic-gate 
192*7c478bd9Sstevel@tonic-gate 	mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
193*7c478bd9Sstevel@tonic-gate 	cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);
194*7c478bd9Sstevel@tonic-gate 
195*7c478bd9Sstevel@tonic-gate 	/*
196*7c478bd9Sstevel@tonic-gate 	 * create memscrubber thread
197*7c478bd9Sstevel@tonic-gate 	 */
198*7c478bd9Sstevel@tonic-gate 	(void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0,
199*7c478bd9Sstevel@tonic-gate 	    TS_RUN, memscrub_thread_pri);
200*7c478bd9Sstevel@tonic-gate }
201*7c478bd9Sstevel@tonic-gate 
#ifdef MEMSCRUB_DEBUG
/*
 * Debug helper: dump each span in the given memlist to the console,
 * preceded by the supplied title.
 */
void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *mlp;

	cmn_err(CE_CONT, "%s:\n", title);

	for (mlp = listp; mlp != NULL; mlp = mlp->next)
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    mlp->address, mlp->size);
}
#endif /* MEMSCRUB_DEBUG */
216*7c478bd9Sstevel@tonic-gate 
217*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
218*7c478bd9Sstevel@tonic-gate void
219*7c478bd9Sstevel@tonic-gate memscrub_wakeup(void *c)
220*7c478bd9Sstevel@tonic-gate {
221*7c478bd9Sstevel@tonic-gate 	/*
222*7c478bd9Sstevel@tonic-gate 	 * grab mutex to guarantee that our wakeup call
223*7c478bd9Sstevel@tonic-gate 	 * arrives after we go to sleep -- so we can't sleep forever.
224*7c478bd9Sstevel@tonic-gate 	 */
225*7c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
226*7c478bd9Sstevel@tonic-gate 	cv_signal(&memscrub_cv);
227*7c478bd9Sstevel@tonic-gate 	mutex_exit(&memscrub_lock);
228*7c478bd9Sstevel@tonic-gate }
229*7c478bd9Sstevel@tonic-gate 
230*7c478bd9Sstevel@tonic-gate /*
231*7c478bd9Sstevel@tonic-gate  * this calculation doesn't account for the time that the actual scan
232*7c478bd9Sstevel@tonic-gate  * consumes -- so we'd fall slightly behind schedule with this
233*7c478bd9Sstevel@tonic-gate  * interval_sec.  but the idle loop optimization below usually makes us
234*7c478bd9Sstevel@tonic-gate  * come in way ahead of schedule.
235*7c478bd9Sstevel@tonic-gate  */
236*7c478bd9Sstevel@tonic-gate static int
237*7c478bd9Sstevel@tonic-gate compute_interval_sec()
238*7c478bd9Sstevel@tonic-gate {
239*7c478bd9Sstevel@tonic-gate 	if (memscrub_phys_pages <= memscrub_span_pages)
240*7c478bd9Sstevel@tonic-gate 		return (memscrub_period_sec);
241*7c478bd9Sstevel@tonic-gate 	else
242*7c478bd9Sstevel@tonic-gate 		return (memscrub_period_sec/
243*7c478bd9Sstevel@tonic-gate 			(memscrub_phys_pages/memscrub_span_pages));
244*7c478bd9Sstevel@tonic-gate }
245*7c478bd9Sstevel@tonic-gate 
/*
 * The memory scrubber thread.  Loops forever: sleeps until the next
 * scheduled wakeup, then reads one span (memscrub_span_pages) of
 * physical memory one page at a time through memscrub_window.  While
 * system_is_idle() holds, it keeps reading spans back-to-back, so it
 * normally finishes the full scan early and then sleeps until the
 * next memscrub_period_sec deadline.  Exits when disable_memscrub
 * is set, or immediately if the memlist was never built.
 */
void
memscrubber()
{
	time_t deadline;	/* wall time current full scan must finish by */
	uint64_t mlp_last_addr;	/* end of the current memlist entry */
	uint64_t mlp_next_addr;	/* next physical address to scan */
	int reached_end = 1;	/* nonzero: just wrapped past end of memory */
	time_t interval_sec = 0;
	struct memlist *mlp;

	extern void scan_memory(caddr_t, size_t);

	if (memscrub_memlist == NULL) {
		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
		goto memscrub_exit;
	}

	/* start the scan cursor at the first span */
	mlp = memscrub_memlist;
	mlp_next_addr = mlp->address;
	mlp_last_addr = mlp->address + mlp->size;

	/* delay the first pass so boot isn't slowed down */
	deadline = gethrestime_sec() + memscrub_delay_start_sec;

	for (;;) {
		if (disable_memscrub)
			break;

		mutex_enter(&memscrub_lock);

		/*
		 * did we just reach the end of memory?
		 */
		if (reached_end) {
			time_t now = gethrestime_sec();

			if (now >= deadline) {
				memscrub_done_late++;
				memscrub_late_sec += (now - deadline);
				/*
				 * past deadline, start right away
				 */
				interval_sec = 0;

				deadline = now + memscrub_period_sec;
			} else {
				/*
				 * we finished ahead of schedule.
				 * wait till previous deadline before re-start.
				 */
				interval_sec = deadline - now;
				memscrub_done_early++;
				memscrub_early_sec += interval_sec;
				deadline += memscrub_period_sec;
			}
		} else {
			/* mid-scan: pace ourselves to meet the deadline */
			interval_sec = compute_interval_sec();
		}

		/*
		 * hit the snooze bar -- schedule our own wakeup.
		 * memscrub_wakeup() takes memscrub_lock, so the signal
		 * cannot fire before the cv_wait() below.
		 */
		(void) timeout(memscrub_wakeup, NULL, interval_sec * hz);

		/*
		 * go to sleep
		 */
		cv_wait(&memscrub_cv, &memscrub_lock);

		mutex_exit(&memscrub_lock);

		/* scan one span; keep going while the system stays idle */
		do {
			pgcnt_t pages = memscrub_span_pages;
			uint64_t address = mlp_next_addr;

			if (disable_memscrub)
				break;

			/* lock protects the memlist and the scan cursor */
			mutex_enter(&memscrub_lock);

			/*
			 * Make sure we don't try to scan beyond the end of
			 * the current memlist.  If we would, then resize
			 * our scan target for this iteration, and prepare
			 * to read the next memlist entry on the next
			 * iteration.
			 */
			reached_end = 0;
			if (address + mmu_ptob(pages) >= mlp_last_addr) {
				pages = mmu_btop(mlp_last_addr - address);
				mlp = mlp->next;
				if (mlp == NULL) {
					/* wrapped: restart from first span */
					reached_end = 1;
					mlp = memscrub_memlist;
				}
				mlp_next_addr = mlp->address;
				mlp_last_addr = mlp->address + mlp->size;
			} else {
				mlp_next_addr += mmu_ptob(pages);
			}

			mutex_exit(&memscrub_lock);

			while (pages--) {
				pfn_t pfn = btop(address);

				/*
				 * Without segkpm, the memscrubber cannot
				 * be allowed to migrate across CPUs, as
				 * the CPU-specific mapping of
				 * memscrub_window would be incorrect.
				 * With segkpm, switching CPUs is legal, but
				 * inefficient.  We don't use
				 * kpreempt_disable as it might hold a
				 * higher priority thread (eg, RT) too long
				 * off CPU.
				 */
				thread_affinity_set(curthread, CPU_CURRENT);
				if (kpm_enable)
					memscrub_window = hat_kpm_pfn2va(pfn);
				else
					hat_mempte_remap(pfn, memscrub_window,
					    memscrub_pte,
					    PROT_READ, HAT_LOAD_NOCONSIST);

				/*
				 * the read itself is the scrub: the
				 * hardware corrects single-bit errors
				 * on the way through.
				 */
				scan_memory(memscrub_window, PAGESIZE);

				thread_affinity_clear(curthread);
				address += MMU_PAGESIZE;
			}

			memscrub_scans_done++;
		} while (!reached_end && system_is_idle());
	}

memscrub_exit:

	cmn_err(CE_NOTE, "memory scrubber exiting.");

	cv_destroy(&memscrub_cv);

	thread_exit();
}
388*7c478bd9Sstevel@tonic-gate 
389*7c478bd9Sstevel@tonic-gate 
390*7c478bd9Sstevel@tonic-gate /*
391*7c478bd9Sstevel@tonic-gate  * return 1 if we're MP and all the other CPUs are idle
392*7c478bd9Sstevel@tonic-gate  */
393*7c478bd9Sstevel@tonic-gate static int
394*7c478bd9Sstevel@tonic-gate system_is_idle()
395*7c478bd9Sstevel@tonic-gate {
396*7c478bd9Sstevel@tonic-gate 	int cpu_id;
397*7c478bd9Sstevel@tonic-gate 	int found = 0;
398*7c478bd9Sstevel@tonic-gate 
399*7c478bd9Sstevel@tonic-gate 	if (1 == ncpus_online)
400*7c478bd9Sstevel@tonic-gate 		return (0);
401*7c478bd9Sstevel@tonic-gate 
402*7c478bd9Sstevel@tonic-gate 	for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) {
403*7c478bd9Sstevel@tonic-gate 		if (!cpu[cpu_id])
404*7c478bd9Sstevel@tonic-gate 			continue;
405*7c478bd9Sstevel@tonic-gate 
406*7c478bd9Sstevel@tonic-gate 		found++;
407*7c478bd9Sstevel@tonic-gate 
408*7c478bd9Sstevel@tonic-gate 		if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) {
409*7c478bd9Sstevel@tonic-gate 			if (CPU->cpu_id == cpu_id &&
410*7c478bd9Sstevel@tonic-gate 			    CPU->cpu_disp->disp_nrunnable == 0)
411*7c478bd9Sstevel@tonic-gate 				continue;
412*7c478bd9Sstevel@tonic-gate 			return (0);
413*7c478bd9Sstevel@tonic-gate 		}
414*7c478bd9Sstevel@tonic-gate 
415*7c478bd9Sstevel@tonic-gate 		if (found == ncpus)
416*7c478bd9Sstevel@tonic-gate 			break;
417*7c478bd9Sstevel@tonic-gate 	}
418*7c478bd9Sstevel@tonic-gate 	return (1);
419*7c478bd9Sstevel@tonic-gate }
420*7c478bd9Sstevel@tonic-gate 
421*7c478bd9Sstevel@tonic-gate /*
422*7c478bd9Sstevel@tonic-gate  * add a span to the memscrub list
423*7c478bd9Sstevel@tonic-gate  */
424*7c478bd9Sstevel@tonic-gate static int
425*7c478bd9Sstevel@tonic-gate memscrub_add_span(uint64_t start, uint64_t bytes)
426*7c478bd9Sstevel@tonic-gate {
427*7c478bd9Sstevel@tonic-gate 	struct memlist *dst;
428*7c478bd9Sstevel@tonic-gate 	struct memlist *prev, *next;
429*7c478bd9Sstevel@tonic-gate 	uint64_t end = start + bytes - 1;
430*7c478bd9Sstevel@tonic-gate 	int retval = 0;
431*7c478bd9Sstevel@tonic-gate 
432*7c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
433*7c478bd9Sstevel@tonic-gate 
434*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
435*7c478bd9Sstevel@tonic-gate 	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
436*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
437*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
438*7c478bd9Sstevel@tonic-gate 		" size: 0x%llx\n", start, bytes);
439*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
440*7c478bd9Sstevel@tonic-gate 
441*7c478bd9Sstevel@tonic-gate 	/*
442*7c478bd9Sstevel@tonic-gate 	 * Scan through the list to find the proper place to install it.
443*7c478bd9Sstevel@tonic-gate 	 */
444*7c478bd9Sstevel@tonic-gate 	prev = NULL;
445*7c478bd9Sstevel@tonic-gate 	next = memscrub_memlist;
446*7c478bd9Sstevel@tonic-gate 	while (next) {
447*7c478bd9Sstevel@tonic-gate 		uint64_t ns = next->address;
448*7c478bd9Sstevel@tonic-gate 		uint64_t ne = next->address + next->size - 1;
449*7c478bd9Sstevel@tonic-gate 
450*7c478bd9Sstevel@tonic-gate 		/*
451*7c478bd9Sstevel@tonic-gate 		 * If this span overlaps with an existing span, then
452*7c478bd9Sstevel@tonic-gate 		 * something has gone horribly wrong with the phys_install
453*7c478bd9Sstevel@tonic-gate 		 * list.  In fact, I'm surprised we made it this far.
454*7c478bd9Sstevel@tonic-gate 		 */
455*7c478bd9Sstevel@tonic-gate 		if ((start >= ns && start <= ne) || (end >= ns && end <= ne) ||
456*7c478bd9Sstevel@tonic-gate 		    (start < ns && end > ne))
457*7c478bd9Sstevel@tonic-gate 			panic("memscrub found overlapping memory ranges "
458*7c478bd9Sstevel@tonic-gate 			    "(0x%p-0x%p) and (0x%p-0x%p)",
459*7c478bd9Sstevel@tonic-gate 			    (void *)start, (void *)end, (void *)ns, (void *)ne);
460*7c478bd9Sstevel@tonic-gate 
461*7c478bd9Sstevel@tonic-gate 		/*
462*7c478bd9Sstevel@tonic-gate 		 * New span can be appended to an existing one.
463*7c478bd9Sstevel@tonic-gate 		 */
464*7c478bd9Sstevel@tonic-gate 		if (start == ne + 1) {
465*7c478bd9Sstevel@tonic-gate 			next->size += bytes;
466*7c478bd9Sstevel@tonic-gate 			goto add_done;
467*7c478bd9Sstevel@tonic-gate 		}
468*7c478bd9Sstevel@tonic-gate 
469*7c478bd9Sstevel@tonic-gate 		/*
470*7c478bd9Sstevel@tonic-gate 		 * New span can be prepended to an existing one.
471*7c478bd9Sstevel@tonic-gate 		 */
472*7c478bd9Sstevel@tonic-gate 		if (end + 1 == ns) {
473*7c478bd9Sstevel@tonic-gate 			next->size += bytes;
474*7c478bd9Sstevel@tonic-gate 			next->address = start;
475*7c478bd9Sstevel@tonic-gate 			goto add_done;
476*7c478bd9Sstevel@tonic-gate 		}
477*7c478bd9Sstevel@tonic-gate 
478*7c478bd9Sstevel@tonic-gate 		/*
479*7c478bd9Sstevel@tonic-gate 		 * If the next span has a higher start address than the new
480*7c478bd9Sstevel@tonic-gate 		 * one, then we have found the right spot for our
481*7c478bd9Sstevel@tonic-gate 		 * insertion.
482*7c478bd9Sstevel@tonic-gate 		 */
483*7c478bd9Sstevel@tonic-gate 		if (ns > start)
484*7c478bd9Sstevel@tonic-gate 			break;
485*7c478bd9Sstevel@tonic-gate 
486*7c478bd9Sstevel@tonic-gate 		prev = next;
487*7c478bd9Sstevel@tonic-gate 		next = next->next;
488*7c478bd9Sstevel@tonic-gate 	}
489*7c478bd9Sstevel@tonic-gate 
490*7c478bd9Sstevel@tonic-gate 	/*
491*7c478bd9Sstevel@tonic-gate 	 * allocate a new struct memlist
492*7c478bd9Sstevel@tonic-gate 	 */
493*7c478bd9Sstevel@tonic-gate 	dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
494*7c478bd9Sstevel@tonic-gate 	if (dst == NULL) {
495*7c478bd9Sstevel@tonic-gate 		retval = -1;
496*7c478bd9Sstevel@tonic-gate 		goto add_done;
497*7c478bd9Sstevel@tonic-gate 	}
498*7c478bd9Sstevel@tonic-gate 	dst->address = start;
499*7c478bd9Sstevel@tonic-gate 	dst->size = bytes;
500*7c478bd9Sstevel@tonic-gate 	dst->prev = prev;
501*7c478bd9Sstevel@tonic-gate 	dst->next = next;
502*7c478bd9Sstevel@tonic-gate 
503*7c478bd9Sstevel@tonic-gate 	if (prev)
504*7c478bd9Sstevel@tonic-gate 		prev->next = dst;
505*7c478bd9Sstevel@tonic-gate 	else
506*7c478bd9Sstevel@tonic-gate 		memscrub_memlist = dst;
507*7c478bd9Sstevel@tonic-gate 
508*7c478bd9Sstevel@tonic-gate 	if (next)
509*7c478bd9Sstevel@tonic-gate 		next->prev = dst;
510*7c478bd9Sstevel@tonic-gate 
511*7c478bd9Sstevel@tonic-gate add_done:
512*7c478bd9Sstevel@tonic-gate 
513*7c478bd9Sstevel@tonic-gate 	if (retval != -1)
514*7c478bd9Sstevel@tonic-gate 		memscrub_phys_pages += mmu_btop(bytes);
515*7c478bd9Sstevel@tonic-gate 
516*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
517*7c478bd9Sstevel@tonic-gate 	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
518*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
519*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
520*7c478bd9Sstevel@tonic-gate 
521*7c478bd9Sstevel@tonic-gate 	mutex_exit(&memscrub_lock);
522*7c478bd9Sstevel@tonic-gate 	return (retval);
523*7c478bd9Sstevel@tonic-gate }
524