xref: /titanic_53/usr/src/uts/sun4u/os/memscrub.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate /*
30*7c478bd9Sstevel@tonic-gate  * sun4u Memory Scrubbing
31*7c478bd9Sstevel@tonic-gate  *
32*7c478bd9Sstevel@tonic-gate  * On detection of a correctable memory ECC error, the sun4u kernel
33*7c478bd9Sstevel@tonic-gate  * returns the corrected data to the requester and re-writes it
34*7c478bd9Sstevel@tonic-gate  * to memory (DRAM).  So if the correctable error was transient,
35*7c478bd9Sstevel@tonic-gate  * the read has effectively been cleaned (scrubbed) from memory.
36*7c478bd9Sstevel@tonic-gate  *
 37*7c478bd9Sstevel@tonic-gate  * Scrubbing thus reduces the likelihood that multiple transient errors
38*7c478bd9Sstevel@tonic-gate  * will occur in the same memory word, making uncorrectable errors due
39*7c478bd9Sstevel@tonic-gate  * to transients less likely.
40*7c478bd9Sstevel@tonic-gate  *
41*7c478bd9Sstevel@tonic-gate  * Thus is born the desire that every memory location be periodically
42*7c478bd9Sstevel@tonic-gate  * accessed.
43*7c478bd9Sstevel@tonic-gate  *
44*7c478bd9Sstevel@tonic-gate  * This file implements a memory scrubbing thread.  This scrubber
45*7c478bd9Sstevel@tonic-gate  * guarantees that all of physical memory is accessed periodically
46*7c478bd9Sstevel@tonic-gate  * (memscrub_period_sec -- 12 hours).
47*7c478bd9Sstevel@tonic-gate  *
48*7c478bd9Sstevel@tonic-gate  * It attempts to do this as unobtrusively as possible.  The thread
49*7c478bd9Sstevel@tonic-gate  * schedules itself to wake up at an interval such that if it reads
50*7c478bd9Sstevel@tonic-gate  * memscrub_span_pages (8MB) on each wakeup, it will read all of physical
 51*7c478bd9Sstevel@tonic-gate  * memory in memscrub_period_sec (12 hours).
52*7c478bd9Sstevel@tonic-gate  *
53*7c478bd9Sstevel@tonic-gate  * The scrubber uses the block load hardware to read memory @ 268MB/s,
54*7c478bd9Sstevel@tonic-gate  * so it reads spans of 8MB in 0.03 seconds.  Unlike the original sun4d
55*7c478bd9Sstevel@tonic-gate  * scrubber the sun4u scrubber does not read ahead if the system is idle
 56*7c478bd9Sstevel@tonic-gate  * because we can read memory very efficiently.
57*7c478bd9Sstevel@tonic-gate  *
58*7c478bd9Sstevel@tonic-gate  * The scrubber maintains a private copy of the phys_install memory list
59*7c478bd9Sstevel@tonic-gate  * to keep track of what memory should be scrubbed.
60*7c478bd9Sstevel@tonic-gate  *
61*7c478bd9Sstevel@tonic-gate  * The global routines memscrub_add_span() and memscrub_delete_span() are
62*7c478bd9Sstevel@tonic-gate  * used to add and delete from this list.  If hotplug memory is later
63*7c478bd9Sstevel@tonic-gate  * supported these two routines can be used to notify the scrubber of
64*7c478bd9Sstevel@tonic-gate  * memory configuration changes.
65*7c478bd9Sstevel@tonic-gate  *
66*7c478bd9Sstevel@tonic-gate  * The following parameters can be set via /etc/system
67*7c478bd9Sstevel@tonic-gate  *
68*7c478bd9Sstevel@tonic-gate  * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (8MB)
69*7c478bd9Sstevel@tonic-gate  * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
70*7c478bd9Sstevel@tonic-gate  * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI)
71*7c478bd9Sstevel@tonic-gate  * memscrub_delay_start_sec = (5 minutes)
72*7c478bd9Sstevel@tonic-gate  * memscrub_verbose = (0)
73*7c478bd9Sstevel@tonic-gate  * memscrub_override_ticks = (1 tick)
74*7c478bd9Sstevel@tonic-gate  * disable_memscrub = (0)
75*7c478bd9Sstevel@tonic-gate  * pause_memscrub = (0)
76*7c478bd9Sstevel@tonic-gate  * read_all_memscrub = (0)
77*7c478bd9Sstevel@tonic-gate  *
78*7c478bd9Sstevel@tonic-gate  * The scrubber will print NOTICE messages of what it is doing if
79*7c478bd9Sstevel@tonic-gate  * "memscrub_verbose" is set.
80*7c478bd9Sstevel@tonic-gate  *
81*7c478bd9Sstevel@tonic-gate  * If the scrubber's sleep time calculation drops to zero ticks,
82*7c478bd9Sstevel@tonic-gate  * memscrub_override_ticks will be used as the sleep time instead. The
83*7c478bd9Sstevel@tonic-gate  * sleep time should only drop to zero on a system with over 32.95
84*7c478bd9Sstevel@tonic-gate  * terabytes of memory, or where the default scrubber parameters have
85*7c478bd9Sstevel@tonic-gate  * been adjusted. For example, reducing memscrub_span_pages or
86*7c478bd9Sstevel@tonic-gate  * memscrub_period_sec causes the sleep time to drop to zero with less
87*7c478bd9Sstevel@tonic-gate  * memory. Note that since the sleep time is calculated in clock ticks,
88*7c478bd9Sstevel@tonic-gate  * using hires clock ticks allows for more memory before the sleep time
89*7c478bd9Sstevel@tonic-gate  * becomes zero.
90*7c478bd9Sstevel@tonic-gate  *
91*7c478bd9Sstevel@tonic-gate  * The scrubber will exit (or never be started) if it finds the variable
92*7c478bd9Sstevel@tonic-gate  * "disable_memscrub" set.
93*7c478bd9Sstevel@tonic-gate  *
94*7c478bd9Sstevel@tonic-gate  * The scrubber will pause (not read memory) when "pause_memscrub"
95*7c478bd9Sstevel@tonic-gate  * is set.  It will check the state of pause_memscrub at each wakeup
96*7c478bd9Sstevel@tonic-gate  * period.  The scrubber will not make up for lost time.  If you
97*7c478bd9Sstevel@tonic-gate  * pause the scrubber for a prolonged period of time you can use
98*7c478bd9Sstevel@tonic-gate  * the "read_all_memscrub" switch (see below) to catch up. In addition,
99*7c478bd9Sstevel@tonic-gate  * pause_memscrub is used internally by the post memory DR callbacks.
100*7c478bd9Sstevel@tonic-gate  * It is set for the small period of time during which the callbacks
101*7c478bd9Sstevel@tonic-gate  * are executing. This ensures "memscrub_lock" will be released,
102*7c478bd9Sstevel@tonic-gate  * allowing the callbacks to finish.
103*7c478bd9Sstevel@tonic-gate  *
104*7c478bd9Sstevel@tonic-gate  * The scrubber will read all memory if "read_all_memscrub" is set.
105*7c478bd9Sstevel@tonic-gate  * The normal span read will also occur during the wakeup.
106*7c478bd9Sstevel@tonic-gate  *
107*7c478bd9Sstevel@tonic-gate  * MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system
108*7c478bd9Sstevel@tonic-gate  * must have before we'll start the scrubber.
109*7c478bd9Sstevel@tonic-gate  *
110*7c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_SPAN_PAGES (8MB) is based on the guess that 0.03 sec
111*7c478bd9Sstevel@tonic-gate  * is a "good" amount of minimum time for the thread to run at a time.
112*7c478bd9Sstevel@tonic-gate  *
113*7c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
114*7c478bd9Sstevel@tonic-gate  * twice the frequency the hardware folk estimated would be necessary.
115*7c478bd9Sstevel@tonic-gate  *
116*7c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption
 117*7c478bd9Sstevel@tonic-gate  * that the scrubber should get its fair share of time (since it
118*7c478bd9Sstevel@tonic-gate  * is short).  At a priority of 0 the scrubber will be starved.
119*7c478bd9Sstevel@tonic-gate  */
120*7c478bd9Sstevel@tonic-gate 
121*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>		/* timeout, types, t_lock */
122*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
123*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>	/* MIN */
124*7c478bd9Sstevel@tonic-gate #include <sys/memlist.h>	/* memlist */
125*7c478bd9Sstevel@tonic-gate #include <sys/mem_config.h>	/* memory add/delete */
126*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h>		/* KMEM_NOSLEEP */
127*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>		/* ncpus_online */
128*7c478bd9Sstevel@tonic-gate #include <sys/debug.h>		/* ASSERTs */
129*7c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>	/* lddphys */
130*7c478bd9Sstevel@tonic-gate #include <sys/cpu_module.h>	/* vtag_flushpage */
131*7c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
132*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h>		/* atomic_add_32 */
133*7c478bd9Sstevel@tonic-gate 
134*7c478bd9Sstevel@tonic-gate #include <vm/hat.h>
135*7c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
136*7c478bd9Sstevel@tonic-gate #include <vm/hat_sfmmu.h>	/* XXX FIXME - delete */
137*7c478bd9Sstevel@tonic-gate 
138*7c478bd9Sstevel@tonic-gate #include <sys/time.h>
139*7c478bd9Sstevel@tonic-gate #include <sys/callb.h>		/* CPR callback */
140*7c478bd9Sstevel@tonic-gate #include <sys/ontrap.h>
141*7c478bd9Sstevel@tonic-gate 
142*7c478bd9Sstevel@tonic-gate /*
143*7c478bd9Sstevel@tonic-gate  * Should really have paddr_t defined, but it is broken.  Use
144*7c478bd9Sstevel@tonic-gate  * ms_paddr_t in the meantime to make the code cleaner
145*7c478bd9Sstevel@tonic-gate  */
146*7c478bd9Sstevel@tonic-gate typedef uint64_t ms_paddr_t;
147*7c478bd9Sstevel@tonic-gate 
148*7c478bd9Sstevel@tonic-gate /*
149*7c478bd9Sstevel@tonic-gate  * Global Routines:
150*7c478bd9Sstevel@tonic-gate  */
151*7c478bd9Sstevel@tonic-gate int memscrub_add_span(pfn_t pfn, pgcnt_t pages);
152*7c478bd9Sstevel@tonic-gate int memscrub_delete_span(pfn_t pfn, pgcnt_t pages);
153*7c478bd9Sstevel@tonic-gate int memscrub_init(void);
154*7c478bd9Sstevel@tonic-gate 
155*7c478bd9Sstevel@tonic-gate /*
156*7c478bd9Sstevel@tonic-gate  * Global Data:
157*7c478bd9Sstevel@tonic-gate  */
158*7c478bd9Sstevel@tonic-gate 
159*7c478bd9Sstevel@tonic-gate /*
160*7c478bd9Sstevel@tonic-gate  * scrub if we have at least this many pages
161*7c478bd9Sstevel@tonic-gate  */
162*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_MIN_PAGES (32 * 1024 * 1024 / PAGESIZE)
163*7c478bd9Sstevel@tonic-gate 
164*7c478bd9Sstevel@tonic-gate /*
165*7c478bd9Sstevel@tonic-gate  * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
166*7c478bd9Sstevel@tonic-gate  */
167*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */
168*7c478bd9Sstevel@tonic-gate 
169*7c478bd9Sstevel@tonic-gate /*
170*7c478bd9Sstevel@tonic-gate  * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
171*7c478bd9Sstevel@tonic-gate  */
172*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_DFL_SPAN_PAGES	((8 * 1024 * 1024) / PAGESIZE)
173*7c478bd9Sstevel@tonic-gate 
174*7c478bd9Sstevel@tonic-gate /*
175*7c478bd9Sstevel@tonic-gate  * almost anything is higher priority than scrubbing
176*7c478bd9Sstevel@tonic-gate  */
177*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_DFL_THREAD_PRI	MINCLSYSPRI
178*7c478bd9Sstevel@tonic-gate 
179*7c478bd9Sstevel@tonic-gate /*
180*7c478bd9Sstevel@tonic-gate  * size used when scanning memory
181*7c478bd9Sstevel@tonic-gate  */
182*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_BLOCK_SIZE		256
183*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_BLOCK_SIZE_SHIFT	8 	/* log2(MEMSCRUB_BLOCK_SIZE) */
184*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_BLOCKS_PER_PAGE	(PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)
185*7c478bd9Sstevel@tonic-gate 
186*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_BPP4M		MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT
187*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_BPP512K	MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT
188*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_BPP64K		MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT
189*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_BPP		MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT
190*7c478bd9Sstevel@tonic-gate 
191*7c478bd9Sstevel@tonic-gate /*
192*7c478bd9Sstevel@tonic-gate  * This message indicates that we have exceeded the limitations of
193*7c478bd9Sstevel@tonic-gate  * the memscrubber. See the comments above regarding what would
194*7c478bd9Sstevel@tonic-gate  * cause the sleep time to become zero. In DEBUG mode, this message
195*7c478bd9Sstevel@tonic-gate  * is logged on the console and in the messages file. In non-DEBUG
196*7c478bd9Sstevel@tonic-gate  * mode, it is only logged in the messages file.
197*7c478bd9Sstevel@tonic-gate  */
198*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
199*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_OVERRIDE_MSG	"Memory scrubber sleep time is zero " \
200*7c478bd9Sstevel@tonic-gate 	"seconds, consuming entire CPU."
201*7c478bd9Sstevel@tonic-gate #else
202*7c478bd9Sstevel@tonic-gate #define	MEMSCRUB_OVERRIDE_MSG	"!Memory scrubber sleep time is zero " \
203*7c478bd9Sstevel@tonic-gate 	"seconds, consuming entire CPU."
204*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */
205*7c478bd9Sstevel@tonic-gate 
206*7c478bd9Sstevel@tonic-gate /*
207*7c478bd9Sstevel@tonic-gate  * we can patch these defaults in /etc/system if necessary
208*7c478bd9Sstevel@tonic-gate  */
209*7c478bd9Sstevel@tonic-gate uint_t disable_memscrub = 0;
210*7c478bd9Sstevel@tonic-gate uint_t pause_memscrub = 0;
211*7c478bd9Sstevel@tonic-gate uint_t read_all_memscrub = 0;
212*7c478bd9Sstevel@tonic-gate uint_t memscrub_verbose = 0;
213*7c478bd9Sstevel@tonic-gate uint_t memscrub_all_idle = 0;
214*7c478bd9Sstevel@tonic-gate uint_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
215*7c478bd9Sstevel@tonic-gate uint_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
216*7c478bd9Sstevel@tonic-gate uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
217*7c478bd9Sstevel@tonic-gate uint_t memscrub_delay_start_sec = 5 * 60;
218*7c478bd9Sstevel@tonic-gate uint_t memscrub_override_ticks = 1;
219*7c478bd9Sstevel@tonic-gate 
220*7c478bd9Sstevel@tonic-gate /*
221*7c478bd9Sstevel@tonic-gate  * Static Routines
222*7c478bd9Sstevel@tonic-gate  */
223*7c478bd9Sstevel@tonic-gate static void memscrubber(void);
224*7c478bd9Sstevel@tonic-gate static void memscrub_cleanup(void);
225*7c478bd9Sstevel@tonic-gate static int memscrub_add_span_gen(pfn_t, pgcnt_t, struct memlist **, uint_t *);
226*7c478bd9Sstevel@tonic-gate static int memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp);
227*7c478bd9Sstevel@tonic-gate static void memscrub_scan(uint_t blks, ms_paddr_t src);
228*7c478bd9Sstevel@tonic-gate 
229*7c478bd9Sstevel@tonic-gate /*
230*7c478bd9Sstevel@tonic-gate  * Static Data
231*7c478bd9Sstevel@tonic-gate  */
232*7c478bd9Sstevel@tonic-gate 
233*7c478bd9Sstevel@tonic-gate static struct memlist *memscrub_memlist;
234*7c478bd9Sstevel@tonic-gate static uint_t memscrub_phys_pages;
235*7c478bd9Sstevel@tonic-gate 
236*7c478bd9Sstevel@tonic-gate static kcondvar_t memscrub_cv;
237*7c478bd9Sstevel@tonic-gate static kmutex_t memscrub_lock;
238*7c478bd9Sstevel@tonic-gate /*
239*7c478bd9Sstevel@tonic-gate  * memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ...
240*7c478bd9Sstevel@tonic-gate  */
241*7c478bd9Sstevel@tonic-gate static void memscrub_init_mem_config(void);
242*7c478bd9Sstevel@tonic-gate static void memscrub_uninit_mem_config(void);
243*7c478bd9Sstevel@tonic-gate 
244*7c478bd9Sstevel@tonic-gate /*
245*7c478bd9Sstevel@tonic-gate  * Keep track of some interesting statistics
246*7c478bd9Sstevel@tonic-gate  */
static struct memscrub_kstats {
	kstat_named_t	done_early;	/* ahead of schedule */
	kstat_named_t	early_sec;	/* by cumulative num secs */
	kstat_named_t	done_late;	/* behind schedule */
	kstat_named_t	late_sec;	/* by cumulative num secs */
	kstat_named_t	interval_ticks;	/* num ticks between intervals */
	kstat_named_t	force_run;	/* forced to run, non-timeout */
	kstat_named_t	errors_found;	/* num errors found by memscrub */
} memscrub_counts = {
	/*
	 * Initializer order must match the field order above; all
	 * counters are 32-bit (updated via .value.ui32) to agree
	 * with KSTAT_DATA_UINT32.  Exported as the virtual kstat
	 * "memscrub_kstat" created in memscrub_init().
	 */
	{ "done_early",		KSTAT_DATA_UINT32 },
	{ "early_sec", 		KSTAT_DATA_UINT32 },
	{ "done_late", 		KSTAT_DATA_UINT32 },
	{ "late_sec",		KSTAT_DATA_UINT32 },
	{ "interval_ticks",	KSTAT_DATA_UINT32 },
	{ "force_run",		KSTAT_DATA_UINT32 },
	{ "errors_found",	KSTAT_DATA_UINT32 },
};
264*7c478bd9Sstevel@tonic-gate static struct kstat *memscrub_ksp = (struct kstat *)NULL;
265*7c478bd9Sstevel@tonic-gate 
266*7c478bd9Sstevel@tonic-gate static timeout_id_t memscrub_tid = 0;	/* keep track of timeout id */
267*7c478bd9Sstevel@tonic-gate 
268*7c478bd9Sstevel@tonic-gate /*
269*7c478bd9Sstevel@tonic-gate  * create memscrub_memlist from phys_install list
270*7c478bd9Sstevel@tonic-gate  * initialize locks, set memscrub_phys_pages.
271*7c478bd9Sstevel@tonic-gate  */
272*7c478bd9Sstevel@tonic-gate int
273*7c478bd9Sstevel@tonic-gate memscrub_init(void)
274*7c478bd9Sstevel@tonic-gate {
275*7c478bd9Sstevel@tonic-gate 	struct memlist *src;
276*7c478bd9Sstevel@tonic-gate 
277*7c478bd9Sstevel@tonic-gate 	/*
278*7c478bd9Sstevel@tonic-gate 	 * only startup the scrubber if we have a minimum
279*7c478bd9Sstevel@tonic-gate 	 * number of pages
280*7c478bd9Sstevel@tonic-gate 	 */
281*7c478bd9Sstevel@tonic-gate 	if (physinstalled >= MEMSCRUB_MIN_PAGES) {
282*7c478bd9Sstevel@tonic-gate 
283*7c478bd9Sstevel@tonic-gate 		/*
284*7c478bd9Sstevel@tonic-gate 		 * initialize locks
285*7c478bd9Sstevel@tonic-gate 		 */
286*7c478bd9Sstevel@tonic-gate 		mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
287*7c478bd9Sstevel@tonic-gate 		cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);
288*7c478bd9Sstevel@tonic-gate 
289*7c478bd9Sstevel@tonic-gate 		/*
290*7c478bd9Sstevel@tonic-gate 		 * copy phys_install to memscrub_memlist
291*7c478bd9Sstevel@tonic-gate 		 */
292*7c478bd9Sstevel@tonic-gate 		for (src = phys_install; src; src = src->next) {
293*7c478bd9Sstevel@tonic-gate 			if (memscrub_add_span(
294*7c478bd9Sstevel@tonic-gate 			    (pfn_t)(src->address >> PAGESHIFT),
295*7c478bd9Sstevel@tonic-gate 			    (pgcnt_t)(src->size >> PAGESHIFT))) {
296*7c478bd9Sstevel@tonic-gate 				memscrub_cleanup();
297*7c478bd9Sstevel@tonic-gate 				return (-1);
298*7c478bd9Sstevel@tonic-gate 			}
299*7c478bd9Sstevel@tonic-gate 		}
300*7c478bd9Sstevel@tonic-gate 
301*7c478bd9Sstevel@tonic-gate 		/*
302*7c478bd9Sstevel@tonic-gate 		 * initialize kstats
303*7c478bd9Sstevel@tonic-gate 		 */
304*7c478bd9Sstevel@tonic-gate 		memscrub_ksp = kstat_create("unix", 0, "memscrub_kstat",
305*7c478bd9Sstevel@tonic-gate 			"misc", KSTAT_TYPE_NAMED,
306*7c478bd9Sstevel@tonic-gate 			sizeof (memscrub_counts) / sizeof (kstat_named_t),
307*7c478bd9Sstevel@tonic-gate 			KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
308*7c478bd9Sstevel@tonic-gate 
309*7c478bd9Sstevel@tonic-gate 		if (memscrub_ksp) {
310*7c478bd9Sstevel@tonic-gate 			memscrub_ksp->ks_data = (void *)&memscrub_counts;
311*7c478bd9Sstevel@tonic-gate 			kstat_install(memscrub_ksp);
312*7c478bd9Sstevel@tonic-gate 		} else {
313*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "Memscrubber cannot create kstats\n");
314*7c478bd9Sstevel@tonic-gate 		}
315*7c478bd9Sstevel@tonic-gate 
316*7c478bd9Sstevel@tonic-gate 		/*
317*7c478bd9Sstevel@tonic-gate 		 * create memscrubber thread
318*7c478bd9Sstevel@tonic-gate 		 */
319*7c478bd9Sstevel@tonic-gate 		(void) thread_create(NULL, 0, (void (*)())memscrubber,
320*7c478bd9Sstevel@tonic-gate 		    NULL, 0, &p0, TS_RUN, memscrub_thread_pri);
321*7c478bd9Sstevel@tonic-gate 
322*7c478bd9Sstevel@tonic-gate 		/*
323*7c478bd9Sstevel@tonic-gate 		 * We don't want call backs changing the list
324*7c478bd9Sstevel@tonic-gate 		 * if there is no thread running. We do not
325*7c478bd9Sstevel@tonic-gate 		 * attempt to deal with stopping/starting scrubbing
326*7c478bd9Sstevel@tonic-gate 		 * on memory size changes.
327*7c478bd9Sstevel@tonic-gate 		 */
328*7c478bd9Sstevel@tonic-gate 		memscrub_init_mem_config();
329*7c478bd9Sstevel@tonic-gate 	}
330*7c478bd9Sstevel@tonic-gate 
331*7c478bd9Sstevel@tonic-gate 	return (0);
332*7c478bd9Sstevel@tonic-gate }
333*7c478bd9Sstevel@tonic-gate 
334*7c478bd9Sstevel@tonic-gate static void
335*7c478bd9Sstevel@tonic-gate memscrub_cleanup(void)
336*7c478bd9Sstevel@tonic-gate {
337*7c478bd9Sstevel@tonic-gate 	memscrub_uninit_mem_config();
338*7c478bd9Sstevel@tonic-gate 	while (memscrub_memlist) {
339*7c478bd9Sstevel@tonic-gate 		(void) memscrub_delete_span(
340*7c478bd9Sstevel@tonic-gate 			(pfn_t)(memscrub_memlist->address >> PAGESHIFT),
341*7c478bd9Sstevel@tonic-gate 			(pgcnt_t)(memscrub_memlist->size >> PAGESHIFT));
342*7c478bd9Sstevel@tonic-gate 	}
343*7c478bd9Sstevel@tonic-gate 	if (memscrub_ksp)
344*7c478bd9Sstevel@tonic-gate 		kstat_delete(memscrub_ksp);
345*7c478bd9Sstevel@tonic-gate 	cv_destroy(&memscrub_cv);
346*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&memscrub_lock);
347*7c478bd9Sstevel@tonic-gate }
348*7c478bd9Sstevel@tonic-gate 
#ifdef MEMSCRUB_DEBUG
/*
 * Debug aid: dump each span (address and size) of the given
 * memory list to the console, preceded by a caller-supplied title.
 */
static void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *mlp;

	cmn_err(CE_CONT, "%s:\n", title);

	for (mlp = listp; mlp != NULL; mlp = mlp->next)
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    mlp->address, mlp->size);
}
#endif /* MEMSCRUB_DEBUG */
363*7c478bd9Sstevel@tonic-gate 
364*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
365*7c478bd9Sstevel@tonic-gate static void
366*7c478bd9Sstevel@tonic-gate memscrub_wakeup(void *c)
367*7c478bd9Sstevel@tonic-gate {
368*7c478bd9Sstevel@tonic-gate 	/*
369*7c478bd9Sstevel@tonic-gate 	 * grab mutex to guarantee that our wakeup call
370*7c478bd9Sstevel@tonic-gate 	 * arrives after we go to sleep -- so we can't sleep forever.
371*7c478bd9Sstevel@tonic-gate 	 */
372*7c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
373*7c478bd9Sstevel@tonic-gate 	cv_signal(&memscrub_cv);
374*7c478bd9Sstevel@tonic-gate 	mutex_exit(&memscrub_lock);
375*7c478bd9Sstevel@tonic-gate }
376*7c478bd9Sstevel@tonic-gate 
377*7c478bd9Sstevel@tonic-gate /*
378*7c478bd9Sstevel@tonic-gate  * provide an interface external to the memscrubber
379*7c478bd9Sstevel@tonic-gate  * which will force the memscrub thread to run vs.
380*7c478bd9Sstevel@tonic-gate  * waiting for the timeout, if one is set
381*7c478bd9Sstevel@tonic-gate  */
382*7c478bd9Sstevel@tonic-gate void
383*7c478bd9Sstevel@tonic-gate memscrub_run(void)
384*7c478bd9Sstevel@tonic-gate {
385*7c478bd9Sstevel@tonic-gate 	memscrub_counts.force_run.value.ui32++;
386*7c478bd9Sstevel@tonic-gate 	if (memscrub_tid) {
387*7c478bd9Sstevel@tonic-gate 		(void) untimeout(memscrub_tid);
388*7c478bd9Sstevel@tonic-gate 		memscrub_wakeup((void *)NULL);
389*7c478bd9Sstevel@tonic-gate 	}
390*7c478bd9Sstevel@tonic-gate }
391*7c478bd9Sstevel@tonic-gate 
392*7c478bd9Sstevel@tonic-gate /*
393*7c478bd9Sstevel@tonic-gate  * this calculation doesn't account for the time
394*7c478bd9Sstevel@tonic-gate  * that the actual scan consumes -- so we'd fall
395*7c478bd9Sstevel@tonic-gate  * slightly behind schedule with this interval.
396*7c478bd9Sstevel@tonic-gate  * It's very small.
397*7c478bd9Sstevel@tonic-gate  */
398*7c478bd9Sstevel@tonic-gate 
399*7c478bd9Sstevel@tonic-gate static uint_t
400*7c478bd9Sstevel@tonic-gate compute_interval_ticks(void)
401*7c478bd9Sstevel@tonic-gate {
402*7c478bd9Sstevel@tonic-gate 	/*
403*7c478bd9Sstevel@tonic-gate 	 * We use msp_safe mpp_safe below to insure somebody
404*7c478bd9Sstevel@tonic-gate 	 * doesn't set memscrub_span_pages or memscrub_phys_pages
405*7c478bd9Sstevel@tonic-gate 	 * to 0 on us.
406*7c478bd9Sstevel@tonic-gate 	 */
407*7c478bd9Sstevel@tonic-gate 	static uint_t msp_safe, mpp_safe;
408*7c478bd9Sstevel@tonic-gate 	static uint_t interval_ticks, period_ticks;
409*7c478bd9Sstevel@tonic-gate 	msp_safe = memscrub_span_pages;
410*7c478bd9Sstevel@tonic-gate 	mpp_safe = memscrub_phys_pages;
411*7c478bd9Sstevel@tonic-gate 
412*7c478bd9Sstevel@tonic-gate 	period_ticks = memscrub_period_sec * hz;
413*7c478bd9Sstevel@tonic-gate 	interval_ticks = period_ticks;
414*7c478bd9Sstevel@tonic-gate 
415*7c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&memscrub_lock));
416*7c478bd9Sstevel@tonic-gate 
417*7c478bd9Sstevel@tonic-gate 	if ((msp_safe != 0) && (mpp_safe != 0)) {
418*7c478bd9Sstevel@tonic-gate 		if (memscrub_phys_pages <= msp_safe) {
419*7c478bd9Sstevel@tonic-gate 			interval_ticks = period_ticks;
420*7c478bd9Sstevel@tonic-gate 		} else {
421*7c478bd9Sstevel@tonic-gate 			interval_ticks = (period_ticks /
422*7c478bd9Sstevel@tonic-gate 			    (mpp_safe / msp_safe));
423*7c478bd9Sstevel@tonic-gate 		}
424*7c478bd9Sstevel@tonic-gate 	}
425*7c478bd9Sstevel@tonic-gate 	return (interval_ticks);
426*7c478bd9Sstevel@tonic-gate }
427*7c478bd9Sstevel@tonic-gate 
428*7c478bd9Sstevel@tonic-gate void
429*7c478bd9Sstevel@tonic-gate memscrubber(void)
430*7c478bd9Sstevel@tonic-gate {
431*7c478bd9Sstevel@tonic-gate 	ms_paddr_t address, addr;
432*7c478bd9Sstevel@tonic-gate 	time_t deadline;
433*7c478bd9Sstevel@tonic-gate 	pgcnt_t pages;
434*7c478bd9Sstevel@tonic-gate 	uint_t reached_end = 1;
435*7c478bd9Sstevel@tonic-gate 	uint_t paused_message = 0;
436*7c478bd9Sstevel@tonic-gate 	uint_t interval_ticks = 0;
437*7c478bd9Sstevel@tonic-gate 	uint_t sleep_warn_printed = 0;
438*7c478bd9Sstevel@tonic-gate 	callb_cpr_t cprinfo;
439*7c478bd9Sstevel@tonic-gate 
440*7c478bd9Sstevel@tonic-gate 	/*
441*7c478bd9Sstevel@tonic-gate 	 * notify CPR of our existence
442*7c478bd9Sstevel@tonic-gate 	 */
443*7c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");
444*7c478bd9Sstevel@tonic-gate 
445*7c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
446*7c478bd9Sstevel@tonic-gate 
447*7c478bd9Sstevel@tonic-gate 	if (memscrub_memlist == NULL) {
448*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
449*7c478bd9Sstevel@tonic-gate 		goto memscrub_exit;
450*7c478bd9Sstevel@tonic-gate 	}
451*7c478bd9Sstevel@tonic-gate 
452*7c478bd9Sstevel@tonic-gate 	address = memscrub_memlist->address;
453*7c478bd9Sstevel@tonic-gate 
454*7c478bd9Sstevel@tonic-gate 	deadline = gethrestime_sec() + memscrub_delay_start_sec;
455*7c478bd9Sstevel@tonic-gate 
456*7c478bd9Sstevel@tonic-gate 	for (;;) {
457*7c478bd9Sstevel@tonic-gate 		if (disable_memscrub)
458*7c478bd9Sstevel@tonic-gate 			break;
459*7c478bd9Sstevel@tonic-gate 
460*7c478bd9Sstevel@tonic-gate 		/*
461*7c478bd9Sstevel@tonic-gate 		 * compute interval_ticks
462*7c478bd9Sstevel@tonic-gate 		 */
463*7c478bd9Sstevel@tonic-gate 		interval_ticks = compute_interval_ticks();
464*7c478bd9Sstevel@tonic-gate 
465*7c478bd9Sstevel@tonic-gate 		/*
466*7c478bd9Sstevel@tonic-gate 		 * If the calculated sleep time is zero, and pause_memscrub
467*7c478bd9Sstevel@tonic-gate 		 * has been set, make sure we sleep so that another thread
468*7c478bd9Sstevel@tonic-gate 		 * can acquire memscrub_lock.
469*7c478bd9Sstevel@tonic-gate 		 */
470*7c478bd9Sstevel@tonic-gate 		if (interval_ticks == 0 && pause_memscrub) {
471*7c478bd9Sstevel@tonic-gate 			interval_ticks = hz;
472*7c478bd9Sstevel@tonic-gate 		}
473*7c478bd9Sstevel@tonic-gate 
474*7c478bd9Sstevel@tonic-gate 		/*
475*7c478bd9Sstevel@tonic-gate 		 * And as a fail safe, under normal non-paused operation, do
476*7c478bd9Sstevel@tonic-gate 		 * not allow the sleep time to be zero.
477*7c478bd9Sstevel@tonic-gate 		 */
478*7c478bd9Sstevel@tonic-gate 		if (interval_ticks == 0) {
479*7c478bd9Sstevel@tonic-gate 			interval_ticks = memscrub_override_ticks;
480*7c478bd9Sstevel@tonic-gate 			if (!sleep_warn_printed) {
481*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, MEMSCRUB_OVERRIDE_MSG);
482*7c478bd9Sstevel@tonic-gate 				sleep_warn_printed = 1;
483*7c478bd9Sstevel@tonic-gate 			}
484*7c478bd9Sstevel@tonic-gate 		}
485*7c478bd9Sstevel@tonic-gate 
486*7c478bd9Sstevel@tonic-gate 		memscrub_counts.interval_ticks.value.ui32 = interval_ticks;
487*7c478bd9Sstevel@tonic-gate 
488*7c478bd9Sstevel@tonic-gate 		/*
489*7c478bd9Sstevel@tonic-gate 		 * Did we just reach the end of memory? If we are at the
490*7c478bd9Sstevel@tonic-gate 		 * end of memory, delay end of memory processing until
491*7c478bd9Sstevel@tonic-gate 		 * pause_memscrub is not set.
492*7c478bd9Sstevel@tonic-gate 		 */
493*7c478bd9Sstevel@tonic-gate 		if (reached_end && !pause_memscrub) {
494*7c478bd9Sstevel@tonic-gate 			time_t now = gethrestime_sec();
495*7c478bd9Sstevel@tonic-gate 
496*7c478bd9Sstevel@tonic-gate 			if (now >= deadline) {
497*7c478bd9Sstevel@tonic-gate 				memscrub_counts.done_late.value.ui32++;
498*7c478bd9Sstevel@tonic-gate 				memscrub_counts.late_sec.value.ui32 +=
499*7c478bd9Sstevel@tonic-gate 					(now - deadline);
500*7c478bd9Sstevel@tonic-gate 				/*
501*7c478bd9Sstevel@tonic-gate 				 * past deadline, start right away
502*7c478bd9Sstevel@tonic-gate 				 */
503*7c478bd9Sstevel@tonic-gate 				interval_ticks = 0;
504*7c478bd9Sstevel@tonic-gate 
505*7c478bd9Sstevel@tonic-gate 				deadline = now + memscrub_period_sec;
506*7c478bd9Sstevel@tonic-gate 			} else {
507*7c478bd9Sstevel@tonic-gate 				/*
508*7c478bd9Sstevel@tonic-gate 				 * we finished ahead of schedule.
509*7c478bd9Sstevel@tonic-gate 				 * wait till previous deadline before re-start.
510*7c478bd9Sstevel@tonic-gate 				 */
511*7c478bd9Sstevel@tonic-gate 				interval_ticks = (deadline - now) * hz;
512*7c478bd9Sstevel@tonic-gate 				memscrub_counts.done_early.value.ui32++;
513*7c478bd9Sstevel@tonic-gate 				memscrub_counts.early_sec.value.ui32 +=
514*7c478bd9Sstevel@tonic-gate 					(deadline - now);
515*7c478bd9Sstevel@tonic-gate 				deadline += memscrub_period_sec;
516*7c478bd9Sstevel@tonic-gate 			}
517*7c478bd9Sstevel@tonic-gate 			reached_end = 0;
518*7c478bd9Sstevel@tonic-gate 			sleep_warn_printed = 0;
519*7c478bd9Sstevel@tonic-gate 		}
520*7c478bd9Sstevel@tonic-gate 
521*7c478bd9Sstevel@tonic-gate 		if (interval_ticks != 0) {
522*7c478bd9Sstevel@tonic-gate 			/*
523*7c478bd9Sstevel@tonic-gate 			 * it is safe from our standpoint for CPR to
524*7c478bd9Sstevel@tonic-gate 			 * suspend the system
525*7c478bd9Sstevel@tonic-gate 			 */
526*7c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
527*7c478bd9Sstevel@tonic-gate 
528*7c478bd9Sstevel@tonic-gate 			/*
529*7c478bd9Sstevel@tonic-gate 			 * hit the snooze bar
530*7c478bd9Sstevel@tonic-gate 			 */
531*7c478bd9Sstevel@tonic-gate 			memscrub_tid = timeout(memscrub_wakeup, NULL,
532*7c478bd9Sstevel@tonic-gate 			    interval_ticks);
533*7c478bd9Sstevel@tonic-gate 
534*7c478bd9Sstevel@tonic-gate 			/*
535*7c478bd9Sstevel@tonic-gate 			 * go to sleep
536*7c478bd9Sstevel@tonic-gate 			 */
537*7c478bd9Sstevel@tonic-gate 			cv_wait(&memscrub_cv, &memscrub_lock);
538*7c478bd9Sstevel@tonic-gate 
539*7c478bd9Sstevel@tonic-gate 			/*
540*7c478bd9Sstevel@tonic-gate 			 * at this point, no timeout should be set
541*7c478bd9Sstevel@tonic-gate 			 */
542*7c478bd9Sstevel@tonic-gate 			memscrub_tid = 0;
543*7c478bd9Sstevel@tonic-gate 
544*7c478bd9Sstevel@tonic-gate 			/*
545*7c478bd9Sstevel@tonic-gate 			 * we need to goto work and will be modifying
546*7c478bd9Sstevel@tonic-gate 			 * our internal state and mapping/unmapping
547*7c478bd9Sstevel@tonic-gate 			 * TTEs
548*7c478bd9Sstevel@tonic-gate 			 */
549*7c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);
550*7c478bd9Sstevel@tonic-gate 		}
551*7c478bd9Sstevel@tonic-gate 
552*7c478bd9Sstevel@tonic-gate 
553*7c478bd9Sstevel@tonic-gate 		if (memscrub_phys_pages == 0) {
554*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "Memory scrubber has 0 pages to read");
555*7c478bd9Sstevel@tonic-gate 			goto memscrub_exit;
556*7c478bd9Sstevel@tonic-gate 		}
557*7c478bd9Sstevel@tonic-gate 
558*7c478bd9Sstevel@tonic-gate 		if (!pause_memscrub) {
559*7c478bd9Sstevel@tonic-gate 			if (paused_message) {
560*7c478bd9Sstevel@tonic-gate 				paused_message = 0;
561*7c478bd9Sstevel@tonic-gate 				if (memscrub_verbose)
562*7c478bd9Sstevel@tonic-gate 					cmn_err(CE_NOTE, "Memory scrubber "
563*7c478bd9Sstevel@tonic-gate 					    "resuming");
564*7c478bd9Sstevel@tonic-gate 			}
565*7c478bd9Sstevel@tonic-gate 
566*7c478bd9Sstevel@tonic-gate 			if (read_all_memscrub) {
567*7c478bd9Sstevel@tonic-gate 				if (memscrub_verbose)
568*7c478bd9Sstevel@tonic-gate 					cmn_err(CE_NOTE, "Memory scrubber "
569*7c478bd9Sstevel@tonic-gate 					    "reading all memory per request");
570*7c478bd9Sstevel@tonic-gate 
571*7c478bd9Sstevel@tonic-gate 				addr = memscrub_memlist->address;
572*7c478bd9Sstevel@tonic-gate 				reached_end = 0;
573*7c478bd9Sstevel@tonic-gate 				while (!reached_end) {
574*7c478bd9Sstevel@tonic-gate 					if (disable_memscrub)
575*7c478bd9Sstevel@tonic-gate 						break;
576*7c478bd9Sstevel@tonic-gate 					pages = memscrub_phys_pages;
577*7c478bd9Sstevel@tonic-gate 					reached_end = memscrub_verify_span(
578*7c478bd9Sstevel@tonic-gate 					    &addr, &pages);
579*7c478bd9Sstevel@tonic-gate 					memscrub_scan(pages *
580*7c478bd9Sstevel@tonic-gate 					    MEMSCRUB_BLOCKS_PER_PAGE, addr);
581*7c478bd9Sstevel@tonic-gate 					addr += ((uint64_t)pages * PAGESIZE);
582*7c478bd9Sstevel@tonic-gate 				}
583*7c478bd9Sstevel@tonic-gate 				read_all_memscrub = 0;
584*7c478bd9Sstevel@tonic-gate 			}
585*7c478bd9Sstevel@tonic-gate 
586*7c478bd9Sstevel@tonic-gate 			/*
587*7c478bd9Sstevel@tonic-gate 			 * read 1 span
588*7c478bd9Sstevel@tonic-gate 			 */
589*7c478bd9Sstevel@tonic-gate 			pages = memscrub_span_pages;
590*7c478bd9Sstevel@tonic-gate 
591*7c478bd9Sstevel@tonic-gate 			if (disable_memscrub)
592*7c478bd9Sstevel@tonic-gate 				break;
593*7c478bd9Sstevel@tonic-gate 
594*7c478bd9Sstevel@tonic-gate 			/*
595*7c478bd9Sstevel@tonic-gate 			 * determine physical address range
596*7c478bd9Sstevel@tonic-gate 			 */
597*7c478bd9Sstevel@tonic-gate 			reached_end = memscrub_verify_span(&address,
598*7c478bd9Sstevel@tonic-gate 			    &pages);
599*7c478bd9Sstevel@tonic-gate 
600*7c478bd9Sstevel@tonic-gate 			memscrub_scan(pages * MEMSCRUB_BLOCKS_PER_PAGE,
601*7c478bd9Sstevel@tonic-gate 			    address);
602*7c478bd9Sstevel@tonic-gate 
603*7c478bd9Sstevel@tonic-gate 			address += ((uint64_t)pages * PAGESIZE);
604*7c478bd9Sstevel@tonic-gate 		}
605*7c478bd9Sstevel@tonic-gate 
606*7c478bd9Sstevel@tonic-gate 		if (pause_memscrub && !paused_message) {
607*7c478bd9Sstevel@tonic-gate 			paused_message = 1;
608*7c478bd9Sstevel@tonic-gate 			if (memscrub_verbose)
609*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "Memory scrubber paused");
610*7c478bd9Sstevel@tonic-gate 		}
611*7c478bd9Sstevel@tonic-gate 	}
612*7c478bd9Sstevel@tonic-gate 
613*7c478bd9Sstevel@tonic-gate memscrub_exit:
614*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "Memory scrubber exiting");
615*7c478bd9Sstevel@tonic-gate 	CALLB_CPR_EXIT(&cprinfo);
616*7c478bd9Sstevel@tonic-gate 	memscrub_cleanup();
617*7c478bd9Sstevel@tonic-gate 	thread_exit();
618*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
619*7c478bd9Sstevel@tonic-gate }
620*7c478bd9Sstevel@tonic-gate 
621*7c478bd9Sstevel@tonic-gate /*
622*7c478bd9Sstevel@tonic-gate  * condition address and size
623*7c478bd9Sstevel@tonic-gate  * such that they span legal physical addresses.
624*7c478bd9Sstevel@tonic-gate  *
625*7c478bd9Sstevel@tonic-gate  * when appropriate, address will be rounded up to start of next
626*7c478bd9Sstevel@tonic-gate  * struct memlist, and pages will be rounded down to the end of the
627*7c478bd9Sstevel@tonic-gate  * memlist size.
628*7c478bd9Sstevel@tonic-gate  *
629*7c478bd9Sstevel@tonic-gate  * returns 1 if reached end of list, else returns 0.
630*7c478bd9Sstevel@tonic-gate  */
631*7c478bd9Sstevel@tonic-gate static int
632*7c478bd9Sstevel@tonic-gate memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp)
633*7c478bd9Sstevel@tonic-gate {
634*7c478bd9Sstevel@tonic-gate 	struct memlist *mlp;
635*7c478bd9Sstevel@tonic-gate 	ms_paddr_t address = *addrp;
636*7c478bd9Sstevel@tonic-gate 	uint64_t bytes = (uint64_t)*pagesp * PAGESIZE;
637*7c478bd9Sstevel@tonic-gate 	uint64_t bytes_remaining;
638*7c478bd9Sstevel@tonic-gate 	int reached_end = 0;
639*7c478bd9Sstevel@tonic-gate 
640*7c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&memscrub_lock));
641*7c478bd9Sstevel@tonic-gate 
642*7c478bd9Sstevel@tonic-gate 	/*
643*7c478bd9Sstevel@tonic-gate 	 * find memlist struct that contains addrp
644*7c478bd9Sstevel@tonic-gate 	 * assumes memlist is sorted by ascending address.
645*7c478bd9Sstevel@tonic-gate 	 */
646*7c478bd9Sstevel@tonic-gate 	for (mlp = memscrub_memlist; mlp != NULL; mlp = mlp->next) {
647*7c478bd9Sstevel@tonic-gate 		/*
648*7c478bd9Sstevel@tonic-gate 		 * if before this chunk, round up to beginning
649*7c478bd9Sstevel@tonic-gate 		 */
650*7c478bd9Sstevel@tonic-gate 		if (address < mlp->address) {
651*7c478bd9Sstevel@tonic-gate 			address = mlp->address;
652*7c478bd9Sstevel@tonic-gate 			break;
653*7c478bd9Sstevel@tonic-gate 		}
654*7c478bd9Sstevel@tonic-gate 		/*
655*7c478bd9Sstevel@tonic-gate 		 * if before end of chunk, then we found it
656*7c478bd9Sstevel@tonic-gate 		 */
657*7c478bd9Sstevel@tonic-gate 		if (address < (mlp->address + mlp->size))
658*7c478bd9Sstevel@tonic-gate 			break;
659*7c478bd9Sstevel@tonic-gate 
660*7c478bd9Sstevel@tonic-gate 		/* else go to next struct memlist */
661*7c478bd9Sstevel@tonic-gate 	}
662*7c478bd9Sstevel@tonic-gate 	/*
663*7c478bd9Sstevel@tonic-gate 	 * if we hit end of list, start at beginning
664*7c478bd9Sstevel@tonic-gate 	 */
665*7c478bd9Sstevel@tonic-gate 	if (mlp == NULL) {
666*7c478bd9Sstevel@tonic-gate 		mlp = memscrub_memlist;
667*7c478bd9Sstevel@tonic-gate 		address = mlp->address;
668*7c478bd9Sstevel@tonic-gate 	}
669*7c478bd9Sstevel@tonic-gate 
670*7c478bd9Sstevel@tonic-gate 	/*
671*7c478bd9Sstevel@tonic-gate 	 * now we have legal address, and its mlp, condition bytes
672*7c478bd9Sstevel@tonic-gate 	 */
673*7c478bd9Sstevel@tonic-gate 	bytes_remaining = (mlp->address + mlp->size) - address;
674*7c478bd9Sstevel@tonic-gate 
675*7c478bd9Sstevel@tonic-gate 	if (bytes > bytes_remaining)
676*7c478bd9Sstevel@tonic-gate 		bytes = bytes_remaining;
677*7c478bd9Sstevel@tonic-gate 
678*7c478bd9Sstevel@tonic-gate 	/*
679*7c478bd9Sstevel@tonic-gate 	 * will this span take us to end of list?
680*7c478bd9Sstevel@tonic-gate 	 */
681*7c478bd9Sstevel@tonic-gate 	if ((mlp->next == NULL) &&
682*7c478bd9Sstevel@tonic-gate 	    ((mlp->address + mlp->size) == (address + bytes)))
683*7c478bd9Sstevel@tonic-gate 		reached_end = 1;
684*7c478bd9Sstevel@tonic-gate 
685*7c478bd9Sstevel@tonic-gate 	/* return values */
686*7c478bd9Sstevel@tonic-gate 	*addrp = address;
687*7c478bd9Sstevel@tonic-gate 	*pagesp = bytes / PAGESIZE;
688*7c478bd9Sstevel@tonic-gate 
689*7c478bd9Sstevel@tonic-gate 	return (reached_end);
690*7c478bd9Sstevel@tonic-gate }
691*7c478bd9Sstevel@tonic-gate 
692*7c478bd9Sstevel@tonic-gate /*
693*7c478bd9Sstevel@tonic-gate  * add a span to the memscrub list
694*7c478bd9Sstevel@tonic-gate  * add to memscrub_phys_pages
695*7c478bd9Sstevel@tonic-gate  */
696*7c478bd9Sstevel@tonic-gate int
697*7c478bd9Sstevel@tonic-gate memscrub_add_span(pfn_t pfn, pgcnt_t pages)
698*7c478bd9Sstevel@tonic-gate {
699*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
700*7c478bd9Sstevel@tonic-gate 	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
701*7c478bd9Sstevel@tonic-gate 	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
702*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
703*7c478bd9Sstevel@tonic-gate 
704*7c478bd9Sstevel@tonic-gate 	int retval;
705*7c478bd9Sstevel@tonic-gate 
706*7c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
707*7c478bd9Sstevel@tonic-gate 
708*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
709*7c478bd9Sstevel@tonic-gate 	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
710*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
711*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
712*7c478bd9Sstevel@tonic-gate 	    " size: 0x%llx\n", address, bytes);
713*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
714*7c478bd9Sstevel@tonic-gate 
715*7c478bd9Sstevel@tonic-gate 	retval = memscrub_add_span_gen(pfn, pages, &memscrub_memlist,
716*7c478bd9Sstevel@tonic-gate 	    &memscrub_phys_pages);
717*7c478bd9Sstevel@tonic-gate 
718*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
719*7c478bd9Sstevel@tonic-gate 	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
720*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
721*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
722*7c478bd9Sstevel@tonic-gate 
723*7c478bd9Sstevel@tonic-gate 	mutex_exit(&memscrub_lock);
724*7c478bd9Sstevel@tonic-gate 
725*7c478bd9Sstevel@tonic-gate 	return (retval);
726*7c478bd9Sstevel@tonic-gate }
727*7c478bd9Sstevel@tonic-gate 
728*7c478bd9Sstevel@tonic-gate static int
729*7c478bd9Sstevel@tonic-gate memscrub_add_span_gen(
730*7c478bd9Sstevel@tonic-gate 	pfn_t pfn,
731*7c478bd9Sstevel@tonic-gate 	pgcnt_t pages,
732*7c478bd9Sstevel@tonic-gate 	struct memlist **list,
733*7c478bd9Sstevel@tonic-gate 	uint_t *npgs)
734*7c478bd9Sstevel@tonic-gate {
735*7c478bd9Sstevel@tonic-gate 	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
736*7c478bd9Sstevel@tonic-gate 	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
737*7c478bd9Sstevel@tonic-gate 	struct memlist *dst;
738*7c478bd9Sstevel@tonic-gate 	struct memlist *prev, *next;
739*7c478bd9Sstevel@tonic-gate 	int retval = 0;
740*7c478bd9Sstevel@tonic-gate 
741*7c478bd9Sstevel@tonic-gate 	/*
742*7c478bd9Sstevel@tonic-gate 	 * allocate a new struct memlist
743*7c478bd9Sstevel@tonic-gate 	 */
744*7c478bd9Sstevel@tonic-gate 
745*7c478bd9Sstevel@tonic-gate 	dst = (struct memlist *)
746*7c478bd9Sstevel@tonic-gate 	    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
747*7c478bd9Sstevel@tonic-gate 
748*7c478bd9Sstevel@tonic-gate 	if (dst == NULL) {
749*7c478bd9Sstevel@tonic-gate 		retval = -1;
750*7c478bd9Sstevel@tonic-gate 		goto add_done;
751*7c478bd9Sstevel@tonic-gate 	}
752*7c478bd9Sstevel@tonic-gate 
753*7c478bd9Sstevel@tonic-gate 	dst->address = address;
754*7c478bd9Sstevel@tonic-gate 	dst->size = bytes;
755*7c478bd9Sstevel@tonic-gate 
756*7c478bd9Sstevel@tonic-gate 	/*
757*7c478bd9Sstevel@tonic-gate 	 * first insert
758*7c478bd9Sstevel@tonic-gate 	 */
759*7c478bd9Sstevel@tonic-gate 	if (*list == NULL) {
760*7c478bd9Sstevel@tonic-gate 		dst->prev = NULL;
761*7c478bd9Sstevel@tonic-gate 		dst->next = NULL;
762*7c478bd9Sstevel@tonic-gate 		*list = dst;
763*7c478bd9Sstevel@tonic-gate 
764*7c478bd9Sstevel@tonic-gate 		goto add_done;
765*7c478bd9Sstevel@tonic-gate 	}
766*7c478bd9Sstevel@tonic-gate 
767*7c478bd9Sstevel@tonic-gate 	/*
768*7c478bd9Sstevel@tonic-gate 	 * insert into sorted list
769*7c478bd9Sstevel@tonic-gate 	 */
770*7c478bd9Sstevel@tonic-gate 	for (prev = NULL, next = *list;
771*7c478bd9Sstevel@tonic-gate 	    next != NULL;
772*7c478bd9Sstevel@tonic-gate 	    prev = next, next = next->next) {
773*7c478bd9Sstevel@tonic-gate 		if (address > (next->address + next->size))
774*7c478bd9Sstevel@tonic-gate 			continue;
775*7c478bd9Sstevel@tonic-gate 
776*7c478bd9Sstevel@tonic-gate 		/*
777*7c478bd9Sstevel@tonic-gate 		 * else insert here
778*7c478bd9Sstevel@tonic-gate 		 */
779*7c478bd9Sstevel@tonic-gate 
780*7c478bd9Sstevel@tonic-gate 		/*
781*7c478bd9Sstevel@tonic-gate 		 * prepend to next
782*7c478bd9Sstevel@tonic-gate 		 */
783*7c478bd9Sstevel@tonic-gate 		if ((address + bytes) == next->address) {
784*7c478bd9Sstevel@tonic-gate 			kmem_free(dst, sizeof (struct memlist));
785*7c478bd9Sstevel@tonic-gate 
786*7c478bd9Sstevel@tonic-gate 			next->address = address;
787*7c478bd9Sstevel@tonic-gate 			next->size += bytes;
788*7c478bd9Sstevel@tonic-gate 
789*7c478bd9Sstevel@tonic-gate 			goto add_done;
790*7c478bd9Sstevel@tonic-gate 		}
791*7c478bd9Sstevel@tonic-gate 
792*7c478bd9Sstevel@tonic-gate 		/*
793*7c478bd9Sstevel@tonic-gate 		 * append to next
794*7c478bd9Sstevel@tonic-gate 		 */
795*7c478bd9Sstevel@tonic-gate 		if (address == (next->address + next->size)) {
796*7c478bd9Sstevel@tonic-gate 			kmem_free(dst, sizeof (struct memlist));
797*7c478bd9Sstevel@tonic-gate 
798*7c478bd9Sstevel@tonic-gate 			if (next->next) {
799*7c478bd9Sstevel@tonic-gate 				/*
800*7c478bd9Sstevel@tonic-gate 				 * don't overlap with next->next
801*7c478bd9Sstevel@tonic-gate 				 */
802*7c478bd9Sstevel@tonic-gate 				if ((address + bytes) > next->next->address) {
803*7c478bd9Sstevel@tonic-gate 					retval = -1;
804*7c478bd9Sstevel@tonic-gate 					goto add_done;
805*7c478bd9Sstevel@tonic-gate 				}
806*7c478bd9Sstevel@tonic-gate 				/*
807*7c478bd9Sstevel@tonic-gate 				 * concatenate next and next->next
808*7c478bd9Sstevel@tonic-gate 				 */
809*7c478bd9Sstevel@tonic-gate 				if ((address + bytes) == next->next->address) {
810*7c478bd9Sstevel@tonic-gate 					struct memlist *mlp = next->next;
811*7c478bd9Sstevel@tonic-gate 
812*7c478bd9Sstevel@tonic-gate 					if (next == *list)
813*7c478bd9Sstevel@tonic-gate 						*list = next->next;
814*7c478bd9Sstevel@tonic-gate 
815*7c478bd9Sstevel@tonic-gate 					mlp->address = next->address;
816*7c478bd9Sstevel@tonic-gate 					mlp->size += next->size;
817*7c478bd9Sstevel@tonic-gate 					mlp->size += bytes;
818*7c478bd9Sstevel@tonic-gate 
819*7c478bd9Sstevel@tonic-gate 					if (next->prev)
820*7c478bd9Sstevel@tonic-gate 						next->prev->next = mlp;
821*7c478bd9Sstevel@tonic-gate 					mlp->prev = next->prev;
822*7c478bd9Sstevel@tonic-gate 
823*7c478bd9Sstevel@tonic-gate 					kmem_free(next,
824*7c478bd9Sstevel@tonic-gate 						sizeof (struct memlist));
825*7c478bd9Sstevel@tonic-gate 					goto add_done;
826*7c478bd9Sstevel@tonic-gate 				}
827*7c478bd9Sstevel@tonic-gate 			}
828*7c478bd9Sstevel@tonic-gate 
829*7c478bd9Sstevel@tonic-gate 			next->size += bytes;
830*7c478bd9Sstevel@tonic-gate 
831*7c478bd9Sstevel@tonic-gate 			goto add_done;
832*7c478bd9Sstevel@tonic-gate 		}
833*7c478bd9Sstevel@tonic-gate 
834*7c478bd9Sstevel@tonic-gate 		/* don't overlap with next */
835*7c478bd9Sstevel@tonic-gate 		if ((address + bytes) > next->address) {
836*7c478bd9Sstevel@tonic-gate 			retval = -1;
837*7c478bd9Sstevel@tonic-gate 			kmem_free(dst, sizeof (struct memlist));
838*7c478bd9Sstevel@tonic-gate 			goto add_done;
839*7c478bd9Sstevel@tonic-gate 		}
840*7c478bd9Sstevel@tonic-gate 
841*7c478bd9Sstevel@tonic-gate 		/*
842*7c478bd9Sstevel@tonic-gate 		 * insert before next
843*7c478bd9Sstevel@tonic-gate 		 */
844*7c478bd9Sstevel@tonic-gate 		dst->prev = prev;
845*7c478bd9Sstevel@tonic-gate 		dst->next = next;
846*7c478bd9Sstevel@tonic-gate 		next->prev = dst;
847*7c478bd9Sstevel@tonic-gate 		if (prev == NULL) {
848*7c478bd9Sstevel@tonic-gate 			*list = dst;
849*7c478bd9Sstevel@tonic-gate 		} else {
850*7c478bd9Sstevel@tonic-gate 			prev->next = dst;
851*7c478bd9Sstevel@tonic-gate 		}
852*7c478bd9Sstevel@tonic-gate 		goto add_done;
853*7c478bd9Sstevel@tonic-gate 	}	/* end for */
854*7c478bd9Sstevel@tonic-gate 
855*7c478bd9Sstevel@tonic-gate 	/*
856*7c478bd9Sstevel@tonic-gate 	 * end of list, prev is valid and next is NULL
857*7c478bd9Sstevel@tonic-gate 	 */
858*7c478bd9Sstevel@tonic-gate 	prev->next = dst;
859*7c478bd9Sstevel@tonic-gate 	dst->prev = prev;
860*7c478bd9Sstevel@tonic-gate 	dst->next = NULL;
861*7c478bd9Sstevel@tonic-gate 
862*7c478bd9Sstevel@tonic-gate add_done:
863*7c478bd9Sstevel@tonic-gate 
864*7c478bd9Sstevel@tonic-gate 	if (retval != -1)
865*7c478bd9Sstevel@tonic-gate 		*npgs += pages;
866*7c478bd9Sstevel@tonic-gate 
867*7c478bd9Sstevel@tonic-gate 	return (retval);
868*7c478bd9Sstevel@tonic-gate }
869*7c478bd9Sstevel@tonic-gate 
870*7c478bd9Sstevel@tonic-gate /*
871*7c478bd9Sstevel@tonic-gate  * delete a span from the memscrub list
872*7c478bd9Sstevel@tonic-gate  * subtract from memscrub_phys_pages
873*7c478bd9Sstevel@tonic-gate  */
874*7c478bd9Sstevel@tonic-gate int
875*7c478bd9Sstevel@tonic-gate memscrub_delete_span(pfn_t pfn, pgcnt_t pages)
876*7c478bd9Sstevel@tonic-gate {
877*7c478bd9Sstevel@tonic-gate 	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
878*7c478bd9Sstevel@tonic-gate 	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
879*7c478bd9Sstevel@tonic-gate 	struct memlist *dst, *next;
880*7c478bd9Sstevel@tonic-gate 	int retval = 0;
881*7c478bd9Sstevel@tonic-gate 
882*7c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
883*7c478bd9Sstevel@tonic-gate 
884*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
885*7c478bd9Sstevel@tonic-gate 	memscrub_printmemlist("memscrub_memlist Before", memscrub_memlist);
886*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
887*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_delete_span: 0x%llx 0x%llx\n",
888*7c478bd9Sstevel@tonic-gate 	    address, bytes);
889*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
890*7c478bd9Sstevel@tonic-gate 
891*7c478bd9Sstevel@tonic-gate 	/*
892*7c478bd9Sstevel@tonic-gate 	 * find struct memlist containing page
893*7c478bd9Sstevel@tonic-gate 	 */
894*7c478bd9Sstevel@tonic-gate 	for (next = memscrub_memlist; next != NULL; next = next->next) {
895*7c478bd9Sstevel@tonic-gate 		if ((address >= next->address) &&
896*7c478bd9Sstevel@tonic-gate 		    (address < next->address + next->size))
897*7c478bd9Sstevel@tonic-gate 			break;
898*7c478bd9Sstevel@tonic-gate 	}
899*7c478bd9Sstevel@tonic-gate 
900*7c478bd9Sstevel@tonic-gate 	/*
901*7c478bd9Sstevel@tonic-gate 	 * if start address not in list
902*7c478bd9Sstevel@tonic-gate 	 */
903*7c478bd9Sstevel@tonic-gate 	if (next == NULL) {
904*7c478bd9Sstevel@tonic-gate 		retval = -1;
905*7c478bd9Sstevel@tonic-gate 		goto delete_done;
906*7c478bd9Sstevel@tonic-gate 	}
907*7c478bd9Sstevel@tonic-gate 
908*7c478bd9Sstevel@tonic-gate 	/*
909*7c478bd9Sstevel@tonic-gate 	 * error if size goes off end of this struct memlist
910*7c478bd9Sstevel@tonic-gate 	 */
911*7c478bd9Sstevel@tonic-gate 	if (address + bytes > next->address + next->size) {
912*7c478bd9Sstevel@tonic-gate 		retval = -1;
913*7c478bd9Sstevel@tonic-gate 		goto delete_done;
914*7c478bd9Sstevel@tonic-gate 	}
915*7c478bd9Sstevel@tonic-gate 
916*7c478bd9Sstevel@tonic-gate 	/*
917*7c478bd9Sstevel@tonic-gate 	 * pages at beginning of struct memlist
918*7c478bd9Sstevel@tonic-gate 	 */
919*7c478bd9Sstevel@tonic-gate 	if (address == next->address) {
920*7c478bd9Sstevel@tonic-gate 		/*
921*7c478bd9Sstevel@tonic-gate 		 * if start & size match, delete from list
922*7c478bd9Sstevel@tonic-gate 		 */
923*7c478bd9Sstevel@tonic-gate 		if (bytes == next->size) {
924*7c478bd9Sstevel@tonic-gate 			if (next == memscrub_memlist)
925*7c478bd9Sstevel@tonic-gate 				memscrub_memlist = next->next;
926*7c478bd9Sstevel@tonic-gate 			if (next->prev != NULL)
927*7c478bd9Sstevel@tonic-gate 				next->prev->next = next->next;
928*7c478bd9Sstevel@tonic-gate 			if (next->next != NULL)
929*7c478bd9Sstevel@tonic-gate 				next->next->prev = next->prev;
930*7c478bd9Sstevel@tonic-gate 
931*7c478bd9Sstevel@tonic-gate 			kmem_free(next, sizeof (struct memlist));
932*7c478bd9Sstevel@tonic-gate 		} else {
933*7c478bd9Sstevel@tonic-gate 		/*
934*7c478bd9Sstevel@tonic-gate 		 * increment start address by bytes
935*7c478bd9Sstevel@tonic-gate 		 */
936*7c478bd9Sstevel@tonic-gate 			next->address += bytes;
937*7c478bd9Sstevel@tonic-gate 			next->size -= bytes;
938*7c478bd9Sstevel@tonic-gate 		}
939*7c478bd9Sstevel@tonic-gate 		goto delete_done;
940*7c478bd9Sstevel@tonic-gate 	}
941*7c478bd9Sstevel@tonic-gate 
942*7c478bd9Sstevel@tonic-gate 	/*
943*7c478bd9Sstevel@tonic-gate 	 * pages at end of struct memlist
944*7c478bd9Sstevel@tonic-gate 	 */
945*7c478bd9Sstevel@tonic-gate 	if (address + bytes == next->address + next->size) {
946*7c478bd9Sstevel@tonic-gate 		/*
947*7c478bd9Sstevel@tonic-gate 		 * decrement size by bytes
948*7c478bd9Sstevel@tonic-gate 		 */
949*7c478bd9Sstevel@tonic-gate 		next->size -= bytes;
950*7c478bd9Sstevel@tonic-gate 		goto delete_done;
951*7c478bd9Sstevel@tonic-gate 	}
952*7c478bd9Sstevel@tonic-gate 
953*7c478bd9Sstevel@tonic-gate 	/*
954*7c478bd9Sstevel@tonic-gate 	 * delete a span in the middle of the struct memlist
955*7c478bd9Sstevel@tonic-gate 	 */
956*7c478bd9Sstevel@tonic-gate 	{
957*7c478bd9Sstevel@tonic-gate 		/*
958*7c478bd9Sstevel@tonic-gate 		 * create a new struct memlist
959*7c478bd9Sstevel@tonic-gate 		 */
960*7c478bd9Sstevel@tonic-gate 		dst = (struct memlist *)
961*7c478bd9Sstevel@tonic-gate 		    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
962*7c478bd9Sstevel@tonic-gate 
963*7c478bd9Sstevel@tonic-gate 		if (dst == NULL) {
964*7c478bd9Sstevel@tonic-gate 			retval = -1;
965*7c478bd9Sstevel@tonic-gate 			goto delete_done;
966*7c478bd9Sstevel@tonic-gate 		}
967*7c478bd9Sstevel@tonic-gate 
968*7c478bd9Sstevel@tonic-gate 		/*
969*7c478bd9Sstevel@tonic-gate 		 * existing struct memlist gets address
970*7c478bd9Sstevel@tonic-gate 		 * and size up to pfn
971*7c478bd9Sstevel@tonic-gate 		 */
972*7c478bd9Sstevel@tonic-gate 		dst->address = address + bytes;
973*7c478bd9Sstevel@tonic-gate 		dst->size = (next->address + next->size) - dst->address;
974*7c478bd9Sstevel@tonic-gate 		next->size = address - next->address;
975*7c478bd9Sstevel@tonic-gate 
976*7c478bd9Sstevel@tonic-gate 		/*
977*7c478bd9Sstevel@tonic-gate 		 * new struct memlist gets address starting
978*7c478bd9Sstevel@tonic-gate 		 * after pfn, until end
979*7c478bd9Sstevel@tonic-gate 		 */
980*7c478bd9Sstevel@tonic-gate 
981*7c478bd9Sstevel@tonic-gate 		/*
982*7c478bd9Sstevel@tonic-gate 		 * link in new memlist after old
983*7c478bd9Sstevel@tonic-gate 		 */
984*7c478bd9Sstevel@tonic-gate 		dst->next = next->next;
985*7c478bd9Sstevel@tonic-gate 		dst->prev = next;
986*7c478bd9Sstevel@tonic-gate 
987*7c478bd9Sstevel@tonic-gate 		if (next->next != NULL)
988*7c478bd9Sstevel@tonic-gate 			next->next->prev = dst;
989*7c478bd9Sstevel@tonic-gate 		next->next = dst;
990*7c478bd9Sstevel@tonic-gate 	}
991*7c478bd9Sstevel@tonic-gate 
992*7c478bd9Sstevel@tonic-gate delete_done:
993*7c478bd9Sstevel@tonic-gate 	if (retval != -1) {
994*7c478bd9Sstevel@tonic-gate 		memscrub_phys_pages -= pages;
995*7c478bd9Sstevel@tonic-gate 		if (memscrub_phys_pages == 0)
996*7c478bd9Sstevel@tonic-gate 			disable_memscrub = 1;
997*7c478bd9Sstevel@tonic-gate 	}
998*7c478bd9Sstevel@tonic-gate 
999*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
1000*7c478bd9Sstevel@tonic-gate 	memscrub_printmemlist("memscrub_memlist After", memscrub_memlist);
1001*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
1002*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
1003*7c478bd9Sstevel@tonic-gate 
1004*7c478bd9Sstevel@tonic-gate 	mutex_exit(&memscrub_lock);
1005*7c478bd9Sstevel@tonic-gate 	return (retval);
1006*7c478bd9Sstevel@tonic-gate }
1007*7c478bd9Sstevel@tonic-gate 
1008*7c478bd9Sstevel@tonic-gate static void
1009*7c478bd9Sstevel@tonic-gate memscrub_scan(uint_t blks, ms_paddr_t src)
1010*7c478bd9Sstevel@tonic-gate {
1011*7c478bd9Sstevel@tonic-gate 	uint_t 		psz, bpp, pgsread;
1012*7c478bd9Sstevel@tonic-gate 	pfn_t		pfn;
1013*7c478bd9Sstevel@tonic-gate 	ms_paddr_t	pa;
1014*7c478bd9Sstevel@tonic-gate 	caddr_t		va;
1015*7c478bd9Sstevel@tonic-gate 	on_trap_data_t	otd;
1016*7c478bd9Sstevel@tonic-gate 
1017*7c478bd9Sstevel@tonic-gate 	extern void memscrub_read(caddr_t src, uint_t blks);
1018*7c478bd9Sstevel@tonic-gate 
1019*7c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&memscrub_lock));
1020*7c478bd9Sstevel@tonic-gate 
1021*7c478bd9Sstevel@tonic-gate 	pgsread = 0;
1022*7c478bd9Sstevel@tonic-gate 	pa = src;
1023*7c478bd9Sstevel@tonic-gate 
1024*7c478bd9Sstevel@tonic-gate 	while (blks != 0) {
1025*7c478bd9Sstevel@tonic-gate 		/* Ensure the PA is properly aligned */
1026*7c478bd9Sstevel@tonic-gate 		if (((pa & MMU_PAGEMASK4M) == pa) &&
1027*7c478bd9Sstevel@tonic-gate 			(blks >= MEMSCRUB_BPP4M)) {
1028*7c478bd9Sstevel@tonic-gate 			psz = MMU_PAGESIZE4M;
1029*7c478bd9Sstevel@tonic-gate 			bpp = MEMSCRUB_BPP4M;
1030*7c478bd9Sstevel@tonic-gate 		} else if (((pa & MMU_PAGEMASK512K) == pa) &&
1031*7c478bd9Sstevel@tonic-gate 			(blks >= MEMSCRUB_BPP512K)) {
1032*7c478bd9Sstevel@tonic-gate 			psz = MMU_PAGESIZE512K;
1033*7c478bd9Sstevel@tonic-gate 			bpp = MEMSCRUB_BPP512K;
1034*7c478bd9Sstevel@tonic-gate 		} else if (((pa & MMU_PAGEMASK64K) == pa) &&
1035*7c478bd9Sstevel@tonic-gate 			(blks >= MEMSCRUB_BPP64K)) {
1036*7c478bd9Sstevel@tonic-gate 			psz = MMU_PAGESIZE64K;
1037*7c478bd9Sstevel@tonic-gate 			bpp = MEMSCRUB_BPP64K;
1038*7c478bd9Sstevel@tonic-gate 		} else if ((pa & MMU_PAGEMASK) == pa) {
1039*7c478bd9Sstevel@tonic-gate 			psz = MMU_PAGESIZE;
1040*7c478bd9Sstevel@tonic-gate 			bpp = MEMSCRUB_BPP;
1041*7c478bd9Sstevel@tonic-gate 		} else {
1042*7c478bd9Sstevel@tonic-gate 			if (memscrub_verbose) {
1043*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "Memory scrubber ignoring "
1044*7c478bd9Sstevel@tonic-gate 				    "non-page aligned block starting at 0x%"
1045*7c478bd9Sstevel@tonic-gate 				    PRIx64, src);
1046*7c478bd9Sstevel@tonic-gate 			}
1047*7c478bd9Sstevel@tonic-gate 			return;
1048*7c478bd9Sstevel@tonic-gate 		}
1049*7c478bd9Sstevel@tonic-gate 		if (blks < bpp) bpp = blks;
1050*7c478bd9Sstevel@tonic-gate 
1051*7c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
1052*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "Going to run psz=%x, "
1053*7c478bd9Sstevel@tonic-gate 		    "bpp=%x pa=%llx\n", psz, bpp, pa);
1054*7c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
1055*7c478bd9Sstevel@tonic-gate 
1056*7c478bd9Sstevel@tonic-gate 		/*
1057*7c478bd9Sstevel@tonic-gate 		 * MEMSCRUBBASE is a 4MB aligned page in the
1058*7c478bd9Sstevel@tonic-gate 		 * kernel so that we can quickly map the PA
1059*7c478bd9Sstevel@tonic-gate 		 * to a VA for the block loads performed in
1060*7c478bd9Sstevel@tonic-gate 		 * memscrub_read.
1061*7c478bd9Sstevel@tonic-gate 		 */
1062*7c478bd9Sstevel@tonic-gate 		pfn = mmu_btop(pa);
1063*7c478bd9Sstevel@tonic-gate 		va = (caddr_t)MEMSCRUBBASE;
1064*7c478bd9Sstevel@tonic-gate 		hat_devload(kas.a_hat, va, psz, pfn, PROT_READ,
1065*7c478bd9Sstevel@tonic-gate 			HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
1066*7c478bd9Sstevel@tonic-gate 
1067*7c478bd9Sstevel@tonic-gate 		/*
1068*7c478bd9Sstevel@tonic-gate 		 * Can't allow the memscrubber to migrate across CPUs as
1069*7c478bd9Sstevel@tonic-gate 		 * we need to know whether CEEN is enabled for the current
1070*7c478bd9Sstevel@tonic-gate 		 * CPU to enable us to scrub the memory. Don't use
1071*7c478bd9Sstevel@tonic-gate 		 * kpreempt_disable as the time we take to scan a span (even
1072*7c478bd9Sstevel@tonic-gate 		 * without cpu_check_ce having to manually cpu_check_block)
1073*7c478bd9Sstevel@tonic-gate 		 * is too long to hold a higher priority thread (eg, RT)
1074*7c478bd9Sstevel@tonic-gate 		 * off cpu.
1075*7c478bd9Sstevel@tonic-gate 		 */
1076*7c478bd9Sstevel@tonic-gate 		thread_affinity_set(curthread, CPU_CURRENT);
1077*7c478bd9Sstevel@tonic-gate 
1078*7c478bd9Sstevel@tonic-gate 		/*
1079*7c478bd9Sstevel@tonic-gate 		 * Protect read scrub from async faults.  For now, we simply
1080*7c478bd9Sstevel@tonic-gate 		 * maintain a count of such faults caught.
1081*7c478bd9Sstevel@tonic-gate 		 */
1082*7c478bd9Sstevel@tonic-gate 
1083*7c478bd9Sstevel@tonic-gate 		if (!on_trap(&otd, OT_DATA_EC)) {
1084*7c478bd9Sstevel@tonic-gate 			memscrub_read(va, bpp);
1085*7c478bd9Sstevel@tonic-gate 			no_trap();
1086*7c478bd9Sstevel@tonic-gate 			/*
1087*7c478bd9Sstevel@tonic-gate 			 * Check if CEs require logging
1088*7c478bd9Sstevel@tonic-gate 			 */
1089*7c478bd9Sstevel@tonic-gate 			cpu_check_ce(SCRUBBER_CEEN_CHECK,
1090*7c478bd9Sstevel@tonic-gate 			    (uint64_t)pa, va, psz);
1091*7c478bd9Sstevel@tonic-gate 			thread_affinity_clear(curthread);
1092*7c478bd9Sstevel@tonic-gate 		} else {
1093*7c478bd9Sstevel@tonic-gate 			no_trap();
1094*7c478bd9Sstevel@tonic-gate 			thread_affinity_clear(curthread);
1095*7c478bd9Sstevel@tonic-gate 
1096*7c478bd9Sstevel@tonic-gate 			/*
1097*7c478bd9Sstevel@tonic-gate 			 * Got an async error..
1098*7c478bd9Sstevel@tonic-gate 			 * Try rescanning it at MMU_PAGESIZE
1099*7c478bd9Sstevel@tonic-gate 			 * granularity if we were trying to
1100*7c478bd9Sstevel@tonic-gate 			 * read at a larger page size.
1101*7c478bd9Sstevel@tonic-gate 			 * This is to ensure we continue to
1102*7c478bd9Sstevel@tonic-gate 			 * scan the rest of the span.
1103*7c478bd9Sstevel@tonic-gate 			 */
1104*7c478bd9Sstevel@tonic-gate 			if (psz > MMU_PAGESIZE) {
1105*7c478bd9Sstevel@tonic-gate 			    caddr_t vaddr = va;
1106*7c478bd9Sstevel@tonic-gate 			    ms_paddr_t paddr = pa;
1107*7c478bd9Sstevel@tonic-gate 			    int tmp = 0;
1108*7c478bd9Sstevel@tonic-gate 			    for (; tmp < bpp; tmp += MEMSCRUB_BPP) {
1109*7c478bd9Sstevel@tonic-gate 				thread_affinity_set(curthread, CPU_CURRENT);
1110*7c478bd9Sstevel@tonic-gate 				if (!on_trap(&otd, OT_DATA_EC))
1111*7c478bd9Sstevel@tonic-gate 				    memscrub_read(vaddr, MEMSCRUB_BPP);
1112*7c478bd9Sstevel@tonic-gate 				else
1113*7c478bd9Sstevel@tonic-gate 				    memscrub_counts.errors_found.value.ui32++;
1114*7c478bd9Sstevel@tonic-gate 				no_trap();
1115*7c478bd9Sstevel@tonic-gate 				/*
1116*7c478bd9Sstevel@tonic-gate 				 * Check if CEs require logging
1117*7c478bd9Sstevel@tonic-gate 				 */
1118*7c478bd9Sstevel@tonic-gate 				cpu_check_ce(SCRUBBER_CEEN_CHECK,
1119*7c478bd9Sstevel@tonic-gate 				    (uint64_t)paddr, vaddr, MMU_PAGESIZE);
1120*7c478bd9Sstevel@tonic-gate 				thread_affinity_clear(curthread);
1121*7c478bd9Sstevel@tonic-gate 				vaddr += MMU_PAGESIZE;
1122*7c478bd9Sstevel@tonic-gate 				paddr += MMU_PAGESIZE;
1123*7c478bd9Sstevel@tonic-gate 			    }
1124*7c478bd9Sstevel@tonic-gate 			}
1125*7c478bd9Sstevel@tonic-gate 		}
1126*7c478bd9Sstevel@tonic-gate 		hat_unload(kas.a_hat, va, psz, HAT_UNLOAD_UNLOCK);
1127*7c478bd9Sstevel@tonic-gate 
1128*7c478bd9Sstevel@tonic-gate 		blks -= bpp;
1129*7c478bd9Sstevel@tonic-gate 		pa += psz;
1130*7c478bd9Sstevel@tonic-gate 		pgsread++;
1131*7c478bd9Sstevel@tonic-gate 	}
1132*7c478bd9Sstevel@tonic-gate 	if (memscrub_verbose) {
1133*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "Memory scrubber read 0x%x pages starting "
1134*7c478bd9Sstevel@tonic-gate 		    "at 0x%" PRIx64, pgsread, src);
1135*7c478bd9Sstevel@tonic-gate 	}
1136*7c478bd9Sstevel@tonic-gate }
1137*7c478bd9Sstevel@tonic-gate 
1138*7c478bd9Sstevel@tonic-gate /*
1139*7c478bd9Sstevel@tonic-gate  * The memory add/delete callback mechanism does not pass in the
1140*7c478bd9Sstevel@tonic-gate  * page ranges. The phys_install list has been updated though, so
1141*7c478bd9Sstevel@tonic-gate  * create a new scrub list from it.
1142*7c478bd9Sstevel@tonic-gate  */
1143*7c478bd9Sstevel@tonic-gate 
1144*7c478bd9Sstevel@tonic-gate static int
1145*7c478bd9Sstevel@tonic-gate new_memscrub()
1146*7c478bd9Sstevel@tonic-gate {
1147*7c478bd9Sstevel@tonic-gate 	struct memlist *src, *list, *old_list;
1148*7c478bd9Sstevel@tonic-gate 	uint_t npgs;
1149*7c478bd9Sstevel@tonic-gate 
1150*7c478bd9Sstevel@tonic-gate 	/*
1151*7c478bd9Sstevel@tonic-gate 	 * copy phys_install to memscrub_memlist
1152*7c478bd9Sstevel@tonic-gate 	 */
1153*7c478bd9Sstevel@tonic-gate 	list = NULL;
1154*7c478bd9Sstevel@tonic-gate 	npgs = 0;
1155*7c478bd9Sstevel@tonic-gate 	memlist_read_lock();
1156*7c478bd9Sstevel@tonic-gate 	for (src = phys_install; src; src = src->next) {
1157*7c478bd9Sstevel@tonic-gate 		if (memscrub_add_span_gen((pfn_t)(src->address >> PAGESHIFT),
1158*7c478bd9Sstevel@tonic-gate 		    (pgcnt_t)(src->size >> PAGESHIFT), &list, &npgs)) {
1159*7c478bd9Sstevel@tonic-gate 			memlist_read_unlock();
1160*7c478bd9Sstevel@tonic-gate 			while (list) {
1161*7c478bd9Sstevel@tonic-gate 				struct memlist *el;
1162*7c478bd9Sstevel@tonic-gate 
1163*7c478bd9Sstevel@tonic-gate 				el = list;
1164*7c478bd9Sstevel@tonic-gate 				list = list->next;
1165*7c478bd9Sstevel@tonic-gate 				kmem_free(el, sizeof (struct memlist));
1166*7c478bd9Sstevel@tonic-gate 			}
1167*7c478bd9Sstevel@tonic-gate 			return (-1);
1168*7c478bd9Sstevel@tonic-gate 		}
1169*7c478bd9Sstevel@tonic-gate 	}
1170*7c478bd9Sstevel@tonic-gate 	memlist_read_unlock();
1171*7c478bd9Sstevel@tonic-gate 
1172*7c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
1173*7c478bd9Sstevel@tonic-gate 	memscrub_phys_pages = npgs;
1174*7c478bd9Sstevel@tonic-gate 	old_list = memscrub_memlist;
1175*7c478bd9Sstevel@tonic-gate 	memscrub_memlist = list;
1176*7c478bd9Sstevel@tonic-gate 	mutex_exit(&memscrub_lock);
1177*7c478bd9Sstevel@tonic-gate 
1178*7c478bd9Sstevel@tonic-gate 	while (old_list) {
1179*7c478bd9Sstevel@tonic-gate 		struct memlist *el;
1180*7c478bd9Sstevel@tonic-gate 
1181*7c478bd9Sstevel@tonic-gate 		el = old_list;
1182*7c478bd9Sstevel@tonic-gate 		old_list = old_list->next;
1183*7c478bd9Sstevel@tonic-gate 		kmem_free(el, sizeof (struct memlist));
1184*7c478bd9Sstevel@tonic-gate 	}
1185*7c478bd9Sstevel@tonic-gate 	return (0);
1186*7c478bd9Sstevel@tonic-gate }
1187*7c478bd9Sstevel@tonic-gate 
1188*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1189*7c478bd9Sstevel@tonic-gate static void
1190*7c478bd9Sstevel@tonic-gate memscrub_mem_config_post_add(
1191*7c478bd9Sstevel@tonic-gate 	void *arg,
1192*7c478bd9Sstevel@tonic-gate 	pgcnt_t delta_pages)
1193*7c478bd9Sstevel@tonic-gate {
1194*7c478bd9Sstevel@tonic-gate 	/*
1195*7c478bd9Sstevel@tonic-gate 	 * We increment pause_memscrub before entering new_memscrub(). This
1196*7c478bd9Sstevel@tonic-gate 	 * will force the memscrubber to sleep, allowing the DR callback
1197*7c478bd9Sstevel@tonic-gate 	 * thread to acquire memscrub_lock in new_memscrub(). The use of
1198*7c478bd9Sstevel@tonic-gate 	 * atomic_add_32() allows concurrent memory DR operations to use the
1199*7c478bd9Sstevel@tonic-gate 	 * callbacks safely.
1200*7c478bd9Sstevel@tonic-gate 	 */
1201*7c478bd9Sstevel@tonic-gate 	atomic_add_32(&pause_memscrub, 1);
1202*7c478bd9Sstevel@tonic-gate 	ASSERT(pause_memscrub != 0);
1203*7c478bd9Sstevel@tonic-gate 
1204*7c478bd9Sstevel@tonic-gate 	/*
1205*7c478bd9Sstevel@tonic-gate 	 * "Don't care" if we are not scrubbing new memory.
1206*7c478bd9Sstevel@tonic-gate 	 */
1207*7c478bd9Sstevel@tonic-gate 	(void) new_memscrub();
1208*7c478bd9Sstevel@tonic-gate 
1209*7c478bd9Sstevel@tonic-gate 	/* Restore the pause setting. */
1210*7c478bd9Sstevel@tonic-gate 	atomic_add_32(&pause_memscrub, -1);
1211*7c478bd9Sstevel@tonic-gate }
1212*7c478bd9Sstevel@tonic-gate 
/*ARGSUSED*/
static int
memscrub_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
	/*
	 * Nothing to do before a delete; the scrub list is rebuilt in
	 * the post-delete callback.  Returning 0 allows the delete to
	 * proceed.
	 */
	return (0);
}
1222*7c478bd9Sstevel@tonic-gate 
1223*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1224*7c478bd9Sstevel@tonic-gate static void
1225*7c478bd9Sstevel@tonic-gate memscrub_mem_config_post_del(
1226*7c478bd9Sstevel@tonic-gate 	void *arg,
1227*7c478bd9Sstevel@tonic-gate 	pgcnt_t delta_pages,
1228*7c478bd9Sstevel@tonic-gate 	int cancelled)
1229*7c478bd9Sstevel@tonic-gate {
1230*7c478bd9Sstevel@tonic-gate 	/*
1231*7c478bd9Sstevel@tonic-gate 	 * We increment pause_memscrub before entering new_memscrub(). This
1232*7c478bd9Sstevel@tonic-gate 	 * will force the memscrubber to sleep, allowing the DR callback
1233*7c478bd9Sstevel@tonic-gate 	 * thread to acquire memscrub_lock in new_memscrub(). The use of
1234*7c478bd9Sstevel@tonic-gate 	 * atomic_add_32() allows concurrent memory DR operations to use the
1235*7c478bd9Sstevel@tonic-gate 	 * callbacks safely.
1236*7c478bd9Sstevel@tonic-gate 	 */
1237*7c478bd9Sstevel@tonic-gate 	atomic_add_32(&pause_memscrub, 1);
1238*7c478bd9Sstevel@tonic-gate 	ASSERT(pause_memscrub != 0);
1239*7c478bd9Sstevel@tonic-gate 
1240*7c478bd9Sstevel@tonic-gate 	/*
1241*7c478bd9Sstevel@tonic-gate 	 * Must stop scrubbing deleted memory as it may be disconnected.
1242*7c478bd9Sstevel@tonic-gate 	 */
1243*7c478bd9Sstevel@tonic-gate 	if (new_memscrub()) {
1244*7c478bd9Sstevel@tonic-gate 		disable_memscrub = 1;
1245*7c478bd9Sstevel@tonic-gate 	}
1246*7c478bd9Sstevel@tonic-gate 
1247*7c478bd9Sstevel@tonic-gate 	/* Restore the pause setting. */
1248*7c478bd9Sstevel@tonic-gate 	atomic_add_32(&pause_memscrub, -1);
1249*7c478bd9Sstevel@tonic-gate }
1250*7c478bd9Sstevel@tonic-gate 
/*
 * Memory DR callback vector, registered with the kphysm framework by
 * memscrub_init_mem_config() below.
 */
static kphysm_setup_vector_t memscrub_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	memscrub_mem_config_post_add,	/* rebuild list after an add */
	memscrub_mem_config_pre_del,	/* no-op, permits the delete */
	memscrub_mem_config_post_del,	/* rebuild list after a delete */
};
1257*7c478bd9Sstevel@tonic-gate 
1258*7c478bd9Sstevel@tonic-gate static void
1259*7c478bd9Sstevel@tonic-gate memscrub_init_mem_config()
1260*7c478bd9Sstevel@tonic-gate {
1261*7c478bd9Sstevel@tonic-gate 	int ret;
1262*7c478bd9Sstevel@tonic-gate 
1263*7c478bd9Sstevel@tonic-gate 	ret = kphysm_setup_func_register(&memscrub_mem_config_vec,
1264*7c478bd9Sstevel@tonic-gate 	    (void *)NULL);
1265*7c478bd9Sstevel@tonic-gate 	ASSERT(ret == 0);
1266*7c478bd9Sstevel@tonic-gate }
1267*7c478bd9Sstevel@tonic-gate 
/*
 * Unhook the memory DR callbacks at scrubber teardown.
 */
static void
memscrub_uninit_mem_config()
{
	/* This call is OK if the register call was not done. */
	kphysm_setup_func_unregister(&memscrub_mem_config_vec, (void *)NULL);
}
1274