xref: /illumos-gate/usr/src/uts/sun4u/os/memscrub.c (revision 61ef38f713de7319d4f58f032c73e289af705ca6)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*61ef38f7Svb70745  * Common Development and Distribution License (the "License").
6*61ef38f7Svb70745  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*61ef38f7Svb70745  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate /*
297c478bd9Sstevel@tonic-gate  * sun4u Memory Scrubbing
307c478bd9Sstevel@tonic-gate  *
317c478bd9Sstevel@tonic-gate  * On detection of a correctable memory ECC error, the sun4u kernel
327c478bd9Sstevel@tonic-gate  * returns the corrected data to the requester and re-writes it
337c478bd9Sstevel@tonic-gate  * to memory (DRAM).  So if the correctable error was transient,
347c478bd9Sstevel@tonic-gate  * the read has effectively been cleaned (scrubbed) from memory.
357c478bd9Sstevel@tonic-gate  *
367c478bd9Sstevel@tonic-gate  * Scrubbing thus reduces the likelihood that multiple transient errors
377c478bd9Sstevel@tonic-gate  * will occur in the same memory word, making uncorrectable errors due
387c478bd9Sstevel@tonic-gate  * to transients less likely.
397c478bd9Sstevel@tonic-gate  *
407c478bd9Sstevel@tonic-gate  * Thus is born the desire that every memory location be periodically
417c478bd9Sstevel@tonic-gate  * accessed.
427c478bd9Sstevel@tonic-gate  *
437c478bd9Sstevel@tonic-gate  * This file implements a memory scrubbing thread.  This scrubber
447c478bd9Sstevel@tonic-gate  * guarantees that all of physical memory is accessed periodically
457c478bd9Sstevel@tonic-gate  * (memscrub_period_sec -- 12 hours).
467c478bd9Sstevel@tonic-gate  *
477c478bd9Sstevel@tonic-gate  * It attempts to do this as unobtrusively as possible.  The thread
487c478bd9Sstevel@tonic-gate  * schedules itself to wake up at an interval such that if it reads
497c478bd9Sstevel@tonic-gate  * memscrub_span_pages (8MB) on each wakeup, it will read all of physical
507c478bd9Sstevel@tonic-gate  * memory in memscrub_period_sec (12 hours).
517c478bd9Sstevel@tonic-gate  *
527c478bd9Sstevel@tonic-gate  * The scrubber uses the block load hardware to read memory @ 268MB/s,
537c478bd9Sstevel@tonic-gate  * so it reads spans of 8MB in 0.03 seconds.  Unlike the original sun4d
547c478bd9Sstevel@tonic-gate  * scrubber the sun4u scrubber does not read ahead if the system is idle
557c478bd9Sstevel@tonic-gate  * because we can read memory very efficiently.
567c478bd9Sstevel@tonic-gate  *
577c478bd9Sstevel@tonic-gate  * The scrubber maintains a private copy of the phys_install memory list
587c478bd9Sstevel@tonic-gate  * to keep track of what memory should be scrubbed.
597c478bd9Sstevel@tonic-gate  *
607c478bd9Sstevel@tonic-gate  * The global routines memscrub_add_span() and memscrub_delete_span() are
617c478bd9Sstevel@tonic-gate  * used to add and delete from this list.  If hotplug memory is later
627c478bd9Sstevel@tonic-gate  * supported these two routines can be used to notify the scrubber of
637c478bd9Sstevel@tonic-gate  * memory configuration changes.
647c478bd9Sstevel@tonic-gate  *
657c478bd9Sstevel@tonic-gate  * The following parameters can be set via /etc/system
667c478bd9Sstevel@tonic-gate  *
677c478bd9Sstevel@tonic-gate  * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (8MB)
687c478bd9Sstevel@tonic-gate  * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
697c478bd9Sstevel@tonic-gate  * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI)
707c478bd9Sstevel@tonic-gate  * memscrub_delay_start_sec = (5 minutes)
717c478bd9Sstevel@tonic-gate  * memscrub_verbose = (0)
727c478bd9Sstevel@tonic-gate  * memscrub_override_ticks = (1 tick)
737c478bd9Sstevel@tonic-gate  * disable_memscrub = (0)
747c478bd9Sstevel@tonic-gate  * pause_memscrub = (0)
757c478bd9Sstevel@tonic-gate  * read_all_memscrub = (0)
767c478bd9Sstevel@tonic-gate  *
777c478bd9Sstevel@tonic-gate  * The scrubber will print NOTICE messages of what it is doing if
787c478bd9Sstevel@tonic-gate  * "memscrub_verbose" is set.
797c478bd9Sstevel@tonic-gate  *
807c478bd9Sstevel@tonic-gate  * If the scrubber's sleep time calculation drops to zero ticks,
817c478bd9Sstevel@tonic-gate  * memscrub_override_ticks will be used as the sleep time instead. The
827c478bd9Sstevel@tonic-gate  * sleep time should only drop to zero on a system with over 32.95
837c478bd9Sstevel@tonic-gate  * terabytes of memory, or where the default scrubber parameters have
847c478bd9Sstevel@tonic-gate  * been adjusted. For example, reducing memscrub_span_pages or
857c478bd9Sstevel@tonic-gate  * memscrub_period_sec causes the sleep time to drop to zero with less
867c478bd9Sstevel@tonic-gate  * memory. Note that since the sleep time is calculated in clock ticks,
877c478bd9Sstevel@tonic-gate  * using hires clock ticks allows for more memory before the sleep time
887c478bd9Sstevel@tonic-gate  * becomes zero.
897c478bd9Sstevel@tonic-gate  *
907c478bd9Sstevel@tonic-gate  * The scrubber will exit (or never be started) if it finds the variable
917c478bd9Sstevel@tonic-gate  * "disable_memscrub" set.
927c478bd9Sstevel@tonic-gate  *
937c478bd9Sstevel@tonic-gate  * The scrubber will pause (not read memory) when "pause_memscrub"
947c478bd9Sstevel@tonic-gate  * is set.  It will check the state of pause_memscrub at each wakeup
957c478bd9Sstevel@tonic-gate  * period.  The scrubber will not make up for lost time.  If you
967c478bd9Sstevel@tonic-gate  * pause the scrubber for a prolonged period of time you can use
977c478bd9Sstevel@tonic-gate  * the "read_all_memscrub" switch (see below) to catch up. In addition,
987c478bd9Sstevel@tonic-gate  * pause_memscrub is used internally by the post memory DR callbacks.
997c478bd9Sstevel@tonic-gate  * It is set for the small period of time during which the callbacks
1007c478bd9Sstevel@tonic-gate  * are executing. This ensures "memscrub_lock" will be released,
1017c478bd9Sstevel@tonic-gate  * allowing the callbacks to finish.
1027c478bd9Sstevel@tonic-gate  *
1037c478bd9Sstevel@tonic-gate  * The scrubber will read all memory if "read_all_memscrub" is set.
1047c478bd9Sstevel@tonic-gate  * The normal span read will also occur during the wakeup.
1057c478bd9Sstevel@tonic-gate  *
1067c478bd9Sstevel@tonic-gate  * MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system
1077c478bd9Sstevel@tonic-gate  * must have before we'll start the scrubber.
1087c478bd9Sstevel@tonic-gate  *
1097c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_SPAN_PAGES (8MB) is based on the guess that 0.03 sec
1107c478bd9Sstevel@tonic-gate  * is a "good" amount of minimum time for the thread to run at a time.
1117c478bd9Sstevel@tonic-gate  *
1127c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
1137c478bd9Sstevel@tonic-gate  * twice the frequency the hardware folk estimated would be necessary.
1147c478bd9Sstevel@tonic-gate  *
1157c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption
1167c478bd9Sstevel@tonic-gate  * that the scrubber should get its fair share of time (since it
1177c478bd9Sstevel@tonic-gate  * is short).  At a priority of 0 the scrubber will be starved.
1187c478bd9Sstevel@tonic-gate  */
1197c478bd9Sstevel@tonic-gate 
1207c478bd9Sstevel@tonic-gate #include <sys/systm.h>		/* timeout, types, t_lock */
1217c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
1227c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>	/* MIN */
1237c478bd9Sstevel@tonic-gate #include <sys/memlist.h>	/* memlist */
1247c478bd9Sstevel@tonic-gate #include <sys/mem_config.h>	/* memory add/delete */
1257c478bd9Sstevel@tonic-gate #include <sys/kmem.h>		/* KMEM_NOSLEEP */
1267c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>		/* ncpus_online */
1277c478bd9Sstevel@tonic-gate #include <sys/debug.h>		/* ASSERTs */
1287c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>	/* lddphys */
1297c478bd9Sstevel@tonic-gate #include <sys/cpu_module.h>	/* vtag_flushpage */
1307c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
1317c478bd9Sstevel@tonic-gate #include <sys/atomic.h>		/* atomic_add_32 */
1327c478bd9Sstevel@tonic-gate 
1337c478bd9Sstevel@tonic-gate #include <vm/hat.h>
1347c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
1357c478bd9Sstevel@tonic-gate #include <vm/hat_sfmmu.h>	/* XXX FIXME - delete */
1367c478bd9Sstevel@tonic-gate 
1377c478bd9Sstevel@tonic-gate #include <sys/time.h>
1387c478bd9Sstevel@tonic-gate #include <sys/callb.h>		/* CPR callback */
1397c478bd9Sstevel@tonic-gate #include <sys/ontrap.h>
1407c478bd9Sstevel@tonic-gate 
/*
 * Physical address type used by the scrubber.
 *
 * Should really have paddr_t defined, but it is broken.  Use
 * ms_paddr_t (an unsigned 64-bit integer) in the meantime to make
 * the code cleaner.
 */
typedef uint64_t ms_paddr_t;
1467c478bd9Sstevel@tonic-gate 
/*
 * Global Routines:
 *
 * memscrub_add_span() / memscrub_delete_span() add to and delete from the
 * scrubber's private memory list; each takes a starting page frame number
 * and a page count.
 *
 * memscrub_init() builds that list from phys_install and starts the
 * scrubber thread; it returns 0 on success and -1 on failure.
 *
 * memscrub_induced_error() is called by cpu_async_log_err() when a CE/UE
 * occurs on a retired page because of a scrubber read.
 */
int memscrub_add_span(pfn_t pfn, pgcnt_t pages);
int memscrub_delete_span(pfn_t pfn, pgcnt_t pages);
int memscrub_init(void);
void memscrub_induced_error(void);
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate /*
1567c478bd9Sstevel@tonic-gate  * Global Data:
1577c478bd9Sstevel@tonic-gate  */
1587c478bd9Sstevel@tonic-gate 
/*
 * scrub if we have at least this many pages (32MB worth of pages)
 */
#define	MEMSCRUB_MIN_PAGES (32 * 1024 * 1024 / PAGESIZE)

/*
 * scan all of physical memory at least once every MEMSCRUB_DFL_PERIOD_SEC
 * (default value of the memscrub_period_sec tunable)
 */
#define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */

/*
 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration (8MB worth of
 * pages; default value of the memscrub_span_pages tunable)
 */
#define	MEMSCRUB_DFL_SPAN_PAGES	((8 * 1024 * 1024) / PAGESIZE)

/*
 * almost anything is higher priority than scrubbing
 * (default value of the memscrub_thread_pri tunable)
 */
#define	MEMSCRUB_DFL_THREAD_PRI	MINCLSYSPRI
1787c478bd9Sstevel@tonic-gate 
/*
 * size used when scanning memory
 */
#define	MEMSCRUB_BLOCK_SIZE		256
#define	MEMSCRUB_BLOCK_SIZE_SHIFT	8	/* log2(MEMSCRUB_BLOCK_SIZE) */
#define	MEMSCRUB_BLOCKS_PER_PAGE	(PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)

/*
 * Number of MEMSCRUB_BLOCK_SIZE blocks in a page of each MMU page size.
 * Each expansion is fully parenthesized so that the shift cannot be
 * re-associated with a neighbouring operator at the point of use
 * (e.g. "x % MEMSCRUB_BPP4M" or "MEMSCRUB_BPP + 1").
 */
#define	MEMSCRUB_BPP4M		(MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP512K	(MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP64K		(MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP		(MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)
1907c478bd9Sstevel@tonic-gate 
/*
 * This message indicates that we have exceeded the limitations of
 * the memscrubber. See the comments above regarding what would
 * cause the sleep time to become zero. In DEBUG mode, this message
 * is logged on the console and in the messages file. In non-DEBUG
 * mode, it is only logged in the messages file.
 *
 * The two variants differ only in the leading '!' of the non-DEBUG
 * string.
 */
#ifdef DEBUG
#define	MEMSCRUB_OVERRIDE_MSG	"Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#else
#define	MEMSCRUB_OVERRIDE_MSG	"!Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#endif /* DEBUG */
2057c478bd9Sstevel@tonic-gate 
/*
 * we can patch these defaults in /etc/system if necessary
 *
 * disable_memscrub		- scrubber exits (or is never started) if set
 * pause_memscrub		- scrubber does not read memory while set;
 *				  checked at each wakeup
 * read_all_memscrub		- scrubber reads all memory if set
 * memscrub_verbose		- print NOTICE messages about what the
 *				  scrubber is doing
 * memscrub_all_idle		- not described in the file header and not
 *				  referenced in the code visible here
 * memscrub_span_pages		- pages read on each wakeup
 * memscrub_period_sec		- time in which all memory is to be read
 * memscrub_thread_pri		- priority the scrubber thread is created at
 * memscrub_delay_start_sec	- delay before the first deadline (5 minutes)
 * memscrub_override_ticks	- sleep time used when the computed sleep
 *				  time drops to zero ticks
 */
uint_t disable_memscrub = 0;
uint_t pause_memscrub = 0;
uint_t read_all_memscrub = 0;
uint_t memscrub_verbose = 0;
uint_t memscrub_all_idle = 0;
uint_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
uint_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
uint_t memscrub_delay_start_sec = 5 * 60;
uint_t memscrub_override_ticks = 1;
2197c478bd9Sstevel@tonic-gate 
/*
 * Static Routines
 */
static void memscrubber(void);		/* body of the scrubber thread */
static void memscrub_cleanup(void);	/* undo memscrub_init() */
static int memscrub_add_span_gen(pfn_t, pgcnt_t, struct memlist **, uint_t *);
static int memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp);
static void memscrub_scan(uint_t blks, ms_paddr_t src);

/*
 * Static Data
 */

/* the scrubber's private copy of the phys_install memory list */
static struct memlist *memscrub_memlist;
/* NOTE(review): presumably the total page count of memscrub_memlist */
static uint_t memscrub_phys_pages;

/* the scrubber thread sleeps on memscrub_cv; memscrub_wakeup() signals it */
static kcondvar_t memscrub_cv;
static kmutex_t memscrub_lock;
/*
 * memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ...
 */
static void memscrub_init_mem_config(void);
static void memscrub_uninit_mem_config(void);
2437c478bd9Sstevel@tonic-gate 
/*
 * Linked list of memscrub aware spans having retired pages.
 * Currently enabled only on sun4u USIII-based platforms.
 *
 * Each node records one physical address and a link to the next node;
 * the list is singly linked and starts out empty.
 */
typedef struct memscrub_page_retire_span {
	ms_paddr_t				address;
	struct memscrub_page_retire_span	*next;
} memscrub_page_retire_span_t;

/* head of the retired-page span list; NULL when the list is empty */
static memscrub_page_retire_span_t *memscrub_page_retire_span_list = NULL;

/* helpers that maintain and query the list above */
static void memscrub_page_retire_span_add(ms_paddr_t);
static void memscrub_page_retire_span_delete(ms_paddr_t);
static int memscrub_page_retire_span_search(ms_paddr_t);
static void memscrub_page_retire_span_list_update(void);
259*61ef38f7Svb70745 
/*
 * add_to_page_retire_list: Set by the cpu_async_log_err() routine,
 * via memscrub_induced_error(), when a CE/UE occurs on a retired
 * page because of a memscrub read.  Cleared by memscrub after it has
 * updated the global page retire span list.  It piggybacks on the
 * protection of memscrub_lock, which is held during both set and clear.
 *
 * Note: When cpu_async_log_err() calls memscrub_induced_error(), it is
 * running in softint context, which is fired on the cpu the memscrub
 * thread is currently running on.  The memscrub thread has affinity set
 * during memscrub_read(), hence migration to a new cpu is not expected.
 */
static int add_to_page_retire_list = 0;
272*61ef38f7Svb70745 
/*
 * Keep track of some interesting statistics
 *
 * All counters are 32-bit unsigned named kstats.  memscrub_init()
 * publishes this structure as the data of the "memscrub_kstat" kstat.
 */
static struct memscrub_kstats {
	kstat_named_t	done_early;	/* ahead of schedule */
	kstat_named_t	early_sec;	/* by cumulative num secs */
	kstat_named_t	done_late;	/* behind schedule */
	kstat_named_t	late_sec;	/* by cumulative num secs */
	kstat_named_t	interval_ticks;	/* num ticks between intervals */
	kstat_named_t	force_run;	/* forced to run, non-timeout */
	kstat_named_t	errors_found;	/* num errors found by memscrub */
} memscrub_counts = {
	{ "done_early",		KSTAT_DATA_UINT32 },
	{ "early_sec", 		KSTAT_DATA_UINT32 },
	{ "done_late", 		KSTAT_DATA_UINT32 },
	{ "late_sec",		KSTAT_DATA_UINT32 },
	{ "interval_ticks",	KSTAT_DATA_UINT32 },
	{ "force_run",		KSTAT_DATA_UINT32 },
	{ "errors_found",	KSTAT_DATA_UINT32 },
};
/* kstat handle returned by kstat_create(); NULL if none was created */
static struct kstat *memscrub_ksp = (struct kstat *)NULL;

/*
 * Id of the timeout armed while the scrubber thread sleeps; the thread
 * resets it to 0 when it wakes up.
 */
static timeout_id_t memscrub_tid = 0;	/* keep track of timeout id */
2967c478bd9Sstevel@tonic-gate 
2977c478bd9Sstevel@tonic-gate /*
2987c478bd9Sstevel@tonic-gate  * create memscrub_memlist from phys_install list
2997c478bd9Sstevel@tonic-gate  * initialize locks, set memscrub_phys_pages.
3007c478bd9Sstevel@tonic-gate  */
3017c478bd9Sstevel@tonic-gate int
3027c478bd9Sstevel@tonic-gate memscrub_init(void)
3037c478bd9Sstevel@tonic-gate {
3047c478bd9Sstevel@tonic-gate 	struct memlist *src;
3057c478bd9Sstevel@tonic-gate 
3067c478bd9Sstevel@tonic-gate 	/*
3077c478bd9Sstevel@tonic-gate 	 * only startup the scrubber if we have a minimum
3087c478bd9Sstevel@tonic-gate 	 * number of pages
3097c478bd9Sstevel@tonic-gate 	 */
3107c478bd9Sstevel@tonic-gate 	if (physinstalled >= MEMSCRUB_MIN_PAGES) {
3117c478bd9Sstevel@tonic-gate 
3127c478bd9Sstevel@tonic-gate 		/*
3137c478bd9Sstevel@tonic-gate 		 * initialize locks
3147c478bd9Sstevel@tonic-gate 		 */
3157c478bd9Sstevel@tonic-gate 		mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
3167c478bd9Sstevel@tonic-gate 		cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);
3177c478bd9Sstevel@tonic-gate 
3187c478bd9Sstevel@tonic-gate 		/*
3197c478bd9Sstevel@tonic-gate 		 * copy phys_install to memscrub_memlist
3207c478bd9Sstevel@tonic-gate 		 */
3217c478bd9Sstevel@tonic-gate 		for (src = phys_install; src; src = src->next) {
3227c478bd9Sstevel@tonic-gate 			if (memscrub_add_span(
3237c478bd9Sstevel@tonic-gate 			    (pfn_t)(src->address >> PAGESHIFT),
3247c478bd9Sstevel@tonic-gate 			    (pgcnt_t)(src->size >> PAGESHIFT))) {
3257c478bd9Sstevel@tonic-gate 				memscrub_cleanup();
3267c478bd9Sstevel@tonic-gate 				return (-1);
3277c478bd9Sstevel@tonic-gate 			}
3287c478bd9Sstevel@tonic-gate 		}
3297c478bd9Sstevel@tonic-gate 
3307c478bd9Sstevel@tonic-gate 		/*
3317c478bd9Sstevel@tonic-gate 		 * initialize kstats
3327c478bd9Sstevel@tonic-gate 		 */
3337c478bd9Sstevel@tonic-gate 		memscrub_ksp = kstat_create("unix", 0, "memscrub_kstat",
3347c478bd9Sstevel@tonic-gate 			"misc", KSTAT_TYPE_NAMED,
3357c478bd9Sstevel@tonic-gate 			sizeof (memscrub_counts) / sizeof (kstat_named_t),
3367c478bd9Sstevel@tonic-gate 			KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
3377c478bd9Sstevel@tonic-gate 
3387c478bd9Sstevel@tonic-gate 		if (memscrub_ksp) {
3397c478bd9Sstevel@tonic-gate 			memscrub_ksp->ks_data = (void *)&memscrub_counts;
3407c478bd9Sstevel@tonic-gate 			kstat_install(memscrub_ksp);
3417c478bd9Sstevel@tonic-gate 		} else {
3427c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "Memscrubber cannot create kstats\n");
3437c478bd9Sstevel@tonic-gate 		}
3447c478bd9Sstevel@tonic-gate 
3457c478bd9Sstevel@tonic-gate 		/*
3467c478bd9Sstevel@tonic-gate 		 * create memscrubber thread
3477c478bd9Sstevel@tonic-gate 		 */
3487c478bd9Sstevel@tonic-gate 		(void) thread_create(NULL, 0, (void (*)())memscrubber,
3497c478bd9Sstevel@tonic-gate 		    NULL, 0, &p0, TS_RUN, memscrub_thread_pri);
3507c478bd9Sstevel@tonic-gate 
3517c478bd9Sstevel@tonic-gate 		/*
3527c478bd9Sstevel@tonic-gate 		 * We don't want call backs changing the list
3537c478bd9Sstevel@tonic-gate 		 * if there is no thread running. We do not
3547c478bd9Sstevel@tonic-gate 		 * attempt to deal with stopping/starting scrubbing
3557c478bd9Sstevel@tonic-gate 		 * on memory size changes.
3567c478bd9Sstevel@tonic-gate 		 */
3577c478bd9Sstevel@tonic-gate 		memscrub_init_mem_config();
3587c478bd9Sstevel@tonic-gate 	}
3597c478bd9Sstevel@tonic-gate 
3607c478bd9Sstevel@tonic-gate 	return (0);
3617c478bd9Sstevel@tonic-gate }
3627c478bd9Sstevel@tonic-gate 
3637c478bd9Sstevel@tonic-gate static void
3647c478bd9Sstevel@tonic-gate memscrub_cleanup(void)
3657c478bd9Sstevel@tonic-gate {
3667c478bd9Sstevel@tonic-gate 	memscrub_uninit_mem_config();
3677c478bd9Sstevel@tonic-gate 	while (memscrub_memlist) {
3687c478bd9Sstevel@tonic-gate 		(void) memscrub_delete_span(
3697c478bd9Sstevel@tonic-gate 			(pfn_t)(memscrub_memlist->address >> PAGESHIFT),
3707c478bd9Sstevel@tonic-gate 			(pgcnt_t)(memscrub_memlist->size >> PAGESHIFT));
3717c478bd9Sstevel@tonic-gate 	}
3727c478bd9Sstevel@tonic-gate 	if (memscrub_ksp)
3737c478bd9Sstevel@tonic-gate 		kstat_delete(memscrub_ksp);
3747c478bd9Sstevel@tonic-gate 	cv_destroy(&memscrub_cv);
3757c478bd9Sstevel@tonic-gate 	mutex_destroy(&memscrub_lock);
3767c478bd9Sstevel@tonic-gate }
3777c478bd9Sstevel@tonic-gate 
#ifdef MEMSCRUB_DEBUG
/*
 * Debug aid: print `title' followed by the address and size of every
 * span on the memory list `listp', one span per line.
 */
static void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *mlp = listp;

	cmn_err(CE_CONT, "%s:\n", title);

	while (mlp != NULL) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    mlp->address, mlp->size);
		mlp = mlp->next;
	}
}
#endif /* MEMSCRUB_DEBUG */
3927c478bd9Sstevel@tonic-gate 
/*
 * Wake the scrubber thread sleeping on memscrub_cv.
 *
 * Used as the timeout() handler armed by the scrubber thread and called
 * directly by memscrub_run().  The argument `c' is unused.
 */
/* ARGSUSED */
static void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}
4057c478bd9Sstevel@tonic-gate 
4067c478bd9Sstevel@tonic-gate /*
4077c478bd9Sstevel@tonic-gate  * provide an interface external to the memscrubber
4087c478bd9Sstevel@tonic-gate  * which will force the memscrub thread to run vs.
4097c478bd9Sstevel@tonic-gate  * waiting for the timeout, if one is set
4107c478bd9Sstevel@tonic-gate  */
4117c478bd9Sstevel@tonic-gate void
4127c478bd9Sstevel@tonic-gate memscrub_run(void)
4137c478bd9Sstevel@tonic-gate {
4147c478bd9Sstevel@tonic-gate 	memscrub_counts.force_run.value.ui32++;
4157c478bd9Sstevel@tonic-gate 	if (memscrub_tid) {
4167c478bd9Sstevel@tonic-gate 		(void) untimeout(memscrub_tid);
4177c478bd9Sstevel@tonic-gate 		memscrub_wakeup((void *)NULL);
4187c478bd9Sstevel@tonic-gate 	}
4197c478bd9Sstevel@tonic-gate }
4207c478bd9Sstevel@tonic-gate 
4217c478bd9Sstevel@tonic-gate /*
4227c478bd9Sstevel@tonic-gate  * this calculation doesn't account for the time
4237c478bd9Sstevel@tonic-gate  * that the actual scan consumes -- so we'd fall
4247c478bd9Sstevel@tonic-gate  * slightly behind schedule with this interval.
4257c478bd9Sstevel@tonic-gate  * It's very small.
4267c478bd9Sstevel@tonic-gate  */
4277c478bd9Sstevel@tonic-gate 
4287c478bd9Sstevel@tonic-gate static uint_t
4297c478bd9Sstevel@tonic-gate compute_interval_ticks(void)
4307c478bd9Sstevel@tonic-gate {
4317c478bd9Sstevel@tonic-gate 	/*
4327c478bd9Sstevel@tonic-gate 	 * We use msp_safe mpp_safe below to insure somebody
4337c478bd9Sstevel@tonic-gate 	 * doesn't set memscrub_span_pages or memscrub_phys_pages
4347c478bd9Sstevel@tonic-gate 	 * to 0 on us.
4357c478bd9Sstevel@tonic-gate 	 */
4367c478bd9Sstevel@tonic-gate 	static uint_t msp_safe, mpp_safe;
4377c478bd9Sstevel@tonic-gate 	static uint_t interval_ticks, period_ticks;
4387c478bd9Sstevel@tonic-gate 	msp_safe = memscrub_span_pages;
4397c478bd9Sstevel@tonic-gate 	mpp_safe = memscrub_phys_pages;
4407c478bd9Sstevel@tonic-gate 
4417c478bd9Sstevel@tonic-gate 	period_ticks = memscrub_period_sec * hz;
4427c478bd9Sstevel@tonic-gate 	interval_ticks = period_ticks;
4437c478bd9Sstevel@tonic-gate 
4447c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&memscrub_lock));
4457c478bd9Sstevel@tonic-gate 
4467c478bd9Sstevel@tonic-gate 	if ((msp_safe != 0) && (mpp_safe != 0)) {
4477c478bd9Sstevel@tonic-gate 		if (memscrub_phys_pages <= msp_safe) {
4487c478bd9Sstevel@tonic-gate 			interval_ticks = period_ticks;
4497c478bd9Sstevel@tonic-gate 		} else {
4507c478bd9Sstevel@tonic-gate 			interval_ticks = (period_ticks /
4517c478bd9Sstevel@tonic-gate 			    (mpp_safe / msp_safe));
4527c478bd9Sstevel@tonic-gate 		}
4537c478bd9Sstevel@tonic-gate 	}
4547c478bd9Sstevel@tonic-gate 	return (interval_ticks);
4557c478bd9Sstevel@tonic-gate }
4567c478bd9Sstevel@tonic-gate 
4577c478bd9Sstevel@tonic-gate void
4587c478bd9Sstevel@tonic-gate memscrubber(void)
4597c478bd9Sstevel@tonic-gate {
4607c478bd9Sstevel@tonic-gate 	ms_paddr_t address, addr;
4617c478bd9Sstevel@tonic-gate 	time_t deadline;
4627c478bd9Sstevel@tonic-gate 	pgcnt_t pages;
4637c478bd9Sstevel@tonic-gate 	uint_t reached_end = 1;
4647c478bd9Sstevel@tonic-gate 	uint_t paused_message = 0;
4657c478bd9Sstevel@tonic-gate 	uint_t interval_ticks = 0;
4667c478bd9Sstevel@tonic-gate 	uint_t sleep_warn_printed = 0;
4677c478bd9Sstevel@tonic-gate 	callb_cpr_t cprinfo;
4687c478bd9Sstevel@tonic-gate 
4697c478bd9Sstevel@tonic-gate 	/*
4707c478bd9Sstevel@tonic-gate 	 * notify CPR of our existence
4717c478bd9Sstevel@tonic-gate 	 */
4727c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");
4737c478bd9Sstevel@tonic-gate 
4747c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
4757c478bd9Sstevel@tonic-gate 
4767c478bd9Sstevel@tonic-gate 	if (memscrub_memlist == NULL) {
4777c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
4787c478bd9Sstevel@tonic-gate 		goto memscrub_exit;
4797c478bd9Sstevel@tonic-gate 	}
4807c478bd9Sstevel@tonic-gate 
4817c478bd9Sstevel@tonic-gate 	address = memscrub_memlist->address;
4827c478bd9Sstevel@tonic-gate 
4837c478bd9Sstevel@tonic-gate 	deadline = gethrestime_sec() + memscrub_delay_start_sec;
4847c478bd9Sstevel@tonic-gate 
4857c478bd9Sstevel@tonic-gate 	for (;;) {
4867c478bd9Sstevel@tonic-gate 		if (disable_memscrub)
4877c478bd9Sstevel@tonic-gate 			break;
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate 		/*
4907c478bd9Sstevel@tonic-gate 		 * compute interval_ticks
4917c478bd9Sstevel@tonic-gate 		 */
4927c478bd9Sstevel@tonic-gate 		interval_ticks = compute_interval_ticks();
4937c478bd9Sstevel@tonic-gate 
4947c478bd9Sstevel@tonic-gate 		/*
4957c478bd9Sstevel@tonic-gate 		 * If the calculated sleep time is zero, and pause_memscrub
4967c478bd9Sstevel@tonic-gate 		 * has been set, make sure we sleep so that another thread
4977c478bd9Sstevel@tonic-gate 		 * can acquire memscrub_lock.
4987c478bd9Sstevel@tonic-gate 		 */
4997c478bd9Sstevel@tonic-gate 		if (interval_ticks == 0 && pause_memscrub) {
5007c478bd9Sstevel@tonic-gate 			interval_ticks = hz;
5017c478bd9Sstevel@tonic-gate 		}
5027c478bd9Sstevel@tonic-gate 
5037c478bd9Sstevel@tonic-gate 		/*
5047c478bd9Sstevel@tonic-gate 		 * And as a fail safe, under normal non-paused operation, do
5057c478bd9Sstevel@tonic-gate 		 * not allow the sleep time to be zero.
5067c478bd9Sstevel@tonic-gate 		 */
5077c478bd9Sstevel@tonic-gate 		if (interval_ticks == 0) {
5087c478bd9Sstevel@tonic-gate 			interval_ticks = memscrub_override_ticks;
5097c478bd9Sstevel@tonic-gate 			if (!sleep_warn_printed) {
5107c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, MEMSCRUB_OVERRIDE_MSG);
5117c478bd9Sstevel@tonic-gate 				sleep_warn_printed = 1;
5127c478bd9Sstevel@tonic-gate 			}
5137c478bd9Sstevel@tonic-gate 		}
5147c478bd9Sstevel@tonic-gate 
5157c478bd9Sstevel@tonic-gate 		memscrub_counts.interval_ticks.value.ui32 = interval_ticks;
5167c478bd9Sstevel@tonic-gate 
5177c478bd9Sstevel@tonic-gate 		/*
5187c478bd9Sstevel@tonic-gate 		 * Did we just reach the end of memory? If we are at the
5197c478bd9Sstevel@tonic-gate 		 * end of memory, delay end of memory processing until
5207c478bd9Sstevel@tonic-gate 		 * pause_memscrub is not set.
5217c478bd9Sstevel@tonic-gate 		 */
5227c478bd9Sstevel@tonic-gate 		if (reached_end && !pause_memscrub) {
5237c478bd9Sstevel@tonic-gate 			time_t now = gethrestime_sec();
5247c478bd9Sstevel@tonic-gate 
5257c478bd9Sstevel@tonic-gate 			if (now >= deadline) {
5267c478bd9Sstevel@tonic-gate 				memscrub_counts.done_late.value.ui32++;
5277c478bd9Sstevel@tonic-gate 				memscrub_counts.late_sec.value.ui32 +=
5287c478bd9Sstevel@tonic-gate 					(now - deadline);
5297c478bd9Sstevel@tonic-gate 				/*
5307c478bd9Sstevel@tonic-gate 				 * past deadline, start right away
5317c478bd9Sstevel@tonic-gate 				 */
5327c478bd9Sstevel@tonic-gate 				interval_ticks = 0;
5337c478bd9Sstevel@tonic-gate 
5347c478bd9Sstevel@tonic-gate 				deadline = now + memscrub_period_sec;
5357c478bd9Sstevel@tonic-gate 			} else {
5367c478bd9Sstevel@tonic-gate 				/*
5377c478bd9Sstevel@tonic-gate 				 * we finished ahead of schedule.
5387c478bd9Sstevel@tonic-gate 				 * wait till previous deadline before re-start.
5397c478bd9Sstevel@tonic-gate 				 */
5407c478bd9Sstevel@tonic-gate 				interval_ticks = (deadline - now) * hz;
5417c478bd9Sstevel@tonic-gate 				memscrub_counts.done_early.value.ui32++;
5427c478bd9Sstevel@tonic-gate 				memscrub_counts.early_sec.value.ui32 +=
5437c478bd9Sstevel@tonic-gate 					(deadline - now);
5447c478bd9Sstevel@tonic-gate 				deadline += memscrub_period_sec;
5457c478bd9Sstevel@tonic-gate 			}
5467c478bd9Sstevel@tonic-gate 			reached_end = 0;
5477c478bd9Sstevel@tonic-gate 			sleep_warn_printed = 0;
5487c478bd9Sstevel@tonic-gate 		}
5497c478bd9Sstevel@tonic-gate 
5507c478bd9Sstevel@tonic-gate 		if (interval_ticks != 0) {
5517c478bd9Sstevel@tonic-gate 			/*
5527c478bd9Sstevel@tonic-gate 			 * it is safe from our standpoint for CPR to
5537c478bd9Sstevel@tonic-gate 			 * suspend the system
5547c478bd9Sstevel@tonic-gate 			 */
5557c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
5567c478bd9Sstevel@tonic-gate 
5577c478bd9Sstevel@tonic-gate 			/*
5587c478bd9Sstevel@tonic-gate 			 * hit the snooze bar
5597c478bd9Sstevel@tonic-gate 			 */
5607c478bd9Sstevel@tonic-gate 			memscrub_tid = timeout(memscrub_wakeup, NULL,
5617c478bd9Sstevel@tonic-gate 			    interval_ticks);
5627c478bd9Sstevel@tonic-gate 
5637c478bd9Sstevel@tonic-gate 			/*
5647c478bd9Sstevel@tonic-gate 			 * go to sleep
5657c478bd9Sstevel@tonic-gate 			 */
5667c478bd9Sstevel@tonic-gate 			cv_wait(&memscrub_cv, &memscrub_lock);
5677c478bd9Sstevel@tonic-gate 
5687c478bd9Sstevel@tonic-gate 			/*
5697c478bd9Sstevel@tonic-gate 			 * at this point, no timeout should be set
5707c478bd9Sstevel@tonic-gate 			 */
5717c478bd9Sstevel@tonic-gate 			memscrub_tid = 0;
5727c478bd9Sstevel@tonic-gate 
5737c478bd9Sstevel@tonic-gate 			/*
5747c478bd9Sstevel@tonic-gate 			 * we need to goto work and will be modifying
5757c478bd9Sstevel@tonic-gate 			 * our internal state and mapping/unmapping
5767c478bd9Sstevel@tonic-gate 			 * TTEs
5777c478bd9Sstevel@tonic-gate 			 */
5787c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);
5797c478bd9Sstevel@tonic-gate 		}
5807c478bd9Sstevel@tonic-gate 
5817c478bd9Sstevel@tonic-gate 
5827c478bd9Sstevel@tonic-gate 		if (memscrub_phys_pages == 0) {
5837c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "Memory scrubber has 0 pages to read");
5847c478bd9Sstevel@tonic-gate 			goto memscrub_exit;
5857c478bd9Sstevel@tonic-gate 		}
5867c478bd9Sstevel@tonic-gate 
5877c478bd9Sstevel@tonic-gate 		if (!pause_memscrub) {
5887c478bd9Sstevel@tonic-gate 			if (paused_message) {
5897c478bd9Sstevel@tonic-gate 				paused_message = 0;
5907c478bd9Sstevel@tonic-gate 				if (memscrub_verbose)
5917c478bd9Sstevel@tonic-gate 					cmn_err(CE_NOTE, "Memory scrubber "
5927c478bd9Sstevel@tonic-gate 					    "resuming");
5937c478bd9Sstevel@tonic-gate 			}
5947c478bd9Sstevel@tonic-gate 
5957c478bd9Sstevel@tonic-gate 			if (read_all_memscrub) {
5967c478bd9Sstevel@tonic-gate 				if (memscrub_verbose)
5977c478bd9Sstevel@tonic-gate 					cmn_err(CE_NOTE, "Memory scrubber "
5987c478bd9Sstevel@tonic-gate 					    "reading all memory per request");
5997c478bd9Sstevel@tonic-gate 
6007c478bd9Sstevel@tonic-gate 				addr = memscrub_memlist->address;
6017c478bd9Sstevel@tonic-gate 				reached_end = 0;
6027c478bd9Sstevel@tonic-gate 				while (!reached_end) {
6037c478bd9Sstevel@tonic-gate 					if (disable_memscrub)
6047c478bd9Sstevel@tonic-gate 						break;
6057c478bd9Sstevel@tonic-gate 					pages = memscrub_phys_pages;
6067c478bd9Sstevel@tonic-gate 					reached_end = memscrub_verify_span(
6077c478bd9Sstevel@tonic-gate 					    &addr, &pages);
6087c478bd9Sstevel@tonic-gate 					memscrub_scan(pages *
6097c478bd9Sstevel@tonic-gate 					    MEMSCRUB_BLOCKS_PER_PAGE, addr);
6107c478bd9Sstevel@tonic-gate 					addr += ((uint64_t)pages * PAGESIZE);
6117c478bd9Sstevel@tonic-gate 				}
6127c478bd9Sstevel@tonic-gate 				read_all_memscrub = 0;
6137c478bd9Sstevel@tonic-gate 			}
6147c478bd9Sstevel@tonic-gate 
6157c478bd9Sstevel@tonic-gate 			/*
6167c478bd9Sstevel@tonic-gate 			 * read 1 span
6177c478bd9Sstevel@tonic-gate 			 */
6187c478bd9Sstevel@tonic-gate 			pages = memscrub_span_pages;
6197c478bd9Sstevel@tonic-gate 
6207c478bd9Sstevel@tonic-gate 			if (disable_memscrub)
6217c478bd9Sstevel@tonic-gate 				break;
6227c478bd9Sstevel@tonic-gate 
6237c478bd9Sstevel@tonic-gate 			/*
6247c478bd9Sstevel@tonic-gate 			 * determine physical address range
6257c478bd9Sstevel@tonic-gate 			 */
6267c478bd9Sstevel@tonic-gate 			reached_end = memscrub_verify_span(&address,
6277c478bd9Sstevel@tonic-gate 			    &pages);
6287c478bd9Sstevel@tonic-gate 
6297c478bd9Sstevel@tonic-gate 			memscrub_scan(pages * MEMSCRUB_BLOCKS_PER_PAGE,
6307c478bd9Sstevel@tonic-gate 			    address);
6317c478bd9Sstevel@tonic-gate 
6327c478bd9Sstevel@tonic-gate 			address += ((uint64_t)pages * PAGESIZE);
6337c478bd9Sstevel@tonic-gate 		}
6347c478bd9Sstevel@tonic-gate 
6357c478bd9Sstevel@tonic-gate 		if (pause_memscrub && !paused_message) {
6367c478bd9Sstevel@tonic-gate 			paused_message = 1;
6377c478bd9Sstevel@tonic-gate 			if (memscrub_verbose)
6387c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "Memory scrubber paused");
6397c478bd9Sstevel@tonic-gate 		}
6407c478bd9Sstevel@tonic-gate 	}
6417c478bd9Sstevel@tonic-gate 
6427c478bd9Sstevel@tonic-gate memscrub_exit:
6437c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "Memory scrubber exiting");
6447c478bd9Sstevel@tonic-gate 	CALLB_CPR_EXIT(&cprinfo);
6457c478bd9Sstevel@tonic-gate 	memscrub_cleanup();
6467c478bd9Sstevel@tonic-gate 	thread_exit();
6477c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
6487c478bd9Sstevel@tonic-gate }
6497c478bd9Sstevel@tonic-gate 
6507c478bd9Sstevel@tonic-gate /*
6517c478bd9Sstevel@tonic-gate  * condition address and size
6527c478bd9Sstevel@tonic-gate  * such that they span legal physical addresses.
6537c478bd9Sstevel@tonic-gate  *
6547c478bd9Sstevel@tonic-gate  * when appropriate, address will be rounded up to start of next
6557c478bd9Sstevel@tonic-gate  * struct memlist, and pages will be rounded down to the end of the
6567c478bd9Sstevel@tonic-gate  * memlist size.
6577c478bd9Sstevel@tonic-gate  *
6587c478bd9Sstevel@tonic-gate  * returns 1 if reached end of list, else returns 0.
6597c478bd9Sstevel@tonic-gate  */
6607c478bd9Sstevel@tonic-gate static int
6617c478bd9Sstevel@tonic-gate memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp)
6627c478bd9Sstevel@tonic-gate {
6637c478bd9Sstevel@tonic-gate 	struct memlist *mlp;
6647c478bd9Sstevel@tonic-gate 	ms_paddr_t address = *addrp;
6657c478bd9Sstevel@tonic-gate 	uint64_t bytes = (uint64_t)*pagesp * PAGESIZE;
6667c478bd9Sstevel@tonic-gate 	uint64_t bytes_remaining;
6677c478bd9Sstevel@tonic-gate 	int reached_end = 0;
6687c478bd9Sstevel@tonic-gate 
6697c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&memscrub_lock));
6707c478bd9Sstevel@tonic-gate 
6717c478bd9Sstevel@tonic-gate 	/*
6727c478bd9Sstevel@tonic-gate 	 * find memlist struct that contains addrp
6737c478bd9Sstevel@tonic-gate 	 * assumes memlist is sorted by ascending address.
6747c478bd9Sstevel@tonic-gate 	 */
6757c478bd9Sstevel@tonic-gate 	for (mlp = memscrub_memlist; mlp != NULL; mlp = mlp->next) {
6767c478bd9Sstevel@tonic-gate 		/*
6777c478bd9Sstevel@tonic-gate 		 * if before this chunk, round up to beginning
6787c478bd9Sstevel@tonic-gate 		 */
6797c478bd9Sstevel@tonic-gate 		if (address < mlp->address) {
6807c478bd9Sstevel@tonic-gate 			address = mlp->address;
6817c478bd9Sstevel@tonic-gate 			break;
6827c478bd9Sstevel@tonic-gate 		}
6837c478bd9Sstevel@tonic-gate 		/*
6847c478bd9Sstevel@tonic-gate 		 * if before end of chunk, then we found it
6857c478bd9Sstevel@tonic-gate 		 */
6867c478bd9Sstevel@tonic-gate 		if (address < (mlp->address + mlp->size))
6877c478bd9Sstevel@tonic-gate 			break;
6887c478bd9Sstevel@tonic-gate 
6897c478bd9Sstevel@tonic-gate 		/* else go to next struct memlist */
6907c478bd9Sstevel@tonic-gate 	}
6917c478bd9Sstevel@tonic-gate 	/*
6927c478bd9Sstevel@tonic-gate 	 * if we hit end of list, start at beginning
6937c478bd9Sstevel@tonic-gate 	 */
6947c478bd9Sstevel@tonic-gate 	if (mlp == NULL) {
6957c478bd9Sstevel@tonic-gate 		mlp = memscrub_memlist;
6967c478bd9Sstevel@tonic-gate 		address = mlp->address;
6977c478bd9Sstevel@tonic-gate 	}
6987c478bd9Sstevel@tonic-gate 
6997c478bd9Sstevel@tonic-gate 	/*
7007c478bd9Sstevel@tonic-gate 	 * now we have legal address, and its mlp, condition bytes
7017c478bd9Sstevel@tonic-gate 	 */
7027c478bd9Sstevel@tonic-gate 	bytes_remaining = (mlp->address + mlp->size) - address;
7037c478bd9Sstevel@tonic-gate 
7047c478bd9Sstevel@tonic-gate 	if (bytes > bytes_remaining)
7057c478bd9Sstevel@tonic-gate 		bytes = bytes_remaining;
7067c478bd9Sstevel@tonic-gate 
7077c478bd9Sstevel@tonic-gate 	/*
7087c478bd9Sstevel@tonic-gate 	 * will this span take us to end of list?
7097c478bd9Sstevel@tonic-gate 	 */
7107c478bd9Sstevel@tonic-gate 	if ((mlp->next == NULL) &&
7117c478bd9Sstevel@tonic-gate 	    ((mlp->address + mlp->size) == (address + bytes)))
7127c478bd9Sstevel@tonic-gate 		reached_end = 1;
7137c478bd9Sstevel@tonic-gate 
7147c478bd9Sstevel@tonic-gate 	/* return values */
7157c478bd9Sstevel@tonic-gate 	*addrp = address;
7167c478bd9Sstevel@tonic-gate 	*pagesp = bytes / PAGESIZE;
7177c478bd9Sstevel@tonic-gate 
7187c478bd9Sstevel@tonic-gate 	return (reached_end);
7197c478bd9Sstevel@tonic-gate }
7207c478bd9Sstevel@tonic-gate 
7217c478bd9Sstevel@tonic-gate /*
7227c478bd9Sstevel@tonic-gate  * add a span to the memscrub list
7237c478bd9Sstevel@tonic-gate  * add to memscrub_phys_pages
7247c478bd9Sstevel@tonic-gate  */
7257c478bd9Sstevel@tonic-gate int
7267c478bd9Sstevel@tonic-gate memscrub_add_span(pfn_t pfn, pgcnt_t pages)
7277c478bd9Sstevel@tonic-gate {
7287c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
7297c478bd9Sstevel@tonic-gate 	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
7307c478bd9Sstevel@tonic-gate 	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
7317c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
7327c478bd9Sstevel@tonic-gate 
7337c478bd9Sstevel@tonic-gate 	int retval;
7347c478bd9Sstevel@tonic-gate 
7357c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
7367c478bd9Sstevel@tonic-gate 
7377c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
7387c478bd9Sstevel@tonic-gate 	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
7397c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
7407c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
7417c478bd9Sstevel@tonic-gate 	    " size: 0x%llx\n", address, bytes);
7427c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
7437c478bd9Sstevel@tonic-gate 
7447c478bd9Sstevel@tonic-gate 	retval = memscrub_add_span_gen(pfn, pages, &memscrub_memlist,
7457c478bd9Sstevel@tonic-gate 	    &memscrub_phys_pages);
7467c478bd9Sstevel@tonic-gate 
7477c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
7487c478bd9Sstevel@tonic-gate 	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
7497c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
7507c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
7517c478bd9Sstevel@tonic-gate 
7527c478bd9Sstevel@tonic-gate 	mutex_exit(&memscrub_lock);
7537c478bd9Sstevel@tonic-gate 
7547c478bd9Sstevel@tonic-gate 	return (retval);
7557c478bd9Sstevel@tonic-gate }
7567c478bd9Sstevel@tonic-gate 
7577c478bd9Sstevel@tonic-gate static int
7587c478bd9Sstevel@tonic-gate memscrub_add_span_gen(
7597c478bd9Sstevel@tonic-gate 	pfn_t pfn,
7607c478bd9Sstevel@tonic-gate 	pgcnt_t pages,
7617c478bd9Sstevel@tonic-gate 	struct memlist **list,
7627c478bd9Sstevel@tonic-gate 	uint_t *npgs)
7637c478bd9Sstevel@tonic-gate {
7647c478bd9Sstevel@tonic-gate 	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
7657c478bd9Sstevel@tonic-gate 	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
7667c478bd9Sstevel@tonic-gate 	struct memlist *dst;
7677c478bd9Sstevel@tonic-gate 	struct memlist *prev, *next;
7687c478bd9Sstevel@tonic-gate 	int retval = 0;
7697c478bd9Sstevel@tonic-gate 
7707c478bd9Sstevel@tonic-gate 	/*
7717c478bd9Sstevel@tonic-gate 	 * allocate a new struct memlist
7727c478bd9Sstevel@tonic-gate 	 */
7737c478bd9Sstevel@tonic-gate 
7747c478bd9Sstevel@tonic-gate 	dst = (struct memlist *)
7757c478bd9Sstevel@tonic-gate 	    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
7767c478bd9Sstevel@tonic-gate 
7777c478bd9Sstevel@tonic-gate 	if (dst == NULL) {
7787c478bd9Sstevel@tonic-gate 		retval = -1;
7797c478bd9Sstevel@tonic-gate 		goto add_done;
7807c478bd9Sstevel@tonic-gate 	}
7817c478bd9Sstevel@tonic-gate 
7827c478bd9Sstevel@tonic-gate 	dst->address = address;
7837c478bd9Sstevel@tonic-gate 	dst->size = bytes;
7847c478bd9Sstevel@tonic-gate 
7857c478bd9Sstevel@tonic-gate 	/*
7867c478bd9Sstevel@tonic-gate 	 * first insert
7877c478bd9Sstevel@tonic-gate 	 */
7887c478bd9Sstevel@tonic-gate 	if (*list == NULL) {
7897c478bd9Sstevel@tonic-gate 		dst->prev = NULL;
7907c478bd9Sstevel@tonic-gate 		dst->next = NULL;
7917c478bd9Sstevel@tonic-gate 		*list = dst;
7927c478bd9Sstevel@tonic-gate 
7937c478bd9Sstevel@tonic-gate 		goto add_done;
7947c478bd9Sstevel@tonic-gate 	}
7957c478bd9Sstevel@tonic-gate 
7967c478bd9Sstevel@tonic-gate 	/*
7977c478bd9Sstevel@tonic-gate 	 * insert into sorted list
7987c478bd9Sstevel@tonic-gate 	 */
7997c478bd9Sstevel@tonic-gate 	for (prev = NULL, next = *list;
8007c478bd9Sstevel@tonic-gate 	    next != NULL;
8017c478bd9Sstevel@tonic-gate 	    prev = next, next = next->next) {
8027c478bd9Sstevel@tonic-gate 		if (address > (next->address + next->size))
8037c478bd9Sstevel@tonic-gate 			continue;
8047c478bd9Sstevel@tonic-gate 
8057c478bd9Sstevel@tonic-gate 		/*
8067c478bd9Sstevel@tonic-gate 		 * else insert here
8077c478bd9Sstevel@tonic-gate 		 */
8087c478bd9Sstevel@tonic-gate 
8097c478bd9Sstevel@tonic-gate 		/*
8107c478bd9Sstevel@tonic-gate 		 * prepend to next
8117c478bd9Sstevel@tonic-gate 		 */
8127c478bd9Sstevel@tonic-gate 		if ((address + bytes) == next->address) {
8137c478bd9Sstevel@tonic-gate 			kmem_free(dst, sizeof (struct memlist));
8147c478bd9Sstevel@tonic-gate 
8157c478bd9Sstevel@tonic-gate 			next->address = address;
8167c478bd9Sstevel@tonic-gate 			next->size += bytes;
8177c478bd9Sstevel@tonic-gate 
8187c478bd9Sstevel@tonic-gate 			goto add_done;
8197c478bd9Sstevel@tonic-gate 		}
8207c478bd9Sstevel@tonic-gate 
8217c478bd9Sstevel@tonic-gate 		/*
8227c478bd9Sstevel@tonic-gate 		 * append to next
8237c478bd9Sstevel@tonic-gate 		 */
8247c478bd9Sstevel@tonic-gate 		if (address == (next->address + next->size)) {
8257c478bd9Sstevel@tonic-gate 			kmem_free(dst, sizeof (struct memlist));
8267c478bd9Sstevel@tonic-gate 
8277c478bd9Sstevel@tonic-gate 			if (next->next) {
8287c478bd9Sstevel@tonic-gate 				/*
8297c478bd9Sstevel@tonic-gate 				 * don't overlap with next->next
8307c478bd9Sstevel@tonic-gate 				 */
8317c478bd9Sstevel@tonic-gate 				if ((address + bytes) > next->next->address) {
8327c478bd9Sstevel@tonic-gate 					retval = -1;
8337c478bd9Sstevel@tonic-gate 					goto add_done;
8347c478bd9Sstevel@tonic-gate 				}
8357c478bd9Sstevel@tonic-gate 				/*
8367c478bd9Sstevel@tonic-gate 				 * concatenate next and next->next
8377c478bd9Sstevel@tonic-gate 				 */
8387c478bd9Sstevel@tonic-gate 				if ((address + bytes) == next->next->address) {
8397c478bd9Sstevel@tonic-gate 					struct memlist *mlp = next->next;
8407c478bd9Sstevel@tonic-gate 
8417c478bd9Sstevel@tonic-gate 					if (next == *list)
8427c478bd9Sstevel@tonic-gate 						*list = next->next;
8437c478bd9Sstevel@tonic-gate 
8447c478bd9Sstevel@tonic-gate 					mlp->address = next->address;
8457c478bd9Sstevel@tonic-gate 					mlp->size += next->size;
8467c478bd9Sstevel@tonic-gate 					mlp->size += bytes;
8477c478bd9Sstevel@tonic-gate 
8487c478bd9Sstevel@tonic-gate 					if (next->prev)
8497c478bd9Sstevel@tonic-gate 						next->prev->next = mlp;
8507c478bd9Sstevel@tonic-gate 					mlp->prev = next->prev;
8517c478bd9Sstevel@tonic-gate 
8527c478bd9Sstevel@tonic-gate 					kmem_free(next,
8537c478bd9Sstevel@tonic-gate 						sizeof (struct memlist));
8547c478bd9Sstevel@tonic-gate 					goto add_done;
8557c478bd9Sstevel@tonic-gate 				}
8567c478bd9Sstevel@tonic-gate 			}
8577c478bd9Sstevel@tonic-gate 
8587c478bd9Sstevel@tonic-gate 			next->size += bytes;
8597c478bd9Sstevel@tonic-gate 
8607c478bd9Sstevel@tonic-gate 			goto add_done;
8617c478bd9Sstevel@tonic-gate 		}
8627c478bd9Sstevel@tonic-gate 
8637c478bd9Sstevel@tonic-gate 		/* don't overlap with next */
8647c478bd9Sstevel@tonic-gate 		if ((address + bytes) > next->address) {
8657c478bd9Sstevel@tonic-gate 			retval = -1;
8667c478bd9Sstevel@tonic-gate 			kmem_free(dst, sizeof (struct memlist));
8677c478bd9Sstevel@tonic-gate 			goto add_done;
8687c478bd9Sstevel@tonic-gate 		}
8697c478bd9Sstevel@tonic-gate 
8707c478bd9Sstevel@tonic-gate 		/*
8717c478bd9Sstevel@tonic-gate 		 * insert before next
8727c478bd9Sstevel@tonic-gate 		 */
8737c478bd9Sstevel@tonic-gate 		dst->prev = prev;
8747c478bd9Sstevel@tonic-gate 		dst->next = next;
8757c478bd9Sstevel@tonic-gate 		next->prev = dst;
8767c478bd9Sstevel@tonic-gate 		if (prev == NULL) {
8777c478bd9Sstevel@tonic-gate 			*list = dst;
8787c478bd9Sstevel@tonic-gate 		} else {
8797c478bd9Sstevel@tonic-gate 			prev->next = dst;
8807c478bd9Sstevel@tonic-gate 		}
8817c478bd9Sstevel@tonic-gate 		goto add_done;
8827c478bd9Sstevel@tonic-gate 	}	/* end for */
8837c478bd9Sstevel@tonic-gate 
8847c478bd9Sstevel@tonic-gate 	/*
8857c478bd9Sstevel@tonic-gate 	 * end of list, prev is valid and next is NULL
8867c478bd9Sstevel@tonic-gate 	 */
8877c478bd9Sstevel@tonic-gate 	prev->next = dst;
8887c478bd9Sstevel@tonic-gate 	dst->prev = prev;
8897c478bd9Sstevel@tonic-gate 	dst->next = NULL;
8907c478bd9Sstevel@tonic-gate 
8917c478bd9Sstevel@tonic-gate add_done:
8927c478bd9Sstevel@tonic-gate 
8937c478bd9Sstevel@tonic-gate 	if (retval != -1)
8947c478bd9Sstevel@tonic-gate 		*npgs += pages;
8957c478bd9Sstevel@tonic-gate 
8967c478bd9Sstevel@tonic-gate 	return (retval);
8977c478bd9Sstevel@tonic-gate }
8987c478bd9Sstevel@tonic-gate 
8997c478bd9Sstevel@tonic-gate /*
9007c478bd9Sstevel@tonic-gate  * delete a span from the memscrub list
9017c478bd9Sstevel@tonic-gate  * subtract from memscrub_phys_pages
9027c478bd9Sstevel@tonic-gate  */
9037c478bd9Sstevel@tonic-gate int
9047c478bd9Sstevel@tonic-gate memscrub_delete_span(pfn_t pfn, pgcnt_t pages)
9057c478bd9Sstevel@tonic-gate {
9067c478bd9Sstevel@tonic-gate 	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
9077c478bd9Sstevel@tonic-gate 	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
9087c478bd9Sstevel@tonic-gate 	struct memlist *dst, *next;
9097c478bd9Sstevel@tonic-gate 	int retval = 0;
9107c478bd9Sstevel@tonic-gate 
9117c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
9127c478bd9Sstevel@tonic-gate 
9137c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
9147c478bd9Sstevel@tonic-gate 	memscrub_printmemlist("memscrub_memlist Before", memscrub_memlist);
9157c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
9167c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_delete_span: 0x%llx 0x%llx\n",
9177c478bd9Sstevel@tonic-gate 	    address, bytes);
9187c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
9197c478bd9Sstevel@tonic-gate 
9207c478bd9Sstevel@tonic-gate 	/*
9217c478bd9Sstevel@tonic-gate 	 * find struct memlist containing page
9227c478bd9Sstevel@tonic-gate 	 */
9237c478bd9Sstevel@tonic-gate 	for (next = memscrub_memlist; next != NULL; next = next->next) {
9247c478bd9Sstevel@tonic-gate 		if ((address >= next->address) &&
9257c478bd9Sstevel@tonic-gate 		    (address < next->address + next->size))
9267c478bd9Sstevel@tonic-gate 			break;
9277c478bd9Sstevel@tonic-gate 	}
9287c478bd9Sstevel@tonic-gate 
9297c478bd9Sstevel@tonic-gate 	/*
9307c478bd9Sstevel@tonic-gate 	 * if start address not in list
9317c478bd9Sstevel@tonic-gate 	 */
9327c478bd9Sstevel@tonic-gate 	if (next == NULL) {
9337c478bd9Sstevel@tonic-gate 		retval = -1;
9347c478bd9Sstevel@tonic-gate 		goto delete_done;
9357c478bd9Sstevel@tonic-gate 	}
9367c478bd9Sstevel@tonic-gate 
9377c478bd9Sstevel@tonic-gate 	/*
9387c478bd9Sstevel@tonic-gate 	 * error if size goes off end of this struct memlist
9397c478bd9Sstevel@tonic-gate 	 */
9407c478bd9Sstevel@tonic-gate 	if (address + bytes > next->address + next->size) {
9417c478bd9Sstevel@tonic-gate 		retval = -1;
9427c478bd9Sstevel@tonic-gate 		goto delete_done;
9437c478bd9Sstevel@tonic-gate 	}
9447c478bd9Sstevel@tonic-gate 
9457c478bd9Sstevel@tonic-gate 	/*
9467c478bd9Sstevel@tonic-gate 	 * pages at beginning of struct memlist
9477c478bd9Sstevel@tonic-gate 	 */
9487c478bd9Sstevel@tonic-gate 	if (address == next->address) {
9497c478bd9Sstevel@tonic-gate 		/*
9507c478bd9Sstevel@tonic-gate 		 * if start & size match, delete from list
9517c478bd9Sstevel@tonic-gate 		 */
9527c478bd9Sstevel@tonic-gate 		if (bytes == next->size) {
9537c478bd9Sstevel@tonic-gate 			if (next == memscrub_memlist)
9547c478bd9Sstevel@tonic-gate 				memscrub_memlist = next->next;
9557c478bd9Sstevel@tonic-gate 			if (next->prev != NULL)
9567c478bd9Sstevel@tonic-gate 				next->prev->next = next->next;
9577c478bd9Sstevel@tonic-gate 			if (next->next != NULL)
9587c478bd9Sstevel@tonic-gate 				next->next->prev = next->prev;
9597c478bd9Sstevel@tonic-gate 
9607c478bd9Sstevel@tonic-gate 			kmem_free(next, sizeof (struct memlist));
9617c478bd9Sstevel@tonic-gate 		} else {
9627c478bd9Sstevel@tonic-gate 		/*
9637c478bd9Sstevel@tonic-gate 		 * increment start address by bytes
9647c478bd9Sstevel@tonic-gate 		 */
9657c478bd9Sstevel@tonic-gate 			next->address += bytes;
9667c478bd9Sstevel@tonic-gate 			next->size -= bytes;
9677c478bd9Sstevel@tonic-gate 		}
9687c478bd9Sstevel@tonic-gate 		goto delete_done;
9697c478bd9Sstevel@tonic-gate 	}
9707c478bd9Sstevel@tonic-gate 
9717c478bd9Sstevel@tonic-gate 	/*
9727c478bd9Sstevel@tonic-gate 	 * pages at end of struct memlist
9737c478bd9Sstevel@tonic-gate 	 */
9747c478bd9Sstevel@tonic-gate 	if (address + bytes == next->address + next->size) {
9757c478bd9Sstevel@tonic-gate 		/*
9767c478bd9Sstevel@tonic-gate 		 * decrement size by bytes
9777c478bd9Sstevel@tonic-gate 		 */
9787c478bd9Sstevel@tonic-gate 		next->size -= bytes;
9797c478bd9Sstevel@tonic-gate 		goto delete_done;
9807c478bd9Sstevel@tonic-gate 	}
9817c478bd9Sstevel@tonic-gate 
9827c478bd9Sstevel@tonic-gate 	/*
9837c478bd9Sstevel@tonic-gate 	 * delete a span in the middle of the struct memlist
9847c478bd9Sstevel@tonic-gate 	 */
9857c478bd9Sstevel@tonic-gate 	{
9867c478bd9Sstevel@tonic-gate 		/*
9877c478bd9Sstevel@tonic-gate 		 * create a new struct memlist
9887c478bd9Sstevel@tonic-gate 		 */
9897c478bd9Sstevel@tonic-gate 		dst = (struct memlist *)
9907c478bd9Sstevel@tonic-gate 		    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
9917c478bd9Sstevel@tonic-gate 
9927c478bd9Sstevel@tonic-gate 		if (dst == NULL) {
9937c478bd9Sstevel@tonic-gate 			retval = -1;
9947c478bd9Sstevel@tonic-gate 			goto delete_done;
9957c478bd9Sstevel@tonic-gate 		}
9967c478bd9Sstevel@tonic-gate 
9977c478bd9Sstevel@tonic-gate 		/*
9987c478bd9Sstevel@tonic-gate 		 * existing struct memlist gets address
9997c478bd9Sstevel@tonic-gate 		 * and size up to pfn
10007c478bd9Sstevel@tonic-gate 		 */
10017c478bd9Sstevel@tonic-gate 		dst->address = address + bytes;
10027c478bd9Sstevel@tonic-gate 		dst->size = (next->address + next->size) - dst->address;
10037c478bd9Sstevel@tonic-gate 		next->size = address - next->address;
10047c478bd9Sstevel@tonic-gate 
10057c478bd9Sstevel@tonic-gate 		/*
10067c478bd9Sstevel@tonic-gate 		 * new struct memlist gets address starting
10077c478bd9Sstevel@tonic-gate 		 * after pfn, until end
10087c478bd9Sstevel@tonic-gate 		 */
10097c478bd9Sstevel@tonic-gate 
10107c478bd9Sstevel@tonic-gate 		/*
10117c478bd9Sstevel@tonic-gate 		 * link in new memlist after old
10127c478bd9Sstevel@tonic-gate 		 */
10137c478bd9Sstevel@tonic-gate 		dst->next = next->next;
10147c478bd9Sstevel@tonic-gate 		dst->prev = next;
10157c478bd9Sstevel@tonic-gate 
10167c478bd9Sstevel@tonic-gate 		if (next->next != NULL)
10177c478bd9Sstevel@tonic-gate 			next->next->prev = dst;
10187c478bd9Sstevel@tonic-gate 		next->next = dst;
10197c478bd9Sstevel@tonic-gate 	}
10207c478bd9Sstevel@tonic-gate 
10217c478bd9Sstevel@tonic-gate delete_done:
10227c478bd9Sstevel@tonic-gate 	if (retval != -1) {
10237c478bd9Sstevel@tonic-gate 		memscrub_phys_pages -= pages;
10247c478bd9Sstevel@tonic-gate 		if (memscrub_phys_pages == 0)
10257c478bd9Sstevel@tonic-gate 			disable_memscrub = 1;
10267c478bd9Sstevel@tonic-gate 	}
10277c478bd9Sstevel@tonic-gate 
10287c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
10297c478bd9Sstevel@tonic-gate 	memscrub_printmemlist("memscrub_memlist After", memscrub_memlist);
10307c478bd9Sstevel@tonic-gate 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
10317c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
10327c478bd9Sstevel@tonic-gate 
10337c478bd9Sstevel@tonic-gate 	mutex_exit(&memscrub_lock);
10347c478bd9Sstevel@tonic-gate 	return (retval);
10357c478bd9Sstevel@tonic-gate }
10367c478bd9Sstevel@tonic-gate 
10377c478bd9Sstevel@tonic-gate static void
10387c478bd9Sstevel@tonic-gate memscrub_scan(uint_t blks, ms_paddr_t src)
10397c478bd9Sstevel@tonic-gate {
10407c478bd9Sstevel@tonic-gate 	uint_t 		psz, bpp, pgsread;
10417c478bd9Sstevel@tonic-gate 	pfn_t		pfn;
10427c478bd9Sstevel@tonic-gate 	ms_paddr_t	pa;
10437c478bd9Sstevel@tonic-gate 	caddr_t		va;
10447c478bd9Sstevel@tonic-gate 	on_trap_data_t	otd;
1045*61ef38f7Svb70745 	int		scan_mmu_pagesize = 0;
1046*61ef38f7Svb70745 	int		retired_pages = 0;
10477c478bd9Sstevel@tonic-gate 
10487c478bd9Sstevel@tonic-gate 	extern void memscrub_read(caddr_t src, uint_t blks);
10497c478bd9Sstevel@tonic-gate 
10507c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&memscrub_lock));
10517c478bd9Sstevel@tonic-gate 
10527c478bd9Sstevel@tonic-gate 	pgsread = 0;
10537c478bd9Sstevel@tonic-gate 	pa = src;
10547c478bd9Sstevel@tonic-gate 
1055*61ef38f7Svb70745 	if (memscrub_page_retire_span_list != NULL) {
1056*61ef38f7Svb70745 		if (memscrub_page_retire_span_search(src)) {
1057*61ef38f7Svb70745 			/* retired pages in current span */
1058*61ef38f7Svb70745 			scan_mmu_pagesize = 1;
1059*61ef38f7Svb70745 		}
1060*61ef38f7Svb70745 	}
1061*61ef38f7Svb70745 
1062*61ef38f7Svb70745 #ifdef MEMSCRUB_DEBUG
1063*61ef38f7Svb70745 	cmn_err(CE_NOTE, "scan_mmu_pagesize = %d\n" scan_mmu_pagesize);
1064*61ef38f7Svb70745 #endif /* MEMSCRUB_DEBUG */
1065*61ef38f7Svb70745 
10667c478bd9Sstevel@tonic-gate 	while (blks != 0) {
10677c478bd9Sstevel@tonic-gate 		/* Ensure the PA is properly aligned */
10687c478bd9Sstevel@tonic-gate 		if (((pa & MMU_PAGEMASK4M) == pa) &&
10697c478bd9Sstevel@tonic-gate 			(blks >= MEMSCRUB_BPP4M)) {
10707c478bd9Sstevel@tonic-gate 			psz = MMU_PAGESIZE4M;
10717c478bd9Sstevel@tonic-gate 			bpp = MEMSCRUB_BPP4M;
10727c478bd9Sstevel@tonic-gate 		} else if (((pa & MMU_PAGEMASK512K) == pa) &&
10737c478bd9Sstevel@tonic-gate 			(blks >= MEMSCRUB_BPP512K)) {
10747c478bd9Sstevel@tonic-gate 			psz = MMU_PAGESIZE512K;
10757c478bd9Sstevel@tonic-gate 			bpp = MEMSCRUB_BPP512K;
10767c478bd9Sstevel@tonic-gate 		} else if (((pa & MMU_PAGEMASK64K) == pa) &&
10777c478bd9Sstevel@tonic-gate 			(blks >= MEMSCRUB_BPP64K)) {
10787c478bd9Sstevel@tonic-gate 			psz = MMU_PAGESIZE64K;
10797c478bd9Sstevel@tonic-gate 			bpp = MEMSCRUB_BPP64K;
10807c478bd9Sstevel@tonic-gate 		} else if ((pa & MMU_PAGEMASK) == pa) {
10817c478bd9Sstevel@tonic-gate 			psz = MMU_PAGESIZE;
10827c478bd9Sstevel@tonic-gate 			bpp = MEMSCRUB_BPP;
10837c478bd9Sstevel@tonic-gate 		} else {
10847c478bd9Sstevel@tonic-gate 			if (memscrub_verbose) {
10857c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "Memory scrubber ignoring "
10867c478bd9Sstevel@tonic-gate 				    "non-page aligned block starting at 0x%"
10877c478bd9Sstevel@tonic-gate 				    PRIx64, src);
10887c478bd9Sstevel@tonic-gate 			}
10897c478bd9Sstevel@tonic-gate 			return;
10907c478bd9Sstevel@tonic-gate 		}
10917c478bd9Sstevel@tonic-gate 		if (blks < bpp) bpp = blks;
10927c478bd9Sstevel@tonic-gate 
10937c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG
10947c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "Going to run psz=%x, "
10957c478bd9Sstevel@tonic-gate 		    "bpp=%x pa=%llx\n", psz, bpp, pa);
10967c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */
10977c478bd9Sstevel@tonic-gate 
10987c478bd9Sstevel@tonic-gate 		/*
10997c478bd9Sstevel@tonic-gate 		 * MEMSCRUBBASE is a 4MB aligned page in the
11007c478bd9Sstevel@tonic-gate 		 * kernel so that we can quickly map the PA
11017c478bd9Sstevel@tonic-gate 		 * to a VA for the block loads performed in
11027c478bd9Sstevel@tonic-gate 		 * memscrub_read.
11037c478bd9Sstevel@tonic-gate 		 */
11047c478bd9Sstevel@tonic-gate 		pfn = mmu_btop(pa);
11057c478bd9Sstevel@tonic-gate 		va = (caddr_t)MEMSCRUBBASE;
11067c478bd9Sstevel@tonic-gate 		hat_devload(kas.a_hat, va, psz, pfn, PROT_READ,
11077c478bd9Sstevel@tonic-gate 			HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
11087c478bd9Sstevel@tonic-gate 
11097c478bd9Sstevel@tonic-gate 		/*
11107c478bd9Sstevel@tonic-gate 		 * Can't allow the memscrubber to migrate across CPUs as
11117c478bd9Sstevel@tonic-gate 		 * we need to know whether CEEN is enabled for the current
11127c478bd9Sstevel@tonic-gate 		 * CPU to enable us to scrub the memory. Don't use
11137c478bd9Sstevel@tonic-gate 		 * kpreempt_disable as the time we take to scan a span (even
11147c478bd9Sstevel@tonic-gate 		 * without cpu_check_ce having to manually cpu_check_block)
11157c478bd9Sstevel@tonic-gate 		 * is too long to hold a higher priority thread (eg, RT)
11167c478bd9Sstevel@tonic-gate 		 * off cpu.
11177c478bd9Sstevel@tonic-gate 		 */
11187c478bd9Sstevel@tonic-gate 		thread_affinity_set(curthread, CPU_CURRENT);
11197c478bd9Sstevel@tonic-gate 
11207c478bd9Sstevel@tonic-gate 		/*
11217c478bd9Sstevel@tonic-gate 		 * Protect read scrub from async faults.  For now, we simply
11227c478bd9Sstevel@tonic-gate 		 * maintain a count of such faults caught.
11237c478bd9Sstevel@tonic-gate 		 */
11247c478bd9Sstevel@tonic-gate 
1125*61ef38f7Svb70745 		if (!scan_mmu_pagesize && !on_trap(&otd, OT_DATA_EC)) {
11267c478bd9Sstevel@tonic-gate 			memscrub_read(va, bpp);
11277c478bd9Sstevel@tonic-gate 			/*
11287c478bd9Sstevel@tonic-gate 			 * Check if CEs require logging
11297c478bd9Sstevel@tonic-gate 			 */
11307c478bd9Sstevel@tonic-gate 			cpu_check_ce(SCRUBBER_CEEN_CHECK,
11317c478bd9Sstevel@tonic-gate 			    (uint64_t)pa, va, psz);
1132a08365b4Srjnoe 			no_trap();
11337c478bd9Sstevel@tonic-gate 			thread_affinity_clear(curthread);
11347c478bd9Sstevel@tonic-gate 		} else {
11357c478bd9Sstevel@tonic-gate 			no_trap();
11367c478bd9Sstevel@tonic-gate 			thread_affinity_clear(curthread);
11377c478bd9Sstevel@tonic-gate 
11387c478bd9Sstevel@tonic-gate 			/*
11397c478bd9Sstevel@tonic-gate 			 * Got an async error..
11407c478bd9Sstevel@tonic-gate 			 * Try rescanning it at MMU_PAGESIZE
11417c478bd9Sstevel@tonic-gate 			 * granularity if we were trying to
11427c478bd9Sstevel@tonic-gate 			 * read at a larger page size.
11437c478bd9Sstevel@tonic-gate 			 * This is to ensure we continue to
11447c478bd9Sstevel@tonic-gate 			 * scan the rest of the span.
1145*61ef38f7Svb70745 			 * OR scanning MMU_PAGESIZE granularity to avoid
1146*61ef38f7Svb70745 			 * reading retired pages memory when scan_mmu_pagesize
1147*61ef38f7Svb70745 			 * is set.
11487c478bd9Sstevel@tonic-gate 			 */
1149*61ef38f7Svb70745 			if (psz > MMU_PAGESIZE || scan_mmu_pagesize) {
11507c478bd9Sstevel@tonic-gate 			    caddr_t vaddr = va;
11517c478bd9Sstevel@tonic-gate 			    ms_paddr_t paddr = pa;
11527c478bd9Sstevel@tonic-gate 			    int tmp = 0;
11537c478bd9Sstevel@tonic-gate 			    for (; tmp < bpp; tmp += MEMSCRUB_BPP) {
1154*61ef38f7Svb70745 				/* Don't scrub retired pages */
1155*61ef38f7Svb70745 				if (page_retire_check(paddr, NULL) == 0) {
1156*61ef38f7Svb70745 					vaddr += MMU_PAGESIZE;
1157*61ef38f7Svb70745 					paddr += MMU_PAGESIZE;
1158*61ef38f7Svb70745 					retired_pages++;
1159*61ef38f7Svb70745 					continue;
1160*61ef38f7Svb70745 				}
11617c478bd9Sstevel@tonic-gate 				thread_affinity_set(curthread, CPU_CURRENT);
1162a08365b4Srjnoe 				if (!on_trap(&otd, OT_DATA_EC)) {
11637c478bd9Sstevel@tonic-gate 				    memscrub_read(vaddr, MEMSCRUB_BPP);
11647c478bd9Sstevel@tonic-gate 				    cpu_check_ce(SCRUBBER_CEEN_CHECK,
11657c478bd9Sstevel@tonic-gate 					(uint64_t)paddr, vaddr, MMU_PAGESIZE);
1166a08365b4Srjnoe 				    no_trap();
1167a08365b4Srjnoe 				} else {
1168a08365b4Srjnoe 				    no_trap();
1169a08365b4Srjnoe 				    memscrub_counts.errors_found.value.ui32++;
1170a08365b4Srjnoe 				}
11717c478bd9Sstevel@tonic-gate 				thread_affinity_clear(curthread);
11727c478bd9Sstevel@tonic-gate 				vaddr += MMU_PAGESIZE;
11737c478bd9Sstevel@tonic-gate 				paddr += MMU_PAGESIZE;
11747c478bd9Sstevel@tonic-gate 			    }
11757c478bd9Sstevel@tonic-gate 			}
11767c478bd9Sstevel@tonic-gate 		}
11777c478bd9Sstevel@tonic-gate 		hat_unload(kas.a_hat, va, psz, HAT_UNLOAD_UNLOCK);
11787c478bd9Sstevel@tonic-gate 
11797c478bd9Sstevel@tonic-gate 		blks -= bpp;
11807c478bd9Sstevel@tonic-gate 		pa += psz;
11817c478bd9Sstevel@tonic-gate 		pgsread++;
11827c478bd9Sstevel@tonic-gate 	}
1183*61ef38f7Svb70745 
1184*61ef38f7Svb70745 	/*
1185*61ef38f7Svb70745 	 * If just finished scrubbing MMU_PAGESIZE at a time, but no retired
1186*61ef38f7Svb70745 	 * pages found so delete span from global list.
1187*61ef38f7Svb70745 	 */
1188*61ef38f7Svb70745 	if (scan_mmu_pagesize && retired_pages == 0)
1189*61ef38f7Svb70745 		memscrub_page_retire_span_delete(src);
1190*61ef38f7Svb70745 
1191*61ef38f7Svb70745 	/*
1192*61ef38f7Svb70745 	 * Encountered CE/UE on a retired page during memscrub read of current
1193*61ef38f7Svb70745 	 * span.  Adding span to global list to enable avoid reading further.
1194*61ef38f7Svb70745 	 */
1195*61ef38f7Svb70745 	if (add_to_page_retire_list) {
1196*61ef38f7Svb70745 		if (!memscrub_page_retire_span_search(src))
1197*61ef38f7Svb70745 			memscrub_page_retire_span_add(src);
1198*61ef38f7Svb70745 		add_to_page_retire_list = 0;
1199*61ef38f7Svb70745 	}
1200*61ef38f7Svb70745 
12017c478bd9Sstevel@tonic-gate 	if (memscrub_verbose) {
12027c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "Memory scrubber read 0x%x pages starting "
12037c478bd9Sstevel@tonic-gate 		    "at 0x%" PRIx64, pgsread, src);
12047c478bd9Sstevel@tonic-gate 	}
12057c478bd9Sstevel@tonic-gate }
12067c478bd9Sstevel@tonic-gate 
12077c478bd9Sstevel@tonic-gate /*
1208*61ef38f7Svb70745  * Called by cpu_async_log_err() when memscrub read causes
1209*61ef38f7Svb70745  * CE/UE on a retired page.
1210*61ef38f7Svb70745  */
1211*61ef38f7Svb70745 void
1212*61ef38f7Svb70745 memscrub_induced_error(void)
1213*61ef38f7Svb70745 {
1214*61ef38f7Svb70745 	add_to_page_retire_list = 1;
1215*61ef38f7Svb70745 }
1216*61ef38f7Svb70745 
1217*61ef38f7Svb70745 
1218*61ef38f7Svb70745 /*
1219*61ef38f7Svb70745  * Called by memscrub_scan().
1220*61ef38f7Svb70745  * pa: physical address of span with CE/UE, add to global list.
1221*61ef38f7Svb70745  */
1222*61ef38f7Svb70745 static void
1223*61ef38f7Svb70745 memscrub_page_retire_span_add(ms_paddr_t pa)
1224*61ef38f7Svb70745 {
1225*61ef38f7Svb70745 	memscrub_page_retire_span_t *new_span;
1226*61ef38f7Svb70745 
1227*61ef38f7Svb70745 	new_span = (memscrub_page_retire_span_t *)
1228*61ef38f7Svb70745 	    kmem_zalloc(sizeof (memscrub_page_retire_span_t), KM_NOSLEEP);
1229*61ef38f7Svb70745 
1230*61ef38f7Svb70745 	if (new_span == NULL) {
1231*61ef38f7Svb70745 #ifdef MEMSCRUB_DEBUG
1232*61ef38f7Svb70745 		cmn_err(CE_NOTE, "failed to allocate new span - span with"
1233*61ef38f7Svb70745 		    " retired page/s not tracked.\n");
1234*61ef38f7Svb70745 #endif /* MEMSCRUB_DEBUG */
1235*61ef38f7Svb70745 		return;
1236*61ef38f7Svb70745 	}
1237*61ef38f7Svb70745 
1238*61ef38f7Svb70745 	new_span->address = pa;
1239*61ef38f7Svb70745 	new_span->next = memscrub_page_retire_span_list;
1240*61ef38f7Svb70745 	memscrub_page_retire_span_list = new_span;
1241*61ef38f7Svb70745 }
1242*61ef38f7Svb70745 
1243*61ef38f7Svb70745 /*
1244*61ef38f7Svb70745  * Called by memscrub_scan().
1245*61ef38f7Svb70745  * pa: physical address of span to be removed from global list.
1246*61ef38f7Svb70745  */
1247*61ef38f7Svb70745 static void
1248*61ef38f7Svb70745 memscrub_page_retire_span_delete(ms_paddr_t pa)
1249*61ef38f7Svb70745 {
1250*61ef38f7Svb70745 	memscrub_page_retire_span_t *prev_span, *next_span;
1251*61ef38f7Svb70745 
1252*61ef38f7Svb70745 	prev_span = memscrub_page_retire_span_list;
1253*61ef38f7Svb70745 	next_span = memscrub_page_retire_span_list->next;
1254*61ef38f7Svb70745 
1255*61ef38f7Svb70745 	if (pa == prev_span->address) {
1256*61ef38f7Svb70745 		memscrub_page_retire_span_list = next_span;
1257*61ef38f7Svb70745 		kmem_free(prev_span, sizeof (memscrub_page_retire_span_t));
1258*61ef38f7Svb70745 		return;
1259*61ef38f7Svb70745 	}
1260*61ef38f7Svb70745 
1261*61ef38f7Svb70745 	while (next_span) {
1262*61ef38f7Svb70745 		if (pa == next_span->address) {
1263*61ef38f7Svb70745 			prev_span->next = next_span->next;
1264*61ef38f7Svb70745 			kmem_free(next_span,
1265*61ef38f7Svb70745 			    sizeof (memscrub_page_retire_span_t));
1266*61ef38f7Svb70745 			return;
1267*61ef38f7Svb70745 		}
1268*61ef38f7Svb70745 		prev_span = next_span;
1269*61ef38f7Svb70745 		next_span = next_span->next;
1270*61ef38f7Svb70745 	}
1271*61ef38f7Svb70745 }
1272*61ef38f7Svb70745 
1273*61ef38f7Svb70745 /*
1274*61ef38f7Svb70745  * Called by memscrub_scan().
1275*61ef38f7Svb70745  * pa: physical address of span to be searched in global list.
1276*61ef38f7Svb70745  */
1277*61ef38f7Svb70745 static int
1278*61ef38f7Svb70745 memscrub_page_retire_span_search(ms_paddr_t pa)
1279*61ef38f7Svb70745 {
1280*61ef38f7Svb70745 	memscrub_page_retire_span_t *next_span = memscrub_page_retire_span_list;
1281*61ef38f7Svb70745 
1282*61ef38f7Svb70745 	while (next_span) {
1283*61ef38f7Svb70745 		if (pa == next_span->address)
1284*61ef38f7Svb70745 			return (1);
1285*61ef38f7Svb70745 		next_span = next_span->next;
1286*61ef38f7Svb70745 	}
1287*61ef38f7Svb70745 	return (0);
1288*61ef38f7Svb70745 }
1289*61ef38f7Svb70745 
1290*61ef38f7Svb70745 /*
1291*61ef38f7Svb70745  * Called from new_memscrub() as a result of memory delete.
1292*61ef38f7Svb70745  * Using page_numtopp_nolock() to determine if we have valid PA.
1293*61ef38f7Svb70745  */
1294*61ef38f7Svb70745 static void
1295*61ef38f7Svb70745 memscrub_page_retire_span_list_update(void)
1296*61ef38f7Svb70745 {
1297*61ef38f7Svb70745 	memscrub_page_retire_span_t *prev, *cur, *next;
1298*61ef38f7Svb70745 
1299*61ef38f7Svb70745 	if (memscrub_page_retire_span_list == NULL)
1300*61ef38f7Svb70745 		return;
1301*61ef38f7Svb70745 
1302*61ef38f7Svb70745 	prev = cur = memscrub_page_retire_span_list;
1303*61ef38f7Svb70745 	next = cur->next;
1304*61ef38f7Svb70745 
1305*61ef38f7Svb70745 	while (cur) {
1306*61ef38f7Svb70745 		if (page_numtopp_nolock(mmu_btop(cur->address)) == NULL) {
1307*61ef38f7Svb70745 			if (cur == memscrub_page_retire_span_list) {
1308*61ef38f7Svb70745 				memscrub_page_retire_span_list = next;
1309*61ef38f7Svb70745 				kmem_free(cur,
1310*61ef38f7Svb70745 				    sizeof (memscrub_page_retire_span_t));
1311*61ef38f7Svb70745 				prev = cur = memscrub_page_retire_span_list;
1312*61ef38f7Svb70745 			} else {
1313*61ef38f7Svb70745 				prev->next = cur->next;
1314*61ef38f7Svb70745 				kmem_free(cur,
1315*61ef38f7Svb70745 				    sizeof (memscrub_page_retire_span_t));
1316*61ef38f7Svb70745 				cur = next;
1317*61ef38f7Svb70745 			}
1318*61ef38f7Svb70745 		} else {
1319*61ef38f7Svb70745 			prev = cur;
1320*61ef38f7Svb70745 			cur = next;
1321*61ef38f7Svb70745 		}
1322*61ef38f7Svb70745 		if (cur != NULL)
1323*61ef38f7Svb70745 			next = cur->next;
1324*61ef38f7Svb70745 	}
1325*61ef38f7Svb70745 }
1326*61ef38f7Svb70745 
1327*61ef38f7Svb70745 /*
13287c478bd9Sstevel@tonic-gate  * The memory add/delete callback mechanism does not pass in the
13297c478bd9Sstevel@tonic-gate  * page ranges. The phys_install list has been updated though, so
13307c478bd9Sstevel@tonic-gate  * create a new scrub list from it.
13317c478bd9Sstevel@tonic-gate  */
13327c478bd9Sstevel@tonic-gate 
13337c478bd9Sstevel@tonic-gate static int
1334*61ef38f7Svb70745 new_memscrub(int update_page_retire_list)
13357c478bd9Sstevel@tonic-gate {
13367c478bd9Sstevel@tonic-gate 	struct memlist *src, *list, *old_list;
13377c478bd9Sstevel@tonic-gate 	uint_t npgs;
13387c478bd9Sstevel@tonic-gate 
13397c478bd9Sstevel@tonic-gate 	/*
13407c478bd9Sstevel@tonic-gate 	 * copy phys_install to memscrub_memlist
13417c478bd9Sstevel@tonic-gate 	 */
13427c478bd9Sstevel@tonic-gate 	list = NULL;
13437c478bd9Sstevel@tonic-gate 	npgs = 0;
13447c478bd9Sstevel@tonic-gate 	memlist_read_lock();
13457c478bd9Sstevel@tonic-gate 	for (src = phys_install; src; src = src->next) {
13467c478bd9Sstevel@tonic-gate 		if (memscrub_add_span_gen((pfn_t)(src->address >> PAGESHIFT),
13477c478bd9Sstevel@tonic-gate 		    (pgcnt_t)(src->size >> PAGESHIFT), &list, &npgs)) {
13487c478bd9Sstevel@tonic-gate 			memlist_read_unlock();
13497c478bd9Sstevel@tonic-gate 			while (list) {
13507c478bd9Sstevel@tonic-gate 				struct memlist *el;
13517c478bd9Sstevel@tonic-gate 
13527c478bd9Sstevel@tonic-gate 				el = list;
13537c478bd9Sstevel@tonic-gate 				list = list->next;
13547c478bd9Sstevel@tonic-gate 				kmem_free(el, sizeof (struct memlist));
13557c478bd9Sstevel@tonic-gate 			}
13567c478bd9Sstevel@tonic-gate 			return (-1);
13577c478bd9Sstevel@tonic-gate 		}
13587c478bd9Sstevel@tonic-gate 	}
13597c478bd9Sstevel@tonic-gate 	memlist_read_unlock();
13607c478bd9Sstevel@tonic-gate 
13617c478bd9Sstevel@tonic-gate 	mutex_enter(&memscrub_lock);
13627c478bd9Sstevel@tonic-gate 	memscrub_phys_pages = npgs;
13637c478bd9Sstevel@tonic-gate 	old_list = memscrub_memlist;
13647c478bd9Sstevel@tonic-gate 	memscrub_memlist = list;
1365*61ef38f7Svb70745 
1366*61ef38f7Svb70745 	if (update_page_retire_list)
1367*61ef38f7Svb70745 		memscrub_page_retire_span_list_update();
1368*61ef38f7Svb70745 
13697c478bd9Sstevel@tonic-gate 	mutex_exit(&memscrub_lock);
13707c478bd9Sstevel@tonic-gate 
13717c478bd9Sstevel@tonic-gate 	while (old_list) {
13727c478bd9Sstevel@tonic-gate 		struct memlist *el;
13737c478bd9Sstevel@tonic-gate 
13747c478bd9Sstevel@tonic-gate 		el = old_list;
13757c478bd9Sstevel@tonic-gate 		old_list = old_list->next;
13767c478bd9Sstevel@tonic-gate 		kmem_free(el, sizeof (struct memlist));
13777c478bd9Sstevel@tonic-gate 	}
1378*61ef38f7Svb70745 
13797c478bd9Sstevel@tonic-gate 	return (0);
13807c478bd9Sstevel@tonic-gate }
13817c478bd9Sstevel@tonic-gate 
13827c478bd9Sstevel@tonic-gate /*ARGSUSED*/
13837c478bd9Sstevel@tonic-gate static void
13847c478bd9Sstevel@tonic-gate memscrub_mem_config_post_add(
13857c478bd9Sstevel@tonic-gate 	void *arg,
13867c478bd9Sstevel@tonic-gate 	pgcnt_t delta_pages)
13877c478bd9Sstevel@tonic-gate {
13887c478bd9Sstevel@tonic-gate 	/*
13897c478bd9Sstevel@tonic-gate 	 * We increment pause_memscrub before entering new_memscrub(). This
13907c478bd9Sstevel@tonic-gate 	 * will force the memscrubber to sleep, allowing the DR callback
13917c478bd9Sstevel@tonic-gate 	 * thread to acquire memscrub_lock in new_memscrub(). The use of
13927c478bd9Sstevel@tonic-gate 	 * atomic_add_32() allows concurrent memory DR operations to use the
13937c478bd9Sstevel@tonic-gate 	 * callbacks safely.
13947c478bd9Sstevel@tonic-gate 	 */
13957c478bd9Sstevel@tonic-gate 	atomic_add_32(&pause_memscrub, 1);
13967c478bd9Sstevel@tonic-gate 	ASSERT(pause_memscrub != 0);
13977c478bd9Sstevel@tonic-gate 
13987c478bd9Sstevel@tonic-gate 	/*
13997c478bd9Sstevel@tonic-gate 	 * "Don't care" if we are not scrubbing new memory.
14007c478bd9Sstevel@tonic-gate 	 */
1401*61ef38f7Svb70745 	(void) new_memscrub(0);		/* retain page retire list */
14027c478bd9Sstevel@tonic-gate 
14037c478bd9Sstevel@tonic-gate 	/* Restore the pause setting. */
14047c478bd9Sstevel@tonic-gate 	atomic_add_32(&pause_memscrub, -1);
14057c478bd9Sstevel@tonic-gate }
14067c478bd9Sstevel@tonic-gate 
14077c478bd9Sstevel@tonic-gate /*ARGSUSED*/
14087c478bd9Sstevel@tonic-gate static int
14097c478bd9Sstevel@tonic-gate memscrub_mem_config_pre_del(
14107c478bd9Sstevel@tonic-gate 	void *arg,
14117c478bd9Sstevel@tonic-gate 	pgcnt_t delta_pages)
14127c478bd9Sstevel@tonic-gate {
14137c478bd9Sstevel@tonic-gate 	/* Nothing to do. */
14147c478bd9Sstevel@tonic-gate 	return (0);
14157c478bd9Sstevel@tonic-gate }
14167c478bd9Sstevel@tonic-gate 
14177c478bd9Sstevel@tonic-gate /*ARGSUSED*/
14187c478bd9Sstevel@tonic-gate static void
14197c478bd9Sstevel@tonic-gate memscrub_mem_config_post_del(
14207c478bd9Sstevel@tonic-gate 	void *arg,
14217c478bd9Sstevel@tonic-gate 	pgcnt_t delta_pages,
14227c478bd9Sstevel@tonic-gate 	int cancelled)
14237c478bd9Sstevel@tonic-gate {
14247c478bd9Sstevel@tonic-gate 	/*
14257c478bd9Sstevel@tonic-gate 	 * We increment pause_memscrub before entering new_memscrub(). This
14267c478bd9Sstevel@tonic-gate 	 * will force the memscrubber to sleep, allowing the DR callback
14277c478bd9Sstevel@tonic-gate 	 * thread to acquire memscrub_lock in new_memscrub(). The use of
14287c478bd9Sstevel@tonic-gate 	 * atomic_add_32() allows concurrent memory DR operations to use the
14297c478bd9Sstevel@tonic-gate 	 * callbacks safely.
14307c478bd9Sstevel@tonic-gate 	 */
14317c478bd9Sstevel@tonic-gate 	atomic_add_32(&pause_memscrub, 1);
14327c478bd9Sstevel@tonic-gate 	ASSERT(pause_memscrub != 0);
14337c478bd9Sstevel@tonic-gate 
14347c478bd9Sstevel@tonic-gate 	/*
14357c478bd9Sstevel@tonic-gate 	 * Must stop scrubbing deleted memory as it may be disconnected.
14367c478bd9Sstevel@tonic-gate 	 */
1437*61ef38f7Svb70745 	if (new_memscrub(1)) {	/* update page retire list */
14387c478bd9Sstevel@tonic-gate 		disable_memscrub = 1;
14397c478bd9Sstevel@tonic-gate 	}
14407c478bd9Sstevel@tonic-gate 
14417c478bd9Sstevel@tonic-gate 	/* Restore the pause setting. */
14427c478bd9Sstevel@tonic-gate 	atomic_add_32(&pause_memscrub, -1);
14437c478bd9Sstevel@tonic-gate }
14447c478bd9Sstevel@tonic-gate 
14457c478bd9Sstevel@tonic-gate static kphysm_setup_vector_t memscrub_mem_config_vec = {
14467c478bd9Sstevel@tonic-gate 	KPHYSM_SETUP_VECTOR_VERSION,
14477c478bd9Sstevel@tonic-gate 	memscrub_mem_config_post_add,
14487c478bd9Sstevel@tonic-gate 	memscrub_mem_config_pre_del,
14497c478bd9Sstevel@tonic-gate 	memscrub_mem_config_post_del,
14507c478bd9Sstevel@tonic-gate };
14517c478bd9Sstevel@tonic-gate 
14527c478bd9Sstevel@tonic-gate static void
14537c478bd9Sstevel@tonic-gate memscrub_init_mem_config()
14547c478bd9Sstevel@tonic-gate {
14557c478bd9Sstevel@tonic-gate 	int ret;
14567c478bd9Sstevel@tonic-gate 
14577c478bd9Sstevel@tonic-gate 	ret = kphysm_setup_func_register(&memscrub_mem_config_vec,
14587c478bd9Sstevel@tonic-gate 	    (void *)NULL);
14597c478bd9Sstevel@tonic-gate 	ASSERT(ret == 0);
14607c478bd9Sstevel@tonic-gate }
14617c478bd9Sstevel@tonic-gate 
14627c478bd9Sstevel@tonic-gate static void
14637c478bd9Sstevel@tonic-gate memscrub_uninit_mem_config()
14647c478bd9Sstevel@tonic-gate {
14657c478bd9Sstevel@tonic-gate 	/* This call is OK if the register call was not done. */
14667c478bd9Sstevel@tonic-gate 	kphysm_setup_func_unregister(&memscrub_mem_config_vec, (void *)NULL);
14677c478bd9Sstevel@tonic-gate }
1468