xref: /titanic_53/usr/src/uts/i86pc/os/memscrub.c (revision 2df1fe9ca32bb227b9158c67f5c00b54c20b10fd)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ae115bc7Smrj  * Common Development and Distribution License (the "License").
6ae115bc7Smrj  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217aec1d6eScindi 
227c478bd9Sstevel@tonic-gate /*
23ae115bc7Smrj  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * i86pc Memory Scrubbing
317c478bd9Sstevel@tonic-gate  *
327c478bd9Sstevel@tonic-gate  * On detection of a correctable memory ECC error, the i86pc hardware
337c478bd9Sstevel@tonic-gate  * returns the corrected data to the requester and may re-write it
347c478bd9Sstevel@tonic-gate  * to memory (DRAM or NVRAM). Machines which do not re-write this to
357c478bd9Sstevel@tonic-gate  * memory should add an NMI handler to correct and rewrite.
367c478bd9Sstevel@tonic-gate  *
377c478bd9Sstevel@tonic-gate  * Scrubbing thus reduces the likelihood that multiple transient errors
387c478bd9Sstevel@tonic-gate  * will occur in the same memory word, making uncorrectable errors due
397c478bd9Sstevel@tonic-gate  * to transients less likely.
407c478bd9Sstevel@tonic-gate  *
417c478bd9Sstevel@tonic-gate  * Thus is born the desire that every memory location be periodically
427c478bd9Sstevel@tonic-gate  * accessed.
437c478bd9Sstevel@tonic-gate  *
447c478bd9Sstevel@tonic-gate  * This file implements a memory scrubbing thread.  This scrubber
457c478bd9Sstevel@tonic-gate  * guarantees that all of physical memory is accessed periodically
467c478bd9Sstevel@tonic-gate  * (memscrub_period_sec -- 12 hours).
477c478bd9Sstevel@tonic-gate  *
487c478bd9Sstevel@tonic-gate  * It attempts to do this as unobtrusively as possible.  The thread
497c478bd9Sstevel@tonic-gate  * schedules itself to wake up at an interval such that if it reads
507c478bd9Sstevel@tonic-gate  * memscrub_span_pages (4MB) on each wakeup, it will read all of physical
517c478bd9Sstevel@tonic-gate  * memory in memscrub_period_sec (12 hours).
527c478bd9Sstevel@tonic-gate  *
537c478bd9Sstevel@tonic-gate  * The scrubber uses the REP LODS so it reads 4MB in 0.15 secs (on P5-200).
547c478bd9Sstevel@tonic-gate  * When it completes a span, if all the CPUs are idle, it reads another span.
557c478bd9Sstevel@tonic-gate  * Typically it soaks up idle time this way to reach its deadline early
567c478bd9Sstevel@tonic-gate  * -- and sleeps until the next period begins.
577c478bd9Sstevel@tonic-gate  *
587c478bd9Sstevel@tonic-gate  * Maximal Cost Estimate:  8GB @ xxMB/s = xxx seconds spent in 640 wakeups
597c478bd9Sstevel@tonic-gate  * that run for 0.15 seconds at intervals of 67 seconds.
607c478bd9Sstevel@tonic-gate  *
617c478bd9Sstevel@tonic-gate  * In practice, the scrubber finds enough idle time to finish in a few
627c478bd9Sstevel@tonic-gate  * minutes, and sleeps until its 12 hour deadline.
637c478bd9Sstevel@tonic-gate  *
647c478bd9Sstevel@tonic-gate  * The scrubber maintains a private copy of the phys_install memory list
657c478bd9Sstevel@tonic-gate  * to keep track of what memory should be scrubbed.
667c478bd9Sstevel@tonic-gate  *
677c478bd9Sstevel@tonic-gate  * The following parameters can be set via /etc/system
687c478bd9Sstevel@tonic-gate  *
697c478bd9Sstevel@tonic-gate  * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB)
707c478bd9Sstevel@tonic-gate  * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
717c478bd9Sstevel@tonic-gate  * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0)
727c478bd9Sstevel@tonic-gate  * memscrub_delay_start_sec = (10 seconds)
737c478bd9Sstevel@tonic-gate  * disable_memscrub = (0)
747c478bd9Sstevel@tonic-gate  *
757c478bd9Sstevel@tonic-gate  * the scrubber will exit (or never be started) if it finds the variable
767c478bd9Sstevel@tonic-gate  * "disable_memscrub" set.
777c478bd9Sstevel@tonic-gate  *
787c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_SPAN_PAGES  is based on the guess that 0.15 sec
797c478bd9Sstevel@tonic-gate  * is a "good" amount of minimum time for the thread to run at a time.
807c478bd9Sstevel@tonic-gate  *
817c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
827c478bd9Sstevel@tonic-gate  * twice the frequency the hardware folk estimated would be necessary.
837c478bd9Sstevel@tonic-gate  *
847c478bd9Sstevel@tonic-gate  * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly
857c478bd9Sstevel@tonic-gate  * any other use of the system should be higher priority than scrubbing.
867c478bd9Sstevel@tonic-gate  */
877c478bd9Sstevel@tonic-gate 
887c478bd9Sstevel@tonic-gate #include <sys/types.h>
897c478bd9Sstevel@tonic-gate #include <sys/systm.h>		/* timeout, types, t_lock */
907c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
917c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>	/* MIN */
927c478bd9Sstevel@tonic-gate #include <sys/memlist.h>	/* memlist */
937c478bd9Sstevel@tonic-gate #include <sys/kmem.h>		/* KMEM_NOSLEEP */
947c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>		/* ncpus_online */
957c478bd9Sstevel@tonic-gate #include <sys/debug.h>		/* ASSERTs */
967c478bd9Sstevel@tonic-gate #include <sys/vmem.h>
977c478bd9Sstevel@tonic-gate #include <sys/mman.h>
987c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
997c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h>
1007c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h>
101*2df1fe9cSrandyf #include <sys/callb.h>		/* CPR callback */
1027c478bd9Sstevel@tonic-gate 
1037c478bd9Sstevel@tonic-gate static caddr_t	memscrub_window;
104ae115bc7Smrj static hat_mempte_t memscrub_pte;
1057c478bd9Sstevel@tonic-gate 
1067c478bd9Sstevel@tonic-gate /*
1077c478bd9Sstevel@tonic-gate  * Global Data:
1087c478bd9Sstevel@tonic-gate  */
1097c478bd9Sstevel@tonic-gate /*
1107c478bd9Sstevel@tonic-gate  * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
1117c478bd9Sstevel@tonic-gate  */
1127c478bd9Sstevel@tonic-gate #define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */
1137c478bd9Sstevel@tonic-gate 
1147c478bd9Sstevel@tonic-gate /*
1157c478bd9Sstevel@tonic-gate  * start only if at least MEMSCRUB_MIN_PAGES in system
1167c478bd9Sstevel@tonic-gate  */
1177c478bd9Sstevel@tonic-gate #define	MEMSCRUB_MIN_PAGES	((32 * 1024 * 1024) / PAGESIZE)
1187c478bd9Sstevel@tonic-gate 
1197c478bd9Sstevel@tonic-gate /*
1207c478bd9Sstevel@tonic-gate  * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
1217c478bd9Sstevel@tonic-gate  */
1227c478bd9Sstevel@tonic-gate #define	MEMSCRUB_DFL_SPAN_PAGES	((4 * 1024 * 1024) / PAGESIZE)
1237c478bd9Sstevel@tonic-gate 
1247c478bd9Sstevel@tonic-gate /*
1257c478bd9Sstevel@tonic-gate  * almost anything is higher priority than scrubbing
1267c478bd9Sstevel@tonic-gate  */
1277c478bd9Sstevel@tonic-gate #define	MEMSCRUB_DFL_THREAD_PRI	0
1287c478bd9Sstevel@tonic-gate 
1297c478bd9Sstevel@tonic-gate /*
1307c478bd9Sstevel@tonic-gate  * we can patch these defaults in /etc/system if necessary
1317c478bd9Sstevel@tonic-gate  */
1327c478bd9Sstevel@tonic-gate uint_t disable_memscrub = 0;
1337aec1d6eScindi static uint_t disable_memscrub_quietly = 0;
1347c478bd9Sstevel@tonic-gate pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES;
1357c478bd9Sstevel@tonic-gate pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
1367c478bd9Sstevel@tonic-gate time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
1377c478bd9Sstevel@tonic-gate uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
1387c478bd9Sstevel@tonic-gate time_t memscrub_delay_start_sec = 10;
1397c478bd9Sstevel@tonic-gate 
1407c478bd9Sstevel@tonic-gate /*
1417c478bd9Sstevel@tonic-gate  * Static Routines
1427c478bd9Sstevel@tonic-gate  */
1437c478bd9Sstevel@tonic-gate static void memscrubber(void);
1447c478bd9Sstevel@tonic-gate static int system_is_idle(void);
1457c478bd9Sstevel@tonic-gate static int memscrub_add_span(uint64_t, uint64_t);
1467c478bd9Sstevel@tonic-gate 
1477c478bd9Sstevel@tonic-gate /*
1487c478bd9Sstevel@tonic-gate  * Static Data
1497c478bd9Sstevel@tonic-gate  */
1507c478bd9Sstevel@tonic-gate static struct memlist *memscrub_memlist;
1517c478bd9Sstevel@tonic-gate static uint_t memscrub_phys_pages;
1527c478bd9Sstevel@tonic-gate 
1537c478bd9Sstevel@tonic-gate static kcondvar_t memscrub_cv;
1547c478bd9Sstevel@tonic-gate static kmutex_t memscrub_lock;
1557aec1d6eScindi 
1567c478bd9Sstevel@tonic-gate /*
1577c478bd9Sstevel@tonic-gate  * memscrub_lock protects memscrub_memlist
1587c478bd9Sstevel@tonic-gate  */
1597c478bd9Sstevel@tonic-gate uint_t memscrub_scans_done;
1607c478bd9Sstevel@tonic-gate 
1617c478bd9Sstevel@tonic-gate uint_t memscrub_done_early;
1627c478bd9Sstevel@tonic-gate uint_t memscrub_early_sec;
1637c478bd9Sstevel@tonic-gate 
1647c478bd9Sstevel@tonic-gate uint_t memscrub_done_late;
1657c478bd9Sstevel@tonic-gate time_t memscrub_late_sec;
1667c478bd9Sstevel@tonic-gate 
1677c478bd9Sstevel@tonic-gate /*
1687c478bd9Sstevel@tonic-gate  * create memscrub_memlist from phys_install list
1697c478bd9Sstevel@tonic-gate  * initialize locks, set memscrub_phys_pages.
1707c478bd9Sstevel@tonic-gate  */
1717c478bd9Sstevel@tonic-gate void
1727c478bd9Sstevel@tonic-gate memscrub_init()
1737c478bd9Sstevel@tonic-gate {
1747c478bd9Sstevel@tonic-gate 	struct memlist *src;
1757c478bd9Sstevel@tonic-gate 
1767c478bd9Sstevel@tonic-gate 	if (physmem < memscrub_min_pages)
1777c478bd9Sstevel@tonic-gate 		return;
1787c478bd9Sstevel@tonic-gate 
1797c478bd9Sstevel@tonic-gate 	if (!kpm_enable) {
1807c478bd9Sstevel@tonic-gate 		memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
1817c478bd9Sstevel@tonic-gate 		memscrub_pte = hat_mempte_setup(memscrub_window);
1827c478bd9Sstevel@tonic-gate 	}
1837c478bd9Sstevel@tonic-gate 
1847c478bd9Sstevel@tonic-gate 	/*
1857c478bd9Sstevel@tonic-gate 	 * copy phys_install to memscrub_memlist
1867c478bd9Sstevel@tonic-gate 	 */
1877c478bd9Sstevel@tonic-gate 	for (src = phys_install; src; src = src->next) {
1887c478bd9Sstevel@tonic-gate 		if (memscrub_add_span(src->address, src->size)) {
1897c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
1907c478bd9Sstevel@tonic-gate 			    "Memory scrubber failed to initialize\n");
1917c478bd9Sstevel@tonic-gate 			return;
1927c478bd9Sstevel@tonic-gate 		}
1937c478bd9Sstevel@tonic-gate 	}
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate 	mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
1967c478bd9Sstevel@tonic-gate 	cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate 	/*
1997c478bd9Sstevel@tonic-gate 	 * create memscrubber thread
2007c478bd9Sstevel@tonic-gate 	 */
2017c478bd9Sstevel@tonic-gate 	(void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0,
2027c478bd9Sstevel@tonic-gate 	    TS_RUN, memscrub_thread_pri);
2037c478bd9Sstevel@tonic-gate }
2047c478bd9Sstevel@tonic-gate 
2057aec1d6eScindi /*
2067aec1d6eScindi  * Function to cause the software memscrubber to exit quietly if the
2077aec1d6eScindi  * platform support has located a hardware scrubber and enabled it.
2087aec1d6eScindi  */
void
memscrub_disable(void)
{
	/*
	 * memscrubber() polls this flag at the top of its main loop and
	 * before each span scan; when set, the thread exits without
	 * printing the "memory scrubber exiting." notice.
	 */
	disable_memscrub_quietly = 1;
}
2147aec1d6eScindi 
#ifdef MEMSCRUB_DEBUG
/*
 * Debug aid: dump each (address, size) entry of the given memlist to
 * the console, preceded by the supplied title.
 */
void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *mlp = listp;

	cmn_err(CE_CONT, "%s:\n", title);

	while (mlp != NULL) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    mlp->address, mlp->size);
		mlp = mlp->next;
	}
}
#endif /* MEMSCRUB_DEBUG */
2297c478bd9Sstevel@tonic-gate 
/*
 * timeout() callback that wakes the sleeping scrubber thread.  The
 * argument is unused.
 */
/* ARGSUSED */
void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}
2427c478bd9Sstevel@tonic-gate 
2437c478bd9Sstevel@tonic-gate /*
2447c478bd9Sstevel@tonic-gate  * this calculation doesn't account for the time that the actual scan
2457c478bd9Sstevel@tonic-gate  * consumes -- so we'd fall slightly behind schedule with this
2467c478bd9Sstevel@tonic-gate  * interval_sec.  but the idle loop optimization below usually makes us
2477c478bd9Sstevel@tonic-gate  * come in way ahead of schedule.
2487c478bd9Sstevel@tonic-gate  */
2497c478bd9Sstevel@tonic-gate static int
2507c478bd9Sstevel@tonic-gate compute_interval_sec()
2517c478bd9Sstevel@tonic-gate {
2527c478bd9Sstevel@tonic-gate 	if (memscrub_phys_pages <= memscrub_span_pages)
2537c478bd9Sstevel@tonic-gate 		return (memscrub_period_sec);
2547c478bd9Sstevel@tonic-gate 	else
2557c478bd9Sstevel@tonic-gate 		return (memscrub_period_sec/
2567c478bd9Sstevel@tonic-gate 		    (memscrub_phys_pages/memscrub_span_pages));
2577c478bd9Sstevel@tonic-gate }
2587c478bd9Sstevel@tonic-gate 
/*
 * Body of the memory scrubber thread.  On each wakeup it scans up to
 * memscrub_span_pages pages, one page at a time, by mapping each page
 * (via segkpm, or the private memscrub_pte window) and reading it with
 * scan_memory().  While system_is_idle() it keeps scanning further
 * spans; otherwise it arms a timeout sized by compute_interval_sec()
 * and sleeps on memscrub_cv.  Exits when disable_memscrub or
 * disable_memscrub_quietly is set.
 */
void
memscrubber()
{
	time_t deadline;
	uint64_t mlp_last_addr;
	uint64_t mlp_next_addr;
	int reached_end = 1;	/* pretend a full pass just completed */
	time_t interval_sec = 0;
	struct memlist *mlp;

	extern void scan_memory(caddr_t, size_t);
	callb_cpr_t cprinfo;

	/*
	 * notify CPR of our existence
	 */
	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");

	if (memscrub_memlist == NULL) {
		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
		goto memscrub_exit;
	}

	/* start scanning at the first span in the list */
	mlp = memscrub_memlist;
	mlp_next_addr = mlp->address;
	mlp_last_addr = mlp->address + mlp->size;

	/* first wakeup is delayed by memscrub_delay_start_sec */
	deadline = gethrestime_sec() + memscrub_delay_start_sec;

	for (;;) {
		if (disable_memscrub || disable_memscrub_quietly)
			break;

		mutex_enter(&memscrub_lock);

		/*
		 * did we just reach the end of memory?
		 */
		if (reached_end) {
			time_t now = gethrestime_sec();

			if (now >= deadline) {
				memscrub_done_late++;
				memscrub_late_sec += (now - deadline);
				/*
				 * past deadline, start right away
				 */
				interval_sec = 0;

				deadline = now + memscrub_period_sec;
			} else {
				/*
				 * we finished ahead of schedule.
				 * wait till previous deadline before re-start.
				 */
				interval_sec = deadline - now;
				memscrub_done_early++;
				memscrub_early_sec += interval_sec;
				deadline += memscrub_period_sec;
			}
		} else {
			/* mid-pass: pace to finish the pass on schedule */
			interval_sec = compute_interval_sec();
		}

		/*
		 * it is safe from our standpoint for CPR to
		 * suspend the system
		 */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);

		/*
		 * hit the snooze bar
		 */
		(void) timeout(memscrub_wakeup, NULL, interval_sec * hz);

		/*
		 * go to sleep
		 */
		cv_wait(&memscrub_cv, &memscrub_lock);

		/* we need to goto work */
		CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);

		mutex_exit(&memscrub_lock);

		do {
			pgcnt_t pages = memscrub_span_pages;
			uint64_t address = mlp_next_addr;

			if (disable_memscrub || disable_memscrub_quietly)
				break;

			mutex_enter(&memscrub_lock);

			/*
			 * Make sure we don't try to scan beyond the end of
			 * the current memlist.  If we would, then resize
			 * our scan target for this iteration, and prepare
			 * to read the next memlist entry on the next
			 * iteration.
			 */
			reached_end = 0;
			if (address + mmu_ptob(pages) >= mlp_last_addr) {
				pages = mmu_btop(mlp_last_addr - address);
				mlp = mlp->next;
				if (mlp == NULL) {
					/* wrapped: restart at the list head */
					reached_end = 1;
					mlp = memscrub_memlist;
				}
				mlp_next_addr = mlp->address;
				mlp_last_addr = mlp->address + mlp->size;
			} else {
				mlp_next_addr += mmu_ptob(pages);
			}

			mutex_exit(&memscrub_lock);

			while (pages--) {
				pfn_t pfn = btop(address);

				/*
				 * Without segkpm, the memscrubber cannot
				 * be allowed to migrate across CPUs, as
				 * the CPU-specific mapping of
				 * memscrub_window would be incorrect.
				 * With segkpm, switching CPUs is legal, but
				 * inefficient.  We don't use
				 * kpreempt_disable as it might hold a
				 * higher priority thread (eg, RT) too long
				 * off CPU.
				 */
				thread_affinity_set(curthread, CPU_CURRENT);
				if (kpm_enable)
					memscrub_window = hat_kpm_pfn2va(pfn);
				else
					hat_mempte_remap(pfn, memscrub_window,
					    memscrub_pte,
					    PROT_READ, HAT_LOAD_NOCONSIST);

				scan_memory(memscrub_window, PAGESIZE);

				thread_affinity_clear(curthread);
				address += MMU_PAGESIZE;
			}

			memscrub_scans_done++;
		} while (!reached_end && system_is_idle());
	}

memscrub_exit:

	if (!disable_memscrub_quietly)
		cmn_err(CE_NOTE, "memory scrubber exiting.");
	/*
	 * We are about to bail, but don't have the memscrub_lock,
	 * and it is needed for CALLB_CPR_EXIT.
	 */
	mutex_enter(&memscrub_lock);
	/* CALLB_CPR_EXIT drops memscrub_lock on our behalf */
	CALLB_CPR_EXIT(&cprinfo);

	cv_destroy(&memscrub_cv);

	thread_exit();
}
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate 
4257c478bd9Sstevel@tonic-gate /*
4267c478bd9Sstevel@tonic-gate  * return 1 if we're MP and all the other CPUs are idle
4277c478bd9Sstevel@tonic-gate  */
4287c478bd9Sstevel@tonic-gate static int
4297c478bd9Sstevel@tonic-gate system_is_idle()
4307c478bd9Sstevel@tonic-gate {
4317c478bd9Sstevel@tonic-gate 	int cpu_id;
4327c478bd9Sstevel@tonic-gate 	int found = 0;
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate 	if (1 == ncpus_online)
4357c478bd9Sstevel@tonic-gate 		return (0);
4367c478bd9Sstevel@tonic-gate 
4377c478bd9Sstevel@tonic-gate 	for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) {
4387c478bd9Sstevel@tonic-gate 		if (!cpu[cpu_id])
4397c478bd9Sstevel@tonic-gate 			continue;
4407c478bd9Sstevel@tonic-gate 
4417c478bd9Sstevel@tonic-gate 		found++;
4427c478bd9Sstevel@tonic-gate 
4437c478bd9Sstevel@tonic-gate 		if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) {
4447c478bd9Sstevel@tonic-gate 			if (CPU->cpu_id == cpu_id &&
4457c478bd9Sstevel@tonic-gate 			    CPU->cpu_disp->disp_nrunnable == 0)
4467c478bd9Sstevel@tonic-gate 				continue;
4477c478bd9Sstevel@tonic-gate 			return (0);
4487c478bd9Sstevel@tonic-gate 		}
4497c478bd9Sstevel@tonic-gate 
4507c478bd9Sstevel@tonic-gate 		if (found == ncpus)
4517c478bd9Sstevel@tonic-gate 			break;
4527c478bd9Sstevel@tonic-gate 	}
4537c478bd9Sstevel@tonic-gate 	return (1);
4547c478bd9Sstevel@tonic-gate }
4557c478bd9Sstevel@tonic-gate 
4567c478bd9Sstevel@tonic-gate /*
4577c478bd9Sstevel@tonic-gate  * add a span to the memscrub list
4587c478bd9Sstevel@tonic-gate  */
/*
 * Add the span [start, start + bytes) to memscrub_memlist, keeping the
 * list sorted by address and coalescing with an adjacent entry where
 * possible.  Accounts the span's pages in memscrub_phys_pages.
 * Returns 0 on success, -1 if a new list node could not be allocated.
 * Panics if the span overlaps an existing entry, since that indicates
 * a corrupt phys_install list.
 */
static int
memscrub_add_span(uint64_t start, uint64_t bytes)
{
	struct memlist *dst;
	struct memlist *prev, *next;
	uint64_t end = start + bytes - 1;	/* inclusive end address */
	int retval = 0;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
	    " size: 0x%llx\n", start, bytes);
#endif /* MEMSCRUB_DEBUG */

	/*
	 * Scan through the list to find the proper place to install it.
	 */
	prev = NULL;
	next = memscrub_memlist;
	while (next) {
		uint64_t ns = next->address;	/* existing span start */
		uint64_t ne = next->address + next->size - 1;	/* its end */

		/*
		 * If this span overlaps with an existing span, then
		 * something has gone horribly wrong with the phys_install
		 * list.  In fact, I'm surprised we made it this far.
		 */
		if ((start >= ns && start <= ne) || (end >= ns && end <= ne) ||
		    (start < ns && end > ne))
			panic("memscrub found overlapping memory ranges "
			    "(0x%p-0x%p) and (0x%p-0x%p)",
			    (void *)(uintptr_t)start, (void *)(uintptr_t)end,
			    (void *)(uintptr_t)ns, (void *)(uintptr_t)ne);

		/*
		 * New span can be appended to an existing one.
		 */
		if (start == ne + 1) {
			next->size += bytes;
			goto add_done;
		}

		/*
		 * New span can be prepended to an existing one.
		 */
		if (end + 1 == ns) {
			next->size += bytes;
			next->address = start;
			goto add_done;
		}

		/*
		 * If the next span has a higher start address than the new
		 * one, then we have found the right spot for our
		 * insertion.
		 */
		if (ns > start)
			break;

		prev = next;
		next = next->next;
	}

	/*
	 * allocate a new struct memlist
	 */
	dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
	if (dst == NULL) {
		retval = -1;
		goto add_done;
	}
	dst->address = start;
	dst->size = bytes;
	dst->prev = prev;
	dst->next = next;

	/* Link the new node between prev and next (list head if no prev). */
	if (prev)
		prev->next = dst;
	else
		memscrub_memlist = dst;

	if (next)
		next->prev = dst;

add_done:

	/* Count the pages even when the span was merged into a neighbor. */
	if (retval != -1)
		memscrub_phys_pages += mmu_btop(bytes);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);
	return (retval);
}
560