xref: /titanic_44/usr/src/uts/sun4u/os/memscrub.c (revision 4496171313bed39e96f21bc2f9faf2868e267ae3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * sun4u Memory Scrubbing
30  *
31  * On detection of a correctable memory ECC error, the sun4u kernel
32  * returns the corrected data to the requester and re-writes it
33  * to memory (DRAM).  So if the correctable error was transient,
34  * the read has effectively been cleaned (scrubbed) from memory.
35  *
36  * Scrubbing thus reduces the likelihood that multiple transient errors
37  * will occur in the same memory word, making uncorrectable errors due
38  * to transients less likely.
39  *
40  * Thus is born the desire that every memory location be periodically
41  * accessed.
42  *
43  * This file implements a memory scrubbing thread.  This scrubber
44  * guarantees that all of physical memory is accessed periodically
45  * (memscrub_period_sec -- 12 hours).
46  *
47  * It attempts to do this as unobtrusively as possible.  The thread
48  * schedules itself to wake up at an interval such that if it reads
49  * memscrub_span_pages (8MB) on each wakeup, it will read all of physical
50  * memory in memscrub_period_sec (12 hours).
51  *
52  * The scrubber uses the block load hardware to read memory @ 268MB/s,
53  * so it reads spans of 8MB in 0.03 seconds.  Unlike the original sun4d
54  * scrubber the sun4u scrubber does not read ahead if the system is idle
55  * because we can read memory very efficiently.
56  *
57  * The scrubber maintains a private copy of the phys_install memory list
58  * to keep track of what memory should be scrubbed.
59  *
60  * The global routines memscrub_add_span() and memscrub_delete_span() are
61  * used to add and delete from this list.  If hotplug memory is later
62  * supported these two routines can be used to notify the scrubber of
63  * memory configuration changes.
64  *
65  * The following parameters can be set via /etc/system
66  *
67  * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (8MB)
68  * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
69  * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI)
70  * memscrub_delay_start_sec = (5 minutes)
71  * memscrub_verbose = (0)
72  * memscrub_override_ticks = (1 tick)
73  * disable_memscrub = (0)
74  * pause_memscrub = (0)
75  * read_all_memscrub = (0)
76  *
77  * The scrubber will print NOTICE messages of what it is doing if
78  * "memscrub_verbose" is set.
79  *
80  * If the scrubber's sleep time calculation drops to zero ticks,
81  * memscrub_override_ticks will be used as the sleep time instead. The
82  * sleep time should only drop to zero on a system with over 32.95
83  * terabytes of memory, or where the default scrubber parameters have
84  * been adjusted. For example, reducing memscrub_span_pages or
85  * memscrub_period_sec causes the sleep time to drop to zero with less
86  * memory. Note that since the sleep time is calculated in clock ticks,
87  * using hires clock ticks allows for more memory before the sleep time
88  * becomes zero.
89  *
90  * The scrubber will exit (or never be started) if it finds the variable
91  * "disable_memscrub" set.
92  *
93  * The scrubber will pause (not read memory) when "pause_memscrub"
94  * is set.  It will check the state of pause_memscrub at each wakeup
95  * period.  The scrubber will not make up for lost time.  If you
96  * pause the scrubber for a prolonged period of time you can use
97  * the "read_all_memscrub" switch (see below) to catch up. In addition,
98  * pause_memscrub is used internally by the post memory DR callbacks.
99  * It is set for the small period of time during which the callbacks
100  * are executing. This ensures "memscrub_lock" will be released,
101  * allowing the callbacks to finish.
102  *
103  * The scrubber will read all memory if "read_all_memscrub" is set.
104  * The normal span read will also occur during the wakeup.
105  *
106  * MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system
107  * must have before we'll start the scrubber.
108  *
109  * MEMSCRUB_DFL_SPAN_PAGES (8MB) is based on the guess that 0.03 sec
110  * is a "good" amount of minimum time for the thread to run at a time.
111  *
112  * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
113  * twice the frequency the hardware folk estimated would be necessary.
114  *
115  * MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption
116  * that the scrubber should get its fair share of time (since it
117  * is short).  At a priority of 0 the scrubber will be starved.
118  */
119 
120 #include <sys/systm.h>		/* timeout, types, t_lock */
121 #include <sys/cmn_err.h>
122 #include <sys/sysmacros.h>	/* MIN */
123 #include <sys/memlist.h>	/* memlist */
124 #include <sys/mem_config.h>	/* memory add/delete */
125 #include <sys/kmem.h>		/* KMEM_NOSLEEP */
126 #include <sys/cpuvar.h>		/* ncpus_online */
127 #include <sys/debug.h>		/* ASSERTs */
128 #include <sys/machsystm.h>	/* lddphys */
129 #include <sys/cpu_module.h>	/* vtag_flushpage */
130 #include <sys/kstat.h>
131 #include <sys/atomic.h>		/* atomic_add_32 */
132 
133 #include <vm/hat.h>
134 #include <vm/seg_kmem.h>
135 #include <vm/hat_sfmmu.h>	/* XXX FIXME - delete */
136 
137 #include <sys/time.h>
138 #include <sys/callb.h>		/* CPR callback */
139 #include <sys/ontrap.h>
140 
141 /*
142  * Should really have paddr_t defined, but it is broken.  Use
143  * ms_paddr_t in the meantime to make the code cleaner
144  */
145 typedef uint64_t ms_paddr_t;
146 
147 /*
148  * Global Routines:
149  */
150 int memscrub_add_span(pfn_t pfn, pgcnt_t pages);
151 int memscrub_delete_span(pfn_t pfn, pgcnt_t pages);
152 int memscrub_init(void);
153 void memscrub_induced_error(void);
154 
155 /*
156  * Global Data:
157  */
158 
159 /*
160  * scrub if we have at least this many pages
161  */
162 #define	MEMSCRUB_MIN_PAGES (32 * 1024 * 1024 / PAGESIZE)
163 
164 /*
165  * scan all of physical memory at least once every MEMSCRUB_DFL_PERIOD_SEC
166  */
167 #define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */
168 
169 /*
170  * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
171  */
172 #define	MEMSCRUB_DFL_SPAN_PAGES	((8 * 1024 * 1024) / PAGESIZE)
173 
174 /*
175  * almost anything is higher priority than scrubbing
176  */
177 #define	MEMSCRUB_DFL_THREAD_PRI	MINCLSYSPRI
178 
179 /*
180  * size used when scanning memory
181  */
#define	MEMSCRUB_BLOCK_SIZE		256
#define	MEMSCRUB_BLOCK_SIZE_SHIFT	8 	/* log2(MEMSCRUB_BLOCK_SIZE) */
#define	MEMSCRUB_BLOCKS_PER_PAGE	(PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)

/*
 * Number of MEMSCRUB_BLOCK_SIZE blocks in each MMU page size the scanner
 * maps.  Every expansion is fully parenthesized: ">>" binds less tightly
 * than "+" and "*", so a bare "size >> shift" expansion would be
 * mis-evaluated as soon as the macro appeared inside a larger arithmetic
 * expression.
 */
#define	MEMSCRUB_BPP4M		(MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP512K	(MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP64K		(MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP		(MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)
190 
191 /*
192  * This message indicates that we have exceeded the limitations of
193  * the memscrubber. See the comments above regarding what would
194  * cause the sleep time to become zero. In DEBUG mode, this message
195  * is logged on the console and in the messages file. In non-DEBUG
196  * mode, it is only logged in the messages file.
197  */
198 #ifdef DEBUG
199 #define	MEMSCRUB_OVERRIDE_MSG	"Memory scrubber sleep time is zero " \
200 	"seconds, consuming entire CPU."
201 #else
202 #define	MEMSCRUB_OVERRIDE_MSG	"!Memory scrubber sleep time is zero " \
203 	"seconds, consuming entire CPU."
204 #endif /* DEBUG */
205 
206 /*
207  * we can patch these defaults in /etc/system if necessary
208  */
/*
 * nonzero makes memscrubber() leave its main loop and exit; also set by
 * memscrub_delete_span() once memscrub_phys_pages reaches zero
 */
209 uint_t disable_memscrub = 0;
/*
 * while nonzero memscrubber() keeps waking up but skips the scan and
 * postpones its end-of-memory (deadline) processing
 */
210 uint_t pause_memscrub = 0;
/*
 * nonzero requests one scan of every span on the list at the next wakeup;
 * memscrubber() clears it afterwards
 */
211 uint_t read_all_memscrub = 0;
/* nonzero enables the scrubber's CE_NOTE progress messages */
212 uint_t memscrub_verbose = 0;
/* NOTE(review): not referenced in the visible part of this file -- confirm use */
213 uint_t memscrub_all_idle = 0;
/* upper bound, in pages, on what a single wakeup scans */
214 uint_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
/* target time, in seconds, for one pass over all scrubbed memory */
215 uint_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
/* priority handed to thread_create() for the scrubber thread */
216 uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
/* seconds added to the current time to form the first pass deadline */
217 uint_t memscrub_delay_start_sec = 5 * 60;
/* sleep time, in ticks, substituted when the computed interval is zero */
218 uint_t memscrub_override_ticks = 1;
219 
220 /*
221  * Static Routines
222  */
223 static void memscrubber(void);
224 static void memscrub_cleanup(void);
225 static int memscrub_add_span_gen(pfn_t, pgcnt_t, struct memlist **, uint_t *);
226 static int memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp);
227 static void memscrub_scan(uint_t blks, ms_paddr_t src);
228 
229 /*
230  * Static Data
231  */
232 
233 static struct memlist *memscrub_memlist;
234 static uint_t memscrub_phys_pages;
235 
236 static kcondvar_t memscrub_cv;
237 static kmutex_t memscrub_lock;
238 /*
239  * memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ...
240  */
241 static void memscrub_init_mem_config(void);
242 static void memscrub_uninit_mem_config(void);
243 
244 /*
245  * Linked list of memscrub aware spans having retired pages.
246  * Currently enabled only on sun4u USIII-based platforms.
247  */
248 typedef struct memscrub_page_retire_span {
249 	ms_paddr_t				address;
250 	struct memscrub_page_retire_span	*next;
251 } memscrub_page_retire_span_t;
252 
253 static memscrub_page_retire_span_t *memscrub_page_retire_span_list = NULL;
254 
255 static void memscrub_page_retire_span_add(ms_paddr_t);
256 static void memscrub_page_retire_span_delete(ms_paddr_t);
257 static int memscrub_page_retire_span_search(ms_paddr_t);
258 static void memscrub_page_retire_span_list_update(void);
259 
260 /*
261  * add_to_page_retire_list: Set by cpu_async_log_err() routine
262  * by calling memscrub_induced_error() when CE/UE occurs on a retired
263  * page due to memscrub reading.  Cleared by memscrub after updating
264  * global page retire span list.  Piggybacking on protection of
265  * memscrub_lock, which is held during set and clear.
266  * Note: When cpu_async_log_err() calls memscrub_induced_error(), it is running
267  * on softint context, which gets fired on a cpu memscrub thread currently
268  * running.  Memscrub thread has affinity set during memscrub_read(), hence
269  * migration to new cpu not expected.
270  */
271 static int add_to_page_retire_list = 0;
272 
273 /*
274  * Keep track of some interesting statistics
275  */
276 static struct memscrub_kstats {
277 	kstat_named_t	done_early;	/* ahead of schedule */
278 	kstat_named_t	early_sec;	/* by cumulative num secs */
279 	kstat_named_t	done_late;	/* behind schedule */
280 	kstat_named_t	late_sec;	/* by cumulative num secs */
281 	kstat_named_t	interval_ticks;	/* num ticks between intervals */
282 	kstat_named_t	force_run;	/* forced to run, non-timeout */
283 	kstat_named_t	errors_found;	/* num errors found by memscrub */
284 } memscrub_counts = {
285 	{ "done_early",		KSTAT_DATA_UINT32 },
286 	{ "early_sec", 		KSTAT_DATA_UINT32 },
287 	{ "done_late", 		KSTAT_DATA_UINT32 },
288 	{ "late_sec",		KSTAT_DATA_UINT32 },
289 	{ "interval_ticks",	KSTAT_DATA_UINT32 },
290 	{ "force_run",		KSTAT_DATA_UINT32 },
291 	{ "errors_found",	KSTAT_DATA_UINT32 },
292 };
293 static struct kstat *memscrub_ksp = (struct kstat *)NULL;
294 
295 static timeout_id_t memscrub_tid = 0;	/* keep track of timeout id */
296 
297 /*
298  * create memscrub_memlist from phys_install list
299  * initialize locks, set memscrub_phys_pages.
300  */
301 int
302 memscrub_init(void)
303 {
304 	struct memlist *src;
305 
306 	/*
307 	 * only startup the scrubber if we have a minimum
308 	 * number of pages
309 	 */
310 	if (physinstalled >= MEMSCRUB_MIN_PAGES) {
311 
312 		/*
313 		 * initialize locks
314 		 */
315 		mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
316 		cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);
317 
318 		/*
319 		 * copy phys_install to memscrub_memlist
320 		 */
321 		for (src = phys_install; src; src = src->next) {
322 			if (memscrub_add_span(
323 			    (pfn_t)(src->address >> PAGESHIFT),
324 			    (pgcnt_t)(src->size >> PAGESHIFT))) {
325 				memscrub_cleanup();
326 				return (-1);
327 			}
328 		}
329 
330 		/*
331 		 * initialize kstats
332 		 */
333 		memscrub_ksp = kstat_create("unix", 0, "memscrub_kstat",
334 			"misc", KSTAT_TYPE_NAMED,
335 			sizeof (memscrub_counts) / sizeof (kstat_named_t),
336 			KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
337 
338 		if (memscrub_ksp) {
339 			memscrub_ksp->ks_data = (void *)&memscrub_counts;
340 			kstat_install(memscrub_ksp);
341 		} else {
342 			cmn_err(CE_NOTE, "Memscrubber cannot create kstats\n");
343 		}
344 
345 		/*
346 		 * create memscrubber thread
347 		 */
348 		(void) thread_create(NULL, 0, (void (*)())memscrubber,
349 		    NULL, 0, &p0, TS_RUN, memscrub_thread_pri);
350 
351 		/*
352 		 * We don't want call backs changing the list
353 		 * if there is no thread running. We do not
354 		 * attempt to deal with stopping/starting scrubbing
355 		 * on memory size changes.
356 		 */
357 		memscrub_init_mem_config();
358 	}
359 
360 	return (0);
361 }
362 
363 static void
364 memscrub_cleanup(void)
365 {
366 	memscrub_uninit_mem_config();
367 	while (memscrub_memlist) {
368 		(void) memscrub_delete_span(
369 			(pfn_t)(memscrub_memlist->address >> PAGESHIFT),
370 			(pgcnt_t)(memscrub_memlist->size >> PAGESHIFT));
371 	}
372 	if (memscrub_ksp)
373 		kstat_delete(memscrub_ksp);
374 	cv_destroy(&memscrub_cv);
375 	mutex_destroy(&memscrub_lock);
376 }
377 
#ifdef MEMSCRUB_DEBUG
/*
 * Debug aid: print "title" followed by the address and size of every
 * entry on the memlist starting at "listp".
 */
static void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *ent = listp;

	cmn_err(CE_CONT, "%s:\n", title);

	while (ent != NULL) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    ent->address, ent->size);
		ent = ent->next;
	}
}
#endif /* MEMSCRUB_DEBUG */
392 
393 /* ARGSUSED */
394 static void
395 memscrub_wakeup(void *c)
396 {
397 	/*
398 	 * grab mutex to guarantee that our wakeup call
399 	 * arrives after we go to sleep -- so we can't sleep forever.
400 	 */
401 	mutex_enter(&memscrub_lock);
402 	cv_signal(&memscrub_cv);
403 	mutex_exit(&memscrub_lock);
404 }
405 
406 /*
407  * provide an interface external to the memscrubber
408  * which will force the memscrub thread to run vs.
409  * waiting for the timeout, if one is set
410  */
411 void
412 memscrub_run(void)
413 {
414 	memscrub_counts.force_run.value.ui32++;
415 	if (memscrub_tid) {
416 		(void) untimeout(memscrub_tid);
417 		memscrub_wakeup((void *)NULL);
418 	}
419 }
420 
421 /*
422  * this calculation doesn't account for the time
423  * that the actual scan consumes -- so we'd fall
424  * slightly behind schedule with this interval.
425  * It's very small.
426  */
427 
428 static uint_t
429 compute_interval_ticks(void)
430 {
431 	/*
432 	 * We use msp_safe mpp_safe below to insure somebody
433 	 * doesn't set memscrub_span_pages or memscrub_phys_pages
434 	 * to 0 on us.
435 	 */
436 	static uint_t msp_safe, mpp_safe;
437 	static uint_t interval_ticks, period_ticks;
438 	msp_safe = memscrub_span_pages;
439 	mpp_safe = memscrub_phys_pages;
440 
441 	period_ticks = memscrub_period_sec * hz;
442 	interval_ticks = period_ticks;
443 
444 	ASSERT(mutex_owned(&memscrub_lock));
445 
446 	if ((msp_safe != 0) && (mpp_safe != 0)) {
447 		if (memscrub_phys_pages <= msp_safe) {
448 			interval_ticks = period_ticks;
449 		} else {
450 			interval_ticks = (period_ticks /
451 			    (mpp_safe / msp_safe));
452 		}
453 	}
454 	return (interval_ticks);
455 }
456 
/*
 * Body of the scrubber thread created by memscrub_init().
 *
 * memscrub_lock is taken once at the top; within this function it is only
 * given up while blocked in cv_wait().  Each pass of the main loop
 * recomputes the sleep interval, sleeps until memscrub_cv is signalled
 * (memscrub_wakeup(), reached from the timeout armed here or from
 * memscrub_run()), and then, unless pause_memscrub is set, scans one span
 * of at most memscrub_span_pages pages starting at "address".  If
 * read_all_memscrub is set, every span on the list is scanned first and
 * the flag is cleared.
 *
 * The loop ends when disable_memscrub is set or memscrub_phys_pages is
 * zero; the thread then runs memscrub_cleanup() and exits.
 */
457 void
458 memscrubber(void)
459 {
460 	ms_paddr_t address, addr;
461 	time_t deadline;
462 	pgcnt_t pages;
	/*
	 * starts at 1 so the very first pass goes through the end-of-memory
	 * processing below, which turns memscrub_delay_start_sec into the
	 * initial sleep
	 */
463 	uint_t reached_end = 1;
464 	uint_t paused_message = 0;
465 	uint_t interval_ticks = 0;
466 	uint_t sleep_warn_printed = 0;
467 	callb_cpr_t cprinfo;
468 
469 	/*
470 	 * notify CPR of our existence
471 	 */
472 	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");
473 
474 	mutex_enter(&memscrub_lock);
475 
476 	if (memscrub_memlist == NULL) {
477 		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
478 		goto memscrub_exit;
479 	}
480 
	/* "address" is the scan cursor; it persists across passes */
481 	address = memscrub_memlist->address;
482 
483 	deadline = gethrestime_sec() + memscrub_delay_start_sec;
484 
485 	for (;;) {
486 		if (disable_memscrub)
487 			break;
488 
489 		/*
490 		 * compute interval_ticks
491 		 */
492 		interval_ticks = compute_interval_ticks();
493 
494 		/*
495 		 * If the calculated sleep time is zero, and pause_memscrub
496 		 * has been set, make sure we sleep so that another thread
497 		 * can acquire memscrub_lock.
498 		 */
499 		if (interval_ticks == 0 && pause_memscrub) {
500 			interval_ticks = hz;
501 		}
502 
503 		/*
504 		 * And as a fail safe, under normal non-paused operation, do
505 		 * not allow the sleep time to be zero.
506 		 */
507 		if (interval_ticks == 0) {
508 			interval_ticks = memscrub_override_ticks;
			/* warn once per pass; re-armed at end of memory */
509 			if (!sleep_warn_printed) {
510 				cmn_err(CE_NOTE, MEMSCRUB_OVERRIDE_MSG);
511 				sleep_warn_printed = 1;
512 			}
513 		}
514 
515 		memscrub_counts.interval_ticks.value.ui32 = interval_ticks;
516 
517 		/*
518 		 * Did we just reach the end of memory? If we are at the
519 		 * end of memory, delay end of memory processing until
520 		 * pause_memscrub is not set.
521 		 */
522 		if (reached_end && !pause_memscrub) {
523 			time_t now = gethrestime_sec();
524 
525 			if (now >= deadline) {
526 				memscrub_counts.done_late.value.ui32++;
527 				memscrub_counts.late_sec.value.ui32 +=
528 					(now - deadline);
529 				/*
530 				 * past deadline, start right away
531 				 */
532 				interval_ticks = 0;
533 
534 				deadline = now + memscrub_period_sec;
535 			} else {
536 				/*
537 				 * we finished ahead of schedule.
538 				 * wait till previous deadline before re-start.
539 				 */
540 				interval_ticks = (deadline - now) * hz;
541 				memscrub_counts.done_early.value.ui32++;
542 				memscrub_counts.early_sec.value.ui32 +=
543 					(deadline - now);
544 				deadline += memscrub_period_sec;
545 			}
546 			reached_end = 0;
547 			sleep_warn_printed = 0;
548 		}
549 
		/* zero here only in the "past deadline" case: skip the sleep */
550 		if (interval_ticks != 0) {
551 			/*
552 			 * it is safe from our standpoint for CPR to
553 			 * suspend the system
554 			 */
555 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
556 
557 			/*
558 			 * hit the snooze bar
559 			 */
560 			memscrub_tid = timeout(memscrub_wakeup, NULL,
561 			    interval_ticks);
562 
563 			/*
564 			 * go to sleep
565 			 */
566 			cv_wait(&memscrub_cv, &memscrub_lock);
567 
568 			/*
569 			 * at this point, no timeout should be set
570 			 */
571 			memscrub_tid = 0;
572 
573 			/*
574 			 * we need to goto work and will be modifying
575 			 * our internal state and mapping/unmapping
576 			 * TTEs
577 			 */
578 			CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);
579 		}
580 
581 
		/*
		 * the list may have been emptied while we slept; the code
		 * below dereferences memscrub_memlist, so bail out first
		 */
582 		if (memscrub_phys_pages == 0) {
583 			cmn_err(CE_WARN, "Memory scrubber has 0 pages to read");
584 			goto memscrub_exit;
585 		}
586 
587 		if (!pause_memscrub) {
588 			if (paused_message) {
589 				paused_message = 0;
590 				if (memscrub_verbose)
591 					cmn_err(CE_NOTE, "Memory scrubber "
592 					    "resuming");
593 			}
594 
595 			if (read_all_memscrub) {
596 				if (memscrub_verbose)
597 					cmn_err(CE_NOTE, "Memory scrubber "
598 					    "reading all memory per request");
599 
				/*
				 * walk the whole list with a separate cursor
				 * ("addr") so the regular cursor is untouched;
				 * reached_end is borrowed as the loop flag and
				 * is overwritten again by the span read below
				 */
600 				addr = memscrub_memlist->address;
601 				reached_end = 0;
602 				while (!reached_end) {
603 					if (disable_memscrub)
604 						break;
605 					pages = memscrub_phys_pages;
606 					reached_end = memscrub_verify_span(
607 					    &addr, &pages);
608 					memscrub_scan(pages *
609 					    MEMSCRUB_BLOCKS_PER_PAGE, addr);
610 					addr += ((uint64_t)pages * PAGESIZE);
611 				}
612 				read_all_memscrub = 0;
613 			}
614 
615 			/*
616 			 * read 1 span
617 			 */
618 			pages = memscrub_span_pages;
619 
620 			if (disable_memscrub)
621 				break;
622 
623 			/*
624 			 * determine physical address range
625 			 */
626 			reached_end = memscrub_verify_span(&address,
627 			    &pages);
628 
629 			memscrub_scan(pages * MEMSCRUB_BLOCKS_PER_PAGE,
630 			    address);
631 
			/* advance the cursor past what was just scanned */
632 			address += ((uint64_t)pages * PAGESIZE);
633 		}
634 
635 		if (pause_memscrub && !paused_message) {
636 			paused_message = 1;
637 			if (memscrub_verbose)
638 				cmn_err(CE_NOTE, "Memory scrubber paused");
639 		}
640 	}
641 
642 memscrub_exit:
643 	cmn_err(CE_NOTE, "Memory scrubber exiting");
	/*
	 * NOTE(review): memscrub_lock is still held here and
	 * memscrub_cleanup() re-enters it via memscrub_delete_span();
	 * presumably CALLB_CPR_EXIT drops the lock -- confirm against
	 * sys/callb.h.
	 */
644 	CALLB_CPR_EXIT(&cprinfo);
645 	memscrub_cleanup();
646 	thread_exit();
647 	/* NOTREACHED */
648 }
649 
650 /*
651  * condition address and size
652  * such that they span legal physical addresses.
653  *
654  * when appropriate, address will be rounded up to start of next
655  * struct memlist, and pages will be rounded down to the end of the
656  * memlist size.
657  *
658  * returns 1 if reached end of list, else returns 0.
659  */
660 static int
661 memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp)
662 {
663 	struct memlist *mlp;
664 	ms_paddr_t address = *addrp;
665 	uint64_t bytes = (uint64_t)*pagesp * PAGESIZE;
666 	uint64_t bytes_remaining;
667 	int reached_end = 0;
668 
669 	ASSERT(mutex_owned(&memscrub_lock));
670 
671 	/*
672 	 * find memlist struct that contains addrp
673 	 * assumes memlist is sorted by ascending address.
674 	 */
675 	for (mlp = memscrub_memlist; mlp != NULL; mlp = mlp->next) {
676 		/*
677 		 * if before this chunk, round up to beginning
678 		 */
679 		if (address < mlp->address) {
680 			address = mlp->address;
681 			break;
682 		}
683 		/*
684 		 * if before end of chunk, then we found it
685 		 */
686 		if (address < (mlp->address + mlp->size))
687 			break;
688 
689 		/* else go to next struct memlist */
690 	}
691 	/*
692 	 * if we hit end of list, start at beginning
693 	 */
694 	if (mlp == NULL) {
695 		mlp = memscrub_memlist;
696 		address = mlp->address;
697 	}
698 
699 	/*
700 	 * now we have legal address, and its mlp, condition bytes
701 	 */
702 	bytes_remaining = (mlp->address + mlp->size) - address;
703 
704 	if (bytes > bytes_remaining)
705 		bytes = bytes_remaining;
706 
707 	/*
708 	 * will this span take us to end of list?
709 	 */
710 	if ((mlp->next == NULL) &&
711 	    ((mlp->address + mlp->size) == (address + bytes)))
712 		reached_end = 1;
713 
714 	/* return values */
715 	*addrp = address;
716 	*pagesp = bytes / PAGESIZE;
717 
718 	return (reached_end);
719 }
720 
721 /*
722  * add a span to the memscrub list
723  * add to memscrub_phys_pages
724  */
725 int
726 memscrub_add_span(pfn_t pfn, pgcnt_t pages)
727 {
728 #ifdef MEMSCRUB_DEBUG
729 	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
730 	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
731 #endif /* MEMSCRUB_DEBUG */
732 
733 	int retval;
734 
735 	mutex_enter(&memscrub_lock);
736 
737 #ifdef MEMSCRUB_DEBUG
738 	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
739 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
740 	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
741 	    " size: 0x%llx\n", address, bytes);
742 #endif /* MEMSCRUB_DEBUG */
743 
744 	retval = memscrub_add_span_gen(pfn, pages, &memscrub_memlist,
745 	    &memscrub_phys_pages);
746 
747 #ifdef MEMSCRUB_DEBUG
748 	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
749 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
750 #endif /* MEMSCRUB_DEBUG */
751 
752 	mutex_exit(&memscrub_lock);
753 
754 	return (retval);
755 }
756 
757 static int
758 memscrub_add_span_gen(
759 	pfn_t pfn,
760 	pgcnt_t pages,
761 	struct memlist **list,
762 	uint_t *npgs)
763 {
764 	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
765 	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
766 	struct memlist *dst;
767 	struct memlist *prev, *next;
768 	int retval = 0;
769 
770 	/*
771 	 * allocate a new struct memlist
772 	 */
773 
774 	dst = (struct memlist *)
775 	    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
776 
777 	if (dst == NULL) {
778 		retval = -1;
779 		goto add_done;
780 	}
781 
782 	dst->address = address;
783 	dst->size = bytes;
784 
785 	/*
786 	 * first insert
787 	 */
788 	if (*list == NULL) {
789 		dst->prev = NULL;
790 		dst->next = NULL;
791 		*list = dst;
792 
793 		goto add_done;
794 	}
795 
796 	/*
797 	 * insert into sorted list
798 	 */
799 	for (prev = NULL, next = *list;
800 	    next != NULL;
801 	    prev = next, next = next->next) {
802 		if (address > (next->address + next->size))
803 			continue;
804 
805 		/*
806 		 * else insert here
807 		 */
808 
809 		/*
810 		 * prepend to next
811 		 */
812 		if ((address + bytes) == next->address) {
813 			kmem_free(dst, sizeof (struct memlist));
814 
815 			next->address = address;
816 			next->size += bytes;
817 
818 			goto add_done;
819 		}
820 
821 		/*
822 		 * append to next
823 		 */
824 		if (address == (next->address + next->size)) {
825 			kmem_free(dst, sizeof (struct memlist));
826 
827 			if (next->next) {
828 				/*
829 				 * don't overlap with next->next
830 				 */
831 				if ((address + bytes) > next->next->address) {
832 					retval = -1;
833 					goto add_done;
834 				}
835 				/*
836 				 * concatenate next and next->next
837 				 */
838 				if ((address + bytes) == next->next->address) {
839 					struct memlist *mlp = next->next;
840 
841 					if (next == *list)
842 						*list = next->next;
843 
844 					mlp->address = next->address;
845 					mlp->size += next->size;
846 					mlp->size += bytes;
847 
848 					if (next->prev)
849 						next->prev->next = mlp;
850 					mlp->prev = next->prev;
851 
852 					kmem_free(next,
853 						sizeof (struct memlist));
854 					goto add_done;
855 				}
856 			}
857 
858 			next->size += bytes;
859 
860 			goto add_done;
861 		}
862 
863 		/* don't overlap with next */
864 		if ((address + bytes) > next->address) {
865 			retval = -1;
866 			kmem_free(dst, sizeof (struct memlist));
867 			goto add_done;
868 		}
869 
870 		/*
871 		 * insert before next
872 		 */
873 		dst->prev = prev;
874 		dst->next = next;
875 		next->prev = dst;
876 		if (prev == NULL) {
877 			*list = dst;
878 		} else {
879 			prev->next = dst;
880 		}
881 		goto add_done;
882 	}	/* end for */
883 
884 	/*
885 	 * end of list, prev is valid and next is NULL
886 	 */
887 	prev->next = dst;
888 	dst->prev = prev;
889 	dst->next = NULL;
890 
891 add_done:
892 
893 	if (retval != -1)
894 		*npgs += pages;
895 
896 	return (retval);
897 }
898 
899 /*
900  * delete a span from the memscrub list
901  * subtract from memscrub_phys_pages
902  */
903 int
904 memscrub_delete_span(pfn_t pfn, pgcnt_t pages)
905 {
906 	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
907 	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
908 	struct memlist *dst, *next;
909 	int retval = 0;
910 
911 	mutex_enter(&memscrub_lock);
912 
913 #ifdef MEMSCRUB_DEBUG
914 	memscrub_printmemlist("memscrub_memlist Before", memscrub_memlist);
915 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
916 	cmn_err(CE_CONT, "memscrub_delete_span: 0x%llx 0x%llx\n",
917 	    address, bytes);
918 #endif /* MEMSCRUB_DEBUG */
919 
920 	/*
921 	 * find struct memlist containing page
922 	 */
923 	for (next = memscrub_memlist; next != NULL; next = next->next) {
924 		if ((address >= next->address) &&
925 		    (address < next->address + next->size))
926 			break;
927 	}
928 
929 	/*
930 	 * if start address not in list
931 	 */
932 	if (next == NULL) {
933 		retval = -1;
934 		goto delete_done;
935 	}
936 
937 	/*
938 	 * error if size goes off end of this struct memlist
939 	 */
940 	if (address + bytes > next->address + next->size) {
941 		retval = -1;
942 		goto delete_done;
943 	}
944 
945 	/*
946 	 * pages at beginning of struct memlist
947 	 */
948 	if (address == next->address) {
949 		/*
950 		 * if start & size match, delete from list
951 		 */
952 		if (bytes == next->size) {
953 			if (next == memscrub_memlist)
954 				memscrub_memlist = next->next;
955 			if (next->prev != NULL)
956 				next->prev->next = next->next;
957 			if (next->next != NULL)
958 				next->next->prev = next->prev;
959 
960 			kmem_free(next, sizeof (struct memlist));
961 		} else {
962 		/*
963 		 * increment start address by bytes
964 		 */
965 			next->address += bytes;
966 			next->size -= bytes;
967 		}
968 		goto delete_done;
969 	}
970 
971 	/*
972 	 * pages at end of struct memlist
973 	 */
974 	if (address + bytes == next->address + next->size) {
975 		/*
976 		 * decrement size by bytes
977 		 */
978 		next->size -= bytes;
979 		goto delete_done;
980 	}
981 
982 	/*
983 	 * delete a span in the middle of the struct memlist
984 	 */
985 	{
986 		/*
987 		 * create a new struct memlist
988 		 */
989 		dst = (struct memlist *)
990 		    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
991 
992 		if (dst == NULL) {
993 			retval = -1;
994 			goto delete_done;
995 		}
996 
997 		/*
998 		 * existing struct memlist gets address
999 		 * and size up to pfn
1000 		 */
1001 		dst->address = address + bytes;
1002 		dst->size = (next->address + next->size) - dst->address;
1003 		next->size = address - next->address;
1004 
1005 		/*
1006 		 * new struct memlist gets address starting
1007 		 * after pfn, until end
1008 		 */
1009 
1010 		/*
1011 		 * link in new memlist after old
1012 		 */
1013 		dst->next = next->next;
1014 		dst->prev = next;
1015 
1016 		if (next->next != NULL)
1017 			next->next->prev = dst;
1018 		next->next = dst;
1019 	}
1020 
1021 delete_done:
1022 	if (retval != -1) {
1023 		memscrub_phys_pages -= pages;
1024 		if (memscrub_phys_pages == 0)
1025 			disable_memscrub = 1;
1026 	}
1027 
1028 #ifdef MEMSCRUB_DEBUG
1029 	memscrub_printmemlist("memscrub_memlist After", memscrub_memlist);
1030 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
1031 #endif /* MEMSCRUB_DEBUG */
1032 
1033 	mutex_exit(&memscrub_lock);
1034 	return (retval);
1035 }
1036 
1037 static void
1038 memscrub_scan(uint_t blks, ms_paddr_t src)
1039 {
1040 	uint_t 		psz, bpp, pgsread;
1041 	pfn_t		pfn;
1042 	ms_paddr_t	pa;
1043 	caddr_t		va;
1044 	on_trap_data_t	otd;
1045 	int		scan_mmu_pagesize = 0;
1046 	int		retired_pages = 0;
1047 
1048 	extern void memscrub_read(caddr_t src, uint_t blks);
1049 
1050 	ASSERT(mutex_owned(&memscrub_lock));
1051 
1052 	pgsread = 0;
1053 	pa = src;
1054 
1055 	if (memscrub_page_retire_span_list != NULL) {
1056 		if (memscrub_page_retire_span_search(src)) {
1057 			/* retired pages in current span */
1058 			scan_mmu_pagesize = 1;
1059 		}
1060 	}
1061 
1062 #ifdef MEMSCRUB_DEBUG
1063 	cmn_err(CE_NOTE, "scan_mmu_pagesize = %d\n" scan_mmu_pagesize);
1064 #endif /* MEMSCRUB_DEBUG */
1065 
1066 	while (blks != 0) {
1067 		/* Ensure the PA is properly aligned */
1068 		if (((pa & MMU_PAGEMASK4M) == pa) &&
1069 			(blks >= MEMSCRUB_BPP4M)) {
1070 			psz = MMU_PAGESIZE4M;
1071 			bpp = MEMSCRUB_BPP4M;
1072 		} else if (((pa & MMU_PAGEMASK512K) == pa) &&
1073 			(blks >= MEMSCRUB_BPP512K)) {
1074 			psz = MMU_PAGESIZE512K;
1075 			bpp = MEMSCRUB_BPP512K;
1076 		} else if (((pa & MMU_PAGEMASK64K) == pa) &&
1077 			(blks >= MEMSCRUB_BPP64K)) {
1078 			psz = MMU_PAGESIZE64K;
1079 			bpp = MEMSCRUB_BPP64K;
1080 		} else if ((pa & MMU_PAGEMASK) == pa) {
1081 			psz = MMU_PAGESIZE;
1082 			bpp = MEMSCRUB_BPP;
1083 		} else {
1084 			if (memscrub_verbose) {
1085 				cmn_err(CE_NOTE, "Memory scrubber ignoring "
1086 				    "non-page aligned block starting at 0x%"
1087 				    PRIx64, src);
1088 			}
1089 			return;
1090 		}
1091 		if (blks < bpp) bpp = blks;
1092 
1093 #ifdef MEMSCRUB_DEBUG
1094 		cmn_err(CE_NOTE, "Going to run psz=%x, "
1095 		    "bpp=%x pa=%llx\n", psz, bpp, pa);
1096 #endif /* MEMSCRUB_DEBUG */
1097 
1098 		/*
1099 		 * MEMSCRUBBASE is a 4MB aligned page in the
1100 		 * kernel so that we can quickly map the PA
1101 		 * to a VA for the block loads performed in
1102 		 * memscrub_read.
1103 		 */
1104 		pfn = mmu_btop(pa);
1105 		va = (caddr_t)MEMSCRUBBASE;
1106 		hat_devload(kas.a_hat, va, psz, pfn, PROT_READ,
1107 			HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
1108 
1109 		/*
1110 		 * Can't allow the memscrubber to migrate across CPUs as
1111 		 * we need to know whether CEEN is enabled for the current
1112 		 * CPU to enable us to scrub the memory. Don't use
1113 		 * kpreempt_disable as the time we take to scan a span (even
1114 		 * without cpu_check_ce having to manually cpu_check_block)
1115 		 * is too long to hold a higher priority thread (eg, RT)
1116 		 * off cpu.
1117 		 */
1118 		thread_affinity_set(curthread, CPU_CURRENT);
1119 
1120 		/*
1121 		 * Protect read scrub from async faults.  For now, we simply
1122 		 * maintain a count of such faults caught.
1123 		 */
1124 
1125 		if (!scan_mmu_pagesize && !on_trap(&otd, OT_DATA_EC)) {
1126 			memscrub_read(va, bpp);
1127 			/*
1128 			 * Check if CEs require logging
1129 			 */
1130 			cpu_check_ce(SCRUBBER_CEEN_CHECK,
1131 			    (uint64_t)pa, va, psz);
1132 			no_trap();
1133 			thread_affinity_clear(curthread);
1134 		} else {
1135 			no_trap();
1136 			thread_affinity_clear(curthread);
1137 
1138 			/*
1139 			 * Got an async error..
1140 			 * Try rescanning it at MMU_PAGESIZE
1141 			 * granularity if we were trying to
1142 			 * read at a larger page size.
1143 			 * This is to ensure we continue to
1144 			 * scan the rest of the span.
1145 			 * OR scanning MMU_PAGESIZE granularity to avoid
1146 			 * reading retired pages memory when scan_mmu_pagesize
1147 			 * is set.
1148 			 */
1149 			if (psz > MMU_PAGESIZE || scan_mmu_pagesize) {
1150 			    caddr_t vaddr = va;
1151 			    ms_paddr_t paddr = pa;
1152 			    int tmp = 0;
1153 			    for (; tmp < bpp; tmp += MEMSCRUB_BPP) {
1154 				/* Don't scrub retired pages */
1155 				if (page_retire_check(paddr, NULL) == 0) {
1156 					vaddr += MMU_PAGESIZE;
1157 					paddr += MMU_PAGESIZE;
1158 					retired_pages++;
1159 					continue;
1160 				}
1161 				thread_affinity_set(curthread, CPU_CURRENT);
1162 				if (!on_trap(&otd, OT_DATA_EC)) {
1163 				    memscrub_read(vaddr, MEMSCRUB_BPP);
1164 				    cpu_check_ce(SCRUBBER_CEEN_CHECK,
1165 					(uint64_t)paddr, vaddr, MMU_PAGESIZE);
1166 				    no_trap();
1167 				} else {
1168 				    no_trap();
1169 				    memscrub_counts.errors_found.value.ui32++;
1170 				}
1171 				thread_affinity_clear(curthread);
1172 				vaddr += MMU_PAGESIZE;
1173 				paddr += MMU_PAGESIZE;
1174 			    }
1175 			}
1176 		}
1177 		hat_unload(kas.a_hat, va, psz, HAT_UNLOAD_UNLOCK);
1178 
1179 		blks -= bpp;
1180 		pa += psz;
1181 		pgsread++;
1182 	}
1183 
1184 	/*
1185 	 * If just finished scrubbing MMU_PAGESIZE at a time, but no retired
1186 	 * pages found so delete span from global list.
1187 	 */
1188 	if (scan_mmu_pagesize && retired_pages == 0)
1189 		memscrub_page_retire_span_delete(src);
1190 
1191 	/*
1192 	 * Encountered CE/UE on a retired page during memscrub read of current
1193 	 * span.  Adding span to global list to enable avoid reading further.
1194 	 */
1195 	if (add_to_page_retire_list) {
1196 		if (!memscrub_page_retire_span_search(src))
1197 			memscrub_page_retire_span_add(src);
1198 		add_to_page_retire_list = 0;
1199 	}
1200 
1201 	if (memscrub_verbose) {
1202 		cmn_err(CE_NOTE, "Memory scrubber read 0x%x pages starting "
1203 		    "at 0x%" PRIx64, pgsread, src);
1204 	}
1205 }
1206 
1207 /*
1208  * Called by cpu_async_log_err() when memscrub read causes
1209  * CE/UE on a retired page.
1210  */
1211 void
1212 memscrub_induced_error(void)
1213 {
1214 	add_to_page_retire_list = 1;
1215 }
1216 
1217 
1218 /*
1219  * Called by memscrub_scan().
1220  * pa: physical address of span with CE/UE, add to global list.
1221  */
1222 static void
1223 memscrub_page_retire_span_add(ms_paddr_t pa)
1224 {
1225 	memscrub_page_retire_span_t *new_span;
1226 
1227 	new_span = (memscrub_page_retire_span_t *)
1228 	    kmem_zalloc(sizeof (memscrub_page_retire_span_t), KM_NOSLEEP);
1229 
1230 	if (new_span == NULL) {
1231 #ifdef MEMSCRUB_DEBUG
1232 		cmn_err(CE_NOTE, "failed to allocate new span - span with"
1233 		    " retired page/s not tracked.\n");
1234 #endif /* MEMSCRUB_DEBUG */
1235 		return;
1236 	}
1237 
1238 	new_span->address = pa;
1239 	new_span->next = memscrub_page_retire_span_list;
1240 	memscrub_page_retire_span_list = new_span;
1241 }
1242 
1243 /*
1244  * Called by memscrub_scan().
1245  * pa: physical address of span to be removed from global list.
1246  */
1247 static void
1248 memscrub_page_retire_span_delete(ms_paddr_t pa)
1249 {
1250 	memscrub_page_retire_span_t *prev_span, *next_span;
1251 
1252 	prev_span = memscrub_page_retire_span_list;
1253 	next_span = memscrub_page_retire_span_list->next;
1254 
1255 	if (pa == prev_span->address) {
1256 		memscrub_page_retire_span_list = next_span;
1257 		kmem_free(prev_span, sizeof (memscrub_page_retire_span_t));
1258 		return;
1259 	}
1260 
1261 	while (next_span) {
1262 		if (pa == next_span->address) {
1263 			prev_span->next = next_span->next;
1264 			kmem_free(next_span,
1265 			    sizeof (memscrub_page_retire_span_t));
1266 			return;
1267 		}
1268 		prev_span = next_span;
1269 		next_span = next_span->next;
1270 	}
1271 }
1272 
1273 /*
1274  * Called by memscrub_scan().
1275  * pa: physical address of span to be searched in global list.
1276  */
1277 static int
1278 memscrub_page_retire_span_search(ms_paddr_t pa)
1279 {
1280 	memscrub_page_retire_span_t *next_span = memscrub_page_retire_span_list;
1281 
1282 	while (next_span) {
1283 		if (pa == next_span->address)
1284 			return (1);
1285 		next_span = next_span->next;
1286 	}
1287 	return (0);
1288 }
1289 
1290 /*
1291  * Called from new_memscrub() as a result of memory delete.
1292  * Using page_numtopp_nolock() to determine if we have valid PA.
1293  */
1294 static void
1295 memscrub_page_retire_span_list_update(void)
1296 {
1297 	memscrub_page_retire_span_t *prev, *cur, *next;
1298 
1299 	if (memscrub_page_retire_span_list == NULL)
1300 		return;
1301 
1302 	prev = cur = memscrub_page_retire_span_list;
1303 	next = cur->next;
1304 
1305 	while (cur) {
1306 		if (page_numtopp_nolock(mmu_btop(cur->address)) == NULL) {
1307 			if (cur == memscrub_page_retire_span_list) {
1308 				memscrub_page_retire_span_list = next;
1309 				kmem_free(cur,
1310 				    sizeof (memscrub_page_retire_span_t));
1311 				prev = cur = memscrub_page_retire_span_list;
1312 			} else {
1313 				prev->next = cur->next;
1314 				kmem_free(cur,
1315 				    sizeof (memscrub_page_retire_span_t));
1316 				cur = next;
1317 			}
1318 		} else {
1319 			prev = cur;
1320 			cur = next;
1321 		}
1322 		if (cur != NULL)
1323 			next = cur->next;
1324 	}
1325 }
1326 
1327 /*
1328  * The memory add/delete callback mechanism does not pass in the
1329  * page ranges. The phys_install list has been updated though, so
1330  * create a new scrub list from it.
1331  */
1332 
1333 static int
1334 new_memscrub(int update_page_retire_list)
1335 {
1336 	struct memlist *src, *list, *old_list;
1337 	uint_t npgs;
1338 
1339 	/*
1340 	 * copy phys_install to memscrub_memlist
1341 	 */
1342 	list = NULL;
1343 	npgs = 0;
1344 	memlist_read_lock();
1345 	for (src = phys_install; src; src = src->next) {
1346 		if (memscrub_add_span_gen((pfn_t)(src->address >> PAGESHIFT),
1347 		    (pgcnt_t)(src->size >> PAGESHIFT), &list, &npgs)) {
1348 			memlist_read_unlock();
1349 			while (list) {
1350 				struct memlist *el;
1351 
1352 				el = list;
1353 				list = list->next;
1354 				kmem_free(el, sizeof (struct memlist));
1355 			}
1356 			return (-1);
1357 		}
1358 	}
1359 	memlist_read_unlock();
1360 
1361 	mutex_enter(&memscrub_lock);
1362 	memscrub_phys_pages = npgs;
1363 	old_list = memscrub_memlist;
1364 	memscrub_memlist = list;
1365 
1366 	if (update_page_retire_list)
1367 		memscrub_page_retire_span_list_update();
1368 
1369 	mutex_exit(&memscrub_lock);
1370 
1371 	while (old_list) {
1372 		struct memlist *el;
1373 
1374 		el = old_list;
1375 		old_list = old_list->next;
1376 		kmem_free(el, sizeof (struct memlist));
1377 	}
1378 
1379 	return (0);
1380 }
1381 
1382 /*ARGSUSED*/
1383 static void
1384 memscrub_mem_config_post_add(
1385 	void *arg,
1386 	pgcnt_t delta_pages)
1387 {
1388 	/*
1389 	 * We increment pause_memscrub before entering new_memscrub(). This
1390 	 * will force the memscrubber to sleep, allowing the DR callback
1391 	 * thread to acquire memscrub_lock in new_memscrub(). The use of
1392 	 * atomic_add_32() allows concurrent memory DR operations to use the
1393 	 * callbacks safely.
1394 	 */
1395 	atomic_add_32(&pause_memscrub, 1);
1396 	ASSERT(pause_memscrub != 0);
1397 
1398 	/*
1399 	 * "Don't care" if we are not scrubbing new memory.
1400 	 */
1401 	(void) new_memscrub(0);		/* retain page retire list */
1402 
1403 	/* Restore the pause setting. */
1404 	atomic_add_32(&pause_memscrub, -1);
1405 }
1406 
1407 /*ARGSUSED*/
1408 static int
1409 memscrub_mem_config_pre_del(
1410 	void *arg,
1411 	pgcnt_t delta_pages)
1412 {
1413 	/* Nothing to do. */
1414 	return (0);
1415 }
1416 
1417 /*ARGSUSED*/
1418 static void
1419 memscrub_mem_config_post_del(
1420 	void *arg,
1421 	pgcnt_t delta_pages,
1422 	int cancelled)
1423 {
1424 	/*
1425 	 * We increment pause_memscrub before entering new_memscrub(). This
1426 	 * will force the memscrubber to sleep, allowing the DR callback
1427 	 * thread to acquire memscrub_lock in new_memscrub(). The use of
1428 	 * atomic_add_32() allows concurrent memory DR operations to use the
1429 	 * callbacks safely.
1430 	 */
1431 	atomic_add_32(&pause_memscrub, 1);
1432 	ASSERT(pause_memscrub != 0);
1433 
1434 	/*
1435 	 * Must stop scrubbing deleted memory as it may be disconnected.
1436 	 */
1437 	if (new_memscrub(1)) {	/* update page retire list */
1438 		disable_memscrub = 1;
1439 	}
1440 
1441 	/* Restore the pause setting. */
1442 	atomic_add_32(&pause_memscrub, -1);
1443 }
1444 
/*
 * Callback vector passed to kphysm_setup_func_register() and
 * kphysm_setup_func_unregister() by the init/uninit routines below.
 * The initializer is positional; entries are the vector version followed
 * by this file's post-add, pre-delete and post-delete handlers.
 * NOTE(review): entry order is assumed to match the field order of
 * kphysm_setup_vector_t, which is declared outside this file -- confirm.
 */
static kphysm_setup_vector_t memscrub_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	memscrub_mem_config_post_add,
	memscrub_mem_config_pre_del,
	memscrub_mem_config_post_del,
};
1451 
1452 static void
1453 memscrub_init_mem_config()
1454 {
1455 	int ret;
1456 
1457 	ret = kphysm_setup_func_register(&memscrub_mem_config_vec,
1458 	    (void *)NULL);
1459 	ASSERT(ret == 0);
1460 }
1461 
1462 static void
1463 memscrub_uninit_mem_config()
1464 {
1465 	/* This call is OK if the register call was not done. */
1466 	kphysm_setup_func_unregister(&memscrub_mem_config_vec, (void *)NULL);
1467 }
1468