1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * sun4u Memory Scrubbing
28 *
29 * On detection of a correctable memory ECC error, the sun4u kernel
30 * returns the corrected data to the requester and re-writes it
31 * to memory (DRAM). So if the correctable error was transient,
32  * the error has effectively been cleaned (scrubbed) from memory.
33 *
34  * Scrubbing thus reduces the likelihood that multiple transient errors
35 * will occur in the same memory word, making uncorrectable errors due
36 * to transients less likely.
37 *
38 * Thus is born the desire that every memory location be periodically
39 * accessed.
40 *
41 * This file implements a memory scrubbing thread. This scrubber
42 * guarantees that all of physical memory is accessed periodically
43 * (memscrub_period_sec -- 12 hours).
44 *
45 * It attempts to do this as unobtrusively as possible. The thread
46 * schedules itself to wake up at an interval such that if it reads
47 * memscrub_span_pages (32MB) on each wakeup, it will read all of physical
48  * memory in memscrub_period_sec (12 hours).
49 *
50 * The scrubber uses the block load and prefetch hardware to read memory
51  * @ 1300MB/s, so it reads spans of 32MB in 0.025 seconds. Unlike the
52  * original sun4d scrubber, the sun4u scrubber does not read ahead if the
53  * system is idle because we can read memory very efficiently.
54 *
55 * The scrubber maintains a private copy of the phys_install memory list
56 * to keep track of what memory should be scrubbed.
57 *
58 * The global routines memscrub_add_span() and memscrub_delete_span() are
59 * used to add and delete from this list. If hotplug memory is later
60 * supported these two routines can be used to notify the scrubber of
61 * memory configuration changes.
62 *
63 * The following parameters can be set via /etc/system
64 *
65  * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (32MB)
66 * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
67 * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI)
68 * memscrub_delay_start_sec = (5 minutes)
69 * memscrub_verbose = (0)
70 * memscrub_override_ticks = (1 tick)
71 * disable_memscrub = (0)
72 * pause_memscrub = (0)
73 * read_all_memscrub = (0)
74 *
75 * The scrubber will print NOTICE messages of what it is doing if
76 * "memscrub_verbose" is set.
77 *
78 * If the scrubber's sleep time calculation drops to zero ticks,
79 * memscrub_override_ticks will be used as the sleep time instead. The
80 * sleep time should only drop to zero on a system with over 131.84
81 * terabytes of memory, or where the default scrubber parameters have
82 * been adjusted. For example, reducing memscrub_span_pages or
83 * memscrub_period_sec causes the sleep time to drop to zero with less
84 * memory. Note that since the sleep time is calculated in clock ticks,
85 * using hires clock ticks allows for more memory before the sleep time
86 * becomes zero.
87 *
88 * The scrubber will exit (or never be started) if it finds the variable
89 * "disable_memscrub" set.
90 *
91 * The scrubber will pause (not read memory) when "pause_memscrub"
92 * is set. It will check the state of pause_memscrub at each wakeup
93 * period. The scrubber will not make up for lost time. If you
94 * pause the scrubber for a prolonged period of time you can use
95 * the "read_all_memscrub" switch (see below) to catch up. In addition,
96 * pause_memscrub is used internally by the post memory DR callbacks.
97 * It is set for the small period of time during which the callbacks
98 * are executing. This ensures "memscrub_lock" will be released,
99 * allowing the callbacks to finish.
100 *
101 * The scrubber will read all memory if "read_all_memscrub" is set.
102 * The normal span read will also occur during the wakeup.
103 *
104 * MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system
105 * must have before we'll start the scrubber.
106 *
107  * MEMSCRUB_DFL_SPAN_PAGES (32MB) is based on the guess that 0.025 sec
108  * is a reasonable minimum amount of time for the thread to run per wakeup.
109 *
110 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
111 * twice the frequency the hardware folk estimated would be necessary.
112 *
113 * MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption
114  * that the scrubber should get its fair share of time (since it
115 * is short). At a priority of 0 the scrubber will be starved.
116 */
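
/*
 * Illustrative example (not part of the implementation): with the
 * defaults above, assuming 8K pages and hz = 100, the scheduling
 * arithmetic works out as follows.
 *
 *	span         = 32MB per wakeup
 *	period       = 12 hours = 43200 seconds (4320000 ticks)
 *	32GB system  = 32GB / 32MB = 1024 wakeups per period
 *	interval     = 43200 / 1024 ~= 42 seconds between wakeups
 *
 * The interval reaches zero ticks only when the number of spans exceeds
 * the number of ticks in a period, i.e. when memory exceeds
 * 4320000 x 32MB ~= 131.84 terabytes, which is where the figure quoted
 * above comes from.
 *
 * Hypothetical /etc/system tuning, for illustration only (these values
 * are not recommendations):
 *
 *	set memscrub_span_pages = 8192
 *	set memscrub_period_sec = 21600
 *	set memscrub_verbose = 1
 *
 * which would read 64MB (with 8K pages) per wakeup, cover all of memory
 * every 6 hours and log NOTICE messages while doing so.
 */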
117
118 #include <sys/systm.h> /* timeout, types, t_lock */
119 #include <sys/cmn_err.h>
120 #include <sys/sysmacros.h> /* MIN */
121 #include <sys/memlist.h> /* memlist */
122 #include <sys/mem_config.h> /* memory add/delete */
123 #include <sys/kmem.h> /* KMEM_NOSLEEP */
124 #include <sys/cpuvar.h> /* ncpus_online */
125 #include <sys/debug.h> /* ASSERTs */
126 #include <sys/machsystm.h> /* lddphys */
127 #include <sys/cpu_module.h> /* vtag_flushpage */
128 #include <sys/kstat.h>
129 #include <sys/atomic.h>		/* atomic_inc_32 */
130
131 #include <vm/hat.h>
132 #include <vm/seg_kmem.h>
133 #include <vm/hat_sfmmu.h> /* XXX FIXME - delete */
134
135 #include <sys/time.h>
136 #include <sys/callb.h> /* CPR callback */
137 #include <sys/ontrap.h>
138
139 /*
140 * Should really have paddr_t defined, but it is broken. Use
141 * ms_paddr_t in the meantime to make the code cleaner
142 */
143 typedef uint64_t ms_paddr_t;
144
145 /*
146 * Global Routines:
147 */
148 int memscrub_add_span(pfn_t pfn, pgcnt_t pages);
149 int memscrub_delete_span(pfn_t pfn, pgcnt_t pages);
150 int memscrub_init(void);
151 void memscrub_induced_error(void);
152
153 /*
154 * Global Data:
155 */
156
157 /*
158 * scrub if we have at least this many pages
159 */
160 #define MEMSCRUB_MIN_PAGES (32 * 1024 * 1024 / PAGESIZE)
161
162 /*
163 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
164 */
165 #define MEMSCRUB_DFL_PERIOD_SEC (12 * 60 * 60) /* 12 hours */
166
167 /*
168 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
169 */
170 #define MEMSCRUB_DFL_SPAN_PAGES ((32 * 1024 * 1024) / PAGESIZE)
171
172 /*
173 * almost anything is higher priority than scrubbing
174 */
175 #define MEMSCRUB_DFL_THREAD_PRI MINCLSYSPRI
176
177 /*
178 * size used when scanning memory
179 */
180 #define MEMSCRUB_BLOCK_SIZE 256
181 #define MEMSCRUB_BLOCK_SIZE_SHIFT 8 /* log2(MEMSCRUB_BLOCK_SIZE) */
182 #define MEMSCRUB_BLOCKS_PER_PAGE (PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)
183
184 #define	MEMSCRUB_BPP4M		(MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT)
185 #define	MEMSCRUB_BPP512K	(MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
186 #define	MEMSCRUB_BPP64K		(MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
187 #define	MEMSCRUB_BPP		(MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)
188
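/*
 * Worked example of the block arithmetic above, assuming an 8K base
 * PAGESIZE; the numbers are illustrative only:
 *
 *	MEMSCRUB_BLOCKS_PER_PAGE = 8192 >> 8    = 32 blocks per 8K page
 *	MEMSCRUB_BPP             = 8192 >> 8    = 32
 *	MEMSCRUB_BPP64K          = 65536 >> 8   = 256
 *	MEMSCRUB_BPP512K         = 524288 >> 8  = 2048
 *	MEMSCRUB_BPP4M           = 4194304 >> 8 = 16384
 *
 * memscrub_scan() works in units of these 256-byte blocks, so a span of
 * memscrub_span_pages pages is passed in as
 * pages x MEMSCRUB_BLOCKS_PER_PAGE blocks.
 */
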
189 /*
190 * This message indicates that we have exceeded the limitations of
191 * the memscrubber. See the comments above regarding what would
192 * cause the sleep time to become zero. In DEBUG mode, this message
193 * is logged on the console and in the messages file. In non-DEBUG
194 * mode, it is only logged in the messages file.
195 */
196 #ifdef DEBUG
197 #define MEMSCRUB_OVERRIDE_MSG "Memory scrubber sleep time is zero " \
198 "seconds, consuming entire CPU."
199 #else
200 #define MEMSCRUB_OVERRIDE_MSG "!Memory scrubber sleep time is zero " \
201 "seconds, consuming entire CPU."
202 #endif /* DEBUG */
203
204 /*
205 * we can patch these defaults in /etc/system if necessary
206 */
207 uint_t disable_memscrub = 0;
208 uint_t pause_memscrub = 0;
209 uint_t read_all_memscrub = 0;
210 uint_t memscrub_verbose = 0;
211 uint_t memscrub_all_idle = 0;
212 uint_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
213 uint_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
214 uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
215 uint_t memscrub_delay_start_sec = 5 * 60;
216 uint_t memscrub_override_ticks = 1;
217
218 /*
219 * Static Routines
220 */
221 static void memscrubber(void);
222 static void memscrub_cleanup(void);
223 static int memscrub_add_span_gen(pfn_t, pgcnt_t, struct memlist **, uint_t *);
224 static int memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp);
225 static void memscrub_scan(uint_t blks, ms_paddr_t src);
226
227 /*
228 * Static Data
229 */
230
231 static struct memlist *memscrub_memlist;
232 static uint_t memscrub_phys_pages;
233
234 static kcondvar_t memscrub_cv;
235 static kmutex_t memscrub_lock;
236 /*
237 * memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ...
238 */
239 static void memscrub_init_mem_config(void);
240 static void memscrub_uninit_mem_config(void);
241
242 /*
243  * Linked list of memscrub-aware spans that contain retired pages.
244 * Currently enabled only on sun4u USIII-based platforms.
245 */
246 typedef struct memscrub_page_retire_span {
247 ms_paddr_t address;
248 struct memscrub_page_retire_span *next;
249 } memscrub_page_retire_span_t;
250
251 static memscrub_page_retire_span_t *memscrub_page_retire_span_list = NULL;
252
253 static void memscrub_page_retire_span_add(ms_paddr_t);
254 static void memscrub_page_retire_span_delete(ms_paddr_t);
255 static int memscrub_page_retire_span_search(ms_paddr_t);
256 static void memscrub_page_retire_span_list_update(void);
257
258 /*
259  * add_to_page_retire_list: Set by cpu_async_log_err(), via
260  * memscrub_induced_error(), when a CE/UE occurs on a retired page as a
261  * result of a memscrub read. Cleared by the memscrubber after it has
262  * updated the global page retire span list. The flag piggybacks on the
263  * protection of memscrub_lock, which is held across both set and clear.
264  * Note: when cpu_async_log_err() calls memscrub_induced_error(), it runs
265  * in softint context, fired on the CPU on which the memscrub thread is
266  * currently running. The memscrub thread sets its CPU affinity during
267  * memscrub_read(), so migration to a new CPU is not expected.
268 */
269 static int add_to_page_retire_list = 0;
270
271 /*
272 * Keep track of some interesting statistics
273 */
274 static struct memscrub_kstats {
275 kstat_named_t done_early; /* ahead of schedule */
276 kstat_named_t early_sec; /* by cumulative num secs */
277 kstat_named_t done_late; /* behind schedule */
278 kstat_named_t late_sec; /* by cumulative num secs */
279 kstat_named_t interval_ticks; /* num ticks between intervals */
280 kstat_named_t force_run; /* forced to run, non-timeout */
281 kstat_named_t errors_found; /* num errors found by memscrub */
282 } memscrub_counts = {
283 { "done_early", KSTAT_DATA_UINT32 },
284 { "early_sec", KSTAT_DATA_UINT32 },
285 { "done_late", KSTAT_DATA_UINT32 },
286 { "late_sec", KSTAT_DATA_UINT32 },
287 { "interval_ticks", KSTAT_DATA_UINT32 },
288 { "force_run", KSTAT_DATA_UINT32 },
289 { "errors_found", KSTAT_DATA_UINT32 },
290 };
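
/*
 * These counters are exported as the "memscrub_kstat" kstat in the
 * "unix" module (see memscrub_init() below). A hypothetical way to
 * observe them from a shell, assuming the standard kstat(1M) utility:
 *
 *	# kstat -m unix -n memscrub_kstat
 *
 * The kstat is created with KSTAT_FLAG_WRITABLE, so a privileged
 * consumer may reset the counters if desired.
 */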
291
292 #define MEMSCRUB_STAT_INC(stat) memscrub_counts.stat.value.ui32++
293 #define MEMSCRUB_STAT_SET(stat, val) memscrub_counts.stat.value.ui32 = (val)
294 #define MEMSCRUB_STAT_NINC(stat, val) memscrub_counts.stat.value.ui32 += (val)
295
296 static struct kstat *memscrub_ksp = (struct kstat *)NULL;
297
298 static timeout_id_t memscrub_tid = 0; /* keep track of timeout id */
299
300 /*
301 * create memscrub_memlist from phys_install list
302 * initialize locks, set memscrub_phys_pages.
303 */
304 int
305 memscrub_init(void)
306 {
307 struct memlist *src;
308
309 /*
310 	 * only start up the scrubber if we have a minimum
311 * number of pages
312 */
313 if (physinstalled >= MEMSCRUB_MIN_PAGES) {
314
315 /*
316 * initialize locks
317 */
318 mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
319 cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);
320
321 /*
322 * copy phys_install to memscrub_memlist
323 */
324 for (src = phys_install; src; src = src->ml_next) {
325 if (memscrub_add_span(
326 (pfn_t)(src->ml_address >> PAGESHIFT),
327 (pgcnt_t)(src->ml_size >> PAGESHIFT))) {
328 memscrub_cleanup();
329 return (-1);
330 }
331 }
332
333 /*
334 * initialize kstats
335 */
336 memscrub_ksp = kstat_create("unix", 0, "memscrub_kstat",
337 "misc", KSTAT_TYPE_NAMED,
338 sizeof (memscrub_counts) / sizeof (kstat_named_t),
339 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
340
341 if (memscrub_ksp) {
342 memscrub_ksp->ks_data = (void *)&memscrub_counts;
343 kstat_install(memscrub_ksp);
344 } else {
345 cmn_err(CE_NOTE, "Memscrubber cannot create kstats\n");
346 }
347
348 /*
349 * create memscrubber thread
350 */
351 (void) thread_create(NULL, 0, (void (*)())memscrubber,
352 NULL, 0, &p0, TS_RUN, memscrub_thread_pri);
353
354 /*
355 		 * We don't want callbacks changing the list
356 * if there is no thread running. We do not
357 * attempt to deal with stopping/starting scrubbing
358 * on memory size changes.
359 */
360 memscrub_init_mem_config();
361 }
362
363 return (0);
364 }
365
366 static void
367 memscrub_cleanup(void)
368 {
369 memscrub_uninit_mem_config();
370 while (memscrub_memlist) {
371 (void) memscrub_delete_span(
372 (pfn_t)(memscrub_memlist->ml_address >> PAGESHIFT),
373 (pgcnt_t)(memscrub_memlist->ml_size >> PAGESHIFT));
374 }
375 if (memscrub_ksp)
376 kstat_delete(memscrub_ksp);
377 cv_destroy(&memscrub_cv);
378 mutex_destroy(&memscrub_lock);
379 }
380
381 #ifdef MEMSCRUB_DEBUG
382 static void
383 memscrub_printmemlist(char *title, struct memlist *listp)
384 {
385 struct memlist *list;
386
387 cmn_err(CE_CONT, "%s:\n", title);
388
389 for (list = listp; list; list = list->ml_next) {
390 cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
391 list->ml_address, list->ml_size);
392 }
393 }
394 #endif /* MEMSCRUB_DEBUG */
395
396 /* ARGSUSED */
397 static void
398 memscrub_wakeup(void *c)
399 {
400 /*
401 * grab mutex to guarantee that our wakeup call
402 * arrives after we go to sleep -- so we can't sleep forever.
403 */
404 mutex_enter(&memscrub_lock);
405 cv_signal(&memscrub_cv);
406 mutex_exit(&memscrub_lock);
407 }
408
409 /*
410 * provide an interface external to the memscrubber
411 * which will force the memscrub thread to run vs.
412 * waiting for the timeout, if one is set
413 */
414 void
415 memscrub_run(void)
416 {
417 MEMSCRUB_STAT_INC(force_run);
418 if (memscrub_tid) {
419 (void) untimeout(memscrub_tid);
420 memscrub_wakeup((void *)NULL);
421 }
422 }
423
424 /*
425 * this calculation doesn't account for the time
426 * that the actual scan consumes -- so we'd fall
427 * slightly behind schedule with this interval.
428 * It's very small.
429 */
430
431 static uint_t
432 compute_interval_ticks(void)
433 {
434 /*
435 	 * We use msp_safe and mpp_safe below to ensure that somebody
436 	 * doesn't set memscrub_span_pages or memscrub_phys_pages
437 	 * to 0 out from under us.
438 */
439 static uint_t msp_safe, mpp_safe;
440 static uint_t interval_ticks, period_ticks;
441 msp_safe = memscrub_span_pages;
442 mpp_safe = memscrub_phys_pages;
443
444 period_ticks = memscrub_period_sec * hz;
445 interval_ticks = period_ticks;
446
447 ASSERT(mutex_owned(&memscrub_lock));
448
449 if ((msp_safe != 0) && (mpp_safe != 0)) {
450 if (memscrub_phys_pages <= msp_safe) {
451 interval_ticks = period_ticks;
452 } else {
453 interval_ticks = (period_ticks /
454 (mpp_safe / msp_safe));
455 }
456 }
457 return (interval_ticks);
458 }
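
/*
 * Worked example of the computation above (illustrative only, assuming
 * 8K pages, hz = 100 and the default tunables):
 *
 *	memscrub_phys_pages = 16GB of memory = 2097152 pages
 *	memscrub_span_pages = 32MB           = 4096 pages
 *	period_ticks        = 43200 x 100    = 4320000 ticks
 *	interval_ticks      = 4320000 / (2097152 / 4096)
 *	                    = 4320000 / 512  = 8437 ticks (~84 seconds)
 *
 * If phys_pages were less than or equal to span_pages, all of memory
 * would be read in a single wakeup, so the full period is used.
 */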
459
460 void
461 memscrubber(void)
462 {
463 ms_paddr_t address, addr;
464 time_t deadline;
465 pgcnt_t pages;
466 uint_t reached_end = 1;
467 uint_t paused_message = 0;
468 uint_t interval_ticks = 0;
469 uint_t sleep_warn_printed = 0;
470 callb_cpr_t cprinfo;
471
472 /*
473 * notify CPR of our existence
474 */
475 CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");
476
477 mutex_enter(&memscrub_lock);
478
479 if (memscrub_memlist == NULL) {
480 cmn_err(CE_WARN, "memscrub_memlist not initialized.");
481 goto memscrub_exit;
482 }
483
484 address = memscrub_memlist->ml_address;
485
486 deadline = gethrestime_sec() + memscrub_delay_start_sec;
487
488 for (;;) {
489 if (disable_memscrub)
490 break;
491
492 /*
493 * compute interval_ticks
494 */
495 interval_ticks = compute_interval_ticks();
496
497 /*
498 * If the calculated sleep time is zero, and pause_memscrub
499 * has been set, make sure we sleep so that another thread
500 * can acquire memscrub_lock.
501 */
502 if (interval_ticks == 0 && pause_memscrub) {
503 interval_ticks = hz;
504 }
505
506 /*
507 * And as a fail safe, under normal non-paused operation, do
508 * not allow the sleep time to be zero.
509 */
510 if (interval_ticks == 0) {
511 interval_ticks = memscrub_override_ticks;
512 if (!sleep_warn_printed) {
513 cmn_err(CE_NOTE, MEMSCRUB_OVERRIDE_MSG);
514 sleep_warn_printed = 1;
515 }
516 }
517
518 MEMSCRUB_STAT_SET(interval_ticks, interval_ticks);
519
520 /*
521 * Did we just reach the end of memory? If we are at the
522 * end of memory, delay end of memory processing until
523 * pause_memscrub is not set.
524 */
525 if (reached_end && !pause_memscrub) {
526 time_t now = gethrestime_sec();
527
528 if (now >= deadline) {
529 MEMSCRUB_STAT_INC(done_late);
530 MEMSCRUB_STAT_NINC(late_sec, now - deadline);
531 /*
532 * past deadline, start right away
533 */
534 interval_ticks = 0;
535
536 deadline = now + memscrub_period_sec;
537 } else {
538 /*
539 * we finished ahead of schedule.
540 				 * wait until the previous deadline before restarting.
541 */
542 interval_ticks = (deadline - now) * hz;
543 MEMSCRUB_STAT_INC(done_early);
544 MEMSCRUB_STAT_NINC(early_sec, deadline - now);
545 deadline += memscrub_period_sec;
546 }
547 reached_end = 0;
548 sleep_warn_printed = 0;
549 }
550
551 if (interval_ticks != 0) {
552 /*
553 * it is safe from our standpoint for CPR to
554 * suspend the system
555 */
556 CALLB_CPR_SAFE_BEGIN(&cprinfo);
557
558 /*
559 * hit the snooze bar
560 */
561 memscrub_tid = timeout(memscrub_wakeup, NULL,
562 interval_ticks);
563
564 /*
565 * go to sleep
566 */
567 cv_wait(&memscrub_cv, &memscrub_lock);
568
569 /*
570 * at this point, no timeout should be set
571 */
572 memscrub_tid = 0;
573
574 /*
575 			 * we need to go to work and will be modifying
576 * our internal state and mapping/unmapping
577 * TTEs
578 */
579 CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);
580 }
581
582
583 if (memscrub_phys_pages == 0) {
584 cmn_err(CE_WARN, "Memory scrubber has 0 pages to read");
585 goto memscrub_exit;
586 }
587
588 if (!pause_memscrub) {
589 if (paused_message) {
590 paused_message = 0;
591 if (memscrub_verbose)
592 cmn_err(CE_NOTE, "Memory scrubber "
593 "resuming");
594 }
595
596 if (read_all_memscrub) {
597 if (memscrub_verbose)
598 cmn_err(CE_NOTE, "Memory scrubber "
599 "reading all memory per request");
600
601 addr = memscrub_memlist->ml_address;
602 reached_end = 0;
603 while (!reached_end) {
604 if (disable_memscrub)
605 break;
606 pages = memscrub_phys_pages;
607 reached_end = memscrub_verify_span(
608 &addr, &pages);
609 memscrub_scan(pages *
610 MEMSCRUB_BLOCKS_PER_PAGE, addr);
611 addr += ((uint64_t)pages * PAGESIZE);
612 }
613 read_all_memscrub = 0;
614 }
615
616 /*
617 * read 1 span
618 */
619 pages = memscrub_span_pages;
620
621 if (disable_memscrub)
622 break;
623
624 /*
625 * determine physical address range
626 */
627 reached_end = memscrub_verify_span(&address,
628 &pages);
629
630 memscrub_scan(pages * MEMSCRUB_BLOCKS_PER_PAGE,
631 address);
632
633 address += ((uint64_t)pages * PAGESIZE);
634 }
635
636 if (pause_memscrub && !paused_message) {
637 paused_message = 1;
638 if (memscrub_verbose)
639 cmn_err(CE_NOTE, "Memory scrubber paused");
640 }
641 }
642
643 memscrub_exit:
644 cmn_err(CE_NOTE, "Memory scrubber exiting");
645 CALLB_CPR_EXIT(&cprinfo);
646 memscrub_cleanup();
647 thread_exit();
648 /* NOTREACHED */
649 }
650
651 /*
652 * condition address and size
653 * such that they span legal physical addresses.
654 *
655 * when appropriate, address will be rounded up to start of next
656 * struct memlist, and pages will be rounded down to the end of the
657 * memlist size.
658 *
659 * returns 1 if reached end of list, else returns 0.
660 */
661 static int
662 memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp)
663 {
664 struct memlist *mlp;
665 ms_paddr_t address = *addrp;
666 uint64_t bytes = (uint64_t)*pagesp * PAGESIZE;
667 uint64_t bytes_remaining;
668 int reached_end = 0;
669
670 ASSERT(mutex_owned(&memscrub_lock));
671
672 /*
673 * find memlist struct that contains addrp
674 * assumes memlist is sorted by ascending address.
675 */
676 for (mlp = memscrub_memlist; mlp != NULL; mlp = mlp->ml_next) {
677 /*
678 * if before this chunk, round up to beginning
679 */
680 if (address < mlp->ml_address) {
681 address = mlp->ml_address;
682 break;
683 }
684 /*
685 * if before end of chunk, then we found it
686 */
687 if (address < (mlp->ml_address + mlp->ml_size))
688 break;
689
690 /* else go to next struct memlist */
691 }
692 /*
693 * if we hit end of list, start at beginning
694 */
695 if (mlp == NULL) {
696 mlp = memscrub_memlist;
697 address = mlp->ml_address;
698 }
699
700 /*
701 * now we have legal address, and its mlp, condition bytes
702 */
703 bytes_remaining = (mlp->ml_address + mlp->ml_size) - address;
704
705 if (bytes > bytes_remaining)
706 bytes = bytes_remaining;
707
708 /*
709 * will this span take us to end of list?
710 */
711 if ((mlp->ml_next == NULL) &&
712 ((mlp->ml_address + mlp->ml_size) == (address + bytes)))
713 reached_end = 1;
714
715 /* return values */
716 *addrp = address;
717 *pagesp = bytes / PAGESIZE;
718
719 return (reached_end);
720 }
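
/*
 * Illustrative example of the conditioning above (addresses are
 * hypothetical): suppose the scrub list holds two chunks,
 * [0x00000000, 0x40000000) and [0x80000000, 0xc0000000), and the caller
 * passes *addrp = 0x3ffe0000 with *pagesp = 4096 (32MB of 8K pages).
 *
 *	- 0x3ffe0000 falls inside the first chunk, but only 128K remain
 *	  before its end, so *pagesp is clipped to 16 pages.
 *	- On the next call *addrp will be 0x40000000, which lies between
 *	  the chunks; it is rounded up to 0x80000000 and the scan
 *	  continues from there.
 *	- reached_end is returned as 1 only when the span ends exactly
 *	  at the end of the last chunk.
 */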
721
722 /*
723 * add a span to the memscrub list
724 * add to memscrub_phys_pages
725 */
726 int
727 memscrub_add_span(pfn_t pfn, pgcnt_t pages)
728 {
729 #ifdef MEMSCRUB_DEBUG
730 ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
731 uint64_t bytes = (uint64_t)pages << PAGESHIFT;
732 #endif /* MEMSCRUB_DEBUG */
733
734 int retval;
735
736 mutex_enter(&memscrub_lock);
737
738 #ifdef MEMSCRUB_DEBUG
739 memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
740 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
741 cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
742 " size: 0x%llx\n", address, bytes);
743 #endif /* MEMSCRUB_DEBUG */
744
745 retval = memscrub_add_span_gen(pfn, pages, &memscrub_memlist,
746 &memscrub_phys_pages);
747
748 #ifdef MEMSCRUB_DEBUG
749 memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
750 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
751 #endif /* MEMSCRUB_DEBUG */
752
753 mutex_exit(&memscrub_lock);
754
755 return (retval);
756 }
757
758 static int
759 memscrub_add_span_gen(
760 pfn_t pfn,
761 pgcnt_t pages,
762 struct memlist **list,
763 uint_t *npgs)
764 {
765 ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
766 uint64_t bytes = (uint64_t)pages << PAGESHIFT;
767 struct memlist *dst;
768 struct memlist *prev, *next;
769 int retval = 0;
770
771 /*
772 * allocate a new struct memlist
773 */
774
775 dst = (struct memlist *)
776 kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
777
778 if (dst == NULL) {
779 retval = -1;
780 goto add_done;
781 }
782
783 dst->ml_address = address;
784 dst->ml_size = bytes;
785
786 /*
787 * first insert
788 */
789 if (*list == NULL) {
790 dst->ml_prev = NULL;
791 dst->ml_next = NULL;
792 *list = dst;
793
794 goto add_done;
795 }
796
797 /*
798 * insert into sorted list
799 */
800 for (prev = NULL, next = *list;
801 next != NULL;
802 prev = next, next = next->ml_next) {
803 if (address > (next->ml_address + next->ml_size))
804 continue;
805
806 /*
807 * else insert here
808 */
809
810 /*
811 * prepend to next
812 */
813 if ((address + bytes) == next->ml_address) {
814 kmem_free(dst, sizeof (struct memlist));
815
816 next->ml_address = address;
817 next->ml_size += bytes;
818
819 goto add_done;
820 }
821
822 /*
823 * append to next
824 */
825 if (address == (next->ml_address + next->ml_size)) {
826 kmem_free(dst, sizeof (struct memlist));
827
828 if (next->ml_next) {
829 /*
830 * don't overlap with next->ml_next
831 */
832 if ((address + bytes) >
833 next->ml_next->ml_address) {
834 retval = -1;
835 goto add_done;
836 }
837 /*
838 * concatenate next and next->ml_next
839 */
840 if ((address + bytes) ==
841 next->ml_next->ml_address) {
842 struct memlist *mlp = next->ml_next;
843
844 if (next == *list)
845 *list = next->ml_next;
846
847 mlp->ml_address = next->ml_address;
848 mlp->ml_size += next->ml_size;
849 mlp->ml_size += bytes;
850
851 if (next->ml_prev)
852 next->ml_prev->ml_next = mlp;
853 mlp->ml_prev = next->ml_prev;
854
855 kmem_free(next,
856 sizeof (struct memlist));
857 goto add_done;
858 }
859 }
860
861 next->ml_size += bytes;
862
863 goto add_done;
864 }
865
866 /* don't overlap with next */
867 if ((address + bytes) > next->ml_address) {
868 retval = -1;
869 kmem_free(dst, sizeof (struct memlist));
870 goto add_done;
871 }
872
873 /*
874 * insert before next
875 */
876 dst->ml_prev = prev;
877 dst->ml_next = next;
878 next->ml_prev = dst;
879 if (prev == NULL) {
880 *list = dst;
881 } else {
882 prev->ml_next = dst;
883 }
884 goto add_done;
885 } /* end for */
886
887 /*
888 * end of list, prev is valid and next is NULL
889 */
890 prev->ml_next = dst;
891 dst->ml_prev = prev;
892 dst->ml_next = NULL;
893
894 add_done:
895
896 if (retval != -1)
897 *npgs += pages;
898
899 return (retval);
900 }
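
/*
 * Illustrative example of the insertion logic above, in terms of the
 * address/byte values the function works with (addresses hypothetical).
 * Starting from an empty list:
 *
 *	add 0x00000000, 0x10000000 bytes  ->  [0x00000000, 0x10000000)
 *	add 0x20000000, 0x10000000 bytes  ->  two separate entries
 *	add 0x10000000, 0x10000000 bytes  ->  the new span is appended to
 *	    the first entry and the two entries are then concatenated,
 *	    leaving a single entry [0x00000000, 0x30000000)
 *	add 0x08000000, anything          ->  overlaps, rejected (-1)
 *
 * Note that the public memscrub_add_span() interface takes a pfn and a
 * page count; byte lengths are shown here only to keep the arithmetic
 * obvious.
 */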
901
902 /*
903 * delete a span from the memscrub list
904 * subtract from memscrub_phys_pages
905 */
906 int
907 memscrub_delete_span(pfn_t pfn, pgcnt_t pages)
908 {
909 ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
910 uint64_t bytes = (uint64_t)pages << PAGESHIFT;
911 struct memlist *dst, *next;
912 int retval = 0;
913
914 mutex_enter(&memscrub_lock);
915
916 #ifdef MEMSCRUB_DEBUG
917 memscrub_printmemlist("memscrub_memlist Before", memscrub_memlist);
918 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
919 cmn_err(CE_CONT, "memscrub_delete_span: 0x%llx 0x%llx\n",
920 address, bytes);
921 #endif /* MEMSCRUB_DEBUG */
922
923 /*
924 * find struct memlist containing page
925 */
926 for (next = memscrub_memlist; next != NULL; next = next->ml_next) {
927 if ((address >= next->ml_address) &&
928 (address < next->ml_address + next->ml_size))
929 break;
930 }
931
932 /*
933 * if start address not in list
934 */
935 if (next == NULL) {
936 retval = -1;
937 goto delete_done;
938 }
939
940 /*
941 * error if size goes off end of this struct memlist
942 */
943 if (address + bytes > next->ml_address + next->ml_size) {
944 retval = -1;
945 goto delete_done;
946 }
947
948 /*
949 * pages at beginning of struct memlist
950 */
951 if (address == next->ml_address) {
952 /*
953 * if start & size match, delete from list
954 */
955 if (bytes == next->ml_size) {
956 if (next == memscrub_memlist)
957 memscrub_memlist = next->ml_next;
958 if (next->ml_prev != NULL)
959 next->ml_prev->ml_next = next->ml_next;
960 if (next->ml_next != NULL)
961 next->ml_next->ml_prev = next->ml_prev;
962
963 kmem_free(next, sizeof (struct memlist));
964 } else {
965 /*
966 * increment start address by bytes
967 */
968 next->ml_address += bytes;
969 next->ml_size -= bytes;
970 }
971 goto delete_done;
972 }
973
974 /*
975 * pages at end of struct memlist
976 */
977 if (address + bytes == next->ml_address + next->ml_size) {
978 /*
979 * decrement size by bytes
980 */
981 next->ml_size -= bytes;
982 goto delete_done;
983 }
984
985 /*
986 * delete a span in the middle of the struct memlist
987 */
988 {
989 /*
990 * create a new struct memlist
991 */
992 dst = (struct memlist *)
993 kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
994
995 if (dst == NULL) {
996 retval = -1;
997 goto delete_done;
998 }
999
1000 		/*
1001 		 * new struct memlist gets the range starting just
1002 		 * past the deleted span, up to the end
1003 		 */
1004 		dst->ml_address = address + bytes;
1005 		dst->ml_size =
1006 		    (next->ml_address + next->ml_size) - dst->ml_address;
1007 
1008 		/*
1009 		 * existing struct memlist keeps the range up to
1010 		 * the start of the deleted span
1011 		 */
1012 		next->ml_size = address - next->ml_address;
1013
1014 /*
1015 * link in new memlist after old
1016 */
1017 dst->ml_next = next->ml_next;
1018 dst->ml_prev = next;
1019
1020 if (next->ml_next != NULL)
1021 next->ml_next->ml_prev = dst;
1022 next->ml_next = dst;
1023 }
1024
1025 delete_done:
1026 if (retval != -1) {
1027 memscrub_phys_pages -= pages;
1028 if (memscrub_phys_pages == 0)
1029 disable_memscrub = 1;
1030 }
1031
1032 #ifdef MEMSCRUB_DEBUG
1033 memscrub_printmemlist("memscrub_memlist After", memscrub_memlist);
1034 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
1035 #endif /* MEMSCRUB_DEBUG */
1036
1037 mutex_exit(&memscrub_lock);
1038 return (retval);
1039 }
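
/*
 * Illustrative example of the three delete cases above (addresses are
 * hypothetical). With a single list entry [0x00000000, 0x30000000):
 *
 *	delete at the front  -> the entry's start address moves up past
 *	                        the deleted range
 *	delete at the back   -> the entry's size shrinks
 *	delete in the middle, e.g. [0x10000000, 0x20000000)
 *	                     -> the entry is split into
 *	                        [0x00000000, 0x10000000) and
 *	                        [0x20000000, 0x30000000), with a new
 *	                        struct memlist allocated for the tail
 *
 * As with memscrub_add_span(), the public interface takes a pfn and a
 * page count rather than raw addresses.
 */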
1040
1041 static void
1042 memscrub_scan(uint_t blks, ms_paddr_t src)
1043 {
1044 uint_t psz, bpp, pgsread;
1045 pfn_t pfn;
1046 ms_paddr_t pa;
1047 caddr_t va;
1048 on_trap_data_t otd;
1049 int scan_mmu_pagesize = 0;
1050 int retired_pages = 0;
1051
1052 extern void memscrub_read(caddr_t src, uint_t blks);
1053
1054 ASSERT(mutex_owned(&memscrub_lock));
1055
1056 pgsread = 0;
1057 pa = src;
1058
1059 if (memscrub_page_retire_span_list != NULL) {
1060 if (memscrub_page_retire_span_search(src)) {
1061 /* retired pages in current span */
1062 scan_mmu_pagesize = 1;
1063 }
1064 }
1065
1066 #ifdef MEMSCRUB_DEBUG
1067 	cmn_err(CE_NOTE, "scan_mmu_pagesize = %d\n", scan_mmu_pagesize);
1068 #endif /* MEMSCRUB_DEBUG */
1069
1070 while (blks != 0) {
1071 /* Ensure the PA is properly aligned */
1072 if (((pa & MMU_PAGEMASK4M) == pa) &&
1073 (blks >= MEMSCRUB_BPP4M)) {
1074 psz = MMU_PAGESIZE4M;
1075 bpp = MEMSCRUB_BPP4M;
1076 } else if (((pa & MMU_PAGEMASK512K) == pa) &&
1077 (blks >= MEMSCRUB_BPP512K)) {
1078 psz = MMU_PAGESIZE512K;
1079 bpp = MEMSCRUB_BPP512K;
1080 } else if (((pa & MMU_PAGEMASK64K) == pa) &&
1081 (blks >= MEMSCRUB_BPP64K)) {
1082 psz = MMU_PAGESIZE64K;
1083 bpp = MEMSCRUB_BPP64K;
1084 } else if ((pa & MMU_PAGEMASK) == pa) {
1085 psz = MMU_PAGESIZE;
1086 bpp = MEMSCRUB_BPP;
1087 } else {
1088 if (memscrub_verbose) {
1089 cmn_err(CE_NOTE, "Memory scrubber ignoring "
1090 "non-page aligned block starting at 0x%"
1091 PRIx64, src);
1092 }
1093 return;
1094 }
1095 if (blks < bpp) bpp = blks;
1096
1097 #ifdef MEMSCRUB_DEBUG
1098 cmn_err(CE_NOTE, "Going to run psz=%x, "
1099 "bpp=%x pa=%llx\n", psz, bpp, pa);
1100 #endif /* MEMSCRUB_DEBUG */
1101
1102 /*
1103 * MEMSCRUBBASE is a 4MB aligned page in the
1104 * kernel so that we can quickly map the PA
1105 * to a VA for the block loads performed in
1106 * memscrub_read.
1107 */
1108 pfn = mmu_btop(pa);
1109 va = (caddr_t)MEMSCRUBBASE;
1110 hat_devload(kas.a_hat, va, psz, pfn, PROT_READ,
1111 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
1112
1113 /*
1114 * Can't allow the memscrubber to migrate across CPUs as
1115 * we need to know whether CEEN is enabled for the current
1116 * CPU to enable us to scrub the memory. Don't use
1117 * kpreempt_disable as the time we take to scan a span (even
1118 * without cpu_check_ce having to manually cpu_check_block)
1119 * is too long to hold a higher priority thread (eg, RT)
1120 * off cpu.
1121 */
1122 thread_affinity_set(curthread, CPU_CURRENT);
1123
1124 /*
1125 * Protect read scrub from async faults. For now, we simply
1126 * maintain a count of such faults caught.
1127 */
1128
1129 if (!on_trap(&otd, OT_DATA_EC) && !scan_mmu_pagesize) {
1130 memscrub_read(va, bpp);
1131 /*
1132 * Check if CEs require logging
1133 */
1134 cpu_check_ce(SCRUBBER_CEEN_CHECK,
1135 (uint64_t)pa, va, psz);
1136 no_trap();
1137 thread_affinity_clear(curthread);
1138 } else {
1139 no_trap();
1140 thread_affinity_clear(curthread);
1141
1142 			/*
1143 			 * Got an async error.
1144 			 * Try rescanning it at MMU_PAGESIZE
1145 			 * granularity if we were trying to
1146 			 * read at a larger page size, so that
1147 			 * we continue to scan the rest of the
1148 			 * span. We also scan at MMU_PAGESIZE
1149 			 * granularity when scan_mmu_pagesize is
1150 			 * set, to avoid reading memory on
1151 			 * retired pages.
1152 			 */
1153 if (psz > MMU_PAGESIZE || scan_mmu_pagesize) {
1154 caddr_t vaddr = va;
1155 ms_paddr_t paddr = pa;
1156 int tmp = 0;
1157 for (; tmp < bpp; tmp += MEMSCRUB_BPP) {
1158 /* Don't scrub retired pages */
1159 if (page_retire_check(paddr, NULL)
1160 == 0) {
1161 vaddr += MMU_PAGESIZE;
1162 paddr += MMU_PAGESIZE;
1163 retired_pages++;
1164 continue;
1165 }
1166 thread_affinity_set(curthread,
1167 CPU_CURRENT);
1168 if (!on_trap(&otd, OT_DATA_EC)) {
1169 memscrub_read(vaddr,
1170 MEMSCRUB_BPP);
1171 cpu_check_ce(
1172 SCRUBBER_CEEN_CHECK,
1173 (uint64_t)paddr, vaddr,
1174 MMU_PAGESIZE);
1175 no_trap();
1176 } else {
1177 no_trap();
1178 MEMSCRUB_STAT_INC(errors_found);
1179 }
1180 thread_affinity_clear(curthread);
1181 vaddr += MMU_PAGESIZE;
1182 paddr += MMU_PAGESIZE;
1183 }
1184 }
1185 }
1186 hat_unload(kas.a_hat, va, psz, HAT_UNLOAD_UNLOCK);
1187
1188 blks -= bpp;
1189 pa += psz;
1190 pgsread++;
1191 }
1192
1193 /*
1194 	 * If we just finished scrubbing MMU_PAGESIZE at a time and found
1195 	 * no retired pages, delete the span from the global list.
1196 */
1197 if (scan_mmu_pagesize && retired_pages == 0)
1198 memscrub_page_retire_span_delete(src);
1199
1200 /*
1201 	 * Encountered CE/UE on a retired page during memscrub read of current
1202 	 * span. Add the span to the global list so that it is not read again.
1203 */
1204 if (add_to_page_retire_list) {
1205 if (!memscrub_page_retire_span_search(src))
1206 memscrub_page_retire_span_add(src);
1207 add_to_page_retire_list = 0;
1208 }
1209
1210 if (memscrub_verbose) {
1211 cmn_err(CE_NOTE, "Memory scrubber read 0x%x pages starting "
1212 "at 0x%" PRIx64, pgsread, src);
1213 }
1214 }
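
/*
 * Illustrative example of the mapping-size selection in memscrub_scan()
 * above (addresses hypothetical, 8K base pages):
 *
 *	pa = 0x10400000, blks = 16384  -> 4M aligned with a full 4M worth
 *	                                  of blocks remaining, scan 4M
 *	pa = 0x10480000, blks = 2048   -> only 512K aligned, scan 512K
 *	pa = 0x10002000, blks = 32     -> only 8K aligned, scan one page
 *
 * Each chunk is mapped read-only at MEMSCRUBBASE, read with block loads,
 * checked for correctable errors, and then unmapped before the next
 * chunk is processed.
 */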
1215
1216 /*
1217 * Called by cpu_async_log_err() when memscrub read causes
1218 * CE/UE on a retired page.
1219 */
1220 void
1221 memscrub_induced_error(void)
1222 {
1223 add_to_page_retire_list = 1;
1224 }
1225
1226 /*
1227 * Called by page_retire() when toxic pages cannot be retired
1228 * immediately and are scheduled for retire. Memscrubber stops
1229 * scrubbing them to avoid further CE/UEs.
1230 */
1231 void
1232 memscrub_notify(ms_paddr_t pa)
1233 {
1234 mutex_enter(&memscrub_lock);
1235 if (!memscrub_page_retire_span_search(pa))
1236 memscrub_page_retire_span_add(pa);
1237 mutex_exit(&memscrub_lock);
1238 }
1239
1240 /*
1241 * Called by memscrub_scan() and memscrub_notify().
1242 * pa: physical address of span with CE/UE, add to global list.
1243 */
1244 static void
1245 memscrub_page_retire_span_add(ms_paddr_t pa)
1246 {
1247 memscrub_page_retire_span_t *new_span;
1248
1249 new_span = (memscrub_page_retire_span_t *)
1250 kmem_zalloc(sizeof (memscrub_page_retire_span_t), KM_NOSLEEP);
1251
1252 if (new_span == NULL) {
1253 #ifdef MEMSCRUB_DEBUG
1254 cmn_err(CE_NOTE, "failed to allocate new span - span with"
1255 " retired page/s not tracked.\n");
1256 #endif /* MEMSCRUB_DEBUG */
1257 return;
1258 }
1259
1260 new_span->address = pa;
1261 new_span->next = memscrub_page_retire_span_list;
1262 memscrub_page_retire_span_list = new_span;
1263 }
1264
1265 /*
1266 * Called by memscrub_scan().
1267 * pa: physical address of span to be removed from global list.
1268 */
1269 static void
1270 memscrub_page_retire_span_delete(ms_paddr_t pa)
1271 {
1272 memscrub_page_retire_span_t *prev_span, *next_span;
1273
1274 prev_span = memscrub_page_retire_span_list;
1275 next_span = memscrub_page_retire_span_list->next;
1276
1277 if (pa == prev_span->address) {
1278 memscrub_page_retire_span_list = next_span;
1279 kmem_free(prev_span, sizeof (memscrub_page_retire_span_t));
1280 return;
1281 }
1282
1283 while (next_span) {
1284 if (pa == next_span->address) {
1285 prev_span->next = next_span->next;
1286 kmem_free(next_span,
1287 sizeof (memscrub_page_retire_span_t));
1288 return;
1289 }
1290 prev_span = next_span;
1291 next_span = next_span->next;
1292 }
1293 }
1294
1295 /*
1296 * Called by memscrub_scan() and memscrub_notify().
1297 * pa: physical address of span to be searched in global list.
1298 */
1299 static int
1300 memscrub_page_retire_span_search(ms_paddr_t pa)
1301 {
1302 memscrub_page_retire_span_t *next_span = memscrub_page_retire_span_list;
1303
1304 while (next_span) {
1305 if (pa == next_span->address)
1306 return (1);
1307 next_span = next_span->next;
1308 }
1309 return (0);
1310 }
1311
1312 /*
1313  * Called from new_memscrub() as a result of a memory delete.
1314  * Use page_numtopp_nolock() to determine whether each PA is still valid.
1315 */
1316 static void
1317 memscrub_page_retire_span_list_update(void)
1318 {
1319 memscrub_page_retire_span_t *prev, *cur, *next;
1320
1321 if (memscrub_page_retire_span_list == NULL)
1322 return;
1323
1324 prev = cur = memscrub_page_retire_span_list;
1325 next = cur->next;
1326
1327 while (cur) {
1328 if (page_numtopp_nolock(mmu_btop(cur->address)) == NULL) {
1329 if (cur == memscrub_page_retire_span_list) {
1330 memscrub_page_retire_span_list = next;
1331 kmem_free(cur,
1332 sizeof (memscrub_page_retire_span_t));
1333 prev = cur = memscrub_page_retire_span_list;
1334 } else {
1335 prev->next = cur->next;
1336 kmem_free(cur,
1337 sizeof (memscrub_page_retire_span_t));
1338 cur = next;
1339 }
1340 } else {
1341 prev = cur;
1342 cur = next;
1343 }
1344 if (cur != NULL)
1345 next = cur->next;
1346 }
1347 }
1348
1349 /*
1350 * The memory add/delete callback mechanism does not pass in the
1351 * page ranges. The phys_install list has been updated though, so
1352 * create a new scrub list from it.
1353 */
1354
1355 static int
1356 new_memscrub(int update_page_retire_list)
1357 {
1358 struct memlist *src, *list, *old_list;
1359 uint_t npgs;
1360
1361 /*
1362 * copy phys_install to memscrub_memlist
1363 */
1364 list = NULL;
1365 npgs = 0;
1366 memlist_read_lock();
1367 for (src = phys_install; src; src = src->ml_next) {
1368 if (memscrub_add_span_gen((pfn_t)(src->ml_address >> PAGESHIFT),
1369 (pgcnt_t)(src->ml_size >> PAGESHIFT), &list, &npgs)) {
1370 memlist_read_unlock();
1371 while (list) {
1372 struct memlist *el;
1373
1374 el = list;
1375 list = list->ml_next;
1376 kmem_free(el, sizeof (struct memlist));
1377 }
1378 return (-1);
1379 }
1380 }
1381 memlist_read_unlock();
1382
1383 mutex_enter(&memscrub_lock);
1384 memscrub_phys_pages = npgs;
1385 old_list = memscrub_memlist;
1386 memscrub_memlist = list;
1387
1388 if (update_page_retire_list)
1389 memscrub_page_retire_span_list_update();
1390
1391 mutex_exit(&memscrub_lock);
1392
1393 while (old_list) {
1394 struct memlist *el;
1395
1396 el = old_list;
1397 old_list = old_list->ml_next;
1398 kmem_free(el, sizeof (struct memlist));
1399 }
1400
1401 return (0);
1402 }
1403
1404 /*ARGSUSED*/
1405 static void
1406 memscrub_mem_config_post_add(
1407 void *arg,
1408 pgcnt_t delta_pages)
1409 {
1410 /*
1411 * We increment pause_memscrub before entering new_memscrub(). This
1412 * will force the memscrubber to sleep, allowing the DR callback
1413 * thread to acquire memscrub_lock in new_memscrub(). The use of
1414 	 * atomic_inc_32()/atomic_dec_32() allows concurrent memory DR
1415 	 * operations to use the callbacks safely.
1416 */
1417 atomic_inc_32(&pause_memscrub);
1418 ASSERT(pause_memscrub != 0);
1419
1420 /*
1421 * "Don't care" if we are not scrubbing new memory.
1422 */
1423 (void) new_memscrub(0); /* retain page retire list */
1424
1425 /* Restore the pause setting. */
1426 atomic_dec_32(&pause_memscrub);
1427 }
1428
1429 /*ARGSUSED*/
1430 static int
1431 memscrub_mem_config_pre_del(
1432 void *arg,
1433 pgcnt_t delta_pages)
1434 {
1435 /* Nothing to do. */
1436 return (0);
1437 }
1438
1439 /*ARGSUSED*/
1440 static void
1441 memscrub_mem_config_post_del(
1442 void *arg,
1443 pgcnt_t delta_pages,
1444 int cancelled)
1445 {
1446 /*
1447 * We increment pause_memscrub before entering new_memscrub(). This
1448 * will force the memscrubber to sleep, allowing the DR callback
1449 * thread to acquire memscrub_lock in new_memscrub(). The use of
1450 	 * atomic_inc_32()/atomic_dec_32() allows concurrent memory DR
1451 	 * operations to use the callbacks safely.
1452 */
1453 atomic_inc_32(&pause_memscrub);
1454 ASSERT(pause_memscrub != 0);
1455
1456 /*
1457 * Must stop scrubbing deleted memory as it may be disconnected.
1458 */
1459 if (new_memscrub(1)) { /* update page retire list */
1460 disable_memscrub = 1;
1461 }
1462
1463 /* Restore the pause setting. */
1464 atomic_dec_32(&pause_memscrub);
1465 }
1466
1467 static kphysm_setup_vector_t memscrub_mem_config_vec = {
1468 KPHYSM_SETUP_VECTOR_VERSION,
1469 memscrub_mem_config_post_add,
1470 memscrub_mem_config_pre_del,
1471 memscrub_mem_config_post_del,
1472 };
1473
1474 static void
1475 memscrub_init_mem_config()
1476 {
1477 int ret;
1478
1479 ret = kphysm_setup_func_register(&memscrub_mem_config_vec,
1480 (void *)NULL);
1481 ASSERT(ret == 0);
1482 }
1483
1484 static void
1485 memscrub_uninit_mem_config()
1486 {
1487 /* This call is OK if the register call was not done. */
1488 kphysm_setup_func_unregister(&memscrub_mem_config_vec, (void *)NULL);
1489 }
1490