xref: /titanic_41/usr/src/uts/i86pc/os/memscrub.c (revision 200c5a5a428f15c16e2a526ed69d462af62e8e1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * i86pc Memory Scrubbing
31  *
32  * On detection of a correctable memory ECC error, the i86pc hardware
33  * returns the corrected data to the requester and may re-write it
34  * to memory (DRAM or NVRAM). Machines which do not re-write this to
35  * memory should add an NMI handler to correct and rewrite.
36  *
37  * Scrubbing thus reduces the likelihood that multiple transient errors
38  * will occur in the same memory word, making uncorrectable errors due
39  * to transients less likely.
40  *
41  * Thus is born the desire that every memory location be periodically
42  * accessed.
43  *
44  * This file implements a memory scrubbing thread.  This scrubber
45  * guarantees that all of physical memory is accessed periodically
46  * (memscrub_period_sec -- 12 hours).
47  *
48  * It attempts to do this as unobtrusively as possible.  The thread
49  * schedules itself to wake up at an interval such that if it reads
50  * memscrub_span_pages (4MB) on each wakeup, it will read all of physical
51  * memory in memscrub_period_sec (12 hours).
52  *
53  * The scrubber uses the REP LODS so it reads 4MB in 0.15 secs (on P5-200).
54  * When it completes a span, if all the CPUs are idle, it reads another span.
55  * Typically it soaks up idle time this way to reach its deadline early
56  * -- and sleeps until the next period begins.
57  *
58  * Maximal Cost Estimate:  8GB @ xxMB/s = xxx seconds spent in 640 wakeups
59  * that run for 0.15 seconds at intervals of 67 seconds.
60  *
61  * In practice, the scrubber finds enough idle time to finish in a few
62  * minutes, and sleeps until its 12 hour deadline.
63  *
64  * The scrubber maintains a private copy of the phys_install memory list
65  * to keep track of what memory should be scrubbed.
66  *
67  * The following parameters can be set via /etc/system
68  *
69  * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB)
70  * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
71  * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0)
72  * memscrub_delay_start_sec = (10 seconds)
73  * disable_memscrub = (0)
74  *
75  * the scrubber will exit (or never be started) if it finds the variable
76  * "disable_memscrub" set.
77  *
78  * MEMSCRUB_DFL_SPAN_PAGES  is based on the guess that 0.15 sec
79  * is a "good" amount of minimum time for the thread to run at a time.
80  *
81  * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
82  * twice the frequency the hardware folk estimated would be necessary.
83  *
84  * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly
85  * any other use of the system should be higher priority than scrubbing.
86  */
87 
88 #include <sys/types.h>
89 #include <sys/systm.h>		/* timeout, types, t_lock */
90 #include <sys/cmn_err.h>
91 #include <sys/sysmacros.h>	/* MIN */
92 #include <sys/memlist.h>	/* memlist */
93 #include <sys/kmem.h>		/* KMEM_NOSLEEP */
94 #include <sys/cpuvar.h>		/* ncpus_online */
95 #include <sys/debug.h>		/* ASSERTs */
96 #include <sys/vmem.h>
97 #include <sys/mman.h>
98 #include <vm/seg_kmem.h>
99 #include <vm/seg_kpm.h>
100 #include <vm/hat_i86.h>
101 
/*
 * Virtual address through which the page currently being scrubbed is read.
 * When kpm is disabled, memscrub_init() allocates memscrub_window as one
 * page of kernel heap address space and memscrub_pte is the handle that
 * hat_mempte_remap() uses to point that window at each physical page.
 * When kpm is enabled, memscrubber() simply reassigns memscrub_window to
 * the kpm address of each page and memscrub_pte is unused.
 */
static caddr_t	memscrub_window;
static hat_mempte_t memscrub_pte;
104 
105 /*
106  * Global Data:
107  */
108 /*
109  * scan all of physical memory at least once every MEMSCRUB_DFL_PERIOD_SEC
110  */
111 #define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */
112 
113 /*
114  * start only if at least MEMSCRUB_MIN_PAGES in system
115  */
116 #define	MEMSCRUB_MIN_PAGES	((32 * 1024 * 1024) / PAGESIZE)
117 
118 /*
119  * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
120  */
121 #define	MEMSCRUB_DFL_SPAN_PAGES	((4 * 1024 * 1024) / PAGESIZE)
122 
123 /*
124  * almost anything is higher priority than scrubbing
125  */
126 #define	MEMSCRUB_DFL_THREAD_PRI	0
127 
/*
 * we can patch these defaults in /etc/system if necessary
 */
/* nonzero makes the scrubber thread leave its loop and exit */
uint_t disable_memscrub = 0;
/* same effect, but suppresses the exit notice; set by memscrub_disable() */
static uint_t disable_memscrub_quietly = 0;
/* memscrub_init() does nothing when physmem is below this many pages */
pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES;
/* upper bound on the number of pages scrubbed per span */
pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
/* target number of seconds for one full pass over memory */
time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
/* priority handed to thread_create() for the scrubber thread */
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
/* seconds from thread start to the deadline of the first pass */
time_t memscrub_delay_start_sec = 10;
138 
139 /*
140  * Static Routines
141  */
142 static void memscrubber(void);
143 static int system_is_idle(void);
144 static int memscrub_add_span(uint64_t, uint64_t);
145 
/*
 * Static Data
 */
/* private, address-ordered list of spans built by memscrub_add_span() */
static struct memlist *memscrub_memlist;
/* running total of pages added through memscrub_add_span() */
static uint_t memscrub_phys_pages;

/* the scrubber thread sleeps on memscrub_cv; memscrub_wakeup() signals it */
static kcondvar_t memscrub_cv;
/*
 * memscrub_lock protects memscrub_memlist
 */
static kmutex_t memscrub_lock;

/*
 * Statistics maintained by memscrubber().
 */
/* number of spans scanned */
uint_t memscrub_scans_done;

/* passes that finished before their deadline, and the seconds left over */
uint_t memscrub_done_early;
uint_t memscrub_early_sec;

/* passes that finished at or after their deadline, and the seconds overdue */
uint_t memscrub_done_late;
time_t memscrub_late_sec;
165 
166 /*
167  * create memscrub_memlist from phys_install list
168  * initialize locks, set memscrub_phys_pages.
169  */
170 void
171 memscrub_init()
172 {
173 	struct memlist *src;
174 
175 	if (physmem < memscrub_min_pages)
176 		return;
177 
178 	if (!kpm_enable) {
179 		memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
180 		memscrub_pte = hat_mempte_setup(memscrub_window);
181 	}
182 
183 	/*
184 	 * copy phys_install to memscrub_memlist
185 	 */
186 	for (src = phys_install; src; src = src->next) {
187 		if (memscrub_add_span(src->address, src->size)) {
188 			cmn_err(CE_WARN,
189 			    "Memory scrubber failed to initialize\n");
190 			return;
191 		}
192 	}
193 
194 	mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
195 	cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);
196 
197 	/*
198 	 * create memscrubber thread
199 	 */
200 	(void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0,
201 	    TS_RUN, memscrub_thread_pri);
202 }
203 
/*
 * Function to cause the software memscrubber to exit quietly if the
 * platform support has located a hardware scrubber and enabled it.
 *
 * Only sets disable_memscrub_quietly.  The scrubber thread polls that flag
 * at the top of its loops, so it exits the next time it runs, and it skips
 * the "memory scrubber exiting." notice when the flag is set.
 */
void
memscrub_disable(void)
{
	disable_memscrub_quietly = 1;
}
213 
#ifdef MEMSCRUB_DEBUG
/*
 * Debug aid: log a title line followed by the address and size of every
 * entry on the given memlist, in list order.
 */
void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *mp = listp;

	cmn_err(CE_CONT, "%s:\n", title);

	while (mp != NULL) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    mp->address, mp->size);
		mp = mp->next;
	}
}
#endif /* MEMSCRUB_DEBUG */
228 
/*
 * Callback that memscrubber() schedules with timeout(); it wakes the
 * scrubber thread sleeping on memscrub_cv.  The argument is unused.
 */
/* ARGSUSED */
void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 * (memscrubber() schedules this callback while holding memscrub_lock
	 * and only releases the lock inside cv_wait().)
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}
241 
242 /*
243  * this calculation doesn't account for the time that the actual scan
244  * consumes -- so we'd fall slightly behind schedule with this
245  * interval_sec.  but the idle loop optimization below usually makes us
246  * come in way ahead of schedule.
247  */
248 static int
249 compute_interval_sec()
250 {
251 	if (memscrub_phys_pages <= memscrub_span_pages)
252 		return (memscrub_period_sec);
253 	else
254 		return (memscrub_period_sec/
255 			(memscrub_phys_pages/memscrub_span_pages));
256 }
257 
258 void
259 memscrubber()
260 {
261 	time_t deadline;
262 	uint64_t mlp_last_addr;
263 	uint64_t mlp_next_addr;
264 	int reached_end = 1;
265 	time_t interval_sec = 0;
266 	struct memlist *mlp;
267 
268 	extern void scan_memory(caddr_t, size_t);
269 
270 	if (memscrub_memlist == NULL) {
271 		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
272 		goto memscrub_exit;
273 	}
274 
275 	mlp = memscrub_memlist;
276 	mlp_next_addr = mlp->address;
277 	mlp_last_addr = mlp->address + mlp->size;
278 
279 	deadline = gethrestime_sec() + memscrub_delay_start_sec;
280 
281 	for (;;) {
282 		if (disable_memscrub || disable_memscrub_quietly)
283 			break;
284 
285 		mutex_enter(&memscrub_lock);
286 
287 		/*
288 		 * did we just reach the end of memory?
289 		 */
290 		if (reached_end) {
291 			time_t now = gethrestime_sec();
292 
293 			if (now >= deadline) {
294 				memscrub_done_late++;
295 				memscrub_late_sec += (now - deadline);
296 				/*
297 				 * past deadline, start right away
298 				 */
299 				interval_sec = 0;
300 
301 				deadline = now + memscrub_period_sec;
302 			} else {
303 				/*
304 				 * we finished ahead of schedule.
305 				 * wait till previous dealine before re-start.
306 				 */
307 				interval_sec = deadline - now;
308 				memscrub_done_early++;
309 				memscrub_early_sec += interval_sec;
310 				deadline += memscrub_period_sec;
311 			}
312 		} else {
313 			interval_sec = compute_interval_sec();
314 		}
315 
316 		/*
317 		 * hit the snooze bar
318 		 */
319 		(void) timeout(memscrub_wakeup, NULL, interval_sec * hz);
320 
321 		/*
322 		 * go to sleep
323 		 */
324 		cv_wait(&memscrub_cv, &memscrub_lock);
325 
326 		mutex_exit(&memscrub_lock);
327 
328 		do {
329 			pgcnt_t pages = memscrub_span_pages;
330 			uint64_t address = mlp_next_addr;
331 
332 			if (disable_memscrub || disable_memscrub_quietly)
333 				break;
334 
335 			mutex_enter(&memscrub_lock);
336 
337 			/*
338 			 * Make sure we don't try to scan beyond the end of
339 			 * the current memlist.  If we would, then resize
340 			 * our scan target for this iteration, and prepare
341 			 * to read the next memlist entry on the next
342 			 * iteration.
343 			 */
344 			reached_end = 0;
345 			if (address + mmu_ptob(pages) >= mlp_last_addr) {
346 				pages = mmu_btop(mlp_last_addr - address);
347 				mlp = mlp->next;
348 				if (mlp == NULL) {
349 					reached_end = 1;
350 					mlp = memscrub_memlist;
351 				}
352 				mlp_next_addr = mlp->address;
353 				mlp_last_addr = mlp->address + mlp->size;
354 			} else {
355 				mlp_next_addr += mmu_ptob(pages);
356 			}
357 
358 			mutex_exit(&memscrub_lock);
359 
360 			while (pages--) {
361 				pfn_t pfn = btop(address);
362 
363 				/*
364 				 * Without segkpm, the memscrubber cannot
365 				 * be allowed to migrate across CPUs, as
366 				 * the CPU-specific mapping of
367 				 * memscrub_window would be incorrect.
368 				 * With segkpm, switching CPUs is legal, but
369 				 * inefficient.  We don't use
370 				 * kpreempt_disable as it might hold a
371 				 * higher priority thread (eg, RT) too long
372 				 * off CPU.
373 				 */
374 				thread_affinity_set(curthread, CPU_CURRENT);
375 				if (kpm_enable)
376 					memscrub_window = hat_kpm_pfn2va(pfn);
377 				else
378 					hat_mempte_remap(pfn, memscrub_window,
379 					    memscrub_pte,
380 					    PROT_READ, HAT_LOAD_NOCONSIST);
381 
382 				scan_memory(memscrub_window, PAGESIZE);
383 
384 				thread_affinity_clear(curthread);
385 				address += MMU_PAGESIZE;
386 			}
387 
388 			memscrub_scans_done++;
389 		} while (!reached_end && system_is_idle());
390 	}
391 
392 memscrub_exit:
393 
394 	if (!disable_memscrub_quietly)
395 		cmn_err(CE_NOTE, "memory scrubber exiting.");
396 
397 	cv_destroy(&memscrub_cv);
398 
399 	thread_exit();
400 }
401 
402 
403 /*
404  * return 1 if we're MP and all the other CPUs are idle
405  */
406 static int
407 system_is_idle()
408 {
409 	int cpu_id;
410 	int found = 0;
411 
412 	if (1 == ncpus_online)
413 		return (0);
414 
415 	for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) {
416 		if (!cpu[cpu_id])
417 			continue;
418 
419 		found++;
420 
421 		if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) {
422 			if (CPU->cpu_id == cpu_id &&
423 			    CPU->cpu_disp->disp_nrunnable == 0)
424 				continue;
425 			return (0);
426 		}
427 
428 		if (found == ncpus)
429 			break;
430 	}
431 	return (1);
432 }
433 
434 /*
435  * add a span to the memscrub list
436  */
437 static int
438 memscrub_add_span(uint64_t start, uint64_t bytes)
439 {
440 	struct memlist *dst;
441 	struct memlist *prev, *next;
442 	uint64_t end = start + bytes - 1;
443 	int retval = 0;
444 
445 	mutex_enter(&memscrub_lock);
446 
447 #ifdef MEMSCRUB_DEBUG
448 	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
449 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
450 	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
451 		" size: 0x%llx\n", start, bytes);
452 #endif /* MEMSCRUB_DEBUG */
453 
454 	/*
455 	 * Scan through the list to find the proper place to install it.
456 	 */
457 	prev = NULL;
458 	next = memscrub_memlist;
459 	while (next) {
460 		uint64_t ns = next->address;
461 		uint64_t ne = next->address + next->size - 1;
462 
463 		/*
464 		 * If this span overlaps with an existing span, then
465 		 * something has gone horribly wrong with the phys_install
466 		 * list.  In fact, I'm surprised we made it this far.
467 		 */
468 		if ((start >= ns && start <= ne) || (end >= ns && end <= ne) ||
469 		    (start < ns && end > ne))
470 			panic("memscrub found overlapping memory ranges "
471 			    "(0x%p-0x%p) and (0x%p-0x%p)",
472 			    (void *)(uintptr_t)start, (void *)(uintptr_t)end,
473 			    (void *)(uintptr_t)ns, (void *)(uintptr_t)ne);
474 
475 		/*
476 		 * New span can be appended to an existing one.
477 		 */
478 		if (start == ne + 1) {
479 			next->size += bytes;
480 			goto add_done;
481 		}
482 
483 		/*
484 		 * New span can be prepended to an existing one.
485 		 */
486 		if (end + 1 == ns) {
487 			next->size += bytes;
488 			next->address = start;
489 			goto add_done;
490 		}
491 
492 		/*
493 		 * If the next span has a higher start address than the new
494 		 * one, then we have found the right spot for our
495 		 * insertion.
496 		 */
497 		if (ns > start)
498 			break;
499 
500 		prev = next;
501 		next = next->next;
502 	}
503 
504 	/*
505 	 * allocate a new struct memlist
506 	 */
507 	dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
508 	if (dst == NULL) {
509 		retval = -1;
510 		goto add_done;
511 	}
512 	dst->address = start;
513 	dst->size = bytes;
514 	dst->prev = prev;
515 	dst->next = next;
516 
517 	if (prev)
518 		prev->next = dst;
519 	else
520 		memscrub_memlist = dst;
521 
522 	if (next)
523 		next->prev = dst;
524 
525 add_done:
526 
527 	if (retval != -1)
528 		memscrub_phys_pages += mmu_btop(bytes);
529 
530 #ifdef MEMSCRUB_DEBUG
531 	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
532 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
533 #endif /* MEMSCRUB_DEBUG */
534 
535 	mutex_exit(&memscrub_lock);
536 	return (retval);
537 }
538