xref: /titanic_52/usr/src/uts/i86pc/os/memscrub.c (revision 4d0eb50e691de4c20b1dd9976ad6839fede8a42d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * i86pc Memory Scrubbing
29  *
30  * On detection of a correctable memory ECC error, the i86pc hardware
31  * returns the corrected data to the requester and may re-write it
32  * to memory (DRAM or NVRAM). Machines which do not re-write this to
33  * memory should add an NMI handler to correct and rewrite.
34  *
35  * Scrubbing thus reduces the likelihood that multiple transient errors
36  * will occur in the same memory word, making uncorrectable errors due
37  * to transients less likely.
38  *
39  * Thus is born the desire that every memory location be periodically
40  * accessed.
41  *
42  * This file implements a memory scrubbing thread.  This scrubber
43  * guarantees that all of physical memory is accessed periodically
44  * (memscrub_period_sec -- 12 hours).
45  *
46  * It attempts to do this as unobtrusively as possible.  The thread
47  * schedules itself to wake up at an interval such that if it reads
48  * memscrub_span_pages (4MB) on each wakeup, it will read all of physical
49  * memory in memscrub_period_sec (12 hours).
50  *
51  * The scrubber uses the REP LODS so it reads 4MB in 0.15 secs (on P5-200).
52  * When it completes a span, if all the CPUs are idle, it reads another span.
53  * Typically it soaks up idle time this way to reach its deadline early
54  * -- and sleeps until the next period begins.
55  *
56  * Maximal Cost Estimate:  8GB @ xxMB/s = xxx seconds spent in 640 wakeups
57  * that run for 0.15 seconds at intervals of 67 seconds.
58  *
59  * In practice, the scrubber finds enough idle time to finish in a few
60  * minutes, and sleeps until its 12 hour deadline.
61  *
62  * The scrubber maintains a private copy of the phys_install memory list
63  * to keep track of what memory should be scrubbed.
64  *
65  * The following parameters can be set via /etc/system
66  *
67  * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB)
68  * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
69  * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0)
70  * memscrub_delay_start_sec = (10 seconds)
71  * disable_memscrub = (0)
72  *
73  * the scrubber will exit (or never be started) if it finds the variable
74  * "disable_memscrub" set.
75  *
76  * MEMSCRUB_DFL_SPAN_PAGES  is based on the guess that 0.15 sec
77  * is a "good" amount of minimum time for the thread to run at a time.
78  *
79  * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
80  * twice the frequency the hardware folk estimated would be necessary.
81  *
82  * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly
83  * any other use of the system should be higher priority than scrubbing.
84  */
85 
86 #include <sys/types.h>
87 #include <sys/systm.h>		/* timeout, types, t_lock */
88 #include <sys/cmn_err.h>
89 #include <sys/sysmacros.h>	/* MIN */
90 #include <sys/memlist.h>	/* memlist */
91 #include <sys/kmem.h>		/* KMEM_NOSLEEP */
92 #include <sys/cpuvar.h>		/* ncpus_online */
93 #include <sys/debug.h>		/* ASSERTs */
94 #include <sys/vmem.h>
95 #include <sys/mman.h>
96 #include <vm/seg_kmem.h>
97 #include <vm/seg_kpm.h>
98 #include <vm/hat_i86.h>
99 #include <sys/callb.h>		/* CPR callback */
100 
101 static caddr_t	memscrub_window;
102 static hat_mempte_t memscrub_pte;
103 
104 /*
105  * Global Data:
106  */
107 /*
108  * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
109  */
110 #define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */
111 
112 /*
113  * start only if at least MEMSCRUB_MIN_PAGES in system
114  */
115 #define	MEMSCRUB_MIN_PAGES	((32 * 1024 * 1024) / PAGESIZE)
116 
117 /*
118  * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
119  */
120 #define	MEMSCRUB_DFL_SPAN_PAGES	((4 * 1024 * 1024) / PAGESIZE)
121 
122 /*
123  * almost anything is higher priority than scrubbing
124  */
125 #define	MEMSCRUB_DFL_THREAD_PRI	0
126 
127 /*
128  * we can patch these defaults in /etc/system if necessary
129  */
130 uint_t disable_memscrub = 0;
131 static uint_t disable_memscrub_quietly = 0;
132 pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES;
133 pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
134 time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
135 uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
136 time_t memscrub_delay_start_sec = 10;
137 
138 /*
139  * Static Routines
140  */
141 static void memscrubber(void);
142 static int system_is_idle(void);
143 static int memscrub_add_span(uint64_t, uint64_t);
144 
145 /*
146  * Static Data
147  */
148 static struct memlist *memscrub_memlist;
149 static uint_t memscrub_phys_pages;
150 
151 static kcondvar_t memscrub_cv;
152 static kmutex_t memscrub_lock;
153 
154 /*
155  * memscrub_lock protects memscrub_memlist
156  */
157 uint_t memscrub_scans_done;
158 
159 uint_t memscrub_done_early;
160 uint_t memscrub_early_sec;
161 
162 uint_t memscrub_done_late;
163 time_t memscrub_late_sec;
164 
165 /*
166  * create memscrub_memlist from phys_install list
167  * initialize locks, set memscrub_phys_pages.
168  */
169 void
170 memscrub_init()
171 {
172 	struct memlist *src;
173 
174 	if (physmem < memscrub_min_pages)
175 		return;
176 
177 	if (!kpm_enable) {
178 		memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
179 		memscrub_pte = hat_mempte_setup(memscrub_window);
180 	}
181 
182 	/*
183 	 * copy phys_install to memscrub_memlist
184 	 */
185 	for (src = phys_install; src; src = src->ml_next) {
186 		if (memscrub_add_span(src->ml_address, src->ml_size)) {
187 			cmn_err(CE_WARN,
188 			    "Software memory scrubber failed to initialize\n");
189 			return;
190 		}
191 	}
192 
193 	mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
194 	cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);
195 
196 	/*
197 	 * create memscrubber thread
198 	 */
199 	(void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0,
200 	    TS_RUN, memscrub_thread_pri);
201 }
202 
203 /*
204  * Function to cause the software memscrubber to exit quietly if the
205  * platform support has located a hardware scrubber and enabled it.
206  */
207 void
208 memscrub_disable(void)
209 {
210 	disable_memscrub_quietly = 1;
211 }
212 
213 #ifdef MEMSCRUB_DEBUG
214 static void
215 memscrub_printmemlist(char *title, struct memlist *listp)
216 {
217 	struct memlist *list;
218 
219 	cmn_err(CE_CONT, "%s:\n", title);
220 
221 	for (list = listp; list; list = list->next) {
222 		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
223 		    list->address, list->size);
224 	}
225 }
226 #endif /* MEMSCRUB_DEBUG */
227 
228 /* ARGSUSED */
229 static void
230 memscrub_wakeup(void *c)
231 {
232 	/*
233 	 * grab mutex to guarantee that our wakeup call
234 	 * arrives after we go to sleep -- so we can't sleep forever.
235 	 */
236 	mutex_enter(&memscrub_lock);
237 	cv_signal(&memscrub_cv);
238 	mutex_exit(&memscrub_lock);
239 }
240 
241 /*
242  * this calculation doesn't account for the time that the actual scan
243  * consumes -- so we'd fall slightly behind schedule with this
244  * interval_sec.  but the idle loop optimization below usually makes us
245  * come in way ahead of schedule.
246  */
247 static int
248 compute_interval_sec()
249 {
250 	if (memscrub_phys_pages <= memscrub_span_pages)
251 		return (memscrub_period_sec);
252 	else
253 		return (memscrub_period_sec/
254 		    (memscrub_phys_pages/memscrub_span_pages));
255 }
256 
257 static void
258 memscrubber()
259 {
260 	time_t deadline;
261 	uint64_t mlp_last_addr;
262 	uint64_t mlp_next_addr;
263 	int reached_end = 1;
264 	time_t interval_sec = 0;
265 	struct memlist *mlp;
266 
267 	extern void scan_memory(caddr_t, size_t);
268 	callb_cpr_t cprinfo;
269 
270 	/*
271 	 * notify CPR of our existence
272 	 */
273 	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");
274 
275 	if (memscrub_memlist == NULL) {
276 		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
277 		goto memscrub_exit;
278 	}
279 
280 	mlp = memscrub_memlist;
281 	mlp_next_addr = mlp->ml_address;
282 	mlp_last_addr = mlp->ml_address + mlp->ml_size;
283 
284 	deadline = gethrestime_sec() + memscrub_delay_start_sec;
285 
286 	for (;;) {
287 		if (disable_memscrub || disable_memscrub_quietly)
288 			break;
289 
290 		mutex_enter(&memscrub_lock);
291 
292 		/*
293 		 * did we just reach the end of memory?
294 		 */
295 		if (reached_end) {
296 			time_t now = gethrestime_sec();
297 
298 			if (now >= deadline) {
299 				memscrub_done_late++;
300 				memscrub_late_sec += (now - deadline);
301 				/*
302 				 * past deadline, start right away
303 				 */
304 				interval_sec = 0;
305 
306 				deadline = now + memscrub_period_sec;
307 			} else {
308 				/*
309 				 * we finished ahead of schedule.
310 				 * wait till previous dealine before re-start.
311 				 */
312 				interval_sec = deadline - now;
313 				memscrub_done_early++;
314 				memscrub_early_sec += interval_sec;
315 				deadline += memscrub_period_sec;
316 			}
317 		} else {
318 			interval_sec = compute_interval_sec();
319 		}
320 
321 		/*
322 		 * it is safe from our standpoint for CPR to
323 		 * suspend the system
324 		 */
325 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
326 
327 		/*
328 		 * hit the snooze bar
329 		 */
330 		(void) timeout(memscrub_wakeup, NULL, interval_sec * hz);
331 
332 		/*
333 		 * go to sleep
334 		 */
335 		cv_wait(&memscrub_cv, &memscrub_lock);
336 
337 		/* we need to goto work */
338 		CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);
339 
340 		mutex_exit(&memscrub_lock);
341 
342 		do {
343 			pgcnt_t pages = memscrub_span_pages;
344 			uint64_t address = mlp_next_addr;
345 
346 			if (disable_memscrub || disable_memscrub_quietly)
347 				break;
348 
349 			mutex_enter(&memscrub_lock);
350 
351 			/*
352 			 * Make sure we don't try to scan beyond the end of
353 			 * the current memlist.  If we would, then resize
354 			 * our scan target for this iteration, and prepare
355 			 * to read the next memlist entry on the next
356 			 * iteration.
357 			 */
358 			reached_end = 0;
359 			if (address + mmu_ptob(pages) >= mlp_last_addr) {
360 				pages = mmu_btop(mlp_last_addr - address);
361 				mlp = mlp->ml_next;
362 				if (mlp == NULL) {
363 					reached_end = 1;
364 					mlp = memscrub_memlist;
365 				}
366 				mlp_next_addr = mlp->ml_address;
367 				mlp_last_addr = mlp->ml_address + mlp->ml_size;
368 			} else {
369 				mlp_next_addr += mmu_ptob(pages);
370 			}
371 
372 			mutex_exit(&memscrub_lock);
373 
374 			while (pages--) {
375 				pfn_t pfn = btop(address);
376 
377 				/*
378 				 * Without segkpm, the memscrubber cannot
379 				 * be allowed to migrate across CPUs, as
380 				 * the CPU-specific mapping of
381 				 * memscrub_window would be incorrect.
382 				 * With segkpm, switching CPUs is legal, but
383 				 * inefficient.  We don't use
384 				 * kpreempt_disable as it might hold a
385 				 * higher priority thread (eg, RT) too long
386 				 * off CPU.
387 				 */
388 				thread_affinity_set(curthread, CPU_CURRENT);
389 				if (kpm_enable)
390 					memscrub_window = hat_kpm_pfn2va(pfn);
391 				else
392 					hat_mempte_remap(pfn, memscrub_window,
393 					    memscrub_pte,
394 					    PROT_READ, HAT_LOAD_NOCONSIST);
395 
396 				scan_memory(memscrub_window, PAGESIZE);
397 
398 				thread_affinity_clear(curthread);
399 				address += MMU_PAGESIZE;
400 			}
401 
402 			memscrub_scans_done++;
403 		} while (!reached_end && system_is_idle());
404 	}
405 
406 memscrub_exit:
407 
408 	if (!disable_memscrub_quietly)
409 		cmn_err(CE_NOTE, "Software memory scrubber exiting.");
410 	/*
411 	 * We are about to bail, but don't have the memscrub_lock,
412 	 * and it is needed for CALLB_CPR_EXIT.
413 	 */
414 	mutex_enter(&memscrub_lock);
415 	CALLB_CPR_EXIT(&cprinfo);
416 
417 	cv_destroy(&memscrub_cv);
418 
419 	thread_exit();
420 }
421 
422 
423 /*
424  * return 1 if we're MP and all the other CPUs are idle
425  */
426 static int
427 system_is_idle()
428 {
429 	int cpu_id;
430 	int found = 0;
431 
432 	if (1 == ncpus_online)
433 		return (0);
434 
435 	for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) {
436 		if (!cpu[cpu_id])
437 			continue;
438 
439 		found++;
440 
441 		if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) {
442 			if (CPU->cpu_id == cpu_id &&
443 			    CPU->cpu_disp->disp_nrunnable == 0)
444 				continue;
445 			return (0);
446 		}
447 
448 		if (found == ncpus)
449 			break;
450 	}
451 	return (1);
452 }
453 
454 /*
455  * add a span to the memscrub list
456  */
457 static int
458 memscrub_add_span(uint64_t start, uint64_t bytes)
459 {
460 	struct memlist *dst;
461 	struct memlist *prev, *next;
462 	uint64_t end = start + bytes - 1;
463 	int retval = 0;
464 
465 	mutex_enter(&memscrub_lock);
466 
467 #ifdef MEMSCRUB_DEBUG
468 	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
469 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
470 	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
471 	    " size: 0x%llx\n", start, bytes);
472 #endif /* MEMSCRUB_DEBUG */
473 
474 	/*
475 	 * Scan through the list to find the proper place to install it.
476 	 */
477 	prev = NULL;
478 	next = memscrub_memlist;
479 	while (next) {
480 		uint64_t ns = next->ml_address;
481 		uint64_t ne = next->ml_address + next->ml_size - 1;
482 
483 		/*
484 		 * If this span overlaps with an existing span, then
485 		 * something has gone horribly wrong with the phys_install
486 		 * list.  In fact, I'm surprised we made it this far.
487 		 */
488 		if ((start >= ns && start <= ne) || (end >= ns && end <= ne) ||
489 		    (start < ns && end > ne))
490 			panic("memscrub found overlapping memory ranges "
491 			    "(0x%p-0x%p) and (0x%p-0x%p)",
492 			    (void *)(uintptr_t)start, (void *)(uintptr_t)end,
493 			    (void *)(uintptr_t)ns, (void *)(uintptr_t)ne);
494 
495 		/*
496 		 * New span can be appended to an existing one.
497 		 */
498 		if (start == ne + 1) {
499 			next->ml_size += bytes;
500 			goto add_done;
501 		}
502 
503 		/*
504 		 * New span can be prepended to an existing one.
505 		 */
506 		if (end + 1 == ns) {
507 			next->ml_size += bytes;
508 			next->ml_address = start;
509 			goto add_done;
510 		}
511 
512 		/*
513 		 * If the next span has a higher start address than the new
514 		 * one, then we have found the right spot for our
515 		 * insertion.
516 		 */
517 		if (ns > start)
518 			break;
519 
520 		prev = next;
521 		next = next->ml_next;
522 	}
523 
524 	/*
525 	 * allocate a new struct memlist
526 	 */
527 	dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
528 	if (dst == NULL) {
529 		retval = -1;
530 		goto add_done;
531 	}
532 	dst->ml_address = start;
533 	dst->ml_size = bytes;
534 	dst->ml_prev = prev;
535 	dst->ml_next = next;
536 
537 	if (prev)
538 		prev->ml_next = dst;
539 	else
540 		memscrub_memlist = dst;
541 
542 	if (next)
543 		next->ml_prev = dst;
544 
545 add_done:
546 
547 	if (retval != -1)
548 		memscrub_phys_pages += mmu_btop(bytes);
549 
550 #ifdef MEMSCRUB_DEBUG
551 	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
552 	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
553 #endif /* MEMSCRUB_DEBUG */
554 
555 	mutex_exit(&memscrub_lock);
556 	return (retval);
557 }
558