17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5ae115bc7Smrj * Common Development and Distribution License (the "License"). 6ae115bc7Smrj * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217aec1d6eScindi 227c478bd9Sstevel@tonic-gate /* 23ae115bc7Smrj * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate /* 307c478bd9Sstevel@tonic-gate * i86pc Memory Scrubbing 317c478bd9Sstevel@tonic-gate * 327c478bd9Sstevel@tonic-gate * On detection of a correctable memory ECC error, the i86pc hardware 337c478bd9Sstevel@tonic-gate * returns the corrected data to the requester and may re-write it 347c478bd9Sstevel@tonic-gate * to memory (DRAM or NVRAM). Machines which do not re-write this to 357c478bd9Sstevel@tonic-gate * memory should add an NMI handler to correct and rewrite. 367c478bd9Sstevel@tonic-gate * 377c478bd9Sstevel@tonic-gate * Scrubbing thus reduces the likelyhood that multiple transient errors 387c478bd9Sstevel@tonic-gate * will occur in the same memory word, making uncorrectable errors due 397c478bd9Sstevel@tonic-gate * to transients less likely. 407c478bd9Sstevel@tonic-gate * 417c478bd9Sstevel@tonic-gate * Thus is born the desire that every memory location be periodically 427c478bd9Sstevel@tonic-gate * accessed. 437c478bd9Sstevel@tonic-gate * 447c478bd9Sstevel@tonic-gate * This file implements a memory scrubbing thread. This scrubber 457c478bd9Sstevel@tonic-gate * guarantees that all of physical memory is accessed periodically 467c478bd9Sstevel@tonic-gate * (memscrub_period_sec -- 12 hours). 477c478bd9Sstevel@tonic-gate * 487c478bd9Sstevel@tonic-gate * It attempts to do this as unobtrusively as possible. The thread 497c478bd9Sstevel@tonic-gate * schedules itself to wake up at an interval such that if it reads 507c478bd9Sstevel@tonic-gate * memscrub_span_pages (4MB) on each wakeup, it will read all of physical 517c478bd9Sstevel@tonic-gate * memory in in memscrub_period_sec (12 hours). 527c478bd9Sstevel@tonic-gate * 537c478bd9Sstevel@tonic-gate * The scrubber uses the REP LODS so it reads 4MB in 0.15 secs (on P5-200). 547c478bd9Sstevel@tonic-gate * When it completes a span, if all the CPUs are idle, it reads another span. 557c478bd9Sstevel@tonic-gate * Typically it soaks up idle time this way to reach its deadline early 567c478bd9Sstevel@tonic-gate * -- and sleeps until the next period begins. 577c478bd9Sstevel@tonic-gate * 587c478bd9Sstevel@tonic-gate * Maximal Cost Estimate: 8GB @ xxMB/s = xxx seconds spent in 640 wakeups 597c478bd9Sstevel@tonic-gate * that run for 0.15 seconds at intervals of 67 seconds. 607c478bd9Sstevel@tonic-gate * 617c478bd9Sstevel@tonic-gate * In practice, the scrubber finds enough idle time to finish in a few 627c478bd9Sstevel@tonic-gate * minutes, and sleeps until its 12 hour deadline. 637c478bd9Sstevel@tonic-gate * 647c478bd9Sstevel@tonic-gate * The scrubber maintains a private copy of the phys_install memory list 657c478bd9Sstevel@tonic-gate * to keep track of what memory should be scrubbed. 667c478bd9Sstevel@tonic-gate * 677c478bd9Sstevel@tonic-gate * The following parameters can be set via /etc/system 687c478bd9Sstevel@tonic-gate * 697c478bd9Sstevel@tonic-gate * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB) 707c478bd9Sstevel@tonic-gate * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours) 717c478bd9Sstevel@tonic-gate * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0) 727c478bd9Sstevel@tonic-gate * memscrub_delay_start_sec = (10 seconds) 737c478bd9Sstevel@tonic-gate * disable_memscrub = (0) 747c478bd9Sstevel@tonic-gate * 757c478bd9Sstevel@tonic-gate * the scrubber will exit (or never be started) if it finds the variable 767c478bd9Sstevel@tonic-gate * "disable_memscrub" set. 777c478bd9Sstevel@tonic-gate * 787c478bd9Sstevel@tonic-gate * MEMSCRUB_DFL_SPAN_PAGES is based on the guess that 0.15 sec 797c478bd9Sstevel@tonic-gate * is a "good" amount of minimum time for the thread to run at a time. 807c478bd9Sstevel@tonic-gate * 817c478bd9Sstevel@tonic-gate * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess -- 827c478bd9Sstevel@tonic-gate * twice the frequency the hardware folk estimated would be necessary. 837c478bd9Sstevel@tonic-gate * 847c478bd9Sstevel@tonic-gate * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly 857c478bd9Sstevel@tonic-gate * any other use of the system should be higher priority than scrubbing. 867c478bd9Sstevel@tonic-gate */ 877c478bd9Sstevel@tonic-gate 887c478bd9Sstevel@tonic-gate #include <sys/types.h> 897c478bd9Sstevel@tonic-gate #include <sys/systm.h> /* timeout, types, t_lock */ 907c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 917c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> /* MIN */ 927c478bd9Sstevel@tonic-gate #include <sys/memlist.h> /* memlist */ 937c478bd9Sstevel@tonic-gate #include <sys/kmem.h> /* KMEM_NOSLEEP */ 947c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> /* ncpus_online */ 957c478bd9Sstevel@tonic-gate #include <sys/debug.h> /* ASSERTs */ 967c478bd9Sstevel@tonic-gate #include <sys/vmem.h> 977c478bd9Sstevel@tonic-gate #include <sys/mman.h> 987c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 997c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h> 1007c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h> 101*2df1fe9cSrandyf #include <sys/callb.h> /* CPR callback */ 1027c478bd9Sstevel@tonic-gate 1037c478bd9Sstevel@tonic-gate static caddr_t memscrub_window; 104ae115bc7Smrj static hat_mempte_t memscrub_pte; 1057c478bd9Sstevel@tonic-gate 1067c478bd9Sstevel@tonic-gate /* 1077c478bd9Sstevel@tonic-gate * Global Data: 1087c478bd9Sstevel@tonic-gate */ 1097c478bd9Sstevel@tonic-gate /* 1107c478bd9Sstevel@tonic-gate * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC 1117c478bd9Sstevel@tonic-gate */ 1127c478bd9Sstevel@tonic-gate #define MEMSCRUB_DFL_PERIOD_SEC (12 * 60 * 60) /* 12 hours */ 1137c478bd9Sstevel@tonic-gate 1147c478bd9Sstevel@tonic-gate /* 1157c478bd9Sstevel@tonic-gate * start only if at least MEMSCRUB_MIN_PAGES in system 1167c478bd9Sstevel@tonic-gate */ 1177c478bd9Sstevel@tonic-gate #define MEMSCRUB_MIN_PAGES ((32 * 1024 * 1024) / PAGESIZE) 1187c478bd9Sstevel@tonic-gate 1197c478bd9Sstevel@tonic-gate /* 1207c478bd9Sstevel@tonic-gate * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration 1217c478bd9Sstevel@tonic-gate */ 1227c478bd9Sstevel@tonic-gate #define MEMSCRUB_DFL_SPAN_PAGES ((4 * 1024 * 1024) / PAGESIZE) 1237c478bd9Sstevel@tonic-gate 1247c478bd9Sstevel@tonic-gate /* 1257c478bd9Sstevel@tonic-gate * almost anything is higher priority than scrubbing 1267c478bd9Sstevel@tonic-gate */ 1277c478bd9Sstevel@tonic-gate #define MEMSCRUB_DFL_THREAD_PRI 0 1287c478bd9Sstevel@tonic-gate 1297c478bd9Sstevel@tonic-gate /* 1307c478bd9Sstevel@tonic-gate * we can patch these defaults in /etc/system if necessary 1317c478bd9Sstevel@tonic-gate */ 1327c478bd9Sstevel@tonic-gate uint_t disable_memscrub = 0; 1337aec1d6eScindi static uint_t disable_memscrub_quietly = 0; 1347c478bd9Sstevel@tonic-gate pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES; 1357c478bd9Sstevel@tonic-gate pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES; 1367c478bd9Sstevel@tonic-gate time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC; 1377c478bd9Sstevel@tonic-gate uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI; 1387c478bd9Sstevel@tonic-gate time_t memscrub_delay_start_sec = 10; 1397c478bd9Sstevel@tonic-gate 1407c478bd9Sstevel@tonic-gate /* 1417c478bd9Sstevel@tonic-gate * Static Routines 1427c478bd9Sstevel@tonic-gate */ 1437c478bd9Sstevel@tonic-gate static void memscrubber(void); 1447c478bd9Sstevel@tonic-gate static int system_is_idle(void); 1457c478bd9Sstevel@tonic-gate static int memscrub_add_span(uint64_t, uint64_t); 1467c478bd9Sstevel@tonic-gate 1477c478bd9Sstevel@tonic-gate /* 1487c478bd9Sstevel@tonic-gate * Static Data 1497c478bd9Sstevel@tonic-gate */ 1507c478bd9Sstevel@tonic-gate static struct memlist *memscrub_memlist; 1517c478bd9Sstevel@tonic-gate static uint_t memscrub_phys_pages; 1527c478bd9Sstevel@tonic-gate 1537c478bd9Sstevel@tonic-gate static kcondvar_t memscrub_cv; 1547c478bd9Sstevel@tonic-gate static kmutex_t memscrub_lock; 1557aec1d6eScindi 1567c478bd9Sstevel@tonic-gate /* 1577c478bd9Sstevel@tonic-gate * memscrub_lock protects memscrub_memlist 1587c478bd9Sstevel@tonic-gate */ 1597c478bd9Sstevel@tonic-gate uint_t memscrub_scans_done; 1607c478bd9Sstevel@tonic-gate 1617c478bd9Sstevel@tonic-gate uint_t memscrub_done_early; 1627c478bd9Sstevel@tonic-gate uint_t memscrub_early_sec; 1637c478bd9Sstevel@tonic-gate 1647c478bd9Sstevel@tonic-gate uint_t memscrub_done_late; 1657c478bd9Sstevel@tonic-gate time_t memscrub_late_sec; 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate /* 1687c478bd9Sstevel@tonic-gate * create memscrub_memlist from phys_install list 1697c478bd9Sstevel@tonic-gate * initialize locks, set memscrub_phys_pages. 1707c478bd9Sstevel@tonic-gate */ 1717c478bd9Sstevel@tonic-gate void 1727c478bd9Sstevel@tonic-gate memscrub_init() 1737c478bd9Sstevel@tonic-gate { 1747c478bd9Sstevel@tonic-gate struct memlist *src; 1757c478bd9Sstevel@tonic-gate 1767c478bd9Sstevel@tonic-gate if (physmem < memscrub_min_pages) 1777c478bd9Sstevel@tonic-gate return; 1787c478bd9Sstevel@tonic-gate 1797c478bd9Sstevel@tonic-gate if (!kpm_enable) { 1807c478bd9Sstevel@tonic-gate memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 1817c478bd9Sstevel@tonic-gate memscrub_pte = hat_mempte_setup(memscrub_window); 1827c478bd9Sstevel@tonic-gate } 1837c478bd9Sstevel@tonic-gate 1847c478bd9Sstevel@tonic-gate /* 1857c478bd9Sstevel@tonic-gate * copy phys_install to memscrub_memlist 1867c478bd9Sstevel@tonic-gate */ 1877c478bd9Sstevel@tonic-gate for (src = phys_install; src; src = src->next) { 1887c478bd9Sstevel@tonic-gate if (memscrub_add_span(src->address, src->size)) { 1897c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 1907c478bd9Sstevel@tonic-gate "Memory scrubber failed to initialize\n"); 1917c478bd9Sstevel@tonic-gate return; 1927c478bd9Sstevel@tonic-gate } 1937c478bd9Sstevel@tonic-gate } 1947c478bd9Sstevel@tonic-gate 1957c478bd9Sstevel@tonic-gate mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL); 1967c478bd9Sstevel@tonic-gate cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL); 1977c478bd9Sstevel@tonic-gate 1987c478bd9Sstevel@tonic-gate /* 1997c478bd9Sstevel@tonic-gate * create memscrubber thread 2007c478bd9Sstevel@tonic-gate */ 2017c478bd9Sstevel@tonic-gate (void) thread_create(NULL, 0, (void (*)())memscrubber, NULL, 0, &p0, 2027c478bd9Sstevel@tonic-gate TS_RUN, memscrub_thread_pri); 2037c478bd9Sstevel@tonic-gate } 2047c478bd9Sstevel@tonic-gate 2057aec1d6eScindi /* 2067aec1d6eScindi * Function to cause the software memscrubber to exit quietly if the 2077aec1d6eScindi * platform support has located a hardware scrubber and enabled it. 2087aec1d6eScindi */ 2097aec1d6eScindi void 2107aec1d6eScindi memscrub_disable(void) 2117aec1d6eScindi { 2127aec1d6eScindi disable_memscrub_quietly = 1; 2137aec1d6eScindi } 2147aec1d6eScindi 2157c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG 2167c478bd9Sstevel@tonic-gate void 2177c478bd9Sstevel@tonic-gate memscrub_printmemlist(char *title, struct memlist *listp) 2187c478bd9Sstevel@tonic-gate { 2197c478bd9Sstevel@tonic-gate struct memlist *list; 2207c478bd9Sstevel@tonic-gate 2217c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "%s:\n", title); 2227c478bd9Sstevel@tonic-gate 2237c478bd9Sstevel@tonic-gate for (list = listp; list; list = list->next) { 2247c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n", 2257c478bd9Sstevel@tonic-gate list->address, list->size); 2267c478bd9Sstevel@tonic-gate } 2277c478bd9Sstevel@tonic-gate } 2287c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */ 2297c478bd9Sstevel@tonic-gate 2307c478bd9Sstevel@tonic-gate /* ARGSUSED */ 2317c478bd9Sstevel@tonic-gate void 2327c478bd9Sstevel@tonic-gate memscrub_wakeup(void *c) 2337c478bd9Sstevel@tonic-gate { 2347c478bd9Sstevel@tonic-gate /* 2357c478bd9Sstevel@tonic-gate * grab mutex to guarantee that our wakeup call 2367c478bd9Sstevel@tonic-gate * arrives after we go to sleep -- so we can't sleep forever. 2377c478bd9Sstevel@tonic-gate */ 2387c478bd9Sstevel@tonic-gate mutex_enter(&memscrub_lock); 2397c478bd9Sstevel@tonic-gate cv_signal(&memscrub_cv); 2407c478bd9Sstevel@tonic-gate mutex_exit(&memscrub_lock); 2417c478bd9Sstevel@tonic-gate } 2427c478bd9Sstevel@tonic-gate 2437c478bd9Sstevel@tonic-gate /* 2447c478bd9Sstevel@tonic-gate * this calculation doesn't account for the time that the actual scan 2457c478bd9Sstevel@tonic-gate * consumes -- so we'd fall slightly behind schedule with this 2467c478bd9Sstevel@tonic-gate * interval_sec. but the idle loop optimization below usually makes us 2477c478bd9Sstevel@tonic-gate * come in way ahead of schedule. 2487c478bd9Sstevel@tonic-gate */ 2497c478bd9Sstevel@tonic-gate static int 2507c478bd9Sstevel@tonic-gate compute_interval_sec() 2517c478bd9Sstevel@tonic-gate { 2527c478bd9Sstevel@tonic-gate if (memscrub_phys_pages <= memscrub_span_pages) 2537c478bd9Sstevel@tonic-gate return (memscrub_period_sec); 2547c478bd9Sstevel@tonic-gate else 2557c478bd9Sstevel@tonic-gate return (memscrub_period_sec/ 2567c478bd9Sstevel@tonic-gate (memscrub_phys_pages/memscrub_span_pages)); 2577c478bd9Sstevel@tonic-gate } 2587c478bd9Sstevel@tonic-gate 2597c478bd9Sstevel@tonic-gate void 2607c478bd9Sstevel@tonic-gate memscrubber() 2617c478bd9Sstevel@tonic-gate { 2627c478bd9Sstevel@tonic-gate time_t deadline; 2637c478bd9Sstevel@tonic-gate uint64_t mlp_last_addr; 2647c478bd9Sstevel@tonic-gate uint64_t mlp_next_addr; 2657c478bd9Sstevel@tonic-gate int reached_end = 1; 2667c478bd9Sstevel@tonic-gate time_t interval_sec = 0; 2677c478bd9Sstevel@tonic-gate struct memlist *mlp; 2687c478bd9Sstevel@tonic-gate 2697c478bd9Sstevel@tonic-gate extern void scan_memory(caddr_t, size_t); 270*2df1fe9cSrandyf callb_cpr_t cprinfo; 271*2df1fe9cSrandyf 272*2df1fe9cSrandyf /* 273*2df1fe9cSrandyf * notify CPR of our existence 274*2df1fe9cSrandyf */ 275*2df1fe9cSrandyf CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub"); 2767c478bd9Sstevel@tonic-gate 2777c478bd9Sstevel@tonic-gate if (memscrub_memlist == NULL) { 2787c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "memscrub_memlist not initialized."); 2797c478bd9Sstevel@tonic-gate goto memscrub_exit; 2807c478bd9Sstevel@tonic-gate } 2817c478bd9Sstevel@tonic-gate 2827c478bd9Sstevel@tonic-gate mlp = memscrub_memlist; 2837c478bd9Sstevel@tonic-gate mlp_next_addr = mlp->address; 2847c478bd9Sstevel@tonic-gate mlp_last_addr = mlp->address + mlp->size; 2857c478bd9Sstevel@tonic-gate 2867c478bd9Sstevel@tonic-gate deadline = gethrestime_sec() + memscrub_delay_start_sec; 2877c478bd9Sstevel@tonic-gate 2887c478bd9Sstevel@tonic-gate for (;;) { 2897aec1d6eScindi if (disable_memscrub || disable_memscrub_quietly) 2907c478bd9Sstevel@tonic-gate break; 2917c478bd9Sstevel@tonic-gate 2927c478bd9Sstevel@tonic-gate mutex_enter(&memscrub_lock); 2937c478bd9Sstevel@tonic-gate 2947c478bd9Sstevel@tonic-gate /* 2957c478bd9Sstevel@tonic-gate * did we just reach the end of memory? 2967c478bd9Sstevel@tonic-gate */ 2977c478bd9Sstevel@tonic-gate if (reached_end) { 2987c478bd9Sstevel@tonic-gate time_t now = gethrestime_sec(); 2997c478bd9Sstevel@tonic-gate 3007c478bd9Sstevel@tonic-gate if (now >= deadline) { 3017c478bd9Sstevel@tonic-gate memscrub_done_late++; 3027c478bd9Sstevel@tonic-gate memscrub_late_sec += (now - deadline); 3037c478bd9Sstevel@tonic-gate /* 3047c478bd9Sstevel@tonic-gate * past deadline, start right away 3057c478bd9Sstevel@tonic-gate */ 3067c478bd9Sstevel@tonic-gate interval_sec = 0; 3077c478bd9Sstevel@tonic-gate 3087c478bd9Sstevel@tonic-gate deadline = now + memscrub_period_sec; 3097c478bd9Sstevel@tonic-gate } else { 3107c478bd9Sstevel@tonic-gate /* 3117c478bd9Sstevel@tonic-gate * we finished ahead of schedule. 3127c478bd9Sstevel@tonic-gate * wait till previous dealine before re-start. 3137c478bd9Sstevel@tonic-gate */ 3147c478bd9Sstevel@tonic-gate interval_sec = deadline - now; 3157c478bd9Sstevel@tonic-gate memscrub_done_early++; 3167c478bd9Sstevel@tonic-gate memscrub_early_sec += interval_sec; 3177c478bd9Sstevel@tonic-gate deadline += memscrub_period_sec; 3187c478bd9Sstevel@tonic-gate } 3197c478bd9Sstevel@tonic-gate } else { 3207c478bd9Sstevel@tonic-gate interval_sec = compute_interval_sec(); 3217c478bd9Sstevel@tonic-gate } 3227c478bd9Sstevel@tonic-gate 3237c478bd9Sstevel@tonic-gate /* 324*2df1fe9cSrandyf * it is safe from our standpoint for CPR to 325*2df1fe9cSrandyf * suspend the system 326*2df1fe9cSrandyf */ 327*2df1fe9cSrandyf CALLB_CPR_SAFE_BEGIN(&cprinfo); 328*2df1fe9cSrandyf 329*2df1fe9cSrandyf /* 3307c478bd9Sstevel@tonic-gate * hit the snooze bar 3317c478bd9Sstevel@tonic-gate */ 3327c478bd9Sstevel@tonic-gate (void) timeout(memscrub_wakeup, NULL, interval_sec * hz); 3337c478bd9Sstevel@tonic-gate 3347c478bd9Sstevel@tonic-gate /* 3357c478bd9Sstevel@tonic-gate * go to sleep 3367c478bd9Sstevel@tonic-gate */ 3377c478bd9Sstevel@tonic-gate cv_wait(&memscrub_cv, &memscrub_lock); 3387c478bd9Sstevel@tonic-gate 339*2df1fe9cSrandyf /* we need to goto work */ 340*2df1fe9cSrandyf CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock); 341*2df1fe9cSrandyf 3427c478bd9Sstevel@tonic-gate mutex_exit(&memscrub_lock); 3437c478bd9Sstevel@tonic-gate 3447c478bd9Sstevel@tonic-gate do { 3457c478bd9Sstevel@tonic-gate pgcnt_t pages = memscrub_span_pages; 3467c478bd9Sstevel@tonic-gate uint64_t address = mlp_next_addr; 3477c478bd9Sstevel@tonic-gate 3487aec1d6eScindi if (disable_memscrub || disable_memscrub_quietly) 3497c478bd9Sstevel@tonic-gate break; 3507c478bd9Sstevel@tonic-gate 3517c478bd9Sstevel@tonic-gate mutex_enter(&memscrub_lock); 3527c478bd9Sstevel@tonic-gate 3537c478bd9Sstevel@tonic-gate /* 3547c478bd9Sstevel@tonic-gate * Make sure we don't try to scan beyond the end of 3557c478bd9Sstevel@tonic-gate * the current memlist. If we would, then resize 3567c478bd9Sstevel@tonic-gate * our scan target for this iteration, and prepare 3577c478bd9Sstevel@tonic-gate * to read the next memlist entry on the next 3587c478bd9Sstevel@tonic-gate * iteration. 3597c478bd9Sstevel@tonic-gate */ 3607c478bd9Sstevel@tonic-gate reached_end = 0; 3617c478bd9Sstevel@tonic-gate if (address + mmu_ptob(pages) >= mlp_last_addr) { 3627c478bd9Sstevel@tonic-gate pages = mmu_btop(mlp_last_addr - address); 3637c478bd9Sstevel@tonic-gate mlp = mlp->next; 3647c478bd9Sstevel@tonic-gate if (mlp == NULL) { 3657c478bd9Sstevel@tonic-gate reached_end = 1; 3667c478bd9Sstevel@tonic-gate mlp = memscrub_memlist; 3677c478bd9Sstevel@tonic-gate } 3687c478bd9Sstevel@tonic-gate mlp_next_addr = mlp->address; 3697c478bd9Sstevel@tonic-gate mlp_last_addr = mlp->address + mlp->size; 3707c478bd9Sstevel@tonic-gate } else { 3717c478bd9Sstevel@tonic-gate mlp_next_addr += mmu_ptob(pages); 3727c478bd9Sstevel@tonic-gate } 3737c478bd9Sstevel@tonic-gate 3747c478bd9Sstevel@tonic-gate mutex_exit(&memscrub_lock); 3757c478bd9Sstevel@tonic-gate 3767c478bd9Sstevel@tonic-gate while (pages--) { 3777c478bd9Sstevel@tonic-gate pfn_t pfn = btop(address); 3787c478bd9Sstevel@tonic-gate 3797c478bd9Sstevel@tonic-gate /* 3807c478bd9Sstevel@tonic-gate * Without segkpm, the memscrubber cannot 3817c478bd9Sstevel@tonic-gate * be allowed to migrate across CPUs, as 3827c478bd9Sstevel@tonic-gate * the CPU-specific mapping of 3837c478bd9Sstevel@tonic-gate * memscrub_window would be incorrect. 3847c478bd9Sstevel@tonic-gate * With segkpm, switching CPUs is legal, but 3857c478bd9Sstevel@tonic-gate * inefficient. We don't use 3867c478bd9Sstevel@tonic-gate * kpreempt_disable as it might hold a 3877c478bd9Sstevel@tonic-gate * higher priority thread (eg, RT) too long 3887c478bd9Sstevel@tonic-gate * off CPU. 3897c478bd9Sstevel@tonic-gate */ 3907c478bd9Sstevel@tonic-gate thread_affinity_set(curthread, CPU_CURRENT); 3917c478bd9Sstevel@tonic-gate if (kpm_enable) 3927c478bd9Sstevel@tonic-gate memscrub_window = hat_kpm_pfn2va(pfn); 3937c478bd9Sstevel@tonic-gate else 3947c478bd9Sstevel@tonic-gate hat_mempte_remap(pfn, memscrub_window, 3957c478bd9Sstevel@tonic-gate memscrub_pte, 3967c478bd9Sstevel@tonic-gate PROT_READ, HAT_LOAD_NOCONSIST); 3977c478bd9Sstevel@tonic-gate 3987c478bd9Sstevel@tonic-gate scan_memory(memscrub_window, PAGESIZE); 3997c478bd9Sstevel@tonic-gate 4007c478bd9Sstevel@tonic-gate thread_affinity_clear(curthread); 4017c478bd9Sstevel@tonic-gate address += MMU_PAGESIZE; 4027c478bd9Sstevel@tonic-gate } 4037c478bd9Sstevel@tonic-gate 4047c478bd9Sstevel@tonic-gate memscrub_scans_done++; 4057c478bd9Sstevel@tonic-gate } while (!reached_end && system_is_idle()); 4067c478bd9Sstevel@tonic-gate } 4077c478bd9Sstevel@tonic-gate 4087c478bd9Sstevel@tonic-gate memscrub_exit: 4097c478bd9Sstevel@tonic-gate 4107aec1d6eScindi if (!disable_memscrub_quietly) 4117c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "memory scrubber exiting."); 412*2df1fe9cSrandyf /* 413*2df1fe9cSrandyf * We are about to bail, but don't have the memscrub_lock, 414*2df1fe9cSrandyf * and it is needed for CALLB_CPR_EXIT. 415*2df1fe9cSrandyf */ 416*2df1fe9cSrandyf mutex_enter(&memscrub_lock); 417*2df1fe9cSrandyf CALLB_CPR_EXIT(&cprinfo); 4187c478bd9Sstevel@tonic-gate 4197c478bd9Sstevel@tonic-gate cv_destroy(&memscrub_cv); 4207c478bd9Sstevel@tonic-gate 4217c478bd9Sstevel@tonic-gate thread_exit(); 4227c478bd9Sstevel@tonic-gate } 4237c478bd9Sstevel@tonic-gate 4247c478bd9Sstevel@tonic-gate 4257c478bd9Sstevel@tonic-gate /* 4267c478bd9Sstevel@tonic-gate * return 1 if we're MP and all the other CPUs are idle 4277c478bd9Sstevel@tonic-gate */ 4287c478bd9Sstevel@tonic-gate static int 4297c478bd9Sstevel@tonic-gate system_is_idle() 4307c478bd9Sstevel@tonic-gate { 4317c478bd9Sstevel@tonic-gate int cpu_id; 4327c478bd9Sstevel@tonic-gate int found = 0; 4337c478bd9Sstevel@tonic-gate 4347c478bd9Sstevel@tonic-gate if (1 == ncpus_online) 4357c478bd9Sstevel@tonic-gate return (0); 4367c478bd9Sstevel@tonic-gate 4377c478bd9Sstevel@tonic-gate for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) { 4387c478bd9Sstevel@tonic-gate if (!cpu[cpu_id]) 4397c478bd9Sstevel@tonic-gate continue; 4407c478bd9Sstevel@tonic-gate 4417c478bd9Sstevel@tonic-gate found++; 4427c478bd9Sstevel@tonic-gate 4437c478bd9Sstevel@tonic-gate if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) { 4447c478bd9Sstevel@tonic-gate if (CPU->cpu_id == cpu_id && 4457c478bd9Sstevel@tonic-gate CPU->cpu_disp->disp_nrunnable == 0) 4467c478bd9Sstevel@tonic-gate continue; 4477c478bd9Sstevel@tonic-gate return (0); 4487c478bd9Sstevel@tonic-gate } 4497c478bd9Sstevel@tonic-gate 4507c478bd9Sstevel@tonic-gate if (found == ncpus) 4517c478bd9Sstevel@tonic-gate break; 4527c478bd9Sstevel@tonic-gate } 4537c478bd9Sstevel@tonic-gate return (1); 4547c478bd9Sstevel@tonic-gate } 4557c478bd9Sstevel@tonic-gate 4567c478bd9Sstevel@tonic-gate /* 4577c478bd9Sstevel@tonic-gate * add a span to the memscrub list 4587c478bd9Sstevel@tonic-gate */ 4597c478bd9Sstevel@tonic-gate static int 4607c478bd9Sstevel@tonic-gate memscrub_add_span(uint64_t start, uint64_t bytes) 4617c478bd9Sstevel@tonic-gate { 4627c478bd9Sstevel@tonic-gate struct memlist *dst; 4637c478bd9Sstevel@tonic-gate struct memlist *prev, *next; 4647c478bd9Sstevel@tonic-gate uint64_t end = start + bytes - 1; 4657c478bd9Sstevel@tonic-gate int retval = 0; 4667c478bd9Sstevel@tonic-gate 4677c478bd9Sstevel@tonic-gate mutex_enter(&memscrub_lock); 4687c478bd9Sstevel@tonic-gate 4697c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG 4707c478bd9Sstevel@tonic-gate memscrub_printmemlist("memscrub_memlist before", memscrub_memlist); 4717c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages); 4727c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx" 4737c478bd9Sstevel@tonic-gate " size: 0x%llx\n", start, bytes); 4747c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */ 4757c478bd9Sstevel@tonic-gate 4767c478bd9Sstevel@tonic-gate /* 4777c478bd9Sstevel@tonic-gate * Scan through the list to find the proper place to install it. 4787c478bd9Sstevel@tonic-gate */ 4797c478bd9Sstevel@tonic-gate prev = NULL; 4807c478bd9Sstevel@tonic-gate next = memscrub_memlist; 4817c478bd9Sstevel@tonic-gate while (next) { 4827c478bd9Sstevel@tonic-gate uint64_t ns = next->address; 4837c478bd9Sstevel@tonic-gate uint64_t ne = next->address + next->size - 1; 4847c478bd9Sstevel@tonic-gate 4857c478bd9Sstevel@tonic-gate /* 4867c478bd9Sstevel@tonic-gate * If this span overlaps with an existing span, then 4877c478bd9Sstevel@tonic-gate * something has gone horribly wrong with the phys_install 4887c478bd9Sstevel@tonic-gate * list. In fact, I'm surprised we made it this far. 4897c478bd9Sstevel@tonic-gate */ 4907c478bd9Sstevel@tonic-gate if ((start >= ns && start <= ne) || (end >= ns && end <= ne) || 4917c478bd9Sstevel@tonic-gate (start < ns && end > ne)) 4927c478bd9Sstevel@tonic-gate panic("memscrub found overlapping memory ranges " 4937c478bd9Sstevel@tonic-gate "(0x%p-0x%p) and (0x%p-0x%p)", 4943dfcd6dcSdmick (void *)(uintptr_t)start, (void *)(uintptr_t)end, 4953dfcd6dcSdmick (void *)(uintptr_t)ns, (void *)(uintptr_t)ne); 4967c478bd9Sstevel@tonic-gate 4977c478bd9Sstevel@tonic-gate /* 4987c478bd9Sstevel@tonic-gate * New span can be appended to an existing one. 4997c478bd9Sstevel@tonic-gate */ 5007c478bd9Sstevel@tonic-gate if (start == ne + 1) { 5017c478bd9Sstevel@tonic-gate next->size += bytes; 5027c478bd9Sstevel@tonic-gate goto add_done; 5037c478bd9Sstevel@tonic-gate } 5047c478bd9Sstevel@tonic-gate 5057c478bd9Sstevel@tonic-gate /* 5067c478bd9Sstevel@tonic-gate * New span can be prepended to an existing one. 5077c478bd9Sstevel@tonic-gate */ 5087c478bd9Sstevel@tonic-gate if (end + 1 == ns) { 5097c478bd9Sstevel@tonic-gate next->size += bytes; 5107c478bd9Sstevel@tonic-gate next->address = start; 5117c478bd9Sstevel@tonic-gate goto add_done; 5127c478bd9Sstevel@tonic-gate } 5137c478bd9Sstevel@tonic-gate 5147c478bd9Sstevel@tonic-gate /* 5157c478bd9Sstevel@tonic-gate * If the next span has a higher start address than the new 5167c478bd9Sstevel@tonic-gate * one, then we have found the right spot for our 5177c478bd9Sstevel@tonic-gate * insertion. 5187c478bd9Sstevel@tonic-gate */ 5197c478bd9Sstevel@tonic-gate if (ns > start) 5207c478bd9Sstevel@tonic-gate break; 5217c478bd9Sstevel@tonic-gate 5227c478bd9Sstevel@tonic-gate prev = next; 5237c478bd9Sstevel@tonic-gate next = next->next; 5247c478bd9Sstevel@tonic-gate } 5257c478bd9Sstevel@tonic-gate 5267c478bd9Sstevel@tonic-gate /* 5277c478bd9Sstevel@tonic-gate * allocate a new struct memlist 5287c478bd9Sstevel@tonic-gate */ 5297c478bd9Sstevel@tonic-gate dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP); 5307c478bd9Sstevel@tonic-gate if (dst == NULL) { 5317c478bd9Sstevel@tonic-gate retval = -1; 5327c478bd9Sstevel@tonic-gate goto add_done; 5337c478bd9Sstevel@tonic-gate } 5347c478bd9Sstevel@tonic-gate dst->address = start; 5357c478bd9Sstevel@tonic-gate dst->size = bytes; 5367c478bd9Sstevel@tonic-gate dst->prev = prev; 5377c478bd9Sstevel@tonic-gate dst->next = next; 5387c478bd9Sstevel@tonic-gate 5397c478bd9Sstevel@tonic-gate if (prev) 5407c478bd9Sstevel@tonic-gate prev->next = dst; 5417c478bd9Sstevel@tonic-gate else 5427c478bd9Sstevel@tonic-gate memscrub_memlist = dst; 5437c478bd9Sstevel@tonic-gate 5447c478bd9Sstevel@tonic-gate if (next) 5457c478bd9Sstevel@tonic-gate next->prev = dst; 5467c478bd9Sstevel@tonic-gate 5477c478bd9Sstevel@tonic-gate add_done: 5487c478bd9Sstevel@tonic-gate 5497c478bd9Sstevel@tonic-gate if (retval != -1) 5507c478bd9Sstevel@tonic-gate memscrub_phys_pages += mmu_btop(bytes); 5517c478bd9Sstevel@tonic-gate 5527c478bd9Sstevel@tonic-gate #ifdef MEMSCRUB_DEBUG 5537c478bd9Sstevel@tonic-gate memscrub_printmemlist("memscrub_memlist after", memscrub_memlist); 5547c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages); 5557c478bd9Sstevel@tonic-gate #endif /* MEMSCRUB_DEBUG */ 5567c478bd9Sstevel@tonic-gate 5577c478bd9Sstevel@tonic-gate mutex_exit(&memscrub_lock); 5587c478bd9Sstevel@tonic-gate return (retval); 5597c478bd9Sstevel@tonic-gate } 560