/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * sun4u Memory Scrubbing
 *
 * On detection of a correctable memory ECC error, the sun4u kernel
 * returns the corrected data to the requester and re-writes it
 * to memory (DRAM).  So if the correctable error was transient,
 * the error has effectively been cleaned (scrubbed) from memory.
 *
 * Scrubbing thus reduces the likelihood that multiple transient errors
 * will occur in the same memory word, making uncorrectable errors due
 * to transients less likely.
 *
 * Thus is born the desire that every memory location be periodically
 * accessed.
 *
 * This file implements a memory scrubbing thread.  This scrubber
 * guarantees that all of physical memory is accessed periodically
 * (memscrub_period_sec -- 12 hours).
 *
 * It attempts to do this as unobtrusively as possible.  The thread
 * schedules itself to wake up at an interval such that if it reads
 * memscrub_span_pages (8MB) on each wakeup, it will read all of physical
 * memory in memscrub_period_sec (12 hours).
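 *
 * For example, on a machine with 8GB of physical memory there are
 * 1024 8MB spans to cover, so with the defaults the thread wakes up
 * roughly every 43200 sec / 1024 ~= 42 seconds to read one span.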
 *
 * The scrubber uses the block load hardware to read memory @ 268MB/s,
 * so it reads spans of 8MB in 0.03 seconds.  Unlike the original sun4d
 * scrubber the sun4u scrubber does not read ahead if the system is idle
 * because we can read memory very efficiently.
 *
 * The scrubber maintains a private copy of the phys_install memory list
 * to keep track of what memory should be scrubbed.
 *
 * The global routines memscrub_add_span() and memscrub_delete_span() are
 * used to add and delete from this list.  If hotplug memory is later
 * supported these two routines can be used to notify the scrubber of
 * memory configuration changes.
 *
 * The following parameters can be set via /etc/system
 *
 * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (8MB)
 * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
 * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI)
 * memscrub_delay_start_sec = (5 minutes)
 * memscrub_verbose = (0)
 * memscrub_override_ticks = (1 tick)
 * disable_memscrub = (0)
 * pause_memscrub = (0)
 * read_all_memscrub = (0)
 *
 * The scrubber will print NOTICE messages of what it is doing if
 * "memscrub_verbose" is set.
 *
 * If the scrubber's sleep time calculation drops to zero ticks,
 * memscrub_override_ticks will be used as the sleep time instead.  The
 * sleep time should only drop to zero on a system with over 32.95
 * terabytes of memory, or where the default scrubber parameters have
 * been adjusted.  For example, reducing memscrub_span_pages or
 * memscrub_period_sec causes the sleep time to drop to zero with less
 * memory.  Note that since the sleep time is calculated in clock ticks,
 * using hires clock ticks allows for more memory before the sleep time
 * becomes zero.
 *
 * The scrubber will exit (or never be started) if it finds the variable
 * "disable_memscrub" set.
 *
 * The scrubber will pause (not read memory) when "pause_memscrub"
 * is set.  It will check the state of pause_memscrub at each wakeup
 * period.  The scrubber will not make up for lost time.  If you
 * pause the scrubber for a prolonged period of time you can use
 * the "read_all_memscrub" switch (see below) to catch up.  In addition,
 * pause_memscrub is used internally by the post memory DR callbacks.
 * It is set for the small period of time during which the callbacks
 * are executing.  This ensures "memscrub_lock" will be released,
 * allowing the callbacks to finish.
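 *
 * For example, to halve the scrub period and start the scrubber
 * paused, one could add to /etc/system:
 *
 *	set memscrub_period_sec = 21600
 *	set pause_memscrub = 1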
 *
 * The scrubber will read all memory if "read_all_memscrub" is set.
 * The normal span read will also occur during the wakeup.
 *
 * MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system
 * must have before we'll start the scrubber.
 *
 * MEMSCRUB_DFL_SPAN_PAGES (8MB) is based on the guess that 0.03 sec
 * is a "good" amount of minimum time for the thread to run at a time.
 *
 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
 * twice the frequency the hardware folk estimated would be necessary.
 *
 * MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption
 * that the scrubber should get its fair share of time (since it
 * is short).  At a priority of 0 the scrubber will be starved.
 */

#include <sys/systm.h>		/* timeout, types, t_lock */
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>	/* MIN */
#include <sys/memlist.h>	/* memlist */
#include <sys/mem_config.h>	/* memory add/delete */
#include <sys/kmem.h>		/* KMEM_NOSLEEP */
#include <sys/cpuvar.h>		/* ncpus_online */
#include <sys/debug.h>		/* ASSERTs */
#include <sys/machsystm.h>	/* lddphys */
#include <sys/cpu_module.h>	/* vtag_flushpage */
#include <sys/kstat.h>
#include <sys/atomic.h>		/* atomic_add_32 */

#include <vm/hat.h>
#include <vm/seg_kmem.h>
#include <vm/hat_sfmmu.h>	/* XXX FIXME - delete */

#include <sys/time.h>
#include <sys/callb.h>		/* CPR callback */
#include <sys/ontrap.h>

/*
 * Should really have paddr_t defined, but it is broken.  Use
 * ms_paddr_t in the meantime to make the code cleaner
 */
typedef uint64_t ms_paddr_t;

/*
 * Global Routines:
 */
int memscrub_add_span(pfn_t pfn, pgcnt_t pages);
int memscrub_delete_span(pfn_t pfn, pgcnt_t pages);
int memscrub_init(void);

/*
 * Global Data:
 */

/*
 * scrub if we have at least this many pages
 */
#define	MEMSCRUB_MIN_PAGES	(32 * 1024 * 1024 / PAGESIZE)

/*
 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
 */
#define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */

/*
 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
 */
#define	MEMSCRUB_DFL_SPAN_PAGES	((8 * 1024 * 1024) / PAGESIZE)

/*
 * almost anything is higher priority than scrubbing
 */
#define	MEMSCRUB_DFL_THREAD_PRI	MINCLSYSPRI

/*
 * size used when scanning memory
 */
#define	MEMSCRUB_BLOCK_SIZE		256
#define	MEMSCRUB_BLOCK_SIZE_SHIFT	8	/* log2(MEMSCRUB_BLOCK_SIZE) */
#define	MEMSCRUB_BLOCKS_PER_PAGE	(PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)

#define	MEMSCRUB_BPP4M		(MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP512K	(MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP64K		(MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT)
#define	MEMSCRUB_BPP		(MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)

/*
 * This message indicates that we have exceeded the limitations of
 * the memscrubber.  See the comments above regarding what would
 * cause the sleep time to become zero.  In DEBUG mode, this message
 * is logged on the console and in the messages file.  In non-DEBUG
 * mode, it is only logged in the messages file.
 */
#ifdef DEBUG
#define	MEMSCRUB_OVERRIDE_MSG	"Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#else
#define	MEMSCRUB_OVERRIDE_MSG	"!Memory scrubber sleep time is zero " \
	"seconds, consuming entire CPU."
#endif /* DEBUG */

/*
 * we can patch these defaults in /etc/system if necessary
 */
uint_t disable_memscrub = 0;
uint_t pause_memscrub = 0;
uint_t read_all_memscrub = 0;
uint_t memscrub_verbose = 0;
uint_t memscrub_all_idle = 0;
uint_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
uint_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
uint_t memscrub_delay_start_sec = 5 * 60;
uint_t memscrub_override_ticks = 1;

/*
 * Static Routines
 */
static void memscrubber(void);
static void memscrub_cleanup(void);
static int memscrub_add_span_gen(pfn_t, pgcnt_t, struct memlist **, uint_t *);
static int memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp);
static void memscrub_scan(uint_t blks, ms_paddr_t src);

/*
 * Static Data
 */

static struct memlist *memscrub_memlist;
static uint_t memscrub_phys_pages;

static kcondvar_t memscrub_cv;
static kmutex_t memscrub_lock;
/*
 * memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ...
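 * The scrubber thread holds it except while sleeping in cv_wait();
 * memscrub_wakeup() and the add/delete span routines must acquire it
 * before signalling the thread or touching the list.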
 */
static void memscrub_init_mem_config(void);
static void memscrub_uninit_mem_config(void);

/*
 * Keep track of some interesting statistics
 */
static struct memscrub_kstats {
	kstat_named_t	done_early;	/* ahead of schedule */
	kstat_named_t	early_sec;	/* by cumulative num secs */
	kstat_named_t	done_late;	/* behind schedule */
	kstat_named_t	late_sec;	/* by cumulative num secs */
	kstat_named_t	interval_ticks;	/* num ticks between intervals */
	kstat_named_t	force_run;	/* forced to run, non-timeout */
	kstat_named_t	errors_found;	/* num errors found by memscrub */
} memscrub_counts = {
	{ "done_early",		KSTAT_DATA_UINT32 },
	{ "early_sec",		KSTAT_DATA_UINT32 },
	{ "done_late",		KSTAT_DATA_UINT32 },
	{ "late_sec",		KSTAT_DATA_UINT32 },
	{ "interval_ticks",	KSTAT_DATA_UINT32 },
	{ "force_run",		KSTAT_DATA_UINT32 },
	{ "errors_found",	KSTAT_DATA_UINT32 },
};
static struct kstat *memscrub_ksp = (struct kstat *)NULL;

static timeout_id_t memscrub_tid = 0;	/* keep track of timeout id */

/*
 * create memscrub_memlist from phys_install list
 * initialize locks, set memscrub_phys_pages.
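 *
 * Returns 0 on success (including the case where there is too little
 * memory to start the scrubber), -1 if copying phys_install fails.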
 */
int
memscrub_init(void)
{
	struct memlist *src;

	/*
	 * only startup the scrubber if we have a minimum
	 * number of pages
	 */
	if (physinstalled >= MEMSCRUB_MIN_PAGES) {

		/*
		 * initialize locks
		 */
		mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);

		/*
		 * copy phys_install to memscrub_memlist
		 */
		for (src = phys_install; src; src = src->next) {
			if (memscrub_add_span(
			    (pfn_t)(src->address >> PAGESHIFT),
			    (pgcnt_t)(src->size >> PAGESHIFT))) {
				memscrub_cleanup();
				return (-1);
			}
		}

		/*
		 * initialize kstats
		 */
		memscrub_ksp = kstat_create("unix", 0, "memscrub_kstat",
		    "misc", KSTAT_TYPE_NAMED,
		    sizeof (memscrub_counts) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

		if (memscrub_ksp) {
			memscrub_ksp->ks_data = (void *)&memscrub_counts;
			kstat_install(memscrub_ksp);
		} else {
			cmn_err(CE_NOTE, "Memscrubber cannot create kstats\n");
		}

		/*
		 * create memscrubber thread
		 */
		(void) thread_create(NULL, 0, (void (*)())memscrubber,
		    NULL, 0, &p0, TS_RUN, memscrub_thread_pri);

		/*
		 * We don't want call backs changing the list
		 * if there is no thread running.  We do not
		 * attempt to deal with stopping/starting scrubbing
		 * on memory size changes.
		 */
		memscrub_init_mem_config();
	}

	return (0);
}
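
/*
 * The counters registered above are exported as a virtual kstat, so
 * they can be inspected from userland with, for example:
 *
 *	kstat -m unix -n memscrub_kstat
 */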

static void
memscrub_cleanup(void)
{
	memscrub_uninit_mem_config();
	while (memscrub_memlist) {
		(void) memscrub_delete_span(
		    (pfn_t)(memscrub_memlist->address >> PAGESHIFT),
		    (pgcnt_t)(memscrub_memlist->size >> PAGESHIFT));
	}
	if (memscrub_ksp)
		kstat_delete(memscrub_ksp);
	cv_destroy(&memscrub_cv);
	mutex_destroy(&memscrub_lock);
}

#ifdef MEMSCRUB_DEBUG
static void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *list;

	cmn_err(CE_CONT, "%s:\n", title);

	for (list = listp; list; list = list->next) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    list->address, list->size);
	}
}
#endif /* MEMSCRUB_DEBUG */

/* ARGSUSED */
static void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}

/*
 * provide an interface external to the memscrubber
 * which will force the memscrub thread to run vs.
 * waiting for the timeout, if one is set
 */
void
memscrub_run(void)
{
	memscrub_counts.force_run.value.ui32++;
	if (memscrub_tid) {
		(void) untimeout(memscrub_tid);
		memscrub_wakeup((void *)NULL);
	}
}

/*
 * this calculation doesn't account for the time
 * that the actual scan consumes -- so we'd fall
 * slightly behind schedule with this interval.
 * It's very small.
 */
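/*
 * Illustrative arithmetic (assuming 8K pages and hz = 100): with 4GB
 * of memory, memscrub_phys_pages is 0x80000 and memscrub_span_pages
 * is 0x400, so interval_ticks = (43200 * 100) / (0x80000 / 0x400) =
 * 4320000 / 512 = 8437 ticks, i.e. one 8MB span read roughly every
 * 84 seconds.
 */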

static uint_t
compute_interval_ticks(void)
{
	/*
	 * We use msp_safe and mpp_safe below to ensure somebody
	 * doesn't set memscrub_span_pages or memscrub_phys_pages
	 * to 0 on us.
	 */
	static uint_t msp_safe, mpp_safe;
	static uint_t interval_ticks, period_ticks;
	msp_safe = memscrub_span_pages;
	mpp_safe = memscrub_phys_pages;

	period_ticks = memscrub_period_sec * hz;
	interval_ticks = period_ticks;

	ASSERT(mutex_owned(&memscrub_lock));

	if ((msp_safe != 0) && (mpp_safe != 0)) {
		if (memscrub_phys_pages <= msp_safe) {
			interval_ticks = period_ticks;
		} else {
			interval_ticks = (period_ticks /
			    (mpp_safe / msp_safe));
		}
	}
	return (interval_ticks);
}

void
memscrubber(void)
{
	ms_paddr_t address, addr;
	time_t deadline;
	pgcnt_t pages;
	uint_t reached_end = 1;
	uint_t paused_message = 0;
	uint_t interval_ticks = 0;
	uint_t sleep_warn_printed = 0;
	callb_cpr_t cprinfo;

	/*
	 * notify CPR of our existence
	 */
	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");

	mutex_enter(&memscrub_lock);

	if (memscrub_memlist == NULL) {
		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
		goto memscrub_exit;
	}

	address = memscrub_memlist->address;

	deadline = gethrestime_sec() + memscrub_delay_start_sec;

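	/*
	 * Main loop: on each pass recompute the sleep interval, sleep
	 * until the timeout (or a forced wakeup) fires, then read one
	 * span -- or all of memory if read_all_memscrub is set --
	 * updating the kstats as we go.  The loop exits when
	 * disable_memscrub is set or no pages remain to scrub.
	 */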
	for (;;) {
		if (disable_memscrub)
			break;

		/*
		 * compute interval_ticks
		 */
		interval_ticks = compute_interval_ticks();

		/*
		 * If the calculated sleep time is zero, and pause_memscrub
		 * has been set, make sure we sleep so that another thread
		 * can acquire memscrub_lock.
		 */
		if (interval_ticks == 0 && pause_memscrub) {
			interval_ticks = hz;
		}

		/*
		 * And as a fail safe, under normal non-paused operation, do
		 * not allow the sleep time to be zero.
		 */
		if (interval_ticks == 0) {
			interval_ticks = memscrub_override_ticks;
			if (!sleep_warn_printed) {
				cmn_err(CE_NOTE, MEMSCRUB_OVERRIDE_MSG);
				sleep_warn_printed = 1;
			}
		}

		memscrub_counts.interval_ticks.value.ui32 = interval_ticks;

		/*
		 * Did we just reach the end of memory? If we are at the
		 * end of memory, delay end of memory processing until
		 * pause_memscrub is not set.
		 */
		if (reached_end && !pause_memscrub) {
			time_t now = gethrestime_sec();

			if (now >= deadline) {
				memscrub_counts.done_late.value.ui32++;
				memscrub_counts.late_sec.value.ui32 +=
				    (now - deadline);
				/*
				 * past deadline, start right away
				 */
				interval_ticks = 0;

				deadline = now + memscrub_period_sec;
			} else {
				/*
				 * we finished ahead of schedule.
				 * wait till previous deadline before re-start.
				 */
				interval_ticks = (deadline - now) * hz;
				memscrub_counts.done_early.value.ui32++;
				memscrub_counts.early_sec.value.ui32 +=
				    (deadline - now);
				deadline += memscrub_period_sec;
			}
			reached_end = 0;
			sleep_warn_printed = 0;
		}

		if (interval_ticks != 0) {
			/*
			 * it is safe from our standpoint for CPR to
			 * suspend the system
			 */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);

			/*
			 * hit the snooze bar
			 */
			memscrub_tid = timeout(memscrub_wakeup, NULL,
			    interval_ticks);

			/*
			 * go to sleep
			 */
			cv_wait(&memscrub_cv, &memscrub_lock);

			/*
			 * at this point, no timeout should be set
			 */
			memscrub_tid = 0;

			/*
			 * we need to go to work and will be modifying
			 * our internal state and mapping/unmapping
			 * TTEs
			 */
			CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);
		}

		if (memscrub_phys_pages == 0) {
			cmn_err(CE_WARN, "Memory scrubber has 0 pages to read");
			goto memscrub_exit;
		}

		if (!pause_memscrub) {
			if (paused_message) {
				paused_message = 0;
				if (memscrub_verbose)
					cmn_err(CE_NOTE, "Memory scrubber "
					    "resuming");
			}

			if (read_all_memscrub) {
				if (memscrub_verbose)
					cmn_err(CE_NOTE, "Memory scrubber "
					    "reading all memory per request");

				addr = memscrub_memlist->address;
				reached_end = 0;
				while (!reached_end) {
					if (disable_memscrub)
						break;
					pages = memscrub_phys_pages;
					reached_end = memscrub_verify_span(
					    &addr, &pages);
					memscrub_scan(pages *
					    MEMSCRUB_BLOCKS_PER_PAGE, addr);
					addr += ((uint64_t)pages * PAGESIZE);
				}
				read_all_memscrub = 0;
			}

			/*
			 * read 1 span
			 */
			pages = memscrub_span_pages;

			if (disable_memscrub)
				break;

			/*
			 * determine physical address range
			 */
			reached_end = memscrub_verify_span(&address,
			    &pages);

			memscrub_scan(pages * MEMSCRUB_BLOCKS_PER_PAGE,
			    address);

			address += ((uint64_t)pages * PAGESIZE);
		}

		if (pause_memscrub && !paused_message) {
			paused_message = 1;
			if (memscrub_verbose)
				cmn_err(CE_NOTE, "Memory scrubber paused");
		}
	}

memscrub_exit:
	cmn_err(CE_NOTE, "Memory scrubber exiting");
	CALLB_CPR_EXIT(&cprinfo);
	memscrub_cleanup();
	thread_exit();
	/* NOTREACHED */
}

/*
 * condition address and size
 * such that they span legal physical addresses.
 *
 * when appropriate, address will be rounded up to start of next
 * struct memlist, and pages will be rounded down to the end of the
 * memlist size.
 *
 * returns 1 if reached end of list, else returns 0.
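 *
 * For example, if *addrp falls in a gap between two memlist entries,
 * it is advanced to the start of the next entry, and *pagesp is
 * clipped so the span never runs past the end of that entry.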
 */
static int
memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp)
{
	struct memlist *mlp;
	ms_paddr_t address = *addrp;
	uint64_t bytes = (uint64_t)*pagesp * PAGESIZE;
	uint64_t bytes_remaining;
	int reached_end = 0;

	ASSERT(mutex_owned(&memscrub_lock));

	/*
	 * find memlist struct that contains addrp
	 * assumes memlist is sorted by ascending address.
	 */
	for (mlp = memscrub_memlist; mlp != NULL; mlp = mlp->next) {
		/*
		 * if before this chunk, round up to beginning
		 */
		if (address < mlp->address) {
			address = mlp->address;
			break;
		}
		/*
		 * if before end of chunk, then we found it
		 */
		if (address < (mlp->address + mlp->size))
			break;

		/* else go to next struct memlist */
	}
	/*
	 * if we hit end of list, start at beginning
	 */
	if (mlp == NULL) {
		mlp = memscrub_memlist;
		address = mlp->address;
	}

	/*
	 * now we have legal address, and its mlp, condition bytes
	 */
	bytes_remaining = (mlp->address + mlp->size) - address;

	if (bytes > bytes_remaining)
		bytes = bytes_remaining;

	/*
	 * will this span take us to end of list?
	 */
	if ((mlp->next == NULL) &&
	    ((mlp->address + mlp->size) == (address + bytes)))
		reached_end = 1;

	/* return values */
	*addrp = address;
	*pagesp = bytes / PAGESIZE;

	return (reached_end);
}

/*
 * add a span to the memscrub list
 * add to memscrub_phys_pages
 */
int
memscrub_add_span(pfn_t pfn, pgcnt_t pages)
{
#ifdef MEMSCRUB_DEBUG
	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
#endif /* MEMSCRUB_DEBUG */

	int retval;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
	    " size: 0x%llx\n", address, bytes);
#endif /* MEMSCRUB_DEBUG */

	retval = memscrub_add_span_gen(pfn, pages, &memscrub_memlist,
	    &memscrub_phys_pages);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);

	return (retval);
}

static int
memscrub_add_span_gen(
	pfn_t pfn,
	pgcnt_t pages,
	struct memlist **list,
	uint_t *npgs)
{
	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
	struct memlist *dst;
	struct memlist *prev, *next;
	int retval = 0;

	/*
	 * allocate a new struct memlist
	 */

	dst = (struct memlist *)
	    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);

	if (dst == NULL) {
		retval = -1;
		goto add_done;
	}

	dst->address = address;
	dst->size = bytes;

	/*
	 * first insert
	 */
	if (*list == NULL) {
		dst->prev = NULL;
		dst->next = NULL;
		*list = dst;

		goto add_done;
	}

	/*
	 * insert into sorted list
	 */
	for (prev = NULL, next = *list;
	    next != NULL;
	    prev = next, next = next->next) {
		if (address > (next->address + next->size))
			continue;

		/*
		 * else insert here
		 */

		/*
		 * prepend to next
		 */
		if ((address + bytes) == next->address) {
			kmem_free(dst, sizeof (struct memlist));

			next->address = address;
			next->size += bytes;

			goto add_done;
		}

		/*
		 * append to next
		 */
		if (address == (next->address + next->size)) {
			kmem_free(dst, sizeof (struct memlist));

			if (next->next) {
				/*
				 * don't overlap with next->next
				 */
				if ((address + bytes) > next->next->address) {
					retval = -1;
					goto add_done;
				}
				/*
				 * concatenate next and next->next
				 */
				if ((address + bytes) == next->next->address) {
					struct memlist *mlp = next->next;

					if (next == *list)
						*list = next->next;

					mlp->address = next->address;
					mlp->size += next->size;
					mlp->size += bytes;

					if (next->prev)
						next->prev->next = mlp;
					mlp->prev = next->prev;

					kmem_free(next,
					    sizeof (struct memlist));
					goto add_done;
				}
			}

			next->size += bytes;

			goto add_done;
		}

		/* don't overlap with next */
		if ((address + bytes) > next->address) {
			retval = -1;
			kmem_free(dst, sizeof (struct memlist));
			goto add_done;
		}

		/*
		 * insert before next
		 */
		dst->prev = prev;
		dst->next = next;
		next->prev = dst;
		if (prev == NULL) {
			*list = dst;
		} else {
			prev->next = dst;
		}
		goto add_done;
	}	/* end for */

	/*
	 * end of list, prev is valid and next is NULL
	 */
	prev->next = dst;
	dst->prev = prev;
	dst->next = NULL;

add_done:

	if (retval != -1)
		*npgs += pages;

	return (retval);
}

/*
 * delete a span from the memscrub list
 * subtract from memscrub_phys_pages
 */
int
memscrub_delete_span(pfn_t pfn, pgcnt_t pages)
{
	ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
	uint64_t bytes = (uint64_t)pages << PAGESHIFT;
	struct memlist *dst, *next;
	int retval = 0;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist Before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_delete_span: 0x%llx 0x%llx\n",
	    address, bytes);
#endif /* MEMSCRUB_DEBUG */

	/*
	 * find struct memlist containing page
	 */
	for (next = memscrub_memlist; next != NULL; next = next->next) {
		if ((address >= next->address) &&
		    (address < next->address + next->size))
			break;
	}

	/*
	 * if start address not in list
	 */
	if (next == NULL) {
		retval = -1;
		goto delete_done;
	}

	/*
	 * error if size goes off end of this struct memlist
	 */
	if (address + bytes > next->address + next->size) {
		retval = -1;
		goto delete_done;
	}

	/*
	 * pages at beginning of struct memlist
	 */
	if (address == next->address) {
		/*
		 * if start & size match, delete from list
		 */
		if (bytes == next->size) {
			if (next == memscrub_memlist)
				memscrub_memlist = next->next;
			if (next->prev != NULL)
				next->prev->next = next->next;
			if (next->next != NULL)
				next->next->prev = next->prev;

			kmem_free(next, sizeof (struct memlist));
		} else {
			/*
			 * increment start address by bytes
			 */
			next->address += bytes;
			next->size -= bytes;
		}
		goto delete_done;
	}

	/*
	 * pages at end of struct memlist
	 */
	if (address + bytes == next->address + next->size) {
		/*
		 * decrement size by bytes
		 */
		next->size -= bytes;
		goto delete_done;
	}

	/*
	 * delete a span in the middle of the struct memlist
	 */
	{
		/*
		 * create a new struct memlist
		 */
		dst = (struct memlist *)
		    kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);

		if (dst == NULL) {
			retval = -1;
			goto delete_done;
		}

		/*
		 * the existing struct memlist keeps the address and
		 * size up to the start of the deleted span; the new
		 * struct memlist covers from the end of the deleted
		 * span to the end of the old entry
		 */
		dst->address = address + bytes;
		dst->size = (next->address + next->size) - dst->address;
		next->size = address - next->address;

		/*
		 * link in new memlist after old
		 */
		dst->next = next->next;
		dst->prev = next;

		if (next->next != NULL)
			next->next->prev = dst;
		next->next = dst;
	}

delete_done:
	if (retval != -1) {
		memscrub_phys_pages -= pages;
		if (memscrub_phys_pages == 0)
			disable_memscrub = 1;
	}

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist After", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);
	return (retval);
}
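
/*
 * Read 'blks' MEMSCRUB_BLOCK_SIZE (256-byte) blocks of physical memory
 * starting at 'src'.  Each chunk is mapped at MEMSCRUBBASE using the
 * largest page size that fits, read with block loads, then unmapped.
 * Async faults are fielded with on_trap(), and cpu_check_ce() is called
 * after each read to log any correctable errors the scan turned up.
 */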
static void
memscrub_scan(uint_t blks, ms_paddr_t src)
{
	uint_t psz, bpp, pgsread;
	pfn_t pfn;
	ms_paddr_t pa;
	caddr_t va;
	on_trap_data_t otd;

	extern void memscrub_read(caddr_t src, uint_t blks);

	ASSERT(mutex_owned(&memscrub_lock));

	pgsread = 0;
	pa = src;

	while (blks != 0) {
		/* Ensure the PA is properly aligned */
		if (((pa & MMU_PAGEMASK4M) == pa) &&
		    (blks >= MEMSCRUB_BPP4M)) {
			psz = MMU_PAGESIZE4M;
			bpp = MEMSCRUB_BPP4M;
		} else if (((pa & MMU_PAGEMASK512K) == pa) &&
		    (blks >= MEMSCRUB_BPP512K)) {
			psz = MMU_PAGESIZE512K;
			bpp = MEMSCRUB_BPP512K;
		} else if (((pa & MMU_PAGEMASK64K) == pa) &&
		    (blks >= MEMSCRUB_BPP64K)) {
			psz = MMU_PAGESIZE64K;
			bpp = MEMSCRUB_BPP64K;
		} else if ((pa & MMU_PAGEMASK) == pa) {
			psz = MMU_PAGESIZE;
			bpp = MEMSCRUB_BPP;
		} else {
			if (memscrub_verbose) {
				cmn_err(CE_NOTE, "Memory scrubber ignoring "
				    "non-page aligned block starting at 0x%"
				    PRIx64, src);
			}
			return;
		}
		if (blks < bpp)
			bpp = blks;

#ifdef MEMSCRUB_DEBUG
		cmn_err(CE_NOTE, "Going to run psz=%x, "
		    "bpp=%x pa=%llx\n", psz, bpp, pa);
#endif /* MEMSCRUB_DEBUG */

		/*
		 * MEMSCRUBBASE is a 4MB aligned page in the
		 * kernel so that we can quickly map the PA
		 * to a VA for the block loads performed in
		 * memscrub_read.
		 */
		pfn = mmu_btop(pa);
		va = (caddr_t)MEMSCRUBBASE;
		hat_devload(kas.a_hat, va, psz, pfn, PROT_READ,
		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

		/*
		 * Can't allow the memscrubber to migrate across CPUs as
		 * we need to know whether CEEN is enabled for the current
		 * CPU to enable us to scrub the memory.  Don't use
		/*
		 * Can't allow the memscrubber to migrate across CPUs as
		 * we need to know whether CEEN is enabled for the current
		 * CPU to enable us to scrub the memory.  Don't use
		 * kpreempt_disable as the time we take to scan a span (even
		 * without cpu_check_ce having to manually cpu_check_block)
		 * is too long to hold a higher priority thread (e.g., RT)
		 * off cpu.
		 */
		thread_affinity_set(curthread, CPU_CURRENT);

		/*
		 * Protect the read scrub from async faults.  For now, we
		 * simply maintain a count of such faults caught.
		 */
		if (!on_trap(&otd, OT_DATA_EC)) {
			memscrub_read(va, bpp);
			no_trap();
			/*
			 * Check if CEs require logging
			 */
			cpu_check_ce(SCRUBBER_CEEN_CHECK,
			    (uint64_t)pa, va, psz);
			thread_affinity_clear(curthread);
		} else {
			no_trap();
			thread_affinity_clear(curthread);

			/*
			 * Got an async error.  Try rescanning at
			 * MMU_PAGESIZE granularity if we were trying to
			 * read at a larger page size, so that we continue
			 * to scan the rest of the span.
			 */
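			/*
			 * Editor's note: with sun4u's 8K base pages, a
			 * faulted 4M read is retried as 512 individual
			 * 8K reads, so a single bad line costs at most
			 * one base page of scrub coverage.
			 */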
			if (psz > MMU_PAGESIZE) {
				caddr_t vaddr = va;
				ms_paddr_t paddr = pa;
				int tmp;

				for (tmp = 0; tmp < bpp;
				    tmp += MEMSCRUB_BPP) {
					thread_affinity_set(curthread,
					    CPU_CURRENT);
					if (!on_trap(&otd, OT_DATA_EC))
						memscrub_read(vaddr,
						    MEMSCRUB_BPP);
					else
						memscrub_counts.errors_found.
						    value.ui32++;
					no_trap();
					/*
					 * Check if CEs require logging
					 */
					cpu_check_ce(SCRUBBER_CEEN_CHECK,
					    (uint64_t)paddr, vaddr,
					    MMU_PAGESIZE);
					thread_affinity_clear(curthread);
					vaddr += MMU_PAGESIZE;
					paddr += MMU_PAGESIZE;
				}
			}
		}
		hat_unload(kas.a_hat, va, psz, HAT_UNLOAD_UNLOCK);

		blks -= bpp;
		pa += psz;
		pgsread++;
	}
	if (memscrub_verbose) {
		cmn_err(CE_NOTE, "Memory scrubber read 0x%x pages starting "
		    "at 0x%" PRIx64, pgsread, src);
	}
}
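/*
 * Editor's note: new_memscrub() follows a copy/swap/free discipline --
 * the replacement list is built without holding memscrub_lock, it is
 * published with a pointer swap under the lock, and the old list is
 * freed only after the lock is dropped, so the paused scrubber never
 * waits behind a kmem allocation or free.
 */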
/*
 * The memory add/delete callback mechanism does not pass in the
 * page ranges.  The phys_install list has been updated though, so
 * create a new scrub list from it.
 */
static int
new_memscrub()
{
	struct memlist *src, *list, *old_list;
	uint_t npgs;

	/*
	 * copy phys_install to memscrub_memlist
	 */
	list = NULL;
	npgs = 0;
	memlist_read_lock();
	for (src = phys_install; src; src = src->next) {
		if (memscrub_add_span_gen((pfn_t)(src->address >> PAGESHIFT),
		    (pgcnt_t)(src->size >> PAGESHIFT), &list, &npgs)) {
			memlist_read_unlock();
			while (list) {
				struct memlist *el;

				el = list;
				list = list->next;
				kmem_free(el, sizeof (struct memlist));
			}
			return (-1);
		}
	}
	memlist_read_unlock();

	mutex_enter(&memscrub_lock);
	memscrub_phys_pages = npgs;
	old_list = memscrub_memlist;
	memscrub_memlist = list;
	mutex_exit(&memscrub_lock);

	while (old_list) {
		struct memlist *el;

		el = old_list;
		old_list = old_list->next;
		kmem_free(el, sizeof (struct memlist));
	}
	return (0);
}

/*ARGSUSED*/
static void
memscrub_mem_config_post_add(
	void *arg,
	pgcnt_t delta_pages)
{
	/*
	 * We increment pause_memscrub before entering new_memscrub().  This
	 * will force the memscrubber to sleep, allowing the DR callback
	 * thread to acquire memscrub_lock in new_memscrub().  The use of
	 * atomic_add_32() allows concurrent memory DR operations to use the
	 * callbacks safely.
	 */
	atomic_add_32(&pause_memscrub, 1);
	ASSERT(pause_memscrub != 0);

	/*
	 * "Don't care" if we are not scrubbing new memory.
	 */
	(void) new_memscrub();

	/* Restore the pause setting. */
	atomic_add_32(&pause_memscrub, -1);
}
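/*
 * Editor's note: pause_memscrub is a counter rather than a flag, so
 * overlapping DR operations compose -- each callback brackets its work
 * with an atomic increment and decrement, and the scrubber resumes only
 * when the count drops back to zero.
 */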
/*ARGSUSED*/
static int
memscrub_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
	/* Nothing to do. */
	return (0);
}

/*ARGSUSED*/
static void
memscrub_mem_config_post_del(
	void *arg,
	pgcnt_t delta_pages,
	int cancelled)
{
	/*
	 * Pause the scrubber exactly as in the post-add callback above:
	 * bump pause_memscrub so the memscrubber sleeps while this DR
	 * callback thread takes memscrub_lock in new_memscrub().
	 */
	atomic_add_32(&pause_memscrub, 1);
	ASSERT(pause_memscrub != 0);

	/*
	 * Must stop scrubbing deleted memory as it may be disconnected.
	 */
	if (new_memscrub()) {
		disable_memscrub = 1;
	}

	/* Restore the pause setting. */
	atomic_add_32(&pause_memscrub, -1);
}

static kphysm_setup_vector_t memscrub_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	memscrub_mem_config_post_add,
	memscrub_mem_config_pre_del,
	memscrub_mem_config_post_del,
};

static void
memscrub_init_mem_config()
{
	int ret;

	ret = kphysm_setup_func_register(&memscrub_mem_config_vec,
	    (void *)NULL);
	ASSERT(ret == 0);
}

static void
memscrub_uninit_mem_config()
{
	/* This call is OK if the register call was not done. */
	kphysm_setup_func_unregister(&memscrub_mem_config_vec, (void *)NULL);
}