1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/errno.h> 32 #include <sys/kmem.h> 33 #include <sys/vnode.h> 34 #include <sys/vfs_opreg.h> 35 #include <sys/swap.h> 36 #include <sys/sysmacros.h> 37 #include <sys/buf.h> 38 #include <sys/callb.h> 39 #include <sys/debug.h> 40 #include <vm/seg.h> 41 #include <sys/fs/swapnode.h> 42 #include <fs/fs_subr.h> 43 #include <sys/cmn_err.h> 44 #include <sys/mem_config.h> 45 #include <sys/atomic.h> 46 47 extern const fs_operation_def_t swap_vnodeops_template[]; 48 49 /* 50 * swapfs_minfree is the amount of physical memory (actually remaining 51 * availrmem) that we want to keep free for the rest of the system. This 52 * means that swapfs can only grow to availrmem - swapfs_minfree. This 53 * can be set as just constant value or a certain percentage of installed 54 * physical memory. It is set in swapinit(). 55 * 56 * Users who want to change the amount of memory that can be used as swap 57 * space should do so by setting swapfs_desfree at boot time, 58 * not swapfs_minfree. 59 */ 60 61 pgcnt_t swapfs_desfree = 0; 62 pgcnt_t swapfs_minfree = 0; 63 pgcnt_t swapfs_reserve = 0; 64 65 #ifdef SWAPFS_DEBUG 66 int swapfs_debug; 67 #endif /* SWAPFS_DEBUG */ 68 69 70 static int swapfs_vpcount; 71 static kmutex_t swapfs_lock; 72 static struct async_reqs *sw_ar, *sw_pendlist, *sw_freelist; 73 74 static struct vnode **swap_vnodes; /* ptr's to swap vnodes */ 75 76 static void swap_init_mem_config(void); 77 78 static pgcnt_t initial_swapfs_desfree; 79 static pgcnt_t initial_swapfs_minfree; 80 static pgcnt_t initial_swapfs_reserve; 81 82 static int swap_sync(struct vfs *vfsp, short flag, struct cred *cr); 83 84 static void 85 swapfs_recalc_save_initial(void) 86 { 87 initial_swapfs_desfree = swapfs_desfree; 88 initial_swapfs_minfree = swapfs_minfree; 89 initial_swapfs_reserve = swapfs_reserve; 90 } 91 92 static int 93 swapfs_recalc(pgcnt_t pgs) 94 { 95 pgcnt_t new_swapfs_desfree; 96 pgcnt_t new_swapfs_minfree; 97 pgcnt_t new_swapfs_reserve; 98 99 new_swapfs_desfree = initial_swapfs_desfree; 100 new_swapfs_minfree = initial_swapfs_minfree; 101 new_swapfs_reserve = initial_swapfs_reserve; 102 103 if (new_swapfs_desfree == 0) 104 new_swapfs_desfree = btopr(7 * 512 * 1024); /* 3-1/2Mb */; 105 106 if (new_swapfs_minfree == 0) { 107 /* 108 * We set this lower than we'd like here, 2Mb, because we 109 * always boot on swapfs. It's up to a safer value, 110 * swapfs_desfree, when/if we add physical swap devices 111 * in swapadd(). Users who want to change the amount of 112 * memory that can be used as swap space should do so by 113 * setting swapfs_desfree at boot time, not swapfs_minfree. 114 * However, swapfs_minfree is tunable by install as a 115 * workaround for bugid 1147463. 116 */ 117 new_swapfs_minfree = MAX(btopr(2 * 1024 * 1024), pgs >> 3); 118 } 119 120 /* 121 * priv processes can reserve memory as swap as long as availrmem 122 * remains greater than swapfs_minfree; in the case of non-priv 123 * processes, memory can be reserved as swap only if availrmem 124 * doesn't fall below (swapfs_minfree + swapfs_reserve). Thus, 125 * swapfs_reserve amount of memswap is not available to non-priv 126 * processes. This protects daemons such as automounter dying 127 * as a result of application processes eating away almost entire 128 * membased swap. This safeguard becomes useless if apps are run 129 * with root access. 130 * 131 * set swapfs_reserve to a minimum of 4Mb or 1/128 of physmem whichever 132 * is greater up to the limit of 128 MB. 133 */ 134 if (new_swapfs_reserve == 0) 135 new_swapfs_reserve = MIN(btopr(128 * 1024 * 1024), 136 MAX(btopr(4 * 1024 * 1024), pgs >> 7)); 137 138 /* Test basic numeric viability. */ 139 if (new_swapfs_minfree > pgs) 140 return (0); 141 142 /* Equivalent test to anon_resvmem() check. */ 143 if (availrmem < new_swapfs_minfree) { 144 /* 145 * If ism pages are being used, then there must be agreement 146 * between these two policies. 147 */ 148 if ((availrmem > segspt_minfree) && (segspt_minfree > 0)) { 149 new_swapfs_minfree = segspt_minfree; 150 } else { 151 return (0); 152 } 153 } 154 155 swapfs_desfree = new_swapfs_desfree; 156 swapfs_minfree = new_swapfs_minfree; 157 swapfs_reserve = new_swapfs_reserve; 158 159 return (1); 160 } 161 162 /*ARGSUSED1*/ 163 int 164 swapinit(int fstype, char *name) 165 { /* reserve for mp */ 166 ssize_t sw_freelist_size = klustsize / PAGESIZE * 2; 167 int i, error; 168 169 static const fs_operation_def_t swap_vfsops[] = { 170 VFSNAME_SYNC, { .vfs_sync = swap_sync }, 171 NULL, NULL 172 }; 173 174 SWAPFS_PRINT(SWAP_SUBR, "swapinit\n", 0, 0, 0, 0, 0); 175 mutex_init(&swapfs_lock, NULL, MUTEX_DEFAULT, NULL); 176 177 swap_vnodes = kmem_zalloc(MAX_SWAP_VNODES * sizeof (struct vnode *), 178 KM_SLEEP); 179 180 swapfs_recalc_save_initial(); 181 if (!swapfs_recalc(physmem)) 182 cmn_err(CE_PANIC, "swapfs_minfree(%lu) > physmem(%lu)", 183 swapfs_minfree, physmem); 184 185 /* 186 * Arrange for a callback on memory size change. 187 */ 188 swap_init_mem_config(); 189 190 sw_ar = (struct async_reqs *) 191 kmem_zalloc(sw_freelist_size*sizeof (struct async_reqs), KM_SLEEP); 192 193 error = vfs_setfsops(fstype, swap_vfsops, NULL); 194 if (error != 0) { 195 cmn_err(CE_WARN, "swapinit: bad vfs ops template"); 196 return (error); 197 } 198 199 error = vn_make_ops(name, swap_vnodeops_template, &swap_vnodeops); 200 if (error != 0) { 201 (void) vfs_freevfsops_by_type(fstype); 202 cmn_err(CE_WARN, "swapinit: bad vnode ops template"); 203 return (error); 204 } 205 sw_freelist = sw_ar; 206 for (i = 0; i < sw_freelist_size - 1; i++) 207 sw_ar[i].a_next = &sw_ar[i + 1]; 208 209 return (0); 210 } 211 212 /* 213 * Get a swapfs vnode corresponding to the specified identifier. 214 */ 215 struct vnode * 216 swapfs_getvp(ulong_t vidx) 217 { 218 struct vnode *vp; 219 220 vp = swap_vnodes[vidx]; 221 if (vp) { 222 return (vp); 223 } 224 225 mutex_enter(&swapfs_lock); 226 vp = swap_vnodes[vidx]; 227 if (vp == NULL) { 228 vp = vn_alloc(KM_SLEEP); 229 vn_setops(vp, swap_vnodeops); 230 vp->v_type = VREG; 231 vp->v_flag |= (VISSWAP|VISSWAPFS); 232 swap_vnodes[vidx] = vp; 233 swapfs_vpcount++; 234 } 235 mutex_exit(&swapfs_lock); 236 return (vp); 237 } 238 239 int swap_lo; 240 241 /*ARGSUSED*/ 242 static int 243 swap_sync(struct vfs *vfsp, short flag, struct cred *cr) 244 { 245 struct vnode *vp; 246 int i; 247 248 if (!(flag & SYNC_ALL)) 249 return (1); 250 251 /* 252 * assumes that we are the only one left to access this so that 253 * no need to use swapfs_lock (since it's staticly defined) 254 */ 255 for (i = 0; i < MAX_SWAP_VNODES; i++) { 256 vp = swap_vnodes[i]; 257 if (vp) { 258 VN_HOLD(vp); 259 (void) VOP_PUTPAGE(vp, (offset_t)0, 0, 260 (B_ASYNC | B_FREE), kcred); 261 VN_RELE(vp); 262 } 263 } 264 return (0); 265 } 266 267 extern int sw_pending_size; 268 269 /* 270 * Take an async request off the pending queue 271 */ 272 struct async_reqs * 273 sw_getreq() 274 { 275 struct async_reqs *arg; 276 277 mutex_enter(&swapfs_lock); 278 arg = sw_pendlist; 279 if (arg) { 280 sw_pendlist = arg->a_next; 281 arg->a_next = NULL; 282 sw_pending_size -= PAGESIZE; 283 } 284 ASSERT(sw_pending_size >= 0); 285 mutex_exit(&swapfs_lock); 286 return (arg); 287 } 288 289 /* 290 * Put an async request on the pending queue 291 */ 292 void 293 sw_putreq(struct async_reqs *arg) 294 { 295 /* Hold onto it */ 296 VN_HOLD(arg->a_vp); 297 298 mutex_enter(&swapfs_lock); 299 arg->a_next = sw_pendlist; 300 sw_pendlist = arg; 301 sw_pending_size += PAGESIZE; 302 mutex_exit(&swapfs_lock); 303 } 304 305 /* 306 * Put an async request back on the pending queue 307 */ 308 void 309 sw_putbackreq(struct async_reqs *arg) 310 { 311 mutex_enter(&swapfs_lock); 312 arg->a_next = sw_pendlist; 313 sw_pendlist = arg; 314 sw_pending_size += PAGESIZE; 315 mutex_exit(&swapfs_lock); 316 } 317 318 /* 319 * Take an async request structure off the free list 320 */ 321 struct async_reqs * 322 sw_getfree() 323 { 324 struct async_reqs *arg; 325 326 mutex_enter(&swapfs_lock); 327 arg = sw_freelist; 328 if (arg) { 329 sw_freelist = arg->a_next; 330 arg->a_next = NULL; 331 } 332 mutex_exit(&swapfs_lock); 333 return (arg); 334 } 335 336 /* 337 * Put an async request structure on the free list 338 */ 339 void 340 sw_putfree(struct async_reqs *arg) 341 { 342 /* Release our hold - should have locked the page by now */ 343 VN_RELE(arg->a_vp); 344 345 mutex_enter(&swapfs_lock); 346 arg->a_next = sw_freelist; 347 sw_freelist = arg; 348 mutex_exit(&swapfs_lock); 349 } 350 351 static pgcnt_t swapfs_pending_delete; 352 353 /*ARGSUSED*/ 354 static void 355 swap_mem_config_post_add( 356 void *arg, 357 pgcnt_t delta_swaps) 358 { 359 (void) swapfs_recalc(physmem - swapfs_pending_delete); 360 } 361 362 /*ARGSUSED*/ 363 static int 364 swap_mem_config_pre_del( 365 void *arg, 366 pgcnt_t delta_swaps) 367 { 368 pgcnt_t nv; 369 370 nv = atomic_add_long_nv(&swapfs_pending_delete, (spgcnt_t)delta_swaps); 371 if (!swapfs_recalc(physmem - nv)) { 372 /* 373 * Tidy-up is done by the call to post_del which 374 * is always made. 375 */ 376 return (EBUSY); 377 } 378 return (0); 379 } 380 381 /*ARGSUSED*/ 382 static void 383 swap_mem_config_post_del( 384 void *arg, 385 pgcnt_t delta_swaps, 386 int cancelled) 387 { 388 pgcnt_t nv; 389 390 nv = atomic_add_long_nv(&swapfs_pending_delete, -(spgcnt_t)delta_swaps); 391 (void) swapfs_recalc(physmem - nv); 392 } 393 394 static kphysm_setup_vector_t swap_mem_config_vec = { 395 KPHYSM_SETUP_VECTOR_VERSION, 396 swap_mem_config_post_add, 397 swap_mem_config_pre_del, 398 swap_mem_config_post_del, 399 }; 400 401 static void 402 swap_init_mem_config(void) 403 { 404 int ret; 405 406 ret = kphysm_setup_func_register(&swap_mem_config_vec, (void *)NULL); 407 ASSERT(ret == 0); 408 } 409