1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/errno.h>
30 #include <sys/kmem.h>
31 #include <sys/vnode.h>
32 #include <sys/vfs_opreg.h>
33 #include <sys/swap.h>
34 #include <sys/sysmacros.h>
35 #include <sys/buf.h>
36 #include <sys/callb.h>
37 #include <sys/debug.h>
38 #include <vm/seg.h>
39 #include <sys/fs/swapnode.h>
40 #include <fs/fs_subr.h>
41 #include <sys/cmn_err.h>
42 #include <sys/mem_config.h>
43 #include <sys/atomic.h>
44
45 extern const fs_operation_def_t swap_vnodeops_template[];
46
/*
 * swapfs_minfree is the amount of physical memory (actually remaining
 * availrmem) that we want to keep free for the rest of the system.  This
 * means that swapfs can only grow to availrmem - swapfs_minfree.  It can
 * be set either to a constant value or to a fraction of installed
 * physical memory.  It is computed by swapfs_recalc(), which is first
 * called from swapinit().
 *
 * Users who want to change the amount of memory that can be used as swap
 * space should do so by setting swapfs_desfree at boot time,
 * not swapfs_minfree.
 */
58
/*
 * swapfs tunables.  A value of zero means "not set"; swapfs_recalc()
 * replaces a zero with a computed default.
 */
pgcnt_t swapfs_desfree = 0;
pgcnt_t swapfs_minfree = 0;
pgcnt_t swapfs_reserve = 0;

#ifdef SWAPFS_DEBUG
int swapfs_debug;
#endif /* SWAPFS_DEBUG */


/* Number of vnodes created so far by swapfs_getvp(). */
static int swapfs_vpcount;
/* Held while creating swap vnodes and while editing the request lists. */
static kmutex_t swapfs_lock;
/*
 * sw_ar is the backing array of async request structures allocated in
 * swapinit(); sw_pendlist and sw_freelist are the heads of the singly
 * linked (a_next) pending and free lists.
 */
static struct async_reqs *sw_ar, *sw_pendlist, *sw_freelist;

static struct vnode **swap_vnodes;	/* ptr's to swap vnodes */

static void swap_init_mem_config(void);

/*
 * Values of the tunables as captured by swapfs_recalc_save_initial();
 * swapfs_recalc() starts from these on every recalculation.
 */
static pgcnt_t initial_swapfs_desfree;
static pgcnt_t initial_swapfs_minfree;
static pgcnt_t initial_swapfs_reserve;

static int swap_sync(struct vfs *vfsp, short flag, struct cred *cr);
81
82 static void
swapfs_recalc_save_initial(void)83 swapfs_recalc_save_initial(void)
84 {
85 initial_swapfs_desfree = swapfs_desfree;
86 initial_swapfs_minfree = swapfs_minfree;
87 initial_swapfs_reserve = swapfs_reserve;
88 }
89
90 static int
swapfs_recalc(pgcnt_t pgs)91 swapfs_recalc(pgcnt_t pgs)
92 {
93 pgcnt_t new_swapfs_desfree;
94 pgcnt_t new_swapfs_minfree;
95 pgcnt_t new_swapfs_reserve;
96
97 new_swapfs_desfree = initial_swapfs_desfree;
98 new_swapfs_minfree = initial_swapfs_minfree;
99 new_swapfs_reserve = initial_swapfs_reserve;
100
101 if (new_swapfs_desfree == 0)
102 new_swapfs_desfree = btopr(7 * 512 * 1024); /* 3-1/2Mb */;
103
104 if (new_swapfs_minfree == 0) {
105 /*
106 * We set this lower than we'd like here, 2Mb, because we
107 * always boot on swapfs. It's up to a safer value,
108 * swapfs_desfree, when/if we add physical swap devices
109 * in swapadd(). Users who want to change the amount of
110 * memory that can be used as swap space should do so by
111 * setting swapfs_desfree at boot time, not swapfs_minfree.
112 * However, swapfs_minfree is tunable by install as a
113 * workaround for bugid 1147463.
114 */
115 new_swapfs_minfree = MAX(btopr(2 * 1024 * 1024), pgs >> 3);
116 }
117
118 /*
119 * priv processes can reserve memory as swap as long as availrmem
120 * remains greater than swapfs_minfree; in the case of non-priv
121 * processes, memory can be reserved as swap only if availrmem
122 * doesn't fall below (swapfs_minfree + swapfs_reserve). Thus,
123 * swapfs_reserve amount of memswap is not available to non-priv
124 * processes. This protects daemons such as automounter dying
125 * as a result of application processes eating away almost entire
126 * membased swap. This safeguard becomes useless if apps are run
127 * with root access.
128 *
129 * set swapfs_reserve to a minimum of 4Mb or 1/128 of physmem whichever
130 * is greater up to the limit of 128 MB.
131 */
132 if (new_swapfs_reserve == 0)
133 new_swapfs_reserve = MIN(btopr(128 * 1024 * 1024),
134 MAX(btopr(4 * 1024 * 1024), pgs >> 7));
135
136 /* Test basic numeric viability. */
137 if (new_swapfs_minfree > pgs)
138 return (0);
139
140 /* Equivalent test to anon_resvmem() check. */
141 if (availrmem < new_swapfs_minfree) {
142 /*
143 * If ism pages are being used, then there must be agreement
144 * between these two policies.
145 */
146 if ((availrmem > segspt_minfree) && (segspt_minfree > 0)) {
147 new_swapfs_minfree = segspt_minfree;
148 } else {
149 return (0);
150 }
151 }
152
153 swapfs_desfree = new_swapfs_desfree;
154 swapfs_minfree = new_swapfs_minfree;
155 swapfs_reserve = new_swapfs_reserve;
156
157 return (1);
158 }
159
160 /*ARGSUSED1*/
161 int
swapinit(int fstype,char * name)162 swapinit(int fstype, char *name)
163 { /* reserve for mp */
164 ssize_t sw_freelist_size = klustsize / PAGESIZE * 2;
165 int i, error;
166
167 static const fs_operation_def_t swap_vfsops[] = {
168 VFSNAME_SYNC, { .vfs_sync = swap_sync },
169 NULL, NULL
170 };
171
172 SWAPFS_PRINT(SWAP_SUBR, "swapinit\n", 0, 0, 0, 0, 0);
173 mutex_init(&swapfs_lock, NULL, MUTEX_DEFAULT, NULL);
174
175 swap_vnodes = kmem_zalloc(MAX_SWAP_VNODES * sizeof (struct vnode *),
176 KM_SLEEP);
177
178 swapfs_recalc_save_initial();
179 if (!swapfs_recalc(physmem))
180 cmn_err(CE_PANIC, "swapfs_minfree(%lu) > physmem(%lu)",
181 swapfs_minfree, physmem);
182
183 /*
184 * Arrange for a callback on memory size change.
185 */
186 swap_init_mem_config();
187
188 sw_ar = (struct async_reqs *)
189 kmem_zalloc(sw_freelist_size*sizeof (struct async_reqs), KM_SLEEP);
190
191 error = vfs_setfsops(fstype, swap_vfsops, NULL);
192 if (error != 0) {
193 cmn_err(CE_WARN, "swapinit: bad vfs ops template");
194 return (error);
195 }
196
197 error = vn_make_ops(name, swap_vnodeops_template, &swap_vnodeops);
198 if (error != 0) {
199 (void) vfs_freevfsops_by_type(fstype);
200 cmn_err(CE_WARN, "swapinit: bad vnode ops template");
201 return (error);
202 }
203 sw_freelist = sw_ar;
204 for (i = 0; i < sw_freelist_size - 1; i++)
205 sw_ar[i].a_next = &sw_ar[i + 1];
206
207 return (0);
208 }
209
210 /*
211 * Get a swapfs vnode corresponding to the specified identifier.
212 */
213 struct vnode *
swapfs_getvp(ulong_t vidx)214 swapfs_getvp(ulong_t vidx)
215 {
216 struct vnode *vp;
217
218 vp = swap_vnodes[vidx];
219 if (vp) {
220 return (vp);
221 }
222
223 mutex_enter(&swapfs_lock);
224 vp = swap_vnodes[vidx];
225 if (vp == NULL) {
226 vp = vn_alloc(KM_SLEEP);
227 vn_setops(vp, swap_vnodeops);
228 vp->v_type = VREG;
229 vp->v_flag |= (VISSWAP|VISSWAPFS);
230 swap_vnodes[vidx] = vp;
231 swapfs_vpcount++;
232 }
233 mutex_exit(&swapfs_lock);
234 return (vp);
235 }
236
/*
 * NOTE(review): not referenced anywhere in the visible part of this file;
 * presumably read or set from elsewhere -- confirm before touching.
 */
int swap_lo;
238
239 /*ARGSUSED*/
240 static int
swap_sync(struct vfs * vfsp,short flag,struct cred * cr)241 swap_sync(struct vfs *vfsp, short flag, struct cred *cr)
242 {
243 struct vnode *vp;
244 int i;
245
246 if (!(flag & SYNC_ALL))
247 return (1);
248
249 /*
250 * assumes that we are the only one left to access this so that
251 * no need to use swapfs_lock (since it's staticly defined)
252 */
253 for (i = 0; i < MAX_SWAP_VNODES; i++) {
254 vp = swap_vnodes[i];
255 if (vp) {
256 VN_HOLD(vp);
257 (void) VOP_PUTPAGE(vp, (offset_t)0, 0,
258 (B_ASYNC | B_FREE), kcred, NULL);
259 VN_RELE(vp);
260 }
261 }
262 return (0);
263 }
264
265 extern int sw_pending_size;
266
267 /*
268 * Take an async request off the pending queue
269 */
270 struct async_reqs *
sw_getreq()271 sw_getreq()
272 {
273 struct async_reqs *arg;
274
275 mutex_enter(&swapfs_lock);
276 arg = sw_pendlist;
277 if (arg) {
278 sw_pendlist = arg->a_next;
279 arg->a_next = NULL;
280 sw_pending_size -= PAGESIZE;
281 }
282 ASSERT(sw_pending_size >= 0);
283 mutex_exit(&swapfs_lock);
284 return (arg);
285 }
286
287 /*
288 * Put an async request on the pending queue
289 */
290 void
sw_putreq(struct async_reqs * arg)291 sw_putreq(struct async_reqs *arg)
292 {
293 /* Hold onto it */
294 VN_HOLD(arg->a_vp);
295
296 mutex_enter(&swapfs_lock);
297 arg->a_next = sw_pendlist;
298 sw_pendlist = arg;
299 sw_pending_size += PAGESIZE;
300 mutex_exit(&swapfs_lock);
301 }
302
303 /*
304 * Put an async request back on the pending queue
305 */
306 void
sw_putbackreq(struct async_reqs * arg)307 sw_putbackreq(struct async_reqs *arg)
308 {
309 mutex_enter(&swapfs_lock);
310 arg->a_next = sw_pendlist;
311 sw_pendlist = arg;
312 sw_pending_size += PAGESIZE;
313 mutex_exit(&swapfs_lock);
314 }
315
316 /*
317 * Take an async request structure off the free list
318 */
319 struct async_reqs *
sw_getfree()320 sw_getfree()
321 {
322 struct async_reqs *arg;
323
324 mutex_enter(&swapfs_lock);
325 arg = sw_freelist;
326 if (arg) {
327 sw_freelist = arg->a_next;
328 arg->a_next = NULL;
329 }
330 mutex_exit(&swapfs_lock);
331 return (arg);
332 }
333
334 /*
335 * Put an async request structure on the free list
336 */
337 void
sw_putfree(struct async_reqs * arg)338 sw_putfree(struct async_reqs *arg)
339 {
340 /* Release our hold - should have locked the page by now */
341 VN_RELE(arg->a_vp);
342
343 mutex_enter(&swapfs_lock);
344 arg->a_next = sw_freelist;
345 sw_freelist = arg;
346 mutex_exit(&swapfs_lock);
347 }
348
/*
 * Running total of the deltas passed to swap_mem_config_pre_del() that
 * have not yet been matched by swap_mem_config_post_del().  Both update
 * it with atomic_add_long_nv(), and it is subtracted from physmem when
 * the tunables are recalculated.
 */
static pgcnt_t swapfs_pending_delete;
350
351 /*ARGSUSED*/
352 static void
swap_mem_config_post_add(void * arg,pgcnt_t delta_swaps)353 swap_mem_config_post_add(
354 void *arg,
355 pgcnt_t delta_swaps)
356 {
357 (void) swapfs_recalc(physmem - swapfs_pending_delete);
358 }
359
360 /*ARGSUSED*/
361 static int
swap_mem_config_pre_del(void * arg,pgcnt_t delta_swaps)362 swap_mem_config_pre_del(
363 void *arg,
364 pgcnt_t delta_swaps)
365 {
366 pgcnt_t nv;
367
368 nv = atomic_add_long_nv(&swapfs_pending_delete, (spgcnt_t)delta_swaps);
369 if (!swapfs_recalc(physmem - nv)) {
370 /*
371 * Tidy-up is done by the call to post_del which
372 * is always made.
373 */
374 cmn_err(CE_NOTE, "Memory operation refused to ensure system "
375 "doesn't deadlock due to excessive consumption by swapfs.");
376 return (EBUSY);
377 }
378 return (0);
379 }
380
381 /*ARGSUSED*/
382 static void
swap_mem_config_post_del(void * arg,pgcnt_t delta_swaps,int cancelled)383 swap_mem_config_post_del(
384 void *arg,
385 pgcnt_t delta_swaps,
386 int cancelled)
387 {
388 pgcnt_t nv;
389
390 nv = atomic_add_long_nv(&swapfs_pending_delete, -(spgcnt_t)delta_swaps);
391 (void) swapfs_recalc(physmem - nv);
392 }
393
/*
 * Callback vector that swap_init_mem_config() hands to
 * kphysm_setup_func_register(): a version constant followed by the
 * post-add, pre-delete and post-delete handlers defined in this file.
 */
static kphysm_setup_vector_t swap_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	swap_mem_config_post_add,
	swap_mem_config_pre_del,
	swap_mem_config_post_del,
};
400
401 static void
swap_init_mem_config(void)402 swap_init_mem_config(void)
403 {
404 int ret;
405
406 ret = kphysm_setup_func_register(&swap_mem_config_vec, (void *)NULL);
407 ASSERT(ret == 0);
408 }
409