xref: /freebsd/sys/vm/vm_pageout.c (revision a98ff317388a00b992f1bf8404dee596f9383f5e)
1 /*-
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  * Copyright (c) 1994 John S. Dyson
5  * All rights reserved.
6  * Copyright (c) 1994 David Greenman
7  * All rights reserved.
8  * Copyright (c) 2005 Yahoo! Technologies Norway AS
9  * All rights reserved.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * The Mach Operating System project at Carnegie-Mellon University.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
43  *
44  *
45  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
46  * All rights reserved.
47  *
48  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
49  *
50  * Permission to use, copy, modify and distribute this software and
51  * its documentation is hereby granted, provided that both the copyright
52  * notice and this permission notice appear in all copies of the
53  * software, derivative works or modified versions, and any portions
54  * thereof, and that both notices appear in supporting documentation.
55  *
56  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
57  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
58  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
59  *
60  * Carnegie Mellon requests users of this software to return to
61  *
62  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
63  *  School of Computer Science
64  *  Carnegie Mellon University
65  *  Pittsburgh PA 15213-3890
66  *
67  * any improvements or extensions that they make and grant Carnegie the
68  * rights to redistribute these changes.
69  */
70 
71 /*
72  *	The proverbial page-out daemon.
73  */
74 
75 #include <sys/cdefs.h>
76 __FBSDID("$FreeBSD$");
77 
78 #include "opt_vm.h"
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/kernel.h>
82 #include <sys/eventhandler.h>
83 #include <sys/lock.h>
84 #include <sys/mutex.h>
85 #include <sys/proc.h>
86 #include <sys/kthread.h>
87 #include <sys/ktr.h>
88 #include <sys/mount.h>
89 #include <sys/racct.h>
90 #include <sys/resourcevar.h>
91 #include <sys/sched.h>
92 #include <sys/signalvar.h>
93 #include <sys/vnode.h>
94 #include <sys/vmmeter.h>
95 #include <sys/rwlock.h>
96 #include <sys/sx.h>
97 #include <sys/sysctl.h>
98 
99 #include <vm/vm.h>
100 #include <vm/vm_param.h>
101 #include <vm/vm_object.h>
102 #include <vm/vm_page.h>
103 #include <vm/vm_map.h>
104 #include <vm/vm_pageout.h>
105 #include <vm/vm_pager.h>
106 #include <vm/swap_pager.h>
107 #include <vm/vm_extern.h>
108 #include <vm/uma.h>
109 
110 /*
111  * System initialization
112  */
113 
114 /* the kernel process "vm_pageout" */
115 static void vm_pageout(void);
116 static int vm_pageout_clean(vm_page_t);
117 static void vm_pageout_scan(int pass);
118 
119 struct proc *pageproc;
120 
121 static struct kproc_desc page_kp = {
122 	"pagedaemon",
123 	vm_pageout,
124 	&pageproc
125 };
126 SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start,
127     &page_kp);
128 
129 #if !defined(NO_SWAPPING)
130 /* the kernel process "vm_daemon" */
131 static void vm_daemon(void);
132 static struct	proc *vmproc;
133 
134 static struct kproc_desc vm_kp = {
135 	"vmdaemon",
136 	vm_daemon,
137 	&vmproc
138 };
139 SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp);
140 #endif
141 
142 
143 int vm_pages_needed;		/* Event on which pageout daemon sleeps */
144 int vm_pageout_deficit;		/* Estimated number of pages deficit */
145 int vm_pageout_pages_needed;	/* flag saying that the pageout daemon needs pages */
146 
147 #if !defined(NO_SWAPPING)
148 static int vm_pageout_req_swapout;	/* XXX */
149 static int vm_daemon_needed;
150 static struct mtx vm_daemon_mtx;
151 /* Allow for use by vm_pageout before vm_daemon is initialized. */
152 MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF);
153 #endif
154 static int vm_max_launder = 32;
155 static int vm_pageout_stats_max;
156 static int vm_pageout_stats;
157 static int vm_pageout_stats_interval;
158 static int vm_pageout_full_stats;
159 static int vm_pageout_full_stats_interval;
160 static int defer_swap_pageouts;
161 static int disable_swap_pageouts;
162 
163 #if defined(NO_SWAPPING)
164 static int vm_swap_enabled = 0;
165 static int vm_swap_idle_enabled = 0;
166 #else
167 static int vm_swap_enabled = 1;
168 static int vm_swap_idle_enabled = 0;
169 #endif
170 
171 SYSCTL_INT(_vm, OID_AUTO, max_launder,
172 	CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout");
173 
174 SYSCTL_INT(_vm, OID_AUTO, pageout_stats_max,
175 	CTLFLAG_RW, &vm_pageout_stats_max, 0, "Max pageout stats scan length");
176 
177 SYSCTL_INT(_vm, OID_AUTO, pageout_stats,
178 	CTLFLAG_RD, &vm_pageout_stats, 0, "Number of partial stats scans");
179 
180 SYSCTL_INT(_vm, OID_AUTO, pageout_stats_interval,
181 	CTLFLAG_RW, &vm_pageout_stats_interval, 0, "Interval for partial stats scan");
182 
183 SYSCTL_INT(_vm, OID_AUTO, pageout_full_stats,
184 	CTLFLAG_RD, &vm_pageout_full_stats, 0, "Number of full stats scans");
185 
186 SYSCTL_INT(_vm, OID_AUTO, pageout_full_stats_interval,
187 	CTLFLAG_RW, &vm_pageout_full_stats_interval, 0, "Interval for full stats scan");
188 
189 #if defined(NO_SWAPPING)
190 SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
191 	CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout");
192 SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
193 	CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
194 #else
195 SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
196 	CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout");
197 SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
198 	CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
199 #endif
200 
201 SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
202 	CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");
203 
204 SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
205 	CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
206 
207 static int pageout_lock_miss;
208 SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
209 	CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");
210 
211 #define VM_PAGEOUT_PAGE_COUNT 16
212 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
213 
214 int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
215 SYSCTL_INT(_vm, OID_AUTO, max_wired,
216 	CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
217 
218 static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
219 static boolean_t vm_pageout_launder(int, int, vm_paddr_t, vm_paddr_t);
220 #if !defined(NO_SWAPPING)
221 static void vm_pageout_map_deactivate_pages(vm_map_t, long);
222 static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
223 static void vm_req_vmdaemon(int req);
224 #endif
225 static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
226 static void vm_pageout_page_stats(void);
227 
228 /*
229  * Initialize a dummy page for marking the caller's place in the specified
230  * paging queue.  In principle, this function only needs to set the flag
231  * PG_MARKER.  Nonetheless, it sets the flag VPO_BUSY and initializes the hold
232  * count to one as safety precautions.
233  */
234 static void
235 vm_pageout_init_marker(vm_page_t marker, u_short queue)
236 {
237 
238 	bzero(marker, sizeof(*marker));
239 	marker->flags = PG_MARKER;
240 	marker->oflags = VPO_BUSY;
241 	marker->queue = queue;
242 	marker->hold_count = 1;
243 }
244 
245 /*
246  * vm_pageout_fallback_object_lock:
247  *
248  * Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is
249  * known to have failed and the page queue must be either PQ_ACTIVE or
250  * PQ_INACTIVE.  To avoid lock order violation, unlock the page queues
251  * while locking the vm object.  Use marker page to detect page queue
252  * changes and maintain notion of next page on page queue.  Return
253  * TRUE if no changes were detected, FALSE otherwise.  vm object is
254  * locked on return.
255  *
256  * This function depends on both the lock portion of struct vm_object
257  * and normal struct vm_page being type stable.
258  */
259 static boolean_t
260 vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
261 {
262 	struct vm_page marker;
263 	struct vm_pagequeue *pq;
264 	boolean_t unchanged;
265 	u_short queue;
266 	vm_object_t object;
267 
268 	queue = m->queue;
269 	vm_pageout_init_marker(&marker, queue);
270 	pq = &vm_pagequeues[queue];
271 	object = m->object;
272 
273 	TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq);
274 	vm_pagequeue_unlock(pq);
275 	vm_page_unlock(m);
276 	VM_OBJECT_WLOCK(object);
277 	vm_page_lock(m);
278 	vm_pagequeue_lock(pq);
279 
280 	/* Page queue might have changed. */
281 	*next = TAILQ_NEXT(&marker, pageq);
282 	unchanged = (m->queue == queue &&
283 		     m->object == object &&
284 		     &marker == TAILQ_NEXT(m, pageq));
285 	TAILQ_REMOVE(&pq->pq_pl, &marker, pageq);
286 	return (unchanged);
287 }
288 
289 /*
290  * Lock the page while holding the page queue lock.  Use marker page
291  * to detect page queue changes and maintain notion of next page on
292  * page queue.  Return TRUE if no changes were detected, FALSE
293  * otherwise.  The page is locked on return. The page queue lock might
294  * be dropped and reacquired.
295  *
296  * This function depends on normal struct vm_page being type stable.
297  */
298 static boolean_t
299 vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
300 {
301 	struct vm_page marker;
302 	struct vm_pagequeue *pq;
303 	boolean_t unchanged;
304 	u_short queue;
305 
306 	vm_page_lock_assert(m, MA_NOTOWNED);
307 	if (vm_page_trylock(m))
308 		return (TRUE);
309 
310 	queue = m->queue;
311 	vm_pageout_init_marker(&marker, queue);
312 	pq = &vm_pagequeues[queue];
313 
314 	TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq);
315 	vm_pagequeue_unlock(pq);
316 	vm_page_lock(m);
317 	vm_pagequeue_lock(pq);
318 
319 	/* Page queue might have changed. */
320 	*next = TAILQ_NEXT(&marker, pageq);
321 	unchanged = (m->queue == queue && &marker == TAILQ_NEXT(m, pageq));
322 	TAILQ_REMOVE(&pq->pq_pl, &marker, pageq);
323 	return (unchanged);
324 }
325 
326 /*
327  * vm_pageout_clean:
328  *
329  * Clean the page and remove it from the laundry.
330  *
331  * We set the busy bit to cause potential page faults on this page to
332  * block.  Note the careful timing, however: the busy bit isn't set until
333  * late, and we cannot do anything that will mess with the page.
334  */
335 static int
336 vm_pageout_clean(vm_page_t m)
337 {
338 	vm_object_t object;
339 	vm_page_t mc[2*vm_pageout_page_count], pb, ps;
340 	int pageout_count;
341 	int ib, is, page_base;
342 	vm_pindex_t pindex = m->pindex;
343 
344 	vm_page_lock_assert(m, MA_OWNED);
345 	object = m->object;
346 	VM_OBJECT_ASSERT_WLOCKED(object);
347 
348 	/*
349 	 * It doesn't cost us anything to page out OBJT_DEFAULT or OBJT_SWAP
350 	 * with the new swapper, but we could have serious problems paging
351 	 * out other object types if there is insufficient memory.
352 	 *
353 	 * Unfortunately, checking free memory here is far too late, so the
354 	 * check has been moved up a procedural level.
355 	 */
356 
357 	/*
358 	 * Can't clean the page if it's busy or held.
359 	 */
360 	KASSERT(m->busy == 0 && (m->oflags & VPO_BUSY) == 0,
361 	    ("vm_pageout_clean: page %p is busy", m));
362 	KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m));
363 	vm_page_unlock(m);
364 
365 	mc[vm_pageout_page_count] = pb = ps = m;
366 	pageout_count = 1;
367 	page_base = vm_pageout_page_count;
368 	ib = 1;
369 	is = 1;
370 
371 	/*
372 	 * Scan object for clusterable pages.
373 	 *
374 	 * We can cluster ONLY if the page is NOT
375 	 * clean, wired, busy, held, or mapped into a
376 	 * buffer, and one of the following holds:
377 	 * 1) The page is inactive, or a seldom-used
378 	 *    active page.
379 	 * -or-
380 	 * 2) We force the issue.
381 	 *
382 	 * During heavy mmap/modification loads the pageout
383 	 * daemon can really fragment the underlying file
384 	 * due to flushing pages out of order and not trying to
385 	 * align the clusters (which leave sporadic out-of-order
386 	 * holes).  To solve this problem we do the reverse scan
387 	 * first and attempt to align our cluster, then do a
388 	 * forward scan if room remains.
389 	 */
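	/*
	 * For example (illustrative values only): assuming the default
	 * vm_pageout_page_count of 16 and a starting pindex of 37, the
	 * reverse scan below collects pages 36, 35, 34, 33 and 32 and then
	 * stops, because pindex - (ib - 1) is now 32, a multiple of 16.
	 * The low end of the cluster is therefore aligned to a 16-page
	 * boundary, and the forward scan may extend it from page 38 onward.
	 */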
390 more:
391 	while (ib && pageout_count < vm_pageout_page_count) {
392 		vm_page_t p;
393 
394 		if (ib > pindex) {
395 			ib = 0;
396 			break;
397 		}
398 
399 		if ((p = vm_page_prev(pb)) == NULL ||
400 		    (p->oflags & VPO_BUSY) != 0 || p->busy != 0) {
401 			ib = 0;
402 			break;
403 		}
404 		vm_page_lock(p);
405 		vm_page_test_dirty(p);
406 		if (p->dirty == 0 ||
407 		    p->queue != PQ_INACTIVE ||
408 		    p->hold_count != 0) {	/* may be undergoing I/O */
409 			vm_page_unlock(p);
410 			ib = 0;
411 			break;
412 		}
413 		vm_page_unlock(p);
414 		mc[--page_base] = pb = p;
415 		++pageout_count;
416 		++ib;
417 		/*
418 		 * Alignment boundary: stop here and switch directions.  Do
419 		 * not clear ib.
420 		 */
421 		if ((pindex - (ib - 1)) % vm_pageout_page_count == 0)
422 			break;
423 	}
424 
425 	while (pageout_count < vm_pageout_page_count &&
426 	    pindex + is < object->size) {
427 		vm_page_t p;
428 
429 		if ((p = vm_page_next(ps)) == NULL ||
430 		    (p->oflags & VPO_BUSY) != 0 || p->busy != 0)
431 			break;
432 		vm_page_lock(p);
433 		vm_page_test_dirty(p);
434 		if (p->dirty == 0 ||
435 		    p->queue != PQ_INACTIVE ||
436 		    p->hold_count != 0) {	/* may be undergoing I/O */
437 			vm_page_unlock(p);
438 			break;
439 		}
440 		vm_page_unlock(p);
441 		mc[page_base + pageout_count] = ps = p;
442 		++pageout_count;
443 		++is;
444 	}
445 
446 	/*
447 	 * If we exhausted our forward scan, continue with the reverse scan
448 	 * when possible, even past a page boundary.  This catches boundary
449 	 * conditions.
450 	 */
451 	if (ib && pageout_count < vm_pageout_page_count)
452 		goto more;
453 
454 	/*
455 	 * we allow reads during pageouts...
456 	 */
457 	return (vm_pageout_flush(&mc[page_base], pageout_count, 0, 0, NULL,
458 	    NULL));
459 }
460 
461 /*
462  * vm_pageout_flush() - launder the given pages
463  *
464  *	The given pages are laundered.  Note that we set up for the start of
465  *	I/O (i.e., busy the page), mark it read-only, and bump the object
466  *	reference count all in here rather than in the parent.  If we want
467  *	the parent to do more sophisticated things we may have to change
468  *	the ordering.
469  *
470  *	The returned runlen is the count of pages between mreq and the first
471  *	page after mreq with status VM_PAGER_AGAIN.
472  *	*eio is set to TRUE if the pager returned VM_PAGER_ERROR or VM_PAGER_FAIL
473  *	for any page in the runlen set.
474  */
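/*
 * For example (hypothetical values): with count = 8 and mreq = 3, runlen
 * starts at 5.  If the pager reports VM_PAGER_AGAIN only for mc[5], runlen
 * becomes 2, i.e. pages mc[3] and mc[4] are the pages between mreq and the
 * first "again" page.
 */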
475 int
476 vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
477     boolean_t *eio)
478 {
479 	vm_object_t object = mc[0]->object;
480 	int pageout_status[count];
481 	int numpagedout = 0;
482 	int i, runlen;
483 
484 	VM_OBJECT_ASSERT_WLOCKED(object);
485 
486 	/*
487 	 * Initiate I/O.  Bump the vm_page_t->busy counter and
488 	 * mark the pages read-only.
489 	 *
490 	 * We do not have to fix up the clean/dirty bits here... we can
491 	 * allow the pager to do it after the I/O completes.
492 	 *
493 	 * NOTE! mc[i]->dirty may be partial or fragmented due to an
494 	 * edge case with file fragments.
495 	 */
496 	for (i = 0; i < count; i++) {
497 		KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL,
498 		    ("vm_pageout_flush: partially invalid page %p index %d/%d",
499 			mc[i], i, count));
500 		vm_page_io_start(mc[i]);
501 		pmap_remove_write(mc[i]);
502 	}
503 	vm_object_pip_add(object, count);
504 
505 	vm_pager_put_pages(object, mc, count, flags, pageout_status);
506 
507 	runlen = count - mreq;
508 	if (eio != NULL)
509 		*eio = FALSE;
510 	for (i = 0; i < count; i++) {
511 		vm_page_t mt = mc[i];
512 
513 		KASSERT(pageout_status[i] == VM_PAGER_PEND ||
514 		    !pmap_page_is_write_mapped(mt),
515 		    ("vm_pageout_flush: page %p is not write protected", mt));
516 		switch (pageout_status[i]) {
517 		case VM_PAGER_OK:
518 		case VM_PAGER_PEND:
519 			numpagedout++;
520 			break;
521 		case VM_PAGER_BAD:
522 			/*
523 			 * Page is outside the range of the object.  Right now we
524 			 * essentially lose the changes by pretending it
525 			 * worked.
526 			 */
527 			vm_page_undirty(mt);
528 			break;
529 		case VM_PAGER_ERROR:
530 		case VM_PAGER_FAIL:
531 			/*
532 			 * If the page couldn't be paged out, then reactivate the
533 			 * page so it doesn't clog the inactive list.  (We
534 			 * will try paging it out again later.)
535 			 */
536 			vm_page_lock(mt);
537 			vm_page_activate(mt);
538 			vm_page_unlock(mt);
539 			if (eio != NULL && i >= mreq && i - mreq < runlen)
540 				*eio = TRUE;
541 			break;
542 		case VM_PAGER_AGAIN:
543 			if (i >= mreq && i - mreq < runlen)
544 				runlen = i - mreq;
545 			break;
546 		}
547 
548 		/*
549 		 * If the operation is still going, leave the page busy to
550 		 * block all other accesses. Also, leave the paging in
551 		 * progress indicator set so that we don't attempt an object
552 		 * collapse.
553 		 */
554 		if (pageout_status[i] != VM_PAGER_PEND) {
555 			vm_object_pip_wakeup(object);
556 			vm_page_io_finish(mt);
557 			if (vm_page_count_severe()) {
558 				vm_page_lock(mt);
559 				vm_page_try_to_cache(mt);
560 				vm_page_unlock(mt);
561 			}
562 		}
563 	}
564 	if (prunlen != NULL)
565 		*prunlen = runlen;
566 	return (numpagedout);
567 }
568 
569 static boolean_t
570 vm_pageout_launder(int queue, int tries, vm_paddr_t low, vm_paddr_t high)
571 {
572 	struct mount *mp;
573 	struct vm_pagequeue *pq;
574 	struct vnode *vp;
575 	vm_object_t object;
576 	vm_paddr_t pa;
577 	vm_page_t m, m_tmp, next;
578 
579 	pq = &vm_pagequeues[queue];
580 	vm_pagequeue_lock(pq);
581 	TAILQ_FOREACH_SAFE(m, &pq->pq_pl, pageq, next) {
582 		KASSERT(m->queue == queue,
583 		    ("vm_pageout_launder: page %p's queue is not %d", m,
584 		    queue));
585 		if ((m->flags & PG_MARKER) != 0)
586 			continue;
587 		pa = VM_PAGE_TO_PHYS(m);
588 		if (pa < low || pa + PAGE_SIZE > high)
589 			continue;
590 		if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
591 			vm_page_unlock(m);
592 			continue;
593 		}
594 		object = m->object;
595 		if ((!VM_OBJECT_TRYWLOCK(object) &&
596 		    (!vm_pageout_fallback_object_lock(m, &next) ||
597 		    m->hold_count != 0)) || (m->oflags & VPO_BUSY) != 0 ||
598 		    m->busy != 0) {
599 			vm_page_unlock(m);
600 			VM_OBJECT_WUNLOCK(object);
601 			continue;
602 		}
603 		vm_page_test_dirty(m);
604 		if (m->dirty == 0 && object->ref_count != 0)
605 			pmap_remove_all(m);
606 		if (m->dirty != 0) {
607 			vm_page_unlock(m);
608 			if (tries == 0 || (object->flags & OBJ_DEAD) != 0) {
609 				VM_OBJECT_WUNLOCK(object);
610 				continue;
611 			}
612 			if (object->type == OBJT_VNODE) {
613 				vm_pagequeue_unlock(pq);
614 				vp = object->handle;
615 				vm_object_reference_locked(object);
616 				VM_OBJECT_WUNLOCK(object);
617 				(void)vn_start_write(vp, &mp, V_WAIT);
618 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
619 				VM_OBJECT_WLOCK(object);
620 				vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
621 				VM_OBJECT_WUNLOCK(object);
622 				VOP_UNLOCK(vp, 0);
623 				vm_object_deallocate(object);
624 				vn_finished_write(mp);
625 				return (TRUE);
626 			} else if (object->type == OBJT_SWAP ||
627 			    object->type == OBJT_DEFAULT) {
628 				vm_pagequeue_unlock(pq);
629 				m_tmp = m;
630 				vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC,
631 				    0, NULL, NULL);
632 				VM_OBJECT_WUNLOCK(object);
633 				return (TRUE);
634 			}
635 		} else {
636 			/*
637 			 * Dequeue here to prevent lock recursion in
638 			 * vm_page_cache().
639 			 */
640 			vm_page_dequeue_locked(m);
641 			vm_page_cache(m);
642 			vm_page_unlock(m);
643 		}
644 		VM_OBJECT_WUNLOCK(object);
645 	}
646 	vm_pagequeue_unlock(pq);
647 	return (FALSE);
648 }
649 
650 /*
651  * Increase the number of cached pages.  The specified value, "tries",
652  * determines which categories of pages are cached:
653  *
654  *  0: All clean, inactive pages within the specified physical address range
655  *     are cached.  Will not sleep.
656  *  1: The vm_lowmem handlers are called.  All inactive pages within
657  *     the specified physical address range are cached.  May sleep.
658  *  2: The vm_lowmem handlers are called.  All inactive and active pages
659  *     within the specified physical address range are cached.  May sleep.
660  */
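/*
 * Illustrative caller pattern (a sketch, not taken from this file): a
 * physically contiguous allocation that fails may retry with increasing
 * effort, e.g.
 *
 *	for (tries = 0; tries <= 2; tries++) {
 *		vm_pageout_grow_cache(tries, low, high);
 *		if (retry_contig_alloc(low, high))	(hypothetical helper)
 *			break;
 *	}
 */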
661 void
662 vm_pageout_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high)
663 {
664 	int actl, actmax, inactl, inactmax;
665 
666 	if (tries > 0) {
667 		/*
668 		 * Decrease registered cache sizes.  The vm_lowmem handlers
669 		 * may acquire locks and/or sleep, so they can only be invoked
670 		 * when "tries" is greater than zero.
671 		 */
672 		EVENTHANDLER_INVOKE(vm_lowmem, 0);
673 
674 		/*
675 		 * We do this explicitly after the caches have been drained
676 		 * above.
677 		 */
678 		uma_reclaim();
679 	}
680 	inactl = 0;
681 	inactmax = cnt.v_inactive_count;
682 	actl = 0;
683 	actmax = tries < 2 ? 0 : cnt.v_active_count;
684 again:
685 	if (inactl < inactmax && vm_pageout_launder(PQ_INACTIVE, tries, low,
686 	    high)) {
687 		inactl++;
688 		goto again;
689 	}
690 	if (actl < actmax && vm_pageout_launder(PQ_ACTIVE, tries, low, high)) {
691 		actl++;
692 		goto again;
693 	}
694 }
695 
696 #if !defined(NO_SWAPPING)
697 /*
698  *	vm_pageout_object_deactivate_pages
699  *
700  *	Deactivate enough pages to satisfy the inactive target
701  *	requirements.
702  *
703  *	The object and map must be locked.
704  */
705 static void
706 vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
707     long desired)
708 {
709 	vm_object_t backing_object, object;
710 	vm_page_t p;
711 	int act_delta, remove_mode;
712 
713 	VM_OBJECT_ASSERT_LOCKED(first_object);
714 	if ((first_object->flags & OBJ_FICTITIOUS) != 0)
715 		return;
716 	for (object = first_object;; object = backing_object) {
717 		if (pmap_resident_count(pmap) <= desired)
718 			goto unlock_return;
719 		VM_OBJECT_ASSERT_LOCKED(object);
720 		if ((object->flags & OBJ_UNMANAGED) != 0 ||
721 		    object->paging_in_progress != 0)
722 			goto unlock_return;
723 
724 		remove_mode = 0;
725 		if (object->shadow_count > 1)
726 			remove_mode = 1;
727 		/*
728 		 * Scan the object's entire memory queue.
729 		 */
730 		TAILQ_FOREACH(p, &object->memq, listq) {
731 			if (pmap_resident_count(pmap) <= desired)
732 				goto unlock_return;
733 			if ((p->oflags & VPO_BUSY) != 0 || p->busy != 0)
734 				continue;
735 			PCPU_INC(cnt.v_pdpages);
736 			vm_page_lock(p);
737 			if (p->wire_count != 0 || p->hold_count != 0 ||
738 			    !pmap_page_exists_quick(pmap, p)) {
739 				vm_page_unlock(p);
740 				continue;
741 			}
742 			act_delta = pmap_ts_referenced(p);
743 			if ((p->aflags & PGA_REFERENCED) != 0) {
744 				if (act_delta == 0)
745 					act_delta = 1;
746 				vm_page_aflag_clear(p, PGA_REFERENCED);
747 			}
748 			if (p->queue != PQ_ACTIVE && act_delta != 0) {
749 				vm_page_activate(p);
750 				p->act_count += act_delta;
751 			} else if (p->queue == PQ_ACTIVE) {
752 				if (act_delta == 0) {
753 					p->act_count -= min(p->act_count,
754 					    ACT_DECLINE);
755 					if (!remove_mode && p->act_count == 0) {
756 						pmap_remove_all(p);
757 						vm_page_deactivate(p);
758 					} else
759 						vm_page_requeue(p);
760 				} else {
761 					vm_page_activate(p);
762 					if (p->act_count < ACT_MAX -
763 					    ACT_ADVANCE)
764 						p->act_count += ACT_ADVANCE;
765 					vm_page_requeue(p);
766 				}
767 			} else if (p->queue == PQ_INACTIVE)
768 				pmap_remove_all(p);
769 			vm_page_unlock(p);
770 		}
771 		if ((backing_object = object->backing_object) == NULL)
772 			goto unlock_return;
773 		VM_OBJECT_RLOCK(backing_object);
774 		if (object != first_object)
775 			VM_OBJECT_RUNLOCK(object);
776 	}
777 unlock_return:
778 	if (object != first_object)
779 		VM_OBJECT_RUNLOCK(object);
780 }
781 
782 /*
783  * Deactivate some number of pages in a map; try to do it fairly, but
784  * that is really hard to do.
785  */
786 static void
787 vm_pageout_map_deactivate_pages(map, desired)
788 	vm_map_t map;
789 	long desired;
790 {
791 	vm_map_entry_t tmpe;
792 	vm_object_t obj, bigobj;
793 	int nothingwired;
794 
795 	if (!vm_map_trylock(map))
796 		return;
797 
798 	bigobj = NULL;
799 	nothingwired = TRUE;
800 
801 	/*
802 	 * first, search out the biggest object, and try to free pages from
803 	 * that.
804 	 */
805 	tmpe = map->header.next;
806 	while (tmpe != &map->header) {
807 		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
808 			obj = tmpe->object.vm_object;
809 			if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) {
810 				if (obj->shadow_count <= 1 &&
811 				    (bigobj == NULL ||
812 				     bigobj->resident_page_count < obj->resident_page_count)) {
813 					if (bigobj != NULL)
814 						VM_OBJECT_RUNLOCK(bigobj);
815 					bigobj = obj;
816 				} else
817 					VM_OBJECT_RUNLOCK(obj);
818 			}
819 		}
820 		if (tmpe->wired_count > 0)
821 			nothingwired = FALSE;
822 		tmpe = tmpe->next;
823 	}
824 
825 	if (bigobj != NULL) {
826 		vm_pageout_object_deactivate_pages(map->pmap, bigobj, desired);
827 		VM_OBJECT_RUNLOCK(bigobj);
828 	}
829 	/*
830 	 * Next, hunt around for other pages to deactivate.  We actually
831 	 * do this search sort of wrong -- .text first is not the best idea.
832 	 */
833 	tmpe = map->header.next;
834 	while (tmpe != &map->header) {
835 		if (pmap_resident_count(vm_map_pmap(map)) <= desired)
836 			break;
837 		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
838 			obj = tmpe->object.vm_object;
839 			if (obj != NULL) {
840 				VM_OBJECT_RLOCK(obj);
841 				vm_pageout_object_deactivate_pages(map->pmap, obj, desired);
842 				VM_OBJECT_RUNLOCK(obj);
843 			}
844 		}
845 		tmpe = tmpe->next;
846 	}
847 
848 	/*
849 	 * Remove all mappings if a process is swapped out; this will free page
850 	 * table pages.
851 	 */
852 	if (desired == 0 && nothingwired) {
853 		pmap_remove(vm_map_pmap(map), vm_map_min(map),
854 		    vm_map_max(map));
855 	}
856 	vm_map_unlock(map);
857 }
858 #endif		/* !defined(NO_SWAPPING) */
859 
860 /*
861  *	vm_pageout_scan does the dirty work for the pageout daemon.
862  */
863 static void
864 vm_pageout_scan(int pass)
865 {
866 	vm_page_t m, next;
867 	struct vm_page marker;
868 	struct vm_pagequeue *pq;
869 	int page_shortage, maxscan, pcount;
870 	int addl_page_shortage;
871 	vm_object_t object;
872 	int act_delta;
873 	int vnodes_skipped = 0;
874 	int maxlaunder;
875 	boolean_t queues_locked;
876 
877 	vm_pageout_init_marker(&marker, PQ_INACTIVE);
878 
879 	/*
880 	 * Decrease registered cache sizes.
881 	 */
882 	EVENTHANDLER_INVOKE(vm_lowmem, 0);
883 	/*
884 	 * We do this explicitly after the caches have been drained above.
885 	 */
886 	uma_reclaim();
887 
888 	/*
889 	 * The addl_page_shortage is the number of temporarily
890 	 * stuck pages in the inactive queue.  In other words, the
891 	 * number of pages from cnt.v_inactive_count that should be
892 	 * discounted in setting the target for the active queue scan.
893 	 */
894 	addl_page_shortage = atomic_readandclear_int(&vm_pageout_deficit);
895 
896 	/*
897 	 * Calculate the number of pages we want to either free or move
898 	 * to the cache.
899 	 */
900 	page_shortage = vm_paging_target() + addl_page_shortage;
901 
902 	/*
903 	 * maxlaunder limits the number of dirty pages we flush per scan.
904 	 * For most systems a smaller value (16 or 32) is more robust under
905 	 * extreme memory and disk pressure because any unnecessary writes
906 	 * to disk can result in extreme performance degradation.  However,
907 	 * systems with excessive dirty pages (especially when MAP_NOSYNC is
908 	 * used) will die horribly with limited laundering.  If the pageout
909 	 * daemon cannot clean enough pages in the first pass, we let it go
910 	 * all out in succeeding passes.
911 	 */
912 	if ((maxlaunder = vm_max_launder) <= 1)
913 		maxlaunder = 1;
914 	if (pass)
915 		maxlaunder = 10000;
916 
917 	maxscan = cnt.v_inactive_count;
918 
919 	/*
920 	 * Start scanning the inactive queue for pages we can move to the
921 	 * cache or free.  The scan will stop when the target is reached or
922 	 * we have scanned the entire inactive queue.  Note that m->act_count
923 	 * is not used to form decisions for the inactive queue, only for the
924 	 * active queue.
925 	 */
926 	pq = &vm_pagequeues[PQ_INACTIVE];
927 	vm_pagequeue_lock(pq);
928 	queues_locked = TRUE;
929 	for (m = TAILQ_FIRST(&pq->pq_pl);
930 	     m != NULL && maxscan-- > 0 && page_shortage > 0;
931 	     m = next) {
932 		vm_pagequeue_assert_locked(pq);
933 		KASSERT(queues_locked, ("unlocked queues"));
934 		KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m));
935 
936 		PCPU_INC(cnt.v_pdpages);
937 		next = TAILQ_NEXT(m, pageq);
938 
939 		/*
940 		 * skip marker pages
941 		 */
942 		if (m->flags & PG_MARKER)
943 			continue;
944 
945 		KASSERT((m->flags & PG_FICTITIOUS) == 0,
946 		    ("Fictitious page %p cannot be in inactive queue", m));
947 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
948 		    ("Unmanaged page %p cannot be in inactive queue", m));
949 
950 		/*
951 		 * The page or object lock acquisitions fail if the
952 		 * page was removed from the queue or moved to a
953 		 * different position within the queue.  In either
954 		 * case, addl_page_shortage should not be incremented.
955 		 */
956 		if (!vm_pageout_page_lock(m, &next)) {
957 			vm_page_unlock(m);
958 			continue;
959 		}
960 		object = m->object;
961 		if (!VM_OBJECT_TRYWLOCK(object) &&
962 		    !vm_pageout_fallback_object_lock(m, &next)) {
963 			vm_page_unlock(m);
964 			VM_OBJECT_WUNLOCK(object);
965 			continue;
966 		}
967 
968 		/*
969 		 * Don't mess with busy pages, keep them at the
970 		 * front of the queue, most likely they are being
971 		 * paged out.  Increment addl_page_shortage for busy
972 		 * pages, because they may leave the inactive queue
973 		 * shortly after page scan is finished.
974 		 */
975 		if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) {
976 			vm_page_unlock(m);
977 			VM_OBJECT_WUNLOCK(object);
978 			addl_page_shortage++;
979 			continue;
980 		}
981 
982 		/*
983 		 * We unlock the inactive page queue, invalidating the
984 		 * 'next' pointer.  Use our marker to remember our
985 		 * place.
986 		 */
987 		TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq);
988 		vm_pagequeue_unlock(pq);
989 		queues_locked = FALSE;
990 
991 		/*
992 		 * We bump the activation count if the page has been
993 		 * referenced while in the inactive queue.  This makes
994 		 * it less likely that the page will be added back to the
995 		 * inactive queue prematurely again.  Here we check the
996 		 * page tables (or emulated bits, if any), since the upper
997 		 * level VM system does not know anything about existing
998 		 * references.
999 		 */
1000 		act_delta = 0;
1001 		if ((m->aflags & PGA_REFERENCED) != 0) {
1002 			vm_page_aflag_clear(m, PGA_REFERENCED);
1003 			act_delta = 1;
1004 		}
1005 		if (object->ref_count != 0) {
1006 			act_delta += pmap_ts_referenced(m);
1007 		} else {
1008 			KASSERT(!pmap_page_is_mapped(m),
1009 			    ("vm_pageout_scan: page %p is mapped", m));
1010 		}
1011 
1012 		/*
1013 		 * If the upper level VM system knows about any page
1014 		 * references, we reactivate the page or requeue it.
1015 		 */
1016 		if (act_delta != 0) {
1017 			if (object->ref_count) {
1018 				vm_page_activate(m);
1019 				m->act_count += act_delta + ACT_ADVANCE;
1020 			} else {
1021 				vm_pagequeue_lock(pq);
1022 				queues_locked = TRUE;
1023 				vm_page_requeue_locked(m);
1024 			}
1025 			VM_OBJECT_WUNLOCK(object);
1026 			vm_page_unlock(m);
1027 			goto relock_queues;
1028 		}
1029 
1030 		if (m->hold_count != 0) {
1031 			vm_page_unlock(m);
1032 			VM_OBJECT_WUNLOCK(object);
1033 
1034 			/*
1035 			 * Held pages are essentially stuck in the
1036 			 * queue.  So, they ought to be discounted
1037 			 * from cnt.v_inactive_count.  See the
1038 			 * calculation of the page_shortage for the
1039 			 * loop over the active queue below.
1040 			 */
1041 			addl_page_shortage++;
1042 			goto relock_queues;
1043 		}
1044 
1045 		/*
1046 		 * If the page appears to be clean at the machine-independent
1047 		 * layer, then remove all of its mappings from the pmap in
1048 		 * anticipation of placing it onto the cache queue.  If,
1049 		 * however, any of the page's mappings allow write access,
1050 		 * then the page may still be modified until the last of those
1051 		 * mappings are removed.
1052 		 */
1053 		vm_page_test_dirty(m);
1054 		if (m->dirty == 0 && object->ref_count != 0)
1055 			pmap_remove_all(m);
1056 
1057 		if (m->valid == 0) {
1058 			/*
1059 			 * Invalid pages can be easily freed
1060 			 */
1061 			vm_page_free(m);
1062 			PCPU_INC(cnt.v_dfree);
1063 			--page_shortage;
1064 		} else if (m->dirty == 0) {
1065 			/*
1066 			 * Clean pages can be placed onto the cache queue.
1067 			 * This effectively frees them.
1068 			 */
1069 			vm_page_cache(m);
1070 			--page_shortage;
1071 		} else if ((m->flags & PG_WINATCFLS) == 0 && pass == 0) {
1072 			/*
1073 			 * Dirty pages need to be paged out, but flushing
1074 			 * a page is extremely expensive versus freeing
1075 			 * a clean page.  Rather than artificially limiting
1076 			 * the number of pages we can flush, we instead give
1077 			 * dirty pages extra priority on the inactive queue
1078 			 * by forcing them to be cycled through the queue
1079 			 * twice before being flushed, after which the
1080 			 * (now clean) page will cycle through once more
1081 			 * before being freed.  This significantly extends
1082 			 * the thrash point for a heavily loaded machine.
1083 			 */
1084 			m->flags |= PG_WINATCFLS;
1085 			vm_pagequeue_lock(pq);
1086 			queues_locked = TRUE;
1087 			vm_page_requeue_locked(m);
1088 		} else if (maxlaunder > 0) {
1089 			/*
1090 			 * We always want to try to flush some dirty pages if
1091 			 * we encounter them, to keep the system stable.
1092 			 * Normally this number is small, but under extreme
1093 			 * pressure where there are insufficient clean pages
1094 			 * on the inactive queue, we may have to go all out.
1095 			 */
1096 			int swap_pageouts_ok;
1097 			struct vnode *vp = NULL;
1098 			struct mount *mp = NULL;
1099 
1100 			if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
1101 				swap_pageouts_ok = 1;
1102 			} else {
1103 				swap_pageouts_ok = !(defer_swap_pageouts || disable_swap_pageouts);
1104 				swap_pageouts_ok |= (!disable_swap_pageouts && defer_swap_pageouts &&
1105 				vm_page_count_min());
1106 
1107 			}
1108 
1109 			/*
1110 			 * We don't bother paging objects that are "dead".
1111 			 * Those objects are in a "rundown" state.
1112 			 */
1113 			if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) {
1114 				vm_pagequeue_lock(pq);
1115 				vm_page_unlock(m);
1116 				VM_OBJECT_WUNLOCK(object);
1117 				queues_locked = TRUE;
1118 				vm_page_requeue_locked(m);
1119 				goto relock_queues;
1120 			}
1121 
1122 			/*
1123 			 * The object is already known NOT to be dead.   It
1124 			 * is possible for the vget() to block the whole
1125 			 * pageout daemon, but the new low-memory handling
1126 			 * code should prevent it.
1127 			 *
1128 			 * The previous code skipped locked vnodes and, worse,
1129 			 * reordered pages in the queue.  This results in
1130 			 * completely non-deterministic operation and, on a
1131 			 * busy system, can lead to extremely non-optimal
1132 			 * pageouts.  For example, it can cause clean pages
1133 			 * to be freed and dirty pages to be moved to the end
1134 			 * of the queue.  Since dirty pages are also moved to
1135 			 * the end of the queue once-cleaned, this gives
1136 			 * way too large a weighting to deferring the freeing
1137 			 * of dirty pages.
1138 			 *
1139 			 * We can't wait forever for the vnode lock; we might
1140 			 * deadlock due to a vn_read() getting stuck in
1141 			 * vm_wait while holding this vnode.  We skip the
1142 			 * vnode if we can't get it in a reasonable amount
1143 			 * of time.
1144 			 */
1145 			if (object->type == OBJT_VNODE) {
1146 				vm_page_unlock(m);
1147 				vp = object->handle;
1148 				if (vp->v_type == VREG &&
1149 				    vn_start_write(vp, &mp, V_NOWAIT) != 0) {
1150 					mp = NULL;
1151 					++pageout_lock_miss;
1152 					if (object->flags & OBJ_MIGHTBEDIRTY)
1153 						vnodes_skipped++;
1154 					goto unlock_and_continue;
1155 				}
1156 				KASSERT(mp != NULL,
1157 				    ("vp %p with NULL v_mount", vp));
1158 				vm_object_reference_locked(object);
1159 				VM_OBJECT_WUNLOCK(object);
1160 				if (vget(vp, LK_EXCLUSIVE | LK_TIMELOCK,
1161 				    curthread)) {
1162 					VM_OBJECT_WLOCK(object);
1163 					++pageout_lock_miss;
1164 					if (object->flags & OBJ_MIGHTBEDIRTY)
1165 						vnodes_skipped++;
1166 					vp = NULL;
1167 					goto unlock_and_continue;
1168 				}
1169 				VM_OBJECT_WLOCK(object);
1170 				vm_page_lock(m);
1171 				vm_pagequeue_lock(pq);
1172 				queues_locked = TRUE;
1173 				/*
1174 				 * The page might have been moved to another
1175 				 * queue during potential blocking in vget()
1176 				 * above.  The page might have been freed and
1177 				 * reused for another vnode.
1178 				 */
1179 				if (m->queue != PQ_INACTIVE ||
1180 				    m->object != object ||
1181 				    TAILQ_NEXT(m, pageq) != &marker) {
1182 					vm_page_unlock(m);
1183 					if (object->flags & OBJ_MIGHTBEDIRTY)
1184 						vnodes_skipped++;
1185 					goto unlock_and_continue;
1186 				}
1187 
1188 				/*
1189 				 * The page may have been busied during the
1190 				 * blocking in vget().  We don't move the
1191 				 * page back onto the end of the queue; the
1192 				 * statistics are more correct that way.
1193 				 */
1194 				if (m->busy || (m->oflags & VPO_BUSY)) {
1195 					vm_page_unlock(m);
1196 					goto unlock_and_continue;
1197 				}
1198 
1199 				/*
1200 				 * If the page has become held, it might
1201 				 * be undergoing I/O, so skip it
1202 				 */
1203 				if (m->hold_count) {
1204 					vm_page_unlock(m);
1205 					vm_page_requeue_locked(m);
1206 					if (object->flags & OBJ_MIGHTBEDIRTY)
1207 						vnodes_skipped++;
1208 					goto unlock_and_continue;
1209 				}
1210 				vm_pagequeue_unlock(pq);
1211 				queues_locked = FALSE;
1212 			}
1213 
1214 			/*
1215 			 * If a page is dirty, then it is either being washed
1216 			 * (but not yet cleaned) or it is still in the
1217 			 * laundry.  If it is still in the laundry, then we
1218 			 * start the cleaning operation.
1219 			 *
1220 			 * decrement page_shortage on success to account for
1221 			 * the (future) cleaned page.  Otherwise we could wind
1222 			 * up laundering or cleaning too many pages.
1223 			 */
1224 			if (vm_pageout_clean(m) != 0) {
1225 				--page_shortage;
1226 				--maxlaunder;
1227 			}
1228 unlock_and_continue:
1229 			vm_page_lock_assert(m, MA_NOTOWNED);
1230 			VM_OBJECT_WUNLOCK(object);
1231 			if (mp != NULL) {
1232 				if (queues_locked) {
1233 					vm_pagequeue_unlock(pq);
1234 					queues_locked = FALSE;
1235 				}
1236 				if (vp != NULL)
1237 					vput(vp);
1238 				vm_object_deallocate(object);
1239 				vn_finished_write(mp);
1240 			}
1241 			vm_page_lock_assert(m, MA_NOTOWNED);
1242 			goto relock_queues;
1243 		}
1244 		vm_page_unlock(m);
1245 		VM_OBJECT_WUNLOCK(object);
1246 relock_queues:
1247 		if (!queues_locked) {
1248 			vm_pagequeue_lock(pq);
1249 			queues_locked = TRUE;
1250 		}
1251 		next = TAILQ_NEXT(&marker, pageq);
1252 		TAILQ_REMOVE(&pq->pq_pl, &marker, pageq);
1253 	}
1254 	vm_pagequeue_unlock(pq);
1255 
1256 	/*
1257 	 * Compute the number of pages we want to try to move from the
1258 	 * active queue to the inactive queue.
1259 	 */
1260 	page_shortage = vm_paging_target() +
1261 		cnt.v_inactive_target - cnt.v_inactive_count;
1262 	page_shortage += addl_page_shortage;
1263 
1264 	/*
1265 	 * Scan the active queue for things we can deactivate. We nominally
1266 	 * track the per-page activity counter and use it to locate
1267 	 * deactivation candidates.
1268 	 */
1269 	pcount = cnt.v_active_count;
1270 	pq = &vm_pagequeues[PQ_ACTIVE];
1271 	vm_pagequeue_lock(pq);
1272 	m = TAILQ_FIRST(&pq->pq_pl);
1273 	while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
1274 
1275 		KASSERT(m->queue == PQ_ACTIVE,
1276 		    ("vm_pageout_scan: page %p isn't active", m));
1277 
1278 		next = TAILQ_NEXT(m, pageq);
1279 		if ((m->flags & PG_MARKER) != 0) {
1280 			m = next;
1281 			continue;
1282 		}
1283 		KASSERT((m->flags & PG_FICTITIOUS) == 0,
1284 		    ("Fictitious page %p cannot be in active queue", m));
1285 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1286 		    ("Unmanaged page %p cannot be in active queue", m));
1287 		if (!vm_pageout_page_lock(m, &next)) {
1288 			vm_page_unlock(m);
1289 			m = next;
1290 			continue;
1291 		}
1292 		object = m->object;
1293 		if (!VM_OBJECT_TRYWLOCK(object) &&
1294 		    !vm_pageout_fallback_object_lock(m, &next)) {
1295 			VM_OBJECT_WUNLOCK(object);
1296 			vm_page_unlock(m);
1297 			m = next;
1298 			continue;
1299 		}
1300 
1301 		/*
1302 		 * Don't deactivate pages that are busy.
1303 		 */
1304 		if ((m->busy != 0) ||
1305 		    (m->oflags & VPO_BUSY) ||
1306 		    (m->hold_count != 0)) {
1307 			vm_page_unlock(m);
1308 			VM_OBJECT_WUNLOCK(object);
1309 			vm_page_requeue_locked(m);
1310 			m = next;
1311 			continue;
1312 		}
1313 
1314 		/*
1315 		 * The count for pagedaemon pages is done after checking the
1316 		 * page for eligibility...
1317 		 */
1318 		PCPU_INC(cnt.v_pdpages);
1319 
1320 		/*
1321 		 * Check to see "how much" the page has been used.
1322 		 */
1323 		act_delta = 0;
1324 		if (m->aflags & PGA_REFERENCED) {
1325 			vm_page_aflag_clear(m, PGA_REFERENCED);
1326 			act_delta += 1;
1327 		}
1328 		if (object->ref_count != 0)
1329 			act_delta += pmap_ts_referenced(m);
1330 
1331 		/*
1332 		 * Advance or decay the act_count based on recent usage.
1333 		 */
1334 		if (act_delta) {
1335 			m->act_count += ACT_ADVANCE + act_delta;
1336 			if (m->act_count > ACT_MAX)
1337 				m->act_count = ACT_MAX;
1338 		} else {
1339 			m->act_count -= min(m->act_count, ACT_DECLINE);
1340 			act_delta = m->act_count;
1341 		}
1342 
1343 		/*
1344 		 * Move this page to the tail of the active or inactive
1345 		 * queue depending on usage.
1346 		 */
1347 		if (act_delta == 0) {
1348 			KASSERT(object->ref_count != 0 ||
1349 			    !pmap_page_is_mapped(m),
1350 			    ("vm_pageout_scan: page %p is mapped", m));
1351 			/* Dequeue to avoid later lock recursion. */
1352 			vm_page_dequeue_locked(m);
1353 			vm_page_deactivate(m);
1354 			page_shortage--;
1355 		} else
1356 			vm_page_requeue_locked(m);
1357 		vm_page_unlock(m);
1358 		VM_OBJECT_WUNLOCK(object);
1359 		m = next;
1360 	}
1361 	vm_pagequeue_unlock(pq);
1362 #if !defined(NO_SWAPPING)
1363 	/*
1364 	 * Idle process swapout -- run once per second.
1365 	 */
1366 	if (vm_swap_idle_enabled) {
1367 		static long lsec;
1368 		if (time_second != lsec) {
1369 			vm_req_vmdaemon(VM_SWAP_IDLE);
1370 			lsec = time_second;
1371 		}
1372 	}
1373 #endif
1374 
1375 	/*
1376 	 * If we didn't get enough free pages, and we have skipped a vnode
1377 	 * in a writeable object, wake up the sync daemon.  Also kick swapout
1378 	 * if we did not get enough free pages.
1379 	 */
1380 	if (vm_paging_target() > 0) {
1381 		if (vnodes_skipped && vm_page_count_min())
1382 			(void) speedup_syncer();
1383 #if !defined(NO_SWAPPING)
1384 		if (vm_swap_enabled && vm_page_count_target())
1385 			vm_req_vmdaemon(VM_SWAP_NORMAL);
1386 #endif
1387 	}
1388 
1389 	/*
1390 	 * If we are critically low on one of RAM or swap and low on
1391 	 * the other, kill the largest process.  However, we avoid
1392 	 * doing this on the first pass in order to give ourselves a
1393 	 * chance to flush out dirty vnode-backed pages and to allow
1394 	 * active pages to be moved to the inactive queue and reclaimed.
1395 	 */
1396 	if (pass != 0 &&
1397 	    ((swap_pager_avail < 64 && vm_page_count_min()) ||
1398 	     (swap_pager_full && vm_paging_target() > 0)))
1399 		vm_pageout_oom(VM_OOM_MEM);
1400 }
1401 
1402 
1403 void
1404 vm_pageout_oom(int shortage)
1405 {
1406 	struct proc *p, *bigproc;
1407 	vm_offset_t size, bigsize;
1408 	struct thread *td;
1409 	struct vmspace *vm;
1410 
1411 	/*
1412 	 * We keep the process bigproc locked once we find it to keep anyone
1413 	 * from messing with it; however, there is a possibility of
1414 	 * deadlock if process B is bigproc and one of its child processes
1415 	 * attempts to propagate a signal to B while we are waiting for that
1416 	 * child's lock while walking this list.  To avoid this, we don't block on
1417 	 * the process lock but just skip a process if it is already locked.
1418 	 */
1419 	bigproc = NULL;
1420 	bigsize = 0;
1421 	sx_slock(&allproc_lock);
1422 	FOREACH_PROC_IN_SYSTEM(p) {
1423 		int breakout;
1424 
1425 		if (PROC_TRYLOCK(p) == 0)
1426 			continue;
1427 		/*
1428 		 * If this is a system, protected or killed process, skip it.
1429 		 */
1430 		if (p->p_state != PRS_NORMAL ||
1431 		    (p->p_flag & (P_INEXEC | P_PROTECTED | P_SYSTEM)) ||
1432 		    (p->p_pid == 1) || P_KILLED(p) ||
1433 		    ((p->p_pid < 48) && (swap_pager_avail != 0))) {
1434 			PROC_UNLOCK(p);
1435 			continue;
1436 		}
1437 		/*
1438 		 * If the process is in a non-running type state,
1439 		 * don't touch it.  Check all the threads individually.
1440 		 */
1441 		breakout = 0;
1442 		FOREACH_THREAD_IN_PROC(p, td) {
1443 			thread_lock(td);
1444 			if (!TD_ON_RUNQ(td) &&
1445 			    !TD_IS_RUNNING(td) &&
1446 			    !TD_IS_SLEEPING(td) &&
1447 			    !TD_IS_SUSPENDED(td)) {
1448 				thread_unlock(td);
1449 				breakout = 1;
1450 				break;
1451 			}
1452 			thread_unlock(td);
1453 		}
1454 		if (breakout) {
1455 			PROC_UNLOCK(p);
1456 			continue;
1457 		}
1458 		/*
1459 		 * get the process size
1460 		 */
1461 		vm = vmspace_acquire_ref(p);
1462 		if (vm == NULL) {
1463 			PROC_UNLOCK(p);
1464 			continue;
1465 		}
1466 		if (!vm_map_trylock_read(&vm->vm_map)) {
1467 			vmspace_free(vm);
1468 			PROC_UNLOCK(p);
1469 			continue;
1470 		}
1471 		size = vmspace_swap_count(vm);
1472 		vm_map_unlock_read(&vm->vm_map);
1473 		if (shortage == VM_OOM_MEM)
1474 			size += vmspace_resident_count(vm);
1475 		vmspace_free(vm);
1476 		/*
1477 		 * If this process is bigger than the biggest one,
1478 		 * remember it.
1479 		 */
1480 		if (size > bigsize) {
1481 			if (bigproc != NULL)
1482 				PROC_UNLOCK(bigproc);
1483 			bigproc = p;
1484 			bigsize = size;
1485 		} else
1486 			PROC_UNLOCK(p);
1487 	}
1488 	sx_sunlock(&allproc_lock);
1489 	if (bigproc != NULL) {
1490 		killproc(bigproc, "out of swap space");
1491 		sched_nice(bigproc, PRIO_MIN);
1492 		PROC_UNLOCK(bigproc);
1493 		wakeup(&cnt.v_free_count);
1494 	}
1495 }
1496 
1497 /*
1498  * This routine tries to maintain the pseudo LRU active queue,
1499  * so that during long periods of time when there is no paging,
1500  * some statistics accumulation still occurs.  This code
1501  * helps the situation where paging just starts to occur.
1502  */
1503 static void
1504 vm_pageout_page_stats(void)
1505 {
1506 	struct vm_pagequeue *pq;
1507 	vm_object_t object;
1508 	vm_page_t m, next;
1509 	int pcount, tpcount;		/* Number of pages to check */
1510 	static int fullintervalcount = 0;
1511 	int page_shortage;
1512 
1513 	page_shortage =
1514 	    (cnt.v_inactive_target + cnt.v_cache_max + cnt.v_free_min) -
1515 	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
1516 
1517 	if (page_shortage <= 0)
1518 		return;
1519 
1520 	pcount = cnt.v_active_count;
1521 	fullintervalcount += vm_pageout_stats_interval;
1522 	if (fullintervalcount < vm_pageout_full_stats_interval) {
1523 		vm_pageout_stats++;
1524 		tpcount = (int64_t)vm_pageout_stats_max * cnt.v_active_count /
1525 		    cnt.v_page_count;
1526 		if (pcount > tpcount)
1527 			pcount = tpcount;
1528 	} else {
1529 		vm_pageout_full_stats++;
1530 		fullintervalcount = 0;
1531 	}
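	/*
	 * For example, with the hypothetical values vm_pageout_stats_max =
	 * 1000, cnt.v_active_count = 20000 and cnt.v_page_count = 100000,
	 * a partial scan visits at most 1000 * 20000 / 100000 = 200 pages;
	 * the scan length is scaled by the active queue's share of all pages.
	 */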
1532 
1533 	pq = &vm_pagequeues[PQ_ACTIVE];
1534 	vm_pagequeue_lock(pq);
1535 	m = TAILQ_FIRST(&pq->pq_pl);
1536 	while ((m != NULL) && (pcount-- > 0)) {
1537 		int actcount;
1538 
1539 		KASSERT(m->queue == PQ_ACTIVE,
1540 		    ("vm_pageout_page_stats: page %p isn't active", m));
1541 
1542 		next = TAILQ_NEXT(m, pageq);
1543 		if ((m->flags & PG_MARKER) != 0) {
1544 			m = next;
1545 			continue;
1546 		}
1547 		vm_page_lock_assert(m, MA_NOTOWNED);
1548 		if (!vm_pageout_page_lock(m, &next)) {
1549 			vm_page_unlock(m);
1550 			m = next;
1551 			continue;
1552 		}
1553 		object = m->object;
1554 		if (!VM_OBJECT_TRYWLOCK(object) &&
1555 		    !vm_pageout_fallback_object_lock(m, &next)) {
1556 			VM_OBJECT_WUNLOCK(object);
1557 			vm_page_unlock(m);
1558 			m = next;
1559 			continue;
1560 		}
1561 
1562 		/*
1563 		 * Don't deactivate pages that are busy.
1564 		 */
1565 		if ((m->busy != 0) ||
1566 		    (m->oflags & VPO_BUSY) ||
1567 		    (m->hold_count != 0)) {
1568 			vm_page_unlock(m);
1569 			VM_OBJECT_WUNLOCK(object);
1570 			vm_page_requeue_locked(m);
1571 			m = next;
1572 			continue;
1573 		}
1574 
1575 		actcount = 0;
1576 		if (m->aflags & PGA_REFERENCED) {
1577 			vm_page_aflag_clear(m, PGA_REFERENCED);
1578 			actcount += 1;
1579 		}
1580 
1581 		actcount += pmap_ts_referenced(m);
1582 		if (actcount) {
1583 			m->act_count += ACT_ADVANCE + actcount;
1584 			if (m->act_count > ACT_MAX)
1585 				m->act_count = ACT_MAX;
1586 			vm_page_requeue_locked(m);
1587 		} else {
1588 			if (m->act_count == 0) {
1589 				/*
1590 				 * We turn off page access, so that we have
1591 				 * more accurate RSS stats.  We don't do this
1592 				 * in the normal page deactivation when the
1593 				 * system is loaded VM-wise, because the
1594 				 * cost of the large number of page protect
1595 				 * operations would be higher than the value
1596 				 * of doing the operation.
1597 				 */
1598 				pmap_remove_all(m);
1599 				/* Dequeue to avoid later lock recursion. */
1600 				vm_page_dequeue_locked(m);
1601 				vm_page_deactivate(m);
1602 			} else {
1603 				m->act_count -= min(m->act_count, ACT_DECLINE);
1604 				vm_page_requeue_locked(m);
1605 			}
1606 		}
1607 		vm_page_unlock(m);
1608 		VM_OBJECT_WUNLOCK(object);
1609 		m = next;
1610 	}
1611 	vm_pagequeue_unlock(pq);
1612 }
1613 
1614 /*
1615  *	vm_pageout is the high level pageout daemon.
1616  */
1617 static void
1618 vm_pageout(void)
1619 {
1620 	int error, pass;
1621 
1622 	/*
1623 	 * Initialize some paging parameters.
1624 	 */
1625 	cnt.v_interrupt_free_min = 2;
1626 	if (cnt.v_page_count < 2000)
1627 		vm_pageout_page_count = 8;
1628 
1629 	/*
1630 	 * v_free_reserved needs to include enough for the largest
1631 	 * swap pager structures plus enough for any pv_entry structs
1632 	 * when paging.
1633 	 */
1634 	if (cnt.v_page_count > 1024)
1635 		cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200;
1636 	else
1637 		cnt.v_free_min = 4;
1638 	cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
1639 	    cnt.v_interrupt_free_min;
1640 	cnt.v_free_reserved = vm_pageout_page_count +
1641 	    cnt.v_pageout_free_min + (cnt.v_page_count / 768);
1642 	cnt.v_free_severe = cnt.v_free_min / 2;
1643 	cnt.v_free_min += cnt.v_free_reserved;
1644 	cnt.v_free_severe += cnt.v_free_reserved;
1645 
1646 	/*
1647 	 * v_free_target and v_cache_min control pageout hysteresis.  Note
1648 	 * that these are more a measure of the VM cache queue hysteresis
1649  * than the VM free queue.  Specifically, v_free_target is the
1650 	 * high water mark (free+cache pages).
1651 	 *
1652 	 * v_free_reserved + v_cache_min (mostly means v_cache_min) is the
1653 	 * low water mark, while v_free_min is the stop.  v_cache_min must
1654 	 * be big enough to handle memory needs while the pageout daemon
1655 	 * is signalled and run to free more pages.
1656 	 */
1657 	if (cnt.v_free_count > 6144)
1658 		cnt.v_free_target = 4 * cnt.v_free_min + cnt.v_free_reserved;
1659 	else
1660 		cnt.v_free_target = 2 * cnt.v_free_min + cnt.v_free_reserved;
1661 
1662 	if (cnt.v_free_count > 2048) {
1663 		cnt.v_cache_min = cnt.v_free_target;
1664 		cnt.v_cache_max = 2 * cnt.v_cache_min;
1665 		cnt.v_inactive_target = (3 * cnt.v_free_target) / 2;
1666 	} else {
1667 		cnt.v_cache_min = 0;
1668 		cnt.v_cache_max = 0;
1669 		cnt.v_inactive_target = cnt.v_free_count / 4;
1670 	}
1671 	if (cnt.v_inactive_target > cnt.v_free_count / 3)
1672 		cnt.v_inactive_target = cnt.v_free_count / 3;
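	/*
	 * Worked example with assumed values: on a machine with
	 * cnt.v_page_count = 262144 (1 GB of 4 KB pages) and a MAXBSIZE of
	 * 64 KB, the settings above come out to v_free_min = 4 + 1305 =
	 * 1309, v_pageout_free_min = 32 + 2 = 34, and v_free_reserved =
	 * 16 + 34 + 341 = 391, giving a final v_free_min of 1700 and
	 * v_free_severe of 654 + 391 = 1045.  If more than 6144 pages are
	 * free at this point, v_free_target = 4 * 1700 + 391 = 7191 and
	 * v_inactive_target = 3 * 7191 / 2 = 10786, subject to the
	 * v_free_count / 3 cap just applied.
	 */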
1673 
1674 	/* XXX does not really belong here */
1675 	if (vm_page_max_wired == 0)
1676 		vm_page_max_wired = cnt.v_free_count / 3;
1677 
1678 	if (vm_pageout_stats_max == 0)
1679 		vm_pageout_stats_max = cnt.v_free_target;
1680 
1681 	/*
1682 	 * Set interval in seconds for stats scan.
1683 	 */
1684 	if (vm_pageout_stats_interval == 0)
1685 		vm_pageout_stats_interval = 5;
1686 	if (vm_pageout_full_stats_interval == 0)
1687 		vm_pageout_full_stats_interval = vm_pageout_stats_interval * 4;
1688 
1689 	swap_pager_swap_init();
1690 	pass = 0;
1691 	/*
1692 	 * The pageout daemon is never done, so loop forever.
1693 	 */
1694 	while (TRUE) {
1695 		/*
1696 		 * If we have enough free memory, wakeup waiters.  Do
1697 		 * not clear vm_pages_needed until we reach our target,
1698 		 * otherwise we may be woken up over and over again and
1699 		 * waste a lot of cpu.
1700 		 */
1701 		mtx_lock(&vm_page_queue_free_mtx);
1702 		if (vm_pages_needed && !vm_page_count_min()) {
1703 			if (!vm_paging_needed())
1704 				vm_pages_needed = 0;
1705 			wakeup(&cnt.v_free_count);
1706 		}
1707 		if (vm_pages_needed) {
1708 			/*
1709 			 * Still not done: take a second pass without waiting
1710 			 * (unlimited dirty cleaning); otherwise sleep a bit
1711 			 * and try again.
1712 			 */
1713 			++pass;
1714 			if (pass > 1)
1715 				msleep(&vm_pages_needed,
1716 				    &vm_page_queue_free_mtx, PVM, "psleep",
1717 				    hz / 2);
1718 		} else {
1719 			/*
1720 			 * Good enough, sleep & handle stats.  Prime the pass
1721 			 * for the next run.
1722 			 */
1723 			if (pass > 1)
1724 				pass = 1;
1725 			else
1726 				pass = 0;
1727 			error = msleep(&vm_pages_needed,
1728 			    &vm_page_queue_free_mtx, PVM, "psleep",
1729 			    vm_pageout_stats_interval * hz);
1730 			if (error && !vm_pages_needed) {
1731 				mtx_unlock(&vm_page_queue_free_mtx);
1732 				pass = 0;
1733 				vm_pageout_page_stats();
1734 				continue;
1735 			}
1736 		}
1737 		if (vm_pages_needed)
1738 			cnt.v_pdwakeups++;
1739 		mtx_unlock(&vm_page_queue_free_mtx);
1740 		vm_pageout_scan(pass);
1741 	}
1742 }
1743 
1744 /*
1745  * Unless the free page queue lock is held by the caller, this function
1746  * should be regarded as advisory.  Specifically, the caller should
1747  * not msleep() on &cnt.v_free_count following this function unless
1748  * the free page queue lock is held until the msleep() is performed.
1749  */
1750 void
1751 pagedaemon_wakeup(void)
1752 {
1753 
1754 	if (!vm_pages_needed && curthread->td_proc != pageproc) {
1755 		vm_pages_needed = 1;
1756 		wakeup(&vm_pages_needed);
1757 	}
1758 }
1759 
1760 #if !defined(NO_SWAPPING)
1761 static void
1762 vm_req_vmdaemon(int req)
1763 {
1764 	static int lastrun = 0;
1765 
1766 	mtx_lock(&vm_daemon_mtx);
1767 	vm_pageout_req_swapout |= req;
1768 	if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
1769 		wakeup(&vm_daemon_needed);
1770 		lastrun = ticks;
1771 	}
1772 	mtx_unlock(&vm_daemon_mtx);
1773 }
1774 
1775 static void
1776 vm_daemon(void)
1777 {
1778 	struct rlimit rsslim;
1779 	struct proc *p;
1780 	struct thread *td;
1781 	struct vmspace *vm;
1782 	int breakout, swapout_flags, tryagain, attempts;
1783 #ifdef RACCT
1784 	uint64_t rsize, ravailable;
1785 #endif
1786 
1787 	while (TRUE) {
1788 		mtx_lock(&vm_daemon_mtx);
1789 #ifdef RACCT
1790 		msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", hz);
1791 #else
1792 		msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", 0);
1793 #endif
1794 		swapout_flags = vm_pageout_req_swapout;
1795 		vm_pageout_req_swapout = 0;
1796 		mtx_unlock(&vm_daemon_mtx);
1797 		if (swapout_flags)
1798 			swapout_procs(swapout_flags);
1799 
1800 		/*
1801 		 * Scan the processes for those exceeding their rlimits or
1802 		 * that are swapped out -- deactivate their pages.
1803 		 */
1804 		tryagain = 0;
1805 		attempts = 0;
1806 again:
1807 		attempts++;
1808 		sx_slock(&allproc_lock);
1809 		FOREACH_PROC_IN_SYSTEM(p) {
1810 			vm_pindex_t limit, size;
1811 
1812 			/*
1813 			 * if this is a system process or if we have already
1814 			 * looked at this process, skip it.
1815 			 */
1816 			PROC_LOCK(p);
1817 			if (p->p_state != PRS_NORMAL ||
1818 			    p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) {
1819 				PROC_UNLOCK(p);
1820 				continue;
1821 			}
1822 			/*
1823 			 * if the process is in a non-running type state,
1824 			 * don't touch it.
1825 			 */
1826 			breakout = 0;
1827 			FOREACH_THREAD_IN_PROC(p, td) {
1828 				thread_lock(td);
1829 				if (!TD_ON_RUNQ(td) &&
1830 				    !TD_IS_RUNNING(td) &&
1831 				    !TD_IS_SLEEPING(td) &&
1832 				    !TD_IS_SUSPENDED(td)) {
1833 					thread_unlock(td);
1834 					breakout = 1;
1835 					break;
1836 				}
1837 				thread_unlock(td);
1838 			}
1839 			if (breakout) {
1840 				PROC_UNLOCK(p);
1841 				continue;
1842 			}
1843 			/*
1844 			 * get a limit
1845 			 */
1846 			lim_rlimit(p, RLIMIT_RSS, &rsslim);
1847 			limit = OFF_TO_IDX(
1848 			    qmin(rsslim.rlim_cur, rsslim.rlim_max));
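			/*
			 * For example, an RSS limit of 64 MB with 4 KB pages
			 * gives limit = 67108864 / 4096 = 16384 pages
			 * (OFF_TO_IDX() converts a byte count to a page
			 * index).
			 */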
1849 
1850 			/*
1851 			 * Let processes that are swapped out really be
1852 			 * swapped out: set the limit to nothing (this will
1853 			 * force a swap-out).
1854 			 */
1855 			if ((p->p_flag & P_INMEM) == 0)
1856 				limit = 0;	/* XXX */
1857 			vm = vmspace_acquire_ref(p);
1858 			PROC_UNLOCK(p);
1859 			if (vm == NULL)
1860 				continue;
1861 
1862 			size = vmspace_resident_count(vm);
1863 			if (size >= limit) {
1864 				vm_pageout_map_deactivate_pages(
1865 				    &vm->vm_map, limit);
1866 			}
1867 #ifdef RACCT
1868 			rsize = IDX_TO_OFF(size);
1869 			PROC_LOCK(p);
1870 			racct_set(p, RACCT_RSS, rsize);
1871 			ravailable = racct_get_available(p, RACCT_RSS);
1872 			PROC_UNLOCK(p);
1873 			if (rsize > ravailable) {
1874 				/*
1875 				 * Don't be overly aggressive; this might be
1876 				 * an innocent process, and the limit could've
1877 				 * been exceeded by some memory hog.  Don't
1878 				 * try to deactivate more than 1/4th of the process's
1879 				 * resident set size.
1880 				 */
1881 				if (attempts <= 8) {
1882 					if (ravailable < rsize - (rsize / 4))
1883 						ravailable = rsize - (rsize / 4);
1884 				}
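				/*
				 * E.g. (hypothetical numbers) rsize = 400 MB
				 * and ravailable = 100 MB: within the first 8
				 * attempts, ravailable is raised to 300 MB, so
				 * at most one quarter of the resident set is
				 * deactivated below.
				 */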
1885 				vm_pageout_map_deactivate_pages(
1886 				    &vm->vm_map, OFF_TO_IDX(ravailable));
1887 				/* Update RSS usage after paging out. */
1888 				size = vmspace_resident_count(vm);
1889 				rsize = IDX_TO_OFF(size);
1890 				PROC_LOCK(p);
1891 				racct_set(p, RACCT_RSS, rsize);
1892 				PROC_UNLOCK(p);
1893 				if (rsize > ravailable)
1894 					tryagain = 1;
1895 			}
1896 #endif
1897 			vmspace_free(vm);
1898 		}
1899 		sx_sunlock(&allproc_lock);
1900 		if (tryagain != 0 && attempts <= 10)
1901 			goto again;
1902 	}
1903 }
1904 #endif			/* !defined(NO_SWAPPING) */
1905