xref: /freebsd/sys/vm/vm_pageout.c (revision ef5d438ed4bc17ad7ece3e40fe4d1f9baf3aadf7)
1 /*
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  * Copyright (c) 1994 John S. Dyson
5  * All rights reserved.
6  * Copyright (c) 1994 David Greenman
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * The Mach Operating System project at Carnegie-Mellon University.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  *	from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
41  *
42  *
43  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
44  * All rights reserved.
45  *
46  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
47  *
48  * Permission to use, copy, modify and distribute this software and
49  * its documentation is hereby granted, provided that both the copyright
50  * notice and this permission notice appear in all copies of the
51  * software, derivative works or modified versions, and any portions
52  * thereof, and that both notices appear in supporting documentation.
53  *
54  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57  *
58  * Carnegie Mellon requests users of this software to return to
59  *
60  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
61  *  School of Computer Science
62  *  Carnegie Mellon University
63  *  Pittsburgh PA 15213-3890
64  *
65  * any improvements or extensions that they make and grant Carnegie the
66  * rights to redistribute these changes.
67  *
68  * $Id: vm_pageout.c,v 1.65 1996/01/31 12:44:33 davidg Exp $
69  */
70 
71 /*
72  *	The proverbial page-out daemon.
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/kernel.h>
78 #include <sys/proc.h>
79 #include <sys/resourcevar.h>
80 #include <sys/malloc.h>
82 #include <sys/signalvar.h>
83 #include <sys/vnode.h>
84 #include <sys/vmmeter.h>
85 
86 #include <vm/vm.h>
87 #include <vm/vm_param.h>
88 #include <vm/vm_prot.h>
89 #include <vm/lock.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_map.h>
93 #include <vm/vm_pageout.h>
94 #include <vm/vm_kern.h>
95 #include <vm/vm_pager.h>
96 #include <vm/swap_pager.h>
97 #include <vm/vm_extern.h>
98 
99 /*
100  * System initialization
101  */
102 
103 /* the kernel process "vm_pageout"*/
104 static void vm_pageout __P((void));
105 static int vm_pageout_clean __P((vm_page_t, int));
106 static int vm_pageout_scan __P((void));
107 struct proc *pageproc;
108 
109 static struct kproc_desc page_kp = {
110 	"pagedaemon",
111 	vm_pageout,
112 	&pageproc
113 };
114 SYSINIT_KT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, &page_kp)
115 
116 #ifndef NO_SWAPPING
117 /* the kernel process "vm_daemon"*/
118 static void vm_daemon __P((void));
119 static struct	proc *vmproc;
120 
121 static struct kproc_desc vm_kp = {
122 	"vmdaemon",
123 	vm_daemon,
124 	&vmproc
125 };
126 SYSINIT_KT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp)
127 #endif /* !NO_SWAPPING */
128 
129 
130 int vm_pages_needed;		/* Event on which pageout daemon sleeps */
131 
132 int vm_pageout_pages_needed;	/* flag saying that the pageout daemon needs pages */
133 
134 extern int npendingio;
135 static int vm_pageout_req_swapout;	/* XXX */
136 static int vm_daemon_needed;
137 extern int nswiodone;
138 extern int vm_swap_size;
139 extern int vfs_update_wakeup;
140 
141 #define MAXSCAN 1024		/* maximum number of pages to scan in queues */
142 
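/*
 * MAXLAUNDER bounds how many dirty pages may be laundered (queued for
 * pageout) during a single pass over the inactive queue; larger systems
 * get a larger bound.
 */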
143 #define MAXLAUNDER (cnt.v_page_count > 1800 ? 32 : 16)
144 
145 #define VM_PAGEOUT_PAGE_COUNT 16
146 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
147 
148 int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
149 
150 typedef int freeer_fcn_t __P((vm_map_t, vm_object_t, int, int));
151 static void vm_pageout_map_deactivate_pages __P((vm_map_t, vm_map_entry_t,
152 						 int *, freeer_fcn_t *));
153 static freeer_fcn_t vm_pageout_object_deactivate_pages;
154 static void vm_req_vmdaemon __P((void));
155 
156 /*
157  * vm_pageout_clean:
158  *
159  * Clean the page and remove it from the laundry.
160  *
161  * We set the busy bit to cause potential page faults on this page to
162  * block.
163  *
164  * We also set pageout-in-progress to keep the object from disappearing
165  * during pageout.  This guarantees that the page won't move from the
166  * inactive queue.  (However, any other page on the inactive queue may
167  * move!)
168  */
169 static int
170 vm_pageout_clean(m, sync)
171 	vm_page_t m;
172 	int sync;
173 {
174 	register vm_object_t object;
175 	vm_page_t mc[2*VM_PAGEOUT_PAGE_COUNT];
176 	int pageout_count;
177 	int i, forward_okay, backward_okay, page_base;
178 	vm_pindex_t pindex = m->pindex;
179 
180 	object = m->object;
181 
182 	/*
183 	 * If not OBJT_SWAP, additional memory may be needed to do the pageout.
184 	 * Try to avoid the deadlock.
185 	 */
186 	if ((sync != VM_PAGEOUT_FORCE) &&
187 	    (object->type != OBJT_SWAP) &&
188 	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min))
189 		return 0;
190 
191 	/*
192 	 * Don't mess with the page if it's busy.
193 	 */
194 	if ((!sync && m->hold_count != 0) ||
195 	    ((m->busy != 0) || (m->flags & PG_BUSY)))
196 		return 0;
197 
198 	/*
199 	 * Try collapsing before it's too late.
200 	 */
201 	if (!sync && object->backing_object) {
202 		vm_object_collapse(object);
203 	}
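	/*
	 * Start with the target page in the middle of mc[] (which has
	 * 2*VM_PAGEOUT_PAGE_COUNT slots) so the cluster can grow in both
	 * directions around it.
	 */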
204 	mc[VM_PAGEOUT_PAGE_COUNT] = m;
205 	pageout_count = 1;
206 	page_base = VM_PAGEOUT_PAGE_COUNT;
207 	forward_okay = TRUE;
208 	if (pindex != 0)
209 		backward_okay = TRUE;
210 	else
211 		backward_okay = FALSE;
212 	/*
213 	 * Scan object for clusterable pages.
214 	 *
215 	 * We can cluster ONLY if: ->> the page is NOT
216 	 * clean, wired, busy, held, or mapped into a
217 	 * buffer, and one of the following:
218 	 * 1) The page is inactive, or a seldom used
219 	 *    active page.
220 	 * -or-
221 	 * 2) we force the issue.
222 	 */
223 	for (i = 1; (i < vm_pageout_page_count) && (forward_okay || backward_okay); i++) {
224 		vm_page_t p;
225 
226 		/*
227 		 * See if forward page is clusterable.
228 		 */
229 		if (forward_okay) {
230 			/*
231 			 * Stop forward scan at end of object.
232 			 */
233 			if ((pindex + i) > object->size) {
234 				forward_okay = FALSE;
235 				goto do_backward;
236 			}
237 			p = vm_page_lookup(object, pindex + i);
238 			if (p) {
239 				if ((p->queue == PQ_CACHE) || (p->flags & PG_BUSY) || p->busy) {
240 					forward_okay = FALSE;
241 					goto do_backward;
242 				}
243 				vm_page_test_dirty(p);
244 				if ((p->dirty & p->valid) != 0 &&
245 				    ((p->queue == PQ_INACTIVE) ||
246 				     (sync == VM_PAGEOUT_FORCE)) &&
247 				    (p->wire_count == 0) &&
248 				    (p->hold_count == 0)) {
249 					mc[VM_PAGEOUT_PAGE_COUNT + i] = p;
250 					pageout_count++;
251 					if (pageout_count == vm_pageout_page_count)
252 						break;
253 				} else {
254 					forward_okay = FALSE;
255 				}
256 			} else {
257 				forward_okay = FALSE;
258 			}
259 		}
260 do_backward:
261 		/*
262 		 * See if backward page is clusterable.
263 		 */
264 		if (backward_okay) {
265 			/*
266 			 * Stop backward scan at beginning of object.
267 			 */
268 			if ((pindex - i) == 0) {
269 				backward_okay = FALSE;
270 			}
271 			p = vm_page_lookup(object, pindex - i);
272 			if (p) {
273 				if ((p->queue == PQ_CACHE) || (p->flags & PG_BUSY) || p->busy) {
274 					backward_okay = FALSE;
275 					continue;
276 				}
277 				vm_page_test_dirty(p);
278 				if ((p->dirty & p->valid) != 0 &&
279 				    ((p->queue == PQ_INACTIVE) ||
280 				     (sync == VM_PAGEOUT_FORCE)) &&
281 				    (p->wire_count == 0) &&
282 				    (p->hold_count == 0)) {
283 					mc[VM_PAGEOUT_PAGE_COUNT - i] = p;
284 					pageout_count++;
285 					page_base--;
286 					if (pageout_count == vm_pageout_page_count)
287 						break;
288 				} else {
289 					backward_okay = FALSE;
290 				}
291 			} else {
292 				backward_okay = FALSE;
293 			}
294 		}
295 	}
296 
297 	/*
298 	 * we allow reads during pageouts...
299 	 */
300 	for (i = page_base; i < (page_base + pageout_count); i++) {
301 		mc[i]->flags |= PG_BUSY;
302 		vm_page_protect(mc[i], VM_PROT_READ);
303 	}
304 
305 	return vm_pageout_flush(&mc[page_base], pageout_count, sync);
306 }
307 
308 int
309 vm_pageout_flush(mc, count, sync)
310 	vm_page_t *mc;
311 	int count;
312 	int sync;
313 {
314 	register vm_object_t object;
315 	int pageout_status[count];
316 	int anyok = 0;
317 	int i;
318 
319 	object = mc[0]->object;
320 	object->paging_in_progress += count;
321 
322 	vm_pager_put_pages(object, mc, count,
323 	    ((sync || (object == kernel_object)) ? TRUE : FALSE),
324 	    pageout_status);
325 
326 
327 	for (i = 0; i < count; i++) {
328 		vm_page_t mt = mc[i];
329 
330 		switch (pageout_status[i]) {
331 		case VM_PAGER_OK:
332 			++anyok;
333 			break;
334 		case VM_PAGER_PEND:
335 			++anyok;
336 			break;
337 		case VM_PAGER_BAD:
338 			/*
339 			 * Page outside of range of object. Right now we
340 			 * essentially lose the changes by pretending it
341 			 * worked.
342 			 */
343 			pmap_clear_modify(VM_PAGE_TO_PHYS(mt));
344 			mt->dirty = 0;
345 			break;
346 		case VM_PAGER_ERROR:
347 		case VM_PAGER_FAIL:
348 			/*
349 			 * If the page couldn't be paged out, then reactivate the
350 			 * page so it doesn't clog the inactive list.  (We
351 			 * will try paging it out again later).
352 			 */
353 			if (mt->queue == PQ_INACTIVE)
354 				vm_page_activate(mt);
355 			break;
356 		case VM_PAGER_AGAIN:
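			/*
			 * Temporary pager failure; the page is left on its
			 * queue and will be retried on a later pass.
			 */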
357 			break;
358 		}
359 
360 
361 		/*
362 		 * If the operation is still going, leave the page busy to
363 		 * block all other accesses. Also, leave the paging in
364 		 * progress indicator set so that we don't attempt an object
365 		 * collapse.
366 		 */
367 		if (pageout_status[i] != VM_PAGER_PEND) {
368 			vm_object_pip_wakeup(object);
369 			PAGE_WAKEUP(mt);
370 		}
371 	}
372 	return anyok;
373 }
374 
375 /*
376  *	vm_pageout_object_deactivate_pages
377  *
378  *	deactivate enough pages to satisfy the inactive target
379  *	requirements, or, if vm_page_proc_limit is set, then
380  *	deactivate all of the pages in the object and its
381  *	backing_objects.
382  *
383  *	The object and map must be locked.
384  */
385 static int
386 vm_pageout_object_deactivate_pages(map, object, count, map_remove_only)
387 	vm_map_t map;
388 	vm_object_t object;
389 	int count;
390 	int map_remove_only;
391 {
392 	register vm_page_t p, next;
393 	int rcount;
394 	int dcount;
395 
396 	dcount = 0;
397 	if (count == 0)
398 		count = 1;
399 
400 	if (object->type == OBJT_DEVICE)
401 		return 0;
402 
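	/*
	 * Recurse into any backing object first.  If the backing object is
	 * shared (ref_count != 1), its pages are only unmapped, not
	 * deactivated, and they do not count toward our target.
	 */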
403 	if (object->backing_object) {
404 		if (object->backing_object->ref_count == 1)
405 			dcount += vm_pageout_object_deactivate_pages(map,
406 			    object->backing_object, count / 2 + 1, map_remove_only);
407 		else
408 			vm_pageout_object_deactivate_pages(map,
409 			    object->backing_object, count, 1);
410 	}
411 	if (object->paging_in_progress)
412 		return dcount;
413 
414 	/*
415 	 * scan the object's entire memory queue
416 	 */
417 	rcount = object->resident_page_count;
418 	p = object->memq.tqh_first;
419 	while (p && (rcount-- > 0)) {
420 		next = p->listq.tqe_next;
421 		cnt.v_pdpages++;
422 		if (p->wire_count != 0 ||
423 		    p->hold_count != 0 ||
424 		    p->busy != 0 ||
425 		    (p->flags & PG_BUSY) ||
426 		    !pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) {
427 			p = next;
428 			continue;
429 		}
430 		/*
431 		 * if a page is active, not wired and is in the process's
432 		 * pmap, then deactivate the page.
433 		 */
434 		if (p->queue == PQ_ACTIVE) {
435 			if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p)) &&
436 			    (p->flags & PG_REFERENCED) == 0) {
437 				p->act_count -= min(p->act_count, ACT_DECLINE);
438 				/*
439 				 * if the page act_count is zero -- then we
440 				 * deactivate
441 				 */
442 				if (!p->act_count) {
443 					if (!map_remove_only)
444 						vm_page_deactivate(p);
445 					vm_page_protect(p, VM_PROT_NONE);
446 					/*
447 					 * otherwise, the page may be
448 					 * deactivated on the next go-around;
449 					 * place it at the end of the queue
450 					 * to age the other pages in
451 					 * memory.
452 					 */
453 				} else {
454 					TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
455 					TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
456 				}
457 				/*
458 				 * see if we are done yet
459 				 */
460 				if (p->queue == PQ_INACTIVE) {
461 					--count;
462 					++dcount;
463 					if (count <= 0 &&
464 					    cnt.v_inactive_count > cnt.v_inactive_target) {
465 						return dcount;
466 					}
467 				}
468 			} else {
469 				/*
470 				 * Move the page to the bottom of the queue.
471 				 */
472 				pmap_clear_reference(VM_PAGE_TO_PHYS(p));
473 				p->flags &= ~PG_REFERENCED;
474 				if (p->act_count < ACT_MAX)
475 					p->act_count += ACT_ADVANCE;
476 
477 				TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
478 				TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
479 			}
480 		} else if (p->queue == PQ_INACTIVE) {
481 			vm_page_protect(p, VM_PROT_NONE);
482 		}
483 		p = next;
484 	}
485 	return dcount;
486 }
487 
488 /*
489  * deactivate some number of pages in a map; try to do it fairly, but
490  * that is really hard to do.
491  */
492 
493 static void
494 vm_pageout_map_deactivate_pages(map, entry, count, freeer)
495 	vm_map_t map;
496 	vm_map_entry_t entry;
497 	int *count;
498 	freeer_fcn_t *freeer;
499 {
500 	vm_map_t tmpm;
501 	vm_map_entry_t tmpe;
502 	vm_object_t obj;
503 
504 	if (*count <= 0)
505 		return;
506 	vm_map_reference(map);
507 	if (!lock_try_read(&map->lock)) {
508 		vm_map_deallocate(map);
509 		return;
510 	}
511 	if (entry == 0) {
512 		tmpe = map->header.next;
513 		while (tmpe != &map->header && *count > 0) {
514 			vm_pageout_map_deactivate_pages(map, tmpe, count, freeer);
515 			tmpe = tmpe->next;
516 		}
517 	} else if (entry->is_sub_map || entry->is_a_map) {
518 		tmpm = entry->object.share_map;
519 		tmpe = tmpm->header.next;
520 		while (tmpe != &tmpm->header && *count > 0) {
521 			vm_pageout_map_deactivate_pages(tmpm, tmpe, count, freeer);
522 			tmpe = tmpe->next;
523 		}
524 	} else if ((obj = entry->object.vm_object) != 0) {
525 		*count -= (*freeer) (map, obj, *count, TRUE);
526 	}
527 	lock_read_done(&map->lock);
528 	vm_map_deallocate(map);
529 	return;
530 }
531 
532 /*
533  *	vm_pageout_scan does the dirty work for the pageout daemon.
534  */
535 static int
536 vm_pageout_scan()
537 {
538 	vm_page_t m;
539 	int page_shortage, maxscan, maxlaunder, pcount;
540 	int pages_freed;
541 	vm_page_t next;
542 	struct proc *p, *bigproc;
543 	vm_offset_t size, bigsize;
544 	vm_object_t object;
545 	int force_wakeup = 0;
546 	int vnodes_skipped = 0;
547 
548 	pages_freed = 0;
549 
550 	/*
551 	 * Start scanning the inactive queue for pages we can free. We keep
552 	 * scanning until we have enough free pages or we have scanned through
553 	 * the entire queue.  If we encounter dirty pages, we start cleaning
554 	 * them.
555 	 */
556 
557 	maxlaunder = (cnt.v_inactive_target > MAXLAUNDER) ?
558 	    MAXLAUNDER : cnt.v_inactive_target;
559 
560 rescan1:
561 	maxscan = cnt.v_inactive_count;
562 	m = vm_page_queue_inactive.tqh_first;
563 	while ((m != NULL) && (maxscan-- > 0) &&
564 	    ((cnt.v_cache_count + cnt.v_free_count) < (cnt.v_cache_min + cnt.v_free_target))) {
565 		vm_page_t next;
566 
567 		cnt.v_pdpages++;
568 		next = m->pageq.tqe_next;
569 
570 #if defined(VM_DIAGNOSE)
571 		if (m->queue != PQ_INACTIVE) {
572 			printf("vm_pageout_scan: page not inactive?\n");
573 			break;
574 		}
575 #endif
576 
577 		/*
578 		 * don't mess with busy pages
579 		 */
580 		if (m->busy || (m->flags & PG_BUSY)) {
581 			m = next;
582 			continue;
583 		}
584 		if (m->hold_count) {
585 			TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
586 			TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
587 			m = next;
588 			continue;
589 		}
590 
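		/*
		 * Fold the pmap reference bit into PG_REFERENCED.  References
		 * to pages of a dead object (ref_count == 0) are ignored;
		 * otherwise a referenced page is reactivated and given an
		 * activity boost rather than being reclaimed.
		 */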
591 		if (((m->flags & PG_REFERENCED) == 0) &&
592 		    pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
593 			m->flags |= PG_REFERENCED;
594 		}
595 		if (m->object->ref_count == 0) {
596 			m->flags &= ~PG_REFERENCED;
597 			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
598 		}
599 		if ((m->flags & PG_REFERENCED) != 0) {
600 			m->flags &= ~PG_REFERENCED;
601 			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
602 			vm_page_activate(m);
603 			if (m->act_count < ACT_MAX)
604 				m->act_count += ACT_ADVANCE;
605 			m = next;
606 			continue;
607 		}
608 
609 		if (m->dirty == 0) {
610 			vm_page_test_dirty(m);
611 		} else
612 			m->dirty = VM_PAGE_BITS_ALL;
613 		if (m->valid == 0) {
614 			vm_page_protect(m, VM_PROT_NONE);
615 			vm_page_free(m);
616 			cnt.v_dfree++;
617 			++pages_freed;
618 		} else if (m->dirty == 0) {
619 			vm_page_cache(m);
620 			++pages_freed;
621 		} else if (maxlaunder > 0) {
622 			int written;
623 			struct vnode *vp = NULL;
624 
625 			object = m->object;
626 			if (object->flags & OBJ_DEAD) {
627 				m = next;
628 				continue;
629 			}
630 
631 			if (object->type == OBJT_VNODE) {
632 				vp = object->handle;
633 				if (VOP_ISLOCKED(vp) || vget(vp, 1)) {
634 					if (object->flags & OBJ_MIGHTBEDIRTY)
635 						++vnodes_skipped;
636 					m = next;
637 					continue;
638 				}
639 			}
640 
641 			/*
642 			 * If a page is dirty, then it is either being washed
643 			 * (but not yet cleaned) or it is still in the
644 			 * laundry.  If it is still in the laundry, then we
645 			 * start the cleaning operation.
646 			 */
647 			written = vm_pageout_clean(m, 0);
648 
649 			if (vp)
650 				vput(vp);
651 
652 			if (!next) {
653 				break;
654 			}
655 			maxlaunder -= written;
656 			/*
657 			 * if the next page has been re-activated, start
658 			 * scanning again
659 			 */
660 			if (next->queue != PQ_INACTIVE) {
661 				vm_pager_sync();
662 				goto rescan1;
663 			}
664 		}
665 		m = next;
666 	}
667 
668 	/*
669 	 * Compute the page shortage.  If we are still very low on memory,
670 	 * make sure that we move at least a minimal number of pages from
671 	 * active to inactive.
672 	 */
673 
674 	page_shortage = cnt.v_inactive_target -
675 	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
676 	if (page_shortage <= 0) {
677 		if (pages_freed == 0) {
678 			page_shortage = cnt.v_free_min - cnt.v_free_count;
679 		} else {
680 			page_shortage = 1;
681 		}
682 	}
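	/*
	 * Now scan the active queue: referenced pages get an activity boost
	 * and are rotated to the tail; unreferenced pages lose activity and,
	 * once their act_count reaches zero, are deactivated (or cached when
	 * clean and their object is no longer referenced).
	 */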
683 	maxscan = MAXSCAN;
684 	pcount = cnt.v_active_count;
685 	m = vm_page_queue_active.tqh_first;
686 	while ((m != NULL) && (maxscan > 0) &&
687 		(pcount-- > 0) && (page_shortage > 0)) {
688 
689 		cnt.v_pdpages++;
690 		next = m->pageq.tqe_next;
691 
692 		/*
693 		 * Don't deactivate pages that are busy.
694 		 */
695 		if ((m->busy != 0) ||
696 		    (m->flags & PG_BUSY) ||
697 		    (m->hold_count != 0)) {
698 			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
699 			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
700 			m = next;
701 			continue;
702 		}
703 		if (m->object->ref_count &&
704 			((m->flags & PG_REFERENCED) ||
705 			pmap_is_referenced(VM_PAGE_TO_PHYS(m))) ) {
706 			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
707 			m->flags &= ~PG_REFERENCED;
708 			if (m->act_count < ACT_MAX) {
709 				m->act_count += ACT_ADVANCE;
710 			}
711 			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
712 			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
713 		} else {
714 			m->flags &= ~PG_REFERENCED;
715 			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
716 			m->act_count -= min(m->act_count, ACT_DECLINE);
717 
718 			/*
719 			 * if the page act_count is zero -- then we deactivate
720 			 */
721 			if (!m->act_count && (page_shortage > 0)) {
722 				if (m->object->ref_count == 0) {
723 					--page_shortage;
724 					vm_page_test_dirty(m);
725 					if (m->dirty == 0) {
726 						m->act_count = 0;
727 						vm_page_cache(m);
728 					} else {
729 						vm_page_deactivate(m);
730 					}
731 				} else {
732 					vm_page_deactivate(m);
733 					--page_shortage;
734 				}
735 			} else if (m->act_count) {
736 				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
737 				TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
738 			}
739 		}
740 		maxscan--;
741 		m = next;
742 	}
743 
744 	/*
745 	 * We try to maintain some *really* free pages; this allows interrupt
746 	 * code to be guaranteed space.
747 	 */
748 	while (cnt.v_free_count < cnt.v_free_reserved) {
749 		m = vm_page_queue_cache.tqh_first;
750 		if (!m)
751 			break;
752 		vm_page_free(m);
753 		cnt.v_dfree++;
754 	}
755 
756 	/*
757 	 * If we didn't get enough free pages, and we have skipped a vnode
758 	 * in a writeable object, wake up the sync daemon.  Also kick off
759 	 * a swapout if we did not get enough free pages.
760 	 */
761 	if ((cnt.v_cache_count + cnt.v_free_count) <
762 		(cnt.v_free_target + cnt.v_cache_min) ) {
763 		if (vnodes_skipped &&
764 		    (cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_min) {
765 			if (!vfs_update_wakeup) {
766 				vfs_update_wakeup = 1;
767 				wakeup(&vfs_update_wakeup);
768 			}
769 		}
770 #ifndef NO_SWAPPING
771 		/*
772 		 * now swap processes out if we are in low memory conditions
773 		 */
774 		if (!swap_pager_full && vm_swap_size &&
775 			vm_pageout_req_swapout == 0) {
776 			vm_pageout_req_swapout = 1;
777 			vm_req_vmdaemon();
778 		}
779 #endif
780 	}
781 
782 #ifndef NO_SWAPPING
783 	if ((cnt.v_inactive_count + cnt.v_free_count + cnt.v_cache_count) <
784 	    (cnt.v_inactive_target + cnt.v_free_min)) {
785 		vm_req_vmdaemon();
786 	}
787 #endif
788 
789 	/*
790 	 * make sure that we have swap space -- if we are low on memory and
791 	 * swap -- then kill the biggest process.
792 	 */
793 	if ((vm_swap_size == 0 || swap_pager_full) &&
794 	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min)) {
795 		bigproc = NULL;
796 		bigsize = 0;
797 		for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
798 			/*
799 			 * skip system, init, and (while swap remains) low-pid processes
800 			 */
801 			if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) ||
802 			    ((p->p_pid < 48) && (vm_swap_size != 0))) {
803 				continue;
804 			}
805 			/*
806 			 * if the process is in a non-running type state,
807 			 * don't touch it.
808 			 */
809 			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
810 				continue;
811 			}
812 			/*
813 			 * get the process size
814 			 */
815 			size = p->p_vmspace->vm_pmap.pm_stats.resident_count;
816 			/*
817 			 * if this process is bigger than the biggest one so far,
818 			 * remember it.
819 			 */
820 			if (size > bigsize) {
821 				bigproc = p;
822 				bigsize = size;
823 			}
824 		}
825 		if (bigproc != NULL) {
826 			killproc(bigproc, "out of swap space");
827 			bigproc->p_estcpu = 0;
828 			bigproc->p_nice = PRIO_MIN;
829 			resetpriority(bigproc);
830 			wakeup(&cnt.v_free_count);
831 		}
832 	}
833 	return force_wakeup;
834 }
835 
836 /*
837  *	vm_pageout is the high level pageout daemon.
838  */
839 static void
840 vm_pageout()
841 {
842 	(void) spl0();
843 
844 	/*
845 	 * Initialize some paging parameters.
846 	 */
847 
848 	cnt.v_interrupt_free_min = 2;
849 
850 	if (cnt.v_page_count > 1024)
851 		cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200;
852 	else
853 		cnt.v_free_min = 4;
854 	/*
855 	 * free_reserved needs to include enough for the largest swap pager
856 	 * structures plus enough for any pv_entry structs when paging.
857 	 */
858 	cnt.v_pageout_free_min = 6 + cnt.v_page_count / 1024 +
859 				cnt.v_interrupt_free_min;
860 	cnt.v_free_reserved = cnt.v_pageout_free_min + 6;
861 	cnt.v_free_target = 3 * cnt.v_free_min + cnt.v_free_reserved;
862 	cnt.v_free_min += cnt.v_free_reserved;
863 
864 	if (cnt.v_page_count > 1024) {
865 		cnt.v_cache_max = (cnt.v_free_count - 1024) / 2;
866 		cnt.v_cache_min = (cnt.v_free_count - 1024) / 8;
867 		cnt.v_inactive_target = 2*cnt.v_cache_min + 192;
868 	} else {
869 		cnt.v_cache_min = 0;
870 		cnt.v_cache_max = 0;
871 		cnt.v_inactive_target = cnt.v_free_count / 4;
872 	}
873 
874 	/* XXX does not really belong here */
875 	if (vm_page_max_wired == 0)
876 		vm_page_max_wired = cnt.v_free_count / 3;
877 
878 
879 	swap_pager_swap_init();
880 	/*
881 	 * The pageout daemon is never done, so loop forever.
882 	 */
883 	while (TRUE) {
884 		int s = splhigh();
885 
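		/*
		 * Sleep unless we have been asked for pages and free memory
		 * really is below the reserved/minimum thresholds.
		 */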
886 		if (!vm_pages_needed ||
887 			((cnt.v_free_count >= cnt.v_free_reserved) &&
888 			 (cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min))) {
889 			vm_pages_needed = 0;
890 			tsleep(&vm_pages_needed, PVM, "psleep", 0);
891 		}
892 		vm_pages_needed = 0;
893 		splx(s);
894 		cnt.v_pdwakeups++;
895 		vm_pager_sync();
896 		vm_pageout_scan();
897 		vm_pager_sync();
898 		wakeup(&cnt.v_free_count);
899 		wakeup(kmem_map);
900 	}
901 }
902 
903 #ifndef NO_SWAPPING
904 static void
905 vm_req_vmdaemon()
906 {
907 	static int lastrun = 0;
908 
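	/*
	 * Wake the vm daemon at most about ten times a second; the second
	 * test handles wraparound of the ticks counter.
	 */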
909 	if ((ticks > (lastrun + hz / 10)) || (ticks < lastrun)) {
910 		wakeup(&vm_daemon_needed);
911 		lastrun = ticks;
912 	}
913 }
914 
915 static void
916 vm_daemon()
917 {
918 	vm_object_t object;
919 	struct proc *p;
920 
921 	while (TRUE) {
922 		tsleep(&vm_daemon_needed, PUSER, "psleep", 0);
923 		if (vm_pageout_req_swapout) {
924 			swapout_procs();
925 			vm_pageout_req_swapout = 0;
926 		}
927 		/*
928 		 * scan the processes; deactivate pages from any process that
929 		 * exceeds its rlimits or is swapped out
930 		 */
931 
932 		for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
933 			int overage;
934 			quad_t limit;
935 			vm_offset_t size;
936 
937 			/*
938 			 * if this is a system process or the process is
939 			 * exiting (P_WEXIT), skip it.
940 			 */
941 			if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
942 				continue;
943 			}
944 			/*
945 			 * if the process is in a non-running type state,
946 			 * don't touch it.
947 			 */
948 			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
949 				continue;
950 			}
951 			/*
952 			 * get a limit
953 			 */
954 			limit = qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
955 			    p->p_rlimit[RLIMIT_RSS].rlim_max);
956 
957 			/*
958 			 * let processes that are swapped out really be
959 			 * swapped out: set the limit to nothing (this will
960 			 * force a swap-out).
961 			 */
962 			if ((p->p_flag & P_INMEM) == 0)
963 				limit = 0;	/* XXX */
964 
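			/*
			 * If the resident set exceeds the limit, ask the
			 * deactivation code to trim the overage (in pages)
			 * from the process's map.
			 */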
965 			size = p->p_vmspace->vm_pmap.pm_stats.resident_count * PAGE_SIZE;
966 			if (limit >= 0 && size >= limit) {
967 				overage = (size - limit) >> PAGE_SHIFT;
968 				vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map,
969 				    (vm_map_entry_t) 0, &overage, vm_pageout_object_deactivate_pages);
970 			}
971 		}
972 
973 		/*
974 		 * we remove cached objects that have no RSS...
975 		 */
976 restart:
977 		object = vm_object_cached_list.tqh_first;
978 		while (object) {
979 			/*
980 			 * if there are no resident pages -- get rid of the object
981 			 */
982 			if (object->resident_page_count == 0) {
983 				vm_object_reference(object);
984 				pager_cache(object, FALSE);
985 				goto restart;
986 			}
987 			object = object->cached_list.tqe_next;
988 		}
989 	}
990 }
991 #endif /* !NO_SWAPPING */
992