/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_pageout.c,v 1.128 1998/10/25 17:44:59 phk Exp $
 */

/*
 * The proverbial page-out daemon.
 */
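
/*
 * Overview:
 *
 * vm_pageout() is the body of the "pagedaemon" kernel process.  It sits
 * in a loop, sleeping until woken via pagedaemon_wakeup() or until a
 * timeout expires, and then either runs vm_pageout_scan() to reclaim
 * pages or vm_pageout_page_stats() to age the active queue.
 *
 * Unless NO_SWAPPING is defined, a second kernel process, "vmdaemon"
 * (vm_daemon()), handles whole-process swapout requests and enforces
 * per-process RSS limits by deactivating pages in the offending maps.
 */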

#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

/*
 * System initialization
 */

/* the kernel process "vm_pageout" */
static void vm_pageout __P((void));
static int vm_pageout_clean __P((vm_page_t));
static int vm_pageout_scan __P((void));
static int vm_pageout_free_page_calc __P((vm_size_t count));
struct proc *pageproc;

static struct kproc_desc page_kp = {
	"pagedaemon",
	vm_pageout,
	&pageproc
};
SYSINIT_KT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, &page_kp)

#if !defined(NO_SWAPPING)
/* the kernel process "vm_daemon" */
static void vm_daemon __P((void));
static struct proc *vmproc;

static struct kproc_desc vm_kp = {
	"vmdaemon",
	vm_daemon,
	&vmproc
};
SYSINIT_KT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp)
#endif


int vm_pages_needed=0;		/* Event on which pageout daemon sleeps */
int vm_pageout_deficit=0;	/* Estimated number of pages deficit */
int vm_pageout_pages_needed=0;	/* flag saying that the pageout daemon needs pages */

extern int npendingio;
#if !defined(NO_SWAPPING)
static int vm_pageout_req_swapout;	/* XXX */
static int vm_daemon_needed;
#endif
extern int nswiodone;
extern int vm_swap_size;
extern int vfs_update_wakeup;
static int vm_pageout_stats_max=0, vm_pageout_stats_interval = 0;
static int vm_pageout_full_stats_interval = 0;
static int vm_pageout_stats_free_max=0, vm_pageout_algorithm_lru=0;
static int defer_swap_pageouts=0;
static int disable_swap_pageouts=0;

static int max_page_launder=100;
#if defined(NO_SWAPPING)
static int vm_swap_enabled=0;
static int vm_swap_idle_enabled=0;
#else
static int vm_swap_enabled=1;
static int vm_swap_idle_enabled=0;
#endif

SYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm,
	CTLFLAG_RW, &vm_pageout_algorithm_lru, 0, "LRU page mgmt");

SYSCTL_INT(_vm, OID_AUTO, pageout_stats_max,
	CTLFLAG_RW, &vm_pageout_stats_max, 0, "Max pageout stats scan length");

SYSCTL_INT(_vm, OID_AUTO, pageout_full_stats_interval,
	CTLFLAG_RW, &vm_pageout_full_stats_interval, 0, "Interval for full stats scan");

SYSCTL_INT(_vm, OID_AUTO, pageout_stats_interval,
	CTLFLAG_RW, &vm_pageout_stats_interval, 0, "Interval for partial stats scan");

SYSCTL_INT(_vm, OID_AUTO, pageout_stats_free_max,
	CTLFLAG_RW, &vm_pageout_stats_free_max, 0, "Not implemented");

#if defined(NO_SWAPPING)
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
	CTLFLAG_RD, &vm_swap_enabled, 0, "");
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
	CTLFLAG_RD, &vm_swap_idle_enabled, 0, "");
#else
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
	CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout");
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
	CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
#endif

SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
	CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");

SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
	CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");

SYSCTL_INT(_vm, OID_AUTO, max_page_launder,
	CTLFLAG_RW, &max_page_launder, 0, "Maximum number of pages to clean per pass");


#define VM_PAGEOUT_PAGE_COUNT 16
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;

int vm_page_max_wired;		/* XXX max # of wired pages system-wide */

#if !defined(NO_SWAPPING)
typedef void freeer_fcn_t __P((vm_map_t, vm_object_t, vm_pindex_t, int));
static void vm_pageout_map_deactivate_pages __P((vm_map_t, vm_pindex_t));
static freeer_fcn_t vm_pageout_object_deactivate_pages;
static void vm_req_vmdaemon __P((void));
#endif
static void vm_pageout_page_stats(void);
void pmap_collect(void);

/*
 * vm_pageout_clean:
 *
 * Clean the page and remove it from the laundry.
 *
 * We set the busy bit to cause potential page faults on this page to
 * block.
 *
 * And we set pageout-in-progress to keep the object from disappearing
 * during pageout.  This guarantees that the page won't move from the
 * inactive queue.  (However, any other page on the inactive queue may
 * move!)
 */
static int
vm_pageout_clean(m)
	vm_page_t m;
{
	register vm_object_t object;
	vm_page_t mc[2*vm_pageout_page_count];
	int pageout_count;
	int i, forward_okay, backward_okay, page_base;
	vm_pindex_t pindex = m->pindex;

	object = m->object;

	/*
	 * If not OBJT_SWAP, additional memory may be needed to do the pageout.
	 * Try to avoid the deadlock.
	 */
	if ((object->type == OBJT_DEFAULT) &&
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min))
		return 0;

	/*
	 * Don't mess with the page if it's busy.
	 */
	if ((m->hold_count != 0) ||
	    ((m->busy != 0) || (m->flags & PG_BUSY)))
		return 0;

	/*
	 * Try collapsing before it's too late.
	 */
	if (object->backing_object) {
		vm_object_collapse(object);
	}

	mc[vm_pageout_page_count] = m;
	pageout_count = 1;
	page_base = vm_pageout_page_count;
	forward_okay = TRUE;
	if (pindex != 0)
		backward_okay = TRUE;
	else
		backward_okay = FALSE;
	/*
	 * Scan object for clusterable pages.
	 *
	 * We can cluster ONLY if: ->> the page is NOT
	 * clean, wired, busy, held, or mapped into a
	 * buffer, and one of the following:
	 * 1) The page is inactive, or a seldom used
	 *    active page.
	 * -or-
	 * 2) we force the issue.
	 */
	for (i = 1; (i < vm_pageout_page_count) && (forward_okay || backward_okay); i++) {
		vm_page_t p;

		/*
		 * See if forward page is clusterable.
		 */
		if (forward_okay) {
			/*
			 * Stop forward scan at end of object.
			 */
			if ((pindex + i) > object->size) {
				forward_okay = FALSE;
				goto do_backward;
			}
			p = vm_page_lookup(object, pindex + i);
			if (p) {
				if (((p->queue - p->pc) == PQ_CACHE) ||
				    (p->flags & PG_BUSY) || p->busy) {
					forward_okay = FALSE;
					goto do_backward;
				}
				vm_page_test_dirty(p);
				if ((p->dirty & p->valid) != 0 &&
				    (p->queue == PQ_INACTIVE) &&
				    (p->wire_count == 0) &&
				    (p->hold_count == 0)) {
					mc[vm_pageout_page_count + i] = p;
					pageout_count++;
					if (pageout_count == vm_pageout_page_count)
						break;
				} else {
					forward_okay = FALSE;
				}
			} else {
				forward_okay = FALSE;
			}
		}
do_backward:
		/*
		 * See if backward page is clusterable.
		 */
		if (backward_okay) {
			/*
			 * Stop backward scan at beginning of object.
			 */
			if ((pindex - i) == 0) {
				backward_okay = FALSE;
			}
			p = vm_page_lookup(object, pindex - i);
			if (p) {
				if (((p->queue - p->pc) == PQ_CACHE) ||
				    (p->flags & PG_BUSY) || p->busy) {
					backward_okay = FALSE;
					continue;
				}
				vm_page_test_dirty(p);
				if ((p->dirty & p->valid) != 0 &&
				    (p->queue == PQ_INACTIVE) &&
				    (p->wire_count == 0) &&
				    (p->hold_count == 0)) {
					mc[vm_pageout_page_count - i] = p;
					pageout_count++;
					page_base--;
					if (pageout_count == vm_pageout_page_count)
						break;
				} else {
					backward_okay = FALSE;
				}
			} else {
				backward_okay = FALSE;
			}
		}
	}

	/*
	 * we allow reads during pageouts...
	 */
	return vm_pageout_flush(&mc[page_base], pageout_count, 0);
}

int
vm_pageout_flush(mc, count, flags)
	vm_page_t *mc;
	int count;
	int flags;
{
	register vm_object_t object;
	int pageout_status[count];
	int numpagedout = 0;
	int i;

	for (i = 0; i < count; i++) {
		vm_page_io_start(mc[i]);
		vm_page_protect(mc[i], VM_PROT_READ);
	}

	object = mc[0]->object;
	vm_object_pip_add(object, count);

	vm_pager_put_pages(object, mc, count,
	    (flags | ((object == kernel_object) ? OBJPC_SYNC : 0)),
	    pageout_status);

	for (i = 0; i < count; i++) {
		vm_page_t mt = mc[i];

		switch (pageout_status[i]) {
		case VM_PAGER_OK:
			numpagedout++;
			break;
		case VM_PAGER_PEND:
			numpagedout++;
			break;
		case VM_PAGER_BAD:
			/*
			 * Page outside of range of object.  Right now we
			 * essentially lose the changes by pretending it
			 * worked.
			 */
			pmap_clear_modify(VM_PAGE_TO_PHYS(mt));
			mt->dirty = 0;
			break;
		case VM_PAGER_ERROR:
		case VM_PAGER_FAIL:
			/*
			 * If the page couldn't be paged out, then reactivate
			 * it so it doesn't clog the inactive list.  (We will
			 * try paging it out again later.)
			 */
			vm_page_activate(mt);
			break;
		case VM_PAGER_AGAIN:
			break;
		}

		/*
		 * If the operation is still going, leave the page busy to
		 * block all other accesses.  Also, leave the paging in
		 * progress indicator set so that we don't attempt an object
		 * collapse.
		 */
		if (pageout_status[i] != VM_PAGER_PEND) {
			vm_object_pip_wakeup(object);
			vm_page_io_finish(mt);
		}
	}
	return numpagedout;
}

#if !defined(NO_SWAPPING)
/*
 * vm_pageout_object_deactivate_pages
 *
 * deactivate enough pages to satisfy the inactive target
 * requirements or if vm_page_proc_limit is set, then
 * deactivate all of the pages in the object and its
 * backing_objects.
 *
 * The object and map must be locked.
 */
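
/*
 * Implementation notes: the scan starts at the given object and follows
 * its backing_object chain, returning early once the map's pmap holds no
 * more than "desired" resident pages, or if an object has paging in
 * progress.  When an object is shadowed by more than one other object
 * (shadow_count > 1), or when map_remove_only is set, unreferenced active
 * pages are only requeued at the tail of the active queue instead of
 * being deactivated outright.
 */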
static void
vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
	vm_map_t map;
	vm_object_t object;
	vm_pindex_t desired;
	int map_remove_only;
{
	register vm_page_t p, next;
	int rcount;
	int remove_mode;
	int s;

	if (object->type == OBJT_DEVICE)
		return;

	while (object) {
		if (vm_map_pmap(map)->pm_stats.resident_count <= desired)
			return;
		if (object->paging_in_progress)
			return;

		remove_mode = map_remove_only;
		if (object->shadow_count > 1)
			remove_mode = 1;
		/*
		 * scan the object's entire memory queue
		 */
		rcount = object->resident_page_count;
		p = TAILQ_FIRST(&object->memq);
		while (p && (rcount-- > 0)) {
			int actcount;
			if (vm_map_pmap(map)->pm_stats.resident_count <= desired)
				return;
			next = TAILQ_NEXT(p, listq);
			cnt.v_pdpages++;
			if (p->wire_count != 0 ||
			    p->hold_count != 0 ||
			    p->busy != 0 ||
			    (p->flags & PG_BUSY) ||
			    !pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) {
				p = next;
				continue;
			}

			actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(p));
			if (actcount) {
				vm_page_flag_set(p, PG_REFERENCED);
			} else if (p->flags & PG_REFERENCED) {
				actcount = 1;
			}

			if ((p->queue != PQ_ACTIVE) &&
			    (p->flags & PG_REFERENCED)) {
				vm_page_activate(p);
				p->act_count += actcount;
				vm_page_flag_clear(p, PG_REFERENCED);
			} else if (p->queue == PQ_ACTIVE) {
				if ((p->flags & PG_REFERENCED) == 0) {
					p->act_count -= min(p->act_count, ACT_DECLINE);
					if (!remove_mode && (vm_pageout_algorithm_lru || (p->act_count == 0))) {
						vm_page_protect(p, VM_PROT_NONE);
						vm_page_deactivate(p);
					} else {
						s = splvm();
						TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
						TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
						splx(s);
					}
				} else {
					vm_page_activate(p);
					vm_page_flag_clear(p, PG_REFERENCED);
					if (p->act_count < (ACT_MAX - ACT_ADVANCE))
						p->act_count += ACT_ADVANCE;
					s = splvm();
					TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
					TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
					splx(s);
				}
			} else if (p->queue == PQ_INACTIVE) {
				vm_page_protect(p, VM_PROT_NONE);
			}
			p = next;
		}
		object = object->backing_object;
	}
	return;
}

/*
 * deactivate some number of pages in a map, try to do it fairly, but
 * that is really hard to do.
 */
static void
vm_pageout_map_deactivate_pages(map, desired)
	vm_map_t map;
	vm_pindex_t desired;
{
	vm_map_entry_t tmpe;
	vm_object_t obj, bigobj;

	if (lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, (void *)0, curproc)) {
		return;
	}

	bigobj = NULL;

	/*
	 * first, search out the biggest object, and try to free pages from
	 * that.
	 */
	tmpe = map->header.next;
	while (tmpe != &map->header) {
		if ((tmpe->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
			obj = tmpe->object.vm_object;
			if ((obj != NULL) && (obj->shadow_count <= 1) &&
			    ((bigobj == NULL) ||
			     (bigobj->resident_page_count < obj->resident_page_count))) {
				bigobj = obj;
			}
		}
		tmpe = tmpe->next;
	}

	if (bigobj)
		vm_pageout_object_deactivate_pages(map, bigobj, desired, 0);

	/*
	 * Next, hunt around for other pages to deactivate.  We actually
	 * do this search sort of wrong -- .text first is not the best idea.
	 */
	tmpe = map->header.next;
	while (tmpe != &map->header) {
		if (vm_map_pmap(map)->pm_stats.resident_count <= desired)
			break;
		if ((tmpe->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
			obj = tmpe->object.vm_object;
			if (obj)
				vm_pageout_object_deactivate_pages(map, obj, desired, 0);
		}
		tmpe = tmpe->next;
	}

	/*
	 * Remove all mappings if a process is swapped out, this will free page
	 * table pages.
	 */
	if (desired == 0)
		pmap_remove(vm_map_pmap(map),
		    VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	vm_map_unlock(map);
	return;
}
#endif

void
vm_pageout_page_free(vm_page_t m) {
	struct vnode *vp;
	vm_object_t object;

	object = m->object;
	object->ref_count++;

	if (object->type == OBJT_VNODE) {
		vp = object->handle;
		vp->v_usecount++;
		if (VSHOULDBUSY(vp))
			vbusy(vp);
	}

	vm_page_busy(m);
	vm_page_protect(m, VM_PROT_NONE);
	vm_page_free(m);
	vm_object_deallocate(object);
}

/*
 * vm_pageout_scan does the dirty work for the pageout daemon.
 */
static int
vm_pageout_scan()
{
	vm_page_t m, next;
	int page_shortage, addl_page_shortage, maxscan, pcount;
	int maxlaunder;
	int pages_freed;
	struct proc *p, *bigproc;
	vm_offset_t size, bigsize;
	vm_object_t object;
	int force_wakeup = 0;
	int actcount;
	int vnodes_skipped = 0;
	int s;

	/*
	 * Do whatever cleanup that the pmap code can.
	 */
	pmap_collect();

	/*
	 * Start scanning the inactive queue for pages we can free.  We keep
	 * scanning until we have enough free pages or we have scanned through
	 * the entire queue.  If we encounter dirty pages, we start cleaning
	 * them.
	 */

	pages_freed = 0;
	addl_page_shortage = vm_pageout_deficit;
	vm_pageout_deficit = 0;

	if (max_page_launder == 0)
		max_page_launder = 1;
	maxlaunder = (cnt.v_inactive_target > max_page_launder) ?
	    max_page_launder : cnt.v_inactive_target;

rescan0:
	maxscan = cnt.v_inactive_count;
	for (m = TAILQ_FIRST(&vm_page_queue_inactive);
	    (m != NULL) && (maxscan-- > 0) &&
	    ((cnt.v_cache_count + cnt.v_free_count) <
	    (cnt.v_cache_min + cnt.v_free_target));
	    m = next) {

		cnt.v_pdpages++;

		if (m->queue != PQ_INACTIVE) {
			goto rescan0;
		}

		next = TAILQ_NEXT(m, pageq);

		if (m->hold_count) {
			s = splvm();
			TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
			splx(s);
			addl_page_shortage++;
			continue;
		}
		/*
		 * Don't mess with busy pages; keep them at the front of the
		 * queue, they are most likely being paged out.
		 */
		if (m->busy || (m->flags & PG_BUSY)) {
			addl_page_shortage++;
			continue;
		}

		/*
		 * If the object is not being used, we ignore previous references.
		 */
		if (m->object->ref_count == 0) {
			vm_page_flag_clear(m, PG_REFERENCED);
			pmap_clear_reference(VM_PAGE_TO_PHYS(m));

		/*
		 * Otherwise, if the page has been referenced while in the inactive
		 * queue, we bump the "activation count" upwards, making it less
		 * likely that the page will be added back to the inactive queue
		 * prematurely again.  Here we check the page tables (or emulated
		 * bits, if any), since the upper level VM system knows nothing
		 * about existing references.
		 */
		} else if (((m->flags & PG_REFERENCED) == 0) &&
		    (actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(m)))) {
			vm_page_activate(m);
			m->act_count += (actcount + ACT_ADVANCE);
			continue;
		}

		/*
		 * If the upper level VM system knows about any page references,
		 * we activate the page.  We also set the "activation count" higher
		 * than normal so that we are less likely to place the page back
		 * onto the inactive queue again.
		 */
		if ((m->flags & PG_REFERENCED) != 0) {
			vm_page_flag_clear(m, PG_REFERENCED);
			actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(m));
			vm_page_activate(m);
			m->act_count += (actcount + ACT_ADVANCE + 1);
			continue;
		}

		/*
		 * If the upper level VM system doesn't know anything about the
		 * page being dirty, we have to check for it again.  As far as the
		 * VM code knows, any partially dirty pages are fully dirty.
		 */
		if (m->dirty == 0) {
			vm_page_test_dirty(m);
		} else {
			m->dirty = VM_PAGE_BITS_ALL;
		}

		/*
		 * Invalid pages can be easily freed.
		 */
		if (m->valid == 0) {
			vm_pageout_page_free(m);
			cnt.v_dfree++;
			pages_freed++;

		/*
		 * Clean pages can be placed onto the cache queue.
		 */
		} else if (m->dirty == 0) {
			vm_page_cache(m);
			pages_freed++;

		/*
		 * Dirty pages need to be paged out.  Note that we clean
		 * only a limited number of pages per pagedaemon pass.
		 */
		} else if (maxlaunder > 0) {
			int written;
			int swap_pageouts_ok;
			struct vnode *vp = NULL;

			object = m->object;

			if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
				swap_pageouts_ok = 1;
			} else {
				swap_pageouts_ok = !(defer_swap_pageouts || disable_swap_pageouts);
				swap_pageouts_ok |= (!disable_swap_pageouts && defer_swap_pageouts &&
				    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min);
			}

			/*
			 * We don't bother paging objects that are "dead".  Those
			 * objects are in a "rundown" state.
			 */
			if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) {
				s = splvm();
				TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
				splx(s);
				continue;
			}

			if ((object->type == OBJT_VNODE) &&
			    (object->flags & OBJ_DEAD) == 0) {
				vp = object->handle;
				if (VOP_ISLOCKED(vp) ||
				    vget(vp, LK_EXCLUSIVE|LK_NOOBJ, curproc)) {
					if ((m->queue == PQ_INACTIVE) &&
					    (m->hold_count == 0) &&
					    (m->busy == 0) &&
					    (m->flags & PG_BUSY) == 0) {
						s = splvm();
						TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
						TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
						splx(s);
					}
					if (object->flags & OBJ_MIGHTBEDIRTY)
						vnodes_skipped++;
					continue;
				}

				/*
				 * The page might have been moved to another queue
				 * during potential blocking in vget() above.
				 */
				if (m->queue != PQ_INACTIVE) {
					if (object->flags & OBJ_MIGHTBEDIRTY)
						vnodes_skipped++;
					vput(vp);
					continue;
				}

				/*
				 * The page may have been busied during the blocking
				 * in vget().  We don't move the page back onto the
				 * end of the queue, so that the statistics stay
				 * more accurate.
				 */
				if (m->busy || (m->flags & PG_BUSY)) {
					vput(vp);
					continue;
				}

				/*
				 * If the page has become held, then skip it.
				 */
				if (m->hold_count) {
					s = splvm();
					TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
					TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
					splx(s);
					if (object->flags & OBJ_MIGHTBEDIRTY)
						vnodes_skipped++;
					vput(vp);
					continue;
				}
			}

			/*
			 * If a page is dirty, then it is either being washed
			 * (but not yet cleaned) or it is still in the
			 * laundry.  If it is still in the laundry, then we
			 * start the cleaning operation.
			 */
			written = vm_pageout_clean(m);
			if (vp)
				vput(vp);

			maxlaunder -= written;
		}
	}

	/*
	 * Compute the page shortage.  If we are still very low on memory be
	 * sure that we will move a minimal amount of pages from active to
	 * inactive.
	 */
	page_shortage = (cnt.v_inactive_target + cnt.v_cache_min) -
	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
	page_shortage += addl_page_shortage;
	if (page_shortage <= 0) {
		page_shortage = 0;
	}

	pcount = cnt.v_active_count;
	m = TAILQ_FIRST(&vm_page_queue_active);
	while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {

		/*
		 * This is a consistency check, and should likely be a panic
		 * or warning.
		 */
		if (m->queue != PQ_ACTIVE) {
			break;
		}

		next = TAILQ_NEXT(m, pageq);
		/*
		 * Don't deactivate pages that are busy.
		 */
		if ((m->busy != 0) ||
		    (m->flags & PG_BUSY) ||
		    (m->hold_count != 0)) {
			s = splvm();
			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			splx(s);
			m = next;
			continue;
		}

		/*
		 * The count for pagedaemon pages is done after checking the
		 * page for eligibility...
		 */
		cnt.v_pdpages++;

		/*
		 * Check to see "how much" the page has been used.
		 */
		actcount = 0;
		if (m->object->ref_count != 0) {
			if (m->flags & PG_REFERENCED) {
				actcount += 1;
			}
			actcount += pmap_ts_referenced(VM_PAGE_TO_PHYS(m));
			if (actcount) {
				m->act_count += ACT_ADVANCE + actcount;
				if (m->act_count > ACT_MAX)
					m->act_count = ACT_MAX;
			}
		}

		/*
		 * Since we have "tested" this bit, we need to clear it now.
		 */
		vm_page_flag_clear(m, PG_REFERENCED);

		/*
		 * Only if an object is currently being used, do we use the
		 * page activation count stats.
		 */
		if (actcount && (m->object->ref_count != 0)) {
			s = splvm();
			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			splx(s);
		} else {
			m->act_count -= min(m->act_count, ACT_DECLINE);
			if (vm_pageout_algorithm_lru ||
			    (m->object->ref_count == 0) || (m->act_count == 0)) {
				page_shortage--;
				if (m->object->ref_count == 0) {
					vm_page_protect(m, VM_PROT_NONE);
					if (m->dirty == 0)
						vm_page_cache(m);
					else
						vm_page_deactivate(m);
				} else {
					vm_page_deactivate(m);
				}
			} else {
				s = splvm();
				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
				splx(s);
			}
		}
		m = next;
	}

	s = splvm();
	/*
	 * We try to maintain some *really* free pages; this allows interrupt
	 * code to be guaranteed space.
	 */
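	/*
	 * Pages are taken from the cache queues here and freed outright.
	 * Stepping the rover by PQ_PRIME2 (masked with PQ_L2_MASK) walks
	 * the per-color PQ_CACHE sub-queues in a scattered order, so that
	 * reclamation tends to be spread across page colors rather than
	 * draining a single sub-queue.
	 */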
	while (cnt.v_free_count < cnt.v_free_reserved) {
		static int cache_rover = 0;
		m = vm_page_list_find(PQ_CACHE, cache_rover);
		if (!m)
			break;
		cache_rover = (cache_rover + PQ_PRIME2) & PQ_L2_MASK;
		vm_pageout_page_free(m);
		cnt.v_dfree++;
	}
	splx(s);

#if !defined(NO_SWAPPING)
	/*
	 * Idle process swapout -- run once per second.
	 */
	if (vm_swap_idle_enabled) {
		static long lsec;
		if (time_second != lsec) {
			vm_pageout_req_swapout |= VM_SWAP_IDLE;
			vm_req_vmdaemon();
			lsec = time_second;
		}
	}
#endif

	/*
	 * If we didn't get enough free pages, and we have skipped a vnode
	 * in a writeable object, wakeup the sync daemon.  And kick swapout
	 * if we did not get enough free pages.
	 */
	if ((cnt.v_cache_count + cnt.v_free_count) <
	    (cnt.v_free_target + cnt.v_cache_min)) {
		if (vnodes_skipped &&
		    (cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_min) {
			if (!vfs_update_wakeup) {
				vfs_update_wakeup = 1;
				wakeup(&vfs_update_wakeup);
			}
		}
#if !defined(NO_SWAPPING)
		if (vm_swap_enabled &&
		    (cnt.v_free_count + cnt.v_cache_count < cnt.v_free_target)) {
			vm_req_vmdaemon();
			vm_pageout_req_swapout |= VM_SWAP_NORMAL;
		}
#endif
	}


	/*
	 * If we are low on both memory and swap space, kill the biggest
	 * eligible process.
	 */
	if ((vm_swap_size == 0 || swap_pager_full) &&
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min)) {
		bigproc = NULL;
		bigsize = 0;
		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
			/*
			 * if this is a system process, skip it
			 */
			if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) ||
			    ((p->p_pid < 48) && (vm_swap_size != 0))) {
				continue;
			}
			/*
			 * if the process is in a non-running type state,
			 * don't touch it.
			 */
			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
				continue;
			}
			/*
			 * get the process size
			 */
			size = p->p_vmspace->vm_pmap.pm_stats.resident_count;
			/*
			 * if this process is bigger than the biggest one,
			 * remember it.
			 */
			if (size > bigsize) {
				bigproc = p;
				bigsize = size;
			}
		}
		if (bigproc != NULL) {
			killproc(bigproc, "out of swap space");
			bigproc->p_estcpu = 0;
			bigproc->p_nice = PRIO_MIN;
			resetpriority(bigproc);
			wakeup(&cnt.v_free_count);
		}
	}
	return force_wakeup;
}

/*
 * This routine tries to maintain the pseudo-LRU active queue, so that
 * during long periods of time when there is no paging, some statistic
 * accumulation still occurs.  This code helps the situation where paging
 * just starts to occur.
 */
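
/*
 * vm_pageout_page_stats is called from the pageout daemon's main loop
 * when its sleep times out without a real page shortage.  Between full
 * scans (every vm_pageout_full_stats_interval seconds) only a portion of
 * the active queue is examined per call, scaled from vm_pageout_stats_max
 * by the fraction of pages that are currently active.
 */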
static void
vm_pageout_page_stats()
{
	int s;
	vm_page_t m, next;
	int pcount, tpcount;		/* Number of pages to check */
	static int fullintervalcount = 0;
	int page_shortage;

	page_shortage = (cnt.v_inactive_target + cnt.v_cache_max + cnt.v_free_min) -
	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
	if (page_shortage <= 0)
		return;

	pcount = cnt.v_active_count;
	fullintervalcount += vm_pageout_stats_interval;
	if (fullintervalcount < vm_pageout_full_stats_interval) {
		tpcount = (vm_pageout_stats_max * cnt.v_active_count) / cnt.v_page_count;
		if (pcount > tpcount)
			pcount = tpcount;
	}

	m = TAILQ_FIRST(&vm_page_queue_active);
	while ((m != NULL) && (pcount-- > 0)) {
		int actcount;

		if (m->queue != PQ_ACTIVE) {
			break;
		}

		next = TAILQ_NEXT(m, pageq);
		/*
		 * Don't deactivate pages that are busy.
		 */
		if ((m->busy != 0) ||
		    (m->flags & PG_BUSY) ||
		    (m->hold_count != 0)) {
			s = splvm();
			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			splx(s);
			m = next;
			continue;
		}

		actcount = 0;
		if (m->flags & PG_REFERENCED) {
			vm_page_flag_clear(m, PG_REFERENCED);
			actcount += 1;
		}

		actcount += pmap_ts_referenced(VM_PAGE_TO_PHYS(m));
		if (actcount) {
			m->act_count += ACT_ADVANCE + actcount;
			if (m->act_count > ACT_MAX)
				m->act_count = ACT_MAX;
			s = splvm();
			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			splx(s);
		} else {
			if (m->act_count == 0) {
				/*
				 * We turn off page access, so that we have
				 * more accurate RSS stats.  We don't do this
				 * in the normal page deactivation when the
				 * system is under VM load, because the cost
				 * of the large number of page protect
				 * operations would be higher than the value
				 * of doing the operation.
				 */
				vm_page_protect(m, VM_PROT_NONE);
				vm_page_deactivate(m);
			} else {
				m->act_count -= min(m->act_count, ACT_DECLINE);
				s = splvm();
				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
				splx(s);
			}
		}

		m = next;
	}
}

static int
vm_pageout_free_page_calc(count)
	vm_size_t count;
{
	if (count < cnt.v_page_count)
		return 0;
	/*
	 * free_reserved needs to include enough for the largest swap pager
	 * structures plus enough for any pv_entry structs when paging.
	 */
	if (cnt.v_page_count > 1024)
		cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200;
	else
		cnt.v_free_min = 4;
	cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
	    cnt.v_interrupt_free_min;
	cnt.v_free_reserved = vm_pageout_page_count +
	    cnt.v_pageout_free_min + (count / 768) + PQ_L2_SIZE;
	cnt.v_free_min += cnt.v_free_reserved;
	return 1;
}


/*
 * vm_pageout is the high level pageout daemon.
 */
static void
vm_pageout()
{
	/*
	 * Initialize some paging parameters.
	 */
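
	/*
	 * The thresholds cascade: vm_pageout_free_page_calc() sets
	 * v_free_min and v_free_reserved from the page count (for example,
	 * assuming 4K pages, a 32MB machine has 8192 pages and gets a base
	 * v_free_min of 4 + (8192 - 1024) / 200 = 39 pages before
	 * v_free_reserved is added in).  v_free_target, v_cache_min/max and
	 * v_inactive_target are then derived below from those values.
	 */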

	cnt.v_interrupt_free_min = 2;
	if (cnt.v_page_count < 2000)
		vm_pageout_page_count = 8;

	vm_pageout_free_page_calc(cnt.v_page_count);
	/*
	 * free_reserved needs to include enough for the largest swap pager
	 * structures plus enough for any pv_entry structs when paging.
	 */
	if (cnt.v_free_count > 6144)
		cnt.v_free_target = 3 * cnt.v_free_min + cnt.v_free_reserved;
	else
		cnt.v_free_target = 2 * cnt.v_free_min + cnt.v_free_reserved;

	if (cnt.v_free_count > 2048) {
		cnt.v_cache_min = cnt.v_free_target;
		cnt.v_cache_max = 2 * cnt.v_cache_min;
		cnt.v_inactive_target = (3 * cnt.v_free_target) / 2;
	} else {
		cnt.v_cache_min = 0;
		cnt.v_cache_max = 0;
		cnt.v_inactive_target = cnt.v_free_count / 4;
	}
	if (cnt.v_inactive_target > cnt.v_free_count / 3)
		cnt.v_inactive_target = cnt.v_free_count / 3;

	/* XXX does not really belong here */
	if (vm_page_max_wired == 0)
		vm_page_max_wired = cnt.v_free_count / 3;

	if (vm_pageout_stats_max == 0)
		vm_pageout_stats_max = cnt.v_free_target;

	/*
	 * Set interval in seconds for stats scan.
	 */
	if (vm_pageout_stats_interval == 0)
		vm_pageout_stats_interval = 5;
	if (vm_pageout_full_stats_interval == 0)
		vm_pageout_full_stats_interval = vm_pageout_stats_interval * 4;


	/*
	 * Set maximum free per pass
	 */
	if (vm_pageout_stats_free_max == 0)
		vm_pageout_stats_free_max = 5;

	max_page_launder = (cnt.v_page_count > 1800 ? 32 : 16);

	swap_pager_swap_init();
	/*
	 * The pageout daemon is never done, so loop forever.
	 */
	while (TRUE) {
		int error;
		int s = splvm();
		if (!vm_pages_needed ||
		    ((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_min)) {
			vm_pages_needed = 0;
			error = tsleep(&vm_pages_needed,
			    PVM, "psleep", vm_pageout_stats_interval * hz);
			if (error && !vm_pages_needed) {
				splx(s);
				vm_pageout_page_stats();
				continue;
			}
		} else if (vm_pages_needed) {
			vm_pages_needed = 0;
			tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
		}

		if (vm_pages_needed)
			cnt.v_pdwakeups++;
		vm_pages_needed = 0;
		splx(s);
		vm_pager_sync();
		vm_pageout_scan();
		vm_pageout_deficit = 0;
		vm_pager_sync();
		wakeup(&cnt.v_free_count);
	}
}

void
pagedaemon_wakeup()
{
	if (!vm_pages_needed && curproc != pageproc) {
		vm_pages_needed++;
		wakeup(&vm_pages_needed);
	}
}

#if !defined(NO_SWAPPING)
static void
vm_req_vmdaemon()
{
	static int lastrun = 0;

	if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
		wakeup(&vm_daemon_needed);
		lastrun = ticks;
	}
}

static void
vm_daemon()
{
	struct proc *p;

	while (TRUE) {
		tsleep(&vm_daemon_needed, PPAUSE, "psleep", 0);
		if (vm_pageout_req_swapout) {
			swapout_procs(vm_pageout_req_swapout);
			vm_pageout_req_swapout = 0;
		}
		/*
		 * scan the processes for exceeding their rlimits or if
		 * a process is swapped out -- deactivate pages
		 */

		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
			quad_t limit;
			vm_offset_t size;

			/*
			 * if this is a system process or if we have already
			 * looked at this process, skip it.
			 */
			if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
				continue;
			}
			/*
			 * if the process is in a non-running type state,
			 * don't touch it.
			 */
			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
				continue;
			}
			/*
			 * get a limit
			 */
			limit = qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
			    p->p_rlimit[RLIMIT_RSS].rlim_max);

			/*
			 * let processes that are swapped out really be
			 * swapped out: set the limit to nothing (this will
			 * force a swap-out).
			 */
			if ((p->p_flag & P_INMEM) == 0)
				limit = 0;	/* XXX */

			size = p->p_vmspace->vm_pmap.pm_stats.resident_count * PAGE_SIZE;
			if (limit >= 0 && size >= limit) {
				vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map,
				    (vm_pindex_t)(limit >> PAGE_SHIFT));
			}
		}
	}
}
#endif