/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_glue.c	8.6 (Berkeley) 1/5/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD$
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/shm.h>
#include <sys/vmmeter.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/unistd.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <sys/user.h>

extern int maxslp;

/*
 * System initialization
 *
 * Note: proc0 from proc.h
 */
static void vm_init_limits(void *);
SYSINIT(vm_limits, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_init_limits, &proc0)

/*
 * THIS MUST BE THE LAST INITIALIZATION ITEM!!!
 *
 * Note: run scheduling should be divorced from the vm system.
 */
static void scheduler(void *);
SYSINIT(scheduler, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, scheduler, NULL)

#ifndef NO_SWAPPING
static void swapout(struct proc *);
static void vm_proc_swapin(struct proc *p);
static void vm_proc_swapout(struct proc *p);
#endif

/*
 * MPSAFE
 */
int
kernacc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_offset_t saddr, eaddr;
	vm_prot_t prot;

	KASSERT((rw & ~VM_PROT_ALL) == 0,
	    ("illegal ``rw'' argument to kernacc (%x)\n", rw));
	prot = rw;
	saddr = trunc_page((vm_offset_t)addr);
	eaddr = round_page((vm_offset_t)addr + len);
	rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
	return (rv == TRUE);
}

/*
 * MPSAFE
 */
int
useracc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_prot_t prot;
	vm_map_t map;

	KASSERT((rw & ~VM_PROT_ALL) == 0,
	    ("illegal ``rw'' argument to useracc (%x)\n", rw));
	prot = rw;
	map = &curproc->p_vmspace->vm_map;
	if ((vm_offset_t)addr + len > vm_map_max(map) ||
	    (vm_offset_t)addr + len < (vm_offset_t)addr) {
		return (FALSE);
	}
	rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr),
	    round_page((vm_offset_t)addr + len), prot);
	return (rv == TRUE);
}

/*
 * MPSAFE
 */
void
vslock(addr, len)
	caddr_t addr;
	u_int len;
{

	vm_map_wire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
	    round_page((vm_offset_t)addr + len), FALSE);
}

/*
 * MPSAFE
 */
void
vsunlock(addr, len)
	caddr_t addr;
	u_int len;
{

	vm_map_unwire(&curproc->p_vmspace->vm_map,
	    trunc_page((vm_offset_t)addr),
	    round_page((vm_offset_t)addr + len), FALSE);
}

/*
 * Create the U area for a new process.
 * This routine directly affects the fork perf for a process.
 */
void
vm_proc_new(struct proc *p)
{
	vm_page_t ma[UAREA_PAGES];
	vm_object_t upobj;
	vm_offset_t up;
	vm_page_t m;
	u_int i;

	/*
	 * Allocate object for the upage.
	 */
	upobj = vm_object_allocate(OBJT_DEFAULT, UAREA_PAGES);
	p->p_upages_obj = upobj;

	/*
	 * Get a kernel virtual address for the U area for this process.
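	 * (A note on the mechanism, as read from the code below: the
	 *  kmem_alloc_nofault() call only reserves kernel virtual address
	 *  space; the wired pages grabbed from upobj are mapped into that
	 *  range with pmap_qenter() at the end of this routine.)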
	 */
	up = kmem_alloc_nofault(kernel_map, UAREA_PAGES * PAGE_SIZE);
	if (up == 0)
		panic("vm_proc_new: upage allocation failed");
	p->p_uarea = (struct user *)up;

	for (i = 0; i < UAREA_PAGES; i++) {
		/*
		 * Get a uarea page.
		 */
		m = vm_page_grab(upobj, i,
		    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED);
		ma[i] = m;

		vm_page_lock_queues();
		vm_page_wakeup(m);
		vm_page_flag_clear(m, PG_ZERO);
		m->valid = VM_PAGE_BITS_ALL;
		vm_page_unlock_queues();
	}

	/*
	 * Enter the pages into the kernel address space.
	 */
	pmap_qenter(up, ma, UAREA_PAGES);
}

/*
 * Dispose of the U area for a process that has exited.
 * This routine directly impacts the exit perf of a process.
 * XXX proc_zone is marked UMA_ZONE_NOFREE, so this should never be called.
 */
void
vm_proc_dispose(struct proc *p)
{
	vm_object_t upobj;
	vm_offset_t up;
	vm_page_t m;

	upobj = p->p_upages_obj;
	if (upobj->resident_page_count != UAREA_PAGES)
		panic("vm_proc_dispose: incorrect number of pages in upobj");
	vm_page_lock_queues();
	while ((m = TAILQ_FIRST(&upobj->memq)) != NULL) {
		vm_page_busy(m);
		vm_page_unwire(m, 0);
		vm_page_free(m);
	}
	vm_page_unlock_queues();
	up = (vm_offset_t)p->p_uarea;
	pmap_qremove(up, UAREA_PAGES);
	kmem_free(kernel_map, up, UAREA_PAGES * PAGE_SIZE);
	vm_object_deallocate(upobj);
}

#ifndef NO_SWAPPING
/*
 * Allow the U area for a process to be prejudicially paged out.
 */
static void
vm_proc_swapout(struct proc *p)
{
	vm_object_t upobj;
	vm_offset_t up;
	vm_page_t m;

	upobj = p->p_upages_obj;
	if (upobj->resident_page_count != UAREA_PAGES)
		panic("vm_proc_swapout: incorrect number of pages in upobj");
	vm_page_lock_queues();
	TAILQ_FOREACH(m, &upobj->memq, listq) {
		vm_page_dirty(m);
		vm_page_unwire(m, 0);
	}
	vm_page_unlock_queues();
	up = (vm_offset_t)p->p_uarea;
	pmap_qremove(up, UAREA_PAGES);
}

/*
 * Bring the U area for a specified process back in.
 */
static void
vm_proc_swapin(struct proc *p)
{
	vm_page_t ma[UAREA_PAGES];
	vm_object_t upobj;
	vm_offset_t up;
	vm_page_t m;
	int rv;
	int i;

	upobj = p->p_upages_obj;
	for (i = 0; i < UAREA_PAGES; i++) {
		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
		if (m->valid != VM_PAGE_BITS_ALL) {
			rv = vm_pager_get_pages(upobj, &m, 1, 0);
			if (rv != VM_PAGER_OK)
				panic("vm_proc_swapin: cannot get upage");
		}
		ma[i] = m;
	}
	if (upobj->resident_page_count != UAREA_PAGES)
		panic("vm_proc_swapin: lost pages from upobj");
	vm_page_lock_queues();
	TAILQ_FOREACH(m, &upobj->memq, listq) {
		m->valid = VM_PAGE_BITS_ALL;
		vm_page_wire(m);
		vm_page_wakeup(m);
	}
	vm_page_unlock_queues();
	up = (vm_offset_t)p->p_uarea;
	pmap_qenter(up, ma, UAREA_PAGES);
}

/*
 * Swap in the UAREAs of all processes swapped out to the given device.
 * The pages in the UAREA are marked dirty and their swap metadata is freed.
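 * (Judging from the devidx argument and the swap_pager_freespace() call
 *  below, this is what lets a swap device be removed without any U area
 *  keeping backing store on it; the re-dirtied pages can simply be paged
 *  out elsewhere if they are evicted again.)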
 */
void
vm_proc_swapin_all(int devidx)
{
	struct proc *p;
	vm_object_t object;
	vm_page_t m;

retry:
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);

		object = p->p_upages_obj;
		if (object != NULL &&
		    swap_pager_isswapped(p->p_upages_obj, devidx)) {
			sx_sunlock(&allproc_lock);
			faultin(p);
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			vm_page_lock_queues();
			TAILQ_FOREACH(m, &object->memq, listq)
				vm_page_dirty(m);
			vm_page_unlock_queues();
			swap_pager_freespace(object, 0,
			    object->un_pager.swp.swp_bcount);
			goto retry;
		}

		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	}
	sx_sunlock(&allproc_lock);
}
#endif

/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.  The new process is set up so that it returns directly
 * to user mode to avoid stack copying and relocation problems.
 */
void
vm_forkproc(td, p2, td2, flags)
	struct thread *td;
	struct proc *p2;
	struct thread *td2;
	int flags;
{
	struct proc *p1 = td->td_proc;
	struct user *up;

	GIANT_REQUIRED;

	if ((flags & RFPROC) == 0) {
		/*
		 * Divorce the memory, if it is shared; essentially
		 * this changes memory shared amongst threads into
		 * COW locally.
		 */
		if ((flags & RFMEM) == 0) {
			if (p1->p_vmspace->vm_refcnt > 1) {
				vmspace_unshare(p1);
			}
		}
		cpu_fork(td, p2, td2, flags);
		return;
	}

	if (flags & RFMEM) {
		p2->p_vmspace = p1->p_vmspace;
		p1->p_vmspace->vm_refcnt++;
	}

	while (vm_page_count_severe()) {
		VM_WAIT;
	}

	if ((flags & RFMEM) == 0) {
		p2->p_vmspace = vmspace_fork(p1->p_vmspace);

		pmap_pinit2(vmspace_pmap(p2->p_vmspace));

		if (p1->p_vmspace->vm_shm)
			shmfork(p1, p2);
	}

	/* XXXKSE this is unsatisfactory but should be adequate */
	up = p2->p_uarea;

	/*
	 * p_stats currently points at fields in the user struct
	 * but not at &u, instead at p_addr.  Copy parts of
	 * p_stats; zero the rest of p_stats (statistics).
	 *
	 * If procsig->ps_refcnt is 1 and p2->p_sigacts is NULL we don't need
	 * to share sigacts, so we use the up->u_sigacts.
	 */
	p2->p_stats = &up->u_stats;
	if (p2->p_sigacts == NULL) {
		if (p2->p_procsig->ps_refcnt != 1)
			printf("PID:%d NULL sigacts with refcnt not 1!\n",
			    p2->p_pid);
		p2->p_sigacts = &up->u_sigacts;
		up->u_sigacts = *p1->p_sigacts;
	}

	bzero(&up->u_stats.pstat_startzero,
	    (unsigned) ((caddr_t) &up->u_stats.pstat_endzero -
		(caddr_t) &up->u_stats.pstat_startzero));
	bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
	    ((caddr_t) &up->u_stats.pstat_endcopy -
		(caddr_t) &up->u_stats.pstat_startcopy));

	/*
	 * cpu_fork will copy and update the pcb, set up the kernel stack,
	 * and make the child ready to run.
	 */
	cpu_fork(td, p2, td2, flags);
}

/*
 * Called after a process has been wait(2)'ed upon and is being reaped.
 * The idea is to reclaim resources that we could not reclaim while
 * the process was still executing.
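 * (In other words, cpu_wait() releases the machine-dependent state and
 *  vmspace_exitfree() drops the address space, neither of which could be
 *  freed while the process might still run on them.)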
 */
void
vm_waitproc(p)
	struct proc *p;
{

	GIANT_REQUIRED;
	cpu_wait(p);
	vmspace_exitfree(p);		/* and clean-out the vmspace */
}

/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 *
 * XXX should probably act directly on proc0.
 */
static void
vm_init_limits(udata)
	void *udata;
{
	struct proc *p = udata;
	int rss_limit;

	/*
	 * Set up the initial limits on process VM.  Set the maximum resident
	 * set size to be half of (reasonably) available memory.  Since this
	 * is a soft limit, it comes into effect only when the system is out
	 * of memory - half of main memory helps to favor smaller processes,
	 * and reduces thrashing of the object cache.
	 */
	p->p_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
	p->p_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz;
	p->p_rlimit[RLIMIT_DATA].rlim_max = maxdsiz;
	/* limit the limit to no less than 2MB */
	rss_limit = max(cnt.v_free_count, 512);
	p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit);
	p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY;
}

void
faultin(p)
	struct proc *p;
{

	GIANT_REQUIRED;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);
#ifdef NO_SWAPPING
	if ((p->p_sflag & PS_INMEM) == 0)
		panic("faultin: proc swapped out with NO_SWAPPING!");
#else
	if ((p->p_sflag & PS_INMEM) == 0) {
		struct thread *td;

		++p->p_lock;
		/*
		 * If another process is swapping in this process,
		 * just wait until it finishes.
		 */
		if (p->p_sflag & PS_SWAPPINGIN) {
			mtx_unlock_spin(&sched_lock);
			msleep(&p->p_sflag, &p->p_mtx, PVM, "faultin", 0);
			mtx_lock_spin(&sched_lock);
			--p->p_lock;
			return;
		}

		p->p_sflag |= PS_SWAPPINGIN;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);

		vm_proc_swapin(p);
		FOREACH_THREAD_IN_PROC(p, td) {
			pmap_swapin_thread(td);
			TD_CLR_SWAPPED(td);
		}

		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		p->p_sflag &= ~PS_SWAPPINGIN;
		p->p_sflag |= PS_INMEM;
		FOREACH_THREAD_IN_PROC(p, td)
			if (TD_CAN_RUN(td))
				setrunnable(td);

		wakeup(&p->p_sflag);

		/* Undo the effect of incrementing p_lock above. */
		--p->p_lock;
	}
#endif
}

/*
 * This swapin algorithm attempts to swap-in processes only if there
 * is enough space for them.  Of course, if a process waits for a long
 * time, it will be swapped in anyway.
 *
 * XXXKSE - the process containing the highest-priority thread is the
 * one that counts.
 *
 * Giant is still held at this point, to be released in tsleep.
 */
/* ARGSUSED*/
static void
scheduler(dummy)
	void *dummy;
{
	struct proc *p;
	struct thread *td;
	int pri;
	struct proc *pp;
	int ppri;

	mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED);
	/* GIANT_REQUIRED */

loop:
	if (vm_page_count_min()) {
		VM_WAIT;
		goto loop;
	}

	pp = NULL;
	ppri = INT_MIN;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		struct ksegrp *kg;
		if (p->p_sflag & (PS_INMEM | PS_SWAPPING | PS_SWAPPINGIN)) {
			continue;
		}
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td) {
			/*
			 * An otherwise runnable thread of a process
			 * swapped out has only the TDI_SWAPPED bit set.
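			 * (That is, td_inhibitors == TDI_SWAPPED identifies
			 *  a thread that would be runnable if its process
			 *  were resident; threads inhibited for any other
			 *  reason as well are not worth swapping the
			 *  process back in for.)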
			 */
			if (td->td_inhibitors == TDI_SWAPPED) {
				kg = td->td_ksegrp;
				pri = p->p_swtime + kg->kg_slptime;
				if ((p->p_sflag & PS_SWAPINREQ) == 0) {
					pri -= kg->kg_nice * 8;
				}

				/*
				 * If this ksegrp is higher priority
				 * and there is enough space, then select
				 * this process instead of the previous
				 * selection.
				 */
				if (pri > ppri) {
					pp = p;
					ppri = pri;
				}
			}
		}
		mtx_unlock_spin(&sched_lock);
	}
	sx_sunlock(&allproc_lock);

	/*
	 * Nothing to do, back to sleep.
	 */
	if ((p = pp) == NULL) {
		tsleep(&proc0, PVM, "sched", maxslp * hz / 2);
		goto loop;
	}
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);

	/*
	 * Another process may be bringing or may have already
	 * brought this process in while we traverse all threads.
	 * Or, this process may even be being swapped out again.
	 */
	if (p->p_sflag & (PS_INMEM|PS_SWAPPING|PS_SWAPPINGIN)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		goto loop;
	}

	p->p_sflag &= ~PS_SWAPINREQ;

	/*
	 * We would like to bring someone in (only if there is space).
	 * [What checks the space?]
	 */
	faultin(p);
	PROC_UNLOCK(p);
	p->p_swtime = 0;
	mtx_unlock_spin(&sched_lock);
	goto loop;
}

#ifndef NO_SWAPPING

/*
 * swap_idle_threshold1 is the guaranteed swapped-in time for a process.
 */
static int swap_idle_threshold1 = 2;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1,
	CTLFLAG_RW, &swap_idle_threshold1, 0, "");

/*
 * swap_idle_threshold2 is the time that a process can be idle before
 * it will be swapped out, if idle swapping is enabled.
 */
static int swap_idle_threshold2 = 10;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2,
	CTLFLAG_RW, &swap_idle_threshold2, 0, "");

/*
 * Swapout is driven by the pageout daemon.  Very simple: we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Else, we swap the longest-sleeping or stopped process,
 * if any, otherwise the longest-resident process.
 */
void
swapout_procs(action)
	int action;
{
	struct proc *p;
	struct thread *td;
	struct ksegrp *kg;
	struct proc *outp, *outp2;
	int outpri, outpri2;
	int didswap = 0;

	GIANT_REQUIRED;

	outp = outp2 = NULL;
	outpri = outpri2 = INT_MIN;
retry:
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		struct vmspace *vm;
		int minslptime = 100000;

		/*
		 * Watch out for a process in
		 * creation.  It may have no
		 * address space or lock yet.
		 */
		mtx_lock_spin(&sched_lock);
		if (p->p_state == PRS_NEW) {
			mtx_unlock_spin(&sched_lock);
			continue;
		}
		mtx_unlock_spin(&sched_lock);

		/*
		 * An aio daemon switches its
		 * address space while running.
		 * Perform a quick check whether
		 * a process has P_SYSTEM.
		 */
		PROC_LOCK(p);
		if ((p->p_flag & P_SYSTEM) != 0) {
			PROC_UNLOCK(p);
			continue;
		}

		/*
		 * Do not swap out a process that
		 * is waiting for VM data
		 * structures as there is a possible
		 * deadlock.  Test this first as
		 * this may block.
		 *
		 * Lock the map until swapout
		 * finishes, or a thread of this
		 * process may attempt to alter
		 * the map.
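		 * (vm_map_trylock() is used below rather than a blocking
		 *  lock; if the map is busy the process is simply skipped
		 *  on this pass.)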
		 */
		vm = p->p_vmspace;
		KASSERT(vm != NULL,
			("swapout_procs: a process has no address space"));
		++vm->vm_refcnt;
		PROC_UNLOCK(p);
		if (!vm_map_trylock(&vm->vm_map))
			goto nextproc1;

		PROC_LOCK(p);
		if (p->p_lock != 0 ||
		    (p->p_flag & (P_STOPPED_SINGLE|P_TRACED|P_SYSTEM|P_WEXIT)
		    ) != 0) {
			goto nextproc2;
		}
		/*
		 * Only aiod changes vmspace; however, it will be
		 * skipped because of the if statement above checking
		 * for P_SYSTEM.
		 */
		mtx_lock_spin(&sched_lock);
		if ((p->p_sflag & (PS_INMEM|PS_SWAPPING|PS_SWAPPINGIN)) != PS_INMEM)
			goto nextproc;

		switch (p->p_state) {
		default:
			/* Don't swap out processes in any sort
			 * of 'special' state. */
			goto nextproc;

		case PRS_NORMAL:
			/*
			 * Do not swap out a realtime process.
			 * Check all the thread groups..
			 */
			FOREACH_KSEGRP_IN_PROC(p, kg) {
				if (PRI_IS_REALTIME(kg->kg_pri_class))
					goto nextproc;

				/*
				 * Guarantee swap_idle_threshold1
				 * time in memory.
				 */
				if (kg->kg_slptime < swap_idle_threshold1)
					goto nextproc;

				/*
				 * Do not swap out a process if it is
				 * waiting on a critical event of some
				 * kind or there is a thread whose
				 * pageable memory may be accessed.
				 *
				 * This could be refined to support
				 * swapping out a thread.
				 */
				FOREACH_THREAD_IN_GROUP(kg, td) {
					if ((td->td_priority) < PSOCK ||
					    !thread_safetoswapout(td))
						goto nextproc;
				}
				/*
				 * If the system is under memory stress,
				 * or if we are swapping
				 * idle processes >= swap_idle_threshold2,
				 * then swap the process out.
				 */
				if (((action & VM_SWAP_NORMAL) == 0) &&
				    (((action & VM_SWAP_IDLE) == 0) ||
				    (kg->kg_slptime < swap_idle_threshold2)))
					goto nextproc;

				if (minslptime > kg->kg_slptime)
					minslptime = kg->kg_slptime;
			}

			/*
			 * If the process has been asleep for a while and had
			 * most of its pages taken away already, swap it out.
			 */
			if ((action & VM_SWAP_NORMAL) ||
				((action & VM_SWAP_IDLE) &&
				 (minslptime > swap_idle_threshold2))) {
				swapout(p);
				didswap++;

				/*
				 * swapout() unlocks a proc lock.  This is
				 * ugly, but avoids superfluous locking.
				 */
				mtx_unlock_spin(&sched_lock);
				vm_map_unlock(&vm->vm_map);
				vmspace_free(vm);
				sx_sunlock(&allproc_lock);
				goto retry;
			}
		}
nextproc:
		mtx_unlock_spin(&sched_lock);
nextproc2:
		PROC_UNLOCK(p);
		vm_map_unlock(&vm->vm_map);
nextproc1:
		vmspace_free(vm);
		continue;
	}
	sx_sunlock(&allproc_lock);
	/*
	 * If we swapped something out, and another process needed memory,
	 * then wakeup the sched process.
	 */
	if (didswap)
		wakeup(&proc0);
}

static void
swapout(p)
	struct proc *p;
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
#if defined(SWAP_DEBUG)
	printf("swapping out %d\n", p->p_pid);
#endif

	/*
	 * The states of this process and its threads may have changed
	 * by now.  Assuming that there is only one pageout daemon thread,
	 * this process should still be in memory.
	 */
	KASSERT((p->p_sflag & (PS_INMEM|PS_SWAPPING|PS_SWAPPINGIN)) == PS_INMEM,
	    ("swapout: lost a swapout race?"));

#if defined(INVARIANTS)
	/*
	 * Make sure that all threads are safe to be swapped out.
	 *
	 * Alternatively, we could swap out only safe threads.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(thread_safetoswapout(td),
		    ("swapout: there is a thread not safe for swapout"));
	}
#endif /* INVARIANTS */

	++p->p_stats->p_ru.ru_nswap;
	/*
	 * remember the process resident count
	 */
	p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace);

	PROC_UNLOCK(p);
	p->p_sflag &= ~PS_INMEM;
	p->p_sflag |= PS_SWAPPING;
	mtx_unlock_spin(&sched_lock);

	vm_proc_swapout(p);
	FOREACH_THREAD_IN_PROC(p, td) {
		pmap_swapout_thread(td);
		TD_SET_SWAPPED(td);
	}
	mtx_lock_spin(&sched_lock);
	p->p_sflag &= ~PS_SWAPPING;
	p->p_swtime = 0;
}
#endif /* !NO_SWAPPING */