/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_glue.c	8.6 (Berkeley) 1/5/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD$
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/shm.h>
#include <sys/vmmeter.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/unistd.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
Grimes #include <vm/vm_pageout.h> 90a136efe9SPeter Wemm #include <vm/vm_object.h> 91df8bae1dSRodney W. Grimes #include <vm/vm_kern.h> 92efeaf95aSDavid Greenman #include <vm/vm_extern.h> 93a136efe9SPeter Wemm #include <vm/vm_pager.h> 94efeaf95aSDavid Greenman 95efeaf95aSDavid Greenman #include <sys/user.h> 96df8bae1dSRodney W. Grimes 97ea754954SJohn Baldwin extern int maxslp; 98ea754954SJohn Baldwin 992b14f991SJulian Elischer /* 1002b14f991SJulian Elischer * System initialization 1012b14f991SJulian Elischer * 1022b14f991SJulian Elischer * Note: proc0 from proc.h 1032b14f991SJulian Elischer */ 10411caded3SAlfred Perlstein static void vm_init_limits(void *); 1054590fd3aSDavid Greenman SYSINIT(vm_limits, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_init_limits, &proc0) 1062b14f991SJulian Elischer 1072b14f991SJulian Elischer /* 1082b14f991SJulian Elischer * THIS MUST BE THE LAST INITIALIZATION ITEM!!! 1092b14f991SJulian Elischer * 1102b14f991SJulian Elischer * Note: run scheduling should be divorced from the vm system. 1112b14f991SJulian Elischer */ 11211caded3SAlfred Perlstein static void scheduler(void *); 1132b14f991SJulian Elischer SYSINIT(scheduler, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, scheduler, NULL) 1142b14f991SJulian Elischer 115e50f5c2eSBruce Evans #ifndef NO_SWAPPING 11611caded3SAlfred Perlstein static void swapout(struct proc *); 117a136efe9SPeter Wemm static void vm_proc_swapin(struct proc *p); 118a136efe9SPeter Wemm static void vm_proc_swapout(struct proc *p); 119e50f5c2eSBruce Evans #endif 120f708ef1bSPoul-Henning Kamp 12143a90f3aSAlan Cox /* 12243a90f3aSAlan Cox * MPSAFE 12343a90f3aSAlan Cox */ 124df8bae1dSRodney W. Grimes int 125df8bae1dSRodney W. Grimes kernacc(addr, len, rw) 126df8bae1dSRodney W. Grimes caddr_t addr; 127df8bae1dSRodney W. Grimes int len, rw; 128df8bae1dSRodney W. Grimes { 129df8bae1dSRodney W. Grimes boolean_t rv; 130df8bae1dSRodney W. Grimes vm_offset_t saddr, eaddr; 13102c58685SPoul-Henning Kamp vm_prot_t prot; 132df8bae1dSRodney W. 
Grimes 133e50f5c2eSBruce Evans KASSERT((rw & ~VM_PROT_ALL) == 0, 13402c58685SPoul-Henning Kamp ("illegal ``rw'' argument to kernacc (%x)\n", rw)); 13502c58685SPoul-Henning Kamp prot = rw; 1366cde7a16SDavid Greenman saddr = trunc_page((vm_offset_t)addr); 1376cde7a16SDavid Greenman eaddr = round_page((vm_offset_t)addr + len); 138df8bae1dSRodney W. Grimes rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); 139df8bae1dSRodney W. Grimes return (rv == TRUE); 140df8bae1dSRodney W. Grimes } 141df8bae1dSRodney W. Grimes 14243a90f3aSAlan Cox /* 14343a90f3aSAlan Cox * MPSAFE 14443a90f3aSAlan Cox */ 145df8bae1dSRodney W. Grimes int 146df8bae1dSRodney W. Grimes useracc(addr, len, rw) 147df8bae1dSRodney W. Grimes caddr_t addr; 148df8bae1dSRodney W. Grimes int len, rw; 149df8bae1dSRodney W. Grimes { 150df8bae1dSRodney W. Grimes boolean_t rv; 15102c58685SPoul-Henning Kamp vm_prot_t prot; 15205ba50f5SJake Burkholder vm_map_t map; 153df8bae1dSRodney W. Grimes 154e50f5c2eSBruce Evans KASSERT((rw & ~VM_PROT_ALL) == 0, 15502c58685SPoul-Henning Kamp ("illegal ``rw'' argument to useracc (%x)\n", rw)); 15602c58685SPoul-Henning Kamp prot = rw; 15705ba50f5SJake Burkholder map = &curproc->p_vmspace->vm_map; 15805ba50f5SJake Burkholder if ((vm_offset_t)addr + len > vm_map_max(map) || 15905ba50f5SJake Burkholder (vm_offset_t)addr + len < (vm_offset_t)addr) { 16026f9a767SRodney W. Grimes return (FALSE); 16126f9a767SRodney W. Grimes } 16205ba50f5SJake Burkholder rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr), 16305ba50f5SJake Burkholder round_page((vm_offset_t)addr + len), prot); 164df8bae1dSRodney W. Grimes return (rv == TRUE); 165df8bae1dSRodney W. Grimes } 166df8bae1dSRodney W. Grimes 16743a90f3aSAlan Cox /* 16843a90f3aSAlan Cox * MPSAFE 16943a90f3aSAlan Cox */ 170df8bae1dSRodney W. Grimes void 171df8bae1dSRodney W. Grimes vslock(addr, len) 172df8bae1dSRodney W. Grimes caddr_t addr; 173df8bae1dSRodney W. Grimes u_int len; 174df8bae1dSRodney W. 
Grimes { 17543a90f3aSAlan Cox 1761d7cf06cSAlan Cox vm_map_wire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr), 1776cde7a16SDavid Greenman round_page((vm_offset_t)addr + len), FALSE); 178df8bae1dSRodney W. Grimes } 179df8bae1dSRodney W. Grimes 180319490fbSAlan Cox /* 181319490fbSAlan Cox * MPSAFE 182319490fbSAlan Cox */ 183df8bae1dSRodney W. Grimes void 1847de47255SPoul-Henning Kamp vsunlock(addr, len) 185df8bae1dSRodney W. Grimes caddr_t addr; 186df8bae1dSRodney W. Grimes u_int len; 187df8bae1dSRodney W. Grimes { 188319490fbSAlan Cox 1891d7cf06cSAlan Cox vm_map_unwire(&curproc->p_vmspace->vm_map, 19023955314SAlfred Perlstein trunc_page((vm_offset_t)addr), 1911d7cf06cSAlan Cox round_page((vm_offset_t)addr + len), FALSE); 192df8bae1dSRodney W. Grimes } 193df8bae1dSRodney W. Grimes 194df8bae1dSRodney W. Grimes /* 195a136efe9SPeter Wemm * Create the U area for a new process. 196a136efe9SPeter Wemm * This routine directly affects the fork perf for a process. 197a136efe9SPeter Wemm */ 198a136efe9SPeter Wemm void 199a136efe9SPeter Wemm vm_proc_new(struct proc *p) 200a136efe9SPeter Wemm { 201a136efe9SPeter Wemm vm_page_t ma[UAREA_PAGES]; 202a136efe9SPeter Wemm vm_object_t upobj; 203a136efe9SPeter Wemm vm_offset_t up; 204a136efe9SPeter Wemm vm_page_t m; 205a136efe9SPeter Wemm u_int i; 206a136efe9SPeter Wemm 207a136efe9SPeter Wemm /* 208a136efe9SPeter Wemm * Allocate object for the upage. 209a136efe9SPeter Wemm */ 210a136efe9SPeter Wemm upobj = vm_object_allocate(OBJT_DEFAULT, UAREA_PAGES); 211a136efe9SPeter Wemm p->p_upages_obj = upobj; 212a136efe9SPeter Wemm 213a136efe9SPeter Wemm /* 214a136efe9SPeter Wemm * Get a kernel virtual address for the U area for this process. 
215a136efe9SPeter Wemm */ 216a136efe9SPeter Wemm up = kmem_alloc_nofault(kernel_map, UAREA_PAGES * PAGE_SIZE); 217a136efe9SPeter Wemm if (up == 0) 218a136efe9SPeter Wemm panic("vm_proc_new: upage allocation failed"); 219a136efe9SPeter Wemm p->p_uarea = (struct user *)up; 220a136efe9SPeter Wemm 221a136efe9SPeter Wemm for (i = 0; i < UAREA_PAGES; i++) { 222a136efe9SPeter Wemm /* 223a136efe9SPeter Wemm * Get a uarea page. 224a136efe9SPeter Wemm */ 22514f8ceaaSAlan Cox m = vm_page_grab(upobj, i, 22614f8ceaaSAlan Cox VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED); 227a136efe9SPeter Wemm ma[i] = m; 228a136efe9SPeter Wemm 229a136efe9SPeter Wemm vm_page_wakeup(m); 230a136efe9SPeter Wemm vm_page_flag_clear(m, PG_ZERO); 231a136efe9SPeter Wemm m->valid = VM_PAGE_BITS_ALL; 232a136efe9SPeter Wemm } 233a136efe9SPeter Wemm 234a136efe9SPeter Wemm /* 235a136efe9SPeter Wemm * Enter the pages into the kernel address space. 236a136efe9SPeter Wemm */ 237a136efe9SPeter Wemm pmap_qenter(up, ma, UAREA_PAGES); 238a136efe9SPeter Wemm } 239a136efe9SPeter Wemm 240a136efe9SPeter Wemm /* 241a136efe9SPeter Wemm * Dispose the U area for a process that has exited. 242a136efe9SPeter Wemm * This routine directly impacts the exit perf of a process. 243a136efe9SPeter Wemm * XXX proc_zone is marked UMA_ZONE_NOFREE, so this should never be called. 
244a136efe9SPeter Wemm */ 245a136efe9SPeter Wemm void 246a136efe9SPeter Wemm vm_proc_dispose(struct proc *p) 247a136efe9SPeter Wemm { 248a136efe9SPeter Wemm vm_object_t upobj; 249a136efe9SPeter Wemm vm_offset_t up; 250a136efe9SPeter Wemm vm_page_t m; 251a136efe9SPeter Wemm 252a136efe9SPeter Wemm upobj = p->p_upages_obj; 253f59685a4SPeter Wemm if (upobj->resident_page_count != UAREA_PAGES) 254f59685a4SPeter Wemm panic("vm_proc_dispose: incorrect number of pages in upobj"); 2552d09a6adSAlan Cox vm_page_lock_queues(); 256f59685a4SPeter Wemm while ((m = TAILQ_FIRST(&upobj->memq)) != NULL) { 257a136efe9SPeter Wemm vm_page_busy(m); 258a136efe9SPeter Wemm vm_page_unwire(m, 0); 259a136efe9SPeter Wemm vm_page_free(m); 260a136efe9SPeter Wemm } 2612d09a6adSAlan Cox vm_page_unlock_queues(); 262f59685a4SPeter Wemm up = (vm_offset_t)p->p_uarea; 263a136efe9SPeter Wemm pmap_qremove(up, UAREA_PAGES); 264a136efe9SPeter Wemm kmem_free(kernel_map, up, UAREA_PAGES * PAGE_SIZE); 265a136efe9SPeter Wemm vm_object_deallocate(upobj); 266a136efe9SPeter Wemm } 267a136efe9SPeter Wemm 268a136efe9SPeter Wemm #ifndef NO_SWAPPING 269a136efe9SPeter Wemm /* 270a136efe9SPeter Wemm * Allow the U area for a process to be prejudicially paged out. 
271a136efe9SPeter Wemm */ 272a136efe9SPeter Wemm void 273a136efe9SPeter Wemm vm_proc_swapout(struct proc *p) 274a136efe9SPeter Wemm { 275a136efe9SPeter Wemm vm_object_t upobj; 276a136efe9SPeter Wemm vm_offset_t up; 277a136efe9SPeter Wemm vm_page_t m; 278a136efe9SPeter Wemm 279a136efe9SPeter Wemm upobj = p->p_upages_obj; 280f59685a4SPeter Wemm if (upobj->resident_page_count != UAREA_PAGES) 281f59685a4SPeter Wemm panic("vm_proc_dispose: incorrect number of pages in upobj"); 2822d09a6adSAlan Cox vm_page_lock_queues(); 283f59685a4SPeter Wemm TAILQ_FOREACH(m, &upobj->memq, listq) { 284a136efe9SPeter Wemm vm_page_dirty(m); 285a136efe9SPeter Wemm vm_page_unwire(m, 0); 286a136efe9SPeter Wemm } 2872d09a6adSAlan Cox vm_page_unlock_queues(); 288f59685a4SPeter Wemm up = (vm_offset_t)p->p_uarea; 289a136efe9SPeter Wemm pmap_qremove(up, UAREA_PAGES); 290a136efe9SPeter Wemm } 291a136efe9SPeter Wemm 292a136efe9SPeter Wemm /* 293a136efe9SPeter Wemm * Bring the U area for a specified process back in. 294a136efe9SPeter Wemm */ 295a136efe9SPeter Wemm void 296a136efe9SPeter Wemm vm_proc_swapin(struct proc *p) 297a136efe9SPeter Wemm { 298a136efe9SPeter Wemm vm_page_t ma[UAREA_PAGES]; 299a136efe9SPeter Wemm vm_object_t upobj; 300a136efe9SPeter Wemm vm_offset_t up; 301a136efe9SPeter Wemm vm_page_t m; 302a136efe9SPeter Wemm int rv; 303a136efe9SPeter Wemm int i; 304a136efe9SPeter Wemm 305a136efe9SPeter Wemm upobj = p->p_upages_obj; 306a136efe9SPeter Wemm for (i = 0; i < UAREA_PAGES; i++) { 307a136efe9SPeter Wemm m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 308a136efe9SPeter Wemm if (m->valid != VM_PAGE_BITS_ALL) { 309a136efe9SPeter Wemm rv = vm_pager_get_pages(upobj, &m, 1, 0); 310a136efe9SPeter Wemm if (rv != VM_PAGER_OK) 311a136efe9SPeter Wemm panic("vm_proc_swapin: cannot get upage"); 312a136efe9SPeter Wemm } 313a136efe9SPeter Wemm ma[i] = m; 314a7e9138eSPeter Wemm } 315a7e9138eSPeter Wemm if (upobj->resident_page_count != UAREA_PAGES) 316a7e9138eSPeter Wemm 
panic("vm_proc_swapin: lost pages from upobj"); 317e16cfdbeSAlan Cox vm_page_lock_queues(); 318a7e9138eSPeter Wemm TAILQ_FOREACH(m, &upobj->memq, listq) { 319a7e9138eSPeter Wemm m->valid = VM_PAGE_BITS_ALL; 320a136efe9SPeter Wemm vm_page_wire(m); 321a136efe9SPeter Wemm vm_page_wakeup(m); 322a136efe9SPeter Wemm } 323e16cfdbeSAlan Cox vm_page_unlock_queues(); 324f59685a4SPeter Wemm up = (vm_offset_t)p->p_uarea; 325a136efe9SPeter Wemm pmap_qenter(up, ma, UAREA_PAGES); 326a136efe9SPeter Wemm } 327a136efe9SPeter Wemm #endif 328a136efe9SPeter Wemm 329a136efe9SPeter Wemm /* 330df8bae1dSRodney W. Grimes * Implement fork's actions on an address space. 331df8bae1dSRodney W. Grimes * Here we arrange for the address space to be copied or referenced, 332df8bae1dSRodney W. Grimes * allocate a user struct (pcb and kernel stack), then call the 333df8bae1dSRodney W. Grimes * machine-dependent layer to fill those in and make the new process 334a2a1c95cSPeter Wemm * ready to run. The new process is set up so that it returns directly 335a2a1c95cSPeter Wemm * to user mode to avoid stack copying and relocation problems. 336df8bae1dSRodney W. Grimes */ 337a2a1c95cSPeter Wemm void 338079b7badSJulian Elischer vm_forkproc(td, p2, td2, flags) 339b40ce416SJulian Elischer struct thread *td; 340b40ce416SJulian Elischer struct proc *p2; 341079b7badSJulian Elischer struct thread *td2; 342a2a1c95cSPeter Wemm int flags; 343df8bae1dSRodney W. Grimes { 344b40ce416SJulian Elischer struct proc *p1 = td->td_proc; 34554d92145SMatthew Dillon struct user *up; 346df8bae1dSRodney W. Grimes 3470cddd8f0SMatthew Dillon GIANT_REQUIRED; 3480cddd8f0SMatthew Dillon 34991c28bfdSLuoqi Chen if ((flags & RFPROC) == 0) { 35091c28bfdSLuoqi Chen /* 35191c28bfdSLuoqi Chen * Divorce the memory, if it is shared, essentially 35291c28bfdSLuoqi Chen * this changes shared memory amongst threads, into 35391c28bfdSLuoqi Chen * COW locally. 
35491c28bfdSLuoqi Chen */ 35591c28bfdSLuoqi Chen if ((flags & RFMEM) == 0) { 35691c28bfdSLuoqi Chen if (p1->p_vmspace->vm_refcnt > 1) { 35791c28bfdSLuoqi Chen vmspace_unshare(p1); 35891c28bfdSLuoqi Chen } 35991c28bfdSLuoqi Chen } 360079b7badSJulian Elischer cpu_fork(td, p2, td2, flags); 36191c28bfdSLuoqi Chen return; 36291c28bfdSLuoqi Chen } 36391c28bfdSLuoqi Chen 3645856e12eSJohn Dyson if (flags & RFMEM) { 3655856e12eSJohn Dyson p2->p_vmspace = p1->p_vmspace; 3665856e12eSJohn Dyson p1->p_vmspace->vm_refcnt++; 3675856e12eSJohn Dyson } 3685856e12eSJohn Dyson 36990ecac61SMatthew Dillon while (vm_page_count_severe()) { 37026f9a767SRodney W. Grimes VM_WAIT; 3710d94caffSDavid Greenman } 37226f9a767SRodney W. Grimes 3735856e12eSJohn Dyson if ((flags & RFMEM) == 0) { 374df8bae1dSRodney W. Grimes p2->p_vmspace = vmspace_fork(p1->p_vmspace); 375df8bae1dSRodney W. Grimes 376d4da2dbaSAlan Cox pmap_pinit2(vmspace_pmap(p2->p_vmspace)); 377d4da2dbaSAlan Cox 378df8bae1dSRodney W. Grimes if (p1->p_vmspace->vm_shm) 379dabee6feSPeter Wemm shmfork(p1, p2); 380a2a1c95cSPeter Wemm } 381df8bae1dSRodney W. Grimes 382b40ce416SJulian Elischer /* XXXKSE this is unsatisfactory but should be adequate */ 383b40ce416SJulian Elischer up = p2->p_uarea; 384df8bae1dSRodney W. Grimes 38539fb8e6bSJulian Elischer /* 38639fb8e6bSJulian Elischer * p_stats currently points at fields in the user struct 38739fb8e6bSJulian Elischer * but not at &u, instead at p_addr. Copy parts of 38839fb8e6bSJulian Elischer * p_stats; zero the rest of p_stats (statistics). 389dc9c271aSJulian Elischer * 390dc9c271aSJulian Elischer * If procsig->ps_refcnt is 1 and p2->p_sigacts is NULL we dont' need 391dc9c271aSJulian Elischer * to share sigacts, so we use the up->u_sigacts. 
39239fb8e6bSJulian Elischer */ 39339fb8e6bSJulian Elischer p2->p_stats = &up->u_stats; 394dc9c271aSJulian Elischer if (p2->p_sigacts == NULL) { 395dc9c271aSJulian Elischer if (p2->p_procsig->ps_refcnt != 1) 396dc9c271aSJulian Elischer printf ("PID:%d NULL sigacts with refcnt not 1!\n",p2->p_pid); 397dc9c271aSJulian Elischer p2->p_sigacts = &up->u_sigacts; 398dc9c271aSJulian Elischer up->u_sigacts = *p1->p_sigacts; 399dc9c271aSJulian Elischer } 40088c5ea45SJulian Elischer 401df8bae1dSRodney W. Grimes bzero(&up->u_stats.pstat_startzero, 402df8bae1dSRodney W. Grimes (unsigned) ((caddr_t) &up->u_stats.pstat_endzero - 403df8bae1dSRodney W. Grimes (caddr_t) &up->u_stats.pstat_startzero)); 404df8bae1dSRodney W. Grimes bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy, 405df8bae1dSRodney W. Grimes ((caddr_t) &up->u_stats.pstat_endcopy - 406df8bae1dSRodney W. Grimes (caddr_t) &up->u_stats.pstat_startcopy)); 407df8bae1dSRodney W. Grimes 408df8bae1dSRodney W. Grimes 409df8bae1dSRodney W. Grimes /* 410a2a1c95cSPeter Wemm * cpu_fork will copy and update the pcb, set up the kernel stack, 411a2a1c95cSPeter Wemm * and make the child ready to run. 412df8bae1dSRodney W. Grimes */ 413079b7badSJulian Elischer cpu_fork(td, p2, td2, flags); 414df8bae1dSRodney W. Grimes } 415df8bae1dSRodney W. Grimes 416df8bae1dSRodney W. Grimes /* 417eb30c1c0SPeter Wemm * Called after process has been wait(2)'ed apon and is being reaped. 418eb30c1c0SPeter Wemm * The idea is to reclaim resources that we could not reclaim while 419eb30c1c0SPeter Wemm * the process was still executing. 
420eb30c1c0SPeter Wemm */ 421eb30c1c0SPeter Wemm void 422eb30c1c0SPeter Wemm vm_waitproc(p) 423eb30c1c0SPeter Wemm struct proc *p; 424eb30c1c0SPeter Wemm { 425eb30c1c0SPeter Wemm 426eb30c1c0SPeter Wemm GIANT_REQUIRED; 427eb30c1c0SPeter Wemm cpu_wait(p); 428582ec34cSAlfred Perlstein vmspace_exitfree(p); /* and clean-out the vmspace */ 429eb30c1c0SPeter Wemm } 430eb30c1c0SPeter Wemm 431eb30c1c0SPeter Wemm /* 432df8bae1dSRodney W. Grimes * Set default limits for VM system. 433df8bae1dSRodney W. Grimes * Called for proc 0, and then inherited by all others. 4342b14f991SJulian Elischer * 4352b14f991SJulian Elischer * XXX should probably act directly on proc0. 436df8bae1dSRodney W. Grimes */ 4372b14f991SJulian Elischer static void 4382b14f991SJulian Elischer vm_init_limits(udata) 4394590fd3aSDavid Greenman void *udata; 440df8bae1dSRodney W. Grimes { 44154d92145SMatthew Dillon struct proc *p = udata; 442bbc0ec52SDavid Greenman int rss_limit; 443df8bae1dSRodney W. Grimes 444df8bae1dSRodney W. Grimes /* 4450d94caffSDavid Greenman * Set up the initial limits on process VM. Set the maximum resident 4460d94caffSDavid Greenman * set size to be half of (reasonably) available memory. Since this 4470d94caffSDavid Greenman * is a soft limit, it comes into effect only when the system is out 4480d94caffSDavid Greenman * of memory - half of main memory helps to favor smaller processes, 449bbc0ec52SDavid Greenman * and reduces thrashing of the object cache. 450df8bae1dSRodney W. Grimes */ 451cbc89bfbSPaul Saab p->p_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; 452cbc89bfbSPaul Saab p->p_rlimit[RLIMIT_STACK].rlim_max = maxssiz; 453cbc89bfbSPaul Saab p->p_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; 454cbc89bfbSPaul Saab p->p_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; 455dd0bd066SDavid Greenman /* limit the limit to no less than 2MB */ 456f2daac0cSDavid Greenman rss_limit = max(cnt.v_free_count, 512); 457bbc0ec52SDavid Greenman p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit); 45826f9a767SRodney W. 
Grimes p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY; 459df8bae1dSRodney W. Grimes } 460df8bae1dSRodney W. Grimes 46126f9a767SRodney W. Grimes void 46226f9a767SRodney W. Grimes faultin(p) 46326f9a767SRodney W. Grimes struct proc *p; 46426f9a767SRodney W. Grimes { 46526f9a767SRodney W. Grimes 466a136efe9SPeter Wemm GIANT_REQUIRED; 467c96d52a9SJohn Baldwin PROC_LOCK_ASSERT(p, MA_OWNED); 4681d7b9ed2SJulian Elischer mtx_assert(&sched_lock, MA_OWNED); 469a136efe9SPeter Wemm #ifdef NO_SWAPPING 470a136efe9SPeter Wemm if ((p->p_sflag & PS_INMEM) == 0) 471a136efe9SPeter Wemm panic("faultin: proc swapped out with NO_SWAPPING!"); 472a136efe9SPeter Wemm #else 4735074aecdSJohn Baldwin if ((p->p_sflag & PS_INMEM) == 0) { 474a136efe9SPeter Wemm struct thread *td; 475a136efe9SPeter Wemm 47626f9a767SRodney W. Grimes ++p->p_lock; 4771d7b9ed2SJulian Elischer /* 4781d7b9ed2SJulian Elischer * If another process is swapping in this process, 4791d7b9ed2SJulian Elischer * just wait until it finishes. 4801d7b9ed2SJulian Elischer */ 4811d7b9ed2SJulian Elischer if (p->p_sflag & PS_SWAPPINGIN) { 4821d7b9ed2SJulian Elischer mtx_unlock_spin(&sched_lock); 4831d7b9ed2SJulian Elischer msleep(&p->p_sflag, &p->p_mtx, PVM, "faultin", 0); 4841d7b9ed2SJulian Elischer mtx_lock_spin(&sched_lock); 4851d7b9ed2SJulian Elischer --p->p_lock; 4861d7b9ed2SJulian Elischer return; 4871d7b9ed2SJulian Elischer } 4881d7b9ed2SJulian Elischer 4891d7b9ed2SJulian Elischer p->p_sflag |= PS_SWAPPINGIN; 4909ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 49145ece682SJohn Baldwin PROC_UNLOCK(p); 49226f9a767SRodney W. Grimes 493a136efe9SPeter Wemm vm_proc_swapin(p); 49471fad9fdSJulian Elischer FOREACH_THREAD_IN_PROC (p, td) { 495b40ce416SJulian Elischer pmap_swapin_thread(td); 49671fad9fdSJulian Elischer TD_CLR_SWAPPED(td); 49771fad9fdSJulian Elischer } 49826f9a767SRodney W. 
Grimes 49945ece682SJohn Baldwin PROC_LOCK(p); 5009ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 5019eb881f8SSeigo Tanimura p->p_sflag &= ~PS_SWAPPINGIN; 5029eb881f8SSeigo Tanimura p->p_sflag |= PS_INMEM; 503b40ce416SJulian Elischer FOREACH_THREAD_IN_PROC (p, td) 50471fad9fdSJulian Elischer if (TD_CAN_RUN(td)) 50571fad9fdSJulian Elischer setrunnable(td); 50626f9a767SRodney W. Grimes 5071d7b9ed2SJulian Elischer wakeup(&p->p_sflag); 50826f9a767SRodney W. Grimes 50926f9a767SRodney W. Grimes /* undo the effect of setting SLOCK above */ 51026f9a767SRodney W. Grimes --p->p_lock; 51126f9a767SRodney W. Grimes } 512a136efe9SPeter Wemm #endif 51326f9a767SRodney W. Grimes } 51426f9a767SRodney W. Grimes 515df8bae1dSRodney W. Grimes /* 51626f9a767SRodney W. Grimes * This swapin algorithm attempts to swap-in processes only if there 51726f9a767SRodney W. Grimes * is enough space for them. Of course, if a process waits for a long 51826f9a767SRodney W. Grimes * time, it will be swapped in anyway. 5190384fff8SJason Evans * 520e602ba25SJulian Elischer * XXXKSE - process with the thread with highest priority counts.. 521b40ce416SJulian Elischer * 5220384fff8SJason Evans * Giant is still held at this point, to be released in tsleep. 523df8bae1dSRodney W. Grimes */ 5242b14f991SJulian Elischer /* ARGSUSED*/ 5252b14f991SJulian Elischer static void 526d841aaa7SBruce Evans scheduler(dummy) 527d841aaa7SBruce Evans void *dummy; 528df8bae1dSRodney W. Grimes { 52954d92145SMatthew Dillon struct proc *p; 530e602ba25SJulian Elischer struct thread *td; 53154d92145SMatthew Dillon int pri; 532df8bae1dSRodney W. Grimes struct proc *pp; 533df8bae1dSRodney W. Grimes int ppri; 534df8bae1dSRodney W. Grimes 535c96d52a9SJohn Baldwin mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); 5360cddd8f0SMatthew Dillon /* GIANT_REQUIRED */ 5370384fff8SJason Evans 538df8bae1dSRodney W. 
Grimes loop: 53990ecac61SMatthew Dillon if (vm_page_count_min()) { 5400d94caffSDavid Greenman VM_WAIT; 54190ecac61SMatthew Dillon goto loop; 5420d94caffSDavid Greenman } 54326f9a767SRodney W. Grimes 544df8bae1dSRodney W. Grimes pp = NULL; 545df8bae1dSRodney W. Grimes ppri = INT_MIN; 5461005a129SJohn Baldwin sx_slock(&allproc_lock); 547b40ce416SJulian Elischer FOREACH_PROC_IN_SYSTEM(p) { 548b40ce416SJulian Elischer struct ksegrp *kg; 5491d7b9ed2SJulian Elischer if (p->p_sflag & (PS_INMEM | PS_SWAPPING | PS_SWAPPINGIN)) { 550e602ba25SJulian Elischer continue; 551e602ba25SJulian Elischer } 5529ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 553e602ba25SJulian Elischer FOREACH_THREAD_IN_PROC(p, td) { 5541d7b9ed2SJulian Elischer /* 55571fad9fdSJulian Elischer * An otherwise runnable thread of a process 55671fad9fdSJulian Elischer * swapped out has only the TDI_SWAPPED bit set. 55771fad9fdSJulian Elischer * 5581d7b9ed2SJulian Elischer */ 55971fad9fdSJulian Elischer if (td->td_inhibitors == TDI_SWAPPED) { 560e602ba25SJulian Elischer kg = td->td_ksegrp; 561b40ce416SJulian Elischer pri = p->p_swtime + kg->kg_slptime; 5625074aecdSJohn Baldwin if ((p->p_sflag & PS_SWAPINREQ) == 0) { 563b40ce416SJulian Elischer pri -= kg->kg_nice * 8; 564a669a6e9SJohn Dyson } 56595461b45SJohn Dyson 56626f9a767SRodney W. Grimes /* 567b40ce416SJulian Elischer * if this ksegrp is higher priority 568b40ce416SJulian Elischer * and there is enough space, then select 569b40ce416SJulian Elischer * this process instead of the previous 570b40ce416SJulian Elischer * selection. 57126f9a767SRodney W. Grimes */ 5720d94caffSDavid Greenman if (pri > ppri) { 573df8bae1dSRodney W. Grimes pp = p; 574df8bae1dSRodney W. Grimes ppri = pri; 575df8bae1dSRodney W. Grimes } 576df8bae1dSRodney W. Grimes } 577b40ce416SJulian Elischer } 5789ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 579df8bae1dSRodney W. Grimes } 5801005a129SJohn Baldwin sx_sunlock(&allproc_lock); 58126f9a767SRodney W. 
Grimes 582df8bae1dSRodney W. Grimes /* 583a669a6e9SJohn Dyson * Nothing to do, back to sleep. 584df8bae1dSRodney W. Grimes */ 585df8bae1dSRodney W. Grimes if ((p = pp) == NULL) { 586ea754954SJohn Baldwin tsleep(&proc0, PVM, "sched", maxslp * hz / 2); 587df8bae1dSRodney W. Grimes goto loop; 588df8bae1dSRodney W. Grimes } 5891d7b9ed2SJulian Elischer PROC_LOCK(p); 5909ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 5911d7b9ed2SJulian Elischer 5921d7b9ed2SJulian Elischer /* 5931d7b9ed2SJulian Elischer * Another process may be bringing or may have already 5941d7b9ed2SJulian Elischer * brought this process in while we traverse all threads. 5951d7b9ed2SJulian Elischer * Or, this process may even be being swapped out again. 5961d7b9ed2SJulian Elischer */ 5971d7b9ed2SJulian Elischer if (p->p_sflag & (PS_INMEM|PS_SWAPPING|PS_SWAPPINGIN)) { 5989ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 5991d7b9ed2SJulian Elischer PROC_UNLOCK(p); 6001d7b9ed2SJulian Elischer goto loop; 6011d7b9ed2SJulian Elischer } 6021d7b9ed2SJulian Elischer 6031d7b9ed2SJulian Elischer p->p_sflag &= ~PS_SWAPINREQ; 604a669a6e9SJohn Dyson 605df8bae1dSRodney W. Grimes /* 60626f9a767SRodney W. Grimes * We would like to bring someone in. (only if there is space). 607e602ba25SJulian Elischer * [What checks the space? ] 608df8bae1dSRodney W. Grimes */ 60926f9a767SRodney W. Grimes faultin(p); 61045ece682SJohn Baldwin PROC_UNLOCK(p); 611df8bae1dSRodney W. Grimes p->p_swtime = 0; 6129ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 613df8bae1dSRodney W. Grimes goto loop; 614df8bae1dSRodney W. Grimes } 615df8bae1dSRodney W. 
#ifndef NO_SWAPPING

/*
 * Swap_idle_threshold1 is the guaranteed swapped in time for a process
 * (in seconds of kg_slptime): a process that has been resident for less
 * than this will never be chosen for swapout.
 */
static int swap_idle_threshold1 = 2;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1,
	CTLFLAG_RW, &swap_idle_threshold1, 0, "");

/*
 * Swap_idle_threshold2 is the time that a process can be idle before
 * it will be swapped out, if idle swapping is enabled.
 */
static int swap_idle_threshold2 = 10;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2,
	CTLFLAG_RW, &swap_idle_threshold2, 0, "");

/*
 * Swapout is driven by the pageout daemon.  Very simple, we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Else, we swap the longest-sleeping or stopped process,
 * if any, otherwise the longest-resident process.
 *
 * 'action' is a mask of VM_SWAP_NORMAL (swap under memory pressure) and
 * VM_SWAP_IDLE (also swap sufficiently idle processes).  Caller must hold
 * Giant (asserted below).  On success the loop restarts from scratch,
 * because swapping one process drops every lock taken so far.
 */
void
swapout_procs(action)
	int action;
{
	struct proc *p;
	struct thread *td;
	struct ksegrp *kg;
	struct proc *outp, *outp2;	/* NOTE(review): outp/outp2 and */
	int outpri, outpri2;		/* outpri/outpri2 are initialized but
					 * never read in this function —
					 * apparently vestigial. */
	int didswap = 0;		/* nonzero once any process swapped */

	GIANT_REQUIRED;

	outp = outp2 = NULL;
	outpri = outpri2 = INT_MIN;
retry:
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		struct vmspace *vm;
		int minslptime = 100000;	/* min kg_slptime over all
						 * ksegrps of this proc */

		/*
		 * Do not swapout a process that
		 * is waiting for VM data
		 * structures there is a possible
		 * deadlock.  Test this first as
		 * this may block.
		 *
		 * Lock the map until swapout
		 * finishes, or a thread of this
		 * process may attempt to alter
		 * the map.
		 *
		 * Watch out for a process in
		 * creation.  It may have no
		 * address space yet.
		 *
		 * An aio daemon switches its
		 * address space while running.
		 * Perform a quick check whether
		 * a process has P_SYSTEM.
		 */
		PROC_LOCK(p);
		if ((p->p_flag & P_SYSTEM) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		mtx_lock_spin(&sched_lock);
		if (p->p_state == PRS_NEW) {
			/* Process still being created; no address space. */
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			continue;
		}
		vm = p->p_vmspace;
		KASSERT(vm != NULL,
			("swapout_procs: a process has no address space"));
		/*
		 * Take a reference on the vmspace so it cannot be destroyed
		 * while we operate on it with the proc unlocked below.
		 * Released at nextproc1.
		 */
		++vm->vm_refcnt;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		/* trylock: never sleep on a map lock here (deadlock risk). */
		if (!vm_map_trylock(&vm->vm_map))
			goto nextproc1;

		PROC_LOCK(p);
		if (p->p_lock != 0 ||
		    (p->p_flag & (P_STOPPED_SINGLE|P_TRACED|P_SYSTEM|P_WEXIT)
		    ) != 0) {
			/* Held, traced, system, or exiting: leave it alone. */
			goto nextproc2;
		}
		/*
		 * only aiod changes vmspace, however it will be
		 * skipped because of the if statement above checking
		 * for P_SYSTEM
		 */
		mtx_lock_spin(&sched_lock);
		if ((p->p_sflag & (PS_INMEM|PS_SWAPPING|PS_SWAPPINGIN)) != PS_INMEM)
			goto nextproc;

		switch (p->p_state) {
		default:
			/* Don't swap out processes in any sort
			 * of 'special' state. */
			goto nextproc;

		case PRS_NORMAL:
			/*
			 * do not swapout a realtime process
			 * Check all the thread groups..
			 */
			FOREACH_KSEGRP_IN_PROC(p, kg) {
				if (PRI_IS_REALTIME(kg->kg_pri_class))
					goto nextproc;

				/*
				 * Guarantee swap_idle_threshold1
				 * time in memory.
				 */
				if (kg->kg_slptime < swap_idle_threshold1)
					goto nextproc;

				/*
				 * Do not swapout a process if it is
				 * waiting on a critical event of some
				 * kind or there is a thread whose
				 * pageable memory may be accessed.
				 *
				 * This could be refined to support
				 * swapping out a thread.
				 */
				FOREACH_THREAD_IN_GROUP(kg, td) {
					if ((td->td_priority) < PSOCK ||
					    !thread_safetoswapout(td))
						goto nextproc;
				}
				/*
				 * If the system is under memory stress,
				 * or if we are swapping
				 * idle processes >= swap_idle_threshold2,
				 * then swap the process out.
				 */
				if (((action & VM_SWAP_NORMAL) == 0) &&
				    (((action & VM_SWAP_IDLE) == 0) ||
				    (kg->kg_slptime < swap_idle_threshold2)))
					goto nextproc;

				if (minslptime > kg->kg_slptime)
					minslptime = kg->kg_slptime;
			}

			/*
			 * If the process has been asleep for awhile and had
			 * most of its pages taken away already, swap it out.
			 */
			if ((action & VM_SWAP_NORMAL) ||
				((action & VM_SWAP_IDLE) &&
				 (minslptime > swap_idle_threshold2))) {
				swapout(p);
				didswap++;

				/*
				 * swapout() unlocks a proc lock. This is
				 * ugly, but avoids superfluous lock.
				 */
				mtx_unlock_spin(&sched_lock);
				vm_map_unlock(&vm->vm_map);
				vmspace_free(vm);
				sx_sunlock(&allproc_lock);
				/* All locks dropped; rescan from scratch. */
				goto retry;
			}
		}
		/*
		 * Unlock ladder: each label below releases exactly the
		 * locks/references acquired by the time of the goto.
		 */
nextproc:
		mtx_unlock_spin(&sched_lock);
nextproc2:
		PROC_UNLOCK(p);
		vm_map_unlock(&vm->vm_map);
nextproc1:
		vmspace_free(vm);
		continue;
	}
	sx_sunlock(&allproc_lock);
	/*
	 * If we swapped something out, and another process needed memory,
	 * then wakeup the sched process.
	 */
	if (didswap)
		wakeup(&proc0);
}

/*
 * Swap out a single process: record its resident set size, mark it
 * PS_SWAPPING, unwire its U-area and thread stacks, then mark the
 * swap complete by clearing PS_SWAPPING (PS_INMEM stays cleared).
 *
 * Locking: entered with the proc lock and sched_lock held (asserted);
 * drops the proc lock partway through and returns with sched_lock
 * still held.  See the "swapout() unlocks a proc lock" note in
 * swapout_procs() above.
 */
static void
swapout(p)
	struct proc *p;
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
#if defined(SWAP_DEBUG)
	printf("swapping out %d\n", p->p_pid);
#endif

	/*
	 * The states of this process and its threads may have changed
	 * by now.  Assuming that there is only one pageout daemon thread,
	 * this process should still be in memory.
	 */
	KASSERT((p->p_sflag & (PS_INMEM|PS_SWAPPING|PS_SWAPPINGIN)) == PS_INMEM,
		("swapout: lost a swapout race?"));

#if defined(INVARIANTS)
	/*
	 * Make sure that all threads are safe to be swapped out.
	 *
	 * Alternatively, we could swap out only safe threads.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(thread_safetoswapout(td),
			("swapout: there is a thread not safe for swapout"));
	}
#endif /* INVARIANTS */

	++p->p_stats->p_ru.ru_nswap;
	/*
	 * remember the process resident count
	 */
	p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace);

	PROC_UNLOCK(p);
	FOREACH_THREAD_IN_PROC (p, td) /* shouldn't be possible, but..... */
		if (TD_ON_RUNQ(td)) {	/* XXXKSE */
			panic("swapping out runnable process");
			/*
			 * NOTE(review): the remrunqueue() below is
			 * unreachable — panic() does not return.
			 */
			remrunqueue(td);	/* XXXKSE */
		}
	p->p_sflag &= ~PS_INMEM;
	p->p_sflag |= PS_SWAPPING;
	mtx_unlock_spin(&sched_lock);

	/* Unwire the U-area and each thread's stack; mark threads swapped. */
	vm_proc_swapout(p);
	FOREACH_THREAD_IN_PROC(p, td) {
		pmap_swapout_thread(td);
		TD_SET_SWAPPED(td);
	}
	mtx_lock_spin(&sched_lock);
	p->p_sflag &= ~PS_SWAPPING;
	/* Reset residency timer: the process just left memory. */
	p->p_swtime = 0;
}
#endif /* !NO_SWAPPING */