1df8bae1dSRodney W. Grimes /* 2df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 6df8bae1dSRodney W. Grimes * The Mach Operating System project at Carnegie-Mellon University. 7df8bae1dSRodney W. Grimes * 8df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 9df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 10df8bae1dSRodney W. Grimes * are met: 11df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 12df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 13df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 15df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 16df8bae1dSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 175929bcfaSPhilippe Charnier * must display the following acknowledgement: 18df8bae1dSRodney W. Grimes * This product includes software developed by the University of 19df8bae1dSRodney W. Grimes * California, Berkeley and its contributors. 20df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 21df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 22df8bae1dSRodney W. Grimes * without specific prior written permission. 23df8bae1dSRodney W. Grimes * 24df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34df8bae1dSRodney W. Grimes * SUCH DAMAGE. 35df8bae1dSRodney W. Grimes * 363c4dd356SDavid Greenman * from: @(#)vm_glue.c 8.6 (Berkeley) 1/5/94 37df8bae1dSRodney W. Grimes * 38df8bae1dSRodney W. Grimes * 39df8bae1dSRodney W. Grimes * Copyright (c) 1987, 1990 Carnegie-Mellon University. 40df8bae1dSRodney W. Grimes * All rights reserved. 41df8bae1dSRodney W. Grimes * 42df8bae1dSRodney W. Grimes * Permission to use, copy, modify and distribute this software and 43df8bae1dSRodney W. Grimes * its documentation is hereby granted, provided that both the copyright 44df8bae1dSRodney W. Grimes * notice and this permission notice appear in all copies of the 45df8bae1dSRodney W. Grimes * software, derivative works or modified versions, and any portions 46df8bae1dSRodney W. Grimes * thereof, and that both notices appear in supporting documentation. 47df8bae1dSRodney W. Grimes * 48df8bae1dSRodney W. Grimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 49df8bae1dSRodney W. Grimes * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 50df8bae1dSRodney W. Grimes * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 51df8bae1dSRodney W. Grimes * 52df8bae1dSRodney W. Grimes * Carnegie Mellon requests users of this software to return to 53df8bae1dSRodney W. Grimes * 54df8bae1dSRodney W. Grimes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 55df8bae1dSRodney W. Grimes * School of Computer Science 56df8bae1dSRodney W. Grimes * Carnegie Mellon University 57df8bae1dSRodney W. Grimes * Pittsburgh PA 15213-3890 58df8bae1dSRodney W. Grimes * 59df8bae1dSRodney W. Grimes * any improvements or extensions that they make and grant Carnegie the 60df8bae1dSRodney W. Grimes * rights to redistribute these changes. 613c4dd356SDavid Greenman * 62c3aac50fSPeter Wemm * $FreeBSD$ 63df8bae1dSRodney W. Grimes */ 64df8bae1dSRodney W. Grimes 65faa5f8d8SAndrzej Bialecki #include "opt_vm.h" 66e9822d92SJoerg Wunsch 67df8bae1dSRodney W. Grimes #include <sys/param.h> 68df8bae1dSRodney W. Grimes #include <sys/systm.h> 69fb919e4dSMark Murray #include <sys/lock.h> 70fb919e4dSMark Murray #include <sys/mutex.h> 71df8bae1dSRodney W. Grimes #include <sys/proc.h> 72df8bae1dSRodney W. Grimes #include <sys/resourcevar.h> 733aa12267SBruce Evans #include <sys/shm.h> 74efeaf95aSDavid Greenman #include <sys/vmmeter.h> 751005a129SJohn Baldwin #include <sys/sx.h> 76ceb0cf87SJohn Dyson #include <sys/sysctl.h> 77df8bae1dSRodney W. Grimes 7826f9a767SRodney W. Grimes #include <sys/kernel.h> 790384fff8SJason Evans #include <sys/ktr.h> 80a2a1c95cSPeter Wemm #include <sys/unistd.h> 8126f9a767SRodney W. Grimes 82b1037dcdSBruce Evans #include <machine/limits.h> 83b1037dcdSBruce Evans 84df8bae1dSRodney W. Grimes #include <vm/vm.h> 85efeaf95aSDavid Greenman #include <vm/vm_param.h> 86efeaf95aSDavid Greenman #include <vm/pmap.h> 87efeaf95aSDavid Greenman #include <vm/vm_map.h> 88df8bae1dSRodney W. Grimes #include <vm/vm_page.h> 8926f9a767SRodney W. Grimes #include <vm/vm_pageout.h> 90df8bae1dSRodney W. Grimes #include <vm/vm_kern.h> 91efeaf95aSDavid Greenman #include <vm/vm_extern.h> 92efeaf95aSDavid Greenman 93efeaf95aSDavid Greenman #include <sys/user.h> 94df8bae1dSRodney W. Grimes 95ea754954SJohn Baldwin extern int maxslp; 96ea754954SJohn Baldwin 972b14f991SJulian Elischer /* 982b14f991SJulian Elischer * System initialization 992b14f991SJulian Elischer * 1002b14f991SJulian Elischer * Note: proc0 from proc.h 1012b14f991SJulian Elischer */ 1022b14f991SJulian Elischer 1034590fd3aSDavid Greenman static void vm_init_limits __P((void *)); 1044590fd3aSDavid Greenman SYSINIT(vm_limits, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_init_limits, &proc0) 1052b14f991SJulian Elischer 1062b14f991SJulian Elischer /* 1072b14f991SJulian Elischer * THIS MUST BE THE LAST INITIALIZATION ITEM!!! 1082b14f991SJulian Elischer * 1092b14f991SJulian Elischer * Note: run scheduling should be divorced from the vm system. 1102b14f991SJulian Elischer */ 1114590fd3aSDavid Greenman static void scheduler __P((void *)); 1122b14f991SJulian Elischer SYSINIT(scheduler, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, scheduler, NULL) 1132b14f991SJulian Elischer 114e50f5c2eSBruce Evans #ifndef NO_SWAPPING 115f708ef1bSPoul-Henning Kamp static void swapout __P((struct proc *)); 116e50f5c2eSBruce Evans #endif 117f708ef1bSPoul-Henning Kamp 118df8bae1dSRodney W. Grimes int 119df8bae1dSRodney W. Grimes kernacc(addr, len, rw) 120df8bae1dSRodney W. Grimes caddr_t addr; 121df8bae1dSRodney W. Grimes int len, rw; 122df8bae1dSRodney W. Grimes { 123df8bae1dSRodney W. Grimes boolean_t rv; 124df8bae1dSRodney W. Grimes vm_offset_t saddr, eaddr; 12502c58685SPoul-Henning Kamp vm_prot_t prot; 126df8bae1dSRodney W. Grimes 127e50f5c2eSBruce Evans KASSERT((rw & ~VM_PROT_ALL) == 0, 12802c58685SPoul-Henning Kamp ("illegal ``rw'' argument to kernacc (%x)\n", rw)); 12902c58685SPoul-Henning Kamp prot = rw; 1306cde7a16SDavid Greenman saddr = trunc_page((vm_offset_t)addr); 1316cde7a16SDavid Greenman eaddr = round_page((vm_offset_t)addr + len); 132bc0d3334SJohn Dyson vm_map_lock_read(kernel_map); 133df8bae1dSRodney W. Grimes rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); 134bc0d3334SJohn Dyson vm_map_unlock_read(kernel_map); 135df8bae1dSRodney W. Grimes return (rv == TRUE); 136df8bae1dSRodney W. Grimes } 137df8bae1dSRodney W. Grimes 138df8bae1dSRodney W. Grimes int 139df8bae1dSRodney W. Grimes useracc(addr, len, rw) 140df8bae1dSRodney W. Grimes caddr_t addr; 141df8bae1dSRodney W. Grimes int len, rw; 142df8bae1dSRodney W. Grimes { 143df8bae1dSRodney W. Grimes boolean_t rv; 14402c58685SPoul-Henning Kamp vm_prot_t prot; 145bc0d3334SJohn Dyson vm_map_t map; 146bc0d3334SJohn Dyson vm_map_entry_t save_hint; 147df8bae1dSRodney W. Grimes 1480cddd8f0SMatthew Dillon GIANT_REQUIRED; 1490cddd8f0SMatthew Dillon 150e50f5c2eSBruce Evans KASSERT((rw & ~VM_PROT_ALL) == 0, 15102c58685SPoul-Henning Kamp ("illegal ``rw'' argument to useracc (%x)\n", rw)); 15202c58685SPoul-Henning Kamp prot = rw; 15326f9a767SRodney W. Grimes /* 154bbc0ec52SDavid Greenman * XXX - check separately to disallow access to user area and user 155bbc0ec52SDavid Greenman * page tables - they are in the map. 15626f9a767SRodney W. Grimes * 1570d94caffSDavid Greenman * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. It was once 1580d94caffSDavid Greenman * only used (as an end address) in trap.c. Use it as an end address 1590d94caffSDavid Greenman * here too. This bogusness has spread. I just fixed where it was 1600d94caffSDavid Greenman * used as a max in vm_mmap.c. 16126f9a767SRodney W. Grimes */ 162bbc0ec52SDavid Greenman if ((vm_offset_t) addr + len > /* XXX */ VM_MAXUSER_ADDRESS 163bbc0ec52SDavid Greenman || (vm_offset_t) addr + len < (vm_offset_t) addr) { 16426f9a767SRodney W. Grimes return (FALSE); 16526f9a767SRodney W. Grimes } 166bc0d3334SJohn Dyson map = &curproc->p_vmspace->vm_map; 167bc0d3334SJohn Dyson vm_map_lock_read(map); 168bc0d3334SJohn Dyson /* 169bc0d3334SJohn Dyson * We save the map hint, and restore it. Useracc appears to distort 170bc0d3334SJohn Dyson * the map hint unnecessarily. 171bc0d3334SJohn Dyson */ 172bc0d3334SJohn Dyson save_hint = map->hint; 173bc0d3334SJohn Dyson rv = vm_map_check_protection(map, 1746cde7a16SDavid Greenman trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), prot); 175bc0d3334SJohn Dyson map->hint = save_hint; 176bc0d3334SJohn Dyson vm_map_unlock_read(map); 177bc0d3334SJohn Dyson 178df8bae1dSRodney W. Grimes return (rv == TRUE); 179df8bae1dSRodney W. Grimes } 180df8bae1dSRodney W. Grimes 181df8bae1dSRodney W. Grimes void 182df8bae1dSRodney W. Grimes vslock(addr, len) 183df8bae1dSRodney W. Grimes caddr_t addr; 184df8bae1dSRodney W. Grimes u_int len; 185df8bae1dSRodney W. Grimes { 1860cddd8f0SMatthew Dillon GIANT_REQUIRED; 18723955314SAlfred Perlstein vm_map_pageable(&curproc->p_vmspace->vm_map, 18823955314SAlfred Perlstein trunc_page((vm_offset_t)addr), 1896cde7a16SDavid Greenman round_page((vm_offset_t)addr + len), FALSE); 190df8bae1dSRodney W. Grimes } 191df8bae1dSRodney W. Grimes 192df8bae1dSRodney W. Grimes void 1937de47255SPoul-Henning Kamp vsunlock(addr, len) 194df8bae1dSRodney W. Grimes caddr_t addr; 195df8bae1dSRodney W. Grimes u_int len; 196df8bae1dSRodney W. Grimes { 1970cddd8f0SMatthew Dillon GIANT_REQUIRED; 19823955314SAlfred Perlstein vm_map_pageable(&curproc->p_vmspace->vm_map, 19923955314SAlfred Perlstein trunc_page((vm_offset_t)addr), 2006cde7a16SDavid Greenman round_page((vm_offset_t)addr + len), TRUE); 201df8bae1dSRodney W. Grimes } 202df8bae1dSRodney W. Grimes 203df8bae1dSRodney W. Grimes /* 204df8bae1dSRodney W. Grimes * Implement fork's actions on an address space. 205df8bae1dSRodney W. Grimes * Here we arrange for the address space to be copied or referenced, 206df8bae1dSRodney W. Grimes * allocate a user struct (pcb and kernel stack), then call the 207df8bae1dSRodney W. Grimes * machine-dependent layer to fill those in and make the new process 208a2a1c95cSPeter Wemm * ready to run. The new process is set up so that it returns directly 209a2a1c95cSPeter Wemm * to user mode to avoid stack copying and relocation problems. 210df8bae1dSRodney W. Grimes */ 211a2a1c95cSPeter Wemm void 212b40ce416SJulian Elischer vm_forkproc(td, p2, flags) 213b40ce416SJulian Elischer struct thread *td; 214b40ce416SJulian Elischer struct proc *p2; 215a2a1c95cSPeter Wemm int flags; 216df8bae1dSRodney W. Grimes { 217b40ce416SJulian Elischer struct proc *p1 = td->td_proc; 21854d92145SMatthew Dillon struct user *up; 219df8bae1dSRodney W. Grimes 2200cddd8f0SMatthew Dillon GIANT_REQUIRED; 2210cddd8f0SMatthew Dillon 22291c28bfdSLuoqi Chen if ((flags & RFPROC) == 0) { 22391c28bfdSLuoqi Chen /* 22491c28bfdSLuoqi Chen * Divorce the memory, if it is shared, essentially 22591c28bfdSLuoqi Chen * this changes shared memory amongst threads, into 22691c28bfdSLuoqi Chen * COW locally. 22791c28bfdSLuoqi Chen */ 22891c28bfdSLuoqi Chen if ((flags & RFMEM) == 0) { 22991c28bfdSLuoqi Chen if (p1->p_vmspace->vm_refcnt > 1) { 23091c28bfdSLuoqi Chen vmspace_unshare(p1); 23191c28bfdSLuoqi Chen } 23291c28bfdSLuoqi Chen } 233b40ce416SJulian Elischer cpu_fork(td, p2, flags); 23491c28bfdSLuoqi Chen return; 23591c28bfdSLuoqi Chen } 23691c28bfdSLuoqi Chen 2375856e12eSJohn Dyson if (flags & RFMEM) { 2385856e12eSJohn Dyson p2->p_vmspace = p1->p_vmspace; 2395856e12eSJohn Dyson p1->p_vmspace->vm_refcnt++; 2405856e12eSJohn Dyson } 2415856e12eSJohn Dyson 24290ecac61SMatthew Dillon while (vm_page_count_severe()) { 24326f9a767SRodney W. Grimes VM_WAIT; 2440d94caffSDavid Greenman } 24526f9a767SRodney W. Grimes 2465856e12eSJohn Dyson if ((flags & RFMEM) == 0) { 247df8bae1dSRodney W. Grimes p2->p_vmspace = vmspace_fork(p1->p_vmspace); 248df8bae1dSRodney W. Grimes 249d4da2dbaSAlan Cox pmap_pinit2(vmspace_pmap(p2->p_vmspace)); 250d4da2dbaSAlan Cox 251df8bae1dSRodney W. Grimes if (p1->p_vmspace->vm_shm) 252dabee6feSPeter Wemm shmfork(p1, p2); 253a2a1c95cSPeter Wemm } 254df8bae1dSRodney W. Grimes 255675878e7SJohn Dyson pmap_new_proc(p2); 256b40ce416SJulian Elischer pmap_new_thread(&p2->p_thread); /* Initial thread */ 25726f9a767SRodney W. Grimes 258b40ce416SJulian Elischer /* XXXKSE this is unsatisfactory but should be adequate */ 259b40ce416SJulian Elischer up = p2->p_uarea; 260df8bae1dSRodney W. Grimes 26139fb8e6bSJulian Elischer /* 26239fb8e6bSJulian Elischer * p_stats currently points at fields in the user struct 26339fb8e6bSJulian Elischer * but not at &u, instead at p_addr. Copy parts of 26439fb8e6bSJulian Elischer * p_stats; zero the rest of p_stats (statistics). 265dc9c271aSJulian Elischer * 266dc9c271aSJulian Elischer * If procsig->ps_refcnt is 1 and p2->p_sigacts is NULL we dont' need 267dc9c271aSJulian Elischer * to share sigacts, so we use the up->u_sigacts. 26839fb8e6bSJulian Elischer */ 26939fb8e6bSJulian Elischer p2->p_stats = &up->u_stats; 270dc9c271aSJulian Elischer if (p2->p_sigacts == NULL) { 271dc9c271aSJulian Elischer if (p2->p_procsig->ps_refcnt != 1) 272dc9c271aSJulian Elischer printf ("PID:%d NULL sigacts with refcnt not 1!\n",p2->p_pid); 273dc9c271aSJulian Elischer p2->p_sigacts = &up->u_sigacts; 274dc9c271aSJulian Elischer up->u_sigacts = *p1->p_sigacts; 275dc9c271aSJulian Elischer } 27688c5ea45SJulian Elischer 277df8bae1dSRodney W. Grimes bzero(&up->u_stats.pstat_startzero, 278df8bae1dSRodney W. Grimes (unsigned) ((caddr_t) &up->u_stats.pstat_endzero - 279df8bae1dSRodney W. Grimes (caddr_t) &up->u_stats.pstat_startzero)); 280df8bae1dSRodney W. Grimes bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy, 281df8bae1dSRodney W. Grimes ((caddr_t) &up->u_stats.pstat_endcopy - 282df8bae1dSRodney W. Grimes (caddr_t) &up->u_stats.pstat_startcopy)); 283df8bae1dSRodney W. Grimes 284df8bae1dSRodney W. Grimes 285df8bae1dSRodney W. Grimes /* 286a2a1c95cSPeter Wemm * cpu_fork will copy and update the pcb, set up the kernel stack, 287a2a1c95cSPeter Wemm * and make the child ready to run. 288df8bae1dSRodney W. Grimes */ 289b40ce416SJulian Elischer cpu_fork(td, p2, flags); 290df8bae1dSRodney W. Grimes } 291df8bae1dSRodney W. Grimes 292df8bae1dSRodney W. Grimes /* 293eb30c1c0SPeter Wemm * Called after process has been wait(2)'ed apon and is being reaped. 294eb30c1c0SPeter Wemm * The idea is to reclaim resources that we could not reclaim while 295eb30c1c0SPeter Wemm * the process was still executing. 296eb30c1c0SPeter Wemm */ 297eb30c1c0SPeter Wemm void 298eb30c1c0SPeter Wemm vm_waitproc(p) 299eb30c1c0SPeter Wemm struct proc *p; 300eb30c1c0SPeter Wemm { 301b40ce416SJulian Elischer struct thread *td; 302eb30c1c0SPeter Wemm 303eb30c1c0SPeter Wemm GIANT_REQUIRED; 304eb30c1c0SPeter Wemm cpu_wait(p); 305eb30c1c0SPeter Wemm pmap_dispose_proc(p); /* drop per-process resources */ 306b40ce416SJulian Elischer FOREACH_THREAD_IN_PROC(p, td) 307b40ce416SJulian Elischer pmap_dispose_thread(td); 308eb30c1c0SPeter Wemm vmspace_free(p->p_vmspace); /* and clean-out the vmspace */ 309eb30c1c0SPeter Wemm } 310eb30c1c0SPeter Wemm 311eb30c1c0SPeter Wemm /* 312df8bae1dSRodney W. Grimes * Set default limits for VM system. 313df8bae1dSRodney W. Grimes * Called for proc 0, and then inherited by all others. 3142b14f991SJulian Elischer * 3152b14f991SJulian Elischer * XXX should probably act directly on proc0. 316df8bae1dSRodney W. Grimes */ 3172b14f991SJulian Elischer static void 3182b14f991SJulian Elischer vm_init_limits(udata) 3194590fd3aSDavid Greenman void *udata; 320df8bae1dSRodney W. Grimes { 32154d92145SMatthew Dillon struct proc *p = udata; 322bbc0ec52SDavid Greenman int rss_limit; 323df8bae1dSRodney W. Grimes 324df8bae1dSRodney W. Grimes /* 3250d94caffSDavid Greenman * Set up the initial limits on process VM. Set the maximum resident 3260d94caffSDavid Greenman * set size to be half of (reasonably) available memory. Since this 3270d94caffSDavid Greenman * is a soft limit, it comes into effect only when the system is out 3280d94caffSDavid Greenman * of memory - half of main memory helps to favor smaller processes, 329bbc0ec52SDavid Greenman * and reduces thrashing of the object cache. 330df8bae1dSRodney W. Grimes */ 331cbc89bfbSPaul Saab p->p_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; 332cbc89bfbSPaul Saab p->p_rlimit[RLIMIT_STACK].rlim_max = maxssiz; 333cbc89bfbSPaul Saab p->p_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; 334cbc89bfbSPaul Saab p->p_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; 335dd0bd066SDavid Greenman /* limit the limit to no less than 2MB */ 336f2daac0cSDavid Greenman rss_limit = max(cnt.v_free_count, 512); 337bbc0ec52SDavid Greenman p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit); 33826f9a767SRodney W. Grimes p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY; 339df8bae1dSRodney W. Grimes } 340df8bae1dSRodney W. Grimes 34126f9a767SRodney W. Grimes void 34226f9a767SRodney W. Grimes faultin(p) 34326f9a767SRodney W. Grimes struct proc *p; 34426f9a767SRodney W. Grimes { 345b40ce416SJulian Elischer struct thread *td; 3460cddd8f0SMatthew Dillon GIANT_REQUIRED; 34726f9a767SRodney W. Grimes 348c96d52a9SJohn Baldwin PROC_LOCK_ASSERT(p, MA_OWNED); 3499ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 3505074aecdSJohn Baldwin if ((p->p_sflag & PS_INMEM) == 0) { 35126f9a767SRodney W. Grimes ++p->p_lock; 3529ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 35345ece682SJohn Baldwin PROC_UNLOCK(p); 35426f9a767SRodney W. Grimes 355675878e7SJohn Dyson pmap_swapin_proc(p); 356b40ce416SJulian Elischer FOREACH_THREAD_IN_PROC (p, td) 357b40ce416SJulian Elischer pmap_swapin_thread(td); 35826f9a767SRodney W. Grimes 35945ece682SJohn Baldwin PROC_LOCK(p); 3609ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 361b40ce416SJulian Elischer FOREACH_THREAD_IN_PROC (p, td) 362b40ce416SJulian Elischer if (td->td_proc->p_stat == SRUN) /* XXXKSE */ 363b40ce416SJulian Elischer setrunqueue(td); 36426f9a767SRodney W. Grimes 3655074aecdSJohn Baldwin p->p_sflag |= PS_INMEM; 36626f9a767SRodney W. Grimes 36726f9a767SRodney W. Grimes /* undo the effect of setting SLOCK above */ 36826f9a767SRodney W. Grimes --p->p_lock; 36926f9a767SRodney W. Grimes } 3709ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 37126f9a767SRodney W. Grimes } 37226f9a767SRodney W. Grimes 373df8bae1dSRodney W. Grimes /* 37426f9a767SRodney W. Grimes * This swapin algorithm attempts to swap-in processes only if there 37526f9a767SRodney W. Grimes * is enough space for them. Of course, if a process waits for a long 37626f9a767SRodney W. Grimes * time, it will be swapped in anyway. 3770384fff8SJason Evans * 378b40ce416SJulian Elischer * XXXKSE - KSEGRP with highest priority counts.. 379b40ce416SJulian Elischer * 3800384fff8SJason Evans * Giant is still held at this point, to be released in tsleep. 381df8bae1dSRodney W. Grimes */ 3822b14f991SJulian Elischer /* ARGSUSED*/ 3832b14f991SJulian Elischer static void 384d841aaa7SBruce Evans scheduler(dummy) 385d841aaa7SBruce Evans void *dummy; 386df8bae1dSRodney W. Grimes { 38754d92145SMatthew Dillon struct proc *p; 38854d92145SMatthew Dillon int pri; 389df8bae1dSRodney W. Grimes struct proc *pp; 390df8bae1dSRodney W. Grimes int ppri; 391df8bae1dSRodney W. Grimes 392c96d52a9SJohn Baldwin mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); 3930cddd8f0SMatthew Dillon /* GIANT_REQUIRED */ 3940384fff8SJason Evans 395df8bae1dSRodney W. Grimes loop: 39690ecac61SMatthew Dillon if (vm_page_count_min()) { 3970d94caffSDavid Greenman VM_WAIT; 39890ecac61SMatthew Dillon goto loop; 3990d94caffSDavid Greenman } 40026f9a767SRodney W. Grimes 401df8bae1dSRodney W. Grimes pp = NULL; 402df8bae1dSRodney W. Grimes ppri = INT_MIN; 4031005a129SJohn Baldwin sx_slock(&allproc_lock); 404b40ce416SJulian Elischer FOREACH_PROC_IN_SYSTEM(p) { 405b40ce416SJulian Elischer struct ksegrp *kg; 4069ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 407b40ce416SJulian Elischer if (p->p_stat == SRUN 408b40ce416SJulian Elischer && (p->p_sflag & (PS_INMEM | PS_SWAPPING)) == 0) { 409b40ce416SJulian Elischer /* Find the minimum sleeptime for the process */ 410b40ce416SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 411b40ce416SJulian Elischer pri = p->p_swtime + kg->kg_slptime; 4125074aecdSJohn Baldwin if ((p->p_sflag & PS_SWAPINREQ) == 0) { 413b40ce416SJulian Elischer pri -= kg->kg_nice * 8; 414a669a6e9SJohn Dyson } 41595461b45SJohn Dyson 416b40ce416SJulian Elischer 41726f9a767SRodney W. Grimes /* 418b40ce416SJulian Elischer * if this ksegrp is higher priority 419b40ce416SJulian Elischer * and there is enough space, then select 420b40ce416SJulian Elischer * this process instead of the previous 421b40ce416SJulian Elischer * selection. 42226f9a767SRodney W. Grimes */ 4230d94caffSDavid Greenman if (pri > ppri) { 424df8bae1dSRodney W. Grimes pp = p; 425df8bae1dSRodney W. Grimes ppri = pri; 426df8bae1dSRodney W. Grimes } 427df8bae1dSRodney W. Grimes } 428b40ce416SJulian Elischer } 4299ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 430df8bae1dSRodney W. Grimes } 4311005a129SJohn Baldwin sx_sunlock(&allproc_lock); 43226f9a767SRodney W. Grimes 433df8bae1dSRodney W. Grimes /* 434a669a6e9SJohn Dyson * Nothing to do, back to sleep. 435df8bae1dSRodney W. Grimes */ 436df8bae1dSRodney W. Grimes if ((p = pp) == NULL) { 437ea754954SJohn Baldwin tsleep(&proc0, PVM, "sched", maxslp * hz / 2); 438df8bae1dSRodney W. Grimes goto loop; 439df8bae1dSRodney W. Grimes } 4409ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 4415074aecdSJohn Baldwin p->p_sflag &= ~PS_SWAPINREQ; 4429ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 443a669a6e9SJohn Dyson 444df8bae1dSRodney W. Grimes /* 44526f9a767SRodney W. Grimes * We would like to bring someone in. (only if there is space). 446df8bae1dSRodney W. Grimes */ 44745ece682SJohn Baldwin PROC_LOCK(p); 44826f9a767SRodney W. Grimes faultin(p); 44945ece682SJohn Baldwin PROC_UNLOCK(p); 4509ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 451df8bae1dSRodney W. Grimes p->p_swtime = 0; 4529ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 453df8bae1dSRodney W. Grimes goto loop; 454df8bae1dSRodney W. Grimes } 455df8bae1dSRodney W. Grimes 4565afce282SDavid Greenman #ifndef NO_SWAPPING 4575afce282SDavid Greenman 458ceb0cf87SJohn Dyson /* 459ceb0cf87SJohn Dyson * Swap_idle_threshold1 is the guaranteed swapped in time for a process 460ceb0cf87SJohn Dyson */ 461303b270bSEivind Eklund static int swap_idle_threshold1 = 2; 462ceb0cf87SJohn Dyson SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, 463ceb0cf87SJohn Dyson CTLFLAG_RW, &swap_idle_threshold1, 0, ""); 464ceb0cf87SJohn Dyson 465ceb0cf87SJohn Dyson /* 466ceb0cf87SJohn Dyson * Swap_idle_threshold2 is the time that a process can be idle before 467ceb0cf87SJohn Dyson * it will be swapped out, if idle swapping is enabled. 468ceb0cf87SJohn Dyson */ 469303b270bSEivind Eklund static int swap_idle_threshold2 = 10; 470ceb0cf87SJohn Dyson SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, 471ceb0cf87SJohn Dyson CTLFLAG_RW, &swap_idle_threshold2, 0, ""); 472ceb0cf87SJohn Dyson 473df8bae1dSRodney W. Grimes /* 474df8bae1dSRodney W. Grimes * Swapout is driven by the pageout daemon. Very simple, we find eligible 475df8bae1dSRodney W. Grimes * procs and unwire their u-areas. We try to always "swap" at least one 476df8bae1dSRodney W. Grimes * process in case we need the room for a swapin. 477df8bae1dSRodney W. Grimes * If any procs have been sleeping/stopped for at least maxslp seconds, 478df8bae1dSRodney W. Grimes * they are swapped. Else, we swap the longest-sleeping or stopped process, 479df8bae1dSRodney W. Grimes * if any, otherwise the longest-resident process. 480df8bae1dSRodney W. Grimes */ 481df8bae1dSRodney W. Grimes void 4823a2dc656SJohn Dyson swapout_procs(action) 4833a2dc656SJohn Dyson int action; 484df8bae1dSRodney W. Grimes { 48554d92145SMatthew Dillon struct proc *p; 486b40ce416SJulian Elischer struct ksegrp *kg; 487df8bae1dSRodney W. Grimes struct proc *outp, *outp2; 488df8bae1dSRodney W. Grimes int outpri, outpri2; 489df8bae1dSRodney W. Grimes int didswap = 0; 490df8bae1dSRodney W. Grimes 4910cddd8f0SMatthew Dillon GIANT_REQUIRED; 4920cddd8f0SMatthew Dillon 493df8bae1dSRodney W. Grimes outp = outp2 = NULL; 49426f9a767SRodney W. Grimes outpri = outpri2 = INT_MIN; 4950d94caffSDavid Greenman retry: 4963a2189d4SJohn Baldwin sx_slock(&allproc_lock); 4975074aecdSJohn Baldwin LIST_FOREACH(p, &allproc, p_list) { 498b18bfc3dSJohn Dyson struct vmspace *vm; 499b40ce416SJulian Elischer int minslptime = 100000; 500b18bfc3dSJohn Dyson 5015074aecdSJohn Baldwin PROC_LOCK(p); 50269b40456SJohn Baldwin if (p->p_lock != 0 || 50369b40456SJohn Baldwin (p->p_flag & (P_TRACED|P_SYSTEM|P_WEXIT)) != 0) { 5045074aecdSJohn Baldwin PROC_UNLOCK(p); 5055074aecdSJohn Baldwin continue; 5065074aecdSJohn Baldwin } 50723955314SAlfred Perlstein /* 50823955314SAlfred Perlstein * only aiod changes vmspace, however it will be 50923955314SAlfred Perlstein * skipped because of the if statement above checking 51023955314SAlfred Perlstein * for P_SYSTEM 51123955314SAlfred Perlstein */ 512b18bfc3dSJohn Dyson vm = p->p_vmspace; 5139ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 51469b40456SJohn Baldwin if ((p->p_sflag & (PS_INMEM|PS_SWAPPING)) != PS_INMEM) { 5159ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 516ea754954SJohn Baldwin PROC_UNLOCK(p); 51769b40456SJohn Baldwin continue; 51869b40456SJohn Baldwin } 51969b40456SJohn Baldwin 520df8bae1dSRodney W. Grimes switch (p->p_stat) { 5210d94caffSDavid Greenman default: 5229ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 523ea754954SJohn Baldwin PROC_UNLOCK(p); 524df8bae1dSRodney W. Grimes continue; 525df8bae1dSRodney W. Grimes 526df8bae1dSRodney W. Grimes case SSLEEP: 527df8bae1dSRodney W. Grimes case SSTOP: 52826f9a767SRodney W. Grimes /* 529bfbfac11SDavid Greenman * do not swapout a realtime process 530b40ce416SJulian Elischer * Check all the thread groups.. 531bfbfac11SDavid Greenman */ 532b40ce416SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 533b40ce416SJulian Elischer if (PRI_IS_REALTIME(kg->kg_pri.pri_class)) { 5349ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 535ea754954SJohn Baldwin PROC_UNLOCK(p); 536b40ce416SJulian Elischer goto nextproc; 537c8a6b001SJohn Baldwin } 538bfbfac11SDavid Greenman 539bfbfac11SDavid Greenman /* 540b40ce416SJulian Elischer * Do not swapout a process waiting 541b40ce416SJulian Elischer * on a critical event of some kind. 542b40ce416SJulian Elischer * Also guarantee swap_idle_threshold1 543ceb0cf87SJohn Dyson * time in memory. 5440d94caffSDavid Greenman */ 545b40ce416SJulian Elischer if (((kg->kg_pri.pri_level) < PSOCK) || 546b40ce416SJulian Elischer (kg->kg_slptime < swap_idle_threshold1)) { 5479ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 548ea754954SJohn Baldwin PROC_UNLOCK(p); 549b40ce416SJulian Elischer goto nextproc; 550c8a6b001SJohn Baldwin } 551ceb0cf87SJohn Dyson 552ceb0cf87SJohn Dyson /* 553b40ce416SJulian Elischer * If the system is under memory stress, 554b40ce416SJulian Elischer * or if we are swapping 555b40ce416SJulian Elischer * idle processes >= swap_idle_threshold2, 556b40ce416SJulian Elischer * then swap the process out. 557ceb0cf87SJohn Dyson */ 558ceb0cf87SJohn Dyson if (((action & VM_SWAP_NORMAL) == 0) && 559ceb0cf87SJohn Dyson (((action & VM_SWAP_IDLE) == 0) || 560b40ce416SJulian Elischer (kg->kg_slptime < swap_idle_threshold2))) { 5619ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 562ea754954SJohn Baldwin PROC_UNLOCK(p); 563b40ce416SJulian Elischer goto nextproc; 5645074aecdSJohn Baldwin } 565b40ce416SJulian Elischer if (minslptime > kg->kg_slptime) 566b40ce416SJulian Elischer minslptime = kg->kg_slptime; 567b40ce416SJulian Elischer } 5680d94caffSDavid Greenman 569b40ce416SJulian Elischer mtx_unlock_spin(&sched_lock); 570b18bfc3dSJohn Dyson ++vm->vm_refcnt; 571d3a34985SJohn Dyson /* 572b40ce416SJulian Elischer * do not swapout a process that 573b40ce416SJulian Elischer * is waiting for VM 574b40ce416SJulian Elischer * data structures there is a 575b40ce416SJulian Elischer * possible deadlock. 576d3a34985SJohn Dyson */ 577996c772fSJohn Dyson if (lockmgr(&vm->vm_map.lock, 578996c772fSJohn Dyson LK_EXCLUSIVE | LK_NOWAIT, 579b40ce416SJulian Elischer NULL, curthread)) { 580b18bfc3dSJohn Dyson vmspace_free(vm); 581ea754954SJohn Baldwin PROC_UNLOCK(p); 582b40ce416SJulian Elischer goto nextproc; 583d3a34985SJohn Dyson } 584b18bfc3dSJohn Dyson vm_map_unlock(&vm->vm_map); 58511b224dcSDavid Greenman /* 5860d94caffSDavid Greenman * If the process has been asleep for awhile and had 5870d94caffSDavid Greenman * most of its pages taken away already, swap it out. 58811b224dcSDavid Greenman */ 589ceb0cf87SJohn Dyson if ((action & VM_SWAP_NORMAL) || 590ceb0cf87SJohn Dyson ((action & VM_SWAP_IDLE) && 591b40ce416SJulian Elischer (minslptime > swap_idle_threshold2))) { 5923a2189d4SJohn Baldwin sx_sunlock(&allproc_lock); 593df8bae1dSRodney W. Grimes swapout(p); 594b18bfc3dSJohn Dyson vmspace_free(vm); 595df8bae1dSRodney W. Grimes didswap++; 5960d94caffSDavid Greenman goto retry; 597c96d52a9SJohn Baldwin } 598ea754954SJohn Baldwin PROC_UNLOCK(p); 5993a2189d4SJohn Baldwin vmspace_free(vm); 60026f9a767SRodney W. Grimes } 601b40ce416SJulian Elischer nextproc: 602ceb0cf87SJohn Dyson } 6031005a129SJohn Baldwin sx_sunlock(&allproc_lock); 60426f9a767SRodney W. Grimes /* 60526f9a767SRodney W. Grimes * If we swapped something out, and another process needed memory, 60626f9a767SRodney W. Grimes * then wakeup the sched process. 60726f9a767SRodney W. Grimes */ 6080d94caffSDavid Greenman if (didswap) 60924a1cce3SDavid Greenman wakeup(&proc0); 610df8bae1dSRodney W. Grimes } 611df8bae1dSRodney W. Grimes 612f708ef1bSPoul-Henning Kamp static void 613df8bae1dSRodney W. Grimes swapout(p) 61454d92145SMatthew Dillon struct proc *p; 615df8bae1dSRodney W. Grimes { 616b40ce416SJulian Elischer struct thread *td; 617df8bae1dSRodney W. Grimes 618ea754954SJohn Baldwin PROC_LOCK_ASSERT(p, MA_OWNED); 619d3a34985SJohn Dyson #if defined(SWAP_DEBUG) 620d3a34985SJohn Dyson printf("swapping out %d\n", p->p_pid); 621d3a34985SJohn Dyson #endif 62226f9a767SRodney W. Grimes ++p->p_stats->p_ru.ru_nswap; 623df8bae1dSRodney W. Grimes /* 62426f9a767SRodney W. Grimes * remember the process resident count 625df8bae1dSRodney W. Grimes */ 626b1028ad1SLuoqi Chen p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace); 627df8bae1dSRodney W. Grimes 6289ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 6295074aecdSJohn Baldwin p->p_sflag &= ~PS_INMEM; 6305074aecdSJohn Baldwin p->p_sflag |= PS_SWAPPING; 631c86b6ff5SJohn Baldwin PROC_UNLOCK(p); 632b40ce416SJulian Elischer FOREACH_THREAD_IN_PROC (p, td) 633b40ce416SJulian Elischer if (td->td_proc->p_stat == SRUN) /* XXXKSE */ 634b40ce416SJulian Elischer remrunqueue(td); /* XXXKSE */ 6359ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 63626f9a767SRodney W. Grimes 637675878e7SJohn Dyson pmap_swapout_proc(p); 638b40ce416SJulian Elischer FOREACH_THREAD_IN_PROC(p, td) 639b40ce416SJulian Elischer pmap_swapout_thread(td); 6406d40c3d3SDavid Greenman 6419ed346baSBosko Milekic mtx_lock_spin(&sched_lock); 6425074aecdSJohn Baldwin p->p_sflag &= ~PS_SWAPPING; 643df8bae1dSRodney W. Grimes p->p_swtime = 0; 6449ed346baSBosko Milekic mtx_unlock_spin(&sched_lock); 645df8bae1dSRodney W. Grimes } 6465afce282SDavid Greenman #endif /* !NO_SWAPPING */ 647