1df8bae1dSRodney W. Grimes /* 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1989, 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * (c) UNIX System Laboratories, Inc. 5df8bae1dSRodney W. Grimes * All or some portions of this file are derived from material licensed 6df8bae1dSRodney W. Grimes * to the University of California by American Telephone and Telegraph 7df8bae1dSRodney W. Grimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8df8bae1dSRodney W. Grimes * the permission of UNIX System Laboratories, Inc. 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 11df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 12df8bae1dSRodney W. Grimes * are met: 13df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 15df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 17df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 18df8bae1dSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 19df8bae1dSRodney W. Grimes * must display the following acknowledgement: 20df8bae1dSRodney W. Grimes * This product includes software developed by the University of 21df8bae1dSRodney W. Grimes * California, Berkeley and its contributors. 22df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 23df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 24df8bae1dSRodney W. Grimes * without specific prior written permission. 25df8bae1dSRodney W. Grimes * 26df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36df8bae1dSRodney W. Grimes * SUCH DAMAGE. 37df8bae1dSRodney W. Grimes * 38df8bae1dSRodney W. Grimes * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 39c3aac50fSPeter Wemm * $FreeBSD$ 40df8bae1dSRodney W. Grimes */ 41df8bae1dSRodney W. Grimes 42db6a20e2SGarrett Wollman #include "opt_ktrace.h" 43db6a20e2SGarrett Wollman 44df8bae1dSRodney W. Grimes #include <sys/param.h> 45df8bae1dSRodney W. Grimes #include <sys/systm.h> 46d2d3e875SBruce Evans #include <sys/sysproto.h> 47df8bae1dSRodney W. Grimes #include <sys/filedesc.h> 48df8bae1dSRodney W. Grimes #include <sys/kernel.h> 49c76e95c3SPeter Wemm #include <sys/sysctl.h> 50df8bae1dSRodney W. Grimes #include <sys/malloc.h> 5135e0e5b3SJohn Baldwin #include <sys/mutex.h> 52df8bae1dSRodney W. Grimes #include <sys/proc.h> 53df8bae1dSRodney W. Grimes #include <sys/resourcevar.h> 54df8bae1dSRodney W. Grimes #include <sys/vnode.h> 55df8bae1dSRodney W. Grimes #include <sys/acct.h> 560384fff8SJason Evans #include <sys/ktr.h> 57df8bae1dSRodney W. Grimes #include <sys/ktrace.h> 58b71fec07SBruce Evans #include <sys/unistd.h> 5975c13541SPoul-Henning Kamp #include <sys/jail.h> 60df8bae1dSRodney W. Grimes 61d93f860cSPoul-Henning Kamp #include <vm/vm.h> 62996c772fSJohn Dyson #include <sys/lock.h> 63dabee6feSPeter Wemm #include <vm/pmap.h> 64dabee6feSPeter Wemm #include <vm/vm_map.h> 65efeaf95aSDavid Greenman #include <vm/vm_extern.h> 662d8acc0fSJohn Dyson #include <vm/vm_zone.h> 67d93f860cSPoul-Henning Kamp 68dc9c271aSJulian Elischer #include <sys/user.h> 6988c5ea45SJulian Elischer 7093efcae8SPoul-Henning Kamp static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback"); 7193efcae8SPoul-Henning Kamp 72be67169aSBruce Evans static int fast_vfork = 1; 7347fdd692SNeil Blakey-Milner SYSCTL_INT(_kern, OID_AUTO, fast_vfork, CTLFLAG_RW, &fast_vfork, 0, 7447fdd692SNeil Blakey-Milner "flag to indicate whether we have a fast vfork()"); 75c76e95c3SPeter Wemm 76fed06968SJulian Elischer /* 77e0d898b4SJulian Elischer * These are the stuctures used to create a callout list for things to do 78e0d898b4SJulian Elischer * when forking a process 79fed06968SJulian Elischer */ 8093efcae8SPoul-Henning Kamp struct forklist { 81fed06968SJulian Elischer forklist_fn function; 82e3975643SJake Burkholder TAILQ_ENTRY(forklist) next; 8393efcae8SPoul-Henning Kamp }; 84fed06968SJulian Elischer 85e3975643SJake Burkholder TAILQ_HEAD(forklist_head, forklist); 8693efcae8SPoul-Henning Kamp static struct forklist_head fork_list = TAILQ_HEAD_INITIALIZER(fork_list); 87fed06968SJulian Elischer 88d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 89ad7507e2SSteven Wallace struct fork_args { 90ad7507e2SSteven Wallace int dummy; 91ad7507e2SSteven Wallace }; 92d2d3e875SBruce Evans #endif 93ad7507e2SSteven Wallace 94df8bae1dSRodney W. Grimes /* ARGSUSED */ 9526f9a767SRodney W. Grimes int 96cb226aaaSPoul-Henning Kamp fork(p, uap) 97df8bae1dSRodney W. Grimes struct proc *p; 98df8bae1dSRodney W. Grimes struct fork_args *uap; 99df8bae1dSRodney W. Grimes { 100df8abd0bSPeter Wemm int error; 101df8abd0bSPeter Wemm struct proc *p2; 102be67169aSBruce Evans 103df8abd0bSPeter Wemm error = fork1(p, RFFDG | RFPROC, &p2); 104df8abd0bSPeter Wemm if (error == 0) { 105df8abd0bSPeter Wemm p->p_retval[0] = p2->p_pid; 106df8abd0bSPeter Wemm p->p_retval[1] = 0; 107df8abd0bSPeter Wemm } 108df8abd0bSPeter Wemm return error; 109df8bae1dSRodney W. Grimes } 110df8bae1dSRodney W. Grimes 111df8bae1dSRodney W. Grimes /* ARGSUSED */ 11226f9a767SRodney W. Grimes int 113cb226aaaSPoul-Henning Kamp vfork(p, uap) 114df8bae1dSRodney W. Grimes struct proc *p; 115dabee6feSPeter Wemm struct vfork_args *uap; 116df8bae1dSRodney W. Grimes { 117df8abd0bSPeter Wemm int error; 118df8abd0bSPeter Wemm struct proc *p2; 119be67169aSBruce Evans 120df8abd0bSPeter Wemm error = fork1(p, RFFDG | RFPROC | RFPPWAIT | RFMEM, &p2); 121df8abd0bSPeter Wemm if (error == 0) { 122df8abd0bSPeter Wemm p->p_retval[0] = p2->p_pid; 123df8abd0bSPeter Wemm p->p_retval[1] = 0; 124df8abd0bSPeter Wemm } 125df8abd0bSPeter Wemm return error; 126df8bae1dSRodney W. Grimes } 127df8bae1dSRodney W. Grimes 128dabee6feSPeter Wemm int 129cb226aaaSPoul-Henning Kamp rfork(p, uap) 130dabee6feSPeter Wemm struct proc *p; 131dabee6feSPeter Wemm struct rfork_args *uap; 132dabee6feSPeter Wemm { 133df8abd0bSPeter Wemm int error; 134df8abd0bSPeter Wemm struct proc *p2; 135be67169aSBruce Evans 1360384fff8SJason Evans /* mask kernel only flags out of the user flags */ 1370384fff8SJason Evans error = fork1(p, uap->flags & ~RFKERNELONLY, &p2); 138df8abd0bSPeter Wemm if (error == 0) { 1391943af61SPeter Wemm p->p_retval[0] = p2 ? p2->p_pid : 0; 140df8abd0bSPeter Wemm p->p_retval[1] = 0; 141df8abd0bSPeter Wemm } 142df8abd0bSPeter Wemm return error; 143dabee6feSPeter Wemm } 144dabee6feSPeter Wemm 145dabee6feSPeter Wemm 146df8bae1dSRodney W. Grimes int nprocs = 1; /* process 0 */ 14751068190SWolfram Schneider static int nextpid = 0; 148df8bae1dSRodney W. Grimes 149bb6a234eSPeter Wemm /* 150bb6a234eSPeter Wemm * Random component to nextpid generation. We mix in a random factor to make 151bb6a234eSPeter Wemm * it a little harder to predict. We sanity check the modulus value to avoid 152bb6a234eSPeter Wemm * doing it in critical paths. Don't let it be too small or we pointlessly 153bb6a234eSPeter Wemm * waste randomness entropy, and don't let it be impossibly large. Using a 154bb6a234eSPeter Wemm * modulus that is too big causes a LOT more process table scans and slows 155bb6a234eSPeter Wemm * down fork processing as the pidchecked caching is defeated. 156bb6a234eSPeter Wemm */ 157ee3fd601SDan Moschuk static int randompid = 0; 158bb6a234eSPeter Wemm 159bb6a234eSPeter Wemm static int 16082d9ae4eSPoul-Henning Kamp sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) 161bb6a234eSPeter Wemm { 162bb6a234eSPeter Wemm int error, pid; 163bb6a234eSPeter Wemm 164bb6a234eSPeter Wemm pid = randompid; 165bb6a234eSPeter Wemm error = sysctl_handle_int(oidp, &pid, 0, req); 166bb6a234eSPeter Wemm if (error || !req->newptr) 167bb6a234eSPeter Wemm return (error); 168bb6a234eSPeter Wemm if (pid < 0 || pid > PID_MAX - 100) /* out of range */ 169bb6a234eSPeter Wemm pid = PID_MAX - 100; 170bb6a234eSPeter Wemm else if (pid < 2) /* NOP */ 171bb6a234eSPeter Wemm pid = 0; 172bb6a234eSPeter Wemm else if (pid < 100) /* Make it reasonable */ 173bb6a234eSPeter Wemm pid = 100; 174bb6a234eSPeter Wemm randompid = pid; 175bb6a234eSPeter Wemm return (error); 176bb6a234eSPeter Wemm } 177bb6a234eSPeter Wemm 178bb6a234eSPeter Wemm SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 179bb6a234eSPeter Wemm 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); 180ee3fd601SDan Moschuk 18174b2192aSJohn Dyson int 182df8abd0bSPeter Wemm fork1(p1, flags, procp) 1830384fff8SJason Evans struct proc *p1; /* parent proc */ 1840e3eb7eeSSujal Patel int flags; 1850384fff8SJason Evans struct proc **procp; /* child proc */ 186df8bae1dSRodney W. Grimes { 187df8abd0bSPeter Wemm struct proc *p2, *pptr; 188df8abd0bSPeter Wemm uid_t uid; 189df8bae1dSRodney W. Grimes struct proc *newproc; 1900384fff8SJason Evans int trypid; 191c6362551SAlfred Perlstein int ok; 19251068190SWolfram Schneider static int pidchecked = 0; 19393efcae8SPoul-Henning Kamp struct forklist *ep; 1945856e12eSJohn Dyson 1950384fff8SJason Evans /* Can't copy and clear */ 1960e3eb7eeSSujal Patel if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) 197dabee6feSPeter Wemm return (EINVAL); 198df8bae1dSRodney W. Grimes 199df8bae1dSRodney W. Grimes /* 2005856e12eSJohn Dyson * Here we don't create a new process, but we divorce 2015856e12eSJohn Dyson * certain parts of a process from itself. 2025856e12eSJohn Dyson */ 2035856e12eSJohn Dyson if ((flags & RFPROC) == 0) { 2045856e12eSJohn Dyson 20591c28bfdSLuoqi Chen vm_fork(p1, 0, flags); 2065856e12eSJohn Dyson 2075856e12eSJohn Dyson /* 2085856e12eSJohn Dyson * Close all file descriptors. 2095856e12eSJohn Dyson */ 2105856e12eSJohn Dyson if (flags & RFCFDG) { 2115856e12eSJohn Dyson struct filedesc *fdtmp; 2125856e12eSJohn Dyson fdtmp = fdinit(p1); 2135856e12eSJohn Dyson fdfree(p1); 2145856e12eSJohn Dyson p1->p_fd = fdtmp; 2155856e12eSJohn Dyson } 2165856e12eSJohn Dyson 2175856e12eSJohn Dyson /* 2185856e12eSJohn Dyson * Unshare file descriptors (from parent.) 2195856e12eSJohn Dyson */ 2205856e12eSJohn Dyson if (flags & RFFDG) { 2215856e12eSJohn Dyson if (p1->p_fd->fd_refcnt > 1) { 2225856e12eSJohn Dyson struct filedesc *newfd; 2235856e12eSJohn Dyson newfd = fdcopy(p1); 2245856e12eSJohn Dyson fdfree(p1); 2255856e12eSJohn Dyson p1->p_fd = newfd; 2265856e12eSJohn Dyson } 2275856e12eSJohn Dyson } 2281943af61SPeter Wemm *procp = NULL; 2295856e12eSJohn Dyson return (0); 2305856e12eSJohn Dyson } 2315856e12eSJohn Dyson 2325856e12eSJohn Dyson /* 233df8bae1dSRodney W. Grimes * Although process entries are dynamically created, we still keep 234df8bae1dSRodney W. Grimes * a global limit on the maximum number we will create. Don't allow 235df8bae1dSRodney W. Grimes * a nonprivileged user to use the last process; don't let root 236df8bae1dSRodney W. Grimes * exceed the limit. The variable nprocs is the current number of 237df8bae1dSRodney W. Grimes * processes, maxproc is the limit. 238df8bae1dSRodney W. Grimes */ 239df8bae1dSRodney W. Grimes uid = p1->p_cred->p_ruid; 240df8bae1dSRodney W. Grimes if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) { 241df8bae1dSRodney W. Grimes tablefull("proc"); 242df8bae1dSRodney W. Grimes return (EAGAIN); 243df8bae1dSRodney W. Grimes } 244df8bae1dSRodney W. Grimes /* 245ef5dc8a9SJohn Dyson * Increment the nprocs resource before blocking can occur. There 246ef5dc8a9SJohn Dyson * are hard-limits as to the number of processes that can run. 247ef5dc8a9SJohn Dyson */ 248ef5dc8a9SJohn Dyson nprocs++; 249ef5dc8a9SJohn Dyson 250ef5dc8a9SJohn Dyson /* 251df8bae1dSRodney W. Grimes * Increment the count of procs running with this uid. Don't allow 252df8bae1dSRodney W. Grimes * a nonprivileged user to exceed their current limit. 253df8bae1dSRodney W. Grimes */ 254f535380cSDon Lewis ok = chgproccnt(p1->p_cred->p_uidinfo, 1, 25542fd51ceSDon Lewis (uid != 0) ? p1->p_rlimit[RLIMIT_NPROC].rlim_cur : 0); 25642fd51ceSDon Lewis if (!ok) { 257ef5dc8a9SJohn Dyson /* 258ef5dc8a9SJohn Dyson * Back out the process count 259ef5dc8a9SJohn Dyson */ 260ef5dc8a9SJohn Dyson nprocs--; 261df8bae1dSRodney W. Grimes return (EAGAIN); 262df8bae1dSRodney W. Grimes } 263df8bae1dSRodney W. Grimes 264df8bae1dSRodney W. Grimes /* Allocate new proc. */ 2652d8acc0fSJohn Dyson newproc = zalloc(proc_zone); 266df8bae1dSRodney W. Grimes 267df8bae1dSRodney W. Grimes /* 2682c1011f7SJohn Dyson * Setup linkage for kernel based threading 2692c1011f7SJohn Dyson */ 2702c1011f7SJohn Dyson if((flags & RFTHREAD) != 0) { 2712c1011f7SJohn Dyson newproc->p_peers = p1->p_peers; 2722c1011f7SJohn Dyson p1->p_peers = newproc; 2732c1011f7SJohn Dyson newproc->p_leader = p1->p_leader; 2742c1011f7SJohn Dyson } else { 2752c1011f7SJohn Dyson newproc->p_peers = 0; 2762c1011f7SJohn Dyson newproc->p_leader = newproc; 2772c1011f7SJohn Dyson } 2782c1011f7SJohn Dyson 279d4da2dbaSAlan Cox newproc->p_vmspace = NULL; 280d4da2dbaSAlan Cox 2812c1011f7SJohn Dyson /* 282df8bae1dSRodney W. Grimes * Find an unused process ID. We remember a range of unused IDs 283df8bae1dSRodney W. Grimes * ready to use (from nextpid+1 through pidchecked-1). 2840384fff8SJason Evans * 2850384fff8SJason Evans * If RFHIGHPID is set (used during system boot), do not allocate 2860384fff8SJason Evans * low-numbered pids. 287df8bae1dSRodney W. Grimes */ 288c0c25570SJake Burkholder ALLPROC_LOCK(AP_EXCLUSIVE); 2890384fff8SJason Evans trypid = nextpid + 1; 2900384fff8SJason Evans if (flags & RFHIGHPID) { 2910384fff8SJason Evans if (trypid < 10) { 2920384fff8SJason Evans trypid = 10; 2930384fff8SJason Evans } 2940384fff8SJason Evans } else { 295bb6a234eSPeter Wemm if (randompid) 2960384fff8SJason Evans trypid += arc4random() % randompid; 2970384fff8SJason Evans } 298df8bae1dSRodney W. Grimes retry: 299df8bae1dSRodney W. Grimes /* 300df8bae1dSRodney W. Grimes * If the process ID prototype has wrapped around, 301df8bae1dSRodney W. Grimes * restart somewhat above 0, as the low-numbered procs 302df8bae1dSRodney W. Grimes * tend to include daemons that don't exit. 303df8bae1dSRodney W. Grimes */ 3040384fff8SJason Evans if (trypid >= PID_MAX) { 3050384fff8SJason Evans trypid = trypid % PID_MAX; 3060384fff8SJason Evans if (trypid < 100) 3070384fff8SJason Evans trypid += 100; 308df8bae1dSRodney W. Grimes pidchecked = 0; 309df8bae1dSRodney W. Grimes } 3100384fff8SJason Evans if (trypid >= pidchecked) { 311df8bae1dSRodney W. Grimes int doingzomb = 0; 312df8bae1dSRodney W. Grimes 313df8bae1dSRodney W. Grimes pidchecked = PID_MAX; 314df8bae1dSRodney W. Grimes /* 315df8bae1dSRodney W. Grimes * Scan the active and zombie procs to check whether this pid 316df8bae1dSRodney W. Grimes * is in use. Remember the lowest pid that's greater 3170384fff8SJason Evans * than trypid, so we can avoid checking for a while. 318df8bae1dSRodney W. Grimes */ 3192e3c8fcbSPoul-Henning Kamp p2 = LIST_FIRST(&allproc); 320df8bae1dSRodney W. Grimes again: 3212e3c8fcbSPoul-Henning Kamp for (; p2 != 0; p2 = LIST_NEXT(p2, p_list)) { 3220384fff8SJason Evans while (p2->p_pid == trypid || 3230384fff8SJason Evans p2->p_pgrp->pg_id == trypid || 3240384fff8SJason Evans p2->p_session->s_sid == trypid) { 3250384fff8SJason Evans trypid++; 3260384fff8SJason Evans if (trypid >= pidchecked) 327df8bae1dSRodney W. Grimes goto retry; 328df8bae1dSRodney W. Grimes } 3290384fff8SJason Evans if (p2->p_pid > trypid && pidchecked > p2->p_pid) 330df8bae1dSRodney W. Grimes pidchecked = p2->p_pid; 3310384fff8SJason Evans if (p2->p_pgrp->pg_id > trypid && 332df8bae1dSRodney W. Grimes pidchecked > p2->p_pgrp->pg_id) 333df8bae1dSRodney W. Grimes pidchecked = p2->p_pgrp->pg_id; 3340384fff8SJason Evans if (p2->p_session->s_sid > trypid && 335643a8daaSDon Lewis pidchecked > p2->p_session->s_sid) 336643a8daaSDon Lewis pidchecked = p2->p_session->s_sid; 337df8bae1dSRodney W. Grimes } 338df8bae1dSRodney W. Grimes if (!doingzomb) { 339df8bae1dSRodney W. Grimes doingzomb = 1; 3402e3c8fcbSPoul-Henning Kamp p2 = LIST_FIRST(&zombproc); 341df8bae1dSRodney W. Grimes goto again; 342df8bae1dSRodney W. Grimes } 343df8bae1dSRodney W. Grimes } 344df8bae1dSRodney W. Grimes 345df8bae1dSRodney W. Grimes /* 3460384fff8SJason Evans * RFHIGHPID does not mess with the nextpid counter during boot. 3470384fff8SJason Evans */ 3480384fff8SJason Evans if (flags & RFHIGHPID) 3490384fff8SJason Evans pidchecked = 0; 3500384fff8SJason Evans else 3510384fff8SJason Evans nextpid = trypid; 3520384fff8SJason Evans 353553629ebSJake Burkholder p2 = newproc; 354a448b62aSJake Burkholder p2->p_intr_nesting_level = 0; 355553629ebSJake Burkholder p2->p_stat = SIDL; /* protect against others */ 356553629ebSJake Burkholder p2->p_pid = trypid; 357553629ebSJake Burkholder LIST_INSERT_HEAD(&allproc, p2, p_list); 358553629ebSJake Burkholder LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); 359c0c25570SJake Burkholder ALLPROC_LOCK(AP_RELEASE); 360553629ebSJake Burkholder 3610384fff8SJason Evans /* 362df8bae1dSRodney W. Grimes * Make a proc table entry for the new process. 363df8bae1dSRodney W. Grimes * Start by zeroing the section of proc that is zero-initialized, 364df8bae1dSRodney W. Grimes * then copy the section that is copied directly from the parent. 365df8bae1dSRodney W. Grimes */ 366df8bae1dSRodney W. Grimes bzero(&p2->p_startzero, 367df8bae1dSRodney W. Grimes (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero)); 368df8bae1dSRodney W. Grimes bcopy(&p1->p_startcopy, &p2->p_startcopy, 369df8bae1dSRodney W. Grimes (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy)); 370df8bae1dSRodney W. Grimes 3714971f62aSJohn Baldwin mtx_init(&p2->p_mtx, "process lock", MTX_DEF); 3722244ea07SJohn Dyson p2->p_aioinfo = NULL; 3732244ea07SJohn Dyson 374df8bae1dSRodney W. Grimes /* 375df8bae1dSRodney W. Grimes * Duplicate sub-structures as needed. 376df8bae1dSRodney W. Grimes * Increase reference counts on shared objects. 377df8bae1dSRodney W. Grimes * The p_stats and p_sigacts substructs are set in vm_fork. 378df8bae1dSRodney W. Grimes */ 379df8bae1dSRodney W. Grimes p2->p_flag = P_INMEM; 380df8bae1dSRodney W. Grimes if (p1->p_flag & P_PROFIL) 381df8bae1dSRodney W. Grimes startprofclock(p2); 382df8bae1dSRodney W. Grimes MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred), 383df8bae1dSRodney W. Grimes M_SUBPROC, M_WAITOK); 384df8bae1dSRodney W. Grimes bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred)); 385df8bae1dSRodney W. Grimes p2->p_cred->p_refcnt = 1; 386df8bae1dSRodney W. Grimes crhold(p1->p_ucred); 387f535380cSDon Lewis uihold(p1->p_cred->p_uidinfo); 388df8bae1dSRodney W. Grimes 38975c13541SPoul-Henning Kamp if (p2->p_prison) { 39075c13541SPoul-Henning Kamp p2->p_prison->pr_ref++; 39175c13541SPoul-Henning Kamp p2->p_flag |= P_JAILED; 39275c13541SPoul-Henning Kamp } 39375c13541SPoul-Henning Kamp 394b9df5231SPoul-Henning Kamp if (p2->p_args) 395b9df5231SPoul-Henning Kamp p2->p_args->ar_ref++; 396b9df5231SPoul-Henning Kamp 3976626c604SJulian Elischer if (flags & RFSIGSHARE) { 398dc9c271aSJulian Elischer p2->p_procsig = p1->p_procsig; 3996626c604SJulian Elischer p2->p_procsig->ps_refcnt++; 400dc9c271aSJulian Elischer if (p1->p_sigacts == &p1->p_addr->u_sigacts) { 401dc9c271aSJulian Elischer struct sigacts *newsigacts; 402dc9c271aSJulian Elischer int s; 403dc9c271aSJulian Elischer 404dc9c271aSJulian Elischer /* Create the shared sigacts structure */ 405df8abd0bSPeter Wemm MALLOC(newsigacts, struct sigacts *, 406df8abd0bSPeter Wemm sizeof(struct sigacts), M_SUBPROC, M_WAITOK); 407dc9c271aSJulian Elischer s = splhigh(); 408df8abd0bSPeter Wemm /* 409df8abd0bSPeter Wemm * Set p_sigacts to the new shared structure. 410df8abd0bSPeter Wemm * Note that this is updating p1->p_sigacts at the 411df8abd0bSPeter Wemm * same time, since p_sigacts is just a pointer to 412df8abd0bSPeter Wemm * the shared p_procsig->ps_sigacts. 413dc9c271aSJulian Elischer */ 414dc9c271aSJulian Elischer p2->p_sigacts = newsigacts; 415df8abd0bSPeter Wemm bcopy(&p1->p_addr->u_sigacts, p2->p_sigacts, 416df8abd0bSPeter Wemm sizeof(*p2->p_sigacts)); 417dc9c271aSJulian Elischer *p2->p_sigacts = p1->p_addr->u_sigacts; 418dc9c271aSJulian Elischer splx(s); 419dc9c271aSJulian Elischer } 4206626c604SJulian Elischer } else { 421dc9c271aSJulian Elischer MALLOC(p2->p_procsig, struct procsig *, sizeof(struct procsig), 422dc9c271aSJulian Elischer M_SUBPROC, M_WAITOK); 423df8abd0bSPeter Wemm bcopy(p1->p_procsig, p2->p_procsig, sizeof(*p2->p_procsig)); 424dc9c271aSJulian Elischer p2->p_procsig->ps_refcnt = 1; 425df8abd0bSPeter Wemm p2->p_sigacts = NULL; /* finished in vm_fork() */ 4266626c604SJulian Elischer } 4274ac9ae70SJulian Elischer if (flags & RFLINUXTHPN) 4286626c604SJulian Elischer p2->p_sigparent = SIGUSR1; 4294ac9ae70SJulian Elischer else 4304ac9ae70SJulian Elischer p2->p_sigparent = SIGCHLD; 43188c5ea45SJulian Elischer 432df8bae1dSRodney W. Grimes /* bump references to the text vnode (for procfs) */ 433df8bae1dSRodney W. Grimes p2->p_textvp = p1->p_textvp; 434df8bae1dSRodney W. Grimes if (p2->p_textvp) 435df8bae1dSRodney W. Grimes VREF(p2->p_textvp); 436df8bae1dSRodney W. Grimes 4370e3eb7eeSSujal Patel if (flags & RFCFDG) 438dabee6feSPeter Wemm p2->p_fd = fdinit(p1); 4390e3eb7eeSSujal Patel else if (flags & RFFDG) 440df8bae1dSRodney W. Grimes p2->p_fd = fdcopy(p1); 441dabee6feSPeter Wemm else 442dabee6feSPeter Wemm p2->p_fd = fdshare(p1); 443dabee6feSPeter Wemm 444df8bae1dSRodney W. Grimes /* 445df8bae1dSRodney W. Grimes * If p_limit is still copy-on-write, bump refcnt, 446df8bae1dSRodney W. Grimes * otherwise get a copy that won't be modified. 447df8bae1dSRodney W. Grimes * (If PL_SHAREMOD is clear, the structure is shared 448df8bae1dSRodney W. Grimes * copy-on-write.) 449df8bae1dSRodney W. Grimes */ 450df8bae1dSRodney W. Grimes if (p1->p_limit->p_lflags & PL_SHAREMOD) 451df8bae1dSRodney W. Grimes p2->p_limit = limcopy(p1->p_limit); 452df8bae1dSRodney W. Grimes else { 453df8bae1dSRodney W. Grimes p2->p_limit = p1->p_limit; 454df8bae1dSRodney W. Grimes p2->p_limit->p_refcnt++; 455df8bae1dSRodney W. Grimes } 456df8bae1dSRodney W. Grimes 45770e534e7SDavid Greenman /* 458be67169aSBruce Evans * Preserve some more flags in subprocess. P_PROFIL has already 459be67169aSBruce Evans * been preserved. 46070e534e7SDavid Greenman */ 46170e534e7SDavid Greenman p2->p_flag |= p1->p_flag & P_SUGID; 462df8bae1dSRodney W. Grimes if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) 463df8bae1dSRodney W. Grimes p2->p_flag |= P_CONTROLT; 4640e3eb7eeSSujal Patel if (flags & RFPPWAIT) 465df8bae1dSRodney W. Grimes p2->p_flag |= P_PPWAIT; 466be67169aSBruce Evans 467b75356e1SJeffrey Hsu LIST_INSERT_AFTER(p1, p2, p_pglist); 4680e3eb7eeSSujal Patel 4690e3eb7eeSSujal Patel /* 4700e3eb7eeSSujal Patel * Attach the new process to its parent. 4710e3eb7eeSSujal Patel * 4720e3eb7eeSSujal Patel * If RFNOWAIT is set, the newly created process becomes a child 4730e3eb7eeSSujal Patel * of init. This effectively disassociates the child from the 4740e3eb7eeSSujal Patel * parent. 4750e3eb7eeSSujal Patel */ 4760e3eb7eeSSujal Patel if (flags & RFNOWAIT) 4770e3eb7eeSSujal Patel pptr = initproc; 4780e3eb7eeSSujal Patel else 4790e3eb7eeSSujal Patel pptr = p1; 48098f03f90SJake Burkholder PROCTREE_LOCK(PT_EXCLUSIVE); 4810e3eb7eeSSujal Patel p2->p_pptr = pptr; 4820e3eb7eeSSujal Patel LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); 48398f03f90SJake Burkholder PROCTREE_LOCK(PT_RELEASE); 484b75356e1SJeffrey Hsu LIST_INIT(&p2->p_children); 4850384fff8SJason Evans LIST_INIT(&p2->p_heldmtx); 4860384fff8SJason Evans LIST_INIT(&p2->p_contested); 487b75356e1SJeffrey Hsu 4884f559836SJake Burkholder callout_init(&p2->p_itcallout, 0); 4891512b5d6SJake Burkholder callout_init(&p2->p_slpcallout, 1); 4904f559836SJake Burkholder 491df8bae1dSRodney W. Grimes #ifdef KTRACE 492df8bae1dSRodney W. Grimes /* 493df8bae1dSRodney W. Grimes * Copy traceflag and tracefile if enabled. 494df8bae1dSRodney W. Grimes * If not inherited, these were zeroed above. 495df8bae1dSRodney W. Grimes */ 496df8bae1dSRodney W. Grimes if (p1->p_traceflag&KTRFAC_INHERIT) { 497df8bae1dSRodney W. Grimes p2->p_traceflag = p1->p_traceflag; 498df8bae1dSRodney W. Grimes if ((p2->p_tracep = p1->p_tracep) != NULL) 499df8bae1dSRodney W. Grimes VREF(p2->p_tracep); 500df8bae1dSRodney W. Grimes } 501df8bae1dSRodney W. Grimes #endif 502df8bae1dSRodney W. Grimes 503df8bae1dSRodney W. Grimes /* 5040d2afceeSDavid Greenman * set priority of child to be that of parent 5050d2afceeSDavid Greenman */ 5060d2afceeSDavid Greenman p2->p_estcpu = p1->p_estcpu; 5070d2afceeSDavid Greenman 5080d2afceeSDavid Greenman /* 509df8bae1dSRodney W. Grimes * This begins the section where we must prevent the parent 510df8bae1dSRodney W. Grimes * from being swapped. 511df8bae1dSRodney W. Grimes */ 512af8ad83eSPeter Wemm PHOLD(p1); 5130d2afceeSDavid Greenman 514df8bae1dSRodney W. Grimes /* 515a2a1c95cSPeter Wemm * Finish creating the child process. It will return via a different 516a2a1c95cSPeter Wemm * execution path later. (ie: directly into user mode) 517dabee6feSPeter Wemm */ 518a2a1c95cSPeter Wemm vm_fork(p1, p2, flags); 519df8bae1dSRodney W. Grimes 520df8bae1dSRodney W. Grimes /* 521e9189611SPeter Wemm * Both processes are set up, now check if any loadable modules want 522e0d898b4SJulian Elischer * to adjust anything. 523fed06968SJulian Elischer * What if they have an error? XXX 524fed06968SJulian Elischer */ 52593efcae8SPoul-Henning Kamp TAILQ_FOREACH(ep, &fork_list, next) { 526fed06968SJulian Elischer (*ep->function)(p1, p2, flags); 527fed06968SJulian Elischer } 528fed06968SJulian Elischer 529fed06968SJulian Elischer /* 5300384fff8SJason Evans * If RFSTOPPED not requested, make child runnable and add to 5310384fff8SJason Evans * run queue. 532df8bae1dSRodney W. Grimes */ 533a2a1c95cSPeter Wemm microtime(&(p2->p_stats->p_start)); 534a2a1c95cSPeter Wemm p2->p_acflag = AFORK; 5350384fff8SJason Evans if ((flags & RFSTOPPED) == 0) { 5360384fff8SJason Evans splhigh(); 5370384fff8SJason Evans mtx_enter(&sched_lock, MTX_SPIN); 538df8bae1dSRodney W. Grimes p2->p_stat = SRUN; 539df8bae1dSRodney W. Grimes setrunqueue(p2); 5400384fff8SJason Evans mtx_exit(&sched_lock, MTX_SPIN); 5410384fff8SJason Evans spl0(); 5420384fff8SJason Evans } 543df8bae1dSRodney W. Grimes 544df8bae1dSRodney W. Grimes /* 545df8bae1dSRodney W. Grimes * Now can be swapped. 546df8bae1dSRodney W. Grimes */ 547af8ad83eSPeter Wemm PRELE(p1); 548df8bae1dSRodney W. Grimes 549df8bae1dSRodney W. Grimes /* 550cb679c38SJonathan Lemon * tell any interested parties about the new process 551cb679c38SJonathan Lemon */ 552cb679c38SJonathan Lemon KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid); 553cb679c38SJonathan Lemon 554cb679c38SJonathan Lemon /* 555df8bae1dSRodney W. Grimes * Preserve synchronization semantics of vfork. If waiting for 556df8bae1dSRodney W. Grimes * child to exec or exit, set P_PPWAIT on child, and sleep on our 557df8bae1dSRodney W. Grimes * proc (in case of exit). 558df8bae1dSRodney W. Grimes */ 559df8bae1dSRodney W. Grimes while (p2->p_flag & P_PPWAIT) 560df8bae1dSRodney W. Grimes tsleep(p1, PWAIT, "ppwait", 0); 561df8bae1dSRodney W. Grimes 562df8bae1dSRodney W. Grimes /* 563df8abd0bSPeter Wemm * Return child proc pointer to parent. 564df8bae1dSRodney W. Grimes */ 565df8abd0bSPeter Wemm *procp = p2; 566df8bae1dSRodney W. Grimes return (0); 567df8bae1dSRodney W. Grimes } 568fed06968SJulian Elischer 569e0d898b4SJulian Elischer /* 570e0d898b4SJulian Elischer * The next two functionms are general routines to handle adding/deleting 571e0d898b4SJulian Elischer * items on the fork callout list. 572e0d898b4SJulian Elischer * 573e0d898b4SJulian Elischer * at_fork(): 574e0d898b4SJulian Elischer * Take the arguments given and put them onto the fork callout list, 575fed06968SJulian Elischer * However first make sure that it's not already there. 576e0d898b4SJulian Elischer * Returns 0 on success or a standard error number. 577fed06968SJulian Elischer */ 57893efcae8SPoul-Henning Kamp 579fed06968SJulian Elischer int 580eb776aeaSBruce Evans at_fork(function) 581eb776aeaSBruce Evans forklist_fn function; 582fed06968SJulian Elischer { 58393efcae8SPoul-Henning Kamp struct forklist *ep; 584e0d898b4SJulian Elischer 58593efcae8SPoul-Henning Kamp #ifdef INVARIANTS 586e0d898b4SJulian Elischer /* let the programmer know if he's been stupid */ 587e0d898b4SJulian Elischer if (rm_at_fork(function)) 58893efcae8SPoul-Henning Kamp printf("WARNING: fork callout entry (%p) already present\n", 58993efcae8SPoul-Henning Kamp function); 59093efcae8SPoul-Henning Kamp #endif 59193efcae8SPoul-Henning Kamp ep = malloc(sizeof(*ep), M_ATFORK, M_NOWAIT); 592e0d898b4SJulian Elischer if (ep == NULL) 593e0d898b4SJulian Elischer return (ENOMEM); 594fed06968SJulian Elischer ep->function = function; 59593efcae8SPoul-Henning Kamp TAILQ_INSERT_TAIL(&fork_list, ep, next); 596e0d898b4SJulian Elischer return (0); 597fed06968SJulian Elischer } 598e0d898b4SJulian Elischer 599fed06968SJulian Elischer /* 60093efcae8SPoul-Henning Kamp * Scan the exit callout list for the given item and remove it.. 60193efcae8SPoul-Henning Kamp * Returns the number of items removed (0 or 1) 602fed06968SJulian Elischer */ 60393efcae8SPoul-Henning Kamp 604fed06968SJulian Elischer int 605eb776aeaSBruce Evans rm_at_fork(function) 606eb776aeaSBruce Evans forklist_fn function; 607fed06968SJulian Elischer { 60893efcae8SPoul-Henning Kamp struct forklist *ep; 609fed06968SJulian Elischer 61093efcae8SPoul-Henning Kamp TAILQ_FOREACH(ep, &fork_list, next) { 611fed06968SJulian Elischer if (ep->function == function) { 61293efcae8SPoul-Henning Kamp TAILQ_REMOVE(&fork_list, ep, next); 61393efcae8SPoul-Henning Kamp free(ep, M_ATFORK); 61493efcae8SPoul-Henning Kamp return(1); 615fed06968SJulian Elischer } 616fed06968SJulian Elischer } 61793efcae8SPoul-Henning Kamp return (0); 618fed06968SJulian Elischer } 619