19454b2d8SWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1989, 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * (c) UNIX System Laboratories, Inc. 5df8bae1dSRodney W. Grimes * All or some portions of this file are derived from material licensed 6df8bae1dSRodney W. Grimes * to the University of California by American Telephone and Telegraph 7df8bae1dSRodney W. Grimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8df8bae1dSRodney W. Grimes * the permission of UNIX System Laboratories, Inc. 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 11df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 12df8bae1dSRodney W. Grimes * are met: 13df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 15df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 17df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 18df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 19df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 20df8bae1dSRodney W. Grimes * without specific prior written permission. 21df8bae1dSRodney W. Grimes * 22df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32df8bae1dSRodney W. Grimes * SUCH DAMAGE. 33df8bae1dSRodney W. Grimes * 34df8bae1dSRodney W. Grimes * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 35df8bae1dSRodney W. Grimes */ 36df8bae1dSRodney W. Grimes 37677b542eSDavid E. O'Brien #include <sys/cdefs.h> 38677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 39677b542eSDavid E. O'Brien 405d217f17SJohn Birrell #include "opt_kdtrace.h" 41db6a20e2SGarrett Wollman #include "opt_ktrace.h" 428a945d10SKonstantin Belousov #include "opt_kstack_pages.h" 43cfb5f768SJonathan Anderson #include "opt_procdesc.h" 44db6a20e2SGarrett Wollman 45df8bae1dSRodney W. Grimes #include <sys/param.h> 46df8bae1dSRodney W. Grimes #include <sys/systm.h> 47d2d3e875SBruce Evans #include <sys/sysproto.h> 4875b8b3b2SJohn Baldwin #include <sys/eventhandler.h> 49cfb5f768SJonathan Anderson #include <sys/fcntl.h> 50df8bae1dSRodney W. Grimes #include <sys/filedesc.h> 510304c731SJamie Gritton #include <sys/jail.h> 52df8bae1dSRodney W. Grimes #include <sys/kernel.h> 5370fca427SJohn Baldwin #include <sys/kthread.h> 54c76e95c3SPeter Wemm #include <sys/sysctl.h> 5519284646SJohn Baldwin #include <sys/lock.h> 56df8bae1dSRodney W. Grimes #include <sys/malloc.h> 5735e0e5b3SJohn Baldwin #include <sys/mutex.h> 58acd3428bSRobert Watson #include <sys/priv.h> 59df8bae1dSRodney W. Grimes #include <sys/proc.h> 60cfb5f768SJonathan Anderson #include <sys/procdesc.h> 619ccba881SMatthew N. Dodd #include <sys/pioctl.h> 62097055e2SEdward Tomasz Napierala #include <sys/racct.h> 63df8bae1dSRodney W. Grimes #include <sys/resourcevar.h> 64b43179fbSJeff Roberson #include <sys/sched.h> 65a7b124c3SJohn Baldwin #include <sys/syscall.h> 6670fca427SJohn Baldwin #include <sys/vmmeter.h> 67df8bae1dSRodney W. Grimes #include <sys/vnode.h> 68df8bae1dSRodney W. Grimes #include <sys/acct.h> 690384fff8SJason Evans #include <sys/ktr.h> 70df8bae1dSRodney W. Grimes #include <sys/ktrace.h> 71b71fec07SBruce Evans #include <sys/unistd.h> 725d217f17SJohn Birrell #include <sys/sdt.h> 7357934cd3SJohn Baldwin #include <sys/sx.h> 74e5d81ef1SDmitry Chagin #include <sys/sysent.h> 756004362eSDavid Schultz #include <sys/signalvar.h> 76df8bae1dSRodney W. Grimes 77fcf7f27aSRobert Watson #include <security/audit/audit.h> 78aed55708SRobert Watson #include <security/mac/mac_framework.h> 79fcf7f27aSRobert Watson 80d93f860cSPoul-Henning Kamp #include <vm/vm.h> 81dabee6feSPeter Wemm #include <vm/pmap.h> 82dabee6feSPeter Wemm #include <vm/vm_map.h> 83efeaf95aSDavid Greenman #include <vm/vm_extern.h> 84c897b813SJeff Roberson #include <vm/uma.h> 85d93f860cSPoul-Henning Kamp 865d217f17SJohn Birrell #ifdef KDTRACE_HOOKS 875d217f17SJohn Birrell #include <sys/dtrace_bsd.h> 885d217f17SJohn Birrell dtrace_fork_func_t dtrace_fasttrap_fork; 895d217f17SJohn Birrell #endif 905d217f17SJohn Birrell 915d217f17SJohn Birrell SDT_PROVIDER_DECLARE(proc); 9279856499SRui Paulo SDT_PROBE_DEFINE(proc, kernel, , create, create); 935d217f17SJohn Birrell SDT_PROBE_ARGTYPE(proc, kernel, , create, 0, "struct proc *"); 945d217f17SJohn Birrell SDT_PROBE_ARGTYPE(proc, kernel, , create, 1, "struct proc *"); 955d217f17SJohn Birrell SDT_PROBE_ARGTYPE(proc, kernel, , create, 2, "int"); 9688c5ea45SJulian Elischer 97d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 98ad7507e2SSteven Wallace struct fork_args { 99ad7507e2SSteven Wallace int dummy; 100ad7507e2SSteven Wallace }; 101d2d3e875SBruce Evans #endif 102ad7507e2SSteven Wallace 103df8bae1dSRodney W. Grimes /* ARGSUSED */ 10426f9a767SRodney W. Grimes int 1058451d0ddSKip Macy sys_fork(struct thread *td, struct fork_args *uap) 106df8bae1dSRodney W. Grimes { 107df8abd0bSPeter Wemm int error; 108df8abd0bSPeter Wemm struct proc *p2; 109be67169aSBruce Evans 110cfb5f768SJonathan Anderson error = fork1(td, RFFDG | RFPROC, 0, &p2, NULL, 0); 111df8abd0bSPeter Wemm if (error == 0) { 112b40ce416SJulian Elischer td->td_retval[0] = p2->p_pid; 113b40ce416SJulian Elischer td->td_retval[1] = 0; 114df8abd0bSPeter Wemm } 11570fca427SJohn Baldwin return (error); 116df8bae1dSRodney W. Grimes } 117df8bae1dSRodney W. Grimes 118cfb5f768SJonathan Anderson /* ARGUSED */ 119cfb5f768SJonathan Anderson int 1208451d0ddSKip Macy sys_pdfork(td, uap) 121cfb5f768SJonathan Anderson struct thread *td; 122cfb5f768SJonathan Anderson struct pdfork_args *uap; 123cfb5f768SJonathan Anderson { 124cfb5f768SJonathan Anderson #ifdef PROCDESC 125cfb5f768SJonathan Anderson int error, fd; 126cfb5f768SJonathan Anderson struct proc *p2; 127cfb5f768SJonathan Anderson 128cfb5f768SJonathan Anderson /* 129cfb5f768SJonathan Anderson * It is necessary to return fd by reference because 0 is a valid file 130cfb5f768SJonathan Anderson * descriptor number, and the child needs to be able to distinguish 131cfb5f768SJonathan Anderson * itself from the parent using the return value. 132cfb5f768SJonathan Anderson */ 133cfb5f768SJonathan Anderson error = fork1(td, RFFDG | RFPROC | RFPROCDESC, 0, &p2, 134cfb5f768SJonathan Anderson &fd, uap->flags); 135cfb5f768SJonathan Anderson if (error == 0) { 136cfb5f768SJonathan Anderson td->td_retval[0] = p2->p_pid; 137cfb5f768SJonathan Anderson td->td_retval[1] = 0; 138cfb5f768SJonathan Anderson error = copyout(&fd, uap->fdp, sizeof(fd)); 139cfb5f768SJonathan Anderson } 140cfb5f768SJonathan Anderson return (error); 141cfb5f768SJonathan Anderson #else 142cfb5f768SJonathan Anderson return (ENOSYS); 143cfb5f768SJonathan Anderson #endif 144cfb5f768SJonathan Anderson } 145cfb5f768SJonathan Anderson 146df8bae1dSRodney W. Grimes /* ARGSUSED */ 14726f9a767SRodney W. Grimes int 1488451d0ddSKip Macy sys_vfork(struct thread *td, struct vfork_args *uap) 149df8bae1dSRodney W. Grimes { 15050d6e424SKip Macy int error, flags; 151df8abd0bSPeter Wemm struct proc *p2; 152be67169aSBruce Evans 15350d6e424SKip Macy #ifdef XEN 15450d6e424SKip Macy flags = RFFDG | RFPROC; /* validate that this is still an issue */ 15550d6e424SKip Macy #else 15650d6e424SKip Macy flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; 15750d6e424SKip Macy #endif 158cfb5f768SJonathan Anderson error = fork1(td, flags, 0, &p2, NULL, 0); 159df8abd0bSPeter Wemm if (error == 0) { 160b40ce416SJulian Elischer td->td_retval[0] = p2->p_pid; 161b40ce416SJulian Elischer td->td_retval[1] = 0; 162df8abd0bSPeter Wemm } 16370fca427SJohn Baldwin return (error); 164df8bae1dSRodney W. Grimes } 165df8bae1dSRodney W. Grimes 166dabee6feSPeter Wemm int 1678451d0ddSKip Macy sys_rfork(struct thread *td, struct rfork_args *uap) 168dabee6feSPeter Wemm { 169df8abd0bSPeter Wemm struct proc *p2; 170c8564ad4SBruce Evans int error; 171be67169aSBruce Evans 172c8564ad4SBruce Evans /* Don't allow kernel-only flags. */ 173885ccc61SJohn Baldwin if ((uap->flags & RFKERNELONLY) != 0) 174885ccc61SJohn Baldwin return (EINVAL); 175c8564ad4SBruce Evans 17614961ba7SRobert Watson AUDIT_ARG_FFLAGS(uap->flags); 177cfb5f768SJonathan Anderson error = fork1(td, uap->flags, 0, &p2, NULL, 0); 178df8abd0bSPeter Wemm if (error == 0) { 179b40ce416SJulian Elischer td->td_retval[0] = p2 ? p2->p_pid : 0; 180b40ce416SJulian Elischer td->td_retval[1] = 0; 181df8abd0bSPeter Wemm } 18270fca427SJohn Baldwin return (error); 183dabee6feSPeter Wemm } 184dabee6feSPeter Wemm 185df8bae1dSRodney W. Grimes int nprocs = 1; /* process 0 */ 1868f7e4eb5SDag-Erling Smørgrav int lastpid = 0; 1878f7e4eb5SDag-Erling Smørgrav SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, 188d941d475SRobert Watson "Last used PID"); 189df8bae1dSRodney W. Grimes 190bb6a234eSPeter Wemm /* 1918f7e4eb5SDag-Erling Smørgrav * Random component to lastpid generation. We mix in a random factor to make 192bb6a234eSPeter Wemm * it a little harder to predict. We sanity check the modulus value to avoid 193bb6a234eSPeter Wemm * doing it in critical paths. Don't let it be too small or we pointlessly 194bb6a234eSPeter Wemm * waste randomness entropy, and don't let it be impossibly large. Using a 195bb6a234eSPeter Wemm * modulus that is too big causes a LOT more process table scans and slows 196bb6a234eSPeter Wemm * down fork processing as the pidchecked caching is defeated. 197bb6a234eSPeter Wemm */ 198ee3fd601SDan Moschuk static int randompid = 0; 199bb6a234eSPeter Wemm 200bb6a234eSPeter Wemm static int 20182d9ae4eSPoul-Henning Kamp sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) 202bb6a234eSPeter Wemm { 203bb6a234eSPeter Wemm int error, pid; 204bb6a234eSPeter Wemm 20547934cefSDon Lewis error = sysctl_wire_old_buffer(req, sizeof(int)); 20647934cefSDon Lewis if (error != 0) 20747934cefSDon Lewis return(error); 2083fc755c1SJohn Baldwin sx_xlock(&allproc_lock); 209bb6a234eSPeter Wemm pid = randompid; 210bb6a234eSPeter Wemm error = sysctl_handle_int(oidp, &pid, 0, req); 2113fc755c1SJohn Baldwin if (error == 0 && req->newptr != NULL) { 212bb6a234eSPeter Wemm if (pid < 0 || pid > PID_MAX - 100) /* out of range */ 213bb6a234eSPeter Wemm pid = PID_MAX - 100; 214bb6a234eSPeter Wemm else if (pid < 2) /* NOP */ 215bb6a234eSPeter Wemm pid = 0; 216bb6a234eSPeter Wemm else if (pid < 100) /* Make it reasonable */ 217bb6a234eSPeter Wemm pid = 100; 218bb6a234eSPeter Wemm randompid = pid; 2193fc755c1SJohn Baldwin } 2203fc755c1SJohn Baldwin sx_xunlock(&allproc_lock); 221bb6a234eSPeter Wemm return (error); 222bb6a234eSPeter Wemm } 223bb6a234eSPeter Wemm 224bb6a234eSPeter Wemm SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 225bb6a234eSPeter Wemm 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); 226ee3fd601SDan Moschuk 2271d845e86SEdward Tomasz Napierala static int 228afd01097SEdward Tomasz Napierala fork_findpid(int flags) 229afd01097SEdward Tomasz Napierala { 230afd01097SEdward Tomasz Napierala struct proc *p; 231afd01097SEdward Tomasz Napierala int trypid; 232afd01097SEdward Tomasz Napierala static int pidchecked = 0; 233afd01097SEdward Tomasz Napierala 2343e73ff1eSEdward Tomasz Napierala /* 2353e73ff1eSEdward Tomasz Napierala * Requires allproc_lock in order to iterate over the list 2363e73ff1eSEdward Tomasz Napierala * of processes, and proctree_lock to access p_pgrp. 2373e73ff1eSEdward Tomasz Napierala */ 2383e73ff1eSEdward Tomasz Napierala sx_assert(&allproc_lock, SX_LOCKED); 2393e73ff1eSEdward Tomasz Napierala sx_assert(&proctree_lock, SX_LOCKED); 240afd01097SEdward Tomasz Napierala 241afd01097SEdward Tomasz Napierala /* 242afd01097SEdward Tomasz Napierala * Find an unused process ID. We remember a range of unused IDs 243afd01097SEdward Tomasz Napierala * ready to use (from lastpid+1 through pidchecked-1). 244afd01097SEdward Tomasz Napierala * 245afd01097SEdward Tomasz Napierala * If RFHIGHPID is set (used during system boot), do not allocate 246afd01097SEdward Tomasz Napierala * low-numbered pids. 247afd01097SEdward Tomasz Napierala */ 248afd01097SEdward Tomasz Napierala trypid = lastpid + 1; 249afd01097SEdward Tomasz Napierala if (flags & RFHIGHPID) { 250afd01097SEdward Tomasz Napierala if (trypid < 10) 251afd01097SEdward Tomasz Napierala trypid = 10; 252afd01097SEdward Tomasz Napierala } else { 253afd01097SEdward Tomasz Napierala if (randompid) 254afd01097SEdward Tomasz Napierala trypid += arc4random() % randompid; 255afd01097SEdward Tomasz Napierala } 256afd01097SEdward Tomasz Napierala retry: 257afd01097SEdward Tomasz Napierala /* 258afd01097SEdward Tomasz Napierala * If the process ID prototype has wrapped around, 259afd01097SEdward Tomasz Napierala * restart somewhat above 0, as the low-numbered procs 260afd01097SEdward Tomasz Napierala * tend to include daemons that don't exit. 261afd01097SEdward Tomasz Napierala */ 262afd01097SEdward Tomasz Napierala if (trypid >= PID_MAX) { 263afd01097SEdward Tomasz Napierala trypid = trypid % PID_MAX; 264afd01097SEdward Tomasz Napierala if (trypid < 100) 265afd01097SEdward Tomasz Napierala trypid += 100; 266afd01097SEdward Tomasz Napierala pidchecked = 0; 267afd01097SEdward Tomasz Napierala } 268afd01097SEdward Tomasz Napierala if (trypid >= pidchecked) { 269afd01097SEdward Tomasz Napierala int doingzomb = 0; 270afd01097SEdward Tomasz Napierala 271afd01097SEdward Tomasz Napierala pidchecked = PID_MAX; 272afd01097SEdward Tomasz Napierala /* 273afd01097SEdward Tomasz Napierala * Scan the active and zombie procs to check whether this pid 274afd01097SEdward Tomasz Napierala * is in use. Remember the lowest pid that's greater 275afd01097SEdward Tomasz Napierala * than trypid, so we can avoid checking for a while. 276afd01097SEdward Tomasz Napierala */ 277afd01097SEdward Tomasz Napierala p = LIST_FIRST(&allproc); 278afd01097SEdward Tomasz Napierala again: 279afd01097SEdward Tomasz Napierala for (; p != NULL; p = LIST_NEXT(p, p_list)) { 280afd01097SEdward Tomasz Napierala while (p->p_pid == trypid || 281afd01097SEdward Tomasz Napierala (p->p_pgrp != NULL && 282afd01097SEdward Tomasz Napierala (p->p_pgrp->pg_id == trypid || 283afd01097SEdward Tomasz Napierala (p->p_session != NULL && 284afd01097SEdward Tomasz Napierala p->p_session->s_sid == trypid)))) { 285afd01097SEdward Tomasz Napierala trypid++; 286afd01097SEdward Tomasz Napierala if (trypid >= pidchecked) 287afd01097SEdward Tomasz Napierala goto retry; 288afd01097SEdward Tomasz Napierala } 289afd01097SEdward Tomasz Napierala if (p->p_pid > trypid && pidchecked > p->p_pid) 290afd01097SEdward Tomasz Napierala pidchecked = p->p_pid; 291afd01097SEdward Tomasz Napierala if (p->p_pgrp != NULL) { 292afd01097SEdward Tomasz Napierala if (p->p_pgrp->pg_id > trypid && 293afd01097SEdward Tomasz Napierala pidchecked > p->p_pgrp->pg_id) 294afd01097SEdward Tomasz Napierala pidchecked = p->p_pgrp->pg_id; 295afd01097SEdward Tomasz Napierala if (p->p_session != NULL && 296afd01097SEdward Tomasz Napierala p->p_session->s_sid > trypid && 297afd01097SEdward Tomasz Napierala pidchecked > p->p_session->s_sid) 298afd01097SEdward Tomasz Napierala pidchecked = p->p_session->s_sid; 299afd01097SEdward Tomasz Napierala } 300afd01097SEdward Tomasz Napierala } 301afd01097SEdward Tomasz Napierala if (!doingzomb) { 302afd01097SEdward Tomasz Napierala doingzomb = 1; 303afd01097SEdward Tomasz Napierala p = LIST_FIRST(&zombproc); 304afd01097SEdward Tomasz Napierala goto again; 305afd01097SEdward Tomasz Napierala } 306afd01097SEdward Tomasz Napierala } 307afd01097SEdward Tomasz Napierala 308afd01097SEdward Tomasz Napierala /* 309afd01097SEdward Tomasz Napierala * RFHIGHPID does not mess with the lastpid counter during boot. 310afd01097SEdward Tomasz Napierala */ 311afd01097SEdward Tomasz Napierala if (flags & RFHIGHPID) 312afd01097SEdward Tomasz Napierala pidchecked = 0; 313afd01097SEdward Tomasz Napierala else 314afd01097SEdward Tomasz Napierala lastpid = trypid; 315afd01097SEdward Tomasz Napierala 316afd01097SEdward Tomasz Napierala return (trypid); 317afd01097SEdward Tomasz Napierala } 318afd01097SEdward Tomasz Napierala 319afd01097SEdward Tomasz Napierala static int 3203e73ff1eSEdward Tomasz Napierala fork_norfproc(struct thread *td, int flags) 3211d845e86SEdward Tomasz Napierala { 3221d845e86SEdward Tomasz Napierala int error; 3231d845e86SEdward Tomasz Napierala struct proc *p1; 3241d845e86SEdward Tomasz Napierala 325087bfb0eSEdward Tomasz Napierala KASSERT((flags & RFPROC) == 0, 326087bfb0eSEdward Tomasz Napierala ("fork_norfproc called with RFPROC set")); 3271d845e86SEdward Tomasz Napierala p1 = td->td_proc; 3281d845e86SEdward Tomasz Napierala 3291d845e86SEdward Tomasz Napierala if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && 3301d845e86SEdward Tomasz Napierala (flags & (RFCFDG | RFFDG))) { 3311d845e86SEdward Tomasz Napierala PROC_LOCK(p1); 3321d845e86SEdward Tomasz Napierala if (thread_single(SINGLE_BOUNDARY)) { 3331d845e86SEdward Tomasz Napierala PROC_UNLOCK(p1); 3341d845e86SEdward Tomasz Napierala return (ERESTART); 3351d845e86SEdward Tomasz Napierala } 3361d845e86SEdward Tomasz Napierala PROC_UNLOCK(p1); 3371d845e86SEdward Tomasz Napierala } 3381d845e86SEdward Tomasz Napierala 3391d845e86SEdward Tomasz Napierala error = vm_forkproc(td, NULL, NULL, NULL, flags); 3401d845e86SEdward Tomasz Napierala if (error) 3411d845e86SEdward Tomasz Napierala goto fail; 3421d845e86SEdward Tomasz Napierala 3431d845e86SEdward Tomasz Napierala /* 3441d845e86SEdward Tomasz Napierala * Close all file descriptors. 3451d845e86SEdward Tomasz Napierala */ 3461d845e86SEdward Tomasz Napierala if (flags & RFCFDG) { 3471d845e86SEdward Tomasz Napierala struct filedesc *fdtmp; 3481d845e86SEdward Tomasz Napierala fdtmp = fdinit(td->td_proc->p_fd); 3491d845e86SEdward Tomasz Napierala fdfree(td); 3501d845e86SEdward Tomasz Napierala p1->p_fd = fdtmp; 3511d845e86SEdward Tomasz Napierala } 3521d845e86SEdward Tomasz Napierala 3531d845e86SEdward Tomasz Napierala /* 3541d845e86SEdward Tomasz Napierala * Unshare file descriptors (from parent). 3551d845e86SEdward Tomasz Napierala */ 3561d845e86SEdward Tomasz Napierala if (flags & RFFDG) 3571d845e86SEdward Tomasz Napierala fdunshare(p1, td); 3581d845e86SEdward Tomasz Napierala 3591d845e86SEdward Tomasz Napierala fail: 3601d845e86SEdward Tomasz Napierala if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && 3611d845e86SEdward Tomasz Napierala (flags & (RFCFDG | RFFDG))) { 3621d845e86SEdward Tomasz Napierala PROC_LOCK(p1); 3631d845e86SEdward Tomasz Napierala thread_single_end(); 3641d845e86SEdward Tomasz Napierala PROC_UNLOCK(p1); 3651d845e86SEdward Tomasz Napierala } 3661d845e86SEdward Tomasz Napierala return (error); 3671d845e86SEdward Tomasz Napierala } 3681d845e86SEdward Tomasz Napierala 369afd01097SEdward Tomasz Napierala static void 370afd01097SEdward Tomasz Napierala do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, 371cfb5f768SJonathan Anderson struct vmspace *vm2, int pdflags) 372df8bae1dSRodney W. Grimes { 373afd01097SEdward Tomasz Napierala struct proc *p1, *pptr; 3746fa39a73SKonstantin Belousov int p2_held, trypid; 3755641ae5dSJohn Baldwin struct filedesc *fd; 376ad05d580STor Egge struct filedesc_to_leader *fdtol; 3773fc755c1SJohn Baldwin struct sigacts *newsigacts; 3785856e12eSJohn Dyson 379afd01097SEdward Tomasz Napierala sx_assert(&proctree_lock, SX_SLOCKED); 380afd01097SEdward Tomasz Napierala sx_assert(&allproc_lock, SX_XLOCKED); 381df8bae1dSRodney W. Grimes 3826fa39a73SKonstantin Belousov p2_held = 0; 38370fca427SJohn Baldwin p1 = td->td_proc; 38470fca427SJohn Baldwin 385df8bae1dSRodney W. Grimes /* 386ef5dc8a9SJohn Dyson * Increment the nprocs resource before blocking can occur. There 387ef5dc8a9SJohn Dyson * are hard-limits as to the number of processes that can run. 388ef5dc8a9SJohn Dyson */ 389ef5dc8a9SJohn Dyson nprocs++; 390ef5dc8a9SJohn Dyson 391afd01097SEdward Tomasz Napierala trypid = fork_findpid(flags); 392df8bae1dSRodney W. Grimes 3935ce2f678SJohn Baldwin sx_sunlock(&proctree_lock); 394df8bae1dSRodney W. Grimes 395e602ba25SJulian Elischer p2->p_state = PRS_NEW; /* protect against others */ 396553629ebSJake Burkholder p2->p_pid = trypid; 39714961ba7SRobert Watson AUDIT_ARG_PID(p2->p_pid); 398553629ebSJake Burkholder LIST_INSERT_HEAD(&allproc, p2, p_list); 399553629ebSJake Burkholder LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); 400cf7d9a8cSDavid Xu tidhash_add(td2); 4011ad9ee86SXin LI PROC_LOCK(p2); 4021ad9ee86SXin LI PROC_LOCK(p1); 4031ad9ee86SXin LI 4041005a129SJohn Baldwin sx_xunlock(&allproc_lock); 405553629ebSJake Burkholder 4061ad9ee86SXin LI bcopy(&p1->p_startcopy, &p2->p_startcopy, 4071ad9ee86SXin LI __rangeof(struct proc, p_startcopy, p_endcopy)); 4088b4a2800SKonstantin Belousov pargs_hold(p2->p_args); 4091ad9ee86SXin LI PROC_UNLOCK(p1); 4101ad9ee86SXin LI 4111ad9ee86SXin LI bzero(&p2->p_startzero, 4121ad9ee86SXin LI __rangeof(struct proc, p_startzero, p_endzero)); 4131ad9ee86SXin LI 4141ad9ee86SXin LI p2->p_ucred = crhold(td->td_ucred); 415413628a7SBjoern A. Zeeb 4160304c731SJamie Gritton /* Tell the prison that we exist. */ 417413628a7SBjoern A. Zeeb prison_proc_hold(p2->p_ucred->cr_prison); 418413628a7SBjoern A. Zeeb 4191ad9ee86SXin LI PROC_UNLOCK(p2); 4201ad9ee86SXin LI 4210384fff8SJason Evans /* 4223fc755c1SJohn Baldwin * Malloc things while we don't hold any locks. 4233fc755c1SJohn Baldwin */ 42490af4afaSJohn Baldwin if (flags & RFSIGSHARE) 4253fc755c1SJohn Baldwin newsigacts = NULL; 42690af4afaSJohn Baldwin else 42790af4afaSJohn Baldwin newsigacts = sigacts_alloc(); 4283fc755c1SJohn Baldwin 4293fc755c1SJohn Baldwin /* 4303fc755c1SJohn Baldwin * Copy filedesc. 4313fc755c1SJohn Baldwin */ 432ad05d580STor Egge if (flags & RFCFDG) { 433598b7ec8SPoul-Henning Kamp fd = fdinit(p1->p_fd); 434ad05d580STor Egge fdtol = NULL; 435ad05d580STor Egge } else if (flags & RFFDG) { 436598b7ec8SPoul-Henning Kamp fd = fdcopy(p1->p_fd); 437ad05d580STor Egge fdtol = NULL; 438ad05d580STor Egge } else { 439c7f1c11bSAlfred Perlstein fd = fdshare(p1->p_fd); 440ad05d580STor Egge if (p1->p_fdtol == NULL) 4413e73ff1eSEdward Tomasz Napierala p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL, 442ad05d580STor Egge p1->p_leader); 443ad05d580STor Egge if ((flags & RFTHREAD) != 0) { 444ad05d580STor Egge /* 4453e73ff1eSEdward Tomasz Napierala * Shared file descriptor table, and shared 4463e73ff1eSEdward Tomasz Napierala * process leaders. 447ad05d580STor Egge */ 448ad05d580STor Egge fdtol = p1->p_fdtol; 4495e3f7694SRobert Watson FILEDESC_XLOCK(p1->p_fd); 450ad05d580STor Egge fdtol->fdl_refcount++; 4515e3f7694SRobert Watson FILEDESC_XUNLOCK(p1->p_fd); 452ad05d580STor Egge } else { 453ad05d580STor Egge /* 4543e73ff1eSEdward Tomasz Napierala * Shared file descriptor table, and different 4553e73ff1eSEdward Tomasz Napierala * process leaders. 456ad05d580STor Egge */ 457ad05d580STor Egge fdtol = filedesc_to_leader_alloc(p1->p_fdtol, 4583e73ff1eSEdward Tomasz Napierala p1->p_fd, p2); 459ad05d580STor Egge } 460ad05d580STor Egge } 4613fc755c1SJohn Baldwin /* 462df8bae1dSRodney W. Grimes * Make a proc table entry for the new process. 463df8bae1dSRodney W. Grimes * Start by zeroing the section of proc that is zero-initialized, 464df8bae1dSRodney W. Grimes * then copy the section that is copied directly from the parent. 465df8bae1dSRodney W. Grimes */ 466316ec49aSScott Long 4677d447c95SJohn Baldwin PROC_LOCK(p2); 4687d447c95SJohn Baldwin PROC_LOCK(p1); 4697d447c95SJohn Baldwin 470079b7badSJulian Elischer bzero(&td2->td_startzero, 4716db36923SDavid Schultz __rangeof(struct thread, td_startzero, td_endzero)); 472079b7badSJulian Elischer 473079b7badSJulian Elischer bcopy(&td->td_startcopy, &td2->td_startcopy, 4746db36923SDavid Schultz __rangeof(struct thread, td_startcopy, td_endcopy)); 475df8bae1dSRodney W. Grimes 4764b9322aeSJulian Elischer bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name)); 477a30ec4b9SDavid Xu td2->td_sigstk = td->td_sigstk; 4783d5c30f7SDavid Xu td2->td_sigmask = td->td_sigmask; 479b61ce5b0SJeff Roberson td2->td_flags = TDF_INMEM; 480acbe332aSDavid Xu td2->td_lend_user_pri = PRI_MAX; 481a30ec4b9SDavid Xu 48221ca7b57SMarko Zec #ifdef VIMAGE 48321ca7b57SMarko Zec td2->td_vnet = NULL; 48421ca7b57SMarko Zec td2->td_vnet_lpush = NULL; 48521ca7b57SMarko Zec #endif 48621ca7b57SMarko Zec 487df8bae1dSRodney W. Grimes /* 48822d19207SJohn Baldwin * Allow the scheduler to initialize the child. 48922d19207SJohn Baldwin */ 49022d19207SJohn Baldwin thread_lock(td); 49122d19207SJohn Baldwin sched_fork(td, td2); 49222d19207SJohn Baldwin thread_unlock(td); 49322d19207SJohn Baldwin 49422d19207SJohn Baldwin /* 495df8bae1dSRodney W. Grimes * Duplicate sub-structures as needed. 496df8bae1dSRodney W. Grimes * Increase reference counts on shared objects. 497df8bae1dSRodney W. Grimes */ 498b61ce5b0SJeff Roberson p2->p_flag = P_INMEM; 49954b0e65fSJeff Roberson p2->p_swtick = ticks; 5009752f794SJohn Baldwin if (p1->p_flag & P_PROFIL) 5019752f794SJohn Baldwin startprofclock(p2); 5028460a577SJohn Birrell td2->td_ucred = crhold(p2->p_ucred); 503b9df5231SPoul-Henning Kamp 5046626c604SJulian Elischer if (flags & RFSIGSHARE) { 50590af4afaSJohn Baldwin p2->p_sigacts = sigacts_hold(p1->p_sigacts); 5066626c604SJulian Elischer } else { 50790af4afaSJohn Baldwin sigacts_copy(newsigacts, p1->p_sigacts); 50890af4afaSJohn Baldwin p2->p_sigacts = newsigacts; 5096626c604SJulian Elischer } 510f49d8202SKonstantin Belousov 511f49d8202SKonstantin Belousov if (flags & RFTSIGZMB) 512f49d8202SKonstantin Belousov p2->p_sigparent = RFTSIGNUM(flags); 513f49d8202SKonstantin Belousov else if (flags & RFLINUXTHPN) 5146626c604SJulian Elischer p2->p_sigparent = SIGUSR1; 5154ac9ae70SJulian Elischer else 5164ac9ae70SJulian Elischer p2->p_sigparent = SIGCHLD; 51788c5ea45SJulian Elischer 518df8bae1dSRodney W. Grimes p2->p_textvp = p1->p_textvp; 5195641ae5dSJohn Baldwin p2->p_fd = fd; 520ad05d580STor Egge p2->p_fdtol = fdtol; 521dabee6feSPeter Wemm 522df8bae1dSRodney W. Grimes /* 523c8564ad4SBruce Evans * p_limit is copy-on-write. Bump its refcount. 524df8bae1dSRodney W. Grimes */ 5251c4bcd05SJeff Roberson lim_fork(p1, p2); 5268b059651SDavid Schultz 5278b059651SDavid Schultz pstats_fork(p1->p_stats, p2->p_stats); 5288b059651SDavid Schultz 529299bc736SDavid Schultz PROC_UNLOCK(p1); 530cda5aba4SDavid Schultz PROC_UNLOCK(p2); 531df8bae1dSRodney W. Grimes 5323e73ff1eSEdward Tomasz Napierala /* Bump references to the text vnode (for procfs). */ 533a69d88afSPeter Wemm if (p2->p_textvp) 534a69d88afSPeter Wemm vref(p2->p_textvp); 535a69d88afSPeter Wemm 536c6544064SJohn Baldwin /* 537c8564ad4SBruce Evans * Set up linkage for kernel based threading. 538c6544064SJohn Baldwin */ 539c6544064SJohn Baldwin if ((flags & RFTHREAD) != 0) { 540c6544064SJohn Baldwin mtx_lock(&ppeers_lock); 541c6544064SJohn Baldwin p2->p_peers = p1->p_peers; 542c6544064SJohn Baldwin p1->p_peers = p2; 543c6544064SJohn Baldwin p2->p_leader = p1->p_leader; 544c6544064SJohn Baldwin mtx_unlock(&ppeers_lock); 545c6544064SJohn Baldwin PROC_LOCK(p1->p_leader); 546c6544064SJohn Baldwin if ((p1->p_leader->p_flag & P_WEXIT) != 0) { 547c6544064SJohn Baldwin PROC_UNLOCK(p1->p_leader); 548c6544064SJohn Baldwin /* 549c6544064SJohn Baldwin * The task leader is exiting, so process p1 is 550c6544064SJohn Baldwin * going to be killed shortly. Since p1 obviously 551c6544064SJohn Baldwin * isn't dead yet, we know that the leader is either 552c6544064SJohn Baldwin * sending SIGKILL's to all the processes in this 553c6544064SJohn Baldwin * task or is sleeping waiting for all the peers to 554c6544064SJohn Baldwin * exit. We let p1 complete the fork, but we need 555c6544064SJohn Baldwin * to go ahead and kill the new process p2 since 556c6544064SJohn Baldwin * the task leader may not get a chance to send 557c6544064SJohn Baldwin * SIGKILL to it. We leave it on the list so that 558c6544064SJohn Baldwin * the task leader will wait for this new process 559c6544064SJohn Baldwin * to commit suicide. 560c6544064SJohn Baldwin */ 561c6544064SJohn Baldwin PROC_LOCK(p2); 5628451d0ddSKip Macy kern_psignal(p2, SIGKILL); 563c6544064SJohn Baldwin PROC_UNLOCK(p2); 564293d2d22SRobert Watson } else 565293d2d22SRobert Watson PROC_UNLOCK(p1->p_leader); 566c6544064SJohn Baldwin } else { 567c6544064SJohn Baldwin p2->p_peers = NULL; 568c6544064SJohn Baldwin p2->p_leader = p2; 569c6544064SJohn Baldwin } 570c6544064SJohn Baldwin 5713fc755c1SJohn Baldwin sx_xlock(&proctree_lock); 5723fc755c1SJohn Baldwin PGRP_LOCK(p1->p_pgrp); 5733fc755c1SJohn Baldwin PROC_LOCK(p2); 5743fc755c1SJohn Baldwin PROC_LOCK(p1); 5753fc755c1SJohn Baldwin 57670e534e7SDavid Greenman /* 5779752f794SJohn Baldwin * Preserve some more flags in subprocess. P_PROFIL has already 578be67169aSBruce Evans * been preserved. 57970e534e7SDavid Greenman */ 580a30ec4b9SDavid Xu p2->p_flag |= p1->p_flag & P_SUGID; 581a30ec4b9SDavid Xu td2->td_pflags |= td->td_pflags & TDP_ALTSTACK; 582f591779bSSeigo Tanimura SESS_LOCK(p1->p_session); 583df8bae1dSRodney W. Grimes if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) 584df8bae1dSRodney W. Grimes p2->p_flag |= P_CONTROLT; 585f591779bSSeigo Tanimura SESS_UNLOCK(p1->p_session); 5860e3eb7eeSSujal Patel if (flags & RFPPWAIT) 587df8bae1dSRodney W. Grimes p2->p_flag |= P_PPWAIT; 588be67169aSBruce Evans 5895cded904SOlivier Houchard p2->p_pgrp = p1->p_pgrp; 590b75356e1SJeffrey Hsu LIST_INSERT_AFTER(p1, p2, p_pglist); 5912a60b9b9SSeigo Tanimura PGRP_UNLOCK(p1->p_pgrp); 592b75356e1SJeffrey Hsu LIST_INIT(&p2->p_children); 593b75356e1SJeffrey Hsu 594c06eb4e2SSam Leffler callout_init(&p2->p_itcallout, CALLOUT_MPSAFE); 5954f559836SJake Burkholder 596df8bae1dSRodney W. Grimes /* 597df95311aSMatthew N. Dodd * If PF_FORK is set, the child process inherits the 598df95311aSMatthew N. Dodd * procfs ioctl flags from its parent. 599df95311aSMatthew N. Dodd */ 600df95311aSMatthew N. Dodd if (p1->p_pfsflags & PF_FORK) { 601df95311aSMatthew N. Dodd p2->p_stops = p1->p_stops; 602df95311aSMatthew N. Dodd p2->p_pfsflags = p1->p_pfsflags; 603df95311aSMatthew N. Dodd } 604df95311aSMatthew N. Dodd 605df95311aSMatthew N. Dodd /* 606df8bae1dSRodney W. Grimes * This begins the section where we must prevent the parent 607cda5aba4SDavid Schultz * from being swapped. 608df8bae1dSRodney W. Grimes */ 609cda5aba4SDavid Schultz _PHOLD(p1); 61057934cd3SJohn Baldwin PROC_UNLOCK(p1); 6110d2afceeSDavid Greenman 612df8bae1dSRodney W. Grimes /* 6133fc755c1SJohn Baldwin * Attach the new process to its parent. 6143fc755c1SJohn Baldwin * 6153fc755c1SJohn Baldwin * If RFNOWAIT is set, the newly created process becomes a child 6163fc755c1SJohn Baldwin * of init. This effectively disassociates the child from the 6173fc755c1SJohn Baldwin * parent. 6183fc755c1SJohn Baldwin */ 6193fc755c1SJohn Baldwin if (flags & RFNOWAIT) 6203fc755c1SJohn Baldwin pptr = initproc; 6213fc755c1SJohn Baldwin else 6223fc755c1SJohn Baldwin pptr = p1; 6233fc755c1SJohn Baldwin p2->p_pptr = pptr; 6243fc755c1SJohn Baldwin LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); 6253fc755c1SJohn Baldwin sx_xunlock(&proctree_lock); 6263fc755c1SJohn Baldwin 627bb0e8070SJohn Baldwin /* Inform accounting that we have forked. */ 628bb0e8070SJohn Baldwin p2->p_acflag = AFORK; 629bb0e8070SJohn Baldwin PROC_UNLOCK(p2); 630bb0e8070SJohn Baldwin 6317705d4b2SDmitry Chagin #ifdef KTRACE 6327705d4b2SDmitry Chagin ktrprocfork(p1, p2); 6337705d4b2SDmitry Chagin #endif 6347705d4b2SDmitry Chagin 6353fc755c1SJohn Baldwin /* 636a2a1c95cSPeter Wemm * Finish creating the child process. It will return via a different 637a2a1c95cSPeter Wemm * execution path later. (ie: directly into user mode) 638dabee6feSPeter Wemm */ 63989b57fcfSKonstantin Belousov vm_forkproc(td, p2, td2, vm2, flags); 640df8bae1dSRodney W. Grimes 6415d22597fSHajimu UMEMOTO if (flags == (RFFDG | RFPROC)) { 642393a081dSAttilio Rao PCPU_INC(cnt.v_forks); 643393a081dSAttilio Rao PCPU_ADD(cnt.v_forkpages, p2->p_vmspace->vm_dsize + 64494ddc707SAlan Cox p2->p_vmspace->vm_ssize); 6455d22597fSHajimu UMEMOTO } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) { 646393a081dSAttilio Rao PCPU_INC(cnt.v_vforks); 647393a081dSAttilio Rao PCPU_ADD(cnt.v_vforkpages, p2->p_vmspace->vm_dsize + 64894ddc707SAlan Cox p2->p_vmspace->vm_ssize); 6495d22597fSHajimu UMEMOTO } else if (p1 == &proc0) { 650393a081dSAttilio Rao PCPU_INC(cnt.v_kthreads); 651393a081dSAttilio Rao PCPU_ADD(cnt.v_kthreadpages, p2->p_vmspace->vm_dsize + 65294ddc707SAlan Cox p2->p_vmspace->vm_ssize); 6535d22597fSHajimu UMEMOTO } else { 654393a081dSAttilio Rao PCPU_INC(cnt.v_rforks); 655393a081dSAttilio Rao PCPU_ADD(cnt.v_rforkpages, p2->p_vmspace->vm_dsize + 65694ddc707SAlan Cox p2->p_vmspace->vm_ssize); 6575d22597fSHajimu UMEMOTO } 6585d22597fSHajimu UMEMOTO 659cfb5f768SJonathan Anderson #ifdef PROCDESC 660cfb5f768SJonathan Anderson /* 661cfb5f768SJonathan Anderson * Associate the process descriptor with the process before anything 662cfb5f768SJonathan Anderson * can happen that might cause that process to need the descriptor. 663cfb5f768SJonathan Anderson * However, don't do this until after fork(2) can no longer fail. 664cfb5f768SJonathan Anderson */ 665cfb5f768SJonathan Anderson if (flags & RFPROCDESC) 666cfb5f768SJonathan Anderson procdesc_new(p2, pdflags); 667cfb5f768SJonathan Anderson #endif 668cfb5f768SJonathan Anderson 669df8bae1dSRodney W. Grimes /* 670e9189611SPeter Wemm * Both processes are set up, now check if any loadable modules want 671e0d898b4SJulian Elischer * to adjust anything. 672fed06968SJulian Elischer */ 67375b8b3b2SJohn Baldwin EVENTHANDLER_INVOKE(process_fork, p1, p2, flags); 674fed06968SJulian Elischer 675fed06968SJulian Elischer /* 6764c3558aaSJohn Baldwin * Set the child start time and mark the process as being complete. 6774c3558aaSJohn Baldwin */ 6788e6fa660SJohn Baldwin PROC_LOCK(p2); 6798e6fa660SJohn Baldwin PROC_LOCK(p1); 6804c3558aaSJohn Baldwin microuptime(&p2->p_stats->p_start); 68111bda9b8SJeff Roberson PROC_SLOCK(p2); 6824c3558aaSJohn Baldwin p2->p_state = PRS_NORMAL; 68311bda9b8SJeff Roberson PROC_SUNLOCK(p2); 6846fa39a73SKonstantin Belousov 685d3555b6fSRui Paulo #ifdef KDTRACE_HOOKS 686d3555b6fSRui Paulo /* 687d3555b6fSRui Paulo * Tell the DTrace fasttrap provider about the new process 688d3555b6fSRui Paulo * if it has registered an interest. We have to do this only after 689d3555b6fSRui Paulo * p_state is PRS_NORMAL since the fasttrap module will use pfind() 690d3555b6fSRui Paulo * later on. 691d3555b6fSRui Paulo */ 6928e6fa660SJohn Baldwin if (dtrace_fasttrap_fork) 693d3555b6fSRui Paulo dtrace_fasttrap_fork(p1, p2); 694d3555b6fSRui Paulo #endif 6956fa39a73SKonstantin Belousov if ((p1->p_flag & (P_TRACED | P_FOLLOWFORK)) == (P_TRACED | 6966fa39a73SKonstantin Belousov P_FOLLOWFORK)) { 6974c3558aaSJohn Baldwin /* 6986fa39a73SKonstantin Belousov * Arrange for debugger to receive the fork event. 6996fa39a73SKonstantin Belousov * 7006fa39a73SKonstantin Belousov * We can report PL_FLAG_FORKED regardless of 7016fa39a73SKonstantin Belousov * P_FOLLOWFORK settings, but it does not make a sense 7026fa39a73SKonstantin Belousov * for runaway child. 703df8bae1dSRodney W. Grimes */ 7046fa39a73SKonstantin Belousov td->td_dbgflags |= TDB_FORK; 7056fa39a73SKonstantin Belousov td->td_dbg_forked = p2->p_pid; 7066fa39a73SKonstantin Belousov td2->td_dbgflags |= TDB_STOPATFORK; 7076fa39a73SKonstantin Belousov _PHOLD(p2); 7086fa39a73SKonstantin Belousov p2_held = 1; 7096fa39a73SKonstantin Belousov } 7108e6fa660SJohn Baldwin PROC_UNLOCK(p2); 7110384fff8SJason Evans if ((flags & RFSTOPPED) == 0) { 7126fa39a73SKonstantin Belousov /* 7136fa39a73SKonstantin Belousov * If RFSTOPPED not requested, make child runnable and 7146fa39a73SKonstantin Belousov * add to run queue. 7156fa39a73SKonstantin Belousov */ 71611bda9b8SJeff Roberson thread_lock(td2); 71771fad9fdSJulian Elischer TD_SET_CAN_RUN(td2); 718f0393f06SJeff Roberson sched_add(td2, SRQ_BORING); 71911bda9b8SJeff Roberson thread_unlock(td2); 7200384fff8SJason Evans } 721df8bae1dSRodney W. Grimes 722df8bae1dSRodney W. Grimes /* 723df8bae1dSRodney W. Grimes * Now can be swapped. 724df8bae1dSRodney W. Grimes */ 72557934cd3SJohn Baldwin _PRELE(p1); 7267054ee4eSKonstantin Belousov PROC_UNLOCK(p1); 727df8bae1dSRodney W. Grimes 728df8bae1dSRodney W. Grimes /* 72970fca427SJohn Baldwin * Tell any interested parties about the new process. 730cb679c38SJonathan Lemon */ 7317054ee4eSKonstantin Belousov knote_fork(&p1->p_klist, p2->p_pid); 7325d217f17SJohn Birrell SDT_PROBE(proc, kernel, , create, p2, p1, flags, 0, 0); 7335d217f17SJohn Birrell 734cb679c38SJonathan Lemon /* 7356fa39a73SKonstantin Belousov * Wait until debugger is attached to child. 7366fa39a73SKonstantin Belousov */ 7376fa39a73SKonstantin Belousov PROC_LOCK(p2); 7386fa39a73SKonstantin Belousov while ((td2->td_dbgflags & TDB_STOPATFORK) != 0) 7396fa39a73SKonstantin Belousov cv_wait(&p2->p_dbgwait, &p2->p_mtx); 7406fa39a73SKonstantin Belousov if (p2_held) 7416fa39a73SKonstantin Belousov _PRELE(p2); 7426fa39a73SKonstantin Belousov 7436fa39a73SKonstantin Belousov /* 744df8bae1dSRodney W. Grimes * Preserve synchronization semantics of vfork. If waiting for 745df8bae1dSRodney W. Grimes * child to exec or exit, set P_PPWAIT on child, and sleep on our 746df8bae1dSRodney W. Grimes * proc (in case of exit). 747df8bae1dSRodney W. Grimes */ 748df8bae1dSRodney W. Grimes while (p2->p_flag & P_PPWAIT) 749aeb32571SKonstantin Belousov cv_wait(&p2->p_pwait, &p2->p_mtx); 75057934cd3SJohn Baldwin PROC_UNLOCK(p2); 751afd01097SEdward Tomasz Napierala } 752afd01097SEdward Tomasz Napierala 753afd01097SEdward Tomasz Napierala int 754cfb5f768SJonathan Anderson fork1(struct thread *td, int flags, int pages, struct proc **procp, 755cfb5f768SJonathan Anderson int *procdescp, int pdflags) 756afd01097SEdward Tomasz Napierala { 757afd01097SEdward Tomasz Napierala struct proc *p1; 758afd01097SEdward Tomasz Napierala struct proc *newproc; 759afd01097SEdward Tomasz Napierala int ok; 760afd01097SEdward Tomasz Napierala struct thread *td2; 761afd01097SEdward Tomasz Napierala struct vmspace *vm2; 762afd01097SEdward Tomasz Napierala vm_ooffset_t mem_charged; 763afd01097SEdward Tomasz Napierala int error; 764afd01097SEdward Tomasz Napierala static int curfail; 765afd01097SEdward Tomasz Napierala static struct timeval lastfail; 766cfb5f768SJonathan Anderson #ifdef PROCDESC 767cfb5f768SJonathan Anderson struct file *fp_procdesc = NULL; 768cfb5f768SJonathan Anderson #endif 769afd01097SEdward Tomasz Napierala 770f49d8202SKonstantin Belousov /* Check for the undefined or unimplemented flags. */ 771f49d8202SKonstantin Belousov if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0) 772f49d8202SKonstantin Belousov return (EINVAL); 773f49d8202SKonstantin Belousov 774f49d8202SKonstantin Belousov /* Signal value requires RFTSIGZMB. */ 775f49d8202SKonstantin Belousov if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0) 776f49d8202SKonstantin Belousov return (EINVAL); 777f49d8202SKonstantin Belousov 778afd01097SEdward Tomasz Napierala /* Can't copy and clear. */ 779afd01097SEdward Tomasz Napierala if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) 780afd01097SEdward Tomasz Napierala return (EINVAL); 781afd01097SEdward Tomasz Napierala 782f49d8202SKonstantin Belousov /* Check the validity of the signal number. */ 783f49d8202SKonstantin Belousov if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG) 784f49d8202SKonstantin Belousov return (EINVAL); 785f49d8202SKonstantin Belousov 786cfb5f768SJonathan Anderson #ifdef PROCDESC 787cfb5f768SJonathan Anderson if ((flags & RFPROCDESC) != 0) { 788cfb5f768SJonathan Anderson /* Can't not create a process yet get a process descriptor. */ 789cfb5f768SJonathan Anderson if ((flags & RFPROC) == 0) 790cfb5f768SJonathan Anderson return (EINVAL); 791cfb5f768SJonathan Anderson 792cfb5f768SJonathan Anderson /* Must provide a place to put a procdesc if creating one. */ 793cfb5f768SJonathan Anderson if (procdescp == NULL) 794cfb5f768SJonathan Anderson return (EINVAL); 795cfb5f768SJonathan Anderson } 796cfb5f768SJonathan Anderson #endif 797cfb5f768SJonathan Anderson 798afd01097SEdward Tomasz Napierala p1 = td->td_proc; 799afd01097SEdward Tomasz Napierala 800afd01097SEdward Tomasz Napierala /* 801afd01097SEdward Tomasz Napierala * Here we don't create a new process, but we divorce 802afd01097SEdward Tomasz Napierala * certain parts of a process from itself. 803afd01097SEdward Tomasz Napierala */ 8043e73ff1eSEdward Tomasz Napierala if ((flags & RFPROC) == 0) { 8053e73ff1eSEdward Tomasz Napierala *procp = NULL; 8063e73ff1eSEdward Tomasz Napierala return (fork_norfproc(td, flags)); 8073e73ff1eSEdward Tomasz Napierala } 808afd01097SEdward Tomasz Napierala 809cfb5f768SJonathan Anderson #ifdef PROCDESC 810cfb5f768SJonathan Anderson /* 811cfb5f768SJonathan Anderson * If required, create a process descriptor in the parent first; we 812cfb5f768SJonathan Anderson * will abandon it if something goes wrong. We don't finit() until 813cfb5f768SJonathan Anderson * later. 814cfb5f768SJonathan Anderson */ 815cfb5f768SJonathan Anderson if (flags & RFPROCDESC) { 816cfb5f768SJonathan Anderson error = falloc(td, &fp_procdesc, procdescp, 0); 817b38520f0SEdward Tomasz Napierala if (error != 0) 818cfb5f768SJonathan Anderson return (error); 819cfb5f768SJonathan Anderson } 820cfb5f768SJonathan Anderson #endif 821cfb5f768SJonathan Anderson 822afd01097SEdward Tomasz Napierala mem_charged = 0; 823afd01097SEdward Tomasz Napierala vm2 = NULL; 824afd01097SEdward Tomasz Napierala if (pages == 0) 825afd01097SEdward Tomasz Napierala pages = KSTACK_PAGES; 826afd01097SEdward Tomasz Napierala /* Allocate new proc. */ 827afd01097SEdward Tomasz Napierala newproc = uma_zalloc(proc_zone, M_WAITOK); 828afd01097SEdward Tomasz Napierala td2 = FIRST_THREAD_IN_PROC(newproc); 829afd01097SEdward Tomasz Napierala if (td2 == NULL) { 830afd01097SEdward Tomasz Napierala td2 = thread_alloc(pages); 831afd01097SEdward Tomasz Napierala if (td2 == NULL) { 832afd01097SEdward Tomasz Napierala error = ENOMEM; 833afd01097SEdward Tomasz Napierala goto fail1; 834afd01097SEdward Tomasz Napierala } 835afd01097SEdward Tomasz Napierala proc_linkup(newproc, td2); 836afd01097SEdward Tomasz Napierala } else { 837afd01097SEdward Tomasz Napierala if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) { 838afd01097SEdward Tomasz Napierala if (td2->td_kstack != 0) 839afd01097SEdward Tomasz Napierala vm_thread_dispose(td2); 840afd01097SEdward Tomasz Napierala if (!thread_alloc_stack(td2, pages)) { 841afd01097SEdward Tomasz Napierala error = ENOMEM; 842afd01097SEdward Tomasz Napierala goto fail1; 843afd01097SEdward Tomasz Napierala } 844afd01097SEdward Tomasz Napierala } 845afd01097SEdward Tomasz Napierala } 846afd01097SEdward Tomasz Napierala 847afd01097SEdward Tomasz Napierala if ((flags & RFMEM) == 0) { 848afd01097SEdward Tomasz Napierala vm2 = vmspace_fork(p1->p_vmspace, &mem_charged); 849afd01097SEdward Tomasz Napierala if (vm2 == NULL) { 850afd01097SEdward Tomasz Napierala error = ENOMEM; 851afd01097SEdward Tomasz Napierala goto fail1; 852afd01097SEdward Tomasz Napierala } 853afd01097SEdward Tomasz Napierala if (!swap_reserve(mem_charged)) { 854afd01097SEdward Tomasz Napierala /* 855afd01097SEdward Tomasz Napierala * The swap reservation failed. The accounting 856afd01097SEdward Tomasz Napierala * from the entries of the copied vm2 will be 857afd01097SEdward Tomasz Napierala * substracted in vmspace_free(), so force the 858afd01097SEdward Tomasz Napierala * reservation there. 859afd01097SEdward Tomasz Napierala */ 860afd01097SEdward Tomasz Napierala swap_reserve_force(mem_charged); 861afd01097SEdward Tomasz Napierala error = ENOMEM; 862afd01097SEdward Tomasz Napierala goto fail1; 863afd01097SEdward Tomasz Napierala } 864afd01097SEdward Tomasz Napierala } else 865afd01097SEdward Tomasz Napierala vm2 = NULL; 866afd01097SEdward Tomasz Napierala 867097055e2SEdward Tomasz Napierala /* 868097055e2SEdward Tomasz Napierala * XXX: This is ugly; when we copy resource usage, we need to bump 869097055e2SEdward Tomasz Napierala * per-cred resource counters. 870097055e2SEdward Tomasz Napierala */ 871097055e2SEdward Tomasz Napierala newproc->p_ucred = p1->p_ucred; 872097055e2SEdward Tomasz Napierala 873097055e2SEdward Tomasz Napierala /* 874097055e2SEdward Tomasz Napierala * Initialize resource accounting for the child process. 875097055e2SEdward Tomasz Napierala */ 876097055e2SEdward Tomasz Napierala error = racct_proc_fork(p1, newproc); 877097055e2SEdward Tomasz Napierala if (error != 0) { 878097055e2SEdward Tomasz Napierala error = EAGAIN; 879097055e2SEdward Tomasz Napierala goto fail1; 880097055e2SEdward Tomasz Napierala } 881097055e2SEdward Tomasz Napierala 8821dbf9dccSEdward Tomasz Napierala #ifdef MAC 8831dbf9dccSEdward Tomasz Napierala mac_proc_init(newproc); 8841dbf9dccSEdward Tomasz Napierala #endif 8851dbf9dccSEdward Tomasz Napierala knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx); 8861dbf9dccSEdward Tomasz Napierala STAILQ_INIT(&newproc->p_ktr); 8871dbf9dccSEdward Tomasz Napierala 888afd01097SEdward Tomasz Napierala /* We have to lock the process tree while we look for a pid. */ 889afd01097SEdward Tomasz Napierala sx_slock(&proctree_lock); 890afd01097SEdward Tomasz Napierala 891afd01097SEdward Tomasz Napierala /* 892afd01097SEdward Tomasz Napierala * Although process entries are dynamically created, we still keep 893afd01097SEdward Tomasz Napierala * a global limit on the maximum number we will create. Don't allow 894afd01097SEdward Tomasz Napierala * a nonprivileged user to use the last ten processes; don't let root 895afd01097SEdward Tomasz Napierala * exceed the limit. The variable nprocs is the current number of 896afd01097SEdward Tomasz Napierala * processes, maxproc is the limit. 897afd01097SEdward Tomasz Napierala */ 898afd01097SEdward Tomasz Napierala sx_xlock(&allproc_lock); 899afd01097SEdward Tomasz Napierala if ((nprocs >= maxproc - 10 && priv_check_cred(td->td_ucred, 900afd01097SEdward Tomasz Napierala PRIV_MAXPROC, 0) != 0) || nprocs >= maxproc) { 901afd01097SEdward Tomasz Napierala error = EAGAIN; 902afd01097SEdward Tomasz Napierala goto fail; 903afd01097SEdward Tomasz Napierala } 904afd01097SEdward Tomasz Napierala 90558c77a9dSEdward Tomasz Napierala /* 906afd01097SEdward Tomasz Napierala * Increment the count of procs running with this uid. Don't allow 907afd01097SEdward Tomasz Napierala * a nonprivileged user to exceed their current limit. 908afd01097SEdward Tomasz Napierala * 909afd01097SEdward Tomasz Napierala * XXXRW: Can we avoid privilege here if it's not needed? 910afd01097SEdward Tomasz Napierala */ 911afd01097SEdward Tomasz Napierala error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0); 912afd01097SEdward Tomasz Napierala if (error == 0) 913afd01097SEdward Tomasz Napierala ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0); 914afd01097SEdward Tomasz Napierala else { 915afd01097SEdward Tomasz Napierala PROC_LOCK(p1); 916afd01097SEdward Tomasz Napierala ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 917afd01097SEdward Tomasz Napierala lim_cur(p1, RLIMIT_NPROC)); 918afd01097SEdward Tomasz Napierala PROC_UNLOCK(p1); 919afd01097SEdward Tomasz Napierala } 920afd01097SEdward Tomasz Napierala if (ok) { 921cfb5f768SJonathan Anderson do_fork(td, flags, newproc, td2, vm2, pdflags); 922afd01097SEdward Tomasz Napierala 92349539972SJulian Elischer /* 924df8abd0bSPeter Wemm * Return child proc pointer to parent. 925df8bae1dSRodney W. Grimes */ 926afd01097SEdward Tomasz Napierala *procp = newproc; 927cfb5f768SJonathan Anderson #ifdef PROCDESC 928cfb5f768SJonathan Anderson if (flags & RFPROCDESC) 929cfb5f768SJonathan Anderson procdesc_finit(newproc->p_procdesc, fp_procdesc); 930cfb5f768SJonathan Anderson #endif 93172a401d9SEdward Tomasz Napierala racct_proc_fork_done(newproc); 932df8bae1dSRodney W. Grimes return (0); 933afd01097SEdward Tomasz Napierala } 934afd01097SEdward Tomasz Napierala 935afd01097SEdward Tomasz Napierala error = EAGAIN; 936c6544064SJohn Baldwin fail: 9375ce2f678SJohn Baldwin sx_sunlock(&proctree_lock); 938b083ea51SMike Silbersack if (ppsratecheck(&lastfail, &curfail, 1)) 939b083ea51SMike Silbersack printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n", 9402905ade2SPawel Jakub Dawidek td->td_ucred->cr_ruid); 941c6544064SJohn Baldwin sx_xunlock(&allproc_lock); 9426bea667fSRobert Watson #ifdef MAC 94330d239bcSRobert Watson mac_proc_destroy(newproc); 9446bea667fSRobert Watson #endif 94589b57fcfSKonstantin Belousov fail1: 9461dbf9dccSEdward Tomasz Napierala racct_proc_exit(newproc); 94769aa768aSKonstantin Belousov if (vm2 != NULL) 94869aa768aSKonstantin Belousov vmspace_free(vm2); 949c6544064SJohn Baldwin uma_zfree(proc_zone, newproc); 950cfb5f768SJonathan Anderson #ifdef PROCDESC 951cfb5f768SJonathan Anderson if (((flags & RFPROCDESC) != 0) && (fp_procdesc != NULL)) 952cfb5f768SJonathan Anderson fdrop(fp_procdesc, td); 953cfb5f768SJonathan Anderson #endif 95484d37a46SJohn Baldwin pause("fork", hz / 2); 955c6544064SJohn Baldwin return (error); 956df8bae1dSRodney W. Grimes } 957fed06968SJulian Elischer 958e0d898b4SJulian Elischer /* 959a7b124c3SJohn Baldwin * Handle the return of a child process from fork1(). This function 960a7b124c3SJohn Baldwin * is called from the MD fork_trampoline() entry point. 961a7b124c3SJohn Baldwin */ 962a7b124c3SJohn Baldwin void 9631d845e86SEdward Tomasz Napierala fork_exit(void (*callout)(void *, struct trapframe *), void *arg, 9641d845e86SEdward Tomasz Napierala struct trapframe *frame) 965a7b124c3SJohn Baldwin { 966696058c3SJulian Elischer struct proc *p; 96770fca427SJohn Baldwin struct thread *td; 968fe54587fSJeff Roberson struct thread *dtd; 96970fca427SJohn Baldwin 9700047b9a9SBosko Milekic td = curthread; 9710047b9a9SBosko Milekic p = td->td_proc; 9720047b9a9SBosko Milekic KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new")); 9730047b9a9SBosko Milekic 9746617724cSJeff Roberson CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)", 975e01eafefSJulian Elischer td, td->td_sched, p->p_pid, td->td_name); 9760047b9a9SBosko Milekic 97711bda9b8SJeff Roberson sched_fork_exit(td); 978a7b124c3SJohn Baldwin /* 979fe54587fSJeff Roberson * Processes normally resume in mi_switch() after being 980fe54587fSJeff Roberson * cpu_switch()'ed to, but when children start up they arrive here 981fe54587fSJeff Roberson * instead, so we must do much the same things as mi_switch() would. 982fe54587fSJeff Roberson */ 983fe54587fSJeff Roberson if ((dtd = PCPU_GET(deadthread))) { 984fe54587fSJeff Roberson PCPU_SET(deadthread, NULL); 985fe54587fSJeff Roberson thread_stash(dtd); 986fe54587fSJeff Roberson } 987fe54587fSJeff Roberson thread_unlock(td); 988fe54587fSJeff Roberson 989fe54587fSJeff Roberson /* 990a7b124c3SJohn Baldwin * cpu_set_fork_handler intercepts this function call to 991a7b124c3SJohn Baldwin * have this call a non-return function to stay in kernel mode. 992a7b124c3SJohn Baldwin * initproc has its own fork handler, but it does return. 993a7b124c3SJohn Baldwin */ 9945813dc03SJohn Baldwin KASSERT(callout != NULL, ("NULL callout in fork_exit")); 9958865286bSJohn Baldwin callout(arg, frame); 996a7b124c3SJohn Baldwin 997a7b124c3SJohn Baldwin /* 998a7b124c3SJohn Baldwin * Check if a kernel thread misbehaved and returned from its main 999a7b124c3SJohn Baldwin * function. 1000a7b124c3SJohn Baldwin */ 1001a7b124c3SJohn Baldwin if (p->p_flag & P_KTHREAD) { 1002a7b124c3SJohn Baldwin printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n", 1003e01eafefSJulian Elischer td->td_name, p->p_pid); 10043745c395SJulian Elischer kproc_exit(0); 1005a7b124c3SJohn Baldwin } 1006a7b124c3SJohn Baldwin mtx_assert(&Giant, MA_NOTOWNED); 1007993182e5SAlexander Leidinger 1008e5d81ef1SDmitry Chagin if (p->p_sysent->sv_schedtail != NULL) 1009e5d81ef1SDmitry Chagin (p->p_sysent->sv_schedtail)(td); 1010a7b124c3SJohn Baldwin } 1011a7b124c3SJohn Baldwin 1012a7b124c3SJohn Baldwin /* 1013a7b124c3SJohn Baldwin * Simplified back end of syscall(), used when returning from fork() 1014a7b124c3SJohn Baldwin * directly into user mode. Giant is not held on entry, and must not 1015a7b124c3SJohn Baldwin * be held on return. This function is passed in to fork_exit() as the 1016a7b124c3SJohn Baldwin * first parameter and is called when returning to a new userland process. 1017a7b124c3SJohn Baldwin */ 1018a7b124c3SJohn Baldwin void 10191d845e86SEdward Tomasz Napierala fork_return(struct thread *td, struct trapframe *frame) 1020a7b124c3SJohn Baldwin { 10216fa39a73SKonstantin Belousov struct proc *p, *dbg; 10226fa39a73SKonstantin Belousov 10236fa39a73SKonstantin Belousov if (td->td_dbgflags & TDB_STOPATFORK) { 10246fa39a73SKonstantin Belousov p = td->td_proc; 10256fa39a73SKonstantin Belousov sx_xlock(&proctree_lock); 10266fa39a73SKonstantin Belousov PROC_LOCK(p); 10276fa39a73SKonstantin Belousov if ((p->p_pptr->p_flag & (P_TRACED | P_FOLLOWFORK)) == 10286fa39a73SKonstantin Belousov (P_TRACED | P_FOLLOWFORK)) { 10296fa39a73SKonstantin Belousov /* 10306fa39a73SKonstantin Belousov * If debugger still wants auto-attach for the 10316fa39a73SKonstantin Belousov * parent's children, do it now. 10326fa39a73SKonstantin Belousov */ 10336fa39a73SKonstantin Belousov dbg = p->p_pptr->p_pptr; 10346fa39a73SKonstantin Belousov p->p_flag |= P_TRACED; 10356fa39a73SKonstantin Belousov p->p_oppid = p->p_pptr->p_pid; 10366fa39a73SKonstantin Belousov proc_reparent(p, dbg); 10376fa39a73SKonstantin Belousov sx_xunlock(&proctree_lock); 1038*db327339SKonstantin Belousov td->td_dbgflags |= TDB_CHILD; 10396fa39a73SKonstantin Belousov ptracestop(td, SIGSTOP); 1040*db327339SKonstantin Belousov td->td_dbgflags &= ~TDB_CHILD; 10416fa39a73SKonstantin Belousov } else { 10426fa39a73SKonstantin Belousov /* 10436fa39a73SKonstantin Belousov * ... otherwise clear the request. 10446fa39a73SKonstantin Belousov */ 10456fa39a73SKonstantin Belousov sx_xunlock(&proctree_lock); 10466fa39a73SKonstantin Belousov td->td_dbgflags &= ~TDB_STOPATFORK; 10476fa39a73SKonstantin Belousov cv_broadcast(&p->p_dbgwait); 10486fa39a73SKonstantin Belousov } 10496fa39a73SKonstantin Belousov PROC_UNLOCK(p); 10506fa39a73SKonstantin Belousov } 1051a7b124c3SJohn Baldwin 1052eb2da9a5SPoul-Henning Kamp userret(td, frame); 10536fa39a73SKonstantin Belousov 1054a7b124c3SJohn Baldwin #ifdef KTRACE 1055af300f23SJohn Baldwin if (KTRPOINT(td, KTR_SYSRET)) 1056af300f23SJohn Baldwin ktrsysret(SYS_fork, 0, 0); 1057a7b124c3SJohn Baldwin #endif 1058a7b124c3SJohn Baldwin mtx_assert(&Giant, MA_NOTOWNED); 1059a7b124c3SJohn Baldwin } 1060