19454b2d8SWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1989, 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * (c) UNIX System Laboratories, Inc. 5df8bae1dSRodney W. Grimes * All or some portions of this file are derived from material licensed 6df8bae1dSRodney W. Grimes * to the University of California by American Telephone and Telegraph 7df8bae1dSRodney W. Grimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8df8bae1dSRodney W. Grimes * the permission of UNIX System Laboratories, Inc. 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 11df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 12df8bae1dSRodney W. Grimes * are met: 13df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 15df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 17df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 1869a28758SEd Maste * 3. Neither the name of the University nor the names of its contributors 19df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 20df8bae1dSRodney W. Grimes * without specific prior written permission. 21df8bae1dSRodney W. Grimes * 22df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32df8bae1dSRodney W. Grimes * SUCH DAMAGE. 33df8bae1dSRodney W. Grimes * 34df8bae1dSRodney W. Grimes * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 35df8bae1dSRodney W. Grimes */ 36df8bae1dSRodney W. Grimes 37677b542eSDavid E. O'Brien #include <sys/cdefs.h> 38677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 39677b542eSDavid E. O'Brien 40db6a20e2SGarrett Wollman #include "opt_ktrace.h" 418a945d10SKonstantin Belousov #include "opt_kstack_pages.h" 42db6a20e2SGarrett Wollman 43df8bae1dSRodney W. Grimes #include <sys/param.h> 44df8bae1dSRodney W. Grimes #include <sys/systm.h> 45d2d3e875SBruce Evans #include <sys/sysproto.h> 4675b8b3b2SJohn Baldwin #include <sys/eventhandler.h> 47cfb5f768SJonathan Anderson #include <sys/fcntl.h> 48df8bae1dSRodney W. Grimes #include <sys/filedesc.h> 490304c731SJamie Gritton #include <sys/jail.h> 50df8bae1dSRodney W. Grimes #include <sys/kernel.h> 5170fca427SJohn Baldwin #include <sys/kthread.h> 52c76e95c3SPeter Wemm #include <sys/sysctl.h> 5319284646SJohn Baldwin #include <sys/lock.h> 54df8bae1dSRodney W. Grimes #include <sys/malloc.h> 5535e0e5b3SJohn Baldwin #include <sys/mutex.h> 56acd3428bSRobert Watson #include <sys/priv.h> 57df8bae1dSRodney W. Grimes #include <sys/proc.h> 58cfb5f768SJonathan Anderson #include <sys/procdesc.h> 599ccba881SMatthew N. Dodd #include <sys/pioctl.h> 60189ac973SJohn Baldwin #include <sys/ptrace.h> 61097055e2SEdward Tomasz Napierala #include <sys/racct.h> 62df8bae1dSRodney W. Grimes #include <sys/resourcevar.h> 63b43179fbSJeff Roberson #include <sys/sched.h> 64a7b124c3SJohn Baldwin #include <sys/syscall.h> 6570fca427SJohn Baldwin #include <sys/vmmeter.h> 66df8bae1dSRodney W. Grimes #include <sys/vnode.h> 67df8bae1dSRodney W. Grimes #include <sys/acct.h> 680384fff8SJason Evans #include <sys/ktr.h> 69df8bae1dSRodney W. Grimes #include <sys/ktrace.h> 70b71fec07SBruce Evans #include <sys/unistd.h> 715d217f17SJohn Birrell #include <sys/sdt.h> 7257934cd3SJohn Baldwin #include <sys/sx.h> 73e5d81ef1SDmitry Chagin #include <sys/sysent.h> 746004362eSDavid Schultz #include <sys/signalvar.h> 75df8bae1dSRodney W. Grimes 76fcf7f27aSRobert Watson #include <security/audit/audit.h> 77aed55708SRobert Watson #include <security/mac/mac_framework.h> 78fcf7f27aSRobert Watson 79d93f860cSPoul-Henning Kamp #include <vm/vm.h> 80dabee6feSPeter Wemm #include <vm/pmap.h> 81dabee6feSPeter Wemm #include <vm/vm_map.h> 82efeaf95aSDavid Greenman #include <vm/vm_extern.h> 83c897b813SJeff Roberson #include <vm/uma.h> 846520495aSAdrian Chadd #include <vm/vm_domain.h> 85d93f860cSPoul-Henning Kamp 865d217f17SJohn Birrell #ifdef KDTRACE_HOOKS 875d217f17SJohn Birrell #include <sys/dtrace_bsd.h> 885d217f17SJohn Birrell dtrace_fork_func_t dtrace_fasttrap_fork; 895d217f17SJohn Birrell #endif 905d217f17SJohn Birrell 915d217f17SJohn Birrell SDT_PROVIDER_DECLARE(proc); 9236160958SMark Johnston SDT_PROBE_DEFINE3(proc, , , create, "struct proc *", "struct proc *", "int"); 9388c5ea45SJulian Elischer 94d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 95ad7507e2SSteven Wallace struct fork_args { 96ad7507e2SSteven Wallace int dummy; 97ad7507e2SSteven Wallace }; 98d2d3e875SBruce Evans #endif 99ad7507e2SSteven Wallace 100df8bae1dSRodney W. Grimes /* ARGSUSED */ 10126f9a767SRodney W. Grimes int 1028451d0ddSKip Macy sys_fork(struct thread *td, struct fork_args *uap) 103df8bae1dSRodney W. Grimes { 10433fd9b9aSMateusz Guzik struct fork_req fr; 105813361c1SMateusz Guzik int error, pid; 106be67169aSBruce Evans 10733fd9b9aSMateusz Guzik bzero(&fr, sizeof(fr)); 10833fd9b9aSMateusz Guzik fr.fr_flags = RFFDG | RFPROC; 109813361c1SMateusz Guzik fr.fr_pidp = &pid; 11033fd9b9aSMateusz Guzik error = fork1(td, &fr); 111df8abd0bSPeter Wemm if (error == 0) { 112813361c1SMateusz Guzik td->td_retval[0] = pid; 113b40ce416SJulian Elischer td->td_retval[1] = 0; 114df8abd0bSPeter Wemm } 11570fca427SJohn Baldwin return (error); 116df8bae1dSRodney W. Grimes } 117df8bae1dSRodney W. Grimes 118cfb5f768SJonathan Anderson /* ARGUSED */ 119cfb5f768SJonathan Anderson int 1200c829a30SMateusz Guzik sys_pdfork(struct thread *td, struct pdfork_args *uap) 121cfb5f768SJonathan Anderson { 12233fd9b9aSMateusz Guzik struct fork_req fr; 123813361c1SMateusz Guzik int error, fd, pid; 124cfb5f768SJonathan Anderson 12533fd9b9aSMateusz Guzik bzero(&fr, sizeof(fr)); 12633fd9b9aSMateusz Guzik fr.fr_flags = RFFDG | RFPROC | RFPROCDESC; 127813361c1SMateusz Guzik fr.fr_pidp = &pid; 12833fd9b9aSMateusz Guzik fr.fr_pd_fd = &fd; 12933fd9b9aSMateusz Guzik fr.fr_pd_flags = uap->flags; 130cfb5f768SJonathan Anderson /* 131cfb5f768SJonathan Anderson * It is necessary to return fd by reference because 0 is a valid file 132cfb5f768SJonathan Anderson * descriptor number, and the child needs to be able to distinguish 133cfb5f768SJonathan Anderson * itself from the parent using the return value. 134cfb5f768SJonathan Anderson */ 13533fd9b9aSMateusz Guzik error = fork1(td, &fr); 136cfb5f768SJonathan Anderson if (error == 0) { 137813361c1SMateusz Guzik td->td_retval[0] = pid; 138cfb5f768SJonathan Anderson td->td_retval[1] = 0; 139cfb5f768SJonathan Anderson error = copyout(&fd, uap->fdp, sizeof(fd)); 140cfb5f768SJonathan Anderson } 141cfb5f768SJonathan Anderson return (error); 142cfb5f768SJonathan Anderson } 143cfb5f768SJonathan Anderson 144df8bae1dSRodney W. Grimes /* ARGSUSED */ 14526f9a767SRodney W. Grimes int 1468451d0ddSKip Macy sys_vfork(struct thread *td, struct vfork_args *uap) 147df8bae1dSRodney W. Grimes { 14833fd9b9aSMateusz Guzik struct fork_req fr; 149813361c1SMateusz Guzik int error, pid; 150be67169aSBruce Evans 15133fd9b9aSMateusz Guzik bzero(&fr, sizeof(fr)); 15233fd9b9aSMateusz Guzik fr.fr_flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; 153813361c1SMateusz Guzik fr.fr_pidp = &pid; 15433fd9b9aSMateusz Guzik error = fork1(td, &fr); 155df8abd0bSPeter Wemm if (error == 0) { 156813361c1SMateusz Guzik td->td_retval[0] = pid; 157b40ce416SJulian Elischer td->td_retval[1] = 0; 158df8abd0bSPeter Wemm } 15970fca427SJohn Baldwin return (error); 160df8bae1dSRodney W. Grimes } 161df8bae1dSRodney W. Grimes 162dabee6feSPeter Wemm int 1638451d0ddSKip Macy sys_rfork(struct thread *td, struct rfork_args *uap) 164dabee6feSPeter Wemm { 16533fd9b9aSMateusz Guzik struct fork_req fr; 166813361c1SMateusz Guzik int error, pid; 167be67169aSBruce Evans 168c8564ad4SBruce Evans /* Don't allow kernel-only flags. */ 169885ccc61SJohn Baldwin if ((uap->flags & RFKERNELONLY) != 0) 170885ccc61SJohn Baldwin return (EINVAL); 171c8564ad4SBruce Evans 17214961ba7SRobert Watson AUDIT_ARG_FFLAGS(uap->flags); 17333fd9b9aSMateusz Guzik bzero(&fr, sizeof(fr)); 17433fd9b9aSMateusz Guzik fr.fr_flags = uap->flags; 175813361c1SMateusz Guzik fr.fr_pidp = &pid; 17633fd9b9aSMateusz Guzik error = fork1(td, &fr); 177df8abd0bSPeter Wemm if (error == 0) { 178813361c1SMateusz Guzik td->td_retval[0] = pid; 179b40ce416SJulian Elischer td->td_retval[1] = 0; 180df8abd0bSPeter Wemm } 18170fca427SJohn Baldwin return (error); 182dabee6feSPeter Wemm } 183dabee6feSPeter Wemm 184df8bae1dSRodney W. Grimes int nprocs = 1; /* process 0 */ 1858f7e4eb5SDag-Erling Smørgrav int lastpid = 0; 1868f7e4eb5SDag-Erling Smørgrav SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, 187d941d475SRobert Watson "Last used PID"); 188df8bae1dSRodney W. Grimes 189bb6a234eSPeter Wemm /* 1908f7e4eb5SDag-Erling Smørgrav * Random component to lastpid generation. We mix in a random factor to make 191bb6a234eSPeter Wemm * it a little harder to predict. We sanity check the modulus value to avoid 192bb6a234eSPeter Wemm * doing it in critical paths. Don't let it be too small or we pointlessly 193bb6a234eSPeter Wemm * waste randomness entropy, and don't let it be impossibly large. Using a 194bb6a234eSPeter Wemm * modulus that is too big causes a LOT more process table scans and slows 195bb6a234eSPeter Wemm * down fork processing as the pidchecked caching is defeated. 196bb6a234eSPeter Wemm */ 197ee3fd601SDan Moschuk static int randompid = 0; 198bb6a234eSPeter Wemm 199bb6a234eSPeter Wemm static int 20082d9ae4eSPoul-Henning Kamp sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) 201bb6a234eSPeter Wemm { 202bb6a234eSPeter Wemm int error, pid; 203bb6a234eSPeter Wemm 20447934cefSDon Lewis error = sysctl_wire_old_buffer(req, sizeof(int)); 20547934cefSDon Lewis if (error != 0) 20647934cefSDon Lewis return(error); 2073fc755c1SJohn Baldwin sx_xlock(&allproc_lock); 208bb6a234eSPeter Wemm pid = randompid; 209bb6a234eSPeter Wemm error = sysctl_handle_int(oidp, &pid, 0, req); 2103fc755c1SJohn Baldwin if (error == 0 && req->newptr != NULL) { 21102c6fc21SKonstantin Belousov if (pid < 0 || pid > pid_max - 100) /* out of range */ 21202c6fc21SKonstantin Belousov pid = pid_max - 100; 213bb6a234eSPeter Wemm else if (pid < 2) /* NOP */ 214bb6a234eSPeter Wemm pid = 0; 215bb6a234eSPeter Wemm else if (pid < 100) /* Make it reasonable */ 216bb6a234eSPeter Wemm pid = 100; 217bb6a234eSPeter Wemm randompid = pid; 2183fc755c1SJohn Baldwin } 2193fc755c1SJohn Baldwin sx_xunlock(&allproc_lock); 220bb6a234eSPeter Wemm return (error); 221bb6a234eSPeter Wemm } 222bb6a234eSPeter Wemm 223bb6a234eSPeter Wemm SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 224bb6a234eSPeter Wemm 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); 225ee3fd601SDan Moschuk 2261d845e86SEdward Tomasz Napierala static int 227afd01097SEdward Tomasz Napierala fork_findpid(int flags) 228afd01097SEdward Tomasz Napierala { 229afd01097SEdward Tomasz Napierala struct proc *p; 230afd01097SEdward Tomasz Napierala int trypid; 231afd01097SEdward Tomasz Napierala static int pidchecked = 0; 232afd01097SEdward Tomasz Napierala 2333e73ff1eSEdward Tomasz Napierala /* 2343e73ff1eSEdward Tomasz Napierala * Requires allproc_lock in order to iterate over the list 2353e73ff1eSEdward Tomasz Napierala * of processes, and proctree_lock to access p_pgrp. 2363e73ff1eSEdward Tomasz Napierala */ 2373e73ff1eSEdward Tomasz Napierala sx_assert(&allproc_lock, SX_LOCKED); 2383e73ff1eSEdward Tomasz Napierala sx_assert(&proctree_lock, SX_LOCKED); 239afd01097SEdward Tomasz Napierala 240afd01097SEdward Tomasz Napierala /* 241afd01097SEdward Tomasz Napierala * Find an unused process ID. We remember a range of unused IDs 242afd01097SEdward Tomasz Napierala * ready to use (from lastpid+1 through pidchecked-1). 243afd01097SEdward Tomasz Napierala * 244afd01097SEdward Tomasz Napierala * If RFHIGHPID is set (used during system boot), do not allocate 245afd01097SEdward Tomasz Napierala * low-numbered pids. 246afd01097SEdward Tomasz Napierala */ 247afd01097SEdward Tomasz Napierala trypid = lastpid + 1; 248afd01097SEdward Tomasz Napierala if (flags & RFHIGHPID) { 249afd01097SEdward Tomasz Napierala if (trypid < 10) 250afd01097SEdward Tomasz Napierala trypid = 10; 251afd01097SEdward Tomasz Napierala } else { 252afd01097SEdward Tomasz Napierala if (randompid) 253afd01097SEdward Tomasz Napierala trypid += arc4random() % randompid; 254afd01097SEdward Tomasz Napierala } 255afd01097SEdward Tomasz Napierala retry: 256afd01097SEdward Tomasz Napierala /* 257afd01097SEdward Tomasz Napierala * If the process ID prototype has wrapped around, 258afd01097SEdward Tomasz Napierala * restart somewhat above 0, as the low-numbered procs 259afd01097SEdward Tomasz Napierala * tend to include daemons that don't exit. 260afd01097SEdward Tomasz Napierala */ 26102c6fc21SKonstantin Belousov if (trypid >= pid_max) { 26202c6fc21SKonstantin Belousov trypid = trypid % pid_max; 263afd01097SEdward Tomasz Napierala if (trypid < 100) 264afd01097SEdward Tomasz Napierala trypid += 100; 265afd01097SEdward Tomasz Napierala pidchecked = 0; 266afd01097SEdward Tomasz Napierala } 267afd01097SEdward Tomasz Napierala if (trypid >= pidchecked) { 268afd01097SEdward Tomasz Napierala int doingzomb = 0; 269afd01097SEdward Tomasz Napierala 270afd01097SEdward Tomasz Napierala pidchecked = PID_MAX; 271afd01097SEdward Tomasz Napierala /* 272afd01097SEdward Tomasz Napierala * Scan the active and zombie procs to check whether this pid 273afd01097SEdward Tomasz Napierala * is in use. Remember the lowest pid that's greater 274afd01097SEdward Tomasz Napierala * than trypid, so we can avoid checking for a while. 275237623b0SKonstantin Belousov * 276237623b0SKonstantin Belousov * Avoid reuse of the process group id, session id or 277237623b0SKonstantin Belousov * the reaper subtree id. Note that for process group 278237623b0SKonstantin Belousov * and sessions, the amount of reserved pids is 279237623b0SKonstantin Belousov * limited by process limit. For the subtree ids, the 280237623b0SKonstantin Belousov * id is kept reserved only while there is a 281237623b0SKonstantin Belousov * non-reaped process in the subtree, so amount of 282237623b0SKonstantin Belousov * reserved pids is limited by process limit times 283237623b0SKonstantin Belousov * two. 284afd01097SEdward Tomasz Napierala */ 285afd01097SEdward Tomasz Napierala p = LIST_FIRST(&allproc); 286afd01097SEdward Tomasz Napierala again: 287afd01097SEdward Tomasz Napierala for (; p != NULL; p = LIST_NEXT(p, p_list)) { 288afd01097SEdward Tomasz Napierala while (p->p_pid == trypid || 289237623b0SKonstantin Belousov p->p_reapsubtree == trypid || 290afd01097SEdward Tomasz Napierala (p->p_pgrp != NULL && 291afd01097SEdward Tomasz Napierala (p->p_pgrp->pg_id == trypid || 292afd01097SEdward Tomasz Napierala (p->p_session != NULL && 293afd01097SEdward Tomasz Napierala p->p_session->s_sid == trypid)))) { 294afd01097SEdward Tomasz Napierala trypid++; 295afd01097SEdward Tomasz Napierala if (trypid >= pidchecked) 296afd01097SEdward Tomasz Napierala goto retry; 297afd01097SEdward Tomasz Napierala } 298afd01097SEdward Tomasz Napierala if (p->p_pid > trypid && pidchecked > p->p_pid) 299afd01097SEdward Tomasz Napierala pidchecked = p->p_pid; 300afd01097SEdward Tomasz Napierala if (p->p_pgrp != NULL) { 301afd01097SEdward Tomasz Napierala if (p->p_pgrp->pg_id > trypid && 302afd01097SEdward Tomasz Napierala pidchecked > p->p_pgrp->pg_id) 303afd01097SEdward Tomasz Napierala pidchecked = p->p_pgrp->pg_id; 304afd01097SEdward Tomasz Napierala if (p->p_session != NULL && 305afd01097SEdward Tomasz Napierala p->p_session->s_sid > trypid && 306afd01097SEdward Tomasz Napierala pidchecked > p->p_session->s_sid) 307afd01097SEdward Tomasz Napierala pidchecked = p->p_session->s_sid; 308afd01097SEdward Tomasz Napierala } 309afd01097SEdward Tomasz Napierala } 310afd01097SEdward Tomasz Napierala if (!doingzomb) { 311afd01097SEdward Tomasz Napierala doingzomb = 1; 312afd01097SEdward Tomasz Napierala p = LIST_FIRST(&zombproc); 313afd01097SEdward Tomasz Napierala goto again; 314afd01097SEdward Tomasz Napierala } 315afd01097SEdward Tomasz Napierala } 316afd01097SEdward Tomasz Napierala 317afd01097SEdward Tomasz Napierala /* 318afd01097SEdward Tomasz Napierala * RFHIGHPID does not mess with the lastpid counter during boot. 319afd01097SEdward Tomasz Napierala */ 320afd01097SEdward Tomasz Napierala if (flags & RFHIGHPID) 321afd01097SEdward Tomasz Napierala pidchecked = 0; 322afd01097SEdward Tomasz Napierala else 323afd01097SEdward Tomasz Napierala lastpid = trypid; 324afd01097SEdward Tomasz Napierala 325afd01097SEdward Tomasz Napierala return (trypid); 326afd01097SEdward Tomasz Napierala } 327afd01097SEdward Tomasz Napierala 328afd01097SEdward Tomasz Napierala static int 3293e73ff1eSEdward Tomasz Napierala fork_norfproc(struct thread *td, int flags) 3301d845e86SEdward Tomasz Napierala { 3311d845e86SEdward Tomasz Napierala int error; 3321d845e86SEdward Tomasz Napierala struct proc *p1; 3331d845e86SEdward Tomasz Napierala 334087bfb0eSEdward Tomasz Napierala KASSERT((flags & RFPROC) == 0, 335087bfb0eSEdward Tomasz Napierala ("fork_norfproc called with RFPROC set")); 3361d845e86SEdward Tomasz Napierala p1 = td->td_proc; 3371d845e86SEdward Tomasz Napierala 3381d845e86SEdward Tomasz Napierala if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && 3391d845e86SEdward Tomasz Napierala (flags & (RFCFDG | RFFDG))) { 3401d845e86SEdward Tomasz Napierala PROC_LOCK(p1); 3416ddcc233SKonstantin Belousov if (thread_single(p1, SINGLE_BOUNDARY)) { 3421d845e86SEdward Tomasz Napierala PROC_UNLOCK(p1); 3431d845e86SEdward Tomasz Napierala return (ERESTART); 3441d845e86SEdward Tomasz Napierala } 3451d845e86SEdward Tomasz Napierala PROC_UNLOCK(p1); 3461d845e86SEdward Tomasz Napierala } 3471d845e86SEdward Tomasz Napierala 3481d845e86SEdward Tomasz Napierala error = vm_forkproc(td, NULL, NULL, NULL, flags); 3491d845e86SEdward Tomasz Napierala if (error) 3501d845e86SEdward Tomasz Napierala goto fail; 3511d845e86SEdward Tomasz Napierala 3521d845e86SEdward Tomasz Napierala /* 3531d845e86SEdward Tomasz Napierala * Close all file descriptors. 3541d845e86SEdward Tomasz Napierala */ 3551d845e86SEdward Tomasz Napierala if (flags & RFCFDG) { 3561d845e86SEdward Tomasz Napierala struct filedesc *fdtmp; 357eb48fbd9SMateusz Guzik fdtmp = fdinit(td->td_proc->p_fd, false); 3582609222aSPawel Jakub Dawidek fdescfree(td); 3591d845e86SEdward Tomasz Napierala p1->p_fd = fdtmp; 3601d845e86SEdward Tomasz Napierala } 3611d845e86SEdward Tomasz Napierala 3621d845e86SEdward Tomasz Napierala /* 3631d845e86SEdward Tomasz Napierala * Unshare file descriptors (from parent). 3641d845e86SEdward Tomasz Napierala */ 3651d845e86SEdward Tomasz Napierala if (flags & RFFDG) 366b9d32c36SMateusz Guzik fdunshare(td); 3671d845e86SEdward Tomasz Napierala 3681d845e86SEdward Tomasz Napierala fail: 3691d845e86SEdward Tomasz Napierala if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && 3701d845e86SEdward Tomasz Napierala (flags & (RFCFDG | RFFDG))) { 3711d845e86SEdward Tomasz Napierala PROC_LOCK(p1); 3726ddcc233SKonstantin Belousov thread_single_end(p1, SINGLE_BOUNDARY); 3731d845e86SEdward Tomasz Napierala PROC_UNLOCK(p1); 3741d845e86SEdward Tomasz Napierala } 3751d845e86SEdward Tomasz Napierala return (error); 3761d845e86SEdward Tomasz Napierala } 3771d845e86SEdward Tomasz Napierala 378afd01097SEdward Tomasz Napierala static void 379813361c1SMateusz Guzik do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2, 380813361c1SMateusz Guzik struct vmspace *vm2, struct file *fp_procdesc) 381df8bae1dSRodney W. Grimes { 382afd01097SEdward Tomasz Napierala struct proc *p1, *pptr; 383813361c1SMateusz Guzik int trypid; 3845641ae5dSJohn Baldwin struct filedesc *fd; 385ad05d580STor Egge struct filedesc_to_leader *fdtol; 3863fc755c1SJohn Baldwin struct sigacts *newsigacts; 3875856e12eSJohn Dyson 388afd01097SEdward Tomasz Napierala sx_assert(&proctree_lock, SX_SLOCKED); 389afd01097SEdward Tomasz Napierala sx_assert(&allproc_lock, SX_XLOCKED); 390df8bae1dSRodney W. Grimes 39170fca427SJohn Baldwin p1 = td->td_proc; 39270fca427SJohn Baldwin 393813361c1SMateusz Guzik trypid = fork_findpid(fr->fr_flags); 394df8bae1dSRodney W. Grimes 3955ce2f678SJohn Baldwin sx_sunlock(&proctree_lock); 396df8bae1dSRodney W. Grimes 397e602ba25SJulian Elischer p2->p_state = PRS_NEW; /* protect against others */ 398553629ebSJake Burkholder p2->p_pid = trypid; 39914961ba7SRobert Watson AUDIT_ARG_PID(p2->p_pid); 400553629ebSJake Burkholder LIST_INSERT_HEAD(&allproc, p2, p_list); 4016ddcc233SKonstantin Belousov allproc_gen++; 402553629ebSJake Burkholder LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); 403cf7d9a8cSDavid Xu tidhash_add(td2); 4041ad9ee86SXin LI PROC_LOCK(p2); 4051ad9ee86SXin LI PROC_LOCK(p1); 4061ad9ee86SXin LI 4071005a129SJohn Baldwin sx_xunlock(&allproc_lock); 408553629ebSJake Burkholder 4091ad9ee86SXin LI bcopy(&p1->p_startcopy, &p2->p_startcopy, 4101ad9ee86SXin LI __rangeof(struct proc, p_startcopy, p_endcopy)); 4118b4a2800SKonstantin Belousov pargs_hold(p2->p_args); 4126520495aSAdrian Chadd 4131ad9ee86SXin LI PROC_UNLOCK(p1); 4141ad9ee86SXin LI 4151ad9ee86SXin LI bzero(&p2->p_startzero, 4161ad9ee86SXin LI __rangeof(struct proc, p_startzero, p_endzero)); 4171ad9ee86SXin LI 4180304c731SJamie Gritton /* Tell the prison that we exist. */ 419413628a7SBjoern A. Zeeb prison_proc_hold(p2->p_ucred->cr_prison); 420413628a7SBjoern A. Zeeb 4211ad9ee86SXin LI PROC_UNLOCK(p2); 4221ad9ee86SXin LI 4230384fff8SJason Evans /* 4243fc755c1SJohn Baldwin * Malloc things while we don't hold any locks. 4253fc755c1SJohn Baldwin */ 426813361c1SMateusz Guzik if (fr->fr_flags & RFSIGSHARE) 4273fc755c1SJohn Baldwin newsigacts = NULL; 42890af4afaSJohn Baldwin else 42990af4afaSJohn Baldwin newsigacts = sigacts_alloc(); 4303fc755c1SJohn Baldwin 4313fc755c1SJohn Baldwin /* 4323fc755c1SJohn Baldwin * Copy filedesc. 4333fc755c1SJohn Baldwin */ 434813361c1SMateusz Guzik if (fr->fr_flags & RFCFDG) { 435eb48fbd9SMateusz Guzik fd = fdinit(p1->p_fd, false); 436ad05d580STor Egge fdtol = NULL; 437813361c1SMateusz Guzik } else if (fr->fr_flags & RFFDG) { 438598b7ec8SPoul-Henning Kamp fd = fdcopy(p1->p_fd); 439ad05d580STor Egge fdtol = NULL; 440ad05d580STor Egge } else { 441c7f1c11bSAlfred Perlstein fd = fdshare(p1->p_fd); 442ad05d580STor Egge if (p1->p_fdtol == NULL) 4433e73ff1eSEdward Tomasz Napierala p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL, 444ad05d580STor Egge p1->p_leader); 445813361c1SMateusz Guzik if ((fr->fr_flags & RFTHREAD) != 0) { 446ad05d580STor Egge /* 4473e73ff1eSEdward Tomasz Napierala * Shared file descriptor table, and shared 4483e73ff1eSEdward Tomasz Napierala * process leaders. 449ad05d580STor Egge */ 450ad05d580STor Egge fdtol = p1->p_fdtol; 4515e3f7694SRobert Watson FILEDESC_XLOCK(p1->p_fd); 452ad05d580STor Egge fdtol->fdl_refcount++; 4535e3f7694SRobert Watson FILEDESC_XUNLOCK(p1->p_fd); 454ad05d580STor Egge } else { 455ad05d580STor Egge /* 4563e73ff1eSEdward Tomasz Napierala * Shared file descriptor table, and different 4573e73ff1eSEdward Tomasz Napierala * process leaders. 458ad05d580STor Egge */ 459ad05d580STor Egge fdtol = filedesc_to_leader_alloc(p1->p_fdtol, 4603e73ff1eSEdward Tomasz Napierala p1->p_fd, p2); 461ad05d580STor Egge } 462ad05d580STor Egge } 4633fc755c1SJohn Baldwin /* 464df8bae1dSRodney W. Grimes * Make a proc table entry for the new process. 465df8bae1dSRodney W. Grimes * Start by zeroing the section of proc that is zero-initialized, 466df8bae1dSRodney W. Grimes * then copy the section that is copied directly from the parent. 467df8bae1dSRodney W. Grimes */ 468316ec49aSScott Long 4697d447c95SJohn Baldwin PROC_LOCK(p2); 4707d447c95SJohn Baldwin PROC_LOCK(p1); 4717d447c95SJohn Baldwin 472079b7badSJulian Elischer bzero(&td2->td_startzero, 4736db36923SDavid Schultz __rangeof(struct thread, td_startzero, td_endzero)); 474079b7badSJulian Elischer 475079b7badSJulian Elischer bcopy(&td->td_startcopy, &td2->td_startcopy, 4766db36923SDavid Schultz __rangeof(struct thread, td_startcopy, td_endcopy)); 477df8bae1dSRodney W. Grimes 4784b9322aeSJulian Elischer bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name)); 479a30ec4b9SDavid Xu td2->td_sigstk = td->td_sigstk; 480b61ce5b0SJeff Roberson td2->td_flags = TDF_INMEM; 481acbe332aSDavid Xu td2->td_lend_user_pri = PRI_MAX; 482a30ec4b9SDavid Xu 48321ca7b57SMarko Zec #ifdef VIMAGE 48421ca7b57SMarko Zec td2->td_vnet = NULL; 48521ca7b57SMarko Zec td2->td_vnet_lpush = NULL; 48621ca7b57SMarko Zec #endif 48721ca7b57SMarko Zec 488df8bae1dSRodney W. Grimes /* 48922d19207SJohn Baldwin * Allow the scheduler to initialize the child. 49022d19207SJohn Baldwin */ 49122d19207SJohn Baldwin thread_lock(td); 49222d19207SJohn Baldwin sched_fork(td, td2); 49322d19207SJohn Baldwin thread_unlock(td); 49422d19207SJohn Baldwin 49522d19207SJohn Baldwin /* 496df8bae1dSRodney W. Grimes * Duplicate sub-structures as needed. 497df8bae1dSRodney W. Grimes * Increase reference counts on shared objects. 498df8bae1dSRodney W. Grimes */ 499b61ce5b0SJeff Roberson p2->p_flag = P_INMEM; 500*643f6f47SKonstantin Belousov p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP); 50154b0e65fSJeff Roberson p2->p_swtick = ticks; 5029752f794SJohn Baldwin if (p1->p_flag & P_PROFIL) 5039752f794SJohn Baldwin startprofclock(p2); 504b9df5231SPoul-Henning Kamp 5056520495aSAdrian Chadd /* 5066520495aSAdrian Chadd * Whilst the proc lock is held, copy the VM domain data out 5076520495aSAdrian Chadd * using the VM domain method. 5086520495aSAdrian Chadd */ 5096520495aSAdrian Chadd vm_domain_policy_init(&p2->p_vm_dom_policy); 5106520495aSAdrian Chadd vm_domain_policy_localcopy(&p2->p_vm_dom_policy, 5116520495aSAdrian Chadd &p1->p_vm_dom_policy); 5126520495aSAdrian Chadd 513813361c1SMateusz Guzik if (fr->fr_flags & RFSIGSHARE) { 51490af4afaSJohn Baldwin p2->p_sigacts = sigacts_hold(p1->p_sigacts); 5156626c604SJulian Elischer } else { 51690af4afaSJohn Baldwin sigacts_copy(newsigacts, p1->p_sigacts); 51790af4afaSJohn Baldwin p2->p_sigacts = newsigacts; 5186626c604SJulian Elischer } 519f49d8202SKonstantin Belousov 520813361c1SMateusz Guzik if (fr->fr_flags & RFTSIGZMB) 521813361c1SMateusz Guzik p2->p_sigparent = RFTSIGNUM(fr->fr_flags); 522813361c1SMateusz Guzik else if (fr->fr_flags & RFLINUXTHPN) 5236626c604SJulian Elischer p2->p_sigparent = SIGUSR1; 5244ac9ae70SJulian Elischer else 5254ac9ae70SJulian Elischer p2->p_sigparent = SIGCHLD; 52688c5ea45SJulian Elischer 527df8bae1dSRodney W. Grimes p2->p_textvp = p1->p_textvp; 5285641ae5dSJohn Baldwin p2->p_fd = fd; 529ad05d580STor Egge p2->p_fdtol = fdtol; 530dabee6feSPeter Wemm 53155648840SJohn Baldwin if (p1->p_flag2 & P2_INHERIT_PROTECTED) { 53255648840SJohn Baldwin p2->p_flag |= P_PROTECTED; 53355648840SJohn Baldwin p2->p_flag2 |= P2_INHERIT_PROTECTED; 53455648840SJohn Baldwin } 53555648840SJohn Baldwin 536df8bae1dSRodney W. Grimes /* 537c8564ad4SBruce Evans * p_limit is copy-on-write. Bump its refcount. 538df8bae1dSRodney W. Grimes */ 5391c4bcd05SJeff Roberson lim_fork(p1, p2); 5408b059651SDavid Schultz 5414ea6a9a2SMateusz Guzik thread_cow_get_proc(td2, p2); 5424ea6a9a2SMateusz Guzik 5438b059651SDavid Schultz pstats_fork(p1->p_stats, p2->p_stats); 5448b059651SDavid Schultz 545299bc736SDavid Schultz PROC_UNLOCK(p1); 546cda5aba4SDavid Schultz PROC_UNLOCK(p2); 547df8bae1dSRodney W. Grimes 5483e73ff1eSEdward Tomasz Napierala /* Bump references to the text vnode (for procfs). */ 549a69d88afSPeter Wemm if (p2->p_textvp) 550a69d88afSPeter Wemm vref(p2->p_textvp); 551a69d88afSPeter Wemm 552c6544064SJohn Baldwin /* 553c8564ad4SBruce Evans * Set up linkage for kernel based threading. 554c6544064SJohn Baldwin */ 555813361c1SMateusz Guzik if ((fr->fr_flags & RFTHREAD) != 0) { 556c6544064SJohn Baldwin mtx_lock(&ppeers_lock); 557c6544064SJohn Baldwin p2->p_peers = p1->p_peers; 558c6544064SJohn Baldwin p1->p_peers = p2; 559c6544064SJohn Baldwin p2->p_leader = p1->p_leader; 560c6544064SJohn Baldwin mtx_unlock(&ppeers_lock); 561c6544064SJohn Baldwin PROC_LOCK(p1->p_leader); 562c6544064SJohn Baldwin if ((p1->p_leader->p_flag & P_WEXIT) != 0) { 563c6544064SJohn Baldwin PROC_UNLOCK(p1->p_leader); 564c6544064SJohn Baldwin /* 565c6544064SJohn Baldwin * The task leader is exiting, so process p1 is 566c6544064SJohn Baldwin * going to be killed shortly. Since p1 obviously 567c6544064SJohn Baldwin * isn't dead yet, we know that the leader is either 568c6544064SJohn Baldwin * sending SIGKILL's to all the processes in this 569c6544064SJohn Baldwin * task or is sleeping waiting for all the peers to 570c6544064SJohn Baldwin * exit. We let p1 complete the fork, but we need 571c6544064SJohn Baldwin * to go ahead and kill the new process p2 since 572c6544064SJohn Baldwin * the task leader may not get a chance to send 573c6544064SJohn Baldwin * SIGKILL to it. We leave it on the list so that 574c6544064SJohn Baldwin * the task leader will wait for this new process 575c6544064SJohn Baldwin * to commit suicide. 576c6544064SJohn Baldwin */ 577c6544064SJohn Baldwin PROC_LOCK(p2); 5788451d0ddSKip Macy kern_psignal(p2, SIGKILL); 579c6544064SJohn Baldwin PROC_UNLOCK(p2); 580293d2d22SRobert Watson } else 581293d2d22SRobert Watson PROC_UNLOCK(p1->p_leader); 582c6544064SJohn Baldwin } else { 583c6544064SJohn Baldwin p2->p_peers = NULL; 584c6544064SJohn Baldwin p2->p_leader = p2; 585c6544064SJohn Baldwin } 586c6544064SJohn Baldwin 5873fc755c1SJohn Baldwin sx_xlock(&proctree_lock); 5883fc755c1SJohn Baldwin PGRP_LOCK(p1->p_pgrp); 5893fc755c1SJohn Baldwin PROC_LOCK(p2); 5903fc755c1SJohn Baldwin PROC_LOCK(p1); 5913fc755c1SJohn Baldwin 59270e534e7SDavid Greenman /* 5939752f794SJohn Baldwin * Preserve some more flags in subprocess. P_PROFIL has already 594be67169aSBruce Evans * been preserved. 59570e534e7SDavid Greenman */ 596a30ec4b9SDavid Xu p2->p_flag |= p1->p_flag & P_SUGID; 597aff57357SEd Schouten td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING; 598f591779bSSeigo Tanimura SESS_LOCK(p1->p_session); 599df8bae1dSRodney W. Grimes if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) 600df8bae1dSRodney W. Grimes p2->p_flag |= P_CONTROLT; 601f591779bSSeigo Tanimura SESS_UNLOCK(p1->p_session); 602813361c1SMateusz Guzik if (fr->fr_flags & RFPPWAIT) 603df8bae1dSRodney W. Grimes p2->p_flag |= P_PPWAIT; 604be67169aSBruce Evans 6055cded904SOlivier Houchard p2->p_pgrp = p1->p_pgrp; 606b75356e1SJeffrey Hsu LIST_INSERT_AFTER(p1, p2, p_pglist); 6072a60b9b9SSeigo Tanimura PGRP_UNLOCK(p1->p_pgrp); 608b75356e1SJeffrey Hsu LIST_INIT(&p2->p_children); 609dcd43281SKonstantin Belousov LIST_INIT(&p2->p_orphans); 610b75356e1SJeffrey Hsu 611f7e50ea7SKonstantin Belousov callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0); 6124f559836SJake Burkholder 613df8bae1dSRodney W. Grimes /* 614df95311aSMatthew N. Dodd * If PF_FORK is set, the child process inherits the 615df95311aSMatthew N. Dodd * procfs ioctl flags from its parent. 616df95311aSMatthew N. Dodd */ 617df95311aSMatthew N. Dodd if (p1->p_pfsflags & PF_FORK) { 618df95311aSMatthew N. Dodd p2->p_stops = p1->p_stops; 619df95311aSMatthew N. Dodd p2->p_pfsflags = p1->p_pfsflags; 620df95311aSMatthew N. Dodd } 621df95311aSMatthew N. Dodd 622df95311aSMatthew N. Dodd /* 623df8bae1dSRodney W. Grimes * This begins the section where we must prevent the parent 624cda5aba4SDavid Schultz * from being swapped. 625df8bae1dSRodney W. Grimes */ 626cda5aba4SDavid Schultz _PHOLD(p1); 62757934cd3SJohn Baldwin PROC_UNLOCK(p1); 6280d2afceeSDavid Greenman 629df8bae1dSRodney W. Grimes /* 6303fc755c1SJohn Baldwin * Attach the new process to its parent. 6313fc755c1SJohn Baldwin * 6323fc755c1SJohn Baldwin * If RFNOWAIT is set, the newly created process becomes a child 6333fc755c1SJohn Baldwin * of init. This effectively disassociates the child from the 6343fc755c1SJohn Baldwin * parent. 6353fc755c1SJohn Baldwin */ 636813361c1SMateusz Guzik if ((fr->fr_flags & RFNOWAIT) != 0) { 637237623b0SKonstantin Belousov pptr = p1->p_reaper; 638237623b0SKonstantin Belousov p2->p_reaper = pptr; 639237623b0SKonstantin Belousov } else { 640237623b0SKonstantin Belousov p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ? 641237623b0SKonstantin Belousov p1 : p1->p_reaper; 6423fc755c1SJohn Baldwin pptr = p1; 643237623b0SKonstantin Belousov } 6443fc755c1SJohn Baldwin p2->p_pptr = pptr; 6453fc755c1SJohn Baldwin LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); 646237623b0SKonstantin Belousov LIST_INIT(&p2->p_reaplist); 647237623b0SKonstantin Belousov LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling); 648237623b0SKonstantin Belousov if (p2->p_reaper == p1) 649237623b0SKonstantin Belousov p2->p_reapsubtree = p2->p_pid; 6503fc755c1SJohn Baldwin sx_xunlock(&proctree_lock); 6513fc755c1SJohn Baldwin 652bb0e8070SJohn Baldwin /* Inform accounting that we have forked. */ 653bb0e8070SJohn Baldwin p2->p_acflag = AFORK; 654bb0e8070SJohn Baldwin PROC_UNLOCK(p2); 655bb0e8070SJohn Baldwin 6567705d4b2SDmitry Chagin #ifdef KTRACE 6577705d4b2SDmitry Chagin ktrprocfork(p1, p2); 6587705d4b2SDmitry Chagin #endif 6597705d4b2SDmitry Chagin 6603fc755c1SJohn Baldwin /* 661a2a1c95cSPeter Wemm * Finish creating the child process. It will return via a different 662a2a1c95cSPeter Wemm * execution path later. (ie: directly into user mode) 663dabee6feSPeter Wemm */ 664813361c1SMateusz Guzik vm_forkproc(td, p2, td2, vm2, fr->fr_flags); 665df8bae1dSRodney W. Grimes 666813361c1SMateusz Guzik if (fr->fr_flags == (RFFDG | RFPROC)) { 667393a081dSAttilio Rao PCPU_INC(cnt.v_forks); 668393a081dSAttilio Rao PCPU_ADD(cnt.v_forkpages, p2->p_vmspace->vm_dsize + 66994ddc707SAlan Cox p2->p_vmspace->vm_ssize); 670813361c1SMateusz Guzik } else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) { 671393a081dSAttilio Rao PCPU_INC(cnt.v_vforks); 672393a081dSAttilio Rao PCPU_ADD(cnt.v_vforkpages, p2->p_vmspace->vm_dsize + 67394ddc707SAlan Cox p2->p_vmspace->vm_ssize); 6745d22597fSHajimu UMEMOTO } else if (p1 == &proc0) { 675393a081dSAttilio Rao PCPU_INC(cnt.v_kthreads); 676393a081dSAttilio Rao PCPU_ADD(cnt.v_kthreadpages, p2->p_vmspace->vm_dsize + 67794ddc707SAlan Cox p2->p_vmspace->vm_ssize); 6785d22597fSHajimu UMEMOTO } else { 679393a081dSAttilio Rao PCPU_INC(cnt.v_rforks); 680393a081dSAttilio Rao PCPU_ADD(cnt.v_rforkpages, p2->p_vmspace->vm_dsize + 68194ddc707SAlan Cox p2->p_vmspace->vm_ssize); 6825d22597fSHajimu UMEMOTO } 6835d22597fSHajimu UMEMOTO 684cfb5f768SJonathan Anderson /* 685cfb5f768SJonathan Anderson * Associate the process descriptor with the process before anything 686cfb5f768SJonathan Anderson * can happen that might cause that process to need the descriptor. 687cfb5f768SJonathan Anderson * However, don't do this until after fork(2) can no longer fail. 688cfb5f768SJonathan Anderson */ 689813361c1SMateusz Guzik if (fr->fr_flags & RFPROCDESC) 690813361c1SMateusz Guzik procdesc_new(p2, fr->fr_pd_flags); 691cfb5f768SJonathan Anderson 692df8bae1dSRodney W. Grimes /* 693e9189611SPeter Wemm * Both processes are set up, now check if any loadable modules want 694e0d898b4SJulian Elischer * to adjust anything. 695fed06968SJulian Elischer */ 696813361c1SMateusz Guzik EVENTHANDLER_INVOKE(process_fork, p1, p2, fr->fr_flags); 697fed06968SJulian Elischer 698fed06968SJulian Elischer /* 6994c3558aaSJohn Baldwin * Set the child start time and mark the process as being complete. 7004c3558aaSJohn Baldwin */ 7018e6fa660SJohn Baldwin PROC_LOCK(p2); 7028e6fa660SJohn Baldwin PROC_LOCK(p1); 7034c3558aaSJohn Baldwin microuptime(&p2->p_stats->p_start); 70411bda9b8SJeff Roberson PROC_SLOCK(p2); 7054c3558aaSJohn Baldwin p2->p_state = PRS_NORMAL; 70611bda9b8SJeff Roberson PROC_SUNLOCK(p2); 7076fa39a73SKonstantin Belousov 708d3555b6fSRui Paulo #ifdef KDTRACE_HOOKS 709d3555b6fSRui Paulo /* 7107159310fSMark Johnston * Tell the DTrace fasttrap provider about the new process so that any 7117159310fSMark Johnston * tracepoints inherited from the parent can be removed. We have to do 7127159310fSMark Johnston * this only after p_state is PRS_NORMAL since the fasttrap module will 7137159310fSMark Johnston * use pfind() later on. 714d3555b6fSRui Paulo */ 715813361c1SMateusz Guzik if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork) 716d3555b6fSRui Paulo dtrace_fasttrap_fork(p1, p2); 717d3555b6fSRui Paulo #endif 718813361c1SMateusz Guzik /* 719813361c1SMateusz Guzik * Hold the process so that it cannot exit after we make it runnable, 720813361c1SMateusz Guzik * but before we wait for the debugger. 721813361c1SMateusz Guzik */ 722813361c1SMateusz Guzik _PHOLD(p2); 7238d570f64SJohn Baldwin if (p1->p_ptevents & PTRACE_FORK) { 7244c3558aaSJohn Baldwin /* 7256fa39a73SKonstantin Belousov * Arrange for debugger to receive the fork event. 7266fa39a73SKonstantin Belousov * 7276fa39a73SKonstantin Belousov * We can report PL_FLAG_FORKED regardless of 7286fa39a73SKonstantin Belousov * P_FOLLOWFORK settings, but it does not make a sense 7296fa39a73SKonstantin Belousov * for runaway child. 730df8bae1dSRodney W. Grimes */ 7316fa39a73SKonstantin Belousov td->td_dbgflags |= TDB_FORK; 7326fa39a73SKonstantin Belousov td->td_dbg_forked = p2->p_pid; 7336fa39a73SKonstantin Belousov td2->td_dbgflags |= TDB_STOPATFORK; 7346fa39a73SKonstantin Belousov } 735813361c1SMateusz Guzik if (fr->fr_flags & RFPPWAIT) { 7361d7ca9bbSKonstantin Belousov td->td_pflags |= TDP_RFPPWAIT; 7371d7ca9bbSKonstantin Belousov td->td_rfppwait_p = p2; 738fc4f075aSJohn Baldwin td->td_dbgflags |= TDB_VFORK; 7391d7ca9bbSKonstantin Belousov } 7408e6fa660SJohn Baldwin PROC_UNLOCK(p2); 741df8bae1dSRodney W. Grimes 742df8bae1dSRodney W. Grimes /* 743df8bae1dSRodney W. Grimes * Now can be swapped. 744df8bae1dSRodney W. Grimes */ 74557934cd3SJohn Baldwin _PRELE(p1); 7467054ee4eSKonstantin Belousov PROC_UNLOCK(p1); 747df8bae1dSRodney W. Grimes 748df8bae1dSRodney W. Grimes /* 74970fca427SJohn Baldwin * Tell any interested parties about the new process. 750cb679c38SJonathan Lemon */ 7519e590ff0SKonstantin Belousov knote_fork(p1->p_klist, p2->p_pid); 752813361c1SMateusz Guzik SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags); 7535d217f17SJohn Birrell 754813361c1SMateusz Guzik if (fr->fr_flags & RFPROCDESC) { 755813361c1SMateusz Guzik procdesc_finit(p2->p_procdesc, fp_procdesc); 756813361c1SMateusz Guzik fdrop(fp_procdesc, td); 757813361c1SMateusz Guzik } 758813361c1SMateusz Guzik 759813361c1SMateusz Guzik if ((fr->fr_flags & RFSTOPPED) == 0) { 760813361c1SMateusz Guzik /* 761813361c1SMateusz Guzik * If RFSTOPPED not requested, make child runnable and 762813361c1SMateusz Guzik * add to run queue. 763813361c1SMateusz Guzik */ 764813361c1SMateusz Guzik thread_lock(td2); 765813361c1SMateusz Guzik TD_SET_CAN_RUN(td2); 766813361c1SMateusz Guzik sched_add(td2, SRQ_BORING); 767813361c1SMateusz Guzik thread_unlock(td2); 768813361c1SMateusz Guzik if (fr->fr_pidp != NULL) 769813361c1SMateusz Guzik *fr->fr_pidp = p2->p_pid; 770813361c1SMateusz Guzik } else { 771813361c1SMateusz Guzik *fr->fr_procp = p2; 772813361c1SMateusz Guzik } 773813361c1SMateusz Guzik 774813361c1SMateusz Guzik PROC_LOCK(p2); 775cb679c38SJonathan Lemon /* 7766fa39a73SKonstantin Belousov * Wait until debugger is attached to child. 7776fa39a73SKonstantin Belousov */ 7784732ae43SKonstantin Belousov while (td2->td_proc == p2 && (td2->td_dbgflags & TDB_STOPATFORK) != 0) 7796fa39a73SKonstantin Belousov cv_wait(&p2->p_dbgwait, &p2->p_mtx); 7806fa39a73SKonstantin Belousov _PRELE(p2); 781813361c1SMateusz Guzik racct_proc_fork_done(p2); 78257934cd3SJohn Baldwin PROC_UNLOCK(p2); 783afd01097SEdward Tomasz Napierala } 784afd01097SEdward Tomasz Napierala 785afd01097SEdward Tomasz Napierala int 78633fd9b9aSMateusz Guzik fork1(struct thread *td, struct fork_req *fr) 787afd01097SEdward Tomasz Napierala { 7884b48959fSKonstantin Belousov struct proc *p1, *newproc; 789afd01097SEdward Tomasz Napierala struct thread *td2; 790afd01097SEdward Tomasz Napierala struct vmspace *vm2; 7914b48959fSKonstantin Belousov struct file *fp_procdesc; 792afd01097SEdward Tomasz Napierala vm_ooffset_t mem_charged; 7934b48959fSKonstantin Belousov int error, nprocs_new, ok; 794afd01097SEdward Tomasz Napierala static int curfail; 795afd01097SEdward Tomasz Napierala static struct timeval lastfail; 79633fd9b9aSMateusz Guzik int flags, pages; 79733fd9b9aSMateusz Guzik 79833fd9b9aSMateusz Guzik flags = fr->fr_flags; 79933fd9b9aSMateusz Guzik pages = fr->fr_pages; 800afd01097SEdward Tomasz Napierala 801813361c1SMateusz Guzik if ((flags & RFSTOPPED) != 0) 802813361c1SMateusz Guzik MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL); 803813361c1SMateusz Guzik else 804813361c1SMateusz Guzik MPASS(fr->fr_procp == NULL); 805813361c1SMateusz Guzik 806f49d8202SKonstantin Belousov /* Check for the undefined or unimplemented flags. */ 807f49d8202SKonstantin Belousov if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0) 808f49d8202SKonstantin Belousov return (EINVAL); 809f49d8202SKonstantin Belousov 810f49d8202SKonstantin Belousov /* Signal value requires RFTSIGZMB. */ 811f49d8202SKonstantin Belousov if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0) 812f49d8202SKonstantin Belousov return (EINVAL); 813f49d8202SKonstantin Belousov 814afd01097SEdward Tomasz Napierala /* Can't copy and clear. */ 815afd01097SEdward Tomasz Napierala if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) 816afd01097SEdward Tomasz Napierala return (EINVAL); 817afd01097SEdward Tomasz Napierala 818f49d8202SKonstantin Belousov /* Check the validity of the signal number. */ 819f49d8202SKonstantin Belousov if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG) 820f49d8202SKonstantin Belousov return (EINVAL); 821f49d8202SKonstantin Belousov 822cfb5f768SJonathan Anderson if ((flags & RFPROCDESC) != 0) { 823cfb5f768SJonathan Anderson /* Can't not create a process yet get a process descriptor. */ 824cfb5f768SJonathan Anderson if ((flags & RFPROC) == 0) 825cfb5f768SJonathan Anderson return (EINVAL); 826cfb5f768SJonathan Anderson 827cfb5f768SJonathan Anderson /* Must provide a place to put a procdesc if creating one. */ 82833fd9b9aSMateusz Guzik if (fr->fr_pd_fd == NULL) 829cfb5f768SJonathan Anderson return (EINVAL); 830b3a73448SMariusz Zaborski 831b3a73448SMariusz Zaborski /* Check if we are using supported flags. */ 832b3a73448SMariusz Zaborski if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0) 833b3a73448SMariusz Zaborski return (EINVAL); 834cfb5f768SJonathan Anderson } 835cfb5f768SJonathan Anderson 836afd01097SEdward Tomasz Napierala p1 = td->td_proc; 837afd01097SEdward Tomasz Napierala 838afd01097SEdward Tomasz Napierala /* 839afd01097SEdward Tomasz Napierala * Here we don't create a new process, but we divorce 840afd01097SEdward Tomasz Napierala * certain parts of a process from itself. 841afd01097SEdward Tomasz Napierala */ 8423e73ff1eSEdward Tomasz Napierala if ((flags & RFPROC) == 0) { 843813361c1SMateusz Guzik if (fr->fr_procp != NULL) 84433fd9b9aSMateusz Guzik *fr->fr_procp = NULL; 845813361c1SMateusz Guzik else if (fr->fr_pidp != NULL) 846813361c1SMateusz Guzik *fr->fr_pidp = 0; 8473e73ff1eSEdward Tomasz Napierala return (fork_norfproc(td, flags)); 8483e73ff1eSEdward Tomasz Napierala } 849afd01097SEdward Tomasz Napierala 8504b48959fSKonstantin Belousov fp_procdesc = NULL; 8514b48959fSKonstantin Belousov newproc = NULL; 8524b48959fSKonstantin Belousov vm2 = NULL; 8534b48959fSKonstantin Belousov 8544b48959fSKonstantin Belousov /* 8554b48959fSKonstantin Belousov * Increment the nprocs resource before allocations occur. 8564b48959fSKonstantin Belousov * Although process entries are dynamically created, we still 8574b48959fSKonstantin Belousov * keep a global limit on the maximum number we will 8584b48959fSKonstantin Belousov * create. There are hard-limits as to the number of processes 8594b48959fSKonstantin Belousov * that can run, established by the KVA and memory usage for 8604b48959fSKonstantin Belousov * the process data. 8614b48959fSKonstantin Belousov * 8624b48959fSKonstantin Belousov * Don't allow a nonprivileged user to use the last ten 8634b48959fSKonstantin Belousov * processes; don't let root exceed the limit. 8644b48959fSKonstantin Belousov */ 8654b48959fSKonstantin Belousov nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1; 8664b48959fSKonstantin Belousov if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred, 8674b48959fSKonstantin Belousov PRIV_MAXPROC, 0) != 0) || nprocs_new >= maxproc) { 8684b48959fSKonstantin Belousov error = EAGAIN; 8694b48959fSKonstantin Belousov sx_xlock(&allproc_lock); 8704b48959fSKonstantin Belousov if (ppsratecheck(&lastfail, &curfail, 1)) { 8714b48959fSKonstantin Belousov printf("maxproc limit exceeded by uid %u (pid %d); " 8724b48959fSKonstantin Belousov "see tuning(7) and login.conf(5)\n", 8734b48959fSKonstantin Belousov td->td_ucred->cr_ruid, p1->p_pid); 8744b48959fSKonstantin Belousov } 8754b48959fSKonstantin Belousov sx_xunlock(&allproc_lock); 8764b48959fSKonstantin Belousov goto fail2; 8774b48959fSKonstantin Belousov } 8784b48959fSKonstantin Belousov 879cfb5f768SJonathan Anderson /* 880cfb5f768SJonathan Anderson * If required, create a process descriptor in the parent first; we 881cfb5f768SJonathan Anderson * will abandon it if something goes wrong. We don't finit() until 882cfb5f768SJonathan Anderson * later. 883cfb5f768SJonathan Anderson */ 884cfb5f768SJonathan Anderson if (flags & RFPROCDESC) { 885b3a73448SMariusz Zaborski error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd, 886b3a73448SMariusz Zaborski fr->fr_pd_flags, fr->fr_pd_fcaps); 887b38520f0SEdward Tomasz Napierala if (error != 0) 888d8f3dc78SKonstantin Belousov goto fail2; 889cfb5f768SJonathan Anderson } 890cfb5f768SJonathan Anderson 891afd01097SEdward Tomasz Napierala mem_charged = 0; 892afd01097SEdward Tomasz Napierala if (pages == 0) 893edc82223SKonstantin Belousov pages = kstack_pages; 894afd01097SEdward Tomasz Napierala /* Allocate new proc. */ 895afd01097SEdward Tomasz Napierala newproc = uma_zalloc(proc_zone, M_WAITOK); 896afd01097SEdward Tomasz Napierala td2 = FIRST_THREAD_IN_PROC(newproc); 897afd01097SEdward Tomasz Napierala if (td2 == NULL) { 898afd01097SEdward Tomasz Napierala td2 = thread_alloc(pages); 899afd01097SEdward Tomasz Napierala if (td2 == NULL) { 900afd01097SEdward Tomasz Napierala error = ENOMEM; 90112cec311SMateusz Guzik goto fail2; 902afd01097SEdward Tomasz Napierala } 903afd01097SEdward Tomasz Napierala proc_linkup(newproc, td2); 904afd01097SEdward Tomasz Napierala } else { 905afd01097SEdward Tomasz Napierala if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) { 906afd01097SEdward Tomasz Napierala if (td2->td_kstack != 0) 907afd01097SEdward Tomasz Napierala vm_thread_dispose(td2); 908afd01097SEdward Tomasz Napierala if (!thread_alloc_stack(td2, pages)) { 909afd01097SEdward Tomasz Napierala error = ENOMEM; 91012cec311SMateusz Guzik goto fail2; 911afd01097SEdward Tomasz Napierala } 912afd01097SEdward Tomasz Napierala } 913afd01097SEdward Tomasz Napierala } 914afd01097SEdward Tomasz Napierala 915afd01097SEdward Tomasz Napierala if ((flags & RFMEM) == 0) { 916afd01097SEdward Tomasz Napierala vm2 = vmspace_fork(p1->p_vmspace, &mem_charged); 917afd01097SEdward Tomasz Napierala if (vm2 == NULL) { 918afd01097SEdward Tomasz Napierala error = ENOMEM; 91912cec311SMateusz Guzik goto fail2; 920afd01097SEdward Tomasz Napierala } 921afd01097SEdward Tomasz Napierala if (!swap_reserve(mem_charged)) { 922afd01097SEdward Tomasz Napierala /* 923afd01097SEdward Tomasz Napierala * The swap reservation failed. The accounting 924afd01097SEdward Tomasz Napierala * from the entries of the copied vm2 will be 925e3043798SPedro F. Giffuni * subtracted in vmspace_free(), so force the 926afd01097SEdward Tomasz Napierala * reservation there. 927afd01097SEdward Tomasz Napierala */ 928afd01097SEdward Tomasz Napierala swap_reserve_force(mem_charged); 929afd01097SEdward Tomasz Napierala error = ENOMEM; 93012cec311SMateusz Guzik goto fail2; 931afd01097SEdward Tomasz Napierala } 932afd01097SEdward Tomasz Napierala } else 933afd01097SEdward Tomasz Napierala vm2 = NULL; 934afd01097SEdward Tomasz Napierala 935097055e2SEdward Tomasz Napierala /* 936097055e2SEdward Tomasz Napierala * XXX: This is ugly; when we copy resource usage, we need to bump 937097055e2SEdward Tomasz Napierala * per-cred resource counters. 938097055e2SEdward Tomasz Napierala */ 939ffb34484SMateusz Guzik proc_set_cred_init(newproc, crhold(td->td_ucred)); 940097055e2SEdward Tomasz Napierala 941097055e2SEdward Tomasz Napierala /* 942097055e2SEdward Tomasz Napierala * Initialize resource accounting for the child process. 943097055e2SEdward Tomasz Napierala */ 944097055e2SEdward Tomasz Napierala error = racct_proc_fork(p1, newproc); 945097055e2SEdward Tomasz Napierala if (error != 0) { 946097055e2SEdward Tomasz Napierala error = EAGAIN; 947097055e2SEdward Tomasz Napierala goto fail1; 948097055e2SEdward Tomasz Napierala } 949097055e2SEdward Tomasz Napierala 9501dbf9dccSEdward Tomasz Napierala #ifdef MAC 9511dbf9dccSEdward Tomasz Napierala mac_proc_init(newproc); 9521dbf9dccSEdward Tomasz Napierala #endif 9539e590ff0SKonstantin Belousov newproc->p_klist = knlist_alloc(&newproc->p_mtx); 9541dbf9dccSEdward Tomasz Napierala STAILQ_INIT(&newproc->p_ktr); 9551dbf9dccSEdward Tomasz Napierala 956afd01097SEdward Tomasz Napierala /* We have to lock the process tree while we look for a pid. */ 957afd01097SEdward Tomasz Napierala sx_slock(&proctree_lock); 958afd01097SEdward Tomasz Napierala sx_xlock(&allproc_lock); 959afd01097SEdward Tomasz Napierala 96058c77a9dSEdward Tomasz Napierala /* 961afd01097SEdward Tomasz Napierala * Increment the count of procs running with this uid. Don't allow 962afd01097SEdward Tomasz Napierala * a nonprivileged user to exceed their current limit. 963afd01097SEdward Tomasz Napierala * 964afd01097SEdward Tomasz Napierala * XXXRW: Can we avoid privilege here if it's not needed? 965afd01097SEdward Tomasz Napierala */ 966afd01097SEdward Tomasz Napierala error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0); 967afd01097SEdward Tomasz Napierala if (error == 0) 968afd01097SEdward Tomasz Napierala ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0); 969afd01097SEdward Tomasz Napierala else { 970afd01097SEdward Tomasz Napierala ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 971f6f6d240SMateusz Guzik lim_cur(td, RLIMIT_NPROC)); 972afd01097SEdward Tomasz Napierala } 973afd01097SEdward Tomasz Napierala if (ok) { 974813361c1SMateusz Guzik do_fork(td, fr, newproc, td2, vm2, fp_procdesc); 975df8bae1dSRodney W. Grimes return (0); 976afd01097SEdward Tomasz Napierala } 977afd01097SEdward Tomasz Napierala 978afd01097SEdward Tomasz Napierala error = EAGAIN; 9795ce2f678SJohn Baldwin sx_sunlock(&proctree_lock); 980c6544064SJohn Baldwin sx_xunlock(&allproc_lock); 9816bea667fSRobert Watson #ifdef MAC 98230d239bcSRobert Watson mac_proc_destroy(newproc); 9836bea667fSRobert Watson #endif 9841dbf9dccSEdward Tomasz Napierala racct_proc_exit(newproc); 985ab27d5d8SEdward Tomasz Napierala fail1: 986edf1796dSMateusz Guzik crfree(newproc->p_ucred); 987edf1796dSMateusz Guzik newproc->p_ucred = NULL; 98812cec311SMateusz Guzik fail2: 98969aa768aSKonstantin Belousov if (vm2 != NULL) 99069aa768aSKonstantin Belousov vmspace_free(vm2); 991c6544064SJohn Baldwin uma_zfree(proc_zone, newproc); 992de265498SPawel Jakub Dawidek if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) { 99333fd9b9aSMateusz Guzik fdclose(td, fp_procdesc, *fr->fr_pd_fd); 994cfb5f768SJonathan Anderson fdrop(fp_procdesc, td); 9950a7007b9SPawel Jakub Dawidek } 9964b48959fSKonstantin Belousov atomic_add_int(&nprocs, -1); 99784d37a46SJohn Baldwin pause("fork", hz / 2); 998c6544064SJohn Baldwin return (error); 999df8bae1dSRodney W. Grimes } 1000fed06968SJulian Elischer 1001e0d898b4SJulian Elischer /* 1002a7b124c3SJohn Baldwin * Handle the return of a child process from fork1(). This function 1003a7b124c3SJohn Baldwin * is called from the MD fork_trampoline() entry point. 1004a7b124c3SJohn Baldwin */ 1005a7b124c3SJohn Baldwin void 10061d845e86SEdward Tomasz Napierala fork_exit(void (*callout)(void *, struct trapframe *), void *arg, 10071d845e86SEdward Tomasz Napierala struct trapframe *frame) 1008a7b124c3SJohn Baldwin { 1009696058c3SJulian Elischer struct proc *p; 101070fca427SJohn Baldwin struct thread *td; 1011fe54587fSJeff Roberson struct thread *dtd; 101270fca427SJohn Baldwin 10130047b9a9SBosko Milekic td = curthread; 10140047b9a9SBosko Milekic p = td->td_proc; 10150047b9a9SBosko Milekic KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new")); 10160047b9a9SBosko Milekic 10176617724cSJeff Roberson CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)", 101893ccd6bfSKonstantin Belousov td, td_get_sched(td), p->p_pid, td->td_name); 10190047b9a9SBosko Milekic 102011bda9b8SJeff Roberson sched_fork_exit(td); 1021a7b124c3SJohn Baldwin /* 1022fe54587fSJeff Roberson * Processes normally resume in mi_switch() after being 1023fe54587fSJeff Roberson * cpu_switch()'ed to, but when children start up they arrive here 1024fe54587fSJeff Roberson * instead, so we must do much the same things as mi_switch() would. 1025fe54587fSJeff Roberson */ 1026fe54587fSJeff Roberson if ((dtd = PCPU_GET(deadthread))) { 1027fe54587fSJeff Roberson PCPU_SET(deadthread, NULL); 1028fe54587fSJeff Roberson thread_stash(dtd); 1029fe54587fSJeff Roberson } 1030fe54587fSJeff Roberson thread_unlock(td); 1031fe54587fSJeff Roberson 1032fe54587fSJeff Roberson /* 10335c2cf818SKonstantin Belousov * cpu_fork_kthread_handler intercepts this function call to 1034a7b124c3SJohn Baldwin * have this call a non-return function to stay in kernel mode. 1035a7b124c3SJohn Baldwin * initproc has its own fork handler, but it does return. 1036a7b124c3SJohn Baldwin */ 10375813dc03SJohn Baldwin KASSERT(callout != NULL, ("NULL callout in fork_exit")); 10388865286bSJohn Baldwin callout(arg, frame); 1039a7b124c3SJohn Baldwin 1040a7b124c3SJohn Baldwin /* 1041a7b124c3SJohn Baldwin * Check if a kernel thread misbehaved and returned from its main 1042a7b124c3SJohn Baldwin * function. 1043a7b124c3SJohn Baldwin */ 1044db57c70aSKonstantin Belousov if (p->p_flag & P_KPROC) { 1045a7b124c3SJohn Baldwin printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n", 1046e01eafefSJulian Elischer td->td_name, p->p_pid); 1047fb1f4582SJohn Baldwin kthread_exit(); 1048a7b124c3SJohn Baldwin } 1049a7b124c3SJohn Baldwin mtx_assert(&Giant, MA_NOTOWNED); 1050993182e5SAlexander Leidinger 1051e5d81ef1SDmitry Chagin if (p->p_sysent->sv_schedtail != NULL) 1052e5d81ef1SDmitry Chagin (p->p_sysent->sv_schedtail)(td); 1053aff57357SEd Schouten td->td_pflags &= ~TDP_FORKING; 1054a7b124c3SJohn Baldwin } 1055a7b124c3SJohn Baldwin 1056a7b124c3SJohn Baldwin /* 1057a7b124c3SJohn Baldwin * Simplified back end of syscall(), used when returning from fork() 1058e69ba32fSKonstantin Belousov * directly into user mode. This function is passed in to fork_exit() 1059e69ba32fSKonstantin Belousov * as the first parameter and is called when returning to a new 1060e69ba32fSKonstantin Belousov * userland process. 1061a7b124c3SJohn Baldwin */ 1062a7b124c3SJohn Baldwin void 10631d845e86SEdward Tomasz Napierala fork_return(struct thread *td, struct trapframe *frame) 1064a7b124c3SJohn Baldwin { 10656fa39a73SKonstantin Belousov struct proc *p, *dbg; 10666fa39a73SKonstantin Belousov 10676fa39a73SKonstantin Belousov p = td->td_proc; 1068189ac973SJohn Baldwin if (td->td_dbgflags & TDB_STOPATFORK) { 10696fa39a73SKonstantin Belousov sx_xlock(&proctree_lock); 10706fa39a73SKonstantin Belousov PROC_LOCK(p); 10718d570f64SJohn Baldwin if (p->p_pptr->p_ptevents & PTRACE_FORK) { 10726fa39a73SKonstantin Belousov /* 10736fa39a73SKonstantin Belousov * If debugger still wants auto-attach for the 10746fa39a73SKonstantin Belousov * parent's children, do it now. 10756fa39a73SKonstantin Belousov */ 10766fa39a73SKonstantin Belousov dbg = p->p_pptr->p_pptr; 1077e5574e09SMark Johnston proc_set_traced(p, true); 1078515b7a0bSJohn Baldwin CTR2(KTR_PTRACE, 1079515b7a0bSJohn Baldwin "fork_return: attaching to new child pid %d: oppid %d", 1080515b7a0bSJohn Baldwin p->p_pid, p->p_oppid); 10816fa39a73SKonstantin Belousov proc_reparent(p, dbg); 10826fa39a73SKonstantin Belousov sx_xunlock(&proctree_lock); 1083b7a25e63SKonstantin Belousov td->td_dbgflags |= TDB_CHILD | TDB_SCX | TDB_FSTP; 10846fa39a73SKonstantin Belousov ptracestop(td, SIGSTOP); 1085189ac973SJohn Baldwin td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX); 10866fa39a73SKonstantin Belousov } else { 10876fa39a73SKonstantin Belousov /* 10886fa39a73SKonstantin Belousov * ... otherwise clear the request. 10896fa39a73SKonstantin Belousov */ 10906fa39a73SKonstantin Belousov sx_xunlock(&proctree_lock); 10916fa39a73SKonstantin Belousov td->td_dbgflags &= ~TDB_STOPATFORK; 10926fa39a73SKonstantin Belousov cv_broadcast(&p->p_dbgwait); 10936fa39a73SKonstantin Belousov } 10946fa39a73SKonstantin Belousov PROC_UNLOCK(p); 10955fcfab6eSJohn Baldwin } else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) { 1096189ac973SJohn Baldwin /* 1097189ac973SJohn Baldwin * This is the start of a new thread in a traced 1098189ac973SJohn Baldwin * process. Report a system call exit event. 1099189ac973SJohn Baldwin */ 1100189ac973SJohn Baldwin PROC_LOCK(p); 1101189ac973SJohn Baldwin td->td_dbgflags |= TDB_SCX; 1102189ac973SJohn Baldwin _STOPEVENT(p, S_SCX, td->td_dbg_sc_code); 11038d570f64SJohn Baldwin if ((p->p_ptevents & PTRACE_SCX) != 0 || 11045fcfab6eSJohn Baldwin (td->td_dbgflags & TDB_BORN) != 0) 1105189ac973SJohn Baldwin ptracestop(td, SIGTRAP); 11065fcfab6eSJohn Baldwin td->td_dbgflags &= ~(TDB_SCX | TDB_BORN); 1107189ac973SJohn Baldwin PROC_UNLOCK(p); 11086fa39a73SKonstantin Belousov } 1109a7b124c3SJohn Baldwin 1110eb2da9a5SPoul-Henning Kamp userret(td, frame); 11116fa39a73SKonstantin Belousov 1112a7b124c3SJohn Baldwin #ifdef KTRACE 1113af300f23SJohn Baldwin if (KTRPOINT(td, KTR_SYSRET)) 1114af300f23SJohn Baldwin ktrsysret(SYS_fork, 0, 0); 1115a7b124c3SJohn Baldwin #endif 1116a7b124c3SJohn Baldwin } 1117