19454b2d8SWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1989, 1991, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * (c) UNIX System Laboratories, Inc. 7df8bae1dSRodney W. Grimes * All or some portions of this file are derived from material licensed 8df8bae1dSRodney W. Grimes * to the University of California by American Telephone and Telegraph 9df8bae1dSRodney W. Grimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10df8bae1dSRodney W. Grimes * the permission of UNIX System Laboratories, Inc. 11df8bae1dSRodney W. Grimes * 12df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 13df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 14df8bae1dSRodney W. Grimes * are met: 15df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 17df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 18df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 19df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 2069a28758SEd Maste * 3. Neither the name of the University nor the names of its contributors 21df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 22df8bae1dSRodney W. Grimes * without specific prior written permission. 23df8bae1dSRodney W. Grimes * 24df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34df8bae1dSRodney W. Grimes * SUCH DAMAGE. 35df8bae1dSRodney W. Grimes */ 36df8bae1dSRodney W. Grimes 37677b542eSDavid E. O'Brien #include <sys/cdefs.h> 38db6a20e2SGarrett Wollman #include "opt_ktrace.h" 398a945d10SKonstantin Belousov #include "opt_kstack_pages.h" 40db6a20e2SGarrett Wollman 41df8bae1dSRodney W. Grimes #include <sys/param.h> 42df8bae1dSRodney W. Grimes #include <sys/systm.h> 4334ebdceaSMateusz Guzik #include <sys/bitstring.h> 44d2d3e875SBruce Evans #include <sys/sysproto.h> 4575b8b3b2SJohn Baldwin #include <sys/eventhandler.h> 46cfb5f768SJonathan Anderson #include <sys/fcntl.h> 47df8bae1dSRodney W. Grimes #include <sys/filedesc.h> 480304c731SJamie Gritton #include <sys/jail.h> 49df8bae1dSRodney W. Grimes #include <sys/kernel.h> 5070fca427SJohn Baldwin #include <sys/kthread.h> 51c76e95c3SPeter Wemm #include <sys/sysctl.h> 5219284646SJohn Baldwin #include <sys/lock.h> 53df8bae1dSRodney W. Grimes #include <sys/malloc.h> 545dda15adSMark Johnston #include <sys/msan.h> 5535e0e5b3SJohn Baldwin #include <sys/mutex.h> 56acd3428bSRobert Watson #include <sys/priv.h> 57df8bae1dSRodney W. Grimes #include <sys/proc.h> 58cfb5f768SJonathan Anderson #include <sys/procdesc.h> 59189ac973SJohn Baldwin #include <sys/ptrace.h> 60097055e2SEdward Tomasz Napierala #include <sys/racct.h> 61df8bae1dSRodney W. Grimes #include <sys/resourcevar.h> 62b43179fbSJeff Roberson #include <sys/sched.h> 63a7b124c3SJohn Baldwin #include <sys/syscall.h> 6470fca427SJohn Baldwin #include <sys/vmmeter.h> 65df8bae1dSRodney W. Grimes #include <sys/vnode.h> 66df8bae1dSRodney W. Grimes #include <sys/acct.h> 670384fff8SJason Evans #include <sys/ktr.h> 68df8bae1dSRodney W. Grimes #include <sys/ktrace.h> 69b71fec07SBruce Evans #include <sys/unistd.h> 705d217f17SJohn Birrell #include <sys/sdt.h> 7157934cd3SJohn Baldwin #include <sys/sx.h> 72e5d81ef1SDmitry Chagin #include <sys/sysent.h> 736004362eSDavid Schultz #include <sys/signalvar.h> 74df8bae1dSRodney W. Grimes 75fcf7f27aSRobert Watson #include <security/audit/audit.h> 76aed55708SRobert Watson #include <security/mac/mac_framework.h> 77fcf7f27aSRobert Watson 78d93f860cSPoul-Henning Kamp #include <vm/vm.h> 79dabee6feSPeter Wemm #include <vm/pmap.h> 80dabee6feSPeter Wemm #include <vm/vm_map.h> 81efeaf95aSDavid Greenman #include <vm/vm_extern.h> 82c897b813SJeff Roberson #include <vm/uma.h> 83d93f860cSPoul-Henning Kamp 845d217f17SJohn Birrell #ifdef KDTRACE_HOOKS 855d217f17SJohn Birrell #include <sys/dtrace_bsd.h> 865d217f17SJohn Birrell dtrace_fork_func_t dtrace_fasttrap_fork; 875d217f17SJohn Birrell #endif 885d217f17SJohn Birrell 895d217f17SJohn Birrell SDT_PROVIDER_DECLARE(proc); 9036160958SMark Johnston SDT_PROBE_DEFINE3(proc, , , create, "struct proc *", "struct proc *", "int"); 9188c5ea45SJulian Elischer 92d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 93ad7507e2SSteven Wallace struct fork_args { 94ad7507e2SSteven Wallace int dummy; 95ad7507e2SSteven Wallace }; 96d2d3e875SBruce Evans #endif 97ad7507e2SSteven Wallace 98df8bae1dSRodney W. Grimes /* ARGSUSED */ 9926f9a767SRodney W. Grimes int 1008451d0ddSKip Macy sys_fork(struct thread *td, struct fork_args *uap) 101df8bae1dSRodney W. Grimes { 10233fd9b9aSMateusz Guzik struct fork_req fr; 103813361c1SMateusz Guzik int error, pid; 104be67169aSBruce Evans 10533fd9b9aSMateusz Guzik bzero(&fr, sizeof(fr)); 10633fd9b9aSMateusz Guzik fr.fr_flags = RFFDG | RFPROC; 107813361c1SMateusz Guzik fr.fr_pidp = &pid; 10833fd9b9aSMateusz Guzik error = fork1(td, &fr); 109df8abd0bSPeter Wemm if (error == 0) { 110813361c1SMateusz Guzik td->td_retval[0] = pid; 111b40ce416SJulian Elischer td->td_retval[1] = 0; 112df8abd0bSPeter Wemm } 11370fca427SJohn Baldwin return (error); 114df8bae1dSRodney W. Grimes } 115df8bae1dSRodney W. Grimes 116cfb5f768SJonathan Anderson /* ARGUSED */ 117cfb5f768SJonathan Anderson int 1180c829a30SMateusz Guzik sys_pdfork(struct thread *td, struct pdfork_args *uap) 119cfb5f768SJonathan Anderson { 12033fd9b9aSMateusz Guzik struct fork_req fr; 121813361c1SMateusz Guzik int error, fd, pid; 122cfb5f768SJonathan Anderson 12333fd9b9aSMateusz Guzik bzero(&fr, sizeof(fr)); 12433fd9b9aSMateusz Guzik fr.fr_flags = RFFDG | RFPROC | RFPROCDESC; 125813361c1SMateusz Guzik fr.fr_pidp = &pid; 12633fd9b9aSMateusz Guzik fr.fr_pd_fd = &fd; 12733fd9b9aSMateusz Guzik fr.fr_pd_flags = uap->flags; 128757a5642SChristian S.J. Peron AUDIT_ARG_FFLAGS(uap->flags); 129cfb5f768SJonathan Anderson /* 130cfb5f768SJonathan Anderson * It is necessary to return fd by reference because 0 is a valid file 131cfb5f768SJonathan Anderson * descriptor number, and the child needs to be able to distinguish 132cfb5f768SJonathan Anderson * itself from the parent using the return value. 133cfb5f768SJonathan Anderson */ 13433fd9b9aSMateusz Guzik error = fork1(td, &fr); 135cfb5f768SJonathan Anderson if (error == 0) { 136813361c1SMateusz Guzik td->td_retval[0] = pid; 137cfb5f768SJonathan Anderson td->td_retval[1] = 0; 138cfb5f768SJonathan Anderson error = copyout(&fd, uap->fdp, sizeof(fd)); 139cfb5f768SJonathan Anderson } 140cfb5f768SJonathan Anderson return (error); 141cfb5f768SJonathan Anderson } 142cfb5f768SJonathan Anderson 143df8bae1dSRodney W. Grimes /* ARGSUSED */ 14426f9a767SRodney W. Grimes int 1458451d0ddSKip Macy sys_vfork(struct thread *td, struct vfork_args *uap) 146df8bae1dSRodney W. Grimes { 14733fd9b9aSMateusz Guzik struct fork_req fr; 148813361c1SMateusz Guzik int error, pid; 149be67169aSBruce Evans 15033fd9b9aSMateusz Guzik bzero(&fr, sizeof(fr)); 15133fd9b9aSMateusz Guzik fr.fr_flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; 152813361c1SMateusz Guzik fr.fr_pidp = &pid; 15333fd9b9aSMateusz Guzik error = fork1(td, &fr); 154df8abd0bSPeter Wemm if (error == 0) { 155813361c1SMateusz Guzik td->td_retval[0] = pid; 156b40ce416SJulian Elischer td->td_retval[1] = 0; 157df8abd0bSPeter Wemm } 15870fca427SJohn Baldwin return (error); 159df8bae1dSRodney W. Grimes } 160df8bae1dSRodney W. Grimes 161dabee6feSPeter Wemm int 1628451d0ddSKip Macy sys_rfork(struct thread *td, struct rfork_args *uap) 163dabee6feSPeter Wemm { 16433fd9b9aSMateusz Guzik struct fork_req fr; 165813361c1SMateusz Guzik int error, pid; 166be67169aSBruce Evans 167c8564ad4SBruce Evans /* Don't allow kernel-only flags. */ 168885ccc61SJohn Baldwin if ((uap->flags & RFKERNELONLY) != 0) 169885ccc61SJohn Baldwin return (EINVAL); 170079c5b9eSKyle Evans /* RFSPAWN must not appear with others */ 171079c5b9eSKyle Evans if ((uap->flags & RFSPAWN) != 0 && uap->flags != RFSPAWN) 172079c5b9eSKyle Evans return (EINVAL); 173c8564ad4SBruce Evans 17414961ba7SRobert Watson AUDIT_ARG_FFLAGS(uap->flags); 17533fd9b9aSMateusz Guzik bzero(&fr, sizeof(fr)); 176079c5b9eSKyle Evans if ((uap->flags & RFSPAWN) != 0) { 177079c5b9eSKyle Evans fr.fr_flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; 178079c5b9eSKyle Evans fr.fr_flags2 = FR2_DROPSIG_CAUGHT; 179079c5b9eSKyle Evans } else { 18033fd9b9aSMateusz Guzik fr.fr_flags = uap->flags; 181079c5b9eSKyle Evans } 182813361c1SMateusz Guzik fr.fr_pidp = &pid; 18333fd9b9aSMateusz Guzik error = fork1(td, &fr); 184df8abd0bSPeter Wemm if (error == 0) { 185813361c1SMateusz Guzik td->td_retval[0] = pid; 186b40ce416SJulian Elischer td->td_retval[1] = 0; 187df8abd0bSPeter Wemm } 18870fca427SJohn Baldwin return (error); 189dabee6feSPeter Wemm } 190dabee6feSPeter Wemm 19137d2b1f3SMateusz Guzik int __exclusive_cache_line nprocs = 1; /* process 0 */ 1928f7e4eb5SDag-Erling Smørgrav int lastpid = 0; 1938f7e4eb5SDag-Erling Smørgrav SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, 194d941d475SRobert Watson "Last used PID"); 195df8bae1dSRodney W. Grimes 196bb6a234eSPeter Wemm /* 1978f7e4eb5SDag-Erling Smørgrav * Random component to lastpid generation. We mix in a random factor to make 198bb6a234eSPeter Wemm * it a little harder to predict. We sanity check the modulus value to avoid 199bb6a234eSPeter Wemm * doing it in critical paths. Don't let it be too small or we pointlessly 200bb6a234eSPeter Wemm * waste randomness entropy, and don't let it be impossibly large. Using a 201bb6a234eSPeter Wemm * modulus that is too big causes a LOT more process table scans and slows 202bb6a234eSPeter Wemm * down fork processing as the pidchecked caching is defeated. 203bb6a234eSPeter Wemm */ 204ee3fd601SDan Moschuk static int randompid = 0; 205bb6a234eSPeter Wemm 206bb6a234eSPeter Wemm static int 20782d9ae4eSPoul-Henning Kamp sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) 208bb6a234eSPeter Wemm { 209bb6a234eSPeter Wemm int error, pid; 210bb6a234eSPeter Wemm 21147934cefSDon Lewis error = sysctl_wire_old_buffer(req, sizeof(int)); 21247934cefSDon Lewis if (error != 0) 21347934cefSDon Lewis return(error); 2143fc755c1SJohn Baldwin sx_xlock(&allproc_lock); 215bb6a234eSPeter Wemm pid = randompid; 216bb6a234eSPeter Wemm error = sysctl_handle_int(oidp, &pid, 0, req); 2173fc755c1SJohn Baldwin if (error == 0 && req->newptr != NULL) { 218008a0935SDag-Erling Smørgrav if (pid == 0) 219008a0935SDag-Erling Smørgrav randompid = 0; 220008a0935SDag-Erling Smørgrav else if (pid == 1) 221008a0935SDag-Erling Smørgrav /* generate a random PID modulus between 100 and 1123 */ 222008a0935SDag-Erling Smørgrav randompid = 100 + arc4random() % 1024; 223008a0935SDag-Erling Smørgrav else if (pid < 0 || pid > pid_max - 100) 224008a0935SDag-Erling Smørgrav /* out of range */ 225008a0935SDag-Erling Smørgrav randompid = pid_max - 100; 226008a0935SDag-Erling Smørgrav else if (pid < 100) 227008a0935SDag-Erling Smørgrav /* Make it reasonable */ 228008a0935SDag-Erling Smørgrav randompid = 100; 229008a0935SDag-Erling Smørgrav else 230bb6a234eSPeter Wemm randompid = pid; 2313fc755c1SJohn Baldwin } 2323fc755c1SJohn Baldwin sx_xunlock(&allproc_lock); 233bb6a234eSPeter Wemm return (error); 234bb6a234eSPeter Wemm } 235bb6a234eSPeter Wemm 2367029da5cSPawel Biernacki SYSCTL_PROC(_kern, OID_AUTO, randompid, 2377029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 2387029da5cSPawel Biernacki sysctl_kern_randompid, "I", 2397029da5cSPawel Biernacki "Random PID modulus. Special values: 0: disable, 1: choose random value"); 240ee3fd601SDan Moschuk 24134ebdceaSMateusz Guzik extern bitstr_t proc_id_pidmap; 24234ebdceaSMateusz Guzik extern bitstr_t proc_id_grpidmap; 24334ebdceaSMateusz Guzik extern bitstr_t proc_id_sessidmap; 24434ebdceaSMateusz Guzik extern bitstr_t proc_id_reapmap; 24534ebdceaSMateusz Guzik 24619b75ef5SMateusz Guzik /* 24719b75ef5SMateusz Guzik * Find an unused process ID 24819b75ef5SMateusz Guzik * 24919b75ef5SMateusz Guzik * If RFHIGHPID is set (used during system boot), do not allocate 25019b75ef5SMateusz Guzik * low-numbered pids. 25119b75ef5SMateusz Guzik */ 2521d845e86SEdward Tomasz Napierala static int 253afd01097SEdward Tomasz Napierala fork_findpid(int flags) 254afd01097SEdward Tomasz Napierala { 25534ebdceaSMateusz Guzik pid_t result; 25650c7615fSMateusz Guzik int trypid, random; 25750c7615fSMateusz Guzik 25850c7615fSMateusz Guzik /* 25950c7615fSMateusz Guzik * Avoid calling arc4random with procid_lock held. 26050c7615fSMateusz Guzik */ 26150c7615fSMateusz Guzik random = 0; 26250c7615fSMateusz Guzik if (__predict_false(randompid)) 26350c7615fSMateusz Guzik random = arc4random() % randompid; 26450c7615fSMateusz Guzik 26550c7615fSMateusz Guzik mtx_lock(&procid_lock); 266afd01097SEdward Tomasz Napierala 267afd01097SEdward Tomasz Napierala trypid = lastpid + 1; 268afd01097SEdward Tomasz Napierala if (flags & RFHIGHPID) { 269afd01097SEdward Tomasz Napierala if (trypid < 10) 270afd01097SEdward Tomasz Napierala trypid = 10; 271afd01097SEdward Tomasz Napierala } else { 27250c7615fSMateusz Guzik trypid += random; 273afd01097SEdward Tomasz Napierala } 274afd01097SEdward Tomasz Napierala retry: 275b05641b6SMateusz Guzik if (trypid >= pid_max) 276b05641b6SMateusz Guzik trypid = 2; 277afd01097SEdward Tomasz Napierala 27834ebdceaSMateusz Guzik bit_ffc_at(&proc_id_pidmap, trypid, pid_max, &result); 279eab2132aSMateusz Guzik if (result == -1) { 280b05641b6SMateusz Guzik KASSERT(trypid != 2, ("unexpectedly ran out of IDs")); 281b05641b6SMateusz Guzik trypid = 2; 282afd01097SEdward Tomasz Napierala goto retry; 283eab2132aSMateusz Guzik } 28434ebdceaSMateusz Guzik if (bit_test(&proc_id_grpidmap, result) || 28534ebdceaSMateusz Guzik bit_test(&proc_id_sessidmap, result) || 28634ebdceaSMateusz Guzik bit_test(&proc_id_reapmap, result)) { 28719b75ef5SMateusz Guzik trypid = result + 1; 28834ebdceaSMateusz Guzik goto retry; 289afd01097SEdward Tomasz Napierala } 290afd01097SEdward Tomasz Napierala 291afd01097SEdward Tomasz Napierala /* 292afd01097SEdward Tomasz Napierala * RFHIGHPID does not mess with the lastpid counter during boot. 293afd01097SEdward Tomasz Napierala */ 29434ebdceaSMateusz Guzik if ((flags & RFHIGHPID) == 0) 29534ebdceaSMateusz Guzik lastpid = result; 296afd01097SEdward Tomasz Napierala 29734ebdceaSMateusz Guzik bit_set(&proc_id_pidmap, result); 29834ebdceaSMateusz Guzik mtx_unlock(&procid_lock); 2991e9a1bf5SMateusz Guzik 30034ebdceaSMateusz Guzik return (result); 301afd01097SEdward Tomasz Napierala } 302afd01097SEdward Tomasz Napierala 303afd01097SEdward Tomasz Napierala static int 3043e73ff1eSEdward Tomasz Napierala fork_norfproc(struct thread *td, int flags) 3051d845e86SEdward Tomasz Napierala { 3061d845e86SEdward Tomasz Napierala struct proc *p1; 307bd76586bSKonstantin Belousov int error; 3081d845e86SEdward Tomasz Napierala 309087bfb0eSEdward Tomasz Napierala KASSERT((flags & RFPROC) == 0, 310087bfb0eSEdward Tomasz Napierala ("fork_norfproc called with RFPROC set")); 3111d845e86SEdward Tomasz Napierala p1 = td->td_proc; 3121d845e86SEdward Tomasz Napierala 3139246b309SMark Johnston /* 3149246b309SMark Johnston * Quiesce other threads if necessary. If RFMEM is not specified we 3159246b309SMark Johnston * must ensure that other threads do not concurrently create a second 3169246b309SMark Johnston * process sharing the vmspace, see vmspace_unshare(). 3179246b309SMark Johnston */ 3189246b309SMark Johnston if ((p1->p_flag & (P_HADTHREADS | P_SYSTEM)) == P_HADTHREADS && 3199246b309SMark Johnston ((flags & (RFCFDG | RFFDG)) != 0 || (flags & RFMEM) == 0)) { 3201d845e86SEdward Tomasz Napierala PROC_LOCK(p1); 3216ddcc233SKonstantin Belousov if (thread_single(p1, SINGLE_BOUNDARY)) { 3221d845e86SEdward Tomasz Napierala PROC_UNLOCK(p1); 3231d845e86SEdward Tomasz Napierala return (ERESTART); 3241d845e86SEdward Tomasz Napierala } 3251d845e86SEdward Tomasz Napierala PROC_UNLOCK(p1); 3261d845e86SEdward Tomasz Napierala } 3271d845e86SEdward Tomasz Napierala 3281d845e86SEdward Tomasz Napierala error = vm_forkproc(td, NULL, NULL, NULL, flags); 329bd76586bSKonstantin Belousov if (error != 0) 3301d845e86SEdward Tomasz Napierala goto fail; 3311d845e86SEdward Tomasz Napierala 3321d845e86SEdward Tomasz Napierala /* 3331d845e86SEdward Tomasz Napierala * Close all file descriptors. 3341d845e86SEdward Tomasz Napierala */ 335bd76586bSKonstantin Belousov if ((flags & RFCFDG) != 0) { 3361d845e86SEdward Tomasz Napierala struct filedesc *fdtmp; 33785078b85SConrad Meyer struct pwddesc *pdtmp; 338bd76586bSKonstantin Belousov 33985078b85SConrad Meyer pdtmp = pdinit(td->td_proc->p_pd, false); 340893d20c9SMateusz Guzik fdtmp = fdinit(); 34185078b85SConrad Meyer pdescfree(td); 3422609222aSPawel Jakub Dawidek fdescfree(td); 3431d845e86SEdward Tomasz Napierala p1->p_fd = fdtmp; 34485078b85SConrad Meyer p1->p_pd = pdtmp; 3451d845e86SEdward Tomasz Napierala } 3461d845e86SEdward Tomasz Napierala 3471d845e86SEdward Tomasz Napierala /* 3481d845e86SEdward Tomasz Napierala * Unshare file descriptors (from parent). 3491d845e86SEdward Tomasz Napierala */ 350bd76586bSKonstantin Belousov if ((flags & RFFDG) != 0) { 351b9d32c36SMateusz Guzik fdunshare(td); 35285078b85SConrad Meyer pdunshare(td); 35385078b85SConrad Meyer } 3541d845e86SEdward Tomasz Napierala 3551d845e86SEdward Tomasz Napierala fail: 3569246b309SMark Johnston if ((p1->p_flag & (P_HADTHREADS | P_SYSTEM)) == P_HADTHREADS && 3579246b309SMark Johnston ((flags & (RFCFDG | RFFDG)) != 0 || (flags & RFMEM) == 0)) { 3581d845e86SEdward Tomasz Napierala PROC_LOCK(p1); 3596ddcc233SKonstantin Belousov thread_single_end(p1, SINGLE_BOUNDARY); 3601d845e86SEdward Tomasz Napierala PROC_UNLOCK(p1); 3611d845e86SEdward Tomasz Napierala } 3621d845e86SEdward Tomasz Napierala return (error); 3631d845e86SEdward Tomasz Napierala } 3641d845e86SEdward Tomasz Napierala 365afd01097SEdward Tomasz Napierala static void 366813361c1SMateusz Guzik do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2, 367813361c1SMateusz Guzik struct vmspace *vm2, struct file *fp_procdesc) 368df8bae1dSRodney W. Grimes { 369afd01097SEdward Tomasz Napierala struct proc *p1, *pptr; 3705641ae5dSJohn Baldwin struct filedesc *fd; 371ad05d580STor Egge struct filedesc_to_leader *fdtol; 37285078b85SConrad Meyer struct pwddesc *pd; 3733fc755c1SJohn Baldwin struct sigacts *newsigacts; 3745856e12eSJohn Dyson 37570fca427SJohn Baldwin p1 = td->td_proc; 37670fca427SJohn Baldwin 3771ad9ee86SXin LI PROC_LOCK(p1); 3781ad9ee86SXin LI bcopy(&p1->p_startcopy, &p2->p_startcopy, 3791ad9ee86SXin LI __rangeof(struct proc, p_startcopy, p_endcopy)); 3808b4a2800SKonstantin Belousov pargs_hold(p2->p_args); 3811ad9ee86SXin LI PROC_UNLOCK(p1); 3821ad9ee86SXin LI 3831ad9ee86SXin LI bzero(&p2->p_startzero, 3841ad9ee86SXin LI __rangeof(struct proc, p_startzero, p_endzero)); 3851ad9ee86SXin LI 3860304c731SJamie Gritton /* Tell the prison that we exist. */ 387413628a7SBjoern A. Zeeb prison_proc_hold(p2->p_ucred->cr_prison); 388413628a7SBjoern A. Zeeb 38950c7615fSMateusz Guzik p2->p_state = PRS_NEW; /* protect against others */ 39050c7615fSMateusz Guzik p2->p_pid = fork_findpid(fr->fr_flags); 39150c7615fSMateusz Guzik AUDIT_ARG_PID(p2->p_pid); 39246dd801aSColin Percival TSFORK(p2->p_pid, p1->p_pid); 39350c7615fSMateusz Guzik 39450c7615fSMateusz Guzik sx_xlock(&allproc_lock); 39550c7615fSMateusz Guzik LIST_INSERT_HEAD(&allproc, p2, p_list); 39650c7615fSMateusz Guzik allproc_gen++; 3975ecb5444SMateusz Guzik prison_proc_link(p2->p_ucred->cr_prison, p2); 39850c7615fSMateusz Guzik sx_xunlock(&allproc_lock); 39950c7615fSMateusz Guzik 40050c7615fSMateusz Guzik sx_xlock(PIDHASHLOCK(p2->p_pid)); 40150c7615fSMateusz Guzik LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); 40250c7615fSMateusz Guzik sx_xunlock(PIDHASHLOCK(p2->p_pid)); 4031ad9ee86SXin LI 40481d68271SMateusz Guzik tidhash_add(td2); 40581d68271SMateusz Guzik 4060384fff8SJason Evans /* 4073fc755c1SJohn Baldwin * Malloc things while we don't hold any locks. 4083fc755c1SJohn Baldwin */ 409813361c1SMateusz Guzik if (fr->fr_flags & RFSIGSHARE) 4103fc755c1SJohn Baldwin newsigacts = NULL; 41190af4afaSJohn Baldwin else 41290af4afaSJohn Baldwin newsigacts = sigacts_alloc(); 4133fc755c1SJohn Baldwin 4143fc755c1SJohn Baldwin /* 4153fc755c1SJohn Baldwin * Copy filedesc. 4163fc755c1SJohn Baldwin */ 417813361c1SMateusz Guzik if (fr->fr_flags & RFCFDG) { 41885078b85SConrad Meyer pd = pdinit(p1->p_pd, false); 419893d20c9SMateusz Guzik fd = fdinit(); 420ad05d580STor Egge fdtol = NULL; 421813361c1SMateusz Guzik } else if (fr->fr_flags & RFFDG) { 422f8f74aaaSConrad Meyer if (fr->fr_flags2 & FR2_SHARE_PATHS) 423f8f74aaaSConrad Meyer pd = pdshare(p1->p_pd); 424f8f74aaaSConrad Meyer else 42585078b85SConrad Meyer pd = pdcopy(p1->p_pd); 426598b7ec8SPoul-Henning Kamp fd = fdcopy(p1->p_fd); 427ad05d580STor Egge fdtol = NULL; 428ad05d580STor Egge } else { 429f8f74aaaSConrad Meyer if (fr->fr_flags2 & FR2_SHARE_PATHS) 430f8f74aaaSConrad Meyer pd = pdcopy(p1->p_pd); 431f8f74aaaSConrad Meyer else 43285078b85SConrad Meyer pd = pdshare(p1->p_pd); 433c7f1c11bSAlfred Perlstein fd = fdshare(p1->p_fd); 434ad05d580STor Egge if (p1->p_fdtol == NULL) 4353e73ff1eSEdward Tomasz Napierala p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL, 436ad05d580STor Egge p1->p_leader); 437813361c1SMateusz Guzik if ((fr->fr_flags & RFTHREAD) != 0) { 438ad05d580STor Egge /* 4393e73ff1eSEdward Tomasz Napierala * Shared file descriptor table, and shared 4403e73ff1eSEdward Tomasz Napierala * process leaders. 441ad05d580STor Egge */ 442d07675a9SMark Johnston fdtol = filedesc_to_leader_share(p1->p_fdtol, p1->p_fd); 443ad05d580STor Egge } else { 444ad05d580STor Egge /* 4453e73ff1eSEdward Tomasz Napierala * Shared file descriptor table, and different 4463e73ff1eSEdward Tomasz Napierala * process leaders. 447ad05d580STor Egge */ 448ad05d580STor Egge fdtol = filedesc_to_leader_alloc(p1->p_fdtol, 4493e73ff1eSEdward Tomasz Napierala p1->p_fd, p2); 450ad05d580STor Egge } 451ad05d580STor Egge } 4523fc755c1SJohn Baldwin /* 453df8bae1dSRodney W. Grimes * Make a proc table entry for the new process. 454df8bae1dSRodney W. Grimes * Start by zeroing the section of proc that is zero-initialized, 455df8bae1dSRodney W. Grimes * then copy the section that is copied directly from the parent. 456df8bae1dSRodney W. Grimes */ 457316ec49aSScott Long 4587d447c95SJohn Baldwin PROC_LOCK(p2); 4597d447c95SJohn Baldwin PROC_LOCK(p1); 4607d447c95SJohn Baldwin 461079b7badSJulian Elischer bzero(&td2->td_startzero, 4626db36923SDavid Schultz __rangeof(struct thread, td_startzero, td_endzero)); 463079b7badSJulian Elischer 464079b7badSJulian Elischer bcopy(&td->td_startcopy, &td2->td_startcopy, 4656db36923SDavid Schultz __rangeof(struct thread, td_startcopy, td_endcopy)); 466df8bae1dSRodney W. Grimes 4674b9322aeSJulian Elischer bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name)); 468a30ec4b9SDavid Xu td2->td_sigstk = td->td_sigstk; 469b61ce5b0SJeff Roberson td2->td_flags = TDF_INMEM; 470acbe332aSDavid Xu td2->td_lend_user_pri = PRI_MAX; 471a30ec4b9SDavid Xu 47221ca7b57SMarko Zec #ifdef VIMAGE 47321ca7b57SMarko Zec td2->td_vnet = NULL; 47421ca7b57SMarko Zec td2->td_vnet_lpush = NULL; 47521ca7b57SMarko Zec #endif 47621ca7b57SMarko Zec 477df8bae1dSRodney W. Grimes /* 47822d19207SJohn Baldwin * Allow the scheduler to initialize the child. 47922d19207SJohn Baldwin */ 48022d19207SJohn Baldwin thread_lock(td); 48122d19207SJohn Baldwin sched_fork(td, td2); 482626d6992SEdward Tomasz Napierala /* 483626d6992SEdward Tomasz Napierala * Request AST to check for TDP_RFPPWAIT. Do it here 484626d6992SEdward Tomasz Napierala * to avoid calling thread_lock() again. 485626d6992SEdward Tomasz Napierala */ 486626d6992SEdward Tomasz Napierala if ((fr->fr_flags & RFPPWAIT) != 0) 487c6d31b83SKonstantin Belousov ast_sched_locked(td, TDA_VFORK); 48822d19207SJohn Baldwin thread_unlock(td); 48922d19207SJohn Baldwin 49022d19207SJohn Baldwin /* 491df8bae1dSRodney W. Grimes * Duplicate sub-structures as needed. 492df8bae1dSRodney W. Grimes * Increase reference counts on shared objects. 493df8bae1dSRodney W. Grimes */ 494b61ce5b0SJeff Roberson p2->p_flag = P_INMEM; 495fa50a355SKonstantin Belousov p2->p_flag2 = p1->p_flag2 & (P2_ASLR_DISABLE | P2_ASLR_ENABLE | 4962ffee5c1SMark Johnston P2_ASLR_IGNSTART | P2_NOTRACE | P2_NOTRACE_EXEC | 497fe69291fSKonstantin Belousov P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE | P2_TRAPCAP | 498796a8e1aSKonstantin Belousov P2_STKGAP_DISABLE | P2_STKGAP_DISABLE_EXEC | P2_NO_NEW_PRIVS | 499796a8e1aSKonstantin Belousov P2_WXORX_DISABLE | P2_WXORX_ENABLE_EXEC); 50054b0e65fSJeff Roberson p2->p_swtick = ticks; 5019752f794SJohn Baldwin if (p1->p_flag & P_PROFIL) 5029752f794SJohn Baldwin startprofclock(p2); 503b9df5231SPoul-Henning Kamp 504813361c1SMateusz Guzik if (fr->fr_flags & RFSIGSHARE) { 50590af4afaSJohn Baldwin p2->p_sigacts = sigacts_hold(p1->p_sigacts); 5066626c604SJulian Elischer } else { 50790af4afaSJohn Baldwin sigacts_copy(newsigacts, p1->p_sigacts); 50890af4afaSJohn Baldwin p2->p_sigacts = newsigacts; 509640d5404SJohn Baldwin if ((fr->fr_flags2 & (FR2_DROPSIG_CAUGHT | FR2_KPROC)) != 0) { 510079c5b9eSKyle Evans mtx_lock(&p2->p_sigacts->ps_mtx); 511640d5404SJohn Baldwin if ((fr->fr_flags2 & FR2_DROPSIG_CAUGHT) != 0) 512079c5b9eSKyle Evans sig_drop_caught(p2); 513640d5404SJohn Baldwin if ((fr->fr_flags2 & FR2_KPROC) != 0) 514640d5404SJohn Baldwin p2->p_sigacts->ps_flag |= PS_NOCLDWAIT; 515079c5b9eSKyle Evans mtx_unlock(&p2->p_sigacts->ps_mtx); 516079c5b9eSKyle Evans } 5176626c604SJulian Elischer } 518f49d8202SKonstantin Belousov 519813361c1SMateusz Guzik if (fr->fr_flags & RFTSIGZMB) 520813361c1SMateusz Guzik p2->p_sigparent = RFTSIGNUM(fr->fr_flags); 521813361c1SMateusz Guzik else if (fr->fr_flags & RFLINUXTHPN) 5226626c604SJulian Elischer p2->p_sigparent = SIGUSR1; 5234ac9ae70SJulian Elischer else 5244ac9ae70SJulian Elischer p2->p_sigparent = SIGCHLD; 52588c5ea45SJulian Elischer 526640d5404SJohn Baldwin if ((fr->fr_flags2 & FR2_KPROC) != 0) { 527640d5404SJohn Baldwin p2->p_flag |= P_SYSTEM | P_KPROC; 528640d5404SJohn Baldwin td2->td_pflags |= TDP_KTHREAD; 529640d5404SJohn Baldwin } 530640d5404SJohn Baldwin 531df8bae1dSRodney W. Grimes p2->p_textvp = p1->p_textvp; 532351d5f7fSKonstantin Belousov p2->p_textdvp = p1->p_textdvp; 5335641ae5dSJohn Baldwin p2->p_fd = fd; 534ad05d580STor Egge p2->p_fdtol = fdtol; 53585078b85SConrad Meyer p2->p_pd = pd; 536dabee6feSPeter Wemm 53755648840SJohn Baldwin if (p1->p_flag2 & P2_INHERIT_PROTECTED) { 53855648840SJohn Baldwin p2->p_flag |= P_PROTECTED; 53955648840SJohn Baldwin p2->p_flag2 |= P2_INHERIT_PROTECTED; 54055648840SJohn Baldwin } 54155648840SJohn Baldwin 542df8bae1dSRodney W. Grimes /* 543c8564ad4SBruce Evans * p_limit is copy-on-write. Bump its refcount. 544df8bae1dSRodney W. Grimes */ 5451c4bcd05SJeff Roberson lim_fork(p1, p2); 5468b059651SDavid Schultz 5474ea6a9a2SMateusz Guzik thread_cow_get_proc(td2, p2); 5484ea6a9a2SMateusz Guzik 5498b059651SDavid Schultz pstats_fork(p1->p_stats, p2->p_stats); 5508b059651SDavid Schultz 551299bc736SDavid Schultz PROC_UNLOCK(p1); 552cda5aba4SDavid Schultz PROC_UNLOCK(p2); 553df8bae1dSRodney W. Grimes 554351d5f7fSKonstantin Belousov /* 555351d5f7fSKonstantin Belousov * Bump references to the text vnode and directory, and copy 556351d5f7fSKonstantin Belousov * the hardlink name. 557351d5f7fSKonstantin Belousov */ 558351d5f7fSKonstantin Belousov if (p2->p_textvp != NULL) 5595afb134cSMateusz Guzik vrefact(p2->p_textvp); 560351d5f7fSKonstantin Belousov if (p2->p_textdvp != NULL) 561351d5f7fSKonstantin Belousov vrefact(p2->p_textdvp); 562351d5f7fSKonstantin Belousov p2->p_binname = p1->p_binname == NULL ? NULL : 563351d5f7fSKonstantin Belousov strdup(p1->p_binname, M_PARGS); 564a69d88afSPeter Wemm 565c6544064SJohn Baldwin /* 566c8564ad4SBruce Evans * Set up linkage for kernel based threading. 567c6544064SJohn Baldwin */ 568813361c1SMateusz Guzik if ((fr->fr_flags & RFTHREAD) != 0) { 569c6544064SJohn Baldwin mtx_lock(&ppeers_lock); 570c6544064SJohn Baldwin p2->p_peers = p1->p_peers; 571c6544064SJohn Baldwin p1->p_peers = p2; 572c6544064SJohn Baldwin p2->p_leader = p1->p_leader; 573c6544064SJohn Baldwin mtx_unlock(&ppeers_lock); 574c6544064SJohn Baldwin PROC_LOCK(p1->p_leader); 575c6544064SJohn Baldwin if ((p1->p_leader->p_flag & P_WEXIT) != 0) { 576c6544064SJohn Baldwin PROC_UNLOCK(p1->p_leader); 577c6544064SJohn Baldwin /* 578c6544064SJohn Baldwin * The task leader is exiting, so process p1 is 579c6544064SJohn Baldwin * going to be killed shortly. Since p1 obviously 580c6544064SJohn Baldwin * isn't dead yet, we know that the leader is either 581c6544064SJohn Baldwin * sending SIGKILL's to all the processes in this 582c6544064SJohn Baldwin * task or is sleeping waiting for all the peers to 583c6544064SJohn Baldwin * exit. We let p1 complete the fork, but we need 584c6544064SJohn Baldwin * to go ahead and kill the new process p2 since 585c6544064SJohn Baldwin * the task leader may not get a chance to send 586c6544064SJohn Baldwin * SIGKILL to it. We leave it on the list so that 587c6544064SJohn Baldwin * the task leader will wait for this new process 588c6544064SJohn Baldwin * to commit suicide. 589c6544064SJohn Baldwin */ 590c6544064SJohn Baldwin PROC_LOCK(p2); 5918451d0ddSKip Macy kern_psignal(p2, SIGKILL); 592c6544064SJohn Baldwin PROC_UNLOCK(p2); 593293d2d22SRobert Watson } else 594293d2d22SRobert Watson PROC_UNLOCK(p1->p_leader); 595c6544064SJohn Baldwin } else { 596c6544064SJohn Baldwin p2->p_peers = NULL; 597c6544064SJohn Baldwin p2->p_leader = p2; 598c6544064SJohn Baldwin } 599c6544064SJohn Baldwin 6003fc755c1SJohn Baldwin sx_xlock(&proctree_lock); 6013fc755c1SJohn Baldwin PGRP_LOCK(p1->p_pgrp); 6023fc755c1SJohn Baldwin PROC_LOCK(p2); 6033fc755c1SJohn Baldwin PROC_LOCK(p1); 6043fc755c1SJohn Baldwin 60570e534e7SDavid Greenman /* 6069752f794SJohn Baldwin * Preserve some more flags in subprocess. P_PROFIL has already 607be67169aSBruce Evans * been preserved. 60870e534e7SDavid Greenman */ 609a30ec4b9SDavid Xu p2->p_flag |= p1->p_flag & P_SUGID; 610a0558fe9SMateusz Guzik td2->td_pflags |= (td->td_pflags & (TDP_ALTSTACK | TDP_SIGFASTBLOCK)); 611f591779bSSeigo Tanimura SESS_LOCK(p1->p_session); 612df8bae1dSRodney W. Grimes if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) 613df8bae1dSRodney W. Grimes p2->p_flag |= P_CONTROLT; 614f591779bSSeigo Tanimura SESS_UNLOCK(p1->p_session); 615813361c1SMateusz Guzik if (fr->fr_flags & RFPPWAIT) 616df8bae1dSRodney W. Grimes p2->p_flag |= P_PPWAIT; 617be67169aSBruce Evans 6185cded904SOlivier Houchard p2->p_pgrp = p1->p_pgrp; 619b75356e1SJeffrey Hsu LIST_INSERT_AFTER(p1, p2, p_pglist); 6202a60b9b9SSeigo Tanimura PGRP_UNLOCK(p1->p_pgrp); 621b75356e1SJeffrey Hsu LIST_INIT(&p2->p_children); 622dcd43281SKonstantin Belousov LIST_INIT(&p2->p_orphans); 623b75356e1SJeffrey Hsu 624f7e50ea7SKonstantin Belousov callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0); 6254f559836SJake Burkholder 626df8bae1dSRodney W. Grimes /* 627df8bae1dSRodney W. Grimes * This begins the section where we must prevent the parent 628cda5aba4SDavid Schultz * from being swapped. 629df8bae1dSRodney W. Grimes */ 630cda5aba4SDavid Schultz _PHOLD(p1); 63157934cd3SJohn Baldwin PROC_UNLOCK(p1); 6320d2afceeSDavid Greenman 633df8bae1dSRodney W. Grimes /* 6343fc755c1SJohn Baldwin * Attach the new process to its parent. 6353fc755c1SJohn Baldwin * 6363fc755c1SJohn Baldwin * If RFNOWAIT is set, the newly created process becomes a child 6373fc755c1SJohn Baldwin * of init. This effectively disassociates the child from the 6383fc755c1SJohn Baldwin * parent. 6393fc755c1SJohn Baldwin */ 640813361c1SMateusz Guzik if ((fr->fr_flags & RFNOWAIT) != 0) { 641237623b0SKonstantin Belousov pptr = p1->p_reaper; 642237623b0SKonstantin Belousov p2->p_reaper = pptr; 643237623b0SKonstantin Belousov } else { 644237623b0SKonstantin Belousov p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ? 645237623b0SKonstantin Belousov p1 : p1->p_reaper; 6463fc755c1SJohn Baldwin pptr = p1; 647237623b0SKonstantin Belousov } 6483fc755c1SJohn Baldwin p2->p_pptr = pptr; 6492c054ce9SMateusz Guzik p2->p_oppid = pptr->p_pid; 6503fc755c1SJohn Baldwin LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); 651237623b0SKonstantin Belousov LIST_INIT(&p2->p_reaplist); 652237623b0SKonstantin Belousov LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling); 65334ebdceaSMateusz Guzik if (p2->p_reaper == p1 && p1 != initproc) { 654237623b0SKonstantin Belousov p2->p_reapsubtree = p2->p_pid; 65534ebdceaSMateusz Guzik proc_id_set_cond(PROC_ID_REAP, p2->p_pid); 65634ebdceaSMateusz Guzik } 6573fc755c1SJohn Baldwin sx_xunlock(&proctree_lock); 6583fc755c1SJohn Baldwin 659bb0e8070SJohn Baldwin /* Inform accounting that we have forked. */ 660bb0e8070SJohn Baldwin p2->p_acflag = AFORK; 661bb0e8070SJohn Baldwin PROC_UNLOCK(p2); 662bb0e8070SJohn Baldwin 6637705d4b2SDmitry Chagin #ifdef KTRACE 6647705d4b2SDmitry Chagin ktrprocfork(p1, p2); 6657705d4b2SDmitry Chagin #endif 6667705d4b2SDmitry Chagin 6673fc755c1SJohn Baldwin /* 668a2a1c95cSPeter Wemm * Finish creating the child process. It will return via a different 669a2a1c95cSPeter Wemm * execution path later. (ie: directly into user mode) 670dabee6feSPeter Wemm */ 671813361c1SMateusz Guzik vm_forkproc(td, p2, td2, vm2, fr->fr_flags); 672df8bae1dSRodney W. Grimes 673813361c1SMateusz Guzik if (fr->fr_flags == (RFFDG | RFPROC)) { 67483c9dea1SGleb Smirnoff VM_CNT_INC(v_forks); 67583c9dea1SGleb Smirnoff VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize + 67694ddc707SAlan Cox p2->p_vmspace->vm_ssize); 677813361c1SMateusz Guzik } else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) { 67883c9dea1SGleb Smirnoff VM_CNT_INC(v_vforks); 67983c9dea1SGleb Smirnoff VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize + 68094ddc707SAlan Cox p2->p_vmspace->vm_ssize); 6815d22597fSHajimu UMEMOTO } else if (p1 == &proc0) { 68283c9dea1SGleb Smirnoff VM_CNT_INC(v_kthreads); 68383c9dea1SGleb Smirnoff VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize + 68494ddc707SAlan Cox p2->p_vmspace->vm_ssize); 6855d22597fSHajimu UMEMOTO } else { 68683c9dea1SGleb Smirnoff VM_CNT_INC(v_rforks); 68783c9dea1SGleb Smirnoff VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize + 68894ddc707SAlan Cox p2->p_vmspace->vm_ssize); 6895d22597fSHajimu UMEMOTO } 6905d22597fSHajimu UMEMOTO 691cfb5f768SJonathan Anderson /* 692cfb5f768SJonathan Anderson * Associate the process descriptor with the process before anything 693cfb5f768SJonathan Anderson * can happen that might cause that process to need the descriptor. 694cfb5f768SJonathan Anderson * However, don't do this until after fork(2) can no longer fail. 695cfb5f768SJonathan Anderson */ 696813361c1SMateusz Guzik if (fr->fr_flags & RFPROCDESC) 697813361c1SMateusz Guzik procdesc_new(p2, fr->fr_pd_flags); 698cfb5f768SJonathan Anderson 699df8bae1dSRodney W. Grimes /* 700e9189611SPeter Wemm * Both processes are set up, now check if any loadable modules want 701e0d898b4SJulian Elischer * to adjust anything. 702fed06968SJulian Elischer */ 7032ca45184SMatt Joras EVENTHANDLER_DIRECT_INVOKE(process_fork, p1, p2, fr->fr_flags); 704fed06968SJulian Elischer 705fed06968SJulian Elischer /* 7064c3558aaSJohn Baldwin * Set the child start time and mark the process as being complete. 7074c3558aaSJohn Baldwin */ 7088e6fa660SJohn Baldwin PROC_LOCK(p2); 7098e6fa660SJohn Baldwin PROC_LOCK(p1); 7104c3558aaSJohn Baldwin microuptime(&p2->p_stats->p_start); 71111bda9b8SJeff Roberson PROC_SLOCK(p2); 7124c3558aaSJohn Baldwin p2->p_state = PRS_NORMAL; 71311bda9b8SJeff Roberson PROC_SUNLOCK(p2); 7146fa39a73SKonstantin Belousov 715d3555b6fSRui Paulo #ifdef KDTRACE_HOOKS 716d3555b6fSRui Paulo /* 7177159310fSMark Johnston * Tell the DTrace fasttrap provider about the new process so that any 7187159310fSMark Johnston * tracepoints inherited from the parent can be removed. We have to do 7197159310fSMark Johnston * this only after p_state is PRS_NORMAL since the fasttrap module will 7207159310fSMark Johnston * use pfind() later on. 721d3555b6fSRui Paulo */ 722813361c1SMateusz Guzik if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork) 723d3555b6fSRui Paulo dtrace_fasttrap_fork(p1, p2); 724d3555b6fSRui Paulo #endif 725813361c1SMateusz Guzik if (fr->fr_flags & RFPPWAIT) { 7261d7ca9bbSKonstantin Belousov td->td_pflags |= TDP_RFPPWAIT; 7271d7ca9bbSKonstantin Belousov td->td_rfppwait_p = p2; 728fc4f075aSJohn Baldwin td->td_dbgflags |= TDB_VFORK; 7291d7ca9bbSKonstantin Belousov } 7308e6fa660SJohn Baldwin PROC_UNLOCK(p2); 731df8bae1dSRodney W. Grimes 732df8bae1dSRodney W. Grimes /* 733e52327e3SMateusz Guzik * Tell any interested parties about the new process. 734e52327e3SMateusz Guzik */ 735e52327e3SMateusz Guzik knote_fork(p1->p_klist, p2->p_pid); 736e52327e3SMateusz Guzik 737e52327e3SMateusz Guzik /* 738df8bae1dSRodney W. Grimes * Now can be swapped. 739df8bae1dSRodney W. Grimes */ 74057934cd3SJohn Baldwin _PRELE(p1); 7417054ee4eSKonstantin Belousov PROC_UNLOCK(p1); 742813361c1SMateusz Guzik SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags); 7435d217f17SJohn Birrell 744813361c1SMateusz Guzik if (fr->fr_flags & RFPROCDESC) { 745813361c1SMateusz Guzik procdesc_finit(p2->p_procdesc, fp_procdesc); 746813361c1SMateusz Guzik fdrop(fp_procdesc, td); 747813361c1SMateusz Guzik } 748813361c1SMateusz Guzik 7496e22bbf6SKonstantin Belousov /* 7506e22bbf6SKonstantin Belousov * Speculative check for PTRACE_FORK. PTRACE_FORK is not 7516e22bbf6SKonstantin Belousov * synced with forks in progress so it is OK if we miss it 7526e22bbf6SKonstantin Belousov * if being set atm. 7536e22bbf6SKonstantin Belousov */ 7546e22bbf6SKonstantin Belousov if ((p1->p_ptevents & PTRACE_FORK) != 0) { 7556e22bbf6SKonstantin Belousov sx_xlock(&proctree_lock); 7566e22bbf6SKonstantin Belousov PROC_LOCK(p2); 7576e22bbf6SKonstantin Belousov 7586e22bbf6SKonstantin Belousov /* 7596e22bbf6SKonstantin Belousov * p1->p_ptevents & p1->p_pptr are protected by both 7606e22bbf6SKonstantin Belousov * process and proctree locks for modifications, 7616e22bbf6SKonstantin Belousov * so owning proctree_lock allows the race-free read. 7626e22bbf6SKonstantin Belousov */ 7636e22bbf6SKonstantin Belousov if ((p1->p_ptevents & PTRACE_FORK) != 0) { 7646e22bbf6SKonstantin Belousov /* 7656e22bbf6SKonstantin Belousov * Arrange for debugger to receive the fork event. 7666e22bbf6SKonstantin Belousov * 7676e22bbf6SKonstantin Belousov * We can report PL_FLAG_FORKED regardless of 7686e22bbf6SKonstantin Belousov * P_FOLLOWFORK settings, but it does not make a sense 7696e22bbf6SKonstantin Belousov * for runaway child. 7706e22bbf6SKonstantin Belousov */ 7716e22bbf6SKonstantin Belousov td->td_dbgflags |= TDB_FORK; 7726e22bbf6SKonstantin Belousov td->td_dbg_forked = p2->p_pid; 7736e22bbf6SKonstantin Belousov td2->td_dbgflags |= TDB_STOPATFORK; 7746e22bbf6SKonstantin Belousov proc_set_traced(p2, true); 7756e22bbf6SKonstantin Belousov CTR2(KTR_PTRACE, 7766e22bbf6SKonstantin Belousov "do_fork: attaching to new child pid %d: oppid %d", 7776e22bbf6SKonstantin Belousov p2->p_pid, p2->p_oppid); 7782c054ce9SMateusz Guzik proc_reparent(p2, p1->p_pptr, false); 7796e22bbf6SKonstantin Belousov } 7806e22bbf6SKonstantin Belousov PROC_UNLOCK(p2); 7816e22bbf6SKonstantin Belousov sx_xunlock(&proctree_lock); 7826e22bbf6SKonstantin Belousov } 7836e22bbf6SKonstantin Belousov 784a5ac8272SMateusz Guzik racct_proc_fork_done(p2); 785a5ac8272SMateusz Guzik 786813361c1SMateusz Guzik if ((fr->fr_flags & RFSTOPPED) == 0) { 787a5ac8272SMateusz Guzik if (fr->fr_pidp != NULL) 788a5ac8272SMateusz Guzik *fr->fr_pidp = p2->p_pid; 789813361c1SMateusz Guzik /* 790813361c1SMateusz Guzik * If RFSTOPPED not requested, make child runnable and 791813361c1SMateusz Guzik * add to run queue. 792813361c1SMateusz Guzik */ 793813361c1SMateusz Guzik thread_lock(td2); 794813361c1SMateusz Guzik TD_SET_CAN_RUN(td2); 795813361c1SMateusz Guzik sched_add(td2, SRQ_BORING); 796813361c1SMateusz Guzik } else { 797813361c1SMateusz Guzik *fr->fr_procp = p2; 798813361c1SMateusz Guzik } 799afd01097SEdward Tomasz Napierala } 800afd01097SEdward Tomasz Napierala 801c6d31b83SKonstantin Belousov static void 802c6d31b83SKonstantin Belousov ast_vfork(struct thread *td, int tda __unused) 8037d065d87SMateusz Guzik { 8047d065d87SMateusz Guzik struct proc *p, *p2; 8057d065d87SMateusz Guzik 8067d065d87SMateusz Guzik MPASS(td->td_pflags & TDP_RFPPWAIT); 8077d065d87SMateusz Guzik 8087d065d87SMateusz Guzik p = td->td_proc; 8097d065d87SMateusz Guzik /* 8107d065d87SMateusz Guzik * Preserve synchronization semantics of vfork. If 8117d065d87SMateusz Guzik * waiting for child to exec or exit, fork set 8127d065d87SMateusz Guzik * P_PPWAIT on child, and there we sleep on our proc 8137d065d87SMateusz Guzik * (in case of exit). 8147d065d87SMateusz Guzik * 8157d065d87SMateusz Guzik * Do it after the ptracestop() above is finished, to 8167d065d87SMateusz Guzik * not block our debugger until child execs or exits 8177d065d87SMateusz Guzik * to finish vfork wait. 8187d065d87SMateusz Guzik */ 8197d065d87SMateusz Guzik td->td_pflags &= ~TDP_RFPPWAIT; 8207d065d87SMateusz Guzik p2 = td->td_rfppwait_p; 8217d065d87SMateusz Guzik again: 8227d065d87SMateusz Guzik PROC_LOCK(p2); 8237d065d87SMateusz Guzik while (p2->p_flag & P_PPWAIT) { 8247d065d87SMateusz Guzik PROC_LOCK(p); 8257d065d87SMateusz Guzik if (thread_suspend_check_needed()) { 8267d065d87SMateusz Guzik PROC_UNLOCK(p2); 8277d065d87SMateusz Guzik thread_suspend_check(0); 8287d065d87SMateusz Guzik PROC_UNLOCK(p); 8297d065d87SMateusz Guzik goto again; 8307d065d87SMateusz Guzik } else { 8317d065d87SMateusz Guzik PROC_UNLOCK(p); 8327d065d87SMateusz Guzik } 8337d065d87SMateusz Guzik cv_timedwait(&p2->p_pwait, &p2->p_mtx, hz); 8347d065d87SMateusz Guzik } 8357d065d87SMateusz Guzik PROC_UNLOCK(p2); 8367d065d87SMateusz Guzik 8377d065d87SMateusz Guzik if (td->td_dbgflags & TDB_VFORK) { 8387d065d87SMateusz Guzik PROC_LOCK(p); 8397d065d87SMateusz Guzik if (p->p_ptevents & PTRACE_VFORK) 8407d065d87SMateusz Guzik ptracestop(td, SIGTRAP, NULL); 8417d065d87SMateusz Guzik td->td_dbgflags &= ~TDB_VFORK; 8427d065d87SMateusz Guzik PROC_UNLOCK(p); 8437d065d87SMateusz Guzik } 8447d065d87SMateusz Guzik } 8457d065d87SMateusz Guzik 846afd01097SEdward Tomasz Napierala int 84733fd9b9aSMateusz Guzik fork1(struct thread *td, struct fork_req *fr) 848afd01097SEdward Tomasz Napierala { 8494b48959fSKonstantin Belousov struct proc *p1, *newproc; 850afd01097SEdward Tomasz Napierala struct thread *td2; 851afd01097SEdward Tomasz Napierala struct vmspace *vm2; 85260cdcb64SMateusz Guzik struct ucred *cred; 8534b48959fSKonstantin Belousov struct file *fp_procdesc; 8543360b485SKonstantin Belousov struct pgrp *pg; 855afd01097SEdward Tomasz Napierala vm_ooffset_t mem_charged; 85660cdcb64SMateusz Guzik int error, nprocs_new; 857afd01097SEdward Tomasz Napierala static int curfail; 858afd01097SEdward Tomasz Napierala static struct timeval lastfail; 85933fd9b9aSMateusz Guzik int flags, pages; 860232b922cSKonstantin Belousov bool killsx_locked, singlethreaded; 86133fd9b9aSMateusz Guzik 86233fd9b9aSMateusz Guzik flags = fr->fr_flags; 86333fd9b9aSMateusz Guzik pages = fr->fr_pages; 864afd01097SEdward Tomasz Napierala 865813361c1SMateusz Guzik if ((flags & RFSTOPPED) != 0) 866813361c1SMateusz Guzik MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL); 867813361c1SMateusz Guzik else 868813361c1SMateusz Guzik MPASS(fr->fr_procp == NULL); 869813361c1SMateusz Guzik 870f49d8202SKonstantin Belousov /* Check for the undefined or unimplemented flags. */ 871f49d8202SKonstantin Belousov if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0) 872f49d8202SKonstantin Belousov return (EINVAL); 873f49d8202SKonstantin Belousov 874f49d8202SKonstantin Belousov /* Signal value requires RFTSIGZMB. */ 875f49d8202SKonstantin Belousov if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0) 876f49d8202SKonstantin Belousov return (EINVAL); 877f49d8202SKonstantin Belousov 878afd01097SEdward Tomasz Napierala /* Can't copy and clear. */ 879afd01097SEdward Tomasz Napierala if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) 880afd01097SEdward Tomasz Napierala return (EINVAL); 881afd01097SEdward Tomasz Napierala 882f49d8202SKonstantin Belousov /* Check the validity of the signal number. */ 883f49d8202SKonstantin Belousov if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG) 884f49d8202SKonstantin Belousov return (EINVAL); 885f49d8202SKonstantin Belousov 886cfb5f768SJonathan Anderson if ((flags & RFPROCDESC) != 0) { 887cfb5f768SJonathan Anderson /* Can't not create a process yet get a process descriptor. */ 888cfb5f768SJonathan Anderson if ((flags & RFPROC) == 0) 889cfb5f768SJonathan Anderson return (EINVAL); 890cfb5f768SJonathan Anderson 891cfb5f768SJonathan Anderson /* Must provide a place to put a procdesc if creating one. */ 89233fd9b9aSMateusz Guzik if (fr->fr_pd_fd == NULL) 893cfb5f768SJonathan Anderson return (EINVAL); 894b3a73448SMariusz Zaborski 895b3a73448SMariusz Zaborski /* Check if we are using supported flags. */ 896b3a73448SMariusz Zaborski if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0) 897b3a73448SMariusz Zaborski return (EINVAL); 898cfb5f768SJonathan Anderson } 899cfb5f768SJonathan Anderson 900afd01097SEdward Tomasz Napierala p1 = td->td_proc; 901afd01097SEdward Tomasz Napierala 902afd01097SEdward Tomasz Napierala /* 903afd01097SEdward Tomasz Napierala * Here we don't create a new process, but we divorce 904afd01097SEdward Tomasz Napierala * certain parts of a process from itself. 905afd01097SEdward Tomasz Napierala */ 9063e73ff1eSEdward Tomasz Napierala if ((flags & RFPROC) == 0) { 907813361c1SMateusz Guzik if (fr->fr_procp != NULL) 90833fd9b9aSMateusz Guzik *fr->fr_procp = NULL; 909813361c1SMateusz Guzik else if (fr->fr_pidp != NULL) 910813361c1SMateusz Guzik *fr->fr_pidp = 0; 9113e73ff1eSEdward Tomasz Napierala return (fork_norfproc(td, flags)); 9123e73ff1eSEdward Tomasz Napierala } 913afd01097SEdward Tomasz Napierala 9144b48959fSKonstantin Belousov fp_procdesc = NULL; 9154b48959fSKonstantin Belousov newproc = NULL; 9164b48959fSKonstantin Belousov vm2 = NULL; 9173360b485SKonstantin Belousov killsx_locked = false; 918232b922cSKonstantin Belousov singlethreaded = false; 9194b48959fSKonstantin Belousov 9204b48959fSKonstantin Belousov /* 9214b48959fSKonstantin Belousov * Increment the nprocs resource before allocations occur. 9224b48959fSKonstantin Belousov * Although process entries are dynamically created, we still 9234b48959fSKonstantin Belousov * keep a global limit on the maximum number we will 9244b48959fSKonstantin Belousov * create. There are hard-limits as to the number of processes 9254b48959fSKonstantin Belousov * that can run, established by the KVA and memory usage for 9264b48959fSKonstantin Belousov * the process data. 9274b48959fSKonstantin Belousov * 9284b48959fSKonstantin Belousov * Don't allow a nonprivileged user to use the last ten 9294b48959fSKonstantin Belousov * processes; don't let root exceed the limit. 9304b48959fSKonstantin Belousov */ 9314b48959fSKonstantin Belousov nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1; 9327d43b5c9SMark Johnston if (nprocs_new >= maxproc - 10) { 9337d43b5c9SMark Johnston if (priv_check_cred(td->td_ucred, PRIV_MAXPROC) != 0 || 934be8dd142SKonstantin Belousov nprocs_new >= maxproc) { 9354b48959fSKonstantin Belousov error = EAGAIN; 9364b48959fSKonstantin Belousov sx_xlock(&allproc_lock); 9374b48959fSKonstantin Belousov if (ppsratecheck(&lastfail, &curfail, 1)) { 9387d43b5c9SMark Johnston printf("maxproc limit exceeded by uid %u " 9397d43b5c9SMark Johnston "(pid %d); see tuning(7) and " 9407d43b5c9SMark Johnston "login.conf(5)\n", 9414b48959fSKonstantin Belousov td->td_ucred->cr_ruid, p1->p_pid); 9424b48959fSKonstantin Belousov } 9434b48959fSKonstantin Belousov sx_xunlock(&allproc_lock); 9444b48959fSKonstantin Belousov goto fail2; 9454b48959fSKonstantin Belousov } 9467d43b5c9SMark Johnston } 9474b48959fSKonstantin Belousov 948cfb5f768SJonathan Anderson /* 949232b922cSKonstantin Belousov * If we are possibly multi-threaded, and there is a process 950232b922cSKonstantin Belousov * sending a signal to our group right now, ensure that our 951232b922cSKonstantin Belousov * other threads cannot be chosen for the signal queueing. 952232b922cSKonstantin Belousov * Otherwise, this might delay signal action, and make the new 953232b922cSKonstantin Belousov * child escape the signaling. 9543360b485SKonstantin Belousov */ 9553360b485SKonstantin Belousov pg = p1->p_pgrp; 956232b922cSKonstantin Belousov if (p1->p_numthreads > 1) { 957232b922cSKonstantin Belousov if (sx_try_slock(&pg->pg_killsx) != 0) { 958232b922cSKonstantin Belousov killsx_locked = true; 959232b922cSKonstantin Belousov } else { 960232b922cSKonstantin Belousov PROC_LOCK(p1); 961232b922cSKonstantin Belousov if (thread_single(p1, SINGLE_BOUNDARY)) { 962232b922cSKonstantin Belousov PROC_UNLOCK(p1); 9633360b485SKonstantin Belousov error = ERESTART; 9643360b485SKonstantin Belousov goto fail2; 965232b922cSKonstantin Belousov } 966232b922cSKonstantin Belousov PROC_UNLOCK(p1); 967232b922cSKonstantin Belousov singlethreaded = true; 968232b922cSKonstantin Belousov } 969232b922cSKonstantin Belousov } 970232b922cSKonstantin Belousov 971232b922cSKonstantin Belousov /* 972232b922cSKonstantin Belousov * Atomically check for signals and block processes from sending 973232b922cSKonstantin Belousov * a signal to our process group until the child is visible. 974232b922cSKonstantin Belousov */ 975232b922cSKonstantin Belousov if (!killsx_locked && sx_slock_sig(&pg->pg_killsx) != 0) { 976232b922cSKonstantin Belousov error = ERESTART; 977232b922cSKonstantin Belousov goto fail2; 978232b922cSKonstantin Belousov } 979232b922cSKonstantin Belousov if (__predict_false(p1->p_pgrp != pg || sig_intr() != 0)) { 9803360b485SKonstantin Belousov /* 9813360b485SKonstantin Belousov * Either the process was moved to other process 9823360b485SKonstantin Belousov * group, or there is pending signal. sx_slock_sig() 9833360b485SKonstantin Belousov * does not check for signals if not sleeping for the 9843360b485SKonstantin Belousov * lock. 9853360b485SKonstantin Belousov */ 9863360b485SKonstantin Belousov sx_sunlock(&pg->pg_killsx); 987474708c3SKonstantin Belousov killsx_locked = false; 9883360b485SKonstantin Belousov error = ERESTART; 9893360b485SKonstantin Belousov goto fail2; 9903360b485SKonstantin Belousov } else { 9913360b485SKonstantin Belousov killsx_locked = true; 9923360b485SKonstantin Belousov } 9933360b485SKonstantin Belousov 9943360b485SKonstantin Belousov /* 995cfb5f768SJonathan Anderson * If required, create a process descriptor in the parent first; we 996cfb5f768SJonathan Anderson * will abandon it if something goes wrong. We don't finit() until 997cfb5f768SJonathan Anderson * later. 998cfb5f768SJonathan Anderson */ 999cfb5f768SJonathan Anderson if (flags & RFPROCDESC) { 1000b3a73448SMariusz Zaborski error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd, 1001b3a73448SMariusz Zaborski fr->fr_pd_flags, fr->fr_pd_fcaps); 1002b38520f0SEdward Tomasz Napierala if (error != 0) 1003d8f3dc78SKonstantin Belousov goto fail2; 1004757a5642SChristian S.J. Peron AUDIT_ARG_FD(*fr->fr_pd_fd); 1005cfb5f768SJonathan Anderson } 1006cfb5f768SJonathan Anderson 1007afd01097SEdward Tomasz Napierala mem_charged = 0; 1008afd01097SEdward Tomasz Napierala if (pages == 0) 1009edc82223SKonstantin Belousov pages = kstack_pages; 1010afd01097SEdward Tomasz Napierala /* Allocate new proc. */ 1011afd01097SEdward Tomasz Napierala newproc = uma_zalloc(proc_zone, M_WAITOK); 1012afd01097SEdward Tomasz Napierala td2 = FIRST_THREAD_IN_PROC(newproc); 1013afd01097SEdward Tomasz Napierala if (td2 == NULL) { 1014afd01097SEdward Tomasz Napierala td2 = thread_alloc(pages); 1015afd01097SEdward Tomasz Napierala if (td2 == NULL) { 1016afd01097SEdward Tomasz Napierala error = ENOMEM; 101712cec311SMateusz Guzik goto fail2; 1018afd01097SEdward Tomasz Napierala } 1019afd01097SEdward Tomasz Napierala proc_linkup(newproc, td2); 1020afd01097SEdward Tomasz Napierala } else { 1021*800da341SMark Johnston error = thread_recycle(td2, pages); 1022*800da341SMark Johnston if (error != 0) 102312cec311SMateusz Guzik goto fail2; 1024afd01097SEdward Tomasz Napierala } 1025afd01097SEdward Tomasz Napierala 1026afd01097SEdward Tomasz Napierala if ((flags & RFMEM) == 0) { 1027afd01097SEdward Tomasz Napierala vm2 = vmspace_fork(p1->p_vmspace, &mem_charged); 1028afd01097SEdward Tomasz Napierala if (vm2 == NULL) { 1029afd01097SEdward Tomasz Napierala error = ENOMEM; 103012cec311SMateusz Guzik goto fail2; 1031afd01097SEdward Tomasz Napierala } 1032afd01097SEdward Tomasz Napierala if (!swap_reserve(mem_charged)) { 1033afd01097SEdward Tomasz Napierala /* 1034afd01097SEdward Tomasz Napierala * The swap reservation failed. The accounting 1035afd01097SEdward Tomasz Napierala * from the entries of the copied vm2 will be 1036e3043798SPedro F. Giffuni * subtracted in vmspace_free(), so force the 1037afd01097SEdward Tomasz Napierala * reservation there. 1038afd01097SEdward Tomasz Napierala */ 1039afd01097SEdward Tomasz Napierala swap_reserve_force(mem_charged); 1040afd01097SEdward Tomasz Napierala error = ENOMEM; 104112cec311SMateusz Guzik goto fail2; 1042afd01097SEdward Tomasz Napierala } 1043afd01097SEdward Tomasz Napierala } else 1044afd01097SEdward Tomasz Napierala vm2 = NULL; 1045afd01097SEdward Tomasz Napierala 1046097055e2SEdward Tomasz Napierala /* 1047097055e2SEdward Tomasz Napierala * XXX: This is ugly; when we copy resource usage, we need to bump 1048097055e2SEdward Tomasz Napierala * per-cred resource counters. 1049097055e2SEdward Tomasz Napierala */ 105092541c12SOlivier Certner newproc->p_ucred = crcowget(td->td_ucred); 1051097055e2SEdward Tomasz Napierala 1052097055e2SEdward Tomasz Napierala /* 1053097055e2SEdward Tomasz Napierala * Initialize resource accounting for the child process. 1054097055e2SEdward Tomasz Napierala */ 1055097055e2SEdward Tomasz Napierala error = racct_proc_fork(p1, newproc); 1056097055e2SEdward Tomasz Napierala if (error != 0) { 1057097055e2SEdward Tomasz Napierala error = EAGAIN; 1058097055e2SEdward Tomasz Napierala goto fail1; 1059097055e2SEdward Tomasz Napierala } 1060097055e2SEdward Tomasz Napierala 10611dbf9dccSEdward Tomasz Napierala #ifdef MAC 10621dbf9dccSEdward Tomasz Napierala mac_proc_init(newproc); 10631dbf9dccSEdward Tomasz Napierala #endif 10649e590ff0SKonstantin Belousov newproc->p_klist = knlist_alloc(&newproc->p_mtx); 10651dbf9dccSEdward Tomasz Napierala STAILQ_INIT(&newproc->p_ktr); 10661dbf9dccSEdward Tomasz Napierala 106758c77a9dSEdward Tomasz Napierala /* 1068afd01097SEdward Tomasz Napierala * Increment the count of procs running with this uid. Don't allow 1069afd01097SEdward Tomasz Napierala * a nonprivileged user to exceed their current limit. 1070afd01097SEdward Tomasz Napierala */ 107160cdcb64SMateusz Guzik cred = td->td_ucred; 107260cdcb64SMateusz Guzik if (!chgproccnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_NPROC))) { 107360cdcb64SMateusz Guzik if (priv_check_cred(cred, PRIV_PROC_LIMIT) != 0) 107460cdcb64SMateusz Guzik goto fail0; 107560cdcb64SMateusz Guzik chgproccnt(cred->cr_ruidinfo, 1, 0); 1076afd01097SEdward Tomasz Napierala } 1077afd01097SEdward Tomasz Napierala 107860cdcb64SMateusz Guzik do_fork(td, fr, newproc, td2, vm2, fp_procdesc); 1079232b922cSKonstantin Belousov error = 0; 1080232b922cSKonstantin Belousov goto cleanup; 108160cdcb64SMateusz Guzik fail0: 1082afd01097SEdward Tomasz Napierala error = EAGAIN; 10836bea667fSRobert Watson #ifdef MAC 108430d239bcSRobert Watson mac_proc_destroy(newproc); 10856bea667fSRobert Watson #endif 10861dbf9dccSEdward Tomasz Napierala racct_proc_exit(newproc); 1087ab27d5d8SEdward Tomasz Napierala fail1: 10881724c563SMateusz Guzik proc_unset_cred(newproc); 108912cec311SMateusz Guzik fail2: 109069aa768aSKonstantin Belousov if (vm2 != NULL) 109169aa768aSKonstantin Belousov vmspace_free(vm2); 1092c6544064SJohn Baldwin uma_zfree(proc_zone, newproc); 1093de265498SPawel Jakub Dawidek if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) { 109433fd9b9aSMateusz Guzik fdclose(td, fp_procdesc, *fr->fr_pd_fd); 1095cfb5f768SJonathan Anderson fdrop(fp_procdesc, td); 10960a7007b9SPawel Jakub Dawidek } 10974b48959fSKonstantin Belousov atomic_add_int(&nprocs, -1); 1098232b922cSKonstantin Belousov cleanup: 10993360b485SKonstantin Belousov if (killsx_locked) 11003360b485SKonstantin Belousov sx_sunlock(&pg->pg_killsx); 1101232b922cSKonstantin Belousov if (singlethreaded) { 1102232b922cSKonstantin Belousov PROC_LOCK(p1); 1103232b922cSKonstantin Belousov thread_single_end(p1, SINGLE_BOUNDARY); 1104232b922cSKonstantin Belousov PROC_UNLOCK(p1); 1105232b922cSKonstantin Belousov } 1106232b922cSKonstantin Belousov if (error != 0) 110784d37a46SJohn Baldwin pause("fork", hz / 2); 1108c6544064SJohn Baldwin return (error); 1109df8bae1dSRodney W. Grimes } 1110fed06968SJulian Elischer 1111e0d898b4SJulian Elischer /* 1112a7b124c3SJohn Baldwin * Handle the return of a child process from fork1(). This function 1113a7b124c3SJohn Baldwin * is called from the MD fork_trampoline() entry point. 1114a7b124c3SJohn Baldwin */ 1115a7b124c3SJohn Baldwin void 11161d845e86SEdward Tomasz Napierala fork_exit(void (*callout)(void *, struct trapframe *), void *arg, 11171d845e86SEdward Tomasz Napierala struct trapframe *frame) 1118a7b124c3SJohn Baldwin { 1119696058c3SJulian Elischer struct proc *p; 112070fca427SJohn Baldwin struct thread *td; 1121fe54587fSJeff Roberson struct thread *dtd; 112270fca427SJohn Baldwin 1123b0f71f1bSMark Johnston kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED); 1124b0f71f1bSMark Johnston 11250047b9a9SBosko Milekic td = curthread; 11260047b9a9SBosko Milekic p = td->td_proc; 11270047b9a9SBosko Milekic KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new")); 11280047b9a9SBosko Milekic 11296617724cSJeff Roberson CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)", 113093ccd6bfSKonstantin Belousov td, td_get_sched(td), p->p_pid, td->td_name); 11310047b9a9SBosko Milekic 113211bda9b8SJeff Roberson sched_fork_exit(td); 1133fce3b1c3SKonstantin Belousov 1134a7b124c3SJohn Baldwin /* 1135fe54587fSJeff Roberson * Processes normally resume in mi_switch() after being 1136fe54587fSJeff Roberson * cpu_switch()'ed to, but when children start up they arrive here 1137fe54587fSJeff Roberson * instead, so we must do much the same things as mi_switch() would. 1138fe54587fSJeff Roberson */ 1139fe54587fSJeff Roberson if ((dtd = PCPU_GET(deadthread))) { 1140fe54587fSJeff Roberson PCPU_SET(deadthread, NULL); 1141fe54587fSJeff Roberson thread_stash(dtd); 1142fe54587fSJeff Roberson } 1143fe54587fSJeff Roberson thread_unlock(td); 1144fe54587fSJeff Roberson 1145fe54587fSJeff Roberson /* 11465c2cf818SKonstantin Belousov * cpu_fork_kthread_handler intercepts this function call to 1147a7b124c3SJohn Baldwin * have this call a non-return function to stay in kernel mode. 1148a7b124c3SJohn Baldwin * initproc has its own fork handler, but it does return. 1149a7b124c3SJohn Baldwin */ 11505813dc03SJohn Baldwin KASSERT(callout != NULL, ("NULL callout in fork_exit")); 11518865286bSJohn Baldwin callout(arg, frame); 1152a7b124c3SJohn Baldwin 1153a7b124c3SJohn Baldwin /* 1154a7b124c3SJohn Baldwin * Check if a kernel thread misbehaved and returned from its main 1155a7b124c3SJohn Baldwin * function. 1156a7b124c3SJohn Baldwin */ 1157db57c70aSKonstantin Belousov if (p->p_flag & P_KPROC) { 1158a7b124c3SJohn Baldwin printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n", 1159e01eafefSJulian Elischer td->td_name, p->p_pid); 1160fb1f4582SJohn Baldwin kthread_exit(); 1161a7b124c3SJohn Baldwin } 1162a7b124c3SJohn Baldwin mtx_assert(&Giant, MA_NOTOWNED); 1163993182e5SAlexander Leidinger 1164eac62420SOlivier Certner /* 1165eac62420SOlivier Certner * Now going to return to userland. 1166eac62420SOlivier Certner */ 1167eac62420SOlivier Certner 1168e5d81ef1SDmitry Chagin if (p->p_sysent->sv_schedtail != NULL) 1169e5d81ef1SDmitry Chagin (p->p_sysent->sv_schedtail)(td); 1170eac62420SOlivier Certner 1171eac62420SOlivier Certner userret(td, frame); 1172a7b124c3SJohn Baldwin } 1173a7b124c3SJohn Baldwin 1174a7b124c3SJohn Baldwin /* 1175a7b124c3SJohn Baldwin * Simplified back end of syscall(), used when returning from fork() 1176e69ba32fSKonstantin Belousov * directly into user mode. This function is passed in to fork_exit() 1177e69ba32fSKonstantin Belousov * as the first parameter and is called when returning to a new 1178e69ba32fSKonstantin Belousov * userland process. 1179a7b124c3SJohn Baldwin */ 1180a7b124c3SJohn Baldwin void 11811d845e86SEdward Tomasz Napierala fork_return(struct thread *td, struct trapframe *frame) 1182a7b124c3SJohn Baldwin { 11836e22bbf6SKonstantin Belousov struct proc *p; 11846fa39a73SKonstantin Belousov 11856fa39a73SKonstantin Belousov p = td->td_proc; 1186189ac973SJohn Baldwin if (td->td_dbgflags & TDB_STOPATFORK) { 11876fa39a73SKonstantin Belousov PROC_LOCK(p); 11886e22bbf6SKonstantin Belousov if ((p->p_flag & P_TRACED) != 0) { 11896fa39a73SKonstantin Belousov /* 11906e22bbf6SKonstantin Belousov * Inform the debugger if one is still present. 11916fa39a73SKonstantin Belousov */ 1192b7a25e63SKonstantin Belousov td->td_dbgflags |= TDB_CHILD | TDB_SCX | TDB_FSTP; 119382a4538fSEric Badger ptracestop(td, SIGSTOP, NULL); 1194189ac973SJohn Baldwin td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX); 11956fa39a73SKonstantin Belousov } else { 11966fa39a73SKonstantin Belousov /* 11976fa39a73SKonstantin Belousov * ... otherwise clear the request. 11986fa39a73SKonstantin Belousov */ 11996fa39a73SKonstantin Belousov td->td_dbgflags &= ~TDB_STOPATFORK; 12006fa39a73SKonstantin Belousov } 12016fa39a73SKonstantin Belousov PROC_UNLOCK(p); 1202653738e8SJohn Baldwin } else if (p->p_flag & P_TRACED) { 1203189ac973SJohn Baldwin /* 1204189ac973SJohn Baldwin * This is the start of a new thread in a traced 1205189ac973SJohn Baldwin * process. Report a system call exit event. 1206189ac973SJohn Baldwin */ 1207189ac973SJohn Baldwin PROC_LOCK(p); 1208189ac973SJohn Baldwin td->td_dbgflags |= TDB_SCX; 12098d570f64SJohn Baldwin if ((p->p_ptevents & PTRACE_SCX) != 0 || 12105fcfab6eSJohn Baldwin (td->td_dbgflags & TDB_BORN) != 0) 121182a4538fSEric Badger ptracestop(td, SIGTRAP, NULL); 12125fcfab6eSJohn Baldwin td->td_dbgflags &= ~(TDB_SCX | TDB_BORN); 1213189ac973SJohn Baldwin PROC_UNLOCK(p); 12146fa39a73SKonstantin Belousov } 1215a7b124c3SJohn Baldwin 1216cc7b7306SJamie Gritton /* 1217cc7b7306SJamie Gritton * If the prison was killed mid-fork, die along with it. 1218cc7b7306SJamie Gritton */ 1219cc7b7306SJamie Gritton if (!prison_isalive(td->td_ucred->cr_prison)) 1220cc7b7306SJamie Gritton exit1(td, 0, SIGKILL); 1221cc7b7306SJamie Gritton 1222a7b124c3SJohn Baldwin #ifdef KTRACE 12230282f875SDmitry Chagin if (KTRPOINT(td, KTR_SYSRET)) 12240282f875SDmitry Chagin ktrsysret(td->td_sa.code, 0, 0); 1225a7b124c3SJohn Baldwin #endif 1226a7b124c3SJohn Baldwin } 1227c6d31b83SKonstantin Belousov 1228c6d31b83SKonstantin Belousov static void 1229c6d31b83SKonstantin Belousov fork_init(void *arg __unused) 1230c6d31b83SKonstantin Belousov { 1231c6d31b83SKonstantin Belousov ast_register(TDA_VFORK, ASTR_ASTF_REQUIRED | ASTR_TDP, TDP_RFPPWAIT, 1232c6d31b83SKonstantin Belousov ast_vfork); 1233c6d31b83SKonstantin Belousov } 1234c6d31b83SKonstantin Belousov SYSINIT(fork, SI_SUB_INTRINSIC, SI_ORDER_ANY, fork_init, NULL); 1235