/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_fork.c	8.6 (Berkeley) 4/8/94
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall.h>
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/ktr.h>
#include <sys/ktrace.h>
#include <sys/kthread.h>
#include <sys/unistd.h>
#include <sys/jail.h>
#include <sys/sx.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

#include <sys/vmmeter.h>
#include <sys/user.h>
#include <machine/critical.h>

static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback");

/*
 * These are the structures used to create a callout list for things to do
 * when forking a process.
 */
struct forklist {
	forklist_fn function;
	TAILQ_ENTRY(forklist) next;
};

static struct sx fork_list_lock;

TAILQ_HEAD(forklist_head, forklist);
static struct forklist_head fork_list = TAILQ_HEAD_INITIALIZER(fork_list);

#ifndef _SYS_SYSPROTO_H_
struct fork_args {
	int	dummy;
};
#endif

int forksleep;			/* Place for fork1() to sleep on. */

static void
init_fork_list(void *data __unused)
{

	sx_init(&fork_list_lock, "fork list");
}
SYSINIT(fork_list, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_fork_list, NULL);

/*
 * MPSAFE
 */
/* ARGSUSED */
int
fork(td, uap)
	struct thread *td;
	struct fork_args *uap;
{
	int error;
	struct proc *p2;

	mtx_lock(&Giant);
	error = fork1(td, RFFDG | RFPROC, 0, &p2);
	if (error == 0) {
		td->td_retval[0] = p2->p_pid;
		td->td_retval[1] = 0;
	}
	mtx_unlock(&Giant);
	return error;
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
vfork(td, uap)
	struct thread *td;
	struct vfork_args *uap;
{
	int error;
	struct proc *p2;

	mtx_lock(&Giant);
	error = fork1(td, RFFDG | RFPROC | RFPPWAIT | RFMEM, 0, &p2);
	if (error == 0) {
		td->td_retval[0] = p2->p_pid;
		td->td_retval[1] = 0;
	}
	mtx_unlock(&Giant);
	return error;
}

/*
 * MPSAFE
 */
int
rfork(td, uap)
	struct thread *td;
	struct rfork_args *uap;
{
	int error;
	struct proc *p2;

	/* Don't allow kernel only flags. */
	if ((uap->flags & RFKERNELONLY) != 0)
		return (EINVAL);
	mtx_lock(&Giant);
	error = fork1(td, uap->flags, 0, &p2);
	if (error == 0) {
		td->td_retval[0] = p2 ? p2->p_pid : 0;
		td->td_retval[1] = 0;
	}
	mtx_unlock(&Giant);
	return error;
}

int nprocs = 1;		/* process 0 */
int lastpid = 0;
SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
    "Last used PID");

/*
 * Random component to lastpid generation.  We mix in a random factor to make
 * it a little harder to predict.  We sanity check the modulus value to avoid
 * doing it in critical paths.  Don't let it be too small or we pointlessly
 * waste randomness entropy, and don't let it be impossibly large.  Using a
 * modulus that is too big causes a LOT more process table scans and slows
 * down fork processing as the pidchecked caching is defeated.
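 *
 * For example (illustrative only), with the modulus set to 1000 the starting
 * candidate pid in fork1() below becomes
 *
 *	trypid = lastpid + 1 + arc4random() % 1000;
 *
 * before the usual wrap-around and in-use checks are applied.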
 */
static int randompid = 0;

static int
sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
{
	int error, pid;

	sysctl_wire_old_buffer(req, sizeof(int));
	sx_xlock(&allproc_lock);
	pid = randompid;
	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error == 0 && req->newptr != NULL) {
		if (pid < 0 || pid > PID_MAX - 100)	/* out of range */
			pid = PID_MAX - 100;
		else if (pid < 2)			/* NOP */
			pid = 0;
		else if (pid < 100)			/* Make it reasonable */
			pid = 100;
		randompid = pid;
	}
	sx_xunlock(&allproc_lock);
	return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
    0, 0, sysctl_kern_randompid, "I", "Random PID modulus");

int
fork1(td, flags, pages, procp)
	struct thread *td;		/* parent proc */
	int flags;
	int pages;
	struct proc **procp;		/* child proc */
{
	struct proc *p2, *pptr;
	uid_t uid;
	struct proc *newproc;
	int trypid;
	int ok;
	static int pidchecked = 0;
	struct forklist *ep;
	struct filedesc *fd;
	struct proc *p1 = td->td_proc;
	struct thread *td2;
	struct kse *ke2;
	struct ksegrp *kg2;
	struct sigacts *newsigacts;
	struct procsig *newprocsig;

	GIANT_REQUIRED;

	/* Can't copy and clear */
	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
		return (EINVAL);

	/*
	 * Here we don't create a new process, but we divorce
	 * certain parts of a process from itself.
	 */
	if ((flags & RFPROC) == 0) {
		vm_forkproc(td, NULL, NULL, flags);

		/*
		 * Close all file descriptors.
		 */
		if (flags & RFCFDG) {
			struct filedesc *fdtmp;
			fdtmp = fdinit(td);	/* XXXKSE */
			PROC_LOCK(p1);
			fdfree(td);		/* XXXKSE */
			p1->p_fd = fdtmp;
			PROC_UNLOCK(p1);
		}

		/*
		 * Unshare file descriptors (from parent.)
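		 * If the table is currently shared, the process gets its own
		 * private copy so that later changes are not visible to the
		 * former sharers.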
		 */
		if (flags & RFFDG) {
			FILEDESC_LOCK(p1->p_fd);
			if (p1->p_fd->fd_refcnt > 1) {
				struct filedesc *newfd;

				newfd = fdcopy(td);
				FILEDESC_UNLOCK(p1->p_fd);
				PROC_LOCK(p1);
				fdfree(td);
				p1->p_fd = newfd;
				PROC_UNLOCK(p1);
			} else
				FILEDESC_UNLOCK(p1->p_fd);
		}
		*procp = NULL;
		return (0);
	}

	if (p1->p_flag & P_KSES) {
		/*
		 * Idle the other threads for a second.
		 * Since the user space is copied, it must remain stable.
		 * In addition, all threads (from the user perspective)
		 * need to either be suspended or in the kernel,
		 * where they will try restart in the parent and will
		 * be aborted in the child.
		 */
		PROC_LOCK(p1);
		if (thread_single(SINGLE_NO_EXIT)) {
			/* Abort.. someone else is single threading before us */
			PROC_UNLOCK(p1);
			return (ERESTART);
		}
		PROC_UNLOCK(p1);
		/*
		 * All other activity in this process
		 * is now suspended at the user boundary,
		 * (or other safe places if we think of any).
		 */
	}

	/* Allocate new proc. */
	newproc = uma_zalloc(proc_zone, M_WAITOK);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last ten processes; don't let root
	 * exceed the limit.  The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	sx_xlock(&allproc_lock);
	uid = td->td_ucred->cr_ruid;
	if ((nprocs >= maxproc - 10 && uid != 0) || nprocs >= maxproc) {
		sx_xunlock(&allproc_lock);
		uma_zfree(proc_zone, newproc);
		if (p1->p_flag & P_KSES) {
			PROC_LOCK(p1);
			thread_single_end();
			PROC_UNLOCK(p1);
		}
		tsleep(&forksleep, PUSER, "fork", hz / 2);
		return (EAGAIN);
	}
	/*
	 * Increment the count of procs running with this uid.  Don't allow
	 * a nonprivileged user to exceed their current limit.
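	 * (The limit checked is the parent's RLIMIT_NPROC; uid 0 is not
	 * limited here.)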
	 */
	PROC_LOCK(p1);
	ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
	    (uid != 0) ? p1->p_rlimit[RLIMIT_NPROC].rlim_cur : 0);
	PROC_UNLOCK(p1);
	if (!ok) {
		sx_xunlock(&allproc_lock);
		uma_zfree(proc_zone, newproc);
		if (p1->p_flag & P_KSES) {
			PROC_LOCK(p1);
			thread_single_end();
			PROC_UNLOCK(p1);
		}
		tsleep(&forksleep, PUSER, "fork", hz / 2);
		return (EAGAIN);
	}

	/*
	 * Increment the nprocs resource before blocking can occur.  There
	 * are hard-limits as to the number of processes that can run.
	 */
	nprocs++;

	/*
	 * Find an unused process ID.  We remember a range of unused IDs
	 * ready to use (from lastpid+1 through pidchecked-1).
	 *
	 * If RFHIGHPID is set (used during system boot), do not allocate
	 * low-numbered pids.
	 */
	trypid = lastpid + 1;
	if (flags & RFHIGHPID) {
		if (trypid < 10) {
			trypid = 10;
		}
	} else {
		if (randompid)
			trypid += arc4random() % randompid;
	}
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (trypid >= PID_MAX) {
		trypid = trypid % PID_MAX;
		if (trypid < 100)
			trypid += 100;
		pidchecked = 0;
	}
	if (trypid >= pidchecked) {
		int doingzomb = 0;

		pidchecked = PID_MAX;
		/*
		 * Scan the active and zombie procs to check whether this pid
		 * is in use.  Remember the lowest pid that's greater
		 * than trypid, so we can avoid checking for a while.
		 */
		p2 = LIST_FIRST(&allproc);
again:
		for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
			PROC_LOCK(p2);
			while (p2->p_pid == trypid ||
			    p2->p_pgrp->pg_id == trypid ||
			    p2->p_session->s_sid == trypid) {
				trypid++;
				if (trypid >= pidchecked) {
					PROC_UNLOCK(p2);
					goto retry;
				}
			}
			if (p2->p_pid > trypid && pidchecked > p2->p_pid)
				pidchecked = p2->p_pid;
			if (p2->p_pgrp->pg_id > trypid &&
			    pidchecked > p2->p_pgrp->pg_id)
				pidchecked = p2->p_pgrp->pg_id;
			if (p2->p_session->s_sid > trypid &&
			    pidchecked > p2->p_session->s_sid)
				pidchecked = p2->p_session->s_sid;
			PROC_UNLOCK(p2);
		}
		if (!doingzomb) {
			doingzomb = 1;
			p2 = LIST_FIRST(&zombproc);
			goto again;
		}
	}

	/*
	 * RFHIGHPID does not mess with the lastpid counter during boot.
	 */
	if (flags & RFHIGHPID)
		pidchecked = 0;
	else
		lastpid = trypid;

	p2 = newproc;
	p2->p_state = PRS_NEW;		/* protect against others */
	p2->p_pid = trypid;
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	sx_xunlock(&allproc_lock);

	/*
	 * Malloc things while we don't hold any locks.
	 */
	if (flags & RFSIGSHARE) {
		MALLOC(newsigacts, struct sigacts *,
		    sizeof(struct sigacts), M_SUBPROC, M_WAITOK);
		newprocsig = NULL;
	} else {
		newsigacts = NULL;
		MALLOC(newprocsig, struct procsig *, sizeof(struct procsig),
		    M_SUBPROC, M_WAITOK);
	}

	/*
	 * Copy filedesc.
	 * XXX: This is busted.  fd*() need to not take proc
	 * arguments or something.
	 */
	if (flags & RFCFDG)
		fd = fdinit(td);
	else if (flags & RFFDG) {
		FILEDESC_LOCK(p1->p_fd);
		fd = fdcopy(td);
		FILEDESC_UNLOCK(p1->p_fd);
	} else
		fd = fdshare(p1);

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
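	 * (Those sections are delimited by the startzero/endzero and
	 * startcopy/endcopy marker fields in the respective structures;
	 * the RANGEOF() macro below computes each section's size.)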
	 */
	td2 = FIRST_THREAD_IN_PROC(p2);
	kg2 = FIRST_KSEGRP_IN_PROC(p2);
	ke2 = FIRST_KSE_IN_KSEGRP(kg2);

	/* Allocate and switch to an alternate kstack if specified */
	if (pages != 0)
		pmap_new_altkstack(td2, pages);

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

	bzero(&p2->p_startzero,
	    (unsigned) RANGEOF(struct proc, p_startzero, p_endzero));
	bzero(&ke2->ke_startzero,
	    (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero));
	bzero(&td2->td_startzero,
	    (unsigned) RANGEOF(struct thread, td_startzero, td_endzero));
	bzero(&kg2->kg_startzero,
	    (unsigned) RANGEOF(struct ksegrp, kg_startzero, kg_endzero));

	mtx_init(&p2->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) RANGEOF(struct proc, p_startcopy, p_endcopy));
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
	bcopy(&td->td_ksegrp->kg_startcopy, &kg2->kg_startcopy,
	    (unsigned) RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
#undef RANGEOF

	/* Set up the thread as an active thread (as if runnable). */
	ke2->ke_state = KES_THREAD;
	ke2->ke_thread = td2;
	td2->td_kse = ke2;
	td2->td_flags &= ~TDF_UNBOUND;	/* For the rest of this syscall. */

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_forkproc.
	 */
	p2->p_flag = 0;
	mtx_lock_spin(&sched_lock);
	p2->p_sflag = PS_INMEM;
	if (p1->p_sflag & PS_PROFIL)
		startprofclock(p2);
	/*
	 * Allow the scheduler to adjust the priority of the child and
	 * parent while we hold the sched_lock.
	 */
	sched_fork(td->td_ksegrp, kg2);

	mtx_unlock_spin(&sched_lock);
	p2->p_ucred = crhold(td->td_ucred);
	td2->td_ucred = crhold(p2->p_ucred);	/* XXXKSE */

	/*
	 * Setup linkage for kernel based threading
	 */
	if ((flags & RFTHREAD) != 0) {
		/*
		 * XXX: This assumes a leader is a parent or grandparent of
		 * all processes in a task.
		 */
		if (p1->p_leader != p1)
			PROC_LOCK(p1->p_leader);
		p2->p_peers = p1->p_peers;
		p1->p_peers = p2;
		p2->p_leader = p1->p_leader;
		if (p1->p_leader != p1)
			PROC_UNLOCK(p1->p_leader);
	} else {
		p2->p_peers = NULL;
		p2->p_leader = p2;
	}

	pargs_hold(p2->p_args);

	if (flags & RFSIGSHARE) {
		p2->p_procsig = p1->p_procsig;
		p2->p_procsig->ps_refcnt++;
		if (p1->p_sigacts == &p1->p_uarea->u_sigacts) {
			/*
			 * Set p_sigacts to the new shared structure.
			 * Note that this is updating p1->p_sigacts at the
			 * same time, since p_sigacts is just a pointer to
			 * the shared p_procsig->ps_sigacts.
			 */
			p2->p_sigacts = newsigacts;
			newsigacts = NULL;
			*p2->p_sigacts = p1->p_uarea->u_sigacts;
		}
	} else {
		p2->p_procsig = newprocsig;
		newprocsig = NULL;
		bcopy(p1->p_procsig, p2->p_procsig, sizeof(*p2->p_procsig));
		p2->p_procsig->ps_refcnt = 1;
		p2->p_sigacts = NULL;	/* finished in vm_forkproc() */
	}
	if (flags & RFLINUXTHPN)
		p2->p_sigparent = SIGUSR1;
	else
		p2->p_sigparent = SIGCHLD;

	/* Bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		VREF(p2->p_textvp);
	p2->p_fd = fd;
	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);

	/*
	 * If p_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
	if (p1->p_limit->p_lflags & PL_SHAREMOD)
		p2->p_limit = limcopy(p1->p_limit);
	else {
		p2->p_limit = p1->p_limit;
		p2->p_limit->p_refcnt++;
	}
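
	/*
	 * Hook the new process into its parent's process group and inherit
	 * the remaining flags; this and the parent/child linkage further
	 * below are done under the proctree lock.
	 */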
	sx_xlock(&proctree_lock);
	PGRP_LOCK(p1->p_pgrp);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	/*
	 * Preserve some more flags in subprocess.  PS_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & (P_SUGID | P_ALTSTACK);
	SESS_LOCK(p1->p_session);
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	SESS_UNLOCK(p1->p_session);
	if (flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	LIST_INSERT_AFTER(p1, p2, p_pglist);
	PGRP_UNLOCK(p1->p_pgrp);
	LIST_INIT(&p2->p_children);

	callout_init(&p2->p_itcallout, 0);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 */
	mtx_lock(&ktrace_mtx);
	KASSERT(p2->p_tracep == NULL, ("new process has a ktrace vnode"));
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
	mtx_unlock(&ktrace_mtx);
#endif

	/*
	 * If PF_FORK is set, the child process inherits the
	 * procfs ioctl flags from its parent.
	 */
	if (p1->p_pfsflags & PF_FORK) {
		p2->p_stops = p1->p_stops;
		p2->p_pfsflags = p1->p_pfsflags;
	}

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	_PHOLD(p1);
	PROC_UNLOCK(p1);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process becomes a child
	 * of init.  This effectively disassociates the child from the
	 * parent.
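	 * (init, rather than the caller, then reaps the child when it
	 * exits.)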
	 */
	if (flags & RFNOWAIT)
		pptr = initproc;
	else
		pptr = p1;
	p2->p_pptr = pptr;
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	PROC_UNLOCK(p2);
	sx_xunlock(&proctree_lock);

	KASSERT(newprocsig == NULL, ("unused newprocsig"));
	if (newsigacts != NULL)
		FREE(newsigacts, M_SUBPROC);
	/*
	 * Finish creating the child process.  It will return via a different
	 * execution path later.  (ie: directly into user mode)
	 */
	vm_forkproc(td, p2, td2, flags);

	if (flags == (RFFDG | RFPROC)) {
		cnt.v_forks++;
		cnt.v_forkpages += p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize;
	} else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
		cnt.v_vforks++;
		cnt.v_vforkpages += p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize;
	} else if (p1 == &proc0) {
		cnt.v_kthreads++;
		cnt.v_kthreadpages += p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize;
	} else {
		cnt.v_rforks++;
		cnt.v_rforkpages += p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize;
	}

	/*
	 * Both processes are set up, now check if any loadable modules want
	 * to adjust anything.
	 *   What if they have an error? XXX
	 */
	sx_slock(&fork_list_lock);
	TAILQ_FOREACH(ep, &fork_list, next) {
		(*ep->function)(p1, p2, flags);
	}
	sx_sunlock(&fork_list_lock);

	/*
	 * If RFSTOPPED not requested, make child runnable and add to
	 * run queue.
	 */
	microtime(&(p2->p_stats->p_start));
	p2->p_acflag = AFORK;
	if ((flags & RFSTOPPED) == 0) {
		mtx_lock_spin(&sched_lock);
		p2->p_state = PRS_NORMAL;
		TD_SET_CAN_RUN(td2);
		setrunqueue(td2);
		mtx_unlock_spin(&sched_lock);
	}

	/*
	 * Now can be swapped.
	 */
	PROC_LOCK(p1);
	_PRELE(p1);

	/*
	 * tell any interested parties about the new process
	 */
	KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
	PROC_UNLOCK(p1);

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	PROC_LOCK(p2);
	while (p2->p_flag & P_PPWAIT)
		msleep(p1, &p2->p_mtx, PWAIT, "ppwait", 0);
	PROC_UNLOCK(p2);

	/*
	 * If other threads are waiting, let them continue now
	 */
	if (p1->p_flag & P_KSES) {
		PROC_LOCK(p1);
		thread_single_end();
		PROC_UNLOCK(p1);
	}

	/*
	 * Return child proc pointer to parent.
	 */
	*procp = p2;
	return (0);
}

/*
 * The next two functions are general routines to handle adding/deleting
 * items on the fork callout list.
 *
 * at_fork():
 * Take the arguments given and put them onto the fork callout list;
 * however, first make sure that it's not already there.
 * Returns 0 on success or a standard error number.
 */

int
at_fork(function)
	forklist_fn function;
{
	struct forklist *ep;

#ifdef INVARIANTS
	/* let the programmer know if he's been stupid */
	if (rm_at_fork(function))
		printf("WARNING: fork callout entry (%p) already present\n",
		    function);
#endif
	ep = malloc(sizeof(*ep), M_ATFORK, M_NOWAIT);
	if (ep == NULL)
		return (ENOMEM);
	ep->function = function;
	sx_xlock(&fork_list_lock);
	TAILQ_INSERT_TAIL(&fork_list, ep, next);
	sx_xunlock(&fork_list_lock);
	return (0);
}

/*
 * Scan the fork callout list for the given item and remove it.
 * Returns the number of items removed (0 or 1).
 */

int
rm_at_fork(function)
	forklist_fn function;
{
	struct forklist *ep;

	sx_xlock(&fork_list_lock);
	TAILQ_FOREACH(ep, &fork_list, next) {
		if (ep->function == function) {
			TAILQ_REMOVE(&fork_list, ep, next);
			sx_xunlock(&fork_list_lock);
			free(ep, M_ATFORK);
			return (1);
		}
	}
	sx_xunlock(&fork_list_lock);
	return (0);
}

/*
 * Handle the return of a child process from fork1().  This function
 * is called from the MD fork_trampoline() entry point.
 */
void
fork_exit(callout, arg, frame)
	void (*callout)(void *, struct trapframe *);
	void *arg;
	struct trapframe *frame;
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;

	td->td_kse->ke_oncpu = PCPU_GET(cpuid);
	p->p_state = PRS_NORMAL;
	/*
	 * Finish setting up thread glue.  We need to initialize
	 * the thread into a td_critnest=1 state.  Some platforms
	 * may have already partially or fully initialized td_critnest
	 * and/or td_md.md_savecrit (when applicable).
	 *
	 * see <arch>/<arch>/critical.c
	 */
	sched_lock.mtx_lock = (uintptr_t)td;
	sched_lock.mtx_recurse = 0;
	cpu_critical_fork_exit();
	CTR3(KTR_PROC, "fork_exit: new thread %p (pid %d, %s)", td, p->p_pid,
	    p->p_comm);
	if (PCPU_GET(switchtime.sec) == 0)
		binuptime(PCPU_PTR(switchtime));
	PCPU_SET(switchticks, ticks);
	mtx_unlock_spin(&sched_lock);

	/*
	 * cpu_set_fork_handler intercepts this function call to
	 * have this call a non-return function to stay in kernel mode.
	 * initproc has its own fork handler, but it does return.
	 */
	KASSERT(callout != NULL, ("NULL callout in fork_exit"));
	callout(arg, frame);

	/*
	 * Check if a kernel thread misbehaved and returned from its main
	 * function.
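	 * If it did, print a diagnostic and make it exit via kthread_exit().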
	 */
	PROC_LOCK(p);
	if (p->p_flag & P_KTHREAD) {
		PROC_UNLOCK(p);
		mtx_lock(&Giant);
		printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",
		    p->p_comm, p->p_pid);
		kthread_exit(0);
	}
	PROC_UNLOCK(p);
#ifdef DIAGNOSTIC
	cred_free_thread(td);
#endif
	mtx_assert(&Giant, MA_NOTOWNED);
}

/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.  Giant is not held on entry, and must not
 * be held on return.  This function is passed in to fork_exit() as the
 * first parameter and is called when returning to a new userland process.
 */
void
fork_return(td, frame)
	struct thread *td;
	struct trapframe *frame;
{

	userret(td, frame, 0);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(SYS_fork, 0, 0);
#endif
	mtx_assert(&Giant, MA_NOTOWNED);
}