/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_fork.c	8.6 (Berkeley) 4/8/94
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/syscall.h>
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/ktr.h>
#include <sys/ktrace.h>
#include <sys/kthread.h>
#include <sys/unistd.h>
#include <sys/jail.h>
#include <sys/sx.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

#include <sys/vmmeter.h>
#include <sys/user.h>

static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback");

/*
 * These are the structures used to create a callout list of things to do
 * when forking a process.
 */
struct forklist {
	forklist_fn function;
	TAILQ_ENTRY(forklist) next;
};

static struct sx fork_list_lock;

TAILQ_HEAD(forklist_head, forklist);
static struct forklist_head fork_list = TAILQ_HEAD_INITIALIZER(fork_list);

#ifndef _SYS_SYSPROTO_H_
struct fork_args {
	int	dummy;
};
#endif

int forksleep;			/* Place for fork1() to sleep on. */

static void
init_fork_list(void *data __unused)
{

	sx_init(&fork_list_lock, "fork list");
}
SYSINIT(fork_list, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_fork_list, NULL);

/*
 * MPSAFE
 */
/* ARGSUSED */
int
fork(td, uap)
	struct thread *td;
	struct fork_args *uap;
{
	int error;
	struct proc *p2;

	mtx_lock(&Giant);
	error = fork1(td, RFFDG | RFPROC, &p2);
	if (error == 0) {
		td->td_retval[0] = p2->p_pid;
		td->td_retval[1] = 0;
	}
	mtx_unlock(&Giant);
	return error;
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
vfork(td, uap)
	struct thread *td;
	struct vfork_args *uap;
{
	int error;
	struct proc *p2;

	mtx_lock(&Giant);
	error = fork1(td, RFFDG | RFPROC | RFPPWAIT | RFMEM, &p2);
	if (error == 0) {
		td->td_retval[0] = p2->p_pid;
		td->td_retval[1] = 0;
	}
	mtx_unlock(&Giant);
	return error;
}

/*
 * MPSAFE
 */
int
rfork(td, uap)
	struct thread *td;
	struct rfork_args *uap;
{
	int error;
	struct proc *p2;

	/* Don't allow kernel only flags. */
	if ((uap->flags & RFKERNELONLY) != 0)
		return (EINVAL);
	mtx_lock(&Giant);
	error = fork1(td, uap->flags, &p2);
	if (error == 0) {
		td->td_retval[0] = p2 ? p2->p_pid : 0;
		td->td_retval[1] = 0;
	}
	mtx_unlock(&Giant);
	return error;
}


int	nprocs = 1;		/* process 0 */
int	lastpid = 0;
SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
    "Last used PID");

/*
 * Random component to lastpid generation.  We mix in a random factor to make
 * it a little harder to predict.  We sanity check the modulus value to avoid
 * doing it in critical paths.  Don't let it be too small or we pointlessly
 * waste randomness entropy, and don't let it be impossibly large.  Using a
 * modulus that is too big causes a LOT more process table scans and slows
 * down fork processing as the pidchecked caching is defeated.
 */
static int randompid = 0;

static int
sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
{
	int error, pid;

	pid = randompid;
	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);
	if (pid < 0 || pid > PID_MAX - 100)	/* out of range */
		pid = PID_MAX - 100;
	else if (pid < 2)			/* NOP */
		pid = 0;
	else if (pid < 100)			/* Make it reasonable */
		pid = 100;
	randompid = pid;
	return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
    0, 0, sysctl_kern_randompid, "I", "Random PID modulus");

#if 0
void
kse_init(struct kse *kse1, struct kse *kse2)
{
}

void
thread_init(struct thread *thread1, struct thread *thread2)
{
}

void
ksegrp_init(struct ksegrp *ksegrp1, struct ksegrp *ksegrp2)
{
}
#endif

int
fork1(td, flags, procp)
	struct thread *td;		/* parent proc */
	int flags;
	struct proc **procp;		/* child proc */
{
	struct proc *p2, *pptr;
	uid_t uid;
	struct proc *newproc;
	int trypid;
	int ok;
	static int pidchecked = 0;
	struct forklist *ep;
	struct filedesc *fd;
	struct proc *p1 = td->td_proc;
	struct thread *td2;
	struct kse *ke2;
	struct ksegrp *kg2;

	GIANT_REQUIRED;

	/* Can't copy and clear */
	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
		return (EINVAL);

	/*
	 * Here we don't create a new process, but we divorce
	 * certain parts of a process from itself.
	 */
	if ((flags & RFPROC) == 0) {
		vm_forkproc(td, NULL, NULL, flags);

		/*
		 * Close all file descriptors.
		 */
		if (flags & RFCFDG) {
			struct filedesc *fdtmp;
			fdtmp = fdinit(td);	/* XXXKSE */
			PROC_LOCK(p1);
			fdfree(td);		/* XXXKSE */
			p1->p_fd = fdtmp;
			PROC_UNLOCK(p1);
		}

		/*
		 * Unshare file descriptors (from parent.)
		 */
		if (flags & RFFDG) {
			FILEDESC_LOCK(p1->p_fd);
			if (p1->p_fd->fd_refcnt > 1) {
				struct filedesc *newfd;

				newfd = fdcopy(td);
				FILEDESC_UNLOCK(p1->p_fd);
				PROC_LOCK(p1);
				fdfree(td);
				p1->p_fd = newfd;
				PROC_UNLOCK(p1);
			} else
				FILEDESC_UNLOCK(p1->p_fd);
		}
		*procp = NULL;
		return (0);
	}

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last process; don't let root
	 * exceed the limit.  The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	uid = p1->p_ucred->cr_ruid;
	if ((nprocs >= maxproc - 10 && uid != 0) || nprocs >= maxproc) {
		tsleep(&forksleep, PUSER, "fork", hz / 2);
		return (EAGAIN);
	}
	/*
	 * Increment the nprocs resource before blocking can occur.  There
	 * are hard-limits as to the number of processes that can run.
	 */
	nprocs++;

	/*
	 * Increment the count of procs running with this uid.  Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	ok = chgproccnt(p1->p_ucred->cr_ruidinfo, 1,
	    (uid != 0) ? p1->p_rlimit[RLIMIT_NPROC].rlim_cur : 0);
	if (!ok) {
		/*
		 * Back out the process count
		 */
		nprocs--;
		tsleep(&forksleep, PUSER, "fork", hz / 2);
		return (EAGAIN);
	}

	/* Allocate new proc. */
	newproc = uma_zalloc(proc_zone, M_WAITOK);
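
	/*
	 * The new proc is only partially constructed at this point: it is
	 * not yet on the allproc list or the PID hash, so the rest of the
	 * system cannot see it.  It is linked in further below, once an
	 * unused pid has been chosen.
	 */
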
	/*
	 * Setup linkage for kernel based threading
	 */
	if ((flags & RFTHREAD) != 0) {
		newproc->p_peers = p1->p_peers;
		p1->p_peers = newproc;
		newproc->p_leader = p1->p_leader;
	} else {
		newproc->p_peers = NULL;
		newproc->p_leader = newproc;
	}

	newproc->p_vmspace = NULL;

	/*
	 * Find an unused process ID.  We remember a range of unused IDs
	 * ready to use (from lastpid+1 through pidchecked-1).
	 *
	 * If RFHIGHPID is set (used during system boot), do not allocate
	 * low-numbered pids.
	 */
	sx_xlock(&allproc_lock);
	trypid = lastpid + 1;
	if (flags & RFHIGHPID) {
		if (trypid < 10) {
			trypid = 10;
		}
	} else {
		if (randompid)
			trypid += arc4random() % randompid;
	}
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (trypid >= PID_MAX) {
		trypid = trypid % PID_MAX;
		if (trypid < 100)
			trypid += 100;
		pidchecked = 0;
	}
	if (trypid >= pidchecked) {
		int doingzomb = 0;

		pidchecked = PID_MAX;
		/*
		 * Scan the active and zombie procs to check whether this pid
		 * is in use.  Remember the lowest pid that's greater
		 * than trypid, so we can avoid checking for a while.
		 */
		p2 = LIST_FIRST(&allproc);
again:
		for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
			PROC_LOCK(p2);
			while (p2->p_pid == trypid ||
			    p2->p_pgrp->pg_id == trypid ||
			    p2->p_session->s_sid == trypid) {
				trypid++;
				if (trypid >= pidchecked) {
					PROC_UNLOCK(p2);
					goto retry;
				}
			}
			if (p2->p_pid > trypid && pidchecked > p2->p_pid)
				pidchecked = p2->p_pid;
			if (p2->p_pgrp->pg_id > trypid &&
			    pidchecked > p2->p_pgrp->pg_id)
				pidchecked = p2->p_pgrp->pg_id;
			if (p2->p_session->s_sid > trypid &&
			    pidchecked > p2->p_session->s_sid)
				pidchecked = p2->p_session->s_sid;
			PROC_UNLOCK(p2);
		}
		if (!doingzomb) {
			doingzomb = 1;
			p2 = LIST_FIRST(&zombproc);
			goto again;
		}
	}

	/*
	 * RFHIGHPID does not mess with the lastpid counter during boot.
	 */
	if (flags & RFHIGHPID)
		pidchecked = 0;
	else
		lastpid = trypid;

	p2 = newproc;
	p2->p_stat = SIDL;		/* protect against others */
	p2->p_pid = trypid;
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	sx_xunlock(&allproc_lock);

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	td2 = thread_get(p2);
	ke2 = &p2->p_kse;
	kg2 = &p2->p_ksegrp;

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

	bzero(&p2->p_startzero,
	    (unsigned) RANGEOF(struct proc, p_startzero, p_endzero));
	bzero(&ke2->ke_startzero,
	    (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero));
	bzero(&td2->td_startzero,
	    (unsigned) RANGEOF(struct thread, td_startzero, td_endzero));
	bzero(&kg2->kg_startzero,
	    (unsigned) RANGEOF(struct ksegrp, kg_startzero, kg_endzero));

	PROC_LOCK(p1);
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) RANGEOF(struct proc, p_startcopy, p_endcopy));
	bcopy(&td->td_kse->ke_startcopy, &ke2->ke_startcopy,
	    (unsigned) RANGEOF(struct kse, ke_startcopy, ke_endcopy));
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
	bcopy(&td->td_ksegrp->kg_startcopy, &kg2->kg_startcopy,
	    (unsigned) RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
#undef RANGEOF
	PROC_UNLOCK(p1);
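
	/*
	 * The p_startzero/p_endzero and p_startcopy/p_endcopy markers (and
	 * their kse/thread/ksegrp counterparts) delimit the regions of the
	 * structures that are zeroed or copied from the parent; RANGEOF()
	 * above simply measures the span between them so the bzero()/bcopy()
	 * calls cover exactly those fields.
	 */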

	/*
	 * XXXKSE Theoretically only the running thread would get copied;
	 * others in the kernel would be 'aborted' in the child,
	 * i.e. return E*something*.
	 */
	proc_linkup(p2, kg2, ke2, td2);

	mtx_init(&p2->p_mtx, "process lock", MTX_DEF);
	PROC_LOCK(p2);
	/* note.. XXXKSE no pcb or u-area yet */

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_forkproc.
	 */
	p2->p_flag = 0;
	mtx_lock_spin(&sched_lock);
	p2->p_sflag = PS_INMEM;
	if (p1->p_sflag & PS_PROFIL)
		startprofclock(p2);
	mtx_unlock_spin(&sched_lock);
	PROC_LOCK(p1);
	p2->p_ucred = crhold(p1->p_ucred);
	td2->td_ucred = crhold(p2->p_ucred);	/* XXXKSE */
#ifdef DIAGNOSTIC	/* see the comment in ast() */
	td2->td_ucred_cache = NULL;
#endif

	if (p2->p_args)
		p2->p_args->ar_ref++;

	if (flags & RFSIGSHARE) {
		p2->p_procsig = p1->p_procsig;
		p2->p_procsig->ps_refcnt++;
		if (p1->p_sigacts == &p1->p_uarea->u_sigacts) {
			struct sigacts *newsigacts;

			PROC_UNLOCK(p1);
			PROC_UNLOCK(p2);
			/* Create the shared sigacts structure */
			MALLOC(newsigacts, struct sigacts *,
			    sizeof(struct sigacts), M_SUBPROC, M_WAITOK);
			PROC_LOCK(p2);
			PROC_LOCK(p1);
			/*
			 * Set p_sigacts to the new shared structure.
			 * Note that this is updating p1->p_sigacts at the
			 * same time, since p_sigacts is just a pointer to
			 * the shared p_procsig->ps_sigacts.
			 */
			p2->p_sigacts = newsigacts;
			*p2->p_sigacts = p1->p_uarea->u_sigacts;
		}
	} else {
		PROC_UNLOCK(p1);
		PROC_UNLOCK(p2);
		MALLOC(p2->p_procsig, struct procsig *, sizeof(struct procsig),
		    M_SUBPROC, M_WAITOK);
		PROC_LOCK(p2);
		PROC_LOCK(p1);
		bcopy(p1->p_procsig, p2->p_procsig, sizeof(*p2->p_procsig));
		p2->p_procsig->ps_refcnt = 1;
		p2->p_sigacts = NULL;	/* finished in vm_forkproc() */
	}
	if (flags & RFLINUXTHPN)
		p2->p_sigparent = SIGUSR1;
	else
		p2->p_sigparent = SIGCHLD;

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);
	if (p2->p_textvp)
		VREF(p2->p_textvp);

	if (flags & RFCFDG)
		fd = fdinit(td);
	else if (flags & RFFDG) {
		FILEDESC_LOCK(p1->p_fd);
		fd = fdcopy(td);
		FILEDESC_UNLOCK(p1->p_fd);
	} else
		fd = fdshare(p1);
	PROC_LOCK(p2);
	p2->p_fd = fd;

	/*
	 * If p_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
	PROC_LOCK(p1);
	if (p1->p_limit->p_lflags & PL_SHAREMOD)
		p2->p_limit = limcopy(p1->p_limit);
	else {
		p2->p_limit = p1->p_limit;
		p2->p_limit->p_refcnt++;
	}

	/*
	 * Preserve some more flags in subprocess.  PS_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & (P_SUGID | P_ALTSTACK);
	SESS_LOCK(p1->p_session);
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	SESS_UNLOCK(p1->p_session);
	if (flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	LIST_INSERT_AFTER(p1, p2, p_pglist);
	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process becomes a child
	 * of init.  This effectively disassociates the child from the
	 * parent.
	 */
	if (flags & RFNOWAIT)
		pptr = initproc;
	else
		pptr = p1;
	sx_xlock(&proctree_lock);
	PROC_LOCK(p2);
	p2->p_pptr = pptr;
	PROC_UNLOCK(p2);
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	sx_xunlock(&proctree_lock);
	PROC_LOCK(p2);
	LIST_INIT(&p2->p_children);
	LIST_INIT(&td2->td_contested);	/* XXXKSE only 1 thread? */

	callout_init(&p2->p_itcallout, 0);
	callout_init(&td2->td_slpcallout, 1);	/* XXXKSE */

	PROC_LOCK(p1);
#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.  If not inherited,
	 * these were zeroed above but we still could have a trace race
	 * so make sure p2's p_tracep is NULL.
	 */
	if ((p1->p_traceflag & KTRFAC_INHERIT) && p2->p_tracep == NULL) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL) {
			PROC_UNLOCK(p1);
			PROC_UNLOCK(p2);
			VREF(p2->p_tracep);
			PROC_LOCK(p2);
			PROC_LOCK(p1);
		}
	}
#endif

	/*
	 * set priority of child to be that of parent
	 * XXXKSE hey! copying the estcpu seems dodgy.. should split it..
	 */
	mtx_lock_spin(&sched_lock);
	p2->p_ksegrp.kg_estcpu = p1->p_ksegrp.kg_estcpu;
	mtx_unlock_spin(&sched_lock);

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	_PHOLD(p1);
	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);

	/*
	 * Finish creating the child process.  It will return via a different
	 * execution path later.  (ie: directly into user mode)
	 */
	vm_forkproc(td, p2, td2, flags);
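
	/*
	 * Charge the fork to the vmmeter statistics.  The exact flag
	 * combination below decides whether this counts as a plain fork, a
	 * vfork, a kernel thread creation (parent is proc0), or an rfork,
	 * and the child's data and stack sizes are added to the matching
	 * page counter.
	 */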

	if (flags == (RFFDG | RFPROC)) {
		cnt.v_forks++;
		cnt.v_forkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
	} else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
		cnt.v_vforks++;
		cnt.v_vforkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
	} else if (p1 == &proc0) {
		cnt.v_kthreads++;
		cnt.v_kthreadpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
	} else {
		cnt.v_rforks++;
		cnt.v_rforkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
	}

	/*
	 * Both processes are set up; now check if any loadable modules want
	 * to adjust anything.
	 * What if they have an error? XXX
	 */
	sx_slock(&fork_list_lock);
	TAILQ_FOREACH(ep, &fork_list, next) {
		(*ep->function)(p1, p2, flags);
	}
	sx_sunlock(&fork_list_lock);

	/*
	 * If RFSTOPPED not requested, make child runnable and add to
	 * run queue.
	 */
	microtime(&(p2->p_stats->p_start));
	p2->p_acflag = AFORK;
	if ((flags & RFSTOPPED) == 0) {
		mtx_lock_spin(&sched_lock);
		p2->p_stat = SRUN;
		setrunqueue(td2);
		mtx_unlock_spin(&sched_lock);
	}

	/*
	 * Now can be swapped.
	 */
	PROC_LOCK(p1);
	_PRELE(p1);

	/*
	 * Tell any interested parties about the new process.
	 */
	KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
	PROC_UNLOCK(p1);

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	PROC_LOCK(p2);
	while (p2->p_flag & P_PPWAIT)
		msleep(p1, &p2->p_mtx, PWAIT, "ppwait", 0);
	PROC_UNLOCK(p2);

	/*
	 * Return child proc pointer to parent.
	 */
	*procp = p2;
	return (0);
}

/*
 * The next two functions are general routines to handle adding/deleting
 * items on the fork callout list.
 *
 * at_fork():
 * Take the arguments given and put them onto the fork callout list;
 * however, first make sure that it's not already there.
 * Returns 0 on success or a standard error number.
 */

int
at_fork(function)
	forklist_fn function;
{
	struct forklist *ep;

#ifdef INVARIANTS
	/* let the programmer know if he's been stupid */
	if (rm_at_fork(function))
		printf("WARNING: fork callout entry (%p) already present\n",
		    function);
#endif
	ep = malloc(sizeof(*ep), M_ATFORK, M_NOWAIT);
	if (ep == NULL)
		return (ENOMEM);
	ep->function = function;
	sx_xlock(&fork_list_lock);
	TAILQ_INSERT_TAIL(&fork_list, ep, next);
	sx_xunlock(&fork_list_lock);
	return (0);
}

/*
 * Scan the fork callout list for the given item and remove it.
 * Returns the number of items removed (0 or 1).
 */

int
rm_at_fork(function)
	forklist_fn function;
{
	struct forklist *ep;

	sx_xlock(&fork_list_lock);
	TAILQ_FOREACH(ep, &fork_list, next) {
		if (ep->function == function) {
			TAILQ_REMOVE(&fork_list, ep, next);
			sx_xunlock(&fork_list_lock);
			free(ep, M_ATFORK);
			return (1);
		}
	}
	sx_xunlock(&fork_list_lock);
	return (0);
}
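
/*
 * Example usage of the two routines above (a hypothetical module callback,
 * shown for illustration only; the callback signature is inferred from how
 * fork1() invokes the list entries):
 *
 *	static void
 *	example_fork_hook(struct proc *p1, struct proc *p2, int flags)
 *	{
 *		... per-fork work, runs after both processes are set up ...
 *	}
 *
 *	error = at_fork(example_fork_hook);
 *	...
 *	(void) rm_at_fork(example_fork_hook);
 */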

/*
 * Handle the return of a child process from fork1().  This function
 * is called from the MD fork_trampoline() entry point.
 */
void
fork_exit(callout, arg, frame)
	void (*callout)(void *, struct trapframe *);
	void *arg;
	struct trapframe *frame;
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;

	td->td_kse->ke_oncpu = PCPU_GET(cpuid);
	/*
	 * Setup the sched_lock state so that we can release it.
	 */
	sched_lock.mtx_lock = (uintptr_t)td;
	sched_lock.mtx_recurse = 0;
	td->td_critnest = 1;
	td->td_savecrit = CRITICAL_FORK;
	CTR3(KTR_PROC, "fork_exit: new proc %p (pid %d, %s)", p, p->p_pid,
	    p->p_comm);
	if (PCPU_GET(switchtime.sec) == 0)
		binuptime(PCPU_PTR(switchtime));
	PCPU_SET(switchticks, ticks);
	mtx_unlock_spin(&sched_lock);

	/*
	 * cpu_set_fork_handler intercepts this function call to
	 * have this call a non-return function to stay in kernel mode.
	 * initproc has its own fork handler, but it does return.
	 */
	KASSERT(callout != NULL, ("NULL callout in fork_exit"));
	callout(arg, frame);

	/*
	 * Check if a kernel thread misbehaved and returned from its main
	 * function.
	 */
	PROC_LOCK(p);
	if (p->p_flag & P_KTHREAD) {
		PROC_UNLOCK(p);
		mtx_lock(&Giant);
		printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",
		    p->p_comm, p->p_pid);
		kthread_exit(0);
	}
	PROC_UNLOCK(p);
#ifdef DIAGNOSTIC	/* see the comment in ast() */
	if (td->td_ucred_cache)
		panic("fork_exit:thread already has cached ucred");
	td->td_ucred_cache = td->td_ucred;
	td->td_ucred = NULL;
#endif	/* DIAGNOSTIC */
	mtx_assert(&Giant, MA_NOTOWNED);
}

/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.  Giant is not held on entry, and must not
 * be held on return.  This function is passed in to fork_exit() as the
 * first parameter and is called when returning to a new userland process.
 */
void
fork_return(td, frame)
	struct thread *td;
	struct trapframe *frame;
{

	userret(td, frame, 0);
#ifdef KTRACE
	if (KTRPOINT(td->td_proc, KTR_SYSRET)) {
		ktrsysret(td->td_proc->p_tracep, SYS_fork, 0, 0);
	}
#endif
	mtx_assert(&Giant, MA_NOTOWNED);
}