xref: /freebsd/sys/kern/kern_fork.c (revision 6004362e66b76f04a5b994af40067c600da40d0a)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  * (c) UNIX System Laboratories, Inc.
5df8bae1dSRodney W. Grimes  * All or some portions of this file are derived from material licensed
6df8bae1dSRodney W. Grimes  * to the University of California by American Telephone and Telegraph
7df8bae1dSRodney W. Grimes  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8df8bae1dSRodney W. Grimes  * the permission of UNIX System Laboratories, Inc.
9df8bae1dSRodney W. Grimes  *
10df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
11df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
12df8bae1dSRodney W. Grimes  * are met:
13df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
14df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
15df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
16df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
17df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
18df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
19df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
20df8bae1dSRodney W. Grimes  *    without specific prior written permission.
21df8bae1dSRodney W. Grimes  *
22df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
33df8bae1dSRodney W. Grimes  *
34df8bae1dSRodney W. Grimes  *	@(#)kern_fork.c	8.6 (Berkeley) 4/8/94
35df8bae1dSRodney W. Grimes  */
36df8bae1dSRodney W. Grimes 
37677b542eSDavid E. O'Brien #include <sys/cdefs.h>
38677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
39677b542eSDavid E. O'Brien 
40db6a20e2SGarrett Wollman #include "opt_ktrace.h"
412555374cSRobert Watson #include "opt_mac.h"
42db6a20e2SGarrett Wollman 
43df8bae1dSRodney W. Grimes #include <sys/param.h>
44df8bae1dSRodney W. Grimes #include <sys/systm.h>
45d2d3e875SBruce Evans #include <sys/sysproto.h>
4675b8b3b2SJohn Baldwin #include <sys/eventhandler.h>
47df8bae1dSRodney W. Grimes #include <sys/filedesc.h>
48df8bae1dSRodney W. Grimes #include <sys/kernel.h>
4970fca427SJohn Baldwin #include <sys/kthread.h>
50c76e95c3SPeter Wemm #include <sys/sysctl.h>
5119284646SJohn Baldwin #include <sys/lock.h>
52df8bae1dSRodney W. Grimes #include <sys/malloc.h>
5335e0e5b3SJohn Baldwin #include <sys/mutex.h>
54df8bae1dSRodney W. Grimes #include <sys/proc.h>
559ccba881SMatthew N. Dodd #include <sys/pioctl.h>
56df8bae1dSRodney W. Grimes #include <sys/resourcevar.h>
57b43179fbSJeff Roberson #include <sys/sched.h>
58a7b124c3SJohn Baldwin #include <sys/syscall.h>
5970fca427SJohn Baldwin #include <sys/vmmeter.h>
60df8bae1dSRodney W. Grimes #include <sys/vnode.h>
61df8bae1dSRodney W. Grimes #include <sys/acct.h>
622555374cSRobert Watson #include <sys/mac.h>
630384fff8SJason Evans #include <sys/ktr.h>
64df8bae1dSRodney W. Grimes #include <sys/ktrace.h>
65b71fec07SBruce Evans #include <sys/unistd.h>
6657934cd3SJohn Baldwin #include <sys/sx.h>
676004362eSDavid Schultz #include <sys/signalvar.h>
68df8bae1dSRodney W. Grimes 
69d93f860cSPoul-Henning Kamp #include <vm/vm.h>
70dabee6feSPeter Wemm #include <vm/pmap.h>
71dabee6feSPeter Wemm #include <vm/vm_map.h>
72efeaf95aSDavid Greenman #include <vm/vm_extern.h>
73c897b813SJeff Roberson #include <vm/uma.h>
74d93f860cSPoul-Henning Kamp 
75182da820SMatthew Dillon #include <machine/critical.h>
7688c5ea45SJulian Elischer 
77d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
78ad7507e2SSteven Wallace struct fork_args {
79ad7507e2SSteven Wallace 	int     dummy;
80ad7507e2SSteven Wallace };
81d2d3e875SBruce Evans #endif
82ad7507e2SSteven Wallace 
8351da11a2SMark Murray static int forksleep; /* Place for fork1() to sleep on. */
84cc6712eaSMike Silbersack 
85116734c4SMatthew Dillon /*
86116734c4SMatthew Dillon  * MPSAFE
87116734c4SMatthew Dillon  */
88df8bae1dSRodney W. Grimes /* ARGSUSED */
8926f9a767SRodney W. Grimes int
90b40ce416SJulian Elischer fork(td, uap)
91b40ce416SJulian Elischer 	struct thread *td;
92df8bae1dSRodney W. Grimes 	struct fork_args *uap;
93df8bae1dSRodney W. Grimes {
94df8abd0bSPeter Wemm 	int error;
95df8abd0bSPeter Wemm 	struct proc *p2;
96be67169aSBruce Evans 
97316ec49aSScott Long 	error = fork1(td, RFFDG | RFPROC, 0, &p2);
98df8abd0bSPeter Wemm 	if (error == 0) {
99b40ce416SJulian Elischer 		td->td_retval[0] = p2->p_pid;
100b40ce416SJulian Elischer 		td->td_retval[1] = 0;
101df8abd0bSPeter Wemm 	}
10270fca427SJohn Baldwin 	return (error);
103df8bae1dSRodney W. Grimes }
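/*
 * A minimal userland sketch of the interface this wrapper backs, assuming
 * <unistd.h>, <sys/wait.h> and <err.h> plus a hypothetical do_child_work():
 * the parent receives the child's pid (placed in td_retval[0] above), while
 * the child's return value of 0 is arranged on the machine-dependent side
 * when the new thread first returns to user mode.
 *
 *	pid_t pid;
 *	int status;
 *
 *	if ((pid = fork()) == -1)
 *		err(1, "fork");
 *	else if (pid == 0)
 *		_exit(do_child_work());		// runs in the new process
 *	else
 *		waitpid(pid, &status, 0);	// runs in the original process
 */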
104df8bae1dSRodney W. Grimes 
105116734c4SMatthew Dillon /*
106116734c4SMatthew Dillon  * MPSAFE
107116734c4SMatthew Dillon  */
108df8bae1dSRodney W. Grimes /* ARGSUSED */
10926f9a767SRodney W. Grimes int
110b40ce416SJulian Elischer vfork(td, uap)
111b40ce416SJulian Elischer 	struct thread *td;
112dabee6feSPeter Wemm 	struct vfork_args *uap;
113df8bae1dSRodney W. Grimes {
114df8abd0bSPeter Wemm 	int error;
115df8abd0bSPeter Wemm 	struct proc *p2;
116be67169aSBruce Evans 
117316ec49aSScott Long 	error = fork1(td, RFFDG | RFPROC | RFPPWAIT | RFMEM, 0, &p2);
118df8abd0bSPeter Wemm 	if (error == 0) {
119b40ce416SJulian Elischer 		td->td_retval[0] = p2->p_pid;
120b40ce416SJulian Elischer 		td->td_retval[1] = 0;
121df8abd0bSPeter Wemm 	}
12270fca427SJohn Baldwin 	return (error);
123df8bae1dSRodney W. Grimes }
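/*
 * vfork() differs from fork() above only in the flags handed to fork1():
 * RFMEM shares the parent's address space and RFPPWAIT puts the parent to
 * sleep until the child execs or exits (the P_PPWAIT loop near the end of
 * fork1()).  An illustrative userland use, with argv assumed to be set up:
 *
 *	pid_t pid = vfork();
 *	if (pid == 0) {
 *		execv("/bin/true", argv);
 *		_exit(127);		// reached only if execv() fails
 *	}
 */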
124df8bae1dSRodney W. Grimes 
125116734c4SMatthew Dillon /*
126116734c4SMatthew Dillon  * MPSAFE
127116734c4SMatthew Dillon  */
128dabee6feSPeter Wemm int
129b40ce416SJulian Elischer rfork(td, uap)
130b40ce416SJulian Elischer 	struct thread *td;
131dabee6feSPeter Wemm 	struct rfork_args *uap;
132dabee6feSPeter Wemm {
133df8abd0bSPeter Wemm 	struct proc *p2;
134c8564ad4SBruce Evans 	int error;
135be67169aSBruce Evans 
136c8564ad4SBruce Evans 	/* Don't allow kernel-only flags. */
137885ccc61SJohn Baldwin 	if ((uap->flags & RFKERNELONLY) != 0)
138885ccc61SJohn Baldwin 		return (EINVAL);
139c8564ad4SBruce Evans 
140316ec49aSScott Long 	error = fork1(td, uap->flags, 0, &p2);
141df8abd0bSPeter Wemm 	if (error == 0) {
142b40ce416SJulian Elischer 		td->td_retval[0] = p2 ? p2->p_pid : 0;
143b40ce416SJulian Elischer 		td->td_retval[1] = 0;
144df8abd0bSPeter Wemm 	}
14570fca427SJohn Baldwin 	return (error);
146dabee6feSPeter Wemm }
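/*
 * rfork() exposes the fork1() flag bits directly, minus RFKERNELONLY.
 * Two illustrative combinations (examples only, not the only valid ones):
 *
 *	// New process sharing the address space but with its own copy
 *	// of the file descriptor table:
 *	pid_t pid = rfork(RFPROC | RFFDG | RFMEM);
 *
 *	// No new process at all: just give the caller a fresh descriptor
 *	// table (the (flags & RFPROC) == 0 path early in fork1()):
 *	rfork(RFCFDG);
 */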
147dabee6feSPeter Wemm 
148df8bae1dSRodney W. Grimes int	nprocs = 1;		/* process 0 */
1498f7e4eb5SDag-Erling Smørgrav int	lastpid = 0;
1508f7e4eb5SDag-Erling Smørgrav SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
151d941d475SRobert Watson     "Last used PID");
152df8bae1dSRodney W. Grimes 
153bb6a234eSPeter Wemm /*
1548f7e4eb5SDag-Erling Smørgrav  * Random component to lastpid generation.  We mix in a random factor to make
155bb6a234eSPeter Wemm  * it a little harder to predict.  We sanity check the modulus value here, in
156bb6a234eSPeter Wemm  * the sysctl handler, so the check stays out of the fork critical path.
157bb6a234eSPeter Wemm  * Don't let it be too small or we pointlessly waste entropy, and don't let
158bb6a234eSPeter Wemm  * it be impossibly large: too big a modulus causes many more process table
159bb6a234eSPeter Wemm  * scans and slows down fork processing as the pidchecked caching is defeated.
160bb6a234eSPeter Wemm  */
161ee3fd601SDan Moschuk static int randompid = 0;
162bb6a234eSPeter Wemm 
163bb6a234eSPeter Wemm static int
16482d9ae4eSPoul-Henning Kamp sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
165bb6a234eSPeter Wemm {
166bb6a234eSPeter Wemm 	int error, pid;
167bb6a234eSPeter Wemm 
16847934cefSDon Lewis 	error = sysctl_wire_old_buffer(req, sizeof(int));
16947934cefSDon Lewis 	if (error != 0)
17047934cefSDon Lewis 		return(error);
1713fc755c1SJohn Baldwin 	sx_xlock(&allproc_lock);
172bb6a234eSPeter Wemm 	pid = randompid;
173bb6a234eSPeter Wemm 	error = sysctl_handle_int(oidp, &pid, 0, req);
1743fc755c1SJohn Baldwin 	if (error == 0 && req->newptr != NULL) {
175bb6a234eSPeter Wemm 		if (pid < 0 || pid > PID_MAX - 100)	/* out of range */
176bb6a234eSPeter Wemm 			pid = PID_MAX - 100;
177bb6a234eSPeter Wemm 		else if (pid < 2)			/* NOP */
178bb6a234eSPeter Wemm 			pid = 0;
179bb6a234eSPeter Wemm 		else if (pid < 100)			/* Make it reasonable */
180bb6a234eSPeter Wemm 			pid = 100;
181bb6a234eSPeter Wemm 		randompid = pid;
1823fc755c1SJohn Baldwin 	}
1833fc755c1SJohn Baldwin 	sx_xunlock(&allproc_lock);
184bb6a234eSPeter Wemm 	return (error);
185bb6a234eSPeter Wemm }
186bb6a234eSPeter Wemm 
187bb6a234eSPeter Wemm SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
188bb6a234eSPeter Wemm     0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
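/*
 * The handler above clamps the requested modulus instead of rejecting it;
 * a few example settings (values follow the checks in
 * sysctl_kern_randompid()):
 *
 *	# sysctl kern.randompid=1		-> stored as 0 (disabled)
 *	# sysctl kern.randompid=50		-> stored as 100
 *	# sysctl kern.randompid=2000		-> stored as 2000
 *	# sysctl kern.randompid=999999999	-> stored as PID_MAX - 100
 */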
189ee3fd601SDan Moschuk 
19074b2192aSJohn Dyson int
191316ec49aSScott Long fork1(td, flags, pages, procp)
19270fca427SJohn Baldwin 	struct thread *td;
1930e3eb7eeSSujal Patel 	int flags;
194316ec49aSScott Long 	int pages;
19570fca427SJohn Baldwin 	struct proc **procp;
196df8bae1dSRodney W. Grimes {
19770fca427SJohn Baldwin 	struct proc *p1, *p2, *pptr;
198df8abd0bSPeter Wemm 	uid_t uid;
199df8bae1dSRodney W. Grimes 	struct proc *newproc;
20070fca427SJohn Baldwin 	int ok, trypid;
201b083ea51SMike Silbersack 	static int curfail, pidchecked = 0;
202b083ea51SMike Silbersack 	static struct timeval lastfail;
2035641ae5dSJohn Baldwin 	struct filedesc *fd;
204ad05d580STor Egge 	struct filedesc_to_leader *fdtol;
205079b7badSJulian Elischer 	struct thread *td2;
206079b7badSJulian Elischer 	struct ksegrp *kg2;
2073fc755c1SJohn Baldwin 	struct sigacts *newsigacts;
208c6544064SJohn Baldwin 	int error;
2095856e12eSJohn Dyson 
21070fca427SJohn Baldwin 	/* Can't copy and clear. */
2110e3eb7eeSSujal Patel 	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
212dabee6feSPeter Wemm 		return (EINVAL);
213df8bae1dSRodney W. Grimes 
21470fca427SJohn Baldwin 	p1 = td->td_proc;
21570fca427SJohn Baldwin 
216df8bae1dSRodney W. Grimes 	/*
2175856e12eSJohn Dyson 	 * Here we don't create a new process, but we divorce
2185856e12eSJohn Dyson 	 * certain parts of a process from itself.
2195856e12eSJohn Dyson 	 */
2205856e12eSJohn Dyson 	if ((flags & RFPROC) == 0) {
221079b7badSJulian Elischer 		vm_forkproc(td, NULL, NULL, flags);
2225856e12eSJohn Dyson 
2235856e12eSJohn Dyson 		/*
2245856e12eSJohn Dyson 		 * Close all file descriptors.
2255856e12eSJohn Dyson 		 */
2265856e12eSJohn Dyson 		if (flags & RFCFDG) {
2275856e12eSJohn Dyson 			struct filedesc *fdtmp;
228c7f1c11bSAlfred Perlstein 			fdtmp = fdinit(td->td_proc->p_fd);
229c7f1c11bSAlfred Perlstein 			fdfree(td);
2305856e12eSJohn Dyson 			p1->p_fd = fdtmp;
2315856e12eSJohn Dyson 		}
2325856e12eSJohn Dyson 
2335856e12eSJohn Dyson 		/*
234c8564ad4SBruce Evans 		 * Unshare file descriptors (from parent).
2355856e12eSJohn Dyson 		 */
2365856e12eSJohn Dyson 		if (flags & RFFDG) {
237124e4c3bSPoul-Henning Kamp 			FILEDESC_LOCK_FAST(p1->p_fd);
2385856e12eSJohn Dyson 			if (p1->p_fd->fd_refcnt > 1) {
2395856e12eSJohn Dyson 				struct filedesc *newfd;
240426da3bcSAlfred Perlstein 
241124e4c3bSPoul-Henning Kamp 				FILEDESC_UNLOCK_FAST(p1->p_fd);
242598b7ec8SPoul-Henning Kamp 				newfd = fdcopy(p1->p_fd);
243b40ce416SJulian Elischer 				fdfree(td);
2445856e12eSJohn Dyson 				p1->p_fd = newfd;
245426da3bcSAlfred Perlstein 			} else
246124e4c3bSPoul-Henning Kamp 				FILEDESC_UNLOCK_FAST(p1->p_fd);
2475856e12eSJohn Dyson 		}
2481943af61SPeter Wemm 		*procp = NULL;
2495856e12eSJohn Dyson 		return (0);
2505856e12eSJohn Dyson 	}
2515856e12eSJohn Dyson 
2522c10d16aSJeff Roberson 	/*
2532c10d16aSJeff Roberson 	 * Note that 1:1 threading allows a threaded process to fork with
2542c10d16aSJeff Roberson 	 * only a single thread coming out on the other side, on the
2552c10d16aSJeff Roberson 	 * expectation that the child is about to exec.
2562c10d16aSJeff Roberson 	 */
257a3aa5592SJulian Elischer 	if (p1->p_flag & P_HADTHREADS) {
258e602ba25SJulian Elischer 		/*
259e602ba25SJulian Elischer 		 * Idle the other threads for a second.
260e602ba25SJulian Elischer 		 * Since the user space is copied, it must remain stable.
261e602ba25SJulian Elischer 		 * In addition, all threads (from the user perspective)
262e602ba25SJulian Elischer 		 * need to either be suspended or in the kernel,
263e602ba25SJulian Elischer 		 * where they will try restart in the parent and will
264e602ba25SJulian Elischer 		 * be aborted in the child.
265e602ba25SJulian Elischer 		 */
266e602ba25SJulian Elischer 		PROC_LOCK(p1);
2671279572aSDavid Xu 		if (thread_single(SINGLE_NO_EXIT)) {
268c8564ad4SBruce Evans 			/* Abort. Someone else is single threading before us. */
269e602ba25SJulian Elischer 			PROC_UNLOCK(p1);
270e602ba25SJulian Elischer 			return (ERESTART);
271e602ba25SJulian Elischer 		}
272e602ba25SJulian Elischer 		PROC_UNLOCK(p1);
273e602ba25SJulian Elischer 		/*
274e602ba25SJulian Elischer 		 * All other activity in this process
275e602ba25SJulian Elischer 		 * is now suspended at the user boundary,
276e602ba25SJulian Elischer 		 * (or other safe places if we think of any).
277e602ba25SJulian Elischer 		 */
278e602ba25SJulian Elischer 	}
279e602ba25SJulian Elischer 
2803fc755c1SJohn Baldwin 	/* Allocate new proc. */
281a163d034SWarner Losh 	newproc = uma_zalloc(proc_zone, M_WAITOK);
2822555374cSRobert Watson #ifdef MAC
2832555374cSRobert Watson 	mac_init_proc(newproc);
2842555374cSRobert Watson #endif
285ad3b9257SJohn-Mark Gurney 	knlist_init(&newproc->p_klist, &newproc->p_mtx);
2863fc755c1SJohn Baldwin 
2875ce2f678SJohn Baldwin 	/* We have to lock the process tree while we look for a pid. */
2885ce2f678SJohn Baldwin 	sx_slock(&proctree_lock);
2895ce2f678SJohn Baldwin 
2905856e12eSJohn Dyson 	/*
291df8bae1dSRodney W. Grimes 	 * Although process entries are dynamically created, we still keep
292df8bae1dSRodney W. Grimes 	 * a global limit on the maximum number we will create.  Don't allow
293c4441bc7SMike Silbersack 	 * a nonprivileged user to use the last ten processes; don't let root
294df8bae1dSRodney W. Grimes 	 * exceed the limit. The variable nprocs is the current number of
295df8bae1dSRodney W. Grimes 	 * processes, maxproc is the limit.
296df8bae1dSRodney W. Grimes 	 */
2973fc755c1SJohn Baldwin 	sx_xlock(&allproc_lock);
2983fc755c1SJohn Baldwin 	uid = td->td_ucred->cr_ruid;
29966d5c640SColin Percival 	if ((nprocs >= maxproc - 10 &&
30066d5c640SColin Percival 	    suser_cred(td->td_ucred, SUSER_RUID) != 0) ||
30165bba83fSColin Percival 	    nprocs >= maxproc) {
302c6544064SJohn Baldwin 		error = EAGAIN;
303c6544064SJohn Baldwin 		goto fail;
304e602ba25SJulian Elischer 	}
305c6544064SJohn Baldwin 
306df8bae1dSRodney W. Grimes 	/*
3073fc755c1SJohn Baldwin 	 * Increment the count of procs running with this uid. Don't allow
3083fc755c1SJohn Baldwin 	 * a nonprivileged user to exceed their current limit.
3093fc755c1SJohn Baldwin 	 */
3103fc755c1SJohn Baldwin 	PROC_LOCK(p1);
3113fc755c1SJohn Baldwin 	ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
31291d5354aSJohn Baldwin 		(uid != 0) ? lim_cur(p1, RLIMIT_NPROC) : 0);
3133fc755c1SJohn Baldwin 	PROC_UNLOCK(p1);
3143fc755c1SJohn Baldwin 	if (!ok) {
315c6544064SJohn Baldwin 		error = EAGAIN;
316c6544064SJohn Baldwin 		goto fail;
3173fc755c1SJohn Baldwin 	}
3183fc755c1SJohn Baldwin 
3193fc755c1SJohn Baldwin 	/*
320ef5dc8a9SJohn Dyson 	 * Increment the nprocs resource before blocking can occur.  There
321ef5dc8a9SJohn Dyson 	 * are hard-limits as to the number of processes that can run.
322ef5dc8a9SJohn Dyson 	 */
323ef5dc8a9SJohn Dyson 	nprocs++;
324ef5dc8a9SJohn Dyson 
325ef5dc8a9SJohn Dyson 	/*
326df8bae1dSRodney W. Grimes 	 * Find an unused process ID.  We remember a range of unused IDs
3278f7e4eb5SDag-Erling Smørgrav 	 * ready to use (from lastpid+1 through pidchecked-1).
3280384fff8SJason Evans 	 *
3290384fff8SJason Evans 	 * If RFHIGHPID is set (used during system boot), do not allocate
3300384fff8SJason Evans 	 * low-numbered pids.
331df8bae1dSRodney W. Grimes 	 */
3328f7e4eb5SDag-Erling Smørgrav 	trypid = lastpid + 1;
3330384fff8SJason Evans 	if (flags & RFHIGHPID) {
33470fca427SJohn Baldwin 		if (trypid < 10)
3350384fff8SJason Evans 			trypid = 10;
3360384fff8SJason Evans 	} else {
337bb6a234eSPeter Wemm 		if (randompid)
3380384fff8SJason Evans 			trypid += arc4random() % randompid;
3390384fff8SJason Evans 	}
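	/*
	 * A worked example of the randomization above, with assumed values:
	 * if lastpid is 500 and kern.randompid is 1000, trypid starts at
	 * 501 plus a value in [0, 999], i.e. somewhere in [501, 1500]; the
	 * retry loop below then steps past any pid, process group id or
	 * session id that is already in use.
	 */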
340df8bae1dSRodney W. Grimes retry:
341df8bae1dSRodney W. Grimes 	/*
342df8bae1dSRodney W. Grimes 	 * If the process ID prototype has wrapped around,
343df8bae1dSRodney W. Grimes 	 * restart somewhat above 0, as the low-numbered procs
344df8bae1dSRodney W. Grimes 	 * tend to include daemons that don't exit.
345df8bae1dSRodney W. Grimes 	 */
3460384fff8SJason Evans 	if (trypid >= PID_MAX) {
3470384fff8SJason Evans 		trypid = trypid % PID_MAX;
3480384fff8SJason Evans 		if (trypid < 100)
3490384fff8SJason Evans 			trypid += 100;
350df8bae1dSRodney W. Grimes 		pidchecked = 0;
351df8bae1dSRodney W. Grimes 	}
3520384fff8SJason Evans 	if (trypid >= pidchecked) {
353df8bae1dSRodney W. Grimes 		int doingzomb = 0;
354df8bae1dSRodney W. Grimes 
355df8bae1dSRodney W. Grimes 		pidchecked = PID_MAX;
356df8bae1dSRodney W. Grimes 		/*
357df8bae1dSRodney W. Grimes 		 * Scan the active and zombie procs to check whether this pid
358df8bae1dSRodney W. Grimes 		 * is in use.  Remember the lowest pid that's greater
3590384fff8SJason Evans 		 * than trypid, so we can avoid checking for a while.
360df8bae1dSRodney W. Grimes 		 */
3612e3c8fcbSPoul-Henning Kamp 		p2 = LIST_FIRST(&allproc);
362df8bae1dSRodney W. Grimes again:
363a7b124c3SJohn Baldwin 		for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
364f591779bSSeigo Tanimura 			PROC_LOCK(p2);
3650384fff8SJason Evans 			while (p2->p_pid == trypid ||
3665ce2f678SJohn Baldwin 			    (p2->p_pgrp != NULL &&
3675ce2f678SJohn Baldwin 			    (p2->p_pgrp->pg_id == trypid ||
3685ce2f678SJohn Baldwin 			    (p2->p_session != NULL &&
3695ce2f678SJohn Baldwin 			    p2->p_session->s_sid == trypid)))) {
3700384fff8SJason Evans 				trypid++;
371f591779bSSeigo Tanimura 				if (trypid >= pidchecked) {
372f591779bSSeigo Tanimura 					PROC_UNLOCK(p2);
373df8bae1dSRodney W. Grimes 					goto retry;
374df8bae1dSRodney W. Grimes 				}
375f591779bSSeigo Tanimura 			}
3760384fff8SJason Evans 			if (p2->p_pid > trypid && pidchecked > p2->p_pid)
377df8bae1dSRodney W. Grimes 				pidchecked = p2->p_pid;
3785ce2f678SJohn Baldwin 			if (p2->p_pgrp != NULL) {
3790384fff8SJason Evans 				if (p2->p_pgrp->pg_id > trypid &&
380df8bae1dSRodney W. Grimes 				    pidchecked > p2->p_pgrp->pg_id)
381df8bae1dSRodney W. Grimes 					pidchecked = p2->p_pgrp->pg_id;
3825ce2f678SJohn Baldwin 				if (p2->p_session != NULL &&
3835ce2f678SJohn Baldwin 				    p2->p_session->s_sid > trypid &&
384643a8daaSDon Lewis 				    pidchecked > p2->p_session->s_sid)
385643a8daaSDon Lewis 					pidchecked = p2->p_session->s_sid;
3865ce2f678SJohn Baldwin 			}
387f591779bSSeigo Tanimura 			PROC_UNLOCK(p2);
388df8bae1dSRodney W. Grimes 		}
389df8bae1dSRodney W. Grimes 		if (!doingzomb) {
390df8bae1dSRodney W. Grimes 			doingzomb = 1;
3912e3c8fcbSPoul-Henning Kamp 			p2 = LIST_FIRST(&zombproc);
392df8bae1dSRodney W. Grimes 			goto again;
393df8bae1dSRodney W. Grimes 		}
394df8bae1dSRodney W. Grimes 	}
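	/*
	 * A small worked example of the pidchecked cache, with assumed pids:
	 * if the scan starts at trypid = 101 and only pids 101 and 105 are
	 * in use nearby, this call hands out 102 and records pidchecked =
	 * 105, so the next two forks can take 103 and 104 without walking
	 * the process lists again; the scan reruns once trypid reaches 105.
	 */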
3955ce2f678SJohn Baldwin 	sx_sunlock(&proctree_lock);
396df8bae1dSRodney W. Grimes 
397df8bae1dSRodney W. Grimes 	/*
3988f7e4eb5SDag-Erling Smørgrav 	 * RFHIGHPID does not mess with the lastpid counter during boot.
3990384fff8SJason Evans 	 */
4000384fff8SJason Evans 	if (flags & RFHIGHPID)
4010384fff8SJason Evans 		pidchecked = 0;
4020384fff8SJason Evans 	else
4038f7e4eb5SDag-Erling Smørgrav 		lastpid = trypid;
4040384fff8SJason Evans 
405553629ebSJake Burkholder 	p2 = newproc;
406e602ba25SJulian Elischer 	p2->p_state = PRS_NEW;		/* protect against others */
407553629ebSJake Burkholder 	p2->p_pid = trypid;
408553629ebSJake Burkholder 	LIST_INSERT_HEAD(&allproc, p2, p_list);
409553629ebSJake Burkholder 	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
4101005a129SJohn Baldwin 	sx_xunlock(&allproc_lock);
411553629ebSJake Burkholder 
4120384fff8SJason Evans 	/*
4133fc755c1SJohn Baldwin 	 * Malloc things while we don't hold any locks.
4143fc755c1SJohn Baldwin 	 */
41590af4afaSJohn Baldwin 	if (flags & RFSIGSHARE)
4163fc755c1SJohn Baldwin 		newsigacts = NULL;
41790af4afaSJohn Baldwin 	else
41890af4afaSJohn Baldwin 		newsigacts = sigacts_alloc();
4193fc755c1SJohn Baldwin 
4203fc755c1SJohn Baldwin 	/*
4213fc755c1SJohn Baldwin 	 * Copy filedesc.
4223fc755c1SJohn Baldwin 	 */
423ad05d580STor Egge 	if (flags & RFCFDG) {
424598b7ec8SPoul-Henning Kamp 		fd = fdinit(p1->p_fd);
425ad05d580STor Egge 		fdtol = NULL;
426ad05d580STor Egge 	} else if (flags & RFFDG) {
427598b7ec8SPoul-Henning Kamp 		fd = fdcopy(p1->p_fd);
428ad05d580STor Egge 		fdtol = NULL;
429ad05d580STor Egge 	} else {
430c7f1c11bSAlfred Perlstein 		fd = fdshare(p1->p_fd);
431ad05d580STor Egge 		if (p1->p_fdtol == NULL)
432ad05d580STor Egge 			p1->p_fdtol =
433ad05d580STor Egge 				filedesc_to_leader_alloc(NULL,
434ad05d580STor Egge 							 NULL,
435ad05d580STor Egge 							 p1->p_leader);
436ad05d580STor Egge 		if ((flags & RFTHREAD) != 0) {
437ad05d580STor Egge 			/*
438ad05d580STor Egge 			 * Shared file descriptor table and
439ad05d580STor Egge 			 * shared process leaders.
440ad05d580STor Egge 			 */
441ad05d580STor Egge 			fdtol = p1->p_fdtol;
442124e4c3bSPoul-Henning Kamp 			FILEDESC_LOCK_FAST(p1->p_fd);
443ad05d580STor Egge 			fdtol->fdl_refcount++;
444124e4c3bSPoul-Henning Kamp 			FILEDESC_UNLOCK_FAST(p1->p_fd);
445ad05d580STor Egge 		} else {
446ad05d580STor Egge 			/*
447ad05d580STor Egge 			 * Shared file descriptor table, and
448ad05d580STor Egge 			 * different process leaders
449ad05d580STor Egge 			 */
450ad05d580STor Egge 			fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
451ad05d580STor Egge 							 p1->p_fd,
452ad05d580STor Egge 							 p2);
453ad05d580STor Egge 		}
454ad05d580STor Egge 	}
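	/*
	 * Summary of the descriptor handling chosen above:
	 *
	 *	RFCFDG  -> fdinit():  start from a fresh, empty table
	 *	RFFDG   -> fdcopy():  take a private copy of p1's table
	 *	neither -> fdshare(): share p1's table, with a
	 *		   filedesc_to_leader record tracking the sharers
	 */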
4553fc755c1SJohn Baldwin 	/*
456df8bae1dSRodney W. Grimes 	 * Make a proc table entry for the new process.
457df8bae1dSRodney W. Grimes 	 * Start by zeroing the section of proc that is zero-initialized,
458df8bae1dSRodney W. Grimes 	 * then copy the section that is copied directly from the parent.
459df8bae1dSRodney W. Grimes 	 */
4601faf202eSJulian Elischer 	td2 = FIRST_THREAD_IN_PROC(p2);
4611faf202eSJulian Elischer 	kg2 = FIRST_KSEGRP_IN_PROC(p2);
462079b7badSJulian Elischer 
463c8564ad4SBruce Evans 	/* Allocate and switch to an alternate kstack if specified. */
4645fadbfeaSAlan Cox 	if (pages != 0)
46589f4fca2SAlan Cox 		vm_thread_new_altkstack(td2, pages);
466316ec49aSScott Long 
4677d447c95SJohn Baldwin 	PROC_LOCK(p2);
4687d447c95SJohn Baldwin 	PROC_LOCK(p1);
4697d447c95SJohn Baldwin 
470df8bae1dSRodney W. Grimes 	bzero(&p2->p_startzero,
4716db36923SDavid Schultz 	    __rangeof(struct proc, p_startzero, p_endzero));
472079b7badSJulian Elischer 	bzero(&td2->td_startzero,
4736db36923SDavid Schultz 	    __rangeof(struct thread, td_startzero, td_endzero));
474079b7badSJulian Elischer 	bzero(&kg2->kg_startzero,
4756db36923SDavid Schultz 	    __rangeof(struct ksegrp, kg_startzero, kg_endzero));
476079b7badSJulian Elischer 
477df8bae1dSRodney W. Grimes 	bcopy(&p1->p_startcopy, &p2->p_startcopy,
4786db36923SDavid Schultz 	    __rangeof(struct proc, p_startcopy, p_endcopy));
479079b7badSJulian Elischer 	bcopy(&td->td_startcopy, &td2->td_startcopy,
4806db36923SDavid Schultz 	    __rangeof(struct thread, td_startcopy, td_endcopy));
481079b7badSJulian Elischer 	bcopy(&td->td_ksegrp->kg_startcopy, &kg2->kg_startcopy,
4826db36923SDavid Schultz 	    __rangeof(struct ksegrp, kg_startcopy, kg_endcopy));
483df8bae1dSRodney W. Grimes 
484a30ec4b9SDavid Xu 	td2->td_sigstk = td->td_sigstk;
485a30ec4b9SDavid Xu 
486df8bae1dSRodney W. Grimes 	/*
487df8bae1dSRodney W. Grimes 	 * Duplicate sub-structures as needed.
488df8bae1dSRodney W. Grimes 	 * Increase reference counts on shared objects.
489df8bae1dSRodney W. Grimes 	 */
490a7b124c3SJohn Baldwin 	p2->p_flag = 0;
4919752f794SJohn Baldwin 	if (p1->p_flag & P_PROFIL)
4929752f794SJohn Baldwin 		startprofclock(p2);
4939ed346baSBosko Milekic 	mtx_lock_spin(&sched_lock);
494a7b124c3SJohn Baldwin 	p2->p_sflag = PS_INMEM;
495b43179fbSJeff Roberson 	/*
496b43179fbSJeff Roberson 	 * Allow the scheduler to adjust the priority of the child and
497b43179fbSJeff Roberson 	 * parent while we hold the sched_lock.
498b43179fbSJeff Roberson 	 */
499ed062c8dSJulian Elischer 	sched_fork(td, td2);
500b43179fbSJeff Roberson 
5019ed346baSBosko Milekic 	mtx_unlock_spin(&sched_lock);
5023fc755c1SJohn Baldwin 	p2->p_ucred = crhold(td->td_ucred);
503079b7badSJulian Elischer 	td2->td_ucred = crhold(p2->p_ucred);	/* XXXKSE */
504df8bae1dSRodney W. Grimes 
5058899023fSAlfred Perlstein 	pargs_hold(p2->p_args);
506b9df5231SPoul-Henning Kamp 
5076626c604SJulian Elischer 	if (flags & RFSIGSHARE) {
50890af4afaSJohn Baldwin 		p2->p_sigacts = sigacts_hold(p1->p_sigacts);
5096626c604SJulian Elischer 	} else {
51090af4afaSJohn Baldwin 		sigacts_copy(newsigacts, p1->p_sigacts);
51190af4afaSJohn Baldwin 		p2->p_sigacts = newsigacts;
5126626c604SJulian Elischer 	}
5134ac9ae70SJulian Elischer 	if (flags & RFLINUXTHPN)
5146626c604SJulian Elischer 	        p2->p_sigparent = SIGUSR1;
5154ac9ae70SJulian Elischer 	else
5164ac9ae70SJulian Elischer 	        p2->p_sigparent = SIGCHLD;
51788c5ea45SJulian Elischer 
518df8bae1dSRodney W. Grimes 	p2->p_textvp = p1->p_textvp;
5195641ae5dSJohn Baldwin 	p2->p_fd = fd;
520ad05d580STor Egge 	p2->p_fdtol = fdtol;
521dabee6feSPeter Wemm 
522df8bae1dSRodney W. Grimes 	/*
523c8564ad4SBruce Evans 	 * p_limit is copy-on-write.  Bump its refcount.
524df8bae1dSRodney W. Grimes 	 */
52591d5354aSJohn Baldwin 	p2->p_limit = lim_hold(p1->p_limit);
5268b059651SDavid Schultz 
5278b059651SDavid Schultz 	pstats_fork(p1->p_stats, p2->p_stats);
5288b059651SDavid Schultz 
529299bc736SDavid Schultz 	PROC_UNLOCK(p1);
530cda5aba4SDavid Schultz 	PROC_UNLOCK(p2);
531df8bae1dSRodney W. Grimes 
532a69d88afSPeter Wemm 	/* Bump references to the text vnode (for procfs) */
533a69d88afSPeter Wemm 	if (p2->p_textvp)
534a69d88afSPeter Wemm 		vref(p2->p_textvp);
535a69d88afSPeter Wemm 
536c6544064SJohn Baldwin 	/*
537c8564ad4SBruce Evans 	 * Set up linkage for kernel based threading.
538c6544064SJohn Baldwin 	 */
539c6544064SJohn Baldwin 	if ((flags & RFTHREAD) != 0) {
540c6544064SJohn Baldwin 		mtx_lock(&ppeers_lock);
541c6544064SJohn Baldwin 		p2->p_peers = p1->p_peers;
542c6544064SJohn Baldwin 		p1->p_peers = p2;
543c6544064SJohn Baldwin 		p2->p_leader = p1->p_leader;
544c6544064SJohn Baldwin 		mtx_unlock(&ppeers_lock);
545c6544064SJohn Baldwin 		PROC_LOCK(p1->p_leader);
546c6544064SJohn Baldwin 		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
547c6544064SJohn Baldwin 			PROC_UNLOCK(p1->p_leader);
548c6544064SJohn Baldwin 			/*
549c6544064SJohn Baldwin 			 * The task leader is exiting, so process p1 is
550c6544064SJohn Baldwin 			 * going to be killed shortly.  Since p1 obviously
551c6544064SJohn Baldwin 			 * isn't dead yet, we know that the leader is either
552c6544064SJohn Baldwin 			 * sending SIGKILL's to all the processes in this
553c6544064SJohn Baldwin 			 * task or is sleeping waiting for all the peers to
554c6544064SJohn Baldwin 			 * exit.  We let p1 complete the fork, but we need
555c6544064SJohn Baldwin 			 * to go ahead and kill the new process p2 since
556c6544064SJohn Baldwin 			 * the task leader may not get a chance to send
557c6544064SJohn Baldwin 			 * SIGKILL to it.  We leave it on the list so that
558c6544064SJohn Baldwin 			 * the task leader will wait for this new process
559c6544064SJohn Baldwin 			 * to commit suicide.
560c6544064SJohn Baldwin 			 */
561c6544064SJohn Baldwin 			PROC_LOCK(p2);
562c6544064SJohn Baldwin 			psignal(p2, SIGKILL);
563c6544064SJohn Baldwin 			PROC_UNLOCK(p2);
564293d2d22SRobert Watson 		} else
565293d2d22SRobert Watson 			PROC_UNLOCK(p1->p_leader);
566c6544064SJohn Baldwin 	} else {
567c6544064SJohn Baldwin 		p2->p_peers = NULL;
568c6544064SJohn Baldwin 		p2->p_leader = p2;
569c6544064SJohn Baldwin 	}
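	/*
	 * Illustration of the peer list built above, assuming a process L
	 * that is its own leader does two RFTHREAD forks creating p2 and
	 * then p3:
	 *
	 *	L->p_peers -> p3 -> p2 -> NULL		(newest first)
	 *
	 * with both children's p_leader pointing at L; an exiting leader
	 * uses this list to wait for (or signal) its peers, as described
	 * in the comment above.
	 */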
570c6544064SJohn Baldwin 
5713fc755c1SJohn Baldwin 	sx_xlock(&proctree_lock);
5723fc755c1SJohn Baldwin 	PGRP_LOCK(p1->p_pgrp);
5733fc755c1SJohn Baldwin 	PROC_LOCK(p2);
5743fc755c1SJohn Baldwin 	PROC_LOCK(p1);
5753fc755c1SJohn Baldwin 
57670e534e7SDavid Greenman 	/*
5779752f794SJohn Baldwin 	 * Preserve some more flags in subprocess.  P_PROFIL has already
578be67169aSBruce Evans 	 * been preserved.
57970e534e7SDavid Greenman 	 */
580a30ec4b9SDavid Xu 	p2->p_flag |= p1->p_flag & P_SUGID;
581a30ec4b9SDavid Xu 	td2->td_pflags |= td->td_pflags & TDP_ALTSTACK;
582f591779bSSeigo Tanimura 	SESS_LOCK(p1->p_session);
583df8bae1dSRodney W. Grimes 	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
584df8bae1dSRodney W. Grimes 		p2->p_flag |= P_CONTROLT;
585f591779bSSeigo Tanimura 	SESS_UNLOCK(p1->p_session);
5860e3eb7eeSSujal Patel 	if (flags & RFPPWAIT)
587df8bae1dSRodney W. Grimes 		p2->p_flag |= P_PPWAIT;
588be67169aSBruce Evans 
5895cded904SOlivier Houchard 	p2->p_pgrp = p1->p_pgrp;
590b75356e1SJeffrey Hsu 	LIST_INSERT_AFTER(p1, p2, p_pglist);
5912a60b9b9SSeigo Tanimura 	PGRP_UNLOCK(p1->p_pgrp);
592b75356e1SJeffrey Hsu 	LIST_INIT(&p2->p_children);
593b75356e1SJeffrey Hsu 
594c06eb4e2SSam Leffler 	callout_init(&p2->p_itcallout, CALLOUT_MPSAFE);
5954f559836SJake Burkholder 
596df8bae1dSRodney W. Grimes #ifdef KTRACE
597df8bae1dSRodney W. Grimes 	/*
598af300f23SJohn Baldwin 	 * Copy traceflag and tracefile if enabled.
599df8bae1dSRodney W. Grimes 	 */
600af300f23SJohn Baldwin 	mtx_lock(&ktrace_mtx);
601a5881ea5SJohn Baldwin 	KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
602af300f23SJohn Baldwin 	if (p1->p_traceflag & KTRFAC_INHERIT) {
603df8bae1dSRodney W. Grimes 		p2->p_traceflag = p1->p_traceflag;
604a5881ea5SJohn Baldwin 		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
605a5881ea5SJohn Baldwin 			VREF(p2->p_tracevp);
606a5881ea5SJohn Baldwin 			KASSERT(p1->p_tracecred != NULL,
607a5881ea5SJohn Baldwin 			    ("ktrace vnode with no cred"));
608a5881ea5SJohn Baldwin 			p2->p_tracecred = crhold(p1->p_tracecred);
609a5881ea5SJohn Baldwin 		}
610df8bae1dSRodney W. Grimes 	}
611af300f23SJohn Baldwin 	mtx_unlock(&ktrace_mtx);
612df8bae1dSRodney W. Grimes #endif
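	/*
	 * KTRFAC_INHERIT is the "trace descendants too" bit, set for
	 * example by ktrace(1) with its -i option:
	 *
	 *	$ ktrace -i make
	 *
	 * makes every process forked beneath make(1) reuse the parent's
	 * trace vnode and credential exactly as copied above.
	 */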
613df8bae1dSRodney W. Grimes 
614df8bae1dSRodney W. Grimes 	/*
615df95311aSMatthew N. Dodd 	 * If PF_FORK is set, the child process inherits the
616df95311aSMatthew N. Dodd 	 * procfs ioctl flags from its parent.
617df95311aSMatthew N. Dodd 	 */
618df95311aSMatthew N. Dodd 	if (p1->p_pfsflags & PF_FORK) {
619df95311aSMatthew N. Dodd 		p2->p_stops = p1->p_stops;
620df95311aSMatthew N. Dodd 		p2->p_pfsflags = p1->p_pfsflags;
621df95311aSMatthew N. Dodd 	}
622df95311aSMatthew N. Dodd 
623df95311aSMatthew N. Dodd 	/*
624df8bae1dSRodney W. Grimes 	 * This begins the section where we must prevent the parent
625cda5aba4SDavid Schultz 	 * from being swapped.
626df8bae1dSRodney W. Grimes 	 */
627cda5aba4SDavid Schultz 	_PHOLD(p1);
62857934cd3SJohn Baldwin 	PROC_UNLOCK(p1);
6290d2afceeSDavid Greenman 
630df8bae1dSRodney W. Grimes 	/*
6313fc755c1SJohn Baldwin 	 * Attach the new process to its parent.
6323fc755c1SJohn Baldwin 	 *
6333fc755c1SJohn Baldwin 	 * If RFNOWAIT is set, the newly created process becomes a child
6343fc755c1SJohn Baldwin 	 * of init.  This effectively disassociates the child from the
6353fc755c1SJohn Baldwin 	 * parent.
6363fc755c1SJohn Baldwin 	 */
6373fc755c1SJohn Baldwin 	if (flags & RFNOWAIT)
6383fc755c1SJohn Baldwin 		pptr = initproc;
6393fc755c1SJohn Baldwin 	else
6403fc755c1SJohn Baldwin 		pptr = p1;
6413fc755c1SJohn Baldwin 	p2->p_pptr = pptr;
6423fc755c1SJohn Baldwin 	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
6433fc755c1SJohn Baldwin 	sx_xunlock(&proctree_lock);
6443fc755c1SJohn Baldwin 
645bb0e8070SJohn Baldwin 	/* Inform accounting that we have forked. */
646bb0e8070SJohn Baldwin 	p2->p_acflag = AFORK;
647bb0e8070SJohn Baldwin 	PROC_UNLOCK(p2);
648bb0e8070SJohn Baldwin 
6493fc755c1SJohn Baldwin 	/*
650a2a1c95cSPeter Wemm 	 * Finish creating the child process.  It will return via a different
651a2a1c95cSPeter Wemm 	 * execution path later (i.e., directly into user mode).
652dabee6feSPeter Wemm 	 */
653079b7badSJulian Elischer 	vm_forkproc(td, p2, td2, flags);
654df8bae1dSRodney W. Grimes 
6555d22597fSHajimu UMEMOTO 	if (flags == (RFFDG | RFPROC)) {
65694ddc707SAlan Cox 		atomic_add_int(&cnt.v_forks, 1);
65794ddc707SAlan Cox 		atomic_add_int(&cnt.v_forkpages, p2->p_vmspace->vm_dsize +
65894ddc707SAlan Cox 		    p2->p_vmspace->vm_ssize);
6595d22597fSHajimu UMEMOTO 	} else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
66094ddc707SAlan Cox 		atomic_add_int(&cnt.v_vforks, 1);
66194ddc707SAlan Cox 		atomic_add_int(&cnt.v_vforkpages, p2->p_vmspace->vm_dsize +
66294ddc707SAlan Cox 		    p2->p_vmspace->vm_ssize);
6635d22597fSHajimu UMEMOTO 	} else if (p1 == &proc0) {
66494ddc707SAlan Cox 		atomic_add_int(&cnt.v_kthreads, 1);
66594ddc707SAlan Cox 		atomic_add_int(&cnt.v_kthreadpages, p2->p_vmspace->vm_dsize +
66694ddc707SAlan Cox 		    p2->p_vmspace->vm_ssize);
6675d22597fSHajimu UMEMOTO 	} else {
66894ddc707SAlan Cox 		atomic_add_int(&cnt.v_rforks, 1);
66994ddc707SAlan Cox 		atomic_add_int(&cnt.v_rforkpages, p2->p_vmspace->vm_dsize +
67094ddc707SAlan Cox 		    p2->p_vmspace->vm_ssize);
6715d22597fSHajimu UMEMOTO 	}
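	/*
	 * The classification above is by exact flag match: only plain
	 * fork() and vfork() flag sets hit v_forks and v_vforks; of the
	 * rest, forks initiated by proc0 count as kernel threads and
	 * everything else as rforks.  These counters feed vmstat(8)'s
	 * fork statistics.
	 */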
6725d22597fSHajimu UMEMOTO 
673df8bae1dSRodney W. Grimes 	/*
674e9189611SPeter Wemm 	 * Both processes are set up; now check whether any loadable modules
675e0d898b4SJulian Elischer 	 * want to adjust anything.
676fed06968SJulian Elischer 	 *   What if they return an error? XXX
677fed06968SJulian Elischer 	 */
67875b8b3b2SJohn Baldwin 	EVENTHANDLER_INVOKE(process_fork, p1, p2, flags);
679fed06968SJulian Elischer 
680fed06968SJulian Elischer 	/*
6814c3558aaSJohn Baldwin 	 * Set the child start time and mark the process as being complete.
6824c3558aaSJohn Baldwin 	 */
6834c3558aaSJohn Baldwin 	microuptime(&p2->p_stats->p_start);
6844c3558aaSJohn Baldwin 	mtx_lock_spin(&sched_lock);
6854c3558aaSJohn Baldwin 	p2->p_state = PRS_NORMAL;
6864c3558aaSJohn Baldwin 
6874c3558aaSJohn Baldwin 	/*
6880384fff8SJason Evans 	 * If RFSTOPPED was not requested, make the child runnable and
6890384fff8SJason Evans 	 * add it to the run queue.
690df8bae1dSRodney W. Grimes 	 */
6910384fff8SJason Evans 	if ((flags & RFSTOPPED) == 0) {
69271fad9fdSJulian Elischer 		TD_SET_CAN_RUN(td2);
6932630e4c9SJulian Elischer 		setrunqueue(td2, SRQ_BORING);
6940384fff8SJason Evans 	}
6954c3558aaSJohn Baldwin 	mtx_unlock_spin(&sched_lock);
696df8bae1dSRodney W. Grimes 
697df8bae1dSRodney W. Grimes 	/*
698df8bae1dSRodney W. Grimes 	 * Now can be swapped.
699df8bae1dSRodney W. Grimes 	 */
70057934cd3SJohn Baldwin 	PROC_LOCK(p1);
70157934cd3SJohn Baldwin 	_PRELE(p1);
702df8bae1dSRodney W. Grimes 
703df8bae1dSRodney W. Grimes 	/*
70470fca427SJohn Baldwin 	 * Tell any interested parties about the new process.
705cb679c38SJonathan Lemon 	 */
706ad3b9257SJohn-Mark Gurney 	KNOTE_LOCKED(&p1->p_klist, NOTE_FORK | p2->p_pid);
70770fca427SJohn Baldwin 
70857934cd3SJohn Baldwin 	PROC_UNLOCK(p1);
709cb679c38SJonathan Lemon 
710cb679c38SJonathan Lemon 	/*
711df8bae1dSRodney W. Grimes 	 * Preserve synchronization semantics of vfork.  If waiting for
712df8bae1dSRodney W. Grimes 	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
713df8bae1dSRodney W. Grimes 	 * proc (in case of exit).
714df8bae1dSRodney W. Grimes 	 */
71557934cd3SJohn Baldwin 	PROC_LOCK(p2);
716df8bae1dSRodney W. Grimes 	while (p2->p_flag & P_PPWAIT)
71757934cd3SJohn Baldwin 		msleep(p1, &p2->p_mtx, PWAIT, "ppwait", 0);
71857934cd3SJohn Baldwin 	PROC_UNLOCK(p2);
719df8bae1dSRodney W. Grimes 
720df8bae1dSRodney W. Grimes 	/*
721c8564ad4SBruce Evans 	 * If other threads are waiting, let them continue now.
72249539972SJulian Elischer 	 */
723a3aa5592SJulian Elischer 	if (p1->p_flag & P_HADTHREADS) {
72449539972SJulian Elischer 		PROC_LOCK(p1);
72549539972SJulian Elischer 		thread_single_end();
72649539972SJulian Elischer 		PROC_UNLOCK(p1);
72749539972SJulian Elischer 	}
72849539972SJulian Elischer 
72949539972SJulian Elischer 	/*
730df8abd0bSPeter Wemm 	 * Return child proc pointer to parent.
731df8bae1dSRodney W. Grimes 	 */
732df8abd0bSPeter Wemm 	*procp = p2;
733df8bae1dSRodney W. Grimes 	return (0);
734c6544064SJohn Baldwin fail:
7355ce2f678SJohn Baldwin 	sx_sunlock(&proctree_lock);
736b083ea51SMike Silbersack 	if (ppsratecheck(&lastfail, &curfail, 1))
737b083ea51SMike Silbersack 		printf("maxproc limit exceeded by uid %u, please see tuning(7) and login.conf(5).\n",
738b083ea51SMike Silbersack 			uid);
739c6544064SJohn Baldwin 	sx_xunlock(&allproc_lock);
7406bea667fSRobert Watson #ifdef MAC
7416bea667fSRobert Watson 	mac_destroy_proc(newproc);
7426bea667fSRobert Watson #endif
743c6544064SJohn Baldwin 	uma_zfree(proc_zone, newproc);
744a3aa5592SJulian Elischer 	if (p1->p_flag & P_HADTHREADS) {
745c6544064SJohn Baldwin 		PROC_LOCK(p1);
746c6544064SJohn Baldwin 		thread_single_end();
747c6544064SJohn Baldwin 		PROC_UNLOCK(p1);
748c6544064SJohn Baldwin 	}
749c6544064SJohn Baldwin 	tsleep(&forksleep, PUSER, "fork", hz / 2);
750c6544064SJohn Baldwin 	return (error);
751df8bae1dSRodney W. Grimes }
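/*
 * A hedged sketch of an in-kernel fork1() caller using RFSTOPPED, which
 * leaves the new process off the run queue so the caller can finish setting
 * it up before scheduling it.  The flag mix and the setup step are
 * illustrative only, not a quote of any in-tree consumer.
 *
 *	struct proc *p2;
 *	struct thread *td2;
 *	int error;
 *
 *	error = fork1(curthread, RFMEM | RFFDG | RFPROC | RFSTOPPED, 0, &p2);
 *	if (error)
 *		return (error);
 *	// ... name the process, install a fork handler, etc. ...
 *	td2 = FIRST_THREAD_IN_PROC(p2);
 *	mtx_lock_spin(&sched_lock);
 *	TD_SET_CAN_RUN(td2);
 *	setrunqueue(td2, SRQ_BORING);
 *	mtx_unlock_spin(&sched_lock);
 */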
752fed06968SJulian Elischer 
753e0d898b4SJulian Elischer /*
754a7b124c3SJohn Baldwin  * Handle the return of a child process from fork1().  This function
755a7b124c3SJohn Baldwin  * is called from the MD fork_trampoline() entry point.
756a7b124c3SJohn Baldwin  */
757a7b124c3SJohn Baldwin void
758a7b124c3SJohn Baldwin fork_exit(callout, arg, frame)
7598865286bSJohn Baldwin 	void (*callout)(void *, struct trapframe *);
760a7b124c3SJohn Baldwin 	void *arg;
7612a36ec35SJohn Baldwin 	struct trapframe *frame;
762a7b124c3SJohn Baldwin {
763696058c3SJulian Elischer 	struct proc *p;
76470fca427SJohn Baldwin 	struct thread *td;
76570fca427SJohn Baldwin 
76670fca427SJohn Baldwin 	/*
7670047b9a9SBosko Milekic 	 * Finish setting up thread glue so that it begins execution in a
7680047b9a9SBosko Milekic 	 * non-nested critical section with sched_lock held but not recursed.
7690047b9a9SBosko Milekic 	 */
7700047b9a9SBosko Milekic 	td = curthread;
7710047b9a9SBosko Milekic 	p = td->td_proc;
7720047b9a9SBosko Milekic 	td->td_oncpu = PCPU_GET(cpuid);
7730047b9a9SBosko Milekic 	KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new"));
7740047b9a9SBosko Milekic 
7750047b9a9SBosko Milekic 	sched_lock.mtx_lock = (uintptr_t)td;
7760047b9a9SBosko Milekic 	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
7770047b9a9SBosko Milekic 	cpu_critical_fork_exit();
778732d9528SJulian Elischer 	CTR4(KTR_PROC, "fork_exit: new thread %p (kse %p, pid %d, %s)",
779ed062c8dSJulian Elischer 		td, td->td_sched, p->p_pid, p->p_comm);
7800047b9a9SBosko Milekic 
7810047b9a9SBosko Milekic 	/*
78270fca427SJohn Baldwin 	 * Processes normally resume in mi_switch() after being
78370fca427SJohn Baldwin 	 * cpu_switch()'ed to, but when children start up they arrive here
78470fca427SJohn Baldwin 	 * instead, so we must do much the same things as mi_switch() would.
78570fca427SJohn Baldwin 	 */
78657934cd3SJohn Baldwin 
787696058c3SJulian Elischer 	if ((td = PCPU_GET(deadthread))) {
788696058c3SJulian Elischer 		PCPU_SET(deadthread, NULL);
789696058c3SJulian Elischer 		thread_stash(td);
790696058c3SJulian Elischer 	}
791696058c3SJulian Elischer 	td = curthread;
792201b0ea8SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
793a7b124c3SJohn Baldwin 
794a7b124c3SJohn Baldwin 	/*
795a7b124c3SJohn Baldwin 	 * cpu_set_fork_handler() arranges for the callout invoked here to be
796a7b124c3SJohn Baldwin 	 * a function that does not return, so the thread stays in kernel mode.
797a7b124c3SJohn Baldwin 	 * initproc has its own fork handler, but it does return.
798a7b124c3SJohn Baldwin 	 */
7995813dc03SJohn Baldwin 	KASSERT(callout != NULL, ("NULL callout in fork_exit"));
8008865286bSJohn Baldwin 	callout(arg, frame);
801a7b124c3SJohn Baldwin 
802a7b124c3SJohn Baldwin 	/*
803a7b124c3SJohn Baldwin 	 * Check if a kernel thread misbehaved and returned from its main
804a7b124c3SJohn Baldwin 	 * function.
805a7b124c3SJohn Baldwin 	 */
80657934cd3SJohn Baldwin 	PROC_LOCK(p);
807a7b124c3SJohn Baldwin 	if (p->p_flag & P_KTHREAD) {
80857934cd3SJohn Baldwin 		PROC_UNLOCK(p);
809a7b124c3SJohn Baldwin 		printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",
810a7b124c3SJohn Baldwin 		    p->p_comm, p->p_pid);
811a7b124c3SJohn Baldwin 		kthread_exit(0);
812a7b124c3SJohn Baldwin 	}
81357934cd3SJohn Baldwin 	PROC_UNLOCK(p);
814a7b124c3SJohn Baldwin 	mtx_assert(&Giant, MA_NOTOWNED);
815a7b124c3SJohn Baldwin }
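/*
 * For an ordinary user process the (callout, arg) pair invoked above is
 * fork_return() with the new thread, installed by the MD fork code; a
 * kernel thread instead has its main function installed through
 * cpu_set_fork_handler() (named in the comment above), and that function
 * must not return or it trips the P_KTHREAD check here.  A hedged sketch,
 * with my_service_main, struct my_softc and sc as hypothetical names:
 *
 *	static void
 *	my_service_main(void *arg)
 *	{
 *		struct my_softc *sc = arg;
 *
 *		for (;;) {
 *			// ... do the thread's periodic work ...
 *			tsleep(sc, PWAIT, "mysvc", hz);
 *		}
 *	}
 *	...
 *	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(p2), my_service_main, sc);
 */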
816a7b124c3SJohn Baldwin 
817a7b124c3SJohn Baldwin /*
818a7b124c3SJohn Baldwin  * Simplified back end of syscall(), used when returning from fork()
819a7b124c3SJohn Baldwin  * directly into user mode.  Giant is not held on entry, and must not
820a7b124c3SJohn Baldwin  * be held on return.  This function is passed in to fork_exit() as the
821a7b124c3SJohn Baldwin  * first parameter and is called when returning to a new userland process.
822a7b124c3SJohn Baldwin  */
823a7b124c3SJohn Baldwin void
824b40ce416SJulian Elischer fork_return(td, frame)
825b40ce416SJulian Elischer 	struct thread *td;
826a7b124c3SJohn Baldwin 	struct trapframe *frame;
827a7b124c3SJohn Baldwin {
828a7b124c3SJohn Baldwin 
829b40ce416SJulian Elischer 	userret(td, frame, 0);
830a7b124c3SJohn Baldwin #ifdef KTRACE
831af300f23SJohn Baldwin 	if (KTRPOINT(td, KTR_SYSRET))
832af300f23SJohn Baldwin 		ktrsysret(SYS_fork, 0, 0);
833a7b124c3SJohn Baldwin #endif
834a7b124c3SJohn Baldwin 	mtx_assert(&Giant, MA_NOTOWNED);
835a7b124c3SJohn Baldwin }
836