/* xref: /freebsd/sys/kern/kern_fork.c (revision fcf7f27a3669d0d6a943b7e890f11ae6f5f00c2c) */
/*-
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_fork.c	8.6 (Berkeley) 4/8/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/mac.h>
#include <sys/ktr.h>
#include <sys/ktrace.h>
#include <sys/unistd.h>
#include <sys/sx.h>
#include <sys/signalvar.h>

#include <security/audit/audit.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>


#ifndef _SYS_SYSPROTO_H_
struct fork_args {
	int     dummy;
};
#endif

static int forksleep; /* Place for fork1() to sleep on. */
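/*
 * When fork1() hits the process or per-uid limits it tsleep()s on
 * &forksleep for hz / 2 ticks before returning EAGAIN; see the failure
 * path at the end of fork1() below.
 */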

/*
 * MPSAFE
 */
/* ARGSUSED */
int
fork(td, uap)
	struct thread *td;
	struct fork_args *uap;
{
	int error;
	struct proc *p2;

	error = fork1(td, RFFDG | RFPROC, 0, &p2);
	if (error == 0) {
		td->td_retval[0] = p2->p_pid;
		td->td_retval[1] = 0;
	}
	return (error);
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
vfork(td, uap)
	struct thread *td;
	struct vfork_args *uap;
{
	int error;
	struct proc *p2;

	error = fork1(td, RFFDG | RFPROC | RFPPWAIT | RFMEM, 0, &p2);
	if (error == 0) {
		td->td_retval[0] = p2->p_pid;
		td->td_retval[1] = 0;
	}
	return (error);
}

/*
 * MPSAFE
 */
int
rfork(td, uap)
	struct thread *td;
	struct rfork_args *uap;
{
	struct proc *p2;
	int error;

	/* Don't allow kernel-only flags. */
	if ((uap->flags & RFKERNELONLY) != 0)
		return (EINVAL);

	error = fork1(td, uap->flags, 0, &p2);
	if (error == 0) {
		td->td_retval[0] = p2 ? p2->p_pid : 0;
		td->td_retval[1] = 0;
	}
	return (error);
}
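
/*
 * fork() and vfork() are fixed-flag wrappers around fork1(): fork() asks
 * for a copied descriptor table (RFFDG), while vfork() additionally
 * shares the address space (RFMEM) and makes the parent wait for the
 * child to exec or exit (RFPPWAIT).  rfork() passes the caller's RF*
 * flags through directly, minus the kernel-only ones rejected above.
 */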

int	nprocs = 1;		/* process 0 */
int	lastpid = 0;
SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
    "Last used PID");

/*
 * Random component to lastpid generation.  We mix in a random factor to make
 * it a little harder to predict.  We sanity check the modulus value to avoid
 * doing it in critical paths.  Don't let it be too small or we pointlessly
 * waste entropy, and don't let it be impossibly large.  Using a modulus that
 * is too big causes a LOT more process table scans and slows down fork
 * processing as the pidchecked caching is defeated.
 */
static int randompid = 0;

static int
sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
{
	int error, pid;

	error = sysctl_wire_old_buffer(req, sizeof(int));
	if (error != 0)
		return (error);
	sx_xlock(&allproc_lock);
	pid = randompid;
	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error == 0 && req->newptr != NULL) {
		if (pid < 0 || pid > PID_MAX - 100)	/* out of range */
			pid = PID_MAX - 100;
		else if (pid < 2)			/* NOP */
			pid = 0;
		else if (pid < 100)			/* Make it reasonable */
			pid = 100;
		randompid = pid;
	}
	sx_xunlock(&allproc_lock);
	return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
    0, 0, sysctl_kern_randompid, "I", "Random PID modulus");

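/*
 * fork1() is the common back end for fork(), vfork() and rfork().
 * 'flags' is the RF* set used above, 'pages' optionally requests an
 * alternate-size kernel stack for the new thread, and '*procp' receives
 * the new process (or NULL when RFPROC is not set).  When kern.randompid
 * is nonzero, the starting candidate PID is perturbed by
 * arc4random() % randompid instead of being strictly lastpid + 1.
 */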
int
fork1(td, flags, pages, procp)
	struct thread *td;
	int flags;
	int pages;
	struct proc **procp;
{
	struct proc *p1, *p2, *pptr;
	uid_t uid;
	struct proc *newproc;
	int ok, trypid;
	static int curfail, pidchecked = 0;
	static struct timeval lastfail;
	struct filedesc *fd;
	struct filedesc_to_leader *fdtol;
	struct thread *td2;
	struct ksegrp *kg2;
	struct sigacts *newsigacts;
	int error;

	/* Can't copy and clear. */
	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
		return (EINVAL);

	p1 = td->td_proc;

	/*
	 * Here we don't create a new process, but we divorce
	 * certain parts of a process from itself.
	 */
	if ((flags & RFPROC) == 0) {
		vm_forkproc(td, NULL, NULL, flags);

		/*
		 * Close all file descriptors.
		 */
		if (flags & RFCFDG) {
			struct filedesc *fdtmp;
			fdtmp = fdinit(td->td_proc->p_fd);
			fdfree(td);
			p1->p_fd = fdtmp;
		}

		/*
		 * Unshare file descriptors (from parent).
		 */
		if (flags & RFFDG)
			fdunshare(p1, td);
		*procp = NULL;
		return (0);
	}

	/*
	 * Note 1:1 allows for forking with one thread coming out on the
	 * other side with the expectation that the process is about to
	 * exec.
	 */
	if (p1->p_flag & P_HADTHREADS) {
		/*
		 * Idle the other threads for a moment.
		 * Since the user space is copied, it must remain stable.
		 * In addition, all threads (from the user perspective)
		 * need to either be suspended or in the kernel,
		 * where they will try to restart in the parent and will
		 * be aborted in the child.
		 */
		PROC_LOCK(p1);
		if (thread_single(SINGLE_NO_EXIT)) {
			/* Abort.  Someone else is single-threading before us. */
			PROC_UNLOCK(p1);
			return (ERESTART);
		}
		PROC_UNLOCK(p1);
		/*
		 * All other activity in this process
		 * is now suspended at the user boundary
		 * (or other safe places if we think of any).
		 */
	}

	/* Allocate new proc. */
	newproc = uma_zalloc(proc_zone, M_WAITOK);
#ifdef MAC
	mac_init_proc(newproc);
#endif
#ifdef AUDIT
	audit_proc_alloc(newproc);
#endif
	knlist_init(&newproc->p_klist, &newproc->p_mtx, NULL, NULL, NULL);
	STAILQ_INIT(&newproc->p_ktr);

	/* We have to lock the process tree while we look for a pid. */
	sx_slock(&proctree_lock);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last ten processes; don't let root
	 * exceed the limit. The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	sx_xlock(&allproc_lock);
	uid = td->td_ucred->cr_ruid;
	if ((nprocs >= maxproc - 10 &&
	    suser_cred(td->td_ucred, SUSER_RUID) != 0) ||
	    nprocs >= maxproc) {
		error = EAGAIN;
		goto fail;
	}

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	PROC_LOCK(p1);
	ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
		(uid != 0) ? lim_cur(p1, RLIMIT_NPROC) : 0);
	PROC_UNLOCK(p1);
	if (!ok) {
		error = EAGAIN;
		goto fail;
	}

	/*
	 * Increment the nprocs resource before blocking can occur.  There
	 * are hard limits on the number of processes that can run.
	 */
	nprocs++;

	/*
	 * Find an unused process ID.  We remember a range of unused IDs
	 * ready to use (from lastpid+1 through pidchecked-1).
	 *
	 * If RFHIGHPID is set (used during system boot), do not allocate
	 * low-numbered pids.
	 */
	trypid = lastpid + 1;
	if (flags & RFHIGHPID) {
		if (trypid < 10)
			trypid = 10;
	} else {
		if (randompid)
			trypid += arc4random() % randompid;
	}
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (trypid >= PID_MAX) {
		trypid = trypid % PID_MAX;
		if (trypid < 100)
			trypid += 100;
		pidchecked = 0;
	}
	if (trypid >= pidchecked) {
		int doingzomb = 0;

		pidchecked = PID_MAX;
		/*
		 * Scan the active and zombie procs to check whether this pid
		 * is in use.  Remember the lowest pid that's greater
		 * than trypid, so we can avoid checking for a while.
		 */
		p2 = LIST_FIRST(&allproc);
again:
		for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
			PROC_LOCK(p2);
			while (p2->p_pid == trypid ||
			    (p2->p_pgrp != NULL &&
			    (p2->p_pgrp->pg_id == trypid ||
			    (p2->p_session != NULL &&
			    p2->p_session->s_sid == trypid)))) {
				trypid++;
				if (trypid >= pidchecked) {
					PROC_UNLOCK(p2);
					goto retry;
				}
			}
			if (p2->p_pid > trypid && pidchecked > p2->p_pid)
				pidchecked = p2->p_pid;
			if (p2->p_pgrp != NULL) {
				if (p2->p_pgrp->pg_id > trypid &&
				    pidchecked > p2->p_pgrp->pg_id)
					pidchecked = p2->p_pgrp->pg_id;
				if (p2->p_session != NULL &&
				    p2->p_session->s_sid > trypid &&
				    pidchecked > p2->p_session->s_sid)
					pidchecked = p2->p_session->s_sid;
			}
			PROC_UNLOCK(p2);
		}
		if (!doingzomb) {
			doingzomb = 1;
			p2 = LIST_FIRST(&zombproc);
			goto again;
		}
	}
	sx_sunlock(&proctree_lock);

	/*
	 * RFHIGHPID does not mess with the lastpid counter during boot.
	 */
	if (flags & RFHIGHPID)
		pidchecked = 0;
	else
		lastpid = trypid;

	p2 = newproc;
	p2->p_state = PRS_NEW;		/* protect against others */
	p2->p_pid = trypid;
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	sx_xunlock(&allproc_lock);
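	/*
	 * From this point on p2 is visible on the allproc list and in the
	 * PID hash, but it stays in PRS_NEW until the end of fork1(), which
	 * is how other code can tell that it is not yet fully constructed.
	 */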

	/*
	 * Malloc things while we don't hold any locks.
	 */
	if (flags & RFSIGSHARE)
		newsigacts = NULL;
	else
		newsigacts = sigacts_alloc();

	/*
	 * Copy filedesc.
	 */
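	/*
	 * Three cases: RFCFDG gives the child a brand new descriptor table,
	 * RFFDG gives it a private copy of the parent's table, and otherwise
	 * the table itself is shared, with a filedesc_to_leader structure
	 * tracking which process leader the shared table belongs to.
	 */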
	if (flags & RFCFDG) {
		fd = fdinit(p1->p_fd);
		fdtol = NULL;
	} else if (flags & RFFDG) {
		fd = fdcopy(p1->p_fd);
		fdtol = NULL;
	} else {
		fd = fdshare(p1->p_fd);
		if (p1->p_fdtol == NULL)
			p1->p_fdtol =
				filedesc_to_leader_alloc(NULL,
							 NULL,
							 p1->p_leader);
		if ((flags & RFTHREAD) != 0) {
			/*
			 * Shared file descriptor table and
			 * shared process leaders.
			 */
			fdtol = p1->p_fdtol;
			FILEDESC_LOCK_FAST(p1->p_fd);
			fdtol->fdl_refcount++;
			FILEDESC_UNLOCK_FAST(p1->p_fd);
		} else {
			/*
			 * Shared file descriptor table, and
			 * different process leaders
			 */
			fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
							 p1->p_fd,
							 p2);
		}
	}
	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
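	/*
	 * The p_startzero/p_endzero and p_startcopy/p_endcopy markers (and
	 * their struct thread and struct ksegrp counterparts) delimit member
	 * groups in the structure definitions; __rangeof() turns them into
	 * byte ranges so the bzero()/bcopy() calls below can handle whole
	 * groups of fields at once.
	 */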
	td2 = FIRST_THREAD_IN_PROC(p2);
	kg2 = FIRST_KSEGRP_IN_PROC(p2);

	/* Allocate and switch to an alternate kstack if specified. */
	if (pages != 0)
		vm_thread_new_altkstack(td2, pages);

	PROC_LOCK(p2);
	PROC_LOCK(p1);

	bzero(&p2->p_startzero,
	    __rangeof(struct proc, p_startzero, p_endzero));
	bzero(&td2->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));
	bzero(&kg2->kg_startzero,
	    __rangeof(struct ksegrp, kg_startzero, kg_endzero));

	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    __rangeof(struct proc, p_startcopy, p_endcopy));
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));
	bcopy(&td->td_ksegrp->kg_startcopy, &kg2->kg_startcopy,
	    __rangeof(struct ksegrp, kg_startcopy, kg_endcopy));

	td2->td_sigstk = td->td_sigstk;
	td2->td_sigmask = td->td_sigmask;

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 */
	p2->p_flag = 0;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);
	mtx_lock_spin(&sched_lock);
	p2->p_sflag = PS_INMEM;
	/*
	 * Allow the scheduler to adjust the priority of the child and
	 * parent while we hold the sched_lock.
	 */
	sched_fork(td, td2);

	mtx_unlock_spin(&sched_lock);
	p2->p_ucred = crhold(td->td_ucred);
	td2->td_ucred = crhold(p2->p_ucred);	/* XXXKSE */
#ifdef AUDIT
	audit_proc_fork(p1, p2);
#endif
	pargs_hold(p2->p_args);

	if (flags & RFSIGSHARE) {
		p2->p_sigacts = sigacts_hold(p1->p_sigacts);
	} else {
		sigacts_copy(newsigacts, p1->p_sigacts);
		p2->p_sigacts = newsigacts;
	}
	if (flags & RFLINUXTHPN)
	        p2->p_sigparent = SIGUSR1;
	else
	        p2->p_sigparent = SIGCHLD;

	p2->p_textvp = p1->p_textvp;
	p2->p_fd = fd;
	p2->p_fdtol = fdtol;

	/*
	 * p_limit is copy-on-write.  Bump its refcount.
	 */
	p2->p_limit = lim_hold(p1->p_limit);

	pstats_fork(p1->p_stats, p2->p_stats);

	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);

	/* Bump references to the text vnode (for procfs) */
	if (p2->p_textvp)
		vref(p2->p_textvp);

	/*
	 * Set up linkage for kernel-based threading.
	 */
	if ((flags & RFTHREAD) != 0) {
		mtx_lock(&ppeers_lock);
		p2->p_peers = p1->p_peers;
		p1->p_peers = p2;
		p2->p_leader = p1->p_leader;
		mtx_unlock(&ppeers_lock);
		PROC_LOCK(p1->p_leader);
		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(p1->p_leader);
			/*
			 * The task leader is exiting, so process p1 is
			 * going to be killed shortly.  Since p1 obviously
			 * isn't dead yet, we know that the leader is either
			 * sending SIGKILLs to all the processes in this
			 * task or is sleeping waiting for all the peers to
			 * exit.  We let p1 complete the fork, but we need
			 * to go ahead and kill the new process p2 since
			 * the task leader may not get a chance to send
			 * SIGKILL to it.  We leave it on the list so that
			 * the task leader will wait for this new process
			 * to commit suicide.
			 */
			PROC_LOCK(p2);
			psignal(p2, SIGKILL);
			PROC_UNLOCK(p2);
		} else
			PROC_UNLOCK(p1->p_leader);
	} else {
		p2->p_peers = NULL;
		p2->p_leader = p2;
	}

	sx_xlock(&proctree_lock);
	PGRP_LOCK(p1->p_pgrp);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	/*
	 * Preserve some more flags in the subprocess.  P_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & P_SUGID;
	td2->td_pflags |= td->td_pflags & TDP_ALTSTACK;
	SESS_LOCK(p1->p_session);
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	SESS_UNLOCK(p1->p_session);
	if (flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	p2->p_pgrp = p1->p_pgrp;
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	PGRP_UNLOCK(p1->p_pgrp);
	LIST_INIT(&p2->p_children);

	callout_init(&p2->p_itcallout, CALLOUT_MPSAFE);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 */
	mtx_lock(&ktrace_mtx);
	KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
			VREF(p2->p_tracevp);
			KASSERT(p1->p_tracecred != NULL,
			    ("ktrace vnode with no cred"));
			p2->p_tracecred = crhold(p1->p_tracecred);
		}
	}
	mtx_unlock(&ktrace_mtx);
#endif

	/*
	 * If PF_FORK is set, the child process inherits the
	 * procfs ioctl flags from its parent.
	 */
	if (p1->p_pfsflags & PF_FORK) {
		p2->p_stops = p1->p_stops;
		p2->p_pfsflags = p1->p_pfsflags;
	}

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	_PHOLD(p1);
	PROC_UNLOCK(p1);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process becomes a child
	 * of init.  This effectively disassociates the child from the
	 * parent.
	 */
	if (flags & RFNOWAIT)
		pptr = initproc;
	else
		pptr = p1;
	p2->p_pptr = pptr;
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	sx_xunlock(&proctree_lock);

	/* Inform accounting that we have forked. */
	p2->p_acflag = AFORK;
	PROC_UNLOCK(p2);

	/*
	 * Finish creating the child process.  It will return via a different
	 * execution path later (i.e., directly into user mode).
	 */
	vm_forkproc(td, p2, td2, flags);

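	/*
	 * Charge the fork to the appropriate VM statistics counter: the
	 * exact flag combinations used by fork() and vfork() are counted as
	 * such, children of proc0 are counted as kernel threads, and
	 * everything else is counted as an rfork().
	 */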
	if (flags == (RFFDG | RFPROC)) {
		atomic_add_int(&cnt.v_forks, 1);
		atomic_add_int(&cnt.v_forkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
		atomic_add_int(&cnt.v_vforks, 1);
		atomic_add_int(&cnt.v_vforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (p1 == &proc0) {
		atomic_add_int(&cnt.v_kthreads, 1);
		atomic_add_int(&cnt.v_kthreadpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else {
		atomic_add_int(&cnt.v_rforks, 1);
		atomic_add_int(&cnt.v_rforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	}

	/*
	 * Both processes are set up; now check if any loadable modules want
	 * to adjust anything.
	 *   What if they have an error? XXX
	 */
	EVENTHANDLER_INVOKE(process_fork, p1, p2, flags);

	/*
	 * Set the child start time and mark the process as being complete.
	 */
	microuptime(&p2->p_stats->p_start);
	mtx_lock_spin(&sched_lock);
	p2->p_state = PRS_NORMAL;

	/*
	 * If RFSTOPPED not requested, make child runnable and add to
	 * run queue.
	 */
	if ((flags & RFSTOPPED) == 0) {
		TD_SET_CAN_RUN(td2);
		setrunqueue(td2, SRQ_BORING);
	}
	mtx_unlock_spin(&sched_lock);

	/*
	 * Now can be swapped.
	 */
	PROC_LOCK(p1);
	_PRELE(p1);

	/*
	 * Tell any interested parties about the new process.
	 */
	KNOTE_LOCKED(&p1->p_klist, NOTE_FORK | p2->p_pid);

	PROC_UNLOCK(p1);

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	PROC_LOCK(p2);
	while (p2->p_flag & P_PPWAIT)
		msleep(p1, &p2->p_mtx, PWAIT, "ppwait", 0);
	PROC_UNLOCK(p2);

	/*
	 * If other threads are waiting, let them continue now.
	 */
	if (p1->p_flag & P_HADTHREADS) {
		PROC_LOCK(p1);
		thread_single_end();
		PROC_UNLOCK(p1);
	}

	/*
	 * Return child proc pointer to parent.
	 */
	*procp = p2;
	return (0);
fail:
	sx_sunlock(&proctree_lock);
	if (ppsratecheck(&lastfail, &curfail, 1))
		printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n",
			uid);
	sx_xunlock(&allproc_lock);
#ifdef MAC
	mac_destroy_proc(newproc);
#endif
#ifdef AUDIT
	audit_proc_free(newproc);
#endif
	uma_zfree(proc_zone, newproc);
	if (p1->p_flag & P_HADTHREADS) {
		PROC_LOCK(p1);
		thread_single_end();
		PROC_UNLOCK(p1);
	}
	tsleep(&forksleep, PUSER, "fork", hz / 2);
	return (error);
}

/*
 * Handle the return of a child process from fork1().  This function
 * is called from the MD fork_trampoline() entry point.
 */
void
fork_exit(callout, arg, frame)
	void (*callout)(void *, struct trapframe *);
	void *arg;
	struct trapframe *frame;
{
	struct proc *p;
	struct thread *td;

	/*
	 * Finish setting up thread glue so that it begins execution in a
	 * non-nested critical section with sched_lock held but not recursed.
	 */
	td = curthread;
	p = td->td_proc;
	td->td_oncpu = PCPU_GET(cpuid);
	KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new"));

	sched_lock.mtx_lock = (uintptr_t)td;
	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
	CTR4(KTR_PROC, "fork_exit: new thread %p (kse %p, pid %d, %s)",
		td, td->td_sched, p->p_pid, p->p_comm);

	/*
	 * Processes normally resume in mi_switch() after being
	 * cpu_switch()'ed to, but when children start up they arrive here
	 * instead, so we must do much the same things as mi_switch() would.
	 */

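	/*
	 * A thread that exited on this CPU may have parked itself in the
	 * per-CPU deadthread slot because it could not free its own stack
	 * while still running on it; stash it now so it can be reclaimed.
	 */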
	if ((td = PCPU_GET(deadthread))) {
		PCPU_SET(deadthread, NULL);
		thread_stash(td);
	}
	td = curthread;
	mtx_unlock_spin(&sched_lock);

	/*
	 * cpu_set_fork_handler intercepts this function call so that the
	 * callout invoked here is a function that does not return, keeping
	 * the thread in kernel mode.  initproc has its own fork handler,
	 * but that handler does return.
	 */
	KASSERT(callout != NULL, ("NULL callout in fork_exit"));
	callout(arg, frame);

	/*
	 * Check if a kernel thread misbehaved and returned from its main
	 * function.
	 */
	PROC_LOCK(p);
	if (p->p_flag & P_KTHREAD) {
		PROC_UNLOCK(p);
		printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",
		    p->p_comm, p->p_pid);
		kthread_exit(0);
	}
	PROC_UNLOCK(p);
	mtx_assert(&Giant, MA_NOTOWNED);
}

/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.  Giant is not held on entry, and must not
 * be held on return.  This function is passed in to fork_exit() as the
 * first parameter and is called when returning to a new userland process.
 */
void
fork_return(td, frame)
	struct thread *td;
	struct trapframe *frame;
{

	userret(td, frame, 0);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(SYS_fork, 0, 0);
#endif
	mtx_assert(&Giant, MA_NOTOWNED);
}