xref: /freebsd/sys/kern/kern_fork.c (revision a448b62ac998ab4ae5e85fc7235cbb5c5dbe60ef)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  * (c) UNIX System Laboratories, Inc.
5df8bae1dSRodney W. Grimes  * All or some portions of this file are derived from material licensed
6df8bae1dSRodney W. Grimes  * to the University of California by American Telephone and Telegraph
7df8bae1dSRodney W. Grimes  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8df8bae1dSRodney W. Grimes  * the permission of UNIX System Laboratories, Inc.
9df8bae1dSRodney W. Grimes  *
10df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
11df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
12df8bae1dSRodney W. Grimes  * are met:
13df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
14df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
15df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
16df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
17df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
18df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
19df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
20df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
21df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
22df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
23df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
24df8bae1dSRodney W. Grimes  *    without specific prior written permission.
25df8bae1dSRodney W. Grimes  *
26df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
37df8bae1dSRodney W. Grimes  *
38df8bae1dSRodney W. Grimes  *	@(#)kern_fork.c	8.6 (Berkeley) 4/8/94
39c3aac50fSPeter Wemm  * $FreeBSD$
40df8bae1dSRodney W. Grimes  */
41df8bae1dSRodney W. Grimes 
42db6a20e2SGarrett Wollman #include "opt_ktrace.h"
43db6a20e2SGarrett Wollman 
44df8bae1dSRodney W. Grimes #include <sys/param.h>
45df8bae1dSRodney W. Grimes #include <sys/systm.h>
46d2d3e875SBruce Evans #include <sys/sysproto.h>
47df8bae1dSRodney W. Grimes #include <sys/filedesc.h>
48df8bae1dSRodney W. Grimes #include <sys/kernel.h>
49c76e95c3SPeter Wemm #include <sys/sysctl.h>
50df8bae1dSRodney W. Grimes #include <sys/malloc.h>
5135e0e5b3SJohn Baldwin #include <sys/mutex.h>
52df8bae1dSRodney W. Grimes #include <sys/proc.h>
53df8bae1dSRodney W. Grimes #include <sys/resourcevar.h>
54df8bae1dSRodney W. Grimes #include <sys/vnode.h>
55df8bae1dSRodney W. Grimes #include <sys/acct.h>
560384fff8SJason Evans #include <sys/ktr.h>
57df8bae1dSRodney W. Grimes #include <sys/ktrace.h>
58b71fec07SBruce Evans #include <sys/unistd.h>
5975c13541SPoul-Henning Kamp #include <sys/jail.h>
60df8bae1dSRodney W. Grimes 
61d93f860cSPoul-Henning Kamp #include <vm/vm.h>
62996c772fSJohn Dyson #include <sys/lock.h>
63dabee6feSPeter Wemm #include <vm/pmap.h>
64dabee6feSPeter Wemm #include <vm/vm_map.h>
65efeaf95aSDavid Greenman #include <vm/vm_extern.h>
662d8acc0fSJohn Dyson #include <vm/vm_zone.h>
67d93f860cSPoul-Henning Kamp 
68dc9c271aSJulian Elischer #include <sys/user.h>
6988c5ea45SJulian Elischer 
7093efcae8SPoul-Henning Kamp static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback");
7193efcae8SPoul-Henning Kamp 
72be67169aSBruce Evans static int	fast_vfork = 1;
7347fdd692SNeil Blakey-Milner SYSCTL_INT(_kern, OID_AUTO, fast_vfork, CTLFLAG_RW, &fast_vfork, 0,
7447fdd692SNeil Blakey-Milner     "flag to indicate whether we have a fast vfork()");
75c76e95c3SPeter Wemm 
76fed06968SJulian Elischer /*
77e0d898b4SJulian Elischer  * These are the stuctures used to create a callout list for things to do
78e0d898b4SJulian Elischer  * when forking a process
79fed06968SJulian Elischer  */
8093efcae8SPoul-Henning Kamp struct forklist {
81fed06968SJulian Elischer 	forklist_fn function;
82e3975643SJake Burkholder 	TAILQ_ENTRY(forklist) next;
8393efcae8SPoul-Henning Kamp };
84fed06968SJulian Elischer 
85e3975643SJake Burkholder TAILQ_HEAD(forklist_head, forklist);
8693efcae8SPoul-Henning Kamp static struct forklist_head fork_list = TAILQ_HEAD_INITIALIZER(fork_list);
87fed06968SJulian Elischer 
#ifndef _SYS_SYSPROTO_H_
/* Compiled only when _SYS_SYSPROTO_H_ is not defined. */
struct fork_args {
	int	dummy;
};
#endif
93ad7507e2SSteven Wallace 
94df8bae1dSRodney W. Grimes /* ARGSUSED */
9526f9a767SRodney W. Grimes int
96cb226aaaSPoul-Henning Kamp fork(p, uap)
97df8bae1dSRodney W. Grimes 	struct proc *p;
98df8bae1dSRodney W. Grimes 	struct fork_args *uap;
99df8bae1dSRodney W. Grimes {
100df8abd0bSPeter Wemm 	int error;
101df8abd0bSPeter Wemm 	struct proc *p2;
102be67169aSBruce Evans 
103df8abd0bSPeter Wemm 	error = fork1(p, RFFDG | RFPROC, &p2);
104df8abd0bSPeter Wemm 	if (error == 0) {
105df8abd0bSPeter Wemm 		p->p_retval[0] = p2->p_pid;
106df8abd0bSPeter Wemm 		p->p_retval[1] = 0;
107df8abd0bSPeter Wemm 	}
108df8abd0bSPeter Wemm 	return error;
109df8bae1dSRodney W. Grimes }
110df8bae1dSRodney W. Grimes 
111df8bae1dSRodney W. Grimes /* ARGSUSED */
11226f9a767SRodney W. Grimes int
113cb226aaaSPoul-Henning Kamp vfork(p, uap)
114df8bae1dSRodney W. Grimes 	struct proc *p;
115dabee6feSPeter Wemm 	struct vfork_args *uap;
116df8bae1dSRodney W. Grimes {
117df8abd0bSPeter Wemm 	int error;
118df8abd0bSPeter Wemm 	struct proc *p2;
119be67169aSBruce Evans 
120df8abd0bSPeter Wemm 	error = fork1(p, RFFDG | RFPROC | RFPPWAIT | RFMEM, &p2);
121df8abd0bSPeter Wemm 	if (error == 0) {
122df8abd0bSPeter Wemm 		p->p_retval[0] = p2->p_pid;
123df8abd0bSPeter Wemm 		p->p_retval[1] = 0;
124df8abd0bSPeter Wemm 	}
125df8abd0bSPeter Wemm 	return error;
126df8bae1dSRodney W. Grimes }
127df8bae1dSRodney W. Grimes 
128dabee6feSPeter Wemm int
129cb226aaaSPoul-Henning Kamp rfork(p, uap)
130dabee6feSPeter Wemm 	struct proc *p;
131dabee6feSPeter Wemm 	struct rfork_args *uap;
132dabee6feSPeter Wemm {
133df8abd0bSPeter Wemm 	int error;
134df8abd0bSPeter Wemm 	struct proc *p2;
135be67169aSBruce Evans 
1360384fff8SJason Evans 	/* mask kernel only flags out of the user flags */
1370384fff8SJason Evans 	error = fork1(p, uap->flags & ~RFKERNELONLY, &p2);
138df8abd0bSPeter Wemm 	if (error == 0) {
1391943af61SPeter Wemm 		p->p_retval[0] = p2 ? p2->p_pid : 0;
140df8abd0bSPeter Wemm 		p->p_retval[1] = 0;
141df8abd0bSPeter Wemm 	}
142df8abd0bSPeter Wemm 	return error;
143dabee6feSPeter Wemm }
144dabee6feSPeter Wemm 
145dabee6feSPeter Wemm 
/* Number of processes; starts at 1 to account for process 0. */
int	nprocs = 1;
/* fork1() begins its search for a free pid at nextpid + 1. */
static int nextpid = 0;
148df8bae1dSRodney W. Grimes 
/*
 * Random component of pid allocation.  When non-zero, fork1() adds
 * (arc4random() % randompid) to its starting candidate so that pids are a
 * little harder to predict.  The value is sanity checked when it is set
 * through sysctl rather than in the fork path: it must not be so small
 * that randomness is wasted for no gain, nor impossibly large.  An
 * over-large modulus causes a LOT more process table scans and slows down
 * fork processing, because the pidchecked caching is defeated.
 */
static int randompid = 0;
158bb6a234eSPeter Wemm 
159bb6a234eSPeter Wemm static int
16082d9ae4eSPoul-Henning Kamp sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
161bb6a234eSPeter Wemm {
162bb6a234eSPeter Wemm 	int error, pid;
163bb6a234eSPeter Wemm 
164bb6a234eSPeter Wemm 	pid = randompid;
165bb6a234eSPeter Wemm 	error = sysctl_handle_int(oidp, &pid, 0, req);
166bb6a234eSPeter Wemm 	if (error || !req->newptr)
167bb6a234eSPeter Wemm 		return (error);
168bb6a234eSPeter Wemm 	if (pid < 0 || pid > PID_MAX - 100)	/* out of range */
169bb6a234eSPeter Wemm 		pid = PID_MAX - 100;
170bb6a234eSPeter Wemm 	else if (pid < 2)			/* NOP */
171bb6a234eSPeter Wemm 		pid = 0;
172bb6a234eSPeter Wemm 	else if (pid < 100)			/* Make it reasonable */
173bb6a234eSPeter Wemm 		pid = 100;
174bb6a234eSPeter Wemm 	randompid = pid;
175bb6a234eSPeter Wemm 	return (error);
176bb6a234eSPeter Wemm }
177bb6a234eSPeter Wemm 
178bb6a234eSPeter Wemm SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
179bb6a234eSPeter Wemm     0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
180ee3fd601SDan Moschuk 
18174b2192aSJohn Dyson int
182df8abd0bSPeter Wemm fork1(p1, flags, procp)
1830384fff8SJason Evans 	struct proc *p1;			/* parent proc */
1840e3eb7eeSSujal Patel 	int flags;
1850384fff8SJason Evans 	struct proc **procp;			/* child proc */
186df8bae1dSRodney W. Grimes {
187df8abd0bSPeter Wemm 	struct proc *p2, *pptr;
188df8abd0bSPeter Wemm 	uid_t uid;
189df8bae1dSRodney W. Grimes 	struct proc *newproc;
1900384fff8SJason Evans 	int trypid;
191c6362551SAlfred Perlstein 	int ok;
19251068190SWolfram Schneider 	static int pidchecked = 0;
19393efcae8SPoul-Henning Kamp 	struct forklist *ep;
1945856e12eSJohn Dyson 
1950384fff8SJason Evans 	/* Can't copy and clear */
1960e3eb7eeSSujal Patel 	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
197dabee6feSPeter Wemm 		return (EINVAL);
198df8bae1dSRodney W. Grimes 
199df8bae1dSRodney W. Grimes 	/*
2005856e12eSJohn Dyson 	 * Here we don't create a new process, but we divorce
2015856e12eSJohn Dyson 	 * certain parts of a process from itself.
2025856e12eSJohn Dyson 	 */
2035856e12eSJohn Dyson 	if ((flags & RFPROC) == 0) {
2045856e12eSJohn Dyson 
20591c28bfdSLuoqi Chen 		vm_fork(p1, 0, flags);
2065856e12eSJohn Dyson 
2075856e12eSJohn Dyson 		/*
2085856e12eSJohn Dyson 		 * Close all file descriptors.
2095856e12eSJohn Dyson 		 */
2105856e12eSJohn Dyson 		if (flags & RFCFDG) {
2115856e12eSJohn Dyson 			struct filedesc *fdtmp;
2125856e12eSJohn Dyson 			fdtmp = fdinit(p1);
2135856e12eSJohn Dyson 			fdfree(p1);
2145856e12eSJohn Dyson 			p1->p_fd = fdtmp;
2155856e12eSJohn Dyson 		}
2165856e12eSJohn Dyson 
2175856e12eSJohn Dyson 		/*
2185856e12eSJohn Dyson 		 * Unshare file descriptors (from parent.)
2195856e12eSJohn Dyson 		 */
2205856e12eSJohn Dyson 		if (flags & RFFDG) {
2215856e12eSJohn Dyson 			if (p1->p_fd->fd_refcnt > 1) {
2225856e12eSJohn Dyson 				struct filedesc *newfd;
2235856e12eSJohn Dyson 				newfd = fdcopy(p1);
2245856e12eSJohn Dyson 				fdfree(p1);
2255856e12eSJohn Dyson 				p1->p_fd = newfd;
2265856e12eSJohn Dyson 			}
2275856e12eSJohn Dyson 		}
2281943af61SPeter Wemm 		*procp = NULL;
2295856e12eSJohn Dyson 		return (0);
2305856e12eSJohn Dyson 	}
2315856e12eSJohn Dyson 
2325856e12eSJohn Dyson 	/*
233df8bae1dSRodney W. Grimes 	 * Although process entries are dynamically created, we still keep
234df8bae1dSRodney W. Grimes 	 * a global limit on the maximum number we will create.  Don't allow
235df8bae1dSRodney W. Grimes 	 * a nonprivileged user to use the last process; don't let root
236df8bae1dSRodney W. Grimes 	 * exceed the limit. The variable nprocs is the current number of
237df8bae1dSRodney W. Grimes 	 * processes, maxproc is the limit.
238df8bae1dSRodney W. Grimes 	 */
239df8bae1dSRodney W. Grimes 	uid = p1->p_cred->p_ruid;
240df8bae1dSRodney W. Grimes 	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
241df8bae1dSRodney W. Grimes 		tablefull("proc");
242df8bae1dSRodney W. Grimes 		return (EAGAIN);
243df8bae1dSRodney W. Grimes 	}
244df8bae1dSRodney W. Grimes 	/*
245ef5dc8a9SJohn Dyson 	 * Increment the nprocs resource before blocking can occur.  There
246ef5dc8a9SJohn Dyson 	 * are hard-limits as to the number of processes that can run.
247ef5dc8a9SJohn Dyson 	 */
248ef5dc8a9SJohn Dyson 	nprocs++;
249ef5dc8a9SJohn Dyson 
250ef5dc8a9SJohn Dyson 	/*
251df8bae1dSRodney W. Grimes 	 * Increment the count of procs running with this uid. Don't allow
252df8bae1dSRodney W. Grimes 	 * a nonprivileged user to exceed their current limit.
253df8bae1dSRodney W. Grimes 	 */
254f535380cSDon Lewis 	ok = chgproccnt(p1->p_cred->p_uidinfo, 1,
25542fd51ceSDon Lewis 		(uid != 0) ? p1->p_rlimit[RLIMIT_NPROC].rlim_cur : 0);
25642fd51ceSDon Lewis 	if (!ok) {
257ef5dc8a9SJohn Dyson 		/*
258ef5dc8a9SJohn Dyson 		 * Back out the process count
259ef5dc8a9SJohn Dyson 		 */
260ef5dc8a9SJohn Dyson 		nprocs--;
261df8bae1dSRodney W. Grimes 		return (EAGAIN);
262df8bae1dSRodney W. Grimes 	}
263df8bae1dSRodney W. Grimes 
264df8bae1dSRodney W. Grimes 	/* Allocate new proc. */
2652d8acc0fSJohn Dyson 	newproc = zalloc(proc_zone);
266df8bae1dSRodney W. Grimes 
267df8bae1dSRodney W. Grimes 	/*
2682c1011f7SJohn Dyson 	 * Setup linkage for kernel based threading
2692c1011f7SJohn Dyson 	 */
2702c1011f7SJohn Dyson 	if((flags & RFTHREAD) != 0) {
2712c1011f7SJohn Dyson 		newproc->p_peers = p1->p_peers;
2722c1011f7SJohn Dyson 		p1->p_peers = newproc;
2732c1011f7SJohn Dyson 		newproc->p_leader = p1->p_leader;
2742c1011f7SJohn Dyson 	} else {
2752c1011f7SJohn Dyson 		newproc->p_peers = 0;
2762c1011f7SJohn Dyson 		newproc->p_leader = newproc;
2772c1011f7SJohn Dyson 	}
2782c1011f7SJohn Dyson 
279d4da2dbaSAlan Cox 	newproc->p_vmspace = NULL;
280d4da2dbaSAlan Cox 
2812c1011f7SJohn Dyson 	/*
282df8bae1dSRodney W. Grimes 	 * Find an unused process ID.  We remember a range of unused IDs
283df8bae1dSRodney W. Grimes 	 * ready to use (from nextpid+1 through pidchecked-1).
2840384fff8SJason Evans 	 *
2850384fff8SJason Evans 	 * If RFHIGHPID is set (used during system boot), do not allocate
2860384fff8SJason Evans 	 * low-numbered pids.
287df8bae1dSRodney W. Grimes 	 */
288c0c25570SJake Burkholder 	ALLPROC_LOCK(AP_EXCLUSIVE);
2890384fff8SJason Evans 	trypid = nextpid + 1;
2900384fff8SJason Evans 	if (flags & RFHIGHPID) {
2910384fff8SJason Evans 		if (trypid < 10) {
2920384fff8SJason Evans 			trypid = 10;
2930384fff8SJason Evans 		}
2940384fff8SJason Evans 	} else {
295bb6a234eSPeter Wemm 		if (randompid)
2960384fff8SJason Evans 			trypid += arc4random() % randompid;
2970384fff8SJason Evans 	}
298df8bae1dSRodney W. Grimes retry:
299df8bae1dSRodney W. Grimes 	/*
300df8bae1dSRodney W. Grimes 	 * If the process ID prototype has wrapped around,
301df8bae1dSRodney W. Grimes 	 * restart somewhat above 0, as the low-numbered procs
302df8bae1dSRodney W. Grimes 	 * tend to include daemons that don't exit.
303df8bae1dSRodney W. Grimes 	 */
3040384fff8SJason Evans 	if (trypid >= PID_MAX) {
3050384fff8SJason Evans 		trypid = trypid % PID_MAX;
3060384fff8SJason Evans 		if (trypid < 100)
3070384fff8SJason Evans 			trypid += 100;
308df8bae1dSRodney W. Grimes 		pidchecked = 0;
309df8bae1dSRodney W. Grimes 	}
3100384fff8SJason Evans 	if (trypid >= pidchecked) {
311df8bae1dSRodney W. Grimes 		int doingzomb = 0;
312df8bae1dSRodney W. Grimes 
313df8bae1dSRodney W. Grimes 		pidchecked = PID_MAX;
314df8bae1dSRodney W. Grimes 		/*
315df8bae1dSRodney W. Grimes 		 * Scan the active and zombie procs to check whether this pid
316df8bae1dSRodney W. Grimes 		 * is in use.  Remember the lowest pid that's greater
3170384fff8SJason Evans 		 * than trypid, so we can avoid checking for a while.
318df8bae1dSRodney W. Grimes 		 */
3192e3c8fcbSPoul-Henning Kamp 		p2 = LIST_FIRST(&allproc);
320df8bae1dSRodney W. Grimes again:
3212e3c8fcbSPoul-Henning Kamp 		for (; p2 != 0; p2 = LIST_NEXT(p2, p_list)) {
3220384fff8SJason Evans 			while (p2->p_pid == trypid ||
3230384fff8SJason Evans 			    p2->p_pgrp->pg_id == trypid ||
3240384fff8SJason Evans 			    p2->p_session->s_sid == trypid) {
3250384fff8SJason Evans 				trypid++;
3260384fff8SJason Evans 				if (trypid >= pidchecked)
327df8bae1dSRodney W. Grimes 					goto retry;
328df8bae1dSRodney W. Grimes 			}
3290384fff8SJason Evans 			if (p2->p_pid > trypid && pidchecked > p2->p_pid)
330df8bae1dSRodney W. Grimes 				pidchecked = p2->p_pid;
3310384fff8SJason Evans 			if (p2->p_pgrp->pg_id > trypid &&
332df8bae1dSRodney W. Grimes 			    pidchecked > p2->p_pgrp->pg_id)
333df8bae1dSRodney W. Grimes 				pidchecked = p2->p_pgrp->pg_id;
3340384fff8SJason Evans 			if (p2->p_session->s_sid > trypid &&
335643a8daaSDon Lewis 			    pidchecked > p2->p_session->s_sid)
336643a8daaSDon Lewis 				pidchecked = p2->p_session->s_sid;
337df8bae1dSRodney W. Grimes 		}
338df8bae1dSRodney W. Grimes 		if (!doingzomb) {
339df8bae1dSRodney W. Grimes 			doingzomb = 1;
3402e3c8fcbSPoul-Henning Kamp 			p2 = LIST_FIRST(&zombproc);
341df8bae1dSRodney W. Grimes 			goto again;
342df8bae1dSRodney W. Grimes 		}
343df8bae1dSRodney W. Grimes 	}
344df8bae1dSRodney W. Grimes 
345df8bae1dSRodney W. Grimes 	/*
3460384fff8SJason Evans 	 * RFHIGHPID does not mess with the nextpid counter during boot.
3470384fff8SJason Evans 	 */
3480384fff8SJason Evans 	if (flags & RFHIGHPID)
3490384fff8SJason Evans 		pidchecked = 0;
3500384fff8SJason Evans 	else
3510384fff8SJason Evans 		nextpid = trypid;
3520384fff8SJason Evans 
353553629ebSJake Burkholder 	p2 = newproc;
354a448b62aSJake Burkholder 	p2->p_intr_nesting_level = 0;
355553629ebSJake Burkholder 	p2->p_stat = SIDL;			/* protect against others */
356553629ebSJake Burkholder 	p2->p_pid = trypid;
357553629ebSJake Burkholder 	LIST_INSERT_HEAD(&allproc, p2, p_list);
358553629ebSJake Burkholder 	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
359c0c25570SJake Burkholder 	ALLPROC_LOCK(AP_RELEASE);
360553629ebSJake Burkholder 
3610384fff8SJason Evans 	/*
362df8bae1dSRodney W. Grimes 	 * Make a proc table entry for the new process.
363df8bae1dSRodney W. Grimes 	 * Start by zeroing the section of proc that is zero-initialized,
364df8bae1dSRodney W. Grimes 	 * then copy the section that is copied directly from the parent.
365df8bae1dSRodney W. Grimes 	 */
366df8bae1dSRodney W. Grimes 	bzero(&p2->p_startzero,
367df8bae1dSRodney W. Grimes 	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
368df8bae1dSRodney W. Grimes 	bcopy(&p1->p_startcopy, &p2->p_startcopy,
369df8bae1dSRodney W. Grimes 	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));
370df8bae1dSRodney W. Grimes 
3714971f62aSJohn Baldwin 	mtx_init(&p2->p_mtx, "process lock", MTX_DEF);
3722244ea07SJohn Dyson 	p2->p_aioinfo = NULL;
3732244ea07SJohn Dyson 
374df8bae1dSRodney W. Grimes 	/*
375df8bae1dSRodney W. Grimes 	 * Duplicate sub-structures as needed.
376df8bae1dSRodney W. Grimes 	 * Increase reference counts on shared objects.
377df8bae1dSRodney W. Grimes 	 * The p_stats and p_sigacts substructs are set in vm_fork.
378df8bae1dSRodney W. Grimes 	 */
379df8bae1dSRodney W. Grimes 	p2->p_flag = P_INMEM;
380df8bae1dSRodney W. Grimes 	if (p1->p_flag & P_PROFIL)
381df8bae1dSRodney W. Grimes 		startprofclock(p2);
382df8bae1dSRodney W. Grimes 	MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred),
383df8bae1dSRodney W. Grimes 	    M_SUBPROC, M_WAITOK);
384df8bae1dSRodney W. Grimes 	bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
385df8bae1dSRodney W. Grimes 	p2->p_cred->p_refcnt = 1;
386df8bae1dSRodney W. Grimes 	crhold(p1->p_ucred);
387f535380cSDon Lewis 	uihold(p1->p_cred->p_uidinfo);
388df8bae1dSRodney W. Grimes 
38975c13541SPoul-Henning Kamp 	if (p2->p_prison) {
39075c13541SPoul-Henning Kamp 		p2->p_prison->pr_ref++;
39175c13541SPoul-Henning Kamp 		p2->p_flag |= P_JAILED;
39275c13541SPoul-Henning Kamp 	}
39375c13541SPoul-Henning Kamp 
394b9df5231SPoul-Henning Kamp 	if (p2->p_args)
395b9df5231SPoul-Henning Kamp 		p2->p_args->ar_ref++;
396b9df5231SPoul-Henning Kamp 
3976626c604SJulian Elischer 	if (flags & RFSIGSHARE) {
398dc9c271aSJulian Elischer 		p2->p_procsig = p1->p_procsig;
3996626c604SJulian Elischer 		p2->p_procsig->ps_refcnt++;
400dc9c271aSJulian Elischer 		if (p1->p_sigacts == &p1->p_addr->u_sigacts) {
401dc9c271aSJulian Elischer 			struct sigacts *newsigacts;
402dc9c271aSJulian Elischer 			int s;
403dc9c271aSJulian Elischer 
404dc9c271aSJulian Elischer 			/* Create the shared sigacts structure */
405df8abd0bSPeter Wemm 			MALLOC(newsigacts, struct sigacts *,
406df8abd0bSPeter Wemm 			    sizeof(struct sigacts), M_SUBPROC, M_WAITOK);
407dc9c271aSJulian Elischer 			s = splhigh();
408df8abd0bSPeter Wemm 			/*
409df8abd0bSPeter Wemm 			 * Set p_sigacts to the new shared structure.
410df8abd0bSPeter Wemm 			 * Note that this is updating p1->p_sigacts at the
411df8abd0bSPeter Wemm 			 * same time, since p_sigacts is just a pointer to
412df8abd0bSPeter Wemm 			 * the shared p_procsig->ps_sigacts.
413dc9c271aSJulian Elischer 			 */
414dc9c271aSJulian Elischer 			p2->p_sigacts  = newsigacts;
415df8abd0bSPeter Wemm 			bcopy(&p1->p_addr->u_sigacts, p2->p_sigacts,
416df8abd0bSPeter Wemm 			    sizeof(*p2->p_sigacts));
417dc9c271aSJulian Elischer 			*p2->p_sigacts = p1->p_addr->u_sigacts;
418dc9c271aSJulian Elischer 			splx(s);
419dc9c271aSJulian Elischer 		}
4206626c604SJulian Elischer 	} else {
421dc9c271aSJulian Elischer 		MALLOC(p2->p_procsig, struct procsig *, sizeof(struct procsig),
422dc9c271aSJulian Elischer 		    M_SUBPROC, M_WAITOK);
423df8abd0bSPeter Wemm 		bcopy(p1->p_procsig, p2->p_procsig, sizeof(*p2->p_procsig));
424dc9c271aSJulian Elischer 		p2->p_procsig->ps_refcnt = 1;
425df8abd0bSPeter Wemm 		p2->p_sigacts = NULL;	/* finished in vm_fork() */
4266626c604SJulian Elischer 	}
4274ac9ae70SJulian Elischer 	if (flags & RFLINUXTHPN)
4286626c604SJulian Elischer 	        p2->p_sigparent = SIGUSR1;
4294ac9ae70SJulian Elischer 	else
4304ac9ae70SJulian Elischer 	        p2->p_sigparent = SIGCHLD;
43188c5ea45SJulian Elischer 
432df8bae1dSRodney W. Grimes 	/* bump references to the text vnode (for procfs) */
433df8bae1dSRodney W. Grimes 	p2->p_textvp = p1->p_textvp;
434df8bae1dSRodney W. Grimes 	if (p2->p_textvp)
435df8bae1dSRodney W. Grimes 		VREF(p2->p_textvp);
436df8bae1dSRodney W. Grimes 
4370e3eb7eeSSujal Patel 	if (flags & RFCFDG)
438dabee6feSPeter Wemm 		p2->p_fd = fdinit(p1);
4390e3eb7eeSSujal Patel 	else if (flags & RFFDG)
440df8bae1dSRodney W. Grimes 		p2->p_fd = fdcopy(p1);
441dabee6feSPeter Wemm 	else
442dabee6feSPeter Wemm 		p2->p_fd = fdshare(p1);
443dabee6feSPeter Wemm 
444df8bae1dSRodney W. Grimes 	/*
445df8bae1dSRodney W. Grimes 	 * If p_limit is still copy-on-write, bump refcnt,
446df8bae1dSRodney W. Grimes 	 * otherwise get a copy that won't be modified.
447df8bae1dSRodney W. Grimes 	 * (If PL_SHAREMOD is clear, the structure is shared
448df8bae1dSRodney W. Grimes 	 * copy-on-write.)
449df8bae1dSRodney W. Grimes 	 */
450df8bae1dSRodney W. Grimes 	if (p1->p_limit->p_lflags & PL_SHAREMOD)
451df8bae1dSRodney W. Grimes 		p2->p_limit = limcopy(p1->p_limit);
452df8bae1dSRodney W. Grimes 	else {
453df8bae1dSRodney W. Grimes 		p2->p_limit = p1->p_limit;
454df8bae1dSRodney W. Grimes 		p2->p_limit->p_refcnt++;
455df8bae1dSRodney W. Grimes 	}
456df8bae1dSRodney W. Grimes 
45770e534e7SDavid Greenman 	/*
458be67169aSBruce Evans 	 * Preserve some more flags in subprocess.  P_PROFIL has already
459be67169aSBruce Evans 	 * been preserved.
46070e534e7SDavid Greenman 	 */
46170e534e7SDavid Greenman 	p2->p_flag |= p1->p_flag & P_SUGID;
462df8bae1dSRodney W. Grimes 	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
463df8bae1dSRodney W. Grimes 		p2->p_flag |= P_CONTROLT;
4640e3eb7eeSSujal Patel 	if (flags & RFPPWAIT)
465df8bae1dSRodney W. Grimes 		p2->p_flag |= P_PPWAIT;
466be67169aSBruce Evans 
467b75356e1SJeffrey Hsu 	LIST_INSERT_AFTER(p1, p2, p_pglist);
4680e3eb7eeSSujal Patel 
4690e3eb7eeSSujal Patel 	/*
4700e3eb7eeSSujal Patel 	 * Attach the new process to its parent.
4710e3eb7eeSSujal Patel 	 *
4720e3eb7eeSSujal Patel 	 * If RFNOWAIT is set, the newly created process becomes a child
4730e3eb7eeSSujal Patel 	 * of init.  This effectively disassociates the child from the
4740e3eb7eeSSujal Patel 	 * parent.
4750e3eb7eeSSujal Patel 	 */
4760e3eb7eeSSujal Patel 	if (flags & RFNOWAIT)
4770e3eb7eeSSujal Patel 		pptr = initproc;
4780e3eb7eeSSujal Patel 	else
4790e3eb7eeSSujal Patel 		pptr = p1;
48098f03f90SJake Burkholder 	PROCTREE_LOCK(PT_EXCLUSIVE);
4810e3eb7eeSSujal Patel 	p2->p_pptr = pptr;
4820e3eb7eeSSujal Patel 	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
48398f03f90SJake Burkholder 	PROCTREE_LOCK(PT_RELEASE);
484b75356e1SJeffrey Hsu 	LIST_INIT(&p2->p_children);
4850384fff8SJason Evans 	LIST_INIT(&p2->p_heldmtx);
4860384fff8SJason Evans 	LIST_INIT(&p2->p_contested);
487b75356e1SJeffrey Hsu 
4884f559836SJake Burkholder 	callout_init(&p2->p_itcallout, 0);
4891512b5d6SJake Burkholder 	callout_init(&p2->p_slpcallout, 1);
4904f559836SJake Burkholder 
491df8bae1dSRodney W. Grimes #ifdef KTRACE
492df8bae1dSRodney W. Grimes 	/*
493df8bae1dSRodney W. Grimes 	 * Copy traceflag and tracefile if enabled.
494df8bae1dSRodney W. Grimes 	 * If not inherited, these were zeroed above.
495df8bae1dSRodney W. Grimes 	 */
496df8bae1dSRodney W. Grimes 	if (p1->p_traceflag&KTRFAC_INHERIT) {
497df8bae1dSRodney W. Grimes 		p2->p_traceflag = p1->p_traceflag;
498df8bae1dSRodney W. Grimes 		if ((p2->p_tracep = p1->p_tracep) != NULL)
499df8bae1dSRodney W. Grimes 			VREF(p2->p_tracep);
500df8bae1dSRodney W. Grimes 	}
501df8bae1dSRodney W. Grimes #endif
502df8bae1dSRodney W. Grimes 
503df8bae1dSRodney W. Grimes 	/*
5040d2afceeSDavid Greenman 	 * set priority of child to be that of parent
5050d2afceeSDavid Greenman 	 */
5060d2afceeSDavid Greenman 	p2->p_estcpu = p1->p_estcpu;
5070d2afceeSDavid Greenman 
5080d2afceeSDavid Greenman 	/*
509df8bae1dSRodney W. Grimes 	 * This begins the section where we must prevent the parent
510df8bae1dSRodney W. Grimes 	 * from being swapped.
511df8bae1dSRodney W. Grimes 	 */
512af8ad83eSPeter Wemm 	PHOLD(p1);
5130d2afceeSDavid Greenman 
514df8bae1dSRodney W. Grimes 	/*
515a2a1c95cSPeter Wemm 	 * Finish creating the child process.  It will return via a different
516a2a1c95cSPeter Wemm 	 * execution path later.  (ie: directly into user mode)
517dabee6feSPeter Wemm 	 */
518a2a1c95cSPeter Wemm 	vm_fork(p1, p2, flags);
519df8bae1dSRodney W. Grimes 
520df8bae1dSRodney W. Grimes 	/*
521e9189611SPeter Wemm 	 * Both processes are set up, now check if any loadable modules want
522e0d898b4SJulian Elischer 	 * to adjust anything.
523fed06968SJulian Elischer 	 *   What if they have an error? XXX
524fed06968SJulian Elischer 	 */
52593efcae8SPoul-Henning Kamp 	TAILQ_FOREACH(ep, &fork_list, next) {
526fed06968SJulian Elischer 		(*ep->function)(p1, p2, flags);
527fed06968SJulian Elischer 	}
528fed06968SJulian Elischer 
529fed06968SJulian Elischer 	/*
5300384fff8SJason Evans 	 * If RFSTOPPED not requested, make child runnable and add to
5310384fff8SJason Evans 	 * run queue.
532df8bae1dSRodney W. Grimes 	 */
533a2a1c95cSPeter Wemm 	microtime(&(p2->p_stats->p_start));
534a2a1c95cSPeter Wemm 	p2->p_acflag = AFORK;
5350384fff8SJason Evans 	if ((flags & RFSTOPPED) == 0) {
5360384fff8SJason Evans 		splhigh();
5370384fff8SJason Evans 		mtx_enter(&sched_lock, MTX_SPIN);
538df8bae1dSRodney W. Grimes 		p2->p_stat = SRUN;
539df8bae1dSRodney W. Grimes 		setrunqueue(p2);
5400384fff8SJason Evans 		mtx_exit(&sched_lock, MTX_SPIN);
5410384fff8SJason Evans 		spl0();
5420384fff8SJason Evans 	}
543df8bae1dSRodney W. Grimes 
544df8bae1dSRodney W. Grimes 	/*
545df8bae1dSRodney W. Grimes 	 * Now can be swapped.
546df8bae1dSRodney W. Grimes 	 */
547af8ad83eSPeter Wemm 	PRELE(p1);
548df8bae1dSRodney W. Grimes 
549df8bae1dSRodney W. Grimes 	/*
550cb679c38SJonathan Lemon 	 * tell any interested parties about the new process
551cb679c38SJonathan Lemon 	 */
552cb679c38SJonathan Lemon 	KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
553cb679c38SJonathan Lemon 
554cb679c38SJonathan Lemon 	/*
555df8bae1dSRodney W. Grimes 	 * Preserve synchronization semantics of vfork.  If waiting for
556df8bae1dSRodney W. Grimes 	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
557df8bae1dSRodney W. Grimes 	 * proc (in case of exit).
558df8bae1dSRodney W. Grimes 	 */
559df8bae1dSRodney W. Grimes 	while (p2->p_flag & P_PPWAIT)
560df8bae1dSRodney W. Grimes 		tsleep(p1, PWAIT, "ppwait", 0);
561df8bae1dSRodney W. Grimes 
562df8bae1dSRodney W. Grimes 	/*
563df8abd0bSPeter Wemm 	 * Return child proc pointer to parent.
564df8bae1dSRodney W. Grimes 	 */
565df8abd0bSPeter Wemm 	*procp = p2;
566df8bae1dSRodney W. Grimes 	return (0);
567df8bae1dSRodney W. Grimes }
568fed06968SJulian Elischer 
569e0d898b4SJulian Elischer /*
570e0d898b4SJulian Elischer  * The next two functionms are general routines to handle adding/deleting
571e0d898b4SJulian Elischer  * items on the fork callout list.
572e0d898b4SJulian Elischer  *
573e0d898b4SJulian Elischer  * at_fork():
574e0d898b4SJulian Elischer  * Take the arguments given and put them onto the fork callout list,
575fed06968SJulian Elischer  * However first make sure that it's not already there.
576e0d898b4SJulian Elischer  * Returns 0 on success or a standard error number.
577fed06968SJulian Elischer  */
57893efcae8SPoul-Henning Kamp 
579fed06968SJulian Elischer int
580eb776aeaSBruce Evans at_fork(function)
581eb776aeaSBruce Evans 	forklist_fn function;
582fed06968SJulian Elischer {
58393efcae8SPoul-Henning Kamp 	struct forklist *ep;
584e0d898b4SJulian Elischer 
58593efcae8SPoul-Henning Kamp #ifdef INVARIANTS
586e0d898b4SJulian Elischer 	/* let the programmer know if he's been stupid */
587e0d898b4SJulian Elischer 	if (rm_at_fork(function))
58893efcae8SPoul-Henning Kamp 		printf("WARNING: fork callout entry (%p) already present\n",
58993efcae8SPoul-Henning Kamp 		    function);
59093efcae8SPoul-Henning Kamp #endif
59193efcae8SPoul-Henning Kamp 	ep = malloc(sizeof(*ep), M_ATFORK, M_NOWAIT);
592e0d898b4SJulian Elischer 	if (ep == NULL)
593e0d898b4SJulian Elischer 		return (ENOMEM);
594fed06968SJulian Elischer 	ep->function = function;
59593efcae8SPoul-Henning Kamp 	TAILQ_INSERT_TAIL(&fork_list, ep, next);
596e0d898b4SJulian Elischer 	return (0);
597fed06968SJulian Elischer }
598e0d898b4SJulian Elischer 
599fed06968SJulian Elischer /*
60093efcae8SPoul-Henning Kamp  * Scan the exit callout list for the given item and remove it..
60193efcae8SPoul-Henning Kamp  * Returns the number of items removed (0 or 1)
602fed06968SJulian Elischer  */
60393efcae8SPoul-Henning Kamp 
604fed06968SJulian Elischer int
605eb776aeaSBruce Evans rm_at_fork(function)
606eb776aeaSBruce Evans 	forklist_fn function;
607fed06968SJulian Elischer {
60893efcae8SPoul-Henning Kamp 	struct forklist *ep;
609fed06968SJulian Elischer 
61093efcae8SPoul-Henning Kamp 	TAILQ_FOREACH(ep, &fork_list, next) {
611fed06968SJulian Elischer 		if (ep->function == function) {
61293efcae8SPoul-Henning Kamp 			TAILQ_REMOVE(&fork_list, ep, next);
61393efcae8SPoul-Henning Kamp 			free(ep, M_ATFORK);
61493efcae8SPoul-Henning Kamp 			return(1);
615fed06968SJulian Elischer 		}
616fed06968SJulian Elischer 	}
61793efcae8SPoul-Henning Kamp 	return (0);
618fed06968SJulian Elischer }
619