xref: /freebsd/sys/kern/kern_fork.c (revision 34ebdceac09af3d4bc7ac7c16dd7cef2d6fc75f4)
19454b2d8SWarner Losh /*-
251369649SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni  *
4df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1989, 1991, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  * (c) UNIX System Laboratories, Inc.
7df8bae1dSRodney W. Grimes  * All or some portions of this file are derived from material licensed
8df8bae1dSRodney W. Grimes  * to the University of California by American Telephone and Telegraph
9df8bae1dSRodney W. Grimes  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10df8bae1dSRodney W. Grimes  * the permission of UNIX System Laboratories, Inc.
11df8bae1dSRodney W. Grimes  *
12df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
13df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
14df8bae1dSRodney W. Grimes  * are met:
15df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
16df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
17df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
18df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
19df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
2069a28758SEd Maste  * 3. Neither the name of the University nor the names of its contributors
21df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
22df8bae1dSRodney W. Grimes  *    without specific prior written permission.
23df8bae1dSRodney W. Grimes  *
24df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
35df8bae1dSRodney W. Grimes  *
36df8bae1dSRodney W. Grimes  *	@(#)kern_fork.c	8.6 (Berkeley) 4/8/94
37df8bae1dSRodney W. Grimes  */
38df8bae1dSRodney W. Grimes 
39677b542eSDavid E. O'Brien #include <sys/cdefs.h>
40677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
41677b542eSDavid E. O'Brien 
42db6a20e2SGarrett Wollman #include "opt_ktrace.h"
438a945d10SKonstantin Belousov #include "opt_kstack_pages.h"
44db6a20e2SGarrett Wollman 
45df8bae1dSRodney W. Grimes #include <sys/param.h>
46df8bae1dSRodney W. Grimes #include <sys/systm.h>
47*34ebdceaSMateusz Guzik #include <sys/bitstring.h>
48d2d3e875SBruce Evans #include <sys/sysproto.h>
4975b8b3b2SJohn Baldwin #include <sys/eventhandler.h>
50cfb5f768SJonathan Anderson #include <sys/fcntl.h>
51df8bae1dSRodney W. Grimes #include <sys/filedesc.h>
520304c731SJamie Gritton #include <sys/jail.h>
53df8bae1dSRodney W. Grimes #include <sys/kernel.h>
5470fca427SJohn Baldwin #include <sys/kthread.h>
55c76e95c3SPeter Wemm #include <sys/sysctl.h>
5619284646SJohn Baldwin #include <sys/lock.h>
57df8bae1dSRodney W. Grimes #include <sys/malloc.h>
5835e0e5b3SJohn Baldwin #include <sys/mutex.h>
59acd3428bSRobert Watson #include <sys/priv.h>
60df8bae1dSRodney W. Grimes #include <sys/proc.h>
61cfb5f768SJonathan Anderson #include <sys/procdesc.h>
629ccba881SMatthew N. Dodd #include <sys/pioctl.h>
63189ac973SJohn Baldwin #include <sys/ptrace.h>
64097055e2SEdward Tomasz Napierala #include <sys/racct.h>
65df8bae1dSRodney W. Grimes #include <sys/resourcevar.h>
66b43179fbSJeff Roberson #include <sys/sched.h>
67a7b124c3SJohn Baldwin #include <sys/syscall.h>
6870fca427SJohn Baldwin #include <sys/vmmeter.h>
69df8bae1dSRodney W. Grimes #include <sys/vnode.h>
70df8bae1dSRodney W. Grimes #include <sys/acct.h>
710384fff8SJason Evans #include <sys/ktr.h>
72df8bae1dSRodney W. Grimes #include <sys/ktrace.h>
73b71fec07SBruce Evans #include <sys/unistd.h>
745d217f17SJohn Birrell #include <sys/sdt.h>
7557934cd3SJohn Baldwin #include <sys/sx.h>
76e5d81ef1SDmitry Chagin #include <sys/sysent.h>
776004362eSDavid Schultz #include <sys/signalvar.h>
78df8bae1dSRodney W. Grimes 
79fcf7f27aSRobert Watson #include <security/audit/audit.h>
80aed55708SRobert Watson #include <security/mac/mac_framework.h>
81fcf7f27aSRobert Watson 
82d93f860cSPoul-Henning Kamp #include <vm/vm.h>
83dabee6feSPeter Wemm #include <vm/pmap.h>
84dabee6feSPeter Wemm #include <vm/vm_map.h>
85efeaf95aSDavid Greenman #include <vm/vm_extern.h>
86c897b813SJeff Roberson #include <vm/uma.h>
87d93f860cSPoul-Henning Kamp 
885d217f17SJohn Birrell #ifdef KDTRACE_HOOKS
895d217f17SJohn Birrell #include <sys/dtrace_bsd.h>
905d217f17SJohn Birrell dtrace_fork_func_t	dtrace_fasttrap_fork;
915d217f17SJohn Birrell #endif
925d217f17SJohn Birrell 
935d217f17SJohn Birrell SDT_PROVIDER_DECLARE(proc);
9436160958SMark Johnston SDT_PROBE_DEFINE3(proc, , , create, "struct proc *", "struct proc *", "int");
9588c5ea45SJulian Elischer 
96d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
97ad7507e2SSteven Wallace struct fork_args {
98ad7507e2SSteven Wallace 	int     dummy;
99ad7507e2SSteven Wallace };
100d2d3e875SBruce Evans #endif
101ad7507e2SSteven Wallace 
1022ca45184SMatt Joras EVENTHANDLER_LIST_DECLARE(process_fork);
1032ca45184SMatt Joras 
104df8bae1dSRodney W. Grimes /* ARGSUSED */
10526f9a767SRodney W. Grimes int
1068451d0ddSKip Macy sys_fork(struct thread *td, struct fork_args *uap)
107df8bae1dSRodney W. Grimes {
10833fd9b9aSMateusz Guzik 	struct fork_req fr;
109813361c1SMateusz Guzik 	int error, pid;
110be67169aSBruce Evans 
11133fd9b9aSMateusz Guzik 	bzero(&fr, sizeof(fr));
11233fd9b9aSMateusz Guzik 	fr.fr_flags = RFFDG | RFPROC;
113813361c1SMateusz Guzik 	fr.fr_pidp = &pid;
11433fd9b9aSMateusz Guzik 	error = fork1(td, &fr);
115df8abd0bSPeter Wemm 	if (error == 0) {
116813361c1SMateusz Guzik 		td->td_retval[0] = pid;
117b40ce416SJulian Elischer 		td->td_retval[1] = 0;
118df8abd0bSPeter Wemm 	}
11970fca427SJohn Baldwin 	return (error);
120df8bae1dSRodney W. Grimes }
121df8bae1dSRodney W. Grimes 
122cfb5f768SJonathan Anderson /* ARGUSED */
123cfb5f768SJonathan Anderson int
1240c829a30SMateusz Guzik sys_pdfork(struct thread *td, struct pdfork_args *uap)
125cfb5f768SJonathan Anderson {
12633fd9b9aSMateusz Guzik 	struct fork_req fr;
127813361c1SMateusz Guzik 	int error, fd, pid;
128cfb5f768SJonathan Anderson 
12933fd9b9aSMateusz Guzik 	bzero(&fr, sizeof(fr));
13033fd9b9aSMateusz Guzik 	fr.fr_flags = RFFDG | RFPROC | RFPROCDESC;
131813361c1SMateusz Guzik 	fr.fr_pidp = &pid;
13233fd9b9aSMateusz Guzik 	fr.fr_pd_fd = &fd;
13333fd9b9aSMateusz Guzik 	fr.fr_pd_flags = uap->flags;
134cfb5f768SJonathan Anderson 	/*
135cfb5f768SJonathan Anderson 	 * It is necessary to return fd by reference because 0 is a valid file
136cfb5f768SJonathan Anderson 	 * descriptor number, and the child needs to be able to distinguish
137cfb5f768SJonathan Anderson 	 * itself from the parent using the return value.
138cfb5f768SJonathan Anderson 	 */
13933fd9b9aSMateusz Guzik 	error = fork1(td, &fr);
140cfb5f768SJonathan Anderson 	if (error == 0) {
141813361c1SMateusz Guzik 		td->td_retval[0] = pid;
142cfb5f768SJonathan Anderson 		td->td_retval[1] = 0;
143cfb5f768SJonathan Anderson 		error = copyout(&fd, uap->fdp, sizeof(fd));
144cfb5f768SJonathan Anderson 	}
145cfb5f768SJonathan Anderson 	return (error);
146cfb5f768SJonathan Anderson }
147cfb5f768SJonathan Anderson 
148df8bae1dSRodney W. Grimes /* ARGSUSED */
14926f9a767SRodney W. Grimes int
1508451d0ddSKip Macy sys_vfork(struct thread *td, struct vfork_args *uap)
151df8bae1dSRodney W. Grimes {
15233fd9b9aSMateusz Guzik 	struct fork_req fr;
153813361c1SMateusz Guzik 	int error, pid;
154be67169aSBruce Evans 
15533fd9b9aSMateusz Guzik 	bzero(&fr, sizeof(fr));
15633fd9b9aSMateusz Guzik 	fr.fr_flags = RFFDG | RFPROC | RFPPWAIT | RFMEM;
157813361c1SMateusz Guzik 	fr.fr_pidp = &pid;
15833fd9b9aSMateusz Guzik 	error = fork1(td, &fr);
159df8abd0bSPeter Wemm 	if (error == 0) {
160813361c1SMateusz Guzik 		td->td_retval[0] = pid;
161b40ce416SJulian Elischer 		td->td_retval[1] = 0;
162df8abd0bSPeter Wemm 	}
16370fca427SJohn Baldwin 	return (error);
164df8bae1dSRodney W. Grimes }
165df8bae1dSRodney W. Grimes 
166dabee6feSPeter Wemm int
1678451d0ddSKip Macy sys_rfork(struct thread *td, struct rfork_args *uap)
168dabee6feSPeter Wemm {
16933fd9b9aSMateusz Guzik 	struct fork_req fr;
170813361c1SMateusz Guzik 	int error, pid;
171be67169aSBruce Evans 
172c8564ad4SBruce Evans 	/* Don't allow kernel-only flags. */
173885ccc61SJohn Baldwin 	if ((uap->flags & RFKERNELONLY) != 0)
174885ccc61SJohn Baldwin 		return (EINVAL);
175c8564ad4SBruce Evans 
17614961ba7SRobert Watson 	AUDIT_ARG_FFLAGS(uap->flags);
17733fd9b9aSMateusz Guzik 	bzero(&fr, sizeof(fr));
17833fd9b9aSMateusz Guzik 	fr.fr_flags = uap->flags;
179813361c1SMateusz Guzik 	fr.fr_pidp = &pid;
18033fd9b9aSMateusz Guzik 	error = fork1(td, &fr);
181df8abd0bSPeter Wemm 	if (error == 0) {
182813361c1SMateusz Guzik 		td->td_retval[0] = pid;
183b40ce416SJulian Elischer 		td->td_retval[1] = 0;
184df8abd0bSPeter Wemm 	}
18570fca427SJohn Baldwin 	return (error);
186dabee6feSPeter Wemm }
187dabee6feSPeter Wemm 
188df8bae1dSRodney W. Grimes int	nprocs = 1;		/* process 0 */
1898f7e4eb5SDag-Erling Smørgrav int	lastpid = 0;
1908f7e4eb5SDag-Erling Smørgrav SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
191d941d475SRobert Watson     "Last used PID");
192df8bae1dSRodney W. Grimes 
/*
 * Random component to lastpid generation.  We mix in a random factor to make
 * it a little harder to predict.  We sanity check the modulus value when it
 * is set, to avoid doing it in critical paths.  Don't let it be too small or
 * we pointlessly waste entropy, and don't let it be impossibly large.
 * Historically, using a modulus that is too big caused a LOT more process
 * table scans and slowed down fork processing because the pidchecked caching
 * was defeated; fork_findpid() now searches the ID bitmaps instead.
 */
201ee3fd601SDan Moschuk static int randompid = 0;
202bb6a234eSPeter Wemm 
203bb6a234eSPeter Wemm static int
20482d9ae4eSPoul-Henning Kamp sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
205bb6a234eSPeter Wemm {
206bb6a234eSPeter Wemm 	int error, pid;
207bb6a234eSPeter Wemm 
20847934cefSDon Lewis 	error = sysctl_wire_old_buffer(req, sizeof(int));
20947934cefSDon Lewis 	if (error != 0)
21047934cefSDon Lewis 		return(error);
2113fc755c1SJohn Baldwin 	sx_xlock(&allproc_lock);
212bb6a234eSPeter Wemm 	pid = randompid;
213bb6a234eSPeter Wemm 	error = sysctl_handle_int(oidp, &pid, 0, req);
2143fc755c1SJohn Baldwin 	if (error == 0 && req->newptr != NULL) {
215008a0935SDag-Erling Smørgrav 		if (pid == 0)
216008a0935SDag-Erling Smørgrav 			randompid = 0;
217008a0935SDag-Erling Smørgrav 		else if (pid == 1)
218008a0935SDag-Erling Smørgrav 			/* generate a random PID modulus between 100 and 1123 */
219008a0935SDag-Erling Smørgrav 			randompid = 100 + arc4random() % 1024;
220008a0935SDag-Erling Smørgrav 		else if (pid < 0 || pid > pid_max - 100)
221008a0935SDag-Erling Smørgrav 			/* out of range */
222008a0935SDag-Erling Smørgrav 			randompid = pid_max - 100;
223008a0935SDag-Erling Smørgrav 		else if (pid < 100)
224008a0935SDag-Erling Smørgrav 			/* Make it reasonable */
225008a0935SDag-Erling Smørgrav 			randompid = 100;
226008a0935SDag-Erling Smørgrav 		else
227bb6a234eSPeter Wemm 			randompid = pid;
2283fc755c1SJohn Baldwin 	}
2293fc755c1SJohn Baldwin 	sx_xunlock(&allproc_lock);
230bb6a234eSPeter Wemm 	return (error);
231bb6a234eSPeter Wemm }
232bb6a234eSPeter Wemm 
233bb6a234eSPeter Wemm SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
234008a0935SDag-Erling Smørgrav     0, 0, sysctl_kern_randompid, "I", "Random PID modulus. Special values: 0: disable, 1: choose random value");
235ee3fd601SDan Moschuk 
236*34ebdceaSMateusz Guzik extern bitstr_t proc_id_pidmap;
237*34ebdceaSMateusz Guzik extern bitstr_t proc_id_grpidmap;
238*34ebdceaSMateusz Guzik extern bitstr_t proc_id_sessidmap;
239*34ebdceaSMateusz Guzik extern bitstr_t proc_id_reapmap;
240*34ebdceaSMateusz Guzik 
2411d845e86SEdward Tomasz Napierala static int
242afd01097SEdward Tomasz Napierala fork_findpid(int flags)
243afd01097SEdward Tomasz Napierala {
244*34ebdceaSMateusz Guzik 	pid_t result;
245afd01097SEdward Tomasz Napierala 	int trypid;
246afd01097SEdward Tomasz Napierala 
247afd01097SEdward Tomasz Napierala 	/*
248afd01097SEdward Tomasz Napierala 	 * Find an unused process ID.  We remember a range of unused IDs
249afd01097SEdward Tomasz Napierala 	 * ready to use (from lastpid+1 through pidchecked-1).
250afd01097SEdward Tomasz Napierala 	 *
251afd01097SEdward Tomasz Napierala 	 * If RFHIGHPID is set (used during system boot), do not allocate
252afd01097SEdward Tomasz Napierala 	 * low-numbered pids.
253afd01097SEdward Tomasz Napierala 	 */
254afd01097SEdward Tomasz Napierala 	trypid = lastpid + 1;
255afd01097SEdward Tomasz Napierala 	if (flags & RFHIGHPID) {
256afd01097SEdward Tomasz Napierala 		if (trypid < 10)
257afd01097SEdward Tomasz Napierala 			trypid = 10;
258afd01097SEdward Tomasz Napierala 	} else {
259afd01097SEdward Tomasz Napierala 		if (randompid)
260afd01097SEdward Tomasz Napierala 			trypid += arc4random() % randompid;
261afd01097SEdward Tomasz Napierala 	}
262*34ebdceaSMateusz Guzik 	mtx_lock(&procid_lock);
263afd01097SEdward Tomasz Napierala retry:
264afd01097SEdward Tomasz Napierala 	/*
265afd01097SEdward Tomasz Napierala 	 * If the process ID prototype has wrapped around,
266afd01097SEdward Tomasz Napierala 	 * restart somewhat above 0, as the low-numbered procs
267afd01097SEdward Tomasz Napierala 	 * tend to include daemons that don't exit.
268afd01097SEdward Tomasz Napierala 	 */
26902c6fc21SKonstantin Belousov 	if (trypid >= pid_max) {
27002c6fc21SKonstantin Belousov 		trypid = trypid % pid_max;
271afd01097SEdward Tomasz Napierala 		if (trypid < 100)
272afd01097SEdward Tomasz Napierala 			trypid += 100;
273afd01097SEdward Tomasz Napierala 	}
274afd01097SEdward Tomasz Napierala 
275*34ebdceaSMateusz Guzik 	bit_ffc_at(&proc_id_pidmap, trypid, pid_max, &result);
276*34ebdceaSMateusz Guzik 	if (result == -1)
277afd01097SEdward Tomasz Napierala 		goto retry;
278*34ebdceaSMateusz Guzik 	if (bit_test(&proc_id_grpidmap, result) ||
279*34ebdceaSMateusz Guzik 	    bit_test(&proc_id_sessidmap, result) ||
280*34ebdceaSMateusz Guzik 	    bit_test(&proc_id_reapmap, result)) {
281*34ebdceaSMateusz Guzik 		trypid++;
282*34ebdceaSMateusz Guzik 		goto retry;
283afd01097SEdward Tomasz Napierala 	}
284afd01097SEdward Tomasz Napierala 
285afd01097SEdward Tomasz Napierala 	/*
286afd01097SEdward Tomasz Napierala 	 * RFHIGHPID does not mess with the lastpid counter during boot.
287afd01097SEdward Tomasz Napierala 	 */
288*34ebdceaSMateusz Guzik 	if ((flags & RFHIGHPID) == 0)
289*34ebdceaSMateusz Guzik 		lastpid = result;
290afd01097SEdward Tomasz Napierala 
291*34ebdceaSMateusz Guzik 	bit_set(&proc_id_pidmap, result);
292*34ebdceaSMateusz Guzik 	mtx_unlock(&procid_lock);
2931e9a1bf5SMateusz Guzik 
294*34ebdceaSMateusz Guzik 	return (result);
295afd01097SEdward Tomasz Napierala }
296afd01097SEdward Tomasz Napierala 
297afd01097SEdward Tomasz Napierala static int
2983e73ff1eSEdward Tomasz Napierala fork_norfproc(struct thread *td, int flags)
2991d845e86SEdward Tomasz Napierala {
3001d845e86SEdward Tomasz Napierala 	int error;
3011d845e86SEdward Tomasz Napierala 	struct proc *p1;
3021d845e86SEdward Tomasz Napierala 
303087bfb0eSEdward Tomasz Napierala 	KASSERT((flags & RFPROC) == 0,
304087bfb0eSEdward Tomasz Napierala 	    ("fork_norfproc called with RFPROC set"));
3051d845e86SEdward Tomasz Napierala 	p1 = td->td_proc;
3061d845e86SEdward Tomasz Napierala 
3071d845e86SEdward Tomasz Napierala 	if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) &&
3081d845e86SEdward Tomasz Napierala 	    (flags & (RFCFDG | RFFDG))) {
3091d845e86SEdward Tomasz Napierala 		PROC_LOCK(p1);
3106ddcc233SKonstantin Belousov 		if (thread_single(p1, SINGLE_BOUNDARY)) {
3111d845e86SEdward Tomasz Napierala 			PROC_UNLOCK(p1);
3121d845e86SEdward Tomasz Napierala 			return (ERESTART);
3131d845e86SEdward Tomasz Napierala 		}
3141d845e86SEdward Tomasz Napierala 		PROC_UNLOCK(p1);
3151d845e86SEdward Tomasz Napierala 	}
3161d845e86SEdward Tomasz Napierala 
3171d845e86SEdward Tomasz Napierala 	error = vm_forkproc(td, NULL, NULL, NULL, flags);
3181d845e86SEdward Tomasz Napierala 	if (error)
3191d845e86SEdward Tomasz Napierala 		goto fail;
3201d845e86SEdward Tomasz Napierala 
3211d845e86SEdward Tomasz Napierala 	/*
3221d845e86SEdward Tomasz Napierala 	 * Close all file descriptors.
3231d845e86SEdward Tomasz Napierala 	 */
3241d845e86SEdward Tomasz Napierala 	if (flags & RFCFDG) {
3251d845e86SEdward Tomasz Napierala 		struct filedesc *fdtmp;
326eb48fbd9SMateusz Guzik 		fdtmp = fdinit(td->td_proc->p_fd, false);
3272609222aSPawel Jakub Dawidek 		fdescfree(td);
3281d845e86SEdward Tomasz Napierala 		p1->p_fd = fdtmp;
3291d845e86SEdward Tomasz Napierala 	}
3301d845e86SEdward Tomasz Napierala 
3311d845e86SEdward Tomasz Napierala 	/*
3321d845e86SEdward Tomasz Napierala 	 * Unshare file descriptors (from parent).
3331d845e86SEdward Tomasz Napierala 	 */
3341d845e86SEdward Tomasz Napierala 	if (flags & RFFDG)
335b9d32c36SMateusz Guzik 		fdunshare(td);
3361d845e86SEdward Tomasz Napierala 
3371d845e86SEdward Tomasz Napierala fail:
3381d845e86SEdward Tomasz Napierala 	if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) &&
3391d845e86SEdward Tomasz Napierala 	    (flags & (RFCFDG | RFFDG))) {
3401d845e86SEdward Tomasz Napierala 		PROC_LOCK(p1);
3416ddcc233SKonstantin Belousov 		thread_single_end(p1, SINGLE_BOUNDARY);
3421d845e86SEdward Tomasz Napierala 		PROC_UNLOCK(p1);
3431d845e86SEdward Tomasz Napierala 	}
3441d845e86SEdward Tomasz Napierala 	return (error);
3451d845e86SEdward Tomasz Napierala }
3461d845e86SEdward Tomasz Napierala 
347afd01097SEdward Tomasz Napierala static void
348813361c1SMateusz Guzik do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2,
349813361c1SMateusz Guzik     struct vmspace *vm2, struct file *fp_procdesc)
350df8bae1dSRodney W. Grimes {
351afd01097SEdward Tomasz Napierala 	struct proc *p1, *pptr;
352813361c1SMateusz Guzik 	int trypid;
3535641ae5dSJohn Baldwin 	struct filedesc *fd;
354ad05d580STor Egge 	struct filedesc_to_leader *fdtol;
3553fc755c1SJohn Baldwin 	struct sigacts *newsigacts;
3565856e12eSJohn Dyson 
357afd01097SEdward Tomasz Napierala 	sx_assert(&allproc_lock, SX_XLOCKED);
358df8bae1dSRodney W. Grimes 
35970fca427SJohn Baldwin 	p1 = td->td_proc;
36070fca427SJohn Baldwin 
361813361c1SMateusz Guzik 	trypid = fork_findpid(fr->fr_flags);
362e602ba25SJulian Elischer 	p2->p_state = PRS_NEW;		/* protect against others */
363553629ebSJake Burkholder 	p2->p_pid = trypid;
36414961ba7SRobert Watson 	AUDIT_ARG_PID(p2->p_pid);
365553629ebSJake Burkholder 	LIST_INSERT_HEAD(&allproc, p2, p_list);
3666ddcc233SKonstantin Belousov 	allproc_gen++;
3673d3e6793SMateusz Guzik 	sx_xlock(PIDHASHLOCK(p2->p_pid));
368553629ebSJake Burkholder 	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
3693d3e6793SMateusz Guzik 	sx_xunlock(PIDHASHLOCK(p2->p_pid));
3701ad9ee86SXin LI 	PROC_LOCK(p2);
3711ad9ee86SXin LI 	PROC_LOCK(p1);
3721ad9ee86SXin LI 
3731005a129SJohn Baldwin 	sx_xunlock(&allproc_lock);
374553629ebSJake Burkholder 
3751ad9ee86SXin LI 	bcopy(&p1->p_startcopy, &p2->p_startcopy,
3761ad9ee86SXin LI 	    __rangeof(struct proc, p_startcopy, p_endcopy));
3778b4a2800SKonstantin Belousov 	pargs_hold(p2->p_args);
3786520495aSAdrian Chadd 
3791ad9ee86SXin LI 	PROC_UNLOCK(p1);
3801ad9ee86SXin LI 
3811ad9ee86SXin LI 	bzero(&p2->p_startzero,
3821ad9ee86SXin LI 	    __rangeof(struct proc, p_startzero, p_endzero));
3831ad9ee86SXin LI 
3840304c731SJamie Gritton 	/* Tell the prison that we exist. */
385413628a7SBjoern A. Zeeb 	prison_proc_hold(p2->p_ucred->cr_prison);
386413628a7SBjoern A. Zeeb 
3871ad9ee86SXin LI 	PROC_UNLOCK(p2);
3881ad9ee86SXin LI 
38981d68271SMateusz Guzik 	tidhash_add(td2);
39081d68271SMateusz Guzik 
3910384fff8SJason Evans 	/*
3923fc755c1SJohn Baldwin 	 * Malloc things while we don't hold any locks.
3933fc755c1SJohn Baldwin 	 */
394813361c1SMateusz Guzik 	if (fr->fr_flags & RFSIGSHARE)
3953fc755c1SJohn Baldwin 		newsigacts = NULL;
39690af4afaSJohn Baldwin 	else
39790af4afaSJohn Baldwin 		newsigacts = sigacts_alloc();
3983fc755c1SJohn Baldwin 
3993fc755c1SJohn Baldwin 	/*
4003fc755c1SJohn Baldwin 	 * Copy filedesc.
4013fc755c1SJohn Baldwin 	 */
402813361c1SMateusz Guzik 	if (fr->fr_flags & RFCFDG) {
403eb48fbd9SMateusz Guzik 		fd = fdinit(p1->p_fd, false);
404ad05d580STor Egge 		fdtol = NULL;
405813361c1SMateusz Guzik 	} else if (fr->fr_flags & RFFDG) {
406598b7ec8SPoul-Henning Kamp 		fd = fdcopy(p1->p_fd);
407ad05d580STor Egge 		fdtol = NULL;
408ad05d580STor Egge 	} else {
409c7f1c11bSAlfred Perlstein 		fd = fdshare(p1->p_fd);
410ad05d580STor Egge 		if (p1->p_fdtol == NULL)
4113e73ff1eSEdward Tomasz Napierala 			p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL,
412ad05d580STor Egge 			    p1->p_leader);
413813361c1SMateusz Guzik 		if ((fr->fr_flags & RFTHREAD) != 0) {
414ad05d580STor Egge 			/*
4153e73ff1eSEdward Tomasz Napierala 			 * Shared file descriptor table, and shared
4163e73ff1eSEdward Tomasz Napierala 			 * process leaders.
417ad05d580STor Egge 			 */
418ad05d580STor Egge 			fdtol = p1->p_fdtol;
4195e3f7694SRobert Watson 			FILEDESC_XLOCK(p1->p_fd);
420ad05d580STor Egge 			fdtol->fdl_refcount++;
4215e3f7694SRobert Watson 			FILEDESC_XUNLOCK(p1->p_fd);
422ad05d580STor Egge 		} else {
423ad05d580STor Egge 			/*
4243e73ff1eSEdward Tomasz Napierala 			 * Shared file descriptor table, and different
4253e73ff1eSEdward Tomasz Napierala 			 * process leaders.
426ad05d580STor Egge 			 */
427ad05d580STor Egge 			fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
4283e73ff1eSEdward Tomasz Napierala 			    p1->p_fd, p2);
429ad05d580STor Egge 		}
430ad05d580STor Egge 	}
4313fc755c1SJohn Baldwin 	/*
432df8bae1dSRodney W. Grimes 	 * Make a proc table entry for the new process.
433df8bae1dSRodney W. Grimes 	 * Start by zeroing the section of proc that is zero-initialized,
434df8bae1dSRodney W. Grimes 	 * then copy the section that is copied directly from the parent.
435df8bae1dSRodney W. Grimes 	 */
436316ec49aSScott Long 
4377d447c95SJohn Baldwin 	PROC_LOCK(p2);
4387d447c95SJohn Baldwin 	PROC_LOCK(p1);
4397d447c95SJohn Baldwin 
440079b7badSJulian Elischer 	bzero(&td2->td_startzero,
4416db36923SDavid Schultz 	    __rangeof(struct thread, td_startzero, td_endzero));
442079b7badSJulian Elischer 
443079b7badSJulian Elischer 	bcopy(&td->td_startcopy, &td2->td_startcopy,
4446db36923SDavid Schultz 	    __rangeof(struct thread, td_startcopy, td_endcopy));
445df8bae1dSRodney W. Grimes 
4464b9322aeSJulian Elischer 	bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
447a30ec4b9SDavid Xu 	td2->td_sigstk = td->td_sigstk;
448b61ce5b0SJeff Roberson 	td2->td_flags = TDF_INMEM;
449acbe332aSDavid Xu 	td2->td_lend_user_pri = PRI_MAX;
450a30ec4b9SDavid Xu 
45121ca7b57SMarko Zec #ifdef VIMAGE
45221ca7b57SMarko Zec 	td2->td_vnet = NULL;
45321ca7b57SMarko Zec 	td2->td_vnet_lpush = NULL;
45421ca7b57SMarko Zec #endif
45521ca7b57SMarko Zec 
456df8bae1dSRodney W. Grimes 	/*
45722d19207SJohn Baldwin 	 * Allow the scheduler to initialize the child.
45822d19207SJohn Baldwin 	 */
45922d19207SJohn Baldwin 	thread_lock(td);
46022d19207SJohn Baldwin 	sched_fork(td, td2);
46122d19207SJohn Baldwin 	thread_unlock(td);
46222d19207SJohn Baldwin 
46322d19207SJohn Baldwin 	/*
464df8bae1dSRodney W. Grimes 	 * Duplicate sub-structures as needed.
465df8bae1dSRodney W. Grimes 	 * Increase reference counts on shared objects.
466df8bae1dSRodney W. Grimes 	 */
467b61ce5b0SJeff Roberson 	p2->p_flag = P_INMEM;
468643f6f47SKonstantin Belousov 	p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
46954b0e65fSJeff Roberson 	p2->p_swtick = ticks;
4709752f794SJohn Baldwin 	if (p1->p_flag & P_PROFIL)
4719752f794SJohn Baldwin 		startprofclock(p2);
472b9df5231SPoul-Henning Kamp 
473813361c1SMateusz Guzik 	if (fr->fr_flags & RFSIGSHARE) {
47490af4afaSJohn Baldwin 		p2->p_sigacts = sigacts_hold(p1->p_sigacts);
4756626c604SJulian Elischer 	} else {
47690af4afaSJohn Baldwin 		sigacts_copy(newsigacts, p1->p_sigacts);
47790af4afaSJohn Baldwin 		p2->p_sigacts = newsigacts;
4786626c604SJulian Elischer 	}
479f49d8202SKonstantin Belousov 
480813361c1SMateusz Guzik 	if (fr->fr_flags & RFTSIGZMB)
481813361c1SMateusz Guzik 	        p2->p_sigparent = RFTSIGNUM(fr->fr_flags);
482813361c1SMateusz Guzik 	else if (fr->fr_flags & RFLINUXTHPN)
4836626c604SJulian Elischer 	        p2->p_sigparent = SIGUSR1;
4844ac9ae70SJulian Elischer 	else
4854ac9ae70SJulian Elischer 	        p2->p_sigparent = SIGCHLD;
48688c5ea45SJulian Elischer 
487df8bae1dSRodney W. Grimes 	p2->p_textvp = p1->p_textvp;
4885641ae5dSJohn Baldwin 	p2->p_fd = fd;
489ad05d580STor Egge 	p2->p_fdtol = fdtol;
490dabee6feSPeter Wemm 
49155648840SJohn Baldwin 	if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
49255648840SJohn Baldwin 		p2->p_flag |= P_PROTECTED;
49355648840SJohn Baldwin 		p2->p_flag2 |= P2_INHERIT_PROTECTED;
49455648840SJohn Baldwin 	}
49555648840SJohn Baldwin 
496df8bae1dSRodney W. Grimes 	/*
497c8564ad4SBruce Evans 	 * p_limit is copy-on-write.  Bump its refcount.
498df8bae1dSRodney W. Grimes 	 */
4991c4bcd05SJeff Roberson 	lim_fork(p1, p2);
5008b059651SDavid Schultz 
5014ea6a9a2SMateusz Guzik 	thread_cow_get_proc(td2, p2);
5024ea6a9a2SMateusz Guzik 
5038b059651SDavid Schultz 	pstats_fork(p1->p_stats, p2->p_stats);
5048b059651SDavid Schultz 
505299bc736SDavid Schultz 	PROC_UNLOCK(p1);
506cda5aba4SDavid Schultz 	PROC_UNLOCK(p2);
507df8bae1dSRodney W. Grimes 
5083e73ff1eSEdward Tomasz Napierala 	/* Bump references to the text vnode (for procfs). */
509a69d88afSPeter Wemm 	if (p2->p_textvp)
5105afb134cSMateusz Guzik 		vrefact(p2->p_textvp);
511a69d88afSPeter Wemm 
512c6544064SJohn Baldwin 	/*
513c8564ad4SBruce Evans 	 * Set up linkage for kernel based threading.
514c6544064SJohn Baldwin 	 */
515813361c1SMateusz Guzik 	if ((fr->fr_flags & RFTHREAD) != 0) {
516c6544064SJohn Baldwin 		mtx_lock(&ppeers_lock);
517c6544064SJohn Baldwin 		p2->p_peers = p1->p_peers;
518c6544064SJohn Baldwin 		p1->p_peers = p2;
519c6544064SJohn Baldwin 		p2->p_leader = p1->p_leader;
520c6544064SJohn Baldwin 		mtx_unlock(&ppeers_lock);
521c6544064SJohn Baldwin 		PROC_LOCK(p1->p_leader);
522c6544064SJohn Baldwin 		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
523c6544064SJohn Baldwin 			PROC_UNLOCK(p1->p_leader);
524c6544064SJohn Baldwin 			/*
525c6544064SJohn Baldwin 			 * The task leader is exiting, so process p1 is
526c6544064SJohn Baldwin 			 * going to be killed shortly.  Since p1 obviously
527c6544064SJohn Baldwin 			 * isn't dead yet, we know that the leader is either
528c6544064SJohn Baldwin 			 * sending SIGKILL's to all the processes in this
529c6544064SJohn Baldwin 			 * task or is sleeping waiting for all the peers to
530c6544064SJohn Baldwin 			 * exit.  We let p1 complete the fork, but we need
531c6544064SJohn Baldwin 			 * to go ahead and kill the new process p2 since
532c6544064SJohn Baldwin 			 * the task leader may not get a chance to send
533c6544064SJohn Baldwin 			 * SIGKILL to it.  We leave it on the list so that
534c6544064SJohn Baldwin 			 * the task leader will wait for this new process
535c6544064SJohn Baldwin 			 * to commit suicide.
536c6544064SJohn Baldwin 			 */
537c6544064SJohn Baldwin 			PROC_LOCK(p2);
5388451d0ddSKip Macy 			kern_psignal(p2, SIGKILL);
539c6544064SJohn Baldwin 			PROC_UNLOCK(p2);
540293d2d22SRobert Watson 		} else
541293d2d22SRobert Watson 			PROC_UNLOCK(p1->p_leader);
542c6544064SJohn Baldwin 	} else {
543c6544064SJohn Baldwin 		p2->p_peers = NULL;
544c6544064SJohn Baldwin 		p2->p_leader = p2;
545c6544064SJohn Baldwin 	}
546c6544064SJohn Baldwin 
5473fc755c1SJohn Baldwin 	sx_xlock(&proctree_lock);
5483fc755c1SJohn Baldwin 	PGRP_LOCK(p1->p_pgrp);
5493fc755c1SJohn Baldwin 	PROC_LOCK(p2);
5503fc755c1SJohn Baldwin 	PROC_LOCK(p1);
5513fc755c1SJohn Baldwin 
55270e534e7SDavid Greenman 	/*
5539752f794SJohn Baldwin 	 * Preserve some more flags in subprocess.  P_PROFIL has already
554be67169aSBruce Evans 	 * been preserved.
55570e534e7SDavid Greenman 	 */
556a30ec4b9SDavid Xu 	p2->p_flag |= p1->p_flag & P_SUGID;
557aff57357SEd Schouten 	td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING;
558f591779bSSeigo Tanimura 	SESS_LOCK(p1->p_session);
559df8bae1dSRodney W. Grimes 	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
560df8bae1dSRodney W. Grimes 		p2->p_flag |= P_CONTROLT;
561f591779bSSeigo Tanimura 	SESS_UNLOCK(p1->p_session);
562813361c1SMateusz Guzik 	if (fr->fr_flags & RFPPWAIT)
563df8bae1dSRodney W. Grimes 		p2->p_flag |= P_PPWAIT;
564be67169aSBruce Evans 
5655cded904SOlivier Houchard 	p2->p_pgrp = p1->p_pgrp;
566b75356e1SJeffrey Hsu 	LIST_INSERT_AFTER(p1, p2, p_pglist);
5672a60b9b9SSeigo Tanimura 	PGRP_UNLOCK(p1->p_pgrp);
568b75356e1SJeffrey Hsu 	LIST_INIT(&p2->p_children);
569dcd43281SKonstantin Belousov 	LIST_INIT(&p2->p_orphans);
570b75356e1SJeffrey Hsu 
571f7e50ea7SKonstantin Belousov 	callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);
5724f559836SJake Burkholder 
573df8bae1dSRodney W. Grimes 	/*
574df95311aSMatthew N. Dodd 	 * If PF_FORK is set, the child process inherits the
575df95311aSMatthew N. Dodd 	 * procfs ioctl flags from its parent.
576df95311aSMatthew N. Dodd 	 */
577df95311aSMatthew N. Dodd 	if (p1->p_pfsflags & PF_FORK) {
578df95311aSMatthew N. Dodd 		p2->p_stops = p1->p_stops;
579df95311aSMatthew N. Dodd 		p2->p_pfsflags = p1->p_pfsflags;
580df95311aSMatthew N. Dodd 	}
581df95311aSMatthew N. Dodd 
582df95311aSMatthew N. Dodd 	/*
583df8bae1dSRodney W. Grimes 	 * This begins the section where we must prevent the parent
584cda5aba4SDavid Schultz 	 * from being swapped.
585df8bae1dSRodney W. Grimes 	 */
586cda5aba4SDavid Schultz 	_PHOLD(p1);
58757934cd3SJohn Baldwin 	PROC_UNLOCK(p1);
5880d2afceeSDavid Greenman 
589df8bae1dSRodney W. Grimes 	/*
5903fc755c1SJohn Baldwin 	 * Attach the new process to its parent.
5913fc755c1SJohn Baldwin 	 *
5923fc755c1SJohn Baldwin 	 * If RFNOWAIT is set, the newly created process becomes a child
5933fc755c1SJohn Baldwin 	 * of init.  This effectively disassociates the child from the
5943fc755c1SJohn Baldwin 	 * parent.
5953fc755c1SJohn Baldwin 	 */
596813361c1SMateusz Guzik 	if ((fr->fr_flags & RFNOWAIT) != 0) {
597237623b0SKonstantin Belousov 		pptr = p1->p_reaper;
598237623b0SKonstantin Belousov 		p2->p_reaper = pptr;
599237623b0SKonstantin Belousov 	} else {
600237623b0SKonstantin Belousov 		p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ?
601237623b0SKonstantin Belousov 		    p1 : p1->p_reaper;
6023fc755c1SJohn Baldwin 		pptr = p1;
603237623b0SKonstantin Belousov 	}
6043fc755c1SJohn Baldwin 	p2->p_pptr = pptr;
6052c054ce9SMateusz Guzik 	p2->p_oppid = pptr->p_pid;
6063fc755c1SJohn Baldwin 	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
607237623b0SKonstantin Belousov 	LIST_INIT(&p2->p_reaplist);
608237623b0SKonstantin Belousov 	LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
609*34ebdceaSMateusz Guzik 	if (p2->p_reaper == p1 && p1 != initproc) {
610237623b0SKonstantin Belousov 		p2->p_reapsubtree = p2->p_pid;
611*34ebdceaSMateusz Guzik 		proc_id_set_cond(PROC_ID_REAP, p2->p_pid);
612*34ebdceaSMateusz Guzik 	}
6133fc755c1SJohn Baldwin 	sx_xunlock(&proctree_lock);
6143fc755c1SJohn Baldwin 
615bb0e8070SJohn Baldwin 	/* Inform accounting that we have forked. */
616bb0e8070SJohn Baldwin 	p2->p_acflag = AFORK;
617bb0e8070SJohn Baldwin 	PROC_UNLOCK(p2);
618bb0e8070SJohn Baldwin 
6197705d4b2SDmitry Chagin #ifdef KTRACE
6207705d4b2SDmitry Chagin 	ktrprocfork(p1, p2);
6217705d4b2SDmitry Chagin #endif
6227705d4b2SDmitry Chagin 
6233fc755c1SJohn Baldwin 	/*
624a2a1c95cSPeter Wemm 	 * Finish creating the child process.  It will return via a different
625a2a1c95cSPeter Wemm 	 * execution path later.  (ie: directly into user mode)
626dabee6feSPeter Wemm 	 */
627813361c1SMateusz Guzik 	vm_forkproc(td, p2, td2, vm2, fr->fr_flags);
628df8bae1dSRodney W. Grimes 
629813361c1SMateusz Guzik 	if (fr->fr_flags == (RFFDG | RFPROC)) {
63083c9dea1SGleb Smirnoff 		VM_CNT_INC(v_forks);
63183c9dea1SGleb Smirnoff 		VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize +
63294ddc707SAlan Cox 		    p2->p_vmspace->vm_ssize);
633813361c1SMateusz Guzik 	} else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
63483c9dea1SGleb Smirnoff 		VM_CNT_INC(v_vforks);
63583c9dea1SGleb Smirnoff 		VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize +
63694ddc707SAlan Cox 		    p2->p_vmspace->vm_ssize);
6375d22597fSHajimu UMEMOTO 	} else if (p1 == &proc0) {
63883c9dea1SGleb Smirnoff 		VM_CNT_INC(v_kthreads);
63983c9dea1SGleb Smirnoff 		VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize +
64094ddc707SAlan Cox 		    p2->p_vmspace->vm_ssize);
6415d22597fSHajimu UMEMOTO 	} else {
64283c9dea1SGleb Smirnoff 		VM_CNT_INC(v_rforks);
64383c9dea1SGleb Smirnoff 		VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize +
64494ddc707SAlan Cox 		    p2->p_vmspace->vm_ssize);
6455d22597fSHajimu UMEMOTO 	}
6465d22597fSHajimu UMEMOTO 
647cfb5f768SJonathan Anderson 	/*
648cfb5f768SJonathan Anderson 	 * Associate the process descriptor with the process before anything
649cfb5f768SJonathan Anderson 	 * can happen that might cause that process to need the descriptor.
650cfb5f768SJonathan Anderson 	 * However, don't do this until after fork(2) can no longer fail.
651cfb5f768SJonathan Anderson 	 */
652813361c1SMateusz Guzik 	if (fr->fr_flags & RFPROCDESC)
653813361c1SMateusz Guzik 		procdesc_new(p2, fr->fr_pd_flags);
654cfb5f768SJonathan Anderson 
655df8bae1dSRodney W. Grimes 	/*
656e9189611SPeter Wemm 	 * Both processes are set up, now check if any loadable modules want
657e0d898b4SJulian Elischer 	 * to adjust anything.
658fed06968SJulian Elischer 	 */
6592ca45184SMatt Joras 	EVENTHANDLER_DIRECT_INVOKE(process_fork, p1, p2, fr->fr_flags);
660fed06968SJulian Elischer 
661fed06968SJulian Elischer 	/*
6624c3558aaSJohn Baldwin 	 * Set the child start time and mark the process as being complete.
6634c3558aaSJohn Baldwin 	 */
6648e6fa660SJohn Baldwin 	PROC_LOCK(p2);
6658e6fa660SJohn Baldwin 	PROC_LOCK(p1);
6664c3558aaSJohn Baldwin 	microuptime(&p2->p_stats->p_start);
66711bda9b8SJeff Roberson 	PROC_SLOCK(p2);
6684c3558aaSJohn Baldwin 	p2->p_state = PRS_NORMAL;
66911bda9b8SJeff Roberson 	PROC_SUNLOCK(p2);
6706fa39a73SKonstantin Belousov 
671d3555b6fSRui Paulo #ifdef KDTRACE_HOOKS
672d3555b6fSRui Paulo 	/*
6737159310fSMark Johnston 	 * Tell the DTrace fasttrap provider about the new process so that any
6747159310fSMark Johnston 	 * tracepoints inherited from the parent can be removed. We have to do
6757159310fSMark Johnston 	 * this only after p_state is PRS_NORMAL since the fasttrap module will
6767159310fSMark Johnston 	 * use pfind() later on.
677d3555b6fSRui Paulo 	 */
678813361c1SMateusz Guzik 	if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork)
679d3555b6fSRui Paulo 		dtrace_fasttrap_fork(p1, p2);
680d3555b6fSRui Paulo #endif
681813361c1SMateusz Guzik 	if (fr->fr_flags & RFPPWAIT) {
6821d7ca9bbSKonstantin Belousov 		td->td_pflags |= TDP_RFPPWAIT;
6831d7ca9bbSKonstantin Belousov 		td->td_rfppwait_p = p2;
684fc4f075aSJohn Baldwin 		td->td_dbgflags |= TDB_VFORK;
6851d7ca9bbSKonstantin Belousov 	}
6868e6fa660SJohn Baldwin 	PROC_UNLOCK(p2);
687df8bae1dSRodney W. Grimes 
688df8bae1dSRodney W. Grimes 	/*
689df8bae1dSRodney W. Grimes 	 * Now can be swapped.
690df8bae1dSRodney W. Grimes 	 */
69157934cd3SJohn Baldwin 	_PRELE(p1);
6927054ee4eSKonstantin Belousov 	PROC_UNLOCK(p1);
693df8bae1dSRodney W. Grimes 
694df8bae1dSRodney W. Grimes 	/*
69570fca427SJohn Baldwin 	 * Tell any interested parties about the new process.
696cb679c38SJonathan Lemon 	 */
6979e590ff0SKonstantin Belousov 	knote_fork(p1->p_klist, p2->p_pid);
698813361c1SMateusz Guzik 	SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags);
6995d217f17SJohn Birrell 
700813361c1SMateusz Guzik 	if (fr->fr_flags & RFPROCDESC) {
701813361c1SMateusz Guzik 		procdesc_finit(p2->p_procdesc, fp_procdesc);
702813361c1SMateusz Guzik 		fdrop(fp_procdesc, td);
703813361c1SMateusz Guzik 	}
704813361c1SMateusz Guzik 
7056e22bbf6SKonstantin Belousov 	/*
7066e22bbf6SKonstantin Belousov 	 * Speculative check for PTRACE_FORK. PTRACE_FORK is not
7076e22bbf6SKonstantin Belousov 	 * synced with forks in progress so it is OK if we miss it
7086e22bbf6SKonstantin Belousov 	 * if being set atm.
7096e22bbf6SKonstantin Belousov 	 */
7106e22bbf6SKonstantin Belousov 	if ((p1->p_ptevents & PTRACE_FORK) != 0) {
7116e22bbf6SKonstantin Belousov 		sx_xlock(&proctree_lock);
7126e22bbf6SKonstantin Belousov 		PROC_LOCK(p2);
7136e22bbf6SKonstantin Belousov 
7146e22bbf6SKonstantin Belousov 		/*
7156e22bbf6SKonstantin Belousov 		 * p1->p_ptevents & p1->p_pptr are protected by both
7166e22bbf6SKonstantin Belousov 		 * process and proctree locks for modifications,
7176e22bbf6SKonstantin Belousov 		 * so owning proctree_lock allows the race-free read.
7186e22bbf6SKonstantin Belousov 		 */
7196e22bbf6SKonstantin Belousov 		if ((p1->p_ptevents & PTRACE_FORK) != 0) {
7206e22bbf6SKonstantin Belousov 			/*
7216e22bbf6SKonstantin Belousov 			 * Arrange for debugger to receive the fork event.
7226e22bbf6SKonstantin Belousov 			 *
7236e22bbf6SKonstantin Belousov 			 * We can report PL_FLAG_FORKED regardless of
7246e22bbf6SKonstantin Belousov 			 * P_FOLLOWFORK settings, but it does not make a sense
7256e22bbf6SKonstantin Belousov 			 * for runaway child.
7266e22bbf6SKonstantin Belousov 			 */
7276e22bbf6SKonstantin Belousov 			td->td_dbgflags |= TDB_FORK;
7286e22bbf6SKonstantin Belousov 			td->td_dbg_forked = p2->p_pid;
7296e22bbf6SKonstantin Belousov 			td2->td_dbgflags |= TDB_STOPATFORK;
7306e22bbf6SKonstantin Belousov 			proc_set_traced(p2, true);
7316e22bbf6SKonstantin Belousov 			CTR2(KTR_PTRACE,
7326e22bbf6SKonstantin Belousov 			    "do_fork: attaching to new child pid %d: oppid %d",
7336e22bbf6SKonstantin Belousov 			    p2->p_pid, p2->p_oppid);
7342c054ce9SMateusz Guzik 			proc_reparent(p2, p1->p_pptr, false);
7356e22bbf6SKonstantin Belousov 		}
7366e22bbf6SKonstantin Belousov 		PROC_UNLOCK(p2);
7376e22bbf6SKonstantin Belousov 		sx_xunlock(&proctree_lock);
7386e22bbf6SKonstantin Belousov 	}
7396e22bbf6SKonstantin Belousov 
740a5ac8272SMateusz Guzik 	racct_proc_fork_done(p2);
741a5ac8272SMateusz Guzik 
742813361c1SMateusz Guzik 	if ((fr->fr_flags & RFSTOPPED) == 0) {
743a5ac8272SMateusz Guzik 		if (fr->fr_pidp != NULL)
744a5ac8272SMateusz Guzik 			*fr->fr_pidp = p2->p_pid;
745813361c1SMateusz Guzik 		/*
746813361c1SMateusz Guzik 		 * If RFSTOPPED not requested, make child runnable and
747813361c1SMateusz Guzik 		 * add to run queue.
748813361c1SMateusz Guzik 		 */
749813361c1SMateusz Guzik 		thread_lock(td2);
750813361c1SMateusz Guzik 		TD_SET_CAN_RUN(td2);
751813361c1SMateusz Guzik 		sched_add(td2, SRQ_BORING);
752813361c1SMateusz Guzik 		thread_unlock(td2);
753813361c1SMateusz Guzik 	} else {
754813361c1SMateusz Guzik 		*fr->fr_procp = p2;
755813361c1SMateusz Guzik 	}
756afd01097SEdward Tomasz Napierala }
757afd01097SEdward Tomasz Napierala 
758afd01097SEdward Tomasz Napierala int
75933fd9b9aSMateusz Guzik fork1(struct thread *td, struct fork_req *fr)
760afd01097SEdward Tomasz Napierala {
7614b48959fSKonstantin Belousov 	struct proc *p1, *newproc;
762afd01097SEdward Tomasz Napierala 	struct thread *td2;
763afd01097SEdward Tomasz Napierala 	struct vmspace *vm2;
7644b48959fSKonstantin Belousov 	struct file *fp_procdesc;
765afd01097SEdward Tomasz Napierala 	vm_ooffset_t mem_charged;
7664b48959fSKonstantin Belousov 	int error, nprocs_new, ok;
767afd01097SEdward Tomasz Napierala 	static int curfail;
768afd01097SEdward Tomasz Napierala 	static struct timeval lastfail;
76933fd9b9aSMateusz Guzik 	int flags, pages;
77033fd9b9aSMateusz Guzik 
77133fd9b9aSMateusz Guzik 	flags = fr->fr_flags;
77233fd9b9aSMateusz Guzik 	pages = fr->fr_pages;
773afd01097SEdward Tomasz Napierala 
774813361c1SMateusz Guzik 	if ((flags & RFSTOPPED) != 0)
775813361c1SMateusz Guzik 		MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL);
776813361c1SMateusz Guzik 	else
777813361c1SMateusz Guzik 		MPASS(fr->fr_procp == NULL);
778813361c1SMateusz Guzik 
779f49d8202SKonstantin Belousov 	/* Check for the undefined or unimplemented flags. */
780f49d8202SKonstantin Belousov 	if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0)
781f49d8202SKonstantin Belousov 		return (EINVAL);
782f49d8202SKonstantin Belousov 
783f49d8202SKonstantin Belousov 	/* Signal value requires RFTSIGZMB. */
784f49d8202SKonstantin Belousov 	if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0)
785f49d8202SKonstantin Belousov 		return (EINVAL);
786f49d8202SKonstantin Belousov 
787afd01097SEdward Tomasz Napierala 	/* Can't copy and clear. */
788afd01097SEdward Tomasz Napierala 	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
789afd01097SEdward Tomasz Napierala 		return (EINVAL);
790afd01097SEdward Tomasz Napierala 
791f49d8202SKonstantin Belousov 	/* Check the validity of the signal number. */
792f49d8202SKonstantin Belousov 	if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG)
793f49d8202SKonstantin Belousov 		return (EINVAL);
794f49d8202SKonstantin Belousov 
795cfb5f768SJonathan Anderson 	if ((flags & RFPROCDESC) != 0) {
796cfb5f768SJonathan Anderson 		/* Can't not create a process yet get a process descriptor. */
797cfb5f768SJonathan Anderson 		if ((flags & RFPROC) == 0)
798cfb5f768SJonathan Anderson 			return (EINVAL);
799cfb5f768SJonathan Anderson 
800cfb5f768SJonathan Anderson 		/* Must provide a place to put a procdesc if creating one. */
80133fd9b9aSMateusz Guzik 		if (fr->fr_pd_fd == NULL)
802cfb5f768SJonathan Anderson 			return (EINVAL);
803b3a73448SMariusz Zaborski 
804b3a73448SMariusz Zaborski 		/* Check if we are using supported flags. */
805b3a73448SMariusz Zaborski 		if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0)
806b3a73448SMariusz Zaborski 			return (EINVAL);
807cfb5f768SJonathan Anderson 	}
808cfb5f768SJonathan Anderson 
809afd01097SEdward Tomasz Napierala 	p1 = td->td_proc;
810afd01097SEdward Tomasz Napierala 
811afd01097SEdward Tomasz Napierala 	/*
812afd01097SEdward Tomasz Napierala 	 * Here we don't create a new process, but we divorce
813afd01097SEdward Tomasz Napierala 	 * certain parts of a process from itself.
814afd01097SEdward Tomasz Napierala 	 */
8153e73ff1eSEdward Tomasz Napierala 	if ((flags & RFPROC) == 0) {
816813361c1SMateusz Guzik 		if (fr->fr_procp != NULL)
81733fd9b9aSMateusz Guzik 			*fr->fr_procp = NULL;
818813361c1SMateusz Guzik 		else if (fr->fr_pidp != NULL)
819813361c1SMateusz Guzik 			*fr->fr_pidp = 0;
8203e73ff1eSEdward Tomasz Napierala 		return (fork_norfproc(td, flags));
8213e73ff1eSEdward Tomasz Napierala 	}
822afd01097SEdward Tomasz Napierala 
8234b48959fSKonstantin Belousov 	fp_procdesc = NULL;
8244b48959fSKonstantin Belousov 	newproc = NULL;
8254b48959fSKonstantin Belousov 	vm2 = NULL;
8264b48959fSKonstantin Belousov 
8274b48959fSKonstantin Belousov 	/*
8284b48959fSKonstantin Belousov 	 * Increment the nprocs resource before allocations occur.
8294b48959fSKonstantin Belousov 	 * Although process entries are dynamically created, we still
8304b48959fSKonstantin Belousov 	 * keep a global limit on the maximum number we will
8314b48959fSKonstantin Belousov 	 * create. There are hard-limits as to the number of processes
8324b48959fSKonstantin Belousov 	 * that can run, established by the KVA and memory usage for
8334b48959fSKonstantin Belousov 	 * the process data.
8344b48959fSKonstantin Belousov 	 *
8354b48959fSKonstantin Belousov 	 * Don't allow a nonprivileged user to use the last ten
8364b48959fSKonstantin Belousov 	 * processes; don't let root exceed the limit.
8374b48959fSKonstantin Belousov 	 */
8384b48959fSKonstantin Belousov 	nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1;
8394b48959fSKonstantin Belousov 	if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred,
8404b48959fSKonstantin Belousov 	    PRIV_MAXPROC, 0) != 0) || nprocs_new >= maxproc) {
8414b48959fSKonstantin Belousov 		error = EAGAIN;
8424b48959fSKonstantin Belousov 		sx_xlock(&allproc_lock);
8434b48959fSKonstantin Belousov 		if (ppsratecheck(&lastfail, &curfail, 1)) {
8444b48959fSKonstantin Belousov 			printf("maxproc limit exceeded by uid %u (pid %d); "
8454b48959fSKonstantin Belousov 			    "see tuning(7) and login.conf(5)\n",
8464b48959fSKonstantin Belousov 			    td->td_ucred->cr_ruid, p1->p_pid);
8474b48959fSKonstantin Belousov 		}
8484b48959fSKonstantin Belousov 		sx_xunlock(&allproc_lock);
8494b48959fSKonstantin Belousov 		goto fail2;
8504b48959fSKonstantin Belousov 	}
8514b48959fSKonstantin Belousov 
852cfb5f768SJonathan Anderson 	/*
853cfb5f768SJonathan Anderson 	 * If required, create a process descriptor in the parent first; we
854cfb5f768SJonathan Anderson 	 * will abandon it if something goes wrong. We don't finit() until
855cfb5f768SJonathan Anderson 	 * later.
856cfb5f768SJonathan Anderson 	 */
857cfb5f768SJonathan Anderson 	if (flags & RFPROCDESC) {
858b3a73448SMariusz Zaborski 		error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd,
859b3a73448SMariusz Zaborski 		    fr->fr_pd_flags, fr->fr_pd_fcaps);
860b38520f0SEdward Tomasz Napierala 		if (error != 0)
861d8f3dc78SKonstantin Belousov 			goto fail2;
862cfb5f768SJonathan Anderson 	}
863cfb5f768SJonathan Anderson 
864afd01097SEdward Tomasz Napierala 	mem_charged = 0;
865afd01097SEdward Tomasz Napierala 	if (pages == 0)
866edc82223SKonstantin Belousov 		pages = kstack_pages;
867afd01097SEdward Tomasz Napierala 	/* Allocate new proc. */
868afd01097SEdward Tomasz Napierala 	newproc = uma_zalloc(proc_zone, M_WAITOK);
869afd01097SEdward Tomasz Napierala 	td2 = FIRST_THREAD_IN_PROC(newproc);
870afd01097SEdward Tomasz Napierala 	if (td2 == NULL) {
871afd01097SEdward Tomasz Napierala 		td2 = thread_alloc(pages);
872afd01097SEdward Tomasz Napierala 		if (td2 == NULL) {
873afd01097SEdward Tomasz Napierala 			error = ENOMEM;
87412cec311SMateusz Guzik 			goto fail2;
875afd01097SEdward Tomasz Napierala 		}
876afd01097SEdward Tomasz Napierala 		proc_linkup(newproc, td2);
877afd01097SEdward Tomasz Napierala 	} else {
878afd01097SEdward Tomasz Napierala 		if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) {
879afd01097SEdward Tomasz Napierala 			if (td2->td_kstack != 0)
880afd01097SEdward Tomasz Napierala 				vm_thread_dispose(td2);
881afd01097SEdward Tomasz Napierala 			if (!thread_alloc_stack(td2, pages)) {
882afd01097SEdward Tomasz Napierala 				error = ENOMEM;
88312cec311SMateusz Guzik 				goto fail2;
884afd01097SEdward Tomasz Napierala 			}
885afd01097SEdward Tomasz Napierala 		}
886afd01097SEdward Tomasz Napierala 	}
887afd01097SEdward Tomasz Napierala 
888afd01097SEdward Tomasz Napierala 	if ((flags & RFMEM) == 0) {
889afd01097SEdward Tomasz Napierala 		vm2 = vmspace_fork(p1->p_vmspace, &mem_charged);
890afd01097SEdward Tomasz Napierala 		if (vm2 == NULL) {
891afd01097SEdward Tomasz Napierala 			error = ENOMEM;
89212cec311SMateusz Guzik 			goto fail2;
893afd01097SEdward Tomasz Napierala 		}
894afd01097SEdward Tomasz Napierala 		if (!swap_reserve(mem_charged)) {
895afd01097SEdward Tomasz Napierala 			/*
896afd01097SEdward Tomasz Napierala 			 * The swap reservation failed. The accounting
897afd01097SEdward Tomasz Napierala 			 * from the entries of the copied vm2 will be
898e3043798SPedro F. Giffuni 			 * subtracted in vmspace_free(), so force the
899afd01097SEdward Tomasz Napierala 			 * reservation there.
900afd01097SEdward Tomasz Napierala 			 */
901afd01097SEdward Tomasz Napierala 			swap_reserve_force(mem_charged);
902afd01097SEdward Tomasz Napierala 			error = ENOMEM;
90312cec311SMateusz Guzik 			goto fail2;
904afd01097SEdward Tomasz Napierala 		}
905afd01097SEdward Tomasz Napierala 	} else
906afd01097SEdward Tomasz Napierala 		vm2 = NULL;
907afd01097SEdward Tomasz Napierala 
908097055e2SEdward Tomasz Napierala 	/*
909097055e2SEdward Tomasz Napierala 	 * XXX: This is ugly; when we copy resource usage, we need to bump
910097055e2SEdward Tomasz Napierala 	 *      per-cred resource counters.
911097055e2SEdward Tomasz Napierala 	 */
912ffb34484SMateusz Guzik 	proc_set_cred_init(newproc, crhold(td->td_ucred));
913097055e2SEdward Tomasz Napierala 
914097055e2SEdward Tomasz Napierala 	/*
915097055e2SEdward Tomasz Napierala 	 * Initialize resource accounting for the child process.
916097055e2SEdward Tomasz Napierala 	 */
917097055e2SEdward Tomasz Napierala 	error = racct_proc_fork(p1, newproc);
918097055e2SEdward Tomasz Napierala 	if (error != 0) {
919097055e2SEdward Tomasz Napierala 		error = EAGAIN;
920097055e2SEdward Tomasz Napierala 		goto fail1;
921097055e2SEdward Tomasz Napierala 	}
922097055e2SEdward Tomasz Napierala 
9231dbf9dccSEdward Tomasz Napierala #ifdef MAC
9241dbf9dccSEdward Tomasz Napierala 	mac_proc_init(newproc);
9251dbf9dccSEdward Tomasz Napierala #endif
9269e590ff0SKonstantin Belousov 	newproc->p_klist = knlist_alloc(&newproc->p_mtx);
9271dbf9dccSEdward Tomasz Napierala 	STAILQ_INIT(&newproc->p_ktr);
9281dbf9dccSEdward Tomasz Napierala 
929afd01097SEdward Tomasz Napierala 	sx_xlock(&allproc_lock);
930afd01097SEdward Tomasz Napierala 
93158c77a9dSEdward Tomasz Napierala 	/*
932afd01097SEdward Tomasz Napierala 	 * Increment the count of procs running with this uid. Don't allow
933afd01097SEdward Tomasz Napierala 	 * a nonprivileged user to exceed their current limit.
934afd01097SEdward Tomasz Napierala 	 *
935afd01097SEdward Tomasz Napierala 	 * XXXRW: Can we avoid privilege here if it's not needed?
936afd01097SEdward Tomasz Napierala 	 */
937afd01097SEdward Tomasz Napierala 	error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0);
938afd01097SEdward Tomasz Napierala 	if (error == 0)
939afd01097SEdward Tomasz Napierala 		ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0);
940afd01097SEdward Tomasz Napierala 	else {
941afd01097SEdward Tomasz Napierala 		ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
942f6f6d240SMateusz Guzik 		    lim_cur(td, RLIMIT_NPROC));
943afd01097SEdward Tomasz Napierala 	}
944afd01097SEdward Tomasz Napierala 	if (ok) {
945813361c1SMateusz Guzik 		do_fork(td, fr, newproc, td2, vm2, fp_procdesc);
946df8bae1dSRodney W. Grimes 		return (0);
947afd01097SEdward Tomasz Napierala 	}
948afd01097SEdward Tomasz Napierala 
949afd01097SEdward Tomasz Napierala 	error = EAGAIN;
950c6544064SJohn Baldwin 	sx_xunlock(&allproc_lock);
9516bea667fSRobert Watson #ifdef MAC
95230d239bcSRobert Watson 	mac_proc_destroy(newproc);
9536bea667fSRobert Watson #endif
9541dbf9dccSEdward Tomasz Napierala 	racct_proc_exit(newproc);
955ab27d5d8SEdward Tomasz Napierala fail1:
956edf1796dSMateusz Guzik 	crfree(newproc->p_ucred);
957edf1796dSMateusz Guzik 	newproc->p_ucred = NULL;
95812cec311SMateusz Guzik fail2:
95969aa768aSKonstantin Belousov 	if (vm2 != NULL)
96069aa768aSKonstantin Belousov 		vmspace_free(vm2);
961c6544064SJohn Baldwin 	uma_zfree(proc_zone, newproc);
962de265498SPawel Jakub Dawidek 	if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) {
96333fd9b9aSMateusz Guzik 		fdclose(td, fp_procdesc, *fr->fr_pd_fd);
964cfb5f768SJonathan Anderson 		fdrop(fp_procdesc, td);
9650a7007b9SPawel Jakub Dawidek 	}
9664b48959fSKonstantin Belousov 	atomic_add_int(&nprocs, -1);
96784d37a46SJohn Baldwin 	pause("fork", hz / 2);
968c6544064SJohn Baldwin 	return (error);
969df8bae1dSRodney W. Grimes }
970fed06968SJulian Elischer 
971e0d898b4SJulian Elischer /*
972a7b124c3SJohn Baldwin  * Handle the return of a child process from fork1().  This function
973a7b124c3SJohn Baldwin  * is called from the MD fork_trampoline() entry point.
974a7b124c3SJohn Baldwin  */
975a7b124c3SJohn Baldwin void
9761d845e86SEdward Tomasz Napierala fork_exit(void (*callout)(void *, struct trapframe *), void *arg,
9771d845e86SEdward Tomasz Napierala     struct trapframe *frame)
978a7b124c3SJohn Baldwin {
979696058c3SJulian Elischer 	struct proc *p;
98070fca427SJohn Baldwin 	struct thread *td;
981fe54587fSJeff Roberson 	struct thread *dtd;
98270fca427SJohn Baldwin 
9830047b9a9SBosko Milekic 	td = curthread;
9840047b9a9SBosko Milekic 	p = td->td_proc;
9850047b9a9SBosko Milekic 	KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new"));
9860047b9a9SBosko Milekic 
9876617724cSJeff Roberson 	CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)",
98893ccd6bfSKonstantin Belousov 	    td, td_get_sched(td), p->p_pid, td->td_name);
9890047b9a9SBosko Milekic 
99011bda9b8SJeff Roberson 	sched_fork_exit(td);
991a7b124c3SJohn Baldwin 	/*
992fe54587fSJeff Roberson 	* Processes normally resume in mi_switch() after being
993fe54587fSJeff Roberson 	* cpu_switch()'ed to, but when children start up they arrive here
994fe54587fSJeff Roberson 	* instead, so we must do much the same things as mi_switch() would.
995fe54587fSJeff Roberson 	*/
996fe54587fSJeff Roberson 	if ((dtd = PCPU_GET(deadthread))) {
997fe54587fSJeff Roberson 		PCPU_SET(deadthread, NULL);
998fe54587fSJeff Roberson 		thread_stash(dtd);
999fe54587fSJeff Roberson 	}
1000fe54587fSJeff Roberson 	thread_unlock(td);
1001fe54587fSJeff Roberson 
1002fe54587fSJeff Roberson 	/*
10035c2cf818SKonstantin Belousov 	 * cpu_fork_kthread_handler intercepts this function call to
1004a7b124c3SJohn Baldwin 	 * have this call a non-return function to stay in kernel mode.
1005a7b124c3SJohn Baldwin 	 * initproc has its own fork handler, but it does return.
1006a7b124c3SJohn Baldwin 	 */
10075813dc03SJohn Baldwin 	KASSERT(callout != NULL, ("NULL callout in fork_exit"));
10088865286bSJohn Baldwin 	callout(arg, frame);
1009a7b124c3SJohn Baldwin 
1010a7b124c3SJohn Baldwin 	/*
1011a7b124c3SJohn Baldwin 	 * Check if a kernel thread misbehaved and returned from its main
1012a7b124c3SJohn Baldwin 	 * function.
1013a7b124c3SJohn Baldwin 	 */
1014db57c70aSKonstantin Belousov 	if (p->p_flag & P_KPROC) {
1015a7b124c3SJohn Baldwin 		printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",
1016e01eafefSJulian Elischer 		    td->td_name, p->p_pid);
1017fb1f4582SJohn Baldwin 		kthread_exit();
1018a7b124c3SJohn Baldwin 	}
1019a7b124c3SJohn Baldwin 	mtx_assert(&Giant, MA_NOTOWNED);
1020993182e5SAlexander Leidinger 
1021e5d81ef1SDmitry Chagin 	if (p->p_sysent->sv_schedtail != NULL)
1022e5d81ef1SDmitry Chagin 		(p->p_sysent->sv_schedtail)(td);
1023aff57357SEd Schouten 	td->td_pflags &= ~TDP_FORKING;
1024a7b124c3SJohn Baldwin }
1025a7b124c3SJohn Baldwin 
1026a7b124c3SJohn Baldwin /*
1027a7b124c3SJohn Baldwin  * Simplified back end of syscall(), used when returning from fork()
1028e69ba32fSKonstantin Belousov  * directly into user mode.  This function is passed in to fork_exit()
1029e69ba32fSKonstantin Belousov  * as the first parameter and is called when returning to a new
1030e69ba32fSKonstantin Belousov  * userland process.
1031a7b124c3SJohn Baldwin  */
1032a7b124c3SJohn Baldwin void
10331d845e86SEdward Tomasz Napierala fork_return(struct thread *td, struct trapframe *frame)
1034a7b124c3SJohn Baldwin {
10356e22bbf6SKonstantin Belousov 	struct proc *p;
10366fa39a73SKonstantin Belousov 
10376fa39a73SKonstantin Belousov 	p = td->td_proc;
1038189ac973SJohn Baldwin 	if (td->td_dbgflags & TDB_STOPATFORK) {
10396fa39a73SKonstantin Belousov 		PROC_LOCK(p);
10406e22bbf6SKonstantin Belousov 		if ((p->p_flag & P_TRACED) != 0) {
10416fa39a73SKonstantin Belousov 			/*
10426e22bbf6SKonstantin Belousov 			 * Inform the debugger if one is still present.
10436fa39a73SKonstantin Belousov 			 */
1044b7a25e63SKonstantin Belousov 			td->td_dbgflags |= TDB_CHILD | TDB_SCX | TDB_FSTP;
104582a4538fSEric Badger 			ptracestop(td, SIGSTOP, NULL);
1046189ac973SJohn Baldwin 			td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX);
10476fa39a73SKonstantin Belousov 		} else {
10486fa39a73SKonstantin Belousov 			/*
10496fa39a73SKonstantin Belousov 			 * ... otherwise clear the request.
10506fa39a73SKonstantin Belousov 			 */
10516fa39a73SKonstantin Belousov 			td->td_dbgflags &= ~TDB_STOPATFORK;
10526fa39a73SKonstantin Belousov 		}
10536fa39a73SKonstantin Belousov 		PROC_UNLOCK(p);
10545fcfab6eSJohn Baldwin 	} else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) {
1055189ac973SJohn Baldwin  		/*
1056189ac973SJohn Baldwin 		 * This is the start of a new thread in a traced
1057189ac973SJohn Baldwin 		 * process.  Report a system call exit event.
1058189ac973SJohn Baldwin 		 */
1059189ac973SJohn Baldwin 		PROC_LOCK(p);
1060189ac973SJohn Baldwin 		td->td_dbgflags |= TDB_SCX;
10612d88da2fSKonstantin Belousov 		_STOPEVENT(p, S_SCX, td->td_sa.code);
10628d570f64SJohn Baldwin 		if ((p->p_ptevents & PTRACE_SCX) != 0 ||
10635fcfab6eSJohn Baldwin 		    (td->td_dbgflags & TDB_BORN) != 0)
106482a4538fSEric Badger 			ptracestop(td, SIGTRAP, NULL);
10655fcfab6eSJohn Baldwin 		td->td_dbgflags &= ~(TDB_SCX | TDB_BORN);
1066189ac973SJohn Baldwin 		PROC_UNLOCK(p);
10676fa39a73SKonstantin Belousov 	}
1068a7b124c3SJohn Baldwin 
1069eb2da9a5SPoul-Henning Kamp 	userret(td, frame);
10706fa39a73SKonstantin Belousov 
1071a7b124c3SJohn Baldwin #ifdef KTRACE
1072af300f23SJohn Baldwin 	if (KTRPOINT(td, KTR_SYSRET))
1073af300f23SJohn Baldwin 		ktrsysret(SYS_fork, 0, 0);
1074a7b124c3SJohn Baldwin #endif
1075a7b124c3SJohn Baldwin }
1076