1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 39 * $Id: kern_fork.c,v 1.53 1998/12/19 02:55:33 julian Exp $ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/malloc.h> 51 #include <sys/proc.h> 52 #include <sys/resourcevar.h> 53 #include <sys/vnode.h> 54 #include <sys/acct.h> 55 #include <sys/ktrace.h> 56 #include <sys/unistd.h> 57 58 #include <vm/vm.h> 59 #include <sys/lock.h> 60 #include <vm/pmap.h> 61 #include <vm/vm_map.h> 62 #include <vm/vm_extern.h> 63 #include <vm/vm_zone.h> 64 65 #ifdef COMPAT_LINUX_THREADS 66 #include <machine/frame.h> 67 #include <sys/user.h> 68 #endif /* COMPAT_LINUX_THREADS */ 69 #ifdef SMP 70 static int fast_vfork = 0; /* Doesn't work on SMP yet. 
 */
#else
static int fast_vfork = 1;	/* UP: let vfork() share the address space */
#endif
SYSCTL_INT(_kern, OID_AUTO, fast_vfork, CTLFLAG_RW, &fast_vfork, 0, "");

/*
 * These are the structures used to create a callout list for things to do
 * when forking a process.  Each entry is invoked as (parent, child, flags)
 * once both processes have been set up (see the loop near the end of
 * fork1()).
 */
typedef struct fork_list_element {
	struct fork_list_element *next;	/* singly-linked list link */
	forklist_fn function;		/* callout to run at fork time */
} *fle_p;

/* Head of the fork callout list; maintained by at_fork()/rm_at_fork(). */
static fle_p fork_list;

#ifndef _SYS_SYSPROTO_H_
struct fork_args {
	int dummy;
};
#endif

/*
 * fork(2) system call: copy file descriptor table, create new process.
 */
/* ARGSUSED */
int
fork(p, uap)
	struct proc *p;
	struct fork_args *uap;
{

	return (fork1(p, RFFDG | RFPROC));
}

/*
 * vfork(2) system call: like fork() but the parent sleeps until the child
 * execs or exits (RFPPWAIT); if fast_vfork is enabled the child also shares
 * the parent's address space (RFMEM).
 */
/* ARGSUSED */
int
vfork(p, uap)
	struct proc *p;
	struct vfork_args *uap;
{

	return (fork1(p, RFFDG | RFPROC | RFPPWAIT | (fast_vfork ? RFMEM : 0)));
}

/*
 * rfork(2) system call: caller supplies the RF* resource-sharing flags
 * directly.
 */
/* ARGSUSED */
int
rfork(p, uap)
	struct proc *p;
	struct rfork_args *uap;
{

	return (fork1(p, uap->flags));
}


int nprocs = 1;		/* process 0 */
static int nextpid = 0;	/* pid prototype; advanced on each fork */

/*
 * fork1 --
 *	Common code for fork(), vfork() and rfork().  "flags" is the
 *	rfork(2)-style RF* bit mask selecting which resources are shared
 *	with, copied to, or closed in the child.
 *
 *	If RFPROC is clear, no new process is created; instead the named
 *	resources (address space, file descriptors) are un-shared in the
 *	calling process itself.
 *
 *	Returns 0 on success (the child's pid is passed back to the parent
 *	via p1->p_retval[0]) or an errno value (EINVAL, EOPNOTSUPP, EAGAIN)
 *	on failure.
 */
int
fork1(p1, flags)
	register struct proc *p1;
	int flags;
{
	register struct proc *p2, *pptr;
	register uid_t uid;
	struct proc *newproc;
	int count;
	static int pidchecked = 0;
	fle_p ep;

	/*
	 * Snapshot the fork callout list head now; entries are invoked
	 * after vm_fork() below.
	 */
	ep = fork_list;

	/* "Copy fd table" and "close all fds" are mutually exclusive. */
	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
		return (EINVAL);

#ifdef SMP
	/*
	 * FATAL now, we cannot have the same PTD on both cpus, the PTD
	 * needs to move out of PTmap and be per-process, even for shared
	 * page table processes.  Unfortunately, this means either removing
	 * PTD[] as a fixed virtual address, or move it to the per-cpu map
	 * area for SMP mode.  Both cases require separate management of
	 * the per-process-even-if-PTmap-is-shared PTD.
	 */
	if (flags & RFMEM) {
		printf("shared address space fork attempted: pid: %d\n",
		    p1->p_pid);
		return (EOPNOTSUPP);
	}
#endif

	/*
	 * Here we don't create a new process, but we divorce
	 * certain parts of a process from itself.
	 */
	if ((flags & RFPROC) == 0) {

		/*
		 * Divorce the memory, if it is shared, essentially
		 * this changes shared memory amongst threads, into
		 * COW locally.
		 */
		if ((flags & RFMEM) == 0) {
			if (p1->p_vmspace->vm_refcnt > 1) {
				vmspace_unshare(p1);
			}
		}

		/*
		 * Close all file descriptors.
		 */
		if (flags & RFCFDG) {
			struct filedesc *fdtmp;
			fdtmp = fdinit(p1);
			fdfree(p1);
			p1->p_fd = fdtmp;
		}

		/*
		 * Unshare file descriptors (from parent.)
		 */
		if (flags & RFFDG) {
			if (p1->p_fd->fd_refcnt > 1) {
				struct filedesc *newfd;
				newfd = fdcopy(p1);
				fdfree(p1);
				p1->p_fd = newfd;
			}
		}
		return (0);
	}

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last process; don't let root
	 * exceed the limit.  The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	uid = p1->p_cred->p_ruid;
	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
		tablefull("proc");
		return (EAGAIN);
	}
	/*
	 * Increment the nprocs resource before blocking can occur.  There
	 * are hard-limits as to the number of processes that can run.
	 */
	nprocs++;

	/*
	 * Increment the count of procs running with this uid.  Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		(void)chgproccnt(uid, -1);
		/*
		 * Back out the process count
		 */
		nprocs--;
		return (EAGAIN);
	}

	/* Allocate new proc. */
	newproc = zalloc(proc_zone);

	/*
	 * Setup linkage for kernel based threading.  With RFTHREAD the
	 * child is inserted into the parent's peer list and shares the
	 * parent's leader; otherwise it leads itself.
	 */
	if((flags & RFTHREAD) != 0) {
		newproc->p_peers = p1->p_peers;
		p1->p_peers = newproc;
		newproc->p_leader = p1->p_leader;
	} else {
		newproc->p_peers = 0;
		newproc->p_leader = newproc;
	}

	newproc->p_wakeup = 0;

	/*
	 * Find an unused process ID.  We remember a range of unused IDs
	 * ready to use (from nextpid+1 through pidchecked-1).
	 */
	nextpid++;
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (nextpid >= PID_MAX) {
		nextpid = 100;
		pidchecked = 0;
	}
	if (nextpid >= pidchecked) {
		int doingzomb = 0;

		pidchecked = PID_MAX;
		/*
		 * Scan the active and zombie procs to check whether this pid
		 * is in use.  Remember the lowest pid that's greater
		 * than nextpid, so we can avoid checking for a while.
		 * Note: a pid is considered in use if it matches any
		 * process, process group, or session id.
		 */
		p2 = allproc.lh_first;
again:
		for (; p2 != 0; p2 = p2->p_list.le_next) {
			while (p2->p_pid == nextpid ||
			    p2->p_pgrp->pg_id == nextpid ||
			    p2->p_session->s_sid == nextpid) {
				nextpid++;
				if (nextpid >= pidchecked)
					goto retry;
			}
			if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
				pidchecked = p2->p_pid;
			if (p2->p_pgrp->pg_id > nextpid &&
			    pidchecked > p2->p_pgrp->pg_id)
				pidchecked = p2->p_pgrp->pg_id;
			if (p2->p_session->s_sid > nextpid &&
			    pidchecked > p2->p_session->s_sid)
				pidchecked = p2->p_session->s_sid;
		}
		/* After the active list, make a second pass over zombies. */
		if (!doingzomb) {
			doingzomb = 1;
			p2 = zombproc.lh_first;
			goto again;
		}
	}

	p2 = newproc;
	p2->p_stat = SIDL;			/* protect against others */
	p2->p_pid = nextpid;
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	bzero(&p2->p_startzero,
	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

	p2->p_aioinfo = NULL;

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	p2->p_flag = P_INMEM;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);
	MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred),
	    M_SUBPROC, M_WAITOK);
	bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
	p2->p_cred->p_refcnt = 1;
	crhold(p1->p_ucred);

#ifdef COMPAT_LINUX_THREADS
	if (flags & RFSIGSHARE) {
		/* Share the parent's signal state (Linux CLONE_SIGHAND). */
		p2->p_procsig = p1->p_procsig;
		p2->p_procsig->ps_refcnt++;
		if (p1->p_sigacts == &p1->p_addr->u_sigacts) {
			struct sigacts *newsigacts;
			int s;

			if (p2->p_procsig->ps_refcnt != 2)
				printf ("PID:%d Creating shared sigacts with procsig->ps_refcnt %d\n",
				    p2->p_pid, p2->p_procsig->ps_refcnt);
			/* Create the shared sigacts structure */
			MALLOC (newsigacts, struct sigacts *, sizeof (struct sigacts),
			    M_SUBPROC, M_WAITOK);
			s = splhigh();
			/*
			 * Set p_sigacts to the new shared structure.  Note
			 * that this is updating p1->p_sigacts at the same
			 * time, since p_sigacts is just a pointer to the
			 * shared p_procsig->ps_sigacts.
			 */
			p2->p_sigacts = newsigacts;
			/* Copy in the values from the u area */
			*p2->p_sigacts = p1->p_addr->u_sigacts;
			splx (s);
		}
	} else {
		MALLOC (p2->p_procsig, struct procsig *, sizeof(struct procsig),
		    M_SUBPROC, M_WAITOK);
		bcopy(&p1->p_procsig->ps_begincopy, &p2->p_procsig->ps_begincopy,
		    (unsigned)&p1->p_procsig->ps_endcopy -
		    (unsigned)&p1->p_procsig->ps_begincopy);
		p2->p_procsig->ps_refcnt = 1;
		/* Note that we fill in the values of sigacts in vm_fork */
		p2->p_sigacts = NULL;
	}
	if (flags & RFLINUXTHPN) {
		/* Deliver SIGUSR1 (not SIGCHLD) to the parent on exit. */
		p2->p_sigparent = SIGUSR1;
	}
#endif /* COMPAT_LINUX_THREADS */
	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		VREF(p2->p_textvp);

	/* File descriptor table: fresh (RFCFDG), copied (RFFDG) or shared. */
	if (flags & RFCFDG)
		p2->p_fd = fdinit(p1);
	else if (flags & RFFDG)
		p2->p_fd = fdcopy(p1);
	else
		p2->p_fd = fdshare(p1);

	/*
	 * If p_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
	if (p1->p_limit->p_lflags & PL_SHAREMOD)
		p2->p_limit = limcopy(p1->p_limit);
	else {
		p2->p_limit = p1->p_limit;
		p2->p_limit->p_refcnt++;
	}

	/*
	 * Preserve some more flags in subprocess.  P_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & P_SUGID;
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	if (flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	/* Child joins the parent's process group. */
	LIST_INSERT_AFTER(p1, p2, p_pglist);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process becomes a child
	 * of init.  This effectively disassociates the child from the
	 * parent.
	 */
	if (flags & RFNOWAIT)
		pptr = initproc;
	else
		pptr = p1;
	p2->p_pptr = pptr;
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	LIST_INIT(&p2->p_children);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag&KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

	/*
	 * set priority of child to be that of parent
	 */
	p2->p_estcpu = p1->p_estcpu;

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	p1->p_flag |= P_NOSWAP;

	/*
	 * Finish creating the child process.  It will return via a different
	 * execution path later.  (ie: directly into user mode)
	 */
	vm_fork(p1, p2, flags);

	/*
	 * Both processes are set up, now check if any LKMs want
	 * to adjust anything.
	 *   What if they have an error? XXX
	 */
	while (ep) {
		(*ep->function)(p1, p2, flags);
		ep = ep->next;
	}

	/*
	 * Make child runnable and add to run queue.
	 */
	microtime(&(p2->p_stats->p_start));
	p2->p_acflag = AFORK;
	(void) splhigh();
	p2->p_stat = SRUN;
	setrunqueue(p2);
	(void) spl0();

	/*
	 * Now can be swapped.
	 */
	p1->p_flag &= ~P_NOSWAP;

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	while (p2->p_flag & P_PPWAIT)
		tsleep(p1, PWAIT, "ppwait", 0);

	/*
	 * Return child pid to parent process,
	 * marking us as parent via p1->p_retval[1].
	 */
	p1->p_retval[0] = p2->p_pid;
	p1->p_retval[1] = 0;
	return (0);
}

/*
 * The next two functions are general routines to handle adding/deleting
 * items on the fork callout list.
496 * 497 * at_fork(): 498 * Take the arguments given and put them onto the fork callout list, 499 * However first make sure that it's not already there. 500 * Returns 0 on success or a standard error number. 501 */ 502 int 503 at_fork(function) 504 forklist_fn function; 505 { 506 fle_p ep; 507 508 /* let the programmer know if he's been stupid */ 509 if (rm_at_fork(function)) 510 printf("fork callout entry already present\n"); 511 ep = malloc(sizeof(*ep), M_TEMP, M_NOWAIT); 512 if (ep == NULL) 513 return (ENOMEM); 514 ep->next = fork_list; 515 ep->function = function; 516 fork_list = ep; 517 return (0); 518 } 519 520 /* 521 * Scan the exit callout list for the given items and remove them. 522 * Returns the number of items removed. 523 * Theoretically this value can only be 0 or 1. 524 */ 525 int 526 rm_at_fork(function) 527 forklist_fn function; 528 { 529 fle_p *epp, ep; 530 int count; 531 532 count= 0; 533 epp = &fork_list; 534 ep = *epp; 535 while (ep) { 536 if (ep->function == function) { 537 *epp = ep->next; 538 free(ep, M_TEMP); 539 count++; 540 } else { 541 epp = &ep->next; 542 } 543 ep = *epp; 544 } 545 return (count); 546 } 547