1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 39 * $Id: kern_fork.c,v 1.50 1997/12/12 04:00:58 dyson Exp $ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/malloc.h> 51 #include <sys/proc.h> 52 #include <sys/resourcevar.h> 53 #include <sys/vnode.h> 54 #include <sys/acct.h> 55 #include <sys/ktrace.h> 56 #include <sys/unistd.h> 57 58 #include <vm/vm.h> 59 #include <sys/lock.h> 60 #include <vm/pmap.h> 61 #include <vm/vm_map.h> 62 #include <vm/vm_extern.h> 63 #include <vm/vm_zone.h> 64 65 #ifdef SMP 66 static int fast_vfork = 0; /* Doesn't work on SMP yet. 
 */
#else
static int fast_vfork = 1;
#endif
/* Tunable: allow vfork() to request RFMEM (shared address space). */
SYSCTL_INT(_kern, OID_AUTO, fast_vfork, CTLFLAG_RW, &fast_vfork, 0, "");

/*
 * These are the structures used to create a callout list for things to do
 * when forking a process.  Each registered function is invoked for both
 * the parent and the new child near the end of fork1().
 */
typedef struct fork_list_element {
	struct fork_list_element *next;	/* singly-linked list link */
	forklist_fn function;		/* callback: (parent, child, flags) */
} *fle_p;

/* Head of the fork callout list; manipulated by at_fork()/rm_at_fork(). */
static fle_p fork_list;

#ifndef _SYS_SYSPROTO_H_
struct fork_args {
	int dummy;
};
#endif

/*
 * fork() system call: full copy of the file descriptor table, new process.
 */
/* ARGSUSED */
int
fork(p, uap)
	struct proc *p;
	struct fork_args *uap;
{

	return (fork1(p, RFFDG | RFPROC));
}

/*
 * vfork() system call: like fork(), but the parent sleeps until the child
 * execs or exits (RFPPWAIT), and the address space is shared (RFMEM) when
 * the fast_vfork tunable permits it.
 */
/* ARGSUSED */
int
vfork(p, uap)
	struct proc *p;
	struct vfork_args *uap;
{

	return (fork1(p, RFFDG | RFPROC | RFPPWAIT | (fast_vfork ? RFMEM : 0)));
}

/*
 * rfork() system call: caller supplies the RF* resource-sharing flags
 * directly (Plan 9 style).
 */
/* ARGSUSED */
int
rfork(p, uap)
	struct proc *p;
	struct rfork_args *uap;
{

	return (fork1(p, uap->flags));
}


int	nprocs = 1;		/* process 0 */
static int nextpid = 0;		/* next candidate PID, see fork1() */

/*
 * fork1 --
 *	Create a new process, or (when RFPROC is clear) un-share selected
 *	resources of the calling process in place.
 *
 *	p1	- the forking (parent) process
 *	flags	- RF* flags selecting what is shared, copied, or reset
 *
 *	Returns 0 on success with the child's pid in p1->p_retval[0],
 *	or EINVAL/EOPNOTSUPP/EAGAIN on failure.
 */
int
fork1(p1, flags)
	register struct proc *p1;
	int flags;
{
	register struct proc *p2, *pptr;
	register uid_t uid;
	struct proc *newproc;
	int count;
	static int pidchecked = 0;	/* upper bound of known-free pid range */
	fle_p ep ;

	/* Snapshot the callout list head; entries are run after vm_fork(). */
	ep = fork_list;

	/* RFFDG (copy fd table) and RFCFDG (fresh fd table) are exclusive. */
	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
		return (EINVAL);

#ifdef SMP
	/*
	 * FATAL now, we cannot have the same PTD on both cpus, the PTD
	 * needs to move out of PTmap and be per-process, even for shared
	 * page table processes.  Unfortunately, this means either removing
	 * PTD[] as a fixed virtual address, or move it to the per-cpu map
	 * area for SMP mode.  Both cases require separate management of
	 * the per-process-even-if-PTmap-is-shared PTD.
	 */
	if (flags & RFMEM) {
		printf("shared address space fork attempted: pid: %d\n",
		    p1->p_pid);
		return (EOPNOTSUPP);
	}
#endif

	/*
	 * Here we don't create a new process, but we divorce
	 * certain parts of a process from itself.
	 */
	if ((flags & RFPROC) == 0) {

		/*
		 * Divorce the memory, if it is shared, essentially
		 * this changes shared memory amongst threads, into
		 * COW locally.
		 */
		if ((flags & RFMEM) == 0) {
			if (p1->p_vmspace->vm_refcnt > 1) {
				vmspace_unshare(p1);
			}
		}

		/*
		 * Close all file descriptors.
		 */
		if (flags & RFCFDG) {
			struct filedesc *fdtmp;
			fdtmp = fdinit(p1);
			fdfree(p1);
			p1->p_fd = fdtmp;
		}

		/*
		 * Unshare file descriptors (from parent.)
		 */
		if (flags & RFFDG) {
			if (p1->p_fd->fd_refcnt > 1) {
				struct filedesc *newfd;
				newfd = fdcopy(p1);
				fdfree(p1);
				p1->p_fd = newfd;
			}
		}
		return (0);
	}

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last process; don't let root
	 * exceed the limit.  The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 */
	uid = p1->p_cred->p_ruid;
	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
		tablefull("proc");
		return (EAGAIN);
	}
	/*
	 * Increment the nprocs resource before blocking can occur.  There
	 * are hard-limits as to the number of processes that can run.
	 */
	nprocs++;

	/*
	 * Increment the count of procs running with this uid.  Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		(void)chgproccnt(uid, -1);
		/*
		 * Back out the process count
		 */
		nprocs--;
		return (EAGAIN);
	}

	/* Allocate new proc. */
	newproc = zalloc(proc_zone);

	/*
	 * Setup linkage for kernel based threading.  RFTHREAD peers share
	 * a common leader; otherwise the new proc leads its own group.
	 */
	if((flags & RFTHREAD) != 0) {
		newproc->p_peers = p1->p_peers;
		p1->p_peers = newproc;
		newproc->p_leader = p1->p_leader;
	} else {
		newproc->p_peers = 0;
		newproc->p_leader = newproc;
	}

	newproc->p_wakeup = 0;

	/*
	 * Find an unused process ID.  We remember a range of unused IDs
	 * ready to use (from nextpid+1 through pidchecked-1).
	 */
	nextpid++;
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (nextpid >= PID_MAX) {
		nextpid = 100;
		pidchecked = 0;
	}
	if (nextpid >= pidchecked) {
		int doingzomb = 0;

		pidchecked = PID_MAX;
		/*
		 * Scan the active and zombie procs to check whether this pid
		 * is in use.  Remember the lowest pid that's greater
		 * than nextpid, so we can avoid checking for a while.
		 * Both pids and process group ids must be avoided.
		 */
		p2 = allproc.lh_first;
again:
		for (; p2 != 0; p2 = p2->p_list.le_next) {
			while (p2->p_pid == nextpid ||
			    p2->p_pgrp->pg_id == nextpid) {
				nextpid++;
				/* Advanced past the verified-free range;
				 * rescan from the top. */
				if (nextpid >= pidchecked)
					goto retry;
			}
			if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
				pidchecked = p2->p_pid;
			if (p2->p_pgrp->pg_id > nextpid &&
			    pidchecked > p2->p_pgrp->pg_id)
				pidchecked = p2->p_pgrp->pg_id;
		}
		if (!doingzomb) {
			/* Second pass: zombies still hold their pids. */
			doingzomb = 1;
			p2 = zombproc.lh_first;
			goto again;
		}
	}

	p2 = newproc;
	p2->p_stat = SIDL;			/* protect against others */
	p2->p_pid = nextpid;
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	bzero(&p2->p_startzero,
	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

	p2->p_aioinfo = NULL;

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	p2->p_flag = P_INMEM;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);
	MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred),
	    M_SUBPROC, M_WAITOK);
	bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
	p2->p_cred->p_refcnt = 1;
	crhold(p1->p_ucred);

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		VREF(p2->p_textvp);

	/* New, copied, or shared descriptor table per the RF* flags. */
	if (flags & RFCFDG)
		p2->p_fd = fdinit(p1);
	else if (flags & RFFDG)
		p2->p_fd = fdcopy(p1);
	else
		p2->p_fd = fdshare(p1);

	/*
	 * If p_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
	if (p1->p_limit->p_lflags & PL_SHAREMOD)
		p2->p_limit = limcopy(p1->p_limit);
	else {
		p2->p_limit = p1->p_limit;
		p2->p_limit->p_refcnt++;
	}

	/*
	 * Preserve some more flags in subprocess.  P_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & P_SUGID;
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	if (flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	/* Child joins parent's process group. */
	LIST_INSERT_AFTER(p1, p2, p_pglist);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process becomes a child
	 * of init.  This effectively disassociates the child from the
	 * parent.
	 */
	if (flags & RFNOWAIT)
		pptr = initproc;
	else
		pptr = p1;
	p2->p_pptr = pptr;
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	LIST_INIT(&p2->p_children);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag&KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

	/*
	 * set priority of child to be that of parent
	 */
	p2->p_estcpu = p1->p_estcpu;

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	p1->p_flag |= P_NOSWAP;

	/*
	 * Finish creating the child process.  It will return via a different
	 * execution path later.  (ie: directly into user mode)
	 */
	vm_fork(p1, p2, flags);

	/*
	 * Both processes are set up, now check if any LKMs want
	 * to adjust anything.
	 *   What if they have an error? XXX
	 */
	while (ep) {
		(*ep->function)(p1, p2, flags);
		ep = ep->next;
	}

	/*
	 * Make child runnable and add to run queue.
	 * splhigh() keeps the state transition atomic w.r.t. interrupts.
	 */
	microtime(&(p2->p_stats->p_start));
	p2->p_acflag = AFORK;
	(void) splhigh();
	p2->p_stat = SRUN;
	setrunqueue(p2);
	(void) spl0();

	/*
	 * Now can be swapped.
	 */
	p1->p_flag &= ~P_NOSWAP;

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	while (p2->p_flag & P_PPWAIT)
		tsleep(p1, PWAIT, "ppwait", 0);

	/*
	 * Return child pid to parent process,
	 * marking us as parent via p1->p_retval[1].
	 */
	p1->p_retval[0] = p2->p_pid;
	p1->p_retval[1] = 0;
	return (0);
}

/*
 * The next two functions are general routines to handle adding/deleting
 * items on the fork callout list.
 *
 * at_fork():
 * Take the arguments given and put them onto the fork callout list,
 * However first make sure that it's not already there.
 * Returns 0 on success or a standard error number.
455 */ 456 int 457 at_fork(function) 458 forklist_fn function; 459 { 460 fle_p ep; 461 462 /* let the programmer know if he's been stupid */ 463 if (rm_at_fork(function)) 464 printf("fork callout entry already present\n"); 465 ep = malloc(sizeof(*ep), M_TEMP, M_NOWAIT); 466 if (ep == NULL) 467 return (ENOMEM); 468 ep->next = fork_list; 469 ep->function = function; 470 fork_list = ep; 471 return (0); 472 } 473 474 /* 475 * Scan the exit callout list for the given items and remove them. 476 * Returns the number of items removed. 477 * Theoretically this value can only be 0 or 1. 478 */ 479 int 480 rm_at_fork(function) 481 forklist_fn function; 482 { 483 fle_p *epp, ep; 484 int count; 485 486 count= 0; 487 epp = &fork_list; 488 ep = *epp; 489 while (ep) { 490 if (ep->function == function) { 491 *epp = ep->next; 492 free(ep, M_TEMP); 493 count++; 494 } else { 495 epp = &ep->next; 496 } 497 ep = *epp; 498 } 499 return (count); 500 } 501