1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_compat.h" 41 #include "opt_ktrace.h" 42 #include "opt_mac.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/eventhandler.h> 48 #include <sys/kernel.h> 49 #include <sys/malloc.h> 50 #include <sys/lock.h> 51 #include <sys/mutex.h> 52 #include <sys/proc.h> 53 #include <sys/pioctl.h> 54 #include <sys/tty.h> 55 #include <sys/wait.h> 56 #include <sys/vmmeter.h> 57 #include <sys/vnode.h> 58 #include <sys/resourcevar.h> 59 #include <sys/sbuf.h> 60 #include <sys/signalvar.h> 61 #include <sys/sched.h> 62 #include <sys/sx.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/syslog.h> 65 #include <sys/ptrace.h> 66 #include <sys/acct.h> /* for acct_process() function prototype */ 67 #include <sys/filedesc.h> 68 #include <sys/mac.h> 69 #include <sys/shm.h> 70 #include <sys/sem.h> 71 #ifdef KTRACE 72 #include <sys/ktrace.h> 73 #endif 74 75 #include <security/audit/audit.h> 76 77 #include <vm/vm.h> 78 #include <vm/vm_extern.h> 79 #include <vm/vm_param.h> 80 #include <vm/pmap.h> 81 #include <vm/vm_map.h> 82 #include <vm/vm_page.h> 83 #include <vm/uma.h> 84 85 /* Required to be non-static for SysVR4 emulator */ 86 MALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status"); 87 88 /* Hook for NFS teardown procedure. */ 89 void (*nlminfo_release_p)(struct proc *p); 90 91 /* 92 * exit -- 93 * Death of process. 94 * 95 * MPSAFE 96 */ 97 void 98 sys_exit(struct thread *td, struct sys_exit_args *uap) 99 { 100 101 exit1(td, W_EXITCODE(uap->rval, 0)); 102 /* NOTREACHED */ 103 } 104 105 /* 106 * Exit: deallocate address space and other resources, change proc state 107 * to zombie, and unlink proc from allproc and parent's lists. Save exit 108 * status and rusage for wait(). Check for child processes and orphan them. 109 */ 110 void 111 exit1(struct thread *td, int rv) 112 { 113 struct bintime new_switchtime; 114 struct proc *p, *nq, *q; 115 struct tty *tp; 116 struct vnode *ttyvp; 117 struct vmspace *vm; 118 struct vnode *vtmp; 119 #ifdef KTRACE 120 struct vnode *tracevp; 121 struct ucred *tracecred; 122 #endif 123 struct plimit *plim; 124 int locked, refcnt; 125 126 /* 127 * Drop Giant if caller has it. Eventually we should warn about 128 * being called with Giant held. 129 */ 130 while (mtx_owned(&Giant)) 131 mtx_unlock(&Giant); 132 133 p = td->td_proc; 134 if (p == initproc) { 135 printf("init died (signal %d, exit %d)\n", 136 WTERMSIG(rv), WEXITSTATUS(rv)); 137 panic("Going nowhere without my init!"); 138 } 139 140 /* 141 * MUST abort all other threads before proceeding past here. 142 */ 143 PROC_LOCK(p); 144 if (p->p_flag & P_HADTHREADS) { 145 retry: 146 /* 147 * First check if some other thread got here before us.. 148 * if so, act apropriatly, (exit or suspend); 149 */ 150 thread_suspend_check(0); 151 152 /* 153 * Kill off the other threads. This requires 154 * some co-operation from other parts of the kernel 155 * so it may not be instantaneous. With this state set 156 * any thread entering the kernel from userspace will 157 * thread_exit() in trap(). Any thread attempting to 158 * sleep will return immediately with EINTR or EWOULDBLOCK 159 * which will hopefully force them to back out to userland 160 * freeing resources as they go. Any thread attempting 161 * to return to userland will thread_exit() from userret(). 162 * thread_exit() will unsuspend us when the last of the 163 * other threads exits. 164 * If there is already a thread singler after resumption, 165 * calling thread_single will fail; in that case, we just 166 * re-check all suspension request, the thread should 167 * either be suspended there or exit. 168 */ 169 if (thread_single(SINGLE_EXIT)) 170 goto retry; 171 172 /* 173 * All other activity in this process is now stopped. 174 * Threading support has been turned off. 175 */ 176 } 177 178 p->p_flag |= P_WEXIT; 179 180 PROC_LOCK(p->p_pptr); 181 sigqueue_take(p->p_ksi); 182 PROC_UNLOCK(p->p_pptr); 183 184 PROC_UNLOCK(p); 185 186 #ifdef AUDIT 187 /* 188 * The Sun BSM exit token contains two components: an exit status as 189 * passed to exit(), and a return value to indicate what sort of exit 190 * it was. The exit status is WEXITSTATUS(rv), but it's not clear 191 * what the return value is. 192 */ 193 AUDIT_ARG(exit, WEXITSTATUS(rv), 0); 194 AUDIT_SYSCALL_EXIT(0, td); 195 #endif 196 197 /* Are we a task leader? */ 198 if (p == p->p_leader) { 199 mtx_lock(&ppeers_lock); 200 q = p->p_peers; 201 while (q != NULL) { 202 PROC_LOCK(q); 203 psignal(q, SIGKILL); 204 PROC_UNLOCK(q); 205 q = q->p_peers; 206 } 207 while (p->p_peers != NULL) 208 msleep(p, &ppeers_lock, PWAIT, "exit1", 0); 209 mtx_unlock(&ppeers_lock); 210 } 211 212 PROC_LOCK(p); 213 _STOPEVENT(p, S_EXIT, rv); 214 wakeup(&p->p_stype); /* Wakeup anyone in procfs' PIOCWAIT */ 215 PROC_UNLOCK(p); 216 217 /* 218 * Check if any loadable modules need anything done at process exit. 219 * E.g. SYSV IPC stuff 220 * XXX what if one of these generates an error? 221 */ 222 EVENTHANDLER_INVOKE(process_exit, p); 223 224 MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage), 225 M_ZOMBIE, M_WAITOK); 226 /* 227 * If parent is waiting for us to exit or exec, 228 * P_PPWAIT is set; we will wakeup the parent below. 229 */ 230 PROC_LOCK(p); 231 stopprofclock(p); 232 p->p_flag &= ~(P_TRACED | P_PPWAIT); 233 234 /* 235 * Stop the real interval timer. If the handler is currently 236 * executing, prevent it from rearming itself and let it finish. 237 */ 238 if (timevalisset(&p->p_realtimer.it_value) && 239 callout_stop(&p->p_itcallout) == 0) { 240 timevalclear(&p->p_realtimer.it_interval); 241 msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0); 242 KASSERT(!timevalisset(&p->p_realtimer.it_value), 243 ("realtime timer is still armed")); 244 } 245 sigqueue_flush(&p->p_sigqueue); 246 sigqueue_flush(&td->td_sigqueue); 247 PROC_UNLOCK(p); 248 249 /* 250 * Reset any sigio structures pointing to us as a result of 251 * F_SETOWN with our pid. 252 */ 253 mtx_lock(&Giant); /* XXX: not sure if needed */ 254 funsetownlst(&p->p_sigiolst); 255 mtx_unlock(&Giant); 256 257 /* 258 * If this process has an nlminfo data area (for lockd), release it 259 */ 260 if (nlminfo_release_p != NULL && p->p_nlminfo != NULL) 261 (*nlminfo_release_p)(p); 262 263 /* 264 * Close open files and release open-file table. 265 * This may block! 266 */ 267 fdfree(td); 268 269 /* 270 * If this thread tickled GEOM, we need to wait for the giggling to 271 * stop before we return to userland 272 */ 273 if (td->td_pflags & TDP_GEOM) 274 g_waitidle(); 275 276 /* 277 * Remove ourself from our leader's peer list and wake our leader. 278 */ 279 mtx_lock(&ppeers_lock); 280 if (p->p_leader->p_peers) { 281 q = p->p_leader; 282 while (q->p_peers != p) 283 q = q->p_peers; 284 q->p_peers = p->p_peers; 285 wakeup(p->p_leader); 286 } 287 mtx_unlock(&ppeers_lock); 288 289 /* The next two chunks should probably be moved to vmspace_exit. */ 290 vm = p->p_vmspace; 291 /* 292 * Release user portion of address space. 293 * This releases references to vnodes, 294 * which could cause I/O if the file has been unlinked. 295 * Need to do this early enough that we can still sleep. 296 * Can't free the entire vmspace as the kernel stack 297 * may be mapped within that space also. 298 * 299 * Processes sharing the same vmspace may exit in one order, and 300 * get cleaned up by vmspace_exit() in a different order. The 301 * last exiting process to reach this point releases as much of 302 * the environment as it can, and the last process cleaned up 303 * by vmspace_exit() (which decrements exitingcnt) cleans up the 304 * remainder. 305 */ 306 atomic_add_int(&vm->vm_exitingcnt, 1); 307 do 308 refcnt = vm->vm_refcnt; 309 while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1)); 310 if (refcnt == 1) { 311 shmexit(vm); 312 pmap_remove_pages(vmspace_pmap(vm), vm_map_min(&vm->vm_map), 313 vm_map_max(&vm->vm_map)); 314 (void) vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map), 315 vm_map_max(&vm->vm_map)); 316 } 317 318 sx_xlock(&proctree_lock); 319 if (SESS_LEADER(p)) { 320 struct session *sp; 321 322 sp = p->p_session; 323 if (sp->s_ttyvp) { 324 locked = VFS_LOCK_GIANT(sp->s_ttyvp->v_mount); 325 /* 326 * Controlling process. 327 * Signal foreground pgrp, 328 * drain controlling terminal 329 * and revoke access to controlling terminal. 330 */ 331 if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) { 332 tp = sp->s_ttyp; 333 if (sp->s_ttyp->t_pgrp) { 334 PGRP_LOCK(sp->s_ttyp->t_pgrp); 335 pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1); 336 PGRP_UNLOCK(sp->s_ttyp->t_pgrp); 337 } 338 /* XXX tp should be locked. */ 339 sx_xunlock(&proctree_lock); 340 (void) ttywait(tp); 341 sx_xlock(&proctree_lock); 342 /* 343 * The tty could have been revoked 344 * if we blocked. 345 */ 346 if (sp->s_ttyvp) { 347 ttyvp = sp->s_ttyvp; 348 SESS_LOCK(p->p_session); 349 sp->s_ttyvp = NULL; 350 SESS_UNLOCK(p->p_session); 351 sx_xunlock(&proctree_lock); 352 VOP_LOCK(ttyvp, LK_EXCLUSIVE, td); 353 VOP_REVOKE(ttyvp, REVOKEALL); 354 vput(ttyvp); 355 sx_xlock(&proctree_lock); 356 } 357 } 358 if (sp->s_ttyvp) { 359 ttyvp = sp->s_ttyvp; 360 SESS_LOCK(p->p_session); 361 sp->s_ttyvp = NULL; 362 SESS_UNLOCK(p->p_session); 363 vrele(ttyvp); 364 } 365 /* 366 * s_ttyp is not zero'd; we use this to indicate 367 * that the session once had a controlling terminal. 368 * (for logging and informational purposes) 369 */ 370 VFS_UNLOCK_GIANT(locked); 371 } 372 SESS_LOCK(p->p_session); 373 sp->s_leader = NULL; 374 SESS_UNLOCK(p->p_session); 375 } 376 fixjobc(p, p->p_pgrp, 0); 377 sx_xunlock(&proctree_lock); 378 (void)acct_process(td); 379 #ifdef KTRACE 380 /* 381 * Drain any pending records on the thread and release the trace 382 * file. It might be better if drain-and-clear were atomic. 383 */ 384 ktrprocexit(td); 385 PROC_LOCK(p); 386 mtx_lock(&ktrace_mtx); 387 p->p_traceflag = 0; /* don't trace the vrele() */ 388 tracevp = p->p_tracevp; 389 p->p_tracevp = NULL; 390 tracecred = p->p_tracecred; 391 p->p_tracecred = NULL; 392 mtx_unlock(&ktrace_mtx); 393 PROC_UNLOCK(p); 394 if (tracevp != NULL) { 395 locked = VFS_LOCK_GIANT(tracevp->v_mount); 396 vrele(tracevp); 397 VFS_UNLOCK_GIANT(locked); 398 } 399 if (tracecred != NULL) 400 crfree(tracecred); 401 #endif 402 /* 403 * Release reference to text vnode 404 */ 405 if ((vtmp = p->p_textvp) != NULL) { 406 p->p_textvp = NULL; 407 locked = VFS_LOCK_GIANT(vtmp->v_mount); 408 vrele(vtmp); 409 VFS_UNLOCK_GIANT(locked); 410 } 411 412 /* 413 * Release our limits structure. 414 */ 415 PROC_LOCK(p); 416 plim = p->p_limit; 417 p->p_limit = NULL; 418 PROC_UNLOCK(p); 419 lim_free(plim); 420 421 /* 422 * Remove proc from allproc queue and pidhash chain. 423 * Place onto zombproc. Unlink from parent's child list. 424 */ 425 sx_xlock(&allproc_lock); 426 LIST_REMOVE(p, p_list); 427 LIST_INSERT_HEAD(&zombproc, p, p_list); 428 LIST_REMOVE(p, p_hash); 429 sx_xunlock(&allproc_lock); 430 431 /* 432 * Reparent all of our children to init. 433 */ 434 sx_xlock(&proctree_lock); 435 q = LIST_FIRST(&p->p_children); 436 if (q != NULL) /* only need this if any child is S_ZOMB */ 437 wakeup(initproc); 438 for (; q != NULL; q = nq) { 439 nq = LIST_NEXT(q, p_sibling); 440 PROC_LOCK(q); 441 proc_reparent(q, initproc); 442 q->p_sigparent = SIGCHLD; 443 /* 444 * Traced processes are killed 445 * since their existence means someone is screwing up. 446 */ 447 if (q->p_flag & P_TRACED) { 448 q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE); 449 psignal(q, SIGKILL); 450 } 451 PROC_UNLOCK(q); 452 } 453 454 /* 455 * Save exit status and finalize rusage info except for times, 456 * adding in child rusage info later when our time is locked. 457 */ 458 PROC_LOCK(p); 459 p->p_xstat = rv; 460 p->p_xthread = td; 461 p->p_stats->p_ru.ru_nvcsw++; 462 *p->p_ru = p->p_stats->p_ru; 463 464 /* 465 * Notify interested parties of our demise. 466 */ 467 KNOTE_LOCKED(&p->p_klist, NOTE_EXIT); 468 469 /* 470 * Just delete all entries in the p_klist. At this point we won't 471 * report any more events, and there are nasty race conditions that 472 * can beat us if we don't. 473 */ 474 knlist_clear(&p->p_klist, 1); 475 476 /* 477 * Notify parent that we're gone. If parent has the PS_NOCLDWAIT 478 * flag set, or if the handler is set to SIG_IGN, notify process 479 * 1 instead (and hope it will handle this situation). 480 */ 481 PROC_LOCK(p->p_pptr); 482 mtx_lock(&p->p_pptr->p_sigacts->ps_mtx); 483 if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) { 484 struct proc *pp; 485 486 mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); 487 pp = p->p_pptr; 488 PROC_UNLOCK(pp); 489 proc_reparent(p, initproc); 490 p->p_sigparent = SIGCHLD; 491 PROC_LOCK(p->p_pptr); 492 /* 493 * If this was the last child of our parent, notify 494 * parent, so in case he was wait(2)ing, he will 495 * continue. 496 */ 497 if (LIST_EMPTY(&pp->p_children)) 498 wakeup(pp); 499 } else 500 mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); 501 502 if (p->p_pptr == initproc) 503 psignal(p->p_pptr, SIGCHLD); 504 else if (p->p_sigparent != 0) { 505 if (p->p_sigparent == SIGCHLD) 506 childproc_exited(p); 507 else /* LINUX thread */ 508 psignal(p->p_pptr, p->p_sigparent); 509 } 510 PROC_UNLOCK(p->p_pptr); 511 PROC_UNLOCK(p); 512 513 /* 514 * Finally, call machine-dependent code to release the remaining 515 * resources including address space. 516 * The address space is released by "vmspace_exitfree(p)" in 517 * vm_waitproc(). 518 */ 519 cpu_exit(td); 520 521 WITNESS_WARN(WARN_PANIC, &proctree_lock.sx_object, 522 "process (pid %d) exiting", p->p_pid); 523 524 PROC_LOCK(p); 525 PROC_LOCK(p->p_pptr); 526 sx_xunlock(&proctree_lock); 527 528 /* 529 * We have to wait until after acquiring all locks before 530 * changing p_state. We need to avoid all possible context 531 * switches (including ones from blocking on a mutex) while 532 * marked as a zombie. We also have to set the zombie state 533 * before we release the parent process' proc lock to avoid 534 * a lost wakeup. So, we first call wakeup, then we grab the 535 * sched lock, update the state, and release the parent process' 536 * proc lock. 537 */ 538 wakeup(p->p_pptr); 539 mtx_lock_spin(&sched_lock); 540 p->p_state = PRS_ZOMBIE; 541 PROC_UNLOCK(p->p_pptr); 542 543 ruadd(p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux); 544 545 /* Do the same timestamp bookkeeping that mi_switch() would do. */ 546 binuptime(&new_switchtime); 547 bintime_add(&p->p_rux.rux_runtime, &new_switchtime); 548 bintime_sub(&p->p_rux.rux_runtime, PCPU_PTR(switchtime)); 549 PCPU_SET(switchtime, new_switchtime); 550 PCPU_SET(switchticks, ticks); 551 cnt.v_swtch++; 552 553 sched_exit(p->p_pptr, td); 554 555 /* 556 * Hopefully no one will try to deliver a signal to the process this 557 * late in the game. 558 */ 559 knlist_destroy(&p->p_klist); 560 561 /* 562 * Make sure the scheduler takes this thread out of its tables etc. 563 * This will also release this thread's reference to the ucred. 564 * Other thread parts to release include pcb bits and such. 565 */ 566 thread_exit(); 567 } 568 569 570 #ifndef _SYS_SYSPROTO_H_ 571 struct abort2_args { 572 char *why; 573 int nargs; 574 void **args; 575 }; 576 #endif 577 578 /* 579 * MPSAFE. 580 */ 581 int 582 abort2(struct thread *td, struct abort2_args *uap) 583 { 584 struct proc *p = td->td_proc; 585 struct sbuf *sb; 586 void *uargs[16]; 587 int error, i, sig; 588 589 error = 0; /* satisfy compiler */ 590 591 /* 592 * Do it right now so we can log either proper call of abort2(), or 593 * note, that invalid argument was passed. 512 is big enough to 594 * handle 16 arguments' descriptions with additional comments. 595 */ 596 sb = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN); 597 sbuf_clear(sb); 598 sbuf_printf(sb, "%s(pid %d uid %d) aborted: ", 599 p->p_comm, p->p_pid, td->td_ucred->cr_uid); 600 /* 601 * Since we can't return from abort2(), send SIGKILL in cases, where 602 * abort2() was called improperly 603 */ 604 sig = SIGKILL; 605 /* Prevent from DoSes from user-space. */ 606 if (uap->nargs < 0 || uap->nargs > 16) 607 goto out; 608 if (uap->args == NULL) 609 goto out; 610 error = copyin(uap->args, uargs, uap->nargs * sizeof(void *)); 611 if (error != 0) 612 goto out; 613 /* 614 * Limit size of 'reason' string to 128. Will fit even when 615 * maximal number of arguments was chosen to be logged. 616 */ 617 if (uap->why != NULL) { 618 error = sbuf_copyin(sb, uap->why, 128); 619 if (error < 0) 620 goto out; 621 } else { 622 sbuf_printf(sb, "(null)"); 623 } 624 if (uap->nargs) { 625 sbuf_printf(sb, "("); 626 for (i = 0;i < uap->nargs; i++) 627 sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]); 628 sbuf_printf(sb, ")"); 629 } 630 /* 631 * Final stage: arguments were proper, string has been 632 * successfully copied from userspace, and copying pointers 633 * from user-space succeed. 634 */ 635 sig = SIGABRT; 636 out: 637 if (sig == SIGKILL) { 638 sbuf_trim(sb); 639 sbuf_printf(sb, " (Reason text inaccessible)"); 640 } 641 sbuf_cat(sb, "\n"); 642 sbuf_finish(sb); 643 log(LOG_INFO, "%s", sbuf_data(sb)); 644 sbuf_delete(sb); 645 exit1(td, W_EXITCODE(0, sig)); 646 return (0); 647 } 648 649 650 #ifdef COMPAT_43 651 /* 652 * The dirty work is handled by kern_wait(). 653 * 654 * MPSAFE. 655 */ 656 int 657 owait(struct thread *td, struct owait_args *uap __unused) 658 { 659 int error, status; 660 661 error = kern_wait(td, WAIT_ANY, &status, 0, NULL); 662 if (error == 0) 663 td->td_retval[1] = status; 664 return (error); 665 } 666 #endif /* COMPAT_43 */ 667 668 /* 669 * The dirty work is handled by kern_wait(). 670 * 671 * MPSAFE. 672 */ 673 int 674 wait4(struct thread *td, struct wait_args *uap) 675 { 676 struct rusage ru, *rup; 677 int error, status; 678 679 if (uap->rusage != NULL) 680 rup = &ru; 681 else 682 rup = NULL; 683 error = kern_wait(td, uap->pid, &status, uap->options, rup); 684 if (uap->status != NULL && error == 0) 685 error = copyout(&status, uap->status, sizeof(status)); 686 if (uap->rusage != NULL && error == 0) 687 error = copyout(&ru, uap->rusage, sizeof(struct rusage)); 688 return (error); 689 } 690 691 int 692 kern_wait(struct thread *td, pid_t pid, int *status, int options, 693 struct rusage *rusage) 694 { 695 struct proc *p, *q, *t; 696 int error, nfound; 697 698 AUDIT_ARG(pid, pid); 699 700 q = td->td_proc; 701 if (pid == 0) { 702 PROC_LOCK(q); 703 pid = -q->p_pgid; 704 PROC_UNLOCK(q); 705 } 706 if (options &~ (WUNTRACED|WNOHANG|WCONTINUED|WLINUXCLONE)) 707 return (EINVAL); 708 loop: 709 if (q->p_flag & P_STATCHILD) { 710 PROC_LOCK(q); 711 q->p_flag &= ~P_STATCHILD; 712 PROC_UNLOCK(q); 713 } 714 nfound = 0; 715 sx_xlock(&proctree_lock); 716 LIST_FOREACH(p, &q->p_children, p_sibling) { 717 PROC_LOCK(p); 718 if (pid != WAIT_ANY && 719 p->p_pid != pid && p->p_pgid != -pid) { 720 PROC_UNLOCK(p); 721 continue; 722 } 723 if (p_canwait(td, p)) { 724 PROC_UNLOCK(p); 725 continue; 726 } 727 728 /* 729 * This special case handles a kthread spawned by linux_clone 730 * (see linux_misc.c). The linux_wait4 and linux_waitpid 731 * functions need to be able to distinguish between waiting 732 * on a process and waiting on a thread. It is a thread if 733 * p_sigparent is not SIGCHLD, and the WLINUXCLONE option 734 * signifies we want to wait for threads and not processes. 735 */ 736 if ((p->p_sigparent != SIGCHLD) ^ 737 ((options & WLINUXCLONE) != 0)) { 738 PROC_UNLOCK(p); 739 continue; 740 } 741 742 nfound++; 743 if (p->p_state == PRS_ZOMBIE) { 744 745 /* 746 * It is possible that the last thread of this 747 * process is still running on another CPU 748 * in thread_exit() after having dropped the process 749 * lock via PROC_UNLOCK() but before it has completed 750 * cpu_throw(). In that case, the other thread must 751 * still hold sched_lock, so simply by acquiring 752 * sched_lock once we will wait long enough for the 753 * thread to exit in that case. 754 */ 755 mtx_lock_spin(&sched_lock); 756 mtx_unlock_spin(&sched_lock); 757 758 td->td_retval[0] = p->p_pid; 759 if (status) 760 *status = p->p_xstat; /* convert to int */ 761 if (rusage) { 762 *rusage = *p->p_ru; 763 calcru(p, &rusage->ru_utime, &rusage->ru_stime); 764 } 765 766 PROC_LOCK(q); 767 sigqueue_take(p->p_ksi); 768 PROC_UNLOCK(q); 769 770 /* 771 * If we got the child via a ptrace 'attach', 772 * we need to give it back to the old parent. 773 */ 774 PROC_UNLOCK(p); 775 if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) { 776 PROC_LOCK(p); 777 p->p_oppid = 0; 778 proc_reparent(p, t); 779 PROC_UNLOCK(p); 780 tdsignal(t, NULL, SIGCHLD, p->p_ksi); 781 wakeup(t); 782 PROC_UNLOCK(t); 783 sx_xunlock(&proctree_lock); 784 return (0); 785 } 786 787 /* 788 * Remove other references to this process to ensure 789 * we have an exclusive reference. 790 */ 791 sx_xlock(&allproc_lock); 792 LIST_REMOVE(p, p_list); /* off zombproc */ 793 sx_xunlock(&allproc_lock); 794 LIST_REMOVE(p, p_sibling); 795 leavepgrp(p); 796 sx_xunlock(&proctree_lock); 797 798 /* 799 * As a side effect of this lock, we know that 800 * all other writes to this proc are visible now, so 801 * no more locking is needed for p. 802 */ 803 PROC_LOCK(p); 804 p->p_xstat = 0; /* XXX: why? */ 805 PROC_UNLOCK(p); 806 PROC_LOCK(q); 807 ruadd(&q->p_stats->p_cru, &q->p_crux, p->p_ru, 808 &p->p_rux); 809 PROC_UNLOCK(q); 810 FREE(p->p_ru, M_ZOMBIE); 811 p->p_ru = NULL; 812 813 /* 814 * Decrement the count of procs running with this uid. 815 */ 816 (void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0); 817 818 /* 819 * Free credentials, arguments, and sigacts. 820 */ 821 crfree(p->p_ucred); 822 p->p_ucred = NULL; 823 pargs_drop(p->p_args); 824 p->p_args = NULL; 825 sigacts_free(p->p_sigacts); 826 p->p_sigacts = NULL; 827 828 /* 829 * Do any thread-system specific cleanups. 830 */ 831 thread_wait(p); 832 833 /* 834 * Give vm and machine-dependent layer a chance 835 * to free anything that cpu_exit couldn't 836 * release while still running in process context. 837 */ 838 vm_waitproc(p); 839 #ifdef MAC 840 mac_destroy_proc(p); 841 #endif 842 #ifdef AUDIT 843 audit_proc_free(p); 844 #endif 845 KASSERT(FIRST_THREAD_IN_PROC(p), 846 ("kern_wait: no residual thread!")); 847 uma_zfree(proc_zone, p); 848 sx_xlock(&allproc_lock); 849 nprocs--; 850 sx_xunlock(&allproc_lock); 851 return (0); 852 } 853 mtx_lock_spin(&sched_lock); 854 if ((p->p_flag & P_STOPPED_SIG) && 855 (p->p_suspcount == p->p_numthreads) && 856 (p->p_flag & P_WAITED) == 0 && 857 (p->p_flag & P_TRACED || options & WUNTRACED)) { 858 mtx_unlock_spin(&sched_lock); 859 p->p_flag |= P_WAITED; 860 sx_xunlock(&proctree_lock); 861 td->td_retval[0] = p->p_pid; 862 if (status) 863 *status = W_STOPCODE(p->p_xstat); 864 PROC_UNLOCK(p); 865 866 PROC_LOCK(q); 867 sigqueue_take(p->p_ksi); 868 PROC_UNLOCK(q); 869 870 return (0); 871 } 872 mtx_unlock_spin(&sched_lock); 873 if (options & WCONTINUED && (p->p_flag & P_CONTINUED)) { 874 sx_xunlock(&proctree_lock); 875 td->td_retval[0] = p->p_pid; 876 p->p_flag &= ~P_CONTINUED; 877 PROC_UNLOCK(p); 878 879 PROC_LOCK(q); 880 sigqueue_take(p->p_ksi); 881 PROC_UNLOCK(q); 882 883 if (status) 884 *status = SIGCONT; 885 return (0); 886 } 887 PROC_UNLOCK(p); 888 } 889 if (nfound == 0) { 890 sx_xunlock(&proctree_lock); 891 return (ECHILD); 892 } 893 if (options & WNOHANG) { 894 sx_xunlock(&proctree_lock); 895 td->td_retval[0] = 0; 896 return (0); 897 } 898 PROC_LOCK(q); 899 sx_xunlock(&proctree_lock); 900 if (q->p_flag & P_STATCHILD) { 901 q->p_flag &= ~P_STATCHILD; 902 error = 0; 903 } else 904 error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0); 905 PROC_UNLOCK(q); 906 if (error) 907 return (error); 908 goto loop; 909 } 910 911 /* 912 * Make process 'parent' the new parent of process 'child'. 913 * Must be called with an exclusive hold of proctree lock. 914 */ 915 void 916 proc_reparent(struct proc *child, struct proc *parent) 917 { 918 919 sx_assert(&proctree_lock, SX_XLOCKED); 920 PROC_LOCK_ASSERT(child, MA_OWNED); 921 if (child->p_pptr == parent) 922 return; 923 924 LIST_REMOVE(child, p_sibling); 925 LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); 926 child->p_pptr = parent; 927 } 928