/*-
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_exit.c	8.7 (Berkeley) 2/12/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_kdtrace.h"
#include "opt_ktrace.h"
#include "opt_procdesc.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/capability.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/procdesc.h>
#include <sys/pioctl.h>
#include <sys/jail.h>
#include <sys/tty.h>
#include <sys/wait.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/signalvar.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/syslog.h>
#include <sys/ptrace.h>
#include <sys/acct.h>		/* for acct_process() function prototype */
#include <sys/filedesc.h>
#include <sys/sdt.h>
#include <sys/shm.h>
#include <sys/sem.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/uma.h>

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>
dtrace_execexit_func_t	dtrace_fasttrap_exit;
#endif

SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE1(proc, kernel, , exit, exit, "int");

/* Hook for NFS teardown procedure. */
void (*nlminfo_release_p)(struct proc *p);

static void
clear_orphan(struct proc *p)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);

	if (p->p_flag & P_ORPHAN) {
		LIST_REMOVE(p, p_orphan);
		p->p_flag &= ~P_ORPHAN;
	}
}

/*
 * exit -- death of process.
 */
void
sys_sys_exit(struct thread *td, struct sys_exit_args *uap)
{

	exit1(td, W_EXITCODE(uap->rval, 0));
	/* NOTREACHED */
}

/*
 * Exit: deallocate address space and other resources, change proc state to
 * zombie, and unlink proc from allproc and parent's lists.  Save exit status
 * and rusage for wait().  Check for child processes and orphan them.
 */
void
exit1(struct thread *td, int rv)
{
	struct proc *p, *nq, *q;
	struct vnode *vtmp;
	struct vnode *ttyvp = NULL;
	struct plimit *plim;

	mtx_assert(&Giant, MA_NOTOWNED);

	p = td->td_proc;
	/*
	 * XXX in case we're rebooting we just let init die in order to
	 * work around an unsolved stack overflow seen very late during
	 * shutdown on sparc64 when the gmirror worker process exists.
	 */
	if (p == initproc && rebooting == 0) {
		printf("init died (signal %d, exit %d)\n",
		    WTERMSIG(rv), WEXITSTATUS(rv));
		panic("Going nowhere without my init!");
	}

	/*
	 * MUST abort all other threads before proceeding past here.
	 */
	PROC_LOCK(p);
	while (p->p_flag & P_HADTHREADS) {
		/*
		 * First check if some other thread got here before us.
		 * If so, act appropriately: exit or suspend.
		 */
		thread_suspend_check(0);

		/*
		 * Kill off the other threads.  This requires
		 * some co-operation from other parts of the kernel
		 * so it may not be instantaneous.  With this state set
		 * any thread entering the kernel from userspace will
		 * thread_exit() in trap().
		 * Any thread attempting to sleep will return immediately
		 * with EINTR or EWOULDBLOCK which will hopefully force them
		 * to back out to userland freeing resources as they go.
		 * Any thread attempting to return to userland will
		 * thread_exit() from userret().  thread_exit() will unsuspend
		 * us when the last of the other threads exits.
		 * If another thread is already single-threading the process
		 * after resumption, calling thread_single() will fail; in
		 * that case, we just re-check all suspension requests; the
		 * thread should either be suspended there or exit.
		 */
		if (!thread_single(SINGLE_EXIT))
			break;

		/*
		 * All other activity in this process is now stopped.
		 * Threading support has been turned off.
		 */
	}
	KASSERT(p->p_numthreads == 1,
	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
	racct_sub(p, RACCT_NTHR, 1);
	/*
	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
	 * on our vmspace, so we should block below until they have
	 * released their reference to us.  Note that if they have
	 * requested S_EXIT stops we will block here until they ack
	 * via PIOCCONT.
	 */
	_STOPEVENT(p, S_EXIT, rv);

	/*
	 * Ignore any pending request to stop due to a stop signal.
	 * Once P_WEXIT is set, future requests will be ignored as
	 * well.
	 */
	p->p_flag &= ~P_STOPPED_SIG;
	KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped"));

	/*
	 * Note that we are exiting and do another wakeup of anyone in
	 * PIOCWAIT in case they aren't listening for S_EXIT stops or
	 * decided to wait again after we told them we are exiting.
	 */
	p->p_flag |= P_WEXIT;
	wakeup(&p->p_stype);

	/*
	 * Wait for any processes that have a hold on our vmspace to
	 * release their reference.
	 */
	while (p->p_lock > 0)
		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);

	p->p_xstat = rv;	/* Let event handler change exit status */
	PROC_UNLOCK(p);
	/* Drain the limit callout while we don't have the proc locked */
	callout_drain(&p->p_limco);

#ifdef AUDIT
	/*
	 * The Sun BSM exit token contains two components: an exit status as
	 * passed to exit(), and a return value to indicate what sort of exit
	 * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
	 * what the return value is.
	 */
	AUDIT_ARG_EXIT(WEXITSTATUS(rv), 0);
	AUDIT_SYSCALL_EXIT(0, td);
#endif

	/* Are we a task leader? */
	if (p == p->p_leader) {
		mtx_lock(&ppeers_lock);
		q = p->p_peers;
		while (q != NULL) {
			PROC_LOCK(q);
			kern_psignal(q, SIGKILL);
			PROC_UNLOCK(q);
			q = q->p_peers;
		}
		while (p->p_peers != NULL)
			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
		mtx_unlock(&ppeers_lock);
	}

	/*
	 * Check if any loadable modules need anything done at process exit.
	 * E.g. SYSV IPC stuff.
	 * XXX what if one of these generates an error?
	 */
	EVENTHANDLER_INVOKE(process_exit, p);

	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	PROC_LOCK(p);
	rv = p->p_xstat;	/* Event handler could change exit status */
	stopprofclock(p);
	p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE);

	/*
	 * Stop the real interval timer.  If the handler is currently
	 * executing, prevent it from rearming itself and let it finish.
	 */
	if (timevalisset(&p->p_realtimer.it_value) &&
	    callout_stop(&p->p_itcallout) == 0) {
		timevalclear(&p->p_realtimer.it_interval);
		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
		KASSERT(!timevalisset(&p->p_realtimer.it_value),
		    ("realtime timer is still armed"));
	}
	PROC_UNLOCK(p);

	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pid.
	 */
	funsetownlst(&p->p_sigiolst);

	/*
	 * If this process has an nlminfo data area (for lockd), release it
	 */
	if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
		(*nlminfo_release_p)(p);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdescfree(td);

	/*
	 * If this thread tickled GEOM, we need to wait for the giggling to
	 * stop before we return to userland
	 */
	if (td->td_pflags & TDP_GEOM)
		g_waitidle();

	/*
	 * Remove ourself from our leader's peer list and wake our leader.
	 */
	mtx_lock(&ppeers_lock);
	if (p->p_leader->p_peers) {
		q = p->p_leader;
		while (q->p_peers != p)
			q = q->p_peers;
		q->p_peers = p->p_peers;
		wakeup(p->p_leader);
	}
	mtx_unlock(&ppeers_lock);

	vmspace_exit(td);

	sx_xlock(&proctree_lock);
	if (SESS_LEADER(p)) {
		struct session *sp = p->p_session;
		struct tty *tp;

		/*
		 * s_ttyp is not zero'd; we use this to indicate that
		 * the session once had a controlling terminal. (for
		 * logging and informational purposes)
		 */
		SESS_LOCK(sp);
		ttyvp = sp->s_ttyvp;
		tp = sp->s_ttyp;
		sp->s_ttyvp = NULL;
		sp->s_ttydp = NULL;
		sp->s_leader = NULL;
		SESS_UNLOCK(sp);

		/*
		 * Signal foreground pgrp and revoke access to
		 * controlling terminal if it has not been revoked
		 * already.
		 *
		 * Because the TTY may have been revoked in the mean
		 * time and could already have a new session associated
		 * with it, make sure we don't send a SIGHUP to a
		 * foreground process group that does not belong to this
		 * session.
		 */

		if (tp != NULL) {
			tty_lock(tp);
			if (tp->t_session == sp)
				tty_signal_pgrp(tp, SIGHUP);
			tty_unlock(tp);
		}

		if (ttyvp != NULL) {
			sx_xunlock(&proctree_lock);
			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
				VOP_REVOKE(ttyvp, REVOKEALL);
				VOP_UNLOCK(ttyvp, 0);
			}
			sx_xlock(&proctree_lock);
		}
	}
	fixjobc(p, p->p_pgrp, 0);
	sx_xunlock(&proctree_lock);
	(void)acct_process(td);

	/* Release the TTY now we've unlocked everything. */
	if (ttyvp != NULL)
		vrele(ttyvp);
#ifdef KTRACE
	ktrprocexit(td);
#endif
	/*
	 * Release reference to text vnode
	 */
	if ((vtmp = p->p_textvp) != NULL) {
		p->p_textvp = NULL;
		vrele(vtmp);
	}

	/*
	 * Release our limits structure.
	 */
	PROC_LOCK(p);
	plim = p->p_limit;
	p->p_limit = NULL;
	PROC_UNLOCK(p);
	lim_free(plim);

	tidhash_remove(td);

	/*
	 * Remove proc from allproc queue and pidhash chain.
	 * Place onto zombproc.  Unlink from parent's child list.
	 */
	sx_xlock(&allproc_lock);
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);
	LIST_REMOVE(p, p_hash);
	sx_xunlock(&allproc_lock);

	/*
	 * Call machine-dependent code to release any
	 * machine-dependent resources other than the address space.
	 * The address space is released by "vmspace_exitfree(p)" in
	 * vm_waitproc().
	 */
	cpu_exit(td);

	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);

	/*
	 * Reparent all of our children to init.
	 */
	sx_xlock(&proctree_lock);
	q = LIST_FIRST(&p->p_children);
	if (q != NULL)		/* only need this if any child is S_ZOMB */
		wakeup(initproc);
	for (; q != NULL; q = nq) {
		nq = LIST_NEXT(q, p_sibling);
		PROC_LOCK(q);
		proc_reparent(q, initproc);
		q->p_sigparent = SIGCHLD;
		/*
		 * Traced processes are killed
		 * since their existence means someone is screwing up.
		 */
		if (q->p_flag & P_TRACED) {
			struct thread *temp;

			/*
			 * Since q was found on our children list, the
			 * proc_reparent() call moved q to the orphan
			 * list due to present P_TRACED flag.  Clear
			 * orphan link for q now while q is locked.
			 */
			clear_orphan(q);
			q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
			FOREACH_THREAD_IN_PROC(q, temp)
				temp->td_dbgflags &= ~TDB_SUSPEND;
			kern_psignal(q, SIGKILL);
		}
		PROC_UNLOCK(q);
	}

	/*
	 * Also get rid of our orphans.
	 */
	while ((q = LIST_FIRST(&p->p_orphans)) != NULL) {
		PROC_LOCK(q);
		clear_orphan(q);
		PROC_UNLOCK(q);
	}

	/* Save exit status. */
	PROC_LOCK(p);
	p->p_xthread = td;

	/* Tell the prison that we are gone. */
	prison_proc_free(p->p_ucred->cr_prison);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exit if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exit)
		dtrace_fasttrap_exit(p);
#endif

	/*
	 * Notify interested parties of our demise.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);

#ifdef KDTRACE_HOOKS
	int reason = CLD_EXITED;
	if (WCOREDUMP(rv))
		reason = CLD_DUMPED;
	else if (WIFSIGNALED(rv))
		reason = CLD_KILLED;
	SDT_PROBE(proc, kernel, , exit, reason, 0, 0, 0, 0);
#endif

	/*
	 * Just delete all entries in the p_klist.  At this point we won't
	 * report any more events, and there are nasty race conditions that
	 * can beat us if we don't.
	 */
	knlist_clear(&p->p_klist, 1);

	/*
	 * If this is a process with a descriptor, we may not need to deliver
	 * a signal to the parent.  proctree_lock is held over
	 * procdesc_exit() to serialize concurrent calls to close() and
	 * exit().
	 */
#ifdef PROCDESC
	if (p->p_procdesc == NULL || procdesc_exit(p)) {
#endif
		/*
		 * Notify parent that we're gone.  If parent has the
		 * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN,
		 * notify process 1 instead (and hope it will handle this
		 * situation).
		 */
		PROC_LOCK(p->p_pptr);
		mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
		if (p->p_pptr->p_sigacts->ps_flag &
		    (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
			struct proc *pp;

			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
			pp = p->p_pptr;
			PROC_UNLOCK(pp);
			proc_reparent(p, initproc);
			p->p_sigparent = SIGCHLD;
			PROC_LOCK(p->p_pptr);

			/*
			 * Notify parent, so in case he was wait(2)ing or
			 * executing waitpid(2) with our pid, he will
			 * continue.
			 */
			wakeup(pp);
		} else
			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);

		if (p->p_pptr == initproc)
			kern_psignal(p->p_pptr, SIGCHLD);
		else if (p->p_sigparent != 0) {
			if (p->p_sigparent == SIGCHLD)
				childproc_exited(p);
			else	/* LINUX thread */
				kern_psignal(p->p_pptr, p->p_sigparent);
		}
#ifdef PROCDESC
	} else
		PROC_LOCK(p->p_pptr);
#endif
	sx_xunlock(&proctree_lock);

	/*
	 * The state PRS_ZOMBIE prevents other processes from sending
	 * signals to the process.  To avoid a memory leak, we free the
	 * memory for the signal queue at the time when the state is set.
	 */
	sigqueue_flush(&p->p_sigqueue);
	sigqueue_flush(&td->td_sigqueue);

	/*
	 * We have to wait until after acquiring all locks before
	 * changing p_state.  We need to avoid all possible context
	 * switches (including ones from blocking on a mutex) while
	 * marked as a zombie.  We also have to set the zombie state
	 * before we release the parent process' proc lock to avoid
	 * a lost wakeup.  So, we first call wakeup, then we grab the
	 * sched lock, update the state, and release the parent process'
	 * proc lock.
	 */
	wakeup(p->p_pptr);
	cv_broadcast(&p->p_pwait);
	sched_exit(p->p_pptr, td);
	PROC_SLOCK(p);
	p->p_state = PRS_ZOMBIE;
	PROC_UNLOCK(p->p_pptr);

	/*
	 * Hopefully no one will try to deliver a signal to the process this
	 * late in the game.
	 */
	knlist_destroy(&p->p_klist);

	/*
	 * Save our children's rusage information in our exit rusage.
	 */
	ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);

	/*
	 * Make sure the scheduler takes this thread out of its tables etc.
	 * This will also release this thread's reference to the ucred.
	 * Other thread parts to release include pcb bits and such.
	 */
	thread_exit();
}


#ifndef _SYS_SYSPROTO_H_
struct abort2_args {
	char	*why;
	int	nargs;
	void	**args;
};
#endif

int
sys_abort2(struct thread *td, struct abort2_args *uap)
{
	struct proc *p = td->td_proc;
	struct sbuf *sb;
	void *uargs[16];
	int error, i, sig;

	/*
	 * Do it right now so we can log either a proper call of abort2(),
	 * or note that an invalid argument was passed.  512 is big enough
	 * to handle 16 arguments' descriptions with additional comments.
	 */
	sb = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN);
	sbuf_clear(sb);
	sbuf_printf(sb, "%s(pid %d uid %d) aborted: ",
	    p->p_comm, p->p_pid, td->td_ucred->cr_uid);
	/*
	 * Since we can't return from abort2(), send SIGKILL in cases where
	 * abort2() was called improperly.
	 */
	sig = SIGKILL;
	/* Prevent DoSes from user-space. */
	if (uap->nargs < 0 || uap->nargs > 16)
		goto out;
	if (uap->nargs > 0) {
		if (uap->args == NULL)
			goto out;
		error = copyin(uap->args, uargs, uap->nargs * sizeof(void *));
		if (error != 0)
			goto out;
	}
	/*
	 * Limit size of 'reason' string to 128.  Will fit even when
	 * the maximal number of arguments was chosen to be logged.
	 */
	if (uap->why != NULL) {
		error = sbuf_copyin(sb, uap->why, 128);
		if (error < 0)
			goto out;
	} else {
		sbuf_printf(sb, "(null)");
	}
	if (uap->nargs > 0) {
		sbuf_printf(sb, "(");
		for (i = 0; i < uap->nargs; i++)
"" : ", ", uargs[i]); 649 sbuf_printf(sb, ")"); 650 } 651 /* 652 * Final stage: arguments were proper, string has been 653 * successfully copied from userspace, and copying pointers 654 * from user-space succeed. 655 */ 656 sig = SIGABRT; 657 out: 658 if (sig == SIGKILL) { 659 sbuf_trim(sb); 660 sbuf_printf(sb, " (Reason text inaccessible)"); 661 } 662 sbuf_cat(sb, "\n"); 663 sbuf_finish(sb); 664 log(LOG_INFO, "%s", sbuf_data(sb)); 665 sbuf_delete(sb); 666 exit1(td, W_EXITCODE(0, sig)); 667 return (0); 668 } 669 670 671 #ifdef COMPAT_43 672 /* 673 * The dirty work is handled by kern_wait(). 674 */ 675 int 676 owait(struct thread *td, struct owait_args *uap __unused) 677 { 678 int error, status; 679 680 error = kern_wait(td, WAIT_ANY, &status, 0, NULL); 681 if (error == 0) 682 td->td_retval[1] = status; 683 return (error); 684 } 685 #endif /* COMPAT_43 */ 686 687 /* 688 * The dirty work is handled by kern_wait(). 689 */ 690 int 691 sys_wait4(struct thread *td, struct wait4_args *uap) 692 { 693 struct rusage ru, *rup; 694 int error, status; 695 696 if (uap->rusage != NULL) 697 rup = &ru; 698 else 699 rup = NULL; 700 error = kern_wait(td, uap->pid, &status, uap->options, rup); 701 if (uap->status != NULL && error == 0) 702 error = copyout(&status, uap->status, sizeof(status)); 703 if (uap->rusage != NULL && error == 0) 704 error = copyout(&ru, uap->rusage, sizeof(struct rusage)); 705 return (error); 706 } 707 708 int 709 sys_wait6(struct thread *td, struct wait6_args *uap) 710 { 711 struct __wrusage wru, *wrup; 712 siginfo_t si, *sip; 713 idtype_t idtype; 714 id_t id; 715 int error, status; 716 717 idtype = uap->idtype; 718 id = uap->id; 719 720 if (uap->wrusage != NULL) 721 wrup = &wru; 722 else 723 wrup = NULL; 724 725 if (uap->info != NULL) { 726 sip = &si; 727 bzero(sip, sizeof(*sip)); 728 } else 729 sip = NULL; 730 731 /* 732 * We expect all callers of wait6() to know about WEXITED and 733 * WTRAPPED. 734 */ 735 error = kern_wait6(td, idtype, id, &status, uap->options, wrup, sip); 736 737 if (uap->status != NULL && error == 0) 738 error = copyout(&status, uap->status, sizeof(status)); 739 if (uap->wrusage != NULL && error == 0) 740 error = copyout(&wru, uap->wrusage, sizeof(wru)); 741 if (uap->info != NULL && error == 0) 742 error = copyout(&si, uap->info, sizeof(si)); 743 return (error); 744 } 745 746 /* 747 * Reap the remains of a zombie process and optionally return status and 748 * rusage. Asserts and will release both the proctree_lock and the process 749 * lock as part of its work. 750 */ 751 void 752 proc_reap(struct thread *td, struct proc *p, int *status, int options) 753 { 754 struct proc *q, *t; 755 756 sx_assert(&proctree_lock, SA_XLOCKED); 757 PROC_LOCK_ASSERT(p, MA_OWNED); 758 PROC_SLOCK_ASSERT(p, MA_OWNED); 759 KASSERT(p->p_state == PRS_ZOMBIE, ("proc_reap: !PRS_ZOMBIE")); 760 761 q = td->td_proc; 762 763 PROC_SUNLOCK(p); 764 td->td_retval[0] = p->p_pid; 765 if (status) 766 *status = p->p_xstat; /* convert to int */ 767 if (options & WNOWAIT) { 768 /* 769 * Only poll, returning the status. Caller does not wish to 770 * release the proc struct just yet. 771 */ 772 PROC_UNLOCK(p); 773 sx_xunlock(&proctree_lock); 774 return; 775 } 776 777 PROC_LOCK(q); 778 sigqueue_take(p->p_ksi); 779 PROC_UNLOCK(q); 780 PROC_UNLOCK(p); 781 782 /* 783 * If we got the child via a ptrace 'attach', we need to give it back 784 * to the old parent. 
	 */
	if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) {
		PROC_LOCK(p);
		proc_reparent(p, t);
		p->p_oppid = 0;
		PROC_UNLOCK(p);
		pksignal(t, SIGCHLD, p->p_ksi);
		wakeup(t);
		cv_broadcast(&p->p_pwait);
		PROC_UNLOCK(t);
		sx_xunlock(&proctree_lock);
		return;
	}

	/*
	 * Remove other references to this process to ensure we have an
	 * exclusive reference.
	 */
	sx_xlock(&allproc_lock);
	LIST_REMOVE(p, p_list);	/* off zombproc */
	sx_xunlock(&allproc_lock);
	LIST_REMOVE(p, p_sibling);
	PROC_LOCK(p);
	clear_orphan(p);
	PROC_UNLOCK(p);
	leavepgrp(p);
#ifdef PROCDESC
	if (p->p_procdesc != NULL)
		procdesc_reap(p);
#endif
	sx_xunlock(&proctree_lock);

	/*
	 * As a side effect of this lock, we know that all other writes to
	 * this proc are visible now, so no more locking is needed for p.
	 */
	PROC_LOCK(p);
	p->p_xstat = 0;		/* XXX: why? */
	PROC_UNLOCK(p);
	PROC_LOCK(q);
	ruadd(&q->p_stats->p_cru, &q->p_crux, &p->p_ru, &p->p_rux);
	PROC_UNLOCK(q);

	/*
	 * Decrement the count of procs running with this uid.
	 */
	(void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);

	/*
	 * Destroy resource accounting information associated with the process.
	 */
#ifdef RACCT
	PROC_LOCK(p);
	racct_sub(p, RACCT_NPROC, 1);
	PROC_UNLOCK(p);
#endif
	racct_proc_exit(p);

	/*
	 * Free credentials, arguments, and sigacts.
	 */
	crfree(p->p_ucred);
	p->p_ucred = NULL;
	pargs_drop(p->p_args);
	p->p_args = NULL;
	sigacts_free(p->p_sigacts);
	p->p_sigacts = NULL;

	/*
	 * Do any thread-system specific cleanups.
	 */
	thread_wait(p);

	/*
	 * Give vm and machine-dependent layer a chance to free anything that
	 * cpu_exit couldn't release while still running in process context.
	 */
	vm_waitproc(p);
#ifdef MAC
	mac_proc_destroy(p);
#endif
	KASSERT(FIRST_THREAD_IN_PROC(p),
	    ("proc_reap: no residual thread!"));
	uma_zfree(proc_zone, p);
	sx_xlock(&allproc_lock);
	nprocs--;
	sx_xunlock(&allproc_lock);
}

static int
proc_to_reap(struct thread *td, struct proc *p, idtype_t idtype, id_t id,
    int *status, int options, struct __wrusage *wrusage, siginfo_t *siginfo)
{
	struct proc *q;
	struct rusage *rup;

	sx_assert(&proctree_lock, SA_XLOCKED);

	q = td->td_proc;
	PROC_LOCK(p);

	switch (idtype) {
	case P_ALL:
		break;
	case P_PID:
		if (p->p_pid != (pid_t)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	case P_PGID:
		if (p->p_pgid != (pid_t)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	case P_SID:
		if (p->p_session->s_sid != (pid_t)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	case P_UID:
		if (p->p_ucred->cr_uid != (uid_t)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	case P_GID:
		if (p->p_ucred->cr_gid != (gid_t)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	case P_JAILID:
		if (p->p_ucred->cr_prison->pr_id != (int)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	/*
	 * It seems that the thread structures get zeroed out
	 * at process exit.  This makes it impossible to
	 * support P_SETID, P_CID or P_CPUID.
	 */
	default:
		PROC_UNLOCK(p);
		return (0);
	}

	if (p_canwait(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}

	if (((options & WEXITED) == 0) && (p->p_state == PRS_ZOMBIE)) {
		PROC_UNLOCK(p);
		return (0);
	}

	/*
	 * This special case handles a kthread spawned by linux_clone
	 * (see linux_misc.c).  The linux_wait4 and linux_waitpid
	 * functions need to be able to distinguish between waiting
	 * on a process and waiting on a thread.  It is a thread if
	 * p_sigparent is not SIGCHLD, and the WLINUXCLONE option
	 * signifies we want to wait for threads and not processes.
	 */
	if ((p->p_sigparent != SIGCHLD) ^
	    ((options & WLINUXCLONE) != 0)) {
		PROC_UNLOCK(p);
		return (0);
	}

	PROC_SLOCK(p);

	if (siginfo != NULL) {
		bzero(siginfo, sizeof(*siginfo));
		siginfo->si_errno = 0;

		/*
		 * SUSv4 requires that the si_signo value is always
		 * SIGCHLD.  Obey it even though the rfork(2) interface
		 * allows requesting another signal for child exit
		 * notification.
		 */
		siginfo->si_signo = SIGCHLD;

		/*
		 * This is still a rough estimate.  We will fix the
		 * cases TRAPPED, STOPPED, and CONTINUED later.
		 */
		if (WCOREDUMP(p->p_xstat))
			siginfo->si_code = CLD_DUMPED;
		else if (WIFSIGNALED(p->p_xstat))
			siginfo->si_code = CLD_KILLED;
		else
			siginfo->si_code = CLD_EXITED;

		siginfo->si_pid = p->p_pid;
		siginfo->si_uid = p->p_ucred->cr_uid;
		siginfo->si_status = p->p_xstat;

		/*
		 * The si_addr field would be useful additional
		 * detail, but apparently the PC value may be lost
		 * when we reach this point.  bzero() above sets
		 * siginfo->si_addr to NULL.
		 */
	}

	/*
	 * There should be no reason to limit resource usage info to
	 * exited processes only.  A snapshot of any resources used
	 * by a stopped process may be exactly what is needed.
	 */
	if (wrusage != NULL) {
		rup = &wrusage->wru_self;
		*rup = p->p_ru;
		calcru(p, &rup->ru_utime, &rup->ru_stime);

		rup = &wrusage->wru_children;
		*rup = p->p_stats->p_cru;
		calccru(p, &rup->ru_utime, &rup->ru_stime);
	}

	if (p->p_state == PRS_ZOMBIE) {
		proc_reap(td, p, status, options);
		return (-1);
	}
	PROC_SUNLOCK(p);
	PROC_UNLOCK(p);
	return (1);
}

int
kern_wait(struct thread *td, pid_t pid, int *status, int options,
    struct rusage *rusage)
{
	struct __wrusage wru, *wrup;
	idtype_t idtype;
	id_t id;
	int ret;

	/*
	 * Translate the special pid values into the (idtype, pid)
	 * pair for kern_wait6.  The WAIT_MYPGRP case is handled by
	 * kern_wait6() on its own.
	 */
	if (pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (pid < 0) {
		idtype = P_PGID;
		id = (id_t)-pid;
	} else {
		idtype = P_PID;
		id = (id_t)pid;
	}

	if (rusage != NULL)
		wrup = &wru;
	else
		wrup = NULL;

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;
	ret = kern_wait6(td, idtype, id, status, options, wrup, NULL);
	if (rusage != NULL)
		*rusage = wru.wru_self;
	return (ret);
}

int
kern_wait6(struct thread *td, idtype_t idtype, id_t id, int *status,
    int options, struct __wrusage *wrusage, siginfo_t *siginfo)
{
	struct proc *p, *q;
	int error, nfound, ret;

	AUDIT_ARG_VALUE((int)idtype);	/* XXX - This is likely wrong! */
	AUDIT_ARG_PID((pid_t)id);	/* XXX - This may be wrong! */
	AUDIT_ARG_VALUE(options);

	q = td->td_proc;

	if ((pid_t)id == WAIT_MYPGRP && (idtype == P_PID || idtype == P_PGID)) {
		PROC_LOCK(q);
		id = (id_t)q->p_pgid;
		PROC_UNLOCK(q);
		idtype = P_PGID;
	}

	/* If we don't know the option, just return. */
	if ((options & ~(WUNTRACED | WNOHANG | WCONTINUED | WNOWAIT |
	    WEXITED | WTRAPPED | WLINUXCLONE)) != 0)
		return (EINVAL);
	if ((options & (WEXITED | WUNTRACED | WCONTINUED | WTRAPPED)) == 0) {
		/*
		 * We will be unable to find any matching processes,
		 * because there are no known events to look for.
		 * Prefer to return error instead of blocking
		 * indefinitely.
		 */
		return (EINVAL);
	}

loop:
	if (q->p_flag & P_STATCHILD) {
		PROC_LOCK(q);
		q->p_flag &= ~P_STATCHILD;
		PROC_UNLOCK(q);
	}
	nfound = 0;
	sx_xlock(&proctree_lock);
	LIST_FOREACH(p, &q->p_children, p_sibling) {
		ret = proc_to_reap(td, p, idtype, id, status, options,
		    wrusage, siginfo);
		if (ret == 0)
			continue;
		else if (ret == 1)
			nfound++;
		else
			return (0);

		PROC_LOCK(p);
		PROC_SLOCK(p);

		if ((options & WTRAPPED) != 0 &&
		    (p->p_flag & P_TRACED) != 0 &&
		    (p->p_flag & (P_STOPPED_TRACE | P_STOPPED_SIG)) != 0 &&
		    (p->p_suspcount == p->p_numthreads) &&
		    ((p->p_flag & P_WAITED) == 0)) {
			PROC_SUNLOCK(p);
			if ((options & WNOWAIT) == 0)
				p->p_flag |= P_WAITED;
			sx_xunlock(&proctree_lock);
			td->td_retval[0] = p->p_pid;

			if (status != NULL)
				*status = W_STOPCODE(p->p_xstat);
			if (siginfo != NULL) {
				siginfo->si_status = p->p_xstat;
				siginfo->si_code = CLD_TRAPPED;
			}
			if ((options & WNOWAIT) == 0) {
				PROC_LOCK(q);
				sigqueue_take(p->p_ksi);
				PROC_UNLOCK(q);
			}

			PROC_UNLOCK(p);
			return (0);
		}
		if ((options & WUNTRACED) != 0 &&
		    (p->p_flag & P_STOPPED_SIG) != 0 &&
		    (p->p_suspcount == p->p_numthreads) &&
		    ((p->p_flag & P_WAITED) == 0)) {
			PROC_SUNLOCK(p);
			if ((options & WNOWAIT) == 0)
				p->p_flag |= P_WAITED;
			sx_xunlock(&proctree_lock);
			td->td_retval[0] = p->p_pid;

			if (status != NULL)
				*status = W_STOPCODE(p->p_xstat);
			if (siginfo != NULL) {
				siginfo->si_status = p->p_xstat;
				siginfo->si_code = CLD_STOPPED;
			}
			if ((options & WNOWAIT) == 0) {
				PROC_LOCK(q);
				sigqueue_take(p->p_ksi);
				PROC_UNLOCK(q);
			}

			PROC_UNLOCK(p);
			return (0);
		}
		PROC_SUNLOCK(p);
		if ((options & WCONTINUED) != 0 &&
		    (p->p_flag & P_CONTINUED) != 0) {
			sx_xunlock(&proctree_lock);
			td->td_retval[0] = p->p_pid;
			if ((options & WNOWAIT) == 0) {
				p->p_flag &= ~P_CONTINUED;
				PROC_LOCK(q);
				sigqueue_take(p->p_ksi);
				PROC_UNLOCK(q);
			}
			PROC_UNLOCK(p);

			if (status != NULL)
				*status = SIGCONT;
			if (siginfo != NULL) {
				siginfo->si_status = SIGCONT;
				siginfo->si_code = CLD_CONTINUED;
			}
			return (0);
		}
		PROC_UNLOCK(p);
	}

	/*
	 * Look in the orphans list too, to allow the parent to
	 * collect its child's exit status even if the child is being
	 * debugged.
	 *
	 * Debugger detaches from the parent upon successful
	 * switch-over from parent to child.  At this point due to
	 * re-parenting the parent loses the child to the debugger and
	 * a wait4(2) call would report that it has no children to wait
	 * for.  By maintaining a list of orphans we allow the parent
	 * to successfully wait until the child becomes a zombie.
	 */
	LIST_FOREACH(p, &q->p_orphans, p_orphan) {
		ret = proc_to_reap(td, p, idtype, id, status, options,
		    wrusage, siginfo);
		if (ret == 0)
			continue;
		else if (ret == 1)
			nfound++;
		else
			return (0);
	}
	if (nfound == 0) {
		sx_xunlock(&proctree_lock);
		return (ECHILD);
	}
	if (options & WNOHANG) {
		sx_xunlock(&proctree_lock);
		td->td_retval[0] = 0;
		return (0);
	}
	PROC_LOCK(q);
	sx_xunlock(&proctree_lock);
	if (q->p_flag & P_STATCHILD) {
		q->p_flag &= ~P_STATCHILD;
		error = 0;
	} else
		error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0);
	PROC_UNLOCK(q);
	if (error)
		return (error);
	goto loop;
}

/*
 * Make process 'parent' the new parent of process 'child'.
 * Must be called with an exclusive hold of proctree lock.
 */
void
proc_reparent(struct proc *child, struct proc *parent)
{

	sx_assert(&proctree_lock, SX_XLOCKED);
	PROC_LOCK_ASSERT(child, MA_OWNED);
	if (child->p_pptr == parent)
		return;

	PROC_LOCK(child->p_pptr);
	sigqueue_take(child->p_ksi);
	PROC_UNLOCK(child->p_pptr);
	LIST_REMOVE(child, p_sibling);
	LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);

	clear_orphan(child);
	if (child->p_flag & P_TRACED) {
		LIST_INSERT_HEAD(&child->p_pptr->p_orphans, child, p_orphan);
		child->p_flag |= P_ORPHAN;
	}

	child->p_pptr = parent;
}