1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1989, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include "opt_ddb.h" 38 #include "opt_ktrace.h" 39 40 #define EXTERR_CATEGORY EXTERR_CAT_PROCEXIT 41 #include <sys/systm.h> 42 #include <sys/acct.h> /* for acct_process() function prototype */ 43 #include <sys/capsicum.h> 44 #include <sys/eventhandler.h> 45 #include <sys/exterrvar.h> 46 #include <sys/filedesc.h> 47 #include <sys/jail.h> 48 #include <sys/kernel.h> 49 #include <sys/ktr.h> 50 #include <sys/lock.h> 51 #include <sys/malloc.h> 52 #include <sys/mutex.h> 53 #include <sys/proc.h> 54 #include <sys/procdesc.h> 55 #include <sys/ptrace.h> 56 #include <sys/racct.h> 57 #include <sys/resourcevar.h> 58 #include <sys/sbuf.h> 59 #include <sys/sched.h> 60 #include <sys/sdt.h> 61 #include <sys/sem.h> 62 #include <sys/shm.h> 63 #include <sys/signalvar.h> 64 #include <sys/sx.h> 65 #include <sys/syscallsubr.h> 66 #include <sys/sysctl.h> 67 #include <sys/sysent.h> 68 #include <sys/syslog.h> 69 #include <sys/sysproto.h> 70 #include <sys/timers.h> 71 #include <sys/tty.h> 72 #include <sys/umtxvar.h> 73 #include <sys/vmmeter.h> 74 #include <sys/vnode.h> 75 #include <sys/wait.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <security/audit/audit.h> 81 #include <security/mac/mac_framework.h> 82 83 #include <vm/vm.h> 84 #include <vm/vm_extern.h> 85 #include <vm/vm_param.h> 86 #include <vm/pmap.h> 87 #include <vm/vm_map.h> 88 #include <vm/vm_page.h> 89 #include <vm/uma.h> 90 91 #ifdef KDTRACE_HOOKS 92 #include 
<sys/dtrace_bsd.h> 93 dtrace_execexit_func_t dtrace_fasttrap_exit; 94 #endif 95 96 SDT_PROVIDER_DECLARE(proc); 97 SDT_PROBE_DEFINE1(proc, , , exit, "int"); 98 99 static int kern_kill_on_dbg_exit = 1; 100 SYSCTL_INT(_kern, OID_AUTO, kill_on_debugger_exit, CTLFLAG_RWTUN, 101 &kern_kill_on_dbg_exit, 0, 102 "Kill ptraced processes when debugger exits"); 103 104 static bool kern_wait_dequeue_sigchld = 1; 105 SYSCTL_BOOL(_kern, OID_AUTO, wait_dequeue_sigchld, CTLFLAG_RWTUN, 106 &kern_wait_dequeue_sigchld, 0, 107 "Dequeue SIGCHLD on wait(2) for live process"); 108 109 struct proc * 110 proc_realparent(struct proc *child) 111 { 112 struct proc *p, *parent; 113 114 sx_assert(&proctree_lock, SX_LOCKED); 115 if ((child->p_treeflag & P_TREE_ORPHANED) == 0) 116 return (child->p_pptr->p_pid == child->p_oppid ? 117 child->p_pptr : child->p_reaper); 118 for (p = child; (p->p_treeflag & P_TREE_FIRST_ORPHAN) == 0;) { 119 /* Cannot use LIST_PREV(), since the list head is not known. */ 120 p = __containerof(p->p_orphan.le_prev, struct proc, 121 p_orphan.le_next); 122 KASSERT((p->p_treeflag & P_TREE_ORPHANED) != 0, 123 ("missing P_ORPHAN %p", p)); 124 } 125 parent = __containerof(p->p_orphan.le_prev, struct proc, 126 p_orphans.lh_first); 127 return (parent); 128 } 129 130 static void 131 reaper_clear(struct proc *p, struct proc *rp) 132 { 133 struct proc *p1; 134 bool clear; 135 136 sx_assert(&proctree_lock, SX_XLOCKED); 137 LIST_REMOVE(p, p_reapsibling); 138 if (p->p_reapsubtree == 1) 139 return; 140 clear = true; 141 LIST_FOREACH(p1, &rp->p_reaplist, p_reapsibling) { 142 if (p1->p_reapsubtree == p->p_reapsubtree) { 143 clear = false; 144 break; 145 } 146 } 147 if (clear) 148 proc_id_clear(PROC_ID_REAP, p->p_reapsubtree); 149 } 150 151 void 152 reaper_abandon_children(struct proc *p, bool exiting) 153 { 154 struct proc *p1, *p2, *ptmp; 155 156 sx_assert(&proctree_lock, SX_XLOCKED); 157 KASSERT(p != initproc, ("reaper_abandon_children for initproc")); 158 if ((p->p_treeflag & 
P_TREE_REAPER) == 0) 159 return; 160 p1 = p->p_reaper; 161 LIST_FOREACH_SAFE(p2, &p->p_reaplist, p_reapsibling, ptmp) { 162 reaper_clear(p2, p); 163 p2->p_reaper = p1; 164 p2->p_reapsubtree = p->p_reapsubtree; 165 LIST_INSERT_HEAD(&p1->p_reaplist, p2, p_reapsibling); 166 if (exiting && p2->p_pptr == p) { 167 PROC_LOCK(p2); 168 proc_reparent(p2, p1, true); 169 PROC_UNLOCK(p2); 170 } 171 } 172 KASSERT(LIST_EMPTY(&p->p_reaplist), ("p_reaplist not empty")); 173 p->p_treeflag &= ~P_TREE_REAPER; 174 } 175 176 void 177 proc_clear_orphan(struct proc *p) 178 { 179 struct proc *p1; 180 181 sx_assert(&proctree_lock, SA_XLOCKED); 182 if ((p->p_treeflag & P_TREE_ORPHANED) == 0) 183 return; 184 if ((p->p_treeflag & P_TREE_FIRST_ORPHAN) != 0) { 185 p1 = LIST_NEXT(p, p_orphan); 186 if (p1 != NULL) 187 p1->p_treeflag |= P_TREE_FIRST_ORPHAN; 188 p->p_treeflag &= ~P_TREE_FIRST_ORPHAN; 189 } 190 LIST_REMOVE(p, p_orphan); 191 p->p_treeflag &= ~P_TREE_ORPHANED; 192 } 193 194 void 195 exit_onexit(struct proc *p) 196 { 197 MPASS(p->p_numthreads == 1); 198 umtx_thread_exit(FIRST_THREAD_IN_PROC(p)); 199 } 200 201 /* 202 * exit -- death of process. 203 */ 204 int 205 sys__exit(struct thread *td, struct _exit_args *uap) 206 { 207 kern_exit(td, uap->rval, 0); 208 return (0); 209 } 210 211 void 212 proc_set_p2_wexit(struct proc *p) 213 { 214 PROC_LOCK_ASSERT(p, MA_OWNED); 215 p->p_flag2 |= P2_WEXIT; 216 } 217 218 static void 219 ast_async_exit(struct thread *td, int asts) 220 { 221 struct proc *p; 222 223 p = td->td_proc; 224 if ((p->p_flag & P_ASYNC_EXIT) != 0) 225 exit1(td, p->p_xexit, p->p_asig); 226 } 227 228 /* 229 * The variation on exit1() intended to be used in the syscall 230 * handlers. Unlike exit1(), it might delay the current process exit 231 * to ast. This is needed e.g. when _exit(2) is executed due to the 232 * ptrace(PT_SC_REMOTERQ), which must do more work after the syscall 233 * handler call. 
 */
void
kern_exit(struct thread *td, int rval, int signo)
{
	struct proc *p;

	KASSERT(rval == 0 || signo == 0,
	    ("kern_exit rv %d sig %d", rval, signo));

	p = td->td_proc;
	/*
	 * TDB_SCREMOTEREQ is set on the thread: record the exit status
	 * and signal in the proc, mark it P_ASYNC_EXIT and schedule the
	 * TDA_ASYNC_EXIT ast instead of exiting now.
	 */
	if ((td->td_dbgflags & TDB_SCREMOTEREQ) != 0) {
		PROC_LOCK(p);
		p->p_xexit = rval;
		p->p_asig = signo;
		p->p_flag |= P_ASYNC_EXIT;
		ast_sched(td, TDA_ASYNC_EXIT);
		PROC_UNLOCK(p);
		return;
	}
	/* A previously recorded asynchronous exit overrides the arguments. */
	if ((p->p_flag & P_ASYNC_EXIT) != 0) {
		rval = p->p_xexit;
		signo = p->p_asig;
	}
	exit1(td, rval, signo);
}

/*
 * Exit: deallocate address space and other resources, change proc state to
 * zombie, and unlink proc from allproc and parent's lists.  Save exit status
 * and rusage for wait().  Check for child processes and orphan them.
 *
 * At most one of 'rval' (exit status) and 'signo' (terminating signal)
 * may be non-zero.  The function ends in thread_exit().
 */
void
exit1(struct thread *td, int rval, int signo)
{
	struct proc *p, *nq, *q, *t;
	struct thread *tdt;
	ksiginfo_t *ksi, *ksi1;
	int signal_parent;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo));
	MPASS((td->td_dbgflags & TDB_SCREMOTEREQ) == 0);
	TSPROCEXIT(td->td_proc->p_pid);

	p = td->td_proc;
	/*
	 * In case we're rebooting we just let init die in order to
	 * work around an issue where pid 1 might get a fatal signal.
	 * For instance, if network interface serving NFS root is
	 * going down due to reboot, page-in requests for text are
	 * failing.
	 */
	if (p == initproc && rebooting == 0) {
		printf("init died (signal %d, exit %d)\n", signo, rval);
		panic("Going nowhere without my init!");
	}

	/*
	 * Process deferred operations, designated with ASTF_KCLEAR.
	 * For instance, we need to deref SU mp, since the thread does
	 * not return to userspace, and wait for geom to stabilize.
	 */
	ast_kclear(td);

	/*
	 * MUST abort all other threads before proceeding past here.
	 */
	PROC_LOCK(p);
	proc_set_p2_wexit(p);

	/*
	 * First check if some other thread or external request got
	 * here before us.  If so, act appropriately: exit or suspend.
	 * We must ensure that stop requests are handled before we set
	 * P_WEXIT.
	 */
	thread_suspend_check(0);
	while (p->p_flag & P_HADTHREADS) {
		/*
		 * Kill off the other threads.  This requires
		 * some co-operation from other parts of the kernel
		 * so it may not be instantaneous.  With this state set
		 * any thread attempting to interruptibly
		 * sleep will return immediately with EINTR or EWOULDBLOCK
		 * which will hopefully force them to back out to userland
		 * freeing resources as they go.  Any thread attempting
		 * to return to userland will thread_exit() from ast().
		 * thread_exit() will unsuspend us when the last of the
		 * other threads exits.
		 * If there is already a thread singler after resumption,
		 * calling thread_single() will fail; in that case, we just
		 * re-check all suspension requests, the thread should
		 * either be suspended there or exit.
		 */
		if (!thread_single(p, SINGLE_EXIT))
			/*
			 * All other activity in this process is now
			 * stopped.  Threading support has been turned
			 * off.
			 */
			break;
		/*
		 * Recheck for new stop or suspend requests which
		 * might appear while process lock was dropped in
		 * thread_single().
		 */
		thread_suspend_check(0);
	}
	KASSERT(p->p_numthreads == 1,
	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
	racct_sub(p, RACCT_NTHR, 1);

	/* Let event handler change exit status */
	p->p_xexit = rval;
	p->p_xsig = signo;

	/*
	 * Ignore any pending request to stop due to a stop signal.
	 * Once P_WEXIT is set, future requests will be ignored as
	 * well.
	 */
	p->p_flag &= ~P_STOPPED_SIG;
	KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped"));

	/* Note that we are exiting. */
	p->p_flag |= P_WEXIT;

	/*
	 * Wait for any processes that have a hold on our vmspace to
	 * release their reference.
	 */
	while (p->p_lock > 0)
		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);

	MPASS(p->p_execblock == 0);
	PROC_UNLOCK(p);
	/* Drain the limit callout while we don't have the proc locked */
	callout_drain(&p->p_limco);

#ifdef AUDIT
	/*
	 * The Sun BSM exit token contains two components: an exit status as
	 * passed to exit(), and a return value to indicate what sort of exit
	 * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
	 * what the return value is.
	 */
	AUDIT_ARG_EXIT(rval, 0);
	AUDIT_SYSCALL_EXIT(0, td);
#endif

	/* Are we a task leader with peers? */
	if (p->p_peers != NULL && p == p->p_leader) {
		mtx_lock(&ppeers_lock);
		q = p->p_peers;
		/* Send SIGKILL to every peer, then sleep until the list drains. */
		while (q != NULL) {
			PROC_LOCK(q);
			kern_psignal(q, SIGKILL);
			PROC_UNLOCK(q);
			q = q->p_peers;
		}
		while (p->p_peers != NULL)
			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
		mtx_unlock(&ppeers_lock);
	}

	itimers_exit(p);

	/*
	 * Check if any loadable modules need anything done at process exit.
	 * E.g. SYSV IPC stuff.
	 * Event handler could change exit status.
	 * XXX what if one of these generates an error?
	 */
	EVENTHANDLER_DIRECT_INVOKE(process_exit, p);

	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	PROC_LOCK(p);
	stopprofclock(p);
	p->p_ptevents = 0;

	/*
	 * Stop the real interval timer.  If the handler is currently
	 * executing, prevent it from rearming itself and let it finish.
	 */
	p->p_flag2 &= ~P2_ITSTOPPED;
	if (timevalisset(&p->p_realtimer.it_value) &&
	    callout_stop(&p->p_itcallout) == 0) {
		timevalclear(&p->p_realtimer.it_interval);
		PROC_UNLOCK(p);
		callout_drain(&p->p_itcallout);
	} else {
		PROC_UNLOCK(p);
	}

	if (p->p_sysent->sv_onexit != NULL)
		p->p_sysent->sv_onexit(p);
	seltdfini(td);

	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pid.  The P_WEXIT flag interlocks with fsetown().
	 */
	funsetownlst(&p->p_sigiolst);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	pdescfree(td);
	fdescfree(td);

	/*
	 * Remove ourself from our leader's peer list and wake our leader.
	 */
	if (p->p_leader->p_peers != NULL) {
		mtx_lock(&ppeers_lock);
		/* Re-test under ppeers_lock; the first test was unlocked. */
		if (p->p_leader->p_peers != NULL) {
			q = p->p_leader;
			while (q->p_peers != p)
				q = q->p_peers;
			q->p_peers = p->p_peers;
			wakeup(p->p_leader);
		}
		mtx_unlock(&ppeers_lock);
	}

	exec_free_abi_mappings(p);
	vmspace_exit(td);
	(void)acct_process(td);

#ifdef KTRACE
	ktrprocexit(td);
#endif
	/*
	 * Release reference to text vnode etc
	 */
	if (p->p_textvp != NULL) {
		vrele(p->p_textvp);
		p->p_textvp = NULL;
	}
	if (p->p_textdvp != NULL) {
		vrele(p->p_textdvp);
		p->p_textdvp = NULL;
	}
	if (p->p_binname != NULL) {
		free(p->p_binname, M_PARGS);
		p->p_binname = NULL;
	}

	/*
	 * Release our limits structure.
	 */
	lim_free(p->p_limit);
	p->p_limit = NULL;

	tidhash_remove(td);

	/*
	 * Call machine-dependent code to release any
	 * machine-dependent resources other than the address space.
	 * The address space is released by "vmspace_exitfree(p)" in
	 * vm_waitproc().
	 */
	cpu_exit(td);

	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);

	/*
	 * Remove from allproc.  It still sits in the hash.
	 */
	sx_xlock(&allproc_lock);
	LIST_REMOVE(p, p_list);

#ifdef DDB
	/*
	 * Used by ddb's 'ps' command to find this process via the
	 * pidhash.
	 */
	p->p_list.le_prev = NULL;
#endif
	prison_proc_unlink(p->p_ucred->cr_prison, p);
	sx_xunlock(&allproc_lock);

	sx_xlock(&proctree_lock);
	if ((p->p_flag & (P_TRACED | P_PPWAIT | P_PPTRACE)) != 0) {
		PROC_LOCK(p);
		p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE);
		PROC_UNLOCK(p);
	}

	/*
	 * killjobc() might drop and re-acquire proctree_lock to
	 * revoke control tty if exiting process was a session leader.
	 */
	killjobc();

	/*
	 * Reparent all children processes:
	 * - traced ones to their real parent (or to their reaper if we
	 *   are that parent)
	 * - the rest to their reaper
	 */
	q = LIST_FIRST(&p->p_children);
	if (q != NULL)		/* only need this if any child is S_ZOMB */
		wakeup(q->p_reaper);
	for (; q != NULL; q = nq) {
		nq = LIST_NEXT(q, p_sibling);
		/* Allocated before taking q's lock since M_WAITOK is used. */
		ksi = ksiginfo_alloc(M_WAITOK);
		PROC_LOCK(q);
		q->p_sigparent = SIGCHLD;

		if ((q->p_flag & P_TRACED) == 0) {
			proc_reparent(q, q->p_reaper, true);
			if (q->p_state == PRS_ZOMBIE) {
				/*
				 * Inform reaper about the reparented
				 * zombie, since wait(2) has something
				 * new to report.  Guarantee queueing
				 * of the SIGCHLD signal, similar to
				 * the _exit() behaviour, by providing
				 * our ksiginfo.  Ksi is freed by the
				 * signal delivery.
				 */
				if (q->p_ksi == NULL) {
					ksi1 = NULL;
				} else {
					ksiginfo_copy(q->p_ksi, ksi);
					ksi->ksi_flags |= KSI_INS;
					ksi1 = ksi;
					ksi = NULL;
				}
				PROC_LOCK(q->p_reaper);
				pksignal(q->p_reaper, SIGCHLD, ksi1);
				PROC_UNLOCK(q->p_reaper);
			} else if (q->p_pdeathsig > 0) {
				/*
				 * The child asked to receive a signal
				 * when we exit.
				 */
				kern_psignal(q, q->p_pdeathsig);
			}
		} else {
			/*
			 * Traced processes are killed by default
			 * since their existence means someone is
			 * screwing up.
			 */
			t = proc_realparent(q);
			if (t == p) {
				proc_reparent(q, q->p_reaper, true);
			} else {
				PROC_LOCK(t);
				proc_reparent(q, t, true);
				PROC_UNLOCK(t);
			}
			/*
			 * Since q was found on our children list, the
			 * proc_reparent() call moved q to the orphan
			 * list due to present P_TRACED flag. Clear
			 * orphan link for q now while q is locked.
			 */
			proc_clear_orphan(q);
			q->p_flag &= ~P_TRACED;
			q->p_flag2 &= ~P2_PTRACE_FSTP;
			q->p_ptevents = 0;
			p->p_xthread = NULL;
			FOREACH_THREAD_IN_PROC(q, tdt) {
				tdt->td_dbgflags &= ~(TDB_SUSPEND | TDB_XSIG |
				    TDB_FSTP);
				tdt->td_xsig = 0;
			}
			if (kern_kill_on_dbg_exit) {
				q->p_flag &= ~P_STOPPED_TRACE;
				kern_psignal(q, SIGKILL);
			} else if ((q->p_flag & (P_STOPPED_TRACE |
			    P_STOPPED_SIG)) != 0) {
				sigqueue_delete_proc(q, SIGTRAP);
				ptrace_unsuspend(q);
			}
		}
		PROC_UNLOCK(q);
		/* Not handed to pksignal() above, so it is still ours to free. */
		if (ksi != NULL)
			ksiginfo_free(ksi);
	}

	/*
	 * Also get rid of our orphans.
	 */
	while ((q = LIST_FIRST(&p->p_orphans)) != NULL) {
		PROC_LOCK(q);
		KASSERT(q->p_oppid == p->p_pid,
		    ("orphan %p of %p has unexpected oppid %d", q, p,
		    q->p_oppid));
		q->p_oppid = q->p_reaper->p_pid;

		/*
		 * If we are the real parent of this process
		 * but it has been reparented to a debugger, then
		 * check if it asked for a signal when we exit.
		 */
		if (q->p_pdeathsig > 0)
			kern_psignal(q, q->p_pdeathsig);
		CTR2(KTR_PTRACE, "exit: pid %d, clearing orphan %d", p->p_pid,
		    q->p_pid);
		proc_clear_orphan(q);
		PROC_UNLOCK(q);
	}

#ifdef KDTRACE_HOOKS
	if (SDT_PROBES_ENABLED()) {
		int reason = CLD_EXITED;
		if (WCOREDUMP(signo))
			reason = CLD_DUMPED;
		else if (WIFSIGNALED(signo))
			reason = CLD_KILLED;
		SDT_PROBE1(proc, , , exit, reason);
	}
#endif

	/* Save exit status. */
	PROC_LOCK(p);
	p->p_xthread = td;

	if (p->p_sysent->sv_ontdexit != NULL)
		p->p_sysent->sv_ontdexit(td);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exit if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exit)
		dtrace_fasttrap_exit(p);
#endif

	/*
	 * Notify interested parties of our demise.
	 */
	KNOTE_LOCKED(p->p_klist, NOTE_EXIT);

	/*
	 * If this is a process with a descriptor, we may not need to deliver
	 * a signal to the parent.  proctree_lock is held over
	 * procdesc_exit() to serialize concurrent calls to close() and
	 * exit().
	 */
	signal_parent = 0;
	if (p->p_procdesc == NULL || procdesc_exit(p)) {
		/*
		 * Notify parent that we're gone.  If parent has the
		 * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN,
		 * reparent ourselves to our reaper instead (and hope it
		 * will handle this situation).
		 */
		PROC_LOCK(p->p_pptr);
		mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
		if (p->p_pptr->p_sigacts->ps_flag &
		    (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
			struct proc *pp;

			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
			pp = p->p_pptr;
			PROC_UNLOCK(pp);
			proc_reparent(p, p->p_reaper, true);
			p->p_sigparent = SIGCHLD;
			PROC_LOCK(p->p_pptr);

			/*
			 * Notify parent, so in case he was wait(2)ing or
			 * executing waitpid(2) with our pid, he will
			 * continue.
			 */
			wakeup(pp);
		} else
			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);

		/*
		 * signal_parent: 1 selects childproc_exited(), 2 selects
		 * a plain kern_psignal() with p_sigparent, 0 sends nothing.
		 */
		if (p->p_pptr == p->p_reaper || p->p_pptr == initproc) {
			signal_parent = 1;
		} else if (p->p_sigparent != 0) {
			if (p->p_sigparent == SIGCHLD) {
				signal_parent = 1;
			} else { /* LINUX thread */
				signal_parent = 2;
			}
		}
	} else
		PROC_LOCK(p->p_pptr);
	sx_xunlock(&proctree_lock);

	if (signal_parent == 1) {
		childproc_exited(p);
	} else if (signal_parent == 2) {
		kern_psignal(p->p_pptr, p->p_sigparent);
	}

	/* Tell the prison that we are gone. */
	prison_proc_free(p->p_ucred->cr_prison);

	/*
	 * The state PRS_ZOMBIE prevents other processes from sending
	 * signal to the process, to avoid memory leak, we free memory
	 * for signal queue at the time when the state is set.
	 */
	sigqueue_flush(&p->p_sigqueue);
	sigqueue_flush(&td->td_sigqueue);

	/*
	 * We have to wait until after acquiring all locks before
	 * changing p_state.  We need to avoid all possible context
	 * switches (including ones from blocking on a mutex) while
	 * marked as a zombie.  We also have to set the zombie state
	 * before we release the parent process' proc lock to avoid
	 * a lost wakeup.  So, we first call wakeup, then we grab the
	 * sched lock, update the state, and release the parent process'
	 * proc lock.
	 */
	wakeup(p->p_pptr);
	cv_broadcast(&p->p_pwait);
	sched_exit(p->p_pptr, td);
	PROC_SLOCK(p);
	p->p_state = PRS_ZOMBIE;
	PROC_UNLOCK(p->p_pptr);

	/*
	 * Save our children's rusage information in our exit rusage.
	 */
	PROC_STATLOCK(p);
	ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);
	PROC_STATUNLOCK(p);

	/*
	 * Make sure the scheduler takes this thread out of its tables etc.
	 * This will also release this thread's reference to the ucred.
	 * Other thread parts to release include pcb bits and such.
	 */
	thread_exit();
}

#ifndef _SYS_SYSPROTO_H_
struct abort2_args {
	char *why;
	int nargs;
	void **args;
};
#endif

/*
 * abort2() system call entry: copy in up to nitems(uargs) user pointers
 * and hand them to kern_abort2().  An out-of-range count, a NULL 'args'
 * with a positive count, or a failed copyin() is reported to
 * kern_abort2() as nargs == -1.
 */
int
sys_abort2(struct thread *td, struct abort2_args *uap)
{
	void *uargs[16];
	void **uargsp;
	int error, nargs;

	nargs = uap->nargs;
	if (nargs < 0 || nargs > nitems(uargs))
		nargs = -1;
	uargsp = NULL;
	if (nargs > 0) {
		if (uap->args != NULL) {
			error = copyin(uap->args, uargs,
			    nargs * sizeof(void *));
			if (error != 0)
				nargs = -1;
			else
				uargsp = uargs;
		} else
			nargs = -1;
	}
	return (kern_abort2(td, uap->why, nargs, uargsp));
}

/*
 * kern_abort2()
 * Arguments:
 *  why - user pointer to why
 *  nargs - number of arguments copied or -1 if an error occurred in copying
 *  args - pointer to an array of pointers in kernel format
 *
 * Logs a message describing the abort and then calls sigexit() with
 * SIGABRT, or with SIGKILL when the arguments were unusable.
 */
int
kern_abort2(struct thread *td, const char *why, int nargs, void **uargs)
{
	struct proc *p = td->td_proc;
	struct sbuf *sb;
	int error, i, sig;

	/*
	 * Do it right now so we can log either proper call of abort2(), or
	 * note, that invalid argument was passed. 512 is big enough to
	 * handle 16 arguments' descriptions with additional comments.
	 */
	sb = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN);
	sbuf_clear(sb);
	sbuf_printf(sb, "%s(pid %d uid %d) aborted: ",
	    p->p_comm, p->p_pid, td->td_ucred->cr_uid);
	/*
	 * Since we can't return from abort2(), send SIGKILL in cases, where
	 * abort2() was called improperly
	 */
	sig = SIGKILL;
	/* Prevent DoS attacks from user space. */
	if (nargs == -1)
		goto out;
	KASSERT(nargs >= 0 && nargs <= 16, ("called with too many args (%d)",
	    nargs));
	/*
	 * Limit size of 'reason' string to 128. Will fit even when
	 * maximal number of arguments was chosen to be logged.
	 */
	if (why != NULL) {
		error = sbuf_copyin(sb, why, 128);
		if (error < 0)
			goto out;
	} else {
		sbuf_cat(sb, "(null)");
	}
	if (nargs > 0) {
		sbuf_putc(sb, '(');
		for (i = 0; i < nargs; i++)
			sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]);
		sbuf_putc(sb, ')');
	}
	/*
	 * Final stage: arguments were proper, string has been
	 * successfully copied from userspace, and copying pointers
	 * from user-space succeeded.
	 */
	sig = SIGABRT;
out:
	if (sig == SIGKILL) {
		sbuf_trim(sb);
		sbuf_cat(sb, " (Reason text inaccessible)");
	}
	sbuf_cat(sb, "\n");
	sbuf_finish(sb);
	log(LOG_INFO, "%s", sbuf_data(sb));
	sbuf_delete(sb);
	PROC_LOCK(p);
	sigexit(td, sig);
	return (0);
}

#ifdef COMPAT_43
/*
 * The dirty work is handled by kern_wait().
 *
 * On success the wait status is stored in td_retval[1].
 */
int
owait(struct thread *td, struct owait_args *uap __unused)
{
	int error, status;

	error = kern_wait(td, WAIT_ANY, &status, 0, NULL);
	if (error == 0)
		td->td_retval[1] = status;
	return (error);
}
#endif /* COMPAT_43 */

/*
 * The dirty work is handled by kern_wait().
 */
int
sys_wait4(struct thread *td, struct wait4_args *uap)
{
	struct rusage ru, *rup;
	int error, status;

	if (uap->rusage != NULL)
		rup = &ru;
	else
		rup = NULL;
	error = kern_wait(td, uap->pid, &status, uap->options, rup);
	/* Results are copied out only when td_retval[0] is non-zero. */
	if (uap->status != NULL && error == 0 && td->td_retval[0] != 0)
		error = copyout(&status, uap->status, sizeof(status));
	if (uap->rusage != NULL && error == 0 && td->td_retval[0] != 0)
		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
	return (error);
}

/*
 * wait6() system call entry: gather the results of kern_wait6() in
 * kernel buffers and copy out those the caller supplied pointers for.
 */
int
sys_wait6(struct thread *td, struct wait6_args *uap)
{
	struct __wrusage wru, *wrup;
	siginfo_t si, *sip;
	idtype_t idtype;
	id_t id;
	int error, status;

	idtype = uap->idtype;
	id = uap->id;

	if (uap->wrusage != NULL)
		wrup = &wru;
	else
		wrup = NULL;

	if (uap->info != NULL) {
		sip = &si;
		bzero(sip, sizeof(*sip));
	} else
		sip = NULL;

	/*
	 * We expect all callers of wait6() to know about WEXITED and
	 * WTRAPPED.
	 */
	error = kern_wait6(td, idtype, id, &status, uap->options, wrup, sip);

	/*
	 * status and wrusage are copied out only when td_retval[0] is
	 * non-zero; the siginfo is copied out whenever no error occurred.
	 */
	if (uap->status != NULL && error == 0 && td->td_retval[0] != 0)
		error = copyout(&status, uap->status, sizeof(status));
	if (uap->wrusage != NULL && error == 0 && td->td_retval[0] != 0)
		error = copyout(&wru, uap->wrusage, sizeof(wru));
	if (uap->info != NULL && error == 0)
		error = copyout(&si, uap->info, sizeof(si));
	return (error);
}

/*
 * pdwait() system call entry: same result handling as sys_wait6(), but
 * the target is named by the descriptor uap->fd and passed to
 * kern_pdwait(); results are copied out whenever no error occurred.
 */
int
sys_pdwait(struct thread *td, struct pdwait_args *uap)
{
	struct __wrusage wru, *wrup;
	siginfo_t si, *sip;
	int error, status;

	wrup = uap->wrusage != NULL ? &wru : NULL;

	if (uap->info != NULL) {
		sip = &si;
		bzero(sip, sizeof(*sip));
	} else {
		sip = NULL;
	}

	error = kern_pdwait(td, uap->fd, &status, uap->options, wrup, sip);

	if (uap->status != NULL && error == 0)
		error = copyout(&status, uap->status, sizeof(status));
	if (uap->wrusage != NULL && error == 0)
		error = copyout(&wru, uap->wrusage, sizeof(wru));
	if (uap->info != NULL && error == 0)
		error = copyout(&si, uap->info, sizeof(si));
	return (error);
}

/*
 * Reap the remains of a zombie process and optionally return its exit
 * status; its rusage is added to the calling process's child totals.
 * Asserts and will release both the proctree_lock and the process
 * lock as part of its work.
 *
 * With WNOWAIT in 'options' only the status is reported and the
 * process is left in place.
 */
void
proc_reap(struct thread *td, struct proc *p, int *status, int options)
{
	struct proc *q, *t;

	sx_assert(&proctree_lock, SA_XLOCKED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(p->p_state == PRS_ZOMBIE, ("proc_reap: !PRS_ZOMBIE"));

	mtx_spin_wait_unlocked(&p->p_slock);

	q = td->td_proc;

	if (status != NULL)
		*status = KW_EXITCODE(p->p_xexit, p->p_xsig);
	if ((options & WNOWAIT) != 0) {
		/*
		 * Only poll, returning the status.  Caller does not wish to
		 * release the proc struct just yet.
		 */
		PROC_UNLOCK(p);
		sx_xunlock(&proctree_lock);
		return;
	}

	PROC_LOCK(q);
	sigqueue_take(p->p_ksi);
	PROC_UNLOCK(q);

	/*
	 * If we got the child via a ptrace 'attach', we need to give it back
	 * to the old parent.
	 */
	if (p->p_oppid != p->p_pptr->p_pid) {
		/* p's lock is dropped so that t can be locked before p. */
		PROC_UNLOCK(p);
		t = proc_realparent(p);
		PROC_LOCK(t);
		PROC_LOCK(p);
		CTR2(KTR_PTRACE,
		    "wait: traced child %d moved back to parent %d", p->p_pid,
		    t->p_pid);
		proc_reparent(p, t, false);
		PROC_UNLOCK(p);
		pksignal(t, SIGCHLD, p->p_ksi);
		wakeup(t);
		cv_broadcast(&p->p_pwait);
		PROC_UNLOCK(t);
		sx_xunlock(&proctree_lock);
		return;
	}
	PROC_UNLOCK(p);

	/*
	 * Remove other references to this process to ensure we have an
	 * exclusive reference.
	 */
	sx_xlock(PIDHASHLOCK(p->p_pid));
	LIST_REMOVE(p, p_hash);
	sx_xunlock(PIDHASHLOCK(p->p_pid));
	LIST_REMOVE(p, p_sibling);
	reaper_abandon_children(p, true);
	reaper_clear(p, p->p_reaper);
	PROC_LOCK(p);
	proc_clear_orphan(p);
	PROC_UNLOCK(p);
	leavepgrp(p);
	if (p->p_procdesc != NULL)
		procdesc_reap(p);
	else
		proc_id_clear(PROC_ID_PID, p->p_pid);
	sx_xunlock(&proctree_lock);

	PROC_LOCK(p);
	knlist_detach(p->p_klist);
	p->p_klist = NULL;
	PROC_UNLOCK(p);

	/*
	 * Removal from allproc list and process group list paired with
	 * PROC_LOCK which was executed during that time should guarantee
	 * nothing can reach this process anymore.  As such further locking
	 * is unnecessary.
	 */
	p->p_xexit = p->p_xsig = 0;		/* XXX: why? */

	/* Fold the reaped process's usage into the caller's child totals. */
	PROC_LOCK(q);
	ruadd(&q->p_stats->p_cru, &q->p_crux, &p->p_ru, &p->p_rux);
	PROC_UNLOCK(q);

	/*
	 * Destroy resource accounting information associated with the process.
	 */
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(p);
		racct_sub(p, RACCT_NPROC, 1);
		PROC_UNLOCK(p);
	}
#endif
	racct_proc_exit(p);

	/*
	 * Free credentials, arguments, and sigacts, and decrement the count of
	 * processes running with this uid.
	 */
	proc_unset_cred(p, true);
	pargs_drop(p->p_args);
	p->p_args = NULL;
	sigacts_free(p->p_sigacts);
	p->p_sigacts = NULL;

	/*
	 * Do any thread-system specific cleanups.
	 */
	thread_wait(p);

	/*
	 * Give vm and machine-dependent layer a chance to free anything that
	 * cpu_exit couldn't release while still running in process context.
	 */
	vm_waitproc(p);
#ifdef MAC
	mac_proc_destroy(p);
#endif

	KASSERT(FIRST_THREAD_IN_PROC(p),
	    ("proc_reap: no residual thread!"));
	PROC_TREE_UNREF(p);
	atomic_add_int(&nprocs, -1);
}

/*
 * Fill '*siginfo' (when non-NULL) with a SIGCHLD description of 'p'
 * built from its recorded exit status.  The process lock must be owned.
 */
static void
wait_fill_siginfo(struct proc *p, siginfo_t *siginfo)
{
	PROC_LOCK_ASSERT(p, MA_OWNED);

	if (siginfo == NULL)
		return;

	bzero(siginfo, sizeof(*siginfo));
	siginfo->si_errno = 0;

	/*
	 * SUSv4 requires that the si_signo value is always
	 * SIGCHLD. Obey it despite the rfork(2) interface allows to
	 * request other signal for child exit notification.
	 */
	siginfo->si_signo = SIGCHLD;

	/*
	 *  This is still a rough estimate.  We will fix the cases
	 *  TRAPPED, STOPPED, and CONTINUED later.
	 */
	if (WCOREDUMP(p->p_xsig)) {
		siginfo->si_code = CLD_DUMPED;
		siginfo->si_status = WTERMSIG(p->p_xsig);
	} else if (WIFSIGNALED(p->p_xsig)) {
		siginfo->si_code = CLD_KILLED;
		siginfo->si_status = WTERMSIG(p->p_xsig);
	} else {
		siginfo->si_code = CLD_EXITED;
		siginfo->si_status = p->p_xexit;
	}

	siginfo->si_pid = p->p_pid;
	siginfo->si_uid = p->p_ucred->cr_uid;

	/*
	 * The si_addr field would be useful additional detail, but
	 * apparently the PC value may be lost when we reach this
	 * point.  bzero() above sets siginfo->si_addr to NULL.
	 */
}

/*
 * Fill '*wrusage' (when non-NULL) with the resource usage of 'p' itself
 * (wru_self) and of its children (wru_children).  The process lock
 * must be owned.
 */
static void
wait_fill_wrusage(struct proc *p, struct __wrusage *wrusage)
{
	struct rusage *rup;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	if (wrusage == NULL)
		return;

	rup = &wrusage->wru_self;
	*rup = p->p_ru;
	PROC_STATLOCK(p);
	calcru(p, &rup->ru_utime, &rup->ru_stime);
	PROC_STATUNLOCK(p);

	rup = &wrusage->wru_children;
	*rup = p->p_stats->p_cru;
	calccru(p, &rup->ru_utime, &rup->ru_stime);
}

/*
 * Decide whether 'p' matches the (idtype, id) selection and 'options'
 * of a wait request issued by 'td'.  proctree_lock must be held
 * exclusively; 'p' is locked here.
 *
 * Returns:
 *   0  - 'p' does not match; its lock has been released.
 *   1  - 'p' matches; siginfo and wrusage have been filled and 'p' is
 *        returned still locked.
 *  -1  - 'p' was a zombie and 'check_only' was zero; proc_reap() has
 *        been called, which releases both locks.
 */
static int
proc_to_reap(struct thread *td, struct proc *p, idtype_t idtype, id_t id,
    int *status, int options, struct __wrusage *wrusage, siginfo_t *siginfo,
    int check_only)
{
	sx_assert(&proctree_lock, SA_XLOCKED);

	PROC_LOCK(p);

	switch (idtype) {
	case P_ALL:
		if (p->p_procdesc == NULL ||
		   (p->p_pptr == td->td_proc &&
		   (p->p_flag & P_TRACED) != 0)) {
			break;
		}

		PROC_UNLOCK(p);
		return (0);
	case P_PID:
		if (p->p_pid != (pid_t)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	case P_PGID:
		if (p->p_pgid != (pid_t)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	case P_SID:
		if (p->p_session->s_sid != (pid_t)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	case P_UID:
		if (p->p_ucred->cr_uid != (uid_t)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	case P_GID:
		if (p->p_ucred->cr_gid != (gid_t)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	case P_JAILID:
		if (p->p_ucred->cr_prison->pr_id != (int)id) {
			PROC_UNLOCK(p);
			return (0);
		}
		break;
	/*
	 * It seems that the thread structures get zeroed out
	 * at process exit.  This makes it impossible to
	 * support P_SETID, P_CID or P_CPUID.
	 */
	default:
		PROC_UNLOCK(p);
		return (0);
	}

	if (p_canwait(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}

	if ((options & WEXITED) == 0 && p->p_state == PRS_ZOMBIE) {
		PROC_UNLOCK(p);
		return (0);
	}

	/*
	 * This special case handles a kthread spawned by linux_clone
	 * (see linux_misc.c).  The linux_wait4 and linux_waitpid
	 * functions need to be able to distinguish between waiting
	 * on a process and waiting on a thread.  It is a thread if
	 * p_sigparent is not SIGCHLD, and the WLINUXCLONE option
	 * signifies we want to wait for threads and not processes.
	 */
	if ((p->p_sigparent != SIGCHLD) ^
	    ((options & WLINUXCLONE) != 0)) {
		PROC_UNLOCK(p);
		return (0);
	}

	wait_fill_siginfo(p, siginfo);

	/*
	 * There should be no reason to limit resources usage info to
	 * exited processes only.  A snapshot about any resources used
	 * by a stopped process may be exactly what is needed.
	 */
	wait_fill_wrusage(p, wrusage);

	if (p->p_state == PRS_ZOMBIE && !check_only) {
		proc_reap(td, p, status, options);
		return (-1);
	}
	return (1);
}

int
kern_wait(struct thread *td, pid_t pid, int *status, int options,
    struct rusage *rusage)
{
	struct __wrusage wru, *wrup;
	idtype_t idtype;
	id_t id;
	int ret;

	/*
	 * Translate the special pid values into the (idtype, pid)
	 * pair for kern_wait6.  The WAIT_MYPGRP case is handled by
	 * kern_wait6() on its own.
	 */
	if (pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (pid < 0) {
		idtype = P_PGID;
		id = (id_t)-pid;
	} else {
		idtype = P_PID;
		id = (id_t)pid;
	}

	if (rusage != NULL)
		wrup = &wru;
	else
		wrup = NULL;

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
1317 */ 1318 options |= WEXITED | WTRAPPED; 1319 ret = kern_wait6(td, idtype, id, status, options, wrup, NULL); 1320 if (rusage != NULL) 1321 *rusage = wru.wru_self; 1322 return (ret); 1323 } 1324 1325 static void 1326 report_alive_proc(struct thread *td, struct proc *p, siginfo_t *siginfo, 1327 int *status, int options, int si_code) 1328 { 1329 bool cont; 1330 1331 PROC_LOCK_ASSERT(p, MA_OWNED); 1332 sx_assert(&proctree_lock, SA_XLOCKED); 1333 MPASS(si_code == CLD_TRAPPED || si_code == CLD_STOPPED || 1334 si_code == CLD_CONTINUED); 1335 1336 cont = si_code == CLD_CONTINUED; 1337 if ((options & WNOWAIT) == 0) { 1338 if (cont) 1339 p->p_flag &= ~P_CONTINUED; 1340 else 1341 p->p_flag |= P_WAITED; 1342 if (kern_wait_dequeue_sigchld && 1343 (td->td_proc->p_sysent->sv_flags & SV_SIG_WAITNDQ) == 0) { 1344 PROC_LOCK(td->td_proc); 1345 sigqueue_take(p->p_ksi); 1346 PROC_UNLOCK(td->td_proc); 1347 } 1348 } 1349 sx_xunlock(&proctree_lock); 1350 if (siginfo != NULL) { 1351 siginfo->si_code = si_code; 1352 siginfo->si_status = cont ? SIGCONT : p->p_xsig; 1353 } 1354 if (status != NULL) 1355 *status = cont ? SIGCONT : W_STOPCODE(p->p_xsig); 1356 PROC_UNLOCK(p); 1357 } 1358 1359 static int 1360 wait6_checkopt(int options) 1361 { 1362 /* If we don't know the option, just return. */ 1363 if ((options & ~(WUNTRACED | WNOHANG | WCONTINUED | WNOWAIT | 1364 WEXITED | WTRAPPED | WLINUXCLONE)) != 0) 1365 return (EXTERROR(EINVAL, "Unknown options %#jx", options)); 1366 if ((options & (WEXITED | WUNTRACED | WCONTINUED | WTRAPPED)) == 0) { 1367 /* 1368 * We will be unable to find any matching processes, 1369 * because there are no known events to look for. 1370 * Prefer to return error instead of blocking 1371 * indefinitely. 1372 */ 1373 return (EXTERROR(EINVAL, 1374 "Cannot match processes %#jx", options)); 1375 } 1376 return (0); 1377 } 1378 1379 /* 1380 * Checks and reports status for alive process, according to the 1381 * options. 
Returns true if the process fits one of the requested 1382 * options and its status was updated in siginfo. 1383 * 1384 * If the process was reported (the function result is true), both the 1385 * process and proctree locks are unlocked. 1386 */ 1387 static bool 1388 wait6_check_alive(struct thread *td, int options, struct proc *p, int *status, 1389 siginfo_t *siginfo) 1390 { 1391 bool report; 1392 1393 PROC_LOCK_ASSERT(p, MA_OWNED); 1394 sx_assert(&proctree_lock, SA_XLOCKED); 1395 1396 if ((options & WTRAPPED) != 0 && (p->p_flag & P_TRACED) != 0) { 1397 PROC_SLOCK(p); 1398 report = (p->p_flag & (P_STOPPED_TRACE | P_STOPPED_SIG)) && 1399 p->p_suspcount == p->p_numthreads && 1400 (p->p_flag & P_WAITED) == 0; 1401 PROC_SUNLOCK(p); 1402 if (report) { 1403 CTR4(KTR_PTRACE, 1404 "wait: returning trapped pid %d status %#x (xstat %d) xthread %d", 1405 p->p_pid, W_STOPCODE(p->p_xsig), p->p_xsig, 1406 p->p_xthread != NULL ? 1407 p->p_xthread->td_tid : -1); 1408 report_alive_proc(td, p, siginfo, status, 1409 options, CLD_TRAPPED); 1410 return (true); 1411 } 1412 } 1413 1414 if ((options & WUNTRACED) != 0 && (p->p_flag & P_STOPPED_SIG) != 0) { 1415 PROC_SLOCK(p); 1416 report = p->p_suspcount == p->p_numthreads && 1417 (p->p_flag & P_WAITED) == 0; 1418 PROC_SUNLOCK(p); 1419 if (report) { 1420 report_alive_proc(td, p, siginfo, status, options, 1421 CLD_STOPPED); 1422 return (true); 1423 } 1424 } 1425 1426 if ((options & WCONTINUED) != 0 && (p->p_flag & P_CONTINUED) != 0) { 1427 report_alive_proc(td, p, siginfo, status, options, 1428 CLD_CONTINUED); 1429 return (true); 1430 } 1431 1432 return (false); 1433 } 1434 1435 int 1436 kern_wait6(struct thread *td, idtype_t idtype, id_t id, int *status, 1437 int options, struct __wrusage *wrusage, siginfo_t *siginfo) 1438 { 1439 struct proc *p, *q; 1440 pid_t pid; 1441 int error, nfound, ret; 1442 1443 AUDIT_ARG_VALUE((int)idtype); /* XXX - This is likely wrong! */ 1444 AUDIT_ARG_PID((pid_t)id); /* XXX - This may be wrong! 
*/ 1445 AUDIT_ARG_VALUE(options); 1446 1447 q = td->td_proc; 1448 1449 if ((pid_t)id == WAIT_MYPGRP && (idtype == P_PID || idtype == P_PGID)) { 1450 PROC_LOCK(q); 1451 id = (id_t)q->p_pgid; 1452 PROC_UNLOCK(q); 1453 idtype = P_PGID; 1454 } 1455 1456 error = wait6_checkopt(options); 1457 if (error != 0) 1458 return (error); 1459 loop: 1460 if (q->p_flag & P_STATCHILD) { 1461 PROC_LOCK(q); 1462 q->p_flag &= ~P_STATCHILD; 1463 PROC_UNLOCK(q); 1464 } 1465 sx_xlock(&proctree_lock); 1466 loop_locked: 1467 nfound = 0; 1468 LIST_FOREACH(p, &q->p_children, p_sibling) { 1469 pid = p->p_pid; 1470 ret = proc_to_reap(td, p, idtype, id, status, options, 1471 wrusage, siginfo, 0); 1472 if (ret == 0) 1473 continue; 1474 else if (ret != 1) { 1475 td->td_retval[0] = pid; 1476 return (0); 1477 } 1478 1479 /* 1480 * When running in capsicum(4) mode, make wait(2) ignore 1481 * processes created with pdfork(2). This is because one can 1482 * disown them - by passing their process descriptor to another 1483 * process - which means it needs to be prevented from touching 1484 * them afterwards. 1485 */ 1486 if (IN_CAPABILITY_MODE(td) && p->p_procdesc != NULL) { 1487 PROC_UNLOCK(p); 1488 continue; 1489 } 1490 1491 nfound++; 1492 PROC_LOCK_ASSERT(p, MA_OWNED); 1493 1494 if (wait6_check_alive(td, options, p, status, siginfo)) { 1495 td->td_retval[0] = pid; 1496 return (0); 1497 } 1498 1499 PROC_UNLOCK(p); 1500 } 1501 1502 /* 1503 * Look in the orphans list too, to allow the parent to 1504 * collect it's child exit status even if child is being 1505 * debugged. 1506 * 1507 * Debugger detaches from the parent upon successful 1508 * switch-over from parent to child. At this point due to 1509 * re-parenting the parent loses the child to debugger and a 1510 * wait4(2) call would report that it has no children to wait 1511 * for. By maintaining a list of orphans we allow the parent 1512 * to successfully wait until the child becomes a zombie. 
1513 */ 1514 if (nfound == 0) { 1515 LIST_FOREACH(p, &q->p_orphans, p_orphan) { 1516 ret = proc_to_reap(td, p, idtype, id, NULL, options, 1517 NULL, NULL, 1); 1518 if (ret != 0) { 1519 KASSERT(ret != -1, ("reaped an orphan (pid %d)", 1520 (int)td->td_retval[0])); 1521 PROC_UNLOCK(p); 1522 nfound++; 1523 break; 1524 } 1525 } 1526 } 1527 if (nfound == 0) { 1528 sx_xunlock(&proctree_lock); 1529 return (ECHILD); 1530 } 1531 if ((options & WNOHANG) != 0) { 1532 sx_xunlock(&proctree_lock); 1533 td->td_retval[0] = 0; 1534 return (0); 1535 } 1536 PROC_LOCK(q); 1537 if ((q->p_flag & P_STATCHILD) != 0) { 1538 q->p_flag &= ~P_STATCHILD; 1539 PROC_UNLOCK(q); 1540 goto loop_locked; 1541 } 1542 sx_xunlock(&proctree_lock); 1543 error = msleep(q, &q->p_mtx, PWAIT | PCATCH | PDROP, "wait", 0); 1544 if (error != 0) 1545 return (error); 1546 goto loop; 1547 } 1548 1549 int 1550 kern_pdwait(struct thread *td, int fd, int *status, 1551 int options, struct __wrusage *wrusage, siginfo_t *siginfo) 1552 { 1553 struct proc *p; 1554 struct file *fp; 1555 struct procdesc *pd; 1556 int error; 1557 1558 AUDIT_ARG_FD(fd); 1559 AUDIT_ARG_VALUE(options); 1560 1561 error = wait6_checkopt(options); 1562 if (error != 0) 1563 return (error); 1564 1565 error = fget(td, fd, &cap_pdwait_rights, &fp); 1566 if (error != 0) 1567 return (error); 1568 if (fp->f_type != DTYPE_PROCDESC) { 1569 error = EINVAL; 1570 goto exit_unlocked; 1571 } 1572 pd = fp->f_data; 1573 1574 for (;;) { 1575 /* We own a reference on the procdesc file. 
*/ 1576 KASSERT((pd->pd_flags & PDF_CLOSED) == 0, 1577 ("PDF_CLOSED proc %p procdesc %p pd flags %#x", 1578 p, pd, pd->pd_flags)); 1579 1580 sx_xlock(&proctree_lock); 1581 p = pd->pd_proc; 1582 if (p == NULL) { 1583 error = ESRCH; 1584 goto exit_tree_locked; 1585 } 1586 PROC_LOCK(p); 1587 1588 error = p_canwait(td, p); 1589 if (error != 0) 1590 break; 1591 if ((options & WEXITED) == 0 && p->p_state == PRS_ZOMBIE) { 1592 error = ESRCH; 1593 break; 1594 } 1595 1596 wait_fill_siginfo(p, siginfo); 1597 wait_fill_wrusage(p, wrusage); 1598 1599 if (p->p_state == PRS_ZOMBIE) { 1600 proc_reap(td, p, status, options); 1601 goto exit_unlocked; 1602 } 1603 1604 if (wait6_check_alive(td, options, p, status, siginfo)) 1605 goto exit_unlocked; 1606 1607 if ((options & WNOHANG) != 0) { 1608 error = EWOULDBLOCK; 1609 break; 1610 } 1611 1612 PROC_UNLOCK(p); 1613 error = sx_sleep(&p->p_procdesc, &proctree_lock, 1614 PWAIT | PCATCH | PDROP, "pdwait", 0); 1615 if (error != 0) 1616 goto exit_unlocked; 1617 } 1618 1619 PROC_UNLOCK(p); 1620 exit_tree_locked: 1621 sx_xunlock(&proctree_lock); 1622 exit_unlocked: 1623 fdrop(fp, td); 1624 return (error); 1625 } 1626 1627 void 1628 proc_add_orphan(struct proc *child, struct proc *parent) 1629 { 1630 1631 sx_assert(&proctree_lock, SX_XLOCKED); 1632 KASSERT((child->p_flag & P_TRACED) != 0, 1633 ("proc_add_orphan: not traced")); 1634 1635 if (LIST_EMPTY(&parent->p_orphans)) { 1636 child->p_treeflag |= P_TREE_FIRST_ORPHAN; 1637 LIST_INSERT_HEAD(&parent->p_orphans, child, p_orphan); 1638 } else { 1639 LIST_INSERT_AFTER(LIST_FIRST(&parent->p_orphans), 1640 child, p_orphan); 1641 } 1642 child->p_treeflag |= P_TREE_ORPHANED; 1643 } 1644 1645 /* 1646 * Make process 'parent' the new parent of process 'child'. 1647 * Must be called with an exclusive hold of proctree lock. 
1648 */ 1649 void 1650 proc_reparent(struct proc *child, struct proc *parent, bool set_oppid) 1651 { 1652 1653 sx_assert(&proctree_lock, SX_XLOCKED); 1654 PROC_LOCK_ASSERT(child, MA_OWNED); 1655 if (child->p_pptr == parent) 1656 return; 1657 1658 PROC_LOCK(child->p_pptr); 1659 sigqueue_take(child->p_ksi); 1660 PROC_UNLOCK(child->p_pptr); 1661 LIST_REMOVE(child, p_sibling); 1662 LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); 1663 1664 proc_clear_orphan(child); 1665 if ((child->p_flag & P_TRACED) != 0) { 1666 proc_add_orphan(child, child->p_pptr); 1667 } 1668 1669 child->p_pptr = parent; 1670 if (set_oppid) 1671 child->p_oppid = parent->p_pid; 1672 } 1673 1674 static void 1675 initexit(void *dummy __unused) 1676 { 1677 ast_register(TDA_ASYNC_EXIT, ASTR_ASTF_REQUIRED, 0, ast_async_exit); 1678 } 1679 SYSINIT(exit, SI_SUB_EXEC, SI_ORDER_ANY, initexit, NULL); 1680