1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2011, Joyent, Inc. All rights reserved. 25 * Copyright 2020 Oxide Computer Company 26 */ 27 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/sysmacros.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/user.h> 36 #include <sys/errno.h> 37 #include <sys/proc.h> 38 #include <sys/ucontext.h> 39 #include <sys/procfs.h> 40 #include <sys/vnode.h> 41 #include <sys/acct.h> 42 #include <sys/var.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/wait.h> 46 #include <sys/siginfo.h> 47 #include <sys/procset.h> 48 #include <sys/class.h> 49 #include <sys/file.h> 50 #include <sys/session.h> 51 #include <sys/kmem.h> 52 #include <sys/vtrace.h> 53 #include <sys/prsystm.h> 54 #include <sys/ipc.h> 55 #include <sys/sem_impl.h> 56 #include <c2/audit.h> 57 #include <sys/aio_impl.h> 58 #include <vm/as.h> 59 #include <sys/poll.h> 60 #include <sys/door.h> 61 #include <sys/lwpchan_impl.h> 62 #include <sys/utrap.h> 63 #include <sys/task.h> 64 #include <sys/exacct.h> 65 #include <sys/cyclic.h> 66 #include <sys/schedctl.h> 67 #include <sys/rctl.h> 68 #include <sys/contract_impl.h> 69 #include <sys/contract/process_impl.h> 70 #include <sys/list.h> 71 #include <sys/dtrace.h> 72 #include <sys/pool.h> 73 #include <sys/sdt.h> 74 #include <sys/corectl.h> 75 #include <sys/core.h> 76 #include <sys/brand.h> 77 #include <sys/libc_kernel.h> 78 79 /* 80 * convert code/data pair into old style wait status 81 */ 82 int 83 wstat(int code, int data) 84 { 85 int stat = (data & 0377); 86 87 switch (code) { 88 case CLD_EXITED: 89 stat <<= 8; 90 break; 91 case CLD_DUMPED: 92 stat |= WCOREFLG; 93 break; 94 case CLD_KILLED: 95 break; 96 case CLD_TRAPPED: 97 case CLD_STOPPED: 98 stat <<= 8; 99 stat |= WSTOPFLG; 100 break; 101 case CLD_CONTINUED: 102 stat = WCONTFLG; 103 break; 104 default: 105 cmn_err(CE_PANIC, "wstat: bad code"); 106 /* NOTREACHED */ 107 } 108 return (stat); 109 } 110 111 static char * 112 exit_reason(char *buf, size_t bufsz, int what, int why) 113 { 114 switch (why) { 115 case CLD_EXITED: 116 (void) snprintf(buf, bufsz, "exited with status %d", what); 117 break; 118 case CLD_KILLED: 119 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what); 120 break; 121 case CLD_DUMPED: 122 (void) snprintf(buf, bufsz, "core dumped on signal %d", what); 123 break; 124 default: 125 (void) snprintf(buf, bufsz, "encountered unknown error " 126 "(%d, %d)", why, what); 127 break; 128 } 129 130 return (buf); 131 } 132 133 /* 134 * exit system call: pass back caller's arg. 135 */ 136 void 137 rexit(int rval) 138 { 139 exit(CLD_EXITED, rval); 140 } 141 142 /* 143 * Called by proc_exit() when a zone's init exits, presumably because 144 * it failed. As long as the given zone is still in the "running" 145 * state, we will re-exec() init, but first we need to reset things 146 * which are usually inherited across exec() but will break init's 147 * assumption that it is being exec()'d from a virgin process. Most 148 * importantly this includes closing all file descriptors (exec only 149 * closes those marked close-on-exec) and resetting signals (exec only 150 * resets handled signals, and we need to clear any signals which 151 * killed init). Anything else that exec(2) says would be inherited, 152 * but would affect the execution of init, needs to be reset. 153 */ 154 static int 155 restart_init(int what, int why) 156 { 157 kthread_t *t = curthread; 158 klwp_t *lwp = ttolwp(t); 159 proc_t *p = ttoproc(t); 160 user_t *up = PTOU(p); 161 162 vnode_t *oldcd, *oldrd; 163 int i, err; 164 char reason_buf[64]; 165 166 /* 167 * Let zone admin (and global zone admin if this is for a non-global 168 * zone) know that init has failed and will be restarted. 169 */ 170 zcmn_err(p->p_zone->zone_id, CE_WARN, 171 "init(1M) %s: restarting automatically", 172 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 173 174 if (!INGLOBALZONE(p)) { 175 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 176 "restarting automatically", 177 p->p_zone->zone_name, p->p_pid, reason_buf); 178 } 179 180 /* 181 * Remove any fpollinfo_t's for this (last) thread from our file 182 * descriptors so closeall() can ASSERT() that they're all gone. 183 * Then close all open file descriptors in the process. 184 */ 185 pollcleanup(); 186 closeall(P_FINFO(p)); 187 188 /* 189 * Grab p_lock and begin clearing miscellaneous global process 190 * state that needs to be reset before we exec the new init(1M). 191 */ 192 193 mutex_enter(&p->p_lock); 194 prbarrier(p); 195 196 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 197 up->u_cmask = CMASK; 198 199 sigemptyset(&t->t_hold); 200 sigemptyset(&t->t_sig); 201 sigemptyset(&t->t_extsig); 202 203 sigemptyset(&p->p_sig); 204 sigemptyset(&p->p_extsig); 205 206 sigdelq(p, t, 0); 207 sigdelq(p, NULL, 0); 208 209 if (p->p_killsqp) { 210 siginfofree(p->p_killsqp); 211 p->p_killsqp = NULL; 212 } 213 214 /* 215 * Reset any signals that are ignored back to the default disposition. 216 * Other u_signal members will be cleared when exec calls sigdefault(). 217 */ 218 for (i = 1; i < NSIG; i++) { 219 if (up->u_signal[i - 1] == SIG_IGN) { 220 up->u_signal[i - 1] = SIG_DFL; 221 sigemptyset(&up->u_sigmask[i - 1]); 222 } 223 } 224 225 /* 226 * Clear the current signal, any signal info associated with it, and 227 * any signal information from contracts and/or contract templates. 228 */ 229 lwp->lwp_cursig = 0; 230 lwp->lwp_extsig = 0; 231 if (lwp->lwp_curinfo != NULL) { 232 siginfofree(lwp->lwp_curinfo); 233 lwp->lwp_curinfo = NULL; 234 } 235 lwp_ctmpl_clear(lwp); 236 237 /* 238 * Reset both the process root directory and the current working 239 * directory to the root of the zone just as we do during boot. 240 */ 241 VN_HOLD(p->p_zone->zone_rootvp); 242 oldrd = up->u_rdir; 243 up->u_rdir = p->p_zone->zone_rootvp; 244 245 VN_HOLD(p->p_zone->zone_rootvp); 246 oldcd = up->u_cdir; 247 up->u_cdir = p->p_zone->zone_rootvp; 248 249 if (up->u_cwd != NULL) { 250 refstr_rele(up->u_cwd); 251 up->u_cwd = NULL; 252 } 253 254 mutex_exit(&p->p_lock); 255 256 if (oldrd != NULL) 257 VN_RELE(oldrd); 258 if (oldcd != NULL) 259 VN_RELE(oldcd); 260 261 /* Free the controlling tty. (freectty() always assumes curproc.) */ 262 ASSERT(p == curproc); 263 (void) freectty(B_TRUE); 264 265 /* 266 * Now exec() the new init(1M) on top of the current process. If we 267 * succeed, the caller will treat this like a successful system call. 268 * If we fail, we issue messages and the caller will proceed with exit. 269 */ 270 err = exec_init(p->p_zone->zone_initname, NULL); 271 272 if (err == 0) 273 return (0); 274 275 zcmn_err(p->p_zone->zone_id, CE_WARN, 276 "failed to restart init(1M) (err=%d): system reboot required", err); 277 278 if (!INGLOBALZONE(p)) { 279 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 280 "(pid %d, err=%d): zoneadm(1M) boot required", 281 p->p_zone->zone_name, p->p_pid, err); 282 } 283 284 return (-1); 285 } 286 287 /* 288 * Release resources. 289 * Enter zombie state. 290 * Wake up parent and init processes, 291 * and dispose of children. 292 */ 293 void 294 exit(int why, int what) 295 { 296 /* 297 * If proc_exit() fails, then some other lwp in the process 298 * got there first. We just have to call lwp_exit() to allow 299 * the other lwp to finish exiting the process. Otherwise we're 300 * restarting init, and should return. 301 */ 302 if (proc_exit(why, what) != 0) { 303 mutex_enter(&curproc->p_lock); 304 ASSERT(curproc->p_flag & SEXITLWPS); 305 lwp_exit(); 306 /* NOTREACHED */ 307 } 308 } 309 310 /* 311 * Set the SEXITING flag on the process, after making sure /proc does 312 * not have it locked. This is done in more places than proc_exit(), 313 * so it is a separate function. 314 */ 315 void 316 proc_is_exiting(proc_t *p) 317 { 318 mutex_enter(&p->p_lock); 319 prbarrier(p); 320 p->p_flag |= SEXITING; 321 mutex_exit(&p->p_lock); 322 } 323 324 /* 325 * Return value: 326 * 1 - exitlwps() failed, call (or continue) lwp_exit() 327 * 0 - restarting init. Return through system call path 328 */ 329 int 330 proc_exit(int why, int what) 331 { 332 kthread_t *t = curthread; 333 klwp_t *lwp = ttolwp(t); 334 proc_t *p = ttoproc(t); 335 zone_t *z = p->p_zone; 336 timeout_id_t tmp_id; 337 int rv; 338 proc_t *q; 339 task_t *tk; 340 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 341 sigqueue_t *sqp; 342 lwpdir_t *lwpdir; 343 uint_t lwpdir_sz; 344 tidhash_t *tidhash; 345 uint_t tidhash_sz; 346 ret_tidhash_t *ret_tidhash; 347 refstr_t *cwd; 348 hrtime_t hrutime, hrstime; 349 int evaporate; 350 351 /* 352 * Stop and discard the process's lwps except for the current one, 353 * unless some other lwp beat us to it. If exitlwps() fails then 354 * return and the calling lwp will call (or continue in) lwp_exit(). 355 */ 356 proc_is_exiting(p); 357 if (exitlwps(0) != 0) 358 return (1); 359 360 mutex_enter(&p->p_lock); 361 if (p->p_ttime > 0) { 362 /* 363 * Account any remaining ticks charged to this process 364 * on its way out. 365 */ 366 (void) task_cpu_time_incr(p->p_task, p->p_ttime); 367 p->p_ttime = 0; 368 } 369 mutex_exit(&p->p_lock); 370 371 DTRACE_PROC(lwp__exit); 372 DTRACE_PROC1(exit, int, why); 373 374 /* 375 * Will perform any brand specific proc exit processing, since this 376 * is always the last lwp, will also perform lwp_exit and free brand 377 * data 378 */ 379 if (PROC_IS_BRANDED(p)) { 380 lwp_detach_brand_hdlrs(lwp); 381 brand_clearbrand(p, B_FALSE); 382 } 383 384 /* 385 * Don't let init exit unless zone_start_init() failed its exec, or 386 * we are shutting down the zone or the machine. 387 * 388 * Since we are single threaded, we don't need to lock the 389 * following accesses to zone_proc_initpid. 390 */ 391 if (p->p_pid == z->zone_proc_initpid) { 392 if (z->zone_boot_err == 0 && 393 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 394 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) { 395 if (z->zone_restart_init == B_TRUE) { 396 if (restart_init(what, why) == 0) 397 return (0); 398 } else { 399 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, 400 CRED()); 401 } 402 } 403 404 /* 405 * Since we didn't or couldn't restart init, we clear 406 * the zone's init state and proceed with exit 407 * processing. 408 */ 409 z->zone_proc_initpid = -1; 410 } 411 412 lwp_pcb_exit(); 413 414 /* 415 * Allocate a sigqueue now, before we grab locks. 416 * It will be given to sigcld(), below. 417 * Special case: If we will be making the process disappear 418 * without a trace because it is either: 419 * * an exiting SSYS process, or 420 * * a posix_spawn() vfork child who requests it, 421 * we don't bother to allocate a useless sigqueue. 422 */ 423 evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) && 424 why == CLD_EXITED && what == _EVAPORATE); 425 if (!evaporate) 426 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 427 428 /* 429 * revoke any doors created by the process. 430 */ 431 if (p->p_door_list) 432 door_exit(); 433 434 /* 435 * Release schedctl data structures. 436 */ 437 if (p->p_pagep) 438 schedctl_proc_cleanup(); 439 440 /* 441 * make sure all pending kaio has completed. 442 */ 443 if (p->p_aio) 444 aio_cleanup_exit(); 445 446 /* 447 * discard the lwpchan cache. 448 */ 449 if (p->p_lcp != NULL) 450 lwpchan_destroy_cache(0); 451 452 /* 453 * Clean up any DTrace helper actions or probes for the process. 454 */ 455 if (p->p_dtrace_helpers != NULL) { 456 ASSERT(dtrace_helpers_cleanup != NULL); 457 (*dtrace_helpers_cleanup)(p); 458 } 459 460 /* 461 * Clean up any signalfd state for the process. 462 */ 463 if (p->p_sigfd != NULL) { 464 VERIFY(sigfd_exit_helper != NULL); 465 (*sigfd_exit_helper)(); 466 } 467 468 /* untimeout the realtime timers */ 469 if (p->p_itimer != NULL) 470 timer_exit(); 471 472 if ((tmp_id = p->p_alarmid) != 0) { 473 p->p_alarmid = 0; 474 (void) untimeout(tmp_id); 475 } 476 477 /* 478 * If we had generated any upanic(2) state, free that now. 479 */ 480 if (p->p_upanic != NULL) { 481 kmem_free(p->p_upanic, PRUPANIC_BUFLEN); 482 p->p_upanic = NULL; 483 } 484 485 /* 486 * Remove any fpollinfo_t's for this (last) thread from our file 487 * descriptors so closeall() can ASSERT() that they're all gone. 488 */ 489 pollcleanup(); 490 491 if (p->p_rprof_cyclic != CYCLIC_NONE) { 492 mutex_enter(&cpu_lock); 493 cyclic_remove(p->p_rprof_cyclic); 494 mutex_exit(&cpu_lock); 495 } 496 497 mutex_enter(&p->p_lock); 498 499 /* 500 * Clean up any DTrace probes associated with this process. 501 */ 502 if (p->p_dtrace_probes) { 503 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 504 dtrace_fasttrap_exit_ptr(p); 505 } 506 507 while ((tmp_id = p->p_itimerid) != 0) { 508 p->p_itimerid = 0; 509 mutex_exit(&p->p_lock); 510 (void) untimeout(tmp_id); 511 mutex_enter(&p->p_lock); 512 } 513 514 lwp_cleanup(); 515 516 /* 517 * We are about to exit; prevent our resource associations from 518 * being changed. 519 */ 520 pool_barrier_enter(); 521 522 /* 523 * Block the process against /proc now that we have really 524 * acquired p->p_lock (to manipulate p_tlist at least). 525 */ 526 prbarrier(p); 527 528 sigfillset(&p->p_ignore); 529 sigemptyset(&p->p_siginfo); 530 sigemptyset(&p->p_sig); 531 sigemptyset(&p->p_extsig); 532 sigemptyset(&t->t_sig); 533 sigemptyset(&t->t_extsig); 534 sigemptyset(&p->p_sigmask); 535 sigdelq(p, t, 0); 536 lwp->lwp_cursig = 0; 537 lwp->lwp_extsig = 0; 538 p->p_flag &= ~(SKILLED | SEXTKILLED); 539 if (lwp->lwp_curinfo) { 540 siginfofree(lwp->lwp_curinfo); 541 lwp->lwp_curinfo = NULL; 542 } 543 544 t->t_proc_flag |= TP_LWPEXIT; 545 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 546 prlwpexit(t); /* notify /proc */ 547 lwp_hash_out(p, t->t_tid); 548 prexit(p); 549 550 p->p_lwpcnt = 0; 551 p->p_tlist = NULL; 552 sigqfree(p); 553 term_mstate(t); 554 p->p_mterm = gethrtime(); 555 556 exec_vp = p->p_exec; 557 execdir_vp = p->p_execdir; 558 p->p_exec = NULLVP; 559 p->p_execdir = NULLVP; 560 mutex_exit(&p->p_lock); 561 562 pr_free_watched_pages(p); 563 564 closeall(P_FINFO(p)); 565 566 /* Free the controlling tty. (freectty() always assumes curproc.) */ 567 ASSERT(p == curproc); 568 (void) freectty(B_TRUE); 569 570 #if defined(__sparc) 571 if (p->p_utraps != NULL) 572 utrap_free(p); 573 #endif 574 if (p->p_semacct) /* IPC semaphore exit */ 575 semexit(p); 576 rv = wstat(why, what); 577 578 acct(rv & 0xff); 579 exacct_commit_proc(p, rv); 580 581 /* 582 * Release any resources associated with C2 auditing 583 */ 584 if (AU_AUDITING()) { 585 /* 586 * audit exit system call 587 */ 588 audit_exit(why, what); 589 } 590 591 /* 592 * Free address space. 593 */ 594 relvm(); 595 596 if (exec_vp) { 597 /* 598 * Close this executable which has been opened when the process 599 * was created by getproc(). 600 */ 601 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL); 602 VN_RELE(exec_vp); 603 } 604 if (execdir_vp) 605 VN_RELE(execdir_vp); 606 607 /* 608 * Release held contracts. 609 */ 610 contract_exit(p); 611 612 /* 613 * Depart our encapsulating process contract. 614 */ 615 if ((p->p_flag & SSYS) == 0) { 616 ASSERT(p->p_ct_process); 617 contract_process_exit(p->p_ct_process, p, rv); 618 } 619 620 /* 621 * Remove pool association, and block if requested by pool_do_bind. 622 */ 623 mutex_enter(&p->p_lock); 624 ASSERT(p->p_pool->pool_ref > 0); 625 atomic_dec_32(&p->p_pool->pool_ref); 626 p->p_pool = pool_default; 627 /* 628 * Now that our address space has been freed and all other threads 629 * in this process have exited, set the PEXITED pool flag. This 630 * tells the pools subsystems to ignore this process if it was 631 * requested to rebind this process to a new pool. 632 */ 633 p->p_poolflag |= PEXITED; 634 pool_barrier_exit(); 635 mutex_exit(&p->p_lock); 636 637 mutex_enter(&pidlock); 638 639 /* 640 * Delete this process from the newstate list of its parent. We 641 * will put it in the right place in the sigcld in the end. 642 */ 643 delete_ns(p->p_parent, p); 644 645 /* 646 * Reassign the orphans to the next of kin. 647 * Don't rearrange init's orphanage. 648 */ 649 if ((q = p->p_orphan) != NULL && p != proc_init) { 650 651 proc_t *nokp = p->p_nextofkin; 652 653 for (;;) { 654 q->p_nextofkin = nokp; 655 if (q->p_nextorph == NULL) 656 break; 657 q = q->p_nextorph; 658 } 659 q->p_nextorph = nokp->p_orphan; 660 nokp->p_orphan = p->p_orphan; 661 p->p_orphan = NULL; 662 } 663 664 /* 665 * Reassign the children to init. 666 * Don't try to assign init's children to init. 667 */ 668 if ((q = p->p_child) != NULL && p != proc_init) { 669 struct proc *np; 670 struct proc *initp = proc_init; 671 boolean_t setzonetop = B_FALSE; 672 673 if (!INGLOBALZONE(curproc)) 674 setzonetop = B_TRUE; 675 676 pgdetach(p); 677 678 do { 679 np = q->p_sibling; 680 /* 681 * Delete it from its current parent new state 682 * list and add it to init new state list 683 */ 684 delete_ns(q->p_parent, q); 685 686 q->p_ppid = 1; 687 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); 688 if (setzonetop) { 689 mutex_enter(&q->p_lock); 690 q->p_flag |= SZONETOP; 691 mutex_exit(&q->p_lock); 692 } 693 q->p_parent = initp; 694 695 /* 696 * Since q will be the first child, 697 * it will not have a previous sibling. 698 */ 699 q->p_psibling = NULL; 700 if (initp->p_child) { 701 initp->p_child->p_psibling = q; 702 } 703 q->p_sibling = initp->p_child; 704 initp->p_child = q; 705 if (q->p_proc_flag & P_PR_PTRACE) { 706 mutex_enter(&q->p_lock); 707 sigtoproc(q, NULL, SIGKILL); 708 mutex_exit(&q->p_lock); 709 } 710 /* 711 * sigcld() will add the child to parents 712 * newstate list. 713 */ 714 if (q->p_stat == SZOMB) 715 sigcld(q, NULL); 716 } while ((q = np) != NULL); 717 718 p->p_child = NULL; 719 ASSERT(p->p_child_ns == NULL); 720 } 721 722 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 723 724 mutex_enter(&p->p_lock); 725 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 726 727 /* 728 * Have our task accummulate our resource usage data before they 729 * become contaminated by p_cacct etc., and before we renounce 730 * membership of the task. 731 * 732 * We do this regardless of whether or not task accounting is active. 733 * This is to avoid having nonsense data reported for this task if 734 * task accounting is subsequently enabled. The overhead is minimal; 735 * by this point, this process has accounted for the usage of all its 736 * LWPs. We nonetheless do the work here, and under the protection of 737 * pidlock, so that the movement of the process's usage to the task 738 * happens at the same time as the removal of the process from the 739 * task, from the point of view of exacct_snapshot_task_usage(). 740 */ 741 exacct_update_task_mstate(p); 742 743 hrutime = mstate_aggr_state(p, LMS_USER); 744 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 745 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 746 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 747 748 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 749 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 750 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 751 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 752 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 753 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 754 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 755 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 756 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 757 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 758 759 p->p_ru.minflt += p->p_cru.minflt; 760 p->p_ru.majflt += p->p_cru.majflt; 761 p->p_ru.nswap += p->p_cru.nswap; 762 p->p_ru.inblock += p->p_cru.inblock; 763 p->p_ru.oublock += p->p_cru.oublock; 764 p->p_ru.msgsnd += p->p_cru.msgsnd; 765 p->p_ru.msgrcv += p->p_cru.msgrcv; 766 p->p_ru.nsignals += p->p_cru.nsignals; 767 p->p_ru.nvcsw += p->p_cru.nvcsw; 768 p->p_ru.nivcsw += p->p_cru.nivcsw; 769 p->p_ru.sysc += p->p_cru.sysc; 770 p->p_ru.ioch += p->p_cru.ioch; 771 772 p->p_stat = SZOMB; 773 p->p_proc_flag &= ~P_PR_PTRACE; 774 p->p_wdata = what; 775 p->p_wcode = (char)why; 776 777 cdir = PTOU(p)->u_cdir; 778 rdir = PTOU(p)->u_rdir; 779 cwd = PTOU(p)->u_cwd; 780 781 ASSERT(cdir != NULL || p->p_parent == &p0); 782 783 /* 784 * Release resource controls, as they are no longer enforceable. 785 */ 786 rctl_set_free(p->p_rctls); 787 788 /* 789 * Decrement tk_nlwps counter for our task.max-lwps resource control. 790 * An extended accounting record, if that facility is active, is 791 * scheduled to be written. We cannot give up task and project 792 * membership at this point because that would allow zombies to escape 793 * from the max-processes resource controls. Zombies stay in their 794 * current task and project until the process table slot is released 795 * in freeproc(). 796 */ 797 tk = p->p_task; 798 799 mutex_enter(&p->p_zone->zone_nlwps_lock); 800 tk->tk_nlwps--; 801 tk->tk_proj->kpj_nlwps--; 802 p->p_zone->zone_nlwps--; 803 mutex_exit(&p->p_zone->zone_nlwps_lock); 804 805 /* 806 * Clear the lwp directory and the lwpid hash table 807 * now that /proc can't bother us any more. 808 * We free the memory below, after dropping p->p_lock. 809 */ 810 lwpdir = p->p_lwpdir; 811 lwpdir_sz = p->p_lwpdir_sz; 812 tidhash = p->p_tidhash; 813 tidhash_sz = p->p_tidhash_sz; 814 ret_tidhash = p->p_ret_tidhash; 815 p->p_lwpdir = NULL; 816 p->p_lwpfree = NULL; 817 p->p_lwpdir_sz = 0; 818 p->p_tidhash = NULL; 819 p->p_tidhash_sz = 0; 820 p->p_ret_tidhash = NULL; 821 822 /* 823 * If the process has context ops installed, call the exit routine 824 * on behalf of this last remaining thread. Normally exitpctx() is 825 * called during thread_exit() or lwp_exit(), but because this is the 826 * last thread in the process, we must call it here. By the time 827 * thread_exit() is called (below), the association with the relevant 828 * process has been lost. 829 * 830 * We also free the context here. 831 */ 832 if (p->p_pctx) { 833 kpreempt_disable(); 834 exitpctx(p); 835 kpreempt_enable(); 836 837 freepctx(p, 0); 838 } 839 840 /* 841 * curthread's proc pointer is changed to point to the 'sched' 842 * process for the corresponding zone, except in the case when 843 * the exiting process is in fact a zsched instance, in which 844 * case the proc pointer is set to p0. We do so, so that the 845 * process still points at the right zone when we call the VN_RELE() 846 * below. 847 * 848 * This is because curthread's original proc pointer can be freed as 849 * soon as the child sends a SIGCLD to its parent. We use zsched so 850 * that for user processes, even in the final moments of death, the 851 * process is still associated with its zone. 852 */ 853 if (p != t->t_procp->p_zone->zone_zsched) 854 t->t_procp = t->t_procp->p_zone->zone_zsched; 855 else 856 t->t_procp = &p0; 857 858 mutex_exit(&p->p_lock); 859 if (!evaporate) { 860 p->p_pidflag &= ~CLDPEND; 861 sigcld(p, sqp); 862 } else { 863 /* 864 * Do what sigcld() would do if the disposition 865 * of the SIGCHLD signal were set to be ignored. 866 */ 867 cv_broadcast(&p->p_srwchan_cv); 868 freeproc(p); 869 } 870 mutex_exit(&pidlock); 871 872 /* 873 * We don't release u_cdir and u_rdir until SZOMB is set. 874 * This protects us against dofusers(). 875 */ 876 if (cdir) 877 VN_RELE(cdir); 878 if (rdir) 879 VN_RELE(rdir); 880 if (cwd) 881 refstr_rele(cwd); 882 883 /* 884 * task_rele() may ultimately cause the zone to go away (or 885 * may cause the last user process in a zone to go away, which 886 * signals zsched to go away). So prior to this call, we must 887 * no longer point at zsched. 888 */ 889 t->t_procp = &p0; 890 891 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 892 kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t)); 893 while (ret_tidhash != NULL) { 894 ret_tidhash_t *next = ret_tidhash->rth_next; 895 kmem_free(ret_tidhash->rth_tidhash, 896 ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t)); 897 kmem_free(ret_tidhash, sizeof (*ret_tidhash)); 898 ret_tidhash = next; 899 } 900 901 thread_exit(); 902 /* NOTREACHED */ 903 } 904 905 /* 906 * Format siginfo structure for wait system calls. 907 */ 908 void 909 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 910 { 911 ASSERT(MUTEX_HELD(&pidlock)); 912 913 bzero(ip, sizeof (k_siginfo_t)); 914 ip->si_signo = SIGCLD; 915 ip->si_code = pp->p_wcode; 916 ip->si_pid = pp->p_pid; 917 ip->si_ctid = PRCTID(pp); 918 ip->si_zoneid = pp->p_zone->zone_id; 919 ip->si_status = pp->p_wdata; 920 ip->si_stime = pp->p_stime; 921 ip->si_utime = pp->p_utime; 922 923 if (waitflag) { 924 pp->p_wcode = 0; 925 pp->p_wdata = 0; 926 pp->p_pidflag &= ~CLDPEND; 927 } 928 } 929 930 /* 931 * Wait system call. 932 * Search for a terminated (zombie) child, 933 * finally lay it to rest, and collect its status. 934 * Look also for stopped children, 935 * and pass back status from them. 936 */ 937 int 938 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 939 { 940 int found; 941 proc_t *cp, *pp; 942 int proc_gone; 943 int waitflag = !(options & WNOWAIT); 944 945 /* 946 * Obsolete flag, defined here only for binary compatibility 947 * with old statically linked executables. Delete this when 948 * we no longer care about these old and broken applications. 949 */ 950 #define _WNOCHLD 0400 951 options &= ~_WNOCHLD; 952 953 if (options == 0 || (options & ~WOPTMASK)) 954 return (EINVAL); 955 956 switch (idtype) { 957 case P_PID: 958 case P_PGID: 959 if (id < 0 || id >= maxpid) 960 return (EINVAL); 961 /* FALLTHROUGH */ 962 case P_ALL: 963 break; 964 default: 965 return (EINVAL); 966 } 967 968 pp = ttoproc(curthread); 969 970 /* 971 * lock parent mutex so that sibling chain can be searched. 972 */ 973 mutex_enter(&pidlock); 974 975 /* 976 * if we are only looking for exited processes and child_ns list 977 * is empty no reason to look at all children. 978 */ 979 if (idtype == P_ALL && 980 (options & ~WNOWAIT) == (WNOHANG | WEXITED) && 981 pp->p_child_ns == NULL) { 982 if (pp->p_child) { 983 mutex_exit(&pidlock); 984 bzero(ip, sizeof (k_siginfo_t)); 985 return (0); 986 } 987 mutex_exit(&pidlock); 988 return (ECHILD); 989 } 990 991 while (pp->p_child != NULL) { 992 993 proc_gone = 0; 994 995 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { 996 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) 997 continue; 998 if (idtype == P_PID && id != cp->p_pid) 999 continue; 1000 if (idtype == P_PGID && id != cp->p_pgrp) 1001 continue; 1002 1003 switch (cp->p_wcode) { 1004 1005 case CLD_TRAPPED: 1006 case CLD_STOPPED: 1007 case CLD_CONTINUED: 1008 cmn_err(CE_PANIC, 1009 "waitid: wrong state %d on the p_newstate" 1010 " list", cp->p_wcode); 1011 break; 1012 1013 case CLD_EXITED: 1014 case CLD_DUMPED: 1015 case CLD_KILLED: 1016 if (!(options & WEXITED)) { 1017 /* 1018 * Count how many are already gone 1019 * for good. 1020 */ 1021 proc_gone++; 1022 break; 1023 } 1024 if (!waitflag) { 1025 winfo(cp, ip, 0); 1026 } else { 1027 winfo(cp, ip, 1); 1028 freeproc(cp); 1029 } 1030 mutex_exit(&pidlock); 1031 if (waitflag) { /* accept SIGCLD */ 1032 sigcld_delete(ip); 1033 sigcld_repost(); 1034 } 1035 return (0); 1036 } 1037 1038 if (idtype == P_PID) 1039 break; 1040 } 1041 1042 /* 1043 * Wow! None of the threads on the p_sibling_ns list were 1044 * interesting threads. Check all the kids! 1045 */ 1046 found = 0; 1047 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { 1048 if (idtype == P_PID && id != cp->p_pid) 1049 continue; 1050 if (idtype == P_PGID && id != cp->p_pgrp) 1051 continue; 1052 1053 switch (cp->p_wcode) { 1054 case CLD_TRAPPED: 1055 if (!(options & WTRAPPED)) 1056 break; 1057 winfo(cp, ip, waitflag); 1058 mutex_exit(&pidlock); 1059 if (waitflag) { /* accept SIGCLD */ 1060 sigcld_delete(ip); 1061 sigcld_repost(); 1062 } 1063 return (0); 1064 1065 case CLD_STOPPED: 1066 if (!(options & WSTOPPED)) 1067 break; 1068 /* Is it still stopped? */ 1069 mutex_enter(&cp->p_lock); 1070 if (!jobstopped(cp)) { 1071 mutex_exit(&cp->p_lock); 1072 break; 1073 } 1074 mutex_exit(&cp->p_lock); 1075 winfo(cp, ip, waitflag); 1076 mutex_exit(&pidlock); 1077 if (waitflag) { /* accept SIGCLD */ 1078 sigcld_delete(ip); 1079 sigcld_repost(); 1080 } 1081 return (0); 1082 1083 case CLD_CONTINUED: 1084 if (!(options & WCONTINUED)) 1085 break; 1086 winfo(cp, ip, waitflag); 1087 mutex_exit(&pidlock); 1088 if (waitflag) { /* accept SIGCLD */ 1089 sigcld_delete(ip); 1090 sigcld_repost(); 1091 } 1092 return (0); 1093 1094 case CLD_EXITED: 1095 case CLD_DUMPED: 1096 case CLD_KILLED: 1097 if (idtype != P_PID && 1098 (cp->p_pidflag & CLDWAITPID)) 1099 continue; 1100 /* 1101 * Don't complain if a process was found in 1102 * the first loop but we broke out of the loop 1103 * because of the arguments passed to us. 1104 */ 1105 if (proc_gone == 0) { 1106 cmn_err(CE_PANIC, 1107 "waitid: wrong state on the" 1108 " p_child list"); 1109 } else { 1110 break; 1111 } 1112 } 1113 1114 found++; 1115 1116 if (idtype == P_PID) 1117 break; 1118 } 1119 1120 /* 1121 * If we found no interesting processes at all, 1122 * break out and return ECHILD. 1123 */ 1124 if (found + proc_gone == 0) 1125 break; 1126 1127 if (options & WNOHANG) { 1128 mutex_exit(&pidlock); 1129 bzero(ip, sizeof (k_siginfo_t)); 1130 /* 1131 * We should set ip->si_signo = SIGCLD, 1132 * but there is an SVVS test that expects 1133 * ip->si_signo to be zero in this case. 1134 */ 1135 return (0); 1136 } 1137 1138 /* 1139 * If we found no processes of interest that could 1140 * change state while we wait, we don't wait at all. 1141 * Get out with ECHILD according to SVID. 1142 */ 1143 if (found == proc_gone) 1144 break; 1145 1146 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1147 mutex_exit(&pidlock); 1148 return (EINTR); 1149 } 1150 } 1151 mutex_exit(&pidlock); 1152 return (ECHILD); 1153 } 1154 1155 int 1156 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1157 { 1158 int error; 1159 k_siginfo_t info; 1160 1161 if (error = waitid(idtype, id, &info, options)) 1162 return (set_errno(error)); 1163 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1164 return (set_errno(EFAULT)); 1165 return (0); 1166 } 1167 1168 #ifdef _SYSCALL32_IMPL 1169 1170 int 1171 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1172 { 1173 int error; 1174 k_siginfo_t info; 1175 siginfo32_t info32; 1176 1177 if (error = waitid(idtype, id, &info, options)) 1178 return (set_errno(error)); 1179 siginfo_kto32(&info, &info32); 1180 if (copyout(&info32, infop, sizeof (info32))) 1181 return (set_errno(EFAULT)); 1182 return (0); 1183 } 1184 1185 #endif /* _SYSCALL32_IMPL */ 1186 1187 void 1188 proc_detach(proc_t *p) 1189 { 1190 proc_t *q; 1191 1192 ASSERT(MUTEX_HELD(&pidlock)); 1193 1194 q = p->p_parent; 1195 ASSERT(q != NULL); 1196 1197 /* 1198 * Take it off the newstate list of its parent 1199 */ 1200 delete_ns(q, p); 1201 1202 if (q->p_child == p) { 1203 q->p_child = p->p_sibling; 1204 /* 1205 * If the parent has no children, it better not 1206 * have any with new states either! 1207 */ 1208 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1209 } 1210 1211 if (p->p_sibling) { 1212 p->p_sibling->p_psibling = p->p_psibling; 1213 } 1214 1215 if (p->p_psibling) { 1216 p->p_psibling->p_sibling = p->p_sibling; 1217 } 1218 } 1219 1220 /* 1221 * Remove zombie children from the process table. 1222 */ 1223 void 1224 freeproc(proc_t *p) 1225 { 1226 proc_t *q; 1227 task_t *tk; 1228 1229 ASSERT(p->p_stat == SZOMB); 1230 ASSERT(p->p_tlist == NULL); 1231 ASSERT(MUTEX_HELD(&pidlock)); 1232 1233 sigdelq(p, NULL, 0); 1234 if (p->p_killsqp) { 1235 siginfofree(p->p_killsqp); 1236 p->p_killsqp = NULL; 1237 } 1238 1239 prfree(p); /* inform /proc */ 1240 1241 /* 1242 * Don't free the init processes. 1243 * Other dying processes will access it. 1244 */ 1245 if (p == proc_init) 1246 return; 1247 1248 1249 /* 1250 * We wait until now to free the cred structure because a 1251 * zombie process's credentials may be examined by /proc. 1252 * No cred locking needed because there are no threads at this point. 1253 */ 1254 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1255 crfree(p->p_cred); 1256 if (p->p_corefile != NULL) { 1257 corectl_path_rele(p->p_corefile); 1258 p->p_corefile = NULL; 1259 } 1260 if (p->p_content != NULL) { 1261 corectl_content_rele(p->p_content); 1262 p->p_content = NULL; 1263 } 1264 1265 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1266 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1267 /* 1268 * This should still do the right thing since p_utime/stime 1269 * get set to the correct value on process exit, so it 1270 * should get properly updated 1271 */ 1272 p->p_nextofkin->p_cutime += p->p_utime; 1273 p->p_nextofkin->p_cstime += p->p_stime; 1274 1275 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1276 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1277 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1278 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1279 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1280 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1281 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1282 += p->p_acct[LMS_USER_LOCK]; 1283 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1284 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1285 += p->p_acct[LMS_WAIT_CPU]; 1286 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1287 1288 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1289 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1290 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1291 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1292 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1293 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1294 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1295 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1296 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1297 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1298 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1299 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1300 1301 } 1302 1303 q = p->p_nextofkin; 1304 if (q && q->p_orphan == p) 1305 q->p_orphan = p->p_nextorph; 1306 else if (q) { 1307 for (q = q->p_orphan; q; q = q->p_nextorph) 1308 if (q->p_nextorph == p) 1309 break; 1310 ASSERT(q && q->p_nextorph == p); 1311 q->p_nextorph = p->p_nextorph; 1312 } 1313 1314 /* 1315 * The process table slot is being freed, so it is now safe to give up 1316 * task and project membership. 1317 */ 1318 mutex_enter(&p->p_lock); 1319 tk = p->p_task; 1320 task_detach(p); 1321 mutex_exit(&p->p_lock); 1322 1323 proc_detach(p); 1324 pid_exit(p, tk); /* frees pid and proc structure */ 1325 1326 task_rele(tk); 1327 } 1328 1329 /* 1330 * Delete process "child" from the newstate list of process "parent" 1331 */ 1332 void 1333 delete_ns(proc_t *parent, proc_t *child) 1334 { 1335 proc_t **ns; 1336 1337 ASSERT(MUTEX_HELD(&pidlock)); 1338 ASSERT(child->p_parent == parent); 1339 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1340 if (*ns == child) { 1341 1342 ASSERT((*ns)->p_parent == parent); 1343 1344 *ns = child->p_sibling_ns; 1345 child->p_sibling_ns = NULL; 1346 return; 1347 } 1348 } 1349 } 1350 1351 /* 1352 * Add process "child" to the new state list of process "parent" 1353 */ 1354 void 1355 add_ns(proc_t *parent, proc_t *child) 1356 { 1357 ASSERT(child->p_sibling_ns == NULL); 1358 child->p_sibling_ns = parent->p_child_ns; 1359 parent->p_child_ns = child; 1360 } 1361