1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/user.h> 37 #include <sys/errno.h> 38 #include <sys/proc.h> 39 #include <sys/ucontext.h> 40 #include <sys/procfs.h> 41 #include <sys/vnode.h> 42 #include <sys/acct.h> 43 #include <sys/var.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/wait.h> 47 #include <sys/siginfo.h> 48 #include <sys/procset.h> 49 #include <sys/class.h> 50 #include <sys/file.h> 51 #include <sys/session.h> 52 #include <sys/kmem.h> 53 #include <sys/vtrace.h> 54 #include <sys/prsystm.h> 55 #include <sys/ipc.h> 56 #include <sys/sem_impl.h> 57 #include <c2/audit.h> 58 #include <sys/aio_impl.h> 59 #include <vm/as.h> 60 #include <sys/poll.h> 61 #include <sys/door.h> 62 #include <sys/lwpchan_impl.h> 63 #include <sys/utrap.h> 64 #include <sys/task.h> 65 
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>
#include <sys/brand.h>
#include <sys/libc_kernel.h>

/*
 * Convert a code/data pair into an old style wait status.
 *
 * Only the low 8 bits of 'data' are used.  Depending on 'code':
 *	CLD_EXITED			data is moved to the second byte
 *	CLD_DUMPED			data is combined with WCOREFLG
 *	CLD_KILLED			data is returned as is
 *	CLD_TRAPPED, CLD_STOPPED	data is moved to the second byte
 *					and combined with WSTOPFLG
 *	CLD_CONTINUED			WCONTFLG alone is returned
 * Any other code is a programming error and panics the system.
 */
int
wstat(int code, int data)
{
	int stat = (data & 0377);

	switch (code) {
	case CLD_EXITED:
		stat <<= 8;
		break;
	case CLD_DUMPED:
		stat |= WCOREFLG;
		break;
	case CLD_KILLED:
		break;
	case CLD_TRAPPED:
	case CLD_STOPPED:
		stat <<= 8;
		stat |= WSTOPFLG;
		break;
	case CLD_CONTINUED:
		stat = WCONTFLG;
		break;
	default:
		cmn_err(CE_PANIC, "wstat: bad code");
		/* NOTREACHED */
	}
	return (stat);
}

/*
 * Format a human-readable description of how a process terminated into
 * 'buf' (bounded by 'bufsz' through snprintf()) and return 'buf' so the
 * call can be used directly as a printf-style argument.  'why' is a CLD_*
 * code and 'what' is the value that goes with it.  Codes other than
 * CLD_EXITED, CLD_KILLED and CLD_DUMPED are reported as an unknown error.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	switch (why) {
	case CLD_EXITED:
		(void) snprintf(buf, bufsz, "exited with status %d", what);
		break;
	case CLD_KILLED:
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
		break;
	case CLD_DUMPED:
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
		break;
	default:
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
		break;
	}

	return (buf);
}

/*
 * exit system call: pass back caller's arg.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed.  As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.
Most 148 * importantly this includes closing all file descriptors (exec only 149 * closes those marked close-on-exec) and resetting signals (exec only 150 * resets handled signals, and we need to clear any signals which 151 * killed init). Anything else that exec(2) says would be inherited, 152 * but would affect the execution of init, needs to be reset. 153 */ 154 static int 155 restart_init(int what, int why) 156 { 157 kthread_t *t = curthread; 158 klwp_t *lwp = ttolwp(t); 159 proc_t *p = ttoproc(t); 160 user_t *up = PTOU(p); 161 162 vnode_t *oldcd, *oldrd; 163 int i, err; 164 char reason_buf[64]; 165 166 /* 167 * Let zone admin (and global zone admin if this is for a non-global 168 * zone) know that init has failed and will be restarted. 169 */ 170 zcmn_err(p->p_zone->zone_id, CE_WARN, 171 "init(1M) %s: restarting automatically", 172 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 173 174 if (!INGLOBALZONE(p)) { 175 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 176 "restarting automatically", 177 p->p_zone->zone_name, p->p_pid, reason_buf); 178 } 179 180 /* 181 * Remove any fpollinfo_t's for this (last) thread from our file 182 * descriptors so closeall() can ASSERT() that they're all gone. 183 * Then close all open file descriptors in the process. 184 */ 185 pollcleanup(); 186 closeall(P_FINFO(p)); 187 188 /* 189 * Grab p_lock and begin clearing miscellaneous global process 190 * state that needs to be reset before we exec the new init(1M). 
191 */ 192 193 mutex_enter(&p->p_lock); 194 prbarrier(p); 195 196 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 197 up->u_cmask = CMASK; 198 199 sigemptyset(&t->t_hold); 200 sigemptyset(&t->t_sig); 201 sigemptyset(&t->t_extsig); 202 203 sigemptyset(&p->p_sig); 204 sigemptyset(&p->p_extsig); 205 206 sigdelq(p, t, 0); 207 sigdelq(p, NULL, 0); 208 209 if (p->p_killsqp) { 210 siginfofree(p->p_killsqp); 211 p->p_killsqp = NULL; 212 } 213 214 /* 215 * Reset any signals that are ignored back to the default disposition. 216 * Other u_signal members will be cleared when exec calls sigdefault(). 217 */ 218 for (i = 1; i < NSIG; i++) { 219 if (up->u_signal[i - 1] == SIG_IGN) { 220 up->u_signal[i - 1] = SIG_DFL; 221 sigemptyset(&up->u_sigmask[i - 1]); 222 } 223 } 224 225 /* 226 * Clear the current signal, any signal info associated with it, and 227 * any signal information from contracts and/or contract templates. 228 */ 229 lwp->lwp_cursig = 0; 230 lwp->lwp_extsig = 0; 231 if (lwp->lwp_curinfo != NULL) { 232 siginfofree(lwp->lwp_curinfo); 233 lwp->lwp_curinfo = NULL; 234 } 235 lwp_ctmpl_clear(lwp); 236 237 /* 238 * Reset both the process root directory and the current working 239 * directory to the root of the zone just as we do during boot. 240 */ 241 VN_HOLD(p->p_zone->zone_rootvp); 242 oldrd = up->u_rdir; 243 up->u_rdir = p->p_zone->zone_rootvp; 244 245 VN_HOLD(p->p_zone->zone_rootvp); 246 oldcd = up->u_cdir; 247 up->u_cdir = p->p_zone->zone_rootvp; 248 249 if (up->u_cwd != NULL) { 250 refstr_rele(up->u_cwd); 251 up->u_cwd = NULL; 252 } 253 254 mutex_exit(&p->p_lock); 255 256 if (oldrd != NULL) 257 VN_RELE(oldrd); 258 if (oldcd != NULL) 259 VN_RELE(oldcd); 260 261 /* Free the controlling tty. (freectty() always assumes curproc.) */ 262 ASSERT(p == curproc); 263 (void) freectty(B_TRUE); 264 265 /* 266 * Now exec() the new init(1M) on top of the current process. If we 267 * succeed, the caller will treat this like a successful system call. 
268 * If we fail, we issue messages and the caller will proceed with exit. 269 */ 270 err = exec_init(p->p_zone->zone_initname, NULL); 271 272 if (err == 0) 273 return (0); 274 275 zcmn_err(p->p_zone->zone_id, CE_WARN, 276 "failed to restart init(1M) (err=%d): system reboot required", err); 277 278 if (!INGLOBALZONE(p)) { 279 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 280 "(pid %d, err=%d): zoneadm(1M) boot required", 281 p->p_zone->zone_name, p->p_pid, err); 282 } 283 284 return (-1); 285 } 286 287 /* 288 * Release resources. 289 * Enter zombie state. 290 * Wake up parent and init processes, 291 * and dispose of children. 292 */ 293 void 294 exit(int why, int what) 295 { 296 /* 297 * If proc_exit() fails, then some other lwp in the process 298 * got there first. We just have to call lwp_exit() to allow 299 * the other lwp to finish exiting the process. Otherwise we're 300 * restarting init, and should return. 301 */ 302 if (proc_exit(why, what) != 0) { 303 mutex_enter(&curproc->p_lock); 304 ASSERT(curproc->p_flag & SEXITLWPS); 305 lwp_exit(); 306 /* NOTREACHED */ 307 } 308 } 309 310 /* 311 * Set the SEXITING flag on the process, after making sure /proc does 312 * not have it locked. This is done in more places than proc_exit(), 313 * so it is a separate function. 314 */ 315 void 316 proc_is_exiting(proc_t *p) 317 { 318 mutex_enter(&p->p_lock); 319 prbarrier(p); 320 p->p_flag |= SEXITING; 321 mutex_exit(&p->p_lock); 322 } 323 324 /* 325 * Return value: 326 * 1 - exitlwps() failed, call (or continue) lwp_exit() 327 * 0 - restarting init. 
Return through system call path 328 */ 329 int 330 proc_exit(int why, int what) 331 { 332 kthread_t *t = curthread; 333 klwp_t *lwp = ttolwp(t); 334 proc_t *p = ttoproc(t); 335 zone_t *z = p->p_zone; 336 timeout_id_t tmp_id; 337 int rv; 338 proc_t *q; 339 task_t *tk; 340 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 341 sigqueue_t *sqp; 342 lwpdir_t *lwpdir; 343 uint_t lwpdir_sz; 344 lwpdir_t **tidhash; 345 uint_t tidhash_sz; 346 refstr_t *cwd; 347 hrtime_t hrutime, hrstime; 348 int evaporate; 349 350 /* 351 * Stop and discard the process's lwps except for the current one, 352 * unless some other lwp beat us to it. If exitlwps() fails then 353 * return and the calling lwp will call (or continue in) lwp_exit(). 354 */ 355 proc_is_exiting(p); 356 if (exitlwps(0) != 0) 357 return (1); 358 359 mutex_enter(&p->p_lock); 360 if (p->p_ttime > 0) { 361 /* 362 * Account any remaining ticks charged to this process 363 * on its way out. 364 */ 365 (void) task_cpu_time_incr(p->p_task, p->p_ttime); 366 p->p_ttime = 0; 367 } 368 mutex_exit(&p->p_lock); 369 370 DTRACE_PROC(lwp__exit); 371 DTRACE_PROC1(exit, int, why); 372 373 /* 374 * Will perform any brand specific proc exit processing, since this 375 * is always the last lwp, will also perform lwp_exit and free brand 376 * data 377 */ 378 if (PROC_IS_BRANDED(p)) { 379 lwp_detach_brand_hdlrs(lwp); 380 brand_clearbrand(p); 381 } 382 383 /* 384 * Don't let init exit unless zone_start_init() failed its exec, or 385 * we are shutting down the zone or the machine. 386 * 387 * Since we are single threaded, we don't need to lock the 388 * following accesses to zone_proc_initpid. 
389 */ 390 if (p->p_pid == z->zone_proc_initpid) { 391 if (z->zone_boot_err == 0 && 392 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 393 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 394 z->zone_restart_init == B_TRUE && 395 restart_init(what, why) == 0) 396 return (0); 397 /* 398 * Since we didn't or couldn't restart init, we clear 399 * the zone's init state and proceed with exit 400 * processing. 401 */ 402 z->zone_proc_initpid = -1; 403 } 404 405 /* 406 * Allocate a sigqueue now, before we grab locks. 407 * It will be given to sigcld(), below. 408 * Special case: If we will be making the process disappear 409 * without a trace (for the benefit of posix_spawn() in libc) 410 * don't bother to allocate a useless sigqueue. 411 */ 412 evaporate = ((p->p_flag & SVFORK) && 413 why == CLD_EXITED && what == _EVAPORATE); 414 if (!evaporate) 415 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 416 417 /* 418 * revoke any doors created by the process. 419 */ 420 if (p->p_door_list) 421 door_exit(); 422 423 /* 424 * Release schedctl data structures. 425 */ 426 if (p->p_pagep) 427 schedctl_proc_cleanup(); 428 429 /* 430 * make sure all pending kaio has completed. 431 */ 432 if (p->p_aio) 433 aio_cleanup_exit(); 434 435 /* 436 * discard the lwpchan cache. 437 */ 438 if (p->p_lcp != NULL) 439 lwpchan_destroy_cache(0); 440 441 /* 442 * Clean up any DTrace helper actions or probes for the process. 443 */ 444 if (p->p_dtrace_helpers != NULL) { 445 ASSERT(dtrace_helpers_cleanup != NULL); 446 (*dtrace_helpers_cleanup)(); 447 } 448 449 /* untimeout the realtime timers */ 450 if (p->p_itimer != NULL) 451 timer_exit(); 452 453 if ((tmp_id = p->p_alarmid) != 0) { 454 p->p_alarmid = 0; 455 (void) untimeout(tmp_id); 456 } 457 458 /* 459 * Remove any fpollinfo_t's for this (last) thread from our file 460 * descriptors so closeall() can ASSERT() that they're all gone. 
461 */ 462 pollcleanup(); 463 464 if (p->p_rprof_cyclic != CYCLIC_NONE) { 465 mutex_enter(&cpu_lock); 466 cyclic_remove(p->p_rprof_cyclic); 467 mutex_exit(&cpu_lock); 468 } 469 470 mutex_enter(&p->p_lock); 471 472 /* 473 * Clean up any DTrace probes associated with this process. 474 */ 475 if (p->p_dtrace_probes) { 476 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 477 dtrace_fasttrap_exit_ptr(p); 478 } 479 480 while ((tmp_id = p->p_itimerid) != 0) { 481 p->p_itimerid = 0; 482 mutex_exit(&p->p_lock); 483 (void) untimeout(tmp_id); 484 mutex_enter(&p->p_lock); 485 } 486 487 lwp_cleanup(); 488 489 /* 490 * We are about to exit; prevent our resource associations from 491 * being changed. 492 */ 493 pool_barrier_enter(); 494 495 /* 496 * Block the process against /proc now that we have really 497 * acquired p->p_lock (to manipulate p_tlist at least). 498 */ 499 prbarrier(p); 500 501 #ifdef SUN_SRC_COMPAT 502 if (code == CLD_KILLED) 503 u.u_acflag |= AXSIG; 504 #endif 505 sigfillset(&p->p_ignore); 506 sigemptyset(&p->p_siginfo); 507 sigemptyset(&p->p_sig); 508 sigemptyset(&p->p_extsig); 509 sigemptyset(&t->t_sig); 510 sigemptyset(&t->t_extsig); 511 sigemptyset(&p->p_sigmask); 512 sigdelq(p, t, 0); 513 lwp->lwp_cursig = 0; 514 lwp->lwp_extsig = 0; 515 p->p_flag &= ~(SKILLED | SEXTKILLED); 516 if (lwp->lwp_curinfo) { 517 siginfofree(lwp->lwp_curinfo); 518 lwp->lwp_curinfo = NULL; 519 } 520 521 t->t_proc_flag |= TP_LWPEXIT; 522 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 523 prlwpexit(t); /* notify /proc */ 524 lwp_hash_out(p, t->t_tid); 525 prexit(p); 526 527 p->p_lwpcnt = 0; 528 p->p_tlist = NULL; 529 sigqfree(p); 530 term_mstate(t); 531 p->p_mterm = gethrtime(); 532 533 exec_vp = p->p_exec; 534 execdir_vp = p->p_execdir; 535 p->p_exec = NULLVP; 536 p->p_execdir = NULLVP; 537 mutex_exit(&p->p_lock); 538 if (exec_vp) 539 VN_RELE(exec_vp); 540 if (execdir_vp) 541 VN_RELE(execdir_vp); 542 543 pr_free_watched_pages(p); 544 545 closeall(P_FINFO(p)); 546 547 /* Free the 
controlling tty. (freectty() always assumes curproc.) */ 548 ASSERT(p == curproc); 549 (void) freectty(B_TRUE); 550 551 #if defined(__sparc) 552 if (p->p_utraps != NULL) 553 utrap_free(p); 554 #endif 555 if (p->p_semacct) /* IPC semaphore exit */ 556 semexit(p); 557 rv = wstat(why, what); 558 559 acct(rv & 0xff); 560 exacct_commit_proc(p, rv); 561 562 /* 563 * Release any resources associated with C2 auditing 564 */ 565 if (audit_active) { 566 /* 567 * audit exit system call 568 */ 569 audit_exit(why, what); 570 } 571 572 /* 573 * Free address space. 574 */ 575 relvm(); 576 577 /* 578 * Release held contracts. 579 */ 580 contract_exit(p); 581 582 /* 583 * Depart our encapsulating process contract. 584 */ 585 if ((p->p_flag & SSYS) == 0) { 586 ASSERT(p->p_ct_process); 587 contract_process_exit(p->p_ct_process, p, rv); 588 } 589 590 /* 591 * Remove pool association, and block if requested by pool_do_bind. 592 */ 593 mutex_enter(&p->p_lock); 594 ASSERT(p->p_pool->pool_ref > 0); 595 atomic_add_32(&p->p_pool->pool_ref, -1); 596 p->p_pool = pool_default; 597 /* 598 * Now that our address space has been freed and all other threads 599 * in this process have exited, set the PEXITED pool flag. This 600 * tells the pools subsystems to ignore this process if it was 601 * requested to rebind this process to a new pool. 602 */ 603 p->p_poolflag |= PEXITED; 604 pool_barrier_exit(); 605 mutex_exit(&p->p_lock); 606 607 mutex_enter(&pidlock); 608 609 /* 610 * Delete this process from the newstate list of its parent. We 611 * will put it in the right place in the sigcld in the end. 612 */ 613 delete_ns(p->p_parent, p); 614 615 /* 616 * Reassign the orphans to the next of kin. 617 * Don't rearrange init's orphanage. 
618 */ 619 if ((q = p->p_orphan) != NULL && p != proc_init) { 620 621 proc_t *nokp = p->p_nextofkin; 622 623 for (;;) { 624 q->p_nextofkin = nokp; 625 if (q->p_nextorph == NULL) 626 break; 627 q = q->p_nextorph; 628 } 629 q->p_nextorph = nokp->p_orphan; 630 nokp->p_orphan = p->p_orphan; 631 p->p_orphan = NULL; 632 } 633 634 /* 635 * Reassign the children to init. 636 * Don't try to assign init's children to init. 637 */ 638 if ((q = p->p_child) != NULL && p != proc_init) { 639 struct proc *np; 640 struct proc *initp = proc_init; 641 boolean_t setzonetop = B_FALSE; 642 643 if (!INGLOBALZONE(curproc)) 644 setzonetop = B_TRUE; 645 646 pgdetach(p); 647 648 do { 649 np = q->p_sibling; 650 /* 651 * Delete it from its current parent new state 652 * list and add it to init new state list 653 */ 654 delete_ns(q->p_parent, q); 655 656 q->p_ppid = 1; 657 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); 658 if (setzonetop) { 659 mutex_enter(&q->p_lock); 660 q->p_flag |= SZONETOP; 661 mutex_exit(&q->p_lock); 662 } 663 q->p_parent = initp; 664 665 /* 666 * Since q will be the first child, 667 * it will not have a previous sibling. 668 */ 669 q->p_psibling = NULL; 670 if (initp->p_child) { 671 initp->p_child->p_psibling = q; 672 } 673 q->p_sibling = initp->p_child; 674 initp->p_child = q; 675 if (q->p_proc_flag & P_PR_PTRACE) { 676 mutex_enter(&q->p_lock); 677 sigtoproc(q, NULL, SIGKILL); 678 mutex_exit(&q->p_lock); 679 } 680 /* 681 * sigcld() will add the child to parents 682 * newstate list. 
683 */ 684 if (q->p_stat == SZOMB) 685 sigcld(q, NULL); 686 } while ((q = np) != NULL); 687 688 p->p_child = NULL; 689 ASSERT(p->p_child_ns == NULL); 690 } 691 692 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 693 694 mutex_enter(&p->p_lock); 695 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 696 697 /* 698 * Have our task accummulate our resource usage data before they 699 * become contaminated by p_cacct etc., and before we renounce 700 * membership of the task. 701 * 702 * We do this regardless of whether or not task accounting is active. 703 * This is to avoid having nonsense data reported for this task if 704 * task accounting is subsequently enabled. The overhead is minimal; 705 * by this point, this process has accounted for the usage of all its 706 * LWPs. We nonetheless do the work here, and under the protection of 707 * pidlock, so that the movement of the process's usage to the task 708 * happens at the same time as the removal of the process from the 709 * task, from the point of view of exacct_snapshot_task_usage(). 
710 */ 711 exacct_update_task_mstate(p); 712 713 hrutime = mstate_aggr_state(p, LMS_USER); 714 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 715 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 716 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 717 718 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 719 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 720 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 721 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 722 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 723 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 724 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 725 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 726 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 727 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 728 729 p->p_ru.minflt += p->p_cru.minflt; 730 p->p_ru.majflt += p->p_cru.majflt; 731 p->p_ru.nswap += p->p_cru.nswap; 732 p->p_ru.inblock += p->p_cru.inblock; 733 p->p_ru.oublock += p->p_cru.oublock; 734 p->p_ru.msgsnd += p->p_cru.msgsnd; 735 p->p_ru.msgrcv += p->p_cru.msgrcv; 736 p->p_ru.nsignals += p->p_cru.nsignals; 737 p->p_ru.nvcsw += p->p_cru.nvcsw; 738 p->p_ru.nivcsw += p->p_cru.nivcsw; 739 p->p_ru.sysc += p->p_cru.sysc; 740 p->p_ru.ioch += p->p_cru.ioch; 741 742 p->p_stat = SZOMB; 743 p->p_proc_flag &= ~P_PR_PTRACE; 744 p->p_wdata = what; 745 p->p_wcode = (char)why; 746 747 cdir = PTOU(p)->u_cdir; 748 rdir = PTOU(p)->u_rdir; 749 cwd = PTOU(p)->u_cwd; 750 751 /* 752 * Release resource controls, as they are no longer enforceable. 753 */ 754 rctl_set_free(p->p_rctls); 755 756 /* 757 * Give up task and project memberships. Decrement tk_nlwps counter 758 * for our task.max-lwps resource control. An extended accounting 759 * record, if that facility is active, is scheduled to be written. 760 * Zombie processes are false members of task0 for the remainder of 761 * their lifetime; no accounting information is recorded for them. 
762 */ 763 tk = p->p_task; 764 765 mutex_enter(&p->p_zone->zone_nlwps_lock); 766 tk->tk_nlwps--; 767 tk->tk_proj->kpj_nlwps--; 768 p->p_zone->zone_nlwps--; 769 mutex_exit(&p->p_zone->zone_nlwps_lock); 770 task_detach(p); 771 p->p_task = task0p; 772 773 /* 774 * Clear the lwp directory and the lwpid hash table 775 * now that /proc can't bother us any more. 776 * We free the memory below, after dropping p->p_lock. 777 */ 778 lwpdir = p->p_lwpdir; 779 lwpdir_sz = p->p_lwpdir_sz; 780 tidhash = p->p_tidhash; 781 tidhash_sz = p->p_tidhash_sz; 782 p->p_lwpdir = NULL; 783 p->p_lwpfree = NULL; 784 p->p_lwpdir_sz = 0; 785 p->p_tidhash = NULL; 786 p->p_tidhash_sz = 0; 787 788 /* 789 * If the process has context ops installed, call the exit routine 790 * on behalf of this last remaining thread. Normally exitpctx() is 791 * called during thread_exit() or lwp_exit(), but because this is the 792 * last thread in the process, we must call it here. By the time 793 * thread_exit() is called (below), the association with the relevant 794 * process has been lost. 795 * 796 * We also free the context here. 797 */ 798 if (p->p_pctx) { 799 kpreempt_disable(); 800 exitpctx(p); 801 kpreempt_enable(); 802 803 freepctx(p, 0); 804 } 805 806 /* 807 * curthread's proc pointer is changed to point to the 'sched' 808 * process for the corresponding zone, except in the case when 809 * the exiting process is in fact a zsched instance, in which 810 * case the proc pointer is set to p0. We do so, so that the 811 * process still points at the right zone when we call the VN_RELE() 812 * below. 813 * 814 * This is because curthread's original proc pointer can be freed as 815 * soon as the child sends a SIGCLD to its parent. We use zsched so 816 * that for user processes, even in the final moments of death, the 817 * process is still associated with its zone. 
818 */ 819 if (p != t->t_procp->p_zone->zone_zsched) 820 t->t_procp = t->t_procp->p_zone->zone_zsched; 821 else 822 t->t_procp = &p0; 823 824 mutex_exit(&p->p_lock); 825 if (!evaporate) { 826 p->p_pidflag &= ~CLDPEND; 827 sigcld(p, sqp); 828 } else { 829 /* 830 * Do what sigcld() would do if the disposition 831 * of the SIGCHLD signal were set to be ignored. 832 */ 833 cv_broadcast(&p->p_srwchan_cv); 834 freeproc(p); 835 } 836 mutex_exit(&pidlock); 837 838 /* 839 * We don't release u_cdir and u_rdir until SZOMB is set. 840 * This protects us against dofusers(). 841 */ 842 VN_RELE(cdir); 843 if (rdir) 844 VN_RELE(rdir); 845 if (cwd) 846 refstr_rele(cwd); 847 848 /* 849 * task_rele() may ultimately cause the zone to go away (or 850 * may cause the last user process in a zone to go away, which 851 * signals zsched to go away). So prior to this call, we must 852 * no longer point at zsched. 853 */ 854 t->t_procp = &p0; 855 task_rele(tk); 856 857 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 858 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 859 860 lwp_pcb_exit(); 861 862 thread_exit(); 863 /* NOTREACHED */ 864 } 865 866 /* 867 * Format siginfo structure for wait system calls. 868 */ 869 void 870 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 871 { 872 ASSERT(MUTEX_HELD(&pidlock)); 873 874 bzero(ip, sizeof (k_siginfo_t)); 875 ip->si_signo = SIGCLD; 876 ip->si_code = pp->p_wcode; 877 ip->si_pid = pp->p_pid; 878 ip->si_ctid = PRCTID(pp); 879 ip->si_zoneid = pp->p_zone->zone_id; 880 ip->si_status = pp->p_wdata; 881 ip->si_stime = pp->p_stime; 882 ip->si_utime = pp->p_utime; 883 884 if (waitflag) { 885 pp->p_wcode = 0; 886 pp->p_wdata = 0; 887 pp->p_pidflag &= ~CLDPEND; 888 } 889 } 890 891 /* 892 * Wait system call. 893 * Search for a terminated (zombie) child, 894 * finally lay it to rest, and collect its status. 895 * Look also for stopped children, 896 * and pass back status from them. 
897 */ 898 int 899 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 900 { 901 int found; 902 proc_t *cp, *pp; 903 int proc_gone; 904 int waitflag = !(options & WNOWAIT); 905 906 /* 907 * Obsolete flag, defined here only for binary compatibility 908 * with old statically linked executables. Delete this when 909 * we no longer care about these old and broken applications. 910 */ 911 #define _WNOCHLD 0400 912 options &= ~_WNOCHLD; 913 914 if (options == 0 || (options & ~WOPTMASK)) 915 return (EINVAL); 916 917 switch (idtype) { 918 case P_PID: 919 case P_PGID: 920 if (id < 0 || id >= maxpid) 921 return (EINVAL); 922 /* FALLTHROUGH */ 923 case P_ALL: 924 break; 925 default: 926 return (EINVAL); 927 } 928 929 pp = ttoproc(curthread); 930 931 /* 932 * lock parent mutex so that sibling chain can be searched. 933 */ 934 mutex_enter(&pidlock); 935 936 /* 937 * if we are only looking for exited processes and child_ns list 938 * is empty no reason to look at all children. 939 */ 940 if (idtype == P_ALL && 941 (options & ~WNOWAIT) == (WNOHANG | WEXITED) && 942 pp->p_child_ns == NULL) { 943 if (pp->p_child) { 944 mutex_exit(&pidlock); 945 bzero(ip, sizeof (k_siginfo_t)); 946 return (0); 947 } 948 mutex_exit(&pidlock); 949 return (ECHILD); 950 } 951 952 while (pp->p_child != NULL) { 953 954 proc_gone = 0; 955 956 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { 957 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) 958 continue; 959 if (idtype == P_PID && id != cp->p_pid) 960 continue; 961 if (idtype == P_PGID && id != cp->p_pgrp) 962 continue; 963 964 switch (cp->p_wcode) { 965 966 case CLD_TRAPPED: 967 case CLD_STOPPED: 968 case CLD_CONTINUED: 969 cmn_err(CE_PANIC, 970 "waitid: wrong state %d on the p_newstate" 971 " list", cp->p_wcode); 972 break; 973 974 case CLD_EXITED: 975 case CLD_DUMPED: 976 case CLD_KILLED: 977 if (!(options & WEXITED)) { 978 /* 979 * Count how many are already gone 980 * for good. 
981 */ 982 proc_gone++; 983 break; 984 } 985 if (!waitflag) { 986 winfo(cp, ip, 0); 987 } else { 988 winfo(cp, ip, 1); 989 freeproc(cp); 990 } 991 mutex_exit(&pidlock); 992 if (waitflag) { /* accept SIGCLD */ 993 sigcld_delete(ip); 994 sigcld_repost(); 995 } 996 return (0); 997 } 998 999 if (idtype == P_PID) 1000 break; 1001 } 1002 1003 /* 1004 * Wow! None of the threads on the p_sibling_ns list were 1005 * interesting threads. Check all the kids! 1006 */ 1007 found = 0; 1008 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { 1009 if (idtype == P_PID && id != cp->p_pid) 1010 continue; 1011 if (idtype == P_PGID && id != cp->p_pgrp) 1012 continue; 1013 1014 switch (cp->p_wcode) { 1015 case CLD_TRAPPED: 1016 if (!(options & WTRAPPED)) 1017 break; 1018 winfo(cp, ip, waitflag); 1019 mutex_exit(&pidlock); 1020 if (waitflag) { /* accept SIGCLD */ 1021 sigcld_delete(ip); 1022 sigcld_repost(); 1023 } 1024 return (0); 1025 1026 case CLD_STOPPED: 1027 if (!(options & WSTOPPED)) 1028 break; 1029 /* Is it still stopped? */ 1030 mutex_enter(&cp->p_lock); 1031 if (!jobstopped(cp)) { 1032 mutex_exit(&cp->p_lock); 1033 break; 1034 } 1035 mutex_exit(&cp->p_lock); 1036 winfo(cp, ip, waitflag); 1037 mutex_exit(&pidlock); 1038 if (waitflag) { /* accept SIGCLD */ 1039 sigcld_delete(ip); 1040 sigcld_repost(); 1041 } 1042 return (0); 1043 1044 case CLD_CONTINUED: 1045 if (!(options & WCONTINUED)) 1046 break; 1047 winfo(cp, ip, waitflag); 1048 mutex_exit(&pidlock); 1049 if (waitflag) { /* accept SIGCLD */ 1050 sigcld_delete(ip); 1051 sigcld_repost(); 1052 } 1053 return (0); 1054 1055 case CLD_EXITED: 1056 case CLD_DUMPED: 1057 case CLD_KILLED: 1058 if (idtype != P_PID && 1059 (cp->p_pidflag & CLDWAITPID)) 1060 continue; 1061 /* 1062 * Don't complain if a process was found in 1063 * the first loop but we broke out of the loop 1064 * because of the arguments passed to us. 
1065 */ 1066 if (proc_gone == 0) { 1067 cmn_err(CE_PANIC, 1068 "waitid: wrong state on the" 1069 " p_child list"); 1070 } else { 1071 break; 1072 } 1073 } 1074 1075 found++; 1076 1077 if (idtype == P_PID) 1078 break; 1079 } 1080 1081 /* 1082 * If we found no interesting processes at all, 1083 * break out and return ECHILD. 1084 */ 1085 if (found + proc_gone == 0) 1086 break; 1087 1088 if (options & WNOHANG) { 1089 mutex_exit(&pidlock); 1090 bzero(ip, sizeof (k_siginfo_t)); 1091 /* 1092 * We should set ip->si_signo = SIGCLD, 1093 * but there is an SVVS test that expects 1094 * ip->si_signo to be zero in this case. 1095 */ 1096 return (0); 1097 } 1098 1099 /* 1100 * If we found no processes of interest that could 1101 * change state while we wait, we don't wait at all. 1102 * Get out with ECHILD according to SVID. 1103 */ 1104 if (found == proc_gone) 1105 break; 1106 1107 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1108 mutex_exit(&pidlock); 1109 return (EINTR); 1110 } 1111 } 1112 mutex_exit(&pidlock); 1113 return (ECHILD); 1114 } 1115 1116 /* 1117 * The wait() system call trap is no longer invoked by libc. 1118 * It is retained only for the benefit of statically linked applications. 1119 * Delete this when we no longer care about these old and broken applications. 
1120 */ 1121 int64_t 1122 wait(void) 1123 { 1124 int error; 1125 k_siginfo_t info; 1126 rval_t r; 1127 1128 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1129 return (set_errno(error)); 1130 r.r_val1 = info.si_pid; 1131 r.r_val2 = wstat(info.si_code, info.si_status); 1132 return (r.r_vals); 1133 } 1134 1135 int 1136 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1137 { 1138 int error; 1139 k_siginfo_t info; 1140 1141 if (error = waitid(idtype, id, &info, options)) 1142 return (set_errno(error)); 1143 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1144 return (set_errno(EFAULT)); 1145 return (0); 1146 } 1147 1148 #ifdef _SYSCALL32_IMPL 1149 1150 int 1151 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1152 { 1153 int error; 1154 k_siginfo_t info; 1155 siginfo32_t info32; 1156 1157 if (error = waitid(idtype, id, &info, options)) 1158 return (set_errno(error)); 1159 siginfo_kto32(&info, &info32); 1160 if (copyout(&info32, infop, sizeof (info32))) 1161 return (set_errno(EFAULT)); 1162 return (0); 1163 } 1164 1165 #endif /* _SYSCALL32_IMPL */ 1166 1167 void 1168 proc_detach(proc_t *p) 1169 { 1170 proc_t *q; 1171 1172 ASSERT(MUTEX_HELD(&pidlock)); 1173 1174 q = p->p_parent; 1175 ASSERT(q != NULL); 1176 1177 /* 1178 * Take it off the newstate list of its parent 1179 */ 1180 delete_ns(q, p); 1181 1182 if (q->p_child == p) { 1183 q->p_child = p->p_sibling; 1184 /* 1185 * If the parent has no children, it better not 1186 * have any with new states either! 1187 */ 1188 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1189 } 1190 1191 if (p->p_sibling) { 1192 p->p_sibling->p_psibling = p->p_psibling; 1193 } 1194 1195 if (p->p_psibling) { 1196 p->p_psibling->p_sibling = p->p_sibling; 1197 } 1198 } 1199 1200 /* 1201 * Remove zombie children from the process table. 
1202 */ 1203 void 1204 freeproc(proc_t *p) 1205 { 1206 proc_t *q; 1207 1208 ASSERT(p->p_stat == SZOMB); 1209 ASSERT(p->p_tlist == NULL); 1210 ASSERT(MUTEX_HELD(&pidlock)); 1211 1212 sigdelq(p, NULL, 0); 1213 if (p->p_killsqp) { 1214 siginfofree(p->p_killsqp); 1215 p->p_killsqp = NULL; 1216 } 1217 1218 prfree(p); /* inform /proc */ 1219 1220 /* 1221 * Don't free the init processes. 1222 * Other dying processes will access it. 1223 */ 1224 if (p == proc_init) 1225 return; 1226 1227 1228 /* 1229 * We wait until now to free the cred structure because a 1230 * zombie process's credentials may be examined by /proc. 1231 * No cred locking needed because there are no threads at this point. 1232 */ 1233 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1234 crfree(p->p_cred); 1235 if (p->p_corefile != NULL) { 1236 corectl_path_rele(p->p_corefile); 1237 p->p_corefile = NULL; 1238 } 1239 if (p->p_content != NULL) { 1240 corectl_content_rele(p->p_content); 1241 p->p_content = NULL; 1242 } 1243 1244 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1245 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1246 /* 1247 * This should still do the right thing since p_utime/stime 1248 * get set to the correct value on process exit, so it 1249 * should get properly updated 1250 */ 1251 p->p_nextofkin->p_cutime += p->p_utime; 1252 p->p_nextofkin->p_cstime += p->p_stime; 1253 1254 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1255 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1256 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1257 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1258 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1259 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1260 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1261 += p->p_acct[LMS_USER_LOCK]; 1262 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1263 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1264 += 
p->p_acct[LMS_WAIT_CPU]; 1265 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1266 1267 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1268 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1269 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1270 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1271 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1272 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1273 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1274 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1275 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1276 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1277 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1278 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1279 1280 } 1281 1282 q = p->p_nextofkin; 1283 if (q && q->p_orphan == p) 1284 q->p_orphan = p->p_nextorph; 1285 else if (q) { 1286 for (q = q->p_orphan; q; q = q->p_nextorph) 1287 if (q->p_nextorph == p) 1288 break; 1289 ASSERT(q && q->p_nextorph == p); 1290 q->p_nextorph = p->p_nextorph; 1291 } 1292 1293 proc_detach(p); 1294 pid_exit(p); /* frees pid and proc structure */ 1295 } 1296 1297 /* 1298 * Delete process "child" from the newstate list of process "parent" 1299 */ 1300 void 1301 delete_ns(proc_t *parent, proc_t *child) 1302 { 1303 proc_t **ns; 1304 1305 ASSERT(MUTEX_HELD(&pidlock)); 1306 ASSERT(child->p_parent == parent); 1307 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1308 if (*ns == child) { 1309 1310 ASSERT((*ns)->p_parent == parent); 1311 1312 *ns = child->p_sibling_ns; 1313 child->p_sibling_ns = NULL; 1314 return; 1315 } 1316 } 1317 } 1318 1319 /* 1320 * Add process "child" to the new state list of process "parent" 1321 */ 1322 void 1323 add_ns(proc_t *parent, proc_t *child) 1324 { 1325 ASSERT(child->p_sibling_ns == NULL); 1326 child->p_sibling_ns = parent->p_child_ns; 1327 parent->p_child_ns = child; 1328 } 1329