1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/user.h> 37 #include <sys/errno.h> 38 #include <sys/proc.h> 39 #include <sys/ucontext.h> 40 #include <sys/procfs.h> 41 #include <sys/vnode.h> 42 #include <sys/acct.h> 43 #include <sys/var.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/wait.h> 47 #include <sys/siginfo.h> 48 #include <sys/procset.h> 49 #include <sys/class.h> 50 #include <sys/file.h> 51 #include <sys/session.h> 52 #include <sys/kmem.h> 53 #include <sys/vtrace.h> 54 #include <sys/prsystm.h> 55 #include <sys/ipc.h> 56 #include <sys/sem_impl.h> 57 #include <c2/audit.h> 58 #include <sys/aio_impl.h> 59 #include <vm/as.h> 60 #include <sys/poll.h> 61 #include <sys/door.h> 62 #include <sys/lwpchan_impl.h> 63 #include <sys/utrap.h> 64 #include <sys/task.h> 65 
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>
#include <sys/brand.h>
#include <sys/libc_kernel.h>

/*
 * convert code/data pair into old style wait status
 *
 * Only the low 8 bits of 'data' are used.  Depending on 'code' (a CLD_*
 * value) they are placed as follows:
 *	CLD_EXITED		data shifted into the second byte
 *	CLD_DUMPED		data in the low byte, with WCOREFLG or'ed in
 *	CLD_KILLED		data in the low byte, unchanged
 *	CLD_TRAPPED/STOPPED	data shifted into the second byte, with
 *				WSTOPFLG or'ed in
 *	CLD_CONTINUED		data is discarded; the result is WCONTFLG
 * Any other code is reported through cmn_err(CE_PANIC).
 */
int
wstat(int code, int data)
{
	int stat = (data & 0377);

	switch (code) {
	case CLD_EXITED:
		stat <<= 8;
		break;
	case CLD_DUMPED:
		stat |= WCOREFLG;
		break;
	case CLD_KILLED:
		break;
	case CLD_TRAPPED:
	case CLD_STOPPED:
		stat <<= 8;
		stat |= WSTOPFLG;
		break;
	case CLD_CONTINUED:
		stat = WCONTFLG;
		break;
	default:
		cmn_err(CE_PANIC, "wstat: bad code");
		/* NOTREACHED */
	}
	return (stat);
}

/*
 * Format a human readable description of how a process ended into the
 * caller supplied buffer 'buf' of 'bufsz' bytes.  'why' is the CLD_* code
 * and 'what' is the associated exit status or signal number.  Codes other
 * than CLD_EXITED, CLD_KILLED and CLD_DUMPED produce an "unknown error"
 * message that includes both values.  Returns 'buf' so that the call can
 * be used directly as a printf-style argument.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	switch (why) {
	case CLD_EXITED:
		(void) snprintf(buf, bufsz, "exited with status %d", what);
		break;
	case CLD_KILLED:
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
		break;
	case CLD_DUMPED:
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
		break;
	default:
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
		break;
	}

	return (buf);
}

/*
 * exit system call: pass back caller's arg.
 * The argument is handed to exit() as the data for a CLD_EXITED code.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed.  As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.
Most 148 * importantly this includes closing all file descriptors (exec only 149 * closes those marked close-on-exec) and resetting signals (exec only 150 * resets handled signals, and we need to clear any signals which 151 * killed init). Anything else that exec(2) says would be inherited, 152 * but would affect the execution of init, needs to be reset. 153 */ 154 static int 155 restart_init(int what, int why) 156 { 157 kthread_t *t = curthread; 158 klwp_t *lwp = ttolwp(t); 159 proc_t *p = ttoproc(t); 160 user_t *up = PTOU(p); 161 162 vnode_t *oldcd, *oldrd; 163 int i, err; 164 char reason_buf[64]; 165 166 /* 167 * Let zone admin (and global zone admin if this is for a non-global 168 * zone) know that init has failed and will be restarted. 169 */ 170 zcmn_err(p->p_zone->zone_id, CE_WARN, 171 "init(1M) %s: restarting automatically", 172 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 173 174 if (!INGLOBALZONE(p)) { 175 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 176 "restarting automatically", 177 p->p_zone->zone_name, p->p_pid, reason_buf); 178 } 179 180 /* 181 * Remove any fpollinfo_t's for this (last) thread from our file 182 * descriptors so closeall() can ASSERT() that they're all gone. 183 * Then close all open file descriptors in the process. 184 */ 185 pollcleanup(); 186 closeall(P_FINFO(p)); 187 188 /* 189 * Grab p_lock and begin clearing miscellaneous global process 190 * state that needs to be reset before we exec the new init(1M). 
191 */ 192 193 mutex_enter(&p->p_lock); 194 prbarrier(p); 195 196 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 197 up->u_cmask = CMASK; 198 199 sigemptyset(&t->t_hold); 200 sigemptyset(&t->t_sig); 201 sigemptyset(&t->t_extsig); 202 203 sigemptyset(&p->p_sig); 204 sigemptyset(&p->p_extsig); 205 206 sigdelq(p, t, 0); 207 sigdelq(p, NULL, 0); 208 209 if (p->p_killsqp) { 210 siginfofree(p->p_killsqp); 211 p->p_killsqp = NULL; 212 } 213 214 /* 215 * Reset any signals that are ignored back to the default disposition. 216 * Other u_signal members will be cleared when exec calls sigdefault(). 217 */ 218 for (i = 1; i < NSIG; i++) { 219 if (up->u_signal[i - 1] == SIG_IGN) { 220 up->u_signal[i - 1] = SIG_DFL; 221 sigemptyset(&up->u_sigmask[i - 1]); 222 } 223 } 224 225 /* 226 * Clear the current signal, any signal info associated with it, and 227 * any signal information from contracts and/or contract templates. 228 */ 229 lwp->lwp_cursig = 0; 230 lwp->lwp_extsig = 0; 231 if (lwp->lwp_curinfo != NULL) { 232 siginfofree(lwp->lwp_curinfo); 233 lwp->lwp_curinfo = NULL; 234 } 235 lwp_ctmpl_clear(lwp); 236 237 /* 238 * Reset both the process root directory and the current working 239 * directory to the root of the zone just as we do during boot. 240 */ 241 VN_HOLD(p->p_zone->zone_rootvp); 242 oldrd = up->u_rdir; 243 up->u_rdir = p->p_zone->zone_rootvp; 244 245 VN_HOLD(p->p_zone->zone_rootvp); 246 oldcd = up->u_cdir; 247 up->u_cdir = p->p_zone->zone_rootvp; 248 249 if (up->u_cwd != NULL) { 250 refstr_rele(up->u_cwd); 251 up->u_cwd = NULL; 252 } 253 254 mutex_exit(&p->p_lock); 255 256 if (oldrd != NULL) 257 VN_RELE(oldrd); 258 if (oldcd != NULL) 259 VN_RELE(oldcd); 260 261 /* Free the controlling tty. (freectty() always assumes curproc.) */ 262 ASSERT(p == curproc); 263 (void) freectty(B_TRUE); 264 265 /* 266 * Now exec() the new init(1M) on top of the current process. If we 267 * succeed, the caller will treat this like a successful system call. 
268 * If we fail, we issue messages and the caller will proceed with exit. 269 */ 270 err = exec_init(p->p_zone->zone_initname, NULL); 271 272 if (err == 0) 273 return (0); 274 275 zcmn_err(p->p_zone->zone_id, CE_WARN, 276 "failed to restart init(1M) (err=%d): system reboot required", err); 277 278 if (!INGLOBALZONE(p)) { 279 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 280 "(pid %d, err=%d): zoneadm(1M) boot required", 281 p->p_zone->zone_name, p->p_pid, err); 282 } 283 284 return (-1); 285 } 286 287 /* 288 * Release resources. 289 * Enter zombie state. 290 * Wake up parent and init processes, 291 * and dispose of children. 292 */ 293 void 294 exit(int why, int what) 295 { 296 /* 297 * If proc_exit() fails, then some other lwp in the process 298 * got there first. We just have to call lwp_exit() to allow 299 * the other lwp to finish exiting the process. Otherwise we're 300 * restarting init, and should return. 301 */ 302 if (proc_exit(why, what) != 0) { 303 mutex_enter(&curproc->p_lock); 304 ASSERT(curproc->p_flag & SEXITLWPS); 305 lwp_exit(); 306 /* NOTREACHED */ 307 } 308 } 309 310 /* 311 * Set the SEXITING flag on the process, after making sure /proc does 312 * not have it locked. This is done in more places than proc_exit(), 313 * so it is a separate function. 314 */ 315 void 316 proc_is_exiting(proc_t *p) 317 { 318 mutex_enter(&p->p_lock); 319 prbarrier(p); 320 p->p_flag |= SEXITING; 321 mutex_exit(&p->p_lock); 322 } 323 324 /* 325 * Return value: 326 * 1 - exitlwps() failed, call (or continue) lwp_exit() 327 * 0 - restarting init. 
Return through system call path 328 */ 329 int 330 proc_exit(int why, int what) 331 { 332 kthread_t *t = curthread; 333 klwp_t *lwp = ttolwp(t); 334 proc_t *p = ttoproc(t); 335 zone_t *z = p->p_zone; 336 timeout_id_t tmp_id; 337 int rv; 338 proc_t *q; 339 task_t *tk; 340 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 341 sigqueue_t *sqp; 342 lwpdir_t *lwpdir; 343 uint_t lwpdir_sz; 344 lwpdir_t **tidhash; 345 uint_t tidhash_sz; 346 refstr_t *cwd; 347 hrtime_t hrutime, hrstime; 348 int evaporate; 349 350 /* 351 * Stop and discard the process's lwps except for the current one, 352 * unless some other lwp beat us to it. If exitlwps() fails then 353 * return and the calling lwp will call (or continue in) lwp_exit(). 354 */ 355 proc_is_exiting(p); 356 if (exitlwps(0) != 0) 357 return (1); 358 359 DTRACE_PROC(lwp__exit); 360 DTRACE_PROC1(exit, int, why); 361 362 /* 363 * Will perform any brand specific proc exit processing, since this 364 * is always the last lwp, will also perform lwp_exit and free brand 365 * data 366 */ 367 if (PROC_IS_BRANDED(p)) 368 BROP(p)->b_proc_exit(p, lwp); 369 370 /* 371 * Don't let init exit unless zone_start_init() failed its exec, or 372 * we are shutting down the zone or the machine. 373 * 374 * Since we are single threaded, we don't need to lock the 375 * following accesses to zone_proc_initpid. 376 */ 377 if (p->p_pid == z->zone_proc_initpid) { 378 if (z->zone_boot_err == 0 && 379 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 380 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 381 z->zone_restart_init == B_TRUE && 382 restart_init(what, why) == 0) 383 return (0); 384 /* 385 * Since we didn't or couldn't restart init, we clear 386 * the zone's init state and proceed with exit 387 * processing. 388 */ 389 z->zone_proc_initpid = -1; 390 } 391 392 /* 393 * Allocate a sigqueue now, before we grab locks. 394 * It will be given to sigcld(), below. 
395 * Special case: If we will be making the process disappear 396 * without a trace (for the benefit of posix_spawn() in libc) 397 * don't bother to allocate a useless sigqueue. 398 */ 399 evaporate = ((p->p_flag & SVFORK) && 400 why == CLD_EXITED && what == _EVAPORATE); 401 if (!evaporate) 402 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 403 404 /* 405 * revoke any doors created by the process. 406 */ 407 if (p->p_door_list) 408 door_exit(); 409 410 /* 411 * Release schedctl data structures. 412 */ 413 if (p->p_pagep) 414 schedctl_proc_cleanup(); 415 416 /* 417 * make sure all pending kaio has completed. 418 */ 419 if (p->p_aio) 420 aio_cleanup_exit(); 421 422 /* 423 * discard the lwpchan cache. 424 */ 425 if (p->p_lcp != NULL) 426 lwpchan_destroy_cache(0); 427 428 /* 429 * Clean up any DTrace helper actions or probes for the process. 430 */ 431 if (p->p_dtrace_helpers != NULL) { 432 ASSERT(dtrace_helpers_cleanup != NULL); 433 (*dtrace_helpers_cleanup)(); 434 } 435 436 /* untimeout the realtime timers */ 437 if (p->p_itimer != NULL) 438 timer_exit(); 439 440 if ((tmp_id = p->p_alarmid) != 0) { 441 p->p_alarmid = 0; 442 (void) untimeout(tmp_id); 443 } 444 445 /* 446 * Remove any fpollinfo_t's for this (last) thread from our file 447 * descriptors so closeall() can ASSERT() that they're all gone. 448 */ 449 pollcleanup(); 450 451 if (p->p_rprof_cyclic != CYCLIC_NONE) { 452 mutex_enter(&cpu_lock); 453 cyclic_remove(p->p_rprof_cyclic); 454 mutex_exit(&cpu_lock); 455 } 456 457 mutex_enter(&p->p_lock); 458 459 /* 460 * Clean up any DTrace probes associated with this process. 
461 */ 462 if (p->p_dtrace_probes) { 463 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 464 dtrace_fasttrap_exit_ptr(p); 465 } 466 467 while ((tmp_id = p->p_itimerid) != 0) { 468 p->p_itimerid = 0; 469 mutex_exit(&p->p_lock); 470 (void) untimeout(tmp_id); 471 mutex_enter(&p->p_lock); 472 } 473 474 lwp_cleanup(); 475 476 /* 477 * We are about to exit; prevent our resource associations from 478 * being changed. 479 */ 480 pool_barrier_enter(); 481 482 /* 483 * Block the process against /proc now that we have really 484 * acquired p->p_lock (to manipulate p_tlist at least). 485 */ 486 prbarrier(p); 487 488 #ifdef SUN_SRC_COMPAT 489 if (code == CLD_KILLED) 490 u.u_acflag |= AXSIG; 491 #endif 492 sigfillset(&p->p_ignore); 493 sigemptyset(&p->p_siginfo); 494 sigemptyset(&p->p_sig); 495 sigemptyset(&p->p_extsig); 496 sigemptyset(&t->t_sig); 497 sigemptyset(&t->t_extsig); 498 sigemptyset(&p->p_sigmask); 499 sigdelq(p, t, 0); 500 lwp->lwp_cursig = 0; 501 lwp->lwp_extsig = 0; 502 p->p_flag &= ~(SKILLED | SEXTKILLED); 503 if (lwp->lwp_curinfo) { 504 siginfofree(lwp->lwp_curinfo); 505 lwp->lwp_curinfo = NULL; 506 } 507 508 t->t_proc_flag |= TP_LWPEXIT; 509 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 510 prlwpexit(t); /* notify /proc */ 511 lwp_hash_out(p, t->t_tid); 512 prexit(p); 513 514 p->p_lwpcnt = 0; 515 p->p_tlist = NULL; 516 sigqfree(p); 517 term_mstate(t); 518 p->p_mterm = gethrtime(); 519 520 exec_vp = p->p_exec; 521 execdir_vp = p->p_execdir; 522 p->p_exec = NULLVP; 523 p->p_execdir = NULLVP; 524 mutex_exit(&p->p_lock); 525 if (exec_vp) 526 VN_RELE(exec_vp); 527 if (execdir_vp) 528 VN_RELE(execdir_vp); 529 530 pr_free_watched_pages(p); 531 532 closeall(P_FINFO(p)); 533 534 /* Free the controlling tty. (freectty() always assumes curproc.) 
*/ 535 ASSERT(p == curproc); 536 (void) freectty(B_TRUE); 537 538 #if defined(__sparc) 539 if (p->p_utraps != NULL) 540 utrap_free(p); 541 #endif 542 if (p->p_semacct) /* IPC semaphore exit */ 543 semexit(p); 544 rv = wstat(why, what); 545 546 acct(rv & 0xff); 547 exacct_commit_proc(p, rv); 548 549 /* 550 * Release any resources associated with C2 auditing 551 */ 552 if (audit_active) { 553 /* 554 * audit exit system call 555 */ 556 audit_exit(why, what); 557 } 558 559 /* 560 * Free address space. 561 */ 562 relvm(); 563 564 /* 565 * Release held contracts. 566 */ 567 contract_exit(p); 568 569 /* 570 * Depart our encapsulating process contract. 571 */ 572 if ((p->p_flag & SSYS) == 0) { 573 ASSERT(p->p_ct_process); 574 contract_process_exit(p->p_ct_process, p, rv); 575 } 576 577 /* 578 * Remove pool association, and block if requested by pool_do_bind. 579 */ 580 mutex_enter(&p->p_lock); 581 ASSERT(p->p_pool->pool_ref > 0); 582 atomic_add_32(&p->p_pool->pool_ref, -1); 583 p->p_pool = pool_default; 584 /* 585 * Now that our address space has been freed and all other threads 586 * in this process have exited, set the PEXITED pool flag. This 587 * tells the pools subsystems to ignore this process if it was 588 * requested to rebind this process to a new pool. 589 */ 590 p->p_poolflag |= PEXITED; 591 pool_barrier_exit(); 592 mutex_exit(&p->p_lock); 593 594 mutex_enter(&pidlock); 595 596 /* 597 * Delete this process from the newstate list of its parent. We 598 * will put it in the right place in the sigcld in the end. 599 */ 600 delete_ns(p->p_parent, p); 601 602 /* 603 * Reassign the orphans to the next of kin. 604 * Don't rearrange init's orphanage. 
605 */ 606 if ((q = p->p_orphan) != NULL && p != proc_init) { 607 608 proc_t *nokp = p->p_nextofkin; 609 610 for (;;) { 611 q->p_nextofkin = nokp; 612 if (q->p_nextorph == NULL) 613 break; 614 q = q->p_nextorph; 615 } 616 q->p_nextorph = nokp->p_orphan; 617 nokp->p_orphan = p->p_orphan; 618 p->p_orphan = NULL; 619 } 620 621 /* 622 * Reassign the children to init. 623 * Don't try to assign init's children to init. 624 */ 625 if ((q = p->p_child) != NULL && p != proc_init) { 626 struct proc *np; 627 struct proc *initp = proc_init; 628 boolean_t setzonetop = B_FALSE; 629 630 if (!INGLOBALZONE(curproc)) 631 setzonetop = B_TRUE; 632 633 pgdetach(p); 634 635 do { 636 np = q->p_sibling; 637 /* 638 * Delete it from its current parent new state 639 * list and add it to init new state list 640 */ 641 delete_ns(q->p_parent, q); 642 643 q->p_ppid = 1; 644 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); 645 if (setzonetop) { 646 mutex_enter(&q->p_lock); 647 q->p_flag |= SZONETOP; 648 mutex_exit(&q->p_lock); 649 } 650 q->p_parent = initp; 651 652 /* 653 * Since q will be the first child, 654 * it will not have a previous sibling. 655 */ 656 q->p_psibling = NULL; 657 if (initp->p_child) { 658 initp->p_child->p_psibling = q; 659 } 660 q->p_sibling = initp->p_child; 661 initp->p_child = q; 662 if (q->p_proc_flag & P_PR_PTRACE) { 663 mutex_enter(&q->p_lock); 664 sigtoproc(q, NULL, SIGKILL); 665 mutex_exit(&q->p_lock); 666 } 667 /* 668 * sigcld() will add the child to parents 669 * newstate list. 
670 */ 671 if (q->p_stat == SZOMB) 672 sigcld(q, NULL); 673 } while ((q = np) != NULL); 674 675 p->p_child = NULL; 676 ASSERT(p->p_child_ns == NULL); 677 } 678 679 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 680 681 mutex_enter(&p->p_lock); 682 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 683 684 /* 685 * Have our task accummulate our resource usage data before they 686 * become contaminated by p_cacct etc., and before we renounce 687 * membership of the task. 688 * 689 * We do this regardless of whether or not task accounting is active. 690 * This is to avoid having nonsense data reported for this task if 691 * task accounting is subsequently enabled. The overhead is minimal; 692 * by this point, this process has accounted for the usage of all its 693 * LWPs. We nonetheless do the work here, and under the protection of 694 * pidlock, so that the movement of the process's usage to the task 695 * happens at the same time as the removal of the process from the 696 * task, from the point of view of exacct_snapshot_task_usage(). 
697 */ 698 exacct_update_task_mstate(p); 699 700 hrutime = mstate_aggr_state(p, LMS_USER); 701 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 702 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 703 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 704 705 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 706 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 707 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 708 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 709 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 710 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 711 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 712 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 713 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 714 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 715 716 p->p_ru.minflt += p->p_cru.minflt; 717 p->p_ru.majflt += p->p_cru.majflt; 718 p->p_ru.nswap += p->p_cru.nswap; 719 p->p_ru.inblock += p->p_cru.inblock; 720 p->p_ru.oublock += p->p_cru.oublock; 721 p->p_ru.msgsnd += p->p_cru.msgsnd; 722 p->p_ru.msgrcv += p->p_cru.msgrcv; 723 p->p_ru.nsignals += p->p_cru.nsignals; 724 p->p_ru.nvcsw += p->p_cru.nvcsw; 725 p->p_ru.nivcsw += p->p_cru.nivcsw; 726 p->p_ru.sysc += p->p_cru.sysc; 727 p->p_ru.ioch += p->p_cru.ioch; 728 729 p->p_stat = SZOMB; 730 p->p_proc_flag &= ~P_PR_PTRACE; 731 p->p_wdata = what; 732 p->p_wcode = (char)why; 733 734 cdir = PTOU(p)->u_cdir; 735 rdir = PTOU(p)->u_rdir; 736 cwd = PTOU(p)->u_cwd; 737 738 /* 739 * Release resource controls, as they are no longer enforceable. 740 */ 741 rctl_set_free(p->p_rctls); 742 743 /* 744 * Give up task and project memberships. Decrement tk_nlwps counter 745 * for our task.max-lwps resource control. An extended accounting 746 * record, if that facility is active, is scheduled to be written. 747 * Zombie processes are false members of task0 for the remainder of 748 * their lifetime; no accounting information is recorded for them. 
749 */ 750 tk = p->p_task; 751 752 mutex_enter(&p->p_zone->zone_nlwps_lock); 753 tk->tk_nlwps--; 754 tk->tk_proj->kpj_nlwps--; 755 p->p_zone->zone_nlwps--; 756 mutex_exit(&p->p_zone->zone_nlwps_lock); 757 task_detach(p); 758 p->p_task = task0p; 759 760 /* 761 * Clear the lwp directory and the lwpid hash table 762 * now that /proc can't bother us any more. 763 * We free the memory below, after dropping p->p_lock. 764 */ 765 lwpdir = p->p_lwpdir; 766 lwpdir_sz = p->p_lwpdir_sz; 767 tidhash = p->p_tidhash; 768 tidhash_sz = p->p_tidhash_sz; 769 p->p_lwpdir = NULL; 770 p->p_lwpfree = NULL; 771 p->p_lwpdir_sz = 0; 772 p->p_tidhash = NULL; 773 p->p_tidhash_sz = 0; 774 775 /* 776 * If the process has context ops installed, call the exit routine 777 * on behalf of this last remaining thread. Normally exitpctx() is 778 * called during thread_exit() or lwp_exit(), but because this is the 779 * last thread in the process, we must call it here. By the time 780 * thread_exit() is called (below), the association with the relevant 781 * process has been lost. 782 * 783 * We also free the context here. 784 */ 785 if (p->p_pctx) { 786 kpreempt_disable(); 787 exitpctx(p); 788 kpreempt_enable(); 789 790 freepctx(p, 0); 791 } 792 793 /* 794 * curthread's proc pointer is changed to point to the 'sched' 795 * process for the corresponding zone, except in the case when 796 * the exiting process is in fact a zsched instance, in which 797 * case the proc pointer is set to p0. We do so, so that the 798 * process still points at the right zone when we call the VN_RELE() 799 * below. 800 * 801 * This is because curthread's original proc pointer can be freed as 802 * soon as the child sends a SIGCLD to its parent. We use zsched so 803 * that for user processes, even in the final moments of death, the 804 * process is still associated with its zone. 
805 */ 806 if (p != t->t_procp->p_zone->zone_zsched) 807 t->t_procp = t->t_procp->p_zone->zone_zsched; 808 else 809 t->t_procp = &p0; 810 811 mutex_exit(&p->p_lock); 812 if (!evaporate) { 813 p->p_pidflag &= ~CLDPEND; 814 sigcld(p, sqp); 815 } else { 816 /* 817 * Do what sigcld() would do if the disposition 818 * of the SIGCHLD signal were set to be ignored. 819 */ 820 cv_broadcast(&p->p_srwchan_cv); 821 freeproc(p); 822 } 823 mutex_exit(&pidlock); 824 825 /* 826 * We don't release u_cdir and u_rdir until SZOMB is set. 827 * This protects us against dofusers(). 828 */ 829 VN_RELE(cdir); 830 if (rdir) 831 VN_RELE(rdir); 832 if (cwd) 833 refstr_rele(cwd); 834 835 /* 836 * task_rele() may ultimately cause the zone to go away (or 837 * may cause the last user process in a zone to go away, which 838 * signals zsched to go away). So prior to this call, we must 839 * no longer point at zsched. 840 */ 841 t->t_procp = &p0; 842 task_rele(tk); 843 844 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 845 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 846 847 lwp_pcb_exit(); 848 849 thread_exit(); 850 /* NOTREACHED */ 851 } 852 853 /* 854 * Format siginfo structure for wait system calls. 855 */ 856 void 857 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 858 { 859 ASSERT(MUTEX_HELD(&pidlock)); 860 861 bzero(ip, sizeof (k_siginfo_t)); 862 ip->si_signo = SIGCLD; 863 ip->si_code = pp->p_wcode; 864 ip->si_pid = pp->p_pid; 865 ip->si_ctid = PRCTID(pp); 866 ip->si_zoneid = pp->p_zone->zone_id; 867 ip->si_status = pp->p_wdata; 868 ip->si_stime = pp->p_stime; 869 ip->si_utime = pp->p_utime; 870 871 if (waitflag) { 872 pp->p_wcode = 0; 873 pp->p_wdata = 0; 874 pp->p_pidflag &= ~CLDPEND; 875 } 876 } 877 878 /* 879 * Wait system call. 880 * Search for a terminated (zombie) child, 881 * finally lay it to rest, and collect its status. 882 * Look also for stopped children, 883 * and pass back status from them. 
884 */ 885 int 886 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 887 { 888 int found; 889 proc_t *cp, *pp; 890 int proc_gone; 891 int waitflag = !(options & WNOWAIT); 892 893 /* 894 * Obsolete flag, defined here only for binary compatibility 895 * with old statically linked executables. Delete this when 896 * we no longer care about these old and broken applications. 897 */ 898 #define _WNOCHLD 0400 899 options &= ~_WNOCHLD; 900 901 if (options == 0 || (options & ~WOPTMASK)) 902 return (EINVAL); 903 904 switch (idtype) { 905 case P_PID: 906 case P_PGID: 907 if (id < 0 || id >= maxpid) 908 return (EINVAL); 909 /* FALLTHROUGH */ 910 case P_ALL: 911 break; 912 default: 913 return (EINVAL); 914 } 915 916 pp = ttoproc(curthread); 917 918 /* 919 * lock parent mutex so that sibling chain can be searched. 920 */ 921 mutex_enter(&pidlock); 922 923 /* 924 * if we are only looking for exited processes and child_ns list 925 * is empty no reason to look at all children. 926 */ 927 if (idtype == P_ALL && 928 (options & ~WNOWAIT) == (WNOHANG | WEXITED) && 929 pp->p_child_ns == NULL) { 930 if (pp->p_child) { 931 mutex_exit(&pidlock); 932 bzero(ip, sizeof (k_siginfo_t)); 933 return (0); 934 } 935 mutex_exit(&pidlock); 936 return (ECHILD); 937 } 938 939 while (pp->p_child != NULL) { 940 941 proc_gone = 0; 942 943 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { 944 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) 945 continue; 946 if (idtype == P_PID && id != cp->p_pid) 947 continue; 948 if (idtype == P_PGID && id != cp->p_pgrp) 949 continue; 950 951 switch (cp->p_wcode) { 952 953 case CLD_TRAPPED: 954 case CLD_STOPPED: 955 case CLD_CONTINUED: 956 cmn_err(CE_PANIC, 957 "waitid: wrong state %d on the p_newstate" 958 " list", cp->p_wcode); 959 break; 960 961 case CLD_EXITED: 962 case CLD_DUMPED: 963 case CLD_KILLED: 964 if (!(options & WEXITED)) { 965 /* 966 * Count how many are already gone 967 * for good. 
968 */ 969 proc_gone++; 970 break; 971 } 972 if (!waitflag) { 973 winfo(cp, ip, 0); 974 } else { 975 winfo(cp, ip, 1); 976 freeproc(cp); 977 } 978 mutex_exit(&pidlock); 979 if (waitflag) { /* accept SIGCLD */ 980 sigcld_delete(ip); 981 sigcld_repost(); 982 } 983 return (0); 984 } 985 986 if (idtype == P_PID) 987 break; 988 } 989 990 /* 991 * Wow! None of the threads on the p_sibling_ns list were 992 * interesting threads. Check all the kids! 993 */ 994 found = 0; 995 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { 996 if (idtype == P_PID && id != cp->p_pid) 997 continue; 998 if (idtype == P_PGID && id != cp->p_pgrp) 999 continue; 1000 1001 switch (cp->p_wcode) { 1002 case CLD_TRAPPED: 1003 if (!(options & WTRAPPED)) 1004 break; 1005 winfo(cp, ip, waitflag); 1006 mutex_exit(&pidlock); 1007 if (waitflag) { /* accept SIGCLD */ 1008 sigcld_delete(ip); 1009 sigcld_repost(); 1010 } 1011 return (0); 1012 1013 case CLD_STOPPED: 1014 if (!(options & WSTOPPED)) 1015 break; 1016 /* Is it still stopped? */ 1017 mutex_enter(&cp->p_lock); 1018 if (!jobstopped(cp)) { 1019 mutex_exit(&cp->p_lock); 1020 break; 1021 } 1022 mutex_exit(&cp->p_lock); 1023 winfo(cp, ip, waitflag); 1024 mutex_exit(&pidlock); 1025 if (waitflag) { /* accept SIGCLD */ 1026 sigcld_delete(ip); 1027 sigcld_repost(); 1028 } 1029 return (0); 1030 1031 case CLD_CONTINUED: 1032 if (!(options & WCONTINUED)) 1033 break; 1034 winfo(cp, ip, waitflag); 1035 mutex_exit(&pidlock); 1036 if (waitflag) { /* accept SIGCLD */ 1037 sigcld_delete(ip); 1038 sigcld_repost(); 1039 } 1040 return (0); 1041 1042 case CLD_EXITED: 1043 case CLD_DUMPED: 1044 case CLD_KILLED: 1045 if (idtype != P_PID && 1046 (cp->p_pidflag & CLDWAITPID)) 1047 continue; 1048 /* 1049 * Don't complain if a process was found in 1050 * the first loop but we broke out of the loop 1051 * because of the arguments passed to us. 
1052 */ 1053 if (proc_gone == 0) { 1054 cmn_err(CE_PANIC, 1055 "waitid: wrong state on the" 1056 " p_child list"); 1057 } else { 1058 break; 1059 } 1060 } 1061 1062 found++; 1063 1064 if (idtype == P_PID) 1065 break; 1066 } 1067 1068 /* 1069 * If we found no interesting processes at all, 1070 * break out and return ECHILD. 1071 */ 1072 if (found + proc_gone == 0) 1073 break; 1074 1075 if (options & WNOHANG) { 1076 mutex_exit(&pidlock); 1077 bzero(ip, sizeof (k_siginfo_t)); 1078 /* 1079 * We should set ip->si_signo = SIGCLD, 1080 * but there is an SVVS test that expects 1081 * ip->si_signo to be zero in this case. 1082 */ 1083 return (0); 1084 } 1085 1086 /* 1087 * If we found no processes of interest that could 1088 * change state while we wait, we don't wait at all. 1089 * Get out with ECHILD according to SVID. 1090 */ 1091 if (found == proc_gone) 1092 break; 1093 1094 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1095 mutex_exit(&pidlock); 1096 return (EINTR); 1097 } 1098 } 1099 mutex_exit(&pidlock); 1100 return (ECHILD); 1101 } 1102 1103 /* 1104 * The wait() system call trap is no longer invoked by libc. 1105 * It is retained only for the benefit of statically linked applications. 1106 * Delete this when we no longer care about these old and broken applications. 
1107 */ 1108 int64_t 1109 wait(void) 1110 { 1111 int error; 1112 k_siginfo_t info; 1113 rval_t r; 1114 1115 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1116 return (set_errno(error)); 1117 r.r_val1 = info.si_pid; 1118 r.r_val2 = wstat(info.si_code, info.si_status); 1119 return (r.r_vals); 1120 } 1121 1122 int 1123 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1124 { 1125 int error; 1126 k_siginfo_t info; 1127 1128 if (error = waitid(idtype, id, &info, options)) 1129 return (set_errno(error)); 1130 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1131 return (set_errno(EFAULT)); 1132 return (0); 1133 } 1134 1135 #ifdef _SYSCALL32_IMPL 1136 1137 int 1138 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1139 { 1140 int error; 1141 k_siginfo_t info; 1142 siginfo32_t info32; 1143 1144 if (error = waitid(idtype, id, &info, options)) 1145 return (set_errno(error)); 1146 siginfo_kto32(&info, &info32); 1147 if (copyout(&info32, infop, sizeof (info32))) 1148 return (set_errno(EFAULT)); 1149 return (0); 1150 } 1151 1152 #endif /* _SYSCALL32_IMPL */ 1153 1154 void 1155 proc_detach(proc_t *p) 1156 { 1157 proc_t *q; 1158 1159 ASSERT(MUTEX_HELD(&pidlock)); 1160 1161 q = p->p_parent; 1162 ASSERT(q != NULL); 1163 1164 /* 1165 * Take it off the newstate list of its parent 1166 */ 1167 delete_ns(q, p); 1168 1169 if (q->p_child == p) { 1170 q->p_child = p->p_sibling; 1171 /* 1172 * If the parent has no children, it better not 1173 * have any with new states either! 1174 */ 1175 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1176 } 1177 1178 if (p->p_sibling) { 1179 p->p_sibling->p_psibling = p->p_psibling; 1180 } 1181 1182 if (p->p_psibling) { 1183 p->p_psibling->p_sibling = p->p_sibling; 1184 } 1185 } 1186 1187 /* 1188 * Remove zombie children from the process table. 
1189 */ 1190 void 1191 freeproc(proc_t *p) 1192 { 1193 proc_t *q; 1194 1195 ASSERT(p->p_stat == SZOMB); 1196 ASSERT(p->p_tlist == NULL); 1197 ASSERT(MUTEX_HELD(&pidlock)); 1198 1199 sigdelq(p, NULL, 0); 1200 if (p->p_killsqp) { 1201 siginfofree(p->p_killsqp); 1202 p->p_killsqp = NULL; 1203 } 1204 1205 prfree(p); /* inform /proc */ 1206 1207 /* 1208 * Don't free the init processes. 1209 * Other dying processes will access it. 1210 */ 1211 if (p == proc_init) 1212 return; 1213 1214 1215 /* 1216 * We wait until now to free the cred structure because a 1217 * zombie process's credentials may be examined by /proc. 1218 * No cred locking needed because there are no threads at this point. 1219 */ 1220 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1221 crfree(p->p_cred); 1222 if (p->p_corefile != NULL) { 1223 corectl_path_rele(p->p_corefile); 1224 p->p_corefile = NULL; 1225 } 1226 if (p->p_content != NULL) { 1227 corectl_content_rele(p->p_content); 1228 p->p_content = NULL; 1229 } 1230 1231 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1232 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1233 /* 1234 * This should still do the right thing since p_utime/stime 1235 * get set to the correct value on process exit, so it 1236 * should get properly updated 1237 */ 1238 p->p_nextofkin->p_cutime += p->p_utime; 1239 p->p_nextofkin->p_cstime += p->p_stime; 1240 1241 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1242 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1243 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1244 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1245 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1246 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1247 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1248 += p->p_acct[LMS_USER_LOCK]; 1249 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1250 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1251 += 
p->p_acct[LMS_WAIT_CPU]; 1252 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1253 1254 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1255 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1256 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1257 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1258 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1259 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1260 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1261 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1262 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1263 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1264 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1265 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1266 1267 } 1268 1269 q = p->p_nextofkin; 1270 if (q && q->p_orphan == p) 1271 q->p_orphan = p->p_nextorph; 1272 else if (q) { 1273 for (q = q->p_orphan; q; q = q->p_nextorph) 1274 if (q->p_nextorph == p) 1275 break; 1276 ASSERT(q && q->p_nextorph == p); 1277 q->p_nextorph = p->p_nextorph; 1278 } 1279 1280 proc_detach(p); 1281 pid_exit(p); /* frees pid and proc structure */ 1282 } 1283 1284 /* 1285 * Delete process "child" from the newstate list of process "parent" 1286 */ 1287 void 1288 delete_ns(proc_t *parent, proc_t *child) 1289 { 1290 proc_t **ns; 1291 1292 ASSERT(MUTEX_HELD(&pidlock)); 1293 ASSERT(child->p_parent == parent); 1294 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1295 if (*ns == child) { 1296 1297 ASSERT((*ns)->p_parent == parent); 1298 1299 *ns = child->p_sibling_ns; 1300 child->p_sibling_ns = NULL; 1301 return; 1302 } 1303 } 1304 } 1305 1306 /* 1307 * Add process "child" to the new state list of process "parent" 1308 */ 1309 void 1310 add_ns(proc_t *parent, proc_t *child) 1311 { 1312 ASSERT(child->p_sibling_ns == NULL); 1313 child->p_sibling_ns = parent->p_child_ns; 1314 parent->p_child_ns = child; 1315 } 1316