/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */

#pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/ucontext.h>
#include <sys/procfs.h>
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/var.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/wait.h>
#include <sys/siginfo.h>
#include <sys/procset.h>
#include <sys/class.h>
#include <sys/file.h>
#include <sys/session.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/prsystm.h>
#include <sys/ipc.h>
#include <sys/sem_impl.h>
#include <c2/audit.h>
#include <sys/aio_impl.h>
#include <vm/as.h>
#include <sys/poll.h>
#include <sys/door.h>
#include <sys/lwpchan_impl.h>
#include <sys/utrap.h>
#include <sys/task.h>
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>
#include <sys/brand.h>
#include <sys/libc_kernel.h>

/*
 * Build an old style wait status word from a (code, data) pair.
 *
 * Only the low eight bits of data are used.  Depending on code they are
 * placed in the high byte (exit, trap, stop), combined with WCOREFLG
 * (core dump), returned unchanged (kill), or replaced by WCONTFLG
 * (continue).  Any other code panics.
 */
int
wstat(int code, int data)
{
	int lowbyte = data & 0377;
	int status;

	switch (code) {
	case CLD_EXITED:
		status = lowbyte << 8;
		break;
	case CLD_DUMPED:
		status = lowbyte | WCOREFLG;
		break;
	case CLD_KILLED:
		status = lowbyte;
		break;
	case CLD_TRAPPED:
	case CLD_STOPPED:
		status = (lowbyte << 8) | WSTOPFLG;
		break;
	case CLD_CONTINUED:
		status = WCONTFLG;
		break;
	default:
		status = lowbyte;
		cmn_err(CE_PANIC, "wstat: bad code");
		/* NOTREACHED */
	}
	return (status);
}

/*
 * Format a short description of why a process went away into buf (at
 * most bufsz bytes) and return buf so the call can be used directly as
 * a printf-style argument.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	if (why == CLD_EXITED) {
		(void) snprintf(buf, bufsz, "exited with status %d", what);
	} else if (why == CLD_KILLED) {
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
	} else if (why == CLD_DUMPED) {
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
	} else {
		(void) snprintf(buf, bufsz,
		    "encountered unknown error (%d, %d)", why, what);
	}

	return (buf);
}

/*
 * The exit system call: terminate with the caller-supplied status.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed.  As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.
Most 148 * importantly this includes closing all file descriptors (exec only 149 * closes those marked close-on-exec) and resetting signals (exec only 150 * resets handled signals, and we need to clear any signals which 151 * killed init). Anything else that exec(2) says would be inherited, 152 * but would affect the execution of init, needs to be reset. 153 */ 154 static int 155 restart_init(int what, int why) 156 { 157 kthread_t *t = curthread; 158 klwp_t *lwp = ttolwp(t); 159 proc_t *p = ttoproc(t); 160 user_t *up = PTOU(p); 161 162 vnode_t *oldcd, *oldrd; 163 int i, err; 164 char reason_buf[64]; 165 166 /* 167 * Let zone admin (and global zone admin if this is for a non-global 168 * zone) know that init has failed and will be restarted. 169 */ 170 zcmn_err(p->p_zone->zone_id, CE_WARN, 171 "init(1M) %s: restarting automatically", 172 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 173 174 if (!INGLOBALZONE(p)) { 175 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 176 "restarting automatically", 177 p->p_zone->zone_name, p->p_pid, reason_buf); 178 } 179 180 /* 181 * Remove any fpollinfo_t's for this (last) thread from our file 182 * descriptors so closeall() can ASSERT() that they're all gone. 183 * Then close all open file descriptors in the process. 184 */ 185 pollcleanup(); 186 closeall(P_FINFO(p)); 187 188 /* 189 * Grab p_lock and begin clearing miscellaneous global process 190 * state that needs to be reset before we exec the new init(1M). 
191 */ 192 193 mutex_enter(&p->p_lock); 194 prbarrier(p); 195 196 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 197 up->u_cmask = CMASK; 198 199 sigemptyset(&t->t_hold); 200 sigemptyset(&t->t_sig); 201 sigemptyset(&t->t_extsig); 202 203 sigemptyset(&p->p_sig); 204 sigemptyset(&p->p_extsig); 205 206 sigdelq(p, t, 0); 207 sigdelq(p, NULL, 0); 208 209 if (p->p_killsqp) { 210 siginfofree(p->p_killsqp); 211 p->p_killsqp = NULL; 212 } 213 214 /* 215 * Reset any signals that are ignored back to the default disposition. 216 * Other u_signal members will be cleared when exec calls sigdefault(). 217 */ 218 for (i = 1; i < NSIG; i++) { 219 if (up->u_signal[i - 1] == SIG_IGN) { 220 up->u_signal[i - 1] = SIG_DFL; 221 sigemptyset(&up->u_sigmask[i - 1]); 222 } 223 } 224 225 /* 226 * Clear the current signal, any signal info associated with it, and 227 * any signal information from contracts and/or contract templates. 228 */ 229 lwp->lwp_cursig = 0; 230 lwp->lwp_extsig = 0; 231 if (lwp->lwp_curinfo != NULL) { 232 siginfofree(lwp->lwp_curinfo); 233 lwp->lwp_curinfo = NULL; 234 } 235 lwp_ctmpl_clear(lwp); 236 237 /* 238 * Reset both the process root directory and the current working 239 * directory to the root of the zone just as we do during boot. 240 */ 241 VN_HOLD(p->p_zone->zone_rootvp); 242 oldrd = up->u_rdir; 243 up->u_rdir = p->p_zone->zone_rootvp; 244 245 VN_HOLD(p->p_zone->zone_rootvp); 246 oldcd = up->u_cdir; 247 up->u_cdir = p->p_zone->zone_rootvp; 248 249 if (up->u_cwd != NULL) { 250 refstr_rele(up->u_cwd); 251 up->u_cwd = NULL; 252 } 253 254 mutex_exit(&p->p_lock); 255 256 if (oldrd != NULL) 257 VN_RELE(oldrd); 258 if (oldcd != NULL) 259 VN_RELE(oldcd); 260 261 /* Free the controlling tty. (freectty() always assumes curproc.) */ 262 ASSERT(p == curproc); 263 (void) freectty(B_TRUE); 264 265 /* 266 * Now exec() the new init(1M) on top of the current process. If we 267 * succeed, the caller will treat this like a successful system call. 
268 * If we fail, we issue messages and the caller will proceed with exit. 269 */ 270 err = exec_init(p->p_zone->zone_initname, NULL); 271 272 if (err == 0) 273 return (0); 274 275 zcmn_err(p->p_zone->zone_id, CE_WARN, 276 "failed to restart init(1M) (err=%d): system reboot required", err); 277 278 if (!INGLOBALZONE(p)) { 279 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 280 "(pid %d, err=%d): zoneadm(1M) boot required", 281 p->p_zone->zone_name, p->p_pid, err); 282 } 283 284 return (-1); 285 } 286 287 /* 288 * Release resources. 289 * Enter zombie state. 290 * Wake up parent and init processes, 291 * and dispose of children. 292 */ 293 void 294 exit(int why, int what) 295 { 296 /* 297 * If proc_exit() fails, then some other lwp in the process 298 * got there first. We just have to call lwp_exit() to allow 299 * the other lwp to finish exiting the process. Otherwise we're 300 * restarting init, and should return. 301 */ 302 if (proc_exit(why, what) != 0) { 303 mutex_enter(&curproc->p_lock); 304 ASSERT(curproc->p_flag & SEXITLWPS); 305 lwp_exit(); 306 /* NOTREACHED */ 307 } 308 } 309 310 /* 311 * Set the SEXITING flag on the process, after making sure /proc does 312 * not have it locked. This is done in more places than proc_exit(), 313 * so it is a separate function. 314 */ 315 void 316 proc_is_exiting(proc_t *p) 317 { 318 mutex_enter(&p->p_lock); 319 prbarrier(p); 320 p->p_flag |= SEXITING; 321 mutex_exit(&p->p_lock); 322 } 323 324 /* 325 * Return value: 326 * 1 - exitlwps() failed, call (or continue) lwp_exit() 327 * 0 - restarting init. 
Return through system call path 328 */ 329 int 330 proc_exit(int why, int what) 331 { 332 kthread_t *t = curthread; 333 klwp_t *lwp = ttolwp(t); 334 proc_t *p = ttoproc(t); 335 zone_t *z = p->p_zone; 336 timeout_id_t tmp_id; 337 int rv; 338 proc_t *q; 339 task_t *tk; 340 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 341 sigqueue_t *sqp; 342 lwpdir_t *lwpdir; 343 uint_t lwpdir_sz; 344 lwpdir_t **tidhash; 345 uint_t tidhash_sz; 346 refstr_t *cwd; 347 hrtime_t hrutime, hrstime; 348 int evaporate; 349 350 /* 351 * Stop and discard the process's lwps except for the current one, 352 * unless some other lwp beat us to it. If exitlwps() fails then 353 * return and the calling lwp will call (or continue in) lwp_exit(). 354 */ 355 proc_is_exiting(p); 356 if (exitlwps(0) != 0) 357 return (1); 358 359 mutex_enter(&p->p_lock); 360 if (p->p_ttime > 0) { 361 /* 362 * Account any remaining ticks charged to this process 363 * on its way out. 364 */ 365 (void) task_cpu_time_incr(p->p_task, p->p_ttime); 366 p->p_ttime = 0; 367 } 368 mutex_exit(&p->p_lock); 369 370 DTRACE_PROC(lwp__exit); 371 DTRACE_PROC1(exit, int, why); 372 373 /* 374 * Will perform any brand specific proc exit processing, since this 375 * is always the last lwp, will also perform lwp_exit and free brand 376 * data 377 */ 378 if (PROC_IS_BRANDED(p)) 379 BROP(p)->b_proc_exit(p, lwp); 380 381 /* 382 * Don't let init exit unless zone_start_init() failed its exec, or 383 * we are shutting down the zone or the machine. 384 * 385 * Since we are single threaded, we don't need to lock the 386 * following accesses to zone_proc_initpid. 
387 */ 388 if (p->p_pid == z->zone_proc_initpid) { 389 if (z->zone_boot_err == 0 && 390 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 391 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 392 z->zone_restart_init == B_TRUE && 393 restart_init(what, why) == 0) 394 return (0); 395 /* 396 * Since we didn't or couldn't restart init, we clear 397 * the zone's init state and proceed with exit 398 * processing. 399 */ 400 z->zone_proc_initpid = -1; 401 } 402 403 /* 404 * Allocate a sigqueue now, before we grab locks. 405 * It will be given to sigcld(), below. 406 * Special case: If we will be making the process disappear 407 * without a trace (for the benefit of posix_spawn() in libc) 408 * don't bother to allocate a useless sigqueue. 409 */ 410 evaporate = ((p->p_flag & SVFORK) && 411 why == CLD_EXITED && what == _EVAPORATE); 412 if (!evaporate) 413 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 414 415 /* 416 * revoke any doors created by the process. 417 */ 418 if (p->p_door_list) 419 door_exit(); 420 421 /* 422 * Release schedctl data structures. 423 */ 424 if (p->p_pagep) 425 schedctl_proc_cleanup(); 426 427 /* 428 * make sure all pending kaio has completed. 429 */ 430 if (p->p_aio) 431 aio_cleanup_exit(); 432 433 /* 434 * discard the lwpchan cache. 435 */ 436 if (p->p_lcp != NULL) 437 lwpchan_destroy_cache(0); 438 439 /* 440 * Clean up any DTrace helper actions or probes for the process. 441 */ 442 if (p->p_dtrace_helpers != NULL) { 443 ASSERT(dtrace_helpers_cleanup != NULL); 444 (*dtrace_helpers_cleanup)(); 445 } 446 447 /* untimeout the realtime timers */ 448 if (p->p_itimer != NULL) 449 timer_exit(); 450 451 if ((tmp_id = p->p_alarmid) != 0) { 452 p->p_alarmid = 0; 453 (void) untimeout(tmp_id); 454 } 455 456 /* 457 * Remove any fpollinfo_t's for this (last) thread from our file 458 * descriptors so closeall() can ASSERT() that they're all gone. 
459 */ 460 pollcleanup(); 461 462 if (p->p_rprof_cyclic != CYCLIC_NONE) { 463 mutex_enter(&cpu_lock); 464 cyclic_remove(p->p_rprof_cyclic); 465 mutex_exit(&cpu_lock); 466 } 467 468 mutex_enter(&p->p_lock); 469 470 /* 471 * Clean up any DTrace probes associated with this process. 472 */ 473 if (p->p_dtrace_probes) { 474 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 475 dtrace_fasttrap_exit_ptr(p); 476 } 477 478 while ((tmp_id = p->p_itimerid) != 0) { 479 p->p_itimerid = 0; 480 mutex_exit(&p->p_lock); 481 (void) untimeout(tmp_id); 482 mutex_enter(&p->p_lock); 483 } 484 485 lwp_cleanup(); 486 487 /* 488 * We are about to exit; prevent our resource associations from 489 * being changed. 490 */ 491 pool_barrier_enter(); 492 493 /* 494 * Block the process against /proc now that we have really 495 * acquired p->p_lock (to manipulate p_tlist at least). 496 */ 497 prbarrier(p); 498 499 #ifdef SUN_SRC_COMPAT 500 if (code == CLD_KILLED) 501 u.u_acflag |= AXSIG; 502 #endif 503 sigfillset(&p->p_ignore); 504 sigemptyset(&p->p_siginfo); 505 sigemptyset(&p->p_sig); 506 sigemptyset(&p->p_extsig); 507 sigemptyset(&t->t_sig); 508 sigemptyset(&t->t_extsig); 509 sigemptyset(&p->p_sigmask); 510 sigdelq(p, t, 0); 511 lwp->lwp_cursig = 0; 512 lwp->lwp_extsig = 0; 513 p->p_flag &= ~(SKILLED | SEXTKILLED); 514 if (lwp->lwp_curinfo) { 515 siginfofree(lwp->lwp_curinfo); 516 lwp->lwp_curinfo = NULL; 517 } 518 519 t->t_proc_flag |= TP_LWPEXIT; 520 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 521 prlwpexit(t); /* notify /proc */ 522 lwp_hash_out(p, t->t_tid); 523 prexit(p); 524 525 p->p_lwpcnt = 0; 526 p->p_tlist = NULL; 527 sigqfree(p); 528 term_mstate(t); 529 p->p_mterm = gethrtime(); 530 531 exec_vp = p->p_exec; 532 execdir_vp = p->p_execdir; 533 p->p_exec = NULLVP; 534 p->p_execdir = NULLVP; 535 mutex_exit(&p->p_lock); 536 if (exec_vp) 537 VN_RELE(exec_vp); 538 if (execdir_vp) 539 VN_RELE(execdir_vp); 540 541 pr_free_watched_pages(p); 542 543 closeall(P_FINFO(p)); 544 545 /* Free the 
controlling tty. (freectty() always assumes curproc.) */ 546 ASSERT(p == curproc); 547 (void) freectty(B_TRUE); 548 549 #if defined(__sparc) 550 if (p->p_utraps != NULL) 551 utrap_free(p); 552 #endif 553 if (p->p_semacct) /* IPC semaphore exit */ 554 semexit(p); 555 rv = wstat(why, what); 556 557 acct(rv & 0xff); 558 exacct_commit_proc(p, rv); 559 560 /* 561 * Release any resources associated with C2 auditing 562 */ 563 if (audit_active) { 564 /* 565 * audit exit system call 566 */ 567 audit_exit(why, what); 568 } 569 570 /* 571 * Free address space. 572 */ 573 relvm(); 574 575 /* 576 * Release held contracts. 577 */ 578 contract_exit(p); 579 580 /* 581 * Depart our encapsulating process contract. 582 */ 583 if ((p->p_flag & SSYS) == 0) { 584 ASSERT(p->p_ct_process); 585 contract_process_exit(p->p_ct_process, p, rv); 586 } 587 588 /* 589 * Remove pool association, and block if requested by pool_do_bind. 590 */ 591 mutex_enter(&p->p_lock); 592 ASSERT(p->p_pool->pool_ref > 0); 593 atomic_add_32(&p->p_pool->pool_ref, -1); 594 p->p_pool = pool_default; 595 /* 596 * Now that our address space has been freed and all other threads 597 * in this process have exited, set the PEXITED pool flag. This 598 * tells the pools subsystems to ignore this process if it was 599 * requested to rebind this process to a new pool. 600 */ 601 p->p_poolflag |= PEXITED; 602 pool_barrier_exit(); 603 mutex_exit(&p->p_lock); 604 605 mutex_enter(&pidlock); 606 607 /* 608 * Delete this process from the newstate list of its parent. We 609 * will put it in the right place in the sigcld in the end. 610 */ 611 delete_ns(p->p_parent, p); 612 613 /* 614 * Reassign the orphans to the next of kin. 615 * Don't rearrange init's orphanage. 
616 */ 617 if ((q = p->p_orphan) != NULL && p != proc_init) { 618 619 proc_t *nokp = p->p_nextofkin; 620 621 for (;;) { 622 q->p_nextofkin = nokp; 623 if (q->p_nextorph == NULL) 624 break; 625 q = q->p_nextorph; 626 } 627 q->p_nextorph = nokp->p_orphan; 628 nokp->p_orphan = p->p_orphan; 629 p->p_orphan = NULL; 630 } 631 632 /* 633 * Reassign the children to init. 634 * Don't try to assign init's children to init. 635 */ 636 if ((q = p->p_child) != NULL && p != proc_init) { 637 struct proc *np; 638 struct proc *initp = proc_init; 639 boolean_t setzonetop = B_FALSE; 640 641 if (!INGLOBALZONE(curproc)) 642 setzonetop = B_TRUE; 643 644 pgdetach(p); 645 646 do { 647 np = q->p_sibling; 648 /* 649 * Delete it from its current parent new state 650 * list and add it to init new state list 651 */ 652 delete_ns(q->p_parent, q); 653 654 q->p_ppid = 1; 655 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); 656 if (setzonetop) { 657 mutex_enter(&q->p_lock); 658 q->p_flag |= SZONETOP; 659 mutex_exit(&q->p_lock); 660 } 661 q->p_parent = initp; 662 663 /* 664 * Since q will be the first child, 665 * it will not have a previous sibling. 666 */ 667 q->p_psibling = NULL; 668 if (initp->p_child) { 669 initp->p_child->p_psibling = q; 670 } 671 q->p_sibling = initp->p_child; 672 initp->p_child = q; 673 if (q->p_proc_flag & P_PR_PTRACE) { 674 mutex_enter(&q->p_lock); 675 sigtoproc(q, NULL, SIGKILL); 676 mutex_exit(&q->p_lock); 677 } 678 /* 679 * sigcld() will add the child to parents 680 * newstate list. 
681 */ 682 if (q->p_stat == SZOMB) 683 sigcld(q, NULL); 684 } while ((q = np) != NULL); 685 686 p->p_child = NULL; 687 ASSERT(p->p_child_ns == NULL); 688 } 689 690 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 691 692 mutex_enter(&p->p_lock); 693 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 694 695 /* 696 * Have our task accummulate our resource usage data before they 697 * become contaminated by p_cacct etc., and before we renounce 698 * membership of the task. 699 * 700 * We do this regardless of whether or not task accounting is active. 701 * This is to avoid having nonsense data reported for this task if 702 * task accounting is subsequently enabled. The overhead is minimal; 703 * by this point, this process has accounted for the usage of all its 704 * LWPs. We nonetheless do the work here, and under the protection of 705 * pidlock, so that the movement of the process's usage to the task 706 * happens at the same time as the removal of the process from the 707 * task, from the point of view of exacct_snapshot_task_usage(). 
708 */ 709 exacct_update_task_mstate(p); 710 711 hrutime = mstate_aggr_state(p, LMS_USER); 712 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 713 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 714 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 715 716 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 717 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 718 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 719 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 720 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 721 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 722 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 723 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 724 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 725 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 726 727 p->p_ru.minflt += p->p_cru.minflt; 728 p->p_ru.majflt += p->p_cru.majflt; 729 p->p_ru.nswap += p->p_cru.nswap; 730 p->p_ru.inblock += p->p_cru.inblock; 731 p->p_ru.oublock += p->p_cru.oublock; 732 p->p_ru.msgsnd += p->p_cru.msgsnd; 733 p->p_ru.msgrcv += p->p_cru.msgrcv; 734 p->p_ru.nsignals += p->p_cru.nsignals; 735 p->p_ru.nvcsw += p->p_cru.nvcsw; 736 p->p_ru.nivcsw += p->p_cru.nivcsw; 737 p->p_ru.sysc += p->p_cru.sysc; 738 p->p_ru.ioch += p->p_cru.ioch; 739 740 p->p_stat = SZOMB; 741 p->p_proc_flag &= ~P_PR_PTRACE; 742 p->p_wdata = what; 743 p->p_wcode = (char)why; 744 745 cdir = PTOU(p)->u_cdir; 746 rdir = PTOU(p)->u_rdir; 747 cwd = PTOU(p)->u_cwd; 748 749 /* 750 * Release resource controls, as they are no longer enforceable. 751 */ 752 rctl_set_free(p->p_rctls); 753 754 /* 755 * Give up task and project memberships. Decrement tk_nlwps counter 756 * for our task.max-lwps resource control. An extended accounting 757 * record, if that facility is active, is scheduled to be written. 758 * Zombie processes are false members of task0 for the remainder of 759 * their lifetime; no accounting information is recorded for them. 
760 */ 761 tk = p->p_task; 762 763 mutex_enter(&p->p_zone->zone_nlwps_lock); 764 tk->tk_nlwps--; 765 tk->tk_proj->kpj_nlwps--; 766 p->p_zone->zone_nlwps--; 767 mutex_exit(&p->p_zone->zone_nlwps_lock); 768 task_detach(p); 769 p->p_task = task0p; 770 771 /* 772 * Clear the lwp directory and the lwpid hash table 773 * now that /proc can't bother us any more. 774 * We free the memory below, after dropping p->p_lock. 775 */ 776 lwpdir = p->p_lwpdir; 777 lwpdir_sz = p->p_lwpdir_sz; 778 tidhash = p->p_tidhash; 779 tidhash_sz = p->p_tidhash_sz; 780 p->p_lwpdir = NULL; 781 p->p_lwpfree = NULL; 782 p->p_lwpdir_sz = 0; 783 p->p_tidhash = NULL; 784 p->p_tidhash_sz = 0; 785 786 /* 787 * If the process has context ops installed, call the exit routine 788 * on behalf of this last remaining thread. Normally exitpctx() is 789 * called during thread_exit() or lwp_exit(), but because this is the 790 * last thread in the process, we must call it here. By the time 791 * thread_exit() is called (below), the association with the relevant 792 * process has been lost. 793 * 794 * We also free the context here. 795 */ 796 if (p->p_pctx) { 797 kpreempt_disable(); 798 exitpctx(p); 799 kpreempt_enable(); 800 801 freepctx(p, 0); 802 } 803 804 /* 805 * curthread's proc pointer is changed to point to the 'sched' 806 * process for the corresponding zone, except in the case when 807 * the exiting process is in fact a zsched instance, in which 808 * case the proc pointer is set to p0. We do so, so that the 809 * process still points at the right zone when we call the VN_RELE() 810 * below. 811 * 812 * This is because curthread's original proc pointer can be freed as 813 * soon as the child sends a SIGCLD to its parent. We use zsched so 814 * that for user processes, even in the final moments of death, the 815 * process is still associated with its zone. 
816 */ 817 if (p != t->t_procp->p_zone->zone_zsched) 818 t->t_procp = t->t_procp->p_zone->zone_zsched; 819 else 820 t->t_procp = &p0; 821 822 mutex_exit(&p->p_lock); 823 if (!evaporate) { 824 p->p_pidflag &= ~CLDPEND; 825 sigcld(p, sqp); 826 } else { 827 /* 828 * Do what sigcld() would do if the disposition 829 * of the SIGCHLD signal were set to be ignored. 830 */ 831 cv_broadcast(&p->p_srwchan_cv); 832 freeproc(p); 833 } 834 mutex_exit(&pidlock); 835 836 /* 837 * We don't release u_cdir and u_rdir until SZOMB is set. 838 * This protects us against dofusers(). 839 */ 840 VN_RELE(cdir); 841 if (rdir) 842 VN_RELE(rdir); 843 if (cwd) 844 refstr_rele(cwd); 845 846 /* 847 * task_rele() may ultimately cause the zone to go away (or 848 * may cause the last user process in a zone to go away, which 849 * signals zsched to go away). So prior to this call, we must 850 * no longer point at zsched. 851 */ 852 t->t_procp = &p0; 853 task_rele(tk); 854 855 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 856 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 857 858 lwp_pcb_exit(); 859 860 thread_exit(); 861 /* NOTREACHED */ 862 } 863 864 /* 865 * Format siginfo structure for wait system calls. 866 */ 867 void 868 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 869 { 870 ASSERT(MUTEX_HELD(&pidlock)); 871 872 bzero(ip, sizeof (k_siginfo_t)); 873 ip->si_signo = SIGCLD; 874 ip->si_code = pp->p_wcode; 875 ip->si_pid = pp->p_pid; 876 ip->si_ctid = PRCTID(pp); 877 ip->si_zoneid = pp->p_zone->zone_id; 878 ip->si_status = pp->p_wdata; 879 ip->si_stime = pp->p_stime; 880 ip->si_utime = pp->p_utime; 881 882 if (waitflag) { 883 pp->p_wcode = 0; 884 pp->p_wdata = 0; 885 pp->p_pidflag &= ~CLDPEND; 886 } 887 } 888 889 /* 890 * Wait system call. 891 * Search for a terminated (zombie) child, 892 * finally lay it to rest, and collect its status. 893 * Look also for stopped children, 894 * and pass back status from them. 
895 */ 896 int 897 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 898 { 899 int found; 900 proc_t *cp, *pp; 901 int proc_gone; 902 int waitflag = !(options & WNOWAIT); 903 904 /* 905 * Obsolete flag, defined here only for binary compatibility 906 * with old statically linked executables. Delete this when 907 * we no longer care about these old and broken applications. 908 */ 909 #define _WNOCHLD 0400 910 options &= ~_WNOCHLD; 911 912 if (options == 0 || (options & ~WOPTMASK)) 913 return (EINVAL); 914 915 switch (idtype) { 916 case P_PID: 917 case P_PGID: 918 if (id < 0 || id >= maxpid) 919 return (EINVAL); 920 /* FALLTHROUGH */ 921 case P_ALL: 922 break; 923 default: 924 return (EINVAL); 925 } 926 927 pp = ttoproc(curthread); 928 929 /* 930 * lock parent mutex so that sibling chain can be searched. 931 */ 932 mutex_enter(&pidlock); 933 934 /* 935 * if we are only looking for exited processes and child_ns list 936 * is empty no reason to look at all children. 937 */ 938 if (idtype == P_ALL && 939 (options & ~WNOWAIT) == (WNOHANG | WEXITED) && 940 pp->p_child_ns == NULL) { 941 if (pp->p_child) { 942 mutex_exit(&pidlock); 943 bzero(ip, sizeof (k_siginfo_t)); 944 return (0); 945 } 946 mutex_exit(&pidlock); 947 return (ECHILD); 948 } 949 950 while (pp->p_child != NULL) { 951 952 proc_gone = 0; 953 954 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { 955 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) 956 continue; 957 if (idtype == P_PID && id != cp->p_pid) 958 continue; 959 if (idtype == P_PGID && id != cp->p_pgrp) 960 continue; 961 962 switch (cp->p_wcode) { 963 964 case CLD_TRAPPED: 965 case CLD_STOPPED: 966 case CLD_CONTINUED: 967 cmn_err(CE_PANIC, 968 "waitid: wrong state %d on the p_newstate" 969 " list", cp->p_wcode); 970 break; 971 972 case CLD_EXITED: 973 case CLD_DUMPED: 974 case CLD_KILLED: 975 if (!(options & WEXITED)) { 976 /* 977 * Count how many are already gone 978 * for good. 
979 */ 980 proc_gone++; 981 break; 982 } 983 if (!waitflag) { 984 winfo(cp, ip, 0); 985 } else { 986 winfo(cp, ip, 1); 987 freeproc(cp); 988 } 989 mutex_exit(&pidlock); 990 if (waitflag) { /* accept SIGCLD */ 991 sigcld_delete(ip); 992 sigcld_repost(); 993 } 994 return (0); 995 } 996 997 if (idtype == P_PID) 998 break; 999 } 1000 1001 /* 1002 * Wow! None of the threads on the p_sibling_ns list were 1003 * interesting threads. Check all the kids! 1004 */ 1005 found = 0; 1006 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { 1007 if (idtype == P_PID && id != cp->p_pid) 1008 continue; 1009 if (idtype == P_PGID && id != cp->p_pgrp) 1010 continue; 1011 1012 switch (cp->p_wcode) { 1013 case CLD_TRAPPED: 1014 if (!(options & WTRAPPED)) 1015 break; 1016 winfo(cp, ip, waitflag); 1017 mutex_exit(&pidlock); 1018 if (waitflag) { /* accept SIGCLD */ 1019 sigcld_delete(ip); 1020 sigcld_repost(); 1021 } 1022 return (0); 1023 1024 case CLD_STOPPED: 1025 if (!(options & WSTOPPED)) 1026 break; 1027 /* Is it still stopped? */ 1028 mutex_enter(&cp->p_lock); 1029 if (!jobstopped(cp)) { 1030 mutex_exit(&cp->p_lock); 1031 break; 1032 } 1033 mutex_exit(&cp->p_lock); 1034 winfo(cp, ip, waitflag); 1035 mutex_exit(&pidlock); 1036 if (waitflag) { /* accept SIGCLD */ 1037 sigcld_delete(ip); 1038 sigcld_repost(); 1039 } 1040 return (0); 1041 1042 case CLD_CONTINUED: 1043 if (!(options & WCONTINUED)) 1044 break; 1045 winfo(cp, ip, waitflag); 1046 mutex_exit(&pidlock); 1047 if (waitflag) { /* accept SIGCLD */ 1048 sigcld_delete(ip); 1049 sigcld_repost(); 1050 } 1051 return (0); 1052 1053 case CLD_EXITED: 1054 case CLD_DUMPED: 1055 case CLD_KILLED: 1056 if (idtype != P_PID && 1057 (cp->p_pidflag & CLDWAITPID)) 1058 continue; 1059 /* 1060 * Don't complain if a process was found in 1061 * the first loop but we broke out of the loop 1062 * because of the arguments passed to us. 
1063 */ 1064 if (proc_gone == 0) { 1065 cmn_err(CE_PANIC, 1066 "waitid: wrong state on the" 1067 " p_child list"); 1068 } else { 1069 break; 1070 } 1071 } 1072 1073 found++; 1074 1075 if (idtype == P_PID) 1076 break; 1077 } 1078 1079 /* 1080 * If we found no interesting processes at all, 1081 * break out and return ECHILD. 1082 */ 1083 if (found + proc_gone == 0) 1084 break; 1085 1086 if (options & WNOHANG) { 1087 mutex_exit(&pidlock); 1088 bzero(ip, sizeof (k_siginfo_t)); 1089 /* 1090 * We should set ip->si_signo = SIGCLD, 1091 * but there is an SVVS test that expects 1092 * ip->si_signo to be zero in this case. 1093 */ 1094 return (0); 1095 } 1096 1097 /* 1098 * If we found no processes of interest that could 1099 * change state while we wait, we don't wait at all. 1100 * Get out with ECHILD according to SVID. 1101 */ 1102 if (found == proc_gone) 1103 break; 1104 1105 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1106 mutex_exit(&pidlock); 1107 return (EINTR); 1108 } 1109 } 1110 mutex_exit(&pidlock); 1111 return (ECHILD); 1112 } 1113 1114 /* 1115 * The wait() system call trap is no longer invoked by libc. 1116 * It is retained only for the benefit of statically linked applications. 1117 * Delete this when we no longer care about these old and broken applications. 
1118 */ 1119 int64_t 1120 wait(void) 1121 { 1122 int error; 1123 k_siginfo_t info; 1124 rval_t r; 1125 1126 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1127 return (set_errno(error)); 1128 r.r_val1 = info.si_pid; 1129 r.r_val2 = wstat(info.si_code, info.si_status); 1130 return (r.r_vals); 1131 } 1132 1133 int 1134 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1135 { 1136 int error; 1137 k_siginfo_t info; 1138 1139 if (error = waitid(idtype, id, &info, options)) 1140 return (set_errno(error)); 1141 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1142 return (set_errno(EFAULT)); 1143 return (0); 1144 } 1145 1146 #ifdef _SYSCALL32_IMPL 1147 1148 int 1149 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1150 { 1151 int error; 1152 k_siginfo_t info; 1153 siginfo32_t info32; 1154 1155 if (error = waitid(idtype, id, &info, options)) 1156 return (set_errno(error)); 1157 siginfo_kto32(&info, &info32); 1158 if (copyout(&info32, infop, sizeof (info32))) 1159 return (set_errno(EFAULT)); 1160 return (0); 1161 } 1162 1163 #endif /* _SYSCALL32_IMPL */ 1164 1165 void 1166 proc_detach(proc_t *p) 1167 { 1168 proc_t *q; 1169 1170 ASSERT(MUTEX_HELD(&pidlock)); 1171 1172 q = p->p_parent; 1173 ASSERT(q != NULL); 1174 1175 /* 1176 * Take it off the newstate list of its parent 1177 */ 1178 delete_ns(q, p); 1179 1180 if (q->p_child == p) { 1181 q->p_child = p->p_sibling; 1182 /* 1183 * If the parent has no children, it better not 1184 * have any with new states either! 1185 */ 1186 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1187 } 1188 1189 if (p->p_sibling) { 1190 p->p_sibling->p_psibling = p->p_psibling; 1191 } 1192 1193 if (p->p_psibling) { 1194 p->p_psibling->p_sibling = p->p_sibling; 1195 } 1196 } 1197 1198 /* 1199 * Remove zombie children from the process table. 
1200 */ 1201 void 1202 freeproc(proc_t *p) 1203 { 1204 proc_t *q; 1205 1206 ASSERT(p->p_stat == SZOMB); 1207 ASSERT(p->p_tlist == NULL); 1208 ASSERT(MUTEX_HELD(&pidlock)); 1209 1210 sigdelq(p, NULL, 0); 1211 if (p->p_killsqp) { 1212 siginfofree(p->p_killsqp); 1213 p->p_killsqp = NULL; 1214 } 1215 1216 prfree(p); /* inform /proc */ 1217 1218 /* 1219 * Don't free the init processes. 1220 * Other dying processes will access it. 1221 */ 1222 if (p == proc_init) 1223 return; 1224 1225 1226 /* 1227 * We wait until now to free the cred structure because a 1228 * zombie process's credentials may be examined by /proc. 1229 * No cred locking needed because there are no threads at this point. 1230 */ 1231 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1232 crfree(p->p_cred); 1233 if (p->p_corefile != NULL) { 1234 corectl_path_rele(p->p_corefile); 1235 p->p_corefile = NULL; 1236 } 1237 if (p->p_content != NULL) { 1238 corectl_content_rele(p->p_content); 1239 p->p_content = NULL; 1240 } 1241 1242 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1243 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1244 /* 1245 * This should still do the right thing since p_utime/stime 1246 * get set to the correct value on process exit, so it 1247 * should get properly updated 1248 */ 1249 p->p_nextofkin->p_cutime += p->p_utime; 1250 p->p_nextofkin->p_cstime += p->p_stime; 1251 1252 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1253 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1254 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1255 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1256 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1257 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1258 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1259 += p->p_acct[LMS_USER_LOCK]; 1260 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1261 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1262 += 
p->p_acct[LMS_WAIT_CPU]; 1263 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1264 1265 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1266 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1267 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1268 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1269 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1270 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1271 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1272 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1273 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1274 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1275 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1276 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1277 1278 } 1279 1280 q = p->p_nextofkin; 1281 if (q && q->p_orphan == p) 1282 q->p_orphan = p->p_nextorph; 1283 else if (q) { 1284 for (q = q->p_orphan; q; q = q->p_nextorph) 1285 if (q->p_nextorph == p) 1286 break; 1287 ASSERT(q && q->p_nextorph == p); 1288 q->p_nextorph = p->p_nextorph; 1289 } 1290 1291 proc_detach(p); 1292 pid_exit(p); /* frees pid and proc structure */ 1293 } 1294 1295 /* 1296 * Delete process "child" from the newstate list of process "parent" 1297 */ 1298 void 1299 delete_ns(proc_t *parent, proc_t *child) 1300 { 1301 proc_t **ns; 1302 1303 ASSERT(MUTEX_HELD(&pidlock)); 1304 ASSERT(child->p_parent == parent); 1305 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1306 if (*ns == child) { 1307 1308 ASSERT((*ns)->p_parent == parent); 1309 1310 *ns = child->p_sibling_ns; 1311 child->p_sibling_ns = NULL; 1312 return; 1313 } 1314 } 1315 } 1316 1317 /* 1318 * Add process "child" to the new state list of process "parent" 1319 */ 1320 void 1321 add_ns(proc_t *parent, proc_t *child) 1322 { 1323 ASSERT(child->p_sibling_ns == NULL); 1324 child->p_sibling_ns = parent->p_child_ns; 1325 parent->p_child_ns = child; 1326 } 1327