1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/user.h> 37 #include <sys/errno.h> 38 #include <sys/proc.h> 39 #include <sys/ucontext.h> 40 #include <sys/procfs.h> 41 #include <sys/vnode.h> 42 #include <sys/acct.h> 43 #include <sys/var.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/wait.h> 47 #include <sys/siginfo.h> 48 #include <sys/procset.h> 49 #include <sys/class.h> 50 #include <sys/file.h> 51 #include <sys/session.h> 52 #include <sys/kmem.h> 53 #include <sys/vtrace.h> 54 #include <sys/prsystm.h> 55 #include <sys/ipc.h> 56 #include <sys/sem_impl.h> 57 #include <c2/audit.h> 58 #include <sys/aio_impl.h> 59 #include <vm/as.h> 60 #include <sys/poll.h> 61 #include <sys/door.h> 62 #include <sys/lwpchan_impl.h> 63 #include <sys/utrap.h> 64 #include <sys/task.h> 65 
#include <sys/exacct.h> 66 #include <sys/cyclic.h> 67 #include <sys/schedctl.h> 68 #include <sys/rctl.h> 69 #include <sys/contract_impl.h> 70 #include <sys/contract/process_impl.h> 71 #include <sys/list.h> 72 #include <sys/dtrace.h> 73 #include <sys/pool.h> 74 #include <sys/sdt.h> 75 #include <sys/corectl.h> 76 #include <sys/brand.h> 77 #include <sys/libc_kernel.h> 78 79 /* 80 * convert code/data pair into old style wait status 81 */ 82 int 83 wstat(int code, int data) 84 { 85 int stat = (data & 0377); 86 87 switch (code) { 88 case CLD_EXITED: 89 stat <<= 8; 90 break; 91 case CLD_DUMPED: 92 stat |= WCOREFLG; 93 break; 94 case CLD_KILLED: 95 break; 96 case CLD_TRAPPED: 97 case CLD_STOPPED: 98 stat <<= 8; 99 stat |= WSTOPFLG; 100 break; 101 case CLD_CONTINUED: 102 stat = WCONTFLG; 103 break; 104 default: 105 cmn_err(CE_PANIC, "wstat: bad code"); 106 /* NOTREACHED */ 107 } 108 return (stat); 109 } 110 111 static char * 112 exit_reason(char *buf, size_t bufsz, int what, int why) 113 { 114 switch (why) { 115 case CLD_EXITED: 116 (void) snprintf(buf, bufsz, "exited with status %d", what); 117 break; 118 case CLD_KILLED: 119 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what); 120 break; 121 case CLD_DUMPED: 122 (void) snprintf(buf, bufsz, "core dumped on signal %d", what); 123 break; 124 default: 125 (void) snprintf(buf, bufsz, "encountered unknown error " 126 "(%d, %d)", why, what); 127 break; 128 } 129 130 return (buf); 131 } 132 133 /* 134 * exit system call: pass back caller's arg. 135 */ 136 void 137 rexit(int rval) 138 { 139 exit(CLD_EXITED, rval); 140 } 141 142 /* 143 * Called by proc_exit() when a zone's init exits, presumably because 144 * it failed. As long as the given zone is still in the "running" 145 * state, we will re-exec() init, but first we need to reset things 146 * which are usually inherited across exec() but will break init's 147 * assumption that it is being exec()'d from a virgin process. 
Most importantly this includes closing all file descriptors (exec only
 * closes those marked close-on-exec) and resetting signals (exec only
 * resets handled signals, and we need to clear any signals which
 * killed init).  Anything else that exec(2) says would be inherited,
 * but would affect the execution of init, needs to be reset.
 *
 * what and why are the exit value and CLD_* reason of the failed init;
 * they are used here only to format the warning messages.
 *
 * Returns 0 if exec_init() succeeded and -1 if it failed, in which
 * case the caller carries on with normal exit processing.
 */
static int
restart_init(int what, int why)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	user_t *up = PTOU(p);

	vnode_t *oldcd, *oldrd;
	int i, err;
	char reason_buf[64];

	/*
	 * Let zone admin (and global zone admin if this is for a non-global
	 * zone) know that init has failed and will be restarted.
	 */
	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "init(1M) %s: restarting automatically",
	    exit_reason(reason_buf, sizeof (reason_buf), what, why));

	/* reason_buf was filled in by the exit_reason() call above. */
	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
		    "restarting automatically",
		    p->p_zone->zone_name, p->p_pid, reason_buf);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 * Then close all open file descriptors in the process.
	 */
	pollcleanup();
	closeall(P_FINFO(p));

	/*
	 * Grab p_lock and begin clearing miscellaneous global process
	 * state that needs to be reset before we exec the new init(1M).
	 */

	mutex_enter(&p->p_lock);
	prbarrier(p);

	/*
	 * SEXITING is among the flags cleared here; it was set by
	 * proc_is_exiting() at the start of proc_exit().
	 */
	p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
	up->u_cmask = CMASK;

	sigemptyset(&t->t_hold);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);

	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);

	sigdelq(p, t, 0);
	sigdelq(p, NULL, 0);

	if (p->p_killsqp) {
		siginfofree(p->p_killsqp);
		p->p_killsqp = NULL;
	}

	/*
	 * Reset any signals that are ignored back to the default disposition.
	 * Other u_signal members will be cleared when exec calls sigdefault().
	 */
	for (i = 1; i < NSIG; i++) {
		/* u_signal and u_sigmask are indexed by signal number - 1. */
		if (up->u_signal[i - 1] == SIG_IGN) {
			up->u_signal[i - 1] = SIG_DFL;
			sigemptyset(&up->u_sigmask[i - 1]);
		}
	}

	/*
	 * Clear the current signal, any signal info associated with it, and
	 * any signal information from contracts and/or contract templates.
	 */
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo != NULL) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	lwp_ctmpl_clear(lwp);

	/*
	 * Reset both the process root directory and the current working
	 * directory to the root of the zone just as we do during boot.
	 * One hold is taken on zone_rootvp for each of the two references;
	 * the old vnodes are released after p_lock is dropped.
	 */
	VN_HOLD(p->p_zone->zone_rootvp);
	oldrd = up->u_rdir;
	up->u_rdir = p->p_zone->zone_rootvp;

	VN_HOLD(p->p_zone->zone_rootvp);
	oldcd = up->u_cdir;
	up->u_cdir = p->p_zone->zone_rootvp;

	if (up->u_cwd != NULL) {
		refstr_rele(up->u_cwd);
		up->u_cwd = NULL;
	}

	mutex_exit(&p->p_lock);

	if (oldrd != NULL)
		VN_RELE(oldrd);
	if (oldcd != NULL)
		VN_RELE(oldcd);

	/* Free the controlling tty.  (freectty() always assumes curproc.) */
	ASSERT(p == curproc);
	(void) freectty(B_TRUE);

	/*
	 * Now exec() the new init(1M) on top of the current process.  If we
	 * succeed, the caller will treat this like a successful system call.
	 * If we fail, we issue messages and the caller will proceed with exit.
	 */
	err = exec_init(p->p_zone->zone_initname, NULL);

	if (err == 0)
		return (0);

	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "failed to restart init(1M) (err=%d): system reboot required", err);

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
		    "(pid %d, err=%d): zoneadm(1M) boot required",
		    p->p_zone->zone_name, p->p_pid, err);
	}

	return (-1);
}

/*
 * Release resources.
 * Enter zombie state.
 * Wake up parent and init processes,
 * and dispose of children.
 *
 * why and what are handed unchanged to proc_exit().
 */
void
exit(int why, int what)
{
	/*
	 * If proc_exit() fails, then some other lwp in the process
	 * got there first.  We just have to call lwp_exit() to allow
	 * the other lwp to finish exiting the process.  Otherwise we're
	 * restarting init, and should return.
	 */
	if (proc_exit(why, what) != 0) {
		/* p_lock is taken here and not dropped before lwp_exit(). */
		mutex_enter(&curproc->p_lock);
		ASSERT(curproc->p_flag & SEXITLWPS);
		lwp_exit();
		/* NOTREACHED */
	}
}

/*
 * Set the SEXITING flag on the process, after making sure /proc does
 * not have it locked.  This is done in more places than proc_exit(),
 * so it is a separate function.
 *
 * p_lock is acquired and released internally.
 */
void
proc_is_exiting(proc_t *p)
{
	mutex_enter(&p->p_lock);
	prbarrier(p);
	p->p_flag |= SEXITING;
	mutex_exit(&p->p_lock);
}

/*
 * Return value:
 *   1 - exitlwps() failed, call (or continue) lwp_exit()
 *   0 - restarting init.
Return through system call path 328 */ 329 int 330 proc_exit(int why, int what) 331 { 332 kthread_t *t = curthread; 333 klwp_t *lwp = ttolwp(t); 334 proc_t *p = ttoproc(t); 335 zone_t *z = p->p_zone; 336 timeout_id_t tmp_id; 337 int rv; 338 proc_t *q; 339 task_t *tk; 340 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 341 sigqueue_t *sqp; 342 lwpdir_t *lwpdir; 343 uint_t lwpdir_sz; 344 lwpdir_t **tidhash; 345 uint_t tidhash_sz; 346 refstr_t *cwd; 347 hrtime_t hrutime, hrstime; 348 int evaporate; 349 350 /* 351 * Stop and discard the process's lwps except for the current one, 352 * unless some other lwp beat us to it. If exitlwps() fails then 353 * return and the calling lwp will call (or continue in) lwp_exit(). 354 */ 355 proc_is_exiting(p); 356 if (exitlwps(0) != 0) 357 return (1); 358 359 DTRACE_PROC(lwp__exit); 360 DTRACE_PROC1(exit, int, why); 361 362 /* 363 * Will perform any brand specific proc exit processing, since this 364 * is always the last lwp, will also perform lwp_exit and free brand 365 * data 366 */ 367 if (PROC_IS_BRANDED(p)) 368 BROP(p)->b_proc_exit(p, lwp); 369 370 /* 371 * Don't let init exit unless zone_start_init() failed its exec, or 372 * we are shutting down the zone or the machine. 373 * 374 * Since we are single threaded, we don't need to lock the 375 * following accesses to zone_proc_initpid. 376 */ 377 if (p->p_pid == z->zone_proc_initpid) { 378 if (z->zone_boot_err == 0 && 379 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 380 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 381 z->zone_restart_init == B_TRUE && 382 restart_init(what, why) == 0) 383 return (0); 384 /* 385 * Since we didn't or couldn't restart init, we clear 386 * the zone's init state and proceed with exit 387 * processing. 388 */ 389 z->zone_proc_initpid = -1; 390 } 391 392 /* 393 * Allocate a sigqueue now, before we grab locks. 394 * It will be given to sigcld(), below. 
395 * Special case: If we will be making the process disappear 396 * without a trace (for the benefit of posix_spawn() in libc) 397 * don't bother to allocate a useless sigqueue. 398 */ 399 evaporate = ((p->p_flag & SVFORK) && 400 why == CLD_EXITED && what == _EVAPORATE); 401 if (!evaporate) 402 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 403 404 /* 405 * revoke any doors created by the process. 406 */ 407 if (p->p_door_list) 408 door_exit(); 409 410 /* 411 * Release schedctl data structures. 412 */ 413 if (p->p_pagep) 414 schedctl_proc_cleanup(); 415 416 /* 417 * make sure all pending kaio has completed. 418 */ 419 if (p->p_aio) 420 aio_cleanup_exit(); 421 422 /* 423 * discard the lwpchan cache. 424 */ 425 if (p->p_lcp != NULL) 426 lwpchan_destroy_cache(0); 427 428 /* 429 * Clean up any DTrace helper actions or probes for the process. 430 */ 431 if (p->p_dtrace_helpers != NULL) { 432 ASSERT(dtrace_helpers_cleanup != NULL); 433 (*dtrace_helpers_cleanup)(); 434 } 435 436 /* untimeout the realtime timers */ 437 if (p->p_itimer != NULL) 438 timer_exit(); 439 440 if ((tmp_id = p->p_alarmid) != 0) { 441 p->p_alarmid = 0; 442 (void) untimeout(tmp_id); 443 } 444 445 /* 446 * Remove any fpollinfo_t's for this (last) thread from our file 447 * descriptors so closeall() can ASSERT() that they're all gone. 448 */ 449 pollcleanup(); 450 451 if (p->p_rprof_cyclic != CYCLIC_NONE) { 452 mutex_enter(&cpu_lock); 453 cyclic_remove(p->p_rprof_cyclic); 454 mutex_exit(&cpu_lock); 455 } 456 457 mutex_enter(&p->p_lock); 458 459 /* 460 * Clean up any DTrace probes associated with this process. 
461 */ 462 if (p->p_dtrace_probes) { 463 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 464 dtrace_fasttrap_exit_ptr(p); 465 } 466 467 while ((tmp_id = p->p_itimerid) != 0) { 468 p->p_itimerid = 0; 469 mutex_exit(&p->p_lock); 470 (void) untimeout(tmp_id); 471 mutex_enter(&p->p_lock); 472 } 473 474 lwp_cleanup(); 475 476 /* 477 * We are about to exit; prevent our resource associations from 478 * being changed. 479 */ 480 pool_barrier_enter(); 481 482 /* 483 * Block the process against /proc now that we have really 484 * acquired p->p_lock (to manipulate p_tlist at least). 485 */ 486 prbarrier(p); 487 488 #ifdef SUN_SRC_COMPAT 489 if (code == CLD_KILLED) 490 u.u_acflag |= AXSIG; 491 #endif 492 sigfillset(&p->p_ignore); 493 sigemptyset(&p->p_siginfo); 494 sigemptyset(&p->p_sig); 495 sigemptyset(&p->p_extsig); 496 sigemptyset(&t->t_sig); 497 sigemptyset(&t->t_extsig); 498 sigemptyset(&p->p_sigmask); 499 sigdelq(p, t, 0); 500 lwp->lwp_cursig = 0; 501 lwp->lwp_extsig = 0; 502 p->p_flag &= ~(SKILLED | SEXTKILLED); 503 if (lwp->lwp_curinfo) { 504 siginfofree(lwp->lwp_curinfo); 505 lwp->lwp_curinfo = NULL; 506 } 507 508 t->t_proc_flag |= TP_LWPEXIT; 509 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 510 prlwpexit(t); /* notify /proc */ 511 lwp_hash_out(p, t->t_tid); 512 prexit(p); 513 514 p->p_lwpcnt = 0; 515 p->p_tlist = NULL; 516 sigqfree(p); 517 term_mstate(t); 518 p->p_mterm = gethrtime(); 519 520 exec_vp = p->p_exec; 521 execdir_vp = p->p_execdir; 522 p->p_exec = NULLVP; 523 p->p_execdir = NULLVP; 524 mutex_exit(&p->p_lock); 525 if (exec_vp) 526 VN_RELE(exec_vp); 527 if (execdir_vp) 528 VN_RELE(execdir_vp); 529 530 pr_free_watched_pages(p); 531 532 closeall(P_FINFO(p)); 533 534 /* Free the controlling tty. (freectty() always assumes curproc.) 
*/ 535 ASSERT(p == curproc); 536 (void) freectty(B_TRUE); 537 538 #if defined(__sparc) 539 if (p->p_utraps != NULL) 540 utrap_free(p); 541 #endif 542 if (p->p_semacct) /* IPC semaphore exit */ 543 semexit(p); 544 rv = wstat(why, what); 545 546 acct(rv & 0xff); 547 exacct_commit_proc(p, rv); 548 549 /* 550 * Release any resources associated with C2 auditing 551 */ 552 #ifdef C2_AUDIT 553 if (audit_active) { 554 /* 555 * audit exit system call 556 */ 557 audit_exit(why, what); 558 } 559 #endif 560 561 /* 562 * Free address space. 563 */ 564 relvm(); 565 566 /* 567 * Release held contracts. 568 */ 569 contract_exit(p); 570 571 /* 572 * Depart our encapsulating process contract. 573 */ 574 if ((p->p_flag & SSYS) == 0) { 575 ASSERT(p->p_ct_process); 576 contract_process_exit(p->p_ct_process, p, rv); 577 } 578 579 /* 580 * Remove pool association, and block if requested by pool_do_bind. 581 */ 582 mutex_enter(&p->p_lock); 583 ASSERT(p->p_pool->pool_ref > 0); 584 atomic_add_32(&p->p_pool->pool_ref, -1); 585 p->p_pool = pool_default; 586 /* 587 * Now that our address space has been freed and all other threads 588 * in this process have exited, set the PEXITED pool flag. This 589 * tells the pools subsystems to ignore this process if it was 590 * requested to rebind this process to a new pool. 591 */ 592 p->p_poolflag |= PEXITED; 593 pool_barrier_exit(); 594 mutex_exit(&p->p_lock); 595 596 mutex_enter(&pidlock); 597 598 /* 599 * Delete this process from the newstate list of its parent. We 600 * will put it in the right place in the sigcld in the end. 601 */ 602 delete_ns(p->p_parent, p); 603 604 /* 605 * Reassign the orphans to the next of kin. 606 * Don't rearrange init's orphanage. 
607 */ 608 if ((q = p->p_orphan) != NULL && p != proc_init) { 609 610 proc_t *nokp = p->p_nextofkin; 611 612 for (;;) { 613 q->p_nextofkin = nokp; 614 if (q->p_nextorph == NULL) 615 break; 616 q = q->p_nextorph; 617 } 618 q->p_nextorph = nokp->p_orphan; 619 nokp->p_orphan = p->p_orphan; 620 p->p_orphan = NULL; 621 } 622 623 /* 624 * Reassign the children to init. 625 * Don't try to assign init's children to init. 626 */ 627 if ((q = p->p_child) != NULL && p != proc_init) { 628 struct proc *np; 629 struct proc *initp = proc_init; 630 boolean_t setzonetop = B_FALSE; 631 632 if (!INGLOBALZONE(curproc)) 633 setzonetop = B_TRUE; 634 635 pgdetach(p); 636 637 do { 638 np = q->p_sibling; 639 /* 640 * Delete it from its current parent new state 641 * list and add it to init new state list 642 */ 643 delete_ns(q->p_parent, q); 644 645 q->p_ppid = 1; 646 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); 647 if (setzonetop) { 648 mutex_enter(&q->p_lock); 649 q->p_flag |= SZONETOP; 650 mutex_exit(&q->p_lock); 651 } 652 q->p_parent = initp; 653 654 /* 655 * Since q will be the first child, 656 * it will not have a previous sibling. 657 */ 658 q->p_psibling = NULL; 659 if (initp->p_child) { 660 initp->p_child->p_psibling = q; 661 } 662 q->p_sibling = initp->p_child; 663 initp->p_child = q; 664 if (q->p_proc_flag & P_PR_PTRACE) { 665 mutex_enter(&q->p_lock); 666 sigtoproc(q, NULL, SIGKILL); 667 mutex_exit(&q->p_lock); 668 } 669 /* 670 * sigcld() will add the child to parents 671 * newstate list. 
672 */ 673 if (q->p_stat == SZOMB) 674 sigcld(q, NULL); 675 } while ((q = np) != NULL); 676 677 p->p_child = NULL; 678 ASSERT(p->p_child_ns == NULL); 679 } 680 681 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 682 683 mutex_enter(&p->p_lock); 684 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 685 686 /* 687 * Have our task accummulate our resource usage data before they 688 * become contaminated by p_cacct etc., and before we renounce 689 * membership of the task. 690 * 691 * We do this regardless of whether or not task accounting is active. 692 * This is to avoid having nonsense data reported for this task if 693 * task accounting is subsequently enabled. The overhead is minimal; 694 * by this point, this process has accounted for the usage of all its 695 * LWPs. We nonetheless do the work here, and under the protection of 696 * pidlock, so that the movement of the process's usage to the task 697 * happens at the same time as the removal of the process from the 698 * task, from the point of view of exacct_snapshot_task_usage(). 
699 */ 700 exacct_update_task_mstate(p); 701 702 hrutime = mstate_aggr_state(p, LMS_USER); 703 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 704 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 705 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 706 707 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 708 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 709 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 710 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 711 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 712 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 713 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 714 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 715 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 716 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 717 718 p->p_ru.minflt += p->p_cru.minflt; 719 p->p_ru.majflt += p->p_cru.majflt; 720 p->p_ru.nswap += p->p_cru.nswap; 721 p->p_ru.inblock += p->p_cru.inblock; 722 p->p_ru.oublock += p->p_cru.oublock; 723 p->p_ru.msgsnd += p->p_cru.msgsnd; 724 p->p_ru.msgrcv += p->p_cru.msgrcv; 725 p->p_ru.nsignals += p->p_cru.nsignals; 726 p->p_ru.nvcsw += p->p_cru.nvcsw; 727 p->p_ru.nivcsw += p->p_cru.nivcsw; 728 p->p_ru.sysc += p->p_cru.sysc; 729 p->p_ru.ioch += p->p_cru.ioch; 730 731 p->p_stat = SZOMB; 732 p->p_proc_flag &= ~P_PR_PTRACE; 733 p->p_wdata = what; 734 p->p_wcode = (char)why; 735 736 cdir = PTOU(p)->u_cdir; 737 rdir = PTOU(p)->u_rdir; 738 cwd = PTOU(p)->u_cwd; 739 740 /* 741 * Release resource controls, as they are no longer enforceable. 742 */ 743 rctl_set_free(p->p_rctls); 744 745 /* 746 * Give up task and project memberships. Decrement tk_nlwps counter 747 * for our task.max-lwps resource control. An extended accounting 748 * record, if that facility is active, is scheduled to be written. 749 * Zombie processes are false members of task0 for the remainder of 750 * their lifetime; no accounting information is recorded for them. 
751 */ 752 tk = p->p_task; 753 754 mutex_enter(&p->p_zone->zone_nlwps_lock); 755 tk->tk_nlwps--; 756 tk->tk_proj->kpj_nlwps--; 757 p->p_zone->zone_nlwps--; 758 mutex_exit(&p->p_zone->zone_nlwps_lock); 759 task_detach(p); 760 p->p_task = task0p; 761 762 /* 763 * Clear the lwp directory and the lwpid hash table 764 * now that /proc can't bother us any more. 765 * We free the memory below, after dropping p->p_lock. 766 */ 767 lwpdir = p->p_lwpdir; 768 lwpdir_sz = p->p_lwpdir_sz; 769 tidhash = p->p_tidhash; 770 tidhash_sz = p->p_tidhash_sz; 771 p->p_lwpdir = NULL; 772 p->p_lwpfree = NULL; 773 p->p_lwpdir_sz = 0; 774 p->p_tidhash = NULL; 775 p->p_tidhash_sz = 0; 776 777 /* 778 * If the process has context ops installed, call the exit routine 779 * on behalf of this last remaining thread. Normally exitpctx() is 780 * called during thread_exit() or lwp_exit(), but because this is the 781 * last thread in the process, we must call it here. By the time 782 * thread_exit() is called (below), the association with the relevant 783 * process has been lost. 784 * 785 * We also free the context here. 786 */ 787 if (p->p_pctx) { 788 kpreempt_disable(); 789 exitpctx(p); 790 kpreempt_enable(); 791 792 freepctx(p, 0); 793 } 794 795 /* 796 * curthread's proc pointer is changed to point to the 'sched' 797 * process for the corresponding zone, except in the case when 798 * the exiting process is in fact a zsched instance, in which 799 * case the proc pointer is set to p0. We do so, so that the 800 * process still points at the right zone when we call the VN_RELE() 801 * below. 802 * 803 * This is because curthread's original proc pointer can be freed as 804 * soon as the child sends a SIGCLD to its parent. We use zsched so 805 * that for user processes, even in the final moments of death, the 806 * process is still associated with its zone. 
807 */ 808 if (p != t->t_procp->p_zone->zone_zsched) 809 t->t_procp = t->t_procp->p_zone->zone_zsched; 810 else 811 t->t_procp = &p0; 812 813 mutex_exit(&p->p_lock); 814 if (!evaporate) { 815 p->p_pidflag &= ~CLDPEND; 816 sigcld(p, sqp); 817 } else { 818 /* 819 * Do what sigcld() would do if the disposition 820 * of the SIGCHLD signal were set to be ignored. 821 */ 822 cv_broadcast(&p->p_srwchan_cv); 823 freeproc(p); 824 } 825 mutex_exit(&pidlock); 826 827 /* 828 * We don't release u_cdir and u_rdir until SZOMB is set. 829 * This protects us against dofusers(). 830 */ 831 VN_RELE(cdir); 832 if (rdir) 833 VN_RELE(rdir); 834 if (cwd) 835 refstr_rele(cwd); 836 837 /* 838 * task_rele() may ultimately cause the zone to go away (or 839 * may cause the last user process in a zone to go away, which 840 * signals zsched to go away). So prior to this call, we must 841 * no longer point at zsched. 842 */ 843 t->t_procp = &p0; 844 task_rele(tk); 845 846 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 847 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 848 849 lwp_pcb_exit(); 850 851 thread_exit(); 852 /* NOTREACHED */ 853 } 854 855 /* 856 * Format siginfo structure for wait system calls. 857 */ 858 void 859 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 860 { 861 ASSERT(MUTEX_HELD(&pidlock)); 862 863 bzero(ip, sizeof (k_siginfo_t)); 864 ip->si_signo = SIGCLD; 865 ip->si_code = pp->p_wcode; 866 ip->si_pid = pp->p_pid; 867 ip->si_ctid = PRCTID(pp); 868 ip->si_zoneid = pp->p_zone->zone_id; 869 ip->si_status = pp->p_wdata; 870 ip->si_stime = pp->p_stime; 871 ip->si_utime = pp->p_utime; 872 873 if (waitflag) { 874 pp->p_wcode = 0; 875 pp->p_wdata = 0; 876 pp->p_pidflag &= ~CLDPEND; 877 } 878 } 879 880 /* 881 * Wait system call. 882 * Search for a terminated (zombie) child, 883 * finally lay it to rest, and collect its status. 884 * Look also for stopped children, 885 * and pass back status from them. 
Returns 0 with *ip filled in (or zeroed when WNOHANG finds nothing to
 * report), or one of the errno values EINVAL, ECHILD or EINTR.
 *
 * idtype and id select the children of interest (P_PID, P_PGID or
 * P_ALL); options selects the states to report.  Unless WNOWAIT is
 * set, the reported state is consumed and an exited child is freed.
 */
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
	int found;
	proc_t *cp, *pp;
	int proc_gone;
	int waitflag = !(options & WNOWAIT);

	/*
	 * Obsolete flag, defined here only for binary compatibility
	 * with old statically linked executables.  Delete this when
	 * we no longer care about these old and broken applications.
	 */
#define	_WNOCHLD	0400
	options &= ~_WNOCHLD;

	if (options == 0 || (options & ~WOPTMASK))
		return (EINVAL);

	switch (idtype) {
	case P_PID:
	case P_PGID:
		if (id < 0 || id >= maxpid)
			return (EINVAL);
		/* FALLTHROUGH */
	case P_ALL:
		break;
	default:
		return (EINVAL);
	}

	pp = ttoproc(curthread);

	/*
	 * lock parent mutex so that sibling chain can be searched.
	 */
	mutex_enter(&pidlock);

	/*
	 * if we are only looking for exited processes and child_ns list
	 * is empty no reason to look at all children.
	 */
	if (idtype == P_ALL &&
	    (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
	    pp->p_child_ns == NULL) {
		if (pp->p_child) {
			mutex_exit(&pidlock);
			bzero(ip, sizeof (k_siginfo_t));
			return (0);
		}
		mutex_exit(&pidlock);
		return (ECHILD);
	}

	while (pp->p_child != NULL) {

		proc_gone = 0;

		/* First pass: children on the newstate list only. */
		for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
			if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
				continue;
			if (idtype == P_PID && id != cp->p_pid)
				continue;
			if (idtype == P_PGID && id != cp->p_pgrp)
				continue;

			switch (cp->p_wcode) {

			case CLD_TRAPPED:
			case CLD_STOPPED:
			case CLD_CONTINUED:
				cmn_err(CE_PANIC,
				    "waitid: wrong state %d on the p_newstate"
				    " list", cp->p_wcode);
				break;

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				if (!(options & WEXITED)) {
					/*
					 * Count how many are already gone
					 * for good.
					 */
					proc_gone++;
					break;
				}
				if (!waitflag) {
					winfo(cp, ip, 0);
				} else {
					winfo(cp, ip, 1);
					freeproc(cp);
				}
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);
			}

			/* A P_PID request can match at most one child. */
			if (idtype == P_PID)
				break;
		}

		/*
		 * Wow! None of the processes on the p_sibling_ns list were
		 * interesting processes. Check all the kids!
		 */
		found = 0;
		for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
			if (idtype == P_PID && id != cp->p_pid)
				continue;
			if (idtype == P_PGID && id != cp->p_pgrp)
				continue;

			switch (cp->p_wcode) {
			case CLD_TRAPPED:
				if (!(options & WTRAPPED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_STOPPED:
				if (!(options & WSTOPPED))
					break;
				/* Is it still stopped? */
				mutex_enter(&cp->p_lock);
				if (!jobstopped(cp)) {
					mutex_exit(&cp->p_lock);
					break;
				}
				mutex_exit(&cp->p_lock);
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_CONTINUED:
				if (!(options & WCONTINUED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				/* This continue applies to the for loop. */
				if (idtype != P_PID &&
				    (cp->p_pidflag & CLDWAITPID))
					continue;
				/*
				 * Don't complain if a process was found in
				 * the first loop but we broke out of the loop
				 * because of the arguments passed to us.
				 */
				if (proc_gone == 0) {
					cmn_err(CE_PANIC,
					    "waitid: wrong state on the"
					    " p_child list");
				} else {
					break;
				}
			}

			found++;

			if (idtype == P_PID)
				break;
		}

		/*
		 * If we found no interesting processes at all,
		 * break out and return ECHILD.
		 */
		if (found + proc_gone == 0)
			break;

		if (options & WNOHANG) {
			mutex_exit(&pidlock);
			bzero(ip, sizeof (k_siginfo_t));
			/*
			 * We should set ip->si_signo = SIGCLD,
			 * but there is an SVVS test that expects
			 * ip->si_signo to be zero in this case.
			 */
			return (0);
		}

		/*
		 * If we found no processes of interest that could
		 * change state while we wait, we don't wait at all.
		 * Get out with ECHILD according to SVID.
		 */
		if (found == proc_gone)
			break;

		/* A zero return here is reported to the caller as EINTR. */
		if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
			mutex_exit(&pidlock);
			return (EINTR);
		}
	}
	mutex_exit(&pidlock);
	return (ECHILD);
}

/*
 * The wait() system call trap is no longer invoked by libc.
 * It is retained only for the benefit of statically linked applications.
 * Delete this when we no longer care about these old and broken applications.
1109 */ 1110 int64_t 1111 wait(void) 1112 { 1113 int error; 1114 k_siginfo_t info; 1115 rval_t r; 1116 1117 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1118 return (set_errno(error)); 1119 r.r_val1 = info.si_pid; 1120 r.r_val2 = wstat(info.si_code, info.si_status); 1121 return (r.r_vals); 1122 } 1123 1124 int 1125 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1126 { 1127 int error; 1128 k_siginfo_t info; 1129 1130 if (error = waitid(idtype, id, &info, options)) 1131 return (set_errno(error)); 1132 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1133 return (set_errno(EFAULT)); 1134 return (0); 1135 } 1136 1137 #ifdef _SYSCALL32_IMPL 1138 1139 int 1140 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1141 { 1142 int error; 1143 k_siginfo_t info; 1144 siginfo32_t info32; 1145 1146 if (error = waitid(idtype, id, &info, options)) 1147 return (set_errno(error)); 1148 siginfo_kto32(&info, &info32); 1149 if (copyout(&info32, infop, sizeof (info32))) 1150 return (set_errno(EFAULT)); 1151 return (0); 1152 } 1153 1154 #endif /* _SYSCALL32_IMPL */ 1155 1156 void 1157 proc_detach(proc_t *p) 1158 { 1159 proc_t *q; 1160 1161 ASSERT(MUTEX_HELD(&pidlock)); 1162 1163 q = p->p_parent; 1164 ASSERT(q != NULL); 1165 1166 /* 1167 * Take it off the newstate list of its parent 1168 */ 1169 delete_ns(q, p); 1170 1171 if (q->p_child == p) { 1172 q->p_child = p->p_sibling; 1173 /* 1174 * If the parent has no children, it better not 1175 * have any with new states either! 1176 */ 1177 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1178 } 1179 1180 if (p->p_sibling) { 1181 p->p_sibling->p_psibling = p->p_psibling; 1182 } 1183 1184 if (p->p_psibling) { 1185 p->p_psibling->p_sibling = p->p_sibling; 1186 } 1187 } 1188 1189 /* 1190 * Remove zombie children from the process table. 
1191 */ 1192 void 1193 freeproc(proc_t *p) 1194 { 1195 proc_t *q; 1196 1197 ASSERT(p->p_stat == SZOMB); 1198 ASSERT(p->p_tlist == NULL); 1199 ASSERT(MUTEX_HELD(&pidlock)); 1200 1201 sigdelq(p, NULL, 0); 1202 if (p->p_killsqp) { 1203 siginfofree(p->p_killsqp); 1204 p->p_killsqp = NULL; 1205 } 1206 1207 prfree(p); /* inform /proc */ 1208 1209 /* 1210 * Don't free the init processes. 1211 * Other dying processes will access it. 1212 */ 1213 if (p == proc_init) 1214 return; 1215 1216 1217 /* 1218 * We wait until now to free the cred structure because a 1219 * zombie process's credentials may be examined by /proc. 1220 * No cred locking needed because there are no threads at this point. 1221 */ 1222 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1223 crfree(p->p_cred); 1224 if (p->p_corefile != NULL) { 1225 corectl_path_rele(p->p_corefile); 1226 p->p_corefile = NULL; 1227 } 1228 if (p->p_content != NULL) { 1229 corectl_content_rele(p->p_content); 1230 p->p_content = NULL; 1231 } 1232 1233 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1234 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1235 /* 1236 * This should still do the right thing since p_utime/stime 1237 * get set to the correct value on process exit, so it 1238 * should get properly updated 1239 */ 1240 p->p_nextofkin->p_cutime += p->p_utime; 1241 p->p_nextofkin->p_cstime += p->p_stime; 1242 1243 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1244 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1245 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1246 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1247 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1248 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1249 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1250 += p->p_acct[LMS_USER_LOCK]; 1251 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1252 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1253 += 
p->p_acct[LMS_WAIT_CPU]; 1254 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1255 1256 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1257 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1258 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1259 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1260 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1261 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1262 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1263 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1264 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1265 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1266 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1267 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1268 1269 } 1270 1271 q = p->p_nextofkin; 1272 if (q && q->p_orphan == p) 1273 q->p_orphan = p->p_nextorph; 1274 else if (q) { 1275 for (q = q->p_orphan; q; q = q->p_nextorph) 1276 if (q->p_nextorph == p) 1277 break; 1278 ASSERT(q && q->p_nextorph == p); 1279 q->p_nextorph = p->p_nextorph; 1280 } 1281 1282 proc_detach(p); 1283 pid_exit(p); /* frees pid and proc structure */ 1284 } 1285 1286 /* 1287 * Delete process "child" from the newstate list of process "parent" 1288 */ 1289 void 1290 delete_ns(proc_t *parent, proc_t *child) 1291 { 1292 proc_t **ns; 1293 1294 ASSERT(MUTEX_HELD(&pidlock)); 1295 ASSERT(child->p_parent == parent); 1296 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1297 if (*ns == child) { 1298 1299 ASSERT((*ns)->p_parent == parent); 1300 1301 *ns = child->p_sibling_ns; 1302 child->p_sibling_ns = NULL; 1303 return; 1304 } 1305 } 1306 } 1307 1308 /* 1309 * Add process "child" to the new state list of process "parent" 1310 */ 1311 void 1312 add_ns(proc_t *parent, proc_t *child) 1313 { 1314 ASSERT(child->p_sibling_ns == NULL); 1315 child->p_sibling_ns = parent->p_child_ns; 1316 parent->p_child_ns = child; 1317 } 1318