1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/cred.h> 34 #include <sys/user.h> 35 #include <sys/errno.h> 36 #include <sys/proc.h> 37 #include <sys/ucontext.h> 38 #include <sys/procfs.h> 39 #include <sys/vnode.h> 40 #include <sys/acct.h> 41 #include <sys/var.h> 42 #include <sys/cmn_err.h> 43 #include <sys/debug.h> 44 #include <sys/wait.h> 45 #include <sys/siginfo.h> 46 #include <sys/procset.h> 47 #include <sys/class.h> 48 #include <sys/file.h> 49 #include <sys/session.h> 50 #include <sys/kmem.h> 51 #include <sys/vtrace.h> 52 #include <sys/prsystm.h> 53 #include <sys/ipc.h> 54 #include <sys/sem_impl.h> 55 #include <c2/audit.h> 56 #include <sys/aio_impl.h> 57 #include <vm/as.h> 58 #include <sys/poll.h> 59 #include <sys/door.h> 60 #include <sys/lwpchan_impl.h> 61 #include <sys/utrap.h> 62 #include <sys/task.h> 63 #include <sys/exacct.h> 64 #include <sys/cyclic.h> 65 #include <sys/schedctl.h> 66 #include <sys/rctl.h> 67 #include <sys/contract_impl.h> 68 #include <sys/contract/process_impl.h> 69 #include <sys/list.h> 70 #include <sys/dtrace.h> 71 #include <sys/pool.h> 72 #include <sys/sdt.h> 73 #include <sys/corectl.h> 74 #include <sys/brand.h> 75 #include <sys/libc_kernel.h> 76 77 /* 78 * convert code/data pair into old style wait status 79 */ 80 int 81 wstat(int code, int data) 82 { 83 int stat = (data & 0377); 84 85 switch (code) { 86 case CLD_EXITED: 87 stat <<= 8; 88 break; 89 case CLD_DUMPED: 90 stat |= WCOREFLG; 91 break; 92 case CLD_KILLED: 93 break; 94 case CLD_TRAPPED: 95 case CLD_STOPPED: 96 stat <<= 8; 97 stat |= WSTOPFLG; 98 break; 99 case CLD_CONTINUED: 100 stat = WCONTFLG; 101 break; 102 default: 103 cmn_err(CE_PANIC, "wstat: bad code"); 104 /* NOTREACHED */ 105 } 106 return (stat); 107 } 108 109 static char * 110 exit_reason(char *buf, size_t bufsz, int what, int why) 111 { 112 switch (why) { 113 case CLD_EXITED: 114 (void) snprintf(buf, bufsz, "exited with status %d", what); 115 break; 116 case CLD_KILLED: 117 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what); 118 break; 119 case CLD_DUMPED: 120 (void) snprintf(buf, bufsz, "core dumped on signal %d", what); 121 break; 122 default: 123 (void) snprintf(buf, bufsz, "encountered unknown error " 124 "(%d, %d)", why, what); 125 break; 126 } 127 128 return (buf); 129 } 130 131 /* 132 * exit system call: pass back caller's arg. 133 */ 134 void 135 rexit(int rval) 136 { 137 exit(CLD_EXITED, rval); 138 } 139 140 /* 141 * Called by proc_exit() when a zone's init exits, presumably because 142 * it failed. As long as the given zone is still in the "running" 143 * state, we will re-exec() init, but first we need to reset things 144 * which are usually inherited across exec() but will break init's 145 * assumption that it is being exec()'d from a virgin process. Most 146 * importantly this includes closing all file descriptors (exec only 147 * closes those marked close-on-exec) and resetting signals (exec only 148 * resets handled signals, and we need to clear any signals which 149 * killed init). Anything else that exec(2) says would be inherited, 150 * but would affect the execution of init, needs to be reset. 151 */ 152 static int 153 restart_init(int what, int why) 154 { 155 kthread_t *t = curthread; 156 klwp_t *lwp = ttolwp(t); 157 proc_t *p = ttoproc(t); 158 user_t *up = PTOU(p); 159 160 vnode_t *oldcd, *oldrd; 161 int i, err; 162 char reason_buf[64]; 163 164 /* 165 * Let zone admin (and global zone admin if this is for a non-global 166 * zone) know that init has failed and will be restarted. 167 */ 168 zcmn_err(p->p_zone->zone_id, CE_WARN, 169 "init(1M) %s: restarting automatically", 170 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 171 172 if (!INGLOBALZONE(p)) { 173 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 174 "restarting automatically", 175 p->p_zone->zone_name, p->p_pid, reason_buf); 176 } 177 178 /* 179 * Remove any fpollinfo_t's for this (last) thread from our file 180 * descriptors so closeall() can ASSERT() that they're all gone. 181 * Then close all open file descriptors in the process. 182 */ 183 pollcleanup(); 184 closeall(P_FINFO(p)); 185 186 /* 187 * Grab p_lock and begin clearing miscellaneous global process 188 * state that needs to be reset before we exec the new init(1M). 189 */ 190 191 mutex_enter(&p->p_lock); 192 prbarrier(p); 193 194 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 195 up->u_cmask = CMASK; 196 197 sigemptyset(&t->t_hold); 198 sigemptyset(&t->t_sig); 199 sigemptyset(&t->t_extsig); 200 201 sigemptyset(&p->p_sig); 202 sigemptyset(&p->p_extsig); 203 204 sigdelq(p, t, 0); 205 sigdelq(p, NULL, 0); 206 207 if (p->p_killsqp) { 208 siginfofree(p->p_killsqp); 209 p->p_killsqp = NULL; 210 } 211 212 /* 213 * Reset any signals that are ignored back to the default disposition. 214 * Other u_signal members will be cleared when exec calls sigdefault(). 215 */ 216 for (i = 1; i < NSIG; i++) { 217 if (up->u_signal[i - 1] == SIG_IGN) { 218 up->u_signal[i - 1] = SIG_DFL; 219 sigemptyset(&up->u_sigmask[i - 1]); 220 } 221 } 222 223 /* 224 * Clear the current signal, any signal info associated with it, and 225 * any signal information from contracts and/or contract templates. 226 */ 227 lwp->lwp_cursig = 0; 228 lwp->lwp_extsig = 0; 229 if (lwp->lwp_curinfo != NULL) { 230 siginfofree(lwp->lwp_curinfo); 231 lwp->lwp_curinfo = NULL; 232 } 233 lwp_ctmpl_clear(lwp); 234 235 /* 236 * Reset both the process root directory and the current working 237 * directory to the root of the zone just as we do during boot. 238 */ 239 VN_HOLD(p->p_zone->zone_rootvp); 240 oldrd = up->u_rdir; 241 up->u_rdir = p->p_zone->zone_rootvp; 242 243 VN_HOLD(p->p_zone->zone_rootvp); 244 oldcd = up->u_cdir; 245 up->u_cdir = p->p_zone->zone_rootvp; 246 247 if (up->u_cwd != NULL) { 248 refstr_rele(up->u_cwd); 249 up->u_cwd = NULL; 250 } 251 252 mutex_exit(&p->p_lock); 253 254 if (oldrd != NULL) 255 VN_RELE(oldrd); 256 if (oldcd != NULL) 257 VN_RELE(oldcd); 258 259 /* Free the controlling tty. (freectty() always assumes curproc.) */ 260 ASSERT(p == curproc); 261 (void) freectty(B_TRUE); 262 263 /* 264 * Now exec() the new init(1M) on top of the current process. If we 265 * succeed, the caller will treat this like a successful system call. 266 * If we fail, we issue messages and the caller will proceed with exit. 267 */ 268 err = exec_init(p->p_zone->zone_initname, NULL); 269 270 if (err == 0) 271 return (0); 272 273 zcmn_err(p->p_zone->zone_id, CE_WARN, 274 "failed to restart init(1M) (err=%d): system reboot required", err); 275 276 if (!INGLOBALZONE(p)) { 277 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 278 "(pid %d, err=%d): zoneadm(1M) boot required", 279 p->p_zone->zone_name, p->p_pid, err); 280 } 281 282 return (-1); 283 } 284 285 /* 286 * Release resources. 287 * Enter zombie state. 288 * Wake up parent and init processes, 289 * and dispose of children. 290 */ 291 void 292 exit(int why, int what) 293 { 294 /* 295 * If proc_exit() fails, then some other lwp in the process 296 * got there first. We just have to call lwp_exit() to allow 297 * the other lwp to finish exiting the process. Otherwise we're 298 * restarting init, and should return. 299 */ 300 if (proc_exit(why, what) != 0) { 301 mutex_enter(&curproc->p_lock); 302 ASSERT(curproc->p_flag & SEXITLWPS); 303 lwp_exit(); 304 /* NOTREACHED */ 305 } 306 } 307 308 /* 309 * Set the SEXITING flag on the process, after making sure /proc does 310 * not have it locked. This is done in more places than proc_exit(), 311 * so it is a separate function. 312 */ 313 void 314 proc_is_exiting(proc_t *p) 315 { 316 mutex_enter(&p->p_lock); 317 prbarrier(p); 318 p->p_flag |= SEXITING; 319 mutex_exit(&p->p_lock); 320 } 321 322 /* 323 * Return value: 324 * 1 - exitlwps() failed, call (or continue) lwp_exit() 325 * 0 - restarting init. Return through system call path 326 */ 327 int 328 proc_exit(int why, int what) 329 { 330 kthread_t *t = curthread; 331 klwp_t *lwp = ttolwp(t); 332 proc_t *p = ttoproc(t); 333 zone_t *z = p->p_zone; 334 timeout_id_t tmp_id; 335 int rv; 336 proc_t *q; 337 task_t *tk; 338 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 339 sigqueue_t *sqp; 340 lwpdir_t *lwpdir; 341 uint_t lwpdir_sz; 342 tidhash_t *tidhash; 343 uint_t tidhash_sz; 344 ret_tidhash_t *ret_tidhash; 345 refstr_t *cwd; 346 hrtime_t hrutime, hrstime; 347 int evaporate; 348 349 /* 350 * Stop and discard the process's lwps except for the current one, 351 * unless some other lwp beat us to it. If exitlwps() fails then 352 * return and the calling lwp will call (or continue in) lwp_exit(). 353 */ 354 proc_is_exiting(p); 355 if (exitlwps(0) != 0) 356 return (1); 357 358 mutex_enter(&p->p_lock); 359 if (p->p_ttime > 0) { 360 /* 361 * Account any remaining ticks charged to this process 362 * on its way out. 363 */ 364 (void) task_cpu_time_incr(p->p_task, p->p_ttime); 365 p->p_ttime = 0; 366 } 367 mutex_exit(&p->p_lock); 368 369 DTRACE_PROC(lwp__exit); 370 DTRACE_PROC1(exit, int, why); 371 372 /* 373 * Will perform any brand specific proc exit processing, since this 374 * is always the last lwp, will also perform lwp_exit and free brand 375 * data 376 */ 377 if (PROC_IS_BRANDED(p)) { 378 lwp_detach_brand_hdlrs(lwp); 379 brand_clearbrand(p); 380 } 381 382 /* 383 * Don't let init exit unless zone_start_init() failed its exec, or 384 * we are shutting down the zone or the machine. 385 * 386 * Since we are single threaded, we don't need to lock the 387 * following accesses to zone_proc_initpid. 388 */ 389 if (p->p_pid == z->zone_proc_initpid) { 390 if (z->zone_boot_err == 0 && 391 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 392 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 393 z->zone_restart_init == B_TRUE && 394 restart_init(what, why) == 0) 395 return (0); 396 /* 397 * Since we didn't or couldn't restart init, we clear 398 * the zone's init state and proceed with exit 399 * processing. 400 */ 401 z->zone_proc_initpid = -1; 402 } 403 404 lwp_pcb_exit(); 405 406 /* 407 * Allocate a sigqueue now, before we grab locks. 408 * It will be given to sigcld(), below. 409 * Special case: If we will be making the process disappear 410 * without a trace because it is either: 411 * * an exiting SSYS process, or 412 * * a posix_spawn() vfork child who requests it, 413 * we don't bother to allocate a useless sigqueue. 414 */ 415 evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) && 416 why == CLD_EXITED && what == _EVAPORATE); 417 if (!evaporate) 418 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 419 420 /* 421 * revoke any doors created by the process. 422 */ 423 if (p->p_door_list) 424 door_exit(); 425 426 /* 427 * Release schedctl data structures. 428 */ 429 if (p->p_pagep) 430 schedctl_proc_cleanup(); 431 432 /* 433 * make sure all pending kaio has completed. 434 */ 435 if (p->p_aio) 436 aio_cleanup_exit(); 437 438 /* 439 * discard the lwpchan cache. 440 */ 441 if (p->p_lcp != NULL) 442 lwpchan_destroy_cache(0); 443 444 /* 445 * Clean up any DTrace helper actions or probes for the process. 446 */ 447 if (p->p_dtrace_helpers != NULL) { 448 ASSERT(dtrace_helpers_cleanup != NULL); 449 (*dtrace_helpers_cleanup)(); 450 } 451 452 /* untimeout the realtime timers */ 453 if (p->p_itimer != NULL) 454 timer_exit(); 455 456 if ((tmp_id = p->p_alarmid) != 0) { 457 p->p_alarmid = 0; 458 (void) untimeout(tmp_id); 459 } 460 461 /* 462 * Remove any fpollinfo_t's for this (last) thread from our file 463 * descriptors so closeall() can ASSERT() that they're all gone. 464 */ 465 pollcleanup(); 466 467 if (p->p_rprof_cyclic != CYCLIC_NONE) { 468 mutex_enter(&cpu_lock); 469 cyclic_remove(p->p_rprof_cyclic); 470 mutex_exit(&cpu_lock); 471 } 472 473 mutex_enter(&p->p_lock); 474 475 /* 476 * Clean up any DTrace probes associated with this process. 477 */ 478 if (p->p_dtrace_probes) { 479 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 480 dtrace_fasttrap_exit_ptr(p); 481 } 482 483 while ((tmp_id = p->p_itimerid) != 0) { 484 p->p_itimerid = 0; 485 mutex_exit(&p->p_lock); 486 (void) untimeout(tmp_id); 487 mutex_enter(&p->p_lock); 488 } 489 490 lwp_cleanup(); 491 492 /* 493 * We are about to exit; prevent our resource associations from 494 * being changed. 495 */ 496 pool_barrier_enter(); 497 498 /* 499 * Block the process against /proc now that we have really 500 * acquired p->p_lock (to manipulate p_tlist at least). 501 */ 502 prbarrier(p); 503 504 #ifdef SUN_SRC_COMPAT 505 if (code == CLD_KILLED) 506 u.u_acflag |= AXSIG; 507 #endif 508 sigfillset(&p->p_ignore); 509 sigemptyset(&p->p_siginfo); 510 sigemptyset(&p->p_sig); 511 sigemptyset(&p->p_extsig); 512 sigemptyset(&t->t_sig); 513 sigemptyset(&t->t_extsig); 514 sigemptyset(&p->p_sigmask); 515 sigdelq(p, t, 0); 516 lwp->lwp_cursig = 0; 517 lwp->lwp_extsig = 0; 518 p->p_flag &= ~(SKILLED | SEXTKILLED); 519 if (lwp->lwp_curinfo) { 520 siginfofree(lwp->lwp_curinfo); 521 lwp->lwp_curinfo = NULL; 522 } 523 524 t->t_proc_flag |= TP_LWPEXIT; 525 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 526 prlwpexit(t); /* notify /proc */ 527 lwp_hash_out(p, t->t_tid); 528 prexit(p); 529 530 p->p_lwpcnt = 0; 531 p->p_tlist = NULL; 532 sigqfree(p); 533 term_mstate(t); 534 p->p_mterm = gethrtime(); 535 536 exec_vp = p->p_exec; 537 execdir_vp = p->p_execdir; 538 p->p_exec = NULLVP; 539 p->p_execdir = NULLVP; 540 mutex_exit(&p->p_lock); 541 542 pr_free_watched_pages(p); 543 544 closeall(P_FINFO(p)); 545 546 /* Free the controlling tty. (freectty() always assumes curproc.) */ 547 ASSERT(p == curproc); 548 (void) freectty(B_TRUE); 549 550 #if defined(__sparc) 551 if (p->p_utraps != NULL) 552 utrap_free(p); 553 #endif 554 if (p->p_semacct) /* IPC semaphore exit */ 555 semexit(p); 556 rv = wstat(why, what); 557 558 acct(rv & 0xff); 559 exacct_commit_proc(p, rv); 560 561 /* 562 * Release any resources associated with C2 auditing 563 */ 564 if (audit_active) { 565 /* 566 * audit exit system call 567 */ 568 audit_exit(why, what); 569 } 570 571 /* 572 * Free address space. 573 */ 574 relvm(); 575 576 if (exec_vp) { 577 /* 578 * Close this executable which has been opened when the process 579 * was created by getproc(). 580 */ 581 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL); 582 VN_RELE(exec_vp); 583 } 584 if (execdir_vp) 585 VN_RELE(execdir_vp); 586 587 /* 588 * Release held contracts. 589 */ 590 contract_exit(p); 591 592 /* 593 * Depart our encapsulating process contract. 594 */ 595 if ((p->p_flag & SSYS) == 0) { 596 ASSERT(p->p_ct_process); 597 contract_process_exit(p->p_ct_process, p, rv); 598 } 599 600 /* 601 * Remove pool association, and block if requested by pool_do_bind. 602 */ 603 mutex_enter(&p->p_lock); 604 ASSERT(p->p_pool->pool_ref > 0); 605 atomic_add_32(&p->p_pool->pool_ref, -1); 606 p->p_pool = pool_default; 607 /* 608 * Now that our address space has been freed and all other threads 609 * in this process have exited, set the PEXITED pool flag. This 610 * tells the pools subsystems to ignore this process if it was 611 * requested to rebind this process to a new pool. 612 */ 613 p->p_poolflag |= PEXITED; 614 pool_barrier_exit(); 615 mutex_exit(&p->p_lock); 616 617 mutex_enter(&pidlock); 618 619 /* 620 * Delete this process from the newstate list of its parent. We 621 * will put it in the right place in the sigcld in the end. 622 */ 623 delete_ns(p->p_parent, p); 624 625 /* 626 * Reassign the orphans to the next of kin. 627 * Don't rearrange init's orphanage. 628 */ 629 if ((q = p->p_orphan) != NULL && p != proc_init) { 630 631 proc_t *nokp = p->p_nextofkin; 632 633 for (;;) { 634 q->p_nextofkin = nokp; 635 if (q->p_nextorph == NULL) 636 break; 637 q = q->p_nextorph; 638 } 639 q->p_nextorph = nokp->p_orphan; 640 nokp->p_orphan = p->p_orphan; 641 p->p_orphan = NULL; 642 } 643 644 /* 645 * Reassign the children to init. 646 * Don't try to assign init's children to init. 647 */ 648 if ((q = p->p_child) != NULL && p != proc_init) { 649 struct proc *np; 650 struct proc *initp = proc_init; 651 boolean_t setzonetop = B_FALSE; 652 653 if (!INGLOBALZONE(curproc)) 654 setzonetop = B_TRUE; 655 656 pgdetach(p); 657 658 do { 659 np = q->p_sibling; 660 /* 661 * Delete it from its current parent new state 662 * list and add it to init new state list 663 */ 664 delete_ns(q->p_parent, q); 665 666 q->p_ppid = 1; 667 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); 668 if (setzonetop) { 669 mutex_enter(&q->p_lock); 670 q->p_flag |= SZONETOP; 671 mutex_exit(&q->p_lock); 672 } 673 q->p_parent = initp; 674 675 /* 676 * Since q will be the first child, 677 * it will not have a previous sibling. 678 */ 679 q->p_psibling = NULL; 680 if (initp->p_child) { 681 initp->p_child->p_psibling = q; 682 } 683 q->p_sibling = initp->p_child; 684 initp->p_child = q; 685 if (q->p_proc_flag & P_PR_PTRACE) { 686 mutex_enter(&q->p_lock); 687 sigtoproc(q, NULL, SIGKILL); 688 mutex_exit(&q->p_lock); 689 } 690 /* 691 * sigcld() will add the child to parents 692 * newstate list. 693 */ 694 if (q->p_stat == SZOMB) 695 sigcld(q, NULL); 696 } while ((q = np) != NULL); 697 698 p->p_child = NULL; 699 ASSERT(p->p_child_ns == NULL); 700 } 701 702 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 703 704 mutex_enter(&p->p_lock); 705 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 706 707 /* 708 * Have our task accummulate our resource usage data before they 709 * become contaminated by p_cacct etc., and before we renounce 710 * membership of the task. 711 * 712 * We do this regardless of whether or not task accounting is active. 713 * This is to avoid having nonsense data reported for this task if 714 * task accounting is subsequently enabled. The overhead is minimal; 715 * by this point, this process has accounted for the usage of all its 716 * LWPs. We nonetheless do the work here, and under the protection of 717 * pidlock, so that the movement of the process's usage to the task 718 * happens at the same time as the removal of the process from the 719 * task, from the point of view of exacct_snapshot_task_usage(). 720 */ 721 exacct_update_task_mstate(p); 722 723 hrutime = mstate_aggr_state(p, LMS_USER); 724 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 725 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 726 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 727 728 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 729 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 730 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 731 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 732 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 733 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 734 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 735 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 736 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 737 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 738 739 p->p_ru.minflt += p->p_cru.minflt; 740 p->p_ru.majflt += p->p_cru.majflt; 741 p->p_ru.nswap += p->p_cru.nswap; 742 p->p_ru.inblock += p->p_cru.inblock; 743 p->p_ru.oublock += p->p_cru.oublock; 744 p->p_ru.msgsnd += p->p_cru.msgsnd; 745 p->p_ru.msgrcv += p->p_cru.msgrcv; 746 p->p_ru.nsignals += p->p_cru.nsignals; 747 p->p_ru.nvcsw += p->p_cru.nvcsw; 748 p->p_ru.nivcsw += p->p_cru.nivcsw; 749 p->p_ru.sysc += p->p_cru.sysc; 750 p->p_ru.ioch += p->p_cru.ioch; 751 752 p->p_stat = SZOMB; 753 p->p_proc_flag &= ~P_PR_PTRACE; 754 p->p_wdata = what; 755 p->p_wcode = (char)why; 756 757 cdir = PTOU(p)->u_cdir; 758 rdir = PTOU(p)->u_rdir; 759 cwd = PTOU(p)->u_cwd; 760 761 ASSERT(cdir != NULL || p->p_parent == &p0); 762 763 /* 764 * Release resource controls, as they are no longer enforceable. 765 */ 766 rctl_set_free(p->p_rctls); 767 768 /* 769 * Give up task and project memberships. Decrement tk_nlwps counter 770 * for our task.max-lwps resource control. An extended accounting 771 * record, if that facility is active, is scheduled to be written. 772 * Zombie processes are false members of task0 for the remainder of 773 * their lifetime; no accounting information is recorded for them. 774 */ 775 tk = p->p_task; 776 777 mutex_enter(&p->p_zone->zone_nlwps_lock); 778 tk->tk_nlwps--; 779 tk->tk_proj->kpj_nlwps--; 780 p->p_zone->zone_nlwps--; 781 mutex_exit(&p->p_zone->zone_nlwps_lock); 782 task_detach(p); 783 p->p_task = task0p; 784 785 /* 786 * Clear the lwp directory and the lwpid hash table 787 * now that /proc can't bother us any more. 788 * We free the memory below, after dropping p->p_lock. 789 */ 790 lwpdir = p->p_lwpdir; 791 lwpdir_sz = p->p_lwpdir_sz; 792 tidhash = p->p_tidhash; 793 tidhash_sz = p->p_tidhash_sz; 794 ret_tidhash = p->p_ret_tidhash; 795 p->p_lwpdir = NULL; 796 p->p_lwpfree = NULL; 797 p->p_lwpdir_sz = 0; 798 p->p_tidhash = NULL; 799 p->p_tidhash_sz = 0; 800 p->p_ret_tidhash = NULL; 801 802 /* 803 * If the process has context ops installed, call the exit routine 804 * on behalf of this last remaining thread. Normally exitpctx() is 805 * called during thread_exit() or lwp_exit(), but because this is the 806 * last thread in the process, we must call it here. By the time 807 * thread_exit() is called (below), the association with the relevant 808 * process has been lost. 809 * 810 * We also free the context here. 811 */ 812 if (p->p_pctx) { 813 kpreempt_disable(); 814 exitpctx(p); 815 kpreempt_enable(); 816 817 freepctx(p, 0); 818 } 819 820 /* 821 * curthread's proc pointer is changed to point to the 'sched' 822 * process for the corresponding zone, except in the case when 823 * the exiting process is in fact a zsched instance, in which 824 * case the proc pointer is set to p0. We do so, so that the 825 * process still points at the right zone when we call the VN_RELE() 826 * below. 827 * 828 * This is because curthread's original proc pointer can be freed as 829 * soon as the child sends a SIGCLD to its parent. We use zsched so 830 * that for user processes, even in the final moments of death, the 831 * process is still associated with its zone. 832 */ 833 if (p != t->t_procp->p_zone->zone_zsched) 834 t->t_procp = t->t_procp->p_zone->zone_zsched; 835 else 836 t->t_procp = &p0; 837 838 mutex_exit(&p->p_lock); 839 if (!evaporate) { 840 p->p_pidflag &= ~CLDPEND; 841 sigcld(p, sqp); 842 } else { 843 /* 844 * Do what sigcld() would do if the disposition 845 * of the SIGCHLD signal were set to be ignored. 846 */ 847 cv_broadcast(&p->p_srwchan_cv); 848 freeproc(p); 849 } 850 mutex_exit(&pidlock); 851 852 /* 853 * We don't release u_cdir and u_rdir until SZOMB is set. 854 * This protects us against dofusers(). 855 */ 856 if (cdir) 857 VN_RELE(cdir); 858 if (rdir) 859 VN_RELE(rdir); 860 if (cwd) 861 refstr_rele(cwd); 862 863 /* 864 * task_rele() may ultimately cause the zone to go away (or 865 * may cause the last user process in a zone to go away, which 866 * signals zsched to go away). So prior to this call, we must 867 * no longer point at zsched. 868 */ 869 t->t_procp = &p0; 870 task_rele(tk); 871 872 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 873 kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t)); 874 while (ret_tidhash != NULL) { 875 ret_tidhash_t *next = ret_tidhash->rth_next; 876 kmem_free(ret_tidhash->rth_tidhash, 877 ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t)); 878 kmem_free(ret_tidhash, sizeof (*ret_tidhash)); 879 ret_tidhash = next; 880 } 881 882 thread_exit(); 883 /* NOTREACHED */ 884 } 885 886 /* 887 * Format siginfo structure for wait system calls. 888 */ 889 void 890 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 891 { 892 ASSERT(MUTEX_HELD(&pidlock)); 893 894 bzero(ip, sizeof (k_siginfo_t)); 895 ip->si_signo = SIGCLD; 896 ip->si_code = pp->p_wcode; 897 ip->si_pid = pp->p_pid; 898 ip->si_ctid = PRCTID(pp); 899 ip->si_zoneid = pp->p_zone->zone_id; 900 ip->si_status = pp->p_wdata; 901 ip->si_stime = pp->p_stime; 902 ip->si_utime = pp->p_utime; 903 904 if (waitflag) { 905 pp->p_wcode = 0; 906 pp->p_wdata = 0; 907 pp->p_pidflag &= ~CLDPEND; 908 } 909 } 910 911 /* 912 * Wait system call. 913 * Search for a terminated (zombie) child, 914 * finally lay it to rest, and collect its status. 915 * Look also for stopped children, 916 * and pass back status from them. 917 */ 918 int 919 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 920 { 921 int found; 922 proc_t *cp, *pp; 923 int proc_gone; 924 int waitflag = !(options & WNOWAIT); 925 926 /* 927 * Obsolete flag, defined here only for binary compatibility 928 * with old statically linked executables. Delete this when 929 * we no longer care about these old and broken applications. 930 */ 931 #define _WNOCHLD 0400 932 options &= ~_WNOCHLD; 933 934 if (options == 0 || (options & ~WOPTMASK)) 935 return (EINVAL); 936 937 switch (idtype) { 938 case P_PID: 939 case P_PGID: 940 if (id < 0 || id >= maxpid) 941 return (EINVAL); 942 /* FALLTHROUGH */ 943 case P_ALL: 944 break; 945 default: 946 return (EINVAL); 947 } 948 949 pp = ttoproc(curthread); 950 951 /* 952 * lock parent mutex so that sibling chain can be searched. 953 */ 954 mutex_enter(&pidlock); 955 956 /* 957 * if we are only looking for exited processes and child_ns list 958 * is empty no reason to look at all children. 959 */ 960 if (idtype == P_ALL && 961 (options & ~WNOWAIT) == (WNOHANG | WEXITED) && 962 pp->p_child_ns == NULL) { 963 if (pp->p_child) { 964 mutex_exit(&pidlock); 965 bzero(ip, sizeof (k_siginfo_t)); 966 return (0); 967 } 968 mutex_exit(&pidlock); 969 return (ECHILD); 970 } 971 972 while (pp->p_child != NULL) { 973 974 proc_gone = 0; 975 976 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { 977 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) 978 continue; 979 if (idtype == P_PID && id != cp->p_pid) 980 continue; 981 if (idtype == P_PGID && id != cp->p_pgrp) 982 continue; 983 984 switch (cp->p_wcode) { 985 986 case CLD_TRAPPED: 987 case CLD_STOPPED: 988 case CLD_CONTINUED: 989 cmn_err(CE_PANIC, 990 "waitid: wrong state %d on the p_newstate" 991 " list", cp->p_wcode); 992 break; 993 994 case CLD_EXITED: 995 case CLD_DUMPED: 996 case CLD_KILLED: 997 if (!(options & WEXITED)) { 998 /* 999 * Count how many are already gone 1000 * for good. 1001 */ 1002 proc_gone++; 1003 break; 1004 } 1005 if (!waitflag) { 1006 winfo(cp, ip, 0); 1007 } else { 1008 winfo(cp, ip, 1); 1009 freeproc(cp); 1010 } 1011 mutex_exit(&pidlock); 1012 if (waitflag) { /* accept SIGCLD */ 1013 sigcld_delete(ip); 1014 sigcld_repost(); 1015 } 1016 return (0); 1017 } 1018 1019 if (idtype == P_PID) 1020 break; 1021 } 1022 1023 /* 1024 * Wow! None of the threads on the p_sibling_ns list were 1025 * interesting threads. Check all the kids! 1026 */ 1027 found = 0; 1028 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { 1029 if (idtype == P_PID && id != cp->p_pid) 1030 continue; 1031 if (idtype == P_PGID && id != cp->p_pgrp) 1032 continue; 1033 1034 switch (cp->p_wcode) { 1035 case CLD_TRAPPED: 1036 if (!(options & WTRAPPED)) 1037 break; 1038 winfo(cp, ip, waitflag); 1039 mutex_exit(&pidlock); 1040 if (waitflag) { /* accept SIGCLD */ 1041 sigcld_delete(ip); 1042 sigcld_repost(); 1043 } 1044 return (0); 1045 1046 case CLD_STOPPED: 1047 if (!(options & WSTOPPED)) 1048 break; 1049 /* Is it still stopped? */ 1050 mutex_enter(&cp->p_lock); 1051 if (!jobstopped(cp)) { 1052 mutex_exit(&cp->p_lock); 1053 break; 1054 } 1055 mutex_exit(&cp->p_lock); 1056 winfo(cp, ip, waitflag); 1057 mutex_exit(&pidlock); 1058 if (waitflag) { /* accept SIGCLD */ 1059 sigcld_delete(ip); 1060 sigcld_repost(); 1061 } 1062 return (0); 1063 1064 case CLD_CONTINUED: 1065 if (!(options & WCONTINUED)) 1066 break; 1067 winfo(cp, ip, waitflag); 1068 mutex_exit(&pidlock); 1069 if (waitflag) { /* accept SIGCLD */ 1070 sigcld_delete(ip); 1071 sigcld_repost(); 1072 } 1073 return (0); 1074 1075 case CLD_EXITED: 1076 case CLD_DUMPED: 1077 case CLD_KILLED: 1078 if (idtype != P_PID && 1079 (cp->p_pidflag & CLDWAITPID)) 1080 continue; 1081 /* 1082 * Don't complain if a process was found in 1083 * the first loop but we broke out of the loop 1084 * because of the arguments passed to us. 1085 */ 1086 if (proc_gone == 0) { 1087 cmn_err(CE_PANIC, 1088 "waitid: wrong state on the" 1089 " p_child list"); 1090 } else { 1091 break; 1092 } 1093 } 1094 1095 found++; 1096 1097 if (idtype == P_PID) 1098 break; 1099 } 1100 1101 /* 1102 * If we found no interesting processes at all, 1103 * break out and return ECHILD. 1104 */ 1105 if (found + proc_gone == 0) 1106 break; 1107 1108 if (options & WNOHANG) { 1109 mutex_exit(&pidlock); 1110 bzero(ip, sizeof (k_siginfo_t)); 1111 /* 1112 * We should set ip->si_signo = SIGCLD, 1113 * but there is an SVVS test that expects 1114 * ip->si_signo to be zero in this case. 1115 */ 1116 return (0); 1117 } 1118 1119 /* 1120 * If we found no processes of interest that could 1121 * change state while we wait, we don't wait at all. 1122 * Get out with ECHILD according to SVID. 1123 */ 1124 if (found == proc_gone) 1125 break; 1126 1127 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1128 mutex_exit(&pidlock); 1129 return (EINTR); 1130 } 1131 } 1132 mutex_exit(&pidlock); 1133 return (ECHILD); 1134 } 1135 1136 /* 1137 * The wait() system call trap is no longer invoked by libc. 1138 * It is retained only for the benefit of statically linked applications. 1139 * Delete this when we no longer care about these old and broken applications. 1140 */ 1141 int64_t 1142 wait(void) 1143 { 1144 int error; 1145 k_siginfo_t info; 1146 rval_t r; 1147 1148 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1149 return (set_errno(error)); 1150 r.r_val1 = info.si_pid; 1151 r.r_val2 = wstat(info.si_code, info.si_status); 1152 return (r.r_vals); 1153 } 1154 1155 int 1156 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1157 { 1158 int error; 1159 k_siginfo_t info; 1160 1161 if (error = waitid(idtype, id, &info, options)) 1162 return (set_errno(error)); 1163 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1164 return (set_errno(EFAULT)); 1165 return (0); 1166 } 1167 1168 #ifdef _SYSCALL32_IMPL 1169 1170 int 1171 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1172 { 1173 int error; 1174 k_siginfo_t info; 1175 siginfo32_t info32; 1176 1177 if (error = waitid(idtype, id, &info, options)) 1178 return (set_errno(error)); 1179 siginfo_kto32(&info, &info32); 1180 if (copyout(&info32, infop, sizeof (info32))) 1181 return (set_errno(EFAULT)); 1182 return (0); 1183 } 1184 1185 #endif /* _SYSCALL32_IMPL */ 1186 1187 void 1188 proc_detach(proc_t *p) 1189 { 1190 proc_t *q; 1191 1192 ASSERT(MUTEX_HELD(&pidlock)); 1193 1194 q = p->p_parent; 1195 ASSERT(q != NULL); 1196 1197 /* 1198 * Take it off the newstate list of its parent 1199 */ 1200 delete_ns(q, p); 1201 1202 if (q->p_child == p) { 1203 q->p_child = p->p_sibling; 1204 /* 1205 * If the parent has no children, it better not 1206 * have any with new states either! 1207 */ 1208 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1209 } 1210 1211 if (p->p_sibling) { 1212 p->p_sibling->p_psibling = p->p_psibling; 1213 } 1214 1215 if (p->p_psibling) { 1216 p->p_psibling->p_sibling = p->p_sibling; 1217 } 1218 } 1219 1220 /* 1221 * Remove zombie children from the process table. 1222 */ 1223 void 1224 freeproc(proc_t *p) 1225 { 1226 proc_t *q; 1227 1228 ASSERT(p->p_stat == SZOMB); 1229 ASSERT(p->p_tlist == NULL); 1230 ASSERT(MUTEX_HELD(&pidlock)); 1231 1232 sigdelq(p, NULL, 0); 1233 if (p->p_killsqp) { 1234 siginfofree(p->p_killsqp); 1235 p->p_killsqp = NULL; 1236 } 1237 1238 prfree(p); /* inform /proc */ 1239 1240 /* 1241 * Don't free the init processes. 1242 * Other dying processes will access it. 1243 */ 1244 if (p == proc_init) 1245 return; 1246 1247 1248 /* 1249 * We wait until now to free the cred structure because a 1250 * zombie process's credentials may be examined by /proc. 1251 * No cred locking needed because there are no threads at this point. 1252 */ 1253 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1254 crfree(p->p_cred); 1255 if (p->p_corefile != NULL) { 1256 corectl_path_rele(p->p_corefile); 1257 p->p_corefile = NULL; 1258 } 1259 if (p->p_content != NULL) { 1260 corectl_content_rele(p->p_content); 1261 p->p_content = NULL; 1262 } 1263 1264 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1265 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1266 /* 1267 * This should still do the right thing since p_utime/stime 1268 * get set to the correct value on process exit, so it 1269 * should get properly updated 1270 */ 1271 p->p_nextofkin->p_cutime += p->p_utime; 1272 p->p_nextofkin->p_cstime += p->p_stime; 1273 1274 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1275 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1276 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1277 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1278 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1279 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1280 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1281 += p->p_acct[LMS_USER_LOCK]; 1282 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1283 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1284 += p->p_acct[LMS_WAIT_CPU]; 1285 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1286 1287 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1288 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1289 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1290 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1291 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1292 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1293 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1294 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1295 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1296 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1297 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1298 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1299 1300 } 1301 1302 q = p->p_nextofkin; 1303 if (q && q->p_orphan == p) 1304 q->p_orphan = p->p_nextorph; 1305 else if (q) { 1306 for (q = q->p_orphan; q; q = q->p_nextorph) 1307 if (q->p_nextorph == p) 1308 break; 1309 ASSERT(q && q->p_nextorph == p); 1310 q->p_nextorph = p->p_nextorph; 1311 } 1312 1313 proc_detach(p); 1314 pid_exit(p); /* frees pid and proc structure */ 1315 } 1316 1317 /* 1318 * Delete process "child" from the newstate list of process "parent" 1319 */ 1320 void 1321 delete_ns(proc_t *parent, proc_t *child) 1322 { 1323 proc_t **ns; 1324 1325 ASSERT(MUTEX_HELD(&pidlock)); 1326 ASSERT(child->p_parent == parent); 1327 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1328 if (*ns == child) { 1329 1330 ASSERT((*ns)->p_parent == parent); 1331 1332 *ns = child->p_sibling_ns; 1333 child->p_sibling_ns = NULL; 1334 return; 1335 } 1336 } 1337 } 1338 1339 /* 1340 * Add process "child" to the new state list of process "parent" 1341 */ 1342 void 1343 add_ns(proc_t *parent, proc_t *child) 1344 { 1345 ASSERT(child->p_sibling_ns == NULL); 1346 child->p_sibling_ns = parent->p_child_ns; 1347 parent->p_child_ns = child; 1348 } 1349