1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/sysmacros.h> 31 #include <sys/systm.h> 32 #include <sys/cred.h> 33 #include <sys/user.h> 34 #include <sys/errno.h> 35 #include <sys/proc.h> 36 #include <sys/ucontext.h> 37 #include <sys/procfs.h> 38 #include <sys/vnode.h> 39 #include <sys/acct.h> 40 #include <sys/var.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/wait.h> 44 #include <sys/siginfo.h> 45 #include <sys/procset.h> 46 #include <sys/class.h> 47 #include <sys/file.h> 48 #include <sys/session.h> 49 #include <sys/kmem.h> 50 #include <sys/vtrace.h> 51 #include <sys/prsystm.h> 52 #include <sys/ipc.h> 53 #include <sys/sem_impl.h> 54 #include <c2/audit.h> 55 #include <sys/aio_impl.h> 56 #include <vm/as.h> 57 #include <sys/poll.h> 58 #include <sys/door.h> 59 #include <sys/lwpchan_impl.h> 60 #include <sys/utrap.h> 61 #include <sys/task.h> 62 #include <sys/exacct.h> 63 #include <sys/cyclic.h> 64 #include <sys/schedctl.h> 65 #include <sys/rctl.h> 66 #include <sys/contract_impl.h> 67 #include <sys/contract/process_impl.h> 68 #include <sys/list.h> 69 #include <sys/dtrace.h> 70 #include <sys/pool.h> 71 #include <sys/sdt.h> 72 #include <sys/corectl.h> 73 #include <sys/brand.h> 74 #include <sys/libc_kernel.h> 75 76 /* 77 * convert code/data pair into old style wait status 78 */ 79 int 80 wstat(int code, int data) 81 { 82 int stat = (data & 0377); 83 84 switch (code) { 85 case CLD_EXITED: 86 stat <<= 8; 87 break; 88 case CLD_DUMPED: 89 stat |= WCOREFLG; 90 break; 91 case CLD_KILLED: 92 break; 93 case CLD_TRAPPED: 94 case CLD_STOPPED: 95 stat <<= 8; 96 stat |= WSTOPFLG; 97 break; 98 case CLD_CONTINUED: 99 stat = WCONTFLG; 100 break; 101 default: 102 cmn_err(CE_PANIC, "wstat: bad code"); 103 /* NOTREACHED */ 104 } 105 return (stat); 106 } 107 108 static char * 109 exit_reason(char *buf, size_t bufsz, int what, int why) 110 { 111 switch (why) { 112 case CLD_EXITED: 113 (void) snprintf(buf, bufsz, "exited with status %d", what); 114 break; 115 case CLD_KILLED: 116 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what); 117 break; 118 case CLD_DUMPED: 119 (void) snprintf(buf, bufsz, "core dumped on signal %d", what); 120 break; 121 default: 122 (void) snprintf(buf, bufsz, "encountered unknown error " 123 "(%d, %d)", why, what); 124 break; 125 } 126 127 return (buf); 128 } 129 130 /* 131 * exit system call: pass back caller's arg. 132 */ 133 void 134 rexit(int rval) 135 { 136 exit(CLD_EXITED, rval); 137 } 138 139 /* 140 * Called by proc_exit() when a zone's init exits, presumably because 141 * it failed. As long as the given zone is still in the "running" 142 * state, we will re-exec() init, but first we need to reset things 143 * which are usually inherited across exec() but will break init's 144 * assumption that it is being exec()'d from a virgin process. Most 145 * importantly this includes closing all file descriptors (exec only 146 * closes those marked close-on-exec) and resetting signals (exec only 147 * resets handled signals, and we need to clear any signals which 148 * killed init). Anything else that exec(2) says would be inherited, 149 * but would affect the execution of init, needs to be reset. 150 */ 151 static int 152 restart_init(int what, int why) 153 { 154 kthread_t *t = curthread; 155 klwp_t *lwp = ttolwp(t); 156 proc_t *p = ttoproc(t); 157 user_t *up = PTOU(p); 158 159 vnode_t *oldcd, *oldrd; 160 int i, err; 161 char reason_buf[64]; 162 163 /* 164 * Let zone admin (and global zone admin if this is for a non-global 165 * zone) know that init has failed and will be restarted. 166 */ 167 zcmn_err(p->p_zone->zone_id, CE_WARN, 168 "init(1M) %s: restarting automatically", 169 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 170 171 if (!INGLOBALZONE(p)) { 172 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 173 "restarting automatically", 174 p->p_zone->zone_name, p->p_pid, reason_buf); 175 } 176 177 /* 178 * Remove any fpollinfo_t's for this (last) thread from our file 179 * descriptors so closeall() can ASSERT() that they're all gone. 180 * Then close all open file descriptors in the process. 181 */ 182 pollcleanup(); 183 closeall(P_FINFO(p)); 184 185 /* 186 * Grab p_lock and begin clearing miscellaneous global process 187 * state that needs to be reset before we exec the new init(1M). 188 */ 189 190 mutex_enter(&p->p_lock); 191 prbarrier(p); 192 193 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 194 up->u_cmask = CMASK; 195 196 sigemptyset(&t->t_hold); 197 sigemptyset(&t->t_sig); 198 sigemptyset(&t->t_extsig); 199 200 sigemptyset(&p->p_sig); 201 sigemptyset(&p->p_extsig); 202 203 sigdelq(p, t, 0); 204 sigdelq(p, NULL, 0); 205 206 if (p->p_killsqp) { 207 siginfofree(p->p_killsqp); 208 p->p_killsqp = NULL; 209 } 210 211 /* 212 * Reset any signals that are ignored back to the default disposition. 213 * Other u_signal members will be cleared when exec calls sigdefault(). 214 */ 215 for (i = 1; i < NSIG; i++) { 216 if (up->u_signal[i - 1] == SIG_IGN) { 217 up->u_signal[i - 1] = SIG_DFL; 218 sigemptyset(&up->u_sigmask[i - 1]); 219 } 220 } 221 222 /* 223 * Clear the current signal, any signal info associated with it, and 224 * any signal information from contracts and/or contract templates. 225 */ 226 lwp->lwp_cursig = 0; 227 lwp->lwp_extsig = 0; 228 if (lwp->lwp_curinfo != NULL) { 229 siginfofree(lwp->lwp_curinfo); 230 lwp->lwp_curinfo = NULL; 231 } 232 lwp_ctmpl_clear(lwp); 233 234 /* 235 * Reset both the process root directory and the current working 236 * directory to the root of the zone just as we do during boot. 237 */ 238 VN_HOLD(p->p_zone->zone_rootvp); 239 oldrd = up->u_rdir; 240 up->u_rdir = p->p_zone->zone_rootvp; 241 242 VN_HOLD(p->p_zone->zone_rootvp); 243 oldcd = up->u_cdir; 244 up->u_cdir = p->p_zone->zone_rootvp; 245 246 if (up->u_cwd != NULL) { 247 refstr_rele(up->u_cwd); 248 up->u_cwd = NULL; 249 } 250 251 mutex_exit(&p->p_lock); 252 253 if (oldrd != NULL) 254 VN_RELE(oldrd); 255 if (oldcd != NULL) 256 VN_RELE(oldcd); 257 258 /* Free the controlling tty. (freectty() always assumes curproc.) */ 259 ASSERT(p == curproc); 260 (void) freectty(B_TRUE); 261 262 /* 263 * Now exec() the new init(1M) on top of the current process. If we 264 * succeed, the caller will treat this like a successful system call. 265 * If we fail, we issue messages and the caller will proceed with exit. 266 */ 267 err = exec_init(p->p_zone->zone_initname, NULL); 268 269 if (err == 0) 270 return (0); 271 272 zcmn_err(p->p_zone->zone_id, CE_WARN, 273 "failed to restart init(1M) (err=%d): system reboot required", err); 274 275 if (!INGLOBALZONE(p)) { 276 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 277 "(pid %d, err=%d): zoneadm(1M) boot required", 278 p->p_zone->zone_name, p->p_pid, err); 279 } 280 281 return (-1); 282 } 283 284 /* 285 * Release resources. 286 * Enter zombie state. 287 * Wake up parent and init processes, 288 * and dispose of children. 289 */ 290 void 291 exit(int why, int what) 292 { 293 /* 294 * If proc_exit() fails, then some other lwp in the process 295 * got there first. We just have to call lwp_exit() to allow 296 * the other lwp to finish exiting the process. Otherwise we're 297 * restarting init, and should return. 298 */ 299 if (proc_exit(why, what) != 0) { 300 mutex_enter(&curproc->p_lock); 301 ASSERT(curproc->p_flag & SEXITLWPS); 302 lwp_exit(); 303 /* NOTREACHED */ 304 } 305 } 306 307 /* 308 * Set the SEXITING flag on the process, after making sure /proc does 309 * not have it locked. This is done in more places than proc_exit(), 310 * so it is a separate function. 311 */ 312 void 313 proc_is_exiting(proc_t *p) 314 { 315 mutex_enter(&p->p_lock); 316 prbarrier(p); 317 p->p_flag |= SEXITING; 318 mutex_exit(&p->p_lock); 319 } 320 321 /* 322 * Return value: 323 * 1 - exitlwps() failed, call (or continue) lwp_exit() 324 * 0 - restarting init. Return through system call path 325 */ 326 int 327 proc_exit(int why, int what) 328 { 329 kthread_t *t = curthread; 330 klwp_t *lwp = ttolwp(t); 331 proc_t *p = ttoproc(t); 332 zone_t *z = p->p_zone; 333 timeout_id_t tmp_id; 334 int rv; 335 proc_t *q; 336 task_t *tk; 337 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 338 sigqueue_t *sqp; 339 lwpdir_t *lwpdir; 340 uint_t lwpdir_sz; 341 tidhash_t *tidhash; 342 uint_t tidhash_sz; 343 ret_tidhash_t *ret_tidhash; 344 refstr_t *cwd; 345 hrtime_t hrutime, hrstime; 346 int evaporate; 347 348 /* 349 * Stop and discard the process's lwps except for the current one, 350 * unless some other lwp beat us to it. If exitlwps() fails then 351 * return and the calling lwp will call (or continue in) lwp_exit(). 352 */ 353 proc_is_exiting(p); 354 if (exitlwps(0) != 0) 355 return (1); 356 357 mutex_enter(&p->p_lock); 358 if (p->p_ttime > 0) { 359 /* 360 * Account any remaining ticks charged to this process 361 * on its way out. 362 */ 363 (void) task_cpu_time_incr(p->p_task, p->p_ttime); 364 p->p_ttime = 0; 365 } 366 mutex_exit(&p->p_lock); 367 368 DTRACE_PROC(lwp__exit); 369 DTRACE_PROC1(exit, int, why); 370 371 /* 372 * Will perform any brand specific proc exit processing, since this 373 * is always the last lwp, will also perform lwp_exit and free brand 374 * data 375 */ 376 if (PROC_IS_BRANDED(p)) { 377 lwp_detach_brand_hdlrs(lwp); 378 brand_clearbrand(p); 379 } 380 381 /* 382 * Don't let init exit unless zone_start_init() failed its exec, or 383 * we are shutting down the zone or the machine. 384 * 385 * Since we are single threaded, we don't need to lock the 386 * following accesses to zone_proc_initpid. 387 */ 388 if (p->p_pid == z->zone_proc_initpid) { 389 if (z->zone_boot_err == 0 && 390 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 391 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 392 z->zone_restart_init == B_TRUE && 393 restart_init(what, why) == 0) 394 return (0); 395 /* 396 * Since we didn't or couldn't restart init, we clear 397 * the zone's init state and proceed with exit 398 * processing. 399 */ 400 z->zone_proc_initpid = -1; 401 } 402 403 lwp_pcb_exit(); 404 405 /* 406 * Allocate a sigqueue now, before we grab locks. 407 * It will be given to sigcld(), below. 408 * Special case: If we will be making the process disappear 409 * without a trace because it is either: 410 * * an exiting SSYS process, or 411 * * a posix_spawn() vfork child who requests it, 412 * we don't bother to allocate a useless sigqueue. 413 */ 414 evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) && 415 why == CLD_EXITED && what == _EVAPORATE); 416 if (!evaporate) 417 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 418 419 /* 420 * revoke any doors created by the process. 421 */ 422 if (p->p_door_list) 423 door_exit(); 424 425 /* 426 * Release schedctl data structures. 427 */ 428 if (p->p_pagep) 429 schedctl_proc_cleanup(); 430 431 /* 432 * make sure all pending kaio has completed. 433 */ 434 if (p->p_aio) 435 aio_cleanup_exit(); 436 437 /* 438 * discard the lwpchan cache. 439 */ 440 if (p->p_lcp != NULL) 441 lwpchan_destroy_cache(0); 442 443 /* 444 * Clean up any DTrace helper actions or probes for the process. 445 */ 446 if (p->p_dtrace_helpers != NULL) { 447 ASSERT(dtrace_helpers_cleanup != NULL); 448 (*dtrace_helpers_cleanup)(); 449 } 450 451 /* untimeout the realtime timers */ 452 if (p->p_itimer != NULL) 453 timer_exit(); 454 455 if ((tmp_id = p->p_alarmid) != 0) { 456 p->p_alarmid = 0; 457 (void) untimeout(tmp_id); 458 } 459 460 /* 461 * Remove any fpollinfo_t's for this (last) thread from our file 462 * descriptors so closeall() can ASSERT() that they're all gone. 463 */ 464 pollcleanup(); 465 466 if (p->p_rprof_cyclic != CYCLIC_NONE) { 467 mutex_enter(&cpu_lock); 468 cyclic_remove(p->p_rprof_cyclic); 469 mutex_exit(&cpu_lock); 470 } 471 472 mutex_enter(&p->p_lock); 473 474 /* 475 * Clean up any DTrace probes associated with this process. 476 */ 477 if (p->p_dtrace_probes) { 478 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 479 dtrace_fasttrap_exit_ptr(p); 480 } 481 482 while ((tmp_id = p->p_itimerid) != 0) { 483 p->p_itimerid = 0; 484 mutex_exit(&p->p_lock); 485 (void) untimeout(tmp_id); 486 mutex_enter(&p->p_lock); 487 } 488 489 lwp_cleanup(); 490 491 /* 492 * We are about to exit; prevent our resource associations from 493 * being changed. 494 */ 495 pool_barrier_enter(); 496 497 /* 498 * Block the process against /proc now that we have really 499 * acquired p->p_lock (to manipulate p_tlist at least). 500 */ 501 prbarrier(p); 502 503 #ifdef SUN_SRC_COMPAT 504 if (code == CLD_KILLED) 505 u.u_acflag |= AXSIG; 506 #endif 507 sigfillset(&p->p_ignore); 508 sigemptyset(&p->p_siginfo); 509 sigemptyset(&p->p_sig); 510 sigemptyset(&p->p_extsig); 511 sigemptyset(&t->t_sig); 512 sigemptyset(&t->t_extsig); 513 sigemptyset(&p->p_sigmask); 514 sigdelq(p, t, 0); 515 lwp->lwp_cursig = 0; 516 lwp->lwp_extsig = 0; 517 p->p_flag &= ~(SKILLED | SEXTKILLED); 518 if (lwp->lwp_curinfo) { 519 siginfofree(lwp->lwp_curinfo); 520 lwp->lwp_curinfo = NULL; 521 } 522 523 t->t_proc_flag |= TP_LWPEXIT; 524 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 525 prlwpexit(t); /* notify /proc */ 526 lwp_hash_out(p, t->t_tid); 527 prexit(p); 528 529 p->p_lwpcnt = 0; 530 p->p_tlist = NULL; 531 sigqfree(p); 532 term_mstate(t); 533 p->p_mterm = gethrtime(); 534 535 exec_vp = p->p_exec; 536 execdir_vp = p->p_execdir; 537 p->p_exec = NULLVP; 538 p->p_execdir = NULLVP; 539 mutex_exit(&p->p_lock); 540 541 pr_free_watched_pages(p); 542 543 closeall(P_FINFO(p)); 544 545 /* Free the controlling tty. (freectty() always assumes curproc.) */ 546 ASSERT(p == curproc); 547 (void) freectty(B_TRUE); 548 549 #if defined(__sparc) 550 if (p->p_utraps != NULL) 551 utrap_free(p); 552 #endif 553 if (p->p_semacct) /* IPC semaphore exit */ 554 semexit(p); 555 rv = wstat(why, what); 556 557 acct(rv & 0xff); 558 exacct_commit_proc(p, rv); 559 560 /* 561 * Release any resources associated with C2 auditing 562 */ 563 if (AU_AUDITING()) { 564 /* 565 * audit exit system call 566 */ 567 audit_exit(why, what); 568 } 569 570 /* 571 * Free address space. 572 */ 573 relvm(); 574 575 if (exec_vp) { 576 /* 577 * Close this executable which has been opened when the process 578 * was created by getproc(). 579 */ 580 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL); 581 VN_RELE(exec_vp); 582 } 583 if (execdir_vp) 584 VN_RELE(execdir_vp); 585 586 /* 587 * Release held contracts. 588 */ 589 contract_exit(p); 590 591 /* 592 * Depart our encapsulating process contract. 593 */ 594 if ((p->p_flag & SSYS) == 0) { 595 ASSERT(p->p_ct_process); 596 contract_process_exit(p->p_ct_process, p, rv); 597 } 598 599 /* 600 * Remove pool association, and block if requested by pool_do_bind. 601 */ 602 mutex_enter(&p->p_lock); 603 ASSERT(p->p_pool->pool_ref > 0); 604 atomic_add_32(&p->p_pool->pool_ref, -1); 605 p->p_pool = pool_default; 606 /* 607 * Now that our address space has been freed and all other threads 608 * in this process have exited, set the PEXITED pool flag. This 609 * tells the pools subsystems to ignore this process if it was 610 * requested to rebind this process to a new pool. 611 */ 612 p->p_poolflag |= PEXITED; 613 pool_barrier_exit(); 614 mutex_exit(&p->p_lock); 615 616 mutex_enter(&pidlock); 617 618 /* 619 * Delete this process from the newstate list of its parent. We 620 * will put it in the right place in the sigcld in the end. 621 */ 622 delete_ns(p->p_parent, p); 623 624 /* 625 * Reassign the orphans to the next of kin. 626 * Don't rearrange init's orphanage. 627 */ 628 if ((q = p->p_orphan) != NULL && p != proc_init) { 629 630 proc_t *nokp = p->p_nextofkin; 631 632 for (;;) { 633 q->p_nextofkin = nokp; 634 if (q->p_nextorph == NULL) 635 break; 636 q = q->p_nextorph; 637 } 638 q->p_nextorph = nokp->p_orphan; 639 nokp->p_orphan = p->p_orphan; 640 p->p_orphan = NULL; 641 } 642 643 /* 644 * Reassign the children to init. 645 * Don't try to assign init's children to init. 646 */ 647 if ((q = p->p_child) != NULL && p != proc_init) { 648 struct proc *np; 649 struct proc *initp = proc_init; 650 boolean_t setzonetop = B_FALSE; 651 652 if (!INGLOBALZONE(curproc)) 653 setzonetop = B_TRUE; 654 655 pgdetach(p); 656 657 do { 658 np = q->p_sibling; 659 /* 660 * Delete it from its current parent new state 661 * list and add it to init new state list 662 */ 663 delete_ns(q->p_parent, q); 664 665 q->p_ppid = 1; 666 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); 667 if (setzonetop) { 668 mutex_enter(&q->p_lock); 669 q->p_flag |= SZONETOP; 670 mutex_exit(&q->p_lock); 671 } 672 q->p_parent = initp; 673 674 /* 675 * Since q will be the first child, 676 * it will not have a previous sibling. 677 */ 678 q->p_psibling = NULL; 679 if (initp->p_child) { 680 initp->p_child->p_psibling = q; 681 } 682 q->p_sibling = initp->p_child; 683 initp->p_child = q; 684 if (q->p_proc_flag & P_PR_PTRACE) { 685 mutex_enter(&q->p_lock); 686 sigtoproc(q, NULL, SIGKILL); 687 mutex_exit(&q->p_lock); 688 } 689 /* 690 * sigcld() will add the child to parents 691 * newstate list. 692 */ 693 if (q->p_stat == SZOMB) 694 sigcld(q, NULL); 695 } while ((q = np) != NULL); 696 697 p->p_child = NULL; 698 ASSERT(p->p_child_ns == NULL); 699 } 700 701 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 702 703 mutex_enter(&p->p_lock); 704 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 705 706 /* 707 * Have our task accummulate our resource usage data before they 708 * become contaminated by p_cacct etc., and before we renounce 709 * membership of the task. 710 * 711 * We do this regardless of whether or not task accounting is active. 712 * This is to avoid having nonsense data reported for this task if 713 * task accounting is subsequently enabled. The overhead is minimal; 714 * by this point, this process has accounted for the usage of all its 715 * LWPs. We nonetheless do the work here, and under the protection of 716 * pidlock, so that the movement of the process's usage to the task 717 * happens at the same time as the removal of the process from the 718 * task, from the point of view of exacct_snapshot_task_usage(). 719 */ 720 exacct_update_task_mstate(p); 721 722 hrutime = mstate_aggr_state(p, LMS_USER); 723 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 724 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 725 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 726 727 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 728 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 729 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 730 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 731 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 732 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 733 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 734 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 735 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 736 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 737 738 p->p_ru.minflt += p->p_cru.minflt; 739 p->p_ru.majflt += p->p_cru.majflt; 740 p->p_ru.nswap += p->p_cru.nswap; 741 p->p_ru.inblock += p->p_cru.inblock; 742 p->p_ru.oublock += p->p_cru.oublock; 743 p->p_ru.msgsnd += p->p_cru.msgsnd; 744 p->p_ru.msgrcv += p->p_cru.msgrcv; 745 p->p_ru.nsignals += p->p_cru.nsignals; 746 p->p_ru.nvcsw += p->p_cru.nvcsw; 747 p->p_ru.nivcsw += p->p_cru.nivcsw; 748 p->p_ru.sysc += p->p_cru.sysc; 749 p->p_ru.ioch += p->p_cru.ioch; 750 751 p->p_stat = SZOMB; 752 p->p_proc_flag &= ~P_PR_PTRACE; 753 p->p_wdata = what; 754 p->p_wcode = (char)why; 755 756 cdir = PTOU(p)->u_cdir; 757 rdir = PTOU(p)->u_rdir; 758 cwd = PTOU(p)->u_cwd; 759 760 ASSERT(cdir != NULL || p->p_parent == &p0); 761 762 /* 763 * Release resource controls, as they are no longer enforceable. 764 */ 765 rctl_set_free(p->p_rctls); 766 767 /* 768 * Decrement tk_nlwps counter for our task.max-lwps resource control. 769 * An extended accounting record, if that facility is active, is 770 * scheduled to be written. We cannot give up task and project 771 * membership at this point because that would allow zombies to escape 772 * from the max-processes resource controls. Zombies stay in their 773 * current task and project until the process table slot is released 774 * in freeproc(). 775 */ 776 tk = p->p_task; 777 778 mutex_enter(&p->p_zone->zone_nlwps_lock); 779 tk->tk_nlwps--; 780 tk->tk_proj->kpj_nlwps--; 781 p->p_zone->zone_nlwps--; 782 mutex_exit(&p->p_zone->zone_nlwps_lock); 783 784 /* 785 * Clear the lwp directory and the lwpid hash table 786 * now that /proc can't bother us any more. 787 * We free the memory below, after dropping p->p_lock. 788 */ 789 lwpdir = p->p_lwpdir; 790 lwpdir_sz = p->p_lwpdir_sz; 791 tidhash = p->p_tidhash; 792 tidhash_sz = p->p_tidhash_sz; 793 ret_tidhash = p->p_ret_tidhash; 794 p->p_lwpdir = NULL; 795 p->p_lwpfree = NULL; 796 p->p_lwpdir_sz = 0; 797 p->p_tidhash = NULL; 798 p->p_tidhash_sz = 0; 799 p->p_ret_tidhash = NULL; 800 801 /* 802 * If the process has context ops installed, call the exit routine 803 * on behalf of this last remaining thread. Normally exitpctx() is 804 * called during thread_exit() or lwp_exit(), but because this is the 805 * last thread in the process, we must call it here. By the time 806 * thread_exit() is called (below), the association with the relevant 807 * process has been lost. 808 * 809 * We also free the context here. 810 */ 811 if (p->p_pctx) { 812 kpreempt_disable(); 813 exitpctx(p); 814 kpreempt_enable(); 815 816 freepctx(p, 0); 817 } 818 819 /* 820 * curthread's proc pointer is changed to point to the 'sched' 821 * process for the corresponding zone, except in the case when 822 * the exiting process is in fact a zsched instance, in which 823 * case the proc pointer is set to p0. We do so, so that the 824 * process still points at the right zone when we call the VN_RELE() 825 * below. 826 * 827 * This is because curthread's original proc pointer can be freed as 828 * soon as the child sends a SIGCLD to its parent. We use zsched so 829 * that for user processes, even in the final moments of death, the 830 * process is still associated with its zone. 831 */ 832 if (p != t->t_procp->p_zone->zone_zsched) 833 t->t_procp = t->t_procp->p_zone->zone_zsched; 834 else 835 t->t_procp = &p0; 836 837 mutex_exit(&p->p_lock); 838 if (!evaporate) { 839 p->p_pidflag &= ~CLDPEND; 840 sigcld(p, sqp); 841 } else { 842 /* 843 * Do what sigcld() would do if the disposition 844 * of the SIGCHLD signal were set to be ignored. 845 */ 846 cv_broadcast(&p->p_srwchan_cv); 847 freeproc(p); 848 } 849 mutex_exit(&pidlock); 850 851 /* 852 * We don't release u_cdir and u_rdir until SZOMB is set. 853 * This protects us against dofusers(). 854 */ 855 if (cdir) 856 VN_RELE(cdir); 857 if (rdir) 858 VN_RELE(rdir); 859 if (cwd) 860 refstr_rele(cwd); 861 862 /* 863 * task_rele() may ultimately cause the zone to go away (or 864 * may cause the last user process in a zone to go away, which 865 * signals zsched to go away). So prior to this call, we must 866 * no longer point at zsched. 867 */ 868 t->t_procp = &p0; 869 870 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 871 kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t)); 872 while (ret_tidhash != NULL) { 873 ret_tidhash_t *next = ret_tidhash->rth_next; 874 kmem_free(ret_tidhash->rth_tidhash, 875 ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t)); 876 kmem_free(ret_tidhash, sizeof (*ret_tidhash)); 877 ret_tidhash = next; 878 } 879 880 thread_exit(); 881 /* NOTREACHED */ 882 } 883 884 /* 885 * Format siginfo structure for wait system calls. 886 */ 887 void 888 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 889 { 890 ASSERT(MUTEX_HELD(&pidlock)); 891 892 bzero(ip, sizeof (k_siginfo_t)); 893 ip->si_signo = SIGCLD; 894 ip->si_code = pp->p_wcode; 895 ip->si_pid = pp->p_pid; 896 ip->si_ctid = PRCTID(pp); 897 ip->si_zoneid = pp->p_zone->zone_id; 898 ip->si_status = pp->p_wdata; 899 ip->si_stime = pp->p_stime; 900 ip->si_utime = pp->p_utime; 901 902 if (waitflag) { 903 pp->p_wcode = 0; 904 pp->p_wdata = 0; 905 pp->p_pidflag &= ~CLDPEND; 906 } 907 } 908 909 /* 910 * Wait system call. 911 * Search for a terminated (zombie) child, 912 * finally lay it to rest, and collect its status. 913 * Look also for stopped children, 914 * and pass back status from them. 915 */ 916 int 917 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 918 { 919 int found; 920 proc_t *cp, *pp; 921 int proc_gone; 922 int waitflag = !(options & WNOWAIT); 923 924 /* 925 * Obsolete flag, defined here only for binary compatibility 926 * with old statically linked executables. Delete this when 927 * we no longer care about these old and broken applications. 928 */ 929 #define _WNOCHLD 0400 930 options &= ~_WNOCHLD; 931 932 if (options == 0 || (options & ~WOPTMASK)) 933 return (EINVAL); 934 935 switch (idtype) { 936 case P_PID: 937 case P_PGID: 938 if (id < 0 || id >= maxpid) 939 return (EINVAL); 940 /* FALLTHROUGH */ 941 case P_ALL: 942 break; 943 default: 944 return (EINVAL); 945 } 946 947 pp = ttoproc(curthread); 948 949 /* 950 * lock parent mutex so that sibling chain can be searched. 951 */ 952 mutex_enter(&pidlock); 953 954 /* 955 * if we are only looking for exited processes and child_ns list 956 * is empty no reason to look at all children. 957 */ 958 if (idtype == P_ALL && 959 (options & ~WNOWAIT) == (WNOHANG | WEXITED) && 960 pp->p_child_ns == NULL) { 961 if (pp->p_child) { 962 mutex_exit(&pidlock); 963 bzero(ip, sizeof (k_siginfo_t)); 964 return (0); 965 } 966 mutex_exit(&pidlock); 967 return (ECHILD); 968 } 969 970 while (pp->p_child != NULL) { 971 972 proc_gone = 0; 973 974 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { 975 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) 976 continue; 977 if (idtype == P_PID && id != cp->p_pid) 978 continue; 979 if (idtype == P_PGID && id != cp->p_pgrp) 980 continue; 981 982 switch (cp->p_wcode) { 983 984 case CLD_TRAPPED: 985 case CLD_STOPPED: 986 case CLD_CONTINUED: 987 cmn_err(CE_PANIC, 988 "waitid: wrong state %d on the p_newstate" 989 " list", cp->p_wcode); 990 break; 991 992 case CLD_EXITED: 993 case CLD_DUMPED: 994 case CLD_KILLED: 995 if (!(options & WEXITED)) { 996 /* 997 * Count how many are already gone 998 * for good. 999 */ 1000 proc_gone++; 1001 break; 1002 } 1003 if (!waitflag) { 1004 winfo(cp, ip, 0); 1005 } else { 1006 winfo(cp, ip, 1); 1007 freeproc(cp); 1008 } 1009 mutex_exit(&pidlock); 1010 if (waitflag) { /* accept SIGCLD */ 1011 sigcld_delete(ip); 1012 sigcld_repost(); 1013 } 1014 return (0); 1015 } 1016 1017 if (idtype == P_PID) 1018 break; 1019 } 1020 1021 /* 1022 * Wow! None of the threads on the p_sibling_ns list were 1023 * interesting threads. Check all the kids! 1024 */ 1025 found = 0; 1026 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { 1027 if (idtype == P_PID && id != cp->p_pid) 1028 continue; 1029 if (idtype == P_PGID && id != cp->p_pgrp) 1030 continue; 1031 1032 switch (cp->p_wcode) { 1033 case CLD_TRAPPED: 1034 if (!(options & WTRAPPED)) 1035 break; 1036 winfo(cp, ip, waitflag); 1037 mutex_exit(&pidlock); 1038 if (waitflag) { /* accept SIGCLD */ 1039 sigcld_delete(ip); 1040 sigcld_repost(); 1041 } 1042 return (0); 1043 1044 case CLD_STOPPED: 1045 if (!(options & WSTOPPED)) 1046 break; 1047 /* Is it still stopped? */ 1048 mutex_enter(&cp->p_lock); 1049 if (!jobstopped(cp)) { 1050 mutex_exit(&cp->p_lock); 1051 break; 1052 } 1053 mutex_exit(&cp->p_lock); 1054 winfo(cp, ip, waitflag); 1055 mutex_exit(&pidlock); 1056 if (waitflag) { /* accept SIGCLD */ 1057 sigcld_delete(ip); 1058 sigcld_repost(); 1059 } 1060 return (0); 1061 1062 case CLD_CONTINUED: 1063 if (!(options & WCONTINUED)) 1064 break; 1065 winfo(cp, ip, waitflag); 1066 mutex_exit(&pidlock); 1067 if (waitflag) { /* accept SIGCLD */ 1068 sigcld_delete(ip); 1069 sigcld_repost(); 1070 } 1071 return (0); 1072 1073 case CLD_EXITED: 1074 case CLD_DUMPED: 1075 case CLD_KILLED: 1076 if (idtype != P_PID && 1077 (cp->p_pidflag & CLDWAITPID)) 1078 continue; 1079 /* 1080 * Don't complain if a process was found in 1081 * the first loop but we broke out of the loop 1082 * because of the arguments passed to us. 1083 */ 1084 if (proc_gone == 0) { 1085 cmn_err(CE_PANIC, 1086 "waitid: wrong state on the" 1087 " p_child list"); 1088 } else { 1089 break; 1090 } 1091 } 1092 1093 found++; 1094 1095 if (idtype == P_PID) 1096 break; 1097 } 1098 1099 /* 1100 * If we found no interesting processes at all, 1101 * break out and return ECHILD. 1102 */ 1103 if (found + proc_gone == 0) 1104 break; 1105 1106 if (options & WNOHANG) { 1107 mutex_exit(&pidlock); 1108 bzero(ip, sizeof (k_siginfo_t)); 1109 /* 1110 * We should set ip->si_signo = SIGCLD, 1111 * but there is an SVVS test that expects 1112 * ip->si_signo to be zero in this case. 1113 */ 1114 return (0); 1115 } 1116 1117 /* 1118 * If we found no processes of interest that could 1119 * change state while we wait, we don't wait at all. 1120 * Get out with ECHILD according to SVID. 1121 */ 1122 if (found == proc_gone) 1123 break; 1124 1125 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1126 mutex_exit(&pidlock); 1127 return (EINTR); 1128 } 1129 } 1130 mutex_exit(&pidlock); 1131 return (ECHILD); 1132 } 1133 1134 int 1135 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1136 { 1137 int error; 1138 k_siginfo_t info; 1139 1140 if (error = waitid(idtype, id, &info, options)) 1141 return (set_errno(error)); 1142 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1143 return (set_errno(EFAULT)); 1144 return (0); 1145 } 1146 1147 #ifdef _SYSCALL32_IMPL 1148 1149 int 1150 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1151 { 1152 int error; 1153 k_siginfo_t info; 1154 siginfo32_t info32; 1155 1156 if (error = waitid(idtype, id, &info, options)) 1157 return (set_errno(error)); 1158 siginfo_kto32(&info, &info32); 1159 if (copyout(&info32, infop, sizeof (info32))) 1160 return (set_errno(EFAULT)); 1161 return (0); 1162 } 1163 1164 #endif /* _SYSCALL32_IMPL */ 1165 1166 void 1167 proc_detach(proc_t *p) 1168 { 1169 proc_t *q; 1170 1171 ASSERT(MUTEX_HELD(&pidlock)); 1172 1173 q = p->p_parent; 1174 ASSERT(q != NULL); 1175 1176 /* 1177 * Take it off the newstate list of its parent 1178 */ 1179 delete_ns(q, p); 1180 1181 if (q->p_child == p) { 1182 q->p_child = p->p_sibling; 1183 /* 1184 * If the parent has no children, it better not 1185 * have any with new states either! 1186 */ 1187 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1188 } 1189 1190 if (p->p_sibling) { 1191 p->p_sibling->p_psibling = p->p_psibling; 1192 } 1193 1194 if (p->p_psibling) { 1195 p->p_psibling->p_sibling = p->p_sibling; 1196 } 1197 } 1198 1199 /* 1200 * Remove zombie children from the process table. 1201 */ 1202 void 1203 freeproc(proc_t *p) 1204 { 1205 proc_t *q; 1206 task_t *tk; 1207 zone_t *zone; 1208 1209 ASSERT(p->p_stat == SZOMB); 1210 ASSERT(p->p_tlist == NULL); 1211 ASSERT(MUTEX_HELD(&pidlock)); 1212 1213 sigdelq(p, NULL, 0); 1214 if (p->p_killsqp) { 1215 siginfofree(p->p_killsqp); 1216 p->p_killsqp = NULL; 1217 } 1218 1219 prfree(p); /* inform /proc */ 1220 1221 /* 1222 * Don't free the init processes. 1223 * Other dying processes will access it. 1224 */ 1225 if (p == proc_init) 1226 return; 1227 1228 1229 /* 1230 * We wait until now to free the cred structure because a 1231 * zombie process's credentials may be examined by /proc. 1232 * No cred locking needed because there are no threads at this point. 1233 */ 1234 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1235 crfree(p->p_cred); 1236 if (p->p_corefile != NULL) { 1237 corectl_path_rele(p->p_corefile); 1238 p->p_corefile = NULL; 1239 } 1240 if (p->p_content != NULL) { 1241 corectl_content_rele(p->p_content); 1242 p->p_content = NULL; 1243 } 1244 1245 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1246 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1247 /* 1248 * This should still do the right thing since p_utime/stime 1249 * get set to the correct value on process exit, so it 1250 * should get properly updated 1251 */ 1252 p->p_nextofkin->p_cutime += p->p_utime; 1253 p->p_nextofkin->p_cstime += p->p_stime; 1254 1255 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1256 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1257 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1258 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1259 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1260 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1261 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1262 += p->p_acct[LMS_USER_LOCK]; 1263 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1264 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1265 += p->p_acct[LMS_WAIT_CPU]; 1266 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1267 1268 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1269 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1270 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1271 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1272 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1273 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1274 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1275 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1276 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1277 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1278 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1279 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1280 1281 } 1282 1283 q = p->p_nextofkin; 1284 if (q && q->p_orphan == p) 1285 q->p_orphan = p->p_nextorph; 1286 else if (q) { 1287 for (q = q->p_orphan; q; q = q->p_nextorph) 1288 if (q->p_nextorph == p) 1289 break; 1290 ASSERT(q && q->p_nextorph == p); 1291 q->p_nextorph = p->p_nextorph; 1292 } 1293 1294 /* 1295 * The process table slot is being freed, so it is now safe to give up 1296 * task and project membership. 1297 */ 1298 zone = p->p_zone; 1299 mutex_enter(&p->p_lock); 1300 tk = p->p_task; 1301 task_detach(p); 1302 p->p_task = task0p; 1303 mutex_exit(&p->p_lock); 1304 1305 proc_detach(p); 1306 pid_exit(p); /* frees pid and proc structure */ 1307 1308 mutex_enter(&zone->zone_nlwps_lock); 1309 tk->tk_nprocs--; 1310 tk->tk_proj->kpj_nprocs--; 1311 zone->zone_nprocs--; 1312 mutex_exit(&zone->zone_nlwps_lock); 1313 1314 task_rele(tk); 1315 } 1316 1317 /* 1318 * Delete process "child" from the newstate list of process "parent" 1319 */ 1320 void 1321 delete_ns(proc_t *parent, proc_t *child) 1322 { 1323 proc_t **ns; 1324 1325 ASSERT(MUTEX_HELD(&pidlock)); 1326 ASSERT(child->p_parent == parent); 1327 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1328 if (*ns == child) { 1329 1330 ASSERT((*ns)->p_parent == parent); 1331 1332 *ns = child->p_sibling_ns; 1333 child->p_sibling_ns = NULL; 1334 return; 1335 } 1336 } 1337 } 1338 1339 /* 1340 * Add process "child" to the new state list of process "parent" 1341 */ 1342 void 1343 add_ns(proc_t *parent, proc_t *child) 1344 { 1345 ASSERT(child->p_sibling_ns == NULL); 1346 child->p_sibling_ns = parent->p_child_ns; 1347 parent->p_child_ns = child; 1348 } 1349