1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/user.h> 37 #include <sys/errno.h> 38 #include <sys/proc.h> 39 #include <sys/ucontext.h> 40 #include <sys/procfs.h> 41 #include <sys/vnode.h> 42 #include <sys/acct.h> 43 #include <sys/var.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/wait.h> 47 #include <sys/siginfo.h> 48 #include <sys/procset.h> 49 #include <sys/class.h> 50 #include <sys/file.h> 51 #include <sys/session.h> 52 #include <sys/kmem.h> 53 #include <sys/vtrace.h> 54 #include <sys/prsystm.h> 55 #include <sys/ipc.h> 56 #include <sys/sem_impl.h> 57 #include <c2/audit.h> 58 #include <sys/aio_impl.h> 59 #include <vm/as.h> 60 #include <sys/poll.h> 61 #include <sys/door.h> 62 #include <sys/lwpchan_impl.h> 63 #include <sys/utrap.h> 64 #include <sys/task.h> 65 
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>
#include <sys/brand.h>
#include <sys/libc_kernel.h>

/*
 * Build an old style wait status word from a CLD_* code and its
 * associated data value.  Only the low eight bits of data are used.
 * An unrecognized code is reported through cmn_err(CE_PANIC).
 */
int
wstat(int code, int data)
{
	int status = (data & 0377);

	switch (code) {
	case CLD_KILLED:
		/* low byte is returned as is */
		return (status);
	case CLD_DUMPED:
		return (status | WCOREFLG);
	case CLD_EXITED:
		/* low byte moves up into the second byte */
		return (status << 8);
	case CLD_TRAPPED:
	case CLD_STOPPED:
		return ((status << 8) | WSTOPFLG);
	case CLD_CONTINUED:
		/* data is not reported for a continued process */
		return (WCONTFLG);
	default:
		cmn_err(CE_PANIC, "wstat: bad code");
		/* NOTREACHED */
	}
	return (status);
}

/*
 * Format a human readable description of an exit (why, what) pair into
 * buf, which is bufsz bytes long, and hand buf back to the caller so the
 * call can be used directly as a message argument.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	if (why == CLD_EXITED) {
		(void) snprintf(buf, bufsz, "exited with status %d", what);
	} else if (why == CLD_KILLED) {
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
	} else if (why == CLD_DUMPED) {
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
	} else {
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
	}

	return (buf);
}

/*
 * exit system call: the caller's argument becomes the exit status
 * of a CLD_EXITED termination.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed.  As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.
Most 148 * importantly this includes closing all file descriptors (exec only 149 * closes those marked close-on-exec) and resetting signals (exec only 150 * resets handled signals, and we need to clear any signals which 151 * killed init). Anything else that exec(2) says would be inherited, 152 * but would affect the execution of init, needs to be reset. 153 */ 154 static int 155 restart_init(int what, int why) 156 { 157 kthread_t *t = curthread; 158 klwp_t *lwp = ttolwp(t); 159 proc_t *p = ttoproc(t); 160 user_t *up = PTOU(p); 161 162 vnode_t *oldcd, *oldrd; 163 int i, err; 164 char reason_buf[64]; 165 166 /* 167 * Let zone admin (and global zone admin if this is for a non-global 168 * zone) know that init has failed and will be restarted. 169 */ 170 zcmn_err(p->p_zone->zone_id, CE_WARN, 171 "init(1M) %s: restarting automatically", 172 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 173 174 if (!INGLOBALZONE(p)) { 175 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 176 "restarting automatically", 177 p->p_zone->zone_name, p->p_pid, reason_buf); 178 } 179 180 /* 181 * Remove any fpollinfo_t's for this (last) thread from our file 182 * descriptors so closeall() can ASSERT() that they're all gone. 183 * Then close all open file descriptors in the process. 184 */ 185 pollcleanup(); 186 closeall(P_FINFO(p)); 187 188 /* 189 * Grab p_lock and begin clearing miscellaneous global process 190 * state that needs to be reset before we exec the new init(1M). 
191 */ 192 193 mutex_enter(&p->p_lock); 194 prbarrier(p); 195 196 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 197 up->u_cmask = CMASK; 198 199 sigemptyset(&t->t_hold); 200 sigemptyset(&t->t_sig); 201 sigemptyset(&t->t_extsig); 202 203 sigemptyset(&p->p_sig); 204 sigemptyset(&p->p_extsig); 205 206 sigdelq(p, t, 0); 207 sigdelq(p, NULL, 0); 208 209 if (p->p_killsqp) { 210 siginfofree(p->p_killsqp); 211 p->p_killsqp = NULL; 212 } 213 214 /* 215 * Reset any signals that are ignored back to the default disposition. 216 * Other u_signal members will be cleared when exec calls sigdefault(). 217 */ 218 for (i = 1; i < NSIG; i++) { 219 if (up->u_signal[i - 1] == SIG_IGN) { 220 up->u_signal[i - 1] = SIG_DFL; 221 sigemptyset(&up->u_sigmask[i - 1]); 222 } 223 } 224 225 /* 226 * Clear the current signal, any signal info associated with it, and 227 * any signal information from contracts and/or contract templates. 228 */ 229 lwp->lwp_cursig = 0; 230 lwp->lwp_extsig = 0; 231 if (lwp->lwp_curinfo != NULL) { 232 siginfofree(lwp->lwp_curinfo); 233 lwp->lwp_curinfo = NULL; 234 } 235 lwp_ctmpl_clear(lwp); 236 237 /* 238 * Reset both the process root directory and the current working 239 * directory to the root of the zone just as we do during boot. 240 */ 241 VN_HOLD(p->p_zone->zone_rootvp); 242 oldrd = up->u_rdir; 243 up->u_rdir = p->p_zone->zone_rootvp; 244 245 VN_HOLD(p->p_zone->zone_rootvp); 246 oldcd = up->u_cdir; 247 up->u_cdir = p->p_zone->zone_rootvp; 248 249 if (up->u_cwd != NULL) { 250 refstr_rele(up->u_cwd); 251 up->u_cwd = NULL; 252 } 253 254 mutex_exit(&p->p_lock); 255 256 if (oldrd != NULL) 257 VN_RELE(oldrd); 258 if (oldcd != NULL) 259 VN_RELE(oldcd); 260 261 /* Free the controlling tty. (freectty() always assumes curproc.) */ 262 ASSERT(p == curproc); 263 (void) freectty(B_TRUE); 264 265 /* 266 * Now exec() the new init(1M) on top of the current process. If we 267 * succeed, the caller will treat this like a successful system call. 
268 * If we fail, we issue messages and the caller will proceed with exit. 269 */ 270 err = exec_init(p->p_zone->zone_initname, NULL); 271 272 if (err == 0) 273 return (0); 274 275 zcmn_err(p->p_zone->zone_id, CE_WARN, 276 "failed to restart init(1M) (err=%d): system reboot required", err); 277 278 if (!INGLOBALZONE(p)) { 279 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 280 "(pid %d, err=%d): zoneadm(1M) boot required", 281 p->p_zone->zone_name, p->p_pid, err); 282 } 283 284 return (-1); 285 } 286 287 /* 288 * Release resources. 289 * Enter zombie state. 290 * Wake up parent and init processes, 291 * and dispose of children. 292 */ 293 void 294 exit(int why, int what) 295 { 296 /* 297 * If proc_exit() fails, then some other lwp in the process 298 * got there first. We just have to call lwp_exit() to allow 299 * the other lwp to finish exiting the process. Otherwise we're 300 * restarting init, and should return. 301 */ 302 if (proc_exit(why, what) != 0) { 303 mutex_enter(&curproc->p_lock); 304 ASSERT(curproc->p_flag & SEXITLWPS); 305 lwp_exit(); 306 /* NOTREACHED */ 307 } 308 } 309 310 /* 311 * Set the SEXITING flag on the process, after making sure /proc does 312 * not have it locked. This is done in more places than proc_exit(), 313 * so it is a separate function. 314 */ 315 void 316 proc_is_exiting(proc_t *p) 317 { 318 mutex_enter(&p->p_lock); 319 prbarrier(p); 320 p->p_flag |= SEXITING; 321 mutex_exit(&p->p_lock); 322 } 323 324 /* 325 * Return value: 326 * 1 - exitlwps() failed, call (or continue) lwp_exit() 327 * 0 - restarting init. 
Return through system call path 328 */ 329 int 330 proc_exit(int why, int what) 331 { 332 kthread_t *t = curthread; 333 klwp_t *lwp = ttolwp(t); 334 proc_t *p = ttoproc(t); 335 zone_t *z = p->p_zone; 336 timeout_id_t tmp_id; 337 int rv; 338 proc_t *q; 339 task_t *tk; 340 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 341 sigqueue_t *sqp; 342 lwpdir_t *lwpdir; 343 uint_t lwpdir_sz; 344 lwpdir_t **tidhash; 345 uint_t tidhash_sz; 346 refstr_t *cwd; 347 hrtime_t hrutime, hrstime; 348 int evaporate; 349 350 /* 351 * Stop and discard the process's lwps except for the current one, 352 * unless some other lwp beat us to it. If exitlwps() fails then 353 * return and the calling lwp will call (or continue in) lwp_exit(). 354 */ 355 proc_is_exiting(p); 356 if (exitlwps(0) != 0) 357 return (1); 358 359 DTRACE_PROC(lwp__exit); 360 DTRACE_PROC1(exit, int, why); 361 362 /* 363 * Will perform any brand specific proc exit processing, since this 364 * is always the last lwp, will also perform lwp_exit and free brand 365 * data 366 */ 367 if (PROC_IS_BRANDED(p)) 368 BROP(p)->b_proc_exit(p, lwp); 369 370 /* 371 * Don't let init exit unless zone_start_init() failed its exec, or 372 * we are shutting down the zone or the machine. 373 * 374 * Since we are single threaded, we don't need to lock the 375 * following accesses to zone_proc_initpid. 376 */ 377 if (p->p_pid == z->zone_proc_initpid) { 378 if (z->zone_boot_err == 0 && 379 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 380 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 381 z->zone_restart_init == B_TRUE && 382 restart_init(what, why) == 0) 383 return (0); 384 /* 385 * Since we didn't or couldn't restart init, we clear 386 * the zone's init state and proceed with exit 387 * processing. 388 */ 389 z->zone_proc_initpid = -1; 390 } 391 392 /* 393 * Allocate a sigqueue now, before we grab locks. 394 * It will be given to sigcld(), below. 
395 * Special case: If we will be making the process disappear 396 * without a trace (for the benefit of posix_spawn() in libc) 397 * don't bother to allocate a useless sigqueue. 398 */ 399 evaporate = ((p->p_flag & SVFORK) && 400 why == CLD_EXITED && what == _EVAPORATE); 401 if (!evaporate) 402 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 403 404 /* 405 * revoke any doors created by the process. 406 */ 407 if (p->p_door_list) 408 door_exit(); 409 410 /* 411 * Release schedctl data structures. 412 */ 413 if (p->p_pagep) 414 schedctl_proc_cleanup(); 415 416 /* 417 * make sure all pending kaio has completed. 418 */ 419 if (p->p_aio) 420 aio_cleanup_exit(); 421 422 /* 423 * discard the lwpchan cache. 424 */ 425 if (p->p_lcp != NULL) 426 lwpchan_destroy_cache(0); 427 428 /* 429 * Clean up any DTrace helper actions or probes for the process. 430 */ 431 if (p->p_dtrace_helpers != NULL) { 432 ASSERT(dtrace_helpers_cleanup != NULL); 433 (*dtrace_helpers_cleanup)(); 434 } 435 436 /* untimeout the realtime timers */ 437 if (p->p_itimer != NULL) 438 timer_exit(); 439 440 if ((tmp_id = p->p_alarmid) != 0) { 441 p->p_alarmid = 0; 442 (void) untimeout(tmp_id); 443 } 444 445 /* 446 * Remove any fpollinfo_t's for this (last) thread from our file 447 * descriptors so closeall() can ASSERT() that they're all gone. 448 */ 449 pollcleanup(); 450 451 if (p->p_rprof_cyclic != CYCLIC_NONE) { 452 mutex_enter(&cpu_lock); 453 cyclic_remove(p->p_rprof_cyclic); 454 mutex_exit(&cpu_lock); 455 } 456 457 mutex_enter(&p->p_lock); 458 459 /* 460 * Clean up any DTrace probes associated with this process. 
461 */ 462 if (p->p_dtrace_probes) { 463 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 464 dtrace_fasttrap_exit_ptr(p); 465 } 466 467 while ((tmp_id = p->p_itimerid) != 0) { 468 p->p_itimerid = 0; 469 mutex_exit(&p->p_lock); 470 (void) untimeout(tmp_id); 471 mutex_enter(&p->p_lock); 472 } 473 474 lwp_cleanup(); 475 476 /* 477 * We are about to exit; prevent our resource associations from 478 * being changed. 479 */ 480 pool_barrier_enter(); 481 482 /* 483 * Block the process against /proc now that we have really 484 * acquired p->p_lock (to manipulate p_tlist at least). 485 */ 486 prbarrier(p); 487 488 #ifdef SUN_SRC_COMPAT 489 if (code == CLD_KILLED) 490 u.u_acflag |= AXSIG; 491 #endif 492 sigfillset(&p->p_ignore); 493 sigemptyset(&p->p_siginfo); 494 sigemptyset(&p->p_sig); 495 sigemptyset(&p->p_extsig); 496 sigemptyset(&t->t_sig); 497 sigemptyset(&t->t_extsig); 498 sigemptyset(&p->p_sigmask); 499 sigdelq(p, t, 0); 500 lwp->lwp_cursig = 0; 501 lwp->lwp_extsig = 0; 502 p->p_flag &= ~(SKILLED | SEXTKILLED); 503 if (lwp->lwp_curinfo) { 504 siginfofree(lwp->lwp_curinfo); 505 lwp->lwp_curinfo = NULL; 506 } 507 508 t->t_proc_flag |= TP_LWPEXIT; 509 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 510 prlwpexit(t); /* notify /proc */ 511 lwp_hash_out(p, t->t_tid); 512 prexit(p); 513 514 p->p_lwpcnt = 0; 515 p->p_tlist = NULL; 516 sigqfree(p); 517 term_mstate(t); 518 p->p_mterm = gethrtime(); 519 520 exec_vp = p->p_exec; 521 execdir_vp = p->p_execdir; 522 p->p_exec = NULLVP; 523 p->p_execdir = NULLVP; 524 mutex_exit(&p->p_lock); 525 if (exec_vp) 526 VN_RELE(exec_vp); 527 if (execdir_vp) 528 VN_RELE(execdir_vp); 529 530 pr_free_watched_pages(p); 531 532 closeall(P_FINFO(p)); 533 534 /* Free the controlling tty. (freectty() always assumes curproc.) 
*/ 535 ASSERT(p == curproc); 536 (void) freectty(B_TRUE); 537 538 #if defined(__sparc) 539 if (p->p_utraps != NULL) 540 utrap_free(p); 541 #endif 542 if (p->p_semacct) /* IPC semaphore exit */ 543 semexit(p); 544 rv = wstat(why, what); 545 546 acct(rv & 0xff); 547 exacct_commit_proc(p, rv); 548 549 /* 550 * Release any resources associated with C2 auditing 551 */ 552 #ifdef C2_AUDIT 553 if (audit_active) { 554 /* 555 * audit exit system call 556 */ 557 audit_exit(why, what); 558 } 559 #endif 560 561 /* 562 * Free address space. 563 */ 564 relvm(); 565 566 /* 567 * Release held contracts. 568 */ 569 contract_exit(p); 570 571 /* 572 * Depart our encapsulating process contract. 573 */ 574 if ((p->p_flag & SSYS) == 0) { 575 ASSERT(p->p_ct_process); 576 contract_process_exit(p->p_ct_process, p, rv); 577 } 578 579 /* 580 * Remove pool association, and block if requested by pool_do_bind. 581 */ 582 mutex_enter(&p->p_lock); 583 ASSERT(p->p_pool->pool_ref > 0); 584 atomic_add_32(&p->p_pool->pool_ref, -1); 585 p->p_pool = pool_default; 586 /* 587 * Now that our address space has been freed and all other threads 588 * in this process have exited, set the PEXITED pool flag. This 589 * tells the pools subsystems to ignore this process if it was 590 * requested to rebind this process to a new pool. 591 */ 592 p->p_poolflag |= PEXITED; 593 pool_barrier_exit(); 594 mutex_exit(&p->p_lock); 595 596 mutex_enter(&pidlock); 597 598 /* 599 * Delete this process from the newstate list of its parent. We 600 * will put it in the right place in the sigcld in the end. 601 */ 602 delete_ns(p->p_parent, p); 603 604 /* 605 * Reassign the orphans to the next of kin. 606 * Don't rearrange init's orphanage. 
607 */ 608 if ((q = p->p_orphan) != NULL && p != proc_init) { 609 610 proc_t *nokp = p->p_nextofkin; 611 612 for (;;) { 613 q->p_nextofkin = nokp; 614 if (q->p_nextorph == NULL) 615 break; 616 q = q->p_nextorph; 617 } 618 q->p_nextorph = nokp->p_orphan; 619 nokp->p_orphan = p->p_orphan; 620 p->p_orphan = NULL; 621 } 622 623 /* 624 * Reassign the children to init. 625 * Don't try to assign init's children to init. 626 */ 627 if ((q = p->p_child) != NULL && p != proc_init) { 628 struct proc *np; 629 struct proc *initp = proc_init; 630 boolean_t setzonetop = B_FALSE; 631 632 if (!INGLOBALZONE(curproc)) 633 setzonetop = B_TRUE; 634 635 pgdetach(p); 636 637 do { 638 np = q->p_sibling; 639 /* 640 * Delete it from its current parent new state 641 * list and add it to init new state list 642 */ 643 delete_ns(q->p_parent, q); 644 645 q->p_ppid = 1; 646 if (setzonetop) { 647 mutex_enter(&q->p_lock); 648 q->p_flag |= SZONETOP; 649 mutex_exit(&q->p_lock); 650 } 651 q->p_parent = initp; 652 653 /* 654 * Since q will be the first child, 655 * it will not have a previous sibling. 656 */ 657 q->p_psibling = NULL; 658 if (initp->p_child) { 659 initp->p_child->p_psibling = q; 660 } 661 q->p_sibling = initp->p_child; 662 initp->p_child = q; 663 if (q->p_proc_flag & P_PR_PTRACE) { 664 mutex_enter(&q->p_lock); 665 sigtoproc(q, NULL, SIGKILL); 666 mutex_exit(&q->p_lock); 667 } 668 /* 669 * sigcld() will add the child to parents 670 * newstate list. 
671 */ 672 if (q->p_stat == SZOMB) 673 sigcld(q, NULL); 674 } while ((q = np) != NULL); 675 676 p->p_child = NULL; 677 ASSERT(p->p_child_ns == NULL); 678 } 679 680 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 681 682 mutex_enter(&p->p_lock); 683 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 684 685 hrutime = mstate_aggr_state(p, LMS_USER); 686 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 687 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 688 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 689 690 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 691 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 692 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 693 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 694 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 695 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 696 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 697 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 698 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 699 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 700 701 p->p_ru.minflt += p->p_cru.minflt; 702 p->p_ru.majflt += p->p_cru.majflt; 703 p->p_ru.nswap += p->p_cru.nswap; 704 p->p_ru.inblock += p->p_cru.inblock; 705 p->p_ru.oublock += p->p_cru.oublock; 706 p->p_ru.msgsnd += p->p_cru.msgsnd; 707 p->p_ru.msgrcv += p->p_cru.msgrcv; 708 p->p_ru.nsignals += p->p_cru.nsignals; 709 p->p_ru.nvcsw += p->p_cru.nvcsw; 710 p->p_ru.nivcsw += p->p_cru.nivcsw; 711 p->p_ru.sysc += p->p_cru.sysc; 712 p->p_ru.ioch += p->p_cru.ioch; 713 714 p->p_stat = SZOMB; 715 p->p_proc_flag &= ~P_PR_PTRACE; 716 p->p_wdata = what; 717 p->p_wcode = (char)why; 718 719 cdir = PTOU(p)->u_cdir; 720 rdir = PTOU(p)->u_rdir; 721 cwd = PTOU(p)->u_cwd; 722 723 /* 724 * Release resource controls, as they are no longer enforceable. 725 */ 726 rctl_set_free(p->p_rctls); 727 728 /* 729 * Give up task and project memberships. 
Decrement tk_nlwps counter 730 * for our task.max-lwps resource control. An extended accounting 731 * record, if that facility is active, is scheduled to be written. 732 * Zombie processes are false members of task0 for the remainder of 733 * their lifetime; no accounting information is recorded for them. 734 */ 735 tk = p->p_task; 736 737 mutex_enter(&p->p_zone->zone_nlwps_lock); 738 tk->tk_nlwps--; 739 tk->tk_proj->kpj_nlwps--; 740 p->p_zone->zone_nlwps--; 741 mutex_exit(&p->p_zone->zone_nlwps_lock); 742 task_detach(p); 743 p->p_task = task0p; 744 745 /* 746 * Clear the lwp directory and the lwpid hash table 747 * now that /proc can't bother us any more. 748 * We free the memory below, after dropping p->p_lock. 749 */ 750 lwpdir = p->p_lwpdir; 751 lwpdir_sz = p->p_lwpdir_sz; 752 tidhash = p->p_tidhash; 753 tidhash_sz = p->p_tidhash_sz; 754 p->p_lwpdir = NULL; 755 p->p_lwpfree = NULL; 756 p->p_lwpdir_sz = 0; 757 p->p_tidhash = NULL; 758 p->p_tidhash_sz = 0; 759 760 /* 761 * If the process has context ops installed, call the exit routine 762 * on behalf of this last remaining thread. Normally exitpctx() is 763 * called during thread_exit() or lwp_exit(), but because this is the 764 * last thread in the process, we must call it here. By the time 765 * thread_exit() is called (below), the association with the relevant 766 * process has been lost. 767 * 768 * We also free the context here. 769 */ 770 if (p->p_pctx) { 771 kpreempt_disable(); 772 exitpctx(p); 773 kpreempt_enable(); 774 775 freepctx(p, 0); 776 } 777 778 /* 779 * curthread's proc pointer is changed to point at p0 because 780 * curthread's original proc pointer can be freed as soon as 781 * the child sends a SIGCLD to its parent. 782 */ 783 t->t_procp = &p0; 784 785 mutex_exit(&p->p_lock); 786 if (!evaporate) 787 sigcld(p, sqp); 788 else { 789 /* 790 * Do what sigcld() would do if the disposition 791 * of the SIGCHLD signal were set to be ignored. 
792 */ 793 cv_broadcast(&p->p_srwchan_cv); 794 freeproc(p); 795 } 796 mutex_exit(&pidlock); 797 798 task_rele(tk); 799 800 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 801 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 802 803 /* 804 * We don't release u_cdir and u_rdir until SZOMB is set. 805 * This protects us against dofusers(). 806 */ 807 VN_RELE(cdir); 808 if (rdir) 809 VN_RELE(rdir); 810 if (cwd) 811 refstr_rele(cwd); 812 813 lwp_pcb_exit(); 814 815 thread_exit(); 816 /* NOTREACHED */ 817 } 818 819 /* 820 * Format siginfo structure for wait system calls. 821 */ 822 void 823 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 824 { 825 ASSERT(MUTEX_HELD(&pidlock)); 826 827 bzero(ip, sizeof (k_siginfo_t)); 828 ip->si_signo = SIGCLD; 829 ip->si_code = pp->p_wcode; 830 ip->si_pid = pp->p_pid; 831 ip->si_ctid = PRCTID(pp); 832 ip->si_zoneid = pp->p_zone->zone_id; 833 ip->si_status = pp->p_wdata; 834 ip->si_stime = pp->p_stime; 835 ip->si_utime = pp->p_utime; 836 837 if (waitflag) { 838 pp->p_wcode = 0; 839 pp->p_wdata = 0; 840 pp->p_pidflag &= ~CLDPEND; 841 } 842 } 843 844 /* 845 * Wait system call. 846 * Search for a terminated (zombie) child, 847 * finally lay it to rest, and collect its status. 848 * Look also for stopped children, 849 * and pass back status from them. 850 */ 851 int 852 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 853 { 854 int found; 855 proc_t *cp, *pp; 856 proc_t **nsp; 857 int proc_gone; 858 int waitflag = !(options & WNOWAIT); 859 860 /* 861 * Obsolete flag, defined here only for binary compatibility 862 * with old statically linked executables. Delete this when 863 * we no longer care about these old and broken applications. 
864 */ 865 #define _WNOCHLD 0400 866 options &= ~_WNOCHLD; 867 868 if (options == 0 || (options & ~WOPTMASK)) 869 return (EINVAL); 870 871 switch (idtype) { 872 case P_PID: 873 case P_PGID: 874 if (id < 0 || id >= maxpid) 875 return (EINVAL); 876 /* FALLTHROUGH */ 877 case P_ALL: 878 break; 879 default: 880 return (EINVAL); 881 } 882 883 pp = ttoproc(curthread); 884 885 /* 886 * lock parent mutex so that sibling chain can be searched. 887 */ 888 mutex_enter(&pidlock); 889 890 /* 891 * if we are only looking for exited processes and child_ns list 892 * is empty no reason to look at all children. 893 */ 894 if (idtype == P_ALL && 895 (options & (WOPTMASK & ~WNOWAIT)) == (WNOHANG | WEXITED) && 896 pp->p_child_ns == NULL) { 897 898 if (pp->p_child) { 899 mutex_exit(&pidlock); 900 bzero(ip, sizeof (k_siginfo_t)); 901 return (0); 902 } 903 mutex_exit(&pidlock); 904 return (ECHILD); 905 } 906 907 while ((cp = pp->p_child) != NULL) { 908 909 proc_gone = 0; 910 911 for (nsp = &pp->p_child_ns; *nsp; nsp = &(*nsp)->p_sibling_ns) { 912 if (idtype == P_PID && id != (*nsp)->p_pid) { 913 continue; 914 } 915 if (idtype == P_PGID && id != (*nsp)->p_pgrp) { 916 continue; 917 } 918 919 switch ((*nsp)->p_wcode) { 920 921 case CLD_TRAPPED: 922 case CLD_STOPPED: 923 case CLD_CONTINUED: 924 cmn_err(CE_PANIC, 925 "waitid: wrong state %d on the p_newstate" 926 " list", (*nsp)->p_wcode); 927 break; 928 929 case CLD_EXITED: 930 case CLD_DUMPED: 931 case CLD_KILLED: 932 if (!(options & WEXITED)) { 933 /* 934 * Count how many are already gone 935 * for good. 936 */ 937 proc_gone++; 938 break; 939 } 940 if (!waitflag) { 941 winfo((*nsp), ip, 0); 942 } else { 943 proc_t *xp = *nsp; 944 winfo(xp, ip, 1); 945 freeproc(xp); 946 } 947 mutex_exit(&pidlock); 948 if (waitflag) { /* accept SIGCLD */ 949 sigcld_delete(ip); 950 sigcld_repost(); 951 } 952 return (0); 953 } 954 955 if (idtype == P_PID) 956 break; 957 } 958 959 /* 960 * Wow! 
None of the threads on the p_sibling_ns list were 961 * interesting threads. Check all the kids! 962 */ 963 found = 0; 964 cp = pp->p_child; 965 do { 966 if (idtype == P_PID && id != cp->p_pid) { 967 continue; 968 } 969 if (idtype == P_PGID && id != cp->p_pgrp) { 970 continue; 971 } 972 973 found++; 974 975 switch (cp->p_wcode) { 976 case CLD_TRAPPED: 977 if (!(options & WTRAPPED)) 978 break; 979 winfo(cp, ip, waitflag); 980 mutex_exit(&pidlock); 981 if (waitflag) { /* accept SIGCLD */ 982 sigcld_delete(ip); 983 sigcld_repost(); 984 } 985 return (0); 986 987 case CLD_STOPPED: 988 if (!(options & WSTOPPED)) 989 break; 990 /* Is it still stopped? */ 991 mutex_enter(&cp->p_lock); 992 if (!jobstopped(cp)) { 993 mutex_exit(&cp->p_lock); 994 break; 995 } 996 mutex_exit(&cp->p_lock); 997 winfo(cp, ip, waitflag); 998 mutex_exit(&pidlock); 999 if (waitflag) { /* accept SIGCLD */ 1000 sigcld_delete(ip); 1001 sigcld_repost(); 1002 } 1003 return (0); 1004 1005 case CLD_CONTINUED: 1006 if (!(options & WCONTINUED)) 1007 break; 1008 winfo(cp, ip, waitflag); 1009 mutex_exit(&pidlock); 1010 if (waitflag) { /* accept SIGCLD */ 1011 sigcld_delete(ip); 1012 sigcld_repost(); 1013 } 1014 return (0); 1015 1016 case CLD_EXITED: 1017 case CLD_DUMPED: 1018 case CLD_KILLED: 1019 /* 1020 * Don't complain if a process was found in 1021 * the first loop but we broke out of the loop 1022 * because of the arguments passed to us. 1023 */ 1024 if (proc_gone == 0) { 1025 cmn_err(CE_PANIC, 1026 "waitid: wrong state on the" 1027 " p_child list"); 1028 } else { 1029 break; 1030 } 1031 } 1032 1033 if (idtype == P_PID) 1034 break; 1035 } while ((cp = cp->p_sibling) != NULL); 1036 1037 /* 1038 * If we found no interesting processes at all, 1039 * break out and return ECHILD. 
1040 */ 1041 if (found + proc_gone == 0) 1042 break; 1043 1044 if (options & WNOHANG) { 1045 bzero(ip, sizeof (k_siginfo_t)); 1046 /* 1047 * We should set ip->si_signo = SIGCLD, 1048 * but there is an SVVS test that expects 1049 * ip->si_signo to be zero in this case. 1050 */ 1051 mutex_exit(&pidlock); 1052 return (0); 1053 } 1054 1055 /* 1056 * If we found no processes of interest that could 1057 * change state while we wait, we don't wait at all. 1058 * Get out with ECHILD according to SVID. 1059 */ 1060 if (found == proc_gone) 1061 break; 1062 1063 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1064 mutex_exit(&pidlock); 1065 return (EINTR); 1066 } 1067 } 1068 mutex_exit(&pidlock); 1069 return (ECHILD); 1070 } 1071 1072 /* 1073 * For implementations that don't require binary compatibility, 1074 * the wait system call may be made into a library call to the 1075 * waitid system call. 1076 */ 1077 int64_t 1078 wait(void) 1079 { 1080 int error; 1081 k_siginfo_t info; 1082 rval_t r; 1083 1084 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1085 return (set_errno(error)); 1086 r.r_val1 = info.si_pid; 1087 r.r_val2 = wstat(info.si_code, info.si_status); 1088 return (r.r_vals); 1089 } 1090 1091 int 1092 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1093 { 1094 int error; 1095 k_siginfo_t info; 1096 1097 if (error = waitid(idtype, id, &info, options)) 1098 return (set_errno(error)); 1099 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1100 return (set_errno(EFAULT)); 1101 return (0); 1102 } 1103 1104 #ifdef _SYSCALL32_IMPL 1105 1106 int 1107 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1108 { 1109 int error; 1110 k_siginfo_t info; 1111 siginfo32_t info32; 1112 1113 if (error = waitid(idtype, id, &info, options)) 1114 return (set_errno(error)); 1115 siginfo_kto32(&info, &info32); 1116 if (copyout(&info32, infop, sizeof (info32))) 1117 return (set_errno(EFAULT)); 1118 return (0); 1119 } 1120 1121 #endif /* 
_SYSCALL32_IMPL */ 1122 1123 void 1124 proc_detach(proc_t *p) 1125 { 1126 proc_t *q; 1127 1128 ASSERT(MUTEX_HELD(&pidlock)); 1129 1130 q = p->p_parent; 1131 ASSERT(q != NULL); 1132 1133 /* 1134 * Take it off the newstate list of its parent 1135 */ 1136 delete_ns(q, p); 1137 1138 if (q->p_child == p) { 1139 q->p_child = p->p_sibling; 1140 /* 1141 * If the parent has no children, it better not 1142 * have any with new states either! 1143 */ 1144 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1145 } 1146 1147 if (p->p_sibling) { 1148 p->p_sibling->p_psibling = p->p_psibling; 1149 } 1150 1151 if (p->p_psibling) { 1152 p->p_psibling->p_sibling = p->p_sibling; 1153 } 1154 } 1155 1156 /* 1157 * Remove zombie children from the process table. 1158 */ 1159 void 1160 freeproc(proc_t *p) 1161 { 1162 proc_t *q; 1163 1164 ASSERT(p->p_stat == SZOMB); 1165 ASSERT(p->p_tlist == NULL); 1166 ASSERT(MUTEX_HELD(&pidlock)); 1167 1168 sigdelq(p, NULL, 0); 1169 if (p->p_killsqp) { 1170 siginfofree(p->p_killsqp); 1171 p->p_killsqp = NULL; 1172 } 1173 1174 prfree(p); /* inform /proc */ 1175 1176 /* 1177 * Don't free the init processes. 1178 * Other dying processes will access it. 1179 */ 1180 if (p == proc_init) 1181 return; 1182 1183 1184 /* 1185 * We wait until now to free the cred structure because a 1186 * zombie process's credentials may be examined by /proc. 1187 * No cred locking needed because there are no threads at this point. 
1188 */ 1189 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1190 crfree(p->p_cred); 1191 if (p->p_corefile != NULL) { 1192 corectl_path_rele(p->p_corefile); 1193 p->p_corefile = NULL; 1194 } 1195 if (p->p_content != NULL) { 1196 corectl_content_rele(p->p_content); 1197 p->p_content = NULL; 1198 } 1199 1200 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1201 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1202 /* 1203 * This should still do the right thing since p_utime/stime 1204 * get set to the correct value on process exit, so it 1205 * should get properly updated 1206 */ 1207 p->p_nextofkin->p_cutime += p->p_utime; 1208 p->p_nextofkin->p_cstime += p->p_stime; 1209 1210 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1211 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1212 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1213 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1214 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1215 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1216 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1217 += p->p_acct[LMS_USER_LOCK]; 1218 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1219 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1220 += p->p_acct[LMS_WAIT_CPU]; 1221 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1222 1223 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1224 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1225 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1226 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1227 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1228 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1229 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1230 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1231 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1232 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1233 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1234 
p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1235 1236 } 1237 1238 q = p->p_nextofkin; 1239 if (q && q->p_orphan == p) 1240 q->p_orphan = p->p_nextorph; 1241 else if (q) { 1242 for (q = q->p_orphan; q; q = q->p_nextorph) 1243 if (q->p_nextorph == p) 1244 break; 1245 ASSERT(q && q->p_nextorph == p); 1246 q->p_nextorph = p->p_nextorph; 1247 } 1248 1249 proc_detach(p); 1250 pid_exit(p); /* frees pid and proc structure */ 1251 } 1252 1253 /* 1254 * Delete process "child" from the newstate list of process "parent" 1255 */ 1256 void 1257 delete_ns(proc_t *parent, proc_t *child) 1258 { 1259 proc_t **ns; 1260 1261 ASSERT(MUTEX_HELD(&pidlock)); 1262 ASSERT(child->p_parent == parent); 1263 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1264 if (*ns == child) { 1265 1266 ASSERT((*ns)->p_parent == parent); 1267 1268 *ns = child->p_sibling_ns; 1269 child->p_sibling_ns = NULL; 1270 return; 1271 } 1272 } 1273 } 1274 1275 /* 1276 * Add process "child" to the new state list of process "parent" 1277 */ 1278 void 1279 add_ns(proc_t *parent, proc_t *child) 1280 { 1281 ASSERT(child->p_sibling_ns == NULL); 1282 child->p_sibling_ns = parent->p_child_ns; 1283 parent->p_child_ns = child; 1284 } 1285