1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/user.h> 37 #include <sys/errno.h> 38 #include <sys/proc.h> 39 #include <sys/ucontext.h> 40 #include <sys/procfs.h> 41 #include <sys/vnode.h> 42 #include <sys/acct.h> 43 #include <sys/var.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/wait.h> 47 #include <sys/siginfo.h> 48 #include <sys/procset.h> 49 #include <sys/class.h> 50 #include <sys/file.h> 51 #include <sys/session.h> 52 #include <sys/kmem.h> 53 #include <sys/vtrace.h> 54 #include <sys/prsystm.h> 55 #include <sys/ipc.h> 56 #include <sys/sem_impl.h> 57 #include <c2/audit.h> 58 #include <sys/aio_impl.h> 59 #include <vm/as.h> 60 #include <sys/poll.h> 61 #include <sys/door.h> 62 #include <sys/lwpchan_impl.h> 63 #include <sys/utrap.h> 64 #include <sys/task.h> 65 
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>
#include <sys/brand.h>
#include <sys/libc_kernel.h>

/*
 * Build an old-style wait status word from a siginfo-style (code, data)
 * pair.  Only the low eight bits of 'data' are used.  An unrecognized
 * 'code' panics the system.
 */
int
wstat(int code, int data)
{
	int status = data & 0377;

	switch (code) {
	case CLD_EXITED:
		/* exit status lives in the second byte */
		return (status << 8);
	case CLD_DUMPED:
		/* signal number in the low byte, plus the core flag */
		return (status | WCOREFLG);
	case CLD_KILLED:
		/* signal number in the low byte */
		return (status);
	case CLD_TRAPPED:
	case CLD_STOPPED:
		/* stop signal in the second byte, stop marker in the low byte */
		return ((status << 8) | WSTOPFLG);
	case CLD_CONTINUED:
		return (WCONTFLG);
	default:
		cmn_err(CE_PANIC, "wstat: bad code");
		/* NOTREACHED */
	}
	return (status);
}

/*
 * Format a human-readable description of how a process ended into
 * 'buf' (at most 'bufsz' bytes) and return 'buf'.  'why' is one of the
 * CLD_* codes and 'what' is the associated exit status or signal number.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	if (why == CLD_EXITED) {
		(void) snprintf(buf, bufsz, "exited with status %d", what);
	} else if (why == CLD_KILLED) {
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
	} else if (why == CLD_DUMPED) {
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
	} else {
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
	}

	return (buf);
}

/*
 * The exit system call: a normal (CLD_EXITED) exit carrying the
 * caller-supplied status.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed.  As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.
Most 148 * importantly this includes closing all file descriptors (exec only 149 * closes those marked close-on-exec) and resetting signals (exec only 150 * resets handled signals, and we need to clear any signals which 151 * killed init). Anything else that exec(2) says would be inherited, 152 * but would affect the execution of init, needs to be reset. 153 */ 154 static int 155 restart_init(int what, int why) 156 { 157 kthread_t *t = curthread; 158 klwp_t *lwp = ttolwp(t); 159 proc_t *p = ttoproc(t); 160 user_t *up = PTOU(p); 161 162 vnode_t *oldcd, *oldrd; 163 int i, err; 164 char reason_buf[64]; 165 166 /* 167 * Let zone admin (and global zone admin if this is for a non-global 168 * zone) know that init has failed and will be restarted. 169 */ 170 zcmn_err(p->p_zone->zone_id, CE_WARN, 171 "init(1M) %s: restarting automatically", 172 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 173 174 if (!INGLOBALZONE(p)) { 175 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 176 "restarting automatically", 177 p->p_zone->zone_name, p->p_pid, reason_buf); 178 } 179 180 /* 181 * Remove any fpollinfo_t's for this (last) thread from our file 182 * descriptors so closeall() can ASSERT() that they're all gone. 183 * Then close all open file descriptors in the process. 184 */ 185 pollcleanup(); 186 closeall(P_FINFO(p)); 187 188 /* 189 * Grab p_lock and begin clearing miscellaneous global process 190 * state that needs to be reset before we exec the new init(1M). 
191 */ 192 193 mutex_enter(&p->p_lock); 194 prbarrier(p); 195 196 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 197 up->u_cmask = CMASK; 198 199 sigemptyset(&t->t_hold); 200 sigemptyset(&t->t_sig); 201 sigemptyset(&t->t_extsig); 202 203 sigemptyset(&p->p_sig); 204 sigemptyset(&p->p_extsig); 205 206 sigdelq(p, t, 0); 207 sigdelq(p, NULL, 0); 208 209 if (p->p_killsqp) { 210 siginfofree(p->p_killsqp); 211 p->p_killsqp = NULL; 212 } 213 214 /* 215 * Reset any signals that are ignored back to the default disposition. 216 * Other u_signal members will be cleared when exec calls sigdefault(). 217 */ 218 for (i = 1; i < NSIG; i++) { 219 if (up->u_signal[i - 1] == SIG_IGN) { 220 up->u_signal[i - 1] = SIG_DFL; 221 sigemptyset(&up->u_sigmask[i - 1]); 222 } 223 } 224 225 /* 226 * Clear the current signal, any signal info associated with it, and 227 * any signal information from contracts and/or contract templates. 228 */ 229 lwp->lwp_cursig = 0; 230 lwp->lwp_extsig = 0; 231 if (lwp->lwp_curinfo != NULL) { 232 siginfofree(lwp->lwp_curinfo); 233 lwp->lwp_curinfo = NULL; 234 } 235 lwp_ctmpl_clear(lwp); 236 237 /* 238 * Reset both the process root directory and the current working 239 * directory to the root of the zone just as we do during boot. 240 */ 241 VN_HOLD(p->p_zone->zone_rootvp); 242 oldrd = up->u_rdir; 243 up->u_rdir = p->p_zone->zone_rootvp; 244 245 VN_HOLD(p->p_zone->zone_rootvp); 246 oldcd = up->u_cdir; 247 up->u_cdir = p->p_zone->zone_rootvp; 248 249 if (up->u_cwd != NULL) { 250 refstr_rele(up->u_cwd); 251 up->u_cwd = NULL; 252 } 253 254 mutex_exit(&p->p_lock); 255 256 if (oldrd != NULL) 257 VN_RELE(oldrd); 258 if (oldcd != NULL) 259 VN_RELE(oldcd); 260 261 /* Free the controlling tty. (freectty() always assumes curproc.) */ 262 ASSERT(p == curproc); 263 (void) freectty(B_TRUE); 264 265 /* 266 * Now exec() the new init(1M) on top of the current process. If we 267 * succeed, the caller will treat this like a successful system call. 
268 * If we fail, we issue messages and the caller will proceed with exit. 269 */ 270 err = exec_init(p->p_zone->zone_initname, NULL); 271 272 if (err == 0) 273 return (0); 274 275 zcmn_err(p->p_zone->zone_id, CE_WARN, 276 "failed to restart init(1M) (err=%d): system reboot required", err); 277 278 if (!INGLOBALZONE(p)) { 279 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 280 "(pid %d, err=%d): zoneadm(1M) boot required", 281 p->p_zone->zone_name, p->p_pid, err); 282 } 283 284 return (-1); 285 } 286 287 /* 288 * Release resources. 289 * Enter zombie state. 290 * Wake up parent and init processes, 291 * and dispose of children. 292 */ 293 void 294 exit(int why, int what) 295 { 296 /* 297 * If proc_exit() fails, then some other lwp in the process 298 * got there first. We just have to call lwp_exit() to allow 299 * the other lwp to finish exiting the process. Otherwise we're 300 * restarting init, and should return. 301 */ 302 if (proc_exit(why, what) != 0) { 303 mutex_enter(&curproc->p_lock); 304 ASSERT(curproc->p_flag & SEXITLWPS); 305 lwp_exit(); 306 /* NOTREACHED */ 307 } 308 } 309 310 /* 311 * Set the SEXITING flag on the process, after making sure /proc does 312 * not have it locked. This is done in more places than proc_exit(), 313 * so it is a separate function. 314 */ 315 void 316 proc_is_exiting(proc_t *p) 317 { 318 mutex_enter(&p->p_lock); 319 prbarrier(p); 320 p->p_flag |= SEXITING; 321 mutex_exit(&p->p_lock); 322 } 323 324 /* 325 * Return value: 326 * 1 - exitlwps() failed, call (or continue) lwp_exit() 327 * 0 - restarting init. 
Return through system call path 328 */ 329 int 330 proc_exit(int why, int what) 331 { 332 kthread_t *t = curthread; 333 klwp_t *lwp = ttolwp(t); 334 proc_t *p = ttoproc(t); 335 zone_t *z = p->p_zone; 336 timeout_id_t tmp_id; 337 int rv; 338 proc_t *q; 339 task_t *tk; 340 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 341 sigqueue_t *sqp; 342 lwpdir_t *lwpdir; 343 uint_t lwpdir_sz; 344 lwpdir_t **tidhash; 345 uint_t tidhash_sz; 346 refstr_t *cwd; 347 hrtime_t hrutime, hrstime; 348 int evaporate; 349 350 /* 351 * Stop and discard the process's lwps except for the current one, 352 * unless some other lwp beat us to it. If exitlwps() fails then 353 * return and the calling lwp will call (or continue in) lwp_exit(). 354 */ 355 proc_is_exiting(p); 356 if (exitlwps(0) != 0) 357 return (1); 358 359 DTRACE_PROC(lwp__exit); 360 DTRACE_PROC1(exit, int, why); 361 362 /* 363 * Will perform any brand specific proc exit processing, since this 364 * is always the last lwp, will also perform lwp_exit and free brand 365 * data 366 */ 367 if (PROC_IS_BRANDED(p)) 368 BROP(p)->b_proc_exit(p, lwp); 369 370 /* 371 * Don't let init exit unless zone_start_init() failed its exec, or 372 * we are shutting down the zone or the machine. 373 * 374 * Since we are single threaded, we don't need to lock the 375 * following accesses to zone_proc_initpid. 376 */ 377 if (p->p_pid == z->zone_proc_initpid) { 378 if (z->zone_boot_err == 0 && 379 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 380 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 381 z->zone_restart_init == B_TRUE && 382 restart_init(what, why) == 0) 383 return (0); 384 /* 385 * Since we didn't or couldn't restart init, we clear 386 * the zone's init state and proceed with exit 387 * processing. 388 */ 389 z->zone_proc_initpid = -1; 390 } 391 392 /* 393 * Allocate a sigqueue now, before we grab locks. 394 * It will be given to sigcld(), below. 
395 * Special case: If we will be making the process disappear 396 * without a trace (for the benefit of posix_spawn() in libc) 397 * don't bother to allocate a useless sigqueue. 398 */ 399 evaporate = ((p->p_flag & SVFORK) && 400 why == CLD_EXITED && what == _EVAPORATE); 401 if (!evaporate) 402 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 403 404 /* 405 * revoke any doors created by the process. 406 */ 407 if (p->p_door_list) 408 door_exit(); 409 410 /* 411 * Release schedctl data structures. 412 */ 413 if (p->p_pagep) 414 schedctl_proc_cleanup(); 415 416 /* 417 * make sure all pending kaio has completed. 418 */ 419 if (p->p_aio) 420 aio_cleanup_exit(); 421 422 /* 423 * discard the lwpchan cache. 424 */ 425 if (p->p_lcp != NULL) 426 lwpchan_destroy_cache(0); 427 428 /* 429 * Clean up any DTrace helper actions or probes for the process. 430 */ 431 if (p->p_dtrace_helpers != NULL) { 432 ASSERT(dtrace_helpers_cleanup != NULL); 433 (*dtrace_helpers_cleanup)(); 434 } 435 436 /* untimeout the realtime timers */ 437 if (p->p_itimer != NULL) 438 timer_exit(); 439 440 if ((tmp_id = p->p_alarmid) != 0) { 441 p->p_alarmid = 0; 442 (void) untimeout(tmp_id); 443 } 444 445 /* 446 * Remove any fpollinfo_t's for this (last) thread from our file 447 * descriptors so closeall() can ASSERT() that they're all gone. 448 */ 449 pollcleanup(); 450 451 if (p->p_rprof_cyclic != CYCLIC_NONE) { 452 mutex_enter(&cpu_lock); 453 cyclic_remove(p->p_rprof_cyclic); 454 mutex_exit(&cpu_lock); 455 } 456 457 mutex_enter(&p->p_lock); 458 459 /* 460 * Clean up any DTrace probes associated with this process. 
461 */ 462 if (p->p_dtrace_probes) { 463 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 464 dtrace_fasttrap_exit_ptr(p); 465 } 466 467 while ((tmp_id = p->p_itimerid) != 0) { 468 p->p_itimerid = 0; 469 mutex_exit(&p->p_lock); 470 (void) untimeout(tmp_id); 471 mutex_enter(&p->p_lock); 472 } 473 474 lwp_cleanup(); 475 476 /* 477 * We are about to exit; prevent our resource associations from 478 * being changed. 479 */ 480 pool_barrier_enter(); 481 482 /* 483 * Block the process against /proc now that we have really 484 * acquired p->p_lock (to manipulate p_tlist at least). 485 */ 486 prbarrier(p); 487 488 #ifdef SUN_SRC_COMPAT 489 if (code == CLD_KILLED) 490 u.u_acflag |= AXSIG; 491 #endif 492 sigfillset(&p->p_ignore); 493 sigemptyset(&p->p_siginfo); 494 sigemptyset(&p->p_sig); 495 sigemptyset(&p->p_extsig); 496 sigemptyset(&t->t_sig); 497 sigemptyset(&t->t_extsig); 498 sigemptyset(&p->p_sigmask); 499 sigdelq(p, t, 0); 500 lwp->lwp_cursig = 0; 501 lwp->lwp_extsig = 0; 502 p->p_flag &= ~(SKILLED | SEXTKILLED); 503 if (lwp->lwp_curinfo) { 504 siginfofree(lwp->lwp_curinfo); 505 lwp->lwp_curinfo = NULL; 506 } 507 508 t->t_proc_flag |= TP_LWPEXIT; 509 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 510 prlwpexit(t); /* notify /proc */ 511 lwp_hash_out(p, t->t_tid); 512 prexit(p); 513 514 p->p_lwpcnt = 0; 515 p->p_tlist = NULL; 516 sigqfree(p); 517 term_mstate(t); 518 p->p_mterm = gethrtime(); 519 520 exec_vp = p->p_exec; 521 execdir_vp = p->p_execdir; 522 p->p_exec = NULLVP; 523 p->p_execdir = NULLVP; 524 mutex_exit(&p->p_lock); 525 if (exec_vp) 526 VN_RELE(exec_vp); 527 if (execdir_vp) 528 VN_RELE(execdir_vp); 529 530 pr_free_watched_pages(p); 531 532 closeall(P_FINFO(p)); 533 534 /* Free the controlling tty. (freectty() always assumes curproc.) 
*/ 535 ASSERT(p == curproc); 536 (void) freectty(B_TRUE); 537 538 #if defined(__sparc) 539 if (p->p_utraps != NULL) 540 utrap_free(p); 541 #endif 542 if (p->p_semacct) /* IPC semaphore exit */ 543 semexit(p); 544 rv = wstat(why, what); 545 546 acct(rv & 0xff); 547 exacct_commit_proc(p, rv); 548 549 /* 550 * Release any resources associated with C2 auditing 551 */ 552 #ifdef C2_AUDIT 553 if (audit_active) { 554 /* 555 * audit exit system call 556 */ 557 audit_exit(why, what); 558 } 559 #endif 560 561 /* 562 * Free address space. 563 */ 564 relvm(); 565 566 /* 567 * Release held contracts. 568 */ 569 contract_exit(p); 570 571 /* 572 * Depart our encapsulating process contract. 573 */ 574 if ((p->p_flag & SSYS) == 0) { 575 ASSERT(p->p_ct_process); 576 contract_process_exit(p->p_ct_process, p, rv); 577 } 578 579 /* 580 * Remove pool association, and block if requested by pool_do_bind. 581 */ 582 mutex_enter(&p->p_lock); 583 ASSERT(p->p_pool->pool_ref > 0); 584 atomic_add_32(&p->p_pool->pool_ref, -1); 585 p->p_pool = pool_default; 586 /* 587 * Now that our address space has been freed and all other threads 588 * in this process have exited, set the PEXITED pool flag. This 589 * tells the pools subsystems to ignore this process if it was 590 * requested to rebind this process to a new pool. 591 */ 592 p->p_poolflag |= PEXITED; 593 pool_barrier_exit(); 594 mutex_exit(&p->p_lock); 595 596 mutex_enter(&pidlock); 597 598 /* 599 * Delete this process from the newstate list of its parent. We 600 * will put it in the right place in the sigcld in the end. 601 */ 602 delete_ns(p->p_parent, p); 603 604 /* 605 * Reassign the orphans to the next of kin. 606 * Don't rearrange init's orphanage. 
607 */ 608 if ((q = p->p_orphan) != NULL && p != proc_init) { 609 610 proc_t *nokp = p->p_nextofkin; 611 612 for (;;) { 613 q->p_nextofkin = nokp; 614 if (q->p_nextorph == NULL) 615 break; 616 q = q->p_nextorph; 617 } 618 q->p_nextorph = nokp->p_orphan; 619 nokp->p_orphan = p->p_orphan; 620 p->p_orphan = NULL; 621 } 622 623 /* 624 * Reassign the children to init. 625 * Don't try to assign init's children to init. 626 */ 627 if ((q = p->p_child) != NULL && p != proc_init) { 628 struct proc *np; 629 struct proc *initp = proc_init; 630 boolean_t setzonetop = B_FALSE; 631 632 if (!INGLOBALZONE(curproc)) 633 setzonetop = B_TRUE; 634 635 pgdetach(p); 636 637 do { 638 np = q->p_sibling; 639 /* 640 * Delete it from its current parent new state 641 * list and add it to init new state list 642 */ 643 delete_ns(q->p_parent, q); 644 645 q->p_ppid = 1; 646 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); 647 if (setzonetop) { 648 mutex_enter(&q->p_lock); 649 q->p_flag |= SZONETOP; 650 mutex_exit(&q->p_lock); 651 } 652 q->p_parent = initp; 653 654 /* 655 * Since q will be the first child, 656 * it will not have a previous sibling. 657 */ 658 q->p_psibling = NULL; 659 if (initp->p_child) { 660 initp->p_child->p_psibling = q; 661 } 662 q->p_sibling = initp->p_child; 663 initp->p_child = q; 664 if (q->p_proc_flag & P_PR_PTRACE) { 665 mutex_enter(&q->p_lock); 666 sigtoproc(q, NULL, SIGKILL); 667 mutex_exit(&q->p_lock); 668 } 669 /* 670 * sigcld() will add the child to parents 671 * newstate list. 
672 */ 673 if (q->p_stat == SZOMB) 674 sigcld(q, NULL); 675 } while ((q = np) != NULL); 676 677 p->p_child = NULL; 678 ASSERT(p->p_child_ns == NULL); 679 } 680 681 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 682 683 mutex_enter(&p->p_lock); 684 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 685 686 hrutime = mstate_aggr_state(p, LMS_USER); 687 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 688 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 689 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 690 691 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 692 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 693 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 694 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 695 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 696 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 697 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 698 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 699 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 700 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 701 702 p->p_ru.minflt += p->p_cru.minflt; 703 p->p_ru.majflt += p->p_cru.majflt; 704 p->p_ru.nswap += p->p_cru.nswap; 705 p->p_ru.inblock += p->p_cru.inblock; 706 p->p_ru.oublock += p->p_cru.oublock; 707 p->p_ru.msgsnd += p->p_cru.msgsnd; 708 p->p_ru.msgrcv += p->p_cru.msgrcv; 709 p->p_ru.nsignals += p->p_cru.nsignals; 710 p->p_ru.nvcsw += p->p_cru.nvcsw; 711 p->p_ru.nivcsw += p->p_cru.nivcsw; 712 p->p_ru.sysc += p->p_cru.sysc; 713 p->p_ru.ioch += p->p_cru.ioch; 714 715 p->p_stat = SZOMB; 716 p->p_proc_flag &= ~P_PR_PTRACE; 717 p->p_wdata = what; 718 p->p_wcode = (char)why; 719 720 cdir = PTOU(p)->u_cdir; 721 rdir = PTOU(p)->u_rdir; 722 cwd = PTOU(p)->u_cwd; 723 724 /* 725 * Release resource controls, as they are no longer enforceable. 726 */ 727 rctl_set_free(p->p_rctls); 728 729 /* 730 * Give up task and project memberships. 
Decrement tk_nlwps counter 731 * for our task.max-lwps resource control. An extended accounting 732 * record, if that facility is active, is scheduled to be written. 733 * Zombie processes are false members of task0 for the remainder of 734 * their lifetime; no accounting information is recorded for them. 735 */ 736 tk = p->p_task; 737 738 mutex_enter(&p->p_zone->zone_nlwps_lock); 739 tk->tk_nlwps--; 740 tk->tk_proj->kpj_nlwps--; 741 p->p_zone->zone_nlwps--; 742 mutex_exit(&p->p_zone->zone_nlwps_lock); 743 task_detach(p); 744 p->p_task = task0p; 745 746 /* 747 * Clear the lwp directory and the lwpid hash table 748 * now that /proc can't bother us any more. 749 * We free the memory below, after dropping p->p_lock. 750 */ 751 lwpdir = p->p_lwpdir; 752 lwpdir_sz = p->p_lwpdir_sz; 753 tidhash = p->p_tidhash; 754 tidhash_sz = p->p_tidhash_sz; 755 p->p_lwpdir = NULL; 756 p->p_lwpfree = NULL; 757 p->p_lwpdir_sz = 0; 758 p->p_tidhash = NULL; 759 p->p_tidhash_sz = 0; 760 761 /* 762 * If the process has context ops installed, call the exit routine 763 * on behalf of this last remaining thread. Normally exitpctx() is 764 * called during thread_exit() or lwp_exit(), but because this is the 765 * last thread in the process, we must call it here. By the time 766 * thread_exit() is called (below), the association with the relevant 767 * process has been lost. 768 * 769 * We also free the context here. 770 */ 771 if (p->p_pctx) { 772 kpreempt_disable(); 773 exitpctx(p); 774 kpreempt_enable(); 775 776 freepctx(p, 0); 777 } 778 779 /* 780 * curthread's proc pointer is changed to point to the 'sched' 781 * process for the corresponding zone, except in the case when 782 * the exiting process is in fact a zsched instance, in which 783 * case the proc pointer is set to p0. We do so, so that the 784 * process still points at the right zone when we call the VN_RELE() 785 * below. 
786 * 787 * This is because curthread's original proc pointer can be freed as 788 * soon as the child sends a SIGCLD to its parent. We use zsched so 789 * that for user processes, even in the final moments of death, the 790 * process is still associated with its zone. 791 */ 792 if (p != t->t_procp->p_zone->zone_zsched) 793 t->t_procp = t->t_procp->p_zone->zone_zsched; 794 else 795 t->t_procp = &p0; 796 797 mutex_exit(&p->p_lock); 798 if (!evaporate) { 799 p->p_pidflag &= ~CLDPEND; 800 sigcld(p, sqp); 801 } else { 802 /* 803 * Do what sigcld() would do if the disposition 804 * of the SIGCHLD signal were set to be ignored. 805 */ 806 cv_broadcast(&p->p_srwchan_cv); 807 freeproc(p); 808 } 809 mutex_exit(&pidlock); 810 811 /* 812 * We don't release u_cdir and u_rdir until SZOMB is set. 813 * This protects us against dofusers(). 814 */ 815 VN_RELE(cdir); 816 if (rdir) 817 VN_RELE(rdir); 818 if (cwd) 819 refstr_rele(cwd); 820 821 /* 822 * task_rele() may ultimately cause the zone to go away (or 823 * may cause the last user process in a zone to go away, which 824 * signals zsched to go away). So prior to this call, we must 825 * no longer point at zsched. 826 */ 827 t->t_procp = &p0; 828 task_rele(tk); 829 830 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 831 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 832 833 lwp_pcb_exit(); 834 835 thread_exit(); 836 /* NOTREACHED */ 837 } 838 839 /* 840 * Format siginfo structure for wait system calls. 
841 */ 842 void 843 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 844 { 845 ASSERT(MUTEX_HELD(&pidlock)); 846 847 bzero(ip, sizeof (k_siginfo_t)); 848 ip->si_signo = SIGCLD; 849 ip->si_code = pp->p_wcode; 850 ip->si_pid = pp->p_pid; 851 ip->si_ctid = PRCTID(pp); 852 ip->si_zoneid = pp->p_zone->zone_id; 853 ip->si_status = pp->p_wdata; 854 ip->si_stime = pp->p_stime; 855 ip->si_utime = pp->p_utime; 856 857 if (waitflag) { 858 pp->p_wcode = 0; 859 pp->p_wdata = 0; 860 pp->p_pidflag &= ~CLDPEND; 861 } 862 } 863 864 /* 865 * Wait system call. 866 * Search for a terminated (zombie) child, 867 * finally lay it to rest, and collect its status. 868 * Look also for stopped children, 869 * and pass back status from them. 870 */ 871 int 872 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 873 { 874 int found; 875 proc_t *cp, *pp; 876 int proc_gone; 877 int waitflag = !(options & WNOWAIT); 878 879 /* 880 * Obsolete flag, defined here only for binary compatibility 881 * with old statically linked executables. Delete this when 882 * we no longer care about these old and broken applications. 883 */ 884 #define _WNOCHLD 0400 885 options &= ~_WNOCHLD; 886 887 if (options == 0 || (options & ~WOPTMASK)) 888 return (EINVAL); 889 890 switch (idtype) { 891 case P_PID: 892 case P_PGID: 893 if (id < 0 || id >= maxpid) 894 return (EINVAL); 895 /* FALLTHROUGH */ 896 case P_ALL: 897 break; 898 default: 899 return (EINVAL); 900 } 901 902 pp = ttoproc(curthread); 903 904 /* 905 * lock parent mutex so that sibling chain can be searched. 906 */ 907 mutex_enter(&pidlock); 908 909 /* 910 * if we are only looking for exited processes and child_ns list 911 * is empty no reason to look at all children. 
912 */ 913 if (idtype == P_ALL && 914 (options & ~WNOWAIT) == (WNOHANG | WEXITED) && 915 pp->p_child_ns == NULL) { 916 if (pp->p_child) { 917 mutex_exit(&pidlock); 918 bzero(ip, sizeof (k_siginfo_t)); 919 return (0); 920 } 921 mutex_exit(&pidlock); 922 return (ECHILD); 923 } 924 925 while (pp->p_child != NULL) { 926 927 proc_gone = 0; 928 929 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { 930 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) 931 continue; 932 if (idtype == P_PID && id != cp->p_pid) 933 continue; 934 if (idtype == P_PGID && id != cp->p_pgrp) 935 continue; 936 937 switch (cp->p_wcode) { 938 939 case CLD_TRAPPED: 940 case CLD_STOPPED: 941 case CLD_CONTINUED: 942 cmn_err(CE_PANIC, 943 "waitid: wrong state %d on the p_newstate" 944 " list", cp->p_wcode); 945 break; 946 947 case CLD_EXITED: 948 case CLD_DUMPED: 949 case CLD_KILLED: 950 if (!(options & WEXITED)) { 951 /* 952 * Count how many are already gone 953 * for good. 954 */ 955 proc_gone++; 956 break; 957 } 958 if (!waitflag) { 959 winfo(cp, ip, 0); 960 } else { 961 winfo(cp, ip, 1); 962 freeproc(cp); 963 } 964 mutex_exit(&pidlock); 965 if (waitflag) { /* accept SIGCLD */ 966 sigcld_delete(ip); 967 sigcld_repost(); 968 } 969 return (0); 970 } 971 972 if (idtype == P_PID) 973 break; 974 } 975 976 /* 977 * Wow! None of the threads on the p_sibling_ns list were 978 * interesting threads. Check all the kids! 979 */ 980 found = 0; 981 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { 982 if (idtype == P_PID && id != cp->p_pid) 983 continue; 984 if (idtype == P_PGID && id != cp->p_pgrp) 985 continue; 986 987 switch (cp->p_wcode) { 988 case CLD_TRAPPED: 989 if (!(options & WTRAPPED)) 990 break; 991 winfo(cp, ip, waitflag); 992 mutex_exit(&pidlock); 993 if (waitflag) { /* accept SIGCLD */ 994 sigcld_delete(ip); 995 sigcld_repost(); 996 } 997 return (0); 998 999 case CLD_STOPPED: 1000 if (!(options & WSTOPPED)) 1001 break; 1002 /* Is it still stopped? 
*/ 1003 mutex_enter(&cp->p_lock); 1004 if (!jobstopped(cp)) { 1005 mutex_exit(&cp->p_lock); 1006 break; 1007 } 1008 mutex_exit(&cp->p_lock); 1009 winfo(cp, ip, waitflag); 1010 mutex_exit(&pidlock); 1011 if (waitflag) { /* accept SIGCLD */ 1012 sigcld_delete(ip); 1013 sigcld_repost(); 1014 } 1015 return (0); 1016 1017 case CLD_CONTINUED: 1018 if (!(options & WCONTINUED)) 1019 break; 1020 winfo(cp, ip, waitflag); 1021 mutex_exit(&pidlock); 1022 if (waitflag) { /* accept SIGCLD */ 1023 sigcld_delete(ip); 1024 sigcld_repost(); 1025 } 1026 return (0); 1027 1028 case CLD_EXITED: 1029 case CLD_DUMPED: 1030 case CLD_KILLED: 1031 if (idtype != P_PID && 1032 (cp->p_pidflag & CLDWAITPID)) 1033 continue; 1034 /* 1035 * Don't complain if a process was found in 1036 * the first loop but we broke out of the loop 1037 * because of the arguments passed to us. 1038 */ 1039 if (proc_gone == 0) { 1040 cmn_err(CE_PANIC, 1041 "waitid: wrong state on the" 1042 " p_child list"); 1043 } else { 1044 break; 1045 } 1046 } 1047 1048 found++; 1049 1050 if (idtype == P_PID) 1051 break; 1052 } 1053 1054 /* 1055 * If we found no interesting processes at all, 1056 * break out and return ECHILD. 1057 */ 1058 if (found + proc_gone == 0) 1059 break; 1060 1061 if (options & WNOHANG) { 1062 mutex_exit(&pidlock); 1063 bzero(ip, sizeof (k_siginfo_t)); 1064 /* 1065 * We should set ip->si_signo = SIGCLD, 1066 * but there is an SVVS test that expects 1067 * ip->si_signo to be zero in this case. 1068 */ 1069 return (0); 1070 } 1071 1072 /* 1073 * If we found no processes of interest that could 1074 * change state while we wait, we don't wait at all. 1075 * Get out with ECHILD according to SVID. 1076 */ 1077 if (found == proc_gone) 1078 break; 1079 1080 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1081 mutex_exit(&pidlock); 1082 return (EINTR); 1083 } 1084 } 1085 mutex_exit(&pidlock); 1086 return (ECHILD); 1087 } 1088 1089 /* 1090 * The wait() system call trap is no longer invoked by libc. 
1091 * It is retained only for the benefit of statically linked applications. 1092 * Delete this when we no longer care about these old and broken applications. 1093 */ 1094 int64_t 1095 wait(void) 1096 { 1097 int error; 1098 k_siginfo_t info; 1099 rval_t r; 1100 1101 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1102 return (set_errno(error)); 1103 r.r_val1 = info.si_pid; 1104 r.r_val2 = wstat(info.si_code, info.si_status); 1105 return (r.r_vals); 1106 } 1107 1108 int 1109 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1110 { 1111 int error; 1112 k_siginfo_t info; 1113 1114 if (error = waitid(idtype, id, &info, options)) 1115 return (set_errno(error)); 1116 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1117 return (set_errno(EFAULT)); 1118 return (0); 1119 } 1120 1121 #ifdef _SYSCALL32_IMPL 1122 1123 int 1124 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1125 { 1126 int error; 1127 k_siginfo_t info; 1128 siginfo32_t info32; 1129 1130 if (error = waitid(idtype, id, &info, options)) 1131 return (set_errno(error)); 1132 siginfo_kto32(&info, &info32); 1133 if (copyout(&info32, infop, sizeof (info32))) 1134 return (set_errno(EFAULT)); 1135 return (0); 1136 } 1137 1138 #endif /* _SYSCALL32_IMPL */ 1139 1140 void 1141 proc_detach(proc_t *p) 1142 { 1143 proc_t *q; 1144 1145 ASSERT(MUTEX_HELD(&pidlock)); 1146 1147 q = p->p_parent; 1148 ASSERT(q != NULL); 1149 1150 /* 1151 * Take it off the newstate list of its parent 1152 */ 1153 delete_ns(q, p); 1154 1155 if (q->p_child == p) { 1156 q->p_child = p->p_sibling; 1157 /* 1158 * If the parent has no children, it better not 1159 * have any with new states either! 1160 */ 1161 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1162 } 1163 1164 if (p->p_sibling) { 1165 p->p_sibling->p_psibling = p->p_psibling; 1166 } 1167 1168 if (p->p_psibling) { 1169 p->p_psibling->p_sibling = p->p_sibling; 1170 } 1171 } 1172 1173 /* 1174 * Remove zombie children from the process table. 
1175 */ 1176 void 1177 freeproc(proc_t *p) 1178 { 1179 proc_t *q; 1180 1181 ASSERT(p->p_stat == SZOMB); 1182 ASSERT(p->p_tlist == NULL); 1183 ASSERT(MUTEX_HELD(&pidlock)); 1184 1185 sigdelq(p, NULL, 0); 1186 if (p->p_killsqp) { 1187 siginfofree(p->p_killsqp); 1188 p->p_killsqp = NULL; 1189 } 1190 1191 prfree(p); /* inform /proc */ 1192 1193 /* 1194 * Don't free the init processes. 1195 * Other dying processes will access it. 1196 */ 1197 if (p == proc_init) 1198 return; 1199 1200 1201 /* 1202 * We wait until now to free the cred structure because a 1203 * zombie process's credentials may be examined by /proc. 1204 * No cred locking needed because there are no threads at this point. 1205 */ 1206 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1207 crfree(p->p_cred); 1208 if (p->p_corefile != NULL) { 1209 corectl_path_rele(p->p_corefile); 1210 p->p_corefile = NULL; 1211 } 1212 if (p->p_content != NULL) { 1213 corectl_content_rele(p->p_content); 1214 p->p_content = NULL; 1215 } 1216 1217 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1218 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1219 /* 1220 * This should still do the right thing since p_utime/stime 1221 * get set to the correct value on process exit, so it 1222 * should get properly updated 1223 */ 1224 p->p_nextofkin->p_cutime += p->p_utime; 1225 p->p_nextofkin->p_cstime += p->p_stime; 1226 1227 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1228 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1229 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1230 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1231 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1232 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1233 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1234 += p->p_acct[LMS_USER_LOCK]; 1235 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1236 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1237 += 
p->p_acct[LMS_WAIT_CPU]; 1238 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1239 1240 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1241 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1242 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1243 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1244 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1245 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1246 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1247 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1248 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1249 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1250 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1251 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1252 1253 } 1254 1255 q = p->p_nextofkin; 1256 if (q && q->p_orphan == p) 1257 q->p_orphan = p->p_nextorph; 1258 else if (q) { 1259 for (q = q->p_orphan; q; q = q->p_nextorph) 1260 if (q->p_nextorph == p) 1261 break; 1262 ASSERT(q && q->p_nextorph == p); 1263 q->p_nextorph = p->p_nextorph; 1264 } 1265 1266 proc_detach(p); 1267 pid_exit(p); /* frees pid and proc structure */ 1268 } 1269 1270 /* 1271 * Delete process "child" from the newstate list of process "parent" 1272 */ 1273 void 1274 delete_ns(proc_t *parent, proc_t *child) 1275 { 1276 proc_t **ns; 1277 1278 ASSERT(MUTEX_HELD(&pidlock)); 1279 ASSERT(child->p_parent == parent); 1280 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1281 if (*ns == child) { 1282 1283 ASSERT((*ns)->p_parent == parent); 1284 1285 *ns = child->p_sibling_ns; 1286 child->p_sibling_ns = NULL; 1287 return; 1288 } 1289 } 1290 } 1291 1292 /* 1293 * Add process "child" to the new state list of process "parent" 1294 */ 1295 void 1296 add_ns(proc_t *parent, proc_t *child) 1297 { 1298 ASSERT(child->p_sibling_ns == NULL); 1299 child->p_sibling_ns = parent->p_child_ns; 1300 parent->p_child_ns = child; 1301 } 1302