/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */

#pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/ucontext.h>
#include <sys/procfs.h>
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/var.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/wait.h>
#include <sys/siginfo.h>
#include <sys/procset.h>
#include <sys/class.h>
#include <sys/file.h>
#include <sys/session.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/prsystm.h>
#include <sys/ipc.h>
#include <sys/sem_impl.h>
#include <c2/audit.h>
#include <sys/aio_impl.h>
#include <vm/as.h>
#include <sys/poll.h>
#include <sys/door.h>
#include <sys/lwpchan_impl.h>
#include <sys/utrap.h>
#include <sys/task.h>
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>
#include <sys/brand.h>
#include <sys/libc_kernel.h>

/*
 * Convert a code/data pair into an old style wait status.
 *
 * code is one of the CLD_* values and data is the accompanying exit
 * status or signal number; only the low 8 bits of data are used.
 *
 *	CLD_EXITED		data in bits 8-15
 *	CLD_KILLED		data in bits 0-7
 *	CLD_DUMPED		data in bits 0-7, plus WCOREFLG
 *	CLD_TRAPPED/STOPPED	data in bits 8-15, plus WSTOPFLG
 *	CLD_CONTINUED		WCONTFLG only (data is discarded)
 *
 * Any other code panics the system.
 */
int
wstat(int code, int data)
{
	int stat = (data & 0377);

	switch (code) {
	case CLD_EXITED:
		stat <<= 8;
		break;
	case CLD_DUMPED:
		stat |= WCOREFLG;
		break;
	case CLD_KILLED:
		break;
	case CLD_TRAPPED:
	case CLD_STOPPED:
		stat <<= 8;
		stat |= WSTOPFLG;
		break;
	case CLD_CONTINUED:
		stat = WCONTFLG;
		break;
	default:
		cmn_err(CE_PANIC, "wstat: bad code");
		/* NOTREACHED */
	}
	return (stat);
}

/*
 * Format a human-readable description of how a process terminated
 * into buf (of size bufsz) and return buf, so that the call can be
 * used directly as a printf-style argument.
 *
 * Note the argument order: "what" (the exit status or signal number)
 * comes before "why" (the CLD_* code).
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	switch (why) {
	case CLD_EXITED:
		(void) snprintf(buf, bufsz, "exited with status %d", what);
		break;
	case CLD_KILLED:
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
		break;
	case CLD_DUMPED:
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
		break;
	default:
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
		break;
	}

	return (buf);
}

/*
 * exit system call: pass back caller's arg.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed. As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.
Most 148 * importantly this includes closing all file descriptors (exec only 149 * closes those marked close-on-exec) and resetting signals (exec only 150 * resets handled signals, and we need to clear any signals which 151 * killed init). Anything else that exec(2) says would be inherited, 152 * but would affect the execution of init, needs to be reset. 153 */ 154 static int 155 restart_init(int what, int why) 156 { 157 kthread_t *t = curthread; 158 klwp_t *lwp = ttolwp(t); 159 proc_t *p = ttoproc(t); 160 user_t *up = PTOU(p); 161 162 vnode_t *oldcd, *oldrd; 163 int i, err; 164 char reason_buf[64]; 165 166 /* 167 * Let zone admin (and global zone admin if this is for a non-global 168 * zone) know that init has failed and will be restarted. 169 */ 170 zcmn_err(p->p_zone->zone_id, CE_WARN, 171 "init(1M) %s: restarting automatically", 172 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 173 174 if (!INGLOBALZONE(p)) { 175 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 176 "restarting automatically", 177 p->p_zone->zone_name, p->p_pid, reason_buf); 178 } 179 180 /* 181 * Remove any fpollinfo_t's for this (last) thread from our file 182 * descriptors so closeall() can ASSERT() that they're all gone. 183 * Then close all open file descriptors in the process. 184 */ 185 pollcleanup(); 186 closeall(P_FINFO(p)); 187 188 /* 189 * Grab p_lock and begin clearing miscellaneous global process 190 * state that needs to be reset before we exec the new init(1M). 
191 */ 192 193 mutex_enter(&p->p_lock); 194 prbarrier(p); 195 196 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 197 up->u_cmask = CMASK; 198 199 sigemptyset(&t->t_hold); 200 sigemptyset(&t->t_sig); 201 sigemptyset(&t->t_extsig); 202 203 sigemptyset(&p->p_sig); 204 sigemptyset(&p->p_extsig); 205 206 sigdelq(p, t, 0); 207 sigdelq(p, NULL, 0); 208 209 if (p->p_killsqp) { 210 siginfofree(p->p_killsqp); 211 p->p_killsqp = NULL; 212 } 213 214 /* 215 * Reset any signals that are ignored back to the default disposition. 216 * Other u_signal members will be cleared when exec calls sigdefault(). 217 */ 218 for (i = 1; i < NSIG; i++) { 219 if (up->u_signal[i - 1] == SIG_IGN) { 220 up->u_signal[i - 1] = SIG_DFL; 221 sigemptyset(&up->u_sigmask[i - 1]); 222 } 223 } 224 225 /* 226 * Clear the current signal, any signal info associated with it, and 227 * any signal information from contracts and/or contract templates. 228 */ 229 lwp->lwp_cursig = 0; 230 lwp->lwp_extsig = 0; 231 if (lwp->lwp_curinfo != NULL) { 232 siginfofree(lwp->lwp_curinfo); 233 lwp->lwp_curinfo = NULL; 234 } 235 lwp_ctmpl_clear(lwp); 236 237 /* 238 * Reset both the process root directory and the current working 239 * directory to the root of the zone just as we do during boot. 240 */ 241 VN_HOLD(p->p_zone->zone_rootvp); 242 oldrd = up->u_rdir; 243 up->u_rdir = p->p_zone->zone_rootvp; 244 245 VN_HOLD(p->p_zone->zone_rootvp); 246 oldcd = up->u_cdir; 247 up->u_cdir = p->p_zone->zone_rootvp; 248 249 if (up->u_cwd != NULL) { 250 refstr_rele(up->u_cwd); 251 up->u_cwd = NULL; 252 } 253 254 mutex_exit(&p->p_lock); 255 256 if (oldrd != NULL) 257 VN_RELE(oldrd); 258 if (oldcd != NULL) 259 VN_RELE(oldcd); 260 261 /* Free the controlling tty. (freectty() always assumes curproc.) */ 262 ASSERT(p == curproc); 263 (void) freectty(B_TRUE); 264 265 /* 266 * Now exec() the new init(1M) on top of the current process. If we 267 * succeed, the caller will treat this like a successful system call. 
268 * If we fail, we issue messages and the caller will proceed with exit. 269 */ 270 err = exec_init(p->p_zone->zone_initname, NULL); 271 272 if (err == 0) 273 return (0); 274 275 zcmn_err(p->p_zone->zone_id, CE_WARN, 276 "failed to restart init(1M) (err=%d): system reboot required", err); 277 278 if (!INGLOBALZONE(p)) { 279 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 280 "(pid %d, err=%d): zoneadm(1M) boot required", 281 p->p_zone->zone_name, p->p_pid, err); 282 } 283 284 return (-1); 285 } 286 287 /* 288 * Release resources. 289 * Enter zombie state. 290 * Wake up parent and init processes, 291 * and dispose of children. 292 */ 293 void 294 exit(int why, int what) 295 { 296 /* 297 * If proc_exit() fails, then some other lwp in the process 298 * got there first. We just have to call lwp_exit() to allow 299 * the other lwp to finish exiting the process. Otherwise we're 300 * restarting init, and should return. 301 */ 302 if (proc_exit(why, what) != 0) { 303 mutex_enter(&curproc->p_lock); 304 ASSERT(curproc->p_flag & SEXITLWPS); 305 lwp_exit(); 306 /* NOTREACHED */ 307 } 308 } 309 310 /* 311 * Set the SEXITING flag on the process, after making sure /proc does 312 * not have it locked. This is done in more places than proc_exit(), 313 * so it is a separate function. 314 */ 315 void 316 proc_is_exiting(proc_t *p) 317 { 318 mutex_enter(&p->p_lock); 319 prbarrier(p); 320 p->p_flag |= SEXITING; 321 mutex_exit(&p->p_lock); 322 } 323 324 /* 325 * Return value: 326 * 1 - exitlwps() failed, call (or continue) lwp_exit() 327 * 0 - restarting init. 
Return through system call path 328 */ 329 int 330 proc_exit(int why, int what) 331 { 332 kthread_t *t = curthread; 333 klwp_t *lwp = ttolwp(t); 334 proc_t *p = ttoproc(t); 335 zone_t *z = p->p_zone; 336 timeout_id_t tmp_id; 337 int rv; 338 proc_t *q; 339 task_t *tk; 340 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 341 sigqueue_t *sqp; 342 lwpdir_t *lwpdir; 343 uint_t lwpdir_sz; 344 lwpdir_t **tidhash; 345 uint_t tidhash_sz; 346 refstr_t *cwd; 347 hrtime_t hrutime, hrstime; 348 int evaporate; 349 350 /* 351 * Stop and discard the process's lwps except for the current one, 352 * unless some other lwp beat us to it. If exitlwps() fails then 353 * return and the calling lwp will call (or continue in) lwp_exit(). 354 */ 355 proc_is_exiting(p); 356 if (exitlwps(0) != 0) 357 return (1); 358 359 DTRACE_PROC(lwp__exit); 360 DTRACE_PROC1(exit, int, why); 361 362 /* 363 * Will perform any brand specific proc exit processing, since this 364 * is always the last lwp, will also perform lwp_exit and free brand 365 * data 366 */ 367 if (PROC_IS_BRANDED(p)) 368 BROP(p)->b_proc_exit(p, lwp); 369 370 /* 371 * Don't let init exit unless zone_start_init() failed its exec, or 372 * we are shutting down the zone or the machine. 373 * 374 * Since we are single threaded, we don't need to lock the 375 * following accesses to zone_proc_initpid. 376 */ 377 if (p->p_pid == z->zone_proc_initpid) { 378 if (z->zone_boot_err == 0 && 379 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 380 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 381 z->zone_restart_init == B_TRUE && 382 restart_init(what, why) == 0) 383 return (0); 384 /* 385 * Since we didn't or couldn't restart init, we clear 386 * the zone's init state and proceed with exit 387 * processing. 388 */ 389 z->zone_proc_initpid = -1; 390 } 391 392 /* 393 * Allocate a sigqueue now, before we grab locks. 394 * It will be given to sigcld(), below. 
395 * Special case: If we will be making the process disappear 396 * without a trace (for the benefit of posix_spawn() in libc) 397 * don't bother to allocate a useless sigqueue. 398 */ 399 evaporate = ((p->p_flag & SVFORK) && 400 why == CLD_EXITED && what == _EVAPORATE); 401 if (!evaporate) 402 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 403 404 /* 405 * revoke any doors created by the process. 406 */ 407 if (p->p_door_list) 408 door_exit(); 409 410 /* 411 * Release schedctl data structures. 412 */ 413 if (p->p_pagep) 414 schedctl_proc_cleanup(); 415 416 /* 417 * make sure all pending kaio has completed. 418 */ 419 if (p->p_aio) 420 aio_cleanup_exit(); 421 422 /* 423 * discard the lwpchan cache. 424 */ 425 if (p->p_lcp != NULL) 426 lwpchan_destroy_cache(0); 427 428 /* 429 * Clean up any DTrace helper actions or probes for the process. 430 */ 431 if (p->p_dtrace_helpers != NULL) { 432 ASSERT(dtrace_helpers_cleanup != NULL); 433 (*dtrace_helpers_cleanup)(); 434 } 435 436 /* untimeout the realtime timers */ 437 if (p->p_itimer != NULL) 438 timer_exit(); 439 440 if ((tmp_id = p->p_alarmid) != 0) { 441 p->p_alarmid = 0; 442 (void) untimeout(tmp_id); 443 } 444 445 /* 446 * Remove any fpollinfo_t's for this (last) thread from our file 447 * descriptors so closeall() can ASSERT() that they're all gone. 448 */ 449 pollcleanup(); 450 451 if (p->p_rprof_cyclic != CYCLIC_NONE) { 452 mutex_enter(&cpu_lock); 453 cyclic_remove(p->p_rprof_cyclic); 454 mutex_exit(&cpu_lock); 455 } 456 457 mutex_enter(&p->p_lock); 458 459 /* 460 * Clean up any DTrace probes associated with this process. 
461 */ 462 if (p->p_dtrace_probes) { 463 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 464 dtrace_fasttrap_exit_ptr(p); 465 } 466 467 while ((tmp_id = p->p_itimerid) != 0) { 468 p->p_itimerid = 0; 469 mutex_exit(&p->p_lock); 470 (void) untimeout(tmp_id); 471 mutex_enter(&p->p_lock); 472 } 473 474 lwp_cleanup(); 475 476 /* 477 * We are about to exit; prevent our resource associations from 478 * being changed. 479 */ 480 pool_barrier_enter(); 481 482 /* 483 * Block the process against /proc now that we have really 484 * acquired p->p_lock (to manipulate p_tlist at least). 485 */ 486 prbarrier(p); 487 488 #ifdef SUN_SRC_COMPAT 489 if (code == CLD_KILLED) 490 u.u_acflag |= AXSIG; 491 #endif 492 sigfillset(&p->p_ignore); 493 sigemptyset(&p->p_siginfo); 494 sigemptyset(&p->p_sig); 495 sigemptyset(&p->p_extsig); 496 sigemptyset(&t->t_sig); 497 sigemptyset(&t->t_extsig); 498 sigemptyset(&p->p_sigmask); 499 sigdelq(p, t, 0); 500 lwp->lwp_cursig = 0; 501 lwp->lwp_extsig = 0; 502 p->p_flag &= ~(SKILLED | SEXTKILLED); 503 if (lwp->lwp_curinfo) { 504 siginfofree(lwp->lwp_curinfo); 505 lwp->lwp_curinfo = NULL; 506 } 507 508 t->t_proc_flag |= TP_LWPEXIT; 509 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 510 prlwpexit(t); /* notify /proc */ 511 lwp_hash_out(p, t->t_tid); 512 prexit(p); 513 514 p->p_lwpcnt = 0; 515 p->p_tlist = NULL; 516 sigqfree(p); 517 term_mstate(t); 518 p->p_mterm = gethrtime(); 519 520 exec_vp = p->p_exec; 521 execdir_vp = p->p_execdir; 522 p->p_exec = NULLVP; 523 p->p_execdir = NULLVP; 524 mutex_exit(&p->p_lock); 525 if (exec_vp) 526 VN_RELE(exec_vp); 527 if (execdir_vp) 528 VN_RELE(execdir_vp); 529 530 pr_free_watched_pages(p); 531 532 closeall(P_FINFO(p)); 533 534 /* Free the controlling tty. (freectty() always assumes curproc.) 
*/ 535 ASSERT(p == curproc); 536 (void) freectty(B_TRUE); 537 538 #if defined(__sparc) 539 if (p->p_utraps != NULL) 540 utrap_free(p); 541 #endif 542 if (p->p_semacct) /* IPC semaphore exit */ 543 semexit(p); 544 rv = wstat(why, what); 545 546 acct(rv & 0xff); 547 exacct_commit_proc(p, rv); 548 549 /* 550 * Release any resources associated with C2 auditing 551 */ 552 #ifdef C2_AUDIT 553 if (audit_active) { 554 /* 555 * audit exit system call 556 */ 557 audit_exit(why, what); 558 } 559 #endif 560 561 /* 562 * Free address space. 563 */ 564 relvm(); 565 566 /* 567 * Release held contracts. 568 */ 569 contract_exit(p); 570 571 /* 572 * Depart our encapsulating process contract. 573 */ 574 if ((p->p_flag & SSYS) == 0) { 575 ASSERT(p->p_ct_process); 576 contract_process_exit(p->p_ct_process, p, rv); 577 } 578 579 /* 580 * Remove pool association, and block if requested by pool_do_bind. 581 */ 582 mutex_enter(&p->p_lock); 583 ASSERT(p->p_pool->pool_ref > 0); 584 atomic_add_32(&p->p_pool->pool_ref, -1); 585 p->p_pool = pool_default; 586 /* 587 * Now that our address space has been freed and all other threads 588 * in this process have exited, set the PEXITED pool flag. This 589 * tells the pools subsystems to ignore this process if it was 590 * requested to rebind this process to a new pool. 591 */ 592 p->p_poolflag |= PEXITED; 593 pool_barrier_exit(); 594 mutex_exit(&p->p_lock); 595 596 mutex_enter(&pidlock); 597 598 /* 599 * Delete this process from the newstate list of its parent. We 600 * will put it in the right place in the sigcld in the end. 601 */ 602 delete_ns(p->p_parent, p); 603 604 /* 605 * Reassign the orphans to the next of kin. 606 * Don't rearrange init's orphanage. 
607 */ 608 if ((q = p->p_orphan) != NULL && p != proc_init) { 609 610 proc_t *nokp = p->p_nextofkin; 611 612 for (;;) { 613 q->p_nextofkin = nokp; 614 if (q->p_nextorph == NULL) 615 break; 616 q = q->p_nextorph; 617 } 618 q->p_nextorph = nokp->p_orphan; 619 nokp->p_orphan = p->p_orphan; 620 p->p_orphan = NULL; 621 } 622 623 /* 624 * Reassign the children to init. 625 * Don't try to assign init's children to init. 626 */ 627 if ((q = p->p_child) != NULL && p != proc_init) { 628 struct proc *np; 629 struct proc *initp = proc_init; 630 boolean_t setzonetop = B_FALSE; 631 632 if (!INGLOBALZONE(curproc)) 633 setzonetop = B_TRUE; 634 635 pgdetach(p); 636 637 do { 638 np = q->p_sibling; 639 /* 640 * Delete it from its current parent new state 641 * list and add it to init new state list 642 */ 643 delete_ns(q->p_parent, q); 644 645 q->p_ppid = 1; 646 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); 647 if (setzonetop) { 648 mutex_enter(&q->p_lock); 649 q->p_flag |= SZONETOP; 650 mutex_exit(&q->p_lock); 651 } 652 q->p_parent = initp; 653 654 /* 655 * Since q will be the first child, 656 * it will not have a previous sibling. 657 */ 658 q->p_psibling = NULL; 659 if (initp->p_child) { 660 initp->p_child->p_psibling = q; 661 } 662 q->p_sibling = initp->p_child; 663 initp->p_child = q; 664 if (q->p_proc_flag & P_PR_PTRACE) { 665 mutex_enter(&q->p_lock); 666 sigtoproc(q, NULL, SIGKILL); 667 mutex_exit(&q->p_lock); 668 } 669 /* 670 * sigcld() will add the child to parents 671 * newstate list. 
672 */ 673 if (q->p_stat == SZOMB) 674 sigcld(q, NULL); 675 } while ((q = np) != NULL); 676 677 p->p_child = NULL; 678 ASSERT(p->p_child_ns == NULL); 679 } 680 681 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 682 683 mutex_enter(&p->p_lock); 684 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 685 686 hrutime = mstate_aggr_state(p, LMS_USER); 687 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 688 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 689 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 690 691 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 692 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 693 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 694 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 695 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 696 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 697 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 698 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 699 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 700 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 701 702 p->p_ru.minflt += p->p_cru.minflt; 703 p->p_ru.majflt += p->p_cru.majflt; 704 p->p_ru.nswap += p->p_cru.nswap; 705 p->p_ru.inblock += p->p_cru.inblock; 706 p->p_ru.oublock += p->p_cru.oublock; 707 p->p_ru.msgsnd += p->p_cru.msgsnd; 708 p->p_ru.msgrcv += p->p_cru.msgrcv; 709 p->p_ru.nsignals += p->p_cru.nsignals; 710 p->p_ru.nvcsw += p->p_cru.nvcsw; 711 p->p_ru.nivcsw += p->p_cru.nivcsw; 712 p->p_ru.sysc += p->p_cru.sysc; 713 p->p_ru.ioch += p->p_cru.ioch; 714 715 p->p_stat = SZOMB; 716 p->p_proc_flag &= ~P_PR_PTRACE; 717 p->p_wdata = what; 718 p->p_wcode = (char)why; 719 720 cdir = PTOU(p)->u_cdir; 721 rdir = PTOU(p)->u_rdir; 722 cwd = PTOU(p)->u_cwd; 723 724 /* 725 * Release resource controls, as they are no longer enforceable. 726 */ 727 rctl_set_free(p->p_rctls); 728 729 /* 730 * Give up task and project memberships. 
Decrement tk_nlwps counter 731 * for our task.max-lwps resource control. An extended accounting 732 * record, if that facility is active, is scheduled to be written. 733 * Zombie processes are false members of task0 for the remainder of 734 * their lifetime; no accounting information is recorded for them. 735 */ 736 tk = p->p_task; 737 738 mutex_enter(&p->p_zone->zone_nlwps_lock); 739 tk->tk_nlwps--; 740 tk->tk_proj->kpj_nlwps--; 741 p->p_zone->zone_nlwps--; 742 mutex_exit(&p->p_zone->zone_nlwps_lock); 743 task_detach(p); 744 p->p_task = task0p; 745 746 /* 747 * Clear the lwp directory and the lwpid hash table 748 * now that /proc can't bother us any more. 749 * We free the memory below, after dropping p->p_lock. 750 */ 751 lwpdir = p->p_lwpdir; 752 lwpdir_sz = p->p_lwpdir_sz; 753 tidhash = p->p_tidhash; 754 tidhash_sz = p->p_tidhash_sz; 755 p->p_lwpdir = NULL; 756 p->p_lwpfree = NULL; 757 p->p_lwpdir_sz = 0; 758 p->p_tidhash = NULL; 759 p->p_tidhash_sz = 0; 760 761 /* 762 * If the process has context ops installed, call the exit routine 763 * on behalf of this last remaining thread. Normally exitpctx() is 764 * called during thread_exit() or lwp_exit(), but because this is the 765 * last thread in the process, we must call it here. By the time 766 * thread_exit() is called (below), the association with the relevant 767 * process has been lost. 768 * 769 * We also free the context here. 770 */ 771 if (p->p_pctx) { 772 kpreempt_disable(); 773 exitpctx(p); 774 kpreempt_enable(); 775 776 freepctx(p, 0); 777 } 778 779 /* 780 * curthread's proc pointer is changed to point at p0 because 781 * curthread's original proc pointer can be freed as soon as 782 * the child sends a SIGCLD to its parent. 783 */ 784 t->t_procp = &p0; 785 786 mutex_exit(&p->p_lock); 787 if (!evaporate) { 788 p->p_pidflag &= ~CLDPEND; 789 sigcld(p, sqp); 790 } else { 791 /* 792 * Do what sigcld() would do if the disposition 793 * of the SIGCHLD signal were set to be ignored. 
794 */ 795 cv_broadcast(&p->p_srwchan_cv); 796 freeproc(p); 797 } 798 mutex_exit(&pidlock); 799 800 task_rele(tk); 801 802 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 803 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 804 805 /* 806 * We don't release u_cdir and u_rdir until SZOMB is set. 807 * This protects us against dofusers(). 808 */ 809 VN_RELE(cdir); 810 if (rdir) 811 VN_RELE(rdir); 812 if (cwd) 813 refstr_rele(cwd); 814 815 lwp_pcb_exit(); 816 817 thread_exit(); 818 /* NOTREACHED */ 819 } 820 821 /* 822 * Format siginfo structure for wait system calls. 823 */ 824 void 825 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 826 { 827 ASSERT(MUTEX_HELD(&pidlock)); 828 829 bzero(ip, sizeof (k_siginfo_t)); 830 ip->si_signo = SIGCLD; 831 ip->si_code = pp->p_wcode; 832 ip->si_pid = pp->p_pid; 833 ip->si_ctid = PRCTID(pp); 834 ip->si_zoneid = pp->p_zone->zone_id; 835 ip->si_status = pp->p_wdata; 836 ip->si_stime = pp->p_stime; 837 ip->si_utime = pp->p_utime; 838 839 if (waitflag) { 840 pp->p_wcode = 0; 841 pp->p_wdata = 0; 842 pp->p_pidflag &= ~CLDPEND; 843 } 844 } 845 846 /* 847 * Wait system call. 848 * Search for a terminated (zombie) child, 849 * finally lay it to rest, and collect its status. 850 * Look also for stopped children, 851 * and pass back status from them. 852 */ 853 int 854 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 855 { 856 int found; 857 proc_t *cp, *pp; 858 int proc_gone; 859 int waitflag = !(options & WNOWAIT); 860 861 /* 862 * Obsolete flag, defined here only for binary compatibility 863 * with old statically linked executables. Delete this when 864 * we no longer care about these old and broken applications. 
865 */ 866 #define _WNOCHLD 0400 867 options &= ~_WNOCHLD; 868 869 if (options == 0 || (options & ~WOPTMASK)) 870 return (EINVAL); 871 872 switch (idtype) { 873 case P_PID: 874 case P_PGID: 875 if (id < 0 || id >= maxpid) 876 return (EINVAL); 877 /* FALLTHROUGH */ 878 case P_ALL: 879 break; 880 default: 881 return (EINVAL); 882 } 883 884 pp = ttoproc(curthread); 885 886 /* 887 * lock parent mutex so that sibling chain can be searched. 888 */ 889 mutex_enter(&pidlock); 890 891 /* 892 * if we are only looking for exited processes and child_ns list 893 * is empty no reason to look at all children. 894 */ 895 if (idtype == P_ALL && 896 (options & ~WNOWAIT) == (WNOHANG | WEXITED) && 897 pp->p_child_ns == NULL) { 898 if (pp->p_child) { 899 mutex_exit(&pidlock); 900 bzero(ip, sizeof (k_siginfo_t)); 901 return (0); 902 } 903 mutex_exit(&pidlock); 904 return (ECHILD); 905 } 906 907 while (pp->p_child != NULL) { 908 909 proc_gone = 0; 910 911 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) { 912 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID)) 913 continue; 914 if (idtype == P_PID && id != cp->p_pid) 915 continue; 916 if (idtype == P_PGID && id != cp->p_pgrp) 917 continue; 918 919 switch (cp->p_wcode) { 920 921 case CLD_TRAPPED: 922 case CLD_STOPPED: 923 case CLD_CONTINUED: 924 cmn_err(CE_PANIC, 925 "waitid: wrong state %d on the p_newstate" 926 " list", cp->p_wcode); 927 break; 928 929 case CLD_EXITED: 930 case CLD_DUMPED: 931 case CLD_KILLED: 932 if (!(options & WEXITED)) { 933 /* 934 * Count how many are already gone 935 * for good. 936 */ 937 proc_gone++; 938 break; 939 } 940 if (!waitflag) { 941 winfo(cp, ip, 0); 942 } else { 943 winfo(cp, ip, 1); 944 freeproc(cp); 945 } 946 mutex_exit(&pidlock); 947 if (waitflag) { /* accept SIGCLD */ 948 sigcld_delete(ip); 949 sigcld_repost(); 950 } 951 return (0); 952 } 953 954 if (idtype == P_PID) 955 break; 956 } 957 958 /* 959 * Wow! None of the threads on the p_sibling_ns list were 960 * interesting threads. 
Check all the kids! 961 */ 962 found = 0; 963 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) { 964 if (idtype == P_PID && id != cp->p_pid) 965 continue; 966 if (idtype == P_PGID && id != cp->p_pgrp) 967 continue; 968 969 switch (cp->p_wcode) { 970 case CLD_TRAPPED: 971 if (!(options & WTRAPPED)) 972 break; 973 winfo(cp, ip, waitflag); 974 mutex_exit(&pidlock); 975 if (waitflag) { /* accept SIGCLD */ 976 sigcld_delete(ip); 977 sigcld_repost(); 978 } 979 return (0); 980 981 case CLD_STOPPED: 982 if (!(options & WSTOPPED)) 983 break; 984 /* Is it still stopped? */ 985 mutex_enter(&cp->p_lock); 986 if (!jobstopped(cp)) { 987 mutex_exit(&cp->p_lock); 988 break; 989 } 990 mutex_exit(&cp->p_lock); 991 winfo(cp, ip, waitflag); 992 mutex_exit(&pidlock); 993 if (waitflag) { /* accept SIGCLD */ 994 sigcld_delete(ip); 995 sigcld_repost(); 996 } 997 return (0); 998 999 case CLD_CONTINUED: 1000 if (!(options & WCONTINUED)) 1001 break; 1002 winfo(cp, ip, waitflag); 1003 mutex_exit(&pidlock); 1004 if (waitflag) { /* accept SIGCLD */ 1005 sigcld_delete(ip); 1006 sigcld_repost(); 1007 } 1008 return (0); 1009 1010 case CLD_EXITED: 1011 case CLD_DUMPED: 1012 case CLD_KILLED: 1013 if (idtype != P_PID && 1014 (cp->p_pidflag & CLDWAITPID)) 1015 continue; 1016 /* 1017 * Don't complain if a process was found in 1018 * the first loop but we broke out of the loop 1019 * because of the arguments passed to us. 1020 */ 1021 if (proc_gone == 0) { 1022 cmn_err(CE_PANIC, 1023 "waitid: wrong state on the" 1024 " p_child list"); 1025 } else { 1026 break; 1027 } 1028 } 1029 1030 found++; 1031 1032 if (idtype == P_PID) 1033 break; 1034 } 1035 1036 /* 1037 * If we found no interesting processes at all, 1038 * break out and return ECHILD. 
1039 */ 1040 if (found + proc_gone == 0) 1041 break; 1042 1043 if (options & WNOHANG) { 1044 mutex_exit(&pidlock); 1045 bzero(ip, sizeof (k_siginfo_t)); 1046 /* 1047 * We should set ip->si_signo = SIGCLD, 1048 * but there is an SVVS test that expects 1049 * ip->si_signo to be zero in this case. 1050 */ 1051 return (0); 1052 } 1053 1054 /* 1055 * If we found no processes of interest that could 1056 * change state while we wait, we don't wait at all. 1057 * Get out with ECHILD according to SVID. 1058 */ 1059 if (found == proc_gone) 1060 break; 1061 1062 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1063 mutex_exit(&pidlock); 1064 return (EINTR); 1065 } 1066 } 1067 mutex_exit(&pidlock); 1068 return (ECHILD); 1069 } 1070 1071 /* 1072 * The wait() system call trap is no longer invoked by libc. 1073 * It is retained only for the benefit of statically linked applications. 1074 * Delete this when we no longer care about these old and broken applications. 1075 */ 1076 int64_t 1077 wait(void) 1078 { 1079 int error; 1080 k_siginfo_t info; 1081 rval_t r; 1082 1083 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1084 return (set_errno(error)); 1085 r.r_val1 = info.si_pid; 1086 r.r_val2 = wstat(info.si_code, info.si_status); 1087 return (r.r_vals); 1088 } 1089 1090 int 1091 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1092 { 1093 int error; 1094 k_siginfo_t info; 1095 1096 if (error = waitid(idtype, id, &info, options)) 1097 return (set_errno(error)); 1098 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1099 return (set_errno(EFAULT)); 1100 return (0); 1101 } 1102 1103 #ifdef _SYSCALL32_IMPL 1104 1105 int 1106 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1107 { 1108 int error; 1109 k_siginfo_t info; 1110 siginfo32_t info32; 1111 1112 if (error = waitid(idtype, id, &info, options)) 1113 return (set_errno(error)); 1114 siginfo_kto32(&info, &info32); 1115 if (copyout(&info32, infop, sizeof (info32))) 1116 return 
(set_errno(EFAULT)); 1117 return (0); 1118 } 1119 1120 #endif /* _SYSCALL32_IMPL */ 1121 1122 void 1123 proc_detach(proc_t *p) 1124 { 1125 proc_t *q; 1126 1127 ASSERT(MUTEX_HELD(&pidlock)); 1128 1129 q = p->p_parent; 1130 ASSERT(q != NULL); 1131 1132 /* 1133 * Take it off the newstate list of its parent 1134 */ 1135 delete_ns(q, p); 1136 1137 if (q->p_child == p) { 1138 q->p_child = p->p_sibling; 1139 /* 1140 * If the parent has no children, it better not 1141 * have any with new states either! 1142 */ 1143 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1144 } 1145 1146 if (p->p_sibling) { 1147 p->p_sibling->p_psibling = p->p_psibling; 1148 } 1149 1150 if (p->p_psibling) { 1151 p->p_psibling->p_sibling = p->p_sibling; 1152 } 1153 } 1154 1155 /* 1156 * Remove zombie children from the process table. 1157 */ 1158 void 1159 freeproc(proc_t *p) 1160 { 1161 proc_t *q; 1162 1163 ASSERT(p->p_stat == SZOMB); 1164 ASSERT(p->p_tlist == NULL); 1165 ASSERT(MUTEX_HELD(&pidlock)); 1166 1167 sigdelq(p, NULL, 0); 1168 if (p->p_killsqp) { 1169 siginfofree(p->p_killsqp); 1170 p->p_killsqp = NULL; 1171 } 1172 1173 prfree(p); /* inform /proc */ 1174 1175 /* 1176 * Don't free the init processes. 1177 * Other dying processes will access it. 1178 */ 1179 if (p == proc_init) 1180 return; 1181 1182 1183 /* 1184 * We wait until now to free the cred structure because a 1185 * zombie process's credentials may be examined by /proc. 1186 * No cred locking needed because there are no threads at this point. 
1187 */ 1188 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1189 crfree(p->p_cred); 1190 if (p->p_corefile != NULL) { 1191 corectl_path_rele(p->p_corefile); 1192 p->p_corefile = NULL; 1193 } 1194 if (p->p_content != NULL) { 1195 corectl_content_rele(p->p_content); 1196 p->p_content = NULL; 1197 } 1198 1199 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1200 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1201 /* 1202 * This should still do the right thing since p_utime/stime 1203 * get set to the correct value on process exit, so it 1204 * should get properly updated 1205 */ 1206 p->p_nextofkin->p_cutime += p->p_utime; 1207 p->p_nextofkin->p_cstime += p->p_stime; 1208 1209 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1210 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1211 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1212 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1213 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1214 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1215 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1216 += p->p_acct[LMS_USER_LOCK]; 1217 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1218 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1219 += p->p_acct[LMS_WAIT_CPU]; 1220 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1221 1222 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1223 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1224 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1225 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1226 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1227 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1228 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1229 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1230 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1231 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1232 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1233 
p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1234 1235 } 1236 1237 q = p->p_nextofkin; 1238 if (q && q->p_orphan == p) 1239 q->p_orphan = p->p_nextorph; 1240 else if (q) { 1241 for (q = q->p_orphan; q; q = q->p_nextorph) 1242 if (q->p_nextorph == p) 1243 break; 1244 ASSERT(q && q->p_nextorph == p); 1245 q->p_nextorph = p->p_nextorph; 1246 } 1247 1248 proc_detach(p); 1249 pid_exit(p); /* frees pid and proc structure */ 1250 } 1251 1252 /* 1253 * Delete process "child" from the newstate list of process "parent" 1254 */ 1255 void 1256 delete_ns(proc_t *parent, proc_t *child) 1257 { 1258 proc_t **ns; 1259 1260 ASSERT(MUTEX_HELD(&pidlock)); 1261 ASSERT(child->p_parent == parent); 1262 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1263 if (*ns == child) { 1264 1265 ASSERT((*ns)->p_parent == parent); 1266 1267 *ns = child->p_sibling_ns; 1268 child->p_sibling_ns = NULL; 1269 return; 1270 } 1271 } 1272 } 1273 1274 /* 1275 * Add process "child" to the new state list of process "parent" 1276 */ 1277 void 1278 add_ns(proc_t *parent, proc_t *child) 1279 { 1280 ASSERT(child->p_sibling_ns == NULL); 1281 child->p_sibling_ns = parent->p_child_ns; 1282 parent->p_child_ns = child; 1283 } 1284