1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/sysmacros.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/user.h> 38 #include <sys/errno.h> 39 #include <sys/proc.h> 40 #include <sys/ucontext.h> 41 #include <sys/procfs.h> 42 #include <sys/vnode.h> 43 #include <sys/acct.h> 44 #include <sys/var.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/wait.h> 48 #include <sys/siginfo.h> 49 #include <sys/procset.h> 50 #include <sys/class.h> 51 #include <sys/file.h> 52 #include <sys/session.h> 53 #include <sys/kmem.h> 54 #include <sys/vtrace.h> 55 #include <sys/prsystm.h> 56 #include <sys/ipc.h> 57 #include <sys/sem_impl.h> 58 #include <c2/audit.h> 59 #include <sys/aio_impl.h> 60 #include <vm/as.h> 61 #include <sys/poll.h> 62 #include <sys/door.h> 63 #include <sys/lwpchan_impl.h> 64 #include <sys/utrap.h> 65 
#include <sys/task.h>
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>

#if defined(__x86)
extern void ldt_free(proc_t *pp);
#endif

/*
 * Build an old-style wait status word from a (code, data) pair.
 *
 * Only the low eight bits of "data" are used.  Depending on the CLD_*
 * value in "code" they are placed in the high byte (exit status, stop
 * signal), left in the low byte (terminating signal, optionally or'ed
 * with WCOREFLG), or discarded entirely (continued).  An unrecognized
 * code panics the system.
 */
int
wstat(int code, int data)
{
	const int low = data & 0377;

	switch (code) {
	case CLD_EXITED:
		return (low << 8);
	case CLD_DUMPED:
		return (low | WCOREFLG);
	case CLD_KILLED:
		return (low);
	case CLD_TRAPPED:
	case CLD_STOPPED:
		return ((low << 8) | WSTOPFLG);
	case CLD_CONTINUED:
		return (WCONTFLG);
	default:
		break;
	}

	cmn_err(CE_PANIC, "wstat: bad code");
	/* NOTREACHED */
	return (low);
}

/*
 * Format a short human-readable description of how a process ended
 * into the caller-supplied buffer "buf" of size "bufsz".  "why" is the
 * CLD_* code and "what" is the accompanying status or signal number.
 * The buffer pointer is handed back so that the call can be used
 * directly as a printf-style argument.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	if (why == CLD_EXITED) {
		(void) snprintf(buf, bufsz, "exited with status %d", what);
	} else if (why == CLD_KILLED) {
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
	} else if (why == CLD_DUMPED) {
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
	} else {
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
	}

	return (buf);
}

/*
 * The exit system call entry point: a voluntary exit, with the
 * caller's argument passed along as the exit status.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed. As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.
Most
 * importantly this includes closing all file descriptors (exec only
 * closes those marked close-on-exec) and resetting signals (exec only
 * resets handled signals, and we need to clear any signals which
 * killed init). Anything else that exec(2) says would be inherited,
 * but would affect the execution of init, needs to be reset.
 */
/*
 * Arguments:
 *   what - exit status or signal number; only used for the warning text
 *   why  - CLD_* code saying how init ended; only used for the warning text
 *
 * Operates on curthread and its process.  Returns 0 if exec_init()
 * succeeded and -1 if it failed (after logging a warning); in the
 * latter case the process state has already been reset as described
 * above.
 */
static int
restart_init(int what, int why)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	user_t *up = PTOU(p);

	vnode_t *oldcd, *oldrd;
	sess_t *sp;
	int i, err;
	char reason_buf[64];
	const char *ipath;

	/*
	 * Let zone admin (and global zone admin if this is for a non-global
	 * zone) know that init has failed and will be restarted.
	 */
	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "init(1M) %s: restarting automatically",
	    exit_reason(reason_buf, sizeof (reason_buf), what, why));

	/* reason_buf was filled in by the exit_reason() call above. */
	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
		    "restarting automatically",
		    p->p_zone->zone_name, p->p_pid, reason_buf);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 * Then close all open file descriptors in the process.
	 */
	pollcleanup();
	closeall(P_FINFO(p));

	/*
	 * Grab p_lock and begin clearing miscellaneous global process
	 * state that needs to be reset before we exec the new init(1M).
	 */

	mutex_enter(&p->p_lock);
	prbarrier(p);

	/*
	 * Undo the "being killed / exiting" markers.  SEXITING was set by
	 * proc_is_exiting() on the way into proc_exit().
	 */
	p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
	up->u_cmask = CMASK;

	/* Discard held and pending signals for this thread ... */
	sigemptyset(&t->t_hold);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);

	/* ... and pending signals for the process as a whole. */
	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);

	sigdelq(p, t, 0);
	sigdelq(p, NULL, 0);

	if (p->p_killsqp) {
		siginfofree(p->p_killsqp);
		p->p_killsqp = NULL;
	}

	/*
	 * Reset any signals that are ignored back to the default disposition.
	 * Other u_signal members will be cleared when exec calls sigdefault().
	 */
	/* u_signal[] and u_sigmask[] are indexed by signal number minus one. */
	for (i = 1; i < NSIG; i++) {
		if (up->u_signal[i - 1] == SIG_IGN) {
			up->u_signal[i - 1] = SIG_DFL;
			sigemptyset(&up->u_sigmask[i - 1]);
		}
	}

	/*
	 * Clear the current signal, any signal info associated with it, and
	 * any signal information from contracts and/or contract templates.
	 */
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo != NULL) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	lwp_ctmpl_clear(lwp);

	/*
	 * Reset both the process root directory and the current working
	 * directory to the root of the zone just as we do during boot.
	 */
	/* One hold is taken for each of the two references stored below. */
	VN_HOLD(p->p_zone->zone_rootvp);
	oldrd = up->u_rdir;
	up->u_rdir = p->p_zone->zone_rootvp;

	VN_HOLD(p->p_zone->zone_rootvp);
	oldcd = up->u_cdir;
	up->u_cdir = p->p_zone->zone_rootvp;

	if (up->u_cwd != NULL) {
		refstr_rele(up->u_cwd);
		up->u_cwd = NULL;
	}

	mutex_exit(&p->p_lock);

	/* The previous vnodes are released only after p_lock is dropped. */
	if (oldrd != NULL)
		VN_RELE(oldrd);
	if (oldcd != NULL)
		VN_RELE(oldcd);

	/*
	 * Free the controlling tty.
	 */
	/*
	 * freectty() is only called when this process's pid structure is
	 * the session's s_sidp and the session has a vnode (s_vp); pidlock
	 * is dropped before the call on both paths.
	 */
	mutex_enter(&pidlock);
	sp = p->p_sessp;
	if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) {
		mutex_exit(&pidlock);
		freectty(sp);
	} else {
		mutex_exit(&pidlock);
	}

	/*
	 * Now exec() the new init(1M) on top of the current process. If we
	 * succeed, the caller will treat this like a successful system call.
	 * If we fail, we issue messages and the caller will proceed with exit.
	 */
	ipath = INGLOBALZONE(p) ? initname : zone_initname;
	err = exec_init(ipath, 0, NULL);

	if (err == 0)
		return (0);

	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "failed to restart init(1M) (err=%d): system reboot required", err);

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
		    "(pid %d, err=%d): zoneadm(1M) boot required",
		    p->p_zone->zone_name, p->p_pid, err);
	}

	return (-1);
}

/*
 * Release resources.
 * Enter zombie state.
 * Wake up parent and init processes,
 * and dispose of children.
 *
 * "why" is a CLD_* code and "what" is the matching exit status or
 * signal number; both are handed straight to proc_exit().  This
 * function returns to its caller only when proc_exit() returns 0.
 */
void
exit(int why, int what)
{
	/*
	 * If proc_exit() fails, then some other lwp in the process
	 * got there first. We just have to call lwp_exit() to allow
	 * the other lwp to finish exiting the process. Otherwise we're
	 * restarting init, and should return.
	 */
	if (proc_exit(why, what) != 0) {
		/*
		 * NOTE(review): p_lock is taken here and not released in
		 * this function; presumably lwp_exit() expects to be
		 * entered with it held -- confirm against lwp_exit().
		 */
		mutex_enter(&curproc->p_lock);
		ASSERT(curproc->p_flag & SEXITLWPS);
		lwp_exit();
		/* NOTREACHED */
	}
}

/*
 * Set the SEXITING flag on the process, after making sure /proc does
 * not have it locked. This is done in more places than proc_exit(),
 * so it is a separate function.
 *
 * p_lock is acquired and released internally.
 */
void
proc_is_exiting(proc_t *p)
{
	mutex_enter(&p->p_lock);
	prbarrier(p);
	p->p_flag |= SEXITING;
	mutex_exit(&p->p_lock);
}

/*
 * Return value:
 *   1 - exitlwps() failed, call (or continue) lwp_exit()
 *   0 - restarting init.
Return through system call path
 */
/*
 * proc_exit() tears down the calling process (the process of curthread).
 * "why" is a CLD_* code and "what" the matching exit status or signal
 * number; they are recorded in p_wcode/p_wdata and folded into a wait
 * status with wstat().
 *
 * Apart from the two return paths listed above, this function does not
 * return: it ends in thread_exit().
 *
 * Rough order of work, as performed below:
 *   - mark the process exiting and get rid of all other lwps
 *   - possibly restart init instead of exiting
 *   - shut down per-process subsystems (doors, schedctl, kaio, lwpchan
 *     cache, DTrace helpers, timers, poll state, profiling cyclic)
 *   - clear signal state and tell /proc the last lwp is going away
 *   - release exec vnodes, open files, controlling tty, LDT/utraps,
 *     semaphore undo state; write accounting records; free the address
 *     space; leave contracts and the pool
 *   - under pidlock, hand orphans to the next of kin and children to init
 *   - become a zombie, drop task/project/zone lwp counts, post SIGCLD
 *     via sigcld(), then free the remaining memory and exit the thread
 */
int
proc_exit(int why, int what)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	zone_t *z = p->p_zone;
	timeout_id_t tmp_id;
	int rv;
	proc_t *q;
	sess_t *sp;
	task_t *tk;
	vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
	sigqueue_t *sqp;
	lwpdir_t *lwpdir;
	uint_t lwpdir_sz;
	lwpdir_t **tidhash;
	uint_t tidhash_sz;
	refstr_t *cwd;
	hrtime_t hrutime, hrstime;

	/*
	 * Stop and discard the process's lwps except for the current one,
	 * unless some other lwp beat us to it. If exitlwps() fails then
	 * return and the calling lwp will call (or continue in) lwp_exit().
	 */
	proc_is_exiting(p);
	if (exitlwps(0) != 0)
		return (1);

	DTRACE_PROC(lwp__exit);
	DTRACE_PROC1(exit, int, why);

	/*
	 * Don't let init exit unless zone_icode() failed its exec, or
	 * we are shutting down the zone or the machine.
	 *
	 * Since we are single threaded, we don't need to lock the
	 * following accesses to zone_proc_initpid.
	 */
	if (p->p_pid == z->zone_proc_initpid) {
		/*
		 * Note the argument order: restart_init() takes
		 * (what, why), the reverse of this function's parameters.
		 */
		if (z->zone_boot_err == 0 &&
		    zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
		    zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN &&
		    restart_init(what, why) == 0)
			return (0);
		/*
		 * Since we didn't or couldn't restart init, we clear
		 * the zone's init state and proceed with exit
		 * processing.
		 */
		z->zone_proc_initpid = -1;
	}

	/*
	 * Allocate a sigqueue now, before we grab locks.
	 * It will be given to sigcld(), below.
	 */
	sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);

	/*
	 * revoke any doors created by the process.
	 */
	if (p->p_door_list)
		door_exit();

	/*
	 * Release schedctl data structures.
	 */
	if (p->p_pagep)
		schedctl_proc_cleanup();

	/*
	 * make sure all pending kaio has completed.
	 */
	if (p->p_aio)
		aio_cleanup_exit();

	/*
	 * discard the lwpchan cache.
	 */
	if (p->p_lcp != NULL)
		lwpchan_destroy_cache(0);

	/*
	 * Clean up any DTrace helper actions or probes for the process.
	 */
	if (p->p_dtrace_helpers != NULL) {
		ASSERT(dtrace_helpers_cleanup != NULL);
		(*dtrace_helpers_cleanup)();
	}

	/* untimeout the realtime timers */
	if (p->p_itimer != NULL)
		timer_exit();

	/* Clear the id before cancelling so it is not cancelled twice. */
	if ((tmp_id = p->p_alarmid) != 0) {
		p->p_alarmid = 0;
		(void) untimeout(tmp_id);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 */
	pollcleanup();

	/* cyclic_remove() is called with cpu_lock held. */
	if (p->p_rprof_cyclic != CYCLIC_NONE) {
		mutex_enter(&cpu_lock);
		cyclic_remove(p->p_rprof_cyclic);
		mutex_exit(&cpu_lock);
	}

	mutex_enter(&p->p_lock);

	/*
	 * Clean up any DTrace probes associated with this process.
	 */
	if (p->p_dtrace_probes) {
		ASSERT(dtrace_fasttrap_exit_ptr != NULL);
		dtrace_fasttrap_exit_ptr(p);
	}

	/*
	 * p_lock is dropped around untimeout(); the loop re-reads
	 * p_itimerid afterwards in case it was set again meanwhile.
	 */
	while ((tmp_id = p->p_itimerid) != 0) {
		p->p_itimerid = 0;
		mutex_exit(&p->p_lock);
		(void) untimeout(tmp_id);
		mutex_enter(&p->p_lock);
	}

	lwp_cleanup();

	/*
	 * We are about to exit; prevent our resource associations from
	 * being changed.
	 */
	pool_barrier_enter();

	/*
	 * Block the process against /proc now that we have really
	 * acquired p->p_lock (to manipulate p_tlist at least).
	 */
	prbarrier(p);

	/*
	 * NOTE(review): this conditional block refers to "code", but no
	 * such variable is declared in this function (the parameter is
	 * named "why"), and "u" is not declared here either.  It would
	 * not compile as written if SUN_SRC_COMPAT were defined -- confirm
	 * whether the block is dead and can be removed.
	 */
#ifdef	SUN_SRC_COMPAT
	if (code == CLD_KILLED)
		u.u_acflag |= AXSIG;
#endif
	/* Ignore everything from now on and drop all pending signals. */
	sigfillset(&p->p_ignore);
	sigemptyset(&p->p_siginfo);
	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);
	sigemptyset(&p->p_sigmask);
	sigdelq(p, t, 0);
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	p->p_flag &= ~(SKILLED | SEXTKILLED);
	if (lwp->lwp_curinfo) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}

	/* We must be the only lwp left, with no zombie lwps. */
	t->t_proc_flag |= TP_LWPEXIT;
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
	prlwpexit(t);		/* notify /proc */
	lwp_hash_out(p, t->t_tid);
	prexit(p);

	p->p_lwpcnt = 0;
	p->p_tlist = NULL;
	sigqfree(p);
	term_mstate(t);
	p->p_mterm = gethrtime();

	/*
	 * Detach the exec vnodes from the process under p_lock and
	 * release them after the lock is dropped.
	 */
	exec_vp = p->p_exec;
	execdir_vp = p->p_execdir;
	p->p_exec = NULLVP;
	p->p_execdir = NULLVP;
	mutex_exit(&p->p_lock);
	if (exec_vp)
		VN_RELE(exec_vp);
	if (execdir_vp)
		VN_RELE(execdir_vp);

	pr_free_watched_pages(p);

	closeall(P_FINFO(p));

	/*
	 * Free the controlling tty under the same condition as in
	 * restart_init(); pidlock is dropped before freectty() is called.
	 */
	mutex_enter(&pidlock);
	sp = p->p_sessp;
	if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) {
		mutex_exit(&pidlock);
		freectty(sp);
	} else
		mutex_exit(&pidlock);

#if defined(__x86)
	/*
	 * If the process was using a private LDT then free it.
	 */
	if (p->p_ldt)
		ldt_free(p);
#endif

#if defined(__sparc)
	if (p->p_utraps != NULL)
		utrap_free(p);
#endif
	if (p->p_semacct)	/* IPC semaphore exit */
		semexit(p);
	/* Old-style wait status; acct() is given only its low byte. */
	rv = wstat(why, what);

	acct(rv & 0xff);
	exacct_commit_proc(p, rv);

	/*
	 * Release any resources associated with C2 auditing
	 */
#ifdef C2_AUDIT
	if (audit_active) {
		/*
		 * audit exit system call
		 */
		audit_exit(why, what);
	}
#endif

	/*
	 * Free address space.
	 */
	relvm();

	/*
	 * Release held contracts.
	 */
	contract_exit(p);

	/*
	 * Depart our encapsulating process contract.
	 */
	if ((p->p_flag & SSYS) == 0) {
		ASSERT(p->p_ct_process);
		contract_process_exit(p->p_ct_process, p, rv);
	}

	/*
	 * Remove pool association, and block if requested by pool_do_bind.
	 */
	mutex_enter(&p->p_lock);
	ASSERT(p->p_pool->pool_ref > 0);
	atomic_add_32(&p->p_pool->pool_ref, -1);
	p->p_pool = pool_default;
	/*
	 * Now that our address space has been freed and all other threads
	 * in this process have exited, set the PEXITED pool flag. This
	 * tells the pools subsystems to ignore this process if it was
	 * requested to rebind this process to a new pool.
	 */
	p->p_poolflag |= PEXITED;
	pool_barrier_exit();
	mutex_exit(&p->p_lock);

	/* pidlock is held from here until after sigcld(p, sqp) below. */
	mutex_enter(&pidlock);

	/*
	 * Delete this process from the newstate list of its parent. We
	 * will put it in the right place in the sigcld in the end.
	 */
	delete_ns(p->p_parent, p);

	/*
	 * Reassign the orphans to the next of kin.
	 * Don't rearrange init's orphanage.
	 */
	if ((q = p->p_orphan) != NULL && p != proc_init) {

		proc_t *nokp = p->p_nextofkin;

		/*
		 * Re-point every orphan at the next of kin; the loop
		 * leaves q at the last entry of our orphan list so the
		 * whole list can be spliced onto the front of nokp's.
		 */
		for (;;) {
			q->p_nextofkin = nokp;
			if (q->p_nextorph == NULL)
				break;
			q = q->p_nextorph;
		}
		q->p_nextorph = nokp->p_orphan;
		nokp->p_orphan = p->p_orphan;
		p->p_orphan = NULL;
	}

	/*
	 * Reassign the children to init.
	 * Don't try to assign init's children to init.
	 */
	if ((q = p->p_child) != NULL && p != proc_init) {
		struct proc	*np;
		struct proc	*initp = proc_init;
		boolean_t	setzonetop = B_FALSE;

		/* Children of a non-global-zone process get SZONETOP. */
		if (!INGLOBALZONE(curproc))
			setzonetop = B_TRUE;

		pgdetach(p);

		do {
			/* Save the next sibling; q's links change below. */
			np = q->p_sibling;
			/*
			 * Delete it from its current parent new state
			 * list and add it to init new state list
			 */
			delete_ns(q->p_parent, q);

			/*
			 * NOTE(review): the parent pid is hard-coded to 1
			 * while p_parent becomes proc_init; presumably
			 * proc_init always has pid 1 -- confirm.
			 */
			q->p_ppid = 1;
			if (setzonetop) {
				mutex_enter(&q->p_lock);
				q->p_flag |= SZONETOP;
				mutex_exit(&q->p_lock);
			}
			q->p_parent = initp;

			/*
			 * Since q will be the first child,
			 * it will not have a previous sibling.
			 */
			q->p_psibling = NULL;
			if (initp->p_child) {
				initp->p_child->p_psibling = q;
			}
			q->p_sibling = initp->p_child;
			initp->p_child = q;
			/* A child flagged P_PR_PTRACE is sent SIGKILL. */
			if (q->p_proc_flag & P_PR_PTRACE) {
				mutex_enter(&q->p_lock);
				sigtoproc(q, NULL, SIGKILL);
				mutex_exit(&q->p_lock);
			}
			/*
			 * sigcld() will add the child to parents
			 * newstate list.
			 */
			if (q->p_stat == SZOMB)
				sigcld(q, NULL);
		} while ((q = np) != NULL);

		p->p_child = NULL;
		ASSERT(p->p_child_ns == NULL);
	}

	TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);

	mutex_enter(&p->p_lock);
	CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */

	/*
	 * Final user/system times: our own aggregated microstate time,
	 * converted to ticks, plus what was accumulated from children.
	 */
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
	p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;

	/* Fold the children's microstate accounting into our own. */
	p->p_acct[LMS_USER]	+= p->p_cacct[LMS_USER];
	p->p_acct[LMS_SYSTEM]	+= p->p_cacct[LMS_SYSTEM];
	p->p_acct[LMS_TRAP]	+= p->p_cacct[LMS_TRAP];
	p->p_acct[LMS_TFAULT]	+= p->p_cacct[LMS_TFAULT];
	p->p_acct[LMS_DFAULT]	+= p->p_cacct[LMS_DFAULT];
	p->p_acct[LMS_KFAULT]	+= p->p_cacct[LMS_KFAULT];
	p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
	p->p_acct[LMS_SLEEP]	+= p->p_cacct[LMS_SLEEP];
	p->p_acct[LMS_WAIT_CPU]	+= p->p_cacct[LMS_WAIT_CPU];
	p->p_acct[LMS_STOPPED]	+= p->p_cacct[LMS_STOPPED];

	/* Likewise for the resource usage counters. */
	p->p_ru.minflt	+= p->p_cru.minflt;
	p->p_ru.majflt	+= p->p_cru.majflt;
	p->p_ru.nswap	+= p->p_cru.nswap;
	p->p_ru.inblock	+= p->p_cru.inblock;
	p->p_ru.oublock	+= p->p_cru.oublock;
	p->p_ru.msgsnd	+= p->p_cru.msgsnd;
	p->p_ru.msgrcv	+= p->p_cru.msgrcv;
	p->p_ru.nsignals += p->p_cru.nsignals;
	p->p_ru.nvcsw	+= p->p_cru.nvcsw;
	p->p_ru.nivcsw	+= p->p_cru.nivcsw;
	p->p_ru.sysc	+= p->p_cru.sysc;
	p->p_ru.ioch	+= p->p_cru.ioch;

	/* Become a zombie and record how we ended, for winfo(). */
	p->p_stat = SZOMB;
	p->p_proc_flag &= ~P_PR_PTRACE;
	p->p_wdata = what;
	p->p_wcode = (char)why;

	/* Remember these now; they are released after sigcld(), below. */
	cdir = PTOU(p)->u_cdir;
	rdir = PTOU(p)->u_rdir;
	cwd = PTOU(p)->u_cwd;

	/*
	 * Release resource controls, as they are no longer enforceable.
	 */
	rctl_set_free(p->p_rctls);

	/*
	 * Give up task and project memberships. Decrement tk_nlwps counter
	 * for our task.max-lwps resource control. An extended accounting
	 * record, if that facility is active, is scheduled to be written.
	 * Zombie processes are false members of task0 for the remainder of
	 * their lifetime; no accounting information is recorded for them.
	 */
	tk = p->p_task;

	/* The task, project and zone lwp counts share zone_nlwps_lock. */
	mutex_enter(&p->p_zone->zone_nlwps_lock);
	tk->tk_nlwps--;
	tk->tk_proj->kpj_nlwps--;
	p->p_zone->zone_nlwps--;
	mutex_exit(&p->p_zone->zone_nlwps_lock);
	task_detach(p);
	p->p_task = task0p;

	/*
	 * Clear the lwp directory and the lwpid hash table
	 * now that /proc can't bother us any more.
	 * We free the memory below, after dropping p->p_lock.
	 */
	lwpdir = p->p_lwpdir;
	lwpdir_sz = p->p_lwpdir_sz;
	tidhash = p->p_tidhash;
	tidhash_sz = p->p_tidhash_sz;
	p->p_lwpdir = NULL;
	p->p_lwpfree = NULL;
	p->p_lwpdir_sz = 0;
	p->p_tidhash = NULL;
	p->p_tidhash_sz = 0;

	/*
	 * curthread's proc pointer is changed to point at p0 because
	 * curthread's original proc pointer can be freed as soon as
	 * the child sends a SIGCLD to its parent.
	 */
	t->t_procp = &p0;

	/*
	 * After sigcld() "p" may be freed at any time (see above), so
	 * nothing below dereferences it; only values saved in locals
	 * are used.
	 */
	mutex_exit(&p->p_lock);
	sigcld(p, sqp);
	mutex_exit(&pidlock);

	/* Drop our use of the task that was saved in tk above. */
	task_rele(tk);

	kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
	kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *));

	/*
	 * We don't release u_cdir and u_rdir until SZOMB is set.
	 * This protects us against dofusers().
	 */
	VN_RELE(cdir);
	if (rdir)
		VN_RELE(rdir);
	if (cwd)
		refstr_rele(cwd);

	lwp_pcb_exit();

	thread_exit();
	/* NOTREACHED */
}

/*
 * Format siginfo structure for wait system calls.
*/
/*
 * winfo() zeroes *ip and fills it in as a SIGCLD siginfo describing
 * child "pp": wait code and status, pid, contract id, zone id and the
 * child's user/system times.
 *
 * If "waitflag" is non-zero the child's pending wait state is consumed:
 * p_wcode and p_wdata are cleared and CLDPEND is removed from
 * p_pidflag.  With waitflag zero the child is left untouched.
 *
 * The caller must hold pidlock.
 */
void
winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
{
	ASSERT(MUTEX_HELD(&pidlock));

	bzero(ip, sizeof (k_siginfo_t));
	ip->si_signo = SIGCLD;
	ip->si_code = pp->p_wcode;
	ip->si_pid = pp->p_pid;
	ip->si_ctid = PRCTID(pp);
	ip->si_zoneid = pp->p_zone->zone_id;
	ip->si_status = pp->p_wdata;
	ip->si_stime = pp->p_stime;
	ip->si_utime = pp->p_utime;

	if (waitflag) {
		pp->p_wcode = 0;
		pp->p_wdata = 0;
		pp->p_pidflag &= ~CLDPEND;
	}
}

/*
 * Wait system call.
 * Search for a terminated (zombie) child,
 * finally lay it to rest, and collect its status.
 * Look also for stopped children,
 * and pass back status from them.
 *
 * Arguments:
 *   idtype  - P_PID, P_PGID or P_ALL; anything else is EINVAL
 *   id      - pid or process group id to match (range-checked against
 *             maxpid for P_PID and P_PGID)
 *   ip      - kernel siginfo to fill in
 *   options - W* flags; at least one bit within WOPTMASK is required.
 *             WNOWAIT means report without consuming the child's state.
 *
 * Returns 0 on success, or an errno value (not yet passed through
 * set_errno()): EINVAL for bad arguments, ECHILD when there is nothing
 * that could ever match, EINTR when the sleep is interrupted.  With
 * WNOHANG and nothing ready, returns 0 with *ip zeroed.
 */
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
	int found;
	proc_t *cp, *pp;
	proc_t **nsp;
	int proc_gone;
	int waitflag = !(options & WNOWAIT);

	/*
	 * Obsolete flag, defined here only for binary compatibility
	 * with old statically linked executables. Delete this when
	 * we no longer care about these old and broken applications.
	 */
#define	_WNOCHLD	0400
	options &= ~_WNOCHLD;

	if (options == 0 || (options & ~WOPTMASK))
		return (EINVAL);

	switch (idtype) {
	case P_PID:
	case P_PGID:
		if (id < 0 || id >= maxpid)
			return (EINVAL);
		/* FALLTHROUGH */
	case P_ALL:
		break;
	default:
		return (EINVAL);
	}

	pp = ttoproc(curthread);

	/*
	 * lock parent mutex so that sibling chain can be searched.
	 */
	mutex_enter(&pidlock);

	/*
	 * if we are only looking for exited processes and child_ns list
	 * is empty no reason to look at all children.
	 */
	if (idtype == P_ALL &&
	    (options & (WOPTMASK & ~WNOWAIT)) == (WNOHANG | WEXITED) &&
	    pp->p_child_ns == NULL) {

		/* Children exist but none has exited: "nothing yet". */
		if (pp->p_child) {
			mutex_exit(&pidlock);
			bzero(ip, sizeof (k_siginfo_t));
			return (0);
		}
		mutex_exit(&pidlock);
		return (ECHILD);
	}

	/* Each pass rescans from scratch; we sleep at the bottom. */
	while ((cp = pp->p_child) != NULL) {

		proc_gone = 0;

		/* First pass: children on the new-state list. */
		for (nsp = &pp->p_child_ns; *nsp; nsp = &(*nsp)->p_sibling_ns) {
			if (idtype == P_PID && id != (*nsp)->p_pid) {
				continue;
			}
			if (idtype == P_PGID && id != (*nsp)->p_pgrp) {
				continue;
			}

			switch ((*nsp)->p_wcode) {

			case CLD_TRAPPED:
			case CLD_STOPPED:
			case CLD_CONTINUED:
				cmn_err(CE_PANIC,
				    "waitid: wrong state %d on the p_newstate"
				    " list", (*nsp)->p_wcode);
				break;

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				if (!(options & WEXITED)) {
					/*
					 * Count how many are already gone
					 * for good.
					 */
					proc_gone++;
					break;
				}
				if (!waitflag) {
					winfo((*nsp), ip, 0);
				} else {
					/*
					 * Take a private copy of the pointer:
					 * freeproc() unlinks the child from
					 * the list nsp points into.
					 */
					proc_t *xp = *nsp;
					winfo(xp, ip, 1);
					freeproc(xp);
				}
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);
			}

			/* A pid matches one process; stop after it. */
			if (idtype == P_PID)
				break;
		}

		/*
		 * Wow! None of the threads on the p_sibling_ns list were
		 * interesting threads. Check all the kids!
		 */
		found = 0;
		cp = pp->p_child;
		/*
		 * "continue" inside this do-while jumps to the loop
		 * condition, i.e. it advances to the next sibling.
		 */
		do {
			if (idtype == P_PID && id != cp->p_pid) {
				continue;
			}
			if (idtype == P_PGID && id != cp->p_pgrp) {
				continue;
			}

			found++;

			switch (cp->p_wcode) {
			case CLD_TRAPPED:
				if (!(options & WTRAPPED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_STOPPED:
				if (!(options & WSTOPPED))
					break;
				/* Is it still stopped? */
				mutex_enter(&cp->p_lock);
				if (!jobstopped(cp)) {
					mutex_exit(&cp->p_lock);
					break;
				}
				mutex_exit(&cp->p_lock);
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_CONTINUED:
				if (!(options & WCONTINUED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				/*
				 * Don't complain if a process was found in
				 * the first loop but we broke out of the loop
				 * because of the arguments passed to us.
				 */
				if (proc_gone == 0) {
					cmn_err(CE_PANIC,
					    "waitid: wrong state on the"
					    " p_child list");
				} else {
					break;
				}
			}

			if (idtype == P_PID)
				break;
		} while ((cp = cp->p_sibling) != NULL);

		/*
		 * If we found no interesting processes at all,
		 * break out and return ECHILD.
		 */
		if (found + proc_gone == 0)
			break;

		if (options & WNOHANG) {
			bzero(ip, sizeof (k_siginfo_t));
			/*
			 * We should set ip->si_signo = SIGCLD,
			 * but there is an SVVS test that expects
			 * ip->si_signo to be zero in this case.
			 */
			mutex_exit(&pidlock);
			return (0);
		}

		/*
		 * If we found no processes of interest that could
		 * change state while we wait, we don't wait at all.
		 * Get out with ECHILD according to SVID.
		 */
		if (found == proc_gone)
			break;

		/*
		 * Sleep on our own p_cv with pidlock as the associated
		 * mutex; a zero return is treated as an interrupted wait.
		 */
		if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
			mutex_exit(&pidlock);
			return (EINTR);
		}
	}
	mutex_exit(&pidlock);
	return (ECHILD);
}

/*
 * For implementations that don't require binary compatibility,
 * the wait system call may be made into a library call to the
 * waitid system call.
1060 */ 1061 int64_t 1062 wait(void) 1063 { 1064 int error; 1065 k_siginfo_t info; 1066 rval_t r; 1067 1068 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1069 return (set_errno(error)); 1070 r.r_val1 = info.si_pid; 1071 r.r_val2 = wstat(info.si_code, info.si_status); 1072 return (r.r_vals); 1073 } 1074 1075 int 1076 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1077 { 1078 int error; 1079 k_siginfo_t info; 1080 1081 if (error = waitid(idtype, id, &info, options)) 1082 return (set_errno(error)); 1083 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1084 return (set_errno(EFAULT)); 1085 return (0); 1086 } 1087 1088 #ifdef _SYSCALL32_IMPL 1089 1090 int 1091 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1092 { 1093 int error; 1094 k_siginfo_t info; 1095 siginfo32_t info32; 1096 1097 if (error = waitid(idtype, id, &info, options)) 1098 return (set_errno(error)); 1099 siginfo_kto32(&info, &info32); 1100 if (copyout(&info32, infop, sizeof (info32))) 1101 return (set_errno(EFAULT)); 1102 return (0); 1103 } 1104 1105 #endif /* _SYSCALL32_IMPL */ 1106 1107 void 1108 proc_detach(proc_t *p) 1109 { 1110 proc_t *q; 1111 1112 ASSERT(MUTEX_HELD(&pidlock)); 1113 1114 q = p->p_parent; 1115 ASSERT(q != NULL); 1116 1117 /* 1118 * Take it off the newstate list of its parent 1119 */ 1120 delete_ns(q, p); 1121 1122 if (q->p_child == p) { 1123 q->p_child = p->p_sibling; 1124 /* 1125 * If the parent has no children, it better not 1126 * have any with new states either! 1127 */ 1128 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1129 } 1130 1131 if (p->p_sibling) { 1132 p->p_sibling->p_psibling = p->p_psibling; 1133 } 1134 1135 if (p->p_psibling) { 1136 p->p_psibling->p_sibling = p->p_sibling; 1137 } 1138 } 1139 1140 /* 1141 * Remove zombie children from the process table. 
*/
/*
 * freeproc() reaps zombie process "p": it drops p's remaining queued
 * signals, tells /proc, releases its credentials and core settings,
 * charges its accumulated times and resource usage to its next of kin
 * (unless the next of kin has SNOWAIT set or ignores SIGCLD), unlinks
 * it from the orphan and sibling lists, and finally frees it via
 * pid_exit().
 *
 * The caller must hold pidlock; p must be a zombie with no threads.
 * If p is proc_init, only the signal cleanup and prfree() are done.
 */
void
freeproc(proc_t *p)
{
	proc_t *q;

	ASSERT(p->p_stat == SZOMB);
	ASSERT(p->p_tlist == NULL);
	ASSERT(MUTEX_HELD(&pidlock));

	sigdelq(p, NULL, 0);
	if (p->p_killsqp) {
		siginfofree(p->p_killsqp);
		p->p_killsqp = NULL;
	}

	prfree(p);	/* inform /proc */

	/*
	 * Don't free the init processes.
	 * Other dying processes will access it.
	 */
	if (p == proc_init)
		return;


	/*
	 * We wait until now to free the cred structure because a
	 * zombie process's credentials may be examined by /proc.
	 * No cred locking needed because there are no threads at this point.
	 */
	upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
	crfree(p->p_cred);
	if (p->p_corefile != NULL) {
		corectl_path_rele(p->p_corefile);
		p->p_corefile = NULL;
	}
	if (p->p_content != NULL) {
		corectl_content_rele(p->p_content);
		p->p_content = NULL;
	}

	/*
	 * Pass our totals up to the next of kin's "children" counters,
	 * but only if it exists, does not have SNOWAIT set, and does not
	 * ignore SIGCLD (u_signal[] is indexed by signal number - 1).
	 */
	if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
	    (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
		/*
		 * This should still do the right thing since p_utime/stime
		 * get set to the correct value on process exit, so it
		 * should get properly updated
		 */
		p->p_nextofkin->p_cutime += p->p_utime;
		p->p_nextofkin->p_cstime += p->p_stime;

		p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
		p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
		p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
		p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
		p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
		p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
		p->p_nextofkin->p_cacct[LMS_USER_LOCK]
		    += p->p_acct[LMS_USER_LOCK];
		p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
		p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
		    += p->p_acct[LMS_WAIT_CPU];
		p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];

		p->p_nextofkin->p_cru.minflt	+= p->p_ru.minflt;
		p->p_nextofkin->p_cru.majflt	+= p->p_ru.majflt;
		p->p_nextofkin->p_cru.nswap	+= p->p_ru.nswap;
		p->p_nextofkin->p_cru.inblock	+= p->p_ru.inblock;
		p->p_nextofkin->p_cru.oublock	+= p->p_ru.oublock;
		p->p_nextofkin->p_cru.msgsnd	+= p->p_ru.msgsnd;
		p->p_nextofkin->p_cru.msgrcv	+= p->p_ru.msgrcv;
		p->p_nextofkin->p_cru.nsignals	+= p->p_ru.nsignals;
		p->p_nextofkin->p_cru.nvcsw	+= p->p_ru.nvcsw;
		p->p_nextofkin->p_cru.nivcsw	+= p->p_ru.nivcsw;
		p->p_nextofkin->p_cru.sysc	+= p->p_ru.sysc;
		p->p_nextofkin->p_cru.ioch	+= p->p_ru.ioch;

	}

	/*
	 * Unlink p from its next of kin's singly linked orphan list:
	 * either p is the head, or we search for its predecessor.
	 */
	q = p->p_nextofkin;
	if (q && q->p_orphan == p)
		q->p_orphan = p->p_nextorph;
	else if (q) {
		for (q = q->p_orphan; q; q = q->p_nextorph)
			if (q->p_nextorph == p)
				break;
		/* p is expected to be on the list; q is its predecessor. */
		ASSERT(q && q->p_nextorph == p);
		q->p_nextorph = p->p_nextorph;
	}

	proc_detach(p);
	pid_exit(p);	/* frees pid and proc structure */
}

/*
 * Delete process "child" from the newstate list of process "parent"
 *
 * The caller must hold pidlock and "parent" must be child's p_parent.
 * If the child is not on the list, nothing is changed.
 */
void
delete_ns(proc_t *parent, proc_t *child)
{
	proc_t **ns;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(child->p_parent == parent);
	/*
	 * Walk the list through a pointer to each link so that the head
	 * and interior entries are unlinked the same way.
	 */
	for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
		if (*ns == child) {

			ASSERT((*ns)->p_parent == parent);

			*ns = child->p_sibling_ns;
			child->p_sibling_ns = NULL;
			return;
		}
	}
}

/*
 * Add process "child" to the new state list of process "parent"
 *
 * The child is pushed onto the front of the list and must not already
 * be linked onto a new-state list (its p_sibling_ns must be NULL).
 * NOTE(review): unlike delete_ns(), this function does not assert that
 * pidlock is held; presumably callers hold it -- confirm.
 */
void
add_ns(proc_t *parent, proc_t *child)
{
	ASSERT(child->p_sibling_ns == NULL);
	child->p_sibling_ns = parent->p_child_ns;
	parent->p_child_ns = child;
}