1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/sysmacros.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/user.h> 38 #include <sys/errno.h> 39 #include <sys/proc.h> 40 #include <sys/ucontext.h> 41 #include <sys/procfs.h> 42 #include <sys/vnode.h> 43 #include <sys/acct.h> 44 #include <sys/var.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/wait.h> 48 #include <sys/siginfo.h> 49 #include <sys/procset.h> 50 #include <sys/class.h> 51 #include <sys/file.h> 52 #include <sys/session.h> 53 #include <sys/kmem.h> 54 #include <sys/vtrace.h> 55 #include <sys/prsystm.h> 56 #include <sys/ipc.h> 57 #include <sys/sem_impl.h> 58 #include <c2/audit.h> 59 #include <sys/aio_impl.h> 60 #include <vm/as.h> 61 #include <sys/poll.h> 62 #include <sys/door.h> 63 #include <sys/lwpchan_impl.h> 64 #include <sys/utrap.h> 65 
#include <sys/task.h>
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>

#if defined(__x86)
extern void ldt_free(proc_t *pp);
#endif

/*
 * Build an old style wait status word from a CLD_* code and its data.
 * Only the low eight bits of data are used:
 *
 *	CLD_EXITED		data in the high byte
 *	CLD_DUMPED		data in the low byte, WCOREFLG set
 *	CLD_KILLED		data in the low byte
 *	CLD_TRAPPED/STOPPED	data in the high byte, WSTOPFLG set
 *	CLD_CONTINUED		WCONTFLG only
 *
 * Any other code is a panic.
 */
int
wstat(int code, int data)
{
	int lowbyte = (data & 0377);

	switch (code) {
	case CLD_EXITED:
		return (lowbyte << 8);
	case CLD_DUMPED:
		return (lowbyte | WCOREFLG);
	case CLD_KILLED:
		return (lowbyte);
	case CLD_TRAPPED:
	case CLD_STOPPED:
		return ((lowbyte << 8) | WSTOPFLG);
	case CLD_CONTINUED:
		return (WCONTFLG);
	default:
		cmn_err(CE_PANIC, "wstat: bad code");
		/* NOTREACHED */
	}
	return (lowbyte);
}

/*
 * Format a human readable description of why a process went away into
 * buf (at most bufsz bytes) and return buf so that the call can be used
 * directly as a printf-style argument.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	if (why == CLD_EXITED) {
		(void) snprintf(buf, bufsz, "exited with status %d", what);
	} else if (why == CLD_KILLED) {
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
	} else if (why == CLD_DUMPED) {
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
	} else {
		(void) snprintf(buf, bufsz,
		    "encountered unknown error (%d, %d)", why, what);
	}

	return (buf);
}

/*
 * exit system call: a normal exit carrying the caller's status argument.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed.  As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.
Most 151 * importantly this includes closing all file descriptors (exec only 152 * closes those marked close-on-exec) and resetting signals (exec only 153 * resets handled signals, and we need to clear any signals which 154 * killed init). Anything else that exec(2) says would be inherited, 155 * but would affect the execution of init, needs to be reset. 156 */ 157 static int 158 restart_init(int what, int why) 159 { 160 kthread_t *t = curthread; 161 klwp_t *lwp = ttolwp(t); 162 proc_t *p = ttoproc(t); 163 user_t *up = PTOU(p); 164 165 vnode_t *oldcd, *oldrd; 166 sess_t *sp; 167 int i, err; 168 char reason_buf[64]; 169 const char *ipath; 170 171 /* 172 * Let zone admin (and global zone admin if this is for a non-global 173 * zone) know that init has failed and will be restarted. 174 */ 175 zcmn_err(p->p_zone->zone_id, CE_WARN, 176 "init(1M) %s: restarting automatically", 177 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 178 179 if (!INGLOBALZONE(p)) { 180 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 181 "restarting automatically", 182 p->p_zone->zone_name, p->p_pid, reason_buf); 183 } 184 185 /* 186 * Remove any fpollinfo_t's for this (last) thread from our file 187 * descriptors so closeall() can ASSERT() that they're all gone. 188 * Then close all open file descriptors in the process. 189 */ 190 pollcleanup(); 191 closeall(P_FINFO(p)); 192 193 /* 194 * Grab p_lock and begin clearing miscellaneous global process 195 * state that needs to be reset before we exec the new init(1M). 
196 */ 197 198 mutex_enter(&p->p_lock); 199 prbarrier(p); 200 201 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 202 up->u_cmask = CMASK; 203 204 sigemptyset(&t->t_hold); 205 sigemptyset(&t->t_sig); 206 sigemptyset(&t->t_extsig); 207 208 sigemptyset(&p->p_sig); 209 sigemptyset(&p->p_extsig); 210 211 sigdelq(p, t, 0); 212 sigdelq(p, NULL, 0); 213 214 if (p->p_killsqp) { 215 siginfofree(p->p_killsqp); 216 p->p_killsqp = NULL; 217 } 218 219 /* 220 * Reset any signals that are ignored back to the default disposition. 221 * Other u_signal members will be cleared when exec calls sigdefault(). 222 */ 223 for (i = 1; i < NSIG; i++) { 224 if (up->u_signal[i - 1] == SIG_IGN) { 225 up->u_signal[i - 1] = SIG_DFL; 226 sigemptyset(&up->u_sigmask[i - 1]); 227 } 228 } 229 230 /* 231 * Clear the current signal, any signal info associated with it, and 232 * any signal information from contracts and/or contract templates. 233 */ 234 lwp->lwp_cursig = 0; 235 lwp->lwp_extsig = 0; 236 if (lwp->lwp_curinfo != NULL) { 237 siginfofree(lwp->lwp_curinfo); 238 lwp->lwp_curinfo = NULL; 239 } 240 lwp_ctmpl_clear(lwp); 241 242 /* 243 * Reset both the process root directory and the current working 244 * directory to the root of the zone just as we do during boot. 245 */ 246 VN_HOLD(p->p_zone->zone_rootvp); 247 oldrd = up->u_rdir; 248 up->u_rdir = p->p_zone->zone_rootvp; 249 250 VN_HOLD(p->p_zone->zone_rootvp); 251 oldcd = up->u_cdir; 252 up->u_cdir = p->p_zone->zone_rootvp; 253 254 if (up->u_cwd != NULL) { 255 refstr_rele(up->u_cwd); 256 up->u_cwd = NULL; 257 } 258 259 mutex_exit(&p->p_lock); 260 261 if (oldrd != NULL) 262 VN_RELE(oldrd); 263 if (oldcd != NULL) 264 VN_RELE(oldcd); 265 266 /* 267 * Free the controlling tty. 
268 */ 269 mutex_enter(&pidlock); 270 sp = p->p_sessp; 271 if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { 272 mutex_exit(&pidlock); 273 freectty(sp); 274 } else { 275 mutex_exit(&pidlock); 276 } 277 278 /* 279 * Now exec() the new init(1M) on top of the current process. If we 280 * succeed, the caller will treat this like a successful system call. 281 * If we fail, we issue messages and the caller will proceed with exit. 282 */ 283 ipath = INGLOBALZONE(p) ? initname : zone_initname; 284 err = exec_init(ipath, 0, NULL); 285 286 if (err == 0) 287 return (0); 288 289 zcmn_err(p->p_zone->zone_id, CE_WARN, 290 "failed to restart init(1M) (err=%d): system reboot required", err); 291 292 if (!INGLOBALZONE(p)) { 293 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 294 "(pid %d, err=%d): zoneadm(1M) boot required", 295 p->p_zone->zone_name, p->p_pid, err); 296 } 297 298 return (-1); 299 } 300 301 /* 302 * Release resources. 303 * Enter zombie state. 304 * Wake up parent and init processes, 305 * and dispose of children. 306 */ 307 void 308 exit(int why, int what) 309 { 310 /* 311 * If proc_exit() fails, then some other lwp in the process 312 * got there first. We just have to call lwp_exit() to allow 313 * the other lwp to finish exiting the process. Otherwise we're 314 * restarting init, and should return. 315 */ 316 if (proc_exit(why, what) != 0) { 317 mutex_enter(&curproc->p_lock); 318 ASSERT(curproc->p_flag & SEXITLWPS); 319 lwp_exit(); 320 /* NOTREACHED */ 321 } 322 } 323 324 /* 325 * Set the SEXITING flag on the process, after making sure /proc does 326 * not have it locked. This is done in more places than proc_exit(), 327 * so it is a separate function. 328 */ 329 void 330 proc_is_exiting(proc_t *p) 331 { 332 mutex_enter(&p->p_lock); 333 prbarrier(p); 334 p->p_flag |= SEXITING; 335 mutex_exit(&p->p_lock); 336 } 337 338 /* 339 * Return value: 340 * 1 - exitlwps() failed, call (or continue) lwp_exit() 341 * 0 - restarting init. 
Return through system call path 342 */ 343 int 344 proc_exit(int why, int what) 345 { 346 kthread_t *t = curthread; 347 klwp_t *lwp = ttolwp(t); 348 proc_t *p = ttoproc(t); 349 zone_t *z = p->p_zone; 350 timeout_id_t tmp_id; 351 int rv; 352 proc_t *q; 353 sess_t *sp; 354 task_t *tk; 355 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 356 sigqueue_t *sqp; 357 lwpdir_t *lwpdir; 358 uint_t lwpdir_sz; 359 lwpdir_t **tidhash; 360 uint_t tidhash_sz; 361 refstr_t *cwd; 362 hrtime_t hrutime, hrstime; 363 364 /* 365 * Stop and discard the process's lwps except for the current one, 366 * unless some other lwp beat us to it. If exitlwps() fails then 367 * return and the calling lwp will call (or continue in) lwp_exit(). 368 */ 369 proc_is_exiting(p); 370 if (exitlwps(0) != 0) 371 return (1); 372 373 DTRACE_PROC(lwp__exit); 374 DTRACE_PROC1(exit, int, why); 375 376 /* 377 * Don't let init exit unless zone_icode() failed its exec, or 378 * we are shutting down the zone or the machine. 379 * 380 * Since we are single threaded, we don't need to lock the 381 * following accesses to zone_proc_initpid. 382 */ 383 if (p->p_pid == z->zone_proc_initpid) { 384 if (z->zone_boot_err == 0 && 385 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 386 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 387 restart_init(what, why) == 0) 388 return (0); 389 /* 390 * Since we didn't or couldn't restart init, we clear 391 * the zone's init state and proceed with exit 392 * processing. 393 */ 394 z->zone_proc_initpid = -1; 395 } 396 397 /* 398 * Allocate a sigqueue now, before we grab locks. 399 * It will be given to sigcld(), below. 400 */ 401 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 402 403 /* 404 * revoke any doors created by the process. 405 */ 406 if (p->p_door_list) 407 door_exit(); 408 409 /* 410 * Release schedctl data structures. 411 */ 412 if (p->p_pagep) 413 schedctl_proc_cleanup(); 414 415 /* 416 * make sure all pending kaio has completed. 
417 */ 418 if (p->p_aio) 419 aio_cleanup_exit(); 420 421 /* 422 * discard the lwpchan cache. 423 */ 424 if (p->p_lcp != NULL) 425 lwpchan_destroy_cache(0); 426 427 /* 428 * Clean up any DTrace helper actions or probes for the process. 429 */ 430 if (p->p_dtrace_helpers != NULL) { 431 ASSERT(dtrace_helpers_cleanup != NULL); 432 (*dtrace_helpers_cleanup)(); 433 } 434 435 /* untimeout the realtime timers */ 436 if (p->p_itimer != NULL) 437 timer_exit(); 438 439 if ((tmp_id = p->p_alarmid) != 0) { 440 p->p_alarmid = 0; 441 (void) untimeout(tmp_id); 442 } 443 444 /* 445 * Remove any fpollinfo_t's for this (last) thread from our file 446 * descriptors so closeall() can ASSERT() that they're all gone. 447 */ 448 pollcleanup(); 449 450 if (p->p_rprof_cyclic != CYCLIC_NONE) { 451 mutex_enter(&cpu_lock); 452 cyclic_remove(p->p_rprof_cyclic); 453 mutex_exit(&cpu_lock); 454 } 455 456 mutex_enter(&p->p_lock); 457 458 /* 459 * Clean up any DTrace probes associated with this process. 460 */ 461 if (p->p_dtrace_probes) { 462 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 463 dtrace_fasttrap_exit_ptr(p); 464 } 465 466 while ((tmp_id = p->p_itimerid) != 0) { 467 p->p_itimerid = 0; 468 mutex_exit(&p->p_lock); 469 (void) untimeout(tmp_id); 470 mutex_enter(&p->p_lock); 471 } 472 473 lwp_cleanup(); 474 475 /* 476 * We are about to exit; prevent our resource associations from 477 * being changed. 478 */ 479 pool_barrier_enter(); 480 481 /* 482 * Block the process against /proc now that we have really 483 * acquired p->p_lock (to manipulate p_tlist at least). 
484 */ 485 prbarrier(p); 486 487 #ifdef SUN_SRC_COMPAT 488 if (code == CLD_KILLED) 489 u.u_acflag |= AXSIG; 490 #endif 491 sigfillset(&p->p_ignore); 492 sigemptyset(&p->p_siginfo); 493 sigemptyset(&p->p_sig); 494 sigemptyset(&p->p_extsig); 495 sigemptyset(&t->t_sig); 496 sigemptyset(&t->t_extsig); 497 sigemptyset(&p->p_sigmask); 498 sigdelq(p, t, 0); 499 lwp->lwp_cursig = 0; 500 lwp->lwp_extsig = 0; 501 p->p_flag &= ~(SKILLED | SEXTKILLED); 502 if (lwp->lwp_curinfo) { 503 siginfofree(lwp->lwp_curinfo); 504 lwp->lwp_curinfo = NULL; 505 } 506 507 t->t_proc_flag |= TP_LWPEXIT; 508 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 509 prlwpexit(t); /* notify /proc */ 510 lwp_hash_out(p, t->t_tid); 511 prexit(p); 512 513 p->p_lwpcnt = 0; 514 p->p_tlist = NULL; 515 sigqfree(p); 516 term_mstate(t); 517 p->p_mterm = gethrtime(); 518 519 exec_vp = p->p_exec; 520 execdir_vp = p->p_execdir; 521 p->p_exec = NULLVP; 522 p->p_execdir = NULLVP; 523 mutex_exit(&p->p_lock); 524 if (exec_vp) 525 VN_RELE(exec_vp); 526 if (execdir_vp) 527 VN_RELE(execdir_vp); 528 529 pr_free_watched_pages(p); 530 531 closeall(P_FINFO(p)); 532 533 mutex_enter(&pidlock); 534 sp = p->p_sessp; 535 if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { 536 mutex_exit(&pidlock); 537 freectty(sp); 538 } else 539 mutex_exit(&pidlock); 540 541 #if defined(__x86) 542 /* 543 * If the process was using a private LDT then free it. 544 */ 545 if (p->p_ldt) 546 ldt_free(p); 547 #endif 548 549 #if defined(__sparc) 550 if (p->p_utraps != NULL) 551 utrap_free(p); 552 #endif 553 if (p->p_semacct) /* IPC semaphore exit */ 554 semexit(p); 555 rv = wstat(why, what); 556 557 acct(rv & 0xff); 558 exacct_commit_proc(p, rv); 559 560 /* 561 * Release any resources associated with C2 auditing 562 */ 563 #ifdef C2_AUDIT 564 if (audit_active) { 565 /* 566 * audit exit system call 567 */ 568 audit_exit(why, what); 569 } 570 #endif 571 572 /* 573 * Free address space. 574 */ 575 relvm(); 576 577 /* 578 * Release held contracts. 
579 */ 580 contract_exit(p); 581 582 /* 583 * Depart our encapsulating process contract. 584 */ 585 if ((p->p_flag & SSYS) == 0) { 586 ASSERT(p->p_ct_process); 587 contract_process_exit(p->p_ct_process, p, rv); 588 } 589 590 /* 591 * Remove pool association, and block if requested by pool_do_bind. 592 */ 593 mutex_enter(&p->p_lock); 594 ASSERT(p->p_pool->pool_ref > 0); 595 atomic_add_32(&p->p_pool->pool_ref, -1); 596 p->p_pool = pool_default; 597 /* 598 * Now that our address space has been freed and all other threads 599 * in this process have exited, set the PEXITED pool flag. This 600 * tells the pools subsystems to ignore this process if it was 601 * requested to rebind this process to a new pool. 602 */ 603 p->p_poolflag |= PEXITED; 604 pool_barrier_exit(); 605 mutex_exit(&p->p_lock); 606 607 mutex_enter(&pidlock); 608 609 /* 610 * Delete this process from the newstate list of its parent. We 611 * will put it in the right place in the sigcld in the end. 612 */ 613 delete_ns(p->p_parent, p); 614 615 /* 616 * Reassign the orphans to the next of kin. 617 * Don't rearrange init's orphanage. 618 */ 619 if ((q = p->p_orphan) != NULL && p != proc_init) { 620 621 proc_t *nokp = p->p_nextofkin; 622 623 for (;;) { 624 q->p_nextofkin = nokp; 625 if (q->p_nextorph == NULL) 626 break; 627 q = q->p_nextorph; 628 } 629 q->p_nextorph = nokp->p_orphan; 630 nokp->p_orphan = p->p_orphan; 631 p->p_orphan = NULL; 632 } 633 634 /* 635 * Reassign the children to init. 636 * Don't try to assign init's children to init. 
637 */ 638 if ((q = p->p_child) != NULL && p != proc_init) { 639 struct proc *np; 640 struct proc *initp = proc_init; 641 boolean_t setzonetop = B_FALSE; 642 643 if (!INGLOBALZONE(curproc)) 644 setzonetop = B_TRUE; 645 646 pgdetach(p); 647 648 do { 649 np = q->p_sibling; 650 /* 651 * Delete it from its current parent new state 652 * list and add it to init new state list 653 */ 654 delete_ns(q->p_parent, q); 655 656 q->p_ppid = 1; 657 if (setzonetop) { 658 mutex_enter(&q->p_lock); 659 q->p_flag |= SZONETOP; 660 mutex_exit(&q->p_lock); 661 } 662 q->p_parent = initp; 663 664 /* 665 * Since q will be the first child, 666 * it will not have a previous sibling. 667 */ 668 q->p_psibling = NULL; 669 if (initp->p_child) { 670 initp->p_child->p_psibling = q; 671 } 672 q->p_sibling = initp->p_child; 673 initp->p_child = q; 674 if (q->p_proc_flag & P_PR_PTRACE) { 675 mutex_enter(&q->p_lock); 676 sigtoproc(q, NULL, SIGKILL); 677 mutex_exit(&q->p_lock); 678 } 679 /* 680 * sigcld() will add the child to parents 681 * newstate list. 
682 */ 683 if (q->p_stat == SZOMB) 684 sigcld(q, NULL); 685 } while ((q = np) != NULL); 686 687 p->p_child = NULL; 688 ASSERT(p->p_child_ns == NULL); 689 } 690 691 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 692 693 mutex_enter(&p->p_lock); 694 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 695 696 hrutime = mstate_aggr_state(p, LMS_USER); 697 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 698 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 699 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 700 701 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 702 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 703 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 704 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 705 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 706 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 707 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 708 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 709 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 710 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 711 712 p->p_ru.minflt += p->p_cru.minflt; 713 p->p_ru.majflt += p->p_cru.majflt; 714 p->p_ru.nswap += p->p_cru.nswap; 715 p->p_ru.inblock += p->p_cru.inblock; 716 p->p_ru.oublock += p->p_cru.oublock; 717 p->p_ru.msgsnd += p->p_cru.msgsnd; 718 p->p_ru.msgrcv += p->p_cru.msgrcv; 719 p->p_ru.nsignals += p->p_cru.nsignals; 720 p->p_ru.nvcsw += p->p_cru.nvcsw; 721 p->p_ru.nivcsw += p->p_cru.nivcsw; 722 p->p_ru.sysc += p->p_cru.sysc; 723 p->p_ru.ioch += p->p_cru.ioch; 724 725 p->p_stat = SZOMB; 726 p->p_proc_flag &= ~P_PR_PTRACE; 727 p->p_wdata = what; 728 p->p_wcode = (char)why; 729 730 cdir = PTOU(p)->u_cdir; 731 rdir = PTOU(p)->u_rdir; 732 cwd = PTOU(p)->u_cwd; 733 734 /* 735 * Release resource controls, as they are no longer enforceable. 736 */ 737 rctl_set_free(p->p_rctls); 738 739 /* 740 * Give up task and project memberships. 
Decrement tk_nlwps counter 741 * for our task.max-lwps resource control. An extended accounting 742 * record, if that facility is active, is scheduled to be written. 743 * Zombie processes are false members of task0 for the remainder of 744 * their lifetime; no accounting information is recorded for them. 745 */ 746 tk = p->p_task; 747 748 mutex_enter(&p->p_zone->zone_nlwps_lock); 749 tk->tk_nlwps--; 750 tk->tk_proj->kpj_nlwps--; 751 p->p_zone->zone_nlwps--; 752 mutex_exit(&p->p_zone->zone_nlwps_lock); 753 task_detach(p); 754 p->p_task = task0p; 755 756 /* 757 * Clear the lwp directory and the lwpid hash table 758 * now that /proc can't bother us any more. 759 * We free the memory below, after dropping p->p_lock. 760 */ 761 lwpdir = p->p_lwpdir; 762 lwpdir_sz = p->p_lwpdir_sz; 763 tidhash = p->p_tidhash; 764 tidhash_sz = p->p_tidhash_sz; 765 p->p_lwpdir = NULL; 766 p->p_lwpfree = NULL; 767 p->p_lwpdir_sz = 0; 768 p->p_tidhash = NULL; 769 p->p_tidhash_sz = 0; 770 771 /* 772 * curthread's proc pointer is changed to point at p0 because 773 * curthread's original proc pointer can be freed as soon as 774 * the child sends a SIGCLD to its parent. 775 */ 776 t->t_procp = &p0; 777 778 mutex_exit(&p->p_lock); 779 sigcld(p, sqp); 780 mutex_exit(&pidlock); 781 782 task_rele(tk); 783 784 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 785 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 786 787 /* 788 * We don't release u_cdir and u_rdir until SZOMB is set. 789 * This protects us against dofusers(). 790 */ 791 VN_RELE(cdir); 792 if (rdir) 793 VN_RELE(rdir); 794 if (cwd) 795 refstr_rele(cwd); 796 797 lwp_pcb_exit(); 798 799 thread_exit(); 800 /* NOTREACHED */ 801 } 802 803 /* 804 * Format siginfo structure for wait system calls. 
805 */ 806 void 807 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 808 { 809 ASSERT(MUTEX_HELD(&pidlock)); 810 811 bzero(ip, sizeof (k_siginfo_t)); 812 ip->si_signo = SIGCLD; 813 ip->si_code = pp->p_wcode; 814 ip->si_pid = pp->p_pid; 815 ip->si_ctid = PRCTID(pp); 816 ip->si_zoneid = pp->p_zone->zone_id; 817 ip->si_status = pp->p_wdata; 818 ip->si_stime = pp->p_stime; 819 ip->si_utime = pp->p_utime; 820 821 if (waitflag) { 822 pp->p_wcode = 0; 823 pp->p_wdata = 0; 824 pp->p_pidflag &= ~CLDPEND; 825 } 826 } 827 828 /* 829 * Wait system call. 830 * Search for a terminated (zombie) child, 831 * finally lay it to rest, and collect its status. 832 * Look also for stopped children, 833 * and pass back status from them. 834 */ 835 int 836 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 837 { 838 int found; 839 proc_t *cp, *pp; 840 proc_t **nsp; 841 int proc_gone; 842 int waitflag = !(options & WNOWAIT); 843 844 /* 845 * Obsolete flag, defined here only for binary compatibility 846 * with old statically linked executables. Delete this when 847 * we no longer care about these old and broken applications. 848 */ 849 #define _WNOCHLD 0400 850 options &= ~_WNOCHLD; 851 852 if (options == 0 || (options & ~WOPTMASK)) 853 return (EINVAL); 854 855 switch (idtype) { 856 case P_PID: 857 case P_PGID: 858 if (id < 0 || id >= maxpid) 859 return (EINVAL); 860 /* FALLTHROUGH */ 861 case P_ALL: 862 break; 863 default: 864 return (EINVAL); 865 } 866 867 pp = ttoproc(curthread); 868 /* 869 * lock parent mutex so that sibling chain can be searched. 
870 */ 871 mutex_enter(&pidlock); 872 while ((cp = pp->p_child) != NULL) { 873 874 proc_gone = 0; 875 876 for (nsp = &pp->p_child_ns; *nsp; nsp = &(*nsp)->p_sibling_ns) { 877 if (idtype == P_PID && id != (*nsp)->p_pid) { 878 continue; 879 } 880 if (idtype == P_PGID && id != (*nsp)->p_pgrp) { 881 continue; 882 } 883 884 switch ((*nsp)->p_wcode) { 885 886 case CLD_TRAPPED: 887 case CLD_STOPPED: 888 case CLD_CONTINUED: 889 cmn_err(CE_PANIC, 890 "waitid: wrong state %d on the p_newstate" 891 " list", (*nsp)->p_wcode); 892 break; 893 894 case CLD_EXITED: 895 case CLD_DUMPED: 896 case CLD_KILLED: 897 if (!(options & WEXITED)) { 898 /* 899 * Count how many are already gone 900 * for good. 901 */ 902 proc_gone++; 903 break; 904 } 905 if (!waitflag) { 906 winfo((*nsp), ip, 0); 907 } else { 908 proc_t *xp = *nsp; 909 winfo(xp, ip, 1); 910 freeproc(xp); 911 } 912 mutex_exit(&pidlock); 913 if (waitflag) { /* accept SIGCLD */ 914 sigcld_delete(ip); 915 sigcld_repost(); 916 } 917 return (0); 918 } 919 920 if (idtype == P_PID) 921 break; 922 } 923 924 /* 925 * Wow! None of the threads on the p_sibling_ns list were 926 * interesting threads. Check all the kids! 927 */ 928 found = 0; 929 cp = pp->p_child; 930 do { 931 if (idtype == P_PID && id != cp->p_pid) { 932 continue; 933 } 934 if (idtype == P_PGID && id != cp->p_pgrp) { 935 continue; 936 } 937 938 found++; 939 940 switch (cp->p_wcode) { 941 case CLD_TRAPPED: 942 if (!(options & WTRAPPED)) 943 break; 944 winfo(cp, ip, waitflag); 945 mutex_exit(&pidlock); 946 if (waitflag) { /* accept SIGCLD */ 947 sigcld_delete(ip); 948 sigcld_repost(); 949 } 950 return (0); 951 952 case CLD_STOPPED: 953 if (!(options & WSTOPPED)) 954 break; 955 /* Is it still stopped? 
*/ 956 mutex_enter(&cp->p_lock); 957 if (!jobstopped(cp)) { 958 mutex_exit(&cp->p_lock); 959 break; 960 } 961 mutex_exit(&cp->p_lock); 962 winfo(cp, ip, waitflag); 963 mutex_exit(&pidlock); 964 if (waitflag) { /* accept SIGCLD */ 965 sigcld_delete(ip); 966 sigcld_repost(); 967 } 968 return (0); 969 970 case CLD_CONTINUED: 971 if (!(options & WCONTINUED)) 972 break; 973 winfo(cp, ip, waitflag); 974 mutex_exit(&pidlock); 975 if (waitflag) { /* accept SIGCLD */ 976 sigcld_delete(ip); 977 sigcld_repost(); 978 } 979 return (0); 980 981 case CLD_EXITED: 982 case CLD_DUMPED: 983 case CLD_KILLED: 984 /* 985 * Don't complain if a process was found in 986 * the first loop but we broke out of the loop 987 * because of the arguments passed to us. 988 */ 989 if (proc_gone == 0) { 990 cmn_err(CE_PANIC, 991 "waitid: wrong state on the" 992 " p_child list"); 993 } else { 994 break; 995 } 996 } 997 998 if (idtype == P_PID) 999 break; 1000 } while ((cp = cp->p_sibling) != NULL); 1001 1002 /* 1003 * If we found no interesting processes at all, 1004 * break out and return ECHILD. 1005 */ 1006 if (found + proc_gone == 0) 1007 break; 1008 1009 if (options & WNOHANG) { 1010 bzero(ip, sizeof (k_siginfo_t)); 1011 /* 1012 * We should set ip->si_signo = SIGCLD, 1013 * but there is an SVVS test that expects 1014 * ip->si_signo to be zero in this case. 1015 */ 1016 mutex_exit(&pidlock); 1017 return (0); 1018 } 1019 1020 /* 1021 * If we found no processes of interest that could 1022 * change state while we wait, we don't wait at all. 1023 * Get out with ECHILD according to SVID. 1024 */ 1025 if (found == proc_gone) 1026 break; 1027 1028 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1029 mutex_exit(&pidlock); 1030 return (EINTR); 1031 } 1032 } 1033 mutex_exit(&pidlock); 1034 return (ECHILD); 1035 } 1036 1037 /* 1038 * For implementations that don't require binary compatibility, 1039 * the wait system call may be made into a library call to the 1040 * waitid system call. 
1041 */ 1042 int64_t 1043 wait(void) 1044 { 1045 int error; 1046 k_siginfo_t info; 1047 rval_t r; 1048 1049 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1050 return (set_errno(error)); 1051 r.r_val1 = info.si_pid; 1052 r.r_val2 = wstat(info.si_code, info.si_status); 1053 return (r.r_vals); 1054 } 1055 1056 int 1057 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1058 { 1059 int error; 1060 k_siginfo_t info; 1061 1062 if (error = waitid(idtype, id, &info, options)) 1063 return (set_errno(error)); 1064 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1065 return (set_errno(EFAULT)); 1066 return (0); 1067 } 1068 1069 #ifdef _SYSCALL32_IMPL 1070 1071 int 1072 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1073 { 1074 int error; 1075 k_siginfo_t info; 1076 siginfo32_t info32; 1077 1078 if (error = waitid(idtype, id, &info, options)) 1079 return (set_errno(error)); 1080 siginfo_kto32(&info, &info32); 1081 if (copyout(&info32, infop, sizeof (info32))) 1082 return (set_errno(EFAULT)); 1083 return (0); 1084 } 1085 1086 #endif /* _SYSCALL32_IMPL */ 1087 1088 void 1089 proc_detach(proc_t *p) 1090 { 1091 proc_t *q; 1092 1093 ASSERT(MUTEX_HELD(&pidlock)); 1094 1095 q = p->p_parent; 1096 ASSERT(q != NULL); 1097 1098 /* 1099 * Take it off the newstate list of its parent 1100 */ 1101 delete_ns(q, p); 1102 1103 if (q->p_child == p) { 1104 q->p_child = p->p_sibling; 1105 /* 1106 * If the parent has no children, it better not 1107 * have any with new states either! 1108 */ 1109 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1110 } 1111 1112 if (p->p_sibling) { 1113 p->p_sibling->p_psibling = p->p_psibling; 1114 } 1115 1116 if (p->p_psibling) { 1117 p->p_psibling->p_sibling = p->p_sibling; 1118 } 1119 } 1120 1121 /* 1122 * Remove zombie children from the process table. 
1123 */ 1124 void 1125 freeproc(proc_t *p) 1126 { 1127 proc_t *q; 1128 1129 ASSERT(p->p_stat == SZOMB); 1130 ASSERT(p->p_tlist == NULL); 1131 ASSERT(MUTEX_HELD(&pidlock)); 1132 1133 sigdelq(p, NULL, 0); 1134 if (p->p_killsqp) { 1135 siginfofree(p->p_killsqp); 1136 p->p_killsqp = NULL; 1137 } 1138 1139 prfree(p); /* inform /proc */ 1140 1141 /* 1142 * Don't free the init processes. 1143 * Other dying processes will access it. 1144 */ 1145 if (p == proc_init) 1146 return; 1147 1148 1149 /* 1150 * We wait until now to free the cred structure because a 1151 * zombie process's credentials may be examined by /proc. 1152 * No cred locking needed because there are no threads at this point. 1153 */ 1154 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1155 crfree(p->p_cred); 1156 if (p->p_corefile != NULL) { 1157 corectl_path_rele(p->p_corefile); 1158 p->p_corefile = NULL; 1159 } 1160 if (p->p_content != NULL) { 1161 corectl_content_rele(p->p_content); 1162 p->p_content = NULL; 1163 } 1164 1165 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1166 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1167 /* 1168 * This should still do the right thing since p_utime/stime 1169 * get set to the correct value on process exit, so it 1170 * should get properly updated 1171 */ 1172 p->p_nextofkin->p_cutime += p->p_utime; 1173 p->p_nextofkin->p_cstime += p->p_stime; 1174 1175 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1176 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1177 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1178 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1179 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1180 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1181 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1182 += p->p_acct[LMS_USER_LOCK]; 1183 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1184 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1185 += 
p->p_acct[LMS_WAIT_CPU]; 1186 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1187 1188 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1189 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1190 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1191 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1192 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1193 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1194 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1195 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1196 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1197 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1198 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1199 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1200 1201 } 1202 1203 q = p->p_nextofkin; 1204 if (q && q->p_orphan == p) 1205 q->p_orphan = p->p_nextorph; 1206 else if (q) { 1207 for (q = q->p_orphan; q; q = q->p_nextorph) 1208 if (q->p_nextorph == p) 1209 break; 1210 ASSERT(q && q->p_nextorph == p); 1211 q->p_nextorph = p->p_nextorph; 1212 } 1213 1214 proc_detach(p); 1215 pid_exit(p); /* frees pid and proc structure */ 1216 } 1217 1218 /* 1219 * Delete process "child" from the newstate list of process "parent" 1220 */ 1221 void 1222 delete_ns(proc_t *parent, proc_t *child) 1223 { 1224 proc_t **ns; 1225 1226 ASSERT(MUTEX_HELD(&pidlock)); 1227 ASSERT(child->p_parent == parent); 1228 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1229 if (*ns == child) { 1230 1231 ASSERT((*ns)->p_parent == parent); 1232 1233 *ns = child->p_sibling_ns; 1234 child->p_sibling_ns = NULL; 1235 return; 1236 } 1237 } 1238 } 1239 1240 /* 1241 * Add process "child" to the new state list of process "parent" 1242 */ 1243 void 1244 add_ns(proc_t *parent, proc_t *child) 1245 { 1246 ASSERT(child->p_sibling_ns == NULL); 1247 child->p_sibling_ns = parent->p_child_ns; 1248 parent->p_child_ns = child; 1249 } 1250