/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */

#pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/ucontext.h>
#include <sys/procfs.h>
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/var.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/wait.h>
#include <sys/siginfo.h>
#include <sys/procset.h>
#include <sys/class.h>
#include <sys/file.h>
#include <sys/session.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/prsystm.h>
#include <sys/ipc.h>
#include <sys/sem_impl.h>
#include <c2/audit.h>
#include <sys/aio_impl.h>
#include <vm/as.h>
#include <sys/poll.h>
#include <sys/door.h>
#include <sys/lwpchan_impl.h>
#include <sys/utrap.h>
#include <sys/task.h>
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>

/*
 * Convert a code/data pair into an old style wait status.
 *
 * Only the low 8 bits of 'data' are used.  The layout produced for each
 * CLD_* code is:
 *	CLD_EXITED		data in bits 8-15
 *	CLD_KILLED		data in bits 0-7
 *	CLD_DUMPED		data in bits 0-7, plus WCOREFLG
 *	CLD_TRAPPED/STOPPED	data in bits 8-15, plus WSTOPFLG
 *	CLD_CONTINUED		WCONTFLG only; data is discarded
 * Any other code panics the system.
 */
int
wstat(int code, int data)
{
	int stat = (data & 0377);

	switch (code) {
	case CLD_EXITED:
		stat <<= 8;
		break;
	case CLD_DUMPED:
		stat |= WCOREFLG;
		break;
	case CLD_KILLED:
		break;
	case CLD_TRAPPED:
	case CLD_STOPPED:
		stat <<= 8;
		stat |= WSTOPFLG;
		break;
	case CLD_CONTINUED:
		stat = WCONTFLG;
		break;
	default:
		cmn_err(CE_PANIC, "wstat: bad code");
		/* NOTREACHED */
	}
	return (stat);
}

/*
 * Format a short description of how a process ended into 'buf' (at most
 * 'bufsz' bytes) and return 'buf' so the call can be used directly as a
 * printf-style argument.  'why' selects the wording (CLD_EXITED,
 * CLD_KILLED or CLD_DUMPED; anything else is reported as an unknown
 * error) and 'what' is the number printed in the message.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	switch (why) {
	case CLD_EXITED:
		(void) snprintf(buf, bufsz, "exited with status %d", what);
		break;
	case CLD_KILLED:
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
		break;
	case CLD_DUMPED:
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
		break;
	default:
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
		break;
	}

	return (buf);
}

/*
 * exit system call: pass back caller's arg.
 * The argument is handed to exit() as the data of a CLD_EXITED exit.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed.  As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.
Most 146 * importantly this includes closing all file descriptors (exec only 147 * closes those marked close-on-exec) and resetting signals (exec only 148 * resets handled signals, and we need to clear any signals which 149 * killed init). Anything else that exec(2) says would be inherited, 150 * but would affect the execution of init, needs to be reset. 151 */ 152 static int 153 restart_init(int what, int why) 154 { 155 kthread_t *t = curthread; 156 klwp_t *lwp = ttolwp(t); 157 proc_t *p = ttoproc(t); 158 user_t *up = PTOU(p); 159 160 vnode_t *oldcd, *oldrd; 161 sess_t *sp; 162 int i, err; 163 char reason_buf[64]; 164 165 /* 166 * Let zone admin (and global zone admin if this is for a non-global 167 * zone) know that init has failed and will be restarted. 168 */ 169 zcmn_err(p->p_zone->zone_id, CE_WARN, 170 "init(1M) %s: restarting automatically", 171 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 172 173 if (!INGLOBALZONE(p)) { 174 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 175 "restarting automatically", 176 p->p_zone->zone_name, p->p_pid, reason_buf); 177 } 178 179 /* 180 * Remove any fpollinfo_t's for this (last) thread from our file 181 * descriptors so closeall() can ASSERT() that they're all gone. 182 * Then close all open file descriptors in the process. 183 */ 184 pollcleanup(); 185 closeall(P_FINFO(p)); 186 187 /* 188 * Grab p_lock and begin clearing miscellaneous global process 189 * state that needs to be reset before we exec the new init(1M). 
190 */ 191 192 mutex_enter(&p->p_lock); 193 prbarrier(p); 194 195 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 196 up->u_cmask = CMASK; 197 198 sigemptyset(&t->t_hold); 199 sigemptyset(&t->t_sig); 200 sigemptyset(&t->t_extsig); 201 202 sigemptyset(&p->p_sig); 203 sigemptyset(&p->p_extsig); 204 205 sigdelq(p, t, 0); 206 sigdelq(p, NULL, 0); 207 208 if (p->p_killsqp) { 209 siginfofree(p->p_killsqp); 210 p->p_killsqp = NULL; 211 } 212 213 /* 214 * Reset any signals that are ignored back to the default disposition. 215 * Other u_signal members will be cleared when exec calls sigdefault(). 216 */ 217 for (i = 1; i < NSIG; i++) { 218 if (up->u_signal[i - 1] == SIG_IGN) { 219 up->u_signal[i - 1] = SIG_DFL; 220 sigemptyset(&up->u_sigmask[i - 1]); 221 } 222 } 223 224 /* 225 * Clear the current signal, any signal info associated with it, and 226 * any signal information from contracts and/or contract templates. 227 */ 228 lwp->lwp_cursig = 0; 229 lwp->lwp_extsig = 0; 230 if (lwp->lwp_curinfo != NULL) { 231 siginfofree(lwp->lwp_curinfo); 232 lwp->lwp_curinfo = NULL; 233 } 234 lwp_ctmpl_clear(lwp); 235 236 /* 237 * Reset both the process root directory and the current working 238 * directory to the root of the zone just as we do during boot. 239 */ 240 VN_HOLD(p->p_zone->zone_rootvp); 241 oldrd = up->u_rdir; 242 up->u_rdir = p->p_zone->zone_rootvp; 243 244 VN_HOLD(p->p_zone->zone_rootvp); 245 oldcd = up->u_cdir; 246 up->u_cdir = p->p_zone->zone_rootvp; 247 248 if (up->u_cwd != NULL) { 249 refstr_rele(up->u_cwd); 250 up->u_cwd = NULL; 251 } 252 253 mutex_exit(&p->p_lock); 254 255 if (oldrd != NULL) 256 VN_RELE(oldrd); 257 if (oldcd != NULL) 258 VN_RELE(oldcd); 259 260 /* 261 * Free the controlling tty. 
262 */ 263 mutex_enter(&pidlock); 264 sp = p->p_sessp; 265 if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { 266 mutex_exit(&pidlock); 267 freectty(sp); 268 } else { 269 mutex_exit(&pidlock); 270 } 271 272 /* 273 * Now exec() the new init(1M) on top of the current process. If we 274 * succeed, the caller will treat this like a successful system call. 275 * If we fail, we issue messages and the caller will proceed with exit. 276 */ 277 err = exec_init(p->p_zone->zone_initname, NULL); 278 279 if (err == 0) 280 return (0); 281 282 zcmn_err(p->p_zone->zone_id, CE_WARN, 283 "failed to restart init(1M) (err=%d): system reboot required", err); 284 285 if (!INGLOBALZONE(p)) { 286 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 287 "(pid %d, err=%d): zoneadm(1M) boot required", 288 p->p_zone->zone_name, p->p_pid, err); 289 } 290 291 return (-1); 292 } 293 294 /* 295 * Release resources. 296 * Enter zombie state. 297 * Wake up parent and init processes, 298 * and dispose of children. 299 */ 300 void 301 exit(int why, int what) 302 { 303 /* 304 * If proc_exit() fails, then some other lwp in the process 305 * got there first. We just have to call lwp_exit() to allow 306 * the other lwp to finish exiting the process. Otherwise we're 307 * restarting init, and should return. 308 */ 309 if (proc_exit(why, what) != 0) { 310 mutex_enter(&curproc->p_lock); 311 ASSERT(curproc->p_flag & SEXITLWPS); 312 lwp_exit(); 313 /* NOTREACHED */ 314 } 315 } 316 317 /* 318 * Set the SEXITING flag on the process, after making sure /proc does 319 * not have it locked. This is done in more places than proc_exit(), 320 * so it is a separate function. 321 */ 322 void 323 proc_is_exiting(proc_t *p) 324 { 325 mutex_enter(&p->p_lock); 326 prbarrier(p); 327 p->p_flag |= SEXITING; 328 mutex_exit(&p->p_lock); 329 } 330 331 /* 332 * Return value: 333 * 1 - exitlwps() failed, call (or continue) lwp_exit() 334 * 0 - restarting init. 
Return through system call path 335 */ 336 int 337 proc_exit(int why, int what) 338 { 339 kthread_t *t = curthread; 340 klwp_t *lwp = ttolwp(t); 341 proc_t *p = ttoproc(t); 342 zone_t *z = p->p_zone; 343 timeout_id_t tmp_id; 344 int rv; 345 proc_t *q; 346 sess_t *sp; 347 task_t *tk; 348 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 349 sigqueue_t *sqp; 350 lwpdir_t *lwpdir; 351 uint_t lwpdir_sz; 352 lwpdir_t **tidhash; 353 uint_t tidhash_sz; 354 refstr_t *cwd; 355 hrtime_t hrutime, hrstime; 356 357 /* 358 * Stop and discard the process's lwps except for the current one, 359 * unless some other lwp beat us to it. If exitlwps() fails then 360 * return and the calling lwp will call (or continue in) lwp_exit(). 361 */ 362 proc_is_exiting(p); 363 if (exitlwps(0) != 0) 364 return (1); 365 366 DTRACE_PROC(lwp__exit); 367 DTRACE_PROC1(exit, int, why); 368 369 /* 370 * Don't let init exit unless zone_start_init() failed its exec, or 371 * we are shutting down the zone or the machine. 372 * 373 * Since we are single threaded, we don't need to lock the 374 * following accesses to zone_proc_initpid. 375 */ 376 if (p->p_pid == z->zone_proc_initpid) { 377 if (z->zone_boot_err == 0 && 378 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 379 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 380 restart_init(what, why) == 0) 381 return (0); 382 /* 383 * Since we didn't or couldn't restart init, we clear 384 * the zone's init state and proceed with exit 385 * processing. 386 */ 387 z->zone_proc_initpid = -1; 388 } 389 390 /* 391 * Allocate a sigqueue now, before we grab locks. 392 * It will be given to sigcld(), below. 393 */ 394 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 395 396 /* 397 * revoke any doors created by the process. 398 */ 399 if (p->p_door_list) 400 door_exit(); 401 402 /* 403 * Release schedctl data structures. 404 */ 405 if (p->p_pagep) 406 schedctl_proc_cleanup(); 407 408 /* 409 * make sure all pending kaio has completed. 
410 */ 411 if (p->p_aio) 412 aio_cleanup_exit(); 413 414 /* 415 * discard the lwpchan cache. 416 */ 417 if (p->p_lcp != NULL) 418 lwpchan_destroy_cache(0); 419 420 /* 421 * Clean up any DTrace helper actions or probes for the process. 422 */ 423 if (p->p_dtrace_helpers != NULL) { 424 ASSERT(dtrace_helpers_cleanup != NULL); 425 (*dtrace_helpers_cleanup)(); 426 } 427 428 /* untimeout the realtime timers */ 429 if (p->p_itimer != NULL) 430 timer_exit(); 431 432 if ((tmp_id = p->p_alarmid) != 0) { 433 p->p_alarmid = 0; 434 (void) untimeout(tmp_id); 435 } 436 437 /* 438 * Remove any fpollinfo_t's for this (last) thread from our file 439 * descriptors so closeall() can ASSERT() that they're all gone. 440 */ 441 pollcleanup(); 442 443 if (p->p_rprof_cyclic != CYCLIC_NONE) { 444 mutex_enter(&cpu_lock); 445 cyclic_remove(p->p_rprof_cyclic); 446 mutex_exit(&cpu_lock); 447 } 448 449 mutex_enter(&p->p_lock); 450 451 /* 452 * Clean up any DTrace probes associated with this process. 453 */ 454 if (p->p_dtrace_probes) { 455 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 456 dtrace_fasttrap_exit_ptr(p); 457 } 458 459 while ((tmp_id = p->p_itimerid) != 0) { 460 p->p_itimerid = 0; 461 mutex_exit(&p->p_lock); 462 (void) untimeout(tmp_id); 463 mutex_enter(&p->p_lock); 464 } 465 466 lwp_cleanup(); 467 468 /* 469 * We are about to exit; prevent our resource associations from 470 * being changed. 471 */ 472 pool_barrier_enter(); 473 474 /* 475 * Block the process against /proc now that we have really 476 * acquired p->p_lock (to manipulate p_tlist at least). 
477 */ 478 prbarrier(p); 479 480 #ifdef SUN_SRC_COMPAT 481 if (code == CLD_KILLED) 482 u.u_acflag |= AXSIG; 483 #endif 484 sigfillset(&p->p_ignore); 485 sigemptyset(&p->p_siginfo); 486 sigemptyset(&p->p_sig); 487 sigemptyset(&p->p_extsig); 488 sigemptyset(&t->t_sig); 489 sigemptyset(&t->t_extsig); 490 sigemptyset(&p->p_sigmask); 491 sigdelq(p, t, 0); 492 lwp->lwp_cursig = 0; 493 lwp->lwp_extsig = 0; 494 p->p_flag &= ~(SKILLED | SEXTKILLED); 495 if (lwp->lwp_curinfo) { 496 siginfofree(lwp->lwp_curinfo); 497 lwp->lwp_curinfo = NULL; 498 } 499 500 t->t_proc_flag |= TP_LWPEXIT; 501 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 502 prlwpexit(t); /* notify /proc */ 503 lwp_hash_out(p, t->t_tid); 504 prexit(p); 505 506 p->p_lwpcnt = 0; 507 p->p_tlist = NULL; 508 sigqfree(p); 509 term_mstate(t); 510 p->p_mterm = gethrtime(); 511 512 exec_vp = p->p_exec; 513 execdir_vp = p->p_execdir; 514 p->p_exec = NULLVP; 515 p->p_execdir = NULLVP; 516 mutex_exit(&p->p_lock); 517 if (exec_vp) 518 VN_RELE(exec_vp); 519 if (execdir_vp) 520 VN_RELE(execdir_vp); 521 522 pr_free_watched_pages(p); 523 524 closeall(P_FINFO(p)); 525 526 mutex_enter(&pidlock); 527 sp = p->p_sessp; 528 if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { 529 mutex_exit(&pidlock); 530 freectty(sp); 531 } else 532 mutex_exit(&pidlock); 533 534 #if defined(__sparc) 535 if (p->p_utraps != NULL) 536 utrap_free(p); 537 #endif 538 if (p->p_semacct) /* IPC semaphore exit */ 539 semexit(p); 540 rv = wstat(why, what); 541 542 acct(rv & 0xff); 543 exacct_commit_proc(p, rv); 544 545 /* 546 * Release any resources associated with C2 auditing 547 */ 548 #ifdef C2_AUDIT 549 if (audit_active) { 550 /* 551 * audit exit system call 552 */ 553 audit_exit(why, what); 554 } 555 #endif 556 557 /* 558 * Free address space. 559 */ 560 relvm(); 561 562 /* 563 * Release held contracts. 564 */ 565 contract_exit(p); 566 567 /* 568 * Depart our encapsulating process contract. 
569 */ 570 if ((p->p_flag & SSYS) == 0) { 571 ASSERT(p->p_ct_process); 572 contract_process_exit(p->p_ct_process, p, rv); 573 } 574 575 /* 576 * Remove pool association, and block if requested by pool_do_bind. 577 */ 578 mutex_enter(&p->p_lock); 579 ASSERT(p->p_pool->pool_ref > 0); 580 atomic_add_32(&p->p_pool->pool_ref, -1); 581 p->p_pool = pool_default; 582 /* 583 * Now that our address space has been freed and all other threads 584 * in this process have exited, set the PEXITED pool flag. This 585 * tells the pools subsystems to ignore this process if it was 586 * requested to rebind this process to a new pool. 587 */ 588 p->p_poolflag |= PEXITED; 589 pool_barrier_exit(); 590 mutex_exit(&p->p_lock); 591 592 mutex_enter(&pidlock); 593 594 /* 595 * Delete this process from the newstate list of its parent. We 596 * will put it in the right place in the sigcld in the end. 597 */ 598 delete_ns(p->p_parent, p); 599 600 /* 601 * Reassign the orphans to the next of kin. 602 * Don't rearrange init's orphanage. 603 */ 604 if ((q = p->p_orphan) != NULL && p != proc_init) { 605 606 proc_t *nokp = p->p_nextofkin; 607 608 for (;;) { 609 q->p_nextofkin = nokp; 610 if (q->p_nextorph == NULL) 611 break; 612 q = q->p_nextorph; 613 } 614 q->p_nextorph = nokp->p_orphan; 615 nokp->p_orphan = p->p_orphan; 616 p->p_orphan = NULL; 617 } 618 619 /* 620 * Reassign the children to init. 621 * Don't try to assign init's children to init. 
622 */ 623 if ((q = p->p_child) != NULL && p != proc_init) { 624 struct proc *np; 625 struct proc *initp = proc_init; 626 boolean_t setzonetop = B_FALSE; 627 628 if (!INGLOBALZONE(curproc)) 629 setzonetop = B_TRUE; 630 631 pgdetach(p); 632 633 do { 634 np = q->p_sibling; 635 /* 636 * Delete it from its current parent new state 637 * list and add it to init new state list 638 */ 639 delete_ns(q->p_parent, q); 640 641 q->p_ppid = 1; 642 if (setzonetop) { 643 mutex_enter(&q->p_lock); 644 q->p_flag |= SZONETOP; 645 mutex_exit(&q->p_lock); 646 } 647 q->p_parent = initp; 648 649 /* 650 * Since q will be the first child, 651 * it will not have a previous sibling. 652 */ 653 q->p_psibling = NULL; 654 if (initp->p_child) { 655 initp->p_child->p_psibling = q; 656 } 657 q->p_sibling = initp->p_child; 658 initp->p_child = q; 659 if (q->p_proc_flag & P_PR_PTRACE) { 660 mutex_enter(&q->p_lock); 661 sigtoproc(q, NULL, SIGKILL); 662 mutex_exit(&q->p_lock); 663 } 664 /* 665 * sigcld() will add the child to parents 666 * newstate list. 
667 */ 668 if (q->p_stat == SZOMB) 669 sigcld(q, NULL); 670 } while ((q = np) != NULL); 671 672 p->p_child = NULL; 673 ASSERT(p->p_child_ns == NULL); 674 } 675 676 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 677 678 mutex_enter(&p->p_lock); 679 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 680 681 hrutime = mstate_aggr_state(p, LMS_USER); 682 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 683 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 684 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 685 686 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 687 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 688 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 689 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 690 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 691 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 692 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 693 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 694 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 695 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 696 697 p->p_ru.minflt += p->p_cru.minflt; 698 p->p_ru.majflt += p->p_cru.majflt; 699 p->p_ru.nswap += p->p_cru.nswap; 700 p->p_ru.inblock += p->p_cru.inblock; 701 p->p_ru.oublock += p->p_cru.oublock; 702 p->p_ru.msgsnd += p->p_cru.msgsnd; 703 p->p_ru.msgrcv += p->p_cru.msgrcv; 704 p->p_ru.nsignals += p->p_cru.nsignals; 705 p->p_ru.nvcsw += p->p_cru.nvcsw; 706 p->p_ru.nivcsw += p->p_cru.nivcsw; 707 p->p_ru.sysc += p->p_cru.sysc; 708 p->p_ru.ioch += p->p_cru.ioch; 709 710 p->p_stat = SZOMB; 711 p->p_proc_flag &= ~P_PR_PTRACE; 712 p->p_wdata = what; 713 p->p_wcode = (char)why; 714 715 cdir = PTOU(p)->u_cdir; 716 rdir = PTOU(p)->u_rdir; 717 cwd = PTOU(p)->u_cwd; 718 719 /* 720 * Release resource controls, as they are no longer enforceable. 721 */ 722 rctl_set_free(p->p_rctls); 723 724 /* 725 * Give up task and project memberships. 
Decrement tk_nlwps counter 726 * for our task.max-lwps resource control. An extended accounting 727 * record, if that facility is active, is scheduled to be written. 728 * Zombie processes are false members of task0 for the remainder of 729 * their lifetime; no accounting information is recorded for them. 730 */ 731 tk = p->p_task; 732 733 mutex_enter(&p->p_zone->zone_nlwps_lock); 734 tk->tk_nlwps--; 735 tk->tk_proj->kpj_nlwps--; 736 p->p_zone->zone_nlwps--; 737 mutex_exit(&p->p_zone->zone_nlwps_lock); 738 task_detach(p); 739 p->p_task = task0p; 740 741 /* 742 * Clear the lwp directory and the lwpid hash table 743 * now that /proc can't bother us any more. 744 * We free the memory below, after dropping p->p_lock. 745 */ 746 lwpdir = p->p_lwpdir; 747 lwpdir_sz = p->p_lwpdir_sz; 748 tidhash = p->p_tidhash; 749 tidhash_sz = p->p_tidhash_sz; 750 p->p_lwpdir = NULL; 751 p->p_lwpfree = NULL; 752 p->p_lwpdir_sz = 0; 753 p->p_tidhash = NULL; 754 p->p_tidhash_sz = 0; 755 756 /* 757 * If the process has context ops installed, call the exit routine 758 * on behalf of this last remaining thread. Normally exitpctx() is 759 * called during thread_exit() or lwp_exit(), but because this is the 760 * last thread in the process, we must call it here. By the time 761 * thread_exit() is called (below), the association with the relevant 762 * process has been lost. 763 * 764 * We also free the context here. 765 */ 766 if (p->p_pctx) { 767 kpreempt_disable(); 768 exitpctx(p); 769 kpreempt_enable(); 770 771 freepctx(p, 0); 772 } 773 774 /* 775 * curthread's proc pointer is changed to point at p0 because 776 * curthread's original proc pointer can be freed as soon as 777 * the child sends a SIGCLD to its parent. 
778 */ 779 t->t_procp = &p0; 780 781 mutex_exit(&p->p_lock); 782 sigcld(p, sqp); 783 mutex_exit(&pidlock); 784 785 task_rele(tk); 786 787 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 788 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 789 790 /* 791 * We don't release u_cdir and u_rdir until SZOMB is set. 792 * This protects us against dofusers(). 793 */ 794 VN_RELE(cdir); 795 if (rdir) 796 VN_RELE(rdir); 797 if (cwd) 798 refstr_rele(cwd); 799 800 lwp_pcb_exit(); 801 802 thread_exit(); 803 /* NOTREACHED */ 804 } 805 806 /* 807 * Format siginfo structure for wait system calls. 808 */ 809 void 810 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 811 { 812 ASSERT(MUTEX_HELD(&pidlock)); 813 814 bzero(ip, sizeof (k_siginfo_t)); 815 ip->si_signo = SIGCLD; 816 ip->si_code = pp->p_wcode; 817 ip->si_pid = pp->p_pid; 818 ip->si_ctid = PRCTID(pp); 819 ip->si_zoneid = pp->p_zone->zone_id; 820 ip->si_status = pp->p_wdata; 821 ip->si_stime = pp->p_stime; 822 ip->si_utime = pp->p_utime; 823 824 if (waitflag) { 825 pp->p_wcode = 0; 826 pp->p_wdata = 0; 827 pp->p_pidflag &= ~CLDPEND; 828 } 829 } 830 831 /* 832 * Wait system call. 833 * Search for a terminated (zombie) child, 834 * finally lay it to rest, and collect its status. 835 * Look also for stopped children, 836 * and pass back status from them. 837 */ 838 int 839 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 840 { 841 int found; 842 proc_t *cp, *pp; 843 proc_t **nsp; 844 int proc_gone; 845 int waitflag = !(options & WNOWAIT); 846 847 /* 848 * Obsolete flag, defined here only for binary compatibility 849 * with old statically linked executables. Delete this when 850 * we no longer care about these old and broken applications. 
851 */ 852 #define _WNOCHLD 0400 853 options &= ~_WNOCHLD; 854 855 if (options == 0 || (options & ~WOPTMASK)) 856 return (EINVAL); 857 858 switch (idtype) { 859 case P_PID: 860 case P_PGID: 861 if (id < 0 || id >= maxpid) 862 return (EINVAL); 863 /* FALLTHROUGH */ 864 case P_ALL: 865 break; 866 default: 867 return (EINVAL); 868 } 869 870 pp = ttoproc(curthread); 871 872 /* 873 * lock parent mutex so that sibling chain can be searched. 874 */ 875 mutex_enter(&pidlock); 876 877 /* 878 * if we are only looking for exited processes and child_ns list 879 * is empty no reason to look at all children. 880 */ 881 if (idtype == P_ALL && 882 (options & (WOPTMASK & ~WNOWAIT)) == (WNOHANG | WEXITED) && 883 pp->p_child_ns == NULL) { 884 885 if (pp->p_child) { 886 mutex_exit(&pidlock); 887 bzero(ip, sizeof (k_siginfo_t)); 888 return (0); 889 } 890 mutex_exit(&pidlock); 891 return (ECHILD); 892 } 893 894 while ((cp = pp->p_child) != NULL) { 895 896 proc_gone = 0; 897 898 for (nsp = &pp->p_child_ns; *nsp; nsp = &(*nsp)->p_sibling_ns) { 899 if (idtype == P_PID && id != (*nsp)->p_pid) { 900 continue; 901 } 902 if (idtype == P_PGID && id != (*nsp)->p_pgrp) { 903 continue; 904 } 905 906 switch ((*nsp)->p_wcode) { 907 908 case CLD_TRAPPED: 909 case CLD_STOPPED: 910 case CLD_CONTINUED: 911 cmn_err(CE_PANIC, 912 "waitid: wrong state %d on the p_newstate" 913 " list", (*nsp)->p_wcode); 914 break; 915 916 case CLD_EXITED: 917 case CLD_DUMPED: 918 case CLD_KILLED: 919 if (!(options & WEXITED)) { 920 /* 921 * Count how many are already gone 922 * for good. 923 */ 924 proc_gone++; 925 break; 926 } 927 if (!waitflag) { 928 winfo((*nsp), ip, 0); 929 } else { 930 proc_t *xp = *nsp; 931 winfo(xp, ip, 1); 932 freeproc(xp); 933 } 934 mutex_exit(&pidlock); 935 if (waitflag) { /* accept SIGCLD */ 936 sigcld_delete(ip); 937 sigcld_repost(); 938 } 939 return (0); 940 } 941 942 if (idtype == P_PID) 943 break; 944 } 945 946 /* 947 * Wow! 
None of the threads on the p_sibling_ns list were 948 * interesting threads. Check all the kids! 949 */ 950 found = 0; 951 cp = pp->p_child; 952 do { 953 if (idtype == P_PID && id != cp->p_pid) { 954 continue; 955 } 956 if (idtype == P_PGID && id != cp->p_pgrp) { 957 continue; 958 } 959 960 found++; 961 962 switch (cp->p_wcode) { 963 case CLD_TRAPPED: 964 if (!(options & WTRAPPED)) 965 break; 966 winfo(cp, ip, waitflag); 967 mutex_exit(&pidlock); 968 if (waitflag) { /* accept SIGCLD */ 969 sigcld_delete(ip); 970 sigcld_repost(); 971 } 972 return (0); 973 974 case CLD_STOPPED: 975 if (!(options & WSTOPPED)) 976 break; 977 /* Is it still stopped? */ 978 mutex_enter(&cp->p_lock); 979 if (!jobstopped(cp)) { 980 mutex_exit(&cp->p_lock); 981 break; 982 } 983 mutex_exit(&cp->p_lock); 984 winfo(cp, ip, waitflag); 985 mutex_exit(&pidlock); 986 if (waitflag) { /* accept SIGCLD */ 987 sigcld_delete(ip); 988 sigcld_repost(); 989 } 990 return (0); 991 992 case CLD_CONTINUED: 993 if (!(options & WCONTINUED)) 994 break; 995 winfo(cp, ip, waitflag); 996 mutex_exit(&pidlock); 997 if (waitflag) { /* accept SIGCLD */ 998 sigcld_delete(ip); 999 sigcld_repost(); 1000 } 1001 return (0); 1002 1003 case CLD_EXITED: 1004 case CLD_DUMPED: 1005 case CLD_KILLED: 1006 /* 1007 * Don't complain if a process was found in 1008 * the first loop but we broke out of the loop 1009 * because of the arguments passed to us. 1010 */ 1011 if (proc_gone == 0) { 1012 cmn_err(CE_PANIC, 1013 "waitid: wrong state on the" 1014 " p_child list"); 1015 } else { 1016 break; 1017 } 1018 } 1019 1020 if (idtype == P_PID) 1021 break; 1022 } while ((cp = cp->p_sibling) != NULL); 1023 1024 /* 1025 * If we found no interesting processes at all, 1026 * break out and return ECHILD. 
1027 */ 1028 if (found + proc_gone == 0) 1029 break; 1030 1031 if (options & WNOHANG) { 1032 bzero(ip, sizeof (k_siginfo_t)); 1033 /* 1034 * We should set ip->si_signo = SIGCLD, 1035 * but there is an SVVS test that expects 1036 * ip->si_signo to be zero in this case. 1037 */ 1038 mutex_exit(&pidlock); 1039 return (0); 1040 } 1041 1042 /* 1043 * If we found no processes of interest that could 1044 * change state while we wait, we don't wait at all. 1045 * Get out with ECHILD according to SVID. 1046 */ 1047 if (found == proc_gone) 1048 break; 1049 1050 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1051 mutex_exit(&pidlock); 1052 return (EINTR); 1053 } 1054 } 1055 mutex_exit(&pidlock); 1056 return (ECHILD); 1057 } 1058 1059 /* 1060 * For implementations that don't require binary compatibility, 1061 * the wait system call may be made into a library call to the 1062 * waitid system call. 1063 */ 1064 int64_t 1065 wait(void) 1066 { 1067 int error; 1068 k_siginfo_t info; 1069 rval_t r; 1070 1071 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1072 return (set_errno(error)); 1073 r.r_val1 = info.si_pid; 1074 r.r_val2 = wstat(info.si_code, info.si_status); 1075 return (r.r_vals); 1076 } 1077 1078 int 1079 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1080 { 1081 int error; 1082 k_siginfo_t info; 1083 1084 if (error = waitid(idtype, id, &info, options)) 1085 return (set_errno(error)); 1086 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1087 return (set_errno(EFAULT)); 1088 return (0); 1089 } 1090 1091 #ifdef _SYSCALL32_IMPL 1092 1093 int 1094 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1095 { 1096 int error; 1097 k_siginfo_t info; 1098 siginfo32_t info32; 1099 1100 if (error = waitid(idtype, id, &info, options)) 1101 return (set_errno(error)); 1102 siginfo_kto32(&info, &info32); 1103 if (copyout(&info32, infop, sizeof (info32))) 1104 return (set_errno(EFAULT)); 1105 return (0); 1106 } 1107 1108 #endif /* 
_SYSCALL32_IMPL */ 1109 1110 void 1111 proc_detach(proc_t *p) 1112 { 1113 proc_t *q; 1114 1115 ASSERT(MUTEX_HELD(&pidlock)); 1116 1117 q = p->p_parent; 1118 ASSERT(q != NULL); 1119 1120 /* 1121 * Take it off the newstate list of its parent 1122 */ 1123 delete_ns(q, p); 1124 1125 if (q->p_child == p) { 1126 q->p_child = p->p_sibling; 1127 /* 1128 * If the parent has no children, it better not 1129 * have any with new states either! 1130 */ 1131 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1132 } 1133 1134 if (p->p_sibling) { 1135 p->p_sibling->p_psibling = p->p_psibling; 1136 } 1137 1138 if (p->p_psibling) { 1139 p->p_psibling->p_sibling = p->p_sibling; 1140 } 1141 } 1142 1143 /* 1144 * Remove zombie children from the process table. 1145 */ 1146 void 1147 freeproc(proc_t *p) 1148 { 1149 proc_t *q; 1150 1151 ASSERT(p->p_stat == SZOMB); 1152 ASSERT(p->p_tlist == NULL); 1153 ASSERT(MUTEX_HELD(&pidlock)); 1154 1155 sigdelq(p, NULL, 0); 1156 if (p->p_killsqp) { 1157 siginfofree(p->p_killsqp); 1158 p->p_killsqp = NULL; 1159 } 1160 1161 prfree(p); /* inform /proc */ 1162 1163 /* 1164 * Don't free the init processes. 1165 * Other dying processes will access it. 1166 */ 1167 if (p == proc_init) 1168 return; 1169 1170 1171 /* 1172 * We wait until now to free the cred structure because a 1173 * zombie process's credentials may be examined by /proc. 1174 * No cred locking needed because there are no threads at this point. 
1175 */ 1176 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1177 crfree(p->p_cred); 1178 if (p->p_corefile != NULL) { 1179 corectl_path_rele(p->p_corefile); 1180 p->p_corefile = NULL; 1181 } 1182 if (p->p_content != NULL) { 1183 corectl_content_rele(p->p_content); 1184 p->p_content = NULL; 1185 } 1186 1187 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1188 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1189 /* 1190 * This should still do the right thing since p_utime/stime 1191 * get set to the correct value on process exit, so it 1192 * should get properly updated 1193 */ 1194 p->p_nextofkin->p_cutime += p->p_utime; 1195 p->p_nextofkin->p_cstime += p->p_stime; 1196 1197 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1198 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1199 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1200 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1201 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1202 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1203 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1204 += p->p_acct[LMS_USER_LOCK]; 1205 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1206 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1207 += p->p_acct[LMS_WAIT_CPU]; 1208 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1209 1210 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1211 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1212 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1213 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1214 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1215 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1216 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1217 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1218 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1219 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1220 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1221 
p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1222 1223 } 1224 1225 q = p->p_nextofkin; 1226 if (q && q->p_orphan == p) 1227 q->p_orphan = p->p_nextorph; 1228 else if (q) { 1229 for (q = q->p_orphan; q; q = q->p_nextorph) 1230 if (q->p_nextorph == p) 1231 break; 1232 ASSERT(q && q->p_nextorph == p); 1233 q->p_nextorph = p->p_nextorph; 1234 } 1235 1236 proc_detach(p); 1237 pid_exit(p); /* frees pid and proc structure */ 1238 } 1239 1240 /* 1241 * Delete process "child" from the newstate list of process "parent" 1242 */ 1243 void 1244 delete_ns(proc_t *parent, proc_t *child) 1245 { 1246 proc_t **ns; 1247 1248 ASSERT(MUTEX_HELD(&pidlock)); 1249 ASSERT(child->p_parent == parent); 1250 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1251 if (*ns == child) { 1252 1253 ASSERT((*ns)->p_parent == parent); 1254 1255 *ns = child->p_sibling_ns; 1256 child->p_sibling_ns = NULL; 1257 return; 1258 } 1259 } 1260 } 1261 1262 /* 1263 * Add process "child" to the new state list of process "parent" 1264 */ 1265 void 1266 add_ns(proc_t *parent, proc_t *child) 1267 { 1268 ASSERT(child->p_sibling_ns == NULL); 1269 child->p_sibling_ns = parent->p_child_ns; 1270 parent->p_child_ns = child; 1271 } 1272