1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/sysmacros.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/user.h> 38 #include <sys/errno.h> 39 #include <sys/proc.h> 40 #include <sys/ucontext.h> 41 #include <sys/procfs.h> 42 #include <sys/vnode.h> 43 #include <sys/acct.h> 44 #include <sys/var.h> 45 #include <sys/cmn_err.h> 46 #include <sys/debug.h> 47 #include <sys/wait.h> 48 #include <sys/siginfo.h> 49 #include <sys/procset.h> 50 #include <sys/class.h> 51 #include <sys/file.h> 52 #include <sys/session.h> 53 #include <sys/kmem.h> 54 #include <sys/vtrace.h> 55 #include <sys/prsystm.h> 56 #include <sys/ipc.h> 57 #include <sys/sem_impl.h> 58 #include <c2/audit.h> 59 #include <sys/aio_impl.h> 60 #include <vm/as.h> 61 #include <sys/poll.h> 62 #include <sys/door.h> 63 #include <sys/lwpchan_impl.h> 64 #include <sys/utrap.h> 65 #include <sys/task.h> 66 #include <sys/exacct.h> 67 #include <sys/cyclic.h> 68 #include <sys/schedctl.h> 69 #include <sys/rctl.h> 70 #include <sys/contract_impl.h> 71 #include <sys/contract/process_impl.h> 72 #include <sys/list.h> 73 #include <sys/dtrace.h> 74 #include <sys/pool.h> 75 #include <sys/sdt.h> 76 #include <sys/corectl.h> 77 78 /* 79 * convert code/data pair into old style wait status 80 */ 81 int 82 wstat(int code, int data) 83 { 84 int stat = (data & 0377); 85 86 switch (code) { 87 case CLD_EXITED: 88 stat <<= 8; 89 break; 90 case CLD_DUMPED: 91 stat |= WCOREFLG; 92 break; 93 case CLD_KILLED: 94 break; 95 case CLD_TRAPPED: 96 case CLD_STOPPED: 97 stat <<= 8; 98 stat |= WSTOPFLG; 99 break; 100 case CLD_CONTINUED: 101 stat = WCONTFLG; 102 break; 103 default: 104 cmn_err(CE_PANIC, "wstat: bad code"); 105 /* NOTREACHED */ 106 } 107 return (stat); 108 } 109 110 static char * 111 exit_reason(char *buf, size_t bufsz, int what, int why) 112 { 113 switch (why) { 114 case CLD_EXITED: 115 (void) snprintf(buf, bufsz, "exited with status %d", what); 116 break; 117 case CLD_KILLED: 118 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what); 119 break; 120 case CLD_DUMPED: 121 (void) snprintf(buf, bufsz, "core dumped on signal %d", what); 122 break; 123 default: 124 (void) snprintf(buf, bufsz, "encountered unknown error " 125 "(%d, %d)", why, what); 126 break; 127 } 128 129 return (buf); 130 } 131 132 /* 133 * exit system call: pass back caller's arg. 134 */ 135 void 136 rexit(int rval) 137 { 138 exit(CLD_EXITED, rval); 139 } 140 141 /* 142 * Called by proc_exit() when a zone's init exits, presumably because 143 * it failed. As long as the given zone is still in the "running" 144 * state, we will re-exec() init, but first we need to reset things 145 * which are usually inherited across exec() but will break init's 146 * assumption that it is being exec()'d from a virgin process. Most 147 * importantly this includes closing all file descriptors (exec only 148 * closes those marked close-on-exec) and resetting signals (exec only 149 * resets handled signals, and we need to clear any signals which 150 * killed init). Anything else that exec(2) says would be inherited, 151 * but would affect the execution of init, needs to be reset. 152 */ 153 static int 154 restart_init(int what, int why) 155 { 156 kthread_t *t = curthread; 157 klwp_t *lwp = ttolwp(t); 158 proc_t *p = ttoproc(t); 159 user_t *up = PTOU(p); 160 161 vnode_t *oldcd, *oldrd; 162 sess_t *sp; 163 int i, err; 164 char reason_buf[64]; 165 const char *ipath; 166 167 /* 168 * Let zone admin (and global zone admin if this is for a non-global 169 * zone) know that init has failed and will be restarted. 170 */ 171 zcmn_err(p->p_zone->zone_id, CE_WARN, 172 "init(1M) %s: restarting automatically", 173 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 174 175 if (!INGLOBALZONE(p)) { 176 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 177 "restarting automatically", 178 p->p_zone->zone_name, p->p_pid, reason_buf); 179 } 180 181 /* 182 * Remove any fpollinfo_t's for this (last) thread from our file 183 * descriptors so closeall() can ASSERT() that they're all gone. 184 * Then close all open file descriptors in the process. 185 */ 186 pollcleanup(); 187 closeall(P_FINFO(p)); 188 189 /* 190 * Grab p_lock and begin clearing miscellaneous global process 191 * state that needs to be reset before we exec the new init(1M). 192 */ 193 194 mutex_enter(&p->p_lock); 195 prbarrier(p); 196 197 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); 198 up->u_cmask = CMASK; 199 200 sigemptyset(&t->t_hold); 201 sigemptyset(&t->t_sig); 202 sigemptyset(&t->t_extsig); 203 204 sigemptyset(&p->p_sig); 205 sigemptyset(&p->p_extsig); 206 207 sigdelq(p, t, 0); 208 sigdelq(p, NULL, 0); 209 210 if (p->p_killsqp) { 211 siginfofree(p->p_killsqp); 212 p->p_killsqp = NULL; 213 } 214 215 /* 216 * Reset any signals that are ignored back to the default disposition. 217 * Other u_signal members will be cleared when exec calls sigdefault(). 218 */ 219 for (i = 1; i < NSIG; i++) { 220 if (up->u_signal[i - 1] == SIG_IGN) { 221 up->u_signal[i - 1] = SIG_DFL; 222 sigemptyset(&up->u_sigmask[i - 1]); 223 } 224 } 225 226 /* 227 * Clear the current signal, any signal info associated with it, and 228 * any signal information from contracts and/or contract templates. 229 */ 230 lwp->lwp_cursig = 0; 231 lwp->lwp_extsig = 0; 232 if (lwp->lwp_curinfo != NULL) { 233 siginfofree(lwp->lwp_curinfo); 234 lwp->lwp_curinfo = NULL; 235 } 236 lwp_ctmpl_clear(lwp); 237 238 /* 239 * Reset both the process root directory and the current working 240 * directory to the root of the zone just as we do during boot. 241 */ 242 VN_HOLD(p->p_zone->zone_rootvp); 243 oldrd = up->u_rdir; 244 up->u_rdir = p->p_zone->zone_rootvp; 245 246 VN_HOLD(p->p_zone->zone_rootvp); 247 oldcd = up->u_cdir; 248 up->u_cdir = p->p_zone->zone_rootvp; 249 250 if (up->u_cwd != NULL) { 251 refstr_rele(up->u_cwd); 252 up->u_cwd = NULL; 253 } 254 255 mutex_exit(&p->p_lock); 256 257 if (oldrd != NULL) 258 VN_RELE(oldrd); 259 if (oldcd != NULL) 260 VN_RELE(oldcd); 261 262 /* 263 * Free the controlling tty. 264 */ 265 mutex_enter(&pidlock); 266 sp = p->p_sessp; 267 if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { 268 mutex_exit(&pidlock); 269 freectty(sp); 270 } else { 271 mutex_exit(&pidlock); 272 } 273 274 /* 275 * Now exec() the new init(1M) on top of the current process. If we 276 * succeed, the caller will treat this like a successful system call. 277 * If we fail, we issue messages and the caller will proceed with exit. 278 */ 279 ipath = INGLOBALZONE(p) ? initname : zone_initname; 280 err = exec_init(ipath, 0, NULL); 281 282 if (err == 0) 283 return (0); 284 285 zcmn_err(p->p_zone->zone_id, CE_WARN, 286 "failed to restart init(1M) (err=%d): system reboot required", err); 287 288 if (!INGLOBALZONE(p)) { 289 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 290 "(pid %d, err=%d): zoneadm(1M) boot required", 291 p->p_zone->zone_name, p->p_pid, err); 292 } 293 294 return (-1); 295 } 296 297 /* 298 * Release resources. 299 * Enter zombie state. 300 * Wake up parent and init processes, 301 * and dispose of children. 302 */ 303 void 304 exit(int why, int what) 305 { 306 /* 307 * If proc_exit() fails, then some other lwp in the process 308 * got there first. We just have to call lwp_exit() to allow 309 * the other lwp to finish exiting the process. Otherwise we're 310 * restarting init, and should return. 311 */ 312 if (proc_exit(why, what) != 0) { 313 mutex_enter(&curproc->p_lock); 314 ASSERT(curproc->p_flag & SEXITLWPS); 315 lwp_exit(); 316 /* NOTREACHED */ 317 } 318 } 319 320 /* 321 * Set the SEXITING flag on the process, after making sure /proc does 322 * not have it locked. This is done in more places than proc_exit(), 323 * so it is a separate function. 324 */ 325 void 326 proc_is_exiting(proc_t *p) 327 { 328 mutex_enter(&p->p_lock); 329 prbarrier(p); 330 p->p_flag |= SEXITING; 331 mutex_exit(&p->p_lock); 332 } 333 334 /* 335 * Return value: 336 * 1 - exitlwps() failed, call (or continue) lwp_exit() 337 * 0 - restarting init. Return through system call path 338 */ 339 int 340 proc_exit(int why, int what) 341 { 342 kthread_t *t = curthread; 343 klwp_t *lwp = ttolwp(t); 344 proc_t *p = ttoproc(t); 345 zone_t *z = p->p_zone; 346 timeout_id_t tmp_id; 347 int rv; 348 proc_t *q; 349 sess_t *sp; 350 task_t *tk; 351 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 352 sigqueue_t *sqp; 353 lwpdir_t *lwpdir; 354 uint_t lwpdir_sz; 355 lwpdir_t **tidhash; 356 uint_t tidhash_sz; 357 refstr_t *cwd; 358 hrtime_t hrutime, hrstime; 359 360 /* 361 * Stop and discard the process's lwps except for the current one, 362 * unless some other lwp beat us to it. If exitlwps() fails then 363 * return and the calling lwp will call (or continue in) lwp_exit(). 364 */ 365 proc_is_exiting(p); 366 if (exitlwps(0) != 0) 367 return (1); 368 369 DTRACE_PROC(lwp__exit); 370 DTRACE_PROC1(exit, int, why); 371 372 /* 373 * Don't let init exit unless zone_icode() failed its exec, or 374 * we are shutting down the zone or the machine. 375 * 376 * Since we are single threaded, we don't need to lock the 377 * following accesses to zone_proc_initpid. 378 */ 379 if (p->p_pid == z->zone_proc_initpid) { 380 if (z->zone_boot_err == 0 && 381 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 382 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 383 restart_init(what, why) == 0) 384 return (0); 385 /* 386 * Since we didn't or couldn't restart init, we clear 387 * the zone's init state and proceed with exit 388 * processing. 389 */ 390 z->zone_proc_initpid = -1; 391 } 392 393 /* 394 * Allocate a sigqueue now, before we grab locks. 395 * It will be given to sigcld(), below. 396 */ 397 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 398 399 /* 400 * revoke any doors created by the process. 401 */ 402 if (p->p_door_list) 403 door_exit(); 404 405 /* 406 * Release schedctl data structures. 407 */ 408 if (p->p_pagep) 409 schedctl_proc_cleanup(); 410 411 /* 412 * make sure all pending kaio has completed. 413 */ 414 if (p->p_aio) 415 aio_cleanup_exit(); 416 417 /* 418 * discard the lwpchan cache. 419 */ 420 if (p->p_lcp != NULL) 421 lwpchan_destroy_cache(0); 422 423 /* 424 * Clean up any DTrace helper actions or probes for the process. 425 */ 426 if (p->p_dtrace_helpers != NULL) { 427 ASSERT(dtrace_helpers_cleanup != NULL); 428 (*dtrace_helpers_cleanup)(); 429 } 430 431 /* untimeout the realtime timers */ 432 if (p->p_itimer != NULL) 433 timer_exit(); 434 435 if ((tmp_id = p->p_alarmid) != 0) { 436 p->p_alarmid = 0; 437 (void) untimeout(tmp_id); 438 } 439 440 /* 441 * Remove any fpollinfo_t's for this (last) thread from our file 442 * descriptors so closeall() can ASSERT() that they're all gone. 443 */ 444 pollcleanup(); 445 446 if (p->p_rprof_cyclic != CYCLIC_NONE) { 447 mutex_enter(&cpu_lock); 448 cyclic_remove(p->p_rprof_cyclic); 449 mutex_exit(&cpu_lock); 450 } 451 452 mutex_enter(&p->p_lock); 453 454 /* 455 * Clean up any DTrace probes associated with this process. 456 */ 457 if (p->p_dtrace_probes) { 458 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 459 dtrace_fasttrap_exit_ptr(p); 460 } 461 462 while ((tmp_id = p->p_itimerid) != 0) { 463 p->p_itimerid = 0; 464 mutex_exit(&p->p_lock); 465 (void) untimeout(tmp_id); 466 mutex_enter(&p->p_lock); 467 } 468 469 lwp_cleanup(); 470 471 /* 472 * We are about to exit; prevent our resource associations from 473 * being changed. 474 */ 475 pool_barrier_enter(); 476 477 /* 478 * Block the process against /proc now that we have really 479 * acquired p->p_lock (to manipulate p_tlist at least). 480 */ 481 prbarrier(p); 482 483 #ifdef SUN_SRC_COMPAT 484 if (code == CLD_KILLED) 485 u.u_acflag |= AXSIG; 486 #endif 487 sigfillset(&p->p_ignore); 488 sigemptyset(&p->p_siginfo); 489 sigemptyset(&p->p_sig); 490 sigemptyset(&p->p_extsig); 491 sigemptyset(&t->t_sig); 492 sigemptyset(&t->t_extsig); 493 sigemptyset(&p->p_sigmask); 494 sigdelq(p, t, 0); 495 lwp->lwp_cursig = 0; 496 lwp->lwp_extsig = 0; 497 p->p_flag &= ~(SKILLED | SEXTKILLED); 498 if (lwp->lwp_curinfo) { 499 siginfofree(lwp->lwp_curinfo); 500 lwp->lwp_curinfo = NULL; 501 } 502 503 t->t_proc_flag |= TP_LWPEXIT; 504 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 505 prlwpexit(t); /* notify /proc */ 506 lwp_hash_out(p, t->t_tid); 507 prexit(p); 508 509 p->p_lwpcnt = 0; 510 p->p_tlist = NULL; 511 sigqfree(p); 512 term_mstate(t); 513 p->p_mterm = gethrtime(); 514 515 exec_vp = p->p_exec; 516 execdir_vp = p->p_execdir; 517 p->p_exec = NULLVP; 518 p->p_execdir = NULLVP; 519 mutex_exit(&p->p_lock); 520 if (exec_vp) 521 VN_RELE(exec_vp); 522 if (execdir_vp) 523 VN_RELE(execdir_vp); 524 525 pr_free_watched_pages(p); 526 527 closeall(P_FINFO(p)); 528 529 mutex_enter(&pidlock); 530 sp = p->p_sessp; 531 if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { 532 mutex_exit(&pidlock); 533 freectty(sp); 534 } else 535 mutex_exit(&pidlock); 536 537 #if defined(__sparc) 538 if (p->p_utraps != NULL) 539 utrap_free(p); 540 #endif 541 if (p->p_semacct) /* IPC semaphore exit */ 542 semexit(p); 543 rv = wstat(why, what); 544 545 acct(rv & 0xff); 546 exacct_commit_proc(p, rv); 547 548 /* 549 * Release any resources associated with C2 auditing 550 */ 551 #ifdef C2_AUDIT 552 if (audit_active) { 553 /* 554 * audit exit system call 555 */ 556 audit_exit(why, what); 557 } 558 #endif 559 560 /* 561 * Free address space. 562 */ 563 relvm(); 564 565 /* 566 * Release held contracts. 567 */ 568 contract_exit(p); 569 570 /* 571 * Depart our encapsulating process contract. 572 */ 573 if ((p->p_flag & SSYS) == 0) { 574 ASSERT(p->p_ct_process); 575 contract_process_exit(p->p_ct_process, p, rv); 576 } 577 578 /* 579 * Remove pool association, and block if requested by pool_do_bind. 580 */ 581 mutex_enter(&p->p_lock); 582 ASSERT(p->p_pool->pool_ref > 0); 583 atomic_add_32(&p->p_pool->pool_ref, -1); 584 p->p_pool = pool_default; 585 /* 586 * Now that our address space has been freed and all other threads 587 * in this process have exited, set the PEXITED pool flag. This 588 * tells the pools subsystems to ignore this process if it was 589 * requested to rebind this process to a new pool. 590 */ 591 p->p_poolflag |= PEXITED; 592 pool_barrier_exit(); 593 mutex_exit(&p->p_lock); 594 595 mutex_enter(&pidlock); 596 597 /* 598 * Delete this process from the newstate list of its parent. We 599 * will put it in the right place in the sigcld in the end. 600 */ 601 delete_ns(p->p_parent, p); 602 603 /* 604 * Reassign the orphans to the next of kin. 605 * Don't rearrange init's orphanage. 606 */ 607 if ((q = p->p_orphan) != NULL && p != proc_init) { 608 609 proc_t *nokp = p->p_nextofkin; 610 611 for (;;) { 612 q->p_nextofkin = nokp; 613 if (q->p_nextorph == NULL) 614 break; 615 q = q->p_nextorph; 616 } 617 q->p_nextorph = nokp->p_orphan; 618 nokp->p_orphan = p->p_orphan; 619 p->p_orphan = NULL; 620 } 621 622 /* 623 * Reassign the children to init. 624 * Don't try to assign init's children to init. 625 */ 626 if ((q = p->p_child) != NULL && p != proc_init) { 627 struct proc *np; 628 struct proc *initp = proc_init; 629 boolean_t setzonetop = B_FALSE; 630 631 if (!INGLOBALZONE(curproc)) 632 setzonetop = B_TRUE; 633 634 pgdetach(p); 635 636 do { 637 np = q->p_sibling; 638 /* 639 * Delete it from its current parent new state 640 * list and add it to init new state list 641 */ 642 delete_ns(q->p_parent, q); 643 644 q->p_ppid = 1; 645 if (setzonetop) { 646 mutex_enter(&q->p_lock); 647 q->p_flag |= SZONETOP; 648 mutex_exit(&q->p_lock); 649 } 650 q->p_parent = initp; 651 652 /* 653 * Since q will be the first child, 654 * it will not have a previous sibling. 655 */ 656 q->p_psibling = NULL; 657 if (initp->p_child) { 658 initp->p_child->p_psibling = q; 659 } 660 q->p_sibling = initp->p_child; 661 initp->p_child = q; 662 if (q->p_proc_flag & P_PR_PTRACE) { 663 mutex_enter(&q->p_lock); 664 sigtoproc(q, NULL, SIGKILL); 665 mutex_exit(&q->p_lock); 666 } 667 /* 668 * sigcld() will add the child to parents 669 * newstate list. 670 */ 671 if (q->p_stat == SZOMB) 672 sigcld(q, NULL); 673 } while ((q = np) != NULL); 674 675 p->p_child = NULL; 676 ASSERT(p->p_child_ns == NULL); 677 } 678 679 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 680 681 mutex_enter(&p->p_lock); 682 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 683 684 hrutime = mstate_aggr_state(p, LMS_USER); 685 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 686 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 687 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 688 689 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 690 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 691 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 692 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 693 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 694 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 695 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 696 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 697 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 698 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 699 700 p->p_ru.minflt += p->p_cru.minflt; 701 p->p_ru.majflt += p->p_cru.majflt; 702 p->p_ru.nswap += p->p_cru.nswap; 703 p->p_ru.inblock += p->p_cru.inblock; 704 p->p_ru.oublock += p->p_cru.oublock; 705 p->p_ru.msgsnd += p->p_cru.msgsnd; 706 p->p_ru.msgrcv += p->p_cru.msgrcv; 707 p->p_ru.nsignals += p->p_cru.nsignals; 708 p->p_ru.nvcsw += p->p_cru.nvcsw; 709 p->p_ru.nivcsw += p->p_cru.nivcsw; 710 p->p_ru.sysc += p->p_cru.sysc; 711 p->p_ru.ioch += p->p_cru.ioch; 712 713 p->p_stat = SZOMB; 714 p->p_proc_flag &= ~P_PR_PTRACE; 715 p->p_wdata = what; 716 p->p_wcode = (char)why; 717 718 cdir = PTOU(p)->u_cdir; 719 rdir = PTOU(p)->u_rdir; 720 cwd = PTOU(p)->u_cwd; 721 722 /* 723 * Release resource controls, as they are no longer enforceable. 724 */ 725 rctl_set_free(p->p_rctls); 726 727 /* 728 * Give up task and project memberships. Decrement tk_nlwps counter 729 * for our task.max-lwps resource control. An extended accounting 730 * record, if that facility is active, is scheduled to be written. 731 * Zombie processes are false members of task0 for the remainder of 732 * their lifetime; no accounting information is recorded for them. 733 */ 734 tk = p->p_task; 735 736 mutex_enter(&p->p_zone->zone_nlwps_lock); 737 tk->tk_nlwps--; 738 tk->tk_proj->kpj_nlwps--; 739 p->p_zone->zone_nlwps--; 740 mutex_exit(&p->p_zone->zone_nlwps_lock); 741 task_detach(p); 742 p->p_task = task0p; 743 744 /* 745 * Clear the lwp directory and the lwpid hash table 746 * now that /proc can't bother us any more. 747 * We free the memory below, after dropping p->p_lock. 748 */ 749 lwpdir = p->p_lwpdir; 750 lwpdir_sz = p->p_lwpdir_sz; 751 tidhash = p->p_tidhash; 752 tidhash_sz = p->p_tidhash_sz; 753 p->p_lwpdir = NULL; 754 p->p_lwpfree = NULL; 755 p->p_lwpdir_sz = 0; 756 p->p_tidhash = NULL; 757 p->p_tidhash_sz = 0; 758 759 /* 760 * If the process has context ops installed, call the exit routine 761 * on behalf of this last remaining thread. Normally exitpctx() is 762 * called during thread_exit() or lwp_exit(), but because this is the 763 * last thread in the process, we must call it here. By the time 764 * thread_exit() is called (below), the association with the relevant 765 * process has been lost. 766 * 767 * We also free the context here. 768 */ 769 if (p->p_pctx) { 770 kpreempt_disable(); 771 exitpctx(p); 772 kpreempt_enable(); 773 774 freepctx(p, 0); 775 } 776 777 /* 778 * curthread's proc pointer is changed to point at p0 because 779 * curthread's original proc pointer can be freed as soon as 780 * the child sends a SIGCLD to its parent. 781 */ 782 t->t_procp = &p0; 783 784 mutex_exit(&p->p_lock); 785 sigcld(p, sqp); 786 mutex_exit(&pidlock); 787 788 task_rele(tk); 789 790 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 791 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 792 793 /* 794 * We don't release u_cdir and u_rdir until SZOMB is set. 795 * This protects us against dofusers(). 796 */ 797 VN_RELE(cdir); 798 if (rdir) 799 VN_RELE(rdir); 800 if (cwd) 801 refstr_rele(cwd); 802 803 lwp_pcb_exit(); 804 805 thread_exit(); 806 /* NOTREACHED */ 807 } 808 809 /* 810 * Format siginfo structure for wait system calls. 811 */ 812 void 813 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 814 { 815 ASSERT(MUTEX_HELD(&pidlock)); 816 817 bzero(ip, sizeof (k_siginfo_t)); 818 ip->si_signo = SIGCLD; 819 ip->si_code = pp->p_wcode; 820 ip->si_pid = pp->p_pid; 821 ip->si_ctid = PRCTID(pp); 822 ip->si_zoneid = pp->p_zone->zone_id; 823 ip->si_status = pp->p_wdata; 824 ip->si_stime = pp->p_stime; 825 ip->si_utime = pp->p_utime; 826 827 if (waitflag) { 828 pp->p_wcode = 0; 829 pp->p_wdata = 0; 830 pp->p_pidflag &= ~CLDPEND; 831 } 832 } 833 834 /* 835 * Wait system call. 836 * Search for a terminated (zombie) child, 837 * finally lay it to rest, and collect its status. 838 * Look also for stopped children, 839 * and pass back status from them. 840 */ 841 int 842 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 843 { 844 int found; 845 proc_t *cp, *pp; 846 proc_t **nsp; 847 int proc_gone; 848 int waitflag = !(options & WNOWAIT); 849 850 /* 851 * Obsolete flag, defined here only for binary compatibility 852 * with old statically linked executables. Delete this when 853 * we no longer care about these old and broken applications. 854 */ 855 #define _WNOCHLD 0400 856 options &= ~_WNOCHLD; 857 858 if (options == 0 || (options & ~WOPTMASK)) 859 return (EINVAL); 860 861 switch (idtype) { 862 case P_PID: 863 case P_PGID: 864 if (id < 0 || id >= maxpid) 865 return (EINVAL); 866 /* FALLTHROUGH */ 867 case P_ALL: 868 break; 869 default: 870 return (EINVAL); 871 } 872 873 pp = ttoproc(curthread); 874 875 /* 876 * lock parent mutex so that sibling chain can be searched. 877 */ 878 mutex_enter(&pidlock); 879 880 /* 881 * if we are only looking for exited processes and child_ns list 882 * is empty no reason to look at all children. 883 */ 884 if (idtype == P_ALL && 885 (options & (WOPTMASK & ~WNOWAIT)) == (WNOHANG | WEXITED) && 886 pp->p_child_ns == NULL) { 887 888 if (pp->p_child) { 889 mutex_exit(&pidlock); 890 bzero(ip, sizeof (k_siginfo_t)); 891 return (0); 892 } 893 mutex_exit(&pidlock); 894 return (ECHILD); 895 } 896 897 while ((cp = pp->p_child) != NULL) { 898 899 proc_gone = 0; 900 901 for (nsp = &pp->p_child_ns; *nsp; nsp = &(*nsp)->p_sibling_ns) { 902 if (idtype == P_PID && id != (*nsp)->p_pid) { 903 continue; 904 } 905 if (idtype == P_PGID && id != (*nsp)->p_pgrp) { 906 continue; 907 } 908 909 switch ((*nsp)->p_wcode) { 910 911 case CLD_TRAPPED: 912 case CLD_STOPPED: 913 case CLD_CONTINUED: 914 cmn_err(CE_PANIC, 915 "waitid: wrong state %d on the p_newstate" 916 " list", (*nsp)->p_wcode); 917 break; 918 919 case CLD_EXITED: 920 case CLD_DUMPED: 921 case CLD_KILLED: 922 if (!(options & WEXITED)) { 923 /* 924 * Count how many are already gone 925 * for good. 926 */ 927 proc_gone++; 928 break; 929 } 930 if (!waitflag) { 931 winfo((*nsp), ip, 0); 932 } else { 933 proc_t *xp = *nsp; 934 winfo(xp, ip, 1); 935 freeproc(xp); 936 } 937 mutex_exit(&pidlock); 938 if (waitflag) { /* accept SIGCLD */ 939 sigcld_delete(ip); 940 sigcld_repost(); 941 } 942 return (0); 943 } 944 945 if (idtype == P_PID) 946 break; 947 } 948 949 /* 950 * Wow! None of the threads on the p_sibling_ns list were 951 * interesting threads. Check all the kids! 952 */ 953 found = 0; 954 cp = pp->p_child; 955 do { 956 if (idtype == P_PID && id != cp->p_pid) { 957 continue; 958 } 959 if (idtype == P_PGID && id != cp->p_pgrp) { 960 continue; 961 } 962 963 found++; 964 965 switch (cp->p_wcode) { 966 case CLD_TRAPPED: 967 if (!(options & WTRAPPED)) 968 break; 969 winfo(cp, ip, waitflag); 970 mutex_exit(&pidlock); 971 if (waitflag) { /* accept SIGCLD */ 972 sigcld_delete(ip); 973 sigcld_repost(); 974 } 975 return (0); 976 977 case CLD_STOPPED: 978 if (!(options & WSTOPPED)) 979 break; 980 /* Is it still stopped? */ 981 mutex_enter(&cp->p_lock); 982 if (!jobstopped(cp)) { 983 mutex_exit(&cp->p_lock); 984 break; 985 } 986 mutex_exit(&cp->p_lock); 987 winfo(cp, ip, waitflag); 988 mutex_exit(&pidlock); 989 if (waitflag) { /* accept SIGCLD */ 990 sigcld_delete(ip); 991 sigcld_repost(); 992 } 993 return (0); 994 995 case CLD_CONTINUED: 996 if (!(options & WCONTINUED)) 997 break; 998 winfo(cp, ip, waitflag); 999 mutex_exit(&pidlock); 1000 if (waitflag) { /* accept SIGCLD */ 1001 sigcld_delete(ip); 1002 sigcld_repost(); 1003 } 1004 return (0); 1005 1006 case CLD_EXITED: 1007 case CLD_DUMPED: 1008 case CLD_KILLED: 1009 /* 1010 * Don't complain if a process was found in 1011 * the first loop but we broke out of the loop 1012 * because of the arguments passed to us. 1013 */ 1014 if (proc_gone == 0) { 1015 cmn_err(CE_PANIC, 1016 "waitid: wrong state on the" 1017 " p_child list"); 1018 } else { 1019 break; 1020 } 1021 } 1022 1023 if (idtype == P_PID) 1024 break; 1025 } while ((cp = cp->p_sibling) != NULL); 1026 1027 /* 1028 * If we found no interesting processes at all, 1029 * break out and return ECHILD. 1030 */ 1031 if (found + proc_gone == 0) 1032 break; 1033 1034 if (options & WNOHANG) { 1035 bzero(ip, sizeof (k_siginfo_t)); 1036 /* 1037 * We should set ip->si_signo = SIGCLD, 1038 * but there is an SVVS test that expects 1039 * ip->si_signo to be zero in this case. 1040 */ 1041 mutex_exit(&pidlock); 1042 return (0); 1043 } 1044 1045 /* 1046 * If we found no processes of interest that could 1047 * change state while we wait, we don't wait at all. 1048 * Get out with ECHILD according to SVID. 1049 */ 1050 if (found == proc_gone) 1051 break; 1052 1053 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1054 mutex_exit(&pidlock); 1055 return (EINTR); 1056 } 1057 } 1058 mutex_exit(&pidlock); 1059 return (ECHILD); 1060 } 1061 1062 /* 1063 * For implementations that don't require binary compatibility, 1064 * the wait system call may be made into a library call to the 1065 * waitid system call. 1066 */ 1067 int64_t 1068 wait(void) 1069 { 1070 int error; 1071 k_siginfo_t info; 1072 rval_t r; 1073 1074 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1075 return (set_errno(error)); 1076 r.r_val1 = info.si_pid; 1077 r.r_val2 = wstat(info.si_code, info.si_status); 1078 return (r.r_vals); 1079 } 1080 1081 int 1082 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1083 { 1084 int error; 1085 k_siginfo_t info; 1086 1087 if (error = waitid(idtype, id, &info, options)) 1088 return (set_errno(error)); 1089 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1090 return (set_errno(EFAULT)); 1091 return (0); 1092 } 1093 1094 #ifdef _SYSCALL32_IMPL 1095 1096 int 1097 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1098 { 1099 int error; 1100 k_siginfo_t info; 1101 siginfo32_t info32; 1102 1103 if (error = waitid(idtype, id, &info, options)) 1104 return (set_errno(error)); 1105 siginfo_kto32(&info, &info32); 1106 if (copyout(&info32, infop, sizeof (info32))) 1107 return (set_errno(EFAULT)); 1108 return (0); 1109 } 1110 1111 #endif /* _SYSCALL32_IMPL */ 1112 1113 void 1114 proc_detach(proc_t *p) 1115 { 1116 proc_t *q; 1117 1118 ASSERT(MUTEX_HELD(&pidlock)); 1119 1120 q = p->p_parent; 1121 ASSERT(q != NULL); 1122 1123 /* 1124 * Take it off the newstate list of its parent 1125 */ 1126 delete_ns(q, p); 1127 1128 if (q->p_child == p) { 1129 q->p_child = p->p_sibling; 1130 /* 1131 * If the parent has no children, it better not 1132 * have any with new states either! 1133 */ 1134 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1135 } 1136 1137 if (p->p_sibling) { 1138 p->p_sibling->p_psibling = p->p_psibling; 1139 } 1140 1141 if (p->p_psibling) { 1142 p->p_psibling->p_sibling = p->p_sibling; 1143 } 1144 } 1145 1146 /* 1147 * Remove zombie children from the process table. 1148 */ 1149 void 1150 freeproc(proc_t *p) 1151 { 1152 proc_t *q; 1153 1154 ASSERT(p->p_stat == SZOMB); 1155 ASSERT(p->p_tlist == NULL); 1156 ASSERT(MUTEX_HELD(&pidlock)); 1157 1158 sigdelq(p, NULL, 0); 1159 if (p->p_killsqp) { 1160 siginfofree(p->p_killsqp); 1161 p->p_killsqp = NULL; 1162 } 1163 1164 prfree(p); /* inform /proc */ 1165 1166 /* 1167 * Don't free the init processes. 1168 * Other dying processes will access it. 1169 */ 1170 if (p == proc_init) 1171 return; 1172 1173 1174 /* 1175 * We wait until now to free the cred structure because a 1176 * zombie process's credentials may be examined by /proc. 1177 * No cred locking needed because there are no threads at this point. 1178 */ 1179 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1180 crfree(p->p_cred); 1181 if (p->p_corefile != NULL) { 1182 corectl_path_rele(p->p_corefile); 1183 p->p_corefile = NULL; 1184 } 1185 if (p->p_content != NULL) { 1186 corectl_content_rele(p->p_content); 1187 p->p_content = NULL; 1188 } 1189 1190 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1191 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1192 /* 1193 * This should still do the right thing since p_utime/stime 1194 * get set to the correct value on process exit, so it 1195 * should get properly updated 1196 */ 1197 p->p_nextofkin->p_cutime += p->p_utime; 1198 p->p_nextofkin->p_cstime += p->p_stime; 1199 1200 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1201 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1202 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1203 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1204 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1205 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1206 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1207 += p->p_acct[LMS_USER_LOCK]; 1208 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1209 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1210 += p->p_acct[LMS_WAIT_CPU]; 1211 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1212 1213 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1214 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1215 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1216 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1217 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1218 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1219 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1220 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1221 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1222 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1223 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1224 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1225 1226 } 1227 1228 q = p->p_nextofkin; 1229 if (q && q->p_orphan == p) 1230 q->p_orphan = p->p_nextorph; 1231 else if (q) { 1232 for (q = q->p_orphan; q; q = q->p_nextorph) 1233 if (q->p_nextorph == p) 1234 break; 1235 ASSERT(q && q->p_nextorph == p); 1236 q->p_nextorph = p->p_nextorph; 1237 } 1238 1239 proc_detach(p); 1240 pid_exit(p); /* frees pid and proc structure */ 1241 } 1242 1243 /* 1244 * Delete process "child" from the newstate list of process "parent" 1245 */ 1246 void 1247 delete_ns(proc_t *parent, proc_t *child) 1248 { 1249 proc_t **ns; 1250 1251 ASSERT(MUTEX_HELD(&pidlock)); 1252 ASSERT(child->p_parent == parent); 1253 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1254 if (*ns == child) { 1255 1256 ASSERT((*ns)->p_parent == parent); 1257 1258 *ns = child->p_sibling_ns; 1259 child->p_sibling_ns = NULL; 1260 return; 1261 } 1262 } 1263 } 1264 1265 /* 1266 * Add process "child" to the new state list of process "parent" 1267 */ 1268 void 1269 add_ns(proc_t *parent, proc_t *child) 1270 { 1271 ASSERT(child->p_sibling_ns == NULL); 1272 child->p_sibling_ns = parent->p_child_ns; 1273 parent->p_child_ns = child; 1274 } 1275