1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/user.h> 37 #include <sys/errno.h> 38 #include <sys/proc.h> 39 #include <sys/ucontext.h> 40 #include <sys/procfs.h> 41 #include <sys/vnode.h> 42 #include <sys/acct.h> 43 #include <sys/var.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/wait.h> 47 #include <sys/siginfo.h> 48 #include <sys/procset.h> 49 #include <sys/class.h> 50 #include <sys/file.h> 51 #include <sys/session.h> 52 #include <sys/kmem.h> 53 #include <sys/vtrace.h> 54 #include <sys/prsystm.h> 55 #include <sys/ipc.h> 56 #include <sys/sem_impl.h> 57 #include <c2/audit.h> 58 #include <sys/aio_impl.h> 59 #include <vm/as.h> 60 #include <sys/poll.h> 61 #include <sys/door.h> 62 #include <sys/lwpchan_impl.h> 63 #include <sys/utrap.h> 64 #include <sys/task.h> 65 
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>
#include <sys/brand.h>

/*
 * Fold a (code, data) pair into the old style wait status word.
 *
 * Only the low eight bits of "data" are used.  Depending on "code" they
 * are returned as-is (CLD_KILLED), combined with WCOREFLG (CLD_DUMPED),
 * shifted into the second byte (CLD_EXITED), or shifted and combined
 * with WSTOPFLG (CLD_TRAPPED, CLD_STOPPED).  CLD_CONTINUED yields
 * WCONTFLG alone.  Any other code panics.
 */
int
wstat(int code, int data)
{
	int status = (data & 0377);

	switch (code) {
	case CLD_KILLED:
		return (status);
	case CLD_DUMPED:
		return (status | WCOREFLG);
	case CLD_EXITED:
		return (status << 8);
	case CLD_TRAPPED:
	case CLD_STOPPED:
		return ((status << 8) | WSTOPFLG);
	case CLD_CONTINUED:
		return (WCONTFLG);
	default:
		cmn_err(CE_PANIC, "wstat: bad code");
		/* NOTREACHED */
	}
	return (status);
}

/*
 * Format into "buf" (at most "bufsz" bytes) a short phrase describing
 * how a process ended, chosen by the "why" code, with "what" supplying
 * the status or signal number.  Returns "buf" so that the call can be
 * used directly as a printf-style argument.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	if (why == CLD_EXITED) {
		(void) snprintf(buf, bufsz, "exited with status %d", what);
	} else if (why == CLD_KILLED) {
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
	} else if (why == CLD_DUMPED) {
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
	} else {
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
	}

	return (buf);
}

/*
 * exit system call: hand the caller's argument to exit() as the
 * status of a CLD_EXITED termination.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed. As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.
Most importantly this includes closing all file descriptors (exec only
 * closes those marked close-on-exec) and resetting signals (exec only
 * resets handled signals, and we need to clear any signals which
 * killed init). Anything else that exec(2) says would be inherited,
 * but would affect the execution of init, needs to be reset.
 *
 * "what" and "why" are only used to build the warning messages (they
 * are handed to exit_reason()).  Returns 0 when exec_init() succeeds
 * and -1, after logging warnings, when it fails.
 */
static int
restart_init(int what, int why)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	user_t *up = PTOU(p);

	vnode_t *oldcd, *oldrd;
	int i, err;
	char reason_buf[64];

	/*
	 * Let zone admin (and global zone admin if this is for a non-global
	 * zone) know that init has failed and will be restarted.
	 */
	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "init(1M) %s: restarting automatically",
	    exit_reason(reason_buf, sizeof (reason_buf), what, why));

	/* reason_buf was filled in by the exit_reason() call above. */
	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
		    "restarting automatically",
		    p->p_zone->zone_name, p->p_pid, reason_buf);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 * Then close all open file descriptors in the process.
	 */
	pollcleanup();
	closeall(P_FINFO(p));

	/*
	 * Grab p_lock and begin clearing miscellaneous global process
	 * state that needs to be reset before we exec the new init(1M).
	 */

	mutex_enter(&p->p_lock);
	prbarrier(p);

	/* Undo the exit-in-progress flags, including SEXITING. */
	p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
	up->u_cmask = CMASK;

	/* Drop every held and pending signal, thread-level then process. */
	sigemptyset(&t->t_hold);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);

	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);

	sigdelq(p, t, 0);
	sigdelq(p, NULL, 0);

	if (p->p_killsqp) {
		siginfofree(p->p_killsqp);
		p->p_killsqp = NULL;
	}

	/*
	 * Reset any signals that are ignored back to the default disposition.
	 * Other u_signal members will be cleared when exec calls sigdefault().
	 * Signal numbers start at 1 while u_signal[] and u_sigmask[] are
	 * indexed from 0, hence the "i - 1".
	 */
	for (i = 1; i < NSIG; i++) {
		if (up->u_signal[i - 1] == SIG_IGN) {
			up->u_signal[i - 1] = SIG_DFL;
			sigemptyset(&up->u_sigmask[i - 1]);
		}
	}

	/*
	 * Clear the current signal, any signal info associated with it, and
	 * any signal information from contracts and/or contract templates.
	 */
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo != NULL) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	lwp_ctmpl_clear(lwp);

	/*
	 * Reset both the process root directory and the current working
	 * directory to the root of the zone just as we do during boot.
	 * A hold is taken on the zone root for each of the two references
	 * installed; the previous vnodes are remembered and released only
	 * after p_lock has been dropped.
	 */
	VN_HOLD(p->p_zone->zone_rootvp);
	oldrd = up->u_rdir;
	up->u_rdir = p->p_zone->zone_rootvp;

	VN_HOLD(p->p_zone->zone_rootvp);
	oldcd = up->u_cdir;
	up->u_cdir = p->p_zone->zone_rootvp;

	if (up->u_cwd != NULL) {
		refstr_rele(up->u_cwd);
		up->u_cwd = NULL;
	}

	mutex_exit(&p->p_lock);

	if (oldrd != NULL)
		VN_RELE(oldrd);
	if (oldcd != NULL)
		VN_RELE(oldcd);

	/* Free the controlling tty. (freectty() always assumes curproc.) */
	ASSERT(p == curproc);
	(void) freectty(B_TRUE);

	/*
	 * Now exec() the new init(1M) on top of the current process. If we
	 * succeed, the caller will treat this like a successful system call.
	 * If we fail, we issue messages and the caller will proceed with exit.
	 */
	err = exec_init(p->p_zone->zone_initname, NULL);

	if (err == 0)
		return (0);

	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "failed to restart init(1M) (err=%d): system reboot required", err);

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
		    "(pid %d, err=%d): zoneadm(1M) boot required",
		    p->p_zone->zone_name, p->p_pid, err);
	}

	return (-1);
}

/*
 * Release resources.
 * Enter zombie state.
 * Wake up parent and init processes,
 * and dispose of children.
 *
 * "why" and "what" are passed straight through to proc_exit(); rexit()
 * calls this with why == CLD_EXITED and the exit status in "what".
 */
void
exit(int why, int what)
{
	/*
	 * If proc_exit() fails, then some other lwp in the process
	 * got there first. We just have to call lwp_exit() to allow
	 * the other lwp to finish exiting the process. Otherwise we're
	 * restarting init, and should return.
	 */
	if (proc_exit(why, what) != 0) {
		/* p_lock is taken here and not dropped before lwp_exit(). */
		mutex_enter(&curproc->p_lock);
		ASSERT(curproc->p_flag & SEXITLWPS);
		lwp_exit();
		/* NOTREACHED */
	}
}

/*
 * Set the SEXITING flag on the process, after making sure /proc does
 * not have it locked. This is done in more places than proc_exit(),
 * so it is a separate function.
 *
 * p_lock is acquired and released internally.
 */
void
proc_is_exiting(proc_t *p)
{
	mutex_enter(&p->p_lock);
	prbarrier(p);
	p->p_flag |= SEXITING;
	mutex_exit(&p->p_lock);
}

/*
 * Return value:
 *   1 - exitlwps() failed, call (or continue) lwp_exit()
 *   0 - restarting init.
Return through system call path 327 */ 328 int 329 proc_exit(int why, int what) 330 { 331 kthread_t *t = curthread; 332 klwp_t *lwp = ttolwp(t); 333 proc_t *p = ttoproc(t); 334 zone_t *z = p->p_zone; 335 timeout_id_t tmp_id; 336 int rv; 337 proc_t *q; 338 task_t *tk; 339 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 340 sigqueue_t *sqp; 341 lwpdir_t *lwpdir; 342 uint_t lwpdir_sz; 343 lwpdir_t **tidhash; 344 uint_t tidhash_sz; 345 refstr_t *cwd; 346 hrtime_t hrutime, hrstime; 347 348 /* 349 * Stop and discard the process's lwps except for the current one, 350 * unless some other lwp beat us to it. If exitlwps() fails then 351 * return and the calling lwp will call (or continue in) lwp_exit(). 352 */ 353 proc_is_exiting(p); 354 if (exitlwps(0) != 0) 355 return (1); 356 357 DTRACE_PROC(lwp__exit); 358 DTRACE_PROC1(exit, int, why); 359 360 /* 361 * Will perform any brand specific proc exit processing, since this 362 * is always the last lwp, will also perform lwp_exit and free brand 363 * data 364 */ 365 if (PROC_IS_BRANDED(p)) 366 BROP(p)->b_proc_exit(p, lwp); 367 368 /* 369 * Don't let init exit unless zone_start_init() failed its exec, or 370 * we are shutting down the zone or the machine. 371 * 372 * Since we are single threaded, we don't need to lock the 373 * following accesses to zone_proc_initpid. 374 */ 375 if (p->p_pid == z->zone_proc_initpid) { 376 if (z->zone_boot_err == 0 && 377 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 378 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 379 z->zone_restart_init == B_TRUE && 380 restart_init(what, why) == 0) 381 return (0); 382 /* 383 * Since we didn't or couldn't restart init, we clear 384 * the zone's init state and proceed with exit 385 * processing. 386 */ 387 z->zone_proc_initpid = -1; 388 } 389 390 /* 391 * Allocate a sigqueue now, before we grab locks. 392 * It will be given to sigcld(), below. 
393 */ 394 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 395 396 /* 397 * revoke any doors created by the process. 398 */ 399 if (p->p_door_list) 400 door_exit(); 401 402 /* 403 * Release schedctl data structures. 404 */ 405 if (p->p_pagep) 406 schedctl_proc_cleanup(); 407 408 /* 409 * make sure all pending kaio has completed. 410 */ 411 if (p->p_aio) 412 aio_cleanup_exit(); 413 414 /* 415 * discard the lwpchan cache. 416 */ 417 if (p->p_lcp != NULL) 418 lwpchan_destroy_cache(0); 419 420 /* 421 * Clean up any DTrace helper actions or probes for the process. 422 */ 423 if (p->p_dtrace_helpers != NULL) { 424 ASSERT(dtrace_helpers_cleanup != NULL); 425 (*dtrace_helpers_cleanup)(); 426 } 427 428 /* untimeout the realtime timers */ 429 if (p->p_itimer != NULL) 430 timer_exit(); 431 432 if ((tmp_id = p->p_alarmid) != 0) { 433 p->p_alarmid = 0; 434 (void) untimeout(tmp_id); 435 } 436 437 /* 438 * Remove any fpollinfo_t's for this (last) thread from our file 439 * descriptors so closeall() can ASSERT() that they're all gone. 440 */ 441 pollcleanup(); 442 443 if (p->p_rprof_cyclic != CYCLIC_NONE) { 444 mutex_enter(&cpu_lock); 445 cyclic_remove(p->p_rprof_cyclic); 446 mutex_exit(&cpu_lock); 447 } 448 449 mutex_enter(&p->p_lock); 450 451 /* 452 * Clean up any DTrace probes associated with this process. 453 */ 454 if (p->p_dtrace_probes) { 455 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 456 dtrace_fasttrap_exit_ptr(p); 457 } 458 459 while ((tmp_id = p->p_itimerid) != 0) { 460 p->p_itimerid = 0; 461 mutex_exit(&p->p_lock); 462 (void) untimeout(tmp_id); 463 mutex_enter(&p->p_lock); 464 } 465 466 lwp_cleanup(); 467 468 /* 469 * We are about to exit; prevent our resource associations from 470 * being changed. 471 */ 472 pool_barrier_enter(); 473 474 /* 475 * Block the process against /proc now that we have really 476 * acquired p->p_lock (to manipulate p_tlist at least). 
477 */ 478 prbarrier(p); 479 480 #ifdef SUN_SRC_COMPAT 481 if (code == CLD_KILLED) 482 u.u_acflag |= AXSIG; 483 #endif 484 sigfillset(&p->p_ignore); 485 sigemptyset(&p->p_siginfo); 486 sigemptyset(&p->p_sig); 487 sigemptyset(&p->p_extsig); 488 sigemptyset(&t->t_sig); 489 sigemptyset(&t->t_extsig); 490 sigemptyset(&p->p_sigmask); 491 sigdelq(p, t, 0); 492 lwp->lwp_cursig = 0; 493 lwp->lwp_extsig = 0; 494 p->p_flag &= ~(SKILLED | SEXTKILLED); 495 if (lwp->lwp_curinfo) { 496 siginfofree(lwp->lwp_curinfo); 497 lwp->lwp_curinfo = NULL; 498 } 499 500 t->t_proc_flag |= TP_LWPEXIT; 501 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 502 prlwpexit(t); /* notify /proc */ 503 lwp_hash_out(p, t->t_tid); 504 prexit(p); 505 506 p->p_lwpcnt = 0; 507 p->p_tlist = NULL; 508 sigqfree(p); 509 term_mstate(t); 510 p->p_mterm = gethrtime(); 511 512 exec_vp = p->p_exec; 513 execdir_vp = p->p_execdir; 514 p->p_exec = NULLVP; 515 p->p_execdir = NULLVP; 516 mutex_exit(&p->p_lock); 517 if (exec_vp) 518 VN_RELE(exec_vp); 519 if (execdir_vp) 520 VN_RELE(execdir_vp); 521 522 pr_free_watched_pages(p); 523 524 closeall(P_FINFO(p)); 525 526 /* Free the controlling tty. (freectty() always assumes curproc.) */ 527 ASSERT(p == curproc); 528 (void) freectty(B_TRUE); 529 530 #if defined(__sparc) 531 if (p->p_utraps != NULL) 532 utrap_free(p); 533 #endif 534 if (p->p_semacct) /* IPC semaphore exit */ 535 semexit(p); 536 rv = wstat(why, what); 537 538 acct(rv & 0xff); 539 exacct_commit_proc(p, rv); 540 541 /* 542 * Release any resources associated with C2 auditing 543 */ 544 #ifdef C2_AUDIT 545 if (audit_active) { 546 /* 547 * audit exit system call 548 */ 549 audit_exit(why, what); 550 } 551 #endif 552 553 /* 554 * Free address space. 555 */ 556 relvm(); 557 558 /* 559 * Release held contracts. 560 */ 561 contract_exit(p); 562 563 /* 564 * Depart our encapsulating process contract. 
565 */ 566 if ((p->p_flag & SSYS) == 0) { 567 ASSERT(p->p_ct_process); 568 contract_process_exit(p->p_ct_process, p, rv); 569 } 570 571 /* 572 * Remove pool association, and block if requested by pool_do_bind. 573 */ 574 mutex_enter(&p->p_lock); 575 ASSERT(p->p_pool->pool_ref > 0); 576 atomic_add_32(&p->p_pool->pool_ref, -1); 577 p->p_pool = pool_default; 578 /* 579 * Now that our address space has been freed and all other threads 580 * in this process have exited, set the PEXITED pool flag. This 581 * tells the pools subsystems to ignore this process if it was 582 * requested to rebind this process to a new pool. 583 */ 584 p->p_poolflag |= PEXITED; 585 pool_barrier_exit(); 586 mutex_exit(&p->p_lock); 587 588 mutex_enter(&pidlock); 589 590 /* 591 * Delete this process from the newstate list of its parent. We 592 * will put it in the right place in the sigcld in the end. 593 */ 594 delete_ns(p->p_parent, p); 595 596 /* 597 * Reassign the orphans to the next of kin. 598 * Don't rearrange init's orphanage. 599 */ 600 if ((q = p->p_orphan) != NULL && p != proc_init) { 601 602 proc_t *nokp = p->p_nextofkin; 603 604 for (;;) { 605 q->p_nextofkin = nokp; 606 if (q->p_nextorph == NULL) 607 break; 608 q = q->p_nextorph; 609 } 610 q->p_nextorph = nokp->p_orphan; 611 nokp->p_orphan = p->p_orphan; 612 p->p_orphan = NULL; 613 } 614 615 /* 616 * Reassign the children to init. 617 * Don't try to assign init's children to init. 
618 */ 619 if ((q = p->p_child) != NULL && p != proc_init) { 620 struct proc *np; 621 struct proc *initp = proc_init; 622 boolean_t setzonetop = B_FALSE; 623 624 if (!INGLOBALZONE(curproc)) 625 setzonetop = B_TRUE; 626 627 pgdetach(p); 628 629 do { 630 np = q->p_sibling; 631 /* 632 * Delete it from its current parent new state 633 * list and add it to init new state list 634 */ 635 delete_ns(q->p_parent, q); 636 637 q->p_ppid = 1; 638 if (setzonetop) { 639 mutex_enter(&q->p_lock); 640 q->p_flag |= SZONETOP; 641 mutex_exit(&q->p_lock); 642 } 643 q->p_parent = initp; 644 645 /* 646 * Since q will be the first child, 647 * it will not have a previous sibling. 648 */ 649 q->p_psibling = NULL; 650 if (initp->p_child) { 651 initp->p_child->p_psibling = q; 652 } 653 q->p_sibling = initp->p_child; 654 initp->p_child = q; 655 if (q->p_proc_flag & P_PR_PTRACE) { 656 mutex_enter(&q->p_lock); 657 sigtoproc(q, NULL, SIGKILL); 658 mutex_exit(&q->p_lock); 659 } 660 /* 661 * sigcld() will add the child to parents 662 * newstate list. 
663 */ 664 if (q->p_stat == SZOMB) 665 sigcld(q, NULL); 666 } while ((q = np) != NULL); 667 668 p->p_child = NULL; 669 ASSERT(p->p_child_ns == NULL); 670 } 671 672 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 673 674 mutex_enter(&p->p_lock); 675 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 676 677 hrutime = mstate_aggr_state(p, LMS_USER); 678 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 679 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 680 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 681 682 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 683 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 684 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 685 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 686 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 687 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 688 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 689 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 690 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 691 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 692 693 p->p_ru.minflt += p->p_cru.minflt; 694 p->p_ru.majflt += p->p_cru.majflt; 695 p->p_ru.nswap += p->p_cru.nswap; 696 p->p_ru.inblock += p->p_cru.inblock; 697 p->p_ru.oublock += p->p_cru.oublock; 698 p->p_ru.msgsnd += p->p_cru.msgsnd; 699 p->p_ru.msgrcv += p->p_cru.msgrcv; 700 p->p_ru.nsignals += p->p_cru.nsignals; 701 p->p_ru.nvcsw += p->p_cru.nvcsw; 702 p->p_ru.nivcsw += p->p_cru.nivcsw; 703 p->p_ru.sysc += p->p_cru.sysc; 704 p->p_ru.ioch += p->p_cru.ioch; 705 706 p->p_stat = SZOMB; 707 p->p_proc_flag &= ~P_PR_PTRACE; 708 p->p_wdata = what; 709 p->p_wcode = (char)why; 710 711 cdir = PTOU(p)->u_cdir; 712 rdir = PTOU(p)->u_rdir; 713 cwd = PTOU(p)->u_cwd; 714 715 /* 716 * Release resource controls, as they are no longer enforceable. 717 */ 718 rctl_set_free(p->p_rctls); 719 720 /* 721 * Give up task and project memberships. 
Decrement tk_nlwps counter 722 * for our task.max-lwps resource control. An extended accounting 723 * record, if that facility is active, is scheduled to be written. 724 * Zombie processes are false members of task0 for the remainder of 725 * their lifetime; no accounting information is recorded for them. 726 */ 727 tk = p->p_task; 728 729 mutex_enter(&p->p_zone->zone_nlwps_lock); 730 tk->tk_nlwps--; 731 tk->tk_proj->kpj_nlwps--; 732 p->p_zone->zone_nlwps--; 733 mutex_exit(&p->p_zone->zone_nlwps_lock); 734 task_detach(p); 735 p->p_task = task0p; 736 737 /* 738 * Clear the lwp directory and the lwpid hash table 739 * now that /proc can't bother us any more. 740 * We free the memory below, after dropping p->p_lock. 741 */ 742 lwpdir = p->p_lwpdir; 743 lwpdir_sz = p->p_lwpdir_sz; 744 tidhash = p->p_tidhash; 745 tidhash_sz = p->p_tidhash_sz; 746 p->p_lwpdir = NULL; 747 p->p_lwpfree = NULL; 748 p->p_lwpdir_sz = 0; 749 p->p_tidhash = NULL; 750 p->p_tidhash_sz = 0; 751 752 /* 753 * If the process has context ops installed, call the exit routine 754 * on behalf of this last remaining thread. Normally exitpctx() is 755 * called during thread_exit() or lwp_exit(), but because this is the 756 * last thread in the process, we must call it here. By the time 757 * thread_exit() is called (below), the association with the relevant 758 * process has been lost. 759 * 760 * We also free the context here. 761 */ 762 if (p->p_pctx) { 763 kpreempt_disable(); 764 exitpctx(p); 765 kpreempt_enable(); 766 767 freepctx(p, 0); 768 } 769 770 /* 771 * curthread's proc pointer is changed to point at p0 because 772 * curthread's original proc pointer can be freed as soon as 773 * the child sends a SIGCLD to its parent. 
774 */ 775 t->t_procp = &p0; 776 777 mutex_exit(&p->p_lock); 778 sigcld(p, sqp); 779 mutex_exit(&pidlock); 780 781 task_rele(tk); 782 783 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 784 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 785 786 /* 787 * We don't release u_cdir and u_rdir until SZOMB is set. 788 * This protects us against dofusers(). 789 */ 790 VN_RELE(cdir); 791 if (rdir) 792 VN_RELE(rdir); 793 if (cwd) 794 refstr_rele(cwd); 795 796 lwp_pcb_exit(); 797 798 thread_exit(); 799 /* NOTREACHED */ 800 } 801 802 /* 803 * Format siginfo structure for wait system calls. 804 */ 805 void 806 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 807 { 808 ASSERT(MUTEX_HELD(&pidlock)); 809 810 bzero(ip, sizeof (k_siginfo_t)); 811 ip->si_signo = SIGCLD; 812 ip->si_code = pp->p_wcode; 813 ip->si_pid = pp->p_pid; 814 ip->si_ctid = PRCTID(pp); 815 ip->si_zoneid = pp->p_zone->zone_id; 816 ip->si_status = pp->p_wdata; 817 ip->si_stime = pp->p_stime; 818 ip->si_utime = pp->p_utime; 819 820 if (waitflag) { 821 pp->p_wcode = 0; 822 pp->p_wdata = 0; 823 pp->p_pidflag &= ~CLDPEND; 824 } 825 } 826 827 /* 828 * Wait system call. 829 * Search for a terminated (zombie) child, 830 * finally lay it to rest, and collect its status. 831 * Look also for stopped children, 832 * and pass back status from them. 833 */ 834 int 835 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 836 { 837 int found; 838 proc_t *cp, *pp; 839 proc_t **nsp; 840 int proc_gone; 841 int waitflag = !(options & WNOWAIT); 842 843 /* 844 * Obsolete flag, defined here only for binary compatibility 845 * with old statically linked executables. Delete this when 846 * we no longer care about these old and broken applications. 
847 */ 848 #define _WNOCHLD 0400 849 options &= ~_WNOCHLD; 850 851 if (options == 0 || (options & ~WOPTMASK)) 852 return (EINVAL); 853 854 switch (idtype) { 855 case P_PID: 856 case P_PGID: 857 if (id < 0 || id >= maxpid) 858 return (EINVAL); 859 /* FALLTHROUGH */ 860 case P_ALL: 861 break; 862 default: 863 return (EINVAL); 864 } 865 866 pp = ttoproc(curthread); 867 868 /* 869 * lock parent mutex so that sibling chain can be searched. 870 */ 871 mutex_enter(&pidlock); 872 873 /* 874 * if we are only looking for exited processes and child_ns list 875 * is empty no reason to look at all children. 876 */ 877 if (idtype == P_ALL && 878 (options & (WOPTMASK & ~WNOWAIT)) == (WNOHANG | WEXITED) && 879 pp->p_child_ns == NULL) { 880 881 if (pp->p_child) { 882 mutex_exit(&pidlock); 883 bzero(ip, sizeof (k_siginfo_t)); 884 return (0); 885 } 886 mutex_exit(&pidlock); 887 return (ECHILD); 888 } 889 890 while ((cp = pp->p_child) != NULL) { 891 892 proc_gone = 0; 893 894 for (nsp = &pp->p_child_ns; *nsp; nsp = &(*nsp)->p_sibling_ns) { 895 if (idtype == P_PID && id != (*nsp)->p_pid) { 896 continue; 897 } 898 if (idtype == P_PGID && id != (*nsp)->p_pgrp) { 899 continue; 900 } 901 902 switch ((*nsp)->p_wcode) { 903 904 case CLD_TRAPPED: 905 case CLD_STOPPED: 906 case CLD_CONTINUED: 907 cmn_err(CE_PANIC, 908 "waitid: wrong state %d on the p_newstate" 909 " list", (*nsp)->p_wcode); 910 break; 911 912 case CLD_EXITED: 913 case CLD_DUMPED: 914 case CLD_KILLED: 915 if (!(options & WEXITED)) { 916 /* 917 * Count how many are already gone 918 * for good. 919 */ 920 proc_gone++; 921 break; 922 } 923 if (!waitflag) { 924 winfo((*nsp), ip, 0); 925 } else { 926 proc_t *xp = *nsp; 927 winfo(xp, ip, 1); 928 freeproc(xp); 929 } 930 mutex_exit(&pidlock); 931 if (waitflag) { /* accept SIGCLD */ 932 sigcld_delete(ip); 933 sigcld_repost(); 934 } 935 return (0); 936 } 937 938 if (idtype == P_PID) 939 break; 940 } 941 942 /* 943 * Wow! 
None of the threads on the p_sibling_ns list were 944 * interesting threads. Check all the kids! 945 */ 946 found = 0; 947 cp = pp->p_child; 948 do { 949 if (idtype == P_PID && id != cp->p_pid) { 950 continue; 951 } 952 if (idtype == P_PGID && id != cp->p_pgrp) { 953 continue; 954 } 955 956 found++; 957 958 switch (cp->p_wcode) { 959 case CLD_TRAPPED: 960 if (!(options & WTRAPPED)) 961 break; 962 winfo(cp, ip, waitflag); 963 mutex_exit(&pidlock); 964 if (waitflag) { /* accept SIGCLD */ 965 sigcld_delete(ip); 966 sigcld_repost(); 967 } 968 return (0); 969 970 case CLD_STOPPED: 971 if (!(options & WSTOPPED)) 972 break; 973 /* Is it still stopped? */ 974 mutex_enter(&cp->p_lock); 975 if (!jobstopped(cp)) { 976 mutex_exit(&cp->p_lock); 977 break; 978 } 979 mutex_exit(&cp->p_lock); 980 winfo(cp, ip, waitflag); 981 mutex_exit(&pidlock); 982 if (waitflag) { /* accept SIGCLD */ 983 sigcld_delete(ip); 984 sigcld_repost(); 985 } 986 return (0); 987 988 case CLD_CONTINUED: 989 if (!(options & WCONTINUED)) 990 break; 991 winfo(cp, ip, waitflag); 992 mutex_exit(&pidlock); 993 if (waitflag) { /* accept SIGCLD */ 994 sigcld_delete(ip); 995 sigcld_repost(); 996 } 997 return (0); 998 999 case CLD_EXITED: 1000 case CLD_DUMPED: 1001 case CLD_KILLED: 1002 /* 1003 * Don't complain if a process was found in 1004 * the first loop but we broke out of the loop 1005 * because of the arguments passed to us. 1006 */ 1007 if (proc_gone == 0) { 1008 cmn_err(CE_PANIC, 1009 "waitid: wrong state on the" 1010 " p_child list"); 1011 } else { 1012 break; 1013 } 1014 } 1015 1016 if (idtype == P_PID) 1017 break; 1018 } while ((cp = cp->p_sibling) != NULL); 1019 1020 /* 1021 * If we found no interesting processes at all, 1022 * break out and return ECHILD. 
1023 */ 1024 if (found + proc_gone == 0) 1025 break; 1026 1027 if (options & WNOHANG) { 1028 bzero(ip, sizeof (k_siginfo_t)); 1029 /* 1030 * We should set ip->si_signo = SIGCLD, 1031 * but there is an SVVS test that expects 1032 * ip->si_signo to be zero in this case. 1033 */ 1034 mutex_exit(&pidlock); 1035 return (0); 1036 } 1037 1038 /* 1039 * If we found no processes of interest that could 1040 * change state while we wait, we don't wait at all. 1041 * Get out with ECHILD according to SVID. 1042 */ 1043 if (found == proc_gone) 1044 break; 1045 1046 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1047 mutex_exit(&pidlock); 1048 return (EINTR); 1049 } 1050 } 1051 mutex_exit(&pidlock); 1052 return (ECHILD); 1053 } 1054 1055 /* 1056 * For implementations that don't require binary compatibility, 1057 * the wait system call may be made into a library call to the 1058 * waitid system call. 1059 */ 1060 int64_t 1061 wait(void) 1062 { 1063 int error; 1064 k_siginfo_t info; 1065 rval_t r; 1066 1067 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1068 return (set_errno(error)); 1069 r.r_val1 = info.si_pid; 1070 r.r_val2 = wstat(info.si_code, info.si_status); 1071 return (r.r_vals); 1072 } 1073 1074 int 1075 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1076 { 1077 int error; 1078 k_siginfo_t info; 1079 1080 if (error = waitid(idtype, id, &info, options)) 1081 return (set_errno(error)); 1082 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1083 return (set_errno(EFAULT)); 1084 return (0); 1085 } 1086 1087 #ifdef _SYSCALL32_IMPL 1088 1089 int 1090 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1091 { 1092 int error; 1093 k_siginfo_t info; 1094 siginfo32_t info32; 1095 1096 if (error = waitid(idtype, id, &info, options)) 1097 return (set_errno(error)); 1098 siginfo_kto32(&info, &info32); 1099 if (copyout(&info32, infop, sizeof (info32))) 1100 return (set_errno(EFAULT)); 1101 return (0); 1102 } 1103 1104 #endif /* 
_SYSCALL32_IMPL */ 1105 1106 void 1107 proc_detach(proc_t *p) 1108 { 1109 proc_t *q; 1110 1111 ASSERT(MUTEX_HELD(&pidlock)); 1112 1113 q = p->p_parent; 1114 ASSERT(q != NULL); 1115 1116 /* 1117 * Take it off the newstate list of its parent 1118 */ 1119 delete_ns(q, p); 1120 1121 if (q->p_child == p) { 1122 q->p_child = p->p_sibling; 1123 /* 1124 * If the parent has no children, it better not 1125 * have any with new states either! 1126 */ 1127 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1128 } 1129 1130 if (p->p_sibling) { 1131 p->p_sibling->p_psibling = p->p_psibling; 1132 } 1133 1134 if (p->p_psibling) { 1135 p->p_psibling->p_sibling = p->p_sibling; 1136 } 1137 } 1138 1139 /* 1140 * Remove zombie children from the process table. 1141 */ 1142 void 1143 freeproc(proc_t *p) 1144 { 1145 proc_t *q; 1146 1147 ASSERT(p->p_stat == SZOMB); 1148 ASSERT(p->p_tlist == NULL); 1149 ASSERT(MUTEX_HELD(&pidlock)); 1150 1151 sigdelq(p, NULL, 0); 1152 if (p->p_killsqp) { 1153 siginfofree(p->p_killsqp); 1154 p->p_killsqp = NULL; 1155 } 1156 1157 prfree(p); /* inform /proc */ 1158 1159 /* 1160 * Don't free the init processes. 1161 * Other dying processes will access it. 1162 */ 1163 if (p == proc_init) 1164 return; 1165 1166 1167 /* 1168 * We wait until now to free the cred structure because a 1169 * zombie process's credentials may be examined by /proc. 1170 * No cred locking needed because there are no threads at this point. 
1171 */ 1172 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1173 crfree(p->p_cred); 1174 if (p->p_corefile != NULL) { 1175 corectl_path_rele(p->p_corefile); 1176 p->p_corefile = NULL; 1177 } 1178 if (p->p_content != NULL) { 1179 corectl_content_rele(p->p_content); 1180 p->p_content = NULL; 1181 } 1182 1183 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1184 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1185 /* 1186 * This should still do the right thing since p_utime/stime 1187 * get set to the correct value on process exit, so it 1188 * should get properly updated 1189 */ 1190 p->p_nextofkin->p_cutime += p->p_utime; 1191 p->p_nextofkin->p_cstime += p->p_stime; 1192 1193 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1194 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1195 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1196 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1197 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1198 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1199 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1200 += p->p_acct[LMS_USER_LOCK]; 1201 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1202 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1203 += p->p_acct[LMS_WAIT_CPU]; 1204 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1205 1206 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1207 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1208 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1209 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1210 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1211 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1212 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1213 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1214 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1215 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1216 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1217 
p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1218 1219 } 1220 1221 q = p->p_nextofkin; 1222 if (q && q->p_orphan == p) 1223 q->p_orphan = p->p_nextorph; 1224 else if (q) { 1225 for (q = q->p_orphan; q; q = q->p_nextorph) 1226 if (q->p_nextorph == p) 1227 break; 1228 ASSERT(q && q->p_nextorph == p); 1229 q->p_nextorph = p->p_nextorph; 1230 } 1231 1232 proc_detach(p); 1233 pid_exit(p); /* frees pid and proc structure */ 1234 } 1235 1236 /* 1237 * Delete process "child" from the newstate list of process "parent" 1238 */ 1239 void 1240 delete_ns(proc_t *parent, proc_t *child) 1241 { 1242 proc_t **ns; 1243 1244 ASSERT(MUTEX_HELD(&pidlock)); 1245 ASSERT(child->p_parent == parent); 1246 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1247 if (*ns == child) { 1248 1249 ASSERT((*ns)->p_parent == parent); 1250 1251 *ns = child->p_sibling_ns; 1252 child->p_sibling_ns = NULL; 1253 return; 1254 } 1255 } 1256 } 1257 1258 /* 1259 * Add process "child" to the new state list of process "parent" 1260 */ 1261 void 1262 add_ns(proc_t *parent, proc_t *child) 1263 { 1264 ASSERT(child->p_sibling_ns == NULL); 1265 child->p_sibling_ns = parent->p_child_ns; 1266 parent->p_child_ns = child; 1267 } 1268