1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.74 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/cred.h> 36 #include <sys/user.h> 37 #include <sys/errno.h> 38 #include <sys/proc.h> 39 #include <sys/ucontext.h> 40 #include <sys/procfs.h> 41 #include <sys/vnode.h> 42 #include <sys/acct.h> 43 #include <sys/var.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/wait.h> 47 #include <sys/siginfo.h> 48 #include <sys/procset.h> 49 #include <sys/class.h> 50 #include <sys/file.h> 51 #include <sys/session.h> 52 #include <sys/kmem.h> 53 #include <sys/vtrace.h> 54 #include <sys/prsystm.h> 55 #include <sys/ipc.h> 56 #include <sys/sem_impl.h> 57 #include <c2/audit.h> 58 #include <sys/aio_impl.h> 59 #include <vm/as.h> 60 #include <sys/poll.h> 61 #include <sys/door.h> 62 #include <sys/lwpchan_impl.h> 63 #include <sys/utrap.h> 64 #include <sys/task.h> 65 #include <sys/exacct.h> 66 #include <sys/cyclic.h> 67 #include <sys/schedctl.h> 68 #include <sys/rctl.h> 69 #include <sys/contract_impl.h> 70 #include <sys/contract/process_impl.h> 71 #include <sys/list.h> 72 #include <sys/dtrace.h> 73 #include <sys/pool.h> 74 #include <sys/sdt.h> 75 #include <sys/corectl.h> 76 77 #if defined(__x86) 78 extern void ldt_free(proc_t *pp); 79 #endif 80 81 /* 82 * convert code/data pair into old style wait status 83 */ 84 int 85 wstat(int code, int data) 86 { 87 int stat = (data & 0377); 88 89 switch (code) { 90 case CLD_EXITED: 91 stat <<= 8; 92 break; 93 case CLD_DUMPED: 94 stat |= WCOREFLG; 95 break; 96 case CLD_KILLED: 97 break; 98 case CLD_TRAPPED: 99 case CLD_STOPPED: 100 stat <<= 8; 101 stat |= WSTOPFLG; 102 break; 103 case CLD_CONTINUED: 104 stat = WCONTFLG; 105 break; 106 default: 107 cmn_err(CE_PANIC, "wstat: bad code"); 108 /* NOTREACHED */ 109 } 110 return (stat); 111 } 112 113 static char * 114 exit_reason(char *buf, size_t bufsz, int what, int why) 115 { 116 switch (why) { 117 case CLD_EXITED: 118 (void) snprintf(buf, bufsz, "exited with status %d", what); 119 break; 120 case CLD_KILLED: 121 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what); 122 break; 123 case CLD_DUMPED: 124 (void) snprintf(buf, bufsz, "core dumped on signal %d", what); 125 break; 126 default: 127 (void) snprintf(buf, bufsz, "encountered unknown error " 128 "(%d, %d)", why, what); 129 break; 130 } 131 132 return (buf); 133 } 134 135 /* 136 * exit system call: pass back caller's arg. 137 */ 138 void 139 rexit(int rval) 140 { 141 exit(CLD_EXITED, rval); 142 } 143 144 /* 145 * Called by proc_exit() when a zone's init exits, presumably because 146 * it failed. As long as the given zone is still in the "running" 147 * state, we will re-exec() init, but first we need to reset things 148 * which are usually inherited across exec() but will break init's 149 * assumption that it is being exec()'d from a virgin process. Most 150 * importantly this includes closing all file descriptors (exec only 151 * closes those marked close-on-exec) and resetting signals (exec only 152 * resets handled signals, and we need to clear any signals which 153 * killed init). Anything else that exec(2) says would be inherited, 154 * but would affect the execution of init, needs to be reset. 155 */ 156 static int 157 restart_init(int what, int why) 158 { 159 kthread_t *t = curthread; 160 klwp_t *lwp = ttolwp(t); 161 proc_t *p = ttoproc(t); 162 user_t *up = PTOU(p); 163 164 vnode_t *oldcd, *oldrd; 165 sess_t *sp; 166 int i, err; 167 char reason_buf[64]; 168 const char *ipath; 169 170 /* 171 * Let zone admin (and global zone admin if this is for a non-global 172 * zone) know that init has failed and will be restarted. 173 */ 174 zcmn_err(p->p_zone->zone_id, CE_WARN, 175 "init(1M) %s: restarting automatically", 176 exit_reason(reason_buf, sizeof (reason_buf), what, why)); 177 178 if (!INGLOBALZONE(p)) { 179 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " 180 "restarting automatically", 181 p->p_zone->zone_name, p->p_pid, reason_buf); 182 } 183 184 /* 185 * Remove any fpollinfo_t's for this (last) thread from our file 186 * descriptors so closeall() can ASSERT() that they're all gone. 187 * Then close all open file descriptors in the process. 188 */ 189 pollcleanup(); 190 closeall(P_FINFO(p)); 191 192 /* 193 * Grab p_lock and begin clearing miscellaneous global process 194 * state that needs to be reset before we exec the new init(1M). 195 */ 196 197 mutex_enter(&p->p_lock); 198 199 p->p_flag &= ~(SKILLED | SEXITLWPS | SEXTKILLED | SCOREDUMP | SDOCORE); 200 up->u_cmask = CMASK; 201 202 sigemptyset(&t->t_hold); 203 sigemptyset(&t->t_sig); 204 sigemptyset(&t->t_extsig); 205 206 sigemptyset(&p->p_sig); 207 sigemptyset(&p->p_extsig); 208 209 sigdelq(p, t, 0); 210 sigdelq(p, NULL, 0); 211 212 if (p->p_killsqp) { 213 siginfofree(p->p_killsqp); 214 p->p_killsqp = NULL; 215 } 216 217 /* 218 * Reset any signals that are ignored back to the default disposition. 219 * Other u_signal members will be cleared when exec calls sigdefault(). 220 */ 221 for (i = 1; i < NSIG; i++) { 222 if (up->u_signal[i - 1] == SIG_IGN) { 223 up->u_signal[i - 1] = SIG_DFL; 224 sigemptyset(&up->u_sigmask[i - 1]); 225 } 226 } 227 228 /* 229 * Clear the current signal, any signal info associated with it, and 230 * any signal information from contracts and/or contract templates. 231 */ 232 lwp->lwp_cursig = 0; 233 lwp->lwp_extsig = 0; 234 if (lwp->lwp_curinfo != NULL) { 235 siginfofree(lwp->lwp_curinfo); 236 lwp->lwp_curinfo = NULL; 237 } 238 lwp_ctmpl_clear(lwp); 239 240 /* 241 * Reset both the process root directory and the current working 242 * directory to the root of the zone just as we do during boot. 243 */ 244 VN_HOLD(p->p_zone->zone_rootvp); 245 oldrd = up->u_rdir; 246 up->u_rdir = p->p_zone->zone_rootvp; 247 248 VN_HOLD(p->p_zone->zone_rootvp); 249 oldcd = up->u_cdir; 250 up->u_cdir = p->p_zone->zone_rootvp; 251 252 if (up->u_cwd != NULL) { 253 refstr_rele(up->u_cwd); 254 up->u_cwd = NULL; 255 } 256 257 mutex_exit(&p->p_lock); 258 259 if (oldrd != NULL) 260 VN_RELE(oldrd); 261 if (oldcd != NULL) 262 VN_RELE(oldcd); 263 264 /* 265 * Free the controlling tty. 266 */ 267 mutex_enter(&pidlock); 268 sp = p->p_sessp; 269 if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { 270 mutex_exit(&pidlock); 271 freectty(sp); 272 } else { 273 mutex_exit(&pidlock); 274 } 275 276 /* 277 * Now exec() the new init(1M) on top of the current process. If we 278 * succeed, the caller will treat this like a successful system call. 279 * If we fail, we issue messages and the caller will proceed with exit. 280 */ 281 ipath = INGLOBALZONE(p) ? initname : zone_initname; 282 err = exec_init(ipath, 0, NULL); 283 284 if (err == 0) 285 return (0); 286 287 zcmn_err(p->p_zone->zone_id, CE_WARN, 288 "failed to restart init(1M) (err=%d): system reboot required", err); 289 290 if (!INGLOBALZONE(p)) { 291 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " 292 "(pid %d, err=%d): zoneadm(1M) boot required", 293 p->p_zone->zone_name, p->p_pid, err); 294 } 295 296 return (-1); 297 } 298 299 /* 300 * Release resources. 301 * Enter zombie state. 302 * Wake up parent and init processes, 303 * and dispose of children. 304 */ 305 void 306 exit(int why, int what) 307 { 308 /* 309 * If proc_exit() fails, then some other lwp in the process 310 * got there first. We just have to call lwp_exit() to allow 311 * the other lwp to finish exiting the process. Otherwise we're 312 * restarting init, and should return. 313 */ 314 if (proc_exit(why, what) != 0) { 315 mutex_enter(&curproc->p_lock); 316 ASSERT(curproc->p_flag & SEXITLWPS); 317 lwp_exit(); 318 /* NOTREACHED */ 319 } 320 } 321 322 /* 323 * Return value: 324 * 1 - exitlwps() failed, call (or continue) lwp_exit() 325 * 0 - restarting init. Return through system call path 326 */ 327 int 328 proc_exit(int why, int what) 329 { 330 kthread_t *t = curthread; 331 klwp_t *lwp = ttolwp(t); 332 proc_t *p = ttoproc(t); 333 zone_t *z = p->p_zone; 334 timeout_id_t tmp_id; 335 int rv; 336 proc_t *q; 337 sess_t *sp; 338 task_t *tk; 339 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; 340 sigqueue_t *sqp; 341 lwpdir_t *lwpdir; 342 uint_t lwpdir_sz; 343 lwpdir_t **tidhash; 344 uint_t tidhash_sz; 345 refstr_t *cwd; 346 hrtime_t hrutime, hrstime; 347 348 /* 349 * Stop and discard the process's lwps except for the current one, 350 * unless some other lwp beat us to it. If exitlwps() fails then 351 * return and the calling lwp will call (or continue in) lwp_exit(). 352 */ 353 if (exitlwps(0) != 0) 354 return (1); 355 356 DTRACE_PROC(lwp__exit); 357 DTRACE_PROC1(exit, int, why); 358 359 /* 360 * Don't let init exit unless zone_icode() failed its exec, or 361 * we are shutting down the zone or the machine. 362 * 363 * Since we are single threaded, we don't need to lock the 364 * following accesses to zone_proc_initpid. 365 */ 366 if (p->p_pid == z->zone_proc_initpid) { 367 if (z->zone_boot_err == 0 && 368 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && 369 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && 370 restart_init(what, why) == 0) 371 return (0); 372 /* 373 * Since we didn't or couldn't restart init, we clear 374 * the zone's init state and proceed with exit 375 * processing. 376 */ 377 z->zone_proc_initpid = -1; 378 } 379 380 /* 381 * Allocate a sigqueue now, before we grab locks. 382 * It will be given to sigcld(), below. 383 */ 384 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 385 386 /* 387 * revoke any doors created by the process. 388 */ 389 if (p->p_door_list) 390 door_exit(); 391 392 /* 393 * Release schedctl data structures. 394 */ 395 if (p->p_pagep) 396 schedctl_proc_cleanup(); 397 398 /* 399 * make sure all pending kaio has completed. 400 */ 401 if (p->p_aio) 402 aio_cleanup_exit(); 403 404 /* 405 * discard the lwpchan cache. 406 */ 407 if (p->p_lcp != NULL) 408 lwpchan_destroy_cache(0); 409 410 /* 411 * Clean up any DTrace helper actions or probes for the process. 412 */ 413 if (p->p_dtrace_helpers != NULL) { 414 ASSERT(dtrace_helpers_cleanup != NULL); 415 (*dtrace_helpers_cleanup)(); 416 } 417 418 /* untimeout the realtime timers */ 419 if (p->p_itimer != NULL) 420 timer_exit(); 421 422 if ((tmp_id = p->p_alarmid) != 0) { 423 p->p_alarmid = 0; 424 (void) untimeout(tmp_id); 425 } 426 427 /* 428 * Remove any fpollinfo_t's for this (last) thread from our file 429 * descriptors so closeall() can ASSERT() that they're all gone. 430 */ 431 pollcleanup(); 432 433 if (p->p_rprof_cyclic != CYCLIC_NONE) { 434 mutex_enter(&cpu_lock); 435 cyclic_remove(p->p_rprof_cyclic); 436 mutex_exit(&cpu_lock); 437 } 438 439 mutex_enter(&p->p_lock); 440 441 /* 442 * Clean up any DTrace probes associated with this process. 443 */ 444 if (p->p_dtrace_probes) { 445 ASSERT(dtrace_fasttrap_exit_ptr != NULL); 446 dtrace_fasttrap_exit_ptr(p); 447 } 448 449 while ((tmp_id = p->p_itimerid) != 0) { 450 p->p_itimerid = 0; 451 mutex_exit(&p->p_lock); 452 (void) untimeout(tmp_id); 453 mutex_enter(&p->p_lock); 454 } 455 456 lwp_cleanup(); 457 458 /* 459 * We are about to exit; prevent our resource associations from 460 * being changed. 461 */ 462 pool_barrier_enter(); 463 464 /* 465 * Block the process against /proc now that we have really 466 * acquired p->p_lock (to manipulate p_tlist at least). 467 */ 468 prbarrier(p); 469 470 #ifdef SUN_SRC_COMPAT 471 if (code == CLD_KILLED) 472 u.u_acflag |= AXSIG; 473 #endif 474 sigfillset(&p->p_ignore); 475 sigemptyset(&p->p_siginfo); 476 sigemptyset(&p->p_sig); 477 sigemptyset(&p->p_extsig); 478 sigemptyset(&t->t_sig); 479 sigemptyset(&t->t_extsig); 480 sigemptyset(&p->p_sigmask); 481 sigdelq(p, t, 0); 482 lwp->lwp_cursig = 0; 483 lwp->lwp_extsig = 0; 484 p->p_flag &= ~(SKILLED | SEXTKILLED); 485 if (lwp->lwp_curinfo) { 486 siginfofree(lwp->lwp_curinfo); 487 lwp->lwp_curinfo = NULL; 488 } 489 490 t->t_proc_flag |= TP_LWPEXIT; 491 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 492 prlwpexit(t); /* notify /proc */ 493 lwp_hash_out(p, t->t_tid); 494 prexit(p); 495 496 p->p_lwpcnt = 0; 497 p->p_tlist = NULL; 498 sigqfree(p); 499 term_mstate(t); 500 p->p_mterm = gethrtime(); 501 502 exec_vp = p->p_exec; 503 execdir_vp = p->p_execdir; 504 p->p_exec = NULLVP; 505 p->p_execdir = NULLVP; 506 mutex_exit(&p->p_lock); 507 if (exec_vp) 508 VN_RELE(exec_vp); 509 if (execdir_vp) 510 VN_RELE(execdir_vp); 511 512 pr_free_watched_pages(p); 513 514 closeall(P_FINFO(p)); 515 516 mutex_enter(&pidlock); 517 sp = p->p_sessp; 518 if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { 519 mutex_exit(&pidlock); 520 freectty(sp); 521 } else 522 mutex_exit(&pidlock); 523 524 #if defined(__x86) 525 /* 526 * If the process was using a private LDT then free it. 527 */ 528 if (p->p_ldt) 529 ldt_free(p); 530 #endif 531 532 #if defined(__sparc) 533 if (p->p_utraps != NULL) 534 utrap_free(p); 535 #endif 536 if (p->p_semacct) /* IPC semaphore exit */ 537 semexit(p); 538 rv = wstat(why, what); 539 540 acct(rv & 0xff); 541 exacct_commit_proc(p, rv); 542 543 /* 544 * Release any resources associated with C2 auditing 545 */ 546 #ifdef C2_AUDIT 547 if (audit_active) { 548 /* 549 * audit exit system call 550 */ 551 audit_exit(why, what); 552 } 553 #endif 554 555 /* 556 * Free address space. 557 */ 558 relvm(); 559 560 /* 561 * Release held contracts. 562 */ 563 contract_exit(p); 564 565 /* 566 * Depart our encapsulating process contract. 567 */ 568 if ((p->p_flag & SSYS) == 0) { 569 ASSERT(p->p_ct_process); 570 contract_process_exit(p->p_ct_process, p, rv); 571 } 572 573 /* 574 * Remove pool association, and block if requested by pool_do_bind. 575 */ 576 mutex_enter(&p->p_lock); 577 ASSERT(p->p_pool->pool_ref > 0); 578 atomic_add_32(&p->p_pool->pool_ref, -1); 579 p->p_pool = pool_default; 580 /* 581 * Now that our address space has been freed and all other threads 582 * in this process have exited, set the PEXITED pool flag. This 583 * tells the pools subsystems to ignore this process if it was 584 * requested to rebind this process to a new pool. 585 */ 586 p->p_poolflag |= PEXITED; 587 pool_barrier_exit(); 588 mutex_exit(&p->p_lock); 589 590 mutex_enter(&pidlock); 591 592 /* 593 * Delete this process from the newstate list of its parent. We 594 * will put it in the right place in the sigcld in the end. 595 */ 596 delete_ns(p->p_parent, p); 597 598 /* 599 * Reassign the orphans to the next of kin. 600 * Don't rearrange init's orphanage. 601 */ 602 if ((q = p->p_orphan) != NULL && p != proc_init) { 603 604 proc_t *nokp = p->p_nextofkin; 605 606 for (;;) { 607 q->p_nextofkin = nokp; 608 if (q->p_nextorph == NULL) 609 break; 610 q = q->p_nextorph; 611 } 612 q->p_nextorph = nokp->p_orphan; 613 nokp->p_orphan = p->p_orphan; 614 p->p_orphan = NULL; 615 } 616 617 /* 618 * Reassign the children to init. 619 * Don't try to assign init's children to init. 620 */ 621 if ((q = p->p_child) != NULL && p != proc_init) { 622 struct proc *np; 623 struct proc *initp = proc_init; 624 boolean_t setzonetop = B_FALSE; 625 626 if (!INGLOBALZONE(curproc)) 627 setzonetop = B_TRUE; 628 629 pgdetach(p); 630 631 do { 632 np = q->p_sibling; 633 /* 634 * Delete it from its current parent new state 635 * list and add it to init new state list 636 */ 637 delete_ns(q->p_parent, q); 638 639 q->p_ppid = 1; 640 if (setzonetop) { 641 mutex_enter(&q->p_lock); 642 q->p_flag |= SZONETOP; 643 mutex_exit(&q->p_lock); 644 } 645 q->p_parent = initp; 646 647 /* 648 * Since q will be the first child, 649 * it will not have a previous sibling. 650 */ 651 q->p_psibling = NULL; 652 if (initp->p_child) { 653 initp->p_child->p_psibling = q; 654 } 655 q->p_sibling = initp->p_child; 656 initp->p_child = q; 657 if (q->p_proc_flag & P_PR_PTRACE) { 658 mutex_enter(&q->p_lock); 659 sigtoproc(q, NULL, SIGKILL); 660 mutex_exit(&q->p_lock); 661 } 662 /* 663 * sigcld() will add the child to parents 664 * newstate list. 665 */ 666 if (q->p_stat == SZOMB) 667 sigcld(q, NULL); 668 } while ((q = np) != NULL); 669 670 p->p_child = NULL; 671 ASSERT(p->p_child_ns == NULL); 672 } 673 674 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); 675 676 mutex_enter(&p->p_lock); 677 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ 678 679 hrutime = mstate_aggr_state(p, LMS_USER); 680 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 681 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; 682 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; 683 684 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; 685 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; 686 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; 687 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; 688 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; 689 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; 690 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; 691 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; 692 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; 693 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; 694 695 p->p_ru.minflt += p->p_cru.minflt; 696 p->p_ru.majflt += p->p_cru.majflt; 697 p->p_ru.nswap += p->p_cru.nswap; 698 p->p_ru.inblock += p->p_cru.inblock; 699 p->p_ru.oublock += p->p_cru.oublock; 700 p->p_ru.msgsnd += p->p_cru.msgsnd; 701 p->p_ru.msgrcv += p->p_cru.msgrcv; 702 p->p_ru.nsignals += p->p_cru.nsignals; 703 p->p_ru.nvcsw += p->p_cru.nvcsw; 704 p->p_ru.nivcsw += p->p_cru.nivcsw; 705 p->p_ru.sysc += p->p_cru.sysc; 706 p->p_ru.ioch += p->p_cru.ioch; 707 708 p->p_stat = SZOMB; 709 p->p_proc_flag &= ~P_PR_PTRACE; 710 p->p_wdata = what; 711 p->p_wcode = (char)why; 712 713 cdir = PTOU(p)->u_cdir; 714 rdir = PTOU(p)->u_rdir; 715 cwd = PTOU(p)->u_cwd; 716 717 /* 718 * Release resource controls, as they are no longer enforceable. 719 */ 720 rctl_set_free(p->p_rctls); 721 722 /* 723 * Give up task and project memberships. Decrement tk_nlwps counter 724 * for our task.max-lwps resource control. An extended accounting 725 * record, if that facility is active, is scheduled to be written. 726 * Zombie processes are false members of task0 for the remainder of 727 * their lifetime; no accounting information is recorded for them. 728 */ 729 tk = p->p_task; 730 731 mutex_enter(&p->p_zone->zone_nlwps_lock); 732 tk->tk_nlwps--; 733 tk->tk_proj->kpj_nlwps--; 734 p->p_zone->zone_nlwps--; 735 mutex_exit(&p->p_zone->zone_nlwps_lock); 736 task_detach(p); 737 p->p_task = task0p; 738 739 /* 740 * Clear the lwp directory and the lwpid hash table 741 * now that /proc can't bother us any more. 742 * We free the memory below, after dropping p->p_lock. 743 */ 744 lwpdir = p->p_lwpdir; 745 lwpdir_sz = p->p_lwpdir_sz; 746 tidhash = p->p_tidhash; 747 tidhash_sz = p->p_tidhash_sz; 748 p->p_lwpdir = NULL; 749 p->p_lwpfree = NULL; 750 p->p_lwpdir_sz = 0; 751 p->p_tidhash = NULL; 752 p->p_tidhash_sz = 0; 753 754 /* 755 * curthread's proc pointer is changed to point at p0 because 756 * curthread's original proc pointer can be freed as soon as 757 * the child sends a SIGCLD to its parent. 758 */ 759 t->t_procp = &p0; 760 761 mutex_exit(&p->p_lock); 762 sigcld(p, sqp); 763 mutex_exit(&pidlock); 764 765 task_rele(tk); 766 767 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); 768 kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *)); 769 770 /* 771 * We don't release u_cdir and u_rdir until SZOMB is set. 772 * This protects us against dofusers(). 773 */ 774 VN_RELE(cdir); 775 if (rdir) 776 VN_RELE(rdir); 777 if (cwd) 778 refstr_rele(cwd); 779 780 lwp_pcb_exit(); 781 782 thread_exit(); 783 /* NOTREACHED */ 784 } 785 786 /* 787 * Format siginfo structure for wait system calls. 788 */ 789 void 790 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag) 791 { 792 ASSERT(MUTEX_HELD(&pidlock)); 793 794 bzero(ip, sizeof (k_siginfo_t)); 795 ip->si_signo = SIGCLD; 796 ip->si_code = pp->p_wcode; 797 ip->si_pid = pp->p_pid; 798 ip->si_ctid = PRCTID(pp); 799 ip->si_zoneid = pp->p_zone->zone_id; 800 ip->si_status = pp->p_wdata; 801 ip->si_stime = pp->p_stime; 802 ip->si_utime = pp->p_utime; 803 804 if (waitflag) { 805 pp->p_wcode = 0; 806 pp->p_wdata = 0; 807 pp->p_pidflag &= ~CLDPEND; 808 } 809 } 810 811 /* 812 * Wait system call. 813 * Search for a terminated (zombie) child, 814 * finally lay it to rest, and collect its status. 815 * Look also for stopped children, 816 * and pass back status from them. 817 */ 818 int 819 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) 820 { 821 int found; 822 proc_t *cp, *pp; 823 proc_t **nsp; 824 int proc_gone; 825 int waitflag = !(options & WNOWAIT); 826 827 /* 828 * Obsolete flag, defined here only for binary compatibility 829 * with old statically linked executables. Delete this when 830 * we no longer care about these old and broken applications. 831 */ 832 #define _WNOCHLD 0400 833 options &= ~_WNOCHLD; 834 835 if (options == 0 || (options & ~WOPTMASK)) 836 return (EINVAL); 837 838 switch (idtype) { 839 case P_PID: 840 case P_PGID: 841 if (id < 0 || id >= maxpid) 842 return (EINVAL); 843 /* FALLTHROUGH */ 844 case P_ALL: 845 break; 846 default: 847 return (EINVAL); 848 } 849 850 pp = ttoproc(curthread); 851 /* 852 * lock parent mutex so that sibling chain can be searched. 853 */ 854 mutex_enter(&pidlock); 855 while ((cp = pp->p_child) != NULL) { 856 857 proc_gone = 0; 858 859 for (nsp = &pp->p_child_ns; *nsp; nsp = &(*nsp)->p_sibling_ns) { 860 if (idtype == P_PID && id != (*nsp)->p_pid) { 861 continue; 862 } 863 if (idtype == P_PGID && id != (*nsp)->p_pgrp) { 864 continue; 865 } 866 867 switch ((*nsp)->p_wcode) { 868 869 case CLD_TRAPPED: 870 case CLD_STOPPED: 871 case CLD_CONTINUED: 872 cmn_err(CE_PANIC, 873 "waitid: wrong state %d on the p_newstate" 874 " list", (*nsp)->p_wcode); 875 break; 876 877 case CLD_EXITED: 878 case CLD_DUMPED: 879 case CLD_KILLED: 880 if (!(options & WEXITED)) { 881 /* 882 * Count how many are already gone 883 * for good. 884 */ 885 proc_gone++; 886 break; 887 } 888 if (!waitflag) { 889 winfo((*nsp), ip, 0); 890 } else { 891 proc_t *xp = *nsp; 892 winfo(xp, ip, 1); 893 freeproc(xp); 894 } 895 mutex_exit(&pidlock); 896 if (waitflag) { /* accept SIGCLD */ 897 sigcld_delete(ip); 898 sigcld_repost(); 899 } 900 return (0); 901 } 902 903 if (idtype == P_PID) 904 break; 905 } 906 907 /* 908 * Wow! None of the threads on the p_sibling_ns list were 909 * interesting threads. Check all the kids! 910 */ 911 found = 0; 912 cp = pp->p_child; 913 do { 914 if (idtype == P_PID && id != cp->p_pid) { 915 continue; 916 } 917 if (idtype == P_PGID && id != cp->p_pgrp) { 918 continue; 919 } 920 921 found++; 922 923 switch (cp->p_wcode) { 924 case CLD_TRAPPED: 925 if (!(options & WTRAPPED)) 926 break; 927 winfo(cp, ip, waitflag); 928 mutex_exit(&pidlock); 929 if (waitflag) { /* accept SIGCLD */ 930 sigcld_delete(ip); 931 sigcld_repost(); 932 } 933 return (0); 934 935 case CLD_STOPPED: 936 if (!(options & WSTOPPED)) 937 break; 938 /* Is it still stopped? */ 939 mutex_enter(&cp->p_lock); 940 if (!jobstopped(cp)) { 941 mutex_exit(&cp->p_lock); 942 break; 943 } 944 mutex_exit(&cp->p_lock); 945 winfo(cp, ip, waitflag); 946 mutex_exit(&pidlock); 947 if (waitflag) { /* accept SIGCLD */ 948 sigcld_delete(ip); 949 sigcld_repost(); 950 } 951 return (0); 952 953 case CLD_CONTINUED: 954 if (!(options & WCONTINUED)) 955 break; 956 winfo(cp, ip, waitflag); 957 mutex_exit(&pidlock); 958 if (waitflag) { /* accept SIGCLD */ 959 sigcld_delete(ip); 960 sigcld_repost(); 961 } 962 return (0); 963 964 case CLD_EXITED: 965 case CLD_DUMPED: 966 case CLD_KILLED: 967 /* 968 * Don't complain if a process was found in 969 * the first loop but we broke out of the loop 970 * because of the arguments passed to us. 971 */ 972 if (proc_gone == 0) { 973 cmn_err(CE_PANIC, 974 "waitid: wrong state on the" 975 " p_child list"); 976 } else { 977 break; 978 } 979 } 980 981 if (idtype == P_PID) 982 break; 983 } while ((cp = cp->p_sibling) != NULL); 984 985 /* 986 * If we found no interesting processes at all, 987 * break out and return ECHILD. 988 */ 989 if (found + proc_gone == 0) 990 break; 991 992 if (options & WNOHANG) { 993 bzero(ip, sizeof (k_siginfo_t)); 994 /* 995 * We should set ip->si_signo = SIGCLD, 996 * but there is an SVVS test that expects 997 * ip->si_signo to be zero in this case. 998 */ 999 mutex_exit(&pidlock); 1000 return (0); 1001 } 1002 1003 /* 1004 * If we found no processes of interest that could 1005 * change state while we wait, we don't wait at all. 1006 * Get out with ECHILD according to SVID. 1007 */ 1008 if (found == proc_gone) 1009 break; 1010 1011 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) { 1012 mutex_exit(&pidlock); 1013 return (EINTR); 1014 } 1015 } 1016 mutex_exit(&pidlock); 1017 return (ECHILD); 1018 } 1019 1020 /* 1021 * For implementations that don't require binary compatibility, 1022 * the wait system call may be made into a library call to the 1023 * waitid system call. 1024 */ 1025 int64_t 1026 wait(void) 1027 { 1028 int error; 1029 k_siginfo_t info; 1030 rval_t r; 1031 1032 if (error = waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED)) 1033 return (set_errno(error)); 1034 r.r_val1 = info.si_pid; 1035 r.r_val2 = wstat(info.si_code, info.si_status); 1036 return (r.r_vals); 1037 } 1038 1039 int 1040 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1041 { 1042 int error; 1043 k_siginfo_t info; 1044 1045 if (error = waitid(idtype, id, &info, options)) 1046 return (set_errno(error)); 1047 if (copyout(&info, infop, sizeof (k_siginfo_t))) 1048 return (set_errno(EFAULT)); 1049 return (0); 1050 } 1051 1052 #ifdef _SYSCALL32_IMPL 1053 1054 int 1055 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options) 1056 { 1057 int error; 1058 k_siginfo_t info; 1059 siginfo32_t info32; 1060 1061 if (error = waitid(idtype, id, &info, options)) 1062 return (set_errno(error)); 1063 siginfo_kto32(&info, &info32); 1064 if (copyout(&info32, infop, sizeof (info32))) 1065 return (set_errno(EFAULT)); 1066 return (0); 1067 } 1068 1069 #endif /* _SYSCALL32_IMPL */ 1070 1071 void 1072 proc_detach(proc_t *p) 1073 { 1074 proc_t *q; 1075 1076 ASSERT(MUTEX_HELD(&pidlock)); 1077 1078 q = p->p_parent; 1079 ASSERT(q != NULL); 1080 1081 /* 1082 * Take it off the newstate list of its parent 1083 */ 1084 delete_ns(q, p); 1085 1086 if (q->p_child == p) { 1087 q->p_child = p->p_sibling; 1088 /* 1089 * If the parent has no children, it better not 1090 * have any with new states either! 1091 */ 1092 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL); 1093 } 1094 1095 if (p->p_sibling) { 1096 p->p_sibling->p_psibling = p->p_psibling; 1097 } 1098 1099 if (p->p_psibling) { 1100 p->p_psibling->p_sibling = p->p_sibling; 1101 } 1102 } 1103 1104 /* 1105 * Remove zombie children from the process table. 1106 */ 1107 void 1108 freeproc(proc_t *p) 1109 { 1110 proc_t *q; 1111 1112 ASSERT(p->p_stat == SZOMB); 1113 ASSERT(p->p_tlist == NULL); 1114 ASSERT(MUTEX_HELD(&pidlock)); 1115 1116 sigdelq(p, NULL, 0); 1117 if (p->p_killsqp) { 1118 siginfofree(p->p_killsqp); 1119 p->p_killsqp = NULL; 1120 } 1121 1122 prfree(p); /* inform /proc */ 1123 1124 /* 1125 * Don't free the init processes. 1126 * Other dying processes will access it. 1127 */ 1128 if (p == proc_init) 1129 return; 1130 1131 1132 /* 1133 * We wait until now to free the cred structure because a 1134 * zombie process's credentials may be examined by /proc. 1135 * No cred locking needed because there are no threads at this point. 1136 */ 1137 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred)); 1138 crfree(p->p_cred); 1139 if (p->p_corefile != NULL) { 1140 corectl_path_rele(p->p_corefile); 1141 p->p_corefile = NULL; 1142 } 1143 if (p->p_content != NULL) { 1144 corectl_content_rele(p->p_content); 1145 p->p_content = NULL; 1146 } 1147 1148 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) || 1149 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) { 1150 /* 1151 * This should still do the right thing since p_utime/stime 1152 * get set to the correct value on process exit, so it 1153 * should get properly updated 1154 */ 1155 p->p_nextofkin->p_cutime += p->p_utime; 1156 p->p_nextofkin->p_cstime += p->p_stime; 1157 1158 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER]; 1159 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM]; 1160 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP]; 1161 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT]; 1162 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT]; 1163 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT]; 1164 p->p_nextofkin->p_cacct[LMS_USER_LOCK] 1165 += p->p_acct[LMS_USER_LOCK]; 1166 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP]; 1167 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] 1168 += p->p_acct[LMS_WAIT_CPU]; 1169 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED]; 1170 1171 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt; 1172 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt; 1173 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap; 1174 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock; 1175 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock; 1176 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd; 1177 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv; 1178 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals; 1179 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw; 1180 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw; 1181 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc; 1182 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch; 1183 1184 } 1185 1186 q = p->p_nextofkin; 1187 if (q && q->p_orphan == p) 1188 q->p_orphan = p->p_nextorph; 1189 else if (q) { 1190 for (q = q->p_orphan; q; q = q->p_nextorph) 1191 if (q->p_nextorph == p) 1192 break; 1193 ASSERT(q && q->p_nextorph == p); 1194 q->p_nextorph = p->p_nextorph; 1195 } 1196 1197 proc_detach(p); 1198 pid_exit(p); /* frees pid and proc structure */ 1199 } 1200 1201 /* 1202 * Delete process "child" from the newstate list of process "parent" 1203 */ 1204 void 1205 delete_ns(proc_t *parent, proc_t *child) 1206 { 1207 proc_t **ns; 1208 1209 ASSERT(MUTEX_HELD(&pidlock)); 1210 ASSERT(child->p_parent == parent); 1211 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) { 1212 if (*ns == child) { 1213 1214 ASSERT((*ns)->p_parent == parent); 1215 1216 *ns = child->p_sibling_ns; 1217 child->p_sibling_ns = NULL; 1218 return; 1219 } 1220 } 1221 } 1222 1223 /* 1224 * Add process "child" to the new state list of process "parent" 1225 */ 1226 void 1227 add_ns(proc_t *parent, proc_t *child) 1228 { 1229 ASSERT(child->p_sibling_ns == NULL); 1230 child->p_sibling_ns = parent->p_child_ns; 1231 parent->p_child_ns = child; 1232 } 1233