xref: /titanic_50/usr/src/uts/common/os/exit.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* from SVr4.0 1.74 */
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/user.h>
37 #include <sys/errno.h>
38 #include <sys/proc.h>
39 #include <sys/ucontext.h>
40 #include <sys/procfs.h>
41 #include <sys/vnode.h>
42 #include <sys/acct.h>
43 #include <sys/var.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/wait.h>
47 #include <sys/siginfo.h>
48 #include <sys/procset.h>
49 #include <sys/class.h>
50 #include <sys/file.h>
51 #include <sys/session.h>
52 #include <sys/kmem.h>
53 #include <sys/vtrace.h>
54 #include <sys/prsystm.h>
55 #include <sys/ipc.h>
56 #include <sys/sem_impl.h>
57 #include <c2/audit.h>
58 #include <sys/aio_impl.h>
59 #include <vm/as.h>
60 #include <sys/poll.h>
61 #include <sys/door.h>
62 #include <sys/lwpchan_impl.h>
63 #include <sys/utrap.h>
64 #include <sys/task.h>
65 #include <sys/exacct.h>
66 #include <sys/cyclic.h>
67 #include <sys/schedctl.h>
68 #include <sys/rctl.h>
69 #include <sys/contract_impl.h>
70 #include <sys/contract/process_impl.h>
71 #include <sys/list.h>
72 #include <sys/dtrace.h>
73 #include <sys/pool.h>
74 #include <sys/sdt.h>
75 #include <sys/corectl.h>
76 
#if defined(__x86)
/*
 * Releases a process's private LDT.  Not defined in this file;
 * proc_exit() below calls it when p->p_ldt is set.
 */
extern void ldt_free(proc_t *pp);
#endif
80 
81 /*
82  * convert code/data pair into old style wait status
83  */
84 int
85 wstat(int code, int data)
86 {
87 	int stat = (data & 0377);
88 
89 	switch (code) {
90 	case CLD_EXITED:
91 		stat <<= 8;
92 		break;
93 	case CLD_DUMPED:
94 		stat |= WCOREFLG;
95 		break;
96 	case CLD_KILLED:
97 		break;
98 	case CLD_TRAPPED:
99 	case CLD_STOPPED:
100 		stat <<= 8;
101 		stat |= WSTOPFLG;
102 		break;
103 	case CLD_CONTINUED:
104 		stat = WCONTFLG;
105 		break;
106 	default:
107 		cmn_err(CE_PANIC, "wstat: bad code");
108 		/* NOTREACHED */
109 	}
110 	return (stat);
111 }
112 
/*
 * Write a short human-readable description of how a process ended
 * into buf (bounded by bufsz, via snprintf) and return buf.
 *
 * why is a CLD_* code; what is the value that accompanies it and is
 * printed as the exit status or the signal number depending on why.
 * Codes other than CLD_EXITED, CLD_KILLED and CLD_DUMPED produce an
 * "unknown error" message that shows both values.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	if (why == CLD_EXITED) {
		(void) snprintf(buf, bufsz, "exited with status %d", what);
	} else if (why == CLD_KILLED) {
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
	} else if (why == CLD_DUMPED) {
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
	} else {
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
	}

	return (buf);
}
134 
/*
 * exit system call: pass back caller's arg.
 *
 * rval is handed to exit() as the "what" value, with CLD_EXITED as
 * the "why" code.  rexit() itself returns nothing.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}
143 
/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed.  As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.  Most
 * importantly this includes closing all file descriptors (exec only
 * closes those marked close-on-exec) and resetting signals (exec only
 * resets handled signals, and we need to clear any signals which
 * killed init).  Anything else that exec(2) says would be inherited,
 * but would affect the execution of init, needs to be reset.
 *
 * what and why describe how init ended and are only used to build the
 * warning messages (via exit_reason()).
 *
 * Returns 0 if exec_init() succeeded, -1 (after logging warnings) if
 * it failed.  Operates on curthread and its process.
 */
static int
restart_init(int what, int why)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	user_t *up = PTOU(p);

	vnode_t *oldcd, *oldrd;
	sess_t *sp;
	int i, err;
	char reason_buf[64];
	const char *ipath;

	/*
	 * Let zone admin (and global zone admin if this is for a non-global
	 * zone) know that init has failed and will be restarted.
	 * reason_buf is filled in by the exit_reason() call here and is
	 * reused by the cmn_err() call below.
	 */
	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "init(1M) %s: restarting automatically",
	    exit_reason(reason_buf, sizeof (reason_buf), what, why));

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
		    "restarting automatically",
		    p->p_zone->zone_name, p->p_pid, reason_buf);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 * Then close all open file descriptors in the process.
	 */
	pollcleanup();
	closeall(P_FINFO(p));

	/*
	 * Grab p_lock and begin clearing miscellaneous global process
	 * state that needs to be reset before we exec the new init(1M).
	 */

	mutex_enter(&p->p_lock);

	p->p_flag &= ~(SKILLED | SEXITLWPS | SEXTKILLED | SCOREDUMP | SDOCORE);
	up->u_cmask = CMASK;

	/* Drop every held and pending signal, per-thread and per-process. */
	sigemptyset(&t->t_hold);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);

	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);

	sigdelq(p, t, 0);
	sigdelq(p, NULL, 0);

	if (p->p_killsqp) {
		siginfofree(p->p_killsqp);
		p->p_killsqp = NULL;
	}

	/*
	 * Reset any signals that are ignored back to the default disposition.
	 * Other u_signal members will be cleared when exec calls sigdefault().
	 * u_signal and u_sigmask are indexed by signal number minus one.
	 */
	for (i = 1; i < NSIG; i++) {
		if (up->u_signal[i - 1] == SIG_IGN) {
			up->u_signal[i - 1] = SIG_DFL;
			sigemptyset(&up->u_sigmask[i - 1]);
		}
	}

	/*
	 * Clear the current signal, any signal info associated with it, and
	 * any signal information from contracts and/or contract templates.
	 */
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo != NULL) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	lwp_ctmpl_clear(lwp);

	/*
	 * Reset both the process root directory and the current working
	 * directory to the root of the zone just as we do during boot.
	 * One hold is taken on zone_rootvp for each of the two references;
	 * the previous vnodes are released after p_lock is dropped.
	 */
	VN_HOLD(p->p_zone->zone_rootvp);
	oldrd = up->u_rdir;
	up->u_rdir = p->p_zone->zone_rootvp;

	VN_HOLD(p->p_zone->zone_rootvp);
	oldcd = up->u_cdir;
	up->u_cdir = p->p_zone->zone_rootvp;

	if (up->u_cwd != NULL) {
		refstr_rele(up->u_cwd);
		up->u_cwd = NULL;
	}

	mutex_exit(&p->p_lock);

	if (oldrd != NULL)
		VN_RELE(oldrd);
	if (oldcd != NULL)
		VN_RELE(oldcd);

	/*
	 * Free the controlling tty.  This is done only when this process's
	 * pid is the session's s_sidp and the session has a vnode; pidlock
	 * is dropped before freectty() is called.
	 */
	mutex_enter(&pidlock);
	sp = p->p_sessp;
	if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) {
		mutex_exit(&pidlock);
		freectty(sp);
	} else {
		mutex_exit(&pidlock);
	}

	/*
	 * Now exec() the new init(1M) on top of the current process.  If we
	 * succeed, the caller will treat this like a successful system call.
	 * If we fail, we issue messages and the caller will proceed with exit.
	 */
	ipath = INGLOBALZONE(p) ? initname : zone_initname;
	err = exec_init(ipath, 0, NULL);

	if (err == 0)
		return (0);

	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "failed to restart init(1M) (err=%d): system reboot required", err);

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
		    "(pid %d, err=%d): zoneadm(1M) boot required",
		    p->p_zone->zone_name, p->p_pid, err);
	}

	return (-1);
}
298 
299 /*
300  * Release resources.
301  * Enter zombie state.
302  * Wake up parent and init processes,
303  * and dispose of children.
304  */
305 void
306 exit(int why, int what)
307 {
308 	/*
309 	 * If proc_exit() fails, then some other lwp in the process
310 	 * got there first.  We just have to call lwp_exit() to allow
311 	 * the other lwp to finish exiting the process.  Otherwise we're
312 	 * restarting init, and should return.
313 	 */
314 	if (proc_exit(why, what) != 0) {
315 		mutex_enter(&curproc->p_lock);
316 		ASSERT(curproc->p_flag & SEXITLWPS);
317 		lwp_exit();
318 		/* NOTREACHED */
319 	}
320 }
321 
322 /*
323  * Return value:
324  *   1 - exitlwps() failed, call (or continue) lwp_exit()
325  *   0 - restarting init.  Return through system call path
326  */
327 int
328 proc_exit(int why, int what)
329 {
330 	kthread_t *t = curthread;
331 	klwp_t *lwp = ttolwp(t);
332 	proc_t *p = ttoproc(t);
333 	zone_t *z = p->p_zone;
334 	timeout_id_t tmp_id;
335 	int rv;
336 	proc_t *q;
337 	sess_t *sp;
338 	task_t *tk;
339 	vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
340 	sigqueue_t *sqp;
341 	lwpdir_t *lwpdir;
342 	uint_t lwpdir_sz;
343 	lwpdir_t **tidhash;
344 	uint_t tidhash_sz;
345 	refstr_t *cwd;
346 	hrtime_t hrutime, hrstime;
347 
348 	/*
349 	 * Stop and discard the process's lwps except for the current one,
350 	 * unless some other lwp beat us to it.  If exitlwps() fails then
351 	 * return and the calling lwp will call (or continue in) lwp_exit().
352 	 */
353 	if (exitlwps(0) != 0)
354 		return (1);
355 
356 	DTRACE_PROC(lwp__exit);
357 	DTRACE_PROC1(exit, int, why);
358 
359 	/*
360 	 * Don't let init exit unless zone_icode() failed its exec, or
361 	 * we are shutting down the zone or the machine.
362 	 *
363 	 * Since we are single threaded, we don't need to lock the
364 	 * following accesses to zone_proc_initpid.
365 	 */
366 	if (p->p_pid == z->zone_proc_initpid) {
367 		if (z->zone_boot_err == 0 &&
368 		    zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
369 		    zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN &&
370 		    restart_init(what, why) == 0)
371 			return (0);
372 		/*
373 		 * Since we didn't or couldn't restart init, we clear
374 		 * the zone's init state and proceed with exit
375 		 * processing.
376 		 */
377 		z->zone_proc_initpid = -1;
378 	}
379 
380 	/*
381 	 * Allocate a sigqueue now, before we grab locks.
382 	 * It will be given to sigcld(), below.
383 	 */
384 	sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
385 
386 	/*
387 	 * revoke any doors created by the process.
388 	 */
389 	if (p->p_door_list)
390 		door_exit();
391 
392 	/*
393 	 * Release schedctl data structures.
394 	 */
395 	if (p->p_pagep)
396 		schedctl_proc_cleanup();
397 
398 	/*
399 	 * make sure all pending kaio has completed.
400 	 */
401 	if (p->p_aio)
402 		aio_cleanup_exit();
403 
404 	/*
405 	 * discard the lwpchan cache.
406 	 */
407 	if (p->p_lcp != NULL)
408 		lwpchan_destroy_cache(0);
409 
410 	/*
411 	 * Clean up any DTrace helper actions or probes for the process.
412 	 */
413 	if (p->p_dtrace_helpers != NULL) {
414 		ASSERT(dtrace_helpers_cleanup != NULL);
415 		(*dtrace_helpers_cleanup)();
416 	}
417 
418 	/* untimeout the realtime timers */
419 	if (p->p_itimer != NULL)
420 		timer_exit();
421 
422 	if ((tmp_id = p->p_alarmid) != 0) {
423 		p->p_alarmid = 0;
424 		(void) untimeout(tmp_id);
425 	}
426 
427 	/*
428 	 * Remove any fpollinfo_t's for this (last) thread from our file
429 	 * descriptors so closeall() can ASSERT() that they're all gone.
430 	 */
431 	pollcleanup();
432 
433 	if (p->p_rprof_cyclic != CYCLIC_NONE) {
434 		mutex_enter(&cpu_lock);
435 		cyclic_remove(p->p_rprof_cyclic);
436 		mutex_exit(&cpu_lock);
437 	}
438 
439 	mutex_enter(&p->p_lock);
440 
441 	/*
442 	 * Clean up any DTrace probes associated with this process.
443 	 */
444 	if (p->p_dtrace_probes) {
445 		ASSERT(dtrace_fasttrap_exit_ptr != NULL);
446 		dtrace_fasttrap_exit_ptr(p);
447 	}
448 
449 	while ((tmp_id = p->p_itimerid) != 0) {
450 		p->p_itimerid = 0;
451 		mutex_exit(&p->p_lock);
452 		(void) untimeout(tmp_id);
453 		mutex_enter(&p->p_lock);
454 	}
455 
456 	lwp_cleanup();
457 
458 	/*
459 	 * We are about to exit; prevent our resource associations from
460 	 * being changed.
461 	 */
462 	pool_barrier_enter();
463 
464 	/*
465 	 * Block the process against /proc now that we have really
466 	 * acquired p->p_lock (to manipulate p_tlist at least).
467 	 */
468 	prbarrier(p);
469 
470 #ifdef	SUN_SRC_COMPAT
471 	if (code == CLD_KILLED)
472 		u.u_acflag |= AXSIG;
473 #endif
474 	sigfillset(&p->p_ignore);
475 	sigemptyset(&p->p_siginfo);
476 	sigemptyset(&p->p_sig);
477 	sigemptyset(&p->p_extsig);
478 	sigemptyset(&t->t_sig);
479 	sigemptyset(&t->t_extsig);
480 	sigemptyset(&p->p_sigmask);
481 	sigdelq(p, t, 0);
482 	lwp->lwp_cursig = 0;
483 	lwp->lwp_extsig = 0;
484 	p->p_flag &= ~(SKILLED | SEXTKILLED);
485 	if (lwp->lwp_curinfo) {
486 		siginfofree(lwp->lwp_curinfo);
487 		lwp->lwp_curinfo = NULL;
488 	}
489 
490 	t->t_proc_flag |= TP_LWPEXIT;
491 	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
492 	prlwpexit(t);		/* notify /proc */
493 	lwp_hash_out(p, t->t_tid);
494 	prexit(p);
495 
496 	p->p_lwpcnt = 0;
497 	p->p_tlist = NULL;
498 	sigqfree(p);
499 	term_mstate(t);
500 	p->p_mterm = gethrtime();
501 
502 	exec_vp = p->p_exec;
503 	execdir_vp = p->p_execdir;
504 	p->p_exec = NULLVP;
505 	p->p_execdir = NULLVP;
506 	mutex_exit(&p->p_lock);
507 	if (exec_vp)
508 		VN_RELE(exec_vp);
509 	if (execdir_vp)
510 		VN_RELE(execdir_vp);
511 
512 	pr_free_watched_pages(p);
513 
514 	closeall(P_FINFO(p));
515 
516 	mutex_enter(&pidlock);
517 	sp = p->p_sessp;
518 	if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) {
519 		mutex_exit(&pidlock);
520 		freectty(sp);
521 	} else
522 		mutex_exit(&pidlock);
523 
524 #if defined(__x86)
525 	/*
526 	 * If the process was using a private LDT then free it.
527 	 */
528 	if (p->p_ldt)
529 		ldt_free(p);
530 #endif
531 
532 #if defined(__sparc)
533 	if (p->p_utraps != NULL)
534 		utrap_free(p);
535 #endif
536 	if (p->p_semacct)			/* IPC semaphore exit */
537 		semexit(p);
538 	rv = wstat(why, what);
539 
540 	acct(rv & 0xff);
541 	exacct_commit_proc(p, rv);
542 
543 	/*
544 	 * Release any resources associated with C2 auditing
545 	 */
546 #ifdef C2_AUDIT
547 	if (audit_active) {
548 		/*
549 		 * audit exit system call
550 		 */
551 		audit_exit(why, what);
552 	}
553 #endif
554 
555 	/*
556 	 * Free address space.
557 	 */
558 	relvm();
559 
560 	/*
561 	 * Release held contracts.
562 	 */
563 	contract_exit(p);
564 
565 	/*
566 	 * Depart our encapsulating process contract.
567 	 */
568 	if ((p->p_flag & SSYS) == 0) {
569 		ASSERT(p->p_ct_process);
570 		contract_process_exit(p->p_ct_process, p, rv);
571 	}
572 
573 	/*
574 	 * Remove pool association, and block if requested by pool_do_bind.
575 	 */
576 	mutex_enter(&p->p_lock);
577 	ASSERT(p->p_pool->pool_ref > 0);
578 	atomic_add_32(&p->p_pool->pool_ref, -1);
579 	p->p_pool = pool_default;
580 	/*
581 	 * Now that our address space has been freed and all other threads
582 	 * in this process have exited, set the PEXITED pool flag.  This
583 	 * tells the pools subsystems to ignore this process if it was
584 	 * requested to rebind this process to a new pool.
585 	 */
586 	p->p_poolflag |= PEXITED;
587 	pool_barrier_exit();
588 	mutex_exit(&p->p_lock);
589 
590 	mutex_enter(&pidlock);
591 
592 	/*
593 	 * Delete this process from the newstate list of its parent. We
594 	 * will put it in the right place in the sigcld in the end.
595 	 */
596 	delete_ns(p->p_parent, p);
597 
598 	/*
599 	 * Reassign the orphans to the next of kin.
600 	 * Don't rearrange init's orphanage.
601 	 */
602 	if ((q = p->p_orphan) != NULL && p != proc_init) {
603 
604 		proc_t *nokp = p->p_nextofkin;
605 
606 		for (;;) {
607 			q->p_nextofkin = nokp;
608 			if (q->p_nextorph == NULL)
609 				break;
610 			q = q->p_nextorph;
611 		}
612 		q->p_nextorph = nokp->p_orphan;
613 		nokp->p_orphan = p->p_orphan;
614 		p->p_orphan = NULL;
615 	}
616 
617 	/*
618 	 * Reassign the children to init.
619 	 * Don't try to assign init's children to init.
620 	 */
621 	if ((q = p->p_child) != NULL && p != proc_init) {
622 		struct proc	*np;
623 		struct proc	*initp = proc_init;
624 		boolean_t	setzonetop = B_FALSE;
625 
626 		if (!INGLOBALZONE(curproc))
627 			setzonetop = B_TRUE;
628 
629 		pgdetach(p);
630 
631 		do {
632 			np = q->p_sibling;
633 			/*
634 			 * Delete it from its current parent new state
635 			 * list and add it to init new state list
636 			 */
637 			delete_ns(q->p_parent, q);
638 
639 			q->p_ppid = 1;
640 			if (setzonetop) {
641 				mutex_enter(&q->p_lock);
642 				q->p_flag |= SZONETOP;
643 				mutex_exit(&q->p_lock);
644 			}
645 			q->p_parent = initp;
646 
647 			/*
648 			 * Since q will be the first child,
649 			 * it will not have a previous sibling.
650 			 */
651 			q->p_psibling = NULL;
652 			if (initp->p_child) {
653 				initp->p_child->p_psibling = q;
654 			}
655 			q->p_sibling = initp->p_child;
656 			initp->p_child = q;
657 			if (q->p_proc_flag & P_PR_PTRACE) {
658 				mutex_enter(&q->p_lock);
659 				sigtoproc(q, NULL, SIGKILL);
660 				mutex_exit(&q->p_lock);
661 			}
662 			/*
663 			 * sigcld() will add the child to parents
664 			 * newstate list.
665 			 */
666 			if (q->p_stat == SZOMB)
667 				sigcld(q, NULL);
668 		} while ((q = np) != NULL);
669 
670 		p->p_child = NULL;
671 		ASSERT(p->p_child_ns == NULL);
672 	}
673 
674 	TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
675 
676 	mutex_enter(&p->p_lock);
677 	CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
678 
679 	hrutime = mstate_aggr_state(p, LMS_USER);
680 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
681 	p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
682 	p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
683 
684 	p->p_acct[LMS_USER]	+= p->p_cacct[LMS_USER];
685 	p->p_acct[LMS_SYSTEM]	+= p->p_cacct[LMS_SYSTEM];
686 	p->p_acct[LMS_TRAP]	+= p->p_cacct[LMS_TRAP];
687 	p->p_acct[LMS_TFAULT]	+= p->p_cacct[LMS_TFAULT];
688 	p->p_acct[LMS_DFAULT]	+= p->p_cacct[LMS_DFAULT];
689 	p->p_acct[LMS_KFAULT]	+= p->p_cacct[LMS_KFAULT];
690 	p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
691 	p->p_acct[LMS_SLEEP]	+= p->p_cacct[LMS_SLEEP];
692 	p->p_acct[LMS_WAIT_CPU]	+= p->p_cacct[LMS_WAIT_CPU];
693 	p->p_acct[LMS_STOPPED]	+= p->p_cacct[LMS_STOPPED];
694 
695 	p->p_ru.minflt	+= p->p_cru.minflt;
696 	p->p_ru.majflt	+= p->p_cru.majflt;
697 	p->p_ru.nswap	+= p->p_cru.nswap;
698 	p->p_ru.inblock	+= p->p_cru.inblock;
699 	p->p_ru.oublock	+= p->p_cru.oublock;
700 	p->p_ru.msgsnd	+= p->p_cru.msgsnd;
701 	p->p_ru.msgrcv	+= p->p_cru.msgrcv;
702 	p->p_ru.nsignals += p->p_cru.nsignals;
703 	p->p_ru.nvcsw	+= p->p_cru.nvcsw;
704 	p->p_ru.nivcsw	+= p->p_cru.nivcsw;
705 	p->p_ru.sysc	+= p->p_cru.sysc;
706 	p->p_ru.ioch	+= p->p_cru.ioch;
707 
708 	p->p_stat = SZOMB;
709 	p->p_proc_flag &= ~P_PR_PTRACE;
710 	p->p_wdata = what;
711 	p->p_wcode = (char)why;
712 
713 	cdir = PTOU(p)->u_cdir;
714 	rdir = PTOU(p)->u_rdir;
715 	cwd = PTOU(p)->u_cwd;
716 
717 	/*
718 	 * Release resource controls, as they are no longer enforceable.
719 	 */
720 	rctl_set_free(p->p_rctls);
721 
722 	/*
723 	 * Give up task and project memberships.  Decrement tk_nlwps counter
724 	 * for our task.max-lwps resource control.  An extended accounting
725 	 * record, if that facility is active, is scheduled to be written.
726 	 * Zombie processes are false members of task0 for the remainder of
727 	 * their lifetime; no accounting information is recorded for them.
728 	 */
729 	tk = p->p_task;
730 
731 	mutex_enter(&p->p_zone->zone_nlwps_lock);
732 	tk->tk_nlwps--;
733 	tk->tk_proj->kpj_nlwps--;
734 	p->p_zone->zone_nlwps--;
735 	mutex_exit(&p->p_zone->zone_nlwps_lock);
736 	task_detach(p);
737 	p->p_task = task0p;
738 
739 	/*
740 	 * Clear the lwp directory and the lwpid hash table
741 	 * now that /proc can't bother us any more.
742 	 * We free the memory below, after dropping p->p_lock.
743 	 */
744 	lwpdir = p->p_lwpdir;
745 	lwpdir_sz = p->p_lwpdir_sz;
746 	tidhash = p->p_tidhash;
747 	tidhash_sz = p->p_tidhash_sz;
748 	p->p_lwpdir = NULL;
749 	p->p_lwpfree = NULL;
750 	p->p_lwpdir_sz = 0;
751 	p->p_tidhash = NULL;
752 	p->p_tidhash_sz = 0;
753 
754 	/*
755 	 * curthread's proc pointer is changed to point at p0 because
756 	 * curthread's original proc pointer can be freed as soon as
757 	 * the child sends a SIGCLD to its parent.
758 	 */
759 	t->t_procp = &p0;
760 
761 	mutex_exit(&p->p_lock);
762 	sigcld(p, sqp);
763 	mutex_exit(&pidlock);
764 
765 	task_rele(tk);
766 
767 	kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
768 	kmem_free(tidhash, tidhash_sz * sizeof (lwpdir_t *));
769 
770 	/*
771 	 * We don't release u_cdir and u_rdir until SZOMB is set.
772 	 * This protects us against dofusers().
773 	 */
774 	VN_RELE(cdir);
775 	if (rdir)
776 		VN_RELE(rdir);
777 	if (cwd)
778 		refstr_rele(cwd);
779 
780 	lwp_pcb_exit();
781 
782 	thread_exit();
783 	/* NOTREACHED */
784 }
785 
786 /*
787  * Format siginfo structure for wait system calls.
788  */
789 void
790 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
791 {
792 	ASSERT(MUTEX_HELD(&pidlock));
793 
794 	bzero(ip, sizeof (k_siginfo_t));
795 	ip->si_signo = SIGCLD;
796 	ip->si_code = pp->p_wcode;
797 	ip->si_pid = pp->p_pid;
798 	ip->si_ctid = PRCTID(pp);
799 	ip->si_zoneid = pp->p_zone->zone_id;
800 	ip->si_status = pp->p_wdata;
801 	ip->si_stime = pp->p_stime;
802 	ip->si_utime = pp->p_utime;
803 
804 	if (waitflag) {
805 		pp->p_wcode = 0;
806 		pp->p_wdata = 0;
807 		pp->p_pidflag &= ~CLDPEND;
808 	}
809 }
810 
/*
 * Wait system call.
 * Search for a terminated (zombie) child,
 * finally lay it to rest, and collect its status.
 * Look also for stopped children,
 * and pass back status from them.
 *
 * idtype/id select which children are of interest (P_PID, P_PGID or
 * P_ALL), options is a mask of W* flags, and *ip receives the siginfo
 * for the child that was found.
 *
 * Returns 0 on success or an errno value, which is returned directly
 * rather than through set_errno():
 *   EINVAL - bad options, idtype or id
 *   EINTR  - the wait was interrupted (cv_wait_sig_swap() returned 0)
 *   ECHILD - no child that matches and could still change state
 * With WNOHANG and nothing to report, 0 is returned with *ip zeroed.
 */
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
	int found;
	proc_t *cp, *pp;
	proc_t **nsp;
	int proc_gone;
	/* unless WNOWAIT is given, reporting a status also consumes it */
	int waitflag = !(options & WNOWAIT);

	/*
	 * Obsolete flag, defined here only for binary compatibility
	 * with old statically linked executables.  Delete this when
	 * we no longer care about these old and broken applications.
	 */
#define	_WNOCHLD	0400
	options &= ~_WNOCHLD;

	if (options == 0 || (options & ~WOPTMASK))
		return (EINVAL);

	switch (idtype) {
	case P_PID:
	case P_PGID:
		if (id < 0 || id >= maxpid)
			return (EINVAL);
		/* FALLTHROUGH */
	case P_ALL:
		break;
	default:
		return (EINVAL);
	}

	pp = ttoproc(curthread);
	/*
	 * Take pidlock so that the child and newstate lists can be
	 * searched.
	 */
	mutex_enter(&pidlock);
	while ((cp = pp->p_child) != NULL) {

		proc_gone = 0;

		/*
		 * First pass: the newstate list (p_child_ns), where only
		 * exited, dumped or killed children are expected.
		 */
		for (nsp = &pp->p_child_ns; *nsp; nsp = &(*nsp)->p_sibling_ns) {
			if (idtype == P_PID && id != (*nsp)->p_pid) {
				continue;
			}
			if (idtype == P_PGID && id != (*nsp)->p_pgrp) {
				continue;
			}

			switch ((*nsp)->p_wcode) {

			case CLD_TRAPPED:
			case CLD_STOPPED:
			case CLD_CONTINUED:
				cmn_err(CE_PANIC,
				    "waitid: wrong state %d on the p_newstate"
				    " list", (*nsp)->p_wcode);
				break;

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				if (!(options & WEXITED)) {
					/*
					 * Count how many are already gone
					 * for good.
					 */
					proc_gone++;
					break;
				}
				if (!waitflag) {
					winfo((*nsp), ip, 0);
				} else {
					/*
					 * Consume the status and free the
					 * zombie.
					 */
					proc_t *xp = *nsp;
					winfo(xp, ip, 1);
					freeproc(xp);
				}
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);
			}

			/* a pid names at most one child; stop looking */
			if (idtype == P_PID)
				break;
		}

		/*
		 * Nothing on the p_sibling_ns list was reported.
		 * Check all the kids!
		 */
		found = 0;
		cp = pp->p_child;
		do {
			/*
			 * "continue" in a do/while jumps to the loop
			 * condition, so it advances to the next sibling.
			 */
			if (idtype == P_PID && id != cp->p_pid) {
				continue;
			}
			if (idtype == P_PGID && id != cp->p_pgrp) {
				continue;
			}

			found++;

			switch (cp->p_wcode) {
			case CLD_TRAPPED:
				if (!(options & WTRAPPED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_STOPPED:
				if (!(options & WSTOPPED))
					break;
				/* Is it still stopped? */
				mutex_enter(&cp->p_lock);
				if (!jobstopped(cp)) {
					mutex_exit(&cp->p_lock);
					break;
				}
				mutex_exit(&cp->p_lock);
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_CONTINUED:
				if (!(options & WCONTINUED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				/*
				 * Don't complain if a process was found in
				 * the first loop but we broke out of the loop
				 * because of the arguments passed to us.
				 */
				if (proc_gone == 0) {
					cmn_err(CE_PANIC,
					    "waitid: wrong state on the"
					    " p_child list");
				} else {
					break;
				}
			}

			if (idtype == P_PID)
				break;
		} while ((cp = cp->p_sibling) != NULL);

		/*
		 * If we found no interesting processes at all,
		 * break out and return ECHILD.
		 */
		if (found + proc_gone == 0)
			break;

		if (options & WNOHANG) {
			bzero(ip, sizeof (k_siginfo_t));
			/*
			 * We should set ip->si_signo = SIGCLD,
			 * but there is an SVVS test that expects
			 * ip->si_signo to be zero in this case.
			 */
			mutex_exit(&pidlock);
			return (0);
		}

		/*
		 * If we found no processes of interest that could
		 * change state while we wait, we don't wait at all.
		 * Get out with ECHILD according to SVID.
		 */
		if (found == proc_gone)
			break;

		/*
		 * Sleep on our own p_cv, then rescan from the top.  A
		 * zero return here is reported as EINTR.
		 */
		if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
			mutex_exit(&pidlock);
			return (EINTR);
		}
	}
	mutex_exit(&pidlock);
	return (ECHILD);
}
1019 
1020 /*
1021  * For implementations that don't require binary compatibility,
1022  * the wait system call may be made into a library call to the
1023  * waitid system call.
1024  */
1025 int64_t
1026 wait(void)
1027 {
1028 	int error;
1029 	k_siginfo_t info;
1030 	rval_t	r;
1031 
1032 	if (error =  waitid(P_ALL, (id_t)0, &info, WEXITED|WTRAPPED))
1033 		return (set_errno(error));
1034 	r.r_val1 = info.si_pid;
1035 	r.r_val2 = wstat(info.si_code, info.si_status);
1036 	return (r.r_vals);
1037 }
1038 
1039 int
1040 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1041 {
1042 	int error;
1043 	k_siginfo_t info;
1044 
1045 	if (error = waitid(idtype, id, &info, options))
1046 		return (set_errno(error));
1047 	if (copyout(&info, infop, sizeof (k_siginfo_t)))
1048 		return (set_errno(EFAULT));
1049 	return (0);
1050 }
1051 
1052 #ifdef _SYSCALL32_IMPL
1053 
1054 int
1055 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1056 {
1057 	int error;
1058 	k_siginfo_t info;
1059 	siginfo32_t info32;
1060 
1061 	if (error = waitid(idtype, id, &info, options))
1062 		return (set_errno(error));
1063 	siginfo_kto32(&info, &info32);
1064 	if (copyout(&info32, infop, sizeof (info32)))
1065 		return (set_errno(EFAULT));
1066 	return (0);
1067 }
1068 
1069 #endif	/* _SYSCALL32_IMPL */
1070 
1071 void
1072 proc_detach(proc_t *p)
1073 {
1074 	proc_t *q;
1075 
1076 	ASSERT(MUTEX_HELD(&pidlock));
1077 
1078 	q = p->p_parent;
1079 	ASSERT(q != NULL);
1080 
1081 	/*
1082 	 * Take it off the newstate list of its parent
1083 	 */
1084 	delete_ns(q, p);
1085 
1086 	if (q->p_child == p) {
1087 		q->p_child = p->p_sibling;
1088 		/*
1089 		 * If the parent has no children, it better not
1090 		 * have any with new states either!
1091 		 */
1092 		ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1093 	}
1094 
1095 	if (p->p_sibling) {
1096 		p->p_sibling->p_psibling = p->p_psibling;
1097 	}
1098 
1099 	if (p->p_psibling) {
1100 		p->p_psibling->p_sibling = p->p_sibling;
1101 	}
1102 }
1103 
1104 /*
1105  * Remove zombie children from the process table.
1106  */
1107 void
1108 freeproc(proc_t *p)
1109 {
1110 	proc_t *q;
1111 
1112 	ASSERT(p->p_stat == SZOMB);
1113 	ASSERT(p->p_tlist == NULL);
1114 	ASSERT(MUTEX_HELD(&pidlock));
1115 
1116 	sigdelq(p, NULL, 0);
1117 	if (p->p_killsqp) {
1118 		siginfofree(p->p_killsqp);
1119 		p->p_killsqp = NULL;
1120 	}
1121 
1122 	prfree(p);	/* inform /proc */
1123 
1124 	/*
1125 	 * Don't free the init processes.
1126 	 * Other dying processes will access it.
1127 	 */
1128 	if (p == proc_init)
1129 		return;
1130 
1131 
1132 	/*
1133 	 * We wait until now to free the cred structure because a
1134 	 * zombie process's credentials may be examined by /proc.
1135 	 * No cred locking needed because there are no threads at this point.
1136 	 */
1137 	upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1138 	crfree(p->p_cred);
1139 	if (p->p_corefile != NULL) {
1140 		corectl_path_rele(p->p_corefile);
1141 		p->p_corefile = NULL;
1142 	}
1143 	if (p->p_content != NULL) {
1144 		corectl_content_rele(p->p_content);
1145 		p->p_content = NULL;
1146 	}
1147 
1148 	if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1149 	    (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1150 		/*
1151 		 * This should still do the right thing since p_utime/stime
1152 		 * get set to the correct value on process exit, so it
1153 		 * should get properly updated
1154 		 */
1155 		p->p_nextofkin->p_cutime += p->p_utime;
1156 		p->p_nextofkin->p_cstime += p->p_stime;
1157 
1158 		p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1159 		p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1160 		p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1161 		p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1162 		p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1163 		p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1164 		p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1165 		    += p->p_acct[LMS_USER_LOCK];
1166 		p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1167 		p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1168 		    += p->p_acct[LMS_WAIT_CPU];
1169 		p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1170 
1171 		p->p_nextofkin->p_cru.minflt	+= p->p_ru.minflt;
1172 		p->p_nextofkin->p_cru.majflt	+= p->p_ru.majflt;
1173 		p->p_nextofkin->p_cru.nswap	+= p->p_ru.nswap;
1174 		p->p_nextofkin->p_cru.inblock	+= p->p_ru.inblock;
1175 		p->p_nextofkin->p_cru.oublock	+= p->p_ru.oublock;
1176 		p->p_nextofkin->p_cru.msgsnd	+= p->p_ru.msgsnd;
1177 		p->p_nextofkin->p_cru.msgrcv	+= p->p_ru.msgrcv;
1178 		p->p_nextofkin->p_cru.nsignals	+= p->p_ru.nsignals;
1179 		p->p_nextofkin->p_cru.nvcsw	+= p->p_ru.nvcsw;
1180 		p->p_nextofkin->p_cru.nivcsw	+= p->p_ru.nivcsw;
1181 		p->p_nextofkin->p_cru.sysc	+= p->p_ru.sysc;
1182 		p->p_nextofkin->p_cru.ioch	+= p->p_ru.ioch;
1183 
1184 	}
1185 
1186 	q = p->p_nextofkin;
1187 	if (q && q->p_orphan == p)
1188 		q->p_orphan = p->p_nextorph;
1189 	else if (q) {
1190 		for (q = q->p_orphan; q; q = q->p_nextorph)
1191 			if (q->p_nextorph == p)
1192 				break;
1193 		ASSERT(q && q->p_nextorph == p);
1194 		q->p_nextorph = p->p_nextorph;
1195 	}
1196 
1197 	proc_detach(p);
1198 	pid_exit(p);	/* frees pid and proc structure */
1199 }
1200 
1201 /*
1202  * Delete process "child" from the newstate list of process "parent"
1203  */
1204 void
1205 delete_ns(proc_t *parent, proc_t *child)
1206 {
1207 	proc_t **ns;
1208 
1209 	ASSERT(MUTEX_HELD(&pidlock));
1210 	ASSERT(child->p_parent == parent);
1211 	for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1212 		if (*ns == child) {
1213 
1214 			ASSERT((*ns)->p_parent == parent);
1215 
1216 			*ns = child->p_sibling_ns;
1217 			child->p_sibling_ns = NULL;
1218 			return;
1219 		}
1220 	}
1221 }
1222 
1223 /*
1224  * Add process "child" to the new state list of process "parent"
1225  */
1226 void
1227 add_ns(proc_t *parent, proc_t *child)
1228 {
1229 	ASSERT(child->p_sibling_ns == NULL);
1230 	child->p_sibling_ns = parent->p_child_ns;
1231 	parent->p_child_ns = child;
1232 }
1233